• Frank Barchard's avatar
    scale by 1 for neon implemented · 451af5e9
    Frank Barchard authored
    // Converts a row of unsigned 16-bit integers to 16-bit half-precision floats
    // using AArch64 NEON, with no scaling applied.
    //
    //   src   - input row of 16-bit values; 16 bytes are read per loop pass.
    //   dst   - output row of 16-bit values; 16 bytes are written per loop pass.
    //   float - unnamed and unused in this body; it occupies the position that
    //           HalfFloatRow_NEON uses for its scale argument.
    //   width - number of elements to convert.
    //
    // The loop is bottom-tested: it always runs at least once and handles 8
    // elements per pass, continuing while the decremented width is > 0.
    // NOTE(review): callers presumably pass a positive multiple of 8, otherwise
    // the final pass reads/writes past `width` elements - confirm with callers.
    //
    // MEMACCESS(n) is a project macro defined outside this block; its argument
    // matches the asm operand index of the pointer used by the next instruction.
    void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) {
      asm volatile (
      "1:                                          \n"
        MEMACCESS(0)
        "ld1        {v1.16b}, [%0], #16            \n"  // load 8 shorts, src += 16 bytes
        "subs       %w2, %w2, #8                   \n"  // 8 pixels per loop; sets flags for b.gt
        "uxtl       v2.4s, v1.4h                   \n"  // zero-extend low 4 shorts to 32-bit ints
        "uxtl2      v1.4s, v1.8h                   \n"  // zero-extend high 4 shorts to 32-bit ints
        "scvtf      v2.4s, v2.4s                   \n"  // 8 ints -> 8 single-precision floats
        "scvtf      v1.4s, v1.4s                   \n"
        "fcvtn      v4.4h, v2.4s                   \n"  // narrow low 4 floats to half floats
        "fcvtn2     v4.8h, v1.4s                   \n"  // narrow high 4 floats into upper half of v4
       MEMACCESS(1)
        "st1        {v4.16b}, [%1], #16            \n"  // store 8 half floats, dst += 16 bytes
        "b.gt       1b                             \n"  // loop while remaining width > 0
      : "+r"(src),    // %0
        "+r"(dst),    // %1
        "+r"(width)   // %2
      :
      // Flags are modified by subs, memory is written through dst, and v1/v2/v4
      // are used as scratch vector registers.
      : "cc", "memory", "v1", "v2", "v4"
      );
    }
    
    // Converts a row of unsigned 16-bit integers to 16-bit half-precision floats
    // using AArch64 NEON, multiplying each value by `scale` along the way.
    //
    //   src   - input row of 16-bit values; 16 bytes are read per loop pass.
    //   dst   - output row of 16-bit values; 16 bytes are written per loop pass.
    //   scale - multiplier applied to every element before conversion.
    //   width - number of elements to convert.
    //
    // Instead of a float->half convert instruction, the half float is built from
    // the single-precision bit pattern: each value is multiplied by
    // scale * 1.9259299444e-34f (the constant is approximately 2^-112, i.e. the
    // difference between the float32 exponent bias 127 and the float16 bias 15),
    // then the 32-bit pattern is shifted right by 13 (23 - 10 mantissa bits) and
    // narrowed to 16 bits with unsigned saturation.
    //
    // The loop is bottom-tested: it always runs at least once and handles 8
    // elements per pass, continuing while the decremented width is > 0.
    // NOTE(review): callers presumably pass a positive multiple of 8, otherwise
    // the final pass reads/writes past `width` elements - confirm with callers.
    //
    // MEMACCESS(n) is a project macro defined outside this block; its argument
    // matches the asm operand index of the pointer used by the next instruction.
    void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width) {
      asm volatile (
      "1:                                          \n"
        MEMACCESS(0)
        "ld1        {v1.16b}, [%0], #16            \n"  // load 8 shorts, src += 16 bytes
        "subs       %w2, %w2, #8                   \n"  // 8 pixels per loop; sets flags for b.gt
        "uxtl       v2.4s, v1.4h                   \n"  // zero-extend low 4 shorts to 32-bit ints
        "uxtl2      v1.4s, v1.8h                   \n"  // zero-extend high 4 shorts to 32-bit ints
        "scvtf      v2.4s, v2.4s                   \n"  // 8 ints -> 8 single-precision floats
        "scvtf      v1.4s, v1.4s                   \n"
        "fmul       v2.4s, v2.4s, %3.s[0]          \n"  // apply scale and rebias the exponent
        "fmul       v1.4s, v1.4s, %3.s[0]          \n"
        "uqshrn     v4.4h, v2.4s, #13              \n"  // shift float bits right 13, narrow to half float
        "uqshrn2    v4.8h, v1.4s, #13              \n"  // same for the high 4, into upper half of v4
       MEMACCESS(1)
        "st1        {v4.16b}, [%1], #16            \n"  // store 8 half floats, dst += 16 bytes
        "b.gt       1b                             \n"  // loop while remaining width > 0
      : "+r"(src),    // %0
        "+r"(dst),    // %1
        "+r"(width)   // %2
      // The combined multiplier is computed once in C and passed in a
      // floating-point/SIMD register; the asm reads it as lane s[0].
      : "w"(scale * 1.9259299444e-34f)    // %3
      // Flags are modified by subs, memory is written through dst, and v1/v2/v4
      // are used as scratch vector registers.
      : "cc", "memory", "v1", "v2", "v4"
      );
    }
    
    TEST=LibYUVPlanarTest.TestHalfFloatPlane_One
    BUG=libyuv:560
    R=hubbe@chromium.org
    
    Review URL: https://codereview.chromium.org/2430313008 .
    451af5e9
Name
Last commit
Last update
build_overrides Loading commit data...
chromium Loading commit data...
docs Loading commit data...
include Loading commit data...
source Loading commit data...
third_party/gflags Loading commit data...
tools Loading commit data...
unit_test Loading commit data...
util Loading commit data...
.gitignore Loading commit data...
.gn Loading commit data...
AUTHORS Loading commit data...
Android.mk Loading commit data...
BUILD.gn Loading commit data...
CMakeLists.txt Loading commit data...
DEPS Loading commit data...
LICENSE Loading commit data...
LICENSE_THIRD_PARTY Loading commit data...
OWNERS Loading commit data...
PATENTS Loading commit data...
PRESUBMIT.py Loading commit data...
README.chromium Loading commit data...
README.md Loading commit data...
all.gyp Loading commit data...
codereview.settings Loading commit data...
download_vs_toolchain.py Loading commit data...
gyp_libyuv Loading commit data...
gyp_libyuv.py Loading commit data...
libyuv.gni Loading commit data...
libyuv.gyp Loading commit data...
libyuv.gypi Loading commit data...
libyuv_nacl.gyp Loading commit data...
libyuv_test.gyp Loading commit data...
linux.mk Loading commit data...
public.mk Loading commit data...
setup_links.py Loading commit data...
sync_chromium.py Loading commit data...
winarm.mk Loading commit data...