Commit 48e53643 authored by fbarchard@google.com

Use xor/mov bx instead of movzx to avoid drmemory bug

BUG=none
TEST=none
R=johannkoenig@google.com, tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/4879004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@891 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 064d2768
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 890 Version: 891
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 890 #define LIBYUV_VERSION 891
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -95,12 +95,12 @@ int I422ToI420(const uint8* src_y, int src_stride_y, ...@@ -95,12 +95,12 @@ int I422ToI420(const uint8* src_y, int src_stride_y,
// Resample U plane. // Resample U plane.
ScalePlane(src_u, src_stride_u, halfwidth, height, ScalePlane(src_u, src_stride_u, halfwidth, height,
dst_u, dst_stride_u, halfwidth, halfheight, dst_u, dst_stride_u, halfwidth, halfheight,
kFilterNone); kFilterBilinear);
// Resample V plane. // Resample V plane.
ScalePlane(src_v, src_stride_v, halfwidth, height, ScalePlane(src_v, src_stride_v, halfwidth, height,
dst_v, dst_stride_v, halfwidth, halfheight, dst_v, dst_stride_v, halfwidth, halfheight,
kFilterNone); kFilterBilinear);
return 0; return 0;
} }
...@@ -141,17 +141,19 @@ int I444ToI420(const uint8* src_y, int src_stride_y, ...@@ -141,17 +141,19 @@ int I444ToI420(const uint8* src_y, int src_stride_y,
// Resample U plane. // Resample U plane.
ScalePlane(src_u, src_stride_u, width, height, ScalePlane(src_u, src_stride_u, width, height,
dst_u, dst_stride_u, halfwidth, halfheight, dst_u, dst_stride_u, halfwidth, halfheight,
kFilterNone); kFilterBilinear);
// Resample V plane. // Resample V plane.
ScalePlane(src_v, src_stride_v, width, height, ScalePlane(src_v, src_stride_v, width, height,
dst_v, dst_stride_v, halfwidth, halfheight, dst_v, dst_stride_v, halfwidth, halfheight,
kFilterNone); kFilterBilinear);
return 0; return 0;
} }
// 411 chroma is 1/4 width, 1x height // 411 chroma is 1/4 width, 1x height
// 420 chroma is 1/2 width, 1/2 height // 420 chroma is 1/2 width, 1/2 height
// TODO(fbarchard): Change to kFilterBilinear; Test with valgrind.
// TODO(fbarchard): Share code for 444 and 422 to 420.
LIBYUV_API LIBYUV_API
int I411ToI420(const uint8* src_y, int src_stride_y, int I411ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
......
...@@ -108,12 +108,12 @@ static uvec16 kScaleAb2 = ...@@ -108,12 +108,12 @@ static uvec16 kScaleAb2 =
#define MEMOPREG(opcode, offset, base, index, scale, reg) \ #define MEMOPREG(opcode, offset, base, index, scale, reg) \
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
#opcode " (%%r15,%%r14),%%" #reg "\n" #opcode " (%%r15,%%r14),%%" #reg "\n"
#define MEMOPREGK(opcode, offset, base, index, scale, reg) \
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
#opcode " (%%r15,%%r14),%k" #reg "\n"
#define MEMOPMEM(opcode, reg, offset, base, index, scale) \ #define MEMOPMEM(opcode, reg, offset, base, index, scale) \
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
#opcode " %%" #reg ",(%%r15,%%r14)\n" #opcode " %%" #reg ",(%%r15,%%r14)\n"
#define MEMOP(opcode, offset, base, index, scale) \
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
#opcode " (%%r15,%%r14)"
#define BUNDLEALIGN ".p2align 5\n" #define BUNDLEALIGN ".p2align 5\n"
#else #else
#define MEMACCESS(base) "(%" #base ")" #define MEMACCESS(base) "(%" #base ")"
...@@ -125,10 +125,10 @@ static uvec16 kScaleAb2 = ...@@ -125,10 +125,10 @@ static uvec16 kScaleAb2 =
#offset "(%" #base ",%" #index "," #scale ")" #offset "(%" #base ",%" #index "," #scale ")"
#define MEMOPREG(opcode, offset, base, index, scale, reg) \ #define MEMOPREG(opcode, offset, base, index, scale, reg) \
#opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n" #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
#define MEMOPREGK(opcode, offset, base, index, scale, reg) \
#opcode " " #offset "(%" #base ",%" #index "," #scale "),%k" #reg "\n"
#define MEMOPMEM(opcode, reg, offset, base, index, scale) \ #define MEMOPMEM(opcode, reg, offset, base, index, scale) \
#opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n" #opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
#define MEMOP(opcode, offset, base, index, scale) \
#opcode " " #offset "(%" #base ",%" #index "," #scale ")"
#define BUNDLEALIGN #define BUNDLEALIGN
#endif #endif
...@@ -857,11 +857,13 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ...@@ -857,11 +857,13 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"2: \n" "2: \n"
"movdqa %%xmm2,%%xmm1 \n" "movdqa %%xmm2,%%xmm1 \n"
"paddd %%xmm3,%%xmm2 \n" "paddd %%xmm3,%%xmm2 \n"
MEMOPREGK(movzwl,0x00,1,3,1,2) // movzwl (%1,%3,1),%k2 "xor %2,%2 \n"
MEMOP(mov,0x00,1,3,1) ",%w2 \n" // mov (%1,%3,1),%w2
"movd %k2,%%xmm0 \n" "movd %k2,%%xmm0 \n"
"psrlw $0x9,%%xmm1 \n" "psrlw $0x9,%%xmm1 \n"
BUNDLEALIGN BUNDLEALIGN
MEMOPREGK(movzwl,0x00,1,4,1,2) // movzwl (%1,%4,1),%k2 "xor %2,%2 \n"
MEMOP(mov,0x00,1,4,1) ",%w2 \n" // mov (%1,%4,1),%w2
"movd %k2,%%xmm4 \n" "movd %k2,%%xmm4 \n"
"pshufb %%xmm5,%%xmm1 \n" "pshufb %%xmm5,%%xmm1 \n"
"punpcklwd %%xmm4,%%xmm0 \n" "punpcklwd %%xmm4,%%xmm0 \n"
...@@ -881,7 +883,8 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ...@@ -881,7 +883,8 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"29: \n" "29: \n"
"addl $0x1,%5 \n" "addl $0x1,%5 \n"
"jl 99f \n" "jl 99f \n"
MEMOPREGK(movzwl,0x00,1,3,1,2) // movzwl (%1,%3,1),%k2 "xor %2,%2 \n"
MEMOP(mov,0x00,1,3,1) ",%w2 \n" // mov (%1,%3,1),%w2
"movd %k2,%%xmm0 \n" "movd %k2,%%xmm0 \n"
"psrlw $0x9,%%xmm2 \n" "psrlw $0x9,%%xmm2 \n"
"pshufb %%xmm5,%%xmm2 \n" "pshufb %%xmm5,%%xmm2 \n"
......
...@@ -791,6 +791,13 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -791,6 +791,13 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
// Bilinear column filtering. SSSE3 version. // Bilinear column filtering. SSSE3 version.
// TODO(fbarchard): Port to Neon // TODO(fbarchard): Port to Neon
// TODO(fbarchard): Switch the following:
// xor ebx, ebx
// mov bx, word ptr [esi + eax] // 2 source x0 pixels
// To
// movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
// when drmemory bug fixed.
// https://code.google.com/p/drmemory/issues/detail?id=1396
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
...@@ -824,10 +831,12 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ...@@ -824,10 +831,12 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
xloop2: xloop2:
movdqa xmm1, xmm2 // x0, x1 fractions. movdqa xmm1, xmm2 // x0, x1 fractions.
paddd xmm2, xmm3 // x += dx paddd xmm2, xmm3 // x += dx
movzx ebx, word ptr [esi + eax] // 2 source x0 pixels xor ebx, ebx
mov bx, word ptr [esi + eax] // 2 source x0 pixels
movd xmm0, ebx movd xmm0, ebx
psrlw xmm1, 9 // 7 bit fractions. psrlw xmm1, 9 // 7 bit fractions.
movzx ebx, word ptr [esi + edx] // 2 source x1 pixels xor ebx, ebx
mov bx, word ptr [esi + edx] // 2 source x1 pixels
movd xmm4, ebx movd xmm4, ebx
pshufb xmm1, xmm5 // 0011 pshufb xmm1, xmm5 // 0011
punpcklwd xmm0, xmm4 punpcklwd xmm0, xmm4
...@@ -850,7 +859,8 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ...@@ -850,7 +859,8 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
jl xloop99 jl xloop99
// 1 pixel remainder // 1 pixel remainder
movzx ebx, word ptr [esi + eax] // 2 source x0 pixels xor ebx, ebx
mov bx, word ptr [esi + eax] // 2 source x0 pixels
movd xmm0, ebx movd xmm0, ebx
psrlw xmm2, 9 // 7 bit fractions. psrlw xmm2, 9 // 7 bit fractions.
pshufb xmm2, xmm5 // 0011 pshufb xmm2, xmm5 // 0011
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
#define BENCHMARK_ITERATIONS 1 #define BENCHMARK_ITERATIONS 1
libyuvTest::libyuvTest() : rotate_max_w_(128), rotate_max_h_(128), libyuvTest::libyuvTest() : rotate_max_w_(128), rotate_max_h_(128),
benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(22), benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(33),
benchmark_height_(14) { benchmark_height_(17) {
const char* repeat = getenv("LIBYUV_REPEAT"); const char* repeat = getenv("LIBYUV_REPEAT");
if (repeat) { if (repeat) {
benchmark_iterations_ = atoi(repeat); // NOLINT benchmark_iterations_ = atoi(repeat); // NOLINT
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment