Commit 48e53643 authored by fbarchard@google.com

Use xor/mov bx instead of movzx to avoid drmemory bug

BUG=none
TEST=none
R=johannkoenig@google.com, tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/4879004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@891 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 064d2768
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 890 Version: 891
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 890 #define LIBYUV_VERSION 891
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -95,12 +95,12 @@ int I422ToI420(const uint8* src_y, int src_stride_y, ...@@ -95,12 +95,12 @@ int I422ToI420(const uint8* src_y, int src_stride_y,
// Resample U plane. // Resample U plane.
ScalePlane(src_u, src_stride_u, halfwidth, height, ScalePlane(src_u, src_stride_u, halfwidth, height,
dst_u, dst_stride_u, halfwidth, halfheight, dst_u, dst_stride_u, halfwidth, halfheight,
kFilterNone); kFilterBilinear);
// Resample V plane. // Resample V plane.
ScalePlane(src_v, src_stride_v, halfwidth, height, ScalePlane(src_v, src_stride_v, halfwidth, height,
dst_v, dst_stride_v, halfwidth, halfheight, dst_v, dst_stride_v, halfwidth, halfheight,
kFilterNone); kFilterBilinear);
return 0; return 0;
} }
...@@ -141,17 +141,19 @@ int I444ToI420(const uint8* src_y, int src_stride_y, ...@@ -141,17 +141,19 @@ int I444ToI420(const uint8* src_y, int src_stride_y,
// Resample U plane. // Resample U plane.
ScalePlane(src_u, src_stride_u, width, height, ScalePlane(src_u, src_stride_u, width, height,
dst_u, dst_stride_u, halfwidth, halfheight, dst_u, dst_stride_u, halfwidth, halfheight,
kFilterNone); kFilterBilinear);
// Resample V plane. // Resample V plane.
ScalePlane(src_v, src_stride_v, width, height, ScalePlane(src_v, src_stride_v, width, height,
dst_v, dst_stride_v, halfwidth, halfheight, dst_v, dst_stride_v, halfwidth, halfheight,
kFilterNone); kFilterBilinear);
return 0; return 0;
} }
// 411 chroma is 1/4 width, 1x height // 411 chroma is 1/4 width, 1x height
// 420 chroma is 1/2 width, 1/2 height // 420 chroma is 1/2 width, 1/2 height
// TODO(fbarchard): Change to kFilterBilinear; Test with valgrind.
// TODO(fbarchard): Share code for 444 and 422 to 420.
LIBYUV_API LIBYUV_API
int I411ToI420(const uint8* src_y, int src_stride_y, int I411ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
......
...@@ -108,12 +108,12 @@ static uvec16 kScaleAb2 = ...@@ -108,12 +108,12 @@ static uvec16 kScaleAb2 =
#define MEMOPREG(opcode, offset, base, index, scale, reg) \ #define MEMOPREG(opcode, offset, base, index, scale, reg) \
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
#opcode " (%%r15,%%r14),%%" #reg "\n" #opcode " (%%r15,%%r14),%%" #reg "\n"
#define MEMOPREGK(opcode, offset, base, index, scale, reg) \
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
#opcode " (%%r15,%%r14),%k" #reg "\n"
#define MEMOPMEM(opcode, reg, offset, base, index, scale) \ #define MEMOPMEM(opcode, reg, offset, base, index, scale) \
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
#opcode " %%" #reg ",(%%r15,%%r14)\n" #opcode " %%" #reg ",(%%r15,%%r14)\n"
#define MEMOP(opcode, offset, base, index, scale) \
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
#opcode " (%%r15,%%r14)"
#define BUNDLEALIGN ".p2align 5\n" #define BUNDLEALIGN ".p2align 5\n"
#else #else
#define MEMACCESS(base) "(%" #base ")" #define MEMACCESS(base) "(%" #base ")"
...@@ -125,10 +125,10 @@ static uvec16 kScaleAb2 = ...@@ -125,10 +125,10 @@ static uvec16 kScaleAb2 =
#offset "(%" #base ",%" #index "," #scale ")" #offset "(%" #base ",%" #index "," #scale ")"
#define MEMOPREG(opcode, offset, base, index, scale, reg) \ #define MEMOPREG(opcode, offset, base, index, scale, reg) \
#opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n" #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
#define MEMOPREGK(opcode, offset, base, index, scale, reg) \
#opcode " " #offset "(%" #base ",%" #index "," #scale "),%k" #reg "\n"
#define MEMOPMEM(opcode, reg, offset, base, index, scale) \ #define MEMOPMEM(opcode, reg, offset, base, index, scale) \
#opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n" #opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
#define MEMOP(opcode, offset, base, index, scale) \
#opcode " " #offset "(%" #base ",%" #index "," #scale ")"
#define BUNDLEALIGN #define BUNDLEALIGN
#endif #endif
...@@ -857,11 +857,13 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ...@@ -857,11 +857,13 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"2: \n" "2: \n"
"movdqa %%xmm2,%%xmm1 \n" "movdqa %%xmm2,%%xmm1 \n"
"paddd %%xmm3,%%xmm2 \n" "paddd %%xmm3,%%xmm2 \n"
MEMOPREGK(movzwl,0x00,1,3,1,2) // movzwl (%1,%3,1),%k2 "xor %2,%2 \n"
MEMOP(mov,0x00,1,3,1) ",%w2 \n" // mov (%1,%3,1),%w2
"movd %k2,%%xmm0 \n" "movd %k2,%%xmm0 \n"
"psrlw $0x9,%%xmm1 \n" "psrlw $0x9,%%xmm1 \n"
BUNDLEALIGN BUNDLEALIGN
MEMOPREGK(movzwl,0x00,1,4,1,2) // movzwl (%1,%4,1),%k2 "xor %2,%2 \n"
MEMOP(mov,0x00,1,4,1) ",%w2 \n" // mov (%1,%4,1),%w2
"movd %k2,%%xmm4 \n" "movd %k2,%%xmm4 \n"
"pshufb %%xmm5,%%xmm1 \n" "pshufb %%xmm5,%%xmm1 \n"
"punpcklwd %%xmm4,%%xmm0 \n" "punpcklwd %%xmm4,%%xmm0 \n"
...@@ -881,7 +883,8 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ...@@ -881,7 +883,8 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"29: \n" "29: \n"
"addl $0x1,%5 \n" "addl $0x1,%5 \n"
"jl 99f \n" "jl 99f \n"
MEMOPREGK(movzwl,0x00,1,3,1,2) // movzwl (%1,%3,1),%k2 "xor %2,%2 \n"
MEMOP(mov,0x00,1,3,1) ",%w2 \n" // mov (%1,%3,1),%w2
"movd %k2,%%xmm0 \n" "movd %k2,%%xmm0 \n"
"psrlw $0x9,%%xmm2 \n" "psrlw $0x9,%%xmm2 \n"
"pshufb %%xmm5,%%xmm2 \n" "pshufb %%xmm5,%%xmm2 \n"
......
...@@ -791,6 +791,13 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -791,6 +791,13 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
// Bilinear column filtering. SSSE3 version. // Bilinear column filtering. SSSE3 version.
// TODO(fbarchard): Port to Neon // TODO(fbarchard): Port to Neon
// TODO(fbarchard): Switch the following:
// xor ebx, ebx
// mov bx, word ptr [esi + eax] // 2 source x0 pixels
// To
// movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
// when drmemory bug fixed.
// https://code.google.com/p/drmemory/issues/detail?id=1396
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
...@@ -824,10 +831,12 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ...@@ -824,10 +831,12 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
xloop2: xloop2:
movdqa xmm1, xmm2 // x0, x1 fractions. movdqa xmm1, xmm2 // x0, x1 fractions.
paddd xmm2, xmm3 // x += dx paddd xmm2, xmm3 // x += dx
movzx ebx, word ptr [esi + eax] // 2 source x0 pixels xor ebx, ebx
mov bx, word ptr [esi + eax] // 2 source x0 pixels
movd xmm0, ebx movd xmm0, ebx
psrlw xmm1, 9 // 7 bit fractions. psrlw xmm1, 9 // 7 bit fractions.
movzx ebx, word ptr [esi + edx] // 2 source x1 pixels xor ebx, ebx
mov bx, word ptr [esi + edx] // 2 source x1 pixels
movd xmm4, ebx movd xmm4, ebx
pshufb xmm1, xmm5 // 0011 pshufb xmm1, xmm5 // 0011
punpcklwd xmm0, xmm4 punpcklwd xmm0, xmm4
...@@ -850,7 +859,8 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ...@@ -850,7 +859,8 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
jl xloop99 jl xloop99
// 1 pixel remainder // 1 pixel remainder
movzx ebx, word ptr [esi + eax] // 2 source x0 pixels xor ebx, ebx
mov bx, word ptr [esi + eax] // 2 source x0 pixels
movd xmm0, ebx movd xmm0, ebx
psrlw xmm2, 9 // 7 bit fractions. psrlw xmm2, 9 // 7 bit fractions.
pshufb xmm2, xmm5 // 0011 pshufb xmm2, xmm5 // 0011
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
#define BENCHMARK_ITERATIONS 1 #define BENCHMARK_ITERATIONS 1
libyuvTest::libyuvTest() : rotate_max_w_(128), rotate_max_h_(128), libyuvTest::libyuvTest() : rotate_max_w_(128), rotate_max_h_(128),
benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(22), benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(33),
benchmark_height_(14) { benchmark_height_(17) {
const char* repeat = getenv("LIBYUV_REPEAT"); const char* repeat = getenv("LIBYUV_REPEAT");
if (repeat) { if (repeat) {
benchmark_iterations_ = atoi(repeat); // NOLINT benchmark_iterations_ = atoi(repeat); // NOLINT
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment