Commit 48e53643 authored by fbarchard@google.com's avatar fbarchard@google.com

Use xor/mov bx instead of movzx to avoid drmemory bug

BUG=none
TEST=none
R=johannkoenig@google.com, tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/4879004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@891 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 064d2768
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 890
Version: 891
License: BSD
License File: LICENSE
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 890
#define LIBYUV_VERSION 891
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -95,12 +95,12 @@ int I422ToI420(const uint8* src_y, int src_stride_y,
// Resample U plane.
ScalePlane(src_u, src_stride_u, halfwidth, height,
dst_u, dst_stride_u, halfwidth, halfheight,
kFilterNone);
kFilterBilinear);
// Resample V plane.
ScalePlane(src_v, src_stride_v, halfwidth, height,
dst_v, dst_stride_v, halfwidth, halfheight,
kFilterNone);
kFilterBilinear);
return 0;
}
......@@ -141,17 +141,19 @@ int I444ToI420(const uint8* src_y, int src_stride_y,
// Resample U plane.
ScalePlane(src_u, src_stride_u, width, height,
dst_u, dst_stride_u, halfwidth, halfheight,
kFilterNone);
kFilterBilinear);
// Resample V plane.
ScalePlane(src_v, src_stride_v, width, height,
dst_v, dst_stride_v, halfwidth, halfheight,
kFilterNone);
kFilterBilinear);
return 0;
}
// 411 chroma is 1/4 width, 1x height
// 420 chroma is 1/2 width, 1/2 height
// TODO(fbarchard): Change to kFilterBilinear; Test with valgrind.
// TODO(fbarchard): Share code for 444 and 422 to 420.
LIBYUV_API
int I411ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
......
......@@ -108,12 +108,12 @@ static uvec16 kScaleAb2 =
#define MEMOPREG(opcode, offset, base, index, scale, reg) \
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
#opcode " (%%r15,%%r14),%%" #reg "\n"
#define MEMOPREGK(opcode, offset, base, index, scale, reg) \
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
#opcode " (%%r15,%%r14),%k" #reg "\n"
#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
#opcode " %%" #reg ",(%%r15,%%r14)\n"
#define MEMOP(opcode, offset, base, index, scale) \
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
#opcode " (%%r15,%%r14)"
#define BUNDLEALIGN ".p2align 5\n"
#else
#define MEMACCESS(base) "(%" #base ")"
......@@ -125,10 +125,10 @@ static uvec16 kScaleAb2 =
#offset "(%" #base ",%" #index "," #scale ")"
#define MEMOPREG(opcode, offset, base, index, scale, reg) \
#opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
#define MEMOPREGK(opcode, offset, base, index, scale, reg) \
#opcode " " #offset "(%" #base ",%" #index "," #scale "),%k" #reg "\n"
#define MEMOPMEM(opcode, reg, offset, base, index, scale) \
#opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
#define MEMOP(opcode, offset, base, index, scale) \
#opcode " " #offset "(%" #base ",%" #index "," #scale ")"
#define BUNDLEALIGN
#endif
......@@ -857,11 +857,13 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"2: \n"
"movdqa %%xmm2,%%xmm1 \n"
"paddd %%xmm3,%%xmm2 \n"
MEMOPREGK(movzwl,0x00,1,3,1,2) // movzwl (%1,%3,1),%k2
"xor %2,%2 \n"
MEMOP(mov,0x00,1,3,1) ",%w2 \n" // mov (%1,%3,1),%w2
"movd %k2,%%xmm0 \n"
"psrlw $0x9,%%xmm1 \n"
BUNDLEALIGN
MEMOPREGK(movzwl,0x00,1,4,1,2) // movzwl (%1,%4,1),%k2
"xor %2,%2 \n"
MEMOP(mov,0x00,1,4,1) ",%w2 \n" // mov (%1,%4,1),%w2
"movd %k2,%%xmm4 \n"
"pshufb %%xmm5,%%xmm1 \n"
"punpcklwd %%xmm4,%%xmm0 \n"
......@@ -881,7 +883,8 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"29: \n"
"addl $0x1,%5 \n"
"jl 99f \n"
MEMOPREGK(movzwl,0x00,1,3,1,2) // movzwl (%1,%3,1),%k2
"xor %2,%2 \n"
MEMOP(mov,0x00,1,3,1) ",%w2 \n" // mov (%1,%3,1),%w2
"movd %k2,%%xmm0 \n"
"psrlw $0x9,%%xmm2 \n"
"pshufb %%xmm5,%%xmm2 \n"
......
......@@ -791,6 +791,13 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
// Bilinear column filtering. SSSE3 version.
// TODO(fbarchard): Port to Neon
// TODO(fbarchard): Switch the following:
// xor ebx, ebx
// mov bx, word ptr [esi + eax] // 2 source x0 pixels
// To
// movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
// when drmemory bug fixed.
// https://code.google.com/p/drmemory/issues/detail?id=1396
__declspec(naked) __declspec(align(16))
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
......@@ -824,10 +831,12 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
xloop2:
movdqa xmm1, xmm2 // x0, x1 fractions.
paddd xmm2, xmm3 // x += dx
movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
xor ebx, ebx
mov bx, word ptr [esi + eax] // 2 source x0 pixels
movd xmm0, ebx
psrlw xmm1, 9 // 7 bit fractions.
movzx ebx, word ptr [esi + edx] // 2 source x1 pixels
xor ebx, ebx
mov bx, word ptr [esi + edx] // 2 source x1 pixels
movd xmm4, ebx
pshufb xmm1, xmm5 // 0011
punpcklwd xmm0, xmm4
......@@ -850,7 +859,8 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
jl xloop99
// 1 pixel remainder
movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
xor ebx, ebx
mov bx, word ptr [esi + eax] // 2 source x0 pixels
movd xmm0, ebx
psrlw xmm2, 9 // 7 bit fractions.
pshufb xmm2, xmm5 // 0011
......
......@@ -19,8 +19,8 @@
#define BENCHMARK_ITERATIONS 1
libyuvTest::libyuvTest() : rotate_max_w_(128), rotate_max_h_(128),
benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(22),
benchmark_height_(14) {
benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(33),
benchmark_height_(17) {
const char* repeat = getenv("LIBYUV_REPEAT");
if (repeat) {
benchmark_iterations_ = atoi(repeat); // NOLINT
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment