Commit 0db78ad1 authored by fbarchard@google.com's avatar fbarchard@google.com

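Switch from the two-instruction sequence `xor ebx, ebx` / `mov bx, word ptr [...]` to a single `movzx ebx, word ptr [...]` (`movzwl` in the GCC inline asm), which still passes drmemory and valgrind.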

BUG=none
TESTED=drmemory

Review URL: https://webrtc-codereview.appspot.com/5339004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@904 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 5f29eaaf
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 902
Version: 904
License: BSD
License File: LICENSE
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 902
#define LIBYUV_VERSION 904
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -857,13 +857,11 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"2: \n"
"movdqa %%xmm2,%%xmm1 \n"
"paddd %%xmm3,%%xmm2 \n"
"xor %2,%2 \n"
MEMOP(mov,0x00,1,3,1) ",%w2 \n" // mov (%1,%3,1),%w2
MEMOP(movzwl,0x00,1,3,1) ",%k2 \n" // movzwl (%1,%3,1),%k2
"movd %k2,%%xmm0 \n"
"psrlw $0x9,%%xmm1 \n"
BUNDLEALIGN
"xor %2,%2 \n"
MEMOP(mov,0x00,1,4,1) ",%w2 \n" // mov (%1,%4,1),%w2
MEMOP(movzwl,0x00,1,4,1) ",%k2 \n" // movzwl (%1,%4,1),%k2
"movd %k2,%%xmm4 \n"
"pshufb %%xmm5,%%xmm1 \n"
"punpcklwd %%xmm4,%%xmm0 \n"
......@@ -883,8 +881,7 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"29: \n"
"addl $0x1,%5 \n"
"jl 99f \n"
"xor %2,%2 \n"
MEMOP(mov,0x00,1,3,1) ",%w2 \n" // mov (%1,%3,1),%w2
MEMOP(movzwl,0x00,1,3,1) ",%k2 \n" // movzwl (%1,%3,1),%k2
"movd %k2,%%xmm0 \n"
"psrlw $0x9,%%xmm2 \n"
"pshufb %%xmm5,%%xmm2 \n"
......
......@@ -831,12 +831,10 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
xloop2:
movdqa xmm1, xmm2 // x0, x1 fractions.
paddd xmm2, xmm3 // x += dx
xor ebx, ebx
mov bx, word ptr [esi + eax] // 2 source x0 pixels
movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
movd xmm0, ebx
psrlw xmm1, 9 // 7 bit fractions.
xor ebx, ebx
mov bx, word ptr [esi + edx] // 2 source x1 pixels
movzx ebx, word ptr [esi + edx] // 2 source x1 pixels
movd xmm4, ebx
pshufb xmm1, xmm5 // 0011
punpcklwd xmm0, xmm4
......@@ -859,8 +857,7 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
jl xloop99
// 1 pixel remainder
xor ebx, ebx
mov bx, word ptr [esi + eax] // 2 source x0 pixels
movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
movd xmm0, ebx
psrlw xmm2, 9 // 7 bit fractions.
pshufb xmm2, xmm5 // 0011
......
......@@ -19,8 +19,8 @@
#define BENCHMARK_ITERATIONS 1
libyuvTest::libyuvTest() : rotate_max_w_(128), rotate_max_h_(128),
benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(353),
benchmark_height_(264) {
benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(33),
benchmark_height_(17) {
const char* repeat = getenv("LIBYUV_REPEAT");
if (repeat) {
benchmark_iterations_ = atoi(repeat); // NOLINT
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment