Commit 0db78ad1 authored by fbarchard@google.com

Switch from the xor ebx, ebx / mov bx pair to a single movzx ebx, which still passes drmemory and valgrind.

BUG=none
TESTED=drmemory

Review URL: https://webrtc-codereview.appspot.com/5339004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@904 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 5f29eaaf
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 902 Version: 904
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 902 #define LIBYUV_VERSION 904
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -857,13 +857,11 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ...@@ -857,13 +857,11 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"2: \n" "2: \n"
"movdqa %%xmm2,%%xmm1 \n" "movdqa %%xmm2,%%xmm1 \n"
"paddd %%xmm3,%%xmm2 \n" "paddd %%xmm3,%%xmm2 \n"
"xor %2,%2 \n" MEMOP(movzwl,0x00,1,3,1) ",%k2 \n" // movzwl (%1,%3,1),%k2
MEMOP(mov,0x00,1,3,1) ",%w2 \n" // mov (%1,%3,1),%w2
"movd %k2,%%xmm0 \n" "movd %k2,%%xmm0 \n"
"psrlw $0x9,%%xmm1 \n" "psrlw $0x9,%%xmm1 \n"
BUNDLEALIGN BUNDLEALIGN
"xor %2,%2 \n" MEMOP(movzwl,0x00,1,4,1) ",%k2 \n" // movzwl (%1,%4,1),%k2
MEMOP(mov,0x00,1,4,1) ",%w2 \n" // mov (%1,%4,1),%w2
"movd %k2,%%xmm4 \n" "movd %k2,%%xmm4 \n"
"pshufb %%xmm5,%%xmm1 \n" "pshufb %%xmm5,%%xmm1 \n"
"punpcklwd %%xmm4,%%xmm0 \n" "punpcklwd %%xmm4,%%xmm0 \n"
...@@ -883,8 +881,7 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ...@@ -883,8 +881,7 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"29: \n" "29: \n"
"addl $0x1,%5 \n" "addl $0x1,%5 \n"
"jl 99f \n" "jl 99f \n"
"xor %2,%2 \n" MEMOP(movzwl,0x00,1,3,1) ",%k2 \n" // movzwl (%1,%3,1),%k2
MEMOP(mov,0x00,1,3,1) ",%w2 \n" // mov (%1,%3,1),%w2
"movd %k2,%%xmm0 \n" "movd %k2,%%xmm0 \n"
"psrlw $0x9,%%xmm2 \n" "psrlw $0x9,%%xmm2 \n"
"pshufb %%xmm5,%%xmm2 \n" "pshufb %%xmm5,%%xmm2 \n"
......
...@@ -831,12 +831,10 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ...@@ -831,12 +831,10 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
xloop2: xloop2:
movdqa xmm1, xmm2 // x0, x1 fractions. movdqa xmm1, xmm2 // x0, x1 fractions.
paddd xmm2, xmm3 // x += dx paddd xmm2, xmm3 // x += dx
xor ebx, ebx movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
mov bx, word ptr [esi + eax] // 2 source x0 pixels
movd xmm0, ebx movd xmm0, ebx
psrlw xmm1, 9 // 7 bit fractions. psrlw xmm1, 9 // 7 bit fractions.
xor ebx, ebx movzx ebx, word ptr [esi + edx] // 2 source x1 pixels
mov bx, word ptr [esi + edx] // 2 source x1 pixels
movd xmm4, ebx movd xmm4, ebx
pshufb xmm1, xmm5 // 0011 pshufb xmm1, xmm5 // 0011
punpcklwd xmm0, xmm4 punpcklwd xmm0, xmm4
...@@ -859,8 +857,7 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, ...@@ -859,8 +857,7 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
jl xloop99 jl xloop99
// 1 pixel remainder // 1 pixel remainder
xor ebx, ebx movzx ebx, word ptr [esi + eax] // 2 source x0 pixels
mov bx, word ptr [esi + eax] // 2 source x0 pixels
movd xmm0, ebx movd xmm0, ebx
psrlw xmm2, 9 // 7 bit fractions. psrlw xmm2, 9 // 7 bit fractions.
pshufb xmm2, xmm5 // 0011 pshufb xmm2, xmm5 // 0011
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
#define BENCHMARK_ITERATIONS 1 #define BENCHMARK_ITERATIONS 1
libyuvTest::libyuvTest() : rotate_max_w_(128), rotate_max_h_(128), libyuvTest::libyuvTest() : rotate_max_w_(128), rotate_max_h_(128),
benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(353), benchmark_iterations_(BENCHMARK_ITERATIONS), benchmark_width_(33),
benchmark_height_(264) { benchmark_height_(17) {
const char* repeat = getenv("LIBYUV_REPEAT"); const char* repeat = getenv("LIBYUV_REPEAT");
if (repeat) { if (repeat) {
benchmark_iterations_ = atoi(repeat); // NOLINT benchmark_iterations_ = atoi(repeat); // NOLINT
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment