Commit 1eb636d2 authored by fbarchard@google.com

Remove the initial lea in the mirror functions and fold its offset into the addressing mode of the load instead.

BUG=none
TESTED=local libyuv unittests on windows
R=harryjin@google.com

Review URL: https://webrtc-codereview.appspot.com/26169004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1165 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 35508d09
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1163 Version: 1164
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1163 #define LIBYUV_VERSION 1164
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -2392,6 +2392,7 @@ void YToARGBRow_SSE2(const uint8* y_buf, ...@@ -2392,6 +2392,7 @@ void YToARGBRow_SSE2(const uint8* y_buf,
} }
#endif // HAS_YTOARGBROW_SSE2 #endif // HAS_YTOARGBROW_SSE2
#ifdef HAS_MIRRORROW_SSSE3 #ifdef HAS_MIRRORROW_SSSE3
// Shuffle table for reversing the bytes. // Shuffle table for reversing the bytes.
static const uvec8 kShuffleMirror = { static const uvec8 kShuffleMirror = {
...@@ -2406,11 +2407,10 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { ...@@ -2406,11 +2407,10 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
mov edx, [esp + 8] // dst mov edx, [esp + 8] // dst
mov ecx, [esp + 12] // width mov ecx, [esp + 12] // width
movdqa xmm5, kShuffleMirror movdqa xmm5, kShuffleMirror
lea eax, [eax - 16]
align 4 align 4
convertloop: convertloop:
movdqu xmm0, [eax + ecx] movdqu xmm0, [eax - 16 + ecx]
pshufb xmm0, xmm5 pshufb xmm0, xmm5
sub ecx, 16 sub ecx, 16
movdqu [edx], xmm0 movdqu [edx], xmm0
...@@ -2429,11 +2429,10 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { ...@@ -2429,11 +2429,10 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
mov edx, [esp + 8] // dst mov edx, [esp + 8] // dst
mov ecx, [esp + 12] // width mov ecx, [esp + 12] // width
vbroadcastf128 ymm5, kShuffleMirror vbroadcastf128 ymm5, kShuffleMirror
lea eax, [eax - 32]
align 4 align 4
convertloop: convertloop:
vmovdqu ymm0, [eax + ecx] vmovdqu ymm0, [eax - 32 + ecx]
vpshufb ymm0, ymm0, ymm5 vpshufb ymm0, ymm0, ymm5
vpermq ymm0, ymm0, 0x4e // swap high and low halfs vpermq ymm0, ymm0, 0x4e // swap high and low halfs
sub ecx, 32 sub ecx, 32
...@@ -2453,11 +2452,10 @@ void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) { ...@@ -2453,11 +2452,10 @@ void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
mov eax, [esp + 4] // src mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst mov edx, [esp + 8] // dst
mov ecx, [esp + 12] // width mov ecx, [esp + 12] // width
lea eax, [eax - 16]
align 4 align 4
convertloop: convertloop:
movdqu xmm0, [eax + ecx] movdqu xmm0, [eax - 16 + ecx]
movdqa xmm1, xmm0 // swap bytes movdqa xmm1, xmm0 // swap bytes
psllw xmm0, 8 psllw xmm0, 8
psrlw xmm1, 8 psrlw xmm1, 8
...@@ -2551,12 +2549,11 @@ void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) { ...@@ -2551,12 +2549,11 @@ void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
mov eax, [esp + 4] // src mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst mov edx, [esp + 8] // dst
mov ecx, [esp + 12] // width mov ecx, [esp + 12] // width
lea eax, [eax - 32]
vmovdqa ymm5, kARGBShuffleMirror_AVX2 vmovdqa ymm5, kARGBShuffleMirror_AVX2
align 4 align 4
convertloop: convertloop:
vpermd ymm0, ymm5, [eax + ecx * 4] // permute dword order vpermd ymm0, ymm5, [eax - 32 + ecx * 4] // permute dword order
sub ecx, 8 sub ecx, 8
vmovdqu [edx], ymm0 vmovdqu [edx], ymm0
lea edx, [edx + 32] lea edx, [edx + 32]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment