Commit 5822505e authored by fbarchard@google.com's avatar fbarchard@google.com

Remove extra unaligned loop from alphablender. Both aligned and unaligned loops…

Remove extra unaligned loop from alphablender.  Both aligned and unaligned loops were the same, so remove the extra.
BUG=none
TESTED=try bots.
R=brucedawson@google.com, harryjin@google.com

Review URL: https://webrtc-codereview.appspot.com/29059004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1166 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 1eb636d2
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1164
Version: 1165
License: BSD
License File: LICENSE
......
......@@ -207,12 +207,12 @@ extern "C" {
#define HAS_ARGBMULTIPLYROW_AVX2
#define HAS_ARGBATTENUATEROW_AVX2
#define HAS_ARGBUNATTENUATEROW_AVX2
#define HAS_ARGBMIRRORROW_AVX2
#endif
// The following are require VS2012.
// TODO(fbarchard): Port to gcc.
#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
#define HAS_ARGBMIRRORROW_AVX2
#define HAS_ARGBTOUVROW_AVX2
#define HAS_ARGBTOYJROW_AVX2
#define HAS_ARGBTOYROW_AVX2
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1164
#define LIBYUV_VERSION 1165
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -2183,10 +2183,9 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
intptr_t temp_width = (intptr_t)(width);
asm volatile (
"movdqa %3,%%xmm5 \n"
"lea " MEMLEA(-0x10,0) ",%0 \n"
LABELALIGN
"1: \n"
MEMOPREG(movdqu,0x00,0,2,1,xmm0) // movdqu (%0,%2),%%xmm0
MEMOPREG(movdqu,-0x10,0,2,1,xmm0) // movdqu -0x10(%0,%2),%%xmm0
"pshufb %%xmm5,%%xmm0 \n"
"sub $0x10,%2 \n"
"movdqu %%xmm0," MEMACCESS(1) " \n"
......@@ -3378,10 +3377,6 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
"19: \n"
"add $1-4,%3 \n"
"jl 49f \n"
"test $0xf,%0 \n"
"jne 41f \n"
"test $0xf,%1 \n"
"jne 41f \n"
// 4 pixel loop.
LABELALIGN
......@@ -3408,33 +3403,6 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
"movdqu %%xmm0," MEMACCESS(2) " \n"
"lea " MEMLEA(0x10,2) ",%2 \n"
"jge 40b \n"
"jmp 49f \n"
// 4 pixel loop.
LABELALIGN
"41: \n"
"movdqu " MEMACCESS(0) ",%%xmm3 \n"
"lea " MEMLEA(0x10,0) ",%0 \n"
"movdqa %%xmm3,%%xmm0 \n"
"pxor %%xmm4,%%xmm3 \n"
"movdqu " MEMACCESS(1) ",%%xmm2 \n"
"pshufb %4,%%xmm3 \n"
"pand %%xmm6,%%xmm2 \n"
"paddw %%xmm7,%%xmm3 \n"
"pmullw %%xmm3,%%xmm2 \n"
"movdqu " MEMACCESS(1) ",%%xmm1 \n"
"lea " MEMLEA(0x10,1) ",%1 \n"
"psrlw $0x8,%%xmm1 \n"
"por %%xmm4,%%xmm0 \n"
"pmullw %%xmm3,%%xmm1 \n"
"psrlw $0x8,%%xmm2 \n"
"paddusb %%xmm2,%%xmm0 \n"
"pand %%xmm5,%%xmm1 \n"
"paddusb %%xmm1,%%xmm0 \n"
"sub $0x4,%3 \n"
"movdqu %%xmm0," MEMACCESS(2) " \n"
"lea " MEMLEA(0x10,2) ",%2 \n"
"jge 41b \n"
"49: \n"
"add $0x3,%3 \n"
......
......@@ -2392,7 +2392,6 @@ void YToARGBRow_SSE2(const uint8* y_buf,
}
#endif // HAS_YTOARGBROW_SSE2
#ifdef HAS_MIRRORROW_SSSE3
// Shuffle table for reversing the bytes.
static const uvec8 kShuffleMirror = {
......@@ -2432,7 +2431,7 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
align 4
convertloop:
vmovdqu ymm0, [eax - 32 + ecx]
vmovdqu ymm0, -32[eax + ecx]
vpshufb ymm0, ymm0, ymm5
vpermq ymm0, ymm0, 0x4e // swap high and low halfs
sub ecx, 32
......@@ -2455,7 +2454,7 @@ void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
align 4
convertloop:
movdqu xmm0, [eax - 16 + ecx]
movdqu xmm0, -16[eax + ecx]
movdqa xmm1, xmm0 // swap bytes
psllw xmm0, 8
psrlw xmm1, 8
......@@ -2553,7 +2552,7 @@ void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
align 4
convertloop:
vpermd ymm0, ymm5, [eax - 32 + ecx * 4] // permute dword order
vpermd ymm0, ymm5, -32[eax + ecx * 4] // permute dword order
sub ecx, 8
vmovdqu [edx], ymm0
lea edx, [edx + 32]
......@@ -3608,11 +3607,6 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
add ecx, 1 - 4
jl convertloop4b
test eax, 15 // unaligned?
jne convertuloop4
test esi, 15 // unaligned?
jne convertuloop4
// 4 pixel loop.
convertloop4:
movdqu xmm3, [eax] // src argb
......@@ -3637,32 +3631,6 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
movdqu [edx], xmm0
lea edx, [edx + 16]
jge convertloop4
jmp convertloop4b
// 4 pixel unaligned loop.
convertuloop4:
movdqu xmm3, [eax] // src argb
lea eax, [eax + 16]
movdqa xmm0, xmm3 // src argb
pxor xmm3, xmm4 // ~alpha
movdqu xmm2, [esi] // _r_b
pshufb xmm3, kShuffleAlpha // alpha
pand xmm2, xmm6 // _r_b
paddw xmm3, xmm7 // 256 - alpha
pmullw xmm2, xmm3 // _r_b * alpha
movdqu xmm1, [esi] // _a_g
lea esi, [esi + 16]
psrlw xmm1, 8 // _a_g
por xmm0, xmm4 // set alpha to 255
pmullw xmm1, xmm3 // _a_g * alpha
psrlw xmm2, 8 // _r_b convert to 8 bits again
paddusb xmm0, xmm2 // + src argb
pand xmm1, xmm5 // a_g_ convert to 8 bits again
paddusb xmm0, xmm1 // + src argb
sub ecx, 4
movdqu [edx], xmm0
lea edx, [edx + 16]
jge convertuloop4
convertloop4b:
add ecx, 4 - 1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment