Commit 976423fe authored by fbarchard@google.com

alpha blend last pixel fix

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/439008

git-svn-id: http://libyuv.googlecode.com/svn/trunk@210 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 90310ddb
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 209
Version: 210
License: BSD
License File: LICENSE
......
......@@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 209
#define LIBYUV_VERSION 210
#endif // INCLUDE_LIBYUV_VERSION_H_
......@@ -218,10 +218,10 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
// loop will run one extra time.
"sub %2, #16 \n"
// mirror the bytes in the 64 bit segments. unable to mirror
// mirror the bytes in the 64 bit segments. unable to mirror
// the bytes in the entire 128 bits in one go.
// because of the inability to mirror the entire 128 bits
// mirror the writing out of the two 64 bit segments.
// mirror the writing out of the two 64 bit segments.
"1: \n"
"vld1.8 {q0}, [%0]! \n" // src += 16
"vrev64.8 q0, q0 \n"
......
......@@ -1931,29 +1931,29 @@ void ARGBBlendRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
"sub %0,%1 \n"
"mov (%0),%3 \n"
"sub $0x1,%2 \n"
"je 8f \n" // last1
"jle 8f \n" // last1
"cmp $0xff000000,%3 \n"
"jae 2f \n" // opaqueloop
"cmp $0xffffff,%3 \n"
"ja 3f \n" // translucientloop
"ja 3f \n" // translucentloop
// transparentloop
"1: \n"
"sub $0x1,%2 \n"
"lea 0x4(%0),%0 \n"
"je 8f \n" // last1
"jle 8f \n" // last1
"mov (%0),%3 \n"
"cmp $0xffffff,%3 \n"
"jbe 1b \n" // transparentloop
"cmp $0xff000000,%3 \n"
"jb 3f \n" // translucientloop
"jb 3f \n" // translucentloop
// opaqueloop
"2: \n"
"mov %3,(%0,%1,1) \n"
"lea 0x4(%0),%0 \n"
"sub $0x1,%2 \n"
"je 8f \n" // last1
"jle 8f \n" // last1
"mov (%0),%3 \n"
"cmp $0xff000000,%3 \n"
"jae 2b \n" // opaqueloop
......@@ -1961,48 +1961,50 @@ void ARGBBlendRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
"jbe 1b \n" // transparentloop
"nop \n"
// translucientloop
// translucentloop
"3: \n"
"movq (%0),%%xmm0 \n"
"movq (%0,%1,1),%%xmm1 \n"
"movd %3,%%xmm0 \n"
"mov (%0,%1,1),%3 \n"
"movd %3,%%xmm1 \n"
"punpcklbw %%xmm0,%%xmm0 \n"
"punpcklbw %%xmm1,%%xmm1 \n"
"pshuflw $0xff,%%xmm0,%%xmm2 \n"
"pshufhw $0xff,%%xmm2,%%xmm2 \n"
"movdqa %%xmm2,%%xmm3 \n"
"pxor %%xmm4,%%xmm3 \n"
"pmulhuw %%xmm2,%%xmm0 \n"
"pmulhuw %%xmm3,%%xmm1 \n"
"paddw %%xmm1,%%xmm0 \n"
"paddusw %%xmm1,%%xmm0 \n"
"psrlw $0x8,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
"movq %%xmm0,(%0,%1,1) \n"
"movd %%xmm0,%3 \n"
"mov %3,(%0,%1,1) \n"
"lea 0x8(%0),%0 \n"
"sub $0x2,%2 \n"
"jbe 8f \n" // last1
"jle 8f \n" // last1
"mov (%0),%3 \n"
"cmp $0xffffff,%3 \n"
"jbe 1b \n" // transparentloop
"cmp $0xff000000,%3 \n"
"jb 3b \n" // translucientloop
"jb 3b \n" // translucentloop
"jmp 2b \n" // opaqueloop
// last1
"8: \n"
"add $0x1,%2 \n"
"je 9f \n" // done
"add $0x1,%2 \n" // 1 pixel left?
"cmp $0x1,%2 \n"
"jl 9f \n" // done
"mov (%0),%3 \n"
"movd %3,%%xmm0 \n"
"mov (%0,%1,1),%3 \n"
"movd %3,%%xmm1 \n"
"punpcklbw %%xmm0,%%xmm0 \n"
"punpcklbw %%xmm1,%%xmm1 \n"
"pshuflw $0xff,%%xmm0,%%xmm2 \n"
"pshufhw $0xff,%%xmm2,%%xmm2 \n"
"movdqa %%xmm2,%%xmm3 \n"
"pxor %%xmm4,%%xmm3 \n"
"pmulhuw %%xmm2,%%xmm0 \n"
"pmulhuw %%xmm3,%%xmm1 \n"
"paddw %%xmm1,%%xmm0 \n"
"paddusw %%xmm1,%%xmm0 \n"
"psrlw $0x8,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
"movd %%xmm0,%3 \n"
......
......@@ -477,7 +477,6 @@ __asm {
}
}
// TODO(fbarchard): Port to gcc
__declspec(naked)
void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
__asm {
......@@ -1965,40 +1964,42 @@ void ARGBBlendRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
mov edx, [esp + 4 + 8] // dst_argb
mov ecx, [esp + 4 + 12] // width
pcmpeqb xmm4, xmm4 // generate 0xffffffff to negate alpha
pcmpeqb xmm5, xmm5 // generate 0xff000000 for alpha
pslld xmm5, 24
sub edx, esi
mov eax, [esi] // get first pixel
sub ecx, 1 // ensure there are at least 2 pixels
je last1 // last pixel?
jle last1 // last pixel?
cmp eax, 0xFF000000 // opaque?
jae opaqueloop
cmp eax, 0x00FFFFFF // translucient?
ja translucientloop
cmp eax, 0x00FFFFFF // translucent?
ja translucentloop
align 16
transparentloop:
sub ecx, 1
lea esi, [esi + 4]
je last1
mov eax, [esi] // handle remaining pixel
jle last1
mov eax, [esi] // get next pixel
cmp eax, 0x00FFFFFF // transparent?
jbe transparentloop
cmp eax, 0xFF000000 // translucient?
jb translucientloop
cmp eax, 0xFF000000 // translucent?
jb translucentloop
align 16
opaqueloop:
mov dword ptr [esi + edx], eax
lea esi, [esi + 4]
sub ecx, 1
je last1
mov eax, [esi] // handle remaining pixel
jle last1
mov eax, [esi] // get next pixel
cmp eax, 0xFF000000 // opaque?
jae opaqueloop
cmp eax, 0x00FFFFFF // transparent?
jbe transparentloop
align 16
translucientloop:
translucentloop:
movq xmm0, qword ptr [esi] // fetch 2 pixels
movq xmm1, qword ptr [esi + edx]
punpcklbw xmm0, xmm0 // src 16 bits
......@@ -2009,39 +2010,42 @@ void ARGBBlendRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
pxor xmm3, xmm4
pmulhuw xmm0, xmm2 // src * a
pmulhuw xmm1, xmm3 // dst * (a ^ 0xffff)
paddw xmm0, xmm1
paddusw xmm0, xmm1
psrlw xmm0, 8
packuswb xmm0, xmm0 // pack 2 pixels
por xmm0, xmm5 // set alpha
movq qword ptr [esi + edx], xmm0
lea esi, [esi + 8]
sub ecx, 2
jbe last1
mov eax, [esi] // handle remaining pixel
jle last1
mov eax, [esi]
cmp eax, 0x00FFFFFF // transparent?
jbe transparentloop
cmp eax, 0xFF000000 // translucient?
jb translucientloop
cmp eax, 0xFF000000 // translucent?
jb translucentloop
jmp opaqueloop
align 16
last1:
add ecx, 1
je done
cmp ecx, 1 // 1 left?
jl done
mov eax, [esi] // get next pixel
movd xmm0, eax
mov eax, [esi + edx]
movd xmm1, eax
punpcklbw xmm0, xmm0 // src 16 bits
punpcklbw xmm1, xmm1 // dst 16 bits
pshuflw xmm2, xmm0, 0xff // src alpha
pshufhw xmm2, xmm2, 0xff
movdqa xmm3, xmm2 // dst alpha
pxor xmm3, xmm4
pmulhuw xmm0, xmm2 // src * a
pmulhuw xmm1, xmm3 // dst * (a ^ 0xffff)
paddw xmm0, xmm1
paddusw xmm0, xmm1
psrlw xmm0, 8
packuswb xmm0, xmm0 // pack to bytes
por xmm0, xmm5 // set alpha
movd eax, xmm0
mov dword ptr [esi + edx], eax
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment