Commit d4594bee authored by Frank Barchard

Switch from ebp to ebx for the yuvconstants pointer.

ebx encodes more efficiently (1 byte less) than ebp for most addressing modes.
Previously ebx was used for the 411 format, but the reader now uses pinsrw,
which avoids the need for a general-purpose register.

BUG=libyuv:488
R=harryjin@google.com

Review URL: https://codereview.chromium.org/1365003003 .
parent 8fb2048e
...@@ -40,16 +40,16 @@ extern "C" { ...@@ -40,16 +40,16 @@ extern "C" {
y_buf += 8; \ y_buf += 8; \
// Convert 8 pixels: 8 UV and 8 Y. // Convert 8 pixels: 8 UV and 8 Y.
#define YUVTORGB(YuvConstants) \ #define YUVTORGB(yuvconstants) \
xmm1 = _mm_loadu_si128(&xmm0); \ xmm1 = _mm_loadu_si128(&xmm0); \
xmm2 = _mm_loadu_si128(&xmm0); \ xmm2 = _mm_loadu_si128(&xmm0); \
xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)YuvConstants->kUVToB); \ xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)yuvconstants->kUVToB); \
xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)YuvConstants->kUVToG); \ xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)yuvconstants->kUVToG); \
xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)YuvConstants->kUVToR); \ xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)yuvconstants->kUVToR); \
xmm0 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasB, xmm0); \ xmm0 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasB, xmm0); \
xmm1 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasG, xmm1); \ xmm1 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasG, xmm1); \
xmm2 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasR, xmm2); \ xmm2 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasR, xmm2); \
xmm4 = _mm_mulhi_epu16(xmm4, *(__m128i*)YuvConstants->kYToRgb); \ xmm4 = _mm_mulhi_epu16(xmm4, *(__m128i*)yuvconstants->kYToRgb); \
xmm0 = _mm_adds_epi16(xmm0, xmm4); \ xmm0 = _mm_adds_epi16(xmm0, xmm4); \
xmm1 = _mm_adds_epi16(xmm1, xmm4); \ xmm1 = _mm_adds_epi16(xmm1, xmm4); \
xmm2 = _mm_adds_epi16(xmm2, xmm4); \ xmm2 = _mm_adds_epi16(xmm2, xmm4); \
...@@ -2030,25 +2030,25 @@ void I422ToARGBRow_AVX2(const uint8* y_buf, ...@@ -2030,25 +2030,25 @@ void I422ToARGBRow_AVX2(const uint8* y_buf,
__asm { __asm {
push esi push esi
push edi push edi
push ebp push ebx
mov eax, [esp + 12 + 4] // Y mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb mov edx, [esp + 12 + 16] // argb
mov ebp, [esp + 12 + 20] // yuvconstants mov ebx, [esp + 12 + 20] // yuvconstants
mov ecx, [esp + 12 + 24] // width mov ecx, [esp + 12 + 24] // width
sub edi, esi sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop: convertloop:
READYUV422_AVX2 READYUV422_AVX2
YUVTORGB_AVX2(ebp) YUVTORGB_AVX2(ebx)
STOREARGB_AVX2 STOREARGB_AVX2
sub ecx, 16 sub ecx, 16
jg convertloop jg convertloop
pop ebp pop ebx
pop edi pop edi
pop esi pop esi
vzeroupper vzeroupper
...@@ -2070,24 +2070,24 @@ void I444ToARGBRow_AVX2(const uint8* y_buf, ...@@ -2070,24 +2070,24 @@ void I444ToARGBRow_AVX2(const uint8* y_buf,
__asm { __asm {
push esi push esi
push edi push edi
push ebp push ebx
mov eax, [esp + 12 + 4] // Y mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb mov edx, [esp + 12 + 16] // argb
mov ebp, [esp + 12 + 20] // yuvconstants mov ebx, [esp + 12 + 20] // yuvconstants
mov ecx, [esp + 12 + 24] // width mov ecx, [esp + 12 + 24] // width
sub edi, esi sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop: convertloop:
READYUV444_AVX2 READYUV444_AVX2
YUVTORGB_AVX2(ebp) YUVTORGB_AVX2(ebx)
STOREARGB_AVX2 STOREARGB_AVX2
sub ecx, 16 sub ecx, 16
jg convertloop jg convertloop
pop ebp pop ebx
pop edi pop edi
pop esi pop esi
vzeroupper vzeroupper
...@@ -2109,24 +2109,24 @@ void I444ToABGRRow_AVX2(const uint8* y_buf, ...@@ -2109,24 +2109,24 @@ void I444ToABGRRow_AVX2(const uint8* y_buf,
__asm { __asm {
push esi push esi
push edi push edi
push ebp push ebx
mov eax, [esp + 12 + 4] // Y mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // abgr mov edx, [esp + 12 + 16] // abgr
mov ebp, [esp + 12 + 20] // yuvconstants mov ebx, [esp + 12 + 20] // yuvconstants
mov ecx, [esp + 12 + 24] // width mov ecx, [esp + 12 + 24] // width
sub edi, esi sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop: convertloop:
READYUV444_AVX2 READYUV444_AVX2
YUVTORGB_AVX2(ebp) YUVTORGB_AVX2(ebx)
STOREABGR_AVX2 STOREABGR_AVX2
sub ecx, 16 sub ecx, 16
jg convertloop jg convertloop
pop ebp pop ebx
pop edi pop edi
pop esi pop esi
vzeroupper vzeroupper
...@@ -2148,25 +2148,25 @@ void I411ToARGBRow_AVX2(const uint8* y_buf, ...@@ -2148,25 +2148,25 @@ void I411ToARGBRow_AVX2(const uint8* y_buf,
__asm { __asm {
push esi push esi
push edi push edi
push ebp push ebx
mov eax, [esp + 12 + 4] // Y mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // abgr mov edx, [esp + 12 + 16] // abgr
mov ebp, [esp + 12 + 20] // yuvconstants mov ebx, [esp + 12 + 20] // yuvconstants
mov ecx, [esp + 12 + 24] // width mov ecx, [esp + 12 + 24] // width
sub edi, esi sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop: convertloop:
READYUV411_AVX2 READYUV411_AVX2
YUVTORGB_AVX2(ebp) YUVTORGB_AVX2(ebx)
STOREARGB_AVX2 STOREARGB_AVX2
sub ecx, 16 sub ecx, 16
jg convertloop jg convertloop
pop ebp pop ebx
pop edi pop edi
pop esi pop esi
vzeroupper vzeroupper
...@@ -2186,23 +2186,23 @@ void NV12ToARGBRow_AVX2(const uint8* y_buf, ...@@ -2186,23 +2186,23 @@ void NV12ToARGBRow_AVX2(const uint8* y_buf,
int width) { int width) {
__asm { __asm {
push esi push esi
push ebp push ebx
mov eax, [esp + 8 + 4] // Y mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // UV mov esi, [esp + 8 + 8] // UV
mov edx, [esp + 8 + 12] // argb mov edx, [esp + 8 + 12] // argb
mov ebp, [esp + 8 + 16] // yuvconstants mov ebx, [esp + 8 + 16] // yuvconstants
mov ecx, [esp + 8 + 20] // width mov ecx, [esp + 8 + 20] // width
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop: convertloop:
READNV12_AVX2 READNV12_AVX2
YUVTORGB_AVX2(ebp) YUVTORGB_AVX2(ebx)
STOREARGB_AVX2 STOREARGB_AVX2
sub ecx, 16 sub ecx, 16
jg convertloop jg convertloop
pop ebp pop ebx
pop esi pop esi
vzeroupper vzeroupper
ret ret
...@@ -2218,22 +2218,22 @@ void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, ...@@ -2218,22 +2218,22 @@ void YUY2ToARGBRow_AVX2(const uint8* src_yuy2,
struct YuvConstants* yuvconstants, struct YuvConstants* yuvconstants,
int width) { int width) {
__asm { __asm {
push ebp push ebx
mov eax, [esp + 4 + 4] // yuy2 mov eax, [esp + 4 + 4] // yuy2
mov edx, [esp + 4 + 8] // argb mov edx, [esp + 4 + 8] // argb
mov ebp, [esp + 4 + 12] // yuvconstants mov ebx, [esp + 4 + 12] // yuvconstants
mov ecx, [esp + 4 + 16] // width mov ecx, [esp + 4 + 16] // width
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop: convertloop:
READYUY2_AVX2 READYUY2_AVX2
YUVTORGB_AVX2(ebp) YUVTORGB_AVX2(ebx)
STOREARGB_AVX2 STOREARGB_AVX2
sub ecx, 16 sub ecx, 16
jg convertloop jg convertloop
pop ebp pop ebx
vzeroupper vzeroupper
ret ret
} }
...@@ -2247,22 +2247,22 @@ void UYVYToARGBRow_AVX2(const uint8* src_uyvy, ...@@ -2247,22 +2247,22 @@ void UYVYToARGBRow_AVX2(const uint8* src_uyvy,
struct YuvConstants* yuvconstants, struct YuvConstants* yuvconstants,
int width) { int width) {
__asm { __asm {
push ebp push ebx
mov eax, [esp + 4 + 4] // uyvy mov eax, [esp + 4 + 4] // uyvy
mov edx, [esp + 4 + 8] // argb mov edx, [esp + 4 + 8] // argb
mov ebp, [esp + 4 + 12] // yuvconstants mov ebx, [esp + 4 + 12] // yuvconstants
mov ecx, [esp + 4 + 16] // width mov ecx, [esp + 4 + 16] // width
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop: convertloop:
READUYVY_AVX2 READUYVY_AVX2
YUVTORGB_AVX2(ebp) YUVTORGB_AVX2(ebx)
STOREARGB_AVX2 STOREARGB_AVX2
sub ecx, 16 sub ecx, 16
jg convertloop jg convertloop
pop ebp pop ebx
vzeroupper vzeroupper
ret ret
} }
...@@ -2283,25 +2283,25 @@ void I422ToBGRARow_AVX2(const uint8* y_buf, ...@@ -2283,25 +2283,25 @@ void I422ToBGRARow_AVX2(const uint8* y_buf,
__asm { __asm {
push esi push esi
push edi push edi
push ebp push ebx
mov eax, [esp + 12 + 4] // Y mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // abgr mov edx, [esp + 12 + 16] // abgr
mov ebp, [esp + 12 + 20] // yuvconstants mov ebx, [esp + 12 + 20] // yuvconstants
mov ecx, [esp + 12 + 24] // width mov ecx, [esp + 12 + 24] // width
sub edi, esi sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop: convertloop:
READYUV422_AVX2 READYUV422_AVX2
YUVTORGB_AVX2(ebp) YUVTORGB_AVX2(ebx)
STOREBGRA_AVX2 STOREBGRA_AVX2
sub ecx, 16 sub ecx, 16
jg convertloop jg convertloop
pop ebp pop ebx
pop edi pop edi
pop esi pop esi
vzeroupper vzeroupper
...@@ -2323,25 +2323,25 @@ void I422ToRGBARow_AVX2(const uint8* y_buf, ...@@ -2323,25 +2323,25 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
__asm { __asm {
push esi push esi
push edi push edi
push ebp push ebx
mov eax, [esp + 12 + 4] // Y mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // abgr mov edx, [esp + 12 + 16] // abgr
mov ebp, [esp + 12 + 20] // yuvconstants mov ebx, [esp + 12 + 20] // yuvconstants
mov ecx, [esp + 12 + 24] // width mov ecx, [esp + 12 + 24] // width
sub edi, esi sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop: convertloop:
READYUV422_AVX2 READYUV422_AVX2
YUVTORGB_AVX2(ebp) YUVTORGB_AVX2(ebx)
STORERGBA_AVX2 STORERGBA_AVX2
sub ecx, 16 sub ecx, 16
jg convertloop jg convertloop
pop ebp pop ebx
pop edi pop edi
pop esi pop esi
vzeroupper vzeroupper
...@@ -2363,25 +2363,25 @@ void I422ToABGRRow_AVX2(const uint8* y_buf, ...@@ -2363,25 +2363,25 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
__asm { __asm {
push esi push esi
push edi push edi
push ebp push ebx
mov eax, [esp + 12 + 4] // Y mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb mov edx, [esp + 12 + 16] // argb
mov ebp, [esp + 12 + 20] // yuvconstants mov ebx, [esp + 12 + 20] // yuvconstants
mov ecx, [esp + 12 + 24] // width mov ecx, [esp + 12 + 24] // width
sub edi, esi sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop: convertloop:
READYUV422_AVX2 READYUV422_AVX2
YUVTORGB_AVX2(ebp) YUVTORGB_AVX2(ebx)
STOREABGR_AVX2 STOREABGR_AVX2
sub ecx, 16 sub ecx, 16
jg convertloop jg convertloop
pop ebp pop ebx
pop edi pop edi
pop esi pop esi
vzeroupper vzeroupper
...@@ -2615,25 +2615,25 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -2615,25 +2615,25 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf,
__asm { __asm {
push esi push esi
push edi push edi
push ebp push ebx
mov eax, [esp + 12 + 4] // Y mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb mov edx, [esp + 12 + 16] // argb
mov ebp, [esp + 12 + 20] // yuvconstants mov ebx, [esp + 12 + 20] // yuvconstants
mov ecx, [esp + 12 + 24] // width mov ecx, [esp + 12 + 24] // width
sub edi, esi sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop: convertloop:
READYUV444 READYUV444
YUVTORGB(ebp) YUVTORGB(ebx)
STOREARGB STOREARGB
sub ecx, 8 sub ecx, 8
jg convertloop jg convertloop
pop ebp pop ebx
pop edi pop edi
pop esi pop esi
ret ret
...@@ -2652,25 +2652,25 @@ void I444ToABGRRow_SSSE3(const uint8* y_buf, ...@@ -2652,25 +2652,25 @@ void I444ToABGRRow_SSSE3(const uint8* y_buf,
__asm { __asm {
push esi push esi
push edi push edi
push ebp push ebx
mov eax, [esp + 12 + 4] // Y mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // abgr mov edx, [esp + 12 + 16] // abgr
mov ebp, [esp + 12 + 20] // yuvconstants mov ebx, [esp + 12 + 20] // yuvconstants
mov ecx, [esp + 12 + 24] // width mov ecx, [esp + 12 + 24] // width
sub edi, esi sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop: convertloop:
READYUV444 READYUV444
YUVTORGB(ebp) YUVTORGB(ebx)
STOREABGR STOREABGR
sub ecx, 8 sub ecx, 8
jg convertloop jg convertloop
pop ebp pop ebx
pop edi pop edi
pop esi pop esi
ret ret
...@@ -2689,12 +2689,12 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf, ...@@ -2689,12 +2689,12 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf,
__asm { __asm {
push esi push esi
push edi push edi
push ebp push ebx
mov eax, [esp + 12 + 4] // Y mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb mov edx, [esp + 12 + 16] // argb
mov ebp, [esp + 12 + 20] // yuvconstants mov ebx, [esp + 12 + 20] // yuvconstants
mov ecx, [esp + 12 + 24] // width mov ecx, [esp + 12 + 24] // width
sub edi, esi sub edi, esi
movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0
...@@ -2702,13 +2702,13 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf, ...@@ -2702,13 +2702,13 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB(ebp) YUVTORGB(ebx)
STORERGB24 STORERGB24
sub ecx, 8 sub ecx, 8
jg convertloop jg convertloop
pop ebp pop ebx
pop edi pop edi
pop esi pop esi
ret ret
...@@ -2727,12 +2727,12 @@ void I422ToRAWRow_SSSE3(const uint8* y_buf, ...@@ -2727,12 +2727,12 @@ void I422ToRAWRow_SSSE3(const uint8* y_buf,
__asm { __asm {
push esi push esi
push edi push edi
push ebp push ebx
mov eax, [esp + 12 + 4] // Y mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb mov edx, [esp + 12 + 16] // argb
mov ebp, [esp + 12 + 20] // yuvconstants mov ebx, [esp + 12 + 20] // yuvconstants
mov ecx, [esp + 12 + 24] // width mov ecx, [esp + 12 + 24] // width
sub edi, esi sub edi, esi
movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0
...@@ -2740,13 +2740,13 @@ void I422ToRAWRow_SSSE3(const uint8* y_buf, ...@@ -2740,13 +2740,13 @@ void I422ToRAWRow_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB(ebp) YUVTORGB(ebx)
STORERAW STORERAW
sub ecx, 8 sub ecx, 8
jg convertloop jg convertloop
pop ebp pop ebx
pop edi pop edi
pop esi pop esi
ret ret
...@@ -2765,12 +2765,12 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf, ...@@ -2765,12 +2765,12 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
__asm { __asm {
push esi push esi
push edi push edi
push ebp push ebx
mov eax, [esp + 12 + 4] // Y mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb mov edx, [esp + 12 + 16] // argb
mov ebp, [esp + 12 + 20] // yuvconstants mov ebx, [esp + 12 + 20] // yuvconstants
mov ecx, [esp + 12 + 24] // width mov ecx, [esp + 12 + 24] // width
sub edi, esi sub edi, esi
pcmpeqb xmm5, xmm5 // generate mask 0x0000001f pcmpeqb xmm5, xmm5 // generate mask 0x0000001f
...@@ -2783,13 +2783,13 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf, ...@@ -2783,13 +2783,13 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB(ebp) YUVTORGB(ebx)
STORERGB565 STORERGB565
sub ecx, 8 sub ecx, 8
jg convertloop jg convertloop
pop ebp pop ebx
pop edi pop edi
pop esi pop esi
ret ret
...@@ -2808,25 +2808,25 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -2808,25 +2808,25 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
__asm { __asm {
push esi push esi
push edi push edi
push ebp push ebx
mov eax, [esp + 12 + 4] // Y mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb mov edx, [esp + 12 + 16] // argb
mov ebp, [esp + 12 + 20] // yuvconstants mov ebx, [esp + 12 + 20] // yuvconstants
mov ecx, [esp + 12 + 24] // width mov ecx, [esp + 12 + 24] // width
sub edi, esi sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB(ebp) YUVTORGB(ebx)
STOREARGB STOREARGB
sub ecx, 8 sub ecx, 8
jg convertloop jg convertloop
pop ebp pop ebx
pop edi pop edi
pop esi pop esi
ret ret
...@@ -2846,25 +2846,25 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -2846,25 +2846,25 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf,
__asm { __asm {
push esi push esi
push edi push edi
push ebp push ebx
mov eax, [esp + 12 + 4] // Y mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // abgr mov edx, [esp + 12 + 16] // abgr
mov ebp, [esp + 12 + 20] // yuvconstants mov ebx, [esp + 12 + 20] // yuvconstants
mov ecx, [esp + 12 + 24] // width mov ecx, [esp + 12 + 24] // width
sub edi, esi sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop: convertloop:
READYUV411 READYUV411
YUVTORGB(ebp) YUVTORGB(ebx)
STOREARGB STOREARGB
sub ecx, 8 sub ecx, 8
jg convertloop jg convertloop
pop ebp pop ebx
pop edi pop edi
pop esi pop esi
ret ret
...@@ -2881,23 +2881,23 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -2881,23 +2881,23 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf,
int width) { int width) {
__asm { __asm {
push esi push esi
push ebp push ebx
mov eax, [esp + 8 + 4] // Y mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // UV mov esi, [esp + 8 + 8] // UV
mov edx, [esp + 8 + 12] // argb mov edx, [esp + 8 + 12] // argb
mov ebp, [esp + 8 + 16] // yuvconstants mov ebx, [esp + 8 + 16] // yuvconstants
mov ecx, [esp + 8 + 20] // width mov ecx, [esp + 8 + 20] // width
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop: convertloop:
READNV12 READNV12
YUVTORGB(ebp) YUVTORGB(ebx)
STOREARGB STOREARGB
sub ecx, 8 sub ecx, 8
jg convertloop jg convertloop
pop ebp pop ebx
pop esi pop esi
ret ret
} }
...@@ -2911,22 +2911,22 @@ void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, ...@@ -2911,22 +2911,22 @@ void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
struct YuvConstants* yuvconstants, struct YuvConstants* yuvconstants,
int width) { int width) {
__asm { __asm {
push ebp push ebx
mov eax, [esp + 4 + 4] // yuy2 mov eax, [esp + 4 + 4] // yuy2
mov edx, [esp + 4 + 8] // argb mov edx, [esp + 4 + 8] // argb
mov ebp, [esp + 4 + 12] // yuvconstants mov ebx, [esp + 4 + 12] // yuvconstants
mov ecx, [esp + 4 + 16] // width mov ecx, [esp + 4 + 16] // width
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop: convertloop:
READYUY2 READYUY2
YUVTORGB(ebp) YUVTORGB(ebx)
STOREARGB STOREARGB
sub ecx, 8 sub ecx, 8
jg convertloop jg convertloop
pop ebp pop ebx
ret ret
} }
} }
...@@ -2939,22 +2939,22 @@ void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, ...@@ -2939,22 +2939,22 @@ void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
struct YuvConstants* yuvconstants, struct YuvConstants* yuvconstants,
int width) { int width) {
__asm { __asm {
push ebp push ebx
mov eax, [esp + 4 + 4] // uyvy mov eax, [esp + 4 + 4] // uyvy
mov edx, [esp + 4 + 8] // argb mov edx, [esp + 4 + 8] // argb
mov ebp, [esp + 4 + 12] // yuvconstants mov ebx, [esp + 4 + 12] // yuvconstants
mov ecx, [esp + 4 + 16] // width mov ecx, [esp + 4 + 16] // width
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop: convertloop:
READUYVY READUYVY
YUVTORGB(ebp) YUVTORGB(ebx)
STOREARGB STOREARGB
sub ecx, 8 sub ecx, 8
jg convertloop jg convertloop
pop ebp pop ebx
ret ret
} }
} }
...@@ -2969,24 +2969,24 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf, ...@@ -2969,24 +2969,24 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf,
__asm { __asm {
push esi push esi
push edi push edi
push ebp push ebx
mov eax, [esp + 12 + 4] // Y mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb mov edx, [esp + 12 + 16] // argb
mov ebp, [esp + 12 + 20] // yuvconstants mov ebx, [esp + 12 + 20] // yuvconstants
mov ecx, [esp + 12 + 24] // width mov ecx, [esp + 12 + 24] // width
sub edi, esi sub edi, esi
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB(ebp) YUVTORGB(ebx)
STOREBGRA STOREBGRA
sub ecx, 8 sub ecx, 8
jg convertloop jg convertloop
pop ebp pop ebx
pop edi pop edi
pop esi pop esi
ret ret
...@@ -3003,25 +3003,25 @@ void I422ToABGRRow_SSSE3(const uint8* y_buf, ...@@ -3003,25 +3003,25 @@ void I422ToABGRRow_SSSE3(const uint8* y_buf,
__asm { __asm {
push esi push esi
push edi push edi
push ebp push ebx
mov eax, [esp + 12 + 4] // Y mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb mov edx, [esp + 12 + 16] // argb
mov ebp, [esp + 12 + 20] // yuvconstants mov ebx, [esp + 12 + 20] // yuvconstants
mov ecx, [esp + 12 + 24] // width mov ecx, [esp + 12 + 24] // width
sub edi, esi sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB(ebp) YUVTORGB(ebx)
STOREABGR STOREABGR
sub ecx, 8 sub ecx, 8
jg convertloop jg convertloop
pop ebp pop ebx
pop edi pop edi
pop esi pop esi
ret ret
...@@ -3038,24 +3038,24 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf, ...@@ -3038,24 +3038,24 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf,
__asm { __asm {
push esi push esi
push edi push edi
push ebp push ebx
mov eax, [esp + 12 + 4] // Y mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb mov edx, [esp + 12 + 16] // argb
mov ebp, [esp + 12 + 20] // yuvconstants mov ebx, [esp + 12 + 20] // yuvconstants
mov ecx, [esp + 12 + 24] // width mov ecx, [esp + 12 + 24] // width
sub edi, esi sub edi, esi
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB(ebp) YUVTORGB(ebx)
STORERGBA STORERGBA
sub ecx, 8 sub ecx, 8
jg convertloop jg convertloop
pop ebp pop ebx
pop edi pop edi
pop esi pop esi
ret ret
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment