Commit 3c4f5735 authored by Frank Barchard

use pointer to inverse table for clangcl

R=harryjin@google.com
TBR=harryjin@google.com
BUG=none

Review URL: https://webrtc-codereview.appspot.com/54859004.
parent 5452cce4
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1473 Version: 1474
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1473 #define LIBYUV_VERSION 1474
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -4331,19 +4331,21 @@ __declspec(naked) ...@@ -4331,19 +4331,21 @@ __declspec(naked)
void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
int width) { int width) {
__asm { __asm {
push ebx
push esi push esi
push edi push edi
mov eax, [esp + 8 + 4] // src_argb0 mov eax, [esp + 12 + 4] // src_argb
mov edx, [esp + 8 + 8] // dst_argb mov edx, [esp + 12 + 8] // dst_argb
mov ecx, [esp + 8 + 12] // width mov ecx, [esp + 12 + 12] // width
lea ebx, fixed_invtbl8
convertloop: convertloop:
movdqu xmm0, [eax] // read 4 pixels movdqu xmm0, [eax] // read 4 pixels
movzx esi, byte ptr [eax + 3] // first alpha movzx esi, byte ptr [eax + 3] // first alpha
movzx edi, byte ptr [eax + 7] // second alpha movzx edi, byte ptr [eax + 7] // second alpha
punpcklbw xmm0, xmm0 // first 2 punpcklbw xmm0, xmm0 // first 2
movd xmm2, dword ptr [fixed_invtbl8 + esi * 4] movd xmm2, dword ptr [ebx + esi * 4]
movd xmm3, dword ptr [fixed_invtbl8 + edi * 4] movd xmm3, dword ptr [ebx + edi * 4]
pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words. 1, a, a, a pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words. 1, a, a, a
pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words
movlhps xmm2, xmm3 movlhps xmm2, xmm3
...@@ -4353,21 +4355,22 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, ...@@ -4353,21 +4355,22 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
movzx esi, byte ptr [eax + 11] // third alpha movzx esi, byte ptr [eax + 11] // third alpha
movzx edi, byte ptr [eax + 15] // forth alpha movzx edi, byte ptr [eax + 15] // forth alpha
punpckhbw xmm1, xmm1 // next 2 punpckhbw xmm1, xmm1 // next 2
movd xmm2, dword ptr [fixed_invtbl8 + esi * 4] movd xmm2, dword ptr [ebx + esi * 4]
movd xmm3, dword ptr [fixed_invtbl8 + edi * 4] movd xmm3, dword ptr [ebx + edi * 4]
pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words
pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words
movlhps xmm2, xmm3 movlhps xmm2, xmm3
pmulhuw xmm1, xmm2 // rgb * a pmulhuw xmm1, xmm2 // rgb * a
lea eax, [eax + 16] lea eax, [eax + 16]
packuswb xmm0, xmm1 packuswb xmm0, xmm1
movdqu [edx], xmm0 movdqu [edx], xmm0
lea edx, [edx + 16] lea edx, [edx + 16]
sub ecx, 4 sub ecx, 4
jg convertloop jg convertloop
pop edi pop edi
pop esi pop esi
pop ebx
ret ret
} }
} }
...@@ -4420,36 +4423,37 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, ...@@ -4420,36 +4423,37 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
int width) { int width) {
__asm { __asm {
mov eax, [esp + 4] // src_argb0 push ebx
mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // width
sub edx, eax
vbroadcastf128 ymm5, xmmword ptr kUnattenShuffleAlpha_AVX2
push esi push esi
push edi push edi
mov eax, [esp + 12 + 4] // src_argb
mov edx, [esp + 12 + 8] // dst_argb
mov ecx, [esp + 12 + 12] // width
sub edx, eax
lea ebx, fixed_invtbl8
vbroadcastf128 ymm5, xmmword ptr kUnattenShuffleAlpha_AVX2
convertloop: convertloop:
// replace VPGATHER // replace VPGATHER
movzx esi, byte ptr [eax + 3] // alpha0 movzx esi, byte ptr [eax + 3] // alpha0
movzx edi, byte ptr [eax + 7] // alpha1 movzx edi, byte ptr [eax + 7] // alpha1
vmovd xmm0, dword ptr [fixed_invtbl8 + esi * 4] // [1,a0] vmovd xmm0, dword ptr [ebx + esi * 4] // [1,a0]
vmovd xmm1, dword ptr [fixed_invtbl8 + edi * 4] // [1,a1] vmovd xmm1, dword ptr [ebx + edi * 4] // [1,a1]
movzx esi, byte ptr [eax + 11] // alpha2 movzx esi, byte ptr [eax + 11] // alpha2
movzx edi, byte ptr [eax + 15] // alpha3 movzx edi, byte ptr [eax + 15] // alpha3
vpunpckldq xmm6, xmm0, xmm1 // [1,a1,1,a0] vpunpckldq xmm6, xmm0, xmm1 // [1,a1,1,a0]
vmovd xmm2, dword ptr [fixed_invtbl8 + esi * 4] // [1,a2] vmovd xmm2, dword ptr [ebx + esi * 4] // [1,a2]
vmovd xmm3, dword ptr [fixed_invtbl8 + edi * 4] // [1,a3] vmovd xmm3, dword ptr [ebx + edi * 4] // [1,a3]
movzx esi, byte ptr [eax + 19] // alpha4 movzx esi, byte ptr [eax + 19] // alpha4
movzx edi, byte ptr [eax + 23] // alpha5 movzx edi, byte ptr [eax + 23] // alpha5
vpunpckldq xmm7, xmm2, xmm3 // [1,a3,1,a2] vpunpckldq xmm7, xmm2, xmm3 // [1,a3,1,a2]
vmovd xmm0, dword ptr [fixed_invtbl8 + esi * 4] // [1,a4] vmovd xmm0, dword ptr [ebx + esi * 4] // [1,a4]
vmovd xmm1, dword ptr [fixed_invtbl8 + edi * 4] // [1,a5] vmovd xmm1, dword ptr [ebx + edi * 4] // [1,a5]
movzx esi, byte ptr [eax + 27] // alpha6 movzx esi, byte ptr [eax + 27] // alpha6
movzx edi, byte ptr [eax + 31] // alpha7 movzx edi, byte ptr [eax + 31] // alpha7
vpunpckldq xmm0, xmm0, xmm1 // [1,a5,1,a4] vpunpckldq xmm0, xmm0, xmm1 // [1,a5,1,a4]
vmovd xmm2, dword ptr [fixed_invtbl8 + esi * 4] // [1,a6] vmovd xmm2, dword ptr [ebx + esi * 4] // [1,a6]
vmovd xmm3, dword ptr [fixed_invtbl8 + edi * 4] // [1,a7] vmovd xmm3, dword ptr [ebx + edi * 4] // [1,a7]
vpunpckldq xmm2, xmm2, xmm3 // [1,a7,1,a6] vpunpckldq xmm2, xmm2, xmm3 // [1,a7,1,a6]
vpunpcklqdq xmm3, xmm6, xmm7 // [1,a3,1,a2,1,a1,1,a0] vpunpcklqdq xmm3, xmm6, xmm7 // [1,a3,1,a2,1,a1,1,a0]
vpunpcklqdq xmm0, xmm0, xmm2 // [1,a7,1,a6,1,a5,1,a4] vpunpcklqdq xmm0, xmm0, xmm2 // [1,a7,1,a6,1,a5,1,a4]
...@@ -4473,6 +4477,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, ...@@ -4473,6 +4477,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
pop edi pop edi
pop esi pop esi
pop ebx
vzeroupper vzeroupper
ret ret
} }
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment