Commit 3c4f5735 authored by Frank Barchard

Use a pointer to the inverse table (fixed_invtbl8) for clang-cl

R=harryjin@google.com
TBR=harryjin@google.com
BUG=none

Review URL: https://webrtc-codereview.appspot.com/54859004.
parent 5452cce4
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1473
Version: 1474
License: BSD
License File: LICENSE
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1473
#define LIBYUV_VERSION 1474
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -4331,19 +4331,21 @@ __declspec(naked)
void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
int width) {
__asm {
push ebx
push esi
push edi
mov eax, [esp + 8 + 4] // src_argb0
mov edx, [esp + 8 + 8] // dst_argb
mov ecx, [esp + 8 + 12] // width
mov eax, [esp + 12 + 4] // src_argb
mov edx, [esp + 12 + 8] // dst_argb
mov ecx, [esp + 12 + 12] // width
lea ebx, fixed_invtbl8
convertloop:
movdqu xmm0, [eax] // read 4 pixels
movzx esi, byte ptr [eax + 3] // first alpha
movzx edi, byte ptr [eax + 7] // second alpha
punpcklbw xmm0, xmm0 // first 2
movd xmm2, dword ptr [fixed_invtbl8 + esi * 4]
movd xmm3, dword ptr [fixed_invtbl8 + edi * 4]
movd xmm2, dword ptr [ebx + esi * 4]
movd xmm3, dword ptr [ebx + edi * 4]
pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words. 1, a, a, a
pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words
movlhps xmm2, xmm3
......@@ -4353,21 +4355,22 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
movzx esi, byte ptr [eax + 11] // third alpha
movzx edi, byte ptr [eax + 15] // fourth alpha
punpckhbw xmm1, xmm1 // next 2
movd xmm2, dword ptr [fixed_invtbl8 + esi * 4]
movd xmm3, dword ptr [fixed_invtbl8 + edi * 4]
movd xmm2, dword ptr [ebx + esi * 4]
movd xmm3, dword ptr [ebx + edi * 4]
pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words
pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words
movlhps xmm2, xmm3
pmulhuw xmm1, xmm2 // rgb * a
lea eax, [eax + 16]
packuswb xmm0, xmm1
movdqu [edx], xmm0
lea edx, [edx + 16]
sub ecx, 4
jg convertloop
pop edi
pop esi
pop ebx
ret
}
}
......@@ -4420,36 +4423,37 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
int width) {
__asm {
mov eax, [esp + 4] // src_argb0
mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // width
sub edx, eax
vbroadcastf128 ymm5, xmmword ptr kUnattenShuffleAlpha_AVX2
push ebx
push esi
push edi
mov eax, [esp + 12 + 4] // src_argb
mov edx, [esp + 12 + 8] // dst_argb
mov ecx, [esp + 12 + 12] // width
sub edx, eax
lea ebx, fixed_invtbl8
vbroadcastf128 ymm5, xmmword ptr kUnattenShuffleAlpha_AVX2
convertloop:
// replace VPGATHER
movzx esi, byte ptr [eax + 3] // alpha0
movzx edi, byte ptr [eax + 7] // alpha1
vmovd xmm0, dword ptr [fixed_invtbl8 + esi * 4] // [1,a0]
vmovd xmm1, dword ptr [fixed_invtbl8 + edi * 4] // [1,a1]
vmovd xmm0, dword ptr [ebx + esi * 4] // [1,a0]
vmovd xmm1, dword ptr [ebx + edi * 4] // [1,a1]
movzx esi, byte ptr [eax + 11] // alpha2
movzx edi, byte ptr [eax + 15] // alpha3
vpunpckldq xmm6, xmm0, xmm1 // [1,a1,1,a0]
vmovd xmm2, dword ptr [fixed_invtbl8 + esi * 4] // [1,a2]
vmovd xmm3, dword ptr [fixed_invtbl8 + edi * 4] // [1,a3]
vmovd xmm2, dword ptr [ebx + esi * 4] // [1,a2]
vmovd xmm3, dword ptr [ebx + edi * 4] // [1,a3]
movzx esi, byte ptr [eax + 19] // alpha4
movzx edi, byte ptr [eax + 23] // alpha5
vpunpckldq xmm7, xmm2, xmm3 // [1,a3,1,a2]
vmovd xmm0, dword ptr [fixed_invtbl8 + esi * 4] // [1,a4]
vmovd xmm1, dword ptr [fixed_invtbl8 + edi * 4] // [1,a5]
vmovd xmm0, dword ptr [ebx + esi * 4] // [1,a4]
vmovd xmm1, dword ptr [ebx + edi * 4] // [1,a5]
movzx esi, byte ptr [eax + 27] // alpha6
movzx edi, byte ptr [eax + 31] // alpha7
vpunpckldq xmm0, xmm0, xmm1 // [1,a5,1,a4]
vmovd xmm2, dword ptr [fixed_invtbl8 + esi * 4] // [1,a6]
vmovd xmm3, dword ptr [fixed_invtbl8 + edi * 4] // [1,a7]
vmovd xmm2, dword ptr [ebx + esi * 4] // [1,a6]
vmovd xmm3, dword ptr [ebx + edi * 4] // [1,a7]
vpunpckldq xmm2, xmm2, xmm3 // [1,a7,1,a6]
vpunpcklqdq xmm3, xmm6, xmm7 // [1,a3,1,a2,1,a1,1,a0]
vpunpcklqdq xmm0, xmm0, xmm2 // [1,a7,1,a6,1,a5,1,a4]
......@@ -4473,6 +4477,7 @@ void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
pop edi
pop esi
pop ebx
vzeroupper
ret
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment