Commit 20828059 authored by fbarchard@google.com

Remove vmovdqa from UV code

BUG=181
TESTED=c:\intelsde\sde -hsw -- out\release\libyuv_unittest.exe --gtest_filter=*ARGBToI420*
Review URL: https://webrtc-codereview.appspot.com/1091010

git-svn-id: http://libyuv.googlecode.com/svn/trunk@567 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 551d2b29
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 566 Version: 567
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 566 #define LIBYUV_VERSION 567
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -797,10 +797,6 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) { ...@@ -797,10 +797,6 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) {
lea edx, [edx + 32] lea edx, [edx + 32]
jg convertloop jg convertloop
ret ret
vphaddw ymm0, ymm0, ymm1
vpermq ymm0, ymm0, 0xd8
vpackuswb ymm0, ymm0, ymm2
vpermq ymm0, ymm0, 0xd8
} }
} }
#endif // HAS_ARGBTOYROW_AVX2 #endif // HAS_ARGBTOYROW_AVX2
...@@ -1176,39 +1172,34 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb, ...@@ -1176,39 +1172,34 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
vpavgb ymm2, ymm2, [eax + esi + 64] vpavgb ymm2, ymm2, [eax + esi + 64]
vpavgb ymm3, ymm3, [eax + esi + 96] vpavgb ymm3, ymm3, [eax + esi + 96]
lea eax, [eax + 128] lea eax, [eax + 128]
vmovdqa ymm4, ymm0 // TODO(fbarchard): Remove. vshufps ymm4, ymm0, ymm1, 0x88
vshufps ymm0, ymm0, ymm1, 0x88 vshufps ymm0, ymm0, ymm1, 0xdd
vshufps ymm4, ymm4, ymm1, 0xdd
vpavgb ymm0, ymm0, ymm4 vpavgb ymm0, ymm0, ymm4
vpermq ymm0, ymm0, 0xd8 // TODO(fbarchard): Remove. vpermq ymm0, ymm0, 0xd8 // TODO(fbarchard): Remove.
vmovdqa ymm4, ymm2 // TODO(fbarchard): Remove. vshufps ymm4, ymm2, ymm3, 0x88
vshufps ymm2, ymm2, ymm3, 0x88 vshufps ymm2, ymm2, ymm3, 0xdd
vshufps ymm4, ymm4, ymm3, 0xdd
vpavgb ymm2, ymm2, ymm4 vpavgb ymm2, ymm2, ymm4
vpermq ymm2, ymm2, 0xd8 // TODO(fbarchard): Remove. vpermq ymm2, ymm2, 0xd8 // TODO(fbarchard): Remove.
// step 2 - convert to U and V // step 2 - convert to U and V
// from here down is very similar to Y code except // from here down is very similar to Y code except
// instead of 32 different pixels, its 16 pixels of U and 16 of V // instead of 32 different pixels, its 16 pixels of U and 16 of V
vmovdqa ymm1, ymm0 // TODO(fbarchard): Remove. vpmaddubsw ymm1, ymm0, ymm7 // U
vmovdqa ymm3, ymm2 // TODO(fbarchard): Remove. vpmaddubsw ymm3, ymm2, ymm7
vpmaddubsw ymm0, ymm0, ymm7 // U vpmaddubsw ymm0, ymm0, ymm6 // V
vpmaddubsw ymm2, ymm2, ymm7 vpmaddubsw ymm2, ymm2, ymm6
vpmaddubsw ymm1, ymm1, ymm6 // V
vpmaddubsw ymm3, ymm3, ymm6
vphaddw ymm0, ymm0, ymm2
vpermq ymm0, ymm0, 0xd8 // TODO(fbarchard): Remove.
vphaddw ymm1, ymm1, ymm3 vphaddw ymm1, ymm1, ymm3
vpermq ymm1, ymm1, 0xd8 // TODO(fbarchard): Remove. vpermq ymm1, ymm1, 0xd8 // TODO(fbarchard): Remove.
vpsraw ymm0, ymm0, 8 vphaddw ymm0, ymm0, ymm2
vpermq ymm0, ymm0, 0xd8 // TODO(fbarchard): Remove.
vpsraw ymm1, ymm1, 8 vpsraw ymm1, ymm1, 8
vpacksswb ymm0, ymm0, ymm1 vpsraw ymm0, ymm0, 8
vpacksswb ymm0, ymm1, ymm0
vpermq ymm0, ymm0, 0xd8 vpermq ymm0, ymm0, 0xd8
vpaddb ymm0, ymm0, ymm5 // -> unsigned vpaddb ymm0, ymm0, ymm5 // -> unsigned
// step 3 - store 16 U and 16 V values // step 3 - store 16 U and 16 V values
sub ecx, 32 sub ecx, 32
vmovdqa ymm1, ymm0
vextractf128 qword ptr [edx], ymm0, 0 // U vextractf128 qword ptr [edx], ymm0, 0 // U
vextractf128 qword ptr [edx + edi], ymm0, 1 // V vextractf128 qword ptr [edx + edi], ymm0, 1 // V
lea edx, [edx + 16] lea edx, [edx + 16]
...@@ -1320,39 +1311,34 @@ void ARGBToUVRow_Unaligned_AVX2(const uint8* src_argb0, int src_stride_argb, ...@@ -1320,39 +1311,34 @@ void ARGBToUVRow_Unaligned_AVX2(const uint8* src_argb0, int src_stride_argb,
vpavgb ymm2, ymm2, [eax + esi + 64] vpavgb ymm2, ymm2, [eax + esi + 64]
vpavgb ymm3, ymm3, [eax + esi + 96] vpavgb ymm3, ymm3, [eax + esi + 96]
lea eax, [eax + 128] lea eax, [eax + 128]
vmovdqa ymm4, ymm0 vshufps ymm4, ymm0, ymm1, 0x88
vshufps ymm0, ymm0, ymm1, 0x88 vshufps ymm0, ymm0, ymm1, 0xdd
vshufps ymm4, ymm4, ymm1, 0xdd
vpavgb ymm0, ymm0, ymm4 vpavgb ymm0, ymm0, ymm4
vpermq ymm0, ymm0, 0xd8 vpermq ymm0, ymm0, 0xd8 // TODO(fbarchard): Remove.
vmovdqa ymm4, ymm2 vshufps ymm4, ymm2, ymm3, 0x88
vshufps ymm2, ymm2, ymm3, 0x88 vshufps ymm2, ymm2, ymm3, 0xdd
vshufps ymm4, ymm4, ymm3, 0xdd
vpavgb ymm2, ymm2, ymm4 vpavgb ymm2, ymm2, ymm4
vpermq ymm2, ymm2, 0xd8 vpermq ymm2, ymm2, 0xd8 // TODO(fbarchard): Remove.
// step 2 - convert to U and V // step 2 - convert to U and V
// from here down is very similar to Y code except // from here down is very similar to Y code except
// instead of 32 different pixels, its 16 pixels of U and 16 of V // instead of 32 different pixels, its 16 pixels of U and 16 of V
vmovdqa ymm1, ymm0 vpmaddubsw ymm1, ymm0, ymm7 // U
vmovdqa ymm3, ymm2 vpmaddubsw ymm3, ymm2, ymm7
vpmaddubsw ymm0, ymm0, ymm7 // U vpmaddubsw ymm0, ymm0, ymm6 // V
vpmaddubsw ymm2, ymm2, ymm7 vpmaddubsw ymm2, ymm2, ymm6
vpmaddubsw ymm1, ymm1, ymm6 // V
vpmaddubsw ymm3, ymm3, ymm6
vphaddw ymm0, ymm0, ymm2
vpermq ymm0, ymm0, 0xd8
vphaddw ymm1, ymm1, ymm3 vphaddw ymm1, ymm1, ymm3
vpermq ymm1, ymm1, 0xd8 vpermq ymm1, ymm1, 0xd8 // TODO(fbarchard): Remove.
vpsraw ymm0, ymm0, 8 vphaddw ymm0, ymm0, ymm2
vpermq ymm0, ymm0, 0xd8 // TODO(fbarchard): Remove.
vpsraw ymm1, ymm1, 8 vpsraw ymm1, ymm1, 8
vpacksswb ymm0, ymm0, ymm1 vpsraw ymm0, ymm0, 8
vpacksswb ymm0, ymm1, ymm0
vpermq ymm0, ymm0, 0xd8 vpermq ymm0, ymm0, 0xd8
vpaddb ymm0, ymm0, ymm5 // -> unsigned vpaddb ymm0, ymm0, ymm5 // -> unsigned
// step 3 - store 16 U and 16 V values // step 3 - store 16 U and 16 V values
sub ecx, 32 sub ecx, 32
vmovdqa ymm1, ymm0
vextractf128 qword ptr [edx], ymm0, 0 // U vextractf128 qword ptr [edx], ymm0, 0 // U
vextractf128 qword ptr [edx + edi], ymm0, 1 // V vextractf128 qword ptr [edx + edi], ymm0, 1 // V
lea edx, [edx + 16] lea edx, [edx + 16]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment