Commit f5f5d15d authored by fbarchard@google.com

Fix register order for ARGBToUV_AVX2

BUG=269
TESTED=try bots
R=harryjin@google.com

Review URL: https://webrtc-codereview.appspot.com/29249004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1200 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 11c30157
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1198 Version: 1200
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -189,7 +189,7 @@ extern "C" { ...@@ -189,7 +189,7 @@ extern "C" {
#define HAS_ARGBMIRRORROW_AVX2 #define HAS_ARGBMIRRORROW_AVX2
#define HAS_ARGBPOLYNOMIALROW_AVX2 #define HAS_ARGBPOLYNOMIALROW_AVX2
#define HAS_ARGBSHUFFLEROW_AVX2 #define HAS_ARGBSHUFFLEROW_AVX2
//#define HAS_ARGBTOUVROW_AVX2 #define HAS_ARGBTOUVROW_AVX2
#define HAS_ARGBTOYJROW_AVX2 #define HAS_ARGBTOYJROW_AVX2
#define HAS_ARGBTOYROW_AVX2 #define HAS_ARGBTOYROW_AVX2
#define HAS_COPYROW_AVX #define HAS_COPYROW_AVX
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1198 #define LIBYUV_VERSION 1200
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -1006,9 +1006,9 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb, ...@@ -1006,9 +1006,9 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
"+r"(dst_v), // %2 "+r"(dst_v), // %2
"+rm"(width) // %3 "+rm"(width) // %3
: "r"((intptr_t)(src_stride_argb)), // %4 : "r"((intptr_t)(src_stride_argb)), // %4
"m"(kARGBToV), // %5 "m"(kAddUV128), // %5
"m"(kARGBToU), // %6 "m"(kARGBToV), // %6
"m"(kAddUV128), // %7 "m"(kARGBToU), // %7
"m"(kShufARGBToUV_AVX) // %8 "m"(kShufARGBToUV_AVX) // %8
: "memory", "cc" : "memory", "cc"
#if defined(__native_client__) && defined(__x86_64__) #if defined(__native_client__) && defined(__x86_64__)
......
...@@ -847,8 +847,8 @@ void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { ...@@ -847,8 +847,8 @@ void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
mov eax, [esp + 4] /* src_argb */ mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */ mov edx, [esp + 8] /* dst_y */
mov ecx, [esp + 12] /* pix */ mov ecx, [esp + 12] /* pix */
movdqa xmm5, kAddY16
movdqa xmm4, kBGRAToY movdqa xmm4, kBGRAToY
movdqa xmm5, kAddY16
convertloop: convertloop:
movdqu xmm0, [eax] movdqu xmm0, [eax]
...@@ -880,8 +880,8 @@ void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { ...@@ -880,8 +880,8 @@ void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
mov eax, [esp + 4] /* src_argb */ mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */ mov edx, [esp + 8] /* dst_y */
mov ecx, [esp + 12] /* pix */ mov ecx, [esp + 12] /* pix */
movdqa xmm5, kAddY16
movdqa xmm4, kABGRToY movdqa xmm4, kABGRToY
movdqa xmm5, kAddY16
convertloop: convertloop:
movdqu xmm0, [eax] movdqu xmm0, [eax]
...@@ -913,8 +913,8 @@ void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { ...@@ -913,8 +913,8 @@ void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
mov eax, [esp + 4] /* src_argb */ mov eax, [esp + 4] /* src_argb */
mov edx, [esp + 8] /* dst_y */ mov edx, [esp + 8] /* dst_y */
mov ecx, [esp + 12] /* pix */ mov ecx, [esp + 12] /* pix */
movdqa xmm5, kAddY16
movdqa xmm4, kRGBAToY movdqa xmm4, kRGBAToY
movdqa xmm5, kAddY16
convertloop: convertloop:
movdqu xmm0, [eax] movdqu xmm0, [eax]
...@@ -951,9 +951,9 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, ...@@ -951,9 +951,9 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov edx, [esp + 8 + 12] // dst_u mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix mov ecx, [esp + 8 + 20] // pix
movdqa xmm7, kARGBToU
movdqa xmm6, kARGBToV
movdqa xmm5, kAddUV128 movdqa xmm5, kAddUV128
movdqa xmm6, kARGBToV
movdqa xmm7, kARGBToU
sub edi, edx // stride from u to v sub edi, edx // stride from u to v
convertloop: convertloop:
...@@ -1021,9 +1021,9 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb, ...@@ -1021,9 +1021,9 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov edx, [esp + 8 + 12] // dst_u mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix mov ecx, [esp + 8 + 20] // pix
movdqa xmm7, kARGBToUJ
movdqa xmm6, kARGBToVJ
movdqa xmm5, kAddUVJ128 movdqa xmm5, kAddUVJ128
movdqa xmm6, kARGBToVJ
movdqa xmm7, kARGBToUJ
sub edi, edx // stride from u to v sub edi, edx // stride from u to v
convertloop: convertloop:
...@@ -1156,9 +1156,9 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb0, ...@@ -1156,9 +1156,9 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb0,
mov edx, [esp + 4 + 8] // dst_u mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v mov edi, [esp + 4 + 12] // dst_v
mov ecx, [esp + 4 + 16] // pix mov ecx, [esp + 4 + 16] // pix
movdqa xmm7, kARGBToU
movdqa xmm6, kARGBToV
movdqa xmm5, kAddUV128 movdqa xmm5, kAddUV128
movdqa xmm6, kARGBToV
movdqa xmm7, kARGBToU
sub edi, edx // stride from u to v sub edi, edx // stride from u to v
convertloop: convertloop:
...@@ -1213,9 +1213,9 @@ void ARGBToUV422Row_SSSE3(const uint8* src_argb0, ...@@ -1213,9 +1213,9 @@ void ARGBToUV422Row_SSSE3(const uint8* src_argb0,
mov edx, [esp + 4 + 8] // dst_u mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v mov edi, [esp + 4 + 12] // dst_v
mov ecx, [esp + 4 + 16] // pix mov ecx, [esp + 4 + 16] // pix
movdqa xmm7, kARGBToU
movdqa xmm6, kARGBToV
movdqa xmm5, kAddUV128 movdqa xmm5, kAddUV128
movdqa xmm6, kARGBToV
movdqa xmm7, kARGBToU
sub edi, edx // stride from u to v sub edi, edx // stride from u to v
convertloop: convertloop:
...@@ -1273,9 +1273,9 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, ...@@ -1273,9 +1273,9 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov edx, [esp + 8 + 12] // dst_u mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix mov ecx, [esp + 8 + 20] // pix
movdqa xmm7, kBGRAToU
movdqa xmm6, kBGRAToV
movdqa xmm5, kAddUV128 movdqa xmm5, kAddUV128
movdqa xmm6, kBGRAToV
movdqa xmm7, kBGRAToU
sub edi, edx // stride from u to v sub edi, edx // stride from u to v
convertloop: convertloop:
...@@ -1343,9 +1343,9 @@ void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, ...@@ -1343,9 +1343,9 @@ void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov edx, [esp + 8 + 12] // dst_u mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix mov ecx, [esp + 8 + 20] // pix
movdqa xmm7, kABGRToU
movdqa xmm6, kABGRToV
movdqa xmm5, kAddUV128 movdqa xmm5, kAddUV128
movdqa xmm6, kABGRToV
movdqa xmm7, kABGRToU
sub edi, edx // stride from u to v sub edi, edx // stride from u to v
convertloop: convertloop:
...@@ -1413,9 +1413,9 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, ...@@ -1413,9 +1413,9 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
mov edx, [esp + 8 + 12] // dst_u mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix mov ecx, [esp + 8 + 20] // pix
movdqa xmm7, kRGBAToU
movdqa xmm6, kRGBAToV
movdqa xmm5, kAddUV128 movdqa xmm5, kAddUV128
movdqa xmm6, kRGBAToV
movdqa xmm7, kRGBAToU
sub edi, edx // stride from u to v sub edi, edx // stride from u to v
convertloop: convertloop:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment