Commit 7892ea1f authored by fbarchard@google.com

Fix for ARGBToUV on AVX2

BUG=269
TESTED=local testing
R=harryjin@google.com

Review URL: https://webrtc-codereview.appspot.com/33669004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1202 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent ddee77cd
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1201
Version: 1203
License: BSD
License File: LICENSE
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1201
#define LIBYUV_VERSION 1203
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -983,6 +983,7 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
"vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n"
"vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n"
"vpavgb %%ymm4,%%ymm2,%%ymm2 \n"
"vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n"
"vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n"
"vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n"
......@@ -995,6 +996,7 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb,
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vpshufb %8,%%ymm0,%%ymm0 \n"
"vpaddb %%ymm5,%%ymm0,%%ymm0 \n"
"vextractf128 $0x0,%%ymm0," MEMACCESS(1) " \n"
VEXTOPMEM(vextractf128,ymm0,0x1,1,2,1) // vextractf128 $0x1,%%ymm0,(%1,%2,1)
"lea " MEMLEA(0x10,1) ",%1 \n"
......
......@@ -1473,7 +1473,6 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
}
#endif // HAS_ARGBTOYROW_SSSE3
#if defined(HAS_I422TOARGBROW_AVX2) || defined(HAS_I422TOBGRAROW_AVX2)
static const lvec8 kUVToB_AVX = {
UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB,
......@@ -1502,6 +1501,7 @@ static const lvec16 kUVBiasG_AVX = {
// Table holding the constant BR replicated into all 16 lanes of an lvec16.
// NOTE(review): BR and lvec16 are defined outside this excerpt; the name
// suggests this is the red-channel bias used by the AVX2 I422-to-RGB row
// functions guarded by the surrounding #if -- confirm against BR's definition.
static const lvec16 kUVBiasR_AVX = {
BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR
};
#endif // defined(HAS_I422TOARGBROW_AVX2) || defined(HAS_I422TOBGRAROW_AVX2)
// Read 8 UV from 422, upsample to 16 UV.
#define READYUV422_AVX2 __asm { \
......@@ -1540,7 +1540,7 @@ static const lvec16 kUVBiasR_AVX = {
__asm vpackuswb ymm2, ymm2, ymm2 /* R */ \
}
#if defined(HAS_I422TOARGBROW_AVX2)
#ifdef HAS_I422TOARGBROW_AVX2
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked) __declspec(align(16))
......@@ -1584,7 +1584,9 @@ void I422ToARGBRow_AVX2(const uint8* y_buf,
ret
}
}
#endif // HAS_I422TOARGBROW_AVX2
#ifdef HAS_I422TOBGRAROW_AVX2
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
// TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3.
......@@ -1629,7 +1631,9 @@ void I422ToBGRARow_AVX2(const uint8* y_buf,
ret
}
}
#endif // HAS_I422TOBGRAROW_AVX2
#ifdef HAS_I422TORGBAROW_AVX2
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
// TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3.
......@@ -1674,7 +1678,9 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
ret
}
}
#endif // HAS_I422TORGBAROW_AVX2
#ifdef HAS_I422TOABGRROW_AVX2
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
// TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3.
......@@ -1719,7 +1725,7 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
ret
}
}
#endif // HAS_I422TOARGBROW_AVX2
#endif // HAS_I422TOABGRROW_AVX2
#ifdef HAS_I422TOARGBROW_SSSE3
// TODO(fbarchard): Read that does half size on Y and treats 420 as 444.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment