Commit 925c3d9e authored by Frank Barchard's avatar Frank Barchard

I420ToARGB conversion with matrix.

Take color conversion constants as a parameter to row function for I420ToARGBMatrixRow_SSSE3.
Allows future variations of color space using a single low level.

R=harryjin@google.com
BUG=libyuv:488

Review URL: https://webrtc-codereview.appspot.com/56669004 .
parent 0bc626a5
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1475
Version: 1476
License: BSD
License File: LICENSE
......
......@@ -88,6 +88,7 @@ extern "C" {
#define HAS_I422TOARGB1555ROW_SSSE3
#define HAS_I422TOARGB4444ROW_SSSE3
#define HAS_I422TOARGBROW_SSSE3
#define HAS_I422TOARGBMATRIXROW_SSSE3
#define HAS_I422TOBGRAROW_SSSE3
#define HAS_I422TORAWROW_SSSE3
#define HAS_I422TORGB24ROW_SSSE3
......@@ -161,6 +162,7 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64) && \
(!defined(__clang__) || defined(__SSSE3__))
#define HAS_I422TOARGBROW_SSSE3
#define HAS_I422TOARGBMATRIXROW_SSSE3
#endif
// GCC >= 4.7.0 required for AVX2.
......@@ -223,6 +225,7 @@ extern "C" {
#define HAS_I400TOARGBROW_AVX2
#define HAS_I422TOABGRROW_AVX2
#define HAS_I422TOARGBROW_AVX2
#define HAS_I422TOARGBMATRIXROW_AVX2
#define HAS_I422TOBGRAROW_AVX2
#define HAS_I422TORAWROW_AVX2
#define HAS_I422TORGB24ROW_AVX2
......@@ -290,6 +293,8 @@ extern "C" {
#define HAS_I422TOARGB1555ROW_NEON
#define HAS_I422TOARGB4444ROW_NEON
#define HAS_I422TOARGBROW_NEON
// TODO(fbarchard): Implement NEON version
#define HAS_I422TOARGBMATRIXROW_NEON
#define HAS_I422TOBGRAROW_NEON
#define HAS_I422TORAWROW_NEON
#define HAS_I422TORGB24ROW_NEON
......@@ -414,6 +419,21 @@ typedef uint32 ulvec32[8];
typedef uint8 ulvec8[32];
#endif
// This struct is for Intel color conversion.
#if defined(_M_IX86) || defined(_M_X64) || \
defined(__x86_64__) || defined(__i386__)
struct YuvConstants {
lvec8 kUVToB;
lvec8 kUVToG;
lvec8 kUVToR;
lvec16 kUVBiasB;
lvec16 kUVBiasG;
lvec16 kUVBiasR;
lvec16 kYToRgb;
};
#endif
#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
#define OMITFP
#else
......@@ -509,6 +529,12 @@ void I422ToARGBRow_NEON(const uint8* src_y,
const uint8* src_v,
uint8* dst_argb,
int width);
void I422ToARGBMatrixRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width);
void I411ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -962,6 +988,12 @@ void I422ToARGBRow_C(const uint8* src_y,
const uint8* src_v,
uint8* dst_argb,
int width);
void I422ToARGBMatrixRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width);
void I411ToARGBRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -1039,6 +1071,12 @@ void I422ToARGBRow_AVX2(const uint8* src_y,
const uint8* src_v,
uint8* dst_argb,
int width);
void I422ToARGBMatrixRow_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width);
void I422ToBGRARow_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -1069,6 +1107,12 @@ void I422ToARGBRow_SSSE3(const uint8* src_y,
const uint8* src_v,
uint8* dst_argb,
int width);
void I422ToARGBMatrixRow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width);
void I411ToARGBRow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -1203,6 +1247,12 @@ void I422ToARGBRow_Any_AVX2(const uint8* src_y,
const uint8* src_v,
uint8* dst_argb,
int width);
void I422ToARGBMatrixRow_Any_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width);
void I422ToBGRARow_Any_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -1233,6 +1283,12 @@ void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
const uint8* src_v,
uint8* dst_argb,
int width);
void I422ToARGBMatrixRow_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width);
void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -1463,7 +1519,13 @@ void I422ToARGBRow_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width);
void I422ToARGBMatrixRow_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
int width);
void I411ToARGBRow_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1475
#define LIBYUV_VERSION 1476
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -2156,6 +2156,51 @@ void I422ToUYVYRow_C(const uint8* src_y,
}
}
#if defined(HAS_I422TOARGBMATRIXROW_SSSE3)
extern struct YuvConstants kYuvConstants;
extern struct YuvConstants kYuvJConstants;
// JPeg color space version of I422ToARGB
void J422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
I422ToARGBMatrixRow_SSSE3(y_buf, u_buf, v_buf, dst_argb,
&kYuvJConstants, width);
}
void I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
I422ToARGBMatrixRow_SSSE3(y_buf, u_buf, v_buf, dst_argb,
&kYuvConstants, width);
}
#if defined(HAS_I422TOARGBMATRIXROW_AVX2)
// JPeg color space version of I422ToARGB
void J422ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
I422ToARGBMatrixRow_AVX2(y_buf, u_buf, v_buf, dst_argb,
&kYuvJConstants, width);
}
void I422ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
I422ToARGBMatrixRow_AVX2(y_buf, u_buf, v_buf, dst_argb,
&kYuvConstants, width);
}
#endif
#endif
// Maximum temporary width for wrappers to process at a time, in pixels.
#define MAXTWIDTH 2048
......
......@@ -1319,16 +1319,6 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
#if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2)
struct YuvConstants {
lvec8 kUVToB; // 0
lvec8 kUVToG; // 32
lvec8 kUVToR; // 64
lvec16 kUVBiasB; // 96
lvec16 kUVBiasG; // 128
lvec16 kUVBiasR; // 160
lvec16 kYToRgb; // 192
};
// BT.601 YUV to RGB reference
// R = (Y - 16) * 1.164 - V * -1.596
// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
......@@ -1351,7 +1341,7 @@ struct YuvConstants {
#define BR (VR * 128 + YGB)
// BT601 constants for YUV to RGB.
static YuvConstants SIMD_ALIGNED(kYuvConstants) = {
YuvConstants SIMD_ALIGNED(kYuvConstants) = {
{ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
{ UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
......@@ -1365,7 +1355,7 @@ static YuvConstants SIMD_ALIGNED(kYuvConstants) = {
};
// BT601 constants for NV21 where chroma plane is VU instead of UV.
static YuvConstants SIMD_ALIGNED(kYvuConstants) = {
YuvConstants SIMD_ALIGNED(kYvuConstants) = {
{ 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
{ VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
......@@ -1658,11 +1648,12 @@ void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
);
}
void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
void OMITFP I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
......@@ -1678,33 +1669,7 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
void OMITFP J422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
READYUV422
YUVTORGB(kYuvConstants)
STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [kYuvConstants]"r"(&kYuvJConstants.kUVToB) // %[kYuvConstants]
: [kYuvConstants]"r"(YuvConstants) // %[YuvConstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
......@@ -1939,56 +1904,15 @@ void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf,
}
#endif // HAS_I422TOBGRAROW_AVX2
#if defined(HAS_I422TOARGBROW_AVX2)
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN
"1: \n"
READYUV422_AVX2
YUVTORGB_AVX2(kYuvConstants)
// Step 3: Weave into ARGB
"vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" // BG
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" // RA
"vpermq $0xd8,%%ymm2,%%ymm2 \n"
"vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" // BGRA first 8 pixels
"vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" // BGRA next 8 pixels
"vmovdqu %%ymm1," MEMACCESS([dst_argb]) "\n"
"vmovdqu %%ymm0," MEMACCESS2(0x20,[dst_argb]) "\n"
"lea " MEMLEA(0x40,[dst_argb]) ",%[dst_argb] \n"
"sub $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
#endif // HAS_I422TOARGBROW_AVX2
#if defined(HAS_J422TOARGBROW_AVX2)
#if defined(HAS_I422TOARGBMATRIXROW_AVX2)
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
void OMITFP J422ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
void OMITFP I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
......@@ -2016,12 +1940,12 @@ void OMITFP J422ToARGBRow_AVX2(const uint8* y_buf,
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [kYuvConstants]"r"(&kYuvJConstants.kUVToB) // %[kYuvConstants]
: [kYuvConstants]"r"(YuvConstants) // %[YuvConstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
#endif // HAS_J422TOARGBROW_AVX2
#endif // HAS_I422TOARGBMATRIXROW_AVX2
#if defined(HAS_I422TOABGRROW_AVX2)
// 16 pixels
......
......@@ -25,16 +25,6 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || (defined(_M_X64) && !defined(__clang__)))
struct YuvConstants {
lvec8 kUVToB;
lvec8 kUVToG;
lvec8 kUVToR;
lvec16 kUVBiasB;
lvec16 kUVBiasG;
lvec16 kUVBiasR;
lvec16 kYToRgb;
};
#define KUVTOB 0
#define KUVTOG 32
#define KUVTOR 64
......@@ -65,7 +55,7 @@ struct YuvConstants {
#define BR (VR * 128 + YGB)
// BT601 constants for YUV to RGB.
static YuvConstants SIMD_ALIGNED(kYuvConstants) = {
YuvConstants SIMD_ALIGNED(kYuvConstants) = {
{ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
{ UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
......@@ -79,7 +69,7 @@ static YuvConstants SIMD_ALIGNED(kYuvConstants) = {
};
// BT601 constants for NV21 where chroma plane is VU instead of UV.
static YuvConstants SIMD_ALIGNED(kYvuConstants) = {
YuvConstants SIMD_ALIGNED(kYvuConstants) = {
{ 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
{ VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
......@@ -124,7 +114,7 @@ static YuvConstants SIMD_ALIGNED(kYvuConstants) = {
#define BRJ (VRJ * 128 + YGBJ)
// JPEG constants for YUV to RGB.
static YuvConstants SIMD_ALIGNED(kYuvJConstants) = {
YuvConstants SIMD_ALIGNED(kYuvJConstants) = {
{ UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0,
UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0 },
{ UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ,
......@@ -155,12 +145,13 @@ static YuvConstants SIMD_ALIGNED(kYuvJConstants) = {
// 64 bit
#if defined(_M_X64)
#if defined(HAS_I422TOARGBROW_SSSE3)
void I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
#if defined(HAS_I422TOARGBMATRIXROW_SSSE3)
void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width) {
__m128i xmm0, xmm1, xmm2, xmm3;
const __m128i xmm5 = _mm_set1_epi8(-1);
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
......@@ -172,15 +163,15 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);
xmm1 = _mm_loadu_si128(&xmm0);
xmm2 = _mm_loadu_si128(&xmm0);
xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kYuvConstants.kUVToB);
xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kYuvConstants.kUVToG);
xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kYuvConstants.kUVToR);
xmm0 = _mm_sub_epi16(*(__m128i*)kYuvConstants.kUVBiasB, xmm0);
xmm1 = _mm_sub_epi16(*(__m128i*)kYuvConstants.kUVBiasG, xmm1);
xmm2 = _mm_sub_epi16(*(__m128i*)kYuvConstants.kUVBiasR, xmm2);
xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)YuvConstants->kUVToB);
xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)YuvConstants->kUVToG);
xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)YuvConstants->kUVToR);
xmm0 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasB, xmm0);
xmm1 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasG, xmm1);
xmm2 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasR, xmm2);
xmm3 = _mm_loadl_epi64((__m128i*)y_buf);
xmm3 = _mm_unpacklo_epi8(xmm3, xmm3);
xmm3 = _mm_mulhi_epu16(xmm3, *(__m128i*)kYuvConstants.kYToRgb);
xmm3 = _mm_mulhi_epu16(xmm3, *(__m128i*)YuvConstants->kYToRgb);
xmm0 = _mm_adds_epi16(xmm0, xmm3);
xmm1 = _mm_adds_epi16(xmm1, xmm3);
xmm2 = _mm_adds_epi16(xmm2, xmm3);
......@@ -2012,77 +2003,45 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
__asm lea edx, [edx + 64] \
}
#ifdef HAS_I422TOARGBROW_AVX2
#ifdef HAS_I422TOARGBMATRIXROW_AVX2
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked)
void I422ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
void I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // argb
mov ecx, [esp + 8 + 20] // width
sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READYUV422_AVX2
YUVTORGB_AVX2(kYuvConstants)
STOREARGB_AVX2
sub ecx, 16
jg convertloop
pop edi
pop esi
vzeroupper
ret
}
}
#endif // HAS_I422TOARGBROW_AVX2
#ifdef HAS_J422TOARGBROW_AVX2
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked)
void J422ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // argb
mov ecx, [esp + 8 + 20] // width
push ebp
mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb
mov ebp, [esp + 12 + 20] // YuvConstants
mov ecx, [esp + 12 + 20] // width
sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READYUV422_AVX2
YUVTORGB_AVX2(kYuvJConstants)
YUVTORGB_AVX2(ebp)
STOREARGB_AVX2
sub ecx, 16
jg convertloop
pop ebp
pop edi
pop esi
vzeroupper
ret
}
}
#endif // HAS_J422TOARGBROW_AVX2
#endif // HAS_I422TOARGBMATRIXROW_AVX2
#ifdef HAS_I444TOARGBROW_AVX2
// 16 pixels
......@@ -2691,11 +2650,12 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
// 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked)
void I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
struct YuvConstants* YuvConstants,
int width) {
__asm {
push esi
push edi
......@@ -2704,8 +2664,9 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // argb
mov ecx, [esp + 12 + 20] // width
lea ebp, kYuvConstants
mov ebp, [esp + 12 + 20] // YuvConstants
mov ecx, [esp + 12 + 24] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
......@@ -2724,40 +2685,6 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
}
}
// 8 pixels.
// JPeg color space version of I422ToARGB
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked)
void J422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // argb
mov ecx, [esp + 8 + 20] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop:
READYUV422
YUVTORGB(kYuvJConstants)
STOREARGB
sub ecx, 8
jg convertloop
pop edi
pop esi
ret
}
}
// 8 pixels.
// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
// Similar to I420 but duplicate UV once more.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment