Commit 3bb829a4 authored by fbarchard@google.com

Add a parameterized macro for YUV to RGB conversion on Windows. Passing the constants structure as a macro argument allows multiple color matrix structures in the future.

BUG=393
TESTED=local build
R=brucedawson@google.com, harryjin@google.com

Review URL: https://webrtc-codereview.appspot.com/38079004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1275 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 97a3850e
...@@ -1499,23 +1499,23 @@ static YuvConstants SIMD_ALIGNED(kYvuConstants) = { ...@@ -1499,23 +1499,23 @@ static YuvConstants SIMD_ALIGNED(kYvuConstants) = {
"punpcklwd %%xmm0,%%xmm0 \n" "punpcklwd %%xmm0,%%xmm0 \n"
// Convert 8 pixels: 8 UV and 8 Y // Convert 8 pixels: 8 UV and 8 Y
#define YUVTORGB(kYuvConstants) \ #define YUVTORGB(YuvConstants) \
"movdqa %%xmm0,%%xmm1 \n" \ "movdqa %%xmm0,%%xmm1 \n" \
"movdqa %%xmm0,%%xmm2 \n" \ "movdqa %%xmm0,%%xmm2 \n" \
"movdqa %%xmm0,%%xmm3 \n" \ "movdqa %%xmm0,%%xmm3 \n" \
"movdqa " MEMACCESS2(96, [kYuvConstants]) ",%%xmm0 \n" \ "movdqa " MEMACCESS2(96, [YuvConstants]) ",%%xmm0 \n" \
"pmaddubsw " MEMACCESS([kYuvConstants]) ",%%xmm1 \n" \ "pmaddubsw " MEMACCESS([YuvConstants]) ",%%xmm1 \n" \
"psubw %%xmm1,%%xmm0 \n" \ "psubw %%xmm1,%%xmm0 \n" \
"movdqa " MEMACCESS2(128, [kYuvConstants]) ",%%xmm1 \n" \ "movdqa " MEMACCESS2(128, [YuvConstants]) ",%%xmm1 \n" \
"pmaddubsw " MEMACCESS2(32, [kYuvConstants]) ",%%xmm2 \n" \ "pmaddubsw " MEMACCESS2(32, [YuvConstants]) ",%%xmm2 \n" \
"psubw %%xmm2,%%xmm1 \n" \ "psubw %%xmm2,%%xmm1 \n" \
"movdqa " MEMACCESS2(160, [kYuvConstants]) ",%%xmm2 \n" \ "movdqa " MEMACCESS2(160, [YuvConstants]) ",%%xmm2 \n" \
"pmaddubsw " MEMACCESS2(64, [kYuvConstants]) ",%%xmm3 \n" \ "pmaddubsw " MEMACCESS2(64, [YuvConstants]) ",%%xmm3 \n" \
"psubw %%xmm3,%%xmm2 \n" \ "psubw %%xmm3,%%xmm2 \n" \
"movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \ "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \
"lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \ "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \
"punpcklbw %%xmm3,%%xmm3 \n" \ "punpcklbw %%xmm3,%%xmm3 \n" \
"pmulhuw " MEMACCESS2(192, [kYuvConstants]) ",%%xmm3 \n" \ "pmulhuw " MEMACCESS2(192, [YuvConstants]) ",%%xmm3 \n" \
"paddsw %%xmm3,%%xmm0 \n" \ "paddsw %%xmm3,%%xmm0 \n" \
"paddsw %%xmm3,%%xmm1 \n" \ "paddsw %%xmm3,%%xmm1 \n" \
"paddsw %%xmm3,%%xmm2 \n" \ "paddsw %%xmm3,%%xmm2 \n" \
...@@ -1887,21 +1887,21 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, ...@@ -1887,21 +1887,21 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
"vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n"
// Convert 16 pixels: 16 UV and 16 Y. // Convert 16 pixels: 16 UV and 16 Y.
#define YUVTORGB_AVX2(kYuvConstants) \ #define YUVTORGB_AVX2(YuvConstants) \
"vpmaddubsw " MEMACCESS2(64, [kYuvConstants]) ",%%ymm0,%%ymm2 \n" \ "vpmaddubsw " MEMACCESS2(64, [YuvConstants]) ",%%ymm0,%%ymm2 \n" \
"vpmaddubsw " MEMACCESS2(32, [kYuvConstants]) ",%%ymm0,%%ymm1 \n" \ "vpmaddubsw " MEMACCESS2(32, [YuvConstants]) ",%%ymm0,%%ymm1 \n" \
"vpmaddubsw " MEMACCESS([kYuvConstants]) ",%%ymm0,%%ymm0 \n" \ "vpmaddubsw " MEMACCESS([YuvConstants]) ",%%ymm0,%%ymm0 \n" \
"vmovdqu " MEMACCESS2(160, [kYuvConstants]) ",%%ymm3 \n" \ "vmovdqu " MEMACCESS2(160, [YuvConstants]) ",%%ymm3 \n" \
"vpsubw %%ymm2,%%ymm3,%%ymm2 \n" \ "vpsubw %%ymm2,%%ymm3,%%ymm2 \n" \
"vmovdqu " MEMACCESS2(128, [kYuvConstants]) ",%%ymm2 \n" \ "vmovdqu " MEMACCESS2(128, [YuvConstants]) ",%%ymm2 \n" \
"vpsubw %%ymm1,%%ymm2,%%ymm1 \n" \ "vpsubw %%ymm1,%%ymm2,%%ymm1 \n" \
"vmovdqu " MEMACCESS2(96, [kYuvConstants]) ",%%ymm1 \n" \ "vmovdqu " MEMACCESS2(96, [YuvConstants]) ",%%ymm1 \n" \
"vpsubw %%ymm0,%%ymm1,%%ymm0 \n" \ "vpsubw %%ymm0,%%ymm1,%%ymm0 \n" \
"vmovdqu " MEMACCESS([y_buf]) ",%%xmm3 \n" \ "vmovdqu " MEMACCESS([y_buf]) ",%%xmm3 \n" \
"lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" \ "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" \
"vpermq $0xd8,%%ymm3,%%ymm3 \n" \ "vpermq $0xd8,%%ymm3,%%ymm3 \n" \
"vpunpcklbw %%ymm3,%%ymm3,%%ymm3 \n" \ "vpunpcklbw %%ymm3,%%ymm3,%%ymm3 \n" \
"vpmulhuw " MEMACCESS2(192, [kYuvConstants]) ",%%ymm3,%%ymm3 \n" \ "vpmulhuw " MEMACCESS2(192, [YuvConstants]) ",%%ymm3,%%ymm3 \n" \
"vpaddsw %%ymm3,%%ymm0,%%ymm0 \n" \ "vpaddsw %%ymm3,%%ymm0,%%ymm0 \n" \
"vpaddsw %%ymm3,%%ymm1,%%ymm1 \n" \ "vpaddsw %%ymm3,%%ymm1,%%ymm1 \n" \
"vpaddsw %%ymm3,%%ymm2,%%ymm2 \n" \ "vpaddsw %%ymm3,%%ymm2,%%ymm2 \n" \
......
...@@ -1472,30 +1472,45 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, ...@@ -1472,30 +1472,45 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
#endif // HAS_ARGBTOYROW_SSSE3 #endif // HAS_ARGBTOYROW_SSSE3
#if defined(HAS_I422TOARGBROW_AVX2) || defined(HAS_I422TOBGRAROW_AVX2) #if defined(HAS_I422TOARGBROW_AVX2) || defined(HAS_I422TOBGRAROW_AVX2)
static const lvec8 kUVToB_AVX = {
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, struct YuvConstants {
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 lvec8 kUVToB; // 0
}; lvec8 kUVToG; // 32
static const lvec8 kUVToR_AVX = { lvec8 kUVToR; // 64
0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, lvec16 kUVBiasB; // 96
0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR lvec16 kUVBiasG; // 128
}; lvec16 kUVBiasR; // 160
static const lvec8 kUVToG_AVX = { lvec16 kYToRgb; // 192
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
};
static const lvec16 kYToRgb_AVX = {
YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG
};
static const lvec16 kUVBiasB_AVX = {
BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB
}; };
static const lvec16 kUVBiasG_AVX = {
BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG // BT601 constants for YUV to RGB.
static YuvConstants SIMD_ALIGNED(kYuvConstants) = {
{ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
{ UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
{ 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR },
{ BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
{ BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
{ BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
{ YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
}; };
static const lvec16 kUVBiasR_AVX = {
BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR // BT601 constants for NV21 where chroma plane is VU instead of UV.
static YuvConstants SIMD_ALIGNED(kYvuConstants) = {
{ 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
{ VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
{ VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
{ BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
{ BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
{ BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
{ YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
}; };
#endif // defined(HAS_I422TOARGBROW_AVX2) || defined(HAS_I422TOBGRAROW_AVX2) #endif // defined(HAS_I422TOARGBROW_AVX2) || defined(HAS_I422TOBGRAROW_AVX2)
// Read 8 UV from 422, upsample to 16 UV. // Read 8 UV from 422, upsample to 16 UV.
...@@ -1509,23 +1524,23 @@ static const lvec16 kUVBiasR_AVX = { ...@@ -1509,23 +1524,23 @@ static const lvec16 kUVBiasR_AVX = {
} }
// Convert 16 pixels: 16 UV and 16 Y. // Convert 16 pixels: 16 UV and 16 Y.
#define YUVTORGB_AVX2 __asm { \ #define YUVTORGB_AVX2(YuvConstants) __asm { \
/* Step 1: Find 8 UV contributions to 16 R,G,B values */ \ /* Step 1: Find 8 UV contributions to 16 R,G,B values */ \
__asm vpmaddubsw ymm2, ymm0, kUVToR_AVX /* scale R UV */ \ __asm vpmaddubsw ymm2, ymm0, YuvConstants.kUVToR /* scale R UV */ \
__asm vpmaddubsw ymm1, ymm0, kUVToG_AVX /* scale G UV */ \ __asm vpmaddubsw ymm1, ymm0, YuvConstants.kUVToG /* scale G UV */ \
__asm vpmaddubsw ymm0, ymm0, kUVToB_AVX /* scale B UV */ \ __asm vpmaddubsw ymm0, ymm0, YuvConstants.kUVToB /* scale B UV */ \
__asm vmovdqu ymm3, kUVBiasR_AVX \ __asm vmovdqu ymm3, YuvConstants.kUVBiasR \
__asm vpsubw ymm2, ymm3, ymm2 \ __asm vpsubw ymm2, ymm3, ymm2 \
__asm vmovdqu ymm3, kUVBiasG_AVX \ __asm vmovdqu ymm3, YuvConstants.kUVBiasG \
__asm vpsubw ymm1, ymm3, ymm1 \ __asm vpsubw ymm1, ymm3, ymm1 \
__asm vmovdqu ymm3, kUVBiasB_AVX \ __asm vmovdqu ymm3, YuvConstants.kUVBiasB \
__asm vpsubw ymm0, ymm3, ymm0 \ __asm vpsubw ymm0, ymm3, ymm0 \
/* Step 2: Find Y contribution to 16 R,G,B values */ \ /* Step 2: Find Y contribution to 16 R,G,B values */ \
__asm vmovdqu xmm3, [eax] /* NOLINT */ \ __asm vmovdqu xmm3, [eax] /* NOLINT */ \
__asm lea eax, [eax + 16] \ __asm lea eax, [eax + 16] \
__asm vpermq ymm3, ymm3, 0xd8 \ __asm vpermq ymm3, ymm3, 0xd8 \
__asm vpunpcklbw ymm3, ymm3, ymm3 \ __asm vpunpcklbw ymm3, ymm3, ymm3 \
__asm vpmulhuw ymm3, ymm3, kYToRgb_AVX \ __asm vpmulhuw ymm3, ymm3, YuvConstants.kYToRgb \
__asm vpaddsw ymm0, ymm0, ymm3 /* B += Y */ \ __asm vpaddsw ymm0, ymm0, ymm3 /* B += Y */ \
__asm vpaddsw ymm1, ymm1, ymm3 /* G += Y */ \ __asm vpaddsw ymm1, ymm1, ymm3 /* G += Y */ \
__asm vpaddsw ymm2, ymm2, ymm3 /* R += Y */ \ __asm vpaddsw ymm2, ymm2, ymm3 /* R += Y */ \
...@@ -1559,7 +1574,7 @@ void I422ToARGBRow_AVX2(const uint8* y_buf, ...@@ -1559,7 +1574,7 @@ void I422ToARGBRow_AVX2(const uint8* y_buf,
convertloop: convertloop:
READYUV422_AVX2 READYUV422_AVX2
YUVTORGB_AVX2 YUVTORGB_AVX2(kYuvConstants)
// Step 3: Weave into ARGB // Step 3: Weave into ARGB
vpunpcklbw ymm0, ymm0, ymm1 // BG vpunpcklbw ymm0, ymm0, ymm1 // BG
...@@ -1605,7 +1620,7 @@ void I422ToBGRARow_AVX2(const uint8* y_buf, ...@@ -1605,7 +1620,7 @@ void I422ToBGRARow_AVX2(const uint8* y_buf,
convertloop: convertloop:
READYUV422_AVX2 READYUV422_AVX2
YUVTORGB_AVX2 YUVTORGB_AVX2(kYuvConstants)
// Step 3: Weave into BGRA // Step 3: Weave into BGRA
vpunpcklbw ymm1, ymm1, ymm0 // GB vpunpcklbw ymm1, ymm1, ymm0 // GB
...@@ -1651,7 +1666,7 @@ void I422ToRGBARow_AVX2(const uint8* y_buf, ...@@ -1651,7 +1666,7 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
convertloop: convertloop:
READYUV422_AVX2 READYUV422_AVX2
YUVTORGB_AVX2 YUVTORGB_AVX2(kYuvConstants)
// Step 3: Weave into RGBA // Step 3: Weave into RGBA
vpunpcklbw ymm1, ymm1, ymm2 // GR vpunpcklbw ymm1, ymm1, ymm2 // GR
...@@ -1697,7 +1712,7 @@ void I422ToABGRRow_AVX2(const uint8* y_buf, ...@@ -1697,7 +1712,7 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
convertloop: convertloop:
READYUV422_AVX2 READYUV422_AVX2
YUVTORGB_AVX2 YUVTORGB_AVX2(kYuvConstants)
// Step 3: Weave into ABGR // Step 3: Weave into ABGR
vpunpcklbw ymm1, ymm2, ymm1 // RG vpunpcklbw ymm1, ymm2, ymm1 // RG
...@@ -1760,56 +1775,25 @@ void I422ToABGRRow_AVX2(const uint8* y_buf, ...@@ -1760,56 +1775,25 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
} }
// Convert 8 pixels: 8 UV and 8 Y. // Convert 8 pixels: 8 UV and 8 Y.
#define YUVTORGB __asm { \ #define YUVTORGB(YuvConstants) __asm { \
/* Step 1: Find 4 UV contributions to 8 R,G,B values */ \ /* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
__asm movdqa xmm1, xmm0 \ __asm movdqa xmm1, xmm0 \
__asm movdqa xmm2, xmm0 \ __asm movdqa xmm2, xmm0 \
__asm movdqa xmm3, xmm0 \ __asm movdqa xmm3, xmm0 \
__asm movdqa xmm0, kUVBiasB /* unbias back to signed */ \ __asm movdqa xmm0, YuvConstants.kUVBiasB /* unbias back to signed */ \
__asm pmaddubsw xmm1, kUVToB /* scale B UV */ \ __asm pmaddubsw xmm1, YuvConstants.kUVToB /* scale B UV */ \
__asm psubw xmm0, xmm1 \ __asm psubw xmm0, xmm1 \
__asm movdqa xmm1, kUVBiasG \ __asm movdqa xmm1, YuvConstants.kUVBiasG \
__asm pmaddubsw xmm2, kUVToG /* scale G UV */ \ __asm pmaddubsw xmm2, YuvConstants.kUVToG /* scale G UV */ \
__asm psubw xmm1, xmm2 \ __asm psubw xmm1, xmm2 \
__asm movdqa xmm2, kUVBiasR \ __asm movdqa xmm2, YuvConstants.kUVBiasR \
__asm pmaddubsw xmm3, kUVToR /* scale R UV */ \ __asm pmaddubsw xmm3, YuvConstants.kUVToR /* scale R UV */ \
__asm psubw xmm2, xmm3 \ __asm psubw xmm2, xmm3 \
/* Step 2: Find Y contribution to 8 R,G,B values */ \ /* Step 2: Find Y contribution to 8 R,G,B values */ \
__asm movq xmm3, qword ptr [eax] /* NOLINT */ \ __asm movq xmm3, qword ptr [eax] /* NOLINT */ \
__asm lea eax, [eax + 8] \ __asm lea eax, [eax + 8] \
__asm punpcklbw xmm3, xmm3 \ __asm punpcklbw xmm3, xmm3 \
__asm pmulhuw xmm3, kYToRgb \ __asm pmulhuw xmm3, YuvConstants.kYToRgb \
__asm paddsw xmm0, xmm3 /* B += Y */ \
__asm paddsw xmm1, xmm3 /* G += Y */ \
__asm paddsw xmm2, xmm3 /* R += Y */ \
__asm psraw xmm0, 6 \
__asm psraw xmm1, 6 \
__asm psraw xmm2, 6 \
__asm packuswb xmm0, xmm0 /* B */ \
__asm packuswb xmm1, xmm1 /* G */ \
__asm packuswb xmm2, xmm2 /* R */ \
}
// Convert 8 pixels: 8 VU and 8 Y.
#define YVUTORGB __asm { \
/* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
__asm movdqa xmm1, xmm0 \
__asm movdqa xmm2, xmm0 \
__asm movdqa xmm3, xmm0 \
__asm movdqa xmm0, kUVBiasB /* unbias back to signed */ \
__asm pmaddubsw xmm1, kVUToB /* scale B UV */ \
__asm psubw xmm0, xmm1 \
__asm movdqa xmm1, kUVBiasG \
__asm pmaddubsw xmm2, kVUToG /* scale G UV */ \
__asm psubw xmm1, xmm2 \
__asm movdqa xmm2, kUVBiasR \
__asm pmaddubsw xmm3, kVUToR /* scale R UV */ \
__asm psubw xmm2, xmm3 \
/* Step 2: Find Y contribution to 8 R,G,B values */ \
__asm movq xmm3, qword ptr [eax] /* NOLINT */ \
__asm lea eax, [eax + 8] \
__asm punpcklbw xmm3, xmm3 \
__asm pmulhuw xmm3, kYToRgb \
__asm paddsw xmm0, xmm3 /* B += Y */ \ __asm paddsw xmm0, xmm3 /* B += Y */ \
__asm paddsw xmm1, xmm3 /* G += Y */ \ __asm paddsw xmm1, xmm3 /* G += Y */ \
__asm paddsw xmm2, xmm3 /* R += Y */ \ __asm paddsw xmm2, xmm3 /* R += Y */ \
...@@ -1842,7 +1826,7 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -1842,7 +1826,7 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV444 READYUV444
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into ARGB // Step 3: Weave into ARGB
punpcklbw xmm0, xmm1 // BG punpcklbw xmm0, xmm1 // BG
...@@ -1884,7 +1868,7 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf, ...@@ -1884,7 +1868,7 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into RRGB // Step 3: Weave into RRGB
punpcklbw xmm0, xmm1 // BG punpcklbw xmm0, xmm1 // BG
...@@ -1929,7 +1913,7 @@ void I422ToRAWRow_SSSE3(const uint8* y_buf, ...@@ -1929,7 +1913,7 @@ void I422ToRAWRow_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into RRGB // Step 3: Weave into RRGB
punpcklbw xmm0, xmm1 // BG punpcklbw xmm0, xmm1 // BG
...@@ -1979,7 +1963,7 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf, ...@@ -1979,7 +1963,7 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into RRGB // Step 3: Weave into RRGB
punpcklbw xmm0, xmm1 // BG punpcklbw xmm0, xmm1 // BG
...@@ -2044,7 +2028,7 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -2044,7 +2028,7 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into ARGB // Step 3: Weave into ARGB
punpcklbw xmm0, xmm1 // BG punpcklbw xmm0, xmm1 // BG
...@@ -2087,7 +2071,7 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -2087,7 +2071,7 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV411 // modifies EBX READYUV411 // modifies EBX
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into ARGB // Step 3: Weave into ARGB
punpcklbw xmm0, xmm1 // BG punpcklbw xmm0, xmm1 // BG
...@@ -2125,7 +2109,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -2125,7 +2109,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READNV12 READNV12
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into ARGB // Step 3: Weave into ARGB
punpcklbw xmm0, xmm1 // BG punpcklbw xmm0, xmm1 // BG
...@@ -2145,7 +2129,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -2145,7 +2129,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf,
} }
// 8 pixels. // 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). // 4 VU values upsampled to 8 VU, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void NV21ToARGBRow_SSSE3(const uint8* y_buf, void NV21ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf, const uint8* uv_buf,
...@@ -2154,14 +2138,14 @@ void NV21ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -2154,14 +2138,14 @@ void NV21ToARGBRow_SSSE3(const uint8* y_buf,
__asm { __asm {
push esi push esi
mov eax, [esp + 4 + 4] // Y mov eax, [esp + 4 + 4] // Y
mov esi, [esp + 4 + 8] // VU mov esi, [esp + 4 + 8] // UV
mov edx, [esp + 4 + 12] // argb mov edx, [esp + 4 + 12] // argb
mov ecx, [esp + 4 + 16] // width mov ecx, [esp + 4 + 16] // width
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop: convertloop:
READNV12 READNV12
YVUTORGB YUVTORGB(kYvuConstants)
// Step 3: Weave into ARGB // Step 3: Weave into ARGB
punpcklbw xmm0, xmm1 // BG punpcklbw xmm0, xmm1 // BG
...@@ -2198,7 +2182,7 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf, ...@@ -2198,7 +2182,7 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into BGRA // Step 3: Weave into BGRA
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
...@@ -2238,7 +2222,7 @@ void I422ToABGRRow_SSSE3(const uint8* y_buf, ...@@ -2238,7 +2222,7 @@ void I422ToABGRRow_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into ARGB // Step 3: Weave into ARGB
punpcklbw xmm2, xmm1 // RG punpcklbw xmm2, xmm1 // RG
...@@ -2276,7 +2260,7 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf, ...@@ -2276,7 +2260,7 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf,
convertloop: convertloop:
READYUV422 READYUV422
YUVTORGB YUVTORGB(kYuvConstants)
// Step 3: Weave into RGBA // Step 3: Weave into RGBA
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment