Commit 3b4f5eb7 authored by fbarchard@google.com's avatar fbarchard@google.com

Port J422 colorspace to GCC

BUG=414
TESTED=try bots
R=tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/43809004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1334 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 92f7f421
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1333 Version: 1334
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -124,6 +124,7 @@ extern "C" { ...@@ -124,6 +124,7 @@ extern "C" {
#define HAS_YUY2TOUV422ROW_SSE2 #define HAS_YUY2TOUV422ROW_SSE2
#define HAS_YUY2TOUVROW_SSE2 #define HAS_YUY2TOUVROW_SSE2
#define HAS_YUY2TOYROW_SSE2 #define HAS_YUY2TOYROW_SSE2
#define HAS_J422TOARGBROW_SSSE3
// Effects: // Effects:
#define HAS_ARGBADDROW_SSE2 #define HAS_ARGBADDROW_SSE2
...@@ -203,8 +204,6 @@ extern "C" { ...@@ -203,8 +204,6 @@ extern "C" {
// TODO(fbarchard): Port to Neon // TODO(fbarchard): Port to Neon
#define HAS_ARGBTORGB565DITHERROW_SSE2 #define HAS_ARGBTORGB565DITHERROW_SSE2
#define HAS_ARGBTORGB565DITHERROW_AVX2 #define HAS_ARGBTORGB565DITHERROW_AVX2
#define HAS_J422TOARGBROW_SSSE3
#define HAS_J422TOARGBROW_AVX2
#endif #endif
// The following are available on all x86 platforms, but // The following are available on all x86 platforms, but
...@@ -232,6 +231,7 @@ extern "C" { ...@@ -232,6 +231,7 @@ extern "C" {
#define HAS_YUY2TOUV422ROW_AVX2 #define HAS_YUY2TOUV422ROW_AVX2
#define HAS_YUY2TOUVROW_AVX2 #define HAS_YUY2TOUVROW_AVX2
#define HAS_YUY2TOYROW_AVX2 #define HAS_YUY2TOYROW_AVX2
#define HAS_J422TOARGBROW_AVX2
// The following require HAS_I422TOARGBROW_AVX2 // The following require HAS_I422TOARGBROW_AVX2
#if defined(HAS_I422TOARGBROW_AVX2) #if defined(HAS_I422TOARGBROW_AVX2)
...@@ -247,7 +247,6 @@ extern "C" { ...@@ -247,7 +247,6 @@ extern "C" {
#define HAS_ARGBUNATTENUATEROW_AVX2 #define HAS_ARGBUNATTENUATEROW_AVX2
#endif #endif
// The following are Yasm x86 only: // The following are Yasm x86 only:
// TODO(fbarchard): Port AVX2 to inline. // TODO(fbarchard): Port AVX2 to inline.
#if !defined(LIBYUV_DISABLE_X86) && defined(HAVE_YASM) #if !defined(LIBYUV_DISABLE_X86) && defined(HAVE_YASM)
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1333 #define LIBYUV_VERSION 1334
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -1414,22 +1414,6 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, ...@@ -1414,22 +1414,6 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
#if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2)
// YUV to RGB conversion constants.
// Y contribution to R,G,B. Scale and bias.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB 1160 /* 1.164 * 64 * 16 - adjusted for even error distribution */
// U and V contributions to R,G,B.
#define UB -128 /* -min(128, round(2.018 * 64)) */
#define UG 25 /* -round(-0.391 * 64) */
#define VG 52 /* -round(-0.813 * 64) */
#define VR -102 /* -round(1.596 * 64) */
// Bias values to subtract 16 from Y and 128 from U and V.
#define BB (UB * 128 - YGB)
#define BG (UG * 128 + VG * 128 - YGB)
#define BR (VR * 128 - YGB)
struct YuvConstants { struct YuvConstants {
lvec8 kUVToB; // 0 lvec8 kUVToB; // 0
lvec8 kUVToG; // 32 lvec8 kUVToG; // 32
...@@ -1440,6 +1424,27 @@ struct YuvConstants { ...@@ -1440,6 +1424,27 @@ struct YuvConstants {
lvec16 kYToRgb; // 192 lvec16 kYToRgb; // 192
}; };
// BT.601 YUV to RGB reference
// R = (Y - 16) * 1.164 - V * -1.596
// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
// B = (Y - 16) * 1.164 - U * -2.018
// Y contribution to R,G,B. Scale and bias.
// TODO(fbarchard): Consider moving constants into a common header.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
// U and V contributions to R,G,B.
#define UB -128 /* max(-128, round(-2.018 * 64)) */
#define UG 25 /* round(0.391 * 64) */
#define VG 52 /* round(0.813 * 64) */
#define VR -102 /* round(-1.596 * 64) */
// Bias values to subtract 16 from Y and 128 from U and V.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)
// BT601 constants for YUV to RGB. // BT601 constants for YUV to RGB.
static YuvConstants SIMD_ALIGNED(kYuvConstants) = { static YuvConstants SIMD_ALIGNED(kYuvConstants) = {
{ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
...@@ -1468,6 +1473,67 @@ static YuvConstants SIMD_ALIGNED(kYvuConstants) = { ...@@ -1468,6 +1473,67 @@ static YuvConstants SIMD_ALIGNED(kYvuConstants) = {
{ YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG } { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
}; };
#undef YG
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef BB
#undef BG
#undef BR
// JPEG YUV to RGB reference
// * R = Y - V * -1.40200
// * G = Y - U * 0.34414 - V * 0.71414
// * B = Y - U * -1.77200
// Y contribution to R,G,B. Scale and bias.
// TODO(fbarchard): Consider moving constants into a common header.
#define YGJ 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
#define YGBJ 32 /* 64 / 2 */
// U and V contributions to R,G,B.
#define UBJ -113 /* round(-1.77200 * 64) */
#define UGJ 22 /* round(0.34414 * 64) */
#define VGJ 46 /* round(0.71414 * 64) */
#define VRJ -90 /* round(-1.40200 * 64) */
// Bias values to subtract 16 from Y and 128 from U and V.
#define BBJ (UBJ * 128 + YGBJ)
#define BGJ (UGJ * 128 + VGJ * 128 + YGBJ)
#define BRJ (VRJ * 128 + YGBJ)
// JPEG constants for YUV to RGB.
YuvConstants SIMD_ALIGNED(kYuvJConstants) = {
{ UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0,
UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0 },
{ UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ,
UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ,
UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ,
UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ },
{ 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ,
0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ },
{ BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ,
BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ },
{ BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ,
BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ },
{ BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ,
BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ },
{ YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ,
YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ }
};
#undef YGJ
#undef YGBJ
#undef UBJ
#undef UGJ
#undef VGJ
#undef VRJ
#undef BBJ
#undef BGJ
#undef BRJ
// Read 8 UV from 411 // Read 8 UV from 411
#define READYUV444 \ #define READYUV444 \
"movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
...@@ -1713,6 +1779,32 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -1713,6 +1779,32 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
); );
} }
void OMITFP J422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
READYUV422
YUVTORGB(kYuvConstants)
STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [kYuvConstants]"r"(&kYuvJConstants.kUVToB) // %[kYuvConstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
...@@ -1984,6 +2076,48 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, ...@@ -1984,6 +2076,48 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
} }
#endif // HAS_I422TOARGBROW_AVX2 #endif // HAS_I422TOARGBROW_AVX2
#if defined(HAS_J422TOARGBROW_AVX2)
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
void OMITFP J422ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN
"1: \n"
READYUV422_AVX2
YUVTORGB_AVX2(kYuvConstants)
// Step 3: Weave into ARGB
"vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" // BG
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" // RA
"vpermq $0xd8,%%ymm2,%%ymm2 \n"
"vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" // BGRA first 8 pixels
"vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" // BGRA next 8 pixels
"vmovdqu %%ymm1," MEMACCESS([dst_argb]) "\n"
"vmovdqu %%ymm0," MEMACCESS2(0x20,[dst_argb]) "\n"
"lea " MEMLEA(0x40,[dst_argb]) ",%[dst_argb] \n"
"sub $0x10,%[width] \n"
"jg 1b \n"
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [kYuvConstants]"r"(&kYuvJConstants.kUVToB) // %[kYuvConstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
#endif // HAS_J422TOARGBROW_AVX2
#if defined(HAS_I422TOABGRROW_AVX2) #if defined(HAS_I422TOABGRROW_AVX2)
// 16 pixels // 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
......
...@@ -24,6 +24,16 @@ extern "C" { ...@@ -24,6 +24,16 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \ #if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
(defined(_M_IX86) || defined(_M_X64)) (defined(_M_IX86) || defined(_M_X64))
struct YuvConstants {
lvec8 kUVToB; // 0
lvec8 kUVToG; // 32
lvec8 kUVToR; // 64
lvec16 kUVBiasB; // 96
lvec16 kUVBiasG; // 128
lvec16 kUVBiasR; // 160
lvec16 kYToRgb; // 192
};
// BT.601 YUV to RGB reference // BT.601 YUV to RGB reference
// R = (Y - 16) * 1.164 - V * -1.596 // R = (Y - 16) * 1.164 - V * -1.596
// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813 // G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
...@@ -45,16 +55,6 @@ extern "C" { ...@@ -45,16 +55,6 @@ extern "C" {
#define BG (UG * 128 + VG * 128 + YGB) #define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB) #define BR (VR * 128 + YGB)
struct YuvConstants {
lvec8 kUVToB; // 0
lvec8 kUVToG; // 32
lvec8 kUVToR; // 64
lvec16 kUVBiasB; // 96
lvec16 kUVBiasG; // 128
lvec16 kUVBiasR; // 160
lvec16 kYToRgb; // 192
};
// BT601 constants for YUV to RGB. // BT601 constants for YUV to RGB.
static YuvConstants SIMD_ALIGNED(kYuvConstants) = { static YuvConstants SIMD_ALIGNED(kYuvConstants) = {
{ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
...@@ -1894,7 +1894,6 @@ void I422ToARGBRow_AVX2(const uint8* y_buf, ...@@ -1894,7 +1894,6 @@ void I422ToARGBRow_AVX2(const uint8* y_buf,
} }
#endif // HAS_I422TOARGBROW_AVX2 #endif // HAS_I422TOARGBROW_AVX2
#ifdef HAS_J422TOARGBROW_AVX2 #ifdef HAS_J422TOARGBROW_AVX2
// 16 pixels // 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment