Commit a6465859 authored by Frank Barchard's avatar Frank Barchard Committed by Commit Bot

I210ToARGB conversion from 10 bit YUV to RGB

SSSE3 optimized 10 bit YUV conversion to ARGB in single step.

Bug: libyuv:751
Test:  I010ToARGB
Change-Id: I234b2850e35992113ee6bd638732bafc7010a60d
Reviewed-on: https://chromium-review.googlesource.com/848238
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: 's avatarFrank Barchard <fbarchard@chromium.org>
Reviewed-by: 's avatarrichard winterton <rrwinterton@gmail.com>
parent ac088b4b
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1686
Version: 1687
License: BSD
License File: LICENSE
......
......@@ -63,6 +63,32 @@ int I420ToABGR(const uint8* src_y,
int width,
int height);
// Convert I010 to ARGB.
LIBYUV_API
int I010ToARGB(const uint16* src_y,
int src_stride_y,
const uint16* src_u,
int src_stride_u,
const uint16* src_v,
int src_stride_v,
uint8* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert H010 to ARGB.
LIBYUV_API
int H010ToARGB(const uint16* src_y,
int src_stride_y,
const uint16* src_u,
int src_stride_u,
const uint16* src_v,
int src_stride_v,
uint8* dst_argb,
int dst_stride_argb,
int width,
int height);
// Convert I422 to ARGB.
LIBYUV_API
int I422ToARGB(const uint8* src_y,
......
......@@ -265,6 +265,8 @@ extern "C" {
#define HAS_ARGBTOAR30ROW_SSSE3
#define HAS_CONVERT16TO8ROW_SSSE3
#define HAS_CONVERT8TO16ROW_SSE2
// I210 is for H010. 2 = 422. I for 601 vs H for 709.
#define HAS_I210TOARGBROW_SSSE3
#define HAS_MERGERGBROW_SSSE3
#define HAS_SPLITRGBROW_SSSE3
#endif
......@@ -1735,9 +1737,9 @@ void I422ToARGBRow_C(const uint8* src_y,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
void I210ToARGBRow_C(const uint16* src_y,
const uint16* src_u,
const uint16* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
......@@ -1807,12 +1809,6 @@ void I422ToARGBRow_AVX2(const uint8* src_y,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -1849,6 +1845,13 @@ void I422ToARGBRow_SSSE3(const uint8* src_y,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I210ToARGBRow_SSSE3(const uint16* src_y,
const uint16* src_u,
const uint16* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -1863,12 +1866,6 @@ void I422AlphaToARGBRow_AVX2(const uint8* y_buf,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToARGBRow_SSSE3(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
......@@ -1999,6 +1996,12 @@ void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I210ToARGBRow_Any_SSSE3(const uint16* src_y,
const uint16* src_u,
const uint16* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422AlphaToARGBRow_Any_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1686
#define LIBYUV_VERSION 1687
#endif // INCLUDE_LIBYUV_VERSION_H_
......@@ -47,7 +47,7 @@ int ARGBCopy(const uint8* src_argb,
return 0;
}
// Convert I422 to ARGB with matrix
// Convert I420 to ARGB with matrix
static int I420ToARGBMatrix(const uint8* src_y,
int src_stride_y,
const uint8* src_u,
......@@ -573,18 +573,13 @@ static int H010ToARGBMatrix(const uint16* src_y,
uint8* dst_argb,
int dst_stride_argb,
const struct YuvConstants* yuvconstants,
int scale, // 16384 for 10 bits
int width,
int height) {
int y;
int halfwidth = (width + 1) >> 1;
void (*Convert16To8Row)(const uint16* src_y, uint8* dst_y, int scale,
int width) = Convert16To8Row_C;
void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf,
const uint8* v_buf, uint8* rgb_buf,
void (*I210ToARGBRow)(const uint16* y_buf, const uint16* u_buf,
const uint16* v_buf, uint8* rgb_buf,
const struct YuvConstants* yuvconstants, int width) =
I422ToARGBRow_C;
I210ToARGBRow_C;
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1;
}
......@@ -594,85 +589,23 @@ static int H010ToARGBMatrix(const uint16* src_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
#if defined(HAS_CONVERT16TO8ROW_SSSE3)
#if defined(HAS_I210TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
Convert16To8Row = Convert16To8Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
Convert16To8Row = Convert16To8Row_SSSE3;
}
}
#endif
#if defined(HAS_CONVERT16TO8ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
Convert16To8Row = Convert16To8Row_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
Convert16To8Row = Convert16To8Row_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_I422TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGBRow = I422ToARGBRow_Any_MSA;
I210ToARGBRow = I210ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_MSA;
I210ToARGBRow = I210ToARGBRow_SSSE3;
}
}
#endif
{
// Row buffers for 8 bit YUV.
align_buffer_64(row_buf, width + halfwidth * 2);
uint8* row_y = row_buf;
uint8* row_u = row_buf + width;
uint8* row_v = row_buf + width + halfwidth;
for (y = 0; y < height - 1; y += 2) {
Convert16To8Row(src_y, row_y, scale, width);
Convert16To8Row(src_u, row_u, scale, halfwidth);
Convert16To8Row(src_v, row_v, scale, halfwidth);
I422ToARGBRow(row_y, row_u, row_v, dst_argb, yuvconstants, width);
Convert16To8Row(src_y + src_stride_y, row_y, scale, width);
I422ToARGBRow(row_y, row_u, row_v, dst_argb + dst_stride_argb,
yuvconstants, width);
dst_argb += dst_stride_argb * 2;
src_y += src_stride_y * 2;
for (y = 0; y < height; ++y) {
I210ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
if (height & 1) {
Convert16To8Row(src_y, row_y, scale, width);
Convert16To8Row(src_u, row_u, scale, halfwidth);
Convert16To8Row(src_v, row_v, scale, halfwidth);
I422ToARGBRow(row_y, row_u, row_v, dst_argb, yuvconstants, width);
}
free_aligned_buffer_64(row_buf);
}
return 0;
}
......@@ -691,7 +624,7 @@ int H010ToARGB(const uint16* src_y,
int height) {
return H010ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
src_stride_v, dst_argb, dst_stride_argb,
&kYuvH709Constants, 16384, width, height);
&kYuvH709Constants, width, height);
}
// Convert I444 to ARGB with matrix
......
......@@ -194,6 +194,32 @@ ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
#endif
#undef ANY31C
// 64 byte per row for future AVX2
// Any 3 planes of 16 bit to 1 with yuvconstants
// TODO(fbarchard): consider
#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, uint8* dst_ptr, \
const struct YuvConstants* yuvconstants, int width) { \
SIMD_ALIGNED(T temp[16 * 3]); \
SIMD_ALIGNED(uint8 out[64]); \
memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
} \
memcpy(temp, y_buf + n, r * SBPP); \
memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
ANY_SIMD(temp, temp + 16, temp + 32, out, yuvconstants, MASK + 1); \
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \
}
#ifdef HAS_I210TOARGBROW_SSSE3
ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16, 2, 4, 7)
#endif
#undef ANY31CT
// Any 2 planes to 1.
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
void NAMEANY(const uint8* y_buf, const uint8* uv_buf, uint8* dst_ptr, \
......
......@@ -1295,6 +1295,51 @@ static __inline void YuvPixel(uint8 y,
*r = Clamp((int32)(-(v * vr) + y1 + br) >> 6);
}
// C reference code that mimics the YUV 10 bit assembly.
static __inline void YuvPixel10(uint16 y,
uint16 u,
uint16 v,
uint8* b,
uint8* g,
uint8* r,
const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = -yuvconstants->kUVToRB[1];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[0] / 0x0101;
#elif defined(__arm__)
int ub = -yuvconstants->kUVToRB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[4];
int vr = -yuvconstants->kUVToRB[4];
int bb = yuvconstants->kUVBiasBGR[0];
int bg = yuvconstants->kUVBiasBGR[1];
int br = yuvconstants->kUVBiasBGR[2];
int yg = yuvconstants->kYToRgb[0] / 0x0101;
#else
int ub = yuvconstants->kUVToB[0];
int ug = yuvconstants->kUVToG[0];
int vg = yuvconstants->kUVToG[1];
int vr = yuvconstants->kUVToR[1];
int bb = yuvconstants->kUVBiasB[0];
int bg = yuvconstants->kUVBiasG[0];
int br = yuvconstants->kUVBiasR[0];
int yg = yuvconstants->kYToRgb[0];
#endif
uint32 y1 = (uint32)((y << 6) * yg) >> 16;
u = clamp255(u >> 2);
v = clamp255(v >> 2);
*b = Clamp((int32)(-(u * ub) + y1 + bb) >> 6);
*g = Clamp((int32)(-(u * ug + v * vg) + y1 + bg) >> 6);
*r = Clamp((int32)(-(v * vr) + y1 + br) >> 6);
}
// Y contribution to R,G,B. Scale and bias.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
......@@ -1388,6 +1433,33 @@ void I422ToARGBRow_C(const uint8* src_y,
}
}
// 10 bit YUV to ARGB
void I210ToARGBRow_C(const uint16* src_y,
const uint16* src_u,
const uint16* src_v,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_u += 1;
src_v += 1;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
void I422AlphaToARGBRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......
......@@ -1623,6 +1623,20 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0,
"punpcklbw %%xmm4,%%xmm4 \n" \
"lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
// Read 4 UV from 422 10 bit, upsample to 8 UV
// TODO(fbarchard): Consider shufb to replace pack/unpack
#define READYUV422_10 \
"movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
"lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \
"punpcklwd %%xmm1,%%xmm0 \n" \
"psraw $0x2,%%xmm0 \n" \
"packuswb %%xmm0,%%xmm0 \n" \
"punpcklwd %%xmm0,%%xmm0 \n" \
"movdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
"psllw $0x6,%%xmm4 \n" \
"lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
#define READYUVA422 \
"movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
......@@ -1862,6 +1876,36 @@ void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
);
}
// 10 bit YUV to ARGB
void OMITFP I210ToARGBRow_SSSE3(const uint16* y_buf,
const uint16* u_buf,
const uint16* v_buf,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP(yuvconstants)
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
READYUV422_10
YUVTORGB(yuvconstants)
STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14 YUVTORGB_REGS
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#ifdef HAS_I422ALPHATOARGBROW_SSSE3
void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment