Commit 5d97b933 authored by Frank Barchard

refactor I420ToABGR to use I420ToARGBRow

Using a transposed conversion matrix, I420ToARGB can output ABGR.

R=harryjin@google.com, xhwang@chromium.org
BUG=libyuv:473

Review URL: https://codereview.chromium.org/1413573010 .
parent 254ef015
This diff is collapsed.
This diff is collapsed.
......@@ -450,76 +450,6 @@ int I420ToNV21(const uint8* src_y, int src_stride_y,
width, height);
}
// Convert I420 to ARGB.
//
// Walks the image one row at a time, handing each row to an I422 row
// converter together with kYuvIConstants. The Y and destination pointers
// move on every row; the U and V pointers move only after odd rows, so two
// consecutive luma rows share one chroma row.
//
// A negative height flips the output vertically.
// Returns 0 on success, or -1 when a plane pointer is NULL, width <= 0 or
// height == 0.
LIBYUV_API
int I420ToARGB(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  int row;
  // Row converter; starts as the portable C version and is replaced below by
  // the best variant enabled at build time and reported by TestCpuFlag().
  void (*convert_row)(const uint8* y_buf,
                      const uint8* u_buf,
                      const uint8* v_buf,
                      uint8* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) = I422ToARGBRow_C;

  if (!(src_y && src_u && src_v && dst_argb) ||
      width <= 0 || height == 0) {
    return -1;
  }

  // Inverted image requested: begin at the last destination row and step
  // backwards through the destination buffer.
  if (height < 0) {
    height = -height;
    dst_argb += (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }

  // For each instruction set, the exact-width kernel is used when width is a
  // multiple of its step; otherwise the matching "_Any_" variant is used.
#if defined(HAS_I422TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    convert_row = IS_ALIGNED(width, 8) ? I422ToARGBRow_SSSE3
                                       : I422ToARGBRow_Any_SSSE3;
  }
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    convert_row = IS_ALIGNED(width, 16) ? I422ToARGBRow_AVX2
                                        : I422ToARGBRow_Any_AVX2;
  }
#endif
#if defined(HAS_I422TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    convert_row = IS_ALIGNED(width, 8) ? I422ToARGBRow_NEON
                                       : I422ToARGBRow_Any_NEON;
  }
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
  // The DSPR2 kernel is only selected when the width and every pointer and
  // stride satisfy the alignment checks below.
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    convert_row = I422ToARGBRow_MIPS_DSPR2;
  }
#endif

  for (row = 0; row < height; ++row) {
    convert_row(src_y, src_u, src_v, dst_argb, &kYuvIConstants, width);
    src_y += src_stride_y;
    dst_argb += dst_stride_argb;
    // Chroma is vertically subsampled: advance only after an odd row.
    if ((row & 1) != 0) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I420 to BGRA.
LIBYUV_API
int I420ToBGRA(const uint8* src_y, int src_stride_y,
......@@ -590,67 +520,6 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
return 0;
}
// Convert I420 to ABGR.
//
// Feeds the image row by row to an I422-to-ABGR row converter using
// kYuvIConstants. The Y and destination pointers advance on every row; the
// U and V pointers advance only after odd rows, so each chroma row serves
// two luma rows.
//
// A negative height flips the output vertically.
// Returns 0 on success, or -1 when a plane pointer is NULL, width <= 0 or
// height == 0.
LIBYUV_API
int I420ToABGR(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_abgr, int dst_stride_abgr,
               int width, int height) {
  int row;
  // Row converter; the portable C version unless a SIMD variant is picked
  // below.
  void (*convert_row)(const uint8* y_buf,
                      const uint8* u_buf,
                      const uint8* v_buf,
                      uint8* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) = I422ToABGRRow_C;

  if (!(src_y && src_u && src_v && dst_abgr) ||
      width <= 0 || height == 0) {
    return -1;
  }

  // Inverted image requested: write from the last destination row upwards.
  if (height < 0) {
    height = -height;
    dst_abgr += (height - 1) * dst_stride_abgr;
    dst_stride_abgr = -dst_stride_abgr;
  }

  // Exact-width kernels need width to be a multiple of their step; otherwise
  // the corresponding "_Any_" variant is chosen.
#if defined(HAS_I422TOABGRROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    convert_row = IS_ALIGNED(width, 8) ? I422ToABGRRow_SSSE3
                                       : I422ToABGRRow_Any_SSSE3;
  }
#endif
#if defined(HAS_I422TOABGRROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    convert_row = IS_ALIGNED(width, 16) ? I422ToABGRRow_AVX2
                                        : I422ToABGRRow_Any_AVX2;
  }
#endif
#if defined(HAS_I422TOABGRROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    convert_row = IS_ALIGNED(width, 8) ? I422ToABGRRow_NEON
                                       : I422ToABGRRow_Any_NEON;
  }
#endif

  for (row = 0; row < height; ++row) {
    convert_row(src_y, src_u, src_v, dst_abgr, &kYuvIConstants, width);
    src_y += src_stride_y;
    dst_abgr += dst_stride_abgr;
    // Chroma is vertically subsampled: advance only after an odd row.
    if ((row & 1) != 0) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I420 to RGBA.
LIBYUV_API
int I420ToRGBA(const uint8* src_y, int src_stride_y,
......
......@@ -843,75 +843,6 @@ int I422ToBGRA(const uint8* src_y, int src_stride_y,
return 0;
}
// Convert I422 to ABGR.
//
// Converts row by row with an I422-to-ABGR row converter and
// kYuvIConstants. All three source planes and the destination advance by
// their stride on every row.
//
// A negative height flips the output vertically.
// Returns 0 on success, or -1 when a plane pointer is NULL, width <= 0 or
// height == 0.
LIBYUV_API
int I422ToABGR(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_abgr, int dst_stride_abgr,
               int width, int height) {
  int row;
  // Row converter; the portable C version unless a SIMD variant is picked
  // below.
  void (*convert_row)(const uint8* y_buf,
                      const uint8* u_buf,
                      const uint8* v_buf,
                      uint8* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) = I422ToABGRRow_C;

  if (!(src_y && src_u && src_v && dst_abgr) ||
      width <= 0 || height == 0) {
    return -1;
  }

  // Inverted image requested: write from the last destination row upwards.
  if (height < 0) {
    height = -height;
    dst_abgr += (height - 1) * dst_stride_abgr;
    dst_stride_abgr = -dst_stride_abgr;
  }

  // Coalesce rows: when every plane is tightly packed (no padding between
  // rows) the whole image is converted as one long row.
  if (src_stride_y == width &&
      src_stride_u * 2 == width &&
      src_stride_v * 2 == width &&
      dst_stride_abgr == width * 4) {
    width = width * height;
    height = 1;
    src_stride_y = 0;
    src_stride_u = 0;
    src_stride_v = 0;
    dst_stride_abgr = 0;
  }

  // Exact-width kernels need width to be a multiple of their step; otherwise
  // the corresponding "_Any_" variant is chosen.
#if defined(HAS_I422TOABGRROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    // NEON is only used for rows of at least 8 pixels.
    if (width >= 8) {
      convert_row = IS_ALIGNED(width, 8) ? I422ToABGRRow_NEON
                                         : I422ToABGRRow_Any_NEON;
    }
  }
#endif
#if defined(HAS_I422TOABGRROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    convert_row = IS_ALIGNED(width, 8) ? I422ToABGRRow_SSSE3
                                       : I422ToABGRRow_Any_SSSE3;
  }
#endif
#if defined(HAS_I422TOABGRROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    convert_row = IS_ALIGNED(width, 16) ? I422ToABGRRow_AVX2
                                        : I422ToABGRRow_Any_AVX2;
  }
#endif

  for (row = 0; row < height; ++row) {
    convert_row(src_y, src_u, src_v, dst_abgr, &kYuvIConstants, width);
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_abgr += dst_stride_abgr;
  }
  return 0;
}
// Convert I422 to RGBA.
LIBYUV_API
int I422ToRGBA(const uint8* src_y, int src_stride_y,
......
......@@ -111,7 +111,6 @@ ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
// "_Any_" wrappers for the SSSE3 I422 row kernels, generated by ANY31C.
// Both the ARGB and the ABGR wrapper are guarded by the ARGB feature macro.
// NOTE(review): the last argument (7) looks like the width mask of an
// 8-pixel kernel and the 4 like bytes per output pixel - confirm against the
// ANY31C definition.
#ifdef HAS_I422TOARGBROW_SSSE3
ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
ANY31C(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I444TOARGBROW_SSSE3
ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
......@@ -139,9 +138,6 @@ ANY31C(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15)
// "_Any_" wrappers for the AVX2 row kernels, generated by ANY31C. Each one
// is emitted only when its own HAS_..._AVX2 feature macro is defined.
// NOTE(review): the last argument (15) looks like the width mask of a
// 16-pixel kernel - confirm against the ANY31C definition.
#ifdef HAS_I422TORGBAROW_AVX2
ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I422TOABGRROW_AVX2
ANY31C(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I444TOARGBROW_AVX2
ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
#endif
......@@ -162,7 +158,6 @@ ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
// "_Any_" wrappers for the NEON row kernels, generated by ANY31C.
// NOTE(review): the fifth argument appears to be bytes per output pixel
// (4 for the 32-bit formats, 3 for RGB24/RAW) and the last a width mask for
// 8-pixel kernels - confirm against the ANY31C definition.
ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
ANY31C(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7)
ANY31C(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7)
ANY31C(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
ANY31C(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7)
......
This diff is collapsed.
......@@ -715,68 +715,6 @@ void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
);
}
// Converts one row of Y/U/V samples to 32-bit ABGR pixels with MIPS DSPr2
// inline assembly, four pixels per loop iteration.
//
// y_buf, u_buf, v_buf: source sample pointers.
// rgb_buf: destination; 16 bytes are stored per iteration.
// yuvconstants: not passed to the assembly (it is absent from the operand
//   lists); the coefficients are the immediates loaded into $s0-$s5 below.
// width: pixel count. The loop subtracts 4 per iteration and exits only
//   when the count reaches exactly zero, so a width that is not a multiple
//   of 4 would not terminate correctly. A width of 0 is skipped up front.
//
// NOTE(review): the clobber list has no "memory" entry although the asm
// stores through rgb_buf - confirm this is safe with the compilers in use.
void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
// Nothing to do for an empty row. With noreorder in effect, the instruction
// following a branch executes in its delay slot.
"beqz %[width], 2f \n"
" repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
"repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
"repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
"repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
"repl.ph $s4, 16 \n" // |0|16|0|16|
"repl.ph $s5, 128 \n" // |128|128|
"lui $s6, 0xff00 \n"
"ori $s6, 0xff00 \n" // |ff|00|ff|00|
"1: \n"
// YUVTORGB is a macro defined elsewhere; presumably it loads the samples,
// advances u_buf/v_buf and leaves the colour components in $t registers -
// TODO confirm against its definition.
YUVTORGB
// Arranging into abgr format
"precr.qb.ph $t0, $t8, $t1 \n" // |G1|g1|R1|r1|
"precr.qb.ph $t3, $t9, $t2 \n" // |G0|g0|R0|r0|
"precrq.qb.ph $t8, $t0, $t3 \n" // |G1|R1|G0|R0|
"precr.qb.ph $t9, $t0, $t3 \n" // |g1|r1|g0|r0|
"precr.qb.ph $t2, $t4, $t5 \n" // |B1|b1|B0|b0|
// Loop bookkeeping is interleaved with the packing: 4 pixels consumed.
"addiu %[width], -4 \n"
"addiu %[y_buf], 4 \n"
"preceu.ph.qbla $t1, $t2 \n" // |0 |B1|0 |B0|
"preceu.ph.qbra $t2, $t2 \n" // |0 |b1|0 |b0|
// OR in 0xff as the alpha byte of each pixel.
"or $t1, $t1, $s6 \n" // |ff|B1|ff|B0|
"or $t2, $t2, $s6 \n" // |ff|b1|ff|b0|
"precrq.ph.w $t0, $t2, $t9 \n" // |ff|b1|g1|r1|
"precrq.ph.w $t3, $t1, $t8 \n" // |ff|B1|G1|R1|
"sll $t9, $t9, 16 \n"
"sll $t8, $t8, 16 \n"
"packrl.ph $t2, $t2, $t9 \n" // |ff|b0|g0|r0|
"packrl.ph $t1, $t1, $t8 \n" // |ff|B0|G0|R0|
// Store results.
"sw $t2, 0(%[rgb_buf]) \n"
"sw $t0, 4(%[rgb_buf]) \n"
"sw $t1, 8(%[rgb_buf]) \n"
"sw $t3, 12(%[rgb_buf]) \n"
// Repeat until the remaining width is zero; the destination pointer is
// advanced in the branch delay slot.
"bnez %[width], 1b \n"
" addiu %[rgb_buf], 16 \n"
"2: \n"
".set pop \n"
// All five operands are read-write because the asm modifies them in place.
:[y_buf] "+r" (y_buf),
[u_buf] "+r" (u_buf),
[v_buf] "+r" (v_buf),
[width] "+r" (width),
[rgb_buf] "+r" (rgb_buf)
:
: "t0", "t1", "t2", "t3", "t4", "t5",
"t6", "t7", "t8", "t9",
"s0", "s1", "s2", "s3",
"s4", "s5", "s6"
);
}
void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......
......@@ -255,37 +255,6 @@ void I422ToBGRARow_NEON(const uint8* src_y,
);
}
// Converts one row of Y/U/V samples to 32-bit ABGR pixels with ARM NEON
// inline assembly, eight pixels (32 output bytes) per loop iteration.
//
// src_y, src_u, src_v: source sample pointers (operands %0-%2).
// dst_abgr: destination pointer (%3), advanced by the post-incrementing
//   store.
// yuvconstants: supplies the kUVToRB, kUVToG, kUVBiasBGR and kYToRgb tables
//   whose addresses are passed as named input operands.
// width: pixel count (%4). The loop runs while the remaining count is
//   positive after subtracting 8, so the final iteration always handles a
//   full group of eight; callers are presumably expected to pass a multiple
//   of 8 - confirm.
//
// YUVTORGB_SETUP, READYUV422, YUVTORGB and MEMACCESS are macros defined
// elsewhere.
void I422ToABGRRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP
"1: \n"
READYUV422
YUVTORGB
"subs %4, %4, #8 \n"
// Exchange the first and third colour registers before storing
// (presumably turning B,G,R order into R,G,B - confirm against YUVTORGB).
"vswp.u8 d20, d22 \n"
// Fourth channel: constant 255 (alpha).
"vmov.u8 d23, #255 \n"
MEMACCESS(3)
// Interleaved store of d20..d23, then advance the destination pointer.
"vst4.8 {d20, d21, d22, d23}, [%3]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_abgr), // %3
"+r"(width) // %4
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
void I422ToRGBARow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......
......@@ -265,39 +265,6 @@ void I422ToBGRARow_NEON(const uint8* src_y,
}
#endif // HAS_I422TOBGRAROW_NEON
// TODO(fbarchard): Switch to Matrix version of this function.
#ifdef HAS_I422TOABGRROW_NEON
// Converts one row of Y/U/V samples to 32-bit ABGR pixels with AArch64 NEON
// inline assembly, eight pixels (32 output bytes) per loop iteration.
//
// src_y, src_u, src_v: source sample pointers (operands %0-%2).
// dst_abgr: destination pointer (%3), post-incremented by 32 per store.
// yuvconstants: supplies the kUVToRB, kUVToG, kUVBiasBGR and kYToRgb tables
//   whose addresses are passed as named input operands.
// width: pixel count (%4). The loop runs while the remaining count is
//   positive after subtracting 8; callers are presumably expected to pass a
//   multiple of 8 - confirm.
//
// YUVTORGB_SETUP, READYUV422, YUVTORGB and MEMACCESS are macros defined
// elsewhere.
void I422ToABGRRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP
"1: \n"
READYUV422
// The macro arguments name the registers that receive the three colour
// channels; their order presumably fixes the byte order of the output -
// confirm against the YUVTORGB definition.
YUVTORGB(v20, v21, v22)
"subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n" /* A */
MEMACCESS(3)
// Interleaved store of v20..v23, then advance the destination by 32 bytes.
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
"b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_abgr), // %3
"+r"(width) // %4
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_I422TOABGRROW_NEON
#ifdef HAS_I422TORGBAROW_NEON
void I422ToRGBARow_NEON(const uint8* src_y,
const uint8* src_u,
......
......@@ -115,25 +115,6 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
}
#endif
#if defined(HAS_I422TOABGRROW_SSSE3)
// Converts one row of Y/U/V samples to ABGR using SSE intrinsics wrapped in
// the READYUV422 / YUVTORGB / STOREABGR macros (defined elsewhere).
//
// The loop subtracts 8 from width per iteration and runs while the count is
// positive, so the final iteration always handles a full group of eight.
//
// NOTE(review): the locals below are not referenced directly in this
// function; presumably the macros use them by name, so they must keep
// these exact names - confirm against the macro definitions.
void I422ToABGRRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_abgr,
const struct YuvConstants* yuvconstants,
int width) {
__m128i xmm0, xmm1, xmm2, xmm4;
// All bytes set to 0xff (presumably the alpha channel written by STOREABGR).
const __m128i xmm5 = _mm_set1_epi8(-1);
// Byte distance from the U plane pointer to the V plane pointer.
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
while (width > 0) {
READYUV422
YUVTORGB(yuvconstants)
STOREABGR
width -= 8;
}
}
#endif
#if defined(HAS_I422ALPHATOARGBROW_SSSE3)
void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
......@@ -2455,48 +2436,9 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
}
#endif // HAS_I422TORGBAROW_AVX2
#ifdef HAS_I422TOABGRROW_AVX2
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
//
// Naked function: there is no compiler-generated prologue, so arguments are
// read directly from the stack. After the three pushes below, the first
// argument sits at [esp + 12 + 4] (12 bytes of saved registers plus the
// return address).
// The destination parameter is named dst_argb, but the pixels are written by
// STOREABGR_AVX2.
// The loop subtracts 16 from width per iteration and runs while the count is
// positive.
__declspec(naked)
void I422ToABGRRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
__asm {
// Save the registers this routine uses besides eax, ecx and edx.
push esi
push edi
push ebx
mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
mov edx, [esp + 12 + 16] // dst_argb (destination, receives ABGR)
mov ebx, [esp + 12 + 20] // yuvconstants
mov ecx, [esp + 12 + 24] // width
// edi becomes the V-minus-U byte offset.
sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READYUV422_AVX2
YUVTORGB_AVX2(ebx)
STOREABGR_AVX2
sub ecx, 16
jg convertloop
pop ebx
pop edi
pop esi
vzeroupper
ret
}
}
#endif // HAS_I422TOABGRROW_AVX2
#if defined(HAS_I422TOARGBROW_SSSE3)
// TODO(fbarchard): Read that does half size on Y and treats 420 as 444.
// Allows a conversion with half size scaling.
// Read 8 UV from 444.
#define READYUV444 __asm { \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment