Commit e365cdde authored by Frank Barchard

I420Alpha row function in 1 pass.

API change - I420AlphaToARGB takes flag indicating if RGB should be
premultiplied by alpha.

This version implements an efficient SSSE3 version for Windows.
C version done in 2 steps.

Was
libyuvTest.I420AlphaToARGB_Any (1136 ms)
libyuvTest.I420AlphaToARGB_Unaligned (1210 ms)
libyuvTest.I420AlphaToARGB_Invert (966 ms)
libyuvTest.I420AlphaToARGB_Opt (1031 ms)
libyuvTest.I420AlphaToABGR_Any (1020 ms)
libyuvTest.I420AlphaToABGR_Unaligned (1359 ms)
libyuvTest.I420AlphaToABGR_Invert (1082 ms)
libyuvTest.I420AlphaToABGR_Opt (986 ms)

R=harryjin@google.com
BUG=libyuv:496

Review URL: https://codereview.chromium.org/1367093002 .
parent d4594bee
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1492
Version: 1493
License: BSD
License File: LICENSE
......
......@@ -83,7 +83,7 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_v, int src_stride_v,
const uint8* src_a, int src_stride_a,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
int width, int height, int attenuate);
// Convert I420 with Alpha to preattenuated ABGR.
LIBYUV_API
......@@ -92,7 +92,7 @@ int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_v, int src_stride_v,
const uint8* src_a, int src_stride_a,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
int width, int height, int attenuate);
// Convert I400 (grey) to ARGB. Reverse of ARGBToI400.
LIBYUV_API
......
......@@ -187,6 +187,14 @@ extern "C" {
#define HAS_I422TOABGRROW_SSSE3
#endif
// The following are available on 32 bit x86 Visual C and clangcl.
// TODO(fbarchard): Port to gcc.
#if !defined(LIBYUV_DISABLE_X86) && defined (_M_IX86)
#define HAS_I422ALPHATOARGBROW_SSSE3
#define HAS_I422ALPHATOABGRROW_SSSE3
#endif
// The following are available for AVX2 Visual C and clangcl 32 bit:
// TODO(fbarchard): Port to gcc.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
......@@ -257,6 +265,7 @@ extern "C" {
#endif
// The following are disabled when SSSE3 is available:
// TODO(fbarchard): remove sse2. ssse3 is faster and well supported.
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
!defined(LIBYUV_SSSE3_ONLY)
......@@ -1045,6 +1054,20 @@ void I422ToARGBRow_C(const uint8* src_y,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width);
// Portable C row function: converts one row of Y, U, V and alpha samples
// (y_buf, u_buf, v_buf, a_buf) into dst_argb. width is the row length passed
// through to the underlying row converters.
void I422AlphaToARGBRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
const uint8* a_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width);
// Portable C row function: converts one row of Y, U, V and alpha samples
// (y_buf, u_buf, v_buf, a_buf) into dst_abgr. width is the row length passed
// through to the underlying row converters.
void I422AlphaToABGRRow_C(const uint8* y_buf,
                          const uint8* u_buf,
                          const uint8* v_buf,
                          const uint8* a_buf,
                          uint8* dst_abgr,
                          struct YuvConstants* yuvconstants,
                          int width);
void I422ToABGRRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -1213,6 +1236,20 @@ void I422ToARGBRow_SSSE3(const uint8* src_y,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width);
// SSSE3 row function: converts Y, U, V and alpha samples to ARGB, consuming
// 8 pixels per loop iteration.
void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
const uint8* a_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width);
// SSSE3 row function: converts Y, U, V and alpha samples to ABGR, consuming
// 8 pixels per loop iteration.
void I422AlphaToABGRRow_SSSE3(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              const uint8* a_buf,
                              uint8* dst_abgr,
                              struct YuvConstants* yuvconstants,
                              int width);
void I422ToARGBRow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -1405,6 +1442,20 @@ void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width);
// Any-width wrapper around I422AlphaToARGBRow_SSSE3 (generated by the ANY41C
// macro): full groups of 8 pixels go straight to the SSSE3 function and the
// remainder is converted through a temporary buffer.
void I422AlphaToARGBRow_Any_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
const uint8* a_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width);
// Any-width wrapper around I422AlphaToABGRRow_SSSE3 (generated by the ANY41C
// macro): full groups of 8 pixels go straight to the SSSE3 function and the
// remainder is converted through a temporary buffer.
void I422AlphaToABGRRow_Any_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
const uint8* a_buf,
uint8* dst_abgr,
struct YuvConstants* yuvconstants,
int width);
void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1492
#define LIBYUV_VERSION 1493
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -336,16 +336,15 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_v, int src_stride_v,
const uint8* src_a, int src_stride_a,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int width, int height, int attenuate) {
int y;
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I422ToARGBRow_C;
void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
ARGBCopyYToAlphaRow_C;
void (*I422AlphaToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
const uint8* a_buf,
uint8* dst_argb,
struct YuvConstants* yuvconstants,
int width) = I422AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
int width) = ARGBAttenuateRow_C;
if (!src_y || !src_u || !src_v || !dst_argb ||
......@@ -358,53 +357,37 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
#if defined(HAS_I422TOARGBROW_SSSE3)
#if defined(HAS_I422ALPHATOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
I422AlphaToARGBRow = I422AlphaToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_SSSE3;
I422AlphaToARGBRow = I422AlphaToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
#if defined(HAS_I422ALPHATOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
I422AlphaToARGBRow = I422AlphaToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToARGBRow = I422ToARGBRow_AVX2;
I422AlphaToARGBRow = I422AlphaToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGBROW_NEON)
#if defined(HAS_I422ALPHATOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToARGBRow = I422ToARGBRow_Any_NEON;
I422AlphaToARGBRow = I422AlphaToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON;
I422AlphaToARGBRow = I422AlphaToARGBRow_NEON;
}
}
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
#if defined(HAS_I422ALPHATOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
#if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
}
}
#endif
#if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
}
I422AlphaToARGBRow = I422AlphaToARGBRow_MIPS_DSPR2;
}
#endif
#if defined(HAS_ARGBATTENUATEROW_SSE2)
......@@ -441,9 +424,10 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
ARGBCopyYToAlphaRow(src_a, dst_argb, width);
ARGBAttenuateRow(dst_argb, dst_argb, width);
I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, &kYuvConstants, width);
if (attenuate) {
ARGBAttenuateRow(dst_argb, dst_argb, width);
}
dst_argb += dst_stride_argb;
src_a += src_stride_a;
src_y += src_stride_y;
......@@ -454,24 +438,24 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
}
return 0;
}
// Convert I420 with Alpha to preattenuated ABGR.
// Convert I420 with Alpha to ABGR, preattenuated when attenuate is nonzero.
LIBYUV_API
int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
const uint8* src_a, int src_stride_a,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
int width, int height, int attenuate) {
int y;
void (*I422ToABGRRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
struct YuvConstants* yuvconstants,
int width) = I422ToABGRRow_C;
void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
ARGBCopyYToAlphaRow_C;
void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
void (*I422AlphaToABGRRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
const uint8* a_buf,
uint8* dst_abgr,
struct YuvConstants* yuvconstants,
int width) = I422AlphaToABGRRow_C;
void (*ARGBAttenuateRow)(const uint8* src_abgr, uint8* dst_abgr,
int width) = ARGBAttenuateRow_C;
if (!src_y || !src_u || !src_v || !dst_abgr ||
width <= 0 || height == 0) {
......@@ -483,53 +467,37 @@ int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
dst_stride_abgr = -dst_stride_abgr;
}
#if defined(HAS_I422TOABGRROW_SSSE3)
#if defined(HAS_I422ALPHATOABGRROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
I422AlphaToABGRRow = I422AlphaToABGRRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToABGRRow = I422ToABGRRow_SSSE3;
I422AlphaToABGRRow = I422AlphaToABGRRow_SSSE3;
}
}
#endif
#if defined(HAS_I422TOABGRROW_AVX2)
#if defined(HAS_I422ALPHATOABGRROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
I422ToABGRRow = I422ToABGRRow_Any_AVX2;
I422AlphaToABGRRow = I422AlphaToABGRRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToABGRRow = I422ToABGRRow_AVX2;
I422AlphaToABGRRow = I422AlphaToABGRRow_AVX2;
}
}
#endif
#if defined(HAS_I422TOABGRROW_NEON)
#if defined(HAS_I422ALPHATOABGRROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I422ToABGRRow = I422ToABGRRow_Any_NEON;
I422AlphaToABGRRow = I422AlphaToABGRRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToABGRRow = I422ToABGRRow_NEON;
I422AlphaToABGRRow = I422AlphaToABGRRow_NEON;
}
}
#endif
#if defined(HAS_I422TOABGRROW_MIPS_DSPR2)
#if defined(HAS_I422ALPHATOABGRROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2;
}
#endif
#if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
}
}
#endif
#if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
}
I422AlphaToABGRRow = I422AlphaToABGRRow_MIPS_DSPR2;
}
#endif
#if defined(HAS_ARGBATTENUATEROW_SSE2)
......@@ -566,9 +534,10 @@ int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
#endif
for (y = 0; y < height; ++y) {
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
ARGBCopyYToAlphaRow(src_a, dst_abgr, width);
ARGBAttenuateRow(dst_abgr, dst_abgr, width);
I422AlphaToABGRRow(src_y, src_u, src_v, src_a, dst_abgr, &kYuvConstants, width);
if (attenuate) {
ARGBAttenuateRow(dst_abgr, dst_abgr, width);
}
dst_abgr += dst_stride_abgr;
src_a += src_stride_a;
src_y += src_stride_y;
......
......@@ -22,6 +22,34 @@ extern "C" {
// Subsampled source size needs to be increased by 1 if not even.
#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))
// Any 4 planes to 1 with yuvconstants
//
// Defines NAMEANY, a wrapper that lets the row function ANY_SIMD be used with
// a width that is not a multiple of its step (MASK + 1 pixels).
//   UVSHIFT  - right shift applied to pixel offsets/counts when indexing
//              u_buf and v_buf.
//   DUVSHIFT - right shift applied to pixel offsets/counts when indexing the
//              destination.
//   BPP      - bytes per destination unit.
//   MASK     - step size minus one; n = width & ~MASK pixels are converted
//              directly, r = width & MASK pixels go through temp.
// temp is split into five 64-byte regions: Y at 0, U at 64, V at 128, A at 192
// and the converted output at 256. Only the four input regions are zeroed.
// The tail pass always runs, even when r is 0: it converts MASK + 1 pixels
// from temp and then copies SS(r, DUVSHIFT) * BPP bytes to the destination.
#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
const uint8* a_buf, uint8* dst_ptr, \
struct YuvConstants* yuvconstants, int width) { \
SIMD_ALIGNED(uint8 temp[64 * 5]); \
memset(temp, 0, 64 * 4); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \
} \
memcpy(temp, y_buf + n, r); \
memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
memcpy(temp + 192, a_buf + n, r); \
ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \
yuvconstants, MASK + 1); \
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \
SS(r, DUVSHIFT) * BPP); \
}
#ifdef HAS_I422ALPHATOARGBROW_SSSE3
ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
ANY41C(I422AlphaToABGRRow_Any_SSSE3, I422AlphaToABGRRow_SSSE3, 1, 0, 4, 7)
#endif
#undef ANY41C
// Any 3 planes to 1.
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
......@@ -50,7 +78,7 @@ ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
#ifdef HAS_I422TOUYVYROW_NEON
ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
#endif
#undef ANY31C
#undef ANY31
// Any 3 planes to 1 with yuvconstants
#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
......
......@@ -2412,6 +2412,29 @@ void I422ToRGB565Row_SSSE3(const uint8* src_y,
}
#endif
// Converts one row of I422 with a separate alpha plane to ARGB.
// Works in two passes: the Y/U/V samples are converted to ARGB first, then
// the alpha samples in src_a are copied into the alpha channel of dst_argb.
//   src_y, src_u, src_v - source Y, U and V rows.
//   src_a               - source alpha row.
//   dst_argb            - destination row.
//   yuvconstants        - conversion constants forwarded to I422ToARGBRow_C.
//   width               - row width forwarded to both passes.
void I422AlphaToARGBRow_C(const uint8* src_y,
                          const uint8* src_u,
                          const uint8* src_v,
                          const uint8* src_a,
                          uint8* dst_argb,
                          struct YuvConstants* yuvconstants,
                          int width) {
  // Honour the caller-supplied constants instead of the global kYuvConstants
  // table, so this function agrees with its signature.
  I422ToARGBRow_C(src_y, src_u, src_v, dst_argb, yuvconstants, width);
  ARGBCopyYToAlphaRow_C(src_a, dst_argb, width);
}
// Converts one row of I422 with a separate alpha plane to ABGR.
// Works in two passes: the Y/U/V samples are converted to ABGR first, then
// the alpha samples in src_a are copied into the result with
// ARGBCopyYToAlphaRow_C.
//   src_y, src_u, src_v - source Y, U and V rows.
//   src_a               - source alpha row.
//   dst_abgr            - destination row.
//   yuvconstants        - conversion constants forwarded to I422ToABGRRow_C.
//   width               - row width forwarded to both passes.
void I422AlphaToABGRRow_C(const uint8* src_y,
                          const uint8* src_u,
                          const uint8* src_v,
                          const uint8* src_a,
                          uint8* dst_abgr,
                          struct YuvConstants* yuvconstants,
                          int width) {
  // Honour the caller-supplied constants instead of the global kYuvConstants
  // table, so this function agrees with its signature.
  I422ToABGRRow_C(src_y, src_u, src_v, dst_abgr, yuvconstants, width);
  ARGBCopyYToAlphaRow_C(src_a, dst_abgr, width);
}
#if defined(HAS_I422TOARGB1555ROW_SSSE3)
void I422ToARGB1555Row_SSSE3(const uint8* src_y,
const uint8* src_u,
......
......@@ -2416,6 +2416,20 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
__asm lea eax, [eax + 8] \
}
// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
// Register use on entry: eax = Y pointer, esi = U pointer, edi = offset that
// reaches V when added to esi, ebp = alpha pointer.
// On exit: xmm0 holds the upsampled UV pairs, xmm4 holds the 8 Y bytes each
// duplicated into a word, xmm5 holds the 8 alpha bytes (movq replaces the
// whole register). eax and ebp are advanced by 8, esi by 4. xmm1 is clobbered.
#define READYUVA422 __asm { \
__asm movd xmm0, [esi] /* U */ \
__asm movd xmm1, [esi + edi] /* V */ \
__asm lea esi, [esi + 4] \
__asm punpcklbw xmm0, xmm1 /* UV */ \
__asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
__asm movq xmm4, qword ptr [eax] /* Y */ \
__asm punpcklbw xmm4, xmm4 \
__asm lea eax, [eax + 8] \
__asm movq xmm5, qword ptr [ebp] /* A */ \
__asm lea ebp, [ebp + 8] \
}
// Read 2 UV from 411, upsample to 8 UV.
#define READYUV411 __asm { \
__asm pinsrw xmm0, [esi], 0 /* U */ \
......@@ -2833,6 +2847,88 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
}
}
// 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ARGB (32 bytes).
// Naked function: the arguments are read from the stack, past the four
// registers saved below (16 bytes) and the return address (4 bytes).
// The loop consumes 8 pixels per iteration and repeats while the remaining
// width is greater than zero.
__declspec(naked)
void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              const uint8* a_buf,
                              uint8* dst_argb,
                              struct YuvConstants* yuvconstants,
                              int width) {
  __asm {
    push       esi
    push       edi
    push       ebx
    push       ebp
    mov        eax, [esp + 16 + 4]   // Y
    mov        esi, [esp + 16 + 8]   // U
    mov        edi, [esp + 16 + 12]  // V
    mov        ebp, [esp + 16 + 16]  // A
    mov        edx, [esp + 16 + 20]  // argb
    mov        ebx, [esp + 16 + 24]  // yuvconstants
    mov        ecx, [esp + 16 + 28]  // width
    sub        edi, esi              // edi = V - U, so [esi + edi] reads V
    // xmm5 is not preset to opaque alpha here: READYUVA422 loads the source
    // alpha into xmm5 at the start of every iteration.

 convertloop:
    READYUVA422
    YUVTORGB(ebx)
    STOREARGB

    sub        ecx, 8
    jg         convertloop

    pop        ebp
    pop        ebx
    pop        edi
    pop        esi
    ret
  }
}
// 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ABGR (32 bytes).
// Naked function: the arguments are read from the stack, past the four
// registers saved below (16 bytes) and the return address (4 bytes).
// The loop consumes 8 pixels per iteration and repeats while the remaining
// width is greater than zero.
__declspec(naked)
void I422AlphaToABGRRow_SSSE3(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              const uint8* a_buf,
                              uint8* dst_abgr,
                              struct YuvConstants* yuvconstants,
                              int width) {
  __asm {
    push       esi
    push       edi
    push       ebx
    push       ebp
    mov        eax, [esp + 16 + 4]   // Y
    mov        esi, [esp + 16 + 8]   // U
    mov        edi, [esp + 16 + 12]  // V
    mov        ebp, [esp + 16 + 16]  // A
    mov        edx, [esp + 16 + 20]  // abgr
    mov        ebx, [esp + 16 + 24]  // yuvconstants
    mov        ecx, [esp + 16 + 28]  // width
    sub        edi, esi              // edi = V - U, so [esi + edi] reads V
    // xmm5 is not preset to opaque alpha here: READYUVA422 loads the source
    // alpha into xmm5 at the start of every iteration.

 convertloop:
    READYUVA422
    YUVTORGB(ebx)
    STOREABGR

    sub        ecx, 8
    jg         convertloop

    pop        ebp
    pop        ebx
    pop        edi
    pop        esi
    ret
  }
}
// 8 pixels.
// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
// Similar to I420 but duplicate UV once more.
......
......@@ -518,7 +518,7 @@ TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1, 0, ARGB, 4)
TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1, 0, ARGB, 4)
#define TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, W1280, DIFF, N, NEG, OFF) \
YALIGN, W1280, DIFF, N, NEG, OFF, ATTEN) \
TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
......@@ -547,7 +547,7 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
src_a + OFF, kWidth, \
dst_argb_c + OFF, kStrideB, \
kWidth, NEG kHeight); \
kWidth, NEG kHeight, ATTEN); \
MaskCpuFlags(-1); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
......@@ -555,7 +555,7 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \
src_a + OFF, kWidth, \
dst_argb_opt + OFF, kStrideB, \
kWidth, NEG kHeight); \
kWidth, NEG kHeight, ATTEN); \
} \
int max_diff = 0; \
for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
......@@ -578,13 +578,15 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
#define TESTQPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, DIFF) \
TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0) \
YALIGN, benchmark_width_ - 4, DIFF, _Any, +, 0, 0) \
TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1, 0) \
TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, DIFF, _Unaligned, +, 1) \
YALIGN, benchmark_width_, DIFF, _Invert, -, 0, 0) \
TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, DIFF, _Invert, -, 0) \
YALIGN, benchmark_width_, DIFF, _Opt, +, 0, 0) \
TESTQPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, DIFF, _Opt, +, 0)
YALIGN, benchmark_width_, DIFF, _Premult, +, 0, 1)
TESTQPLANARTOB(I420Alpha, 2, 2, ARGB, 4, 4, 1, 2)
TESTQPLANARTOB(I420Alpha, 2, 2, ABGR, 4, 4, 1, 2)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment