Commit ffec313d authored by Frank Barchard, committed by Frank Barchard

ABGRToAR30 used AVX2 with reversed shuffler

vpshufb is used to reverse R and B channels;
Code is otherwise the same as ARGBToAR30.

Bug: libyuv:751
Test: ABGRToAR30 unittest
Change-Id: I30e02925f5c729e4496c5963ba4ba4af16633b3b
Reviewed-on: https://chromium-review.googlesource.com/891807
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Richard Winterton <rrwinterton@gmail.com>
parent ff8ab9ba
...@@ -55,6 +55,15 @@ int ARGBToRGBA(const uint8_t* src_argb, ...@@ -55,6 +55,15 @@ int ARGBToRGBA(const uint8_t* src_argb,
int width, int width,
int height); int height);
// Convert ABGR To AR30.
LIBYUV_API
int ABGRToAR30(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_ar30,
int dst_stride_ar30,
int width,
int height);
// Convert ARGB To AR30. // Convert ARGB To AR30.
LIBYUV_API LIBYUV_API
int ARGBToAR30(const uint8_t* src_argb, int ARGBToAR30(const uint8_t* src_argb,
......
...@@ -252,6 +252,7 @@ extern "C" { ...@@ -252,6 +252,7 @@ extern "C" {
// TODO(fbarchard): Port to Visual C // TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && \ #if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
#define HAS_ABGRTOAR30ROW_SSSE3
#define HAS_ARGBTOAR30ROW_SSSE3 #define HAS_ARGBTOAR30ROW_SSSE3
#define HAS_CONVERT16TO8ROW_SSSE3 #define HAS_CONVERT16TO8ROW_SSSE3
#define HAS_CONVERT8TO16ROW_SSE2 #define HAS_CONVERT8TO16ROW_SSE2
...@@ -268,6 +269,7 @@ extern "C" { ...@@ -268,6 +269,7 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_X86) && \ #if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_ABGRTOAR30ROW_AVX2
#define HAS_ARGBTOAR30ROW_AVX2 #define HAS_ARGBTOAR30ROW_AVX2
#define HAS_CONVERT16TO8ROW_AVX2 #define HAS_CONVERT16TO8ROW_AVX2
#define HAS_CONVERT8TO16ROW_AVX2 #define HAS_CONVERT8TO16ROW_AVX2
...@@ -1688,7 +1690,8 @@ void ARGBToARGB1555Row_SSE2(const uint8_t* src_argb, ...@@ -1688,7 +1690,8 @@ void ARGBToARGB1555Row_SSE2(const uint8_t* src_argb,
void ARGBToARGB4444Row_SSE2(const uint8_t* src_argb, void ARGBToARGB4444Row_SSE2(const uint8_t* src_argb,
uint8_t* dst_rgb, uint8_t* dst_rgb,
int width); int width);
void ARGBToAR30Row_SSSE3(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ABGRToAR30Row_SSSE3(const uint8_t* src_abgr, uint8_t* dst_ar30, int width);
void ARGBToAR30Row_SSSE3(const uint8_t* src_argb, uint8_t* dst_ar30, int width);
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb, void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
uint8_t* dst_rgb, uint8_t* dst_rgb,
...@@ -1710,7 +1713,8 @@ void ARGBToARGB1555Row_AVX2(const uint8_t* src_argb, ...@@ -1710,7 +1713,8 @@ void ARGBToARGB1555Row_AVX2(const uint8_t* src_argb,
void ARGBToARGB4444Row_AVX2(const uint8_t* src_argb, void ARGBToARGB4444Row_AVX2(const uint8_t* src_argb,
uint8_t* dst_rgb, uint8_t* dst_rgb,
int width); int width);
void ARGBToAR30Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ABGRToAR30Row_AVX2(const uint8_t* src_abgr, uint8_t* dst_ar30, int width);
void ARGBToAR30Row_AVX2(const uint8_t* src_argb, uint8_t* dst_ar30, int width);
void ARGBToRGB24Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB24Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
...@@ -1745,7 +1749,8 @@ void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); ...@@ -1745,7 +1749,8 @@ void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width);
void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width);
void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width);
void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width);
...@@ -2407,9 +2412,8 @@ void ARGBToARGB1555Row_Any_SSE2(const uint8_t* src_argb, ...@@ -2407,9 +2412,8 @@ void ARGBToARGB1555Row_Any_SSE2(const uint8_t* src_argb,
void ARGBToARGB4444Row_Any_SSE2(const uint8_t* src_argb, void ARGBToARGB4444Row_Any_SSE2(const uint8_t* src_argb,
uint8_t* dst_rgb, uint8_t* dst_rgb,
int width); int width);
void ARGBToAR30Row_Any_SSSE3(const uint8_t* src_argb, void ABGRToAR30Row_Any_SSSE3(const uint8_t* src_abgr, uint8_t* dst_ar30, int width);
uint8_t* dst_rgb, void ARGBToAR30Row_Any_SSSE3(const uint8_t* src_argb, uint8_t* dst_ar30, int width);
int width);
void ARGBToRGB565DitherRow_Any_SSE2(const uint8_t* src_argb, void ARGBToRGB565DitherRow_Any_SSE2(const uint8_t* src_argb,
uint8_t* dst_rgb, uint8_t* dst_rgb,
...@@ -2429,9 +2433,8 @@ void ARGBToARGB1555Row_Any_AVX2(const uint8_t* src_argb, ...@@ -2429,9 +2433,8 @@ void ARGBToARGB1555Row_Any_AVX2(const uint8_t* src_argb,
void ARGBToARGB4444Row_Any_AVX2(const uint8_t* src_argb, void ARGBToARGB4444Row_Any_AVX2(const uint8_t* src_argb,
uint8_t* dst_rgb, uint8_t* dst_rgb,
int width); int width);
void ARGBToAR30Row_Any_AVX2(const uint8_t* src_argb, void ABGRToAR30Row_Any_AVX2(const uint8_t* src_abgr, uint8_t* dst_ar30, int width);
uint8_t* dst_rgb, void ARGBToAR30Row_Any_AVX2(const uint8_t* src_argb, uint8_t* dst_ar30, int width);
int width);
void ARGBToRGB24Row_Any_NEON(const uint8_t* src_argb, void ARGBToRGB24Row_Any_NEON(const uint8_t* src_argb,
uint8_t* dst_rgb, uint8_t* dst_rgb,
......
...@@ -1217,6 +1217,55 @@ int ARGBToARGB4444(const uint8_t* src_argb, ...@@ -1217,6 +1217,55 @@ int ARGBToARGB4444(const uint8_t* src_argb,
return 0; return 0;
} }
// Convert ABGR To AR30.
// src_abgr: input pixels, 4 bytes per pixel; dst_ar30: output, 4 bytes per
// pixel. Strides are in bytes. Returns 0 on success, -1 on bad arguments.
LIBYUV_API
int ABGRToAR30(const uint8_t* src_abgr,
               int src_stride_abgr,
               uint8_t* dst_ar30,
               int dst_stride_ar30,
               int width,
               int height) {
  int y;
  // Row converter; starts as the portable C version and is upgraded to a
  // SIMD variant below when the CPU supports it.
  void (*ABGRToAR30Row)(const uint8_t* src_abgr, uint8_t* dst_rgb, int width) =
      ABGRToAR30Row_C;
  if (!src_abgr || !dst_ar30 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means the image is inverted: start at the last source
  // row and walk backwards.
  if (height < 0) {
    height = -height;
    src_abgr = src_abgr + (height - 1) * src_stride_abgr;
    src_stride_abgr = -src_stride_abgr;
  }
  // Coalesce rows: when both buffers are contiguous, treat the whole image
  // as one long row to cut per-row loop overhead.
  if (src_stride_abgr == width * 4 && dst_stride_ar30 == width * 4) {
    width *= height;
    height = 1;
    src_stride_abgr = dst_stride_ar30 = 0;
  }
#if defined(HAS_ABGRTOAR30ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    // Any-width wrapper handles the tail; exact kernel when width is a
    // multiple of 4 pixels.
    ABGRToAR30Row = ABGRToAR30Row_Any_SSSE3;
    if (IS_ALIGNED(width, 4)) {
      ABGRToAR30Row = ABGRToAR30Row_SSSE3;
    }
  }
#endif
#if defined(HAS_ABGRTOAR30ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    // AVX2 processes 8 pixels per iteration; checked after SSSE3 so it
    // takes precedence when available.
    ABGRToAR30Row = ABGRToAR30Row_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ABGRToAR30Row = ABGRToAR30Row_AVX2;
    }
  }
#endif
  for (y = 0; y < height; ++y) {
    ABGRToAR30Row(src_abgr, dst_ar30, width);
    src_abgr += src_stride_abgr;
    dst_ar30 += dst_stride_ar30;
  }
  return 0;
}
// Convert ARGB To AR30. // Convert ARGB To AR30.
LIBYUV_API LIBYUV_API
int ARGBToAR30(const uint8_t* src_argb, int ARGBToAR30(const uint8_t* src_argb,
......
...@@ -428,9 +428,15 @@ ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7) ...@@ -428,9 +428,15 @@ ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7)
ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7) ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7) ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7)
#endif #endif
#if defined(HAS_ABGRTOAR30ROW_SSSE3)
ANY11(ABGRToAR30Row_Any_SSSE3, ABGRToAR30Row_SSSE3, 0, 4, 4, 3)
#endif
#if defined(HAS_ARGBTOAR30ROW_SSSE3) #if defined(HAS_ARGBTOAR30ROW_SSSE3)
ANY11(ARGBToAR30Row_Any_SSSE3, ARGBToAR30Row_SSSE3, 0, 4, 4, 3) ANY11(ARGBToAR30Row_Any_SSSE3, ARGBToAR30Row_SSSE3, 0, 4, 4, 3)
#endif #endif
#if defined(HAS_ABGRTOAR30ROW_AVX2)
ANY11(ABGRToAR30Row_Any_AVX2, ABGRToAR30Row_AVX2, 0, 4, 4, 7)
#endif
#if defined(HAS_ARGBTOAR30ROW_AVX2) #if defined(HAS_ARGBTOAR30ROW_AVX2)
ANY11(ARGBToAR30Row_Any_AVX2, ARGBToAR30Row_AVX2, 0, 4, 4, 7) ANY11(ARGBToAR30Row_Any_AVX2, ARGBToAR30Row_AVX2, 0, 4, 4, 7)
#endif #endif
......
...@@ -348,15 +348,28 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { ...@@ -348,15 +348,28 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
} }
} }
// Convert one row of ABGR (memory byte order R, G, B, A) to AR30, a
// little-endian 2:10:10:10 format (B in bits 0-9, G 10-19, R 20-29,
// A 30-31). Each 8-bit color channel is widened to 10 bits by replicating
// its top two bits into the low bits ((v >> 6) | (v << 2)); alpha keeps
// only its top two bits. width is in pixels; both buffers advance 4 bytes
// per pixel.
void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    // In ABGR byte order src[0] is R and src[2] is B; name them accordingly
    // (the previous names were copied from the ARGB version and swapped).
    uint32_t r0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
    uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2);
    uint32_t b0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
    uint32_t a0 = (src_abgr[3] >> 6);
    // NOTE(review): the 32-bit store assumes dst is 4-byte aligned and the
    // target is little-endian, consistent with the rest of this file.
    *(uint32_t*)(dst_ar30) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
    dst_ar30 += 4;
    src_abgr += 4;
  }
}
// Convert one row of ARGB (memory byte order B, G, R, A) to AR30, a
// little-endian 2:10:10:10 format (B in bits 0-9, G 10-19, R 20-29,
// A 30-31). Each 8-bit color channel is widened to 10 bits by replicating
// its top two bits ((v >> 6) | (v << 2)); alpha keeps only its top two
// bits. width is in pixels; both buffers advance 4 bytes per pixel.
void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t b0 = (src_argb[0] >> 6) | ((uint32_t)(src_argb[0]) << 2);
    uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2);
    uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2);
    uint32_t a0 = (src_argb[3] >> 6);
    // NOTE(review): the 32-bit store assumes dst is 4-byte aligned and the
    // target is little-endian, consistent with the rest of this file.
    *(uint32_t*)(dst_ar30) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
    dst_ar30 += 4;
    src_argb += 4;
  }
}
......
...@@ -730,6 +730,10 @@ result left 10 to position the A and G channels. ...@@ -730,6 +730,10 @@ result left 10 to position the A and G channels.
// Shuffle table for converting RAW to RGB24. Last 8. // Shuffle table for converting RAW to RGB24. Last 8.
static const uvec8 kShuffleRB30 = {128u, 0u, 128u, 2u, 128u, 4u, 128u, 6u, static const uvec8 kShuffleRB30 = {128u, 0u, 128u, 2u, 128u, 4u, 128u, 6u,
128u, 8u, 128u, 10u, 128u, 12u, 128u, 14u}; 128u, 8u, 128u, 10u, 128u, 12u, 128u, 14u};
static const uvec8 kShuffleBR30 = {128u, 2u, 128u, 0u, 128u, 6u, 128u, 4u,
128u, 10u, 128u, 8u, 128u, 14u, 128u, 12u};
static const uint32_t kMulRB10 = 1028 * 16 * 65536 + 1028; static const uint32_t kMulRB10 = 1028 * 16 * 65536 + 1028;
static const uint32_t kMaskRB10 = 0x3ff003ff; static const uint32_t kMaskRB10 = 0x3ff003ff;
static const uint32_t kMaskAG10 = 0xc000ff00; static const uint32_t kMaskAG10 = 0xc000ff00;
...@@ -774,8 +778,46 @@ void ARGBToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) { ...@@ -774,8 +778,46 @@ void ARGBToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
} }
#ifdef HAS_ARGBTOAR30ROW_AVX2 void ABGRToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
asm volatile(
"movdqa %3,%%xmm2 \n" // shuffler for RB
"movd %4,%%xmm3 \n" // multipler for RB
"movd %5,%%xmm4 \n" // mask for R10 B10
"movd %6,%%xmm5 \n" // mask for AG
"movd %7,%%xmm6 \n" // multipler for AG
"pshufd $0x0,%%xmm3,%%xmm3 \n"
"pshufd $0x0,%%xmm4,%%xmm4 \n"
"pshufd $0x0,%%xmm5,%%xmm5 \n"
"pshufd $0x0,%%xmm6,%%xmm6 \n"
"sub %0,%1 \n"
"1: \n"
"movdqu (%0),%%xmm0 \n" // fetch 4 ABGR pixels
"movdqa %%xmm0,%%xmm1 \n"
"pshufb %%xmm2,%%xmm1 \n" // R0B0
"pand %%xmm5,%%xmm0 \n" // A0G0
"pmulhuw %%xmm3,%%xmm1 \n" // X2 R16 X4 B10
"pmulhuw %%xmm6,%%xmm0 \n" // X10 A2 X10 G10
"pand %%xmm4,%%xmm1 \n" // X2 R10 X10 B10
"pslld $10,%%xmm0 \n" // A2 x10 G10 x10
"por %%xmm1,%%xmm0 \n" // A2 R10 G10 B10
"movdqu %%xmm0,(%1,%0) \n" // store 4 AR30 pixels
"add $0x10,%0 \n"
"sub $0x4,%2 \n"
"jg 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
: "m"(kShuffleBR30), // %3 reversed shuffler
"m"(kMulRB10), // %4
"m"(kMaskRB10), // %5
"m"(kMaskAG10), // %6
"m"(kMulAG10) // %7
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
}
#ifdef HAS_ARGBTOAR30ROW_AVX2
void ARGBToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) { void ARGBToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
asm volatile( asm volatile(
"vbroadcastf128 %3,%%ymm2 \n" // shuffler for RB "vbroadcastf128 %3,%%ymm2 \n" // shuffler for RB
...@@ -812,6 +854,43 @@ void ARGBToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) { ...@@ -812,6 +854,43 @@ void ARGBToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
} }
#endif #endif
#ifdef HAS_ABGRTOAR30ROW_AVX2
// Convert 8 ABGR pixels (32 bytes) to 8 AR30 pixels (32 bytes) per loop
// iteration. AVX2 version of ABGRToAR30Row_SSSE3: kShuffleBR30 is a
// reversed shuffler that swaps the R and B channels so ABGR byte order
// packs into little-endian AR30 (A2 R10 G10 B10).
void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
  asm volatile(
      "vbroadcastf128 %3,%%ymm2 \n"  // shuffler for RB
      "vbroadcastss %4,%%ymm3 \n"    // multiplier for RB
      "vbroadcastss %5,%%ymm4 \n"    // mask for R10 B10
      "vbroadcastss %6,%%ymm5 \n"    // mask for AG
      "vbroadcastss %7,%%ymm6 \n"    // multiplier for AG
      "sub %0,%1 \n"  // dst addressed as (%1,%0): one pointer increment
      "1: \n"
      "vmovdqu (%0),%%ymm0 \n"  // fetch 8 ABGR pixels
      "vpshufb %%ymm2,%%ymm0,%%ymm1 \n"   // R0B0
      "vpand %%ymm5,%%ymm0,%%ymm0 \n"     // A0G0
      "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"  // X2 R16 X4 B10
      "vpmulhuw %%ymm6,%%ymm0,%%ymm0 \n"  // X10 A2 X10 G10
      "vpand %%ymm4,%%ymm1,%%ymm1 \n"     // X2 R10 X10 B10
      "vpslld $10,%%ymm0,%%ymm0 \n"       // A2 x10 G10 x10
      "vpor %%ymm1,%%ymm0,%%ymm0 \n"      // A2 R10 G10 B10
      "vmovdqu %%ymm0,(%1,%0) \n"         // store 8 AR30 pixels
      "add $0x20,%0 \n"
      "sub $0x8,%2 \n"
      "jg 1b \n"
      "vzeroupper \n"  // leave AVX state clean for subsequent SSE code
      : "+r"(src),   // %0
        "+r"(dst),   // %1
        "+r"(width)  // %2
      : "m"(kShuffleBR30),  // %3 reversed shuffler
        "m"(kMulRB10),      // %4
        "m"(kMaskRB10),     // %5
        "m"(kMaskAG10),     // %6
        "m"(kMulAG10)       // %7
      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
        "xmm6");
}
#endif
#ifdef HAS_ARGBTOYROW_SSSE3 #ifdef HAS_ARGBTOYROW_SSSE3
// Convert 16 ARGB pixels (64 bytes) to 16 Y values. // Convert 16 ARGB pixels (64 bytes) to 16 Y values.
void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) { void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) {
......
...@@ -41,6 +41,7 @@ namespace libyuv { ...@@ -41,6 +41,7 @@ namespace libyuv {
// Alias to copy pixels as is // Alias to copy pixels as is
#define AR30ToAR30 ARGBCopy #define AR30ToAR30 ARGBCopy
#define ABGRToABGR ARGBCopy
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) #define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
...@@ -1065,6 +1066,7 @@ TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0) ...@@ -1065,6 +1066,7 @@ TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0)
TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0) TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0) TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0)
TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0) TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0)
TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0) TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4) TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4)
TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4) TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4)
...@@ -1945,9 +1947,9 @@ TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4) ...@@ -1945,9 +1947,9 @@ TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
// Caveat: Destination needs to be 4 bytes // Caveat: Destination needs to be 4 bytes
TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ARGB, 4) TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ARGB, 4)
TESTPLANETOE(ABGR, 1, 4, AR30, 1, 4, ABGR, 4)
TESTPLANETOE(AR30, 1, 4, ARGB, 1, 4, ABGR, 4) TESTPLANETOE(AR30, 1, 4, ARGB, 1, 4, ABGR, 4)
TESTPLANETOE(AR30, 1, 4, ABGR, 1, 4, ARGB, 4)
// TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ABGR, 4)
TEST_F(LibYUVConvertTest, RotateWithARGBSource) { TEST_F(LibYUVConvertTest, RotateWithARGBSource) {
// 2x2 frames // 2x2 frames
...@@ -2018,6 +2020,40 @@ TEST_F(LibYUVConvertTest, ARGBToAR30Row_Opt) { ...@@ -2018,6 +2020,40 @@ TEST_F(LibYUVConvertTest, ARGBToAR30Row_Opt) {
} }
#endif // HAS_ARGBTOAR30ROW_AVX2 #endif // HAS_ARGBTOAR30ROW_AVX2
#ifdef HAS_ABGRTOAR30ROW_AVX2
// Verify the fastest available SIMD row kernel produces byte-identical
// output to the C reference implementation.
TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
  // ABGRToAR30Row_AVX2 expects a multiple of 8 pixels.
  const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7;
  align_buffer_page_end(src, kPixels * 4);
  align_buffer_page_end(dst_opt, kPixels * 4);
  align_buffer_page_end(dst_c, kPixels * 4);
  MemRandomize(src, kPixels * 4);
  // Different fill values so an unwritten output byte shows as a mismatch.
  memset(dst_opt, 0, kPixels * 4);
  memset(dst_c, 1, kPixels * 4);
  ABGRToAR30Row_C(src, dst_c, kPixels);
  int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
  // Repeated calls double as a benchmark; output is identical each pass.
  for (int i = 0; i < benchmark_iterations_; ++i) {
    if (has_avx2) {
      ABGRToAR30Row_AVX2(src, dst_opt, kPixels);
    } else if (has_ssse3) {
      ABGRToAR30Row_SSSE3(src, dst_opt, kPixels);
    } else {
      ABGRToAR30Row_C(src, dst_opt, kPixels);
    }
  }
  for (int i = 0; i < kPixels * 4; ++i) {
    EXPECT_EQ(dst_opt[i], dst_c[i]);
  }
  free_aligned_buffer_page_end(src);
  free_aligned_buffer_page_end(dst_opt);
  free_aligned_buffer_page_end(dst_c);
}
#endif  // HAS_ABGRTOAR30ROW_AVX2
// TODO(fbarchard): Fix clamping issue affected by U channel. // TODO(fbarchard): Fix clamping issue affected by U channel.
#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \ #define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
ALIGN, YALIGN, W1280, DIFF, N, NEG, SOFF, DOFF) \ ALIGN, YALIGN, W1280, DIFF, N, NEG, SOFF, DOFF) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment