Commit a98d6cdb authored by Frank Barchard, committed by Commit Bot

ARGBToAR30 AVX2 conversion function

Bug: libyuv:751
Test: LibYUVConvertTest.ARGBToAR30_Opt
Change-Id: I09c13eb53ba5f1ce1740c013dc587f8300f1d9e0
Reviewed-on: https://chromium-review.googlesource.com/780437
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
parent 19a126dd
......@@ -55,6 +55,15 @@ int ARGBToRGBA(const uint8* src_argb,
int width,
int height);
// Convert ARGB To AR30.
// Each 4-byte ARGB pixel becomes one 4-byte AR30 pixel: 10 bits each of blue,
// green and red in the low 30 bits and the top 2 bits of alpha in bits 30-31.
// A negative height reads the source rows bottom-up.
// Returns 0 on success, or -1 if either pointer is NULL, width <= 0 or
// height == 0.
LIBYUV_API
int ARGBToAR30(const uint8* src_argb,
int src_stride_argb,
uint8* dst_ar30,
int dst_stride_ar30,
int width,
int height);
// Convert ARGB To RGB24.
LIBYUV_API
int ARGBToRGB24(const uint8* src_argb,
......
......@@ -277,6 +277,7 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_ARGBTOAR30ROW_AVX2  // gates ARGBToAR30Row_AVX2 and its Any wrapper
#define HAS_MERGEUVROW_16_AVX2
#define HAS_MULTIPLYROW_16_AVX2
#endif
......@@ -1791,6 +1792,7 @@ void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb,
void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
// AVX2 ARGB to AR30 row conversion; processes 8 pixels per loop iteration.
void ARGBToAR30Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
......@@ -1817,6 +1819,7 @@ void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width);
void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
// Scalar C ARGB to AR30 row conversion; processes one pixel per iteration.
void ARGBToAR30Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
......@@ -2416,6 +2419,9 @@ void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb,
void ARGBToARGB4444Row_Any_AVX2(const uint8* src_argb,
uint8* dst_rgb,
int width);
// AVX2 ARGB to AR30 row conversion selected by ARGBToAR30() when width is not
// a multiple of 8.
void ARGBToAR30Row_Any_AVX2(const uint8* src_argb,
uint8* dst_rgb,
int width);
void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
......
......@@ -1308,6 +1308,47 @@ int ARGBToARGB4444(const uint8* src_argb,
return 0;
}
// Convert ARGB To AR30.
// Walks the image one row at a time, handing each row to the fastest row
// converter available for this CPU and width.
// Returns 0 on success, -1 when a pointer is NULL, width <= 0 or height == 0.
LIBYUV_API
int ARGBToAR30(const uint8* src_argb,
               int src_stride_argb,
               uint8* dst_ar30,
               int dst_stride_ar30,
               int width,
               int height) {
  // Row converter; defaults to the scalar C version.
  void (*ARGBToAR30Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
      ARGBToAR30Row_C;
  int rows_left;

  if (!(src_argb && dst_ar30 && width > 0 && height != 0)) {
    return -1;
  }

  // Negative height means invert the image: start at the last source row and
  // step backwards through the source.
  if (height < 0) {
    height = -height;
    src_argb += (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }

  // Coalesce rows: when both images are tightly packed, treat the whole
  // image as a single long row.
  if (src_stride_argb == width * 4 && dst_stride_ar30 == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_ar30 = 0;
  }

#if defined(HAS_ARGBTOAR30ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    // The plain AVX2 kernel is only used for widths that are a multiple of 8;
    // other widths go through the Any wrapper.
    ARGBToAR30Row = IS_ALIGNED(width, 8) ? ARGBToAR30Row_AVX2
                                         : ARGBToAR30Row_Any_AVX2;
  }
#endif

  for (rows_left = height; rows_left > 0; --rows_left) {
    ARGBToAR30Row(src_argb, dst_ar30, width);
    src_argb += src_stride_argb;
    dst_ar30 += dst_stride_ar30;
  }
  return 0;
}
// Convert ARGB to J420. (JPeg full range I420).
LIBYUV_API
int ARGBToJ420(const uint8* src_argb,
......
......@@ -396,6 +396,9 @@ ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7)
ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7)
#endif
#if defined(HAS_ARGBTOAR30ROW_AVX2)
// Instantiates ARGBToAR30Row_Any_AVX2 on top of ARGBToAR30Row_AVX2.
// NOTE(review): ANY11 is defined outside this view; the arguments (0, 4, 4, 7)
// presumably mean no UV shift, 4 source bytes and 4 destination bytes per
// pixel, and an 8-pixel granularity mask - confirm against the ANY11 macro.
ANY11(ARGBToAR30Row_Any_AVX2, ARGBToAR30Row_AVX2, 0, 4, 4, 7)
#endif
#if defined(HAS_J400TOARGBROW_SSE2)
ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7)
#endif
......
......@@ -301,6 +301,19 @@ void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
}
}
// Convert a row of ARGB pixels to AR30.
//   src_argb: source row, 4 bytes per pixel in memory order B, G, R, A.
//   dst_rgb:  destination row, 4 bytes per pixel.
//   width:    number of pixels to convert.
// Each output pixel is a 32-bit value holding 10-bit blue in bits 0-9, 10-bit
// green in bits 10-19, 10-bit red in bits 20-29 and 2-bit alpha in bits 30-31.
void ARGBToAR30Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    // Widen 8 bits to 10 by shifting left 2 and replicating the top 2 bits
    // into the low 2 bits, so 0x00 maps to 0x000 and 0xff maps to 0x3ff.
    uint32 b0 = (src_argb[0] >> 6) | (src_argb[0] << 2);
    uint32 g0 = (src_argb[1] >> 6) | (src_argb[1] << 2);
    uint32 r0 = (src_argb[2] >> 6) | (src_argb[2] << 2);
    // Alpha only has 2 bits in AR30; keep the top 2 bits of the 8-bit alpha.
    uint32 a0 = (src_argb[3] >> 6);
    uint32 ar30 = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
    // Store byte by byte, least significant byte first. dst_rgb is a byte
    // pointer with no alignment guarantee, so a direct 32-bit store through a
    // cast pointer could be misaligned. On little-endian hosts this writes
    // exactly the bytes the 32-bit store would have written.
    dst_rgb[0] = (uint8)(ar30);
    dst_rgb[1] = (uint8)(ar30 >> 8);
    dst_rgb[2] = (uint8)(ar30 >> 16);
    dst_rgb[3] = (uint8)(ar30 >> 24);
    dst_rgb += 4;
    src_argb += 4;
  }
}
// Returns the weighted sum 66*r + 129*g + 25*b, biased by 0x1080 and scaled
// down by 256 (the Y value, going by the function name).
static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
  const int weighted_sum = 66 * r + 129 * g + 25 * b;
  return (weighted_sum + 0x1080) >> 8;
}
......
......@@ -700,6 +700,57 @@ void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int width) {
}
#endif // HAS_RGB24TOARGBROW_SSSE3
#ifdef HAS_ARGBTOAR30ROW_AVX2
// Convert a row of ARGB pixels to AR30 using AVX2.
// Each loop iteration loads 32 bytes (8 ARGB pixels) from src and stores
// 32 bytes (8 AR30 pixels) to dst, using unaligned loads and stores.
// The loop body runs before width is tested and repeats while width - 8 > 0,
// so a width that is not a positive multiple of 8 still loads and stores a
// full final group of 8 pixels.
void ARGBToAR30Row_AVX2(const uint8* src, uint8* dst, int width) {
asm volatile (
// ymm4 = 0x000000ff in every 32-bit lane (selects one 8-bit channel).
"vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" // all ones
"vpsrld $0x18,%%ymm4,%%ymm4 \n"
// ymm5 = 0xc0000000 in every 32-bit lane (selects the top 2 bits).
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // all ones
"vpslld $30,%%ymm5,%%ymm5 \n"
LABELALIGN
"1: \n"
"vmovdqu (%0),%%ymm0 \n"
// alpha: the top 2 bits of each pixel are already in bits 30-31; start the
// result in ymm3 with just those bits.
"vpand %%ymm5,%%ymm0,%%ymm3 \n"
// red: isolate byte 2, then OR (r << 22) and ((r >> 6) << 20) into the
// result, giving the 10-bit value (r << 2) | (r >> 6) in bits 20-29.
"vpsrld $0x10,%%ymm0,%%ymm1 \n"
"vpand %%ymm4,%%ymm1,%%ymm1 \n"
"vpsrld $0x6,%%ymm1,%%ymm2 \n"
"vpslld $22,%%ymm1,%%ymm1 \n"
"vpslld $20,%%ymm2,%%ymm2 \n"
"vpor %%ymm1,%%ymm3,%%ymm3 \n"
"vpor %%ymm2,%%ymm3,%%ymm3 \n"
// green: isolate byte 1, then OR (g << 12) and ((g >> 6) << 10) into the
// result, giving the 10-bit value in bits 10-19.
"vpsrld $0x08,%%ymm0,%%ymm1 \n"
"vpand %%ymm4,%%ymm1,%%ymm1 \n"
"vpsrld $0x6,%%ymm1,%%ymm2 \n"
"vpslld $12,%%ymm1,%%ymm1 \n"
"vpslld $10,%%ymm2,%%ymm2 \n"
"vpor %%ymm1,%%ymm3,%%ymm3 \n"
"vpor %%ymm2,%%ymm3,%%ymm3 \n"
// blue: isolate byte 0, then OR (b << 2) and (b >> 6) into the result,
// giving the 10-bit value in bits 0-9.
"vpand %%ymm4,%%ymm0,%%ymm1 \n"
"vpsrld $0x6,%%ymm1,%%ymm2 \n"
"vpslld $2,%%ymm1,%%ymm1 \n"
"vpor %%ymm1,%%ymm3,%%ymm3 \n"
"vpor %%ymm2,%%ymm3,%%ymm3 \n"
// Store 8 AR30 pixels and advance both pointers by 32 bytes.
"vmovdqu %%ymm3,(%1) \n"
"add $0x20,%0 \n"
"add $0x20,%1 \n"
// 8 pixels done; loop again while the remaining count is still positive.
"sub $0x8,%2 \n"
"jg 1b \n"
// Zero the upper halves of the ymm registers before returning.
"vzeroupper \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
:: "memory", "cc",
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif
#ifdef HAS_ARGBTOYROW_SSSE3
// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) {
......
......@@ -11,6 +11,8 @@
#include <stdlib.h>
#include <time.h>
#include "libyuv/row.h" /* For ARGBToAR30Row_AVX2 */
#include "libyuv/basic_types.h"
#include "libyuv/compare.h"
#include "libyuv/convert.h"
......@@ -1069,6 +1071,7 @@ TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0)
TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
TESTATOB(ARGB, 4, 4, 1, ARGB1555, 2, 2, 1, 0)
TESTATOB(ARGB, 4, 4, 1, ARGB4444, 2, 2, 1, 0)
TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0)
TESTATOB(ARGB, 4, 4, 1, YUY2, 2, 4, 1, 4)
TESTATOB(ARGB, 4, 4, 1, UYVY, 2, 4, 1, 4)
TESTATOB(ARGB, 4, 4, 1, I400, 1, 1, 1, 2)
......@@ -1928,4 +1931,36 @@ TEST_F(LibYUVConvertTest, RotateWithARGBSource) {
EXPECT_EQ(dst[3], src[1]);
}
#ifdef HAS_ARGBTOAR30ROW_AVX2
// Checks that ARGBToAR30Row_AVX2 writes exactly the same bytes as
// ARGBToAR30Row_C for a buffer of random ARGB pixels. When the CPU lacks AVX2
// the C version is run in its place, so the test still passes.
TEST_F(LibYUVConvertTest, ARGBToAR30Row_Opt) {
  // ARGBToAR30Row_AVX2 always converts 8 pixels per loop iteration, so round
  // the pixel count up to a multiple of 8. Otherwise the last iteration would
  // read and write past the end of the buffers.
  const int kPixels = (benchmark_width_ * benchmark_height_ + 7) & ~7;
  align_buffer_page_end(src, kPixels * 4);
  align_buffer_page_end(dst_opt, kPixels * 4);
  align_buffer_page_end(dst_c, kPixels * 4);
  MemRandomize(src, kPixels * 4);
  // Different fill values so an untouched dst_opt cannot match dst_c.
  memset(dst_opt, 0, kPixels * 4);
  memset(dst_c, 1, kPixels * 4);

  // Reference result from the scalar C version.
  ARGBToAR30Row_C(src, dst_c, kPixels);

  int has_avx2 = TestCpuFlag(kCpuHasAVX2);
  for (int i = 0; i < benchmark_iterations_; ++i) {
    if (has_avx2) {
      ARGBToAR30Row_AVX2(src, dst_opt, kPixels);
    } else {
      ARGBToAR30Row_C(src, dst_opt, kPixels);
    }
  }
  for (int i = 0; i < kPixels * 4; ++i) {
    EXPECT_EQ(dst_opt[i], dst_c[i]);
  }

  free_aligned_buffer_page_end(src);
  free_aligned_buffer_page_end(dst_opt);
  free_aligned_buffer_page_end(dst_c);
}
#endif  // HAS_ARGBTOAR30ROW_AVX2
} // namespace libyuv
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment