Commit cc33dc68 authored by Frank Barchard

Port I411ToARGBRow to AVX2.

An SSSE3 version already exists, and an AVX2 version is available for
Visual C.  This ports the function to AVX2 for gcc, completing the AVX2
ports of all YUV to RGB functions on gcc.

TBR=harryjin@google.com
BUG=libyuv:555

Review URL: https://codereview.chromium.org/1687253002 .
parent 0e554b18
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1575 Version: 1576
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -206,6 +206,7 @@ extern "C" { ...@@ -206,6 +206,7 @@ extern "C" {
// https://code.google.com/p/libyuv/issues/detail?id=517 // https://code.google.com/p/libyuv/issues/detail?id=517
#define HAS_I422ALPHATOARGBROW_AVX2 #define HAS_I422ALPHATOARGBROW_AVX2
#endif #endif
#define HAS_I411TOARGBROW_AVX2
#define HAS_I422TOARGB1555ROW_AVX2 #define HAS_I422TOARGB1555ROW_AVX2
#define HAS_I422TOARGB4444ROW_AVX2 #define HAS_I422TOARGB4444ROW_AVX2
#define HAS_I422TOARGBROW_AVX2 #define HAS_I422TOARGBROW_AVX2
...@@ -248,7 +249,6 @@ extern "C" { ...@@ -248,7 +249,6 @@ extern "C" {
#define HAS_ARGBTOARGB1555ROW_AVX2 #define HAS_ARGBTOARGB1555ROW_AVX2
#define HAS_ARGBTOARGB4444ROW_AVX2 #define HAS_ARGBTOARGB4444ROW_AVX2
#define HAS_ARGBTORGB565ROW_AVX2 #define HAS_ARGBTORGB565ROW_AVX2
#define HAS_I411TOARGBROW_AVX2
#define HAS_J400TOARGBROW_AVX2 #define HAS_J400TOARGBROW_AVX2
#define HAS_RGB565TOARGBROW_AVX2 #define HAS_RGB565TOARGBROW_AVX2
#endif #endif
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1575 #define LIBYUV_VERSION 1576
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -1431,7 +1431,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, ...@@ -1431,7 +1431,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
#if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2)
// Read 8 UV from 411 // Read 8 UV from 444
#define READYUV444 \ #define READYUV444 \
"movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
...@@ -1952,6 +1952,20 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, ...@@ -1952,6 +1952,20 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
"vpermq $0xd8,%%ymm5,%%ymm5 \n" \ "vpermq $0xd8,%%ymm5,%%ymm5 \n" \
"lea " MEMLEA(0x10, [a_buf]) ",%[a_buf] \n" "lea " MEMLEA(0x10, [a_buf]) ",%[a_buf] \n"
// Read 4 UV from 411, upsample to 16 UV.
//
// Inline-asm fragment for one 16-pixel step of a 4:1:1 row.
//
// Chroma path (result in ymm0):
//   - vmovd loads 4 U bytes from [u_buf] into xmm0.
//   - MEMOPREG(vmovd, ...) loads 4 bytes into xmm1 using the
//     [u_buf]/[v_buf] operand pair; presumably the address is
//     u_buf + v_buf * 1, with v_buf holding the V-minus-U offset set up by
//     the caller's "sub %[u_buf],%[v_buf]" -- confirm against MEMOPREG.
//   - lea advances u_buf by 4 bytes.
//   - vpunpcklbw interleaves the bytes of xmm0 and xmm1 into 4 UV pairs.
//   - vpunpcklwd with itself doubles each 16-bit UV pair (8 pairs).
//   - vpermq $0xd8 moves the second quadword into the low half of the upper
//     128-bit lane so both lanes carry data for the next unpack.
//   - vpunpckldq with itself doubles each 32-bit group, leaving every source
//     UV pair repeated 4 times (16 pairs total).
//
// Luma path (result in ymm4):
//   - vmovdqu loads 16 Y bytes from [y_buf] into xmm4.
//   - vpermq $0xd8 splits them 8/8 across the two 128-bit lanes.
//   - vpunpcklbw with itself duplicates every Y byte into a 16-bit element.
//   - lea advances y_buf by 16 bytes.
//
// Registers written: ymm0, ymm1, ymm4, plus the u_buf and y_buf operands.
#define READYUV411_AVX2 \
"vmovd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
MEMOPREG(vmovd, 0x00, [u_buf], [v_buf], 1, xmm1) \
"lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \
"vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
"vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
"vpermq $0xd8,%%ymm0,%%ymm0 \n" \
"vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" \
"vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
"vpermq $0xd8,%%ymm4,%%ymm4 \n" \
"vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
"lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
// Read 8 UV from NV12, upsample to 16 UV. // Read 8 UV from NV12, upsample to 16 UV.
#define READNV12_AVX2 \ #define READNV12_AVX2 \
"vmovdqu " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ "vmovdqu " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
...@@ -2067,7 +2081,7 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf, ...@@ -2067,7 +2081,7 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants) YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n" "sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READYUV444_AVX2 READYUV444_AVX2
...@@ -2088,6 +2102,39 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf, ...@@ -2088,6 +2102,39 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf,
} }
#endif // HAS_I444TOARGBROW_AVX2 #endif // HAS_I444TOARGBROW_AVX2
#ifdef HAS_I411TOARGBROW_AVX2
// 16 pixels
// 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
//
// Converts one row of I411 (4:1:1 planar YUV) to ARGB using AVX2 inline asm.
//
//   y_buf        - source Y row; 16 bytes are read per loop iteration.
//   u_buf        - source U row; 4 bytes are read per loop iteration.
//   v_buf        - source V row; converted to an offset from u_buf before the
//                  loop, so V is addressed relative to u_buf.
//   dst_argb     - destination row; written by STOREARGB_AVX2 (presumably
//                  64 bytes per iteration, per the comment above -- confirm
//                  against the macro definition).
//   yuvconstants - conversion constants consumed by YUVTORGB_SETUP_AVX2 and
//                  YUVTORGB_AVX2.
//   width        - pixel count; decremented by 16 per iteration.
//
// NOTE(review): the loop body runs before width is tested, so at least 16
// pixels are always processed, and it repeats while the remaining width is
// positive. Callers presumably pass a positive multiple of 16 and route
// remainders through a separate wrapper -- confirm at the call site.
void OMITFP I411ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants)
// Turn v_buf into (v_buf - u_buf) so one advancing pointer serves both planes.
"sub %[u_buf],%[v_buf] \n"
// ymm5 = all ones; presumably used as the opaque alpha by STOREARGB_AVX2.
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN
"1: \n"
// Load 16 Y and 4 UV, expanding UV so each pair covers 4 pixels.
READYUV411_AVX2
YUVTORGB_AVX2(yuvconstants)
STOREARGB_AVX2
// 16 pixels consumed; loop while any remain.
"sub $0x10,%[width] \n"
"jg 1b \n"
// Clear the upper halves of the ymm registers before returning to
// non-AVX code.
"vzeroupper \n"
: [y_buf]"+r"(y_buf), // %[y_buf]
[u_buf]"+r"(u_buf), // %[u_buf]
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
: [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_I411TOARGBROW_AVX2
#if defined(HAS_I422TOARGBROW_AVX2) #if defined(HAS_I422TOARGBROW_AVX2)
// 16 pixels // 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
...@@ -2100,7 +2147,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, ...@@ -2100,7 +2147,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
asm volatile ( asm volatile (
YUVTORGB_SETUP_AVX2(yuvconstants) YUVTORGB_SETUP_AVX2(yuvconstants)
"sub %[u_buf],%[v_buf] \n" "sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
READYUV422_AVX2 READYUV422_AVX2
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment