Commit a5e93766 authored by Frank Barchard's avatar Frank Barchard

Add ARGBExtractAlpha_AVX2 function

Port SSE2 version to AVX2.
BUG=libyuv:572
TEST=/usr/local/google/home/fbarchard/intelsde/sde -skx -- out/Release/libyuv_unittest --gtest_filter=*Extract*
R=wangcheng@google.com, magjed@chromium.org

Review URL: https://codereview.chromium.org/2420553002 .
parent 9fb3c31b
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1625
Version: 1626
License: BSD
License File: LICENSE
......
......@@ -239,6 +239,15 @@ extern "C" {
#define HAS_BLENDPLANEROW_AVX2
#endif
// The following are available clang 3.4 or gcc 4.7.
// TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) && \
!(defined(__clang__) && defined(_M_IX86) )
#define HAS_ARGBEXTRACTALPHAROW_AVX2
#endif
// The following are available for AVX2 Visual C and clangcl 32 bit:
// TODO(fbarchard): Port to gcc.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
......@@ -880,9 +889,12 @@ void ARGBCopyAlphaRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width);
void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width);
void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, uint8* dst_a, int width);
void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width);
void ARGBExtractAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_a,
int width);
void ARGBExtractAlphaRow_Any_AVX2(const uint8* src_argb, uint8* dst_a,
int width);
void ARGBExtractAlphaRow_Any_NEON(const uint8* src_argb, uint8* dst_a,
int width);
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1625
#define LIBYUV_VERSION 1626
#endif // INCLUDE_LIBYUV_VERSION_H_
......@@ -2702,6 +2702,12 @@ int ARGBExtractAlpha(const uint8* src_argb, int src_stride,
: ARGBExtractAlphaRow_Any_SSE2;
}
#endif
#if defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBExtractAlphaRow = IS_ALIGNED(width, 32) ? ARGBExtractAlphaRow_AVX2
: ARGBExtractAlphaRow_Any_AVX2;
}
#endif
#if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON
......
......@@ -474,6 +474,9 @@ ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2
ANY11(ARGBExtractAlphaRow_Any_AVX2, ARGBExtractAlphaRow_AVX2, 0, 4, 1, 32)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_NEON
ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15)
#endif
......
......@@ -2860,6 +2860,47 @@ void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) {
}
#endif // HAS_ARGBEXTRACTALPHAROW_SSE2
#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2
static const uvec8 kShuffleAlphaShort_AVX2 = {
3u, 128u, 128u, 128u, 7u, 128u, 128u, 128u,
11u, 128u, 128u, 128u, 15u, 128u, 128u, 128u
};
void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, uint8* dst_a, int width) {
asm volatile (
"vmovdqa %3,%%ymm4 \n"
"vbroadcastf128 %4,%%ymm5 \n"
LABELALIGN
"1: \n"
"vmovdqu " MEMACCESS(0) ", %%ymm0 \n"
"vmovdqu " MEMACCESS2(0x20, 0) ", %%ymm1 \n"
"vpshufb %%ymm5,%%ymm0,%%ymm0 \n" // vpsrld $0x18, %%ymm0
"vpshufb %%ymm5,%%ymm1,%%ymm1 \n"
"vmovdqu " MEMACCESS2(0x40, 0) ", %%ymm2 \n"
"vmovdqu " MEMACCESS2(0x60, 0) ", %%ymm3 \n"
"lea " MEMLEA(0x80, 0) ", %0 \n"
"vpackssdw %%ymm1, %%ymm0, %%ymm0 \n" // mutates
"vpshufb %%ymm5,%%ymm2,%%ymm2 \n"
"vpshufb %%ymm5,%%ymm3,%%ymm3 \n"
"vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // mutates
"vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates.
"vpermd %%ymm0,%%ymm4,%%ymm0 \n" // unmutate.
"vmovdqu %%ymm0," MEMACCESS(1) " \n"
"lea " MEMLEA(0x20,1) ",%1 \n"
"sub $0x20, %2 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_argb), // %0
"+r"(dst_a), // %1
"+rm"(width) // %2
: "m"(kPermdARGBToY_AVX), // %3
"m"(kShuffleAlphaShort_AVX2) // %4
: "memory", "cc"
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_ARGBEXTRACTALPHAROW_AVX2
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
// width in pixels
void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment