Commit 8f0caded authored by Frank Barchard

Port the ARGB to RGB565 dithering AVX2 code to GCC.

Previously the assembly code was only available on Windows.
This CL ports the AVX2 code to GCC syntax.

TBR=harryjin@google.com
BUG=libyuv:492

Review URL: https://codereview.chromium.org/1391273003 .
parent cc89e3a7
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1503
Version: 1504
License: BSD
License File: LICENSE
......
......@@ -182,36 +182,6 @@ extern "C" {
#define HAS_SOBELYROW_SSE2
#endif
// The following are also available on x64 Visual C.
#if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64) && \
(!defined(__clang__) || defined(__SSSE3__))
#define HAS_I422ALPHATOABGRROW_SSSE3
#define HAS_I422ALPHATOARGBROW_SSSE3
#define HAS_I422TOABGRROW_SSSE3
#define HAS_I422TOARGBROW_SSSE3
#endif
// The following are available for AVX2 Visual C and clangcl 32 bit:
// TODO(fbarchard): Port to gcc.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
#define HAS_ARGB1555TOARGBROW_AVX2
#define HAS_ARGB4444TOARGBROW_AVX2
#define HAS_ARGBTOARGB1555ROW_AVX2
#define HAS_ARGBTOARGB4444ROW_AVX2
#define HAS_ARGBTORGB565DITHERROW_AVX2
#define HAS_ARGBTORGB565ROW_AVX2
#define HAS_I411TOARGBROW_AVX2
#define HAS_I422TOARGB1555ROW_AVX2
#define HAS_I422TOARGB4444ROW_AVX2
#define HAS_I422TORGB565ROW_AVX2
#define HAS_I444TOABGRROW_AVX2
#define HAS_I444TOARGBROW_AVX2
#define HAS_J400TOARGBROW_AVX2
#define HAS_NV12TORGB565ROW_AVX2
#define HAS_RGB565TOARGBROW_AVX2
#endif
// The following are available on all x86 platforms, but
// require VS2012, clang 3.4 or gcc 4.7.
// The code supports NaCL but requires a new compiler and validator.
......@@ -222,6 +192,7 @@ extern "C" {
#define HAS_ARGBMIRRORROW_AVX2
#define HAS_ARGBPOLYNOMIALROW_AVX2
#define HAS_ARGBSHUFFLEROW_AVX2
#define HAS_ARGBTORGB565DITHERROW_AVX2
#define HAS_ARGBTOUVROW_AVX2
#define HAS_ARGBTOYJROW_AVX2
#define HAS_ARGBTOYROW_AVX2
......@@ -262,6 +233,35 @@ extern "C" {
#define HAS_ARGBUNATTENUATEROW_AVX2
#endif
// The following are available for AVX2 Visual C and clangcl 32 bit:
// This group is enabled only when LIBYUV_DISABLE_X86 is not defined, the
// target is 32-bit x86 (_M_IX86), and either VISUALC_HAS_AVX2 or
// CLANG_HAS_AVX2 is defined.
// TODO(fbarchard): Port to gcc.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
#define HAS_ARGB1555TOARGBROW_AVX2
#define HAS_ARGB4444TOARGBROW_AVX2
#define HAS_ARGBTOARGB1555ROW_AVX2
#define HAS_ARGBTOARGB4444ROW_AVX2
#define HAS_ARGBTORGB565ROW_AVX2
#define HAS_I411TOARGBROW_AVX2
#define HAS_I422TOARGB1555ROW_AVX2
#define HAS_I422TOARGB4444ROW_AVX2
#define HAS_I422TORGB565ROW_AVX2
#define HAS_I444TOABGRROW_AVX2
#define HAS_I444TOARGBROW_AVX2
#define HAS_J400TOARGBROW_AVX2
#define HAS_NV12TORGB565ROW_AVX2
#define HAS_RGB565TOARGBROW_AVX2
#endif
// The following are also available on x64 Visual C.
// This group is enabled only when LIBYUV_DISABLE_X86 is not defined and the
// target is x64 (_M_X64); when the compiler is clang, __SSSE3__ must also be
// defined.
#if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64) && \
(!defined(__clang__) || defined(__SSSE3__))
#define HAS_I422ALPHATOABGRROW_SSSE3
#define HAS_I422ALPHATOARGBROW_SSSE3
#define HAS_I422TOABGRROW_SSSE3
#define HAS_I422TOARGBROW_SSSE3
#endif
// The following are available on Neon platforms:
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1503
#define LIBYUV_VERSION 1504
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -566,12 +566,58 @@ void ARGBToRGB565DitherRow_SSE2(const uint8* src, uint8* dst,
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(pix) // %2
: "m"(dither4) // %3
: "rm"(dither4) // %3
: "memory", "cc",
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
);
}
#ifdef HAS_ARGBTORGB565DITHERROW_AVX2
// Converts a row of 32-bit pixels to 16-bit 5:6:5 pixels, adding a per-pixel
// dither value (with unsigned saturation) before truncating each channel.
//
//   src     - source row, 4 bytes per pixel; 32 bytes are read per iteration.
//   dst     - destination row, 2 bytes per pixel; 16 bytes are written per
//             iteration.
//   dither4 - four dither bytes packed into 32 bits. Byte (i % 4) is added to
//             every byte of pixel i within each group of 8 pixels.
//   pix     - number of pixels to convert.
//
// Each output pixel is assembled as:
//   bits  0..4   <- top 5 bits of source byte 0
//   bits  5..10  <- top 6 bits of source byte 1
//   bits 11..15  <- top 5 bits of source byte 2
// Source byte 3 is dithered along with the others but masked away.
//
// The loop body runs before pix is tested and consumes 8 pixels each pass, so
// callers are presumably expected to pass a positive multiple of 8 -- confirm
// against the call sites.
void ARGBToRGB565DitherRow_AVX2(const uint8* src, uint8* dst,
                                const uint32 dither4, int pix) {
  asm volatile (
    // Expand dither4 so that each of its 4 bytes is replicated across the
    // 4 bytes of one pixel, for 8 pixels (two copies of the 4-byte pattern).
    "vbroadcastss %3,%%xmm6                    \n"
    "vpunpcklbw %%xmm6,%%xmm6,%%xmm6           \n"
    "vpermq     $0xd8,%%ymm6,%%ymm6            \n"
    "vpunpcklwd %%ymm6,%%ymm6,%%ymm6           \n"
    // ymm3 = 0x0000001f per dword (5-bit mask, bits 0..4).
    "vpcmpeqb   %%ymm3,%%ymm3,%%ymm3           \n"
    "vpsrld     $0x1b,%%ymm3,%%ymm3            \n"
    // ymm4 = 0x000007e0 per dword (6-bit mask, bits 5..10).
    "vpcmpeqb   %%ymm4,%%ymm4,%%ymm4           \n"
    "vpsrld     $0x1a,%%ymm4,%%ymm4            \n"
    "vpslld     $0x5,%%ymm4,%%ymm4             \n"
    // ymm5 = 0x0000f800 per dword (5-bit mask, bits 11..15).
    "vpslld     $0xb,%%ymm3,%%ymm5             \n"
    LABELALIGN
  "1:                                          \n"
    "vmovdqu    (%0),%%ymm0                    \n"
    // Saturating add so a bright channel cannot wrap around to dark.
    "vpaddusb   %%ymm6,%%ymm0,%%ymm0           \n"
    "vpsrld     $0x5,%%ymm0,%%ymm2             \n"
    "vpsrld     $0x3,%%ymm0,%%ymm1             \n"
    "vpsrld     $0x8,%%ymm0,%%ymm0             \n"
    "vpand      %%ymm4,%%ymm2,%%ymm2           \n"
    "vpand      %%ymm3,%%ymm1,%%ymm1           \n"
    "vpand      %%ymm5,%%ymm0,%%ymm0           \n"
    "vpor       %%ymm2,%%ymm1,%%ymm1           \n"
    "vpor       %%ymm1,%%ymm0,%%ymm0           \n"
    // Narrow dwords to words per 128-bit lane, then gather both lanes'
    // results into the low 128 bits for a single 16-byte store.
    "vpackusdw  %%ymm0,%%ymm0,%%ymm0           \n"
    "vpermq     $0xd8,%%ymm0,%%ymm0            \n"
    "lea        0x20(%0),%0                    \n"
    "vmovdqu    %%xmm0,(%1)                    \n"
    "lea        0x10(%1),%1                    \n"
    "sub        $0x8,%2                        \n"
    "jg         1b                             \n"
    "vzeroupper                                \n"
  : "+r"(src),      // %0
    "+r"(dst),      // %1
    "+r"(pix)       // %2
  // Must be a memory operand: vbroadcastss accepts only a memory or XMM
  // source, so allowing "r" would let the compiler substitute a
  // general-purpose register and produce an instruction that cannot be
  // assembled.
  : "m"(dither4)    // %3
  : "memory", "cc",
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
  );
}
#endif // HAS_ARGBTORGB565DITHERROW_AVX2
void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) {
asm volatile (
"pcmpeqb %%xmm4,%%xmm4 \n"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment