Commit b99bcab7 authored by fbarchard@google.com

ARGBShuffle_AVX2 to speed up endian swapping for Chrome/Java.

BUG=271
TESTED=ARGBShuffle unittest
R=mflodman@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/2320005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@804 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 446f91d0
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 803 Version: 804
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -150,11 +150,14 @@ extern "C" { ...@@ -150,11 +150,14 @@ extern "C" {
#define GCC_HAS_AVX2 1 #define GCC_HAS_AVX2 1
#endif // GNUC >= 4.7 #endif // GNUC >= 4.7
#endif // __GNUC__ #endif // __GNUC__
// TODO(fbarchard): Test with the new NaCl toolchain. To test, change
// __native_client__AVX2 to __native_client__.
#if !defined(LIBYUV_DISABLE_X86) && \ #if !defined(LIBYUV_DISABLE_X86) && \
((defined(_M_IX86) && defined(_MSC_VER) && _MSC_VER >= 1700) || \ ((defined(_M_IX86) && defined(_MSC_VER) && _MSC_VER >= 1700) || \
defined(__native_client__) || defined(__clang__) || defined(GCC_HAS_AVX2)) defined(__native_client__AVX2) || defined(__clang__) || defined(GCC_HAS_AVX2))
// Effects: // Effects:
#define HAS_ARGBPOLYNOMIALROW_AVX2 #define HAS_ARGBPOLYNOMIALROW_AVX2
#define HAS_ARGBSHUFFLEROW_AVX2
#endif #endif
// The following are Windows only: // The following are Windows only:
...@@ -166,7 +169,6 @@ extern "C" { ...@@ -166,7 +169,6 @@ extern "C" {
// Caveat: Visual C 2012 required for AVX2. // Caveat: Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700 #if _MSC_VER >= 1700
#define HAS_ARGBSHUFFLEROW_AVX2
#define HAS_ARGBTOUVROW_AVX2 #define HAS_ARGBTOUVROW_AVX2
#define HAS_ARGBTOYJROW_AVX2 #define HAS_ARGBTOYJROW_AVX2
#define HAS_ARGBTOYROW_AVX2 #define HAS_ARGBTOYROW_AVX2
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 803 #define LIBYUV_VERSION 804
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -5726,6 +5726,38 @@ void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb, ...@@ -5726,6 +5726,38 @@ void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
} }
#endif // HAS_ARGBSHUFFLEROW_SSSE3 #endif // HAS_ARGBSHUFFLEROW_SSSE3
#ifdef HAS_ARGBSHUFFLEROW_AVX2
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
const uint8* shuffler, int pix) {
asm volatile (
"vmovdqa "MEMACCESS(3)",%%xmm5 \n"
"vpermq $0x44,%%ymm5,%%ymm5 \n"
".p2align 4 \n"
"1: \n"
"vmovdqu "MEMACCESS(0)",%%ymm0 \n"
"vmovdqu "MEMACCESS2(0x20,0)",%%ymm1 \n"
"lea "MEMLEA(0x40,0)",%0 \n"
"vpshufb %%ymm5,%%ymm0,%%ymm0 \n"
"vpshufb %%ymm5,%%ymm1,%%ymm1 \n"
"sub $0x10,%2 \n"
"vmovdqu %%ymm0,"MEMACCESS(1)" \n"
"vmovdqu %%ymm1,"MEMACCESS2(0x20,1)" \n"
"lea "MEMLEA(0x40,1)",%1 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(pix) // %2
: "r"(shuffler) // %3
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm5"
#endif
);
}
#endif // HAS_ARGBSHUFFLEROW_AVX2
#ifdef HAS_I422TOYUY2ROW_SSE2 #ifdef HAS_I422TOYUY2ROW_SSE2
void I422ToYUY2Row_SSE2(const uint8* src_y, void I422ToYUY2Row_SSE2(const uint8* src_y,
const uint8* src_u, const uint8* src_u,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment