Commit 551d2b29 authored by fbarchard@google.com's avatar fbarchard@google.com

AVX2 version of ARGBToI420

BUG=181
TEST=unittest
Review URL: https://webrtc-codereview.appspot.com/1090005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@566 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 8e26eada
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 565 Version: 566
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -123,6 +123,12 @@ extern "C" { ...@@ -123,6 +123,12 @@ extern "C" {
// TODO(fbarchard): Port to gcc. // TODO(fbarchard): Port to gcc.
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86) #if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
#define HAS_ARGBCOLORTABLEROW_X86 #define HAS_ARGBCOLORTABLEROW_X86
// Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700
// TODO(fbarchard): Hook these up to all functions. e.g. format conversion.
#define HAS_ARGBTOYROW_AVX2
#define HAS_ARGBTOUVROW_AVX2
#endif
#endif #endif
// The following are Yasm x86 only. // The following are Yasm x86 only.
...@@ -258,6 +264,13 @@ typedef __declspec(align(16)) int16 vec16[8]; ...@@ -258,6 +264,13 @@ typedef __declspec(align(16)) int16 vec16[8];
typedef __declspec(align(16)) uint16 uvec16[8]; typedef __declspec(align(16)) uint16 uvec16[8];
typedef __declspec(align(16)) int32 vec32[4]; typedef __declspec(align(16)) int32 vec32[4];
typedef __declspec(align(16)) uint32 uvec32[4]; typedef __declspec(align(16)) uint32 uvec32[4];
typedef __declspec(align(32)) int8 lvec8[32];
typedef __declspec(align(32)) uint8 ulvec8[32];
typedef __declspec(align(32)) int16 lvec16[16];
typedef __declspec(align(32)) uint16 ulvec16[16];
typedef __declspec(align(32)) int32 lvec32[8];
typedef __declspec(align(32)) uint32 ulvec32[8];
#elif defined(__GNUC__) #elif defined(__GNUC__)
#define SIMD_ALIGNED(var) var __attribute__((aligned(16))) #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
typedef int8 __attribute__((vector_size(16))) vec8; typedef int8 __attribute__((vector_size(16))) vec8;
...@@ -360,6 +373,9 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy, ...@@ -360,6 +373,9 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
uint8* dst_argb, uint8* dst_argb,
int width); int width);
void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
void ARGBToYRow_Unaligned_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix); void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix); void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
...@@ -430,6 +446,12 @@ void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int pix); ...@@ -430,6 +446,12 @@ void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, int pix); void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, int pix); void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUVRow_Unaligned_AVX2(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb, void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra, void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra,
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 565 #define LIBYUV_VERSION 566
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -739,7 +739,26 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb, ...@@ -739,7 +739,26 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
} }
} }
} }
#elif defined(HAS_ARGBTOYROW_NEON) #endif
#if defined(HAS_ARGBTOYROW_AVX2)
bool clear = false;
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
clear = true;
ARGBToUVRow = ARGBToUVRow_Any_AVX2;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVRow = ARGBToUVRow_Unaligned_AVX2;
ARGBToYRow = ARGBToYRow_Unaligned_AVX2;
if (IS_ALIGNED(src_argb, 32) && IS_ALIGNED(src_stride_argb, 32)) {
ARGBToUVRow = ARGBToUVRow_AVX2;
if (IS_ALIGNED(dst_y, 32) && IS_ALIGNED(dst_stride_y, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
}
}
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToYRow = ARGBToYRow_Any_NEON; ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
...@@ -767,6 +786,12 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb, ...@@ -767,6 +786,12 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
ARGBToUVRow(src_argb, 0, dst_u, dst_v, width); ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
ARGBToYRow(src_argb, dst_y, width); ARGBToYRow(src_argb, dst_y, width);
} }
#if defined(HAS_ARGBTOYROW_AVX2)
if (clear) {
__asm vzeroupper;
}
#endif
return 0; return 0;
} }
......
...@@ -195,6 +195,9 @@ BAYERANY(ARGBToBayerRow_Any_NEON, ARGBToBayerRow_NEON, ARGBToBayerRow_C, ...@@ -195,6 +195,9 @@ BAYERANY(ARGBToBayerRow_Any_NEON, ARGBToBayerRow_NEON, ARGBToBayerRow_C,
dst_y + (width - NUM) * BPP, NUM); \ dst_y + (width - NUM) * BPP, NUM); \
} }
#ifdef HAS_ARGBTOYROW_AVX2
YANY(ARGBToYRow_Any_AVX2, ARGBToYRow_Unaligned_AVX2, 4, 1, 32)
#endif
#ifdef HAS_ARGBTOYROW_SSSE3 #ifdef HAS_ARGBTOYROW_SSSE3
YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 1, 16) YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 1, 16) YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 1, 16)
...@@ -251,37 +254,40 @@ YANY(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, ARGBAttenuateRow_C, ...@@ -251,37 +254,40 @@ YANY(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, ARGBAttenuateRow_C,
#endif #endif
// RGB/YUV to UV does multiple of 16 with SIMD and remainder with C. // RGB/YUV to UV does multiple of 16 with SIMD and remainder with C.
#define UVANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP) \ #define UVANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK) \
void NAMEANY(const uint8* src_argb, int src_stride_argb, \ void NAMEANY(const uint8* src_argb, int src_stride_argb, \
uint8* dst_u, uint8* dst_v, int width) { \ uint8* dst_u, uint8* dst_v, int width) { \
int n = width & ~15; \ int n = width & ~MASK; \
ANYTOUV_SIMD(src_argb, src_stride_argb, dst_u, dst_v, n); \ ANYTOUV_SIMD(src_argb, src_stride_argb, dst_u, dst_v, n); \
ANYTOUV_C(src_argb + n * BPP, src_stride_argb, \ ANYTOUV_C(src_argb + n * BPP, src_stride_argb, \
dst_u + (n >> 1), \ dst_u + (n >> 1), \
dst_v + (n >> 1), \ dst_v + (n >> 1), \
width & 15); \ width & MASK); \
} }
#ifdef HAS_ARGBTOYROW_AVX2
UVANY(ARGBToUVRow_Any_AVX2, ARGBToUVRow_Unaligned_AVX2, ARGBToUVRow_C, 4, 31)
#endif
#ifdef HAS_ARGBTOUVROW_SSSE3 #ifdef HAS_ARGBTOUVROW_SSSE3
UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4) UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4, 15)
UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4) UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4, 15)
UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4) UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4, 15)
UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4) UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4, 15)
UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, YUY2ToUVRow_C, 2) UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, YUY2ToUVRow_C, 2, 15)
UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2) UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2, 15)
#endif #endif
#ifdef HAS_ARGBTOUVROW_NEON #ifdef HAS_ARGBTOUVROW_NEON
UVANY(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, ARGBToUVRow_C, 4) UVANY(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, ARGBToUVRow_C, 4, 15)
UVANY(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, BGRAToUVRow_C, 4) UVANY(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, BGRAToUVRow_C, 4, 15)
UVANY(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, ABGRToUVRow_C, 4) UVANY(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, ABGRToUVRow_C, 4, 15)
UVANY(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, RGBAToUVRow_C, 4) UVANY(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, RGBAToUVRow_C, 4, 15)
UVANY(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, RGB24ToUVRow_C, 3) UVANY(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, RGB24ToUVRow_C, 3, 15)
UVANY(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, RAWToUVRow_C, 3) UVANY(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, RAWToUVRow_C, 3, 15)
UVANY(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, RGB565ToUVRow_C, 2) UVANY(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, RGB565ToUVRow_C, 2, 15)
UVANY(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, ARGB1555ToUVRow_C, 2) UVANY(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, ARGB1555ToUVRow_C, 2, 15)
UVANY(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, ARGB4444ToUVRow_C, 2) UVANY(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, ARGB4444ToUVRow_C, 2, 15)
UVANY(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, YUY2ToUVRow_C, 2) UVANY(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, YUY2ToUVRow_C, 2, 15)
UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2) UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2, 15)
#endif #endif
#undef UVANY #undef UVANY
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment