Commit 51398e0b authored by fbarchard@google.com

ARGBMirror AVX2

BUG=none
TEST=out\release\libyuv_unittest --gtest_filter=*ARGBMirror*
Review URL: https://webrtc-codereview.appspot.com/1159005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@594 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 2610c069
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 590 Version: 594
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -139,6 +139,7 @@ extern "C" { ...@@ -139,6 +139,7 @@ extern "C" {
#define HAS_UYVYTOYROW_AVX2 #define HAS_UYVYTOYROW_AVX2
#define HAS_HALFROW_AVX2 #define HAS_HALFROW_AVX2
#define HAS_MIRRORROW_AVX2 #define HAS_MIRRORROW_AVX2
#define HAS_ARGBMIRRORROW_AVX2
// Effects // Effects
#define HAS_ARGBATTENUATEROW_AVX2 #define HAS_ARGBATTENUATEROW_AVX2
...@@ -574,6 +575,7 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, ...@@ -574,6 +575,7 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width); int width);
void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width);
void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width);
void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width);
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 590 #define LIBYUV_VERSION 594
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -50,13 +50,11 @@ int I420ToI422(const uint8* src_y, int src_stride_y, ...@@ -50,13 +50,11 @@ int I420ToI422(const uint8* src_y, int src_stride_y,
} }
int halfwidth = (width + 1) >> 1; int halfwidth = (width + 1) >> 1;
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
#if defined(HAS_COPYROW_NEON) #if defined(HAS_COPYROW_X86)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 32)) {
CopyRow = CopyRow_NEON;
}
#elif defined(HAS_COPYROW_X86)
if (IS_ALIGNED(halfwidth, 4)) { if (IS_ALIGNED(halfwidth, 4)) {
CopyRow = CopyRow_X86; CopyRow = CopyRow_X86;
}
#endif
#if defined(HAS_COPYROW_SSE2) #if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(halfwidth, 32) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(halfwidth, 32) &&
IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) && IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
...@@ -66,6 +64,14 @@ int I420ToI422(const uint8* src_y, int src_stride_y, ...@@ -66,6 +64,14 @@ int I420ToI422(const uint8* src_y, int src_stride_y,
CopyRow = CopyRow_SSE2; CopyRow = CopyRow_SSE2;
} }
#endif #endif
#if defined(HAS_COPYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
CopyRow = CopyRow_AVX2;
}
#endif
#if defined(HAS_COPYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 32)) {
CopyRow = CopyRow_NEON;
} }
#endif #endif
#if defined(HAS_COPYROW_MIPS) #if defined(HAS_COPYROW_MIPS)
......
...@@ -448,7 +448,15 @@ int ARGBMirror(const uint8* src_argb, int src_stride_argb, ...@@ -448,7 +448,15 @@ int ARGBMirror(const uint8* src_argb, int src_stride_argb,
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBMirrorRow = ARGBMirrorRow_SSSE3; ARGBMirrorRow = ARGBMirrorRow_SSSE3;
} }
#elif defined(HAS_ARGBMIRRORROW_NEON) #endif
#if defined(HAS_ARGBMIRRORROW_AVX2)
bool clear = false;
if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
clear = true;
ARGBMirrorRow = ARGBMirrorRow_AVX2;
}
#endif
#if defined(HAS_ARGBMIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) { if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
ARGBMirrorRow = ARGBMirrorRow_NEON; ARGBMirrorRow = ARGBMirrorRow_NEON;
} }
...@@ -460,6 +468,12 @@ int ARGBMirror(const uint8* src_argb, int src_stride_argb, ...@@ -460,6 +468,12 @@ int ARGBMirror(const uint8* src_argb, int src_stride_argb,
src_argb += src_stride_argb; src_argb += src_stride_argb;
dst_argb += dst_stride_argb; dst_argb += dst_stride_argb;
} }
#if defined(HAS_ARGBMIRRORROW_AVX2)
if (clear) {
__asm vzeroupper;
}
#endif
return 0; return 0;
} }
......
...@@ -99,7 +99,15 @@ void ARGBRotate180(const uint8* src, int src_stride, ...@@ -99,7 +99,15 @@ void ARGBRotate180(const uint8* src, int src_stride,
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
ARGBMirrorRow = ARGBMirrorRow_SSSE3; ARGBMirrorRow = ARGBMirrorRow_SSSE3;
} }
#elif defined(HAS_ARGBMIRRORROW_NEON) #endif
#if defined(HAS_ARGBMIRRORROW_AVX2)
bool clear = false;
if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
clear = true;
ARGBMirrorRow = ARGBMirrorRow_AVX2;
}
#endif
#if defined(HAS_ARGBMIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) { if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
ARGBMirrorRow = ARGBMirrorRow_NEON; ARGBMirrorRow = ARGBMirrorRow_NEON;
} }
...@@ -151,6 +159,11 @@ void ARGBRotate180(const uint8* src, int src_stride, ...@@ -151,6 +159,11 @@ void ARGBRotate180(const uint8* src, int src_stride,
src_bot -= src_stride; src_bot -= src_stride;
dst_bot -= dst_stride; dst_bot -= dst_stride;
} }
#if defined(HAS_ARGBMIRRORROW_AVX2)
if (clear) {
__asm vzeroupper;
}
#endif
} }
LIBYUV_API LIBYUV_API
......
...@@ -3056,6 +3056,33 @@ void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { ...@@ -3056,6 +3056,33 @@ void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
} }
#endif // HAS_ARGBMIRRORROW_SSSE3 #endif // HAS_ARGBMIRRORROW_SSSE3
#ifdef HAS_ARGBMIRRORROW_AVX2
// Permute table for vpermd: dword indices 7..0, i.e. reverse the order of the
// eight 32-bit lanes (one lane per 4-byte pixel) of a 256-bit register.
// The bytes inside each dword are left in place.
// NOTE(review): loaded below with vmovdqa, which needs a 32-byte aligned
// operand - assumes ulvec32 carries that alignment; TODO confirm.
static const ulvec32 kARGBShuffleMirror_AVX2 = {
7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
};
// Mirrors one row of 4-byte pixels: writes the `width` pixels of src to dst in
// reverse order, 8 pixels (32 bytes) per loop iteration.
//   src   - source row; read from its end towards its start.
//   dst   - destination row; written front to back with unaligned stores.
//   width - pixel count. The loop always runs at least once and steps by 8,
//           so width must be a positive multiple of 8 (the callers in this
//           change select this routine only when IS_ALIGNED(width, 8)).
// The function is naked (no prologue/epilogue), so arguments are fetched
// straight from the stack. It does not execute vzeroupper itself; the callers
// in this change issue it once after their row loop.
__declspec(naked) __declspec(align(16))
void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
__asm {
mov eax, [esp + 4] // src
mov edx, [esp + 8] // dst
mov ecx, [esp + 12] // width
// Bias src by -32 bytes so [eax + ecx * 4] addresses the last 8 pixels of
// the part of the row that has not been processed yet.
lea eax, [eax - 32]
vmovdqa ymm5, kARGBShuffleMirror_AVX2
align 16
convertloop:
vpermd ymm0, ymm5, [eax + ecx * 4] // permute dword order
// sub sets the flags tested by jg below; the vmovdqu and lea in between do
// not modify flags, so this instruction order must be preserved.
sub ecx, 8
vmovdqu [edx], ymm0
lea edx, [edx + 32]
jg convertloop // loop while pixels remain (ecx > 0)
ret
}
}
#endif // HAS_ARGBMIRRORROW_AVX2
#ifdef HAS_SPLITUVROW_SSE2 #ifdef HAS_SPLITUVROW_SSE2
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment