Commit e1bb5d94 authored by fbarchard@google.com

AVX2 version of half rows.

BUG=none
TEST=out\release\libyuv_unittest.exe --gtest_filter=*I422ToI420*
Review URL: https://webrtc-codereview.appspot.com/1157004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@590 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 9246ed04
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 589 Version: 590
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -137,6 +137,7 @@ extern "C" { ...@@ -137,6 +137,7 @@ extern "C" {
#define HAS_UYVYTOUV422ROW_AVX2 #define HAS_UYVYTOUV422ROW_AVX2
#define HAS_UYVYTOUVROW_AVX2 #define HAS_UYVYTOUVROW_AVX2
#define HAS_UYVYTOYROW_AVX2 #define HAS_UYVYTOYROW_AVX2
#define HAS_HALFROW_AVX2
// Effects // Effects
#define HAS_ARGBATTENUATEROW_AVX2 #define HAS_ARGBATTENUATEROW_AVX2
...@@ -1269,6 +1270,8 @@ void HalfRow_C(const uint8* src_uv, int src_uv_stride, ...@@ -1269,6 +1270,8 @@ void HalfRow_C(const uint8* src_uv, int src_uv_stride,
uint8* dst_uv, int pix); uint8* dst_uv, int pix);
void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
uint8* dst_uv, int pix); uint8* dst_uv, int pix);
void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
uint8* dst_uv, int pix);
void HalfRow_NEON(const uint8* src_uv, int src_uv_stride, void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
uint8* dst_uv, int pix); uint8* dst_uv, int pix);
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 589 #define LIBYUV_VERSION 590
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -97,7 +97,15 @@ int I422ToI420(const uint8* src_y, int src_stride_y, ...@@ -97,7 +97,15 @@ int I422ToI420(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
HalfRow = HalfRow_SSE2; HalfRow = HalfRow_SSE2;
} }
#elif defined(HAS_HALFROW_NEON) #endif
#if defined(HAS_HALFROW_AVX2)
bool clear = false;
if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(halfwidth, 32)) {
clear = true;
HalfRow = HalfRow_AVX2;
}
#endif
#if defined(HAS_HALFROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) { if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) {
HalfRow = HalfRow_NEON; HalfRow = HalfRow_NEON;
} }
...@@ -128,6 +136,11 @@ int I422ToI420(const uint8* src_y, int src_stride_y, ...@@ -128,6 +136,11 @@ int I422ToI420(const uint8* src_y, int src_stride_y,
if (height & 1) { if (height & 1) {
HalfRow(src_v, 0, dst_v, halfwidth); HalfRow(src_v, 0, dst_v, halfwidth);
} }
#if defined(HAS_HALFROW_AVX2)
if (clear) {
__asm vzeroupper;
}
#endif
return 0; return 0;
} }
......
...@@ -5531,6 +5531,32 @@ void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, ...@@ -5531,6 +5531,32 @@ void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
} }
} }
#ifdef HAS_HALFROW_AVX2
// Averages two rows of bytes into a single output row using AVX2,
// handling 32 bytes per loop iteration.
//
//   src_uv         - first source row.
//   src_uv_stride  - byte offset from src_uv to the second source row. With a
//                    stride of 0 the row is averaged with itself, which
//                    leaves the bytes unchanged.
//   dst_uv         - destination row.
//   pix            - number of bytes to produce.
//
// The loop tests its counter only after storing a block, so at least one
// 32-byte block is always read and written, and a pix that is not a multiple
// of 32 is effectively rounded up. Callers are expected to pass a positive
// multiple of 32.
//
// Loads and stores use vmovdqu, so no pointer alignment is required.
//
// This routine does not execute vzeroupper; ymm0 is left with its upper half
// in use on return.
// NOTE(review): the caller appears to be responsible for issuing vzeroupper
// after the last call - confirm at each call site.
//
// Naked function: there is no compiler-generated prologue or epilogue, so
// arguments are read directly from the stack. Only edi is saved/restored
// here; eax, ecx and edx are overwritten without being saved.
__declspec(naked) __declspec(align(16))
void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
uint8* dst_uv, int pix) {
__asm {
push edi
// Argument offsets carry an extra +4 to skip the edi pushed above.
mov eax, [esp + 4 + 4] // src_uv
mov edx, [esp + 4 + 8] // src_uv_stride
mov edi, [esp + 4 + 12] // dst_uv
mov ecx, [esp + 4 + 16] // pix
// edi = dst_uv - src_uv, so [eax + edi] addresses the destination while
// only eax has to be advanced inside the loop.
sub edi, eax
align 16
convertloop:
vmovdqu ymm0, [eax] // 32 bytes from the first row
vpavgb ymm0, ymm0, [eax + edx] // per-byte unsigned average with the second row
// Sets the flags tested by jg below; the vmovdqu and lea in between do not
// modify flags, so this ordering must be preserved.
sub ecx, 32
vmovdqu [eax + edi], ymm0 // store 32 averaged bytes to the destination
lea eax, [eax + 32] // advance source (and, via edi, destination)
jg convertloop // continue while the remaining count is > 0
pop edi
ret
}
}
#endif // HAS_HALFROW_AVX2
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ARGBToBayerRow_SSSE3(const uint8* src_argb, void ARGBToBayerRow_SSSE3(const uint8* src_argb,
uint8* dst_bayer, uint32 selector, int pix) { uint8* dst_bayer, uint32 selector, int pix) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment