Commit c0d9c346 authored by fbarchard@google.com

Attenuate and Unattenuate Any variations for sse2, ssse3 and avx2

BUG=190
TESTED=out\release\libyuv_unittest --gtest_filter=*Unatt*
Review URL: https://webrtc-codereview.appspot.com/1121005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@579 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 787f8276
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 578 Version: 579
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -1318,6 +1318,8 @@ void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, ...@@ -1318,6 +1318,8 @@ void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
int width); int width);
void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb, void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
int width); int width);
void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
int width);
void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb, void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
int width); int width);
...@@ -1326,6 +1328,10 @@ extern uint32 fixed_invtbl8[256]; ...@@ -1326,6 +1328,10 @@ extern uint32 fixed_invtbl8[256];
void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
int width);
void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
int width);
void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width); void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width);
void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 578 #define LIBYUV_VERSION 579
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -1034,9 +1034,12 @@ int ARGBAttenuate(const uint8* src_argb, int src_stride_argb, ...@@ -1034,9 +1034,12 @@ int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
#endif #endif
#if defined(HAS_ARGBATTENUATEROW_AVX2) #if defined(HAS_ARGBATTENUATEROW_AVX2)
bool clear = false; bool clear = false;
if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) { if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
bool clear = true; clear = true;
ARGBAttenuateRow = ARGBAttenuateRow_AVX2; ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
}
} }
#endif #endif
#if defined(HAS_ARGBATTENUATEROW_NEON) #if defined(HAS_ARGBATTENUATEROW_NEON)
...@@ -1077,19 +1080,25 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, ...@@ -1077,19 +1080,25 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
src_stride_argb = -src_stride_argb; src_stride_argb = -src_stride_argb;
} }
void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb, void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
int width) = ARGBUnattenuateRow_C; int width) = ARGBUnattenuateRow_C;
#if defined(HAS_ARGBUNATTENUATEROW_SSE2) #if defined(HAS_ARGBUNATTENUATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) && if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2; ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
}
} }
#endif #endif
#if defined(HAS_ARGBUNATTENUATEROW_AVX2) #if defined(HAS_ARGBUNATTENUATEROW_AVX2)
bool clear = false; bool clear = false;
if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) { if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
bool clear = true; clear = true;
ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2; ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
}
} }
#endif #endif
// TODO(fbarchard): Neon version. // TODO(fbarchard): Neon version.
...@@ -1099,6 +1108,13 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, ...@@ -1099,6 +1108,13 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
src_argb += src_stride_argb; src_argb += src_stride_argb;
dst_argb += dst_stride_argb; dst_argb += dst_stride_argb;
} }
#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
if (clear) {
__asm vzeroupper;
}
#endif
return 0; return 0;
} }
......
...@@ -250,6 +250,18 @@ YANY(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, ARGBAttenuateRow_C, ...@@ -250,6 +250,18 @@ YANY(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, ARGBAttenuateRow_C,
YANY(ARGBAttenuateRow_Any_SSE2, ARGBAttenuateRow_SSE2, ARGBAttenuateRow_C, YANY(ARGBAttenuateRow_Any_SSE2, ARGBAttenuateRow_SSE2, ARGBAttenuateRow_C,
4, 4, 3) 4, 4, 3)
#endif #endif
#ifdef HAS_ARGBUNATTENUATEROW_SSE2
YANY(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, ARGBUnattenuateRow_C,
4, 4, 3)
#endif
#ifdef HAS_ARGBATTENUATEROW_AVX2
YANY(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, ARGBAttenuateRow_C,
4, 4, 7)
#endif
#ifdef HAS_ARGBUNATTENUATEROW_AVX2
YANY(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, ARGBUnattenuateRow_C,
4, 4, 7)
#endif
#ifdef HAS_ARGBATTENUATEROW_NEON #ifdef HAS_ARGBATTENUATEROW_NEON
YANY(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, ARGBAttenuateRow_C, YANY(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, ARGBAttenuateRow_C,
4, 4, 7) 4, 4, 7)
......
...@@ -59,9 +59,9 @@ TEST_F(libyuvTest, TestAttenuate) { ...@@ -59,9 +59,9 @@ TEST_F(libyuvTest, TestAttenuate) {
EXPECT_EQ(255u, unatten_pixels[0][1]); EXPECT_EQ(255u, unatten_pixels[0][1]);
EXPECT_EQ(254u, unatten_pixels[0][2]); EXPECT_EQ(254u, unatten_pixels[0][2]);
EXPECT_EQ(128u, unatten_pixels[0][3]); EXPECT_EQ(128u, unatten_pixels[0][3]);
EXPECT_EQ(16u, unatten_pixels[1][0]); EXPECT_EQ(0u, unatten_pixels[1][0]);
EXPECT_EQ(64u, unatten_pixels[1][1]); EXPECT_EQ(0u, unatten_pixels[1][1]);
EXPECT_EQ(192u, unatten_pixels[1][2]); EXPECT_EQ(0u, unatten_pixels[1][2]);
EXPECT_EQ(0u, unatten_pixels[1][3]); EXPECT_EQ(0u, unatten_pixels[1][3]);
EXPECT_EQ(16u, unatten_pixels[2][0]); EXPECT_EQ(16u, unatten_pixels[2][0]);
EXPECT_EQ(64u, unatten_pixels[2][1]); EXPECT_EQ(64u, unatten_pixels[2][1]);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment