Commit 9487b9d6 authored by Frank Barchard's avatar Frank Barchard

any allow for avx2 32 pixels at a time of argb

R=harryjin@google.com
BUG=libyuv:461

Review URL: https://webrtc-codereview.appspot.com/54779004.
parent f4705d56
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1442
Version: 1443
License: BSD
License File: LICENSE
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1442
#define LIBYUV_VERSION 1443
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -224,16 +224,16 @@ ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
// Any 1 to 1.
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
SIMD_ALIGNED(uint8 temp[64 * 2]); \
memset(temp, 0, 64); /* for YUY2 and msan */ \
SIMD_ALIGNED(uint8 temp[128 * 2]); \
memset(temp, 0, 128); /* for YUY2 and msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_ptr, n); \
} \
memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
ANY_SIMD(temp, temp + 64, MASK + 1); \
memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
ANY_SIMD(temp, temp + 128, MASK + 1); \
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
}
#ifdef HAS_COPYROW_AVX
......@@ -593,28 +593,29 @@ ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15)
#undef ANY12
// Any 1 to 2 with source stride (2 rows of source). Outputs UV planes.
// 128 byte row allows for 32 avx ARGB pixels.
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
void NAMEANY(const uint8* src_ptr, int src_stride_ptr, \
uint8* dst_u, uint8* dst_v, int width) { \
SIMD_ALIGNED(uint8 temp[64 * 4]); \
memset(temp, 0, 64 * 2); /* for msan */ \
SIMD_ALIGNED(uint8 temp[128 * 4]); \
memset(temp, 0, 128 * 2); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, src_stride_ptr, dst_u, dst_v, n); \
} \
memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
memcpy(temp + 64, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \
memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \
SS(r, UVSHIFT) * BPP); \
if ((width & 1) && BPP == 4) { /* repeat last 4 bytes for subsampler */ \
memcpy(temp + SS(r, UVSHIFT) * BPP, \
temp + SS(r, UVSHIFT) * BPP - BPP, 4); \
memcpy(temp + 64 + SS(r, UVSHIFT) * BPP, \
temp + 64 + SS(r, UVSHIFT) * BPP - BPP, 4); \
memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \
temp + 128 + SS(r, UVSHIFT) * BPP - BPP, 4); \
} \
ANY_SIMD(temp, 64, temp + 128, temp + 192, MASK + 1); \
memcpy(dst_u + (n >> 1), temp + 128, SS(r, 1)); \
memcpy(dst_v + (n >> 1), temp + 192, SS(r, 1)); \
ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1); \
memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1)); \
memcpy(dst_v + (n >> 1), temp + 384, SS(r, 1)); \
}
#ifdef HAS_ARGBTOUVROW_AVX2
......
......@@ -34,7 +34,7 @@ namespace libyuv {
#define ERROR_FULL 5
#endif
#define TESTCS(TESTNAME, YUVTOARGB, ARGBTOYUV, HS1, HS, HN, DIFF, CDIFF) \
#define TESTCS(TESTNAME, YUVTOARGB, ARGBTOYUV, HS1, HS, HN, DIFF) \
TEST_F(libyuvTest, TESTNAME) { \
const int kPixels = benchmark_width_ * benchmark_height_; \
const int kHalfPixels = ((benchmark_width_ + 1) / 2) * \
......@@ -93,7 +93,7 @@ TEST_F(libyuvTest, TESTNAME) { \
temp_v, (benchmark_width_ + 1) / 2, \
benchmark_width_, benchmark_height_); \
\
MaskCpuFlags(disable_cpu_flags_); \
MaskCpuFlags(disable_cpu_flags_); \
YUVTOARGB(temp_y, benchmark_width_, \
temp_u, (benchmark_width_ + 1) / 2, \
temp_v, (benchmark_width_ + 1) / 2, \
......@@ -110,7 +110,7 @@ TEST_F(libyuvTest, TESTNAME) { \
} \
/* Test C and SIMD match. */ \
for (int i = 0; i < kPixels * 4; ++i) { \
EXPECT_NEAR(dst_pixels_c[i], dst_pixels_opt[i], CDIFF); \
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \
} \
/* Test SIMD is close to original. */ \
for (int i = 0; i < kPixels * 4; ++i) { \
......@@ -129,10 +129,10 @@ TEST_F(libyuvTest, TESTNAME) { \
free_aligned_buffer_64(dst_pixels_c); \
} \
TESTCS(TestI420, I420ToARGB, ARGBToI420, 1, 2, benchmark_width_, ERROR_FULL, 0)
TESTCS(TestI422, I422ToARGB, ARGBToI422, 0, 1, 0, ERROR_FULL, 0)
TESTCS(TestJ420, J420ToARGB, ARGBToJ420, 1, 2, benchmark_width_, 3, 0)
TESTCS(TestJ422, J422ToARGB, ARGBToJ422, 0, 1, 0, 3, 0)
TESTCS(TestI420, I420ToARGB, ARGBToI420, 1, 2, benchmark_width_, ERROR_FULL)
TESTCS(TestI422, I422ToARGB, ARGBToI422, 0, 1, 0, ERROR_FULL)
TESTCS(TestJ420, J420ToARGB, ARGBToJ420, 1, 2, benchmark_width_, 3)
TESTCS(TestJ422, J422ToARGB, ARGBToJ422, 0, 1, 0, 3)
static void YUVToRGB(int y, int u, int v, int* r, int* g, int* b) {
const int kWidth = 16;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment