Commit 5eefbe23 authored by Frank Barchard

Fix for drmemory failure on I411ToARGB

Before
I420ToARGB_Opt (594 ms)
I422ToARGB_Opt (483 ms)
I411ToARGB_Opt (748 ms) ***
I444ToARGB_Opt (452 ms)
I400ToARGB_Opt (218 ms)

After
I420ToARGB_Opt (591 ms)
I422ToARGB_Opt (454 ms)
I411ToARGB_Opt (502 ms)  ***
I444ToARGB_Opt (441 ms)
I400ToARGB_Opt (216 ms)

TBR=harryjin@google.com
BUG=libyuv:525

Review URL: https://codereview.chromium.org/1459513002 .
parent ec4b258d
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1539 Version: 1540
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1539 #define LIBYUV_VERSION 1540
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -2440,9 +2440,14 @@ void I422ToRGBARow_AVX2(const uint8* y_buf, ...@@ -2440,9 +2440,14 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
} }
// Read 2 UV from 411, upsample to 8 UV. // Read 2 UV from 411, upsample to 8 UV.
#define READYUV411 __asm { \ // drmemory fails with memory fault if pinsrw used. libyuv bug: 525
__asm pinsrw xmm0, [esi], 0 /* U */ \ // __asm pinsrw xmm0, [esi], 0 /* U */
__asm pinsrw xmm1, [esi + edi], 0 /* V */ \ // __asm pinsrw xmm1, [esi + edi], 0 /* V */
#define READYUV411_EBX __asm { \
__asm movzx ebx, word ptr [esi] /* U */ \
__asm movd xmm0, ebx \
__asm movzx ebx, word ptr [esi + edi] /* V */ \
__asm movd xmm1, ebx \
__asm lea esi, [esi + 2] \ __asm lea esi, [esi + 2] \
__asm punpcklbw xmm0, xmm1 /* UV */ \ __asm punpcklbw xmm0, xmm1 /* UV */ \
__asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
...@@ -2816,23 +2821,25 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -2816,23 +2821,25 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf,
push esi push esi
push edi push edi
push ebx push ebx
mov eax, [esp + 12 + 4] // Y push ebp
mov esi, [esp + 12 + 8] // U mov eax, [esp + 16 + 4] // Y
mov edi, [esp + 12 + 12] // V mov esi, [esp + 16 + 8] // U
mov edx, [esp + 12 + 16] // abgr mov edi, [esp + 16 + 12] // V
mov ebx, [esp + 12 + 20] // yuvconstants mov edx, [esp + 16 + 16] // abgr
mov ecx, [esp + 12 + 24] // width mov ebp, [esp + 16 + 20] // yuvconstants
mov ecx, [esp + 16 + 24] // width
sub edi, esi sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop: convertloop:
READYUV411 READYUV411_EBX
YUVTORGB(ebx) YUVTORGB(ebp)
STOREARGB STOREARGB
sub ecx, 8 sub ecx, 8
jg convertloop jg convertloop
pop ebp
pop ebx pop ebx
pop edi pop edi
pop esi pop esi
......
...@@ -415,8 +415,8 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ ...@@ -415,8 +415,8 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
const int kSizeUV = \ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \ const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
align_buffer_64(src_y, kWidth * kHeight + OFF); \ align_buffer_64(src_y, kWidth * kHeight + OFF); \
align_buffer_64(src_u, kSizeUV + OFF); \ align_buffer_64(src_u, kSizeUV + OFF); \
align_buffer_64(src_v, kSizeUV + OFF); \ align_buffer_64(src_v, kSizeUV + OFF); \
...@@ -433,15 +433,15 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ ...@@ -433,15 +433,15 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \
MaskCpuFlags(disable_cpu_flags_); \ MaskCpuFlags(disable_cpu_flags_); \
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ src_u + OFF, kStrideUV, \
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ src_v + OFF, kStrideUV, \
dst_argb_c + OFF, kStrideB, \ dst_argb_c + OFF, kStrideB, \
kWidth, NEG kHeight); \ kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \ MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \ for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ src_u + OFF, kStrideUV, \
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ src_v + OFF, kStrideUV, \
dst_argb_opt + OFF, kStrideB, \ dst_argb_opt + OFF, kStrideB, \
kWidth, NEG kHeight); \ kWidth, NEG kHeight); \
} \ } \
...@@ -524,8 +524,8 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ ...@@ -524,8 +524,8 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
const int kSizeUV = \ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \ const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
align_buffer_64(src_y, kWidth * kHeight + OFF); \ align_buffer_64(src_y, kWidth * kHeight + OFF); \
align_buffer_64(src_u, kSizeUV + OFF); \ align_buffer_64(src_u, kSizeUV + OFF); \
align_buffer_64(src_v, kSizeUV + OFF); \ align_buffer_64(src_v, kSizeUV + OFF); \
...@@ -544,16 +544,16 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ ...@@ -544,16 +544,16 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \
MaskCpuFlags(disable_cpu_flags_); \ MaskCpuFlags(disable_cpu_flags_); \
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ src_u + OFF, kStrideUV, \
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ src_v + OFF, kStrideUV, \
src_a + OFF, kWidth, \ src_a + OFF, kWidth, \
dst_argb_c + OFF, kStrideB, \ dst_argb_c + OFF, kStrideB, \
kWidth, NEG kHeight, ATTEN); \ kWidth, NEG kHeight, ATTEN); \
MaskCpuFlags(benchmark_cpu_info_); \ MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \ for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ src_u + OFF, kStrideUV, \
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ src_v + OFF, kStrideUV, \
src_a + OFF, kWidth, \ src_a + OFF, kWidth, \
dst_argb_opt + OFF, kStrideB, \ dst_argb_opt + OFF, kStrideB, \
kWidth, NEG kHeight, ATTEN); \ kWidth, NEG kHeight, ATTEN); \
...@@ -598,32 +598,31 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ ...@@ -598,32 +598,31 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = benchmark_height_; \ const int kHeight = benchmark_height_; \
const int kStrideB = kWidth * BPP_B; \ const int kStrideB = kWidth * BPP_B; \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
align_buffer_64(src_y, kWidth * kHeight + OFF); \ align_buffer_64(src_y, kWidth * kHeight + OFF); \
align_buffer_64(src_uv, \ align_buffer_64(src_uv, \
SUBSAMPLE(kWidth, SUBSAMP_X) * \ kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \
SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \
align_buffer_64(dst_argb_c, kStrideB * kHeight); \ align_buffer_64(dst_argb_c, kStrideB * kHeight); \
align_buffer_64(dst_argb_opt, kStrideB * kHeight); \ align_buffer_64(dst_argb_opt, kStrideB * kHeight); \
for (int i = 0; i < kHeight; ++i) \ for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kWidth; ++j) \ for (int j = 0; j < kWidth; ++j) \
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \ for (int j = 0; j < kStrideUV * 2; ++j) { \
src_uv[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j + OFF] = \ src_uv[i * kStrideUV * 2 + j + OFF] = (fastrand() & 0xff); \
(fastrand() & 0xff); \
} \ } \
} \ } \
memset(dst_argb_c, 1, kStrideB * kHeight); \ memset(dst_argb_c, 1, kStrideB * kHeight); \
memset(dst_argb_opt, 101, kStrideB * kHeight); \ memset(dst_argb_opt, 101, kStrideB * kHeight); \
MaskCpuFlags(disable_cpu_flags_); \ MaskCpuFlags(disable_cpu_flags_); \
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
src_uv + OFF, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \ src_uv + OFF, kStrideUV * 2, \
dst_argb_c, kWidth * BPP_B, \ dst_argb_c, kWidth * BPP_B, \
kWidth, NEG kHeight); \ kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \ MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \ for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
src_uv + OFF, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \ src_uv + OFF, kStrideUV * 2, \
dst_argb_opt, kWidth * BPP_B, \ dst_argb_opt, kWidth * BPP_B, \
kWidth, NEG kHeight); \ kWidth, NEG kHeight); \
} \ } \
...@@ -677,48 +676,49 @@ TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9) ...@@ -677,48 +676,49 @@ TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9)
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
const int kStride = \ const int kStride = \
(SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMP_X * 8 * BPP_A + 7) / 8; \ (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \
align_buffer_64(src_argb, kStride * kHeight + OFF); \ align_buffer_64(src_argb, kStride * kHeight + OFF); \
align_buffer_64(dst_y_c, kWidth * kHeight); \ align_buffer_64(dst_y_c, kWidth * kHeight); \
align_buffer_64(dst_u_c, \ align_buffer_64(dst_u_c, \
SUBSAMPLE(kWidth, SUBSAMP_X) * \ kStrideUV * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_64(dst_v_c, \ align_buffer_64(dst_v_c, \
SUBSAMPLE(kWidth, SUBSAMP_X) * \ kStrideUV * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_64(dst_y_opt, kWidth * kHeight); \ align_buffer_64(dst_y_opt, kWidth * kHeight); \
align_buffer_64(dst_u_opt, \ align_buffer_64(dst_u_opt, \
SUBSAMPLE(kWidth, SUBSAMP_X) * \ kStrideUV * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_64(dst_v_opt, \ align_buffer_64(dst_v_opt, \
SUBSAMPLE(kWidth, SUBSAMP_X) * \ kStrideUV * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_y_c, 1, kWidth * kHeight); \ memset(dst_y_c, 1, kWidth * kHeight); \
memset(dst_u_c, 2, \ memset(dst_u_c, 2, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_v_c, 3, \ memset(dst_v_c, 3, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_y_opt, 101, kWidth * kHeight); \ memset(dst_y_opt, 101, kWidth * kHeight); \
memset(dst_u_opt, 102, \ memset(dst_u_opt, 102, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_v_opt, 103, \ memset(dst_v_opt, 103, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
for (int i = 0; i < kHeight; ++i) \ for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kStride; ++j) \ for (int j = 0; j < kStride; ++j) \
src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \ src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \
MaskCpuFlags(disable_cpu_flags_); \ MaskCpuFlags(disable_cpu_flags_); \
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
dst_y_c, kWidth, \ dst_y_c, kWidth, \
dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), \ dst_u_c, kStrideUV, \
dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X), \ dst_v_c, kStrideUV, \
kWidth, NEG kHeight); \ kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \ MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \ for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
dst_y_opt, kWidth, \ dst_y_opt, kWidth, \
dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ dst_u_opt, kStrideUV, \
dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X), \ dst_v_opt, kStrideUV, \
kWidth, NEG kHeight); \ kWidth, NEG kHeight); \
} \ } \
for (int i = 0; i < kHeight; ++i) { \ for (int i = 0; i < kHeight; ++i) { \
...@@ -728,19 +728,17 @@ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ ...@@ -728,19 +728,17 @@ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
} \ } \
} \ } \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ for (int j = 0; j < kStrideUV; ++j) { \
EXPECT_NEAR(static_cast<int>(dst_u_c[i * \ EXPECT_NEAR(static_cast<int>(dst_u_c[i * kStrideUV + j]), \
SUBSAMPLE(kWidth, SUBSAMP_X) + j]), \ static_cast<int>(dst_u_opt[i * kStrideUV + j]), DIFF); \
static_cast<int>(dst_u_opt[i * \
SUBSAMPLE(kWidth, SUBSAMP_X) + j]), DIFF); \
} \ } \
} \ } \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ for (int j = 0; j < kStrideUV; ++j) { \
EXPECT_NEAR(static_cast<int>(dst_v_c[i * \ EXPECT_NEAR(static_cast<int>(dst_v_c[i * \
SUBSAMPLE(kWidth, SUBSAMP_X) + j]), \ kStrideUV + j]), \
static_cast<int>(dst_v_opt[i * \ static_cast<int>(dst_v_opt[i * \
SUBSAMPLE(kWidth, SUBSAMP_X) + j]), DIFF); \ kStrideUV + j]), DIFF); \
} \ } \
} \ } \
free_aligned_buffer_64(dst_y_c); \ free_aligned_buffer_64(dst_y_c); \
...@@ -796,35 +794,28 @@ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ ...@@ -796,35 +794,28 @@ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = benchmark_height_; \ const int kHeight = benchmark_height_; \
const int kStride = (kWidth * 8 * BPP_A + 7) / 8; \ const int kStride = (kWidth * 8 * BPP_A + 7) / 8; \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
align_buffer_64(src_argb, kStride * kHeight + OFF); \ align_buffer_64(src_argb, kStride * kHeight + OFF); \
align_buffer_64(dst_y_c, kWidth * kHeight); \ align_buffer_64(dst_y_c, kWidth * kHeight); \
align_buffer_64(dst_uv_c, \ align_buffer_64(dst_uv_c, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_64(dst_y_opt, kWidth * kHeight); \ align_buffer_64(dst_y_opt, kWidth * kHeight); \
align_buffer_64(dst_uv_opt, \ align_buffer_64(dst_uv_opt, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
for (int i = 0; i < kHeight; ++i) \ for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kStride; ++j) \ for (int j = 0; j < kStride; ++j) \
src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \ src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \
memset(dst_y_c, 1, kWidth * kHeight); \ memset(dst_y_c, 1, kWidth * kHeight); \
memset(dst_uv_c, 2, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_y_opt, 101, kWidth * kHeight); \ memset(dst_y_opt, 101, kWidth * kHeight); \
memset(dst_uv_opt, 102, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
MaskCpuFlags(disable_cpu_flags_); \ MaskCpuFlags(disable_cpu_flags_); \
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
dst_y_c, kWidth, \ dst_y_c, kWidth, dst_uv_c, kStrideUV * 2, \
dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \
kWidth, NEG kHeight); \ kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \ MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \ for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \ FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
dst_y_opt, kWidth, \ dst_y_opt, kWidth, \
dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2, \ dst_uv_opt, kStrideUV * 2, kWidth, NEG kHeight); \
kWidth, NEG kHeight); \
} \ } \
int max_diff = 0; \ int max_diff = 0; \
for (int i = 0; i < kHeight; ++i) { \ for (int i = 0; i < kHeight; ++i) { \
...@@ -839,12 +830,10 @@ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \ ...@@ -839,12 +830,10 @@ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
} \ } \
EXPECT_LE(max_diff, 4); \ EXPECT_LE(max_diff, 4); \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \ for (int j = 0; j < kStrideUV * 2; ++j) { \
int abs_diff = \ int abs_diff = \
abs(static_cast<int>(dst_uv_c[i * \ abs(static_cast<int>(dst_uv_c[i * kStrideUV * 2 + j]) - \
SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) - \ static_cast<int>(dst_uv_opt[i * kStrideUV * 2 + j])); \
static_cast<int>(dst_uv_opt[i * \
SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j])); \
if (abs_diff > max_diff) { \ if (abs_diff > max_diff) { \
max_diff = abs_diff; \ max_diff = abs_diff; \
} \ } \
...@@ -1366,8 +1355,9 @@ TEST_F(LibYUVConvertTest, CropNV12) { ...@@ -1366,8 +1355,9 @@ TEST_F(LibYUVConvertTest, CropNV12) {
((benchmark_height_ - (benchmark_height_ * 360 / 480)) / 2 + 1) & ~1; ((benchmark_height_ - (benchmark_height_ * 360 / 480)) / 2 + 1) & ~1;
const int kDestWidth = benchmark_width_; const int kDestWidth = benchmark_width_;
const int kDestHeight = benchmark_height_ - crop_y * 2; const int kDestHeight = benchmark_height_ - crop_y * 2;
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);
const int sample_size = kWidth * kHeight + const int sample_size = kWidth * kHeight +
SUBSAMPLE(kWidth, SUBSAMP_X) * kStrideUV *
SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; SUBSAMPLE(kHeight, SUBSAMP_Y) * 2;
align_buffer_64(src_y, sample_size); align_buffer_64(src_y, sample_size);
uint8* src_uv = src_y + kWidth * kHeight; uint8* src_uv = src_y + kWidth * kHeight;
...@@ -1392,7 +1382,7 @@ TEST_F(LibYUVConvertTest, CropNV12) { ...@@ -1392,7 +1382,7 @@ TEST_F(LibYUVConvertTest, CropNV12) {
src_y[i] = (fastrand() & 0xff); src_y[i] = (fastrand() & 0xff);
} }
for (int i = 0; i < (SUBSAMPLE(kHeight, SUBSAMP_Y) * for (int i = 0; i < (SUBSAMPLE(kHeight, SUBSAMP_Y) *
SUBSAMPLE(kWidth, SUBSAMP_X)) * 2; ++i) { kStrideUV) * 2; ++i) {
src_uv[i] = (fastrand() & 0xff); src_uv[i] = (fastrand() & 0xff);
} }
memset(dst_y, 1, kDestWidth * kDestHeight); memset(dst_y, 1, kDestWidth * kDestHeight);
...@@ -1416,8 +1406,8 @@ TEST_F(LibYUVConvertTest, CropNV12) { ...@@ -1416,8 +1406,8 @@ TEST_F(LibYUVConvertTest, CropNV12) {
libyuv::kRotate0, libyuv::FOURCC_NV12); libyuv::kRotate0, libyuv::FOURCC_NV12);
NV12ToI420(src_y + crop_y * kWidth, kWidth, NV12ToI420(src_y + crop_y * kWidth, kWidth,
src_uv + (crop_y / 2) * SUBSAMPLE(kWidth, SUBSAMP_X) * 2, src_uv + (crop_y / 2) * kStrideUV * 2,
SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kStrideUV * 2,
dst_y, kDestWidth, dst_y, kDestWidth,
dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X),
dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X), dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X),
...@@ -1548,8 +1538,8 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##Dither##N) { \ ...@@ -1548,8 +1538,8 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##Dither##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
const int kSizeUV = \ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \ const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
align_buffer_64(src_y, kWidth * kHeight + OFF); \ align_buffer_64(src_y, kWidth * kHeight + OFF); \
align_buffer_64(src_u, kSizeUV + OFF); \ align_buffer_64(src_u, kSizeUV + OFF); \
align_buffer_64(src_v, kSizeUV + OFF); \ align_buffer_64(src_v, kSizeUV + OFF); \
...@@ -1566,15 +1556,15 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##Dither##N) { \ ...@@ -1566,15 +1556,15 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##Dither##N) { \
memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \ memset(dst_argb_opt + OFF, 101, kStrideB * kHeight); \
MaskCpuFlags(disable_cpu_flags_); \ MaskCpuFlags(disable_cpu_flags_); \
FMT_PLANAR##To##FMT_B##Dither(src_y + OFF, kWidth, \ FMT_PLANAR##To##FMT_B##Dither(src_y + OFF, kWidth, \
src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ src_u + OFF, kStrideUV, \
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ src_v + OFF, kStrideUV, \
dst_argb_c + OFF, kStrideB, \ dst_argb_c + OFF, kStrideB, \
NULL, kWidth, NEG kHeight); \ NULL, kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \ MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \ for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B##Dither(src_y + OFF, kWidth, \ FMT_PLANAR##To##FMT_B##Dither(src_y + OFF, kWidth, \
src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ src_u + OFF, kStrideUV, \
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ src_v + OFF, kStrideUV, \
dst_argb_opt + OFF, kStrideB, \ dst_argb_opt + OFF, kStrideB, \
NULL, kWidth, NEG kHeight); \ NULL, kWidth, NEG kHeight); \
} \ } \
...@@ -1698,8 +1688,8 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##_##FMT_C##N) { \ ...@@ -1698,8 +1688,8 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##_##FMT_C##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = benchmark_height_; \ const int kHeight = benchmark_height_; \
const int kStrideB = kWidth * BPP_B; \ const int kStrideB = kWidth * BPP_B; \
const int kSizeUV = \ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \ const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
align_buffer_64(src_y, kWidth * kHeight + OFF); \ align_buffer_64(src_y, kWidth * kHeight + OFF); \
align_buffer_64(src_u, kSizeUV + OFF); \ align_buffer_64(src_u, kSizeUV + OFF); \
align_buffer_64(src_v, kSizeUV + OFF); \ align_buffer_64(src_v, kSizeUV + OFF); \
...@@ -1714,8 +1704,8 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##_##FMT_C##N) { \ ...@@ -1714,8 +1704,8 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##_##FMT_C##N) { \
memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \ memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \
for (int i = 0; i < benchmark_iterations_; ++i) { \ for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \ FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ src_u + OFF, kStrideUV, \
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ src_v + OFF, kStrideUV, \
dst_argb_b + OFF, kStrideB, \ dst_argb_b + OFF, kStrideB, \
kWidth, NEG kHeight); \ kWidth, NEG kHeight); \
} \ } \
...@@ -1727,8 +1717,8 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##_##FMT_C##N) { \ ...@@ -1727,8 +1717,8 @@ TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##_##FMT_C##N) { \
memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \ memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \
memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \ memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \
FMT_PLANAR##To##FMT_C(src_y + OFF, kWidth, \ FMT_PLANAR##To##FMT_C(src_y + OFF, kWidth, \
src_u + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ src_u + OFF, kStrideUV, \
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), \ src_v + OFF, kStrideUV, \
dst_argb_c + OFF, kStrideC, \ dst_argb_c + OFF, kStrideC, \
kWidth, NEG kHeight); \ kWidth, NEG kHeight); \
/* Convert B to C */ \ /* Convert B to C */ \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment