Commit 3f4d8605 authored by Frank Barchard

avx2 interpolate use 8 bit

BUG=libyuv:535
R=dhrosa@google.com

Review URL: https://codereview.chromium.org/1535833003 .
parent f4447745
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1557
+Version: 1558
 License: BSD
 License File: LICENSE
......
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1557
+#define LIBYUV_VERSION 1558
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
@@ -2211,30 +2211,30 @@ static void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
 void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
                       ptrdiff_t src_stride,
                       int width, int source_y_fraction) {
-  int y1_fraction = source_y_fraction >> 1;
-  int y0_fraction = 128 - y1_fraction;
+  int y1_fraction = source_y_fraction;
+  int y0_fraction = 256 - y1_fraction;
   const uint8* src_ptr1 = src_ptr + src_stride;
   int x;
   if (y1_fraction == 0) {
     memcpy(dst_ptr, src_ptr, width);
     return;
   }
-  if (y1_fraction == 64) {
+  if (y1_fraction == 128) {
     HalfRow_C(src_ptr, (int)(src_stride), dst_ptr, width);
     return;
   }
   for (x = 0; x < width - 1; x += 2) {
     dst_ptr[0] =
-        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 64) >> 7;
+        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
     dst_ptr[1] =
-        (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction + 64) >> 7;
+        (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction + 128) >> 8;
     src_ptr += 2;
     src_ptr1 += 2;
     dst_ptr += 2;
   }
   if (width & 1) {
     dst_ptr[0] =
-        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 64) >> 7;
+        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
   }
 }
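The C path now blends with the full 8-bit fraction: dst = (p0 * (256 - f) + p1 * f + 128) >> 8, with f == 0 still dispatched to memcpy and f == 128 to HalfRow_C. A minimal standalone sketch of the new per-pixel blend (hypothetical values in main, not libyuv code):

#include <stdint.h>
#include <stdio.h>

// 8-bit blend of two rows: fraction 0 keeps row 0; 256 would select row 1.
static uint8_t Blend8(uint8_t p0, uint8_t p1, int y1_fraction) {
  int y0_fraction = 256 - y1_fraction;
  return (uint8_t)((p0 * y0_fraction + p1 * y1_fraction + 128) >> 8);
}

int main(void) {
  // Example: fraction 85 is about a third of the way toward the second row.
  printf("%d\n", Blend8(10, 250, 85));  // (10*171 + 250*85 + 128) >> 8 = 90
  return 0;
}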
......
@@ -4791,20 +4791,19 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
                           int source_y_fraction) {
   asm volatile (
     "sub        %1,%0                          \n"
-    "shr        %3                             \n"
     "cmp        $0x0,%3                        \n"
     "je         100f                           \n"
-    "cmp        $0x40,%3                       \n"
+    "cmp        $0x80,%3                       \n"
     "je         50f                            \n"
     "movd       %3,%%xmm0                      \n"
     "neg        %3                             \n"
-    "add        $0x80,%3                       \n"
+    "add        $0x100,%3                      \n"
     "movd       %3,%%xmm5                      \n"
     "punpcklbw  %%xmm0,%%xmm5                  \n"
     "punpcklwd  %%xmm5,%%xmm5                  \n"
     "pshufd     $0x0,%%xmm5,%%xmm5             \n"
-    "mov        $0x400040,%%eax                \n"
+    "mov        $0x80808080,%%eax              \n"
     "movd       %%eax,%%xmm4                   \n"
     "pshufd     $0x0,%%xmm4,%%xmm4             \n"
@@ -4813,17 +4812,21 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
   "1:                                          \n"
     "movdqu     " MEMACCESS(1) ",%%xmm0        \n"
     MEMOPREG(movdqu,0x00,1,4,1,xmm2)
     "movdqa     %%xmm0,%%xmm1                  \n"
     "punpcklbw  %%xmm2,%%xmm0                  \n"
     "punpckhbw  %%xmm2,%%xmm1                  \n"
-    "pmaddubsw  %%xmm5,%%xmm0                  \n"
-    "pmaddubsw  %%xmm5,%%xmm1                  \n"
-    "paddw      %%xmm4,%%xmm0                  \n"
-    "paddw      %%xmm4,%%xmm1                  \n"
-    "psrlw      $0x7,%%xmm0                    \n"
-    "psrlw      $0x7,%%xmm1                    \n"
-    "packuswb   %%xmm1,%%xmm0                  \n"
-    MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
+    "psubb      %%xmm4,%%xmm0                  \n"
+    "psubb      %%xmm4,%%xmm1                  \n"
+    "movdqa     %%xmm5,%%xmm2                  \n"
+    "movdqa     %%xmm5,%%xmm3                  \n"
+    "pmaddubsw  %%xmm0,%%xmm2                  \n"
+    "pmaddubsw  %%xmm1,%%xmm3                  \n"
+    "paddw      %%xmm4,%%xmm2                  \n"
+    "paddw      %%xmm4,%%xmm3                  \n"
+    "psrlw      $0x8,%%xmm2                    \n"
+    "psrlw      $0x8,%%xmm3                    \n"
+    "packuswb   %%xmm3,%%xmm2                  \n"
+    MEMOPMEM(movdqu,xmm2,0x00,1,0,1)
     "lea        " MEMLEA(0x10,1) ",%1          \n"
     "sub        $0x10,%2                       \n"
     "jg         1b                             \n"
@@ -4857,7 +4860,7 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
     "+r"(source_y_fraction)  // %3
   : "r"((intptr_t)(src_stride))  // %4
   : "memory", "cc", "eax", NACL_R14
-    "xmm0", "xmm1", "xmm2", "xmm4", "xmm5"
+    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
   );
 }
 #endif  // HAS_INTERPOLATEROW_SSSE3
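pmaddubsw multiplies unsigned bytes from its first operand by signed bytes from its second. The old code used unsigned pixels times a signed 7-bit fraction; with a full 0..255 fraction that pairing breaks (fractions above 127 would read as negative and the products can overflow 16 bits), so the new code biases the pixels to signed range with psubb 0x80, multiplies them by the unsigned fractions, then adds 0x8080 per 16-bit lane (128*256 to undo the bias plus 128 for rounding) before shifting right by 8. The same 0x80808080 register serves both as the byte bias and as the word unbias-plus-round constant. A minimal intrinsics sketch of one 16-pixel iteration, assuming SSSE3 and a fraction in 1..255 (0 and 128 take the memcpy/HalfRow paths); the helper name and layout are illustrative, not libyuv API:

#include <stdint.h>
#include <tmmintrin.h>  // SSSE3: _mm_maddubs_epi16

static void BlendRow16_SSSE3(uint8_t* dst, const uint8_t* row0,
                             const uint8_t* row1, int y1_fraction) {
  int y0_fraction = 256 - y1_fraction;            // 1..255, fits in a byte
  // Each 16-bit lane holds the byte pair (y0, y1), matching the pixel interleave.
  __m128i frac = _mm_set1_epi16((short)((y1_fraction << 8) | y0_fraction));
  __m128i bias = _mm_set1_epi8((char)0x80);       // 128: unsigned -> signed
  __m128i unbias_round = _mm_set1_epi16((short)0x8080);  // 128*256 + 128

  __m128i p0 = _mm_loadu_si128((const __m128i*)row0);
  __m128i p1 = _mm_loadu_si128((const __m128i*)row1);
  __m128i lo = _mm_unpacklo_epi8(p0, p1);         // interleave (p0, p1) pairs
  __m128i hi = _mm_unpackhi_epi8(p0, p1);
  lo = _mm_sub_epi8(lo, bias);                    // bias pixels to signed range
  hi = _mm_sub_epi8(hi, bias);
  lo = _mm_maddubs_epi16(frac, lo);               // y0*(p0-128) + y1*(p1-128)
  hi = _mm_maddubs_epi16(frac, hi);
  lo = _mm_add_epi16(lo, unbias_round);           // undo bias, add rounding
  hi = _mm_add_epi16(hi, unbias_round);
  lo = _mm_srli_epi16(lo, 8);
  hi = _mm_srli_epi16(hi, 8);
  _mm_storeu_si128((__m128i*)dst, _mm_packus_epi16(lo, hi));
}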
@@ -4881,9 +4884,8 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
     "vmovd      %3,%%xmm5                      \n"
     "vpunpcklbw %%xmm0,%%xmm5,%%xmm5           \n"
     "vpunpcklwd %%xmm5,%%xmm5,%%xmm5           \n"
-    "vpxor      %%ymm0,%%ymm0,%%ymm0           \n"
-    "vpermd     %%ymm5,%%ymm0,%%ymm5           \n"
-    "mov        $0x400040,%%eax                \n"
+    "vbroadcastss %%xmm5,%%ymm5                \n"
+    "mov        $0x80808080,%%eax              \n"
     "vmovd      %%eax,%%xmm4                   \n"
     "vbroadcastss %%xmm4,%%ymm4                \n"
@@ -4894,12 +4896,14 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
     MEMOPREG(vmovdqu,0x00,1,4,1,ymm2)
     "vpunpckhbw %%ymm2,%%ymm0,%%ymm1           \n"
     "vpunpcklbw %%ymm2,%%ymm0,%%ymm0           \n"
-    "vpmaddubsw %%ymm5,%%ymm0,%%ymm0           \n"
-    "vpmaddubsw %%ymm5,%%ymm1,%%ymm1           \n"
-    "vpaddw     %%ymm4,%%ymm0,%%ymm0           \n"
+    "vpsubb     %%ymm4,%%ymm1,%%ymm1           \n"
+    "vpsubb     %%ymm4,%%ymm0,%%ymm0           \n"
+    "vpmaddubsw %%ymm1,%%ymm5,%%ymm1           \n"
+    "vpmaddubsw %%ymm0,%%ymm5,%%ymm0           \n"
     "vpaddw     %%ymm4,%%ymm1,%%ymm1           \n"
-    "vpsrlw     $0x7,%%ymm0,%%ymm0             \n"
-    "vpsrlw     $0x7,%%ymm1,%%ymm1             \n"
+    "vpaddw     %%ymm4,%%ymm0,%%ymm0           \n"
+    "vpsrlw     $0x8,%%ymm1,%%ymm1             \n"
+    "vpsrlw     $0x8,%%ymm0,%%ymm0             \n"
     "vpackuswb  %%ymm1,%%ymm0,%%ymm0           \n"
     MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1)
     "lea        " MEMLEA(0x20,1) ",%1          \n"
......
@@ -2259,16 +2259,16 @@ void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) {
 void InterpolateRow_NEON(uint8* dst_ptr,
                          const uint8* src_ptr, ptrdiff_t src_stride,
                          int dst_width, int source_y_fraction) {
-  int y1_fraction = source_y_fraction >> 1;
+  int y1_fraction = source_y_fraction;
   asm volatile (
     "cmp        %4, #0                         \n"
     "beq        100f                           \n"
     "add        %2, %1                         \n"
-    "cmp        %4, #64                        \n"
+    "cmp        %4, #128                       \n"
     "beq        50f                            \n"
     "vdup.8     d5, %4                         \n"
-    "rsb        %4, #128                       \n"
+    "rsb        %4, #256                       \n"
     "vdup.8     d4, %4                         \n"
     // General purpose row blend.
   "1:                                          \n"
@@ -2281,8 +2281,8 @@ void InterpolateRow_NEON(uint8* dst_ptr,
     "vmull.u8   q14, d1, d4                    \n"
     "vmlal.u8   q13, d2, d5                    \n"
     "vmlal.u8   q14, d3, d5                    \n"
-    "vrshrn.u16 d0, q13, #7                    \n"
-    "vrshrn.u16 d1, q14, #7                    \n"
+    "vrshrn.u16 d0, q13, #8                    \n"
+    "vrshrn.u16 d1, q14, #8                    \n"
     MEMACCESS(0)
     "vst1.8     {q0}, [%0]!                    \n"
     "bgt        1b                             \n"
......
@@ -2336,13 +2336,13 @@ void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width) {
 void InterpolateRow_NEON(uint8* dst_ptr,
                          const uint8* src_ptr, ptrdiff_t src_stride,
                          int dst_width, int source_y_fraction) {
-  int y1_fraction = source_y_fraction >> 1;
-  int y0_fraction = 128 - y1_fraction;
+  int y1_fraction = source_y_fraction;
+  int y0_fraction = 256 - y1_fraction;
   const uint8* src_ptr1 = src_ptr + src_stride;
   asm volatile (
     "cmp        %w4, #0                        \n"
     "b.eq       100f                           \n"
-    "cmp        %w4, #64                       \n"
+    "cmp        %w4, #128                      \n"
     "b.eq       50f                            \n"
     "dup        v5.16b, %w4                    \n"
@@ -2358,8 +2358,8 @@ void InterpolateRow_NEON(uint8* dst_ptr,
     "umull2     v3.8h, v0.16b, v4.16b          \n"
     "umlal      v2.8h, v1.8b, v5.8b            \n"
     "umlal2     v3.8h, v1.16b, v5.16b          \n"
-    "rshrn      v0.8b, v2.8h, #7               \n"
-    "rshrn2     v0.16b, v3.8h, #7              \n"
+    "rshrn      v0.8b, v2.8h, #8               \n"
+    "rshrn2     v0.16b, v3.8h, #8              \n"
     MEMACCESS(0)
     "st1        {v0.16b}, [%0], #16            \n"
     "b.gt       1b                             \n"
......
@@ -5566,24 +5566,22 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
     mov        edx, [esp + 8 + 12]  // src_stride
     mov        ecx, [esp + 8 + 16]  // dst_width
     mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
-    shr        eax, 1
     // Dispatch to specialized filters if applicable.
     cmp        eax, 0
-    je         xloop100  // 0 / 128.  Blend 100 / 0.
+    je         xloop100  // 0 / 256.  Blend 100 / 0.
     sub        edi, esi
-    cmp        eax, 64
-    je         xloop50   // 64 / 128 is 0.50.  Blend 50 / 50.
-    vmovd      xmm0, eax  // high fraction 0..127
+    cmp        eax, 128
+    je         xloop50   // 128 / 256 is 0.50.  Blend 50 / 50.
+    vmovd      xmm0, eax  // high fraction 0..255
     neg        eax
-    add        eax, 128
-    vmovd      xmm5, eax  // low fraction 128..1
+    add        eax, 256
+    vmovd      xmm5, eax  // low fraction 256..1
     vpunpcklbw xmm5, xmm5, xmm0
     vpunpcklwd xmm5, xmm5, xmm5
-    vpxor      ymm0, ymm0, ymm0
-    vpermd     ymm5, ymm0, ymm5
-    mov        eax, 0x00400040  // 64 for rounding.
+    vbroadcastss ymm5, xmm5
+    mov        eax, 0x80808080  // 128b for bias and rounding.
     vmovd      xmm4, eax
     vbroadcastss ymm4, xmm4
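The vmovd/vpunpcklbw/vpunpcklwd/vbroadcastss sequence above packs the two fractions as a repeating (low, high) byte pair, one pair per 16-bit lane, which is the operand layout vpmaddubsw expects. A tiny sketch of the resulting lane value, with a hypothetical fraction:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  int y1_fraction = 85;                      // high fraction
  int y0_fraction = 256 - y1_fraction;       // low fraction, 171
  uint16_t lane = (uint16_t)((y1_fraction << 8) | y0_fraction);
  printf("each 16-bit lane = 0x%04x\n", lane);  // 0x55ab
  return 0;
}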
@@ -5591,13 +5589,15 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
     vmovdqu    ymm0, [esi]
     vmovdqu    ymm2, [esi + edx]
     vpunpckhbw ymm1, ymm0, ymm2  // mutates
-    vpunpcklbw ymm0, ymm0, ymm2  // mutates
-    vpmaddubsw ymm0, ymm0, ymm5
-    vpmaddubsw ymm1, ymm1, ymm5
-    vpaddw     ymm0, ymm0, ymm4
-    vpaddw     ymm1, ymm1, ymm4
-    vpsrlw     ymm0, ymm0, 7
-    vpsrlw     ymm1, ymm1, 7
+    vpunpcklbw ymm0, ymm0, ymm2
+    vpsubb     ymm1, ymm1, ymm4  // bias to signed image
+    vpsubb     ymm0, ymm0, ymm4
+    vpmaddubsw ymm1, ymm5, ymm1
+    vpmaddubsw ymm0, ymm5, ymm0
+    vpaddw     ymm1, ymm1, ymm4  // unbias and round
+    vpaddw     ymm0, ymm0, ymm4
+    vpsrlw     ymm1, ymm1, 8
+    vpsrlw     ymm0, ymm0, 8
     vpackuswb  ymm0, ymm0, ymm1  // unmutates
     vmovdqu    [esi + edi], ymm0
     lea        esi, [esi + 32]
@@ -5629,6 +5629,7 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
 #endif  // HAS_INTERPOLATEROW_AVX2
 
 // Bilinear filter 16x2 -> 16x1
+// TODO(fbarchard): Consider allowing 256 using memcpy.
 __declspec(naked)
 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
                           ptrdiff_t src_stride, int dst_width,
@@ -5636,28 +5637,27 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
   __asm {
     push       esi
     push       edi
     mov        edi, [esp + 8 + 4]   // dst_ptr
     mov        esi, [esp + 8 + 8]   // src_ptr
     mov        edx, [esp + 8 + 12]  // src_stride
     mov        ecx, [esp + 8 + 16]  // dst_width
     mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
     sub        edi, esi
-    shr        eax, 1
     // Dispatch to specialized filters if applicable.
     cmp        eax, 0
-    je         xloop100  // 0 / 128.  Blend 100 / 0.
-    cmp        eax, 64
-    je         xloop50   // 64 / 128 is 0.50.  Blend 50 / 50.
-    movd       xmm0, eax  // high fraction 0..127
+    je         xloop100  // 0 / 256.  Blend 100 / 0.
+    cmp        eax, 128
+    je         xloop50   // 128 / 256 is 0.50.  Blend 50 / 50.
+    movd       xmm0, eax  // high fraction 0..255
     neg        eax
-    add        eax, 128
-    movd       xmm5, eax  // low fraction 128..1
+    add        eax, 256
+    movd       xmm5, eax  // low fraction 255..1
     punpcklbw  xmm5, xmm0
     punpcklwd  xmm5, xmm5
     pshufd     xmm5, xmm5, 0
-    mov        eax, 0x00400040  // 64 for rounding.
+    mov        eax, 0x80808080  // 128 for biasing image to signed.
     movd       xmm4, eax
     pshufd     xmm4, xmm4, 0x00
@@ -5667,14 +5667,18 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
     movdqu     xmm1, xmm0
     punpcklbw  xmm0, xmm2
     punpckhbw  xmm1, xmm2
-    pmaddubsw  xmm0, xmm5
-    pmaddubsw  xmm1, xmm5
-    paddw      xmm0, xmm4
-    paddw      xmm1, xmm4
-    psrlw      xmm0, 7
-    psrlw      xmm1, 7
-    packuswb   xmm0, xmm1
-    movdqu     [esi + edi], xmm0
+    psubb      xmm0, xmm4  // bias image by -128
+    psubb      xmm1, xmm4
+    movdqa     xmm2, xmm5
+    movdqa     xmm3, xmm5
+    pmaddubsw  xmm2, xmm0
+    pmaddubsw  xmm3, xmm1
+    paddw      xmm2, xmm4
+    paddw      xmm3, xmm4
+    psrlw      xmm2, 8
+    psrlw      xmm3, 8
+    packuswb   xmm2, xmm3
+    movdqu     [esi + edi], xmm2
     lea        esi, [esi + 16]
     sub        ecx, 16
     jg         xloop
......
@@ -940,7 +940,6 @@ TEST_F(LibYUVPlanarTest, TestARGBInterpolate) {
   }
 }
 
 TEST_F(LibYUVPlanarTest, TestInterpolatePlane) {
   SIMD_ALIGNED(uint8 orig_pixels_0[1280]);
   SIMD_ALIGNED(uint8 orig_pixels_1[1280]);
@@ -1024,7 +1023,7 @@ TEST_F(LibYUVPlanarTest, TestInterpolatePlane) {
 #define TESTTERP(FMT_A, BPP_A, STRIDE_A, \
                  FMT_B, BPP_B, STRIDE_B, \
-                 W1280, TERP, DIFF, N, NEG, OFF) \
+                 W1280, TERP, N, NEG, OFF) \
 TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) { \
   const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
   const int kHeight = benchmark_height_; \
@@ -1050,16 +1049,9 @@ TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) { \
                             dst_argb_opt, kStrideB, \
                             kWidth, NEG kHeight, TERP); \
     } \
-    int max_diff = 0; \
     for (int i = 0; i < kStrideB * kHeight; ++i) { \
-      int abs_diff = \
-          abs(static_cast<int>(dst_argb_c[i]) - \
-              static_cast<int>(dst_argb_opt[i])); \
-      if (abs_diff > max_diff) { \
-        max_diff = abs_diff; \
-      } \
+      EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
     } \
-    EXPECT_LE(max_diff, DIFF); \
     free_aligned_buffer_64(src_argb_a); \
     free_aligned_buffer_64(src_argb_b); \
     free_aligned_buffer_64(dst_argb_c); \
@@ -1067,16 +1059,10 @@ TEST_F(LibYUVPlanarTest, ARGBInterpolate##TERP##N) { \
 }
 
 #define TESTINTERPOLATE(TERP) \
-    TESTTERP(ARGB, 4, 1, ARGB, 4, 1, \
-             benchmark_width_ - 1, TERP, 1, _Any, +, 0) \
-    TESTTERP(ARGB, 4, 1, ARGB, 4, 1, \
-             benchmark_width_, TERP, 1, _Unaligned, +, 1) \
-    TESTTERP(ARGB, 4, 1, ARGB, 4, 1, \
-             benchmark_width_, TERP, 1, _Invert, -, 0) \
-    TESTTERP(ARGB, 4, 1, ARGB, 4, 1, \
-             benchmark_width_, TERP, 1, _Opt, +, 0) \
-    TESTTERP(ARGB, 4, 1, ARGB, 4, 1, \
-             benchmark_width_ - 1, TERP, 1, _Any_Invert, -, 0)
+    TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_ - 1, TERP, _Any, +, 0) \
+    TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Unaligned, +, 1) \
+    TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Invert, -, 0) \
+    TESTTERP(ARGB, 4, 1, ARGB, 4, 1, benchmark_width_, TERP, _Opt, +, 0)
 
 TESTINTERPOLATE(0)
 TESTINTERPOLATE(64)
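Because the blend now keeps the low bit of source_y_fraction that the old 7-bit path discarded, the C and optimized paths are expected to match exactly, so the macro drops the DIFF tolerance and compares with EXPECT_EQ. A small worked example with hypothetical values showing how the old and new math can differ by one:

#include <stdio.h>

int main(void) {
  // Old path: fraction halved to 7 bits (85 >> 1 == 42), +64 round, >> 7.
  int old_blend = (10 * (128 - (85 >> 1)) + 250 * (85 >> 1) + 64) >> 7;  // 89
  // New path: full 8-bit fraction, +128 round, >> 8.
  int new_blend = (10 * (256 - 85) + 250 * 85 + 128) >> 8;               // 90
  printf("old=%d new=%d\n", old_blend, new_blend);
  return 0;
}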
......