Commit e91bdaca authored by fbarchard@google.com

Move HalfRow to row_win and port to row_neon

BUG=118
TEST=libyuvTest.I420ToI422_OptVsC (247 ms)
Review URL: https://webrtc-codereview.appspot.com/855012

git-svn-id: http://libyuv.googlecode.com/svn/trunk@400 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 827de16b
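HalfRow is the primitive being moved: it writes one output row in which each byte is the rounded average of the two vertically adjacent source bytes, which is how I422ToI420 halves the chroma height. A minimal standalone sketch of that operation, mirroring the HalfRow_C reference below (HalfRowSketch and the main() driver are illustrative names, not part of the change):

#include <stdint.h>
#include <stdio.h>

// Rounded average of two adjacent rows: dst[x] = (row0[x] + row1[x] + 1) / 2.
// SSE2 (pavgb) and NEON (vrhadd.u8) compute the same rounded average,
// 16 bytes per instruction.
static void HalfRowSketch(const uint8_t* src_uv, int src_uv_stride,
                          uint8_t* dst_uv, int pix) {
  for (int x = 0; x < pix; ++x) {
    dst_uv[x] = (uint8_t)((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1);
  }
}

int main(void) {
  const uint8_t rows[2][4] = {{0, 10, 255, 7}, {1, 11, 255, 8}};
  uint8_t out[4];
  HalfRowSketch(rows[0], 4, out, 4);  // stride 4 makes rows[1] the second row
  printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);  // prints: 1 11 255 8
  return 0;
}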
@@ -53,11 +53,13 @@ extern "C" {
 #define HAS_BGRATOYROW_SSSE3
 #define HAS_COPYROW_SSE2
 #define HAS_COPYROW_X86
+#define HAS_HALFROW_SSE2
 #define HAS_I400TOARGBROW_SSE2
 #define HAS_I411TOARGBROW_SSSE3
 #define HAS_I422TOABGRROW_SSSE3
 #define HAS_I422TOARGBROW_SSSE3
 #define HAS_I422TOBGRAROW_SSSE3
+#define HAS_I422TORGBAROW_SSSE3
 #define HAS_I444TOARGBROW_SSSE3
 #define HAS_MIRRORROW_SSSE3
 #define HAS_MIRRORROWUV_SSSE3
@@ -96,7 +98,6 @@ extern "C" {
 #if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
 #define HAS_ABGRTOARGBROW_SSSE3
 #define HAS_ARGBCOLORTABLEROW_X86
-#define HAS_I422TORGBAROW_SSSE3
 #define HAS_RGBATOARGBROW_SSSE3
 #define HAS_RGBATOUVROW_SSSE3
 #define HAS_RGBATOYROW_SSSE3
@@ -116,6 +117,7 @@ extern "C" {
 // The following are available on Neon platforms
 #if !defined(YUV_DISABLE_ASM) && (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
 #define HAS_COPYROW_NEON
+#define HAS_HALFROW_NEON
 #define HAS_I422TOABGRROW_NEON
 #define HAS_I422TOARGBROW_NEON
 #define HAS_I422TOBGRAROW_NEON
@@ -750,6 +752,14 @@ void ARGBInterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
                               ptrdiff_t src_stride, int dst_width,
                               int source_y_fraction);

+void HalfRow_C(const uint8* src_uv, int src_uv_stride,
+               uint8* dst_uv, int pix);
+void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
+                  uint8* dst_uv, int pix);
+void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
+                  uint8* dst_uv, int pix);
+
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
...
@@ -62,66 +62,6 @@ int I420Copy(const uint8* src_y, int src_stride_y,
   return 0;
 }

-// Move to row_win etc.
-#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
-#define HAS_HALFROW_SSE2
-__declspec(naked) __declspec(align(16))
-static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
-                         uint8* dst_uv, int pix) {
-  __asm {
-    push       edi
-    mov        eax, [esp + 4 + 4]    // src_uv
-    mov        edx, [esp + 4 + 8]    // src_uv_stride
-    mov        edi, [esp + 4 + 12]   // dst_uv
-    mov        ecx, [esp + 4 + 16]   // pix
-    sub        edi, eax
-
-    align      16
-  convertloop:
-    movdqa     xmm0, [eax]
-    pavgb      xmm0, [eax + edx]
-    sub        ecx, 16
-    movdqa     [eax + edi], xmm0
-    lea        eax, [eax + 16]
-    jg         convertloop
-
-    pop        edi
-    ret
-  }
-}
-#elif !defined(YUV_DISABLE_ASM) && (defined(__x86_64__) || defined(__i386__))
-#define HAS_HALFROW_SSE2
-static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
-                         uint8* dst_uv, int pix) {
-  asm volatile (
-    "sub        %0,%1                          \n"
-    ".p2align  4                               \n"
-  "1:                                          \n"
-    "movdqa     (%0),%%xmm0                    \n"
-    "pavgb      (%0,%3),%%xmm0                 \n"
-    "sub        $0x10,%2                       \n"
-    "movdqa     %%xmm0,(%0,%1)                 \n"
-    "lea        0x10(%0),%0                    \n"
-    "jg         1b                             \n"
-  : "+r"(src_uv),  // %0
-    "+r"(dst_uv),  // %1
-    "+r"(pix)      // %2
-  : "r"(static_cast<intptr_t>(src_uv_stride))  // %3
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0"
-#endif
-  );
-}
-#endif
-
-static void HalfRow_C(const uint8* src_uv, int src_uv_stride,
-                      uint8* dst_uv, int pix) {
-  for (int x = 0; x < pix; ++x) {
-    dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
-  }
-}
-
 LIBYUV_API
 int I422ToI420(const uint8* src_y, int src_stride_y,
                const uint8* src_u, int src_stride_u,
@@ -149,14 +89,17 @@ int I422ToI420(const uint8* src_y, int src_stride_y,
   void (*HalfRow)(const uint8* src_uv, int src_uv_stride,
                   uint8* dst_uv, int pix) = HalfRow_C;
 #if defined(HAS_HALFROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      IS_ALIGNED(halfwidth, 16) &&
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(halfwidth, 16) &&
       IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
       IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
       IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
       IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
     HalfRow = HalfRow_SSE2;
   }
+#elif defined(HAS_HALFROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) {
+    HalfRow = HalfRow_NEON;
+  }
 #endif

   // Copy Y plane
@@ -296,12 +239,12 @@ int I411ToI420(const uint8* src_y, int src_stride_y,
   // Negative height means invert the image.
   if (height < 0) {
     height = -height;
-    dst_y = dst_y + (height - 1) * dst_stride_y;
-    dst_u = dst_u + (height - 1) * dst_stride_u;
-    dst_v = dst_v + (height - 1) * dst_stride_v;
-    dst_stride_y = -dst_stride_y;
-    dst_stride_u = -dst_stride_u;
-    dst_stride_v = -dst_stride_v;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_u = src_u + (height - 1) * src_stride_u;
+    src_v = src_v + (height - 1) * src_stride_v;
+    src_stride_y = -src_stride_y;
+    src_stride_u = -src_stride_u;
+    src_stride_v = -src_stride_v;
   }

   // Copy Y plane
...
@@ -1240,6 +1240,13 @@ void ARGBInterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
   } while (dst_ptr < end);
 }

+void HalfRow_C(const uint8* src_uv, int src_uv_stride,
+               uint8* dst_uv, int pix) {
+  for (int x = 0; x < pix; ++x) {
+    dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
+  }
+}
+
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
...
@@ -821,6 +821,27 @@ void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
 }
 #endif  // HAS_UYVYTOYROW_NEON

+void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
+                  uint8* dst_uv, int pix) {
+  asm volatile (
+    // Change the stride to a row 2 pointer.
+    "add        %1, %0                         \n"
+  "1:                                          \n"
+    "vld1.u8    {q0}, [%0]!                    \n"  // load row 1 16 pixels.
+    "subs       %3, %3, #16                    \n"  // 16 processed per loop
+    "vld1.u8    {q1}, [%1]!                    \n"  // load row 2 16 pixels.
+    "vrhadd.u8  q0, q1                         \n"  // average row 1 and 2
+    "vst1.u8    {q0}, [%2]!                    \n"
+    "bgt        1b                             \n"
+  : "+r"(src_uv),         // %0
+    "+r"(src_uv_stride),  // %1
+    "+r"(dst_uv),         // %2
+    "+r"(pix)             // %3
+  :
+  : "memory", "cc", "q0", "q1"  // Clobber List
+  );
+}
+
 #endif  // __ARM_NEON__

 #ifdef __cplusplus
...
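The NEON port relies on vrhadd.u8, a rounding halving add, which computes the same result as the C reference's (a + b + 1) >> 1; the SSE2 pavgb used elsewhere in this change has the same semantics. A scalar sketch that checks this identity exhaustively over all byte pairs (illustrative only, not part of the change):

#include <assert.h>
#include <stdint.h>

// Emulate a single lane of vrhadd.u8 / pavgb: the sum is formed at wider
// precision, so (255 + 255 + 1) >> 1 still yields 255 without overflow.
static uint8_t rhadd_u8(uint8_t a, uint8_t b) {
  return (uint8_t)(((unsigned)a + b + 1) >> 1);
}

int main(void) {
  for (int a = 0; a < 256; ++a)
    for (int b = 0; b < 256; ++b)
      assert(rhadd_u8((uint8_t)a, (uint8_t)b) == ((a + b + 1) >> 1));
  return 0;
}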
@@ -1816,6 +1816,43 @@ void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf,
   );
 }

+void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
+                                const uint8* u_buf,
+                                const uint8* v_buf,
+                                uint8* rgba_buf,
+                                int width) {
+  asm volatile (
+    "sub        %[u_buf],%[v_buf]              \n"
+    "pcmpeqb    %%xmm5,%%xmm5                  \n"
+    "pxor       %%xmm4,%%xmm4                  \n"
+    ".p2align  4                               \n"
+  "1:                                          \n"
+    READYUV422
+    YUVTORGB
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "punpcklbw %%xmm2,%%xmm1                   \n"
+    "punpcklbw %%xmm0,%%xmm5                   \n"
+    "movdqa    %%xmm5,%%xmm0                   \n"
+    "punpcklwd %%xmm1,%%xmm5                   \n"
+    "punpckhwd %%xmm1,%%xmm0                   \n"
+    "movdqa    %%xmm5,(%[argb_buf])            \n"
+    "movdqa    %%xmm0,0x10(%[argb_buf])        \n"
+    "lea       0x20(%[argb_buf]),%[argb_buf]   \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),        // %[y_buf]
+    [u_buf]"+r"(u_buf),        // %[u_buf]
+    [v_buf]"+r"(v_buf),        // %[v_buf]
+    [argb_buf]"+r"(rgba_buf),  // %[argb_buf]
+    [width]"+rm"(width)        // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB)  // %[kYuvConstants]
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
 void OMITFP I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
                                           const uint8* u_buf,
                                           const uint8* v_buf,
@@ -1888,6 +1925,44 @@ void OMITFP I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
 #endif
   );
 }

+void OMITFP I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf,
+                                          const uint8* u_buf,
+                                          const uint8* v_buf,
+                                          uint8* rgba_buf,
+                                          int width) {
+  asm volatile (
+    "sub        %[u_buf],%[v_buf]              \n"
+    "pcmpeqb    %%xmm5,%%xmm5                  \n"
+    "pxor       %%xmm4,%%xmm4                  \n"
+    ".p2align  4                               \n"
+  "1:                                          \n"
+    READYUV422
+    YUVTORGB
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "punpcklbw %%xmm2,%%xmm1                   \n"
+    "punpcklbw %%xmm0,%%xmm5                   \n"
+    "movdqa    %%xmm5,%%xmm0                   \n"
+    "punpcklwd %%xmm1,%%xmm5                   \n"
+    "punpckhwd %%xmm1,%%xmm0                   \n"
+    "movdqu    %%xmm5,(%[argb_buf])            \n"
+    "movdqu    %%xmm0,0x10(%[argb_buf])        \n"
+    "lea       0x20(%[argb_buf]),%[argb_buf]   \n"
+    "sub       $0x8,%[width]                   \n"
+    "jg        1b                              \n"
+  : [y_buf]"+r"(y_buf),        // %[y_buf]
+    [u_buf]"+r"(u_buf),        // %[u_buf]
+    [v_buf]"+r"(v_buf),        // %[v_buf]
+    [argb_buf]"+r"(rgba_buf),  // %[argb_buf]
+    [width]"+rm"(width)        // %[width]
+  : [kYuvConstants]"r"(&kYuvConstants.kUVToB)  // %[kYuvConstants]
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
+#endif
+  );
+}
+
 #endif  // HAS_I422TOARGBROW_SSSE3

 #ifdef HAS_YTOARGBROW_SSE2
@@ -3654,6 +3729,28 @@ void ARGBInterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
   );
 }

+void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
+                  uint8* dst_uv, int pix) {
+  asm volatile (
+    "sub        %0,%1                          \n"
+    ".p2align  4                               \n"
+  "1:                                          \n"
+    "movdqa     (%0),%%xmm0                    \n"
+    "pavgb      (%0,%3),%%xmm0                 \n"
+    "sub        $0x10,%2                       \n"
+    "movdqa     %%xmm0,(%0,%1)                 \n"
+    "lea        0x10(%0),%0                    \n"
+    "jg         1b                             \n"
+  : "+r"(src_uv),  // %0
+    "+r"(dst_uv),  // %1
+    "+r"(pix)      // %2
+  : "r"(static_cast<intptr_t>(src_uv_stride))  // %3
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0"
+#endif
+  );
+}
+
 #endif  // defined(__x86_64__) || defined(__i386__)

 #ifdef __cplusplus
...
@@ -4193,6 +4193,30 @@ void ARGBInterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
   }
 }

+__declspec(naked) __declspec(align(16))
+void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
+                  uint8* dst_uv, int pix) {
+  __asm {
+    push       edi
+    mov        eax, [esp + 4 + 4]    // src_uv
+    mov        edx, [esp + 4 + 8]    // src_uv_stride
+    mov        edi, [esp + 4 + 12]   // dst_uv
+    mov        ecx, [esp + 4 + 16]   // pix
+    sub        edi, eax
+
+    align      16
+  convertloop:
+    movdqa     xmm0, [eax]
+    pavgb      xmm0, [eax + edx]
+    sub        ecx, 16
+    movdqa     [eax + edi], xmm0
+    lea        eax, [eax + 16]
+    jg         convertloop
+
+    pop        edi
+    ret
+  }
+}
+
 #endif  // _M_IX86

 #ifdef __cplusplus
...
@@ -108,19 +108,25 @@ TEST_F(libyuvTest, BenchmarkSumSquareError_C) {
   align_buffer_16(src_a, kMaxWidth)
   align_buffer_16(src_b, kMaxWidth)

+  MaskCpuFlags(kCpuInitialized);
+
+  memcpy(src_a, "test0123test4567", 16);
+  memcpy(src_b, "tick0123tock4567", 16);
+  uint64 h1 = ComputeSumSquareError(src_a, src_b, 16);
+  EXPECT_EQ(790u, h1);
+
   for (int i = 0; i < kMaxWidth; ++i) {
     src_a[i] = i;
     src_b[i] = i;
   }
-  MaskCpuFlags(kCpuInitialized);
-  for (int i = 0; i < benchmark_iterations_; ++i) {
-    ComputeSumSquareError(src_a, src_b, kMaxWidth);
+
+  int count = benchmark_iterations_ * 1280 * 720 / kMaxWidth;
+  for (int i = 0; i < count; ++i) {
+    h1 = ComputeSumSquareError(src_a, src_b, kMaxWidth);
   }
   MaskCpuFlags(-1);
-  EXPECT_EQ(0, 0);
+  EXPECT_EQ(h1, 0);

   free_aligned_buffer_16(src_a)
   free_aligned_buffer_16(src_b)
@@ -131,16 +137,22 @@ TEST_F(libyuvTest, BenchmarkSumSquareError_OPT) {
   align_buffer_16(src_a, kMaxWidth)
   align_buffer_16(src_b, kMaxWidth)

+  memcpy(src_a, "test0123test4567", 16);
+  memcpy(src_b, "tick0123tock4567", 16);
+  uint64 h1 = ComputeSumSquareError(src_a, src_b, 16);
+  EXPECT_EQ(790u, h1);
+
   for (int i = 0; i < kMaxWidth; ++i) {
     src_a[i] = i;
     src_b[i] = i;
   }
-  for (int i = 0; i < benchmark_iterations_; ++i) {
-    ComputeSumSquareError(src_a, src_b, kMaxWidth);
+
+  int count = benchmark_iterations_ * 1280 * 720 / kMaxWidth;
+  for (int i = 0; i < count; ++i) {
+    h1 = ComputeSumSquareError(src_a, src_b, kMaxWidth);
   }
-  EXPECT_EQ(0, 0);
+  EXPECT_EQ(h1, 0);

   free_aligned_buffer_16(src_a)
   free_aligned_buffer_16(src_b)
...
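The benchmark changes above renormalize the loop count so the amount of data processed is fixed: count iterations over kMaxWidth bytes sum to benchmark_iterations_ 720p planes' worth of data (1280 * 720 bytes each), making timings comparable across buffer sizes. A small sketch of the arithmetic (kMaxWidth and the iteration count here are assumed values for illustration):

#include <stdio.h>

int main(void) {
  const long kMaxWidth = 4096;            // assumed buffer size
  const long benchmark_iterations = 10;   // assumed iteration count
  long count = benchmark_iterations * 1280 * 720 / kMaxWidth;
  // count * kMaxWidth == benchmark_iterations * 921600 whenever kMaxWidth
  // divides 921600 evenly (4096 does: 921600 / 4096 = 225).
  printf("count=%ld bytes=%ld target=%ld\n",
         count, count * kMaxWidth, benchmark_iterations * 1280L * 720);
  return 0;
}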
@@ -28,6 +28,107 @@
 namespace libyuv {

+#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,          \
+                       FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, N, NEG)              \
+TEST_F(libyuvTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N##_OptVsC) {              \
+  const int kWidth = 1280;                                                    \
+  const int kHeight = 720;                                                    \
+  align_buffer_16(src_y, kWidth * kHeight);                                   \
+  align_buffer_16(src_u, kWidth / SRC_SUBSAMP_X * kHeight / SRC_SUBSAMP_Y);   \
+  align_buffer_16(src_v, kWidth / SRC_SUBSAMP_X * kHeight / SRC_SUBSAMP_Y);   \
+  align_buffer_16(dst_y_c, kWidth * kHeight);                                 \
+  align_buffer_16(dst_u_c, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y);         \
+  align_buffer_16(dst_v_c, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y);         \
+  align_buffer_16(dst_y_opt, kWidth * kHeight);                               \
+  align_buffer_16(dst_u_opt, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y);       \
+  align_buffer_16(dst_v_opt, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y);       \
+  srandom(time(NULL));                                                        \
+  for (int i = 0; i < kHeight; ++i)                                           \
+    for (int j = 0; j < kWidth; ++j)                                          \
+      src_y[(i * kWidth) + j] = (random() & 0xff);                            \
+  for (int i = 0; i < kHeight / SRC_SUBSAMP_Y; ++i)                           \
+    for (int j = 0; j < kWidth / SRC_SUBSAMP_X; ++j) {                        \
+      src_u[(i * kWidth / SRC_SUBSAMP_X) + j] = (random() & 0xff);            \
+      src_v[(i * kWidth / SRC_SUBSAMP_X) + j] = (random() & 0xff);            \
+    }                                                                         \
+  MaskCpuFlags(kCpuInitialized);                                              \
+  SRC_FMT_PLANAR##To##FMT_PLANAR(src_y, kWidth,                               \
+                                 src_u, kWidth / SRC_SUBSAMP_X,               \
+                                 src_v, kWidth / SRC_SUBSAMP_X,               \
+                                 dst_y_c, kWidth,                             \
+                                 dst_u_c, kWidth / SUBSAMP_X,                 \
+                                 dst_v_c, kWidth / SUBSAMP_X,                 \
+                                 kWidth, NEG kHeight);                        \
+  MaskCpuFlags(-1);                                                           \
+  for (int i = 0; i < benchmark_iterations_; ++i) {                           \
+    SRC_FMT_PLANAR##To##FMT_PLANAR(src_y, kWidth,                             \
+                                   src_u, kWidth / SRC_SUBSAMP_X,             \
+                                   src_v, kWidth / SRC_SUBSAMP_X,             \
+                                   dst_y_opt, kWidth,                         \
+                                   dst_u_opt, kWidth / SUBSAMP_X,             \
+                                   dst_v_opt, kWidth / SUBSAMP_X,             \
+                                   kWidth, NEG kHeight);                      \
+  }                                                                           \
+  int max_diff = 0;                                                           \
+  for (int i = 0; i < kHeight; ++i) {                                         \
+    for (int j = 0; j < kWidth; ++j) {                                        \
+      int abs_diff =                                                          \
+          abs(static_cast<int>(dst_y_c[i * kWidth + j]) -                     \
+              static_cast<int>(dst_y_opt[i * kWidth + j]));                   \
+      if (abs_diff > max_diff) {                                              \
+        max_diff = abs_diff;                                                  \
+      }                                                                       \
+    }                                                                         \
+  }                                                                           \
+  EXPECT_LE(max_diff, 2);                                                     \
+  for (int i = 0; i < kHeight / SUBSAMP_Y; ++i) {                             \
+    for (int j = 0; j < kWidth / SUBSAMP_X; ++j) {                            \
+      int abs_diff =                                                          \
+          abs(static_cast<int>(dst_u_c[i * kWidth / SUBSAMP_X + j]) -         \
+              static_cast<int>(dst_u_opt[i * kWidth / SUBSAMP_X + j]));       \
+      if (abs_diff > max_diff) {                                              \
+        max_diff = abs_diff;                                                  \
+      }                                                                       \
+    }                                                                         \
+  }                                                                           \
+  EXPECT_LE(max_diff, 2);                                                     \
+  for (int i = 0; i < kHeight / SUBSAMP_Y; ++i) {                             \
+    for (int j = 0; j < kWidth / SUBSAMP_X; ++j) {                            \
+      int abs_diff =                                                          \
+          abs(static_cast<int>(dst_v_c[i * kWidth / SUBSAMP_X + j]) -         \
+              static_cast<int>(dst_v_opt[i * kWidth / SUBSAMP_X + j]));       \
+      if (abs_diff > max_diff) {                                              \
+        max_diff = abs_diff;                                                  \
+      }                                                                       \
+    }                                                                         \
+  }                                                                           \
+  EXPECT_LE(max_diff, 2);                                                     \
+  free_aligned_buffer_16(dst_y_c)                                             \
+  free_aligned_buffer_16(dst_u_c)                                             \
+  free_aligned_buffer_16(dst_v_c)                                             \
+  free_aligned_buffer_16(dst_y_opt)                                           \
+  free_aligned_buffer_16(dst_u_opt)                                           \
+  free_aligned_buffer_16(dst_v_opt)                                           \
+  free_aligned_buffer_16(src_y)                                               \
+  free_aligned_buffer_16(src_u)                                               \
+  free_aligned_buffer_16(src_v)                                               \
+}
+
+#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,           \
+                      FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y)                       \
+    TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,              \
+                   FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, , +)                     \
+    TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y,              \
+                   FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, Invert, -)
+
+TESTPLANARTOP(I420, 2, 2, I420, 2, 2)
+TESTPLANARTOP(I422, 2, 1, I420, 2, 2)
+TESTPLANARTOP(I444, 1, 1, I420, 2, 2)
+TESTPLANARTOP(I411, 4, 1, I420, 2, 2)
+TESTPLANARTOP(I420, 2, 2, I422, 2, 1)
+TESTPLANARTOP(I420, 2, 2, I444, 1, 1)
+TESTPLANARTOP(I420, 2, 2, I411, 4, 1)
+
 #define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, N, NEG) \
 TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N##_OptVsC) {                        \
   const int kWidth = 1280;                                                     \
...