Commit 31d05032 authored by fbarchard@google.com's avatar fbarchard@google.com

I420ToYUY2_Any_Neon to handle odd sizes

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/922007

git-svn-id: http://libyuv.googlecode.com/svn/trunk@437 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 95eb5264
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 435
Version: 437
License: BSD
License File: LICENSE
......
......@@ -843,6 +843,14 @@ void I422ToUYVYRow_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width);
void I422ToYUY2Row_Any_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width);
void I422ToUYVYRow_Any_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width);
void I422ToYUY2Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -851,6 +859,14 @@ void I422ToUYVYRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width);
void I422ToYUY2Row_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width);
void I422ToUYVYRow_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width);
#ifdef __cplusplus
} // extern "C"
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 435
#define LIBYUV_VERSION 437
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -281,15 +281,20 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_v, uint8* dst_frame, int width) =
I422ToYUY2Row_C;
#if defined(HAS_I422TOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
if (TestCpuFlag(kCpuHasSSE2) && width >= 16 &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I422ToYUY2Row = I422ToYUY2Row_SSE2;
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_SSE2;
}
}
#elif defined(HAS_I422TOYUY2ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_NEON;
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_NEON;
}
}
#endif
......@@ -323,15 +328,20 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_v, uint8* dst_frame, int width) =
I422ToYUY2Row_C;
#if defined(HAS_I422TOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
if (TestCpuFlag(kCpuHasSSE2) && width >= 16 &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I422ToYUY2Row = I422ToYUY2Row_SSE2;
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_SSE2;
}
}
#elif defined(HAS_I422TOYUY2ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_NEON;
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_NEON;
}
}
#endif
......@@ -371,15 +381,20 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_v, uint8* dst_frame, int width) =
I422ToUYVYRow_C;
#if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
if (TestCpuFlag(kCpuHasSSE2) && width >= 16 &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I422ToUYVYRow = I422ToUYVYRow_SSE2;
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_SSE2;
}
}
#elif defined(HAS_I422TOUYVYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_NEON;
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_NEON;
}
}
#endif
......@@ -413,15 +428,20 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_v, uint8* dst_frame, int width) =
I422ToUYVYRow_C;
#if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
if (TestCpuFlag(kCpuHasSSE2) && width >= 16 &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I422ToUYVYRow = I422ToUYVYRow_SSE2;
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_SSE2;
}
}
#elif defined(HAS_I422TOUYVYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_NEON;
if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_NEON;
}
}
#endif
......
......@@ -1000,22 +1000,53 @@ void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
}
// YUV to RGB does multiple of 8 with SIMD and remainder with C.
#define YANY(NAMEANY, I420TORGB_SIMD, I420TORGB_C, UV_SHIFT, BPP) \
#define YANY(NAMEANY, I420TORGB_SIMD, I420TORGB_C, UV_SHIFT, BPP, MASK) \
void NAMEANY(const uint8* y_buf, \
const uint8* u_buf, \
const uint8* v_buf, \
uint8* rgb_buf, \
int width) { \
int n = width & ~7; \
int n = width & ~MASK; \
I420TORGB_SIMD(y_buf, u_buf, v_buf, rgb_buf, n); \
I420TORGB_C(y_buf + n, \
u_buf + (n >> UV_SHIFT), \
v_buf + (n >> UV_SHIFT), \
rgb_buf + n * BPP, width & 7); \
rgb_buf + n * BPP, width & MASK); \
}
#ifdef HAS_I422TOARGBROW_SSSE3
YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C,
0, 4, 7)
YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C,
1, 4, 7)
YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C,
2, 4, 7)
YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C,
1, 4, 7)
YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C,
1, 4, 7)
YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C,
1, 4, 7)
// I422ToRGB24Row_SSSE3 is unaligned.
YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1, 3, 7)
YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1, 3, 7)
YANY(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, I422ToYUY2Row_C, 1, 2, 15)
YANY(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, I422ToUYVYRow_C, 1, 2, 15)
#endif // HAS_I422TOARGBROW_SSSE3
#ifdef HAS_I422TOARGBROW_NEON
YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1, 4, 7)
YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1, 4, 7)
YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1, 4, 7)
YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1, 4, 7)
YANY(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, I422ToRGB24Row_C, 1, 3, 7)
YANY(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, I422ToRAWRow_C, 1, 3, 7)
YANY(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, I422ToYUY2Row_C, 1, 2, 15)
YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15)
#endif // HAS_I422TOARGBROW_NEON
#undef YANY
// Wrappers to handle odd width
#define Y2NY(NAMEANY, NV12TORGB_SIMD, NV12TORGB_C, UV_SHIFT, BPP) \
#define NV2NY(NAMEANY, NV12TORGB_SIMD, NV12TORGB_C, UV_SHIFT, BPP) \
void NAMEANY(const uint8* y_buf, \
const uint8* uv_buf, \
uint8* rgb_buf, \
......@@ -1028,37 +1059,16 @@ void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
}
#ifdef HAS_I422TOARGBROW_SSSE3
YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C,
0, 4)
YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C,
1, 4)
YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C,
2, 4)
Y2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C,
NV2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C,
0, 4)
Y2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C,
NV2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C,
0, 4)
YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C,
1, 4)
YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C,
1, 4)
YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C,
1, 4)
// I422ToRGB24Row_SSSE3 is unaligned.
YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1, 3)
YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1, 3)
#endif // HAS_I422TOARGBROW_SSSE3
#ifdef HAS_I422TOARGBROW_NEON
YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1, 4)
YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1, 4)
YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1, 4)
YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1, 4)
Y2NY(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, NV12ToARGBRow_C, 0, 4)
Y2NY(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, NV21ToARGBRow_C, 0, 4)
YANY(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, I422ToRGB24Row_C, 1, 3)
YANY(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, I422ToRAWRow_C, 1, 3)
NV2NY(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, NV12ToARGBRow_C, 0, 4)
NV2NY(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, NV21ToARGBRow_C, 0, 4)
#endif // HAS_I422TOARGBROW_NEON
#undef YANY
#undef NVANY
// RGB to RGB does multiple of 16 pixels with SIMD and remainder with C.
// SSSE3 RGB24 is multiple of 16 pixels, aligned source and destination.
......
......@@ -974,7 +974,7 @@ void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
"+r"(dst_rgb565), // %1
"+r"(pix) // %2
:
: "memory", "cc", "d0", "d1", "d2", "d3", "d4", "q8", "q9", "q10"
: "memory", "cc", "d0", "d1", "d2", "d3", "q8", "q9", "q10"
);
}
#endif // HAS_ARGBTORGB565ROW_NEON
......
......@@ -29,7 +29,7 @@ extern "C" {
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t /* src_stride */,
uint8* dst, int dst_width) {
asm volatile (
"1: \n"
"1: \n"
// load even pixels into q0, odd into q1
"vld2.u8 {q0,q1}, [%0]! \n"
"vst1.u8 {q0}, [%1]! \n" // store even pixels
......@@ -48,7 +48,7 @@ void ScaleRowDown2Int_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
asm volatile (
// change the stride to row 2 pointer
"add %1, %0 \n"
"1: \n"
"1: \n"
"vld1.u8 {q0,q1}, [%0]! \n" // load row 1 and post inc
"vld1.u8 {q2,q3}, [%1]! \n" // load row 2 and post inc
"vpaddl.u8 q0, q0 \n" // row 1 add adjacent
......@@ -72,7 +72,7 @@ void ScaleRowDown2Int_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t /* src_stride */,
uint8* dst_ptr, int dst_width) {
asm volatile (
"1: \n"
"1: \n"
"vld2.u8 {d0, d1}, [%0]! \n"
"vtrn.u8 d1, d0 \n"
"vshrn.u16 d0, q0, #8 \n"
......@@ -93,7 +93,7 @@ void ScaleRowDown4Int_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"add r4, %0, %3 \n"
"add r5, r4, %3 \n"
"add %3, r5, %3 \n"
"1: \n"
"1: \n"
"vld1.u8 {q0}, [%0]! \n" // load up 16x4
"vld1.u8 {q1}, [r4]! \n"
"vld1.u8 {q2}, [r5]! \n"
......@@ -123,7 +123,7 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
ptrdiff_t /* src_stride */,
uint8* dst_ptr, int dst_width) {
asm volatile (
"1: \n"
"1: \n"
"vld4.u8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
"vmov d2, d3 \n" // order d0, d1, d2
"vst3.u8 {d0, d1, d2}, [%1]! \n"
......@@ -143,7 +143,7 @@ void ScaleRowDown34_0_Int_NEON(const uint8* src_ptr,
asm volatile (
"vmov.u8 d24, #3 \n"
"add %3, %0 \n"
"1: \n"
"1: \n"
"vld4.u8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
"vld4.u8 {d4, d5, d6, d7}, [%3]! \n" // src line 1
......@@ -199,7 +199,7 @@ void ScaleRowDown34_1_Int_NEON(const uint8* src_ptr,
asm volatile (
"vmov.u8 d24, #3 \n"
"add %3, %0 \n"
"1: \n"
"1: \n"
"vld4.u8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
"vld4.u8 {d4, d5, d6, d7}, [%3]! \n" // src line 1
......@@ -251,7 +251,7 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
uint8* dst_ptr, int dst_width) {
asm volatile (
"vld1.u8 {q3}, [%3] \n"
"1: \n"
"1: \n"
"vld1.u8 {d0, d1, d2, d3}, [%0]! \n"
"vtbl.u8 d4, {d0, d1, d2, d3}, d6 \n"
"vtbl.u8 d5, {d0, d1, d2, d3}, d7 \n"
......@@ -277,7 +277,7 @@ void OMITFP ScaleRowDown38_3_Int_NEON(const uint8* src_ptr,
"vld1.u8 {q15}, [%6] \n"
"add r4, %0, %3, lsl #1 \n"
"add %3, %0 \n"
"1: \n"
"1: \n"
// d0 = 00 40 01 41 02 42 03 43
// d1 = 10 50 11 51 12 52 13 53
......@@ -385,7 +385,7 @@ void ScaleRowDown38_2_Int_NEON(const uint8* src_ptr,
"vld1.u16 {q13}, [%4] \n"
"vld1.u8 {q14}, [%5] \n"
"add %3, %0 \n"
"1: \n"
"1: \n"
// d0 = 00 40 01 41 02 42 03 43
// d1 = 10 50 11 51 12 52 13 53
......@@ -485,7 +485,7 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
"vdup.8 d5, %4 \n"
"rsb %4, #256 \n"
"vdup.8 d4, %4 \n"
"1: \n"
"1: \n"
"vld1.u8 {q0}, [%1]! \n"
"vld1.u8 {q1}, [%2]! \n"
"subs %3, #16 \n"
......@@ -499,21 +499,21 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
"bgt 1b \n"
"b 4f \n"
"2: \n"
"2: \n"
"vld1.u8 {q0}, [%1]! \n"
"subs %3, #16 \n"
"vst1.u8 {q0}, [%0]! \n"
"bgt 2b \n"
"b 4f \n"
"3: \n"
"3: \n"
"vld1.u8 {q0}, [%1]! \n"
"vld1.u8 {q1}, [%2]! \n"
"subs %3, #16 \n"
"vrhadd.u8 q0, q1 \n"
"vst1.u8 {q0}, [%0]! \n"
"bgt 3b \n"
"4: \n"
"4: \n"
"vst1.u8 {d1[7]}, [%0] \n"
: "+r"(dst_ptr), // %0
"+r"(src_ptr), // %1
......
......@@ -238,15 +238,15 @@ TESTBIPLANARTOP(NV12, 2, 2, I420, 2, 2)
TESTBIPLANARTOP(NV21, 2, 2, I420, 2, 2)
#define TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
W1280, DIFF, N, NEG) \
W1280, DIFF, N, NEG, OFF) \
TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = W1280; \
const int kHeight = 720; \
const int kStrideB = ((kWidth * 8 * BPP_B + 7) / 8 + ALIGN - 1) / \
ALIGN * ALIGN; \
align_buffer_16(src_y, kWidth * kHeight); \
align_buffer_16(src_u, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \
align_buffer_16(src_v, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \
align_buffer_16(src_y, kWidth * kHeight + OFF); \
align_buffer_16(src_u, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y + OFF); \
align_buffer_16(src_v, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y + OFF); \
align_buffer_16(dst_argb_c, kStrideB * kHeight); \
align_buffer_16(dst_argb_opt, kStrideB * kHeight); \
memset(dst_argb_c, 0, kStrideB * kHeight); \
......@@ -254,26 +254,26 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
srandom(time(NULL)); \
for (int i = 0; i < kHeight; ++i) { \
for (int j = 0; j < kWidth; ++j) { \
src_y[(i * kWidth) + j] = (random() & 0xff); \
src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \
} \
} \
for (int i = 0; i < kHeight / SUBSAMP_Y; ++i) { \
for (int j = 0; j < kWidth / SUBSAMP_X; ++j) { \
src_u[(i * kWidth / SUBSAMP_X) + j] = (random() & 0xff); \
src_v[(i * kWidth / SUBSAMP_X) + j] = (random() & 0xff); \
src_u[(i * kWidth / SUBSAMP_X) + j + OFF] = (random() & 0xff); \
src_v[(i * kWidth / SUBSAMP_X) + j + OFF] = (random() & 0xff); \
} \
} \
MaskCpuFlags(0); \
FMT_PLANAR##To##FMT_B(src_y, kWidth, \
src_u, kWidth / SUBSAMP_X, \
src_v, kWidth / SUBSAMP_X, \
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
src_u + OFF, kWidth / SUBSAMP_X, \
src_v + OFF, kWidth / SUBSAMP_X, \
dst_argb_c, kStrideB, \
kWidth, NEG kHeight); \
MaskCpuFlags(-1); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B(src_y, kWidth, \
src_u, kWidth / SUBSAMP_X, \
src_v, kWidth / SUBSAMP_X, \
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
src_u + OFF, kWidth / SUBSAMP_X, \
src_v + OFF, kWidth / SUBSAMP_X, \
dst_argb_opt, kStrideB, \
kWidth, NEG kHeight); \
} \
......@@ -313,11 +313,13 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
#define TESTPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
DIFF) \
TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
1280, DIFF, _Opt, +) \
1280, DIFF, _Opt, +, 0) \
TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
1280, DIFF, _Invert, -) \
1280, DIFF, _Unaligned, +, 1) \
TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
1276, DIFF, _Any, +)
1280, DIFF, _Invert, -, 0) \
TESTPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
1276, DIFF, _Any, +, 0)
TESTPLANARTOB(I420, 2, 2, ARGB, 4, 4, 2)
TESTPLANARTOB(I420, 2, 2, BGRA, 4, 4, 2)
......@@ -346,32 +348,32 @@ TESTPLANARTOB(I420, 2, 2, BayerGBRG, 1, 1, 2)
TESTPLANARTOB(I420, 2, 2, BayerGRBG, 1, 1, 2)
#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
W1280, DIFF, N, NEG) \
W1280, DIFF, N, NEG, OFF) \
TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = W1280; \
const int kHeight = 720; \
const int kStrideB = kWidth * BPP_B; \
align_buffer_16(src_y, kWidth * kHeight); \
align_buffer_16(src_uv, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y * 2); \
align_buffer_16(src_y, kWidth * kHeight + OFF); \
align_buffer_16(src_uv, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y * 2 + OFF); \
align_buffer_16(dst_argb_c, kStrideB * kHeight); \
align_buffer_16(dst_argb_opt, kStrideB * kHeight); \
srandom(time(NULL)); \
for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kWidth; ++j) \
src_y[(i * kWidth) + j] = (random() & 0xff); \
src_y[(i * kWidth) + j + OFF] = (random() & 0xff); \
for (int i = 0; i < kHeight / SUBSAMP_Y; ++i) \
for (int j = 0; j < kWidth / SUBSAMP_X * 2; ++j) { \
src_uv[(i * kWidth / SUBSAMP_X) * 2 + j] = (random() & 0xff); \
src_uv[(i * kWidth / SUBSAMP_X) * 2 + j + OFF] = (random() & 0xff); \
} \
MaskCpuFlags(0); \
FMT_PLANAR##To##FMT_B(src_y, kWidth, \
src_uv, kWidth / SUBSAMP_X * 2, \
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
src_uv + OFF, kWidth / SUBSAMP_X * 2, \
dst_argb_c, kWidth * BPP_B, \
kWidth, NEG kHeight); \
MaskCpuFlags(-1); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B(src_y, kWidth, \
src_uv, kWidth / SUBSAMP_X * 2, \
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, \
src_uv + OFF, kWidth / SUBSAMP_X * 2, \
dst_argb_opt, kWidth * BPP_B, \
kWidth, NEG kHeight); \
} \
......@@ -408,11 +410,13 @@ TEST_F(libyuvTest, FMT_PLANAR##To##FMT_B##N) { \
#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF, \
1280, _Opt, +) \
1280, _Opt, +, 0) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF, \
1280, _Unaligned, +, 1) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF, \
1280, _Invert, -) \
1280, _Invert, -, 0) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, DIFF, \
1276, _Any, +)
1276, _Any, +, 0)
TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4, 2)
TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2)
......@@ -421,12 +425,12 @@ TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2)
// TESTBIPLANARTOB(NV21, 2, 2, RGB565, 2, 9)
#define TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
W1280, N, NEG) \
W1280, N, NEG, OFF) \
TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
const int kWidth = W1280; \
const int kHeight = 720; \
const int kStride = (kWidth * 8 * BPP_A + 7) / 8; \
align_buffer_16(src_argb, kStride * kHeight); \
align_buffer_16(src_argb, kStride * kHeight + OFF); \
align_buffer_16(dst_y_c, kWidth * kHeight); \
align_buffer_16(dst_u_c, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \
align_buffer_16(dst_v_c, kWidth / SUBSAMP_X * kHeight / SUBSAMP_Y); \
......@@ -436,16 +440,16 @@ TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
srandom(time(NULL)); \
for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kStride; ++j) \
src_argb[(i * kStride) + j] = (random() & 0xff); \
src_argb[(i * kStride) + j + OFF] = (random() & 0xff); \
MaskCpuFlags(0); \
FMT_A##To##FMT_PLANAR(src_argb, kStride, \
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
dst_y_c, kWidth, \
dst_u_c, kWidth / SUBSAMP_X, \
dst_v_c, kWidth / SUBSAMP_X, \
kWidth, NEG kHeight); \
MaskCpuFlags(-1); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_A##To##FMT_PLANAR(src_argb, kStride, \
FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, \
dst_y_opt, kWidth, \
dst_u_opt, kWidth / SUBSAMP_X, \
dst_v_opt, kWidth / SUBSAMP_X, \
......@@ -496,11 +500,13 @@ TEST_F(libyuvTest, FMT_A##To##FMT_PLANAR##N) { \
#define TESTATOPLANAR(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
1280, _Opt, +) \
1280, _Opt, +, 0) \
TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
1280, _Unaligned, +, 1) \
TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
1280, _Invert, -) \
1280, _Invert, -, 0) \
TESTATOPLANARI(FMT_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
1276, _Any, +)
1276, _Any, +, 0)
TESTATOPLANAR(ARGB, 4, I420, 2, 2)
TESTATOPLANAR(BGRA, 4, I420, 2, 2)
......@@ -526,24 +532,25 @@ TESTATOPLANAR(BayerRGGB, 1, I420, 2, 2)
TESTATOPLANAR(BayerGBRG, 1, I420, 2, 2)
TESTATOPLANAR(BayerGRBG, 1, I420, 2, 2)
#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, W1280, DIFF, N, NEG) \
#define TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, W1280, DIFF, \
N, NEG, OFF) \
TEST_F(libyuvTest, FMT_A##To##FMT_B##N) { \
const int kWidth = W1280; \
const int kHeight = 720; \
align_buffer_16(src_argb, (kWidth * BPP_A) * kHeight); \
align_buffer_16(src_argb, (kWidth * BPP_A) * kHeight + OFF); \
align_buffer_16(dst_argb_c, (kWidth * BPP_B) * kHeight); \
align_buffer_16(dst_argb_opt, (kWidth * BPP_B) * kHeight); \
srandom(time(NULL)); \
for (int i = 0; i < kHeight * kWidth * BPP_A; ++i) { \
src_argb[i] = (random() & 0xff); \
src_argb[i + OFF] = (random() & 0xff); \
} \
MaskCpuFlags(0); \
FMT_A##To##FMT_B(src_argb, kWidth * STRIDE_A, \
FMT_A##To##FMT_B(src_argb + OFF, kWidth * STRIDE_A, \
dst_argb_c, kWidth * BPP_B, \
kWidth, NEG kHeight); \
MaskCpuFlags(-1); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_A##To##FMT_B(src_argb, kWidth * STRIDE_A, \
FMT_A##To##FMT_B(src_argb + OFF, kWidth * STRIDE_A, \
dst_argb_opt, kWidth * BPP_B, \
kWidth, NEG kHeight); \
} \
......@@ -562,9 +569,11 @@ TEST_F(libyuvTest, FMT_A##To##FMT_B##N) { \
free_aligned_buffer_16(dst_argb_opt) \
}
#define TESTATOB(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, DIFF) \
TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, 1280, DIFF, _Opt, +) \
TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, 1280, DIFF, _Invert, -) \
TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, 1280, DIFF, _Any, +)
TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, 1280, DIFF, _Opt, +, 0) \
TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, 1280, DIFF, \
_Unaligned, +, 1) \
TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, 1280, DIFF, _Invert, -, 0) \
TESTATOBI(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B, 1280, DIFF, _Any, +, 0)
TESTATOB(ARGB, 4, 4, ARGB, 4, 0)
TESTATOB(ARGB, 4, 4, BGRA, 4, 0)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment