Commit 6825b161 authored by Frank Barchard, committed by Commit Bot

HalfFloat SSE2/AVX2 optimized port scheduling.

Uses 1 add instead of 2 leas to reduce pressure on ports 1 and 5, which are
also used for SIMD instructions.

BUG=libyuv:670
TEST=~/iaca-lin64/bin/iaca.sh -arch HSW out/Release/obj/libyuv/row_gcc.o
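IACA reports throughput for the region of a compiled object that sits between
its markers. A minimal sketch of how a loop could be marked for the command
above, assuming the iacaMarks.h header that ships with IACA and a scalar
stand-in body rather than the actual row function:

#include "iacaMarks.h"  // provides IACA_START / IACA_END (ships with IACA)

// Hypothetical stand-in for a row kernel; marker builds are analysis-only,
// since the marker byte sequences are not meant to execute normally.
void AnalyzedLoop(const unsigned short* src, unsigned short* dst, int width) {
  IACA_START  // first instruction of the analyzed region
  for (int i = 0; i < width; ++i) {
    dst[i] = src[i];
  }
  IACA_END    // end of the analyzed region
}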

Change-Id: I3965ee5dcb49941a535efa611b5988d977f5b65c
Reviewed-on: https://chromium-review.googlesource.com/433391
Reviewed-by: Frank Barchard <fbarchard@google.com>
Commit-Queue: Frank Barchard <fbarchard@google.com>
parent 7a54d0a3
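The change in a nutshell, as a plain C model (a sketch, not the committed
code; CopyRow16 and its memcpy body are stand-ins for the SIMD loads,
converts, and stores): precompute dst - src once, advance only src inside the
loop, and address the store relative to src.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

// Minimal sketch of the pointer-difference trick; width is assumed to be a
// multiple of 16 here, as the real kernels require of their own block size.
void CopyRow16(const uint8_t* src, uint8_t* dst, int width) {
  ptrdiff_t diff = dst - src;   // "sub %0,%1" hoisted above the loop
  while (width > 0) {
    uint8_t v[16];
    memcpy(v, src, sizeof(v));  // movdqu load from [src]
    src += 16;                  // single "add $0x10,%0"; no second lea for dst
    memcpy((uint8_t*)(src + diff) - 16, v, sizeof(v));  // store to the old dst
    width -= 16;                // "sub $0x10,%2" ... "jg 1b"
  }
}

On Haswell a two-operand lea issues only on ports 1 and 5, which the packed
conversion instructions also need, while a plain add can also use ports 0 and
6; folding the destination update into the store's base + index + displacement
address removes one uop from the contested ports entirely.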
source/row_common.cc

@@ -202,9 +202,8 @@ void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
     uint8 b1 = src_argb[4] >> 3;
     uint8 g1 = src_argb[5] >> 2;
     uint8 r1 = src_argb[6] >> 3;
-    WRITEWORD(
-        dst_rgb,
-        b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27));
+    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
+                           (r1 << 27));
     dst_rgb += 4;
     src_argb += 8;
   }
@@ -238,9 +237,8 @@ void ARGBToRGB565DitherRow_C(const uint8* src_argb,
     uint8 b1 = clamp255(src_argb[4] + dither1) >> 3;
     uint8 g1 = clamp255(src_argb[5] + dither1) >> 2;
     uint8 r1 = clamp255(src_argb[6] + dither1) >> 3;
-    WRITEWORD(
-        dst_rgb,
-        b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27));
+    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
+                           (r1 << 27));
     dst_rgb += 4;
     src_argb += 8;
   }
...
source/row_gcc.cc

/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
...
@@ -5457,12 +5456,13 @@ void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) {
   asm volatile (
     "pshufd     $0x0,%3,%%xmm4                 \n"
     "pxor       %%xmm5,%%xmm5                  \n"
+    "sub        %0,%1                          \n"

     // 16 pixel loop.
     LABELALIGN
   "1:                                          \n"
     "movdqu     " MEMACCESS(0) ",%%xmm2        \n"  // 8 shorts
-    "lea        " MEMLEA(0x10,0) ",%0          \n"
+    "add        $0x10,%0                       \n"
     "movdqa     %%xmm2,%%xmm3                  \n"
     "punpcklwd  %%xmm5,%%xmm2                  \n"  // 8 ints in xmm2/1
     "cvtdq2ps   %%xmm2,%%xmm2                  \n"  // 8 floats
@@ -5473,8 +5473,7 @@ void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) {
     "psrld      $0xd,%%xmm2                    \n"
     "psrld      $0xd,%%xmm3                    \n"
     "packssdw   %%xmm3,%%xmm2                  \n"
-    "movdqu     %%xmm2," MEMACCESS(1) "        \n"
-    "lea        " MEMLEA(0x10,1) ",%1          \n"
+    MEMOPMEM(movdqu,xmm2,-0x10,0,1,1)
     "sub        $0x8,%2                        \n"
     "jg         1b                             \n"
   : "+r"(src),    // %0
@@ -5488,17 +5487,17 @@ void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) {
 #endif  // HAS_HALFFLOATROW_SSE2

 #ifdef HAS_HALFFLOATROW_AVX2
+// TODO(fbarchard): consider vadddw instead of vmulps
 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) {
   asm volatile (
     "vbroadcastss %3, %%ymm4                   \n"
     "vpxor      %%ymm5,%%ymm5,%%ymm5           \n"
+    "sub        %0,%1                          \n"

     // 16 pixel loop.
     LABELALIGN
   "1:                                          \n"
     "vmovdqu    " MEMACCESS(0) ",%%ymm2        \n"  // 16 shorts
-    "lea        " MEMLEA(0x20,0) ",%0          \n"
+    "add        $0x20,%0                       \n"
     "vpunpckhwd %%ymm5,%%ymm2,%%ymm3           \n"  // mutates
     "vpunpcklwd %%ymm5,%%ymm2,%%ymm2           \n"
     "vcvtdq2ps  %%ymm3,%%ymm3                  \n"
@@ -5508,10 +5507,10 @@ void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) {
     "vpsrld     $0xd,%%ymm3,%%ymm3             \n"
     "vpsrld     $0xd,%%ymm2,%%ymm2             \n"
     "vpackssdw  %%ymm3, %%ymm2, %%ymm2         \n"  // unmutates
-    "vmovdqu    %%ymm2," MEMACCESS(1) "        \n"
-    "lea        " MEMLEA(0x20,1) ",%1          \n"
+    MEMOPMEM(vmovdqu,ymm2,-0x20,0,1,1)
     "sub        $0x10,%2                       \n"
     "jg         1b                             \n"
     "vzeroupper                                \n"
   : "+r"(src),    // %0
     "+r"(dst),    // %1
@@ -5526,26 +5525,25 @@ void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) {
 #ifdef HAS_HALFFLOATROW_F16C
 void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) {
   asm volatile (
     "vbroadcastss %3, %%ymm4                   \n"
+    "sub        %0,%1                          \n"

     // 16 pixel loop.
     LABELALIGN
   "1:                                          \n"
     "vpmovzxwd  " MEMACCESS(0) ",%%ymm2        \n"  // 16 shorts -> 16 ints
     "vpmovzxwd  " MEMACCESS2(0x10,0) ",%%ymm3  \n"
-    "lea        " MEMLEA(0x20,0) ",%0          \n"
     "vcvtdq2ps  %%ymm2,%%ymm2                  \n"
     "vcvtdq2ps  %%ymm3,%%ymm3                  \n"
     "vmulps     %%ymm2,%%ymm4,%%ymm2           \n"
     "vmulps     %%ymm3,%%ymm4,%%ymm3           \n"
     "vcvtps2ph  $3, %%ymm2, %%xmm2             \n"
     "vcvtps2ph  $3, %%ymm3, %%xmm3             \n"
-    "vmovdqu    %%xmm2," MEMACCESS(1) "        \n"
-    "vmovdqu    %%xmm3," MEMACCESS2(0x10,1) "  \n"
-    "lea        " MEMLEA(0x20,1) ",%1          \n"
+    MEMOPMEM(vmovdqu,xmm2,0x00,0,1,1)
+    MEMOPMEM(vmovdqu,xmm3,0x10,0,1,1)
+    "add        $0x20,%0                       \n"
     "sub        $0x10,%2                       \n"
     "jg         1b                             \n"
     "vzeroupper                                \n"
   : "+r"(src),    // %0
     "+r"(dst),    // %1
@@ -5560,22 +5558,21 @@ void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) {
 #ifdef HAS_HALFFLOATROW_F16C
 void HalfFloat1Row_F16C(const uint16* src, uint16* dst, float, int width) {
   asm volatile (
+    "sub        %0,%1                          \n"

     // 16 pixel loop.
     LABELALIGN
   "1:                                          \n"
     "vpmovzxwd  " MEMACCESS(0) ",%%ymm2        \n"  // 16 shorts -> 16 ints
     "vpmovzxwd  " MEMACCESS2(0x10,0) ",%%ymm3  \n"
-    "lea        " MEMLEA(0x20,0) ",%0          \n"
     "vcvtdq2ps  %%ymm2,%%ymm2                  \n"
     "vcvtdq2ps  %%ymm3,%%ymm3                  \n"
     "vcvtps2ph  $3, %%ymm2, %%xmm2             \n"
     "vcvtps2ph  $3, %%ymm3, %%xmm3             \n"
-    "vmovdqu    %%xmm2," MEMACCESS(1) "        \n"
-    "vmovdqu    %%xmm3," MEMACCESS2(0x10,1) "  \n"
-    "lea        " MEMLEA(0x20,1) ",%1          \n"
+    MEMOPMEM(vmovdqu,xmm2,0x00,0,1,1)
+    MEMOPMEM(vmovdqu,xmm3,0x10,0,1,1)
+    "add        $0x20,%0                       \n"
     "sub        $0x10,%2                       \n"
     "jg         1b                             \n"
     "vzeroupper                                \n"
   : "+r"(src),    // %0
     "+r"(dst),    // %1
...
source/row_win.cc

@@ -6070,11 +6070,12 @@ __declspec(naked) void HalfFloatRow_SSE2(const uint16* src,
     mulss      xmm4, kExpBias
     pshufd     xmm4, xmm4, 0
     pxor       xmm5, xmm5
+    sub        edx, eax

     // 8 pixel loop.
   convertloop:
     movdqu     xmm2, xmmword ptr [eax]  // 8 shorts
-    lea        eax, [eax + 16]
+    add        eax, 16
     movdqa     xmm3, xmm2
     punpcklwd  xmm2, xmm5
     cvtdq2ps   xmm2, xmm2               // convert 8 ints to floats
@@ -6085,8 +6086,7 @@ __declspec(naked) void HalfFloatRow_SSE2(const uint16* src,
     psrld      xmm2, 13
     psrld      xmm3, 13
     packssdw   xmm2, xmm3
-    movdqu     [edx], xmm2
-    lea        edx, [edx + 16]
+    movdqu     [eax + edx - 16], xmm2
     sub        ecx, 8
     jg         convertloop
     ret
@@ -6108,11 +6108,12 @@ __declspec(naked) void HalfFloatRow_AVX2(const uint16* src,
     vmulss     xmm4, xmm4, kExpBias
     vbroadcastss ymm4, xmm4
     vpxor      ymm5, ymm5, ymm5
+    sub        edx, eax

     // 16 pixel loop.
   convertloop:
     vmovdqu    ymm2, [eax]              // 16 shorts
-    lea        eax, [eax + 32]
+    add        eax, 32
     vpunpckhwd ymm3, ymm2, ymm5         // convert 16 shorts to 16 ints
     vpunpcklwd ymm2, ymm2, ymm5
     vcvtdq2ps  ymm3, ymm3               // convert 16 ints to floats
@@ -6122,8 +6123,7 @@ __declspec(naked) void HalfFloatRow_AVX2(const uint16* src,
     vpsrld     ymm3, ymm3, 13           // float convert to 8 half floats truncate
     vpsrld     ymm2, ymm2, 13
     vpackssdw  ymm2, ymm2, ymm3
-    vmovdqu    [edx], ymm2
-    lea        edx, [edx + 32]
+    vmovdqu    [eax + edx - 32], ymm2
     sub        ecx, 16
     jg         convertloop
     vzeroupper
@@ -6142,21 +6142,21 @@ __declspec(naked) void HalfFloatRow_F16C(const uint16* src,
     mov        edx, [esp + 8]      /* dst */
     vbroadcastss ymm4, [esp + 12]  /* scale */
     mov        ecx, [esp + 16]     /* width */
+    sub        edx, eax

     // 16 pixel loop.
   convertloop:
     vpmovzxwd  ymm2, xmmword ptr [eax]       // 8 shorts -> 8 ints
     vpmovzxwd  ymm3, xmmword ptr [eax + 16]  // 8 more shorts
-    lea        eax, [eax + 32]
+    add        eax, 32
     vcvtdq2ps  ymm2, ymm2                    // convert 8 ints to floats
     vcvtdq2ps  ymm3, ymm3
     vmulps     ymm2, ymm2, ymm4              // scale to normalized range 0 to 1
     vmulps     ymm3, ymm3, ymm4
     vcvtps2ph  xmm2, ymm2, 3                 // float convert to 8 half floats truncate
     vcvtps2ph  xmm3, ymm3, 3
-    vmovdqu    [edx], xmm2
-    vmovdqu    [edx + 16], xmm3
-    lea        edx, [edx + 32]
+    vmovdqu    [eax + edx - 32], xmm2
+    vmovdqu    [eax + edx - 32 + 16], xmm3
     sub        ecx, 16
     jg         convertloop
     vzeroupper
...
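The arithmetic these kernels share, modeled in scalar C (a sketch under the
assumption that kExpBias is 2^-112, i.e. 1.9259299444e-34f, and that inputs
are nonnegative and land in normal half-float range): multiplying by
scale * 2^-112 rebiases the float32 exponent (bias 127) down to the float16
exponent (bias 15), so shifting the float32 bit pattern right by 13 drops the
surplus mantissa bits and leaves the truncated half float, matching the
psrld 13 / packssdw and vcvtps2ph-with-truncation sequences above, 8 or 16
lanes at a time.

#include <stdint.h>
#include <string.h>

// Scalar model: one uint16 sample in, one binary16 bit pattern out.
static uint16_t HalfFromUint16(uint16_t v, float scale) {
  float f = (float)v * scale * 1.9259299444e-34f;  // scale * 2^-112 (kExpBias)
  uint32_t bits;
  memcpy(&bits, &f, sizeof(bits));  // reinterpret float32 as raw bits
  return (uint16_t)(bits >> 13);    // rebiased exponent + top 10 mantissa bits
}

For v up to 65535 and scale at most 1.0 the rebiased exponent stays below 31,
so every shifted 32-bit lane is under 0x8000; that is why the SSE2/AVX2 paths
can pack the dwords with signed saturation (packssdw) without clipping.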
source/scale.cc

@@ -45,9 +45,10 @@ static void ScalePlaneDown2(int src_width,
   int y;
   void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
                         uint8* dst_ptr, int dst_width) =
-      filtering == kFilterNone ? ScaleRowDown2_C : (filtering == kFilterLinear
-                                                        ? ScaleRowDown2Linear_C
-                                                        : ScaleRowDown2Box_C);
+      filtering == kFilterNone
+          ? ScaleRowDown2_C
+          : (filtering == kFilterLinear ? ScaleRowDown2Linear_C
+                                        : ScaleRowDown2Box_C);
   int row_stride = src_stride << 1;
   if (!filtering) {
     src_ptr += src_stride;  // Point to odd rows.
...
unit_test/convert_test.cc

@@ -30,113 +30,107 @@ namespace libyuv {
 #define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))

 #define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
                        FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
   TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
     const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
     const int kHeight = benchmark_height_; \
     align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
-    align_buffer_page_end( \
-        src_u, \
-        SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
-            OFF); \
-    align_buffer_page_end( \
-        src_v, \
-        SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
-            OFF); \
+    align_buffer_page_end(src_u, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
+                                     SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
+                                     OFF); \
+    align_buffer_page_end(src_v, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
+                                     SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
+                                     OFF); \
     align_buffer_page_end(dst_y_c, kWidth* kHeight); \
-    align_buffer_page_end( \
-        dst_u_c, \
-        SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
-    align_buffer_page_end( \
-        dst_v_c, \
-        SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                                       SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                                       SUBSAMPLE(kHeight, SUBSAMP_Y)); \
     align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
-    align_buffer_page_end( \
-        dst_u_opt, \
-        SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
-    align_buffer_page_end( \
-        dst_v_opt, \
-        SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                                         SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                                         SUBSAMPLE(kHeight, SUBSAMP_Y)); \
     for (int i = 0; i < kHeight; ++i) \
       for (int j = 0; j < kWidth; ++j) \
         src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
     for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
       for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
         src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
             (fastrand() & 0xff); \
         src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
             (fastrand() & 0xff); \
       } \
     } \
     memset(dst_y_c, 1, kWidth* kHeight); \
     memset(dst_u_c, 2, \
            SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
     memset(dst_v_c, 3, \
            SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
     memset(dst_y_opt, 101, kWidth* kHeight); \
     memset(dst_u_opt, 102, \
            SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
     memset(dst_v_opt, 103, \
            SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
     MaskCpuFlags(disable_cpu_flags_); \
     SRC_FMT_PLANAR##To##FMT_PLANAR( \
         src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
         src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth, \
         dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \
         SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
     MaskCpuFlags(benchmark_cpu_info_); \
     for (int i = 0; i < benchmark_iterations_; ++i) { \
       SRC_FMT_PLANAR##To##FMT_PLANAR( \
           src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
           src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth, \
           dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_opt, \
           SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
     } \
     int max_diff = 0; \
     for (int i = 0; i < kHeight; ++i) { \
       for (int j = 0; j < kWidth; ++j) { \
         int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
                            static_cast<int>(dst_y_opt[i * kWidth + j])); \
         if (abs_diff > max_diff) { \
           max_diff = abs_diff; \
         } \
       } \
     } \
     EXPECT_EQ(0, max_diff); \
     for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
       for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
         int abs_diff = abs( \
             static_cast<int>(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
             static_cast<int>( \
                 dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
         if (abs_diff > max_diff) { \
           max_diff = abs_diff; \
         } \
       } \
     } \
     EXPECT_LE(max_diff, 3); \
     for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
       for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
         int abs_diff = abs( \
             static_cast<int>(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
             static_cast<int>( \
                 dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
         if (abs_diff > max_diff) { \
           max_diff = abs_diff; \
         } \
       } \
     } \
     EXPECT_LE(max_diff, 3); \
     free_aligned_buffer_page_end(dst_y_c); \
     free_aligned_buffer_page_end(dst_u_c); \
     free_aligned_buffer_page_end(dst_v_c); \
     free_aligned_buffer_page_end(dst_y_opt); \
     free_aligned_buffer_page_end(dst_u_opt); \
     free_aligned_buffer_page_end(dst_v_opt); \
     free_aligned_buffer_page_end(src_y); \
     free_aligned_buffer_page_end(src_u); \
     free_aligned_buffer_page_end(src_v); \
   }
 #define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
@@ -172,19 +166,15 @@ TESTPLANARTOP(I444, 1, 1, I444, 1, 1)
     align_buffer_page_end(src_uv, \
                           kSizeUV*((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \
     align_buffer_page_end(dst_y_c, kWidth* kHeight); \
-    align_buffer_page_end( \
-        dst_u_c, \
-        SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
-    align_buffer_page_end( \
-        dst_v_c, \
-        SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                                       SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                                       SUBSAMPLE(kHeight, SUBSAMP_Y)); \
     align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
-    align_buffer_page_end( \
-        dst_u_opt, \
-        SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
-    align_buffer_page_end( \
-        dst_v_opt, \
-        SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                                         SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                                         SUBSAMPLE(kHeight, SUBSAMP_Y)); \
     uint8* src_u = src_uv + OFF_U; \
     uint8* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V); \
     int src_stride_uv = SUBSAMPLE(kWidth, SUBSAMP_X) * PIXEL_STRIDE; \
@@ -288,88 +278,84 @@ TESTAPLANARTOP(Android420, I420, 1, 0, 0, 2, 2, I420, 2, 2)
 TESTAPLANARTOP(Android420, NV12, 2, 0, 1, 2, 2, I420, 2, 2)
 TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2)

 #define TESTPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
                         FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
   TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
     const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
     const int kHeight = benchmark_height_; \
     align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
-    align_buffer_page_end( \
-        src_u, \
-        SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
-            OFF); \
-    align_buffer_page_end( \
-        src_v, \
-        SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
-            OFF); \
+    align_buffer_page_end(src_u, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
+                                     SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
+                                     OFF); \
+    align_buffer_page_end(src_v, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
+                                     SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
+                                     OFF); \
     align_buffer_page_end(dst_y_c, kWidth* kHeight); \
-    align_buffer_page_end( \
-        dst_uv_c, \
-        SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \
+                                        SUBSAMPLE(kHeight, SUBSAMP_Y)); \
     align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
-    align_buffer_page_end( \
-        dst_uv_opt, \
-        SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \
+                                          SUBSAMPLE(kHeight, SUBSAMP_Y)); \
     for (int i = 0; i < kHeight; ++i) \
       for (int j = 0; j < kWidth; ++j) \
         src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
     for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
       for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
         src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
             (fastrand() & 0xff); \
         src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
             (fastrand() & 0xff); \
       } \
     } \
     memset(dst_y_c, 1, kWidth* kHeight); \
     memset(dst_uv_c, 2, \
            SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
     memset(dst_y_opt, 101, kWidth* kHeight); \
     memset(dst_uv_opt, 102, \
            SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
     MaskCpuFlags(disable_cpu_flags_); \
     SRC_FMT_PLANAR##To##FMT_PLANAR( \
         src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
         src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth, \
         dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight); \
     MaskCpuFlags(benchmark_cpu_info_); \
     for (int i = 0; i < benchmark_iterations_; ++i) { \
       SRC_FMT_PLANAR##To##FMT_PLANAR( \
           src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \
           src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth, \
           dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X), kWidth, NEG kHeight); \
     } \
     int max_diff = 0; \
     for (int i = 0; i < kHeight; ++i) { \
       for (int j = 0; j < kWidth; ++j) { \
         int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
                            static_cast<int>(dst_y_opt[i * kWidth + j])); \
         if (abs_diff > max_diff) { \
           max_diff = abs_diff; \
         } \
       } \
     } \
     EXPECT_LE(max_diff, 1); \
     for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
       for (int j = 0; j < SUBSAMPLE(kWidth * 2, SUBSAMP_X); ++j) { \
         int abs_diff = \
             abs(static_cast<int>( \
                     dst_uv_c[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j]) - \
                 static_cast<int>( \
                     dst_uv_opt[i * SUBSAMPLE(kWidth * 2, SUBSAMP_X) + j])); \
         if (abs_diff > max_diff) { \
           max_diff = abs_diff; \
         } \
       } \
     } \
     EXPECT_LE(max_diff, 1); \
     free_aligned_buffer_page_end(dst_y_c); \
     free_aligned_buffer_page_end(dst_uv_c); \
     free_aligned_buffer_page_end(dst_y_opt); \
     free_aligned_buffer_page_end(dst_uv_opt); \
     free_aligned_buffer_page_end(src_y); \
     free_aligned_buffer_page_end(src_u); \
     free_aligned_buffer_page_end(src_v); \
   }
 #define TESTPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
@@ -393,24 +379,19 @@ TESTPLANARTOBP(I420, 2, 2, NV21, 2, 2)
     const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
     const int kHeight = benchmark_height_; \
     align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
-    align_buffer_page_end(src_uv, \
-                          2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
-                                  SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
-                              OFF); \
+    align_buffer_page_end(src_uv, 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
+                                          SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
+                                      OFF); \
     align_buffer_page_end(dst_y_c, kWidth* kHeight); \
-    align_buffer_page_end( \
-        dst_u_c, \
-        SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
-    align_buffer_page_end( \
-        dst_v_c, \
-        SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                                       SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                                       SUBSAMPLE(kHeight, SUBSAMP_Y)); \
     align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
-    align_buffer_page_end( \
-        dst_u_opt, \
-        SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
-    align_buffer_page_end( \
-        dst_v_opt, \
-        SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                                         SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+    align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
+                                         SUBSAMPLE(kHeight, SUBSAMP_Y)); \
     for (int i = 0; i < kHeight; ++i) \
       for (int j = 0; j < kWidth; ++j) \
         src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
@@ -1388,12 +1369,10 @@ TEST_F(LibYUVConvertTest, MJPGToI420) {
   const int kSize = kImageSize + kOff;
   align_buffer_page_end(orig_pixels, kSize);
   align_buffer_page_end(dst_y_opt, benchmark_width_ * benchmark_height_);
-  align_buffer_page_end(
-      dst_u_opt,
-      SUBSAMPLE(benchmark_width_, 2) * SUBSAMPLE(benchmark_height_, 2));
-  align_buffer_page_end(
-      dst_v_opt,
-      SUBSAMPLE(benchmark_width_, 2) * SUBSAMPLE(benchmark_height_, 2));
+  align_buffer_page_end(dst_u_opt, SUBSAMPLE(benchmark_width_, 2) *
+                                       SUBSAMPLE(benchmark_height_, 2));
+  align_buffer_page_end(dst_v_opt, SUBSAMPLE(benchmark_width_, 2) *
+                                       SUBSAMPLE(benchmark_height_, 2));

   // EOI, SOI to make MJPG appear valid.
   memset(orig_pixels, 0, kSize);
@@ -1465,20 +1444,16 @@ TEST_F(LibYUVConvertTest, NV12Crop) {
   uint8* src_uv = src_y + kWidth * kHeight;

   align_buffer_page_end(dst_y, kDestWidth * kDestHeight);
-  align_buffer_page_end(
-      dst_u,
-      SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
-  align_buffer_page_end(
-      dst_v,
-      SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+  align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
+                                   SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+  align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
+                                   SUBSAMPLE(kDestHeight, SUBSAMP_Y));
   align_buffer_page_end(dst_y_2, kDestWidth * kDestHeight);
-  align_buffer_page_end(
-      dst_u_2,
-      SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
-  align_buffer_page_end(
-      dst_v_2,
-      SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+  align_buffer_page_end(dst_u_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
+                                     SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+  align_buffer_page_end(dst_v_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
+                                     SUBSAMPLE(kDestHeight, SUBSAMP_Y));

   for (int i = 0; i < kHeight * kWidth; ++i) {
     src_y[i] = (fastrand() & 0xff);
...