Commit 681c6c67 authored by Frank Barchard, committed by Commit Bot

Add LIBYUV_API to NV12ToABGR and I444Rotate, I444Scale

Gaussian blur low levels ported to 32 bit neon.
But they are not hooked up to anything but a unittest.

Bug:b/248041731, b/132108021, b/129908793
Change-Id: Iccebb8ffd6b719810aa11dd770a525227da4c357
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/1611206
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Chong Zhang <chz@google.com>
parent 05f72b86
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1727
Version: 1730
License: BSD
License File: LICENSE
......
......@@ -256,6 +256,7 @@ int NV21ToARGB(const uint8_t* src_y,
int height);
// Convert NV12 to ABGR.
LIBYUV_API
int NV12ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
......
......@@ -126,6 +126,25 @@ int I444Scale(const uint8_t* src_y,
int dst_height,
enum FilterMode filtering);
// 16 bit per sample counterpart of I444Scale.
// Takes separate Y, U and V planes of uint16_t samples that all share the
// same source size (src_width x src_height) and scales each of them to
// dst_width x dst_height using the requested filtering mode.
// Returns 0 on success, or -1 if any plane pointer is NULL, src_width or
// src_height is 0 or larger than 32768, or dst_width / dst_height is not
// positive.
LIBYUV_API
int I444Scale_16(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_u,
int src_stride_u,
const uint16_t* src_v,
int src_stride_v,
int src_width,
int src_height,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_u,
int dst_stride_u,
uint16_t* dst_v,
int dst_stride_v,
int dst_width,
int dst_height,
enum FilterMode filtering);
#ifdef __cplusplus
// Legacy API. Deprecated.
LIBYUV_API
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1727
#define LIBYUV_VERSION 1730
#endif // INCLUDE_LIBYUV_VERSION_H_
......@@ -1793,8 +1793,9 @@ int NV21ToARGB(const uint8_t* src_y,
}
// Convert NV12 to ABGR.
// To output ABGR instead of ARGB swap the UV and use a mirrrored yuc matrix.
// To output ABGR instead of ARGB swap the UV and use a mirrored yuv matrix.
// To swap the UV use NV12 instead of NV21.
LIBYUV_API
int NV12ToABGR(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
......
......@@ -521,28 +521,19 @@ int I444Rotate(const uint8_t* src_y,
CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
return 0;
case libyuv::kRotate90:
RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y,
width, height);
RotatePlane90(src_u, src_stride_u, dst_u, dst_stride_u,
width, height);
RotatePlane90(src_v, src_stride_v, dst_v, dst_stride_v,
width, height);
RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
RotatePlane90(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
RotatePlane90(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
return 0;
case libyuv::kRotate270:
RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y,
width, height);
RotatePlane270(src_u, src_stride_u, dst_u, dst_stride_u,
width, height);
RotatePlane270(src_v, src_stride_v, dst_v, dst_stride_v,
width, height);
RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
RotatePlane270(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
RotatePlane270(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
return 0;
case libyuv::kRotate180:
RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y,
width, height);
RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u,
width, height);
RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v,
width, height);
RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
return 0;
default:
break;
......
......@@ -2685,6 +2685,84 @@ void ByteToFloatRow_NEON(const uint8_t* src,
: "cc", "memory", "q1", "q2", "q3");
}
// Vertical pass of a 5 tap Gaussian filter (32 bit ARM NEON).
// Combines five rows of 16 bit samples into one row of 32 bit sums using the
// weights 1, 4, 6, 4, 1 for src0, src1, src2, src3, src4 respectively:
//   dst[i] = src0[i] + 4 * src1[i] + 6 * src2[i] + 4 * src3[i] + src4[i]
// The sum is stored as is; no shift or normalization is applied here.
// Each loop iteration reads 8 samples from every source row and writes 8
// results, and the loop repeats while the remaining width is still above 0.
// NOTE(review): a full group of 8 is always processed, so this presumably
// expects width to be a positive multiple of 8 - confirm against callers.
void GaussCol_NEON(const uint16_t* src0,
const uint16_t* src1,
const uint16_t* src2,
const uint16_t* src3,
const uint16_t* src4,
uint32_t* dst,
int width) {
asm volatile(
"vmov.u16 d6, #4 \n" // constant 4
"vmov.u16 d7, #6 \n" // constant 6
"1: \n"
"vld1.16 {q1}, [%0]! \n" // load 8 samples of src0 (first of 5 rows)
"vld1.16 {q2}, [%4]! \n" // load 8 samples of src4
"vaddl.u16 q0, d2, d4 \n" // src0 + src4 widened to 32 bit (* 1), low 4
"vaddl.u16 q1, d3, d5 \n" // src0 + src4 widened to 32 bit (* 1), high 4
"vld1.16 {q2}, [%1]! \n" // load 8 samples of src1
"vmlal.u16 q0, d4, d6 \n" // += src1 * 4
"vmlal.u16 q1, d5, d6 \n" // += src1 * 4
"vld1.16 {q2}, [%2]! \n" // load 8 samples of src2
"vmlal.u16 q0, d4, d7 \n" // += src2 * 6
"vmlal.u16 q1, d5, d7 \n" // += src2 * 6
"vld1.16 {q2}, [%3]! \n" // load 8 samples of src3
"vmlal.u16 q0, d4, d6 \n" // += src3 * 4
"vmlal.u16 q1, d5, d6 \n" // += src3 * 4
"subs %6, %6, #8 \n" // 8 processed per loop
"vst1.32 {q0, q1}, [%5]! \n" // store 8 samples (32 bit each)
"bgt 1b \n"
: "+r"(src0), // %0
"+r"(src1), // %1
"+r"(src2), // %2
"+r"(src3), // %3
"+r"(src4), // %4
"+r"(dst), // %5
"+r"(width) // %6
:
// q3 is listed because the constants live in d6/d7, which alias q3.
: "cc", "memory", "q0", "q1", "q2", "q3");
}
// Horizontal pass of a 5 tap Gaussian filter (32 bit ARM NEON).
// Filters 5 adjacent 32 bit samples of one row with the weights 1, 4, 6, 4, 1
// and narrows the result to 16 bit:
//   dst[i] = (src[i] + 4 * src[i + 1] + 6 * src[i + 2] + 4 * src[i + 3] +
//             src[i + 4]) >> 8, saturated to uint16_t
// Each loop iteration produces 8 outputs and loads 12 samples through src
// (8 plus the 4 that follow), so the source row must be readable for 4
// elements beyond the outputs being produced.
// NOTE(review): a full group of 8 is always processed, so this presumably
// expects width to be a positive multiple of 8 - confirm against callers.
void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width) {
// Shifted views of the same row supply the taps at offsets +1, +2 and +3.
const uint32_t* src1 = src + 1;
const uint32_t* src2 = src + 2;
const uint32_t* src3 = src + 3;
asm volatile(
"vmov.u32 q10, #4 \n" // constant 4
"vmov.u32 q11, #6 \n" // constant 6
"1: \n"
"vld1.32 {q0, q1}, [%0]! \n" // load 12 source samples
"vld1.32 {q2}, [%0] \n" // last 4 of the 12; src is not advanced here
"vadd.u32 q0, q0, q1 \n" // src[i] + src[i + 4] for outputs 0..3 (* 1)
"vadd.u32 q1, q1, q2 \n" // src[i] + src[i + 4] for outputs 4..7 (* 1)
"vld1.32 {q2, q3}, [%2]! \n" // load 8 samples at offset +2
"vmla.u32 q0, q2, q11 \n" // * 6
"vmla.u32 q1, q3, q11 \n" // * 6
"vld1.32 {q2, q3}, [%1]! \n" // load 8 samples at offset +1
"vld1.32 {q8, q9}, [%3]! \n" // load 8 samples at offset +3
"vadd.u32 q2, q2, q8 \n" // add the +1 and +3 taps, both weighted * 4
"vadd.u32 q3, q3, q9 \n"
"vmla.u32 q0, q2, q10 \n" // * 4
"vmla.u32 q1, q3, q10 \n" // * 4
"subs %5, %5, #8 \n" // 8 processed per loop
// NOTE(review): this step was labelled "round and pack", but vqshrn is the
// non-rounding saturating shift (vqrshrn is the rounding form). Confirm
// against GaussRow_C whether rounding is intended.
"vqshrn.u32 d0, q0, #8 \n" // shift right by 8, saturate, narrow to 16 bit
"vqshrn.u32 d1, q1, #8 \n"
"vst1.u16 {q0}, [%4]! \n" // store 8 samples
"bgt 1b \n"
: "+r"(src), // %0
"+r"(src1), // %1
"+r"(src2), // %2
"+r"(src3), // %3
"+r"(dst), // %4
"+r"(width) // %5
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
}
// Convert biplanar NV21 to packed YUV24
void NV21ToYUV24Row_NEON(const uint8_t* src_y,
const uint8_t* src_vu,
......
......@@ -1824,6 +1824,39 @@ int I444Scale(const uint8_t* src_y,
return 0;
}
// Scale a 16 bit per sample I444 image.
// The Y, U and V planes all have the same dimensions, so each one is resized
// from src_width x src_height to dst_width x dst_height with ScalePlane_16.
// Returns 0 on success, or -1 when a plane pointer is NULL, a source
// dimension is 0 or above 32768, or a destination dimension is not positive.
LIBYUV_API
int I444Scale_16(const uint16_t* src_y,
                 int src_stride_y,
                 const uint16_t* src_u,
                 int src_stride_u,
                 const uint16_t* src_v,
                 int src_stride_v,
                 int src_width,
                 int src_height,
                 uint16_t* dst_y,
                 int dst_stride_y,
                 uint16_t* dst_u,
                 int dst_stride_u,
                 uint16_t* dst_v,
                 int dst_stride_v,
                 int dst_width,
                 int dst_height,
                 enum FilterMode filtering) {
  const bool plane_missing =
      !src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v;
  const bool src_size_invalid = src_width == 0 || src_height == 0 ||
                                src_width > 32768 || src_height > 32768;
  const bool dst_size_invalid = dst_width <= 0 || dst_height <= 0;
  if (plane_missing || src_size_invalid || dst_size_invalid) {
    return -1;
  }

  // Planes are processed in Y, U, V order.
  const uint16_t* const src_planes[3] = {src_y, src_u, src_v};
  const int src_strides[3] = {src_stride_y, src_stride_u, src_stride_v};
  uint16_t* const dst_planes[3] = {dst_y, dst_u, dst_v};
  const int dst_strides[3] = {dst_stride_y, dst_stride_u, dst_stride_v};
  for (int plane = 0; plane < 3; ++plane) {
    ScalePlane_16(src_planes[plane], src_strides[plane], src_width, src_height,
                  dst_planes[plane], dst_strides[plane], dst_width, dst_height,
                  filtering);
  }
  return 0;
}
// Deprecated api
LIBYUV_API
int Scale(const uint8_t* src_y,
......
......@@ -3186,7 +3186,8 @@ TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) {
}
GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 640);
for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
int has_neon = TestCpuFlag(kCpuHasNEON);
if (has_neon) {
GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 640);
......@@ -3239,7 +3240,8 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
&orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_c[0],
640);
for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
int has_neon = TestCpuFlag(kCpuHasNEON);
if (has_neon) {
GaussCol_NEON(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
......
......@@ -135,6 +135,123 @@ TEST_F(LibYUVRotateTest, DISABLED_I420Rotate270_Odd) {
benchmark_cpu_info_);
}
static void I444TestRotate(int src_width,
int src_height,
int dst_width,
int dst_height,
libyuv::RotationMode mode,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info) {
if (src_width < 1) {
src_width = 1;
}
if (src_height == 0) {
src_height = 1;
}
if (dst_width < 1) {
dst_width = 1;
}
if (dst_height < 1) {
dst_height = 1;
}
int src_i444_y_size = src_width * Abs(src_height);
int src_i444_uv_size = src_width * Abs(src_height);
int src_i444_size = src_i444_y_size + src_i444_uv_size * 2;
align_buffer_page_end(src_i444, src_i444_size);
for (int i = 0; i < src_i444_size; ++i) {
src_i444[i] = fastrand() & 0xff;
}
int dst_i444_y_size = dst_width * dst_height;
int dst_i444_uv_size = dst_width * dst_height;
int dst_i444_size = dst_i444_y_size + dst_i444_uv_size * 2;
align_buffer_page_end(dst_i444_c, dst_i444_size);
align_buffer_page_end(dst_i444_opt, dst_i444_size);
memset(dst_i444_c, 2, dst_i444_size);
memset(dst_i444_opt, 3, dst_i444_size);
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width,
src_i444 + src_i444_y_size + src_i444_uv_size, src_width,
dst_i444_c, dst_width, dst_i444_c + dst_i444_y_size, dst_width,
dst_i444_c + dst_i444_y_size + dst_i444_uv_size, dst_width,
src_width, src_height, mode);
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
for (int i = 0; i < benchmark_iterations; ++i) {
I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width,
src_i444 + src_i444_y_size + src_i444_uv_size, src_width,
dst_i444_opt, dst_width, dst_i444_opt + dst_i444_y_size,
dst_width, dst_i444_opt + dst_i444_y_size + dst_i444_uv_size,
dst_width, src_width, src_height, mode);
}
// Rotation should be exact.
for (int i = 0; i < dst_i444_size; ++i) {
EXPECT_EQ(dst_i444_c[i], dst_i444_opt[i]);
}
free_aligned_buffer_page_end(dst_i444_c);
free_aligned_buffer_page_end(dst_i444_opt);
free_aligned_buffer_page_end(src_i444);
}
// I444 rotation at the benchmark size. For 90 and 270 degrees the destination
// dimensions are the source dimensions swapped.
TEST_F(LibYUVRotateTest, I444Rotate0_Opt) {
  const int width = benchmark_width_;
  const int height = benchmark_height_;
  I444TestRotate(width, height, width, height, kRotate0, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, I444Rotate90_Opt) {
  const int width = benchmark_width_;
  const int height = benchmark_height_;
  I444TestRotate(width, height, height, width, kRotate90,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, I444Rotate180_Opt) {
  const int width = benchmark_width_;
  const int height = benchmark_height_;
  I444TestRotate(width, height, width, height, kRotate180,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, I444Rotate270_Opt) {
  const int width = benchmark_width_;
  const int height = benchmark_height_;
  I444TestRotate(width, height, height, width, kRotate270,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
// TODO(fbarchard): Remove odd width tests.
// Odd width tests work but disabled because they use C code and can be
// tested by passing an odd width command line or environment variable.
// Each test shrinks the benchmark width by 3 and the height by 1.
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate0_Odd) {
  const int odd_width = benchmark_width_ - 3;
  const int odd_height = benchmark_height_ - 1;
  I444TestRotate(odd_width, odd_height, odd_width, odd_height, kRotate0,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, DISABLED_I444Rotate90_Odd) {
  const int odd_width = benchmark_width_ - 3;
  const int odd_height = benchmark_height_ - 1;
  I444TestRotate(odd_width, odd_height, odd_height, odd_width, kRotate90,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, DISABLED_I444Rotate180_Odd) {
  const int odd_width = benchmark_width_ - 3;
  const int odd_height = benchmark_height_ - 1;
  I444TestRotate(odd_width, odd_height, odd_width, odd_height, kRotate180,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}

TEST_F(LibYUVRotateTest, DISABLED_I444Rotate270_Odd) {
  const int odd_width = benchmark_width_ - 3;
  const int odd_height = benchmark_height_ - 1;
  I444TestRotate(odd_width, odd_height, odd_height, odd_width, kRotate270,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
static void NV12TestRotate(int src_width,
int src_height,
int dst_width,
......
......@@ -22,14 +22,14 @@
namespace libyuv {
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
static int TestFilter(int src_width,
int src_height,
int dst_width,
int dst_height,
FilterMode f,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info) {
static int I420TestFilter(int src_width,
int src_height,
int dst_width,
int dst_height,
FilterMode f,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info) {
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
return 0;
}
......@@ -141,14 +141,14 @@ static int TestFilter(int src_width,
// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
// 0 = exact.
static int TestFilter_16(int src_width,
int src_height,
int dst_width,
int dst_height,
FilterMode f,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info) {
static int I420TestFilter_16(int src_width,
int src_height,
int dst_width,
int dst_height,
FilterMode f,
int benchmark_iterations,
int disable_cpu_flags,
int benchmark_cpu_info) {
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
return 0;
}
......@@ -256,6 +256,241 @@ static int TestFilter_16(int src_width,
return max_diff;
}
// Test I444 scaling with C vs Opt and return maximum pixel difference.
// 0 = exact.
// The source planes are filled by MemRandomize, scaled once with CPU flags
// masked by disable_cpu_flags and benchmark_iterations times with CPU flags
// masked by benchmark_cpu_info. Timing of both runs is printed, and the
// largest absolute per-sample difference across the Y, U and V outputs is
// returned. Also returns 0 when SizeValid() rejects the dimensions or an
// allocation check fails.
// NOTE(review): the allocation-failure returns leave before any
// free_aligned_buffer_page_end call, so buffers that were already allocated
// are not released on those paths.
static int I444TestFilter(int src_width,
                          int src_height,
                          int dst_width,
                          int dst_height,
                          FilterMode f,
                          int benchmark_iterations,
                          int disable_cpu_flags,
                          int benchmark_cpu_info) {
  if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
    return 0;
  }

  int i, j;
  // I444 chroma is not subsampled: U and V have the same size as Y.
  int src_width_uv = Abs(src_width);
  int src_height_uv = Abs(src_height);

  // Widen before multiplying so the product is computed in 64 bits instead
  // of being widened after an int multiplication.
  int64_t src_y_plane_size =
      static_cast<int64_t>(Abs(src_width)) * Abs(src_height);
  int64_t src_uv_plane_size =
      static_cast<int64_t>(src_width_uv) * src_height_uv;

  int src_stride_y = Abs(src_width);
  int src_stride_uv = src_width_uv;

  align_buffer_page_end(src_y, src_y_plane_size);
  align_buffer_page_end(src_u, src_uv_plane_size);
  align_buffer_page_end(src_v, src_uv_plane_size);
  if (!src_y || !src_u || !src_v) {
    printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
    return 0;
  }
  MemRandomize(src_y, src_y_plane_size);
  MemRandomize(src_u, src_uv_plane_size);
  MemRandomize(src_v, src_uv_plane_size);

  int dst_width_uv = dst_width;
  int dst_height_uv = dst_height;

  int64_t dst_y_plane_size = static_cast<int64_t>(dst_width) * dst_height;
  int64_t dst_uv_plane_size =
      static_cast<int64_t>(dst_width_uv) * dst_height_uv;

  int dst_stride_y = dst_width;
  int dst_stride_uv = dst_width_uv;

  align_buffer_page_end(dst_y_c, dst_y_plane_size);
  align_buffer_page_end(dst_u_c, dst_uv_plane_size);
  align_buffer_page_end(dst_v_c, dst_uv_plane_size);
  align_buffer_page_end(dst_y_opt, dst_y_plane_size);
  align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
  align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
  if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
      !dst_v_opt) {
    printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
    return 0;
  }

  MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
  double c_time = get_time();
  I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
            src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
            dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
  c_time = (get_time() - c_time);

  MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
  double opt_time = get_time();
  for (i = 0; i < benchmark_iterations; ++i) {
    I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
              src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
              dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
              f);
  }
  // Average time of one optimized call.
  opt_time = (get_time() - opt_time) / benchmark_iterations;

  // Report performance of C vs OPT.
  printf("filter %d - %8d us C - %8d us OPT\n", f,
         static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));

  // C version may be a little off from the optimized. Order of
  // operations may introduce rounding somewhere. So do a difference
  // of the buffers and report the largest one; the caller decides how
  // much difference is acceptable.
  int max_diff = 0;
  for (i = 0; i < (dst_height); ++i) {
    for (j = 0; j < (dst_width); ++j) {
      int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
                         dst_y_opt[(i * dst_stride_y) + j]);
      if (abs_diff > max_diff) {
        max_diff = abs_diff;
      }
    }
  }

  for (i = 0; i < (dst_height_uv); ++i) {
    for (j = 0; j < (dst_width_uv); ++j) {
      int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
                         dst_u_opt[(i * dst_stride_uv) + j]);
      if (abs_diff > max_diff) {
        max_diff = abs_diff;
      }
      abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
                     dst_v_opt[(i * dst_stride_uv) + j]);
      if (abs_diff > max_diff) {
        max_diff = abs_diff;
      }
    }
  }

  free_aligned_buffer_page_end(dst_y_c);
  free_aligned_buffer_page_end(dst_u_c);
  free_aligned_buffer_page_end(dst_v_c);
  free_aligned_buffer_page_end(dst_y_opt);
  free_aligned_buffer_page_end(dst_u_opt);
  free_aligned_buffer_page_end(dst_v_opt);
  free_aligned_buffer_page_end(src_y);
  free_aligned_buffer_page_end(src_u);
  free_aligned_buffer_page_end(src_v);

  return max_diff;
}
// Test I444 scaling with 8 bit samples vs 16 bit samples and return maximum
// pixel difference. 0 = exact.
// The same random image is scaled as uint8_t planes by I444Scale (CPU flags
// masked by disable_cpu_flags) and, after widening every sample to uint16_t,
// by I444Scale_16 (CPU flags masked by benchmark_cpu_info, repeated
// benchmark_iterations times). Also returns 0 when SizeValid() rejects the
// dimensions or a source allocation check fails.
// NOTE(review): the allocation-failure return leaves before any
// free_aligned_buffer_page_end call, so buffers that were already allocated
// are not released on that path.
static int I444TestFilter_16(int src_width,
                             int src_height,
                             int dst_width,
                             int dst_height,
                             FilterMode f,
                             int benchmark_iterations,
                             int disable_cpu_flags,
                             int benchmark_cpu_info) {
  if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
    return 0;
  }

  int i;
  // I444 chroma is not subsampled: U and V have the same size as Y.
  int src_width_uv = Abs(src_width);
  int src_height_uv = Abs(src_height);

  // Widen before multiplying so the product is computed in 64 bits instead
  // of being widened after an int multiplication.
  int64_t src_y_plane_size =
      static_cast<int64_t>(Abs(src_width)) * Abs(src_height);
  int64_t src_uv_plane_size =
      static_cast<int64_t>(src_width_uv) * src_height_uv;

  int src_stride_y = Abs(src_width);
  int src_stride_uv = src_width_uv;

  // The *_16 buffers are sized in bytes: 2 bytes per sample.
  align_buffer_page_end(src_y, src_y_plane_size);
  align_buffer_page_end(src_u, src_uv_plane_size);
  align_buffer_page_end(src_v, src_uv_plane_size);
  align_buffer_page_end(src_y_16, src_y_plane_size * 2);
  align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
  align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
  if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
    printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
    return 0;
  }
  uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
  uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
  uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);

  MemRandomize(src_y, src_y_plane_size);
  MemRandomize(src_u, src_uv_plane_size);
  MemRandomize(src_v, src_uv_plane_size);

  // Give the 16 bit planes the same sample values as the 8 bit planes.
  for (i = 0; i < src_y_plane_size; ++i) {
    p_src_y_16[i] = src_y[i];
  }
  for (i = 0; i < src_uv_plane_size; ++i) {
    p_src_u_16[i] = src_u[i];
    p_src_v_16[i] = src_v[i];
  }

  int dst_width_uv = dst_width;
  int dst_height_uv = dst_height;

  // 64 bit like the source sizes, so the "* 2" byte counts below are not
  // computed in int either.
  int64_t dst_y_plane_size = static_cast<int64_t>(dst_width) * dst_height;
  int64_t dst_uv_plane_size =
      static_cast<int64_t>(dst_width_uv) * dst_height_uv;

  int dst_stride_y = dst_width;
  int dst_stride_uv = dst_width_uv;

  align_buffer_page_end(dst_y_8, dst_y_plane_size);
  align_buffer_page_end(dst_u_8, dst_uv_plane_size);
  align_buffer_page_end(dst_v_8, dst_uv_plane_size);
  align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
  align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
  align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);

  uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
  uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
  uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);

  MaskCpuFlags(disable_cpu_flags);  // Disable all CPU optimization.
  I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
            src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
            dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);

  MaskCpuFlags(benchmark_cpu_info);  // Enable all CPU optimization.
  for (i = 0; i < benchmark_iterations; ++i) {
    I444Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
                 p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
                 dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
                 dst_stride_uv, dst_width, dst_height, f);
  }

  // Expect an exact match.
  int max_diff = 0;
  for (i = 0; i < dst_y_plane_size; ++i) {
    int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
    if (abs_diff > max_diff) {
      max_diff = abs_diff;
    }
  }
  for (i = 0; i < dst_uv_plane_size; ++i) {
    int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
    if (abs_diff > max_diff) {
      max_diff = abs_diff;
    }
    abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
    if (abs_diff > max_diff) {
      max_diff = abs_diff;
    }
  }

  free_aligned_buffer_page_end(dst_y_8);
  free_aligned_buffer_page_end(dst_u_8);
  free_aligned_buffer_page_end(dst_v_8);
  free_aligned_buffer_page_end(dst_y_16);
  free_aligned_buffer_page_end(dst_u_16);
  free_aligned_buffer_page_end(dst_v_16);
  free_aligned_buffer_page_end(src_y);
  free_aligned_buffer_page_end(src_u);
  free_aligned_buffer_page_end(src_v);
  free_aligned_buffer_page_end(src_y_16);
  free_aligned_buffer_page_end(src_u_16);
  free_aligned_buffer_page_end(src_v_16);

  return max_diff;
}
// The following adjustments in dimensions ensure the scale factor will be
// exactly achieved.
// 2 is chroma subsample.
......@@ -263,16 +498,32 @@ static int TestFilter_16(int src_width,
#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter) { \
int diff = TestFilter( \
TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \
int diff = I420TestFilter( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) { \
int diff = I444TestFilter( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter##_16) { \
int diff = TestFilter_16( \
TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter##_16) { \
int diff = I420TestFilter_16( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter##_16) { \
int diff = I444TestFilter_16( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
......@@ -300,30 +551,58 @@ TEST_FACTOR(3, 1, 3, 0)
#undef DX
#define TEST_SCALETO1(name, width, height, filter, max_diff) \
TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \
int diff = TestFilter(benchmark_width_, benchmark_height_, width, height, \
kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) { \
int diff = I420TestFilter(benchmark_width_, benchmark_height_, width, \
height, kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) { \
int diff = I444TestFilter(benchmark_width_, benchmark_height_, width, \
height, kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter##_16) { \
int diff = I420TestFilter_16( \
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter##_16) { \
int diff = I444TestFilter_16( \
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) { \
int diff = I420TestFilter(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \
int diff = TestFilter(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) { \
int diff = I444TestFilter(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter##_16) { \
int diff = TestFilter_16(benchmark_width_, benchmark_height_, width, \
height, kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
TEST_F(LibYUVScaleTest, \
I420##name##From##width##x##height##_##filter##_16) { \
int diff = I420TestFilter_16(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter##_16) { \
int diff = TestFilter_16(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
TEST_F(LibYUVScaleTest, \
I444##name##From##width##x##height##_##filter##_16) { \
int diff = I444TestFilter_16(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
EXPECT_LE(diff, max_diff); \
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment