Commit f1c00932 authored by Frank Barchard, committed by Commit Bot

NV21 unittest and benchmark

BUG=libyuv:809

Change-Id: I75afb5612dcd05820479848a90ad16b07a7981bc
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/1707229
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
parent f9aacffa
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1732 Version: 1733
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -100,4 +100,8 @@ Inverting can be achieved with almost any libyuv function by passing a negative ...@@ -100,4 +100,8 @@ Inverting can be achieved with almost any libyuv function by passing a negative
I420Mirror and ARGBMirror can also be used to rotate by 180 degrees by passing a negative height. I420Mirror and ARGBMirror can also be used to rotate by 180 degrees by passing a negative height.
# Cropping - Vertical Flip
When cropping from a subsampled format like NV21, the method of setting the start pointers won't work for an odd crop start y on the UV plane.
If the height after cropping will be odd, invert the source - point to the last row, negate the strides, and pass a negative height, which
will re-invert the image as the conversion writes its output.
...@@ -105,6 +105,15 @@ void MergeUVPlane(const uint8_t* src_u, ...@@ -105,6 +105,15 @@ void MergeUVPlane(const uint8_t* src_u,
int width, int width,
int height); int height);
// Swap U and V channels in interleaved UV plane.
LIBYUV_API
void SwapUVPlane(const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_vu,
int dst_stride_vu,
int width,
int height);
// Split interleaved RGB plane into separate R, G and B planes. // Split interleaved RGB plane into separate R, G and B planes.
LIBYUV_API LIBYUV_API
void SplitRGBPlane(const uint8_t* src_rgb, void SplitRGBPlane(const uint8_t* src_rgb,
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ #ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1732 #define LIBYUV_VERSION 1733
#endif // INCLUDE_LIBYUV_VERSION_H_ #endif // INCLUDE_LIBYUV_VERSION_H_
...@@ -503,60 +503,68 @@ void MergeUVPlane(const uint8_t* src_u, ...@@ -503,60 +503,68 @@ void MergeUVPlane(const uint8_t* src_u,
} }
} }
// Convert NV21 to NV12. // Swap U and V channels in interleaved UV plane.
LIBYUV_API LIBYUV_API
int NV21ToNV12(const uint8_t* src_y, void SwapUVPlane(const uint8_t* src_uv,
int src_stride_y, int src_stride_uv,
const uint8_t* src_vu, uint8_t* dst_vu,
int src_stride_vu, int dst_stride_vu,
uint8_t* dst_y, int width,
int dst_stride_y, int height) {
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
int y; int y;
void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) = void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
SwapUVRow_C; SwapUVRow_C;
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if (!src_vu || !dst_uv || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image. // Negative height means invert the image.
if (height < 0) { if (height < 0) {
height = -height; height = -height;
halfheight = (height + 1) >> 1; src_uv = src_uv + (height - 1) * src_stride_uv;
src_y = src_y + (height - 1) * src_stride_y; src_stride_uv = -src_stride_uv;
src_vu = src_vu + (halfheight - 1) * src_stride_vu;
src_stride_y = -src_stride_y;
src_stride_vu = -src_stride_vu;
} }
// Coalesce rows. // Coalesce rows.
if (src_stride_vu == halfwidth * 2 && dst_stride_uv == halfwidth * 2) { if (src_stride_uv == width * 2 && dst_stride_vu == width * 2) {
halfwidth *= halfheight; width *= height;
halfheight = 1; height = 1;
src_stride_vu = dst_stride_uv = 0; src_stride_uv = dst_stride_vu = 0;
} }
#if defined(HAS_SWAPUVROW_NEON) #if defined(HAS_SWAPUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
SwapUVRow = SwapUVRow_Any_NEON; SwapUVRow = SwapUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(width, 16)) {
SwapUVRow = SwapUVRow_NEON; SwapUVRow = SwapUVRow_NEON;
} }
} }
#endif #endif
if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); for (y = 0; y < height; ++y) {
SwapUVRow(src_uv, dst_vu, width);
src_uv += src_stride_uv;
dst_vu += dst_stride_vu;
} }
}
for (y = 0; y < halfheight; ++y) { // Convert NV21 to NV12.
SwapUVRow(src_vu, dst_uv, halfwidth); LIBYUV_API
src_vu += src_stride_vu; int NV21ToNV12(const uint8_t* src_y,
dst_uv += dst_stride_uv; int src_stride_y,
const uint8_t* src_vu,
int src_stride_vu,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if (!src_vu || !dst_uv || width <= 0 || height == 0) {
return -1;
}
if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
} }
SwapUVPlane(src_vu, src_stride_vu, dst_uv, dst_stride_uv, halfwidth,
halfheight);
return 0; return 0;
} }
......
...@@ -397,67 +397,68 @@ TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2) ...@@ -397,67 +397,68 @@ TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2)
#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ #define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, \ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, \
OFF) \ OFF, DOY) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = benchmark_height_; \ const int kHeight = benchmark_height_; \
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
align_buffer_page_end(src_uv, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2 * \ align_buffer_page_end(src_uv, 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \ SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
OFF); \ OFF); \
align_buffer_page_end(dst_y_c, kWidth* kHeight); \ align_buffer_page_end(dst_y_c, kWidth* kHeight); \
align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ align_buffer_page_end(dst_uv_c, 2 * SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * \ align_buffer_page_end(dst_uv_opt, 2 * SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \
for (int i = 0; i < kHeight; ++i) \ for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kWidth; ++j) \ for (int j = 0; j < kWidth; ++j) \
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \
for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ for (int j = 0; j < 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \
src_uv[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) * 2 + j + 0 + OFF] = \ src_uv[(i * 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
(fastrand() & 0xff); \
src_uv[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) * 2 + j + 1 + OFF] = \
(fastrand() & 0xff); \ (fastrand() & 0xff); \
} \ } \
} \ } \
memset(dst_y_c, 1, kWidth* kHeight); \ memset(dst_y_c, 1, kWidth* kHeight); \
memset(dst_uv_c, 2, \ memset(dst_uv_c, 2, \
SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ 2 * SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_y_opt, 101, kWidth* kHeight); \ memset(dst_y_opt, 101, kWidth* kHeight); \
memset(dst_uv_opt, 102, \ memset(dst_uv_opt, 102, \
SUBSAMPLE(kWidth, SUBSAMP_X) * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ 2 * SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
MaskCpuFlags(disable_cpu_flags_); \ MaskCpuFlags(disable_cpu_flags_); \
SRC_FMT_PLANAR##To##FMT_PLANAR( \ SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y + OFF, kWidth, src_uv + OFF, \ src_y + OFF, kWidth, src_uv + OFF, \
SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2, dst_y_c, kWidth, dst_uv_c, \ 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), DOY ? dst_y_c : NULL, kWidth, \
SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \ dst_uv_c, 2 * SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \ MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \ for (int i = 0; i < benchmark_iterations_; ++i) { \
SRC_FMT_PLANAR##To##FMT_PLANAR( \ SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y + OFF, kWidth, src_uv + OFF, \ src_y + OFF, kWidth, src_uv + OFF, \
SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * 2, dst_y_opt, kWidth, dst_uv_opt, \ 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X), DOY ? dst_y_opt : NULL, \
SUBSAMPLE(kWidth, SUBSAMP_X) * 2, kWidth, NEG kHeight); \ kWidth, dst_uv_opt, 2 * SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, \
NEG kHeight); \
} \ } \
int max_diff = 0; \ int max_diff = 0; \
for (int i = 0; i < kHeight; ++i) { \ if (DOY) { \
for (int j = 0; j < kWidth; ++j) { \ for (int i = 0; i < kHeight; ++i) { \
int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \ for (int j = 0; j < kWidth; ++j) { \
static_cast<int>(dst_y_opt[i * kWidth + j])); \ int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
if (abs_diff > max_diff) { \ static_cast<int>(dst_y_opt[i * kWidth + j])); \
max_diff = abs_diff; \ if (abs_diff > max_diff) { \
max_diff = abs_diff; \
} \
} \ } \
} \ } \
EXPECT_LE(max_diff, 1); \
} \ } \
EXPECT_LE(max_diff, 1); \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X) * 2; ++j) { \ for (int j = 0; j < 2 * SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
int abs_diff = \ int abs_diff = \
abs(static_cast<int>( \ abs(static_cast<int>( \
dst_uv_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j]) - \ dst_uv_c[i * 2 * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
static_cast<int>( \ static_cast<int>( \
dst_uv_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) * 2 + j])); \ dst_uv_opt[i * 2 * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
if (abs_diff > max_diff) { \ if (abs_diff > max_diff) { \
max_diff = abs_diff; \ max_diff = abs_diff; \
} \ } \
...@@ -472,21 +473,21 @@ TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2) ...@@ -472,21 +473,21 @@ TESTPLANARTOBP(I400, 2, 2, NV21, 2, 2)
free_aligned_buffer_page_end(src_uv); \ free_aligned_buffer_page_end(src_uv); \
} }
#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ #define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0) \ SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0, 1) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0) \ SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Unaligned, +, 1, \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ 1) \
SUBSAMP_X, SUBSAMP_Y, benchmark_width, _Unaligned, +, 1) \ TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1) \
SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0) \ TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1) \
SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0) TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \
SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0)
// TODO(fbarchard): Fix msan on this unittest TESTBIPLANARTOBP(NV21, 2, 2, NV12, 2, 2)
// TESTBIPLANARTOBP(NV21, 2, 2, NV12, 2, 2)
#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ #define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF, \ FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF, \
...@@ -993,7 +994,7 @@ TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2) ...@@ -993,7 +994,7 @@ TESTATOPLANAR(I400, 1, 1, I420, 2, 2, 2)
TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2) TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2)
TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4) TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4)
TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4) TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4)
TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, 4) TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, 10)
TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5) TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5)
TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4) TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4)
TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2) TESTATOPLANAR(UYVY, 2, 1, I420, 2, 2, 2)
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <time.h> #include <time.h>
// row.h defines SIMD_ALIGNED, overriding unit_test.h // row.h defines SIMD_ALIGNED, overriding unit_test.h
// TODO(fbarchard): Remove row.h from unittests. Test public functions.
#include "libyuv/row.h" /* For ScaleSumSamples_Neon */ #include "libyuv/row.h" /* For ScaleSumSamples_Neon */
#include "../unit_test/unit_test.h" #include "../unit_test/unit_test.h"
...@@ -2321,7 +2322,8 @@ TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) { ...@@ -2321,7 +2322,8 @@ TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) {
} }
TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) { TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
const int kPixels = benchmark_width_ * benchmark_height_; // Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 4); align_buffer_page_end(src_pixels, kPixels * 4);
align_buffer_page_end(dst_pixels_opt, kPixels); align_buffer_page_end(dst_pixels_opt, kPixels);
align_buffer_page_end(dst_pixels_c, kPixels); align_buffer_page_end(dst_pixels_c, kPixels);
...@@ -2349,7 +2351,8 @@ TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) { ...@@ -2349,7 +2351,8 @@ TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) {
} }
TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) { TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) {
const int kPixels = benchmark_width_ * benchmark_height_; // Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(orig_pixels, kPixels); align_buffer_page_end(orig_pixels, kPixels);
align_buffer_page_end(dst_pixels_opt, kPixels * 4); align_buffer_page_end(dst_pixels_opt, kPixels * 4);
align_buffer_page_end(dst_pixels_c, kPixels * 4); align_buffer_page_end(dst_pixels_c, kPixels * 4);
...@@ -2482,7 +2485,8 @@ TEST_F(LibYUVPlanarTest, SetPlane_Opt) { ...@@ -2482,7 +2485,8 @@ TEST_F(LibYUVPlanarTest, SetPlane_Opt) {
} }
TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) { TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_; // Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 2); align_buffer_page_end(src_pixels, kPixels * 2);
align_buffer_page_end(tmp_pixels_u, kPixels); align_buffer_page_end(tmp_pixels_u, kPixels);
align_buffer_page_end(tmp_pixels_v, kPixels); align_buffer_page_end(tmp_pixels_v, kPixels);
...@@ -2526,7 +2530,8 @@ TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) { ...@@ -2526,7 +2530,8 @@ TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) {
} }
TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) { TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_; // Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 2); align_buffer_page_end(src_pixels, kPixels * 2);
align_buffer_page_end(tmp_pixels_u, kPixels); align_buffer_page_end(tmp_pixels_u, kPixels);
align_buffer_page_end(tmp_pixels_v, kPixels); align_buffer_page_end(tmp_pixels_v, kPixels);
...@@ -2568,8 +2573,39 @@ TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) { ...@@ -2568,8 +2573,39 @@ TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
free_aligned_buffer_page_end(dst_pixels_c); free_aligned_buffer_page_end(dst_pixels_c);
} }
TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) {
// Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 2);
align_buffer_page_end(dst_pixels_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_c, kPixels * 2);
MemRandomize(src_pixels, kPixels * 2);
MemRandomize(dst_pixels_opt, kPixels * 2);
MemRandomize(dst_pixels_c, kPixels * 2);
MaskCpuFlags(disable_cpu_flags_);
SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_c,
benchmark_width_ * 2, benchmark_width_, benchmark_height_);
MaskCpuFlags(benchmark_cpu_info_);
for (int i = 0; i < benchmark_iterations_; ++i) {
SwapUVPlane(src_pixels, benchmark_width_ * 2, dst_pixels_opt,
benchmark_width_ * 2, benchmark_width_, benchmark_height_);
}
for (int i = 0; i < kPixels * 2; ++i) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
free_aligned_buffer_page_end(src_pixels);
free_aligned_buffer_page_end(dst_pixels_opt);
free_aligned_buffer_page_end(dst_pixels_c);
}
TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) { TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_; // Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 3); align_buffer_page_end(src_pixels, kPixels * 3);
align_buffer_page_end(tmp_pixels_r, kPixels); align_buffer_page_end(tmp_pixels_r, kPixels);
align_buffer_page_end(tmp_pixels_g, kPixels); align_buffer_page_end(tmp_pixels_g, kPixels);
...@@ -2617,7 +2653,8 @@ TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) { ...@@ -2617,7 +2653,8 @@ TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
} }
TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) { TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_; // Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels, kPixels * 3); align_buffer_page_end(src_pixels, kPixels * 3);
align_buffer_page_end(tmp_pixels_r, kPixels); align_buffer_page_end(tmp_pixels_r, kPixels);
align_buffer_page_end(tmp_pixels_g, kPixels); align_buffer_page_end(tmp_pixels_g, kPixels);
...@@ -2666,7 +2703,8 @@ TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) { ...@@ -2666,7 +2703,8 @@ TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
// TODO(fbarchard): improve test for platforms and cpu detect // TODO(fbarchard): improve test for platforms and cpu detect
#ifdef HAS_MERGEUVROW_16_AVX2 #ifdef HAS_MERGEUVROW_16_AVX2
TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) { TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_; // Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels_u, kPixels * 2); align_buffer_page_end(src_pixels_u, kPixels * 2);
align_buffer_page_end(src_pixels_v, kPixels * 2); align_buffer_page_end(src_pixels_v, kPixels * 2);
align_buffer_page_end(dst_pixels_uv_opt, kPixels * 2 * 2); align_buffer_page_end(dst_pixels_uv_opt, kPixels * 2 * 2);
...@@ -2710,7 +2748,8 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) { ...@@ -2710,7 +2748,8 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) {
// TODO(fbarchard): Improve test for more platforms. // TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_MULTIPLYROW_16_AVX2 #ifdef HAS_MULTIPLYROW_16_AVX2
TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) { TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
const int kPixels = benchmark_width_ * benchmark_height_; // Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels_y, kPixels * 2); align_buffer_page_end(src_pixels_y, kPixels * 2);
align_buffer_page_end(dst_pixels_y_opt, kPixels * 2); align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_y_c, kPixels * 2); align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
...@@ -2746,7 +2785,8 @@ TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) { ...@@ -2746,7 +2785,8 @@ TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
#endif // HAS_MULTIPLYROW_16_AVX2 #endif // HAS_MULTIPLYROW_16_AVX2
TEST_F(LibYUVPlanarTest, Convert16To8Plane) { TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
const int kPixels = benchmark_width_ * benchmark_height_; // Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels_y, kPixels * 2); align_buffer_page_end(src_pixels_y, kPixels * 2);
align_buffer_page_end(dst_pixels_y_opt, kPixels); align_buffer_page_end(dst_pixels_y_opt, kPixels);
align_buffer_page_end(dst_pixels_y_c, kPixels); align_buffer_page_end(dst_pixels_y_c, kPixels);
...@@ -2823,7 +2863,8 @@ TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) { ...@@ -2823,7 +2863,8 @@ TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
#endif // HAS_CONVERT16TO8ROW_AVX2 #endif // HAS_CONVERT16TO8ROW_AVX2
TEST_F(LibYUVPlanarTest, Convert8To16Plane) { TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
const int kPixels = benchmark_width_ * benchmark_height_; // Round count up to multiple of 16
const int kPixels = (benchmark_width_ * benchmark_height_ + 15) & ~15;
align_buffer_page_end(src_pixels_y, kPixels); align_buffer_page_end(src_pixels_y, kPixels);
align_buffer_page_end(dst_pixels_y_opt, kPixels * 2); align_buffer_page_end(dst_pixels_y_opt, kPixels * 2);
align_buffer_page_end(dst_pixels_y_c, kPixels * 2); align_buffer_page_end(dst_pixels_y_c, kPixels * 2);
...@@ -3271,14 +3312,26 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) { ...@@ -3271,14 +3312,26 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
TEST_F(LibYUVPlanarTest, SwapUVRow) { TEST_F(LibYUVPlanarTest, SwapUVRow) {
const int kPixels = benchmark_width_ * benchmark_height_; const int kPixels = benchmark_width_ * benchmark_height_;
void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
SwapUVRow_C;
align_buffer_page_end(src_pixels_vu, kPixels * 2); align_buffer_page_end(src_pixels_vu, kPixels * 2);
align_buffer_page_end(dst_pixels_uv, kPixels * 2); align_buffer_page_end(dst_pixels_uv, kPixels * 2);
MemRandomize(src_pixels_vu, kPixels * 2); MemRandomize(src_pixels_vu, kPixels * 2);
memset(dst_pixels_uv, 1, kPixels * 2); memset(dst_pixels_uv, 1, kPixels * 2);
SwapUVRow_C(src_pixels_vu, dst_pixels_uv, kPixels); #if defined(HAS_SWAPUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
SwapUVRow = SwapUVRow_Any_NEON;
if (IS_ALIGNED(kPixels, 16)) {
SwapUVRow = SwapUVRow_NEON;
}
}
#endif
for (int j = 0; j < benchmark_iterations_; j++) {
SwapUVRow(src_pixels_vu, dst_pixels_uv, kPixels);
}
for (int i = 0; i < kPixels; ++i) { for (int i = 0; i < kPixels; ++i) {
EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]); EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]);
EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]); EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment