Commit 5336217f authored by Frank Barchard, committed by Commit Bot

H010Copy function to copy 16 bit planar formats

Bug: libyuv:751
Test: LibYUVConvertTest.H010ToH010_Opt
Change-Id: I996d309040a14193a97d05b62ac0b3e1ad1ee74b
Reviewed-on: https://chromium-review.googlesource.com/823445
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Cheng Wang <wangcheng@google.com>
Reviewed-by: Richard Winterton <rrwinterton@gmail.com>
parent 3b81288e
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1682 Version: 1683
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -77,6 +77,42 @@ int I420Copy(const uint8* src_y, ...@@ -77,6 +77,42 @@ int I420Copy(const uint8* src_y,
int width, int width,
int height); int height);
// Copy I010 to I010
#define I010ToI010 I010Copy
#define H010ToH010 I010Copy
LIBYUV_API
int I010Copy(const uint16* src_y,
int src_stride_y,
const uint16* src_u,
int src_stride_u,
const uint16* src_v,
int src_stride_v,
uint16* dst_y,
int dst_stride_y,
uint16* dst_u,
int dst_stride_u,
uint16* dst_v,
int dst_stride_v,
int width,
int height);
// Convert 10 bit YUV to 8 bit
LIBYUV_API
int I010ToI420(const uint16* src_y,
int src_stride_y,
const uint16* src_u,
int src_stride_u,
const uint16* src_v,
int src_stride_v,
uint8* dst_y,
int dst_stride_y,
uint8* dst_u,
int dst_stride_u,
uint8* dst_v,
int dst_stride_v,
int width,
int height);
// Convert I400 (grey) to I420. // Convert I400 (grey) to I420.
LIBYUV_API LIBYUV_API
int I400ToI420(const uint8* src_y, int I400ToI420(const uint8* src_y,
......
...@@ -39,6 +39,15 @@ void CopyPlane_16(const uint16* src_y, ...@@ -39,6 +39,15 @@ void CopyPlane_16(const uint16* src_y,
int width, int width,
int height); int height);
LIBYUV_API
void Convert16To8Plane(const uint16* src_y,
int src_stride_y,
uint8* dst_y,
int dst_stride_y,
int scale, // 16384 for 10 bits
int width,
int height);
// Set a plane of data to a 32 bit value. // Set a plane of data to a 32 bit value.
LIBYUV_API LIBYUV_API
void SetPlane(uint8* dst_y, void SetPlane(uint8* dst_y,
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ #ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1682 #define LIBYUV_VERSION 1683
#endif // INCLUDE_LIBYUV_VERSION_H_ #endif // INCLUDE_LIBYUV_VERSION_H_
...@@ -106,6 +106,92 @@ int I420Copy(const uint8* src_y, ...@@ -106,6 +106,92 @@ int I420Copy(const uint8* src_y,
return 0; return 0;
} }
// Copy I010 with optional flipping
LIBYUV_API
int I010Copy(const uint16* src_y,
             int src_stride_y,
             const uint16* src_u,
             int src_stride_u,
             const uint16* src_v,
             int src_stride_v,
             uint16* dst_y,
             int dst_stride_y,
             uint16* dst_u,
             int dst_stride_u,
             uint16* dst_v,
             int dst_stride_v,
             int width,
             int height) {
  // Reject missing chroma planes and degenerate dimensions. dst_y may be
  // NULL to skip the luma copy.
  if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
    return -1;
  }
  // A negative height requests a vertical flip: start reading each source
  // plane at its last row and walk backwards with negated strides.
  if (height < 0) {
    height = -height;
    const int flip_halfheight = (height + 1) >> 1;
    src_y += (height - 1) * src_stride_y;
    src_u += (flip_halfheight - 1) * src_stride_u;
    src_v += (flip_halfheight - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }
  // 420 chroma planes are half width / half height, rounded up.
  const int halfwidth = (width + 1) >> 1;
  const int halfheight = (height + 1) >> 1;
  if (dst_y) {
    CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
  CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
  return 0;
}
// Convert 10 bit YUV to 8 bit
LIBYUV_API
int I010ToI420(const uint16* src_y,
               int src_stride_y,
               const uint16* src_u,
               int src_stride_u,
               const uint16* src_v,
               int src_stride_v,
               uint8* dst_y,
               int dst_stride_y,
               uint8* dst_u,
               int dst_stride_u,
               uint8* dst_v,
               int dst_stride_v,
               int width,
               int height) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  // Validate all plane pointers: unlike I010Copy, the Y plane is converted
  // unconditionally below, so src_y/dst_y must also be non-NULL.
  if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v || width <= 0 ||
      height == 0) {
    return -1;
  }
  // Negative height means invert the image: read the source planes bottom-up
  // by pointing at the last row and negating the strides.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (halfheight - 1) * src_stride_u;
    src_v = src_v + (halfheight - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }
  // Scale 16384 maps 10 bit samples to 8 bit: (v * 16384) >> 16 == v >> 2.
  // Convert Y plane.
  Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, 16384, width,
                    height);
  // Convert UV planes.
  Convert16To8Plane(src_u, src_stride_u, dst_u, dst_stride_u, 16384, halfwidth,
                    halfheight);
  Convert16To8Plane(src_v, src_stride_v, dst_v, dst_stride_v, 16384, halfwidth,
                    halfheight);
  return 0;
}
// 422 chroma is 1/2 width, 1x height // 422 chroma is 1/2 width, 1x height
// 420 chroma is 1/2 width, 1/2 height // 420 chroma is 1/2 width, 1/2 height
LIBYUV_API LIBYUV_API
......
...@@ -1240,8 +1240,8 @@ int ConvertFromI420(const uint8* y, ...@@ -1240,8 +1240,8 @@ int ConvertFromI420(const uint8* y,
break; break;
case FOURCC_RGBP: case FOURCC_RGBP:
r = I420ToRGB565(y, y_stride, u, u_stride, v, v_stride, dst_sample, r = I420ToRGB565(y, y_stride, u, u_stride, v, v_stride, dst_sample,
dst_sample_stride ? dst_sample_stride : width * 2, dst_sample_stride ? dst_sample_stride : width * 2, width,
width, height); height);
break; break;
case FOURCC_RGBO: case FOURCC_RGBO:
r = I420ToARGB1555(y, y_stride, u, u_stride, v, v_stride, dst_sample, r = I420ToARGB1555(y, y_stride, u, u_stride, v, v_stride, dst_sample,
...@@ -1255,8 +1255,8 @@ int ConvertFromI420(const uint8* y, ...@@ -1255,8 +1255,8 @@ int ConvertFromI420(const uint8* y,
break; break;
case FOURCC_24BG: case FOURCC_24BG:
r = I420ToRGB24(y, y_stride, u, u_stride, v, v_stride, dst_sample, r = I420ToRGB24(y, y_stride, u, u_stride, v, v_stride, dst_sample,
dst_sample_stride ? dst_sample_stride : width * 3, dst_sample_stride ? dst_sample_stride : width * 3, width,
width, height); height);
break; break;
case FOURCC_RAW: case FOURCC_RAW:
r = I420ToRAW(y, y_stride, u, u_stride, v, v_stride, dst_sample, r = I420ToRAW(y, y_stride, u, u_stride, v, v_stride, dst_sample,
......
...@@ -50,6 +50,7 @@ void CopyPlane(const uint8* src_y, ...@@ -50,6 +50,7 @@ void CopyPlane(const uint8* src_y,
if (src_y == dst_y && src_stride_y == dst_stride_y) { if (src_y == dst_y && src_stride_y == dst_stride_y) {
return; return;
} }
#if defined(HAS_COPYROW_SSE2) #if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2; CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
...@@ -120,6 +121,56 @@ void CopyPlane_16(const uint16* src_y, ...@@ -120,6 +121,56 @@ void CopyPlane_16(const uint16* src_y,
} }
} }
// Convert a plane of 16 bit data to 8 bit
LIBYUV_API
void Convert16To8Plane(const uint16* src_y,
                       int src_stride_y,
                       uint8* dst_y,
                       int dst_stride_y,
                       int scale,  // 16384 for 10 bits
                       int width,
                       int height) {
  int y;
  // Start with the portable C row function; SIMD variants may override below.
  void (*Convert16To8Row)(const uint16* src_y, uint8* dst_y, int scale,
                          int width) = Convert16To8Row_C;
  // Negative height means invert the image: write the destination bottom-up.
  if (height < 0) {
    height = -height;
    dst_y += (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }
  // Coalesce rows: when both planes are contiguous, process the whole image
  // as one long row.
  if (src_stride_y == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }
#if defined(HAS_CONVERT16TO8ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    Convert16To8Row = IS_ALIGNED(width, 16) ? Convert16To8Row_SSSE3
                                            : Convert16To8Row_Any_SSSE3;
  }
#endif
#if defined(HAS_CONVERT16TO8ROW_AVX2)
  // AVX2 is checked last so it takes precedence over SSSE3 when available.
  if (TestCpuFlag(kCpuHasAVX2)) {
    Convert16To8Row = IS_ALIGNED(width, 32) ? Convert16To8Row_AVX2
                                            : Convert16To8Row_Any_AVX2;
  }
#endif
  // Convert plane, one row at a time.
  for (y = 0; y < height; ++y) {
    Convert16To8Row(src_y, dst_y, scale, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}
// Copy I422. // Copy I422.
LIBYUV_API LIBYUV_API
int I422Copy(const uint8* src_y, int I422Copy(const uint8* src_y,
......
...@@ -1856,7 +1856,7 @@ void Convert16To8Row_C(const uint16* src_y, ...@@ -1856,7 +1856,7 @@ void Convert16To8Row_C(const uint16* src_y,
int width) { int width) {
int x; int x;
for (x = 0; x < width; ++x) { for (x = 0; x < width; ++x) {
dst_y[x] = (src_y[x] * scale) >> 16; dst_y[x] = clamp255((src_y[x] * scale) >> 16);
} }
} }
......
...@@ -2956,20 +2956,20 @@ void Convert16To8Row_SSSE3(const uint16* src_y, ...@@ -2956,20 +2956,20 @@ void Convert16To8Row_SSSE3(const uint16* src_y,
int width) { int width) {
// clang-format off // clang-format off
asm volatile ( asm volatile (
"movd %3,%%xmm3 \n" "movd %3,%%xmm2 \n"
"punpcklwd %%xmm3,%%xmm3 \n" "punpcklwd %%xmm2,%%xmm2 \n"
"pshufd $0x0,%%xmm3,%%xmm3 \n" "pshufd $0x0,%%xmm2,%%xmm2 \n"
// 32 pixels per loop. // 32 pixels per loop.
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"movdqu (%0),%%xmm0 \n" "movdqu (%0),%%xmm0 \n"
"movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x10(%0),%%xmm1 \n"
"pmulhuw %%xmm3,%%xmm0 \n" "add $0x20,%0 \n"
"pmulhuw %%xmm3,%%xmm1 \n" "pmulhuw %%xmm2,%%xmm0 \n"
"pmulhuw %%xmm2,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n" "packuswb %%xmm1,%%xmm0 \n"
"movdqu %%xmm0,(%1) \n" "movdqu %%xmm0,(%1) \n"
"add $0x20,%0 \n"
"add $0x10,%1 \n" "add $0x10,%1 \n"
"sub $0x10,%2 \n" "sub $0x10,%2 \n"
"jg 1b \n" "jg 1b \n"
...@@ -2977,7 +2977,7 @@ void Convert16To8Row_SSSE3(const uint16* src_y, ...@@ -2977,7 +2977,7 @@ void Convert16To8Row_SSSE3(const uint16* src_y,
"+r"(dst_y), // %1 "+r"(dst_y), // %1
"+r"(width) // %2 "+r"(width) // %2
: "r"(scale) // %3 : "r"(scale) // %3
: "memory", "cc", "xmm0", "xmm1", "xmm3"); : "memory", "cc", "xmm0", "xmm1", "xmm2");
// clang-format on // clang-format on
} }
...@@ -2988,22 +2988,21 @@ void Convert16To8Row_AVX2(const uint16* src_y, ...@@ -2988,22 +2988,21 @@ void Convert16To8Row_AVX2(const uint16* src_y,
int width) { int width) {
// clang-format off // clang-format off
asm volatile ( asm volatile (
"vmovd %3,%%xmm3 \n" "vmovd %3,%%xmm2 \n"
"vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n" "vpunpcklwd %%xmm2,%%xmm2,%%xmm2 \n"
"vbroadcastss %%xmm3,%%ymm3 \n" "vbroadcastss %%xmm2,%%ymm2 \n"
// 32 pixels per loop. // 32 pixels per loop.
LABELALIGN LABELALIGN
"1: \n" "1: \n"
"vmovdqu (%0),%%ymm0 \n" "vmovdqu (%0),%%ymm0 \n"
"vmovdqu 0x20(%0),%%ymm1 \n" "vmovdqu 0x20(%0),%%ymm1 \n"
"vpmulhuw %%ymm3,%%ymm0,%%ymm0 \n" "add $0x40,%0 \n"
"vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
"vpmulhuw %%ymm2,%%ymm1,%%ymm1 \n"
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" // mutates "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" // mutates
"vpermq $0xd8,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vmovdqu %%ymm0,(%1) \n" "vmovdqu %%ymm0,(%1) \n"
"add $0x40,%0 \n"
"add $0x20,%1 \n" "add $0x20,%1 \n"
"sub $0x20,%2 \n" "sub $0x20,%2 \n"
"jg 1b \n" "jg 1b \n"
...@@ -3012,7 +3011,7 @@ void Convert16To8Row_AVX2(const uint16* src_y, ...@@ -3012,7 +3011,7 @@ void Convert16To8Row_AVX2(const uint16* src_y,
"+r"(dst_y), // %1 "+r"(dst_y), // %1
"+r"(width) // %2 "+r"(width) // %2
: "r"(scale) // %3 : "r"(scale) // %3
: "memory", "cc", "xmm0", "xmm1", "xmm3"); : "memory", "cc", "xmm0", "xmm1", "xmm2");
// clang-format on // clang-format on
} }
#endif // HAS_MULTIPLYROW_16_AVX2 #endif // HAS_MULTIPLYROW_16_AVX2
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
#include <assert.h>
#include <stdlib.h> #include <stdlib.h>
#include <time.h> #include <time.h>
...@@ -35,98 +36,71 @@ namespace libyuv { ...@@ -35,98 +36,71 @@ namespace libyuv {
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) #define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ // Planar test
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, W1280, N, NEG, OFF) \
#define TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \ TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
assert(SRC_BPC == 1 || SRC_BPC == 2); \
assert(DST_BPC == 1 || DST_BPC == 2); \
assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2); \
assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2); \
assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2); \
assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2); \
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \ const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = benchmark_height_; \ const int kHeight = benchmark_height_; \
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
align_buffer_page_end(src_u, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \ const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
OFF); \ const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
align_buffer_page_end(src_v, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \ align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \ align_buffer_page_end(src_u, \
OFF); \ kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
align_buffer_page_end(dst_y_c, kWidth* kHeight); \ align_buffer_page_end(src_v, \
align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \ align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \ align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \ align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \ MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
for (int i = 0; i < kHeight; ++i) \ MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \
for (int j = 0; j < kWidth; ++j) \ memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
for (int i = 0; i < SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); ++i) { \ memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
for (int j = 0; j < SUBSAMPLE(kWidth, SRC_SUBSAMP_X); ++j) { \ memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
src_u[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \ memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
(fastrand() & 0xff); \ memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
src_v[(i * SUBSAMPLE(kWidth, SRC_SUBSAMP_X)) + j + OFF] = \
(fastrand() & 0xff); \
} \
} \
memset(dst_y_c, 1, kWidth* kHeight); \
memset(dst_u_c, 2, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_v_c, 3, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_y_opt, 101, kWidth* kHeight); \
memset(dst_u_opt, 102, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_v_opt, 103, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
MaskCpuFlags(disable_cpu_flags_); \ MaskCpuFlags(disable_cpu_flags_); \
SRC_FMT_PLANAR##To##FMT_PLANAR( \ SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ reinterpret_cast<SRC_T*>(src_y + OFF), kWidth, \
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_c, kWidth, \ reinterpret_cast<SRC_T*>(src_u + OFF), kSrcHalfWidth, \
dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_c, \ reinterpret_cast<SRC_T*>(src_v + OFF), kSrcHalfWidth, \
SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \ reinterpret_cast<DST_T*>(dst_y_c), kWidth, \
reinterpret_cast<DST_T*>(dst_u_c), kDstHalfWidth, \
reinterpret_cast<DST_T*>(dst_v_c), kDstHalfWidth, kWidth, \
NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \ MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \ for (int i = 0; i < benchmark_iterations_; ++i) { \
SRC_FMT_PLANAR##To##FMT_PLANAR( \ SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y + OFF, kWidth, src_u + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), \ reinterpret_cast<SRC_T*>(src_y + OFF), kWidth, \
src_v + OFF, SUBSAMPLE(kWidth, SRC_SUBSAMP_X), dst_y_opt, kWidth, \ reinterpret_cast<SRC_T*>(src_u + OFF), kSrcHalfWidth, \
dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X), dst_v_opt, \ reinterpret_cast<SRC_T*>(src_v + OFF), kSrcHalfWidth, \
SUBSAMPLE(kWidth, SUBSAMP_X), kWidth, NEG kHeight); \ reinterpret_cast<DST_T*>(dst_y_opt), kWidth, \
reinterpret_cast<DST_T*>(dst_u_opt), kDstHalfWidth, \
reinterpret_cast<DST_T*>(dst_v_opt), kDstHalfWidth, kWidth, \
NEG kHeight); \
} \ } \
int max_diff = 0; \ for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \
for (int i = 0; i < kHeight; ++i) { \ EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \
for (int j = 0; j < kWidth; ++j) { \
int abs_diff = abs(static_cast<int>(dst_y_c[i * kWidth + j]) - \
static_cast<int>(dst_y_opt[i * kWidth + j])); \
if (abs_diff > max_diff) { \
max_diff = abs_diff; \
} \
} \
} \
EXPECT_EQ(0, max_diff); \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
int abs_diff = abs( \
static_cast<int>(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
static_cast<int>( \
dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
if (abs_diff > max_diff) { \
max_diff = abs_diff; \
} \
} \
} \ } \
EXPECT_LE(max_diff, 3); \ for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \
int abs_diff = abs( \
static_cast<int>(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]) - \
static_cast<int>( \
dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j])); \
if (abs_diff > max_diff) { \
max_diff = abs_diff; \
} \
} \
} \ } \
EXPECT_LE(max_diff, 3); \
free_aligned_buffer_page_end(dst_y_c); \ free_aligned_buffer_page_end(dst_y_c); \
free_aligned_buffer_page_end(dst_u_c); \ free_aligned_buffer_page_end(dst_u_c); \
free_aligned_buffer_page_end(dst_v_c); \ free_aligned_buffer_page_end(dst_v_c); \
...@@ -138,25 +112,32 @@ namespace libyuv { ...@@ -138,25 +112,32 @@ namespace libyuv {
free_aligned_buffer_page_end(src_v); \ free_aligned_buffer_page_end(src_v); \
} }
#define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \ #define TESTPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ DST_SUBSAMP_X, DST_SUBSAMP_Y) \
SUBSAMP_X, SUBSAMP_Y, benchmark_width_ - 4, _Any, +, 0) \ TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Unaligned, +, 1) \ benchmark_width_ - 4, _Any, +, 0) \
TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Invert, -, 0) \ FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, FMT_PLANAR, \ benchmark_width_, _Unaligned, +, 1) \
SUBSAMP_X, SUBSAMP_Y, benchmark_width_, _Opt, +, 0) TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
TESTPLANARTOP(I420, 2, 2, I420, 2, 2) benchmark_width_, _Invert, -, 0) \
TESTPLANARTOP(I422, 2, 1, I420, 2, 2) TESTPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
TESTPLANARTOP(I444, 1, 1, I420, 2, 2) FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
TESTPLANARTOP(I420, 2, 2, I422, 2, 1) benchmark_width_, _Opt, +, 0)
TESTPLANARTOP(I420, 2, 2, I444, 1, 1)
TESTPLANARTOP(I420, 2, 2, I420Mirror, 2, 2) TESTPLANARTOP(I420, uint8, 1, 2, 2, I420, uint8, 1, 2, 2)
TESTPLANARTOP(I422, 2, 1, I422, 2, 1) TESTPLANARTOP(I422, uint8, 1, 2, 1, I420, uint8, 1, 2, 2)
TESTPLANARTOP(I444, 1, 1, I444, 1, 1) TESTPLANARTOP(I444, uint8, 1, 1, 1, I420, uint8, 1, 2, 2)
TESTPLANARTOP(I420, uint8, 1, 2, 2, I422, uint8, 1, 2, 1)
TESTPLANARTOP(I420, uint8, 1, 2, 2, I444, uint8, 1, 1, 1)
TESTPLANARTOP(I420, uint8, 1, 2, 2, I420Mirror, uint8, 1, 2, 2)
TESTPLANARTOP(I422, uint8, 1, 2, 1, I422, uint8, 1, 2, 1)
TESTPLANARTOP(I444, uint8, 1, 1, 1, I444, uint8, 1, 1, 1)
TESTPLANARTOP(I010, uint16, 2, 2, 2, I010, uint16, 2, 2, 2)
TESTPLANARTOP(I010, uint16, 2, 2, 2, I420, uint8, 1, 2, 2)
// Test Android 420 to I420 // Test Android 420 to I420
#define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \ #define TESTAPLANARTOPI(SRC_FMT_PLANAR, PIXEL_STRIDE, SRC_SUBSAMP_X, \
......
...@@ -2699,6 +2699,37 @@ TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) { ...@@ -2699,6 +2699,37 @@ TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) {
} }
#endif // HAS_MULTIPLYROW_16_AVX2 #endif // HAS_MULTIPLYROW_16_AVX2
// Checks that the optimized (SIMD) Convert16To8Plane path produces output
// identical to the C reference path on a plane of random 16 bit pixels.
TEST_F(LibYUVPlanarTest, Convert16To8Plane) {
  const int kPixels = benchmark_width_ * benchmark_height_;
  // Source is 16 bit (2 bytes per pixel); both destinations are 8 bit.
  align_buffer_page_end(src_pixels_y, kPixels * 2);
  align_buffer_page_end(dst_pixels_y_opt, kPixels);
  align_buffer_page_end(dst_pixels_y_c, kPixels);
  MemRandomize(src_pixels_y, kPixels * 2);
  // Prefill destinations with different values so an accidental no-op in
  // either path would show up as a mismatch.
  memset(dst_pixels_y_opt, 0, kPixels);
  memset(dst_pixels_y_c, 1, kPixels);
  // Reference run with SIMD paths masked off (C code only).
  MaskCpuFlags(disable_cpu_flags_);
  Convert16To8Plane(reinterpret_cast<const uint16*>(src_pixels_y),
                    benchmark_width_, dst_pixels_y_c, benchmark_width_, 16384,
                    benchmark_width_, benchmark_height_);
  // Optimized run, repeated benchmark_iterations_ times for timing.
  // Scale 16384 is the 10-bit-to-8-bit factor (see Convert16To8Plane).
  MaskCpuFlags(benchmark_cpu_info_);
  for (int i = 0; i < benchmark_iterations_; ++i) {
    Convert16To8Plane(reinterpret_cast<const uint16*>(src_pixels_y),
                      benchmark_width_, dst_pixels_y_opt, benchmark_width_,
                      16384, benchmark_width_, benchmark_height_);
  }
  // Outputs must match exactly, byte for byte.
  for (int i = 0; i < kPixels; ++i) {
    EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]);
  }
  free_aligned_buffer_page_end(src_pixels_y);
  free_aligned_buffer_page_end(dst_pixels_y_opt);
  free_aligned_buffer_page_end(dst_pixels_y_c);
}
// TODO(fbarchard): Improve test for more platforms. // TODO(fbarchard): Improve test for more platforms.
#ifdef HAS_CONVERT16TO8ROW_AVX2 #ifdef HAS_CONVERT16TO8ROW_AVX2
TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) { TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) {
......
...@@ -19,10 +19,6 @@ ...@@ -19,10 +19,6 @@
#endif #endif
#include "libyuv/cpu_id.h" #include "libyuv/cpu_id.h"
// Change this to 1000 for benchmarking.
// TODO(fbarchard): Add command line parsing to pass this as option.
#define BENCHMARK_ITERATIONS 1
unsigned int fastrand_seed = 0xfb; unsigned int fastrand_seed = 0xfb;
#ifdef LIBYUV_USE_GFLAGS #ifdef LIBYUV_USE_GFLAGS
...@@ -47,7 +43,7 @@ static const int32 FLAGS_libyuv_cpu_info = 0; ...@@ -47,7 +43,7 @@ static const int32 FLAGS_libyuv_cpu_info = 0;
// Set flags to -1 for benchmarking to avoid slower C code. // Set flags to -1 for benchmarking to avoid slower C code.
LibYUVConvertTest::LibYUVConvertTest() LibYUVConvertTest::LibYUVConvertTest()
: benchmark_iterations_(BENCHMARK_ITERATIONS), : benchmark_iterations_(1),
benchmark_width_(128), benchmark_width_(128),
benchmark_height_(72), benchmark_height_(72),
disable_cpu_flags_(1), disable_cpu_flags_(1),
...@@ -92,12 +88,6 @@ LibYUVConvertTest::LibYUVConvertTest() ...@@ -92,12 +88,6 @@ LibYUVConvertTest::LibYUVConvertTest()
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
} }
libyuv::MaskCpuFlags(benchmark_cpu_info_); libyuv::MaskCpuFlags(benchmark_cpu_info_);
benchmark_pixels_div256_ =
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
static_cast<double>(Abs(benchmark_height_)) *
static_cast<double>(benchmark_iterations_) +
255.0) /
256.0);
benchmark_pixels_div1280_ = benchmark_pixels_div1280_ =
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
static_cast<double>(Abs(benchmark_height_)) * static_cast<double>(Abs(benchmark_height_)) *
...@@ -107,7 +97,7 @@ LibYUVConvertTest::LibYUVConvertTest() ...@@ -107,7 +97,7 @@ LibYUVConvertTest::LibYUVConvertTest()
} }
LibYUVColorTest::LibYUVColorTest() LibYUVColorTest::LibYUVColorTest()
: benchmark_iterations_(BENCHMARK_ITERATIONS), : benchmark_iterations_(1),
benchmark_width_(128), benchmark_width_(128),
benchmark_height_(72), benchmark_height_(72),
disable_cpu_flags_(1), disable_cpu_flags_(1),
...@@ -152,12 +142,6 @@ LibYUVColorTest::LibYUVColorTest() ...@@ -152,12 +142,6 @@ LibYUVColorTest::LibYUVColorTest()
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
} }
libyuv::MaskCpuFlags(benchmark_cpu_info_); libyuv::MaskCpuFlags(benchmark_cpu_info_);
benchmark_pixels_div256_ =
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
static_cast<double>(Abs(benchmark_height_)) *
static_cast<double>(benchmark_iterations_) +
255.0) /
256.0);
benchmark_pixels_div1280_ = benchmark_pixels_div1280_ =
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
static_cast<double>(Abs(benchmark_height_)) * static_cast<double>(Abs(benchmark_height_)) *
...@@ -167,7 +151,7 @@ LibYUVColorTest::LibYUVColorTest() ...@@ -167,7 +151,7 @@ LibYUVColorTest::LibYUVColorTest()
} }
LibYUVScaleTest::LibYUVScaleTest() LibYUVScaleTest::LibYUVScaleTest()
: benchmark_iterations_(BENCHMARK_ITERATIONS), : benchmark_iterations_(1),
benchmark_width_(128), benchmark_width_(128),
benchmark_height_(72), benchmark_height_(72),
disable_cpu_flags_(1), disable_cpu_flags_(1),
...@@ -212,12 +196,6 @@ LibYUVScaleTest::LibYUVScaleTest() ...@@ -212,12 +196,6 @@ LibYUVScaleTest::LibYUVScaleTest()
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
} }
libyuv::MaskCpuFlags(benchmark_cpu_info_); libyuv::MaskCpuFlags(benchmark_cpu_info_);
benchmark_pixels_div256_ =
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
static_cast<double>(Abs(benchmark_height_)) *
static_cast<double>(benchmark_iterations_) +
255.0) /
256.0);
benchmark_pixels_div1280_ = benchmark_pixels_div1280_ =
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
static_cast<double>(Abs(benchmark_height_)) * static_cast<double>(Abs(benchmark_height_)) *
...@@ -227,7 +205,7 @@ LibYUVScaleTest::LibYUVScaleTest() ...@@ -227,7 +205,7 @@ LibYUVScaleTest::LibYUVScaleTest()
} }
LibYUVRotateTest::LibYUVRotateTest() LibYUVRotateTest::LibYUVRotateTest()
: benchmark_iterations_(BENCHMARK_ITERATIONS), : benchmark_iterations_(1),
benchmark_width_(128), benchmark_width_(128),
benchmark_height_(72), benchmark_height_(72),
disable_cpu_flags_(1), disable_cpu_flags_(1),
...@@ -272,12 +250,6 @@ LibYUVRotateTest::LibYUVRotateTest() ...@@ -272,12 +250,6 @@ LibYUVRotateTest::LibYUVRotateTest()
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
} }
libyuv::MaskCpuFlags(benchmark_cpu_info_); libyuv::MaskCpuFlags(benchmark_cpu_info_);
benchmark_pixels_div256_ =
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
static_cast<double>(Abs(benchmark_height_)) *
static_cast<double>(benchmark_iterations_) +
255.0) /
256.0);
benchmark_pixels_div1280_ = benchmark_pixels_div1280_ =
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
static_cast<double>(Abs(benchmark_height_)) * static_cast<double>(Abs(benchmark_height_)) *
...@@ -287,7 +259,7 @@ LibYUVRotateTest::LibYUVRotateTest() ...@@ -287,7 +259,7 @@ LibYUVRotateTest::LibYUVRotateTest()
} }
LibYUVPlanarTest::LibYUVPlanarTest() LibYUVPlanarTest::LibYUVPlanarTest()
: benchmark_iterations_(BENCHMARK_ITERATIONS), : benchmark_iterations_(1),
benchmark_width_(128), benchmark_width_(128),
benchmark_height_(72), benchmark_height_(72),
disable_cpu_flags_(1), disable_cpu_flags_(1),
...@@ -332,12 +304,6 @@ LibYUVPlanarTest::LibYUVPlanarTest() ...@@ -332,12 +304,6 @@ LibYUVPlanarTest::LibYUVPlanarTest()
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
} }
libyuv::MaskCpuFlags(benchmark_cpu_info_); libyuv::MaskCpuFlags(benchmark_cpu_info_);
benchmark_pixels_div256_ =
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
static_cast<double>(Abs(benchmark_height_)) *
static_cast<double>(benchmark_iterations_) +
255.0) /
256.0);
benchmark_pixels_div1280_ = benchmark_pixels_div1280_ =
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
static_cast<double>(Abs(benchmark_height_)) * static_cast<double>(Abs(benchmark_height_)) *
...@@ -347,7 +313,7 @@ LibYUVPlanarTest::LibYUVPlanarTest() ...@@ -347,7 +313,7 @@ LibYUVPlanarTest::LibYUVPlanarTest()
} }
LibYUVBaseTest::LibYUVBaseTest() LibYUVBaseTest::LibYUVBaseTest()
: benchmark_iterations_(BENCHMARK_ITERATIONS), : benchmark_iterations_(1),
benchmark_width_(128), benchmark_width_(128),
benchmark_height_(72), benchmark_height_(72),
disable_cpu_flags_(1), disable_cpu_flags_(1),
...@@ -392,12 +358,6 @@ LibYUVBaseTest::LibYUVBaseTest() ...@@ -392,12 +358,6 @@ LibYUVBaseTest::LibYUVBaseTest()
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
} }
libyuv::MaskCpuFlags(benchmark_cpu_info_); libyuv::MaskCpuFlags(benchmark_cpu_info_);
benchmark_pixels_div256_ =
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
static_cast<double>(Abs(benchmark_height_)) *
static_cast<double>(benchmark_iterations_) +
255.0) /
256.0);
benchmark_pixels_div1280_ = benchmark_pixels_div1280_ =
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
static_cast<double>(Abs(benchmark_height_)) * static_cast<double>(Abs(benchmark_height_)) *
...@@ -407,7 +367,7 @@ LibYUVBaseTest::LibYUVBaseTest() ...@@ -407,7 +367,7 @@ LibYUVBaseTest::LibYUVBaseTest()
} }
LibYUVCompareTest::LibYUVCompareTest() LibYUVCompareTest::LibYUVCompareTest()
: benchmark_iterations_(BENCHMARK_ITERATIONS), : benchmark_iterations_(1),
benchmark_width_(128), benchmark_width_(128),
benchmark_height_(72), benchmark_height_(72),
disable_cpu_flags_(1), disable_cpu_flags_(1),
...@@ -452,12 +412,6 @@ LibYUVCompareTest::LibYUVCompareTest() ...@@ -452,12 +412,6 @@ LibYUVCompareTest::LibYUVCompareTest()
benchmark_cpu_info_ = FLAGS_libyuv_cpu_info; benchmark_cpu_info_ = FLAGS_libyuv_cpu_info;
} }
libyuv::MaskCpuFlags(benchmark_cpu_info_); libyuv::MaskCpuFlags(benchmark_cpu_info_);
benchmark_pixels_div256_ =
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
static_cast<double>(Abs(benchmark_height_)) *
static_cast<double>(benchmark_iterations_) +
255.0) /
256.0);
benchmark_pixels_div1280_ = benchmark_pixels_div1280_ =
static_cast<int>((static_cast<double>(Abs(benchmark_width_)) * static_cast<int>((static_cast<double>(Abs(benchmark_width_)) *
static_cast<double>(Abs(benchmark_height_)) * static_cast<double>(Abs(benchmark_height_)) *
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment