Commit f553db2d authored by Frank Barchard's avatar Frank Barchard

HalfFloatPlane unittest for denormal half floats

Half floats have a limited range. It shouldn't normally come up, but if the scale value passed in produces a small value, the half floats will be denormals, which are slow and/or flushed to zero. This test ensures they behave the same in C and SIMD and tests the performance of denormals.

TEST=TestHalfFloatPlane_denormal
BUG=libyuv:560
R=hubbe@chromium.org

Review URL: https://codereview.chromium.org/2424233004 .
parent 78c58ab8
...@@ -122,6 +122,10 @@ static_library("libyuv") { ...@@ -122,6 +122,10 @@ static_library("libyuv") {
# Enable optimize for speed (-O2) over size (-Os). # Enable optimize for speed (-O2) over size (-Os).
configs += [ "//build/config/compiler:optimize_max" ] configs += [ "//build/config/compiler:optimize_max" ]
} }
# To enable AVX2 or other cpu optimization, pass flag here
# cflags = [ "-mavx2" ]
} }
if (libyuv_use_neon) { if (libyuv_use_neon) {
...@@ -140,6 +144,14 @@ if (libyuv_use_neon) { ...@@ -140,6 +144,14 @@ if (libyuv_use_neon) {
public_configs = [ ":libyuv_config" ] public_configs = [ ":libyuv_config" ]
# Always enable optimization for Release and NaCl builds (to workaround
# crbug.com/538243).
if (!is_debug) {
configs -= [ "//build/config/compiler:default_optimization" ]
# Enable optimize for speed (-O2) over size (-Os).
configs += [ "//build/config/compiler:optimize_max" ]
}
if (current_cpu != "arm64") { if (current_cpu != "arm64") {
configs -= [ "//build/config/compiler:compiler_arm_fpu" ] configs -= [ "//build/config/compiler:compiler_arm_fpu" ]
cflags = [ "-mfpu=neon" ] cflags = [ "-mfpu=neon" ]
......
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1627 Version: 1628
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -201,7 +201,7 @@ extern "C" { ...@@ -201,7 +201,7 @@ extern "C" {
#define HAS_COPYROW_AVX #define HAS_COPYROW_AVX
#define HAS_H422TOARGBROW_AVX2 #define HAS_H422TOARGBROW_AVX2
#define HAS_HALFFLOATROW_AVX2 #define HAS_HALFFLOATROW_AVX2
// #define HAS_HALFFLOATROW_F16C // Enable to test halffloat cast // #define HAS_HALFFLOATROW_F16C // Enable to test halffloat cast
#define HAS_I400TOARGBROW_AVX2 #define HAS_I400TOARGBROW_AVX2
#define HAS_I422TOARGB1555ROW_AVX2 #define HAS_I422TOARGB1555ROW_AVX2
#define HAS_I422TOARGB4444ROW_AVX2 #define HAS_I422TOARGB4444ROW_AVX2
...@@ -330,6 +330,11 @@ extern "C" { ...@@ -330,6 +330,11 @@ extern "C" {
#define HAS_YUY2TOUVROW_NEON #define HAS_YUY2TOUVROW_NEON
#define HAS_YUY2TOYROW_NEON #define HAS_YUY2TOYROW_NEON
// TODO(fbarchard): Port to 32 bit.
#if defined(__aarch64__)
#define HAS_HALFFLOATROW_NEON
#endif
// Effects: // Effects:
#define HAS_ARGBADDROW_NEON #define HAS_ARGBADDROW_NEON
#define HAS_ARGBATTENUATEROW_NEON #define HAS_ARGBATTENUATEROW_NEON
...@@ -1954,6 +1959,9 @@ void HalfFloatRow_Any_AVX2(const uint16* src, uint16* dst, float scale, ...@@ -1954,6 +1959,9 @@ void HalfFloatRow_Any_AVX2(const uint16* src, uint16* dst, float scale,
void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width); void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width);
void HalfFloatRow_Any_F16C(const uint16* src, uint16* dst, float scale, void HalfFloatRow_Any_F16C(const uint16* src, uint16* dst, float scale,
int width); int width);
void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width);
void HalfFloatRow_Any_NEON(const uint16* src, uint16* dst, float scale,
int width);
void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width, void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
const uint8* luma, uint32 lumacoeff); const uint8* luma, uint32 lumacoeff);
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ #ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1627 #define LIBYUV_VERSION 1628
#endif // INCLUDE_LIBYUV_VERSION_H_ #endif // INCLUDE_LIBYUV_VERSION_H_
...@@ -2585,6 +2585,15 @@ int HalfFloatPlane(const uint16* src_y, int src_stride_y, ...@@ -2585,6 +2585,15 @@ int HalfFloatPlane(const uint16* src_y, int src_stride_y,
} }
} }
#endif #endif
#if defined(HAS_HALFFLOATROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
HalfFloatRow = HalfFloatRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
HalfFloatRow = HalfFloatRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
HalfFloatRow(src_y, dst_y, scale, width); HalfFloatRow(src_y, dst_y, scale, width);
src_y += src_stride_y; src_y += src_stride_y;
......
...@@ -585,6 +585,9 @@ ANY11P16(HalfFloatRow_Any_AVX2, HalfFloatRow_AVX2, float, 1, 1, 15) ...@@ -585,6 +585,9 @@ ANY11P16(HalfFloatRow_Any_AVX2, HalfFloatRow_AVX2, float, 1, 1, 15)
#ifdef HAS_HALFFLOATROW_F16C #ifdef HAS_HALFFLOATROW_F16C
ANY11P16(HalfFloatRow_Any_F16C, HalfFloatRow_F16C, float, 1, 1, 15) ANY11P16(HalfFloatRow_Any_F16C, HalfFloatRow_F16C, float, 1, 1, 15)
#endif #endif
#ifdef HAS_HALFFLOATROW_NEON
ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, float, 1, 1, 7)
#endif
#undef ANY11P16 #undef ANY11P16
// Any 1 to 1 with yuvconstants // Any 1 to 1 with yuvconstants
......
...@@ -2710,6 +2710,32 @@ void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1, ...@@ -2710,6 +2710,32 @@ void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
); );
} }
// Converts a row of 16-bit unsigned samples to 16-bit half floats using
// AArch64 NEON, multiplying each sample by |scale| along the way.
//
//   src   - input row of uint16 samples; advanced 16 bytes per iteration.
//   dst   - output row of uint16 half-float bit patterns; advanced likewise.
//   scale - multiplier applied to every sample before narrowing.
//   width - number of samples to convert.
//
// The loop body always runs at least once and handles 8 samples per pass,
// repeating while the remaining count is greater than zero. A width that is
// not a positive multiple of 8 therefore reads and writes past the end of the
// row; the HalfFloatRow_Any_NEON wrapper exists for such widths.
void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width) {
asm volatile (
"1: \n"
MEMACCESS(0)
"ld1 {v1.16b}, [%0], #16 \n" // load 8 shorts, post-increment src
"subs %w2, %w2, #8 \n" // 8 pixels per loop; sets flags for b.gt
"uxtl v2.4s, v1.4h \n" // zero-extend low 4 shorts to 32-bit ints
"uxtl2 v1.4s, v1.8h \n" // zero-extend high 4 shorts to 32-bit ints
"scvtf v2.4s, v2.4s \n" // 8 floats (inputs are <= 65535, so the
"scvtf v1.4s, v1.4s \n" // signed conversion sees no negatives)
"fmul v2.4s, v2.4s, %3.s[0] \n" // adjust exponent
"fmul v1.4s, v1.4s, %3.s[0] \n"
// Unsigned saturating shift right by 13 and narrow to 16 bits, treating the
// float bit patterns as integers.
// NOTE(review): 13 is presumably 23 - 10, the difference between the float
// and half-float mantissa widths -- confirm.
"uqshrn v4.4h, v2.4s, #13 \n" // isolate halffloat
"uqshrn2 v4.8h, v1.4s, #13 \n"
MEMACCESS(1)
"st1 {v4.16b}, [%1], #16 \n" // store 8 shorts, post-increment dst
"b.gt 1b \n" // loop while remaining width > 0
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
// The constant is approximately 2^-112.
// NOTE(review): presumably 112 = 127 - 15, rebasing the float exponent bias to
// the half-float bias so the shifted bits form a half float -- confirm.
: "w"(scale * 1.9259299444e-34f) // %3
: "cc", "memory", "v1", "v2", "v4"
);
}
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef __cplusplus #ifdef __cplusplus
......
...@@ -2081,9 +2081,12 @@ TEST_F(LibYUVPlanarTest, TestARGBPolynomial) { ...@@ -2081,9 +2081,12 @@ TEST_F(LibYUVPlanarTest, TestARGBPolynomial) {
} }
} }
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane) { int TestHalfFloatPlane(int benchmark_width, int benchmark_height,
int benchmark_iterations,
int disable_cpu_flags, int benchmark_cpu_info,
float scale) {
int i, j; int i, j;
const int y_plane_size = benchmark_width_ * benchmark_height_ * 2; const int y_plane_size = benchmark_width * benchmark_height * 2;
align_buffer_page_end(orig_y, y_plane_size); align_buffer_page_end(orig_y, y_plane_size);
align_buffer_page_end(dst_c, y_plane_size); align_buffer_page_end(dst_c, y_plane_size);
...@@ -2093,32 +2096,62 @@ TEST_F(LibYUVPlanarTest, TestHalfFloatPlane) { ...@@ -2093,32 +2096,62 @@ TEST_F(LibYUVPlanarTest, TestHalfFloatPlane) {
memset(dst_opt, 1, y_plane_size); memset(dst_opt, 1, y_plane_size);
// Disable all optimizations. // Disable all optimizations.
MaskCpuFlags(disable_cpu_flags_); MaskCpuFlags(disable_cpu_flags);
double c_time = get_time(); double c_time = get_time();
for (j = 0; j < benchmark_iterations_; j++) { for (j = 0; j < benchmark_iterations; j++) {
HalfFloatPlane((uint16*)orig_y, benchmark_width_ * 2, HalfFloatPlane((uint16*)orig_y, benchmark_width * 2,
(uint16*)dst_c, benchmark_width_ * 2, (uint16*)dst_c, benchmark_width * 2,
1.0f / 4096.0f, benchmark_width_, benchmark_height_); scale, benchmark_width, benchmark_height);
} }
c_time = (get_time() - c_time) / benchmark_iterations_; c_time = (get_time() - c_time) / benchmark_iterations;
// Enable optimizations. // Enable optimizations.
MaskCpuFlags(benchmark_cpu_info_); MaskCpuFlags(benchmark_cpu_info);
double opt_time = get_time(); double opt_time = get_time();
for (j = 0; j < benchmark_iterations_; j++) { for (j = 0; j < benchmark_iterations; j++) {
HalfFloatPlane((uint16*)orig_y, benchmark_width_ * 2, HalfFloatPlane((uint16*)orig_y, benchmark_width * 2,
(uint16*)dst_opt, benchmark_width_ * 2, (uint16*)dst_opt, benchmark_width * 2,
1.0f / 4096.0f, benchmark_width_, benchmark_height_); scale, benchmark_width, benchmark_height);
} }
opt_time = (get_time() - opt_time) / benchmark_iterations_; opt_time = (get_time() - opt_time) / benchmark_iterations;
int diff = 0;
for (i = 0; i < y_plane_size; ++i) { for (i = 0; i < y_plane_size; ++i) {
EXPECT_EQ(dst_c[i], dst_opt[i]); diff = dst_c[i] - dst_opt[i];
if (diff) break;
} }
free_aligned_buffer_page_end(orig_y); free_aligned_buffer_page_end(orig_y);
free_aligned_buffer_page_end(dst_c); free_aligned_buffer_page_end(dst_c);
free_aligned_buffer_page_end(dst_opt); free_aligned_buffer_page_end(dst_opt);
return diff;
}
// 5 bit exponent with bias of 15 will underflow to a denormal if scale causes
// exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally
// happen since scale is 1/(1<<bits) where bits is 9, 10 or 12.
// Runs the C-versus-optimized HalfFloatPlane comparison with a scale of
// 1/65536 and expects the two outputs to match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_denormal) {
  const float kDenormalScale = 1.0f / 65536.0f;
  const int mismatch = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kDenormalScale);
  EXPECT_EQ(mismatch, 0);
}
// Runs the C-versus-optimized HalfFloatPlane comparison with a scale of
// 1/4096 and expects the two outputs to match exactly.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
  const float kTwelveBitScale = 1.0f / 4096.0f;
  const int mismatch = TestHalfFloatPlane(
      benchmark_width_, benchmark_height_, benchmark_iterations_,
      disable_cpu_flags_, benchmark_cpu_info_, kTwelveBitScale);
  EXPECT_EQ(mismatch, 0);
}
// Runs the C-versus-optimized HalfFloatPlane comparison with a scale of
// 1/1023, which is not a power of two, and expects identical output.
// NOTE(review): the "Offby1" name suggests this is meant to catch rounding
// differences between the C and SIMD paths -- confirm the intent.
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {
// A non-zero return means the two outputs differed.
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_,
1.0f / 1023.0f);
EXPECT_EQ(diff, 0);
} }
TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) { TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment