Commit bb17da97 authored by Frank Barchard

Test C vs NEON for ScaleDown2Box_16

TBR=kjellander@chromium.org
BUG=libyuv:718
TEST=LibYUVScaleTest.TestScaleRowDown2Box_16

Change-Id: Ic74d29d6f14983ff26e8af541ef702a0f8bf3f17
Reviewed-on: https://chromium-review.googlesource.com/616189
Reviewed-by: Cheng Wang <wangcheng@google.com>
parent 7e59ee4c
...@@ -1002,7 +1002,7 @@ void ScaleRowDown2Box_16_NEON(const uint16* src_ptr, ...@@ -1002,7 +1002,7 @@ void ScaleRowDown2Box_16_NEON(const uint16* src_ptr,
int dst_width) { int dst_width) {
asm volatile( asm volatile(
// change the stride to row 2 pointer // change the stride to row 2 pointer
"add %1, %1, %0 \n" "add %1, %0, %1, lsl #1 \n" // ptr + stide * 2
"1: \n" "1: \n"
"ld1 {v0.8h, v1.8h}, [%0], #32 \n" // load row 1 and post inc "ld1 {v0.8h, v1.8h}, [%0], #32 \n" // load row 1 and post inc
"ld1 {v2.8h, v3.8h}, [%1], #32 \n" // load row 2 and post inc "ld1 {v2.8h, v3.8h}, [%1], #32 \n" // load row 2 and post inc
...@@ -1033,8 +1033,7 @@ void ScaleRowUp2_16_NEON(const uint16* src_ptr, ...@@ -1033,8 +1033,7 @@ void ScaleRowUp2_16_NEON(const uint16* src_ptr,
uint16* dst, uint16* dst,
int dst_width) { int dst_width) {
asm volatile( asm volatile(
// change the stride to row 2 pointer "add %1, %0, %1, lsl #1 \n" // ptr + stide * 2
"add %1, %1, %0 \n"
"movi v20.4h, #1 \n" "movi v20.4h, #1 \n"
"movi v21.4h, #3 \n" // constants "movi v21.4h, #3 \n" // constants
"movi v22.4h, #9 \n" "movi v22.4h, #9 \n"
......
...@@ -471,13 +471,13 @@ TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) { ...@@ -471,13 +471,13 @@ TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
orig_pixels[i] = i; orig_pixels[i] = i;
} }
ScaleRowUp2_16_NEON(&orig_pixels[0], ScaleRowUp2_16_NEON(&orig_pixels[0],
640 * 2, 640,
&dst_pixels_c[0], &dst_pixels_c[0],
1280); 1280);
for (int i = 0; i < benchmark_pixels_div1280_; ++i) { for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
ScaleRowUp2_16_NEON(&orig_pixels[0], ScaleRowUp2_16_NEON(&orig_pixels[0],
640 * 2, 640,
&dst_pixels_opt[0], &dst_pixels_opt[0],
1280); 1280);
} }
...@@ -488,6 +488,7 @@ TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) { ...@@ -488,6 +488,7 @@ TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16); EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16);
EXPECT_EQ(dst_pixels_c[1279], 800); EXPECT_EQ(dst_pixels_c[1279], 800);
} }
#endif
extern "C" void ScaleRowDown2Box_16_NEON(const uint16* src_ptr, extern "C" void ScaleRowDown2Box_16_NEON(const uint16* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
...@@ -496,34 +497,48 @@ extern "C" void ScaleRowDown2Box_16_NEON(const uint16* src_ptr, ...@@ -496,34 +497,48 @@ extern "C" void ScaleRowDown2Box_16_NEON(const uint16* src_ptr,
TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) { TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
SIMD_ALIGNED(uint16 orig_pixels[2560 * 2]); SIMD_ALIGNED(uint16 orig_pixels[2560 * 2]);
SIMD_ALIGNED(uint16 dst_pixels_opt[1280]);
SIMD_ALIGNED(uint16 dst_pixels_c[1280]); SIMD_ALIGNED(uint16 dst_pixels_c[1280]);
SIMD_ALIGNED(uint16 dst_pixels_opt[1280]);
memset(orig_pixels, 0, sizeof(orig_pixels)); memset(orig_pixels, 0, sizeof(orig_pixels));
memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt)); memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
memset(dst_pixels_c, 2, sizeof(dst_pixels_c)); memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
for (int i = 0; i < 2560 * 2; ++i) { for (int i = 0; i < 2560 * 2; ++i) {
orig_pixels[i] = i; orig_pixels[i] = i;
} }
ScaleRowDown2Box_16_NEON(&orig_pixels[0], ScaleRowDown2Box_16_C(&orig_pixels[0],
2560 * 2, 2560,
&dst_pixels_c[0], &dst_pixels_c[0],
1280); 1280);
for (int i = 0; i < benchmark_pixels_div1280_; ++i) { for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
int has_neon = TestCpuFlag(kCpuHasNEON);
if (has_neon) {
ScaleRowDown2Box_16_NEON(&orig_pixels[0], ScaleRowDown2Box_16_NEON(&orig_pixels[0],
2560 * 2, 2560,
&dst_pixels_opt[0], &dst_pixels_opt[0],
1280); 1280);
} else {
ScaleRowDown2Box_16_C(&orig_pixels[0],
2560,
&dst_pixels_opt[0],
1280);
}
#else
ScaleRowDown2Box_16_C(&orig_pixels[0],
2560,
&dst_pixels_opt[0],
1280);
#endif
} }
for (int i = 0; i < 1280; ++i) { for (int i = 0; i < 1280; ++i) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
} }
EXPECT_EQ(dst_pixels_c[0], 1281);
EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
EXPECT_EQ(dst_pixels_c[1279], 3839); EXPECT_EQ(dst_pixels_c[1279], 3839);
} }
#endif // __aarch64__
} // namespace libyuv } // namespace libyuv
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment