Commit d41fbf40 authored by fbarchard@google.com's avatar fbarchard@google.com

Handle scale down by factor of 2 efficiently by calling SIMD for multiple of 16…

Handle scale down by factor of 2 efficiently by calling SIMD for multiple of 16 destination pixels, and C for remainder.
BUG=314
TESTED=out\release\libyuv_unittest.exe --gtest_catch_exceptions=0 --gtest_filter=*.ScaleDownBy2*
R=bcornell@google.com

Review URL: https://webrtc-codereview.appspot.com/48689004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1344 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 0d3bfab6
......@@ -222,6 +222,12 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width,
int src_height);
......@@ -271,10 +277,8 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
// Note - not static due to reuse in convert for 444 to 420.
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Linear_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
......@@ -309,6 +313,13 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height);
......
......@@ -54,17 +54,27 @@ static void ScalePlaneDown2(int src_width, int src_height,
}
#if defined(HAS_SCALEROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON :
(filtering == kFilterLinear ? ScaleRowDown2Linear_NEON :
ScaleRowDown2Box_NEON);
if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_NEON :
(filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON :
ScaleRowDown2Box_Any_NEON);
if (IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON :
(filtering == kFilterLinear ? ScaleRowDown2Linear_NEON :
ScaleRowDown2Box_NEON);
}
}
#endif
#if defined(HAS_SCALEROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
ScaleRowDown2Box_SSE2);
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSE2 :
ScaleRowDown2Box_Any_SSE2);
if (IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
ScaleRowDown2Box_SSE2);
}
}
#endif
#if defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
......
......@@ -35,6 +35,36 @@ CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7)
#endif
#undef CANY
// Fixed scale down.
#define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, \
uint8* dst_ptr, int dst_width) { \
int n = dst_width & ~MASK; \
if (n > 0) { \
SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
} \
SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
dst_ptr + n * BPP, dst_width & MASK); \
}
#ifdef HAS_SCALEROWDOWN2_SSE2
SDANY(ScaleRowDown2_Any_SSE2, ScaleRowDown2_SSE2, ScaleRowDown2_C, 2, 1, 15)
SDANY(ScaleRowDown2Linear_Any_SSE2, ScaleRowDown2Linear_SSE2,
ScaleRowDown2Linear_C, 2, 1, 15)
SDANY(ScaleRowDown2Box_Any_SSE2, ScaleRowDown2Box_SSE2,
ScaleRowDown2Box_C, 2, 1, 15)
#endif
#ifdef HAS_SCALEROWDOWN2_NEON
SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15)
SDANY(ScaleRowDown2Linear_Any_NEON, ScaleRowDown2Linear_NEON,
ScaleRowDown2Linear_C, 2, 1, 15)
SDANY(ScaleRowDown2Box_Any_NEON, ScaleRowDown2Box_NEON,
ScaleRowDown2Box_C, 2, 1, 15)
#endif
#undef SDANY
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
......
......@@ -666,7 +666,6 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
}
// Bilinear column filtering. SSSE3 version.
// TODO(fbarchard): Port to Neon
// TODO(fbarchard): Switch the following:
// xor ebx, ebx
// mov bx, word ptr [esi + eax] // 2 source x0 pixels
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment