Commit 2b7f6b7d authored by fbarchard@google.com

ScaleAddRows_Any_SSE2 functions for handling odd widths.

BUG=425
TESTED=out\release\libyuv_unittest_old --gtest_filter=*.ScaleDownBy3_*
R=tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/45219004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1377 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 01db3d1d
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1374
Version: 1375
License: BSD
License File: LICENSE
......
......@@ -266,6 +266,11 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height);
void ScaleAddRows_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height);
// Any-width variants of ScaleAddRows_SSE2 / ScaleAddRows_AVX2. The SIMD kernel
// handles the part of the row that is a multiple of 16 (SSE2) or 32 (AVX2)
// columns; the remaining columns are handled by ScaleAddRows_C.
void ScaleAddRows_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height);
void ScaleAddRows_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height);
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
......@@ -356,6 +361,8 @@ void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleAddRows_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height);
// Any-width variant of ScaleAddRows_NEON. The NEON kernel handles the part of
// the row that is a multiple of 16 columns; the remaining columns are handled
// by ScaleAddRows_C.
void ScaleAddRows_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height);
void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx);
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1374
#define LIBYUV_VERSION 1375
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -758,24 +758,6 @@ static void ScalePlaneBox(int src_width, int src_height,
ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
// TODO(fbarchard): Remove this and make AddRows handle odd width.
if (!IS_ALIGNED(src_width, 16)) {
uint8* dst = dst_ptr;
int j;
for (j = 0; j < dst_height; ++j) {
int boxheight;
int iy = y >> 16;
const uint8* src = src_ptr + iy * src_stride;
y += dy;
if (y > max_y) {
y = max_y;
}
boxheight = MIN1((y >> 16) - iy);
ScalePlaneBoxRow_C(dst_width, boxheight, x, dx, src_stride, src, dst);
dst += dst_stride;
}
return;
}
{
// Allocate a row buffer of uint16.
align_buffer_64(row16, src_width * 2);
......@@ -786,18 +768,27 @@ static void ScalePlaneBox(int src_width, int src_height,
void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;
#if defined(HAS_SCALEADDROWS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
ScaleAddRows = ScaleAddRows_SSE2;
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleAddRows = ScaleAddRows_Any_SSE2;
if (IS_ALIGNED(src_width, 16)) {
ScaleAddRows = ScaleAddRows_SSE2;
}
}
#endif
#if defined(HAS_SCALEADDROWS_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(src_width, 32)) {
ScaleAddRows = ScaleAddRows_AVX2;
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleAddRows = ScaleAddRows_Any_AVX2;
if (IS_ALIGNED(src_width, 32)) {
ScaleAddRows = ScaleAddRows_AVX2;
}
}
#endif
#if defined(HAS_SCALEADDROWS_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(src_width, 16)) {
ScaleAddRows = ScaleAddRows_NEON;
if (TestCpuFlag(kCpuHasNEON)) {
ScaleAddRows = ScaleAddRows_Any_NEON;
if (IS_ALIGNED(src_width, 16)) {
ScaleAddRows = ScaleAddRows_NEON;
}
}
#endif
......
......@@ -88,8 +88,35 @@ SDANY(ScaleRowDown4Box_Any_NEON, ScaleRowDown4Box_NEON, ScaleRowDown4Box_C,
#endif
#undef SDANY
// Fixed scale down.
// Generates NAMEANY, an any-width wrapper around a SIMD ScaleAddRows kernel.
//   NAMEANY            name of the function to define.
//   SCALEADDROWS_SIMD  kernel that is only given widths that are a multiple
//                      of (MASK + 1).
//   SCALEADDROWS_C     fallback used for the leftover columns.
//   MASK               SIMD width granularity minus one (e.g. 15 for 16).
// The leading (src_width & ~MASK) columns go to the SIMD kernel and the
// trailing (src_width & MASK) columns go to the fallback. Each call is
// skipped when its share of the row is empty, so neither kernel is invoked
// with a zero width. MASK is parenthesized so an expression argument cannot
// change the meaning of the bit operations.
#define SAANY(NAMEANY, SCALEADDROWS_SIMD, SCALEADDROWS_C, MASK)               \
  void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride,                    \
               uint16* dst_ptr, int src_width, int src_height) {              \
    int n = src_width & ~(MASK); /* columns covered by the SIMD kernel */     \
    int r = src_width & (MASK);  /* leftover columns for the C fallback */    \
    if (n > 0) {                                                              \
      SCALEADDROWS_SIMD(src_ptr, src_stride, dst_ptr, n, src_height);         \
    }                                                                         \
    if (r > 0) {                                                              \
      SCALEADDROWS_C(src_ptr + n, src_stride, dst_ptr + n, r, src_height);    \
    }                                                                         \
  }
// Any-width wrappers, one per available SIMD kernel. The last argument is the
// SIMD width granularity minus one: the SIMD kernel receives the part of the
// row that is a multiple of (mask + 1) columns, ScaleAddRows_C the rest.

// SSE2: SIMD part is a multiple of 16 columns.
#ifdef HAS_SCALEADDROWS_SSE2
SAANY(ScaleAddRows_Any_SSE2, ScaleAddRows_SSE2, ScaleAddRows_C, 15)
#endif
// AVX2: SIMD part is a multiple of 32 columns.
#ifdef HAS_SCALEADDROWS_AVX2
SAANY(ScaleAddRows_Any_AVX2, ScaleAddRows_AVX2, ScaleAddRows_C, 31)
#endif
// NEON: SIMD part is a multiple of 16 columns.
#ifdef HAS_SCALEADDROWS_NEON
SAANY(ScaleAddRows_Any_NEON, ScaleAddRows_NEON, ScaleAddRows_C, 15)
#endif
// The generator macro is not needed past this point.
#undef SAANY
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
......@@ -634,7 +634,7 @@ void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
sum += s[0];
s += src_stride;
}
// TODO(fbarchard): Consider limitting height to 256 to avoid overflow.
// TODO(fbarchard): Consider limiting height to 256 to avoid overflow.
dst_ptr[x] = sum < 65535u ? sum : 65535u;
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment