Commit fc52d8de authored by Frank Barchard's avatar Frank Barchard

Odd width variation of scale down by 2 for subsampling

R=dhrosa@google.com, harryjin@google.com
BUG=libyuv:538

Review URL: https://codereview.chromium.org/1558093003 .
parent 2560df95
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1562 Version: 1563
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -146,6 +146,8 @@ void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride, ...@@ -146,6 +146,8 @@ void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width); uint16* dst, int dst_width);
void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width); uint8* dst, int dst_width);
void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width); uint16* dst, int dst_width);
void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
...@@ -270,12 +272,16 @@ void ScaleRowDown2Linear_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -270,12 +272,16 @@ void ScaleRowDown2Linear_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Odd_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Odd_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown4_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown4Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
...@@ -431,6 +437,8 @@ void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -431,6 +437,8 @@ void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width); uint8* dst, int dst_width);
void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width); uint8* dst, int dst_width);
void ScaleRowDown2Box_Odd_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown4_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1562 #define LIBYUV_VERSION 1563
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -691,29 +691,41 @@ int I420Blend(const uint8* src_y0, int src_stride_y0, ...@@ -691,29 +691,41 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
} }
} }
#endif #endif
if (!IS_ALIGNED(width, 2)) {
ScaleRowDown2 = ScaleRowDown2Box_Odd_C;
}
#if defined(HAS_SCALEROWDOWN2_NEON) #if defined(HAS_SCALEROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowDown2 = ScaleRowDown2Box_Odd_NEON;
if (IS_ALIGNED(width, 2)) {
ScaleRowDown2 = ScaleRowDown2Box_Any_NEON; ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
ScaleRowDown2 = ScaleRowDown2Box_NEON; ScaleRowDown2 = ScaleRowDown2Box_NEON;
} }
} }
}
#endif #endif
#if defined(HAS_SCALEROWDOWN2_SSSE3) #if defined(HAS_SCALEROWDOWN2_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowDown2 = ScaleRowDown2Box_Odd_SSSE3;
if (IS_ALIGNED(width, 2)) {
ScaleRowDown2 = ScaleRowDown2Box_Any_SSSE3; ScaleRowDown2 = ScaleRowDown2Box_Any_SSSE3;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
ScaleRowDown2 = ScaleRowDown2Box_SSSE3; ScaleRowDown2 = ScaleRowDown2Box_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_SCALEROWDOWN2_AVX2) #if defined(HAS_SCALEROWDOWN2_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowDown2 = ScaleRowDown2Box_Odd_AVX2;
if (IS_ALIGNED(width, 2)) {
ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2; ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) { if (IS_ALIGNED(halfwidth, 32)) {
ScaleRowDown2 = ScaleRowDown2Box_AVX2; ScaleRowDown2 = ScaleRowDown2Box_AVX2;
} }
} }
}
#endif #endif
// Row buffer for intermediate alpha pixels. // Row buffer for intermediate alpha pixels.
......
...@@ -55,12 +55,29 @@ CANY(ScaleARGBFilterCols_Any_NEON, ScaleARGBFilterCols_NEON, ...@@ -55,12 +55,29 @@ CANY(ScaleARGBFilterCols_Any_NEON, ScaleARGBFilterCols_NEON,
dst_ptr + n * BPP, r); \ dst_ptr + n * BPP, r); \
} }
// Fixed scale down for odd source width. Used by I420Blend subsampling.
// Since dst_width is (width + 1) / 2, this function scales one less pixel
// and copies the last pixel.
#define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, \
uint8* dst_ptr, int dst_width) { \
int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); \
int n = dst_width - r; \
if (n > 0) { \
SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
} \
SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
dst_ptr + n * BPP, r); \
}
#ifdef HAS_SCALEROWDOWN2_SSSE3 #ifdef HAS_SCALEROWDOWN2_SSSE3
SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15) SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15)
SDANY(ScaleRowDown2Linear_Any_SSSE3, ScaleRowDown2Linear_SSSE3, SDANY(ScaleRowDown2Linear_Any_SSSE3, ScaleRowDown2Linear_SSSE3,
ScaleRowDown2Linear_C, 2, 1, 15) ScaleRowDown2Linear_C, 2, 1, 15)
SDANY(ScaleRowDown2Box_Any_SSSE3, ScaleRowDown2Box_SSSE3, ScaleRowDown2Box_C, SDANY(ScaleRowDown2Box_Any_SSSE3, ScaleRowDown2Box_SSSE3, ScaleRowDown2Box_C,
2, 1, 15) 2, 1, 15)
SDODD(ScaleRowDown2Box_Odd_SSSE3, ScaleRowDown2Box_SSSE3,
ScaleRowDown2Box_Odd_C, 2, 1, 15)
#endif #endif
#ifdef HAS_SCALEROWDOWN2_AVX2 #ifdef HAS_SCALEROWDOWN2_AVX2
SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31) SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31)
...@@ -68,6 +85,8 @@ SDANY(ScaleRowDown2Linear_Any_AVX2, ScaleRowDown2Linear_AVX2, ...@@ -68,6 +85,8 @@ SDANY(ScaleRowDown2Linear_Any_AVX2, ScaleRowDown2Linear_AVX2,
ScaleRowDown2Linear_C, 2, 1, 31) ScaleRowDown2Linear_C, 2, 1, 31)
SDANY(ScaleRowDown2Box_Any_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_C, SDANY(ScaleRowDown2Box_Any_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_C,
2, 1, 31) 2, 1, 31)
SDODD(ScaleRowDown2Box_Odd_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_Odd_C,
2, 1, 31)
#endif #endif
#ifdef HAS_SCALEROWDOWN2_NEON #ifdef HAS_SCALEROWDOWN2_NEON
SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15) SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15)
...@@ -75,6 +94,8 @@ SDANY(ScaleRowDown2Linear_Any_NEON, ScaleRowDown2Linear_NEON, ...@@ -75,6 +94,8 @@ SDANY(ScaleRowDown2Linear_Any_NEON, ScaleRowDown2Linear_NEON,
ScaleRowDown2Linear_C, 2, 1, 15) ScaleRowDown2Linear_C, 2, 1, 15)
SDANY(ScaleRowDown2Box_Any_NEON, ScaleRowDown2Box_NEON, SDANY(ScaleRowDown2Box_Any_NEON, ScaleRowDown2Box_NEON,
ScaleRowDown2Box_C, 2, 1, 15) ScaleRowDown2Box_C, 2, 1, 15)
SDODD(ScaleRowDown2Box_Odd_NEON, ScaleRowDown2Box_NEON,
ScaleRowDown2Box_Odd_C, 2, 1, 15)
#endif #endif
#ifdef HAS_SCALEROWDOWN4_SSSE3 #ifdef HAS_SCALEROWDOWN4_SSSE3
SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7) SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7)
......
...@@ -103,6 +103,28 @@ void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -103,6 +103,28 @@ void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
} }
} }
void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
const uint8* s = src_ptr;
const uint8* t = src_ptr + src_stride;
int x;
dst_width -= 1;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
dst += 2;
s += 4;
t += 4;
}
if (dst_width & 1) {
dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
dst += 1;
s += 2;
t += 2;
}
dst[0] = (s[0] + t[0] + 1) >> 1;
}
void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width) { uint16* dst, int dst_width) {
const uint16* s = src_ptr; const uint16* s = src_ptr;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment