Commit fc52d8de authored by Frank Barchard's avatar Frank Barchard

Odd width variation of scale down by 2 for subsampling

R=dhrosa@google.com, harryjin@google.com
BUG=libyuv:538

Review URL: https://codereview.chromium.org/1558093003 .
parent 2560df95
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1562
Version: 1563
License: BSD
License File: LICENSE
......
......@@ -146,6 +146,8 @@ void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width);
void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width);
void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
......@@ -269,13 +271,17 @@ void ScaleRowDown2_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleRowDown2Linear_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Odd_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
uint8* dst_ptr, int dst_width);
void ScaleRowDown2Box_Odd_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
......@@ -431,6 +437,8 @@ void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown2Box_Odd_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
void ScaleRowDown4_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1562
#define LIBYUV_VERSION 1563
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -677,7 +677,7 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
#if defined(HAS_BLENDPLANEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
if (IS_ALIGNED(halfwidth, 8)) {
BlendPlaneRow = BlendPlaneRow_SSSE3;
}
......@@ -685,33 +685,45 @@ int I420Blend(const uint8* src_y0, int src_stride_y0,
#endif
#if defined(HAS_BLENDPLANEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
BlendPlaneRow = BlendPlaneRow_Any_AVX2;
BlendPlaneRow = BlendPlaneRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
BlendPlaneRow = BlendPlaneRow_AVX2;
}
}
#endif
if (!IS_ALIGNED(width, 2)) {
ScaleRowDown2 = ScaleRowDown2Box_Odd_C;
}
#if defined(HAS_SCALEROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
ScaleRowDown2 = ScaleRowDown2Box_NEON;
ScaleRowDown2 = ScaleRowDown2Box_Odd_NEON;
if (IS_ALIGNED(width, 2)) {
ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
ScaleRowDown2 = ScaleRowDown2Box_NEON;
}
}
}
#endif
#if defined(HAS_SCALEROWDOWN2_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleRowDown2 = ScaleRowDown2Box_Any_SSSE3;
if (IS_ALIGNED(halfwidth, 16)) {
ScaleRowDown2 = ScaleRowDown2Box_SSSE3;
ScaleRowDown2 = ScaleRowDown2Box_Odd_SSSE3;
if (IS_ALIGNED(width, 2)) {
ScaleRowDown2 = ScaleRowDown2Box_Any_SSSE3;
if (IS_ALIGNED(halfwidth, 16)) {
ScaleRowDown2 = ScaleRowDown2Box_SSSE3;
}
}
}
#endif
#if defined(HAS_SCALEROWDOWN2_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
ScaleRowDown2 = ScaleRowDown2Box_AVX2;
ScaleRowDown2 = ScaleRowDown2Box_Odd_AVX2;
if (IS_ALIGNED(width, 2)) {
ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
ScaleRowDown2 = ScaleRowDown2Box_AVX2;
}
}
}
#endif
......
......@@ -55,12 +55,29 @@ CANY(ScaleARGBFilterCols_Any_NEON, ScaleARGBFilterCols_NEON,
dst_ptr + n * BPP, r); \
}
// Fixed scale down for odd source width. Used by I420Blend subsampling.
// Since dst_width is (width + 1) / 2, this function scales one less pixel
// and copies the last pixel.
#define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, \
uint8* dst_ptr, int dst_width) { \
int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); \
int n = dst_width - r; \
if (n > 0) { \
SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
} \
SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
dst_ptr + n * BPP, r); \
}
#ifdef HAS_SCALEROWDOWN2_SSSE3
SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15)
SDANY(ScaleRowDown2Linear_Any_SSSE3, ScaleRowDown2Linear_SSSE3,
ScaleRowDown2Linear_C, 2, 1, 15)
SDANY(ScaleRowDown2Box_Any_SSSE3, ScaleRowDown2Box_SSSE3, ScaleRowDown2Box_C,
2, 1, 15)
SDODD(ScaleRowDown2Box_Odd_SSSE3, ScaleRowDown2Box_SSSE3,
ScaleRowDown2Box_Odd_C, 2, 1, 15)
#endif
#ifdef HAS_SCALEROWDOWN2_AVX2
SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31)
......@@ -68,6 +85,8 @@ SDANY(ScaleRowDown2Linear_Any_AVX2, ScaleRowDown2Linear_AVX2,
ScaleRowDown2Linear_C, 2, 1, 31)
SDANY(ScaleRowDown2Box_Any_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_C,
2, 1, 31)
SDODD(ScaleRowDown2Box_Odd_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_Odd_C,
2, 1, 31)
#endif
#ifdef HAS_SCALEROWDOWN2_NEON
SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15)
......@@ -75,6 +94,8 @@ SDANY(ScaleRowDown2Linear_Any_NEON, ScaleRowDown2Linear_NEON,
ScaleRowDown2Linear_C, 2, 1, 15)
SDANY(ScaleRowDown2Box_Any_NEON, ScaleRowDown2Box_NEON,
ScaleRowDown2Box_C, 2, 1, 15)
SDODD(ScaleRowDown2Box_Odd_NEON, ScaleRowDown2Box_NEON,
ScaleRowDown2Box_Odd_C, 2, 1, 15)
#endif
#ifdef HAS_SCALEROWDOWN4_SSSE3
SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7)
......
......@@ -103,6 +103,28 @@ void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
}
}
void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {
const uint8* s = src_ptr;
const uint8* t = src_ptr + src_stride;
int x;
dst_width -= 1;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
dst += 2;
s += 4;
t += 4;
}
if (dst_width & 1) {
dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
dst += 1;
s += 2;
t += 2;
}
dst[0] = (s[0] + t[0] + 1) >> 1;
}
void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width) {
const uint16* s = src_ptr;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment