Commit b911428a authored by fbarchard@google.com

Adapt row interpolator to do YUV as well as ARGB without extrude so it can be used in I420Scale.

BUG=237
TEST=Scale*
R=ryanpetrie@google.com

Review URL: https://webrtc-codereview.appspot.com/1587004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@710 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 8ffe78ab
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 709 Version: 710
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -109,8 +109,8 @@ extern "C" { ...@@ -109,8 +109,8 @@ extern "C" {
#define HAS_ARGBBLENDROW_SSSE3 #define HAS_ARGBBLENDROW_SSSE3
#define HAS_ARGBCOLORMATRIXROW_SSSE3 #define HAS_ARGBCOLORMATRIXROW_SSSE3
#define HAS_ARGBGRAYROW_SSSE3 #define HAS_ARGBGRAYROW_SSSE3
#define HAS_ARGBINTERPOLATEROW_SSE2 #define HAS_INTERPOLATEROW_SSE2
#define HAS_ARGBINTERPOLATEROW_SSSE3 #define HAS_INTERPOLATEROW_SSSE3
#define HAS_ARGBMIRRORROW_SSSE3 #define HAS_ARGBMIRRORROW_SSSE3
#define HAS_ARGBMULTIPLYROW_SSE2 #define HAS_ARGBMULTIPLYROW_SSE2
#define HAS_ARGBQUANTIZEROW_SSE2 #define HAS_ARGBQUANTIZEROW_SSE2
...@@ -261,7 +261,6 @@ extern "C" { ...@@ -261,7 +261,6 @@ extern "C" {
#define HAS_ARGBBLENDROW_NEON #define HAS_ARGBBLENDROW_NEON
#define HAS_ARGBCOLORMATRIXROW_NEON #define HAS_ARGBCOLORMATRIXROW_NEON
#define HAS_ARGBGRAYROW_NEON #define HAS_ARGBGRAYROW_NEON
#define HAS_ARGBINTERPOLATEROW_NEON
#define HAS_ARGBMIRRORROW_NEON #define HAS_ARGBMIRRORROW_NEON
#define HAS_ARGBMULTIPLYROW_NEON #define HAS_ARGBMULTIPLYROW_NEON
#define HAS_ARGBQUANTIZEROW_NEON #define HAS_ARGBQUANTIZEROW_NEON
...@@ -272,6 +271,7 @@ extern "C" { ...@@ -272,6 +271,7 @@ extern "C" {
#define HAS_SOBELXYROW_NEON #define HAS_SOBELXYROW_NEON
#define HAS_SOBELXROW_NEON #define HAS_SOBELXROW_NEON
#define HAS_SOBELYROW_NEON #define HAS_SOBELYROW_NEON
#define HAS_INTERPOLATEROW_NEON
#endif #endif
// The following are available on Mips platforms // The following are available on Mips platforms
...@@ -281,6 +281,7 @@ extern "C" { ...@@ -281,6 +281,7 @@ extern "C" {
#define HAS_I422TOABGRROW_MIPS_DSPR2 #define HAS_I422TOABGRROW_MIPS_DSPR2
#define HAS_I422TOARGBROW_MIPS_DSPR2 #define HAS_I422TOARGBROW_MIPS_DSPR2
#define HAS_I422TOBGRAROW_MIPS_DSPR2 #define HAS_I422TOBGRAROW_MIPS_DSPR2
#define HAS_INTERPOLATEROWS_MIPS_DSPR2
#define HAS_MIRRORROW_MIPS_DSPR2 #define HAS_MIRRORROW_MIPS_DSPR2
#define HAS_MIRRORUVROW_MIPS_DSPR2 #define HAS_MIRRORUVROW_MIPS_DSPR2
#define HAS_SPLITUVROW_MIPS_DSPR2 #define HAS_SPLITUVROW_MIPS_DSPR2
...@@ -1455,33 +1456,39 @@ LIBYUV_API ...@@ -1455,33 +1456,39 @@ LIBYUV_API
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
uint8* dst_argb, const float* uv_dudv, int width); uint8* dst_argb, const float* uv_dudv, int width);
// Used for ARGBScale and ARGBInterpolate. // Used for I420Scale, ARGBScale, and ARGBInterpolate.
void ARGBInterpolateRow_C(uint8* dst_argb, const uint8* src_argb, void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_argb, ptrdiff_t src_stride_ptr,
int width, int source_y_fraction); int width, int source_y_fraction);
void ARGBInterpolateRow_SSE2(uint8* dst_argb, const uint8* src_argb, void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_argb, int width, ptrdiff_t src_stride_ptr, int width,
int source_y_fraction); int source_y_fraction);
void ARGBInterpolateRow_SSSE3(uint8* dst_argb, const uint8* src_argb, void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_argb, int width, ptrdiff_t src_stride_ptr, int width,
int source_y_fraction); int source_y_fraction);
void ARGBInterpolateRow_NEON(uint8* dst_argb, const uint8* src_argb, void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_argb, int width, ptrdiff_t src_stride_ptr, int width,
int source_y_fraction); int source_y_fraction);
void ARGBInterpolateRow_Unaligned_SSE2(uint8* dst_argb, const uint8* src_argb, void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_argb, int width, ptrdiff_t src_stride_ptr, int width,
int source_y_fraction); int source_y_fraction);
void ARGBInterpolateRow_Unaligned_SSSE3(uint8* dst_argb, const uint8* src_argb, void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_argb, int width, ptrdiff_t src_stride_ptr, int width,
int source_y_fraction); int source_y_fraction);
void ARGBInterpolateRow_Any_NEON(uint8* dst_argb, const uint8* src_argb, void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_argb, int width, ptrdiff_t src_stride_ptr, int width,
int source_y_fraction); int source_y_fraction);
void ARGBInterpolateRow_Any_SSE2(uint8* dst_argb, const uint8* src_argb, void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_argb, int width, ptrdiff_t src_stride_ptr, int width,
int source_y_fraction); int source_y_fraction);
void ARGBInterpolateRow_Any_SSSE3(uint8* dst_argb, const uint8* src_argb, void InterpolateRow_Any_SSE2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_argb, int width, ptrdiff_t src_stride_ptr, int width,
int source_y_fraction);
void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_ptr, int width,
int source_y_fraction);
void InterpolateRows_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_ptr, int width,
int source_y_fraction); int source_y_fraction);
// Sobel images. // Sobel images.
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 709 #define LIBYUV_VERSION 710
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -1642,46 +1642,55 @@ int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, ...@@ -1642,46 +1642,55 @@ int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
width * height, 1, width * height, 1,
interpolation); interpolation);
} }
void (*ARGBInterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width, ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = ARGBInterpolateRow_C; int source_y_fraction) = InterpolateRow_C;
#if defined(HAS_ARGBINTERPOLATEROW_SSE2) #if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
ARGBInterpolateRow = ARGBInterpolateRow_Any_SSE2; InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(width, 4)) { if (IS_ALIGNED(width, 4)) {
ARGBInterpolateRow = ARGBInterpolateRow_Unaligned_SSE2; InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) && if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) && IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBInterpolateRow = ARGBInterpolateRow_SSE2; InterpolateRow = InterpolateRow_SSE2;
} }
} }
} }
#endif #endif
#if defined(HAS_ARGBINTERPOLATEROW_SSSE3) #if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
ARGBInterpolateRow = ARGBInterpolateRow_Any_SSSE3; InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(width, 4)) { if (IS_ALIGNED(width, 4)) {
ARGBInterpolateRow = ARGBInterpolateRow_Unaligned_SSSE3; InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) && if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) && IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBInterpolateRow = ARGBInterpolateRow_SSSE3; InterpolateRow = InterpolateRow_SSSE3;
} }
} }
} }
#endif #endif
#if defined(HAS_ARGBINTERPOLATEROW_NEON) #if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 4) { if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
ARGBInterpolateRow = ARGBInterpolateRow_Any_NEON; InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(width, 4)) { if (IS_ALIGNED(width, 4)) {
ARGBInterpolateRow = ARGBInterpolateRow_NEON; InterpolateRow = InterpolateRow_NEON;
} }
} }
#endif #endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && width >= 1 &&
IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) &&
IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
ScaleARGBFilterRows = InterpolateRow_MIPS_DSPR2;
}
#endif
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
ARGBInterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0, InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0,
width, interpolation); width * 4, interpolation);
src_argb0 += src_stride_argb0; src_argb0 += src_stride_argb0;
src_argb1 += src_stride_argb1; src_argb1 += src_stride_argb1;
dst_argb += dst_stride_argb; dst_argb += dst_stride_argb;
......
...@@ -483,29 +483,33 @@ YANY(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, ...@@ -483,29 +483,33 @@ YANY(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON,
#undef YANY #undef YANY
// Interpolate may want to work in place, so last16 method can not be used. // Interpolate may want to work in place, so last16 method can not be used.
#define NANY(NAMEANY, ARGBTERP_SIMD, ARGBTERP_C, SBPP, BPP, MASK) \ #define NANY(NAMEANY, TERP_SIMD, TERP_C, SBPP, BPP, MASK) \
void NAMEANY(uint8* dst_argb, const uint8* src_argb, \ void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, \
ptrdiff_t src_stride_argb, int width, \ ptrdiff_t src_stride_ptr, int width, \
int source_y_fraction) { \ int source_y_fraction) { \
int n = width & ~MASK; \ int n = width & ~MASK; \
ARGBTERP_SIMD(dst_argb, src_argb, src_stride_argb, \ TERP_SIMD(dst_ptr, src_ptr, src_stride_ptr, \
n, source_y_fraction); \ n, source_y_fraction); \
ARGBTERP_C(dst_argb + n * BPP, \ TERP_C(dst_ptr + n * BPP, \
src_argb + n * SBPP, src_stride_argb, \ src_ptr + n * SBPP, src_stride_ptr, \
width & MASK, source_y_fraction); \ width & MASK, source_y_fraction); \
} }
#ifdef HAS_ARGBINTERPOLATEROW_SSSE3 #ifdef HAS_INTERPOLATEROW_SSSE3
NANY(ARGBInterpolateRow_Any_SSSE3, ARGBInterpolateRow_Unaligned_SSSE3, NANY(InterpolateRow_Any_SSSE3, InterpolateRow_Unaligned_SSSE3,
ARGBInterpolateRow_C, 4, 4, 3) InterpolateRow_C, 1, 1, 15)
#endif #endif
#ifdef HAS_ARGBINTERPOLATEROW_SSE2 #ifdef HAS_INTERPOLATEROW_SSE2
NANY(ARGBInterpolateRow_Any_SSE2, ARGBInterpolateRow_Unaligned_SSE2, NANY(InterpolateRow_Any_SSE2, InterpolateRow_Unaligned_SSE2,
ARGBInterpolateRow_C, 4, 4, 3) InterpolateRow_C, 1, 1, 15)
#endif #endif
#ifdef HAS_ARGBINTERPOLATEROW_NEON #ifdef HAS_INTERPOLATEROW_NEON
NANY(ARGBInterpolateRow_Any_NEON, ARGBInterpolateRow_NEON, NANY(InterpolateRow_Any_NEON, InterpolateRow_NEON,
ARGBInterpolateRow_C, 4, 4, 3) InterpolateRow_C, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_MIPS_DSPR2
NANY(InterpolateRow_Any_MIPS_DSPR2, InterpolateRow_MIPS_DSPR2,
InterpolateRow_C, 1, 1, 3)
#endif #endif
#undef NANY #undef NANY
......
...@@ -1775,7 +1775,7 @@ void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, ...@@ -1775,7 +1775,7 @@ void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
} }
// C version 2x2 -> 2x1. // C version 2x2 -> 2x1.
void ARGBInterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
int width, int source_y_fraction) { int width, int source_y_fraction) {
int y1_fraction = source_y_fraction; int y1_fraction = source_y_fraction;
...@@ -1785,21 +1785,12 @@ void ARGBInterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, ...@@ -1785,21 +1785,12 @@ void ARGBInterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
for (int x = 0; x < width - 1; x += 2) { for (int x = 0; x < width - 1; x += 2) {
dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8; dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
dst_ptr[2] = (src_ptr[2] * y0_fraction + src_ptr1[2] * y1_fraction) >> 8; src_ptr += 2;
dst_ptr[3] = (src_ptr[3] * y0_fraction + src_ptr1[3] * y1_fraction) >> 8; src_ptr1 += 2;
dst_ptr[4] = (src_ptr[4] * y0_fraction + src_ptr1[4] * y1_fraction) >> 8; dst_ptr += 2;
dst_ptr[5] = (src_ptr[5] * y0_fraction + src_ptr1[5] * y1_fraction) >> 8;
dst_ptr[6] = (src_ptr[6] * y0_fraction + src_ptr1[6] * y1_fraction) >> 8;
dst_ptr[7] = (src_ptr[7] * y0_fraction + src_ptr1[7] * y1_fraction) >> 8;
src_ptr += 8;
src_ptr1 += 8;
dst_ptr += 8;
} }
if (width & 1) { if (width & 1) {
dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
dst_ptr[2] = (src_ptr[2] * y0_fraction + src_ptr1[2] * y1_fraction) >> 8;
dst_ptr[3] = (src_ptr[3] * y0_fraction + src_ptr1[3] * y1_fraction) >> 8;
} }
} }
......
...@@ -909,6 +909,63 @@ void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf, ...@@ -909,6 +909,63 @@ void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
"s4", "s5", "s6" "s4", "s5", "s6"
); );
} }
// Bilinear filter 8x2 -> 8x1
// Vertically blends the row at src_ptr with the row at src_ptr + src_stride
// and stores the result at dst_ptr, producing 8 output bytes per loop
// iteration. Per byte the asm computes
//   (src[i] * (256 - source_y_fraction) + src1[i] * source_y_fraction) >> 8
// and keeps the low 8 bits of that value.
//   dst_ptr            destination row.
//   src_ptr            first source row; the second row is src_ptr + src_stride.
//   src_stride         byte offset from the first source row to the second.
//   dst_width          number of bytes to produce (decremented by 8 per pass).
//   source_y_fraction  weight applied to the second row; the first row gets
//                      256 - source_y_fraction. Assumes the value is within
//                      0..256 so the 16 bit sums do not overflow - TODO confirm.
// NOTE(review): the loop body runs before dst_width is tested and always
// consumes 8 bytes, so a dst_width that is not a positive multiple of 8 would
// read and write past the requested width - confirm callers guarantee this.
// NOTE(review): rows are accessed with lw/sw word loads and stores, so the
// pointers and stride presumably need to be 4 byte aligned - verify callers.
void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) {
// Weight for the first row; source_y_fraction is the weight of the second.
int y0_fraction = 256 - source_y_fraction;
// Second source row, one stride after the first.
const uint8* src_ptr1 = src_ptr + src_stride;
__asm__ __volatile__ (
".set push \n"
// noreorder: the assembler must not fill or reorder the branch delay slot.
".set noreorder \n"
// t0 = first row weight, t1 = second row weight, each replicated into both
// halfwords of the register.
"replv.ph $t0, %[y0_fraction] \n"
"replv.ph $t1, %[source_y_fraction] \n"
"1: \n"
// Load 8 bytes (two words) from each of the two source rows.
"lw $t2, 0(%[src_ptr]) \n"
"lw $t3, 0(%[src_ptr1]) \n"
"lw $t4, 4(%[src_ptr]) \n"
"lw $t5, 4(%[src_ptr1]) \n"
// Multiply the left (qbl) and right (qbr) byte pairs of each word by the
// row weight, giving two 16 bit products per register.
"muleu_s.ph.qbl $t6, $t2, $t0 \n"
"muleu_s.ph.qbr $t7, $t2, $t0 \n"
"muleu_s.ph.qbl $t8, $t3, $t1 \n"
"muleu_s.ph.qbr $t9, $t3, $t1 \n"
"muleu_s.ph.qbl $t2, $t4, $t0 \n"
"muleu_s.ph.qbr $t3, $t4, $t0 \n"
"muleu_s.ph.qbl $t4, $t5, $t1 \n"
"muleu_s.ph.qbr $t5, $t5, $t1 \n"
// Sum the weighted first row and second row products.
"addq.ph $t6, $t6, $t8 \n"
"addq.ph $t7, $t7, $t9 \n"
"addq.ph $t2, $t2, $t4 \n"
"addq.ph $t3, $t3, $t5 \n"
// Divide by 256; only the low byte of each halfword is kept below, so the
// sign fill of the arithmetic shift is discarded.
"shra.ph $t6, $t6, 8 \n"
"shra.ph $t7, $t7, 8 \n"
"shra.ph $t2, $t2, 8 \n"
"shra.ph $t3, $t3, 8 \n"
// Pack the low byte of each halfword back into one 4 byte word per pair.
"precr.qb.ph $t6, $t6, $t7 \n"
"precr.qb.ph $t2, $t2, $t3 \n"
// Advance both source rows and count down the 8 bytes just processed.
"addiu %[src_ptr], %[src_ptr], 8 \n"
"addiu %[src_ptr1], %[src_ptr1], 8 \n"
"addiu %[dst_width], %[dst_width], -8 \n"
"sw $t6, 0(%[dst_ptr]) \n"
"sw $t2, 4(%[dst_ptr]) \n"
"bgtz %[dst_width], 1b \n"
// Branch delay slot: dst_ptr is advanced whether or not the branch is taken.
" addiu %[dst_ptr], %[dst_ptr], 8 \n"
".set pop \n"
: [dst_ptr] "+r" (dst_ptr),
[src_ptr1] "+r" (src_ptr1),
[src_ptr] "+r" (src_ptr),
[dst_width] "+r" (dst_width)
// NOTE(review): src_stride is passed as an input operand but is not
// referenced by the asm template above.
: [source_y_fraction] "r" (source_y_fraction),
[y0_fraction] "r" (y0_fraction),
[src_stride] "r" (src_stride)
// NOTE(review): the asm stores through dst_ptr but no "memory" clobber is
// listed - confirm this is intentional.
: "t0", "t1", "t2", "t3", "t4", "t5",
"t6", "t7", "t8", "t9"
);
}
#endif // __mips_dsp_rev >= 2 #endif // __mips_dsp_rev >= 2
#ifdef __cplusplus #ifdef __cplusplus
......
...@@ -2161,9 +2161,8 @@ void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) { ...@@ -2161,9 +2161,8 @@ void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) {
); );
} }
// 4x2 -> 4x1 // Bilinear filter 16x2 -> 16x1
// Same as ScaleARGBFilterRows_NEON but with last pixel not duplicated. void InterpolateRow_NEON(uint8* dst_ptr,
void ARGBInterpolateRow_NEON(uint8* dst_ptr,
const uint8* src_ptr, ptrdiff_t src_stride, const uint8* src_ptr, ptrdiff_t src_stride,
int dst_width, int source_y_fraction) { int dst_width, int source_y_fraction) {
asm volatile ( asm volatile (
...@@ -2184,7 +2183,7 @@ void ARGBInterpolateRow_NEON(uint8* dst_ptr, ...@@ -2184,7 +2183,7 @@ void ARGBInterpolateRow_NEON(uint8* dst_ptr,
"1: \n" "1: \n"
"vld1.u8 {q0}, [%1]! \n" "vld1.u8 {q0}, [%1]! \n"
"vld1.u8 {q1}, [%2]! \n" "vld1.u8 {q1}, [%2]! \n"
"subs %3, #4 \n" "subs %3, %3, #16 \n"
"vmull.u8 q13, d0, d4 \n" "vmull.u8 q13, d0, d4 \n"
"vmull.u8 q14, d1, d4 \n" "vmull.u8 q14, d1, d4 \n"
"vmlal.u8 q13, d2, d5 \n" "vmlal.u8 q13, d2, d5 \n"
...@@ -2199,7 +2198,7 @@ void ARGBInterpolateRow_NEON(uint8* dst_ptr, ...@@ -2199,7 +2198,7 @@ void ARGBInterpolateRow_NEON(uint8* dst_ptr,
"25: \n" "25: \n"
"vld1.u8 {q0}, [%1]! \n" "vld1.u8 {q0}, [%1]! \n"
"vld1.u8 {q1}, [%2]! \n" "vld1.u8 {q1}, [%2]! \n"
"subs %3, #4 \n" "subs %3, %3, #16 \n"
"vrhadd.u8 q0, q1 \n" "vrhadd.u8 q0, q1 \n"
"vrhadd.u8 q0, q1 \n" "vrhadd.u8 q0, q1 \n"
"vst1.u8 {q0}, [%0]! \n" "vst1.u8 {q0}, [%0]! \n"
...@@ -2210,7 +2209,7 @@ void ARGBInterpolateRow_NEON(uint8* dst_ptr, ...@@ -2210,7 +2209,7 @@ void ARGBInterpolateRow_NEON(uint8* dst_ptr,
"50: \n" "50: \n"
"vld1.u8 {q0}, [%1]! \n" "vld1.u8 {q0}, [%1]! \n"
"vld1.u8 {q1}, [%2]! \n" "vld1.u8 {q1}, [%2]! \n"
"subs %3, #4 \n" "subs %3, %3, #16 \n"
"vrhadd.u8 q0, q1 \n" "vrhadd.u8 q0, q1 \n"
"vst1.u8 {q0}, [%0]! \n" "vst1.u8 {q0}, [%0]! \n"
"bgt 50b \n" "bgt 50b \n"
...@@ -2220,7 +2219,7 @@ void ARGBInterpolateRow_NEON(uint8* dst_ptr, ...@@ -2220,7 +2219,7 @@ void ARGBInterpolateRow_NEON(uint8* dst_ptr,
"75: \n" "75: \n"
"vld1.u8 {q1}, [%1]! \n" "vld1.u8 {q1}, [%1]! \n"
"vld1.u8 {q0}, [%2]! \n" "vld1.u8 {q0}, [%2]! \n"
"subs %3, #4 \n" "subs %3, %3, #16 \n"
"vrhadd.u8 q0, q1 \n" "vrhadd.u8 q0, q1 \n"
"vrhadd.u8 q0, q1 \n" "vrhadd.u8 q0, q1 \n"
"vst1.u8 {q0}, [%0]! \n" "vst1.u8 {q0}, [%0]! \n"
...@@ -2230,7 +2229,7 @@ void ARGBInterpolateRow_NEON(uint8* dst_ptr, ...@@ -2230,7 +2229,7 @@ void ARGBInterpolateRow_NEON(uint8* dst_ptr,
// Blend 100 / 0 - Copy row unchanged. // Blend 100 / 0 - Copy row unchanged.
"100: \n" "100: \n"
"vld1.u8 {q0}, [%1]! \n" "vld1.u8 {q0}, [%1]! \n"
"subs %3, #4 \n" "subs %3, %3, #16 \n"
"vst1.u8 {q0}, [%0]! \n" "vst1.u8 {q0}, [%0]! \n"
"bgt 100b \n" "bgt 100b \n"
......
This diff is collapsed.
...@@ -5923,17 +5923,16 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, ...@@ -5923,17 +5923,16 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
} }
#endif // HAS_ARGBAFFINEROW_SSE2 #endif // HAS_ARGBAFFINEROW_SSE2
// Bilinear image filtering. // Bilinear filter 16x2 -> 16x1
// Same as ScaleARGBFilterRows_SSSE3 but without last pixel duplicated.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ARGBInterpolateRow_SSSE3(uint8* dst_argb, const uint8* src_argb, void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width, ptrdiff_t src_stride, int dst_width,
int source_y_fraction) { int source_y_fraction) {
__asm { __asm {
push esi push esi
push edi push edi
mov edi, [esp + 8 + 4] // dst_argb mov edi, [esp + 8 + 4] // dst_ptr
mov esi, [esp + 8 + 8] // src_argb mov esi, [esp + 8 + 8] // src_ptr
mov edx, [esp + 8 + 12] // src_stride mov edx, [esp + 8 + 12] // src_stride
mov ecx, [esp + 8 + 16] // dst_width mov ecx, [esp + 8 + 16] // dst_width
mov eax, [esp + 8 + 20] // source_y_fraction (0..255) mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
...@@ -5969,7 +5968,7 @@ void ARGBInterpolateRow_SSSE3(uint8* dst_argb, const uint8* src_argb, ...@@ -5969,7 +5968,7 @@ void ARGBInterpolateRow_SSSE3(uint8* dst_argb, const uint8* src_argb,
psrlw xmm0, 7 psrlw xmm0, 7
psrlw xmm1, 7 psrlw xmm1, 7
packuswb xmm0, xmm1 packuswb xmm0, xmm1
sub ecx, 4 sub ecx, 16
movdqa [esi + edi], xmm0 movdqa [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop jg xloop
...@@ -5982,7 +5981,7 @@ void ARGBInterpolateRow_SSSE3(uint8* dst_argb, const uint8* src_argb, ...@@ -5982,7 +5981,7 @@ void ARGBInterpolateRow_SSSE3(uint8* dst_argb, const uint8* src_argb,
movdqa xmm1, [esi + edx] movdqa xmm1, [esi + edx]
pavgb xmm0, xmm1 pavgb xmm0, xmm1
pavgb xmm0, xmm1 pavgb xmm0, xmm1
sub ecx, 4 sub ecx, 16
movdqa [esi + edi], xmm0 movdqa [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop25 jg xloop25
...@@ -5994,7 +5993,7 @@ void ARGBInterpolateRow_SSSE3(uint8* dst_argb, const uint8* src_argb, ...@@ -5994,7 +5993,7 @@ void ARGBInterpolateRow_SSSE3(uint8* dst_argb, const uint8* src_argb,
movdqa xmm0, [esi] movdqa xmm0, [esi]
movdqa xmm1, [esi + edx] movdqa xmm1, [esi + edx]
pavgb xmm0, xmm1 pavgb xmm0, xmm1
sub ecx, 4 sub ecx, 16
movdqa [esi + edi], xmm0 movdqa [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop50 jg xloop50
...@@ -6007,7 +6006,7 @@ void ARGBInterpolateRow_SSSE3(uint8* dst_argb, const uint8* src_argb, ...@@ -6007,7 +6006,7 @@ void ARGBInterpolateRow_SSSE3(uint8* dst_argb, const uint8* src_argb,
movdqa xmm0, [esi + edx] movdqa xmm0, [esi + edx]
pavgb xmm0, xmm1 pavgb xmm0, xmm1
pavgb xmm0, xmm1 pavgb xmm0, xmm1
sub ecx, 4 sub ecx, 16
movdqa [esi + edi], xmm0 movdqa [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop75 jg xloop75
...@@ -6017,7 +6016,7 @@ void ARGBInterpolateRow_SSSE3(uint8* dst_argb, const uint8* src_argb, ...@@ -6017,7 +6016,7 @@ void ARGBInterpolateRow_SSSE3(uint8* dst_argb, const uint8* src_argb,
align 16 align 16
xloop100: xloop100:
movdqa xmm0, [esi] movdqa xmm0, [esi]
sub ecx, 4 sub ecx, 16
movdqa [esi + edi], xmm0 movdqa [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop100 jg xloop100
...@@ -6029,17 +6028,16 @@ void ARGBInterpolateRow_SSSE3(uint8* dst_argb, const uint8* src_argb, ...@@ -6029,17 +6028,16 @@ void ARGBInterpolateRow_SSSE3(uint8* dst_argb, const uint8* src_argb,
} }
} }
// Bilinear image filtering. // Bilinear filter 16x2 -> 16x1
// Same as ScaleARGBFilterRows_SSE2 but without last pixel duplicated.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ARGBInterpolateRow_SSE2(uint8* dst_argb, const uint8* src_argb, void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width, ptrdiff_t src_stride, int dst_width,
int source_y_fraction) { int source_y_fraction) {
__asm { __asm {
push esi push esi
push edi push edi
mov edi, [esp + 8 + 4] // dst_argb mov edi, [esp + 8 + 4] // dst_ptr
mov esi, [esp + 8 + 8] // src_argb mov esi, [esp + 8 + 8] // src_ptr
mov edx, [esp + 8 + 12] // src_stride mov edx, [esp + 8 + 12] // src_stride
mov ecx, [esp + 8 + 16] // dst_width mov ecx, [esp + 8 + 16] // dst_width
mov eax, [esp + 8 + 20] // source_y_fraction (0..255) mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
...@@ -6081,7 +6079,7 @@ void ARGBInterpolateRow_SSE2(uint8* dst_argb, const uint8* src_argb, ...@@ -6081,7 +6079,7 @@ void ARGBInterpolateRow_SSE2(uint8* dst_argb, const uint8* src_argb,
paddw xmm0, xmm2 // sum rows paddw xmm0, xmm2 // sum rows
paddw xmm1, xmm3 paddw xmm1, xmm3
packuswb xmm0, xmm1 packuswb xmm0, xmm1
sub ecx, 4 sub ecx, 16
movdqa [esi + edi], xmm0 movdqa [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop jg xloop
...@@ -6094,7 +6092,7 @@ void ARGBInterpolateRow_SSE2(uint8* dst_argb, const uint8* src_argb, ...@@ -6094,7 +6092,7 @@ void ARGBInterpolateRow_SSE2(uint8* dst_argb, const uint8* src_argb,
movdqa xmm1, [esi + edx] movdqa xmm1, [esi + edx]
pavgb xmm0, xmm1 pavgb xmm0, xmm1
pavgb xmm0, xmm1 pavgb xmm0, xmm1
sub ecx, 4 sub ecx, 16
movdqa [esi + edi], xmm0 movdqa [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop25 jg xloop25
...@@ -6106,7 +6104,7 @@ void ARGBInterpolateRow_SSE2(uint8* dst_argb, const uint8* src_argb, ...@@ -6106,7 +6104,7 @@ void ARGBInterpolateRow_SSE2(uint8* dst_argb, const uint8* src_argb,
movdqa xmm0, [esi] movdqa xmm0, [esi]
movdqa xmm1, [esi + edx] movdqa xmm1, [esi + edx]
pavgb xmm0, xmm1 pavgb xmm0, xmm1
sub ecx, 4 sub ecx, 16
movdqa [esi + edi], xmm0 movdqa [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop50 jg xloop50
...@@ -6119,7 +6117,7 @@ void ARGBInterpolateRow_SSE2(uint8* dst_argb, const uint8* src_argb, ...@@ -6119,7 +6117,7 @@ void ARGBInterpolateRow_SSE2(uint8* dst_argb, const uint8* src_argb,
movdqa xmm0, [esi + edx] movdqa xmm0, [esi + edx]
pavgb xmm0, xmm1 pavgb xmm0, xmm1
pavgb xmm0, xmm1 pavgb xmm0, xmm1
sub ecx, 4 sub ecx, 16
movdqa [esi + edi], xmm0 movdqa [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop75 jg xloop75
...@@ -6129,7 +6127,7 @@ void ARGBInterpolateRow_SSE2(uint8* dst_argb, const uint8* src_argb, ...@@ -6129,7 +6127,7 @@ void ARGBInterpolateRow_SSE2(uint8* dst_argb, const uint8* src_argb,
align 16 align 16
xloop100: xloop100:
movdqa xmm0, [esi] movdqa xmm0, [esi]
sub ecx, 4 sub ecx, 16
movdqa [esi + edi], xmm0 movdqa [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop100 jg xloop100
...@@ -6141,17 +6139,16 @@ void ARGBInterpolateRow_SSE2(uint8* dst_argb, const uint8* src_argb, ...@@ -6141,17 +6139,16 @@ void ARGBInterpolateRow_SSE2(uint8* dst_argb, const uint8* src_argb,
} }
} }
// Bilinear image filtering. // Bilinear filter 16x2 -> 16x1
// Same as ScaleARGBFilterRows_SSSE3 but without last pixel duplicated.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ARGBInterpolateRow_Unaligned_SSSE3(uint8* dst_argb, const uint8* src_argb, void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width, ptrdiff_t src_stride, int dst_width,
int source_y_fraction) { int source_y_fraction) {
__asm { __asm {
push esi push esi
push edi push edi
mov edi, [esp + 8 + 4] // dst_argb mov edi, [esp + 8 + 4] // dst_ptr
mov esi, [esp + 8 + 8] // src_argb mov esi, [esp + 8 + 8] // src_ptr
mov edx, [esp + 8 + 12] // src_stride mov edx, [esp + 8 + 12] // src_stride
mov ecx, [esp + 8 + 16] // dst_width mov ecx, [esp + 8 + 16] // dst_width
mov eax, [esp + 8 + 20] // source_y_fraction (0..255) mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
...@@ -6187,7 +6184,7 @@ void ARGBInterpolateRow_Unaligned_SSSE3(uint8* dst_argb, const uint8* src_argb, ...@@ -6187,7 +6184,7 @@ void ARGBInterpolateRow_Unaligned_SSSE3(uint8* dst_argb, const uint8* src_argb,
psrlw xmm0, 7 psrlw xmm0, 7
psrlw xmm1, 7 psrlw xmm1, 7
packuswb xmm0, xmm1 packuswb xmm0, xmm1
sub ecx, 4 sub ecx, 16
movdqu [esi + edi], xmm0 movdqu [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop jg xloop
...@@ -6200,7 +6197,7 @@ void ARGBInterpolateRow_Unaligned_SSSE3(uint8* dst_argb, const uint8* src_argb, ...@@ -6200,7 +6197,7 @@ void ARGBInterpolateRow_Unaligned_SSSE3(uint8* dst_argb, const uint8* src_argb,
movdqu xmm1, [esi + edx] movdqu xmm1, [esi + edx]
pavgb xmm0, xmm1 pavgb xmm0, xmm1
pavgb xmm0, xmm1 pavgb xmm0, xmm1
sub ecx, 4 sub ecx, 16
movdqu [esi + edi], xmm0 movdqu [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop25 jg xloop25
...@@ -6212,7 +6209,7 @@ void ARGBInterpolateRow_Unaligned_SSSE3(uint8* dst_argb, const uint8* src_argb, ...@@ -6212,7 +6209,7 @@ void ARGBInterpolateRow_Unaligned_SSSE3(uint8* dst_argb, const uint8* src_argb,
movdqu xmm0, [esi] movdqu xmm0, [esi]
movdqu xmm1, [esi + edx] movdqu xmm1, [esi + edx]
pavgb xmm0, xmm1 pavgb xmm0, xmm1
sub ecx, 4 sub ecx, 16
movdqu [esi + edi], xmm0 movdqu [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop50 jg xloop50
...@@ -6225,7 +6222,7 @@ void ARGBInterpolateRow_Unaligned_SSSE3(uint8* dst_argb, const uint8* src_argb, ...@@ -6225,7 +6222,7 @@ void ARGBInterpolateRow_Unaligned_SSSE3(uint8* dst_argb, const uint8* src_argb,
movdqu xmm0, [esi + edx] movdqu xmm0, [esi + edx]
pavgb xmm0, xmm1 pavgb xmm0, xmm1
pavgb xmm0, xmm1 pavgb xmm0, xmm1
sub ecx, 4 sub ecx, 16
movdqu [esi + edi], xmm0 movdqu [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop75 jg xloop75
...@@ -6235,7 +6232,7 @@ void ARGBInterpolateRow_Unaligned_SSSE3(uint8* dst_argb, const uint8* src_argb, ...@@ -6235,7 +6232,7 @@ void ARGBInterpolateRow_Unaligned_SSSE3(uint8* dst_argb, const uint8* src_argb,
align 16 align 16
xloop100: xloop100:
movdqu xmm0, [esi] movdqu xmm0, [esi]
sub ecx, 4 sub ecx, 16
movdqu [esi + edi], xmm0 movdqu [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop100 jg xloop100
...@@ -6247,17 +6244,16 @@ void ARGBInterpolateRow_Unaligned_SSSE3(uint8* dst_argb, const uint8* src_argb, ...@@ -6247,17 +6244,16 @@ void ARGBInterpolateRow_Unaligned_SSSE3(uint8* dst_argb, const uint8* src_argb,
} }
} }
// Bilinear image filtering. // Bilinear filter 16x2 -> 16x1
// Same as ScaleARGBFilterRows_SSE2 but without last pixel duplicated.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ARGBInterpolateRow_Unaligned_SSE2(uint8* dst_argb, const uint8* src_argb, void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width, ptrdiff_t src_stride, int dst_width,
int source_y_fraction) { int source_y_fraction) {
__asm { __asm {
push esi push esi
push edi push edi
mov edi, [esp + 8 + 4] // dst_argb mov edi, [esp + 8 + 4] // dst_ptr
mov esi, [esp + 8 + 8] // src_argb mov esi, [esp + 8 + 8] // src_ptr
mov edx, [esp + 8 + 12] // src_stride mov edx, [esp + 8 + 12] // src_stride
mov ecx, [esp + 8 + 16] // dst_width mov ecx, [esp + 8 + 16] // dst_width
mov eax, [esp + 8 + 20] // source_y_fraction (0..255) mov eax, [esp + 8 + 20] // source_y_fraction (0..255)
...@@ -6299,7 +6295,7 @@ void ARGBInterpolateRow_Unaligned_SSE2(uint8* dst_argb, const uint8* src_argb, ...@@ -6299,7 +6295,7 @@ void ARGBInterpolateRow_Unaligned_SSE2(uint8* dst_argb, const uint8* src_argb,
paddw xmm0, xmm2 // sum rows paddw xmm0, xmm2 // sum rows
paddw xmm1, xmm3 paddw xmm1, xmm3
packuswb xmm0, xmm1 packuswb xmm0, xmm1
sub ecx, 4 sub ecx, 16
movdqu [esi + edi], xmm0 movdqu [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop jg xloop
...@@ -6312,7 +6308,7 @@ void ARGBInterpolateRow_Unaligned_SSE2(uint8* dst_argb, const uint8* src_argb, ...@@ -6312,7 +6308,7 @@ void ARGBInterpolateRow_Unaligned_SSE2(uint8* dst_argb, const uint8* src_argb,
movdqu xmm1, [esi + edx] movdqu xmm1, [esi + edx]
pavgb xmm0, xmm1 pavgb xmm0, xmm1
pavgb xmm0, xmm1 pavgb xmm0, xmm1
sub ecx, 4 sub ecx, 16
movdqu [esi + edi], xmm0 movdqu [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop25 jg xloop25
...@@ -6324,7 +6320,7 @@ void ARGBInterpolateRow_Unaligned_SSE2(uint8* dst_argb, const uint8* src_argb, ...@@ -6324,7 +6320,7 @@ void ARGBInterpolateRow_Unaligned_SSE2(uint8* dst_argb, const uint8* src_argb,
movdqu xmm0, [esi] movdqu xmm0, [esi]
movdqu xmm1, [esi + edx] movdqu xmm1, [esi + edx]
pavgb xmm0, xmm1 pavgb xmm0, xmm1
sub ecx, 4 sub ecx, 16
movdqu [esi + edi], xmm0 movdqu [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop50 jg xloop50
...@@ -6337,7 +6333,7 @@ void ARGBInterpolateRow_Unaligned_SSE2(uint8* dst_argb, const uint8* src_argb, ...@@ -6337,7 +6333,7 @@ void ARGBInterpolateRow_Unaligned_SSE2(uint8* dst_argb, const uint8* src_argb,
movdqu xmm0, [esi + edx] movdqu xmm0, [esi + edx]
pavgb xmm0, xmm1 pavgb xmm0, xmm1
pavgb xmm0, xmm1 pavgb xmm0, xmm1
sub ecx, 4 sub ecx, 16
movdqu [esi + edi], xmm0 movdqu [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop75 jg xloop75
...@@ -6347,7 +6343,7 @@ void ARGBInterpolateRow_Unaligned_SSE2(uint8* dst_argb, const uint8* src_argb, ...@@ -6347,7 +6343,7 @@ void ARGBInterpolateRow_Unaligned_SSE2(uint8* dst_argb, const uint8* src_argb,
align 16 align 16
xloop100: xloop100:
movdqu xmm0, [esi] movdqu xmm0, [esi]
sub ecx, 4 sub ecx, 16
movdqu [esi + edi], xmm0 movdqu [esi + edi], xmm0
lea esi, [esi + 16] lea esi, [esi + 16]
jg xloop100 jg xloop100
......
This diff is collapsed.
...@@ -824,41 +824,51 @@ static void ScaleARGBBilinearDown(int src_height, ...@@ -824,41 +824,51 @@ static void ScaleARGBBilinearDown(int src_height,
int xr = (dx >= 0) ? xlast : x; int xr = (dx >= 0) ? xlast : x;
xl = (xl >> 16) & ~3; // Left edge aligned. xl = (xl >> 16) & ~3; // Left edge aligned.
xr = (xr >> 16) + 1; // Right most pixel used. xr = (xr >> 16) + 1; // Right most pixel used.
int clip_src_width = ((xr - xl) + 1 + 3) & ~3; // Width aligned to 4. int clip_src_width = (((xr - xl) + 1 + 3) & ~3) * 4; // Width aligned to 4.
src_argb += xl * 4; src_argb += xl * 4;
x -= (xl << 16); x -= (xl << 16);
assert(clip_src_width * 4 <= kMaxStride); assert(clip_src_width <= kMaxStride);
// TODO(fbarchard): Remove clip_src_width alignment checks.
SIMD_ALIGNED(uint8 row[kMaxStride + 16]); SIMD_ALIGNED(uint8 row[kMaxStride + 16]);
void (*ScaleARGBFilterRows)(uint8* dst_argb, const uint8* src_argb, void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) = ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
ARGBInterpolateRow_C; InterpolateRow_C;
#if defined(HAS_ARGBINTERPOLATEROW_SSE2) #if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 4) { if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) {
ScaleARGBFilterRows = ARGBInterpolateRow_Any_SSE2; InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(clip_src_width, 4)) { if (IS_ALIGNED(clip_src_width, 16)) {
ScaleARGBFilterRows = ARGBInterpolateRow_Unaligned_SSE2; InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) { if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
ScaleARGBFilterRows = ARGBInterpolateRow_SSE2; InterpolateRow = InterpolateRow_SSE2;
} }
} }
} }
#endif #endif
#if defined(HAS_ARGBINTERPOLATEROW_SSSE3) #if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 4) { if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) {
ScaleARGBFilterRows = ARGBInterpolateRow_Any_SSSE3; InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(clip_src_width, 4)) { if (IS_ALIGNED(clip_src_width, 16)) {
ScaleARGBFilterRows = ARGBInterpolateRow_Unaligned_SSSE3; InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) { if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
ScaleARGBFilterRows = ARGBInterpolateRow_SSSE3; InterpolateRow = InterpolateRow_SSSE3;
}
} }
} }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_NEON;
}
} }
#endif #endif
#if defined(HAS_ARGBINTERPOLATEROW_NEON) #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 4) { if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 &&
ScaleARGBFilterRows = ARGBInterpolateRow_Any_NEON; IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
if (IS_ALIGNED(clip_src_width, 4)) { if (IS_ALIGNED(clip_src_width, 4)) {
ScaleARGBFilterRows = ARGBInterpolateRow_NEON; InterpolateRow = InterpolateRow_MIPS_DSPR2;
} }
} }
#endif #endif
...@@ -877,7 +887,7 @@ static void ScaleARGBBilinearDown(int src_height, ...@@ -877,7 +887,7 @@ static void ScaleARGBBilinearDown(int src_height,
int yi = y >> 16; int yi = y >> 16;
int yf = (y >> 8) & 255; int yf = (y >> 8) & 255;
const uint8* src = src_argb + yi * src_stride; const uint8* src = src_argb + yi * src_stride;
ScaleARGBFilterRows(row, src, src_stride, clip_src_width, yf); InterpolateRow(row, src, src_stride, clip_src_width, yf);
ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx); ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
dst_argb += dst_stride; dst_argb += dst_stride;
y += dy; y += dy;
...@@ -895,38 +905,44 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, ...@@ -895,38 +905,44 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
assert(dst_width > 0); assert(dst_width > 0);
assert(dst_height > 0); assert(dst_height > 0);
assert(dst_width * 4 <= kMaxStride); assert(dst_width * 4 <= kMaxStride);
void (*ScaleARGBFilterRows)(uint8* dst_argb, const uint8* src_argb, void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) = ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
ARGBInterpolateRow_C; InterpolateRow_C;
#if defined(HAS_ARGBINTERPOLATEROW_SSE2) #if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) { if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
ScaleARGBFilterRows = ARGBInterpolateRow_Any_SSE2; InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width, 4)) { if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterRows = ARGBInterpolateRow_Unaligned_SSE2; InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBFilterRows = ARGBInterpolateRow_SSE2; InterpolateRow = InterpolateRow_SSE2;
} }
} }
} }
#endif #endif
#if defined(HAS_ARGBINTERPOLATEROW_SSSE3) #if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) { if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
ScaleARGBFilterRows = ARGBInterpolateRow_Any_SSSE3; InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 4)) { if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterRows = ARGBInterpolateRow_Unaligned_SSSE3; InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBFilterRows = ARGBInterpolateRow_SSSE3; InterpolateRow = InterpolateRow_SSSE3;
} }
} }
} }
#endif #endif
#if defined(HAS_ARGBINTERPOLATEROW_NEON) #if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) { if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
ScaleARGBFilterRows = ARGBInterpolateRow_Any_NEON; InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) { if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterRows = ARGBInterpolateRow_NEON; InterpolateRow = InterpolateRow_NEON;
} }
} }
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
}
#endif #endif
void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) = ScaleARGBFilterCols_C; int dst_width, int x, int dx) = ScaleARGBFilterCols_C;
...@@ -965,7 +981,7 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, ...@@ -965,7 +981,7 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
} }
} }
int yf = (y >> 8) & 255; int yf = (y >> 8) & 255;
ScaleARGBFilterRows(dst_argb, rowptr, rowstride, dst_width, yf); InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
dst_argb += dst_stride; dst_argb += dst_stride;
y += dy; y += dy;
} }
...@@ -1024,24 +1040,23 @@ static void ScaleARGBAnySize(int src_width, int src_height, ...@@ -1024,24 +1040,23 @@ static void ScaleARGBAnySize(int src_width, int src_height,
const uint8* src_argb, uint8* dst_argb, const uint8* src_argb, uint8* dst_argb,
int x, int dx, int y, int dy, int x, int dx, int y, int dy,
FilterMode filtering) { FilterMode filtering) {
if (!filtering || if (filtering && dy < 65536 && dst_width * 4 <= kMaxStride) {
(src_width * 4 > kMaxStride && dst_width * 4 > kMaxStride)) { ScaleARGBBilinearUp(src_width, src_height,
ScaleARGBSimple(src_width, src_height, clip_width, clip_height, clip_width, clip_height,
src_stride, dst_stride, src_argb, dst_argb, src_stride, dst_stride, src_argb, dst_argb,
x, dx, y, dy); x, dx, y, dy);
return; return;
} }
if (dy >= 65536 || dst_width * 4 > kMaxStride) { if (filtering && src_width * 4 < kMaxStride) {
ScaleARGBBilinearDown(src_height, ScaleARGBBilinearDown(src_height,
clip_width, clip_height, clip_width, clip_height,
src_stride, dst_stride, src_argb, dst_argb, src_stride, dst_stride, src_argb, dst_argb,
x, dx, y, dy); x, dx, y, dy);
} else { return;
ScaleARGBBilinearUp(src_width, src_height, }
clip_width, clip_height, ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
src_stride, dst_stride, src_argb, dst_argb, src_stride, dst_stride, src_argb, dst_argb,
x, dx, y, dy); x, dx, y, dy);
}
} }
// ScaleARGB a ARGB. // ScaleARGB a ARGB.
......
...@@ -629,64 +629,6 @@ void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr, ...@@ -629,64 +629,6 @@ void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
); );
} }
// Vertically blends two adjacent source rows into one destination row using
// MIPS DSP (rev 2) paired-halfword arithmetic.
//
//   dst[i] = (src_ptr[i] * (256 - source_y_fraction) +
//             src_ptr[i + src_stride] * source_y_fraction) >> 8
//
// dst_ptr            Output row.  One byte beyond the last full 8-byte group
//                    is also written (a copy of the final output byte).
// src_ptr            First source row; the second row is src_ptr + src_stride.
// src_stride         Byte distance between the two source rows.
// dst_width          Number of bytes to produce.  The loop works in groups of
//                    8 bytes and always runs at least once, so the count is
//                    effectively rounded up to a multiple of 8.
// source_y_fraction  Weight of the second row; the first row is weighted by
//                    256 - source_y_fraction.
//                    NOTE(review): presumably in [0, 255] - confirm at callers.
//
// All loads and stores are word sized ("lw"/"sw"), so the row pointers are
// presumably expected to be 4-byte aligned - confirm against the dispatch code.
void ScaleFilterRows_MIPS_DSPR2(unsigned char *dst_ptr,
                                const unsigned char* src_ptr,
                                ptrdiff_t src_stride,
                                int dst_width, int source_y_fraction) {
  int y0_fraction = 256 - source_y_fraction;
  const unsigned char* src_ptr1 = src_ptr + src_stride;

  __asm__ __volatile__ (
    ".set push \n"
    ".set noreorder \n"

    // Replicate each weight into both halfwords of a register.
    "replv.ph $t0, %[y0_fraction] \n"
    "replv.ph $t1, %[source_y_fraction] \n"

  "1: \n"
    // Fetch 8 bytes from each of the two rows.
    "lw $t2, 0(%[src_ptr]) \n"
    "lw $t3, 0(%[src_ptr1]) \n"
    "lw $t4, 4(%[src_ptr]) \n"
    "lw $t5, 4(%[src_ptr1]) \n"
    // Widen byte pairs to halfwords while multiplying by the row weight.
    "muleu_s.ph.qbl $t6, $t2, $t0 \n"
    "muleu_s.ph.qbr $t7, $t2, $t0 \n"
    "muleu_s.ph.qbl $t8, $t3, $t1 \n"
    "muleu_s.ph.qbr $t9, $t3, $t1 \n"
    "muleu_s.ph.qbl $t2, $t4, $t0 \n"
    "muleu_s.ph.qbr $t3, $t4, $t0 \n"
    "muleu_s.ph.qbl $t4, $t5, $t1 \n"
    "muleu_s.ph.qbr $t5, $t5, $t1 \n"
    // Sum the two weighted rows.
    "addq.ph $t6, $t6, $t8 \n"
    "addq.ph $t7, $t7, $t9 \n"
    "addq.ph $t2, $t2, $t4 \n"
    "addq.ph $t3, $t3, $t5 \n"
    // Divide by 256; only the low byte of each halfword is kept below.
    "shra.ph $t6, $t6, 8 \n"
    "shra.ph $t7, $t7, 8 \n"
    "shra.ph $t2, $t2, 8 \n"
    "shra.ph $t3, $t3, 8 \n"
    // Pack the halfword results back into bytes.
    "precr.qb.ph $t6, $t6, $t7 \n"
    "precr.qb.ph $t2, $t2, $t3 \n"
    "addiu %[src_ptr], %[src_ptr], 8 \n"
    "addiu %[src_ptr1], %[src_ptr1], 8 \n"
    "addiu %[dst_width], %[dst_width], -8 \n"
    "sw $t6, 0(%[dst_ptr]) \n"
    "sw $t2, 4(%[dst_ptr]) \n"
    "bgtz %[dst_width], 1b \n"
    // Branch delay slot (".set noreorder"): executed on every iteration.
    " addiu %[dst_ptr], %[dst_ptr], 8 \n"

    // Duplicate the last output byte into the byte that follows the row.
    "lbu $t0, -1(%[dst_ptr]) \n"
    "sb $t0, 0(%[dst_ptr]) \n"
    ".set pop \n"
    : [dst_ptr] "+r" (dst_ptr),
      [src_ptr1] "+r" (src_ptr1),
      [src_ptr] "+r" (src_ptr),
      [dst_width] "+r" (dst_width)
    // src_stride is consumed in C above (src_ptr1); the template never
    // referenced it, so it is no longer passed in as an operand.
    : [source_y_fraction] "r" (source_y_fraction),
      [y0_fraction] "r" (y0_fraction)
    // "memory": the template reads the source rows and writes the destination
    // row through pointers the compiler cannot see into, so it must not cache
    // or reorder memory accesses across this statement.
    : "t0", "t1", "t2", "t3", "t4", "t5",
      "t6", "t7", "t8", "t9", "memory"
  );
}
#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2) #endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#ifdef __cplusplus #ifdef __cplusplus
......
...@@ -117,7 +117,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy2_None) { ...@@ -117,7 +117,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy2_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ARGBScaleDownBy2_Bilinear) { TEST_F(libyuvTest, ARGBScaleDownBy2_Bilinear) {
...@@ -143,7 +143,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy1_None) { ...@@ -143,7 +143,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy1_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 0); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ARGBScaleDownBy1_Bilinear) { TEST_F(libyuvTest, ARGBScaleDownBy1_Bilinear) {
...@@ -156,7 +156,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy1_Bilinear) { ...@@ -156,7 +156,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy1_Bilinear) {
dst_width, dst_height, dst_width, dst_height,
kFilterBilinear, kFilterBilinear,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 0); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ARGBScaleDownBy4_None) { TEST_F(libyuvTest, ARGBScaleDownBy4_None) {
...@@ -169,7 +169,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy4_None) { ...@@ -169,7 +169,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy4_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ARGBScaleDownBy4_Bilinear) { TEST_F(libyuvTest, ARGBScaleDownBy4_Bilinear) {
...@@ -195,7 +195,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy5_None) { ...@@ -195,7 +195,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy5_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ARGBScaleDownBy5_Bilinear) { TEST_F(libyuvTest, ARGBScaleDownBy5_Bilinear) {
...@@ -221,7 +221,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy8_None) { ...@@ -221,7 +221,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy8_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ARGBScaleDownBy8_Bilinear) { TEST_F(libyuvTest, ARGBScaleDownBy8_Bilinear) {
...@@ -247,7 +247,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy16_None) { ...@@ -247,7 +247,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy16_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ARGBScaleDownBy16_Bilinear) { TEST_F(libyuvTest, ARGBScaleDownBy16_Bilinear) {
...@@ -263,6 +263,32 @@ TEST_F(libyuvTest, ARGBScaleDownBy16_Bilinear) { ...@@ -263,6 +263,32 @@ TEST_F(libyuvTest, ARGBScaleDownBy16_Bilinear) {
EXPECT_LE(max_diff, 2); EXPECT_LE(max_diff, 2);
} }
// ARGB scale to 2/3 of the benchmark dimensions with kFilterNone.
// The value returned by ARGBTestFilter must be exactly 0.
TEST_F(libyuvTest, ARGBScaleDownBy23_None) {
  const int in_w = benchmark_width_;
  const int in_h = benchmark_height_;
  // Destination size is computed from the magnitude of the source size.
  const int out_w = Abs(in_w) * 2 / 3;
  const int out_h = Abs(in_h) * 2 / 3;
  const int max_diff = ARGBTestFilter(in_w, in_h, out_w, out_h,
                                      kFilterNone, benchmark_iterations_);
  EXPECT_EQ(0, max_diff);
}
// ARGB scale to 2/3 of the benchmark dimensions with kFilterBilinear.
// The value returned by ARGBTestFilter is allowed to be at most 2.
TEST_F(libyuvTest, ARGBScaleDownBy23_Bilinear) {
  const int in_w = benchmark_width_;
  const int in_h = benchmark_height_;
  // Destination size is computed from the magnitude of the source size.
  const int out_w = Abs(in_w) * 2 / 3;
  const int out_h = Abs(in_h) * 2 / 3;
  const int max_diff = ARGBTestFilter(in_w, in_h, out_w, out_h,
                                      kFilterBilinear, benchmark_iterations_);
  EXPECT_LE(max_diff, 2);
}
TEST_F(libyuvTest, ARGBScaleDownBy34_None) { TEST_F(libyuvTest, ARGBScaleDownBy34_None) {
const int src_width = benchmark_width_; const int src_width = benchmark_width_;
const int src_height = benchmark_height_; const int src_height = benchmark_height_;
...@@ -273,7 +299,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy34_None) { ...@@ -273,7 +299,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy34_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ARGBScaleDownBy34_Bilinear) { TEST_F(libyuvTest, ARGBScaleDownBy34_Bilinear) {
...@@ -299,7 +325,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy38_None) { ...@@ -299,7 +325,7 @@ TEST_F(libyuvTest, ARGBScaleDownBy38_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ARGBScaleDownBy38_Bilinear) { TEST_F(libyuvTest, ARGBScaleDownBy38_Bilinear) {
...@@ -325,7 +351,7 @@ TEST_F(libyuvTest, ARGBScaleTo1366x768_None) { ...@@ -325,7 +351,7 @@ TEST_F(libyuvTest, ARGBScaleTo1366x768_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ARGBScaleTo1366x768_Bilinear) { TEST_F(libyuvTest, ARGBScaleTo1366x768_Bilinear) {
...@@ -352,7 +378,7 @@ TEST_F(libyuvTest, ARGBScaleTo1280x720_None) { ...@@ -352,7 +378,7 @@ TEST_F(libyuvTest, ARGBScaleTo1280x720_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ARGBScaleTo1280x720_Bilinear) { TEST_F(libyuvTest, ARGBScaleTo1280x720_Bilinear) {
...@@ -378,7 +404,7 @@ TEST_F(libyuvTest, ARGBScaleTo853x480_None) { ...@@ -378,7 +404,7 @@ TEST_F(libyuvTest, ARGBScaleTo853x480_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ARGBScaleTo853x480_Bilinear) { TEST_F(libyuvTest, ARGBScaleTo853x480_Bilinear) {
...@@ -404,7 +430,7 @@ TEST_F(libyuvTest, ARGBScaleFrom640x360_None) { ...@@ -404,7 +430,7 @@ TEST_F(libyuvTest, ARGBScaleFrom640x360_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ARGBScaleFrom640x360_Bilinear) { TEST_F(libyuvTest, ARGBScaleFrom640x360_Bilinear) {
...@@ -675,6 +701,32 @@ TEST_F(libyuvTest, ARGBScaleClipDownBy16_Bilinear) { ...@@ -675,6 +701,32 @@ TEST_F(libyuvTest, ARGBScaleClipDownBy16_Bilinear) {
EXPECT_EQ(0, max_diff); EXPECT_EQ(0, max_diff);
} }
// Clipped ARGB scale to 2/3 of the benchmark dimensions with kFilterNone.
// The value returned by ARGBClipTestFilter must be exactly 0.
TEST_F(libyuvTest, ARGBScaleClipDownBy23_None) {
  const int in_w = benchmark_width_;
  const int in_h = benchmark_height_;
  // Destination size is computed from the magnitude of the source size.
  const int out_w = Abs(in_w) * 2 / 3;
  const int out_h = Abs(in_h) * 2 / 3;
  const int max_diff = ARGBClipTestFilter(in_w, in_h, out_w, out_h,
                                          kFilterNone, benchmark_iterations_);
  EXPECT_EQ(0, max_diff);
}
// Clipped ARGB scale to 2/3 of the benchmark dimensions with kFilterBilinear.
// The value returned by ARGBClipTestFilter must be exactly 0.
TEST_F(libyuvTest, ARGBScaleClipDownBy23_Bilinear) {
  const int in_w = benchmark_width_;
  const int in_h = benchmark_height_;
  // Destination size is computed from the magnitude of the source size.
  const int out_w = Abs(in_w) * 2 / 3;
  const int out_h = Abs(in_h) * 2 / 3;
  const int max_diff = ARGBClipTestFilter(in_w, in_h, out_w, out_h,
                                          kFilterBilinear,
                                          benchmark_iterations_);
  EXPECT_EQ(0, max_diff);
}
TEST_F(libyuvTest, ARGBScaleClipDownBy34_None) { TEST_F(libyuvTest, ARGBScaleClipDownBy34_None) {
const int src_width = benchmark_width_; const int src_width = benchmark_width_;
const int src_height = benchmark_height_; const int src_height = benchmark_height_;
......
...@@ -175,7 +175,7 @@ TEST_F(libyuvTest, ScaleDownBy2_None) { ...@@ -175,7 +175,7 @@ TEST_F(libyuvTest, ScaleDownBy2_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ScaleDownBy2_Bilinear) { TEST_F(libyuvTest, ScaleDownBy2_Bilinear) {
...@@ -214,7 +214,7 @@ TEST_F(libyuvTest, ScaleDownBy4_None) { ...@@ -214,7 +214,7 @@ TEST_F(libyuvTest, ScaleDownBy4_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 2); // This is the only scale factor with error of 2. EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ScaleDownBy4_Bilinear) { TEST_F(libyuvTest, ScaleDownBy4_Bilinear) {
...@@ -253,7 +253,7 @@ TEST_F(libyuvTest, ScaleDownBy5_None) { ...@@ -253,7 +253,7 @@ TEST_F(libyuvTest, ScaleDownBy5_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ScaleDownBy5_Bilinear) { TEST_F(libyuvTest, ScaleDownBy5_Bilinear) {
...@@ -292,7 +292,7 @@ TEST_F(libyuvTest, ScaleDownBy8_None) { ...@@ -292,7 +292,7 @@ TEST_F(libyuvTest, ScaleDownBy8_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ScaleDownBy8_Bilinear) { TEST_F(libyuvTest, ScaleDownBy8_Bilinear) {
...@@ -331,7 +331,7 @@ TEST_F(libyuvTest, ScaleDownBy16_None) { ...@@ -331,7 +331,7 @@ TEST_F(libyuvTest, ScaleDownBy16_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ScaleDownBy16_Bilinear) { TEST_F(libyuvTest, ScaleDownBy16_Bilinear) {
...@@ -344,7 +344,7 @@ TEST_F(libyuvTest, ScaleDownBy16_Bilinear) { ...@@ -344,7 +344,7 @@ TEST_F(libyuvTest, ScaleDownBy16_Bilinear) {
dst_width, dst_height, dst_width, dst_height,
kFilterBilinear, kFilterBilinear,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_LE(max_diff, 2);
} }
TEST_F(libyuvTest, ScaleDownBy16_Box) { TEST_F(libyuvTest, ScaleDownBy16_Box) {
...@@ -360,6 +360,32 @@ TEST_F(libyuvTest, ScaleDownBy16_Box) { ...@@ -360,6 +360,32 @@ TEST_F(libyuvTest, ScaleDownBy16_Box) {
EXPECT_LE(max_diff, 1); EXPECT_LE(max_diff, 1);
} }
// Scale to 2/3 of the benchmark dimensions with kFilterNone.
// The value returned by TestFilter must be exactly 0.
TEST_F(libyuvTest, ScaleDownBy23_None) {
  const int in_w = benchmark_width_;
  const int in_h = benchmark_height_;
  // Destination size is computed from the magnitude of the source size.
  const int out_w = Abs(in_w) * 2 / 3;
  const int out_h = Abs(in_h) * 2 / 3;
  const int max_diff = TestFilter(in_w, in_h, out_w, out_h,
                                  kFilterNone, benchmark_iterations_);
  EXPECT_EQ(0, max_diff);
}
// Scale to 2/3 of the benchmark dimensions with kFilterBilinear.
// The value returned by TestFilter is allowed to be at most 2.
TEST_F(libyuvTest, ScaleDownBy23_Bilinear) {
  const int in_w = benchmark_width_;
  const int in_h = benchmark_height_;
  // Destination size is computed from the magnitude of the source size.
  const int out_w = Abs(in_w) * 2 / 3;
  const int out_h = Abs(in_h) * 2 / 3;
  const int max_diff = TestFilter(in_w, in_h, out_w, out_h,
                                  kFilterBilinear, benchmark_iterations_);
  EXPECT_LE(max_diff, 2);
}
TEST_F(libyuvTest, ScaleDownBy34_None) { TEST_F(libyuvTest, ScaleDownBy34_None) {
const int src_width = benchmark_width_; const int src_width = benchmark_width_;
const int src_height = benchmark_height_; const int src_height = benchmark_height_;
...@@ -370,7 +396,7 @@ TEST_F(libyuvTest, ScaleDownBy34_None) { ...@@ -370,7 +396,7 @@ TEST_F(libyuvTest, ScaleDownBy34_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ScaleDownBy34_Bilinear) { TEST_F(libyuvTest, ScaleDownBy34_Bilinear) {
...@@ -409,7 +435,7 @@ TEST_F(libyuvTest, ScaleDownBy38_None) { ...@@ -409,7 +435,7 @@ TEST_F(libyuvTest, ScaleDownBy38_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ScaleDownBy38_Bilinear) { TEST_F(libyuvTest, ScaleDownBy38_Bilinear) {
...@@ -448,7 +474,7 @@ TEST_F(libyuvTest, ScaleTo1366x768_None) { ...@@ -448,7 +474,7 @@ TEST_F(libyuvTest, ScaleTo1366x768_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ScaleTo1366x768_Bilinear) { TEST_F(libyuvTest, ScaleTo1366x768_Bilinear) {
...@@ -487,7 +513,7 @@ TEST_F(libyuvTest, ScaleTo1280x720_None) { ...@@ -487,7 +513,7 @@ TEST_F(libyuvTest, ScaleTo1280x720_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ScaleTo1280x720_Bilinear) { TEST_F(libyuvTest, ScaleTo1280x720_Bilinear) {
...@@ -526,7 +552,7 @@ TEST_F(libyuvTest, ScaleTo853x480_None) { ...@@ -526,7 +552,7 @@ TEST_F(libyuvTest, ScaleTo853x480_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 1); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ScaleTo853x480_Bilinear) { TEST_F(libyuvTest, ScaleTo853x480_Bilinear) {
...@@ -565,7 +591,7 @@ TEST_F(libyuvTest, ScaleFrom640x360_None) { ...@@ -565,7 +591,7 @@ TEST_F(libyuvTest, ScaleFrom640x360_None) {
dst_width, dst_height, dst_width, dst_height,
kFilterNone, kFilterNone,
benchmark_iterations_); benchmark_iterations_);
EXPECT_LE(max_diff, 2); EXPECT_EQ(0, max_diff);
} }
TEST_F(libyuvTest, ScaleFrom640x360_Bilinear) { TEST_F(libyuvTest, ScaleFrom640x360_Bilinear) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment