Commit b18413e5 authored by fbarchard@google.com's avatar fbarchard@google.com

YUV scaling with 16 bit planes

BUG=331
TESTED=libyuv_unittest --gunit_also_run_disabled_tests --gtest_filter=**.ScaleFrom1280x720*
R=debargha@google.com, tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/17569004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1005 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 8b857c0a
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1004 Version: 1005
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -28,6 +28,11 @@ void CopyPlane(const uint8* src_y, int src_stride_y, ...@@ -28,6 +28,11 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
int width, int height); int width, int height);
LIBYUV_API
void CopyPlane_16(const uint16* src_y, int src_stride_y,
uint16* dst_y, int dst_stride_y,
int width, int height);
// Set a plane of data to a 32 bit value. // Set a plane of data to a 32 bit value.
LIBYUV_API LIBYUV_API
void SetPlane(uint8* dst_y, int dst_stride_y, void SetPlane(uint8* dst_y, int dst_stride_y,
......
...@@ -773,6 +773,8 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count); ...@@ -773,6 +773,8 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count);
void CopyRow_MIPS(const uint8* src, uint8* dst, int count); void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
void CopyRow_C(const uint8* src, uint8* dst, int count); void CopyRow_C(const uint8* src, uint8* dst, int count);
void CopyRow_16_C(const uint16* src, uint16* dst, int count);
void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width); void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width);
void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
...@@ -1458,6 +1460,9 @@ void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride, ...@@ -1458,6 +1460,9 @@ void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
void HalfRow_NEON(const uint8* src_uv, int src_uv_stride, void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
uint8* dst_uv, int pix); uint8* dst_uv, int pix);
void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
uint16* dst_uv, int pix);
void ARGBToBayerRow_C(const uint8* src_argb, uint8* dst_bayer, void ARGBToBayerRow_C(const uint8* src_argb, uint8* dst_bayer,
uint32 selector, int pix); uint32 selector, int pix);
void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer, void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
...@@ -1639,6 +1644,10 @@ void InterpolateRows_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr, ...@@ -1639,6 +1644,10 @@ void InterpolateRows_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride_ptr, int width, ptrdiff_t src_stride_ptr, int width,
int source_y_fraction); int source_y_fraction);
void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
ptrdiff_t src_stride_ptr,
int width, int source_y_fraction);
// Sobel images. // Sobel images.
void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2, void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
uint8* dst_sobelx, int width); uint8* dst_sobelx, int width);
......
...@@ -34,6 +34,12 @@ void ScalePlane(const uint8* src, int src_stride, ...@@ -34,6 +34,12 @@ void ScalePlane(const uint8* src, int src_stride,
int dst_width, int dst_height, int dst_width, int dst_height,
enum FilterMode filtering); enum FilterMode filtering);
void ScalePlane_16(const uint16* src, int src_stride,
int src_width, int src_height,
uint16* dst, int dst_stride,
int dst_width, int dst_height,
enum FilterMode filtering);
// Scales a YUV 4:2:0 image from the src width and height to the // Scales a YUV 4:2:0 image from the src width and height to the
// dst width and height. // dst width and height.
// If filtering is kFilterNone, a simple nearest-neighbor algorithm is // If filtering is kFilterNone, a simple nearest-neighbor algorithm is
...@@ -55,6 +61,17 @@ int I420Scale(const uint8* src_y, int src_stride_y, ...@@ -55,6 +61,17 @@ int I420Scale(const uint8* src_y, int src_stride_y,
int dst_width, int dst_height, int dst_width, int dst_height,
enum FilterMode filtering); enum FilterMode filtering);
LIBYUV_API
int I420Scale_16(const uint16* src_y, int src_stride_y,
const uint16* src_u, int src_stride_u,
const uint16* src_v, int src_stride_v,
int src_width, int src_height,
uint16* dst_y, int dst_stride_y,
uint16* dst_u, int dst_stride_u,
uint16* dst_v, int dst_stride_v,
int dst_width, int dst_height,
enum FilterMode filtering);
#ifdef __cplusplus #ifdef __cplusplus
// Legacy API. Deprecated. // Legacy API. Deprecated.
LIBYUV_API LIBYUV_API
......
...@@ -70,6 +70,13 @@ void ScalePlaneVertical(int src_height, ...@@ -70,6 +70,13 @@ void ScalePlaneVertical(int src_height,
int x, int y, int dy, int x, int y, int dy,
int bpp, enum FilterMode filtering); int bpp, enum FilterMode filtering);
void ScalePlaneVertical_16(int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint16* src_argb, uint16* dst_argb,
int x, int y, int dy,
int wpp, enum FilterMode filtering);
// Simplify the filtering based on scale factors. // Simplify the filtering based on scale factors.
enum FilterMode ScaleFilterReduce(int src_width, int src_height, enum FilterMode ScaleFilterReduce(int src_width, int src_height,
int dst_width, int dst_height, int dst_width, int dst_height,
...@@ -97,37 +104,70 @@ void ScaleSlope(int src_width, int src_height, ...@@ -97,37 +104,70 @@ void ScaleSlope(int src_width, int src_height,
void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width); uint8* dst, int dst_width);
void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width);
void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width); uint8* dst, int dst_width);
void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width);
void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width); uint8* dst, int dst_width);
void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width);
void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width); uint8* dst, int dst_width);
void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width);
void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width); uint8* dst, int dst_width);
void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width);
void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width); uint8* dst, int dst_width);
void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width);
void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width); uint8* d, int dst_width);
void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* d, int dst_width);
void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width); uint8* d, int dst_width);
void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* d, int dst_width);
void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr, void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx); int dst_width, int x, int dx);
void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int x, int dx);
void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr, void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int, int); int dst_width, int, int);
void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int, int);
void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr, void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx); int dst_width, int x, int dx);
void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int x, int dx);
void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr, void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx); int dst_width, int x, int dx);
void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int x, int dx);
void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width); uint8* dst, int dst_width);
void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width);
void ScaleRowDown38_3_Box_C(const uint8* src_ptr, void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
ptrdiff_t src_stride,
uint16* dst_ptr, int dst_width);
void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width); uint8* dst_ptr, int dst_width);
void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int dst_width);
void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height); uint16* dst_ptr, int src_width, int src_height);
void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint32* dst_ptr, int src_width, int src_height);
void ScaleARGBRowDown2_C(const uint8* src_argb, void ScaleARGBRowDown2_C(const uint8* src_argb,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width); uint8* dst_argb, int dst_width);
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1004 #define LIBYUV_VERSION 1005
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -73,6 +73,55 @@ void CopyPlane(const uint8* src_y, int src_stride_y, ...@@ -73,6 +73,55 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
} }
} }
LIBYUV_API
void CopyPlane_16(const uint16* src_y, int src_stride_y,
uint16* dst_y, int dst_stride_y,
int width, int height) {
int y;
void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C;
// Coalesce rows.
if (src_stride_y == width &&
dst_stride_y == width) {
width *= height;
height = 1;
src_stride_y = dst_stride_y = 0;
}
#if defined(HAS_COPYROW_16_X86)
if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
CopyRow = CopyRow_16_X86;
}
#endif
#if defined(HAS_COPYROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
CopyRow = CopyRow_16_SSE2;
}
#endif
#if defined(HAS_COPYROW_16_ERMS)
if (TestCpuFlag(kCpuHasERMS)) {
CopyRow = CopyRow_16_ERMS;
}
#endif
#if defined(HAS_COPYROW_16_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
CopyRow = CopyRow_16_NEON;
}
#endif
#if defined(HAS_COPYROW_16_MIPS)
if (TestCpuFlag(kCpuHasMIPS)) {
CopyRow = CopyRow_16_MIPS;
}
#endif
// Copy plane
for (y = 0; y < height; ++y) {
CopyRow(src_y, dst_y, width);
src_y += src_stride_y;
dst_y += dst_stride_y;
}
}
// Copy I422. // Copy I422.
LIBYUV_API LIBYUV_API
int I422Copy(const uint8* src_y, int src_stride_y, int I422Copy(const uint8* src_y, int src_stride_y,
......
...@@ -1565,6 +1565,10 @@ void CopyRow_C(const uint8* src, uint8* dst, int count) { ...@@ -1565,6 +1565,10 @@ void CopyRow_C(const uint8* src, uint8* dst, int count) {
memcpy(dst, src, count); memcpy(dst, src, count);
} }
void CopyRow_16_C(const uint16* src, uint16* dst, int count) {
memcpy(dst, src, count * 2);
}
void SetRow_C(uint8* dst, uint32 v8, int count) { void SetRow_C(uint8* dst, uint32 v8, int count) {
#ifdef _MSC_VER #ifdef _MSC_VER
// VC will generate rep stosb. // VC will generate rep stosb.
...@@ -1890,6 +1894,14 @@ void HalfRow_C(const uint8* src_uv, int src_uv_stride, ...@@ -1890,6 +1894,14 @@ void HalfRow_C(const uint8* src_uv, int src_uv_stride,
} }
} }
void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
uint16* dst_uv, int pix) {
int x;
for (x = 0; x < pix; ++x) {
dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
}
}
// C version 2x2 -> 2x1. // C version 2x2 -> 2x1.
void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
...@@ -1918,6 +1930,33 @@ void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, ...@@ -1918,6 +1930,33 @@ void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
} }
} }
void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
ptrdiff_t src_stride,
int width, int source_y_fraction) {
int y1_fraction = source_y_fraction;
int y0_fraction = 256 - y1_fraction;
const uint16* src_ptr1 = src_ptr + src_stride;
int x;
if (source_y_fraction == 0) {
memcpy(dst_ptr, src_ptr, width * 2);
return;
}
if (source_y_fraction == 128) {
HalfRow_16_C(src_ptr, (int)(src_stride), dst_ptr, width);
return;
}
for (x = 0; x < width - 1; x += 2) {
dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
src_ptr += 2;
src_ptr1 += 2;
dst_ptr += 2;
}
if (width & 1) {
dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
}
}
// Select 2 channels from ARGB on alternating pixels. e.g. BGBGBGBG // Select 2 channels from ARGB on alternating pixels. e.g. BGBGBGBG
void ARGBToBayerRow_C(const uint8* src_argb, void ARGBToBayerRow_C(const uint8* src_argb,
uint8* dst_bayer, uint32 selector, int pix) { uint8* dst_bayer, uint32 selector, int pix) {
......
...@@ -90,6 +90,62 @@ static void ScalePlaneDown2(int src_width, int src_height, ...@@ -90,6 +90,62 @@ static void ScalePlaneDown2(int src_width, int src_height,
} }
} }
static void ScalePlaneDown2_16(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint16* src_ptr, uint16* dst_ptr,
enum FilterMode filtering) {
int y;
void (*ScaleRowDown2)(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int dst_width) =
filtering == kFilterNone ? ScaleRowDown2_16_C :
(filtering == kFilterLinear ? ScaleRowDown2Linear_16_C :
ScaleRowDown2Box_16_C);
int row_stride = src_stride << 1;
if (!filtering) {
src_ptr += src_stride; // Point to odd rows.
src_stride = 0;
}
#if defined(HAS_SCALEROWDOWN2_16_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering ? ScaleRowDown2Box_16_NEON :
ScaleRowDown2_16_NEON;
}
#elif defined(HAS_SCALEROWDOWN2_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
ScaleRowDown2 = filtering == kFilterNone ?
ScaleRowDown2_Unaligned_16_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_16_SSE2 :
ScaleRowDown2Box_Unaligned_16_SSE2);
if (IS_ALIGNED(src_ptr, 16) &&
IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 :
(filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 :
ScaleRowDown2Box_16_SSE2);
}
}
#elif defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
ScaleRowDown2 = filtering ?
ScaleRowDown2Box_16_MIPS_DSPR2 : ScaleRowDown2_16_MIPS_DSPR2;
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
}
// TODO(fbarchard): Loop through source height to allow odd height.
for (y = 0; y < dst_height; ++y) {
ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
src_ptr += row_stride;
dst_ptr += dst_stride;
}
}
// Scale plane, 1/4 // Scale plane, 1/4
// This is an optimized version for scaling down a plane to 1/4 of // This is an optimized version for scaling down a plane to 1/4 of
// its original size. // its original size.
...@@ -137,6 +193,51 @@ static void ScalePlaneDown4(int src_width, int src_height, ...@@ -137,6 +193,51 @@ static void ScalePlaneDown4(int src_width, int src_height,
} }
} }
static void ScalePlaneDown4_16(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint16* src_ptr, uint16* dst_ptr,
enum FilterMode filtering) {
int y;
void (*ScaleRowDown4)(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int dst_width) =
filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
int row_stride = src_stride << 2;
if (!filtering) {
src_ptr += src_stride * 2; // Point to row 2.
src_stride = 0;
}
#if defined(HAS_SCALEROWDOWN4_16_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_NEON :
ScaleRowDown4_16_NEON;
}
#elif defined(HAS_SCALEROWDOWN4_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 :
ScaleRowDown4_16_SSE2;
}
#elif defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
ScaleRowDown4 = filtering ?
ScaleRowDown4Box_16_MIPS_DSPR2 : ScaleRowDown4_16_MIPS_DSPR2;
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
}
for (y = 0; y < dst_height; ++y) {
ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
src_ptr += row_stride;
dst_ptr += dst_stride;
}
}
// Scale plane down, 3/4 // Scale plane down, 3/4
static void ScalePlaneDown34(int src_width, int src_height, static void ScalePlaneDown34(int src_width, int src_height,
...@@ -219,6 +320,86 @@ static void ScalePlaneDown34(int src_width, int src_height, ...@@ -219,6 +320,86 @@ static void ScalePlaneDown34(int src_width, int src_height,
} }
} }
static void ScalePlaneDown34_16(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint16* src_ptr, uint16* dst_ptr,
enum FilterMode filtering) {
int y;
void (*ScaleRowDown34_0)(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int dst_width);
void (*ScaleRowDown34_1)(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int dst_width);
const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
assert(dst_width % 3 == 0);
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_16_C;
ScaleRowDown34_1 = ScaleRowDown34_16_C;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
}
#if defined(HAS_SCALEROWDOWN34_16_NEON)
if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
}
}
#endif
#if defined(HAS_SCALEROWDOWN34_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
}
}
#endif
#if defined(HAS_SCALEROWDOWN34_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_16_MIPS_DSPR2;
ScaleRowDown34_1 = ScaleRowDown34_16_MIPS_DSPR2;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_MIPS_DSPR2;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_MIPS_DSPR2;
}
}
#endif
for (y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
src_ptr += src_stride;
dst_ptr += dst_stride;
ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
src_ptr += src_stride;
dst_ptr += dst_stride;
ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
dst_ptr, dst_width);
src_ptr += src_stride * 2;
dst_ptr += dst_stride;
}
// Remainder 1 or 2 rows with last row vertically unfiltered
if ((dst_height % 3) == 2) {
ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
src_ptr += src_stride;
dst_ptr += dst_stride;
ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
} else if ((dst_height % 3) == 1) {
ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
}
}
// Scale plane, 3/8 // Scale plane, 3/8
// This is an optimized version for scaling down a plane to 3/8 // This is an optimized version for scaling down a plane to 3/8
...@@ -312,6 +493,83 @@ static void ScalePlaneDown38(int src_width, int src_height, ...@@ -312,6 +493,83 @@ static void ScalePlaneDown38(int src_width, int src_height,
} }
} }
static void ScalePlaneDown38_16(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint16* src_ptr, uint16* dst_ptr,
enum FilterMode filtering) {
int y;
void (*ScaleRowDown38_3)(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int dst_width);
void (*ScaleRowDown38_2)(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int dst_width);
const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
assert(dst_width % 3 == 0);
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_16_C;
ScaleRowDown38_2 = ScaleRowDown38_16_C;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
}
#if defined(HAS_SCALEROWDOWN38_16_NEON)
if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
}
}
#elif defined(HAS_SCALEROWDOWN38_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
}
}
#elif defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_16_MIPS_DSPR2;
ScaleRowDown38_2 = ScaleRowDown38_16_MIPS_DSPR2;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_MIPS_DSPR2;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_MIPS_DSPR2;
}
}
#endif
for (y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
src_ptr += src_stride * 3;
dst_ptr += dst_stride;
ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
src_ptr += src_stride * 3;
dst_ptr += dst_stride;
ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
src_ptr += src_stride * 2;
dst_ptr += dst_stride;
}
// Remainder 1 or 2 rows with last row vertically unfiltered
if ((dst_height % 3) == 2) {
ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
src_ptr += src_stride * 3;
dst_ptr += dst_stride;
ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
} else if ((dst_height % 3) == 1) {
ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
}
}
static __inline uint32 SumBox(int iboxwidth, int iboxheight, static __inline uint32 SumBox(int iboxwidth, int iboxheight,
ptrdiff_t src_stride, const uint8* src_ptr) { ptrdiff_t src_stride, const uint8* src_ptr) {
uint32 sum = 0u; uint32 sum = 0u;
...@@ -328,6 +586,22 @@ static __inline uint32 SumBox(int iboxwidth, int iboxheight, ...@@ -328,6 +586,22 @@ static __inline uint32 SumBox(int iboxwidth, int iboxheight,
return sum; return sum;
} }
static __inline uint32 SumBox_16(int iboxwidth, int iboxheight,
ptrdiff_t src_stride, const uint16* src_ptr) {
uint32 sum = 0u;
int y;
assert(iboxwidth > 0);
assert(iboxheight > 0);
for (y = 0; y < iboxheight; ++y) {
int x;
for (x = 0; x < iboxwidth; ++x) {
sum += src_ptr[x];
}
src_ptr += src_stride;
}
return sum;
}
static void ScalePlaneBoxRow_C(int dst_width, int boxheight, static void ScalePlaneBoxRow_C(int dst_width, int boxheight,
int x, int dx, ptrdiff_t src_stride, int x, int dx, ptrdiff_t src_stride,
const uint8* src_ptr, uint8* dst_ptr) { const uint8* src_ptr, uint8* dst_ptr) {
...@@ -342,6 +616,20 @@ static void ScalePlaneBoxRow_C(int dst_width, int boxheight, ...@@ -342,6 +616,20 @@ static void ScalePlaneBoxRow_C(int dst_width, int boxheight,
} }
} }
static void ScalePlaneBoxRow_16_C(int dst_width, int boxheight,
int x, int dx, ptrdiff_t src_stride,
const uint16* src_ptr, uint16* dst_ptr) {
int i;
int boxwidth;
for (i = 0; i < dst_width; ++i) {
int ix = x >> 16;
x += dx;
boxwidth = (x >> 16) - ix;
*dst_ptr++ = SumBox_16(boxwidth, boxheight, src_stride, src_ptr + ix) /
(boxwidth * boxheight);
}
}
static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) { static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
uint32 sum = 0u; uint32 sum = 0u;
int x; int x;
...@@ -352,6 +640,16 @@ static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) { ...@@ -352,6 +640,16 @@ static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
return sum; return sum;
} }
static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) {
uint32 sum = 0u;
int x;
assert(iboxwidth > 0);
for (x = 0; x < iboxwidth; ++x) {
sum += src_ptr[x];
}
return sum;
}
static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx, static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
const uint16* src_ptr, uint8* dst_ptr) { const uint16* src_ptr, uint8* dst_ptr) {
int i; int i;
...@@ -369,6 +667,24 @@ static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx, ...@@ -369,6 +667,24 @@ static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
} }
} }
static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx,
const uint32* src_ptr, uint16* dst_ptr) {
int i;
int scaletbl[2];
int minboxwidth = (dx >> 16);
int* scaleptr = scaletbl - minboxwidth;
int boxwidth;
scaletbl[0] = 65536 / (minboxwidth * boxheight);
scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
for (i = 0; i < dst_width; ++i) {
int ix = x >> 16;
x += dx;
boxwidth = (x >> 16) - ix;
*dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *
scaleptr[boxwidth] >> 16;
}
}
static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx, static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
const uint16* src_ptr, uint8* dst_ptr) { const uint16* src_ptr, uint8* dst_ptr) {
int boxwidth = (dx >> 16); int boxwidth = (dx >> 16);
...@@ -380,6 +696,17 @@ static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx, ...@@ -380,6 +696,17 @@ static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
} }
} }
static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx,
const uint32* src_ptr, uint16* dst_ptr) {
int boxwidth = (dx >> 16);
int scaleval = 65536 / (boxwidth * boxheight);
int i;
for (i = 0; i < dst_width; ++i) {
*dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
x += boxwidth;
}
}
// Scale plane down to any dimensions, with interpolation. // Scale plane down to any dimensions, with interpolation.
// (boxfilter). // (boxfilter).
// //
...@@ -459,46 +786,118 @@ static void ScalePlaneBox(int src_width, int src_height, ...@@ -459,46 +786,118 @@ static void ScalePlaneBox(int src_width, int src_height,
} }
} }
// Scale plane down with bilinear interpolation. static void ScalePlaneBox_16(int src_width, int src_height,
void ScalePlaneBilinearDown(int src_width, int src_height, int dst_width, int dst_height,
int dst_width, int dst_height, int src_stride, int dst_stride,
int src_stride, int dst_stride, const uint16* src_ptr, uint16* dst_ptr) {
const uint8* src_ptr, uint8* dst_ptr, int j;
enum FilterMode filtering) {
// Initial source x/y coordinate and step values as 16.16 fixed point. // Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0; int x = 0;
int y = 0; int y = 0;
int dx = 0; int dx = 0;
int dy = 0; int dy = 0;
// TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. const int max_y = (src_height << 16);
// Allocate a row buffer. ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
align_buffer_64(row, src_width);
const int max_y = (src_height - 1) << 16;
int j;
void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
&x, &y, &dx, &dy); &x, &y, &dx, &dy);
src_width = Abs(src_width); src_width = Abs(src_width);
// TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
#if defined(HAS_INTERPOLATEROW_SSE2) if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) { uint16* dst = dst_ptr;
InterpolateRow = InterpolateRow_Any_SSE2; int j;
if (IS_ALIGNED(src_width, 16)) { for (j = 0; j < dst_height; ++j) {
InterpolateRow = InterpolateRow_Unaligned_SSE2; int boxheight;
if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { int iy = y >> 16;
InterpolateRow = InterpolateRow_SSE2; const uint16* src = src_ptr + iy * src_stride;
} y += dy;
} if (y > max_y) {
} y = max_y;
#endif }
#if defined(HAS_INTERPOLATEROW_SSSE3) boxheight = (y >> 16) - iy;
if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) { ScalePlaneBoxRow_16_C(dst_width, boxheight,
x, dx, src_stride,
src, dst);
dst += dst_stride;
}
return;
}
{
// Allocate a row buffer of uint32.
align_buffer_64(row32, src_width * 4);
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
const uint32* src_ptr, uint16* dst_ptr) =
(dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C;
void (*ScaleAddRows)(const uint16* src_ptr, ptrdiff_t src_stride,
uint32* dst_ptr, int src_width, int src_height) = ScaleAddRows_16_C;
#if defined(HAS_SCALEADDROWS_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
#ifdef AVOID_OVERREAD
IS_ALIGNED(src_width, 16) &&
#endif
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
ScaleAddRows = ScaleAddRows_16_SSE2;
}
#endif
for (j = 0; j < dst_height; ++j) {
int boxheight;
int iy = y >> 16;
const uint16* src = src_ptr + iy * src_stride;
y += dy;
if (y > (src_height << 16)) {
y = (src_height << 16);
}
boxheight = (y >> 16) - iy;
ScaleAddRows(src, src_stride, (uint32*)(row32),
src_width, boxheight);
ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32),
dst_ptr);
dst_ptr += dst_stride;
}
free_aligned_buffer_64(row32);
}
}
// Scale plane down with bilinear interpolation.
void ScalePlaneBilinearDown(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr,
enum FilterMode filtering) {
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
int dx = 0;
int dy = 0;
// TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
// Allocate a row buffer.
align_buffer_64(row, src_width);
const int max_y = (src_height - 1) << 16;
int j;
void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
InterpolateRow = InterpolateRow_Any_SSSE3; InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(src_width, 16)) { if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3; InterpolateRow = InterpolateRow_Unaligned_SSSE3;
...@@ -562,6 +961,108 @@ void ScalePlaneBilinearDown(int src_width, int src_height, ...@@ -562,6 +961,108 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
free_aligned_buffer_64(row); free_aligned_buffer_64(row);
} }
void ScalePlaneBilinearDown_16(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint16* src_ptr, uint16* dst_ptr,
enum FilterMode filtering) {
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
int dx = 0;
int dy = 0;
// TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
// Allocate a row buffer.
align_buffer_64(row, src_width * 2);
const int max_y = (src_height - 1) << 16;
int j;
void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_16_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
InterpolateRow = InterpolateRow_Any_16_SSE2;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
InterpolateRow = InterpolateRow_Any_16_SSSE3;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSSE3;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
InterpolateRow = InterpolateRow_Any_16_AVX2;
if (IS_ALIGNED(src_width, 32)) {
InterpolateRow = InterpolateRow_16_AVX2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
InterpolateRow = InterpolateRow_Any_16_NEON;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_16_NEON;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
if (IS_ALIGNED(src_width, 4)) {
InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
}
}
#endif
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleFilterCols = ScaleFilterCols_16_SSSE3;
}
#endif
if (y > max_y) {
y = max_y;
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
const uint16* src = src_ptr + yi * src_stride;
if (filtering == kFilterLinear) {
ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
} else {
int yf = (y >> 8) & 255;
InterpolateRow((uint16*)row, src, src_stride, src_width, yf);
ScaleFilterCols(dst_ptr, (uint16*)row, dst_width, x, dx);
}
dst_ptr += dst_stride;
y += dy;
if (y > max_y) {
y = max_y;
}
}
free_aligned_buffer_64(row);
}
// Scale up down with bilinear interpolation. // Scale up down with bilinear interpolation.
void ScalePlaneBilinearUp(int src_width, int src_height, void ScalePlaneBilinearUp(int src_width, int src_height,
int dst_width, int dst_height, int dst_width, int dst_height,
...@@ -702,6 +1203,145 @@ void ScalePlaneBilinearUp(int src_width, int src_height, ...@@ -702,6 +1203,145 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
} }
} }
void ScalePlaneBilinearUp_16(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint16* src_ptr, uint16* dst_ptr,
enum FilterMode filtering) {
int j;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
int dx = 0;
int dy = 0;
const int max_y = (src_height - 1) << 16;
void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_16_C;
void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int x, int dx) =
filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
InterpolateRow = InterpolateRow_Any_16_SSE2;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
InterpolateRow = InterpolateRow_Any_16_SSSE3;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSSE3;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
InterpolateRow = InterpolateRow_Any_16_AVX2;
if (IS_ALIGNED(dst_width, 32)) {
InterpolateRow = InterpolateRow_16_AVX2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
InterpolateRow = InterpolateRow_Any_16_NEON;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_16_NEON;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
}
}
#endif
if (filtering && src_width >= 32768) {
ScaleFilterCols = ScaleFilterCols64_16_C;
}
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleFilterCols = ScaleFilterCols_16_SSSE3;
}
#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleFilterCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleFilterCols = ScaleColsUp2_16_SSE2;
}
#endif
}
if (y > max_y) {
y = max_y;
}
{
int yi = y >> 16;
const uint16* src = src_ptr + yi * src_stride;
// Allocate 2 row buffers.
const int kRowSize = (dst_width + 15) & ~15;
align_buffer_64(row, kRowSize * 4);
uint16* rowptr = (uint16*)row;
int rowstride = kRowSize;
int lasty = yi;
ScaleFilterCols(rowptr, src, dst_width, x, dx);
if (src_height > 1) {
src += src_stride;
}
ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
src += src_stride;
for (j = 0; j < dst_height; ++j) {
yi = y >> 16;
if (yi != lasty) {
if (y > max_y) {
y = max_y;
yi = y >> 16;
src = src_ptr + yi * src_stride;
}
if (yi != lasty) {
ScaleFilterCols(rowptr, src, dst_width, x, dx);
rowptr += rowstride;
rowstride = -rowstride;
lasty = yi;
src += src_stride;
}
}
if (filtering == kFilterLinear) {
InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
} else {
int yf = (y >> 8) & 255;
InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
}
dst_ptr += dst_stride;
y += dy;
}
free_aligned_buffer_64(row);
}
}
// Scale Plane to/from any dimensions, without interpolation. // Scale Plane to/from any dimensions, without interpolation.
// Fixed point math is used for performance: The upper 16 bits // Fixed point math is used for performance: The upper 16 bits
// of x and dx is the integer part of the source position and // of x and dx is the integer part of the source position and
...@@ -742,6 +1382,41 @@ static void ScalePlaneSimple(int src_width, int src_height, ...@@ -742,6 +1382,41 @@ static void ScalePlaneSimple(int src_width, int src_height,
} }
} }
static void ScalePlaneSimple_16(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint16* src_ptr, uint16* dst_ptr) {
int i;
void (*ScaleCols)(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int x, int dx) = ScaleCols_16_C;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
int dx = 0;
int dy = 0;
ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
if (src_width * 2 == dst_width && x < 0x8000) {
ScaleCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleCols = ScaleColsUp2_16_SSE2;
}
#endif
}
for (i = 0; i < dst_height; ++i) {
ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
dst_width, x, dx);
dst_ptr += dst_stride;
y += dy;
}
}
// Scale a plane. // Scale a plane.
// This function dispatches to a specialized scaler based on scale factor. // This function dispatches to a specialized scaler based on scale factor.
...@@ -829,6 +1504,90 @@ void ScalePlane(const uint8* src, int src_stride, ...@@ -829,6 +1504,90 @@ void ScalePlane(const uint8* src, int src_stride,
src_stride, dst_stride, src, dst); src_stride, dst_stride, src, dst);
} }
LIBYUV_API
void ScalePlane_16(const uint16* src, int src_stride,
int src_width, int src_height,
uint16* dst, int dst_stride,
int dst_width, int dst_height,
enum FilterMode filtering) {
// Simplify filtering when possible.
filtering = ScaleFilterReduce(src_width, src_height,
dst_width, dst_height,
filtering);
// Negative height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * src_stride;
src_stride = -src_stride;
}
// Use specialized scales to improve performance for common resolutions.
// For example, all the 1/2 scalings will use ScalePlaneDown2()
if (dst_width == src_width && dst_height == src_height) {
// Straight copy.
CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
return;
}
if (dst_width == src_width) {
int dy = FixedDiv(src_height, dst_height);
// Arbitrary scale vertically, but unscaled vertically.
ScalePlaneVertical_16(src_height,
dst_width, dst_height,
src_stride, dst_stride, src, dst,
0, 0, dy, 1, filtering);
return;
}
if (dst_width <= Abs(src_width) && dst_height <= src_height) {
// Scale down.
if (4 * dst_width == 3 * src_width &&
4 * dst_height == 3 * src_height) {
// optimized, 3/4
ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering);
return;
}
if (2 * dst_width == src_width && 2 * dst_height == src_height) {
// optimized, 1/2
ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering);
return;
}
// 3/8 rounded up for odd sized chroma height.
if (8 * dst_width == 3 * src_width &&
dst_height == ((src_height * 3 + 7) / 8)) {
// optimized, 3/8
ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering);
return;
}
if (4 * dst_width == src_width && 4 * dst_height == src_height &&
filtering != kFilterBilinear) {
// optimized, 1/4
ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering);
return;
}
}
if (filtering == kFilterBox && dst_height * 2 < src_height) {
ScalePlaneBox_16(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst);
return;
}
if (filtering && dst_height > src_height) {
ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering);
return;
}
if (filtering) {
ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering);
return;
}
ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst);
}
// Scale an I420 image. // Scale an I420 image.
// This function in turn calls a scaling function for each plane. // This function in turn calls a scaling function for each plane.
...@@ -863,6 +1622,37 @@ int I420Scale(const uint8* src_y, int src_stride_y, ...@@ -863,6 +1622,37 @@ int I420Scale(const uint8* src_y, int src_stride_y,
return 0; return 0;
} }
LIBYUV_API
int I420Scale_16(const uint16* src_y, int src_stride_y,
const uint16* src_u, int src_stride_u,
const uint16* src_v, int src_stride_v,
int src_width, int src_height,
uint16* dst_y, int dst_stride_y,
uint16* dst_u, int dst_stride_u,
uint16* dst_v, int dst_stride_v,
int dst_width, int dst_height,
enum FilterMode filtering) {
int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
int src_halfheight = SUBSAMPLE(src_height, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
!dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
return -1;
}
ScalePlane_16(src_y, src_stride_y, src_width, src_height,
dst_y, dst_stride_y, dst_width, dst_height,
filtering);
ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight,
dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
filtering);
ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight,
dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
filtering);
return 0;
}
// Deprecated api // Deprecated api
LIBYUV_API LIBYUV_API
int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v, int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
......
...@@ -42,6 +42,20 @@ void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -42,6 +42,20 @@ void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
} }
} }
void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width) {
int x;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = src_ptr[1];
dst[1] = src_ptr[3];
dst += 2;
src_ptr += 4;
}
if (dst_width & 1) {
dst[0] = src_ptr[1];
}
}
void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) { uint8* dst, int dst_width) {
const uint8* s = src_ptr; const uint8* s = src_ptr;
...@@ -57,6 +71,21 @@ void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -57,6 +71,21 @@ void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
} }
} }
void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width) {
const uint16* s = src_ptr;
int x;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = (s[0] + s[1] + 1) >> 1;
dst[1] = (s[2] + s[3] + 1) >> 1;
dst += 2;
s += 4;
}
if (dst_width & 1) {
dst[0] = (s[0] + s[1] + 1) >> 1;
}
}
void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) { uint8* dst, int dst_width) {
const uint8* s = src_ptr; const uint8* s = src_ptr;
...@@ -74,6 +103,23 @@ void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -74,6 +103,23 @@ void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
} }
} }
void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width) {
const uint16* s = src_ptr;
const uint16* t = src_ptr + src_stride;
int x;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
dst += 2;
s += 4;
t += 4;
}
if (dst_width & 1) {
dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
}
}
void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) { uint8* dst, int dst_width) {
int x; int x;
...@@ -88,6 +134,20 @@ void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -88,6 +134,20 @@ void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
} }
} }
void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width) {
int x;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = src_ptr[2];
dst[1] = src_ptr[6];
dst += 2;
src_ptr += 8;
}
if (dst_width & 1) {
dst[0] = src_ptr[2];
}
}
void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) { uint8* dst, int dst_width) {
intptr_t stride = src_stride; intptr_t stride = src_stride;
...@@ -124,6 +184,42 @@ void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -124,6 +184,42 @@ void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
} }
} }
void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width) {
intptr_t stride = src_stride;
int x;
for (x = 0; x < dst_width - 1; x += 2) {
dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
src_ptr[stride + 0] + src_ptr[stride + 1] +
src_ptr[stride + 2] + src_ptr[stride + 3] +
src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
8) >> 4;
dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
src_ptr[stride + 4] + src_ptr[stride + 5] +
src_ptr[stride + 6] + src_ptr[stride + 7] +
src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
8) >> 4;
dst += 2;
src_ptr += 8;
}
if (dst_width & 1) {
dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
src_ptr[stride + 0] + src_ptr[stride + 1] +
src_ptr[stride + 2] + src_ptr[stride + 3] +
src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
8) >> 4;
}
}
void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) { uint8* dst, int dst_width) {
int x; int x;
...@@ -137,6 +233,19 @@ void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -137,6 +233,19 @@ void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
} }
} }
void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width) {
int x;
assert((dst_width % 3 == 0) && (dst_width > 0));
for (x = 0; x < dst_width; x += 3) {
dst[0] = src_ptr[0];
dst[1] = src_ptr[1];
dst[2] = src_ptr[3];
dst += 3;
src_ptr += 4;
}
}
// Filter rows 0 and 1 together, 3 : 1 // Filter rows 0 and 1 together, 3 : 1
void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width) { uint8* d, int dst_width) {
...@@ -160,6 +269,28 @@ void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -160,6 +269,28 @@ void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
} }
} }
void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* d, int dst_width) {
const uint16* s = src_ptr;
const uint16* t = src_ptr + src_stride;
int x;
assert((dst_width % 3 == 0) && (dst_width > 0));
for (x = 0; x < dst_width; x += 3) {
uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
d[0] = (a0 * 3 + b0 + 2) >> 2;
d[1] = (a1 * 3 + b1 + 2) >> 2;
d[2] = (a2 * 3 + b2 + 2) >> 2;
d += 3;
s += 4;
t += 4;
}
}
// Filter rows 1 and 2 together, 1 : 1 // Filter rows 1 and 2 together, 1 : 1
void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width) { uint8* d, int dst_width) {
...@@ -183,6 +314,28 @@ void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -183,6 +314,28 @@ void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
} }
} }
void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* d, int dst_width) {
const uint16* s = src_ptr;
const uint16* t = src_ptr + src_stride;
int x;
assert((dst_width % 3 == 0) && (dst_width > 0));
for (x = 0; x < dst_width; x += 3) {
uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
d[0] = (a0 + b0 + 1) >> 1;
d[1] = (a1 + b1 + 1) >> 1;
d[2] = (a2 + b2 + 1) >> 1;
d += 3;
s += 4;
t += 4;
}
}
// Scales a single row of pixels using point sampling. // Scales a single row of pixels using point sampling.
void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr, void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) { int dst_width, int x, int dx) {
...@@ -199,6 +352,21 @@ void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr, ...@@ -199,6 +352,21 @@ void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
} }
} }
void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int x, int dx) {
int j;
for (j = 0; j < dst_width - 1; j += 2) {
dst_ptr[0] = src_ptr[x >> 16];
x += dx;
dst_ptr[1] = src_ptr[x >> 16];
x += dx;
dst_ptr += 2;
}
if (dst_width & 1) {
dst_ptr[0] = src_ptr[x >> 16];
}
}
// Scales a single row of pixels up by 2x using point sampling. // Scales a single row of pixels up by 2x using point sampling.
void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr, void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
int dst_width, int x, int dx) { int dst_width, int x, int dx) {
...@@ -213,6 +381,19 @@ void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr, ...@@ -213,6 +381,19 @@ void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
} }
} }
void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int x, int dx) {
int j;
for (j = 0; j < dst_width - 1; j += 2) {
dst_ptr[1] = dst_ptr[0] = src_ptr[0];
src_ptr += 1;
dst_ptr += 2;
}
if (dst_width & 1) {
dst_ptr[0] = src_ptr[0];
}
}
// (1-f)a + fb can be replaced with a + f(b-a) // (1-f)a + fb can be replaced with a + f(b-a)
#define BLENDER(a, b, f) (uint8)((int)(a) + \ #define BLENDER(a, b, f) (uint8)((int)(a) + \
((int)(f) * ((int)(b) - (int)(a)) >> 16)) ((int)(f) * ((int)(b) - (int)(a)) >> 16))
...@@ -267,6 +448,59 @@ void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr, ...@@ -267,6 +448,59 @@ void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
} }
#undef BLENDER #undef BLENDER
#define BLENDER(a, b, f) (uint16)((int)(a) + \
((int)(f) * ((int)(b) - (int)(a)) >> 16))
void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int x, int dx) {
int j;
for (j = 0; j < dst_width - 1; j += 2) {
int xi = x >> 16;
int a = src_ptr[xi];
int b = src_ptr[xi + 1];
dst_ptr[0] = BLENDER(a, b, x & 0xffff);
x += dx;
xi = x >> 16;
a = src_ptr[xi];
b = src_ptr[xi + 1];
dst_ptr[1] = BLENDER(a, b, x & 0xffff);
x += dx;
dst_ptr += 2;
}
if (dst_width & 1) {
int xi = x >> 16;
int a = src_ptr[xi];
int b = src_ptr[xi + 1];
dst_ptr[0] = BLENDER(a, b, x & 0xffff);
}
}
void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
int dst_width, int x32, int dx) {
int64 x = (int64)(x32);
int j;
for (j = 0; j < dst_width - 1; j += 2) {
int64 xi = x >> 16;
int a = src_ptr[xi];
int b = src_ptr[xi + 1];
dst_ptr[0] = BLENDER(a, b, x & 0xffff);
x += dx;
xi = x >> 16;
a = src_ptr[xi];
b = src_ptr[xi + 1];
dst_ptr[1] = BLENDER(a, b, x & 0xffff);
x += dx;
dst_ptr += 2;
}
if (dst_width & 1) {
int64 xi = x >> 16;
int a = src_ptr[xi];
int b = src_ptr[xi + 1];
dst_ptr[0] = BLENDER(a, b, x & 0xffff);
}
}
#undef BLENDER
void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) { uint8* dst, int dst_width) {
int x; int x;
...@@ -280,6 +514,19 @@ void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -280,6 +514,19 @@ void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
} }
} }
void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst, int dst_width) {
int x;
assert(dst_width % 3 == 0);
for (x = 0; x < dst_width; x += 3) {
dst[0] = src_ptr[0];
dst[1] = src_ptr[3];
dst[2] = src_ptr[6];
dst += 3;
src_ptr += 8;
}
}
// 8x3 -> 3x1 // 8x3 -> 3x1
void ScaleRowDown38_3_Box_C(const uint8* src_ptr, void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
...@@ -307,6 +554,32 @@ void ScaleRowDown38_3_Box_C(const uint8* src_ptr, ...@@ -307,6 +554,32 @@ void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
} }
} }
void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
ptrdiff_t src_stride,
uint16* dst_ptr, int dst_width) {
intptr_t stride = src_stride;
int i;
assert((dst_width % 3 == 0) && (dst_width > 0));
for (i = 0; i < dst_width; i += 3) {
dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
src_ptr[stride + 0] + src_ptr[stride + 1] +
src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
(65536 / 9) >> 16;
dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
src_ptr[stride + 3] + src_ptr[stride + 4] +
src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
(65536 / 9) >> 16;
dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
src_ptr[stride + 6] + src_ptr[stride + 7] +
src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
(65536 / 6) >> 16;
src_ptr += 8;
dst_ptr += 3;
}
}
// 8x2 -> 3x1 // 8x2 -> 3x1
void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
...@@ -328,6 +601,26 @@ void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -328,6 +601,26 @@ void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
} }
} }
void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int dst_width) {
intptr_t stride = src_stride;
int i;
assert((dst_width % 3 == 0) && (dst_width > 0));
for (i = 0; i < dst_width; i += 3) {
dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
src_ptr[stride + 0] + src_ptr[stride + 1] +
src_ptr[stride + 2]) * (65536 / 6) >> 16;
dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
src_ptr[stride + 3] + src_ptr[stride + 4] +
src_ptr[stride + 5]) * (65536 / 6) >> 16;
dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
src_ptr[stride + 6] + src_ptr[stride + 7]) *
(65536 / 4) >> 16;
src_ptr += 8;
dst_ptr += 3;
}
}
void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height) { uint16* dst_ptr, int src_width, int src_height) {
int x; int x;
...@@ -346,6 +639,24 @@ void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -346,6 +639,24 @@ void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
} }
} }
void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
uint32* dst_ptr, int src_width, int src_height) {
int x;
assert(src_width > 0);
assert(src_height > 0);
for (x = 0; x < src_width; ++x) {
const uint16* s = src_ptr + x;
unsigned int sum = 0u;
int y;
for (y = 0; y < src_height; ++y) {
sum += s[0];
s += src_stride;
}
// No risk of overflow here now
dst_ptr[x] = sum;
}
}
void ScaleARGBRowDown2_C(const uint8* src_argb, void ScaleARGBRowDown2_C(const uint8* src_argb,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) { uint8* dst_argb, int dst_width) {
...@@ -637,6 +948,88 @@ void ScalePlaneVertical(int src_height, ...@@ -637,6 +948,88 @@ void ScalePlaneVertical(int src_height,
y += dy; y += dy;
} }
} }
void ScalePlaneVertical_16(int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint16* src_argb, uint16* dst_argb,
int x, int y, int dy,
int wpp, enum FilterMode filtering) {
// TODO(fbarchard): Allow higher wpp.
int dst_width_words = dst_width * wpp;
void (*InterpolateRow)(uint16* dst_argb, const uint16* src_argb,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_16_C;
const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
int j;
assert(wpp >= 1 && wpp <= 2);
assert(src_height != 0);
assert(dst_width > 0);
assert(dst_height > 0);
src_argb += (x >> 16) * wpp;
#if defined(HAS_INTERPOLATEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
InterpolateRow = InterpolateRow_Any_16_SSE2;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
InterpolateRow = InterpolateRow_Any_16_SSSE3;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_16_SSSE3;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {
InterpolateRow = InterpolateRow_Any_16_AVX2;
if (IS_ALIGNED(dst_width_bytes, 32)) {
InterpolateRow = InterpolateRow_16_AVX2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {
InterpolateRow = InterpolateRow_Any_16_NEON;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_16_NEON;
}
}
#endif
#if defined(HAS_INTERPOLATEROWS_16_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
if (IS_ALIGNED(dst_width_bytes, 4)) {
InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
}
}
#endif
for (j = 0; j < dst_height; ++j) {
int yi;
int yf;
if (y > max_y) {
y = max_y;
}
yi = y >> 16;
yf = filtering ? ((y >> 8) & 255) : 0;
InterpolateRow(dst_argb, src_argb + yi * src_stride,
src_stride, dst_width_words, yf);
dst_argb += dst_stride;
y += dy;
}
}
// Simplify the filtering based on scale factors. // Simplify the filtering based on scale factors.
enum FilterMode ScaleFilterReduce(int src_width, int src_height, enum FilterMode ScaleFilterReduce(int src_width, int src_height,
......
...@@ -132,6 +132,134 @@ static int TestFilter(int src_width, int src_height, ...@@ -132,6 +132,134 @@ static int TestFilter(int src_width, int src_height,
return max_diff; return max_diff;
} }
// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
// 0 = exact.
static int TestFilter_16(int src_width, int src_height,
int dst_width, int dst_height,
FilterMode f, int benchmark_iterations) {
int i, j;
const int b = 0; // 128 to test for padding/stride.
int src_width_uv = (Abs(src_width) + 1) >> 1;
int src_height_uv = (Abs(src_height) + 1) >> 1;
int src_y_plane_size = (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2);
int src_uv_plane_size = (src_width_uv + b * 2) * (src_height_uv + b * 2);
int src_stride_y = b * 2 + Abs(src_width);
int src_stride_uv = b * 2 + src_width_uv;
align_buffer_page_end(src_y, src_y_plane_size)
align_buffer_page_end(src_u, src_uv_plane_size)
align_buffer_page_end(src_v, src_uv_plane_size)
align_buffer_page_end(src_y_16, src_y_plane_size * 2)
align_buffer_page_end(src_u_16, src_uv_plane_size * 2)
align_buffer_page_end(src_v_16, src_uv_plane_size * 2)
uint16* p_src_y_16 = reinterpret_cast<uint16*>(src_y_16);
uint16* p_src_u_16 = reinterpret_cast<uint16*>(src_u_16);
uint16* p_src_v_16 = reinterpret_cast<uint16*>(src_v_16);
srandom(time(NULL));
MemRandomize(src_y, src_y_plane_size);
MemRandomize(src_u, src_uv_plane_size);
MemRandomize(src_v, src_uv_plane_size);
for (i = b; i < src_height + b; ++i) {
for (j = b; j < src_width + b; ++j) {
p_src_y_16[(i * src_stride_y) + j] = src_y[(i * src_stride_y) + j];
}
}
for (i = b; i < (src_height_uv + b); ++i) {
for (j = b; j < (src_width_uv + b); ++j) {
p_src_u_16[(i * src_stride_uv) + j] = src_u[(i * src_stride_uv) + j];
p_src_v_16[(i * src_stride_uv) + j] = src_v[(i * src_stride_uv) + j];
}
}
int dst_width_uv = (dst_width + 1) >> 1;
int dst_height_uv = (dst_height + 1) >> 1;
int dst_y_plane_size = (dst_width + b * 2) * (dst_height + b * 2);
int dst_uv_plane_size = (dst_width_uv + b * 2) * (dst_height_uv + b * 2);
int dst_stride_y = b * 2 + dst_width;
int dst_stride_uv = b * 2 + dst_width_uv;
align_buffer_page_end(dst_y_8, dst_y_plane_size)
align_buffer_page_end(dst_u_8, dst_uv_plane_size)
align_buffer_page_end(dst_v_8, dst_uv_plane_size)
align_buffer_page_end(dst_y_16, dst_y_plane_size * 2)
align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2)
align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2)
uint16* p_dst_y_16 = reinterpret_cast<uint16*>(dst_y_16);
uint16* p_dst_u_16 = reinterpret_cast<uint16*>(dst_u_16);
uint16* p_dst_v_16 = reinterpret_cast<uint16*>(dst_v_16);
I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
src_u + (src_stride_uv * b) + b, src_stride_uv,
src_v + (src_stride_uv * b) + b, src_stride_uv,
src_width, src_height,
dst_y_8 + (dst_stride_y * b) + b, dst_stride_y,
dst_u_8 + (dst_stride_uv * b) + b, dst_stride_uv,
dst_v_8 + (dst_stride_uv * b) + b, dst_stride_uv,
dst_width, dst_height, f);
for (i = 0; i < benchmark_iterations; ++i) {
I420Scale_16(p_src_y_16 + (src_stride_y * b) + b, src_stride_y,
p_src_u_16 + (src_stride_uv * b) + b, src_stride_uv,
p_src_v_16 + (src_stride_uv * b) + b, src_stride_uv,
src_width, src_height,
p_dst_y_16 + (dst_stride_y * b) + b, dst_stride_y,
p_dst_u_16 + (dst_stride_uv * b) + b, dst_stride_uv,
p_dst_v_16 + (dst_stride_uv * b) + b, dst_stride_uv,
dst_width, dst_height, f);
}
// Expect an exact match
int max_diff = 0;
for (i = b; i < (dst_height + b); ++i) {
for (j = b; j < (dst_width + b); ++j) {
int abs_diff = Abs(dst_y_8[(i * dst_stride_y) + j] -
p_dst_y_16[(i * dst_stride_y) + j]);
if (abs_diff > max_diff) {
max_diff = abs_diff;
}
}
}
for (i = b; i < (dst_height_uv + b); ++i) {
for (j = b; j < (dst_width_uv + b); ++j) {
int abs_diff = Abs(dst_u_8[(i * dst_stride_uv) + j] -
p_dst_u_16[(i * dst_stride_uv) + j]);
if (abs_diff > max_diff) {
max_diff = abs_diff;
}
abs_diff = Abs(dst_v_8[(i * dst_stride_uv) + j] -
p_dst_v_16[(i * dst_stride_uv) + j]);
if (abs_diff > max_diff) {
max_diff = abs_diff;
}
}
}
free_aligned_buffer_page_end(dst_y_8)
free_aligned_buffer_page_end(dst_u_8)
free_aligned_buffer_page_end(dst_v_8)
free_aligned_buffer_page_end(dst_y_16)
free_aligned_buffer_page_end(dst_u_16)
free_aligned_buffer_page_end(dst_v_16)
free_aligned_buffer_page_end(src_y)
free_aligned_buffer_page_end(src_u)
free_aligned_buffer_page_end(src_v)
free_aligned_buffer_page_end(src_y_16)
free_aligned_buffer_page_end(src_u_16)
free_aligned_buffer_page_end(src_v_16)
return max_diff;
}
#define TEST_FACTOR1(name, filter, hfactor, vfactor, max_diff) \ #define TEST_FACTOR1(name, filter, hfactor, vfactor, max_diff) \
TEST_F(libyuvTest, ScaleDownBy##name##_##filter) { \ TEST_F(libyuvTest, ScaleDownBy##name##_##filter) { \
int diff = TestFilter(benchmark_width_, benchmark_height_, \ int diff = TestFilter(benchmark_width_, benchmark_height_, \
...@@ -139,6 +267,13 @@ static int TestFilter(int src_width, int src_height, ...@@ -139,6 +267,13 @@ static int TestFilter(int src_width, int src_height,
Abs(benchmark_height_) * vfactor, \ Abs(benchmark_height_) * vfactor, \
kFilter##filter, benchmark_iterations_); \ kFilter##filter, benchmark_iterations_); \
EXPECT_LE(diff, max_diff); \ EXPECT_LE(diff, max_diff); \
} \
TEST_F(libyuvTest, ScaleDownBy##name##_##filter##_16) { \
int diff = TestFilter_16(benchmark_width_, benchmark_height_, \
Abs(benchmark_width_) * hfactor, \
Abs(benchmark_height_) * vfactor, \
kFilter##filter, benchmark_iterations_); \
EXPECT_LE(diff, max_diff); \
} }
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but // Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
...@@ -168,6 +303,18 @@ TEST_FACTOR(3by4, 3 / 4, 3 / 4) ...@@ -168,6 +303,18 @@ TEST_FACTOR(3by4, 3 / 4, 3 / 4)
Abs(benchmark_width_), Abs(benchmark_height_), \ Abs(benchmark_width_), Abs(benchmark_height_), \
kFilter##filter, benchmark_iterations_); \ kFilter##filter, benchmark_iterations_); \
EXPECT_LE(diff, max_diff); \ EXPECT_LE(diff, max_diff); \
} \
TEST_F(libyuvTest, name##To##width##x##height##_##filter##_16) { \
int diff = TestFilter_16(benchmark_width_, benchmark_height_, \
width, height, \
kFilter##filter, benchmark_iterations_); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(libyuvTest, name##From##width##x##height##_##filter##_16) { \
int diff = TestFilter_16(width, height, \
Abs(benchmark_width_), Abs(benchmark_height_), \
kFilter##filter, benchmark_iterations_); \
EXPECT_LE(diff, max_diff); \
} }
// Test scale to a specified size with all 4 filters. // Test scale to a specified size with all 4 filters.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment