Commit e86abbd2 authored by fbarchard@google.com

Use malloc for row buffers in scalers removing size limitations.

BUG=296
TESTED=libyuvTest.Scale*
R=tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/6369004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@925 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent aab73bbe
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 924 Version: 925
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 924 #define LIBYUV_VERSION 925
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -391,8 +391,8 @@ static void ScalePlaneBox(int src_width, int src_height, ...@@ -391,8 +391,8 @@ static void ScalePlaneBox(int src_width, int src_height,
ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
&x, &y, &dx, &dy); &x, &y, &dx, &dy);
const int max_y = (src_height << 16); const int max_y = (src_height << 16);
if (!IS_ALIGNED(src_width, 16) || (src_width > kMaxStride) || // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
dst_height * 2 > src_height) { if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
uint8* dst = dst_ptr; uint8* dst = dst_ptr;
for (int j = 0; j < dst_height; ++j) { for (int j = 0; j < dst_height; ++j) {
int iy = y >> 16; int iy = y >> 16;
...@@ -409,13 +409,14 @@ static void ScalePlaneBox(int src_width, int src_height, ...@@ -409,13 +409,14 @@ static void ScalePlaneBox(int src_width, int src_height,
} }
return; return;
} }
// TODO(fbarchard): Remove kMaxStride limitation. // Allocate a row buffer of uint16.
SIMD_ALIGNED(uint16 row[kMaxStride]); align_buffer_64(row16, src_width * 2);
void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
const uint16* src_ptr, uint8* dst_ptr) = const uint16* src_ptr, uint8* dst_ptr) =
(dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C; (dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C;
void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;
#if defined(HAS_SCALEADDROWS_SSE2) #if defined(HAS_SCALEADDROWS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
#ifdef AVOID_OVERREAD #ifdef AVOID_OVERREAD
...@@ -434,11 +435,13 @@ static void ScalePlaneBox(int src_width, int src_height, ...@@ -434,11 +435,13 @@ static void ScalePlaneBox(int src_width, int src_height,
y = (src_height << 16); y = (src_height << 16);
} }
int boxheight = (y >> 16) - iy; int boxheight = (y >> 16) - iy;
ScaleAddRows(src, src_stride, row, src_width, boxheight); ScaleAddRows(src, src_stride, reinterpret_cast<uint16*>(row16),
ScaleAddCols(dst_width, boxheight, x, dx, row, dst_ptr); src_width, boxheight);
ScaleAddCols(dst_width, boxheight, x, dx, reinterpret_cast<uint16*>(row16),
dst_ptr);
dst_ptr += dst_stride; dst_ptr += dst_stride;
} }
free_aligned_buffer_64(row16);
} }
// Scale plane down with bilinear interpolation. // Scale plane down with bilinear interpolation.
...@@ -450,7 +453,6 @@ void ScalePlaneBilinearDown(int src_width, int src_height, ...@@ -450,7 +453,6 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
FilterMode filtering) { FilterMode filtering) {
assert(dst_width > 0); assert(dst_width > 0);
assert(dst_height > 0); assert(dst_height > 0);
assert(Abs(src_width) <= kMaxStride);
// Initial source x/y coordinate and step values as 16.16 fixed point. // Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0; int x = 0;
...@@ -460,8 +462,6 @@ void ScalePlaneBilinearDown(int src_width, int src_height, ...@@ -460,8 +462,6 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
&x, &y, &dx, &dy); &x, &y, &dx, &dy);
SIMD_ALIGNED(uint8 row[kMaxStride + 16]);
void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) = ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C; InterpolateRow_C;
...@@ -520,6 +520,10 @@ void ScalePlaneBilinearDown(int src_width, int src_height, ...@@ -520,6 +520,10 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
} }
#endif #endif
// TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
// Allocate a row buffer.
align_buffer_64(row, src_width);
const int max_y = (src_height - 1) << 16; const int max_y = (src_height - 1) << 16;
for (int j = 0; j < dst_height; ++j) { for (int j = 0; j < dst_height; ++j) {
if (y > max_y) { if (y > max_y) {
...@@ -537,6 +541,7 @@ void ScalePlaneBilinearDown(int src_width, int src_height, ...@@ -537,6 +541,7 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
dst_ptr += dst_stride; dst_ptr += dst_stride;
y += dy; y += dy;
} }
free_aligned_buffer_64(row);
} }
// Scale plane up with bilinear interpolation. // Scale plane up with bilinear interpolation.
...@@ -550,7 +555,6 @@ void ScalePlaneBilinearUp(int src_width, int src_height, ...@@ -550,7 +555,6 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
assert(src_height != 0); assert(src_height != 0);
assert(dst_width > 0); assert(dst_width > 0);
assert(dst_height > 0); assert(dst_height > 0);
assert(Abs(dst_width) <= kMaxStride);
// Initial source x/y coordinate and step values as 16.16 fixed point. // Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0; int x = 0;
...@@ -635,9 +639,13 @@ void ScalePlaneBilinearUp(int src_width, int src_height, ...@@ -635,9 +639,13 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
} }
int yi = y >> 16; int yi = y >> 16;
const uint8* src = src_ptr + yi * src_stride; const uint8* src = src_ptr + yi * src_stride;
SIMD_ALIGNED(uint8 row[2 * kMaxStride]);
// Allocate 2 row buffers.
const int kRowSize = (dst_width + 15) & ~15;
align_buffer_64(row, kRowSize * 2);
uint8* rowptr = row; uint8* rowptr = row;
int rowstride = kMaxStride; int rowstride = kRowSize;
int lasty = yi; int lasty = yi;
ScaleFilterCols(rowptr, src, dst_width, x, dx); ScaleFilterCols(rowptr, src, dst_width, x, dx);
...@@ -672,6 +680,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height, ...@@ -672,6 +680,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
dst_ptr += dst_stride; dst_ptr += dst_stride;
y += dy; y += dy;
} }
free_aligned_buffer_64(row);
} }
// Scale Plane to/from any dimensions, without interpolation. // Scale Plane to/from any dimensions, without interpolation.
...@@ -780,18 +789,17 @@ void ScalePlane(const uint8* src, int src_stride, ...@@ -780,18 +789,17 @@ void ScalePlane(const uint8* src, int src_stride,
return; return;
} }
} }
if (filtering == kFilterBox && src_width <= kMaxStride && if (filtering == kFilterBox && dst_height * 2 < src_height ) {
dst_height * 2 < src_height ) {
ScalePlaneBox(src_width, src_height, dst_width, dst_height, ScalePlaneBox(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst); src_stride, dst_stride, src, dst);
return; return;
} }
if (filtering && dst_height > src_height && dst_width <= kMaxStride) { if (filtering && dst_height > src_height) {
ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height, ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering); src_stride, dst_stride, src, dst, filtering);
return; return;
} }
if (filtering && src_width <= kMaxStride) { if (filtering) {
ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height, ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering); src_stride, dst_stride, src, dst, filtering);
return; return;
......
...@@ -87,10 +87,6 @@ static void ScaleARGBDown4Box(int /* src_width */, int /* src_height */, ...@@ -87,10 +87,6 @@ static void ScaleARGBDown4Box(int /* src_width */, int /* src_height */,
assert(dx == 65536 * 4); // Test scale factor of 4. assert(dx == 65536 * 4); // Test scale factor of 4.
assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4. assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
assert(dst_width * 2 <= kMaxStride);
// TODO(fbarchard): Remove clip_src_width alignment checks.
SIMD_ALIGNED(uint8 row[kMaxStride * 2 + 16]);
// Advance to odd row, even column. // Advance to odd row, even column.
src_argb += (y >> 16) * src_stride + (x >> 16) * 4; src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
int row_stride = src_stride * (dy >> 16); int row_stride = src_stride * (dy >> 16);
...@@ -109,14 +105,19 @@ static void ScaleARGBDown4Box(int /* src_width */, int /* src_height */, ...@@ -109,14 +105,19 @@ static void ScaleARGBDown4Box(int /* src_width */, int /* src_height */,
} }
#endif #endif
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 2 * 4 + 15) & ~15;
align_buffer_64(row, kRowSize * 2);
for (int y = 0; y < dst_height; ++y) { for (int y = 0; y < dst_height; ++y) {
ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2); ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
row + kMaxStride, dst_width * 2); row + kRowSize, dst_width * 2);
ScaleARGBRowDown2(row, kMaxStride, dst_argb, dst_width); ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
src_argb += row_stride; src_argb += row_stride;
dst_argb += dst_stride; dst_argb += dst_stride;
} }
free_aligned_buffer_64(row);
} }
// ScaleARGB ARGB Even // ScaleARGB ARGB Even
...@@ -179,9 +180,6 @@ static void ScaleARGBBilinearDown(int src_height, ...@@ -179,9 +180,6 @@ static void ScaleARGBBilinearDown(int src_height,
int clip_src_width = (((xr - xl) + 1 + 3) & ~3) * 4; // Width aligned to 4. int clip_src_width = (((xr - xl) + 1 + 3) & ~3) * 4; // Width aligned to 4.
src_argb += xl * 4; src_argb += xl * 4;
x -= (xl << 16); x -= (xl << 16);
assert(clip_src_width <= kMaxStride);
// TODO(fbarchard): Remove clip_src_width alignment checks.
SIMD_ALIGNED(uint8 row[kMaxStride + 16]);
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) = ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C; InterpolateRow_C;
...@@ -239,6 +237,10 @@ static void ScaleARGBBilinearDown(int src_height, ...@@ -239,6 +237,10 @@ static void ScaleARGBBilinearDown(int src_height,
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
} }
#endif #endif
// TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
// Allocate a row of ARGB.
align_buffer_64(row, clip_src_width * 4);
const int max_y = (src_height - 1) << 16; const int max_y = (src_height - 1) << 16;
for (int j = 0; j < dst_height; ++j) { for (int j = 0; j < dst_height; ++j) {
if (y > max_y) { if (y > max_y) {
...@@ -256,6 +258,7 @@ static void ScaleARGBBilinearDown(int src_height, ...@@ -256,6 +258,7 @@ static void ScaleARGBBilinearDown(int src_height,
dst_argb += dst_stride; dst_argb += dst_stride;
y += dy; y += dy;
} }
free_aligned_buffer_64(row);
} }
// Scale ARGB up with bilinear interpolation. // Scale ARGB up with bilinear interpolation.
...@@ -270,7 +273,6 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, ...@@ -270,7 +273,6 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
assert(src_height > 0); assert(src_height > 0);
assert(dst_width > 0); assert(dst_width > 0);
assert(dst_height > 0); assert(dst_height > 0);
assert(dst_width * 4 <= kMaxStride);
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) = ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C; InterpolateRow_C;
...@@ -348,9 +350,13 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, ...@@ -348,9 +350,13 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
} }
int yi = y >> 16; int yi = y >> 16;
const uint8* src = src_argb + yi * src_stride; const uint8* src = src_argb + yi * src_stride;
SIMD_ALIGNED(uint8 row[2 * kMaxStride]);
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 15) & ~15;
align_buffer_64(row, kRowSize * 2);
uint8* rowptr = row; uint8* rowptr = row;
int rowstride = kMaxStride; int rowstride = kRowSize;
int lasty = yi; int lasty = yi;
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
...@@ -385,6 +391,7 @@ static void ScaleARGBBilinearUp(int src_width, int src_height, ...@@ -385,6 +391,7 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
dst_argb += dst_stride; dst_argb += dst_stride;
y += dy; y += dy;
} }
free_aligned_buffer_64(row);
} }
#ifdef YUVSCALEUP #ifdef YUVSCALEUP
...@@ -406,7 +413,6 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, ...@@ -406,7 +413,6 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
assert(src_height > 0); assert(src_height > 0);
assert(dst_width > 0); assert(dst_width > 0);
assert(dst_height > 0); assert(dst_height > 0);
assert(dst_width * 4 <= kMaxStride);
void (*I422ToARGBRow)(const uint8* y_buf, void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
...@@ -514,12 +520,19 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, ...@@ -514,12 +520,19 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
const uint8* src_row_y = src_y + yi * src_stride_y; const uint8* src_row_y = src_y + yi * src_stride_y;
const uint8* src_row_u = src_u + uv_yi * src_stride_u; const uint8* src_row_u = src_u + uv_yi * src_stride_u;
const uint8* src_row_v = src_v + uv_yi * src_stride_v; const uint8* src_row_v = src_v + uv_yi * src_stride_v;
SIMD_ALIGNED(uint8 row[2 * kMaxStride]);
SIMD_ALIGNED(uint8 argb_row[kMaxStride * 4]); // Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 15) & ~15;
align_buffer_64(row, kRowSize * 2);
// Allocate 1 row of ARGB for source conversion.
align_buffer_64(argb_row, src_width * 4);
uint8* rowptr = row; uint8* rowptr = row;
int rowstride = kMaxStride; int rowstride = kRowSize;
int lasty = yi; int lasty = yi;
// TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx); ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
if (src_height > 1) { if (src_height > 1) {
src_row_y += src_stride_y; src_row_y += src_stride_y;
...@@ -571,6 +584,8 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, ...@@ -571,6 +584,8 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
dst_argb += dst_stride_argb; dst_argb += dst_stride_argb;
y += dy; y += dy;
} }
free_aligned_buffer_64(row);
free_aligned_buffer_64(row_argb);
} }
#endif #endif
...@@ -696,14 +711,14 @@ static void ScaleARGB(const uint8* src, int src_stride, ...@@ -696,14 +711,14 @@ static void ScaleARGB(const uint8* src, int src_stride,
x, y, dy, 4, filtering); x, y, dy, 4, filtering);
return; return;
} }
if (filtering && dy < 65536 && dst_width * 4 <= kMaxStride) { if (filtering && dy < 65536) {
ScaleARGBBilinearUp(src_width, src_height, ScaleARGBBilinearUp(src_width, src_height,
clip_width, clip_height, clip_width, clip_height,
src_stride, dst_stride, src, dst, src_stride, dst_stride, src, dst,
x, dx, y, dy, filtering); x, dx, y, dy, filtering);
return; return;
} }
if (filtering && src_width * 4 < kMaxStride) { if (filtering) {
ScaleARGBBilinearDown(src_height, ScaleARGBBilinearDown(src_height,
clip_width, clip_height, clip_width, clip_height,
src_stride, dst_stride, src, dst, src_stride, dst_stride, src, dst,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment