/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/scale.h"

#include <assert.h>
#include <string.h>

#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"  // For CopyARGB
18
#include "libyuv/row.h"
19
#include "libyuv/scale_row.h"
20 21 22 23 24 25

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

26 27 28 29
// Returns the magnitude of v (v itself when non-negative, otherwise -v).
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}

fbarchard@google.com's avatar
fbarchard@google.com committed
30 31 32 33
// ScaleARGB ARGB, 1/2
// This is an optimized version for scaling down a ARGB to 1/2 of
// its original size.

34
static void ScaleARGBDown2(int /* src_width */, int /* src_height */,
35 36
                           int dst_width, int dst_height,
                           int src_stride, int dst_stride,
37
                           const uint8* src_argb, uint8* dst_argb,
fbarchard@google.com's avatar
fbarchard@google.com committed
38
                           int x, int dx, int y, int dy,
39
                           FilterMode filtering) {
fbarchard@google.com's avatar
fbarchard@google.com committed
40
  assert(dx == 65536 * 2);  // Test scale factor of 2.
41
  assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
42
  // Advance to odd row, even column.
43 44 45 46 47
  if (filtering) {
    src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
  } else {
    src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
  }
48
  int row_stride = src_stride * (dy >> 16);
49 50
  void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
                            uint8* dst_argb, int dst_width) =
51 52 53
    filtering == kFilterNone ? ScaleARGBRowDown2_C :
        (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
        ScaleARGBRowDown2Box_C);
54
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
fbarchard@google.com's avatar
fbarchard@google.com committed
55
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
56
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
57
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
58 59 60
    ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
        (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
        ScaleARGBRowDown2Box_SSE2);
61
  }
fbarchard@google.com's avatar
fbarchard@google.com committed
62 63
#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
64
      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
65
    ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON :
fbarchard@google.com's avatar
fbarchard@google.com committed
66 67
        ScaleARGBRowDown2_NEON;
  }
68 69
#endif

70 71 72
  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
73
  for (int y = 0; y < dst_height; ++y) {
74
    ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
75
    src_argb += row_stride;
76
    dst_argb += dst_stride;
77 78 79
  }
}

fbarchard@google.com's avatar
fbarchard@google.com committed
80 81 82
// ScaleARGB ARGB Even
// This is an optimized version for scaling down a ARGB to even
// multiple of its original size.
fbarchard@google.com's avatar
fbarchard@google.com committed
83 84 85
static void ScaleARGBDownEven(int src_width, int src_height,
                              int dst_width, int dst_height,
                              int src_stride, int dst_stride,
86
                              const uint8* src_argb, uint8* dst_argb,
fbarchard@google.com's avatar
fbarchard@google.com committed
87
                              int x, int dx, int y, int dy,
fbarchard@google.com's avatar
fbarchard@google.com committed
88 89 90
                              FilterMode filtering) {
  assert(IS_ALIGNED(src_width, 2));
  assert(IS_ALIGNED(src_height, 2));
91 92 93
  int col_step = dx >> 16;
  int row_stride = (dy >> 16) * src_stride;
  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
94 95
  void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
                               int src_step, uint8* dst_argb, int dst_width) =
96
      filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
fbarchard@google.com's avatar
fbarchard@google.com committed
97
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
98 99
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
100
    ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
fbarchard@google.com's avatar
fbarchard@google.com committed
101 102
        ScaleARGBRowDownEven_SSE2;
  }
103 104 105
#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) &&
      IS_ALIGNED(src_argb, 4)) {
106
    ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
107 108
        ScaleARGBRowDownEven_NEON;
  }
fbarchard@google.com's avatar
fbarchard@google.com committed
109
#endif
110

111 112 113
  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
fbarchard@google.com's avatar
fbarchard@google.com committed
114
  for (int y = 0; y < dst_height; ++y) {
fbarchard@google.com's avatar
fbarchard@google.com committed
115
    ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
116 117
    src_argb += row_stride;
    dst_argb += dst_stride;
fbarchard@google.com's avatar
fbarchard@google.com committed
118 119
  }
}
fbarchard@google.com's avatar
fbarchard@google.com committed
120

121
// Scale ARGB down with bilinear interpolation.
122
SAFEBUFFERS
123
static void ScaleARGBBilinearDown(int src_height,
fbarchard@google.com's avatar
fbarchard@google.com committed
124 125
                                  int dst_width, int dst_height,
                                  int src_stride, int dst_stride,
fbarchard@google.com's avatar
fbarchard@google.com committed
126
                                  const uint8* src_argb, uint8* dst_argb,
127 128
                                  int x, int dx, int y, int dy,
                                  FilterMode filtering) {
fbarchard@google.com's avatar
fbarchard@google.com committed
129
  assert(src_height > 0);
130 131
  assert(dst_width > 0);
  assert(dst_height > 0);
fbarchard@google.com's avatar
fbarchard@google.com committed
132 133 134
  int xlast = x + (dst_width - 1) * dx;
  int xl = (dx >= 0) ? x : xlast;
  int xr = (dx >= 0) ? xlast : x;
135 136
  xl = (xl >> 16) & ~3;  // Left edge aligned.
  xr = (xr >> 16) + 1;  // Right most pixel used.
137
  int clip_src_width = (((xr - xl) + 1 + 3) & ~3) * 4;  // Width aligned to 4.
138 139
  src_argb += xl * 4;
  x -= (xl << 16);
140 141
  assert(clip_src_width <= kMaxStride);
  // TODO(fbarchard): Remove clip_src_width alignment checks.
142
  SIMD_ALIGNED(uint8 row[kMaxStride + 16]);
143
  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
144
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
145 146 147 148 149 150
      InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(clip_src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
fbarchard@google.com's avatar
fbarchard@google.com committed
151
      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
152
        InterpolateRow = InterpolateRow_SSE2;
fbarchard@google.com's avatar
fbarchard@google.com committed
153 154
      }
    }
155 156
  }
#endif
157 158 159 160 161
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(clip_src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
fbarchard@google.com's avatar
fbarchard@google.com committed
162
      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
163
        InterpolateRow = InterpolateRow_SSSE3;
fbarchard@google.com's avatar
fbarchard@google.com committed
164 165 166 167
      }
    }
  }
#endif
168 169 170 171 172 173 174 175
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(clip_src_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
176 177 178 179 180 181 182 183 184 185 186 187
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(clip_src_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 &&
      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
fbarchard@google.com's avatar
fbarchard@google.com committed
188
    if (IS_ALIGNED(clip_src_width, 4)) {
189
      InterpolateRow = InterpolateRow_MIPS_DSPR2;
fbarchard@google.com's avatar
fbarchard@google.com committed
190
    }
191
  }
192
#endif
193 194
  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
      int dst_width, int x, int dx) = ScaleARGBFilterCols_C;
195 196 197 198 199
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
200
  const int max_y = (src_height - 1) << 16;
201
  for (int j = 0; j < dst_height; ++j) {
202 203
    if (y > max_y) {
      y = max_y;
204
    }
205
    int yi = y >> 16;
206
    const uint8* src = src_argb + yi * src_stride;
207 208 209 210 211 212 213
    if (filtering == kFilterLinear) {
      ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
    } else {
      int yf = (y >> 8) & 255;
      InterpolateRow(row, src, src_stride, clip_src_width, yf);
      ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
    }
214
    dst_argb += dst_stride;
215 216 217 218
    y += dy;
  }
}

219
// Scale ARGB up with bilinear interpolation.
220
SAFEBUFFERS
fbarchard@google.com's avatar
fbarchard@google.com committed
221 222 223
static void ScaleARGBBilinearUp(int src_width, int src_height,
                                int dst_width, int dst_height,
                                int src_stride, int dst_stride,
fbarchard@google.com's avatar
fbarchard@google.com committed
224
                                const uint8* src_argb, uint8* dst_argb,
225 226
                                int x, int dx, int y, int dy,
                                FilterMode filtering) {
fbarchard@google.com's avatar
fbarchard@google.com committed
227 228
  assert(src_width > 0);
  assert(src_height > 0);
fbarchard@google.com's avatar
fbarchard@google.com committed
229 230 231
  assert(dst_width > 0);
  assert(dst_height > 0);
  assert(dst_width * 4 <= kMaxStride);
232
  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
fbarchard@google.com's avatar
fbarchard@google.com committed
233
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
234 235
      InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSE2)
fbarchard@google.com's avatar
fbarchard@google.com committed
236
  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
237
    InterpolateRow = InterpolateRow_Any_SSE2;
fbarchard@google.com's avatar
fbarchard@google.com committed
238
    if (IS_ALIGNED(dst_width, 4)) {
239
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
fbarchard@google.com's avatar
fbarchard@google.com committed
240
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
241
        InterpolateRow = InterpolateRow_SSE2;
fbarchard@google.com's avatar
fbarchard@google.com committed
242 243 244 245
      }
    }
  }
#endif
246
#if defined(HAS_INTERPOLATEROW_SSSE3)
fbarchard@google.com's avatar
fbarchard@google.com committed
247
  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
248
    InterpolateRow = InterpolateRow_Any_SSSE3;
fbarchard@google.com's avatar
fbarchard@google.com committed
249
    if (IS_ALIGNED(dst_width, 4)) {
250
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
fbarchard@google.com's avatar
fbarchard@google.com committed
251
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
252
        InterpolateRow = InterpolateRow_SSSE3;
fbarchard@google.com's avatar
fbarchard@google.com committed
253 254 255 256
      }
    }
  }
#endif
257 258 259 260 261 262 263 264
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
265
#if defined(HAS_INTERPOLATEROW_NEON)
fbarchard@google.com's avatar
fbarchard@google.com committed
266
  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
267
    InterpolateRow = InterpolateRow_Any_NEON;
fbarchard@google.com's avatar
fbarchard@google.com committed
268
    if (IS_ALIGNED(dst_width, 4)) {
269
      InterpolateRow = InterpolateRow_NEON;
fbarchard@google.com's avatar
fbarchard@google.com committed
270 271
    }
  }
272 273 274 275 276 277
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
    InterpolateRow = InterpolateRow_MIPS_DSPR2;
  }
fbarchard@google.com's avatar
fbarchard@google.com committed
278 279
#endif
  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
280 281
      int dst_width, int x, int dx) =
      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
fbarchard@google.com's avatar
fbarchard@google.com committed
282
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
283
  if (filtering && TestCpuFlag(kCpuHasSSSE3)) {
fbarchard@google.com's avatar
fbarchard@google.com committed
284 285 286
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303
#if defined(HAS_SCALEARGBCOLS_SSE2)
  if (!filtering && TestCpuFlag(kCpuHasSSE2)) {
    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
  }
#endif
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
        IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
    }
#endif
  }

  const int max_y = (src_height - 1) << 16;
304 305
  if (y > max_y) {
    y = max_y;
fbarchard@google.com's avatar
fbarchard@google.com committed
306 307 308 309 310 311
  }
  int yi = y >> 16;
  const uint8* src = src_argb + yi * src_stride;
  SIMD_ALIGNED(uint8 row[2 * kMaxStride]);
  uint8* rowptr = row;
  int rowstride = kMaxStride;
fbarchard@google.com's avatar
fbarchard@google.com committed
312
  int lasty = yi;
fbarchard@google.com's avatar
fbarchard@google.com committed
313 314 315 316 317 318 319 320 321 322 323

  ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
  if (src_height > 1) {
    src += src_stride;
  }
  ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
  src += src_stride;

  for (int j = 0; j < dst_height; ++j) {
    yi = y >> 16;
    if (yi != lasty) {
324 325 326 327 328
      if (y > max_y) {
        y = max_y;
        yi = y >> 16;
      }
      if (yi != lasty) {
fbarchard@google.com's avatar
fbarchard@google.com committed
329 330 331 332 333 334 335
        ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
        rowptr += rowstride;
        rowstride = -rowstride;
        lasty = yi;
        src += src_stride;
      }
    }
336 337 338 339 340 341
    if (filtering == kFilterLinear) {
      InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
    } else {
      int yf = (y >> 8) & 255;
      InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
    }
fbarchard@google.com's avatar
fbarchard@google.com committed
342 343 344 345 346
    dst_argb += dst_stride;
    y += dy;
  }
}

347
#ifdef YUVSCALEUP
// Scale YUV to ARGB up with bilinear interpolation.
//
// Only compiled when YUVSCALEUP is defined. Same ping-pong structure as
// ScaleARGBBilinearUp: inside the loop each new source row is converted
// with I422ToARGBRow into a temporary ARGB row and then scaled horizontally.
//
// src_width, src_height: source size in pixels / rows.
// dst_width, dst_height: output size; dst_width * 4 must fit in kMaxStride.
// src_stride_y/u/v, dst_stride_argb: bytes between consecutive rows.
// src_y, src_u, src_v: source planes. The U/V row index is the Y row index
//   shifted right by kYShift (1).
// dst_argb: first byte of the output.
// x, dx, y, dy: source start position and step in 16.16 fixed point.
// filtering: kFilterLinear skips the vertical blend.
//
// NOTE(review): the two priming calls before the loop pass the Y plane
// straight to ScaleARGBFilterCols without converting it to ARGB first,
// unlike the loop body - confirm whether that is intentional.
// NOTE(review): the source rows are advanced without a bound check inside
// the loop - confirm the last source row can never be passed.
SAFEBUFFERS
static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
                                     int dst_width, int dst_height,
                                     int src_stride_y,
                                     int src_stride_u,
                                     int src_stride_v,
                                     int dst_stride_argb,
                                     const uint8* src_y,
                                     const uint8* src_u,
                                     const uint8* src_v,
                                     uint8* dst_argb,
                                     int x, int dx, int y, int dy,
                                     FilterMode filtering) {
  assert(src_width > 0);
  assert(src_height > 0);
  assert(dst_width > 0);
  assert(dst_height > 0);
  assert(dst_width * 4 <= kMaxStride);

  void (*I422ToARGBRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) {
    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        I422ToARGBRow = I422ToARGBRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) {
    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 16)) {
      I422ToARGBRow = I422ToARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) {
    I422ToARGBRow = I422ToARGBRow_Any_NEON;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_NEON;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
  }
#endif

  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        InterpolateRow = InterpolateRow_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        InterpolateRow = InterpolateRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    InterpolateRow = InterpolateRow_MIPS_DSPR2;
  }
#endif
  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
      int dst_width, int x, int dx) = ScaleARGBFilterCols_C;
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
  // Largest y that still addresses a real source row.
  const int max_y = (src_height - 1) << 16;
  if (y > max_y) {
    y = max_y;
  }
  const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
  int yi = y >> 16;
  int uv_yi = yi >> kYShift;
  const uint8* src_row_y = src_y + yi * src_stride_y;
  const uint8* src_row_u = src_u + uv_yi * src_stride_u;
  const uint8* src_row_v = src_v + uv_yi * src_stride_v;
  // Ping-pong buffer: rowptr is the upper row, rowptr + rowstride the lower.
  SIMD_ALIGNED(uint8 row[2 * kMaxStride]);
  // Holds one source row after conversion to ARGB.
  SIMD_ALIGNED(uint8 argb_row[kMaxStride * 4]);
  uint8* rowptr = row;
  int rowstride = kMaxStride;
  int lasty = yi;

  ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
  if (src_height > 1) {
    src_row_y += src_stride_y;
    // U/V rows advance only when leaving an odd Y row.
    if (yi & 1) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }
  ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
  if (src_height > 2) {
    src_row_y += src_stride_y;
    if (!(yi & 1)) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }

  for (int j = 0; j < dst_height; ++j) {
    yi = y >> 16;
    if (yi != lasty) {
      if (y > max_y) {
        y = max_y;
        yi = y >> 16;
      }
      if (yi != lasty) {
        // TODO(fbarchard): Convert the clipped region of row.
        I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
        ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
        // Flip the buffer so the previous lower row becomes the upper row.
        rowptr += rowstride;
        rowstride = -rowstride;
        lasty = yi;
        src_row_y += src_stride_y;
        if (yi & 1) {
          src_row_u += src_stride_u;
          src_row_v += src_stride_v;
        }
      }
    }
    if (filtering == kFilterLinear) {
      // Horizontal-only filtering: copy the upper row unblended.
      InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
    } else {
      // Top 8 bits of the fractional part of y: weight of the lower row.
      int yf = (y >> 8) & 255;
      InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
    }
    dst_argb += dst_stride_argb;
    y += dy;
  }
}
#endif
529

530
// Scale ARGB to/from any dimensions, without interpolation.
fbarchard@google.com's avatar
fbarchard@google.com committed
531 532 533
// Fixed point math is used for performance: The upper 16 bits
// of x and dx is the integer part of the source position and
// the lower 16 bits are the fixed decimal part.
534 535 536 537

static void ScaleARGBSimple(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
fbarchard@google.com's avatar
fbarchard@google.com committed
538 539
                            const uint8* src_argb, uint8* dst_argb,
                            int x, int dx, int y, int dy) {
540 541 542 543 544
  void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
      int dst_width, int x, int dx) = ScaleARGBCols_C;
#if defined(HAS_SCALEARGBCOLS_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ScaleARGBCols = ScaleARGBCols_SSE2;
545 546 547 548 549 550
  }
#endif
  if (src_width * 2 == dst_width && x < 0x8000) {
    ScaleARGBCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
551 552 553 554
        IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleARGBCols = ScaleARGBColsUp2_SSE2;
    }
555
#endif
556
  }
557

558
  for (int i = 0; i < dst_height; ++i) {
559 560
    ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
                  dst_width, x, dx);
561
    dst_argb += dst_stride;
562 563 564 565 566 567 568 569 570 571 572
    y += dy;
  }
}

// ScaleARGB a ARGB.
// This function in turn calls a scaling function
// suitable for handling the desired resolutions.
static void ScaleARGB(const uint8* src, int src_stride,
                      int src_width, int src_height,
                      uint8* dst, int dst_stride,
                      int dst_width, int dst_height,
fbarchard@google.com's avatar
fbarchard@google.com committed
573
                      int clip_x, int clip_y, int clip_width, int clip_height,
574
                      FilterMode filtering) {
fbarchard@google.com's avatar
fbarchard@google.com committed
575 576 577 578 579 580 581 582 583 584 585 586 587 588
  // Negative src_height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int dx = 0;
  int dy = 0;
  int x = 0;
  int y = 0;
  if (filtering) {
    // Scale step for bilinear sampling renders last pixel once for upsample.
    if (dst_width <= Abs(src_width)) {
589
      dx = FixedDiv(Abs(src_width), dst_width);
fbarchard@google.com's avatar
fbarchard@google.com committed
590 591
      x = (dx >> 1) - 32768;
    } else if (dst_width > 1) {
592
      dx = FixedDiv(Abs(src_width) - 1, dst_width - 1);
fbarchard@google.com's avatar
fbarchard@google.com committed
593 594
    }
    if (dst_height <= src_height) {
595
      dy = FixedDiv(src_height,  dst_height);
fbarchard@google.com's avatar
fbarchard@google.com committed
596 597
      y = (dy >> 1) - 32768;
    } else if (dst_height > 1) {
598
      dy = FixedDiv(src_height - 1, dst_height - 1);
fbarchard@google.com's avatar
fbarchard@google.com committed
599 600 601
    }
  } else {
    // Scale step for point sampling duplicates all pixels equally.
602 603
    dx = FixedDiv(Abs(src_width), dst_width);
    dy = FixedDiv(src_height, dst_height);
604 605
    x = dx >> 1;
    y = dy >> 1;
fbarchard@google.com's avatar
fbarchard@google.com committed
606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621
  }
  // Negative src_width means horizontally mirror.
  if (src_width < 0) {
    x += (dst_width - 1) * dx;
    dx = -dx;
    src_width = -src_width;
  }
  if (clip_x) {
    x += clip_x * dx;
    dst += clip_x * 4;
  }
  if (clip_y) {
    y += clip_y * dy;
    dst += clip_y * dst_stride;
  }

622
  // Special case for integer step values.
623
  if (((dx | dy) & 0xffff) == 0) {
624
    if (!dx || !dy) {  // 1 pixel wide and/or tall.
fbarchard@google.com's avatar
fbarchard@google.com committed
625
      filtering = kFilterNone;
626 627 628 629 630
    } else {
      // Optimized even scale down. ie 2, 4, 6, 8, 10x.
      if (!(dx & 0x10000) && !(dy & 0x10000)) {
        if ((dx >> 16) == 2) {
          // Optimized 1/2 horizontal.
631 632
          ScaleARGBDown2(src_width, src_height,
                         clip_width, clip_height,
633 634 635 636
                         src_stride, dst_stride, src, dst,
                         x, dx, y, dy, filtering);
          return;
        }
637 638
        ScaleARGBDownEven(src_width, src_height,
                          clip_width, clip_height,
639 640
                          src_stride, dst_stride, src, dst,
                          x, dx, y, dy, filtering);
641 642
        return;
      }
643 644 645
      // Optimized odd scale down. ie 3, 5, 7, 9x.
      if ((dx & 0x10000) && (dy & 0x10000)) {
        filtering = kFilterNone;
646
        if (dx == 0x10000 && dy == 0x10000) {
647 648 649 650 651 652
          // Straight copy.
          ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
                   dst, dst_stride, clip_width, clip_height);
          return;
        }
      }
fbarchard@google.com's avatar
fbarchard@google.com committed
653 654
    }
  }
655
  if (dx == 0x10000 && (x & 0xffff) == 0) {
656
    // Arbitrary scale vertically, but unscaled vertically.
657
    ScalePlaneVertical(src_height,
658 659 660
                       clip_width, clip_height,
                       src_stride, dst_stride, src, dst,
                       x, y, dy, 4, filtering);
661 662
    return;
  }
663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679
  if (filtering && dy < 65536 && dst_width * 4 <= kMaxStride) {
    ScaleARGBBilinearUp(src_width, src_height,
                        clip_width, clip_height,
                        src_stride, dst_stride, src, dst,
                        x, dx, y, dy, filtering);
    return;
  }
  if (filtering && src_width * 4 < kMaxStride) {
    ScaleARGBBilinearDown(src_height,
                          clip_width, clip_height,
                          src_stride, dst_stride, src, dst,
                          x, dx, y, dy, filtering);
    return;
  }
  ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
                  src_stride, dst_stride, src, dst,
                  x, dx, y, dy);
fbarchard@google.com's avatar
fbarchard@google.com committed
680
}
681

fbarchard@google.com's avatar
fbarchard@google.com committed
682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699
// Scale an ARGB image, writing only a clip rectangle of the destination.
//
// src_argb, src_stride_argb: source image and bytes per source row.
// src_width, src_height: source size; must be non-zero. Negative values
//   are passed through to the scaler (mirror / invert).
// dst_argb, dst_stride_argb: destination image and bytes per row.
// dst_width, dst_height: full destination size; must be positive.
// clip_x, clip_y, clip_width, clip_height: rectangle of the destination to
//   write; must be non-negative and lie inside the destination.
// filtering: filter mode to use.
//
// Returns 0 on success and -1 if an argument is invalid. An empty clip
// rectangle is valid and writes nothing.
LIBYUV_API
int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
                  int src_width, int src_height,
                  uint8* dst_argb, int dst_stride_argb,
                  int dst_width, int dst_height,
                  int clip_x, int clip_y, int clip_width, int clip_height,
                  enum FilterMode filtering) {
  // The extent checks are written as subtractions so that a huge
  // clip_x + clip_width cannot overflow int and slip past the test.
  if (!src_argb || src_width == 0 || src_height == 0 ||
      !dst_argb || dst_width <= 0 || dst_height <= 0 ||
      clip_x < 0 || clip_y < 0 ||
      clip_width < 0 || clip_height < 0 ||
      clip_width > dst_width - clip_x ||
      clip_height > dst_height - clip_y) {
    return -1;
  }
  // Nothing to draw; avoid handing a zero size to the row functions.
  if (clip_width == 0 || clip_height == 0) {
    return 0;
  }
  ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
            dst_argb, dst_stride_argb, dst_width, dst_height,
            clip_x, clip_y, clip_width, clip_height, filtering);
  return 0;
}

fbarchard@google.com's avatar
fbarchard@google.com committed
702
// Scale an ARGB image.
703
LIBYUV_API
704
int ARGBScale(const uint8* src_argb, int src_stride_argb,
705 706 707 708
              int src_width, int src_height,
              uint8* dst_argb, int dst_stride_argb,
              int dst_width, int dst_height,
              FilterMode filtering) {
709
  if (!src_argb || src_width == 0 || src_height == 0 ||
710
      !dst_argb || dst_width <= 0 || dst_height <= 0) {
711 712 713 714
    return -1;
  }
  ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
            dst_argb, dst_stride_argb, dst_width, dst_height,
fbarchard@google.com's avatar
fbarchard@google.com committed
715
            0, 0, dst_width, dst_height, filtering);
716 717 718 719 720 721 722
  return 0;
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif