/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/scale.h"

#include <assert.h>
#include <string.h>

#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"  // For CopyARGB
#include "libyuv/row.h"
#include "libyuv/scale_row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Returns the absolute value of v.
// Note: -v overflows when v is INT_MIN; callers must not pass that value.
static __inline int Abs(int v) {
  return v >= 0 ? v : -v;
}

fbarchard@google.com's avatar
fbarchard@google.com committed
30 31 32
// ScaleARGB ARGB, 1/2
// This is an optimized version for scaling down a ARGB to 1/2 of
// its original size.
33
static void ScaleARGBDown2(int src_width, int src_height,
34 35
                           int dst_width, int dst_height,
                           int src_stride, int dst_stride,
36
                           const uint8* src_argb, uint8* dst_argb,
fbarchard@google.com's avatar
fbarchard@google.com committed
37
                           int x, int dx, int y, int dy,
38
                           enum FilterMode filtering) {
39 40 41 42 43 44 45
  int j;
  int row_stride = src_stride * (dy >> 16);
  void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
                            uint8* dst_argb, int dst_width) =
    filtering == kFilterNone ? ScaleARGBRowDown2_C :
        (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
        ScaleARGBRowDown2Box_C);
fbarchard@google.com's avatar
fbarchard@google.com committed
46
  assert(dx == 65536 * 2);  // Test scale factor of 2.
47
  assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
48
  // Advance to odd row, even column.
fbarchard@google.com's avatar
fbarchard@google.com committed
49
  if (filtering == kFilterBilinear) {
50 51 52 53
    src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
  } else {
    src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
  }
54

55
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
fbarchard@google.com's avatar
fbarchard@google.com committed
56
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
57
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
58
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
59 60 61
    ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
        (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
        ScaleARGBRowDown2Box_SSE2);
62
  }
fbarchard@google.com's avatar
fbarchard@google.com committed
63 64
#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
65
      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
66
    ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON :
fbarchard@google.com's avatar
fbarchard@google.com committed
67 68
        ScaleARGBRowDown2_NEON;
  }
69 70
#endif

71 72 73
  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
74
  for (j = 0; j < dst_height; ++j) {
75
    ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
76
    src_argb += row_stride;
77
    dst_argb += dst_stride;
78 79 80
  }
}

fbarchard@google.com's avatar
fbarchard@google.com committed
81 82 83
// ScaleARGB ARGB, 1/4
// This is an optimized version for scaling down a ARGB to 1/4 of
// its original size.
84
static void ScaleARGBDown4Box(int src_width, int src_height,
fbarchard@google.com's avatar
fbarchard@google.com committed
85 86 87 88
                              int dst_width, int dst_height,
                              int src_stride, int dst_stride,
                              const uint8* src_argb, uint8* dst_argb,
                              int x, int dx, int y, int dy) {
89 90 91 92
  int j;
  // Allocate 2 rows of ARGB.
  const int kRowSize = (dst_width * 2 * 4 + 15) & ~15;
  align_buffer_64(row, kRowSize * 2);
fbarchard@google.com's avatar
fbarchard@google.com committed
93 94 95
  int row_stride = src_stride * (dy >> 16);
  void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
    uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C;
96 97 98 99
  // Advance to odd row, even column.
  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
  assert(dx == 65536 * 4);  // Test scale factor of 4.
  assert((dy & 0x3ffff) == 0);  // Test vertical scale is multiple of 4.
fbarchard@google.com's avatar
fbarchard@google.com committed
100 101 102 103 104 105 106 107 108 109 110 111
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
    ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
  }
#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
    ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
  }
#endif
112
  for (j = 0; j < dst_height; ++j) {
fbarchard@google.com's avatar
fbarchard@google.com committed
113 114
    ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
    ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
115 116
                      row + kRowSize, dst_width * 2);
    ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
fbarchard@google.com's avatar
fbarchard@google.com committed
117 118 119
    src_argb += row_stride;
    dst_argb += dst_stride;
  }
120
  free_aligned_buffer_64(row);
fbarchard@google.com's avatar
fbarchard@google.com committed
121 122
}

fbarchard@google.com's avatar
fbarchard@google.com committed
123 124 125
// ScaleARGB ARGB Even
// This is an optimized version for scaling down a ARGB to even
// multiple of its original size.
fbarchard@google.com's avatar
fbarchard@google.com committed
126 127 128
static void ScaleARGBDownEven(int src_width, int src_height,
                              int dst_width, int dst_height,
                              int src_stride, int dst_stride,
129
                              const uint8* src_argb, uint8* dst_argb,
fbarchard@google.com's avatar
fbarchard@google.com committed
130
                              int x, int dx, int y, int dy,
131
                              enum FilterMode filtering) {
132
  int j;
133 134
  int col_step = dx >> 16;
  int row_stride = (dy >> 16) * src_stride;
135 136
  void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
                               int src_step, uint8* dst_argb, int dst_width) =
137
      filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
138 139 140
  assert(IS_ALIGNED(src_width, 2));
  assert(IS_ALIGNED(src_height, 2));
  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
fbarchard@google.com's avatar
fbarchard@google.com committed
141
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
142 143
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
144
    ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
fbarchard@google.com's avatar
fbarchard@google.com committed
145 146
        ScaleARGBRowDownEven_SSE2;
  }
147 148 149
#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) &&
      IS_ALIGNED(src_argb, 4)) {
150
    ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
151 152
        ScaleARGBRowDownEven_NEON;
  }
fbarchard@google.com's avatar
fbarchard@google.com committed
153
#endif
154

155 156 157
  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
158
  for (j = 0; j < dst_height; ++j) {
fbarchard@google.com's avatar
fbarchard@google.com committed
159
    ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
160 161
    src_argb += row_stride;
    dst_argb += dst_stride;
fbarchard@google.com's avatar
fbarchard@google.com committed
162 163
  }
}
fbarchard@google.com's avatar
fbarchard@google.com committed
164

165
// Scale ARGB down with bilinear interpolation.
166
static void ScaleARGBBilinearDown(int src_width, int src_height,
fbarchard@google.com's avatar
fbarchard@google.com committed
167 168
                                  int dst_width, int dst_height,
                                  int src_stride, int dst_stride,
fbarchard@google.com's avatar
fbarchard@google.com committed
169
                                  const uint8* src_argb, uint8* dst_argb,
170
                                  int x, int dx, int y, int dy,
171
                                  enum FilterMode filtering) {
172
  int j;
173 174 175 176 177 178
  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
      int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
179
  int64 xlast = x + (int64)(dst_width - 1) * dx;
180 181
  int64 xl = (dx >= 0) ? x : xlast;
  int64 xr = (dx >= 0) ? xlast : x;
182
  int clip_src_width;
183
  xl = (xl >> 16) & ~3;  // Left edge aligned.
184 185 186 187 188
  xr = (xr >> 16) + 1;  // Right most pixel used.  Bilinear uses 2 pixels.
  xr = (xr + 1 + 3) & ~3;  // 1 beyond 4 pixel aligned right most pixel.
  if (xr > src_width) {
    xr = src_width;
  }
189
  clip_src_width = (int)(xr - xl) * 4;  // Width aligned to 4.
190
  src_argb += xl * 4;
191
  x -= (int)(xl << 16);
192 193 194 195 196
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(clip_src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
fbarchard@google.com's avatar
fbarchard@google.com committed
197
      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
198
        InterpolateRow = InterpolateRow_SSE2;
fbarchard@google.com's avatar
fbarchard@google.com committed
199 200
      }
    }
201 202
  }
#endif
203 204 205 206 207
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(clip_src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
fbarchard@google.com's avatar
fbarchard@google.com committed
208
      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
209
        InterpolateRow = InterpolateRow_SSSE3;
fbarchard@google.com's avatar
fbarchard@google.com committed
210 211 212 213
      }
    }
  }
#endif
214 215 216 217 218 219 220 221
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(clip_src_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
222 223 224 225 226 227 228 229 230 231 232 233
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(clip_src_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 &&
      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
fbarchard@google.com's avatar
fbarchard@google.com committed
234
    if (IS_ALIGNED(clip_src_width, 4)) {
235
      InterpolateRow = InterpolateRow_MIPS_DSPR2;
fbarchard@google.com's avatar
fbarchard@google.com committed
236
    }
237
  }
238
#endif
239
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
240
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
241 242 243
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
244 245
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row of ARGB.
246 247
  {
    align_buffer_64(row, clip_src_width * 4);
248

249
    const int max_y = (src_height - 1) << 16;
250 251
    if (y > max_y) {
      y = max_y;
252
    }
253 254 255 256 257 258 259 260 261 262 263 264 265 266 267
    for (j = 0; j < dst_height; ++j) {
      int yi = y >> 16;
      const uint8* src = src_argb + yi * src_stride;
      if (filtering == kFilterLinear) {
        ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
      } else {
        int yf = (y >> 8) & 255;
        InterpolateRow(row, src, src_stride, clip_src_width, yf);
        ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
      }
      dst_argb += dst_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;
      }
268
    }
269
    free_aligned_buffer_64(row);
270 271 272
  }
}

273
// Scale ARGB up with bilinear interpolation.
fbarchard@google.com's avatar
fbarchard@google.com committed
274 275 276
static void ScaleARGBBilinearUp(int src_width, int src_height,
                                int dst_width, int dst_height,
                                int src_stride, int dst_stride,
fbarchard@google.com's avatar
fbarchard@google.com committed
277
                                const uint8* src_argb, uint8* dst_argb,
278
                                int x, int dx, int y, int dy,
279
                                enum FilterMode filtering) {
280
  int j;
281
  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
fbarchard@google.com's avatar
fbarchard@google.com committed
282
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
283
      InterpolateRow_C;
284 285 286
  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
      int dst_width, int x, int dx) =
      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
287
  const int max_y = (src_height - 1) << 16;
288
#if defined(HAS_INTERPOLATEROW_SSE2)
fbarchard@google.com's avatar
fbarchard@google.com committed
289
  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
290
    InterpolateRow = InterpolateRow_Any_SSE2;
fbarchard@google.com's avatar
fbarchard@google.com committed
291
    if (IS_ALIGNED(dst_width, 4)) {
292
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
fbarchard@google.com's avatar
fbarchard@google.com committed
293
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
294
        InterpolateRow = InterpolateRow_SSE2;
fbarchard@google.com's avatar
fbarchard@google.com committed
295 296 297 298
      }
    }
  }
#endif
299
#if defined(HAS_INTERPOLATEROW_SSSE3)
fbarchard@google.com's avatar
fbarchard@google.com committed
300
  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
301
    InterpolateRow = InterpolateRow_Any_SSSE3;
fbarchard@google.com's avatar
fbarchard@google.com committed
302
    if (IS_ALIGNED(dst_width, 4)) {
303
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
fbarchard@google.com's avatar
fbarchard@google.com committed
304
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
305
        InterpolateRow = InterpolateRow_SSSE3;
fbarchard@google.com's avatar
fbarchard@google.com committed
306 307 308 309
      }
    }
  }
#endif
310 311 312 313 314 315 316 317
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
318
#if defined(HAS_INTERPOLATEROW_NEON)
fbarchard@google.com's avatar
fbarchard@google.com committed
319
  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
320
    InterpolateRow = InterpolateRow_Any_NEON;
fbarchard@google.com's avatar
fbarchard@google.com committed
321
    if (IS_ALIGNED(dst_width, 4)) {
322
      InterpolateRow = InterpolateRow_NEON;
fbarchard@google.com's avatar
fbarchard@google.com committed
323 324
    }
  }
325 326 327 328 329 330
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
    InterpolateRow = InterpolateRow_MIPS_DSPR2;
  }
fbarchard@google.com's avatar
fbarchard@google.com committed
331
#endif
332 333 334
  if (src_width >= 32768) {
    ScaleARGBFilterCols = filtering ?
        ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
335
  }
fbarchard@google.com's avatar
fbarchard@google.com committed
336
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
337
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
fbarchard@google.com's avatar
fbarchard@google.com committed
338 339 340
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
341
#if defined(HAS_SCALEARGBCOLS_SSE2)
342
  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
343 344 345 346 347 348 349 350 351 352 353 354 355 356
    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
  }
#endif
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
        IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
    }
#endif
  }

357 358
  if (y > max_y) {
    y = max_y;
fbarchard@google.com's avatar
fbarchard@google.com committed
359
  }
360

361 362 363
  {
    int yi = y >> 16;
    const uint8* src = src_argb + yi * src_stride;
364

365 366 367
    // Allocate 2 rows of ARGB.
    const int kRowSize = (dst_width * 4 + 15) & ~15;
    align_buffer_64(row, kRowSize * 2);
fbarchard@google.com's avatar
fbarchard@google.com committed
368

369 370 371 372 373 374 375 376 377
    uint8* rowptr = row;
    int rowstride = kRowSize;
    int lasty = yi;

    ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
fbarchard@google.com's avatar
fbarchard@google.com committed
378 379
    src += src_stride;

380 381
    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
382
      if (yi != lasty) {
383 384 385 386 387 388 389 390 391 392 393 394
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_argb + yi * src_stride;
        }
        if (yi != lasty) {
          ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
fbarchard@google.com's avatar
fbarchard@google.com committed
395
      }
396 397 398 399 400 401 402 403
      if (filtering == kFilterLinear) {
        InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
      } else {
        int yf = (y >> 8) & 255;
        InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
      }
      dst_argb += dst_stride;
      y += dy;
fbarchard@google.com's avatar
fbarchard@google.com committed
404
    }
405
    free_aligned_buffer_64(row);
fbarchard@google.com's avatar
fbarchard@google.com committed
406 407 408
  }
}

#ifdef YUVSCALEUP
// Scale YUV to ARGB up with bilinear interpolation.
// Each needed source row is converted to ARGB into a temporary row, scaled
// horizontally into a two-row buffer, and output rows are produced from that
// buffer by InterpolateRow.
// x, dx, y and dy are 16.16 fixed point source position and step values.
// The U and V planes are indexed at half the Y row index (kYShift).
static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
                                     int dst_width, int dst_height,
                                     int src_stride_y,
                                     int src_stride_u,
                                     int src_stride_v,
                                     int dst_stride_argb,
                                     const uint8* src_y,
                                     const uint8* src_u,
                                     const uint8* src_v,
                                     uint8* dst_argb,
                                     int x, int dx, int y, int dy,
                                     enum FilterMode filtering) {
  int j;
  void (*I422ToARGBRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) {
    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        I422ToARGBRow = I422ToARGBRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) {
    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 16)) {
      I422ToARGBRow = I422ToARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) {
    I422ToARGBRow = I422ToARGBRow_Any_NEON;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_NEON;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
  }
#endif

  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        InterpolateRow = InterpolateRow_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        InterpolateRow = InterpolateRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    InterpolateRow = InterpolateRow_MIPS_DSPR2;
  }
#endif

  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
      int dst_width, int x, int dx) =
      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
  // Wide sources use the 64-bit column scalers.
  if (src_width >= 32768) {
    ScaleARGBFilterCols = filtering ?
        ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
  }
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
  }
#endif

  const int max_y = (src_height - 1) << 16;
  if (y > max_y) {
    y = max_y;
  }
  const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
  int yi = y >> 16;
  int uv_yi = yi >> kYShift;
  const uint8* src_row_y = src_y + yi * src_stride_y;
  const uint8* src_row_u = src_u + uv_yi * src_stride_u;
  const uint8* src_row_v = src_v + uv_yi * src_stride_v;

  // Allocate 2 rows of ARGB.
  const int kRowSize = (dst_width * 4 + 15) & ~15;
  align_buffer_64(row, kRowSize * 2);

  // Allocate 1 row of ARGB for source conversion.
  align_buffer_64(argb_row, src_width * 4);

  // Unfiltered exact 2x horizontal upscale has a dedicated column function.
  // The column scaler reads argb_row and writes into row, so those are the
  // buffers whose alignment matters here; kRowSize is a multiple of 16, so
  // the second buffered row shares the alignment of the first.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
        IS_ALIGNED(argb_row, 16) && IS_ALIGNED(row, 16)) {
      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
    }
#endif
  }

  // rowptr addresses the upper of the two buffered rows; rowptr + rowstride
  // addresses the other. rowstride flips sign each time a row is replaced.
  uint8* rowptr = row;
  int rowstride = kRowSize;
  int lasty = yi;

  // Convert the first 2 rows of YUV to ARGB before scaling them, exactly as
  // the main loop does; the Y plane itself is not ARGB data.
  I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
  ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
  if (src_height > 1) {
    src_row_y += src_stride_y;
    if (yi & 1) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }
  I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
  ScaleARGBFilterCols(rowptr + rowstride, argb_row, dst_width, x, dx);
  if (src_height > 2) {
    src_row_y += src_stride_y;
    if (!(yi & 1)) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }

  for (j = 0; j < dst_height; ++j) {
    yi = y >> 16;
    if (yi != lasty) {
      if (y > max_y) {
        y = max_y;
        yi = y >> 16;
        uv_yi = yi >> kYShift;
        src_row_y = src_y + yi * src_stride_y;
        src_row_u = src_u + uv_yi * src_stride_u;
        src_row_v = src_v + uv_yi * src_stride_v;
      }
      if (yi != lasty) {
        // TODO(fbarchard): Convert the clipped region of row.
        I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
        ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
        rowptr += rowstride;
        rowstride = -rowstride;
        lasty = yi;
        src_row_y += src_stride_y;
        if (yi & 1) {
          src_row_u += src_stride_u;
          src_row_v += src_stride_v;
        }
      }
    }
    if (filtering == kFilterLinear) {
      InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
    } else {
      // Top 8 bits of the fractional part of y select the vertical blend.
      int yf = (y >> 8) & 255;
      InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
    }
    dst_argb += dst_stride_argb;
    y += dy;
  }
  free_aligned_buffer_64(row);
  free_aligned_buffer_64(argb_row);
}
#endif

621
// Scale ARGB to/from any dimensions, without interpolation.
fbarchard@google.com's avatar
fbarchard@google.com committed
622 623 624
// Fixed point math is used for performance: The upper 16 bits
// of x and dx is the integer part of the source position and
// the lower 16 bits are the fixed decimal part.
625 626 627 628

static void ScaleARGBSimple(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
fbarchard@google.com's avatar
fbarchard@google.com committed
629 630
                            const uint8* src_argb, uint8* dst_argb,
                            int x, int dx, int y, int dy) {
631
  int j;
632
  void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
633 634
      int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
635
#if defined(HAS_SCALEARGBCOLS_SSE2)
636
  if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
637
    ScaleARGBCols = ScaleARGBCols_SSE2;
638 639 640 641 642 643
  }
#endif
  if (src_width * 2 == dst_width && x < 0x8000) {
    ScaleARGBCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
644 645 646 647
        IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleARGBCols = ScaleARGBColsUp2_SSE2;
    }
648
#endif
649
  }
650

651
  for (j = 0; j < dst_height; ++j) {
652 653
    ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
                  dst_width, x, dx);
654
    dst_argb += dst_stride;
655 656 657 658 659 660 661 662 663 664 665
    y += dy;
  }
}

// ScaleARGB a ARGB.
// This function in turn calls a scaling function
// suitable for handling the desired resolutions.
static void ScaleARGB(const uint8* src, int src_stride,
                      int src_width, int src_height,
                      uint8* dst, int dst_stride,
                      int dst_width, int dst_height,
fbarchard@google.com's avatar
fbarchard@google.com committed
666
                      int clip_x, int clip_y, int clip_width, int clip_height,
667
                      enum FilterMode filtering) {
668 669 670 671 672
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
673 674 675 676 677 678
  // ARGB does not support box filter yet, but allow the user to pass it.
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height,
                                dst_width, dst_height,
                                filtering);

fbarchard@google.com's avatar
fbarchard@google.com committed
679 680 681 682 683 684
  // Negative src_height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }
685
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
686
             &x, &y, &dx, &dy);
687
  src_width = Abs(src_width);
fbarchard@google.com's avatar
fbarchard@google.com committed
688
  if (clip_x) {
689
    int64 clipf = (int64)(clip_x) * dx;
690 691
    x += (clipf & 0xffff);
    src += (clipf >> 16) * 4;
fbarchard@google.com's avatar
fbarchard@google.com committed
692 693 694
    dst += clip_x * 4;
  }
  if (clip_y) {
695
    int64 clipf = (int64)(clip_y) * dy;
696 697
    y += (clipf & 0xffff);
    src += (clipf >> 16) * src_stride;
fbarchard@google.com's avatar
fbarchard@google.com committed
698 699 700
    dst += clip_y * dst_stride;
  }

701
  // Special case for integer step values.
702
  if (((dx | dy) & 0xffff) == 0) {
703
    if (!dx || !dy) {  // 1 pixel wide and/or tall.
fbarchard@google.com's avatar
fbarchard@google.com committed
704
      filtering = kFilterNone;
705 706 707
    } else {
      // Optimized even scale down. ie 2, 4, 6, 8, 10x.
      if (!(dx & 0x10000) && !(dy & 0x10000)) {
fbarchard@google.com's avatar
fbarchard@google.com committed
708 709
        if (dx == 0x20000) {
          // Optimized 1/2 downsample.
710 711
          ScaleARGBDown2(src_width, src_height,
                         clip_width, clip_height,
712 713 714 715
                         src_stride, dst_stride, src, dst,
                         x, dx, y, dy, filtering);
          return;
        }
fbarchard@google.com's avatar
fbarchard@google.com committed
716 717 718 719 720 721 722 723
        if (dx == 0x40000 && filtering == kFilterBox) {
          // Optimized 1/4 box downsample.
          ScaleARGBDown4Box(src_width, src_height,
                            clip_width, clip_height,
                            src_stride, dst_stride, src, dst,
                            x, dx, y, dy);
          return;
        }
724 725
        ScaleARGBDownEven(src_width, src_height,
                          clip_width, clip_height,
726 727
                          src_stride, dst_stride, src, dst,
                          x, dx, y, dy, filtering);
728 729
        return;
      }
730 731 732
      // Optimized odd scale down. ie 3, 5, 7, 9x.
      if ((dx & 0x10000) && (dy & 0x10000)) {
        filtering = kFilterNone;
733
        if (dx == 0x10000 && dy == 0x10000) {
734 735 736 737 738 739
          // Straight copy.
          ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
                   dst, dst_stride, clip_width, clip_height);
          return;
        }
      }
fbarchard@google.com's avatar
fbarchard@google.com committed
740 741
    }
  }
742
  if (dx == 0x10000 && (x & 0xffff) == 0) {
743
    // Arbitrary scale vertically, but unscaled vertically.
744
    ScalePlaneVertical(src_height,
745 746 747
                       clip_width, clip_height,
                       src_stride, dst_stride, src, dst,
                       x, y, dy, 4, filtering);
748 749
    return;
  }
750
  if (filtering && dy < 65536) {
751 752 753 754 755 756
    ScaleARGBBilinearUp(src_width, src_height,
                        clip_width, clip_height,
                        src_stride, dst_stride, src, dst,
                        x, dx, y, dy, filtering);
    return;
  }
757
  if (filtering) {
758
    ScaleARGBBilinearDown(src_width, src_height,
759 760 761 762 763 764 765 766
                          clip_width, clip_height,
                          src_stride, dst_stride, src, dst,
                          x, dx, y, dy, filtering);
    return;
  }
  ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
                  src_stride, dst_stride, src, dst,
                  x, dx, y, dy);
fbarchard@google.com's avatar
fbarchard@google.com committed
767
}
768

fbarchard@google.com's avatar
fbarchard@google.com committed
769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786
LIBYUV_API
int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
                  int src_width, int src_height,
                  uint8* dst_argb, int dst_stride_argb,
                  int dst_width, int dst_height,
                  int clip_x, int clip_y, int clip_width, int clip_height,
                  enum FilterMode filtering) {
  if (!src_argb || src_width == 0 || src_height == 0 ||
      !dst_argb || dst_width <= 0 || dst_height <= 0 ||
      clip_x < 0 || clip_y < 0 ||
      (clip_x + clip_width) > dst_width ||
      (clip_y + clip_height) > dst_height) {
    return -1;
  }
  ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
            dst_argb, dst_stride_argb, dst_width, dst_height,
            clip_x, clip_y, clip_width, clip_height, filtering);
  return 0;
787 788
}

fbarchard@google.com's avatar
fbarchard@google.com committed
789
// Scale an ARGB image.
790
LIBYUV_API
791
int ARGBScale(const uint8* src_argb, int src_stride_argb,
792 793 794
              int src_width, int src_height,
              uint8* dst_argb, int dst_stride_argb,
              int dst_width, int dst_height,
795
              enum FilterMode filtering) {
796
  if (!src_argb || src_width == 0 || src_height == 0 ||
797
      !dst_argb || dst_width <= 0 || dst_height <= 0) {
798 799 800 801
    return -1;
  }
  ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
            dst_argb, dst_stride_argb, dst_width, dst_height,
fbarchard@google.com's avatar
fbarchard@google.com committed
802
            0, 0, dst_width, dst_height, filtering);
803 804 805 806 807 808 809
  return 0;
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif