row_common.cc 71.6 KB
Newer Older
1
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

11
#include "libyuv/row.h"
12

13
#include <string.h>  // For memcpy and memset.
frkoenig@google.com's avatar
frkoenig@google.com committed
14

15 16
#include "libyuv/basic_types.h"

17 18
#ifdef __cplusplus
namespace libyuv {
19
extern "C" {
20
#endif
21

22 23 24 25 26 27 28 29 30 31 32 33 34 35
// llvm x86 generates poor code for the ternary operator, so the default build
// uses branch-free min/max helpers.

#define USE_BRANCHLESS 1
#if USE_BRANCHLESS
// Returns v when v is positive, otherwise 0.
// Relies on >> of a negative int32 replicating the sign bit.
static __inline int32 clamp0(int32 v) {
  const int32 keep_mask = -(v) >> 31;  // All ones only when v > 0.
  return keep_mask & v;
}

// Returns 255 when v is above 255, otherwise the low 8 bits of v.
static __inline int32 clamp255(int32 v) {
  const int32 over_mask = (255 - v) >> 31;  // All ones only when v > 255.
  return (over_mask | v) & 255;
}

// Returns the magnitude of v without branching.
static __inline uint32 Abs(int32 v) {
  const int sign = v >> 31;  // -1 for negative v, 0 otherwise.
  return (v + sign) ^ sign;
}
#else  // USE_BRANCHLESS
// Returns v, or 0 when v is negative.
static __inline int32 clamp0(int32 v) {
  if (v < 0) {
    return 0;
  }
  return v;
}

// Returns v, or 255 when v is above 255.
static __inline int32 clamp255(int32 v) {
  if (v > 255) {
    return 255;
  }
  return v;
}

// Returns the magnitude of v.
static __inline uint32 Abs(int32 v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
#endif  // USE_BRANCHLESS

// Saturates val to the range 0..255: lower bound first, then upper bound.
static __inline uint32 Clamp(int32 val) {
  return (uint32)(clamp255(clamp0(val)));
}

62
// WRITEWORD(p, v) stores the 32-bit value v at p, least significant byte
// first.
#ifdef LIBYUV_LITTLE_ENDIAN
// Host byte order already matches, so a single 32-bit store is used.
// Arguments and the whole expansion are parenthesized so any expression can
// be passed safely.
// NOTE(review): assumes p may legally be accessed as a uint32 - confirm.
#define WRITEWORD(p, v) (*(uint32*)(p) = (v))
#else
// Byte-by-byte store for hosts whose native order is not little endian.
static __inline void WRITEWORD(uint8* p, uint32 v) {
  p[0] = (uint8)(v & 255);
  p[1] = (uint8)((v >> 8) & 255);
  p[2] = (uint8)((v >> 16) & 255);
  p[3] = (uint8)((v >> 24) & 255);
}
#endif

73
// Converts a row of RGB24 pixels (3 bytes each, read as B, G, R) into ARGB
// pixels (4 bytes each, written as B, G, R, A) with alpha set to 255.
void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 blue = src_rgb24[0];
    const uint8 green = src_rgb24[1];
    const uint8 red = src_rgb24[2];
    src_rgb24 += 3;
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}

88
// Converts a row of RAW pixels (3 bytes each, read as R, G, B) into ARGB
// pixels (4 bytes each, written as B, G, R, A) with alpha set to 255.
void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 red = src_raw[0];
    const uint8 green = src_raw[1];
    const uint8 blue = src_raw[2];
    src_raw += 3;
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}

103
// Expands a row of 16-bit RGB565 pixels to 8-bit-per-channel ARGB.
// Each channel is widened by replicating its top bits into the new low bits;
// alpha is set to 255.
void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 lo = src_rgb565[0];
    const uint8 hi = src_rgb565[1];
    const uint8 b5 = lo & 0x1f;
    const uint8 g6 = (lo >> 5) | ((hi & 0x07) << 3);
    const uint8 r5 = hi >> 3;
    src_rgb565 += 2;
    dst_argb[0] = (b5 << 3) | (b5 >> 2);
    dst_argb[1] = (g6 << 2) | (g6 >> 4);
    dst_argb[2] = (r5 << 3) | (r5 >> 2);
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}

118 119
// Expands a row of 16-bit ARGB1555 pixels to 8-bit-per-channel ARGB.
// The 5-bit colour channels are widened by bit replication; the single alpha
// bit becomes 0 or 255.
void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb,
                         int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 lo = src_argb1555[0];
    const uint8 hi = src_argb1555[1];
    const uint8 b5 = lo & 0x1f;
    const uint8 g5 = (lo >> 5) | ((hi & 0x03) << 3);
    const uint8 r5 = (hi & 0x7c) >> 2;
    const uint8 a1 = hi >> 7;
    src_argb1555 += 2;
    dst_argb[0] = (b5 << 3) | (b5 >> 2);
    dst_argb[1] = (g5 << 3) | (g5 >> 2);
    dst_argb[2] = (r5 << 3) | (r5 >> 2);
    dst_argb[3] = -a1;  // 0 stays 0; 1 becomes -1, stored as 255.
    dst_argb += 4;
  }
}

135 136
// Expands a row of 16-bit ARGB4444 pixels to 8-bit-per-channel ARGB by
// duplicating each 4-bit nibble into both halves of the output byte.
void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb,
                         int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 lo = src_argb4444[0];
    const uint8 hi = src_argb4444[1];
    const uint8 b4 = lo & 0x0f;
    const uint8 g4 = lo >> 4;
    const uint8 r4 = hi & 0x0f;
    const uint8 a4 = hi >> 4;
    src_argb4444 += 2;
    dst_argb[0] = (b4 << 4) | b4;
    dst_argb[1] = (g4 << 4) | g4;
    dst_argb[2] = (r4 << 4) | r4;
    dst_argb[3] = (a4 << 4) | a4;
    dst_argb += 4;
  }
}

152
// Packs a row of ARGB pixels into RGB24 by copying the first three bytes of
// each pixel (B, G, R) and dropping the fourth (alpha).
void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 blue = src_argb[0];
    const uint8 green = src_argb[1];
    const uint8 red = src_argb[2];
    src_argb += 4;
    dst_rgb[0] = blue;
    dst_rgb[1] = green;
    dst_rgb[2] = red;
    dst_rgb += 3;
  }
}

166
// Packs a row of ARGB pixels into RAW: the three colour bytes are written in
// reverse order (R, G, B) and alpha is dropped.
void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 blue = src_argb[0];
    const uint8 green = src_argb[1];
    const uint8 red = src_argb[2];
    src_argb += 4;
    dst_rgb[0] = red;
    dst_rgb[1] = green;
    dst_rgb[2] = blue;
    dst_rgb += 3;
  }
}

180
// Packs a row of ARGB pixels into 16-bit RGB565 (5 bits blue, 6 bits green,
// 5 bits red, blue in the low bits). Pixels are processed in pairs; an odd
// trailing pixel is handled separately.
//
// Channels are held in uint32 so that shifts up to bit 31 are well defined,
// and every store is emitted least significant byte first so the output does
// not depend on host byte order.
void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    const uint32 b0 = src_argb[0] >> 3;
    const uint32 g0 = src_argb[1] >> 2;
    const uint32 r0 = src_argb[2] >> 3;
    const uint32 b1 = src_argb[4] >> 3;
    const uint32 g1 = src_argb[5] >> 2;
    const uint32 r1 = src_argb[6] >> 3;
    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) |
              (b1 << 16) | (g1 << 21) | (r1 << 27));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    const uint32 b0 = src_argb[0] >> 3;
    const uint32 g0 = src_argb[1] >> 2;
    const uint32 r0 = src_argb[2] >> 3;
    const uint32 pixel = b0 | (g0 << 5) | (r0 << 11);
    // Two explicit byte stores: same layout as WRITEWORD, no uint16 aliasing.
    dst_rgb[0] = (uint8)(pixel & 255);
    dst_rgb[1] = (uint8)(pixel >> 8);
  }
}

202
// Packs a row of ARGB pixels into 16-bit ARGB1555 (5 bits each of blue,
// green and red, plus the top bit of alpha in bit 15). Pixels are processed
// in pairs; an odd trailing pixel is handled separately.
//
// Channels are held in uint32 so that the shift of alpha to bit 31 is well
// defined, and stores go through WRITEWORD / explicit bytes so the output is
// least significant byte first on every host, matching ARGBToRGB565Row_C.
void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    const uint32 b0 = src_argb[0] >> 3;
    const uint32 g0 = src_argb[1] >> 3;
    const uint32 r0 = src_argb[2] >> 3;
    const uint32 a0 = src_argb[3] >> 7;
    const uint32 b1 = src_argb[4] >> 3;
    const uint32 g1 = src_argb[5] >> 3;
    const uint32 r1 = src_argb[6] >> 3;
    const uint32 a1 = src_argb[7] >> 7;
    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
              (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    const uint32 b0 = src_argb[0] >> 3;
    const uint32 g0 = src_argb[1] >> 3;
    const uint32 r0 = src_argb[2] >> 3;
    const uint32 a0 = src_argb[3] >> 7;
    const uint32 pixel = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
    dst_rgb[0] = (uint8)(pixel & 255);
    dst_rgb[1] = (uint8)(pixel >> 8);
  }
}

229
// Packs a row of ARGB pixels into 16-bit ARGB4444 by keeping the top 4 bits
// of each channel (blue in the low nibble, alpha in the high nibble). Pixels
// are processed in pairs; an odd trailing pixel is handled separately.
//
// Channels are held in uint32 so that the shift of alpha to bits 28..31 is
// well defined, and stores go through WRITEWORD / explicit bytes so the
// output is least significant byte first on every host.
void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    const uint32 b0 = src_argb[0] >> 4;
    const uint32 g0 = src_argb[1] >> 4;
    const uint32 r0 = src_argb[2] >> 4;
    const uint32 a0 = src_argb[3] >> 4;
    const uint32 b1 = src_argb[4] >> 4;
    const uint32 g1 = src_argb[5] >> 4;
    const uint32 r1 = src_argb[6] >> 4;
    const uint32 a1 = src_argb[7] >> 4;
    WRITEWORD(dst_rgb, b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
              (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    const uint32 b0 = src_argb[0] >> 4;
    const uint32 g0 = src_argb[1] >> 4;
    const uint32 r0 = src_argb[2] >> 4;
    const uint32 a0 = src_argb[3] >> 4;
    const uint32 pixel = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
    dst_rgb[0] = (uint8)(pixel & 255);
    dst_rgb[1] = (uint8)(pixel >> 8);
  }
}

256
// Computes luma from 8-bit R, G, B using 8-bit fixed-point weights.
// 0x1080 is (16 << 8) + 128: an offset of 16 plus one half for rounding.
static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
  const int weighted = 66 * r + 129 * g + 25 * b;
  return (weighted + 0x1080) >> 8;
}

260
// Computes the U chroma value from 8-bit R, G, B using 8-bit fixed-point
// weights. 0x8080 is (128 << 8) + 128: a centre of 128 plus rounding.
static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
  const int weighted = 112 * b - 74 * g - 38 * r;
  return (weighted + 0x8080) >> 8;
}
263
// Computes the V chroma value from 8-bit R, G, B using 8-bit fixed-point
// weights. 0x8080 is (128 << 8) + 128: a centre of 128 plus rounding.
static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
  const int weighted = 112 * r - 94 * g - 18 * b;
  return (weighted + 0x8080) >> 8;
}

267
#define MAKEROWY(NAME, R, G, B, BPP) \
268
void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) {       \
269 270
  int x;                                                                       \
  for (x = 0; x < width; ++x) {                                                \
271
    dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);               \
272
    src_argb0 += BPP;                                                          \
273 274 275 276 277 278
    dst_y += 1;                                                                \
  }                                                                            \
}                                                                              \
void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb,              \
                       uint8* dst_u, uint8* dst_v, int width) {                \
  const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                           \
279 280
  int x;                                                                       \
  for (x = 0; x < width - 1; x += 2) {                                         \
281 282 283 284 285 286
    uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] +                              \
               src_rgb1[B] + src_rgb1[B + BPP]) >> 2;                          \
    uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] +                              \
               src_rgb1[G] + src_rgb1[G + BPP]) >> 2;                          \
    uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] +                              \
               src_rgb1[R] + src_rgb1[R + BPP]) >> 2;                          \
287 288
    dst_u[0] = RGBToU(ar, ag, ab);                                             \
    dst_v[0] = RGBToV(ar, ag, ab);                                             \
289 290
    src_rgb0 += BPP * 2;                                                       \
    src_rgb1 += BPP * 2;                                                       \
291 292 293 294 295 296 297 298 299 300 301 302
    dst_u += 1;                                                                \
    dst_v += 1;                                                                \
  }                                                                            \
  if (width & 1) {                                                             \
    uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1;                               \
    uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1;                               \
    uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1;                               \
    dst_u[0] = RGBToU(ar, ag, ab);                                             \
    dst_v[0] = RGBToV(ar, ag, ab);                                             \
  }                                                                            \
}

303 304 305 306 307 308 309 310
MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
MAKEROWY(RGB24, 2, 1, 0, 3)
MAKEROWY(RAW, 0, 1, 2, 3)
#undef MAKEROWY

311 312 313 314 315
// JPEG uses a variation on BT.601-1 full range:
// y =  0.29900 * r + 0.58700 * g + 0.11400 * b
// u = -0.16874 * r - 0.33126 * g + 0.50000 * b  + center
// v =  0.50000 * r - 0.41869 * g - 0.08131 * b  + center
// BT.601 MPEG range uses:
// b 0.1016 * 255 = 25.908 = 25
// g 0.5078 * 255 = 129.489 = 129
// r 0.2578 * 255 = 65.739 = 66
// JPEG 8 bit Y (not used):
// b 0.11400 * 256 = 29.184 = 29
// g 0.58700 * 256 = 150.272 = 150
// r 0.29900 * 256 = 76.544 = 77
// JPEG 7 bit Y:
// b 0.11400 * 128 = 14.592 = 15
// g 0.58700 * 128 = 75.136 = 75
// r 0.29900 * 128 = 38.272 = 38
// JPEG 8 bit U:
// b  0.50000 * 255 = 127.5 = 127
// g -0.33126 * 255 = -84.4713 = -84
// r -0.16874 * 255 = -43.0287 = -43
// JPEG 8 bit V:
// b -0.08131 * 255 = -20.73405 = -20
// g -0.41869 * 255 = -106.76595 = -107
// r  0.50000 * 255 = 127.5 = 127
335

336
// Computes full-range (JPEG) luma from 8-bit R, G, B using 7-bit fixed-point
// weights that sum to 128; 64 is one half for rounding.
static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
  const int weighted = 38 * r + 75 * g + 15 * b;
  return (weighted + 64) >> 7;
}

340 341 342 343 344 345 346 347 348
// Computes the JPEG-range U chroma value from 8-bit R, G, B.
// 0x8080 is (128 << 8) + 128: a centre of 128 plus rounding.
static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
  const int weighted = 127 * b - 84 * g - 43 * r;
  return (weighted + 0x8080) >> 8;
}
// Computes the JPEG-range V chroma value from 8-bit R, G, B.
// 0x8080 is (128 << 8) + 128: a centre of 128 plus rounding.
static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
  const int weighted = 127 * r - 107 * g - 20 * b;
  return (weighted + 0x8080) >> 8;
}

#define AVGB(a, b) (((a) + (b) + 1) >> 1)

349 350
#define MAKEROWYJ(NAME, R, G, B, BPP) \
void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) {      \
351 352
  int x;                                                                       \
  for (x = 0; x < width; ++x) {                                                \
353 354 355 356 357
    dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]);              \
    src_argb0 += BPP;                                                          \
    dst_y += 1;                                                                \
  }                                                                            \
}                                                                              \
358 359 360
void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb,             \
                        uint8* dst_u, uint8* dst_v, int width) {               \
  const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                           \
361 362
  int x;                                                                       \
  for (x = 0; x < width - 1; x += 2) {                                         \
363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383
    uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]),                            \
                    AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP]));               \
    uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]),                            \
                    AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP]));               \
    uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]),                            \
                    AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP]));               \
    dst_u[0] = RGBToUJ(ar, ag, ab);                                            \
    dst_v[0] = RGBToVJ(ar, ag, ab);                                            \
    src_rgb0 += BPP * 2;                                                       \
    src_rgb1 += BPP * 2;                                                       \
    dst_u += 1;                                                                \
    dst_v += 1;                                                                \
  }                                                                            \
  if (width & 1) {                                                             \
    uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]);                                 \
    uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]);                                 \
    uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]);                                 \
    dst_u[0] = RGBToUJ(ar, ag, ab);                                            \
    dst_v[0] = RGBToVJ(ar, ag, ab);                                            \
  }                                                                            \
}
384 385 386 387

MAKEROWYJ(ARGB, 2, 1, 0, 4)
#undef MAKEROWYJ

388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409
// Produces JPEG-range U and V for a single ARGB row, one U/V pair per two
// horizontal pixels (truncated average of the pair). An odd trailing pixel
// is converted on its own.
void ARGBToUVJ422Row_C(const uint8* src_argb,
                       uint8* dst_u, uint8* dst_v, int width) {
  int remaining;
  for (remaining = width; remaining > 1; remaining -= 2) {
    const uint8 avg_b = (src_argb[0] + src_argb[4]) >> 1;
    const uint8 avg_g = (src_argb[1] + src_argb[5]) >> 1;
    const uint8 avg_r = (src_argb[2] + src_argb[6]) >> 1;
    dst_u[0] = RGBToUJ(avg_r, avg_g, avg_b);
    dst_v[0] = RGBToVJ(avg_r, avg_g, avg_b);
    dst_u += 1;
    dst_v += 1;
    src_argb += 8;
  }
  if (width & 1) {
    const uint8 last_b = src_argb[0];
    const uint8 last_g = src_argb[1];
    const uint8 last_r = src_argb[2];
    dst_u[0] = RGBToUJ(last_r, last_g, last_b);
    dst_v[0] = RGBToVJ(last_r, last_g, last_b);
  }
}

410
// Converts a row of RGB565 pixels to Y: each channel is unpacked, widened to
// 8 bits by bit replication, and passed to RGBToY.
void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 lo = src_rgb565[0];
    const uint8 hi = src_rgb565[1];
    const uint8 b5 = lo & 0x1f;
    const uint8 g6 = (lo >> 5) | ((hi & 0x07) << 3);
    const uint8 r5 = hi >> 3;
    const uint8 b8 = (b5 << 3) | (b5 >> 2);
    const uint8 g8 = (g6 << 2) | (g6 >> 4);
    const uint8 r8 = (r5 << 3) | (r5 >> 2);
    dst_y[0] = RGBToY(r8, g8, b8);
    dst_y += 1;
    src_rgb565 += 2;
  }
}

425
// Converts a row of ARGB1555 pixels to Y: each 5-bit colour channel is
// unpacked, widened to 8 bits by bit replication, and passed to RGBToY.
// The alpha bit is ignored.
void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 lo = src_argb1555[0];
    const uint8 hi = src_argb1555[1];
    const uint8 b5 = lo & 0x1f;
    const uint8 g5 = (lo >> 5) | ((hi & 0x03) << 3);
    const uint8 r5 = (hi & 0x7c) >> 2;
    const uint8 b8 = (b5 << 3) | (b5 >> 2);
    const uint8 g8 = (g5 << 3) | (g5 >> 2);
    const uint8 r8 = (r5 << 3) | (r5 >> 2);
    dst_y[0] = RGBToY(r8, g8, b8);
    dst_y += 1;
    src_argb1555 += 2;
  }
}

// Converts a row of ARGB4444 pixels to Y: each 4-bit colour nibble is
// duplicated into a full byte and passed to RGBToY. Alpha is ignored.
void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 lo = src_argb4444[0];
    const uint8 hi = src_argb4444[1];
    const uint8 b4 = lo & 0x0f;
    const uint8 g4 = lo >> 4;
    const uint8 r4 = hi & 0x0f;
    const uint8 b8 = (b4 << 4) | b4;
    const uint8 g8 = (g4 << 4) | g4;
    const uint8 r8 = (r4 << 4) | r4;
    dst_y[0] = RGBToY(r8, g8, b8);
    dst_y += 1;
    src_argb4444 += 2;
  }
}

fbarchard@google.com's avatar
fbarchard@google.com committed
455
// Produces U and V from two rows of RGB565 (the row at src_rgb565 and the
// row src_stride_rgb565 bytes after it), one U/V pair per two horizontal
// pixels. Channel values are summed rather than averaged, then widened to
// 8 bits by bit replication.
void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
                     uint8* dst_u, uint8* dst_v, int width) {
  const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565;
  int remaining;
  for (remaining = width; remaining > 1; remaining -= 2) {
    // c0/c1: pixels of the current row; n0/n1: pixels of the next row.
    const uint8 c0_b = src_rgb565[0] & 0x1f;
    const uint8 c0_g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    const uint8 c0_r = src_rgb565[1] >> 3;
    const uint8 c1_b = src_rgb565[2] & 0x1f;
    const uint8 c1_g = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
    const uint8 c1_r = src_rgb565[3] >> 3;
    const uint8 n0_b = next_rgb565[0] & 0x1f;
    const uint8 n0_g = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
    const uint8 n0_r = next_rgb565[1] >> 3;
    const uint8 n1_b = next_rgb565[2] & 0x1f;
    const uint8 n1_g = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
    const uint8 n1_r = next_rgb565[3] >> 3;
    // Summing four 5/6/5-bit values gives 7/8/7-bit values.
    const uint8 sum_b = (c0_b + c1_b + n0_b + n1_b);
    const uint8 sum_g = (c0_g + c1_g + n0_g + n1_g);
    const uint8 sum_r = (c0_r + c1_r + n0_r + n1_r);
    // Widen the 7-bit sums to 8 bits; green is already 8 bits.
    const uint8 b8 = (sum_b << 1) | (sum_b >> 6);
    const uint8 r8 = (sum_r << 1) | (sum_r >> 6);
    dst_u[0] = RGBToU(r8, sum_g, b8);
    dst_v[0] = RGBToV(r8, sum_g, b8);
    dst_u += 1;
    dst_v += 1;
    src_rgb565 += 4;
    next_rgb565 += 4;
  }
  if (width & 1) {
    const uint8 c0_b = src_rgb565[0] & 0x1f;
    const uint8 c0_g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    const uint8 c0_r = src_rgb565[1] >> 3;
    const uint8 n0_b = next_rgb565[0] & 0x1f;
    const uint8 n0_g = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
    const uint8 n0_r = next_rgb565[1] >> 3;
    // Summing two 5/6/5-bit values gives 6/7/6-bit values.
    const uint8 sum_b = (c0_b + n0_b);
    const uint8 sum_g = (c0_g + n0_g);
    const uint8 sum_r = (c0_r + n0_r);
    const uint8 b8 = (sum_b << 2) | (sum_b >> 4);
    const uint8 g8 = (sum_g << 1) | (sum_g >> 6);
    const uint8 r8 = (sum_r << 2) | (sum_r >> 4);
    dst_u[0] = RGBToU(r8, g8, b8);
    dst_v[0] = RGBToV(r8, g8, b8);
  }
}

// Produces U and V from two rows of ARGB1555 (the row at src_argb1555 and
// the row src_stride_argb1555 bytes after it), one U/V pair per two
// horizontal pixels. Channel values are summed rather than averaged, then
// widened to 8 bits by bit replication. The alpha bit is ignored.
void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
                       uint8* dst_u, uint8* dst_v, int width) {
  const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8 b0 = src_argb1555[0] & 0x1f;
    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8 b1 = src_argb1555[2] & 0x1f;
    uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
    uint8 r1 = (src_argb1555[3] & 0x7c) >> 2;
    uint8 b2 = next_argb1555[0] & 0x1f;
    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8 b3 = next_argb1555[2] & 0x1f;
    uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
    uint8 r3 = (next_argb1555[3] & 0x7c) >> 2;
    uint8 b = (b0 + b1 + b2 + b3);  // 555 * 4 = 777.
    uint8 g = (g0 + g1 + g2 + g3);
    uint8 r = (r0 + r1 + r2 + r3);
    b = (b << 1) | (b >> 6);  // 777 -> 888.
    g = (g << 1) | (g >> 6);
    r = (r << 1) | (r >> 6);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
    src_argb1555 += 4;
    next_argb1555 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8 b0 = src_argb1555[0] & 0x1f;
    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8 b2 = next_argb1555[0] & 0x1f;
    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    // Red occupies bits 2..6 of the high byte, as for every other pixel in
    // this function; bit 7 is alpha and must be masked off.
    uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8 b = (b0 + b2);  // 555 * 2 = 666.
    uint8 g = (g0 + g2);
    uint8 r = (r0 + r2);
    b = (b << 2) | (b >> 4);  // 666 -> 888.
    g = (g << 2) | (g >> 4);
    r = (r << 2) | (r >> 4);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
  }
}

// Produces U and V from two rows of ARGB4444 (the row at src_argb4444 and
// the row src_stride_argb4444 bytes after it), one U/V pair per two
// horizontal pixels. Channel nibbles are summed rather than averaged, then
// widened to 8 bits by bit replication. Alpha is ignored.
void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
                       uint8* dst_u, uint8* dst_v, int width) {
  const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444;
  int remaining;
  for (remaining = width; remaining > 1; remaining -= 2) {
    // c0/c1: pixels of the current row; n0/n1: pixels of the next row.
    const uint8 c0_b = src_argb4444[0] & 0x0f;
    const uint8 c0_g = src_argb4444[0] >> 4;
    const uint8 c0_r = src_argb4444[1] & 0x0f;
    const uint8 c1_b = src_argb4444[2] & 0x0f;
    const uint8 c1_g = src_argb4444[2] >> 4;
    const uint8 c1_r = src_argb4444[3] & 0x0f;
    const uint8 n0_b = next_argb4444[0] & 0x0f;
    const uint8 n0_g = next_argb4444[0] >> 4;
    const uint8 n0_r = next_argb4444[1] & 0x0f;
    const uint8 n1_b = next_argb4444[2] & 0x0f;
    const uint8 n1_g = next_argb4444[2] >> 4;
    const uint8 n1_r = next_argb4444[3] & 0x0f;
    // Summing four 4-bit values gives 6-bit values.
    const uint8 sum_b = (c0_b + c1_b + n0_b + n1_b);
    const uint8 sum_g = (c0_g + c1_g + n0_g + n1_g);
    const uint8 sum_r = (c0_r + c1_r + n0_r + n1_r);
    const uint8 b8 = (sum_b << 2) | (sum_b >> 4);
    const uint8 g8 = (sum_g << 2) | (sum_g >> 4);
    const uint8 r8 = (sum_r << 2) | (sum_r >> 4);
    dst_u[0] = RGBToU(r8, g8, b8);
    dst_v[0] = RGBToV(r8, g8, b8);
    dst_u += 1;
    dst_v += 1;
    src_argb4444 += 4;
    next_argb4444 += 4;
  }
  if (width & 1) {
    const uint8 c0_b = src_argb4444[0] & 0x0f;
    const uint8 c0_g = src_argb4444[0] >> 4;
    const uint8 c0_r = src_argb4444[1] & 0x0f;
    const uint8 n0_b = next_argb4444[0] & 0x0f;
    const uint8 n0_g = next_argb4444[0] >> 4;
    const uint8 n0_r = next_argb4444[1] & 0x0f;
    // Summing two 4-bit values gives 5-bit values.
    const uint8 sum_b = (c0_b + n0_b);
    const uint8 sum_g = (c0_g + n0_g);
    const uint8 sum_r = (c0_r + n0_r);
    const uint8 b8 = (sum_b << 3) | (sum_b >> 2);
    const uint8 g8 = (sum_g << 3) | (sum_g >> 2);
    const uint8 r8 = (sum_r << 3) | (sum_r >> 2);
    dst_u[0] = RGBToU(r8, g8, b8);
    dst_v[0] = RGBToV(r8, g8, b8);
  }
}

598 599
// Produces one U and one V byte for every ARGB pixel in the row.
void ARGBToUV444Row_C(const uint8* src_argb,
                      uint8* dst_u, uint8* dst_v, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 blue = src_argb[0];
    const uint8 green = src_argb[1];
    const uint8 red = src_argb[2];
    dst_u[0] = RGBToU(red, green, blue);
    dst_v[0] = RGBToV(red, green, blue);
    dst_u += 1;
    dst_v += 1;
    src_argb += 4;
  }
}

// Produces U and V for a single ARGB row, one U/V pair per two horizontal
// pixels (truncated average of the pair). An odd trailing pixel is converted
// on its own.
void ARGBToUV422Row_C(const uint8* src_argb,
                      uint8* dst_u, uint8* dst_v, int width) {
  int remaining;
  for (remaining = width; remaining > 1; remaining -= 2) {
    const uint8 avg_b = (src_argb[0] + src_argb[4]) >> 1;
    const uint8 avg_g = (src_argb[1] + src_argb[5]) >> 1;
    const uint8 avg_r = (src_argb[2] + src_argb[6]) >> 1;
    dst_u[0] = RGBToU(avg_r, avg_g, avg_b);
    dst_v[0] = RGBToV(avg_r, avg_g, avg_b);
    dst_u += 1;
    dst_v += 1;
    src_argb += 8;
  }
  if (width & 1) {
    const uint8 last_b = src_argb[0];
    const uint8 last_g = src_argb[1];
    const uint8 last_r = src_argb[2];
    dst_u[0] = RGBToU(last_r, last_g, last_b);
    dst_v[0] = RGBToV(last_r, last_g, last_b);
  }
}

// Produces U and V for a single ARGB row, one U/V pair per four horizontal
// pixels (truncated average of the group). A final group of one to three
// pixels is averaged over however many pixels remain.
void ARGBToUV411Row_C(const uint8* src_argb,
                      uint8* dst_u, uint8* dst_v, int width) {
  int remaining;
  for (remaining = width; remaining > 3; remaining -= 4) {
    const uint8 avg_b =
        (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2;
    const uint8 avg_g =
        (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2;
    const uint8 avg_r =
        (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2;
    dst_u[0] = RGBToU(avg_r, avg_g, avg_b);
    dst_v[0] = RGBToV(avg_r, avg_g, avg_b);
    dst_u += 1;
    dst_v += 1;
    src_argb += 16;
  }
  switch (width & 3) {
    case 3: {
      const uint8 avg_b = (src_argb[0] + src_argb[4] + src_argb[8]) / 3;
      const uint8 avg_g = (src_argb[1] + src_argb[5] + src_argb[9]) / 3;
      const uint8 avg_r = (src_argb[2] + src_argb[6] + src_argb[10]) / 3;
      dst_u[0] = RGBToU(avg_r, avg_g, avg_b);
      dst_v[0] = RGBToV(avg_r, avg_g, avg_b);
      break;
    }
    case 2: {
      const uint8 avg_b = (src_argb[0] + src_argb[4]) >> 1;
      const uint8 avg_g = (src_argb[1] + src_argb[5]) >> 1;
      const uint8 avg_r = (src_argb[2] + src_argb[6]) >> 1;
      dst_u[0] = RGBToU(avg_r, avg_g, avg_b);
      dst_v[0] = RGBToV(avg_r, avg_g, avg_b);
      break;
    }
    case 1: {
      const uint8 last_b = src_argb[0];
      const uint8 last_g = src_argb[1];
      const uint8 last_r = src_argb[2];
      dst_u[0] = RGBToU(last_r, last_g, last_b);
      dst_v[0] = RGBToV(last_r, last_g, last_b);
      break;
    }
    default:
      break;
  }
}
668

669
// Converts a row of ARGB pixels to gray: the three colour bytes are replaced
// by the JPEG-range luma of the pixel and alpha is copied through.
void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 luma = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
    dst_argb[0] = luma;
    dst_argb[1] = luma;
    dst_argb[2] = luma;
    dst_argb[3] = src_argb[3];
    src_argb += 4;
    dst_argb += 4;
  }
}

680 681
// Applies a sepia tone to a row of ARGB pixels in place. Each output channel
// is a fixed 7-bit fixed-point blend of the input B, G and R.
void ARGBSepiaRow_C(uint8* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const int blue = dst_argb[0];
    const int green = dst_argb[1];
    const int red = dst_argb[2];
    const int sepia_b = (blue * 17 + green * 68 + red * 35) >> 7;
    const int sepia_g = (blue * 22 + green * 88 + red * 45) >> 7;
    const int sepia_r = (blue * 24 + green * 98 + red * 50) >> 7;
    // Blue weights sum to 120 (< 128), so blue cannot exceed 255 and needs no
    // clamp. Alpha (byte 3) is left untouched.
    dst_argb[0] = sepia_b;
    dst_argb[1] = clamp255(sepia_g);
    dst_argb[2] = clamp255(sepia_r);
    dst_argb += 4;
  }
}

698
// Apply color matrix to a row of image. Matrix is signed.
699 700 701
// TODO(fbarchard): Consider adding rounding (+32).
void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
                          const int8* matrix_argb, int width) {
702 703
  int x;
  for (x = 0; x < width; ++x) {
704 705 706 707
    int b = src_argb[0];
    int g = src_argb[1];
    int r = src_argb[2];
    int a = src_argb[3];
708
    int sb = (b * matrix_argb[0] + g * matrix_argb[1] +
709
              r * matrix_argb[2] + a * matrix_argb[3]) >> 6;
710
    int sg = (b * matrix_argb[4] + g * matrix_argb[5] +
711
              r * matrix_argb[6] + a * matrix_argb[7]) >> 6;
712
    int sr = (b * matrix_argb[8] + g * matrix_argb[9] +
713 714 715
              r * matrix_argb[10] + a * matrix_argb[11]) >> 6;
    int sa = (b * matrix_argb[12] + g * matrix_argb[13] +
              r * matrix_argb[14] + a * matrix_argb[15]) >> 6;
716 717 718
    dst_argb[0] = Clamp(sb);
    dst_argb[1] = Clamp(sg);
    dst_argb[2] = Clamp(sr);
719 720
    dst_argb[3] = Clamp(sa);
    src_argb += 4;
721 722 723 724
    dst_argb += 4;
  }
}

725 726
// Remaps every channel of a row of ARGB pixels in place through a lookup
// table. table_argb is indexed as value * 4 + channel, where channel is the
// byte position within the pixel.
void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const int blue = dst_argb[0];
    const int green = dst_argb[1];
    const int red = dst_argb[2];
    const int alpha = dst_argb[3];
    dst_argb[0] = table_argb[blue * 4 + 0];
    dst_argb[1] = table_argb[green * 4 + 1];
    dst_argb[2] = table_argb[red * 4 + 2];
    dst_argb[3] = table_argb[alpha * 4 + 3];
    dst_argb += 4;
  }
}

741 742
// Remaps the three colour channels of a row of ARGB pixels in place through
// a lookup table indexed as value * 4 + channel. The fourth byte of each
// pixel (alpha) is left unchanged.
void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const int blue = dst_argb[0];
    const int green = dst_argb[1];
    const int red = dst_argb[2];
    dst_argb[0] = table_argb[blue * 4 + 0];
    dst_argb[1] = table_argb[green * 4 + 1];
    dst_argb[2] = table_argb[red * 4 + 2];
    dst_argb += 4;
  }
}

755 756
// Quantizes the colour channels of a row of ARGB pixels in place:
//   out = ((in * scale) >> 16) * interval_size + interval_offset
// The fourth byte of each pixel (alpha) is left unchanged.
void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
                       int interval_offset, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const int blue = dst_argb[0];
    const int green = dst_argb[1];
    const int red = dst_argb[2];
    const int blue_step = blue * scale >> 16;
    const int green_step = green * scale >> 16;
    const int red_step = red * scale >> 16;
    dst_argb[0] = blue_step * interval_size + interval_offset;
    dst_argb[1] = green_step * interval_size + interval_offset;
    dst_argb[2] = red_step * interval_size + interval_offset;
    dst_argb += 4;
  }
}

769 770 771 772 773 774 775 776 777 778
// REPEAT8 widens an 8-bit value to 16 bits by copying it into both bytes.
// SHADE multiplies two such 16-bit values and keeps the top 8 bits of the
// 32-bit product.
#define REPEAT8(v) ((v) | ((v) << 8))
#define SHADE(f, v) (((v) * (f)) >> 24)

// Scales every channel of a row of ARGB pixels by the matching byte of
// value, which is laid out with B in the low byte and A in the high byte.
void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
                    uint32 value) {
  const uint32 b_scale = REPEAT8(value & 0xff);
  const uint32 g_scale = REPEAT8((value >> 8) & 0xff);
  const uint32 r_scale = REPEAT8((value >> 16) & 0xff);
  const uint32 a_scale = REPEAT8(value >> 24);

  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint32 b16 = REPEAT8(src_argb[0]);
    const uint32 g16 = REPEAT8(src_argb[1]);
    const uint32 r16 = REPEAT8(src_argb[2]);
    const uint32 a16 = REPEAT8(src_argb[3]);
    src_argb += 4;
    dst_argb[0] = SHADE(b16, b_scale);
    dst_argb[1] = SHADE(g16, g_scale);
    dst_argb[2] = SHADE(r16, r_scale);
    dst_argb[3] = SHADE(a16, a_scale);
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE

796 797 798
#define REPEAT8(v) (v) | ((v) << 8)
#define SHADE(f, v) v * f >> 16

799 800
void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1,
                       uint8* dst_argb, int width) {
801 802
  int i;
  for (i = 0; i < width; ++i) {
803 804 805 806 807 808 809 810
    const uint32 b = REPEAT8(src_argb0[0]);
    const uint32 g = REPEAT8(src_argb0[1]);
    const uint32 r = REPEAT8(src_argb0[2]);
    const uint32 a = REPEAT8(src_argb0[3]);
    const uint32 b_scale = src_argb1[0];
    const uint32 g_scale = src_argb1[1];
    const uint32 r_scale = src_argb1[2];
    const uint32 a_scale = src_argb1[3];
811 812 813 814
    dst_argb[0] = SHADE(b, b_scale);
    dst_argb[1] = SHADE(g, g_scale);
    dst_argb[2] = SHADE(r, r_scale);
    dst_argb[3] = SHADE(a, a_scale);
815 816
    src_argb0 += 4;
    src_argb1 += 4;
817 818 819 820 821 822
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE

823
#define SHADE(f, v) clamp255(v + f)
824 825 826

void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1,
                  uint8* dst_argb, int width) {
827 828
  int i;
  for (i = 0; i < width; ++i) {
fbarchard@google.com's avatar
fbarchard@google.com committed
829 830 831 832 833 834 835 836
    const int b = src_argb0[0];
    const int g = src_argb0[1];
    const int r = src_argb0[2];
    const int a = src_argb0[3];
    const int b_add = src_argb1[0];
    const int g_add = src_argb1[1];
    const int r_add = src_argb1[2];
    const int a_add = src_argb1[3];
837 838 839 840 841 842 843 844 845 846 847
    dst_argb[0] = SHADE(b, b_add);
    dst_argb[1] = SHADE(g, g_add);
    dst_argb[2] = SHADE(r, r_add);
    dst_argb[3] = SHADE(a, a_add);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
#undef SHADE

848
#define SHADE(f, v) clamp0(f - v)
849 850 851

void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1,
                       uint8* dst_argb, int width) {
852 853
  int i;
  for (i = 0; i < width; ++i) {
fbarchard@google.com's avatar
fbarchard@google.com committed
854 855 856 857 858 859 860 861
    const int b = src_argb0[0];
    const int g = src_argb0[1];
    const int r = src_argb0[2];
    const int a = src_argb0[3];
    const int b_sub = src_argb1[0];
    const int g_sub = src_argb1[1];
    const int r_sub = src_argb1[2];
    const int a_sub = src_argb1[3];
862 863 864 865 866 867 868 869 870 871 872
    dst_argb[0] = SHADE(b, b_sub);
    dst_argb[1] = SHADE(g, g_sub);
    dst_argb[2] = SHADE(r, r_sub);
    dst_argb[3] = SHADE(a, a_sub);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
#undef SHADE

fbarchard@google.com's avatar
fbarchard@google.com committed
873 874 875
// Sobel functions which mimics SSSE3.
void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
                 uint8* dst_sobelx, int width) {
876 877
  int i;
  for (i = 0; i < width; ++i) {
fbarchard@google.com's avatar
fbarchard@google.com committed
878 879 880 881 882 883 884 885 886
    int a = src_y0[i];
    int b = src_y1[i];
    int c = src_y2[i];
    int a_sub = src_y0[i + 2];
    int b_sub = src_y1[i + 2];
    int c_sub = src_y2[i + 2];
    int a_diff = a - a_sub;
    int b_diff = b - b_sub;
    int c_diff = c - c_sub;
887
    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
888
    dst_sobelx[i] = (uint8)(clamp255(sobel));
fbarchard@google.com's avatar
fbarchard@google.com committed
889 890 891 892 893
  }
}

void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
                 uint8* dst_sobely, int width) {
894 895
  int i;
  for (i = 0; i < width; ++i) {
fbarchard@google.com's avatar
fbarchard@google.com committed
896 897 898 899 900 901 902 903 904
    int a = src_y0[i + 0];
    int b = src_y0[i + 1];
    int c = src_y0[i + 2];
    int a_sub = src_y1[i + 0];
    int b_sub = src_y1[i + 1];
    int c_sub = src_y1[i + 2];
    int a_diff = a - a_sub;
    int b_diff = b - b_sub;
    int c_diff = c - c_sub;
905
    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
906
    dst_sobely[i] = (uint8)(clamp255(sobel));
fbarchard@google.com's avatar
fbarchard@google.com committed
907 908 909
  }
}

910 911
void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
                uint8* dst_argb, int width) {
912 913
  int i;
  for (i = 0; i < width; ++i) {
914 915
    int r = src_sobelx[i];
    int b = src_sobely[i];
916
    int s = clamp255(r + b);
917 918 919 920
    dst_argb[0] = (uint8)(s);
    dst_argb[1] = (uint8)(s);
    dst_argb[2] = (uint8)(s);
    dst_argb[3] = (uint8)(255u);
921 922 923 924
    dst_argb += 4;
  }
}

925 926
void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
                       uint8* dst_y, int width) {
927 928
  int i;
  for (i = 0; i < width; ++i) {
929 930 931
    int r = src_sobelx[i];
    int b = src_sobely[i];
    int s = clamp255(r + b);
932
    dst_y[i] = (uint8)(s);
933 934 935
  }
}

936 937
void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
                  uint8* dst_argb, int width) {
938 939
  int i;
  for (i = 0; i < width; ++i) {
940 941
    int r = src_sobelx[i];
    int b = src_sobely[i];
942
    int g = clamp255(r + b);
943 944 945 946
    dst_argb[0] = (uint8)(b);
    dst_argb[1] = (uint8)(g);
    dst_argb[2] = (uint8)(r);
    dst_argb[3] = (uint8)(255u);
947 948 949 950
    dst_argb += 4;
  }
}

951
void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
952
  // Copy a Y to RGB.
953 954
  int x;
  for (x = 0; x < width; ++x) {
955 956 957 958 959 960 961 962
    uint8 y = src_y[0];
    dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
    dst_argb[3] = 255u;
    dst_argb += 4;
    ++src_y;
  }
}

963
// YUV to RGB conversion constants.
964
// Y contribution to R,G,B.  Scale and bias.
965 966 967
// TODO(fbarchard): Consider moving constants into a common header.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB 1160 /* 1.164 * 64 * 16 - adjusted for even error distribution */
968 969

// U and V contributions to R,G,B.
970 971 972 973
#define UB -128 /* -min(128, round(2.018 * 64)) */
#define UG 25 /* -round(-0.391 * 64) */
#define VG 52 /* -round(-0.813 * 64) */
#define VR -102 /* -round(1.596 * 64) */
974 975 976 977

// Bias values to subtract 16 from Y and 128 from U and V.
#define BB (UB * 128            - YGB)
#define BG (UG * 128 + VG * 128 - YGB)
978
#define BR            (VR * 128 - YGB)
979

980
// C reference code that mimics the YUV assembly.
981 982
static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,
                              uint8* b, uint8* g, uint8* r) {
983
  uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16;
984
  *b = Clamp((int32)(BB - (         u * UB) + y1) >> 6);
985
  *g = Clamp((int32)(BG - (v * VG + u * UG) + y1) >> 6);
986
  *r = Clamp((int32)(BR - (v * VR         ) + y1) >> 6);
987
}
988 989 990 991

// C reference code that mimics the YUV assembly.
static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) {
  uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16;
992 993 994
  *b = Clamp((int32)(y1 - YGB) >> 6);
  *g = Clamp((int32)(y1 - YGB) >> 6);
  *r = Clamp((int32)(y1 - YGB) >> 6);
995 996
}

997 998 999 1000 1001 1002 1003 1004 1005 1006
#undef YG
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef BB
#undef BG
#undef BR

1007
#if !defined(LIBYUV_DISABLE_NEON) && \
1008
    (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
1009 1010
// C mimic assembly.
// TODO(fbarchard): Remove subsampling from Neon.
1011 1012 1013
void I444ToARGBRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
1014 1015
                     uint8* rgb_buf,
                     int width) {
1016 1017
  int x;
  for (x = 0; x < width - 1; x += 2) {
1018 1019
    uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
    uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
1020
    YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1021
    rgb_buf[3] = 255;
1022
    YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1023
    rgb_buf[7] = 255;
1024 1025 1026
    src_y += 2;
    src_u += 2;
    src_v += 2;
1027 1028 1029
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1030 1031
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1032 1033 1034
  }
}
#else
1035 1036 1037
void I444ToARGBRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
1038 1039
                     uint8* rgb_buf,
                     int width) {
1040 1041
  int x;
  for (x = 0; x < width; ++x) {
1042 1043
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1044
    rgb_buf[3] = 255;
1045 1046 1047
    src_y += 1;
    src_u += 1;
    src_v += 1;
1048 1049 1050
    rgb_buf += 4;  // Advance 1 pixel.
  }
}
1051
#endif
1052

1053
// Also used for 420
1054 1055 1056
void I422ToARGBRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
1057 1058
                     uint8* rgb_buf,
                     int width) {
1059 1060
  int x;
  for (x = 0; x < width - 1; x += 2) {
1061 1062
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1063
    rgb_buf[3] = 255;
1064 1065
    YuvPixel(src_y[1], src_u[0], src_v[0],
             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1066
    rgb_buf[7] = 255;
1067 1068 1069
    src_y += 2;
    src_u += 1;
    src_v += 1;
1070 1071 1072
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1073 1074
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1075
    rgb_buf[3] = 255;
1076 1077 1078
  }
}

1079 1080 1081 1082 1083
// C reference code that mimics the YUV assembly.
// *  R = Y                + 1.40200 * Cr
// *  G = Y - 0.34414 * Cb - 0.71414 * Cr
// *  B = Y + 1.77200 * Cb

1084
#define YGJ 64 /* (int8)round(1.000 * 64) */
1085

1086 1087
#define UBJ 113 /* (int8)round(1.772 * 64) */
#define UGJ -22 /* (int8)round(-0.34414 * 64) */
1088 1089 1090
#define URJ 0

#define VBJ 0
1091 1092
#define VGJ -46 /* (int8)round(-0.71414 * 64) */
#define VRJ 90 /* (int8)round(1.402 * 64) */
1093 1094

// Bias
1095 1096 1097
#define BBJ (UBJ * 128 + VBJ * 128)
#define BGJ (UGJ * 128 + VGJ * 128)
#define BRJ (URJ * 128 + VRJ * 128)
1098 1099 1100

static __inline void YuvJPixel(uint8 y, uint8 u, uint8 v,
                              uint8* b, uint8* g, uint8* r) {
1101 1102 1103 1104
  uint32 y1 = (uint32)(y * YGJ);
  *b = Clamp((int32)(u * UBJ + v * VBJ + y1 - BBJ) >> 6);
  *g = Clamp((int32)(u * UGJ + v * VGJ + y1 - BGJ) >> 6);
  *r = Clamp((int32)(u * URJ + v * VRJ + y1 - BRJ) >> 6);
1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131
}

// Convert a row of Y with half width U and V to ARGB using YuvJPixel().
// Each U,V sample is shared by two consecutive Y samples.
// Output bytes per pixel are B, G, R, 255.
void J422ToARGBRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* rgb_buf,
                     int width) {
  int pairs;
  for (pairs = width / 2; pairs > 0; --pairs) {
    YuvJPixel(src_y[0], src_u[0], src_v[0],
              &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
    rgb_buf[3] = 255;
    YuvJPixel(src_y[1], src_u[0], src_v[0],
              &rgb_buf[4], &rgb_buf[5], &rgb_buf[6]);
    rgb_buf[7] = 255;
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // 2 pixels written.
  }
  if (width & 1) {
    // Odd width: one trailing pixel.
    YuvJPixel(src_y[0], src_u[0], src_v[0],
              &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
    rgb_buf[3] = 255;
  }
}

1132 1133 1134
void I422ToRGB24Row_C(const uint8* src_y,
                      const uint8* src_u,
                      const uint8* src_v,
1135 1136
                      uint8* rgb_buf,
                      int width) {
1137 1138
  int x;
  for (x = 0; x < width - 1; x += 2) {
1139 1140 1141 1142
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    YuvPixel(src_y[1], src_u[0], src_v[0],
             rgb_buf + 3, rgb_buf + 4, rgb_buf + 5);
1143 1144 1145
    src_y += 2;
    src_u += 1;
    src_v += 1;
1146 1147 1148
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
1149 1150
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1151 1152 1153
  }
}

1154 1155 1156
void I422ToRAWRow_C(const uint8* src_y,
                    const uint8* src_u,
                    const uint8* src_v,
1157 1158
                    uint8* rgb_buf,
                    int width) {
1159 1160
  int x;
  for (x = 0; x < width - 1; x += 2) {
1161 1162 1163 1164
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
    YuvPixel(src_y[1], src_u[0], src_v[0],
             rgb_buf + 5, rgb_buf + 4, rgb_buf + 3);
1165 1166 1167
    src_y += 2;
    src_u += 1;
    src_v += 1;
1168 1169 1170
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
1171 1172
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
1173 1174 1175
  }
}

1176 1177 1178
void I422ToARGB4444Row_C(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
1179 1180 1181 1182 1183 1184 1185 1186
                         uint8* dst_argb4444,
                         int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
1187 1188
  int x;
  for (x = 0; x < width - 1; x += 2) {
1189 1190
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
1191 1192 1193 1194 1195 1196
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    b1 = b1 >> 4;
    g1 = g1 >> 4;
    r1 = r1 >> 4;
1197
    *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
1198
        (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000;
1199 1200 1201
    src_y += 2;
    src_u += 1;
    src_v += 1;
1202 1203 1204
    dst_argb4444 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
1205
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1206 1207 1208
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
1209
    *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
1210 1211 1212 1213
        0xf000;
  }
}

1214 1215 1216
void I422ToARGB1555Row_C(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
1217 1218 1219 1220 1221 1222 1223 1224
                         uint8* dst_argb1555,
                         int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
1225 1226
  int x;
  for (x = 0; x < width - 1; x += 2) {
1227 1228
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
1229 1230 1231 1232 1233 1234
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 3;
    r1 = r1 >> 3;
1235
    *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
1236
        (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000;
1237 1238 1239
    src_y += 2;
    src_u += 1;
    src_v += 1;
1240 1241 1242
    dst_argb1555 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
1243
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1244 1245 1246
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
1247
    *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
1248 1249 1250 1251
        0x8000;
  }
}

1252
void I422ToRGB565Row_C(const uint8* src_y,
1253 1254 1255 1256
                       const uint8* src_u,
                       const uint8* src_v,
                       uint8* dst_rgb565,
                       int width) {
1257 1258 1259 1260 1261 1262
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
1263 1264
  int x;
  for (x = 0; x < width - 1; x += 2) {
1265 1266
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
1267 1268 1269 1270 1271 1272
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
1273
    *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
1274
        (b1 << 16) | (g1 << 21) | (r1 << 27);
1275 1276 1277
    src_y += 2;
    src_u += 1;
    src_v += 1;
1278 1279 1280
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
1281
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1282 1283 1284
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
1285
    *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
1286 1287 1288
  }
}

1289 1290 1291
void I411ToARGBRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
1292 1293
                     uint8* rgb_buf,
                     int width) {
1294 1295
  int x;
  for (x = 0; x < width - 3; x += 4) {
1296
    YuvPixel(src_y[0], src_u[0], src_v[0],
1297
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1298 1299 1300 1301 1302 1303 1304 1305 1306 1307
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0],
             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;
    YuvPixel(src_y[2], src_u[0], src_v[0],
             rgb_buf + 8, rgb_buf + 9, rgb_buf + 10);
    rgb_buf[11] = 255;
    YuvPixel(src_y[3], src_u[0], src_v[0],
             rgb_buf + 12, rgb_buf + 13, rgb_buf + 14);
    rgb_buf[15] = 255;
1308 1309 1310
    src_y += 4;
    src_u += 1;
    src_v += 1;
1311 1312 1313
    rgb_buf += 16;  // Advance 4 pixels.
  }
  if (width & 2) {
1314 1315 1316 1317 1318 1319
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0],
             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;
1320
    src_y += 2;
1321 1322 1323
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1324 1325 1326
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
1327 1328 1329
  }
}

1330 1331
void NV12ToARGBRow_C(const uint8* src_y,
                     const uint8* usrc_v,
1332 1333
                     uint8* rgb_buf,
                     int width) {
1334 1335
  int x;
  for (x = 0; x < width - 1; x += 2) {
1336 1337 1338 1339 1340 1341
    YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], usrc_v[0], usrc_v[1],
             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;
1342 1343
    src_y += 2;
    usrc_v += 2;
1344 1345 1346
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1347 1348 1349
    YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
1350 1351 1352
  }
}

1353 1354
void NV21ToARGBRow_C(const uint8* src_y,
                     const uint8* src_vu,
1355 1356
                     uint8* rgb_buf,
                     int width) {
1357 1358
  int x;
  for (x = 0; x < width - 1; x += 2) {
1359 1360 1361 1362 1363 1364 1365 1366
    YuvPixel(src_y[0], src_vu[1], src_vu[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;

    YuvPixel(src_y[1], src_vu[1], src_vu[0],
             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;

1367 1368
    src_y += 2;
    src_vu += 2;
1369 1370 1371
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1372 1373 1374
    YuvPixel(src_y[0], src_vu[1], src_vu[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
1375 1376 1377
  }
}

1378 1379
void NV12ToRGB565Row_C(const uint8* src_y,
                       const uint8* usrc_v,
1380 1381 1382 1383 1384 1385 1386 1387
                       uint8* dst_rgb565,
                       int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
1388 1389
  int x;
  for (x = 0; x < width - 1; x += 2) {
1390 1391
    YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
    YuvPixel(src_y[1], usrc_v[0], usrc_v[1], &b1, &g1, &r1);
1392 1393 1394 1395 1396 1397
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
1398
    *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
1399
        (b1 << 16) | (g1 << 21) | (r1 << 27);
1400 1401
    src_y += 2;
    usrc_v += 2;
1402 1403 1404
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
1405
    YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
1406 1407 1408
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
1409
    *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
1410 1411 1412
  }
}

1413 1414
void NV21ToRGB565Row_C(const uint8* src_y,
                       const uint8* vsrc_u,
1415 1416 1417 1418 1419 1420 1421 1422
                       uint8* dst_rgb565,
                       int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
1423 1424
  int x;
  for (x = 0; x < width - 1; x += 2) {
1425 1426
    YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
    YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1);
1427 1428 1429 1430 1431 1432
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
1433
    *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
1434
        (b1 << 16) | (g1 << 21) | (r1 << 27);
1435 1436
    src_y += 2;
    vsrc_u += 2;
1437 1438 1439
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
1440
    YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
1441 1442 1443
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
1444
    *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
1445 1446 1447
  }
}

1448
void YUY2ToARGBRow_C(const uint8* src_yuy2,
1449 1450
                     uint8* rgb_buf,
                     int width) {
1451 1452
  int x;
  for (x = 0; x < width - 1; x += 2) {
1453 1454 1455 1456 1457 1458
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
    YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3],
             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;
1459
    src_yuy2 += 4;
1460 1461 1462
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1463 1464 1465
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
1466 1467 1468
  }
}

1469
void UYVYToARGBRow_C(const uint8* src_uyvy,
1470 1471
                     uint8* rgb_buf,
                     int width) {
1472 1473
  int x;
  for (x = 0; x < width - 1; x += 2) {
1474 1475 1476 1477 1478 1479
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
    YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2],
             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;
1480
    src_uyvy += 4;
1481 1482 1483
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1484 1485 1486
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
1487 1488 1489
  }
}

1490 1491 1492
void I422ToBGRARow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
1493 1494
                     uint8* rgb_buf,
                     int width) {
1495 1496
  int x;
  for (x = 0; x < width - 1; x += 2) {
1497 1498 1499 1500 1501 1502
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
    rgb_buf[0] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0],
             rgb_buf + 7, rgb_buf + 6, rgb_buf + 5);
    rgb_buf[4] = 255;
1503 1504 1505
    src_y += 2;
    src_u += 1;
    src_v += 1;
1506 1507 1508
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1509 1510 1511
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
    rgb_buf[0] = 255;
1512 1513 1514
  }
}

1515 1516 1517
void I422ToABGRRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
1518 1519
                     uint8* rgb_buf,
                     int width) {
1520 1521
  int x;
  for (x = 0; x < width - 1; x += 2) {
1522 1523 1524 1525 1526 1527
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0],
             rgb_buf + 6, rgb_buf + 5, rgb_buf + 4);
    rgb_buf[7] = 255;
1528 1529 1530
    src_y += 2;
    src_u += 1;
    src_v += 1;
1531 1532 1533
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1534 1535 1536
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
    rgb_buf[3] = 255;
1537 1538 1539
  }
}

1540 1541 1542
void I422ToRGBARow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
1543 1544
                     uint8* rgb_buf,
                     int width) {
1545 1546
  int x;
  for (x = 0; x < width - 1; x += 2) {
1547 1548 1549 1550 1551 1552
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
    rgb_buf[0] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0],
             rgb_buf + 5, rgb_buf + 6, rgb_buf + 7);
    rgb_buf[4] = 255;
1553 1554 1555
    src_y += 2;
    src_u += 1;
    src_v += 1;
1556 1557 1558
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1559 1560 1561
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
    rgb_buf[0] = 255;
1562 1563 1564
  }
}

1565
void YToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
1566 1567
  int x;
  for (x = 0; x < width - 1; x += 2) {
1568
    YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1569
    rgb_buf[3] = 255;
1570
    YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1571 1572 1573 1574 1575
    rgb_buf[7] = 255;
    src_y += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1576
    YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1577
    rgb_buf[3] = 255;
1578 1579 1580
  }
}

1581
void MirrorRow_C(const uint8* src, uint8* dst, int width) {
1582
  int x;
1583
  src += width - 1;
1584
  for (x = 0; x < width - 1; x += 2) {
1585 1586 1587 1588 1589 1590
    dst[x] = src[0];
    dst[x + 1] = src[-1];
    src -= 2;
  }
  if (width & 1) {
    dst[width - 1] = src[0];
1591 1592 1593
  }
}

1594
// Reverse a row of interleaved byte pairs and split it into two planes:
// the first byte of each pair goes to dst_u and the second to dst_v, with
// the last source pair written first.
// width is the number of pairs.
void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
  int x;
  // Point at the last pair. Multiplication is used instead of a left shift
  // because (width - 1) is negative when width is 0, and left-shifting a
  // negative value is undefined.
  src_uv += (width - 1) * 2;
  for (x = 0; x < width - 1; x += 2) {
    dst_u[x] = src_uv[0];
    dst_u[x + 1] = src_uv[-2];
    dst_v[x] = src_uv[1];
    dst_v[x + 1] = src_uv[-2 + 1];
    src_uv -= 4;
  }
  if (width & 1) {
    // Odd width: one trailing pair.
    dst_u[width - 1] = src_uv[0];
    dst_v[width - 1] = src_uv[1];
  }
}

1610
void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
1611
  int x;
1612 1613
  const uint32* src32 = (const uint32*)(src);
  uint32* dst32 = (uint32*)(dst);
1614
  src32 += width - 1;
1615
  for (x = 0; x < width - 1; x += 2) {
1616 1617 1618 1619 1620 1621 1622 1623 1624
    dst32[x] = src32[0];
    dst32[x + 1] = src32[-1];
    src32 -= 2;
  }
  if (width & 1) {
    dst32[width - 1] = src32[0];
  }
}

1625
void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
1626 1627
  int x;
  for (x = 0; x < width - 1; x += 2) {
1628 1629 1630 1631 1632 1633 1634 1635 1636
    dst_u[x] = src_uv[0];
    dst_u[x + 1] = src_uv[2];
    dst_v[x] = src_uv[1];
    dst_v[x + 1] = src_uv[3];
    src_uv += 4;
  }
  if (width & 1) {
    dst_u[width - 1] = src_uv[0];
    dst_v[width - 1] = src_uv[1];
1637 1638 1639
  }
}

1640 1641
void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
                  int width) {
1642 1643
  int x;
  for (x = 0; x < width - 1; x += 2) {
1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655
    dst_uv[0] = src_u[x];
    dst_uv[1] = src_v[x];
    dst_uv[2] = src_u[x + 1];
    dst_uv[3] = src_v[x + 1];
    dst_uv += 4;
  }
  if (width & 1) {
    dst_uv[0] = src_u[width - 1];
    dst_uv[1] = src_v[width - 1];
  }
}

1656 1657 1658 1659
// Copy count bytes from src to dst with memcpy.
void CopyRow_C(const uint8* src, uint8* dst, int count) {
  const size_t num_bytes = (size_t)count;
  memcpy(dst, src, num_bytes);
}

1660 1661 1662 1663
// Copy count 16 bit values (count * 2 bytes) from src to dst with memcpy.
// The byte count is computed in size_t so the multiplication cannot
// overflow int for large counts.
void CopyRow_16_C(const uint16* src, uint16* dst, int count) {
  memcpy(dst, src, (size_t)count * 2);
}

1664 1665
// Fill width bytes at dst with the value v8 using memset.
void SetRow_C(uint8* dst, uint8 v8, int width) {
  const size_t num_bytes = (size_t)width;
  memset(dst, v8, num_bytes);
}

1668 1669 1670 1671 1672
// Fill a row of 4 byte pixels with the 32 bit value v32.
// dst_argb is written through a uint32 pointer; width is the pixel count.
void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int width) {
  uint32* out = (uint32*)(dst_argb);
  for (; width > 0; --width) {
    *out++ = v32;
  }
}

1676
// Filter 2 rows of YUY2 UV's (422) into U and V (420).
1677
void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
1678
                   uint8* dst_u, uint8* dst_v, int width) {
1679
  // Output a row of UV values, filtering 2 rows of YUY2.
1680 1681
  int x;
  for (x = 0; x < width; x += 2) {
1682 1683 1684 1685 1686 1687 1688 1689
    dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
    dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
    src_yuy2 += 4;
    dst_u += 1;
    dst_v += 1;
  }
}

1690 1691 1692 1693
// Copy row of YUY2 UV's (422) into U and V (422).
// For every 4 byte group, byte 1 goes to dst_u and byte 3 to dst_v.
// One U and one V are produced per two pixels of width.
void YUY2ToUV422Row_C(const uint8* src_yuy2,
                      uint8* dst_u, uint8* dst_v, int width) {
  int x;
  for (x = 0; x < width; x += 2) {
    *dst_u++ = src_yuy2[1];
    *dst_v++ = src_yuy2[3];
    src_yuy2 += 4;
  }
}

// Copy row of YUY2 Y's (422) into Y (420/422).
1705
void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
1706
  // Output a row of Y values.
1707 1708
  int x;
  for (x = 0; x < width - 1; x += 2) {
1709 1710 1711 1712 1713 1714
    dst_y[x] = src_yuy2[0];
    dst_y[x + 1] = src_yuy2[2];
    src_yuy2 += 4;
  }
  if (width & 1) {
    dst_y[width - 1] = src_yuy2[0];
1715 1716 1717
  }
}

1718
// Filter 2 rows of UYVY UV's (422) into U and V (420).
1719
void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
1720
                   uint8* dst_u, uint8* dst_v, int width) {
1721
  // Output a row of UV values.
1722 1723
  int x;
  for (x = 0; x < width; x += 2) {
1724 1725 1726 1727 1728 1729 1730 1731
    dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
    dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
    src_uyvy += 4;
    dst_u += 1;
    dst_v += 1;
  }
}

1732 1733 1734 1735
// Copy row of UYVY UV's (422) into U and V (422).
// For every 4 byte group, byte 0 goes to dst_u and byte 2 to dst_v.
// One U and one V are produced per two pixels of width.
void UYVYToUV422Row_C(const uint8* src_uyvy,
                      uint8* dst_u, uint8* dst_v, int width) {
  int x;
  for (x = 0; x < width; x += 2) {
    *dst_u++ = src_uyvy[0];
    *dst_v++ = src_uyvy[2];
    src_uyvy += 4;
  }
}

// Copy row of UYVY Y's (422) into Y (420/422).
void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
  // Output a row of Y values.
1749 1750
  int x;
  for (x = 0; x < width - 1; x += 2) {
1751 1752 1753
    dst_y[x] = src_uyvy[1];
    dst_y[x + 1] = src_uyvy[3];
    src_uyvy += 4;
1754 1755
  }
  if (width & 1) {
1756
    dst_y[width - 1] = src_uyvy[1];
1757 1758 1759
  }
}

1760
#define BLEND(f, b, a) (((256 - a) * b) >> 8) + f
1761

1762 1763
// Blend src_argb0 over src_argb1 and store to dst_argb.
// dst_argb may be src_argb0 or src_argb1.
1764
// This code mimics the SSSE3 version for better testability.
1765
void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
1766
                    uint8* dst_argb, int width) {
1767 1768
  int x;
  for (x = 0; x < width - 1; x += 2) {
1769 1770 1771
    uint32 fb = src_argb0[0];
    uint32 fg = src_argb0[1];
    uint32 fr = src_argb0[2];
1772
    uint32 a = src_argb0[3];
1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783
    uint32 bb = src_argb1[0];
    uint32 bg = src_argb1[1];
    uint32 br = src_argb1[2];
    dst_argb[0] = BLEND(fb, bb, a);
    dst_argb[1] = BLEND(fg, bg, a);
    dst_argb[2] = BLEND(fr, br, a);
    dst_argb[3] = 255u;

    fb = src_argb0[4 + 0];
    fg = src_argb0[4 + 1];
    fr = src_argb0[4 + 2];
1784
    a = src_argb0[4 + 3];
1785 1786 1787 1788 1789 1790 1791
    bb = src_argb1[4 + 0];
    bg = src_argb1[4 + 1];
    br = src_argb1[4 + 2];
    dst_argb[4 + 0] = BLEND(fb, bb, a);
    dst_argb[4 + 1] = BLEND(fg, bg, a);
    dst_argb[4 + 2] = BLEND(fr, br, a);
    dst_argb[4 + 3] = 255u;
1792 1793 1794 1795 1796 1797
    src_argb0 += 8;
    src_argb1 += 8;
    dst_argb += 8;
  }

  if (width & 1) {
1798 1799 1800
    uint32 fb = src_argb0[0];
    uint32 fg = src_argb0[1];
    uint32 fr = src_argb0[2];
1801
    uint32 a = src_argb0[3];
1802 1803 1804 1805 1806 1807 1808
    uint32 bb = src_argb1[0];
    uint32 bg = src_argb1[1];
    uint32 br = src_argb1[2];
    dst_argb[0] = BLEND(fb, bb, a);
    dst_argb[1] = BLEND(fg, bg, a);
    dst_argb[2] = BLEND(fr, br, a);
    dst_argb[3] = 255u;
1809 1810
  }
}
1811 1812
#undef BLEND
#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
1813

1814
// Multiply source RGB by alpha and store to destination.
1815
// This code mimics the SSSE3 version for better testability.
1816
void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
1817 1818
  int i;
  for (i = 0; i < width - 1; i += 2) {
1819 1820 1821 1822
    uint32 b = src_argb[0];
    uint32 g = src_argb[1];
    uint32 r = src_argb[2];
    uint32 a = src_argb[3];
1823 1824 1825
    dst_argb[0] = ATTENUATE(b, a);
    dst_argb[1] = ATTENUATE(g, a);
    dst_argb[2] = ATTENUATE(r, a);
1826 1827 1828 1829 1830
    dst_argb[3] = a;
    b = src_argb[4];
    g = src_argb[5];
    r = src_argb[6];
    a = src_argb[7];
1831 1832 1833
    dst_argb[4] = ATTENUATE(b, a);
    dst_argb[5] = ATTENUATE(g, a);
    dst_argb[6] = ATTENUATE(r, a);
1834 1835 1836 1837 1838 1839 1840 1841 1842 1843
    dst_argb[7] = a;
    src_argb += 8;
    dst_argb += 8;
  }

  if (width & 1) {
    const uint32 b = src_argb[0];
    const uint32 g = src_argb[1];
    const uint32 r = src_argb[2];
    const uint32 a = src_argb[3];
1844 1845 1846
    dst_argb[0] = ATTENUATE(b, a);
    dst_argb[1] = ATTENUATE(g, a);
    dst_argb[2] = ATTENUATE(r, a);
1847 1848 1849
    dst_argb[3] = a;
  }
}
1850
#undef ATTENUATE
1851

1852 1853 1854 1855 1856
// Divide source RGB by alpha and store to destination.
// b = (b * 255 + (a / 2)) / a;
// g = (g * 255 + (a / 2)) / a;
// r = (r * 255 + (a / 2)) / a;
// Reciprocal method is off by 1 on some values. ie 125
1857 1858
// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
#define T(a) 0x01000000 + (0x10000 / a)
1859
const uint32 fixed_invtbl8[256] = {
1860
  0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890
  T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
  T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
  T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
  T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27),
  T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f),
  T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
  T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f),
  T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47),
  T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f),
  T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57),
  T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f),
  T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67),
  T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
  T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77),
  T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f),
  T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87),
  T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f),
  T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97),
  T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f),
  T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
  T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf),
  T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7),
  T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf),
  T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7),
  T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf),
  T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7),
  T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
  T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7),
  T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef),
  T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7),
1891
  T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100 };
1892 1893 1894
#undef T

void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
1895 1896
  int i;
  for (i = 0; i < width; ++i) {
1897 1898 1899 1900
    uint32 b = src_argb[0];
    uint32 g = src_argb[1];
    uint32 r = src_argb[2];
    const uint32 a = src_argb[3];
fbarchard@google.com's avatar
fbarchard@google.com committed
1901
    const uint32 ia = fixed_invtbl8[a] & 0xffff;  // 8.8 fixed point
1902 1903 1904 1905
    b = (b * ia) >> 8;
    g = (g * ia) >> 8;
    r = (r * ia) >> 8;
    // Clamping should not be necessary but is free in assembly.
1906 1907 1908
    dst_argb[0] = clamp255(b);
    dst_argb[1] = clamp255(g);
    dst_argb[2] = clamp255(r);
1909 1910 1911 1912 1913 1914
    dst_argb[3] = a;
    src_argb += 4;
    dst_argb += 4;
  }
}

fbarchard@google.com's avatar
fbarchard@google.com committed
1915
void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
1916
                               const int32* previous_cumsum, int width) {
fbarchard@google.com's avatar
fbarchard@google.com committed
1917
  int32 row_sum[4] = {0, 0, 0, 0};
1918 1919
  int x;
  for (x = 0; x < width; ++x) {
fbarchard@google.com's avatar
fbarchard@google.com committed
1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930
    row_sum[0] += row[x * 4 + 0];
    row_sum[1] += row[x * 4 + 1];
    row_sum[2] += row[x * 4 + 2];
    row_sum[3] += row[x * 4 + 3];
    cumsum[x * 4 + 0] = row_sum[0]  + previous_cumsum[x * 4 + 0];
    cumsum[x * 4 + 1] = row_sum[1]  + previous_cumsum[x * 4 + 1];
    cumsum[x * 4 + 2] = row_sum[2]  + previous_cumsum[x * 4 + 2];
    cumsum[x * 4 + 3] = row_sum[3]  + previous_cumsum[x * 4 + 3];
  }
}

1931 1932
// For each of count pixels and each of its 4 channels, compute
// bl[w + c] + tl[c] - bl[c] - tl[w + c], scale it by 1 / area and store the
// result truncated to a byte.  tl and bl hold 4 int32 values per pixel and
// w is the offset, in int32 elements, between the two corners read.
// NOTE(review): tl / bl presumably stand for the top and bottom rows of a
// cumulative-sum box; confirm against the caller.
void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl,
                                int w, int area, uint8* dst, int count) {
  const float reciprocal_area = 1.0f / area;
  int n;
  int c;
  for (n = 0; n < count; ++n, dst += 4, tl += 4, bl += 4) {
    for (c = 0; c < 4; ++c) {
      dst[c] = (uint8)((bl[w + c] + tl[c] - bl[c] - tl[w + c]) *
                       reciprocal_area);
    }
  }
}

1946
// Copy pixels from rotated source to destination row with a slope.
1947
LIBYUV_API
1948 1949
void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
                     uint8* dst_argb, const float* uv_dudv, int width) {
1950
  int i;
1951 1952 1953 1954
  // Render a row of pixels from source into a buffer.
  float uv[2];
  uv[0] = uv_dudv[0];
  uv[1] = uv_dudv[1];
1955
  for (i = 0; i < width; ++i) {
1956 1957 1958 1959
    int x = (int)(uv[0]);
    int y = (int)(uv[1]);
    *(uint32*)(dst_argb) =
        *(const uint32*)(src_argb + y * src_argb_stride +
1960 1961 1962 1963 1964 1965 1966
                                         x * 4);
    dst_argb += 4;
    uv[0] += uv_dudv[2];
    uv[1] += uv_dudv[3];
  }
}

1967 1968 1969
// Blend 2 rows into 1.
// Each output byte is the rounded average of src_uv[i] and the byte
// src_uv_stride elements after it.
static void HalfRow_C(const uint8* src_uv, int src_uv_stride,
                      uint8* dst_uv, int pix) {
  int i = 0;
  while (i < pix) {
    const int upper = src_uv[i];
    const int lower = src_uv[src_uv_stride + i];
    dst_uv[i] = (upper + lower + 1) >> 1;  // +1 rounds to nearest.
    ++i;
  }
}

1976 1977
// 16-bit variant of the row blend: each output element is the rounded
// average of src_uv[i] and the element src_uv_stride elements after it.
static void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
                         uint16* dst_uv, int pix) {
  int i = 0;
  while (i < pix) {
    const int upper = src_uv[i];
    const int lower = src_uv[src_uv_stride + i];
    dst_uv[i] = (upper + lower + 1) >> 1;  // +1 rounds to nearest.
    ++i;
  }
}

1984
// C version 2x2 -> 2x1.
1985 1986 1987
void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
                      ptrdiff_t src_stride,
                      int width, int source_y_fraction) {
1988 1989 1990 1991
  int y1_fraction = source_y_fraction;
  int y0_fraction = 256 - y1_fraction;
  const uint8* src_ptr1 = src_ptr + src_stride;
  int x;
1992 1993 1994 1995
  if (source_y_fraction == 0) {
    memcpy(dst_ptr, src_ptr, width);
    return;
  }
1996
  if (source_y_fraction == 128) {
1997
    HalfRow_C(src_ptr, (int)(src_stride), dst_ptr, width);
1998 1999
    return;
  }
2000
  for (x = 0; x < width - 1; x += 2) {
2001 2002
    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
    dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
2003 2004 2005
    src_ptr += 2;
    src_ptr1 += 2;
    dst_ptr += 2;
2006 2007 2008
  }
  if (width & 1) {
    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035
  }
}

// 16-bit variant of the 2x2 -> 2x1 row interpolation.
// Blends the row at src_ptr with the row src_stride elements after it.
// source_y_fraction is the weight of the second row out of 256.  A fraction
// of 0 is a plain copy and a fraction of 128 is delegated to HalfRow_16_C.
void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
                         ptrdiff_t src_stride,
                         int width, int source_y_fraction) {
  const int weight1 = source_y_fraction;
  const int weight0 = 256 - weight1;
  const uint16* next_row = src_ptr + src_stride;
  int x;
  switch (source_y_fraction) {
    case 0:
      memcpy(dst_ptr, src_ptr, width * 2);  // 2 bytes per element.
      return;
    case 128:
      HalfRow_16_C(src_ptr, (int)(src_stride), dst_ptr, width);
      return;
    default:
      break;
  }
  // Two output elements per iteration.
  for (x = 0; x < width - 1; x += 2) {
    dst_ptr[x] = (src_ptr[x] * weight0 + next_row[x] * weight1) >> 8;
    dst_ptr[x + 1] =
        (src_ptr[x + 1] * weight0 + next_row[x + 1] * weight1) >> 8;
  }
  // Odd width: x now indexes the one remaining element.
  if (width & 1) {
    dst_ptr[x] = (src_ptr[x] * weight0 + next_row[x] * weight1) >> 8;
  }
}

2039 2040
// Select G channel from ARGB.  e.g.  GGGGGGGG
void ARGBToBayerGGRow_C(const uint8* src_argb,
2041
                        uint8* dst_bayer, uint32 selector, int pix) {
fbarchard@google.com's avatar
fbarchard@google.com committed
2042
  // Copy a row of G.
2043 2044
  int x;
  for (x = 0; x < pix - 1; x += 2) {
2045 2046 2047 2048 2049 2050 2051 2052 2053 2054
    dst_bayer[0] = src_argb[1];
    dst_bayer[1] = src_argb[5];
    src_argb += 8;
    dst_bayer += 2;
  }
  if (pix & 1) {
    dst_bayer[0] = src_argb[1];
  }
}

fbarchard@google.com's avatar
fbarchard@google.com committed
2055 2056 2057 2058 2059 2060 2061 2062
// Use first 4 shuffler values to reorder ARGB channels.
void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
                      const uint8* shuffler, int pix) {
  int index0 = shuffler[0];
  int index1 = shuffler[1];
  int index2 = shuffler[2];
  int index3 = shuffler[3];
  // Shuffle a row of ARGB.
2063 2064
  int x;
  for (x = 0; x < pix; ++x) {
fbarchard@google.com's avatar
fbarchard@google.com committed
2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078
    // To support in-place conversion.
    uint8 b = src_argb[index0];
    uint8 g = src_argb[index1];
    uint8 r = src_argb[index2];
    uint8 a = src_argb[index3];
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = a;
    src_argb += 4;
    dst_argb += 4;
  }
}

fbarchard@google.com's avatar
fbarchard@google.com committed
2079 2080 2081 2082
// Interleave planar Y, U and V into 4-byte groups laid out as Y0 U Y1 V.
// Each group consumes two Y samples and one U and one V sample.
void I422ToYUY2Row_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* dst_frame, int width) {
  int x = 0;
  while (x < width - 1) {
    *dst_frame++ = *src_y++;
    *dst_frame++ = *src_u++;
    *dst_frame++ = *src_y++;
    *dst_frame++ = *src_v++;
    x += 2;
  }
  // Odd width: emit a full group, repeating the last Y sample.
  if (width & 1) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = src_y[0];  // duplicate last y
    dst_frame[3] = src_v[0];
  }
}

// Interleave planar Y, U and V into 4-byte groups laid out as U Y0 V Y1.
// Each group consumes two Y samples and one U and one V sample.
void I422ToUYVYRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* dst_frame, int width) {
  int x = 0;
  while (x < width - 1) {
    *dst_frame++ = *src_u++;
    *dst_frame++ = *src_y++;
    *dst_frame++ = *src_v++;
    *dst_frame++ = *src_y++;
    x += 2;
  }
  // Odd width: emit a full group, repeating the last Y sample.
  if (width & 1) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = src_y[0];  // duplicate last y
  }
}
2124

2125
// Maximum temporary width for wrappers to process at a time, in pixels.
2126
#define MAXTWIDTH 4096
2127

2128
#if !defined(LIBYUV_DISABLE_X86) && defined(HAS_I422TOARGBROW_SSSE3)
2129
// row_win.cc has asm version, but GCC uses 2 step wrapper.
2130
#if !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
2131 2132 2133
void I422ToRGB565Row_SSSE3(const uint8* src_y,
                           const uint8* src_u,
                           const uint8* src_v,
2134
                           uint8* dst_rgb565,
2135
                           int width) {
2136 2137 2138 2139
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, twidth);
2140
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
2141 2142 2143
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
2144
    dst_rgb565 += twidth * 2;
2145 2146
    width -= twidth;
  }
2147
}
2148
#endif  // !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__))
2149

2150
#if defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
2151 2152 2153
void I422ToARGB1555Row_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
2154
                             uint8* dst_argb1555,
2155
                             int width) {
2156 2157 2158 2159 2160
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, twidth);
2161
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
2162 2163 2164
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
2165
    dst_argb1555 += twidth * 2;
2166 2167
    width -= twidth;
  }
2168 2169
}

2170 2171 2172
void I422ToARGB4444Row_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
2173
                             uint8* dst_argb4444,
2174
                             int width) {
2175 2176 2177 2178 2179
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, twidth);
2180
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
2181 2182 2183
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
2184
    dst_argb4444 += twidth * 2;
2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248
    width -= twidth;
  }
}

// Two step wrapper: NV12 -> ARGB into a temporary row, then ARGB -> RGB565.
// The row is processed in chunks of at most MAXTWIDTH pixels.
void NV12ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_uv,
                           uint8* dst_rgb565, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, chunk);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, chunk);
    src_y += chunk;
    src_uv += chunk;  // The UV plane advances at the same rate as Y.
    dst_rgb565 += chunk * 2;  // 2 output bytes per pixel.
  }
}

// Two step wrapper: NV21 -> ARGB into a temporary row, then ARGB -> RGB565.
// The row is processed in chunks of at most MAXTWIDTH pixels.
void NV21ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_vu,
                           uint8* dst_rgb565, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    NV21ToARGBRow_SSSE3(src_y, src_vu, row, chunk);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, chunk);
    src_y += chunk;
    src_vu += chunk;  // The VU plane advances at the same rate as Y.
    dst_rgb565 += chunk * 2;  // 2 output bytes per pixel.
  }
}

// Two step wrapper: split YUY2 into temporary Y, U and V rows, then convert
// those rows to ARGB.  The row is processed in chunks of at most MAXTWIDTH
// pixels.
void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, uint8* dst_argb, int width) {
  // Row buffers for intermediate YUV pixels; U and V are half width.
  SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]);
  SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]);
  SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, chunk);
    YUY2ToYRow_SSE2(src_yuy2, row_y, chunk);
    I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, chunk);
    src_yuy2 += chunk * 2;  // 2 source bytes per pixel.
    dst_argb += chunk * 4;  // 4 output bytes per pixel.
  }
}

// Two step wrapper: split UYVY into temporary Y, U and V rows, then convert
// those rows to ARGB.  The row is processed in chunks of at most MAXTWIDTH
// pixels.
void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, uint8* dst_argb, int width) {
  // Row buffers for intermediate YUV pixels; U and V are half width.
  SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]);
  SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]);
  SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, chunk);
    UYVYToYRow_SSE2(src_uyvy, row_y, chunk);
    I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, chunk);
    src_uyvy += chunk * 2;  // 2 source bytes per pixel.
    dst_argb += chunk * 4;  // 4 output bytes per pixel.
  }
}
2250
#endif  // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
2251
#endif  // !defined(LIBYUV_DISABLE_X86)
2252 2253 2254 2255

// Apply a cubic polynomial to every byte of every 4-byte pixel.
// For byte c with value v the result is
//   poly[c] + poly[c + 4] * v + poly[c + 8] * v^2 + poly[c + 12] * v^3,
// converted to int32 and clamped with Clamp().  poly holds 16 floats.
void ARGBPolynomialRow_C(const uint8* src_argb,
                         uint8* dst_argb, const float* poly,
                         int width) {
  int x;
  int c;
  for (x = 0; x < width; ++x) {
    float result[4];
    // Evaluate all four channels before storing any of them.
    for (c = 0; c < 4; ++c) {
      const float v = (float)(src_argb[c]);
      const float v2 = v * v;
      const float v3 = v2 * v;
      float d = poly[c] + poly[c + 4] * v;
      d += poly[c + 8] * v2;
      d += poly[c + 12] * v3;
      result[c] = d;
    }
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = Clamp((int32)(result[c]));
    }
    src_argb += 4;
    dst_argb += 4;
  }
}

2292
void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
2293
                             const uint8* luma, uint32 lumacoeff) {
2294 2295 2296 2297
  uint32 bc = lumacoeff & 0xff;
  uint32 gc = (lumacoeff >> 8) & 0xff;
  uint32 rc = (lumacoeff >> 16) & 0xff;

2298 2299
  int i;
  for (i = 0; i < width - 1; i += 2) {
2300
    // Luminance in rows, color values in columns.
2301 2302
    const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
                           src_argb[2] * rc) & 0x7F00u) + luma;
2303
    const uint8* luma1;
2304 2305 2306 2307
    dst_argb[0] = luma0[src_argb[0]];
    dst_argb[1] = luma0[src_argb[1]];
    dst_argb[2] = luma0[src_argb[2]];
    dst_argb[3] = src_argb[3];
2308 2309
    luma1 = ((src_argb[4] * bc + src_argb[5] * gc +
              src_argb[6] * rc) & 0x7F00u) + luma;
2310 2311 2312
    dst_argb[4] = luma1[src_argb[4]];
    dst_argb[5] = luma1[src_argb[5]];
    dst_argb[6] = luma1[src_argb[6]];
2313 2314 2315 2316
    dst_argb[7] = src_argb[7];
    src_argb += 8;
    dst_argb += 8;
  }
2317 2318
  if (width & 1) {
    // Luminance in rows, color values in columns.
2319 2320
    const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
                           src_argb[2] * rc) & 0x7F00u) + luma;
2321 2322 2323 2324 2325
    dst_argb[0] = luma0[src_argb[0]];
    dst_argb[1] = luma0[src_argb[1]];
    dst_argb[2] = luma0[src_argb[2]];
    dst_argb[3] = src_argb[3];
  }
2326 2327
}

2328
// Copy byte 3 of every 4-byte source pixel to byte 3 of the matching
// destination pixel; the other destination bytes are left untouched.
void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
  int x;
  // Two pixels per iteration.
  for (x = 0; x < width - 1; x += 2) {
    dst[x * 4 + 3] = src[x * 4 + 3];
    dst[x * 4 + 7] = src[x * 4 + 7];
  }
  // Odd width: x now indexes the one remaining pixel.
  if (width & 1) {
    dst[x * 4 + 3] = src[x * 4 + 3];
  }
}

2341
// Copy one source byte per pixel into byte 3 of the matching 4-byte
// destination pixel; the other destination bytes are left untouched.
void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
  int x;
  // Two pixels per iteration.
  for (x = 0; x < width - 1; x += 2) {
    dst[x * 4 + 3] = src[x];
    dst[x * 4 + 7] = src[x + 1];
  }
  // Odd width: x now indexes the one remaining pixel.
  if (width & 1) {
    dst[x * 4 + 3] = src[x];
  }
}
2353

2354
#ifdef __cplusplus
2355
}  // extern "C"
2356 2357
}  // namespace libyuv
#endif