/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

11
#include "libyuv/row.h"

#include <string.h>  // For memcpy and memset.

#include "libyuv/basic_types.h"

// When built as C++, everything below lives in namespace libyuv with C
// linkage.
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
21

22 23 24 25 26 27 28 29 30 31 32 33 34 35
// llvm x86 is poor at ternary operator, so use branchless min/max.
// Set USE_BRANCHLESS to 0 to select the plain ternary versions instead.

#define USE_BRANCHLESS 1
#if USE_BRANCHLESS
// Returns v when v > 0, otherwise 0.
// (v >> 31) is all ones for a negative v (relies on an arithmetic right
// shift of negative values), so its complement masks negatives to 0.
// This form never negates v, so it does not overflow for the most negative
// int32.
static __inline int32 clamp0(int32 v) {
  return ~(v >> 31) & v;
}

// Returns 255 when v > 255, otherwise the low 8 bits of v.
// Only an upper clamp: callers pass v >= 0 (see Clamp).
static __inline int32 clamp255(int32 v) {
  return (((255 - v) >> 31) | v) & 255;
}

// Clamps val to the range 0..255.
static __inline uint32 Clamp(int32 val) {
  int v = clamp0(val);
  return (uint32)(clamp255(v));
}

// Returns the absolute value of v.
// Computed in unsigned arithmetic so the most negative int32 wraps to
// 0x80000000 instead of overflowing a signed addition.
static __inline uint32 Abs(int32 v) {
  uint32 m = (uint32)(v >> 31);  // All ones when v is negative, else 0.
  return ((uint32)v + m) ^ m;
}
#else  // USE_BRANCHLESS
// Returns v when v >= 0, otherwise 0.
static __inline int32 clamp0(int32 v) {
  return (v < 0) ? 0 : v;
}

// Returns 255 when v > 255, otherwise v unchanged.
static __inline int32 clamp255(int32 v) {
  return (v > 255) ? 255 : v;
}

// Clamps val to the range 0..255.
static __inline uint32 Clamp(int32 val) {
  int v = clamp0(val);
  return (uint32)(clamp255(v));
}

// Returns the absolute value of v; negation is done in unsigned arithmetic
// so the most negative int32 does not overflow.
static __inline uint32 Abs(int32 v) {
  return (v < 0) ? (0u - (uint32)v) : (uint32)v;
}
#endif  // USE_BRANCHLESS

62
// WRITEWORD(p, v) stores the 32 bit value v at p, least significant byte
// first.
#ifdef LIBYUV_LITTLE_ENDIAN
// Native store. NOTE(review): assumes p may legally be written as a uint32
// (alignment / aliasing) - confirm against callers.
#define WRITEWORD(p, v) (*(uint32*)(p) = (v))
#else
// Portable byte-by-byte store.
static __inline void WRITEWORD(uint8* p, uint32 v) {
  p[0] = (uint8)(v & 255);
  p[1] = (uint8)((v >> 8) & 255);
  p[2] = (uint8)((v >> 16) & 255);
  p[3] = (uint8)((v >> 24) & 255);
}
#endif

73
// Converts a row of 3-byte pixels (read as B, G, R) to 4-byte ARGB pixels
// (written as B, G, R, A) with alpha set to 255.
// Reads width * 3 bytes from src_rgb24 and writes width * 4 bytes to
// dst_argb.
void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8 b = src_rgb24[0];
    uint8 g = src_rgb24[1];
    uint8 r = src_rgb24[2];
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = 255u;  // Opaque alpha.
    dst_argb += 4;
    src_rgb24 += 3;
  }
}

88
// Converts a row of 3-byte pixels (read as R, G, B) to 4-byte ARGB pixels
// (written as B, G, R, A) with alpha set to 255.
// Reads width * 3 bytes from src_raw and writes width * 4 bytes to dst_argb.
void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8 r = src_raw[0];
    uint8 g = src_raw[1];
    uint8 b = src_raw[2];
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = 255u;  // Opaque alpha.
    dst_argb += 4;
    src_raw += 3;
  }
}

103
// Converts a row of 2-byte RGB565 pixels to 4-byte ARGB pixels with alpha
// set to 255.
// Per source pixel: blue is the low 5 bits of byte 0, green is the top 3 bits
// of byte 0 plus the low 3 bits of byte 1, red is the top 5 bits of byte 1.
// Each field is widened to 8 bits by replicating its high bits into the
// low bits.
void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8 b = src_rgb565[0] & 0x1f;
    uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    uint8 r = src_rgb565[1] >> 3;
    dst_argb[0] = (b << 3) | (b >> 2);  // 5 -> 8 bits.
    dst_argb[1] = (g << 2) | (g >> 4);  // 6 -> 8 bits.
    dst_argb[2] = (r << 3) | (r >> 2);  // 5 -> 8 bits.
    dst_argb[3] = 255u;
    dst_argb += 4;
    src_rgb565 += 2;
  }
}

118 119
// Converts a row of 2-byte ARGB1555 pixels to 4-byte ARGB pixels.
// Per source pixel: blue is the low 5 bits of byte 0, green is the top 3 bits
// of byte 0 plus the low 2 bits of byte 1, red is bits 2..6 of byte 1 and
// alpha is the top bit of byte 1.
// Colour fields are widened from 5 to 8 bits by bit replication; the 1 bit
// alpha becomes 0 or 255.
void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8 b = src_argb1555[0] & 0x1f;
    uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8 r = (src_argb1555[1] & 0x7c) >> 2;
    uint8 a = src_argb1555[1] >> 7;
    dst_argb[0] = (b << 3) | (b >> 2);
    dst_argb[1] = (g << 3) | (g >> 2);
    dst_argb[2] = (r << 3) | (r >> 2);
    dst_argb[3] = -a;  // a is 0 or 1; stored as a uint8 this is 0 or 255.
    dst_argb += 4;
    src_argb1555 += 2;
  }
}

135 136
// Converts a row of 2-byte ARGB4444 pixels to 4-byte ARGB pixels.
// Per source pixel: byte 0 holds blue (low nibble) and green (high nibble),
// byte 1 holds red (low nibble) and alpha (high nibble).
// Each nibble n is widened to 8 bits as (n << 4) | n.
void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8 b = src_argb4444[0] & 0x0f;
    uint8 g = src_argb4444[0] >> 4;
    uint8 r = src_argb4444[1] & 0x0f;
    uint8 a = src_argb4444[1] >> 4;
    dst_argb[0] = (b << 4) | b;
    dst_argb[1] = (g << 4) | g;
    dst_argb[2] = (r << 4) | r;
    dst_argb[3] = (a << 4) | a;
    dst_argb += 4;
    src_argb4444 += 2;
  }
}

152
// Converts a row of 4-byte ARGB pixels (read as B, G, R, A) to 3-byte pixels
// written as B, G, R. The alpha byte is dropped.
void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8 b = src_argb[0];
    uint8 g = src_argb[1];
    uint8 r = src_argb[2];
    dst_rgb[0] = b;
    dst_rgb[1] = g;
    dst_rgb[2] = r;
    dst_rgb += 3;
    src_argb += 4;
  }
}

166
// Converts a row of 4-byte ARGB pixels (read as B, G, R, A) to 3-byte pixels
// written as R, G, B. The alpha byte is dropped.
void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8 b = src_argb[0];
    uint8 g = src_argb[1];
    uint8 r = src_argb[2];
    dst_rgb[0] = r;
    dst_rgb[1] = g;
    dst_rgb[2] = b;
    dst_rgb += 3;
    src_argb += 4;
  }
}

180
// Converts a row of 4-byte ARGB pixels to 2-byte RGB565 pixels by keeping
// the top 5 bits of blue, 6 of green and 5 of red. Alpha is dropped.
// Pixels are packed as b | g << 5 | r << 11 and stored least significant
// byte first. Two pixels are written per iteration; an odd trailing pixel is
// written on its own.
void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // uint32 fields keep the shifts up to bit 31 in unsigned arithmetic.
    uint32 b0 = src_argb[0] >> 3;
    uint32 g0 = src_argb[1] >> 2;
    uint32 r0 = src_argb[2] >> 3;
    uint32 b1 = src_argb[4] >> 3;
    uint32 g1 = src_argb[5] >> 2;
    uint32 r1 = src_argb[6] >> 3;
    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) |
              (b1 << 16) | (g1 << 21) | (r1 << 27));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    uint32 b0 = src_argb[0] >> 3;
    uint32 g0 = src_argb[1] >> 2;
    uint32 r0 = src_argb[2] >> 3;
    uint32 v = b0 | (g0 << 5) | (r0 << 11);
    // Same byte order as WRITEWORD, without a uint16 pointer cast.
    dst_rgb[0] = (uint8)(v & 255);
    dst_rgb[1] = (uint8)(v >> 8);
  }
}

202
// Converts a row of 4-byte ARGB pixels to 2-byte ARGB1555 pixels by keeping
// the top 5 bits of blue, green and red and the top bit of alpha.
// Pixels are packed as b | g << 5 | r << 10 | a << 15 and stored least
// significant byte first, matching the byte-wise layout that
// ARGB1555ToARGBRow_C reads. Two pixels are written per iteration; an odd
// trailing pixel is written on its own.
void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // uint32 fields keep the shifts up to bit 31 in unsigned arithmetic.
    uint32 b0 = src_argb[0] >> 3;
    uint32 g0 = src_argb[1] >> 3;
    uint32 r0 = src_argb[2] >> 3;
    uint32 a0 = src_argb[3] >> 7;
    uint32 b1 = src_argb[4] >> 3;
    uint32 g1 = src_argb[5] >> 3;
    uint32 r1 = src_argb[6] >> 3;
    uint32 a1 = src_argb[7] >> 7;
    WRITEWORD(dst_rgb,
              b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
              (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    uint32 b0 = src_argb[0] >> 3;
    uint32 g0 = src_argb[1] >> 3;
    uint32 r0 = src_argb[2] >> 3;
    uint32 a0 = src_argb[3] >> 7;
    uint32 v = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
    // Same byte order as WRITEWORD, without a uint16 pointer cast.
    dst_rgb[0] = (uint8)(v & 255);
    dst_rgb[1] = (uint8)(v >> 8);
  }
}

229
// Converts a row of 4-byte ARGB pixels to 2-byte ARGB4444 pixels by keeping
// the top 4 bits of every channel.
// Pixels are packed as b | g << 4 | r << 8 | a << 12 and stored least
// significant byte first, matching the byte-wise layout that
// ARGB4444ToARGBRow_C reads. Two pixels are written per iteration; an odd
// trailing pixel is written on its own.
void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // uint32 fields keep the shifts up to bit 31 in unsigned arithmetic.
    uint32 b0 = src_argb[0] >> 4;
    uint32 g0 = src_argb[1] >> 4;
    uint32 r0 = src_argb[2] >> 4;
    uint32 a0 = src_argb[3] >> 4;
    uint32 b1 = src_argb[4] >> 4;
    uint32 g1 = src_argb[5] >> 4;
    uint32 r1 = src_argb[6] >> 4;
    uint32 a1 = src_argb[7] >> 4;
    WRITEWORD(dst_rgb,
              b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
              (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    uint32 b0 = src_argb[0] >> 4;
    uint32 g0 = src_argb[1] >> 4;
    uint32 r0 = src_argb[2] >> 4;
    uint32 a0 = src_argb[3] >> 4;
    uint32 v = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
    // Same byte order as WRITEWORD, without a uint16 pointer cast.
    dst_rgb[0] = (uint8)(v & 255);
    dst_rgb[1] = (uint8)(v >> 8);
  }
}

256
// Returns Y for one pixel as 8.8 fixed point arithmetic:
// (66 * r + 129 * g + 25 * b) / 256 + 16, rounded (0x1080 = 16 * 256 + 128).
static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
  return (66 * r + 129 * g +  25 * b + 0x1080) >> 8;
}

260
// Returns U for one pixel as 8.8 fixed point arithmetic:
// (112 * b - 74 * g - 38 * r) / 256 + 128, rounded (0x8080 = 128 * 256 + 128).
static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
  return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
}
263
// Returns V for one pixel as 8.8 fixed point arithmetic:
// (112 * r - 94 * g - 18 * b) / 256 + 128, rounded (0x8080 = 128 * 256 + 128).
static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
  return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
}

267
#define MAKEROWY(NAME, R, G, B, BPP) \
268
void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) {       \
269 270
  int x;                                                                       \
  for (x = 0; x < width; ++x) {                                                \
271
    dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);               \
272
    src_argb0 += BPP;                                                          \
273 274 275 276 277 278
    dst_y += 1;                                                                \
  }                                                                            \
}                                                                              \
void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb,              \
                       uint8* dst_u, uint8* dst_v, int width) {                \
  const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                           \
279 280
  int x;                                                                       \
  for (x = 0; x < width - 1; x += 2) {                                         \
281 282 283 284 285 286
    uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] +                              \
               src_rgb1[B] + src_rgb1[B + BPP]) >> 2;                          \
    uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] +                              \
               src_rgb1[G] + src_rgb1[G + BPP]) >> 2;                          \
    uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] +                              \
               src_rgb1[R] + src_rgb1[R + BPP]) >> 2;                          \
287 288
    dst_u[0] = RGBToU(ar, ag, ab);                                             \
    dst_v[0] = RGBToV(ar, ag, ab);                                             \
289 290
    src_rgb0 += BPP * 2;                                                       \
    src_rgb1 += BPP * 2;                                                       \
291 292 293 294 295 296 297 298 299 300 301 302
    dst_u += 1;                                                                \
    dst_v += 1;                                                                \
  }                                                                            \
  if (width & 1) {                                                             \
    uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1;                               \
    uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1;                               \
    uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1;                               \
    dst_u[0] = RGBToU(ar, ag, ab);                                             \
    dst_v[0] = RGBToV(ar, ag, ab);                                             \
  }                                                                            \
}

303 304 305 306 307 308 309 310
MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
MAKEROWY(RGB24, 2, 1, 0, 3)
MAKEROWY(RAW, 0, 1, 2, 3)
#undef MAKEROWY

// JPEG uses a variation on BT.601-1 full range:
// y =  0.29900 * r + 0.58700 * g + 0.11400 * b
// u = -0.16874 * r - 0.33126 * g + 0.50000 * b  + center
// v =  0.50000 * r - 0.41869 * g - 0.08131 * b  + center
// BT.601 MPEG range uses:
// b 0.1016 * 255 = 25.908 = 25
// g 0.5078 * 255 = 129.489 = 129
// r 0.2578 * 255 = 65.739 = 66
// JPEG 8 bit Y (not used):
// b 0.11400 * 256 = 29.184 = 29
// g 0.58700 * 256 = 150.272 = 150
// r 0.29900 * 256 = 76.544 = 77
// JPEG 7 bit Y:
// b 0.11400 * 128 = 14.592 = 15
// g 0.58700 * 128 = 75.136 = 75
// r 0.29900 * 128 = 38.272 = 38
// JPEG 8 bit U:
// b  0.50000 * 255 = 127.5 = 127
// g -0.33126 * 255 = -84.4713 = -84
// r -0.16874 * 255 = -43.0287 = -43
// JPEG 8 bit V:
// b -0.08131 * 255 = -20.73405 = -20
// g -0.41869 * 255 = -106.76595 = -107
// r  0.50000 * 255 = 127.5 = 127

336
// Returns full range Y for one pixel using 7 bit coefficients:
// (38 * r + 75 * g + 15 * b) / 128, rounded (64 is half of 128).
static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
  return (38 * r + 75 * g +  15 * b + 64) >> 7;
}

340 341 342 343 344 345 346 347 348
// Returns full range U for one pixel: the weighted colour difference in 8.8
// fixed point plus a bias of 0x8080 (128 * 256 + 128 for rounding).
static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
  const int weighted = 127 * b - 84 * g - 43 * r;
  return (weighted + 0x8080) >> 8;
}
// Returns full range V for one pixel: the weighted colour difference in 8.8
// fixed point plus a bias of 0x8080 (128 * 256 + 128 for rounding).
static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
  const int weighted = 127 * r - 107 * g - 20 * b;
  return (weighted + 0x8080) >> 8;
}

#define AVGB(a, b) (((a) + (b) + 1) >> 1)

349 350
#define MAKEROWYJ(NAME, R, G, B, BPP) \
void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) {      \
351 352
  int x;                                                                       \
  for (x = 0; x < width; ++x) {                                                \
353 354 355 356 357
    dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]);              \
    src_argb0 += BPP;                                                          \
    dst_y += 1;                                                                \
  }                                                                            \
}                                                                              \
358 359 360
void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb,             \
                        uint8* dst_u, uint8* dst_v, int width) {               \
  const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                           \
361 362
  int x;                                                                       \
  for (x = 0; x < width - 1; x += 2) {                                         \
363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383
    uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]),                            \
                    AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP]));               \
    uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]),                            \
                    AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP]));               \
    uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]),                            \
                    AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP]));               \
    dst_u[0] = RGBToUJ(ar, ag, ab);                                            \
    dst_v[0] = RGBToVJ(ar, ag, ab);                                            \
    src_rgb0 += BPP * 2;                                                       \
    src_rgb1 += BPP * 2;                                                       \
    dst_u += 1;                                                                \
    dst_v += 1;                                                                \
  }                                                                            \
  if (width & 1) {                                                             \
    uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]);                                 \
    uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]);                                 \
    uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]);                                 \
    dst_u[0] = RGBToUJ(ar, ag, ab);                                            \
    dst_v[0] = RGBToVJ(ar, ag, ab);                                            \
  }                                                                            \
}
384 385 386 387

MAKEROWYJ(ARGB, 2, 1, 0, 4)
#undef MAKEROWYJ

388
// Computes one Y per pixel for a row of 2-byte RGB565 pixels.
// Each field is unpacked and widened to 8 bits by bit replication before
// being passed to RGBToY.
void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8 b = src_rgb565[0] & 0x1f;
    uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    uint8 r = src_rgb565[1] >> 3;
    b = (b << 3) | (b >> 2);  // 5 -> 8 bits.
    g = (g << 2) | (g >> 4);  // 6 -> 8 bits.
    r = (r << 3) | (r >> 2);  // 5 -> 8 bits.
    dst_y[0] = RGBToY(r, g, b);
    src_rgb565 += 2;
    dst_y += 1;
  }
}

403
// Computes one Y per pixel for a row of 2-byte ARGB1555 pixels.
// The three 5 bit colour fields are unpacked and widened to 8 bits by bit
// replication before being passed to RGBToY; the alpha bit is ignored.
void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8 b = src_argb1555[0] & 0x1f;
    uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8 r = (src_argb1555[1] & 0x7c) >> 2;
    b = (b << 3) | (b >> 2);
    g = (g << 3) | (g >> 2);
    r = (r << 3) | (r >> 2);
    dst_y[0] = RGBToY(r, g, b);
    src_argb1555 += 2;
    dst_y += 1;
  }
}

// Computes one Y per pixel for a row of 2-byte ARGB4444 pixels.
// The blue, green and red nibbles are widened to 8 bits as (n << 4) | n
// before being passed to RGBToY; the alpha nibble is ignored.
void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8 b = src_argb4444[0] & 0x0f;
    uint8 g = src_argb4444[0] >> 4;
    uint8 r = src_argb4444[1] & 0x0f;
    b = (b << 4) | b;
    g = (g << 4) | g;
    r = (r << 4) | r;
    dst_y[0] = RGBToY(r, g, b);
    src_argb4444 += 2;
    dst_y += 1;
  }
}

fbarchard@google.com's avatar
fbarchard@google.com committed
433
// Computes one U and one V per 2x2 group of RGB565 pixels, reading the row at
// src_rgb565 and the row src_stride_rgb565 bytes after it.
// The unpacked fields of the four pixels are summed rather than averaged: the
// sum of four n bit values is an n + 2 bit value, which is then widened to
// 8 bits by bit replication. When width is odd the last column sums two
// pixels (n + 1 bits) instead.
void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
                     uint8* dst_u, uint8* dst_v, int width) {
  const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8 b0 = src_rgb565[0] & 0x1f;
    uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    uint8 r0 = src_rgb565[1] >> 3;
    uint8 b1 = src_rgb565[2] & 0x1f;
    uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
    uint8 r1 = src_rgb565[3] >> 3;
    uint8 b2 = next_rgb565[0] & 0x1f;
    uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
    uint8 r2 = next_rgb565[1] >> 3;
    uint8 b3 = next_rgb565[2] & 0x1f;
    uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
    uint8 r3 = next_rgb565[3] >> 3;
    uint8 b = (b0 + b1 + b2 + b3);  // 565 * 4 = 787.
    uint8 g = (g0 + g1 + g2 + g3);  // Already 8 bits; no widening needed.
    uint8 r = (r0 + r1 + r2 + r3);
    b = (b << 1) | (b >> 6);  // 787 -> 888.
    r = (r << 1) | (r >> 6);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
    src_rgb565 += 4;
    next_rgb565 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8 b0 = src_rgb565[0] & 0x1f;
    uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    uint8 r0 = src_rgb565[1] >> 3;
    uint8 b2 = next_rgb565[0] & 0x1f;
    uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
    uint8 r2 = next_rgb565[1] >> 3;
    uint8 b = (b0 + b2);  // 565 * 2 = 676.
    uint8 g = (g0 + g2);
    uint8 r = (r0 + r2);
    b = (b << 2) | (b >> 4);  // 676 -> 888
    g = (g << 1) | (g >> 6);
    r = (r << 2) | (r >> 4);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
  }
}

// Computes one U and one V per 2x2 group of ARGB1555 pixels, reading the row
// at src_argb1555 and the row src_stride_argb1555 bytes after it. The alpha
// bit is ignored.
// The 5 bit colour fields of the four pixels are summed (giving 7 bit values)
// and widened to 8 bits by bit replication. When width is odd the last column
// sums two pixels (6 bit values) instead.
void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
                       uint8* dst_u, uint8* dst_v, int width) {
  const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8 b0 = src_argb1555[0] & 0x1f;
    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8 b1 = src_argb1555[2] & 0x1f;
    uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
    uint8 r1 = (src_argb1555[3] & 0x7c) >> 2;
    uint8 b2 = next_argb1555[0] & 0x1f;
    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8 b3 = next_argb1555[2] & 0x1f;
    uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
    uint8 r3 = (next_argb1555[3] & 0x7c) >> 2;
    uint8 b = (b0 + b1 + b2 + b3);  // 555 * 4 = 777.
    uint8 g = (g0 + g1 + g2 + g3);
    uint8 r = (r0 + r1 + r2 + r3);
    b = (b << 1) | (b >> 6);  // 777 -> 888.
    g = (g << 1) | (g >> 6);
    r = (r << 1) | (r >> 6);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
    src_argb1555 += 4;
    next_argb1555 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8 b0 = src_argb1555[0] & 0x1f;
    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8 b2 = next_argb1555[0] & 0x1f;
    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    // Red is bits 2..6 of byte 1; mask off the alpha bit exactly as for r0.
    uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8 b = (b0 + b2);  // 555 * 2 = 666.
    uint8 g = (g0 + g2);
    uint8 r = (r0 + r2);
    b = (b << 2) | (b >> 4);  // 666 -> 888.
    g = (g << 2) | (g >> 4);
    r = (r << 2) | (r >> 4);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
  }
}

// Computes one U and one V per 2x2 group of ARGB4444 pixels, reading the row
// at src_argb4444 and the row src_stride_argb4444 bytes after it. The alpha
// nibble is ignored.
// The 4 bit colour fields of the four pixels are summed (giving 6 bit values)
// and widened to 8 bits by bit replication. When width is odd the last column
// sums two pixels (5 bit values) instead.
void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
                       uint8* dst_u, uint8* dst_v, int width) {
  const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8 b0 = src_argb4444[0] & 0x0f;
    uint8 g0 = src_argb4444[0] >> 4;
    uint8 r0 = src_argb4444[1] & 0x0f;
    uint8 b1 = src_argb4444[2] & 0x0f;
    uint8 g1 = src_argb4444[2] >> 4;
    uint8 r1 = src_argb4444[3] & 0x0f;
    uint8 b2 = next_argb4444[0] & 0x0f;
    uint8 g2 = next_argb4444[0] >> 4;
    uint8 r2 = next_argb4444[1] & 0x0f;
    uint8 b3 = next_argb4444[2] & 0x0f;
    uint8 g3 = next_argb4444[2] >> 4;
    uint8 r3 = next_argb4444[3] & 0x0f;
    uint8 b = (b0 + b1 + b2 + b3);  // 444 * 4 = 666.
    uint8 g = (g0 + g1 + g2 + g3);
    uint8 r = (r0 + r1 + r2 + r3);
    b = (b << 2) | (b >> 4);  // 666 -> 888.
    g = (g << 2) | (g >> 4);
    r = (r << 2) | (r >> 4);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
    src_argb4444 += 4;
    next_argb4444 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8 b0 = src_argb4444[0] & 0x0f;
    uint8 g0 = src_argb4444[0] >> 4;
    uint8 r0 = src_argb4444[1] & 0x0f;
    uint8 b2 = next_argb4444[0] & 0x0f;
    uint8 g2 = next_argb4444[0] >> 4;
    uint8 r2 = next_argb4444[1] & 0x0f;
    uint8 b = (b0 + b2);  // 444 * 2 = 555.
    uint8 g = (g0 + g2);
    uint8 r = (r0 + r2);
    b = (b << 3) | (b >> 2);  // 555 -> 888.
    g = (g << 3) | (g >> 2);
    r = (r << 3) | (r >> 2);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
  }
}

576 577
// Computes one U and one V for every ARGB pixel in a single row (no
// subsampling). Alpha is ignored.
void ARGBToUV444Row_C(const uint8* src_argb,
                      uint8* dst_u, uint8* dst_v, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8 ab = src_argb[0];
    uint8 ag = src_argb[1];
    uint8 ar = src_argb[2];
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
    src_argb += 4;
    dst_u += 1;
    dst_v += 1;
  }
}

// Computes one U and one V for every horizontal pair of ARGB pixels in a
// single row, averaging the pair per channel (truncating). When width is odd
// the last pixel is used on its own. Alpha is ignored.
void ARGBToUV422Row_C(const uint8* src_argb,
                      uint8* dst_u, uint8* dst_v, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
    uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
    uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
    src_argb += 8;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8 ab = src_argb[0];
    uint8 ag = src_argb[1];
    uint8 ar = src_argb[2];
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
  }
}

// Computes one U and one V for every horizontal group of four ARGB pixels in
// a single row, averaging the group per channel (truncating). A trailing
// group of 3, 2 or 1 pixels is averaged over the pixels that exist. Alpha is
// ignored.
void ARGBToUV411Row_C(const uint8* src_argb,
                      uint8* dst_u, uint8* dst_v, int width) {
  int x;
  for (x = 0; x < width - 3; x += 4) {
    uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2;
    uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2;
    uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2;
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
    src_argb += 16;
    dst_u += 1;
    dst_v += 1;
  }
  if ((width & 3) == 3) {
    uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8]) / 3;
    uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9]) / 3;
    uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10]) / 3;
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
  } else if ((width & 3) == 2) {
    uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
    uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
    uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
  } else if ((width & 3) == 1) {
    uint8 ab = src_argb[0];
    uint8 ag = src_argb[1];
    uint8 ar = src_argb[2];
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
  }
}
646

647
// Converts a row of ARGB pixels to gray: B, G and R are all set to the
// pixel's RGBToYJ value and alpha is copied from the source.
void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
    dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
    dst_argb[3] = src_argb[3];
    dst_argb += 4;
    src_argb += 4;
  }
}

658 659
// Convert a row of image to Sepia tone, in place.
// Each output channel is a weighted sum of the input B, G and R with 7 bit
// fixed point weights (>> 7). Alpha is left untouched.
void ARGBSepiaRow_C(uint8* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int b = dst_argb[0];
    int g = dst_argb[1];
    int r = dst_argb[2];
    int sb = (b * 17 + g * 68 + r * 35) >> 7;
    int sg = (b * 22 + g * 88 + r * 45) >> 7;
    int sr = (b * 24 + g * 98 + r * 50) >> 7;
    // b does not overflow (weights sum to 120 < 128). a is preserved from
    // original.
    dst_argb[0] = sb;
    dst_argb[1] = clamp255(sg);
    dst_argb[2] = clamp255(sr);
    dst_argb += 4;
  }
}

676
// Apply color matrix to a row of image. Matrix is signed.
677 678 679
// TODO(fbarchard): Consider adding rounding (+32).
void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
                          const int8* matrix_argb, int width) {
680 681
  int x;
  for (x = 0; x < width; ++x) {
682 683 684 685
    int b = src_argb[0];
    int g = src_argb[1];
    int r = src_argb[2];
    int a = src_argb[3];
686
    int sb = (b * matrix_argb[0] + g * matrix_argb[1] +
687
              r * matrix_argb[2] + a * matrix_argb[3]) >> 6;
688
    int sg = (b * matrix_argb[4] + g * matrix_argb[5] +
689
              r * matrix_argb[6] + a * matrix_argb[7]) >> 6;
690
    int sr = (b * matrix_argb[8] + g * matrix_argb[9] +
691 692 693
              r * matrix_argb[10] + a * matrix_argb[11]) >> 6;
    int sa = (b * matrix_argb[12] + g * matrix_argb[13] +
              r * matrix_argb[14] + a * matrix_argb[15]) >> 6;
694 695 696
    dst_argb[0] = Clamp(sb);
    dst_argb[1] = Clamp(sg);
    dst_argb[2] = Clamp(sr);
697 698
    dst_argb[3] = Clamp(sa);
    src_argb += 4;
699 700 701 702
    dst_argb += 4;
  }
}

703 704
// Apply color table to a row of image, in place, including alpha.
// table_argb is indexed as value * 4 + channel (channel 0 = B, 1 = G, 2 = R,
// 3 = A), so it is read at offsets up to 255 * 4 + 3.
void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int b = dst_argb[0];
    int g = dst_argb[1];
    int r = dst_argb[2];
    int a = dst_argb[3];
    dst_argb[0] = table_argb[b * 4 + 0];
    dst_argb[1] = table_argb[g * 4 + 1];
    dst_argb[2] = table_argb[r * 4 + 2];
    dst_argb[3] = table_argb[a * 4 + 3];
    dst_argb += 4;
  }
}

719 720
// Apply color table to the B, G and R channels of a row of image, in place.
// Alpha is left untouched. table_argb is indexed as value * 4 + channel
// (channel 0 = B, 1 = G, 2 = R).
void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int b = dst_argb[0];
    int g = dst_argb[1];
    int r = dst_argb[2];
    dst_argb[0] = table_argb[b * 4 + 0];
    dst_argb[1] = table_argb[g * 4 + 1];
    dst_argb[2] = table_argb[r * 4 + 2];
    dst_argb += 4;
  }
}

733 734
// Quantizes the B, G and R channels of a row of ARGB pixels in place.
// Each channel c becomes (c * scale >> 16) * interval_size + interval_offset,
// i.e. scale is a 16.16 fixed point factor selecting the interval index.
// The result is stored to a uint8 without clamping. Alpha is left untouched.
void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
                       int interval_offset, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int b = dst_argb[0];
    int g = dst_argb[1];
    int r = dst_argb[2];
    dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
    dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset;
    dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset;
    dst_argb += 4;
  }
}

747 748 749 750 751 752 753 754 755 756
// REPEAT8 duplicates an 8 bit value into both bytes of a 16 bit value.
// SHADE multiplies two such 16 bit values and keeps the top 8 bits.
#define REPEAT8(v) ((v) | ((v) << 8))
#define SHADE(f, v) ((v) * (f) >> 24)

// Scales every channel of a row of ARGB pixels by the matching byte of value
// (bits 0..7 scale B, 8..15 G, 16..23 R, 24..31 A).
void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
                    uint32 value) {
  const uint32 b_scale = REPEAT8(value & 0xff);
  const uint32 g_scale = REPEAT8((value >> 8) & 0xff);
  const uint32 r_scale = REPEAT8((value >> 16) & 0xff);
  const uint32 a_scale = REPEAT8(value >> 24);

  int i;
  for (i = 0; i < width; ++i) {
    const uint32 b = REPEAT8(src_argb[0]);
    const uint32 g = REPEAT8(src_argb[1]);
    const uint32 r = REPEAT8(src_argb[2]);
    const uint32 a = REPEAT8(src_argb[3]);
    dst_argb[0] = SHADE(b, b_scale);
    dst_argb[1] = SHADE(g, g_scale);
    dst_argb[2] = SHADE(r, r_scale);
    dst_argb[3] = SHADE(a, a_scale);
    src_argb += 4;
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE

774 775 776
#define REPEAT8(v) (v) | ((v) << 8)
#define SHADE(f, v) v * f >> 16

777 778
void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1,
                       uint8* dst_argb, int width) {
779 780
  int i;
  for (i = 0; i < width; ++i) {
781 782 783 784 785 786 787 788
    const uint32 b = REPEAT8(src_argb0[0]);
    const uint32 g = REPEAT8(src_argb0[1]);
    const uint32 r = REPEAT8(src_argb0[2]);
    const uint32 a = REPEAT8(src_argb0[3]);
    const uint32 b_scale = src_argb1[0];
    const uint32 g_scale = src_argb1[1];
    const uint32 r_scale = src_argb1[2];
    const uint32 a_scale = src_argb1[3];
789 790 791 792
    dst_argb[0] = SHADE(b, b_scale);
    dst_argb[1] = SHADE(g, g_scale);
    dst_argb[2] = SHADE(r, r_scale);
    dst_argb[3] = SHADE(a, a_scale);
793 794
    src_argb0 += 4;
    src_argb1 += 4;
795 796 797 798 799 800
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE

801
#define SHADE(f, v) clamp255(v + f)
802 803 804

void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1,
                  uint8* dst_argb, int width) {
805 806
  int i;
  for (i = 0; i < width; ++i) {
fbarchard@google.com's avatar
fbarchard@google.com committed
807 808 809 810 811 812 813 814
    const int b = src_argb0[0];
    const int g = src_argb0[1];
    const int r = src_argb0[2];
    const int a = src_argb0[3];
    const int b_add = src_argb1[0];
    const int g_add = src_argb1[1];
    const int r_add = src_argb1[2];
    const int a_add = src_argb1[3];
815 816 817 818 819 820 821 822 823 824 825
    dst_argb[0] = SHADE(b, b_add);
    dst_argb[1] = SHADE(g, g_add);
    dst_argb[2] = SHADE(r, r_add);
    dst_argb[3] = SHADE(a, a_add);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
#undef SHADE

826
#define SHADE(f, v) clamp0(f - v)
827 828 829

void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1,
                       uint8* dst_argb, int width) {
830 831
  int i;
  for (i = 0; i < width; ++i) {
fbarchard@google.com's avatar
fbarchard@google.com committed
832 833 834 835 836 837 838 839
    const int b = src_argb0[0];
    const int g = src_argb0[1];
    const int r = src_argb0[2];
    const int a = src_argb0[3];
    const int b_sub = src_argb1[0];
    const int g_sub = src_argb1[1];
    const int r_sub = src_argb1[2];
    const int a_sub = src_argb1[3];
840 841 842 843 844 845 846 847 848 849 850
    dst_argb[0] = SHADE(b, b_sub);
    dst_argb[1] = SHADE(g, g_sub);
    dst_argb[2] = SHADE(r, r_sub);
    dst_argb[3] = SHADE(a, a_sub);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
#undef SHADE

fbarchard@google.com's avatar
fbarchard@google.com committed
851 852 853
// Sobel functions which mimics SSSE3.
void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
                 uint8* dst_sobelx, int width) {
854 855
  int i;
  for (i = 0; i < width; ++i) {
fbarchard@google.com's avatar
fbarchard@google.com committed
856 857 858 859 860 861 862 863 864
    int a = src_y0[i];
    int b = src_y1[i];
    int c = src_y2[i];
    int a_sub = src_y0[i + 2];
    int b_sub = src_y1[i + 2];
    int c_sub = src_y2[i + 2];
    int a_diff = a - a_sub;
    int b_diff = b - b_sub;
    int c_diff = c - c_sub;
865
    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
866
    dst_sobelx[i] = (uint8)(clamp255(sobel));
fbarchard@google.com's avatar
fbarchard@google.com committed
867 868 869 870 871
  }
}

// Vertical Sobel filter over 2 rows of Y.
// For each output i, columns i, i + 1 and i + 2 of the 2 rows are differenced
// with weights 1, 2, 1 and the absolute value, limited to 255, is stored.
// Each source row is read up to index width + 1.
void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
                 uint8* dst_sobely, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int a = src_y0[i + 0];
    int b = src_y0[i + 1];
    int c = src_y0[i + 2];
    int a_sub = src_y1[i + 0];
    int b_sub = src_y1[i + 1];
    int c_sub = src_y1[i + 2];
    int a_diff = a - a_sub;
    int b_diff = b - b_sub;
    int c_diff = c - c_sub;
    // The middle column carries twice the weight of the outer columns.
    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
    dst_sobely[i] = (uint8)(clamp255(sobel));
  }
}

888 889
void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
                uint8* dst_argb, int width) {
890 891
  int i;
  for (i = 0; i < width; ++i) {
892 893
    int r = src_sobelx[i];
    int b = src_sobely[i];
894
    int s = clamp255(r + b);
895 896 897 898
    dst_argb[0] = (uint8)(s);
    dst_argb[1] = (uint8)(s);
    dst_argb[2] = (uint8)(s);
    dst_argb[3] = (uint8)(255u);
899 900 901 902
    dst_argb += 4;
  }
}

903 904
void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
                       uint8* dst_y, int width) {
905 906
  int i;
  for (i = 0; i < width; ++i) {
907 908 909
    int r = src_sobelx[i];
    int b = src_sobely[i];
    int s = clamp255(r + b);
910
    dst_y[i] = (uint8)(s);
911 912 913
  }
}

914 915
void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
                  uint8* dst_argb, int width) {
916 917
  int i;
  for (i = 0; i < width; ++i) {
918 919
    int r = src_sobelx[i];
    int b = src_sobely[i];
920
    int g = clamp255(r + b);
921 922 923 924
    dst_argb[0] = (uint8)(b);
    dst_argb[1] = (uint8)(g);
    dst_argb[2] = (uint8)(r);
    dst_argb[3] = (uint8)(255u);
925 926 927 928
    dst_argb += 4;
  }
}

// Convert a row of Y to grey ARGB without any scaling.
// Each Y value is copied to B, G and R; alpha is set to 255.
void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
  // Copy a Y to RGB.
  int x;
  for (x = 0; x < width; ++x) {
    uint8 y = src_y[0];
    dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
    dst_argb[3] = 255u;
    dst_argb += 4;
    ++src_y;
  }
}

941
// C reference code that mimics the YUV assembly.
942

943
#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
944

945 946
#define UB 127 /* min(63,(int8)(2.018 * 64)) */
#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
947 948 949
#define UR 0

#define VB 0
950 951
#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
952 953 954 955 956 957

// Bias
#define BB UB * 128 + VB * 128
#define BG UG * 128 + VG * 128
#define BR UR * 128 + VR * 128

958 959
static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,
                              uint8* b, uint8* g, uint8* r) {
960 961 962 963
  int32 y1 = ((int32)(y) - 16) * YG;
  *b = Clamp((int32)((u * UB + v * VB) - (BB) + y1) >> 6);
  *g = Clamp((int32)((u * UG + v * VG) - (BG) + y1) >> 6);
  *r = Clamp((int32)((u * UR + v * VR) - (BR) + y1) >> 6);
964 965
}

966 967
#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
968 969
// C mimic assembly.
// TODO(fbarchard): Remove subsampling from Neon.
970 971 972
void I444ToARGBRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
973 974
                     uint8* rgb_buf,
                     int width) {
975 976
  int x;
  for (x = 0; x < width - 1; x += 2) {
977 978
    uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
    uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
979
    YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
980
    rgb_buf[3] = 255;
981
    YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
982
    rgb_buf[7] = 255;
983 984 985
    src_y += 2;
    src_u += 2;
    src_v += 2;
986 987 988
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
989 990
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
991 992 993
  }
}
#else
994 995 996
void I444ToARGBRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
997 998
                     uint8* rgb_buf,
                     int width) {
999 1000
  int x;
  for (x = 0; x < width; ++x) {
1001 1002
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1003
    rgb_buf[3] = 255;
1004 1005 1006
    src_y += 1;
    src_u += 1;
    src_v += 1;
1007 1008 1009
    rgb_buf += 4;  // Advance 1 pixel.
  }
}
1010
#endif
1011
// Also used for 420
1012 1013 1014
void I422ToARGBRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
1015 1016
                     uint8* rgb_buf,
                     int width) {
1017 1018
  int x;
  for (x = 0; x < width - 1; x += 2) {
1019 1020
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1021
    rgb_buf[3] = 255;
1022 1023
    YuvPixel(src_y[1], src_u[0], src_v[0],
             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1024
    rgb_buf[7] = 255;
1025 1026 1027
    src_y += 2;
    src_u += 1;
    src_v += 1;
1028 1029 1030
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1031 1032
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1033
    rgb_buf[3] = 255;
1034 1035 1036
  }
}

1037 1038 1039
void I422ToRGB24Row_C(const uint8* src_y,
                      const uint8* src_u,
                      const uint8* src_v,
1040 1041
                      uint8* rgb_buf,
                      int width) {
1042 1043
  int x;
  for (x = 0; x < width - 1; x += 2) {
1044 1045 1046 1047
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    YuvPixel(src_y[1], src_u[0], src_v[0],
             rgb_buf + 3, rgb_buf + 4, rgb_buf + 5);
1048 1049 1050
    src_y += 2;
    src_u += 1;
    src_v += 1;
1051 1052 1053
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
1054 1055
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1056 1057 1058
  }
}

1059 1060 1061
void I422ToRAWRow_C(const uint8* src_y,
                    const uint8* src_u,
                    const uint8* src_v,
1062 1063
                    uint8* rgb_buf,
                    int width) {
1064 1065
  int x;
  for (x = 0; x < width - 1; x += 2) {
1066 1067 1068 1069
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
    YuvPixel(src_y[1], src_u[0], src_v[0],
             rgb_buf + 5, rgb_buf + 4, rgb_buf + 3);
1070 1071 1072
    src_y += 2;
    src_u += 1;
    src_v += 1;
1073 1074 1075
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
1076 1077
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
1078 1079 1080
  }
}

1081 1082 1083
void I422ToARGB4444Row_C(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
1084 1085 1086 1087 1088 1089 1090 1091
                         uint8* dst_argb4444,
                         int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
1092 1093
  int x;
  for (x = 0; x < width - 1; x += 2) {
1094 1095
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
1096 1097 1098 1099 1100 1101
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    b1 = b1 >> 4;
    g1 = g1 >> 4;
    r1 = r1 >> 4;
1102
    *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
1103
        (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000;
1104 1105 1106
    src_y += 2;
    src_u += 1;
    src_v += 1;
1107 1108 1109
    dst_argb4444 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
1110
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1111 1112 1113
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
1114
    *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
1115 1116 1117 1118
        0xf000;
  }
}

1119 1120 1121
void I422ToARGB1555Row_C(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
1122 1123 1124 1125 1126 1127 1128 1129
                         uint8* dst_argb1555,
                         int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
1130 1131
  int x;
  for (x = 0; x < width - 1; x += 2) {
1132 1133
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
1134 1135 1136 1137 1138 1139
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 3;
    r1 = r1 >> 3;
1140
    *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
1141
        (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000;
1142 1143 1144
    src_y += 2;
    src_u += 1;
    src_v += 1;
1145 1146 1147
    dst_argb1555 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
1148
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1149 1150 1151
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
1152
    *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
1153 1154 1155 1156
        0x8000;
  }
}

1157
void I422ToRGB565Row_C(const uint8* src_y,
1158 1159 1160 1161
                       const uint8* src_u,
                       const uint8* src_v,
                       uint8* dst_rgb565,
                       int width) {
1162 1163 1164 1165 1166 1167
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
1168 1169
  int x;
  for (x = 0; x < width - 1; x += 2) {
1170 1171
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
1172 1173 1174 1175 1176 1177
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
1178
    *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
1179
        (b1 << 16) | (g1 << 21) | (r1 << 27);
1180 1181 1182
    src_y += 2;
    src_u += 1;
    src_v += 1;
1183 1184 1185
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
1186
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1187 1188 1189
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
1190
    *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
1191 1192 1193
  }
}

1194 1195 1196
void I411ToARGBRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
1197 1198
                     uint8* rgb_buf,
                     int width) {
1199 1200
  int x;
  for (x = 0; x < width - 3; x += 4) {
1201
    YuvPixel(src_y[0], src_u[0], src_v[0],
1202
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1203 1204 1205 1206 1207 1208 1209 1210 1211 1212
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0],
             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;
    YuvPixel(src_y[2], src_u[0], src_v[0],
             rgb_buf + 8, rgb_buf + 9, rgb_buf + 10);
    rgb_buf[11] = 255;
    YuvPixel(src_y[3], src_u[0], src_v[0],
             rgb_buf + 12, rgb_buf + 13, rgb_buf + 14);
    rgb_buf[15] = 255;
1213 1214 1215
    src_y += 4;
    src_u += 1;
    src_v += 1;
1216 1217 1218
    rgb_buf += 16;  // Advance 4 pixels.
  }
  if (width & 2) {
1219 1220 1221 1222 1223 1224
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0],
             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;
1225
    src_y += 2;
1226 1227 1228
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1229 1230 1231
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
1232 1233 1234
  }
}

1235 1236
void NV12ToARGBRow_C(const uint8* src_y,
                     const uint8* usrc_v,
1237 1238
                     uint8* rgb_buf,
                     int width) {
1239 1240
  int x;
  for (x = 0; x < width - 1; x += 2) {
1241 1242 1243 1244 1245 1246
    YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], usrc_v[0], usrc_v[1],
             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;
1247 1248
    src_y += 2;
    usrc_v += 2;
1249 1250 1251
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1252 1253 1254
    YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
1255 1256 1257
  }
}

1258 1259
void NV21ToARGBRow_C(const uint8* src_y,
                     const uint8* src_vu,
1260 1261
                     uint8* rgb_buf,
                     int width) {
1262 1263
  int x;
  for (x = 0; x < width - 1; x += 2) {
1264 1265 1266 1267 1268 1269 1270 1271
    YuvPixel(src_y[0], src_vu[1], src_vu[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;

    YuvPixel(src_y[1], src_vu[1], src_vu[0],
             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;

1272 1273
    src_y += 2;
    src_vu += 2;
1274 1275 1276
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1277 1278 1279
    YuvPixel(src_y[0], src_vu[1], src_vu[0],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
1280 1281 1282
  }
}

1283 1284
void NV12ToRGB565Row_C(const uint8* src_y,
                       const uint8* usrc_v,
1285 1286 1287 1288 1289 1290 1291 1292
                       uint8* dst_rgb565,
                       int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
1293 1294
  int x;
  for (x = 0; x < width - 1; x += 2) {
1295 1296
    YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
    YuvPixel(src_y[1], usrc_v[0], usrc_v[1], &b1, &g1, &r1);
1297 1298 1299 1300 1301 1302
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
1303
    *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
1304
        (b1 << 16) | (g1 << 21) | (r1 << 27);
1305 1306
    src_y += 2;
    usrc_v += 2;
1307 1308 1309
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
1310
    YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
1311 1312 1313
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
1314
    *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
1315 1316 1317
  }
}

1318 1319
void NV21ToRGB565Row_C(const uint8* src_y,
                       const uint8* vsrc_u,
1320 1321 1322 1323 1324 1325 1326 1327
                       uint8* dst_rgb565,
                       int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
1328 1329
  int x;
  for (x = 0; x < width - 1; x += 2) {
1330 1331
    YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
    YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1);
1332 1333 1334 1335 1336 1337
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
1338
    *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
1339
        (b1 << 16) | (g1 << 21) | (r1 << 27);
1340 1341
    src_y += 2;
    vsrc_u += 2;
1342 1343 1344
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
1345
    YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
1346 1347 1348
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
1349
    *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
1350 1351 1352
  }
}

1353
void YUY2ToARGBRow_C(const uint8* src_yuy2,
1354 1355
                     uint8* rgb_buf,
                     int width) {
1356 1357
  int x;
  for (x = 0; x < width - 1; x += 2) {
1358 1359 1360 1361 1362 1363
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
    YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3],
             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;
1364
    src_yuy2 += 4;
1365 1366 1367
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1368 1369 1370
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
1371 1372 1373
  }
}

1374
void UYVYToARGBRow_C(const uint8* src_uyvy,
1375 1376
                     uint8* rgb_buf,
                     int width) {
1377 1378
  int x;
  for (x = 0; x < width - 1; x += 2) {
1379 1380 1381 1382 1383 1384
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
    YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2],
             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;
1385
    src_uyvy += 4;
1386 1387 1388
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1389 1390 1391
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
1392 1393 1394
  }
}

1395 1396 1397
void I422ToBGRARow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
1398 1399
                     uint8* rgb_buf,
                     int width) {
1400 1401
  int x;
  for (x = 0; x < width - 1; x += 2) {
1402 1403 1404 1405 1406 1407
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
    rgb_buf[0] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0],
             rgb_buf + 7, rgb_buf + 6, rgb_buf + 5);
    rgb_buf[4] = 255;
1408 1409 1410
    src_y += 2;
    src_u += 1;
    src_v += 1;
1411 1412 1413
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1414 1415 1416
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
    rgb_buf[0] = 255;
1417 1418 1419
  }
}

1420 1421 1422
void I422ToABGRRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
1423 1424
                     uint8* rgb_buf,
                     int width) {
1425 1426
  int x;
  for (x = 0; x < width - 1; x += 2) {
1427 1428 1429 1430 1431 1432
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0],
             rgb_buf + 6, rgb_buf + 5, rgb_buf + 4);
    rgb_buf[7] = 255;
1433 1434 1435
    src_y += 2;
    src_u += 1;
    src_v += 1;
1436 1437 1438
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1439 1440 1441
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
    rgb_buf[3] = 255;
1442 1443 1444
  }
}

1445 1446 1447
void I422ToRGBARow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
1448 1449
                     uint8* rgb_buf,
                     int width) {
1450 1451
  int x;
  for (x = 0; x < width - 1; x += 2) {
1452 1453 1454 1455 1456 1457
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
    rgb_buf[0] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0],
             rgb_buf + 5, rgb_buf + 6, rgb_buf + 7);
    rgb_buf[4] = 255;
1458 1459 1460
    src_y += 2;
    src_u += 1;
    src_v += 1;
1461 1462 1463
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
1464 1465 1466
    YuvPixel(src_y[0], src_u[0], src_v[0],
             rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
    rgb_buf[0] = 255;
1467 1468 1469
  }
}

1470
void YToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
1471 1472
  int x;
  for (x = 0; x < width - 1; x += 2) {
1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485
    YuvPixel(src_y[0], 128, 128,
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], 128, 128,
             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
    rgb_buf[7] = 255;
    src_y += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], 128, 128,
             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
    rgb_buf[3] = 255;
1486 1487 1488
  }
}

// Copy a row of bytes in reverse order: dst[i] = src[width - 1 - i].
void MirrorRow_C(const uint8* src, uint8* dst, int width) {
  int x;
  // Start at the last source byte and walk backwards.
  src += width - 1;
  for (x = 0; x < width - 1; x += 2) {
    dst[x] = src[0];
    dst[x + 1] = src[-1];
    src -= 2;
  }
  if (width & 1) {
    // Odd trailing byte.
    dst[width - 1] = src[0];
  }
}

// Reverse a row of interleaved UV pairs and split it into separate planes.
// width is the number of UV pairs; dst_u[i] and dst_v[i] receive the U and V
// of source pair width - 1 - i.
void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
  int x;
  // Start at the last UV pair (2 bytes per pair) and walk backwards.
  src_uv += (width - 1) << 1;
  for (x = 0; x < width - 1; x += 2) {
    dst_u[x] = src_uv[0];
    dst_u[x + 1] = src_uv[-2];
    dst_v[x] = src_uv[1];
    dst_v[x + 1] = src_uv[-2 + 1];
    src_uv -= 4;
  }
  if (width & 1) {
    // Odd trailing pair.
    dst_u[width - 1] = src_uv[0];
    dst_v[width - 1] = src_uv[1];
  }
}

// Copy a row of 32 bit pixels in reverse pixel order. The bytes inside each
// pixel keep their order. width is the number of pixels.
// src and dst are accessed as uint32, so they are assumed to be suitably
// aligned for 32 bit loads and stores.
void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
  int x;
  const uint32* src32 = (const uint32*)(src);
  uint32* dst32 = (uint32*)(dst);
  // Start at the last source pixel and walk backwards.
  src32 += width - 1;
  for (x = 0; x < width - 1; x += 2) {
    dst32[x] = src32[0];
    dst32[x + 1] = src32[-1];
    src32 -= 2;
  }
  if (width & 1) {
    // Odd trailing pixel.
    dst32[width - 1] = src32[0];
  }
}

// Split a row of interleaved UV pairs into separate U and V rows.
// width is the number of UV pairs.
void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_u[x] = src_uv[0];
    dst_u[x + 1] = src_uv[2];
    dst_v[x] = src_uv[1];
    dst_v[x + 1] = src_uv[3];
    src_uv += 4;
  }
  if (width & 1) {
    // Odd trailing pair.
    dst_u[width - 1] = src_uv[0];
    dst_v[width - 1] = src_uv[1];
  }
}

// Interleave separate U and V rows into a row of UV pairs (U first).
// width is the number of UV pairs.
void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
                  int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_uv[0] = src_u[x];
    dst_uv[1] = src_v[x];
    dst_uv[2] = src_u[x + 1];
    dst_uv[3] = src_v[x + 1];
    dst_uv += 4;
  }
  if (width & 1) {
    // Odd trailing pair.
    dst_uv[0] = src_u[width - 1];
    dst_uv[1] = src_v[width - 1];
  }
}

// Copy count bytes from src to dst. The rows must not overlap (memcpy).
// A count of 0 or less copies nothing.
void CopyRow_C(const uint8* src, uint8* dst, int count) {
  // Guard so a negative count is not converted to a huge size_t.
  if (count > 0) {
    memcpy(dst, src, (size_t)(count));
  }
}

// Set count bytes of dst to the low 8 bits of v8.
// A count of 0 or less writes nothing.
void SetRow_C(uint8* dst, uint32 v8, int count) {
#ifdef _MSC_VER
  // VC will generate rep stosb.
  int x;
  for (x = 0; x < count; ++x) {
    dst[x] = v8;
  }
#else
  // Guard so a negative count is not converted to a huge size_t; this
  // matches the loop above, which does nothing in that case.
  if (count > 0) {
    memset(dst, (int)(v8), (size_t)(count));
  }
#endif
}

// Fill a rectangle of 32 bit pixels with v32.
// width is in pixels, height in rows and dst_stride is the distance in bytes
// between the starts of consecutive rows.
// Each row is written as uint32, so dst and dst_stride are assumed to keep
// rows suitably aligned for 32 bit stores.
void ARGBSetRows_C(uint8* dst, uint32 v32, int width,
                 int dst_stride, int height) {
  int y;
  for (y = 0; y < height; ++y) {
    uint32* d = (uint32*)(dst);
    int x;
    for (x = 0; x < width; ++x) {
      d[x] = v32;
    }
    dst += dst_stride;
  }
}

// Filter 2 rows of YUY2 UV's (422) into U and V (420).
// The second row is located src_stride_yuy2 bytes after the first.
// width is in pixels; one U and one V are written per pair of pixels.
void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
                   uint8* dst_u, uint8* dst_v, int width) {
  // Output a row of UV values, filtering 2 rows of YUY2.
  int x;
  for (x = 0; x < width; x += 2) {
    // Average the 2 rows, rounding up.
    dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
    dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
    src_yuy2 += 4;
    dst_u += 1;
    dst_v += 1;
  }
}

// Copy row of YUY2 UV's (422) into U and V (422).
// width is in pixels; one U and one V are written per pair of pixels.
void YUY2ToUV422Row_C(const uint8* src_yuy2,
                      uint8* dst_u, uint8* dst_v, int width) {
  // Output a row of UV values.
  int x;
  for (x = 0; x < width; x += 2) {
    dst_u[0] = src_yuy2[1];
    dst_v[0] = src_yuy2[3];
    src_yuy2 += 4;
    dst_u += 1;
    dst_v += 1;
  }
}

// Copy row of YUY2 Y's (422) into Y (420/422).
// width is in pixels; Y values sit at even byte offsets of the source.
void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
  // Output a row of Y values.
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_y[x] = src_yuy2[0];
    dst_y[x + 1] = src_yuy2[2];
    src_yuy2 += 4;
  }
  if (width & 1) {
    // Odd trailing pixel.
    dst_y[width - 1] = src_yuy2[0];
  }
}

// Filter 2 rows of UYVY UV's (422) into U and V (420).
// The second row is located src_stride_uyvy bytes after the first.
// width is in pixels; one U and one V are written per pair of pixels.
void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
                   uint8* dst_u, uint8* dst_v, int width) {
  // Output a row of UV values.
  int x;
  for (x = 0; x < width; x += 2) {
    // Average the 2 rows, rounding up.
    dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
    dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
    src_uyvy += 4;
    dst_u += 1;
    dst_v += 1;
  }
}

// Copy row of UYVY UV's (422) into U and V (422).
// width is in pixels; one U and one V are written per pair of pixels.
void UYVYToUV422Row_C(const uint8* src_uyvy,
                      uint8* dst_u, uint8* dst_v, int width) {
  // Output a row of UV values.
  int x;
  for (x = 0; x < width; x += 2) {
    dst_u[0] = src_uyvy[0];
    dst_v[0] = src_uyvy[2];
    src_uyvy += 4;
    dst_u += 1;
    dst_v += 1;
  }
}

// Copy row of UYVY Y's (422) into Y (420/422).
// width is in pixels; Y values sit at odd byte offsets of the source.
void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
  // Output a row of Y values.
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_y[x] = src_uyvy[1];
    dst_y[x + 1] = src_uyvy[3];
    src_uyvy += 4;
  }
  if (width & 1) {
    // Odd trailing pixel.
    dst_y[width - 1] = src_uyvy[1];
  }
}

// BLEND scales the background b by (256 - a) / 256 and adds the foreground f
// unscaled. The sum is not clamped; it is truncated to 8 bits when stored.
#define BLEND(f, b, a) ((((256 - (a)) * (b)) >> 8) + (f))

// Blend src_argb0 over src_argb1 and store to dst_argb.
// dst_argb may be src_argb0 or src_argb1.
// This code mimics the SSSE3 version for better testability.
// The alpha of src_argb0 controls the blend; output alpha is set to 255.
void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
                    uint8* dst_argb, int width) {
  int x;
  // 2 pixels per iteration.
  for (x = 0; x < width - 1; x += 2) {
    uint32 fb = src_argb0[0];
    uint32 fg = src_argb0[1];
    uint32 fr = src_argb0[2];
    uint32 a = src_argb0[3];
    uint32 bb = src_argb1[0];
    uint32 bg = src_argb1[1];
    uint32 br = src_argb1[2];
    dst_argb[0] = BLEND(fb, bb, a);
    dst_argb[1] = BLEND(fg, bg, a);
    dst_argb[2] = BLEND(fr, br, a);
    dst_argb[3] = 255u;

    fb = src_argb0[4 + 0];
    fg = src_argb0[4 + 1];
    fr = src_argb0[4 + 2];
    a = src_argb0[4 + 3];
    bb = src_argb1[4 + 0];
    bg = src_argb1[4 + 1];
    br = src_argb1[4 + 2];
    dst_argb[4 + 0] = BLEND(fb, bb, a);
    dst_argb[4 + 1] = BLEND(fg, bg, a);
    dst_argb[4 + 2] = BLEND(fr, br, a);
    dst_argb[4 + 3] = 255u;
    src_argb0 += 8;
    src_argb1 += 8;
    dst_argb += 8;
  }

  if (width & 1) {
    // Odd trailing pixel.
    uint32 fb = src_argb0[0];
    uint32 fg = src_argb0[1];
    uint32 fr = src_argb0[2];
    uint32 a = src_argb0[3];
    uint32 bb = src_argb1[0];
    uint32 bg = src_argb1[1];
    uint32 br = src_argb1[2];
    dst_argb[0] = BLEND(fb, bb, a);
    dst_argb[1] = BLEND(fg, bg, a);
    dst_argb[2] = BLEND(fr, br, a);
    dst_argb[3] = 255u;
  }
}
#undef BLEND
// ATTENUATE replicates both 8 bit inputs into 16 bit values, multiplies them
// and keeps the top 8 bits of the 32 bit product.
#define ATTENUATE(f, a) ((((a) | ((a) << 8)) * ((f) | ((f) << 8))) >> 24)

// Multiply source RGB by alpha and store to destination.
// This code mimics the SSSE3 version for better testability.
// Alpha itself is copied unchanged. width is the number of pixels.
void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
  int i;
  // 2 pixels per iteration.
  for (i = 0; i < width - 1; i += 2) {
    uint32 b = src_argb[0];
    uint32 g = src_argb[1];
    uint32 r = src_argb[2];
    uint32 a = src_argb[3];
    dst_argb[0] = ATTENUATE(b, a);
    dst_argb[1] = ATTENUATE(g, a);
    dst_argb[2] = ATTENUATE(r, a);
    dst_argb[3] = a;
    b = src_argb[4];
    g = src_argb[5];
    r = src_argb[6];
    a = src_argb[7];
    dst_argb[4] = ATTENUATE(b, a);
    dst_argb[5] = ATTENUATE(g, a);
    dst_argb[6] = ATTENUATE(r, a);
    dst_argb[7] = a;
    src_argb += 8;
    dst_argb += 8;
  }

  if (width & 1) {
    // Odd trailing pixel.
    const uint32 b = src_argb[0];
    const uint32 g = src_argb[1];
    const uint32 r = src_argb[2];
    const uint32 a = src_argb[3];
    dst_argb[0] = ATTENUATE(b, a);
    dst_argb[1] = ATTENUATE(g, a);
    dst_argb[2] = ATTENUATE(r, a);
    dst_argb[3] = a;
  }
}
#undef ATTENUATE

// Divide source RGB by alpha and store to destination.
// b = (b * 255 + (a / 2)) / a;
// g = (g * 255 + (a / 2)) / a;
// r = (r * 255 + (a / 2)) / a;
// Reciprocal method is off by 1 on some values, e.g. 125.
// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
#define T(a) 0x01000000 + (0x10000 / a)
1776
const uint32 fixed_invtbl8[256] = {
1777
  0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807
  T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
  T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
  T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
  T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27),
  T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f),
  T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
  T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f),
  T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47),
  T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f),
  T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57),
  T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f),
  T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67),
  T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
  T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77),
  T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f),
  T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87),
  T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f),
  T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97),
  T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f),
  T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
  T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf),
  T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7),
  T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf),
  T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7),
  T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf),
  T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7),
  T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
  T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7),
  T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef),
  T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7),
1808
  T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100 };
1809 1810 1811
#undef T

// Scale B, G and R of each pixel by the reciprocal of its alpha, looked up in
// fixed_invtbl8, and copy alpha through unchanged.
void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x, src_argb += 4, dst_argb += 4) {
    // Load the whole pixel before any store.
    const uint32 blue = src_argb[0];
    const uint32 green = src_argb[1];
    const uint32 red = src_argb[2];
    const uint32 alpha = src_argb[3];
    // Low 16 bits of the table entry: reciprocal in 8.8 fixed point.
    const uint32 inv_alpha = fixed_invtbl8[alpha] & 0xffff;
    const uint32 scaled_blue = (blue * inv_alpha) >> 8;
    const uint32 scaled_green = (green * inv_alpha) >> 8;
    const uint32 scaled_red = (red * inv_alpha) >> 8;
    // A channel larger than its alpha scales past 255, so saturate.
    dst_argb[0] = clamp255(scaled_blue);
    dst_argb[1] = clamp255(scaled_green);
    dst_argb[2] = clamp255(scaled_red);
    dst_argb[3] = alpha;
  }
}

fbarchard@google.com's avatar
fbarchard@google.com committed
1832
void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
1833
                               const int32* previous_cumsum, int width) {
fbarchard@google.com's avatar
fbarchard@google.com committed
1834
  int32 row_sum[4] = {0, 0, 0, 0};
1835 1836
  int x;
  for (x = 0; x < width; ++x) {
fbarchard@google.com's avatar
fbarchard@google.com committed
1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847
    row_sum[0] += row[x * 4 + 0];
    row_sum[1] += row[x * 4 + 1];
    row_sum[2] += row[x * 4 + 2];
    row_sum[3] += row[x * 4 + 3];
    cumsum[x * 4 + 0] = row_sum[0]  + previous_cumsum[x * 4 + 0];
    cumsum[x * 4 + 1] = row_sum[1]  + previous_cumsum[x * 4 + 1];
    cumsum[x * 4 + 2] = row_sum[2]  + previous_cumsum[x * 4 + 2];
    cumsum[x * 4 + 3] = row_sum[3]  + previous_cumsum[x * 4 + 3];
  }
}

1848 1849
// Turn cumulative sums into per channel averages for count 4 channel pixels.
// For each channel the sum is bl[w] + tl[0] - bl[0] - tl[w]; it is multiplied
// by 1 / area in float and truncated to a byte.
void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl,
                                 int w, int area, uint8* dst, int count) {
  const float inv_area = 1.0f / area;
  int i;
  for (i = 0; i < count; ++i, dst += 4, tl += 4, bl += 4) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst[c] = (uint8)((bl[w + c] + tl[c] - bl[c] - tl[w + c]) * inv_area);
    }
  }
}

1863
// Copy pixels from rotated source to destination row with a slope.
1864
LIBYUV_API
1865 1866
void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
                     uint8* dst_argb, const float* uv_dudv, int width) {
1867
  int i;
1868 1869 1870 1871
  // Render a row of pixels from source into a buffer.
  float uv[2];
  uv[0] = uv_dudv[0];
  uv[1] = uv_dudv[1];
1872
  for (i = 0; i < width; ++i) {
1873 1874 1875 1876
    int x = (int)(uv[0]);
    int y = (int)(uv[1]);
    *(uint32*)(dst_argb) =
        *(const uint32*)(src_argb + y * src_argb_stride +
1877 1878 1879 1880 1881 1882 1883
                                         x * 4);
    dst_argb += 4;
    uv[0] += uv_dudv[2];
    uv[1] += uv_dudv[3];
  }
}

1884 1885 1886
// Average 2 rows into 1, rounding up, for conversions such as I422ToI420.
// The second row is read src_uv_stride bytes after the first.
void HalfRow_C(const uint8* src_uv, int src_uv_stride,
               uint8* dst_uv, int pix) {
  int x = 0;
  while (x < pix) {
    const int upper = src_uv[x];
    const int lower = src_uv[src_uv_stride + x];
    dst_uv[x] = (upper + lower + 1) >> 1;
    ++x;
  }
}

1893
// C version 2x2 -> 2x1.
1894 1895 1896
void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
                      ptrdiff_t src_stride,
                      int width, int source_y_fraction) {
1897 1898 1899 1900
  int y1_fraction = source_y_fraction;
  int y0_fraction = 256 - y1_fraction;
  const uint8* src_ptr1 = src_ptr + src_stride;
  int x;
1901 1902 1903 1904
  if (source_y_fraction == 0) {
    memcpy(dst_ptr, src_ptr, width);
    return;
  }
1905
  if (source_y_fraction == 128) {
1906
    HalfRow_C(src_ptr, (int)(src_stride), dst_ptr, width);
1907 1908
    return;
  }
1909
  for (x = 0; x < width - 1; x += 2) {
1910 1911
    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
    dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
1912 1913 1914
    src_ptr += 2;
    src_ptr1 += 2;
    dst_ptr += 2;
1915 1916 1917 1918
  }
  if (width & 1) {
    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
  }
1919 1920
}

1921 1922 1923 1924 1925 1926
// Select 2 channels from ARGB on alternating pixels.  e.g.  BGBGBGBG
// The low byte of selector is the source byte offset for even output bytes
// and the next byte is the offset for odd output bytes.  Both offsets are
// relative to the start of each 2 pixel (8 byte) group.
void ARGBToBayerRow_C(const uint8* src_argb,
                      uint8* dst_bayer, uint32 selector, int pix) {
  const int even_offset = selector & 0xff;
  const int odd_offset = (selector >> 8) & 0xff;
  int x;
  // Copy a row of Bayer, 2 pixels at a time.
  for (x = 0; x < pix - 1; x += 2, src_argb += 8, dst_bayer += 2) {
    dst_bayer[0] = src_argb[even_offset];
    dst_bayer[1] = src_argb[odd_offset];
  }
  // Odd pixel count: emit the final even sample.
  if (pix & 1) {
    dst_bayer[0] = src_argb[even_offset];
  }
}

1939 1940
// Select G channel from ARGB.  e.g.  GGGGGGGG
void ARGBToBayerGGRow_C(const uint8* src_argb,
1941
                        uint8* dst_bayer, uint32 selector, int pix) {
fbarchard@google.com's avatar
fbarchard@google.com committed
1942
  // Copy a row of G.
1943 1944
  int x;
  for (x = 0; x < pix - 1; x += 2) {
1945 1946 1947 1948 1949 1950 1951 1952 1953 1954
    dst_bayer[0] = src_argb[1];
    dst_bayer[1] = src_argb[5];
    src_argb += 8;
    dst_bayer += 2;
  }
  if (pix & 1) {
    dst_bayer[0] = src_argb[1];
  }
}

fbarchard@google.com's avatar
fbarchard@google.com committed
1955 1956 1957 1958 1959 1960 1961 1962
// Use first 4 shuffler values to reorder ARGB channels.
// Output byte n of every pixel is source byte shuffler[n] of that pixel.
void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
                      const uint8* shuffler, int pix) {
  const int from0 = shuffler[0];
  const int from1 = shuffler[1];
  const int from2 = shuffler[2];
  const int from3 = shuffler[3];
  int x;
  // Shuffle a row of ARGB.
  for (x = 0; x < pix; ++x, src_argb += 4, dst_argb += 4) {
    // Read all four bytes before writing, to support in-place conversion.
    const uint8 out0 = src_argb[from0];
    const uint8 out1 = src_argb[from1];
    const uint8 out2 = src_argb[from2];
    const uint8 out3 = src_argb[from3];
    dst_argb[0] = out0;
    dst_argb[1] = out1;
    dst_argb[2] = out2;
    dst_argb[3] = out3;
  }
}

fbarchard@google.com's avatar
fbarchard@google.com committed
1979 1980 1981 1982
// Pack planar Y, U and V rows into interleaved Y0 U Y1 V groups.
// Each U/V sample is shared by 2 Y samples.
void I422ToYUY2Row_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* dst_frame, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_frame[0] = *src_y++;
    dst_frame[1] = *src_u++;
    dst_frame[2] = *src_y++;
    dst_frame[3] = *src_v++;
    dst_frame += 4;
  }
  // Odd width: a full 4 byte group is still written.
  if (width & 1) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = src_y[0];  // duplicate last y
    dst_frame[3] = src_v[0];
  }
}

// Pack planar Y, U and V rows into interleaved U Y0 V Y1 groups.
// Each U/V sample is shared by 2 Y samples.
void I422ToUYVYRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* dst_frame, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_frame[0] = *src_u++;
    dst_frame[1] = *src_y++;
    dst_frame[2] = *src_v++;
    dst_frame[3] = *src_y++;
    dst_frame += 4;
  }
  // Odd width: a full 4 byte group is still written.
  if (width & 1) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = src_y[0];  // duplicate last y
  }
}
2024

2025
#if !defined(LIBYUV_DISABLE_X86) && defined(HAS_I422TOARGBROW_SSSE3)
2026
// row_win.cc has asm version, but GCC uses 2 step wrapper.
2027
#if defined(__x86_64__) || defined(__i386__)
2028 2029 2030
void I422ToRGB565Row_SSSE3(const uint8* src_y,
                           const uint8* src_u,
                           const uint8* src_v,
2031 2032
                           uint8* rgb_buf,
                           int width) {
2033 2034
  // Allocate a row of ARGB.
  align_buffer_64(row, width * 4);
2035
  I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
2036
  ARGBToRGB565Row_SSE2(row, rgb_buf, width);
2037
  free_aligned_buffer_64(row);
2038
}
2039
#endif  // defined(__x86_64__) || defined(__i386__)
2040

2041
#if defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
2042 2043 2044
void I422ToARGB1555Row_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
2045 2046
                             uint8* rgb_buf,
                             int width) {
2047 2048
  // Allocate a row of ARGB.
  align_buffer_64(row, width * 4);
2049
  I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
2050
  ARGBToARGB1555Row_SSE2(row, rgb_buf, width);
2051
  free_aligned_buffer_64(row);
2052 2053
}

2054 2055 2056
void I422ToARGB4444Row_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
2057 2058
                             uint8* rgb_buf,
                             int width) {
2059 2060
  // Allocate a row of ARGB.
  align_buffer_64(row, width * 4);
2061
  I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
2062
  ARGBToARGB4444Row_SSE2(row, rgb_buf, width);
2063
  free_aligned_buffer_64(row);
2064
}
2065

2066 2067 2068 2069
// 2 step wrapper: NV12 to a temporary ARGB row, then ARGB to RGB565.
void NV12ToRGB565Row_SSSE3(const uint8* src_y,
                           const uint8* src_uv,
                           uint8* dst_rgb565,
                           int width) {
  // Temporary row of ARGB, 4 bytes per pixel.
  align_buffer_64(argb_row, width * 4);
  NV12ToARGBRow_SSSE3(src_y, src_uv, argb_row, width);
  ARGBToRGB565Row_SSE2(argb_row, dst_rgb565, width);
  free_aligned_buffer_64(argb_row);
}

// 2 step wrapper: NV21 to a temporary ARGB row, then ARGB to RGB565.
void NV21ToRGB565Row_SSSE3(const uint8* src_y,
                           const uint8* src_vu,
                           uint8* dst_rgb565,
                           int width) {
  // Temporary row of ARGB, 4 bytes per pixel.
  align_buffer_64(argb_row, width * 4);
  NV21ToARGBRow_SSSE3(src_y, src_vu, argb_row, width);
  ARGBToRGB565Row_SSE2(argb_row, dst_rgb565, width);
  free_aligned_buffer_64(argb_row);
}

2088 2089 2090
// 2 step wrapper: split YUY2 into temporary Y, U and V rows, then convert
// those rows to ARGB.
void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
                         uint8* dst_argb,
                         int width) {
  // Width rounded up to a multiple of 64.
  const int row_size = (width + 63) & ~63;
  // One buffer holds Y (row_size bytes), then U and V (row_size / 2 each).
  align_buffer_64(row_y, row_size * 2);
  uint8* row_u = row_y + row_size;
  uint8* row_v = row_u + row_size / 2;
  YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, width);
  YUY2ToYRow_SSE2(src_yuy2, row_y, width);
  I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
  free_aligned_buffer_64(row_y);
}

// 2 step wrapper using the Unaligned row functions: split YUY2 into
// temporary Y, U and V rows, then convert those rows to ARGB.
void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
                                   uint8* dst_argb,
                                   int width) {
  // Width rounded up to a multiple of 64.
  const int row_size = (width + 63) & ~63;
  // One buffer holds Y (row_size bytes), then U and V (row_size / 2 each).
  align_buffer_64(row_y, row_size * 2);
  uint8* row_u = row_y + row_size;
  uint8* row_v = row_u + row_size / 2;
  YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, row_u, row_v, width);
  YUY2ToYRow_Unaligned_SSE2(src_yuy2, row_y, width);
  I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
  free_aligned_buffer_64(row_y);
}

// 2 step wrapper: split UYVY into temporary Y, U and V rows, then convert
// those rows to ARGB.
void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
                         uint8* dst_argb,
                         int width) {
  // Width rounded up to a multiple of 64.
  const int row_size = (width + 63) & ~63;
  // One buffer holds Y (row_size bytes), then U and V (row_size / 2 each).
  align_buffer_64(row_y, row_size * 2);
  uint8* row_u = row_y + row_size;
  uint8* row_v = row_u + row_size / 2;
  UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, width);
  UYVYToYRow_SSE2(src_uyvy, row_y, width);
  I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
  free_aligned_buffer_64(row_y);
}

// 2 step wrapper using the Unaligned row functions: split UYVY into
// temporary Y, U and V rows, then convert those rows to ARGB.
void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
                                   uint8* dst_argb,
                                   int width) {
  // Width rounded up to a multiple of 64.
  const int row_size = (width + 63) & ~63;
  // One buffer holds Y (row_size bytes), then U and V (row_size / 2 each).
  align_buffer_64(row_y, row_size * 2);
  uint8* row_u = row_y + row_size;
  uint8* row_v = row_u + row_size / 2;
  UYVYToUV422Row_Unaligned_SSE2(src_uyvy, row_u, row_v, width);
  UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width);
  I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
  free_aligned_buffer_64(row_y);
}
2139

2140
#endif  // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
2141
#endif  // !defined(LIBYUV_DISABLE_X86)
2142 2143 2144 2145

// Apply a cubic polynomial to every channel of every pixel.
// For channel c (0 = B, 1 = G, 2 = R, 3 = A) with input value v:
//   out = poly[c] + poly[4 + c] * v + poly[8 + c] * v^2 + poly[12 + c] * v^3
// The result is converted to an integer and clamped with Clamp().
void ARGBPolynomialRow_C(const uint8* src_argb,
                         uint8* dst_argb, const float* poly,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    float result[4];
    int c;
    // Evaluate all four channels before storing any of them.
    for (c = 0; c < 4; ++c) {
      const float v = (float)(src_argb[c]);
      const float v2 = v * v;
      const float v3 = v2 * v;
      float sum = poly[c] + poly[c + 4] * v;
      sum += poly[c + 8] * v2;
      sum += poly[c + 12] * v3;
      result[c] = sum;
    }
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = Clamp((int32)(result[c]));
    }
    src_argb += 4;
    dst_argb += 4;
  }
}

2182
// Remap B, G and R of each pixel through a table row chosen by a weighted
// sum of the pixel; alpha is copied through unchanged.
// lumacoeff packs the B, G and R weights in its low three bytes.  The
// weighted sum masked with 0x7F00 is the byte offset of a 256 entry row
// inside luma.
void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
                             const uint8* luma, uint32 lumacoeff) {
  const uint32 bc = lumacoeff & 0xff;
  const uint32 gc = (lumacoeff >> 8) & 0xff;
  const uint32 rc = (lumacoeff >> 16) & 0xff;

  int i;
  for (i = 0; i < width - 1; i += 2) {
    int p;
    // Two pixels per outer iteration, each handled in turn.
    for (p = 0; p < 2; ++p) {
      // Luminance in rows, color values in columns.
      const uint8* table = ((src_argb[0] * bc + src_argb[1] * gc +
                             src_argb[2] * rc) & 0x7F00u) + luma;
      dst_argb[0] = table[src_argb[0]];
      dst_argb[1] = table[src_argb[1]];
      dst_argb[2] = table[src_argb[2]];
      dst_argb[3] = src_argb[3];
      src_argb += 4;
      dst_argb += 4;
    }
  }
  if (width & 1) {
    // Luminance in rows, color values in columns.
    const uint8* table = ((src_argb[0] * bc + src_argb[1] * gc +
                           src_argb[2] * rc) & 0x7F00u) + luma;
    dst_argb[0] = table[src_argb[0]];
    dst_argb[1] = table[src_argb[1]];
    dst_argb[2] = table[src_argb[2]];
    dst_argb[3] = src_argb[3];
  }
}

2218
// Copy byte 3 (alpha) of every 4 byte pixel from src to dst, leaving the
// other three destination bytes untouched.
void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
  int i;
  // Two pixels per iteration.
  for (i = 0; i < width - 1; i += 2, dst += 8, src += 8) {
    dst[3] = src[3];
    dst[7] = src[7];
  }
  if (width & 1) {
    dst[3] = src[3];
  }
}

2231
// Copy one source byte per pixel into byte 3 (alpha) of every 4 byte
// destination pixel, leaving the other three destination bytes untouched.
void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
  int i;
  // Two pixels per iteration.
  for (i = 0; i < width - 1; i += 2, dst += 8, src += 2) {
    dst[3] = src[0];
    dst[7] = src[1];
  }
  if (width & 1) {
    dst[3] = src[0];
  }
}
2243

2244
#ifdef __cplusplus
2245
}  // extern "C"
2246 2247
}  // namespace libyuv
#endif