row_any.cc 24.6 KB
Newer Older
1 2 3 4 5 6
/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS. All contributing project authors may
8 9 10 11 12 13 14 15 16 17 18 19
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/row.h"

#include "libyuv/basic_types.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

20 21
// TODO(fbarchard): Consider 'any' functions handling any quantity of pixels.
// TODO(fbarchard): Consider 'any' functions handling odd alignment.
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
// YUV to RGB does multiple of 8 with SIMD and remainder with C.
#define YANY(NAMEANY, I420TORGB_SIMD, I420TORGB_C, UV_SHIFT, BPP, MASK)        \
    void NAMEANY(const uint8* y_buf,                                           \
                 const uint8* u_buf,                                           \
                 const uint8* v_buf,                                           \
                 uint8* rgb_buf,                                               \
                 int width) {                                                  \
      int n = width & ~MASK;                                                   \
      I420TORGB_SIMD(y_buf, u_buf, v_buf, rgb_buf, n);                         \
      I420TORGB_C(y_buf + n,                                                   \
                  u_buf + (n >> UV_SHIFT),                                     \
                  v_buf + (n >> UV_SHIFT),                                     \
                  rgb_buf + n * BPP, width & MASK);                            \
    }

#ifdef HAS_I422TOARGBROW_SSSE3
YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C,
     0, 4, 7)
YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C,
     1, 4, 7)
YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C,
     2, 4, 7)
YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C,
     1, 4, 7)
YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C,
     1, 4, 7)
YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C,
     1, 4, 7)
50 51 52 53 54 55 56
// I422ToRGB565Row_SSSE3 is unaligned.
YANY(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, I422ToARGB4444Row_C,
     1, 2, 7)
YANY(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, I422ToARGB1555Row_C,
     1, 2, 7)
YANY(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, I422ToRGB565Row_C,
     1, 2, 7)
57 58 59 60 61 62
// I422ToRGB24Row_SSSE3 is unaligned.
YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1, 3, 7)
YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1, 3, 7)
YANY(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, I422ToYUY2Row_C, 1, 2, 15)
YANY(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, I422ToUYVYRow_C, 1, 2, 15)
#endif  // HAS_I422TOARGBROW_SSSE3
63 64 65
#ifdef HAS_I422TOARGBROW_AVX2
YANY(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, I422ToARGBRow_C, 1, 4, 15)
#endif  // HAS_I422TOARGBROW_AVX2
66
#ifdef HAS_I422TOARGBROW_NEON
67
YANY(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, I444ToARGBRow_C, 0, 4, 7)
68
YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1, 4, 7)
69
YANY(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, I411ToARGBRow_C, 2, 4, 7)
70 71 72 73 74
YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1, 4, 7)
YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1, 4, 7)
YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1, 4, 7)
YANY(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, I422ToRGB24Row_C, 1, 3, 7)
YANY(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, I422ToRAWRow_C, 1, 3, 7)
75 76 77 78
YANY(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, I422ToARGB4444Row_C,
     1, 2, 7)
YANY(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, I422ToARGB1555Row_C,
     1, 2, 7)
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
YANY(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, I422ToRGB565Row_C, 1, 2, 7)
YANY(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, I422ToYUY2Row_C, 1, 2, 15)
YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15)
#endif  // HAS_I422TOARGBROW_NEON
#undef YANY

// Wrappers to handle odd width
#define NV2NY(NAMEANY, NV12TORGB_SIMD, NV12TORGB_C, UV_SHIFT, BPP)             \
    void NAMEANY(const uint8* y_buf,                                           \
                 const uint8* uv_buf,                                          \
                 uint8* rgb_buf,                                               \
                 int width) {                                                  \
      int n = width & ~7;                                                      \
      NV12TORGB_SIMD(y_buf, uv_buf, rgb_buf, n);                               \
      NV12TORGB_C(y_buf + n,                                                   \
                  uv_buf + (n >> UV_SHIFT),                                    \
                  rgb_buf + n * BPP, width & 7);                               \
    }

98
#ifdef HAS_NV12TOARGBROW_SSSE3
99
NV2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C,
100
      0, 4)
101
NV2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C,
102 103 104
      0, 4)
#endif  // HAS_NV12TOARGBROW_SSSE3
#ifdef HAS_NV12TOARGBROW_NEON
105 106
NV2NY(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, NV12ToARGBRow_C, 0, 4)
NV2NY(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, NV21ToARGBRow_C, 0, 4)
107 108 109 110 111 112 113 114 115 116 117
#endif  // HAS_NV12TOARGBROW_NEON
#ifdef HAS_NV12TORGB565ROW_SSSE3
NV2NY(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, NV12ToRGB565Row_C,
      0, 2)
NV2NY(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, NV21ToRGB565Row_C,
      0, 2)
#endif  // HAS_NV12TORGB565ROW_SSSE3
#ifdef HAS_NV12TORGB565ROW_NEON
NV2NY(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, NV12ToRGB565Row_C, 0, 2)
NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, NV21ToRGB565Row_C, 0, 2)
#endif  // HAS_NV12TORGB565ROW_NEON
118 119 120
#undef NVANY

#define RGBANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP)          \
121 122
    void NAMEANY(const uint8* src,                                             \
                 uint8* dst,                                                   \
123 124
                 int width) {                                                  \
      int n = width & ~MASK;                                                   \
125 126
      ARGBTORGB_SIMD(src, dst, n);                                             \
      ARGBTORGB_C(src + n * SBPP, dst + n * BPP, width & MASK);                \
127 128 129 130 131 132 133 134 135 136 137 138 139
    }

#if defined(HAS_ARGBTORGB24ROW_SSSE3)
RGBANY(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, ARGBToRGB24Row_C,
       15, 4, 3)
RGBANY(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, ARGBToRAWRow_C,
       15, 4, 3)
RGBANY(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, ARGBToRGB565Row_C,
       3, 4, 2)
RGBANY(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, ARGBToARGB1555Row_C,
       3, 4, 2)
RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, ARGBToARGB4444Row_C,
       3, 4, 2)
140 141
#endif
#if defined(HAS_I400TOARGBROW_SSE2)
142 143
RGBANY(I400ToARGBRow_Any_SSE2, I400ToARGBRow_Unaligned_SSE2, I400ToARGBRow_C,
       7, 1, 4)
144 145
#endif
#if defined(HAS_YTOARGBROW_SSE2)
146 147
RGBANY(YToARGBRow_Any_SSE2, YToARGBRow_SSE2, YToARGBRow_C,
       7, 1, 4)
148
RGBANY(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_Unaligned_SSSE3, YUY2ToARGBRow_C,
fbarchard@google.com's avatar
fbarchard@google.com committed
149
       15, 2, 4)
150
RGBANY(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_Unaligned_SSSE3, UYVYToARGBRow_C,
fbarchard@google.com's avatar
fbarchard@google.com committed
151
       15, 2, 4)
152 153 154
// These require alignment on ARGB, so C is used for remainder.
RGBANY(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, RGB24ToARGBRow_C,
       15, 3, 4)
155
RGBANY(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, RAWToARGBRow_C,
156 157 158 159 160 161 162
       15, 3, 4)
RGBANY(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, RGB565ToARGBRow_C,
       7, 2, 4)
RGBANY(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, ARGB1555ToARGBRow_C,
       7, 2, 4)
RGBANY(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, ARGB4444ToARGBRow_C,
       7, 2, 4)
163 164 165 166 167 168 169 170 171 172
#endif
#if defined(HAS_ARGBTORGB24ROW_NEON)
RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 7, 4, 3)
RGBANY(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, ARGBToRAWRow_C, 7, 4, 3)
RGBANY(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, ARGBToRGB565Row_C,
       7, 4, 2)
RGBANY(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, ARGBToARGB1555Row_C,
       7, 4, 2)
RGBANY(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, ARGBToARGB4444Row_C,
       7, 4, 2)
173 174
RGBANY(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, I400ToARGBRow_C,
       7, 1, 4)
175 176
RGBANY(YToARGBRow_Any_NEON, YToARGBRow_NEON, YToARGBRow_C,
       7, 1, 4)
177 178 179 180
RGBANY(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, YUY2ToARGBRow_C,
       7, 2, 4)
RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C,
       7, 2, 4)
181 182 183
#endif
#undef RGBANY

184 185 186 187 188 189 190 191 192 193 194 195
// ARGB to Bayer does multiple of 4 pixels, SSSE3 aligned src, unaligned dst.
#define BAYERANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP)        \
    void NAMEANY(const uint8* src,                                             \
                 uint8* dst, uint32 selector,                                  \
                 int width) {                                                  \
      int n = width & ~MASK;                                                   \
      ARGBTORGB_SIMD(src, dst, selector, n);                                   \
      ARGBTORGB_C(src + n * SBPP, dst + n * BPP, selector, width & MASK);      \
    }

#if defined(HAS_ARGBTOBAYERROW_SSSE3)
BAYERANY(ARGBToBayerRow_Any_SSSE3, ARGBToBayerRow_SSSE3, ARGBToBayerRow_C,
196
         7, 4, 1)
197 198 199
#endif
#if defined(HAS_ARGBTOBAYERROW_NEON)
BAYERANY(ARGBToBayerRow_Any_NEON, ARGBToBayerRow_NEON, ARGBToBayerRow_C,
200
         7, 4, 1)
201
#endif
202 203 204 205 206 207 208 209 210
#if defined(HAS_ARGBTOBAYERGGROW_SSE2)
BAYERANY(ARGBToBayerGGRow_Any_SSE2, ARGBToBayerGGRow_SSE2, ARGBToBayerGGRow_C,
         7, 4, 1)
#endif
#if defined(HAS_ARGBTOBAYERGGROW_NEON)
BAYERANY(ARGBToBayerGGRow_Any_NEON, ARGBToBayerGGRow_NEON, ARGBToBayerGGRow_C,
         7, 4, 1)
#endif

211 212
#undef BAYERANY

213
// RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD.
214
#define YANY(NAMEANY, ARGBTOY_SIMD, SBPP, BPP, NUM)                            \
215 216
    void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) {             \
      ARGBTOY_SIMD(src_argb, dst_y, width - NUM);                              \
217
      ARGBTOY_SIMD(src_argb + (width - NUM) * SBPP,                            \
218
                   dst_y + (width - NUM) * BPP, NUM);                          \
219 220
    }

221
#ifdef HAS_ARGBTOYROW_AVX2
222
YANY(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 4, 1, 32)
223
YANY(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 4, 1, 32)
224 225
YANY(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 2, 1, 32)
YANY(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 2, 1, 32)
226
#endif
227
#ifdef HAS_ARGBTOYROW_SSSE3
228
YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 1, 16)
229 230
#endif
#ifdef HAS_BGRATOYROW_SSSE3
231 232 233 234 235
YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 1, 16)
YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2, 1, 16)
YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 1, 16)
236
#endif
237 238 239
#ifdef HAS_ARGBTOYJROW_SSSE3
YANY(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_Unaligned_SSSE3, 4, 1, 16)
#endif
240
#ifdef HAS_ARGBTOYROW_NEON
241
YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 1, 8)
242
YANY(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 4, 1, 8)
243 244 245 246 247 248
YANY(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 4, 1, 8)
YANY(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 4, 1, 8)
YANY(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 4, 1, 8)
YANY(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 3, 1, 8)
YANY(RAWToYRow_Any_NEON, RAWToYRow_NEON, 3, 1, 8)
YANY(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 2, 1, 8)
249 250
YANY(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 2, 1, 8)
YANY(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 2, 1, 8)
251 252 253 254 255
YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2, 1, 16)
YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 1, 16)
YANY(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 3, 4, 8)
YANY(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 3, 4, 8)
YANY(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 2, 4, 8)
256 257
YANY(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 2, 4, 8)
YANY(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 2, 4, 8)
258 259 260
#endif
#undef YANY

261 262 263 264 265 266 267 268
#define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK)                \
    void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) {             \
      int n = width & ~MASK;                                                   \
      ARGBTOY_SIMD(src_argb, dst_y, n);                                        \
      ARGBTOY_C(src_argb + n * SBPP,                                           \
                dst_y  + n * BPP, width & MASK);                               \
    }

269
// Attenuate is destructive so last16 method can not be used due to overlap.
270 271 272 273 274 275 276 277
#ifdef HAS_ARGBATTENUATEROW_SSSE3
YANY(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, ARGBAttenuateRow_C,
     4, 4, 3)
#endif
#ifdef HAS_ARGBATTENUATEROW_SSE2
YANY(ARGBAttenuateRow_Any_SSE2, ARGBAttenuateRow_SSE2, ARGBAttenuateRow_C,
     4, 4, 3)
#endif
278 279 280 281 282 283 284 285 286 287 288 289
#ifdef HAS_ARGBUNATTENUATEROW_SSE2
YANY(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, ARGBUnattenuateRow_C,
     4, 4, 3)
#endif
#ifdef HAS_ARGBATTENUATEROW_AVX2
YANY(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, ARGBAttenuateRow_C,
     4, 4, 7)
#endif
#ifdef HAS_ARGBUNATTENUATEROW_AVX2
YANY(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, ARGBUnattenuateRow_C,
     4, 4, 7)
#endif
290 291 292 293
#ifdef HAS_ARGBATTENUATEROW_NEON
YANY(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, ARGBAttenuateRow_C,
     4, 4, 7)
#endif
fbarchard@google.com's avatar
fbarchard@google.com committed
294
#undef YANY
295

296
// RGB/YUV to UV does multiple of 16 with SIMD and remainder with C.
297
#define UVANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK)                     \
298 299
    void NAMEANY(const uint8* src_argb, int src_stride_argb,                   \
                 uint8* dst_u, uint8* dst_v, int width) {                      \
300
      int n = width & ~MASK;                                                   \
301 302
      ANYTOUV_SIMD(src_argb, src_stride_argb, dst_u, dst_v, n);                \
      ANYTOUV_C(src_argb  + n * BPP, src_stride_argb,                          \
303 304
                dst_u + (n >> 1),                                              \
                dst_v + (n >> 1),                                              \
305
                width & MASK);                                                 \
306 307
    }

308
#ifdef HAS_ARGBTOUVROW_AVX2
309 310 311
UVANY(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, ARGBToUVRow_C, 4, 31)
UVANY(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, YUY2ToUVRow_C, 2, 31)
UVANY(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, UYVYToUVRow_C, 2, 31)
312
#endif
313
#ifdef HAS_ARGBTOUVROW_SSSE3
314
UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4, 15)
315 316
UVANY(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_Unaligned_SSSE3, ARGBToUVJRow_C,
      4, 15)
317 318 319 320 321
UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4, 15)
UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4, 15)
UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4, 15)
UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, YUY2ToUVRow_C, 2, 15)
UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2, 15)
322
#endif
fbarchard@google.com's avatar
fbarchard@google.com committed
323
#ifdef HAS_ARGBTOUVROW_NEON
324
UVANY(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, ARGBToUVRow_C, 4, 15)
325
UVANY(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, ARGBToUVJRow_C, 4, 15)
326 327 328 329 330 331 332 333 334 335
UVANY(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, BGRAToUVRow_C, 4, 15)
UVANY(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, ABGRToUVRow_C, 4, 15)
UVANY(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, RGBAToUVRow_C, 4, 15)
UVANY(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, RGB24ToUVRow_C, 3, 15)
UVANY(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, RAWToUVRow_C, 3, 15)
UVANY(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, RGB565ToUVRow_C, 2, 15)
UVANY(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, ARGB1555ToUVRow_C, 2, 15)
UVANY(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, ARGB4444ToUVRow_C, 2, 15)
UVANY(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, YUY2ToUVRow_C, 2, 15)
UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2, 15)
336 337 338
#endif
#undef UVANY

339
#define UV422ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK, SHIFT)           \
340 341
    void NAMEANY(const uint8* src_uv,                                          \
                 uint8* dst_u, uint8* dst_v, int width) {                      \
342
      int n = width & ~MASK;                                                   \
343 344
      ANYTOUV_SIMD(src_uv, dst_u, dst_v, n);                                   \
      ANYTOUV_C(src_uv  + n * BPP,                                             \
345 346 347
                dst_u + (n >> SHIFT),                                          \
                dst_v + (n >> SHIFT),                                          \
                width & MASK);                                                 \
348 349
    }

350 351 352 353
#ifdef HAS_ARGBTOUV444ROW_SSSE3
UV422ANY(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_Unaligned_SSSE3,
         ARGBToUV444Row_C, 4, 15, 0)
#endif
354 355 356 357 358 359
#ifdef HAS_YUY2TOUV422ROW_AVX2
UV422ANY(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2,
         YUY2ToUV422Row_C, 2, 31, 1)
UV422ANY(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2,
         UYVYToUV422Row_C, 2, 31, 1)
#endif
360 361
#ifdef HAS_ARGBTOUVROW_SSSE3
UV422ANY(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_Unaligned_SSSE3,
362
         ARGBToUV422Row_C, 4, 15, 1)
363
UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_Unaligned_SSE2,
364
         YUY2ToUV422Row_C, 2, 15, 1)
365
UV422ANY(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_Unaligned_SSE2,
366
         UYVYToUV422Row_C, 2, 15, 1)
367 368
#endif
#ifdef HAS_YUY2TOUV422ROW_NEON
369
UV422ANY(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON,
370
         ARGBToUV444Row_C, 4, 7, 0)
371 372 373 374
UV422ANY(ARGBToUV422Row_Any_NEON, ARGBToUV422Row_NEON,
         ARGBToUV422Row_C, 4, 15, 1)
UV422ANY(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON,
         ARGBToUV411Row_C, 4, 31, 2)
375
UV422ANY(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON,
376
         YUY2ToUV422Row_C, 2, 15, 1)
377
UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON,
378
         UYVYToUV422Row_C, 2, 15, 1)
379 380 381
#endif
#undef UV422ANY

382
#define SPLITUVROWANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK)                  \
383 384 385 386 387 388 389 390 391 392
    void NAMEANY(const uint8* src_uv,                                          \
                 uint8* dst_u, uint8* dst_v, int width) {                      \
      int n = width & ~MASK;                                                   \
      ANYTOUV_SIMD(src_uv, dst_u, dst_v, n);                                   \
      ANYTOUV_C(src_uv + n * 2,                                                \
                dst_u + n,                                                     \
                dst_v + n,                                                     \
                width & MASK);                                                 \
    }

393
#ifdef HAS_SPLITUVROW_SSE2
394
SPLITUVROWANY(SplitUVRow_Any_SSE2, SplitUVRow_Unaligned_SSE2, SplitUVRow_C, 15)
395
#endif
396
#ifdef HAS_SPLITUVROW_AVX2
397
SPLITUVROWANY(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, SplitUVRow_C, 31)
398
#endif
399
#ifdef HAS_SPLITUVROW_NEON
400
SPLITUVROWANY(SplitUVRow_Any_NEON, SplitUVRow_NEON, SplitUVRow_C, 15)
401
#endif
402
#ifdef HAS_SPLITUVROW_MIPS_DSPR2
403
SPLITUVROWANY(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_Unaligned_MIPS_DSPR2,
404
              SplitUVRow_C, 15)
405
#endif
406
#undef SPLITUVROWANY
407

408
#define MERGEUVROW_ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK)                 \
409 410 411 412 413 414 415 416 417 418
    void NAMEANY(const uint8* src_u, const uint8* src_v,                       \
                 uint8* dst_uv, int width) {                                   \
      int n = width & ~MASK;                                                   \
      ANYTOUV_SIMD(src_u, src_v, dst_uv, n);                                   \
      ANYTOUV_C(src_u + n,                                                     \
                src_v + n,                                                     \
                dst_uv + n * 2,                                                \
                width & MASK);                                                 \
    }

419
#ifdef HAS_MERGEUVROW_SSE2
420
MERGEUVROW_ANY(MergeUVRow_Any_SSE2, MergeUVRow_Unaligned_SSE2, MergeUVRow_C, 15)
421
#endif
422
#ifdef HAS_MERGEUVROW_AVX2
423
MERGEUVROW_ANY(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, MergeUVRow_C, 31)
424
#endif
425
#ifdef HAS_MERGEUVROW_NEON
426
MERGEUVROW_ANY(MergeUVRow_Any_NEON, MergeUVRow_NEON, MergeUVRow_C, 15)
427
#endif
428
#undef MERGEUVROW_ANY
429

430
#define MATHROW_ANY(NAMEANY, ARGBMATH_SIMD, ARGBMATH_C, MASK)                  \
431 432
    void NAMEANY(const uint8* src_argb0, const uint8* src_argb1,               \
                 uint8* dst_argb, int width) {                                 \
433
      int n = width & ~MASK;                                                   \
434 435
      ARGBMATH_SIMD(src_argb0, src_argb1, dst_argb, n);                        \
      ARGBMATH_C(src_argb0 + n * 4,                                            \
436
                 src_argb1 + n * 4,                                            \
437 438 439 440 441
                 dst_argb + n * 4,                                             \
                 width & MASK);                                                \
    }

#ifdef HAS_ARGBMULTIPLYROW_SSE2
442 443
MATHROW_ANY(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, ARGBMultiplyRow_C,
            3)
444
#endif
445 446 447
#ifdef HAS_ARGBADDROW_SSE2
MATHROW_ANY(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, ARGBAddRow_C, 3)
#endif
448 449 450 451
#ifdef HAS_ARGBSUBTRACTROW_SSE2
MATHROW_ANY(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, ARGBSubtractRow_C,
            3)
#endif
452 453 454 455 456 457 458 459 460 461 462
#ifdef HAS_ARGBMULTIPLYROW_AVX2
MATHROW_ANY(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, ARGBMultiplyRow_C,
            7)
#endif
#ifdef HAS_ARGBADDROW_AVX2
MATHROW_ANY(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, ARGBAddRow_C, 7)
#endif
#ifdef HAS_ARGBSUBTRACTROW_AVX2
MATHROW_ANY(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, ARGBSubtractRow_C,
            7)
#endif
463 464 465 466 467 468 469
#ifdef HAS_ARGBMULTIPLYROW_NEON
MATHROW_ANY(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, ARGBMultiplyRow_C,
            7)
#endif
#ifdef HAS_ARGBADDROW_NEON
MATHROW_ANY(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, ARGBAddRow_C, 7)
#endif
470 471 472 473
#ifdef HAS_ARGBSUBTRACTROW_NEON
MATHROW_ANY(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, ARGBSubtractRow_C,
            7)
#endif
474 475
#undef MATHROW_ANY

fbarchard@google.com's avatar
fbarchard@google.com committed
476 477 478 479 480 481 482 483 484 485
// Shuffle may want to work in place, so last16 method can not be used.
#define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK)                \
    void NAMEANY(const uint8* src_argb, uint8* dst_argb,                       \
                 const uint8* shuffler, int width) {                           \
      int n = width & ~MASK;                                                   \
      ARGBTOY_SIMD(src_argb, dst_argb, shuffler, n);                           \
      ARGBTOY_C(src_argb + n * SBPP,                                           \
                dst_argb  + n * BPP, shuffler, width & MASK);                  \
    }

486 487 488 489
#ifdef HAS_ARGBSHUFFLEROW_SSE2
YANY(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2,
     ARGBShuffleRow_C, 4, 4, 3)
#endif
fbarchard@google.com's avatar
fbarchard@google.com committed
490 491 492 493 494 495 496 497 498 499 500 501 502 503
#ifdef HAS_ARGBSHUFFLEROW_SSSE3
YANY(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_Unaligned_SSSE3,
     ARGBShuffleRow_C, 4, 4, 7)
#endif
#ifdef HAS_ARGBSHUFFLEROW_AVX2
YANY(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2,
     ARGBShuffleRow_C, 4, 4, 15)
#endif
#ifdef HAS_ARGBSHUFFLEROW_NEON
YANY(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON,
     ARGBShuffleRow_C, 4, 4, 3)
#endif
#undef YANY

504
// Interpolate may want to work in place, so last16 method can not be used.
505 506 507
#define NANY(NAMEANY, TERP_SIMD, TERP_C, SBPP, BPP, MASK)                      \
    void NAMEANY(uint8* dst_ptr, const uint8* src_ptr,                         \
                 ptrdiff_t src_stride_ptr, int width,                          \
508 509
                 int source_y_fraction) {                                      \
      int n = width & ~MASK;                                                   \
510 511 512 513 514
      TERP_SIMD(dst_ptr, src_ptr, src_stride_ptr,                              \
                n, source_y_fraction);                                         \
      TERP_C(dst_ptr + n * BPP,                                                \
             src_ptr + n * SBPP, src_stride_ptr,                               \
             width & MASK, source_y_fraction);                                 \
515 516
    }

517 518 519 520
#ifdef HAS_INTERPOLATEROW_AVX2
NANY(InterpolateRow_Any_AVX2, InterpolateRow_AVX2,
     InterpolateRow_C, 1, 1, 32)
#endif
521 522 523
#ifdef HAS_INTERPOLATEROW_SSSE3
NANY(InterpolateRow_Any_SSSE3, InterpolateRow_Unaligned_SSSE3,
     InterpolateRow_C, 1, 1, 15)
524
#endif
525 526 527
#ifdef HAS_INTERPOLATEROW_SSE2
NANY(InterpolateRow_Any_SSE2, InterpolateRow_Unaligned_SSE2,
     InterpolateRow_C, 1, 1, 15)
528
#endif
529 530 531 532 533 534 535
#ifdef HAS_INTERPOLATEROW_NEON
NANY(InterpolateRow_Any_NEON, InterpolateRow_NEON,
     InterpolateRow_C, 1, 1, 15)
#endif
#ifdef HAS_INTERPOLATEROW_MIPS_DSPR2
NANY(InterpolateRow_Any_MIPS_DSPR2, InterpolateRow_MIPS_DSPR2,
     InterpolateRow_C, 1, 1, 3)
536 537 538
#endif
#undef NANY

539 540 541 542
#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif