Commit 793e5a06 authored by fbarchard@google.com's avatar fbarchard@google.com

YUY2ToARGB_NEON in one step

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/964010

git-svn-id: http://libyuv.googlecode.com/svn/trunk@468 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent b883ce6e
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 466 Version: 468
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -92,6 +92,8 @@ extern "C" { ...@@ -92,6 +92,8 @@ extern "C" {
#define HAS_I422TOARGB4444ROW_SSSE3 #define HAS_I422TOARGB4444ROW_SSSE3
#define HAS_I422TOARGB1555ROW_SSSE3 #define HAS_I422TOARGB1555ROW_SSSE3
#define HAS_I422TORGB565ROW_SSSE3 #define HAS_I422TORGB565ROW_SSSE3
#define HAS_YUY2TOARGBROW_SSSE3
#define HAS_UYVYTOARGBROW_SSSE3
// Effects // Effects
#define HAS_ARGBAFFINEROW_SSE2 #define HAS_ARGBAFFINEROW_SSE2
...@@ -163,6 +165,8 @@ extern "C" { ...@@ -163,6 +165,8 @@ extern "C" {
#define HAS_MIRRORROWUV_NEON #define HAS_MIRRORROWUV_NEON
#define HAS_NV12TOARGBROW_NEON #define HAS_NV12TOARGBROW_NEON
#define HAS_NV21TOARGBROW_NEON #define HAS_NV21TOARGBROW_NEON
#define HAS_YUY2TOARGBROW_NEON
#define HAS_UYVYTOARGBROW_NEON
#define HAS_NV12TORGB565ROW_NEON #define HAS_NV12TORGB565ROW_NEON
#define HAS_NV21TORGB565ROW_NEON #define HAS_NV21TORGB565ROW_NEON
#define HAS_RAWTOARGBROW_NEON #define HAS_RAWTOARGBROW_NEON
...@@ -304,6 +308,12 @@ void NV21ToRGB565Row_NEON(const uint8* y_buf, ...@@ -304,6 +308,12 @@ void NV21ToRGB565Row_NEON(const uint8* y_buf,
const uint8* uv_buf, const uint8* uv_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
void YUY2ToARGBRow_NEON(const uint8* yuy2_buf,
uint8* rgb_buf,
int width);
void UYVYToARGBRow_NEON(const uint8* uyvy_buf,
uint8* rgb_buf,
int width);
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
...@@ -501,6 +511,12 @@ void NV21ToARGBRow_C(const uint8* y_buf, ...@@ -501,6 +511,12 @@ void NV21ToARGBRow_C(const uint8* y_buf,
const uint8* vu_buf, const uint8* vu_buf,
uint8* argb_buf, uint8* argb_buf,
int width); int width);
void YUY2ToARGBRow_C(const uint8* yuy2_buf,
uint8* argb_buf,
int width);
void UYVYToARGBRow_C(const uint8* uyvy_buf,
uint8* argb_buf,
int width);
void I422ToBGRARow_C(const uint8* y_buf, void I422ToBGRARow_C(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
...@@ -576,6 +592,12 @@ void NV21ToRGB565Row_SSSE3(const uint8* y_buf, ...@@ -576,6 +592,12 @@ void NV21ToRGB565Row_SSSE3(const uint8* y_buf,
const uint8* vu_buf, const uint8* vu_buf,
uint8* argb_buf, uint8* argb_buf,
int width); int width);
void YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf,
uint8* argb_buf,
int width);
void UYVYToARGBRow_SSSE3(const uint8* uyvy_buf,
uint8* argb_buf,
int width);
void I422ToBGRARow_SSSE3(const uint8* y_buf, void I422ToBGRARow_SSSE3(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
...@@ -641,6 +663,12 @@ void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, ...@@ -641,6 +663,12 @@ void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* vu_buf, const uint8* vu_buf,
uint8* argb_buf, uint8* argb_buf,
int width); int width);
void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* yuy2_buf,
uint8* argb_buf,
int width);
void UYVYToARGBRow_Unaligned_SSSE3(const uint8* uyvy_buf,
uint8* argb_buf,
int width);
void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf, void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
...@@ -687,6 +715,12 @@ void NV21ToRGB565Row_Any_SSSE3(const uint8* y_buf, ...@@ -687,6 +715,12 @@ void NV21ToRGB565Row_Any_SSSE3(const uint8* y_buf,
const uint8* vu_buf, const uint8* vu_buf,
uint8* argb_buf, uint8* argb_buf,
int width); int width);
void YUY2ToARGBRow_Any_SSSE3(const uint8* yuy2_buf,
uint8* argb_buf,
int width);
void UYVYToARGBRow_Any_SSSE3(const uint8* uyvy_buf,
uint8* argb_buf,
int width);
void I422ToBGRARow_Any_SSSE3(const uint8* y_buf, void I422ToBGRARow_Any_SSSE3(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
...@@ -840,6 +874,12 @@ void NV21ToRGB565Row_Any_NEON(const uint8* y_buf, ...@@ -840,6 +874,12 @@ void NV21ToRGB565Row_Any_NEON(const uint8* y_buf,
const uint8* uv_buf, const uint8* uv_buf,
uint8* argb_buf, uint8* argb_buf,
int width); int width);
void YUY2ToARGBRow_Any_NEON(const uint8* yuy2_buf,
uint8* argb_buf,
int width);
void UYVYToARGBRow_Any_NEON(const uint8* uyvy_buf,
uint8* argb_buf,
int width);
void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf, void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 466 #define LIBYUV_VERSION 468
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -740,79 +740,28 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2, ...@@ -740,79 +740,28 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
src_stride_yuy2 = -src_stride_yuy2; src_stride_yuy2 = -src_stride_yuy2;
} }
void (*YUY2ToUV422Row)(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) =
int pix) = YUY2ToUV422Row_C; YUY2ToARGBRow_C;
void (*YUY2ToYRow)(const uint8* src_yuy2, #if defined(HAS_YUY2TOARGBROW_SSSE3)
uint8* dst_y, int pix) = YUY2ToYRow_C;
#if defined(HAS_YUY2TOYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
if (width > 16) {
YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
YUY2ToYRow = YUY2ToYRow_Any_SSE2;
}
if (IS_ALIGNED(width, 16)) {
YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
YUY2ToYRow = YUY2ToYRow_SSE2;
}
}
}
#elif defined(HAS_YUY2TOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
if (width > 8) {
YUY2ToYRow = YUY2ToYRow_Any_NEON;
if (width > 16) {
YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
}
}
if (IS_ALIGNED(width, 8)) {
YUY2ToYRow = YUY2ToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
YUY2ToUV422Row = YUY2ToUV422Row_NEON;
}
}
}
#endif
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3; YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3; YUY2ToARGBRow = YUY2ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I422ToARGBRow = I422ToARGBRow_SSSE3; YUY2ToARGBRow = YUY2ToARGBRow_SSSE3;
} }
} }
} }
#elif defined(HAS_I422TOARGBROW_NEON) #elif defined(HAS_YUY2TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_NEON; YUY2ToARGBRow = YUY2ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON; YUY2ToARGBRow = YUY2ToARGBRow_NEON;
} }
} }
#elif defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif #endif
SIMD_ALIGNED(uint8 rowy[kMaxStride]);
SIMD_ALIGNED(uint8 rowu[kMaxStride]);
SIMD_ALIGNED(uint8 rowv[kMaxStride]);
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
YUY2ToUV422Row(src_yuy2, rowu, rowv, width); YUY2ToARGBRow(src_yuy2, dst_argb, width);
YUY2ToYRow(src_yuy2, rowy, width);
I422ToARGBRow(rowy, rowu, rowv, dst_argb, width);
src_yuy2 += src_stride_yuy2; src_yuy2 += src_stride_yuy2;
dst_argb += dst_stride_argb; dst_argb += dst_stride_argb;
} }
...@@ -834,63 +783,28 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy, ...@@ -834,63 +783,28 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy; src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
src_stride_uyvy = -src_stride_uyvy; src_stride_uyvy = -src_stride_uyvy;
} }
void (*UYVYToUV422Row)(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) =
int pix) = UYVYToUV422Row_C; UYVYToARGBRow_C;
void (*UYVYToYRow)(const uint8* src_uyvy, #if defined(HAS_UYVYTOARGBROW_SSSE3)
uint8* dst_y, int pix) = UYVYToYRow_C;
#if defined(HAS_UYVYTOYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
if (width > 16) {
UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
UYVYToYRow = UYVYToYRow_Any_SSE2;
}
if (IS_ALIGNED(width, 16)) {
UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2;
UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
UYVYToUV422Row = UYVYToUV422Row_SSE2;
UYVYToYRow = UYVYToYRow_SSE2;
}
}
}
#endif
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3; UYVYToARGBRow = UYVYToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3; UYVYToARGBRow = UYVYToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I422ToARGBRow = I422ToARGBRow_SSSE3; UYVYToARGBRow = UYVYToARGBRow_SSSE3;
} }
} }
} }
#elif defined(HAS_I422TOARGBROW_NEON) #elif defined(HAS_UYVYTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_NEON; UYVYToARGBRow = UYVYToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON; UYVYToARGBRow = UYVYToARGBRow_NEON;
} }
} }
#elif defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif #endif
SIMD_ALIGNED(uint8 rowy[kMaxStride]);
SIMD_ALIGNED(uint8 rowu[kMaxStride]);
SIMD_ALIGNED(uint8 rowv[kMaxStride]);
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
UYVYToUV422Row(src_uyvy, rowu, rowv, width); UYVYToARGBRow(src_uyvy, dst_argb, width);
UYVYToYRow(src_uyvy, rowy, width);
I422ToARGBRow(rowy, rowu, rowv, dst_argb, width);
src_uyvy += src_stride_uyvy; src_uyvy += src_stride_uyvy;
dst_argb += dst_stride_argb; dst_argb += dst_stride_argb;
} }
......
...@@ -114,6 +114,7 @@ NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, NV21ToRGB565Row_C, 0, 2) ...@@ -114,6 +114,7 @@ NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, NV21ToRGB565Row_C, 0, 2)
#endif // HAS_NV12TORGB565ROW_NEON #endif // HAS_NV12TORGB565ROW_NEON
#undef NVANY #undef NVANY
// YUY2 to RGB does 8 at a time.
// RGB to RGB does multiple of 16 pixels with SIMD and remainder with C. // RGB to RGB does multiple of 16 pixels with SIMD and remainder with C.
// SSSE3 RGB24 is multiple of 16 pixels, aligned source and destination. // SSSE3 RGB24 is multiple of 16 pixels, aligned source and destination.
// SSE2 RGB565 is multiple of 4 pixels, ARGB must be aligned to 16 bytes. // SSE2 RGB565 is multiple of 4 pixels, ARGB must be aligned to 16 bytes.
...@@ -141,6 +142,10 @@ RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, ARGBToARGB4444Row_C, ...@@ -141,6 +142,10 @@ RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, ARGBToARGB4444Row_C,
3, 4, 2) 3, 4, 2)
RGBANY(I400ToARGBRow_Any_SSE2, I400ToARGBRow_Unaligned_SSE2, I400ToARGBRow_C, RGBANY(I400ToARGBRow_Any_SSE2, I400ToARGBRow_Unaligned_SSE2, I400ToARGBRow_C,
7, 1, 4) 7, 1, 4)
RGBANY(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_Unaligned_SSSE3, YUY2ToARGBRow_C,
7, 2, 4)
RGBANY(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_Unaligned_SSSE3, UYVYToARGBRow_C,
7, 2, 4)
#endif #endif
#if defined(HAS_ARGBTORGB24ROW_NEON) #if defined(HAS_ARGBTORGB24ROW_NEON)
RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 7, 4, 3) RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 7, 4, 3)
...@@ -153,10 +158,15 @@ RGBANY(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, ARGBToARGB4444Row_C, ...@@ -153,10 +158,15 @@ RGBANY(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, ARGBToARGB4444Row_C,
7, 4, 2) 7, 4, 2)
RGBANY(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, I400ToARGBRow_C, RGBANY(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, I400ToARGBRow_C,
7, 1, 4) 7, 1, 4)
RGBANY(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, YUY2ToARGBRow_C,
7, 2, 4)
RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C,
7, 2, 4)
#endif #endif
#undef RGBANY #undef RGBANY
// RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD. // RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD.
// TODO(fbarchard): Use last 16 method for all unsubsampled conversions.
#define YANY(NAMEANY, ARGBTOY_SIMD, BPP, NUM) \ #define YANY(NAMEANY, ARGBTOY_SIMD, BPP, NUM) \
void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \ void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \
ARGBTOY_SIMD(src_argb, dst_y, width - NUM); \ ARGBTOY_SIMD(src_argb, dst_y, width - NUM); \
......
...@@ -801,6 +801,34 @@ void NV21ToRGB565Row_C(const uint8* y_buf, ...@@ -801,6 +801,34 @@ void NV21ToRGB565Row_C(const uint8* y_buf,
} }
} }
// Convert one row of packed YUY2 (Y0 U Y1 V) pixels to ARGB.
// Two horizontally adjacent pixels share a single U/V pair; a trailing
// odd pixel reuses the last pair. Writes 4 bytes of ARGB per pixel.
void YUY2ToARGBRow_C(const uint8* yuy2_buf,
                     uint8* rgb_buf,
                     int width) {
  const uint8* src = yuy2_buf;
  uint8* dst = rgb_buf;
  for (int x = 0; x + 1 < width; x += 2) {
    YuvPixel(src[0], src[1], src[3], dst, 24, 16, 8, 0);
    YuvPixel(src[2], src[1], src[3], dst + 4, 24, 16, 8, 0);
    src += 4;  // Consumed 2 pixels of YUY2.
    dst += 8;  // Wrote 2 ARGB pixels.
  }
  if (width & 1) {
    // Odd final pixel: reuse the U/V from the last 2-pixel group.
    YuvPixel(src[0], src[1], src[3], dst, 24, 16, 8, 0);
  }
}
// Convert one row of packed UYVY (U Y0 V Y1) pixels to ARGB.
// Each U/V pair is shared by two Y samples; a trailing odd pixel
// reuses the final pair. Writes 4 bytes of ARGB per pixel.
void UYVYToARGBRow_C(const uint8* uyvy_buf,
                     uint8* rgb_buf,
                     int width) {
  const uint8* src = uyvy_buf;
  uint8* dst = rgb_buf;
  for (int x = 0; x + 1 < width; x += 2) {
    YuvPixel(src[1], src[0], src[2], dst, 24, 16, 8, 0);
    YuvPixel(src[3], src[0], src[2], dst + 4, 24, 16, 8, 0);
    src += 4;  // Consumed 2 pixels of UYVY.
    dst += 8;  // Wrote 2 ARGB pixels.
  }
  if (width & 1) {
    // Odd final pixel: reuse the U/V from the last 2-pixel group.
    YuvPixel(src[1], src[0], src[2], dst, 24, 16, 8, 0);
  }
}
void I422ToBGRARow_C(const uint8* y_buf, void I422ToBGRARow_C(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
...@@ -1402,6 +1430,7 @@ void I422ToARGB4444Row_SSSE3(const uint8* y_buf, ...@@ -1402,6 +1430,7 @@ void I422ToARGB4444Row_SSSE3(const uint8* y_buf,
I422ToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width); I422ToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width);
ARGBToARGB4444Row_SSE2(row, rgb_buf, width); ARGBToARGB4444Row_SSE2(row, rgb_buf, width);
} }
void NV12ToRGB565Row_SSSE3(const uint8* src_y, void NV12ToRGB565Row_SSSE3(const uint8* src_y,
const uint8* src_uv, const uint8* src_uv,
uint8* dst_rgb565, uint8* dst_rgb565,
...@@ -1420,6 +1449,50 @@ void NV21ToRGB565Row_SSSE3(const uint8* src_y, ...@@ -1420,6 +1449,50 @@ void NV21ToRGB565Row_SSSE3(const uint8* src_y,
ARGBToRGB565Row_SSE2(row, dst_rgb565, width); ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
} }
// Convert a row of YUY2 to ARGB: deinterleave into temporary planar
// I422 rows, then run the aligned I422-to-ARGB SSSE3 converter.
// assumes width <= kMaxStride so the row buffers fit — TODO confirm.
void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
                         uint8* dst_argb,
                         int width) {
  // Scratch planar rows for one scanline.
  SIMD_ALIGNED(uint8 row_y[kMaxStride]);
  SIMD_ALIGNED(uint8 row_u[kMaxStride]);
  SIMD_ALIGNED(uint8 row_v[kMaxStride]);
  YUY2ToYRow_SSE2(src_yuy2, row_y, width);
  YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, width);
  I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
}
// Unaligned variant of YUY2ToARGBRow_SSSE3: tolerates source and
// destination pointers that are not 16-byte aligned.
// assumes width <= kMaxStride so the row buffers fit — TODO confirm.
void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
                                   uint8* dst_argb,
                                   int width) {
  // Scratch planar rows for one scanline.
  SIMD_ALIGNED(uint8 row_y[kMaxStride]);
  SIMD_ALIGNED(uint8 row_u[kMaxStride]);
  SIMD_ALIGNED(uint8 row_v[kMaxStride]);
  YUY2ToYRow_Unaligned_SSE2(src_yuy2, row_y, width);
  YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, row_u, row_v, width);
  I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
}
// Convert a row of UYVY to ARGB: deinterleave into temporary planar
// I422 rows, then run the aligned I422-to-ARGB SSSE3 converter.
// assumes width <= kMaxStride so the row buffers fit — TODO confirm.
void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
                         uint8* dst_argb,
                         int width) {
  // Scratch planar rows for one scanline.
  SIMD_ALIGNED(uint8 row_y[kMaxStride]);
  SIMD_ALIGNED(uint8 row_u[kMaxStride]);
  SIMD_ALIGNED(uint8 row_v[kMaxStride]);
  UYVYToYRow_SSE2(src_uyvy, row_y, width);
  UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, width);
  I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
}
// Unaligned variant of UYVYToARGBRow_SSSE3: tolerates source and
// destination pointers that are not 16-byte aligned.
// assumes width <= kMaxStride so the row buffers fit — TODO confirm.
void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
                                   uint8* dst_argb,
                                   int width) {
  // Scratch planar rows for one scanline.
  SIMD_ALIGNED(uint8 row_y[kMaxStride]);
  SIMD_ALIGNED(uint8 row_u[kMaxStride]);
  SIMD_ALIGNED(uint8 row_v[kMaxStride]);
  UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width);
  UYVYToUV422Row_Unaligned_SSE2(src_uyvy, row_u, row_v, width);
  I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
}
#endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) #endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
#endif // !defined(YUV_DISABLE_ASM) #endif // !defined(YUV_DISABLE_ASM)
......
...@@ -51,7 +51,7 @@ extern "C" { ...@@ -51,7 +51,7 @@ extern "C" {
"vld1.u8 {d2}, [%1]! \n" \ "vld1.u8 {d2}, [%1]! \n" \
"vmov.u8 d3, d2 \n"/* split odd/even uv apart */\ "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\
"vuzp.u8 d2, d3 \n" \ "vuzp.u8 d2, d3 \n" \
"vtrn.u32 d2, d3 \n" \ "vtrn.u32 d2, d3 \n"
// Read 8 Y and 4 VU from NV21 // Read 8 Y and 4 VU from NV21
#define READNV21 \ #define READNV21 \
...@@ -59,7 +59,22 @@ extern "C" { ...@@ -59,7 +59,22 @@ extern "C" {
"vld1.u8 {d2}, [%1]! \n" \ "vld1.u8 {d2}, [%1]! \n" \
"vmov.u8 d3, d2 \n"/* split odd/even uv apart */\ "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\
"vuzp.u8 d3, d2 \n" \ "vuzp.u8 d3, d2 \n" \
"vtrn.u32 d2, d3 \n" \ "vtrn.u32 d2, d3 \n"
// Read 8 YUY2
#define READYUY2 \
"vld2.u8 {d0, d2}, [%0]! \n" \
"vmov.u8 d3, d2 \n" \
"vuzp.u8 d2, d3 \n" \
"vtrn.u32 d2, d3 \n"
// Read 8 UYVY
#define READUYVY \
"vld2.u8 {d2, d3}, [%0]! \n" \
"vmov.u8 d0, d3 \n" \
"vmov.u8 d3, d2 \n" \
"vuzp.u8 d2, d3 \n" \
"vtrn.u32 d2, d3 \n"
#define YUV422TORGB \ #define YUV422TORGB \
"veor.u8 d2, d26 \n"/*subtract 128 from u and v*/\ "veor.u8 d2, d26 \n"/*subtract 128 from u and v*/\
...@@ -674,6 +689,64 @@ void NV21ToRGB565Row_NEON(const uint8* src_y, ...@@ -674,6 +689,64 @@ void NV21ToRGB565Row_NEON(const uint8* src_y,
} }
#endif // HAS_NV21TORGB565ROW_NEON #endif // HAS_NV21TORGB565ROW_NEON
#ifdef HAS_YUY2TOARGBROW_NEON
// Convert 8 YUY2 pixels per loop iteration to ARGB using NEON.
// Uses the shared READYUY2 / YUV422TORGB asm macros; B/G/R land in
// d20-d22 and alpha is forced to 255 in d23 before the interleaved
// store. NOTE(review): assumes width is a positive multiple of 8 —
// remainders are handled by the _Any_ wrapper; confirm against callers.
void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
uint8* dst_argb,
int width) {
asm volatile (
"vld1.u8 {d24}, [%3] \n"  // load kUVToRB coefficients
"vld1.u8 {d25}, [%4] \n"  // load kUVToG coefficients
"vmov.u8 d26, #128 \n"  // presumably U/V bias — used inside YUV422TORGB
"vmov.u16 q14, #74 \n"  // presumably Y scale — used inside YUV422TORGB
"vmov.u16 q15, #16 \n"  // presumably Y offset — used inside YUV422TORGB
".p2align 2 \n"
"1: \n"
READYUY2
YUV422TORGB
"subs %2, %2, #8 \n"  // 8 pixels consumed per iteration
"vmov.u8 d23, #255 \n"  // opaque alpha
"vst4.8 {d20, d21, d22, d23}, [%1]! \n"  // store 8 interleaved ARGB pixels
"bgt 1b \n"
: "+r"(src_yuy2), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: "r"(&kUVToRB), // %3
"r"(&kUVToG) // %4
: "cc", "memory", "q0", "q1", "q2", "q3",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
#endif // HAS_YUY2TOARGBROW_NEON
#ifdef HAS_UYVYTOARGBROW_NEON
// Convert 8 UYVY pixels per loop iteration to ARGB using NEON.
// Same structure as YUY2ToARGBRow_NEON but reads via READUYVY, which
// swaps the Y/UV byte positions. NOTE(review): assumes width is a
// positive multiple of 8 — remainders are handled by the _Any_
// wrapper; confirm against callers.
void UYVYToARGBRow_NEON(const uint8* src_uyvy,
uint8* dst_argb,
int width) {
asm volatile (
"vld1.u8 {d24}, [%3] \n"  // load kUVToRB coefficients
"vld1.u8 {d25}, [%4] \n"  // load kUVToG coefficients
"vmov.u8 d26, #128 \n"  // presumably U/V bias — used inside YUV422TORGB
"vmov.u16 q14, #74 \n"  // presumably Y scale — used inside YUV422TORGB
"vmov.u16 q15, #16 \n"  // presumably Y offset — used inside YUV422TORGB
".p2align 2 \n"
"1: \n"
READUYVY
YUV422TORGB
"subs %2, %2, #8 \n"  // 8 pixels consumed per iteration
"vmov.u8 d23, #255 \n"  // opaque alpha
"vst4.8 {d20, d21, d22, d23}, [%1]! \n"  // store 8 interleaved ARGB pixels
"bgt 1b \n"
: "+r"(src_uyvy), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
: "r"(&kUVToRB), // %3
"r"(&kUVToG) // %4
: "cc", "memory", "q0", "q1", "q2", "q3",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
#endif // HAS_UYVYTOARGBROW_NEON
#ifdef HAS_SPLITUV_NEON #ifdef HAS_SPLITUV_NEON
// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v
// Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels. // Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment