Commit 31d05032 authored by fbarchard@google.com

I420ToYUY2_Any_Neon to handle odd sizes

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/922007

git-svn-id: http://libyuv.googlecode.com/svn/trunk@437 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 95eb5264
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 435 Version: 437
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -843,6 +843,14 @@ void I422ToUYVYRow_SSE2(const uint8* src_y, ...@@ -843,6 +843,14 @@ void I422ToUYVYRow_SSE2(const uint8* src_y,
const uint8* src_u, const uint8* src_u,
const uint8* src_v, const uint8* src_v,
uint8* dst_frame, int width); uint8* dst_frame, int width);
void I422ToYUY2Row_Any_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width);
void I422ToUYVYRow_Any_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width);
void I422ToYUY2Row_NEON(const uint8* src_y, void I422ToYUY2Row_NEON(const uint8* src_y,
const uint8* src_u, const uint8* src_u,
const uint8* src_v, const uint8* src_v,
...@@ -851,6 +859,14 @@ void I422ToUYVYRow_NEON(const uint8* src_y, ...@@ -851,6 +859,14 @@ void I422ToUYVYRow_NEON(const uint8* src_y,
const uint8* src_u, const uint8* src_u,
const uint8* src_v, const uint8* src_v,
uint8* dst_frame, int width); uint8* dst_frame, int width);
void I422ToYUY2Row_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width);
void I422ToUYVYRow_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width);
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 435 #define LIBYUV_VERSION 437
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -281,16 +281,21 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y, ...@@ -281,16 +281,21 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_v, uint8* dst_frame, int width) = const uint8* src_v, uint8* dst_frame, int width) =
I422ToYUY2Row_C; I422ToYUY2Row_C;
#if defined(HAS_I422TOYUY2ROW_SSE2) #if defined(HAS_I422TOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) && width >= 16 &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_SSE2; I422ToYUY2Row = I422ToYUY2Row_SSE2;
} }
}
#elif defined(HAS_I422TOYUY2ROW_NEON) #elif defined(HAS_I422TOYUY2ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_NEON; I422ToYUY2Row = I422ToYUY2Row_NEON;
} }
}
#endif #endif
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
...@@ -323,16 +328,21 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y, ...@@ -323,16 +328,21 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_v, uint8* dst_frame, int width) = const uint8* src_v, uint8* dst_frame, int width) =
I422ToYUY2Row_C; I422ToYUY2Row_C;
#if defined(HAS_I422TOYUY2ROW_SSE2) #if defined(HAS_I422TOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) && width >= 16 &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_SSE2; I422ToYUY2Row = I422ToYUY2Row_SSE2;
} }
}
#elif defined(HAS_I422TOYUY2ROW_NEON) #elif defined(HAS_I422TOYUY2ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
I422ToYUY2Row = I422ToYUY2Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToYUY2Row = I422ToYUY2Row_NEON; I422ToYUY2Row = I422ToYUY2Row_NEON;
} }
}
#endif #endif
for (int y = 0; y < height - 1; y += 2) { for (int y = 0; y < height - 1; y += 2) {
...@@ -371,16 +381,21 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y, ...@@ -371,16 +381,21 @@ int I422ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_v, uint8* dst_frame, int width) = const uint8* src_v, uint8* dst_frame, int width) =
I422ToUYVYRow_C; I422ToUYVYRow_C;
#if defined(HAS_I422TOUYVYROW_SSE2) #if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) && width >= 16 &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_SSE2; I422ToUYVYRow = I422ToUYVYRow_SSE2;
} }
}
#elif defined(HAS_I422TOUYVYROW_NEON) #elif defined(HAS_I422TOUYVYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_NEON; I422ToUYVYRow = I422ToUYVYRow_NEON;
} }
}
#endif #endif
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
...@@ -413,16 +428,21 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y, ...@@ -413,16 +428,21 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_v, uint8* dst_frame, int width) = const uint8* src_v, uint8* dst_frame, int width) =
I422ToUYVYRow_C; I422ToUYVYRow_C;
#if defined(HAS_I422TOUYVYROW_SSE2) #if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) && width >= 16 &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_SSE2; I422ToUYVYRow = I422ToUYVYRow_SSE2;
} }
}
#elif defined(HAS_I422TOUYVYROW_NEON) #elif defined(HAS_I422TOUYVYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
I422ToUYVYRow = I422ToUYVYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I422ToUYVYRow = I422ToUYVYRow_NEON; I422ToUYVYRow = I422ToUYVYRow_NEON;
} }
}
#endif #endif
for (int y = 0; y < height - 1; y += 2) { for (int y = 0; y < height - 1; y += 2) {
......
...@@ -1000,22 +1000,53 @@ void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { ...@@ -1000,22 +1000,53 @@ void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
} }
// YUV to RGB does multiple of 8 with SIMD and remainder with C. // YUV to RGB does multiple of 8 with SIMD and remainder with C.
#define YANY(NAMEANY, I420TORGB_SIMD, I420TORGB_C, UV_SHIFT, BPP) \ #define YANY(NAMEANY, I420TORGB_SIMD, I420TORGB_C, UV_SHIFT, BPP, MASK) \
void NAMEANY(const uint8* y_buf, \ void NAMEANY(const uint8* y_buf, \
const uint8* u_buf, \ const uint8* u_buf, \
const uint8* v_buf, \ const uint8* v_buf, \
uint8* rgb_buf, \ uint8* rgb_buf, \
int width) { \ int width) { \
int n = width & ~7; \ int n = width & ~MASK; \
I420TORGB_SIMD(y_buf, u_buf, v_buf, rgb_buf, n); \ I420TORGB_SIMD(y_buf, u_buf, v_buf, rgb_buf, n); \
I420TORGB_C(y_buf + n, \ I420TORGB_C(y_buf + n, \
u_buf + (n >> UV_SHIFT), \ u_buf + (n >> UV_SHIFT), \
v_buf + (n >> UV_SHIFT), \ v_buf + (n >> UV_SHIFT), \
rgb_buf + n * BPP, width & 7); \ rgb_buf + n * BPP, width & MASK); \
} }
#ifdef HAS_I422TOARGBROW_SSSE3
YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C,
0, 4, 7)
YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C,
1, 4, 7)
YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C,
2, 4, 7)
YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C,
1, 4, 7)
YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C,
1, 4, 7)
YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C,
1, 4, 7)
// I422ToRGB24Row_SSSE3 is unaligned.
YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1, 3, 7)
YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1, 3, 7)
YANY(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, I422ToYUY2Row_C, 1, 2, 15)
YANY(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, I422ToUYVYRow_C, 1, 2, 15)
#endif // HAS_I422TOARGBROW_SSSE3
#ifdef HAS_I422TOARGBROW_NEON
YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1, 4, 7)
YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1, 4, 7)
YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1, 4, 7)
YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1, 4, 7)
YANY(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, I422ToRGB24Row_C, 1, 3, 7)
YANY(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, I422ToRAWRow_C, 1, 3, 7)
YANY(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, I422ToYUY2Row_C, 1, 2, 15)
YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15)
#endif // HAS_I422TOARGBROW_NEON
#undef YANY
// Wrappers to handle odd width // Wrappers to handle odd width
#define Y2NY(NAMEANY, NV12TORGB_SIMD, NV12TORGB_C, UV_SHIFT, BPP) \ #define NV2NY(NAMEANY, NV12TORGB_SIMD, NV12TORGB_C, UV_SHIFT, BPP) \
void NAMEANY(const uint8* y_buf, \ void NAMEANY(const uint8* y_buf, \
const uint8* uv_buf, \ const uint8* uv_buf, \
uint8* rgb_buf, \ uint8* rgb_buf, \
...@@ -1028,37 +1059,16 @@ void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { ...@@ -1028,37 +1059,16 @@ void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
} }
#ifdef HAS_I422TOARGBROW_SSSE3 #ifdef HAS_I422TOARGBROW_SSSE3
YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C, NV2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C,
0, 4)
YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C,
1, 4)
YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C,
2, 4)
Y2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C,
0, 4) 0, 4)
Y2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C, NV2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C,
0, 4) 0, 4)
YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C,
1, 4)
YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C,
1, 4)
YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C,
1, 4)
// I422ToRGB24Row_SSSE3 is unaligned.
YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1, 3)
YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1, 3)
#endif // HAS_I422TOARGBROW_SSSE3 #endif // HAS_I422TOARGBROW_SSSE3
#ifdef HAS_I422TOARGBROW_NEON #ifdef HAS_I422TOARGBROW_NEON
YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1, 4) NV2NY(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, NV12ToARGBRow_C, 0, 4)
YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1, 4) NV2NY(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, NV21ToARGBRow_C, 0, 4)
YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1, 4)
YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1, 4)
Y2NY(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, NV12ToARGBRow_C, 0, 4)
Y2NY(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, NV21ToARGBRow_C, 0, 4)
YANY(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, I422ToRGB24Row_C, 1, 3)
YANY(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, I422ToRAWRow_C, 1, 3)
#endif // HAS_I422TOARGBROW_NEON #endif // HAS_I422TOARGBROW_NEON
#undef YANY #undef NVANY
// RGB to RGB does multiple of 16 pixels with SIMD and remainder with C. // RGB to RGB does multiple of 16 pixels with SIMD and remainder with C.
// SSSE3 RGB24 is multiple of 16 pixels, aligned source and destination. // SSSE3 RGB24 is multiple of 16 pixels, aligned source and destination.
......
...@@ -974,7 +974,7 @@ void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) { ...@@ -974,7 +974,7 @@ void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
"+r"(dst_rgb565), // %1 "+r"(dst_rgb565), // %1
"+r"(pix) // %2 "+r"(pix) // %2
: :
: "memory", "cc", "d0", "d1", "d2", "d3", "d4", "q8", "q9", "q10" : "memory", "cc", "d0", "d1", "d2", "d3", "q8", "q9", "q10"
); );
} }
#endif // HAS_ARGBTORGB565ROW_NEON #endif // HAS_ARGBTORGB565ROW_NEON
......
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment