Commit e214fe3f authored by fbarchard@google.com

I411ToARGB doing 2 UV values with 8 Y values

BUG=40
TEST=planar_test
Review URL: https://webrtc-codereview.appspot.com/637005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@277 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 6d6b7709
...@@ -31,6 +31,13 @@ void CopyPlane(const uint8* src_y, int src_stride_y, ...@@ -31,6 +31,13 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
int width, int height); int width, int height);
// Convert I420 to I400. (calls CopyPlane ignoring u/v)
// NOTE(review): the two plane arguments after dst_y are declared as non-const
// dst_u/dst_v, yet the comment above says u/v are ignored - confirm against
// the definition whether these are unused planes and whether the names and
// constness are intended.
int I420ToI400(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// I420 mirror. // I420 mirror.
int I420Mirror(const uint8* src_y, int src_stride_y, int I420Mirror(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
...@@ -62,6 +69,13 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy, ...@@ -62,6 +69,13 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// Convert I444 to ARGB.
// Takes separate Y, U and V source planes, each paired with its own stride
// argument, plus one ARGB destination with its stride and the image
// width/height. The meaning of the int return value is not visible from this
// declaration - check the definition.
int I444ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I422 to ARGB. // Convert I422 to ARGB.
int I422ToARGB(const uint8* src_y, int src_stride_y, int I422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
...@@ -69,8 +83,8 @@ int I422ToARGB(const uint8* src_y, int src_stride_y, ...@@ -69,8 +83,8 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// Convert I444 to ARGB. // Convert I411 to ARGB.
int I444ToARGB(const uint8* src_y, int src_stride_y, int I411ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v, const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
......
This diff is collapsed.
...@@ -446,18 +446,18 @@ int I420ToBayer(const uint8* src_y, int src_stride_y, ...@@ -446,18 +446,18 @@ int I420ToBayer(const uint8* src_y, int src_stride_y,
src_stride_u = -src_stride_u; src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v; src_stride_v = -src_stride_v;
} }
void (*I420ToARGBRow)(const uint8* y_buf, void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width) = I420ToARGBRow_C; int width) = I422ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON) #if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON; I422ToARGBRow = I422ToARGBRow_NEON;
} }
#elif defined(HAS_I420TOARGBROW_SSSE3) #elif defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3; I422ToARGBRow = I422ToARGBRow_SSSE3;
} }
#endif #endif
SIMD_ALIGNED(uint8 row[kMaxStride]); SIMD_ALIGNED(uint8 row[kMaxStride]);
...@@ -478,7 +478,7 @@ int I420ToBayer(const uint8* src_y, int src_stride_y, ...@@ -478,7 +478,7 @@ int I420ToBayer(const uint8* src_y, int src_stride_y,
} }
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width); I422ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToBayerRow(row, dst_bayer, index_map[y & 1], width); ARGBToBayerRow(row, dst_bayer, index_map[y & 1], width);
dst_bayer += dst_stride_bayer; dst_bayer += dst_stride_bayer;
src_y += src_stride_y; src_y += src_stride_y;
......
This diff is collapsed.
...@@ -30,7 +30,7 @@ extern "C" { ...@@ -30,7 +30,7 @@ extern "C" {
#define LIBYUV_SSSE3_ONLY #define LIBYUV_SSSE3_ONLY
#endif #endif
// The following are available on all x86 platforms // The following are available on all x86 platforms:
#if !defined(YUV_DISABLE_ASM) && \ #if !defined(YUV_DISABLE_ASM) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_ABGRTOARGBROW_SSSE3 #define HAS_ABGRTOARGBROW_SSSE3
...@@ -55,10 +55,11 @@ extern "C" { ...@@ -55,10 +55,11 @@ extern "C" {
#define HAS_COPYROW_SSE2 #define HAS_COPYROW_SSE2
#define HAS_COPYROW_X86 #define HAS_COPYROW_X86
#define HAS_I400TOARGBROW_SSE2 #define HAS_I400TOARGBROW_SSE2
#define HAS_I420TOABGRROW_SSSE3 #define HAS_I422TOABGRROW_SSSE3
#define HAS_I420TOARGBROW_SSSE3 #define HAS_I422TOARGBROW_SSSE3
#define HAS_I420TOBGRAROW_SSSE3 #define HAS_I422TOBGRAROW_SSSE3
#define HAS_I444TOARGBROW_SSSE3 #define HAS_I444TOARGBROW_SSSE3
#define HAS_I411TOARGBROW_SSSE3
#define HAS_MIRRORROW_SSSE3 #define HAS_MIRRORROW_SSSE3
#define HAS_MIRRORROWUV_SSSE3 #define HAS_MIRRORROWUV_SSSE3
#define HAS_ADDROW_SSE2 #define HAS_ADDROW_SSE2
...@@ -75,7 +76,7 @@ extern "C" { ...@@ -75,7 +76,7 @@ extern "C" {
#define HAS_ARGBSEPIAROW_SSSE3 #define HAS_ARGBSEPIAROW_SSSE3
#endif #endif
// The following are available only useful when SSSE3 is unavailable. // The following are disabled when SSSE3 is available:
#if !defined(YUV_DISABLE_ASM) && \ #if !defined(YUV_DISABLE_ASM) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
!defined(LIBYUV_SSSE3_ONLY) !defined(LIBYUV_SSSE3_ONLY)
...@@ -91,9 +92,9 @@ extern "C" { ...@@ -91,9 +92,9 @@ extern "C" {
#define HAS_MIRRORROWUV_NEON #define HAS_MIRRORROWUV_NEON
#define HAS_SPLITUV_NEON #define HAS_SPLITUV_NEON
#define HAS_COPYROW_NEON #define HAS_COPYROW_NEON
#define HAS_I420TOARGBROW_NEON #define HAS_I422TOARGBROW_NEON
#define HAS_I420TOBGRAROW_NEON #define HAS_I422TOBGRAROW_NEON
#define HAS_I420TOABGRROW_NEON #define HAS_I422TOABGRROW_NEON
#endif #endif
#if defined(_MSC_VER) #if defined(_MSC_VER)
...@@ -118,17 +119,17 @@ typedef uint32 __attribute__((vector_size(16))) uvec32; ...@@ -118,17 +119,17 @@ typedef uint32 __attribute__((vector_size(16))) uvec32;
#define OMITFP __attribute__((optimize("omit-frame-pointer"))) #define OMITFP __attribute__((optimize("omit-frame-pointer")))
#endif #endif
void I420ToARGBRow_NEON(const uint8* y_buf, void I422ToARGBRow_NEON(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
void I420ToBGRARow_NEON(const uint8* y_buf, void I422ToBGRARow_NEON(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
void I420ToABGRRow_NEON(const uint8* y_buf, void I422ToABGRRow_NEON(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
...@@ -219,19 +220,19 @@ void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); ...@@ -219,19 +220,19 @@ void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix); void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix); void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
void I420ToARGBRow_C(const uint8* y_buf, void I422ToARGBRow_C(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
void I420ToBGRARow_C(const uint8* y_buf, void I422ToBGRARow_C(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
void I420ToABGRRow_C(const uint8* y_buf, void I422ToABGRRow_C(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
...@@ -243,54 +244,78 @@ void I444ToARGBRow_C(const uint8* y_buf, ...@@ -243,54 +244,78 @@ void I444ToARGBRow_C(const uint8* y_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
// Plain C row function: converts `width` pixels of one I411 row to ARGB.
// The C implementation shares one u_buf/v_buf sample across four consecutive
// y_buf samples and writes 4 bytes to rgb_buf per pixel.
void I411ToARGBRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void YToARGBRow_C(const uint8* y_buf, void YToARGBRow_C(const uint8* y_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
void I420ToARGBRow_SSSE3(const uint8* y_buf, void I444ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* argb_buf,
int width);
void I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* argb_buf,
int width); int width);
void I420ToBGRARow_SSSE3(const uint8* y_buf, void I411ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* argb_buf,
int width); int width);
void I420ToABGRRow_SSSE3(const uint8* y_buf, void I422ToBGRARow_SSSE3(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* bgra_buf,
int width); int width);
void I420ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, void I422ToABGRRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* abgr_buf,
int width);
void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* argb_buf,
int width); int width);
void I420ToBGRARow_Unaligned_SSSE3(const uint8* y_buf, void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* argb_buf,
int width); int width);
void I420ToABGRRow_Unaligned_SSSE3(const uint8* y_buf, void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* argb_buf,
int width); int width);
void I444ToARGBRow_SSSE3(const uint8* y_buf, void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* bgra_buf,
int width); int width);
void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* abgr_buf,
int width);
void YToARGBRow_SSE2(const uint8* y_buf, void YToARGBRow_SSE2(const uint8* y_buf,
uint8* rgb_buf, uint8* argb_buf,
int width); int width);
// ARGB preattenuated alpha blend. // ARGB preattenuated alpha blend.
...@@ -310,24 +335,37 @@ void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1, ...@@ -310,24 +335,37 @@ void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width); uint8* dst_argb, int width);
// 'Any' functions handle any size and alignment. // 'Any' functions handle any size and alignment.
void I420ToARGBRow_Any_SSSE3(const uint8* y_buf, void I444ToARGBRow_Any_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void I422ToARGBRow_Any_SSSE3(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
void I420ToBGRARow_Any_SSSE3(const uint8* y_buf, void I411ToARGBRow_Any_SSSE3(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
void I420ToABGRRow_Any_SSSE3(const uint8* y_buf, void I422ToBGRARow_Any_SSSE3(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
void I422ToABGRRow_Any_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
...@@ -344,19 +382,19 @@ void BGRAToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb, ...@@ -344,19 +382,19 @@ void BGRAToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb,
void ABGRToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb, void ABGRToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
void I420ToARGBRow_Any_NEON(const uint8* y_buf, void I422ToARGBRow_Any_NEON(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
void I420ToBGRARow_Any_NEON(const uint8* y_buf, void I422ToBGRARow_Any_NEON(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
void I420ToABGRRow_Any_NEON(const uint8* y_buf, void I422ToABGRRow_Any_NEON(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
......
...@@ -359,7 +359,8 @@ static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, uint8* rgb_buf, ...@@ -359,7 +359,8 @@ static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, uint8* rgb_buf,
(255u << ashift); (255u << ashift);
} }
void I420ToARGBRow_C(const uint8* y_buf, // Also used for 420
void I422ToARGBRow_C(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
...@@ -377,7 +378,7 @@ void I420ToARGBRow_C(const uint8* y_buf, ...@@ -377,7 +378,7 @@ void I420ToARGBRow_C(const uint8* y_buf,
} }
} }
void I420ToBGRARow_C(const uint8* y_buf, void I422ToBGRARow_C(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
...@@ -395,7 +396,7 @@ void I420ToBGRARow_C(const uint8* y_buf, ...@@ -395,7 +396,7 @@ void I420ToBGRARow_C(const uint8* y_buf,
} }
} }
void I420ToABGRRow_C(const uint8* y_buf, void I422ToABGRRow_C(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
...@@ -427,6 +428,32 @@ void I444ToARGBRow_C(const uint8* y_buf, ...@@ -427,6 +428,32 @@ void I444ToARGBRow_C(const uint8* y_buf,
} }
} }
// Converts one row of I411 to ARGB in plain C.
//
// One U sample and one V sample are shared by four consecutive Y samples.
// Every output pixel occupies 4 bytes in rgb_buf. The trailing 24, 16, 8, 0
// arguments are passed through to YuvPixel unchanged.
//
//   y_buf   - source luma row, `width` samples are read.
//   u_buf   - source U row, one sample per group of up to four pixels.
//   v_buf   - source V row, one sample per group of up to four pixels.
//   rgb_buf - destination row, 4 bytes written per pixel.
//   width   - number of pixels to convert.
void I411ToARGBRow_C(const uint8* y_buf,
                     const uint8* u_buf,
                     const uint8* v_buf,
                     uint8* rgb_buf,
                     int width) {
  // Full groups: four pixels per chroma sample.
  int remaining = width;
  while (remaining > 3) {
    for (int i = 0; i < 4; ++i) {
      YuvPixel(y_buf[i], u_buf[0], v_buf[0], rgb_buf + 4 * i, 24, 16, 8, 0);
    }
    y_buf += 4;
    ++u_buf;
    ++v_buf;
    rgb_buf += 16;  // 4 pixels * 4 bytes.
    remaining -= 4;
  }

  // Tail of 1..3 pixels: all of them reuse the current chroma sample, so the
  // chroma pointers are not advanced here.
  if ((width & 2) != 0) {
    for (int i = 0; i < 2; ++i) {
      YuvPixel(y_buf[i], u_buf[0], v_buf[0], rgb_buf + 4 * i, 24, 16, 8, 0);
    }
    y_buf += 2;
    rgb_buf += 8;  // 2 pixels * 4 bytes.
  }
  if ((width & 1) != 0) {
    YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf, 24, 16, 8, 0);
  }
}
void YToARGBRow_C(const uint8* y_buf, uint8* rgb_buf, int width) { void YToARGBRow_C(const uint8* y_buf, uint8* rgb_buf, int width) {
for (int x = 0; x < width; ++x) { for (int x = 0; x < width; ++x) {
YuvPixel(y_buf[0], 128, 128, rgb_buf, 24, 16, 8, 0); YuvPixel(y_buf[0], 128, 128, rgb_buf, 24, 16, 8, 0);
...@@ -686,8 +713,8 @@ void ARGBBlendRow_Any_SSSE3(const uint8* src_argb0, const uint8* src_argb1, ...@@ -686,8 +713,8 @@ void ARGBBlendRow_Any_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
} }
#endif // HAS_ARGBBLENDROW_SSSE3 #endif // HAS_ARGBBLENDROW_SSSE3
// Wrappers to handle odd sizes/alignments // Wrappers to handle odd width
#define YUVANY(NAMEANY, I420TORGB_SSE, I420TORGB_C) \ #define YANY(NAMEANY, I420TORGB_SSE, I420TORGB_C, UV_SHIFT) \
void NAMEANY(const uint8* y_buf, \ void NAMEANY(const uint8* y_buf, \
const uint8* u_buf, \ const uint8* u_buf, \
const uint8* v_buf, \ const uint8* v_buf, \
...@@ -696,22 +723,24 @@ void ARGBBlendRow_Any_SSSE3(const uint8* src_argb0, const uint8* src_argb1, ...@@ -696,22 +723,24 @@ void ARGBBlendRow_Any_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
int n = width & ~7; \ int n = width & ~7; \
I420TORGB_SSE(y_buf, u_buf, v_buf, rgb_buf, n); \ I420TORGB_SSE(y_buf, u_buf, v_buf, rgb_buf, n); \
I420TORGB_C(y_buf + n, \ I420TORGB_C(y_buf + n, \
u_buf + (n >> 1), \ u_buf + (n >> UV_SHIFT), \
v_buf + (n >> 1), \ v_buf + (n >> UV_SHIFT), \
rgb_buf + n * 4, width & 7); \ rgb_buf + n * 4, width & 7); \
} }
#if defined(HAS_I420TOARGBROW_SSSE3) #if defined(HAS_I422TOARGBROW_SSSE3)
YUVANY(I420ToARGBRow_Any_SSSE3, I420ToARGBRow_Unaligned_SSSE3, I420ToARGBRow_C) YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C, 0)
YUVANY(I420ToBGRARow_Any_SSSE3, I420ToBGRARow_Unaligned_SSSE3, I420ToBGRARow_C) YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C, 1)
YUVANY(I420ToABGRRow_Any_SSSE3, I420ToABGRRow_Unaligned_SSSE3, I420ToABGRRow_C) YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C, 2)
YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C, 1)
YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C, 1)
#endif #endif
#if defined(HAS_I420TOARGBROW_NEON) #if defined(HAS_I422TOARGBROW_NEON)
YUVANY(I420ToARGBRow_Any_NEON, I420ToARGBRow_NEON, I420ToARGBRow_C) YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C)
YUVANY(I420ToBGRARow_Any_NEON, I420ToBGRARow_NEON, I420ToBGRARow_C) YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C)
YUVANY(I420ToABGRRow_Any_NEON, I420ToABGRRow_NEON, I420ToABGRRow_C) YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C)
#endif #endif
#undef YUVANY #undef YANY
#define RGBANY(NAMEANY, ARGBTORGB, BPP) \ #define RGBANY(NAMEANY, ARGBTORGB, BPP) \
void NAMEANY(const uint8* argb_buf, \ void NAMEANY(const uint8* argb_buf, \
......
...@@ -18,7 +18,7 @@ extern "C" { ...@@ -18,7 +18,7 @@ extern "C" {
// This module is for GCC Neon // This module is for GCC Neon
#if !defined(YUV_DISABLE_ASM) && defined(__ARM_NEON__) #if !defined(YUV_DISABLE_ASM) && defined(__ARM_NEON__)
#define YUVTORGB \ #define YUV422TORGB \
"vld1.u8 {d0}, [%0]! \n" \ "vld1.u8 {d0}, [%0]! \n" \
"vld1.u32 {d2[0]}, [%1]! \n" \ "vld1.u32 {d2[0]}, [%1]! \n" \
"vld1.u32 {d2[1]}, [%2]! \n" \ "vld1.u32 {d2[1]}, [%2]! \n" \
...@@ -46,17 +46,17 @@ extern "C" { ...@@ -46,17 +46,17 @@ extern "C" {
"vtrn.u8 d22, d23 \n" \ "vtrn.u8 d22, d23 \n" \
"vtrn.u8 d16, d17 \n" \ "vtrn.u8 d16, d17 \n" \
#if defined(HAS_I420TOARGBROW_NEON) || \ #if defined(HAS_I422TOARGBROW_NEON) || \
defined(HAS_I420TOBGRAROW_NEON) || \ defined(HAS_I422TOBGRAROW_NEON) || \
defined(HAS_I420TOABGRROW_NEON) defined(HAS_I422TOABGRROW_NEON)
static const vec8 kUVToRB = { 127, 127, 127, 127, 102, 102, 102, 102, static const vec8 kUVToRB = { 127, 127, 127, 127, 102, 102, 102, 102,
0, 0, 0, 0, 0, 0, 0, 0 }; 0, 0, 0, 0, 0, 0, 0, 0 };
static const vec8 kUVToG = { -25, -25, -25, -25, -52, -52, -52, -52, static const vec8 kUVToG = { -25, -25, -25, -25, -52, -52, -52, -52,
0, 0, 0, 0, 0, 0, 0, 0 }; 0, 0, 0, 0, 0, 0, 0, 0 };
#endif #endif
#ifdef HAS_I420TOARGBROW_NEON #ifdef HAS_I422TOARGBROW_NEON
void I420ToARGBRow_NEON(const uint8* y_buf, void I422ToARGBRow_NEON(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
...@@ -68,7 +68,7 @@ void I420ToARGBRow_NEON(const uint8* y_buf, ...@@ -68,7 +68,7 @@ void I420ToARGBRow_NEON(const uint8* y_buf,
"vmov.u16 q14, #74 \n" "vmov.u16 q14, #74 \n"
"vmov.u16 q15, #16 \n" "vmov.u16 q15, #16 \n"
"1: \n" "1: \n"
YUVTORGB YUV422TORGB
"vmov.u8 d21, d16 \n" "vmov.u8 d21, d16 \n"
"vmov.u8 d23, #255 \n" "vmov.u8 d23, #255 \n"
"vst4.u8 {d20, d21, d22, d23}, [%3]! \n" "vst4.u8 {d20, d21, d22, d23}, [%3]! \n"
...@@ -85,10 +85,10 @@ YUVTORGB ...@@ -85,10 +85,10 @@ YUVTORGB
"q10", "q11", "q12", "q13", "q14", "q15" "q10", "q11", "q12", "q13", "q14", "q15"
); );
} }
#endif #endif // HAS_I422TOARGBROW_NEON
#ifdef HAS_I420TOBGRAROW_NEON #ifdef HAS_I422TOBGRAROW_NEON
void I420ToBGRARow_NEON(const uint8* y_buf, void I422ToBGRARow_NEON(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
...@@ -100,7 +100,7 @@ void I420ToBGRARow_NEON(const uint8* y_buf, ...@@ -100,7 +100,7 @@ void I420ToBGRARow_NEON(const uint8* y_buf,
"vmov.u16 q14, #74 \n" "vmov.u16 q14, #74 \n"
"vmov.u16 q15, #16 \n" "vmov.u16 q15, #16 \n"
"1: \n" "1: \n"
YUVTORGB YUV422TORGB
"vswp.u8 d20, d22 \n" "vswp.u8 d20, d22 \n"
"vmov.u8 d21, d16 \n" "vmov.u8 d21, d16 \n"
"vmov.u8 d19, #255 \n" "vmov.u8 d19, #255 \n"
...@@ -118,10 +118,10 @@ YUVTORGB ...@@ -118,10 +118,10 @@ YUVTORGB
"q10", "q11", "q12", "q13", "q14", "q15" "q10", "q11", "q12", "q13", "q14", "q15"
); );
} }
#endif #endif // HAS_I422TOBGRAROW_NEON
#ifdef HAS_I420TOABGRROW_NEON #ifdef HAS_I422TOABGRROW_NEON
void I420ToABGRRow_NEON(const uint8* y_buf, void I422ToABGRRow_NEON(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
...@@ -133,7 +133,7 @@ void I420ToABGRRow_NEON(const uint8* y_buf, ...@@ -133,7 +133,7 @@ void I420ToABGRRow_NEON(const uint8* y_buf,
"vmov.u16 q14, #74 \n" "vmov.u16 q14, #74 \n"
"vmov.u16 q15, #16 \n" "vmov.u16 q15, #16 \n"
"1: \n" "1: \n"
YUVTORGB YUV422TORGB
"vswp.u8 d20, d22 \n" "vswp.u8 d20, d22 \n"
"vmov.u8 d21, d16 \n" "vmov.u8 d21, d16 \n"
"vmov.u8 d23, #255 \n" "vmov.u8 d23, #255 \n"
...@@ -151,7 +151,7 @@ YUVTORGB ...@@ -151,7 +151,7 @@ YUVTORGB
"q10", "q11", "q12", "q13", "q14", "q15" "q10", "q11", "q12", "q13", "q14", "q15"
); );
} }
#endif #endif // HAS_I422TOABGRROW_NEON
#ifdef HAS_SPLITUV_NEON #ifdef HAS_SPLITUV_NEON
// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v
...@@ -172,7 +172,7 @@ void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { ...@@ -172,7 +172,7 @@ void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
: "memory", "cc", "q0", "q1" // Clobber List : "memory", "cc", "q0", "q1" // Clobber List
); );
} }
#endif #endif // HAS_SPLITUV_NEON
#ifdef HAS_COPYROW_NEON #ifdef HAS_COPYROW_NEON
// Copy multiple of 64 // Copy multiple of 64
...@@ -266,7 +266,7 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { ...@@ -266,7 +266,7 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
: "memory", "cc", "r3", "q0" : "memory", "cc", "r3", "q0"
); );
} }
#endif #endif // HAS_MIRRORROW_NEON
#ifdef HAS_MIRRORROWUV_NEON #ifdef HAS_MIRRORROWUV_NEON
void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) { void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) {
...@@ -325,7 +325,7 @@ void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) { ...@@ -325,7 +325,7 @@ void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) {
: "memory", "cc", "r12", "q0" : "memory", "cc", "r12", "q0"
); );
} }
#endif #endif // HAS_MIRRORROWUV_NEON
#endif // __ARM_NEON__ #endif // __ARM_NEON__
......
This diff is collapsed.
This diff is collapsed.
...@@ -25,88 +25,44 @@ ...@@ -25,88 +25,44 @@
namespace libyuv { namespace libyuv {
TEST_F(libyuvTest, BenchmarkI420ToARGB_C) { #define TESTPLANARTOB(FMT_A, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B) \
align_buffer_16(src_y, benchmark_width_ * benchmark_height_); TEST_F(libyuvTest, ##FMT_A##To##FMT_B##_CvsOPT) { \
align_buffer_16(src_u, (benchmark_width_ * benchmark_height_) >> 2);
align_buffer_16(src_v, (benchmark_width_ * benchmark_height_) >> 2);
align_buffer_16(dst_argb, (benchmark_width_ << 2) * benchmark_height_);
MaskCpuFlags(kCpuInitialized);
for (int i = 0; i < benchmark_iterations_; ++i)
I420ToARGB(src_y, benchmark_width_,
src_u, benchmark_width_ >> 1,
src_v, benchmark_width_ >> 1,
dst_argb, benchmark_width_ << 2,
benchmark_width_, benchmark_height_);
MaskCpuFlags(-1);
EXPECT_EQ(0, 0);
free_aligned_buffer_16(src_y)
free_aligned_buffer_16(src_u)
free_aligned_buffer_16(src_v)
free_aligned_buffer_16(dst_argb)
}
TEST_F(libyuvTest, BenchmarkI420ToARGB_OPT) {
align_buffer_16(src_y, benchmark_width_ * benchmark_height_);
align_buffer_16(src_u, (benchmark_width_ * benchmark_height_) >> 2);
align_buffer_16(src_v, (benchmark_width_ * benchmark_height_) >> 2);
align_buffer_16(dst_argb, (benchmark_width_ << 2) * benchmark_height_);
for (int i = 0; i < benchmark_iterations_; ++i)
I420ToARGB(src_y, benchmark_width_,
src_u, benchmark_width_ >> 1,
src_v, benchmark_width_ >> 1,
dst_argb, benchmark_width_ << 2,
benchmark_width_, benchmark_height_);
free_aligned_buffer_16(src_y)
free_aligned_buffer_16(src_u)
free_aligned_buffer_16(src_v)
free_aligned_buffer_16(dst_argb)
}
#define TESTI420TO(FMT, BPP) \
TEST_F(libyuvTest, I420To##FMT##_CvsOPT) { \
const int src_width = 1280; \ const int src_width = 1280; \
const int src_height = 720; \ const int src_height = 720; \
align_buffer_16(src_y, src_width * src_height); \ align_buffer_16(src_y, src_width * src_height); \
align_buffer_16(src_u, (src_width * src_height) >> 2); \ align_buffer_16(src_u, src_width / SUBSAMP_X * src_height / SUBSAMP_Y); \
align_buffer_16(src_v, (src_width * src_height) >> 2); \ align_buffer_16(src_v, src_width / SUBSAMP_X * src_height / SUBSAMP_Y); \
align_buffer_16(dst_rgb_c, (src_width * BPP) * src_height); \ align_buffer_16(dst_rgb_c, (src_width * BPP_B) * src_height); \
align_buffer_16(dst_rgb_opt, (src_width * BPP) * src_height); \ align_buffer_16(dst_rgb_opt, (src_width * BPP_B) * src_height); \
srandom(time(NULL)); \ srandom(time(NULL)); \
for (int i = 0; i < src_height; ++i) \ for (int i = 0; i < src_height; ++i) \
for (int j = 0; j < src_width; ++j) \ for (int j = 0; j < src_width; ++j) \
src_y[(i * src_width) + j] = (random() & 0xff); \ src_y[(i * src_width) + j] = (random() & 0xff); \
for (int i = 0; i < src_height >> 1; ++i) \ for (int i = 0; i < src_height / SUBSAMP_X; ++i) \
for (int j = 0; j < src_width >> 1; ++j) { \ for (int j = 0; j < src_width / SUBSAMP_Y; ++j) { \
src_u[(i * src_width >> 1) + j] = (random() & 0xff); \ src_u[(i * src_width / SUBSAMP_X) + j] = (random() & 0xff); \
src_v[(i * src_width >> 1) + j] = (random() & 0xff); \ src_v[(i * src_width / SUBSAMP_X) + j] = (random() & 0xff); \
} \ } \
MaskCpuFlags(kCpuInitialized); \ MaskCpuFlags(kCpuInitialized); \
I420To##FMT(src_y, src_width, \ ##FMT_A##To##FMT_B(src_y, src_width, \
src_u, src_width >> 1, \ src_u, src_width / SUBSAMP_X, \
src_v, src_width >> 1, \ src_v, src_width / SUBSAMP_X, \
dst_rgb_c, src_width * BPP, \ dst_rgb_c, src_width * BPP_B, \
src_width, src_height); \ src_width, src_height); \
MaskCpuFlags(-1); \ MaskCpuFlags(-1); \
const int runs = 1000; \ const int runs = 1000; \
for (int i = 0; i < runs; ++i) { \ for (int i = 0; i < runs; ++i) { \
I420To##FMT(src_y, src_width, \ ##FMT_A##To##FMT_B(src_y, src_width, \
src_u, src_width >> 1, \ src_u, src_width / SUBSAMP_X, \
src_v, src_width >> 1, \ src_v, src_width / SUBSAMP_X, \
dst_rgb_opt, src_width * BPP, \ dst_rgb_opt, src_width * BPP_B, \
src_width, src_height); \ src_width, src_height); \
} \ } \
int err = 0; \ int err = 0; \
for (int i = 0; i < src_height; ++i) { \ for (int i = 0; i < src_height; ++i) { \
for (int j = 0; j < src_width * BPP; ++j) { \ for (int j = 0; j < src_width * BPP_B; ++j) { \
int diff = static_cast<int>(dst_rgb_c[i * src_width * BPP + j]) - \ int diff = static_cast<int>(dst_rgb_c[i * src_width * BPP_B + j]) - \
static_cast<int>(dst_rgb_opt[i * src_width * BPP + j]); \ static_cast<int>(dst_rgb_opt[i * src_width * BPP_B + j]); \
if (abs(diff) > 2) \ if (abs(diff) > 2) \
err++; \ err++; \
} \ } \
...@@ -119,14 +75,17 @@ TEST_F(libyuvTest, I420To##FMT##_CvsOPT) { \ ...@@ -119,14 +75,17 @@ TEST_F(libyuvTest, I420To##FMT##_CvsOPT) { \
free_aligned_buffer_16(dst_rgb_opt) \ free_aligned_buffer_16(dst_rgb_opt) \
} }
TESTI420TO(ARGB, 4) TESTPLANARTOB(I420, 2, 2, ARGB, 4)
TESTI420TO(BGRA, 4) TESTPLANARTOB(I420, 2, 2, BGRA, 4)
TESTI420TO(ABGR, 4) TESTPLANARTOB(I420, 2, 2, ABGR, 4)
TESTI420TO(RAW, 3) TESTPLANARTOB(I420, 2, 2, RAW, 3)
TESTI420TO(RGB24, 3) TESTPLANARTOB(I420, 2, 2, RGB24, 3)
TESTI420TO(RGB565, 2) TESTPLANARTOB(I420, 2, 2, RGB565, 2)
TESTI420TO(ARGB1555, 2) TESTPLANARTOB(I420, 2, 2, ARGB1555, 2)
TESTI420TO(ARGB4444, 2) TESTPLANARTOB(I420, 2, 2, ARGB4444, 2)
TESTPLANARTOB(I411, 4, 1, ARGB, 4)
TESTPLANARTOB(I422, 2, 1, ARGB, 4)
TESTPLANARTOB(I444, 1, 1, ARGB, 4)
#define TESTATOB(FMT_A, BPP_A, FMT_B, BPP_B) \ #define TESTATOB(FMT_A, BPP_A, FMT_B, BPP_B) \
TEST_F(libyuvTest, ##FMT_A##To##FMT_B##_CvsOPT) { \ TEST_F(libyuvTest, ##FMT_A##To##FMT_B##_CvsOPT) { \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment