Commit e214fe3f authored by fbarchard@google.com's avatar fbarchard@google.com

I411ToARGB doing 2 UV values with 8 Y values

BUG=40
TEST=planar_test
Review URL: https://webrtc-codereview.appspot.com/637005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@277 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 6d6b7709
......@@ -31,6 +31,13 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Convert I420 to I400. (calls CopyPlane ignoring u/v)
int I420ToI400(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// I420 mirror.
int I420Mirror(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
......@@ -62,6 +69,13 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I444 to ARGB.
int I444ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I422 to ARGB.
int I422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
......@@ -69,8 +83,8 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I444 to ARGB.
int I444ToARGB(const uint8* src_y, int src_stride_y,
// Convert I411 to ARGB.
int I411ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
......
......@@ -660,32 +660,32 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_Any_NEON;
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I420ToARGBRow = I420ToARGBRow_NEON;
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#elif defined(HAS_I420TOARGBROW_SSSE3)
#elif defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I420ToARGBRow = I420ToARGBRow_Any_SSSE3;
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I420ToARGBRow = I420ToARGBRow_Unaligned_SSSE3;
I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
}
#endif
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, dst_argb, width);
I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
......@@ -708,32 +708,32 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
dst_stride_bgra = -dst_stride_bgra;
}
void (*I420ToBGRARow)(const uint8* y_buf,
void (*I422ToBGRARow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I420ToBGRARow_C;
#if defined(HAS_I420TOBGRAROW_NEON)
int width) = I422ToBGRARow_C;
#if defined(HAS_I422TOBGRAROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToBGRARow = I420ToBGRARow_Any_NEON;
I422ToBGRARow = I422ToBGRARow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I420ToBGRARow = I420ToBGRARow_NEON;
I422ToBGRARow = I422ToBGRARow_NEON;
}
}
#elif defined(HAS_I420TOBGRAROW_SSSE3)
#elif defined(HAS_I422TOBGRAROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I420ToBGRARow = I420ToBGRARow_Any_SSSE3;
I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I420ToBGRARow = I420ToBGRARow_Unaligned_SSSE3;
I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
I420ToBGRARow = I420ToBGRARow_SSSE3;
I422ToBGRARow = I422ToBGRARow_SSSE3;
}
}
}
#endif
for (int y = 0; y < height; ++y) {
I420ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
dst_bgra += dst_stride_bgra;
src_y += src_stride_y;
if (y & 1) {
......@@ -756,32 +756,32 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
dst_stride_abgr = -dst_stride_abgr;
}
void (*I420ToABGRRow)(const uint8* y_buf,
void (*I422ToABGRRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I420ToABGRRow_C;
#if defined(HAS_I420TOABGRROW_NEON)
int width) = I422ToABGRRow_C;
#if defined(HAS_I422TOABGRROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToABGRRow = I420ToABGRRow_Any_NEON;
I422ToABGRRow = I422ToABGRRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I420ToABGRRow = I420ToABGRRow_NEON;
I422ToABGRRow = I422ToABGRRow_NEON;
}
}
#elif defined(HAS_I420TOABGRROW_SSSE3)
#elif defined(HAS_I422TOABGRROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I420ToABGRRow = I420ToABGRRow_Any_SSSE3;
I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I420ToABGRRow = I420ToABGRRow_Unaligned_SSSE3;
I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
I420ToABGRRow = I420ToABGRRow_SSSE3;
I422ToABGRRow = I422ToABGRRow_SSSE3;
}
}
}
#endif
for (int y = 0; y < height; ++y) {
I420ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
dst_abgr += dst_stride_abgr;
src_y += src_stride_y;
if (y & 1) {
......@@ -804,18 +804,18 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
I422ToARGBRow = I422ToARGBRow_NEON;
}
#elif defined(HAS_I420TOARGBROW_SSSE3)
#elif defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
#endif
......@@ -835,7 +835,7 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
#endif
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
I422ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToRGB24Row(row, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
......@@ -859,18 +859,18 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
I422ToARGBRow = I422ToARGBRow_NEON;
}
#elif defined(HAS_I420TOARGBROW_SSSE3)
#elif defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
#endif
......@@ -890,7 +890,7 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
#endif
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
I422ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToRAWRow(row, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
......@@ -914,18 +914,18 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb;
dst_stride_rgb = -dst_stride_rgb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
I422ToARGBRow = I422ToARGBRow_NEON;
}
#elif defined(HAS_I420TOARGBROW_SSSE3)
#elif defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
#endif
......@@ -944,7 +944,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
#endif
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
I422ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToRGB565Row(row, dst_rgb, width);
dst_rgb += dst_stride_rgb;
src_y += src_stride_y;
......@@ -968,18 +968,18 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
I422ToARGBRow = I422ToARGBRow_NEON;
}
#elif defined(HAS_I420TOARGBROW_SSSE3)
#elif defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
#endif
......@@ -998,7 +998,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
#endif
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
I422ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToARGB1555Row(row, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
......@@ -1022,18 +1022,18 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
I422ToARGBRow = I422ToARGBRow_NEON;
}
#elif defined(HAS_I420TOARGBROW_SSSE3)
#elif defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
#endif
......@@ -1052,7 +1052,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
#endif
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
I422ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToARGB4444Row(row, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
......
......@@ -446,18 +446,18 @@ int I420ToBayer(const uint8* src_y, int src_stride_y,
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
void (*I420ToARGBRow)(const uint8* y_buf,
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
I422ToARGBRow = I422ToARGBRow_NEON;
}
#elif defined(HAS_I420TOARGBROW_SSSE3)
#elif defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
#endif
SIMD_ALIGNED(uint8 row[kMaxStride]);
......@@ -478,7 +478,7 @@ int I420ToBayer(const uint8* src_y, int src_stride_y,
}
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
I422ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToBayerRow(row, dst_bayer, index_map[y & 1], width);
dst_bayer += dst_stride_bayer;
src_y += src_stride_y;
......
......@@ -51,6 +51,26 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
}
}
// Convert I420 to I400 by copying the luma plane. The chroma planes are
// intentionally ignored, hence the unnamed u/v parameters (kept so the
// signature matches the other I420To* converters).
int I420ToI400(const uint8* src_y, int src_stride_y,
               uint8* dst_y, int dst_stride_y,
               uint8*, int,
               uint8*, int,
               int width, int height) {
  // Reject null planes and degenerate dimensions.
  if (!src_y || !dst_y ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height requests a vertical flip: start at the bottom row and
  // walk the source upward with a negated stride.
  if (height < 0) {
    height = -height;
    src_y += (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  return 0;
}
// Mirror a plane of data
void MirrorPlane(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
......@@ -202,6 +222,45 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
return 0;
}
// Convert I444 to ARGB. All three planes are full resolution (no chroma
// subsampling), so each output pixel consumes one Y, one U and one V sample.
// Returns 0 on success, -1 on invalid arguments.
int I444ToARGB(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  // Validate arguments, consistent with I420ToI400 and the other planar
  // entry points: a null plane or non-positive width would otherwise crash.
  if (!src_y || !src_u || !src_v || !dst_argb ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: write rows bottom-up.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Pick the fastest row converter the CPU and buffer alignment allow,
  // falling back to the portable C version.
  void (*I444ToARGBRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I444ToARGBRow_C;
#if defined(HAS_I444TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    I444ToARGBRow = I444ToARGBRow_Any_SSSE3;  // any width, any alignment
    if (IS_ALIGNED(width, 8)) {
      I444ToARGBRow = I444ToARGBRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        I444ToARGBRow = I444ToARGBRow_SSSE3;  // fully aligned fast path
      }
    }
  }
#endif
  for (int y = 0; y < height; ++y) {
    I444ToARGBRow(src_y, src_u, src_v, dst_argb, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}
// Convert I422 to ARGB.
int I422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
......@@ -214,30 +273,32 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_Any_NEON;
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I420ToARGBRow = I420ToARGBRow_NEON;
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#elif defined(HAS_I420TOARGBROW_SSSE3)
#elif defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I420ToARGBRow = I420ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
}
#endif
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, dst_argb, width);
I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
......@@ -246,8 +307,8 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
return 0;
}
// Convert I444 to ARGB.
int I444ToARGB(const uint8* src_y, int src_stride_y,
// Convert I411 to ARGB.
int I411ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
......@@ -258,21 +319,25 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I444ToARGBRow)(const uint8* y_buf,
void (*I411ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I444ToARGBRow_C;
#if defined(HAS_I444TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I444ToARGBRow = I444ToARGBRow_SSSE3;
int width) = I411ToARGBRow_C;
#if defined(HAS_I411TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I411ToARGBRow = I411ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I411ToARGBRow = I411ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I411ToARGBRow = I411ToARGBRow_SSSE3;
}
}
}
#endif
for (int y = 0; y < height; ++y) {
I444ToARGBRow(src_y, src_u, src_v, dst_argb, width);
I411ToARGBRow(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
......@@ -281,6 +346,7 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
return 0;
}
// Convert I400 to ARGB.
int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
......@@ -724,24 +790,24 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* argb_buf,
int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_Any_NEON;
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I420ToARGBRow = I420ToARGBRow_NEON;
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#elif defined(HAS_I420TOARGBROW_SSSE3)
#elif defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I420ToARGBRow = I420ToARGBRow_Any_SSSE3;
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
......@@ -766,7 +832,7 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
SplitUV(src_uv, rowuv, rowuv + kMaxStride, halfwidth);
src_uv += src_stride_uv;
}
I420ToARGBRow(src_y, rowuv, rowuv + kMaxStride, dst_argb, width);
I422ToARGBRow(src_y, rowuv, rowuv + kMaxStride, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
}
......@@ -803,24 +869,24 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
}
}
#endif
void (*I420ToARGBRow)(const uint8* y_buf,
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* argb_buf,
int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_Any_NEON;
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I420ToARGBRow = I420ToARGBRow_NEON;
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#elif defined(HAS_I420TOARGBROW_SSSE3)
#elif defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I420ToARGBRow = I420ToARGBRow_Any_SSSE3;
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
......@@ -832,7 +898,7 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
for (int y = 0; y < height; ++y) {
YUY2ToUVRow(src_yuy2, src_stride_yuy2, rowu, rowv, width);
YUY2ToYRow(src_yuy2, rowy, width);
I420ToARGBRow(rowy, rowu, rowv, dst_argb, width);
I422ToARGBRow(rowy, rowu, rowv, dst_argb, width);
src_yuy2 += src_stride_yuy2;
dst_argb += dst_stride_argb;
}
......@@ -869,24 +935,24 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
}
}
#endif
void (*I420ToARGBRow)(const uint8* y_buf,
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* argb_buf,
int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_Any_NEON;
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
I420ToARGBRow = I420ToARGBRow_NEON;
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#elif defined(HAS_I420TOARGBROW_SSSE3)
#elif defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I420ToARGBRow = I420ToARGBRow_Any_SSSE3;
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
......@@ -898,7 +964,7 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
for (int y = 0; y < height; ++y) {
UYVYToUVRow(src_uyvy, src_stride_uyvy, rowu, rowv, width);
UYVYToYRow(src_uyvy, rowy, width);
I420ToARGBRow(rowy, rowu, rowv, dst_argb, width);
I422ToARGBRow(rowy, rowu, rowv, dst_argb, width);
src_uyvy += src_stride_uyvy;
dst_argb += dst_stride_argb;
}
......@@ -916,18 +982,18 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb;
dst_stride_rgb = -dst_stride_rgb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
I422ToARGBRow = I422ToARGBRow_NEON;
}
#elif defined(HAS_I420TOARGBROW_SSSE3)
#elif defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
#endif
SIMD_ALIGNED(uint8 row[kMaxStride]);
......@@ -960,7 +1026,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
SplitUV(src_uv, rowuv, rowuv + kMaxStride, halfwidth);
src_uv += src_stride_uv;
}
I420ToARGBRow(src_y, rowuv, rowuv + kMaxStride, row, width);
I422ToARGBRow(src_y, rowuv, rowuv + kMaxStride, row, width);
ARGBToRGB565Row(row, dst_rgb, width);
dst_rgb += dst_stride_rgb;
src_y += src_stride_y;
......
......@@ -30,7 +30,7 @@ extern "C" {
#define LIBYUV_SSSE3_ONLY
#endif
// The following are available on all x86 platforms
// The following are available on all x86 platforms:
#if !defined(YUV_DISABLE_ASM) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_ABGRTOARGBROW_SSSE3
......@@ -55,10 +55,11 @@ extern "C" {
#define HAS_COPYROW_SSE2
#define HAS_COPYROW_X86
#define HAS_I400TOARGBROW_SSE2
#define HAS_I420TOABGRROW_SSSE3
#define HAS_I420TOARGBROW_SSSE3
#define HAS_I420TOBGRAROW_SSSE3
#define HAS_I422TOABGRROW_SSSE3
#define HAS_I422TOARGBROW_SSSE3
#define HAS_I422TOBGRAROW_SSSE3
#define HAS_I444TOARGBROW_SSSE3
#define HAS_I411TOARGBROW_SSSE3
#define HAS_MIRRORROW_SSSE3
#define HAS_MIRRORROWUV_SSSE3
#define HAS_ADDROW_SSE2
......@@ -75,7 +76,7 @@ extern "C" {
#define HAS_ARGBSEPIAROW_SSSE3
#endif
// The following are available only useful when SSSE3 is unavailable.
// The following are disabled when SSSE3 is available:
#if !defined(YUV_DISABLE_ASM) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
!defined(LIBYUV_SSSE3_ONLY)
......@@ -91,9 +92,9 @@ extern "C" {
#define HAS_MIRRORROWUV_NEON
#define HAS_SPLITUV_NEON
#define HAS_COPYROW_NEON
#define HAS_I420TOARGBROW_NEON
#define HAS_I420TOBGRAROW_NEON
#define HAS_I420TOABGRROW_NEON
#define HAS_I422TOARGBROW_NEON
#define HAS_I422TOBGRAROW_NEON
#define HAS_I422TOABGRROW_NEON
#endif
#if defined(_MSC_VER)
......@@ -118,17 +119,17 @@ typedef uint32 __attribute__((vector_size(16))) uvec32;
#define OMITFP __attribute__((optimize("omit-frame-pointer")))
#endif
void I420ToARGBRow_NEON(const uint8* y_buf,
void I422ToARGBRow_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void I420ToBGRARow_NEON(const uint8* y_buf,
void I422ToBGRARow_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void I420ToABGRRow_NEON(const uint8* y_buf,
void I422ToABGRRow_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
......@@ -219,19 +220,19 @@ void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
void I420ToARGBRow_C(const uint8* y_buf,
void I422ToARGBRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void I420ToBGRARow_C(const uint8* y_buf,
void I422ToBGRARow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void I420ToABGRRow_C(const uint8* y_buf,
void I422ToABGRRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
......@@ -243,54 +244,78 @@ void I444ToARGBRow_C(const uint8* y_buf,
uint8* rgb_buf,
int width);
void I411ToARGBRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void YToARGBRow_C(const uint8* y_buf,
uint8* rgb_buf,
int width);
void I420ToARGBRow_SSSE3(const uint8* y_buf,
void I444ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* argb_buf,
int width);
void I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
uint8* argb_buf,
int width);
void I420ToBGRARow_SSSE3(const uint8* y_buf,
void I411ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
uint8* argb_buf,
int width);
void I420ToABGRRow_SSSE3(const uint8* y_buf,
void I422ToBGRARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
uint8* bgra_buf,
int width);
void I420ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
void I422ToABGRRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* abgr_buf,
int width);
void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
uint8* argb_buf,
int width);
void I420ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
uint8* argb_buf,
int width);
void I420ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
uint8* argb_buf,
int width);
void I444ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* bgra_buf,
int width);
void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* abgr_buf,
int width);
void YToARGBRow_SSE2(const uint8* y_buf,
uint8* rgb_buf,
uint8* argb_buf,
int width);
// ARGB preattenuated alpha blend.
......@@ -310,24 +335,37 @@ void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width);
// 'Any' functions handle any size and alignment.
void I420ToARGBRow_Any_SSSE3(const uint8* y_buf,
void I444ToARGBRow_Any_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void I422ToARGBRow_Any_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void I420ToBGRARow_Any_SSSE3(const uint8* y_buf,
void I411ToARGBRow_Any_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void I420ToABGRRow_Any_SSSE3(const uint8* y_buf,
void I422ToBGRARow_Any_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void I422ToABGRRow_Any_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
......@@ -344,19 +382,19 @@ void BGRAToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb,
void ABGRToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
void I420ToARGBRow_Any_NEON(const uint8* y_buf,
void I422ToARGBRow_Any_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void I420ToBGRARow_Any_NEON(const uint8* y_buf,
void I422ToBGRARow_Any_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void I420ToABGRRow_Any_NEON(const uint8* y_buf,
void I422ToABGRRow_Any_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
......
......@@ -359,7 +359,8 @@ static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, uint8* rgb_buf,
(255u << ashift);
}
void I420ToARGBRow_C(const uint8* y_buf,
// Also used for 420
void I422ToARGBRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
......@@ -377,7 +378,7 @@ void I420ToARGBRow_C(const uint8* y_buf,
}
}
void I420ToBGRARow_C(const uint8* y_buf,
void I422ToBGRARow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
......@@ -395,7 +396,7 @@ void I420ToBGRARow_C(const uint8* y_buf,
}
}
void I420ToABGRRow_C(const uint8* y_buf,
void I422ToABGRRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
......@@ -427,6 +428,32 @@ void I444ToARGBRow_C(const uint8* y_buf,
}
}
// I411 (4:1:1) row converter: one U and one V sample is shared by each
// group of 4 Y samples. Emits 4 bytes of ARGB per pixel.
void I411ToARGBRow_C(const uint8* y_buf,
                     const uint8* u_buf,
                     const uint8* v_buf,
                     uint8* rgb_buf,
                     int width) {
  // Index-based form: pixel x takes its chroma from group x >> 2, which is
  // exactly the sharing the pointer-advancing version implements, including
  // the 1/2/3-pixel tails of a non-multiple-of-4 width.
  for (int x = 0; x < width; ++x) {
    YuvPixel(y_buf[x], u_buf[x >> 2], v_buf[x >> 2],
             rgb_buf + x * 4, 24, 16, 8, 0);
  }
}
void YToARGBRow_C(const uint8* y_buf, uint8* rgb_buf, int width) {
for (int x = 0; x < width; ++x) {
YuvPixel(y_buf[0], 128, 128, rgb_buf, 24, 16, 8, 0);
......@@ -686,8 +713,8 @@ void ARGBBlendRow_Any_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
}
#endif // HAS_ARGBBLENDROW_SSSE3
// Wrappers to handle odd sizes/alignments
#define YUVANY(NAMEANY, I420TORGB_SSE, I420TORGB_C) \
// Wrappers to handle odd width
#define YANY(NAMEANY, I420TORGB_SSE, I420TORGB_C, UV_SHIFT) \
void NAMEANY(const uint8* y_buf, \
const uint8* u_buf, \
const uint8* v_buf, \
......@@ -696,22 +723,24 @@ void ARGBBlendRow_Any_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
int n = width & ~7; \
I420TORGB_SSE(y_buf, u_buf, v_buf, rgb_buf, n); \
I420TORGB_C(y_buf + n, \
u_buf + (n >> 1), \
v_buf + (n >> 1), \
rgb_buf + n * 4, width & 7); \
u_buf + (n >> UV_SHIFT), \
v_buf + (n >> UV_SHIFT), \
rgb_buf + n * 4, width & 7); \
}
#if defined(HAS_I420TOARGBROW_SSSE3)
YUVANY(I420ToARGBRow_Any_SSSE3, I420ToARGBRow_Unaligned_SSSE3, I420ToARGBRow_C)
YUVANY(I420ToBGRARow_Any_SSSE3, I420ToBGRARow_Unaligned_SSSE3, I420ToBGRARow_C)
YUVANY(I420ToABGRRow_Any_SSSE3, I420ToABGRRow_Unaligned_SSSE3, I420ToABGRRow_C)
#if defined(HAS_I422TOARGBROW_SSSE3)
YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C, 0)
YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C, 1)
YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C, 2)
YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C, 1)
YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C, 1)
#endif
#if defined(HAS_I420TOARGBROW_NEON)
YUVANY(I420ToARGBRow_Any_NEON, I420ToARGBRow_NEON, I420ToARGBRow_C)
YUVANY(I420ToBGRARow_Any_NEON, I420ToBGRARow_NEON, I420ToBGRARow_C)
YUVANY(I420ToABGRRow_Any_NEON, I420ToABGRRow_NEON, I420ToABGRRow_C)
#if defined(HAS_I422TOARGBROW_NEON)
YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C)
YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C)
YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C)
#endif
#undef YUVANY
#undef YANY
#define RGBANY(NAMEANY, ARGBTORGB, BPP) \
void NAMEANY(const uint8* argb_buf, \
......
......@@ -18,7 +18,7 @@ extern "C" {
// This module is for GCC Neon
#if !defined(YUV_DISABLE_ASM) && defined(__ARM_NEON__)
#define YUVTORGB \
#define YUV422TORGB \
"vld1.u8 {d0}, [%0]! \n" \
"vld1.u32 {d2[0]}, [%1]! \n" \
"vld1.u32 {d2[1]}, [%2]! \n" \
......@@ -46,17 +46,17 @@ extern "C" {
"vtrn.u8 d22, d23 \n" \
"vtrn.u8 d16, d17 \n" \
#if defined(HAS_I420TOARGBROW_NEON) || \
defined(HAS_I420TOBGRAROW_NEON) || \
defined(HAS_I420TOABGRROW_NEON)
#if defined(HAS_I422TOARGBROW_NEON) || \
defined(HAS_I422TOBGRAROW_NEON) || \
defined(HAS_I422TOABGRROW_NEON)
static const vec8 kUVToRB = { 127, 127, 127, 127, 102, 102, 102, 102,
0, 0, 0, 0, 0, 0, 0, 0 };
static const vec8 kUVToG = { -25, -25, -25, -25, -52, -52, -52, -52,
0, 0, 0, 0, 0, 0, 0, 0 };
#endif
#ifdef HAS_I420TOARGBROW_NEON
void I420ToARGBRow_NEON(const uint8* y_buf,
#ifdef HAS_I422TOARGBROW_NEON
void I422ToARGBRow_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
......@@ -68,7 +68,7 @@ void I420ToARGBRow_NEON(const uint8* y_buf,
"vmov.u16 q14, #74 \n"
"vmov.u16 q15, #16 \n"
"1: \n"
YUVTORGB
YUV422TORGB
"vmov.u8 d21, d16 \n"
"vmov.u8 d23, #255 \n"
"vst4.u8 {d20, d21, d22, d23}, [%3]! \n"
......@@ -85,10 +85,10 @@ YUVTORGB
"q10", "q11", "q12", "q13", "q14", "q15"
);
}
#endif
#endif // HAS_I422TOARGBROW_NEON
#ifdef HAS_I420TOBGRAROW_NEON
void I420ToBGRARow_NEON(const uint8* y_buf,
#ifdef HAS_I422TOBGRAROW_NEON
void I422ToBGRARow_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
......@@ -100,7 +100,7 @@ void I420ToBGRARow_NEON(const uint8* y_buf,
"vmov.u16 q14, #74 \n"
"vmov.u16 q15, #16 \n"
"1: \n"
YUVTORGB
YUV422TORGB
"vswp.u8 d20, d22 \n"
"vmov.u8 d21, d16 \n"
"vmov.u8 d19, #255 \n"
......@@ -118,10 +118,10 @@ YUVTORGB
"q10", "q11", "q12", "q13", "q14", "q15"
);
}
#endif
#endif // HAS_I422TOBGRAROW_NEON
#ifdef HAS_I420TOABGRROW_NEON
void I420ToABGRRow_NEON(const uint8* y_buf,
#ifdef HAS_I422TOABGRROW_NEON
void I422ToABGRRow_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
......@@ -133,7 +133,7 @@ void I420ToABGRRow_NEON(const uint8* y_buf,
"vmov.u16 q14, #74 \n"
"vmov.u16 q15, #16 \n"
"1: \n"
YUVTORGB
YUV422TORGB
"vswp.u8 d20, d22 \n"
"vmov.u8 d21, d16 \n"
"vmov.u8 d23, #255 \n"
......@@ -151,7 +151,7 @@ YUVTORGB
"q10", "q11", "q12", "q13", "q14", "q15"
);
}
#endif
#endif // HAS_I422TOABGRROW_NEON
#ifdef HAS_SPLITUV_NEON
// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v
......@@ -172,7 +172,7 @@ void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
: "memory", "cc", "q0", "q1" // Clobber List
);
}
#endif
#endif // HAS_SPLITUV_NEON
#ifdef HAS_COPYROW_NEON
// Copy multiple of 64
......@@ -266,7 +266,7 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
: "memory", "cc", "r3", "q0"
);
}
#endif
#endif // HAS_MIRRORROW_NEON
#ifdef HAS_MIRRORROWUV_NEON
void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) {
......@@ -325,7 +325,7 @@ void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) {
: "memory", "cc", "r12", "q0"
);
}
#endif
#endif // HAS_MIRRORROWUV_NEON
#endif // __ARM_NEON__
......
......@@ -1215,7 +1215,7 @@ void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
#endif // HAS_ARGBTOYROW_SSSE3
#ifdef HAS_I420TOARGBROW_SSSE3
#ifdef HAS_I422TOARGBROW_SSSE3
#define UB 127 /* min(63,static_cast<int8>(2.018 * 64)) */
#define UG -25 /* static_cast<int8>(-0.391 * 64 - 0.5) */
#define UR 0
......@@ -1251,8 +1251,37 @@ struct {
{ YG, YG, YG, YG, YG, YG, YG, YG }
};
// Convert 8 pixels
#define YUVTORGB \
// Convert 8 pixels: 8 UV and 8 Y
// Convert 8 pixels from 8 UV and 8 Y (I444, no chroma subsampling).
// Loads 8 U bytes from (%1) and 8 V bytes from (%1,%2,1) -- callers
// precompute %2 = v_buf - u_buf via "sub %1,%2" -- plus 8 Y bytes from
// (%0), advancing %0 and %1 by 8.  Leaves 8 unsigned B, G and R bytes
// packed in the low quadwords of xmm0, xmm1 and xmm2 respectively.
// %5 points at kYuvConstants.kUVToB; the 16-byte offsets select the G/R
// coefficients, UV biases, Y bias and Y scale in struct order -- TODO
// confirm against the kYuvConstants definition.  xmm4 must be zero on
// entry (callers pxor it).
#define YUV444TORGB \
  "movq (%1),%%xmm0 \n" \
  "movq (%1,%2,1),%%xmm1 \n" \
  "lea 0x8(%1),%1 \n" \
  "punpcklbw %%xmm1,%%xmm0 \n" \
  "movdqa %%xmm0,%%xmm1 \n" \
  "movdqa %%xmm0,%%xmm2 \n" \
  "pmaddubsw (%5),%%xmm0 \n" \
  "pmaddubsw 16(%5),%%xmm1 \n" \
  "pmaddubsw 32(%5),%%xmm2 \n" \
  "psubw 48(%5),%%xmm0 \n" \
  "psubw 64(%5),%%xmm1 \n" \
  "psubw 80(%5),%%xmm2 \n" \
  "movq (%0),%%xmm3 \n" \
  "lea 0x8(%0),%0 \n" \
  "punpcklbw %%xmm4,%%xmm3 \n" \
  "psubsw 96(%5),%%xmm3 \n" \
  "pmullw 112(%5),%%xmm3 \n" \
  "paddsw %%xmm3,%%xmm0 \n" \
  "paddsw %%xmm3,%%xmm1 \n" \
  "paddsw %%xmm3,%%xmm2 \n" \
  "psraw $0x6,%%xmm0 \n" \
  "psraw $0x6,%%xmm1 \n" \
  "psraw $0x6,%%xmm2 \n" \
  "packuswb %%xmm0,%%xmm0 \n" \
  "packuswb %%xmm1,%%xmm1 \n" \
  "packuswb %%xmm2,%%xmm2 \n"
// Convert 8 pixels: 4 UV and 8 Y
#define YUV422TORGB \
"movd (%1),%%xmm0 \n" \
"movd (%1,%2,1),%%xmm1 \n" \
"lea 0x4(%1),%1 \n" \
......@@ -1281,10 +1310,41 @@ struct {
"packuswb %%xmm1,%%xmm1 \n" \
"packuswb %%xmm2,%%xmm2 \n"
void OMITFP I420ToARGBRow_SSSE3(const uint8* y_buf,
// Convert 8 pixels: 2 UV and 8 Y
// Convert 8 pixels from 2 UV and 8 Y (I411, 4x1 chroma subsampling).
// Loads 4 bytes of U from (%1) and 4 bytes of V from (%1,%2,1) but only
// the low 2 bytes of each are consumed (%1 advances by 2).  Each UV pair
// is duplicated 4x via punpcklwd + punpckldq so one chroma sample covers
// 4 Y pixels.  Leaves 8 unsigned B, G and R bytes packed in the low
// quadwords of xmm0, xmm1 and xmm2 respectively.
// %5 points at kYuvConstants.kUVToB (offsets select the remaining
// coefficients/biases in struct order); xmm4 must be zero on entry.
#define YUV411TORGB \
  "movd (%1),%%xmm0 \n" \
  "movd (%1,%2,1),%%xmm1 \n" \
  "lea 0x2(%1),%1 \n" \
  "punpcklbw %%xmm1,%%xmm0 \n" \
  "punpcklwd %%xmm0,%%xmm0 \n" \
  "punpckldq %%xmm0,%%xmm0 \n" \
  "movdqa %%xmm0,%%xmm1 \n" \
  "movdqa %%xmm0,%%xmm2 \n" \
  "pmaddubsw (%5),%%xmm0 \n" \
  "pmaddubsw 16(%5),%%xmm1 \n" \
  "pmaddubsw 32(%5),%%xmm2 \n" \
  "psubw 48(%5),%%xmm0 \n" \
  "psubw 64(%5),%%xmm1 \n" \
  "psubw 80(%5),%%xmm2 \n" \
  "movq (%0),%%xmm3 \n" \
  "lea 0x8(%0),%0 \n" \
  "punpcklbw %%xmm4,%%xmm3 \n" \
  "psubsw 96(%5),%%xmm3 \n" \
  "pmullw 112(%5),%%xmm3 \n" \
  "paddsw %%xmm3,%%xmm0 \n" \
  "paddsw %%xmm3,%%xmm1 \n" \
  "paddsw %%xmm3,%%xmm2 \n" \
  "psraw $0x6,%%xmm0 \n" \
  "psraw $0x6,%%xmm1 \n" \
  "psraw $0x6,%%xmm2 \n" \
  "packuswb %%xmm0,%%xmm0 \n" \
  "packuswb %%xmm1,%%xmm1 \n" \
  "packuswb %%xmm2,%%xmm2 \n"
void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
uint8* argb_buf,
int width) {
asm volatile (
"sub %1,%2 \n"
......@@ -1292,7 +1352,7 @@ void OMITFP I420ToARGBRow_SSSE3(const uint8* y_buf,
"pxor %%xmm4,%%xmm4 \n"
".p2align 4 \n"
"1: \n"
YUVTORGB
YUV444TORGB
"punpcklbw %%xmm1,%%xmm0 \n"
"punpcklbw %%xmm5,%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
......@@ -1306,7 +1366,7 @@ void OMITFP I420ToARGBRow_SSSE3(const uint8* y_buf,
: "+r"(y_buf), // %0
"+r"(u_buf), // %1
"+r"(v_buf), // %2
"+r"(rgb_buf), // %3
"+r"(argb_buf), // %3
"+rm"(width) // %4
: "r"(&kYuvConstants.kUVToB) // %5
: "memory", "cc"
......@@ -1316,10 +1376,10 @@ void OMITFP I420ToARGBRow_SSSE3(const uint8* y_buf,
);
}
void OMITFP I420ToBGRARow_SSSE3(const uint8* y_buf,
void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
uint8* argb_buf,
int width) {
asm volatile (
"sub %1,%2 \n"
......@@ -1327,22 +1387,21 @@ void OMITFP I420ToBGRARow_SSSE3(const uint8* y_buf,
"pxor %%xmm4,%%xmm4 \n"
".p2align 4 \n"
"1: \n"
YUVTORGB
"pcmpeqb %%xmm5,%%xmm5 \n"
"punpcklbw %%xmm0,%%xmm1 \n"
"punpcklbw %%xmm2,%%xmm5 \n"
"movdqa %%xmm5,%%xmm0 \n"
"punpcklwd %%xmm1,%%xmm5 \n"
"punpckhwd %%xmm1,%%xmm0 \n"
"movdqa %%xmm5,(%3) \n"
"movdqa %%xmm0,0x10(%3) \n"
YUV422TORGB
"punpcklbw %%xmm1,%%xmm0 \n"
"punpcklbw %%xmm5,%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
"punpcklwd %%xmm2,%%xmm0 \n"
"punpckhwd %%xmm2,%%xmm1 \n"
"movdqa %%xmm0,(%3) \n"
"movdqa %%xmm1,0x10(%3) \n"
"lea 0x20(%3),%3 \n"
"sub $0x8,%4 \n"
"jg 1b \n"
: "+r"(y_buf), // %0
"+r"(u_buf), // %1
"+r"(v_buf), // %2
"+r"(rgb_buf), // %3
"+r"(argb_buf), // %3
"+rm"(width) // %4
: "r"(&kYuvConstants.kUVToB) // %5
: "memory", "cc"
......@@ -1352,10 +1411,10 @@ void OMITFP I420ToBGRARow_SSSE3(const uint8* y_buf,
);
}
void OMITFP I420ToABGRRow_SSSE3(const uint8* y_buf,
void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
uint8* argb_buf,
int width) {
asm volatile (
"sub %1,%2 \n"
......@@ -1363,13 +1422,13 @@ void OMITFP I420ToABGRRow_SSSE3(const uint8* y_buf,
"pxor %%xmm4,%%xmm4 \n"
".p2align 4 \n"
"1: \n"
YUVTORGB
"punpcklbw %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm5,%%xmm0 \n"
"movdqa %%xmm2,%%xmm1 \n"
"punpcklwd %%xmm0,%%xmm2 \n"
"punpckhwd %%xmm0,%%xmm1 \n"
"movdqa %%xmm2,(%3) \n"
YUV411TORGB
"punpcklbw %%xmm1,%%xmm0 \n"
"punpcklbw %%xmm5,%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
"punpcklwd %%xmm2,%%xmm0 \n"
"punpckhwd %%xmm2,%%xmm1 \n"
"movdqa %%xmm0,(%3) \n"
"movdqa %%xmm1,0x10(%3) \n"
"lea 0x20(%3),%3 \n"
"sub $0x8,%4 \n"
......@@ -1377,7 +1436,42 @@ void OMITFP I420ToABGRRow_SSSE3(const uint8* y_buf,
: "+r"(y_buf), // %0
"+r"(u_buf), // %1
"+r"(v_buf), // %2
"+r"(rgb_buf), // %3
"+r"(argb_buf), // %3
"+rm"(width) // %4
: "r"(&kYuvConstants.kUVToB) // %5
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
#endif
);
}
void OMITFP I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* argb_buf,
int width) {
asm volatile (
"sub %1,%2 \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
"pxor %%xmm4,%%xmm4 \n"
".p2align 4 \n"
"1: \n"
YUV444TORGB
"punpcklbw %%xmm1,%%xmm0 \n"
"punpcklbw %%xmm5,%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
"punpcklwd %%xmm2,%%xmm0 \n"
"punpckhwd %%xmm2,%%xmm1 \n"
"movdqu %%xmm0,(%3) \n"
"movdqu %%xmm1,0x10(%3) \n"
"lea 0x20(%3),%3 \n"
"sub $0x8,%4 \n"
"jg 1b \n"
: "+r"(y_buf), // %0
"+r"(u_buf), // %1
"+r"(v_buf), // %2
"+r"(argb_buf), // %3
"+rm"(width) // %4
: "r"(&kYuvConstants.kUVToB) // %5
: "memory", "cc"
......@@ -1387,10 +1481,10 @@ void OMITFP I420ToABGRRow_SSSE3(const uint8* y_buf,
);
}
void OMITFP I420ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
void OMITFP I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
uint8* argb_buf,
int width) {
asm volatile (
"sub %1,%2 \n"
......@@ -1398,7 +1492,7 @@ void OMITFP I420ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
"pxor %%xmm4,%%xmm4 \n"
".p2align 4 \n"
"1: \n"
YUVTORGB
YUV422TORGB
"punpcklbw %%xmm1,%%xmm0 \n"
"punpcklbw %%xmm5,%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
......@@ -1412,7 +1506,7 @@ void OMITFP I420ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
: "+r"(y_buf), // %0
"+r"(u_buf), // %1
"+r"(v_buf), // %2
"+r"(rgb_buf), // %3
"+r"(argb_buf), // %3
"+rm"(width) // %4
: "r"(&kYuvConstants.kUVToB) // %5
: "memory", "cc"
......@@ -1422,10 +1516,10 @@ void OMITFP I420ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
);
}
void OMITFP I420ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
void OMITFP I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
uint8* argb_buf,
int width) {
asm volatile (
"sub %1,%2 \n"
......@@ -1433,22 +1527,57 @@ void OMITFP I420ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
"pxor %%xmm4,%%xmm4 \n"
".p2align 4 \n"
"1: \n"
YUVTORGB
YUV411TORGB
"punpcklbw %%xmm1,%%xmm0 \n"
"punpcklbw %%xmm5,%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
"punpcklwd %%xmm2,%%xmm0 \n"
"punpckhwd %%xmm2,%%xmm1 \n"
"movdqu %%xmm0,(%3) \n"
"movdqu %%xmm1,0x10(%3) \n"
"lea 0x20(%3),%3 \n"
"sub $0x8,%4 \n"
"jg 1b \n"
: "+r"(y_buf), // %0
"+r"(u_buf), // %1
"+r"(v_buf), // %2
"+r"(argb_buf), // %3
"+rm"(width) // %4
: "r"(&kYuvConstants.kUVToB) // %5
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
#endif
);
}
void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* bgra_buf,
int width) {
asm volatile (
"sub %1,%2 \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
"pxor %%xmm4,%%xmm4 \n"
".p2align 4 \n"
"1: \n"
YUV422TORGB
"pcmpeqb %%xmm5,%%xmm5 \n"
"punpcklbw %%xmm0,%%xmm1 \n"
"punpcklbw %%xmm2,%%xmm5 \n"
"movdqa %%xmm5,%%xmm0 \n"
"punpcklwd %%xmm1,%%xmm5 \n"
"punpckhwd %%xmm1,%%xmm0 \n"
"movdqu %%xmm5,(%3) \n"
"movdqu %%xmm0,0x10(%3) \n"
"movdqa %%xmm5,(%3) \n"
"movdqa %%xmm0,0x10(%3) \n"
"lea 0x20(%3),%3 \n"
"sub $0x8,%4 \n"
"jg 1b \n"
: "+r"(y_buf), // %0
"+r"(u_buf), // %1
"+r"(v_buf), // %2
"+r"(rgb_buf), // %3
"+r"(bgra_buf), // %3
"+rm"(width) // %4
: "r"(&kYuvConstants.kUVToB) // %5
: "memory", "cc"
......@@ -1458,32 +1587,32 @@ void OMITFP I420ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
);
}
void OMITFP I420ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* abgr_buf,
int width) {
asm volatile (
"sub %1,%2 \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
"pxor %%xmm4,%%xmm4 \n"
".p2align 4 \n"
"1: \n"
YUVTORGB
YUV422TORGB
"punpcklbw %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm5,%%xmm0 \n"
"movdqa %%xmm2,%%xmm1 \n"
"punpcklwd %%xmm0,%%xmm2 \n"
"punpckhwd %%xmm0,%%xmm1 \n"
"movdqu %%xmm2,(%3) \n"
"movdqu %%xmm1,0x10(%3) \n"
"movdqa %%xmm2,(%3) \n"
"movdqa %%xmm1,0x10(%3) \n"
"lea 0x20(%3),%3 \n"
"sub $0x8,%4 \n"
"jg 1b \n"
: "+r"(y_buf), // %0
"+r"(u_buf), // %1
"+r"(v_buf), // %2
"+r"(rgb_buf), // %3
"+r"(abgr_buf), // %3
"+rm"(width) // %4
: "r"(&kYuvConstants.kUVToB) // %5
: "memory", "cc"
......@@ -1493,54 +1622,33 @@ void OMITFP I420ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
);
}
void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
void OMITFP I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* bgra_buf,
int width) {
asm volatile (
"sub %1,%2 \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
"pxor %%xmm4,%%xmm4 \n"
".p2align 4 \n"
"1: \n"
"movd (%1),%%xmm0 \n"
"movd (%1,%2,1),%%xmm1 \n"
"lea 0x4(%1),%1 \n"
"punpcklbw %%xmm1,%%xmm0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"movdqa %%xmm0,%%xmm2 \n"
"pmaddubsw (%5),%%xmm0 \n"
"pmaddubsw 16(%5),%%xmm1 \n"
"pmaddubsw 32(%5),%%xmm2 \n"
"psubw 48(%5),%%xmm0 \n"
"psubw 64(%5),%%xmm1 \n"
"psubw 80(%5),%%xmm2 \n"
"movd (%0),%%xmm3 \n"
"lea 0x4(%0),%0 \n"
"punpcklbw %%xmm4,%%xmm3 \n"
"psubsw 96(%5),%%xmm3 \n"
"pmullw 112(%5),%%xmm3 \n"
"paddsw %%xmm3,%%xmm0 \n"
"paddsw %%xmm3,%%xmm1 \n"
"paddsw %%xmm3,%%xmm2 \n"
"psraw $0x6,%%xmm0 \n"
"psraw $0x6,%%xmm1 \n"
"psraw $0x6,%%xmm2 \n"
"packuswb %%xmm0,%%xmm0 \n"
"packuswb %%xmm1,%%xmm1 \n"
"packuswb %%xmm2,%%xmm2 \n"
"punpcklbw %%xmm1,%%xmm0 \n"
"punpcklbw %%xmm5,%%xmm2 \n"
"punpcklwd %%xmm2,%%xmm0 \n"
"sub $0x4,%4 \n"
"movdqa %%xmm0,(%3) \n"
"lea 0x10(%3),%3 \n"
YUV422TORGB
"pcmpeqb %%xmm5,%%xmm5 \n"
"punpcklbw %%xmm0,%%xmm1 \n"
"punpcklbw %%xmm2,%%xmm5 \n"
"movdqa %%xmm5,%%xmm0 \n"
"punpcklwd %%xmm1,%%xmm5 \n"
"punpckhwd %%xmm1,%%xmm0 \n"
"movdqu %%xmm5,(%3) \n"
"movdqu %%xmm0,0x10(%3) \n"
"lea 0x20(%3),%3 \n"
"sub $0x8,%4 \n"
"jg 1b \n"
: "+r"(y_buf), // %0
"+r"(u_buf), // %1
"+r"(v_buf), // %2
"+r"(rgb_buf), // %3
"+r"(bgra_buf), // %3
"+rm"(width) // %4
: "r"(&kYuvConstants.kUVToB) // %5
: "memory", "cc"
......@@ -1549,7 +1657,43 @@ void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
#endif
);
}
// Convert one row of I422 (2x1 subsampled planar YUV) to ABGR.
// Processes 8 pixels per iteration with unaligned (movdqu) destination
// stores; width is expected to be a multiple of 8 -- the YANY wrapper
// handles the remainder pixels with the C row function.
void OMITFP I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
                                          const uint8* u_buf,
                                          const uint8* v_buf,
                                          uint8* abgr_buf,
                                          int width) {
  asm volatile (
    "sub %1,%2 \n"  // %2 = v_buf - u_buf, so YUV422TORGB loads V at (%1,%2,1)
    "pcmpeqb %%xmm5,%%xmm5 \n"  // xmm5 = all 0xff: opaque alpha
    "pxor %%xmm4,%%xmm4 \n"  // xmm4 = zero, required by YUV422TORGB
    ".p2align 4 \n"
  "1: \n"
    YUV422TORGB
    // Weave B,G,R into R,G,B,A memory byte order (ABGR little-endian word).
    "punpcklbw %%xmm1,%%xmm2 \n"  // RG
    "punpcklbw %%xmm5,%%xmm0 \n"  // BA
    "movdqa %%xmm2,%%xmm1 \n"
    "punpcklwd %%xmm0,%%xmm2 \n"  // RGBA first 4 pixels
    "punpckhwd %%xmm0,%%xmm1 \n"  // RGBA next 4 pixels
    "movdqu %%xmm2,(%3) \n"
    "movdqu %%xmm1,0x10(%3) \n"
    "lea 0x20(%3),%3 \n"
    "sub $0x8,%4 \n"
    "jg 1b \n"
  : "+r"(y_buf), // %0
    "+r"(u_buf), // %1
    "+r"(v_buf), // %2
    "+r"(abgr_buf), // %3
    "+rm"(width) // %4
  : "r"(&kYuvConstants.kUVToB) // %5
  : "memory", "cc"
#if defined(__SSE2__)
  , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
#endif
  );
}
#endif // HAS_I422TOARGBROW_SSSE3
#ifdef HAS_YTOARGBROW_SSE2
void YToARGBRow_SSE2(const uint8* y_buf,
......
......@@ -1200,7 +1200,7 @@ __asm {
}
}
#ifdef HAS_I420TOARGBROW_SSSE3
#ifdef HAS_I422TOARGBROW_SSSE3
#define YG 74 /* static_cast<int8>(1.164 * 64 + 0.5) */
......@@ -1235,7 +1235,42 @@ static const vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB };
static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG };
static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
#define YUVTORGB __asm { \
// TODO(fbarchard): NV12/NV21 fetch UV and use directly.
// Convert 8 pixels: 8 UV and 8 Y
#define YUV444TORGB __asm { \
    /* Step 1: Find 8 UV contributions to 8 R,G,B values */ \
    __asm movq xmm0, qword ptr [esi] /* U */ \
    __asm movq xmm1, qword ptr [esi + edi] /* V: edi = v_buf - u_buf */ \
    __asm lea esi, [esi + 8] \
    __asm punpcklbw xmm0, xmm1 /* UV */ \
    __asm movdqa xmm1, xmm0 \
    __asm movdqa xmm2, xmm0 \
    __asm pmaddubsw xmm0, kUVToB /* scale B UV */ \
    __asm pmaddubsw xmm1, kUVToG /* scale G UV */ \
    __asm pmaddubsw xmm2, kUVToR /* scale R UV */ \
    __asm psubw xmm0, kUVBiasB /* unbias back to signed */ \
    __asm psubw xmm1, kUVBiasG \
    __asm psubw xmm2, kUVBiasR \
    /* Step 2: Find Y contribution to 8 R,G,B values */ \
    __asm movq xmm3, qword ptr [eax] /* NOLINT */ \
    __asm lea eax, [eax + 8] \
    __asm punpcklbw xmm3, xmm4 /* xmm4 zeroed by caller */ \
    __asm psubsw xmm3, kYSub16 \
    __asm pmullw xmm3, kYToRgb \
    __asm paddsw xmm0, xmm3 /* B += Y */ \
    __asm paddsw xmm1, xmm3 /* G += Y */ \
    __asm paddsw xmm2, xmm3 /* R += Y */ \
    __asm psraw xmm0, 6 \
    __asm psraw xmm1, 6 \
    __asm psraw xmm2, 6 \
    __asm packuswb xmm0, xmm0 /* B */ \
    __asm packuswb xmm1, xmm1 /* G */ \
    __asm packuswb xmm2, xmm2 /* R */ \
  }
// Convert 8 pixels: 4 UV and 8 Y
#define YUV422TORGB __asm { \
/* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
__asm movd xmm0, [esi] /* U */ \
__asm movd xmm1, [esi + edi] /* V */ \
......@@ -1267,11 +1302,47 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
__asm packuswb xmm2, xmm2 /* R */ \
}
// Convert 8 pixels: 2 UV and 8 Y
#define YUV411TORGB __asm { \
    /* Step 1: Find 2 UV contributions to 8 R,G,B values */ \
    __asm movd xmm0, [esi] /* U: only low 2 bytes are used */ \
    __asm movd xmm1, [esi + edi] /* V: only low 2 bytes are used */ \
    __asm lea esi, [esi + 2] \
    __asm punpcklbw xmm0, xmm1 /* UV */ \
    __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
    __asm punpckldq xmm0, xmm0 /* UVUV (upsample): each UV covers 4 Y */ \
    __asm movdqa xmm1, xmm0 \
    __asm movdqa xmm2, xmm0 \
    __asm pmaddubsw xmm0, kUVToB /* scale B UV */ \
    __asm pmaddubsw xmm1, kUVToG /* scale G UV */ \
    __asm pmaddubsw xmm2, kUVToR /* scale R UV */ \
    __asm psubw xmm0, kUVBiasB /* unbias back to signed */ \
    __asm psubw xmm1, kUVBiasG \
    __asm psubw xmm2, kUVBiasR \
    /* Step 2: Find Y contribution to 8 R,G,B values */ \
    __asm movq xmm3, qword ptr [eax] /* NOLINT */ \
    __asm lea eax, [eax + 8] \
    __asm punpcklbw xmm3, xmm4 /* xmm4 zeroed by caller */ \
    __asm psubsw xmm3, kYSub16 \
    __asm pmullw xmm3, kYToRgb \
    __asm paddsw xmm0, xmm3 /* B += Y */ \
    __asm paddsw xmm1, xmm3 /* G += Y */ \
    __asm paddsw xmm2, xmm3 /* R += Y */ \
    __asm psraw xmm0, 6 \
    __asm psraw xmm1, 6 \
    __asm psraw xmm2, 6 \
    __asm packuswb xmm0, xmm0 /* B */ \
    __asm packuswb xmm1, xmm1 /* G */ \
    __asm packuswb xmm2, xmm2 /* R */ \
  }
// 8 pixels, dest aligned 16.
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes)
__declspec(naked) __declspec(align(16))
void I420ToARGBRow_SSSE3(const uint8* y_buf,
void I444ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
uint8* argb_buf,
int width) {
__asm {
push esi
......@@ -1279,7 +1350,7 @@ void I420ToARGBRow_SSSE3(const uint8* y_buf,
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // rgb
mov edx, [esp + 8 + 16] // argb
mov ecx, [esp + 8 + 20] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
......@@ -1287,7 +1358,7 @@ void I420ToARGBRow_SSSE3(const uint8* y_buf,
align 16
convertloop:
YUVTORGB
YUV444TORGB
// Step 3: Weave into ARGB
punpcklbw xmm0, xmm1 // BG
......@@ -1307,11 +1378,13 @@ void I420ToARGBRow_SSSE3(const uint8* y_buf,
}
}
// 8 pixels, dest aligned 16.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes)
__declspec(naked) __declspec(align(16))
void I420ToBGRARow_SSSE3(const uint8* y_buf,
void I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
uint8* argb_buf,
int width) {
__asm {
push esi
......@@ -1319,24 +1392,24 @@ void I420ToBGRARow_SSSE3(const uint8* y_buf,
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // rgb
mov edx, [esp + 8 + 16] // argb
mov ecx, [esp + 8 + 20] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
pxor xmm4, xmm4
align 16
convertloop:
YUVTORGB
YUV422TORGB
// Step 3: Weave into BGRA
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
punpcklbw xmm1, xmm0 // GB
punpcklbw xmm5, xmm2 // AR
movdqa xmm0, xmm5
punpcklwd xmm5, xmm1 // BGRA first 4 pixels
punpckhwd xmm0, xmm1 // BGRA next 4 pixels
movdqa [edx], xmm5
movdqa [edx + 16], xmm0
// Step 3: Weave into ARGB
punpcklbw xmm0, xmm1 // BG
punpcklbw xmm2, xmm5 // RA
movdqa xmm1, xmm0
punpcklwd xmm0, xmm2 // BGRA first 4 pixels
punpckhwd xmm1, xmm2 // BGRA next 4 pixels
movdqa [edx], xmm0
movdqa [edx + 16], xmm1
lea edx, [edx + 32]
sub ecx, 8
jg convertloop
......@@ -1347,11 +1420,14 @@ void I420ToBGRARow_SSSE3(const uint8* y_buf,
}
}
// 8 pixels, dest aligned 16.
// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes)
// Similar to I420 but duplicate UV once more.
__declspec(naked) __declspec(align(16))
void I420ToABGRRow_SSSE3(const uint8* y_buf,
void I411ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
uint8* argb_buf,
int width) {
__asm {
push esi
......@@ -1359,7 +1435,7 @@ void I420ToABGRRow_SSSE3(const uint8* y_buf,
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // rgb
mov edx, [esp + 8 + 16] // argb
mov ecx, [esp + 8 + 20] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
......@@ -1367,15 +1443,15 @@ void I420ToABGRRow_SSSE3(const uint8* y_buf,
align 16
convertloop:
YUVTORGB
YUV411TORGB
// Step 3: Weave into ARGB
punpcklbw xmm2, xmm1 // RG
punpcklbw xmm0, xmm5 // BA
movdqa xmm1, xmm2
punpcklwd xmm2, xmm0 // RGBA first 4 pixels
punpckhwd xmm1, xmm0 // RGBA next 4 pixels
movdqa [edx], xmm2
punpcklbw xmm0, xmm1 // BG
punpcklbw xmm2, xmm5 // RA
movdqa xmm1, xmm0
punpcklwd xmm0, xmm2 // BGRA first 4 pixels
punpckhwd xmm1, xmm2 // BGRA next 4 pixels
movdqa [edx], xmm0
movdqa [edx + 16], xmm1
lea edx, [edx + 32]
sub ecx, 8
......@@ -1387,11 +1463,13 @@ void I420ToABGRRow_SSSE3(const uint8* y_buf,
}
}
// 8 pixels, unaligned.
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes)
__declspec(naked) __declspec(align(16))
void I420ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
uint8* argb_buf,
int width) {
__asm {
push esi
......@@ -1399,7 +1477,7 @@ void I420ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // rgb
mov edx, [esp + 8 + 16] // argb
mov ecx, [esp + 8 + 20] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
......@@ -1407,7 +1485,7 @@ void I420ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
align 16
convertloop:
YUVTORGB
YUV444TORGB
// Step 3: Weave into ARGB
punpcklbw xmm0, xmm1 // BG
......@@ -1415,8 +1493,50 @@ void I420ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
movdqa xmm1, xmm0
punpcklwd xmm0, xmm2 // BGRA first 4 pixels
punpckhwd xmm1, xmm2 // BGRA next 4 pixels
movdqu [edx], xmm0
movdqu [edx + 16], xmm1
movdqa [edx], xmm0
movdqa [edx + 16], xmm1
lea edx, [edx + 32]
sub ecx, 8
jg convertloop
pop edi
pop esi
ret
}
}
// 8 pixels, unaligned.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes)
__declspec(naked) __declspec(align(16))
void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* argb_buf,
int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // argb
mov ecx, [esp + 8 + 20] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
pxor xmm4, xmm4
align 16
convertloop:
YUV422TORGB
// Step 3: Weave into ARGB
punpcklbw xmm0, xmm1 // BG
punpcklbw xmm2, xmm5 // RA
movdqa xmm1, xmm0
punpcklwd xmm0, xmm2 // BGRA first 4 pixels
punpckhwd xmm1, xmm2 // BGRA next 4 pixels
movdqa [edx], xmm0
movdqa [edx + 16], xmm1
lea edx, [edx + 32]
sub ecx, 8
jg convertloop
......@@ -1427,11 +1547,14 @@ void I420ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
}
}
// 8 pixels, unaligned.
// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes)
// Similar to I420 but duplicate UV once more.
__declspec(naked) __declspec(align(16))
void I420ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
uint8* argb_buf,
int width) {
__asm {
push esi
......@@ -1439,14 +1562,54 @@ void I420ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // rgb
mov edx, [esp + 8 + 16] // argb
mov ecx, [esp + 8 + 20] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
pxor xmm4, xmm4
align 16
convertloop:
YUVTORGB
YUV411TORGB
// Step 3: Weave into ARGB
punpcklbw xmm0, xmm1 // BG
punpcklbw xmm2, xmm5 // RA
movdqa xmm1, xmm0
punpcklwd xmm0, xmm2 // BGRA first 4 pixels
punpckhwd xmm1, xmm2 // BGRA next 4 pixels
movdqa [edx], xmm0
movdqa [edx + 16], xmm1
lea edx, [edx + 32]
sub ecx, 8
jg convertloop
pop edi
pop esi
ret
}
}
__declspec(naked) __declspec(align(16))
void I422ToBGRARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* bgra_buf,
int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // bgra
mov ecx, [esp + 8 + 20] // width
sub edi, esi
pxor xmm4, xmm4
align 16
convertloop:
YUV422TORGB
// Step 3: Weave into BGRA
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
......@@ -1455,8 +1618,8 @@ void I420ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
movdqa xmm0, xmm5
punpcklwd xmm5, xmm1 // BGRA first 4 pixels
punpckhwd xmm0, xmm1 // BGRA next 4 pixels
movdqu [edx], xmm5
movdqu [edx + 16], xmm0
movdqa [edx], xmm5
movdqa [edx + 16], xmm0
lea edx, [edx + 32]
sub ecx, 8
jg convertloop
......@@ -1468,18 +1631,18 @@ void I420ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
}
__declspec(naked) __declspec(align(16))
void I420ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
void I422ToABGRRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* abgr_buf,
int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // rgb
mov edx, [esp + 8 + 16] // abgr
mov ecx, [esp + 8 + 20] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
......@@ -1487,7 +1650,7 @@ void I420ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
align 16
convertloop:
YUVTORGB
YUV422TORGB
// Step 3: Weave into ARGB
punpcklbw xmm2, xmm1 // RG
......@@ -1495,8 +1658,8 @@ void I420ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
movdqa xmm1, xmm2
punpcklwd xmm2, xmm0 // RGBA first 4 pixels
punpckhwd xmm1, xmm0 // RGBA next 4 pixels
movdqu [edx], xmm2
movdqu [edx + 16], xmm1
movdqa [edx], xmm2
movdqa [edx + 16], xmm1
lea edx, [edx + 32]
sub ecx, 8
jg convertloop
......@@ -1508,62 +1671,77 @@ void I420ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
}
__declspec(naked) __declspec(align(16))
void I444ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* bgra_buf,
int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // rgb
mov edx, [esp + 8 + 16] // bgra
mov ecx, [esp + 8 + 20] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
pxor xmm4, xmm4
align 16
convertloop:
// Step 1: Find 4 UV contributions to 4 R,G,B values
movd xmm0, [esi] // U
movd xmm1, [esi + edi] // V
lea esi, [esi + 4]
punpcklbw xmm0, xmm1 // UV
movdqa xmm1, xmm0
movdqa xmm2, xmm0
pmaddubsw xmm0, kUVToB // scale B UV
pmaddubsw xmm1, kUVToG // scale G UV
pmaddubsw xmm2, kUVToR // scale R UV
psubw xmm0, kUVBiasB // unbias back to signed
psubw xmm1, kUVBiasG
psubw xmm2, kUVBiasR
// Step 2: Find Y contribution to 4 R,G,B values
movd xmm3, [eax]
lea eax, [eax + 4]
punpcklbw xmm3, xmm4
psubsw xmm3, kYSub16
pmullw xmm3, kYToRgb
paddsw xmm0, xmm3 // B += Y
paddsw xmm1, xmm3 // G += Y
paddsw xmm2, xmm3 // R += Y
psraw xmm0, 6
psraw xmm1, 6
psraw xmm2, 6
packuswb xmm0, xmm0 // B
packuswb xmm1, xmm1 // G
packuswb xmm2, xmm2 // R
YUV422TORGB
// Step 3: Weave into BGRA
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
punpcklbw xmm1, xmm0 // GB
punpcklbw xmm5, xmm2 // AR
movdqa xmm0, xmm5
punpcklwd xmm5, xmm1 // BGRA first 4 pixels
punpckhwd xmm0, xmm1 // BGRA next 4 pixels
movdqu [edx], xmm5
movdqu [edx + 16], xmm0
lea edx, [edx + 32]
sub ecx, 8
jg convertloop
pop edi
pop esi
ret
}
}
__declspec(naked) __declspec(align(16))
void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* abgr_buf,
int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // Y
mov esi, [esp + 8 + 8] // U
mov edi, [esp + 8 + 12] // V
mov edx, [esp + 8 + 16] // abgr
mov ecx, [esp + 8 + 20] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
pxor xmm4, xmm4
align 16
convertloop:
YUV422TORGB
// Step 3: Weave into ARGB
punpcklbw xmm0, xmm1 // BG
punpcklbw xmm2, xmm5 // RA
punpcklwd xmm0, xmm2 // BGRA 4 pixels
movdqa [edx], xmm0
lea edx, [edx + 16]
sub ecx, 4
punpcklbw xmm2, xmm1 // RG
punpcklbw xmm0, xmm5 // BA
movdqa xmm1, xmm2
punpcklwd xmm2, xmm0 // RGBA first 4 pixels
punpckhwd xmm1, xmm0 // RGBA next 4 pixels
movdqu [edx], xmm2
movdqu [edx + 16], xmm1
lea edx, [edx + 32]
sub ecx, 8
jg convertloop
pop edi
......@@ -1571,7 +1749,7 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf,
ret
}
}
#endif
#endif // HAS_I422TOARGBROW_SSSE3
#ifdef HAS_YTOARGBROW_SSE2
__declspec(naked) __declspec(align(16))
......@@ -1617,7 +1795,7 @@ void YToARGBRow_SSE2(const uint8* y_buf,
ret
}
}
#endif
#endif // HAS_YTOARGBROW_SSE2
#endif
#ifdef HAS_MIRRORROW_SSSE3
......
......@@ -25,88 +25,44 @@
namespace libyuv {
TEST_F(libyuvTest, BenchmarkI420ToARGB_C) {
align_buffer_16(src_y, benchmark_width_ * benchmark_height_);
align_buffer_16(src_u, (benchmark_width_ * benchmark_height_) >> 2);
align_buffer_16(src_v, (benchmark_width_ * benchmark_height_) >> 2);
align_buffer_16(dst_argb, (benchmark_width_ << 2) * benchmark_height_);
MaskCpuFlags(kCpuInitialized);
for (int i = 0; i < benchmark_iterations_; ++i)
I420ToARGB(src_y, benchmark_width_,
src_u, benchmark_width_ >> 1,
src_v, benchmark_width_ >> 1,
dst_argb, benchmark_width_ << 2,
benchmark_width_, benchmark_height_);
MaskCpuFlags(-1);
EXPECT_EQ(0, 0);
free_aligned_buffer_16(src_y)
free_aligned_buffer_16(src_u)
free_aligned_buffer_16(src_v)
free_aligned_buffer_16(dst_argb)
}
TEST_F(libyuvTest, BenchmarkI420ToARGB_OPT) {
align_buffer_16(src_y, benchmark_width_ * benchmark_height_);
align_buffer_16(src_u, (benchmark_width_ * benchmark_height_) >> 2);
align_buffer_16(src_v, (benchmark_width_ * benchmark_height_) >> 2);
align_buffer_16(dst_argb, (benchmark_width_ << 2) * benchmark_height_);
for (int i = 0; i < benchmark_iterations_; ++i)
I420ToARGB(src_y, benchmark_width_,
src_u, benchmark_width_ >> 1,
src_v, benchmark_width_ >> 1,
dst_argb, benchmark_width_ << 2,
benchmark_width_, benchmark_height_);
free_aligned_buffer_16(src_y)
free_aligned_buffer_16(src_u)
free_aligned_buffer_16(src_v)
free_aligned_buffer_16(dst_argb)
}
#define TESTI420TO(FMT, BPP) \
TEST_F(libyuvTest, I420To##FMT##_CvsOPT) { \
#define TESTPLANARTOB(FMT_A, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B) \
TEST_F(libyuvTest, ##FMT_A##To##FMT_B##_CvsOPT) { \
const int src_width = 1280; \
const int src_height = 720; \
align_buffer_16(src_y, src_width * src_height); \
align_buffer_16(src_u, (src_width * src_height) >> 2); \
align_buffer_16(src_v, (src_width * src_height) >> 2); \
align_buffer_16(dst_rgb_c, (src_width * BPP) * src_height); \
align_buffer_16(dst_rgb_opt, (src_width * BPP) * src_height); \
align_buffer_16(src_u, src_width / SUBSAMP_X * src_height / SUBSAMP_Y); \
align_buffer_16(src_v, src_width / SUBSAMP_X * src_height / SUBSAMP_Y); \
align_buffer_16(dst_rgb_c, (src_width * BPP_B) * src_height); \
align_buffer_16(dst_rgb_opt, (src_width * BPP_B) * src_height); \
srandom(time(NULL)); \
for (int i = 0; i < src_height; ++i) \
for (int j = 0; j < src_width; ++j) \
src_y[(i * src_width) + j] = (random() & 0xff); \
for (int i = 0; i < src_height >> 1; ++i) \
for (int j = 0; j < src_width >> 1; ++j) { \
src_u[(i * src_width >> 1) + j] = (random() & 0xff); \
src_v[(i * src_width >> 1) + j] = (random() & 0xff); \
for (int i = 0; i < src_height / SUBSAMP_X; ++i) \
for (int j = 0; j < src_width / SUBSAMP_Y; ++j) { \
src_u[(i * src_width / SUBSAMP_X) + j] = (random() & 0xff); \
src_v[(i * src_width / SUBSAMP_X) + j] = (random() & 0xff); \
} \
MaskCpuFlags(kCpuInitialized); \
I420To##FMT(src_y, src_width, \
src_u, src_width >> 1, \
src_v, src_width >> 1, \
dst_rgb_c, src_width * BPP, \
##FMT_A##To##FMT_B(src_y, src_width, \
src_u, src_width / SUBSAMP_X, \
src_v, src_width / SUBSAMP_X, \
dst_rgb_c, src_width * BPP_B, \
src_width, src_height); \
MaskCpuFlags(-1); \
const int runs = 1000; \
for (int i = 0; i < runs; ++i) { \
I420To##FMT(src_y, src_width, \
src_u, src_width >> 1, \
src_v, src_width >> 1, \
dst_rgb_opt, src_width * BPP, \
##FMT_A##To##FMT_B(src_y, src_width, \
src_u, src_width / SUBSAMP_X, \
src_v, src_width / SUBSAMP_X, \
dst_rgb_opt, src_width * BPP_B, \
src_width, src_height); \
} \
int err = 0; \
for (int i = 0; i < src_height; ++i) { \
for (int j = 0; j < src_width * BPP; ++j) { \
int diff = static_cast<int>(dst_rgb_c[i * src_width * BPP + j]) - \
static_cast<int>(dst_rgb_opt[i * src_width * BPP + j]); \
for (int j = 0; j < src_width * BPP_B; ++j) { \
int diff = static_cast<int>(dst_rgb_c[i * src_width * BPP_B + j]) - \
static_cast<int>(dst_rgb_opt[i * src_width * BPP_B + j]); \
if (abs(diff) > 2) \
err++; \
} \
......@@ -119,14 +75,17 @@ TEST_F(libyuvTest, I420To##FMT##_CvsOPT) { \
free_aligned_buffer_16(dst_rgb_opt) \
}
TESTI420TO(ARGB, 4)
TESTI420TO(BGRA, 4)
TESTI420TO(ABGR, 4)
TESTI420TO(RAW, 3)
TESTI420TO(RGB24, 3)
TESTI420TO(RGB565, 2)
TESTI420TO(ARGB1555, 2)
TESTI420TO(ARGB4444, 2)
TESTPLANARTOB(I420, 2, 2, ARGB, 4)
TESTPLANARTOB(I420, 2, 2, BGRA, 4)
TESTPLANARTOB(I420, 2, 2, ABGR, 4)
TESTPLANARTOB(I420, 2, 2, RAW, 3)
TESTPLANARTOB(I420, 2, 2, RGB24, 3)
TESTPLANARTOB(I420, 2, 2, RGB565, 2)
TESTPLANARTOB(I420, 2, 2, ARGB1555, 2)
TESTPLANARTOB(I420, 2, 2, ARGB4444, 2)
TESTPLANARTOB(I411, 4, 1, ARGB, 4)
TESTPLANARTOB(I422, 2, 1, ARGB, 4)
TESTPLANARTOB(I444, 1, 1, ARGB, 4)
#define TESTATOB(FMT_A, BPP_A, FMT_B, BPP_B) \
TEST_F(libyuvTest, ##FMT_A##To##FMT_B##_CvsOPT) { \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.