Commit ba03e4d9 authored by fbarchard@google.com

Relax alignment checks especially when doing 2 step conversions

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/368004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@147 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 510fe70c
...@@ -373,9 +373,7 @@ int ARGBToI420(const uint8* src_frame, int src_stride_frame, ...@@ -373,9 +373,7 @@ int ARGBToI420(const uint8* src_frame, int src_stride_frame,
#if defined(HAS_ARGBTOUVROW_SSSE3) #if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) && IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16)) {
IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToUVRow = ARGBToUVRow_SSSE3;
} else } else
#endif #endif
...@@ -426,9 +424,7 @@ int BGRAToI420(const uint8* src_frame, int src_stride_frame, ...@@ -426,9 +424,7 @@ int BGRAToI420(const uint8* src_frame, int src_stride_frame,
#if defined(HAS_BGRATOUVROW_SSSE3) #if defined(HAS_BGRATOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) && IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16)) {
IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = BGRAToUVRow_SSSE3; ARGBToUVRow = BGRAToUVRow_SSSE3;
} else } else
#endif #endif
...@@ -479,9 +475,7 @@ int ABGRToI420(const uint8* src_frame, int src_stride_frame, ...@@ -479,9 +475,7 @@ int ABGRToI420(const uint8* src_frame, int src_stride_frame,
#if defined(HAS_ABGRTOUVROW_SSSE3) #if defined(HAS_ABGRTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16) && IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16)) {
IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = ABGRToUVRow_SSSE3; ARGBToUVRow = ABGRToUVRow_SSSE3;
} else } else
#endif #endif
...@@ -522,7 +516,6 @@ int RGB24ToI420(const uint8* src_frame, int src_stride_frame, ...@@ -522,7 +516,6 @@ int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_RGB24TOARGBROW_SSSE3) #if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16)) { IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16)) {
RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
} else } else
...@@ -541,10 +534,7 @@ int RGB24ToI420(const uint8* src_frame, int src_stride_frame, ...@@ -541,10 +534,7 @@ int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
ARGBToYRow = ARGBToYRow_C; ARGBToYRow = ARGBToYRow_C;
} }
#if defined(HAS_ARGBTOUVROW_SSSE3) #if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToUVRow = ARGBToUVRow_SSSE3;
} else } else
#endif #endif
...@@ -588,7 +578,6 @@ int RAWToI420(const uint8* src_frame, int src_stride_frame, ...@@ -588,7 +578,6 @@ int RAWToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_RAWTOARGBROW_SSSE3) #if defined(HAS_RAWTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16)) { IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16)) {
RAWToARGBRow = RAWToARGBRow_SSSE3; RAWToARGBRow = RAWToARGBRow_SSSE3;
} else } else
...@@ -607,10 +596,7 @@ int RAWToI420(const uint8* src_frame, int src_stride_frame, ...@@ -607,10 +596,7 @@ int RAWToI420(const uint8* src_frame, int src_stride_frame,
ARGBToYRow = ARGBToYRow_C; ARGBToYRow = ARGBToYRow_C;
} }
#if defined(HAS_ARGBTOUVROW_SSSE3) #if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToUVRow = ARGBToUVRow_SSSE3;
} else } else
#endif #endif
...@@ -654,7 +640,6 @@ int RGB565ToI420(const uint8* src_frame, int src_stride_frame, ...@@ -654,7 +640,6 @@ int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_RGB565TOARGBROW_SSE2) #if defined(HAS_RGB565TOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16)) { IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16)) {
RGB565ToARGBRow = RGB565ToARGBRow_SSE2; RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
} else } else
...@@ -673,10 +658,7 @@ int RGB565ToI420(const uint8* src_frame, int src_stride_frame, ...@@ -673,10 +658,7 @@ int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
ARGBToYRow = ARGBToYRow_C; ARGBToYRow = ARGBToYRow_C;
} }
#if defined(HAS_ARGBTOUVROW_SSSE3) #if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToUVRow = ARGBToUVRow_SSSE3;
} else } else
#endif #endif
...@@ -720,7 +702,6 @@ int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame, ...@@ -720,7 +702,6 @@ int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_ARGB1555TOARGBROW_SSE2) #if defined(HAS_ARGB1555TOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16)) { IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2; ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
} else } else
...@@ -739,10 +720,7 @@ int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame, ...@@ -739,10 +720,7 @@ int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
ARGBToYRow = ARGBToYRow_C; ARGBToYRow = ARGBToYRow_C;
} }
#if defined(HAS_ARGBTOUVROW_SSSE3) #if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToUVRow = ARGBToUVRow_SSSE3;
} else } else
#endif #endif
...@@ -786,7 +764,6 @@ int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame, ...@@ -786,7 +764,6 @@ int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_ARGB4444TOARGBROW_SSE2) #if defined(HAS_ARGB4444TOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16)) { IS_ALIGNED(src_frame, 16) && IS_ALIGNED(src_stride_frame, 16)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2; ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
} else } else
...@@ -805,10 +782,7 @@ int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame, ...@@ -805,10 +782,7 @@ int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
ARGBToYRow = ARGBToYRow_C; ARGBToYRow = ARGBToYRow_C;
} }
#if defined(HAS_ARGBTOUVROW_SSSE3) #if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToUVRow = ARGBToUVRow_SSSE3;
} else } else
#endif #endif
......
...@@ -118,8 +118,7 @@ int ARGBToBayerRGB(const uint8* src_rgb, int src_stride_rgb, ...@@ -118,8 +118,7 @@ int ARGBToBayerRGB(const uint8* src_rgb, int src_stride_rgb,
#if defined(HAS_ARGBTOBAYERROW_SSSE3) #if defined(HAS_ARGBTOBAYERROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 4) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_rgb, 16) && IS_ALIGNED(src_stride_rgb, 16) && IS_ALIGNED(src_rgb, 16) && IS_ALIGNED(src_stride_rgb, 16)) {
IS_ALIGNED(dst_bayer, 4) && IS_ALIGNED(dst_stride_bayer, 4)) {
ARGBToBayerRow = ARGBToBayerRow_SSSE3; ARGBToBayerRow = ARGBToBayerRow_SSSE3;
} else } else
#endif #endif
...@@ -372,7 +371,6 @@ int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer, ...@@ -372,7 +371,6 @@ int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer,
#if defined(HAS_ARGBTOYROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(row, 16) && IS_ALIGNED(kMaxStride, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} else } else
...@@ -381,11 +379,7 @@ int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer, ...@@ -381,11 +379,7 @@ int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer,
ARGBToYRow = ARGBToYRow_C; ARGBToYRow = ARGBToYRow_C;
} }
#if defined(HAS_ARGBTOUVROW_SSSE3) #if defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
IS_ALIGNED(width, 16) &&
IS_ALIGNED(row, 16) && IS_ALIGNED(kMaxStride, 16) &&
IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToUVRow = ARGBToUVRow_SSSE3;
} else } else
#endif #endif
...@@ -426,7 +420,6 @@ int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer, ...@@ -426,7 +420,6 @@ int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer,
dst_u += dst_stride_u; dst_u += dst_stride_u;
dst_v += dst_stride_v; dst_v += dst_stride_v;
} }
// TODO(fbarchard): Make sure this filters properly
if (height & 1) { if (height & 1) {
BayerRow0(src_bayer, src_stride_bayer, row, width); BayerRow0(src_bayer, src_stride_bayer, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width); ARGBToUVRow(row, 0, dst_u, dst_v, width);
......
...@@ -1410,25 +1410,20 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, ...@@ -1410,25 +1410,20 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_u, uint8* dst_v, int pix); uint8* dst_u, uint8* dst_v, int pix);
void (*YUY2ToI420RowY)(const uint8* src_yuy2, void (*YUY2ToI420RowY)(const uint8* src_yuy2,
uint8* dst_y, int pix); uint8* dst_y, int pix);
YUY2ToI420RowY = YUY2ToI420RowY_C;
YUY2ToI420RowUV = YUY2ToI420RowUV_C;
#if defined(HAS_YUY2TOI420ROW_SSE2) #if defined(HAS_YUY2TOI420ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
YUY2ToI420RowUV = YUY2ToI420RowUV_Unaligned_SSE2;
if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) { if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
YUY2ToI420RowUV = YUY2ToI420RowUV_SSE2; YUY2ToI420RowUV = YUY2ToI420RowUV_SSE2;
} else {
YUY2ToI420RowUV = YUY2ToI420RowUV_Unaligned_SSE2;
}
if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
YUY2ToI420RowY = YUY2ToI420RowY_SSE2;
} else {
YUY2ToI420RowY = YUY2ToI420RowY_Unaligned_SSE2; YUY2ToI420RowY = YUY2ToI420RowY_Unaligned_SSE2;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
YUY2ToI420RowY = YUY2ToI420RowY_SSE2;
}
} }
} else
#endif
{
YUY2ToI420RowY = YUY2ToI420RowY_C;
YUY2ToI420RowUV = YUY2ToI420RowUV_C;
} }
#endif
for (int y = 0; y < height - 1; y += 2) { for (int y = 0; y < height - 1; y += 2) {
YUY2ToI420RowUV(src_yuy2, src_stride_yuy2, dst_u, dst_v, width); YUY2ToI420RowUV(src_yuy2, src_stride_yuy2, dst_u, dst_v, width);
dst_u += dst_stride_u; dst_u += dst_stride_u;
...@@ -1461,21 +1456,18 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, ...@@ -1461,21 +1456,18 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_u, uint8* dst_v, int pix); uint8* dst_u, uint8* dst_v, int pix);
void (*UYVYToI420RowY)(const uint8* src_uyvy, void (*UYVYToI420RowY)(const uint8* src_uyvy,
uint8* dst_y, int pix); uint8* dst_y, int pix);
UYVYToI420RowY = UYVYToI420RowY_C;
UYVYToI420RowUV = UYVYToI420RowUV_C;
#if defined(HAS_UYVYTOI420ROW_SSE2) #if defined(HAS_UYVYTOI420ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
IS_ALIGNED(width, 16) && if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16) && UYVYToI420RowUV = UYVYToI420RowUV_SSE2;
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16) && if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) && UYVYToI420RowY = UYVYToI420RowY_SSE2;
IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) { }
UYVYToI420RowY = UYVYToI420RowY_SSE2; }
UYVYToI420RowUV = UYVYToI420RowUV_SSE2;
} else
#endif
{
UYVYToI420RowY = UYVYToI420RowY_C;
UYVYToI420RowUV = UYVYToI420RowUV_C;
} }
#endif
for (int y = 0; y < height - 1; y += 2) { for (int y = 0; y < height - 1; y += 2) {
UYVYToI420RowUV(src_uyvy, src_stride_uyvy, dst_u, dst_v, width); UYVYToI420RowUV(src_uyvy, src_stride_uyvy, dst_u, dst_v, width);
dst_u += dst_stride_u; dst_u += dst_stride_u;
...@@ -1510,18 +1502,19 @@ int I420ToARGB(const uint8* src_y, int src_stride_y, ...@@ -1510,18 +1502,19 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON) #if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasNEON)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON;
} else if (TestCpuFlag(kCpuHasNEON)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBAnyRow_NEON; FastConvertYUVToARGBRow = FastConvertYUVToARGBAnyRow_NEON;
if (IS_ALIGNED(width, 16)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON;
}
} else } else
#elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3) #elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3)) {
IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
} else if (TestCpuFlag(kCpuHasSSSE3)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBAnyRow_SSSE3; FastConvertYUVToARGBRow = FastConvertYUVToARGBAnyRow_SSSE3;
if (IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
}
} else } else
#endif #endif
{ {
...@@ -1543,36 +1536,41 @@ int I420ToARGB(const uint8* src_y, int src_stride_y, ...@@ -1543,36 +1536,41 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
int I420ToBGRA(const uint8* src_y, int src_stride_y, int I420ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v, const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb, uint8* dst_bgra, int dst_stride_bgra,
int width, int height) { int width, int height) {
// Negative height means invert the image. // Negative height means invert the image.
if (height < 0) { if (height < 0) {
height = -height; height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
dst_stride_argb = -dst_stride_argb; dst_stride_bgra = -dst_stride_bgra;
} }
void (*FastConvertYUVToBGRARow)(const uint8* y_buf, void (*FastConvertYUVToBGRARow)(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
#if defined(HAS_FASTCONVERTYUVTOBGRAROW_NEON) #if defined(HAS_FASTCONVERTYUVTOBGRAROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasNEON)) {
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_NEON; FastConvertYUVToBGRARow = FastConvertYUVToBGRAAnyRow_NEON;
if (IS_ALIGNED(width, 16)) {
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_NEON;
}
} else } else
#elif defined(HAS_FASTCONVERTYUVTOBGRAROW_SSSE3) #elif defined(HAS_FASTCONVERTYUVTOBGRAROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3)) {
IS_ALIGNED(width, 8) && FastConvertYUVToBGRARow = FastConvertYUVToBGRAAnyRow_SSSE3;
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { if (IS_ALIGNED(width, 8) &&
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_SSSE3; IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_SSSE3;
}
} else } else
#endif #endif
{ {
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_C; FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_C;
} }
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
FastConvertYUVToBGRARow(src_y, src_u, src_v, dst_argb, width); FastConvertYUVToBGRARow(src_y, src_u, src_v, dst_bgra, width);
dst_argb += dst_stride_argb; dst_bgra += dst_stride_bgra;
src_y += src_stride_y; src_y += src_stride_y;
if (y & 1) { if (y & 1) {
src_u += src_stride_u; src_u += src_stride_u;
...@@ -1586,36 +1584,41 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y, ...@@ -1586,36 +1584,41 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
int I420ToABGR(const uint8* src_y, int src_stride_y, int I420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v, const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb, uint8* dst_abgr, int dst_stride_abgr,
int width, int height) { int width, int height) {
// Negative height means invert the image. // Negative height means invert the image.
if (height < 0) { if (height < 0) {
height = -height; height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
dst_stride_argb = -dst_stride_argb; dst_stride_abgr = -dst_stride_abgr;
} }
void (*FastConvertYUVToABGRRow)(const uint8* y_buf, void (*FastConvertYUVToABGRRow)(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
#if defined(HAS_FASTCONVERTYUVTOABGRROW_NEON) #if defined(HAS_FASTCONVERTYUVTOABGRROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasNEON)) {
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_NEON; FastConvertYUVToABGRRow = FastConvertYUVToABGRAnyRow_NEON;
if (IS_ALIGNED(width, 16)) {
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_NEON;
}
} else } else
#elif defined(HAS_FASTCONVERTYUVTOABGRROW_SSSE3) #elif defined(HAS_FASTCONVERTYUVTOABGRROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3)) {
IS_ALIGNED(width, 8) && FastConvertYUVToABGRRow = FastConvertYUVToABGRAnyRow_SSSE3;
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { if (IS_ALIGNED(width, 8) &&
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_SSSE3; IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_SSSE3;
}
} else } else
#endif #endif
{ {
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_C; FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_C;
} }
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
FastConvertYUVToABGRRow(src_y, src_u, src_v, dst_argb, width); FastConvertYUVToABGRRow(src_y, src_u, src_v, dst_abgr, width);
dst_argb += dst_stride_argb; dst_abgr += dst_stride_abgr;
src_y += src_stride_y; src_y += src_stride_y;
if (y & 1) { if (y & 1) {
src_u += src_stride_u; src_u += src_stride_u;
...@@ -1643,11 +1646,11 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y, ...@@ -1643,11 +1646,11 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON) #if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasNEON)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON;
} else } else
#elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3) #elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) { if (TestCpuFlag(kCpuHasSSSE3)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
} else } else
#endif #endif
...@@ -1699,11 +1702,11 @@ int I420ToRAW(const uint8* src_y, int src_stride_y, ...@@ -1699,11 +1702,11 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON) #if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasNEON)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON;
} else } else
#elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3) #elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) { if (TestCpuFlag(kCpuHasSSSE3)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
} else } else
#endif #endif
...@@ -1741,13 +1744,13 @@ int I420ToRAW(const uint8* src_y, int src_stride_y, ...@@ -1741,13 +1744,13 @@ int I420ToRAW(const uint8* src_y, int src_stride_y,
int I420ToRGB565(const uint8* src_y, int src_stride_y, int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v, const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb, uint8* dst_rgb, int dst_stride_rgb,
int width, int height) { int width, int height) {
// Negative height means invert the image. // Negative height means invert the image.
if (height < 0) { if (height < 0) {
height = -height; height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb;
dst_stride_argb = -dst_stride_argb; dst_stride_rgb = -dst_stride_rgb;
} }
void (*FastConvertYUVToARGBRow)(const uint8* y_buf, void (*FastConvertYUVToARGBRow)(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
...@@ -1755,11 +1758,11 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y, ...@@ -1755,11 +1758,11 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON) #if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasNEON)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON;
} else } else
#elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3) #elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) { if (TestCpuFlag(kCpuHasSSSE3)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
} else } else
#endif #endif
...@@ -1768,11 +1771,9 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y, ...@@ -1768,11 +1771,9 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
} }
SIMD_ALIGNED(uint8 row[kMaxStride]); SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix); void (*ARGBToRGB565Row)(const uint8* src_rgb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTORGB565ROW_SSE2) #if defined(HAS_ARGBTORGB565ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBToRGB565Row = ARGBToRGB565Row_SSE2; ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
} else } else
#endif #endif
...@@ -1782,8 +1783,8 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y, ...@@ -1782,8 +1783,8 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
FastConvertYUVToARGBRow(src_y, src_u, src_v, row, width); FastConvertYUVToARGBRow(src_y, src_u, src_v, row, width);
ARGBToRGB565Row(row, dst_argb, width); ARGBToRGB565Row(row, dst_rgb, width);
dst_argb += dst_stride_argb; dst_rgb += dst_stride_rgb;
src_y += src_stride_y; src_y += src_stride_y;
if (y & 1) { if (y & 1) {
src_u += src_stride_u; src_u += src_stride_u;
...@@ -1811,11 +1812,11 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y, ...@@ -1811,11 +1812,11 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON) #if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasNEON)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON;
} else } else
#elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3) #elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) { if (TestCpuFlag(kCpuHasSSSE3)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
} else } else
#endif #endif
...@@ -1826,9 +1827,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y, ...@@ -1826,9 +1827,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
SIMD_ALIGNED(uint8 row[kMaxStride]); SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix); void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTOARGB1555ROW_SSE2) #if defined(HAS_ARGBTOARGB1555ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2; ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
} else } else
#endif #endif
...@@ -1867,11 +1866,11 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y, ...@@ -1867,11 +1866,11 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON) #if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasNEON)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON;
} else } else
#elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3) #elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) { if (TestCpuFlag(kCpuHasSSSE3)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
} else } else
#endif #endif
...@@ -1882,9 +1881,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y, ...@@ -1882,9 +1881,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
SIMD_ALIGNED(uint8 row[kMaxStride]); SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix); void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTOARGB4444ROW_SSE2) #if defined(HAS_ARGBTOARGB4444ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2; ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2;
} else } else
#endif #endif
...@@ -1918,20 +1915,30 @@ int I422ToARGB(const uint8* src_y, int src_stride_y, ...@@ -1918,20 +1915,30 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
dst_stride_argb = -dst_stride_argb; dst_stride_argb = -dst_stride_argb;
} }
void (*FastConvertYUVToARGBRow)(const uint8* y_buf, void (*FastConvertYUVToARGBRow)(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
#if defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3) #if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasNEON)) {
IS_ALIGNED(width, 8) && FastConvertYUVToARGBRow = FastConvertYUVToARGBAnyRow_NEON;
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { if (IS_ALIGNED(width, 16)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON;
}
} else
#elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBAnyRow_SSSE3;
if (IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
}
} else } else
#endif #endif
{ {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
} }
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
FastConvertYUVToARGBRow(src_y, src_u, src_v, dst_argb, width); FastConvertYUVToARGBRow(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb; dst_argb += dst_stride_argb;
...@@ -2266,14 +2273,19 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y, ...@@ -2266,14 +2273,19 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
uint8* argb_buf, uint8* argb_buf,
int width); int width);
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON) #if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasNEON)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON; FastConvertYUVToARGBRow = FastConvertYUVToARGBAnyRow_NEON;
if (IS_ALIGNED(width, 16)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON;
}
} else } else
#elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3) #elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3)) {
IS_ALIGNED(width, 8) && FastConvertYUVToARGBRow = FastConvertYUVToARGBAnyRow_SSSE3;
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { if (IS_ALIGNED(width, 8) &&
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3; IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
}
} else } else
#endif #endif
{ {
...@@ -2283,12 +2295,11 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y, ...@@ -2283,12 +2295,11 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
int halfwidth = (width + 1) >> 1; int halfwidth = (width + 1) >> 1;
void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
#if defined(HAS_SPLITUV_NEON) #if defined(HAS_SPLITUV_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) { if (TestCpuFlag(kCpuHasNEON)) {
SplitUV = SplitUV_NEON; SplitUV = SplitUV_NEON;
} else } else
#elif defined(HAS_SPLITUV_SSE2) #elif defined(HAS_SPLITUV_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(halfwidth, 16) &&
IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16)) { IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16)) {
SplitUV = SplitUV_SSE2; SplitUV = SplitUV_SSE2;
} else } else
...@@ -2328,11 +2339,11 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y, ...@@ -2328,11 +2339,11 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON) #if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasNEON)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_NEON;
} else } else
#elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3) #elif defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) { if (TestCpuFlag(kCpuHasSSSE3)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
} else } else
#endif #endif
...@@ -2343,9 +2354,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y, ...@@ -2343,9 +2354,7 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
SIMD_ALIGNED(uint8 row[kMaxStride]); SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix); void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTORGB565ROW_SSE2) #if defined(HAS_ARGBTORGB565ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_rgb, 16) && IS_ALIGNED(dst_stride_rgb, 16)) {
ARGBToRGB565Row = ARGBToRGB565Row_SSE2; ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
} else } else
#endif #endif
...@@ -2356,12 +2365,11 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y, ...@@ -2356,12 +2365,11 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
int halfwidth = (width + 1) >> 1; int halfwidth = (width + 1) >> 1;
void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
#if defined(HAS_SPLITUV_NEON) #if defined(HAS_SPLITUV_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) { if (TestCpuFlag(kCpuHasNEON)) {
SplitUV = SplitUV_NEON; SplitUV = SplitUV_NEON;
} else } else
#elif defined(HAS_SPLITUV_SSE2) #elif defined(HAS_SPLITUV_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(halfwidth, 16) &&
IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16)) { IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16)) {
SplitUV = SplitUV_SSE2; SplitUV = SplitUV_SSE2;
} else } else
......
...@@ -797,17 +797,13 @@ void TransposePlane(const uint8* src, int src_stride, ...@@ -797,17 +797,13 @@ void TransposePlane(const uint8* src, int src_stride,
#if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3) #if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
IS_ALIGNED(dst, 8) && IS_ALIGNED(dst_stride, 8)) {
TransposeWx8 = TransposeWx8_FAST_SSSE3; TransposeWx8 = TransposeWx8_FAST_SSSE3;
TransposeWxH = TransposeWxH_C; TransposeWxH = TransposeWxH_C;
} else } else
#endif #endif
#if defined(HAS_TRANSPOSE_WX8_SSSE3) #if defined(HAS_TRANSPOSE_WX8_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
IS_ALIGNED(width, 8) &&
IS_ALIGNED(src, 8) && IS_ALIGNED(src_stride, 8) &&
IS_ALIGNED(dst, 8) && IS_ALIGNED(dst_stride, 8)) {
TransposeWx8 = TransposeWx8_SSSE3; TransposeWx8 = TransposeWx8_SSSE3;
TransposeWxH = TransposeWxH_C; TransposeWxH = TransposeWxH_C;
} else } else
...@@ -949,9 +945,7 @@ void TransposeUV(const uint8* src, int src_stride, ...@@ -949,9 +945,7 @@ void TransposeUV(const uint8* src, int src_stride,
#if defined(HAS_TRANSPOSE_UVWX8_SSE2) #if defined(HAS_TRANSPOSE_UVWX8_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 8) && IS_ALIGNED(width, 8) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
IS_ALIGNED(dst_a, 8) && IS_ALIGNED(dst_stride_a, 8) &&
IS_ALIGNED(dst_b, 8) && IS_ALIGNED(dst_stride_b, 8)) {
TransposeWx8 = TransposeUVWx8_SSE2; TransposeWx8 = TransposeUVWx8_SSE2;
TransposeWxH = TransposeUVWxH_C; TransposeWxH = TransposeUVWxH_C;
} else } else
...@@ -1099,9 +1093,7 @@ void RotateUV180(const uint8* src, int src_stride, ...@@ -1099,9 +1093,7 @@ void RotateUV180(const uint8* src, int src_stride,
#if defined(HAS_REVERSE_ROW_UV_SSSE3) #if defined(HAS_REVERSE_ROW_UV_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) && IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
IS_ALIGNED(dst_a, 8) && IS_ALIGNED(dst_stride_a, 8) &&
IS_ALIGNED(dst_b, 8) && IS_ALIGNED(dst_stride_b, 8) ) {
ReverseRow = ReverseRowUV_SSSE3; ReverseRow = ReverseRowUV_SSSE3;
} else } else
#endif #endif
......
...@@ -217,12 +217,36 @@ void FastConvertYUVToARGBAnyRow_SSSE3(const uint8* y_buf, ...@@ -217,12 +217,36 @@ void FastConvertYUVToARGBAnyRow_SSSE3(const uint8* y_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
void FastConvertYUVToBGRAAnyRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToABGRAnyRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToARGBAnyRow_NEON(const uint8* y_buf, void FastConvertYUVToARGBAnyRow_NEON(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
const uint8* v_buf, const uint8* v_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
void FastConvertYUVToBGRAAnyRow_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToABGRAnyRow_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
} // namespace libyuv } // namespace libyuv
......
...@@ -369,36 +369,26 @@ void ReverseRow_C(const uint8* src, uint8* dst, int width) { ...@@ -369,36 +369,26 @@ void ReverseRow_C(const uint8* src, uint8* dst, int width) {
} }
// Wrappers to handle odd sizes/alignments // Wrappers to handle odd sizes/alignments
#if defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3) #define MAKEYUVANY(NAMEANY, NAME) \
void FastConvertYUVToARGBAnyRow_SSSE3(const uint8* y_buf, void NAMEANY(const uint8* y_buf, \
const uint8* u_buf, const uint8* u_buf, \
const uint8* v_buf, const uint8* v_buf, \
uint8* rgb_buf, uint8* rgb_buf, \
int width) { int width) { \
SIMD_ALIGNED(uint8 row[kMaxStride]); SIMD_ALIGNED(uint8 row[kMaxStride]); \
FastConvertYUVToARGBRow_SSSE3(y_buf, NAME(y_buf, u_buf, v_buf, row, width); \
u_buf, memcpy(rgb_buf, row, width << 2); \
v_buf,
row,
width);
memcpy(rgb_buf, row, width << 2);
} }
#endif
#if defined(HAS_FASTCONVERTYUVTOARGBROW_SSSE3)
MAKEYUVANY(FastConvertYUVToARGBAnyRow_SSSE3, FastConvertYUVToARGBRow_SSSE3)
MAKEYUVANY(FastConvertYUVToBGRAAnyRow_SSSE3, FastConvertYUVToBGRARow_SSSE3)
MAKEYUVANY(FastConvertYUVToABGRAnyRow_SSSE3, FastConvertYUVToABGRRow_SSSE3)
#endif
#if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON) #if defined(HAS_FASTCONVERTYUVTOARGBROW_NEON)
void FastConvertYUVToARGBAnyRow_NEON(const uint8* y_buf, MAKEYUVANY(FastConvertYUVToARGBAnyRow_NEON, FastConvertYUVToARGBRow_NEON)
const uint8* u_buf, MAKEYUVANY(FastConvertYUVToBGRAAnyRow_NEON, FastConvertYUVToBGRARow_NEON)
const uint8* v_buf, MAKEYUVANY(FastConvertYUVToABGRAnyRow_NEON, FastConvertYUVToABGRRow_NEON)
uint8* rgb_buf,
int width) {
SIMD_ALIGNED(uint8 row[kMaxStride]);
FastConvertYUVToARGBRow_NEON(y_buf,
u_buf,
v_buf,
row,
width);
memcpy(rgb_buf, row, width << 2);
}
#endif #endif
#ifdef __cplusplus #ifdef __cplusplus
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment