Commit 522d757c authored by fbarchard@google.com

Neon optimized ARGBToI444/422/411/420 Any variations, ARGB1555ToI420 Neon, ARGB4444ToI420

BUG=148
TEST=sudo LIBYUV_REPEAT=1000 nice --5 ./libyuv_unittest --gtest_filter=*R*ToI4* | sed 's/\(.*(\)\([0-9]*\)\( ms)\)/\2 - \1\2\3/g' | sort -rn | grep ms
Review URL: https://webrtc-codereview.appspot.com/936020

git-svn-id: http://libyuv.googlecode.com/svn/trunk@480 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent f1daa3db
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 479
Version: 480
License: BSD
License File: LICENSE
......
......@@ -48,8 +48,8 @@ extern "C" {
#define HAS_ARGBTORGB24ROW_SSSE3
#define HAS_ARGBTORGB565ROW_SSE2
#define HAS_ARGBTORGBAROW_SSSE3
#define HAS_ARGBTOUVROW_SSSE3
#define HAS_ARGBTOUV422ROW_SSSE3
#define HAS_ARGBTOUVROW_SSSE3
#define HAS_ARGBTOYROW_SSSE3
#define HAS_BGRATOARGBROW_SSSE3
#define HAS_BGRATOUVROW_SSSE3
......@@ -60,43 +60,43 @@ extern "C" {
#define HAS_I400TOARGBROW_SSE2
#define HAS_I411TOARGBROW_SSSE3
#define HAS_I422TOABGRROW_SSSE3
#define HAS_I422TOARGB1555ROW_SSSE3
#define HAS_I422TOARGB4444ROW_SSSE3
#define HAS_I422TOARGBROW_SSSE3
#define HAS_I422TOBGRAROW_SSSE3
#define HAS_I422TORAWROW_SSSE3
#define HAS_I422TORGB24ROW_SSSE3
#define HAS_I422TORGB565ROW_SSSE3
#define HAS_I422TORGBAROW_SSSE3
#define HAS_I422TOUYVYROW_SSE2
#define HAS_I422TOYUY2ROW_SSE2
#define HAS_I444TOARGBROW_SSSE3
#define HAS_MERGEUV_SSE2
#define HAS_MIRRORROW_SSSE3
#define HAS_MirrorUVRow_SSSE3
#define HAS_NV12TOARGBROW_SSSE3
#define HAS_NV21TOARGBROW_SSSE3
#define HAS_NV12TORGB565ROW_SSSE3
#define HAS_NV21TOARGBROW_SSSE3
#define HAS_NV21TORGB565ROW_SSSE3
#define HAS_RAWTOARGBROW_SSSE3
#define HAS_RAWTOYROW_SSSE3
#define HAS_RGB24TOARGBROW_SSSE3
#define HAS_RGB24TOYROW_SSSE3
#define HAS_RGB565TOARGBROW_SSE2
#define HAS_RGBATOARGBROW_SSSE3
#define HAS_RGBATOUVROW_SSSE3
#define HAS_RGBATOYROW_SSSE3
#define HAS_SETROW_X86
#define HAS_SPLITUV_SSE2
#define HAS_UYVYTOARGBROW_SSSE3
#define HAS_UYVYTOUV422ROW_SSE2
#define HAS_UYVYTOUVROW_SSE2
#define HAS_UYVYTOYROW_SSE2
#define HAS_YTOARGBROW_SSE2
#define HAS_YUY2TOARGBROW_SSSE3
#define HAS_YUY2TOUV422ROW_SSE2
#define HAS_YUY2TOUVROW_SSE2
#define HAS_YUY2TOYROW_SSE2
#define HAS_I422TOYUY2ROW_SSE2
#define HAS_I422TOUYVYROW_SSE2
#define HAS_MERGEUV_SSE2
#define HAS_I422TOARGB4444ROW_SSSE3
#define HAS_I422TOARGB1555ROW_SSSE3
#define HAS_I422TORGB565ROW_SSSE3
#define HAS_YUY2TOARGBROW_SSSE3
#define HAS_UYVYTOARGBROW_SSSE3
#define HAS_RGB24TOYROW_SSSE3
#define HAS_RAWTOYROW_SSSE3
// Effects
#define HAS_ARGBAFFINEROW_SSE2
......@@ -147,68 +147,75 @@ extern "C" {
// The following are available on Neon platforms
#if !defined(YUV_DISABLE_ASM) && (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
#define HAS_ABGRTOARGBROW_NEON
#define HAS_ABGRTOUVROW_NEON
#define HAS_ABGRTOYROW_NEON
#define HAS_ARGB1555TOARGBROW_NEON
#define HAS_ARGB1555TOUVROW_NEON
#define HAS_ARGB1555TOYROW_NEON
#define HAS_ARGB4444TOARGBROW_NEON
#define HAS_ARGB4444TOUVROW_NEON
#define HAS_ARGB4444TOYROW_NEON
#define HAS_ARGBTOARGB1555ROW_NEON
#define HAS_ARGBTOARGB4444ROW_NEON
#define HAS_ARGBTOBAYERROW_NEON
#define HAS_ARGBTORAWROW_NEON
#define HAS_I400TOARGBROW_NEON
#define HAS_ARGBTORGB24ROW_NEON
#define HAS_ARGBTORGB565ROW_NEON
#define HAS_ARGBTORGBAROW_NEON
#define HAS_ARGBTOUV411ROW_NEON
#define HAS_ARGBTOUV422ROW_NEON
#define HAS_ARGBTOUV444ROW_NEON
#define HAS_ARGBTOUVROW_NEON
#define HAS_ARGBTOYROW_NEON
#define HAS_BGRATOARGBROW_NEON
#define HAS_BGRATOUVROW_NEON
#define HAS_BGRATOYROW_NEON
#define HAS_COPYROW_NEON
#define HAS_HALFROW_NEON
#define HAS_I400TOARGBROW_NEON
#define HAS_I411TOARGBROW_NEON
#define HAS_I422TOABGRROW_NEON
#define HAS_I422TOARGB1555ROW_NEON
#define HAS_I422TOARGB4444ROW_NEON
#define HAS_I422TOARGBROW_NEON
#define HAS_I422TOBGRAROW_NEON
#define HAS_I422TORAWROW_NEON
#define HAS_I422TORGB24ROW_NEON
#define HAS_I422TOARGB4444ROW_NEON
#define HAS_I422TOARGB1555ROW_NEON
#define HAS_I422TORGB565ROW_NEON
#define HAS_I422TORGBAROW_NEON
#define HAS_I422TOUYVYROW_NEON
#define HAS_I422TOYUY2ROW_NEON
#define HAS_I444TOARGBROW_NEON
#define HAS_MERGEUV_NEON
#define HAS_MIRRORROW_NEON
#define HAS_MirrorUVRow_NEON
#define HAS_NV12TOARGBROW_NEON
#define HAS_NV21TOARGBROW_NEON
#define HAS_YUY2TOARGBROW_NEON
#define HAS_UYVYTOARGBROW_NEON
#define HAS_NV12TORGB565ROW_NEON
#define HAS_NV21TOARGBROW_NEON
#define HAS_NV21TORGB565ROW_NEON
#define HAS_RAWTOARGBROW_NEON
#define HAS_RAWTOUVROW_NEON
#define HAS_RAWTOYROW_NEON
#define HAS_RGB24TOARGBROW_NEON
#define HAS_RGB24TOUVROW_NEON
#define HAS_RGB24TOYROW_NEON
#define HAS_RGB565TOARGBROW_NEON
#define HAS_RGB565TOUVROW_NEON
#define HAS_RGB565TOYROW_NEON
#define HAS_RGBATOARGBROW_NEON
#define HAS_RGBATOUVROW_NEON
#define HAS_RGBATOYROW_NEON
#define HAS_SETROW_NEON
#define HAS_SPLITUV_NEON
#define HAS_UYVYTOARGBROW_NEON
#define HAS_UYVYTOUV422ROW_NEON
#define HAS_UYVYTOUVROW_NEON
#define HAS_UYVYTOYROW_NEON
#define HAS_YTOARGBROW_NEON
#define HAS_YUY2TOARGBROW_NEON
#define HAS_YUY2TOUV422ROW_NEON
#define HAS_YUY2TOUVROW_NEON
#define HAS_YUY2TOYROW_NEON
#define HAS_I422TOYUY2ROW_NEON
#define HAS_I422TOUYVYROW_NEON
#define HAS_ARGBTORGB565ROW_NEON
#define HAS_ARGBTOARGB1555ROW_NEON
#define HAS_ARGBTOARGB4444ROW_NEON
#define HAS_MERGEUV_NEON
#define HAS_YTOARGBROW_NEON
#define HAS_I444TOARGBROW_NEON
#define HAS_I411TOARGBROW_NEON
#define HAS_ARGBTOYROW_NEON
#define HAS_ARGBTOUV444ROW_NEON
#define HAS_ARGBTOUV422ROW_NEON
#define HAS_ARGBTOUV411ROW_NEON
#define HAS_ARGBTOUVROW_NEON
#define HAS_RGB565TOUVROW_NEON
#define HAS_BGRATOYROW_NEON
#define HAS_ABGRTOYROW_NEON
#define HAS_RGBATOYROW_NEON
#define HAS_RGB24TOYROW_NEON
#define HAS_RAWTOYROW_NEON
#define HAS_RGB565TOARGBROW_NEON
#define HAS_ARGB1555TOARGBROW_NEON
#define HAS_ARGB4444TOARGBROW_NEON
#define HAS_RGB565TOYROW_NEON
#define HAS_ARGB1555TOYROW_NEON
#define HAS_ARGB4444TOYROW_NEON
#endif
// The following are available on Mips platforms
......@@ -357,6 +364,10 @@ void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int pix);
void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
uint8* dst_u, uint8* dst_v, int pix);
void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
uint8* dst_u, uint8* dst_v, int pix);
void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
uint8* dst_u, uint8* dst_v, int pix);
void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
......@@ -414,10 +425,22 @@ void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr,
uint8* dst_u, uint8* dst_v, int width);
void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix);
void ARGBToUV422Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix);
void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix);
void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int pix);
void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565,
uint8* dst_u, uint8* dst_v, int pix);
void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555,
int src_stride_argb1555,
uint8* dst_u, uint8* dst_v, int pix);
void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
int src_stride_argb4444,
uint8* dst_u, uint8* dst_v, int pix);
void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra,
......@@ -428,6 +451,10 @@ void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba,
uint8* dst_u, uint8* dst_v, int width);
void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
uint8* dst_u, uint8* dst_v, int width);
void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
uint8* dst_u, uint8* dst_v, int width);
void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV422Row_SSSE3(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width);
......@@ -1087,6 +1114,10 @@ void ARGBToBayerRow_SSSE3(const uint8* src_argb,
uint8* dst_bayer, uint32 selector, int pix);
void ARGBToBayerRow_NEON(const uint8* src_argb,
uint8* dst_bayer, uint32 selector, int pix);
void ARGBToBayerRow_Any_SSSE3(const uint8* src_argb,
uint8* dst_bayer, uint32 selector, int pix);
void ARGBToBayerRow_Any_NEON(const uint8* src_argb,
uint8* dst_bayer, uint32 selector, int pix);
void I422ToYUY2Row_C(const uint8* src_y,
const uint8* src_u,
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 479
#define LIBYUV_VERSION 480
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -943,6 +943,9 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
if (width >= 16) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
......@@ -1207,6 +1210,9 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
RGB24ToYRow = RGB24ToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB24ToYRow = RGB24ToYRow_NEON;
}
if (width >= 16) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
......@@ -1312,6 +1318,9 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
RAWToYRow = RAWToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RAWToYRow = RAWToYRow_NEON;
}
if (width >= 16) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
......@@ -1482,6 +1491,25 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555;
src_stride_argb1555 = -src_stride_argb1555;
}
#if defined(HAS_ARGB1555TOYROW_NEON)
void (*ARGB1555ToUVRow)(const uint8* src_argb1555, int src_stride_argb1555,
uint8* dst_u, uint8* dst_v, int width) = ARGB1555ToUVRow_C;
void (*ARGB1555ToYRow)(const uint8* src_argb1555, uint8* dst_y, int pix) =
ARGB1555ToYRow_C;
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGB1555ToYRow = ARGB1555ToYRow_NEON;
}
if (width >= 16) {
ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGB1555ToUVRow = ARGB1555ToUVRow_NEON;
}
}
}
#else // HAS_ARGB1555TOYROW_NEON
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
ARGB1555ToARGBRow_C;
......@@ -1492,15 +1520,7 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
}
}
#elif defined(HAS_ARGB1555TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_NEON;
}
}
#endif
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
#if defined(HAS_ARGBTOUVROW_SSSE3)
......@@ -1511,20 +1531,6 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
}
}
#endif
#if defined(HAS_ARGB1555TOYROW_NEON)
void (*ARGB1555ToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGB1555ToYRow_C;
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGB1555ToYRow = ARGB1555ToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#else
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
#if defined(HAS_ARGBTOUVROW_SSSE3)
......@@ -1541,13 +1547,14 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
#endif // HAS_ARGB1555TOYROW_NEON
for (int y = 0; y < height - 1; y += 2) {
ARGB1555ToARGBRow(src_argb1555, row, width);
ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kMaxStride, width);
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
#if defined(HAS_ARGB1555TOYROW_NEON)
ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width);
ARGB1555ToYRow(src_argb1555, dst_y, width);
ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y, width);
#else
ARGB1555ToARGBRow(src_argb1555, row, width);
ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kMaxStride, width);
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
#endif
......@@ -1557,11 +1564,12 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
dst_v += dst_stride_v;
}
if (height & 1) {
ARGB1555ToARGBRow_C(src_argb1555, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width);
#if defined(HAS_ARGB1555TOYROW_NEON)
ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width);
ARGB1555ToYRow(src_argb1555, dst_y, width);
#else
ARGB1555ToARGBRow(src_argb1555, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
#endif
}
......@@ -1586,6 +1594,25 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444;
src_stride_argb4444 = -src_stride_argb4444;
}
#if defined(HAS_ARGB4444TOYROW_NEON)
void (*ARGB4444ToUVRow)(const uint8* src_argb4444, int src_stride_argb4444,
uint8* dst_u, uint8* dst_v, int width) = ARGB4444ToUVRow_C;
void (*ARGB4444ToYRow)(const uint8* src_argb4444, uint8* dst_y, int pix) =
ARGB4444ToYRow_C;
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGB4444ToYRow = ARGB4444ToYRow_NEON;
}
if (width >= 16) {
ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGB4444ToUVRow = ARGB4444ToUVRow_NEON;
}
}
}
#else // HAS_ARGB4444TOYROW_NEON
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
ARGB4444ToARGBRow_C;
......@@ -1596,15 +1623,7 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
}
}
#elif defined(HAS_ARGB4444TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_NEON;
}
}
#endif
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
#if defined(HAS_ARGBTOUVROW_SSSE3)
......@@ -1615,20 +1634,6 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
}
}
#endif
#if defined(HAS_ARGB4444TOYROW_NEON)
void (*ARGB4444ToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGB4444ToYRow_C;
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGB4444ToYRow = ARGB4444ToYRow_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#else
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
#if defined(HAS_ARGBTOUVROW_SSSE3)
......@@ -1645,13 +1650,16 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
#endif // HAS_ARGB4444TOYROW_NEON
for (int y = 0; y < height - 1; y += 2) {
ARGB4444ToARGBRow(src_argb4444, row, width);
ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kMaxStride, width);
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
#if defined(HAS_ARGB4444TOYROW_NEON)
ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width);
ARGB4444ToYRow(src_argb4444, dst_y, width);
ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y, width);
ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y,
width);
#else
ARGB4444ToARGBRow(src_argb4444, row, width);
ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kMaxStride,
width);
ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
#endif
......@@ -1661,11 +1669,12 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
dst_v += dst_stride_v;
}
if (height & 1) {
ARGB4444ToARGBRow_C(src_argb4444, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width);
#if defined(HAS_ARGB4444TOYROW_NEON)
ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width);
ARGB4444ToYRow(src_argb4444, dst_y, width);
#else
ARGB4444ToARGBRow(src_argb4444, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
#endif
}
......
......@@ -54,6 +54,7 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
#elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToYRow = ARGBToYRow_Any_NEON;
ARGBToUV444Row = ARGBToUV444Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
ARGBToUV444Row = ARGBToUV444Row_NEON;
......@@ -120,6 +121,9 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
if (width >= 16) {
ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_NEON;
}
......@@ -173,6 +177,9 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
if (width >= 32) {
ARGBToUV411Row = ARGBToUV411Row_Any_NEON;
if (IS_ALIGNED(width, 32)) {
ARGBToUV411Row = ARGBToUV411Row_NEON;
}
......
......@@ -72,14 +72,20 @@ int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
uint32 selector, int pix) = ARGBToBayerRow_C;
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
if (TestCpuFlag(kCpuHasSSSE3) && width >= 4 &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
if (IS_ALIGNED(width, 4)) {
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
}
}
#elif defined(HAS_ARGBTOBAYERROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
if (IS_ALIGNED(width, 4)) {
ARGBToBayerRow = ARGBToBayerRow_NEON;
}
}
#endif
const int blue_index = 0; // Offsets for ARGB format
const int green_index = 1;
......@@ -398,7 +404,7 @@ int I420ToBayer(const uint8* src_y, int src_stride_y,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#elif defined(HAS_I422TOARGBROW_NEON)
......@@ -408,20 +414,34 @@ int I420ToBayer(const uint8* src_y, int src_stride_y,
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#elif defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
uint32 selector, int pix) = ARGBToBayerRow_C;
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
if (IS_ALIGNED(width, 4)) {
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
}
}
#elif defined(HAS_ARGBTOBAYERROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
if (IS_ALIGNED(width, 4)) {
ARGBToBayerRow = ARGBToBayerRow_NEON;
}
}
#endif
const int blue_index = 0; // Offsets for ARGB format
const int green_index = 1;
const int red_index = 2;
......
......@@ -19,6 +19,9 @@ namespace libyuv {
extern "C" {
#endif
// TODO(fbarchard): Consider 'any' functions handling any quantity of pixels.
// TODO(fbarchard): Consider 'any' functions handling odd alignment.
// YUV to RGB does multiple of 8 with SIMD and remainder with C.
#define YANY(NAMEANY, I420TORGB_SIMD, I420TORGB_C, UV_SHIFT, BPP, MASK) \
void NAMEANY(const uint8* y_buf, \
......@@ -114,12 +117,8 @@ NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, NV21ToRGB565Row_C, 0, 2)
#endif // HAS_NV12TORGB565ROW_NEON
#undef NVANY
// YUY2 to RGB does 8 at a time.
// RGB to RGB does multiple of 16 pixels with SIMD and remainder with C.
// SSSE3 RGB24 is multiple of 16 pixels, aligned source and destination.
// SSE2 RGB565 is multiple of 4 pixels, ARGB must be aligned to 16 bytes.
// NEON RGB24 is multiple of 8 pixels, unaligned source and destination.
// I400 To ARGB does multiple of 8 pixels with SIMD and remainder with C.
// TODO(fbarchard): RGBANY use last 16 method.
// ARGB to Bayer does multiple of 4 pixels, SSSE3 aligned src, unaligned dst.
#define RGBANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP) \
void NAMEANY(const uint8* src, \
uint8* dst, \
......@@ -165,6 +164,26 @@ RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C,
#endif
#undef RGBANY
// Generates an "Any" width ARGB to Bayer row function named NAMEANY.
// The SIMD row function converts the leading pixels (width rounded down to a
// multiple of MASK + 1, i.e. 4 pixels for the instantiations below) and the C
// row function converts the remaining width & MASK pixels.
// SBPP and BPP are the source and destination bytes per pixel.
// SSSE3 requires an aligned src; dst may be unaligned.
#define BAYERANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP)        \
    void NAMEANY(const uint8* src,                                             \
                 uint8* dst, uint32 selector,                                  \
                 int width) {                                                  \
      const int simd_pixels = width & ~MASK;                                   \
      const int tail_pixels = width & MASK;                                    \
      ARGBTORGB_SIMD(src, dst, selector, simd_pixels);                         \
      ARGBTORGB_C(src + simd_pixels * SBPP,                                    \
                  dst + simd_pixels * BPP,                                     \
                  selector, tail_pixels);                                      \
    }
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
BAYERANY(ARGBToBayerRow_Any_SSSE3, ARGBToBayerRow_SSSE3, ARGBToBayerRow_C,
3, 4, 1)
#endif
#if defined(HAS_ARGBTOBAYERROW_NEON)
BAYERANY(ARGBToBayerRow_Any_NEON, ARGBToBayerRow_NEON, ARGBToBayerRow_C,
3, 4, 1)
#endif
#undef BAYERANY
// RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD.
// TODO(fbarchard): Use last 16 method for all unsubsampled conversions.
#define YANY(NAMEANY, ARGBTOY_SIMD, SBPP, BPP, NUM) \
......@@ -230,37 +249,43 @@ UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2)
#ifdef HAS_ARGBTOUVROW_NEON
UVANY(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, ARGBToUVRow_C, 4)
UVANY(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, RGB565ToUVRow_C, 2)
UVANY(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, ARGB1555ToUVRow_C, 2)
UVANY(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, ARGB4444ToUVRow_C, 2)
UVANY(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, YUY2ToUVRow_C, 2)
UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2)
#endif
#undef UVANY
#define UV422ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP) \
#define UV422ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK, SHIFT) \
void NAMEANY(const uint8* src_uv, \
uint8* dst_u, uint8* dst_v, int width) { \
int n = width & ~15; \
int n = width & ~MASK; \
ANYTOUV_SIMD(src_uv, dst_u, dst_v, n); \
ANYTOUV_C(src_uv + n * BPP, \
dst_u + (n >> 1), \
dst_v + (n >> 1), \
width & 15); \
dst_u + (n >> SHIFT), \
dst_v + (n >> SHIFT), \
width & MASK); \
}
#ifdef HAS_ARGBTOUVROW_SSSE3
UV422ANY(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_Unaligned_SSSE3,
ARGBToUV422Row_C, 4)
#endif
#ifdef HAS_YUY2TOUV422ROW_SSE2
ARGBToUV422Row_C, 4, 15, 1)
UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_Unaligned_SSE2,
YUY2ToUV422Row_C, 2)
YUY2ToUV422Row_C, 2, 15, 1)
UV422ANY(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_Unaligned_SSE2,
UYVYToUV422Row_C, 2)
UYVYToUV422Row_C, 2, 15, 1)
#endif
#ifdef HAS_YUY2TOUV422ROW_NEON
// ARGBToUV444Row_NEON is selected directly when width is a multiple of 8, so
// the remainder mask must be 7 (8 - 1): the NEON call gets width & ~7 pixels
// and the C fallback gets the last width & 7 pixels. A mask of 8 would only
// clear bit 3 and pass unaligned widths to the NEON row.
UV422ANY(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON,
         ARGBToUV444Row_C, 4, 7, 0)
UV422ANY(ARGBToUV422Row_Any_NEON, ARGBToUV422Row_NEON,
ARGBToUV422Row_C, 4, 15, 1)
UV422ANY(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON,
ARGBToUV411Row_C, 4, 31, 2)
UV422ANY(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON,
YUY2ToUV422Row_C, 2)
YUY2ToUV422Row_C, 2, 15, 1)
UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON,
UYVYToUV422Row_C, 2)
UYVYToUV422Row_C, 2, 15, 1)
#endif
#undef UV422ANY
......
......@@ -365,13 +365,13 @@ void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
uint8 b3 = next_rgb565[2] & 0x1f;
uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
uint8 r3 = next_rgb565[3] >> 3;
uint8 ab = (b0 + b1 + b2 + b3);
uint8 ag = (g0 + g1 + g2 + g3);
uint8 ar = (r0 + r1 + r2 + r3);
ab = (ab << 1) | (ab >> 6);
ar = (ar << 1) | (ar >> 6);
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
uint8 b = (b0 + b1 + b2 + b3); // 565 * 4 = 787.
uint8 g = (g0 + g1 + g2 + g3);
uint8 r = (r0 + r1 + r2 + r3);
b = (b << 1) | (b >> 6); // 787 -> 888.
r = (r << 1) | (r >> 6);
dst_u[0] = RGBToU(r, g, b);
dst_v[0] = RGBToV(r, g, b);
src_rgb565 += 4;
next_rgb565 += 4;
dst_u += 1;
......@@ -384,14 +384,108 @@ void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
uint8 b2 = next_rgb565[0] & 0x1f;
uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
uint8 r2 = next_rgb565[1] >> 3;
uint8 ab = (b0 + b2);
uint8 ag = (g0 + g2);
uint8 ar = (r0 + r2);
ab = (ab << 2) | (ab >> 4);
ag = (ag << 1) | (ag >> 6);
ar = (ar << 2) | (ar >> 4);
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
uint8 b = (b0 + b2); // 565 * 2 = 676.
uint8 g = (g0 + g2);
uint8 r = (r0 + r2);
b = (b << 2) | (b >> 4); // 676 -> 888
g = (g << 1) | (g >> 6);
r = (r << 2) | (r >> 4);
dst_u[0] = RGBToU(r, g, b);
dst_v[0] = RGBToV(r, g, b);
}
}
// Computes one row of 2x2 subsampled U and V from two rows of ARGB1555.
//
// src_argb1555         First source row, 2 bytes per pixel. As decoded below:
//                      byte 0 holds blue in bits 0-4 and the low 3 bits of
//                      green in bits 5-7; byte 1 holds the high 2 bits of
//                      green in bits 0-1 and red in bits 2-6. Bit 7 of byte 1
//                      is masked off and does not affect the result.
// src_stride_argb1555  Byte offset from the first row to the second row.
// dst_u, dst_v         Receive one sample per pair of source columns, plus
//                      one more for the last column when width is odd.
// width                Number of source pixels per row.
//
// Each channel is summed over the block and the sum is widened to 8 bits by
// shifting up and replicating its top bits before calling RGBToU / RGBToV.
void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
                       uint8* dst_u, uint8* dst_v, int width) {
  const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;
  for (int x = 0; x < width - 1; x += 2) {
    uint8 b0 = src_argb1555[0] & 0x1f;
    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8 b1 = src_argb1555[2] & 0x1f;
    uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
    uint8 r1 = (src_argb1555[3] & 0x7c) >> 2;
    uint8 b2 = next_argb1555[0] & 0x1f;
    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8 b3 = next_argb1555[2] & 0x1f;
    uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
    uint8 r3 = (next_argb1555[3] & 0x7c) >> 2;
    uint8 b = (b0 + b1 + b2 + b3);  // 555 * 4 = 777.
    uint8 g = (g0 + g1 + g2 + g3);
    uint8 r = (r0 + r1 + r2 + r3);
    b = (b << 1) | (b >> 6);  // 777 -> 888.
    g = (g << 1) | (g >> 6);
    r = (r << 1) | (r >> 6);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
    src_argb1555 += 4;
    next_argb1555 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    // Odd width: the last column is a 1x2 block (one pixel from each row).
    uint8 b0 = src_argb1555[0] & 0x1f;
    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8 b2 = next_argb1555[0] & 0x1f;
    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    // Red is bits 2-6 of byte 1, as for r0. Shifting the whole byte right by
    // 3 (the RGB565 layout) would mix in the top bit and drop red's low bit.
    uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8 b = (b0 + b2);  // 555 * 2 = 666.
    uint8 g = (g0 + g2);
    uint8 r = (r0 + r2);
    b = (b << 2) | (b >> 4);  // 666 -> 888.
    g = (g << 2) | (g >> 4);
    r = (r << 2) | (r >> 4);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
  }
}
void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
uint8* dst_u, uint8* dst_v, int width) {
const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444;
for (int x = 0; x < width - 1; x += 2) {
uint8 b0 = src_argb4444[0] & 0x0f;
uint8 g0 = src_argb4444[0] >> 4;
uint8 r0 = src_argb4444[1] & 0x0f;
uint8 b1 = src_argb4444[2] & 0x0f;
uint8 g1 = src_argb4444[2] >> 4;
uint8 r1 = src_argb4444[3] & 0x0f;
uint8 b2 = next_argb4444[0] & 0x0f;
uint8 g2 = next_argb4444[0] >> 4;
uint8 r2 = next_argb4444[1] & 0x0f;
uint8 b3 = next_argb4444[2] & 0x0f;
uint8 g3 = next_argb4444[2] >> 4;
uint8 r3 = next_argb4444[3] & 0x0f;
uint8 b = (b0 + b1 + b2 + b3); // 444 * 4 = 666.
uint8 g = (g0 + g1 + g2 + g3);
uint8 r = (r0 + r1 + r2 + r3);
b = (b << 2) | (b >> 4); // 666 -> 888.
g = (g << 2) | (g >> 4);
r = (r << 2) | (r >> 4);
dst_u[0] = RGBToU(r, g, b);
dst_v[0] = RGBToV(r, g, b);
src_argb4444 += 4;
next_argb4444 += 4;
dst_u += 1;
dst_v += 1;
}
if (width & 1) {
uint8 b0 = src_argb4444[0] & 0x0f;
uint8 g0 = src_argb4444[0] >> 4;
uint8 r0 = src_argb4444[1] & 0x0f;
uint8 b2 = next_argb4444[0] & 0x0f;
uint8 g2 = next_argb4444[0] >> 4;
uint8 r2 = next_argb4444[1] & 0x0f;
uint8 b = (b0 + b2); // 444 * 2 = 555.
uint8 g = (g0 + g2);
uint8 r = (r0 + r2);
b = (b << 3) | (b >> 2); // 555 -> 888.
g = (g << 3) | (g >> 2);
r = (r << 3) | (r >> 2);
dst_u[0] = RGBToU(r, g, b);
dst_v[0] = RGBToV(r, g, b);
}
}
......
This diff is collapsed.
......@@ -612,8 +612,9 @@ TESTATOPLANAR(RGBA, 4, I420, 2, 2, 4)
TESTATOPLANAR(RAW, 3, I420, 2, 2, 4)
TESTATOPLANAR(RGB24, 3, I420, 2, 2, 4)
TESTATOPLANAR(RGB565, 2, I420, 2, 2, 5)
TESTATOPLANAR(ARGB1555, 2, I420, 2, 2, 4)
TESTATOPLANAR(ARGB4444, 2, I420, 2, 2, 4)
// TODO(fbarchard): Make the ARGB1555 NEON path produce the same output as the
// C code, then reduce the allowed diff to 9.
TESTATOPLANAR(ARGB1555, 2, I420, 2, 2, 15)
TESTATOPLANAR(ARGB4444, 2, I420, 2, 2, 17)
TESTATOPLANAR(ARGB, 4, I411, 4, 1, 4)
TESTATOPLANAR(ARGB, 4, I422, 2, 1, 2)
TESTATOPLANAR(ARGB, 4, I444, 1, 1, 2)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment