Commit 797e9ef8 authored by fbarchard@google.com

5 RGB formats to ARGB ported to NEON

BUG=68
TEST=unittest
Review URL: https://webrtc-codereview.appspot.com/825004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@369 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 5808cb22
......@@ -34,36 +34,35 @@ extern "C" {
// The following are available on all x86 platforms:
#if !defined(YUV_DISABLE_ASM) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
// Conversions.
#define HAS_ABGRTOARGBROW_SSSE3
#define HAS_ABGRTOUVROW_SSSE3
#define HAS_ABGRTOYROW_SSSE3
#define HAS_ARGBTORGBAROW_SSSE3
#define HAS_ARGB1555TOARGBROW_SSE2
#define HAS_ARGB4444TOARGBROW_SSE2
#define HAS_ARGBATTENUATEROW_SSSE3
#define HAS_ARGBBLENDROW_SSSE3
#define HAS_ARGBTOARGB1555ROW_SSE2
#define HAS_ARGBTOARGB4444ROW_SSE2
#define HAS_ARGBTORAWROW_SSSE3
#define HAS_ARGBTORGB24ROW_SSSE3
#define HAS_ARGBTORGB565ROW_SSE2
#define HAS_ARGBTORGBAROW_SSSE3
#define HAS_ARGBTOUVROW_SSSE3
#define HAS_ARGBTOYROW_SSSE3
#define HAS_ARGBUNATTENUATEROW_SSE2
#define HAS_BGRATOARGBROW_SSSE3
#define HAS_BGRATOUVROW_SSSE3
#define HAS_BGRATOYROW_SSSE3
#define HAS_COPYROW_SSE2
#define HAS_COPYROW_X86
#define HAS_I400TOARGBROW_SSE2
#define HAS_I411TOARGBROW_SSSE3
#define HAS_I422TOABGRROW_SSSE3
#define HAS_I422TOARGBROW_SSSE3
#define HAS_I422TOBGRAROW_SSSE3
#define HAS_I422TOABGRROW_SSSE3
#define HAS_I444TOARGBROW_SSSE3
#define HAS_I411TOARGBROW_SSSE3
#define HAS_I400TOARGBROW_SSE2
#define HAS_MIRRORROW_SSSE3
#define HAS_MIRRORROWUV_SSSE3
#define HAS_ARGBMIRRORROW_SSSE3
#define HAS_NV12TOARGBROW_SSSE3
#define HAS_NV21TOARGBROW_SSSE3
#define HAS_RAWTOARGBROW_SSSE3
#define HAS_RGB24TOARGBROW_SSSE3
#define HAS_RGB565TOARGBROW_SSE2
......@@ -71,26 +70,31 @@ extern "C" {
#define HAS_UYVYTOUVROW_SSE2
#define HAS_UYVYTOYROW_SSE2
#define HAS_YTOARGBROW_SSE2
#define HAS_YUY2TOUVROW_SSE2
#define HAS_YUY2TOUV422ROW_SSE2
#define HAS_YUY2TOUVROW_SSE2
#define HAS_YUY2TOYROW_SSE2
#define HAS_ARGBGRAYROW_SSSE3
#define HAS_ARGBSEPIAROW_SSSE3
// Effects
#define HAS_ARGBMIRRORROW_SSSE3
#define HAS_ARGBAFFINEROW_SSE2
#define HAS_ARGBATTENUATEROW_SSSE3
#define HAS_ARGBBLENDROW_SSSE3
#define HAS_ARGBCOLORMATRIXROW_SSSE3
#define HAS_ARGBGRAYROW_SSSE3
#define HAS_ARGBINTERPOLATEROW_SSSE3
#define HAS_ARGBQUANTIZEROW_SSE2
#define HAS_ARGBSEPIAROW_SSSE3
#define HAS_ARGBSHADE_SSE2
#define HAS_ARGBUNATTENUATEROW_SSE2
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
#define HAS_CUMULATIVESUMTOAVERAGE_SSE2
#define HAS_ARGBSHADE_SSE2
#define HAS_ARGBAFFINEROW_SSE2
#define HAS_ARGBINTERPOLATEROW_SSSE3
#define HAS_NV12TOARGBROW_SSSE3
#define HAS_NV21TOARGBROW_SSSE3
#endif
// The following are Windows only:
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
#define HAS_ARGBCOLORTABLEROW_X86
#define HAS_I422TORGBAROW_SSSE3
#define HAS_ABGRTOARGBROW_SSSE3
#define HAS_RGBATOARGBROW_SSSE3
#define HAS_RGBATOUVROW_SSSE3
#define HAS_RGBATOYROW_SSSE3
......@@ -115,9 +119,11 @@ extern "C" {
#define HAS_I422TOBGRAROW_NEON
#define HAS_I422TOABGRROW_NEON
#define HAS_I422TORGBAROW_NEON
#define HAS_ARGBTORGBAROW_NEON
#define HAS_ARGBTORGB24ROW_NEON
#define HAS_ARGBTORAWROW_NEON
#define HAS_ABGRTOARGBROW_NEON
#define HAS_BGRATOARGBROW_NEON
#define HAS_RGBATOARGBROW_NEON
#define HAS_RAWTOARGBROW_NEON
#define HAS_RGB24TOARGBROW_NEON
#endif
#if defined(_MSC_VER) && !defined(__CLR_VER)
......@@ -243,6 +249,12 @@ void ARGB1555ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
void RGB565ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
void ARGB4444ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
// NEON row converters from the RGB-family formats to ARGB.
// Each takes a source row, a destination row and a pixel count.
void BGRAToARGBRow_NEON(const uint8* src_bgra, uint8* dst_argb, int pix);
void ABGRToARGBRow_NEON(const uint8* src_abgr, uint8* dst_argb, int pix);
void RGBAToARGBRow_NEON(const uint8* src_rgba, uint8* dst_argb, int pix);
void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
// Source parameter is named src_raw to agree with the function definition.
void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
void BGRAToARGBRow_C(const uint8* src_bgra, uint8* dst_argb, int pix);
void ABGRToARGBRow_C(const uint8* src_abgr, uint8* dst_argb, int pix);
void RGBAToARGBRow_C(const uint8* src_rgba, uint8* dst_argb, int pix);
......
......@@ -360,6 +360,98 @@ void MirrorRowUV_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width) {
}
#endif // HAS_MIRRORROWUV_NEON
#ifdef HAS_BGRATOARGBROW_NEON
// Converts a row of 4-byte BGRA pixels to ARGB by reversing the byte order
// inside every pixel (source bytes 0,1,2,3 are stored as 3,2,1,0).
//
// src_bgra: source row, 4 bytes per pixel.
// dst_argb: destination row, 4 bytes per pixel.
// pix:      number of pixels to convert.
//
// The loop body runs before the counter is tested and handles 8 pixels
// (32 bytes in, 32 bytes out) per pass, so pix is effectively rounded up to
// a multiple of 8 and at least one pass always executes; both rows must be
// large enough for that.
//
// VLD4/VST4 accept a list of at most four D registers, so the planes live in
// d0-d3 rather than in Q registers. These are caller-saved, so no NEON
// register has to be preserved around the asm.
void BGRAToARGBRow_NEON(const uint8* src_bgra, uint8* dst_argb, int pix) {
  asm volatile (
  "1:                                          \n"
    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of BGRA.
    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
    "vswp.u8    d1, d2                         \n"  // swap G, R (bytes 1, 2)
    "vswp.u8    d0, d3                         \n"  // swap B, A (bytes 0, 3)
    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
    "bgt        1b                             \n"
  : "+r"(src_bgra),  // %0
    "+r"(dst_argb),  // %1
    "+r"(pix)        // %2
  :
  : "memory", "cc", "d0", "d1", "d2", "d3"  // Clobber List
  );
}
#endif // HAS_BGRATOARGBROW_NEON
#ifdef HAS_ABGRTOARGBROW_NEON
// Converts a row of 4-byte ABGR pixels to ARGB by exchanging bytes 0 and 2 of
// every pixel; bytes 1 and 3 are copied unchanged.
//
// src_abgr: source row, 4 bytes per pixel.
// dst_argb: destination row, 4 bytes per pixel.
// pix:      number of pixels to convert.
//
// The loop body runs before the counter is tested and handles 8 pixels
// (32 bytes in, 32 bytes out) per pass, so pix is effectively rounded up to
// a multiple of 8 and at least one pass always executes; both rows must be
// large enough for that.
//
// VLD4/VST4 accept a list of at most four D registers, so the planes live in
// d0-d3 rather than in Q registers.
void ABGRToARGBRow_NEON(const uint8* src_abgr, uint8* dst_argb, int pix) {
  asm volatile (
  "1:                                          \n"
    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of ABGR.
    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
    "vswp.u8    d0, d2                         \n"  // swap R, B (bytes 0, 2)
    "vst4.8     {d0, d1, d2, d3}, [%1]!        \n"  // store 8 pixels of ARGB.
    "bgt        1b                             \n"
  : "+r"(src_abgr),  // %0
    "+r"(dst_argb),  // %1
    "+r"(pix)        // %2
  :
  : "memory", "cc", "d0", "d1", "d2", "d3"  // Clobber List
  );
}
#endif // HAS_ABGRTOARGBROW_NEON
#ifdef HAS_RGBATOARGBROW_NEON
// Converts a row of 4-byte RGBA pixels to ARGB by rotating the bytes of every
// pixel: source bytes 1,2,3 are stored first and source byte 0 is stored last.
//
// src_rgba: source row, 4 bytes per pixel.
// dst_argb: destination row, 4 bytes per pixel.
// pix:      number of pixels to convert.
//
// The loop body runs before the counter is tested and handles 8 pixels
// (32 bytes in, 32 bytes out) per pass, so pix is effectively rounded up to
// a multiple of 8 and at least one pass always executes; both rows must be
// large enough for that.
//
// VLD4/VST4 accept a list of at most four D registers. The store list must be
// consecutive, so plane 0 is copied into d4 to follow d1-d3.
void RGBAToARGBRow_NEON(const uint8* src_rgba, uint8* dst_argb, int pix) {
  asm volatile (
  "1:                                          \n"
    "vld4.8     {d0, d1, d2, d3}, [%0]!        \n"  // load 8 pixels of RGBA.
    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
    "vmov.u8    d4, d0                         \n"  // move A after RGB
    "vst4.8     {d1, d2, d3, d4}, [%1]!        \n"  // store 8 pixels of ARGB.
    "bgt        1b                             \n"
  : "+r"(src_rgba),  // %0
    "+r"(dst_argb),  // %1
    "+r"(pix)        // %2
  :
  : "memory", "cc", "d0", "d1", "d2", "d3", "d4"  // Clobber List
  );
}
#endif // HAS_RGBATOARGBROW_NEON
#ifdef HAS_RGB24TOARGBROW_NEON
// Expands a row of 3-byte RGB24 pixels to 4-byte ARGB: the three source bytes
// are copied in order and a constant 255 is appended as the fourth (alpha)
// byte of every pixel.
//
// src_rgb24: source row, 3 bytes per pixel.
// dst_argb:  destination row, 4 bytes per pixel.
// pix:       number of pixels to convert.
//
// The loop body runs before the counter is tested and handles 8 pixels
// (24 bytes in, 32 bytes out) per pass, so pix is effectively rounded up to
// a multiple of 8 and at least one pass always executes; both rows must be
// large enough for that.
//
// VLD3/VST4 operate on lists of D registers, so the planes live in d1-d4
// rather than in Q registers.
void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) {
  asm volatile (
    "vmov.u8    d4, #255                       \n"  // Alpha
  "1:                                          \n"
    "vld3.8     {d1, d2, d3}, [%0]!            \n"  // load 8 pixels of RGB24.
    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
    "vst4.8     {d1, d2, d3, d4}, [%1]!        \n"  // store 8 pixels of ARGB.
    "bgt        1b                             \n"
  : "+r"(src_rgb24),  // %0
    "+r"(dst_argb),   // %1
    "+r"(pix)         // %2
  :
  : "memory", "cc", "d1", "d2", "d3", "d4"  // Clobber List
  );
}
#endif // HAS_RGB24TOARGBROW_NEON
#ifdef HAS_RAWTOARGBROW_NEON
// Expands a row of 3-byte RAW pixels to 4-byte ARGB: source bytes 0 and 2 are
// exchanged, byte 1 is kept, and a constant 255 is appended as the fourth
// (alpha) byte of every pixel.
//
// src_raw:  source row, 3 bytes per pixel.
// dst_argb: destination row, 4 bytes per pixel.
// pix:      number of pixels to convert.
//
// The loop body runs before the counter is tested and handles 8 pixels
// (24 bytes in, 32 bytes out) per pass, so pix is effectively rounded up to
// a multiple of 8 and at least one pass always executes; both rows must be
// large enough for that.
//
// VLD3/VST4 operate on lists of D registers, so the planes live in d1-d4
// rather than in Q registers.
void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) {
  asm volatile (
    "vmov.u8    d4, #255                       \n"  // Alpha
  "1:                                          \n"
    "vld3.8     {d1, d2, d3}, [%0]!            \n"  // load 8 pixels of RAW.
    "subs       %2, %2, #8                     \n"  // 8 processed per loop.
    "vswp.u8    d1, d3                         \n"  // swap R, B
    "vst4.8     {d1, d2, d3, d4}, [%1]!        \n"  // store 8 pixels of ARGB.
    "bgt        1b                             \n"
  : "+r"(src_raw),   // %0
    "+r"(dst_argb),  // %1
    "+r"(pix)        // %2
  :
  : "memory", "cc", "d1", "d2", "d3", "d4"  // Clobber List
  );
}
#endif // HAS_RAWTOARGBROW_NEON
#ifdef HAS_ARGBTORGBAROW_NEON
void ARGBToRGBARow_NEON(const uint8* src_argb, uint8* dst_rgba, int pix) {
asm volatile (
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment