Commit 4b4a32cb authored by fbarchard@google.com

ARGB1555 to ARGB Neon optimized

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/965007

git-svn-id: http://libyuv.googlecode.com/svn/trunk@472 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent bdf7cb59
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 471 Version: 472
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 471 #define LIBYUV_VERSION 472
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -534,7 +534,7 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555, ...@@ -534,7 +534,7 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height) { int width, int height) {
if (!src_argb1555 || !dst_argb || if (!src_argb1555 || !dst_argb ||
width <= 0 || height == 0) { width <= 0 || height == 0) {
return -1; return -1;
} }
// Negative height means invert the image. // Negative height means invert the image.
...@@ -543,13 +543,22 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555, ...@@ -543,13 +543,22 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555; src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555;
src_stride_argb1555 = -src_stride_argb1555; src_stride_argb1555 = -src_stride_argb1555;
} }
void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb, void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb, int pix) =
int pix) = ARGB1555ToARGBRow_C; ARGB1555ToARGBRow_C;
#if defined(HAS_ARGB1555TOARGBROW_SSE2) #if defined(HAS_ARGB1555TOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2; ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
}
}
#elif defined(HAS_ARGB1555TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_NEON;
}
} }
#endif #endif
...@@ -576,13 +585,22 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444, ...@@ -576,13 +585,22 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444; src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444;
src_stride_argb4444 = -src_stride_argb4444; src_stride_argb4444 = -src_stride_argb4444;
} }
void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb, void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb, int pix) =
int pix) = ARGB4444ToARGBRow_C; ARGB4444ToARGBRow_C;
#if defined(HAS_ARGB4444TOARGBROW_SSE2) #if defined(HAS_ARGB4444TOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
IS_ALIGNED(width, 8) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2; ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
}
}
#elif defined(HAS_ARGB4444TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_NEON;
}
} }
#endif #endif
......
...@@ -184,6 +184,8 @@ YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 1, 16) ...@@ -184,6 +184,8 @@ YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 1, 16)
YANY(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 3, 4, 16) YANY(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 3, 4, 16)
YANY(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 3, 4, 16) YANY(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 3, 4, 16)
YANY(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 2, 4, 8) YANY(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 2, 4, 8)
YANY(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 2, 4, 8)
YANY(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 2, 4, 8)
#endif #endif
#ifdef HAS_ARGBTOYROW_NEON #ifdef HAS_ARGBTOYROW_NEON
YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 1, 8) YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 1, 8)
...@@ -198,6 +200,8 @@ YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 1, 16) ...@@ -198,6 +200,8 @@ YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 1, 16)
YANY(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 3, 4, 8) YANY(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 3, 4, 8)
YANY(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 3, 4, 8) YANY(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 3, 4, 8)
YANY(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 2, 4, 8) YANY(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 2, 4, 8)
YANY(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 2, 4, 8)
YANY(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 2, 4, 8)
#endif #endif
#undef YANY #undef YANY
......
...@@ -1113,7 +1113,6 @@ void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) { ...@@ -1113,7 +1113,6 @@ void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) {
#endif // HAS_RAWTOARGBROW_NEON #endif // HAS_RAWTOARGBROW_NEON
#ifdef HAS_RGB565TOARGBROW_NEON #ifdef HAS_RGB565TOARGBROW_NEON
#define RGB565TOARGB \ #define RGB565TOARGB \
"vmovn.u16 d4, q0 \n" /* B xxxBBBBB */ \ "vmovn.u16 d4, q0 \n" /* B xxxBBBBB */ \
"vshrn.u16 d5, q0, #5 \n" /* G xxGGGGGG */ \ "vshrn.u16 d5, q0, #5 \n" /* G xxGGGGGG */ \
...@@ -1133,7 +1132,7 @@ void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) { ...@@ -1133,7 +1132,7 @@ void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) {
"vmov.u8 d7, #7 \n" // 5 bit mask "vmov.u8 d7, #7 \n" // 5 bit mask
".p2align 2 \n" ".p2align 2 \n"
"1: \n" "1: \n"
"vld1.8 {q0}, [%0]! \n" // load 8 pixels of RGB565. "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels.
"subs %2, %2, #8 \n" // 8 processed per loop. "subs %2, %2, #8 \n" // 8 processed per loop.
RGB565TOARGB RGB565TOARGB
"vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB.
...@@ -1147,6 +1146,72 @@ void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) { ...@@ -1147,6 +1146,72 @@ void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) {
} }
#endif // HAS_RGB565TOARGBROW_NEON #endif // HAS_RGB565TOARGBROW_NEON
#ifdef HAS_ARGB1555TOARGBROW_NEON
/*
 * NEON instruction sequence that widens 8 ARGB1555 pixels to 8 bits per
 * channel.
 *
 * Input:  q0 holds the 8 source pixels as 16-bit lanes.
 * Output: q0 holds the B and G byte planes, q1 holds the R and A byte planes
 *         (see the per-instruction notes), ready for an interleaving store.
 * Scratch: q2 and q3 (d4-d7) are overwritten.
 *
 * Each 5-bit colour channel is shifted into the top of its byte and its upper
 * 3 bits are OR-ed into the vacated low bits; the 1-bit alpha is negated to
 * produce 0x00 or 0xFF.
 *
 * The final line deliberately has NO trailing backslash: a continuation there
 * would splice the next source line (the function signature that follows)
 * into this macro.
 */
#define ARGB1555TOARGB \
    "vshrn.u16 d7, q0, #8 \n" /* A Arrrrrxx */ \
    "vshr.u8 d6, d7, #2 \n" /* R xxxRRRRR */ \
    "vshrn.u16 d5, q0, #5 \n" /* G xxxGGGGG */ \
    "vmovn.u16 d4, q0 \n" /* B xxxBBBBB */ \
    "vshr.u8 d7, d7, #7 \n" /* A 0000000A */ \
    "vneg.s8 d7, d7 \n" /* A AAAAAAAA upper 8 */ \
    "vshl.u8 d6, d6, #3 \n" /* R RRRRR000 upper 5 */ \
    "vshr.u8 q1, q3, #5 \n" /* R,A 00000RRR lower 3 */ \
    "vshl.u8 q0, q2, #3 \n" /* B,G BBBBB000 upper 5 */ \
    "vshr.u8 q2, q0, #5 \n" /* B,G 00000BBB lower 3 */ \
    "vorr.u8 q1, q1, q3 \n" /* R,A */ \
    "vorr.u8 q0, q0, q2 \n" /* B,G */
// Converts one row of ARGB1555 pixels to 8-bit-per-channel ARGB using NEON.
//
//   src_argb1555: source row; 16 bytes (8 pixels) are loaded per iteration.
//   dst_argb:     destination row; 8 output pixels are stored per iteration.
//   pix:          number of pixels to convert.
//
// The loop body always runs at least once and repeats while the remaining
// count is still positive after subtracting 8, so a count that is not a
// positive multiple of 8 is effectively rounded up to the next multiple of 8
// (reading and writing past the requested pixel count).
void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
int pix) {
asm volatile (
// NOTE(review): d3 is the upper half of q1, which ARGB1555TOARGB rewrites
// (its final "vorr.u8 q1, q1, q3") before the store, so this alpha preload
// looks redundant here - confirm before removing.
"vmov.u8 d3, #255 \n" // Alpha
".p2align 2 \n"
"1: \n"
"vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels, advance src.
"subs %2, %2, #8 \n" // 8 processed per loop; sets flags for bgt.
ARGB1555TOARGB
"vst4.8 {d0, d1, d2, d3}, [%1]! \n" // interleave and store 8 ARGB pixels.
"bgt 1b \n" // loop while pixels remain.
: "+r"(src_argb1555), // %0
"+r"(dst_argb), // %1
"+r"(pix) // %2
:
: "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
);
}
#endif // HAS_ARGB1555TOARGBROW_NEON
#ifdef HAS_ARGB4444TOARGBROW_NEON
/*
 * NEON instruction sequence that widens 8 ARGB4444 pixels to 8 bits per
 * channel.
 *
 * Input:  q0 holds the 8 source pixels (16 bits each).
 * Output: d0..d3 hold the B, G, R and A byte planes in that order (after the
 *         final swap), ready for an interleaving store.
 * Scratch: q2 is overwritten.
 *
 * Each 4-bit channel is replicated into both nibbles of its output byte.
 */
#define ARGB4444TOARGB \
"vuzp.u8 d0, d1 \n" /* d0 BG, d1 RA */ \
"vshl.u8 q2, q0, #4 \n" /* B,R BBBB0000 */ \
"vshr.u8 q1, q0, #4 \n" /* G,A 0000GGGG */ \
"vshr.u8 q0, q2, #4 \n" /* B,R 0000BBBB */ \
"vorr.u8 q0, q0, q2 \n" /* B,R BBBBBBBB */ \
"vshl.u8 q2, q1, #4 \n" /* G,A GGGG0000 */ \
"vorr.u8 q1, q1, q2 \n" /* G,A GGGGGGGG */ \
"vswp.u8 d1, d2 \n" /* B,R,G,A -> B,G,R,A */
// Converts one row of ARGB4444 pixels to 8-bit-per-channel ARGB using NEON.
//
//   src_argb4444: source row; 16 bytes (8 pixels) are loaded per iteration.
//   dst_argb:     destination row; 8 output pixels are stored per iteration.
//   pix:          number of pixels to convert.
//
// The loop body always runs at least once and repeats while the remaining
// count is still positive after subtracting 8, so a count that is not a
// positive multiple of 8 is effectively rounded up to the next multiple of 8
// (reading and writing past the requested pixel count).
void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
int pix) {
asm volatile (
// NOTE(review): d3 is the upper half of q1, which ARGB4444TOARGB rewrites
// ("vshr.u8 q1, q0, #4") before the store, so this alpha preload looks
// redundant here - confirm before removing.
"vmov.u8 d3, #255 \n" // Alpha
".p2align 2 \n"
"1: \n"
"vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels, advance src.
"subs %2, %2, #8 \n" // 8 processed per loop; sets flags for bgt.
ARGB4444TOARGB
"vst4.8 {d0, d1, d2, d3}, [%1]! \n" // interleave and store 8 ARGB pixels.
"bgt 1b \n" // loop while pixels remain.
: "+r"(src_argb4444), // %0
"+r"(dst_argb), // %1
"+r"(pix) // %2
:
: "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List
);
}
#endif // HAS_ARGB4444TOARGBROW_NEON
#ifdef HAS_ARGBTORGBAROW_NEON #ifdef HAS_ARGBTORGBAROW_NEON
void ARGBToRGBARow_NEON(const uint8* src_argb, uint8* dst_rgba, int pix) { void ARGBToRGBARow_NEON(const uint8* src_argb, uint8* dst_rgba, int pix) {
asm volatile ( asm volatile (
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment