Commit 1bdcc4c3 authored by fbarchard@google.com's avatar fbarchard@google.com

Add NEON row functions for ARGB to RGB565 and ARGB1555 conversion

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/881004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@420 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 48625957
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 418 Version: 420
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -150,6 +150,8 @@ extern "C" { ...@@ -150,6 +150,8 @@ extern "C" {
#define HAS_YUY2TOYROW_NEON #define HAS_YUY2TOYROW_NEON
#define HAS_I422TOYUY2ROW_NEON #define HAS_I422TOYUY2ROW_NEON
#define HAS_I422TOUYVYROW_NEON #define HAS_I422TOUYVYROW_NEON
#define HAS_ARGBTORGB565ROW_NEON
#define HAS_ARGBTOARGB1555ROW_NEON
#define HAS_ARGBTOARGB4444ROW_NEON #define HAS_ARGBTOARGB4444ROW_NEON
#endif #endif
...@@ -308,8 +310,8 @@ void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix); ...@@ -308,8 +310,8 @@ void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix);
void RGBAToARGBRow_SSSE3(const uint8* src_rgba, uint8* dst_argb, int pix); void RGBAToARGBRow_SSSE3(const uint8* src_rgba, uint8* dst_argb, int pix);
void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
void RAWToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); void RAWToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
void ARGB1555ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
void RGB565ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix); void RGB565ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
void ARGB1555ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
void ARGB4444ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix); void ARGB4444ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix);
void BGRAToARGBRow_NEON(const uint8* src_bgra, uint8* dst_argb, int pix); void BGRAToARGBRow_NEON(const uint8* src_bgra, uint8* dst_argb, int pix);
...@@ -337,6 +339,8 @@ void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); ...@@ -337,6 +339,8 @@ void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRGBARow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGBARow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
...@@ -593,6 +597,8 @@ void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); ...@@ -593,6 +597,8 @@ void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 418 #define LIBYUV_VERSION 420
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -245,6 +245,13 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, ...@@ -245,6 +245,13 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
ARGBToRGB565Row = ARGBToRGB565Row_SSE2; ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
} }
} }
#elif defined(HAS_ARGBTORGB565ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToRGB565Row = ARGBToRGB565Row_NEON;
}
}
#endif #endif
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
...@@ -278,6 +285,13 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, ...@@ -278,6 +285,13 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2; ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
} }
} }
#elif defined(HAS_ARGBTOARGB1555ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_NEON;
}
}
#endif #endif
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
......
...@@ -1073,6 +1073,10 @@ RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, ARGBToARGB4444Row_C, ...@@ -1073,6 +1073,10 @@ RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, ARGBToARGB4444Row_C,
#if defined(HAS_ARGBTORGB24ROW_NEON) #if defined(HAS_ARGBTORGB24ROW_NEON)
RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 7, 4, 3) RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 7, 4, 3)
RGBANY(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, ARGBToRAWRow_C, 7, 4, 3) RGBANY(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, ARGBToRAWRow_C, 7, 4, 3)
RGBANY(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, ARGBToRGB565Row_C,
7, 4, 2)
RGBANY(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, ARGBToARGB1555Row_C,
7, 4, 2)
RGBANY(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, ARGBToARGB4444Row_C, RGBANY(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, ARGBToARGB4444Row_C,
7, 4, 2) 7, 4, 2)
#endif #endif
......
...@@ -908,6 +908,67 @@ void I422ToUYVYRow_NEON(const uint8* src_y, ...@@ -908,6 +908,67 @@ void I422ToUYVYRow_NEON(const uint8* src_y,
); );
} }
#ifdef HAS_ARGBTORGB565ROW_NEON
// Converts one row of 32-bit ARGB pixels to 16-bit RGB565 using NEON.
//
// src_argb   - source pixels, 4 bytes each; the de-interleaving load places
//              byte 0 of every pixel in d0 (B), byte 1 in d1 (G), byte 2 in
//              d2 (R) and byte 3 in d3 (A, unused here).
// dst_rgb565 - destination, 2 bytes per pixel; each 16-bit value is
//              B5 | G6 << 5 | R5 << 11.
// pix        - number of pixels to convert. The loop body runs before the
//              count is tested and consumes 8 pixels per iteration, so pix
//              is expected to be a positive multiple of 8; any other value
//              is rounded up to the next group of 8 and would read/write
//              past the row.
void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
  asm volatile (
    ".p2align 2 \n"
  "1: \n"
    "vld4.8 {d0, d1, d2, d3}, [%0]! \n"  // load 8 pixels of ARGB, split by channel.
    "subs %2, %2, #8 \n"                 // 8 processed per loop; sets flags for bgt.
    "vshr.u8 d0, d0, #3 \n"              // B: keep top 5 bits.
    "vshr.u8 d1, d1, #2 \n"              // G: keep top 6 bits.
    "vshr.u8 d2, d2, #3 \n"              // R: keep top 5 bits.
    "vmovl.u8 q8, d0 \n"                 // widen B to 16 bit lanes.
    "vmovl.u8 q9, d1 \n"                 // widen G to 16 bit lanes.
    "vmovl.u8 q10, d2 \n"                // widen R to 16 bit lanes.
    "vshl.u16 q9, q9, #5 \n"             // G into bits 5..10.
    "vshl.u16 q10, q10, #11 \n"          // R into bits 11..15.
    "vorr q0, q8, q9 \n"                 // BG
    "vorr q0, q0, q10 \n"                // BGR
    "vst1.8 {q0}, [%1]! \n"              // store 8 pixels RGB565.
    "bgt 1b \n"                          // loop while remaining count > 0.
    : "+r"(src_argb),    // %0
      "+r"(dst_rgb565),  // %1
      "+r"(pix)          // %2
    :
    // Only registers the code actually writes are listed; q0 is covered by
    // its halves d0/d1.
    : "memory", "cc", "d0", "d1", "d2", "d3", "q8", "q9", "q10"
  );
}
#endif  // HAS_ARGBTORGB565ROW_NEON
#ifdef HAS_ARGBTOARGB1555ROW_NEON
// Converts one row of 32-bit ARGB pixels to 16-bit ARGB1555 using NEON.
//
// src_argb     - source pixels, 4 bytes each; the de-interleaving load puts
//                byte 0 of every pixel in d0 (B), byte 1 in d1 (G), byte 2
//                in d2 (R) and byte 3 in d3 (A).
// dst_argb1555 - destination, 2 bytes per pixel; each 16-bit value is
//                B5 | G5 << 5 | R5 << 10 | A1 << 15.
// pix          - number of pixels to convert. The loop body runs before the
//                count is tested and consumes 8 pixels per iteration, so pix
//                is expected to be a positive multiple of 8.
void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
int pix) {
asm volatile (
".p2align 2 \n"
"1: \n"
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB, split by channel.
"subs %2, %2, #8 \n" // 8 processed per loop; sets flags for bgt.
"vshr.u8 d0, d0, #3 \n" // B: keep top 5 bits.
"vshr.u8 d1, d1, #3 \n" // G: keep top 5 bits.
"vshr.u8 d2, d2, #3 \n" // R: keep top 5 bits.
"vshr.u8 d3, d3, #7 \n" // A: keep top bit only.
"vmovl.u8 q8, d0 \n" // widen B to 16 bit lanes.
"vmovl.u8 q9, d1 \n" // widen G to 16 bit lanes.
"vmovl.u8 q10, d2 \n" // widen R to 16 bit lanes.
"vmovl.u8 q11, d3 \n" // widen A to 16 bit lanes.
"vshl.u16 q9, q9, #5 \n" // G into bits 5..9.
"vshl.u16 q10, q10, #10 \n" // R into bits 10..14.
"vshl.u16 q11, q11, #15 \n" // A into bit 15.
"vorr q0, q8, q9 \n" // BG
"vorr q1, q10, q11 \n" // RA
"vorr q0, q0, q1 \n" // BGRA
"vst1.8 {q0}, [%1]! \n" // store 8 pixels ARGB1555.
"bgt 1b \n" // loop while remaining count > 0.
: "+r"(src_argb), // %0
"+r"(dst_argb1555), // %1
"+r"(pix) // %2
:
// q0 and q1 are covered by their halves d0..d3.
: "memory", "cc", "d0", "d1", "d2", "d3", "q8", "q9", "q10", "q11"
);
}
#endif // HAS_ARGBTOARGB1555ROW_NEON
#ifdef HAS_ARGBTOARGB4444ROW_NEON #ifdef HAS_ARGBTOARGB4444ROW_NEON
void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444, void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444,
int pix) { int pix) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment