Commit e246e6c1 authored by yang.zhang@arm.com's avatar yang.zhang@arm.com

Add ARGBToRGB565DitherRow_NEON for ARM32/64

ARM32/64 NEON versions of ARGBToRGB565DitherRow_NEON are implemented.

BUG=407
TESTED=libyuvTest.* on ARM32/64 with Android
R=fbarchard@google.com

Change-Id: Ia689170fb39db964392e5e1113801592ab0628bf

Review URL: https://webrtc-codereview.appspot.com/49409004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1335 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 3b4f5eb7
......@@ -342,6 +342,7 @@ extern "C" {
#define HAS_YUY2TOUV422ROW_NEON
#define HAS_YUY2TOUVROW_NEON
#define HAS_YUY2TOYROW_NEON
#define HAS_ARGBTORGB565DITHERROW_NEON
// Effects:
#define HAS_ARGBADDROW_NEON
......@@ -927,6 +928,8 @@ void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
const uint32 dither4, int width);
void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
......@@ -1422,6 +1425,8 @@ void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRGB565DitherRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb,
const uint32 dither4, int width);
void I444ToARGBRow_Any_NEON(const uint8* src_y,
const uint8* src_u,
......
......@@ -1081,6 +1081,14 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_NEON;
}
}
#endif
{
// Allocate a row of argb.
......
......@@ -846,6 +846,14 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToRGB565DitherRow(src_argb, dst_rgb565,
......
......@@ -254,6 +254,10 @@ RGBDANY(ARGBToRGB565DitherRow_Any_SSE2, ARGBToRGB565DitherRow_SSE2,
RGBDANY(ARGBToRGB565DitherRow_Any_AVX2, ARGBToRGB565DitherRow_AVX2,
ARGBToRGB565DitherRow_C, 4, 2, 7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
RGBDANY(ARGBToRGB565DitherRow_Any_NEON, ARGBToRGB565DitherRow_NEON,
ARGBToRGB565DitherRow_C, 4, 2, 7)
#endif
#undef RGBDANY
// ARGB to Bayer does multiple of 4 pixels, SSSE3 aligned src, unaligned dst.
......
......@@ -1360,6 +1360,30 @@ void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
);
}
void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
const uint32 dither4, int width) {
asm volatile (
".p2align 2 \n"
"vdup.32 d2, %2 \n" // dither4
"1: \n"
MEMACCESS(1)
"vld4.8 {d20, d21, d22, d23}, [%1]! \n" // load 8 pixels of ARGB.
"subs %3, %3, #8 \n" // 8 processed per loop.
"vqadd.u8 d20, d20, d2 \n"
"vqadd.u8 d21, d21, d2 \n"
"vqadd.u8 d22, d22, d2 \n"
ARGBTORGB565
MEMACCESS(0)
"vst1.8 {q0}, [%0]! \n" // store 8 pixels RGB565.
"bgt 1b \n"
: "+r"(dst_rgb) // %0
: "r"(src_argb), // %1
"r"(dither4), // %2
"r"(width) // %3
: "cc", "memory", "q0", "q1", "q8", "q9", "q10", "q11"
);
}
void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
int pix) {
asm volatile (
......
......@@ -1376,6 +1376,31 @@ void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) {
}
#endif // HAS_ARGBTORGB565ROW_NEON
#ifdef HAS_ARGBTORGB565DITHERROW_NEON
void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
const uint32 dither4, int width) {
asm volatile (
"dup v1.4s, %w2 \n" // dither4
"1: \n"
MEMACCESS(1)
"ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" // load 8 pixels
"subs %3, %3, #8 \n" // 8 processed per loop.
"uqadd v20.8b, v20.8b, v1.8b \n"
"uqadd v21.8b, v21.8b, v1.8b \n"
"uqadd v22.8b, v22.8b, v1.8b \n"
ARGBTORGB565
MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n" // store 8 pixels RGB565.
"b.gt 1b \n"
: "+r"(dst_rgb) // %0
: "r"(src_argb), // %1
"r"(dither4), // %2
"r"(width) // %3
: "cc", "memory", "v0", "v1", "v20", "v21", "v22", "v23"
);
}
#endif // HAS_ARGBTORGB565ROW_NEON
#ifdef HAS_ARGBTOARGB1555ROW_NEON
void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555,
int pix) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment