Commit 860cc035 authored by Frank Barchard

Neon versions of I420AlphaToARGB

Add an alpha-preserving YUV-to-RGB row conversion (I422AlphaToARGBRow_NEON) to the NEON code for ARMv7 and AArch64.
For the other YUV-to-RGB conversions, hoist the instruction that sets alpha to 255 out of the loop.

TBR=harryjin@google.com
BUG=libyuv:516

Review URL: https://codereview.chromium.org/1413763017 .
parent 82d74a37
...@@ -258,6 +258,7 @@ extern "C" { ...@@ -258,6 +258,7 @@ extern "C" {
// The following are available on Neon platforms: // The following are available on Neon platforms:
#if !defined(LIBYUV_DISABLE_NEON) && \ #if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
#define HAS_I422ALPHATOARGBROW_NEON
#define HAS_ABGRTOUVROW_NEON #define HAS_ABGRTOUVROW_NEON
#define HAS_ABGRTOYROW_NEON #define HAS_ABGRTOYROW_NEON
#define HAS_ARGB1555TOARGBROW_NEON #define HAS_ARGB1555TOARGBROW_NEON
...@@ -553,6 +554,13 @@ void I422ToARGBRow_NEON(const uint8* src_y, ...@@ -553,6 +554,13 @@ void I422ToARGBRow_NEON(const uint8* src_y,
uint8* dst_argb, uint8* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
// NEON row function: converts one row of planar YUV (y_buf, u_buf, v_buf)
// plus a separate alpha plane (a_buf) into interleaved 4-byte pixels written
// to dst_argb, using the conversion constants in yuvconstants.
// width is the number of pixels to convert.
// NOTE(review): the NEON implementations consume 8 pixels per loop iteration
// and always run at least one iteration, so width is presumably expected to
// be a positive multiple of 8 -- confirm against callers.
void I422AlphaToARGBRow_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
const uint8* a_buf,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_NEON(const uint8* src_y, void I422ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u, const uint8* src_u,
const uint8* src_v, const uint8* src_v,
...@@ -1533,6 +1541,13 @@ void I422ToARGBRow_Any_NEON(const uint8* src_y, ...@@ -1533,6 +1541,13 @@ void I422ToARGBRow_Any_NEON(const uint8* src_y,
uint8* dst_argb, uint8* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
// "Any" variant of I422AlphaToARGBRow_NEON; takes the same arguments
// (Y, U, V and alpha source rows, ARGB destination, constants, pixel width).
// The definition is generated by the ANY41C macro, which wraps
// I422AlphaToARGBRow_NEON with a final macro argument of 7.
// NOTE(review): presumably 7 is a width mask that lets this wrapper accept
// widths that are not a multiple of 8 -- confirm against the ANY41C macro.
void I422AlphaToARGBRow_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
const uint8* src_a,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I411ToARGBRow_Any_NEON(const uint8* src_y, void I411ToARGBRow_Any_NEON(const uint8* src_y,
const uint8* src_u, const uint8* src_u,
const uint8* src_v, const uint8* src_v,
......
...@@ -48,7 +48,10 @@ extern "C" { ...@@ -48,7 +48,10 @@ extern "C" {
ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7) ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
#endif #endif
#ifdef HAS_I422ALPHATOARGBROW_AVX2 #ifdef HAS_I422ALPHATOARGBROW_AVX2
ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 7) ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I422ALPHATOARGBROW_NEON
ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7)
#endif #endif
#undef ANY41C #undef ANY41C
......
...@@ -142,11 +142,11 @@ void I444ToARGBRow_NEON(const uint8* src_y, ...@@ -142,11 +142,11 @@ void I444ToARGBRow_NEON(const uint8* src_y,
int width) { int width) {
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
"1: \n" "1: \n"
READYUV444 READYUV444
YUVTORGB YUVTORGB
"subs %4, %4, #8 \n" "subs %4, %4, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(3) MEMACCESS(3)
"vst4.8 {d20, d21, d22, d23}, [%3]! \n" "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
"bgt 1b \n" "bgt 1b \n"
...@@ -172,11 +172,11 @@ void I422ToARGBRow_NEON(const uint8* src_y, ...@@ -172,11 +172,11 @@ void I422ToARGBRow_NEON(const uint8* src_y,
int width) { int width) {
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
"1: \n" "1: \n"
READYUV422 READYUV422
YUVTORGB YUVTORGB
"subs %4, %4, #8 \n" "subs %4, %4, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(3) MEMACCESS(3)
"vst4.8 {d20, d21, d22, d23}, [%3]! \n" "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
"bgt 1b \n" "bgt 1b \n"
...@@ -194,6 +194,39 @@ void I422ToARGBRow_NEON(const uint8* src_y, ...@@ -194,6 +194,39 @@ void I422ToARGBRow_NEON(const uint8* src_y,
); );
} }
// ARMv7 NEON: converts one row of I422 YUV plus a separate alpha plane to
// interleaved 4-byte pixels in dst_argb.
//
// Each loop iteration handles 8 pixels: the YUV data is read and converted by
// the READYUV422 / YUVTORGB macros (defined outside this view), 8 alpha bytes
// are loaded from src_a, and 32 bytes are stored to dst_argb.
//
// The loop body always executes at least once and repeats while the remaining
// width is greater than zero, so a width that is not a positive multiple of 8
// makes the last iteration process a full 8 pixels anyway.
// NOTE(review): presumably callers route other widths through
// I422AlphaToARGBRow_Any_NEON -- confirm.
void I422AlphaToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
const uint8* src_a,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
// Expands to setup code that uses the named kUVToRB / kUVToG / kUVBiasBGR /
// kYToRgb input operands below (macro body is outside this view).
YUVTORGB_SETUP
"1: \n"
READYUV422
// Presumably leaves the converted colour channels in d20-d22, which are
// stored below -- confirm against the YUVTORGB macro.
YUVTORGB
// width -= 8; the flags set here are consumed by the "bgt" at the bottom.
"subs %5, %5, #8 \n"
MEMACCESS(3)
// Load 8 alpha bytes from src_a into d23 and advance src_a.
"vld1.8 {d23}, [%3]! \n"
MEMACCESS(4)
// Interleaved store of d20, d21, d22 and d23 (alpha last): 8 pixels,
// 32 bytes, advancing dst_argb.
"vst4.8 {d20, d21, d22, d23}, [%4]! \n"
// Loop while the remaining width is still positive.
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(src_a), // %3
"+r"(dst_argb), // %4
"+r"(width) // %5
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
void I411ToARGBRow_NEON(const uint8* src_y, void I411ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u, const uint8* src_u,
const uint8* src_v, const uint8* src_v,
...@@ -202,11 +235,11 @@ void I411ToARGBRow_NEON(const uint8* src_y, ...@@ -202,11 +235,11 @@ void I411ToARGBRow_NEON(const uint8* src_y,
int width) { int width) {
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
"1: \n" "1: \n"
READYUV411 READYUV411
YUVTORGB YUVTORGB
"subs %4, %4, #8 \n" "subs %4, %4, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(3) MEMACCESS(3)
"vst4.8 {d20, d21, d22, d23}, [%3]! \n" "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
"bgt 1b \n" "bgt 1b \n"
...@@ -232,11 +265,11 @@ void I422ToRGBARow_NEON(const uint8* src_y, ...@@ -232,11 +265,11 @@ void I422ToRGBARow_NEON(const uint8* src_y,
int width) { int width) {
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"vmov.u8 d19, #255 \n"
"1: \n" "1: \n"
READYUV422 READYUV422
YUVTORGB YUVTORGB
"subs %4, %4, #8 \n" "subs %4, %4, #8 \n"
"vmov.u8 d19, #255 \n"
MEMACCESS(3) MEMACCESS(3)
"vst4.8 {d19, d20, d21, d22}, [%3]! \n" "vst4.8 {d19, d20, d21, d22}, [%3]! \n"
"bgt 1b \n" "bgt 1b \n"
...@@ -417,11 +450,11 @@ void I400ToARGBRow_NEON(const uint8* src_y, ...@@ -417,11 +450,11 @@ void I400ToARGBRow_NEON(const uint8* src_y,
int width) { int width) {
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
"1: \n" "1: \n"
READYUV400 READYUV400
YUVTORGB YUVTORGB
"subs %2, %2, #8 \n" "subs %2, %2, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(1) MEMACCESS(1)
"vst4.8 {d20, d21, d22, d23}, [%1]! \n" "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
"bgt 1b \n" "bgt 1b \n"
...@@ -466,11 +499,11 @@ void NV12ToARGBRow_NEON(const uint8* src_y, ...@@ -466,11 +499,11 @@ void NV12ToARGBRow_NEON(const uint8* src_y,
int width) { int width) {
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
"1: \n" "1: \n"
READNV12 READNV12
YUVTORGB YUVTORGB
"subs %3, %3, #8 \n" "subs %3, %3, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(2) MEMACCESS(2)
"vst4.8 {d20, d21, d22, d23}, [%2]! \n" "vst4.8 {d20, d21, d22, d23}, [%2]! \n"
"bgt 1b \n" "bgt 1b \n"
...@@ -494,11 +527,11 @@ void NV21ToARGBRow_NEON(const uint8* src_y, ...@@ -494,11 +527,11 @@ void NV21ToARGBRow_NEON(const uint8* src_y,
int width) { int width) {
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
"1: \n" "1: \n"
READNV21 READNV21
YUVTORGB YUVTORGB
"subs %3, %3, #8 \n" "subs %3, %3, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(2) MEMACCESS(2)
"vst4.8 {d20, d21, d22, d23}, [%2]! \n" "vst4.8 {d20, d21, d22, d23}, [%2]! \n"
"bgt 1b \n" "bgt 1b \n"
...@@ -549,11 +582,11 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2, ...@@ -549,11 +582,11 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
int width) { int width) {
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
"1: \n" "1: \n"
READYUY2 READYUY2
YUVTORGB YUVTORGB
"subs %2, %2, #8 \n" "subs %2, %2, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(1) MEMACCESS(1)
"vst4.8 {d20, d21, d22, d23}, [%1]! \n" "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
"bgt 1b \n" "bgt 1b \n"
...@@ -575,11 +608,11 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy, ...@@ -575,11 +608,11 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
int width) { int width) {
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"vmov.u8 d23, #255 \n"
"1: \n" "1: \n"
READUYVY READUYVY
YUVTORGB YUVTORGB
"subs %2, %2, #8 \n" "subs %2, %2, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(1) MEMACCESS(1)
"vst4.8 {d20, d21, d22, d23}, [%1]! \n" "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
"bgt 1b \n" "bgt 1b \n"
......
...@@ -127,15 +127,6 @@ extern "C" { ...@@ -127,15 +127,6 @@ extern "C" {
"sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \ "sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \
"sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \ "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \
// TODO(fbarchard): Use structure for constants like 32 bit code.
// Assembly fragment that loads the fixed RGB-to-UV constants into v20-v25:
// the 16-bit lanes of v20-v24 are set to 56, 37, 19, 9 and 47 (per the inline
// notes, U/V coefficients already divided by 2), and every byte of v25 is set
// to 0x80. Code that expands this macro overwrites v20-v25.
#define RGBTOUV_SETUP_REG \
"movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \
"movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \
"movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \
"movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \
"movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \
"movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */
#ifdef HAS_I444TOARGBROW_NEON #ifdef HAS_I444TOARGBROW_NEON
void I444ToARGBRow_NEON(const uint8* src_y, void I444ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u, const uint8* src_u,
...@@ -145,11 +136,11 @@ void I444ToARGBRow_NEON(const uint8* src_y, ...@@ -145,11 +136,11 @@ void I444ToARGBRow_NEON(const uint8* src_y,
int width) { int width) {
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"movi v23.8b, #255 \n" /* A */
"1: \n" "1: \n"
READYUV444 READYUV444
YUVTORGB(v22, v21, v20) YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n" "subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n" /* A */
MEMACCESS(3) MEMACCESS(3)
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
"b.gt 1b \n" "b.gt 1b \n"
...@@ -168,7 +159,6 @@ void I444ToARGBRow_NEON(const uint8* src_y, ...@@ -168,7 +159,6 @@ void I444ToARGBRow_NEON(const uint8* src_y,
} }
#endif // HAS_I444TOARGBROW_NEON #endif // HAS_I444TOARGBROW_NEON
// TODO(fbarchard): Switch to Matrix version of this function.
#ifdef HAS_I422TOARGBROW_NEON #ifdef HAS_I422TOARGBROW_NEON
void I422ToARGBRow_NEON(const uint8* src_y, void I422ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u, const uint8* src_u,
...@@ -178,11 +168,11 @@ void I422ToARGBRow_NEON(const uint8* src_y, ...@@ -178,11 +168,11 @@ void I422ToARGBRow_NEON(const uint8* src_y,
int width) { int width) {
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"movi v23.8b, #255 \n" /* A */
"1: \n" "1: \n"
READYUV422 READYUV422
YUVTORGB(v22, v21, v20) YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n" "subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n" /* A */
MEMACCESS(3) MEMACCESS(3)
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
"b.gt 1b \n" "b.gt 1b \n"
...@@ -201,6 +191,41 @@ void I422ToARGBRow_NEON(const uint8* src_y, ...@@ -201,6 +191,41 @@ void I422ToARGBRow_NEON(const uint8* src_y,
} }
#endif // HAS_I422TOARGBROW_NEON #endif // HAS_I422TOARGBROW_NEON
#ifdef HAS_I422ALPHATOARGBROW_NEON
// AArch64 NEON: converts one row of I422 YUV plus a separate alpha plane to
// interleaved 4-byte pixels in dst_argb.
//
// Each loop iteration handles 8 pixels: the YUV data is read and converted by
// the READYUV422 / YUVTORGB macros (defined outside this view), 8 alpha bytes
// are loaded from src_a into v23, and 32 bytes are stored to dst_argb.
//
// The loop body always executes at least once and repeats while the remaining
// width is greater than zero, so a width that is not a positive multiple of 8
// makes the last iteration process a full 8 pixels anyway.
// NOTE(review): presumably callers route other widths through
// I422AlphaToARGBRow_Any_NEON -- confirm.
void I422AlphaToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
const uint8* src_a,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
// Expands to setup code that uses the named kUVToRB / kUVToG / kUVBiasBGR /
// kYToRgb input operands below (macro body is outside this view).
YUVTORGB_SETUP
"1: \n"
READYUV422
// Arguments are presumably the (R, G, B) destination registers -- confirm
// against the YUVTORGB macro definition.
YUVTORGB(v22, v21, v20)
MEMACCESS(3)
// Load 8 alpha bytes from src_a into v23 and advance src_a by 8.
"ld1 {v23.8b}, [%3], #8 \n"
// width -= 8 (32-bit view of the operand); flags feed "b.gt" below.
"subs %w5, %w5, #8 \n"
MEMACCESS(4)
// Interleaved store of v20, v21, v22 and v23 (alpha last): 8 pixels,
// 32 bytes, advancing dst_argb by 32.
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%4], #32 \n"
// Loop while the remaining width is still positive.
"b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(src_a), // %3
"+r"(dst_argb), // %4
"+r"(width) // %5
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_I422ALPHATOARGBROW_NEON
#ifdef HAS_I411TOARGBROW_NEON #ifdef HAS_I411TOARGBROW_NEON
void I411ToARGBRow_NEON(const uint8* src_y, void I411ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u, const uint8* src_u,
...@@ -210,11 +235,11 @@ void I411ToARGBRow_NEON(const uint8* src_y, ...@@ -210,11 +235,11 @@ void I411ToARGBRow_NEON(const uint8* src_y,
int width) { int width) {
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"movi v23.8b, #255 \n" /* A */
"1: \n" "1: \n"
READYUV411 READYUV411
YUVTORGB(v22, v21, v20) YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n" "subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n" /* A */
MEMACCESS(3) MEMACCESS(3)
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
"b.gt 1b \n" "b.gt 1b \n"
...@@ -242,11 +267,11 @@ void I422ToRGBARow_NEON(const uint8* src_y, ...@@ -242,11 +267,11 @@ void I422ToRGBARow_NEON(const uint8* src_y,
int width) { int width) {
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"movi v20.8b, #255 \n" /* A */
"1: \n" "1: \n"
READYUV422 READYUV422
YUVTORGB(v23, v22, v21) YUVTORGB(v23, v22, v21)
"subs %w4, %w4, #8 \n" "subs %w4, %w4, #8 \n"
"movi v20.8b, #255 \n" /* A */
MEMACCESS(3) MEMACCESS(3)
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
"b.gt 1b \n" "b.gt 1b \n"
...@@ -353,11 +378,11 @@ void I422ToARGB1555Row_NEON(const uint8* src_y, ...@@ -353,11 +378,11 @@ void I422ToARGB1555Row_NEON(const uint8* src_y,
int width) { int width) {
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"movi v23.8b, #255 \n"
"1: \n" "1: \n"
READYUV422 READYUV422
YUVTORGB(v22, v21, v20) YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n" "subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n"
ARGBTOARGB1555 ARGBTOARGB1555
MEMACCESS(3) MEMACCESS(3)
"st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565. "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565.
...@@ -428,11 +453,11 @@ void I400ToARGBRow_NEON(const uint8* src_y, ...@@ -428,11 +453,11 @@ void I400ToARGBRow_NEON(const uint8* src_y,
int64 width64 = (int64)(width); int64 width64 = (int64)(width);
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"movi v23.8b, #255 \n"
"1: \n" "1: \n"
READYUV400 READYUV400
YUVTORGB(v22, v21, v20) YUVTORGB(v22, v21, v20)
"subs %w2, %w2, #8 \n" "subs %w2, %w2, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(1) MEMACCESS(1)
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n"
"b.gt 1b \n" "b.gt 1b \n"
...@@ -481,11 +506,11 @@ void NV12ToARGBRow_NEON(const uint8* src_y, ...@@ -481,11 +506,11 @@ void NV12ToARGBRow_NEON(const uint8* src_y,
int width) { int width) {
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"movi v23.8b, #255 \n"
"1: \n" "1: \n"
READNV12 READNV12
YUVTORGB(v22, v21, v20) YUVTORGB(v22, v21, v20)
"subs %w3, %w3, #8 \n" "subs %w3, %w3, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(2) MEMACCESS(2)
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
"b.gt 1b \n" "b.gt 1b \n"
...@@ -511,11 +536,11 @@ void NV21ToARGBRow_NEON(const uint8* src_y, ...@@ -511,11 +536,11 @@ void NV21ToARGBRow_NEON(const uint8* src_y,
int width) { int width) {
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"movi v23.8b, #255 \n"
"1: \n" "1: \n"
READNV21 READNV21
YUVTORGB(v22, v21, v20) YUVTORGB(v22, v21, v20)
"subs %w3, %w3, #8 \n" "subs %w3, %w3, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(2) MEMACCESS(2)
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
"b.gt 1b \n" "b.gt 1b \n"
...@@ -571,11 +596,11 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2, ...@@ -571,11 +596,11 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
int64 width64 = (int64)(width); int64 width64 = (int64)(width);
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"movi v23.8b, #255 \n"
"1: \n" "1: \n"
READYUY2 READYUY2
YUVTORGB(v22, v21, v20) YUVTORGB(v22, v21, v20)
"subs %w2, %w2, #8 \n" "subs %w2, %w2, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(1) MEMACCESS(1)
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n"
"b.gt 1b \n" "b.gt 1b \n"
...@@ -600,11 +625,11 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy, ...@@ -600,11 +625,11 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
int64 width64 = (int64)(width); int64 width64 = (int64)(width);
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"movi v23.8b, #255 \n"
"1: \n" "1: \n"
READUYVY READUYVY
YUVTORGB(v22, v21, v20) YUVTORGB(v22, v21, v20)
"subs %w2, %w2, #8 \n" "subs %w2, %w2, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(1) MEMACCESS(1)
"st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n" "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n"
"b.gt 1b \n" "b.gt 1b \n"
...@@ -1444,6 +1469,14 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, ...@@ -1444,6 +1469,14 @@ void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
} }
#endif // HAS_ARGBTOUV444ROW_NEON #endif // HAS_ARGBTOUV444ROW_NEON
// Assembly fragment that loads the fixed RGB-to-UV constants into v20-v25:
// the 16-bit lanes of v20-v24 are set to 56, 37, 19, 9 and 47 (per the inline
// notes, U/V coefficients already divided by 2), and every byte of v25 is set
// to 0x80. Code that expands this macro overwrites v20-v25.
#define RGBTOUV_SETUP_REG \
"movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \
"movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \
"movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \
"movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \
"movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \
"movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */
// 16x1 pixels -> 8x1. width is number of argb pixels. e.g. 16. // 16x1 pixels -> 8x1. width is number of argb pixels. e.g. 16.
#ifdef HAS_ARGBTOUV422ROW_NEON #ifdef HAS_ARGBTOUV422ROW_NEON
void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment