Commit b883ce6e authored by fbarchard@google.com

I411ToARGB_NEON and I444ToARGB_NEON port.

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/964009

git-svn-id: http://libyuv.googlecode.com/svn/trunk@467 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 30859f75
......@@ -184,6 +184,8 @@ extern "C" {
#define HAS_ARGBTOYROW_NEON
#define HAS_MERGEUV_NEON
#define HAS_YTOARGBROW_NEON
#define HAS_I444TOARGBROW_NEON
#define HAS_I411TOARGBROW_NEON
#endif
// The following are available on Mips platforms
......@@ -231,11 +233,21 @@ typedef uint32 uvec32[4];
#define OMITFP __attribute__((optimize("omit-frame-pointer")))
#endif
// NEON row converters. Each takes separate Y, U and V row pointers plus an
// output pointer and a pixel count.
// I444ToARGBRow_NEON: its definition reads 8 U and 8 V bytes for every 8 Y
// bytes (READYUV444) and stores 4 bytes per output pixel.
void I444ToARGBRow_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
// NOTE(review): by its name this is the 4:2:2 variant of the same conversion;
// confirm against its definition.
void I422ToARGBRow_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
// I411ToARGBRow_NEON: its definition reads 2 U and 2 V bytes for every 8 Y
// bytes (READYUV411) and stores 4 bytes per output pixel.
void I411ToARGBRow_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void I422ToBGRARow_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -757,11 +769,21 @@ void RGBAToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
// "Any" wrappers around the NEON row converters, instantiated with the YANY
// macro. I444ToARGB and I411ToARGB pick the _Any_ wrapper when NEON is
// available and width >= 8, then switch to the plain _NEON kernel when width
// is a multiple of 8.
// NOTE(review): the same selection rule presumably applies to the I422
// wrapper; its dispatcher is not visible here - confirm.
void I444ToARGBRow_Any_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void I422ToARGBRow_Any_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void I411ToARGBRow_Any_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void I422ToBGRARow_Any_NEON(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......
......@@ -80,6 +80,13 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
}
}
}
#elif defined(HAS_I444TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I444ToARGBRow = I444ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I444ToARGBRow = I444ToARGBRow_NEON;
}
}
#endif
for (int y = 0; y < height; ++y) {
......@@ -185,6 +192,13 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
}
}
}
#elif defined(HAS_I411TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I411ToARGBRow = I411ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I411ToARGBRow = I411ToARGBRow_NEON;
}
}
#endif
for (int y = 0; y < height; ++y) {
......
......@@ -61,7 +61,9 @@ YANY(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, I422ToYUY2Row_C, 1, 2, 15)
YANY(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, I422ToUYVYRow_C, 1, 2, 15)
#endif // HAS_I422TOARGBROW_SSSE3
#ifdef HAS_I422TOARGBROW_NEON
// Each YANY line names the generated wrapper, the NEON kernel it wraps and
// the C row function, followed by three integers.
// NOTE(review): the integers look like the U/V horizontal subsampling shift
// (0 for 444, 1 for 422, 2 for 411), the output bytes per pixel (4) and the
// width mask (7, i.e. 8-pixel kernels) - confirm against the YANY definition.
// NOTE(review): the I444 and I411 wrappers are compiled under
// HAS_I422TOARGBROW_NEON rather than HAS_I444TOARGBROW_NEON /
// HAS_I411TOARGBROW_NEON; this only works while all three are defined together.
YANY(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, I444ToARGBRow_C, 0, 4, 7)
YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1, 4, 7)
YANY(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, I411ToARGBRow_C, 2, 4, 7)
YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1, 4, 7)
YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1, 4, 7)
YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1, 4, 7)
......
......@@ -467,6 +467,29 @@ static __inline void YuvPixel2(uint8 y, uint8 u, uint8 v,
*r = Clip(static_cast<int32>((u * UR + v * VR) - (BR) + y1) >> 6);
}
#if defined(__ARM_NEON__)
// C mimic assembly.
// TODO(fbarchard): Remove subsampling from Neon.
void I444ToARGBRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
for (int x = 0; x < width - 1; x += 2) {
uint8 u = (u_buf[0] + u_buf[1] + 1) >> 1;
uint8 v = (v_buf[0] + v_buf[1] + 1) >> 1;
YuvPixel(y_buf[0], u, v, rgb_buf + 0, 24, 16, 8, 0);
YuvPixel(y_buf[1], u, v, rgb_buf + 4, 24, 16, 8, 0);
y_buf += 2;
u_buf += 2;
v_buf += 2;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(y_buf[0], u_buf[0], v_buf[0], rgb_buf + 0, 24, 16, 8, 0);
}
}
#else
void I444ToARGBRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -480,7 +503,7 @@ void I444ToARGBRow_C(const uint8* y_buf,
rgb_buf += 4; // Advance 1 pixel.
}
}
#endif
// Also used for 420
void I422ToARGBRow_C(const uint8* y_buf,
const uint8* u_buf,
......
......@@ -24,6 +24,22 @@ extern "C" {
"vld1.u32 {d2[0]}, [%1]! \n" \
"vld1.u32 {d2[1]}, [%2]! \n"
// Read 8 Y, 2 U and 2 V from 411
// Loads 8 bytes from %0 into d0 and 2 bytes each from %1 and %2 into the low
// half of d2, post-incrementing all three pointers. The vmov/vzip pair then
// duplicates every byte, leaving d2 = U0 U0 U1 U1 V0 V0 V1 V1 (with %1 = U and
// %2 = V). d3 is used as scratch.
#define READYUV411 \
"vld1.u8 {d0}, [%0]! \n" \
"vld1.u16 {d2[0]}, [%1]! \n" \
"vld1.u16 {d2[1]}, [%2]! \n" \
"vmov.u8 d3, d2 \n" \
"vzip.u8 d2, d3 \n"
// Read 8 Y, 8 U and 8 V from 444
// Loads 8 bytes each from %0, %1 and %2 into d0, d2 and d3, post-incrementing
// all three pointers. vpaddl sums horizontally adjacent bytes of q1 (d2:d3)
// into 16-bit lanes and vrshrn halves those sums with rounding, so d2 ends up
// holding 4 pair-averaged bytes from %1 followed by 4 from %2.
#define READYUV444 \
"vld1.u8 {d0}, [%0]! \n" \
"vld1.u8 {d2}, [%1]! \n" \
"vld1.u8 {d3}, [%2]! \n" \
"vpaddl.u8 q1, q1 \n" \
"vrshrn.u16 d2, q1, #1 \n"
// Read 8 Y, and set 4 U and 4 V to 128
#define READYUV400 \
"vld1.u8 {d0}, [%0]! \n" \
......@@ -79,6 +95,39 @@ static const vec8 kUVToG = { -25, -25, -25, -25, -52, -52, -52, -52,
0, 0, 0, 0, 0, 0, 0, 0 };
#endif
#ifdef HAS_I444TOARGBROW_NEON
// Converts a row with one U and one V byte per Y byte to 4-byte output
// pixels, 8 pixels per loop iteration.
// The loop body runs before width is tested and width drops by 8 each pass,
// so width is expected to be a positive multiple of 8; I444ToARGB selects
// this kernel only when IS_ALIGNED(width, 8).
void I444ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
int width) {
asm volatile (
// d24 / d25 = first 8 bytes of the kUVToRB / kUVToG tables.
"vld1.u8 {d24}, [%5] \n"
"vld1.u8 {d25}, [%6] \n"
// NOTE(review): 128, 74 and 16 are presumably the chroma bias, Y scale and
// Y offset consumed by YUV422TORGB - confirm against that macro.
"vmov.u8 d26, #128 \n"
"vmov.u16 q14, #74 \n"
"vmov.u16 q15, #16 \n"
".p2align 2 \n"
"1: \n"
// Load 8 Y into d0 and pair-averaged U/V into d2.
READYUV444
// NOTE(review): defined outside this view; presumably leaves the three
// color channels in d20-d22 - confirm.
YUV422TORGB
"subs %4, %4, #8 \n"
// Fourth byte of every output pixel is the constant 255.
"vmov.u8 d23, #255 \n"
// Interleaved store: 8 pixels x 4 bytes, then advance dst_argb.
"vst4.8 {d20, d21, d22, d23}, [%3]! \n"
// Loop while the decremented width is still > 0.
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
: "r"(&kUVToRB), // %5
"r"(&kUVToG) // %6
: "cc", "memory", "q0", "q1", "q2", "q3",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
#endif // HAS_I444TOARGBROW_NEON
#ifdef HAS_I422TOARGBROW_NEON
void I422ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
......@@ -112,6 +161,39 @@ void I422ToARGBRow_NEON(const uint8* src_y,
}
#endif // HAS_I422TOARGBROW_NEON
#ifdef HAS_I411TOARGBROW_NEON
// Converts a row with one U and one V byte per four Y bytes to 4-byte output
// pixels, 8 pixels per loop iteration.
// The loop body runs before width is tested and width drops by 8 each pass,
// so width is expected to be a positive multiple of 8; I411ToARGB selects
// this kernel only when IS_ALIGNED(width, 8).
void I411ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
int width) {
asm volatile (
// d24 / d25 = first 8 bytes of the kUVToRB / kUVToG tables.
"vld1.u8 {d24}, [%5] \n"
"vld1.u8 {d25}, [%6] \n"
// NOTE(review): 128, 74 and 16 are presumably the chroma bias, Y scale and
// Y offset consumed by YUV422TORGB - confirm against that macro.
"vmov.u8 d26, #128 \n"
"vmov.u16 q14, #74 \n"
"vmov.u16 q15, #16 \n"
".p2align 2 \n"
"1: \n"
// Load 8 Y into d0 and 2 U + 2 V, each byte duplicated, into d2.
READYUV411
// NOTE(review): defined outside this view; presumably leaves the three
// color channels in d20-d22 - confirm.
YUV422TORGB
"subs %4, %4, #8 \n"
// Fourth byte of every output pixel is the constant 255.
"vmov.u8 d23, #255 \n"
// Interleaved store: 8 pixels x 4 bytes, then advance dst_argb.
"vst4.8 {d20, d21, d22, d23}, [%3]! \n"
// Loop while the decremented width is still > 0.
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
: "r"(&kUVToRB), // %5
"r"(&kUVToG) // %6
: "cc", "memory", "q0", "q1", "q2", "q3",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
#endif // HAS_I411TOARGBROW_NEON
#ifdef HAS_I422TOBGRAROW_NEON
void I422ToBGRARow_NEON(const uint8* src_y,
const uint8* src_u,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment