Commit 9f2d4041 authored by fbarchard@google.com

Neon 1 step conversion of NV12 to RGB565

BUG=136
TEST=unittest on nv12
Review URL: https://webrtc-codereview.appspot.com/939011

git-svn-id: http://libyuv.googlecode.com/svn/trunk@461 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 11c6d32a
......@@ -69,6 +69,8 @@ extern "C" {
#define HAS_MIRRORROWUV_SSSE3
#define HAS_NV12TOARGBROW_SSSE3
#define HAS_NV21TOARGBROW_SSSE3
#define HAS_NV12TORGB565ROW_SSSE3
#define HAS_NV21TORGB565ROW_SSSE3
#define HAS_RAWTOARGBROW_SSSE3
#define HAS_RGB24TOARGBROW_SSSE3
#define HAS_RGB565TOARGBROW_SSE2
......@@ -160,6 +162,8 @@ extern "C" {
#define HAS_MIRRORROWUV_NEON
#define HAS_NV12TOARGBROW_NEON
#define HAS_NV21TOARGBROW_NEON
#define HAS_NV12TORGB565ROW_NEON
#define HAS_NV21TORGB565ROW_NEON
#define HAS_RAWTOARGBROW_NEON
#define HAS_RGB24TOARGBROW_NEON
#define HAS_RGBATOARGBROW_NEON
......@@ -278,6 +282,14 @@ void NV21ToARGBRow_NEON(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
int width);
// NEON row converters that write RGB565 directly from a Y row and an
// interleaved chroma row. The definitions store 8 RGB565 pixels per loop
// iteration.
void NV12ToRGB565Row_NEON(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
int width);
// NV21 counterpart; its definition reads the source through READNV21 instead
// of READNV12.
void NV21ToRGB565Row_NEON(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
int width);
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
......@@ -463,6 +475,15 @@ void NV12ToARGBRow_C(const uint8* y_buf,
uint8* argb_buf,
int width);
// Portable C row converters to RGB565.
// NOTE(review): the third parameter is named argb_buf here, but the
// definitions name it dst_rgb565 and write 2 bytes per pixel, not ARGB.
void NV21ToRGB565Row_C(const uint8* y_buf,
const uint8* vu_buf,
uint8* argb_buf,
int width);
void NV12ToRGB565Row_C(const uint8* y_buf,
const uint8* uv_buf,
uint8* argb_buf,
int width);
void NV21ToARGBRow_C(const uint8* y_buf,
const uint8* vu_buf,
uint8* argb_buf,
......@@ -543,6 +564,16 @@ void NV21ToARGBRow_SSSE3(const uint8* y_buf,
uint8* argb_buf,
int width);
// SSSE3 row converters to RGB565. The definitions convert to ARGB in a
// temporary row and then call ARGBToRGB565Row_SSE2.
// NOTE(review): the third parameter is named argb_buf here, but the
// definitions name it dst_rgb565 and it receives RGB565 output.
void NV12ToRGB565Row_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* argb_buf,
int width);
void NV21ToRGB565Row_SSSE3(const uint8* y_buf,
const uint8* vu_buf,
uint8* argb_buf,
int width);
void I422ToBGRARow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -650,6 +681,14 @@ void NV21ToARGBRow_Any_SSSE3(const uint8* y_buf,
const uint8* vu_buf,
uint8* argb_buf,
int width);
// "Any" SSSE3 variants, generated by the NV2NY macro from the SSSE3 row
// function and the C row function with 2 bytes per output pixel.
// NOTE(review): argb_buf receives RGB565 output despite its name.
void NV12ToRGB565Row_Any_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* argb_buf,
int width);
void NV21ToRGB565Row_Any_SSSE3(const uint8* y_buf,
const uint8* vu_buf,
uint8* argb_buf,
int width);
void I422ToBGRARow_Any_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -785,6 +824,14 @@ void NV21ToARGBRow_Any_NEON(const uint8* y_buf,
const uint8* uv_buf,
uint8* argb_buf,
int width);
// "Any" NEON variants, generated by the NV2NY macro from the NEON row
// function and the C row function with 2 bytes per output pixel.
// NOTE(review): argb_buf receives RGB565 output despite its name.
void NV12ToRGB565Row_Any_NEON(const uint8* y_buf,
const uint8* uv_buf,
uint8* argb_buf,
int width);
void NV21ToRGB565Row_Any_NEON(const uint8* y_buf,
const uint8* uv_buf,
uint8* argb_buf,
int width);
void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......
......@@ -554,13 +554,13 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
}
// Convert NV12 to RGB565.
// TODO(fbarchard): One pass conversion.
LIBYUV_API
int NV12ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height) {
if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) {
if (!src_y || !src_uv || !dst_rgb565 ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
......@@ -569,43 +569,28 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
dst_stride_rgb565 = -dst_stride_rgb565;
}
void (*NV12ToARGBRow)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
int width) = NV12ToARGBRow_C;
#if defined(HAS_NV12TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
void (*NV12ToRGB565Row)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
int width) = NV12ToRGB565Row_C;
#if defined(HAS_NV12TORGB565ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 && width <= kMaxStride * 4) {
NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_SSSE3;
NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
}
}
#elif defined(HAS_NV12TOARGBROW_NEON)
#elif defined(HAS_NV12TORGB565ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_NEON;
}
}
#endif
if (width * 4 > kMaxStride) {
return -1;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToRGB565Row_C;
#if defined(HAS_ARGBTORGB565ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
NV12ToRGB565Row = NV12ToRGB565Row_NEON;
}
}
#endif
for (int y = 0; y < height; ++y) {
NV12ToARGBRow(src_y, src_uv, row, width);
ARGBToRGB565Row(row, dst_rgb565, width);
NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
dst_rgb565 += dst_stride_rgb565;
src_y += src_stride_y;
if (y & 1) {
......@@ -618,10 +603,11 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
// Convert NV21 to RGB565.
LIBYUV_API
int NV21ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
const uint8* src_vu, int src_stride_vu,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height) {
if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) {
if (!src_y || !src_vu || !dst_rgb565 ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
......@@ -630,47 +616,32 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y,
dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
dst_stride_rgb565 = -dst_stride_rgb565;
}
void (*NV21ToARGBRow)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
int width) = NV21ToARGBRow_C;
#if defined(HAS_NV21TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3;
void (*NV21ToRGB565Row)(const uint8* y_buf,
const uint8* vu_buf,
uint8* rgb_buf,
int width) = NV21ToRGB565Row_C;
#if defined(HAS_NV21TORGB565ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 && width <= kMaxStride * 4) {
NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
NV21ToARGBRow = NV21ToARGBRow_SSSE3;
NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
}
}
#elif defined(HAS_NV21TOARGBROW_NEON)
#elif defined(HAS_NV21TORGB565ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
NV21ToARGBRow = NV21ToARGBRow_Any_NEON;
NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
NV21ToARGBRow = NV21ToARGBRow_NEON;
}
}
#endif
if (width * 4 > kMaxStride) {
return -1;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
ARGBToRGB565Row_C;
#if defined(HAS_ARGBTORGB565ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
NV21ToRGB565Row = NV21ToRGB565Row_NEON;
}
}
#endif
for (int y = 0; y < height; ++y) {
NV21ToARGBRow(src_y, src_uv, row, width);
ARGBToRGB565Row(row, dst_rgb565, width);
NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
dst_rgb565 += dst_stride_rgb565;
src_y += src_stride_y;
if (y & 1) {
src_uv += src_stride_uv;
src_vu += src_stride_vu;
}
}
return 0;
......
......@@ -90,16 +90,26 @@ YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15)
rgb_buf + n * BPP, width & 7); \
}
#ifdef HAS_I422TOARGBROW_SSSE3
#ifdef HAS_NV12TOARGBROW_SSSE3
NV2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C,
0, 4)
0, 4)
NV2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C,
0, 4)
#endif // HAS_I422TOARGBROW_SSSE3
#ifdef HAS_I422TOARGBROW_NEON
0, 4)
#endif // HAS_NV12TOARGBROW_SSSE3
#ifdef HAS_NV12TOARGBROW_NEON
NV2NY(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, NV12ToARGBRow_C, 0, 4)
NV2NY(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, NV21ToARGBRow_C, 0, 4)
#endif // HAS_I422TOARGBROW_NEON
#endif // HAS_NV12TOARGBROW_NEON
// RGB565 "Any" wrappers: each pairs a SIMD row function with its C
// counterpart. The final argument (2) is the output bytes per pixel; the
// ARGB instantiations use 4 in the same position.
// NOTE(review): the meaning of the preceding 0 argument is defined by the
// NV2NY macro header, which is not shown here — confirm.
#ifdef HAS_NV12TORGB565ROW_SSSE3
NV2NY(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, NV12ToRGB565Row_C,
0, 2)
NV2NY(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, NV21ToRGB565Row_C,
0, 2)
#endif // HAS_NV12TORGB565ROW_SSSE3
// The NV21 wrappers are guarded by the NV12 feature macro in both branches.
#ifdef HAS_NV12TORGB565ROW_NEON
NV2NY(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, NV12ToRGB565Row_C, 0, 2)
NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, NV21ToRGB565Row_C, 0, 2)
#endif // HAS_NV12TORGB565ROW_NEON
// NOTE(review): the instantiations above use NV2NY, but this undefines
// NVANY — confirm which macro name is intended.
#undef NVANY
// RGB to RGB does multiple of 16 pixels with SIMD and remainder with C.
......
......@@ -710,6 +710,74 @@ void NV21ToARGBRow_C(const uint8* y_buf,
}
}
void NV12ToRGB565Row_C(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_rgb565,
int width) {
uint8 b0;
uint8 g0;
uint8 r0;
uint8 b1;
uint8 g1;
uint8 r1;
for (int x = 0; x < width - 1; x += 2) {
YuvPixel2(y_buf[0], uv_buf[0], uv_buf[1], &b0, &g0, &r0);
YuvPixel2(y_buf[1], uv_buf[0], uv_buf[1], &b1, &g1, &r1);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
b1 = b1 >> 3;
g1 = g1 >> 2;
r1 = r1 >> 3;
*reinterpret_cast<uint32*>(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
(b1 << 16) | (g1 << 21) | (r1 << 27);
y_buf += 2;
uv_buf += 2;
dst_rgb565 += 4; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel2(y_buf[0], uv_buf[0], uv_buf[1], &b0, &g0, &r0);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
*reinterpret_cast<uint16*>(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
}
}
void NV21ToRGB565Row_C(const uint8* y_buf,
const uint8* vu_buf,
uint8* dst_rgb565,
int width) {
uint8 b0;
uint8 g0;
uint8 r0;
uint8 b1;
uint8 g1;
uint8 r1;
for (int x = 0; x < width - 1; x += 2) {
YuvPixel2(y_buf[0], vu_buf[1], vu_buf[0], &b0, &g0, &r0);
YuvPixel2(y_buf[1], vu_buf[1], vu_buf[0], &b1, &g1, &r1);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
b1 = b1 >> 3;
g1 = g1 >> 2;
r1 = r1 >> 3;
*reinterpret_cast<uint32*>(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
(b1 << 16) | (g1 << 21) | (r1 << 27);
y_buf += 2;
vu_buf += 2;
dst_rgb565 += 4; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel2(y_buf[0], vu_buf[1], vu_buf[0], &b0, &g0, &r0);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
*reinterpret_cast<uint16*>(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
}
}
void I422ToBGRARow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
......@@ -1311,6 +1379,24 @@ void I422ToARGB4444Row_SSSE3(const uint8* y_buf,
I422ToARGBRow_SSSE3(y_buf, u_buf, v_buf, row, width);
ARGBToARGB4444Row_SSE2(row, rgb_buf, width);
}
void NV12ToRGB565Row_SSSE3(const uint8* src_y,
const uint8* src_uv,
uint8* dst_rgb565,
int width) {
SIMD_ALIGNED(uint8 row[kMaxStride]);
NV12ToARGBRow_SSSE3(src_y, src_uv, row, width);
ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
}
void NV21ToRGB565Row_SSSE3(const uint8* src_y,
const uint8* src_vu,
uint8* dst_rgb565,
int width) {
SIMD_ALIGNED(uint8 row[kMaxStride]);
NV21ToARGBRow_SSSE3(src_y, src_vu, row, width);
ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
}
#endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
#endif // !defined(YUV_DISABLE_ASM)
......
......@@ -473,6 +473,68 @@ void NV21ToARGBRow_NEON(const uint8* src_y,
}
#endif // HAS_NV21TOARGBROW_NEON
#ifdef HAS_NV12TORGB565ROW_NEON
// Converts one row of NV12 to RGB565 with NEON, 8 pixels per loop iteration.
//
// The loop body always runs at least once and repeats while the remaining
// width is still positive after subtracting 8, so each pass stores a full
// 16 bytes (8 pixels) to dst_rgb565.
// NOTE(review): this implies width is expected to be a positive multiple of
// 8 — confirm against the callers.
void NV12ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_rgb565,
int width) {
asm volatile (
"vld1.u8 {d24}, [%4] \n"  // d24 = 8 bytes loaded from kUVToRB.
"vld1.u8 {d25}, [%5] \n"  // d25 = 8 bytes loaded from kUVToG.
// Constants held in registers for the conversion macros below.
// NOTE(review): presumably chroma bias (128), Y coefficient (74) and Y
// offset (16) — confirm against YUV422TORGB.
"vmov.u8 d26, #128 \n"
"vmov.u16 q14, #74 \n"
"vmov.u16 q15, #16 \n"
".p2align 2 \n"  // Align the loop entry.
"1: \n"
// READNV12, YUV422TORGB and ARGBTORGB565 are macros defined elsewhere;
// presumably they load the next 8 pixels, convert them to RGB and pack the
// result into q0 — confirm against their definitions.
READNV12
YUV422TORGB
"subs %3, %3, #8 \n"  // width -= 8; sets the flags tested by bgt below.
ARGBTORGB565
"vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565.
"bgt 1b \n"  // Loop while the remaining width is > 0.
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_rgb565), // %2
"+r"(width) // %3
: "r"(&kUVToRB), // %4
"r"(&kUVToG) // %5
: "cc", "memory", "q0", "q1", "q2", "q3",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
#endif // HAS_NV12TORGB565ROW_NEON
#ifdef HAS_NV21TORGB565ROW_NEON
// Converts one row of NV21 to RGB565 with NEON, 8 pixels per loop iteration.
// Identical to NV12ToRGB565Row_NEON except that the source is read through
// READNV21.
//
// The loop body always runs at least once and repeats while the remaining
// width is still positive after subtracting 8, so each pass stores a full
// 16 bytes (8 pixels) to dst_rgb565.
// NOTE(review): this implies width is expected to be a positive multiple of
// 8 — confirm against the callers.
// NOTE(review): the chroma parameter is named src_uv although the C
// counterpart calls it vu_buf — presumably it points at VU-ordered data.
void NV21ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_rgb565,
int width) {
asm volatile (
"vld1.u8 {d24}, [%4] \n"  // d24 = 8 bytes loaded from kUVToRB.
"vld1.u8 {d25}, [%5] \n"  // d25 = 8 bytes loaded from kUVToG.
// Constants held in registers for the conversion macros below.
// NOTE(review): presumably chroma bias (128), Y coefficient (74) and Y
// offset (16) — confirm against YUV422TORGB.
"vmov.u8 d26, #128 \n"
"vmov.u16 q14, #74 \n"
"vmov.u16 q15, #16 \n"
".p2align 2 \n"  // Align the loop entry.
"1: \n"
// READNV21, YUV422TORGB and ARGBTORGB565 are macros defined elsewhere;
// presumably they load the next 8 pixels, convert them to RGB and pack the
// result into q0 — confirm against their definitions.
READNV21
YUV422TORGB
"subs %3, %3, #8 \n"  // width -= 8; sets the flags tested by bgt below.
ARGBTORGB565
"vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565.
"bgt 1b \n"  // Loop while the remaining width is > 0.
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_rgb565), // %2
"+r"(width) // %3
: "r"(&kUVToRB), // %4
"r"(&kUVToG) // %5
: "cc", "memory", "q0", "q1", "q2", "q3",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
#endif // HAS_NV21TORGB565ROW_NEON
#ifdef HAS_SPLITUV_NEON
// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v
// Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment