Commit 0ea50cbc authored by Frank Barchard

NV21ToRGB24_NEON conversion

32 bit thumb2 performance:
NV12ToARGB_Opt (472 ms)
NV21ToARGB_Opt (466 ms)
NV12ToRGB24_Opt (457 ms)
NV21ToRGB24_Opt (457 ms)
NV12ToRGB565_Opt (501 ms)

Bug: libyuv:778
Test: add new NV21ToRGB24 test
Change-Id: I330585789835c79ee4b4da61d164716598268df3
Reviewed-on: https://chromium-review.googlesource.com/924646
Reviewed-by: Cheng Wang <wangcheng@google.com>
parent a96d1dd0
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1700
Version: 1701
License: BSD
License File: LICENSE
......
......@@ -276,6 +276,28 @@ int NV21ToABGR(const uint8_t* src_y,
int width,
int height);
// Convert NV12 to RGB24.
// src_y and src_uv are the luma plane and the interleaved chroma plane; each
// stride is the byte step added to the matching pointer between rows.
// dst_rgb24 receives 3 bytes per pixel.
// A negative height writes the image bottom-up (vertically inverted).
// Returns 0 on success, or -1 if a pointer is NULL, width <= 0 or height == 0.
LIBYUV_API
int NV12ToRGB24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height);
// Convert NV21 to RGB24.
// src_y and src_vu are the luma plane and the interleaved chroma plane; each
// stride is the byte step added to the matching pointer between rows.
// dst_rgb24 receives 3 bytes per pixel.
// A negative height writes the image bottom-up (vertically inverted).
// Returns 0 on success, or -1 if a pointer is NULL, width <= 0 or height == 0.
LIBYUV_API
int NV21ToRGB24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
int src_stride_vu,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height);
// Convert M420 to ARGB.
LIBYUV_API
int M420ToARGB(const uint8_t* src_m420,
......
......@@ -326,8 +326,10 @@ extern "C" {
#define HAS_MIRRORROW_NEON
#define HAS_MIRRORUVROW_NEON
#define HAS_NV12TOARGBROW_NEON
#define HAS_NV12TORGB24ROW_NEON
#define HAS_NV12TORGB565ROW_NEON
#define HAS_NV21TOARGBROW_NEON
#define HAS_NV21TORGB24ROW_NEON
#define HAS_RAWTOARGBROW_NEON
#define HAS_RAWTORGB24ROW_NEON
#define HAS_RAWTOUVROW_NEON
......@@ -703,6 +705,16 @@ void NV21ToARGBRow_NEON(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
// NEON row converters from NV12 / NV21 to RGB24.
// Each takes one row of luma, one row of interleaved chroma, the destination
// row, the conversion constants and the pixel count.
// The implementations consume 8 pixels per loop iteration, so callers are
// expected to pass a width that is a multiple of 8.
void NV12ToRGB24Row_NEON(const uint8_t* src_y,
const uint8_t* src_uv,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width);
void NV21ToRGB24Row_NEON(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width);
void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
......@@ -1815,6 +1827,16 @@ void NV21ToARGBRow_C(const uint8_t* src_y,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
// Portable C row converters from NV12 / NV21 to RGB24.
// Each takes one row of luma, one row of interleaved chroma, the destination
// row (3 bytes written per pixel), the conversion constants and the pixel
// count. Any width is accepted; an odd trailing pixel is handled separately.
void NV12ToRGB24Row_C(const uint8_t* src_y,
const uint8_t* src_uv,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void NV21ToRGB24Row_C(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
......@@ -2545,6 +2567,16 @@ void NV21ToARGBRow_Any_NEON(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
// "Any width" NEON row converters from NV12 / NV21 to RGB24.
// The converters select these when NEON is available and width is not a
// multiple of 8.
// NOTE(review): presumably these wrap the *_NEON kernels and handle the
// remainder pixels separately - confirm against the ANY21C macro.
void NV12ToRGB24Row_Any_NEON(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void NV21ToRGB24Row_Any_NEON(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToRGB565Row_Any_NEON(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1700
#define LIBYUV_VERSION 1701
#endif // INCLUDE_LIBYUV_VERSION_H_
......@@ -1757,6 +1757,124 @@ int NV21ToABGR(const uint8_t* src_y,
dst_stride_abgr, &kYvuI601Constants, width, height);
}
// TODO(fbarchard): Consider SSSE3 2 step conversion.
// Convert NV12 to RGB24 with matrix.
//
// src_y / src_stride_y:         luma plane and its row step in bytes.
// src_uv / src_stride_uv:       interleaved chroma plane and its row step.
// dst_rgb24 / dst_stride_rgb24: destination (3 bytes per pixel) and row step.
// yuvconstants:                 conversion constants handed to the row function.
// width, height:                image size in pixels; a negative height writes
//                               the destination bottom-up (inverted image).
//
// Returns 0 on success, or -1 if a pointer is NULL, width <= 0 or height == 0.
static int NV12ToRGB24Matrix(const uint8_t* src_y,
                             int src_stride_y,
                             const uint8_t* src_uv,
                             int src_stride_uv,
                             uint8_t* dst_rgb24,
                             int dst_stride_rgb24,
                             const struct YuvConstants* yuvconstants,
                             int width,
                             int height) {
  int y;
  void (*NV12ToRGB24Row)(
      const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
      const struct YuvConstants* yuvconstants, int width) = NV12ToRGB24Row_C;
  if (!src_y || !src_uv || !dst_rgb24 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    // Compute the offset of the last row in 64 bits: with 3 bytes per pixel,
    // (height - 1) * stride can exceed INT_MAX for very large images, and
    // signed int overflow is undefined behaviour.
    dst_rgb24 = dst_rgb24 + (int64_t)(height - 1) * dst_stride_rgb24;
    dst_stride_rgb24 = -dst_stride_rgb24;
  }
#if defined(HAS_NV12TORGB24ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    // The "Any" variant accepts every width; the plain NEON kernel is only
    // selected when width is a multiple of 8.
    NV12ToRGB24Row = NV12ToRGB24Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      NV12ToRGB24Row = NV12ToRGB24Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    NV12ToRGB24Row(src_y, src_uv, dst_rgb24, yuvconstants, width);
    dst_rgb24 += dst_stride_rgb24;
    src_y += src_stride_y;
    // Chroma is vertically subsampled: advance the UV row only after every
    // odd luma row, so two consecutive luma rows share one UV row.
    if (y & 1) {
      src_uv += src_stride_uv;
    }
  }
  return 0;
}
// Convert NV21 to RGB24 with matrix.
//
// src_y / src_stride_y:         luma plane and its row step in bytes.
// src_vu / src_stride_vu:       interleaved chroma plane and its row step.
// dst_rgb24 / dst_stride_rgb24: destination (3 bytes per pixel) and row step.
// yuvconstants:                 conversion constants handed to the row function.
// width, height:                image size in pixels; a negative height writes
//                               the destination bottom-up (inverted image).
//
// Returns 0 on success, or -1 if a pointer is NULL, width <= 0 or height == 0.
static int NV21ToRGB24Matrix(const uint8_t* src_y,
                             int src_stride_y,
                             const uint8_t* src_vu,
                             int src_stride_vu,
                             uint8_t* dst_rgb24,
                             int dst_stride_rgb24,
                             const struct YuvConstants* yuvconstants,
                             int width,
                             int height) {
  int y;
  void (*NV21ToRGB24Row)(
      const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
      const struct YuvConstants* yuvconstants, int width) = NV21ToRGB24Row_C;
  if (!src_y || !src_vu || !dst_rgb24 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    // Compute the offset of the last row in 64 bits: with 3 bytes per pixel,
    // (height - 1) * stride can exceed INT_MAX for very large images, and
    // signed int overflow is undefined behaviour.
    dst_rgb24 = dst_rgb24 + (int64_t)(height - 1) * dst_stride_rgb24;
    dst_stride_rgb24 = -dst_stride_rgb24;
  }
#if defined(HAS_NV21TORGB24ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    // The "Any" variant accepts every width; the plain NEON kernel is only
    // selected when width is a multiple of 8.
    NV21ToRGB24Row = NV21ToRGB24Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      NV21ToRGB24Row = NV21ToRGB24Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    NV21ToRGB24Row(src_y, src_vu, dst_rgb24, yuvconstants, width);
    dst_rgb24 += dst_stride_rgb24;
    src_y += src_stride_y;
    // Chroma is vertically subsampled: advance the VU row only after every
    // odd luma row, so two consecutive luma rows share one VU row.
    if (y & 1) {
      src_vu += src_stride_vu;
    }
  }
  return 0;
}
// TODO(fbarchard): \(fbarchard): NV12ToRAW can be implemented by mirrored
// matrix. Convert NV12 to RGB24.
LIBYUV_API
int NV12ToRGB24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height) {
return NV12ToRGB24Matrix(src_y, src_stride_y, src_uv, src_stride_uv,
dst_rgb24, dst_stride_rgb24, &kYuvI601Constants,
width, height);
}
// Convert NV21 to RGB24.
LIBYUV_API
int NV21ToRGB24(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
int src_stride_vu,
uint8_t* dst_rgb24,
int dst_stride_rgb24,
int width,
int height) {
return NV21ToRGB24Matrix(src_y, src_stride_y, src_vu, src_stride_vu,
dst_rgb24, dst_stride_rgb24, &kYuvI601Constants,
width, height);
}
// Convert M420 to ARGB.
LIBYUV_API
int M420ToARGB(const uint8_t* src_m420,
......
......@@ -380,6 +380,12 @@ ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
#ifdef HAS_NV21TOARGBROW_MSA
ANY21C(NV21ToARGBRow_Any_MSA, NV21ToARGBRow_MSA, 1, 1, 2, 4, 7)
#endif
// Instantiate the any-width wrappers around the NEON NV12/NV21 to RGB24 row
// kernels; each is only built when the matching HAS_* macro is defined.
// NOTE(review): the ANY21C definition is not visible here. The 3 presumably
// is the destination bytes per pixel and the 7 a width mask matching the
// 8-pixel kernel step - confirm against the macro.
#ifdef HAS_NV12TORGB24ROW_NEON
ANY21C(NV12ToRGB24Row_Any_NEON, NV12ToRGB24Row_NEON, 1, 1, 2, 3, 7)
#endif
#ifdef HAS_NV21TORGB24ROW_NEON
ANY21C(NV21ToRGB24Row_Any_NEON, NV21ToRGB24Row_NEON, 1, 1, 2, 3, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_SSSE3
ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
#endif
......
......@@ -1858,6 +1858,48 @@ void NV21ToARGBRow_C(const uint8_t* src_y,
}
}
// Convert one row of NV12 to 3-byte-per-pixel output using YuvPixel.
// src_uv holds interleaved chroma; src_uv[0] and src_uv[1] are passed to
// YuvPixel as its second and third arguments and are shared by two
// horizontally adjacent pixels.
// NOTE(review): output byte order is presumably B, G, R - confirm against
// YuvPixel.
void NV12ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_uv,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int remaining = width;
  // Full pairs: two luma samples consume one chroma pair.
  while (remaining > 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    YuvPixel(src_y[1], src_uv[0], src_uv[1], &rgb_buf[3], &rgb_buf[4],
             &rgb_buf[5], yuvconstants);
    src_y += 2;
    src_uv += 2;
    rgb_buf += 6;  // Advance 2 pixels.
    remaining -= 2;
  }
  // Odd width: convert the final pixel with the next chroma pair.
  if ((width & 1) != 0) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
  }
}
// Convert one row of NV21 to 3-byte-per-pixel output using YuvPixel.
// src_vu holds interleaved chroma in the opposite order to NV12: src_vu[1]
// is passed as YuvPixel's second argument and src_vu[0] as its third. Each
// chroma pair is shared by two horizontally adjacent pixels.
// NOTE(review): output byte order is presumably B, G, R - confirm against
// YuvPixel.
void NV21ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int remaining = width;
  // Full pairs: two luma samples consume one chroma pair.
  while (remaining > 1) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    YuvPixel(src_y[1], src_vu[1], src_vu[0], &rgb_buf[3], &rgb_buf[4],
             &rgb_buf[5], yuvconstants);
    src_y += 2;
    src_vu += 2;
    rgb_buf += 6;  // Advance 2 pixels.
    remaining -= 2;
  }
  // Odd width: convert the final pixel with the next chroma pair.
  if ((width & 1) != 0) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
  }
}
void NV12ToRGB565Row_C(const uint8_t* src_y,
const uint8_t* src_uv,
uint8_t* dst_rgb565,
......
......@@ -424,6 +424,60 @@ void NV21ToARGBRow_NEON(const uint8_t* src_y,
"q10", "q11", "q12", "q13", "q14", "q15");
}
// 32-bit NEON: convert one row of NV12 to RGB24.
// Each loop iteration subtracts 8 from width and stores three D registers
// (24 bytes, 3 bytes per pixel) with a de-interleaving vst3.8, looping while
// the remaining width is > 0. A width that is not a multiple of 8 therefore
// still produces a full 8-pixel store on the last iteration.
// NOTE(review): the loads and colour math come from the READNV12 / YUVTORGB
// macros, which are not visible here; d20/d21/d22 presumably hold B/G/R -
// confirm against the macro definitions.
void NV12ToRGB24Row_NEON(const uint8_t* src_y,
const uint8_t* src_uv,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
YUVTORGB_SETUP
"1: \n"
READNV12 YUVTORGB
"subs %3, %3, #8 \n"
"vst3.8 {d20, d21, d22}, [%2]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_rgb24), // %2
"+r"(width) // %3
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
"q12", "q13", "q14", "q15");
}
// 32-bit NEON: convert one row of NV21 to RGB24.
// Each loop iteration subtracts 8 from width and stores three D registers
// (24 bytes, 3 bytes per pixel) with a de-interleaving vst3.8, looping while
// the remaining width is > 0. A width that is not a multiple of 8 therefore
// still produces a full 8-pixel store on the last iteration.
// NOTE(review): the loads and colour math come from the READNV21 / YUVTORGB
// macros, which are not visible here; d20/d21/d22 presumably hold B/G/R -
// confirm against the macro definitions.
void NV21ToRGB24Row_NEON(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile(
YUVTORGB_SETUP
"1: \n"
READNV21 YUVTORGB
"subs %3, %3, #8 \n"
"vst3.8 {d20, d21, d22}, [%2]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_vu), // %1
"+r"(dst_rgb24), // %2
"+r"(width) // %3
: [kUVToRB] "r"(&yuvconstants->kUVToRB),
[kUVToG] "r"(&yuvconstants->kUVToG),
[kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR),
[kYToRgb] "r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11",
"q12", "q13", "q14", "q15");
}
void NV12ToRGB565Row_NEON(const uint8_t* src_y,
const uint8_t* src_uv,
uint8_t* dst_rgb565,
......
......@@ -463,6 +463,58 @@ void NV21ToARGBRow_NEON(const uint8_t* src_y,
);
}
// 64-bit NEON: convert one row of NV12 to RGB24.
// Each loop iteration subtracts 8 from width and stores v20/v21/v22
// interleaved with st3, post-incrementing the destination by 24 bytes
// (8 pixels x 3 bytes), looping while the remaining width is > 0. A width
// that is not a multiple of 8 therefore still produces a full 8-pixel store
// on the last iteration.
// NOTE(review): the loads and colour math come from the READNV12 / YUVTORGB
// macros, which are not visible here; YUVTORGB(v22, v21, v20) presumably
// names the R, G, B destination registers - confirm against the macro.
void NV12ToRGB24Row_NEON(const uint8_t* src_y,
const uint8_t* src_uv,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP
"1: \n"
READNV12
YUVTORGB(v22, v21, v20)
"subs %w3, %w3, #8 \n"
"st3 {v20.8b,v21.8b,v22.8b}, [%2], #24 \n"
"b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_rgb24), // %2
"+r"(width) // %3
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
// 64-bit NEON: convert one row of NV21 to RGB24.
// Each loop iteration subtracts 8 from width and stores v20/v21/v22
// interleaved with st3, post-incrementing the destination by 24 bytes
// (8 pixels x 3 bytes), looping while the remaining width is > 0. A width
// that is not a multiple of 8 therefore still produces a full 8-pixel store
// on the last iteration.
// NOTE(review): the loads and colour math come from the READNV21 / YUVTORGB
// macros, which are not visible here; YUVTORGB(v22, v21, v20) presumably
// names the R, G, B destination registers - confirm against the macro.
void NV21ToRGB24Row_NEON(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width) {
asm volatile (
YUVTORGB_SETUP
"1: \n"
READNV21
YUVTORGB(v22, v21, v20)
"subs %w3, %w3, #8 \n"
"st3 {v20.8b,v21.8b,v22.8b}, [%2], #24 \n"
"b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_vu), // %1
"+r"(dst_rgb24), // %2
"+r"(width) // %3
: [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
[kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
void NV12ToRGB565Row_NEON(const uint8_t* src_y,
const uint8_t* src_uv,
uint8_t* dst_rgb565,
......
......@@ -746,6 +746,8 @@ TESTBIPLANARTOB(NV12, 2, 2, ARGB, 4, 2)
TESTBIPLANARTOB(NV21, 2, 2, ARGB, 4, 2)
TESTBIPLANARTOB(NV12, 2, 2, ABGR, 4, 2)
TESTBIPLANARTOB(NV21, 2, 2, ABGR, 4, 2)
// Exercise the NV12 and NV21 to RGB24 converters.
// NOTE(review): arguments presumably are source format, chroma subsampling
// in x and y, destination format, destination bytes per pixel (3 for RGB24)
// and the allowed C-vs-optimized difference - confirm against the macro.
TESTBIPLANARTOB(NV12, 2, 2, RGB24, 3, 2)
TESTBIPLANARTOB(NV21, 2, 2, RGB24, 3, 2)
TESTBIPLANARTOB(NV12, 2, 2, RGB565, 2, 9)
#ifdef DO_THREE_PLANES
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment