Commit 09b8c971 authored by Manojkumar Bhosale's avatar Manojkumar Bhosale

Add MSA optimized NV12/21 To RGB row functions

R=fbarchard@google.com
BUG=libyuv:634

Performance Gain (vs C auto-vectorized)
NV12ToARGBRow_MSA       - ~1.5x
NV12ToARGBRow_Any_MSA   - ~1.4x
NV12ToRGB565Row_MSA     - ~1.4x
NV12ToRGB565Row_Any_MSA - ~1.4x
NV21ToARGBRow_MSA       - ~1.5x
NV21ToARGBRow_Any_MSA   - ~1.5x
SobelRow_MSA            - ~4.3x
SobelRow_Any_MSA        - ~3.4x
SobelToPlaneRow_MSA     - ~8.0x
SobelToPlaneRow_Any_MSA - ~4.7x
SobelXYRow_MSA          - ~3.0x
SobelXYRow_Any_MSA      - ~2.5x

Performance Gain (vs C non-vectorized)
NV12ToARGBRow_MSA       - ~6.5x
NV12ToARGBRow_Any_MSA   - ~6.5x
NV12ToRGB565Row_MSA     - ~6.2x
NV12ToRGB565Row_Any_MSA - ~6.1x
NV21ToARGBRow_MSA       - ~6.5x
NV21ToARGBRow_Any_MSA   - ~6.5x
SobelRow_MSA            - ~14.5x
SobelRow_Any_MSA        - ~11.3x
SobelToPlaneRow_MSA     - ~34.2x
SobelToPlaneRow_Any_MSA - ~19.4x
SobelXYRow_MSA          - ~11.1x
SobelXYRow_Any_MSA      - ~9.1x

Review-Url: https://codereview.chromium.org/2636483002 .
parent a7c87e19
......@@ -428,6 +428,12 @@ extern "C" {
#define HAS_RGB565TOUVROW_MSA
#define HAS_RGB24TOUVROW_MSA
#define HAS_RAWTOUVROW_MSA
#define HAS_NV12TOARGBROW_MSA
#define HAS_NV12TORGB565ROW_MSA
#define HAS_NV21TOARGBROW_MSA
#define HAS_SOBELROW_MSA
#define HAS_SOBELTOPLANEROW_MSA
#define HAS_SOBELXYROW_MSA
#endif
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
......@@ -807,6 +813,21 @@ void I422ToARGB1555Row_MSA(const uint8* src_y,
uint8* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToARGBRow_MSA(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToRGB565Row_MSA(const uint8* src_y,
const uint8* src_uv,
uint8* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width);
void NV21ToARGBRow_MSA(const uint8* src_y,
const uint8* src_vu,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width);
void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width);
......@@ -2379,6 +2400,21 @@ void I422ToARGB1555Row_Any_MSA(const uint8* src_y,
uint8* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToARGBRow_Any_MSA(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToRGB565Row_Any_MSA(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void NV21ToARGBRow_Any_MSA(const uint8* src_y,
const uint8* src_vu,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width);
void YUY2ToUVRow_AVX2(const uint8* src_yuy2,
......@@ -2868,6 +2904,10 @@ void SobelRow_NEON(const uint8* src_sobelx,
const uint8* src_sobely,
uint8* dst_argb,
int width);
void SobelRow_MSA(const uint8* src_sobelx,
const uint8* src_sobely,
uint8* dst_argb,
int width);
void SobelToPlaneRow_C(const uint8* src_sobelx,
const uint8* src_sobely,
uint8* dst_y,
......@@ -2880,6 +2920,10 @@ void SobelToPlaneRow_NEON(const uint8* src_sobelx,
const uint8* src_sobely,
uint8* dst_y,
int width);
void SobelToPlaneRow_MSA(const uint8* src_sobelx,
const uint8* src_sobely,
uint8* dst_y,
int width);
void SobelXYRow_C(const uint8* src_sobelx,
const uint8* src_sobely,
uint8* dst_argb,
......@@ -2892,6 +2936,10 @@ void SobelXYRow_NEON(const uint8* src_sobelx,
const uint8* src_sobely,
uint8* dst_argb,
int width);
void SobelXYRow_MSA(const uint8* src_sobelx,
const uint8* src_sobely,
uint8* dst_argb,
int width);
void SobelRow_Any_SSE2(const uint8* src_sobelx,
const uint8* src_sobely,
uint8* dst_argb,
......@@ -2900,6 +2948,10 @@ void SobelRow_Any_NEON(const uint8* src_sobelx,
const uint8* src_sobely,
uint8* dst_argb,
int width);
void SobelRow_Any_MSA(const uint8* src_sobelx,
const uint8* src_sobely,
uint8* dst_argb,
int width);
void SobelToPlaneRow_Any_SSE2(const uint8* src_sobelx,
const uint8* src_sobely,
uint8* dst_y,
......@@ -2908,6 +2960,10 @@ void SobelToPlaneRow_Any_NEON(const uint8* src_sobelx,
const uint8* src_sobely,
uint8* dst_y,
int width);
void SobelToPlaneRow_Any_MSA(const uint8* src_sobelx,
const uint8* src_sobely,
uint8* dst_y,
int width);
void SobelXYRow_Any_SSE2(const uint8* src_sobelx,
const uint8* src_sobely,
uint8* dst_argb,
......@@ -2916,6 +2972,10 @@ void SobelXYRow_Any_NEON(const uint8* src_sobelx,
const uint8* src_sobely,
uint8* dst_argb,
int width);
void SobelXYRow_Any_MSA(const uint8* src_sobelx,
const uint8* src_sobely,
uint8* dst_argb,
int width);
void ARGBPolynomialRow_C(const uint8* src_argb,
uint8* dst_argb,
......
......@@ -1326,6 +1326,14 @@ int NV12ToARGB(const uint8* src_y,
}
}
#endif
#if defined(HAS_NV12TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
NV12ToARGBRow = NV12ToARGBRow_Any_MSA;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_MSA;
}
}
#endif
for (y = 0; y < height; ++y) {
NV12ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width);
......@@ -1385,6 +1393,14 @@ int NV21ToARGB(const uint8* src_y,
}
}
#endif
#if defined(HAS_NV21TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
NV21ToARGBRow = NV21ToARGBRow_Any_MSA;
if (IS_ALIGNED(width, 8)) {
NV21ToARGBRow = NV21ToARGBRow_MSA;
}
}
#endif
for (y = 0; y < height; ++y) {
NV21ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width);
......@@ -1450,6 +1466,14 @@ int M420ToARGB(const uint8* src_m420,
}
}
#endif
#if defined(HAS_NV12TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
NV12ToARGBRow = NV12ToARGBRow_Any_MSA;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_MSA;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
......
......@@ -1443,6 +1443,14 @@ int NV12ToRGB565(const uint8* src_y,
}
}
#endif
#if defined(HAS_NV12TORGB565ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
NV12ToRGB565Row = NV12ToRGB565Row_Any_MSA;
if (IS_ALIGNED(width, 8)) {
NV12ToRGB565Row = NV12ToRGB565Row_MSA;
}
}
#endif
for (y = 0; y < height; ++y) {
NV12ToRGB565Row(src_y, src_uv, dst_rgb565, &kYuvI601Constants, width);
......@@ -2650,6 +2658,14 @@ int ARGBSobel(const uint8* src_argb,
SobelRow = SobelRow_NEON;
}
}
#endif
#if defined(HAS_SOBELROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
SobelRow = SobelRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
SobelRow = SobelRow_MSA;
}
}
#endif
return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
width, height, SobelRow);
......@@ -2680,6 +2696,14 @@ int ARGBSobelToPlane(const uint8* src_argb,
SobelToPlaneRow = SobelToPlaneRow_NEON;
}
}
#endif
#if defined(HAS_SOBELTOPLANEROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
SobelToPlaneRow = SobelToPlaneRow_Any_MSA;
if (IS_ALIGNED(width, 32)) {
SobelToPlaneRow = SobelToPlaneRow_MSA;
}
}
#endif
return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, width,
height, SobelToPlaneRow);
......@@ -2711,6 +2735,14 @@ int ARGBSobelXY(const uint8* src_argb,
SobelXYRow = SobelXYRow_NEON;
}
}
#endif
#if defined(HAS_SOBELXYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
SobelXYRow = SobelXYRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
SobelXYRow = SobelXYRow_MSA;
}
}
#endif
return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
width, height, SobelXYRow);
......
......@@ -255,18 +255,27 @@ ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15)
#ifdef HAS_SOBELROW_NEON
ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7)
#endif
#ifdef HAS_SOBELROW_MSA
ANY21(SobelRow_Any_MSA, SobelRow_MSA, 0, 1, 1, 4, 15)
#endif
#ifdef HAS_SOBELTOPLANEROW_SSE2
ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15)
#endif
#ifdef HAS_SOBELTOPLANEROW_NEON
ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15)
#endif
#ifdef HAS_SOBELTOPLANEROW_MSA
ANY21(SobelToPlaneRow_Any_MSA, SobelToPlaneRow_MSA, 0, 1, 1, 1, 31)
#endif
#ifdef HAS_SOBELXYROW_SSE2
ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15)
#endif
#ifdef HAS_SOBELXYROW_NEON
ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
#endif
#ifdef HAS_SOBELXYROW_MSA
ANY21(SobelXYRow_Any_MSA, SobelXYRow_MSA, 0, 1, 1, 4, 15)
#endif
#undef ANY21
// Any 2 planes to 1 with yuvconstants
......@@ -300,6 +309,9 @@ ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
#ifdef HAS_NV12TOARGBROW_DSPR2
ANY21C(NV12ToARGBRow_Any_DSPR2, NV12ToARGBRow_DSPR2, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TOARGBROW_MSA
ANY21C(NV12ToARGBRow_Any_MSA, NV12ToARGBRow_MSA, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_SSSE3
ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif
......@@ -309,6 +321,9 @@ ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#ifdef HAS_NV21TOARGBROW_NEON
ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_MSA
ANY21C(NV21ToARGBRow_Any_MSA, NV21ToARGBRow_MSA, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_SSSE3
ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
#endif
......@@ -318,6 +333,9 @@ ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
#ifdef HAS_NV12TORGB565ROW_NEON
ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_MSA
ANY21C(NV12ToRGB565Row_Any_MSA, NV12ToRGB565Row_MSA, 1, 1, 2, 2, 7)
#endif
#undef ANY21C
// Any 1 to 1.
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment