Commit 10ce829b authored by Frank Barchard

Add MSA optimized I422ToRGB565Row_MSA, I422ToARGB4444Row_MSA and I422ToARGB1555Row_MSA functions

R=fbarchard@google.com
BUG=libyuv:634

Performance Gain (vs C vectorized)
I422ToRGB565Row_MSA             : ~1.5x
I422ToRGB565Row_Any_MSA         : ~1.5x
I422ToARGB4444Row_MSA           : ~1.4x
I422ToARGB4444Row_Any_MSA       : ~1.4x
I422ToARGB1555Row_MSA           : ~1.4x
I422ToARGB1555Row_Any_MSA       : ~1.4x

Performance Gain (vs C non-vectorized)
I422ToRGB565Row_MSA             : ~6.8x
I422ToRGB565Row_Any_MSA         : ~6.8x
I422ToARGB4444Row_MSA           : ~6.6x
I422ToARGB4444Row_Any_MSA       : ~6.6x
I422ToARGB1555Row_MSA           : ~6.6x
I422ToARGB1555Row_Any_MSA       : ~6.6x

Review URL: https://codereview.chromium.org/2445343007 .
parent 532f5708
...@@ -678,6 +678,24 @@ void I422ToRGB24Row_MSA(const uint8* src_y, ...@@ -678,6 +678,24 @@ void I422ToRGB24Row_MSA(const uint8* src_y,
uint8* dst_rgb24, uint8* dst_rgb24,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
// MSA row function added by this commit (the commit message describes it as
// MSA optimized). Takes three read-only uint8 source pointers (src_y, src_u,
// src_v), one writable destination pointer (dst_rgb565), a YuvConstants
// pointer and a width.
// NOTE(review): presumably converts one row of I422 to RGB565 with width in
// pixels -- inferred from the name; confirm against the definition.
void I422ToRGB565Row_MSA(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width);
// MSA row function added by this commit (the commit message describes it as
// MSA optimized). Takes three read-only uint8 source pointers (src_y, src_u,
// src_v), one writable destination pointer (dst_argb4444), a YuvConstants
// pointer and a width.
// NOTE(review): presumably converts one row of I422 to ARGB4444 with width in
// pixels -- inferred from the name; confirm against the definition.
void I422ToARGB4444Row_MSA(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width);
// MSA row function added by this commit (the commit message describes it as
// MSA optimized). Takes three read-only uint8 source pointers (src_y, src_u,
// src_v), one writable destination pointer (dst_argb1555), a YuvConstants
// pointer and a width.
// NOTE(review): presumably converts one row of I422 to ARGB1555 with width in
// pixels -- inferred from the name; confirm against the definition.
void I422ToARGB1555Row_MSA(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width);
void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width); void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width);
void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width); void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width);
...@@ -1681,6 +1699,24 @@ void I422ToRGB24Row_Any_MSA(const uint8* src_y, ...@@ -1681,6 +1699,24 @@ void I422ToRGB24Row_Any_MSA(const uint8* src_y,
uint8* dst_rgb24, uint8* dst_rgb24,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width); int width);
// "Any" counterpart of I422ToRGB565Row_MSA with the same parameter list.
// The conversion code in this commit selects this variant by default and only
// switches to the plain MSA function when IS_ALIGNED(width, 8) holds.
// NOTE(review): presumably accepts widths that are not a multiple of 8 --
// confirm against the ANY31C macro that generates it.
void I422ToRGB565Row_Any_MSA(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width);
// "Any" counterpart of I422ToARGB4444Row_MSA with the same parameter list.
// The conversion code in this commit selects this variant by default and only
// switches to the plain MSA function when IS_ALIGNED(width, 8) holds.
// NOTE(review): presumably accepts widths that are not a multiple of 8 --
// confirm against the ANY31C macro that generates it.
void I422ToARGB4444Row_Any_MSA(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width);
// "Any" counterpart of I422ToARGB1555Row_MSA with the same parameter list.
// The conversion code in this commit selects this variant by default and only
// switches to the plain MSA function when IS_ALIGNED(width, 8) holds.
// NOTE(review): presumably accepts widths that are not a multiple of 8 --
// confirm against the ANY31C macro that generates it.
void I422ToARGB1555Row_Any_MSA(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width);
void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width); void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width);
void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
......
...@@ -657,6 +657,14 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y, ...@@ -657,6 +657,14 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
} }
} }
#endif #endif
#if defined(HAS_I422TOARGB1555ROW_MSA)
// Compiled only when HAS_I422TOARGB1555ROW_MSA is defined. If the runtime CPU
// flag check reports MSA, point I422ToARGB1555Row at an MSA implementation.
if (TestCpuFlag(kCpuHasMSA)) {
// Start with the _Any_ variant as the default choice.
I422ToARGB1555Row = I422ToARGB1555Row_Any_MSA;
if (IS_ALIGNED(width, 8)) {
// The alignment check on width passed, so use the plain MSA row function.
I422ToARGB1555Row = I422ToARGB1555Row_MSA;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, &kYuvI601Constants, I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, &kYuvI601Constants,
...@@ -720,6 +728,14 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y, ...@@ -720,6 +728,14 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
} }
} }
#endif #endif
#if defined(HAS_I422TOARGB4444ROW_MSA)
// Compiled only when HAS_I422TOARGB4444ROW_MSA is defined. If the runtime CPU
// flag check reports MSA, point I422ToARGB4444Row at an MSA implementation.
if (TestCpuFlag(kCpuHasMSA)) {
// Start with the _Any_ variant as the default choice.
I422ToARGB4444Row = I422ToARGB4444Row_Any_MSA;
if (IS_ALIGNED(width, 8)) {
// The alignment check on width passed, so use the plain MSA row function.
I422ToARGB4444Row = I422ToARGB4444Row_MSA;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, &kYuvI601Constants, I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, &kYuvI601Constants,
...@@ -782,6 +798,14 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y, ...@@ -782,6 +798,14 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
} }
} }
#endif #endif
#if defined(HAS_I422TORGB565ROW_MSA)
// Compiled only when HAS_I422TORGB565ROW_MSA is defined. If the runtime CPU
// flag check reports MSA, point I422ToRGB565Row at an MSA implementation.
if (TestCpuFlag(kCpuHasMSA)) {
// Start with the _Any_ variant as the default choice.
I422ToRGB565Row = I422ToRGB565Row_Any_MSA;
if (IS_ALIGNED(width, 8)) {
// The alignment check on width passed, so use the plain MSA row function.
I422ToRGB565Row = I422ToRGB565Row_MSA;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvI601Constants, width); I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvI601Constants, width);
......
...@@ -172,6 +172,9 @@ ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7) ...@@ -172,6 +172,9 @@ ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7) ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7) ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_MSA, I422ToRGB24Row_MSA, 1, 0, 3, 15) ANY31C(I422ToRGB24Row_Any_MSA, I422ToRGB24Row_MSA, 1, 0, 3, 15)
// ANY31C instantiations pairing each new _Any_MSA name with its MSA row
// function. All three pass the same trailing arguments (1, 0, 2, 7).
// NOTE(review): presumably 2 is the output bytes per pixel and 7 is the width
// mask for 8-pixel steps -- confirm against the ANY31C macro definition.
ANY31C(I422ToARGB4444Row_Any_MSA, I422ToARGB4444Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_MSA, I422ToARGB1555Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
#endif #endif
#undef ANY31C #undef ANY31C
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment