Commit 7c64163f authored by Manojkumar Bhosale

Add MSA optimized RAW/RGB/ARGB to ARGB/Y/UV row functions

R=fbarchard@google.com
BUG=libyuv:634

Performance Gain (vs C vectorized)
ARGB1555ToARGBRow_MSA     - 1.85
ARGB1555ToARGBRow_Any_MSA - 1.82
RGB565ToARGBRow_MSA       - 2.14
RGB565ToARGBRow_Any_MSA   - 2.08
RGB24ToARGBRow_MSA        - 8.57
RGB24ToARGBRow_Any_MSA    - 7.42
RAWToARGBRow_MSA          - 8.57
RAWToARGBRow_Any_MSA      - 7.42
ARGB1555ToYRow_MSA        - 2.60
ARGB1555ToYRow_Any_MSA    - 2.47
RGB565ToYRow_MSA          - 2.45
RGB565ToYRow_Any_MSA      - 2.33
RGB24ToYRow_MSA           - 2.23
RGB24ToYRow_Any_MSA       - 2.01
RAWToYRow_MSA             - 2.25
RAWToYRow_Any_MSA         - 2.02
ARGB1555ToUVRow_MSA       - 1.40
ARGB1555ToUVRow_Any_MSA   - 1.37
RGB565ToUVRow_MSA         - 1.68
RGB565ToUVRow_Any_MSA     - 1.63
RGB24ToUVRow_MSA          - 3.02
RGB24ToUVRow_Any_MSA      - 2.87
RAWToUVRow_MSA            - 3.04
RAWToUVRow_Any_MSA        - 2.85

Performance Gain (vs C non-vectorized)
ARGB1555ToARGBRow_MSA     - 4.66
ARGB1555ToARGBRow_Any_MSA - 4.45
RGB565ToARGBRow_MSA       - 5.58
RGB565ToARGBRow_Any_MSA   - 5.34
RGB24ToARGBRow_MSA        - 8.57
RGB24ToARGBRow_Any_MSA    - 7.42
RAWToARGBRow_MSA          - 8.57
RAWToARGBRow_Any_MSA      - 7.42
ARGB1555ToYRow_MSA        - 6.38
ARGB1555ToYRow_Any_MSA    - 5.98
RGB565ToYRow_MSA          - 6.42
RGB565ToYRow_Any_MSA      - 6.05
RGB24ToYRow_MSA           - 7.87
RGB24ToYRow_Any_MSA       - 7.01
RAWToYRow_MSA             - 7.98
RAWToYRow_Any_MSA         - 7.01
ARGB1555ToUVRow_MSA       - 5.39
ARGB1555ToUVRow_Any_MSA   - 5.06
RGB565ToUVRow_MSA         - 6.39
RGB565ToUVRow_Any_MSA     - 5.90
RGB24ToUVRow_MSA          - 3.04
RGB24ToUVRow_Any_MSA      - 2.87
RAWToUVRow_MSA            - 3.04
RAWToUVRow_Any_MSA        - 2.88

Review-Url: https://codereview.chromium.org/2600713002 .
parent cb115594
......@@ -416,6 +416,18 @@ extern "C" {
#define HAS_ARGBSHADEROW_MSA
#define HAS_ARGBGRAYROW_MSA
#define HAS_ARGBSEPIAROW_MSA
#define HAS_ARGB1555TOARGBROW_MSA
#define HAS_RGB565TOARGBROW_MSA
#define HAS_RGB24TOARGBROW_MSA
#define HAS_RAWTOARGBROW_MSA
#define HAS_ARGB1555TOYROW_MSA
#define HAS_RGB565TOYROW_MSA
#define HAS_RGB24TOYROW_MSA
#define HAS_RAWTOYROW_MSA
#define HAS_ARGB1555TOUVROW_MSA
#define HAS_RGB565TOUVROW_MSA
#define HAS_RGB24TOUVROW_MSA
#define HAS_RAWTOUVROW_MSA
#endif
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
......@@ -822,6 +834,26 @@ void ARGB4444ToUVRow_NEON(const uint8* src_argb4444,
uint8* dst_u,
uint8* dst_v,
int width);
void RGB24ToUVRow_MSA(const uint8* src_rgb24,
int src_stride_rgb24,
uint8* dst_u,
uint8* dst_v,
int width);
void RAWToUVRow_MSA(const uint8* src_raw,
int src_stride_raw,
uint8* dst_u,
uint8* dst_v,
int width);
void RGB565ToUVRow_MSA(const uint8* src_rgb565,
int src_stride_rgb565,
uint8* dst_u,
uint8* dst_v,
int width);
void ARGB1555ToUVRow_MSA(const uint8* src_argb1555,
int src_stride_argb1555,
uint8* dst_u,
uint8* dst_v,
int width);
void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width);
void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width);
void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width);
......@@ -830,6 +862,10 @@ void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width);
void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width);
void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width);
void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width);
void RGB24ToYRow_MSA(const uint8* src_rgb24, uint8* dst_y, int width);
void RAWToYRow_MSA(const uint8* src_raw, uint8* dst_y, int width);
void RGB565ToYRow_MSA(const uint8* src_rgb565, uint8* dst_y, int width);
void ARGB1555ToYRow_MSA(const uint8* src_argb1555, uint8* dst_y, int width);
void BGRAToUVRow_DSPR2(const uint8* src_bgra,
int src_stride_bgra,
uint8* dst_u,
......@@ -890,6 +926,10 @@ void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444,
uint8* dst_y,
int width);
void ARGBToYRow_Any_MSA(const uint8* src_argb, uint8* dst_y, int width);
void RGB24ToYRow_Any_MSA(const uint8* src_rgb24, uint8* dst_y, int width);
void RAWToYRow_Any_MSA(const uint8* src_raw, uint8* dst_y, int width);
void RGB565ToYRow_Any_MSA(const uint8* src_rgb565, uint8* dst_y, int width);
void ARGB1555ToYRow_Any_MSA(const uint8* src_argb1555, uint8* dst_y, int width);
void ARGBToUVRow_AVX2(const uint8* src_argb,
int src_stride_argb,
......@@ -1024,6 +1064,26 @@ void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
uint8* dst_u,
uint8* dst_v,
int width);
void RGB24ToUVRow_Any_MSA(const uint8* src_rgb24,
int src_stride_rgb24,
uint8* dst_u,
uint8* dst_v,
int width);
void RAWToUVRow_Any_MSA(const uint8* src_raw,
int src_stride_raw,
uint8* dst_u,
uint8* dst_v,
int width);
void RGB565ToUVRow_Any_MSA(const uint8* src_rgb565,
int src_stride_rgb565,
uint8* dst_u,
uint8* dst_v,
int width);
void ARGB1555ToUVRow_Any_MSA(const uint8* src_argb1555,
int src_stride_argb1555,
uint8* dst_u,
uint8* dst_v,
int width);
void BGRAToUVRow_Any_DSPR2(const uint8* src_bgra,
int src_stride_bgra,
uint8* dst_u,
......@@ -1341,12 +1401,18 @@ void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444,
int width);
void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width);
void RGB24ToARGBRow_MSA(const uint8* src_rgb24, uint8* dst_argb, int width);
void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width);
void RAWToARGBRow_MSA(const uint8* src_raw, uint8* dst_argb, int width);
void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width);
void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width);
void RGB565ToARGBRow_MSA(const uint8* src_rgb565, uint8* dst_argb, int width);
void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555,
uint8* dst_argb,
int width);
void ARGB1555ToARGBRow_MSA(const uint8* src_argb1555,
uint8* dst_argb,
int width);
void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444,
uint8* dst_argb,
int width);
......@@ -1396,14 +1462,22 @@ void ARGB4444ToARGBRow_Any_AVX2(const uint8* src_argb4444,
void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24,
uint8* dst_argb,
int width);
void RGB24ToARGBRow_Any_MSA(const uint8* src_rgb24, uint8* dst_argb, int width);
void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int width);
void RAWToARGBRow_Any_MSA(const uint8* src_raw, uint8* dst_argb, int width);
void RAWToRGB24Row_Any_NEON(const uint8* src_raw, uint8* dst_rgb24, int width);
void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565,
uint8* dst_argb,
int width);
void RGB565ToARGBRow_Any_MSA(const uint8* src_rgb565,
uint8* dst_argb,
int width);
void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555,
uint8* dst_argb,
int width);
void ARGB1555ToARGBRow_Any_MSA(const uint8* src_argb1555,
uint8* dst_argb,
int width);
void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444,
uint8* dst_argb,
int width);
......
This diff is collapsed.
......@@ -962,6 +962,14 @@ int RGB24ToARGB(const uint8* src_rgb24,
}
}
#endif
#if defined(HAS_RGB24TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
RGB24ToARGBRow = RGB24ToARGBRow_MSA;
}
}
#endif
for (y = 0; y < height; ++y) {
RGB24ToARGBRow(src_rgb24, dst_argb, width);
......@@ -1021,6 +1029,14 @@ int RAWToARGB(const uint8* src_raw,
}
}
#endif
#if defined(HAS_RAWTOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
RAWToARGBRow = RAWToARGBRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
RAWToARGBRow = RAWToARGBRow_MSA;
}
}
#endif
for (y = 0; y < height; ++y) {
RAWToARGBRow(src_raw, dst_argb, width);
......@@ -1088,6 +1104,14 @@ int RGB565ToARGB(const uint8* src_rgb565,
}
}
#endif
#if defined(HAS_RGB565TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
RGB565ToARGBRow = RGB565ToARGBRow_MSA;
}
}
#endif
for (y = 0; y < height; ++y) {
RGB565ToARGBRow(src_rgb565, dst_argb, width);
......@@ -1155,6 +1179,14 @@ int ARGB1555ToARGB(const uint8* src_argb1555,
}
}
#endif
#if defined(HAS_ARGB1555TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_MSA;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGB1555ToARGBRow(src_argb1555, dst_argb, width);
......
......@@ -454,15 +454,27 @@ ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 7)
#ifdef HAS_RGB24TOYROW_NEON
ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7)
#endif
#ifdef HAS_RGB24TOYROW_MSA
ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15)
#endif
#ifdef HAS_RAWTOYROW_NEON
ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7)
#endif
#ifdef HAS_RAWTOYROW_MSA
ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15)
#endif
#ifdef HAS_RGB565TOYROW_NEON
ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
#endif
#ifdef HAS_RGB565TOYROW_MSA
ANY11(RGB565ToYRow_Any_MSA, RGB565ToYRow_MSA, 0, 2, 1, 15)
#endif
#ifdef HAS_ARGB1555TOYROW_NEON
ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7)
#endif
#ifdef HAS_ARGB1555TOYROW_MSA
ANY11(ARGB1555ToYRow_Any_MSA, ARGB1555ToYRow_MSA, 0, 2, 1, 15)
#endif
#ifdef HAS_ARGB4444TOYROW_NEON
ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7)
#endif
......@@ -481,15 +493,27 @@ ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31)
#ifdef HAS_RGB24TOARGBROW_NEON
ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
#endif
#ifdef HAS_RGB24TOARGBROW_MSA
ANY11(RGB24ToARGBRow_Any_MSA, RGB24ToARGBRow_MSA, 0, 3, 4, 15)
#endif
#ifdef HAS_RAWTOARGBROW_NEON
ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7)
#endif
#ifdef HAS_RAWTOARGBROW_MSA
ANY11(RAWToARGBRow_Any_MSA, RAWToARGBRow_MSA, 0, 3, 4, 15)
#endif
#ifdef HAS_RGB565TOARGBROW_NEON
ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_RGB565TOARGBROW_MSA
ANY11(RGB565ToARGBRow_Any_MSA, RGB565ToARGBRow_MSA, 0, 2, 4, 15)
#endif
#ifdef HAS_ARGB1555TOARGBROW_NEON
ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB1555TOARGBROW_MSA
ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15)
#endif
#ifdef HAS_ARGB4444TOARGBROW_NEON
ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
#endif
......@@ -922,15 +946,27 @@ ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15)
#ifdef HAS_RGB24TOUVROW_NEON
ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15)
#endif
#ifdef HAS_RGB24TOUVROW_MSA
ANY12S(RGB24ToUVRow_Any_MSA, RGB24ToUVRow_MSA, 0, 3, 15)
#endif
#ifdef HAS_RAWTOUVROW_NEON
ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15)
#endif
#ifdef HAS_RAWTOUVROW_MSA
ANY12S(RAWToUVRow_Any_MSA, RAWToUVRow_MSA, 0, 3, 15)
#endif
#ifdef HAS_RGB565TOUVROW_NEON
ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15)
#endif
#ifdef HAS_RGB565TOUVROW_MSA
ANY12S(RGB565ToUVRow_Any_MSA, RGB565ToUVRow_MSA, 0, 2, 15)
#endif
#ifdef HAS_ARGB1555TOUVROW_NEON
ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15)
#endif
#ifdef HAS_ARGB1555TOUVROW_MSA
ANY12S(ARGB1555ToUVRow_Any_MSA, ARGB1555ToUVRow_MSA, 0, 2, 15)
#endif
#ifdef HAS_ARGB4444TOUVROW_NEON
ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15)
#endif
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment