Commit 61814908 authored by Frank Barchard's avatar Frank Barchard

Add MIPS SIMD Arch (MSA) optimized ARGBMirrorRow function

This patch adds MSA optimized ARGBMirrorRow function in libYUV project.

Performance gain ~3x

R=fbarchard@google.com
BUG=libyuv:634

Review URL: https://codereview.chromium.org/2368313003 .
parent feaff94b
......@@ -74,5 +74,20 @@
out1 = (RTYPE) __msa_vshf_b((v16i8) mask1, (v16i8) in3, (v16i8) in2); \
}
#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
#define VSHF_W2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) { \
out0 = (RTYPE) __msa_vshf_w((v4i32) mask0, (v4i32) in1, (v4i32) in0); \
out1 = (RTYPE) __msa_vshf_w((v4i32) mask1, (v4i32) in3, (v4i32) in2); \
}
#define VSHF_W2_UB(...) VSHF_W2(v16u8, __VA_ARGS__)
#define VSHF_W2_SB(...) VSHF_W2(v16i8, __VA_ARGS__)
#define VSHF_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
mask0, mask1, mask2, mask3, \
out0, out1, out2, out3) { \
VSHF_W2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
VSHF_W2(RTYPE, in4, in5, in6, in7, mask2, mask3, out2, out3) \
}
#define VSHF_W4_UB(...) VSHF_W4(v16u8, __VA_ARGS__)
#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */
#endif /* __MACROS_MSA_H__ */
......@@ -374,6 +374,7 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#define HAS_MIRRORROW_MSA
#define HAS_ARGBMIRRORROW_MSA
#endif
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
......@@ -832,10 +833,12 @@ void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width);
void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width);
void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width);
void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width);
void ARGBMirrorRow_MSA(const uint8* src, uint8* dst, int width);
void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width);
void ARGBMirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
void ARGBMirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
void ARGBMirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
void ARGBMirrorRow_Any_MSA(const uint8* src, uint8* dst, int width);
void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width);
void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
......
......@@ -659,6 +659,14 @@ int ARGBMirror(const uint8* src_argb, int src_stride_argb,
}
}
#endif
#if defined(HAS_ARGBMIRRORROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBMirrorRow = ARGBMirrorRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
ARGBMirrorRow = ARGBMirrorRow_MSA;
}
}
#endif
// Mirror plane
for (y = 0; y < height; ++y) {
......
......@@ -118,6 +118,14 @@ void ARGBRotate180(const uint8* src, int src_stride,
}
}
#endif
#if defined(HAS_ARGBMIRRORROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBMirrorRow = ARGBMirrorRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
ARGBMirrorRow = ARGBMirrorRow_MSA;
}
}
#endif
#if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
......
......@@ -643,6 +643,9 @@ ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3)
#ifdef HAS_ARGBMIRRORROW_NEON
ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 3)
#endif
#ifdef HAS_ARGBMIRRORROW_MSA
ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
#endif
#undef ANY11M
// Any 1 plane. (memset)
......
......@@ -10,36 +10,52 @@
#include "libyuv/row.h"
// This module is for GCC MSA
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#include "libyuv/macros_msa.h"
#endif
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
void MirrorRow_MSA(const uint8* src, uint8* dst, int width) {
int count;
int x;
v16u8 src0, src1, src2, src3;
v16u8 dst0, dst1, dst2, dst3;
v16i8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
v16i8 shuffler = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
src += width - 64;
for (count = 0; count < width; count += 64) {
for (x = 0; x < width; x += 64) {
LD_UB4(src, 16, src3, src2, src1, src0);
VSHF_B2_UB(src3, src3, src2, src2, mask, mask, dst3, dst2);
VSHF_B2_UB(src1, src1, src0, src0, mask, mask, dst1, dst0);
VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2);
VSHF_B2_UB(src1, src1, src0, src0, shuffler, shuffler, dst1, dst0);
ST_UB4(dst0, dst1, dst2, dst3, dst, 16);
dst += 64;
src -= 64;
}
}
void ARGBMirrorRow_MSA(const uint8* src, uint8* dst, int width) {
int x;
v16u8 src0, src1, src2, src3;
v16u8 dst0, dst1, dst2, dst3;
v16i8 shuffler = { 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 };
src += width * 4 - 64;
for (x = 0; x < width; x += 16) {
LD_UB4(src, 16, src3, src2, src1, src0);
VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2);
VSHF_B2_UB(src1, src1, src0, src0, shuffler, shuffler, dst1, dst0);
ST_UB4(dst0, dst1, dst2, dst3, dst, 16);
dst += 64;
src -= 64;
}
}
#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment