Commit 000d2fa9 authored by Frank Barchard

Libyuv MIPS DSPR2 optimizations.

Optimized functions:

I444ToARGBRow_DSPR2
I422ToARGB4444Row_DSPR2
I422ToARGB1555Row_DSPR2
NV12ToARGBRow_DSPR2
BGRAToUVRow_DSPR2
BGRAToYRow_DSPR2
ABGRToUVRow_DSPR2
ARGBToYRow_DSPR2
ABGRToYRow_DSPR2
RGBAToUVRow_DSPR2
RGBAToYRow_DSPR2
ARGBToUVRow_DSPR2
RGB24ToARGBRow_DSPR2
RAWToARGBRow_DSPR2
RGB565ToARGBRow_DSPR2
ARGB1555ToARGBRow_DSPR2
ARGB4444ToARGBRow_DSPR2
ScaleAddRow_DSPR2

Bug-fixes in functions:

ScaleRowDown2_DSPR2
ScaleRowDown4_DSPR2

BUG=

Review-Url: https://codereview.chromium.org/2626123003 .
parent 288bfbef
This diff is collapsed.
...@@ -101,6 +101,7 @@ extern "C" { ...@@ -101,6 +101,7 @@ extern "C" {
#define HAS_SCALEROWDOWN4_DSPR2 #define HAS_SCALEROWDOWN4_DSPR2
#define HAS_SCALEROWDOWN34_DSPR2 #define HAS_SCALEROWDOWN34_DSPR2
#define HAS_SCALEROWDOWN38_DSPR2 #define HAS_SCALEROWDOWN38_DSPR2
#define HAS_SCALEADDROW_DSPR2
#endif #endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
...@@ -846,6 +847,10 @@ void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, ...@@ -846,6 +847,10 @@ void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8* dst_ptr, uint8* dst_ptr,
int dst_width); int dst_width);
// Adds each 8-bit value of src_ptr onto the matching 16-bit accumulator in
// dst_ptr (dst_ptr[i] += src_ptr[i]) using the MIPS DSPR2 implementation.
void ScaleAddRow_DSPR2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
// Wrapper generated by the SAANY macro from ScaleAddRow_DSPR2 and ScaleAddRow_C
// with a mask of 15; presumably it accepts widths that are not a multiple of
// 16 -- confirm against the SAANY definition.
void ScaleAddRow_Any_DSPR2(const uint8* src_ptr,
uint16* dst_ptr,
int src_width);
void ScaleRowDown2_MSA(const uint8_t* src_ptr, void ScaleRowDown2_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
...@@ -885,9 +890,9 @@ void ScaleRowDown2_Any_MSA(const uint8_t* src_ptr, ...@@ -885,9 +890,9 @@ void ScaleRowDown2_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst, uint8_t* dst,
int dst_width); int dst_width);
void ScaleRowDown2Linear_Any_MSA(const uint8_t* src_ptr, void ScaleRowDown2Linear_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8_t* dst, uint8_t* dst,
int dst_width); int dst_width);
void ScaleRowDown2Box_Any_MSA(const uint8_t* src_ptr, void ScaleRowDown2Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8_t* dst, uint8_t* dst,
......
...@@ -579,6 +579,14 @@ int ARGBToI420(const uint8* src_argb, ...@@ -579,6 +579,14 @@ int ARGBToI420(const uint8* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA) #if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) { if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA; ARGBToYRow = ARGBToYRow_Any_MSA;
...@@ -587,6 +595,14 @@ int ARGBToI420(const uint8* src_argb, ...@@ -587,6 +595,14 @@ int ARGBToI420(const uint8* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_MSA) #if defined(HAS_ARGBTOUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) { if (TestCpuFlag(kCpuHasMSA)) {
ARGBToUVRow = ARGBToUVRow_Any_MSA; ARGBToUVRow = ARGBToUVRow_Any_MSA;
...@@ -664,6 +680,22 @@ int BGRAToI420(const uint8* src_bgra, ...@@ -664,6 +680,22 @@ int BGRAToI420(const uint8* src_bgra,
} }
} }
#endif #endif
#if defined(HAS_BGRATOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
BGRAToYRow = BGRAToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
BGRAToYRow = BGRAToYRow_DSPR2;
}
}
#endif
#if defined(HAS_BGRATOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
BGRAToUVRow = BGRAToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
BGRAToUVRow = BGRAToUVRow_DSPR2;
}
}
#endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width); BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width);
...@@ -733,6 +765,22 @@ int ABGRToI420(const uint8* src_abgr, ...@@ -733,6 +765,22 @@ int ABGRToI420(const uint8* src_abgr,
} }
} }
#endif #endif
#if defined(HAS_ABGRTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ABGRToYRow = ABGRToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ABGRToYRow = ABGRToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ABGRToUVRow = ABGRToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_DSPR2;
}
}
#endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width); ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
...@@ -802,6 +850,22 @@ int RGBAToI420(const uint8* src_rgba, ...@@ -802,6 +850,22 @@ int RGBAToI420(const uint8* src_rgba,
} }
} }
#endif #endif
#if defined(HAS_RGBATOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGBAToYRow = RGBAToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGBAToYRow = RGBAToYRow_DSPR2;
}
}
#endif
#if defined(HAS_RGBATOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGBAToUVRow = RGBAToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
RGBAToUVRow = RGBAToUVRow_DSPR2;
}
}
#endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width); RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width);
...@@ -1014,6 +1078,14 @@ int RAWToI420(const uint8* src_raw, ...@@ -1014,6 +1078,14 @@ int RAWToI420(const uint8* src_raw,
ARGBToYRow = ARGBToYRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2;
} }
} }
#endif
#if defined(HAS_RAWTOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RAWToARGBRow = RAWToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
RAWToARGBRow = RAWToARGBRow_DSPR2;
}
}
#endif #endif
{ {
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
...@@ -1142,6 +1214,14 @@ int RGB565ToI420(const uint8* src_rgb565, ...@@ -1142,6 +1214,14 @@ int RGB565ToI420(const uint8* src_rgb565,
ARGBToYRow = ARGBToYRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2;
} }
} }
#endif
#if defined(HAS_RGB565TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_DSPR2;
}
}
#endif #endif
{ {
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
......
...@@ -485,6 +485,14 @@ static int I444ToARGBMatrix(const uint8* src_y, ...@@ -485,6 +485,14 @@ static int I444ToARGBMatrix(const uint8* src_y,
} }
} }
#endif #endif
#if defined(HAS_I444TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
I444ToARGBRow = I444ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
I444ToARGBRow = I444ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
I444ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); I444ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
...@@ -946,6 +954,14 @@ int RGB24ToARGB(const uint8* src_rgb24, ...@@ -946,6 +954,14 @@ int RGB24ToARGB(const uint8* src_rgb24,
} }
} }
#endif #endif
#if defined(HAS_RGB24TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGB24ToARGBRow = RGB24ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
RGB24ToARGBRow(src_rgb24, dst_argb, width); RGB24ToARGBRow(src_rgb24, dst_argb, width);
...@@ -997,6 +1013,14 @@ int RAWToARGB(const uint8* src_raw, ...@@ -997,6 +1013,14 @@ int RAWToARGB(const uint8* src_raw,
} }
} }
#endif #endif
#if defined(HAS_RAWTOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RAWToARGBRow = RAWToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RAWToARGBRow = RAWToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
RAWToARGBRow(src_raw, dst_argb, width); RAWToARGBRow(src_raw, dst_argb, width);
...@@ -1056,6 +1080,14 @@ int RGB565ToARGB(const uint8* src_rgb565, ...@@ -1056,6 +1080,14 @@ int RGB565ToARGB(const uint8* src_rgb565,
} }
} }
#endif #endif
#if defined(HAS_RGB565TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
RGB565ToARGBRow(src_rgb565, dst_argb, width); RGB565ToARGBRow(src_rgb565, dst_argb, width);
...@@ -1115,6 +1147,14 @@ int ARGB1555ToARGB(const uint8* src_argb1555, ...@@ -1115,6 +1147,14 @@ int ARGB1555ToARGB(const uint8* src_argb1555,
} }
} }
#endif #endif
#if defined(HAS_ARGB1555TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGB1555ToARGBRow(src_argb1555, dst_argb, width); ARGB1555ToARGBRow(src_argb1555, dst_argb, width);
...@@ -1174,6 +1214,14 @@ int ARGB4444ToARGB(const uint8* src_argb4444, ...@@ -1174,6 +1214,14 @@ int ARGB4444ToARGB(const uint8* src_argb4444,
} }
} }
#endif #endif
#if defined(HAS_ARGB4444TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGB4444TOARGBROW_MSA) #if defined(HAS_ARGB4444TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) { if (TestCpuFlag(kCpuHasMSA)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MSA; ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MSA;
...@@ -1238,6 +1286,14 @@ int NV12ToARGB(const uint8* src_y, ...@@ -1238,6 +1286,14 @@ int NV12ToARGB(const uint8* src_y,
} }
} }
#endif #endif
#if defined(HAS_NV12TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
NV12ToARGBRow = NV12ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
NV12ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width); NV12ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width);
...@@ -1354,6 +1410,14 @@ int M420ToARGB(const uint8* src_m420, ...@@ -1354,6 +1410,14 @@ int M420ToARGB(const uint8* src_m420,
} }
} }
#endif #endif
#if defined(HAS_NV12TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
NV12ToARGBRow = NV12ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
......
...@@ -708,6 +708,14 @@ int I420ToARGB1555(const uint8* src_y, ...@@ -708,6 +708,14 @@ int I420ToARGB1555(const uint8* src_y,
} }
} }
#endif #endif
#if defined(HAS_I422TOARGB1555ROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
I422ToARGB1555Row = I422ToARGB1555Row_DSPR2;
}
}
#endif
#if defined(HAS_I422TOARGB1555ROW_MSA) #if defined(HAS_I422TOARGB1555ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) { if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_MSA; I422ToARGB1555Row = I422ToARGB1555Row_Any_MSA;
...@@ -781,6 +789,14 @@ int I420ToARGB4444(const uint8* src_y, ...@@ -781,6 +789,14 @@ int I420ToARGB4444(const uint8* src_y,
} }
} }
#endif #endif
#if defined(HAS_I422TOARGB4444ROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
I422ToARGB4444Row = I422ToARGB4444Row_DSPR2;
}
}
#endif
#if defined(HAS_I422TOARGB4444ROW_MSA) #if defined(HAS_I422TOARGB4444ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) { if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_MSA; I422ToARGB4444Row = I422ToARGB4444Row_Any_MSA;
......
...@@ -100,6 +100,14 @@ int ARGBToI444(const uint8* src_argb, ...@@ -100,6 +100,14 @@ int ARGBToI444(const uint8* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA) #if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) { if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA; ARGBToYRow = ARGBToYRow_Any_MSA;
...@@ -189,6 +197,23 @@ int ARGBToI422(const uint8* src_argb, ...@@ -189,6 +197,23 @@ int ARGBToI422(const uint8* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA) #if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) { if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA; ARGBToYRow = ARGBToYRow_Any_MSA;
...@@ -318,6 +343,22 @@ int ARGBToNV12(const uint8* src_argb, ...@@ -318,6 +343,22 @@ int ARGBToNV12(const uint8* src_argb,
MergeUVRow_ = MergeUVRow_NEON; MergeUVRow_ = MergeUVRow_NEON;
} }
} }
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif #endif
{ {
// Allocate a rows of uv. // Allocate a rows of uv.
...@@ -445,6 +486,22 @@ int ARGBToNV21(const uint8* src_argb, ...@@ -445,6 +486,22 @@ int ARGBToNV21(const uint8* src_argb,
MergeUVRow_ = MergeUVRow_NEON; MergeUVRow_ = MergeUVRow_NEON;
} }
} }
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif #endif
{ {
// Allocate a rows of uv. // Allocate a rows of uv.
...@@ -570,6 +627,22 @@ int ARGBToYUY2(const uint8* src_argb, ...@@ -570,6 +627,22 @@ int ARGBToYUY2(const uint8* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_I422TOYUY2ROW_MSA) #if defined(HAS_I422TOYUY2ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) { if (TestCpuFlag(kCpuHasMSA)) {
I422ToYUY2Row = I422ToYUY2Row_Any_MSA; I422ToYUY2Row = I422ToYUY2Row_Any_MSA;
...@@ -698,6 +771,22 @@ int ARGBToUYVY(const uint8* src_argb, ...@@ -698,6 +771,22 @@ int ARGBToUYVY(const uint8* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_I422TOUYVYROW_MSA) #if defined(HAS_I422TOUYVYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) { if (TestCpuFlag(kCpuHasMSA)) {
I422ToUYVYRow = I422ToUYVYRow_Any_MSA; I422ToUYVYRow = I422ToUYVYRow_Any_MSA;
...@@ -775,6 +864,14 @@ int ARGBToI400(const uint8* src_argb, ...@@ -775,6 +864,14 @@ int ARGBToI400(const uint8* src_argb,
} }
} }
#endif #endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA) #if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) { if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA; ARGBToYRow = ARGBToYRow_Any_MSA;
......
...@@ -167,6 +167,12 @@ ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7) ...@@ -167,6 +167,12 @@ ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7) ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7) ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
#endif #endif
#ifdef HAS_I422TOARGBROW_DSPR2
ANY31C(I444ToARGBRow_Any_DSPR2, I444ToARGBRow_DSPR2, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_DSPR2, I422ToARGBRow_DSPR2, 1, 0, 4, 7)
ANY31C(I422ToARGB4444Row_Any_DSPR2, I422ToARGB4444Row_DSPR2, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_DSPR2, I422ToARGB1555Row_DSPR2, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGBROW_MSA #ifdef HAS_I422TOARGBROW_MSA
ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7) ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7) ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7)
...@@ -291,6 +297,9 @@ ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15) ...@@ -291,6 +297,9 @@ ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#ifdef HAS_NV12TOARGBROW_NEON #ifdef HAS_NV12TOARGBROW_NEON
ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7) ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif #endif
#ifdef HAS_NV12TOARGBROW_DSPR2
ANY21C(NV12ToARGBRow_Any_DSPR2, NV12ToARGBRow_DSPR2, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_SSSE3 #ifdef HAS_NV21TOARGBROW_SSSE3
ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7) ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif #endif
...@@ -484,6 +493,33 @@ ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7) ...@@ -484,6 +493,33 @@ ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7)
#ifdef HAS_ARGB4444TOARGBROW_NEON #ifdef HAS_ARGB4444TOARGBROW_NEON
ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7) ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
#endif #endif
#ifdef HAS_RGB24TOARGBROW_DSPR2
ANY11(RGB24ToARGBRow_Any_DSPR2, RGB24ToARGBRow_DSPR2, 0, 3, 4, 7)
#endif
#ifdef HAS_RAWTOARGBROW_DSPR2
ANY11(RAWToARGBRow_Any_DSPR2, RAWToARGBRow_DSPR2, 0, 3, 4, 7)
#endif
#ifdef HAS_RGB565TOARGBROW_DSPR2
ANY11(RGB565ToARGBRow_Any_DSPR2, RGB565ToARGBRow_DSPR2, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB1555TOARGBROW_DSPR2
ANY11(ARGB1555ToARGBRow_Any_DSPR2, ARGB1555ToARGBRow_DSPR2, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB4444TOARGBROW_DSPR2
ANY11(ARGB4444ToARGBRow_Any_DSPR2, ARGB4444ToARGBRow_DSPR2, 0, 2, 4, 7)
#endif
#ifdef HAS_BGRATOYROW_DSPR2
ANY11(BGRAToYRow_Any_DSPR2, BGRAToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBTOYROW_DSPR2
ANY11(ARGBToYRow_Any_DSPR2, ARGBToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_ABGRTOYROW_DSPR2
ANY11(ABGRToYRow_Any_DSPR2, ABGRToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_RGBATOYROW_DSPR2
ANY11(RGBAToYRow_Any_DSPR2, RGBAToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGB4444TOARGBROW_MSA #ifdef HAS_ARGB4444TOARGBROW_MSA
ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15) ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15)
#endif #endif
...@@ -904,6 +940,18 @@ ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15) ...@@ -904,6 +940,18 @@ ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
#ifdef HAS_UYVYTOUVROW_NEON #ifdef HAS_UYVYTOUVROW_NEON
ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15) ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
#endif #endif
#ifdef HAS_BGRATOUVROW_DSPR2
ANY12S(BGRAToUVRow_Any_DSPR2, BGRAToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_ABGRTOUVROW_DSPR2
ANY12S(ABGRToUVRow_Any_DSPR2, ABGRToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_RGBATOUVROW_DSPR2
ANY12S(RGBAToUVRow_Any_DSPR2, RGBAToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVROW_DSPR2
ANY12S(ARGBToUVRow_Any_DSPR2, ARGBToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_YUY2TOUVROW_MSA #ifdef HAS_YUY2TOUVROW_MSA
ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31) ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
#endif #endif
......
...@@ -202,8 +202,9 @@ void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { ...@@ -202,8 +202,9 @@ void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
uint8 b1 = src_argb[4] >> 3; uint8 b1 = src_argb[4] >> 3;
uint8 g1 = src_argb[5] >> 2; uint8 g1 = src_argb[5] >> 2;
uint8 r1 = src_argb[6] >> 3; uint8 r1 = src_argb[6] >> 3;
WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | WRITEWORD(
(r1 << 27)); dst_rgb,
b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27));
dst_rgb += 4; dst_rgb += 4;
src_argb += 8; src_argb += 8;
} }
...@@ -237,8 +238,9 @@ void ARGBToRGB565DitherRow_C(const uint8* src_argb, ...@@ -237,8 +238,9 @@ void ARGBToRGB565DitherRow_C(const uint8* src_argb,
uint8 b1 = clamp255(src_argb[4] + dither1) >> 3; uint8 b1 = clamp255(src_argb[4] + dither1) >> 3;
uint8 g1 = clamp255(src_argb[5] + dither1) >> 2; uint8 g1 = clamp255(src_argb[5] + dither1) >> 2;
uint8 r1 = clamp255(src_argb[6] + dither1) >> 3; uint8 r1 = clamp255(src_argb[6] + dither1) >> 3;
WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | WRITEWORD(
(r1 << 27)); dst_rgb,
b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27));
dst_rgb += 4; dst_rgb += 4;
src_argb += 8; src_argb += 8;
} }
......
This diff is collapsed.
...@@ -2032,7 +2032,7 @@ __declspec(naked) void RGBAToUVRow_SSSE3(const uint8* src_argb0, ...@@ -2032,7 +2032,7 @@ __declspec(naked) void RGBAToUVRow_SSSE3(const uint8* src_argb0,
__asm vpsraw ymm2, ymm2, 6 \ __asm vpsraw ymm2, ymm2, 6 \
__asm vpackuswb ymm0, ymm0, ymm0 /* B */ \ __asm vpackuswb ymm0, ymm0, ymm0 /* B */ \
__asm vpackuswb ymm1, ymm1, ymm1 /* G */ \ __asm vpackuswb ymm1, ymm1, ymm1 /* G */ \
__asm vpackuswb ymm2, ymm2, ymm2 /* R */ \ __asm vpackuswb ymm2, ymm2, ymm2 /* R */ \
} }
// Store 16 ARGB values. // Store 16 ARGB values.
......
...@@ -894,6 +894,14 @@ static void ScalePlaneBox(int src_width, ...@@ -894,6 +894,14 @@ static void ScalePlaneBox(int src_width,
} }
} }
#endif #endif
#if defined(HAS_SCALEADDROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ScaleAddRow = ScaleAddRow_Any_DSPR2;
if (IS_ALIGNED(src_width, 16)) {
ScaleAddRow = ScaleAddRow_DSPR2;
}
}
#endif
for (j = 0; j < dst_height; ++j) { for (j = 0; j < dst_height; ++j) {
int boxheight; int boxheight;
......
...@@ -421,6 +421,9 @@ SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15) ...@@ -421,6 +421,9 @@ SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
#ifdef HAS_SCALEADDROW_MSA #ifdef HAS_SCALEADDROW_MSA
SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15) SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15)
#endif #endif
#ifdef HAS_SCALEADDROW_DSPR2
SAANY(ScaleAddRow_Any_DSPR2, ScaleAddRow_DSPR2, ScaleAddRow_C, 15)
#endif
#undef SAANY #undef SAANY
#ifdef __cplusplus #ifdef __cplusplus
......
...@@ -42,10 +42,10 @@ void ScaleRowDown2_DSPR2(const uint8* src_ptr, ...@@ -42,10 +42,10 @@ void ScaleRowDown2_DSPR2(const uint8* src_ptr,
"lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24| "lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
"lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28| "lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
// TODO(fbarchard): Use odd pixels instead of even. // TODO(fbarchard): Use odd pixels instead of even.
"precr.qb.ph $t8, $t1, $t0 \n" // |6|4|2|0| "precrq.qb.ph $t8, $t1, $t0 \n" // |7|5|3|1|
"precr.qb.ph $t0, $t3, $t2 \n" // |14|12|10|8| "precrq.qb.ph $t0, $t3, $t2 \n" // |15|13|11|9|
"precr.qb.ph $t1, $t5, $t4 \n" // |22|20|18|16| "precrq.qb.ph $t1, $t5, $t4 \n" // |23|21|19|17|
"precr.qb.ph $t2, $t7, $t6 \n" // |30|28|26|24| "precrq.qb.ph $t2, $t7, $t6 \n" // |31|29|27|25|
"addiu %[src_ptr], %[src_ptr], 32 \n" "addiu %[src_ptr], %[src_ptr], 32 \n"
"addiu $t9, $t9, -1 \n" "addiu $t9, $t9, -1 \n"
"sw $t8, 0(%[dst]) \n" "sw $t8, 0(%[dst]) \n"
...@@ -61,7 +61,7 @@ void ScaleRowDown2_DSPR2(const uint8* src_ptr, ...@@ -61,7 +61,7 @@ void ScaleRowDown2_DSPR2(const uint8* src_ptr,
" nop \n" " nop \n"
"21: \n" "21: \n"
"lbu $t0, 0(%[src_ptr]) \n" "lbu $t0, 1(%[src_ptr]) \n"
"addiu %[src_ptr], %[src_ptr], 2 \n" "addiu %[src_ptr], %[src_ptr], 2 \n"
"addiu $t9, $t9, -1 \n" "addiu $t9, $t9, -1 \n"
"sb $t0, 0(%[dst]) \n" "sb $t0, 0(%[dst]) \n"
...@@ -198,8 +198,8 @@ void ScaleRowDown4_DSPR2(const uint8* src_ptr, ...@@ -198,8 +198,8 @@ void ScaleRowDown4_DSPR2(const uint8* src_ptr,
"precr.qb.ph $t2, $t4, $t3 \n" // |14|12|10|8| "precr.qb.ph $t2, $t4, $t3 \n" // |14|12|10|8|
"precr.qb.ph $t5, $t6, $t5 \n" // |22|20|18|16| "precr.qb.ph $t5, $t6, $t5 \n" // |22|20|18|16|
"precr.qb.ph $t6, $t8, $t7 \n" // |30|28|26|24| "precr.qb.ph $t6, $t8, $t7 \n" // |30|28|26|24|
"precr.qb.ph $t1, $t2, $t1 \n" // |12|8|4|0| "precrq.qb.ph $t1, $t2, $t1 \n" // |14|10|6|2|
"precr.qb.ph $t5, $t6, $t5 \n" // |28|24|20|16| "precrq.qb.ph $t5, $t6, $t5 \n" // |30|26|22|18|
"addiu %[src_ptr], %[src_ptr], 32 \n" "addiu %[src_ptr], %[src_ptr], 32 \n"
"addiu $t9, $t9, -1 \n" "addiu $t9, $t9, -1 \n"
"sw $t1, 0(%[dst]) \n" "sw $t1, 0(%[dst]) \n"
...@@ -213,7 +213,7 @@ void ScaleRowDown4_DSPR2(const uint8* src_ptr, ...@@ -213,7 +213,7 @@ void ScaleRowDown4_DSPR2(const uint8* src_ptr,
" nop \n" " nop \n"
"21: \n" "21: \n"
"lbu $t1, 0(%[src_ptr]) \n" "lbu $t1, 2(%[src_ptr]) \n"
"addiu %[src_ptr], %[src_ptr], 4 \n" "addiu %[src_ptr], %[src_ptr], 4 \n"
"addiu $t9, $t9, -1 \n" "addiu $t9, $t9, -1 \n"
"sb $t1, 0(%[dst]) \n" "sb $t1, 0(%[dst]) \n"
...@@ -615,6 +615,51 @@ void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, ...@@ -615,6 +615,51 @@ void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8"); : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8");
} }
// Accumulates one row of 8-bit source pixels into a row of 16-bit sums:
//   dst_ptr[i] += src_ptr[i]  for i in [0, src_width).
//
// src_ptr:   row of src_width 8-bit values (read only).
// dst_ptr:   row of src_width 16-bit accumulators (updated in place).
// src_width: number of pixels to accumulate; values <= 0 do nothing.
//
// Whole groups of 8 pixels use the DSPR2 path: two source words are widened
// to halfword pairs (preceu.ph.qbr / preceu.ph.qbl) and added to four
// destination words with addu.ph, the non-saturating paired-halfword add.
// The remaining src_width % 8 pixels use the scalar loop.
//
// NOTE(review): the lw/sw accesses presumably require src_ptr and dst_ptr to
// be 4-byte aligned, and the qbr/qbl lane order presumably assumes a
// little-endian target -- confirm against the callers and build config.
void ScaleAddRow_DSPR2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
  int x;
  // Pixels covered by complete 8-pixel groups.  The former loop bound
  // (x < src_width - 1) ran an extra group for widths that are not a multiple
  // of 8 (e.g. 10), touching memory past the end of both rows.
  const int vector_width = (src_width > 0) ? (src_width & ~7) : 0;

  for (x = 0; x < vector_width; x += 8) {
    uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4;
    uint32 tmp_t5, tmp_t6, tmp_t7, tmp_t8;
    __asm__ __volatile__(
        ".set push                                                \n"
        ".set noreorder                                           \n"
        "lw                %[tmp_t5],   0(%[src_ptr])             \n"
        "lw                %[tmp_t6],   4(%[src_ptr])             \n"
        "lw                %[tmp_t1],   0(%[dst_ptr])             \n"
        "lw                %[tmp_t2],   4(%[dst_ptr])             \n"
        "lw                %[tmp_t3],   8(%[dst_ptr])             \n"
        "lw                %[tmp_t4],   12(%[dst_ptr])            \n"
        "preceu.ph.qbr     %[tmp_t7],   %[tmp_t5]                 \n"
        "preceu.ph.qbl     %[tmp_t8],   %[tmp_t5]                 \n"
        "addu.ph           %[tmp_t1],   %[tmp_t1],     %[tmp_t7]  \n"
        "addu.ph           %[tmp_t2],   %[tmp_t2],     %[tmp_t8]  \n"
        "preceu.ph.qbr     %[tmp_t7],   %[tmp_t6]                 \n"
        "preceu.ph.qbl     %[tmp_t8],   %[tmp_t6]                 \n"
        "addu.ph           %[tmp_t3],   %[tmp_t3],     %[tmp_t7]  \n"
        "addu.ph           %[tmp_t4],   %[tmp_t4],     %[tmp_t8]  \n"
        "sw                %[tmp_t1],   0(%[dst_ptr])             \n"
        "sw                %[tmp_t2],   4(%[dst_ptr])             \n"
        "sw                %[tmp_t3],   8(%[dst_ptr])             \n"
        "sw                %[tmp_t4],   12(%[dst_ptr])            \n"
        ".set pop                                                 \n"
        : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
          [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
          [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
          [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8)
        // Neither pointer is modified by the asm, so both are plain inputs.
        : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr)
        // The asm reads src_ptr[] and reads/writes dst_ptr[] behind the
        // compiler's back; without this clobber those accesses could be
        // cached or reordered around the statement.
        : "memory");
    src_ptr += 8;
    dst_ptr += 8;
  }

  // Scalar tail for the last src_width % 8 pixels.  The former bound
  // ((src_width - 1) & 7) stopped one pixel short.
  for (; x < src_width; ++x) {
    dst_ptr[0] += src_ptr[0];
    src_ptr += 1;
    dst_ptr += 1;
  }
}
#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2) #endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#ifdef __cplusplus #ifdef __cplusplus
......
This diff is collapsed.
...@@ -356,17 +356,19 @@ int main(int argc, const char* argv[]) { ...@@ -356,17 +356,19 @@ int main(int argc, const char* argv[]) {
const int uv_size = ((image_width + 1) / 2) * ((image_height + 1) / 2); const int uv_size = ((image_width + 1) / 2) * ((image_height + 1) / 2);
const size_t total_size = y_size + 2 * uv_size; // NOLINT const size_t total_size = y_size + 2 * uv_size; // NOLINT
#if defined(_MSC_VER) #if defined(_MSC_VER)
_fseeki64(file_org, static_cast<__int64>(num_skip_org) * _fseeki64(
static_cast<__int64>(total_size), file_org,
SEEK_SET); static_cast<__int64>(num_skip_org) * static_cast<__int64>(total_size),
SEEK_SET);
#else #else
fseek(file_org, num_skip_org * total_size, SEEK_SET); fseek(file_org, num_skip_org * total_size, SEEK_SET);
#endif #endif
for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) { for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) {
#if defined(_MSC_VER) #if defined(_MSC_VER)
_fseeki64(file_rec[cur_rec], static_cast<__int64>(num_skip_rec) * _fseeki64(
static_cast<__int64>(total_size), file_rec[cur_rec],
SEEK_SET); static_cast<__int64>(num_skip_rec) * static_cast<__int64>(total_size),
SEEK_SET);
#else #else
fseek(file_rec[cur_rec], num_skip_rec * total_size, SEEK_SET); fseek(file_rec[cur_rec], num_skip_rec * total_size, SEEK_SET);
#endif #endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment