Commit 000d2fa9 authored by Frank Barchard

Libyuv MIPS DSPR2 optimizations.

Optimized functions:

I444ToARGBRow_DSPR2
I422ToARGB4444Row_DSPR2
I422ToARGB1555Row_DSPR2
NV12ToARGBRow_DSPR2
BGRAToUVRow_DSPR2
BGRAToYRow_DSPR2
ABGRToUVRow_DSPR2
ARGBToYRow_DSPR2
ABGRToYRow_DSPR2
RGBAToUVRow_DSPR2
RGBAToYRow_DSPR2
ARGBToUVRow_DSPR2
RGB24ToARGBRow_DSPR2
RAWToARGBRow_DSPR2
RGB565ToARGBRow_DSPR2
ARGB1555ToARGBRow_DSPR2
ARGB4444ToARGBRow_DSPR2
ScaleAddRow_DSPR2

Bug-fixes in functions:

ScaleRowDown2_DSPR2
ScaleRowDown4_DSPR2

BUG=

Review-Url: https://codereview.chromium.org/2626123003 .
parent 288bfbef
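All of the new kernels are hooked up through libyuv's standard dispatch pattern: a runtime TestCpuFlag(kCpuHasDSPR2) check first selects the width-agnostic _Any_DSPR2 wrapper, then the bare DSPR2 kernel when the row width is a multiple of the kernel's step, as in this hunk from ARGBToI420:

#if defined(HAS_ARGBTOYROW_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2)) {
    ARGBToYRow = ARGBToYRow_Any_DSPR2;  // any width; tail done via scratch
    if (IS_ALIGNED(width, 8)) {
      ARGBToYRow = ARGBToYRow_DSPR2;  // fast path: 8 pixels per step
    }
  }
#endif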
@@ -364,6 +364,23 @@ extern "C" {
#define HAS_MIRRORROW_DSPR2
#define HAS_MIRRORUVROW_DSPR2
#define HAS_SPLITUVROW_DSPR2
#define HAS_RGB24TOARGBROW_DSPR2
#define HAS_RAWTOARGBROW_DSPR2
#define HAS_RGB565TOARGBROW_DSPR2
#define HAS_ARGB1555TOARGBROW_DSPR2
#define HAS_ARGB4444TOARGBROW_DSPR2
#define HAS_I444TOARGBROW_DSPR2
#define HAS_I422TOARGB4444ROW_DSPR2
#define HAS_I422TOARGB1555ROW_DSPR2
#define HAS_NV12TOARGBROW_DSPR2
#define HAS_BGRATOUVROW_DSPR2
#define HAS_BGRATOYROW_DSPR2
#define HAS_ABGRTOUVROW_DSPR2
#define HAS_ARGBTOYROW_DSPR2
#define HAS_ABGRTOYROW_DSPR2
#define HAS_RGBATOUVROW_DSPR2
#define HAS_RGBATOYROW_DSPR2
#define HAS_ARGBTOUVROW_DSPR2
#endif
#endif
@@ -660,6 +677,30 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I444ToARGBRow_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToARGBRow_DSPR2(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_MSA(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
@@ -789,6 +830,30 @@ void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width);
void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width);
void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width);
void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width);
void BGRAToUVRow_DSPR2(const uint8* src_bgra,
int src_stride_bgra,
uint8* dst_u,
uint8* dst_v,
int width);
void BGRAToYRow_DSPR2(const uint8* src_bgra, uint8* dst_y, int width);
void ABGRToUVRow_DSPR2(const uint8* src_abgr,
int src_stride_abgr,
uint8* dst_u,
uint8* dst_v,
int width);
void ARGBToYRow_DSPR2(const uint8* src_argb, uint8* dst_y, int width);
void ABGRToYRow_DSPR2(const uint8* src_abgr, uint8* dst_y, int width);
void RGBAToUVRow_DSPR2(const uint8* src_rgba,
int src_stride_rgba,
uint8* dst_u,
uint8* dst_v,
int width);
void RGBAToYRow_DSPR2(const uint8* src_rgba, uint8* dst_y, int width);
void ARGBToUVRow_DSPR2(const uint8* src_argb,
int src_stride_argb,
uint8* dst_u,
uint8* dst_v,
int width);
void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int width);
void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int width);
void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int width);
@@ -817,6 +882,10 @@ void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int width);
void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555,
uint8* dst_y,
int width);
void BGRAToYRow_Any_DSPR2(const uint8* src_bgra, uint8* dst_y, int width);
void ARGBToYRow_Any_DSPR2(const uint8* src_argb, uint8* dst_y, int width);
void ABGRToYRow_Any_DSPR2(const uint8* src_abgr, uint8* dst_y, int width);
void RGBAToYRow_Any_DSPR2(const uint8* src_rgba, uint8* dst_y, int width);
void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444,
uint8* dst_y,
int width);
@@ -955,6 +1024,36 @@ void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
uint8* dst_u,
uint8* dst_v,
int width);
void BGRAToUVRow_Any_DSPR2(const uint8* src_bgra,
int src_stride_bgra,
uint8* dst_u,
uint8* dst_v,
int width);
void ABGRToUVRow_Any_DSPR2(const uint8* src_abgr,
int src_stride_abgr,
uint8* dst_u,
uint8* dst_v,
int width);
void RGBAToUVRow_Any_DSPR2(const uint8* src_rgba,
int src_stride_rgba,
uint8* dst_u,
uint8* dst_v,
int width);
void ARGBToUVRow_Any_DSPR2(const uint8* src_argb,
int src_stride_argb,
uint8* dst_u,
uint8* dst_v,
int width);
void ARGBToUVRow_C(const uint8* src_argb,
int src_stride_argb,
uint8* dst_u,
uint8* dst_v,
int width);
void ARGBToUVJRow_C(const uint8* src_argb,
int src_stride_argb,
uint8* dst_u,
uint8* dst_v,
int width);
void ARGBToUVRow_C(const uint8* src_argb,
int src_stride_argb,
uint8* dst_u,
@@ -1251,6 +1350,15 @@ void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555,
void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444,
uint8* dst_argb,
int width);
void RGB24ToARGBRow_DSPR2(const uint8* src_rgb24, uint8* dst_argb, int width);
void RAWToARGBRow_DSPR2(const uint8* src_raw, uint8* dst_argb, int width);
void RGB565ToARGBRow_DSPR2(const uint8* src_rgb565, uint8* dst_argb, int width);
void ARGB1555ToARGBRow_DSPR2(const uint8* src_argb1555,
uint8* dst_argb,
int width);
void ARGB4444ToARGBRow_DSPR2(const uint8* src_argb4444,
uint8* dst_argb,
int width);
void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444,
uint8* dst_argb,
int width);
@@ -1299,6 +1407,20 @@ void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555,
void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444,
uint8* dst_argb,
int width);
void RGB24ToARGBRow_Any_DSPR2(const uint8* src_rgb24,
uint8* dst_argb,
int width);
void RAWToARGBRow_Any_DSPR2(const uint8* src_raw, uint8* dst_argb, int width);
void RGB565ToARGBRow_Any_DSPR2(const uint8* src_rgb565,
uint8* dst_argb,
int width);
void ARGB1555ToARGBRow_Any_DSPR2(const uint8* src_argb1555,
uint8* dst_argb,
int width);
void ARGB4444ToARGBRow_Any_DSPR2(const uint8* src_argb4444,
uint8* dst_argb,
int width);
void ARGB4444ToARGBRow_Any_MSA(const uint8* src_argb4444,
uint8* dst_argb,
int width);
@@ -2042,12 +2164,47 @@ void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I444ToARGBRow_Any_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_Any_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_Any_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_Any_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I411ToARGBRow_Any_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToARGBRow_Any_DSPR2(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
...
@@ -101,6 +101,7 @@ extern "C" {
#define HAS_SCALEROWDOWN4_DSPR2
#define HAS_SCALEROWDOWN34_DSPR2
#define HAS_SCALEROWDOWN38_DSPR2
#define HAS_SCALEADDROW_DSPR2
#endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
@@ -846,6 +847,10 @@ void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr,
int dst_width);
void ScaleAddRow_DSPR2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleAddRow_Any_DSPR2(const uint8* src_ptr,
uint16* dst_ptr,
int src_width);
void ScaleRowDown2_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
...
@@ -579,6 +579,14 @@ int ARGBToI420(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA;
@@ -587,6 +595,14 @@ int ARGBToI420(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToUVRow = ARGBToUVRow_Any_MSA;
@@ -664,6 +680,22 @@ int BGRAToI420(const uint8* src_bgra,
}
}
#endif
#if defined(HAS_BGRATOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
BGRAToYRow = BGRAToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
BGRAToYRow = BGRAToYRow_DSPR2;
}
}
#endif
#if defined(HAS_BGRATOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
BGRAToUVRow = BGRAToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
BGRAToUVRow = BGRAToUVRow_DSPR2;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width);
@@ -733,6 +765,22 @@ int ABGRToI420(const uint8* src_abgr,
}
}
#endif
#if defined(HAS_ABGRTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ABGRToYRow = ABGRToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ABGRToYRow = ABGRToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ABGRToUVRow = ABGRToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_DSPR2;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
@@ -802,6 +850,22 @@ int RGBAToI420(const uint8* src_rgba,
}
}
#endif
#if defined(HAS_RGBATOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGBAToYRow = RGBAToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGBAToYRow = RGBAToYRow_DSPR2;
}
}
#endif
#if defined(HAS_RGBATOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGBAToUVRow = RGBAToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
RGBAToUVRow = RGBAToUVRow_DSPR2;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width);
@@ -1014,6 +1078,14 @@ int RAWToI420(const uint8* src_raw,
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_RAWTOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RAWToARGBRow = RAWToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
RAWToARGBRow = RAWToARGBRow_DSPR2;
}
}
#endif
{
// Allocate 2 rows of ARGB.
@@ -1142,6 +1214,14 @@ int RGB565ToI420(const uint8* src_rgb565,
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_RGB565TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_DSPR2;
}
}
#endif
{
// Allocate 2 rows of ARGB.
...
@@ -485,6 +485,14 @@ static int I444ToARGBMatrix(const uint8* src_y,
}
}
#endif
#if defined(HAS_I444TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
I444ToARGBRow = I444ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
I444ToARGBRow = I444ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) {
I444ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
@@ -946,6 +954,14 @@ int RGB24ToARGB(const uint8* src_rgb24,
}
}
#endif
#if defined(HAS_RGB24TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGB24ToARGBRow = RGB24ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) {
RGB24ToARGBRow(src_rgb24, dst_argb, width);
@@ -997,6 +1013,14 @@ int RAWToARGB(const uint8* src_raw,
}
}
#endif
#if defined(HAS_RAWTOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RAWToARGBRow = RAWToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RAWToARGBRow = RAWToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) {
RAWToARGBRow(src_raw, dst_argb, width);
@@ -1056,6 +1080,14 @@ int RGB565ToARGB(const uint8* src_rgb565,
}
}
#endif
#if defined(HAS_RGB565TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) {
RGB565ToARGBRow(src_rgb565, dst_argb, width);
@@ -1115,6 +1147,14 @@ int ARGB1555ToARGB(const uint8* src_argb1555,
}
}
#endif
#if defined(HAS_ARGB1555TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGB1555ToARGBRow(src_argb1555, dst_argb, width);
@@ -1174,6 +1214,14 @@ int ARGB4444ToARGB(const uint8* src_argb4444,
}
}
#endif
#if defined(HAS_ARGB4444TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGB4444TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MSA;
@@ -1238,6 +1286,14 @@ int NV12ToARGB(const uint8* src_y,
}
}
#endif
#if defined(HAS_NV12TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
NV12ToARGBRow = NV12ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) {
NV12ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width);
@@ -1354,6 +1410,14 @@ int M420ToARGB(const uint8* src_m420,
}
}
#endif
#if defined(HAS_NV12TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
NV12ToARGBRow = NV12ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
...
@@ -708,6 +708,14 @@ int I420ToARGB1555(const uint8* src_y,
}
}
#endif
#if defined(HAS_I422TOARGB1555ROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
I422ToARGB1555Row = I422ToARGB1555Row_DSPR2;
}
}
#endif
#if defined(HAS_I422TOARGB1555ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_MSA;
@@ -781,6 +789,14 @@ int I420ToARGB4444(const uint8* src_y,
}
}
#endif
#if defined(HAS_I422TOARGB4444ROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
I422ToARGB4444Row = I422ToARGB4444Row_DSPR2;
}
}
#endif
#if defined(HAS_I422TOARGB4444ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_MSA;
...
@@ -100,6 +100,14 @@ int ARGBToI444(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA;
@@ -189,6 +197,23 @@ int ARGBToI422(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA;
@@ -318,6 +343,22 @@ int ARGBToNV12(const uint8* src_argb,
MergeUVRow_ = MergeUVRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
{
// Allocate a row of uv.
@@ -445,6 +486,22 @@ int ARGBToNV21(const uint8* src_argb,
MergeUVRow_ = MergeUVRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
{
// Allocate a row of uv.
@@ -570,6 +627,22 @@ int ARGBToYUY2(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_I422TOYUY2ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToYUY2Row = I422ToYUY2Row_Any_MSA;
@@ -698,6 +771,22 @@ int ARGBToUYVY(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_I422TOUYVYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToUYVYRow = I422ToUYVYRow_Any_MSA;
@@ -775,6 +864,14 @@ int ARGBToI400(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA;
...
@@ -167,6 +167,12 @@ ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGBROW_DSPR2
ANY31C(I444ToARGBRow_Any_DSPR2, I444ToARGBRow_DSPR2, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_DSPR2, I422ToARGBRow_DSPR2, 1, 0, 4, 7)
ANY31C(I422ToARGB4444Row_Any_DSPR2, I422ToARGB4444Row_DSPR2, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_DSPR2, I422ToARGB1555Row_DSPR2, 1, 0, 2, 7)
#endif
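The ANY31C entries above generate those wrappers. Roughly, a wrapper runs the DSPR2 kernel over the largest multiple of its step (MASK + 1 pixels) and redoes the ragged tail through scratch buffers so the kernel never reads or writes past the row. A hedged sketch of the idea, not the exact macro expansion in row_any.cc (buffer layout and copy sizes differ there):

void I422ToARGBRow_Any_DSPR2_sketch(const uint8* src_y, const uint8* src_u,
                                    const uint8* src_v, uint8* dst_argb,
                                    const struct YuvConstants* yuvconstants,
                                    int width) {
  uint8 temp_y[16], temp_u[8], temp_v[8], temp_argb[64];
  int n = width & ~7;  // largest multiple of the 8-pixel step (MASK = 7)
  int r = width & 7;   // leftover pixels
  if (n > 0) {
    I422ToARGBRow_DSPR2(src_y, src_u, src_v, dst_argb, yuvconstants, n);
  }
  if (r > 0) {
    memset(temp_y, 0, sizeof(temp_y));  // the kernel always converts a full 8
    memset(temp_u, 0, sizeof(temp_u));
    memset(temp_v, 0, sizeof(temp_v));
    memcpy(temp_y, src_y + n, r);
    memcpy(temp_u, src_u + n / 2, (r + 1) / 2);  // 2x1 subsampled planes
    memcpy(temp_v, src_v + n / 2, (r + 1) / 2);
    I422ToARGBRow_DSPR2(temp_y, temp_u, temp_v, temp_argb, yuvconstants, 8);
    memcpy(dst_argb + n * 4, temp_argb, r * 4);  // write back only r pixels
  }
}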
#ifdef HAS_I422TOARGBROW_MSA
ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7)
@@ -291,6 +297,9 @@ ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#ifdef HAS_NV12TOARGBROW_NEON
ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
#endif #endif
#ifdef HAS_NV12TOARGBROW_DSPR2
ANY21C(NV12ToARGBRow_Any_DSPR2, NV12ToARGBRow_DSPR2, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_SSSE3
ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif
@@ -484,6 +493,33 @@ ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7)
#ifdef HAS_ARGB4444TOARGBROW_NEON
ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_RGB24TOARGBROW_DSPR2
ANY11(RGB24ToARGBRow_Any_DSPR2, RGB24ToARGBRow_DSPR2, 0, 3, 4, 7)
#endif
#ifdef HAS_RAWTOARGBROW_DSPR2
ANY11(RAWToARGBRow_Any_DSPR2, RAWToARGBRow_DSPR2, 0, 3, 4, 7)
#endif
#ifdef HAS_RGB565TOARGBROW_DSPR2
ANY11(RGB565ToARGBRow_Any_DSPR2, RGB565ToARGBRow_DSPR2, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB1555TOARGBROW_DSPR2
ANY11(ARGB1555ToARGBRow_Any_DSPR2, ARGB1555ToARGBRow_DSPR2, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB4444TOARGBROW_DSPR2
ANY11(ARGB4444ToARGBRow_Any_DSPR2, ARGB4444ToARGBRow_DSPR2, 0, 2, 4, 7)
#endif
#ifdef HAS_BGRATOYROW_DSPR2
ANY11(BGRAToYRow_Any_DSPR2, BGRAToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBTOYROW_DSPR2
ANY11(ARGBToYRow_Any_DSPR2, ARGBToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_ABGRTOYROW_DSPR2
ANY11(ABGRToYRow_Any_DSPR2, ABGRToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_RGBATOYROW_DSPR2
ANY11(RGBAToYRow_Any_DSPR2, RGBAToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGB4444TOARGBROW_MSA
ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15)
#endif
@@ -904,6 +940,18 @@ ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
#ifdef HAS_UYVYTOUVROW_NEON
ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
#endif
#ifdef HAS_BGRATOUVROW_DSPR2
ANY12S(BGRAToUVRow_Any_DSPR2, BGRAToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_ABGRTOUVROW_DSPR2
ANY12S(ABGRToUVRow_Any_DSPR2, ABGRToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_RGBATOUVROW_DSPR2
ANY12S(RGBAToUVRow_Any_DSPR2, RGBAToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVROW_DSPR2
ANY12S(ARGBToUVRow_Any_DSPR2, ARGBToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_YUY2TOUVROW_MSA
ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
#endif
...
@@ -202,8 +202,9 @@ void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
uint8 b1 = src_argb[4] >> 3;
uint8 g1 = src_argb[5] >> 2;
uint8 r1 = src_argb[6] >> 3;
WRITEWORD(
    dst_rgb,
    b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27));
dst_rgb += 4;
src_argb += 8;
}
@@ -237,8 +238,9 @@ void ARGBToRGB565DitherRow_C(const uint8* src_argb,
uint8 b1 = clamp255(src_argb[4] + dither1) >> 3;
uint8 g1 = clamp255(src_argb[5] + dither1) >> 2;
uint8 r1 = clamp255(src_argb[6] + dither1) >> 3;
WRITEWORD(
    dst_rgb,
    b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27));
dst_rgb += 4;
src_argb += 8;
}
...
@@ -585,126 +585,89 @@ void MirrorUVRow_DSPR2(const uint8* src_uv,
: "t0", "t1", "t2", "t3", "t4", "t5", "t7", "t8", "t9");
}
// Removed by this change:
// Convert (4 Y and 2 VU) I422 and arrange RGB values into
// t5 = | 0 | B0 | 0 | b0 |
// t4 = | 0 | B1 | 0 | b1 |
// t9 = | 0 | G0 | 0 | g0 |
// t8 = | 0 | G1 | 0 | g1 |
// t2 = | 0 | R0 | 0 | r0 |
// t1 = | 0 | R1 | 0 | r1 |
#define YUVTORGB \
"lw $t0, 0(%[y_buf]) \n" \
"lhu $t1, 0(%[u_buf]) \n" \
"lhu $t2, 0(%[v_buf]) \n" \
"preceu.ph.qbr $t1, $t1 \n" \
"preceu.ph.qbr $t2, $t2 \n" \
"preceu.ph.qbra $t3, $t0 \n" \
"preceu.ph.qbla $t0, $t0 \n" \
"subu.ph $t1, $t1, $s5 \n" \
"subu.ph $t2, $t2, $s5 \n" \
"subu.ph $t3, $t3, $s4 \n" \
"subu.ph $t0, $t0, $s4 \n" \
"mul.ph $t3, $t3, $s0 \n" \
"mul.ph $t0, $t0, $s0 \n" \
"shll.ph $t4, $t1, 0x7 \n" \
"subu.ph $t4, $t4, $t1 \n" \
"mul.ph $t6, $t1, $s1 \n" \
"mul.ph $t1, $t2, $s2 \n" \
"addq_s.ph $t5, $t4, $t3 \n" \
"addq_s.ph $t4, $t4, $t0 \n" \
"shra.ph $t5, $t5, 6 \n" \
"shra.ph $t4, $t4, 6 \n" \
"addiu %[u_buf], 2 \n" \
"addiu %[v_buf], 2 \n" \
"addu.ph $t6, $t6, $t1 \n" \
"mul.ph $t1, $t2, $s3 \n" \
"addu.ph $t9, $t6, $t3 \n" \
"addu.ph $t8, $t6, $t0 \n" \
"shra.ph $t9, $t9, 6 \n" \
"shra.ph $t8, $t8, 6 \n" \
"addu.ph $t2, $t1, $t3 \n" \
"addu.ph $t1, $t1, $t0 \n" \
"shra.ph $t2, $t2, 6 \n" \
"shra.ph $t1, $t1, 6 \n" \
"subu.ph $t5, $t5, $s5 \n" \
"subu.ph $t4, $t4, $s5 \n" \
"subu.ph $t9, $t9, $s5 \n" \
"subu.ph $t8, $t8, $s5 \n" \
"subu.ph $t2, $t2, $s5 \n" \
"subu.ph $t1, $t1, $s5 \n" \
"shll_s.ph $t5, $t5, 8 \n" \
"shll_s.ph $t4, $t4, 8 \n" \
"shll_s.ph $t9, $t9, 8 \n" \
"shll_s.ph $t8, $t8, 8 \n" \
"shll_s.ph $t2, $t2, 8 \n" \
"shll_s.ph $t1, $t1, 8 \n" \
"shra.ph $t5, $t5, 8 \n" \
"shra.ph $t4, $t4, 8 \n" \
"shra.ph $t9, $t9, 8 \n" \
"shra.ph $t8, $t8, 8 \n" \
"shra.ph $t2, $t2, 8 \n" \
"shra.ph $t1, $t1, 8 \n" \
"addu.ph $t5, $t5, $s5 \n" \
"addu.ph $t4, $t4, $s5 \n" \
"addu.ph $t9, $t9, $s5 \n" \
"addu.ph $t8, $t8, $s5 \n" \
"addu.ph $t2, $t2, $s5 \n" \
"addu.ph $t1, $t1, $s5 \n"
// TODO(fbarchard): accept yuv conversion constants.
void I422ToARGBRow_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"beqz %[width], 2f \n"
" repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
"repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
"repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
"repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
"repl.ph $s4, 16 \n" // |0|16|0|16|
"repl.ph $s5, 128 \n" // |128|128| // clipping
"lui $s6, 0xff00 \n"
"ori $s6, 0xff00 \n" // |ff|00|ff|00|
"1: \n"
YUVTORGB
// Arranging into argb format
"precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1|
"precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0|
"addiu %[width], -4 \n"
"precrq.qb.ph $t8, $t4, $t5 \n" // |G1|B1|G0|B0|
"precr.qb.ph $t9, $t4, $t5 \n" // |g1|b1|g0|b0|
"precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
"addiu %[y_buf], 4 \n"
"preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
"preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
"or $t1, $t1, $s6 \n" // |ff|R1|ff|R0|
"or $t2, $t2, $s6 \n" // |ff|r1|ff|r0|
"precrq.ph.w $t0, $t2, $t9 \n" // |ff|r1|g1|b1|
"precrq.ph.w $t3, $t1, $t8 \n" // |ff|R1|G1|B1|
"sll $t9, $t9, 16 \n"
"sll $t8, $t8, 16 \n"
"packrl.ph $t2, $t2, $t9 \n" // |ff|r0|g0|b0|
"packrl.ph $t1, $t1, $t8 \n" // |ff|R0|G0|B0|
// Store results.
"sw $t2, 0(%[rgb_buf]) \n"
"sw $t0, 4(%[rgb_buf]) \n"
"sw $t1, 8(%[rgb_buf]) \n"
"sw $t3, 12(%[rgb_buf]) \n"
"bnez %[width], 1b \n"
" addiu %[rgb_buf], 16 \n"
"2: \n"
".set pop \n"
: [y_buf] "+r"(y_buf), [u_buf] "+r"(u_buf), [v_buf] "+r"(v_buf),
[width] "+r"(width), [rgb_buf] "+r"(rgb_buf)
:
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1",
"s2", "s3", "s4", "s5", "s6");
}
// Added by this change:
void I422ToARGBRow_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
uint32 tmp_ub = yuvconstants->kUVToB[0];
uint32 tmp_ug = yuvconstants->kUVToG[0];
uint32 tmp_vg = yuvconstants->kUVToG[1];
uint32 tmp_vr = yuvconstants->kUVToR[1];
uint32 tmp_bb = yuvconstants->kUVBiasB[0];
uint32 tmp_bg = yuvconstants->kUVBiasG[0];
uint32 tmp_br = yuvconstants->kUVBiasR[0];
uint32 yg = yuvconstants->kYToRgb[0];
uint32 tmp_yg;
uint32 tmp_mask = 0x7fff7fff;
tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff);
tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff);
tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff);
tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff);
tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001;
tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff);
tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff);
tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001;
yg = yg * 0x0101;
for (x = 0; x < width - 1; x += 2) {
uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lbu %[tmp_t7], 0(%[src_y]) \n"
"lbu %[tmp_t1], 1(%[src_y]) \n"
"mul %[tmp_t7], %[tmp_t7], %[yg] \n"
"mul %[tmp_t1], %[tmp_t1], %[yg] \n"
"lbu %[tmp_t2], 0(%[src_u]) \n"
"lbu %[tmp_t3], 0(%[src_v]) \n"
"replv.ph %[tmp_t2], %[tmp_t2] \n"
"replv.ph %[tmp_t3], %[tmp_t3] \n"
"mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n"
"mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n"
"mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n"
"mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n"
"srl %[tmp_t7], %[tmp_t7], 16 \n"
"ins %[tmp_t1], %[tmp_t7], 0, 16 \n"
"addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n"
"addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n"
"addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n"
"addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n"
"addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n"
"subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n"
"addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n"
"shra.ph %[tmp_t7], %[tmp_t7], 6 \n"
"shra.ph %[tmp_t8], %[tmp_t8], 6 \n"
"shra.ph %[tmp_t9], %[tmp_t9], 6 \n"
"shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n"
"shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n"
"shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n"
"precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n"
"precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n"
"precrq.ph.w %[tmp_t9], %[tmp_t8], %[tmp_t7] \n"
"ins %[tmp_t7], %[tmp_t8], 16, 16 \n"
"precr.qb.ph %[tmp_t8], %[tmp_t9], %[tmp_t7] \n"
"precrq.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n"
"sw %[tmp_t8], 0(%[rgb_buf]) \n"
"sw %[tmp_t7], 4(%[rgb_buf]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9)
: [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v),
[tmp_ub] "r"(tmp_ub), [tmp_ug] "r"(tmp_ug), [yg] "r"(yg),
[tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr), [tmp_bb] "r"(tmp_bb),
[tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br), [tmp_yg] "r"(tmp_yg),
[rgb_buf] "r"(rgb_buf), [tmp_mask] "r"(tmp_mask));
src_y += 2;
src_u += 1;
src_v += 1;
rgb_buf += 8; // Advance 2 pixels.
}
}
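One detail all of the new conversion loops share: each 16-bit coefficient and bias from YuvConstants is replicated into both halves of a 32-bit word so a single mul.ph or addq_s.ph applies it to two pixels' lanes at once, and the ~x + 0x00010001 form negates both 16-bit lanes in one go (per-lane two's complement), letting the kernel add the U and V contributions with saturating adds. The idiom, written out as helpers (names are ours, for illustration only):

// Replicate a 16-bit value into both halves of a 32-bit word.
static uint32 repl16(uint32 v) {
  return ((uint32)(v & 0xffff) << 16) | (v & 0xffff);
}
// Negate each 16-bit lane of a packed pair (per-lane two's complement).
static uint32 neg16x2(uint32 v) {
  return ~v + 0x00010001;
}
// Usage, mirroring the setup above:
//   tmp_bb = repl16(yuvconstants->kUVBiasB[0]);
//   tmp_ub = neg16x2(repl16(yuvconstants->kUVToB[0]));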
// Bilinear filter 8x2 -> 8x1
@@ -740,10 +703,10 @@ void InterpolateRow_DSPR2(uint8* dst_ptr,
"addq.ph $t7, $t7, $t9 \n"
"addq.ph $t2, $t2, $t4 \n"
"addq.ph $t3, $t3, $t5 \n"
// Bug fix: rounding shifts (shra_r.ph) replace the truncating shra.ph here.
"shra_r.ph $t6, $t6, 8 \n"
"shra_r.ph $t7, $t7, 8 \n"
"shra_r.ph $t2, $t2, 8 \n"
"shra_r.ph $t3, $t3, 8 \n"
"precr.qb.ph $t6, $t6, $t7 \n"
"precr.qb.ph $t2, $t2, $t3 \n"
"addiu %[src_ptr], %[src_ptr], 8 \n"
@@ -761,6 +724,993 @@ void InterpolateRow_DSPR2(uint8* dst_ptr,
[y0_fraction] "r"(y0_fraction), [src_stride] "r"(src_stride)
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9");
}
void RGB24ToARGBRow_DSPR2(const uint8* src_rgb24, uint8* dst_argb, int width) {
int x;
uint32 tmp_mask = 0xff;
uint32 tmp_t1;
for (x = 0; x < (width - 1); ++x) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"ulw %[tmp_t1], 0(%[src_rgb24]) \n"
"addiu %[dst_argb], %[dst_argb], 4 \n"
"addiu %[src_rgb24], %[src_rgb24], 3 \n"
"ins %[tmp_t1], %[tmp_mask], 24, 8 \n"
"sw %[tmp_t1], -4(%[dst_argb]) \n"
".set pop \n"
: [src_rgb24] "+r"(src_rgb24), [dst_argb] "+r"(dst_argb),
[tmp_t1] "=&r"(tmp_t1)
: [tmp_mask] "r"(tmp_mask)
: "memory");
}
// The unaligned 4-byte load (ulw) above would read one byte past the row
// on the final pixel, so the last RGB24 pixel is converted in plain C.
uint8 b = src_rgb24[0];
uint8 g = src_rgb24[1];
uint8 r = src_rgb24[2];
dst_argb[0] = b;
dst_argb[1] = g;
dst_argb[2] = r;
dst_argb[3] = 255u;
}
void RAWToARGBRow_DSPR2(const uint8* src_raw, uint8* dst_argb, int width) {
int x;
uint32 tmp_mask = 0xff;
uint32 tmp_t1, tmp_t2;
for (x = 0; x < (width - 1); ++x) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"ulw %[tmp_t1], 0(%[src_raw]) \n"
"addiu %[dst_argb], %[dst_argb], 4 \n"
"addiu %[src_raw], %[src_raw], 3 \n"
"srl %[tmp_t2], %[tmp_t1], 16 \n"
"ins %[tmp_t1], %[tmp_mask], 24, 8 \n"
"ins %[tmp_t1], %[tmp_t1], 16, 8 \n"
"ins %[tmp_t1], %[tmp_t2], 0, 8 \n"
"sw %[tmp_t1], -4(%[dst_argb]) \n"
".set pop \n"
: [src_raw] "+r"(src_raw), [dst_argb] "+r"(dst_argb),
[tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2)
: [tmp_mask] "r"(tmp_mask)
: "memory");
}
// Same tail handling as RGB24ToARGBRow_DSPR2: convert the final RAW pixel
// in C so the 4-byte ulw never reads past the end of the row.
uint8 r = src_raw[0];
uint8 g = src_raw[1];
uint8 b = src_raw[2];
dst_argb[0] = b;
dst_argb[1] = g;
dst_argb[2] = r;
dst_argb[3] = 255u;
}
void RGB565ToARGBRow_DSPR2(const uint8* src_rgb565,
uint8* dst_argb,
int width) {
int x;
uint32 tmp_mask = 0xff;
uint32 tmp_t1, tmp_t2, tmp_t3;
for (x = 0; x < width; ++x) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lhu %[tmp_t1], 0(%[src_rgb565]) \n"
"addiu %[dst_argb], %[dst_argb], 4 \n"
"addiu %[src_rgb565], %[src_rgb565], 2 \n"
"sll %[tmp_t2], %[tmp_t1], 8 \n"
"ins %[tmp_t2], %[tmp_mask], 24,8 \n"
"ins %[tmp_t2], %[tmp_t1], 3, 16 \n"
"ins %[tmp_t2], %[tmp_t1], 5, 11 \n"
"srl %[tmp_t3], %[tmp_t1], 9 \n"
"ins %[tmp_t2], %[tmp_t3], 8, 2 \n"
"ins %[tmp_t2], %[tmp_t1], 3, 5 \n"
"srl %[tmp_t3], %[tmp_t1], 2 \n"
"ins %[tmp_t2], %[tmp_t3], 0, 3 \n"
"sw %[tmp_t2], -4(%[dst_argb]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [src_rgb565] "+r"(src_rgb565),
[dst_argb] "+r"(dst_argb)
: [tmp_mask] "r"(tmp_mask));
}
}
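For reference, the ins/srl sequence above is the usual 565-to-888 expansion: each field's top bits are replicated into its low bits so full-scale values map to 0xff. One pixel in scalar form (a sketch for clarity, not code from this change):

// Scalar sketch of one RGB565 -> ARGB pixel as expanded above.
static void RGB565PixelToARGB(uint16 p, uint8* dst_argb) {
  uint8 b = (uint8)(p & 0x1f);
  uint8 g = (uint8)((p >> 5) & 0x3f);
  uint8 r = (uint8)((p >> 11) & 0x1f);
  dst_argb[0] = (uint8)((b << 3) | (b >> 2));  // replicate top bits
  dst_argb[1] = (uint8)((g << 2) | (g >> 4));
  dst_argb[2] = (uint8)((r << 3) | (r >> 2));
  dst_argb[3] = 255u;
}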
void ARGB1555ToARGBRow_DSPR2(const uint8* src_argb1555,
uint8* dst_argb,
int width) {
int x;
uint32 tmp_t1, tmp_t2, tmp_t3;
for (x = 0; x < width; ++x) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lh %[tmp_t1], 0(%[src_argb1555]) \n"
"addiu %[dst_argb], %[dst_argb], 4 \n"
"addiu %[src_argb1555], %[src_argb1555], 2 \n"
"sll %[tmp_t2], %[tmp_t1], 9 \n"
"ins %[tmp_t2], %[tmp_t1], 4, 15 \n"
"ins %[tmp_t2], %[tmp_t1], 6, 10 \n"
"srl %[tmp_t3], %[tmp_t1], 7 \n"
"ins %[tmp_t2], %[tmp_t3], 8, 3 \n"
"ins %[tmp_t2], %[tmp_t1], 3, 5 \n"
"srl %[tmp_t3], %[tmp_t1], 2 \n"
"ins %[tmp_t2], %[tmp_t3], 0, 3 \n"
"sw %[tmp_t2], -4(%[dst_argb]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [src_argb1555] "+r"(src_argb1555),
[dst_argb] "+r"(dst_argb)
:);
}
}
void ARGB4444ToARGBRow_DSPR2(const uint8* src_argb4444,
uint8* dst_argb,
int width) {
int x;
uint32 tmp_t1;
for (x = 0; x < width; ++x) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lh %[tmp_t1], 0(%[src_argb4444]) \n"
"addiu %[dst_argb], %[dst_argb], 4 \n"
"addiu %[src_argb4444], %[src_argb4444], 2 \n"
"ins %[tmp_t1], %[tmp_t1], 16, 16 \n"
"ins %[tmp_t1], %[tmp_t1], 12, 16 \n"
"ins %[tmp_t1], %[tmp_t1], 8, 12 \n"
"ins %[tmp_t1], %[tmp_t1], 4, 8 \n"
"sw %[tmp_t1], -4(%[dst_argb]) \n"
".set pop \n"
: [src_argb4444] "+r"(src_argb4444), [dst_argb] "+r"(dst_argb),
[tmp_t1] "=&r"(tmp_t1));
}
}
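Similarly, the chained ins instructions above duplicate every 4-bit field of an ARGB4444 pixel into both nibbles of the output byte, so 0xf becomes 0xff. A scalar sketch of one pixel (illustrative, not from this change):

// Scalar sketch of one ARGB4444 -> ARGB pixel (nibble duplication).
static void ARGB4444PixelToARGB(uint16 p, uint8* dst_argb) {
  dst_argb[0] = (uint8)((p & 0x000f) * 0x11);          // B: (b << 4) | b
  dst_argb[1] = (uint8)(((p >> 4) & 0x000f) * 0x11);   // G
  dst_argb[2] = (uint8)(((p >> 8) & 0x000f) * 0x11);   // R
  dst_argb[3] = (uint8)(((p >> 12) & 0x000f) * 0x11);  // A
}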
void I444ToARGBRow_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
uint32 tmp_ub = yuvconstants->kUVToB[0];
uint32 tmp_ug = yuvconstants->kUVToG[0];
uint32 tmp_vg = yuvconstants->kUVToG[1];
uint32 tmp_vr = yuvconstants->kUVToR[1];
uint32 tmp_bb = yuvconstants->kUVBiasB[0];
uint32 tmp_bg = yuvconstants->kUVBiasG[0];
uint32 tmp_br = yuvconstants->kUVBiasR[0];
uint32 yg = yuvconstants->kYToRgb[0];
uint32 tmp_mask = 0x7fff7fff;
uint32 tmp_yg;
tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff);
tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff);
tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff);
tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff);
tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001;
tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff);
tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff);
tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001;
yg = yg * 0x0101;
for (x = 0; x < width - 1; x += 2) {
uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lbu %[tmp_t7], 0(%[y_buf]) \n"
"lbu %[tmp_t1], 1(%[y_buf]) \n"
"mul %[tmp_t7], %[tmp_t7], %[yg] \n"
"mul %[tmp_t1], %[tmp_t1], %[yg] \n"
"lh %[tmp_t2], 0(%[u_buf]) \n"
"lh %[tmp_t3], 0(%[v_buf]) \n"
"preceu.ph.qbr %[tmp_t2], %[tmp_t2] \n"
"preceu.ph.qbr %[tmp_t3], %[tmp_t3] \n"
"mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n"
"mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n"
"mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n"
"mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n"
"srl %[tmp_t7], %[tmp_t7], 16 \n"
"ins %[tmp_t1], %[tmp_t7], 0, 16 \n"
"addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n"
"addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n"
"addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n"
"addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n"
"addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n"
"subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n"
"addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n"
"shra.ph %[tmp_t7], %[tmp_t7], 6 \n"
"shra.ph %[tmp_t8], %[tmp_t8], 6 \n"
"shra.ph %[tmp_t9], %[tmp_t9], 6 \n"
"shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n"
"shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n"
"shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n"
"precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n"
"precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n"
"precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n"
"ins %[tmp_t7], %[tmp_t8], 16, 16 \n"
"precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n"
"precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n"
"sw %[tmp_t8], 0(%[rgb_buf]) \n"
"sw %[tmp_t7], 4(%[rgb_buf]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9)
: [y_buf] "r"(y_buf), [yg] "r"(yg), [u_buf] "r"(u_buf),
[v_buf] "r"(v_buf), [tmp_ub] "r"(tmp_ub), [tmp_ug] "r"(tmp_ug),
[tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr), [tmp_bb] "r"(tmp_bb),
[tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br), [tmp_yg] "r"(tmp_yg),
[rgb_buf] "r"(rgb_buf), [tmp_mask] "r"(tmp_mask));
y_buf += 2;
u_buf += 2;
v_buf += 2;
rgb_buf += 8; // Advance 2 pixels.
}
}
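In scalar terms, each 16-bit lane of the arithmetic above is libyuv's usual fixed-point step: scale Y by the replicated yg constant, add the per-channel bias, fold in the (pre-negated) U/V products, arithmetic-shift by 6, and saturate to a byte. A hedged one-pixel sketch along the lines of YuvPixel in row_common.cc, written against the un-negated coefficients (Clamp0255 stands in for libyuv's clamping helper):

static uint8 Clamp0255(int v) {
  return (uint8)(v < 0 ? 0 : (v > 255 ? 255 : v));
}
// One pixel, mirroring one halfword lane of the DSPR2 loops; yg is the
// value already multiplied by 0x0101 as above.
static void YuvPixelSketch(uint8 y, uint8 u, uint8 v,
                           uint8* b, uint8* g, uint8* r,
                           uint32 yg, int ub, int ug, int vg, int vr,
                           int bb, int bg, int br) {
  int y1 = (int)((y * yg) >> 16);
  *b = Clamp0255((y1 + bb - u * ub) >> 6);             // ub and vr were
  *g = Clamp0255((y1 + bg - (u * ug + v * vg)) >> 6);  // negated in the asm,
  *r = Clamp0255((y1 + br - v * vr) >> 6);             // so here we subtract.
}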
void I422ToARGB4444Row_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width) {
int x;
uint32 tmp_ub = yuvconstants->kUVToB[0];
uint32 tmp_ug = yuvconstants->kUVToG[0];
uint32 tmp_vg = yuvconstants->kUVToG[1];
uint32 tmp_vr = yuvconstants->kUVToR[1];
uint32 tmp_bb = yuvconstants->kUVBiasB[0];
uint32 tmp_bg = yuvconstants->kUVBiasG[0];
uint32 tmp_br = yuvconstants->kUVBiasR[0];
uint32 yg = yuvconstants->kYToRgb[0];
uint32 tmp_yg;
uint32 tmp_mask = 0x7fff7fff;
tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff);
tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff);
tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff);
tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff);
tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001;
tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff);
tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff);
tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001;
yg = yg * 0x0101;
for (x = 0; x < width - 1; x += 2) {
uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lbu %[tmp_t7], 0(%[src_y]) \n"
"lbu %[tmp_t1], 1(%[src_y]) \n"
"mul %[tmp_t7], %[tmp_t7], %[yg] \n"
"mul %[tmp_t1], %[tmp_t1], %[yg] \n"
"lbu %[tmp_t2], 0(%[src_u]) \n"
"lbu %[tmp_t3], 0(%[src_v]) \n"
"replv.ph %[tmp_t2], %[tmp_t2] \n"
"replv.ph %[tmp_t3], %[tmp_t3] \n"
"mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n"
"mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n"
"mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n"
"mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n"
"srl %[tmp_t7], %[tmp_t7], 16 \n"
"ins %[tmp_t1], %[tmp_t7], 0, 16 \n"
"addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n"
"addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n"
"addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n"
"addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n"
"addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n"
"subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n"
"addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n"
"shra.ph %[tmp_t7], %[tmp_t7], 6 \n"
"shra.ph %[tmp_t8], %[tmp_t8], 6 \n"
"shra.ph %[tmp_t9], %[tmp_t9], 6 \n"
"shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n"
"shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n"
"shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n"
"precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n"
"precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n"
"precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n"
"ins %[tmp_t7], %[tmp_t8], 16, 16 \n"
"precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n"
"precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n"
"shrl.qb %[tmp_t1], %[tmp_t8], 4 \n"
"shrl.qb %[tmp_t2], %[tmp_t7], 4 \n"
"shrl.ph %[tmp_t8], %[tmp_t1], 4 \n"
"shrl.ph %[tmp_t7], %[tmp_t2], 4 \n"
"or %[tmp_t8], %[tmp_t8], %[tmp_t1] \n"
"or %[tmp_t7], %[tmp_t7], %[tmp_t2] \n"
"precr.qb.ph %[tmp_t8], %[tmp_t7], %[tmp_t8] \n"
"sw %[tmp_t8], 0(%[dst_argb4444]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9)
: [dst_argb4444] "r"(dst_argb4444), [yg] "r"(yg), [src_u] "r"(src_u),
[src_v] "r"(src_v), [src_y] "r"(src_y), [tmp_ub] "r"(tmp_ub),
[tmp_ug] "r"(tmp_ug), [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr),
[tmp_bb] "r"(tmp_bb), [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br),
[tmp_yg] "r"(tmp_yg), [tmp_mask] "r"(tmp_mask));
src_y += 2;
src_u += 1;
src_v += 1;
dst_argb4444 += 4; // Advance 2 pixels.
}
}
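The shrl.qb / shrl.ph / or sequence at the end of that loop packs the two clamped ARGB8888 results down to ARGB4444 by keeping each byte's high nibble. One pixel of that pack in scalar form (a sketch; a, r, g, b are the already-clamped 8-bit channels):

// Scalar sketch of the final 8888 -> 4444 pack used above.
static uint16 PackARGB4444(uint8 a, uint8 r, uint8 g, uint8 b) {
  return (uint16)(((a >> 4) << 12) | ((r >> 4) << 8) | ((g >> 4) << 4) |
                  (b >> 4));
}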
void I422ToARGB1555Row_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width) {
int x;
uint32 tmp_ub = yuvconstants->kUVToB[0];
uint32 tmp_ug = yuvconstants->kUVToG[0];
uint32 tmp_vg = yuvconstants->kUVToG[1];
uint32 tmp_vr = yuvconstants->kUVToR[1];
uint32 tmp_bb = yuvconstants->kUVBiasB[0];
uint32 tmp_bg = yuvconstants->kUVBiasG[0];
uint32 tmp_br = yuvconstants->kUVBiasR[0];
uint32 yg = yuvconstants->kYToRgb[0];
uint32 tmp_yg;
uint32 tmp_mask = 0x80008000;
tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff);
tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff);
tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff);
tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff);
tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001;
tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff);
tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff);
tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001;
yg = yg * 0x0101;
for (x = 0; x < width - 1; x += 2) {
uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lbu %[tmp_t7], 0(%[src_y]) \n"
"lbu %[tmp_t1], 1(%[src_y]) \n"
"mul %[tmp_t7], %[tmp_t7], %[yg] \n"
"mul %[tmp_t1], %[tmp_t1], %[yg] \n"
"lbu %[tmp_t2], 0(%[src_u]) \n"
"lbu %[tmp_t3], 0(%[src_v]) \n"
"replv.ph %[tmp_t2], %[tmp_t2] \n"
"replv.ph %[tmp_t3], %[tmp_t3] \n"
"mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n"
"mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n"
"mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n"
"mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n"
"srl %[tmp_t7], %[tmp_t7], 16 \n"
"ins %[tmp_t1], %[tmp_t7], 0, 16 \n"
"addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n"
"addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n"
"addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n"
"addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n"
"addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n"
"subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n"
"addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n"
"shra.ph %[tmp_t7], %[tmp_t7], 6 \n"
"shra.ph %[tmp_t8], %[tmp_t8], 6 \n"
"shra.ph %[tmp_t9], %[tmp_t9], 6 \n"
"shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n"
"shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n"
"shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n"
"precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n"
"precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n"
"precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n"
"ins %[tmp_t7], %[tmp_t8], 16, 16 \n"
"precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n"
"precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n"
"ins %[tmp_t3], %[tmp_t8], 7, 24 \n"
"ins %[tmp_t3], %[tmp_t8], 10, 16 \n"
"ins %[tmp_t3], %[tmp_t8], 13, 8 \n"
"ins %[tmp_t4], %[tmp_t7], 7, 24 \n"
"ins %[tmp_t4], %[tmp_t7], 10, 16 \n"
"ins %[tmp_t4], %[tmp_t7], 13, 8 \n"
"precrq.ph.w %[tmp_t8], %[tmp_t4], %[tmp_t3] \n"
"or %[tmp_t8], %[tmp_t8], %[tmp_mask]\n"
"sw %[tmp_t8], 0(%[dst_argb1555]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9)
: [dst_argb1555] "r"(dst_argb1555), [yg] "r"(yg), [src_u] "r"(src_u),
[src_v] "r"(src_v), [src_y] "r"(src_y), [tmp_ub] "r"(tmp_ub),
[tmp_ug] "r"(tmp_ug), [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr),
[tmp_bb] "r"(tmp_bb), [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br),
[tmp_yg] "r"(tmp_yg), [tmp_mask] "r"(tmp_mask));
src_y += 2;
src_u += 1;
src_v += 1;
dst_argb1555 += 4; // Advance 2 pixels.
}
}
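Here the final or with tmp_mask (0x80008000) forces the alpha bit on for both packed pixels, matching the 0x8000 in this scalar sketch of the 8888-to-1555 pack (illustrative, channels already clamped):

// Scalar sketch of the final 8888 -> 1555 pack used above.
static uint16 PackARGB1555(uint8 r, uint8 g, uint8 b) {
  return (uint16)(0x8000 | ((r >> 3) << 10) | ((g >> 3) << 5) | (b >> 3));
}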
void NV12ToARGBRow_DSPR2(const uint8* src_y,
const uint8* src_uv,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
uint32 tmp_ub = yuvconstants->kUVToB[0];
uint32 tmp_ug = yuvconstants->kUVToG[0];
uint32 tmp_vg = yuvconstants->kUVToG[1];
uint32 tmp_vr = yuvconstants->kUVToR[1];
uint32 tmp_bb = yuvconstants->kUVBiasB[0];
uint32 tmp_bg = yuvconstants->kUVBiasG[0];
uint32 tmp_br = yuvconstants->kUVBiasR[0];
uint32 yg = yuvconstants->kYToRgb[0];
uint32 tmp_mask = 0x7fff7fff;
uint32 tmp_yg;
tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff);
tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff);
tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff);
tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff);
tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001;
tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff);
tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff);
tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001;
yg = yg * 0x0101;
for (x = 0; x < width - 1; x += 2) {
uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lbu %[tmp_t7], 0(%[src_y]) \n"
"lbu %[tmp_t1], 1(%[src_y]) \n"
"mul %[tmp_t7], %[tmp_t7], %[yg] \n"
"mul %[tmp_t1], %[tmp_t1], %[yg] \n"
"lbu %[tmp_t2], 0(%[src_uv]) \n"
"lbu %[tmp_t3], 1(%[src_uv]) \n"
"replv.ph %[tmp_t2], %[tmp_t2] \n"
"replv.ph %[tmp_t3], %[tmp_t3] \n"
"mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n"
"mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n"
"mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n"
"mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n"
"srl %[tmp_t7], %[tmp_t7], 16 \n"
"ins %[tmp_t1], %[tmp_t7], 0, 16 \n"
"addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n"
"addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n"
"addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n"
"addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n"
"addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n"
"subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n"
"addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n"
"shra.ph %[tmp_t7], %[tmp_t7], 6 \n"
"shra.ph %[tmp_t8], %[tmp_t8], 6 \n"
"shra.ph %[tmp_t9], %[tmp_t9], 6 \n"
"shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n"
"shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n"
"shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n"
"precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n"
"precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n"
"precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n"
"ins %[tmp_t7], %[tmp_t8], 16, 16 \n"
"precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n"
"precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n"
"sw %[tmp_t8], 0(%[rgb_buf]) \n"
"sw %[tmp_t7], 4(%[rgb_buf]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9)
: [src_y] "r"(src_y), [src_uv] "r"(src_uv), [yg] "r"(yg),
[tmp_ub] "r"(tmp_ub), [tmp_ug] "r"(tmp_ug), [tmp_vg] "r"(tmp_vg),
[tmp_vr] "r"(tmp_vr), [tmp_bb] "r"(tmp_bb), [tmp_bg] "r"(tmp_bg),
[tmp_br] "r"(tmp_br), [tmp_yg] "r"(tmp_yg), [rgb_buf] "r"(rgb_buf),
[tmp_mask] "r"(tmp_mask));
src_y += 2;
src_uv += 2;
rgb_buf += 8; // Advance 2 pixels.
}
}
void BGRAToUVRow_DSPR2(const uint8* src_rgb0,
int src_stride_rgb,
uint8* dst_u,
uint8* dst_v,
int width) {
const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;
int x;
int const1 = 0xffda0000;
int const2 = 0x0070ffb6;
int const3 = 0x00700000;
int const4 = 0xffeeffa2;
int const5 = 0x100;
for (x = 0; x < width - 1; x += 2) {
int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
int tmp_t6, tmp_t7, tmp_t8;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lw %[tmp_t1], 0(%[src_rgb0]) \n"
"lw %[tmp_t2], 4(%[src_rgb0]) \n"
"lw %[tmp_t3], 0(%[src_rgb1]) \n"
"lw %[tmp_t4], 4(%[src_rgb1]) \n"
"preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
"preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
"preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
"preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
"preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
"preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
"preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
"addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n"
"addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n"
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n"
"addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n"
"addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n"
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n"
"shrl.ph %[tmp_t5], %[tmp_t5], 2 \n"
"shrl.ph %[tmp_t1], %[tmp_t1], 2 \n"
"mult $ac0, %[const5], %[const5] \n"
"mult $ac1, %[const5], %[const5] \n"
"dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n"
"dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n"
"dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n"
"dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n"
"extr_r.w %[tmp_t7], $ac0, 9 \n"
"extr_r.w %[tmp_t8], $ac1, 9 \n"
"addiu %[dst_u], %[dst_u], 1 \n"
"addiu %[dst_v], %[dst_v], 1 \n"
"addiu %[src_rgb0], %[src_rgb0], 8 \n"
"addiu %[src_rgb1], %[src_rgb1], 8 \n"
"sb %[tmp_t7], -1(%[dst_u]) \n"
"sb %[tmp_t8], -1(%[dst_v]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
[src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1),
[dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v)
: [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3),
[const4] "r"(const4), [const5] "r"(const5)
: "hi", "lo", "$ac1lo", "$ac1hi");
}
}
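// Illustrative sketch: the addu.ph/shrl.ph ladder above averages a 2x2 block
// per channel, and the two accumulators then evaluate libyuv's standard U/V
// dot products. The coefficient words pack signed halfword pairs
// (0x70 = 112, 0xffda = -38, 0xffb6 = -74, 0xffee = -18, 0xffa2 = -94);
// since dpaq_s.w.ph doubles each product, the mult $ac, 0x100, 0x100 seed
// plus the rounding extr_r.w ..., 9 supply the +0x8080 bias:
//   out = (0x10000 + 2 * dot + 0x100) >> 9
//       = 128 + ((dot + 128) >> 8)   // i.e. (dot + 0x8080) >> 8
static __inline int RGBToU_Sketch(int avg_r, int avg_g, int avg_b) {
  return (112 * avg_b - 74 * avg_g - 38 * avg_r + 0x8080) >> 8;
}
static __inline int RGBToV_Sketch(int avg_r, int avg_g, int avg_b) {
  return (112 * avg_r - 94 * avg_g - 18 * avg_b + 0x8080) >> 8;
}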
void BGRAToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) {
int x;
int const1 = 0x00420000;
int const2 = 0x00190081;
int const5 = 0x40;
for (x = 0; x < width; x += 4) {
int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
int tmp_t6, tmp_t7, tmp_t8;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lw %[tmp_t1], 0(%[src_argb0]) \n"
"lw %[tmp_t2], 4(%[src_argb0]) \n"
"lw %[tmp_t3], 8(%[src_argb0]) \n"
"lw %[tmp_t4], 12(%[src_argb0]) \n"
"preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
"preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
"preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
"preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
"preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
"preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
"preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
"mult $ac0, %[const5], %[const5] \n"
"mult $ac1, %[const5], %[const5] \n"
"mult $ac2, %[const5], %[const5] \n"
"mult $ac3, %[const5], %[const5] \n"
"dpa.w.ph $ac0, %[tmp_t5], %[const1] \n"
"dpa.w.ph $ac1, %[tmp_t6], %[const1] \n"
"dpa.w.ph $ac2, %[tmp_t7], %[const1] \n"
"dpa.w.ph $ac3, %[tmp_t8], %[const1] \n"
"dpa.w.ph $ac0, %[tmp_t1], %[const2] \n"
"dpa.w.ph $ac1, %[tmp_t2], %[const2] \n"
"dpa.w.ph $ac2, %[tmp_t3], %[const2] \n"
"dpa.w.ph $ac3, %[tmp_t4], %[const2] \n"
"extr_r.w %[tmp_t1], $ac0, 8 \n"
"extr_r.w %[tmp_t2], $ac1, 8 \n"
"extr_r.w %[tmp_t3], $ac2, 8 \n"
"extr_r.w %[tmp_t4], $ac3, 8 \n"
"addiu %[src_argb0],%[src_argb0], 16 \n"
"addiu %[dst_y], %[dst_y], 4 \n"
"sb %[tmp_t1], -4(%[dst_y]) \n"
"sb %[tmp_t2], -3(%[dst_y]) \n"
"sb %[tmp_t3], -2(%[dst_y]) \n"
"sb %[tmp_t4], -1(%[dst_y]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
[src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y)
: [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5)
: "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo",
"$ac3hi");
}
}
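// Illustrative sketch: every *ToYRow_DSPR2 variant evaluates the same BT.601
// luma polynomial; only the packing of const1/const2 changes to match each
// format's byte order (0x42 = 66, 0x81 = 129, 0x19 = 25). The
// mult $ac, 0x40, 0x40 seed (0x1000) plus the rounding extr_r.w ..., 8
// contribute the usual +0x1080 (+16 offset, +128 rounding):
static __inline int RGBToY_Sketch(uint8 r, uint8 g, uint8 b) {
  return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
}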
void ABGRToUVRow_DSPR2(const uint8* src_rgb0,
int src_stride_rgb,
uint8* dst_u,
uint8* dst_v,
int width) {
const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;
int x;
int const1 = 0xffb6ffda;
int const2 = 0x00000070;
int const3 = 0xffa20070;
int const4 = 0x0000ffee;
int const5 = 0x100;
for (x = 0; x < width - 1; x += 2) {
int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
int tmp_t6, tmp_t7, tmp_t8;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lw %[tmp_t1], 0(%[src_rgb0]) \n"
"lw %[tmp_t2], 4(%[src_rgb0]) \n"
"lw %[tmp_t3], 0(%[src_rgb1]) \n"
"lw %[tmp_t4], 4(%[src_rgb1]) \n"
"preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
"preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
"preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
"preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
"preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
"preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
"preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
"addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n"
"addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n"
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n"
"addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n"
"addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n"
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n"
"shrl.ph %[tmp_t5], %[tmp_t5], 2 \n"
"shrl.ph %[tmp_t1], %[tmp_t1], 2 \n"
"mult $ac0, %[const5], %[const5] \n"
"mult $ac1, %[const5], %[const5] \n"
"dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n"
"dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n"
"dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n"
"dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n"
"extr_r.w %[tmp_t7], $ac0, 9 \n"
"extr_r.w %[tmp_t8], $ac1, 9 \n"
"addiu %[dst_u], %[dst_u], 1 \n"
"addiu %[dst_v], %[dst_v], 1 \n"
"addiu %[src_rgb0], %[src_rgb0], 8 \n"
"addiu %[src_rgb1], %[src_rgb1], 8 \n"
"sb %[tmp_t7], -1(%[dst_u]) \n"
"sb %[tmp_t8], -1(%[dst_v]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
[src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1),
[dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v)
: [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3),
[const4] "r"(const4), [const5] "r"(const5)
: "hi", "lo", "$ac1lo", "$ac1hi");
}
}
void ARGBToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) {
int x;
int const1 = 0x00810019;
int const2 = 0x00000042;
int const5 = 0x40;
for (x = 0; x < width; x += 4) {
int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
int tmp_t6, tmp_t7, tmp_t8;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lw %[tmp_t1], 0(%[src_argb0]) \n"
"lw %[tmp_t2], 4(%[src_argb0]) \n"
"lw %[tmp_t3], 8(%[src_argb0]) \n"
"lw %[tmp_t4], 12(%[src_argb0]) \n"
"preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
"preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
"preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
"preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
"preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
"preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
"preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
"mult $ac0, %[const5], %[const5] \n"
"mult $ac1, %[const5], %[const5] \n"
"mult $ac2, %[const5], %[const5] \n"
"mult $ac3, %[const5], %[const5] \n"
"dpa.w.ph $ac0, %[tmp_t5], %[const1] \n"
"dpa.w.ph $ac1, %[tmp_t6], %[const1] \n"
"dpa.w.ph $ac2, %[tmp_t7], %[const1] \n"
"dpa.w.ph $ac3, %[tmp_t8], %[const1] \n"
"dpa.w.ph $ac0, %[tmp_t1], %[const2] \n"
"dpa.w.ph $ac1, %[tmp_t2], %[const2] \n"
"dpa.w.ph $ac2, %[tmp_t3], %[const2] \n"
"dpa.w.ph $ac3, %[tmp_t4], %[const2] \n"
"extr_r.w %[tmp_t1], $ac0, 8 \n"
"extr_r.w %[tmp_t2], $ac1, 8 \n"
"extr_r.w %[tmp_t3], $ac2, 8 \n"
"extr_r.w %[tmp_t4], $ac3, 8 \n"
"addiu %[dst_y], %[dst_y], 4 \n"
"addiu %[src_argb0],%[src_argb0], 16 \n"
"sb %[tmp_t1], -4(%[dst_y]) \n"
"sb %[tmp_t2], -3(%[dst_y]) \n"
"sb %[tmp_t3], -2(%[dst_y]) \n"
"sb %[tmp_t4], -1(%[dst_y]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
[src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y)
: [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5)
: "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo",
"$ac3hi");
}
}
void ABGRToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) {
int x;
int const1 = 0x00810042;
int const2 = 0x00000019;
int const5 = 0x40;
for (x = 0; x < width; x += 4) {
int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
int tmp_t6, tmp_t7, tmp_t8;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lw %[tmp_t1], 0(%[src_argb0]) \n"
"lw %[tmp_t2], 4(%[src_argb0]) \n"
"lw %[tmp_t3], 8(%[src_argb0]) \n"
"lw %[tmp_t4], 12(%[src_argb0]) \n"
"preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
"preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
"preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
"preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
"preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
"preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
"preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
"mult $ac0, %[const5], %[const5] \n"
"mult $ac1, %[const5], %[const5] \n"
"mult $ac2, %[const5], %[const5] \n"
"mult $ac3, %[const5], %[const5] \n"
"dpa.w.ph $ac0, %[tmp_t5], %[const1] \n"
"dpa.w.ph $ac1, %[tmp_t6], %[const1] \n"
"dpa.w.ph $ac2, %[tmp_t7], %[const1] \n"
"dpa.w.ph $ac3, %[tmp_t8], %[const1] \n"
"dpa.w.ph $ac0, %[tmp_t1], %[const2] \n"
"dpa.w.ph $ac1, %[tmp_t2], %[const2] \n"
"dpa.w.ph $ac2, %[tmp_t3], %[const2] \n"
"dpa.w.ph $ac3, %[tmp_t4], %[const2] \n"
"extr_r.w %[tmp_t1], $ac0, 8 \n"
"extr_r.w %[tmp_t2], $ac1, 8 \n"
"extr_r.w %[tmp_t3], $ac2, 8 \n"
"extr_r.w %[tmp_t4], $ac3, 8 \n"
"addiu %[src_argb0],%[src_argb0], 16 \n"
"addiu %[dst_y], %[dst_y], 4 \n"
"sb %[tmp_t1], -4(%[dst_y]) \n"
"sb %[tmp_t2], -3(%[dst_y]) \n"
"sb %[tmp_t3], -2(%[dst_y]) \n"
"sb %[tmp_t4], -1(%[dst_y]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
[src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y)
: [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5)
: "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo",
"$ac3hi");
}
}
void RGBAToUVRow_DSPR2(const uint8* src_rgb0,
int src_stride_rgb,
uint8* dst_u,
uint8* dst_v,
int width) {
const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;
int x;
int const1 = 0xffb60070;
int const2 = 0x0000ffda;
int const3 = 0xffa2ffee;
int const4 = 0x00000070;
int const5 = 0x100;
for (x = 0; x < width - 1; x += 2) {
int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
int tmp_t6, tmp_t7, tmp_t8;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"ulw %[tmp_t1], 0+1(%[src_rgb0]) \n"
"ulw %[tmp_t2], 4+1(%[src_rgb0]) \n"
"ulw %[tmp_t3], 0+1(%[src_rgb1]) \n"
"ulw %[tmp_t4], 4+1(%[src_rgb1]) \n"
"preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
"preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
"preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
"preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
"preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
"preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
"preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
"addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n"
"addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n"
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n"
"addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n"
"addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n"
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n"
"shrl.ph %[tmp_t5], %[tmp_t5], 2 \n"
"shrl.ph %[tmp_t1], %[tmp_t1], 2 \n"
"mult $ac0, %[const5], %[const5] \n"
"mult $ac1, %[const5], %[const5] \n"
"dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n"
"dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n"
"dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n"
"dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n"
"extr_r.w %[tmp_t7], $ac0, 9 \n"
"extr_r.w %[tmp_t8], $ac1, 9 \n"
"addiu %[src_rgb0], %[src_rgb0], 8 \n"
"addiu %[src_rgb1], %[src_rgb1], 8 \n"
"addiu %[dst_u], %[dst_u], 1 \n"
"addiu %[dst_v], %[dst_v], 1 \n"
"sb %[tmp_t7], -1(%[dst_u]) \n"
"sb %[tmp_t8], -1(%[dst_v]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
[src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1),
[dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v)
: [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3),
[const4] "r"(const4), [const5] "r"(const5)
: "hi", "lo", "$ac1lo", "$ac1hi");
}
}
void RGBAToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) {
int x;
int const1 = 0x00420081;
int const2 = 0x00190000;
int const5 = 0x40;
for (x = 0; x < width; x += 4) {
int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
int tmp_t6, tmp_t7, tmp_t8;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lw %[tmp_t1], 0(%[src_argb0]) \n"
"lw %[tmp_t2], 4(%[src_argb0]) \n"
"lw %[tmp_t3], 8(%[src_argb0]) \n"
"lw %[tmp_t4], 12(%[src_argb0]) \n"
"preceu.ph.qbl %[tmp_t5], %[tmp_t1] \n"
"preceu.ph.qbr %[tmp_t1], %[tmp_t1] \n"
"preceu.ph.qbl %[tmp_t6], %[tmp_t2] \n"
"preceu.ph.qbr %[tmp_t2], %[tmp_t2] \n"
"preceu.ph.qbl %[tmp_t7], %[tmp_t3] \n"
"preceu.ph.qbr %[tmp_t3], %[tmp_t3] \n"
"preceu.ph.qbl %[tmp_t8], %[tmp_t4] \n"
"preceu.ph.qbr %[tmp_t4], %[tmp_t4] \n"
"mult $ac0, %[const5], %[const5] \n"
"mult $ac1, %[const5], %[const5] \n"
"mult $ac2, %[const5], %[const5] \n"
"mult $ac3, %[const5], %[const5] \n"
"dpa.w.ph $ac0, %[tmp_t5], %[const1] \n"
"dpa.w.ph $ac1, %[tmp_t6], %[const1] \n"
"dpa.w.ph $ac2, %[tmp_t7], %[const1] \n"
"dpa.w.ph $ac3, %[tmp_t8], %[const1] \n"
"dpa.w.ph $ac0, %[tmp_t1], %[const2] \n"
"dpa.w.ph $ac1, %[tmp_t2], %[const2] \n"
"dpa.w.ph $ac2, %[tmp_t3], %[const2] \n"
"dpa.w.ph $ac3, %[tmp_t4], %[const2] \n"
"extr_r.w %[tmp_t1], $ac0, 8 \n"
"extr_r.w %[tmp_t2], $ac1, 8 \n"
"extr_r.w %[tmp_t3], $ac2, 8 \n"
"extr_r.w %[tmp_t4], $ac3, 8 \n"
"addiu %[dst_y], %[dst_y], 4 \n"
"addiu %[src_argb0],%[src_argb0], 16 \n"
"sb %[tmp_t1], -4(%[dst_y]) \n"
"sb %[tmp_t2], -3(%[dst_y]) \n"
"sb %[tmp_t3], -2(%[dst_y]) \n"
"sb %[tmp_t4], -1(%[dst_y]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
[src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y)
: [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5)
: "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo",
"$ac3hi");
}
}
void ARGBToUVRow_DSPR2(const uint8* src_rgb0,
int src_stride_rgb,
uint8* dst_u,
uint8* dst_v,
int width) {
const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;
int x;
int const1 = 0xffb60070;
int const2 = 0x0000ffda;
int const3 = 0xffa2ffee;
int const4 = 0x00000070;
int const5 = 0x100;
for (x = 0; x < width - 1; x += 2) {
int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
int tmp_t6, tmp_t7, tmp_t8;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lw %[tmp_t1], 0(%[src_rgb0]) \n"
"lw %[tmp_t2], 4(%[src_rgb0]) \n"
"lw %[tmp_t3], 0(%[src_rgb1]) \n"
"lw %[tmp_t4], 4(%[src_rgb1]) \n"
"preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
"preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
"preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
"preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
"preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
"preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
"preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
"addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n"
"addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n"
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n"
"addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n"
"addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n"
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n"
"shrl.ph %[tmp_t5], %[tmp_t5], 2 \n"
"shrl.ph %[tmp_t1], %[tmp_t1], 2 \n"
"mult $ac0, %[const5], %[const5] \n"
"mult $ac1, %[const5], %[const5] \n"
"dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n"
"dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n"
"dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n"
"dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n"
"extr_r.w %[tmp_t7], $ac0, 9 \n"
"extr_r.w %[tmp_t8], $ac1, 9 \n"
"addiu %[src_rgb0], %[src_rgb0], 8 \n"
"addiu %[src_rgb1], %[src_rgb1], 8 \n"
"addiu %[dst_u], %[dst_u], 1 \n"
"addiu %[dst_v], %[dst_v], 1 \n"
"sb %[tmp_t7], -1(%[dst_u]) \n"
"sb %[tmp_t8], -1(%[dst_v]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
[src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1),
[dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v)
: [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3),
[const4] "r"(const4), [const5] "r"(const5)
: "hi", "lo", "$ac1lo", "$ac1hi");
}
}
#endif  // __mips_dsp_rev >= 2
#endif  // defined(__mips__)
...
...@@ -894,6 +894,14 @@ static void ScalePlaneBox(int src_width,
    }
  }
#endif
#if defined(HAS_SCALEADDROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ScaleAddRow = ScaleAddRow_Any_DSPR2;
if (IS_ALIGNED(src_width, 16)) {
ScaleAddRow = ScaleAddRow_DSPR2;
}
}
#endif
  for (j = 0; j < dst_height; ++j) {
    int boxheight;
...
...@@ -421,6 +421,9 @@ SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
#ifdef HAS_SCALEADDROW_MSA
SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15)
#endif
#ifdef HAS_SCALEADDROW_DSPR2
SAANY(ScaleAddRow_Any_DSPR2, ScaleAddRow_DSPR2, ScaleAddRow_C, 15)
#endif
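// Illustrative sketch of what SAANY(ScaleAddRow_Any_DSPR2, ...) expands to
// under the mask-15 pattern (paraphrased, not the literal macro body): run
// the DSPR2 kernel on the multiple-of-16 prefix, then let the portable C row
// finish the remainder.
void ScaleAddRow_Any_DSPR2_Sketch(const uint8* src_ptr,
                                  uint16* dst_ptr,
                                  int src_width) {
  int n = src_width & ~15;  // largest multiple of 16 <= src_width
  if (n > 0) {
    ScaleAddRow_DSPR2(src_ptr, dst_ptr, n);
  }
  ScaleAddRow_C(src_ptr + n, dst_ptr + n, src_width & 15);
}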
#undef SAANY
#ifdef __cplusplus
...
...@@ -42,10 +42,10 @@ void ScaleRowDown2_DSPR2(const uint8* src_ptr,
      "lw        $t6, 24(%[src_ptr])        \n"  // |27|26|25|24|
      "lw        $t7, 28(%[src_ptr])        \n"  // |31|30|29|28|
      // TODO(fbarchard): Use odd pixels instead of even.
-     "precr.qb.ph   $t8, $t1, $t0          \n"  // |6|4|2|0|
-     "precr.qb.ph   $t0, $t3, $t2          \n"  // |14|12|10|8|
-     "precr.qb.ph   $t1, $t5, $t4          \n"  // |22|20|18|16|
-     "precr.qb.ph   $t2, $t7, $t6          \n"  // |30|28|26|24|
+     "precrq.qb.ph  $t8, $t1, $t0          \n"  // |7|5|3|1|
+     "precrq.qb.ph  $t0, $t3, $t2          \n"  // |15|13|11|9|
+     "precrq.qb.ph  $t1, $t5, $t4          \n"  // |23|21|19|17|
+     "precrq.qb.ph  $t2, $t7, $t6          \n"  // |31|29|27|25|
      "addiu     %[src_ptr], %[src_ptr], 32 \n"
      "addiu     $t9, $t9, -1               \n"
      "sw        $t8, 0(%[dst])             \n"
...@@ -61,7 +61,7 @@ void ScaleRowDown2_DSPR2(const uint8* src_ptr,
      " nop                                 \n"
      "21:                                  \n"
-     "lbu       $t0, 0(%[src_ptr])         \n"
+     "lbu       $t0, 1(%[src_ptr])         \n"
      "addiu     %[src_ptr], %[src_ptr], 2  \n"
      "addiu     $t9, $t9, -1               \n"
      "sb        $t0, 0(%[dst])             \n"
...@@ -198,8 +198,8 @@ void ScaleRowDown4_DSPR2(const uint8* src_ptr,
      "precr.qb.ph   $t2, $t4, $t3          \n"  // |14|12|10|8|
      "precr.qb.ph   $t5, $t6, $t5          \n"  // |22|20|18|16|
      "precr.qb.ph   $t6, $t8, $t7          \n"  // |30|28|26|24|
-     "precr.qb.ph   $t1, $t2, $t1          \n"  // |12|8|4|0|
-     "precr.qb.ph   $t5, $t6, $t5          \n"  // |28|24|20|16|
+     "precrq.qb.ph  $t1, $t2, $t1          \n"  // |14|10|6|2|
+     "precrq.qb.ph  $t5, $t6, $t5          \n"  // |30|26|22|18|
      "addiu     %[src_ptr], %[src_ptr], 32 \n"
      "addiu     $t9, $t9, -1               \n"
      "sw        $t1, 0(%[dst])             \n"
...@@ -213,7 +213,7 @@ void ScaleRowDown4_DSPR2(const uint8* src_ptr,
      " nop                                 \n"
      "21:                                  \n"
-     "lbu       $t1, 0(%[src_ptr])         \n"
+     "lbu       $t1, 2(%[src_ptr])         \n"
      "addiu     %[src_ptr], %[src_ptr], 4  \n"
      "addiu     $t9, $t9, -1               \n"
      "sb        $t1, 0(%[dst])             \n"
...@@ -615,6 +615,51 @@ void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
      : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8");
}
void ScaleAddRow_DSPR2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
int x;
  for (x = 0; x < (src_width & ~7); x += 8) {
uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4;
uint32 tmp_t5, tmp_t6, tmp_t7, tmp_t8;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lw %[tmp_t5], 0(%[src_ptr]) \n"
"lw %[tmp_t6], 4(%[src_ptr]) \n"
"lw %[tmp_t1], 0(%[dst_ptr]) \n"
"lw %[tmp_t2], 4(%[dst_ptr]) \n"
"lw %[tmp_t3], 8(%[dst_ptr]) \n"
"lw %[tmp_t4], 12(%[dst_ptr]) \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t5] \n"
"preceu.ph.qbl %[tmp_t8], %[tmp_t5] \n"
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t7] \n"
"addu.ph %[tmp_t2], %[tmp_t2], %[tmp_t8] \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t6] \n"
"preceu.ph.qbl %[tmp_t8], %[tmp_t6] \n"
"addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t7] \n"
"addu.ph %[tmp_t4], %[tmp_t4], %[tmp_t8] \n"
"sw %[tmp_t1], 0(%[dst_ptr]) \n"
"sw %[tmp_t2], 4(%[dst_ptr]) \n"
"sw %[tmp_t3], 8(%[dst_ptr]) \n"
"sw %[tmp_t4], 12(%[dst_ptr]) \n"
".set pop \n"
:
[tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), [tmp_t3] "=&r"(tmp_t3),
[tmp_t4] "=&r"(tmp_t4), [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [src_ptr] "+r"(src_ptr)
: [dst_ptr] "r"(dst_ptr));
src_ptr += 8;
dst_ptr += 8;
}
  if (src_width & 7) {
    for (x = 0; x < (src_width & 7); x += 1) {
      dst_ptr[0] += src_ptr[0];
      src_ptr += 1;
      dst_ptr += 1;
    }
  }
}
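// The row above is a widening accumulate done eight bytes per iteration
// (preceu.ph.qbr/qbl unpack, addu.ph adds pairs). Its portable equivalent,
// essentially the ScaleAddRow_C fallback, is simply:
void ScaleAddRow_Sketch(const uint8* src_ptr, uint16* dst_ptr,
                        int src_width) {
  int x;
  for (x = 0; x < src_width; ++x) {
    dst_ptr[x] += src_ptr[x];
  }
}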
#endif  // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#ifdef __cplusplus
...
...@@ -36,22 +36,28 @@ namespace libyuv {
  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
  const int kHeight = benchmark_height_; \
  align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
- align_buffer_page_end(src_u, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
-                                  SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
-                                  OFF); \
- align_buffer_page_end(src_v, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
-                                  SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
-                                  OFF); \
+ align_buffer_page_end( \
+     src_u, \
+     SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
+         OFF); \
+ align_buffer_page_end( \
+     src_v, \
+     SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
+         OFF); \
  align_buffer_page_end(dst_y_c, kWidth* kHeight); \
- align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
-                                    SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
-                                    SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end( \
+     dst_u_c, \
+     SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end( \
+     dst_v_c, \
+     SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
  align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
- align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
-                                      SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
-                                      SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end( \
+     dst_u_opt, \
+     SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end( \
+     dst_v_opt, \
+     SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
  for (int i = 0; i < kHeight; ++i) \
    for (int j = 0; j < kWidth; ++j) \
      src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
...@@ -166,15 +172,19 @@ TESTPLANARTOP(I444, 1, 1, I444, 1, 1)
  align_buffer_page_end(src_uv, \
                        kSizeUV*((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \
  align_buffer_page_end(dst_y_c, kWidth* kHeight); \
- align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
-                                    SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
-                                    SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end( \
+     dst_u_c, \
+     SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end( \
+     dst_v_c, \
+     SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
  align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
- align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
-                                      SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
-                                      SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end( \
+     dst_u_opt, \
+     SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end( \
+     dst_v_opt, \
+     SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
  uint8* src_u = src_uv + OFF_U; \
  uint8* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V); \
  int src_stride_uv = SUBSAMPLE(kWidth, SUBSAMP_X) * PIXEL_STRIDE; \
...@@ -284,18 +294,22 @@ TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2)
  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
  const int kHeight = benchmark_height_; \
  align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
- align_buffer_page_end(src_u, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
-                                  SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
-                                  OFF); \
- align_buffer_page_end(src_v, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
-                                  SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
-                                  OFF); \
+ align_buffer_page_end( \
+     src_u, \
+     SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
+         OFF); \
+ align_buffer_page_end( \
+     src_v, \
+     SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
+         OFF); \
  align_buffer_page_end(dst_y_c, kWidth* kHeight); \
- align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \
-                                     SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end( \
+     dst_uv_c, \
+     SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
  align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
- align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \
-                                       SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end( \
+     dst_uv_opt, \
+     SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
  for (int i = 0; i < kHeight; ++i) \
    for (int j = 0; j < kWidth; ++j) \
      src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
...@@ -379,19 +393,24 @@ TESTPLANARTOBP(I420, 2, 2, NV21, 2, 2)
  const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
  const int kHeight = benchmark_height_; \
  align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
- align_buffer_page_end(src_uv, 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
-                                   SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
-                                   OFF); \
+ align_buffer_page_end(src_uv, \
+                       2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
+                               SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
+                           OFF); \
  align_buffer_page_end(dst_y_c, kWidth* kHeight); \
- align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
-                                    SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
-                                    SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end( \
+     dst_u_c, \
+     SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end( \
+     dst_v_c, \
+     SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
  align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
- align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
-                                      SUBSAMPLE(kHeight, SUBSAMP_Y)); \
- align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
-                                      SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end( \
+     dst_u_opt, \
+     SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
+ align_buffer_page_end( \
+     dst_v_opt, \
+     SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
  for (int i = 0; i < kHeight; ++i) \
    for (int j = 0; j < kWidth; ++j) \
      src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
...@@ -1369,10 +1388,12 @@ TEST_F(LibYUVConvertTest, MJPGToI420) {
  const int kSize = kImageSize + kOff;
  align_buffer_page_end(orig_pixels, kSize);
  align_buffer_page_end(dst_y_opt, benchmark_width_ * benchmark_height_);
- align_buffer_page_end(dst_u_opt, SUBSAMPLE(benchmark_width_, 2) *
-                                      SUBSAMPLE(benchmark_height_, 2));
- align_buffer_page_end(dst_v_opt, SUBSAMPLE(benchmark_width_, 2) *
-                                      SUBSAMPLE(benchmark_height_, 2));
+ align_buffer_page_end(
+     dst_u_opt,
+     SUBSAMPLE(benchmark_width_, 2) * SUBSAMPLE(benchmark_height_, 2));
+ align_buffer_page_end(
+     dst_v_opt,
+     SUBSAMPLE(benchmark_width_, 2) * SUBSAMPLE(benchmark_height_, 2));
  // EOI, SOI to make MJPG appear valid.
  memset(orig_pixels, 0, kSize);
...@@ -1444,16 +1465,20 @@ TEST_F(LibYUVConvertTest, NV12Crop) {
  uint8* src_uv = src_y + kWidth * kHeight;
  align_buffer_page_end(dst_y, kDestWidth * kDestHeight);
- align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
-                                  SUBSAMPLE(kDestHeight, SUBSAMP_Y));
- align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
-                                  SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+ align_buffer_page_end(
+     dst_u,
+     SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+ align_buffer_page_end(
+     dst_v,
+     SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
  align_buffer_page_end(dst_y_2, kDestWidth * kDestHeight);
- align_buffer_page_end(dst_u_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
-                                    SUBSAMPLE(kDestHeight, SUBSAMP_Y));
- align_buffer_page_end(dst_v_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
-                                    SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+ align_buffer_page_end(
+     dst_u_2,
+     SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
+ align_buffer_page_end(
+     dst_v_2,
+     SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
  for (int i = 0; i < kHeight * kWidth; ++i) {
    src_y[i] = (fastrand() & 0xff);
...
...@@ -356,16 +356,18 @@ int main(int argc, const char* argv[]) {
  const int uv_size = ((image_width + 1) / 2) * ((image_height + 1) / 2);
  const size_t total_size = y_size + 2 * uv_size;  // NOLINT
#if defined(_MSC_VER)
- _fseeki64(file_org, static_cast<__int64>(num_skip_org) *
-                         static_cast<__int64>(total_size),
-           SEEK_SET);
+ _fseeki64(
+     file_org,
+     static_cast<__int64>(num_skip_org) * static_cast<__int64>(total_size),
+     SEEK_SET);
#else
  fseek(file_org, num_skip_org * total_size, SEEK_SET);
#endif
  for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) {
#if defined(_MSC_VER)
- _fseeki64(file_rec[cur_rec], static_cast<__int64>(num_skip_rec) *
-                                  static_cast<__int64>(total_size),
-           SEEK_SET);
+ _fseeki64(
+     file_rec[cur_rec],
+     static_cast<__int64>(num_skip_rec) * static_cast<__int64>(total_size),
+     SEEK_SET);
#else
  fseek(file_rec[cur_rec], num_skip_rec * total_size, SEEK_SET);
...