Commit 000d2fa9 authored by Frank Barchard

Libyuv MIPS DSPR2 optimizations.

Optimized functions:

I444ToARGBRow_DSPR2
I422ToARGB4444Row_DSPR2
I422ToARGB1555Row_DSPR2
NV12ToARGBRow_DSPR2
BGRAToUVRow_DSPR2
BGRAToYRow_DSPR2
ABGRToUVRow_DSPR2
ARGBToYRow_DSPR2
ABGRToYRow_DSPR2
RGBAToUVRow_DSPR2
RGBAToYRow_DSPR2
ARGBToUVRow_DSPR2
RGB24ToARGBRow_DSPR2
RAWToARGBRow_DSPR2
RGB565ToARGBRow_DSPR2
ARGB1555ToARGBRow_DSPR2
ARGB4444ToARGBRow_DSPR2
ScaleAddRow_DSPR2

Bug fixes in functions:

ScaleRowDown2_DSPR2
ScaleRowDown4_DSPR2

BUG=

Review-Url: https://codereview.chromium.org/2626123003 .
parent 288bfbef
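
Each kernel is enabled through libyuv's standard runtime dispatch, repeated throughout the diff below: start from the C row function, switch to the _Any_DSPR2 wrapper when the DSPR2 CPU flag is present, and promote to the full-width DSPR2 kernel only when the row width is suitably aligned. A minimal sketch of the pattern, taken from the ARGBToYRow case in this change:

#if defined(HAS_ARGBTOYROW_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2)) {
    ARGBToYRow = ARGBToYRow_Any_DSPR2;  // handles any width
    if (IS_ALIGNED(width, 8)) {
      ARGBToYRow = ARGBToYRow_DSPR2;  // fast path, multiples of 8 pixels
    }
  }
#endif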
......@@ -364,6 +364,23 @@ extern "C" {
#define HAS_MIRRORROW_DSPR2
#define HAS_MIRRORUVROW_DSPR2
#define HAS_SPLITUVROW_DSPR2
#define HAS_RGB24TOARGBROW_DSPR2
#define HAS_RAWTOARGBROW_DSPR2
#define HAS_RGB565TOARGBROW_DSPR2
#define HAS_ARGB1555TOARGBROW_DSPR2
#define HAS_ARGB4444TOARGBROW_DSPR2
#define HAS_I444TOARGBROW_DSPR2
#define HAS_I422TOARGB4444ROW_DSPR2
#define HAS_I422TOARGB1555ROW_DSPR2
#define HAS_NV12TOARGBROW_DSPR2
#define HAS_BGRATOUVROW_DSPR2
#define HAS_BGRATOYROW_DSPR2
#define HAS_ABGRTOUVROW_DSPR2
#define HAS_ARGBTOYROW_DSPR2
#define HAS_ABGRTOYROW_DSPR2
#define HAS_RGBATOUVROW_DSPR2
#define HAS_RGBATOYROW_DSPR2
#define HAS_ARGBTOUVROW_DSPR2
#endif
#endif
......@@ -660,6 +677,30 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I444ToARGBRow_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToARGBRow_DSPR2(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_MSA(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......@@ -789,6 +830,30 @@ void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width);
void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width);
void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width);
void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width);
void BGRAToUVRow_DSPR2(const uint8* src_bgra,
int src_stride_bgra,
uint8* dst_u,
uint8* dst_v,
int width);
void BGRAToYRow_DSPR2(const uint8* src_bgra, uint8* dst_y, int width);
void ABGRToUVRow_DSPR2(const uint8* src_abgr,
int src_stride_abgr,
uint8* dst_u,
uint8* dst_v,
int width);
void ARGBToYRow_DSPR2(const uint8* src_argb, uint8* dst_y, int width);
void ABGRToYRow_DSPR2(const uint8* src_abgr, uint8* dst_y, int width);
void RGBAToUVRow_DSPR2(const uint8* src_rgba,
int src_stride_rgba,
uint8* dst_u,
uint8* dst_v,
int width);
void RGBAToYRow_DSPR2(const uint8* src_rgba, uint8* dst_y, int width);
void ARGBToUVRow_DSPR2(const uint8* src_argb,
int src_stride_argb,
uint8* dst_u,
uint8* dst_v,
int width);
void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int width);
void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int width);
void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int width);
......@@ -817,6 +882,10 @@ void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int width);
void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555,
uint8* dst_y,
int width);
void BGRAToYRow_Any_DSPR2(const uint8* src_bgra, uint8* dst_y, int width);
void ARGBToYRow_Any_DSPR2(const uint8* src_argb, uint8* dst_y, int width);
void ABGRToYRow_Any_DSPR2(const uint8* src_abgr, uint8* dst_y, int width);
void RGBAToYRow_Any_DSPR2(const uint8* src_rgba, uint8* dst_y, int width);
void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444,
uint8* dst_y,
int width);
......@@ -955,6 +1024,36 @@ void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
uint8* dst_u,
uint8* dst_v,
int width);
void BGRAToUVRow_Any_DSPR2(const uint8* src_bgra,
int src_stride_bgra,
uint8* dst_u,
uint8* dst_v,
int width);
void ABGRToUVRow_Any_DSPR2(const uint8* src_abgr,
int src_stride_abgr,
uint8* dst_u,
uint8* dst_v,
int width);
void RGBAToUVRow_Any_DSPR2(const uint8* src_rgba,
int src_stride_rgba,
uint8* dst_u,
uint8* dst_v,
int width);
void ARGBToUVRow_Any_DSPR2(const uint8* src_argb,
int src_stride_argb,
uint8* dst_u,
uint8* dst_v,
int width);
void ARGBToUVRow_C(const uint8* src_argb,
int src_stride_argb,
uint8* dst_u,
uint8* dst_v,
int width);
void ARGBToUVJRow_C(const uint8* src_argb,
int src_stride_argb,
uint8* dst_u,
uint8* dst_v,
int width);
void ARGBToUVRow_C(const uint8* src_argb,
int src_stride_argb,
uint8* dst_u,
......@@ -1251,6 +1350,15 @@ void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555,
void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444,
uint8* dst_argb,
int width);
void RGB24ToARGBRow_DSPR2(const uint8* src_rgb24, uint8* dst_argb, int width);
void RAWToARGBRow_DSPR2(const uint8* src_raw, uint8* dst_argb, int width);
void RGB565ToARGBRow_DSPR2(const uint8* src_rgb565, uint8* dst_argb, int width);
void ARGB1555ToARGBRow_DSPR2(const uint8* src_argb1555,
uint8* dst_argb,
int width);
void ARGB4444ToARGBRow_DSPR2(const uint8* src_argb4444,
uint8* dst_argb,
int width);
void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444,
uint8* dst_argb,
int width);
......@@ -1299,6 +1407,20 @@ void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555,
void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444,
uint8* dst_argb,
int width);
void RGB24ToARGBRow_Any_DSPR2(const uint8* src_rgb24,
uint8* dst_argb,
int width);
void RAWToARGBRow_Any_DSPR2(const uint8* src_raw, uint8* dst_argb, int width);
void RGB565ToARGBRow_Any_DSPR2(const uint8* src_rgb565,
uint8* dst_argb,
int width);
void ARGB1555ToARGBRow_Any_DSPR2(const uint8* src_argb1555,
uint8* dst_argb,
int width);
void ARGB4444ToARGBRow_Any_DSPR2(const uint8* src_argb4444,
uint8* dst_argb,
int width);
void ARGB4444ToARGBRow_Any_MSA(const uint8* src_argb4444,
uint8* dst_argb,
int width);
......@@ -2042,12 +2164,47 @@ void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I444ToARGBRow_Any_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_Any_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_Any_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_Any_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I411ToARGBRow_Any_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToARGBRow_Any_DSPR2(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
......
......@@ -101,6 +101,7 @@ extern "C" {
#define HAS_SCALEROWDOWN4_DSPR2
#define HAS_SCALEROWDOWN34_DSPR2
#define HAS_SCALEROWDOWN38_DSPR2
#define HAS_SCALEADDROW_DSPR2
#endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
......@@ -846,6 +847,10 @@ void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr,
int dst_width);
void ScaleAddRow_DSPR2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
void ScaleAddRow_Any_DSPR2(const uint8* src_ptr,
uint16* dst_ptr,
int src_width);
void ScaleRowDown2_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
......
......@@ -579,6 +579,14 @@ int ARGBToI420(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA;
......@@ -587,6 +595,14 @@ int ARGBToI420(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToUVRow = ARGBToUVRow_Any_MSA;
......@@ -664,6 +680,22 @@ int BGRAToI420(const uint8* src_bgra,
}
}
#endif
#if defined(HAS_BGRATOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
BGRAToYRow = BGRAToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
BGRAToYRow = BGRAToYRow_DSPR2;
}
}
#endif
#if defined(HAS_BGRATOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
BGRAToUVRow = BGRAToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
BGRAToUVRow = BGRAToUVRow_DSPR2;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width);
......@@ -733,6 +765,22 @@ int ABGRToI420(const uint8* src_abgr,
}
}
#endif
#if defined(HAS_ABGRTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ABGRToYRow = ABGRToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ABGRToYRow = ABGRToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ABGRToUVRow = ABGRToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_DSPR2;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
......@@ -802,6 +850,22 @@ int RGBAToI420(const uint8* src_rgba,
}
}
#endif
#if defined(HAS_RGBATOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGBAToYRow = RGBAToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGBAToYRow = RGBAToYRow_DSPR2;
}
}
#endif
#if defined(HAS_RGBATOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGBAToUVRow = RGBAToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
RGBAToUVRow = RGBAToUVRow_DSPR2;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width);
......@@ -1014,6 +1078,14 @@ int RAWToI420(const uint8* src_raw,
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_RAWTOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RAWToARGBRow = RAWToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
RAWToARGBRow = RAWToARGBRow_DSPR2;
}
}
#endif
{
// Allocate 2 rows of ARGB.
......@@ -1142,6 +1214,14 @@ int RGB565ToI420(const uint8* src_rgb565,
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_RGB565TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_DSPR2;
}
}
#endif
{
// Allocate 2 rows of ARGB.
......
......@@ -485,6 +485,14 @@ static int I444ToARGBMatrix(const uint8* src_y,
}
}
#endif
#if defined(HAS_I444TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
I444ToARGBRow = I444ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
I444ToARGBRow = I444ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) {
I444ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
......@@ -946,6 +954,14 @@ int RGB24ToARGB(const uint8* src_rgb24,
}
}
#endif
#if defined(HAS_RGB24TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGB24ToARGBRow = RGB24ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) {
RGB24ToARGBRow(src_rgb24, dst_argb, width);
......@@ -997,6 +1013,14 @@ int RAWToARGB(const uint8* src_raw,
}
}
#endif
#if defined(HAS_RAWTOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RAWToARGBRow = RAWToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RAWToARGBRow = RAWToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) {
RAWToARGBRow(src_raw, dst_argb, width);
......@@ -1056,6 +1080,14 @@ int RGB565ToARGB(const uint8* src_rgb565,
}
}
#endif
#if defined(HAS_RGB565TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) {
RGB565ToARGBRow(src_rgb565, dst_argb, width);
......@@ -1115,6 +1147,14 @@ int ARGB1555ToARGB(const uint8* src_argb1555,
}
}
#endif
#if defined(HAS_ARGB1555TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGB1555ToARGBRow(src_argb1555, dst_argb, width);
......@@ -1174,6 +1214,14 @@ int ARGB4444ToARGB(const uint8* src_argb4444,
}
}
#endif
#if defined(HAS_ARGB4444TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGB4444TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MSA;
......@@ -1238,6 +1286,14 @@ int NV12ToARGB(const uint8* src_y,
}
}
#endif
#if defined(HAS_NV12TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
NV12ToARGBRow = NV12ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) {
NV12ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width);
......@@ -1354,6 +1410,14 @@ int M420ToARGB(const uint8* src_m420,
}
}
#endif
#if defined(HAS_NV12TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
NV12ToARGBRow = NV12ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
......
......@@ -708,6 +708,14 @@ int I420ToARGB1555(const uint8* src_y,
}
}
#endif
#if defined(HAS_I422TOARGB1555ROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
I422ToARGB1555Row = I422ToARGB1555Row_DSPR2;
}
}
#endif
#if defined(HAS_I422TOARGB1555ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_MSA;
......@@ -781,6 +789,14 @@ int I420ToARGB4444(const uint8* src_y,
}
}
#endif
#if defined(HAS_I422TOARGB4444ROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
I422ToARGB4444Row = I422ToARGB4444Row_DSPR2;
}
}
#endif
#if defined(HAS_I422TOARGB4444ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_MSA;
......
......@@ -100,6 +100,14 @@ int ARGBToI444(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA;
......@@ -189,6 +197,23 @@ int ARGBToI422(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA;
......@@ -318,6 +343,22 @@ int ARGBToNV12(const uint8* src_argb,
MergeUVRow_ = MergeUVRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
{
// Allocate a row of uv.
......@@ -445,6 +486,22 @@ int ARGBToNV21(const uint8* src_argb,
MergeUVRow_ = MergeUVRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
{
// Allocate a row of uv.
......@@ -570,6 +627,22 @@ int ARGBToYUY2(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_I422TOYUY2ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToYUY2Row = I422ToYUY2Row_Any_MSA;
......@@ -698,6 +771,22 @@ int ARGBToUYVY(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_I422TOUYVYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToUYVYRow = I422ToUYVYRow_Any_MSA;
......@@ -775,6 +864,14 @@ int ARGBToI400(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA;
......
......@@ -167,6 +167,12 @@ ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGBROW_DSPR2
ANY31C(I444ToARGBRow_Any_DSPR2, I444ToARGBRow_DSPR2, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_DSPR2, I422ToARGBRow_DSPR2, 1, 0, 4, 7)
ANY31C(I422ToARGB4444Row_Any_DSPR2, I422ToARGB4444Row_DSPR2, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_DSPR2, I422ToARGB1555Row_DSPR2, 1, 0, 2, 7)
#endif
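// ANY31C wraps a fixed-width 3-plane kernel so it accepts any row width:
// the bulk of the row runs through the SIMD body and the last
// (width & MASK) pixels go through a small temp buffer. The final macro
// argument is that mask, so 7 above means the DSPR2 kernels operate on
// multiples of 8 pixels.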
#ifdef HAS_I422TOARGBROW_MSA
ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7)
......@@ -291,6 +297,9 @@ ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#ifdef HAS_NV12TOARGBROW_NEON
ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TOARGBROW_DSPR2
ANY21C(NV12ToARGBRow_Any_DSPR2, NV12ToARGBRow_DSPR2, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_SSSE3
ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif
......@@ -484,6 +493,33 @@ ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7)
#ifdef HAS_ARGB4444TOARGBROW_NEON
ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_RGB24TOARGBROW_DSPR2
ANY11(RGB24ToARGBRow_Any_DSPR2, RGB24ToARGBRow_DSPR2, 0, 3, 4, 7)
#endif
#ifdef HAS_RAWTOARGBROW_DSPR2
ANY11(RAWToARGBRow_Any_DSPR2, RAWToARGBRow_DSPR2, 0, 3, 4, 7)
#endif
#ifdef HAS_RGB565TOARGBROW_DSPR2
ANY11(RGB565ToARGBRow_Any_DSPR2, RGB565ToARGBRow_DSPR2, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB1555TOARGBROW_DSPR2
ANY11(ARGB1555ToARGBRow_Any_DSPR2, ARGB1555ToARGBRow_DSPR2, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB4444TOARGBROW_DSPR2
ANY11(ARGB4444ToARGBRow_Any_DSPR2, ARGB4444ToARGBRow_DSPR2, 0, 2, 4, 7)
#endif
#ifdef HAS_BGRATOYROW_DSPR2
ANY11(BGRAToYRow_Any_DSPR2, BGRAToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBTOYROW_DSPR2
ANY11(ARGBToYRow_Any_DSPR2, ARGBToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_ABGRTOYROW_DSPR2
ANY11(ABGRToYRow_Any_DSPR2, ABGRToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_RGBATOYROW_DSPR2
ANY11(RGBAToYRow_Any_DSPR2, RGBAToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGB4444TOARGBROW_MSA
ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15)
#endif
......@@ -904,6 +940,18 @@ ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
#ifdef HAS_UYVYTOUVROW_NEON
ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
#endif
#ifdef HAS_BGRATOUVROW_DSPR2
ANY12S(BGRAToUVRow_Any_DSPR2, BGRAToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_ABGRTOUVROW_DSPR2
ANY12S(ABGRToUVRow_Any_DSPR2, ABGRToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_RGBATOUVROW_DSPR2
ANY12S(RGBAToUVRow_Any_DSPR2, RGBAToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVROW_DSPR2
ANY12S(ARGBToUVRow_Any_DSPR2, ARGBToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_YUY2TOUVROW_MSA
ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
#endif
......
......@@ -202,8 +202,9 @@ void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
uint8 b1 = src_argb[4] >> 3;
uint8 g1 = src_argb[5] >> 2;
uint8 r1 = src_argb[6] >> 3;
WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
(r1 << 27));
WRITEWORD(
dst_rgb,
b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27));
dst_rgb += 4;
src_argb += 8;
}
......@@ -237,8 +238,9 @@ void ARGBToRGB565DitherRow_C(const uint8* src_argb,
uint8 b1 = clamp255(src_argb[4] + dither1) >> 3;
uint8 g1 = clamp255(src_argb[5] + dither1) >> 2;
uint8 r1 = clamp255(src_argb[6] + dither1) >> 3;
WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
(r1 << 27));
WRITEWORD(
dst_rgb,
b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27));
dst_rgb += 4;
src_argb += 8;
}
......
......@@ -585,126 +585,89 @@ void MirrorUVRow_DSPR2(const uint8* src_uv,
: "t0", "t1", "t2", "t3", "t4", "t5", "t7", "t8", "t9");
}
// Convert (4 Y and 2 VU) I422 pixels and arrange the RGB values into
// t5 = | 0 | B0 | 0 | b0 |
// t4 = | 0 | B1 | 0 | b1 |
// t9 = | 0 | G0 | 0 | g0 |
// t8 = | 0 | G1 | 0 | g1 |
// t2 = | 0 | R0 | 0 | r0 |
// t1 = | 0 | R1 | 0 | r1 |
#define YUVTORGB \
"lw $t0, 0(%[y_buf]) \n" \
"lhu $t1, 0(%[u_buf]) \n" \
"lhu $t2, 0(%[v_buf]) \n" \
"preceu.ph.qbr $t1, $t1 \n" \
"preceu.ph.qbr $t2, $t2 \n" \
"preceu.ph.qbra $t3, $t0 \n" \
"preceu.ph.qbla $t0, $t0 \n" \
"subu.ph $t1, $t1, $s5 \n" \
"subu.ph $t2, $t2, $s5 \n" \
"subu.ph $t3, $t3, $s4 \n" \
"subu.ph $t0, $t0, $s4 \n" \
"mul.ph $t3, $t3, $s0 \n" \
"mul.ph $t0, $t0, $s0 \n" \
"shll.ph $t4, $t1, 0x7 \n" \
"subu.ph $t4, $t4, $t1 \n" \
"mul.ph $t6, $t1, $s1 \n" \
"mul.ph $t1, $t2, $s2 \n" \
"addq_s.ph $t5, $t4, $t3 \n" \
"addq_s.ph $t4, $t4, $t0 \n" \
"shra.ph $t5, $t5, 6 \n" \
"shra.ph $t4, $t4, 6 \n" \
"addiu %[u_buf], 2 \n" \
"addiu %[v_buf], 2 \n" \
"addu.ph $t6, $t6, $t1 \n" \
"mul.ph $t1, $t2, $s3 \n" \
"addu.ph $t9, $t6, $t3 \n" \
"addu.ph $t8, $t6, $t0 \n" \
"shra.ph $t9, $t9, 6 \n" \
"shra.ph $t8, $t8, 6 \n" \
"addu.ph $t2, $t1, $t3 \n" \
"addu.ph $t1, $t1, $t0 \n" \
"shra.ph $t2, $t2, 6 \n" \
"shra.ph $t1, $t1, 6 \n" \
"subu.ph $t5, $t5, $s5 \n" \
"subu.ph $t4, $t4, $s5 \n" \
"subu.ph $t9, $t9, $s5 \n" \
"subu.ph $t8, $t8, $s5 \n" \
"subu.ph $t2, $t2, $s5 \n" \
"subu.ph $t1, $t1, $s5 \n" \
"shll_s.ph $t5, $t5, 8 \n" \
"shll_s.ph $t4, $t4, 8 \n" \
"shll_s.ph $t9, $t9, 8 \n" \
"shll_s.ph $t8, $t8, 8 \n" \
"shll_s.ph $t2, $t2, 8 \n" \
"shll_s.ph $t1, $t1, 8 \n" \
"shra.ph $t5, $t5, 8 \n" \
"shra.ph $t4, $t4, 8 \n" \
"shra.ph $t9, $t9, 8 \n" \
"shra.ph $t8, $t8, 8 \n" \
"shra.ph $t2, $t2, 8 \n" \
"shra.ph $t1, $t1, 8 \n" \
"addu.ph $t5, $t5, $s5 \n" \
"addu.ph $t4, $t4, $s5 \n" \
"addu.ph $t9, $t9, $s5 \n" \
"addu.ph $t8, $t8, $s5 \n" \
"addu.ph $t2, $t2, $s5 \n" \
"addu.ph $t1, $t1, $s5 \n"
// TODO(fbarchard): accept yuv conversion constants.
void I422ToARGBRow_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
void I422ToARGBRow_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
uint32 tmp_ub = yuvconstants->kUVToB[0];
uint32 tmp_ug = yuvconstants->kUVToG[0];
uint32 tmp_vg = yuvconstants->kUVToG[1];
uint32 tmp_vr = yuvconstants->kUVToR[1];
uint32 tmp_bb = yuvconstants->kUVBiasB[0];
uint32 tmp_bg = yuvconstants->kUVBiasG[0];
uint32 tmp_br = yuvconstants->kUVBiasR[0];
uint32 yg = yuvconstants->kYToRgb[0];
uint32 tmp_yg;
uint32 tmp_mask = 0x7fff7fff;
tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff);
tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff);
tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff);
tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff);
tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001;
tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff);
tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff);
tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001;
yg = yg * 0x0101;
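// Each 16-bit coefficient/bias is replicated into both halfwords of a
// 32-bit register so the paired-halfword ops (mul.ph, addq_s.ph) process
// two pixels at once; the ~x + 0x00010001 form negates both halfwords of
// the U-to-B and V-to-R terms.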
for (x = 0; x < width - 1; x += 2) {
uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"beqz %[width], 2f \n"
" repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
"repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
"repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
"repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
"repl.ph $s4, 16 \n" // |0|16|0|16|
"repl.ph $s5, 128 \n" // |128|128| // clipping
"lui $s6, 0xff00 \n"
"ori $s6, 0xff00 \n" // |ff|00|ff|00|ff|
"1: \n" YUVTORGB
// Arranging into argb format
"precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1|
"precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0|
"addiu %[width], -4 \n"
"precrq.qb.ph $t8, $t4, $t5 \n" // |G1|B1|G0|B0|
"precr.qb.ph $t9, $t4, $t5 \n" // |g1|b1|g0|b0|
"precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
"addiu %[y_buf], 4 \n"
"preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
"preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
"or $t1, $t1, $s6 \n" // |ff|R1|ff|R0|
"or $t2, $t2, $s6 \n" // |ff|r1|ff|r0|
"precrq.ph.w $t0, $t2, $t9 \n" // |ff|r1|g1|b1|
"precrq.ph.w $t3, $t1, $t8 \n" // |ff|R1|G1|B1|
"sll $t9, $t9, 16 \n"
"sll $t8, $t8, 16 \n"
"packrl.ph $t2, $t2, $t9 \n" // |ff|r0|g0|b0|
"packrl.ph $t1, $t1, $t8 \n" // |ff|R0|G0|B0|
// Store results.
"sw $t2, 0(%[rgb_buf]) \n"
"sw $t0, 4(%[rgb_buf]) \n"
"sw $t1, 8(%[rgb_buf]) \n"
"sw $t3, 12(%[rgb_buf]) \n"
"bnez %[width], 1b \n"
" addiu %[rgb_buf], 16 \n"
"2: \n"
"lbu %[tmp_t7], 0(%[src_y]) \n"
"lbu %[tmp_t1], 1(%[src_y]) \n"
"mul %[tmp_t7], %[tmp_t7], %[yg] \n"
"mul %[tmp_t1], %[tmp_t1], %[yg] \n"
"lbu %[tmp_t2], 0(%[src_u]) \n"
"lbu %[tmp_t3], 0(%[src_v]) \n"
"replv.ph %[tmp_t2], %[tmp_t2] \n"
"replv.ph %[tmp_t3], %[tmp_t3] \n"
"mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n"
"mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n"
"mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n"
"mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n"
"srl %[tmp_t7], %[tmp_t7], 16 \n"
"ins %[tmp_t1], %[tmp_t7], 0, 16 \n"
"addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n"
"addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n"
"addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n"
"addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n"
"addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n"
"subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n"
"addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n"
"shra.ph %[tmp_t7], %[tmp_t7], 6 \n"
"shra.ph %[tmp_t8], %[tmp_t8], 6 \n"
"shra.ph %[tmp_t9], %[tmp_t9], 6 \n"
"shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n"
"shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n"
"shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n"
"precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n"
"precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n"
"precrq.ph.w %[tmp_t9], %[tmp_t8], %[tmp_t7] \n"
"ins %[tmp_t7], %[tmp_t8], 16, 16 \n"
"precr.qb.ph %[tmp_t8], %[tmp_t9], %[tmp_t7] \n"
"precrq.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n"
"sw %[tmp_t8], 0(%[rgb_buf]) \n"
"sw %[tmp_t7], 4(%[rgb_buf]) \n"
".set pop \n"
: [y_buf] "+r"(y_buf), [u_buf] "+r"(u_buf), [v_buf] "+r"(v_buf),
[width] "+r"(width), [rgb_buf] "+r"(rgb_buf)
:
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", "s0", "s1",
"s2", "s3", "s4", "s5", "s6");
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9)
: [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v),
[tmp_ub] "r"(tmp_ub), [tmp_ug] "r"(tmp_ug), [yg] "r"(yg),
[tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr), [tmp_bb] "r"(tmp_bb),
[tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br), [tmp_yg] "r"(tmp_yg),
[rgb_buf] "r"(rgb_buf), [tmp_mask] "r"(tmp_mask));
src_y += 2;
src_u += 1;
src_v += 1;
rgb_buf += 8; // Advance 2 pixels.
}
}
// Bilinear filter 8x2 -> 8x1
......@@ -740,10 +703,10 @@ void InterpolateRow_DSPR2(uint8* dst_ptr,
"addq.ph $t7, $t7, $t9 \n"
"addq.ph $t2, $t2, $t4 \n"
"addq.ph $t3, $t3, $t5 \n"
"shra.ph $t6, $t6, 8 \n"
"shra.ph $t7, $t7, 8 \n"
"shra.ph $t2, $t2, 8 \n"
"shra.ph $t3, $t3, 8 \n"
"shra_r.ph $t6, $t6, 8 \n"
"shra_r.ph $t7, $t7, 8 \n"
"shra_r.ph $t2, $t2, 8 \n"
"shra_r.ph $t3, $t3, 8 \n"
"precr.qb.ph $t6, $t6, $t7 \n"
"precr.qb.ph $t2, $t2, $t3 \n"
"addiu %[src_ptr], %[src_ptr], 8 \n"
......@@ -761,6 +724,993 @@ void InterpolateRow_DSPR2(uint8* dst_ptr,
[y0_fraction] "r"(y0_fraction), [src_stride] "r"(src_stride)
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9");
}
void RGB24ToARGBRow_DSPR2(const uint8* src_rgb24, uint8* dst_argb, int width) {
int x;
uint32 tmp_mask = 0xff;
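// 0xff is the opaque alpha inserted into bits 24..31 of each output pixel.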
uint32 tmp_t1;
for (x = 0; x < (width - 1); ++x) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"ulw %[tmp_t1], 0(%[src_rgb24]) \n"
"addiu %[dst_argb], %[dst_argb], 4 \n"
"addiu %[src_rgb24], %[src_rgb24], 3 \n"
"ins %[tmp_t1], %[tmp_mask], 24, 8 \n"
"sw %[tmp_t1], -4(%[dst_argb]) \n"
".set pop \n"
: [src_rgb24] "+r"(src_rgb24), [dst_argb] "+r"(dst_argb),
[tmp_t1] "=&r"(tmp_t1)
: [tmp_mask] "r"(tmp_mask)
: "memory");
}
uint8 b = src_rgb24[0];
uint8 g = src_rgb24[1];
uint8 r = src_rgb24[2];
dst_argb[0] = b;
dst_argb[1] = g;
dst_argb[2] = r;
dst_argb[3] = 255u;
}
void RAWToARGBRow_DSPR2(const uint8* src_raw, uint8* dst_argb, int width) {
int x;
uint32 tmp_mask = 0xff;
uint32 tmp_t1, tmp_t2;
for (x = 0; x < (width - 1); ++x) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"ulw %[tmp_t1], 0(%[src_raw]) \n"
"addiu %[dst_argb], %[dst_argb], 4 \n"
"addiu %[src_raw], %[src_raw], 3 \n"
"srl %[tmp_t2], %[tmp_t1], 16 \n"
"ins %[tmp_t1], %[tmp_mask], 24, 8 \n"
"ins %[tmp_t1], %[tmp_t1], 16, 8 \n"
"ins %[tmp_t1], %[tmp_t2], 0, 8 \n"
"sw %[tmp_t1], -4(%[dst_argb]) \n"
".set pop \n"
: [src_raw] "+r"(src_raw), [dst_argb] "+r"(dst_argb),
[tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2)
: [tmp_mask] "r"(tmp_mask)
: "memory");
}
uint8 r = src_raw[0];
uint8 g = src_raw[1];
uint8 b = src_raw[2];
dst_argb[0] = b;
dst_argb[1] = g;
dst_argb[2] = r;
dst_argb[3] = 255u;
}
void RGB565ToARGBRow_DSPR2(const uint8* src_rgb565,
uint8* dst_argb,
int width) {
int x;
uint32 tmp_mask = 0xff;
uint32 tmp_t1, tmp_t2, tmp_t3;
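// The ins/srl sequence widens each 5- or 6-bit field to 8 bits by
// replicating its high bits into the low bits, the standard expansion
// for RGB565 to ARGB.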
for (x = 0; x < width; ++x) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lhu %[tmp_t1], 0(%[src_rgb565]) \n"
"addiu %[dst_argb], %[dst_argb], 4 \n"
"addiu %[src_rgb565], %[src_rgb565], 2 \n"
"sll %[tmp_t2], %[tmp_t1], 8 \n"
"ins %[tmp_t2], %[tmp_mask], 24,8 \n"
"ins %[tmp_t2], %[tmp_t1], 3, 16 \n"
"ins %[tmp_t2], %[tmp_t1], 5, 11 \n"
"srl %[tmp_t3], %[tmp_t1], 9 \n"
"ins %[tmp_t2], %[tmp_t3], 8, 2 \n"
"ins %[tmp_t2], %[tmp_t1], 3, 5 \n"
"srl %[tmp_t3], %[tmp_t1], 2 \n"
"ins %[tmp_t2], %[tmp_t3], 0, 3 \n"
"sw %[tmp_t2], -4(%[dst_argb]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [src_rgb565] "+r"(src_rgb565),
[dst_argb] "+r"(dst_argb)
: [tmp_mask] "r"(tmp_mask));
}
}
void ARGB1555ToARGBRow_DSPR2(const uint8* src_argb1555,
uint8* dst_argb,
int width) {
int x;
uint32 tmp_t1, tmp_t2, tmp_t3;
for (x = 0; x < width; ++x) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lh %[tmp_t1], 0(%[src_argb1555]) \n"
"addiu %[dst_argb], %[dst_argb], 4 \n"
"addiu %[src_argb1555], %[src_argb1555], 2 \n"
"sll %[tmp_t2], %[tmp_t1], 9 \n"
"ins %[tmp_t2], %[tmp_t1], 4, 15 \n"
"ins %[tmp_t2], %[tmp_t1], 6, 10 \n"
"srl %[tmp_t3], %[tmp_t1], 7 \n"
"ins %[tmp_t2], %[tmp_t3], 8, 3 \n"
"ins %[tmp_t2], %[tmp_t1], 3, 5 \n"
"srl %[tmp_t3], %[tmp_t1], 2 \n"
"ins %[tmp_t2], %[tmp_t3], 0, 3 \n"
"sw %[tmp_t2], -4(%[dst_argb]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [src_argb1555] "+r"(src_argb1555),
[dst_argb] "+r"(dst_argb)
:);
}
}
void ARGB4444ToARGBRow_DSPR2(const uint8* src_argb4444,
uint8* dst_argb,
int width) {
int x;
uint32 tmp_t1;
for (x = 0; x < width; ++x) {
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lh %[tmp_t1], 0(%[src_argb4444]) \n"
"addiu %[dst_argb], %[dst_argb], 4 \n"
"addiu %[src_argb4444], %[src_argb4444], 2 \n"
"ins %[tmp_t1], %[tmp_t1], 16, 16 \n"
"ins %[tmp_t1], %[tmp_t1], 12, 16 \n"
"ins %[tmp_t1], %[tmp_t1], 8, 12 \n"
"ins %[tmp_t1], %[tmp_t1], 4, 8 \n"
"sw %[tmp_t1], -4(%[dst_argb]) \n"
".set pop \n"
: [src_argb4444] "+r"(src_argb4444), [dst_argb] "+r"(dst_argb),
[tmp_t1] "=&r"(tmp_t1));
}
}
void I444ToARGBRow_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
uint32 tmp_ub = yuvconstants->kUVToB[0];
uint32 tmp_ug = yuvconstants->kUVToG[0];
uint32 tmp_vg = yuvconstants->kUVToG[1];
uint32 tmp_vr = yuvconstants->kUVToR[1];
uint32 tmp_bb = yuvconstants->kUVBiasB[0];
uint32 tmp_bg = yuvconstants->kUVBiasG[0];
uint32 tmp_br = yuvconstants->kUVBiasR[0];
uint32 yg = yuvconstants->kYToRgb[0];
uint32 tmp_mask = 0x7fff7fff;
uint32 tmp_yg;
tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff);
tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff);
tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff);
tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff);
tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001;
tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff);
tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff);
tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001;
yg = yg * 0x0101;
for (x = 0; x < width - 1; x += 2) {
uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lbu %[tmp_t7], 0(%[y_buf]) \n"
"lbu %[tmp_t1], 1(%[y_buf]) \n"
"mul %[tmp_t7], %[tmp_t7], %[yg] \n"
"mul %[tmp_t1], %[tmp_t1], %[yg] \n"
"lh %[tmp_t2], 0(%[u_buf]) \n"
"lh %[tmp_t3], 0(%[v_buf]) \n"
"preceu.ph.qbr %[tmp_t2], %[tmp_t2] \n"
"preceu.ph.qbr %[tmp_t3], %[tmp_t3] \n"
"mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n"
"mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n"
"mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n"
"mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n"
"srl %[tmp_t7], %[tmp_t7], 16 \n"
"ins %[tmp_t1], %[tmp_t7], 0, 16 \n"
"addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n"
"addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n"
"addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n"
"addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n"
"addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n"
"subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n"
"addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n"
"shra.ph %[tmp_t7], %[tmp_t7], 6 \n"
"shra.ph %[tmp_t8], %[tmp_t8], 6 \n"
"shra.ph %[tmp_t9], %[tmp_t9], 6 \n"
"shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n"
"shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n"
"shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n"
"precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n"
"precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n"
"precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n"
"ins %[tmp_t7], %[tmp_t8], 16, 16 \n"
"precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n"
"precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n"
"sw %[tmp_t8], 0(%[rgb_buf]) \n"
"sw %[tmp_t7], 4(%[rgb_buf]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9)
: [y_buf] "r"(y_buf), [yg] "r"(yg), [u_buf] "r"(u_buf),
[v_buf] "r"(v_buf), [tmp_ub] "r"(tmp_ub), [tmp_ug] "r"(tmp_ug),
[tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr), [tmp_bb] "r"(tmp_bb),
[tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br), [tmp_yg] "r"(tmp_yg),
[rgb_buf] "r"(rgb_buf), [tmp_mask] "r"(tmp_mask));
y_buf += 2;
u_buf += 2;
v_buf += 2;
rgb_buf += 8; // Advance 2 pixels.
}
}
void I422ToARGB4444Row_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width) {
int x;
uint32 tmp_ub = yuvconstants->kUVToB[0];
uint32 tmp_ug = yuvconstants->kUVToG[0];
uint32 tmp_vg = yuvconstants->kUVToG[1];
uint32 tmp_vr = yuvconstants->kUVToR[1];
uint32 tmp_bb = yuvconstants->kUVBiasB[0];
uint32 tmp_bg = yuvconstants->kUVBiasG[0];
uint32 tmp_br = yuvconstants->kUVBiasR[0];
uint32 yg = yuvconstants->kYToRgb[0];
uint32 tmp_yg;
uint32 tmp_mask = 0x7fff7fff;
tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff);
tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff);
tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff);
tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff);
tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001;
tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff);
tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff);
tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001;
yg = yg * 0x0101;
for (x = 0; x < width - 1; x += 2) {
uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lbu %[tmp_t7], 0(%[src_y]) \n"
"lbu %[tmp_t1], 1(%[src_y]) \n"
"mul %[tmp_t7], %[tmp_t7], %[yg] \n"
"mul %[tmp_t1], %[tmp_t1], %[yg] \n"
"lbu %[tmp_t2], 0(%[src_u]) \n"
"lbu %[tmp_t3], 0(%[src_v]) \n"
"replv.ph %[tmp_t2], %[tmp_t2] \n"
"replv.ph %[tmp_t3], %[tmp_t3] \n"
"mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n"
"mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n"
"mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n"
"mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n"
"srl %[tmp_t7], %[tmp_t7], 16 \n"
"ins %[tmp_t1], %[tmp_t7], 0, 16 \n"
"addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n"
"addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n"
"addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n"
"addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n"
"addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n"
"subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n"
"addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n"
"shra.ph %[tmp_t7], %[tmp_t7], 6 \n"
"shra.ph %[tmp_t8], %[tmp_t8], 6 \n"
"shra.ph %[tmp_t9], %[tmp_t9], 6 \n"
"shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n"
"shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n"
"shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n"
"precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n"
"precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n"
"precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n"
"ins %[tmp_t7], %[tmp_t8], 16, 16 \n"
"precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n"
"precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n"
"shrl.qb %[tmp_t1], %[tmp_t8], 4 \n"
"shrl.qb %[tmp_t2], %[tmp_t7], 4 \n"
"shrl.ph %[tmp_t8], %[tmp_t1], 4 \n"
"shrl.ph %[tmp_t7], %[tmp_t2], 4 \n"
"or %[tmp_t8], %[tmp_t8], %[tmp_t1] \n"
"or %[tmp_t7], %[tmp_t7], %[tmp_t2] \n"
"precr.qb.ph %[tmp_t8], %[tmp_t7], %[tmp_t8] \n"
"sw %[tmp_t8], 0(%[dst_argb4444]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9)
: [dst_argb4444] "r"(dst_argb4444), [yg] "r"(yg), [src_u] "r"(src_u),
[src_v] "r"(src_v), [src_y] "r"(src_y), [tmp_ub] "r"(tmp_ub),
[tmp_ug] "r"(tmp_ug), [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr),
[tmp_bb] "r"(tmp_bb), [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br),
[tmp_yg] "r"(tmp_yg), [tmp_mask] "r"(tmp_mask));
src_y += 2;
src_u += 1;
src_v += 1;
dst_argb4444 += 4; // Advance 2 pixels.
}
}
void I422ToARGB1555Row_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width) {
int x;
uint32 tmp_ub = yuvconstants->kUVToB[0];
uint32 tmp_ug = yuvconstants->kUVToG[0];
uint32 tmp_vg = yuvconstants->kUVToG[1];
uint32 tmp_vr = yuvconstants->kUVToR[1];
uint32 tmp_bb = yuvconstants->kUVBiasB[0];
uint32 tmp_bg = yuvconstants->kUVBiasG[0];
uint32 tmp_br = yuvconstants->kUVBiasR[0];
uint32 yg = yuvconstants->kYToRgb[0];
uint32 tmp_yg;
uint32 tmp_mask = 0x80008000;
tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff);
tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff);
tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff);
tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff);
tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001;
tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff);
tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff);
tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001;
yg = yg * 0x0101;
for (x = 0; x < width - 1; x += 2) {
uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lbu %[tmp_t7], 0(%[src_y]) \n"
"lbu %[tmp_t1], 1(%[src_y]) \n"
"mul %[tmp_t7], %[tmp_t7], %[yg] \n"
"mul %[tmp_t1], %[tmp_t1], %[yg] \n"
"lbu %[tmp_t2], 0(%[src_u]) \n"
"lbu %[tmp_t3], 0(%[src_v]) \n"
"replv.ph %[tmp_t2], %[tmp_t2] \n"
"replv.ph %[tmp_t3], %[tmp_t3] \n"
"mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n"
"mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n"
"mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n"
"mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n"
"srl %[tmp_t7], %[tmp_t7], 16 \n"
"ins %[tmp_t1], %[tmp_t7], 0, 16 \n"
"addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n"
"addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n"
"addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n"
"addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n"
"addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n"
"subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n"
"addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n"
"shra.ph %[tmp_t7], %[tmp_t7], 6 \n"
"shra.ph %[tmp_t8], %[tmp_t8], 6 \n"
"shra.ph %[tmp_t9], %[tmp_t9], 6 \n"
"shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n"
"shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n"
"shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n"
"precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n"
"precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n"
"precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n"
"ins %[tmp_t7], %[tmp_t8], 16, 16 \n"
"precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n"
"precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n"
"ins %[tmp_t3], %[tmp_t8], 7, 24 \n"
"ins %[tmp_t3], %[tmp_t8], 10, 16 \n"
"ins %[tmp_t3], %[tmp_t8], 13, 8 \n"
"ins %[tmp_t4], %[tmp_t7], 7, 24 \n"
"ins %[tmp_t4], %[tmp_t7], 10, 16 \n"
"ins %[tmp_t4], %[tmp_t7], 13, 8 \n"
"precrq.ph.w %[tmp_t8], %[tmp_t4], %[tmp_t3] \n"
"or %[tmp_t8], %[tmp_t8], %[tmp_mask]\n"
"sw %[tmp_t8], 0(%[dst_argb1555]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9)
: [dst_argb1555] "r"(dst_argb1555), [yg] "r"(yg), [src_u] "r"(src_u),
[src_v] "r"(src_v), [src_y] "r"(src_y), [tmp_ub] "r"(tmp_ub),
[tmp_ug] "r"(tmp_ug), [tmp_vg] "r"(tmp_vg), [tmp_vr] "r"(tmp_vr),
[tmp_bb] "r"(tmp_bb), [tmp_bg] "r"(tmp_bg), [tmp_br] "r"(tmp_br),
[tmp_yg] "r"(tmp_yg), [tmp_mask] "r"(tmp_mask));
src_y += 2;
src_u += 1;
src_v += 1;
dst_argb1555 += 4; // Advance 2 pixels.
}
}
void NV12ToARGBRow_DSPR2(const uint8* src_y,
const uint8* src_uv,
uint8* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
int x;
uint32 tmp_ub = yuvconstants->kUVToB[0];
uint32 tmp_ug = yuvconstants->kUVToG[0];
uint32 tmp_vg = yuvconstants->kUVToG[1];
uint32 tmp_vr = yuvconstants->kUVToR[1];
uint32 tmp_bb = yuvconstants->kUVBiasB[0];
uint32 tmp_bg = yuvconstants->kUVBiasG[0];
uint32 tmp_br = yuvconstants->kUVBiasR[0];
uint32 yg = yuvconstants->kYToRgb[0];
uint32 tmp_mask = 0x7fff7fff;
uint32 tmp_yg;
tmp_bb = ((uint)(tmp_bb & 0xffff) << 16) | (tmp_bb & 0xffff);
tmp_bg = ((uint)(tmp_bg & 0xffff) << 16) | (tmp_bg & 0xffff);
tmp_br = ((uint)(tmp_br & 0xffff) << 16) | (tmp_br & 0xffff);
tmp_yg = ((uint)(yg & 0xffff) << 16) | (yg & 0xffff);
tmp_ub = ~(((uint)(tmp_ub & 0xffff) << 16) | (tmp_ub & 0xffff)) + 0x00010001;
tmp_ug = ((uint)(tmp_ug & 0xffff) << 16) | (tmp_ug & 0xffff);
tmp_vg = ((uint)(tmp_vg & 0xffff) << 16) | (tmp_vg & 0xffff);
tmp_vr = ~(((uint)(tmp_vr & 0xffff) << 16) | (tmp_vr & 0xffff)) + 0x00010001;
yg = yg * 0x0101;
for (x = 0; x < width - 1; x += 2) {
uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
uint32 tmp_t6, tmp_t7, tmp_t8, tmp_t9;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lbu %[tmp_t7], 0(%[src_y]) \n"
"lbu %[tmp_t1], 1(%[src_y]) \n"
"mul %[tmp_t7], %[tmp_t7], %[yg] \n"
"mul %[tmp_t1], %[tmp_t1], %[yg] \n"
"lbu %[tmp_t2], 0(%[src_uv]) \n"
"lbu %[tmp_t3], 1(%[src_uv]) \n"
"replv.ph %[tmp_t2], %[tmp_t2] \n"
"replv.ph %[tmp_t3], %[tmp_t3] \n"
"mul.ph %[tmp_t4], %[tmp_t2], %[tmp_ub] \n"
"mul.ph %[tmp_t5], %[tmp_t2], %[tmp_ug] \n"
"mul.ph %[tmp_t6], %[tmp_t3], %[tmp_vr] \n"
"mul.ph %[tmp_t3], %[tmp_t3], %[tmp_vg] \n"
"srl %[tmp_t7], %[tmp_t7], 16 \n"
"ins %[tmp_t1], %[tmp_t7], 0, 16 \n"
"addq_s.ph %[tmp_t7], %[tmp_t1], %[tmp_bb] \n"
"addq_s.ph %[tmp_t8], %[tmp_t1], %[tmp_bg] \n"
"addq_s.ph %[tmp_t9], %[tmp_t1], %[tmp_br] \n"
"addq_s.ph %[tmp_t5], %[tmp_t5], %[tmp_t3] \n"
"addq_s.ph %[tmp_t7], %[tmp_t7], %[tmp_t4] \n"
"subq_s.ph %[tmp_t8], %[tmp_t8], %[tmp_t5] \n"
"addq_s.ph %[tmp_t9], %[tmp_t9], %[tmp_t6] \n"
"shra.ph %[tmp_t7], %[tmp_t7], 6 \n"
"shra.ph %[tmp_t8], %[tmp_t8], 6 \n"
"shra.ph %[tmp_t9], %[tmp_t9], 6 \n"
"shll_s.ph %[tmp_t7], %[tmp_t7], 7 \n"
"shll_s.ph %[tmp_t8], %[tmp_t8], 7 \n"
"shll_s.ph %[tmp_t9], %[tmp_t9], 7 \n"
"precrqu_s.qb.ph %[tmp_t8], %[tmp_mask], %[tmp_t8] \n"
"precrqu_s.qb.ph %[tmp_t7], %[tmp_t9], %[tmp_t7] \n"
"precrq.ph.w %[tmp_t2], %[tmp_t8], %[tmp_t7] \n"
"ins %[tmp_t7], %[tmp_t8], 16, 16 \n"
"precr.qb.ph %[tmp_t8], %[tmp_t2], %[tmp_t7] \n"
"precrq.qb.ph %[tmp_t7], %[tmp_t2], %[tmp_t7] \n"
"sw %[tmp_t8], 0(%[rgb_buf]) \n"
"sw %[tmp_t7], 4(%[rgb_buf]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [tmp_t9] "=&r"(tmp_t9)
: [src_y] "r"(src_y), [src_uv] "r"(src_uv), [yg] "r"(yg),
[tmp_ub] "r"(tmp_ub), [tmp_ug] "r"(tmp_ug), [tmp_vg] "r"(tmp_vg),
[tmp_vr] "r"(tmp_vr), [tmp_bb] "r"(tmp_bb), [tmp_bg] "r"(tmp_bg),
[tmp_br] "r"(tmp_br), [tmp_yg] "r"(tmp_yg), [rgb_buf] "r"(rgb_buf),
[tmp_mask] "r"(tmp_mask));
src_y += 2;
src_uv += 2;
rgb_buf += 8; // Advance 2 pixels.
}
}
void BGRAToUVRow_DSPR2(const uint8* src_rgb0,
int src_stride_rgb,
uint8* dst_u,
uint8* dst_v,
int width) {
const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;
int x;
int const1 = 0xffda0000;
int const2 = 0x0070ffb6;
int const3 = 0x00700000;
int const4 = 0xffeeffa2;
int const5 = 0x100;
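// mult of const5*const5 preloads each accumulator with 0x10000; after the
// Q15 doubling in dpaq_s.w.ph and the rounding >>9 extract this becomes
// the +128 chroma bias.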
for (x = 0; x < width - 1; x += 2) {
int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
int tmp_t6, tmp_t7, tmp_t8;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lw %[tmp_t1], 0(%[src_rgb0]) \n"
"lw %[tmp_t2], 4(%[src_rgb0]) \n"
"lw %[tmp_t3], 0(%[src_rgb1]) \n"
"lw %[tmp_t4], 4(%[src_rgb1]) \n"
"preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
"preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
"preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
"preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
"preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
"preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
"preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
"addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n"
"addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n"
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n"
"addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n"
"addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n"
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n"
"shrl.ph %[tmp_t5], %[tmp_t5], 2 \n"
"shrl.ph %[tmp_t1], %[tmp_t1], 2 \n"
"mult $ac0, %[const5], %[const5] \n"
"mult $ac1, %[const5], %[const5] \n"
"dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n"
"dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n"
"dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n"
"dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n"
"extr_r.w %[tmp_t7], $ac0, 9 \n"
"extr_r.w %[tmp_t8], $ac1, 9 \n"
"addiu %[dst_u], %[dst_u], 1 \n"
"addiu %[dst_v], %[dst_v], 1 \n"
"addiu %[src_rgb0], %[src_rgb0], 8 \n"
"addiu %[src_rgb1], %[src_rgb1], 8 \n"
"sb %[tmp_t7], -1(%[dst_u]) \n"
"sb %[tmp_t8], -1(%[dst_v]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
[src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1),
[dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v)
: [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3),
[const4] "r"(const4), [const5] "r"(const5)
: "hi", "lo", "$ac1lo", "$ac1hi");
}
}
void BGRAToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) {
int x;
int const1 = 0x00420000;
int const2 = 0x00190081;
int const5 = 0x40;
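// const5*const5 = 0x1000 preloads each accumulator; after the rounding
// >>8 extract this becomes the +16 luma offset.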
for (x = 0; x < width; x += 4) {
int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
int tmp_t6, tmp_t7, tmp_t8;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lw %[tmp_t1], 0(%[src_argb0]) \n"
"lw %[tmp_t2], 4(%[src_argb0]) \n"
"lw %[tmp_t3], 8(%[src_argb0]) \n"
"lw %[tmp_t4], 12(%[src_argb0]) \n"
"preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
"preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
"preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
"preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
"preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
"preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
"preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
"mult $ac0, %[const5], %[const5] \n"
"mult $ac1, %[const5], %[const5] \n"
"mult $ac2, %[const5], %[const5] \n"
"mult $ac3, %[const5], %[const5] \n"
"dpa.w.ph $ac0, %[tmp_t5], %[const1] \n"
"dpa.w.ph $ac1, %[tmp_t6], %[const1] \n"
"dpa.w.ph $ac2, %[tmp_t7], %[const1] \n"
"dpa.w.ph $ac3, %[tmp_t8], %[const1] \n"
"dpa.w.ph $ac0, %[tmp_t1], %[const2] \n"
"dpa.w.ph $ac1, %[tmp_t2], %[const2] \n"
"dpa.w.ph $ac2, %[tmp_t3], %[const2] \n"
"dpa.w.ph $ac3, %[tmp_t4], %[const2] \n"
"extr_r.w %[tmp_t1], $ac0, 8 \n"
"extr_r.w %[tmp_t2], $ac1, 8 \n"
"extr_r.w %[tmp_t3], $ac2, 8 \n"
"extr_r.w %[tmp_t4], $ac3, 8 \n"
"addiu %[src_argb0],%[src_argb0], 16 \n"
"addiu %[dst_y], %[dst_y], 4 \n"
"sb %[tmp_t1], -4(%[dst_y]) \n"
"sb %[tmp_t2], -3(%[dst_y]) \n"
"sb %[tmp_t3], -2(%[dst_y]) \n"
"sb %[tmp_t4], -1(%[dst_y]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
[src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y)
: [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5)
: "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo",
"$ac3hi");
}
}
void ABGRToUVRow_DSPR2(const uint8* src_rgb0,
int src_stride_rgb,
uint8* dst_u,
uint8* dst_v,
int width) {
const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;
int x;
int const1 = 0xffb6ffda;
int const2 = 0x00000070;
int const3 = 0xffa20070;
int const4 = 0x0000ffee;
int const5 = 0x100;
for (x = 0; x < width - 1; x += 2) {
int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
int tmp_t6, tmp_t7, tmp_t8;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lw %[tmp_t1], 0(%[src_rgb0]) \n"
"lw %[tmp_t2], 4(%[src_rgb0]) \n"
"lw %[tmp_t3], 0(%[src_rgb1]) \n"
"lw %[tmp_t4], 4(%[src_rgb1]) \n"
"preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
"preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
"preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
"preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
"preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
"preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
"preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
"addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n"
"addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n"
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n"
"addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n"
"addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n"
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n"
"shrl.ph %[tmp_t5], %[tmp_t5], 2 \n"
"shrl.ph %[tmp_t1], %[tmp_t1], 2 \n"
"mult $ac0, %[const5], %[const5] \n"
"mult $ac1, %[const5], %[const5] \n"
"dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n"
"dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n"
"dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n"
"dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n"
"extr_r.w %[tmp_t7], $ac0, 9 \n"
"extr_r.w %[tmp_t8], $ac1, 9 \n"
"addiu %[dst_u], %[dst_u], 1 \n"
"addiu %[dst_v], %[dst_v], 1 \n"
"addiu %[src_rgb0], %[src_rgb0], 8 \n"
"addiu %[src_rgb1], %[src_rgb1], 8 \n"
"sb %[tmp_t7], -1(%[dst_u]) \n"
"sb %[tmp_t8], -1(%[dst_v]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
[src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1),
[dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v)
: [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3),
[const4] "r"(const4), [const5] "r"(const5)
: "hi", "lo", "$ac1lo", "$ac1hi");
}
}
void ARGBToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) {
int x;
int const1 = 0x00810019;
int const2 = 0x00000042;
int const5 = 0x40;
for (x = 0; x < width; x += 4) {
int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
int tmp_t6, tmp_t7, tmp_t8;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lw %[tmp_t1], 0(%[src_argb0]) \n"
"lw %[tmp_t2], 4(%[src_argb0]) \n"
"lw %[tmp_t3], 8(%[src_argb0]) \n"
"lw %[tmp_t4], 12(%[src_argb0]) \n"
"preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
"preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
"preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
"preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
"preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
"preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
"preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
"mult $ac0, %[const5], %[const5] \n"
"mult $ac1, %[const5], %[const5] \n"
"mult $ac2, %[const5], %[const5] \n"
"mult $ac3, %[const5], %[const5] \n"
"dpa.w.ph $ac0, %[tmp_t5], %[const1] \n"
"dpa.w.ph $ac1, %[tmp_t6], %[const1] \n"
"dpa.w.ph $ac2, %[tmp_t7], %[const1] \n"
"dpa.w.ph $ac3, %[tmp_t8], %[const1] \n"
"dpa.w.ph $ac0, %[tmp_t1], %[const2] \n"
"dpa.w.ph $ac1, %[tmp_t2], %[const2] \n"
"dpa.w.ph $ac2, %[tmp_t3], %[const2] \n"
"dpa.w.ph $ac3, %[tmp_t4], %[const2] \n"
"extr_r.w %[tmp_t1], $ac0, 8 \n"
"extr_r.w %[tmp_t2], $ac1, 8 \n"
"extr_r.w %[tmp_t3], $ac2, 8 \n"
"extr_r.w %[tmp_t4], $ac3, 8 \n"
"addiu %[dst_y], %[dst_y], 4 \n"
"addiu %[src_argb0],%[src_argb0], 16 \n"
"sb %[tmp_t1], -4(%[dst_y]) \n"
"sb %[tmp_t2], -3(%[dst_y]) \n"
"sb %[tmp_t3], -2(%[dst_y]) \n"
"sb %[tmp_t4], -1(%[dst_y]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
[src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y)
: [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5)
: "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo",
"$ac3hi");
}
}
void ABGRToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) {
int x;
int const1 = 0x00810042;
int const2 = 0x00000019;
int const5 = 0x40;
for (x = 0; x < width; x += 4) {
int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
int tmp_t6, tmp_t7, tmp_t8;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lw %[tmp_t1], 0(%[src_argb0]) \n"
"lw %[tmp_t2], 4(%[src_argb0]) \n"
"lw %[tmp_t3], 8(%[src_argb0]) \n"
"lw %[tmp_t4], 12(%[src_argb0]) \n"
"preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
"preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
"preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
"preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
"preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
"preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
"preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
"mult $ac0, %[const5], %[const5] \n"
"mult $ac1, %[const5], %[const5] \n"
"mult $ac2, %[const5], %[const5] \n"
"mult $ac3, %[const5], %[const5] \n"
"dpa.w.ph $ac0, %[tmp_t5], %[const1] \n"
"dpa.w.ph $ac1, %[tmp_t6], %[const1] \n"
"dpa.w.ph $ac2, %[tmp_t7], %[const1] \n"
"dpa.w.ph $ac3, %[tmp_t8], %[const1] \n"
"dpa.w.ph $ac0, %[tmp_t1], %[const2] \n"
"dpa.w.ph $ac1, %[tmp_t2], %[const2] \n"
"dpa.w.ph $ac2, %[tmp_t3], %[const2] \n"
"dpa.w.ph $ac3, %[tmp_t4], %[const2] \n"
"extr_r.w %[tmp_t1], $ac0, 8 \n"
"extr_r.w %[tmp_t2], $ac1, 8 \n"
"extr_r.w %[tmp_t3], $ac2, 8 \n"
"extr_r.w %[tmp_t4], $ac3, 8 \n"
"addiu %[src_argb0],%[src_argb0], 16 \n"
"addiu %[dst_y], %[dst_y], 4 \n"
"sb %[tmp_t1], -4(%[dst_y]) \n"
"sb %[tmp_t2], -3(%[dst_y]) \n"
"sb %[tmp_t3], -2(%[dst_y]) \n"
"sb %[tmp_t4], -1(%[dst_y]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
[src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y)
: [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5)
: "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo",
"$ac3hi");
}
}
void RGBAToUVRow_DSPR2(const uint8* src_rgb0,
int src_stride_rgb,
uint8* dst_u,
uint8* dst_v,
int width) {
const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;
int x;
int const1 = 0xffb60070;
int const2 = 0x0000ffda;
int const3 = 0xffa2ffee;
int const4 = 0x00000070;
int const5 = 0x100;
for (x = 0; x < width - 1; x += 2) {
int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
int tmp_t6, tmp_t7, tmp_t8;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"ulw %[tmp_t1], 0+1(%[src_rgb0]) \n"
"ulw %[tmp_t2], 4+1(%[src_rgb0]) \n"
"ulw %[tmp_t3], 0+1(%[src_rgb1]) \n"
"ulw %[tmp_t4], 4+1(%[src_rgb1]) \n"
"preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
"preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
"preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
"preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
"preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
"preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
"preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
"addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n"
"addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n"
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n"
"addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n"
"addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n"
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n"
"shrl.ph %[tmp_t5], %[tmp_t5], 2 \n"
"shrl.ph %[tmp_t1], %[tmp_t1], 2 \n"
"mult $ac0, %[const5], %[const5] \n"
"mult $ac1, %[const5], %[const5] \n"
"dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n"
"dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n"
"dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n"
"dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n"
"extr_r.w %[tmp_t7], $ac0, 9 \n"
"extr_r.w %[tmp_t8], $ac1, 9 \n"
"addiu %[src_rgb0], %[src_rgb0], 8 \n"
"addiu %[src_rgb1], %[src_rgb1], 8 \n"
"addiu %[dst_u], %[dst_u], 1 \n"
"addiu %[dst_v], %[dst_v], 1 \n"
"sb %[tmp_t7], -1(%[dst_u]) \n"
"sb %[tmp_t8], -1(%[dst_v]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
[src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1),
[dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v)
: [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3),
[const4] "r"(const4), [const5] "r"(const5)
: "hi", "lo", "$ac1lo", "$ac1hi");
}
}
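RGBAToUVRow_DSPR2 first builds a 2x2 box average: two pixels from each of two rows are summed per channel with addu.ph and divided by four with shrl.ph 2. dpaq_s.w.ph is the saturating Q15 dot product, which doubles each partial product, so the accumulator is extracted with a shift of 9 rather than 8, and the mult seed of const5 * const5 = 0x100 * 0x100 = 0x10000 becomes the +128 chroma bias. The halfword constants are libyuv's usual chroma weights: 112 and -74 in const1 plus -38 in const2 for U; -94 and -18 in const3 plus 112 in const4 for V. A scalar sketch over the averaged channels (hypothetical helper names, not part of the patch):

// U/V from a 2x2-averaged pixel, matching libyuv's C RGBToU/RGBToV.
// 0x8080 = 128 bias scaled by 256, plus 128 rounding.
static uint8 RGBToU_Sketch(uint8 r, uint8 g, uint8 b) {
  return (uint8)((112 * b - 74 * g - 38 * r + 0x8080) >> 8);
}
static uint8 RGBToV_Sketch(uint8 r, uint8 g, uint8 b) {
  return (uint8)((112 * r - 94 * g - 18 * b + 0x8080) >> 8);
}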
void RGBAToYRow_DSPR2(const uint8* src_argb0, uint8* dst_y, int width) {
int x;
int const1 = 0x00420081;
int const2 = 0x00190000;
int const5 = 0x40;
for (x = 0; x < width; x += 4) {
int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
int tmp_t6, tmp_t7, tmp_t8;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lw %[tmp_t1], 0(%[src_argb0]) \n"
"lw %[tmp_t2], 4(%[src_argb0]) \n"
"lw %[tmp_t3], 8(%[src_argb0]) \n"
"lw %[tmp_t4], 12(%[src_argb0]) \n"
"preceu.ph.qbl %[tmp_t5], %[tmp_t1] \n"
"preceu.ph.qbr %[tmp_t1], %[tmp_t1] \n"
"preceu.ph.qbl %[tmp_t6], %[tmp_t2] \n"
"preceu.ph.qbr %[tmp_t2], %[tmp_t2] \n"
"preceu.ph.qbl %[tmp_t7], %[tmp_t3] \n"
"preceu.ph.qbr %[tmp_t3], %[tmp_t3] \n"
"preceu.ph.qbl %[tmp_t8], %[tmp_t4] \n"
"preceu.ph.qbr %[tmp_t4], %[tmp_t4] \n"
"mult $ac0, %[const5], %[const5] \n"
"mult $ac1, %[const5], %[const5] \n"
"mult $ac2, %[const5], %[const5] \n"
"mult $ac3, %[const5], %[const5] \n"
"dpa.w.ph $ac0, %[tmp_t5], %[const1] \n"
"dpa.w.ph $ac1, %[tmp_t6], %[const1] \n"
"dpa.w.ph $ac2, %[tmp_t7], %[const1] \n"
"dpa.w.ph $ac3, %[tmp_t8], %[const1] \n"
"dpa.w.ph $ac0, %[tmp_t1], %[const2] \n"
"dpa.w.ph $ac1, %[tmp_t2], %[const2] \n"
"dpa.w.ph $ac2, %[tmp_t3], %[const2] \n"
"dpa.w.ph $ac3, %[tmp_t4], %[const2] \n"
"extr_r.w %[tmp_t1], $ac0, 8 \n"
"extr_r.w %[tmp_t2], $ac1, 8 \n"
"extr_r.w %[tmp_t3], $ac2, 8 \n"
"extr_r.w %[tmp_t4], $ac3, 8 \n"
"addiu %[dst_y], %[dst_y], 4 \n"
"addiu %[src_argb0],%[src_argb0], 16 \n"
"sb %[tmp_t1], -4(%[dst_y]) \n"
"sb %[tmp_t2], -3(%[dst_y]) \n"
"sb %[tmp_t3], -2(%[dst_y]) \n"
"sb %[tmp_t4], -1(%[dst_y]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
[src_argb0] "+r"(src_argb0), [dst_y] "+r"(dst_y)
: [const1] "r"(const1), [const2] "r"(const2), [const5] "r"(const5)
: "hi", "lo", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo",
"$ac3hi");
}
}
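Across the Y variants only the constant packing changes, tracking each format's byte order in memory; a quick map (sketch, bytes listed in storage order):

// ARGB memory B,G,R,A: const1 0x00810019 -> 129*G | 25*B (low byte pair),
//                      const2 0x00000042 -> 66*R         (high byte pair)
// ABGR memory R,G,B,A: const1 0x00810042 -> 129*G | 66*R (low byte pair),
//                      const2 0x00000019 -> 25*B         (high byte pair)
// RGBA memory A,B,G,R: const1 0x00420081 -> 66*R | 129*G (high byte pair),
//                      const2 0x00190000 -> 25*B         (low byte pair)
// RGBAToYRow accordingly swaps which of preceu.ph.qbl/qbr feeds which constant.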
void ARGBToUVRow_DSPR2(const uint8* src_rgb0,
int src_stride_rgb,
uint8* dst_u,
uint8* dst_v,
int width) {
const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;
int x;
int const1 = 0xffb60070;
int const2 = 0x0000ffda;
int const3 = 0xffa2ffee;
int const4 = 0x00000070;
int const5 = 0x100;
for (x = 0; x < width - 1; x += 2) {
int tmp_t1, tmp_t2, tmp_t3, tmp_t4, tmp_t5;
int tmp_t6, tmp_t7, tmp_t8;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lw %[tmp_t1], 0(%[src_rgb0]) \n"
"lw %[tmp_t2], 4(%[src_rgb0]) \n"
"lw %[tmp_t3], 0(%[src_rgb1]) \n"
"lw %[tmp_t4], 4(%[src_rgb1]) \n"
"preceu.ph.qbr %[tmp_t5], %[tmp_t1] \n"
"preceu.ph.qbl %[tmp_t1], %[tmp_t1] \n"
"preceu.ph.qbr %[tmp_t6], %[tmp_t2] \n"
"preceu.ph.qbl %[tmp_t2], %[tmp_t2] \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t3] \n"
"preceu.ph.qbl %[tmp_t3], %[tmp_t3] \n"
"preceu.ph.qbr %[tmp_t8], %[tmp_t4] \n"
"preceu.ph.qbl %[tmp_t4], %[tmp_t4] \n"
"addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t6] \n"
"addu.ph %[tmp_t7], %[tmp_t7], %[tmp_t8] \n"
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t2] \n"
"addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t4] \n"
"addu.ph %[tmp_t5], %[tmp_t5], %[tmp_t7] \n"
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t3] \n"
"shrl.ph %[tmp_t5], %[tmp_t5], 2 \n"
"shrl.ph %[tmp_t1], %[tmp_t1], 2 \n"
"mult $ac0, %[const5], %[const5] \n"
"mult $ac1, %[const5], %[const5] \n"
"dpaq_s.w.ph $ac0, %[tmp_t5], %[const1] \n"
"dpaq_s.w.ph $ac1, %[tmp_t5], %[const3] \n"
"dpaq_s.w.ph $ac0, %[tmp_t1], %[const2] \n"
"dpaq_s.w.ph $ac1, %[tmp_t1], %[const4] \n"
"extr_r.w %[tmp_t7], $ac0, 9 \n"
"extr_r.w %[tmp_t8], $ac1, 9 \n"
"addiu %[src_rgb0], %[src_rgb0], 8 \n"
"addiu %[src_rgb1], %[src_rgb1], 8 \n"
"addiu %[dst_u], %[dst_u], 1 \n"
"addiu %[dst_v], %[dst_v], 1 \n"
"sb %[tmp_t7], -1(%[dst_u]) \n"
"sb %[tmp_t8], -1(%[dst_v]) \n"
".set pop \n"
: [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
[tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
[tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8),
[src_rgb0] "+r"(src_rgb0), [src_rgb1] "+r"(src_rgb1),
[dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v)
: [const1] "r"(const1), [const2] "r"(const2), [const3] "r"(const3),
[const4] "r"(const4), [const5] "r"(const5)
: "hi", "lo", "$ac1lo", "$ac1hi");
}
}
#endif // __mips_dsp_rev >= 2
#endif // defined(__mips__)
......
......@@ -894,6 +894,14 @@ static void ScalePlaneBox(int src_width,
}
}
#endif
#if defined(HAS_SCALEADDROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ScaleAddRow = ScaleAddRow_Any_DSPR2;
if (IS_ALIGNED(src_width, 16)) {
ScaleAddRow = ScaleAddRow_DSPR2;
}
}
#endif
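This follows libyuv's two-tier dispatch: the _Any_ wrapper is safe for any width, and the raw kernel is substituted only when the width is a multiple of 16 (matching the SAANY mask in the next hunk). A hypothetical call to show the contract (buffer sizes and width are made up):

// 37 pixels: the wrapper runs ScaleAddRow_DSPR2 on the first 32
// and lets ScaleAddRow_C finish the remaining 5.
uint8 src[64] = {0};
uint16 dst[64] = {0};
ScaleAddRow_Any_DSPR2(src, dst, 37);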
for (j = 0; j < dst_height; ++j) {
int boxheight;
......
......@@ -421,6 +421,9 @@ SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
#ifdef HAS_SCALEADDROW_MSA
SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15)
#endif
#ifdef HAS_SCALEADDROW_DSPR2
SAANY(ScaleAddRow_Any_DSPR2, ScaleAddRow_DSPR2, ScaleAddRow_C, 15)
#endif
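SAANY is the stock any-width wrapper generator defined earlier in scale_any.cc; with mask 15 the instantiation above roughly expands to the following (paraphrased from memory, not copied from the file):

void ScaleAddRow_Any_DSPR2(const uint8* src_ptr, uint16* dst_ptr,
                           int src_width) {
  int n = src_width & ~15;  // largest multiple of 16
  if (n > 0) {
    ScaleAddRow_DSPR2(src_ptr, dst_ptr, n);
  }
  ScaleAddRow_C(src_ptr + n, dst_ptr + n, src_width & 15);  // C tail
}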
#undef SAANY
#ifdef __cplusplus
......
......@@ -42,10 +42,10 @@ void ScaleRowDown2_DSPR2(const uint8* src_ptr,
"lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
"lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
// TODO(fbarchard): Use odd pixels instead of even.
"precr.qb.ph $t8, $t1, $t0 \n" // |6|4|2|0|
"precr.qb.ph $t0, $t3, $t2 \n" // |14|12|10|8|
"precr.qb.ph $t1, $t5, $t4 \n" // |22|20|18|16|
"precr.qb.ph $t2, $t7, $t6 \n" // |30|28|26|24|
"precrq.qb.ph $t8, $t1, $t0 \n" // |7|5|3|1|
"precrq.qb.ph $t0, $t3, $t2 \n" // |15|13|11|9|
"precrq.qb.ph $t1, $t5, $t4 \n" // |23|21|19|17|
"precrq.qb.ph $t2, $t7, $t6 \n" // |31|29|27|25|
"addiu %[src_ptr], %[src_ptr], 32 \n"
"addiu $t9, $t9, -1 \n"
"sw $t8, 0(%[dst]) \n"
......@@ -61,7 +61,7 @@ void ScaleRowDown2_DSPR2(const uint8* src_ptr,
" nop \n"
"21: \n"
"lbu $t0, 0(%[src_ptr]) \n"
"lbu $t0, 1(%[src_ptr]) \n"
"addiu %[src_ptr], %[src_ptr], 2 \n"
"addiu $t9, $t9, -1 \n"
"sb $t0, 0(%[dst]) \n"
......@@ -198,8 +198,8 @@ void ScaleRowDown4_DSPR2(const uint8* src_ptr,
"precr.qb.ph $t2, $t4, $t3 \n" // |14|12|10|8|
"precr.qb.ph $t5, $t6, $t5 \n" // |22|20|18|16|
"precr.qb.ph $t6, $t8, $t7 \n" // |30|28|26|24|
"precr.qb.ph $t1, $t2, $t1 \n" // |12|8|4|0|
"precr.qb.ph $t5, $t6, $t5 \n" // |28|24|20|16|
"precrq.qb.ph $t1, $t2, $t1 \n" // |14|10|6|2|
"precrq.qb.ph $t5, $t6, $t5 \n" // |30|26|22|18|
"addiu %[src_ptr], %[src_ptr], 32 \n"
"addiu $t9, $t9, -1 \n"
"sw $t1, 0(%[dst]) \n"
......@@ -213,7 +213,7 @@ void ScaleRowDown4_DSPR2(const uint8* src_ptr,
" nop \n"
"21: \n"
"lbu $t1, 0(%[src_ptr]) \n"
"lbu $t1, 2(%[src_ptr]) \n"
"addiu %[src_ptr], %[src_ptr], 4 \n"
"addiu $t9, $t9, -1 \n"
"sb $t1, 0(%[dst]) \n"
......@@ -615,6 +615,51 @@ void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8");
}
void ScaleAddRow_DSPR2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
int x;
for (x = 0; x < src_width - 1; x += 8) {
uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4;
uint32 tmp_t5, tmp_t6, tmp_t7, tmp_t8;
__asm__ __volatile__(
".set push \n"
".set noreorder \n"
"lw %[tmp_t5], 0(%[src_ptr]) \n"
"lw %[tmp_t6], 4(%[src_ptr]) \n"
"lw %[tmp_t1], 0(%[dst_ptr]) \n"
"lw %[tmp_t2], 4(%[dst_ptr]) \n"
"lw %[tmp_t3], 8(%[dst_ptr]) \n"
"lw %[tmp_t4], 12(%[dst_ptr]) \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t5] \n"
"preceu.ph.qbl %[tmp_t8], %[tmp_t5] \n"
"addu.ph %[tmp_t1], %[tmp_t1], %[tmp_t7] \n"
"addu.ph %[tmp_t2], %[tmp_t2], %[tmp_t8] \n"
"preceu.ph.qbr %[tmp_t7], %[tmp_t6] \n"
"preceu.ph.qbl %[tmp_t8], %[tmp_t6] \n"
"addu.ph %[tmp_t3], %[tmp_t3], %[tmp_t7] \n"
"addu.ph %[tmp_t4], %[tmp_t4], %[tmp_t8] \n"
"sw %[tmp_t1], 0(%[dst_ptr]) \n"
"sw %[tmp_t2], 4(%[dst_ptr]) \n"
"sw %[tmp_t3], 8(%[dst_ptr]) \n"
"sw %[tmp_t4], 12(%[dst_ptr]) \n"
".set pop \n"
:
[tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2), [tmp_t3] "=&r"(tmp_t3),
[tmp_t4] "=&r"(tmp_t4), [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
[tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8), [src_ptr] "+r"(src_ptr)
: [dst_ptr] "r"(dst_ptr));
src_ptr += 8;
dst_ptr += 8;
}
if (src_width & 7) {
// Tail for non-multiple-of-8 widths; the dispatch above only selects this
// kernel for widths that are multiples of 16, so this path is normally unused.
for (x = 0; x < (src_width & 7); x += 1) {
dst_ptr[0] += src_ptr[0];
src_ptr += 1;
dst_ptr += 1;
}
}
}
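The vector body above is a straight widening accumulate: preceu.ph.qbr/qbl zero-extend eight source bytes to halfwords and addu.ph folds them into the uint16 row. Scalar equivalent (sketch):

void ScaleAddRow_Sketch(const uint8* src_ptr, uint16* dst_ptr,
                        int src_width) {
  int x;
  for (x = 0; x < src_width; ++x)
    dst_ptr[x] += src_ptr[x];  // bytes accumulate into 16-bit sums
}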
#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#ifdef __cplusplus
......
......@@ -36,22 +36,28 @@ namespace libyuv {
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = benchmark_height_; \
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
align_buffer_page_end(src_u, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
align_buffer_page_end( \
src_u, \
SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
OFF); \
align_buffer_page_end(src_v, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
align_buffer_page_end( \
src_v, \
SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
OFF); \
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end( \
dst_u_c, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end( \
dst_v_c, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end( \
dst_u_opt, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end( \
dst_v_opt, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kWidth; ++j) \
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
......@@ -166,15 +172,19 @@ TESTPLANARTOP(I444, 1, 1, I444, 1, 1)
align_buffer_page_end(src_uv, \
kSizeUV*((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end( \
dst_u_c, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end( \
dst_v_c, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end( \
dst_u_opt, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end( \
dst_v_opt, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
uint8* src_u = src_uv + OFF_U; \
uint8* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V); \
int src_stride_uv = SUBSAMPLE(kWidth, SUBSAMP_X) * PIXEL_STRIDE; \
......@@ -284,18 +294,22 @@ TESTAPLANARTOP(Android420, NV21, 2, 1, 0, 2, 2, I420, 2, 2)
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = benchmark_height_; \
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
align_buffer_page_end(src_u, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
align_buffer_page_end( \
src_u, \
SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
OFF); \
align_buffer_page_end(src_v, SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
align_buffer_page_end( \
src_v, \
SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
OFF); \
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
align_buffer_page_end(dst_uv_c, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end( \
dst_uv_c, \
SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
align_buffer_page_end(dst_uv_opt, SUBSAMPLE(kWidth * 2, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end( \
dst_uv_opt, \
SUBSAMPLE(kWidth * 2, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kWidth; ++j) \
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
......@@ -379,19 +393,24 @@ TESTPLANARTOBP(I420, 2, 2, NV21, 2, 2)
const int kWidth = ((W1280) > 0) ? (W1280) : 1; \
const int kHeight = benchmark_height_; \
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
align_buffer_page_end(src_uv, 2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
align_buffer_page_end(src_uv, \
2 * SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * \
SUBSAMPLE(kHeight, SRC_SUBSAMP_Y) + \
OFF); \
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end( \
dst_u_c, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end( \
dst_v_c, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end( \
dst_u_opt, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end( \
dst_v_opt, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kWidth; ++j) \
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
......@@ -1369,10 +1388,12 @@ TEST_F(LibYUVConvertTest, MJPGToI420) {
const int kSize = kImageSize + kOff;
align_buffer_page_end(orig_pixels, kSize);
align_buffer_page_end(dst_y_opt, benchmark_width_ * benchmark_height_);
align_buffer_page_end(dst_u_opt, SUBSAMPLE(benchmark_width_, 2) *
SUBSAMPLE(benchmark_height_, 2));
align_buffer_page_end(dst_v_opt, SUBSAMPLE(benchmark_width_, 2) *
SUBSAMPLE(benchmark_height_, 2));
align_buffer_page_end(
dst_u_opt,
SUBSAMPLE(benchmark_width_, 2) * SUBSAMPLE(benchmark_height_, 2));
align_buffer_page_end(
dst_v_opt,
SUBSAMPLE(benchmark_width_, 2) * SUBSAMPLE(benchmark_height_, 2));
// EOI, SOI to make MJPG appear valid.
memset(orig_pixels, 0, kSize);
......@@ -1444,16 +1465,20 @@ TEST_F(LibYUVConvertTest, NV12Crop) {
uint8* src_uv = src_y + kWidth * kHeight;
align_buffer_page_end(dst_y, kDestWidth * kDestHeight);
align_buffer_page_end(dst_u, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
SUBSAMPLE(kDestHeight, SUBSAMP_Y));
align_buffer_page_end(dst_v, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
SUBSAMPLE(kDestHeight, SUBSAMP_Y));
align_buffer_page_end(
dst_u,
SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
align_buffer_page_end(
dst_v,
SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
align_buffer_page_end(dst_y_2, kDestWidth * kDestHeight);
align_buffer_page_end(dst_u_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
SUBSAMPLE(kDestHeight, SUBSAMP_Y));
align_buffer_page_end(dst_v_2, SUBSAMPLE(kDestWidth, SUBSAMP_X) *
SUBSAMPLE(kDestHeight, SUBSAMP_Y));
align_buffer_page_end(
dst_u_2,
SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
align_buffer_page_end(
dst_v_2,
SUBSAMPLE(kDestWidth, SUBSAMP_X) * SUBSAMPLE(kDestHeight, SUBSAMP_Y));
for (int i = 0; i < kHeight * kWidth; ++i) {
src_y[i] = (fastrand() & 0xff);
......
......@@ -356,16 +356,18 @@ int main(int argc, const char* argv[]) {
const int uv_size = ((image_width + 1) / 2) * ((image_height + 1) / 2);
const size_t total_size = y_size + 2 * uv_size; // NOLINT
#if defined(_MSC_VER)
_fseeki64(file_org, static_cast<__int64>(num_skip_org) *
static_cast<__int64>(total_size),
_fseeki64(
file_org,
static_cast<__int64>(num_skip_org) * static_cast<__int64>(total_size),
SEEK_SET);
#else
fseek(file_org, num_skip_org * total_size, SEEK_SET);
#endif
for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) {
#if defined(_MSC_VER)
_fseeki64(file_rec[cur_rec], static_cast<__int64>(num_skip_rec) *
static_cast<__int64>(total_size),
_fseeki64(
file_rec[cur_rec],
static_cast<__int64>(num_skip_rec) * static_cast<__int64>(total_size),
SEEK_SET);
#else
fseek(file_rec[cur_rec], num_skip_rec * total_size, SEEK_SET);
......