Commit 000d2fa9 authored by Frank Barchard

Libyuv MIPS DSPR2 optimizations.

Optimized functions:

I444ToARGBRow_DSPR2
I422ToARGB4444Row_DSPR2
I422ToARGB1555Row_DSPR2
NV12ToARGBRow_DSPR2
BGRAToUVRow_DSPR2
BGRAToYRow_DSPR2
ABGRToUVRow_DSPR2
ARGBToYRow_DSPR2
ABGRToYRow_DSPR2
RGBAToUVRow_DSPR2
RGBAToYRow_DSPR2
ARGBToUVRow_DSPR2
RGB24ToARGBRow_DSPR2
RAWToARGBRow_DSPR2
RGB565ToARGBRow_DSPR2
ARGB1555ToARGBRow_DSPR2
ARGB4444ToARGBRow_DSPR2
ScaleAddRow_DSPR2

Bug-fixes in functions:

ScaleRowDown2_DSPR2
ScaleRowDown4_DSPR2

BUG=

Review-Url: https://codereview.chromium.org/2626123003 .
parent 288bfbef
This diff is collapsed.
......@@ -101,6 +101,7 @@ extern "C" {
#define HAS_SCALEROWDOWN4_DSPR2
#define HAS_SCALEROWDOWN34_DSPR2
#define HAS_SCALEROWDOWN38_DSPR2
#define HAS_SCALEADDROW_DSPR2
#endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
......@@ -846,6 +847,10 @@ void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr,
int dst_width);
// MIPS DSPR2 row function: adds each of the src_width bytes at src_ptr to the
// corresponding uint16 at dst_ptr, updating dst_ptr in place.
void ScaleAddRow_DSPR2(const uint8* src_ptr, uint16* dst_ptr, int src_width);
// "Any" counterpart of ScaleAddRow_DSPR2 with the same signature.
// NOTE(review): presumably accepts widths the DSPR2 kernel cannot take
// directly by finishing the remainder in C - confirm against the SAANY macro.
void ScaleAddRow_Any_DSPR2(const uint8* src_ptr,
uint16* dst_ptr,
int src_width);
void ScaleRowDown2_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
......@@ -885,9 +890,9 @@ void ScaleRowDown2_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst,
int dst_width);
void ScaleRowDown2Linear_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown2Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
......
......@@ -579,6 +579,14 @@ int ARGBToI420(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA;
......@@ -587,6 +595,14 @@ int ARGBToI420(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToUVRow = ARGBToUVRow_Any_MSA;
......@@ -664,6 +680,22 @@ int BGRAToI420(const uint8* src_bgra,
}
}
#endif
#if defined(HAS_BGRATOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
BGRAToYRow = BGRAToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
BGRAToYRow = BGRAToYRow_DSPR2;
}
}
#endif
#if defined(HAS_BGRATOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
BGRAToUVRow = BGRAToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
BGRAToUVRow = BGRAToUVRow_DSPR2;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width);
......@@ -733,6 +765,22 @@ int ABGRToI420(const uint8* src_abgr,
}
}
#endif
#if defined(HAS_ABGRTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ABGRToYRow = ABGRToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ABGRToYRow = ABGRToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ABGRTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ABGRToUVRow = ABGRToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_DSPR2;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width);
......@@ -802,6 +850,22 @@ int RGBAToI420(const uint8* src_rgba,
}
}
#endif
#if defined(HAS_RGBATOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGBAToYRow = RGBAToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGBAToYRow = RGBAToYRow_DSPR2;
}
}
#endif
#if defined(HAS_RGBATOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGBAToUVRow = RGBAToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
RGBAToUVRow = RGBAToUVRow_DSPR2;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width);
......@@ -1014,6 +1078,14 @@ int RAWToI420(const uint8* src_raw,
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_RAWTOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RAWToARGBRow = RAWToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
RAWToARGBRow = RAWToARGBRow_DSPR2;
}
}
#endif
{
// Allocate 2 rows of ARGB.
......@@ -1142,6 +1214,14 @@ int RGB565ToI420(const uint8* src_rgb565,
ARGBToYRow = ARGBToYRow_AVX2;
}
}
#endif
#if defined(HAS_RGB565TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_DSPR2;
}
}
#endif
{
// Allocate 2 rows of ARGB.
......
......@@ -485,6 +485,14 @@ static int I444ToARGBMatrix(const uint8* src_y,
}
}
#endif
#if defined(HAS_I444TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
I444ToARGBRow = I444ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
I444ToARGBRow = I444ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) {
I444ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
......@@ -946,6 +954,14 @@ int RGB24ToARGB(const uint8* src_rgb24,
}
}
#endif
#if defined(HAS_RGB24TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGB24ToARGBRow = RGB24ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) {
RGB24ToARGBRow(src_rgb24, dst_argb, width);
......@@ -997,6 +1013,14 @@ int RAWToARGB(const uint8* src_raw,
}
}
#endif
#if defined(HAS_RAWTOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RAWToARGBRow = RAWToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RAWToARGBRow = RAWToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) {
RAWToARGBRow(src_raw, dst_argb, width);
......@@ -1056,6 +1080,14 @@ int RGB565ToARGB(const uint8* src_rgb565,
}
}
#endif
#if defined(HAS_RGB565TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) {
RGB565ToARGBRow(src_rgb565, dst_argb, width);
......@@ -1115,6 +1147,14 @@ int ARGB1555ToARGB(const uint8* src_argb1555,
}
}
#endif
#if defined(HAS_ARGB1555TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGB1555ToARGBRow(src_argb1555, dst_argb, width);
......@@ -1174,6 +1214,14 @@ int ARGB4444ToARGB(const uint8* src_argb4444,
}
}
#endif
#if defined(HAS_ARGB4444TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGB4444TOARGBROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MSA;
......@@ -1238,6 +1286,14 @@ int NV12ToARGB(const uint8* src_y,
}
}
#endif
#if defined(HAS_NV12TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
NV12ToARGBRow = NV12ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height; ++y) {
NV12ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width);
......@@ -1354,6 +1410,14 @@ int M420ToARGB(const uint8* src_m420,
}
}
#endif
#if defined(HAS_NV12TOARGBROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
NV12ToARGBRow = NV12ToARGBRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_DSPR2;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
......
......@@ -708,6 +708,14 @@ int I420ToARGB1555(const uint8* src_y,
}
}
#endif
#if defined(HAS_I422TOARGB1555ROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
I422ToARGB1555Row = I422ToARGB1555Row_DSPR2;
}
}
#endif
#if defined(HAS_I422TOARGB1555ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_MSA;
......@@ -781,6 +789,14 @@ int I420ToARGB4444(const uint8* src_y,
}
}
#endif
#if defined(HAS_I422TOARGB4444ROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_DSPR2;
if (IS_ALIGNED(width, 4)) {
I422ToARGB4444Row = I422ToARGB4444Row_DSPR2;
}
}
#endif
#if defined(HAS_I422TOARGB4444ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_MSA;
......
......@@ -100,6 +100,14 @@ int ARGBToI444(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA;
......@@ -189,6 +197,23 @@ int ARGBToI422(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA;
......@@ -318,6 +343,22 @@ int ARGBToNV12(const uint8* src_argb,
MergeUVRow_ = MergeUVRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
{
// Allocate a rows of uv.
......@@ -445,6 +486,22 @@ int ARGBToNV21(const uint8* src_argb,
MergeUVRow_ = MergeUVRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
{
// Allocate a rows of uv.
......@@ -570,6 +627,22 @@ int ARGBToYUY2(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_I422TOYUY2ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToYUY2Row = I422ToYUY2Row_Any_MSA;
......@@ -698,6 +771,22 @@ int ARGBToUYVY(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToUVRow = ARGBToUVRow_Any_DSPR2;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_DSPR2;
}
}
#endif
#if defined(HAS_I422TOUYVYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
I422ToUYVYRow = I422ToUYVYRow_Any_MSA;
......@@ -775,6 +864,14 @@ int ARGBToI400(const uint8* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ARGBToYRow = ARGBToYRow_Any_DSPR2;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_DSPR2;
}
}
#endif
#if defined(HAS_ARGBTOYROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToYRow = ARGBToYRow_Any_MSA;
......
......@@ -167,6 +167,12 @@ ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGBROW_DSPR2
ANY31C(I444ToARGBRow_Any_DSPR2, I444ToARGBRow_DSPR2, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_DSPR2, I422ToARGBRow_DSPR2, 1, 0, 4, 7)
ANY31C(I422ToARGB4444Row_Any_DSPR2, I422ToARGB4444Row_DSPR2, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_DSPR2, I422ToARGB1555Row_DSPR2, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGBROW_MSA
ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7)
......@@ -291,6 +297,9 @@ ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#ifdef HAS_NV12TOARGBROW_NEON
ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TOARGBROW_DSPR2
ANY21C(NV12ToARGBRow_Any_DSPR2, NV12ToARGBRow_DSPR2, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_SSSE3
ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif
......@@ -484,6 +493,33 @@ ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7)
#ifdef HAS_ARGB4444TOARGBROW_NEON
ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_RGB24TOARGBROW_DSPR2
ANY11(RGB24ToARGBRow_Any_DSPR2, RGB24ToARGBRow_DSPR2, 0, 3, 4, 7)
#endif
#ifdef HAS_RAWTOARGBROW_DSPR2
ANY11(RAWToARGBRow_Any_DSPR2, RAWToARGBRow_DSPR2, 0, 3, 4, 7)
#endif
#ifdef HAS_RGB565TOARGBROW_DSPR2
ANY11(RGB565ToARGBRow_Any_DSPR2, RGB565ToARGBRow_DSPR2, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB1555TOARGBROW_DSPR2
ANY11(ARGB1555ToARGBRow_Any_DSPR2, ARGB1555ToARGBRow_DSPR2, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB4444TOARGBROW_DSPR2
ANY11(ARGB4444ToARGBRow_Any_DSPR2, ARGB4444ToARGBRow_DSPR2, 0, 2, 4, 7)
#endif
#ifdef HAS_BGRATOYROW_DSPR2
ANY11(BGRAToYRow_Any_DSPR2, BGRAToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBTOYROW_DSPR2
ANY11(ARGBToYRow_Any_DSPR2, ARGBToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_ABGRTOYROW_DSPR2
ANY11(ABGRToYRow_Any_DSPR2, ABGRToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_RGBATOYROW_DSPR2
ANY11(RGBAToYRow_Any_DSPR2, RGBAToYRow_DSPR2, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGB4444TOARGBROW_MSA
ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15)
#endif
......@@ -904,6 +940,18 @@ ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
#ifdef HAS_UYVYTOUVROW_NEON
ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
#endif
#ifdef HAS_BGRATOUVROW_DSPR2
ANY12S(BGRAToUVRow_Any_DSPR2, BGRAToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_ABGRTOUVROW_DSPR2
ANY12S(ABGRToUVRow_Any_DSPR2, ABGRToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_RGBATOUVROW_DSPR2
ANY12S(RGBAToUVRow_Any_DSPR2, RGBAToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVROW_DSPR2
ANY12S(ARGBToUVRow_Any_DSPR2, ARGBToUVRow_DSPR2, 0, 4, 15)
#endif
#ifdef HAS_YUY2TOUVROW_MSA
ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
#endif
......
......@@ -202,8 +202,9 @@ void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
uint8 b1 = src_argb[4] >> 3;
uint8 g1 = src_argb[5] >> 2;
uint8 r1 = src_argb[6] >> 3;
WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
(r1 << 27));
WRITEWORD(
dst_rgb,
b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27));
dst_rgb += 4;
src_argb += 8;
}
......@@ -237,8 +238,9 @@ void ARGBToRGB565DitherRow_C(const uint8* src_argb,
uint8 b1 = clamp255(src_argb[4] + dither1) >> 3;
uint8 g1 = clamp255(src_argb[5] + dither1) >> 2;
uint8 r1 = clamp255(src_argb[6] + dither1) >> 3;
WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
(r1 << 27));
WRITEWORD(
dst_rgb,
b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27));
dst_rgb += 4;
src_argb += 8;
}
......
This diff is collapsed.
......@@ -2032,7 +2032,7 @@ __declspec(naked) void RGBAToUVRow_SSSE3(const uint8* src_argb0,
__asm vpsraw ymm2, ymm2, 6 \
__asm vpackuswb ymm0, ymm0, ymm0 /* B */ \
__asm vpackuswb ymm1, ymm1, ymm1 /* G */ \
__asm vpackuswb ymm2, ymm2, ymm2 /* R */ \
__asm vpackuswb ymm2, ymm2, ymm2 /* R */ \
}
// Store 16 ARGB values.
......
......@@ -894,6 +894,14 @@ static void ScalePlaneBox(int src_width,
}
}
#endif
#if defined(HAS_SCALEADDROW_DSPR2)
if (TestCpuFlag(kCpuHasDSPR2)) {
ScaleAddRow = ScaleAddRow_Any_DSPR2;
if (IS_ALIGNED(src_width, 16)) {
ScaleAddRow = ScaleAddRow_DSPR2;
}
}
#endif
for (j = 0; j < dst_height; ++j) {
int boxheight;
......
......@@ -421,6 +421,9 @@ SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
#ifdef HAS_SCALEADDROW_MSA
SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15)
#endif
#ifdef HAS_SCALEADDROW_DSPR2
SAANY(ScaleAddRow_Any_DSPR2, ScaleAddRow_DSPR2, ScaleAddRow_C, 15)
#endif
#undef SAANY
#ifdef __cplusplus
......
......@@ -42,10 +42,10 @@ void ScaleRowDown2_DSPR2(const uint8* src_ptr,
"lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
"lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
// TODO(fbarchard): Use odd pixels instead of even.
"precr.qb.ph $t8, $t1, $t0 \n" // |6|4|2|0|
"precr.qb.ph $t0, $t3, $t2 \n" // |14|12|10|8|
"precr.qb.ph $t1, $t5, $t4 \n" // |22|20|18|16|
"precr.qb.ph $t2, $t7, $t6 \n" // |30|28|26|24|
"precrq.qb.ph $t8, $t1, $t0 \n" // |7|5|3|1|
"precrq.qb.ph $t0, $t3, $t2 \n" // |15|13|11|9|
"precrq.qb.ph $t1, $t5, $t4 \n" // |23|21|19|17|
"precrq.qb.ph $t2, $t7, $t6 \n" // |31|29|27|25|
"addiu %[src_ptr], %[src_ptr], 32 \n"
"addiu $t9, $t9, -1 \n"
"sw $t8, 0(%[dst]) \n"
......@@ -61,7 +61,7 @@ void ScaleRowDown2_DSPR2(const uint8* src_ptr,
" nop \n"
"21: \n"
"lbu $t0, 0(%[src_ptr]) \n"
"lbu $t0, 1(%[src_ptr]) \n"
"addiu %[src_ptr], %[src_ptr], 2 \n"
"addiu $t9, $t9, -1 \n"
"sb $t0, 0(%[dst]) \n"
......@@ -198,8 +198,8 @@ void ScaleRowDown4_DSPR2(const uint8* src_ptr,
"precr.qb.ph $t2, $t4, $t3 \n" // |14|12|10|8|
"precr.qb.ph $t5, $t6, $t5 \n" // |22|20|18|16|
"precr.qb.ph $t6, $t8, $t7 \n" // |30|28|26|24|
"precr.qb.ph $t1, $t2, $t1 \n" // |12|8|4|0|
"precr.qb.ph $t5, $t6, $t5 \n" // |28|24|20|16|
"precrq.qb.ph $t1, $t2, $t1 \n" // |14|10|6|2|
"precrq.qb.ph $t5, $t6, $t5 \n" // |30|26|22|18|
"addiu %[src_ptr], %[src_ptr], 32 \n"
"addiu $t9, $t9, -1 \n"
"sw $t1, 0(%[dst]) \n"
......@@ -213,7 +213,7 @@ void ScaleRowDown4_DSPR2(const uint8* src_ptr,
" nop \n"
"21: \n"
"lbu $t1, 0(%[src_ptr]) \n"
"lbu $t1, 2(%[src_ptr]) \n"
"addiu %[src_ptr], %[src_ptr], 4 \n"
"addiu $t9, $t9, -1 \n"
"sb $t1, 0(%[dst]) \n"
......@@ -615,6 +615,51 @@ void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8");
}
// Accumulates one row of 8-bit samples into a row of 16-bit running sums:
//   dst_ptr[i] += src_ptr[i]   for every i in [0, src_width).
//
// src_ptr:   source row, src_width bytes (read only).
// dst_ptr:   running sums, src_width uint16 values, updated in place.
// src_width: number of samples; values <= 0 leave dst_ptr untouched.
//
// Complete groups of 8 samples are processed by the DSPR2 assembly below; the
// remaining (src_width % 8) samples are finished by the scalar loop, so the
// function never touches memory beyond src_width elements.
//
// NOTE(review): the lw/sw word accesses presumably require src_ptr and
// dst_ptr to be 4-byte aligned, and the byte-to-halfword lane pairing appears
// to assume a little-endian target - confirm both against the callers/build.
void ScaleAddRow_DSPR2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
  int x;

  // Vector part: only while at least 8 samples remain. (The comparison is
  // written as a subtraction so it cannot overflow for any src_width.)
  for (x = 0; src_width - x >= 8; x += 8) {
    uint32 tmp_t1, tmp_t2, tmp_t3, tmp_t4;
    uint32 tmp_t5, tmp_t6, tmp_t7, tmp_t8;
    __asm__ __volatile__(
        ".set push                                                \n"
        ".set noreorder                                           \n"
        // Load 8 source bytes and the 8 matching 16-bit sums.
        "lw                %[tmp_t5],    0(%[src_ptr])            \n"
        "lw                %[tmp_t6],    4(%[src_ptr])            \n"
        "lw                %[tmp_t1],    0(%[dst_ptr])            \n"
        "lw                %[tmp_t2],    4(%[dst_ptr])            \n"
        "lw                %[tmp_t3],    8(%[dst_ptr])            \n"
        "lw                %[tmp_t4],    12(%[dst_ptr])           \n"
        // Zero-extend the right/left byte pairs of the first source word to
        // halfword pairs and add them to sums 0..3 (modular halfword add).
        "preceu.ph.qbr     %[tmp_t7],    %[tmp_t5]                \n"
        "preceu.ph.qbl     %[tmp_t8],    %[tmp_t5]                \n"
        "addu.ph           %[tmp_t1],    %[tmp_t1],    %[tmp_t7]  \n"
        "addu.ph           %[tmp_t2],    %[tmp_t2],    %[tmp_t8]  \n"
        // Same for the second source word and sums 4..7.
        "preceu.ph.qbr     %[tmp_t7],    %[tmp_t6]                \n"
        "preceu.ph.qbl     %[tmp_t8],    %[tmp_t6]                \n"
        "addu.ph           %[tmp_t3],    %[tmp_t3],    %[tmp_t7]  \n"
        "addu.ph           %[tmp_t4],    %[tmp_t4],    %[tmp_t8]  \n"
        // Write the updated sums back.
        "sw                %[tmp_t1],    0(%[dst_ptr])            \n"
        "sw                %[tmp_t2],    4(%[dst_ptr])            \n"
        "sw                %[tmp_t3],    8(%[dst_ptr])            \n"
        "sw                %[tmp_t4],    12(%[dst_ptr])           \n"
        ".set pop                                                 \n"
        : [tmp_t1] "=&r"(tmp_t1), [tmp_t2] "=&r"(tmp_t2),
          [tmp_t3] "=&r"(tmp_t3), [tmp_t4] "=&r"(tmp_t4),
          [tmp_t5] "=&r"(tmp_t5), [tmp_t6] "=&r"(tmp_t6),
          [tmp_t7] "=&r"(tmp_t7), [tmp_t8] "=&r"(tmp_t8)
        // Both pointers are only read by the asm; the pointer increments
        // happen in C below.
        : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr)
        // The asm loads from and stores to memory behind the pointers, which
        // the compiler cannot see from the register operands alone.
        : "memory");
    src_ptr += 8;
    dst_ptr += 8;
  }

  // Scalar tail: the 0..7 samples that did not fill a whole group of 8.
  for (; x < src_width; ++x) {
    dst_ptr[0] += src_ptr[0];
    src_ptr += 1;
    dst_ptr += 1;
  }
}
#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#ifdef __cplusplus
......
This diff is collapsed.
......@@ -356,17 +356,19 @@ int main(int argc, const char* argv[]) {
const int uv_size = ((image_width + 1) / 2) * ((image_height + 1) / 2);
const size_t total_size = y_size + 2 * uv_size; // NOLINT
#if defined(_MSC_VER)
_fseeki64(file_org, static_cast<__int64>(num_skip_org) *
static_cast<__int64>(total_size),
SEEK_SET);
_fseeki64(
file_org,
static_cast<__int64>(num_skip_org) * static_cast<__int64>(total_size),
SEEK_SET);
#else
fseek(file_org, num_skip_org * total_size, SEEK_SET);
#endif
for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) {
#if defined(_MSC_VER)
_fseeki64(file_rec[cur_rec], static_cast<__int64>(num_skip_rec) *
static_cast<__int64>(total_size),
SEEK_SET);
_fseeki64(
file_rec[cur_rec],
static_cast<__int64>(num_skip_rec) * static_cast<__int64>(total_size),
SEEK_SET);
#else
fseek(file_rec[cur_rec], num_skip_rec * total_size, SEEK_SET);
#endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment