Commit 8cd3e4f3 authored by Frank Barchard

Add MSA optimized ScaleFilterCols, ScaleARGBCols, ScaleARGBFilterCols and ScaleRowDown34 functions

TBR=kjellander@chromium.org
R=fbarchard@google.com

Bug:libyuv:634
Change-Id: Ib139b9701fc67e24d27a6886377c0cb8b2773fda
Reviewed-on: https://chromium-review.googlesource.com/620791
Reviewed-by: Frank Barchard <fbarchard@google.com>
parent 78e44628
......@@ -114,9 +114,14 @@ extern "C" {
// Feature macros announcing which scale kernels have an MSA implementation.
// The dispatch code and the Any-wrapper instantiations test these macros with
// #if defined(...) / #ifdef before referencing the matching *_MSA functions.
#define HAS_SCALEROWDOWN2_MSA
#define HAS_SCALEROWDOWN4_MSA
#define HAS_SCALEADDROW_MSA
#define HAS_SCALEARGBCOLS_MSA
#define HAS_SCALEROWDOWN34_MSA
// The kernels below are only advertised when DISABLE_CLANG_MSA is not defined.
// NOTE(review): presumably these do not build with clang's MSA support -
// confirm where DISABLE_CLANG_MSA is set.
#ifndef DISABLE_CLANG_MSA
#define HAS_SCALEARGBROWDOWNEVEN_MSA
#define HAS_SCALEROWDOWN38_MSA
#define HAS_SCALEFILTERCOLS_MSA
#define HAS_SCALEARGBFILTERCOLS_MSA
#endif
#endif
......@@ -553,6 +558,26 @@ void ScaleARGBCols_Any_NEON(uint8* dst_argb,
int dst_width,
int x,
int dx);
// MSA-optimized ARGB column scalers. All four share the same argument list.
// NOTE(review): x and dx are presumably the fixed-point start position and
// per-pixel step used by the other Scale*Cols kernels - confirm against the
// C reference versions.

// Filtering kernel. The ARGB dispatch code selects it only when MSA is
// available and dst_width is a multiple of 8.
void ScaleARGBFilterCols_MSA(uint8* dst_argb,
const uint8* src_argb,
int dst_width,
int x,
int dx);
// Non-filtering kernel. The ARGB dispatch code selects it only when MSA is
// available and dst_width is a multiple of 4.
void ScaleARGBCols_MSA(uint8* dst_argb,
const uint8* src_argb,
int dst_width,
int x,
int dx);
// Variant the dispatch code uses for filtering when dst_width is not a
// multiple of 8.
void ScaleARGBFilterCols_Any_MSA(uint8* dst_argb,
const uint8* src_argb,
int dst_width,
int x,
int dx);
// Variant the dispatch code uses without filtering when dst_width is not a
// multiple of 4.
void ScaleARGBCols_Any_MSA(uint8* dst_argb,
const uint8* src_argb,
int dst_width,
int x,
int dx);
// ARGB Row functions
void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
......@@ -892,6 +917,24 @@ void ScaleRowDown38_3_Box_MSA(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int dst_width);
void ScaleAddRow_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
// MSA-optimized column filter. The ScalePlaneBilinear* code selects it when
// MSA is available, src_width < 32768 and dst_width is a multiple of 16.
// NOTE(review): x and dx are presumably the fixed-point start position and
// per-pixel step - confirm against ScaleFilterCols_C.
void ScaleFilterCols_MSA(uint8* dst_ptr,
const uint8* src_ptr,
int dst_width,
int x,
int dx);
// MSA-optimized 3/4 down-scaling row kernels. ScalePlaneDown34 selects them
// only when dst_width is a multiple of 48.
// Used for both row slots (ScaleRowDown34_0 and ScaleRowDown34_1) when
// filtering is off.
void ScaleRowDown34_MSA(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr,
int dst_width);
// Assigned to the ScaleRowDown34_0 slot when filtering is on.
void ScaleRowDown34_0_Box_MSA(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr,
int dst_width);
// Assigned to the ScaleRowDown34_1 slot when filtering is on.
void ScaleRowDown34_1_Box_MSA(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr,
int dst_width);
void ScaleRowDown2_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
......@@ -927,6 +970,23 @@ void ScaleRowDown38_3_Box_Any_MSA(const uint8_t* src_ptr,
void ScaleAddRow_Any_MSA(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int src_width);
// Variant of ScaleFilterCols_MSA that the ScalePlaneBilinear* code uses when
// dst_width is not a multiple of 16.
void ScaleFilterCols_Any_MSA(uint8* dst_ptr,
const uint8* src_ptr,
int dst_width,
int x,
int dx);
// Variants of the ScaleRowDown34 MSA kernels. ScalePlaneDown34 installs these
// first whenever MSA is available and replaces them with the direct kernels
// only when dst_width is a multiple of 48.
// Used for both row slots when filtering is off.
void ScaleRowDown34_Any_MSA(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr,
int dst_width);
// Assigned to the ScaleRowDown34_0 slot when filtering is on.
void ScaleRowDown34_0_Box_Any_MSA(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr,
int dst_width);
// Assigned to the ScaleRowDown34_1 slot when filtering is on.
void ScaleRowDown34_1_Box_Any_MSA(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr,
int dst_width);
#ifdef __cplusplus
} // extern "C"
......
......@@ -371,6 +371,26 @@ static void ScalePlaneDown34(int src_width,
}
}
#endif
#if defined(HAS_SCALEROWDOWN34_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_Any_MSA;
ScaleRowDown34_1 = ScaleRowDown34_Any_MSA;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_MSA;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_MSA;
}
if (dst_width % 48 == 0) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_MSA;
ScaleRowDown34_1 = ScaleRowDown34_MSA;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Box_MSA;
ScaleRowDown34_1 = ScaleRowDown34_1_Box_MSA;
}
}
}
#endif
#if defined(HAS_SCALEROWDOWN34_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (!filtering) {
......@@ -1078,6 +1098,14 @@ void ScalePlaneBilinearDown(int src_width,
ScaleFilterCols = ScaleFilterCols_NEON;
}
}
#endif
#if defined(HAS_SCALEFILTERCOLS_MSA)
if (TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
ScaleFilterCols = ScaleFilterCols_Any_MSA;
if (IS_ALIGNED(dst_width, 16)) {
ScaleFilterCols = ScaleFilterCols_MSA;
}
}
#endif
if (y > max_y) {
y = max_y;
......@@ -1276,6 +1304,14 @@ void ScalePlaneBilinearUp(int src_width,
ScaleFilterCols = ScaleFilterCols_NEON;
}
}
#endif
#if defined(HAS_SCALEFILTERCOLS_MSA)
if (filtering && TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
ScaleFilterCols = ScaleFilterCols_Any_MSA;
if (IS_ALIGNED(dst_width, 16)) {
ScaleFilterCols = ScaleFilterCols_MSA;
}
}
#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleFilterCols = ScaleColsUp2_C;
......
......@@ -33,9 +33,15 @@ extern "C" {
#ifdef HAS_SCALEFILTERCOLS_NEON
CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7)
#endif
#ifdef HAS_SCALEFILTERCOLS_MSA
// Instantiates ScaleFilterCols_Any_MSA from ScaleFilterCols_MSA, with
// ScaleFilterCols_C as the C counterpart.
// NOTE(review): the trailing arguments look like bytes per pixel (1) and the
// width mask (15, matching the multiple-of-16 test in the dispatch code) -
// confirm against the CANY definition.
CANY(ScaleFilterCols_Any_MSA, ScaleFilterCols_MSA, ScaleFilterCols_C, 1, 15)
#endif
#ifdef HAS_SCALEARGBCOLS_NEON
CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7)
#endif
#ifdef HAS_SCALEARGBCOLS_MSA
// Instantiates ScaleARGBCols_Any_MSA from ScaleARGBCols_MSA, with
// ScaleARGBCols_C as the C counterpart.
// NOTE(review): the trailing arguments look like bytes per pixel (4) and the
// width mask (3, matching the multiple-of-4 test in the dispatch code) -
// confirm against the CANY definition.
CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C, 4, 3)
#endif
#ifdef HAS_SCALEARGBFILTERCOLS_NEON
CANY(ScaleARGBFilterCols_Any_NEON,
ScaleARGBFilterCols_NEON,
......@@ -43,6 +49,13 @@ CANY(ScaleARGBFilterCols_Any_NEON,
4,
3)
#endif
#ifdef HAS_SCALEARGBFILTERCOLS_MSA
// Instantiates ScaleARGBFilterCols_Any_MSA from ScaleARGBFilterCols_MSA, with
// ScaleARGBFilterCols_C as the C counterpart.
// NOTE(review): the trailing arguments look like bytes per pixel (4) and the
// width mask (7, matching the multiple-of-8 test in the dispatch code) -
// confirm against the CANY definition.
CANY(ScaleARGBFilterCols_Any_MSA,
ScaleARGBFilterCols_MSA,
ScaleARGBFilterCols_C,
4,
7)
#endif
#undef CANY
// Fixed scale down.
......@@ -228,6 +241,26 @@ SDANY(ScaleRowDown34_1_Box_Any_NEON,
1,
23)
#endif
#ifdef HAS_SCALEROWDOWN34_MSA
// Instantiates the three ScaleRowDown34 Any wrappers, each pairing an MSA
// kernel with its C counterpart.
// NOTE(review): "4 / 3" looks like the source-to-destination width ratio, 1
// the bytes per pixel, and 47 the width mask (matching the dst_width % 48
// test in ScalePlaneDown34) - confirm against the SDANY definition.
SDANY(ScaleRowDown34_Any_MSA,
ScaleRowDown34_MSA,
ScaleRowDown34_C,
4 / 3,
1,
47)
SDANY(ScaleRowDown34_0_Box_Any_MSA,
ScaleRowDown34_0_Box_MSA,
ScaleRowDown34_0_Box_C,
4 / 3,
1,
47)
SDANY(ScaleRowDown34_1_Box_Any_MSA,
ScaleRowDown34_1_Box_MSA,
ScaleRowDown34_1_Box_C,
4 / 3,
1,
47)
#endif
#ifdef HAS_SCALEROWDOWN38_SSSE3
SDANY(ScaleRowDown38_Any_SSSE3,
ScaleRowDown38_SSSE3,
......
......@@ -335,6 +335,14 @@ static void ScaleARGBBilinearDown(int src_width,
ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
}
}
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
}
}
#endif
// TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
// Allocate a row of ARGB.
......@@ -442,6 +450,14 @@ static void ScaleARGBBilinearUp(int src_width,
}
}
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_MSA)
if (filtering && TestCpuFlag(kCpuHasMSA)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBCols_SSE2;
......@@ -454,6 +470,14 @@ static void ScaleARGBBilinearUp(int src_width,
ScaleARGBFilterCols = ScaleARGBCols_NEON;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_MSA)
if (!filtering && TestCpuFlag(kCpuHasMSA)) {
ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterCols = ScaleARGBCols_MSA;
}
}
#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C;
......@@ -643,6 +667,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
}
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_MSA)
if (filtering && TestCpuFlag(kCpuHasMSA)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
if (IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBCols_SSE2;
......@@ -655,6 +687,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
ScaleARGBFilterCols = ScaleARGBCols_NEON;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_MSA)
if (!filtering && TestCpuFlag(kCpuHasMSA)) {
ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterCols = ScaleARGBCols_MSA;
}
}
#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C;
......@@ -778,6 +818,14 @@ static void ScaleARGBSimple(int src_width,
ScaleARGBCols = ScaleARGBCols_NEON;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ScaleARGBCols = ScaleARGBCols_Any_MSA;
if (IS_ALIGNED(dst_width, 4)) {
ScaleARGBCols = ScaleARGBCols_MSA;
}
}
#endif
if (src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBCols = ScaleARGBColsUp2_C;
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment