Commit c85a7b3a authored by Frank Barchard, committed by Frank Barchard

MMI Optimized functions I422ToARGB for 1080p video

Improves playback performance for 1080p video on www.youku.com

BUG=libyuv:841

Change-Id: Iabe7693fba276162af0290863f46e214ab86fb6c
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/1790959
Reviewed-by: Miguel Casas <mcasas@chromium.org>
parent eeccc19a
......@@ -6,8 +6,8 @@
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
import("libyuv.gni")
import("//testing/test.gni")
import("libyuv.gni")
declare_args() {
# Set to false to disable building with gflags.
......@@ -162,9 +162,8 @@ static_library("libyuv_internal") {
# crbug.com/538243).
if (!is_debug || is_nacl) {
configs -= [ "//build/config/compiler:default_optimization" ]
# Enable optimize for speed (-O2) over size (-Os).
configs += [ "//build/config/compiler:optimize_max" ]
#configs += [ "//build/config/compiler:optimize_max" ]
}
# To enable AVX2 or other cpu optimization, pass flag here
......
......@@ -458,6 +458,8 @@ extern "C" {
#define HAS_I422TOUYVYROW_MSA
#define HAS_I422TOYUY2ROW_MSA
#define HAS_I444TOARGBROW_MSA
#define HAS_I422TOARGB1555ROW_MSA
#define HAS_I422TORGB565ROW_MSA
#define HAS_INTERPOLATEROW_MSA
#define HAS_J400TOARGBROW_MSA
#define HAS_MERGEUVROW_MSA
......@@ -514,6 +516,7 @@ extern "C" {
#define HAS_ARGBMIRRORROW_MMI
#define HAS_ARGBMULTIPLYROW_MMI
#define HAS_ARGBSEPIAROW_MMI
#define HAS_ARGBSETROW_MMI
#define HAS_ARGBSHADEROW_MMI
#define HAS_ARGBSHUFFLEROW_MMI
#define HAS_ARGBSUBTRACTROW_MMI
......@@ -537,6 +540,8 @@ extern "C" {
#define HAS_I400TOARGBROW_MMI
#define HAS_I422TOUYVYROW_MMI
#define HAS_I422TOYUY2ROW_MMI
#define HAS_I422TOARGBROW_MMI
#define HAS_I444TOARGBROW_MMI
#define HAS_INTERPOLATEROW_MMI
#define HAS_J400TOARGBROW_MMI
#define HAS_MERGERGBROW_MMI
......@@ -567,6 +572,20 @@ extern "C" {
#define HAS_YUY2TOUV422ROW_MMI
#define HAS_YUY2TOUVROW_MMI
#define HAS_YUY2TOYROW_MMI
#define HAS_I210TOARGBROW_MMI
#define HAS_I422TOARGB4444ROW_MMI
#define HAS_I422TOARGB1555ROW_MMI
#define HAS_I422TORGB565ROW_MMI
#define HAS_NV21TORGB24ROW_MMI
#define HAS_NV12TORGB24ROW_MMI
#define HAS_I422ALPHATOARGBROW_MMI
#define HAS_I422TORGB24ROW_MMI
#define HAS_NV12TOARGBROW_MMI
#define HAS_NV21TOARGBROW_MMI
#define HAS_NV12TORGB565ROW_MMI
#define HAS_YUY2TOARGBROW_MMI
#define HAS_UYVYTOARGBROW_MMI
#define HAS_I422TORGBAROW_MMI
#endif
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
......@@ -844,6 +863,12 @@ void I444ToARGBRow_MSA(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
// MMI build of the I444ToARGBRow row function: three 8-bit source planes
// (src_y, src_u, src_v), one output buffer, a YuvConstants pointer and a
// width. I444ToARGBMatrix installs it when TestCpuFlag(kCpuHasMMI) is set and
// width is a multiple of 4; otherwise the _Any_MMI wrapper is used.
void I444ToARGBRow_MMI(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_MSA(const uint8_t* src_y,
const uint8_t* src_u,
......@@ -857,6 +882,12 @@ void I422ToRGBARow_MSA(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
// MMI build of the I422ToARGBRow row function. Installed by the I420/I422
// ToARGB matrix converters, I420ToRGB565Dither and ScaleYUVToARGBBilinearUp
// when TestCpuFlag(kCpuHasMMI) is set and the row width is a multiple of 4;
// the _Any_MMI wrapper is installed for other widths.
void I422ToARGBRow_MMI(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422AlphaToARGBRow_MSA(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
......@@ -1847,6 +1878,8 @@ void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width);
void ARGBSetRow_Any_NEON(uint8_t* dst_ptr, uint32_t v32, int width);
void ARGBSetRow_MSA(uint8_t* dst_argb, uint32_t v32, int width);
void ARGBSetRow_Any_MSA(uint8_t* dst_ptr, uint32_t v32, int width);
// MMI builds of ARGBSetRow. ARGBRect installs the _Any_ wrapper when
// TestCpuFlag(kCpuHasMMI) is set and switches to the plain _MMI function when
// width is a multiple of 4. The wrapper is generated with
// ANY1(..., uint32_t, 4, 3).
void ARGBSetRow_MMI(uint8_t* dst_argb, uint32_t v32, int width);
void ARGBSetRow_Any_MMI(uint8_t* dst_ptr, uint32_t v32, int width);
// ARGBShufflers for BGRAToARGB etc.
void ARGBShuffleRow_C(const uint8_t* src_argb,
......@@ -3089,12 +3122,24 @@ void I444ToARGBRow_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
// Any-width wrapper around I444ToARGBRow_MMI, generated by
// ANY31C(I444ToARGBRow_Any_MMI, I444ToARGBRow_MMI, 0, 0, 4, 7).
// NOTE(review): that ANY31C line sits under #ifdef HAS_I422TOARGBROW_MMI, not
// HAS_I444TOARGBROW_MMI -- both macros are defined together in this change.
void I444ToARGBRow_Any_MMI(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
// Any-width wrapper around I422ToARGBRow_MMI, generated by
// ANY31C(I422ToARGBRow_Any_MMI, I422ToARGBRow_MMI, 1, 0, 4, 7). Callers
// install it first and replace it with the plain _MMI function when the row
// width is a multiple of 4.
void I422ToARGBRow_Any_MMI(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_Any_MSA(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
......@@ -4037,6 +4082,159 @@ float ScaleSumSamples_NEON(const float* src,
void ScaleSamples_C(const float* src, float* dst, float scale, int width);
void ScaleSamples_NEON(const float* src, float* dst, float scale, int width);
// MMI row functions added by this change. Each takes its source plane
// pointer(s), one destination buffer, a YuvConstants pointer and a pixel
// width. The converters select them at run time behind
// TestCpuFlag(kCpuHasMMI), and only when width meets the alignment checked at
// the call site (4 in most callers, 8 for the NV12/NV21 -> RGB24 paths);
// otherwise the matching _Any_MMI wrapper is used.

// Three-plane source with 16-bit samples.
void I210ToARGBRow_MMI(const uint16_t* src_y,
const uint16_t* src_u,
const uint16_t* src_v,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width);
// Three-plane (Y, U, V) sources with 8-bit samples.
void I422ToRGBARow_MMI(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
// Four-plane source: Y, U, V plus a separate alpha plane (src_a).
void I422AlphaToARGBRow_MMI(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
const uint8_t* src_a,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
// NOTE(review): the output parameter is named dst_argb here, but
// I420ToRGB24Matrix passes dst_rgb24 -- the name looks copied from the ARGB
// prototypes.
void I422ToRGB24Row_MMI(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGB565Row_MMI(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_MMI(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_MMI(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width);
// Two-plane sources: a Y plane plus one interleaved chroma plane (src_uv for
// the NV12 functions, src_vu for the NV21 functions).
void NV12ToARGBRow_MMI(const uint8_t* src_y,
const uint8_t* src_uv,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToRGB565Row_MMI(const uint8_t* src_y,
const uint8_t* src_uv,
uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width);
void NV21ToARGBRow_MMI(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToRGB24Row_MMI(const uint8_t* src_y,
const uint8_t* src_uv,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width);
void NV21ToRGB24Row_MMI(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_rgb24,
const struct YuvConstants* yuvconstants,
int width);
// Single-buffer sources (src_yuy2 / src_uyvy).
void YUY2ToARGBRow_MMI(const uint8_t* src_yuy2,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
void UYVYToARGBRow_MMI(const uint8_t* src_uyvy,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
// Any-width wrappers for the MMI row functions. Their bodies are generated by
// the ANY41C / ANY31C / ANY31CT / ANY21C / ANY11C macro invocations added in
// this change. Callers install the wrapper first whenever
// TestCpuFlag(kCpuHasMMI) is set, then replace it with the plain _MMI function
// if width passes the IS_ALIGNED check at that call site.

// 16-bit three-plane source (ANY31CT with uint16_t).
void I210ToARGBRow_Any_MMI(const uint16_t* y_buf,
const uint16_t* u_buf,
const uint16_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
// 8-bit three-plane sources (ANY31C), plus the four-plane alpha variant
// (ANY41C).
void I422ToRGBARow_Any_MMI(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I422AlphaToARGBRow_Any_MMI(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
const uint8_t* a_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
// NOTE(review): generated with mask 15, while I420ToRGB24Matrix switches to
// the plain function at IS_ALIGNED(width, 4) -- confirm the kernel's step.
void I422ToRGB24Row_Any_MMI(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I422ToRGB565Row_Any_MMI(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_Any_MMI(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_Any_MMI(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
// Two-plane sources (ANY21C).
void NV12ToARGBRow_Any_MMI(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToRGB565Row_Any_MMI(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void NV21ToARGBRow_Any_MMI(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void NV12ToRGB24Row_Any_MMI(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void NV21ToRGB24Row_Any_MMI(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
// Single-buffer sources (ANY11C).
void YUY2ToARGBRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void UYVYToARGBRow_Any_MMI(const uint8_t* src_ptr,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
......
......@@ -126,6 +126,7 @@ extern "C" {
#define HAS_SCALEROWDOWN2_MMI
#define HAS_SCALEROWDOWN4_16_MMI
#define HAS_SCALEROWDOWN4_MMI
#define HAS_SCALEROWDOWN34_MMI
#endif
// Scale ARGB vertically with bilinear interpolation.
......@@ -950,6 +951,10 @@ void ScaleRowDown34_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
// MMI build of ScaleRowDown34. ScalePlaneDown34 installs it for both
// ScaleRowDown34_0 and ScaleRowDown34_1 when filtering is off,
// TestCpuFlag(kCpuHasMMI) is set and dst_width % 24 == 0. The implementation
// ignores src_stride.
void ScaleRowDown34_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
void ScaleRowDown34_0_Box_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* d,
......@@ -1003,6 +1008,10 @@ void ScaleRowDown34_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
// Any-width wrapper for ScaleRowDown34_MMI, generated by
// SDANY(ScaleRowDown34_Any_MMI, ScaleRowDown34_MMI, ScaleRowDown34_C,
//       4 / 3, 1, 23).
// ScalePlaneDown34 uses it when filtering is off and dst_width is not a
// multiple of 24.
void ScaleRowDown34_Any_MMI(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
int dst_width);
void ScaleRowDown34_0_Box_Any_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
......
......@@ -105,6 +105,14 @@ static int I420ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I422TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToARGBRow = I422ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
I422ToARGBRow = I422ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
......@@ -291,6 +299,14 @@ static int I422ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I422TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToARGBRow = I422ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
I422ToARGBRow = I422ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
......@@ -575,6 +591,14 @@ static int I010ToARGBMatrix(const uint16_t* src_y,
I210ToARGBRow = I210ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_I210TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I210ToARGBRow = I210ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
I210ToARGBRow = I210ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
I210ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
......@@ -725,6 +749,14 @@ static int I444ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I444TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I444ToARGBRow = I444ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
I444ToARGBRow = I444ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
I444ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
......@@ -853,6 +885,14 @@ static int I420AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I422ALPHATOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422AlphaToARGBRow = I422AlphaToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
I422AlphaToARGBRow = I422AlphaToARGBRow_MMI;
}
}
#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
......@@ -1685,6 +1725,14 @@ static int NV12ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_NV12TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
NV12ToARGBRow = NV12ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
NV12ToARGBRow = NV12ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
NV12ToARGBRow(src_y, src_uv, dst_argb, yuvconstants, width);
......@@ -1752,6 +1800,14 @@ static int NV21ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_NV21TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
NV21ToARGBRow = NV21ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
NV21ToARGBRow = NV21ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
NV21ToARGBRow(src_y, src_vu, dst_argb, yuvconstants, width);
......@@ -1870,6 +1926,14 @@ static int NV12ToRGB24Matrix(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_NV12TORGB24ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
NV12ToRGB24Row = NV12ToRGB24Row_Any_MMI;
if (IS_ALIGNED(width, 8)) {
NV12ToRGB24Row = NV12ToRGB24Row_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
NV12ToRGB24Row(src_y, src_uv, dst_rgb24, yuvconstants, width);
......@@ -1929,6 +1993,14 @@ static int NV21ToRGB24Matrix(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_NV21TORGB24ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
NV21ToRGB24Row = NV21ToRGB24Row_Any_MMI;
if (IS_ALIGNED(width, 8)) {
NV21ToRGB24Row = NV21ToRGB24Row_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
NV21ToRGB24Row(src_y, src_vu, dst_rgb24, yuvconstants, width);
......@@ -2100,6 +2172,14 @@ int M420ToARGB(const uint8_t* src_m420,
}
}
#endif
#if defined(HAS_NV12TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
NV12ToARGBRow = NV12ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
NV12ToARGBRow = NV12ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
......@@ -2174,6 +2254,14 @@ int YUY2ToARGB(const uint8_t* src_yuy2,
YUY2ToARGBRow = YUY2ToARGBRow_MSA;
}
}
#endif
#if defined(HAS_YUY2TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
YUY2ToARGBRow = YUY2ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
YUY2ToARGBRow = YUY2ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
YUY2ToARGBRow(src_yuy2, dst_argb, &kYuvI601Constants, width);
......@@ -2241,6 +2329,14 @@ int UYVYToARGB(const uint8_t* src_uyvy,
UYVYToARGBRow = UYVYToARGBRow_MSA;
}
}
#endif
#if defined(HAS_UYVYTOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
UYVYToARGBRow = UYVYToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
UYVYToARGBRow = UYVYToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
UYVYToARGBRow(src_uyvy, dst_argb, &kYuvI601Constants, width);
......
......@@ -592,6 +592,14 @@ static int I420ToRGBAMatrix(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I422TORGBAROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToRGBARow = I422ToRGBARow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
I422ToRGBARow = I422ToRGBARow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
......@@ -699,6 +707,14 @@ static int I420ToRGB24Matrix(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I422TORGB24ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToRGB24Row = I422ToRGB24Row_Any_MMI;
if (IS_ALIGNED(width, 4)) {
I422ToRGB24Row = I422ToRGB24Row_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width);
......@@ -843,6 +859,14 @@ int I420ToARGB1555(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I422TOARGB1555ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_MMI;
if (IS_ALIGNED(width, 4)) {
I422ToARGB1555Row = I422ToARGB1555Row_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, &kYuvI601Constants,
......@@ -916,6 +940,14 @@ int I420ToARGB4444(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I422TOARGB4444ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_MMI;
if (IS_ALIGNED(width, 4)) {
I422ToARGB4444Row = I422ToARGB4444Row_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, &kYuvI601Constants,
......@@ -989,6 +1021,14 @@ int I420ToRGB565Matrix(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I422TORGB565ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToRGB565Row = I422ToRGB565Row_Any_MMI;
if (IS_ALIGNED(width, 4)) {
I422ToRGB565Row = I422ToRGB565Row_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, yuvconstants, width);
......@@ -1192,6 +1232,14 @@ int I420ToRGB565Dither(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I422TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToARGBRow = I422ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
I422ToARGBRow = I422ToARGBRow_MMI;
}
}
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2;
......@@ -1223,6 +1271,14 @@ int I420ToRGB565Dither(const uint8_t* src_y,
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_MSA;
}
}
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_MMI;
}
}
#endif
{
// Allocate a row of argb.
......
......@@ -1766,6 +1766,14 @@ static int I422ToRGBAMatrix(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I422TORGBAROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToRGBARow = I422ToRGBARow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
I422ToRGBARow = I422ToRGBARow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
......@@ -1868,6 +1876,14 @@ int NV12ToRGB565(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_NV12TORGB565ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
NV12ToRGB565Row = NV12ToRGB565Row_Any_MMI;
if (IS_ALIGNED(width, 4)) {
NV12ToRGB565Row = NV12ToRGB565Row_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
NV12ToRGB565Row(src_y, src_uv, dst_rgb565, &kYuvI601Constants, width);
......@@ -2081,6 +2097,14 @@ int ARGBRect(uint8_t* dst_argb,
}
}
#endif
#if defined(HAS_ARGBSETROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBSetRow = ARGBSetRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
ARGBSetRow = ARGBSetRow_MMI;
}
}
#endif
// Set plane
for (y = 0; y < height; ++y) {
......
......@@ -64,6 +64,9 @@ ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7)
#ifdef HAS_I422ALPHATOARGBROW_MSA
ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_MMI
ANY41C(I422AlphaToARGBRow_Any_MMI, I422AlphaToARGBRow_MMI, 1, 0, 4, 7)
#endif
#undef ANY41C
// Any 3 planes to 1.
......@@ -215,6 +218,15 @@ ANY31C(I422ToARGB4444Row_Any_MSA, I422ToARGB4444Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_MSA, I422ToARGB1555Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGBROW_MMI
ANY31C(I444ToARGBRow_Any_MMI, I444ToARGBRow_MMI, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_MMI, I422ToARGBRow_MMI, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_MMI, I422ToRGB24Row_MMI, 1, 0, 3, 15)
ANY31C(I422ToARGB4444Row_Any_MMI, I422ToARGB4444Row_MMI, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_MMI, I422ToARGB1555Row_MMI, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_MMI, I422ToRGB565Row_MMI, 1, 0, 2, 7)
ANY31C(I422ToRGBARow_Any_MMI, I422ToRGBARow_MMI, 1, 0, 4, 7)
#endif
#undef ANY31C
// Any 3 planes of 16 bit to 1 with yuvconstants
......@@ -250,6 +262,9 @@ ANY31CT(I210ToARGBRow_Any_AVX2, I210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#ifdef HAS_I210TOAR30ROW_AVX2
ANY31CT(I210ToAR30Row_Any_AVX2, I210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I210TOARGBROW_MMI
ANY31CT(I210ToARGBRow_Any_MMI, I210ToARGBRow_MMI, 1, 0, uint16_t, 2, 4, 7)
#endif
#undef ANY31CT
// Any 2 planes to 1.
......@@ -407,6 +422,9 @@ ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
#ifdef HAS_NV12TOARGBROW_MSA
ANY21C(NV12ToARGBRow_Any_MSA, NV12ToARGBRow_MSA, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TOARGBROW_MMI
ANY21C(NV12ToARGBRow_Any_MMI, NV12ToARGBRow_MMI, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_SSSE3
ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif
......@@ -419,6 +437,9 @@ ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
#ifdef HAS_NV21TOARGBROW_MSA
ANY21C(NV21ToARGBRow_Any_MSA, NV21ToARGBRow_MSA, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_MMI
ANY21C(NV21ToARGBRow_Any_MMI, NV21ToARGBRow_MMI, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TORGB24ROW_NEON
ANY21C(NV12ToRGB24Row_Any_NEON, NV12ToRGB24Row_NEON, 1, 1, 2, 3, 7)
#endif
......@@ -428,6 +449,9 @@ ANY21C(NV21ToRGB24Row_Any_NEON, NV21ToRGB24Row_NEON, 1, 1, 2, 3, 7)
#ifdef HAS_NV12TORGB24ROW_SSSE3
ANY21C(NV12ToRGB24Row_Any_SSSE3, NV12ToRGB24Row_SSSE3, 1, 1, 2, 3, 15)
#endif
#ifdef HAS_NV12TORGB24ROW_MMI
ANY21C(NV12ToRGB24Row_Any_MMI, NV12ToRGB24Row_MMI, 1, 1, 2, 3, 7)
#endif
#ifdef HAS_NV21TORGB24ROW_SSSE3
ANY21C(NV21ToRGB24Row_Any_SSSE3, NV21ToRGB24Row_SSSE3, 1, 1, 2, 3, 15)
#endif
......@@ -437,6 +461,9 @@ ANY21C(NV12ToRGB24Row_Any_AVX2, NV12ToRGB24Row_AVX2, 1, 1, 2, 3, 31)
#ifdef HAS_NV21TORGB24ROW_AVX2
ANY21C(NV21ToRGB24Row_Any_AVX2, NV21ToRGB24Row_AVX2, 1, 1, 2, 3, 31)
#endif
#ifdef HAS_NV21TORGB24ROW_MMI
ANY21C(NV21ToRGB24Row_Any_MMI, NV21ToRGB24Row_MMI, 1, 1, 2, 3, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_SSSE3
ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
#endif
......@@ -449,6 +476,9 @@ ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
#ifdef HAS_NV12TORGB565ROW_MSA
ANY21C(NV12ToRGB565Row_Any_MSA, NV12ToRGB565Row_MSA, 1, 1, 2, 2, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_MMI
ANY21C(NV12ToRGB565Row_Any_MMI, NV12ToRGB565Row_MMI, 1, 1, 2, 2, 7)
#endif
#undef ANY21C
// Any 1 to 1.
......@@ -1049,6 +1079,10 @@ ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
ANY11C(YUY2ToARGBRow_Any_MSA, YUY2ToARGBRow_MSA, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_MSA, UYVYToARGBRow_MSA, 1, 4, 4, 7)
#endif
#if defined(HAS_YUY2TOARGBROW_MMI)
ANY11C(YUY2ToARGBRow_Any_MMI, YUY2ToARGBRow_MMI, 1, 4, 4, 7)
ANY11C(UYVYToARGBRow_Any_MMI, UYVYToARGBRow_MMI, 1, 4, 4, 7)
#endif
#undef ANY11C
// Any 1 to 1 interpolate. Takes 2 rows of source via stride.
......@@ -1157,6 +1191,9 @@ ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32_t, 4, 3)
#ifdef HAS_ARGBSETROW_MSA
ANY1(ARGBSetRow_Any_MSA, ARGBSetRow_MSA, uint32_t, 4, 3)
#endif
#ifdef HAS_ARGBSETROW_MMI
ANY1(ARGBSetRow_Any_MMI, ARGBSetRow_MMI, uint32_t, 4, 3)
#endif
#undef ANY1
// Any 1 to 2. Outputs UV planes.
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -398,6 +398,18 @@ static void ScalePlaneDown34(int src_width,
}
}
#endif
#if defined(HAS_SCALEROWDOWN34_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_Any_MMI;
ScaleRowDown34_1 = ScaleRowDown34_Any_MMI;
if (dst_width % 24 == 0) {
ScaleRowDown34_0 = ScaleRowDown34_MMI;
ScaleRowDown34_1 = ScaleRowDown34_MMI;
}
}
}
#endif
#if defined(HAS_SCALEROWDOWN34_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
if (!filtering) {
......
......@@ -296,6 +296,14 @@ SDANY(ScaleRowDown34_1_Box_Any_MSA,
1,
47)
#endif
#ifdef HAS_SCALEROWDOWN34_MMI
SDANY(ScaleRowDown34_Any_MMI,
ScaleRowDown34_MMI,
ScaleRowDown34_C,
4 / 3,
1,
23)
#endif
#ifdef HAS_SCALEROWDOWN38_SSSE3
SDANY(ScaleRowDown38_Any_SSSE3,
ScaleRowDown38_SSSE3,
......
......@@ -627,6 +627,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
}
#endif
#if defined(HAS_I422TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToARGBRow = I422ToARGBRow_Any_MMI;
if (IS_ALIGNED(src_width, 4)) {
I422ToARGBRow = I422ToARGBRow_MMI;
}
}
#endif
void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
ptrdiff_t src_stride, int dst_width,
......
......@@ -1103,6 +1103,61 @@ void ScaleRowUp2_16_MMI(const uint16_t* src_ptr,
: "memory");
}
// Scales one row down to 3/4 width with Loongson MMI instructions.
//
// Each loop pass reads 16 source bytes (two unaligned 8-byte loads), repacks
// them in registers and writes 12 destination bytes (one 8-byte and one
// 4-byte unaligned store). It then advances src_ptr by 16, dst by 12 and
// decrements the remaining width by 12.
//
// src_ptr:    source row; 16 bytes are read per 12 output bytes.
// src_stride: unused (only a single source row is read).
// dst:        destination row; dst_width bytes are written.
// dst_width:  output width in bytes. Must be a positive multiple of 12: the
//             loop exits only when the counter reaches exactly zero.
//             ScalePlaneDown34 and the SDANY wrapper only pass multiples
//             of 24.
void ScaleRowDown34_MMI(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
  (void)src_stride;
  // The previous check (dst_width % 3 == 0) admitted widths such as 3, 6 or 9
  // for which the 12-per-pass countdown never hits zero and the loop would
  // run past both buffers.
  assert((dst_width % 12 == 0) && (dst_width > 0));
  uint64_t src[2];
  uint64_t tmp[2];
  __asm__ volatile (
    "1:                                             \n\t"
    "gsldlc1    %[src0],      0x07(%[src_ptr])      \n\t"
    "gsldrc1    %[src0],      0x00(%[src_ptr])      \n\t"
    "gsldlc1    %[src1],      0x0f(%[src_ptr])      \n\t"
    "gsldrc1    %[src1],      0x08(%[src_ptr])      \n\t"
    "and        %[tmp1],      %[src0],      %[mask1] \n\t"
    "psrlw      %[tmp0],      %[src0],      %[rmov] \n\t"
    "psllw      %[tmp0],      %[tmp0],      %[lmov1] \n\t"
    "or         %[src0],      %[tmp0],      %[tmp1] \n\t"
    "punpckhwd  %[tmp0],      %[src0],      %[src0] \n\t"
    "psllw      %[tmp1],      %[tmp0],      %[rmov] \n\t"
    "or         %[src0],      %[src0],      %[tmp1] \n\t"
    "psrlw      %[tmp0],      %[tmp0],      %[rmov8] \n\t"
    "pextrh     %[tmp0],      %[tmp0],      %[zero] \n\t"
    "pinsrh_2   %[src0],      %[src0],      %[tmp0] \n\t"
    "pextrh     %[tmp0],      %[src1],      %[zero] \n\t"
    "pinsrh_3   %[src0],      %[src0],      %[tmp0] \n\t"
    "punpckhwd  %[tmp0],      %[src1],      %[src1] \n\t"
    "pextrh     %[tmp1],      %[tmp0],      %[zero] \n\t"
    "psrlw      %[src1],      %[src1],      %[rmov] \n\t"
    "psllw      %[tmp1],      %[tmp1],      %[rmov8] \n\t"
    "or         %[src1],      %[src1],      %[tmp1] \n\t"
    "and        %[tmp0],      %[tmp0],      %[mask2] \n\t"
    "or         %[src1],      %[src1],      %[tmp0] \n\t"
    "gssdlc1    %[src0],      0x07(%[dst_ptr])      \n\t"
    "gssdrc1    %[src0],      0x00(%[dst_ptr])      \n\t"
    "gsswlc1    %[src1],      0x0b(%[dst_ptr])      \n\t"
    "gsswrc1    %[src1],      0x08(%[dst_ptr])      \n\t"
    "daddiu     %[src_ptr],   %[src_ptr],   0x10    \n\t"
    "daddi      %[width],     %[width],    -0x0c    \n\t"
    "daddiu     %[dst_ptr],   %[dst_ptr],   0x0c    \n\t"
    "bnez       %[width],     1b                    \n\t"
    // src_ptr, dst_ptr and width are advanced inside the loop, so they are
    // read-write ("+r") operands. Declaring them input-only would let the
    // compiler assume their registers still hold the original values after
    // the asm statement.
    : [src0]"=&f"(src[0]),              [src1]"=&f"(src[1]),
      [tmp0]"=&f"(tmp[0]),              [tmp1]"=&f"(tmp[1]),
      [src_ptr]"+r"(src_ptr),           [dst_ptr]"+r"(dst),
      [width]"+r"(dst_width)
    : [rmov]"f"(0x18),                  [mask1]"f"(0xffff0000ffff),
      [rmov8]"f"(0x8),                  [zero]"f"(0x0),
      [mask2]"f"(0xff000000),           [lmov1]"f"(0x10)
    : "memory"
  );
}
// clang-format on
#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
......
......@@ -160,7 +160,12 @@ TEST_F(LibYUVBaseTest, TestLinuxNeon) {
#endif
}
// TODO(fbarchard): Fix clangcl test of cpuflags.
#ifdef _MSC_VER
TEST_F(LibYUVBaseTest, DISABLED_TestSetCpuFlags) {
#else
TEST_F(LibYUVBaseTest, TestSetCpuFlags) {
#endif
// Reset any masked flags that may have been set so auto init is enabled.
MaskCpuFlags(0);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment