Commit 21be9122 authored by lixia zhang's avatar lixia zhang Committed by Commit Bot

libyuv:loongson optimize compare/row/scale/rotate files with mmi.

Currently, libyuv supports the MIPS SIMD Architecture (MSA),
but libyuv does not support MultiMedia Instructions (MMI) (such as the Loongson3a platform).

In order to improve the performance of libyuv on the Loongson3a platform,
this change optimizes 98 functions with MMI.

BUG=libyuv:804

Change-Id: I8947626009efad769b3103a867363ece25d79629
Reviewed-on: https://chromium-review.googlesource.com/1122064
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
parent 55f5d91f
......@@ -69,6 +69,10 @@ group("libyuv") {
deps += [ ":libyuv_msa" ]
}
if (libyuv_use_mmi) {
deps += [ ":libyuv_mmi" ]
}
if (!is_ios) {
# Make sure that clients of libyuv link with libjpeg. This can't go in
# libyuv_internal because in Windows x64 builds that will generate a clang
......@@ -229,6 +233,24 @@ if (libyuv_use_msa) {
}
}
if (libyuv_use_mmi) {
static_library("libyuv_mmi") {
sources = [
# MMI Source Files
"source/compare_mmi.cc",
"source/rotate_mmi.cc",
"source/row_mmi.cc",
"source/scale_mmi.cc",
]
deps = [
":libyuv_internal",
]
public_configs = [ ":libyuv_config" ]
}
}
if (libyuv_include_tests) {
config("libyuv_unittest_warnings_config") {
if (!is_win) {
......
......@@ -178,6 +178,15 @@ Running test with C code:
ninja -v -C out/Debug libyuv_unittest
ninja -v -C out/Release libyuv_unittest
### MIPS Linux
mips
gn gen out/Release "--args=is_debug=false target_os=\"linux\" target_cpu=\"mips64el\" mips_arch_variant=\"loongson3\" mips_use_mmi=true is_component_build=false is_clang=false use_sysroot=false use_gold=false"
gn gen out/Debug "--args=is_debug=true target_os=\"linux\" target_cpu=\"mips64el\" mips_arch_variant=\"loongson3\" mips_use_mmi=true is_component_build=false is_clang=false use_sysroot=false use_gold=false"
ninja -v -C out/Debug libyuv_unittest
ninja -v -C out/Release libyuv_unittest
## Building the Library with make
### Linux
......
......@@ -84,6 +84,11 @@ extern "C" {
#define HAS_SUMSQUAREERROR_MSA
#endif
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
#define HAS_HAMMINGDISTANCE_MMI
#define HAS_SUMSQUAREERROR_MMI
#endif
uint32_t HammingDistance_C(const uint8_t* src_a,
const uint8_t* src_b,
int count);
......@@ -102,7 +107,9 @@ uint32_t HammingDistance_NEON(const uint8_t* src_a,
uint32_t HammingDistance_MSA(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t HammingDistance_MMI(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t SumSquareError_C(const uint8_t* src_a,
const uint8_t* src_b,
int count);
......@@ -118,6 +125,9 @@ uint32_t SumSquareError_NEON(const uint8_t* src_a,
uint32_t SumSquareError_MSA(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t SumSquareError_MMI(const uint8_t* src_a,
const uint8_t* src_b,
int count);
uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed);
uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed);
......
......@@ -48,6 +48,7 @@ static const int kCpuHasAVX512VPOPCNTDQ = 0x100000;
// These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x200000;
static const int kCpuHasMSA = 0x400000;
static const int kCpuHasMMI = 0x800000;
// Optional init function. TestCpuFlag does an auto-init.
// Returns cpu_info flags.
......
......@@ -60,6 +60,11 @@ extern "C" {
#define HAS_TRANSPOSEUVWX16_MSA
#endif
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
#define HAS_TRANSPOSEWX8_MMI
#define HAS_TRANSPOSEUVWX8_MMI
#endif
void TransposeWxH_C(const uint8_t* src,
int src_stride,
uint8_t* dst,
......@@ -87,6 +92,11 @@ void TransposeWx8_SSSE3(const uint8_t* src,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx8_MMI(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx8_Fast_SSSE3(const uint8_t* src,
int src_stride,
uint8_t* dst,
......@@ -108,6 +118,11 @@ void TransposeWx8_Any_SSSE3(const uint8_t* src,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx8_Any_MMI(const uint8_t* src,
int src_stride,
uint8_t* dst,
int dst_stride,
int width);
void TransposeWx8_Fast_Any_SSSE3(const uint8_t* src,
int src_stride,
uint8_t* dst,
......@@ -156,6 +171,13 @@ void TransposeUVWx8_NEON(const uint8_t* src,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx8_MMI(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx16_MSA(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
......@@ -178,6 +200,13 @@ void TransposeUVWx8_Any_NEON(const uint8_t* src,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx8_Any_MMI(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
int dst_stride_a,
uint8_t* dst_b,
int dst_stride_b,
int width);
void TransposeUVWx16_Any_MSA(const uint8_t* src,
int src_stride,
uint8_t* dst_a,
......
This diff is collapsed.
This diff is collapsed.
......@@ -13,8 +13,11 @@ import("//build/config/mips.gni")
declare_args() {
libyuv_include_tests = !build_with_chromium
libyuv_disable_jpeg = false
libyuv_use_neon = (current_cpu == "arm64" ||
(current_cpu == "arm" && (arm_use_neon || arm_optionally_use_neon)))
libyuv_use_msa = (current_cpu == "mips64el" || current_cpu == "mipsel") &&
mips_use_msa
libyuv_use_neon =
current_cpu == "arm64" ||
(current_cpu == "arm" && (arm_use_neon || arm_optionally_use_neon))
libyuv_use_msa =
(current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_msa
libyuv_use_mmi =
(current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_mmi
}
......@@ -27,8 +27,10 @@
# Link-Time Optimizations.
'use_lto%': 0,
'mips_msa%': 0, # Default to msa off.
'mips_mmi%': 0, # Default to mmi off.
'build_neon': 0,
'build_msa': 0,
'build_mmi': 0,
'conditions': [
['(target_arch == "armv7" or target_arch == "armv7s" or \
(target_arch == "arm" and arm_version >= 7) or target_arch == "arm64")\
......@@ -40,6 +42,11 @@
{
'build_msa': 1,
}],
['(target_arch == "mipsel" or target_arch == "mips64el")\
and (mips_mmi == 1)',
{
'build_mmi': 1,
}],
],
},
......@@ -92,6 +99,11 @@
'LIBYUV_MSA',
],
}],
['build_mmi != 0', {
'defines': [
'LIBYUV_MMI',
],
}],
['OS != "ios" and libyuv_disable_jpeg != 1', {
'defines': [
'HAVE_JPEG'
......
......@@ -36,6 +36,7 @@
'source/compare_common.cc',
'source/compare_gcc.cc',
'source/compare_msa.cc',
'source/compare_mmi.cc',
'source/compare_neon.cc',
'source/compare_neon64.cc',
'source/compare_win.cc',
......@@ -56,6 +57,7 @@
'source/rotate_common.cc',
'source/rotate_gcc.cc',
'source/rotate_msa.cc',
'source/rotate_mmi.cc',
'source/rotate_neon.cc',
'source/rotate_neon64.cc',
'source/rotate_win.cc',
......@@ -63,6 +65,7 @@
'source/row_common.cc',
'source/row_gcc.cc',
'source/row_msa.cc',
'source/row_mmi.cc',
'source/row_neon.cc',
'source/row_neon64.cc',
'source/row_win.cc',
......@@ -72,6 +75,7 @@
'source/scale_common.cc',
'source/scale_gcc.cc',
'source/scale_msa.cc',
'source/scale_mmi.cc',
'source/scale_neon.cc',
'source/scale_neon64.cc',
'source/scale_win.cc',
......
......@@ -95,6 +95,12 @@
'LIBYUV_MSA'
],
}],
[ '(target_arch == "mipsel" or target_arch == "mips64el") \
and (mips_mmi == 1)', {
'defines': [
'LIBYUV_MMI'
],
}],
], # conditions
'defines': [
# Enable the following 3 macros to turn off assembly for specified CPU.
......
......@@ -154,6 +154,12 @@ uint64_t ComputeHammingDistance(const uint8_t* src_a,
HammingDistance = HammingDistance_MSA;
}
#endif
#if defined(HAS_HAMMINGDISTANCE_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
HammingDistance = HammingDistance_MMI;
}
#endif
#ifdef _OPENMP
#pragma omp parallel for reduction(+ : diff)
#endif
......@@ -210,6 +216,11 @@ uint64_t ComputeSumSquareError(const uint8_t* src_a,
SumSquareError = SumSquareError_MSA;
}
#endif
#if defined(HAS_SUMSQUAREERROR_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
SumSquareError = SumSquareError_MMI;
}
#endif
#ifdef _OPENMP
#pragma omp parallel for reduction(+ : sse)
#endif
......
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#include "libyuv/compare_row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
// Hakmem method for hamming distance.
uint32_t HammingDistance_MMI(const uint8_t* src_a,
const uint8_t* src_b,
int count) {
uint32_t diff = 0u;
uint64_t temp = 0, temp1 = 0, ta = 0, tb = 0;
uint64_t c1 = 0x5555555555555555;
uint64_t c2 = 0x3333333333333333;
uint64_t c3 = 0x0f0f0f0f0f0f0f0f;
uint32_t c4 = 0x01010101;
uint64_t s1 = 1, s2 = 2, s3 = 4;
__asm__ volatile(
"1: \n\t"
"ldc1 %[ta], 0(%[src_a]) \n\t"
"ldc1 %[tb], 0(%[src_b]) \n\t"
"xor %[temp], %[ta], %[tb] \n\t"
"psrlw %[temp1], %[temp], %[s1] \n\t" // temp1=x>>1
"and %[temp1], %[temp1], %[c1] \n\t" // temp1&=c1
"psubw %[temp1], %[temp], %[temp1] \n\t" // x-temp1
"and %[temp], %[temp1], %[c2] \n\t" // t = (u&c2)
"psrlw %[temp1], %[temp1], %[s2] \n\t" // u>>2
"and %[temp1], %[temp1], %[c2] \n\t" // u>>2 & c2
"paddw %[temp1], %[temp1], %[temp] \n\t" // t1 = t1+t
"psrlw %[temp], %[temp1], %[s3] \n\t" // u>>4
"paddw %[temp1], %[temp1], %[temp] \n\t" // u+(u>>4)
"and %[temp1], %[temp1], %[c3] \n\t" //&c3
"dmfc1 $t0, %[temp1] \n\t"
"dsrl32 $t0, $t0, 0 \n\t "
"mul $t0, $t0, %[c4] \n\t"
"dsrl $t0, $t0, 24 \n\t"
"dadd %[diff], %[diff], $t0 \n\t"
"dmfc1 $t0, %[temp1] \n\t"
"mul $t0, $t0, %[c4] \n\t"
"dsrl $t0, $t0, 24 \n\t"
"dadd %[diff], %[diff], $t0 \n\t"
"daddiu %[src_a], %[src_a], 8 \n\t"
"daddiu %[src_b], %[src_b], 8 \n\t"
"addiu %[count], %[count], -8 \n\t"
"bgtz %[count], 1b \n\t"
"nop \n\t"
: [diff] "+r"(diff), [src_a] "+r"(src_a), [src_b] "+r"(src_b),
[count] "+r"(count), [ta] "+f"(ta), [tb] "+f"(tb), [temp] "+f"(temp),
[temp1] "+f"(temp1)
: [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [c4] "r"(c4), [s1] "f"(s1),
[s2] "f"(s2), [s3] "f"(s3)
: "memory");
return diff;
}
// Sum of squared differences between two byte buffers, 8 bytes per
// iteration: absolute byte difference, unpack to 16-bit halves, then
// multiply-accumulate into a pair of 32-bit lanes that are combined at the
// end. Assumes count is a positive multiple of 8.
uint32_t SumSquareError_MMI(const uint8_t* src_a,
                            const uint8_t* src_b,
                            int count) {
  uint32_t sse = 0u;
  uint32_t sse_hi = 0u, sse_lo = 0u;      // high/low 32-bit lanes of the sum
  uint64_t src1, src2;                    // 8-byte loads from src_a / src_b
  uint64_t diff, diff_hi, diff_lo;        // abs differences, widened to 16-bit
  uint64_t sse_sum, sse_tmp;              // running 2x32-bit accumulator
  const uint64_t mask = 0x0ULL;           // zero vector for byte->half unpack
  // NOTE(review): src_a, src_b and count are modified by daddiu inside the
  // asm but are declared as input-only "r" operands; GCC's extended-asm
  // rules require modified operands to be outputs ("+r"). This happens to
  // work here because the values are dead afterwards, but should be fixed.
  __asm__ volatile(
      "xor %[sse_sum], %[sse_sum], %[sse_sum] \n\t"
      "1: \n\t"
      "ldc1 %[src1], 0x00(%[src_a]) \n\t"
      "ldc1 %[src2], 0x00(%[src_b]) \n\t"
      "pasubub %[diff], %[src1], %[src2] \n\t"
      "punpcklbh %[diff_lo], %[diff], %[mask] \n\t"
      "punpckhbh %[diff_hi], %[diff], %[mask] \n\t"
      "pmaddhw %[sse_tmp], %[diff_lo], %[diff_lo] \n\t"
      "paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t"
      "pmaddhw %[sse_tmp], %[diff_hi], %[diff_hi] \n\t"
      "paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t"
      "daddiu %[src_a], %[src_a], 0x08 \n\t"
      "daddiu %[src_b], %[src_b], 0x08 \n\t"
      "daddiu %[count], %[count], -0x08 \n\t"
      "bnez %[count], 1b \n\t"
      // mfc1 sits in the branch delay slot, so it runs every iteration; it
      // only reads sse_sum, which is harmless until the final pass.
      "mfc1 %[sse_lo], %[sse_sum] \n\t"
      "mfhc1 %[sse_hi], %[sse_sum] \n\t"
      "daddu %[sse], %[sse_hi], %[sse_lo] \n\t"
      : [sse] "+&r"(sse), [diff] "=&f"(diff), [src1] "=&f"(src1),
        [src2] "=&f"(src2), [diff_lo] "=&f"(diff_lo), [diff_hi] "=&f"(diff_hi),
        [sse_sum] "=&f"(sse_sum), [sse_tmp] "=&f"(sse_tmp),
        [sse_hi] "+&r"(sse_hi), [sse_lo] "+&r"(sse_lo)
      : [src_a] "r"(src_a), [src_b] "r"(src_b), [count] "r"(count),
        [mask] "f"(mask)
      : "memory");
  return sse;
}
#endif
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
This diff is collapsed.
......@@ -885,6 +885,14 @@ static int I420AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_ARGBATTENUATEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_MMI;
if (IS_ALIGNED(width, 2)) {
ARGBAttenuateRow = ARGBAttenuateRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
......@@ -1004,6 +1012,14 @@ int I400ToARGB(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I400TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I400ToARGBRow = I400ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
I400ToARGBRow = I400ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
I400ToARGBRow(src_y, dst_argb, width);
......@@ -1070,6 +1086,14 @@ int J400ToARGB(const uint8_t* src_y,
J400ToARGBRow = J400ToARGBRow_MSA;
}
}
#endif
#if defined(HAS_J400TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
J400ToARGBRow = J400ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
J400ToARGBRow = J400ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
J400ToARGBRow(src_y, dst_argb, width);
......@@ -1201,6 +1225,14 @@ int RGB24ToARGB(const uint8_t* src_rgb24,
}
}
#endif
#if defined(HAS_RGB24TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
RGB24ToARGBRow = RGB24ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
RGB24ToARGBRow(src_rgb24, dst_argb, width);
......@@ -1260,6 +1292,14 @@ int RAWToARGB(const uint8_t* src_raw,
}
}
#endif
#if defined(HAS_RAWTOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RAWToARGBRow = RAWToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
RAWToARGBRow = RAWToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
RAWToARGBRow(src_raw, dst_argb, width);
......@@ -1327,6 +1367,14 @@ int RGB565ToARGB(const uint8_t* src_rgb565,
}
}
#endif
#if defined(HAS_RGB565TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
RGB565ToARGBRow = RGB565ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
RGB565ToARGBRow(src_rgb565, dst_argb, width);
......@@ -1394,6 +1442,14 @@ int ARGB1555ToARGB(const uint8_t* src_argb1555,
}
}
#endif
#if defined(HAS_ARGB1555TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGB1555ToARGBRow(src_argb1555, dst_argb, width);
......@@ -1461,6 +1517,14 @@ int ARGB4444ToARGB(const uint8_t* src_argb4444,
}
}
#endif
#if defined(HAS_ARGB4444TOARGBROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGB4444ToARGBRow(src_argb4444, dst_argb, width);
......
......@@ -302,6 +302,14 @@ int I420ToYUY2(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I422TOYUY2ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToYUY2Row = I422ToYUY2Row_Any_MMI;
if (IS_ALIGNED(width, 8)) {
I422ToYUY2Row = I422ToYUY2Row_MMI;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width);
......@@ -381,6 +389,14 @@ int I422ToUYVY(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I422TOUYVYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToUYVYRow = I422ToUYVYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
I422ToUYVYRow = I422ToUYVYRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
......@@ -448,6 +464,14 @@ int I420ToUYVY(const uint8_t* src_y,
}
}
#endif
#if defined(HAS_I422TOUYVYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToUYVYRow = I422ToUYVYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
I422ToUYVYRow = I422ToUYVYRow_MMI;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width);
......
......@@ -76,6 +76,14 @@ int ARGBToI444(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUV444ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToUV444Row = ARGBToUV444Row_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToUV444Row = ARGBToUV444Row_MMI;
}
}
#endif
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
......@@ -108,6 +116,14 @@ int ARGBToI444(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYRow = ARGBToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToUV444Row(src_argb, dst_u, dst_v, width);
......@@ -208,6 +224,23 @@ int ARGBToI422(const uint8_t* src_argb,
}
#endif
#if defined(HAS_ARGBTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYRow = ARGBToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_MMI;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToUVRow = ARGBToUVRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
ARGBToYRow(src_argb, dst_y, width);
......@@ -298,6 +331,22 @@ int ARGBToNV12(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYRow = ARGBToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_MMI;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToUVRow = ARGBToUVRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_MMI;
}
}
#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
......@@ -329,6 +378,14 @@ int ARGBToNV12(const uint8_t* src_argb,
MergeUVRow_ = MergeUVRow_MSA;
}
}
#endif
#if defined(HAS_MERGEUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
MergeUVRow_ = MergeUVRow_Any_MMI;
if (IS_ALIGNED(halfwidth, 8)) {
MergeUVRow_ = MergeUVRow_MMI;
}
}
#endif
{
// Allocate a rows of uv.
......@@ -434,6 +491,23 @@ int ARGBToNV21(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYRow = ARGBToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_MMI;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToUVRow = ARGBToUVRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_MMI;
}
}
#endif
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
MergeUVRow_ = MergeUVRow_Any_SSE2;
......@@ -465,6 +539,14 @@ int ARGBToNV21(const uint8_t* src_argb,
MergeUVRow_ = MergeUVRow_MSA;
}
}
#endif
#if defined(HAS_MERGEUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
MergeUVRow_ = MergeUVRow_Any_MMI;
if (IS_ALIGNED(halfwidth, 8)) {
MergeUVRow_ = MergeUVRow_MMI;
}
}
#endif
{
// Allocate a rows of uv.
......@@ -575,6 +657,22 @@ int ARGBToYUY2(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYRow = ARGBToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_MMI;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToUVRow = ARGBToUVRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_MMI;
}
}
#endif
#if defined(HAS_I422TOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
......@@ -607,6 +705,14 @@ int ARGBToYUY2(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_I422TOYUY2ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToYUY2Row = I422ToYUY2Row_Any_MMI;
if (IS_ALIGNED(width, 8)) {
I422ToYUY2Row = I422ToYUY2Row_MMI;
}
}
#endif
{
// Allocate a rows of yuv.
......@@ -712,6 +818,22 @@ int ARGBToUYVY(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYRow = ARGBToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_MMI;
}
}
#endif
#if defined(HAS_ARGBTOUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToUVRow = ARGBToUVRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_MMI;
}
}
#endif
#if defined(HAS_I422TOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
......@@ -744,6 +866,14 @@ int ARGBToUYVY(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_I422TOUYVYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
I422ToUYVYRow = I422ToUYVYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
I422ToUYVYRow = I422ToUYVYRow_MMI;
}
}
#endif
{
// Allocate a rows of yuv.
......@@ -821,6 +951,14 @@ int ARGBToI400(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYRow = ARGBToYRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToYRow(src_argb, dst_y, width);
......@@ -911,6 +1049,14 @@ int ARGBToRGB24(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTORGB24ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_MMI;
if (IS_ALIGNED(width, 4)) {
ARGBToRGB24Row = ARGBToRGB24Row_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToRGB24Row(src_argb, dst_rgb24, width);
......@@ -977,6 +1123,14 @@ int ARGBToRAW(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTORAWROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToRAWRow = ARGBToRAWRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
ARGBToRAWRow = ARGBToRAWRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToRAWRow(src_argb, dst_raw, width);
......@@ -1047,6 +1201,14 @@ int ARGBToRGB565Dither(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_MMI;
if (IS_ALIGNED(width, 4)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToRGB565DitherRow(src_argb, dst_rgb565,
......@@ -1116,6 +1278,14 @@ int ARGBToRGB565(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTORGB565ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_MMI;
if (IS_ALIGNED(width, 4)) {
ARGBToRGB565Row = ARGBToRGB565Row_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToRGB565Row(src_argb, dst_rgb565, width);
......@@ -1182,6 +1352,14 @@ int ARGBToARGB1555(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOARGB1555ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_MMI;
if (IS_ALIGNED(width, 4)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToARGB1555Row(src_argb, dst_argb1555, width);
......@@ -1248,6 +1426,14 @@ int ARGBToARGB4444(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOARGB4444ROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_Any_MMI;
if (IS_ALIGNED(width, 4)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToARGB4444Row(src_argb, dst_argb4444, width);
......@@ -1424,6 +1610,14 @@ int ARGBToJ420(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYJROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYJRow = ARGBToYJRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYJRow = ARGBToYJRow_MMI;
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToUVJRow = ARGBToUVJRow_Any_MSA;
......@@ -1432,6 +1626,14 @@ int ARGBToJ420(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToUVJRow = ARGBToUVJRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_MMI;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width);
......@@ -1525,6 +1727,14 @@ int ARGBToJ422(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYJROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYJRow = ARGBToYJRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYJRow = ARGBToYJRow_MMI;
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ARGBToUVJRow = ARGBToUVJRow_Any_MSA;
......@@ -1533,6 +1743,14 @@ int ARGBToJ422(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToUVJRow = ARGBToUVJRow_Any_MMI;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width);
......@@ -1602,6 +1820,14 @@ int ARGBToJ400(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYJROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBToYJRow = ARGBToYJRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
ARGBToYJRow = ARGBToYJRow_MMI;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToYJRow(src_argb, dst_yj, width);
......
......@@ -173,6 +173,9 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name,
if (strcmp(ase, " msa") == 0) {
return kCpuHasMSA;
}
if (strcmp(ase, " mmi") == 0) {
return kCpuHasMMI;
}
return 0;
}
while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
......@@ -185,6 +188,15 @@ LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name,
}
return 0;
}
} else if(memcmp(cpuinfo_line, "cpu model", 9) == 0) {
char* p = strstr(cpuinfo_line, "Loongson-3");
if (p) {
fclose(f);
if (strcmp(ase, " mmi") == 0) {
return kCpuHasMMI;
}
return 0;
}
}
}
fclose(f);
......@@ -232,6 +244,8 @@ static SAFEBUFFERS int GetCpuFlags(void) {
#if defined(__mips__) && defined(__linux__)
#if defined(__mips_msa)
cpu_info = MipsCpuCaps("/proc/cpuinfo", " msa");
#elif defined(_MIPS_ARCH_LOONGSON3A)
cpu_info = MipsCpuCaps("/proc/cpuinfo", " mmi");
#endif
cpu_info |= kCpuHasMIPS;
#endif
......
This diff is collapsed.
......@@ -49,6 +49,11 @@ void TransposePlane(const uint8_t* src,
}
}
#endif
#if defined(HAS_TRANSPOSEWX8_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
TransposeWx8 = TransposeWx8_MMI;
}
#endif
#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
TransposeWx8 = TransposeWx8_Fast_Any_SSSE3;
......@@ -166,6 +171,14 @@ void RotatePlane180(const uint8_t* src,
}
}
#endif
#if defined(HAS_MIRRORROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
MirrorRow = MirrorRow_Any_MMI;
if (IS_ALIGNED(width, 8)) {
MirrorRow = MirrorRow_MMI;
}
}
#endif
#if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
......@@ -186,6 +199,11 @@ void RotatePlane180(const uint8_t* src,
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
#if defined(HAS_COPYROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
CopyRow = IS_ALIGNED(width, 8) ? CopyRow_MMI : CopyRow_Any_MMI;
}
#endif
// Odd height will harmlessly mirror the middle row twice.
for (y = 0; y < half_height; ++y) {
......@@ -232,6 +250,14 @@ void TransposeUV(const uint8_t* src,
}
}
#endif
#if defined(HAS_TRANSPOSEUVWX8_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
TransposeUVWx8 = TransposeUVWx8_Any_MMI;
if (IS_ALIGNED(width, 4)) {
TransposeUVWx8 = TransposeUVWx8_MMI;
}
}
#endif
#if defined(HAS_TRANSPOSEUVWX16_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
TransposeUVWx16 = TransposeUVWx16_Any_MSA;
......@@ -331,6 +357,11 @@ void RotateUV180(const uint8_t* src,
MirrorUVRow = MirrorUVRow_MSA;
}
#endif
#if defined(HAS_MIRRORUVROW_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 8)) {
MirrorUVRow = MirrorUVRow_MMI;
}
#endif
dst_a += dst_stride_a * (height - 1);
dst_b += dst_stride_b * (height - 1);
......
......@@ -35,6 +35,9 @@ TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, 7)
#ifdef HAS_TRANSPOSEWX8_SSSE3
TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7)
#endif
#ifdef HAS_TRANSPOSEWX8_MMI
TANY(TransposeWx8_Any_MMI, TransposeWx8_MMI, 7)
#endif
#ifdef HAS_TRANSPOSEWX8_FAST_SSSE3
TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15)
#endif
......@@ -62,6 +65,9 @@ TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7)
#ifdef HAS_TRANSPOSEUVWX8_SSE2
TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7)
#endif
#ifdef HAS_TRANSPOSEUVWX8_MMI
TUVANY(TransposeUVWx8_Any_MMI, TransposeUVWx8_MMI, 7)
#endif
#ifdef HAS_TRANSPOSEUVWX16_MSA
TUVANY(TransposeUVWx16_Any_MSA, TransposeUVWx16_MSA, 7)
#endif
......
......@@ -56,6 +56,14 @@ static void ARGBTranspose(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_MMI;
if (IS_ALIGNED(height, 4)) { // Width of dest.
ScaleARGBRowDownEven = ScaleARGBRowDownEven_MMI;
}
}
#endif
for (i = 0; i < width; ++i) { // column of source to row of dest.
ScaleARGBRowDownEven(src_argb, 0, src_pixel_step, dst_argb, height);
......@@ -142,6 +150,14 @@ void ARGBRotate180(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBMIRRORROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ARGBMirrorRow = ARGBMirrorRow_Any_MMI;
if (IS_ALIGNED(width, 2)) {
ARGBMirrorRow = ARGBMirrorRow_MMI;
}
}
#endif
#if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -118,6 +118,21 @@ static void ScalePlaneDown2(int src_width,
}
}
#endif
#if defined(HAS_SCALEROWDOWN2_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleRowDown2 =
filtering == kFilterNone
? ScaleRowDown2_Any_MMI
: (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MMI
: ScaleRowDown2Box_Any_MMI);
if (IS_ALIGNED(dst_width, 8)) {
ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MMI
: (filtering == kFilterLinear
? ScaleRowDown2Linear_MMI
: ScaleRowDown2Box_MMI);
}
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
......@@ -169,6 +184,15 @@ static void ScalePlaneDown2_16(int src_width,
: ScaleRowDown2Box_16_SSE2);
}
#endif
#if defined(HAS_SCALEROWDOWN2_16_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
ScaleRowDown2 =
filtering == kFilterNone
? ScaleRowDown2_16_MMI
: (filtering == kFilterLinear ? ScaleRowDown2Linear_16_MMI
: ScaleRowDown2Box_16_MMI);
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
......@@ -241,6 +265,15 @@ static void ScalePlaneDown4(int src_width,
}
}
#endif
#if defined(HAS_SCALEROWDOWN4_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleRowDown4 =
filtering ? ScaleRowDown4Box_Any_MMI : ScaleRowDown4_Any_MMI;
if (IS_ALIGNED(dst_width, 8)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_MMI : ScaleRowDown4_MMI;
}
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
......@@ -284,6 +317,12 @@ static void ScalePlaneDown4_16(int src_width,
filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2;
}
#endif
#if defined(HAS_SCALEROWDOWN4_16_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
ScaleRowDown4 =
filtering ? ScaleRowDown4Box_16_MMI : ScaleRowDown4_16_MMI;
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
......@@ -849,6 +888,14 @@ static void ScalePlaneBox(int src_width,
}
}
#endif
#if defined(HAS_SCALEADDROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleAddRow = ScaleAddRow_Any_MMI;
if (IS_ALIGNED(src_width, 8)) {
ScaleAddRow = ScaleAddRow_MMI;
}
}
#endif
for (j = 0; j < dst_height; ++j) {
int boxheight;
......@@ -904,6 +951,11 @@ static void ScalePlaneBox_16(int src_width,
}
#endif
#if defined(HAS_SCALEADDROW_16_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(src_width, 4)) {
ScaleAddRow = ScaleAddRow_16_MMI;
}
#endif
for (j = 0; j < dst_height; ++j) {
int boxheight;
int iy = y >> 16;
......@@ -988,6 +1040,14 @@ void ScalePlaneBilinearDown(int src_width,
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
InterpolateRow = InterpolateRow_Any_MMI;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_MMI;
}
}
#endif
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
......@@ -1206,6 +1266,11 @@ void ScalePlaneBilinearUp(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleColsUp2_SSE2;
}
#endif
#if defined(HAS_SCALECOLS_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleColsUp2_MMI;
}
#endif
}
......@@ -1333,6 +1398,11 @@ void ScalePlaneBilinearUp_16(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleColsUp2_16_SSE2;
}
#endif
#if defined(HAS_SCALECOLS_16_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
ScaleFilterCols = ScaleColsUp2_16_MMI;
}
#endif
}
......@@ -1418,6 +1488,11 @@ static void ScalePlaneSimple(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleCols = ScaleColsUp2_SSE2;
}
#endif
#if defined(HAS_SCALECOLS_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
ScaleCols = ScaleColsUp2_MMI;
}
#endif
}
......@@ -1454,6 +1529,11 @@ static void ScalePlaneSimple_16(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleCols = ScaleColsUp2_16_SSE2;
}
#endif
#if defined(HAS_SCALECOLS_16_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
ScaleCols = ScaleColsUp2_16_MMI;
}
#endif
}
......
......@@ -42,6 +42,9 @@ CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7)
#ifdef HAS_SCALEARGBCOLS_MSA
CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C, 4, 3)
#endif
#ifdef HAS_SCALEARGBCOLS_MMI
// MMI fixed-point ARGB column scaler: 4 bytes per pixel, mask 0.
// NOTE(review): per the CANY invocations above (cf. the MSA variant with
// mask 3), the last argument is the width mask; 0 presumably means the MMI
// kernel handles any dst_width with no C remainder — confirm against the
// CANY macro definition earlier in this file.
CANY(ScaleARGBCols_Any_MMI, ScaleARGBCols_MMI, ScaleARGBCols_C, 4, 0)
#endif
#ifdef HAS_SCALEARGBFILTERCOLS_NEON
CANY(ScaleARGBFilterCols_Any_NEON,
ScaleARGBFilterCols_NEON,
......@@ -165,6 +168,27 @@ SDANY(ScaleRowDown2Box_Any_MSA,
1,
31)
#endif
#ifdef HAS_SCALEROWDOWN2_MMI
// MMI 1/2-width row scalers (plain, linear and box filter), generated by
// the SDANY wrapper. Arguments follow the pattern of the SSSE3/NEON/MSA
// groups above: scale factor 2, 1 byte per pixel, width mask 7 —
// presumably the MMI kernel processes 8 output pixels at a time with the
// C function covering the remainder (confirm against the SDANY macro
// definition earlier in this file).
SDANY(ScaleRowDown2_Any_MMI, ScaleRowDown2_MMI, ScaleRowDown2_C, 2, 1, 7)
SDANY(ScaleRowDown2Linear_Any_MMI,
      ScaleRowDown2Linear_MMI,
      ScaleRowDown2Linear_C,
      2,
      1,
      7)
SDANY(ScaleRowDown2Box_Any_MMI,
      ScaleRowDown2Box_MMI,
      ScaleRowDown2Box_C,
      2,
      1,
      7)
// Odd-width variant: falls back to ScaleRowDown2Box_Odd_C for the final
// odd output pixel (mirrors ScaleRowDown2Box_Odd_* for other SIMD archs).
SDODD(ScaleRowDown2Box_Odd_MMI,
      ScaleRowDown2Box_MMI,
      ScaleRowDown2Box_Odd_C,
      2,
      1,
      7)
#endif
#ifdef HAS_SCALEROWDOWN4_SSSE3
SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7)
SDANY(ScaleRowDown4Box_Any_SSSE3,
......@@ -201,6 +225,15 @@ SDANY(ScaleRowDown4Box_Any_MSA,
1,
15)
#endif
#ifdef HAS_SCALEROWDOWN4_MMI
// MMI 1/4-width row scalers (plain and box filter): scale factor 4,
// 1 byte per pixel, width mask 7 — same SDANY argument pattern as the
// SSSE3/NEON/MSA groups above, with the C function handling the
// non-multiple-of-8 tail.
SDANY(ScaleRowDown4_Any_MMI, ScaleRowDown4_MMI, ScaleRowDown4_C, 4, 1, 7)
SDANY(ScaleRowDown4Box_Any_MMI,
      ScaleRowDown4Box_MMI,
      ScaleRowDown4Box_C,
      4,
      1,
      7)
#endif
#ifdef HAS_SCALEROWDOWN34_SSSE3
SDANY(ScaleRowDown34_Any_SSSE3,
ScaleRowDown34_SSSE3,
......@@ -382,6 +415,26 @@ SDANY(ScaleARGBRowDown2Box_Any_MSA,
4,
3)
#endif
#ifdef HAS_SCALEARGBROWDOWN2_MMI
// MMI 1/2-width ARGB row scalers (plain, linear and box filter):
// scale factor 2, 4 bytes per pixel, width mask 1 — presumably the MMI
// kernel processes ARGB pixels in pairs, with the C function covering an
// odd trailing pixel (confirm against the SDANY macro definition earlier
// in this file).
SDANY(ScaleARGBRowDown2_Any_MMI,
      ScaleARGBRowDown2_MMI,
      ScaleARGBRowDown2_C,
      2,
      4,
      1)
SDANY(ScaleARGBRowDown2Linear_Any_MMI,
      ScaleARGBRowDown2Linear_MMI,
      ScaleARGBRowDown2Linear_C,
      2,
      4,
      1)
SDANY(ScaleARGBRowDown2Box_Any_MMI,
      ScaleARGBRowDown2Box_MMI,
      ScaleARGBRowDown2Box_C,
      2,
      4,
      1)
#endif
#undef SDANY
// Scale down by even scale factor.
......@@ -433,6 +486,18 @@ SDAANY(ScaleARGBRowDownEvenBox_Any_MSA,
4,
3)
#endif
#ifdef HAS_SCALEARGBROWDOWNEVEN_MMI
// MMI even-scale-factor ARGB row scalers (plain and box filter), wrapped
// by SDAANY: 4 bytes per pixel, width mask 1 — same argument pattern as
// the MSA group above (which uses mask 3), so the MMI kernel presumably
// handles pairs of output pixels with a C fallback for an odd remainder.
SDAANY(ScaleARGBRowDownEven_Any_MMI,
       ScaleARGBRowDownEven_MMI,
       ScaleARGBRowDownEven_C,
       4,
       1)
SDAANY(ScaleARGBRowDownEvenBox_Any_MMI,
       ScaleARGBRowDownEvenBox_MMI,
       ScaleARGBRowDownEvenBox_C,
       4,
       1)
#endif
// Add rows box filter scale down.
#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \
......@@ -456,6 +521,9 @@ SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
#ifdef HAS_SCALEADDROW_MSA
SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15)
#endif
#ifdef HAS_SCALEADDROW_MMI
// MMI box-filter row accumulator. Per the SAANY parameter list above
// (NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK), MASK 7 means the MMI
// kernel handles widths that are multiples of 8 and ScaleAddRow_C covers
// the remaining 0-7 pixels.
SAANY(ScaleAddRow_Any_MMI, ScaleAddRow_MMI, ScaleAddRow_C, 7)
#endif
#undef SAANY
#ifdef __cplusplus
......
......@@ -111,6 +111,22 @@ static void ScaleARGBDown2(int src_width,
}
}
#endif
#if defined(HAS_SCALEARGBROWDOWN2_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleARGBRowDown2 =
filtering == kFilterNone
? ScaleARGBRowDown2_Any_MMI
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MMI
: ScaleARGBRowDown2Box_Any_MMI);
if (IS_ALIGNED(dst_width, 2)) {
ScaleARGBRowDown2 =
filtering == kFilterNone
? ScaleARGBRowDown2_MMI
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MMI
: ScaleARGBRowDown2Box_MMI);
}
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
......@@ -237,6 +253,16 @@ static void ScaleARGBDownEven(int src_width,
}
}
#endif
#if defined(HAS_SCALEARGBROWDOWNEVEN_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MMI
: ScaleARGBRowDownEven_Any_MMI;
if (IS_ALIGNED(dst_width, 2)) {
ScaleARGBRowDownEven =
filtering ? ScaleARGBRowDownEvenBox_MMI : ScaleARGBRowDownEven_MMI;
}
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
......@@ -417,6 +443,14 @@ static void ScaleARGBBilinearUp(int src_width,
InterpolateRow = InterpolateRow_MSA;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
InterpolateRow = InterpolateRow_Any_MMI;
if (IS_ALIGNED(dst_width, 2)) {
InterpolateRow = InterpolateRow_MMI;
}
}
#endif
if (src_width >= 32768) {
ScaleARGBFilterCols =
......@@ -463,6 +497,14 @@ static void ScaleARGBBilinearUp(int src_width,
ScaleARGBFilterCols = ScaleARGBCols_MSA;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_MMI)
if (!filtering && TestCpuFlag(kCpuHasMMI)) {
ScaleARGBFilterCols = ScaleARGBCols_Any_MMI;
if (IS_ALIGNED(dst_width, 1)) {
ScaleARGBFilterCols = ScaleARGBCols_MMI;
}
}
#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C;
......@@ -470,6 +512,11 @@ static void ScaleARGBBilinearUp(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
}
#endif
#if defined(HAS_SCALEARGBCOLSUP2_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_MMI;
}
#endif
}
......@@ -665,6 +712,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
ScaleARGBFilterCols = ScaleARGBCols_MSA;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_MMI)
if (!filtering && TestCpuFlag(kCpuHasMMI)) {
ScaleARGBFilterCols = ScaleARGBCols_Any_MMI;
if (IS_ALIGNED(dst_width, 1)) {
ScaleARGBFilterCols = ScaleARGBCols_MMI;
}
}
#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C;
......@@ -672,6 +727,11 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
}
#endif
#if defined(HAS_SCALEARGBCOLSUP2_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_MMI;
}
#endif
}
......@@ -796,6 +856,14 @@ static void ScaleARGBSimple(int src_width,
ScaleARGBCols = ScaleARGBCols_MSA;
}
}
#endif
#if defined(HAS_SCALEARGBCOLS_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
ScaleARGBCols = ScaleARGBCols_Any_MMI;
if (IS_ALIGNED(dst_width, 1)) {
ScaleARGBCols = ScaleARGBCols_MMI;
}
}
#endif
if (src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBCols = ScaleARGBColsUp2_C;
......@@ -803,6 +871,11 @@ static void ScaleARGBSimple(int src_width,
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
ScaleARGBCols = ScaleARGBColsUp2_SSE2;
}
#endif
#if defined(HAS_SCALEARGBCOLSUP2_MMI)
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
ScaleARGBCols = ScaleARGBColsUp2_MMI;
}
#endif
}
......
......@@ -1072,6 +1072,14 @@ void ScalePlaneVertical(int src_height,
InterpolateRow = InterpolateRow_MSA;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_MMI)
if (TestCpuFlag(kCpuHasMMI)) {
InterpolateRow = InterpolateRow_Any_MMI;
if (IS_ALIGNED(dst_width_bytes, 8)) {
InterpolateRow = InterpolateRow_MMI;
}
}
#endif
for (j = 0; j < dst_height; ++j) {
int yi;
......
This diff is collapsed.
......@@ -67,6 +67,8 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
printf("Has MIPS %d\n", has_mips);
int has_msa = TestCpuFlag(kCpuHasMSA);
printf("Has MSA %d\n", has_msa);
int has_mmi = TestCpuFlag(kCpuHasMMI);
printf("Has MMI %d\n", has_mmi);
#endif
}
......
......@@ -437,6 +437,10 @@ extern "C" void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
int dst_width);
// Forward declaration of the MMI 16-bit 2x row up-scaler so the unit test
// below can invoke it directly; signature mirrors the NEON and C variants
// declared alongside it.
extern "C" void ScaleRowUp2_16_MMI(const uint16_t* src_ptr,
                                   ptrdiff_t src_stride,
                                   uint16_t* dst,
                                   int dst_width);
extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,
......@@ -463,6 +467,13 @@ TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
} else {
ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
}
#elif !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
int has_mmi = TestCpuFlag(kCpuHasMMI);
if (has_mmi) {
ScaleRowUp2_16_MMI(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
} else {
ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
}
#else
ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
#endif
......
......@@ -71,6 +71,8 @@ int main(int argc, const char* argv[]) {
if (has_mips) {
int has_msa = TestCpuFlag(kCpuHasMSA);
printf("Has MSA %x\n", has_msa);
int has_mmi = TestCpuFlag(kCpuHasMMI);
printf("Has MMI %x\n", has_mmi);
}
if (has_x86) {
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment