Commit c5323b0f authored by Frank Barchard's avatar Frank Barchard

Add MIPS SIMD Arch (MSA) optimized MirrorRow function

As per the preparation patch added in Chromium sources at,
2150943003: Add MIPS SIMD Arch (MSA) build flags for GYP/GN builds

This patch adds first MSA optimized function in libYUV project.

BUG=libyuv:634
R=fbarchard@google.com

Review URL: https://codereview.chromium.org/2285683002 .
parent 5da918b4
......@@ -53,6 +53,12 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
source/scale_neon.cc.neon
endif
ifeq ($(TARGET_ARCH_ABI),mips)
LOCAL_CFLAGS += -DLIBYUV_MSA
LOCAL_SRC_FILES += \
source/row_msa.cc
endif
LOCAL_EXPORT_C_INCLUDES := $(LOCAL_PATH)/include
LOCAL_C_INCLUDES += $(LOCAL_PATH)/include
......
......@@ -94,6 +94,10 @@ static_library("libyuv") {
deps += [ ":libyuv_neon" ]
}
if (libyuv_use_msa) {
deps += [ ":libyuv_msa" ]
}
if (is_nacl) {
# Always enable optimization under NaCl to workaround crbug.com/538243 .
configs -= [ "//build/config/compiler:default_optimization" ]
......@@ -124,6 +128,17 @@ if (libyuv_use_neon) {
}
}
if (libyuv_use_msa) {
static_library("libyuv_msa") {
sources = [
# MSA Source Files
"source/row_msa.cc",
]
public_configs = [ ":libyuv_config" ]
}
}
if (libyuv_include_tests) {
config("libyuv_unittest_warnings_config") {
if (!is_win) {
......
......@@ -40,6 +40,7 @@ set(ly_source_files
${ly_src_dir}/row_any.cc
${ly_src_dir}/row_common.cc
${ly_src_dir}/row_mips.cc
${ly_src_dir}/row_msa.cc
${ly_src_dir}/row_neon.cc
${ly_src_dir}/row_neon64.cc
${ly_src_dir}/row_gcc.cc
......@@ -80,6 +81,7 @@ set(ly_header_files
${ly_inc_dir}/libyuv/convert_from.h
${ly_inc_dir}/libyuv/convert_from_argb.h
${ly_inc_dir}/libyuv/cpu_id.h
${ly_inc_dir}/libyuv/macros_msa.h
${ly_inc_dir}/libyuv/planar_functions.h
${ly_inc_dir}/libyuv/rotate.h
${ly_inc_dir}/libyuv/rotate_argb.h
......
......@@ -195,6 +195,16 @@ Running test with C code:
gn gen out/Official "--args=is_debug=false is_official_build=true is_chrome_branded=true"
ninja -C out/Official
#### Building mips with GN
mipsel
gn gen out/Default "--args=is_debug=false target_cpu=\"mipsel\" target_os = \"android\" mips_arch_variant = \"r6\" mips_use_msa = true is_component_build = true is_clang = false"
ninja -C out/Default
mips64el
gn gen out/Default "--args=is_debug=false target_cpu=\"mips64el\" target_os = \"android\" mips_arch_variant = \"r6\" mips_use_msa = true is_component_build = true is_clang = false"
ninja -C out/Default
### Linux
GYP_DEFINES="target_arch=x64" ./gyp_libyuv
......
......@@ -42,6 +42,7 @@ static const int kCpuHasAVX3 = 0x2000;
// These flags are only valid on MIPS processors.
static const int kCpuHasMIPS = 0x10000;
static const int kCpuHasDSPR2 = 0x20000;
static const int kCpuHasMSA = 0x40000;
// Internal function used to auto-init.
LIBYUV_API
......
/*
* Copyright 2016 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef __MACROS_MSA_H__
#define __MACROS_MSA_H__
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#include <stdint.h>
#include <msa.h>
#define LD_B(RTYPE, psrc) *((RTYPE*)(psrc))
#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
#define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in)
#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
/* Description : Load two vectors with 16 'byte' sized elements
Arguments : Inputs - psrc, stride
Outputs - out0, out1
Return Type - as per RTYPE
Details : Load 16 byte elements in 'out0' from (psrc)
Load 16 byte elements in 'out1' from (psrc + stride)
*/
#define LD_B2(RTYPE, psrc, stride, out0, out1) { \
out0 = LD_B(RTYPE, (psrc)); \
out1 = LD_B(RTYPE, (psrc) + stride); \
}
#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
#define LD_SB2(...) LD_B2(v16i8, __VA_ARGS__)
#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) { \
LD_B2(RTYPE, (psrc), stride, out0, out1); \
LD_B2(RTYPE, (psrc) + 2 * stride , stride, out2, out3); \
}
#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
#define LD_SB4(...) LD_B4(v16i8, __VA_ARGS__)
/* Description : Store two vectors with stride each having 16 'byte' sized
elements
Arguments : Inputs - in0, in1, pdst, stride
Details : Store 16 byte elements from 'in0' to (pdst)
Store 16 byte elements from 'in1' to (pdst + stride)
*/
#define ST_B2(RTYPE, in0, in1, pdst, stride) { \
ST_B(RTYPE, in0, (pdst)); \
ST_B(RTYPE, in1, (pdst) + stride); \
}
#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
#define ST_SB2(...) ST_B2(v16i8, __VA_ARGS__)
#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) { \
ST_B2(RTYPE, in0, in1, (pdst), stride); \
ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
}
#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
#define ST_SB4(...) ST_B4(v16i8, __VA_ARGS__)
/* Description : Shuffle byte vector elements as per mask vector
Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
Outputs - out0, out1
Return Type - as per RTYPE
Details : Byte elements from 'in0' & 'in1' are copied selectively to
'out0' as per control vector 'mask0'
*/
#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) { \
out0 = (RTYPE) __msa_vshf_b((v16i8) mask0, (v16i8) in1, (v16i8) in0); \
out1 = (RTYPE) __msa_vshf_b((v16i8) mask1, (v16i8) in3, (v16i8) in2); \
}
#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */
#endif /* __MACROS_MSA_H__ */
......@@ -372,6 +372,10 @@ extern "C" {
#endif
#endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#define HAS_MIRRORROW_MSA
#endif
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
#if defined(VISUALC_HAS_AVX2)
#define SIMD_ALIGNED(var) __declspec(align(32)) var
......@@ -809,11 +813,13 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width);
void MirrorRow_MSA(const uint8* src, uint8* dst, int width);
void MirrorRow_C(const uint8* src, uint8* dst, int width);
void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
void MirrorRow_Any_SSSE3(const uint8* src, uint8* dst, int width);
void MirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
void MirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
void MirrorRow_Any_MSA(const uint8* src, uint8* dst, int width);
void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width);
......
......@@ -8,10 +8,13 @@
import("//build_overrides/build.gni")
import("//build/config/arm.gni")
import("//build/config/mips.gni")
declare_args() {
libyuv_include_tests = !build_with_chromium
libyuv_disable_jpeg = false
libyuv_use_neon = (current_cpu == "arm64" ||
(current_cpu == "arm" && (arm_use_neon || arm_optionally_use_neon)))
libyuv_use_msa = (current_cpu == "mips64el" || current_cpu == "mipsel") &&
mips_use_msa
}
......@@ -26,12 +26,18 @@
# Link-Time Optimizations.
'use_lto%': 0,
'build_neon': 0,
'build_msa': 0,
'conditions': [
['(target_arch == "armv7" or target_arch == "armv7s" or \
(target_arch == "arm" and arm_version >= 7) or target_arch == "arm64")\
and (arm_neon == 1 or arm_neon_optional == 1)', {
'build_neon': 1,
}],
['(target_arch == "mipsel" or target_arch == "mips64el")\
and (mips_msa == 1)',
{
'build_msa': 1,
}],
],
},
......@@ -79,6 +85,11 @@
}],
],
}],
['build_msa != 0', {
'defines': [
'LIBYUV_MSA',
],
}],
['OS != "ios" and libyuv_disable_jpeg != 1', {
'defines': [
'HAVE_JPEG'
......
......@@ -18,6 +18,7 @@
'include/libyuv/convert_from.h',
'include/libyuv/convert_from_argb.h',
'include/libyuv/cpu_id.h',
'include/libyuv/macros_msa.h',
'include/libyuv/mjpeg_decoder.h',
'include/libyuv/planar_functions.h',
'include/libyuv/rotate.h',
......@@ -61,6 +62,7 @@
'source/row_common.cc',
'source/row_gcc.cc',
'source/row_mips.cc',
'source/row_msa.cc',
'source/row_neon.cc',
'source/row_neon64.cc',
'source/row_win.cc',
......
......@@ -86,6 +86,12 @@
'LIBYUV_NEON'
],
}],
[ '(target_arch == "mipsel" or target_arch == "mips64el") \
and (mips_msa == 1)', {
'defines': [
'LIBYUV_MSA'
],
}],
], # conditions
'defines': [
# Enable the following 3 macros to turn off assembly for specified CPU.
......
......@@ -161,6 +161,38 @@ int ArmCpuCaps(const char* cpuinfo_name) {
return 0;
}
LIBYUV_API SAFEBUFFERS
int MipsCpuCaps(const char* cpuinfo_name, const char ase[]) {
char cpuinfo_line[512];
int len = strlen(ase);
FILE* f = fopen(cpuinfo_name, "r");
if (!f) {
// ase enabled if /proc/cpuinfo is unavailable.
if(strcmp(ase, " msa") == 0) {
return kCpuHasMSA;
}
if(strcmp(ase, " dspr2") == 0) {
return kCpuHasDSPR2;
}
}
while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
if (memcmp(cpuinfo_line, "ASEs implemented", 16) == 0) {
char* p = strstr(cpuinfo_line, ase);
if (p && (p[len] == ' ' || p[len] == '\n')) {
fclose(f);
if(strcmp(ase, " msa") == 0) {
return kCpuHasMSA;
}
if(strcmp(ase, " dspr2") == 0) {
return kCpuHasDSPR2;
}
}
}
}
fclose(f);
return 0;
}
// CPU detect function for SIMD instruction sets.
LIBYUV_API
int cpu_info_ = 0; // cpu_info is not initialized yet.
......@@ -253,11 +285,17 @@ int InitCpuFlags(void) {
#if defined(__mips__) && defined(__linux__)
#if defined(__mips_dspr2)
cpu_info |= kCpuHasDSPR2;
#endif
#if defined(__mips_msa)
cpu_info = MipsCpuCaps("/proc/cpuinfo", " msa");
#endif
cpu_info |= kCpuHasMIPS;
if (getenv("LIBYUV_DISABLE_DSPR2")) {
cpu_info &= ~kCpuHasDSPR2;
}
if (getenv("LIBYUV_DISABLE_MSA")) {
cpu_info &= ~kCpuHasMSA;
}
#endif
#if defined(__arm__) || defined(__aarch64__)
// gcc -mfpu=neon defines __ARM_NEON__
......
......@@ -401,6 +401,14 @@ void MirrorPlane(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_y, 4) && IS_ALIGNED(dst_stride_y, 4)) {
MirrorRow = MirrorRow_DSPR2;
}
#endif
#if defined(HAS_MIRRORROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
MirrorRow = MirrorRow_Any_MSA;
if (IS_ALIGNED(width, 64)) {
MirrorRow = MirrorRow_MSA;
}
}
#endif
// Mirror plane
......
......@@ -141,6 +141,14 @@ void RotatePlane180(const uint8* src, int src_stride,
MirrorRow = MirrorRow_DSPR2;
}
#endif
#if defined(HAS_MIRRORROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
MirrorRow = MirrorRow_Any_MSA;
if (IS_ALIGNED(width, 64)) {
MirrorRow = MirrorRow_MSA;
}
}
#endif
#if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
......
......@@ -631,6 +631,9 @@ ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
#ifdef HAS_MIRRORROW_NEON
ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15)
#endif
#ifdef HAS_MIRRORROW_MSA
ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
#endif
#ifdef HAS_ARGBMIRRORROW_AVX2
ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7)
#endif
......
/*
* Copyright 2016 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/row.h"
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#include "libyuv/macros_msa.h"
#endif
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
void MirrorRow_MSA(const uint8* src, uint8* dst, int width) {
int count;
v16u8 src0, src1, src2, src3;
v16u8 dst0, dst1, dst2, dst3;
v16i8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
src += width - 64;
for (count = 0; count < width; count += 64) {
LD_UB4(src, 16, src3, src2, src1, src0);
VSHF_B2_UB(src3, src3, src2, src2, mask, mask, dst3, dst2);
VSHF_B2_UB(src1, src1, src0, src0, mask, mask, dst1, dst0);
ST_UB4(dst0, dst1, dst2, dst3, dst, 16);
dst += 64;
src -= 64;
}
}
#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment