Commit 6c1b2d38 authored by fbarchard@google.com's avatar fbarchard@google.com

MIPS port of libyuv. Includes functionality for convert, rotate, scale, and memcpy.

BUG=126
TESTED=tested by mips
Review URL: https://webrtc-codereview.appspot.com/930005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@449 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 1f399dfa
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 447
Version: 449
License: BSD
License File: LICENSE
......
......@@ -19,9 +19,11 @@
#include "libyuv/convert_from_argb.h"
#include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
#include "libyuv/mjpeg_decoder.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/rotate_argb.h"
#include "libyuv/row.h"
#include "libyuv/scale.h"
#include "libyuv/scale_argb.h"
#include "libyuv/version.h"
......
......@@ -175,8 +175,14 @@ extern "C" {
// The following are available on Mips platforms
// HAS_COPYROW_MIPS is defined for every MIPS build unless YUV_DISABLE_ASM is
// defined. The *_MIPS_DSPR2 macros are additionally gated on the compiler
// defining __mips_dsp with __mips_dsp_rev >= 2, so code guarded by them is
// only compiled when the toolchain targets DSP revision 2 or later.
#if !defined(YUV_DISABLE_ASM) && defined(__mips__)
#define HAS_COPYROW_MIPS
#if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#define HAS_SPLITUV_MIPS_DSPR2
#define HAS_MIRRORROW_MIPS_DSPR2
#define HAS_MIRRORROWUV_MIPS_DSPR2
#define HAS_I422TOARGBROW_MIPS_DSPR2
#define HAS_I422TOBGRAROW_MIPS_DSPR2
#define HAS_I422TOABGRROW_MIPS_DSPR2
#endif
#endif
......@@ -282,6 +288,9 @@ void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width);
void MirrorRowUV_MIPS_DSPR2(const uint8* src, uint8* dst_u, uint8* dst_v,
int width);
void MirrorRow_C(const uint8* src, uint8* dst, int width);
void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, int width);
......@@ -321,6 +330,7 @@ void MergeUV_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
void CopyRow_X86(const uint8* src, uint8* dst, int count);
void CopyRow_NEON(const uint8* src, uint8* dst, int count);
void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
void CopyRow_C(const uint8* src, uint8* dst, int count);
void SetRow8_X86(uint8* dst, uint32 v32, int count);
......@@ -694,6 +704,21 @@ void NV21ToARGBRow_Any_NEON(const uint8* y_buf,
const uint8* uv_buf,
uint8* argb_buf,
int width);
// MIPS DSPR2 row converters taking separate y, u and v input rows and writing
// one output row of `width` pixels to rgb_buf. The output byte order is
// presumably the one named in each function (ARGB, BGRA, ABGR) - confirm
// against the implementations in row_mips.cc.
// The callers visible in this change only select these functions when width
// is a multiple of 4 and their pointer/stride alignment checks pass.
void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 447
#define LIBYUV_VERSION 449
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -75,11 +75,12 @@
'source/convert_from_argb.cc',
'source/cpu_id.cc',
'source/format_conversion.cc',
'source/memcpy_mips.S',
'source/memcpy_mips.S', # TODO(fbarchard): Move into row_mips.cc
'source/mjpeg_decoder.cc',
'source/planar_functions.cc',
'source/rotate.cc',
'source/rotate_argb.cc',
'source/rotate_mips.cc',
'source/rotate_neon.cc',
'source/row_common.cc',
'source/row_mips.cc',
......
......@@ -132,6 +132,14 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#elif defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
for (int y = 0; y < height; ++y) {
......@@ -756,6 +764,11 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#elif defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
SIMD_ALIGNED(uint8 rowy[kMaxStride]);
......@@ -829,6 +842,11 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#elif defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
SIMD_ALIGNED(uint8 rowy[kMaxStride]);
......
......@@ -599,6 +599,14 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#elif defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
for (int y = 0; y < height; ++y) {
......@@ -652,6 +660,14 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
I422ToBGRARow = I422ToBGRARow_NEON;
}
}
#elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
}
#endif
for (int y = 0; y < height; ++y) {
......@@ -909,6 +925,13 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#elif defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
SIMD_ALIGNED(uint8 row[kMaxStride]);
......@@ -975,6 +998,14 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#elif defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
SIMD_ALIGNED(uint8 row[kMaxStride]);
......@@ -1041,6 +1072,14 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#elif defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
SIMD_ALIGNED(uint8 row[kMaxStride]);
......
......@@ -174,7 +174,7 @@ int InitCpuFlags(void) {
}
}
#endif
// environment variable overrides for testing.
// Environment variable overrides for testing.
if (TestEnv("LIBYUV_DISABLE_X86")) {
cpu_info_ &= ~kCpuHasX86;
}
......@@ -197,7 +197,7 @@ int InitCpuFlags(void) {
cpu_info_ &= ~kCpuHasAVX2;
}
#elif defined(__mips__) && defined(__linux__)
// linux mips parse text file for dsp detect.
// On Linux/MIPS, parse a text file to detect DSP support.
cpu_info_ = MipsCpuCaps("dsp"); // set kCpuHasMIPS_DSP.
#if defined(__mips_dspr2)
cpu_info_ |= kCpuHasMIPS_DSPR2;
......@@ -215,7 +215,7 @@ int InitCpuFlags(void) {
}
#elif defined(__arm__)
#if defined(__linux__) && (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
// linux arm parse text file for neon detect.
// On Linux/ARM, parse a text file to detect NEON support.
cpu_info_ = ArmCpuCaps("/proc/cpuinfo");
#elif defined(__ARM_NEON__)
// gcc -mfpu=neon defines __ARM_NEON__
......
#if defined (__mips__)
#
# Copyright (c) 2012 The LibYuv project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
#
.globl memcpy_MIPS;
.align 2;
.type memcpy_MIPS,@function;
......@@ -21,8 +29,8 @@ memcpy_MIPS:
negu $a3,$a0
andi $a3,$a3,0x3 # we need to copy a3 bytes to make a0/a1 aligned
beq $a3,$zero,chk16w # when a3=0 then the dst (a0) is word-aligned
subu $a2,$a2,$a3 # now a2 is the remining bytes count
beq $a3,$zero,chk16w # when a3=0 then the dst (a0) is
subu $a2,$a2,$a3 # word-aligned; now a2 is the remaining byte count
lwr $t8,0($a1)
addu $a1,$a1,$a3
......@@ -30,15 +38,14 @@ memcpy_MIPS:
addu $a0,$a0,$a3
# Now the dst/src are mutually word-aligned with word-aligned addresses
chk16w: andi $t8,$a2,0x3f # any whole 64-byte chunks?
chk16w:
andi $t8,$a2,0x3f # any whole 64-byte chunks?
# t8 is the byte count after 64-byte chunks
beq $a2,$t8,chk8w # if a2==t8, no 64-byte chunks
# There will be at most 1 32-byte chunk after it
subu $a3,$a2,$t8 # subtract from a2 the remainder
# Here a3 counts bytes in 16w chunks
addu $a3,$a0,$a3 # Now a3 is the final dst after 64-byte chunks
addu $t0,$a0,$a2 # t0 is the "past the end" address
# When in the loop we exercise "pref 30,x(a0)", the a0+x should not be past
......@@ -164,7 +171,8 @@ last8loop:
bne $a0,$a3,last8loop
sb $v1,-1($a0)
leave: j $ra
leave:
j $ra
nop
#
......@@ -183,18 +191,16 @@ unaligned:
swr $v1,0($a0)
addu $a0,$a0,$a3 # below the dst will be word aligned (NOTE1)
ua_chk16w: andi $t8,$a2,0x3f # any whole 64-byte chunks?
ua_chk16w:
andi $t8,$a2,0x3f # any whole 64-byte chunks?
# t8 is the byte count after 64-byte chunks
beq $a2,$t8,ua_chk8w # if a2==t8, no 64-byte chunks
# There will be at most 1 32-byte chunk after it
subu $a3,$a2,$t8 # subtract from a2 the remainder
# Here a3 counts bytes in 16w chunks
addu $a3,$a0,$a3 # Now a3 is the final dst after 64-byte chunks
addu $t0,$a0,$a2 # t0 is the "past the end" address
subu $t9,$t0,160 # t9 is the "last safe pref 30,128(a0)" address
pref 0,0($a1) # bring the first line of src, addr 0
pref 0,32($a1) # bring the second line of src, addr 32
pref 0,64($a1) # bring the third line of src, addr 64
......
......@@ -46,6 +46,11 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
CopyRow = CopyRow_SSE2;
}
#endif
#if defined(HAS_COPYROW_MIPS)
if (TestCpuFlag(kCpuHasMIPS)) {
CopyRow = CopyRow_MIPS;
}
#endif
// Copy plane
for (int y = 0; y < height; ++y) {
......@@ -424,6 +429,14 @@ int I422ToBGRA(const uint8* src_y, int src_stride_y,
}
}
}
#elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
}
#endif
for (int y = 0; y < height; ++y) {
......
......@@ -56,6 +56,23 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
int width);
#endif // defined(__ARM_NEON__)
// Declarations of the MIPS DSPR2 transpose routines. They are only declared
// when assembly is not disabled, the target is MIPS, and the compiler reports
// DSP revision 2 or later.
#if !defined(YUV_DISABLE_ASM) && defined(__mips__)
#if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
// Enables the DSPR2 dispatch in TransposePlane, which picks the FAST variant
// when width, src and src_stride are all 4-aligned and the plain variant
// otherwise.
#define HAS_TRANSPOSE_WX8_MIPS_DSPR2
void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
// Enables the DSPR2 dispatch in TransposeUV, which writes to two destination
// planes (dst_a and dst_b), each with its own stride.
#define HAS_TRANSPOSE_UVWx8_MIPS_DSPR2
void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width);
#endif
#endif
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
#define HAS_TRANSPOSE_WX8_SSSE3
__declspec(naked) __declspec(align(16))
......@@ -794,6 +811,16 @@ void TransposePlane(const uint8* src, int src_stride,
TransposeWx8 = TransposeWx8_FAST_SSSE3;
}
#endif
#if defined(HAS_TRANSPOSE_WX8_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2)) {
if (IS_ALIGNED(width, 4) &&
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
TransposeWx8 = TransposeWx8_FAST_MIPS_DSPR2;
} else {
TransposeWx8 = TransposeWx8_MIPS_DSPR2;
}
}
#endif
// Work across the source in 8x8 tiles
int i = height;
......@@ -856,6 +883,13 @@ void RotatePlane180(const uint8* src, int src_stride,
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
MirrorRow = MirrorRow_SSSE3;
}
#endif
#if defined(HAS_MIRRORROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst, 4) && IS_ALIGNED(dst_stride, 4)) {
MirrorRow = MirrorRow_MIPS_DSPR2;
}
#endif
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
#if defined(HAS_COPYROW_NEON)
......@@ -952,6 +986,11 @@ void TransposeUV(const uint8* src, int src_stride,
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
TransposeUVWx8 = TransposeUVWx8_SSE2;
}
#elif defined(HAS_TRANSPOSE_UVWx8_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 2) &&
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
TransposeUVWx8 = TransposeUVWx8_MIPS_DSPR2;
}
#endif
// Work through the source in 8x8 tiles.
......@@ -1021,6 +1060,11 @@ void RotateUV180(const uint8* src, int src_stride,
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
MirrorRowUV = MirrorRowUV_SSSE3;
}
#elif defined(HAS_MIRRORROWUV_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
MirrorRowUV = MirrorRowUV_MIPS_DSPR2;
}
#endif
dst_a += dst_stride_a * (height - 1);
......
This diff is collapsed.
This diff is collapsed.
......@@ -1957,6 +1957,26 @@ void ScaleFilterRows_MIPS_DSPR2(unsigned char *dst_ptr,
const unsigned char* src_ptr,
ptrdiff_t src_stride,
int dst_width, int source_y_fraction);
// Feature macros and prototypes for the MIPS DSPR2 scale-down row functions.
// In each group the plain function is used when filtering is off and the
// *_Int functions are used when filtering is on (see the ScalePlaneDown4,
// ScalePlaneDown34 and ScalePlaneDown38 dispatch code).

// 1/4 scale-down rows.
#define HAS_SCALEROWDOWN4_MIPS_DSPR2
void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t /* src_stride */,
uint8* dst, int dst_width);
void ScaleRowDown4Int_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width);
// 3/4 scale-down rows. ScalePlaneDown34 only selects these when dst_width is
// a multiple of 24 and the source/destination pointers and strides are
// 4-aligned.
#define HAS_SCALEROWDOWN34_MIPS_DSPR2
void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t /* src_stride */,
uint8* dst, int dst_width);
void ScaleRowDown34_0_Int_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width);
void ScaleRowDown34_1_Int_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* d, int dst_width);
// 3/8 scale-down rows. ScalePlaneDown38 only selects these when dst_width is
// a multiple of 12 and the source/destination pointers and strides are
// 4-aligned.
#define HAS_SCALEROWDOWN38_MIPS_DSPR2
void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t /* src_stride */,
uint8* dst, int dst_width);
void ScaleRowDown38_2_Int_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
void ScaleRowDown38_3_Int_MIPS_DSPR2(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width);
#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
// CPU agnostic row functions
......@@ -2368,6 +2388,13 @@ static void ScalePlaneDown4(int /* src_width */, int /* src_height */,
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Int_SSE2 : ScaleRowDown4_SSE2;
}
#elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
ScaleRowDown4 = filtering ?
ScaleRowDown4Int_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2;
}
#endif
for (int y = 0; y < dst_height; ++y) {
......@@ -2461,6 +2488,19 @@ static void ScalePlaneDown34(int /* src_width */, int /* src_height */,
}
}
#endif
#if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2;
ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2;
} else {
ScaleRowDown34_0 = ScaleRowDown34_0_Int_MIPS_DSPR2;
ScaleRowDown34_1 = ScaleRowDown34_1_Int_MIPS_DSPR2;
}
}
#endif
for (int y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown34_0(src_ptr, src_stride, dst_ptr, dst_width);
......@@ -2541,6 +2581,18 @@ static void ScalePlaneDown38(int /* src_width */, int /* src_height */,
ScaleRowDown38_2 = ScaleRowDown38_2_Int_SSSE3;
}
}
#elif defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2;
ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2;
} else {
ScaleRowDown38_3 = ScaleRowDown38_3_Int_MIPS_DSPR2;
ScaleRowDown38_2 = ScaleRowDown38_2_Int_MIPS_DSPR2;
}
}
#endif
for (int y = 0; y < dst_height - 2; y += 3) {
......
This diff is collapsed.
......@@ -630,4 +630,70 @@ TEST_F(libyuvTest, TestAffine) {
#endif
}
// Runs CopyPlane twice over the same random source plane, once with the CPU
// flags masked to 0 and once with them masked to -1, then expects the two
// destination buffers to be byte-for-byte identical. Also prints the average
// time per iteration for each run.
TEST_F(libyuvTest, TestCopyPlane) {
// Number of bytes that differ between the two destination buffers.
int err = 0;
int yw = benchmark_width_;
int yh = benchmark_height_;
// Border, in bytes, added on every side of the yw x yh image.
int b = 12;
int i, j;
// Size of the padded plane: image plus a border of b on all four sides.
int y_plane_size = (yw + b * 2) * (yh + b * 2);
// Seeded from the wall clock, so the pixel data differs from run to run.
srandom(time(NULL));
align_buffer_16(orig_y, y_plane_size)
align_buffer_16(dst_c, y_plane_size)
align_buffer_16(dst_opt, y_plane_size);
memset(orig_y, 0, y_plane_size);
memset(dst_c, 0, y_plane_size);
memset(dst_opt, 0, y_plane_size);
// Fill image buffers with random data.
// Only the interior is written; the border of orig_y stays zero.
for (i = b; i < (yh + b); ++i) {
for (j = b; j < (yw + b); ++j) {
orig_y[i * (yw + b * 2) + j] = random() & 0xff;
}
}
// Fill destination buffers with random data.
// Both destinations start with identical contents (values 0..0x7f), so any
// byte that differs afterwards was written differently by the two runs.
for (i = 0; i < y_plane_size; ++i) {
uint8 random_number = random() & 0x7f;
dst_c[i] = random_number;
dst_opt[i] = dst_c[i];
}
// Offset of the first interior pixel: skip b full rows, then b columns.
int y_off = b * (yw + b * 2) + b;
// Row pitch of the padded plane.
int y_st = yw + b * 2;
// NOTE(review): this is passed as the fourth CopyPlane argument, presumably
// the destination stride. With a value of 8 and rows of yw bytes, successive
// destination rows overlap whenever yw > 8 - confirm this is intentional.
int stride = 8;
// Disable all optimizations.
MaskCpuFlags(0);
double c_time = get_time();
for (j = 0; j < benchmark_iterations_; j++) {
CopyPlane(orig_y + y_off, y_st, dst_c + y_off, stride, yw, yh);
}
// Average time per iteration.
c_time = (get_time() - c_time) / benchmark_iterations_;
// Enable optimizations.
MaskCpuFlags(-1);
double opt_time = get_time();
for (j = 0; j < benchmark_iterations_; j++) {
CopyPlane(orig_y + y_off, y_st, dst_opt + y_off, stride, yw, yh);
}
// Average time per iteration.
opt_time = (get_time() - opt_time) / benchmark_iterations_;
// The averaged times are scaled by 1e6 and printed labelled as microseconds.
printf(" %8d us C - %8d us OPT\n",
static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
// Compare the whole buffers, border included, so writes outside the copied
// region that differ between the two runs are also counted.
for (i = 0; i < y_plane_size; ++i) {
if (dst_c[i] != dst_opt[i])
++err;
}
free_aligned_buffer_16(orig_y)
free_aligned_buffer_16(dst_c)
free_aligned_buffer_16(dst_opt)
EXPECT_EQ(0, err);
}
} // namespace libyuv
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment