Commit 3e464447 authored by fbarchard@google.com

Mirror source to continuous destination with Neon

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/937020

git-svn-id: http://libyuv.googlecode.com/svn/trunk@488 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent fdec4be3
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 486
Version: 488
License: BSD
License File: LICENSE
......
......@@ -188,7 +188,7 @@ extern "C" {
#define HAS_I444TOARGBROW_NEON
#define HAS_MERGEUV_NEON
#define HAS_MIRRORROW_NEON
#define HAS_MirrorUVRow_NEON
#define HAS_MIRRORUVROW_NEON
#define HAS_NV12TOARGBROW_NEON
#define HAS_NV12TORGB565ROW_NEON
#define HAS_NV21TOARGBROW_NEON
......@@ -216,6 +216,7 @@ extern "C" {
#define HAS_YUY2TOUV422ROW_NEON
#define HAS_YUY2TOUVROW_NEON
#define HAS_YUY2TOYROW_NEON
#define HAS_ARGBMIRRORROW_NEON
#endif
// The following are available on Mips platforms
......@@ -434,7 +435,7 @@ void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra,
void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr,
uint8* dst_u, uint8* dst_v, int width);
void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba,
uint8* dst_u, uint8* dst_v, int width);
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
int pix);
void ARGBToUV422Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
......@@ -498,18 +499,19 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width);
void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width);
void MirrorRow_C(const uint8* src, uint8* dst, int width);
void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width);
void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width);
void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width);
void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width);
void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width);
void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width);
void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 486
#define LIBYUV_VERSION 488
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -156,7 +156,6 @@ enum FourCCBpp {
FOURCC_BPP_ANY = 0, // 0 means unknown.
};
// Converts fourcc aliases into canonical ones.
LIBYUV_API uint32 CanonicalFourCC(uint32 fourcc);
......
......@@ -111,14 +111,15 @@ void MirrorPlane(const uint8* src_y, int src_stride_y,
#if defined(HAS_MIRRORROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
MirrorRow = MirrorRow_SSE2;
}
#endif
#if defined(HAS_MIRRORROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) {
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
MirrorRow = MirrorRow_SSSE3;
}
#endif
}
#endif
// Mirror plane
for (int y = 0; y < height; ++y) {
......@@ -330,6 +331,10 @@ int ARGBMirror(const uint8* src_argb, int src_stride_argb,
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBMirrorRow = ARGBMirrorRow_SSSE3;
}
#elif defined(HAS_ARGBMIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
ARGBMirrorRow = ARGBMirrorRow_NEON;
}
#endif
// Mirror plane
......
......@@ -864,21 +864,19 @@ void RotatePlane180(const uint8* src, int src_stride,
int width, int height) {
void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
#if defined(HAS_MIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
MirrorRow = MirrorRow_NEON;
}
#endif
#if defined(HAS_MIRRORROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
MirrorRow = MirrorRow_SSE2;
}
#endif
#if defined(HAS_MIRRORROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
MirrorRow = MirrorRow_SSSE3;
......@@ -1050,13 +1048,12 @@ void RotateUV180(const uint8* src, int src_stride,
int width, int height) {
void (*MirrorRowUV)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) =
MirrorUVRow_C;
#if defined(HAS_MIRRORROW_UV_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
#if defined(HAS_MIRRORUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
MirrorRowUV = MirrorUVRow_NEON;
}
#elif defined(HAS_MIRRORROW_UV_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) &&
IS_ALIGNED(width, 16) &&
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
MirrorRowUV = MirrorUVRow_SSSE3;
}
......
......@@ -39,14 +39,13 @@ static void ARGBTranspose(const uint8* src, int src_stride,
void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride,
int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(height, 4) && // width of dest.
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4) && // Width of dest.
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
}
#endif
int src_pixel_step = src_stride / 4;
int src_pixel_step = src_stride >> 2;
for (int i = 0; i < width; ++i) { // column of source to row of dest.
ScaleARGBRowDownEven(src, 0, src_pixel_step, dst, height);
dst += dst_stride;
......@@ -87,6 +86,10 @@ void ARGBRotate180(const uint8* src, int src_stride,
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
ARGBMirrorRow = ARGBMirrorRow_SSSE3;
}
#elif defined(HAS_ARGBMIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
ARGBMirrorRow = ARGBMirrorRow_NEON;
}
#endif
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
#if defined(HAS_COPYROW_NEON)
......
This diff is collapsed.
......@@ -32,7 +32,7 @@ static uint32 ReferenceHashDjb2(const uint8* src, uint64 count, uint32 seed) {
TEST_F(libyuvTest, BenchmakDjb2_OPT) {
const int kMaxTest = benchmark_width_ * benchmark_height_;
align_buffer_16(src_a, kMaxTest)
align_buffer_64(src_a, kMaxTest)
for (int i = 0; i < kMaxTest; ++i) {
src_a[i] = i;
......@@ -43,12 +43,12 @@ TEST_F(libyuvTest, BenchmakDjb2_OPT) {
h1 = HashDjb2(src_a, kMaxTest, 5381);
}
EXPECT_EQ(h1, h2);
free_aligned_buffer_16(src_a)
free_aligned_buffer_64(src_a)
}
TEST_F(libyuvTest, BenchmakDjb2_Unaligned_OPT) {
const int kMaxTest = benchmark_width_ * benchmark_height_;
align_buffer_16(src_a, kMaxTest + 1)
align_buffer_64(src_a, kMaxTest + 1)
for (int i = 0; i < kMaxTest; ++i) {
src_a[i + 1] = i;
}
......@@ -58,13 +58,13 @@ TEST_F(libyuvTest, BenchmakDjb2_Unaligned_OPT) {
h1 = HashDjb2(src_a + 1, kMaxTest, 5381);
}
EXPECT_EQ(h1, h2);
free_aligned_buffer_16(src_a)
free_aligned_buffer_64(src_a)
}
TEST_F(libyuvTest, BenchmarkSumSquareError_OPT) {
const int kMaxWidth = 4096 * 3;
align_buffer_16(src_a, kMaxWidth)
align_buffer_16(src_b, kMaxWidth)
align_buffer_64(src_a, kMaxWidth)
align_buffer_64(src_b, kMaxWidth)
memset(src_a, 0, kMaxWidth);
memset(src_b, 0, kMaxWidth);
......@@ -88,14 +88,14 @@ TEST_F(libyuvTest, BenchmarkSumSquareError_OPT) {
EXPECT_EQ(0, h1);
free_aligned_buffer_16(src_a)
free_aligned_buffer_16(src_b)
free_aligned_buffer_64(src_a)
free_aligned_buffer_64(src_b)
}
TEST_F(libyuvTest, SumSquareError) {
const int kMaxWidth = 4096 * 3;
align_buffer_16(src_a, kMaxWidth)
align_buffer_16(src_b, kMaxWidth)
align_buffer_64(src_a, kMaxWidth)
align_buffer_64(src_b, kMaxWidth)
memset(src_a, 0, kMaxWidth);
memset(src_b, 0, kMaxWidth);
......@@ -130,13 +130,13 @@ TEST_F(libyuvTest, SumSquareError) {
EXPECT_EQ(c_err, opt_err);
free_aligned_buffer_16(src_a)
free_aligned_buffer_16(src_b)
free_aligned_buffer_64(src_a)
free_aligned_buffer_64(src_b)
}
TEST_F(libyuvTest, BenchmarkPsnr_OPT) {
align_buffer_16(src_a, benchmark_width_ * benchmark_height_)
align_buffer_16(src_b, benchmark_width_ * benchmark_height_)
align_buffer_64(src_a, benchmark_width_ * benchmark_height_)
align_buffer_64(src_b, benchmark_width_ * benchmark_height_)
for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) {
src_a[i] = i;
src_b[i] = i;
......@@ -155,8 +155,8 @@ TEST_F(libyuvTest, BenchmarkPsnr_OPT) {
EXPECT_EQ(0, 0);
free_aligned_buffer_16(src_a)
free_aligned_buffer_16(src_b)
free_aligned_buffer_64(src_a)
free_aligned_buffer_64(src_b)
}
TEST_F(libyuvTest, Psnr) {
......@@ -165,8 +165,8 @@ TEST_F(libyuvTest, Psnr) {
const int b = 128;
const int kSrcPlaneSize = (kSrcWidth + b * 2) * (kSrcHeight + b * 2);
const int kSrcStride = 2 * b + kSrcWidth;
align_buffer_16(src_a, kSrcPlaneSize)
align_buffer_16(src_b, kSrcPlaneSize)
align_buffer_64(src_a, kSrcPlaneSize)
align_buffer_64(src_b, kSrcPlaneSize)
memset(src_a, 0, kSrcPlaneSize);
memset(src_b, 0, kSrcPlaneSize);
......@@ -232,13 +232,13 @@ TEST_F(libyuvTest, Psnr) {
EXPECT_EQ(opt_err, c_err);
free_aligned_buffer_16(src_a)
free_aligned_buffer_16(src_b)
free_aligned_buffer_64(src_a)
free_aligned_buffer_64(src_b)
}
TEST_F(libyuvTest, BenchmarkSsim_OPT) {
align_buffer_16(src_a, benchmark_width_ * benchmark_height_)
align_buffer_16(src_b, benchmark_width_ * benchmark_height_)
align_buffer_64(src_a, benchmark_width_ * benchmark_height_)
align_buffer_64(src_b, benchmark_width_ * benchmark_height_)
for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) {
src_a[i] = i;
src_b[i] = i;
......@@ -257,8 +257,8 @@ TEST_F(libyuvTest, BenchmarkSsim_OPT) {
EXPECT_EQ(0, 0); // Pass if we get this far.
free_aligned_buffer_16(src_a)
free_aligned_buffer_16(src_b)
free_aligned_buffer_64(src_a)
free_aligned_buffer_64(src_b)
}
TEST_F(libyuvTest, Ssim) {
......@@ -267,8 +267,8 @@ TEST_F(libyuvTest, Ssim) {
const int b = 128;
const int kSrcPlaneSize = (kSrcWidth + b * 2) * (kSrcHeight + b * 2);
const int kSrcStride = 2 * b + kSrcWidth;
align_buffer_16(src_a, kSrcPlaneSize)
align_buffer_16(src_b, kSrcPlaneSize)
align_buffer_64(src_a, kSrcPlaneSize)
align_buffer_64(src_b, kSrcPlaneSize)
memset(src_a, 0, kSrcPlaneSize);
memset(src_b, 0, kSrcPlaneSize);
......@@ -330,8 +330,8 @@ TEST_F(libyuvTest, Ssim) {
EXPECT_EQ(opt_err, c_err);
free_aligned_buffer_16(src_a)
free_aligned_buffer_16(src_b)
free_aligned_buffer_64(src_a)
free_aligned_buffer_64(src_b)
}
} // namespace libyuv
This diff is collapsed.
......@@ -639,9 +639,9 @@ TEST_F(libyuvTest, TestCopyPlane) {
int y_plane_size = (yw + b * 2) * (yh + b * 2);
srandom(time(NULL));
align_buffer_16(orig_y, y_plane_size)
align_buffer_16(dst_c, y_plane_size)
align_buffer_16(dst_opt, y_plane_size);
align_buffer_64(orig_y, y_plane_size)
align_buffer_64(dst_c, y_plane_size)
align_buffer_64(dst_opt, y_plane_size);
memset(orig_y, 0, y_plane_size);
memset(dst_c, 0, y_plane_size);
......@@ -689,9 +689,9 @@ TEST_F(libyuvTest, TestCopyPlane) {
++err;
}
free_aligned_buffer_16(orig_y)
free_aligned_buffer_16(dst_c)
free_aligned_buffer_16(dst_opt)
free_aligned_buffer_64(orig_y)
free_aligned_buffer_64(dst_c)
free_aligned_buffer_64(dst_opt)
EXPECT_EQ(0, err);
}
......
......@@ -24,7 +24,7 @@ static int ARGBTestRotate(int src_width, int src_height,
int src_argb_plane_size = (src_width + b * 2) * (src_height + b * 2) * 4;
int src_stride_argb = (b * 2 + src_width) * 4;
align_buffer_16(src_argb, src_argb_plane_size)
align_buffer_64(src_argb, src_argb_plane_size)
memset(src_argb, 1, src_argb_plane_size);
int dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4;
......@@ -39,8 +39,8 @@ static int ARGBTestRotate(int src_width, int src_height,
}
}
align_buffer_16(dst_argb_c, dst_argb_plane_size)
align_buffer_16(dst_argb_opt, dst_argb_plane_size)
align_buffer_64(dst_argb_c, dst_argb_plane_size)
align_buffer_64(dst_argb_opt, dst_argb_plane_size)
memset(dst_argb_c, 2, dst_argb_plane_size);
memset(dst_argb_opt, 3, dst_argb_plane_size);
......@@ -91,9 +91,9 @@ static int ARGBTestRotate(int src_width, int src_height,
}
}
free_aligned_buffer_16(dst_argb_c)
free_aligned_buffer_16(dst_argb_opt)
free_aligned_buffer_16(src_argb)
free_aligned_buffer_64(dst_argb_c)
free_aligned_buffer_64(dst_argb_opt)
free_aligned_buffer_64(src_argb)
return max_diff;
}
......
This diff is collapsed.
......@@ -24,7 +24,7 @@ static int ARGBTestFilter(int src_width, int src_height,
int src_argb_plane_size = (src_width + b * 2) * (src_height + b * 2) * 4;
int src_stride_argb = (b * 2 + src_width) * 4;
align_buffer_16(src_argb, src_argb_plane_size)
align_buffer_64(src_argb, src_argb_plane_size)
memset(src_argb, 1, src_argb_plane_size);
int dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4;
......@@ -39,8 +39,8 @@ static int ARGBTestFilter(int src_width, int src_height,
}
}
align_buffer_16(dst_argb_c, dst_argb_plane_size)
align_buffer_16(dst_argb_opt, dst_argb_plane_size)
align_buffer_64(dst_argb_c, dst_argb_plane_size)
align_buffer_64(dst_argb_opt, dst_argb_plane_size)
memset(dst_argb_c, 2, dst_argb_plane_size);
memset(dst_argb_opt, 3, dst_argb_plane_size);
......@@ -95,9 +95,9 @@ static int ARGBTestFilter(int src_width, int src_height,
}
}
free_aligned_buffer_16(dst_argb_c)
free_aligned_buffer_16(dst_argb_opt)
free_aligned_buffer_16(src_argb)
free_aligned_buffer_64(dst_argb_c)
free_aligned_buffer_64(dst_argb_opt)
free_aligned_buffer_64(src_argb)
return max_diff;
}
......
......@@ -13,14 +13,14 @@
#include <gtest/gtest.h>
#define align_buffer_16(var, size) \
#define align_buffer_64(var, size) \
uint8* var; \
uint8* var##_mem; \
var##_mem = reinterpret_cast<uint8*>(malloc((size) + 15)); \
var##_mem = reinterpret_cast<uint8*>(malloc((size) + 63)); \
var = reinterpret_cast<uint8*> \
((reinterpret_cast<intptr_t>(var##_mem) + 15) & ~15);
((reinterpret_cast<intptr_t>(var##_mem) + 63) & ~63);
#define free_aligned_buffer_16(var) \
#define free_aligned_buffer_64(var) \
free(var##_mem); \
var = 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment