Commit 4a86a836 authored by fbarchard@google.com

On NEON, remove the aligned-pointer variants of SplitUVRow and MergeUVRow

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/930020

git-svn-id: http://libyuv.googlecode.com/svn/trunk@493 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent cb5262db
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 492 Version: 493
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -524,8 +524,6 @@ void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, ...@@ -524,8 +524,6 @@ void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int pix); int pix);
void SplitUVRow_Unaligned_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, void SplitUVRow_Unaligned_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int pix); int pix);
void SplitUVRow_Unaligned_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int pix);
void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
uint8* dst_v, int pix); uint8* dst_v, int pix);
void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
...@@ -549,8 +547,6 @@ void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v, ...@@ -549,8 +547,6 @@ void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
uint8* dst_uv, int width); uint8* dst_uv, int width);
void MergeUVRow_Unaligned_AVX2(const uint8* src_u, const uint8* src_v, void MergeUVRow_Unaligned_AVX2(const uint8* src_u, const uint8* src_v,
uint8* dst_uv, int width); uint8* dst_uv, int width);
void MergeUVRow_Unaligned_NEON(const uint8* src_u, const uint8* src_v,
uint8* dst_uv, int width);
void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width); int width);
void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 492 #define LIBYUV_VERSION 493
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -397,12 +397,7 @@ static int X420ToI420(const uint8* src_y, ...@@ -397,12 +397,7 @@ static int X420ToI420(const uint8* src_y,
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
SplitUVRow = SplitUVRow_Any_NEON; SplitUVRow = SplitUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
SplitUVRow = SplitUVRow_Unaligned_NEON; SplitUVRow = SplitUVRow_NEON;
if (IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
SplitUVRow = SplitUVRow_NEON;
}
} }
} }
#endif #endif
......
...@@ -520,7 +520,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y, ...@@ -520,7 +520,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
int halfwidth = (width + 1) >> 1; int halfwidth = (width + 1) >> 1;
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUVRow_C; int width) = MergeUVRow_C;
#if defined(HAS_MERGEUVROW_SSE2) #if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
MergeUVRow_ = MergeUVRow_Any_SSE2; MergeUVRow_ = MergeUVRow_Any_SSE2;
...@@ -551,12 +551,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y, ...@@ -551,12 +551,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
MergeUVRow_ = MergeUVRow_Any_NEON; MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_Unaligned_NEON; MergeUVRow_ = MergeUVRow_NEON;
if (IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUVRow_ = MergeUVRow_NEON;
}
} }
} }
#endif #endif
......
...@@ -249,7 +249,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb, ...@@ -249,7 +249,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
#endif #endif
int halfwidth = (width + 1) >> 1; int halfwidth = (width + 1) >> 1;
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUVRow_C; int width) = MergeUVRow_C;
#if defined(HAS_MERGEUVROW_SSE2) #if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
MergeUVRow_ = MergeUVRow_Any_SSE2; MergeUVRow_ = MergeUVRow_Any_SSE2;
...@@ -276,10 +276,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb, ...@@ -276,10 +276,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
MergeUVRow_ = MergeUVRow_Any_NEON; MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_Unaligned_NEON; MergeUVRow_ = MergeUVRow_NEON;
if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUVRow_ = MergeUVRow_NEON;
}
} }
} }
#endif #endif
...@@ -358,7 +355,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb, ...@@ -358,7 +355,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
#endif #endif
int halfwidth = (width + 1) >> 1; int halfwidth = (width + 1) >> 1;
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUVRow_C; int width) = MergeUVRow_C;
#if defined(HAS_MERGEUVROW_SSE2) #if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
MergeUVRow_ = MergeUVRow_Any_SSE2; MergeUVRow_ = MergeUVRow_Any_SSE2;
...@@ -385,10 +382,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb, ...@@ -385,10 +382,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
MergeUVRow_ = MergeUVRow_Any_NEON; MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_Unaligned_NEON; MergeUVRow_ = MergeUVRow_NEON;
if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUVRow_ = MergeUVRow_NEON;
}
} }
} }
#endif #endif
......
...@@ -312,7 +312,7 @@ SplitUVRowANY(SplitUVRow_Any_SSE2, SplitUVRow_Unaligned_SSE2, SplitUVRow_C, 15) ...@@ -312,7 +312,7 @@ SplitUVRowANY(SplitUVRow_Any_SSE2, SplitUVRow_Unaligned_SSE2, SplitUVRow_C, 15)
SplitUVRowANY(SplitUVRow_Any_AVX2, SplitUVRow_Unaligned_AVX2, SplitUVRow_C, 31) SplitUVRowANY(SplitUVRow_Any_AVX2, SplitUVRow_Unaligned_AVX2, SplitUVRow_C, 31)
#endif #endif
#ifdef HAS_SPLITUVROW_NEON #ifdef HAS_SPLITUVROW_NEON
SplitUVRowANY(SplitUVRow_Any_NEON, SplitUVRow_Unaligned_NEON, SplitUVRow_C, 15) SplitUVRowANY(SplitUVRow_Any_NEON, SplitUVRow_NEON, SplitUVRow_C, 15)
#endif #endif
#ifdef HAS_SPLITUVROW_MIPS_DSPR2 #ifdef HAS_SPLITUVROW_MIPS_DSPR2
SplitUVRowANY(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_Unaligned_MIPS_DSPR2, SplitUVRowANY(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_Unaligned_MIPS_DSPR2,
...@@ -338,7 +338,7 @@ MergeUVRow_ANY(MergeUVRow_Any_SSE2, MergeUVRow_Unaligned_SSE2, MergeUVRow_C, 15) ...@@ -338,7 +338,7 @@ MergeUVRow_ANY(MergeUVRow_Any_SSE2, MergeUVRow_Unaligned_SSE2, MergeUVRow_C, 15)
MergeUVRow_ANY(MergeUVRow_Any_AVX2, MergeUVRow_Unaligned_AVX2, MergeUVRow_C, 31) MergeUVRow_ANY(MergeUVRow_Any_AVX2, MergeUVRow_Unaligned_AVX2, MergeUVRow_C, 31)
#endif #endif
#ifdef HAS_MERGEUVROW_NEON #ifdef HAS_MERGEUVROW_NEON
MergeUVRow_ANY(MergeUVRow_Any_NEON, MergeUVRow_Unaligned_NEON, MergeUVRow_C, 15) MergeUVRow_ANY(MergeUVRow_Any_NEON, MergeUVRow_NEON, MergeUVRow_C, 15)
#endif #endif
#undef MergeUVRow_ANY #undef MergeUVRow_ANY
......
...@@ -748,33 +748,11 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy, ...@@ -748,33 +748,11 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
#endif // HAS_UYVYTOARGBROW_NEON #endif // HAS_UYVYTOARGBROW_NEON
#ifdef HAS_SPLITUVROW_NEON #ifdef HAS_SPLITUVROW_NEON
// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v.
// Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) { int width) {
asm volatile ( asm volatile (
".p2align 2 \n" ".p2align 2 \n"
"1: \n"
"vld2.u8 {q0, q1}, [%0:128]! \n" // load 16 pairs of UV
"subs %3, %3, #16 \n" // 16 processed per loop
"vst1.u8 {q0}, [%1:128]! \n" // store U
"vst1.u8 {q1}, [%2:128]! \n" // store V
"bgt 1b \n"
: "+r"(src_uv), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(width) // %3 // Output registers
: // Input registers
: "memory", "cc", "q0", "q1" // Clobber List
);
}
// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v
// Alignment requirement: Multiple of 16 pixels, pointers unaligned.
void SplitUVRow_Unaligned_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) {
asm volatile (
".p2align 2 \n"
"1: \n" "1: \n"
"vld2.u8 {q0, q1}, [%0]! \n" // load 16 pairs of UV "vld2.u8 {q0, q1}, [%0]! \n" // load 16 pairs of UV
"subs %3, %3, #16 \n" // 16 processed per loop "subs %3, %3, #16 \n" // 16 processed per loop
...@@ -793,32 +771,10 @@ void SplitUVRow_Unaligned_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, ...@@ -793,32 +771,10 @@ void SplitUVRow_Unaligned_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
#ifdef HAS_MERGEUVROW_NEON #ifdef HAS_MERGEUVROW_NEON
// Reads 16 U's and V's and writes out 16 pairs of UV. // Reads 16 U's and V's and writes out 16 pairs of UV.
// Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) { int width) {
asm volatile ( asm volatile (
".p2align 2 \n" ".p2align 2 \n"
"1: \n"
"vld1.u8 {q0}, [%0:128]! \n" // load U
"vld1.u8 {q1}, [%1:128]! \n" // load V
"subs %3, %3, #16 \n" // 16 processed per loop
"vst2.u8 {q0, q1}, [%2:128]! \n" // store 16 pairs of UV
"bgt 1b \n"
:
"+r"(src_u), // %0
"+r"(src_v), // %1
"+r"(dst_uv), // %2
"+r"(width) // %3 // Output registers
: // Input registers
: "memory", "cc", "q0", "q1" // Clobber List
);
}
// Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUVRow_Unaligned_NEON(const uint8* src_u, const uint8* src_v,
uint8* dst_uv, int width) {
asm volatile (
".p2align 2 \n"
"1: \n" "1: \n"
"vld1.u8 {q0}, [%0]! \n" // load U "vld1.u8 {q0}, [%0]! \n" // load U
"vld1.u8 {q1}, [%1]! \n" // load V "vld1.u8 {q1}, [%1]! \n" // load V
......
...@@ -1017,7 +1017,8 @@ static void ScaleARGBSimple(int src_width, int src_height, ...@@ -1017,7 +1017,8 @@ static void ScaleARGBSimple(int src_width, int src_height,
int x = (dx >= 65536) ? ((dx >> 1) - 32768) : (dx >> 1); int x = (dx >= 65536) ? ((dx >> 1) - 32768) : (dx >> 1);
int y = (dy >= 65536) ? ((dy >> 1) - 32768) : (dy >> 1); int y = (dy >= 65536) ? ((dy >> 1) - 32768) : (dy >> 1);
for (int i = 0; i < dst_height; ++i) { for (int i = 0; i < dst_height; ++i) {
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x, dx); ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x,
dx);
dst_argb += dst_stride; dst_argb += dst_stride;
y += dy; y += dy;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment