Commit 8dd523da authored by fbarchard@google.com

Issue subs sooner in the NEON loops and remove the unnecessary alignment checks for SplitUV_NEON

BUG=none
TEST=none
Review URL: http://webrtc-codereview.appspot.com/335014

git-svn-id: http://libyuv.googlecode.com/svn/trunk@122 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent afcde1b2
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 119 Version: 122
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -29,9 +29,9 @@ static void SplitUV_NEON(const uint8* src_uv, ...@@ -29,9 +29,9 @@ static void SplitUV_NEON(const uint8* src_uv,
asm volatile ( asm volatile (
"1: \n" "1: \n"
"vld2.u8 {q0,q1}, [%0]! \n" // load 16 pairs of UV "vld2.u8 {q0,q1}, [%0]! \n" // load 16 pairs of UV
"subs %3, %3, #16 \n" // 16 processed per loop
"vst1.u8 {q0}, [%1]! \n" // store U "vst1.u8 {q0}, [%1]! \n" // store U
"vst1.u8 {q1}, [%2]! \n" // Store V "vst1.u8 {q1}, [%2]! \n" // Store V
"subs %3, %3, #16 \n" // 16 processed per loop
"bhi 1b \n" "bhi 1b \n"
: "+r"(src_uv), : "+r"(src_uv),
"+r"(dst_u), "+r"(dst_u),
...@@ -714,11 +714,7 @@ static int X420ToI420(const uint8* src_y, ...@@ -714,11 +714,7 @@ static int X420ToI420(const uint8* src_y,
int halfwidth = (width + 1) >> 1; int halfwidth = (width + 1) >> 1;
void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
#if defined(HAS_SPLITUV_NEON) #if defined(HAS_SPLITUV_NEON)
if (TestCpuFlag(kCpuHasNEON) && if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) {
IS_ALIGNED(halfwidth, 16) &&
IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
SplitUV = SplitUV_NEON; SplitUV = SplitUV_NEON;
} else } else
#elif defined(HAS_SPLITUV_SSE2) #elif defined(HAS_SPLITUV_SSE2)
...@@ -1908,8 +1904,8 @@ static void SetRow8_NEON(uint8* dst, uint32 v32, int count) { ...@@ -1908,8 +1904,8 @@ static void SetRow8_NEON(uint8* dst, uint32 v32, int count) {
asm volatile ( asm volatile (
"vdup.u32 q0, %2 \n" // duplicate 4 ints "vdup.u32 q0, %2 \n" // duplicate 4 ints
"1: \n" "1: \n"
"vst1.u32 {q0}, [%0]! \n" // store
"subs %1, %1, #16 \n" // 16 bytes per loop "subs %1, %1, #16 \n" // 16 bytes per loop
"vst1.u32 {q0}, [%0]! \n" // store
"bhi 1b \n" "bhi 1b \n"
: "+r"(dst), // %0 : "+r"(dst), // %0
"+r"(count) // %1 "+r"(count) // %1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment