Commit 0b9a65b0 authored by fbarchard@google.com

CopyRow_NEON reimplemented with vldm/vstm

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/412001

git-svn-id: http://libyuv.googlecode.com/svn/trunk@190 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent aaf5f675
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 189 Version: 190
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ #ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 189 #define LIBYUV_VERSION 190
#endif // INCLUDE_LIBYUV_VERSION_H_ #endif // INCLUDE_LIBYUV_VERSION_H_
...@@ -67,16 +67,12 @@ extern "C" { ...@@ -67,16 +67,12 @@ extern "C" {
#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM) #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
#define HAS_MIRRORROW_NEON #define HAS_MIRRORROW_NEON
#define HAS_SPLITUV_NEON #define HAS_SPLITUV_NEON
#define HAS_COPYROW_NEON
#define HAS_I420TOARGBROW_NEON #define HAS_I420TOARGBROW_NEON
#define HAS_I420TOBGRAROW_NEON #define HAS_I420TOBGRAROW_NEON
#define HAS_I420TOABGRROW_NEON #define HAS_I420TOABGRROW_NEON
#endif #endif
// The following are available on Neon platforms
#if defined(__ARM_NEON__) && !defined(__thumb__) && !defined(YUV_DISABLE_ASM)
#define HAS_COPYROW_NEON
#endif
#if defined(_MSC_VER) #if defined(_MSC_VER)
#define SIMD_ALIGNED(var) __declspec(align(16)) var #define SIMD_ALIGNED(var) __declspec(align(16)) var
typedef __declspec(align(16)) signed char vec8[16]; typedef __declspec(align(16)) signed char vec8[16];
......
...@@ -187,15 +187,15 @@ void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { ...@@ -187,15 +187,15 @@ void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
#endif #endif
#if defined(HAS_COPYROW_NEON) #if defined(HAS_COPYROW_NEON)
// TODO(fbarchard): Test with and without pld // TODO(fbarchard): Test without pld on NexusS
// "pld [%0, #0xC0] \n" // preload
// Copy multiple of 64 // Copy multiple of 64
void CopyRow_NEON(const uint8* src, uint8* dst, int count) { void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
asm volatile ( asm volatile (
"1: \n" "1: \n"
"vld1.u8 {q0,q1,q2,q3}, [%0]! \n" // load 64 "pld [%0, #0xC0] \n" // preload
"vldm %0!,{q0,q1,q2,q3} \n" // load 64
"subs %2, %2, #64 \n" // 64 processed per loop "subs %2, %2, #64 \n" // 64 processed per loop
"vst1.u8 {q0,q1,q2,q3}, [%1]! \n" // store 64 "vstm %1!,{q0,q1,q2,q3} \n" // store 64
"bhi 1b \n" "bhi 1b \n"
: "+r"(src), // %0 : "+r"(src), // %0
"+r"(dst), // %1 "+r"(dst), // %1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment