Commit 4b7a04e8 authored by fbarchard@google.com

port neon to arm64. the register names have changed from r0 to w0 or x0…

Port NEON to ARM64. The register names have changed from r0 to w0 or x0, depending on size. Passing them as parameters (e.g. %0) makes the code register-name agnostic.
BUG=333
TESTED=32 bit build still works.
R=nfullagar@chromium.org

Review URL: https://webrtc-codereview.appspot.com/20669005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1016 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 4e5e44e2
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1015 Version: 1016
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1015 #define LIBYUV_VERSION 1016
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -16,8 +16,7 @@ extern "C" { ...@@ -16,8 +16,7 @@ extern "C" {
#endif #endif
// This module is for GCC Neon. // This module is for GCC Neon.
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \ #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
!defined(__native_client__)
// NEON downscalers with interpolation. // NEON downscalers with interpolation.
// Provided by Fritz Koenig // Provided by Fritz Koenig
...@@ -95,18 +94,20 @@ void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -95,18 +94,20 @@ void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
asm volatile ( const uint8* src_ptr1 = src_ptr + src_stride;
"add r4, %0, %3 \n" const uint8* src_ptr2 = src_ptr + src_stride * 2;
"add r5, r4, %3 \n" const uint8* src_ptr3 = src_ptr + src_stride * 3;
"add %3, r5, %3 \n" asm volatile (
".p2align 2 \n" ".p2align 2 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n" // load up 16x4 "vld1.8 {q0}, [%0]! \n" // load up 16x4
"vld1.8 {q1}, [r4]! \n"
"vld1.8 {q2}, [r5]! \n"
MEMACCESS(3) MEMACCESS(3)
"vld1.8 {q3}, [%3]! \n" "vld1.8 {q1}, [%3]! \n"
MEMACCESS(4)
"vld1.8 {q2}, [%4]! \n"
MEMACCESS(5)
"vld1.8 {q3}, [%5]! \n"
"subs %2, %2, #4 \n" "subs %2, %2, #4 \n"
"vpaddl.u8 q0, q0 \n" "vpaddl.u8 q0, q0 \n"
"vpadal.u8 q0, q1 \n" "vpadal.u8 q0, q1 \n"
...@@ -118,11 +119,14 @@ void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -118,11 +119,14 @@ void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
MEMACCESS(1) MEMACCESS(1)
"vst1.32 {d0[0]}, [%1]! \n" "vst1.32 {d0[0]}, [%1]! \n"
"bgt 1b \n" "bgt 1b \n"
: "+r"(src_ptr), // %0 : "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1 "+r"(dst_ptr), // %1
"+r"(dst_width) // %2 "+r"(dst_width), // %2
: "r"(src_stride) // %3 "+r"(src_ptr1), // %3
: "r4", "r5", "q0", "q1", "q2", "q3", "memory", "cc" "+r"(src_ptr2), // %4
"+r"(src_ptr3) // %5
:
: "q0", "q1", "q2", "q3", "memory", "cc"
); );
} }
...@@ -295,11 +299,12 @@ void ScaleRowDown38_NEON(const uint8* src_ptr, ...@@ -295,11 +299,12 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
const uint8* src_ptr1 = src_ptr + src_stride * 2;
asm volatile ( asm volatile (
"vld1.16 {q13}, [%4] \n" "vld1.16 {q13}, [%5] \n"
"vld1.8 {q14}, [%5] \n" "vld1.8 {q14}, [%6] \n"
"vld1.8 {q15}, [%6] \n" "vld1.8 {q15}, [%7] \n"
"add r4, %0, %3, lsl #1 \n"
"add %3, %0 \n" "add %3, %0 \n"
".p2align 2 \n" ".p2align 2 \n"
"1: \n" "1: \n"
...@@ -312,7 +317,7 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, ...@@ -312,7 +317,7 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n"
MEMACCESS(3) MEMACCESS(3)
"vld4.8 {d4, d5, d6, d7}, [%3]! \n" "vld4.8 {d4, d5, d6, d7}, [%3]! \n"
"vld4.8 {d16, d17, d18, d19}, [r4]! \n" "vld4.8 {d16, d17, d18, d19}, [%4]! \n"
"subs %2, %2, #12 \n" "subs %2, %2, #12 \n"
// Shuffle the input data around to get align the data // Shuffle the input data around to get align the data
...@@ -397,12 +402,12 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, ...@@ -397,12 +402,12 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
: "+r"(src_ptr), // %0 : "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1 "+r"(dst_ptr), // %1
"+r"(dst_width), // %2 "+r"(dst_width), // %2
"+r"(src_stride) // %3 "+r"(src_stride), // %3
: "r"(&kMult38_Div6), // %4 "+r"(src_ptr1) // %4
"r"(&kShuf38_2), // %5 : "r"(&kMult38_Div6), // %5
"r"(&kMult38_Div9) // %6 "r"(&kShuf38_2), // %6
: "r4", "q0", "q1", "q2", "q3", "q8", "q9", "r"(&kMult38_Div9) // %7
"q13", "q14", "q15", "memory", "cc" : "q0", "q1", "q2", "q3", "q8", "q9", "q13", "q14", "q15", "memory", "cc"
); );
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment