Commit 0eb196f8 authored by zhongwei.yao@arm.com's avatar zhongwei.yao@arm.com

clear aarch64 related macro and fix bugs

fix 2 bugs:
 - build bug libyuv.gyp
 - runtime bug in ScaleRowDown38_2_Box_NEON
BUG=
TESTED=libyuv_unittest
R=fbarchard@google.com, fbarchard@chromium.org

Review URL: https://webrtc-codereview.appspot.com/23939004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1117 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 205c1440
...@@ -44,21 +44,13 @@ extern "C" { ...@@ -44,21 +44,13 @@ extern "C" {
// The following are available on Neon platforms: // The following are available on Neon platforms:
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ #if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON)) (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_SCALEROWDOWN2_NEON #define HAS_SCALEROWDOWN2_NEON
#define HAS_SCALEROWDOWN4_NEON #define HAS_SCALEROWDOWN4_NEON
#define HAS_SCALEROWDOWN34_NEON #define HAS_SCALEROWDOWN34_NEON
#define HAS_SCALEROWDOWN38_NEON #define HAS_SCALEROWDOWN38_NEON
#define HAS_SCALEARGBROWDOWNEVEN_NEON #define HAS_SCALEARGBROWDOWNEVEN_NEON
#define HAS_SCALEARGBROWDOWN2_NEON #define HAS_SCALEARGBROWDOWN2_NEON
#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
(defined(__aarch64__) || defined(LIBYUV_NEON))
#define HAS_SCALEROWDOWN2_NEON
#define HAS_SCALEROWDOWN4_NEON
#define HAS_SCALEROWDOWN34_NEON
#define HAS_SCALEROWDOWN38_NEON
#define HAS_SCALEARGBROWDOWN2_NEON
#define HAS_SCALEARGBROWDOWNEVEN_NEON
#endif #endif
// The following are available on Mips platforms: // The following are available on Mips platforms:
......
...@@ -130,16 +130,6 @@ ...@@ -130,16 +130,6 @@
'LIBYUV_DISABLE_X86', 'LIBYUV_DISABLE_X86',
], ],
}], }],
['OS == "android" and target_arch == "arm64"', {
'ldflags': [
'-Wl,--dynamic-linker,/system/bin/linker64',
],
}],
['OS == "android" and target_arch != "arm64"', {
'ldflags': [
'-Wl,--dynamic-linker,/system/bin/linker',
],
}],
], #conditions ], #conditions
'defines': [ 'defines': [
# Enable the following 3 macros to turn off assembly for specified CPU. # Enable the following 3 macros to turn off assembly for specified CPU.
...@@ -159,6 +149,18 @@ ...@@ -159,6 +149,18 @@
'include', 'include',
'.', '.',
], ],
'conditions': [
['OS == "android" and target_arch == "arm64"', {
'ldflags': [
'-Wl,--dynamic-linker,/system/bin/linker64',
],
}],
['OS == "android" and target_arch != "arm64"', {
'ldflags': [
'-Wl,--dynamic-linker,/system/bin/linker',
],
}],
], #conditions
}, },
'sources': [ 'sources': [
'<@(libyuv_sources)', '<@(libyuv_sources)',
......
...@@ -42,11 +42,7 @@ extern "C" { ...@@ -42,11 +42,7 @@ extern "C" {
#endif #endif
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ #if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON)) (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
#define HAS_MIRRORROW_NEON
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
#define HAS_MIRRORROW_UV_NEON
void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
#define HAS_TRANSPOSE_WX8_NEON #define HAS_TRANSPOSE_WX8_NEON
void TransposeWx8_NEON(const uint8* src, int src_stride, void TransposeWx8_NEON(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width); uint8* dst, int dst_stride, int width);
...@@ -55,23 +51,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride, ...@@ -55,23 +51,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a, uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b, uint8* dst_b, int dst_stride_b,
int width); int width);
//following symbol is temporally enable for aarch64, until all neon optimized #endif
//functions have been ported to aarch64
#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
(defined(__aarch64__) || defined(LIBYUV_NEON))
// #define HAS_MIRRORROW_NEON
// void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
// #define HAS_MIRRORROW_UV_NEON
// void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
#define HAS_TRANSPOSE_WX8_NEON
void TransposeWx8_NEON(const uint8* src, int src_stride,
uint8* dst, int dst_stride, int width);
#define HAS_TRANSPOSE_UVWX8_NEON
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width);
#endif // defined(__ARM_NEON__)
#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \ #if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
defined(__mips__) && \ defined(__mips__) && \
......
...@@ -20,7 +20,6 @@ extern "C" { ...@@ -20,7 +20,6 @@ extern "C" {
// This module is for GCC Neon armv8 64 bit. // This module is for GCC Neon armv8 64 bit.
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef HAS_SCALEROWDOWN2_NEON
// Read 32x1 throw away even pixels, and write 16x1. // Read 32x1 throw away even pixels, and write 16x1.
void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) { uint8* dst, int dst_width) {
...@@ -40,9 +39,7 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -40,9 +39,7 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "v0", "v1" // Clobber List : "v0", "v1" // Clobber List
); );
} }
#endif //HAS_SCALEROWDOWN2_NEON
#ifdef HAS_SCALEROWDOWN2_NEON
// Read 32x2 average down and write 16x1. // Read 32x2 average down and write 16x1.
void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) { uint8* dst, int dst_width) {
...@@ -72,9 +69,7 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -72,9 +69,7 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "v0", "v1", "v2", "v3" // Clobber List : "v0", "v1", "v2", "v3" // Clobber List
); );
} }
#endif //HAS_SCALEROWDOWN2_NEON
#ifdef HAS_SCALEROWDOWN4_NEON
void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
asm volatile ( asm volatile (
...@@ -92,9 +87,7 @@ void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -92,9 +87,7 @@ void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "v0", "v1", "v2", "v3", "memory", "cc" : "v0", "v1", "v2", "v3", "memory", "cc"
); );
} }
#endif //HAS_SCALEROWDOWN4_NEON
#ifdef HAS_SCALEROWDOWN4_NEON
void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
const uint8* src_ptr1 = src_ptr + src_stride; const uint8* src_ptr1 = src_ptr + src_stride;
...@@ -130,9 +123,7 @@ asm volatile ( ...@@ -130,9 +123,7 @@ asm volatile (
: "v0", "v1", "v2", "v3", "memory", "cc" : "v0", "v1", "v2", "v3", "memory", "cc"
); );
} }
#endif //HAS_SCALEROWDOWN4_NEON
#ifdef HAS_SCALEROWDOWN34_NEON
// Down scale from 4 to 3 pixels. Use the neon multilane read/write // Down scale from 4 to 3 pixels. Use the neon multilane read/write
// to load up the every 4th pixel into a 4 different registers. // to load up the every 4th pixel into a 4 different registers.
// Point samples 32 pixels to 24 pixels. // Point samples 32 pixels to 24 pixels.
...@@ -155,9 +146,7 @@ void ScaleRowDown34_NEON(const uint8* src_ptr, ...@@ -155,9 +146,7 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
: "v0", "v1", "v2", "v3", "memory", "cc" : "v0", "v1", "v2", "v3", "memory", "cc"
); );
} }
#endif //HAS_SCALEROWDOWN34_NEON
#ifdef HAS_SCALEROWDOWN34_NEON
void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
...@@ -217,9 +206,7 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, ...@@ -217,9 +206,7 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
"v20", "memory", "cc" "v20", "memory", "cc"
); );
} }
#endif //ScaleRowDown34_0_Box_NEON
#ifdef HAS_SCALEROWDOWN34_NEON
void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) { uint8* dst_ptr, int dst_width) {
...@@ -262,9 +249,7 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, ...@@ -262,9 +249,7 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "memory", "cc" : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "memory", "cc"
); );
} }
#endif //HAS_SCALEROWDOWN34_NEON
#ifdef HAS_SCALEROWDOWN38_NEON
static uvec8 kShuf38 = static uvec8 kShuf38 =
{ 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 }; { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 };
static uvec8 kShuf38_2 = static uvec8 kShuf38_2 =
...@@ -301,9 +286,6 @@ void ScaleRowDown38_NEON(const uint8* src_ptr, ...@@ -301,9 +286,6 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
); );
} }
#endif //HAS_SCALEROWDOWN38_NEON
#ifdef HAS_SCALEROWDOWN38_NEON
// 32x3 -> 12x1 // 32x3 -> 12x1
void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
...@@ -432,9 +414,7 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, ...@@ -432,9 +414,7 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
"v30", "v31", "memory", "cc" "v30", "v31", "memory", "cc"
); );
} }
#endif //HAS_SCALEROWDOWN38_NEON
#ifdef HAS_SCALEROWDOWN38_NEON
// 32x2 -> 12x1 // 32x2 -> 12x1
void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride, ptrdiff_t src_stride,
...@@ -456,7 +436,7 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, ...@@ -456,7 +436,7 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
MEMACCESS(0) MEMACCESS(0)
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n"
MEMACCESS(3) MEMACCESS(3)
"ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n" "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n"
"subs %3, %3, #12 \n" "subs %3, %3, #12 \n"
// Shuffle the input data around to get align the data // Shuffle the input data around to get align the data
...@@ -541,7 +521,6 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, ...@@ -541,7 +521,6 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
"v18", "v19", "v30", "v31", "memory", "cc" "v18", "v19", "v30", "v31", "memory", "cc"
); );
} }
#endif //HAS_SCALEROWDOWN38_NEON
// 16x2 -> 16x1 // 16x2 -> 16x1
void ScaleFilterRows_NEON(uint8* dst_ptr, void ScaleFilterRows_NEON(uint8* dst_ptr,
...@@ -643,7 +622,6 @@ void ScaleFilterRows_NEON(uint8* dst_ptr, ...@@ -643,7 +622,6 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
); );
} }
#ifdef HAS_SCALEARGBROWDOWN2_NEON
void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) { uint8* dst, int dst_width) {
asm volatile ( asm volatile (
...@@ -666,9 +644,7 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -666,9 +644,7 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List : "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List
); );
} }
#endif //HAS_SCALEARGBROWDOWN2_NEON
#ifdef HAS_SCALEARGBROWDOWN2_NEON
void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) { uint8* dst, int dst_width) {
asm volatile ( asm volatile (
...@@ -703,9 +679,7 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, ...@@ -703,9 +679,7 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
: "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19" : "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"
); );
} }
#endif //HAS_SCALEARGBROWDOWN2_NEON
#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
// Reads 4 pixels at a time. // Reads 4 pixels at a time.
// Alignment requirement: src_argb 4 byte aligned. // Alignment requirement: src_argb 4 byte aligned.
void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride, void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
...@@ -731,9 +705,7 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride, ...@@ -731,9 +705,7 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
: "memory", "cc", "v0" : "memory", "cc", "v0"
); );
} }
#endif //HAS_SCALEARGBROWDOWNEVEN_NEON
#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
// Reads 4 pixels at a time. // Reads 4 pixels at a time.
// Alignment requirement: src_argb 4 byte aligned. // Alignment requirement: src_argb 4 byte aligned.
// TODO, might be worth another optimization pass in future. // TODO, might be worth another optimization pass in future.
...@@ -786,7 +758,6 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride, ...@@ -786,7 +758,6 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
); );
} }
#endif // HAS_SCALEARGBROWDOWNEVEN_NEON
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef __cplusplus #ifdef __cplusplus
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment