Commit f7d9b9fb authored by fbarchard@google.com

change vector range notation to a list of registers for clang compatibility.
break compare into 2 neon files for consistency with other neon64 files.
BUG=357
TESTED=local ios build
R=harryjin@google.com

Review URL: https://webrtc-codereview.appspot.com/30379004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1085 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent a62a97f1
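The notation change described above is purely syntactic: AArch64 multi-register load/store operands can be written either as a vector range, e.g. {v0.8b-v3.8b}, or as an explicit list of registers, e.g. {v0.8b,v1.8b,v2.8b,v3.8b}. GNU as accepts both spellings, but Clang's integrated assembler did not accept the range form at the time, hence the rewrite. Below is a minimal stand-alone sketch of the list form in inline assembly; the function name and the trivial 32-byte copy it performs are illustrative only, not code from this commit.

#include <stdint.h>

#if defined(__aarch64__)
// Illustrative only: ld4 deinterleaves 32 bytes into v0-v3, st4 reinterleaves
// and stores them back, so the net effect is a 32-byte copy.
void Copy32WithRegisterList(const uint8_t* src, uint8_t* dst) {
  asm volatile (
    "ld4   {v0.8b,v1.8b,v2.8b,v3.8b}, [%0]    \n"  // explicit register list
    "st4   {v0.8b,v1.8b,v2.8b,v3.8b}, [%1]    \n"
    :
    : "r"(src), "r"(dst)
    : "memory", "v0", "v1", "v2", "v3");
}
#endif  // defined(__aarch64__)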
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 1083
+Version: 1084
License: BSD
License File: LICENSE
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1083
+#define LIBYUV_VERSION 1084
#endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
@@ -73,6 +73,7 @@
      'sources': [
        # sources.
        'source/compare_neon.cc',
+       'source/compare_neon64.cc',
        'source/rotate_neon.cc',
        'source/rotate_neon64.cc',
        'source/row_neon.cc',
@@ -57,45 +57,6 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
   return sse;
 }
-#elif !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
-  volatile uint32 sse;
-  asm volatile (
-    "eor        v16.16b, v16.16b, v16.16b     \n"
-    "eor        v18.16b, v18.16b, v18.16b     \n"
-    "eor        v17.16b, v17.16b, v17.16b     \n"
-    "eor        v19.16b, v19.16b, v19.16b     \n"
-    ".p2align   2                             \n"
-  "1:                                         \n"
-    MEMACCESS(0)
-    "ld1        {v0.16b}, [%0], #16           \n"
-    MEMACCESS(1)
-    "ld1        {v1.16b}, [%1], #16           \n"
-    "subs       %2, %2, #16                   \n"
-    "usubl      v2.8h, v0.8b, v1.8b           \n"
-    "usubl2     v3.8h, v0.16b, v1.16b         \n"
-    "smlal      v16.4s, v2.4h, v2.4h          \n"
-    "smlal      v17.4s, v3.4h, v3.4h          \n"
-    "smlal2     v18.4s, v2.8h, v2.8h          \n"
-    "smlal2     v19.4s, v3.8h, v3.8h          \n"
-    "b.gt       1b                            \n"
-    "add        v16.4s, v16.4s, v17.4s        \n"
-    "add        v18.4s, v18.4s, v19.4s        \n"
-    "add        v19.4s, v16.4s, v18.4s        \n"
-    "addv       s0, v19.4s                    \n"
-    "fmov       %w3, s0                       \n"
-    : "+r"(src_a),
-      "+r"(src_b),
-      "+r"(count),
-      "=r"(sse)
-    :
-    : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
-  return sse;
-}
 #endif  // defined(__ARM_NEON__) && !defined(__aarch64__)
 #ifdef __cplusplus
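The other half of the change ("break compare into 2 neon files") is what the hunk above shows: the AArch64 SumSquareError_NEON branch is deleted from source/compare_neon.cc and moves, unchanged, into the new source/compare_neon64.cc below. A rough sketch of the per-file guards that result (paraphrased, function bodies elided; uint32 and uint8 are libyuv's typedefs from libyuv/basic_types.h):

#include "libyuv/basic_types.h"

// source/compare_neon.cc now only builds its kernel for 32-bit ARM:
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
    !defined(__aarch64__)
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
#endif  // defined(__ARM_NEON__) && !defined(__aarch64__)

// source/compare_neon64.cc carries the AArch64 kernel:
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
#endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)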
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
volatile uint32 sse;
asm volatile (
"eor v16.16b, v16.16b, v16.16b \n"
"eor v18.16b, v18.16b, v18.16b \n"
"eor v17.16b, v17.16b, v17.16b \n"
"eor v19.16b, v19.16b, v19.16b \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"ld1 {v0.16b}, [%0], #16 \n"
MEMACCESS(1)
"ld1 {v1.16b}, [%1], #16 \n"
"subs %2, %2, #16 \n"
"usubl v2.8h, v0.8b, v1.8b \n"
"usubl2 v3.8h, v0.16b, v1.16b \n"
"smlal v16.4s, v2.4h, v2.4h \n"
"smlal v17.4s, v3.4h, v3.4h \n"
"smlal2 v18.4s, v2.8h, v2.8h \n"
"smlal2 v19.4s, v3.8h, v3.8h \n"
"b.gt 1b \n"
"add v16.4s, v16.4s, v17.4s \n"
"add v18.4s, v18.4s, v19.4s \n"
"add v19.4s, v16.4s, v18.4s \n"
"addv s0, v19.4s \n"
"fmov %w3, s0 \n"
: "+r"(src_a),
"+r"(src_b),
"+r"(count),
"=r"(sse)
:
: "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
return sse;
}
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
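For readers who do not want to decode the assembly: the kernel above accumulates squared byte differences 16 bytes per iteration into four 32-bit accumulators (v16-v19) and folds them into one 32-bit value with the add/addv sequence before returning it through %w3. Here is a scalar sketch of the same computation (illustration only, not libyuv's own C fallback; like the NEON loop, callers hand it a count that is a multiple of 16, although the scalar form works for any count):

#include <stdint.h>

// Scalar sketch: sum of (a[i] - b[i])^2 over all bytes, the quantity the
// NEON kernel above computes 16 bytes at a time.
uint32_t SumSquareErrorScalarSketch(const uint8_t* src_a,
                                    const uint8_t* src_b, int count) {
  uint32_t sse = 0u;
  for (int i = 0; i < count; ++i) {
    int diff = (int)src_a[i] - (int)src_b[i];  // in [-255, 255]
    sse += (uint32_t)(diff * diff);            // squared difference
  }
  return sse;
}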
@@ -28,7 +28,7 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
  "1:                                        \n"
    // load even pixels into v0, odd into v1
    MEMACCESS(0)
-   "ld2        {v0.16b, v1.16b}, [%0], #32  \n"
+   "ld2        {v0.16b,v1.16b}, [%0], #32   \n"
    "subs       %2, %2, #16                  \n"  // 16 processed per loop
    MEMACCESS(1)
    "st1        {v1.16b}, [%1], #16          \n"  // store odd pixels
@@ -51,7 +51,7 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
    "add        %1, %1, %0                   \n"
  "1:                                        \n"
    MEMACCESS(0)
-   "ld1        {v0.16b, v1.16b}, [%0], #32  \n"  // load row 1 and post inc
+   "ld1        {v0.16b,v1.16b}, [%0], #32   \n"  // load row 1 and post inc
    MEMACCESS(1)
    "ld1        {v2.16b, v3.16b}, [%1], #32  \n"  // load row 2 and post inc
    "subs       %3, %3, #16                  \n"  // 16 processed per loop
@@ -80,7 +80,7 @@ void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
  asm volatile (
  "1:                                        \n"
    MEMACCESS(0)
-   "ld4        {v0.8b-v3.8b}, [%0], #32     \n"  // src line 0
+   "ld4        {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32  \n"  // src line 0
    "subs       %2, %2, #8                   \n"  // 8 processed per loop
    MEMACCESS(1)
    "st1        {v2.8b}, [%1], #8            \n"
@@ -142,11 +142,11 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
  asm volatile (
  "1:                                        \n"
    MEMACCESS(0)
-   "ld4        {v0.8b-v3.8b}, [%0], #32     \n"  // src line 0
+   "ld4        {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32  \n"  // src line 0
    "subs       %2, %2, #24                  \n"
    "mov        v2.8b, v3.8b                 \n"  // order v0, v1, v2
    MEMACCESS(1)
-   "st3        {v0.8b-v2.8b}, [%1], #24     \n"
+   "st3        {v0.8b,v1.8b,v2.8b}, [%1], #24  \n"
    "b.gt       1b                           \n"
  : "+r"(src_ptr),          // %0
    "+r"(dst_ptr),          // %1
@@ -166,9 +166,9 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
    "add        %3, %3, %0                   \n"
  "1:                                        \n"
    MEMACCESS(0)
-   "ld4        {v0.8b-v3.8b}, [%0], #32     \n"  // src line 0
+   "ld4        {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32  \n"  // src line 0
    MEMACCESS(3)
-   "ld4        {v4.8b-v7.8b}, [%3], #32     \n"  // src line 1
+   "ld4        {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32  \n"  // src line 1
    "subs       %2, %2, #24                  \n"
    // filter src line 0 with src line 1
@@ -205,7 +205,7 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
    "uqrshrn    v2.8b, v16.8h, #2            \n"
    MEMACCESS(1)
-   "st3        {v0.8b-v2.8b}, [%1], #24     \n"
+   "st3        {v0.8b,v1.8b,v2.8b}, [%1], #24  \n"
    "b.gt       1b                           \n"
  : "+r"(src_ptr),          // %0
@@ -228,9 +228,9 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
    "add        %3, %3, %0                   \n"
  "1:                                        \n"
    MEMACCESS(0)
-   "ld4        {v0.8b-v3.8b}, [%0], #32     \n"  // src line 0
+   "ld4        {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32  \n"  // src line 0
    MEMACCESS(3)
-   "ld4        {v4.8b-v7.8b}, [%3], #32     \n"  // src line 1
+   "ld4        {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32  \n"  // src line 1
    "subs       %2, %2, #24                  \n"
    // average src line 0 with src line 1
    "urhadd     v0.8b, v0.8b, v4.8b          \n"
@@ -252,7 +252,7 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
    "uqrshrn    v2.8b, v4.8h, #2             \n"
    MEMACCESS(1)
-   "st3        {v0.8b-v2.8b}, [%1], #24     \n"
+   "st3        {v0.8b,v1.8b,v2.8b}, [%1], #24  \n"
    "b.gt       1b                           \n"
  : "+r"(src_ptr),          // %0
    "+r"(dst_ptr),          // %1
@@ -285,9 +285,9 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
    "ld1        {v3.16b}, [%3]               \n"
  "1:                                        \n"
    MEMACCESS(0)
-   "ld1        {v0.16b, v1.16b}, [%0], #32  \n"
+   "ld1        {v0.16b,v1.16b}, [%0], #32   \n"
    "subs       %2, %2, #12                  \n"
-   "tbl        v2.16b, {v0.16b, v1.16b}, v3.16b  \n"
+   "tbl        v2.16b, {v0.16b,v1.16b}, v3.16b   \n"
    MEMACCESS(1)
    "st1        {v2.8b}, [%1], #8            \n"
    MEMACCESS(1)
@@ -325,11 +325,11 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
    // 20 60 21 61 22 62 23 63
    // 30 70 31 71 32 72 33 73
    MEMACCESS(0)
-   "ld4        {v0.8b-v3.8b}, [%0], #32     \n"
+   "ld4        {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32  \n"
    MEMACCESS(3)
-   "ld4        {v4.8b-v7.8b}, [%3], #32     \n"
+   "ld4        {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32  \n"
    MEMACCESS(4)
-   "ld4        {v16.8b-v19.8b}, [%4], #32   \n"
+   "ld4        {v16.8b,v17.8b,v18.8b,v19.8b}, [%4], #32  \n"
    "subs       %2, %2, #12                  \n"
    // Shuffle the input data around to get align the data
@@ -451,9 +451,9 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
    // 20 60 21 61 22 62 23 63
    // 30 70 31 71 32 72 33 73
    MEMACCESS(0)
-   "ld4        {v0.8b-v3.8b}, [%0], #32     \n"
+   "ld4        {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32  \n"
    MEMACCESS(3)
-   "ld4        {v4.8b-v7.8b}, [%3], #32     \n"
+   "ld4        {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32  \n"
    "subs       %2, %2, #12                  \n"
    // Shuffle the input data around to get align the data
@@ -673,14 +673,14 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
    "add        %1, %1, %0                   \n"
  "1:                                        \n"
    MEMACCESS (0)
-   "ld4        {v0.16b - v3.16b}, [%0], #64   \n"  // load 8 ARGB pixels.
+   "ld4        {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64  \n"  // load 8 ARGB pixels.
    "subs       %3, %3, #8                   \n"  // 8 processed per loop.
    "uaddlp     v0.8h, v0.16b                \n"  // B 16 bytes -> 8 shorts.
    "uaddlp     v1.8h, v1.16b                \n"  // G 16 bytes -> 8 shorts.
    "uaddlp     v2.8h, v2.16b                \n"  // R 16 bytes -> 8 shorts.
    "uaddlp     v3.8h, v3.16b                \n"  // A 16 bytes -> 8 shorts.
    MEMACCESS (1)
-   "ld4        {v16.16b - v19.16b}, [%1], #64  \n"  // load 8 more ARGB pixels.
+   "ld4        {v16.16b,v17.16b,v18.16b,v19.16b}, [%1], #64  \n"  // load 8 more ARGB pixels.
    "uadalp     v0.8h, v16.16b               \n"  // B 16 bytes -> 8 shorts.
    "uadalp     v1.8h, v17.16b               \n"  // G 16 bytes -> 8 shorts.
    "uadalp     v2.8h, v18.16b               \n"  // R 16 bytes -> 8 shorts.
@@ -690,7 +690,7 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
    "rshrn      v2.8b, v2.8h, #2             \n"
    "rshrn      v3.8b, v3.8h, #2             \n"
    MEMACCESS (2)
-   "st4        {v0.8b - v3.8b}, [%2], #32   \n"
+   "st4        {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32  \n"
    "b.gt       1b                           \n"
  : "+r" (src_ptr),         // %0
    "+r" (src_stride),      // %1