Commit b33dc47b authored by fbarchard@google.com

Sobel: use LL-suffixed constants so they are passed to the inline asm as int64

BUG=437
TESTED=local ios build

Review URL: https://webrtc-codereview.appspot.com/47129004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1404 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 1be66a79
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1401 Version: 1404
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1401 #define LIBYUV_VERSION 1404
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -825,15 +825,16 @@ void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) { ...@@ -825,15 +825,16 @@ void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) {
#ifdef HAS_MIRRORROW_NEON #ifdef HAS_MIRRORROW_NEON
void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
int64 width64 = (int64) width;
asm volatile ( asm volatile (
// Start at end of source row. // Start at end of source row.
"add %0, %0, %w2 \n" "add %0, %0, %2 \n"
"sub %0, %0, #16 \n" "sub %0, %0, #16 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"ld1 {v0.16b}, [%0], %3 \n" // src -= 16 "ld1 {v0.16b}, [%0], %3 \n" // src -= 16
"subs %w2, %w2, #16 \n" // 16 pixels per loop. "subs %2, %2, #16 \n" // 16 pixels per loop.
"rev64 v0.16b, v0.16b \n" "rev64 v0.16b, v0.16b \n"
MEMACCESS(1) MEMACCESS(1)
"st1 {v0.D}[1], [%1], #8 \n" // dst += 16 "st1 {v0.D}[1], [%1], #8 \n" // dst += 16
...@@ -842,7 +843,7 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { ...@@ -842,7 +843,7 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
"b.gt 1b \n" "b.gt 1b \n"
: "+r"(src), // %0 : "+r"(src), // %0
"+r"(dst), // %1 "+r"(dst), // %1
"+r"(width) // %2 "+r"(width64) // %2
: "r"((ptrdiff_t)-16) // %3 : "r"((ptrdiff_t)-16) // %3
: "cc", "memory", "v0" : "cc", "memory", "v0"
); );
...@@ -852,15 +853,16 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { ...@@ -852,15 +853,16 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
#ifdef HAS_MIRRORUVROW_NEON #ifdef HAS_MIRRORUVROW_NEON
void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) { int width) {
int64 width64 = (int64) width;
asm volatile ( asm volatile (
// Start at end of source row. // Start at end of source row.
"add %0, %0, %w3, lsl #1 \n" "add %0, %0, %3, lsl #1 \n"
"sub %0, %0, #16 \n" "sub %0, %0, #16 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"ld2 {v0.8b, v1.8b}, [%0], %4 \n" // src -= 16 "ld2 {v0.8b, v1.8b}, [%0], %4 \n" // src -= 16
"subs %w3, %w3, #8 \n" // 8 pixels per loop. "subs %3, %3, #8 \n" // 8 pixels per loop.
"rev64 v0.8b, v0.8b \n" "rev64 v0.8b, v0.8b \n"
"rev64 v1.8b, v1.8b \n" "rev64 v1.8b, v1.8b \n"
MEMACCESS(1) MEMACCESS(1)
...@@ -871,7 +873,7 @@ void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, ...@@ -871,7 +873,7 @@ void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
: "+r"(src_uv), // %0 : "+r"(src_uv), // %0
"+r"(dst_u), // %1 "+r"(dst_u), // %1
"+r"(dst_v), // %2 "+r"(dst_v), // %2
"+r"(width) // %3 "+r"(width64) // %3
: "r"((ptrdiff_t)-16) // %4 : "r"((ptrdiff_t)-16) // %4
: "cc", "memory", "v0", "v1" : "cc", "memory", "v0", "v1"
); );
...@@ -880,15 +882,16 @@ void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, ...@@ -880,15 +882,16 @@ void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
#ifdef HAS_ARGBMIRRORROW_NEON #ifdef HAS_ARGBMIRRORROW_NEON
void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) { void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
int64 width64 = (int64) width;
asm volatile ( asm volatile (
// Start at end of source row. // Start at end of source row.
"add %0, %0, %w2, lsl #2 \n" "add %0, %0, %2, lsl #2 \n"
"sub %0, %0, #16 \n" "sub %0, %0, #16 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"ld1 {v0.16b}, [%0], %3 \n" // src -= 16 "ld1 {v0.16b}, [%0], %3 \n" // src -= 16
"subs %w2, %w2, #4 \n" // 4 pixels per loop. "subs %2, %2, #4 \n" // 4 pixels per loop.
"rev64 v0.4s, v0.4s \n" "rev64 v0.4s, v0.4s \n"
MEMACCESS(1) MEMACCESS(1)
"st1 {v0.D}[1], [%1], #8 \n" // dst += 16 "st1 {v0.D}[1], [%1], #8 \n" // dst += 16
...@@ -897,7 +900,7 @@ void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) { ...@@ -897,7 +900,7 @@ void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
"b.gt 1b \n" "b.gt 1b \n"
: "+r"(src), // %0 : "+r"(src), // %0
"+r"(dst), // %1 "+r"(dst), // %1
"+r"(width) // %2 "+r"(width64) // %2
: "r"((ptrdiff_t)-16) // %3 : "r"((ptrdiff_t)-16) // %3
: "cc", "memory", "v0" : "cc", "memory", "v0"
); );
...@@ -3019,21 +3022,21 @@ void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1, ...@@ -3019,21 +3022,21 @@ void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
asm volatile ( asm volatile (
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"ld1 {v0.8b}, [%0],%w5 \n" // top "ld1 {v0.8b}, [%0],%5 \n" // top
MEMACCESS(0) MEMACCESS(0)
"ld1 {v1.8b}, [%0],%w6 \n" "ld1 {v1.8b}, [%0],%6 \n"
"usubl v0.8h, v0.8b, v1.8b \n" "usubl v0.8h, v0.8b, v1.8b \n"
MEMACCESS(1) MEMACCESS(1)
"ld1 {v2.8b}, [%1],%w5 \n" // center * 2 "ld1 {v2.8b}, [%1],%5 \n" // center * 2
MEMACCESS(1) MEMACCESS(1)
"ld1 {v3.8b}, [%1],%w6 \n" "ld1 {v3.8b}, [%1],%6 \n"
"usubl v1.8h, v2.8b, v3.8b \n" "usubl v1.8h, v2.8b, v3.8b \n"
"add v0.8h, v0.8h, v1.8h \n" "add v0.8h, v0.8h, v1.8h \n"
"add v0.8h, v0.8h, v1.8h \n" "add v0.8h, v0.8h, v1.8h \n"
MEMACCESS(2) MEMACCESS(2)
"ld1 {v2.8b}, [%2],%w5 \n" // bottom "ld1 {v2.8b}, [%2],%5 \n" // bottom
MEMACCESS(2) MEMACCESS(2)
"ld1 {v3.8b}, [%2],%w6 \n" "ld1 {v3.8b}, [%2],%6 \n"
"subs %w4, %w4, #8 \n" // 8 pixels "subs %w4, %w4, #8 \n" // 8 pixels
"usubl v1.8h, v2.8b, v3.8b \n" "usubl v1.8h, v2.8b, v3.8b \n"
"add v0.8h, v0.8h, v1.8h \n" "add v0.8h, v0.8h, v1.8h \n"
...@@ -3047,8 +3050,8 @@ void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1, ...@@ -3047,8 +3050,8 @@ void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
"+r"(src_y2), // %2 "+r"(src_y2), // %2
"+r"(dst_sobelx), // %3 "+r"(dst_sobelx), // %3
"+r"(width) // %4 "+r"(width) // %4
: "r"(2), // %5 : "r"(2LL), // %5
"r"(6) // %6 "r"(6LL) // %6
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
); );
} }
...@@ -3064,21 +3067,21 @@ void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1, ...@@ -3064,21 +3067,21 @@ void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
asm volatile ( asm volatile (
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"ld1 {v0.8b}, [%0],%w4 \n" // left "ld1 {v0.8b}, [%0],%4 \n" // left
MEMACCESS(1) MEMACCESS(1)
"ld1 {v1.8b}, [%1],%w4 \n" "ld1 {v1.8b}, [%1],%4 \n"
"usubl v0.8h, v0.8b, v1.8b \n" "usubl v0.8h, v0.8b, v1.8b \n"
MEMACCESS(0) MEMACCESS(0)
"ld1 {v2.8b}, [%0],%w4 \n" // center * 2 "ld1 {v2.8b}, [%0],%4 \n" // center * 2
MEMACCESS(1) MEMACCESS(1)
"ld1 {v3.8b}, [%1],%w4 \n" "ld1 {v3.8b}, [%1],%4 \n"
"usubl v1.8h, v2.8b, v3.8b \n" "usubl v1.8h, v2.8b, v3.8b \n"
"add v0.8h, v0.8h, v1.8h \n" "add v0.8h, v0.8h, v1.8h \n"
"add v0.8h, v0.8h, v1.8h \n" "add v0.8h, v0.8h, v1.8h \n"
MEMACCESS(0) MEMACCESS(0)
"ld1 {v2.8b}, [%0],%w5 \n" // right "ld1 {v2.8b}, [%0],%5 \n" // right
MEMACCESS(1) MEMACCESS(1)
"ld1 {v3.8b}, [%1],%w5 \n" "ld1 {v3.8b}, [%1],%5 \n"
"subs %w3, %w3, #8 \n" // 8 pixels "subs %w3, %w3, #8 \n" // 8 pixels
"usubl v1.8h, v2.8b, v3.8b \n" "usubl v1.8h, v2.8b, v3.8b \n"
"add v0.8h, v0.8h, v1.8h \n" "add v0.8h, v0.8h, v1.8h \n"
...@@ -3091,8 +3094,8 @@ void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1, ...@@ -3091,8 +3094,8 @@ void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
"+r"(src_y1), // %1 "+r"(src_y1), // %1
"+r"(dst_sobely), // %2 "+r"(dst_sobely), // %2
"+r"(width) // %3 "+r"(width) // %3
: "r"(1), // %4 : "r"(1LL), // %4
"r"(6) // %5 "r"(6LL) // %5
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
); );
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment