Commit 026be3cd authored by Frank Barchard

neon64 use width int directly.

With the %w size modifier, the int width can be passed directly to ARM assembly.
For functions that take input constants, the outputs are declared as
early-clobber using &, meaning the outputs may be written before all inputs are consumed.

R=harryjin@google.com
BUG=libyuv:598

Review URL: https://codereview.chromium.org/2043073003 .
parent 17e8a4d3
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1594 Version: 1595
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1594 #define LIBYUV_VERSION 1595
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -563,7 +563,6 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2, ...@@ -563,7 +563,6 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
uint8* dst_argb, uint8* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
int64 width64 = (int64)(width);
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"movi v23.8b, #255 \n" "movi v23.8b, #255 \n"
...@@ -576,7 +575,7 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2, ...@@ -576,7 +575,7 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
"b.gt 1b \n" "b.gt 1b \n"
: "+r"(src_yuy2), // %0 : "+r"(src_yuy2), // %0
"+r"(dst_argb), // %1 "+r"(dst_argb), // %1
"+r"(width64) // %2 "+r"(width) // %2
: [kUVToRB]"r"(&yuvconstants->kUVToRB), : [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG), [kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
...@@ -590,7 +589,6 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy, ...@@ -590,7 +589,6 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
uint8* dst_argb, uint8* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
int64 width64 = (int64)(width);
asm volatile ( asm volatile (
YUVTORGB_SETUP YUVTORGB_SETUP
"movi v23.8b, #255 \n" "movi v23.8b, #255 \n"
...@@ -603,7 +601,7 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy, ...@@ -603,7 +601,7 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
"b.gt 1b \n" "b.gt 1b \n"
: "+r"(src_uyvy), // %0 : "+r"(src_uyvy), // %0
"+r"(dst_argb), // %1 "+r"(dst_argb), // %1
"+r"(width64) // %2 "+r"(width) // %2
: [kUVToRB]"r"(&yuvconstants->kUVToRB), : [kUVToRB]"r"(&yuvconstants->kUVToRB),
[kUVToG]"r"(&yuvconstants->kUVToG), [kUVToG]"r"(&yuvconstants->kUVToG),
[kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
...@@ -708,16 +706,14 @@ void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) { ...@@ -708,16 +706,14 @@ void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) {
} }
void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
int64 width64 = (int64) width;
asm volatile ( asm volatile (
// Start at end of source row. // Start at end of source row.
"add %0, %0, %2 \n" "add %0, %0, %w2, sxtw \n"
"sub %0, %0, #16 \n" "sub %0, %0, #16 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"ld1 {v0.16b}, [%0], %3 \n" // src -= 16 "ld1 {v0.16b}, [%0], %3 \n" // src -= 16
"subs %2, %2, #16 \n" // 16 pixels per loop. "subs %w2, %w2, #16 \n" // 16 pixels per loop.
"rev64 v0.16b, v0.16b \n" "rev64 v0.16b, v0.16b \n"
MEMACCESS(1) MEMACCESS(1)
"st1 {v0.D}[1], [%1], #8 \n" // dst += 16 "st1 {v0.D}[1], [%1], #8 \n" // dst += 16
...@@ -726,7 +722,7 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { ...@@ -726,7 +722,7 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
"b.gt 1b \n" "b.gt 1b \n"
: "+r"(src), // %0 : "+r"(src), // %0
"+r"(dst), // %1 "+r"(dst), // %1
"+r"(width64) // %2 "+r"(width) // %2
: "r"((ptrdiff_t)-16) // %3 : "r"((ptrdiff_t)-16) // %3
: "cc", "memory", "v0" : "cc", "memory", "v0"
); );
...@@ -734,16 +730,14 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { ...@@ -734,16 +730,14 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) { int width) {
int64 width64 = (int64) width;
asm volatile ( asm volatile (
// Start at end of source row. // Start at end of source row.
"add %0, %0, %3, lsl #1 \n" "add %0, %0, %w3, sxtw #1 \n"
"sub %0, %0, #16 \n" "sub %0, %0, #16 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"ld2 {v0.8b, v1.8b}, [%0], %4 \n" // src -= 16 "ld2 {v0.8b, v1.8b}, [%0], %4 \n" // src -= 16
"subs %3, %3, #8 \n" // 8 pixels per loop. "subs %w3, %w3, #8 \n" // 8 pixels per loop.
"rev64 v0.8b, v0.8b \n" "rev64 v0.8b, v0.8b \n"
"rev64 v1.8b, v1.8b \n" "rev64 v1.8b, v1.8b \n"
MEMACCESS(1) MEMACCESS(1)
...@@ -754,23 +748,21 @@ void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, ...@@ -754,23 +748,21 @@ void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
: "+r"(src_uv), // %0 : "+r"(src_uv), // %0
"+r"(dst_u), // %1 "+r"(dst_u), // %1
"+r"(dst_v), // %2 "+r"(dst_v), // %2
"+r"(width64) // %3 "+r"(width) // %3
: "r"((ptrdiff_t)-16) // %4 : "r"((ptrdiff_t)-16) // %4
: "cc", "memory", "v0", "v1" : "cc", "memory", "v0", "v1"
); );
} }
void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) { void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
int64 width64 = (int64) width;
asm volatile ( asm volatile (
// Start at end of source row. // Start at end of source row.
"add %0, %0, %2, lsl #2 \n" "add %0, %0, %w2, sxtw #2 \n"
"sub %0, %0, #16 \n" "sub %0, %0, #16 \n"
"1: \n" "1: \n"
MEMACCESS(0) MEMACCESS(0)
"ld1 {v0.16b}, [%0], %3 \n" // src -= 16 "ld1 {v0.16b}, [%0], %3 \n" // src -= 16
"subs %2, %2, #4 \n" // 4 pixels per loop. "subs %w2, %w2, #4 \n" // 4 pixels per loop.
"rev64 v0.4s, v0.4s \n" "rev64 v0.4s, v0.4s \n"
MEMACCESS(1) MEMACCESS(1)
"st1 {v0.D}[1], [%1], #8 \n" // dst += 16 "st1 {v0.D}[1], [%1], #8 \n" // dst += 16
...@@ -779,7 +771,7 @@ void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) { ...@@ -779,7 +771,7 @@ void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
"b.gt 1b \n" "b.gt 1b \n"
: "+r"(src), // %0 : "+r"(src), // %0
"+r"(dst), // %1 "+r"(dst), // %1
"+r"(width64) // %2 "+r"(width) // %2
: "r"((ptrdiff_t)-16) // %3 : "r"((ptrdiff_t)-16) // %3
: "cc", "memory", "v0" : "cc", "memory", "v0"
); );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment