neon64 use width int directly.

With the %w size modifier, the int width can be passed directly to ARM assembly. For functions that take input constants, the outputs are declared as early-clobber (early write) using &, meaning the outputs are used before all inputs are consumed. R=harryjin@google.com BUG=libyuv:598 Review URL: https://codereview.chromium.org/2043073003 .

neon64 use width int directly.
With the %w size modifier, the int width can be passed directly to ARM assembly. For functions that take input constants, the outputs are declared as early-clobber (early write) using &, meaning the outputs are used before all inputs are consumed. R=harryjin@google.com BUG=libyuv:598 Review URL: https://codereview.chromium.org/2043073003 .
026be3cd · Frank Barchard · 17e8a4d3 · 026be3cd · 026be3cd · 026be3cd
Commit 026be3cd authored Jun 08, 2016 by Frank Barchard
Show whitespace changes
Inline Side-by-side

Showing with 13 additions and 21 deletions

README.chromium README.chromium +1 -1

version.h include/libyuv/version.h +1 -1

row_neon64.cc source/row_neon64.cc +11 -19

No files found.
--- a/README.chromium
+++ b/README.chromium
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1594
+Version: 1595
 License: BSD
 License File: LICENSE


--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_

-#define LIBYUV_VERSION 1594
+#define LIBYUV_VERSION 1595

 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -563,7 +563,6 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
                        uint8* dst_argb,
                        const struct YuvConstants* yuvconstants,
                        int width) {
-  int64 width64 = (int64)(width);
  asm volatile (
    YUVTORGB_SETUP
    "movi       v23.8b, #255                   \n"
@@ -576,7 +575,7 @@ void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
    "b.gt       1b                             \n"
    : "+r"(src_yuy2),  // %0
      "+r"(dst_argb),  // %1
-      "+r"(width64)    // %2
+      "+r"(width)      // %2
    : [kUVToRB]"r"(&yuvconstants->kUVToRB),
      [kUVToG]"r"(&yuvconstants->kUVToG),
      [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
@@ -590,7 +589,6 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
                        uint8* dst_argb,
                        const struct YuvConstants* yuvconstants,
                        int width) {
-  int64 width64 = (int64)(width);
  asm volatile (
    YUVTORGB_SETUP
    "movi       v23.8b, #255                   \n"
@@ -603,7 +601,7 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
    "b.gt       1b                             \n"
    : "+r"(src_uyvy),  // %0
      "+r"(dst_argb),  // %1
-      "+r"(width64)    // %2
+      "+r"(width)      // %2
    : [kUVToRB]"r"(&yuvconstants->kUVToRB),
      [kUVToG]"r"(&yuvconstants->kUVToG),
      [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
@@ -708,16 +706,14 @@ void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) {
 }

 void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
-  int64 width64 = (int64) width;
  asm volatile (
    // Start at end of source row.
-    "add        %0, %0, %2                     \n"
+    "add        %0, %0, %w2, sxtw              \n"
    "sub        %0, %0, #16                    \n"
-
  "1:                                          \n"
    MEMACCESS(0)
    "ld1        {v0.16b}, [%0], %3             \n"  // src -= 16
-    "subs       %2, %2, #16                   \n"  // 16 pixels per loop.
+    "subs       %w2, %w2, #16                  \n"  // 16 pixels per loop.
    "rev64      v0.16b, v0.16b                 \n"
    MEMACCESS(1)
    "st1        {v0.D}[1], [%1], #8            \n"  // dst += 16
@@ -726,7 +722,7 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
    "b.gt       1b                             \n"
  : "+r"(src),   // %0
    "+r"(dst),   // %1
-    "+r"(width64)  // %2
+    "+r"(width)  // %2
  : "r"((ptrdiff_t)-16)    // %3
  : "cc", "memory", "v0"
  );
@@ -734,16 +730,14 @@ void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {

 void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                      int width) {
-  int64 width64 = (int64) width;
  asm volatile (
    // Start at end of source row.
-    "add        %0, %0, %3, lsl #1             \n"
+    "add        %0, %0, %w3, sxtw #1           \n"
    "sub        %0, %0, #16                    \n"
-
  "1:                                          \n"
    MEMACCESS(0)
    "ld2        {v0.8b, v1.8b}, [%0], %4       \n"  // src -= 16
-    "subs       %3, %3, #8                     \n"  // 8 pixels per loop.
+    "subs       %w3, %w3, #8                   \n"  // 8 pixels per loop.
    "rev64      v0.8b, v0.8b                   \n"
    "rev64      v1.8b, v1.8b                   \n"
    MEMACCESS(1)
@@ -754,23 +748,21 @@ void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
  : "+r"(src_uv),  // %0
    "+r"(dst_u),   // %1
    "+r"(dst_v),   // %2
-    "+r"(width64)    // %3
+    "+r"(width)    // %3
  : "r"((ptrdiff_t)-16)      // %4
  : "cc", "memory", "v0", "v1"
  );
 }

 void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
-  int64 width64 = (int64) width;
  asm volatile (
  // Start at end of source row.
-    "add        %0, %0, %2, lsl #2             \n"
+    "add        %0, %0, %w2, sxtw #2           \n"
    "sub        %0, %0, #16                    \n"
-
  "1:                                          \n"
    MEMACCESS(0)
    "ld1        {v0.16b}, [%0], %3             \n"  // src -= 16
-    "subs       %2, %2, #4                     \n"  // 4 pixels per loop.
+    "subs       %w2, %w2, #4                   \n"  // 4 pixels per loop.
    "rev64      v0.4s, v0.4s                   \n"
    MEMACCESS(1)
    "st1        {v0.D}[1], [%1], #8            \n"  // dst += 16
@@ -779,7 +771,7 @@ void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
    "b.gt       1b                             \n"
  : "+r"(src),   // %0
    "+r"(dst),   // %1
-    "+r"(width64)  // %2
+    "+r"(width)  // %2
  : "r"((ptrdiff_t)-16)    // %3
  : "cc", "memory", "v0"
  );