mirror munging and avoid wait. scale addrows use 6 registers for mac

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/426007
git-svn-id: http://libyuv.googlecode.com/svn/trunk@200 16f28f9a-4ce2-e073-06de-1de4eb20be90

mirror munging and avoid wait. scale addrows use 6 registers for mac
BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/426007
git-svn-id: http://libyuv.googlecode.com/svn/trunk@200 16f28f9a-4ce2-e073-06de-1de4eb20be90
f69e90a1 · fbarchard@google.com · 2bc55fa3 · f69e90a1 · f69e90a1 · f69e90a1
Commit f69e90a1 authored Mar 02, 2012 by fbarchard@google.com
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 14 additions and 14 deletions

README.chromium README.chromium +1 -1

version.h include/libyuv/version.h +1 -1

rotate.cc source/rotate.cc +0 -0

scale.cc source/scale.cc +12 -12

No files found.
--- a/README.chromium
+++ b/README.chromium
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 199
+Version: 200
 License: BSD
 License File: LICENSE

--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,7 +11,7 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 199
+#define LIBYUV_VERSION 200
 #endif  // INCLUDE_LIBYUV_VERSION_H_
--- a/source/rotate.cc
+++ b/source/rotate.cc
--- a/source/scale.cc
+++ b/source/scale.cc
@@ -1701,15 +1701,15 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
  intptr_t tmp_src = 0;
  asm volatile (
    "pxor      %%xmm4,%%xmm4                   \n"
-    "sub       $0x1,%3                         \n"
+    "sub       $0x1,%5                         \n"
  "1:                                          \n"
    "movdqa    (%0),%%xmm0                     \n"
-    "mov       %0,%5                           \n"
+    "mov       %0,%3                           \n"
    "add       %6,%0                           \n"
    "movdqa    %%xmm0,%%xmm1                   \n"
    "punpcklbw %%xmm4,%%xmm0                   \n"
    "punpckhbw %%xmm4,%%xmm1                   \n"
-    "mov       %3,%4                           \n"
+    "mov       %5,%2                           \n"
  "2:                                          \n"
    "movdqa    (%0),%%xmm2                     \n"
    "add       %6,%0                           \n"
@@ -1718,21 +1718,21 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
    "punpckhbw %%xmm4,%%xmm3                   \n"
    "paddusw   %%xmm2,%%xmm0                   \n"
    "paddusw   %%xmm3,%%xmm1                   \n"
-    "sub       $0x1,%4                         \n"
+    "sub       $0x1,%2                         \n"
    "ja        2b                              \n"
    "movdqa    %%xmm0,(%1)                     \n"
    "movdqa    %%xmm1,0x10(%1)                 \n"
-    "lea       0x10(%5),%0                     \n"
+    "lea       0x10(%3),%0                     \n"
    "lea       0x20(%1),%1                     \n"
-    "sub       $0x10,%2                        \n"
+    "sub       $0x10,%4                        \n"
    "ja        1b                              \n"
  : "+r"(src_ptr),     // %0
    "+r"(dst_ptr),     // %1
-    "+rm"(src_width),  // %2
+    "+r"(tmp_height),  // %2
-    "+rm"(src_height), // %3
+    "+r"(tmp_src),     // %3
-    "+r"(tmp_height),  // %4
+    "+rm"(src_width),  // %4
-    "+r"(tmp_src)      // %5
+    "+rm"(src_height)  // %5
-  : "rm"(static_cast<intptr_t>(src_stride))  // %6
+  : "r"(static_cast<intptr_t>(src_stride))  // %6
  : "memory", "cc"
 #if defined(__SSE2__)
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
@@ -1740,6 +1740,7 @@ static void ScaleAddRows_SSE2(const uint8* src_ptr, int src_stride,
  );
 }
 #if defined(__i386__)
 extern "C" void ScaleRowDown8Int_SSE2(const uint8* src_ptr, int src_stride,
                                      uint8* dst_ptr, int dst_width);
@@ -2886,7 +2887,6 @@ static void ScaleFilterCols34_C(uint8* dst_ptr, const uint8* src_ptr,
 // (1-f)a + fb can be replaced with a + f(b-a)
 #define BLENDER(a, b, f) ((int)(a) + ((f) * ((int)(b) - (int)(a)) >> 16))
-// TODO(fbarchard): consider +0x8000 for rounding if it can be done for free.
 static void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
                              int dst_width, int x, int dx) {
  for (int j = 0; j < dst_width - 1; j += 2) {