CopyRow_SSE2 ported to NaCl with munging removed (dst is now advanced as its own pointer instead of being addressed as an offset from src)

BUG=253
TESTED=libyuvTest.TestCopyPlane
R=wuwang@google.com
Review URL: https://webrtc-codereview.appspot.com/2060006
git-svn-id: http://libyuv.googlecode.com/svn/trunk@765 16f28f9a-4ce2-e073-06de-1de4eb20be90

CopyRow_SSE2 ported to NaCl with munging removed
BUG=253
TESTED=libyuvTest.TestCopyPlane
R=wuwang@google.com
Review URL: https://webrtc-codereview.appspot.com/2060006
git-svn-id: http://libyuv.googlecode.com/svn/trunk@765 16f28f9a-4ce2-e073-06de-1de4eb20be90
c140b9d1 · fbarchard@google.com · 1c710a06 · c140b9d1 · c140b9d1 · c140b9d1
Commit c140b9d1 authored Aug 19, 2013 by fbarchard@google.com
Showing with 12 additions and 14 deletions

README.chromium README.chromium +1 -1

row.h include/libyuv/row.h +1 -3

version.h include/libyuv/version.h +1 -1

row_posix.cc source/row_posix.cc +6 -6

row_win.cc source/row_win.cc +3 -3

No files found.
--- a/README.chromium
+++ b/README.chromium
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 764
+Version: 765
 License: BSD
 License File: LICENSE


--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -67,6 +67,7 @@ extern "C" {
 #define HAS_ARGBTOYJROW_SSSE3
 #define HAS_ARGBTOYROW_SSSE3
 #define HAS_COPYROW_ERMS
+#define HAS_COPYROW_SSE2
 #define HAS_COPYROW_X86
 #define HAS_FIXEDDIV_X86
 #define HAS_I400TOARGBROW_SSE2
@@ -94,7 +95,6 @@ extern "C" {
 #define HAS_ARGBTOUVJROW_SSSE3
 #define HAS_BGRATOUVROW_SSSE3
 #define HAS_BGRATOYROW_SSSE3
-#define HAS_COPYROW_SSE2
 #define HAS_HALFROW_SSE2
 #define HAS_I411TOARGBROW_SSSE3
 #define HAS_I422TOABGRROW_SSSE3
@@ -133,8 +133,6 @@ extern "C" {
 #define HAS_YUY2TOUV422ROW_SSE2
 #define HAS_YUY2TOUVROW_SSE2
 #define HAS_YUY2TOYROW_SSE2
-
-// Effects:
 #endif

 // The following are Windows only:

--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_

-#define LIBYUV_VERSION 764
+#define LIBYUV_VERSION 765

 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -3034,14 +3034,14 @@ void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
 #ifdef HAS_COPYROW_SSE2
 void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
  asm volatile (
-    "sub        %0,%1                          \n"
    ".p2align  4                               \n"
  "1:                                          \n"
-    "movdqa    (%0),%%xmm0                     \n"
-    "movdqa    0x10(%0),%%xmm1                 \n"
-    "movdqa    %%xmm0,(%0,%1)                  \n"
-    "movdqa    %%xmm1,0x10(%0,%1)              \n"
-    "lea       0x20(%0),%0                     \n"
+    "movdqa    "MEMACCESS(0)",%%xmm0           \n"
+    "movdqa    "MEMACCESS2(0x10,0)",%%xmm1     \n"
+    "lea       "MEMLEA(0x20,0)",%0             \n"
+    "movdqa    %%xmm0,"MEMACCESS(1)"           \n"
+    "movdqa    %%xmm1,"MEMACCESS2(0x10,1)"     \n"
+    "lea       "MEMLEA(0x20,1)",%1             \n"
    "sub       $0x20,%2                        \n"
    "jg        1b                              \n"
  : "+r"(src),   // %0

--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -3585,15 +3585,15 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
    mov        eax, [esp + 4]   // src
    mov        edx, [esp + 8]   // dst
    mov        ecx, [esp + 12]  // count
-    sub        edx, eax

    align      16
  convertloop:
    movdqa     xmm0, [eax]
    movdqa     xmm1, [eax + 16]
-    movdqa     [eax + edx], xmm0
-    movdqa     [eax + edx + 16], xmm1
    lea        eax, [eax + 32]
+    movdqa     [edx], xmm0
+    movdqa     [edx + 16], xmm1
+    lea        edx, [edx + 32]
    sub        ecx, 32
    jg         convertloop
    ret