NaCl port of Affine

BUG=253
TESTED=ncval passed
R=dingkai@google.com
Review URL: https://webrtc-codereview.appspot.com/2035004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@761 16f28f9a-4ce2-e073-06de-1de4eb20be90

NaCl port of Affine
BUG=253
TESTED=ncval passed
R=dingkai@google.com
Review URL: https://webrtc-codereview.appspot.com/2035004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@761 16f28f9a-4ce2-e073-06de-1de4eb20be90
6f61eb67 · fbarchard@google.com · d593876b · 6f61eb67 · 6f61eb67 · 6f61eb67
Commit 6f61eb67 authored Aug 14, 2013 by fbarchard@google.com
Show whitespace changes
Inline Side-by-side

Showing with 36 additions and 10 deletions

README.chromium README.chromium +1 -1

row.h include/libyuv/row.h +1 -1

version.h include/libyuv/version.h +1 -1

row_posix.cc source/row_posix.cc +33 -7

No files found.
--- a/README.chromium
+++ b/README.chromium
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 760
+Version: 761
 License: BSD
 License File: LICENSE

--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -40,6 +40,7 @@ extern "C" {
    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
 // Effects:
 #define HAS_ARGBADDROW_SSE2
+#define HAS_ARGBAFFINEROW_SSE2
 #define HAS_ARGBATTENUATEROW_SSSE3
 #define HAS_ARGBBLENDROW_SSSE3
 #define HAS_ARGBCOLORMATRIXROW_SSSE3
@@ -128,7 +129,6 @@ extern "C" {
 #define HAS_YUY2TOYROW_SSE2
 // Effects:
-#define HAS_ARGBAFFINEROW_SSE2
 #define HAS_ARGBUNATTENUATEROW_SSE2
 #define HAS_INTERPOLATEROW_SSE2
 #define HAS_SOBELROW_SSE2

--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 760
+#define LIBYUV_VERSION 761
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -4748,8 +4748,8 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
  intptr_t src_argb_stride_temp = src_argb_stride;
  intptr_t temp = 0;
  asm volatile (
-    "movq      (%3),%%xmm2                     \n"
+    "movq      "MEMACCESS(3)",%%xmm2           \n"
-    "movq      0x8(%3),%%xmm7                  \n"
+    "movq      "MEMACCESS2(0x08,3)",%%xmm7     \n"
    "shl       $0x10,%1                        \n"
    "add       $0x4,%1                         \n"
    "movd      %1,%%xmm5                       \n"
@@ -4775,6 +4775,7 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
    "packssdw  %%xmm1,%%xmm0                   \n"
    "pmaddwd   %%xmm5,%%xmm0                   \n"
 #if defined(__x86_64__)
+// TODO(fbarchard): use a real movd to zero upper with %w1 for x64 and nacl.
    "movd      %%xmm0,%1                       \n"
    "mov       %1,%5                           \n"
    "and       $0x0fffffff,%1                  \n"
@@ -4786,11 +4787,19 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
    "movd      %%xmm0,%5                       \n"
    "pshufd    $0x39,%%xmm0,%%xmm0             \n"
 #endif
+#if defined(__x86_64__) && defined(__native_client__)
+    BUNDLEALIGN
+    "lea       (%q0,%q1,1),%%r14d              \n"
+    "movd      (%%r15,%%r14,1),%%xmm1          \n"
+    "lea       (%q0,%q5,1),%%r14d              \n"
+    "movd      (%%r15,%%r14,1),%%xmm6          \n"
+#else
    "movd      (%0,%1,1),%%xmm1                \n"
    "movd      (%0,%5,1),%%xmm6                \n"
+#endif
    "punpckldq %%xmm6,%%xmm1                   \n"
    "addps     %%xmm4,%%xmm2                   \n"
-    "movq      %%xmm1,(%2)                     \n"
+    "movq      %%xmm1,"MEMACCESS(2)"           \n"
 #if defined(__x86_64__)
    "movd      %%xmm0,%1                       \n"
    "mov       %1,%5                           \n"
@@ -4801,13 +4810,21 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
    "pshufd    $0x39,%%xmm0,%%xmm0             \n"
    "movd      %%xmm0,%5                       \n"
 #endif
+#if defined(__x86_64__) && defined(__native_client__)
+    BUNDLEALIGN
+    "lea       (%q0,%q1,1),%%r14d              \n"
+    "movd      (%%r15,%%r14,1),%%xmm0          \n"
+    "lea       (%q0,%q5,1),%%r14d              \n"
+    "movd      (%%r15,%%r14,1),%%xmm6          \n"
+#else
    "movd      (%0,%1,1),%%xmm0                \n"
    "movd      (%0,%5,1),%%xmm6                \n"
+#endif
    "punpckldq %%xmm6,%%xmm0                   \n"
    "addps     %%xmm4,%%xmm3                   \n"
    "sub       $0x4,%4                         \n"
-    "movq      %%xmm0,0x08(%2)                 \n"
+    "movq      %%xmm0,"MEMACCESS2(0x08,2)"     \n"
-    "lea       0x10(%2),%2                     \n"
+    "lea       "MEMLEA(0x10,2)",%2             \n"
    "jge       40b                             \n"
  "49:                                         \n"
@@ -4825,10 +4842,16 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
 #if defined(__x86_64__)
    "and       $0x0fffffff,%1                  \n"
 #endif
+#if defined(__x86_64__) && defined(__native_client__)
+    BUNDLEALIGN
+    "lea       (%q0,%q1,1),%%r14d              \n"
+    "movd      (%%r15,%%r14,1),%%xmm0          \n"
+#else
    "movd      (%0,%1,1),%%xmm0                \n"
+#endif
    "sub       $0x1,%4                         \n"
-    "movd      %%xmm0,(%2)                     \n"
+    "movd      %%xmm0,"MEMACCESS(2)"           \n"
-    "lea       0x4(%2),%2                      \n"
+    "lea       "MEMLEA(0x04,2)",%2             \n"
    "jge       10b                             \n"
  "19:                                         \n"
  : "+r"(src_argb),  // %0
@@ -4839,6 +4862,9 @@ void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
    "+r"(temp)   // %5
  :
  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
 #if defined(__SSE2__)
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
 #endif