x64 versions of assembly which also work on PIC 32 bit. biplanar take 2 strides…

x64 versions of the assembly, which also work on PIC 32-bit builds. Biplanar takes 2 strides for odd width. Inversion for src height.
TEST=media and planar unittests in Talk
BUG=none
Review URL: http://webrtc-codereview.appspot.com/244004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@33 16f28f9a-4ce2-e073-06de-1de4eb20be90

x64 versions of assembly which also work on PIC 32 bit. biplanar take 2 strides…
x64 versions of the assembly, which also work on PIC 32-bit builds. Biplanar takes 2 strides for odd width. Inversion for src height.
TEST=media and planar unittests in Talk
BUG=none
Review URL: http://webrtc-codereview.appspot.com/244004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@33 16f28f9a-4ce2-e073-06de-1de4eb20be90
3faa0f15 · fbarchard@google.com · 3de12ae1 · 3faa0f15 · 3faa0f15 · 3faa0f15
Commit 3faa0f15 authored Oct 20, 2011 by fbarchard@google.com
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 33 additions and 38 deletions

planar_functions.cc source/planar_functions.cc +0 -0

row_posix.cc source/row_posix.cc +33 -38

scale.cc source/scale.cc +0 -0

No files found.
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -21,7 +21,7 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
                              const uint8* v_buf,  // rdx
                              uint8* rgb_buf,      // rcx
                              int width) {         // r8
-  asm(
+  asm volatile(
 "1:"
  "movzb  (%1),%%r10\n"
  "lea    1(%1),%1\n"
@@ -44,13 +44,12 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
  "lea    8(%3),%3\n"
  "sub    $0x2,%4\n"
  "ja     1b\n"
-  :
+  : "+r"(y_buf),    // %0
-  : "r"(y_buf),  // %0
+    "+r"(u_buf),    // %1
-    "r"(u_buf),  // %1
+    "+r"(v_buf),    // %2
-    "r"(v_buf),  // %2
+    "+r"(rgb_buf),  // %3
-    "r"(rgb_buf),  // %3
+    "+r"(width)     // %4
-    "r"(width),  // %4
+  : "r" (_kCoefficientsRgbY)  // %5
-    "r" (_kCoefficientsRgbY)  // %5
  : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
 );
 }
@@ -60,7 +59,7 @@ void FastConvertYUVToBGRARow(const uint8* y_buf,  // rdi
                             const uint8* v_buf,  // rdx
                             uint8* rgb_buf,      // rcx
                             int width) {         // r8
-  asm(
+  asm volatile(
 "1:"
  "movzb  (%1),%%r10\n"
  "lea    1(%1),%1\n"
@@ -83,13 +82,12 @@ void FastConvertYUVToBGRARow(const uint8* y_buf,  // rdi
  "lea    8(%3),%3\n"
  "sub    $0x2,%4\n"
  "ja     1b\n"
-  :
+  : "+r"(y_buf),    // %0
-  : "r"(y_buf),  // %0
+    "+r"(u_buf),    // %1
-    "r"(u_buf),  // %1
+    "+r"(v_buf),    // %2
-    "r"(v_buf),  // %2
+    "+r"(rgb_buf),  // %3
-    "r"(rgb_buf),  // %3
+    "+r"(width)     // %4
-    "r"(width),  // %4
+  : "r" (_kCoefficientsBgraY)  // %5
-    "r" (_kCoefficientsBgraY)  // %5
  : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
 );
 }
@@ -99,7 +97,7 @@ void FastConvertYUVToABGRRow(const uint8* y_buf,  // rdi
                             const uint8* v_buf,  // rdx
                             uint8* rgb_buf,      // rcx
                             int width) {         // r8
-  asm(
+  asm volatile(
 "1:"
  "movzb  (%1),%%r10\n"
  "lea    1(%1),%1\n"
@@ -122,13 +120,12 @@ void FastConvertYUVToABGRRow(const uint8* y_buf,  // rdi
  "lea    8(%3),%3\n"
  "sub    $0x2,%4\n"
  "ja     1b\n"
-  :
+  : "+r"(y_buf),    // %0
-  : "r"(y_buf),  // %0
+    "+r"(u_buf),    // %1
-    "r"(u_buf),  // %1
+    "+r"(v_buf),    // %2
-    "r"(v_buf),  // %2
+    "+r"(rgb_buf),  // %3
-    "r"(rgb_buf),  // %3
+    "+r"(width)     // %4
-    "r"(width),  // %4
+  : "r" (_kCoefficientsAbgrY)  // %5
-    "r" (_kCoefficientsAbgrY)  // %5
  : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
 );
 }
@@ -138,7 +135,7 @@ void FastConvertYUV444ToRGB32Row(const uint8* y_buf,  // rdi
                                 const uint8* v_buf,  // rdx
                                 uint8* rgb_buf,      // rcx
                                 int width) {         // r8
-  asm(
+  asm volatile(
 "1:"
  "movzb  (%1),%%r10\n"
  "lea    1(%1),%1\n"
@@ -158,13 +155,12 @@ void FastConvertYUV444ToRGB32Row(const uint8* y_buf,  // rdi
  "lea    4(%3),%3\n"
  "sub    $0x1,%4\n"
  "ja     1b\n"
-  :
+  : "+r"(y_buf),    // %0
-  : "r"(y_buf),  // %0
+    "+r"(u_buf),    // %1
-    "r"(u_buf),  // %1
+    "+r"(v_buf),    // %2
-    "r"(v_buf),  // %2
+    "+r"(rgb_buf),  // %3
-    "r"(rgb_buf),  // %3
+    "+r"(width)     // %4
-    "r"(width),  // %4
+  : "r" (_kCoefficientsRgbY)  // %5
-    "r" (_kCoefficientsRgbY)  // %5
  : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2"
 );
 }
@@ -172,7 +168,7 @@ void FastConvertYUV444ToRGB32Row(const uint8* y_buf,  // rdi
 void FastConvertYToRGB32Row(const uint8* y_buf,  // rdi
                            uint8* rgb_buf,      // rcx
                            int width) {         // r8
-  asm(
+  asm volatile(
 "1:"
  "movzb  (%0),%%r10\n"
  "movzb  0x1(%0),%%r11\n"
@@ -186,11 +182,10 @@ void FastConvertYToRGB32Row(const uint8* y_buf,  // rdi
  "lea    8(%1),%1\n"
  "sub    $0x2,%2\n"
  "ja     1b\n"
-  :
+  : "+r"(y_buf),    // %0
-  : "r"(y_buf),  // %0
+    "+r"(rgb_buf),  // %1
-    "r"(rgb_buf),  // %1
+    "+r"(width)     // %2
-    "r"(width),  // %2
+  : "r" (_kCoefficientsRgbY)  // %3
-    "r" (_kCoefficientsRgbY)  // %3
  : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
 );
 }

--- a/source/scale.cc
+++ b/source/scale.cc