Commit f2d84ddd authored by fbarchard@google.com

Scale ported to OSX for 32/64 bit. Required breaking some functions up into 2 or 3 asm inlines so constants can be set separately. Reworked the 6-byte store used by the 3/4 scale on all platforms: store the first 4 bytes, then store the last 4 bytes overlapping the previous store by 2, so the data stays within SSE2 registers.
BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/582004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@266 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent f368565b
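
The "2 or 3 asm inlines to set constants" mentioned in the commit message follow the shape visible in the ARGBToUVRow_SSSE3 hunks further down: a first asm volatile statement loads the constant vectors into fixed xmm registers, and a second statement holds the loop that uses them. Below is a minimal sketch of that structure, not libyuv code; the constant and function names (kHypotheticalAdd, HypotheticalAddRow_SSE2) are invented for illustration, and an SSE2-enabled x86/x86-64 GCC or Clang build is assumed.

#include <stdint.h>

typedef uint8_t uvec8 __attribute__((vector_size(16)));

// Illustrative 16-byte constant (not a libyuv table).
static const uvec8 kHypotheticalAdd = {
  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
};

// Statement 1 only sets up the constant; statement 2 does the work and
// relies on xmm5 still holding that constant when it runs.
void HypotheticalAddRow_SSE2(const uint8_t* src, uint8_t* dst) {
  asm volatile (
    "movdqa    %0,%%xmm5                       \n"
  :
  : "m"(kHypotheticalAdd)  // %0
  : "xmm5"
  );
  asm volatile (
    "movdqu    (%0),%%xmm0                     \n"
    "paddusb   %%xmm5,%%xmm0                   \n"
    "movdqu    %%xmm0,(%1)                     \n"
  :
  : "r"(src),  // %0
    "r"(dst)   // %1
  : "memory", "xmm0"
  );
}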
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 264
+Version: 265
License: BSD
License File: LICENSE
...
@@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 264
+#define LIBYUV_VERSION 265
#endif  // INCLUDE_LIBYUV_VERSION_H_
@@ -99,12 +99,14 @@ extern "C" {
typedef __declspec(align(16)) int8 vec8[16];
typedef __declspec(align(16)) uint8 uvec8[16];
typedef __declspec(align(16)) int16 vec16[8];
+typedef __declspec(align(16)) uint16 uvec16[8];
typedef __declspec(align(16)) uint32 uvec32[4];
#else  // __GNUC__
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
typedef int8 __attribute__((vector_size(16))) vec8;
typedef uint8 __attribute__((vector_size(16))) uvec8;
typedef int16 __attribute__((vector_size(16))) vec16;
+typedef uint16 __attribute__((vector_size(16))) uvec16;
typedef uint32 __attribute__((vector_size(16))) uvec32;
#endif
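
The uvec16 rows added above give both compilers a 16-byte-aligned type holding eight uint16 lanes. A hypothetical use, with an invented constant name and illustrative values (this is not a libyuv table):

#include <stdint.h>

// GCC/Clang flavor of the new typedef, as in the hunk above.
typedef uint16_t uvec16 __attribute__((vector_size(16)));

// Eight 16-bit lanes, e.g. a fixed-point factor of ~2/3 in 1.15 format.
static const uvec16 kHypotheticalTwoThirds = {
  21845, 21845, 21845, 21845, 21845, 21845, 21845, 21845
};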
...
@@ -694,13 +694,9 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
    "movdqa    %1,%%xmm3                       \n"
    "movdqa    %2,%%xmm5                       \n"
  :
  : "m"(kARGBToU),  // %0
    "m"(kARGBToV),  // %1
    "m"(kAddUV128)  // %2
-  :
-#if defined(__SSE2__)
-    "xmm3", "xmm4", "xmm5"
-#endif
  );
  asm volatile (
    "sub       %1,%2                           \n"
@@ -762,10 +758,6 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
  : "m"(kARGBToU),  // %0
    "m"(kARGBToV),  // %1
    "m"(kAddUV128)  // %2
-  :
-#if defined(__SSE2__)
-    "xmm3", "xmm4", "xmm5"
-#endif
  );
  asm volatile (
    "sub       %1,%2                           \n"
@@ -905,10 +897,6 @@ void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
  : "m"(kBGRAToU),  // %0
    "m"(kBGRAToV),  // %1
    "m"(kAddUV128)  // %2
-  :
-#if defined(__SSE2__)
-    "xmm3", "xmm4", "xmm5"
-#endif
  );
  asm volatile (
    "sub       %1,%2                           \n"
@@ -970,10 +958,6 @@ void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
  : "m"(kBGRAToU),  // %0
    "m"(kBGRAToV),  // %1
    "m"(kAddUV128)  // %2
-  :
-#if defined(__SSE2__)
-    "xmm3", "xmm4", "xmm5"
-#endif
  );
  asm volatile (
    "sub       %1,%2                           \n"
@@ -1113,10 +1097,6 @@ void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
  : "m"(kABGRToU),  // %0
    "m"(kABGRToV),  // %1
    "m"(kAddUV128)  // %2
-  :
-#if defined(__SSE2__)
-    "xmm3", "xmm4", "xmm5"
-#endif
  );
  asm volatile (
    "sub       %1,%2                           \n"
@@ -1178,10 +1158,6 @@ void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
  : "m"(kABGRToU),  // %0
    "m"(kABGRToV),  // %1
    "m"(kAddUV128)  // %2
-  :
-#if defined(__SSE2__)
-    "xmm3", "xmm4", "xmm5"
-#endif
  );
  asm volatile (
    "sub       %1,%2                           \n"
...
This diff is collapsed.
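
The collapsed diff presumably covers the scale source itself; the commit message's 6-byte store can be sketched with SSE2 intrinsics instead of the inline asm libyuv actually uses. StoreSixBytes_SSE2 is an invented name; the point is the two overlapping 4-byte stores.

#include <emmintrin.h>
#include <stdint.h>
#include <string.h>

// Write the low 6 bytes of `pixels` using only 4-byte stores: bytes 0..3 go
// to dst, then bytes 2..5 go to dst + 2.  The two stores overlap by 2 bytes
// that carry identical values, so no scalar 2-byte tail store is needed and
// the data stays in SSE2 registers until the final 4-byte writes.
void StoreSixBytes_SSE2(uint8_t* dst, __m128i pixels) {
  uint32_t lo = (uint32_t)_mm_cvtsi128_si32(pixels);                     // bytes 0..3
  uint32_t hi = (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(pixels, 2));  // bytes 2..5
  memcpy(dst, &lo, 4);
  memcpy(dst + 2, &hi, 4);
}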