Commit f2d84ddd authored by fbarchard@google.com

Scale ported to OSX for 32/64 bit. Required breaking some functions up into 2 or 3 asm inlines so constants can be set separately. Reworked the 6-byte store used by the 3/4 scale on all platforms: store the first 4 bytes, then store the last 4 bytes overlapping the previous store by 2, so the data stays within SSE2 registers.
BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/582004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@266 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent f368565b
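
The "2 or 3 asm inlines to set constants" mentioned in the commit message follow the shape visible in the ARGBToUVRow_SSSE3 hunks further down: a first asm volatile statement loads the constant vectors into fixed xmm registers, and a second statement holds the loop that uses them. Below is a minimal sketch of that structure, not libyuv code; the constant and function names (kHypotheticalAdd, HypotheticalAddRow_SSE2) are invented for illustration, and an SSE2-enabled x86/x86-64 GCC or Clang build is assumed.

#include <stdint.h>

typedef uint8_t uvec8 __attribute__((vector_size(16)));

// Illustrative 16-byte constant (not a libyuv table).
static const uvec8 kHypotheticalAdd = {
  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
};

// Statement 1 only sets up the constant; statement 2 does the work and
// relies on xmm5 still holding that constant when it runs.
void HypotheticalAddRow_SSE2(const uint8_t* src, uint8_t* dst) {
  asm volatile (
    "movdqa    %0,%%xmm5                       \n"
  :
  : "m"(kHypotheticalAdd)  // %0
  : "xmm5"
  );
  asm volatile (
    "movdqu    (%0),%%xmm0                     \n"
    "paddusb   %%xmm5,%%xmm0                   \n"
    "movdqu    %%xmm0,(%1)                     \n"
  :
  : "r"(src),  // %0
    "r"(dst)   // %1
  : "memory", "xmm0"
  );
}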
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 264
+Version: 265
License: BSD
License File: LICENSE
...
@@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 264
+#define LIBYUV_VERSION 265
#endif  // INCLUDE_LIBYUV_VERSION_H_
@@ -99,12 +99,14 @@ extern "C" {
typedef __declspec(align(16)) int8 vec8[16];
typedef __declspec(align(16)) uint8 uvec8[16];
typedef __declspec(align(16)) int16 vec16[8];
+typedef __declspec(align(16)) uint16 uvec16[8];
typedef __declspec(align(16)) uint32 uvec32[4];
#else  // __GNUC__
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
typedef int8 __attribute__((vector_size(16))) vec8;
typedef uint8 __attribute__((vector_size(16))) uvec8;
typedef int16 __attribute__((vector_size(16))) vec16;
+typedef uint16 __attribute__((vector_size(16))) uvec16;
typedef uint32 __attribute__((vector_size(16))) uvec32;
#endif
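
The uvec16 rows added above give both compilers a 16-byte-aligned type holding eight uint16 lanes. A hypothetical use, with an invented constant name and illustrative values (this is not a libyuv table):

#include <stdint.h>

// GCC/Clang flavor of the new typedef, as in the hunk above.
typedef uint16_t uvec16 __attribute__((vector_size(16)));

// Eight 16-bit lanes, e.g. a fixed-point factor of ~2/3 in 1.15 format.
static const uvec16 kHypotheticalTwoThirds = {
  21845, 21845, 21845, 21845, 21845, 21845, 21845, 21845
};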
...
@@ -694,13 +694,9 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
    "movdqa    %1,%%xmm3                       \n"
    "movdqa    %2,%%xmm5                       \n"
  :
  : "m"(kARGBToU),  // %0
    "m"(kARGBToV),  // %1
    "m"(kAddUV128)  // %2
-  :
-#if defined(__SSE2__)
-    "xmm3", "xmm4", "xmm5"
-#endif
  );
  asm volatile (
    "sub       %1,%2                           \n"
@@ -762,10 +758,6 @@ void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
  : "m"(kARGBToU),  // %0
    "m"(kARGBToV),  // %1
    "m"(kAddUV128)  // %2
-  :
-#if defined(__SSE2__)
-    "xmm3", "xmm4", "xmm5"
-#endif
  );
  asm volatile (
    "sub       %1,%2                           \n"
@@ -905,10 +897,6 @@ void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
  : "m"(kBGRAToU),  // %0
    "m"(kBGRAToV),  // %1
    "m"(kAddUV128)  // %2
-  :
-#if defined(__SSE2__)
-    "xmm3", "xmm4", "xmm5"
-#endif
  );
  asm volatile (
    "sub       %1,%2                           \n"
@@ -970,10 +958,6 @@ void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra0, int src_stride_bgra,
  : "m"(kBGRAToU),  // %0
    "m"(kBGRAToV),  // %1
    "m"(kAddUV128)  // %2
-  :
-#if defined(__SSE2__)
-    "xmm3", "xmm4", "xmm5"
-#endif
  );
  asm volatile (
    "sub       %1,%2                           \n"
@@ -1113,10 +1097,6 @@ void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
  : "m"(kABGRToU),  // %0
    "m"(kABGRToV),  // %1
    "m"(kAddUV128)  // %2
-  :
-#if defined(__SSE2__)
-    "xmm3", "xmm4", "xmm5"
-#endif
  );
  asm volatile (
    "sub       %1,%2                           \n"
@@ -1178,10 +1158,6 @@ void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr0, int src_stride_abgr,
  : "m"(kABGRToU),  // %0
    "m"(kABGRToV),  // %1
    "m"(kAddUV128)  // %2
-  :
-#if defined(__SSE2__)
-    "xmm3", "xmm4", "xmm5"
-#endif
  );
  asm volatile (
    "sub       %1,%2                           \n"
...
This diff is collapsed.
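
The collapsed diff presumably covers the scale source itself; the commit message's 6-byte store can be sketched with SSE2 intrinsics instead of the inline asm libyuv actually uses. StoreSixBytes_SSE2 is an invented name; the point is the two overlapping 4-byte stores.

#include <emmintrin.h>
#include <stdint.h>
#include <string.h>

// Write the low 6 bytes of `pixels` using only 4-byte stores: bytes 0..3 go
// to dst, then bytes 2..5 go to dst + 2.  The two stores overlap by 2 bytes
// that carry identical values, so no scalar 2-byte tail store is needed and
// the data stays in SSE2 registers until the final 4-byte writes.
void StoreSixBytes_SSE2(uint8_t* dst, __m128i pixels) {
  uint32_t lo = (uint32_t)_mm_cvtsi128_si32(pixels);                     // bytes 0..3
  uint32_t hi = (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(pixels, 2));  // bytes 2..5
  memcpy(dst, &lo, 4);
  memcpy(dst + 2, &hi, 4);
}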