Commit 228bdc24 authored by fbarchard@google.com

Port the YUV-to-RGB SSSE3 row functions to GCC

BUG=none
TEST=media_unittest
Review URL: http://webrtc-codereview.appspot.com/269015

git-svn-id: http://libyuv.googlecode.com/svn/trunk@80 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 4cf70bd6
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 79 Version: 80
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -1136,19 +1136,6 @@ int I420ToARGB(const uint8* src_y, int src_stride_y, ...@@ -1136,19 +1136,6 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) { IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
} else } else
#endif
#if defined(HAS_FASTCONVERTYUVTOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
(width % 4 == 0) &&
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow4_SSE2;
} else
#endif
#if defined(HAS_FASTCONVERTYUVTOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
(width % 2 == 0)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSE2;
} else
#endif #endif
{ {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
...@@ -1188,12 +1175,6 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y, ...@@ -1188,12 +1175,6 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) { IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_SSSE3; FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_SSSE3;
} else } else
#endif
#if defined(HAS_FASTCONVERTYUVTOBGRAROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
(width % 2 == 0)) {
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_SSE2;
} else
#endif #endif
{ {
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_C; FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_C;
...@@ -1233,12 +1214,6 @@ int I420ToABGR(const uint8* src_y, int src_stride_y, ...@@ -1233,12 +1214,6 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) { IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_SSSE3; FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_SSSE3;
} else } else
#endif
#if defined(HAS_FASTCONVERTYUVTOABGRROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
(width % 2 == 0)) {
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_SSE2;
} else
#endif #endif
{ {
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_C; FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_C;
...@@ -1278,12 +1253,6 @@ int I422ToARGB(const uint8* src_y, int src_stride_y, ...@@ -1278,12 +1253,6 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) { IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
} else } else
#endif
#if defined(HAS_FASTCONVERTYUVTOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
(width % 2 == 0)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSE2;
} else
#endif #endif
{ {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C; FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
...@@ -1321,11 +1290,6 @@ int I444ToARGB(const uint8* src_y, int src_stride_y, ...@@ -1321,11 +1290,6 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) { IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_SSSE3; FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_SSSE3;
} else } else
#endif
#if defined(HAS_FASTCONVERTYUVTOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_SSE2;
} else
#endif #endif
{ {
FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_C; FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_C;
...@@ -1354,7 +1318,7 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y, ...@@ -1354,7 +1318,7 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
#if defined(HAS_FASTCONVERTYTOARGBROW_SSE2) #if defined(HAS_FASTCONVERTYTOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSSE3) && if (TestCpuFlag(kCpuHasSSE2) &&
(width % 8 == 0) && (width % 8 == 0) &&
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) { IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
FastConvertYToARGBRow = FastConvertYToARGBRow_SSE2; FastConvertYToARGBRow = FastConvertYToARGBRow_SSE2;
......
...@@ -37,28 +37,17 @@ ...@@ -37,28 +37,17 @@
#define HAS_BGRATOUVROW_SSSE3 #define HAS_BGRATOUVROW_SSSE3
#define HAS_ABGRTOUVROW_SSSE3 #define HAS_ABGRTOUVROW_SSSE3
#define HAS_I400TOARGBROW_SSE2 #define HAS_I400TOARGBROW_SSE2
#endif
// The following are available on Linux (32/64 bit)
// TODO(fbarchard): enable for fpic on linux
#if (defined(__x86_64__) || \
(defined(__i386__) && !defined(__pic__))) && \
!defined(LIBYUV_DISABLE_ASM)
#define HAS_FASTCONVERTYUVTOARGBROW_SSE2
#define HAS_FASTCONVERTYUVTOBGRAROW_SSE2
#define HAS_FASTCONVERTYUVTOABGRROW_SSE2
#define HAS_FASTCONVERTYUV444TOARGBROW_SSE2
#define HAS_FASTCONVERTYTOARGBROW_SSE2 #define HAS_FASTCONVERTYTOARGBROW_SSE2
#endif #endif
// The following are available on Windows // The following are available on all x86 platforms except 32 bit OSX
#if defined(WIN32) && \ #if (defined(WIN32) || defined(__x86_64__) || \
(defined(__i386__) && !defined(__APPLE__))) && \
!defined(LIBYUV_DISABLE_ASM) !defined(LIBYUV_DISABLE_ASM)
#define HAS_FASTCONVERTYUVTOARGBROW_SSSE3 #define HAS_FASTCONVERTYUVTOARGBROW_SSSE3
#define HAS_FASTCONVERTYUVTOBGRAROW_SSSE3 #define HAS_FASTCONVERTYUVTOBGRAROW_SSSE3
#define HAS_FASTCONVERTYUVTOABGRROW_SSSE3 #define HAS_FASTCONVERTYUVTOABGRROW_SSSE3
#define HAS_FASTCONVERTYUV444TOARGBROW_SSSE3 #define HAS_FASTCONVERTYUV444TOARGBROW_SSSE3
#define HAS_FASTCONVERTYTOARGBROW_SSE2
#endif #endif
extern "C" { extern "C" {
......
This diff is collapsed.
...@@ -54,8 +54,7 @@ static const vec8 kABGRToV = { ...@@ -54,8 +54,7 @@ static const vec8 kABGRToV = {
}; };
static const uvec8 kAddY16 = { static const uvec8 kAddY16 = {
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
}; };
static const uvec8 kAddUV128 = { static const uvec8 kAddUV128 = {
...@@ -548,27 +547,13 @@ static const vec8 kUVToG = { ...@@ -548,27 +547,13 @@ static const vec8 kUVToG = {
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
}; };
static const vec16 kYToRgb = { static const vec16 kYToRgb = { YG, YG, YG, YG, YG, YG, YG, YG };
YG, YG, YG, YG, YG, YG, YG, YG static const vec16 kYSub16 = { 16, 16, 16, 16, 16, 16, 16, 16 };
}; static const vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB };
static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG };
static const vec16 kYSub16 = { static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
16, 16, 16, 16, 16, 16, 16, 16
};
static const vec16 kUVBiasB = {
BB, BB, BB, BB, BB, BB, BB, BB
};
static const vec16 kUVBiasG = {
BG, BG, BG, BG, BG, BG, BG, BG
};
static const vec16 kUVBiasR = {
BR, BR, BR, BR, BR, BR, BR, BR
};
#define YUVTORGB_SSSE3 __asm { \ #define YUVTORGB __asm { \
/* Step 1: Find 4 UV contributions to 8 R,G,B values */ \ /* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
__asm movd xmm0, [esi] /* U */ \ __asm movd xmm0, [esi] /* U */ \
__asm movd xmm1, [esi + edi] /* V */ \ __asm movd xmm1, [esi + edi] /* V */ \
...@@ -619,7 +604,7 @@ void FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf, ...@@ -619,7 +604,7 @@ void FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf,
pxor xmm4, xmm4 pxor xmm4, xmm4
convertloop: convertloop:
YUVTORGB_SSSE3 YUVTORGB
// Step 3: Weave into ARGB // Step 3: Weave into ARGB
punpcklbw xmm0, xmm1 // BG punpcklbw xmm0, xmm1 // BG
...@@ -658,7 +643,7 @@ void FastConvertYUVToBGRARow_SSSE3(const uint8* y_buf, ...@@ -658,7 +643,7 @@ void FastConvertYUVToBGRARow_SSSE3(const uint8* y_buf,
pxor xmm4, xmm4 pxor xmm4, xmm4
convertloop: convertloop:
YUVTORGB_SSSE3 YUVTORGB
// Step 3: Weave into BGRA // Step 3: Weave into BGRA
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
...@@ -699,7 +684,7 @@ void FastConvertYUVToABGRRow_SSSE3(const uint8* y_buf, ...@@ -699,7 +684,7 @@ void FastConvertYUVToABGRRow_SSSE3(const uint8* y_buf,
pxor xmm4, xmm4 pxor xmm4, xmm4
convertloop: convertloop:
YUVTORGB_SSSE3 YUVTORGB
// Step 3: Weave into ARGB // Step 3: Weave into ARGB
punpcklbw xmm2, xmm1 // RG punpcklbw xmm2, xmm1 // RG
...@@ -787,7 +772,6 @@ void FastConvertYUV444ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -787,7 +772,6 @@ void FastConvertYUV444ToARGBRow_SSSE3(const uint8* y_buf,
#endif #endif
#ifdef HAS_FASTCONVERTYTOARGBROW_SSE2 #ifdef HAS_FASTCONVERTYTOARGBROW_SSE2
__declspec(naked) __declspec(naked)
void FastConvertYToARGBRow_SSE2(const uint8* y_buf, void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
uint8* rgb_buf, uint8* rgb_buf,
...@@ -829,8 +813,8 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf, ...@@ -829,8 +813,8 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
ret ret
} }
} }
#endif #endif
#endif #endif
} // extern "C" } // extern "C"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment