Commit 228bdc24 authored by fbarchard@google.com

port yuv to rgb ssse3 to gcc

BUG=none
TEST=media_unittest
Review URL: http://webrtc-codereview.appspot.com/269015

git-svn-id: http://libyuv.googlecode.com/svn/trunk@80 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 4cf70bd6
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 79
Version: 80
License: BSD
License File: LICENSE
......
......@@ -1136,19 +1136,6 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
} else
#endif
#if defined(HAS_FASTCONVERTYUVTOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
(width % 4 == 0) &&
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow4_SSE2;
} else
#endif
#if defined(HAS_FASTCONVERTYUVTOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
(width % 2 == 0)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSE2;
} else
#endif
{
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
......@@ -1188,12 +1175,6 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_SSSE3;
} else
#endif
#if defined(HAS_FASTCONVERTYUVTOBGRAROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
(width % 2 == 0)) {
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_SSE2;
} else
#endif
{
FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_C;
......@@ -1233,12 +1214,6 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_SSSE3;
} else
#endif
#if defined(HAS_FASTCONVERTYUVTOABGRROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
(width % 2 == 0)) {
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_SSE2;
} else
#endif
{
FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_C;
......@@ -1278,12 +1253,6 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
} else
#endif
#if defined(HAS_FASTCONVERTYUVTOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
(width % 2 == 0)) {
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSE2;
} else
#endif
{
FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
......@@ -1321,11 +1290,6 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_SSSE3;
} else
#endif
#if defined(HAS_FASTCONVERTYUVTOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_SSE2;
} else
#endif
{
FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_C;
......@@ -1354,7 +1318,7 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
uint8* rgb_buf,
int width);
#if defined(HAS_FASTCONVERTYTOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSSE3) &&
if (TestCpuFlag(kCpuHasSSE2) &&
(width % 8 == 0) &&
IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
FastConvertYToARGBRow = FastConvertYToARGBRow_SSE2;
......
......@@ -37,28 +37,17 @@
#define HAS_BGRATOUVROW_SSSE3
#define HAS_ABGRTOUVROW_SSSE3
#define HAS_I400TOARGBROW_SSE2
#endif
// The following are available on Linux (32/64 bit)
// TODO(fbarchard): enable for fpic on linux
#if (defined(__x86_64__) || \
(defined(__i386__) && !defined(__pic__))) && \
!defined(LIBYUV_DISABLE_ASM)
#define HAS_FASTCONVERTYUVTOARGBROW_SSE2
#define HAS_FASTCONVERTYUVTOBGRAROW_SSE2
#define HAS_FASTCONVERTYUVTOABGRROW_SSE2
#define HAS_FASTCONVERTYUV444TOARGBROW_SSE2
#define HAS_FASTCONVERTYTOARGBROW_SSE2
#endif
// The following are available on Windows
#if defined(WIN32) && \
// The following are available on all x86 platforms except 32 bit OSX
#if (defined(WIN32) || defined(__x86_64__) || \
(defined(__i386__) && !defined(__APPLE__))) && \
!defined(LIBYUV_DISABLE_ASM)
#define HAS_FASTCONVERTYUVTOARGBROW_SSSE3
#define HAS_FASTCONVERTYUVTOBGRAROW_SSSE3
#define HAS_FASTCONVERTYUVTOABGRROW_SSSE3
#define HAS_FASTCONVERTYUV444TOARGBROW_SSSE3
#define HAS_FASTCONVERTYTOARGBROW_SSE2
#endif
extern "C" {
......
This diff is collapsed.
......@@ -54,8 +54,7 @@ static const vec8 kABGRToV = {
};
static const uvec8 kAddY16 = {
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
};
static const uvec8 kAddUV128 = {
......@@ -548,27 +547,13 @@ static const vec8 kUVToG = {
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
};
static const vec16 kYToRgb = {
YG, YG, YG, YG, YG, YG, YG, YG
};
static const vec16 kYSub16 = {
16, 16, 16, 16, 16, 16, 16, 16
};
static const vec16 kUVBiasB = {
BB, BB, BB, BB, BB, BB, BB, BB
};
static const vec16 kUVBiasG = {
BG, BG, BG, BG, BG, BG, BG, BG
};
static const vec16 kUVBiasR = {
BR, BR, BR, BR, BR, BR, BR, BR
};
static const vec16 kYToRgb = { YG, YG, YG, YG, YG, YG, YG, YG };
static const vec16 kYSub16 = { 16, 16, 16, 16, 16, 16, 16, 16 };
static const vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB };
static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG };
static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
#define YUVTORGB_SSSE3 __asm { \
#define YUVTORGB __asm { \
/* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
__asm movd xmm0, [esi] /* U */ \
__asm movd xmm1, [esi + edi] /* V */ \
......@@ -619,7 +604,7 @@ void FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf,
pxor xmm4, xmm4
convertloop:
YUVTORGB_SSSE3
YUVTORGB
// Step 3: Weave into ARGB
punpcklbw xmm0, xmm1 // BG
......@@ -658,7 +643,7 @@ void FastConvertYUVToBGRARow_SSSE3(const uint8* y_buf,
pxor xmm4, xmm4
convertloop:
YUVTORGB_SSSE3
YUVTORGB
// Step 3: Weave into BGRA
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
......@@ -699,7 +684,7 @@ void FastConvertYUVToABGRRow_SSSE3(const uint8* y_buf,
pxor xmm4, xmm4
convertloop:
YUVTORGB_SSSE3
YUVTORGB
// Step 3: Weave into ARGB
punpcklbw xmm2, xmm1 // RG
......@@ -787,7 +772,6 @@ void FastConvertYUV444ToARGBRow_SSSE3(const uint8* y_buf,
#endif
#ifdef HAS_FASTCONVERTYTOARGBROW_SSE2
__declspec(naked)
void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
uint8* rgb_buf,
......@@ -829,8 +813,8 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
ret
}
}
#endif
#endif
} // extern "C"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment