port yuv to rgb ssse3 to gcc

BUG=none
TEST=media_unittest
Review URL: http://webrtc-codereview.appspot.com/269015

git-svn-id: http://libyuv.googlecode.com/svn/trunk@80 16f28f9a-4ce2-e073-06de-1de4eb20be90

port yuv to rgb ssse3 to gcc
BUG=none
TEST=media_unittest
Review URL: http://webrtc-codereview.appspot.com/269015

git-svn-id: http://libyuv.googlecode.com/svn/trunk@80 16f28f9a-4ce2-e073-06de-1de4eb20be90
228bdc24 · fbarchard@google.com · 4cf70bd6 · 228bdc24 · 228bdc24 · 228bdc24
Commit 228bdc24 authored Nov 15, 2011 by fbarchard@google.com
Showing 5 changed files with 16 additions and 79 deletions

README.chromium README.chromium +1 -1

planar_functions.cc source/planar_functions.cc +1 -37

row.h source/row.h +3 -14

row_posix.cc source/row_posix.cc +0 -0

row_win.cc source/row_win.cc +11 -27

No files found.
--- a/README.chromium
+++ b/README.chromium
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 79
+Version: 80
 License: BSD
 License File: LICENSE

--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -1136,19 +1136,6 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
      IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
    FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
  } else
-#endif
-#if defined(HAS_FASTCONVERTYUVTOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      (width % 4 == 0) &&
-      IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
-    FastConvertYUVToARGBRow = FastConvertYUVToARGBRow4_SSE2;
-  } else
-#endif
-#if defined(HAS_FASTCONVERTYUVTOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      (width % 2 == 0)) {
-    FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSE2;
-  } else
 #endif
  {
    FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
@@ -1188,12 +1175,6 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
      IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
    FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_SSSE3;
  } else
-#endif
-#if defined(HAS_FASTCONVERTYUVTOBGRAROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      (width % 2 == 0)) {
-    FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_SSE2;
-  } else
 #endif
  {
    FastConvertYUVToBGRARow = FastConvertYUVToBGRARow_C;
@@ -1233,12 +1214,6 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
      IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
    FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_SSSE3;
  } else
-#endif
-#if defined(HAS_FASTCONVERTYUVTOABGRROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      (width % 2 == 0)) {
-    FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_SSE2;
-  } else
 #endif
  {
    FastConvertYUVToABGRRow = FastConvertYUVToABGRRow_C;
@@ -1278,12 +1253,6 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
      IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
    FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSSE3;
  } else
-#endif
-#if defined(HAS_FASTCONVERTYUVTOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      (width % 2 == 0)) {
-    FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_SSE2;
-  } else
 #endif
  {
    FastConvertYUVToARGBRow = FastConvertYUVToARGBRow_C;
@@ -1321,11 +1290,6 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
      IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
    FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_SSSE3;
  } else
-#endif
-#if defined(HAS_FASTCONVERTYUVTOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
-    FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_SSE2;
-  } else
 #endif
  {
    FastConvertYUV444ToARGBRow = FastConvertYUV444ToARGBRow_C;
@@ -1354,7 +1318,7 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
                                 uint8* rgb_buf,
                                 int width);
 #if defined(HAS_FASTCONVERTYTOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSSE3) &&
+  if (TestCpuFlag(kCpuHasSSE2) &&
      (width % 8 == 0) &&
      IS_ALIGNED(dst_argb, 16) && (dst_stride_argb % 16 == 0)) {
    FastConvertYToARGBRow = FastConvertYToARGBRow_SSE2;

--- a/source/row.h
+++ b/source/row.h
@@ -37,28 +37,17 @@
 #define HAS_BGRATOUVROW_SSSE3
 #define HAS_ABGRTOUVROW_SSSE3
 #define HAS_I400TOARGBROW_SSE2
-#endif
-// The following are available on Linux (32/64 bit)
-// TODO(fbarchard): enable for fpic on linux
-#if (defined(__x86_64__) || \
-    (defined(__i386__) && !defined(__pic__))) && \
-    !defined(LIBYUV_DISABLE_ASM)
-#define HAS_FASTCONVERTYUVTOARGBROW_SSE2
-#define HAS_FASTCONVERTYUVTOBGRAROW_SSE2
-#define HAS_FASTCONVERTYUVTOABGRROW_SSE2
-#define HAS_FASTCONVERTYUV444TOARGBROW_SSE2
 #define HAS_FASTCONVERTYTOARGBROW_SSE2
 #endif
-// The following are available on Windows
+// The following are available on all x86 platforms except 32 bit OSX
-#if defined(WIN32) && \
+#if (defined(WIN32) || defined(__x86_64__) || \
+    (defined(__i386__) && !defined(__APPLE__))) && \
    !defined(LIBYUV_DISABLE_ASM)
 #define HAS_FASTCONVERTYUVTOARGBROW_SSSE3
 #define HAS_FASTCONVERTYUVTOBGRAROW_SSSE3
 #define HAS_FASTCONVERTYUVTOABGRROW_SSSE3
 #define HAS_FASTCONVERTYUV444TOARGBROW_SSSE3
-#define HAS_FASTCONVERTYTOARGBROW_SSE2
 #endif
 extern "C" {

--- a/source/row_posix.cc
+++ b/source/row_posix.cc
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -54,8 +54,7 @@ static const vec8 kABGRToV = {
 };
 static const uvec8 kAddY16 = {
-  16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
+  16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u
-  16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
 };
 static const uvec8 kAddUV128 = {
@@ -548,27 +547,13 @@ static const vec8 kUVToG = {
  UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
 };
-static const vec16 kYToRgb = {
+static const vec16 kYToRgb = { YG, YG, YG, YG, YG, YG, YG, YG };
-  YG, YG, YG, YG, YG, YG, YG, YG
+static const vec16 kYSub16 = { 16, 16, 16, 16, 16, 16, 16, 16 };
-};
+static const vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB };
+static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG };
-static const vec16 kYSub16 = {
+static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
-  16, 16, 16, 16, 16, 16, 16, 16
-};
-static const vec16 kUVBiasB = {
-  BB, BB, BB, BB, BB, BB, BB, BB
-};
-static const vec16 kUVBiasG = {
-  BG, BG, BG, BG, BG, BG, BG, BG
-};
-static const vec16 kUVBiasR = {
-  BR, BR, BR, BR, BR, BR, BR, BR
-};
-#define YUVTORGB_SSSE3 __asm {                                                 \
+#define YUVTORGB __asm {                                                 \
    /* Step 1: Find 4 UV contributions to 8 R,G,B values */                    \
    __asm movd       xmm0, [esi]          /* U */                              \
    __asm movd       xmm1, [esi + edi]    /* V */                              \
@@ -619,7 +604,7 @@ void FastConvertYUVToARGBRow_SSSE3(const uint8* y_buf,
    pxor       xmm4, xmm4
 convertloop:
-    YUVTORGB_SSSE3
+    YUVTORGB
    // Step 3: Weave into ARGB
    punpcklbw  xmm0, xmm1           // BG
@@ -658,7 +643,7 @@ void FastConvertYUVToBGRARow_SSSE3(const uint8* y_buf,
    pxor       xmm4, xmm4
 convertloop:
-    YUVTORGB_SSSE3
+    YUVTORGB
    // Step 3: Weave into BGRA
    pcmpeqb    xmm5, xmm5           // generate 0xffffffff for alpha
@@ -699,7 +684,7 @@ void FastConvertYUVToABGRRow_SSSE3(const uint8* y_buf,
    pxor       xmm4, xmm4
 convertloop:
-    YUVTORGB_SSSE3
+    YUVTORGB
    // Step 3: Weave into ARGB
    punpcklbw  xmm2, xmm1           // RG
@@ -787,7 +772,6 @@ void FastConvertYUV444ToARGBRow_SSSE3(const uint8* y_buf,
 #endif
 #ifdef HAS_FASTCONVERTYTOARGBROW_SSE2
 __declspec(naked)
 void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
                                uint8* rgb_buf,
@@ -829,8 +813,8 @@ void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
    ret
  }
 }
 #endif
 #endif
 }  // extern "C"