pass parameter for yuv conversion

BUG=267
TEST=Luma
R=tpsiaki@google.com
Review URL: https://webrtc-codereview.appspot.com/3169005
git-svn-id: http://libyuv.googlecode.com/svn/trunk@834 16f28f9a-4ce2-e073-06de-1de4eb20be90

pass parameter for yuv conversion
BUG=267
TEST=Luma
R=tpsiaki@google.com
Review URL: https://webrtc-codereview.appspot.com/3169005
git-svn-id: http://libyuv.googlecode.com/svn/trunk@834 16f28f9a-4ce2-e073-06de-1de4eb20be90
11a0d48e · fbarchard@google.com · 15df877b · 11a0d48e · 11a0d48e · 11a0d48e
Commit 11a0d48e authored Oct 31, 2013 by fbarchard@google.com
8 changed files
--- a/README.chromium
+++ b/README.chromium
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 833
+Version: 834
 License: BSD
 License File: LICENSE

--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -1613,12 +1613,11 @@ void ARGBPolynomialRow_AVX2(const uint8* src_argb,
                            uint8* dst_argb, const float* poly,
                            int width);
-void ARGBLumaColorTableRow_C(const uint8* src_argb,
+void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
-                             uint8* dst_argb, const uint8* luma,
+                             const uint8* luma, const uint32 lumacoeff);
-                             int width);
+void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
-void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb,
+                                 int width, const uint8* luma,
-                                 uint8* dst_argb, const uint8* luma,
+                                 const uint32 lumacoeff);
-                                 int width);
 // Divide num by div and return as 16.16 fixed point result.
 int FixedDiv_C(int num, int div);

--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 833
+#define LIBYUV_VERSION 834
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
--- a/source/cpu_id.cc
+++ b/source/cpu_id.cc
@@ -75,7 +75,7 @@ int TestOsSaveYmm() {
    mov        xcr0, eax
  }
 #elif defined(__i386__) || defined(__x86_64__)
-  asm volatile (".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx" );
+  asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
 #endif  // defined(_MSC_VER)
  return((xcr0 & 6) == 6);  // Is ymm saved?
 }

--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -2074,16 +2074,16 @@ int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
-  void (*ARGBLumaColorTableRow)(const uint8* src_argb,
+  void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb,
-                                uint8* dst_argb, const uint8* luma,
+      int width, const uint8* luma, const uint32 lumacoeff) =
-                                int width) = ARGBLumaColorTableRow_C;
+      ARGBLumaColorTableRow_C;
 #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
    ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
  }
 #endif
  for (int y = 0; y < height; ++y) {
-    ARGBLumaColorTableRow(src_argb, dst_argb, luma, width);
+    ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }

--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -2082,30 +2082,22 @@ void ARGBPolynomialRow_C(const uint8* src_argb,
  }
 }
-// RGB to Luminance.
+void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
-// Leverage the fact that we want shifted left by 8 by the caller.
+                             const uint8* luma, const uint32 lumacoeff) {
-//
+  uint32 bc = lumacoeff & 0xff;
-// Borrowed from libyuv/files/source/row_common.cc.
+  uint32 gc = (lumacoeff >> 8) & 0xff;
-// JPeg 7 bit Y:
+  uint32 rc = (lumacoeff >> 16) & 0xff;
-// b 0.11400 * 128 = 14.592 = 15
-// g 0.58700 * 128 = 75.136 = 75
-// r 0.29900 * 128 = 38.272 = 38
-static __inline unsigned int RGBToYJx256(uint8 r, uint8 g, uint8 b) {
-  return (38u * r + 75u * g + 15u * b) & 0x7F00u;
-}
-void ARGBLumaColorTableRow_C(const uint8* src_argb,
-                             uint8* dst_argb, const uint8* luma,
-                             int width) {
  for (int i = 0; i < width - 1; i += 2) {
    // Luminance in rows, color values in columns.
-    const uint8* luma0 = RGBToYJx256(src_argb[2], src_argb[1], src_argb[0]) +
+    const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
-      luma;
+                           src_argb[2] * rc) & 0x7F00u) + luma;
    dst_argb[0] = luma0[src_argb[0]];
    dst_argb[1] = luma0[src_argb[1]];
    dst_argb[2] = luma0[src_argb[2]];
    dst_argb[3] = src_argb[3];
-    const uint8* luma1 = RGBToYJx256(src_argb[6], src_argb[5], src_argb[4]) +
+    const uint8* luma1 = ((src_argb[4] * bc + src_argb[5] * gc +
-      luma;
+                           src_argb[6] * rc) & 0x7F00u) + luma;
    dst_argb[4] = luma1[src_argb[4]];
    dst_argb[5] = luma1[src_argb[5]];
    dst_argb[6] = luma1[src_argb[6]];
@@ -2115,8 +2107,8 @@ void ARGBLumaColorTableRow_C(const uint8* src_argb,
  }
  if (width & 1) {
    // Luminance in rows, color values in columns.
-    const uint8* luma0 = RGBToYJx256(src_argb[2], src_argb[1], src_argb[0]) +
+    const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
-      luma;
+                           src_argb[2] * rc) & 0x7F00u) + luma;
    dst_argb[0] = luma0[src_argb[0]];
    dst_argb[1] = luma0[src_argb[1]];
    dst_argb[2] = luma0[src_argb[2]];

--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -4477,14 +4477,10 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
 void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
                              const int8* matrix_argb, int width) {
  asm volatile (
-    "movd      " MEMACCESS(3) ",%%xmm2         \n"
+    "pshufd    $0x00," MEMACCESS(3) ",%%xmm2   \n"
-    "movd      " MEMACCESS2(0x4,3) ",%%xmm3    \n"
+    "pshufd    $0x55," MEMACCESS(3) ",%%xmm3   \n"
-    "movd      " MEMACCESS2(0x8,3) ",%%xmm4    \n"
+    "pshufd    $0xaa," MEMACCESS(3) ",%%xmm4   \n"
-    "movd      " MEMACCESS2(0xc,3) ",%%xmm5    \n"
+    "pshufd    $0xff," MEMACCESS(3) ",%%xmm5   \n"
-    "pshufd    $0x0,%%xmm2,%%xmm2              \n"
-    "pshufd    $0x0,%%xmm3,%%xmm3              \n"
-    "pshufd    $0x0,%%xmm4,%%xmm4              \n"
-    "pshufd    $0x0,%%xmm5,%%xmm5              \n"
    // 8 pixel loop.
    ".p2align  4                               \n"
@@ -6361,17 +6357,15 @@ void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
 }
 #endif  // HAS_RGBCOLORTABLEROW_X86
-// TODO(fbarchard): Ensure this works with minimal number of registers/gcc32.
 #ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
 // Transform RGB pixels with luma table.
-void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb,
+void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
-                                 uint8* dst_argb, const uint8* luma,
+                                 int width,
-                                 int width) {
+                                 const uint8* luma, uint32 lumacoeff) {
  uintptr_t pixel_temp = 0u;
  uintptr_t table_temp = 0u;
  asm volatile (
-    "mov       $0x264b0f,%%edx                 \n"
+    "movd      %6,%%xmm3                       \n"
-    "movd      %%edx,%%xmm3                    \n"
    "pshufd    $0x0,%%xmm3,%%xmm3              \n"
    "pcmpeqb   %%xmm4,%%xmm4                   \n"
    "psllw     $0x8,%%xmm4                     \n"
@@ -6456,7 +6450,8 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb,
    "+r"(src_argb),    // %2
    "+r"(dst_argb),    // %3
    "+rm"(width)       // %4
-  : "r"(luma)          // %5
+  : "r"(luma),         // %5
+    "rm"(lumacoeff)    // %6
  : "memory", "cc"
 #if defined(__SSE2__)
    , "xmm0", "xmm3", "xmm4", "xmm5"

--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -5146,17 +5146,13 @@ void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
    mov        eax, [esp + 4]   /* src_argb */
    mov        edx, [esp + 8]   /* dst_argb */
    mov        ecx, [esp + 12]  /* matrix_argb */
-    movd       xmm2, [ecx]
+    pshufd     xmm2, [ecx], 0x00
-    movd       xmm3, [ecx + 4]
+    pshufd     xmm3, [ecx], 0x55
-    movd       xmm4, [ecx + 8]
+    pshufd     xmm4, [ecx], 0xaa
-    movd       xmm5, [ecx + 12]
+    pshufd     xmm5, [ecx], 0xff
-    pshufd     xmm2, xmm2, 0
-    pshufd     xmm3, xmm3, 0
-    pshufd     xmm4, xmm4, 0
-    pshufd     xmm5, xmm5, 0
    mov        ecx, [esp + 16]  /* width */
-    align      16
+    align      4
 convertloop:
    movdqa     xmm0, [eax]  // B
    movdqa     xmm7, [eax + 16]
@@ -7142,19 +7138,18 @@ void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
 #ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
 // Transform RGB pixels with luma table.
 __declspec(naked) __declspec(align(16))
-void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb,
+void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
-                                 uint8* dst_argb, const uint8* luma,
+                                 int width,
-                                 int width) {
+                                 const uint8* luma, uint32 lumacoeff) {
  __asm {
    push       esi
    push       edi
    mov        eax, [esp + 8 + 4]   /* src_argb */
    mov        edi, [esp + 8 + 8]   /* dst_argb */
-    movd       xmm2, dword ptr [esp + 8 + 12]  /* table_argb */
+    mov        ecx, [esp + 8 + 12]  /* width */
+    movd       xmm2, dword ptr [esp + 8 + 16]  // luma table
+    movd       xmm3, dword ptr [esp + 8 + 20]  // lumacoeff
    pshufd     xmm2, xmm2, 0
-    mov        ecx, [esp + 8 + 16]  /* width */
-    mov        edx, 0x00264b0f  // kARGBToYJ
-    movd       xmm3, edx
    pshufd     xmm3, xmm3, 0
    pcmpeqb    xmm4, xmm4        // generate mask 0xff00ff00
    psllw      xmm4, 8