Commit 27d42c7f authored by fbarchard@google.com

Expose ARGB rotation using scale low levels

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/675004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@292 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 8f439eac
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 291
Version: 292
License: BSD
License File: LICENSE
@@ -19,6 +19,7 @@
#include "libyuv/format_conversion.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/rotate_argb.h"
#include "libyuv/scale.h"
#include "libyuv/scale_argb.h"
#include "libyuv/version.h"
@@ -47,6 +47,11 @@ int I420Mirror(const uint8* src_y, int src_stride_y,
uint8* dst_v, int dst_stride_v,
int width, int height);
// ARGB mirror.
int ARGBMirror(const uint8* src_argb, int src_stride_argb,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height);
// Convert NV12 to ARGB.
int NV12ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
@@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 291
#define LIBYUV_VERSION 292
#endif // INCLUDE_LIBYUV_VERSION_H_
@@ -139,6 +139,41 @@ int I420Mirror(const uint8* src_y, int src_stride_y,
return 0;
}
// ARGB mirror.
int ARGBMirror(const uint8* src_argb, int src_stride_argb,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  if (!src_argb ||
      !dst_argb ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
      ARGBMirrorRow_C;
#if defined(HAS_ARGBMIRRORROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBMirrorRow = ARGBMirrorRow_SSSE3;
  }
#endif
  // Mirror plane
  for (int y = 0; y < height; ++y) {
    ARGBMirrorRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
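Since ARGBMirror treats a negative height as a vertical flip, a 180-degree ARGB rotation falls out of it directly. A minimal sketch of that composition (the helper name is hypothetical and not part of this commit; the commit's rotate_argb path builds on the scale row functions instead):

// Hypothetical sketch: a 180-degree rotation is a horizontal mirror
// combined with a vertical flip, and ARGBMirror performs the vertical
// flip itself when handed a negative height.
int ARGBRotate180Sketch(const uint8* src_argb, int src_stride_argb,
                        uint8* dst_argb, int dst_stride_argb,
                        int width, int height) {
  return ARGBMirror(src_argb, src_stride_argb,
                    dst_argb, dst_stride_argb,
                    width, -height);  // negative height = vertical flip
}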
// Copy ARGB with optional flipping
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
@@ -182,9 +217,9 @@ ARGBBlendRow GetARGBBlend() {
// Alpha Blend 2 ARGB images and store to destination.
int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
              const uint8* src_argb1, int src_stride_argb1,
              uint8* dst_argb, int dst_stride_argb,
              int width, int height) {
if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
return -1;
}
@@ -64,6 +64,7 @@ extern "C" {
#define HAS_I400TOARGBROW_SSE2
#define HAS_MIRRORROW_SSSE3
#define HAS_MIRRORROWUV_SSSE3
#define HAS_ARGBMIRRORROW_SSSE3
#define HAS_RAWTOARGBROW_SSSE3
#define HAS_RGB24TOARGBROW_SSSE3
#define HAS_RGB565TOARGBROW_SSE2
@@ -183,6 +184,9 @@ void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, int width);
void MirrorRowUV_NEON(const uint8* src, uint8* dst_u, uint8* dst_v, int width);
void MirrorRowUV_C(const uint8* src, uint8* dst_u, uint8* dst_v, int width);
void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width);
void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
@@ -587,6 +587,20 @@ void MirrorRowUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
}
}
// Mirror a row of ARGB pixels: copies two pixels per iteration, walking
// the source backwards from its last pixel.
void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
  const uint32* src32 = reinterpret_cast<const uint32*>(src);
  uint32* dst32 = reinterpret_cast<uint32*>(dst);
  src32 += width - 1;  // start at the last pixel of the source row
  for (int x = 0; x < width - 1; x += 2) {
    dst32[x] = src32[0];
    dst32[x + 1] = src32[-1];
    src32 -= 2;
  }
  if (width & 1) {  // odd width: one trailing pixel left to copy
    dst32[width - 1] = src32[0];
  }
}
void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
for (int x = 0; x < width - 1; x += 2) {
dst_u[x] = src_uv[0];
@@ -1977,6 +1977,37 @@ void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
}
#endif // HAS_MIRRORROW_UV_SSSE3
#ifdef HAS_ARGBMIRRORROW_SSSE3
// Shuffle table that reverses the order of the four ARGB pixels in a
// 16-byte vector (byte order within each pixel is preserved).
CONST uvec8 kShuffleMirror = {
  12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u
};

void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
  intptr_t temp_width = static_cast<intptr_t>(width);
  asm volatile (
    "movdqa    %3,%%xmm5                       \n"
    "lea       -0x10(%0),%0                    \n"
    ".p2align  4                               \n"
  "1:                                          \n"
    "movdqa    (%0,%2,4),%%xmm0                \n"  // load 4 pixels from the tail
    "pshufb    %%xmm5,%%xmm0                   \n"  // reverse pixel order
    "sub       $0x4,%2                         \n"
    "movdqa    %%xmm0,(%1)                     \n"  // store 4 pixels at the head
    "lea       0x10(%1),%1                     \n"
    "jg        1b                              \n"
  : "+r"(src),           // %0
    "+r"(dst),           // %1
    "+r"(temp_width)     // %2
  : "m"(kShuffleMirror)  // %3
  : "memory", "cc"
#if defined(__SSE2__)
    , "xmm0", "xmm5"
#endif
  );
}
#endif  // HAS_ARGBMIRRORROW_SSSE3
#ifdef HAS_SPLITUV_SSE2
void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
@@ -2059,6 +2059,35 @@ void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
}
#endif // HAS_MIRRORROW_UV_SSSE3
#ifdef HAS_ARGBMIRRORROW_SSSE3
// Shuffle table that reverses the order of the four ARGB pixels in a
// 16-byte vector (byte order within each pixel is preserved).
static const uvec8 kARGBShuffleMirror = {
  12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u
};

__declspec(naked) __declspec(align(16))
void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
  __asm {
    mov        eax, [esp + 4]   // src
    mov        edx, [esp + 8]   // dst
    mov        ecx, [esp + 12]  // width
    movdqa     xmm5, kARGBShuffleMirror
    lea        eax, [eax - 16]

    align      16
  convertloop:
    movdqa     xmm0, [eax + ecx * 4]  // load 4 pixels from the tail
    pshufb     xmm0, xmm5             // reverse pixel order
    sub        ecx, 4
    movdqa     [edx], xmm0            // store 4 pixels at the head
    lea        edx, [edx + 16]
    jg         convertloop
    ret
  }
}
#endif // HAS_ARGBMIRRORROW_SSSE3
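Both asm variants implement the same loop: index the source from its tail with the width register, reverse the four pixels with pshufb, and store forward through the destination. For reference, an SSSE3 intrinsics rendering of the same row operation (a sketch, not part of libyuv; it assumes width is a multiple of 4, which the dispatch in ARGBMirror guarantees for this path):

#include <tmmintrin.h>  // SSSE3: _mm_shuffle_epi8

typedef unsigned char uint8;  // stand-in for libyuv/basic_types.h

// Intrinsics sketch of ARGBMirrorRow_SSSE3: load 4 ARGB pixels from the
// end of the source row, reverse the 4-byte pixel groups with PSHUFB,
// and store them at the front of the destination.
static void ARGBMirrorRowSketch(const uint8* src, uint8* dst, int width) {
  const __m128i kMirror = _mm_setr_epi8(12, 13, 14, 15, 8, 9, 10, 11,
                                        4, 5, 6, 7, 0, 1, 2, 3);
  src += (width - 4) * 4;  // last group of 4 pixels in the source row
  for (int x = 0; x < width; x += 4) {
    __m128i pixels = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src));
    pixels = _mm_shuffle_epi8(pixels, kMirror);  // reverse pixel order
    _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), pixels);
    src -= 16;
    dst += 16;
  }
}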
#ifdef HAS_SPLITUV_SSE2
__declspec(naked) __declspec(align(16))
void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
@@ -97,9 +97,9 @@ static void ScaleARGBRowDown2Int_SSE2(const uint8* src_ptr, int src_stride,
// Reads 4 pixels at a time.
// Alignment requirement: dst_ptr 16 byte aligned.
__declspec(naked) __declspec(align(16))
static void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride,
int src_stepx,
uint8* dst_ptr, int dst_width) {
void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride,
int src_stepx,
uint8* dst_ptr, int dst_width) {
__asm {
push ebx
push edi
@@ -414,9 +414,9 @@ static void ScaleARGBRowDown2Int_SSE2(const uint8* src_ptr, int src_stride,
#define HAS_SCALEARGBROWDOWNEVEN_SSE2
// Reads 4 pixels at a time.
// Alignment requirement: dst_ptr 16 byte aligned.
static void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride,
int src_stepx,
uint8* dst_ptr, int dst_width) {
void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride,
int src_stepx,
uint8* dst_ptr, int dst_width) {
intptr_t src_stepx_x4 = static_cast<intptr_t>(src_stepx);
intptr_t src_stepx_x12 = 0;
asm volatile (
@@ -669,9 +669,9 @@ static void ScaleARGBRowDown2Int_C(const uint8* src_ptr, int src_stride,
}
}
static void ScaleARGBRowDownEven_C(const uint8* src_ptr, int,
int src_stepx,
uint8* dst_ptr, int dst_width) {
void ScaleARGBRowDownEven_C(const uint8* src_ptr, int,
int src_stepx,
uint8* dst_ptr, int dst_width) {
const uint32* src = reinterpret_cast<const uint32*>(src_ptr);
uint32* dst = reinterpret_cast<uint32*>(dst_ptr);
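Removing static from the ScaleARGBRowDownEven_* functions is the point of the commit title: once exported, the scale row functions double as rotation low levels, because reading a source column with a pixel step of src_stride / 4 is the same as scaling a strided "row". A sketch of the idea (the function name is mine; the actual rotate_argb implementation may differ in dispatch and alignment handling):

// Sketch: transpose an ARGB image with the exported scale row function.
// Each source column, read with a step of src_stride / 4 pixels, becomes
// one destination row; combined with a mirror this yields 90/270-degree
// rotations.
static void ARGBTransposeSketch(const uint8* src, int src_stride,
                                uint8* dst, int dst_stride,
                                int width, int height) {
  const int src_pixel_step = src_stride >> 2;  // stride in whole ARGB pixels
  for (int i = 0; i < width; ++i) {
    ScaleARGBRowDownEven_C(src, 0, src_pixel_step, dst, height);
    dst += dst_stride;  // next destination row
    src += 4;           // next source column (4 bytes per pixel)
  }
}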
@@ -218,9 +218,9 @@ TESTATOPLANAR(RGB24, 3, I420, 2, 2)
TESTATOPLANAR(RGB565, 2, I420, 2, 2)
TESTATOPLANAR(ARGB1555, 2, I420, 2, 2)
TESTATOPLANAR(ARGB4444, 2, I420, 2, 2)
// TESTATOPLANAR(ARGB, 4, I411, 4, 1)
TESTATOPLANAR(ARGB, 4, I422, 2, 1)
// TESTATOPLANAR(ARGB, 4, I444, 1, 1)
// TODO(fbarchard): Implement and test 411 and 444
#define TESTATOB(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B) \
@@ -625,4 +625,27 @@ TEST_F(libyuvTest, TestARGBQuantize) {
}
}
TEST_F(libyuvTest, TestARGBMirror) {
  SIMD_ALIGNED(uint8 orig_pixels[256][4]);
  SIMD_ALIGNED(uint8 dst_pixels[256][4]);

  for (int i = 0; i < 256; ++i) {
    orig_pixels[i][0] = i;
    orig_pixels[i][1] = i / 2;
    orig_pixels[i][2] = i / 3;
    orig_pixels[i][3] = i / 4;
  }
  ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 256, 1);
  for (int i = 0; i < 256; ++i) {
    EXPECT_EQ(i, dst_pixels[255 - i][0]);
    EXPECT_EQ(i / 2, dst_pixels[255 - i][1]);
    EXPECT_EQ(i / 3, dst_pixels[255 - i][2]);
    EXPECT_EQ(i / 4, dst_pixels[255 - i][3]);
  }
  // Benchmark: mirror the pixel equivalent of 1000 1280x720 frames,
  // 256 pixels per call.
  for (int i = 0; i < 1000 * 1280 * 720 / 256; ++i) {
    ARGBMirror(&orig_pixels[0][0], 0, &dst_pixels[0][0], 0, 256, 1);
  }
}
} // namespace libyuv