Commit 2007dca6 authored by fbarchard@google.com's avatar fbarchard@google.com

Mirror

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/1162005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@592 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent af08b9d1
...@@ -75,6 +75,7 @@ extern "C" { ...@@ -75,6 +75,7 @@ extern "C" {
#define HAS_MERGEUVROW_SSE2 #define HAS_MERGEUVROW_SSE2
#define HAS_MIRRORROW_SSSE3 #define HAS_MIRRORROW_SSSE3
#define HAS_MIRRORUVROW_SSSE3 #define HAS_MIRRORUVROW_SSSE3
#define HAS_MIRRORROW_AVX2
#define HAS_NV12TOARGBROW_SSSE3 #define HAS_NV12TOARGBROW_SSSE3
#define HAS_NV12TORGB565ROW_SSSE3 #define HAS_NV12TORGB565ROW_SSSE3
#define HAS_NV21TOARGBROW_SSSE3 #define HAS_NV21TOARGBROW_SSSE3
...@@ -557,6 +558,7 @@ void ARGBToUV422Row_C(const uint8* src_argb, ...@@ -557,6 +558,7 @@ void ARGBToUV422Row_C(const uint8* src_argb,
void ARGBToUV411Row_C(const uint8* src_argb, void ARGBToUV411Row_C(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width); void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
void MirrorRow_SSE2(const uint8* src, uint8* dst, int width); void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
void MirrorRow_NEON(const uint8* src, uint8* dst, int width); void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
......
...@@ -195,6 +195,13 @@ void MirrorPlane(const uint8* src_y, int src_stride_y, ...@@ -195,6 +195,13 @@ void MirrorPlane(const uint8* src_y, int src_stride_y,
MirrorRow = MirrorRow_SSSE3; MirrorRow = MirrorRow_SSSE3;
} }
#endif #endif
#if defined(HAS_MIRRORROW_AVX2)
bool clear = false;
if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
clear = true;
MirrorRow = MirrorRow_AVX2;
}
#endif
// Mirror plane // Mirror plane
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
...@@ -202,6 +209,11 @@ void MirrorPlane(const uint8* src_y, int src_stride_y, ...@@ -202,6 +209,11 @@ void MirrorPlane(const uint8* src_y, int src_stride_y,
src_y += src_stride_y; src_y += src_stride_y;
dst_y += dst_stride_y; dst_y += dst_stride_y;
} }
#if defined(HAS_MIRRORROW_AVX2)
if (clear) {
__asm vzeroupper;
}
#endif
} }
// Convert YUY2 to I422. // Convert YUY2 to I422.
......
...@@ -881,6 +881,13 @@ void RotatePlane180(const uint8* src, int src_stride, ...@@ -881,6 +881,13 @@ void RotatePlane180(const uint8* src, int src_stride,
MirrorRow = MirrorRow_SSSE3; MirrorRow = MirrorRow_SSSE3;
} }
#endif #endif
#if defined(HAS_MIRRORROW_AVX2)
bool clear = false;
if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
clear = true;
MirrorRow = MirrorRow_AVX2;
}
#endif
#if defined(HAS_MIRRORROW_MIPS_DSPR2) #if defined(HAS_MIRRORROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) && IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) &&
...@@ -906,6 +913,12 @@ void RotatePlane180(const uint8* src, int src_stride, ...@@ -906,6 +913,12 @@ void RotatePlane180(const uint8* src, int src_stride,
CopyRow = CopyRow_SSE2; CopyRow = CopyRow_SSE2;
} }
#endif #endif
#if defined(HAS_COPYROW_AVX2)
// TODO(fbarchard): Detect Fast String support.
if (TestCpuFlag(kCpuHasAVX2)) {
CopyRow = CopyRow_AVX2;
}
#endif
#if defined(HAS_COPYROW_MIPS) #if defined(HAS_COPYROW_MIPS)
if (TestCpuFlag(kCpuHasMIPS)) { if (TestCpuFlag(kCpuHasMIPS)) {
CopyRow = CopyRow_MIPS; CopyRow = CopyRow_MIPS;
...@@ -929,6 +942,11 @@ void RotatePlane180(const uint8* src, int src_stride, ...@@ -929,6 +942,11 @@ void RotatePlane180(const uint8* src, int src_stride,
src_bot -= src_stride; src_bot -= src_stride;
dst_bot -= dst_stride; dst_bot -= dst_stride;
} }
#if defined(HAS_MIRRORROW_AVX2)
if (clear) {
__asm vzeroupper;
}
#endif
} }
static void TransposeUVWx8_C(const uint8* src, int src_stride, static void TransposeUVWx8_C(const uint8* src, int src_stride,
......
...@@ -122,6 +122,12 @@ void ARGBRotate180(const uint8* src, int src_stride, ...@@ -122,6 +122,12 @@ void ARGBRotate180(const uint8* src, int src_stride,
CopyRow = CopyRow_SSE2; CopyRow = CopyRow_SSE2;
} }
#endif #endif
#if defined(HAS_COPYROW_AVX2)
// TODO(fbarchard): Detect Fast String support.
if (TestCpuFlag(kCpuHasAVX2)) {
CopyRow = CopyRow_AVX2;
}
#endif
#if defined(HAS_COPYROW_MIPS) #if defined(HAS_COPYROW_MIPS)
if (TestCpuFlag(kCpuHasMIPS)) { if (TestCpuFlag(kCpuHasMIPS)) {
CopyRow = CopyRow_MIPS; CopyRow = CopyRow_MIPS;
......
...@@ -2905,7 +2905,6 @@ void YToARGBRow_SSE2(const uint8* y_buf, ...@@ -2905,7 +2905,6 @@ void YToARGBRow_SSE2(const uint8* y_buf,
#endif // HAS_YTOARGBROW_SSE2 #endif // HAS_YTOARGBROW_SSE2
#ifdef HAS_MIRRORROW_SSSE3 #ifdef HAS_MIRRORROW_SSSE3
// Shuffle table for reversing the bytes. // Shuffle table for reversing the bytes.
static const uvec8 kShuffleMirror = { static const uvec8 kShuffleMirror = {
15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
...@@ -2933,6 +2932,36 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { ...@@ -2933,6 +2932,36 @@ void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
} }
#endif // HAS_MIRRORROW_SSSE3 #endif // HAS_MIRRORROW_SSSE3
#ifdef HAS_MIRRORROW_AVX2
// Shuffle table for reversing the bytes.
// vpshufb operates independently on each 128-bit lane, so this table reverses
// the 16 bytes within each lane; the vpermq below then swaps the two lanes to
// complete a full 32-byte reversal.
static const ulvec8 kShuffleMirror_AVX2 = {
  15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u,
  15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
};
// Reverse a row of bytes: dst[i] = src[width - 1 - i].
// Requires AVX2 and width to be a multiple of 32 (caller guards with
// IS_ALIGNED(width, 32) before selecting this path).
// __declspec(naked): no compiler prologue/epilogue; arguments are read
// directly off the stack and the function ends with an explicit ret.
// NOTE(review): no vzeroupper here before ret — callers issue it after the
// row loop (see the `clear` flag pattern in MirrorPlane/RotatePlane180) to
// avoid AVX->SSE transition penalties; confirm every caller does so.
__declspec(naked) __declspec(align(16))
void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) {
__asm {
    mov eax, [esp + 4] // src
    mov edx, [esp + 8] // dst
    mov ecx, [esp + 12] // width
    vmovdqa ymm5, kShuffleMirror_AVX2
    // Bias src by -32 so [eax + ecx] addresses the last unread 32-byte
    // chunk of the source row as ecx counts down from width to 0.
    lea eax, [eax - 32]
    align 16
 convertloop:
    vmovdqu ymm0, [eax + ecx]   // load 32 source bytes, ending at src + ecx
    vpshufb ymm0, ymm0, ymm5    // reverse bytes within each 128-bit lane
    vpermq ymm0, ymm0, 0x4e // swap high and low halves
    sub ecx, 32
    vmovdqu [edx], ymm0         // store reversed chunk; dst advances forward
    lea edx, [edx + 32]
    jg convertloop              // loop while ecx > 0 (flags from sub above)
    ret
  }
}
#endif // HAS_MIRRORROW_AVX2
#ifdef HAS_MIRRORROW_SSE2 #ifdef HAS_MIRRORROW_SSE2
// SSE2 version has movdqu so it can be used on unaligned buffers when SSSE3 // SSE2 version has movdqu so it can be used on unaligned buffers when SSSE3
// version can not. // version can not.
...@@ -3000,7 +3029,6 @@ void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, ...@@ -3000,7 +3029,6 @@ void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v,
#endif // HAS_MIRRORROW_UV_SSSE3 #endif // HAS_MIRRORROW_UV_SSSE3
#ifdef HAS_ARGBMIRRORROW_SSSE3 #ifdef HAS_ARGBMIRRORROW_SSSE3
// Shuffle table for reversing the bytes. // Shuffle table for reversing the bytes.
static const uvec8 kARGBShuffleMirror = { static const uvec8 kARGBShuffleMirror = {
12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u 12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment