Add ARGBSobelToPlane, which produces a planar output.

BUG=none TEST=none R=ryanpetrie@google.com Review URL: https://webrtc-codereview.appspot.com/2415005 git-svn-id: http://libyuv.googlecode.com/svn/trunk@818 16f28f9a-4ce2-e073-06de-1de4eb20be90

Add ARGBSobelToPlane, which produces a planar output.
BUG=none TEST=none R=ryanpetrie@google.com Review URL: https://webrtc-codereview.appspot.com/2415005 git-svn-id: http://libyuv.googlecode.com/svn/trunk@818 16f28f9a-4ce2-e073-06de-1de4eb20be90
8be4b289 · fbarchard@google.com · e35422d9 · 8be4b289 · 8be4b289 · 8be4b289
Commit 8be4b289 authored Oct 21, 2013 by fbarchard@google.com
10 changed files
--- a/README.chromium
+++ b/README.chromium
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 817
+Version: 818
 License: BSD
 License File: LICENSE


--- a/include/libyuv/planar_functions.h
+++ b/include/libyuv/planar_functions.h
@@ -402,6 +402,12 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
                uint8* dst_argb, int dst_stride_argb,
                const uint8* shuffler, int width, int height);

+// Sobel ARGB effect with planar output.
+LIBYUV_API
+int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
+                     uint8* dst_y, int dst_stride_y,
+                     int width, int height);
+
 // Sobel ARGB effect.
 LIBYUV_API
 int ARGBSobel(const uint8* src_argb, int src_stride_argb,

--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -59,6 +59,7 @@ extern "C" {
 #define HAS_INTERPOLATEROW_SSE2
 #define HAS_INTERPOLATEROW_SSSE3
 #define HAS_SOBELROW_SSE2
+#define HAS_SOBELTOPLANEROW_SSE2
 #define HAS_SOBELXROW_SSSE3
 #define HAS_SOBELXYROW_SSE2
 #define HAS_SOBELYROW_SSSE3
@@ -313,6 +314,7 @@ extern "C" {
 #define HAS_ARGBSHADEROW_NEON
 #define HAS_ARGBSUBTRACTROW_NEON
 #define HAS_SOBELROW_NEON
+#define HAS_SOBELTOPLANEROW_NEON
 #define HAS_SOBELXYROW_NEON
 #define HAS_SOBELXROW_NEON
 #define HAS_SOBELYROW_NEON
@@ -1578,6 +1580,12 @@ void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
                   uint8* dst_argb, int width);
 void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
                   uint8* dst_argb, int width);
+void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
+                       uint8* dst_y, int width);
+void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+                          uint8* dst_y, int width);
+void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
+                          uint8* dst_y, int width);
 void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
                  uint8* dst_argb, int width);
 void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,

--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_

-#define LIBYUV_VERSION 817
+#define LIBYUV_VERSION 818

 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -1861,10 +1861,12 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
 }

 // Sobel ARGB effect.
-LIBYUV_API
-int ARGBSobel(const uint8* src_argb, int src_stride_argb,
+static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
                        uint8* dst_argb, int dst_stride_argb,
-              int width, int height) {
+                        int width, int height,
+                        void (*SobelRow)(const uint8* src_sobelx,
+                                         const uint8* src_sobely,
+                                         uint8* dst, int width)) {
  const int kMaxRow = kMaxStride / 4;
  const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
  if (!src_argb  || !dst_argb ||
@@ -1921,20 +1923,6 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb,
    SobelXRow = SobelXRow_NEON;
  }
 #endif
-  void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
-                   uint8* dst_argb, int width) = SobelRow_C;
-#if defined(HAS_SOBELROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
-      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    SobelRow = SobelRow_SSE2;
-  }
-#endif
-#if defined(HAS_SOBELROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
-    SobelRow = SobelRow_NEON;
-  }
-#endif
-
  // 3 rows with edges before/after.
  SIMD_ALIGNED(uint8 row_y[kEdge + kMaxRow * 3]);
  SIMD_ALIGNED(uint8 row_sobelx[kMaxRow]);
@@ -1976,68 +1964,56 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb,
  return 0;
 }

-// SobelXY ARGB effect.
-// Similar to Sobel, but also stores Sobel X in R and Sobel Y in B.  G = Sobel.
+// Sobel ARGB effect.
 LIBYUV_API
-int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
+int ARGBSobel(const uint8* src_argb, int src_stride_argb,
              uint8* dst_argb, int dst_stride_argb,
              int width, int height) {
-  const int kMaxRow = kMaxStride / 4;
-  const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
-  if (!src_argb  || !dst_argb ||
-      width <= 0 || height == 0 || width > (kMaxRow - kEdge)) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    src_argb  = src_argb  + (height - 1) * src_stride_argb;
-    src_stride_argb = -src_stride_argb;
-  }
-  // ARGBToBayer used to select G channel from ARGB.
-  void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
-                         uint32 selector, int pix) = ARGBToBayerRow_C;
-#if defined(HAS_ARGBTOBAYERROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
-      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
-    ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToBayerRow = ARGBToBayerRow_SSSE3;
-    }
-  }
-#elif defined(HAS_ARGBTOBAYERROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
-    ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBToBayerRow = ARGBToBayerRow_NEON;
-    }
-  }
-#endif
-  void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
-                    uint8* dst_sobely, int width) = SobelYRow_C;
-#if defined(HAS_SOBELYROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    SobelYRow = SobelYRow_SSSE3;
+  void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
+                   uint8* dst_argb, int width) = SobelRow_C;
+#if defined(HAS_SOBELROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+    SobelRow = SobelRow_SSE2;
  }
 #endif
-#if defined(HAS_SOBELYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    SobelYRow = SobelYRow_NEON;
+#if defined(HAS_SOBELROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
+    SobelRow = SobelRow_NEON;
  }
 #endif
-  void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
-                    const uint8* src_y2, uint8* dst_sobely, int width) =
-      SobelXRow_C;
-#if defined(HAS_SOBELXROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    SobelXRow = SobelXRow_SSSE3;
+  return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
+                      width, height, SobelRow);
+}
+
+// Sobel ARGB effect with planar output.
+// Selects the C, SSE2 or NEON SobelToPlaneRow implementation and passes it to
+// ARGBSobelize together with the source and destination arguments.
+// Returns the value returned by ARGBSobelize.
+LIBYUV_API
+int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
+                     uint8* dst_y, int dst_stride_y,
+                     int width, int height) {
+  // Portable C row function is the default.
+  void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
+                          uint8* dst_y, int width) = SobelToPlaneRow_C;
+#if defined(HAS_SOBELTOPLANEROW_SSE2)
+  // SSE2 row is used only when width, dst_y and dst_stride_y are all
+  // multiples of 16.
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
+      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+    SobelToPlaneRow = SobelToPlaneRow_SSE2;
  }
 #endif
-#if defined(HAS_SOBELXROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    SobelXRow = SobelXRow_NEON;
+#if defined(HAS_SOBELTOPLANEROW_NEON)
+  // NEON row is used only when width is a multiple of 16.
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
+    SobelToPlaneRow = SobelToPlaneRow_NEON;
  }
 #endif
+  return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y,
+                      width, height, SobelToPlaneRow);
+}
+
+// SobelXY ARGB effect.
+// Similar to Sobel, but also stores Sobel X in R and Sobel Y in B.  G = Sobel.
+LIBYUV_API
+int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
+                uint8* dst_argb, int dst_stride_argb,
+                int width, int height) {
  void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
                     uint8* dst_argb, int width) = SobelXYRow_C;
 #if defined(HAS_SOBELXYROW_SSE2)
@@ -2051,45 +2027,8 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
    SobelXYRow = SobelXYRow_NEON;
  }
 #endif
-
-  SIMD_ALIGNED(uint8 row_y[kEdge + kMaxRow * 3]);
-  SIMD_ALIGNED(uint8 row_sobelx[kMaxRow]);
-  SIMD_ALIGNED(uint8 row_sobely[kMaxRow]);
-
-  // Convert first row.
-  uint8* row_y0 = row_y + kEdge;
-  uint8* row_y1 = row_y0 + kMaxRow;
-  uint8* row_y2 = row_y1 + kMaxRow;
-  ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width);
-  row_y0[-1] = row_y0[0];
-  memset(row_y0 + width, row_y0[width - 1], 16);  // extrude 16 pixels.
-  ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width);
-  row_y1[-1] = row_y1[0];
-  memset(row_y1 + width, row_y1[width - 1], 16);
-  memset(row_y2 + width, 0, 16);
-
-  for (int y = 0; y < height; ++y) {
-    // Convert next row of ARGB to Y.
-    if (y < (height - 1)) {
-      src_argb += src_stride_argb;
-    }
-    ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);
-    row_y2[-1] = row_y2[0];
-    row_y2[width] = row_y2[width - 1];
-
-    SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
-    SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
-    SobelXYRow(row_sobelx, row_sobely, dst_argb, width);
-
-    // Cycle thru circular queue of 3 row_y buffers.
-    uint8* row_yt = row_y0;
-    row_y0 = row_y1;
-    row_y1 = row_y2;
-    row_y2 = row_yt;
-
-    dst_argb += dst_stride_argb;
-  }
-  return 0;
+  return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
+                      width, height, SobelXYRow);
 }

 // Apply a 4x4 polynomial to each ARGB pixel.

--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -869,6 +869,16 @@ void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
  }
 }

+// Adds Sobel X and Sobel Y for each pixel, passes the sum through clamp255
+// and stores one byte per pixel into dst_y.
+void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
+                       uint8* dst_y, int width) {
+  for (int x = 0; x < width; ++x) {
+    const int sum = src_sobelx[x] + src_sobely[x];
+    dst_y[x] = static_cast<uint8>(clamp255(sum));
+  }
+}
+
 void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
                  uint8* dst_argb, int width) {
  for (int i = 0; i < width; ++i) {

--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -2644,6 +2644,28 @@ void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
  );
 }

+// Adds Sobel X and Sobel Y and stores Sobel into a plane, 16 pixels per loop iteration.
+void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
+                          uint8* dst_y, int width) {
+  asm volatile (
+    // 16 pixel loop.
+    ".p2align  2                               \n"
+  "1:                                          \n"
+    "vld1.8     {q0}, [%0]!                    \n"  // load 16 sobelx.
+    "vld1.8     {q1}, [%1]!                    \n"  // load 16 sobely.
+    "subs       %3, %3, #16                    \n"  // 16 processed per loop.
+    "vqadd.u8   q0, q0, q1                     \n"  // saturating add: sobel = sobelx + sobely.
+    "vst1.8     {q0}, [%2]!                    \n"  // store 16 pixels.
+    "bgt        1b                             \n"  // repeat while the remaining width is > 0.
+  : "+r"(src_sobelx),  // %0
+    "+r"(src_sobely),  // %1
+    "+r"(dst_y),       // %2
+    "+r"(width)        // %3
+  :
+  : "cc", "memory", "q0", "q1"
+  );
+}
+
 // Mixes Sobel X, Sobel Y and Sobel into ARGB.
 // A = 255
 // R = Sobel X

--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -4767,6 +4767,43 @@ void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
 }
 #endif  // HAS_SOBELROW_SSE2

+#ifdef HAS_SOBELTOPLANEROW_SSE2
+// Adds Sobel X and Sobel Y and stores Sobel into a plane.
+// Each iteration loads 16 bytes from src_sobelx and src_sobely, adds them
+// with unsigned saturation (paddusb) and stores 16 bytes to dst_y.
+// All three pointers are accessed with movdqa, which requires 16 byte
+// alignment.  The loop subtracts 16 from width each pass and repeats while
+// the result is greater than zero.
+void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+                          uint8* dst_y, int width) {
+  asm volatile (
+    // Turn src_sobely into an offset from src_sobelx so a single pointer
+    // increment advances both sources.
+    "sub       %0,%1                           \n"
+
+    // 16 pixel loop.
+    ".p2align  4                               \n"
+    BUNDLEALIGN
+  "1:                                          \n"
+    "movdqa    "MEMACCESS(0)",%%xmm0           \n"
+    MEMOPREG(movdqa,0x00,0,1,1,xmm1)           //  movdqa    (%0,%1,1),%%xmm1
+    "lea       "MEMLEA(0x10,0)",%0             \n"
+    "paddusb   %%xmm1,%%xmm0                   \n"
+    "sub       $0x10,%3                        \n"
+    "movdqa    %%xmm0,"MEMACCESS(2)"           \n"
+    "lea       "MEMLEA(0x10,2)",%2             \n"
+    "jg        1b                              \n"
+  : "+r"(src_sobelx),  // %0
+    "+r"(src_sobely),  // %1
+    "+r"(dst_y),       // %2
+    "+r"(width)        // %3
+  :
+  : "memory", "cc"
+#if defined(__native_client__) && defined(__x86_64__)
+    , "r14"
+#endif
+#if defined(__SSE2__)
+    , "xmm0", "xmm1"
+#endif
+  );
+}
+#endif  // HAS_SOBELTOPLANEROW_SSE2
+
 #ifdef HAS_SOBELXYROW_SSE2
 // Mixes Sobel X, Sobel Y and Sobel into ARGB.
 // A = 255

--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -5737,6 +5737,36 @@ void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
 }
 #endif  // HAS_SOBELROW_SSE2

+#ifdef HAS_SOBELTOPLANEROW_SSE2
+// Adds Sobel X and Sobel Y and stores Sobel into a plane, 16 pixels per loop iteration.
+__declspec(naked) __declspec(align(16))
+void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
+                          uint8* dst_y, int width) {
+  __asm {
+    push       esi                  // saved here, restored by the pop before ret
+    mov        eax, [esp + 4 + 4]   // src_sobelx
+    mov        esi, [esp + 4 + 8]   // src_sobely
+    mov        edx, [esp + 4 + 12]  // dst_y
+    mov        ecx, [esp + 4 + 16]  // width
+    sub        esi, eax             // esi = src_sobely - src_sobelx, so [eax + esi] addresses sobely
+
+    align      16
+ convertloop:
+    movdqa     xmm0, [eax]            // read 16 pixels src_sobelx
+    movdqa     xmm1, [eax + esi]      // read 16 pixels src_sobely
+    lea        eax, [eax + 16]
+    paddusb    xmm0, xmm1             // sobel = sobelx + sobely
+    sub        ecx, 16                // 16 pixels done; sets the flags tested by jg
+    movdqa     [edx], xmm0            // write 16 pixels to dst_y
+    lea        edx, [edx + 16]
+    jg         convertloop            // loop while remaining width > 0
+
+    pop        esi
+    ret
+  }
+}
+#endif  // HAS_SOBELTOPLANEROW_SSE2
+
 #ifdef HAS_SOBELXYROW_SSE2
 // Mixes Sobel X, Sobel Y and Sobel into ARGB.
 // A = 255

--- a/unit_test/planar_test.cc
+++ b/unit_test/planar_test.cc
@@ -1136,7 +1136,7 @@ TEST_F(libyuvTest, TestSobelX) {
              sobel_pixels_opt, 1280);
  }
  for (int i = 0; i < 1280; ++i) {
-    EXPECT_EQ(sobel_pixels_opt[i], sobel_pixels_c[i]);
+    EXPECT_EQ(sobel_pixels_c[i], sobel_pixels_opt[i]);
  }
 }

@@ -1172,7 +1172,7 @@ TEST_F(libyuvTest, TestSobelY) {
    SobelYRow(orig_pixels_0, orig_pixels_1, sobel_pixels_opt, 1280);
  }
  for (int i = 0; i < 1280; ++i) {
-    EXPECT_EQ(sobel_pixels_opt[i], sobel_pixels_c[i]);
+    EXPECT_EQ(sobel_pixels_c[i], sobel_pixels_opt[i]);
  }
 }

@@ -1215,8 +1215,46 @@ TEST_F(libyuvTest, TestSobel) {
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    SobelRow(orig_sobelx, orig_sobely, sobel_pixels_opt, 1280);
  }
-  for (int i = 0; i < 16; ++i) {
-    EXPECT_EQ(sobel_pixels_opt[i], sobel_pixels_c[i]);
+  for (int i = 0; i < 1280 * 4; ++i) {
+    EXPECT_EQ(sobel_pixels_c[i], sobel_pixels_opt[i]);
+  }
+}
+
+TEST_F(libyuvTest, TestSobelToPlane) {
+  SIMD_ALIGNED(uint8 orig_sobelx[1280]);
+  SIMD_ALIGNED(uint8 orig_sobely[1280]);
+  SIMD_ALIGNED(uint8 sobel_pixels_c[1280]);
+  SIMD_ALIGNED(uint8 sobel_pixels_opt[1280]);
+
+  for (int i = 0; i < 1280; ++i) {
+    orig_sobelx[i] = i;
+    orig_sobely[i] = i * 2;
+  }
+
+  SobelToPlaneRow_C(orig_sobelx, orig_sobely, sobel_pixels_c, 1280);  // C reference output.
+
+  EXPECT_EQ(0u, sobel_pixels_c[0]);
+  EXPECT_EQ(3u, sobel_pixels_c[1]);
+  EXPECT_EQ(6u, sobel_pixels_c[2]);
+  EXPECT_EQ(99u, sobel_pixels_c[33]);  // 33 + 66.
+  EXPECT_EQ(255u, sobel_pixels_c[100]);  // 100 + 200 = 300, expected to clamp to 255.
+  void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
+                          uint8* dst_y, int width) = SobelToPlaneRow_C;
+#if defined(HAS_SOBELTOPLANEROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    SobelToPlaneRow = SobelToPlaneRow_SSE2;
+  }
+#endif
+#if defined(HAS_SOBELTOPLANEROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    SobelToPlaneRow = SobelToPlaneRow_NEON;
+  }
+#endif
+  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
+    SobelToPlaneRow(orig_sobelx, orig_sobely, sobel_pixels_opt, 1280);
+  }
+  for (int i = 0; i < 1280; ++i) {  // selected row function must match the C row byte for byte.
+    EXPECT_EQ(sobel_pixels_c[i], sobel_pixels_opt[i]);
  }
 }

@@ -1255,8 +1293,8 @@ TEST_F(libyuvTest, TestSobelXY) {
  for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
    SobelXYRow(orig_sobelx, orig_sobely, sobel_pixels_opt, 1280);
  }
-  for (int i = 0; i < 16; ++i) {
-    EXPECT_EQ(sobel_pixels_opt[i], sobel_pixels_c[i]);
+  for (int i = 0; i < 1280 * 4; ++i) {
+    EXPECT_EQ(sobel_pixels_c[i], sobel_pixels_opt[i]);
  }
 }

@@ -1604,6 +1642,75 @@ TEST_F(libyuvTest, ARGBSobel_Opt) {
  EXPECT_EQ(0, max_diff);
 }

+// Runs ARGBSobelToPlane once after MaskCpuFlags(0) and then
+// benchmark_iterations times after MaskCpuFlags(-1), both on the same random
+// ARGB input, and returns the largest absolute per-byte difference between
+// the two output planes.
+// invert is multiplied into the height passed to ARGBSobelToPlane; off is a
+// byte offset added to the source pointer.
+static int TestSobelToPlane(int width, int height, int benchmark_iterations,
+                            int invert, int off) {
+  width = (width < 1) ? 1 : width;
+  const int kSrcBpp = 4;
+  const int kDstBpp = 1;
+  // Strides are rounded up to a multiple of 16 bytes.
+  const int kSrcStride = (width * kSrcBpp + 15) & ~15;
+  const int kDstStride = (width * kDstBpp + 15) & ~15;
+  const int kSrcSize = kSrcStride * height;
+  const int kDstSize = kDstStride * height;
+  align_buffer_64(src_argb, kSrcSize + off);
+  align_buffer_64(dst_y_c, kDstSize);
+  align_buffer_64(dst_y_opt, kDstSize);
+
+  // Zero the whole source allocation, then fill the image with random bytes.
+  memset(src_argb, 0, kSrcSize + off);
+  srandom(time(NULL));
+  for (int i = 0; i < kSrcSize; ++i) {
+    src_argb[off + i] = (random() & 0xff);
+  }
+  memset(dst_y_c, 0, kDstSize);
+  memset(dst_y_opt, 0, kDstSize);
+
+  // Reference pass with CPU flags masked to 0.
+  MaskCpuFlags(0);
+  ARGBSobelToPlane(src_argb + off, kSrcStride,
+                   dst_y_c, kDstStride,
+                   width, invert * height);
+  // Repeated pass with the mask set to -1.
+  MaskCpuFlags(-1);
+  for (int iter = 0; iter < benchmark_iterations; ++iter) {
+    ARGBSobelToPlane(src_argb + off, kSrcStride,
+                     dst_y_opt, kDstStride,
+                     width, invert * height);
+  }
+
+  int max_diff = 0;
+  for (int i = 0; i < kDstSize; ++i) {
+    const int diff = abs(static_cast<int>(dst_y_c[i]) -
+                         static_cast<int>(dst_y_opt[i]));
+    max_diff = (diff > max_diff) ? diff : max_diff;
+  }
+  free_aligned_buffer_64(src_argb)
+  free_aligned_buffer_64(dst_y_c)
+  free_aligned_buffer_64(dst_y_opt)
+  return max_diff;
+}
+
+// The two passes must produce identical planes at width benchmark_width_ - 1.
+TEST_F(libyuvTest, ARGBSobelToPlane_Any) {
+  const int kInvert = +1;
+  const int kOffset = 0;
+  const int max_diff = TestSobelToPlane(benchmark_width_ - 1,
+                                        benchmark_height_,
+                                        benchmark_iterations_,
+                                        kInvert, kOffset);
+  EXPECT_EQ(0, max_diff);
+}
+
+// The two passes must produce identical planes when the source pointer is
+// offset by one byte.
+TEST_F(libyuvTest, ARGBSobelToPlane_Unaligned) {
+  const int kInvert = +1;
+  const int kOffset = 1;
+  const int max_diff = TestSobelToPlane(benchmark_width_,
+                                        benchmark_height_,
+                                        benchmark_iterations_,
+                                        kInvert, kOffset);
+  EXPECT_EQ(0, max_diff);
+}
+
+// The two passes must produce identical planes when a negated height is
+// passed to ARGBSobelToPlane.
+TEST_F(libyuvTest, ARGBSobelToPlane_Invert) {
+  const int kInvert = -1;
+  const int kOffset = 0;
+  const int max_diff = TestSobelToPlane(benchmark_width_,
+                                        benchmark_height_,
+                                        benchmark_iterations_,
+                                        kInvert, kOffset);
+  EXPECT_EQ(0, max_diff);
+}
+
+// The two passes must produce identical planes at the full benchmark size
+// with no source offset and positive height.
+TEST_F(libyuvTest, ARGBSobelToPlane_Opt) {
+  const int kInvert = +1;
+  const int kOffset = 0;
+  const int max_diff = TestSobelToPlane(benchmark_width_,
+                                        benchmark_height_,
+                                        benchmark_iterations_,
+                                        kInvert, kOffset);
+  EXPECT_EQ(0, max_diff);
+}
+
 static int TestSobelXY(int width, int height, int benchmark_iterations,
                     int invert, int off) {
  if (width < 1) {