Commit b86dbf24 authored by Frank Barchard

Refactor I420AlphaToABGR to use I420AlphaToARGB internally

Swap the U and V planes and use the transposed (YVU) conversion matrix so that
I420AlphaToARGB and I420AlphaToABGR share the same low-level code.

Having less code with the same performance allows more focused
optimization of future ARM versions.
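
To see why this works, here is a minimal, self-contained C sketch of the trick.
The names here (YuvMatrix, kYuv601, kYvu601, YuvPixel8, Clamp) are illustrative
only; libyuv's real YuvConstants are SIMD-friendly vector tables and its kernels
differ. A kernel that writes its channels in "B, G, R" order relative to
whatever matrix it is given will produce ABGR byte order when handed the planes
as V, U together with the U/V-transposed matrix: the swap routes the blue
coefficients to red and vice versa, while green, which uses U and V
symmetrically, is unchanged.

/* yuv_swap_demo.c -- illustrative sketch, not libyuv's internals.
 * BT.601 studio-swing constants in 10-bit fixed point:
 *   B = 1.164(Y-16) + 2.018(U-128)
 *   G = 1.164(Y-16) - 0.391(U-128) - 0.813(V-128)
 *   R = 1.164(Y-16) + 1.596(V-128)
 */
#include <stdint.h>
#include <stdio.h>

typedef struct {
  int ub;      /* U coefficient for the first ("blue") output channel */
  int ug, vg;  /* U and V coefficients for green */
  int vr;      /* V coefficient for the third ("red") output channel */
} YuvMatrix;

static const YuvMatrix kYuv601 = {2066, 400, 833, 1634};
/* Transposed matrix: the U and V coefficients exchanged. */
static const YuvMatrix kYvu601 = {1634, 833, 400, 2066};

static uint8_t Clamp(int v) {
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

/* One shared kernel: writes three channels in "b, g, r" order
 * relative to whichever matrix it is given. */
static void YuvPixel8(uint8_t y, uint8_t u, uint8_t v,
                      uint8_t* b, uint8_t* g, uint8_t* r,
                      const YuvMatrix* m) {
  int y1 = ((int)y - 16) * 1192;  /* 1.164 * 1024 */
  *b = Clamp((y1 + m->ub * ((int)u - 128)) >> 10);
  *g = Clamp((y1 - m->ug * ((int)u - 128) - m->vg * ((int)v - 128)) >> 10);
  *r = Clamp((y1 + m->vr * ((int)v - 128)) >> 10);
}

int main(void) {
  uint8_t y = 120, u = 90, v = 200, a = 255;
  uint8_t argb[4], abgr[4];  /* ARGB memory order: B,G,R,A; ABGR: R,G,B,A */
  /* ARGB path: planes in U,V order, normal matrix. */
  YuvPixel8(y, u, v, &argb[0], &argb[1], &argb[2], &kYuv601);
  argb[3] = a;
  /* ABGR path: same kernel, planes swapped to V,U, transposed matrix. */
  YuvPixel8(y, v, u, &abgr[0], &abgr[1], &abgr[2], &kYvu601);
  abgr[3] = a;
  /* abgr equals argb with R and B exchanged: one low-level path, two formats. */
  printf("ARGB bytes: B=%u G=%u R=%u A=%u\n", argb[0], argb[1], argb[2], argb[3]);
  printf("ABGR bytes: R=%u G=%u B=%u A=%u\n", abgr[0], abgr[1], abgr[2], abgr[3]);
  return 0;
}

With the output byte order folded into the matrix like this, the per-pixel
kernel is the only thing that needs ARM/NEON tuning, which is the payoff the
paragraph above describes.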

R=harryjin@google.com
TBR=harryjin@chromium.org
BUG=libyuv:473,libyuv:516

Review URL: https://codereview.chromium.org/1422263002 .
parent cf160cdb
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1525
+Version: 1526
 License: BSD
 License File: LICENSE
@@ -107,7 +107,6 @@ extern "C" {
 #define HAS_H422TOARGBROW_SSSE3
 #define HAS_I400TOARGBROW_SSE2
 #define HAS_I411TOARGBROW_SSSE3
-#define HAS_I422ALPHATOABGRROW_SSSE3
 #define HAS_I422ALPHATOARGBROW_SSSE3
 #define HAS_I422TOABGRROW_SSSE3
 #define HAS_I422TOARGB1555ROW_SSSE3
@@ -199,7 +198,6 @@ extern "C" {
 #define HAS_H422TOABGRROW_AVX2
 #define HAS_H422TOARGBROW_AVX2
 #define HAS_I400TOARGBROW_AVX2
-#define HAS_I422ALPHATOABGRROW_AVX2
 #define HAS_I422ALPHATOARGBROW_AVX2
 #define HAS_I422TOABGRROW_AVX2
 #define HAS_I422TOARGBROW_AVX2
@@ -254,7 +252,6 @@ extern "C" {
 // The following are also available on x64 Visual C.
 #if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64) && \
     (!defined(__clang__) || defined(__SSSE3__))
-#define HAS_I422ALPHATOABGRROW_SSSE3
 #define HAS_I422ALPHATOARGBROW_SSSE3
 #define HAS_I422TOABGRROW_SSSE3
 #define HAS_I422TOARGBROW_SSSE3
@@ -1053,13 +1050,6 @@ void I422AlphaToARGBRow_C(const uint8* y_buf,
                           uint8* dst_argb,
                           const struct YuvConstants* yuvconstants,
                           int width);
-void I422AlphaToABGRRow_C(const uint8* y_buf,
-                          const uint8* u_buf,
-                          const uint8* v_buf,
-                          const uint8* a_buf,
-                          uint8* dst_argb,
-                          const struct YuvConstants* yuvconstants,
-                          int width);
 void I422ToABGRRow_C(const uint8* src_y,
                      const uint8* src_u,
                      const uint8* src_v,
@@ -1216,13 +1206,6 @@ void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
                               uint8* dst_argb,
                               const struct YuvConstants* yuvconstants,
                               int width);
-void I422AlphaToABGRRow_SSSE3(const uint8* y_buf,
-                              const uint8* u_buf,
-                              const uint8* v_buf,
-                              const uint8* a_buf,
-                              uint8* dst_argb,
-                              const struct YuvConstants* yuvconstants,
-                              int width);
 void I422AlphaToARGBRow_AVX2(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
@@ -1230,13 +1213,6 @@ void I422AlphaToARGBRow_AVX2(const uint8* y_buf,
                              uint8* dst_argb,
                              const struct YuvConstants* yuvconstants,
                              int width);
-void I422AlphaToABGRRow_AVX2(const uint8* y_buf,
-                             const uint8* u_buf,
-                             const uint8* v_buf,
-                             const uint8* a_buf,
-                             uint8* dst_argb,
-                             const struct YuvConstants* yuvconstants,
-                             int width);
 void I422ToARGBRow_SSSE3(const uint8* src_y,
                          const uint8* src_u,
                          const uint8* src_v,
@@ -1434,13 +1410,6 @@ void I422AlphaToARGBRow_Any_SSSE3(const uint8* y_buf,
                                   uint8* dst_argb,
                                   const struct YuvConstants* yuvconstants,
                                   int width);
-void I422AlphaToABGRRow_Any_SSSE3(const uint8* y_buf,
-                                  const uint8* u_buf,
-                                  const uint8* v_buf,
-                                  const uint8* a_buf,
-                                  uint8* dst_abgr,
-                                  const struct YuvConstants* yuvconstants,
-                                  int width);
 void I422AlphaToARGBRow_Any_AVX2(const uint8* y_buf,
                                  const uint8* u_buf,
                                  const uint8* v_buf,
@@ -1448,13 +1417,6 @@ void I422AlphaToARGBRow_Any_AVX2(const uint8* y_buf,
                                  uint8* dst_argb,
                                  const struct YuvConstants* yuvconstants,
                                  int width);
-void I422AlphaToABGRRow_Any_AVX2(const uint8* y_buf,
-                                 const uint8* u_buf,
-                                 const uint8* v_buf,
-                                 const uint8* a_buf,
-                                 uint8* dst_abgr,
-                                 const struct YuvConstants* yuvconstants,
-                                 int width);
 void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
                              const uint8* src_u,
                              const uint8* src_v,
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1525
+#define LIBYUV_VERSION 1526
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
@@ -45,7 +45,6 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb,
 }
 // Convert I444 to ARGB.
-LIBYUV_API
 static int I444ToARGBMatrix(const uint8* src_y, int src_stride_y,
                             const uint8* src_u, int src_stride_u,
                             const uint8* src_v, int src_stride_v,
@@ -129,33 +128,33 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
                          width, height);
 }
-// Convert J444 to ARGB.
+// Convert I444 to ABGR.
 LIBYUV_API
-int J444ToARGB(const uint8* src_y, int src_stride_y,
+int I444ToABGR(const uint8* src_y, int src_stride_y,
                const uint8* src_u, int src_stride_u,
                const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
+               uint8* dst_abgr, int dst_stride_abgr,
                int width, int height) {
   return I444ToARGBMatrix(src_y, src_stride_y,
+                          src_v, src_stride_v,  // Swap U and V
                           src_u, src_stride_u,
-                          src_v, src_stride_v,
-                          dst_argb, dst_stride_argb,
-                          &kYuvJConstants,
+                          dst_abgr, dst_stride_abgr,
+                          &kYvuIConstants,  // Use Yvu matrix
                           width, height);
 }
-// Convert I444 to ABGR.
+// Convert J444 to ARGB.
 LIBYUV_API
-int I444ToABGR(const uint8* src_y, int src_stride_y,
+int J444ToARGB(const uint8* src_y, int src_stride_y,
                const uint8* src_u, int src_stride_u,
                const uint8* src_v, int src_stride_v,
-               uint8* dst_abgr, int dst_stride_abgr,
+               uint8* dst_argb, int dst_stride_argb,
                int width, int height) {
   return I444ToARGBMatrix(src_y, src_stride_y,
-                          src_v, src_stride_v,
                           src_u, src_stride_u,
-                          dst_abgr, dst_stride_abgr,
-                          &kYvuIConstants,
+                          src_v, src_stride_v,
+                          dst_argb, dst_stride_argb,
+                          &kYuvJConstants,
                           width, height);
 }
@@ -307,13 +306,13 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
 }
 // Convert I420 with Alpha to preattenuated ARGB.
-LIBYUV_API
-int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
-                    const uint8* src_u, int src_stride_u,
-                    const uint8* src_v, int src_stride_v,
-                    const uint8* src_a, int src_stride_a,
-                    uint8* dst_argb, int dst_stride_argb,
-                    int width, int height, int attenuate) {
+static int I420AlphaToARGBMatrix(const uint8* src_y, int src_stride_y,
+                                 const uint8* src_u, int src_stride_u,
+                                 const uint8* src_v, int src_stride_v,
+                                 const uint8* src_a, int src_stride_a,
+                                 uint8* dst_argb, int dst_stride_argb,
+                                 const struct YuvConstants* yuvconstants,
+                                 int width, int height, int attenuate) {
   int y;
   void (*I422AlphaToARGBRow)(const uint8* y_buf,
                              const uint8* u_buf,
@@ -393,7 +392,7 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
 #endif
   for (y = 0; y < height; ++y) {
-    I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, &kYuvIConstants,
+    I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
                        width);
     if (attenuate) {
       ARGBAttenuateRow(dst_argb, dst_argb, width);
@@ -409,6 +408,23 @@ int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
   return 0;
 }
+// Convert I420 with Alpha to preattenuated ARGB.
+LIBYUV_API
+int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
+                    const uint8* src_u, int src_stride_u,
+                    const uint8* src_v, int src_stride_v,
+                    const uint8* src_a, int src_stride_a,
+                    uint8* dst_argb, int dst_stride_argb,
+                    int width, int height, int attenuate) {
+  return I420AlphaToARGBMatrix(src_y, src_stride_y,
+                               src_u, src_stride_u,
+                               src_v, src_stride_v,
+                               src_a, src_stride_a,
+                               dst_argb, dst_stride_argb,
+                               &kYuvIConstants,
+                               width, height, attenuate);
+}
 // Convert I420 with Alpha to preattenuated ABGR.
 LIBYUV_API
 int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
@@ -417,99 +433,13 @@ int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
                     const uint8* src_a, int src_stride_a,
                     uint8* dst_abgr, int dst_stride_abgr,
                     int width, int height, int attenuate) {
-  int y;
-  void (*I422AlphaToABGRRow)(const uint8* y_buf,
-                             const uint8* u_buf,
-                             const uint8* v_buf,
-                             const uint8* a_buf,
-                             uint8* dst_abgr,
-                             const struct YuvConstants* yuvconstants,
-                             int width) = I422AlphaToABGRRow_C;
-  void (*ARGBAttenuateRow)(const uint8* src_abgr, uint8* dst_abgr,
-                           int width) = ARGBAttenuateRow_C;
-  if (!src_y || !src_u || !src_v || !dst_abgr ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
-    dst_stride_abgr = -dst_stride_abgr;
-  }
-#if defined(HAS_I422ALPHATOABGRROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    I422AlphaToABGRRow = I422AlphaToABGRRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422AlphaToABGRRow = I422AlphaToABGRRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_I422ALPHATOABGRROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    I422AlphaToABGRRow = I422AlphaToABGRRow_Any_AVX2;
-    if (IS_ALIGNED(width, 16)) {
-      I422AlphaToABGRRow = I422AlphaToABGRRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_I422ALPHATOABGRROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    I422AlphaToABGRRow = I422AlphaToABGRRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I422AlphaToABGRRow = I422AlphaToABGRRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_I422ALPHATOABGRROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
-      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
-      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
-      IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
-    I422AlphaToABGRRow = I422AlphaToABGRRow_MIPS_DSPR2;
-  }
-#endif
-#if defined(HAS_ARGBATTENUATEROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 4)) {
-      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_ARGBATTENUATEROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_ARGBATTENUATEROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
-    }
-  }
-#endif
-  for (y = 0; y < height; ++y) {
-    I422AlphaToABGRRow(src_y, src_u, src_v, src_a, dst_abgr, &kYuvIConstants,
-                       width);
-    if (attenuate) {
-      ARGBAttenuateRow(dst_abgr, dst_abgr, width);
-    }
-    dst_abgr += dst_stride_abgr;
-    src_a += src_stride_a;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
+  return I420AlphaToARGBMatrix(src_y, src_stride_y,
+                               src_v, src_stride_v,  // Swap U and V
+                               src_u, src_stride_u,
+                               src_a, src_stride_a,
+                               dst_abgr, dst_stride_abgr,
+                               &kYvuIConstants,  // Use Yvu matrix
+                               width, height, attenuate);
 }
 // Convert I400 to ARGB.
@@ -46,11 +46,9 @@ extern "C" {
 #ifdef HAS_I422ALPHATOARGBROW_SSSE3
 ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
-ANY41C(I422AlphaToABGRRow_Any_SSSE3, I422AlphaToABGRRow_SSSE3, 1, 0, 4, 7)
 #endif
 #ifdef HAS_I422ALPHATOARGBROW_AVX2
 ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 7)
-ANY41C(I422AlphaToABGRRow_Any_AVX2, I422AlphaToABGRRow_AVX2, 1, 0, 4, 7)
 #endif
 #undef ANY41C
@@ -1413,34 +1413,6 @@ void I422ToABGRRow_C(const uint8* src_y,
   }
 }
-void I422AlphaToABGRRow_C(const uint8* src_y,
-                          const uint8* src_u,
-                          const uint8* src_v,
-                          const uint8* src_a,
-                          uint8* rgb_buf,
-                          const struct YuvConstants* yuvconstants,
-                          int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants);
-    rgb_buf[3] = src_a[0];
-    YuvPixel(src_y[1], src_u[0], src_v[0],
-             rgb_buf + 6, rgb_buf + 5, rgb_buf + 4, yuvconstants);
-    rgb_buf[7] = src_a[1];
-    src_y += 2;
-    src_u += 1;
-    src_v += 1;
-    src_a += 2;
-    rgb_buf += 8;  // Advance 2 pixels.
-  }
-  if (width & 1) {
-    YuvPixel(src_y[0], src_u[0], src_v[0],
-             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants);
-    rgb_buf[3] = src_a[0];
-  }
-}
 void I422ToRGB24Row_C(const uint8* src_y,
                       const uint8* src_u,
                       const uint8* src_v,
@@ -1766,38 +1766,6 @@ void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
   );
 }
-void OMITFP I422AlphaToABGRRow_SSSE3(const uint8* y_buf,
-                                     const uint8* u_buf,
-                                     const uint8* v_buf,
-                                     const uint8* a_buf,
-                                     uint8* dst_abgr,
-                                     const struct YuvConstants* yuvconstants,
-                                     int width) {
-  asm volatile (
-    "sub       %[u_buf],%[v_buf]               \n"
-    LABELALIGN
-  "1:                                          \n"
-    READYUVA422
-    YUVTORGB(yuvconstants)
-    STOREABGR
-    "subl      $0x8,%[width]                   \n"
-    "jg        1b                              \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [u_buf]"+r"(u_buf),    // %[u_buf]
-    [v_buf]"+r"(v_buf),    // %[v_buf]
-    [a_buf]"+r"(a_buf),    // %[a_buf]
-    [dst_abgr]"+r"(dst_abgr),  // %[dst_abgr]
-#if defined(__i386__) && defined(__pic__)
-    [width]"+m"(width)     // %[width]
-#else
-    [width]"+rm"(width)    // %[width]
-#endif
-  : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants]
-  : "memory", "cc", NACL_R14
-    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-  );
-}
 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
                                 const uint8* u_buf,
                                 const uint8* v_buf,
@@ -2229,43 +2197,6 @@ void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf,
 }
 #endif  // HAS_I422ALPHATOARGBROW_AVX2
-#if defined(HAS_I422ALPHATOABGRROW_AVX2)
-// 16 pixels
-// 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR.
-void OMITFP I422AlphaToABGRRow_AVX2(const uint8* y_buf,
-                                    const uint8* u_buf,
-                                    const uint8* v_buf,
-                                    const uint8* a_buf,
-                                    uint8* dst_abgr,
-                                    const struct YuvConstants* yuvconstants,
-                                    int width) {
-  asm volatile (
-    "sub       %[u_buf],%[v_buf]               \n"
-    LABELALIGN
-  "1:                                          \n"
-    READYUVA422_AVX2
-    YUVTORGB_AVX2(yuvconstants)
-    STOREABGR_AVX2
-    "subl      $0x10,%[width]                  \n"
-    "jg        1b                              \n"
-    "vzeroupper                                \n"
-  : [y_buf]"+r"(y_buf),    // %[y_buf]
-    [u_buf]"+r"(u_buf),    // %[u_buf]
-    [v_buf]"+r"(v_buf),    // %[v_buf]
-    [a_buf]"+r"(a_buf),    // %[a_buf]
-    [dst_abgr]"+r"(dst_abgr),  // %[dst_abgr]
-#if defined(__i386__) && defined(__pic__)
-    [width]"+m"(width)     // %[width]
-#else
-    [width]"+rm"(width)    // %[width]
-#endif
-  : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants]
-  : "memory", "cc", NACL_R14
-    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-  );
-}
-#endif  // HAS_I422ALPHATOABGRROW_AVX2
 #if defined(HAS_I422TOABGRROW_AVX2)
 // 16 pixels
 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
@@ -153,25 +153,6 @@ void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
 }
 #endif
-#if defined(HAS_I422ALPHATOABGRROW_SSSE3)
-void I422AlphaToABGRRow_SSSE3(const uint8* y_buf,
-                              const uint8* u_buf,
-                              const uint8* v_buf,
-                              const uint8* a_buf,
-                              uint8* dst_abgr,
-                              const struct YuvConstants* yuvconstants,
-                              int width) {
-  __m128i xmm0, xmm1, xmm2, xmm4, xmm5;
-  const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
-  while (width > 0) {
-    READYUVA422
-    YUVTORGB(yuvconstants)
-    STOREABGR
-    width -= 8;
-  }
-}
-#endif
 // 32 bit
 #else  // defined(_M_X64)
 #ifdef HAS_ARGBTOYROW_SSSE3
@@ -2185,49 +2166,6 @@ void I422AlphaToARGBRow_AVX2(const uint8* y_buf,
 }
 #endif  // HAS_I422ALPHATOARGBROW_AVX2
-#ifdef HAS_I422ALPHATOABGRROW_AVX2
-// 16 pixels
-// 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR.
-__declspec(naked)
-void I422AlphaToABGRRow_AVX2(const uint8* y_buf,
-                             const uint8* u_buf,
-                             const uint8* v_buf,
-                             const uint8* a_buf,
-                             uint8* dst_abgr,
-                             const struct YuvConstants* yuvconstants,
-                             int width) {
-  __asm {
-    push       esi
-    push       edi
-    push       ebx
-    push       ebp
-    mov        eax, [esp + 16 + 4]   // Y
-    mov        esi, [esp + 16 + 8]   // U
-    mov        edi, [esp + 16 + 12]  // V
-    mov        ebp, [esp + 16 + 16]  // A
-    mov        edx, [esp + 16 + 20]  // abgr
-    mov        ebx, [esp + 16 + 24]  // yuvconstants
-    mov        ecx, [esp + 16 + 28]  // width
-    sub        edi, esi
-  convertloop:
-    READYUVA422_AVX2
-    YUVTORGB_AVX2(ebx)
-    STOREABGR_AVX2
-    sub        ecx, 16
-    jg         convertloop
-    pop        ebp
-    pop        ebx
-    pop        edi
-    pop        esi
-    vzeroupper
-    ret
-  }
-}
-#endif  // HAS_I422ALPHATOABGRROW_AVX2
 #ifdef HAS_I444TOARGBROW_AVX2
 // 16 pixels
 // 16 UV values with 16 Y producing 16 ARGB (64 bytes).
@@ -3027,46 +2965,6 @@ void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
   }
 }
-// 8 pixels.
-// 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ABGR.
-__declspec(naked)
-void I422AlphaToABGRRow_SSSE3(const uint8* y_buf,
-                              const uint8* u_buf,
-                              const uint8* v_buf,
-                              const uint8* a_buf,
-                              uint8* dst_abgr,
-                              const struct YuvConstants* yuvconstants,
-                              int width) {
-  __asm {
-    push       esi
-    push       edi
-    push       ebx
-    push       ebp
-    mov        eax, [esp + 16 + 4]   // Y
-    mov        esi, [esp + 16 + 8]   // U
-    mov        edi, [esp + 16 + 12]  // V
-    mov        ebp, [esp + 16 + 16]  // A
-    mov        edx, [esp + 16 + 20]  // abgr
-    mov        ebx, [esp + 16 + 24]  // yuvconstants
-    mov        ecx, [esp + 16 + 28]  // width
-    sub        edi, esi
-  convertloop:
-    READYUVA422
-    YUVTORGB(ebx)
-    STOREABGR
-    sub        ecx, 8
-    jg         convertloop
-    pop        ebp
-    pop        ebx
-    pop        edi
-    pop        esi
-    ret
-  }
-}
 // 8 pixels.
 // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
 // Similar to I420 but duplicate UV once more.