H010ToAR30 and H010ToARGB optimized YUV buffering

Reduce allocations of row buffers to 1 alloc/free. Do 2 rows at a time to avoid converting U and V planes twice. Bug: libyuv:715 Test: LibYUVConvertTest.H010ToAR30_Opt Change-Id: I2f3a03b4875df5e3b969112a78a1a0b28399fa2f Reviewed-on: https://chromium-review.googlesource.com/816021 Reviewed-by: Cheng Wang <wangcheng@google.com>

H010ToAR30 and H010ToARGB optimized YUV buffering
Reduce allocations of row buffers to 1 alloc/free. Do 2 rows at a time to avoid converting U and V planes twice. Bug: libyuv:715 Test: LibYUVConvertTest.H010ToAR30_Opt Change-Id: I2f3a03b4875df5e3b969112a78a1a0b28399fa2f Reviewed-on: https://chromium-review.googlesource.com/816021 Reviewed-by: Cheng Wang <wangcheng@google.com>
aabe3808 · Frank Barchard · Frank Barchard · 3541e46a · aabe3808 · aabe3808
Commit aabe3808 authored Dec 08, 2017 by Frank Barchard Committed by Frank Barchard Dec 08, 2017
Hide whitespace changes
Inline Side-by-side

Showing with 65 additions and 55 deletions

README.chromium README.chromium +1 -1

formats.md docs/formats.md +9 -17

version.h include/libyuv/version.h +1 -1

convert_argb.cc source/convert_argb.cc +54 -36

No files found.
--- a/README.chromium
+++ b/README.chromium
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1681
+Version: 1682
 License: BSD
 License File: LICENSE


--- a/docs/formats.md
+++ b/docs/formats.md
@@ -35,9 +35,8 @@ This is how OSX formats map to libyuv
 # FOURCC (Four Charactacter Code) List

 The following is extracted from video_common.h as a complete list of formats supported by libyuv.
-
    enum FourCC {
-      // 8 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
+      // 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
      FOURCC_I420 = FOURCC('I', '4', '2', '0'),
      FOURCC_I422 = FOURCC('I', '4', '2', '2'),
      FOURCC_I444 = FOURCC('I', '4', '4', '4'),
@@ -46,38 +45,34 @@ The following is extracted from video_common.h as a complete list of formats sup
      FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
      FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
      FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
+      FOURCC_H010 = FOURCC('H', '0', '1', '0'),  // unofficial fourcc. 10 bit lsb

-      // 1 Secondary YUV formats: row biplanar.
+      // 1 Secondary YUV format: row biplanar.
      FOURCC_M420 = FOURCC('M', '4', '2', '0'),

-      // 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
+      // 10 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc
      FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
      FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
      FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
-      FOURCC_AR30 = FOURCC('A', 'R', '3', '0'),
+      FOURCC_AR30 = FOURCC('A', 'R', '3', '0'),  // 10 bit per channel. 2101010.
      FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
-      FOURCC_RAW  = FOURCC('r', 'a', 'w', ' '),
+      FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
      FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
      FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'),  // rgb565 LE.
      FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'),  // argb1555 LE.
      FOURCC_R444 = FOURCC('R', '4', '4', '4'),  // argb4444 LE.

-      // 4 Secondary RGB formats: 4 Bayer Patterns.
-      FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
-      FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
-      FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
-      FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
-
      // 1 Primary Compressed YUV format.
      FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),

-      // 5 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias.
+      // 7 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias.
      FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'),
      FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
      FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
      FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'),  // Linux version of I420.
      FOURCC_J420 = FOURCC('J', '4', '2', '0'),
-      FOURCC_J400 = FOURCC('J', '4', '0', '0'),
+      FOURCC_J400 = FOURCC('J', '4', '0', '0'),  // unofficial fourcc
+      FOURCC_H420 = FOURCC('H', '4', '2', '0'),  // unofficial fourcc

      // 14 Auxiliary aliases.  CanonicalFourCC() maps these to canonical fourcc.
      FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'),  // Alias for I420.
@@ -98,9 +93,6 @@ The following is extracted from video_common.h as a complete list of formats sup
      FOURCC_L565 = FOURCC('L', '5', '6', '5'),  // Alias for RGBP.
      FOURCC_5551 = FOURCC('5', '5', '5', '1'),  // Alias for RGBO.

-      // 1 Auxiliary compressed YUV format set aside for capturer.
-      FOURCC_H264 = FOURCC('H', '2', '6', '4'),
-
 # Planar YUV
      The following formats contains a full size Y plane followed by 1 or 2
        planes for UV: I420, I422, I444, I400, NV21, NV12, I400

--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_

-#define LIBYUV_VERSION 1681
+#define LIBYUV_VERSION 1682

 #endif  // INCLUDE_LIBYUV_VERSION_H_
--- a/source/convert_argb.cc
+++ b/source/convert_argb.cc
@@ -519,31 +519,40 @@ static int H010ToAR30Matrix(const uint16* src_y,
  }
 #endif

-  align_buffer_64(row_y, width);
-  align_buffer_64(row_u, halfwidth);
-  align_buffer_64(row_v, halfwidth);
-  align_buffer_64(row_argb, width * 4);
-
-  for (y = 0; y < height; ++y) {
-    Convert16To8Row(src_y, row_y, scale, width);
-    Convert16To8Row(src_u, row_u, scale, halfwidth);
-    Convert16To8Row(src_v, row_v, scale, halfwidth);
-
-    I422ToARGBRow(row_y, row_u, row_v, row_argb, yuvconstants, width);
-
-    ARGBToAR30Row(row_argb, dst_ar30, width);
-
-    dst_ar30 += dst_stride_ar30;
-    src_y += src_stride_y;
-    if (y & 1) {
+  {
+    // Row buffers for 8 bit YUV and RGB.
+    align_buffer_64(row_buf, width + halfwidth * 2 + width * 4);
+    uint8* row_y = row_buf;
+    uint8* row_u = row_buf + width;
+    uint8* row_v = row_buf + width + halfwidth;
+    uint8* row_argb = row_buf + width + halfwidth * 2;
+
+    for (y = 0; y < height - 1; y += 2) {
+      Convert16To8Row(src_y, row_y, scale, width);
+      Convert16To8Row(src_u, row_u, scale, halfwidth);
+      Convert16To8Row(src_v, row_v, scale, halfwidth);
+      I422ToARGBRow(row_y, row_u, row_v, row_argb, yuvconstants, width);
+      ARGBToAR30Row(row_argb, dst_ar30, width);
+
+      Convert16To8Row(src_y + src_stride_y, row_y, scale, width);
+      I422ToARGBRow(row_y, row_u, row_v, row_argb, yuvconstants, width);
+      ARGBToAR30Row(row_argb, dst_ar30 + dst_stride_ar30, width);
+      dst_ar30 += dst_stride_ar30 * 2;
+      src_y += src_stride_y * 2;
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
+
+    if (height & 1) {
+      Convert16To8Row(src_y, row_y, scale, width);
+      Convert16To8Row(src_u, row_u, scale, halfwidth);
+      Convert16To8Row(src_v, row_v, scale, halfwidth);
+      I422ToARGBRow(row_y, row_u, row_v, row_argb, yuvconstants, width);
+      ARGBToAR30Row(row_argb, dst_ar30, width);
+    }
+
+    free_aligned_buffer_64(row_buf);
  }
-  free_aligned_buffer_64(row_y);
-  free_aligned_buffer_64(row_u);
-  free_aligned_buffer_64(row_v);
-  free_aligned_buffer_64(row_argb);
  return 0;
 }

@@ -645,27 +654,36 @@ static int H010ToARGBMatrix(const uint16* src_y,
  }
 #endif

-  align_buffer_64(row_y, width);
-  align_buffer_64(row_u, halfwidth);
-  align_buffer_64(row_v, halfwidth);
-
-  for (y = 0; y < height; ++y) {
-    Convert16To8Row(src_y, row_y, scale, width);
-    Convert16To8Row(src_u, row_u, scale, halfwidth);
-    Convert16To8Row(src_v, row_v, scale, halfwidth);
+  {
+    // Row buffers for 8 bit YUV.
+    align_buffer_64(row_buf, width + halfwidth * 2);
+    uint8* row_y = row_buf;
+    uint8* row_u = row_buf + width;
+    uint8* row_v = row_buf + width + halfwidth;

-    I422ToARGBRow(row_y, row_u, row_v, dst_argb, yuvconstants, width);
+    for (y = 0; y < height - 1; y += 2) {
+      Convert16To8Row(src_y, row_y, scale, width);
+      Convert16To8Row(src_u, row_u, scale, halfwidth);
+      Convert16To8Row(src_v, row_v, scale, halfwidth);
+      I422ToARGBRow(row_y, row_u, row_v, dst_argb, yuvconstants, width);

-    dst_argb += dst_stride_argb;
-    src_y += src_stride_y;
-    if (y & 1) {
+      Convert16To8Row(src_y + src_stride_y, row_y, scale, width);
+      I422ToARGBRow(row_y, row_u, row_v, dst_argb + dst_stride_argb,
+                    yuvconstants, width);
+      dst_argb += dst_stride_argb * 2;
+      src_y += src_stride_y * 2;
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
+
+    if (height & 1) {
+      Convert16To8Row(src_y, row_y, scale, width);
+      Convert16To8Row(src_u, row_u, scale, halfwidth);
+      Convert16To8Row(src_v, row_v, scale, halfwidth);
+      I422ToARGBRow(row_y, row_u, row_v, dst_argb, yuvconstants, width);
+    }
+    free_aligned_buffer_64(row_buf);
  }
-  free_aligned_buffer_64(row_y);
-  free_aligned_buffer_64(row_u);
-  free_aligned_buffer_64(row_v);
  return 0;
 }