YUV scaling with 16 bit planes

BUG=331 TESTED=libyuv_unittest --gunit_also_run_disabled_tests --gtest_filter=**.ScaleFrom1280x720* R=debargha@google.com, tpsiaki@google.com Review URL: https://webrtc-codereview.appspot.com/17569004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1005 16f28f9a-4ce2-e073-06de-1de4eb20be90

YUV scaling with 16 bit planes
BUG=331 TESTED=libyuv_unittest --gunit_also_run_disabled_tests --gtest_filter=**.ScaleFrom1280x720* R=debargha@google.com, tpsiaki@google.com Review URL: https://webrtc-codereview.appspot.com/17569004 git-svn-id: http://libyuv.googlecode.com/svn/trunk@1005 16f28f9a-4ce2-e073-06de-1de4eb20be90
b18413e5 · fbarchard@google.com · 8b857c0a · b18413e5 · b18413e5 · b18413e5
Commit b18413e5 authored May 20, 2014 by fbarchard@google.com
11 changed files
--- a/README.chromium
+++ b/README.chromium
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1004
+Version: 1005
 License: BSD
 License File: LICENSE

--- a/include/libyuv/planar_functions.h
+++ b/include/libyuv/planar_functions.h
@@ -28,6 +28,11 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
               uint8* dst_y, int dst_stride_y,
               int width, int height);
+LIBYUV_API
+void CopyPlane_16(const uint16* src_y, int src_stride_y,
+                  uint16* dst_y, int dst_stride_y,
+                  int width, int height);
 // Set a plane of data to a 32 bit value.
 LIBYUV_API
 void SetPlane(uint8* dst_y, int dst_stride_y,

--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -773,6 +773,8 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count);
 void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
 void CopyRow_C(const uint8* src, uint8* dst, int count);
+void CopyRow_16_C(const uint16* src, uint16* dst, int count);
 void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width);
 void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
 void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
@@ -1458,6 +1460,9 @@ void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
 void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
                  uint8* dst_uv, int pix);
+void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
+                  uint16* dst_uv, int pix);
 void ARGBToBayerRow_C(const uint8* src_argb, uint8* dst_bayer,
                      uint32 selector, int pix);
 void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
@@ -1639,6 +1644,10 @@ void InterpolateRows_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
                                    ptrdiff_t src_stride_ptr, int width,
                                    int source_y_fraction);
+void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
+                         ptrdiff_t src_stride_ptr,
+                         int width, int source_y_fraction);
 // Sobel images.
 void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
                 uint8* dst_sobelx, int width);

--- a/include/libyuv/scale.h
+++ b/include/libyuv/scale.h
@@ -34,6 +34,12 @@ void ScalePlane(const uint8* src, int src_stride,
                int dst_width, int dst_height,
                enum FilterMode filtering);
+void ScalePlane_16(const uint16* src, int src_stride,
+                   int src_width, int src_height,
+                   uint16* dst, int dst_stride,
+                   int dst_width, int dst_height,
+                   enum FilterMode filtering);
 // Scales a YUV 4:2:0 image from the src width and height to the
 // dst width and height.
 // If filtering is kFilterNone, a simple nearest-neighbor algorithm is
@@ -55,6 +61,17 @@ int I420Scale(const uint8* src_y, int src_stride_y,
              int dst_width, int dst_height,
              enum FilterMode filtering);
+LIBYUV_API
+int I420Scale_16(const uint16* src_y, int src_stride_y,
+                 const uint16* src_u, int src_stride_u,
+                 const uint16* src_v, int src_stride_v,
+                 int src_width, int src_height,
+                 uint16* dst_y, int dst_stride_y,
+                 uint16* dst_u, int dst_stride_u,
+                 uint16* dst_v, int dst_stride_v,
+                 int dst_width, int dst_height,
+                 enum FilterMode filtering);
 #ifdef __cplusplus
 // Legacy API.  Deprecated.
 LIBYUV_API

--- a/include/libyuv/scale_row.h
+++ b/include/libyuv/scale_row.h
@@ -70,6 +70,13 @@ void ScalePlaneVertical(int src_height,
                        int x, int y, int dy,
                        int bpp, enum FilterMode filtering);
+void ScalePlaneVertical_16(int src_height,
+                           int dst_width, int dst_height,
+                           int src_stride, int dst_stride,
+                           const uint16* src_argb, uint16* dst_argb,
+                           int x, int y, int dy,
+                           int wpp, enum FilterMode filtering);
 // Simplify the filtering based on scale factors.
 enum FilterMode ScaleFilterReduce(int src_width, int src_height,
                                  int dst_width, int dst_height,
@@ -97,37 +104,70 @@ void ScaleSlope(int src_width, int src_height,
 void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
                     uint8* dst, int dst_width);
+void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                        uint16* dst, int dst_width);
 void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst, int dst_width);
+void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                              uint16* dst, int dst_width);
 void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst, int dst_width);
+void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                           uint16* dst, int dst_width);
 void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
                     uint8* dst, int dst_width);
+void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                        uint16* dst, int dst_width);
 void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst, int dst_width);
+void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                           uint16* dst, int dst_width);
 void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
                      uint8* dst, int dst_width);
+void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                         uint16* dst, int dst_width);
 void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* d, int dst_width);
+void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                               uint16* d, int dst_width);
 void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* d, int dst_width);
+void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                               uint16* d, int dst_width);
 void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
                 int dst_width, int x, int dx);
+void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
+                    int dst_width, int x, int dx);
 void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
                    int dst_width, int, int);
+void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr,
+                       int dst_width, int, int);
 void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
                       int dst_width, int x, int dx);
+void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
+                          int dst_width, int x, int dx);
 void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
                         int dst_width, int x, int dx);
+void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
+                            int dst_width, int x, int dx);
 void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
                      uint8* dst, int dst_width);
+void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                         uint16* dst, int dst_width);
 void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
                            ptrdiff_t src_stride,
                            uint8* dst_ptr, int dst_width);
+void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
+                               ptrdiff_t src_stride,
+                               uint16* dst_ptr, int dst_width);
 void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* dst_ptr, int dst_width);
+void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                               uint16* dst_ptr, int dst_width);
 void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
                    uint16* dst_ptr, int src_width, int src_height);
+void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
+                       uint32* dst_ptr, int src_width, int src_height);
 void ScaleARGBRowDown2_C(const uint8* src_argb,
                         ptrdiff_t src_stride,
                         uint8* dst_argb, int dst_width);

--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1004
+#define LIBYUV_VERSION 1005
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -73,6 +73,55 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
  }
 }
+LIBYUV_API
+void CopyPlane_16(const uint16* src_y, int src_stride_y,
+                  uint16* dst_y, int dst_stride_y,
+                  int width, int height) {
+  int y;
+  void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C;
+  // Coalesce rows.
+  if (src_stride_y == width &&
+      dst_stride_y == width) {
+    width *= height;
+    height = 1;
+    src_stride_y = dst_stride_y = 0;
+  }
+#if defined(HAS_COPYROW_16_X86)
+  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
+    CopyRow = CopyRow_16_X86;
+  }
+#endif
+#if defined(HAS_COPYROW_16_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
+      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
+      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+    CopyRow = CopyRow_16_SSE2;
+  }
+#endif
+#if defined(HAS_COPYROW_16_ERMS)
+  if (TestCpuFlag(kCpuHasERMS)) {
+    CopyRow = CopyRow_16_ERMS;
+  }
+#endif
+#if defined(HAS_COPYROW_16_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
+    CopyRow = CopyRow_16_NEON;
+  }
+#endif
+#if defined(HAS_COPYROW_16_MIPS)
+  if (TestCpuFlag(kCpuHasMIPS)) {
+    CopyRow = CopyRow_16_MIPS;
+  }
+#endif
+  // Copy plane
+  for (y = 0; y < height; ++y) {
+    CopyRow(src_y, dst_y, width);
+    src_y += src_stride_y;
+    dst_y += dst_stride_y;
+  }
+}
 // Copy I422.
 LIBYUV_API
 int I422Copy(const uint8* src_y, int src_stride_y,

--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -1565,6 +1565,10 @@ void CopyRow_C(const uint8* src, uint8* dst, int count) {
  memcpy(dst, src, count);
 }
+void CopyRow_16_C(const uint16* src, uint16* dst, int count) {
+  memcpy(dst, src, count * 2);
+}
 void SetRow_C(uint8* dst, uint32 v8, int count) {
 #ifdef _MSC_VER
  // VC will generate rep stosb.
@@ -1890,6 +1894,14 @@ void HalfRow_C(const uint8* src_uv, int src_uv_stride,
  }
 }
+void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
+                  uint16* dst_uv, int pix) {
+  int x;
+  for (x = 0; x < pix; ++x) {
+    dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
+  }
+}
 // C version 2x2 -> 2x1.
 void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
                      ptrdiff_t src_stride,
@@ -1918,6 +1930,33 @@ void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
  }
 }
+void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
+                         ptrdiff_t src_stride,
+                         int width, int source_y_fraction) {
+  int y1_fraction = source_y_fraction;
+  int y0_fraction = 256 - y1_fraction;
+  const uint16* src_ptr1 = src_ptr + src_stride;
+  int x;
+  if (source_y_fraction == 0) {
+    memcpy(dst_ptr, src_ptr, width * 2);
+    return;
+  }
+  if (source_y_fraction == 128) {
+    HalfRow_16_C(src_ptr, (int)(src_stride), dst_ptr, width);
+    return;
+  }
+  for (x = 0; x < width - 1; x += 2) {
+    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
+    dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
+    src_ptr += 2;
+    src_ptr1 += 2;
+    dst_ptr += 2;
+  }
+  if (width & 1) {
+    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
+  }
+}
 // Select 2 channels from ARGB on alternating pixels.  e.g.  BGBGBGBG
 void ARGBToBayerRow_C(const uint8* src_argb,
                      uint8* dst_bayer, uint32 selector, int pix) {

--- a/source/scale.cc
+++ b/source/scale.cc
--- a/source/scale_common.cc
+++ b/source/scale_common.cc
--- a/unit_test/scale_test.cc
+++ b/unit_test/scale_test.cc
@@ -132,6 +132,134 @@ static int TestFilter(int src_width, int src_height,
  return max_diff;
 }
+// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
+// 0 = exact.
+static int TestFilter_16(int src_width, int src_height,
+                         int dst_width, int dst_height,
+                         FilterMode f, int benchmark_iterations) {
+  int i, j;
+  const int b = 0;  // 128 to test for padding/stride.
+  int src_width_uv = (Abs(src_width) + 1) >> 1;
+  int src_height_uv = (Abs(src_height) + 1) >> 1;
+  int src_y_plane_size = (Abs(src_width) + b * 2) * (Abs(src_height) + b * 2);
+  int src_uv_plane_size = (src_width_uv + b * 2) * (src_height_uv + b * 2);
+  int src_stride_y = b * 2 + Abs(src_width);
+  int src_stride_uv = b * 2 + src_width_uv;
+  align_buffer_page_end(src_y, src_y_plane_size)
+  align_buffer_page_end(src_u, src_uv_plane_size)
+  align_buffer_page_end(src_v, src_uv_plane_size)
+  align_buffer_page_end(src_y_16, src_y_plane_size * 2)
+  align_buffer_page_end(src_u_16, src_uv_plane_size * 2)
+  align_buffer_page_end(src_v_16, src_uv_plane_size * 2)
+  uint16* p_src_y_16 = reinterpret_cast<uint16*>(src_y_16);
+  uint16* p_src_u_16 = reinterpret_cast<uint16*>(src_u_16);
+  uint16* p_src_v_16 = reinterpret_cast<uint16*>(src_v_16);
+  srandom(time(NULL));
+  MemRandomize(src_y, src_y_plane_size);
+  MemRandomize(src_u, src_uv_plane_size);
+  MemRandomize(src_v, src_uv_plane_size);
+  for (i = b; i < src_height + b; ++i) {
+    for (j = b; j < src_width + b; ++j) {
+      p_src_y_16[(i * src_stride_y) + j] = src_y[(i * src_stride_y) + j];
+    }
+  }
+  for (i = b; i < (src_height_uv + b); ++i) {
+    for (j = b; j < (src_width_uv + b); ++j) {
+      p_src_u_16[(i * src_stride_uv) + j] = src_u[(i * src_stride_uv) + j];
+      p_src_v_16[(i * src_stride_uv) + j] = src_v[(i * src_stride_uv) + j];
+    }
+  }
+  int dst_width_uv = (dst_width + 1) >> 1;
+  int dst_height_uv = (dst_height + 1) >> 1;
+  int dst_y_plane_size = (dst_width + b * 2) * (dst_height + b * 2);
+  int dst_uv_plane_size = (dst_width_uv + b * 2) * (dst_height_uv + b * 2);
+  int dst_stride_y = b * 2 + dst_width;
+  int dst_stride_uv = b * 2 + dst_width_uv;
+  align_buffer_page_end(dst_y_8, dst_y_plane_size)
+  align_buffer_page_end(dst_u_8, dst_uv_plane_size)
+  align_buffer_page_end(dst_v_8, dst_uv_plane_size)
+  align_buffer_page_end(dst_y_16, dst_y_plane_size * 2)
+  align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2)
+  align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2)
+  uint16* p_dst_y_16 = reinterpret_cast<uint16*>(dst_y_16);
+  uint16* p_dst_u_16 = reinterpret_cast<uint16*>(dst_u_16);
+  uint16* p_dst_v_16 = reinterpret_cast<uint16*>(dst_v_16);
+  I420Scale(src_y + (src_stride_y * b) + b, src_stride_y,
+            src_u + (src_stride_uv * b) + b, src_stride_uv,
+            src_v + (src_stride_uv * b) + b, src_stride_uv,
+            src_width, src_height,
+            dst_y_8 + (dst_stride_y * b) + b, dst_stride_y,
+            dst_u_8 + (dst_stride_uv * b) + b, dst_stride_uv,
+            dst_v_8 + (dst_stride_uv * b) + b, dst_stride_uv,
+            dst_width, dst_height, f);
+  for (i = 0; i < benchmark_iterations; ++i) {
+    I420Scale_16(p_src_y_16 + (src_stride_y * b) + b, src_stride_y,
+                 p_src_u_16 + (src_stride_uv * b) + b, src_stride_uv,
+                 p_src_v_16 + (src_stride_uv * b) + b, src_stride_uv,
+                 src_width, src_height,
+                 p_dst_y_16 + (dst_stride_y * b) + b, dst_stride_y,
+                 p_dst_u_16 + (dst_stride_uv * b) + b, dst_stride_uv,
+                 p_dst_v_16 + (dst_stride_uv * b) + b, dst_stride_uv,
+                 dst_width, dst_height, f);
+  }
+  // Expect an exact match
+  int max_diff = 0;
+  for (i = b; i < (dst_height + b); ++i) {
+    for (j = b; j < (dst_width + b); ++j) {
+      int abs_diff = Abs(dst_y_8[(i * dst_stride_y) + j] -
+                         p_dst_y_16[(i * dst_stride_y) + j]);
+      if (abs_diff > max_diff) {
+        max_diff = abs_diff;
+      }
+    }
+  }
+  for (i = b; i < (dst_height_uv + b); ++i) {
+    for (j = b; j < (dst_width_uv + b); ++j) {
+      int abs_diff = Abs(dst_u_8[(i * dst_stride_uv) + j] -
+                         p_dst_u_16[(i * dst_stride_uv) + j]);
+      if (abs_diff > max_diff) {
+        max_diff = abs_diff;
+      }
+      abs_diff = Abs(dst_v_8[(i * dst_stride_uv) + j] -
+                     p_dst_v_16[(i * dst_stride_uv) + j]);
+      if (abs_diff > max_diff) {
+        max_diff = abs_diff;
+      }
+    }
+  }
+  free_aligned_buffer_page_end(dst_y_8)
+  free_aligned_buffer_page_end(dst_u_8)
+  free_aligned_buffer_page_end(dst_v_8)
+  free_aligned_buffer_page_end(dst_y_16)
+  free_aligned_buffer_page_end(dst_u_16)
+  free_aligned_buffer_page_end(dst_v_16)
+  free_aligned_buffer_page_end(src_y)
+  free_aligned_buffer_page_end(src_u)
+  free_aligned_buffer_page_end(src_v)
+  free_aligned_buffer_page_end(src_y_16)
+  free_aligned_buffer_page_end(src_u_16)
+  free_aligned_buffer_page_end(src_v_16)
+  return max_diff;
+}
 #define TEST_FACTOR1(name, filter, hfactor, vfactor, max_diff)                 \
    TEST_F(libyuvTest, ScaleDownBy##name##_##filter) {                         \
      int diff = TestFilter(benchmark_width_, benchmark_height_,               \
@@ -139,6 +267,13 @@ static int TestFilter(int src_width, int src_height,
                            Abs(benchmark_height_) * vfactor,                  \
                            kFilter##filter, benchmark_iterations_);           \
      EXPECT_LE(diff, max_diff);                                               \
+    }                                                                          \
+    TEST_F(libyuvTest, ScaleDownBy##name##_##filter##_16) {                    \
+      int diff = TestFilter_16(benchmark_width_, benchmark_height_,            \
+                               Abs(benchmark_width_) * hfactor,                \
+                               Abs(benchmark_height_) * vfactor,               \
+                               kFilter##filter, benchmark_iterations_);        \
+      EXPECT_LE(diff, max_diff);                                               \
    }
 // Test a scale factor with all 4 filters.  Expect unfiltered to be exact, but
@@ -168,6 +303,18 @@ TEST_FACTOR(3by4, 3 / 4, 3 / 4)
                            Abs(benchmark_width_), Abs(benchmark_height_),     \
                            kFilter##filter, benchmark_iterations_);           \
      EXPECT_LE(diff, max_diff);                                               \
+    }                                                                          \
+    TEST_F(libyuvTest, name##To##width##x##height##_##filter##_16) {           \
+      int diff = TestFilter_16(benchmark_width_, benchmark_height_,            \
+                               width, height,                                  \
+                               kFilter##filter, benchmark_iterations_);        \
+      EXPECT_LE(diff, max_diff);                                               \
+    }                                                                          \
+    TEST_F(libyuvTest, name##From##width##x##height##_##filter##_16) {         \
+      int diff = TestFilter_16(width, height,                                  \
+                               Abs(benchmark_width_), Abs(benchmark_height_),  \
+                               kFilter##filter, benchmark_iterations_);        \
+      EXPECT_LE(diff, max_diff);                                               \
    }
 // Test scale to a specified size with all 4 filters.