Commit 67be98bd authored by fbarchard@google.com

PSNR function was using C; unit test fixed and warnings improved.

BUG=21,22
TEST=build\release\libyuv_unittest.exe  --gtest_filter=*
Review URL: https://webrtc-codereview.appspot.com/447013

git-svn-id: http://libyuv.googlecode.com/svn/trunk@218 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent d4840d4f
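The PSNR in question is the usual 8-bit definition, 10 * log10(255^2 / MSE), where the MSE comes from the sum-of-squared-errors routines changed below. A minimal standalone sketch of that relationship (the value returned for identical frames is a placeholder, not necessarily libyuv's cap):

#include <cmath>
#include <cstdint>

// Sketch: PSNR for 8-bit data, given a sum of squared errors over 'count' samples.
// Assumes the conventional 10 * log10(MAX^2 / MSE) definition.
static double PsnrFromSse(uint64_t sse, uint64_t count) {
  if (count == 0 || sse == 0) {
    return 128.0;  // placeholder cap for identical frames
  }
  const double mse = static_cast<double>(sse) / static_cast<double>(count);
  return 10.0 * std::log10(255.0 * 255.0 / mse);
}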
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 217
+Version: 218
 License: BSD
 License File: LICENSE
......
@@ -11,7 +11,7 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 217
+#define LIBYUV_VERSION 218
 #endif  // INCLUDE_LIBYUV_VERSION_H_
@@ -39,8 +39,8 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
 #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
 #define HAS_SUMSQUAREERROR_NEON
-static uint32 SumSquareError_NEON(const uint8* src_a,
-                                  const uint8* src_b, int count) {
+static uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b,
+                                  int count) {
   volatile uint32 sse;
   asm volatile (
     "vmov.u8    q7, #0                         \n"
@@ -79,8 +79,8 @@ static uint32 SumSquareError_NEON(const uint8* src_a,
 #elif defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
 #define HAS_SUMSQUAREERROR_SSE2
 __declspec(naked)
-static uint32 SumSquareError_SSE2(const uint8* src_a,
-                                  const uint8* src_b, int count) {
+static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b,
+                                  int count) {
   __asm {
     mov        eax, [esp + 4]    // src_a
     mov        edx, [esp + 8]    // src_b
@@ -119,8 +119,8 @@ static uint32 SumSquareError_SSE2(const uint8* src_a,
 #elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
 #define HAS_SUMSQUAREERROR_SSE2
-static uint32 SumSquareError_SSE2(const uint8* src_a,
-                                  const uint8* src_b, int count) {
+static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b,
+                                  int count) {
   uint32 sse;
   asm volatile (
     "pxor      %%xmm0,%%xmm0                  \n"
@@ -165,8 +165,8 @@ static uint32 SumSquareError_SSE2(const uint8* src_a,
 }
 #endif
-static uint32 SumSquareError_C(const uint8* src_a,
-                               const uint8* src_b, int count) {
+static uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b,
+                               int count) {
   uint32 sse = 0u;
   for (int x = 0; x < count; ++x) {
     int diff = src_a[0] - src_b[0];
@@ -177,23 +177,20 @@ static uint32 SumSquareError_C(const uint8* src_a,
   return sse;
 }
-uint64 ComputeSumSquareError(const uint8* src_a,
-                             const uint8* src_b, int count) {
-  uint32 (*SumSquareError)(const uint8* src_a,
-                           const uint8* src_b, int count);
+uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
+                             int count) {
+  uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
+      SumSquareError_C;
 #if defined(HAS_SUMSQUAREERROR_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     SumSquareError = SumSquareError_NEON;
-  } else
+  }
 #elif defined(HAS_SUMSQUAREERROR_SSE2)
   if (TestCpuFlag(kCpuHasSSE2) &&
       IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) {
     SumSquareError = SumSquareError_SSE2;
-  } else
-#endif
-  {
-    SumSquareError = SumSquareError_C;
   }
+#endif
   // 32K values will fit a 32bit int return value from SumSquareError.
   // After each block of 32K, accumulate into 64 bit int.
   const int kBlockSize = 1 << 15;  // 32768;
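The kBlockSize comment above is the key constraint: with 8-bit samples, a 32K block contributes at most 32768 * 255 * 255 = 2,130,739,200, which still fits a 32-bit accumulator, so larger frames are processed block by block and the partial sums are added into a 64-bit total. A standalone sketch of that idea (hypothetical names, not the elided libyuv loop itself):

#include <cstdint>

// Hypothetical per-block routine standing in for SumSquareError(): with 8-bit
// inputs, a 32768-sample block sums to at most 32768 * 255 * 255, which fits
// comfortably in a uint32_t.
static uint32_t BlockSse(const uint8_t* a, const uint8_t* b, int count) {
  uint32_t sse = 0;
  for (int i = 0; i < count; ++i) {
    int d = a[i] - b[i];
    sse += static_cast<uint32_t>(d * d);
  }
  return sse;
}

// Accumulate the 32-bit per-block sums into a 64-bit total so arbitrarily
// large frames cannot overflow.
static uint64_t TotalSse(const uint8_t* a, const uint8_t* b, int count) {
  const int kBlockSize = 1 << 15;  // 32768
  uint64_t total = 0;
  while (count >= kBlockSize) {
    total += BlockSse(a, b, kBlockSize);
    a += kBlockSize;
    b += kBlockSize;
    count -= kBlockSize;
  }
  if (count > 0) {
    total += BlockSse(a, b, count);  // remainder smaller than one block
  }
  return total;
}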
@@ -222,17 +219,18 @@ uint64 ComputeSumSquareError(const uint8* src_a,
 uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
                                   const uint8* src_b, int stride_b,
                                   int width, int height) {
-  uint32 (*SumSquareError)(const uint8* src_a,
-                           const uint8* src_b, int count);
+  uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
+      SumSquareError_C;
 #if defined(HAS_SUMSQUAREERROR_NEON)
-  if (TestCpuFlag(kCpuHasNEON) &&
-      IS_ALIGNED(width, 16)) {
+  if (TestCpuFlag(kCpuHasNEON)) {
     SumSquareError = SumSquareError_NEON;
-  } else
-#endif
-  {
-    SumSquareError = SumSquareError_C;
   }
+#elif defined(HAS_SUMSQUAREERROR_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) &&
+      IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) {
+    SumSquareError = SumSquareError_SSE2;
+  }
+#endif
   uint64 sse = 0;
   for (int h = 0; h < height; ++h) {
......
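Both dispatch sites above move to the same shape: the function pointer starts at the portable SumSquareError_C and is only overridden when the CPU flag (and, for SSE2, 16-byte alignment of both buffers) checks out. Previously ComputeSumSquareErrorPlane never considered SSE2 at all, which is presumably why the PSNR path stayed on C. A self-contained sketch of the pattern with hypothetical names:

#include <cstdint>

// Stubs standing in for SumSquareError_C / SumSquareError_SSE2; the bodies do
// not matter for the dispatch pattern being illustrated.
static uint32_t RowSse_C(const uint8_t*, const uint8_t*, int) { return 0; }
static uint32_t RowSse_Fast(const uint8_t*, const uint8_t*, int) { return 0; }

static bool CpuHasFastPath() { return true; }  // stand-in for TestCpuFlag(kCpuHasSSE2)
static bool Aligned16(const void* p) {
  return (reinterpret_cast<uintptr_t>(p) & 15) == 0;  // same idea as IS_ALIGNED(p, 16)
}

// The shape the commit moves to: initialize the pointer to the portable C
// routine so it is always valid, then upgrade it only when the runtime checks
// pass. There is no trailing else block to fall through, so missing a case can
// only cost speed, never leave the pointer unset.
static uint32_t SumSquare(const uint8_t* a, const uint8_t* b, int count) {
  uint32_t (*fn)(const uint8_t*, const uint8_t*, int) = RowSse_C;
  if (CpuHasFastPath() && Aligned16(a) && Aligned16(b)) {
    fn = RowSse_Fast;
  }
  return fn(a, b, count);
}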
@@ -152,6 +152,9 @@ TEST_F(libyuvTest, BenchmarkPsnr_C) {
                   src_b, _benchmark_width,
                   _benchmark_width, _benchmark_height);
+  c_time = (get_time() - c_time) / _benchmark_iterations;
+  printf ("BenchmarkPsnr_C - %8d us c\n", (int)(c_time*1e6));
   MaskCpuFlags(-1);
   EXPECT_EQ(0, 0);
@@ -164,15 +167,16 @@ TEST_F(libyuvTest, BenchmarkPsnr_OPT) {
   align_buffer_16(src_a, _benchmark_width * _benchmark_height)
   align_buffer_16(src_b, _benchmark_width * _benchmark_height)
-  MaskCpuFlags(kCpuInitialized);
-  double c_time = get_time();
+  MaskCpuFlags(-1);
+  double opt_time = get_time();
   for (int i = 0; i < _benchmark_iterations; ++i)
     CalcFramePsnr(src_a, _benchmark_width,
                   src_b, _benchmark_width,
                   _benchmark_width, _benchmark_height);
-  MaskCpuFlags(-1);
+  opt_time = (get_time() - opt_time) / _benchmark_iterations;
+  printf ("BenchmarkPsnr_OPT - %8d us opt\n", (int)(opt_time*1e6));
   EXPECT_EQ(0, 0);
@@ -269,6 +273,9 @@ TEST_F(libyuvTest, BenchmarkSsim_C) {
                   src_b, _benchmark_width,
                   _benchmark_width, _benchmark_height);
+  c_time = (get_time() - c_time) / _benchmark_iterations;
+  printf ("BenchmarkSsim_C - %8d us c\n", (int)(c_time*1e6));
   MaskCpuFlags(-1);
   EXPECT_EQ(0, 0);
@@ -281,15 +288,16 @@ TEST_F(libyuvTest, BenchmarkSsim_OPT) {
   align_buffer_16(src_a, _benchmark_width * _benchmark_height)
   align_buffer_16(src_b, _benchmark_width * _benchmark_height)
-  MaskCpuFlags(kCpuInitialized);
-  double c_time = get_time();
+  MaskCpuFlags(-1);
+  double opt_time = get_time();
   for (int i = 0; i < _benchmark_iterations; ++i)
     CalcFrameSsim(src_a, _benchmark_width,
                   src_b, _benchmark_width,
                   _benchmark_width, _benchmark_height);
-  MaskCpuFlags(-1);
+  opt_time = (get_time() - opt_time) / _benchmark_iterations;
+  printf ("BenchmarkPsnr_OPT - %8d us opt\n", (int)(opt_time*1e6));
   EXPECT_EQ(0, 0);
......
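The benchmark edits above follow one pattern: the OPT variants stop masking CPU features (MaskCpuFlags(-1) instead of kCpuInitialized), and each test now divides the elapsed time by the iteration count and prints microseconds per call. A minimal sketch of that timing pattern outside the test harness (get_time(), MaskCpuFlags() and the _benchmark_* members belong to the fixture; the clock below is a stand-in):

#include <chrono>
#include <cstdio>

// Stand-in for the harness's get_time(): current time in seconds as a double.
static double GetSeconds() {
  using namespace std::chrono;
  return duration_cast<duration<double>>(
      steady_clock::now().time_since_epoch()).count();
}

// Hypothetical workload in place of CalcFramePsnr() / CalcFrameSsim().
static void DoWork() {}

int main() {
  const int iterations = 1000;  // plays the role of _benchmark_iterations
  double opt_time = GetSeconds();
  for (int i = 0; i < iterations; ++i) {
    DoWork();
  }
  opt_time = (GetSeconds() - opt_time) / iterations;  // seconds per call
  std::printf("Benchmark_OPT - %8d us opt\n", static_cast<int>(opt_time * 1e6));
  return 0;
}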
@@ -95,7 +95,6 @@ TEST_F (libyuvTest, I420To##FMT##_CvsOPT) { \
                 dst_rgb_opt, src_width << 2, \
                 src_width, src_height); \
   int err = 0; \
-  int i = 0; \
   for (int i = 0; i < src_height; ++i) { \
     for (int j = 0; j < src_width << 2; ++j) { \
       int diff = (int)(dst_rgb_c[i * src_height + j]) - \
......
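The last hunk only removes a shadowed 'int i' that produced a compiler warning; the surrounding macro compares the C and optimized RGB outputs element by element and counts mismatches. A trimmed sketch of that kind of comparison (buffer names, stride handling and the tolerance are placeholders, not the macro's exact code):

#include <cstdlib>

// Count elements whose absolute difference between the C and optimized outputs
// exceeds a small tolerance; the test then expects that count to be zero.
static int CountMismatches(const unsigned char* dst_c,
                           const unsigned char* dst_opt,
                           int row_bytes, int height, int stride,
                           int tolerance) {
  int err = 0;
  for (int i = 0; i < height; ++i) {
    for (int j = 0; j < row_bytes; ++j) {
      int diff = static_cast<int>(dst_c[i * stride + j]) -
                 static_cast<int>(dst_opt[i * stride + j]);
      if (std::abs(diff) > tolerance) {
        ++err;
      }
    }
  }
  return err;
}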