Commit 67be98bd authored by fbarchard@google.com

PSNR function was using C; unit test fixed and warnings improved.

BUG=21,22
TEST=build\release\libyuv_unittest.exe  --gtest_filter=*
Review URL: https://webrtc-codereview.appspot.com/447013

git-svn-id: http://libyuv.googlecode.com/svn/trunk@218 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent d4840d4f
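The PSNR in question is the usual 8-bit definition, 10 * log10(255^2 / MSE), where the MSE comes from the sum-of-squared-errors routines changed below. A minimal standalone sketch of that relationship (the value returned for identical frames is a placeholder, not necessarily libyuv's cap):

#include <cmath>
#include <cstdint>

// Sketch: PSNR for 8-bit data, given a sum of squared errors over 'count' samples.
// Assumes the conventional 10 * log10(MAX^2 / MSE) definition.
static double PsnrFromSse(uint64_t sse, uint64_t count) {
  if (count == 0 || sse == 0) {
    return 128.0;  // placeholder cap for identical frames
  }
  const double mse = static_cast<double>(sse) / static_cast<double>(count);
  return 10.0 * std::log10(255.0 * 255.0 / mse);
}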
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 217
+Version: 218
 License: BSD
 License File: LICENSE
......
@@ -11,7 +11,7 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 217
+#define LIBYUV_VERSION 218
 #endif  // INCLUDE_LIBYUV_VERSION_H_
@@ -39,8 +39,8 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
 #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
 #define HAS_SUMSQUAREERROR_NEON
-static uint32 SumSquareError_NEON(const uint8* src_a,
-                                  const uint8* src_b, int count) {
+static uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b,
+                                  int count) {
   volatile uint32 sse;
   asm volatile (
     "vmov.u8    q7, #0                         \n"
@@ -79,8 +79,8 @@ static uint32 SumSquareError_NEON(const uint8* src_a,
 #elif defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
 #define HAS_SUMSQUAREERROR_SSE2
 __declspec(naked)
-static uint32 SumSquareError_SSE2(const uint8* src_a,
-                                  const uint8* src_b, int count) {
+static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b,
+                                  int count) {
   __asm {
     mov        eax, [esp + 4]    // src_a
     mov        edx, [esp + 8]    // src_b
@@ -119,8 +119,8 @@ static uint32 SumSquareError_SSE2(const uint8* src_a,
 #elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
 #define HAS_SUMSQUAREERROR_SSE2
-static uint32 SumSquareError_SSE2(const uint8* src_a,
-                                  const uint8* src_b, int count) {
+static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b,
+                                  int count) {
   uint32 sse;
   asm volatile (
     "pxor      %%xmm0,%%xmm0                  \n"
@@ -165,8 +165,8 @@ static uint32 SumSquareError_SSE2(const uint8* src_a,
 }
 #endif
-static uint32 SumSquareError_C(const uint8* src_a,
-                               const uint8* src_b, int count) {
+static uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b,
+                               int count) {
   uint32 sse = 0u;
   for (int x = 0; x < count; ++x) {
     int diff = src_a[0] - src_b[0];
@@ -177,23 +177,20 @@ static uint32 SumSquareError_C(const uint8* src_a,
   return sse;
 }
-uint64 ComputeSumSquareError(const uint8* src_a,
-                             const uint8* src_b, int count) {
-  uint32 (*SumSquareError)(const uint8* src_a,
-                           const uint8* src_b, int count);
+uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
+                             int count) {
+  uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
+      SumSquareError_C;
 #if defined(HAS_SUMSQUAREERROR_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     SumSquareError = SumSquareError_NEON;
-  } else
+  }
 #elif defined(HAS_SUMSQUAREERROR_SSE2)
   if (TestCpuFlag(kCpuHasSSE2) &&
       IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) {
     SumSquareError = SumSquareError_SSE2;
-  } else
-#endif
-  {
-    SumSquareError = SumSquareError_C;
   }
+#endif
   // 32K values will fit a 32bit int return value from SumSquareError.
   // After each block of 32K, accumulate into 64 bit int.
   const int kBlockSize = 1 << 15;  // 32768;
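The kBlockSize comment above is the key constraint: with 8-bit samples, a 32K block contributes at most 32768 * 255 * 255 = 2,130,739,200, which still fits a 32-bit accumulator, so larger frames are processed block by block and the partial sums are added into a 64-bit total. A standalone sketch of that idea (hypothetical names, not the elided libyuv loop itself):

#include <cstdint>

// Hypothetical per-block routine standing in for SumSquareError(): with 8-bit
// inputs, a 32768-sample block sums to at most 32768 * 255 * 255, which fits
// comfortably in a uint32_t.
static uint32_t BlockSse(const uint8_t* a, const uint8_t* b, int count) {
  uint32_t sse = 0;
  for (int i = 0; i < count; ++i) {
    int d = a[i] - b[i];
    sse += static_cast<uint32_t>(d * d);
  }
  return sse;
}

// Accumulate the 32-bit per-block sums into a 64-bit total so arbitrarily
// large frames cannot overflow.
static uint64_t TotalSse(const uint8_t* a, const uint8_t* b, int count) {
  const int kBlockSize = 1 << 15;  // 32768
  uint64_t total = 0;
  while (count >= kBlockSize) {
    total += BlockSse(a, b, kBlockSize);
    a += kBlockSize;
    b += kBlockSize;
    count -= kBlockSize;
  }
  if (count > 0) {
    total += BlockSse(a, b, count);  // remainder smaller than one block
  }
  return total;
}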
@@ -222,17 +219,18 @@ uint64 ComputeSumSquareError(const uint8* src_a,
 uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
                                   const uint8* src_b, int stride_b,
                                   int width, int height) {
-  uint32 (*SumSquareError)(const uint8* src_a,
-                           const uint8* src_b, int count);
+  uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
+      SumSquareError_C;
 #if defined(HAS_SUMSQUAREERROR_NEON)
-  if (TestCpuFlag(kCpuHasNEON) &&
-      IS_ALIGNED(width, 16)) {
+  if (TestCpuFlag(kCpuHasNEON)) {
     SumSquareError = SumSquareError_NEON;
-  } else
-#endif
-  {
-    SumSquareError = SumSquareError_C;
   }
+#elif defined(HAS_SUMSQUAREERROR_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) &&
+      IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) {
+    SumSquareError = SumSquareError_SSE2;
+  }
+#endif
   uint64 sse = 0;
   for (int h = 0; h < height; ++h) {
......
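Both dispatch sites above move to the same shape: the function pointer starts at the portable SumSquareError_C and is only overridden when the CPU flag (and, for SSE2, 16-byte alignment of both buffers) checks out. Previously ComputeSumSquareErrorPlane never considered SSE2 at all, which is presumably why the PSNR path stayed on C. A self-contained sketch of the pattern with hypothetical names:

#include <cstdint>

// Stubs standing in for SumSquareError_C / SumSquareError_SSE2; the bodies do
// not matter for the dispatch pattern being illustrated.
static uint32_t RowSse_C(const uint8_t*, const uint8_t*, int) { return 0; }
static uint32_t RowSse_Fast(const uint8_t*, const uint8_t*, int) { return 0; }

static bool CpuHasFastPath() { return true; }  // stand-in for TestCpuFlag(kCpuHasSSE2)
static bool Aligned16(const void* p) {
  return (reinterpret_cast<uintptr_t>(p) & 15) == 0;  // same idea as IS_ALIGNED(p, 16)
}

// The shape the commit moves to: initialize the pointer to the portable C
// routine so it is always valid, then upgrade it only when the runtime checks
// pass. There is no trailing else block to fall through, so missing a case can
// only cost speed, never leave the pointer unset.
static uint32_t SumSquare(const uint8_t* a, const uint8_t* b, int count) {
  uint32_t (*fn)(const uint8_t*, const uint8_t*, int) = RowSse_C;
  if (CpuHasFastPath() && Aligned16(a) && Aligned16(b)) {
    fn = RowSse_Fast;
  }
  return fn(a, b, count);
}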
@@ -152,6 +152,9 @@ TEST_F(libyuvTest, BenchmarkPsnr_C) {
                   src_b, _benchmark_width,
                   _benchmark_width, _benchmark_height);
+  c_time = (get_time() - c_time) / _benchmark_iterations;
+  printf ("BenchmarkPsnr_C - %8d us c\n", (int)(c_time*1e6));
   MaskCpuFlags(-1);
   EXPECT_EQ(0, 0);
@@ -164,15 +167,16 @@ TEST_F(libyuvTest, BenchmarkPsnr_OPT) {
   align_buffer_16(src_a, _benchmark_width * _benchmark_height)
   align_buffer_16(src_b, _benchmark_width * _benchmark_height)
-  MaskCpuFlags(kCpuInitialized);
-  double c_time = get_time();
+  MaskCpuFlags(-1);
+  double opt_time = get_time();
   for (int i = 0; i < _benchmark_iterations; ++i)
     CalcFramePsnr(src_a, _benchmark_width,
                   src_b, _benchmark_width,
                   _benchmark_width, _benchmark_height);
-  MaskCpuFlags(-1);
+  opt_time = (get_time() - opt_time) / _benchmark_iterations;
+  printf ("BenchmarkPsnr_OPT - %8d us opt\n", (int)(opt_time*1e6));
   EXPECT_EQ(0, 0);
@@ -269,6 +273,9 @@ TEST_F(libyuvTest, BenchmarkSsim_C) {
                   src_b, _benchmark_width,
                   _benchmark_width, _benchmark_height);
+  c_time = (get_time() - c_time) / _benchmark_iterations;
+  printf ("BenchmarkSsim_C - %8d us c\n", (int)(c_time*1e6));
   MaskCpuFlags(-1);
   EXPECT_EQ(0, 0);
@@ -281,15 +288,16 @@ TEST_F(libyuvTest, BenchmarkSsim_OPT) {
   align_buffer_16(src_a, _benchmark_width * _benchmark_height)
   align_buffer_16(src_b, _benchmark_width * _benchmark_height)
-  MaskCpuFlags(kCpuInitialized);
-  double c_time = get_time();
+  MaskCpuFlags(-1);
+  double opt_time = get_time();
   for (int i = 0; i < _benchmark_iterations; ++i)
     CalcFrameSsim(src_a, _benchmark_width,
                   src_b, _benchmark_width,
                   _benchmark_width, _benchmark_height);
-  MaskCpuFlags(-1);
+  opt_time = (get_time() - opt_time) / _benchmark_iterations;
+  printf ("BenchmarkPsnr_OPT - %8d us opt\n", (int)(opt_time*1e6));
   EXPECT_EQ(0, 0);
......
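The benchmark edits above follow one pattern: the OPT variants stop masking CPU features (MaskCpuFlags(-1) instead of kCpuInitialized), and each test now divides the elapsed time by the iteration count and prints microseconds per call. A minimal sketch of that timing pattern outside the test harness (get_time(), MaskCpuFlags() and the _benchmark_* members belong to the fixture; the clock below is a stand-in):

#include <chrono>
#include <cstdio>

// Stand-in for the harness's get_time(): current time in seconds as a double.
static double GetSeconds() {
  using namespace std::chrono;
  return duration_cast<duration<double>>(
      steady_clock::now().time_since_epoch()).count();
}

// Hypothetical workload in place of CalcFramePsnr() / CalcFrameSsim().
static void DoWork() {}

int main() {
  const int iterations = 1000;  // plays the role of _benchmark_iterations
  double opt_time = GetSeconds();
  for (int i = 0; i < iterations; ++i) {
    DoWork();
  }
  opt_time = (GetSeconds() - opt_time) / iterations;  // seconds per call
  std::printf("Benchmark_OPT - %8d us opt\n", static_cast<int>(opt_time * 1e6));
  return 0;
}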
@@ -95,7 +95,6 @@ TEST_F (libyuvTest, I420To##FMT##_CvsOPT) { \
                 dst_rgb_opt, src_width << 2, \
                 src_width, src_height); \
   int err = 0; \
-  int i = 0; \
   for (int i = 0; i < src_height; ++i) { \
     for (int j = 0; j < src_width << 2; ++j) { \
       int diff = (int)(dst_rgb_c[i * src_height + j]) - \
......
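The last hunk only removes a shadowed 'int i' that produced a compiler warning; the surrounding macro compares the C and optimized RGB outputs element by element and counts mismatches. A trimmed sketch of that kind of comparison (buffer names, stride handling and the tolerance are placeholders, not the macro's exact code):

#include <cstdlib>

// Count elements whose absolute difference between the C and optimized outputs
// exceeds a small tolerance; the test then expects that count to be zero.
static int CountMismatches(const unsigned char* dst_c,
                           const unsigned char* dst_opt,
                           int row_bytes, int height, int stride,
                           int tolerance) {
  int err = 0;
  for (int i = 0; i < height; ++i) {
    for (int j = 0; j < row_bytes; ++j) {
      int diff = static_cast<int>(dst_c[i * stride + j]) -
                 static_cast<int>(dst_opt[i * stride + j]);
      if (std::abs(diff) > tolerance) {
        ++err;
      }
    }
  }
  return err;
}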