Commit 167d5d1c authored by fbarchard@google.com

Porting parts of compare to c89

BUG=303
TESTED=try bots still build, gcc and vc direct for c testing.
R=tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/6739004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@956 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 53a7923b
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 955 Version: 956
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 955 #define LIBYUV_VERSION 956
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -46,6 +46,8 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed); ...@@ -46,6 +46,8 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
// hash seed of 5381 recommended. // hash seed of 5381 recommended.
LIBYUV_API LIBYUV_API
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
const int kBlockSize = 1 << 15; // 32768;
int remainder = (int)(count) & ~15;
uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C; uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
#if defined(HAS_HASHDJB2_SSE41) #if defined(HAS_HASHDJB2_SSE41)
if (TestCpuFlag(kCpuHasSSE41)) { if (TestCpuFlag(kCpuHasSSE41)) {
...@@ -58,13 +60,11 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { ...@@ -58,13 +60,11 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
} }
#endif #endif
const int kBlockSize = 1 << 15; // 32768;
while (count >= (uint64)(kBlockSize)) { while (count >= (uint64)(kBlockSize)) {
seed = HashDjb2_SSE(src, kBlockSize, seed); seed = HashDjb2_SSE(src, kBlockSize, seed);
src += kBlockSize; src += kBlockSize;
count -= kBlockSize; count -= kBlockSize;
} }
int remainder = (int)(count) & ~15;
if (remainder) { if (remainder) {
seed = HashDjb2_SSE(src, remainder, seed); seed = HashDjb2_SSE(src, remainder, seed);
src += remainder; src += remainder;
...@@ -98,6 +98,13 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count); ...@@ -98,6 +98,13 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
LIBYUV_API LIBYUV_API
uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b, uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
int count) { int count) {
// SumSquareError returns values 0 to 65535 for each squared difference.
// Up to 65536 of those can be summed and remain within a uint32.
// After each block of 65536 pixels, accumulate into a uint64.
const int kBlockSize = 65536;
int remainder = count & (kBlockSize - 1) & ~31;
uint64 sse = 0;
int i;
uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) = uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
SumSquareError_C; SumSquareError_C;
#if defined(HAS_SUMSQUAREERROR_NEON) #if defined(HAS_SUMSQUAREERROR_NEON)
...@@ -118,20 +125,14 @@ uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b, ...@@ -118,20 +125,14 @@ uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
SumSquareError = SumSquareError_AVX2; SumSquareError = SumSquareError_AVX2;
} }
#endif #endif
// SumSquareError returns values 0 to 65535 for each squared difference.
// Up to 65536 of those can be summed and remain within a uint32.
// After each block of 65536 pixels, accumulate into a uint64.
const int kBlockSize = 65536;
uint64 sse = 0;
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for reduction(+: sse) #pragma omp parallel for reduction(+: sse)
#endif #endif
for (int i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) { for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
sse += SumSquareError(src_a + i, src_b + i, kBlockSize); sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
} }
src_a += count & ~(kBlockSize - 1); src_a += count & ~(kBlockSize - 1);
src_b += count & ~(kBlockSize - 1); src_b += count & ~(kBlockSize - 1);
int remainder = count & (kBlockSize - 1) & ~31;
if (remainder) { if (remainder) {
sse += SumSquareError(src_a, src_b, remainder); sse += SumSquareError(src_a, src_b, remainder);
src_a += remainder; src_a += remainder;
...@@ -148,6 +149,8 @@ LIBYUV_API ...@@ -148,6 +149,8 @@ LIBYUV_API
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a, uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b, const uint8* src_b, int stride_b,
int width, int height) { int width, int height) {
uint64 sse = 0;
int h;
// Coalesce rows. // Coalesce rows.
if (stride_a == width && if (stride_a == width &&
stride_b == width) { stride_b == width) {
...@@ -155,8 +158,7 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a, ...@@ -155,8 +158,7 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
height = 1; height = 1;
stride_a = stride_b = 0; stride_a = stride_b = 0;
} }
uint64 sse = 0; for (h = 0; h < height; ++h) {
for (int h = 0; h < height; ++h) {
sse += ComputeSumSquareError(src_a, src_b, width); sse += ComputeSumSquareError(src_a, src_b, width);
src_a += stride_a; src_a += stride_a;
src_b += stride_b; src_b += stride_b;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment