Commit 167d5d1c authored by fbarchard@google.com

Porting parts of compare to c89

BUG=303
TESTED=try bots still build, gcc and vc direct for c testing.
R=tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/6739004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@956 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 53a7923b
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 955
Version: 956
License: BSD
License File: LICENSE
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 955
#define LIBYUV_VERSION 956
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -46,6 +46,8 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
// hash seed of 5381 recommended.
LIBYUV_API
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
const int kBlockSize = 1 << 15; // 32768;
int remainder = (int)(count) & ~15;
uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
#if defined(HAS_HASHDJB2_SSE41)
if (TestCpuFlag(kCpuHasSSE41)) {
......@@ -58,13 +60,11 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
}
#endif
const int kBlockSize = 1 << 15; // 32768;
while (count >= (uint64)(kBlockSize)) {
seed = HashDjb2_SSE(src, kBlockSize, seed);
src += kBlockSize;
count -= kBlockSize;
}
int remainder = (int)(count) & ~15;
if (remainder) {
seed = HashDjb2_SSE(src, remainder, seed);
src += remainder;
......@@ -98,6 +98,13 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
LIBYUV_API
uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
int count) {
// SumSquareError returns values 0 to 65535 for each squared difference.
// Up to 65536 of those can be summed and remain within a uint32.
// After each block of 65536 pixels, accumulate into a uint64.
const int kBlockSize = 65536;
int remainder = count & (kBlockSize - 1) & ~31;
uint64 sse = 0;
int i;
uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
SumSquareError_C;
#if defined(HAS_SUMSQUAREERROR_NEON)
......@@ -118,20 +125,14 @@ uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
SumSquareError = SumSquareError_AVX2;
}
#endif
// SumSquareError returns values 0 to 65535 for each squared difference.
// Up to 65536 of those can be summed and remain within a uint32.
// After each block of 65536 pixels, accumulate into a uint64.
const int kBlockSize = 65536;
uint64 sse = 0;
#ifdef _OPENMP
#pragma omp parallel for reduction(+: sse)
#endif
for (int i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
}
src_a += count & ~(kBlockSize - 1);
src_b += count & ~(kBlockSize - 1);
int remainder = count & (kBlockSize - 1) & ~31;
if (remainder) {
sse += SumSquareError(src_a, src_b, remainder);
src_a += remainder;
......@@ -148,6 +149,8 @@ LIBYUV_API
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b,
int width, int height) {
uint64 sse = 0;
int h;
// Coalesce rows.
if (stride_a == width &&
stride_b == width) {
......@@ -155,8 +158,7 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
height = 1;
stride_a = stride_b = 0;
}
uint64 sse = 0;
for (int h = 0; h < height; ++h) {
for (h = 0; h < height; ++h) {
sse += ComputeSumSquareError(src_a, src_b, width);
src_a += stride_a;
src_b += stride_b;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment