Commit 2136e349 authored by Frank Barchard

Hamming code difference of 2 memory blocks

BUG=libyuv:701
TEST=built and disassembled for aarch64
R=kjellander@chromium.org

Change-Id: I7712b1c7934e5dfb55fda1fa7c8405c32d6964ce
Reviewed-on: https://chromium-review.googlesource.com/495327
Reviewed-by: Henrik Kjellander <kjellander@chromium.org>
Reviewed-by: Cheng Wang <wangcheng@google.com>
parent 945ea1b7
...@@ -33,10 +33,10 @@ group("default") { ...@@ -33,10 +33,10 @@ group("default") {
if (libyuv_include_tests) { if (libyuv_include_tests) {
deps += [ deps += [
":compare", ":compare",
":yuvconvert",
":cpuid", ":cpuid",
":libyuv_unittest", ":libyuv_unittest",
":psnr", ":psnr",
":yuvconvert",
] ]
} }
} }
...@@ -158,7 +158,8 @@ static_library("libyuv_internal") { ...@@ -158,7 +158,8 @@ static_library("libyuv_internal") {
} }
# To enable AVX2 or other cpu optimization, pass flag here # To enable AVX2 or other cpu optimization, pass flag here
# cflags = [ "-mavx2" ] # cflags = [ "-mavx2" ]
# cflags = [ "-mpopcnt" ]
} }
if (libyuv_use_neon) { if (libyuv_use_neon) {
......
...@@ -67,6 +67,10 @@ extern "C" { ...@@ -67,6 +67,10 @@ extern "C" {
#define HAS_SUMSQUAREERROR_NEON #define HAS_SUMSQUAREERROR_NEON
#endif #endif
// Hamming distance row functions: each returns the number of bit positions
// at which the byte buffers src_a and src_b differ. count is a length in
// bytes. The NEON variant consumes 32 bytes from each buffer per loop
// iteration, so pass it a positive multiple of 32.
uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count);
uint32 HammingDistance_X86(const uint8* src_a, const uint8* src_b, int count);
uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count);
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count); uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count); uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count); uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
......
...@@ -17,6 +17,41 @@ namespace libyuv { ...@@ -17,6 +17,41 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
#if ORIGINAL_C
// Reference Hamming distance: returns the number of bit positions at which
// src_a and src_b differ over the first count bytes, one byte at a time.
// A count of zero or less yields 0.
uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count) {
  // Plain local accumulator: nothing outside this function observes it, so
  // it does not need to be volatile.
  uint32 diff = 0u;
  int i;
  for (i = 0; i < count; ++i) {
    unsigned int x = (unsigned int)(src_a[i] ^ src_b[i]);
    // x &= x - 1 clears the lowest set bit, so this loop body runs exactly
    // once per differing bit in the byte.
    for (; x != 0u; x &= x - 1u) {
      ++diff;
    }
  }
  return diff;
}
#endif
// Population count of a 32-bit word using the HAKMEM (item 169) method.
static uint32 HammingPopcount32(uint32 x) {
  // After this step every 3-bit (octal) field of u holds the number of bits
  // that were set in the same field of x.
  uint32 u = x - ((x >> 1) & 033333333333) - ((x >> 2) & 011111111111);
  // Fold neighbouring 3-bit fields into 6-bit fields, then add those fields
  // together with one modulo: 64 == 1 (mod 63) and the total is at most 32.
  return ((u + (u >> 3)) & 030707070707) % 63;
}

// Hakmem method for hamming distance.
// Returns the number of bit positions at which src_a and src_b differ over
// the first count bytes. Neither pointer needs any particular alignment and
// count need not be a multiple of 4. A count of zero or less yields 0.
uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count) {
  uint32 diff = 0u;
  int i = 0;
  // Main loop: 4 bytes per step. The word is assembled from individual bytes
  // rather than loaded through a uint32 pointer, which avoids misaligned and
  // type-punned accesses. Byte order inside the word is irrelevant to a
  // population count.
  for (; count - i >= 4; i += 4) {
    uint32 x = (uint32)(src_a[i] ^ src_b[i]) |
               ((uint32)(src_a[i + 1] ^ src_b[i + 1]) << 8) |
               ((uint32)(src_a[i + 2] ^ src_b[i + 2]) << 16) |
               ((uint32)(src_a[i + 3] ^ src_b[i + 3]) << 24);
    diff += HammingPopcount32(x);
  }
  // Tail: pack the remaining 1..3 bytes into a zero-padded word so they are
  // counted instead of being silently dropped.
  if (i < count) {
    uint32 x = 0u;
    int shift = 0;
    for (; i < count; ++i, shift += 8) {
      x |= (uint32)(src_a[i] ^ src_b[i]) << shift;
    }
    diff += HammingPopcount32(x);
  }
  return diff;
}
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) { uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) {
uint32 sse = 0u; uint32 sse = 0u;
int i; int i;
......
...@@ -22,6 +22,19 @@ extern "C" { ...@@ -22,6 +22,19 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_X86) && \ #if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
// Hamming distance using the compiler's population-count builtins.
// Returns the number of bit positions at which src_a and src_b differ over
// the first count bytes. Neither pointer needs any particular alignment and
// count need not be a multiple of 8. A count of zero or less yields 0.
uint32 HammingDistance_X86(const uint8* src_a, const uint8* src_b, int count) {
  uint32 diff = 0u;
  int i = 0;
  // Main loop: 8 bytes per step. __builtin_memcpy is used for the loads so
  // that no uint64 pointer is ever formed from a byte pointer (no alignment
  // or aliasing assumptions).
  for (; count - i >= 8; i += 8) {
    uint64 a;
    uint64 b;
    __builtin_memcpy(&a, src_a + i, sizeof(a));
    __builtin_memcpy(&b, src_b + i, sizeof(b));
    diff += (uint32)__builtin_popcountll(a ^ b);
  }
  // Tail: count the remaining 1..7 bytes individually instead of silently
  // dropping them.
  for (; i < count; ++i) {
    diff += (uint32)__builtin_popcount((unsigned int)(src_a[i] ^ src_b[i]));
  }
  return diff;
}
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
uint32 sse; uint32 sse;
asm volatile ( asm volatile (
......
...@@ -20,6 +20,67 @@ extern "C" { ...@@ -20,6 +20,67 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#if 0
// Earlier AArch64 NEON Hamming distance, compiled out by the surrounding
// #if 0. Each loop iteration consumes 16 bytes from each buffer.
// The loop is bottom-tested (subs / b.gt), so one 16-byte chunk is always
// read, and if count is not a multiple of 16 the final iteration reads past
// count bytes.
// NOTE(review): the asm reads through src_a/src_b but lists no "memory"
// clobber - confirm whether one is needed.
uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) {
// Written only by the asm below (output operand %3).
volatile uint32 diff;
asm volatile (
// Zero v4 (per-chunk sum) and v5 (running total) by XORing each with itself.
"eor v4.16b, v4.16b, v4.16b \n"
"eor v5.16b, v5.16b, v5.16b \n"
"1: \n"
// MEMACCESS is defined outside this view; presumably it annotates the load
// that follows - TODO confirm.
MEMACCESS(0)
"ld1 {v0.16b}, [%0], #16 \n"
MEMACCESS(1)
"ld1 {v1.16b}, [%1], #16 \n"
// 16 bytes consumed; the flags set here drive the b.gt below.
"subs %w2, %w2, #16 \n"
// XOR leaves a 1 in every bit position where the inputs differ.
"eor v2.16b, v0.16b, v1.16b \n"
// Per-byte population count.
"cnt v3.16b, v2.16b \n"
// Sum the 16 byte counts (at most 16 * 8 = 128, which fits in a byte).
"addv b4, v3.16b \n"
// Add the chunk sum to the running total.
"add d5, d5, d4 \n"
"b.gt 1b \n"
// Return the low 32 bits of the running total.
"fmov %w3, s5 \n"
: "+r"(src_a),
"+r"(src_b),
"+r"(count),
"=r"(diff)
:
: "cc", "v0", "v1", "v2", "v3", "v4", "v5");
return diff;
}
#endif
// AArch64 NEON Hamming distance: returns the number of bit positions at
// which src_a and src_b differ. Each loop iteration consumes 32 bytes from
// each buffer (two 16-byte vectors).
// The loop is bottom-tested (subs / b.gt), so one 32-byte chunk is always
// read, and if count is not a multiple of 32 the final iteration reads past
// count bytes. Callers should pass a positive multiple of 32.
// NOTE(review): the asm reads through src_a/src_b but lists no "memory"
// clobber - confirm whether one is needed.
uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) {
// Written only by the asm below (output operand %3).
volatile uint32 diff;
asm volatile (
// d6 holds the running 64-bit total.
"movi d6, #0 \n"
"1: \n"
// MEMACCESS is defined outside this view; presumably it annotates the load
// that follows - TODO confirm.
MEMACCESS(0)
"ld1 {v0.16b, v1.16b}, [%0], #32 \n"
MEMACCESS(1)
"ld1 {v2.16b, v3.16b}, [%1], #32 \n"
// 32 bytes consumed; the flags set here drive the b.gt below.
"subs %w2, %w2, #32 \n"
// XOR leaves a 1 in every bit position where the inputs differ.
"eor v0.16b, v0.16b, v2.16b \n"
"eor v1.16b, v1.16b, v3.16b \n"
// Per-byte population counts.
"cnt v0.16b, v0.16b \n"
"cnt v1.16b, v1.16b \n"
// Sum each vector's 16 byte counts (at most 16 * 8 = 128 per vector, which
// fits in a byte).
"addv b4, v0.16b \n"
"addv b5, v1.16b \n"
// Add both chunk sums to the running total.
"add d6, d6, d4 \n"
"add d6, d6, d5 \n"
"b.gt 1b \n"
// Return the low 32 bits of the running total.
"fmov %w3, s6 \n"
: "+r"(src_a),
"+r"(src_b),
"+r"(count),
"=r"(diff)
:
: "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6");
return diff;
}
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
volatile uint32 sse; volatile uint32 sse;
asm volatile ( asm volatile (
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "../unit_test/unit_test.h" #include "../unit_test/unit_test.h"
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
#include "libyuv/compare.h" #include "libyuv/compare.h"
#include "libyuv/compare_row.h" /* For HammingDistance_C */
#include "libyuv/cpu_id.h" #include "libyuv/cpu_id.h"
#include "libyuv/video_common.h" #include "libyuv/video_common.h"
...@@ -202,6 +203,78 @@ TEST_F(LibYUVBaseTest, BenchmarkARGBDetect_Unaligned) { ...@@ -202,6 +203,78 @@ TEST_F(LibYUVBaseTest, BenchmarkARGBDetect_Unaligned) {
free_aligned_buffer_page_end(src_a); free_aligned_buffer_page_end(src_a);
} }
// Benchmarks the platform-selected HammingDistance row function (NEON on
// aarch64, X86 on GCC-style x86 builds, otherwise C) and checks that its
// result on random data matches HammingDistance_C.
TEST_F(LibYUVBaseTest, BenchmarkHammingDistance_Opt) {
  const int kMaxWidth = 4096 * 3;
  align_buffer_page_end(src_a, kMaxWidth);
  align_buffer_page_end(src_b, kMaxWidth);
  memset(src_a, 0, kMaxWidth);
  memset(src_b, 0, kMaxWidth);

  // Sanity check of the C implementation on a pair of strings whose
  // distance is known to be 16 bits.
  memcpy(src_a, "test0123test4567", 16);
  memcpy(src_b, "tick0123tock4567", 16);
  uint32 actual = HammingDistance_C(src_a, src_b, 16);
  EXPECT_EQ(16u, actual);

  // Reference value from the C implementation on random data.
  MemRandomize(src_a, kMaxWidth);
  MemRandomize(src_b, kMaxWidth);
  uint32 expected = HammingDistance_C(src_a, src_b, kMaxWidth);

  // Number of kMaxWidth-sized passes needed to cover one benchmark frame,
  // rounded up, times the requested iteration count.
  int passes_per_frame =
      (benchmark_width_ * benchmark_height_ + kMaxWidth - 1) / kMaxWidth;
  int remaining = benchmark_iterations_ * passes_per_frame;
  while (remaining-- > 0) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
    actual = HammingDistance_NEON(src_a, src_b, kMaxWidth);
#elif !defined(LIBYUV_DISABLE_X86) && \
    (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
    actual = HammingDistance_X86(src_a, src_b, kMaxWidth);
#else
    actual = HammingDistance_C(src_a, src_b, kMaxWidth);
#endif
  }
  EXPECT_EQ(expected, actual);

  free_aligned_buffer_page_end(src_a);
  free_aligned_buffer_page_end(src_b);
}
// Benchmarks HammingDistance_C and checks that repeated runs on the same
// random data return the same value as the first run.
TEST_F(LibYUVBaseTest, BenchmarkHammingDistance_C) {
  const int kMaxWidth = 4096 * 3;
  align_buffer_page_end(src_a, kMaxWidth);
  align_buffer_page_end(src_b, kMaxWidth);
  memset(src_a, 0, kMaxWidth);
  memset(src_b, 0, kMaxWidth);

  // Sanity check on a pair of strings whose distance is known to be 16 bits.
  memcpy(src_a, "test0123test4567", 16);
  memcpy(src_b, "tick0123tock4567", 16);
  uint32 actual = HammingDistance_C(src_a, src_b, 16);
  EXPECT_EQ(16u, actual);

  // Reference value on random data; the timed loop below must reproduce it.
  MemRandomize(src_a, kMaxWidth);
  MemRandomize(src_b, kMaxWidth);
  uint32 expected = HammingDistance_C(src_a, src_b, kMaxWidth);

  // Number of kMaxWidth-sized passes needed to cover one benchmark frame,
  // rounded up, times the requested iteration count.
  int passes_per_frame =
      (benchmark_width_ * benchmark_height_ + kMaxWidth - 1) / kMaxWidth;
  int remaining = benchmark_iterations_ * passes_per_frame;
  while (remaining-- > 0) {
    actual = HammingDistance_C(src_a, src_b, kMaxWidth);
  }
  EXPECT_EQ(expected, actual);

  free_aligned_buffer_page_end(src_a);
  free_aligned_buffer_page_end(src_b);
}
TEST_F(LibYUVBaseTest, BenchmarkSumSquareError_Opt) { TEST_F(LibYUVBaseTest, BenchmarkSumSquareError_Opt) {
const int kMaxWidth = 4096 * 3; const int kMaxWidth = 4096 * 3;
align_buffer_page_end(src_a, kMaxWidth); align_buffer_page_end(src_a, kMaxWidth);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment