Commit fecd7417 authored by Frank Barchard, committed by Commit Bot

Port HammingDistance to SSSE3

Bug: libyuv:701
Test: BenchmarkHammingDistance_Opt
Change-Id: Ibdd5d382677ebef4f82a62e0d5c3b88614a3b6e4
Reviewed-on: https://chromium-review.googlesource.com/696290
Commit-Queue: Frank Barchard <fbarchard@google.com>
Reviewed-by: Cheng Wang <wangcheng@google.com>
parent bde789b1
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1671 Version: 1672
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -70,7 +70,13 @@ extern "C" { ...@@ -70,7 +70,13 @@ extern "C" {
#define HAS_SUMSQUAREERROR_AVX2 #define HAS_SUMSQUAREERROR_AVX2
#endif #endif
// The following are available for VGCC and clangcl 64 bit: // The following are available for GCC and clangcl 64 bit:
// Enables the SSSE3 HammingDistance row function unless LIBYUV_DISABLE_X86 is
// defined. The target must be x86-64, or 32-bit x86 built with a compiler
// other than MSVC.
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
#define HAS_HAMMINGDISTANCE_SSSE3
#endif
// The following are available for GCC and clangcl 64 bit:
#if !defined(LIBYUV_DISABLE_X86) && defined(CLANG_HAS_AVX2) && \ #if !defined(LIBYUV_DISABLE_X86) && defined(CLANG_HAS_AVX2) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
#define HAS_HAMMINGDISTANCE_AVX2 #define HAS_HAMMINGDISTANCE_AVX2
...@@ -93,7 +99,7 @@ extern "C" { ...@@ -93,7 +99,7 @@ extern "C" {
uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count); uint32 HammingDistance_C(const uint8* src_a, const uint8* src_b, int count);
uint32 HammingDistance_X86(const uint8* src_a, const uint8* src_b, int count); uint32 HammingDistance_X86(const uint8* src_a, const uint8* src_b, int count);
uint32 HammingDistance_SSE2(const uint8* src_a, const uint8* src_b, int count); uint32 HammingDistance_SSSE3(const uint8* src_a, const uint8* src_b, int count);
uint32 HammingDistance_AVX2(const uint8* src_a, const uint8* src_b, int count); uint32 HammingDistance_AVX2(const uint8* src_a, const uint8* src_b, int count);
uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count); uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count);
uint32 HammingDistance_MSA(const uint8* src_a, const uint8* src_b, int count); uint32 HammingDistance_MSA(const uint8* src_a, const uint8* src_b, int count);
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ #ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1671 #define LIBYUV_VERSION 1672
#endif // INCLUDE_LIBYUV_VERSION_H_ #endif // INCLUDE_LIBYUV_VERSION_H_
...@@ -131,6 +131,11 @@ uint64 ComputeHammingDistance(const uint8* src_a, ...@@ -131,6 +131,11 @@ uint64 ComputeHammingDistance(const uint8* src_a,
HammingDistance = HammingDistance_X86; HammingDistance = HammingDistance_X86;
} }
#endif #endif
#if defined(HAS_HAMMINGDISTANCE_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
HammingDistance = HammingDistance_SSSE3;
}
#endif
#if defined(HAS_HAMMINGDISTANCE_AVX2) #if defined(HAS_HAMMINGDISTANCE_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) { if (TestCpuFlag(kCpuHasAVX2)) {
HammingDistance = HammingDistance_AVX2; HammingDistance = HammingDistance_AVX2;
......
...@@ -35,15 +35,74 @@ uint32 HammingDistance_X86(const uint8* src_a, const uint8* src_b, int count) { ...@@ -35,15 +35,74 @@ uint32 HammingDistance_X86(const uint8* src_a, const uint8* src_b, int count) {
return diff; return diff;
} }
#ifdef HAS_HAMMINGDISTANCE_AVX2 static vec8 kNibbleMask = {15, 15, 15, 15, 15, 15, 15, 15,
static uint32 kNibbleMask = 0x0f0f0f0fu; 15, 15, 15, 15, 15, 15, 15, 15};
static vec8 kBitCount = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}; static vec8 kBitCount = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
// Counts the bits that differ between src_a and src_b using SSSE3.
//
// Each loop iteration XORs 32 bytes of src_a with the matching 32 bytes of
// src_b, splits every byte of the result into its low and high nibble, looks
// up the bit count of each nibble with pshufb against kBitCount, and adds the
// per-byte counts into a running total with psadbw/paddd.
//
// src_a, src_b: buffers to compare. They are accessed with movdqa and with
//               pxor memory operands, both of which fault on addresses that
//               are not 16-byte aligned.
//               NOTE(review): confirm every caller guarantees this alignment.
// count:        number of bytes to compare. It is consumed 32 bytes at a time
//               and the loop condition is tested after the body, so at least
//               32 bytes are always read.
//               NOTE(review): presumably callers pass a positive multiple of
//               32 - verify against ComputeHammingDistance.
// Returns the number of differing bits.
uint32 HammingDistance_SSSE3(const uint8* src_a,
                             const uint8* src_b,
                             int count) {
  uint32 diff = 0u;
  asm volatile(
      "movdqa     %4,%%xmm2                      \n"  // xmm2 = nibble mask
      "movdqa     %5,%%xmm3                      \n"  // xmm3 = bit count table
      "pxor       %%xmm0,%%xmm0                  \n"  // xmm0 = running total
      "pxor       %%xmm1,%%xmm1                  \n"  // xmm1 = zero for psadbw
      // Turn src_b into an offset from src_a so one pointer drives both reads.
      "sub        %0,%1                          \n"

      LABELALIGN
      "1:                                        \n"
      // First 16 bytes: xor, then count the bits of each nibble.
      "movdqa     (%0),%%xmm4                    \n"
      "movdqa     0x10(%0), %%xmm5               \n"
      "pxor       (%0,%1), %%xmm4                \n"
      "movdqa     %%xmm4,%%xmm6                  \n"
      "pand       %%xmm2,%%xmm6                  \n"
      "psrlw      $0x4,%%xmm4                    \n"
      "movdqa     %%xmm3,%%xmm7                  \n"
      "pshufb     %%xmm6,%%xmm7                  \n"
      "pand       %%xmm2,%%xmm4                  \n"
      "movdqa     %%xmm3,%%xmm6                  \n"
      "pshufb     %%xmm4,%%xmm6                  \n"
      "paddb      %%xmm7,%%xmm6                  \n"
      // Second 16 bytes: same steps, result folded into xmm6.
      "pxor       0x10(%0,%1),%%xmm5             \n"
      "add        $0x20,%0                       \n"
      "movdqa     %%xmm5,%%xmm4                  \n"
      "pand       %%xmm2,%%xmm5                  \n"
      "psrlw      $0x4,%%xmm4                    \n"
      "movdqa     %%xmm3,%%xmm7                  \n"
      "pshufb     %%xmm5,%%xmm7                  \n"
      "pand       %%xmm2,%%xmm4                  \n"
      "movdqa     %%xmm3,%%xmm5                  \n"
      "pshufb     %%xmm4,%%xmm5                  \n"
      "paddb      %%xmm7,%%xmm5                  \n"
      "paddb      %%xmm5,%%xmm6                  \n"
      // Horizontal byte sum of the 32 per-byte counts, added to the total.
      "psadbw     %%xmm1,%%xmm6                  \n"
      "paddd      %%xmm6,%%xmm0                  \n"
      "sub        $0x20,%2                       \n"
      "jg         1b                             \n"

      // psadbw leaves one partial sum in each 64-bit half; combine them.
      "pshufd     $0xaa,%%xmm0,%%xmm1            \n"
      "paddd      %%xmm1,%%xmm0                  \n"
      // Legacy SSE2 movd, not VEX-encoded vmovd: this routine is selected on
      // SSSE3 support alone and vmovd would fault on CPUs without AVX.
      "movd       %%xmm0, %3                     \n"
      : "+r"(src_a),       // %0
        "+r"(src_b),       // %1
        "+r"(count),       // %2
        "=r"(diff)         // %3
      : "m"(kNibbleMask),  // %4
        "m"(kBitCount)     // %5
      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
        "xmm7");
  return diff;
}
#ifdef HAS_HAMMINGDISTANCE_AVX2
uint32 HammingDistance_AVX2(const uint8* src_a, const uint8* src_b, int count) { uint32 HammingDistance_AVX2(const uint8* src_a, const uint8* src_b, int count) {
uint32 diff = 0u; uint32 diff = 0u;
asm volatile( asm volatile(
"vbroadcastss %4,%%ymm2 \n" "vbroadcastf128 %4,%%ymm2 \n"
"vbroadcastf128 %5,%%ymm3 \n" "vbroadcastf128 %5,%%ymm3 \n"
"vpxor %%ymm0,%%ymm0,%%ymm0 \n" "vpxor %%ymm0,%%ymm0,%%ymm0 \n"
"vpxor %%ymm1,%%ymm1,%%ymm1 \n" "vpxor %%ymm1,%%ymm1,%%ymm1 \n"
...@@ -83,7 +142,7 @@ uint32 HammingDistance_AVX2(const uint8* src_a, const uint8* src_b, int count) { ...@@ -83,7 +142,7 @@ uint32 HammingDistance_AVX2(const uint8* src_a, const uint8* src_b, int count) {
: "+r"(src_a), // %0 : "+r"(src_a), // %0
"+r"(src_b), // %1 "+r"(src_b), // %1
"+r"(count), // %2 "+r"(count), // %2
"=g"(diff) // %3 "=r"(diff) // %3
: "m"(kNibbleMask), // %4 : "m"(kNibbleMask), // %4
"m"(kBitCount) // %5 "m"(kBitCount) // %5
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
......
...@@ -233,9 +233,14 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_Opt) { ...@@ -233,9 +233,14 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_Opt) {
int has_avx2 = TestCpuFlag(kCpuHasAVX2); int has_avx2 = TestCpuFlag(kCpuHasAVX2);
if (has_avx2) { if (has_avx2) {
h1 = HammingDistance_AVX2(src_a, src_b, kMaxWidth); h1 = HammingDistance_AVX2(src_a, src_b, kMaxWidth);
} else {
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
if (has_ssse3) {
h1 = HammingDistance_SSSE3(src_a, src_b, kMaxWidth);
} else { } else {
h1 = HammingDistance_X86(src_a, src_b, kMaxWidth); h1 = HammingDistance_X86(src_a, src_b, kMaxWidth);
} }
}
#elif defined(HAS_HAMMINGDISTANCE_X86) #elif defined(HAS_HAMMINGDISTANCE_X86)
h1 = HammingDistance_X86(src_a, src_b, kMaxWidth); h1 = HammingDistance_X86(src_a, src_b, kMaxWidth);
#else #else
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment