Commit 77f6916d authored by Frank Barchard

use __popcnt for visual c HammingDistance_X86

BUG=libyuv:701
TEST=HammingDistance unittest performance is comparable to x64
R=wangcheng@google.com

Change-Id: I8abe861e086e0162ba4c7ba6f1ef7d1c006cd9d4
Reviewed-on: https://chromium-review.googlesource.com/505454
Reviewed-by: Frank Barchard <fbarchard@google.com>
Commit-Queue: Frank Barchard <fbarchard@google.com>
parent e0615c0e
...@@ -52,10 +52,6 @@ extern "C" { ...@@ -52,10 +52,6 @@ extern "C" {
(defined(__x86_64__) || defined(__i386__) || defined(_M_IX86)) (defined(__x86_64__) || defined(__i386__) || defined(_M_IX86))
#define HAS_HASHDJB2_SSE41 #define HAS_HASHDJB2_SSE41
#define HAS_SUMSQUAREERROR_SSE2 #define HAS_SUMSQUAREERROR_SSE2
#endif
// The following are available for GCC:
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#define HAS_HAMMINGDISTANCE_X86 #define HAS_HAMMINGDISTANCE_X86
#endif #endif
......
...@@ -13,6 +13,10 @@ ...@@ -13,6 +13,10 @@
#include "libyuv/compare_row.h" #include "libyuv/compare_row.h"
#include "libyuv/row.h" #include "libyuv/row.h"
#if defined(_MSC_VER)
#include <intrin.h> // For __popcnt
#endif
#ifdef __cplusplus #ifdef __cplusplus
namespace libyuv { namespace libyuv {
extern "C" { extern "C" {
...@@ -21,6 +25,19 @@ extern "C" { ...@@ -21,6 +25,19 @@ extern "C" {
// This module is for 32 bit Visual C x86 and clangcl // This module is for 32 bit Visual C x86 and clangcl
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
// Returns the number of bit positions that differ between the buffers src_a
// and src_b, using the __popcnt intrinsic on 32-bit words.
//
// src_a, src_b: input byte buffers, read 4 bytes at a time.
// count: number of bytes to compare. Only whole 4-byte groups are processed:
//        the trailing (count % 4) bytes are not included in the result, and a
//        count below 4 yields 0.
// NOTE(review): callers presumably pass a multiple of 4 or account for the
// tail bytes themselves - confirm.
uint32 HammingDistance_X86(const uint8* src_a, const uint8* src_b, int count) {
  uint32 diff = 0u;
  int i;
  for (i = 0; i < count - 3; i += 4) {
    // XOR leaves a 1 bit wherever the two words disagree. The casts keep the
    // const qualifier of the inputs; the buffers are only ever read.
    uint32 x = *((const uint32*)src_a) ^ *((const uint32*)src_b);
    src_a += 4;
    src_b += 4;
    diff += __popcnt(x);  // Set bits in x == differing bits in this word.
  }
  return diff;
}
__declspec(naked) uint32 __declspec(naked) uint32
SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
__asm { __asm {
......
...@@ -227,10 +227,9 @@ TEST_F(LibYUVBaseTest, BenchmarkHammingDistance_Opt) { ...@@ -227,10 +227,9 @@ TEST_F(LibYUVBaseTest, BenchmarkHammingDistance_Opt) {
benchmark_iterations_ * benchmark_iterations_ *
((benchmark_width_ * benchmark_height_ + kMaxWidth - 1) / kMaxWidth); ((benchmark_width_ * benchmark_height_ + kMaxWidth - 1) / kMaxWidth);
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #if defined(HAS_HAMMINGDISTANCE_NEON)
h1 = HammingDistance_NEON(src_a, src_b, kMaxWidth); h1 = HammingDistance_NEON(src_a, src_b, kMaxWidth);
#elif !defined(LIBYUV_DISABLE_X86) && \ #elif defined(HAS_HAMMINGDISTANCE_X86)
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
h1 = HammingDistance_X86(src_a, src_b, kMaxWidth); h1 = HammingDistance_X86(src_a, src_b, kMaxWidth);
#else #else
h1 = HammingDistance_C(src_a, src_b, kMaxWidth); h1 = HammingDistance_C(src_a, src_b, kMaxWidth);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment