Commit 74defefd authored by Alexander Alekhin

Merge pull request #8076 from tomoaki0705:brushupAvxPopcount

parents 776c8c5c 820fdbff
...@@ -4261,7 +4261,9 @@ int normHamming(const uchar* a, int n) ...@@ -4261,7 +4261,9 @@ int normHamming(const uchar* a, int n)
_r0 = _mm256_add_epi32(_r0, _mm256_shuffle_epi32(_r0, 2)); _r0 = _mm256_add_epi32(_r0, _mm256_shuffle_epi32(_r0, 2));
result = _mm256_extract_epi32_(_mm256_add_epi32(_r0, _mm256_permute2x128_si256(_r0, _r0, 1)), 0); result = _mm256_extract_epi32_(_mm256_add_epi32(_r0, _mm256_permute2x128_si256(_r0, _r0, 1)), 0);
} }
#elif CV_POPCNT #endif // CV_AVX2
#if CV_POPCNT
if(checkHardwareSupport(CV_CPU_POPCNT)) if(checkHardwareSupport(CV_CPU_POPCNT))
{ {
# if defined CV_POPCNT_U64 # if defined CV_POPCNT_U64
...@@ -4275,7 +4277,9 @@ int normHamming(const uchar* a, int n) ...@@ -4275,7 +4277,9 @@ int normHamming(const uchar* a, int n)
result += CV_POPCNT_U32(*(uint*)(a + i)); result += CV_POPCNT_U32(*(uint*)(a + i));
} }
} }
#elif CV_SIMD128 #endif // CV_POPCNT
#if CV_SIMD128
if(hasSIMD128()) if(hasSIMD128())
{ {
v_uint32x4 t = v_setzero_u32(); v_uint32x4 t = v_setzero_u32();
...@@ -4283,9 +4287,10 @@ int normHamming(const uchar* a, int n) ...@@ -4283,9 +4287,10 @@ int normHamming(const uchar* a, int n)
{ {
t += v_popcount(v_load(a + i)); t += v_popcount(v_load(a + i));
} }
result = v_reduce_sum(t); result += v_reduce_sum(t);
} }
#endif #endif // CV_SIMD128
for(; i <= n - 4; i += 4) for(; i <= n - 4; i += 4)
{ {
result += popCountTable[a[i]] + popCountTable[a[i+1]] + result += popCountTable[a[i]] + popCountTable[a[i+1]] +
...@@ -4327,7 +4332,9 @@ int normHamming(const uchar* a, const uchar* b, int n) ...@@ -4327,7 +4332,9 @@ int normHamming(const uchar* a, const uchar* b, int n)
_r0 = _mm256_add_epi32(_r0, _mm256_shuffle_epi32(_r0, 2)); _r0 = _mm256_add_epi32(_r0, _mm256_shuffle_epi32(_r0, 2));
result = _mm256_extract_epi32_(_mm256_add_epi32(_r0, _mm256_permute2x128_si256(_r0, _r0, 1)), 0); result = _mm256_extract_epi32_(_mm256_add_epi32(_r0, _mm256_permute2x128_si256(_r0, _r0, 1)), 0);
} }
#elif CV_POPCNT #endif // CV_AVX2
#if CV_POPCNT
if(checkHardwareSupport(CV_CPU_POPCNT)) if(checkHardwareSupport(CV_CPU_POPCNT))
{ {
# if defined CV_POPCNT_U64 # if defined CV_POPCNT_U64
...@@ -4341,7 +4348,9 @@ int normHamming(const uchar* a, const uchar* b, int n) ...@@ -4341,7 +4348,9 @@ int normHamming(const uchar* a, const uchar* b, int n)
result += CV_POPCNT_U32(*(uint*)(a + i) ^ *(uint*)(b + i)); result += CV_POPCNT_U32(*(uint*)(a + i) ^ *(uint*)(b + i));
} }
} }
#elif CV_SIMD128 #endif // CV_POPCNT
#if CV_SIMD128
if(hasSIMD128()) if(hasSIMD128())
{ {
v_uint32x4 t = v_setzero_u32(); v_uint32x4 t = v_setzero_u32();
...@@ -4349,9 +4358,10 @@ int normHamming(const uchar* a, const uchar* b, int n) ...@@ -4349,9 +4358,10 @@ int normHamming(const uchar* a, const uchar* b, int n)
{ {
t += v_popcount(v_load(a + i) ^ v_load(b + i)); t += v_popcount(v_load(a + i) ^ v_load(b + i));
} }
result = v_reduce_sum(t); result += v_reduce_sum(t);
} }
#endif #endif // CV_SIMD128
for(; i <= n - 4; i += 4) for(; i <= n - 4; i += 4)
{ {
result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] + result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] +
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment