Commit 1220dd48 authored by Vitaly Tuzov

Updated v_popcount description, reference implementation and test.

parent 96ab78dc
...@@ -603,27 +603,20 @@ static const unsigned char popCountTable[] = ...@@ -603,27 +603,20 @@ static const unsigned char popCountTable[] =
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
}; };
/** @brief Count the 1 bits in the vector and return 4 values /** @brief Count the 1 bits in the vector lanes and return result as corresponding unsigned type
Scheme: Scheme:
@code @code
{A1 A2 A3 ...} => popcount(A1) {A1 A2 A3 ...} => {popcount(A1), popcount(A2), popcount(A3), ...}
@endcode @endcode
Any types but result will be in v_uint32x4*/ For all integer types. */
template<typename _Tp, int n> inline v_uint32x4 v_popcount(const v_reg<_Tp, n>& a) template<typename _Tp, int n>
inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_popcount(const v_reg<_Tp, n>& a)
{ {
v_uint8x16 b; v_reg<typename V_TypeTraits<_Tp>::abs_type, n> b = v_reg<typename V_TypeTraits<_Tp>::abs_type, n>::zero();
b = v_reinterpret_as_u8(a); for( int i = 0; i < n*sizeof(_Tp); i++ )
for( int i = 0; i < v_uint8x16::nlanes; i++ ) b.s[i/sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]];
{ return b;
b.s[i] = popCountTable[b.s[i]];
}
v_uint32x4 c;
for( int i = 0; i < v_uint32x4::nlanes; i++ )
{
c.s[i] = b.s[i*4] + b.s[i*4+1] + b.s[i*4+2] + b.s[i*4+3];
}
return c;
} }
......
...@@ -75,7 +75,7 @@ int normHamming(const uchar* a, int n) ...@@ -75,7 +75,7 @@ int normHamming(const uchar* a, int n)
v_uint64 t = vx_setzero_u64(); v_uint64 t = vx_setzero_u64();
for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
t += v_popcount(v_reinterpret_as_u64(vx_load(a + i))); t += v_popcount(v_reinterpret_as_u64(vx_load(a + i)));
result += v_reduce_sum(t); result += (int)v_reduce_sum(t);
} }
#endif // CV_SIMD #endif // CV_SIMD
#if CV_ENABLE_UNROLLED #if CV_ENABLE_UNROLLED
...@@ -144,7 +144,7 @@ int normHamming(const uchar* a, const uchar* b, int n) ...@@ -144,7 +144,7 @@ int normHamming(const uchar* a, const uchar* b, int n)
v_uint64 t = vx_setzero_u64(); v_uint64 t = vx_setzero_u64();
for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
t += v_popcount(v_reinterpret_as_u64(vx_load(a + i) ^ vx_load(b + i))); t += v_popcount(v_reinterpret_as_u64(vx_load(a + i) ^ vx_load(b + i)));
result += v_reduce_sum(t); result += (int)v_reduce_sum(t);
} }
#endif // CV_SIMD #endif // CV_SIMD
#if CV_ENABLE_UNROLLED #if CV_ENABLE_UNROLLED
......
...@@ -686,18 +686,24 @@ template<typename R> struct TheTest ...@@ -686,18 +686,24 @@ template<typename R> struct TheTest
TheTest & test_popcount() TheTest & test_popcount()
{ {
typedef typename V_RegTraits<R>::u_reg Ru;
static unsigned popcountTable[] = { static unsigned popcountTable[] = {
0, 1, 2, 4, 5, 7, 9, 12, 13, 15, 17, 20, 22, 25, 28, 32, 33, 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, //0x00-0x0f
35, 37, 40, 42, 45, 48, 52, 54, 57, 60, 64, 67, 71, 75, 80, 81, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, //0x10-0x1f
83, 85, 88, 90, 93, 96, 100, 102, 105, 108, 112, 115, 119, 123, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, //0x20-0x2f
128, 130, 133, 136, 140, 143, 147, 151, 156, 159, 163, 167, 172, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, //0x30-0x3f
176, 181, 186, 192, 193 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, //0x40-0x4f
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, //0x50-0x5f
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, //0x60-0x6f
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, //0x70-0x7f
1 //0x80
}; };
Data<R> dataA; Data<R> dataA;
R a = dataA; R a = dataA;
unsigned resB = (unsigned)v_reduce_sum(v_popcount(a)); Data<Ru> resB = v_popcount(a);
EXPECT_EQ(popcountTable[R::nlanes], resB); for (int i = 0; i < Ru::nlanes; ++i)
EXPECT_EQ(popcountTable[i + 1], resB[i]);
return *this; return *this;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment