Commit 1220dd48 authored by Vitaly Tuzov

Updated v_popcount description, reference implementation and test.

parent 96ab78dc
......@@ -603,27 +603,20 @@ static const unsigned char popCountTable[] =
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
};
/** @brief Count the 1 bits in the vector and return 4 values
/** @brief Count the 1 bits in the vector lanes and return result as corresponding unsigned type
Scheme:
@code
{A1 A2 A3 ...} => popcount(A1)
{A1 A2 A3 ...} => {popcount(A1), popcount(A2), popcount(A3), ...}
@endcode
Any types but result will be in v_uint32x4*/
template<typename _Tp, int n> inline v_uint32x4 v_popcount(const v_reg<_Tp, n>& a)
For all integer types. */
template<typename _Tp, int n>
inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_popcount(const v_reg<_Tp, n>& a)
{
v_uint8x16 b;
b = v_reinterpret_as_u8(a);
for( int i = 0; i < v_uint8x16::nlanes; i++ )
{
b.s[i] = popCountTable[b.s[i]];
}
v_uint32x4 c;
for( int i = 0; i < v_uint32x4::nlanes; i++ )
{
c.s[i] = b.s[i*4] + b.s[i*4+1] + b.s[i*4+2] + b.s[i*4+3];
}
return c;
v_reg<typename V_TypeTraits<_Tp>::abs_type, n> b = v_reg<typename V_TypeTraits<_Tp>::abs_type, n>::zero();
for( int i = 0; i < n*sizeof(_Tp); i++ )
b.s[i/sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]];
return b;
}
......
......@@ -75,7 +75,7 @@ int normHamming(const uchar* a, int n)
v_uint64 t = vx_setzero_u64();
for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
t += v_popcount(v_reinterpret_as_u64(vx_load(a + i)));
result += v_reduce_sum(t);
result += (int)v_reduce_sum(t);
}
#endif // CV_SIMD
#if CV_ENABLE_UNROLLED
......@@ -144,7 +144,7 @@ int normHamming(const uchar* a, const uchar* b, int n)
v_uint64 t = vx_setzero_u64();
for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
t += v_popcount(v_reinterpret_as_u64(vx_load(a + i) ^ vx_load(b + i)));
result += v_reduce_sum(t);
result += (int)v_reduce_sum(t);
}
#endif // CV_SIMD
#if CV_ENABLE_UNROLLED
......
......@@ -686,18 +686,24 @@ template<typename R> struct TheTest
TheTest & test_popcount()
{
typedef typename V_RegTraits<R>::u_reg Ru;
static unsigned popcountTable[] = {
0, 1, 2, 4, 5, 7, 9, 12, 13, 15, 17, 20, 22, 25, 28, 32, 33,
35, 37, 40, 42, 45, 48, 52, 54, 57, 60, 64, 67, 71, 75, 80, 81,
83, 85, 88, 90, 93, 96, 100, 102, 105, 108, 112, 115, 119, 123,
128, 130, 133, 136, 140, 143, 147, 151, 156, 159, 163, 167, 172,
176, 181, 186, 192, 193
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, //0x00-0x0f
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, //0x10-0x1f
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, //0x20-0x2f
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, //0x30-0x3f
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, //0x40-0x4f
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, //0x50-0x5f
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, //0x60-0x6f
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, //0x70-0x7f
1 //0x80
};
Data<R> dataA;
R a = dataA;
unsigned resB = (unsigned)v_reduce_sum(v_popcount(a));
EXPECT_EQ(popcountTable[R::nlanes], resB);
Data<Ru> resB = v_popcount(a);
for (int i = 0; i < Ru::nlanes; ++i)
EXPECT_EQ(popcountTable[i + 1], resB[i]);
return *this;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment