Commit 938d8dce authored by Alexander Alekhin

Merge pull request #15685 from pmur:cnz64f-simd

parents 6e85e852 ec91a3d5
...@@ -179,7 +179,25 @@ static int countNonZero32f( const float* src, int len ) ...@@ -179,7 +179,25 @@ static int countNonZero32f( const float* src, int len )
// NOTE(review): this listing is a side-by-side diff rendering; where a line shows the
// same text twice or two different statements, the left part appears to be the
// pre-change text and the right part the post-change text.
//
// Counts the non-zero values among the first `len` doubles at `src`.
// When CV_SIMD_64F is enabled, a vectorized loop handles the largest prefix whose
// length is a multiple of two vectors; whatever remains is passed to countNonZero_.
static int countNonZero64f( const double* src, int len ) static int countNonZero64f( const double* src, int len )
{ {
return countNonZero_(src, len); int nz = 0, i = 0;
#if CV_SIMD_64F
// Two separate accumulators, one for each vector loaded per loop iteration.
v_int64 sum1 = vx_setzero_s64();
v_int64 sum2 = vx_setzero_s64();
v_float64 zero = vx_setzero_f64();
// Each iteration consumes two vectors' worth of doubles.
int step = v_float64::nlanes * 2;
// Largest multiple of step not exceeding len (assumes step is a power of two -- TODO confirm).
int len0 = len & -step;
for(i = 0; i < len0; i += step )
{
sum1 += v_reinterpret_as_s64(vx_load(&src[i]) == zero);
sum2 += v_reinterpret_as_s64(vx_load(&src[i + step / 2]) == zero);
}
// N.B. each lane that compares equal to zero adds -1 (0xF...F) to the running sums,
// so i plus the (non-positive) total is the number of non-zero values in the first i elements.
nz = i + (int)v_reduce_sum(sum1 + sum2);
v_cleanup();
#endif
// Elements not covered above (all of them when CV_SIMD_64F is off, since i stays 0)
// are counted by countNonZero_.
return nz + countNonZero_(src + i, len - i);
} }
CountNonZeroFunc getCountNonZeroTab(int depth) CountNonZeroFunc getCountNonZeroTab(int depth)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment