Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
3c70d966
Commit
3c70d966
authored
Feb 14, 2019
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #13823 from alalek:core_dispatch_countNonZero
parents
d9b57379
e3633ec4
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
195 additions
and
165 deletions
+195
-165
CMakeLists.txt
modules/core/CMakeLists.txt
+1
-0
count_non_zero.dispatch.cpp
modules/core/src/count_non_zero.dispatch.cpp
+181
-0
count_non_zero.simd.hpp
modules/core/src/count_non_zero.simd.hpp
+13
-165
No files found.
modules/core/CMakeLists.txt
View file @
3c70d966
...
...
@@ -5,6 +5,7 @@ ocv_add_dispatched_file(stat SSE4_2 AVX2)
ocv_add_dispatched_file
(
arithm SSE2 SSE4_1 AVX2 VSX3
)
ocv_add_dispatched_file
(
convert SSE2 AVX2
)
ocv_add_dispatched_file
(
convert_scale SSE2 AVX2
)
ocv_add_dispatched_file
(
count_non_zero SSE2 AVX2
)
ocv_add_dispatched_file
(
sum SSE2 AVX2
)
# dispatching for accuracy tests
...
...
modules/core/src/count_non_zero.dispatch.cpp
0 → 100644
View file @
3c70d966
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#include "stat.hpp"
#include "count_non_zero.simd.hpp"
#include "count_non_zero.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
namespace
cv
{
static
CountNonZeroFunc
getCountNonZeroTab
(
int
depth
)
{
CV_INSTRUMENT_REGION
();
CV_CPU_DISPATCH
(
getCountNonZeroTab
,
(
depth
),
CV_CPU_DISPATCH_MODES_ALL
);
}
#ifdef HAVE_OPENCL
static
bool
ocl_countNonZero
(
InputArray
_src
,
int
&
res
)
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
kercn
=
ocl
::
predictOptimalVectorWidth
(
_src
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
depth
==
CV_64F
&&
!
doubleSupport
)
return
false
;
int
dbsize
=
ocl
::
Device
::
getDefault
().
maxComputeUnits
();
size_t
wgs
=
ocl
::
Device
::
getDefault
().
maxWorkGroupSize
();
int
wgs2_aligned
=
1
;
while
(
wgs2_aligned
<
(
int
)
wgs
)
wgs2_aligned
<<=
1
;
wgs2_aligned
>>=
1
;
ocl
::
Kernel
k
(
"reduce"
,
ocl
::
core
::
reduce_oclsrc
,
format
(
"-D srcT=%s -D srcT1=%s -D cn=1 -D OP_COUNT_NON_ZERO"
" -D WGS=%d -D kercn=%d -D WGS2_ALIGNED=%d%s%s"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
kercn
)),
ocl
::
typeToStr
(
depth
),
(
int
)
wgs
,
kercn
,
wgs2_aligned
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
_src
.
isContinuous
()
?
" -D HAVE_SRC_CONT"
:
""
));
if
(
k
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
(),
db
(
1
,
dbsize
,
CV_32SC1
);
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
src
.
cols
,
(
int
)
src
.
total
(),
dbsize
,
ocl
::
KernelArg
::
PtrWriteOnly
(
db
));
size_t
globalsize
=
dbsize
*
wgs
;
if
(
k
.
run
(
1
,
&
globalsize
,
&
wgs
,
true
))
return
res
=
saturate_cast
<
int
>
(
cv
::
sum
(
db
.
getMat
(
ACCESS_READ
))[
0
]),
true
;
return
false
;
}
#endif
#if defined HAVE_IPP
static
bool
ipp_countNonZero
(
Mat
&
src
,
int
&
res
)
{
CV_INSTRUMENT_REGION_IPP
();
#if IPP_VERSION_X100 < 201801
// Poor performance of SSE42
if
(
cv
::
ipp
::
getIppTopFeatures
()
==
ippCPUID_SSE42
)
return
false
;
#endif
Ipp32s
count
=
0
;
int
depth
=
src
.
depth
();
if
(
src
.
dims
<=
2
)
{
IppStatus
status
;
IppiSize
size
=
{
src
.
cols
*
src
.
channels
(),
src
.
rows
};
if
(
depth
==
CV_8U
)
status
=
CV_INSTRUMENT_FUN_IPP
(
ippiCountInRange_8u_C1R
,
(
const
Ipp8u
*
)
src
.
ptr
(),
(
int
)
src
.
step
,
size
,
&
count
,
0
,
0
);
else
if
(
depth
==
CV_32F
)
status
=
CV_INSTRUMENT_FUN_IPP
(
ippiCountInRange_32f_C1R
,
(
const
Ipp32f
*
)
src
.
ptr
(),
(
int
)
src
.
step
,
size
,
&
count
,
0
,
0
);
else
return
false
;
if
(
status
<
0
)
return
false
;
res
=
size
.
width
*
size
.
height
-
count
;
}
else
{
IppStatus
status
;
const
Mat
*
arrays
[]
=
{
&
src
,
NULL
};
Mat
planes
[
1
];
NAryMatIterator
it
(
arrays
,
planes
,
1
);
IppiSize
size
=
{(
int
)
it
.
size
*
src
.
channels
(),
1
};
res
=
0
;
for
(
size_t
i
=
0
;
i
<
it
.
nplanes
;
i
++
,
++
it
)
{
if
(
depth
==
CV_8U
)
status
=
CV_INSTRUMENT_FUN_IPP
(
ippiCountInRange_8u_C1R
,
it
.
planes
->
ptr
<
Ipp8u
>
(),
(
int
)
it
.
planes
->
step
,
size
,
&
count
,
0
,
0
);
else
if
(
depth
==
CV_32F
)
status
=
CV_INSTRUMENT_FUN_IPP
(
ippiCountInRange_32f_C1R
,
it
.
planes
->
ptr
<
Ipp32f
>
(),
(
int
)
it
.
planes
->
step
,
size
,
&
count
,
0
,
0
);
else
return
false
;
if
(
status
<
0
||
(
int
)
it
.
planes
->
total
()
*
src
.
channels
()
<
count
)
return
false
;
res
+=
(
int
)
it
.
planes
->
total
()
*
src
.
channels
()
-
count
;
}
}
return
true
;
}
#endif
int
countNonZero
(
InputArray
_src
)
{
CV_INSTRUMENT_REGION
();
int
type
=
_src
.
type
(),
cn
=
CV_MAT_CN
(
type
);
CV_Assert
(
cn
==
1
);
#if defined HAVE_OPENCL || defined HAVE_IPP
int
res
=
-
1
;
#endif
#ifdef HAVE_OPENCL
CV_OCL_RUN_
(
OCL_PERFORMANCE_CHECK
(
_src
.
isUMat
())
&&
_src
.
dims
()
<=
2
,
ocl_countNonZero
(
_src
,
res
),
res
)
#endif
Mat
src
=
_src
.
getMat
();
CV_IPP_RUN_FAST
(
ipp_countNonZero
(
src
,
res
),
res
);
CountNonZeroFunc
func
=
getCountNonZeroTab
(
src
.
depth
());
CV_Assert
(
func
!=
0
);
const
Mat
*
arrays
[]
=
{
&
src
,
0
};
uchar
*
ptrs
[
1
]
=
{};
NAryMatIterator
it
(
arrays
,
ptrs
);
int
total
=
(
int
)
it
.
size
,
nz
=
0
;
for
(
size_t
i
=
0
;
i
<
it
.
nplanes
;
i
++
,
++
it
)
nz
+=
func
(
ptrs
[
0
],
total
);
return
nz
;
}
void
findNonZero
(
InputArray
_src
,
OutputArray
_idx
)
{
CV_INSTRUMENT_REGION
();
Mat
src
=
_src
.
getMat
();
CV_Assert
(
src
.
type
()
==
CV_8UC1
);
int
n
=
countNonZero
(
src
);
if
(
n
==
0
)
{
_idx
.
release
();
return
;
}
if
(
_idx
.
kind
()
==
_InputArray
::
MAT
&&
!
_idx
.
getMatRef
().
isContinuous
()
)
_idx
.
release
();
_idx
.
create
(
n
,
1
,
CV_32SC2
);
Mat
idx
=
_idx
.
getMat
();
CV_Assert
(
idx
.
isContinuous
());
Point
*
idx_ptr
=
idx
.
ptr
<
Point
>
();
for
(
int
i
=
0
;
i
<
src
.
rows
;
i
++
)
{
const
uchar
*
bin_ptr
=
src
.
ptr
(
i
);
for
(
int
j
=
0
;
j
<
src
.
cols
;
j
++
)
if
(
bin_ptr
[
j
]
)
*
idx_ptr
++
=
Point
(
j
,
i
);
}
}
}
// namespace
modules/core/src/count_non_zero.
c
pp
→
modules/core/src/count_non_zero.
simd.h
pp
View file @
3c70d966
...
...
@@ -2,13 +2,20 @@
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#include "stat.hpp"
namespace
cv
{
typedef
int
(
*
CountNonZeroFunc
)(
const
uchar
*
,
int
);
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
CountNonZeroFunc
getCountNonZeroTab
(
int
depth
);
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
template
<
typename
T
>
static
int
countNonZero_
(
const
T
*
src
,
int
len
)
{
...
...
@@ -175,9 +182,7 @@ static int countNonZero64f( const double* src, int len )
return
countNonZero_
(
src
,
len
);
}
typedef
int
(
*
CountNonZeroFunc
)(
const
uchar
*
,
int
);
static
CountNonZeroFunc
getCountNonZeroTab
(
int
depth
)
CountNonZeroFunc
getCountNonZeroTab
(
int
depth
)
{
static
CountNonZeroFunc
countNonZeroTab
[]
=
{
...
...
@@ -190,164 +195,7 @@ static CountNonZeroFunc getCountNonZeroTab(int depth)
return
countNonZeroTab
[
depth
];
}
#ifdef HAVE_OPENCL
static
bool
ocl_countNonZero
(
InputArray
_src
,
int
&
res
)
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
kercn
=
ocl
::
predictOptimalVectorWidth
(
_src
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
depth
==
CV_64F
&&
!
doubleSupport
)
return
false
;
int
dbsize
=
ocl
::
Device
::
getDefault
().
maxComputeUnits
();
size_t
wgs
=
ocl
::
Device
::
getDefault
().
maxWorkGroupSize
();
int
wgs2_aligned
=
1
;
while
(
wgs2_aligned
<
(
int
)
wgs
)
wgs2_aligned
<<=
1
;
wgs2_aligned
>>=
1
;
ocl
::
Kernel
k
(
"reduce"
,
ocl
::
core
::
reduce_oclsrc
,
format
(
"-D srcT=%s -D srcT1=%s -D cn=1 -D OP_COUNT_NON_ZERO"
" -D WGS=%d -D kercn=%d -D WGS2_ALIGNED=%d%s%s"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
kercn
)),
ocl
::
typeToStr
(
depth
),
(
int
)
wgs
,
kercn
,
wgs2_aligned
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
_src
.
isContinuous
()
?
" -D HAVE_SRC_CONT"
:
""
));
if
(
k
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
(),
db
(
1
,
dbsize
,
CV_32SC1
);
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
src
.
cols
,
(
int
)
src
.
total
(),
dbsize
,
ocl
::
KernelArg
::
PtrWriteOnly
(
db
));
size_t
globalsize
=
dbsize
*
wgs
;
if
(
k
.
run
(
1
,
&
globalsize
,
&
wgs
,
true
))
return
res
=
saturate_cast
<
int
>
(
cv
::
sum
(
db
.
getMat
(
ACCESS_READ
))[
0
]),
true
;
return
false
;
}
#endif
#if defined HAVE_IPP
static
bool
ipp_countNonZero
(
Mat
&
src
,
int
&
res
)
{
CV_INSTRUMENT_REGION_IPP
();
#if IPP_VERSION_X100 < 201801
// Poor performance of SSE42
if
(
cv
::
ipp
::
getIppTopFeatures
()
==
ippCPUID_SSE42
)
return
false
;
#endif
Ipp32s
count
=
0
;
int
depth
=
src
.
depth
();
if
(
src
.
dims
<=
2
)
{
IppStatus
status
;
IppiSize
size
=
{
src
.
cols
*
src
.
channels
(),
src
.
rows
};
if
(
depth
==
CV_8U
)
status
=
CV_INSTRUMENT_FUN_IPP
(
ippiCountInRange_8u_C1R
,
(
const
Ipp8u
*
)
src
.
ptr
(),
(
int
)
src
.
step
,
size
,
&
count
,
0
,
0
);
else
if
(
depth
==
CV_32F
)
status
=
CV_INSTRUMENT_FUN_IPP
(
ippiCountInRange_32f_C1R
,
(
const
Ipp32f
*
)
src
.
ptr
(),
(
int
)
src
.
step
,
size
,
&
count
,
0
,
0
);
else
return
false
;
if
(
status
<
0
)
return
false
;
res
=
size
.
width
*
size
.
height
-
count
;
}
else
{
IppStatus
status
;
const
Mat
*
arrays
[]
=
{
&
src
,
NULL
};
Mat
planes
[
1
];
NAryMatIterator
it
(
arrays
,
planes
,
1
);
IppiSize
size
=
{(
int
)
it
.
size
*
src
.
channels
(),
1
};
res
=
0
;
for
(
size_t
i
=
0
;
i
<
it
.
nplanes
;
i
++
,
++
it
)
{
if
(
depth
==
CV_8U
)
status
=
CV_INSTRUMENT_FUN_IPP
(
ippiCountInRange_8u_C1R
,
it
.
planes
->
ptr
<
Ipp8u
>
(),
(
int
)
it
.
planes
->
step
,
size
,
&
count
,
0
,
0
);
else
if
(
depth
==
CV_32F
)
status
=
CV_INSTRUMENT_FUN_IPP
(
ippiCountInRange_32f_C1R
,
it
.
planes
->
ptr
<
Ipp32f
>
(),
(
int
)
it
.
planes
->
step
,
size
,
&
count
,
0
,
0
);
else
return
false
;
if
(
status
<
0
||
(
int
)
it
.
planes
->
total
()
*
src
.
channels
()
<
count
)
return
false
;
res
+=
(
int
)
it
.
planes
->
total
()
*
src
.
channels
()
-
count
;
}
}
return
true
;
}
#endif
}
// cv::
int
cv
::
countNonZero
(
InputArray
_src
)
{
CV_INSTRUMENT_REGION
();
int
type
=
_src
.
type
(),
cn
=
CV_MAT_CN
(
type
);
CV_Assert
(
cn
==
1
);
#if defined HAVE_OPENCL || defined HAVE_IPP
int
res
=
-
1
;
#endif
#ifdef HAVE_OPENCL
CV_OCL_RUN_
(
OCL_PERFORMANCE_CHECK
(
_src
.
isUMat
())
&&
_src
.
dims
()
<=
2
,
ocl_countNonZero
(
_src
,
res
),
res
)
#endif
Mat
src
=
_src
.
getMat
();
CV_IPP_RUN_FAST
(
ipp_countNonZero
(
src
,
res
),
res
);
CountNonZeroFunc
func
=
getCountNonZeroTab
(
src
.
depth
());
CV_Assert
(
func
!=
0
);
const
Mat
*
arrays
[]
=
{
&
src
,
0
};
uchar
*
ptrs
[
1
]
=
{};
NAryMatIterator
it
(
arrays
,
ptrs
);
int
total
=
(
int
)
it
.
size
,
nz
=
0
;
for
(
size_t
i
=
0
;
i
<
it
.
nplanes
;
i
++
,
++
it
)
nz
+=
func
(
ptrs
[
0
],
total
);
return
nz
;
}
void
cv
::
findNonZero
(
InputArray
_src
,
OutputArray
_idx
)
{
CV_INSTRUMENT_REGION
();
Mat
src
=
_src
.
getMat
();
CV_Assert
(
src
.
type
()
==
CV_8UC1
);
int
n
=
countNonZero
(
src
);
if
(
n
==
0
)
{
_idx
.
release
();
return
;
}
if
(
_idx
.
kind
()
==
_InputArray
::
MAT
&&
!
_idx
.
getMatRef
().
isContinuous
()
)
_idx
.
release
();
_idx
.
create
(
n
,
1
,
CV_32SC2
);
Mat
idx
=
_idx
.
getMat
();
CV_Assert
(
idx
.
isContinuous
());
Point
*
idx_ptr
=
idx
.
ptr
<
Point
>
();
for
(
int
i
=
0
;
i
<
src
.
rows
;
i
++
)
{
const
uchar
*
bin_ptr
=
src
.
ptr
(
i
);
for
(
int
j
=
0
;
j
<
src
.
cols
;
j
++
)
if
(
bin_ptr
[
j
]
)
*
idx_ptr
++
=
Point
(
j
,
i
);
}
}
CV_CPU_OPTIMIZATION_NAMESPACE_END
}
// namespace
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment