Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
3ebfe600
Commit
3ebfe600
authored
Mar 13, 2014
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
improved performance of cv::ocl::countNonZero
parent
8e79de35
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
27 additions
and
33 deletions
+27
-33
arithm.cpp
modules/ocl/src/arithm.cpp
+21
-22
arithm_nonzero.cl
modules/ocl/src/opencl/arithm_nonzero.cl
+4
-9
test_arithm.cpp
modules/ocl/test/test_arithm.cpp
+2
-2
No files found.
modules/ocl/src/arithm.cpp
View file @
3ebfe600
...
...
@@ -1263,38 +1263,35 @@ void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
///////////////////////////// countNonZero ///////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
static
void
arithmetic_countNonZero_run
(
const
oclMat
&
src
,
cl_mem
&
dst
,
int
groupnum
,
string
kernelName
)
static
void
arithmetic_countNonZero_run
(
const
oclMat
&
src
,
cl_mem
&
dst
,
int
groupnum
,
int
vlen
)
{
int
ochannels
=
src
.
oclchannels
();
int
all_cols
=
src
.
step
/
src
.
elemSize
();
int
pre_cols
=
(
src
.
offset
%
src
.
step
)
/
src
.
elemSize
();
int
sec_cols
=
all_cols
-
(
src
.
offset
%
src
.
step
+
src
.
cols
*
src
.
elemSize
()
-
1
)
/
src
.
elemSize
()
-
1
;
int
invalid_cols
=
pre_cols
+
sec_cols
;
int
cols
=
all_cols
-
invalid_cols
,
elemnum
=
cols
*
src
.
rows
;;
int
offset
=
src
.
offset
/
src
.
elemSize
();
int
vElemSize
=
vlen
*
src
.
elemSize1
();
int
src_step
=
src
.
step
/
vElemSize
,
src_offset
=
src
.
offset
/
vElemSize
;
int
src_cols
=
src
.
cols
/
vlen
,
total
=
src
.
size
().
area
()
/
vlen
;
const
char
*
const
typeMap
[]
=
{
"uchar"
,
"char"
,
"ushort"
,
"short"
,
"int"
,
"float"
,
"double"
};
const
char
*
const
channelMap
[]
=
{
" "
,
" "
,
"2"
,
"4"
,
"4"
};
string
buildOptions
=
format
(
"-D srcT=%s%s -D dstT=int%s"
,
typeMap
[
src
.
depth
()],
channelMap
[
ochannels
],
channelMap
[
ochannels
]);
const
char
*
const
channelMap
[]
=
{
""
,
""
,
"2"
,
"4"
,
"4"
,
""
,
""
,
""
,
"8"
};
string
buildOptions
=
format
(
"-D srcT=%s%s -D dstT=int%s -D convertToDstT=convert_int%s"
,
typeMap
[
src
.
depth
()],
channelMap
[
vlen
],
channelMap
[
vlen
],
channelMap
[
vlen
]);
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
invalid_cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
elemnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
total
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
));
size_t
globalThreads
[
3
]
=
{
groupnum
*
256
,
1
,
1
};
#ifdef ANDROID
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_nonzero
,
kernelName
,
globalThreads
,
NULL
,
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_nonzero
,
"arithm_op_nonzero"
,
globalThreads
,
NULL
,
args
,
-
1
,
-
1
,
buildOptions
.
c_str
());
#else
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_nonzero
,
kernelName
,
globalThreads
,
localThreads
,
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_nonzero
,
"arithm_op_nonzero"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
buildOptions
.
c_str
());
#endif
}
...
...
@@ -1311,18 +1308,20 @@ int cv::ocl::countNonZero(const oclMat &src)
return
-
1
;
}
int
vlen
=
8
,
vElemSize
=
src
.
elemSize1
()
*
vlen
;
while
(
src
.
offset
%
vElemSize
!=
0
||
src
.
step
%
vElemSize
!=
0
||
src
.
cols
%
vlen
!=
0
)
vlen
>>=
1
,
vElemSize
>>=
1
;
size_t
groupnum
=
src
.
clCxt
->
getDeviceInfo
().
maxComputeUnits
;
CV_Assert
(
groupnum
!=
0
);
int
dbsize
=
groupnum
;
string
kernelName
=
"arithm_op_nonzero"
;
int
dbsize
=
groupnum
*
vlen
;
AutoBuffer
<
int
>
_buf
(
dbsize
);
int
*
p
=
(
int
*
)
_buf
,
nonzero
=
0
;
memset
(
p
,
0
,
dbsize
*
sizeof
(
int
));
cl_mem
dstBuffer
=
openCLCreateBuffer
(
clCxt
,
CL_MEM_WRITE_ONLY
,
dbsize
*
sizeof
(
int
));
arithmetic_countNonZero_run
(
src
,
dstBuffer
,
groupnum
,
kernelName
);
arithmetic_countNonZero_run
(
src
,
dstBuffer
,
groupnum
,
vlen
);
openCLReadBuffer
(
clCxt
,
dstBuffer
,
(
void
*
)
p
,
dbsize
*
sizeof
(
int
));
for
(
int
i
=
0
;
i
<
dbsize
;
i
++
)
...
...
modules/ocl/src/opencl/arithm_nonzero.cl
View file @
3ebfe600
...
...
@@ -52,23 +52,18 @@
/**************************************Count
NonZero**************************************/
__kernel
void
arithm_op_nonzero
(
int
cols,
int
invalid_cols,
int
offset,
int
elemnum,
int
groupnum
,
__global
srcT
*src,
__global
dstT
*
dst
)
__kernel
void
arithm_op_nonzero
(
__global
srcT
*
src,
int
src_step,
int
src_offset,
int
src_cols
,
int
total,
int
groupnum,
__global
dstT
*
dst
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
__local
dstT
localmem_nonzero[128]
;
dstT
nonzero
=
(
dstT
)(
0
)
;
srcT
zero
=
(
srcT
)(
0
)
,
one
=
(
srcT
)(
1
)
;
for
(
int
grain
=
groupnum
<<
8
; id < elemnum; id += grain)
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
nonzero
+=
src[idx]
==
zero
?
zero
:
one
;
}
for
(
int
grain
=
groupnum
<<
8
; id < total; id += grain)
nonzero
+=
convertToDstT
(
src[mad24
(
id
/
src_cols,
src_step,
id
%
src_cols
+
src_offset
)
]
==
(
srcT
)(
0
))
?
(
dstT
)(
0
)
:
(
dstT
)(
1
)
;
if
(
lid
>
127
)
localmem_nonzero[lid
-
128]
=
nonzero
;
...
...
modules/ocl/test/test_arithm.cpp
View file @
3ebfe600
...
...
@@ -198,7 +198,7 @@ PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool)
Size
roiSize
=
randomSize
(
1
,
MAX_VALUE
);
Border
src1Border
=
randomBorder
(
0
,
use_roi
?
MAX_VALUE
:
0
);
randomSubMat
(
src1
,
src1_roi
,
roiSize
,
src1Border
,
type
,
2
,
11
);
randomSubMat
(
src1
,
src1_roi
,
roiSize
,
src1Border
,
type
,
-
11
,
11
);
Border
src2Border
=
randomBorder
(
0
,
use_roi
?
MAX_VALUE
:
0
);
randomSubMat
(
src2
,
src2_roi
,
roiSize
,
src2Border
,
type
,
-
1540
,
1740
);
...
...
@@ -1163,7 +1163,7 @@ OCL_TEST_P(CountNonZero, MAT)
int
cpures
=
cv
::
countNonZero
(
src1_roi
);
int
gpures
=
cv
::
ocl
::
countNonZero
(
gsrc1_roi
);
EXPECT_
DOUBLE_EQ
((
double
)
cpures
,
(
double
)
gpures
);
EXPECT_
EQ
(
cpures
,
gpures
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment