Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
b54228fb
Commit
b54228fb
authored
Sep 27, 2013
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixed ocl::countNonZero
parent
9dca7555
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
68 additions
and
154 deletions
+68
-154
arithm.cpp
modules/ocl/src/arithm.cpp
+30
-18
arithm_nonzero.cl
modules/ocl/src/opencl/arithm_nonzero.cl
+38
-136
No files found.
modules/ocl/src/arithm.cpp
View file @
b54228fb
...
...
@@ -1209,21 +1209,22 @@ void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
///////////////////////////// countNonZero ///////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
static
void
arithmetic_countNonZero_run
(
const
oclMat
&
src
,
cl_mem
&
dst
,
int
vlen
,
int
groupnum
,
string
kernelName
)
static
void
arithmetic_countNonZero_run
(
const
oclMat
&
src
,
cl_mem
&
dst
,
int
groupnum
,
string
kernelName
)
{
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
int
all_cols
=
src
.
step
/
(
vlen
*
src
.
elemSize1
()
);
int
pre_cols
=
(
src
.
offset
%
src
.
step
)
/
(
vlen
*
src
.
elemSize1
()
);
int
sec_cols
=
all_cols
-
(
src
.
offset
%
src
.
step
+
src
.
cols
*
src
.
elemSize
()
-
1
)
/
(
vlen
*
src
.
elemSize1
()
)
-
1
;
int
ochannels
=
src
.
oclchannels
()
;
int
all_cols
=
src
.
step
/
src
.
elemSize
(
);
int
pre_cols
=
(
src
.
offset
%
src
.
step
)
/
src
.
elemSize
(
);
int
sec_cols
=
all_cols
-
(
src
.
offset
%
src
.
step
+
src
.
cols
*
src
.
elemSize
()
-
1
)
/
src
.
elemSize
(
)
-
1
;
int
invalid_cols
=
pre_cols
+
sec_cols
;
int
cols
=
all_cols
-
invalid_cols
,
elemnum
=
cols
*
src
.
rows
;;
int
offset
=
src
.
offset
/
(
vlen
*
src
.
elemSize1
());
int
repeat_s
=
src
.
offset
/
src
.
elemSize1
()
-
offset
*
vlen
;
int
repeat_e
=
(
offset
+
cols
)
*
vlen
-
src
.
offset
/
src
.
elemSize1
()
-
src
.
cols
*
src
.
oclchannels
();
int
offset
=
src
.
offset
/
src
.
elemSize
();
char
build_options
[
50
];
sprintf
(
build_options
,
"-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d"
,
src
.
depth
(),
repeat_s
,
repeat_e
);
const
char
*
const
typeMap
[]
=
{
"uchar"
,
"char"
,
"ushort"
,
"short"
,
"int"
,
"float"
,
"double"
};
const
char
*
const
channelMap
[]
=
{
" "
,
" "
,
"2"
,
"4"
,
"4"
};
string
buildOptions
=
format
(
"-D srcT=%s%s -D dstT=int%s"
,
typeMap
[
src
.
depth
()],
channelMap
[
ochannels
],
channelMap
[
ochannels
]);
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
invalid_cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
offset
));
...
...
@@ -1231,33 +1232,44 @@ static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int vlen
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
));
size_t
gt
[
3
]
=
{
groupnum
*
256
,
1
,
1
},
lt
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_nonzero
,
kernelName
,
gt
,
lt
,
args
,
-
1
,
-
1
,
build_options
);
size_t
globalThreads
[
3
]
=
{
groupnum
*
256
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_nonzero
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
buildOptions
.
c_str
());
}
int
cv
::
ocl
::
countNonZero
(
const
oclMat
&
src
)
{
size_t
groupnum
=
src
.
clCxt
->
computeUnits
();
CV_Assert
(
src
.
step
%
src
.
elemSize
()
==
0
);
CV_Assert
(
src
.
channels
()
==
1
);
Context
*
clCxt
=
src
.
clCxt
;
if
(
!
src
.
clCxt
->
supportsFeature
(
Context
::
CL_DOUBLE
)
&&
src
.
depth
()
==
CV_64F
)
{
CV_Error
(
CV_GpuNotSupported
,
"selected device doesn't support double"
);
}
size_t
groupnum
=
src
.
clCxt
->
computeUnits
();
CV_Assert
(
groupnum
!=
0
);
int
vlen
=
8
,
dbsize
=
groupnum
*
vlen
;
Context
*
clCxt
=
src
.
clCxt
;
int
dbsize
=
groupnum
;
string
kernelName
=
"arithm_op_nonzero"
;
AutoBuffer
<
int
>
_buf
(
dbsize
);
int
*
p
=
(
int
*
)
_buf
,
nonzero
=
0
;
cl_mem
dstBuffer
=
openCLCreateBuffer
(
clCxt
,
CL_MEM_WRITE_ONLY
,
dbsize
*
sizeof
(
int
));
arithmetic_countNonZero_run
(
src
,
dstBuffer
,
vlen
,
groupnum
,
kernelName
);
memset
(
p
,
0
,
dbsize
*
sizeof
(
int
));
cl_mem
dstBuffer
=
openCLCreateBuffer
(
clCxt
,
CL_MEM_WRITE_ONLY
,
dbsize
*
sizeof
(
int
));
arithmetic_countNonZero_run
(
src
,
dstBuffer
,
groupnum
,
kernelName
);
openCLReadBuffer
(
clCxt
,
dstBuffer
,
(
void
*
)
p
,
dbsize
*
sizeof
(
int
));
for
(
int
i
=
0
;
i
<
dbsize
;
i
++
)
nonzero
+=
p
[
i
];
openCLSafeCall
(
clReleaseMemObject
(
dstBuffer
));
return
nonzero
;
}
...
...
modules/ocl/src/opencl/arithm_nonzero.cl
View file @
b54228fb
...
...
@@ -41,151 +41,53 @@
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
///
/**************************************PUBLICFUNC*************************************/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
if
defined
(
DEPTH_0
)
#
define
VEC_TYPE
uchar8
#
endif
#
if
defined
(
DEPTH_1
)
#
define
VEC_TYPE
char8
#
endif
#
if
defined
(
DEPTH_2
)
#
define
VEC_TYPE
ushort8
#
endif
#
if
defined
(
DEPTH_3
)
#
define
VEC_TYPE
short8
#
endif
#
if
defined
(
DEPTH_4
)
#
define
VEC_TYPE
int8
#
endif
#
if
defined
(
DEPTH_5
)
#
define
VEC_TYPE
float8
#
endif
#
if
defined
(
DEPTH_6
)
#
define
VEC_TYPE
double8
#
endif
#
if
defined
(
REPEAT_S0
)
#
define
repeat_s
(
a
)
a
=
a
;
#
endif
#
if
defined
(
REPEAT_S1
)
#
define
repeat_s
(
a
)
a.s0
=
0
;
#
endif
#
if
defined
(
REPEAT_S2
)
#
define
repeat_s
(
a
)
a.s0
=
0
;a.s1 = 0;
#
endif
#
if
defined
(
REPEAT_S3
)
#
define
repeat_s
(
a
)
a.s0
=
0
;a.s1 = 0;a.s2 = 0;
#
endif
#
if
defined
(
REPEAT_S4
)
#
define
repeat_s
(
a
)
a.s0
=
0
;a.s1 = 0;a.s2 = 0;a.s3 = 0;
#
endif
#
if
defined
(
REPEAT_S5
)
#
define
repeat_s
(
a
)
a.s0
=
0
;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;
#
endif
#
if
defined
(
REPEAT_S6
)
#
define
repeat_s
(
a
)
a.s0
=
0
;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;
#
endif
#
if
defined
(
REPEAT_S7
)
#
define
repeat_s
(
a
)
a.s0
=
0
;a.s1 = 0;a.s2 = 0;a.s3 = 0;a.s4 = 0;a.s5 = 0;a.s6 = 0;
#
endif
/**************************************Count
NonZero**************************************/
#
if
defined
(
REPEAT_E0
)
#
define
repeat_e
(
a
)
a
=
a
;
#
endif
#
if
defined
(
REPEAT_E1
)
#
define
repeat_e
(
a
)
a.s7
=
0
;
#
endif
#
if
defined
(
REPEAT_E2
)
#
define
repeat_e
(
a
)
a.s7
=
0
;a.s6 = 0;
#
endif
#
if
defined
(
REPEAT_E3
)
#
define
repeat_e
(
a
)
a.s7
=
0
;a.s6 = 0;a.s5 = 0;
#
endif
#
if
defined
(
REPEAT_E4
)
#
define
repeat_e
(
a
)
a.s7
=
0
;a.s6 = 0;a.s5 = 0;a.s4 = 0;
#
endif
#
if
defined
(
REPEAT_E5
)
#
define
repeat_e
(
a
)
a.s7
=
0
;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;
#
endif
#
if
defined
(
REPEAT_E6
)
#
define
repeat_e
(
a
)
a.s7
=
0
;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;
#
endif
#
if
defined
(
REPEAT_E7
)
#
define
repeat_e
(
a
)
a.s7
=
0
;a.s6 = 0;a.s5 = 0;a.s4 = 0;a.s3 = 0;a.s2 = 0;a.s1 = 0;
#
endif
__kernel
void
arithm_op_nonzero
(
int
cols,
int
invalid_cols,
int
offset,
int
elemnum,
int
groupnum,
__global
srcT
*src,
__global
dstT
*dst
)
{
unsigned
int
lid
=
get_local_id
(
0
)
;
unsigned
int
gid
=
get_group_id
(
0
)
;
unsigned
int
id
=
get_global_id
(
0
)
;
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_base_atomics:enable
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_extended_atomics:enable
unsigned
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
__local
dstT
localmem_nonzero[128]
;
dstT
nonzero
=
(
dstT
)(
0
)
;
srcT
zero
=
(
srcT
)(
0
)
,
one
=
(
srcT
)(
1
)
;
/**************************************Count
NonZero**************************************/
__kernel
void
arithm_op_nonzero
(
int
cols,int
invalid_cols,int
offset,int
elemnum,int
groupnum,
__global
VEC_TYPE
*src,
__global
int8
*dst
)
{
unsigned
int
lid
=
get_local_id
(
0
)
;
unsigned
int
gid
=
get_group_id
(
0
)
;
unsigned
int
id
=
get_global_id
(
0
)
;
unsigned
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
__local
int8
localmem_nonzero[128]
;
int8
nonzero
;
VEC_TYPE
zero=0,one=1,temp
;
if
(
id
<
elemnum
)
{
temp
=
src[idx]
;
if
(
id
%
cols
==
0
)
{
repeat_s
(
temp
)
;
}
if
(
id
%
cols
==
cols
-
1
)
{
repeat_e
(
temp
)
;
}
nonzero
=
convert_int8
(
temp
==
zero
?
zero:one
)
;
}
else
{
nonzero
=
0
;
}
for
(
id=id
+
(
groupnum
<<
8
)
; id < elemnum;id = id + (groupnum << 8))
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
temp
=
src[idx]
;
if
(
id
%
cols
==
0
)
{
repeat_s
(
temp
)
;
}
if
(
id
%
cols
==
cols
-
1
)
{
repeat_e
(
temp
)
;
}
nonzero
=
nonzero
+
convert_int8
(
temp
==
zero
?
zero:one
)
;
}
if
(
lid
>
127
)
{
localmem_nonzero[lid
-
128]
=
nonzero
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
<
128
)
{
localmem_nonzero[lid]
=
nonzero
+
localmem_nonzero[lid]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
64
; lsize > 0; lsize >>= 1)
{
if
(
lid
<
lsize
)
{
for
(
int
grain
=
groupnum
<<
8
; id < elemnum; id += grain)
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
nonzero
+=
src[idx]
==
zero
?
zero
:
one
;
}
if
(
lid
>
127
)
localmem_nonzero[lid
-
128]
=
nonzero
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
<
128
)
localmem_nonzero[lid]
=
nonzero
+
localmem_nonzero[lid]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
64
; lsize > 0; lsize >>= 1)
{
if
(
lid
<
lsize
)
{
int
lid2
=
lsize
+
lid
;
localmem_nonzero[lid]
=
localmem_nonzero[lid]
+
localmem_nonzero[lid2]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lid
==
0
)
{
dst[gid]
=
localmem_nonzero[0]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lid
==
0
)
dst[gid]
=
localmem_nonzero[0]
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment