Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
b8edc2cc
Commit
b8edc2cc
authored
Dec 01, 2013
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added cv::sum to T-API
parent
28575c19
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
114 additions
and
15 deletions
+114
-15
reduce.cl
modules/core/src/opencl/reduce.cl
+47
-13
stat.cpp
modules/core/src/stat.cpp
+67
-2
No files found.
modules/core/src/opencl/
count_non_zero
.cl
→
modules/core/src/opencl/
reduce
.cl
View file @
b8edc2cc
...
...
@@ -50,36 +50,70 @@
#
endif
#
endif
/**************************************Count
NonZero**************************************/
#
define
noconvert
__kernel
void
count_non_zero
(
__global
const
uchar
*
srcptr,
int
step,
int
offset,
int
cols,
int
total,
int
groupnum,
__global
uchar
*
dstptr
)
#
if
defined
OP_SUM
|
| defined OP_SUM_ABS |
|
defined
OP_SUM_SQR
#
if
OP_SUM
#
define
FUNC
(
a,
b
)
a
+=
b
#
elif
OP_SUM_ABS
#
define
FUNC
(
a,
b
)
a
+=
b
>=
(
dstT
)(
0
)
?
b
:
-b
#
elif
OP_SUM_SQR
#
define
FUNC
(
a,
b
)
a
+=
b
*
b
#
endif
#
define
DEFINE_ACCUMULATOR
\
dstT
accumulator
=
(
dstT
)(
0
)
#
define
REDUCE_GLOBAL
\
dstT
temp
=
convertToDT
(
src[0]
)
; \
FUNC
(
accumulator,
temp
)
#
define
REDUCE_LOCAL_1
\
localmem[lid]
+=
accumulator
#
define
REDUCE_LOCAL_2
\
localmem[lid]
+=
localmem[lid2]
#
elif
defined
OP_COUNT_NON_ZERO
#
define
dstT
int
#
define
DEFINE_ACCUMULATOR
\
dstT
accumulator
=
(
dstT
)(
0
)
; \
srcT
zero
=
(
srcT
)(
0
)
,
one
=
(
srcT
)(
1
)
#
define
REDUCE_GLOBAL
\
accumulator
+=
src[0]
==
zero
?
zero
:
one
#
define
REDUCE_LOCAL_1
\
localmem[lid]
+=
accumulator
#
define
REDUCE_LOCAL_2
\
localmem[lid]
+=
localmem[lid2]
#
else
#
error
"No operation"
#
endif
__kernel
void
reduce
(
__global
const
uchar
*
srcptr,
int
step,
int
offset,
int
cols,
int
total,
int
groupnum,
__global
uchar
*
dstptr
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
__local
int
localmem[WGS2_ALIGNED]
;
__local
dstT
localmem[WGS2_ALIGNED]
;
if
(
lid
<
WGS2_ALIGNED
)
localmem[lid]
=
0
;
localmem[lid]
=
(
dstT
)(
0
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
nonzero
=
(
int
)(
0
)
,
src_index
;
srcT
zero
=
(
srcT
)(
0
)
,
one
=
(
srcT
)(
1
)
;
DEFINE_ACCUMULATOR
;
for
(
int
grain
=
groupnum
*
WGS
; id < total; id += grain)
{
src_index
=
mad24
(
id
/
cols,
step,
offset
+
(
id
%
cols
)
*
(
int
)
sizeof
(
srcT
))
;
int
src_index
=
mad24
(
id
/
cols,
step,
offset
+
(
id
%
cols
)
*
(
int
)
sizeof
(
srcT
))
;
__global
const
srcT
*
src
=
(
__global
const
srcT
*
)(
srcptr
+
src_index
)
;
nonzero
+=
src[0]
==
zero
?
zero
:
one
;
REDUCE_GLOBAL
;
}
if
(
lid
>=
WGS2_ALIGNED
)
localmem[lid
-
WGS2_ALIGNED]
=
nonzero
;
localmem[lid
-
WGS2_ALIGNED]
=
accumulator
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
<
WGS2_ALIGNED
)
localmem[lid]
=
nonzero
+
localmem[lid]
;
REDUCE_LOCAL_1
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
WGS2_ALIGNED
>>
1
; lsize > 0; lsize >>= 1)
...
...
@@ -87,14 +121,14 @@ __kernel void count_non_zero(__global const uchar * srcptr, int step, int offset
if
(
lid
<
lsize
)
{
int
lid2
=
lsize
+
lid
;
localmem[lid]
=
localmem[lid]
+
localmem[lid2]
;
REDUCE_LOCAL_2
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lid
==
0
)
{
__global
int
*
dst
=
(
__global
int
*
)(
dstptr
+
(
int
)
sizeof
(
int
)
*
gid
)
;
__global
dstT
*
dst
=
(
__global
dstT
*
)(
dstptr
+
(
int
)
sizeof
(
dstT
)
*
gid
)
;
dst[0]
=
localmem[0]
;
}
}
modules/core/src/stat.cpp
View file @
b8edc2cc
...
...
@@ -449,10 +449,74 @@ static SumSqrFunc getSumSqrTab(int depth)
return
sumSqrTab
[
depth
];
}
template
<
typename
T
>
Scalar
ocl_part_sum
(
Mat
m
)
{
CV_Assert
(
m
.
rows
==
1
);
Scalar
s
=
Scalar
::
all
(
0
);
int
cn
=
m
.
channels
();
const
T
*
const
ptr
=
m
.
ptr
<
T
>
(
0
);
for
(
int
x
=
0
,
w
=
m
.
cols
*
cn
;
x
<
w
;
)
for
(
int
c
=
0
;
c
<
cn
;
++
c
,
++
x
)
s
[
c
]
+=
ptr
[
x
];
return
s
;
}
enum
{
OP_SUM
=
0
,
OP_SUM_ABS
=
1
,
OP_SUM_SQR
=
2
};
static
bool
ocl_sum
(
InputArray
_src
,
Scalar
&
res
,
int
sum_op
)
{
CV_Assert
(
sum_op
==
OP_SUM
||
sum_op
==
OP_SUM_ABS
||
sum_op
==
OP_SUM_SQR
);
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
cn
>
4
||
cn
==
3
)
return
false
;
int
dbsize
=
ocl
::
Device
::
getDefault
().
maxComputeUnits
();
size_t
wgs
=
ocl
::
Device
::
getDefault
().
maxWorkGroupSize
();
int
ddepth
=
std
::
max
(
CV_32S
,
depth
),
dtype
=
CV_MAKE_TYPE
(
ddepth
,
cn
);
UMat
src
=
_src
.
getUMat
(),
db
(
1
,
dbsize
,
dtype
);
int
wgs2_aligned
=
1
;
while
(
wgs2_aligned
<
(
int
)
wgs
)
wgs2_aligned
<<=
1
;
wgs2_aligned
>>=
1
;
static
const
char
*
const
opMap
[
3
]
=
{
"OP_SUM"
,
"OP_SUM_ABS"
,
"OP_SUM_SQR"
};
char
cvt
[
40
];
ocl
::
Kernel
k
(
"reduce"
,
ocl
::
core
::
reduce_oclsrc
,
format
(
"-D srcT=%s -D dstT=%s -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
dtype
),
ocl
::
convertTypeStr
(
depth
,
ddepth
,
cn
,
cvt
),
opMap
[
sum_op
],
(
int
)
wgs
,
wgs2_aligned
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
src
.
cols
,
(
int
)
src
.
total
(),
dbsize
,
ocl
::
KernelArg
::
PtrWriteOnly
(
db
));
size_t
globalsize
=
dbsize
*
wgs
;
if
(
k
.
run
(
1
,
&
globalsize
,
&
wgs
,
true
))
{
typedef
Scalar
(
*
part_sum
)(
Mat
m
);
part_sum
funcs
[
3
]
=
{
ocl_part_sum
<
int
>
,
ocl_part_sum
<
float
>
,
ocl_part_sum
<
double
>
},
func
=
funcs
[
ddepth
-
CV_32S
];
res
=
func
(
db
.
getMat
(
ACCESS_READ
));
return
true
;
}
return
false
;
}
}
cv
::
Scalar
cv
::
sum
(
InputArray
_src
)
{
Scalar
_res
;
if
(
ocl
::
useOpenCL
()
&&
_src
.
isUMat
()
&&
ocl_sum
(
_src
,
_res
,
OP_SUM
))
return
_res
;
Mat
src
=
_src
.
getMat
();
int
k
,
cn
=
src
.
channels
(),
depth
=
src
.
depth
();
...
...
@@ -562,8 +626,9 @@ static bool ocl_countNonZero( InputArray _src, int & res )
wgs2_aligned
<<=
1
;
wgs2_aligned
>>=
1
;
ocl
::
Kernel
k
(
"count_non_zero"
,
ocl
::
core
::
count_non_zero_oclsrc
,
format
(
"-D srcT=%s -D WGS=%d -D WGS2_ALIGNED=%d%s"
,
ocl
::
typeToStr
(
src
.
type
()),
(
int
)
wgs
,
ocl
::
Kernel
k
(
"reduce"
,
ocl
::
core
::
reduce_oclsrc
,
format
(
"-D srcT=%s -D OP_COUNT_NON_ZERO -D WGS=%d -D WGS2_ALIGNED=%d%s"
,
ocl
::
typeToStr
(
src
.
type
()),
(
int
)
wgs
,
wgs2_aligned
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
src
.
cols
,
(
int
)
src
.
total
(),
dbsize
,
ocl
::
KernelArg
::
PtrWriteOnly
(
db
));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment