Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
20409958
Commit
20409958
authored
Jun 07, 2014
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
optimized cv::norm with 2 args
parent
1a7a262f
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
381 additions
and
55 deletions
+381
-55
minmaxloc.cl
modules/core/src/opencl/minmaxloc.cl
+71
-10
reduce.cl
modules/core/src/opencl/reduce.cl
+216
-3
stat.cpp
modules/core/src/stat.cpp
+94
-42
No files found.
modules/core/src/opencl/minmaxloc.cl
View file @
20409958
...
...
@@ -73,14 +73,26 @@
#
define
CALC_MAX
(
p,
inc
)
#
endif
#
ifdef
OP_CALC2
#
define
CALC_MAX2
(
p
)
\
if
(
maxval2
<
temp.p
)
\
maxval2
=
temp.p
#
else
#
define
CALC_MAX2
(
p
)
#
endif
#
define
CALC_P
(
p,
inc
)
\
CALC_MIN
(
p,
inc
)
\
CALC_MAX
(
p,
inc
)
CALC_MAX
(
p,
inc
)
\
CALC_MAX2
(
p
)
__kernel
void
minmaxloc
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
int
cols,
int
total,
int
groupnum,
__global
uchar
*
dstptr
#
ifdef
HAVE_MASK
,
__global
const
uchar
*
mask,
int
mask_step,
int
mask_offset
#
endif
#
ifdef
HAVE_SRC2
,
__global
const
uchar
*
src2ptr,
int
src2_step,
int
src2_offset
#
endif
)
{
...
...
@@ -92,36 +104,46 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
#
ifdef
HAVE_MASK
mask
+=
mask_offset
;
#
endif
#
ifdef
HAVE_SRC2
src2ptr
+=
src2_offset
;
#
endif
#
ifdef
NEED_MINVAL
__local
dstT1
localmem_min[WGS2_ALIGNED]
;
dstT1
minval
=
MAX_VAL
;
#
ifdef
NEED_MINLOC
__local
uint
localmem_minloc[WGS2_ALIGNED]
;
uint
minloc
=
INDEX_MAX
;
#
endif
#
endif
#
ifdef
NEED_MAXVAL
dstT1
maxval
=
MIN_VAL
;
__local
dstT1
localmem_max[WGS2_ALIGNED]
;
#
ifdef
NEED_MAXLOC
__local
uint
localmem_maxloc[WGS2_ALIGNED]
;
uint
maxloc
=
INDEX_MAX
;
#
endif
#
endif
#
ifdef
OP_CALC2
__local
dstT1
localmem_max2[WGS2_ALIGNED]
;
dstT1
maxval2
=
MIN_VAL
;
#
endif
dstT1
minval
=
MAX_VAL,
maxval
=
MIN_VAL
;
dstT
temp
;
uint
minloc
=
INDEX_MAX,
maxloc
=
INDEX_MAX
;
int
src_index
;
#
ifdef
HAVE_MASK
int
mask_index
;
#
endif
#
ifdef
HAVE_SRC2
int
src2_index
;
#
endif
for
(
int
grain
=
groupnum
*
WGS
*
kercn
; id < total; id += grain)
{
#
ifdef
HAVE_SRC_CONT
src_index
=
mul24
(
id,
(
int
)
sizeof
(
srcT1
))
;
#
else
src_index
=
mad24
(
id
/
cols,
src_step,
mul24
(
id
%
cols,
(
int
)
sizeof
(
srcT1
)))
;
dstT
temp
;
#
ifdef
HAVE_SRC2
dstT
temp2
;
#
endif
for
(
int
grain
=
groupnum
*
WGS
*
kercn
; id < total; id += grain)
{
#
ifdef
HAVE_MASK
#
ifdef
HAVE_MASK_CONT
mask_index
=
id
;
...
...
@@ -131,7 +153,26 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
if
(
mask[mask_index]
)
#
endif
{
#
ifdef
HAVE_SRC_CONT
src_index
=
mul24
(
id,
(
int
)
sizeof
(
srcT1
))
;
#
else
src_index
=
mad24
(
id
/
cols,
src_step,
mul24
(
id
%
cols,
(
int
)
sizeof
(
srcT1
)))
;
#
endif
temp
=
convertToDT
(
*
(
__global
const
srcT
*
)(
srcptr
+
src_index
))
;
#
ifdef
OP_ABS
temp
=
temp
>=
(
dstT
)(
0
)
?
temp
:
-temp
;
#
endif
#
ifdef
HAVE_SRC2
#
ifdef
HAVE_SRC2_CONT
src2_index
=
mul24
(
id,
(
int
)
sizeof
(
srcT1
))
;
#
else
src2_index
=
mad24
(
id
/
cols,
src2_step,
mul24
(
id
%
cols,
(
int
)
sizeof
(
srcT1
)))
;
#
endif
temp2
=
convertToDT
(
*
(
__global
const
srcT
*
)(
src2ptr
+
src2_index
))
;
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
;
#
endif
#
if
kercn
==
1
#
ifdef
NEED_MINVAL
if
(
minval
>
temp
)
...
...
@@ -150,6 +191,11 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
maxloc
=
id
;
#
endif
}
#
ifdef
OP_CALC2
temp2
=
temp2
>=
(
dstT
)(
0
)
?
temp2
:
-temp2
;
if
(
maxval2
<
temp2
)
maxval2
=
temp2
;
#
endif
#
endif
#
elif
kercn
>=
2
CALC_P
(
s0,
0
)
...
...
@@ -191,6 +237,9 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
#
endif
#
ifdef
NEED_MAXLOC
localmem_maxloc[lid]
=
maxloc
;
#
endif
#
ifdef
OP_CALC2
localmem_max2[lid]
=
maxval2
;
#
endif
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -221,6 +270,10 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
#
endif
localmem_max[lid3]
=
maxval
;
}
#
endif
#
ifdef
OP_CALC2
if
(
localmem_max2[lid3]
<
maxval2
)
localmem_max2[lid3]
=
maxval2
;
#
endif
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -254,6 +307,10 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
#
endif
localmem_max[lid]
=
localmem_max[lid2]
;
}
#
endif
#
ifdef
OP_CALC2
if
(
localmem_max2[lid]
<
localmem_max2[lid2]
)
localmem_max2[lid]
=
localmem_max2[lid2]
;
#
endif
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -276,6 +333,10 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
#
endif
#
ifdef
NEED_MAXLOC
*
(
__global
uint
*
)(
dstptr
+
mad24
(
gid,
(
int
)
sizeof
(
uint
)
,
pos
))
=
localmem_maxloc[0]
;
#
endif
#
ifdef
OP_CALC2
pos
=
mad24
(
groupnum,
(
int
)
sizeof
(
uint
)
,
pos
)
;
*
(
__global
dstT1
*
)(
dstptr
+
mad24
(
gid,
(
int
)
sizeof
(
dstT1
)
,
pos
))
=
localmem_max2[0]
;
#
endif
}
}
modules/core/src/opencl/reduce.cl
View file @
20409958
...
...
@@ -109,13 +109,22 @@
#
endif
#
ifdef
HAVE_MASK
#
ifdef
HAVE_SRC2
#
define
EXTRA_PARAMS
,
__global
const
uchar
*
mask,
int
mask_step,
int
mask_offset,
__global
const
uchar
*
src2ptr,
int
src2_step,
int
src2_offset
#
else
#
define
EXTRA_PARAMS
,
__global
const
uchar
*
mask,
int
mask_step,
int
mask_offset
#
endif
#
else
#
ifdef
HAVE_SRC2
#
define
EXTRA_PARAMS
,
__global
const
uchar
*
src2ptr,
int
src2_step,
int
src2_offset
#
else
#
define
EXTRA_PARAMS
#
endif
#
endif
//
accumulative
reduction
stuff
#
if
defined
OP_SUM
|
| defined OP_SUM_ABS || defined OP_SUM_SQR |
|
defined
OP_DOT
#
ifdef
OP_DOT
#
if
ddepth
<=
4
#
define
FUNC
(
a,
b,
c
)
a
=
mad24
(
b,
c,
a
)
...
...
@@ -137,18 +146,48 @@
#
endif
#
endif
#
ifdef
OP_CALC2
#
define
DECLARE_LOCAL_MEM
\
__local
dstT
localmem[WGS2_ALIGNED]
; \
__local
dstT
localmem2[WGS2_ALIGNED]
#
define
DEFINE_ACCUMULATOR
\
dstT
accumulator
=
(
dstT
)(
0
)
; \
dstT
accumulator2
=
(
dstT
)(
0
)
#
else
#
define
DECLARE_LOCAL_MEM
\
__local
dstT
localmem[WGS2_ALIGNED]
#
define
DEFINE_ACCUMULATOR
\
dstT
accumulator
=
(
dstT
)(
0
)
#
endif
#
ifdef
HAVE_SRC2
#
ifdef
OP_CALC2
#
define
PROCESS_ELEMS
\
dstT
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
-
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
dstT
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
-=
temp2
; \
temp
=
temp
>
(
dstT
)(
0
)
?
temp
:
-temp
; \
FUNC
(
accumulator2,
temp2
)
; \
FUNC
(
accumulator,
temp
)
#
else
#
define
PROCESS_ELEMS
\
dstT
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstT
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
FUNC
(
accumulator,
temp
)
#
endif
#
else
#
define
PROCESS_ELEMS
\
dstT
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
FUNC
(
accumulator,
temp
)
#
endif
#
ifdef
HAVE_MASK
#
define
REDUCE_GLOBAL
\
MASK_INDEX
; \
if
(
mask[mask_index]
)
\
{
\
dstT
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
FUNC
(
accumulator,
temp
)
; \
PROCESS_ELEMS
; \
}
#
elif
defined
OP_DOT
...
...
@@ -211,7 +250,158 @@
FUNC
(
accumulator,
temp.sF,
temp2.sF
)
#
endif
#
else
#
else
//
sum
or
norm
with
2
args
#
ifdef
HAVE_SRC2
#
ifdef
OP_CALC2
//
norm
relative
#
if
kercn
==
1
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
FUNC
(
accumulator,
temp
)
; \
FUNC
(
accumulator2,
temp2
)
#
elif
kercn
==
2
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
; \
FUNC
(
accumulator2,
temp2.s0
)
; \
FUNC
(
accumulator2,
temp2.s1
)
#
elif
kercn
==
4
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
; \
FUNC
(
accumulator,
temp.s2
)
; \
FUNC
(
accumulator,
temp.s3
)
; \
FUNC
(
accumulator2,
temp2.s0
)
; \
FUNC
(
accumulator2,
temp2.s1
)
; \
FUNC
(
accumulator2,
temp2.s2
)
; \
FUNC
(
accumulator2,
temp2.s3
)
#
elif
kercn
==
8
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
; \
FUNC
(
accumulator,
temp.s2
)
; \
FUNC
(
accumulator,
temp.s3
)
; \
FUNC
(
accumulator,
temp.s4
)
; \
FUNC
(
accumulator,
temp.s5
)
; \
FUNC
(
accumulator,
temp.s6
)
; \
FUNC
(
accumulator,
temp.s7
)
; \
FUNC
(
accumulator2,
temp2.s0
)
; \
FUNC
(
accumulator2,
temp2.s1
)
; \
FUNC
(
accumulator2,
temp2.s2
)
; \
FUNC
(
accumulator2,
temp2.s3
)
; \
FUNC
(
accumulator2,
temp2.s4
)
; \
FUNC
(
accumulator2,
temp2.s5
)
; \
FUNC
(
accumulator2,
temp2.s6
)
; \
FUNC
(
accumulator2,
temp2.s7
)
#
elif
kercn
==
16
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
; \
FUNC
(
accumulator,
temp.s2
)
; \
FUNC
(
accumulator,
temp.s3
)
; \
FUNC
(
accumulator,
temp.s4
)
; \
FUNC
(
accumulator,
temp.s5
)
; \
FUNC
(
accumulator,
temp.s6
)
; \
FUNC
(
accumulator,
temp.s7
)
; \
FUNC
(
accumulator,
temp.s8
)
; \
FUNC
(
accumulator,
temp.s9
)
; \
FUNC
(
accumulator,
temp.sA
)
; \
FUNC
(
accumulator,
temp.sB
)
; \
FUNC
(
accumulator,
temp.sC
)
; \
FUNC
(
accumulator,
temp.sD
)
; \
FUNC
(
accumulator,
temp.sE
)
; \
FUNC
(
accumulator,
temp.sF
)
; \
FUNC
(
accumulator2,
temp2.s0
)
; \
FUNC
(
accumulator2,
temp2.s1
)
; \
FUNC
(
accumulator2,
temp2.s2
)
; \
FUNC
(
accumulator2,
temp2.s3
)
; \
FUNC
(
accumulator2,
temp2.s4
)
; \
FUNC
(
accumulator2,
temp2.s5
)
; \
FUNC
(
accumulator2,
temp2.s6
)
; \
FUNC
(
accumulator2,
temp2.s7
)
; \
FUNC
(
accumulator2,
temp2.s8
)
; \
FUNC
(
accumulator2,
temp2.s9
)
; \
FUNC
(
accumulator2,
temp2.sA
)
; \
FUNC
(
accumulator2,
temp2.sB
)
; \
FUNC
(
accumulator2,
temp2.sC
)
; \
FUNC
(
accumulator2,
temp2.sD
)
; \
FUNC
(
accumulator2,
temp2.sE
)
; \
FUNC
(
accumulator2,
temp2.sF
)
#
endif
#
else
//
norm
with
2
args
#
if
kercn
==
1
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
FUNC
(
accumulator,
temp
)
#
elif
kercn
==
2
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
#
elif
kercn
==
4
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
; \
FUNC
(
accumulator,
temp.s2
)
; \
FUNC
(
accumulator,
temp.s3
)
#
elif
kercn
==
8
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
; \
FUNC
(
accumulator,
temp.s2
)
; \
FUNC
(
accumulator,
temp.s3
)
; \
FUNC
(
accumulator,
temp.s4
)
; \
FUNC
(
accumulator,
temp.s5
)
; \
FUNC
(
accumulator,
temp.s6
)
; \
FUNC
(
accumulator,
temp.s7
)
#
elif
kercn
==
16
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
; \
FUNC
(
accumulator,
temp.s2
)
; \
FUNC
(
accumulator,
temp.s3
)
; \
FUNC
(
accumulator,
temp.s4
)
; \
FUNC
(
accumulator,
temp.s5
)
; \
FUNC
(
accumulator,
temp.s6
)
; \
FUNC
(
accumulator,
temp.s7
)
; \
FUNC
(
accumulator,
temp.s8
)
; \
FUNC
(
accumulator,
temp.s9
)
; \
FUNC
(
accumulator,
temp.sA
)
; \
FUNC
(
accumulator,
temp.sB
)
; \
FUNC
(
accumulator,
temp.sC
)
; \
FUNC
(
accumulator,
temp.sD
)
; \
FUNC
(
accumulator,
temp.sE
)
; \
FUNC
(
accumulator,
temp.sF
)
#
endif
#
endif
#
else
//
sum
#
if
kercn
==
1
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
...
...
@@ -260,6 +450,7 @@
FUNC
(
accumulator,
temp.sF
)
#
endif
#
endif
#
endif
#
define
SET_LOCAL_1
\
localmem[lid]
=
accumulator
...
...
@@ -325,6 +516,20 @@
accumulator
+=
value.sF
==
zero
?
zero
:
one
#
endif
#
ifdef
OP_CALC2
#
define
SET_LOCAL_1
\
localmem[lid]
=
accumulator
; \
localmem2[lid]
=
accumulator2
; \
#
define
REDUCE_LOCAL_1
\
localmem[lid
-
WGS2_ALIGNED]
+=
accumulator
; \
localmem2[lid
-
WGS2_ALIGNED]
+=
accumulator2
#
define
REDUCE_LOCAL_2
\
localmem[lid]
+=
localmem[lid2]
; \
localmem2[lid]
+=
localmem2[lid2]
#
define
CALC_RESULT
\
storepix
(
localmem[0],
dstptr
+
dstTSIZE
*
gid
)
; \
storepix
(
localmem2[0],
dstptr
+
mad24
(
groupnum,
srcTSIZE,
dstTSIZE
*
gid
))
#
else
#
define
SET_LOCAL_1
\
localmem[lid]
=
accumulator
#
define
REDUCE_LOCAL_1
\
...
...
@@ -333,6 +538,7 @@
localmem[lid]
+=
localmem[lid2]
#
define
CALC_RESULT
\
storepix
(
localmem[0],
dstptr
+
dstTSIZE
*
gid
)
#
endif
//
norm
(
NORM_INF
)
with
cn
>
1
and
mask
#
elif
defined
OP_NORM_INF_MASK
...
...
@@ -384,6 +590,13 @@ __kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset
int
src_index
=
mul24
(
id,
srcTSIZE
)
;
#
else
int
src_index
=
mad24
(
id
/
cols,
src_step,
mul24
(
id
%
cols,
srcTSIZE
))
;
#
endif
#
ifdef
HAVE_SRC2
#
ifdef
HAVE_SRC2_CONT
int
src2_index
=
mul24
(
id,
srcTSIZE
)
;
#
else
int
src2_index
=
mad24
(
id
/
cols,
src2_step,
mul24
(
id
%
cols,
srcTSIZE
))
;
#
endif
#
endif
REDUCE_GLOBAL
;
}
...
...
modules/core/src/stat.cpp
View file @
20409958
...
...
@@ -469,21 +469,25 @@ template <typename T> Scalar ocl_part_sum(Mat m)
enum
{
OCL_OP_SUM
=
0
,
OCL_OP_SUM_ABS
=
1
,
OCL_OP_SUM_SQR
=
2
};
static
bool
ocl_sum
(
InputArray
_src
,
Scalar
&
res
,
int
sum_op
,
InputArray
_mask
=
noArray
()
)
static
bool
ocl_sum
(
InputArray
_src
,
Scalar
&
res
,
int
sum_op
,
InputArray
_mask
=
noArray
(),
InputArray
_src2
=
noArray
(),
bool
calc2
=
false
,
const
Scalar
&
res2
=
Scalar
()
)
{
CV_Assert
(
sum_op
==
OCL_OP_SUM
||
sum_op
==
OCL_OP_SUM_ABS
||
sum_op
==
OCL_OP_SUM_SQR
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
,
haveMask
=
_mask
.
kind
()
!=
_InputArray
::
NONE
;
const
ocl
::
Device
&
dev
=
ocl
::
Device
::
getDefault
();
bool
doubleSupport
=
dev
.
doubleFPConfig
()
>
0
,
haveMask
=
_mask
.
kind
()
!=
_InputArray
::
NONE
,
haveSrc2
=
_src2
.
kind
()
!=
_InputArray
::
NONE
;
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
kercn
=
cn
==
1
&&
!
haveMask
?
ocl
::
predictOptimalVectorWidth
(
_src
)
:
1
,
mcn
=
std
::
max
(
cn
,
kercn
);
CV_Assert
(
!
haveSrc2
||
_src2
.
type
()
==
type
);
if
(
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
cn
>
4
)
return
false
;
int
dbsize
=
ocl
::
Device
::
getDefault
().
maxComputeUnits
(
);
size_t
wgs
=
ocl
::
Device
::
getDefault
()
.
maxWorkGroupSize
();
int
ngroups
=
dev
.
maxComputeUnits
(),
dbsize
=
ngroups
*
(
calc2
?
2
:
1
);
size_t
wgs
=
dev
.
maxWorkGroupSize
();
int
ddepth
=
std
::
max
(
sum_op
==
OCL_OP_SUM_SQR
?
CV_32F
:
CV_32S
,
depth
),
dtype
=
CV_MAKE_TYPE
(
ddepth
,
cn
);
...
...
@@ -497,7 +501,7 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask
static
const
char
*
const
opMap
[
3
]
=
{
"OP_SUM"
,
"OP_SUM_ABS"
,
"OP_SUM_SQR"
};
char
cvt
[
40
];
String
opts
=
format
(
"-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D dstT1=%s -D ddepth=%d -D cn=%d"
" -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s%s%s -D kercn=%d"
,
" -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s%s%s -D kercn=%d
%s%s%s
"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
mcn
)),
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
dtype
),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
ddepth
,
mcn
)),
ocl
::
typeToStr
(
ddepth
),
ddepth
,
cn
,
...
...
@@ -506,30 +510,49 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
haveMask
?
" -D HAVE_MASK"
:
""
,
_src
.
isContinuous
()
?
" -D HAVE_SRC_CONT"
:
""
,
_mask
.
isContinuous
()
?
" -D HAVE_MASK_CONT"
:
""
,
kercn
);
haveMask
&&
_mask
.
isContinuous
()
?
" -D HAVE_MASK_CONT"
:
""
,
kercn
,
haveSrc2
?
" -D HAVE_SRC2"
:
""
,
calc2
?
" -D OP_CALC2"
:
""
,
haveSrc2
&&
_src2
.
isContinuous
()
?
" -D HAVE_SRC2_CONT"
:
""
);
ocl
::
Kernel
k
(
"reduce"
,
ocl
::
core
::
reduce_oclsrc
,
opts
);
if
(
k
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
(),
db
(
1
,
dbsize
,
dtype
),
mask
=
_mask
.
getUMat
();
UMat
src
=
_src
.
getUMat
(),
src2
=
_src2
.
getUMat
(),
db
(
1
,
dbsize
,
dtype
),
mask
=
_mask
.
getUMat
();
ocl
::
KernelArg
srcarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
dbarg
=
ocl
::
KernelArg
::
PtrWriteOnly
(
db
),
maskarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mask
);
maskarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mask
),
src2arg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src2
);
if
(
haveMask
)
k
.
args
(
srcarg
,
src
.
cols
,
(
int
)
src
.
total
(),
dbsize
,
dbarg
,
maskarg
);
{
if
(
haveSrc2
)
k
.
args
(
srcarg
,
src
.
cols
,
(
int
)
src
.
total
(),
ngroups
,
dbarg
,
maskarg
,
src2arg
);
else
k
.
args
(
srcarg
,
src
.
cols
,
(
int
)
src
.
total
(),
ngroups
,
dbarg
,
maskarg
);
}
else
{
if
(
haveSrc2
)
k
.
args
(
srcarg
,
src
.
cols
,
(
int
)
src
.
total
(),
ngroups
,
dbarg
,
src2arg
);
else
k
.
args
(
srcarg
,
src
.
cols
,
(
int
)
src
.
total
(),
dbsize
,
dbarg
);
k
.
args
(
srcarg
,
src
.
cols
,
(
int
)
src
.
total
(),
ngroups
,
dbarg
);
}
size_t
globalsize
=
dbsize
*
wgs
;
size_t
globalsize
=
ngroups
*
wgs
;
if
(
k
.
run
(
1
,
&
globalsize
,
&
wgs
,
false
))
{
typedef
Scalar
(
*
part_sum
)(
Mat
m
);
part_sum
funcs
[
3
]
=
{
ocl_part_sum
<
int
>
,
ocl_part_sum
<
float
>
,
ocl_part_sum
<
double
>
},
func
=
funcs
[
ddepth
-
CV_32S
];
res
=
func
(
db
.
getMat
(
ACCESS_READ
));
Mat
mres
=
db
.
getMat
(
ACCESS_READ
);
if
(
calc2
)
const_cast
<
Scalar
&>
(
res2
)
=
func
(
mres
.
colRange
(
dbsize
,
dbsize
));
res
=
func
(
mres
.
colRange
(
0
,
dbsize
));
return
true
;
}
return
false
;
...
...
@@ -1396,18 +1419,21 @@ typedef void (*getMinMaxResFunc)(const Mat & db, double *minVal, double *maxVal,
int
*
minLoc
,
int
*
maxLoc
,
int
gropunum
,
int
cols
);
static
bool
ocl_minMaxIdx
(
InputArray
_src
,
double
*
minVal
,
double
*
maxVal
,
int
*
minLoc
,
int
*
maxLoc
,
InputArray
_mask
,
int
ddepth
=
-
1
,
bool
absValues
=
false
)
int
ddepth
=
-
1
,
bool
absValues
=
false
,
InputArray
_src2
=
noArray
(),
bool
calc2
=
false
)
{
CV_Assert
(
(
_src
.
channels
()
==
1
&&
(
_mask
.
empty
()
||
_mask
.
type
()
==
CV_8U
))
||
(
_src
.
channels
()
>=
1
&&
_mask
.
empty
()
&&
!
minLoc
&&
!
maxLoc
)
);
const
ocl
::
Device
&
dev
=
ocl
::
Device
::
getDefault
();
bool
doubleSupport
=
dev
.
doubleFPConfig
()
>
0
,
haveMask
=
!
_mask
.
empty
();
bool
doubleSupport
=
dev
.
doubleFPConfig
()
>
0
,
haveMask
=
!
_mask
.
empty
(),
haveSrc2
=
_src2
.
kind
()
!=
_InputArray
::
NONE
;
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
kercn
=
haveMask
?
1
:
std
::
min
(
4
,
ocl
::
predictOptimalVectorWidth
(
_src
));
if
(
ddepth
<
0
)
ddepth
=
depth
;
CV_Assert
(
!
haveSrc2
||
_src2
.
type
()
==
type
);
if
((
depth
==
CV_64F
||
ddepth
==
CV_64F
)
&&
!
doubleSupport
)
return
false
;
...
...
@@ -1435,7 +1461,7 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
char
cvt
[
40
];
String
opts
=
format
(
"-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s"
" -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s"
" -D dstT1=%s -D dstT=%s -D convertToDT=%s%s"
,
" -D dstT1=%s -D dstT=%s -D convertToDT=%s%s
%s%s%s
"
,
depth
,
ocl
::
typeToStr
(
depth
),
haveMask
?
" -D HAVE_MASK"
:
""
,
(
int
)
wgs
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
kercn
)),
wgs2_aligned
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
...
...
@@ -1444,7 +1470,9 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
needMinVal
?
" -D NEED_MINVAL"
:
""
,
needMaxVal
?
" -D NEED_MAXVAL"
:
""
,
needMinLoc
?
" -D NEED_MINLOC"
:
""
,
needMaxLoc
?
" -D NEED_MAXLOC"
:
""
,
ocl
::
typeToStr
(
ddepth
),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
ddepth
,
kercn
)),
ocl
::
convertTypeStr
(
depth
,
ddepth
,
kercn
,
cvt
),
absValues
?
" -D OP_ABS"
:
""
);
ocl
::
convertTypeStr
(
depth
,
ddepth
,
kercn
,
cvt
),
absValues
?
" -D OP_ABS"
:
""
,
haveSrc2
?
" -D HAVE_SRC2"
:
""
,
calc2
?
" -D OP_CALC2"
:
""
,
haveSrc2
&&
_src2
.
isContinuous
()
?
" -D HAVE_SRC2_CONT"
:
""
);
ocl
::
Kernel
k
(
"minmaxloc"
,
ocl
::
core
::
minmaxloc_oclsrc
,
opts
);
if
(
k
.
empty
())
...
...
@@ -1452,18 +1480,35 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
int
esz
=
CV_ELEM_SIZE
(
ddepth
),
esz32s
=
CV_ELEM_SIZE1
(
CV_32S
),
dbsize
=
groupnum
*
((
needMinVal
?
esz
:
0
)
+
(
needMaxVal
?
esz
:
0
)
+
(
needMinLoc
?
esz32s
:
0
)
+
(
needMaxLoc
?
esz32s
:
0
));
UMat
src
=
_src
.
getUMat
(),
db
(
1
,
dbsize
,
CV_8UC1
),
mask
=
_mask
.
getUMat
();
(
needMinLoc
?
esz32s
:
0
)
+
(
needMaxLoc
?
esz32s
:
0
)
+
(
calc2
?
esz
:
0
));
UMat
src
=
_src
.
getUMat
(),
src2
=
_src2
.
getUMat
(),
db
(
1
,
dbsize
,
CV_8UC1
),
mask
=
_mask
.
getUMat
();
if
(
cn
>
1
)
{
src
=
src
.
reshape
(
1
);
src2
=
src2
.
reshape
(
1
);
}
if
(
haveSrc2
)
{
if
(
!
haveMask
)
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
src
.
cols
,
(
int
)
src
.
total
(),
groupnum
,
ocl
::
KernelArg
::
PtrWriteOnly
(
db
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src2
));
else
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
src
.
cols
,
(
int
)
src
.
total
(),
groupnum
,
ocl
::
KernelArg
::
PtrWriteOnly
(
db
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mask
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src2
));
}
else
{
if
(
!
haveMask
)
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
src
.
cols
,
(
int
)
src
.
total
(),
groupnum
,
ocl
::
KernelArg
::
PtrWriteOnly
(
db
));
else
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
src
.
cols
,
(
int
)
src
.
total
(),
groupnum
,
ocl
::
KernelArg
::
PtrWriteOnly
(
db
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mask
));
}
size_t
globalsize
=
groupnum
*
wgs
;
if
(
!
k
.
run
(
1
,
&
globalsize
,
&
wgs
,
false
))
...
...
@@ -2498,38 +2543,45 @@ namespace cv {
static
bool
ocl_norm
(
InputArray
_src1
,
InputArray
_src2
,
int
normType
,
InputArray
_mask
,
double
&
result
)
{
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
()
;
int
type
=
_src1
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
)
,
rowsPerWI
=
d
.
isIntel
()
?
4
:
1
;
bool
doubleSupport
=
d
.
doubleFPConfig
()
>
0
;
bool
relative
=
(
normType
&
NORM_RELATIVE
)
!=
0
;
Scalar
sc1
,
sc2
;
int
type
=
_src1
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
bool
relative
=
(
normType
&
NORM_RELATIVE
)
!=
0
,
normsum
=
normType
==
NORM_L1
||
normType
==
NORM_L2
||
normType
==
NORM_L2SQR
;
normType
&=
~
NORM_RELATIVE
;
if
(
!
(
normType
==
NORM_INF
||
normType
==
NORM_L1
||
normType
==
NORM_L2
||
normType
==
NORM_L2SQR
)
||
(
!
doubleSupport
&&
depth
==
CV_64F
))
if
(
!
(
normType
==
NORM_INF
||
normsum
)
)
return
false
;
int
wdepth
=
std
::
max
(
CV_32S
,
depth
);
char
cvt
[
50
];
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D BINARY_OP -D OP_ABSDIFF -D dstT=%s -D workT=dstT -D srcT1=%s -D srcT2=srcT1"
" -D convertToDT=%s -D convertToWT1=convertToDT -D convertToWT2=convertToDT -D rowsPerWI=%d%s"
,
ocl
::
typeToStr
(
wdepth
),
ocl
::
typeToStr
(
depth
),
ocl
::
convertTypeStr
(
depth
,
wdepth
,
1
,
cvt
),
rowsPerWI
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
if
(
normsum
)
{
if
(
!
ocl_sum
(
_src1
,
sc1
,
normType
==
NORM_L2
||
normType
==
NORM_L2SQR
?
OCL_OP_SUM_SQR
:
OCL_OP_SUM
,
_mask
,
_src2
,
relative
,
sc2
))
return
false
;
}
else
{
if
(
!
ocl_minMaxIdx
(
_src1
,
NULL
,
&
result
,
NULL
,
NULL
,
_mask
,
std
::
max
(
CV_32S
,
depth
),
false
,
_src2
,
relative
))
return
false
;
}
UMat
src1
=
_src1
.
getUMat
(),
src2
=
_src2
.
getUMat
(),
diff
(
src1
.
size
(),
CV_MAKE_TYPE
(
wdepth
,
cn
));
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src1
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src2
),
ocl
::
KernelArg
::
WriteOnly
(
diff
,
cn
));
double
s2
=
0
;
for
(
int
i
=
0
;
i
<
cn
;
++
i
)
{
result
+=
sc1
[
i
];
if
(
relative
)
s2
+=
sc2
[
i
];
}
size_t
globalsize
[
2
]
=
{
diff
.
cols
*
cn
,
(
diff
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
if
(
!
k
.
run
(
2
,
globalsize
,
NULL
,
false
))
return
false
;
if
(
normType
==
NORM_L2
)
{
result
=
std
::
sqrt
(
result
);
if
(
relative
)
s2
=
std
::
sqrt
(
s2
);
}
result
=
cv
::
norm
(
diff
,
normType
,
_mask
);
if
(
relative
)
result
/=
cv
::
norm
(
src2
,
normType
,
_mask
)
+
DBL_EPSILON
;
result
/=
(
s2
+
DBL_EPSILON
)
;
return
true
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment