Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
88ceee05
Commit
88ceee05
authored
Jun 10, 2014
by
Alexander Alekhin
Committed by
OpenCV Buildbot
Jun 10, 2014
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2837 from ilya-lavrenov:tapi_norm_relative
parents
96ce9810
634da9f3
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
831 additions
and
275 deletions
+831
-275
minmaxloc.cl
modules/core/src/opencl/minmaxloc.cl
+370
-0
reduce.cl
modules/core/src/opencl/reduce.cl
+225
-120
stat.cpp
modules/core/src/stat.cpp
+224
-155
test_arithm.cpp
modules/core/test/ocl/test_arithm.cpp
+12
-0
No files found.
modules/core/src/opencl/minmaxloc.cl
0 → 100644
View file @
88ceee05
//
This
file
is
part
of
OpenCV
project.
//
It
is
subject
to
the
license
terms
in
the
LICENSE
file
found
in
the
top-level
directory
//
of
this
distribution
and
at
http://opencv.org/license.html.
//
Copyright
(
C
)
2014
,
Itseez,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
ifdef
DEPTH_0
#
define
MIN_VAL
0
#
define
MAX_VAL
UCHAR_MAX
#
elif
defined
DEPTH_1
#
define
MIN_VAL
SCHAR_MIN
#
define
MAX_VAL
SCHAR_MAX
#
elif
defined
DEPTH_2
#
define
MIN_VAL
0
#
define
MAX_VAL
USHRT_MAX
#
elif
defined
DEPTH_3
#
define
MIN_VAL
SHRT_MIN
#
define
MAX_VAL
SHRT_MAX
#
elif
defined
DEPTH_4
#
define
MIN_VAL
INT_MIN
#
define
MAX_VAL
INT_MAX
#
elif
defined
DEPTH_5
#
define
MIN_VAL
(
-FLT_MAX
)
#
define
MAX_VAL
FLT_MAX
#
elif
defined
DEPTH_6
#
define
MIN_VAL
(
-DBL_MAX
)
#
define
MAX_VAL
DBL_MAX
#
endif
#
define
noconvert
#
define
INDEX_MAX
UINT_MAX
#
if
kercn
!=
3
#
define
loadpix
(
addr
)
*
(
__global
const
srcT
*
)(
addr
)
#
define
srcTSIZE
(
int
)
sizeof
(
srcT
)
#
else
#
define
loadpix
(
addr
)
vload3
(
0
,
(
__global
const
srcT1
*
)(
addr
))
#
define
srcTSIZE
((
int
)
sizeof
(
srcT1
)
*
3
)
#
endif
#
ifndef
HAVE_MASK
#
undef
srcTSIZE
#
define
srcTSIZE
(
int
)
sizeof
(
srcT1
)
#
endif
#
ifdef
NEED_MINLOC
#
define
CALC_MINLOC
(
inc
)
minloc
=
id
+
inc
#
else
#
define
CALC_MINLOC
(
inc
)
#
endif
#
ifdef
NEED_MAXLOC
#
define
CALC_MAXLOC
(
inc
)
maxloc
=
id
+
inc
#
else
#
define
CALC_MAXLOC
(
inc
)
#
endif
#
ifdef
NEED_MINVAL
#
define
CALC_MIN
(
p,
inc
)
\
if
(
minval
>
temp.p
)
\
{
\
minval
=
temp.p
; \
CALC_MINLOC
(
inc
)
; \
}
#
else
#
define
CALC_MIN
(
p,
inc
)
#
endif
#
ifdef
NEED_MAXVAL
#
define
CALC_MAX
(
p,
inc
)
\
if
(
maxval
<
temp.p
)
\
{
\
maxval
=
temp.p
; \
CALC_MAXLOC
(
inc
)
; \
}
#
else
#
define
CALC_MAX
(
p,
inc
)
#
endif
#
ifdef
OP_CALC2
#
define
CALC_MAX2
(
p
)
\
if
(
maxval2
<
temp.p
)
\
maxval2
=
temp.p
;
#
else
#
define
CALC_MAX2
(
p
)
#
endif
#
define
CALC_P
(
p,
inc
)
\
CALC_MIN
(
p,
inc
)
\
CALC_MAX
(
p,
inc
)
\
CALC_MAX2
(
p
)
__kernel
void
minmaxloc
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
int
cols,
int
total,
int
groupnum,
__global
uchar
*
dstptr
#
ifdef
HAVE_MASK
,
__global
const
uchar
*
mask,
int
mask_step,
int
mask_offset
#
endif
#
ifdef
HAVE_SRC2
,
__global
const
uchar
*
src2ptr,
int
src2_step,
int
src2_offset
#
endif
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
id
=
get_global_id
(
0
)
#
ifndef
HAVE_MASK
*
kercn
;
#
else
;
#
endif
srcptr
+=
src_offset
;
#
ifdef
HAVE_MASK
mask
+=
mask_offset
;
#
endif
#
ifdef
HAVE_SRC2
src2ptr
+=
src2_offset
;
#
endif
#
ifdef
NEED_MINVAL
__local
dstT1
localmem_min[WGS2_ALIGNED]
;
dstT1
minval
=
MAX_VAL
;
#
ifdef
NEED_MINLOC
__local
uint
localmem_minloc[WGS2_ALIGNED]
;
uint
minloc
=
INDEX_MAX
;
#
endif
#
endif
#
ifdef
NEED_MAXVAL
dstT1
maxval
=
MIN_VAL
;
__local
dstT1
localmem_max[WGS2_ALIGNED]
;
#
ifdef
NEED_MAXLOC
__local
uint
localmem_maxloc[WGS2_ALIGNED]
;
uint
maxloc
=
INDEX_MAX
;
#
endif
#
endif
#
ifdef
OP_CALC2
__local
dstT1
localmem_max2[WGS2_ALIGNED]
;
dstT1
maxval2
=
MIN_VAL
;
#
endif
int
src_index
;
#
ifdef
HAVE_MASK
int
mask_index
;
#
endif
#
ifdef
HAVE_SRC2
int
src2_index
;
#
endif
dstT
temp
;
#
ifdef
HAVE_SRC2
dstT
temp2
;
#
endif
for
(
int
grain
=
groupnum
*
WGS
#
ifndef
HAVE_MASK
*
kercn
#
endif
; id < total; id += grain)
{
#
ifdef
HAVE_MASK
#
ifdef
HAVE_MASK_CONT
mask_index
=
id
;
#
else
mask_index
=
mad24
(
id
/
cols,
mask_step,
id
%
cols
)
;
#
endif
if
(
mask[mask_index]
)
#
endif
{
#
ifdef
HAVE_SRC_CONT
src_index
=
mul24
(
id,
srcTSIZE
)
;
#
else
src_index
=
mad24
(
id
/
cols,
src_step,
mul24
(
id
%
cols,
srcTSIZE
))
;
#
endif
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
;
#
ifdef
OP_ABS
temp
=
temp
>=
(
dstT
)(
0
)
?
temp
:
-temp
;
#
endif
#
ifdef
HAVE_SRC2
#
ifdef
HAVE_SRC2_CONT
src2_index
=
mul24
(
id,
srcTSIZE
)
;
#
else
src2_index
=
mad24
(
id
/
cols,
src2_step,
mul24
(
id
%
cols,
srcTSIZE
))
;
#
endif
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
;
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
;
#
ifdef
OP_CALC2
temp2
=
temp2
>=
(
dstT
)(
0
)
?
temp2
:
-temp2
;
#
endif
#
endif
#
if
kercn
==
1
#
ifdef
NEED_MINVAL
if
(
minval
>
temp
)
{
minval
=
temp
;
#
ifdef
NEED_MINLOC
minloc
=
id
;
#
endif
}
#
endif
#
ifdef
NEED_MAXVAL
if
(
maxval
<
temp
)
{
maxval
=
temp
;
#
ifdef
NEED_MAXLOC
maxloc
=
id
;
#
endif
}
#
ifdef
OP_CALC2
if
(
maxval2
<
temp2
)
maxval2
=
temp2
;
#
endif
#
endif
#
elif
kercn
>=
2
CALC_P
(
s0,
0
)
CALC_P
(
s1,
1
)
#
if
kercn
>=
3
CALC_P
(
s2,
2
)
#
if
kercn
>=
4
CALC_P
(
s3,
3
)
#
if
kercn
>=
8
CALC_P
(
s4,
4
)
CALC_P
(
s5,
5
)
CALC_P
(
s6,
6
)
CALC_P
(
s7,
7
)
#
if
kercn
==
16
CALC_P
(
s8,
8
)
CALC_P
(
s9,
9
)
CALC_P
(
sA,
10
)
CALC_P
(
sB,
11
)
CALC_P
(
sC,
12
)
CALC_P
(
sD,
13
)
CALC_P
(
sE,
14
)
CALC_P
(
sF,
15
)
#
endif
#
endif
#
endif
#
endif
#
endif
}
}
if
(
lid
<
WGS2_ALIGNED
)
{
#
ifdef
NEED_MINVAL
localmem_min[lid]
=
minval
;
#
endif
#
ifdef
NEED_MAXVAL
localmem_max[lid]
=
maxval
;
#
endif
#
ifdef
NEED_MINLOC
localmem_minloc[lid]
=
minloc
;
#
endif
#
ifdef
NEED_MAXLOC
localmem_maxloc[lid]
=
maxloc
;
#
endif
#
ifdef
OP_CALC2
localmem_max2[lid]
=
maxval2
;
#
endif
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
>=
WGS2_ALIGNED
&&
total
>=
WGS2_ALIGNED
)
{
int
lid3
=
lid
-
WGS2_ALIGNED
;
#
ifdef
NEED_MINVAL
if
(
localmem_min[lid3]
>=
minval
)
{
#
ifdef
NEED_MINLOC
if
(
localmem_min[lid3]
==
minval
)
localmem_minloc[lid3]
=
min
(
localmem_minloc[lid3],
minloc
)
;
else
localmem_minloc[lid3]
=
minloc,
#
endif
localmem_min[lid3]
=
minval
;
}
#
endif
#
ifdef
NEED_MAXVAL
if
(
localmem_max[lid3]
<=
maxval
)
{
#
ifdef
NEED_MAXLOC
if
(
localmem_max[lid3]
==
maxval
)
localmem_maxloc[lid3]
=
min
(
localmem_maxloc[lid3],
maxloc
)
;
else
localmem_maxloc[lid3]
=
maxloc,
#
endif
localmem_max[lid3]
=
maxval
;
}
#
endif
#
ifdef
OP_CALC2
if
(
localmem_max2[lid3]
<
maxval2
)
localmem_max2[lid3]
=
maxval2
;
#
endif
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
WGS2_ALIGNED
>>
1
; lsize > 0; lsize >>= 1)
{
if
(
lid
<
lsize
)
{
int
lid2
=
lsize
+
lid
;
#
ifdef
NEED_MINVAL
if
(
localmem_min[lid]
>=
localmem_min[lid2]
)
{
#
ifdef
NEED_MINLOC
if
(
localmem_min[lid]
==
localmem_min[lid2]
)
localmem_minloc[lid]
=
min
(
localmem_minloc[lid2],
localmem_minloc[lid]
)
;
else
localmem_minloc[lid]
=
localmem_minloc[lid2],
#
endif
localmem_min[lid]
=
localmem_min[lid2]
;
}
#
endif
#
ifdef
NEED_MAXVAL
if
(
localmem_max[lid]
<=
localmem_max[lid2]
)
{
#
ifdef
NEED_MAXLOC
if
(
localmem_max[lid]
==
localmem_max[lid2]
)
localmem_maxloc[lid]
=
min
(
localmem_maxloc[lid2],
localmem_maxloc[lid]
)
;
else
localmem_maxloc[lid]
=
localmem_maxloc[lid2],
#
endif
localmem_max[lid]
=
localmem_max[lid2]
;
}
#
endif
#
ifdef
OP_CALC2
if
(
localmem_max2[lid]
<
localmem_max2[lid2]
)
localmem_max2[lid]
=
localmem_max2[lid2]
;
#
endif
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lid
==
0
)
{
int
pos
=
0
;
#
ifdef
NEED_MINVAL
*
(
__global
dstT1
*
)(
dstptr
+
mad24
(
gid,
(
int
)
sizeof
(
dstT1
)
,
pos
))
=
localmem_min[0]
;
pos
=
mad24
(
groupnum,
(
int
)
sizeof
(
dstT1
)
,
pos
)
;
#
endif
#
ifdef
NEED_MAXVAL
*
(
__global
dstT1
*
)(
dstptr
+
mad24
(
gid,
(
int
)
sizeof
(
dstT1
)
,
pos
))
=
localmem_max[0]
;
pos
=
mad24
(
groupnum,
(
int
)
sizeof
(
dstT1
)
,
pos
)
;
#
endif
#
ifdef
NEED_MINLOC
*
(
__global
uint
*
)(
dstptr
+
mad24
(
gid,
(
int
)
sizeof
(
uint
)
,
pos
))
=
localmem_minloc[0]
;
pos
=
mad24
(
groupnum,
(
int
)
sizeof
(
uint
)
,
pos
)
;
#
endif
#
ifdef
NEED_MAXLOC
*
(
__global
uint
*
)(
dstptr
+
mad24
(
gid,
(
int
)
sizeof
(
uint
)
,
pos
))
=
localmem_maxloc[0]
;
#
ifdef
OP_CALC2
pos
=
mad24
(
groupnum,
(
int
)
sizeof
(
uint
)
,
pos
)
;
#
endif
#
endif
#
ifdef
OP_CALC2
*
(
__global
dstT1
*
)(
dstptr
+
mad24
(
gid,
(
int
)
sizeof
(
dstT1
)
,
pos
))
=
localmem_max2[0]
;
#
endif
}
}
modules/core/src/opencl/reduce.cl
View file @
88ceee05
...
...
@@ -50,7 +50,7 @@
#
endif
#
endif
#
if
defined
OP_NORM_INF_MASK
|
| defined OP_MIN_MAX_LOC || defined OP_MIN_MAX_LOC_MASK
#
if
defined
OP_NORM_INF_MASK
#
ifdef
DEPTH_0
#
define
MIN_VAL
0
...
...
@@ -109,13 +109,22 @@
#
endif
#
ifdef
HAVE_MASK
#
ifdef
HAVE_SRC2
#
define
EXTRA_PARAMS
,
__global
const
uchar
*
mask,
int
mask_step,
int
mask_offset,
__global
const
uchar
*
src2ptr,
int
src2_step,
int
src2_offset
#
else
#
define
EXTRA_PARAMS
,
__global
const
uchar
*
mask,
int
mask_step,
int
mask_offset
#
endif
#
else
#
ifdef
HAVE_SRC2
#
define
EXTRA_PARAMS
,
__global
const
uchar
*
src2ptr,
int
src2_step,
int
src2_offset
#
else
#
define
EXTRA_PARAMS
#
endif
#
endif
//
accumulative
reduction
stuff
#
if
defined
OP_SUM
|
| defined OP_SUM_ABS || defined OP_SUM_SQR |
|
defined
OP_DOT
#
ifdef
OP_DOT
#
if
ddepth
<=
4
#
define
FUNC
(
a,
b,
c
)
a
=
mad24
(
b,
c,
a
)
...
...
@@ -137,18 +146,46 @@
#
endif
#
endif
#
ifdef
OP_CALC2
#
define
DECLARE_LOCAL_MEM
\
__local
dstT
localmem[WGS2_ALIGNED],
localmem2[WGS2_ALIGNED]
#
define
DEFINE_ACCUMULATOR
\
dstT
accumulator
=
(
dstT
)(
0
)
,
accumulator2
=
(
dstT
)(
0
)
#
else
#
define
DECLARE_LOCAL_MEM
\
__local
dstT
localmem[WGS2_ALIGNED]
#
define
DEFINE_ACCUMULATOR
\
dstT
accumulator
=
(
dstT
)(
0
)
#
endif
#
ifdef
HAVE_SRC2
#
ifdef
OP_CALC2
#
define
PROCESS_ELEMS
\
dstT
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstT
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
temp2
=
temp2
>=
(
dstT
)(
0
)
?
temp2
:
-temp2
; \
FUNC
(
accumulator2,
temp2
)
; \
FUNC
(
accumulator,
temp
)
#
else
#
define
PROCESS_ELEMS
\
dstT
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstT
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
FUNC
(
accumulator,
temp
)
#
endif
#
else
#
define
PROCESS_ELEMS
\
dstT
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
FUNC
(
accumulator,
temp
)
#
endif
#
ifdef
HAVE_MASK
#
define
REDUCE_GLOBAL
\
MASK_INDEX
; \
if
(
mask[mask_index]
)
\
{
\
dstT temp = convertToDT(loadpix(srcptr + src_index)); \
FUNC(accumulator, temp); \
PROCESS_ELEMS
; \
}
#
elif
defined
OP_DOT
...
...
@@ -211,7 +248,163 @@
FUNC
(
accumulator,
temp.sF,
temp2.sF
)
#
endif
#else
#
else
//
sum
or
norm
with
2
args
#
ifdef
HAVE_SRC2
#
ifdef
OP_CALC2
//
norm
relative
#
if
kercn
==
1
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
temp2
=
temp2
>=
(
dstT
)(
0
)
?
temp2
:
-temp2
; \
FUNC
(
accumulator,
temp
)
; \
FUNC
(
accumulator2,
temp2
)
#
elif
kercn
==
2
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
temp2
=
temp2
>=
(
dstT
)(
0
)
?
temp2
:
-temp2
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
; \
FUNC
(
accumulator2,
temp2.s0
)
; \
FUNC
(
accumulator2,
temp2.s1
)
#
elif
kercn
==
4
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
temp2
=
temp2
>=
(
dstT
)(
0
)
?
temp2
:
-temp2
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
; \
FUNC
(
accumulator,
temp.s2
)
; \
FUNC
(
accumulator,
temp.s3
)
; \
FUNC
(
accumulator2,
temp2.s0
)
; \
FUNC
(
accumulator2,
temp2.s1
)
; \
FUNC
(
accumulator2,
temp2.s2
)
; \
FUNC
(
accumulator2,
temp2.s3
)
#
elif
kercn
==
8
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
temp2
=
temp2
>=
(
dstT
)(
0
)
?
temp2
:
-temp2
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
; \
FUNC
(
accumulator,
temp.s2
)
; \
FUNC
(
accumulator,
temp.s3
)
; \
FUNC
(
accumulator,
temp.s4
)
; \
FUNC
(
accumulator,
temp.s5
)
; \
FUNC
(
accumulator,
temp.s6
)
; \
FUNC
(
accumulator,
temp.s7
)
; \
FUNC
(
accumulator2,
temp2.s0
)
; \
FUNC
(
accumulator2,
temp2.s1
)
; \
FUNC
(
accumulator2,
temp2.s2
)
; \
FUNC
(
accumulator2,
temp2.s3
)
; \
FUNC
(
accumulator2,
temp2.s4
)
; \
FUNC
(
accumulator2,
temp2.s5
)
; \
FUNC
(
accumulator2,
temp2.s6
)
; \
FUNC
(
accumulator2,
temp2.s7
)
#
elif
kercn
==
16
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
temp2
=
temp2
>=
(
dstT
)(
0
)
?
temp2
:
-temp2
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
; \
FUNC
(
accumulator,
temp.s2
)
; \
FUNC
(
accumulator,
temp.s3
)
; \
FUNC
(
accumulator,
temp.s4
)
; \
FUNC
(
accumulator,
temp.s5
)
; \
FUNC
(
accumulator,
temp.s6
)
; \
FUNC
(
accumulator,
temp.s7
)
; \
FUNC
(
accumulator,
temp.s8
)
; \
FUNC
(
accumulator,
temp.s9
)
; \
FUNC
(
accumulator,
temp.sA
)
; \
FUNC
(
accumulator,
temp.sB
)
; \
FUNC
(
accumulator,
temp.sC
)
; \
FUNC
(
accumulator,
temp.sD
)
; \
FUNC
(
accumulator,
temp.sE
)
; \
FUNC
(
accumulator,
temp.sF
)
; \
FUNC
(
accumulator2,
temp2.s0
)
; \
FUNC
(
accumulator2,
temp2.s1
)
; \
FUNC
(
accumulator2,
temp2.s2
)
; \
FUNC
(
accumulator2,
temp2.s3
)
; \
FUNC
(
accumulator2,
temp2.s4
)
; \
FUNC
(
accumulator2,
temp2.s5
)
; \
FUNC
(
accumulator2,
temp2.s6
)
; \
FUNC
(
accumulator2,
temp2.s7
)
; \
FUNC
(
accumulator2,
temp2.s8
)
; \
FUNC
(
accumulator2,
temp2.s9
)
; \
FUNC
(
accumulator2,
temp2.sA
)
; \
FUNC
(
accumulator2,
temp2.sB
)
; \
FUNC
(
accumulator2,
temp2.sC
)
; \
FUNC
(
accumulator2,
temp2.sD
)
; \
FUNC
(
accumulator2,
temp2.sE
)
; \
FUNC
(
accumulator2,
temp2.sF
)
#
endif
#
else
//
norm
with
2
args
#
if
kercn
==
1
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
FUNC
(
accumulator,
temp
)
#
elif
kercn
==
2
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
#
elif
kercn
==
4
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
; \
FUNC
(
accumulator,
temp.s2
)
; \
FUNC
(
accumulator,
temp.s3
)
#
elif
kercn
==
8
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
; \
FUNC
(
accumulator,
temp.s2
)
; \
FUNC
(
accumulator,
temp.s3
)
; \
FUNC
(
accumulator,
temp.s4
)
; \
FUNC
(
accumulator,
temp.s5
)
; \
FUNC
(
accumulator,
temp.s6
)
; \
FUNC
(
accumulator,
temp.s7
)
#
elif
kercn
==
16
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
; \
FUNC
(
accumulator,
temp.s2
)
; \
FUNC
(
accumulator,
temp.s3
)
; \
FUNC
(
accumulator,
temp.s4
)
; \
FUNC
(
accumulator,
temp.s5
)
; \
FUNC
(
accumulator,
temp.s6
)
; \
FUNC
(
accumulator,
temp.s7
)
; \
FUNC
(
accumulator,
temp.s8
)
; \
FUNC
(
accumulator,
temp.s9
)
; \
FUNC
(
accumulator,
temp.sA
)
; \
FUNC
(
accumulator,
temp.sB
)
; \
FUNC
(
accumulator,
temp.sC
)
; \
FUNC
(
accumulator,
temp.sD
)
; \
FUNC
(
accumulator,
temp.sE
)
; \
FUNC
(
accumulator,
temp.sF
)
#
endif
#
endif
#
else
//
sum
#
if
kercn
==
1
#
define
REDUCE_GLOBAL
\
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
...
...
@@ -260,7 +453,22 @@
FUNC
(
accumulator,
temp.sF
)
#
endif
#
endif
#
endif
#
ifdef
OP_CALC2
#
define
SET_LOCAL_1
\
localmem[lid]
=
accumulator
; \
localmem2[lid]
=
accumulator2
#
define
REDUCE_LOCAL_1
\
localmem[lid
-
WGS2_ALIGNED]
+=
accumulator
; \
localmem2[lid
-
WGS2_ALIGNED]
+=
accumulator2
#
define
REDUCE_LOCAL_2
\
localmem[lid]
+=
localmem[lid2]
; \
localmem2[lid]
+=
localmem2[lid2]
#
define
CALC_RESULT
\
storepix
(
localmem[0],
dstptr
+
dstTSIZE
*
gid
)
; \
storepix
(
localmem2[0],
dstptr
+
mad24
(
groupnum,
dstTSIZE,
dstTSIZE
*
gid
))
#
else
#
define
SET_LOCAL_1
\
localmem[lid]
=
accumulator
#
define
REDUCE_LOCAL_1
\
...
...
@@ -269,6 +477,7 @@
localmem[lid]
+=
localmem[lid2]
#
define
CALC_RESULT
\
storepix
(
localmem[0],
dstptr
+
dstTSIZE
*
gid
)
#
endif
//
countNonZero
stuff
#
elif
defined
OP_COUNT_NON_ZERO
...
...
@@ -334,125 +543,11 @@
#
define
CALC_RESULT
\
storepix
(
localmem[0],
dstptr
+
dstTSIZE
*
gid
)
// norm (NORM_INF) with cn > 1 and mask
#elif defined OP_NORM_INF_MASK
#define DECLARE_LOCAL_MEM \
__local srcT localmem_max[WGS2_ALIGNED]
#define DEFINE_ACCUMULATOR \
srcT maxval = MIN_VAL, temp
#define REDUCE_GLOBAL \
MASK_INDEX; \
if (mask[mask_index]) \
{ \
temp = loadpix(srcptr + src_index); \
maxval = max(maxval, (srcT)(temp >= (srcT)(0) ? temp : -temp)); \
}
#define SET_LOCAL_1 \
localmem_max[lid] = maxval
#define REDUCE_LOCAL_1 \
localmem_max[lid - WGS2_ALIGNED] = max(maxval, localmem_max[lid - WGS2_ALIGNED])
#define REDUCE_LOCAL_2 \
localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2])
#define CALC_RESULT \
storepix(localmem_max[0], dstptr + dstTSIZE * gid)
// minMaxLoc stuff
#elif defined OP_MIN_MAX_LOC |
|
defined
OP_MIN_MAX_LOC_MASK
#
define
DECLARE_LOCAL_MEM
\
__local
srcT
localmem_min[WGS2_ALIGNED]
; \
__local
srcT
localmem_max[WGS2_ALIGNED]
; \
__local
int
localmem_minloc[WGS2_ALIGNED]
; \
__local
int
localmem_maxloc[WGS2_ALIGNED]
#
define
DEFINE_ACCUMULATOR
\
srcT
minval
=
MAX_VAL
; \
srcT
maxval
=
MIN_VAL
; \
int
negative
=
-1
; \
int
minloc
=
negative
; \
int
maxloc
=
negative
; \
srcT
temp
; \
int
temploc
#
define
REDUCE_GLOBAL
\
temp
=
loadpix
(
srcptr
+
src_index
)
; \
temploc
=
id
; \
srcT
temp_minval
=
minval,
temp_maxval
=
maxval
; \
minval
=
min
(
minval,
temp
)
; \
maxval
=
max
(
maxval,
temp
)
; \
minloc
=
(
minval
==
temp_minval
)
?
(
temp_minval
==
MAX_VAL
)
?
temploc
:
minloc
:
temploc
; \
maxloc
=
(
maxval
==
temp_maxval
)
?
(
temp_maxval
==
MIN_VAL
)
?
temploc
:
maxloc
:
temploc
#
define
SET_LOCAL_1
\
localmem_min[lid]
=
minval
; \
localmem_max[lid]
=
maxval
; \
localmem_minloc[lid]
=
minloc
; \
localmem_maxloc[lid]
=
maxloc
#
define
REDUCE_LOCAL_1
\
srcT
oldmin
=
localmem_min[lid-WGS2_ALIGNED]
; \
srcT
oldmax
=
localmem_max[lid-WGS2_ALIGNED]
; \
localmem_min[lid
-
WGS2_ALIGNED]
=
min
(
minval,
localmem_min[lid-WGS2_ALIGNED]
)
; \
localmem_max[lid
-
WGS2_ALIGNED]
=
max
(
maxval,
localmem_max[lid-WGS2_ALIGNED]
)
; \
srcT
minv
=
localmem_min[lid
-
WGS2_ALIGNED],
maxv
=
localmem_max[lid
-
WGS2_ALIGNED]
; \
localmem_minloc[lid
-
WGS2_ALIGNED]
=
(
minv
==
minval
)
?
(
minv
==
oldmin
)
?
\
min
(
minloc,
localmem_minloc[lid-WGS2_ALIGNED]
)
:
minloc
:
localmem_minloc[lid-WGS2_ALIGNED]
; \
localmem_maxloc[lid
-
WGS2_ALIGNED]
=
(
maxv
==
maxval
)
?
(
maxv
==
oldmax
)
?
\
min
(
maxloc,
localmem_maxloc[lid-WGS2_ALIGNED]
)
:
maxloc
:
localmem_maxloc[lid-WGS2_ALIGNED]
#
define
REDUCE_LOCAL_2
\
srcT
oldmin
=
localmem_min[lid]
; \
srcT
oldmax
=
localmem_max[lid]
; \
localmem_min[lid]
=
min
(
localmem_min[lid],
localmem_min[lid2]
)
; \
localmem_max[lid]
=
max
(
localmem_max[lid],
localmem_max[lid2]
)
; \
srcT
min1
=
localmem_min[lid],
min2
=
localmem_min[lid2]
; \
localmem_minloc[lid]
=
(
localmem_minloc[lid]
==
negative
)
?
localmem_minloc[lid2]
:
(
localmem_minloc[lid2]
==
negative
)
?
\
localmem_minloc[lid]
:
(
min1
==
min2
)
?
(
min1
==
oldmin
)
?
min
(
localmem_minloc[lid2],localmem_minloc[lid]
)
:
\
localmem_minloc[lid2]
:
localmem_minloc[lid]
; \
srcT
max1
=
localmem_max[lid],
max2
=
localmem_max[lid2]
; \
localmem_maxloc[lid]
=
(
localmem_maxloc[lid]
==
negative
)
?
localmem_maxloc[lid2]
:
(
localmem_maxloc[lid2]
==
negative
)
?
\
localmem_maxloc[lid]
:
(
max1
==
max2
)
?
(
max1
==
oldmax
)
?
min
(
localmem_maxloc[lid2],localmem_maxloc[lid]
)
:
\
localmem_maxloc[lid2]
:
localmem_maxloc[lid]
#
define
CALC_RESULT
\
storepix
(
localmem_min[0],
dstptr
+
dstTSIZE
*
gid
)
; \
storepix
(
localmem_max[0],
dstptr2
+
dstTSIZE
*
gid
)
; \
dstlocptr[gid]
=
localmem_minloc[0]
; \
dstlocptr2[gid]
=
localmem_maxloc[0]
#
if
defined
OP_MIN_MAX_LOC_MASK
#
undef
DEFINE_ACCUMULATOR
#
define
DEFINE_ACCUMULATOR
\
srcT
minval
=
MAX_VAL
; \
srcT
maxval
=
MIN_VAL
; \
int
negative
=
-1
; \
int
minloc
=
negative
; \
int
maxloc
=
negative
; \
srcT
temp,
temp_mask,
zeroVal
=
(
srcT
)(
0
)
; \
int
temploc
#
undef
REDUCE_GLOBAL
#
define
REDUCE_GLOBAL
\
temp
=
loadpix
(
srcptr
+
src_index
)
; \
temploc
=
id
; \
MASK_INDEX
; \
__global
const
uchar
*
mask
=
(
__global
const
uchar
*
)(
maskptr
+
mask_index
)
; \
temp_mask
=
mask[0]
; \
srcT
temp_minval
=
minval,
temp_maxval
=
maxval
; \
minval
=
(
temp_mask
==
zeroVal
)
?
minval
:
min
(
minval,
temp
)
; \
maxval
=
(
temp_mask
==
zeroVal
)
?
maxval
:
max
(
maxval,
temp
)
; \
minloc
=
(
temp_mask
==
zeroVal
)
?
minloc
:
(
minval
==
temp_minval
)
?
(
temp_minval
==
MAX_VAL
)
?
temploc
:
minloc
:
temploc
; \
maxloc
=
(
temp_mask
==
zeroVal
)
?
maxloc
:
(
maxval
==
temp_maxval
)
?
(
temp_maxval
==
MIN_VAL
)
?
temploc
:
maxloc
:
temploc
#
endif
#
else
#
error
"No operation"
#
endif
//
end
of
minMaxLoc
stuff
#
ifdef
OP_MIN_MAX_LOC
#
undef
EXTRA_PARAMS
#
define
EXTRA_PARAMS
,
__global
uchar
*
dstptr2,
__global
int
*
dstlocptr,
__global
int
*
dstlocptr2
#
elif
defined
OP_MIN_MAX_LOC_MASK
#
undef
EXTRA_PARAMS
#
define
EXTRA_PARAMS
,
__global
uchar
*
dstptr2,
__global
int
*
dstlocptr,
__global
int
*
dstlocptr2,
\
__global
const
uchar
*
maskptr,
int
mask_step,
int
mask_offset
#
endif
#
elif
defined
OP_DOT
#
ifdef
OP_DOT
#
undef
EXTRA_PARAMS
#
define
EXTRA_PARAMS
,
__global
uchar
*
src2ptr,
int
src2_step,
int
src2_offset
#
endif
...
...
@@ -465,6 +560,9 @@ __kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset
int
id
=
get_global_id
(
0
)
*
kercn
;
srcptr
+=
src_offset
;
#
ifdef
HAVE_SRC2
src2ptr
+=
src2_offset
;
#
endif
DECLARE_LOCAL_MEM
;
DEFINE_ACCUMULATOR
;
...
...
@@ -475,6 +573,13 @@ __kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset
int
src_index
=
mul24
(
id,
srcTSIZE
)
;
#
else
int
src_index
=
mad24
(
id
/
cols,
src_step,
mul24
(
id
%
cols,
srcTSIZE
))
;
#
endif
#
ifdef
HAVE_SRC2
#
ifdef
HAVE_SRC2_CONT
int
src2_index
=
mul24
(
id,
srcTSIZE
)
;
#
else
int
src2_index
=
mad24
(
id
/
cols,
src2_step,
mul24
(
id
%
cols,
srcTSIZE
))
;
#
endif
#
endif
REDUCE_GLOBAL
;
}
...
...
modules/core/src/stat.cpp
View file @
88ceee05
...
...
@@ -469,21 +469,25 @@ template <typename T> Scalar ocl_part_sum(Mat m)
enum
{
OCL_OP_SUM
=
0
,
OCL_OP_SUM_ABS
=
1
,
OCL_OP_SUM_SQR
=
2
};
static
bool
ocl_sum
(
InputArray
_src
,
Scalar
&
res
,
int
sum_op
,
InputArray
_mask
=
noArray
()
)
static
bool
ocl_sum
(
InputArray
_src
,
Scalar
&
res
,
int
sum_op
,
InputArray
_mask
=
noArray
(),
InputArray
_src2
=
noArray
(),
bool
calc2
=
false
,
const
Scalar
&
res2
=
Scalar
()
)
{
CV_Assert
(
sum_op
==
OCL_OP_SUM
||
sum_op
==
OCL_OP_SUM_ABS
||
sum_op
==
OCL_OP_SUM_SQR
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
,
haveMask
=
_mask
.
kind
()
!=
_InputArray
::
NONE
;
const
ocl
::
Device
&
dev
=
ocl
::
Device
::
getDefault
();
bool
doubleSupport
=
dev
.
doubleFPConfig
()
>
0
,
haveMask
=
_mask
.
kind
()
!=
_InputArray
::
NONE
,
haveSrc2
=
_src2
.
kind
()
!=
_InputArray
::
NONE
;
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
kercn
=
cn
==
1
&&
!
haveMask
?
ocl
::
predictOptimalVectorWidth
(
_src
)
:
1
,
mcn
=
std
::
max
(
cn
,
kercn
);
CV_Assert
(
!
haveSrc2
||
_src2
.
type
()
==
type
);
if
(
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
cn
>
4
)
return
false
;
int
dbsize
=
ocl
::
Device
::
getDefault
().
maxComputeUnits
(
);
size_t
wgs
=
ocl
::
Device
::
getDefault
()
.
maxWorkGroupSize
();
int
ngroups
=
dev
.
maxComputeUnits
(),
dbsize
=
ngroups
*
(
calc2
?
2
:
1
);
size_t
wgs
=
dev
.
maxWorkGroupSize
();
int
ddepth
=
std
::
max
(
sum_op
==
OCL_OP_SUM_SQR
?
CV_32F
:
CV_32S
,
depth
),
dtype
=
CV_MAKE_TYPE
(
ddepth
,
cn
);
...
...
@@ -497,7 +501,7 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask
static
const
char
*
const
opMap
[
3
]
=
{
"OP_SUM"
,
"OP_SUM_ABS"
,
"OP_SUM_SQR"
};
char
cvt
[
40
];
String
opts
=
format
(
"-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D dstT1=%s -D ddepth=%d -D cn=%d"
" -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s%s%s -D kercn=%d"
,
" -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s%s%s -D kercn=%d
%s%s%s
"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
mcn
)),
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
dtype
),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
ddepth
,
mcn
)),
ocl
::
typeToStr
(
ddepth
),
ddepth
,
cn
,
...
...
@@ -506,30 +510,49 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
haveMask
?
" -D HAVE_MASK"
:
""
,
_src
.
isContinuous
()
?
" -D HAVE_SRC_CONT"
:
""
,
_mask
.
isContinuous
()
?
" -D HAVE_MASK_CONT"
:
""
,
kercn
);
haveMask
&&
_mask
.
isContinuous
()
?
" -D HAVE_MASK_CONT"
:
""
,
kercn
,
haveSrc2
?
" -D HAVE_SRC2"
:
""
,
calc2
?
" -D OP_CALC2"
:
""
,
haveSrc2
&&
_src2
.
isContinuous
()
?
" -D HAVE_SRC2_CONT"
:
""
);
ocl
::
Kernel
k
(
"reduce"
,
ocl
::
core
::
reduce_oclsrc
,
opts
);
if
(
k
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
(),
db
(
1
,
dbsize
,
dtype
),
mask
=
_mask
.
getUMat
();
UMat
src
=
_src
.
getUMat
(),
src2
=
_src2
.
getUMat
(),
db
(
1
,
dbsize
,
dtype
),
mask
=
_mask
.
getUMat
();
ocl
::
KernelArg
srcarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
dbarg
=
ocl
::
KernelArg
::
PtrWriteOnly
(
db
),
maskarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mask
);
maskarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mask
),
src2arg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src2
);
if
(
haveMask
)
k
.
args
(
srcarg
,
src
.
cols
,
(
int
)
src
.
total
(),
dbsize
,
dbarg
,
maskarg
);
{
if
(
haveSrc2
)
k
.
args
(
srcarg
,
src
.
cols
,
(
int
)
src
.
total
(),
ngroups
,
dbarg
,
maskarg
,
src2arg
);
else
k
.
args
(
srcarg
,
src
.
cols
,
(
int
)
src
.
total
(),
ngroups
,
dbarg
,
maskarg
);
}
else
k
.
args
(
srcarg
,
src
.
cols
,
(
int
)
src
.
total
(),
dbsize
,
dbarg
);
{
if
(
haveSrc2
)
k
.
args
(
srcarg
,
src
.
cols
,
(
int
)
src
.
total
(),
ngroups
,
dbarg
,
src2arg
);
else
k
.
args
(
srcarg
,
src
.
cols
,
(
int
)
src
.
total
(),
ngroups
,
dbarg
);
}
size_t
globalsize
=
dbsize
*
wgs
;
size_t
globalsize
=
ngroups
*
wgs
;
if
(
k
.
run
(
1
,
&
globalsize
,
&
wgs
,
false
))
{
typedef
Scalar
(
*
part_sum
)(
Mat
m
);
part_sum
funcs
[
3
]
=
{
ocl_part_sum
<
int
>
,
ocl_part_sum
<
float
>
,
ocl_part_sum
<
double
>
},
func
=
funcs
[
ddepth
-
CV_32S
];
res
=
func
(
db
.
getMat
(
ACCESS_READ
));
Mat
mres
=
db
.
getMat
(
ACCESS_READ
);
if
(
calc2
)
const_cast
<
Scalar
&>
(
res2
)
=
func
(
mres
.
colRange
(
ngroups
,
dbsize
));
res
=
func
(
mres
.
colRange
(
0
,
ngroups
));
return
true
;
}
return
false
;
...
...
@@ -1311,104 +1334,197 @@ static void ofs2idx(const Mat& a, size_t ofs, int* idx)
#ifdef HAVE_OPENCL
template
<
typename
T
>
void
getMinMaxRes
(
const
Mat
&
minv
,
const
Mat
&
maxv
,
const
Mat
&
minl
,
const
Mat
&
maxl
,
double
*
minVal
,
double
*
maxVal
,
int
*
minLoc
,
int
*
maxLoc
,
const
int
groupnum
,
const
int
cn
,
const
int
cols
)
void
getMinMaxRes
(
const
Mat
&
db
,
double
*
minVal
,
double
*
maxVal
,
int
*
minLoc
,
int
*
maxLoc
,
int
groupnum
,
int
cols
,
double
*
maxVal2
)
{
T
min
=
std
::
numeric_limits
<
T
>::
max
();
T
max
=
std
::
numeric_limits
<
T
>::
min
()
>
0
?
-
std
::
numeric_limits
<
T
>::
max
()
:
std
::
numeric_limits
<
T
>::
min
();
int
minloc
=
INT_MAX
,
maxloc
=
INT_MAX
;
uint
index_max
=
std
::
numeric_limits
<
uint
>::
max
();
T
minval
=
std
::
numeric_limits
<
T
>::
max
();
T
maxval
=
std
::
numeric_limits
<
T
>::
min
()
>
0
?
-
std
::
numeric_limits
<
T
>::
max
()
:
std
::
numeric_limits
<
T
>::
min
(),
maxval2
=
maxval
;
uint
minloc
=
index_max
,
maxloc
=
index_max
;
int
index
=
0
;
const
T
*
minptr
=
NULL
,
*
maxptr
=
NULL
,
*
maxptr2
=
NULL
;
const
uint
*
minlocptr
=
NULL
,
*
maxlocptr
=
NULL
;
if
(
minVal
||
minLoc
)
{
minptr
=
(
const
T
*
)
db
.
data
;
index
+=
sizeof
(
T
)
*
groupnum
;
}
if
(
maxVal
||
maxLoc
)
{
maxptr
=
(
const
T
*
)(
db
.
data
+
index
);
index
+=
sizeof
(
T
)
*
groupnum
;
}
if
(
minLoc
)
{
minlocptr
=
(
uint
*
)(
db
.
data
+
index
);
index
+=
sizeof
(
uint
)
*
groupnum
;
}
if
(
maxLoc
)
{
maxlocptr
=
(
uint
*
)(
db
.
data
+
index
);
index
+=
sizeof
(
uint
)
*
groupnum
;
}
if
(
maxVal2
)
maxptr2
=
(
const
T
*
)(
db
.
data
+
index
);
for
(
int
i
=
0
;
i
<
groupnum
;
i
++
)
{
T
current_min
=
minv
.
at
<
T
>
(
0
,
i
);
T
current_max
=
maxv
.
at
<
T
>
(
0
,
i
);
T
oldmin
=
min
,
oldmax
=
max
;
min
=
std
::
min
(
min
,
current_min
);
max
=
std
::
max
(
max
,
current_max
);
if
(
cn
==
1
)
if
(
minptr
&&
minptr
[
i
]
<=
minval
)
{
if
(
minptr
[
i
]
==
minval
)
{
if
(
minlocptr
)
minloc
=
std
::
min
(
minlocptr
[
i
],
minloc
);
}
else
{
int
current_minloc
=
minl
.
at
<
int
>
(
0
,
i
);
int
current_maxloc
=
maxl
.
at
<
int
>
(
0
,
i
);
if
(
current_minloc
<
0
||
current_maxloc
<
0
)
continue
;
minloc
=
(
oldmin
==
current_min
)
?
std
::
min
(
minloc
,
current_minloc
)
:
(
oldmin
<
current_min
)
?
minloc
:
current_minloc
;
maxloc
=
(
oldmax
==
current_max
)
?
std
::
min
(
maxloc
,
current_maxloc
)
:
(
oldmax
>
current_max
)
?
maxloc
:
current_maxloc
;
if
(
minlocptr
)
minloc
=
minlocptr
[
i
];
minval
=
minptr
[
i
];
}
}
bool
zero_mask
=
(
maxloc
==
INT_MAX
)
||
(
minloc
==
INT_MAX
);
if
(
maxptr
&&
maxptr
[
i
]
>=
maxval
)
{
if
(
maxptr
[
i
]
==
maxval
)
{
if
(
maxlocptr
)
maxloc
=
std
::
min
(
maxlocptr
[
i
],
maxloc
);
}
else
{
if
(
maxlocptr
)
maxloc
=
maxlocptr
[
i
];
maxval
=
maxptr
[
i
];
}
}
if
(
maxptr2
&&
maxptr2
[
i
]
>
maxval2
)
maxval2
=
maxptr2
[
i
];
}
bool
zero_mask
=
(
minLoc
&&
minloc
==
index_max
)
||
(
maxLoc
&&
maxloc
==
index_max
);
if
(
minVal
)
*
minVal
=
zero_mask
?
0
:
(
double
)
min
;
*
minVal
=
zero_mask
?
0
:
(
double
)
min
val
;
if
(
maxVal
)
*
maxVal
=
zero_mask
?
0
:
(
double
)
max
;
*
maxVal
=
zero_mask
?
0
:
(
double
)
maxval
;
if
(
maxVal2
)
*
maxVal2
=
zero_mask
?
0
:
(
double
)
maxval2
;
if
(
minLoc
)
{
minLoc
[
0
]
=
zero_mask
?
-
1
:
minloc
/
cols
;
minLoc
[
1
]
=
zero_mask
?
-
1
:
minloc
%
cols
;
minLoc
[
0
]
=
zero_mask
?
-
1
:
minloc
/
cols
;
minLoc
[
1
]
=
zero_mask
?
-
1
:
minloc
%
cols
;
}
if
(
maxLoc
)
{
maxLoc
[
0
]
=
zero_mask
?
-
1
:
maxloc
/
cols
;
maxLoc
[
1
]
=
zero_mask
?
-
1
:
maxloc
%
cols
;
maxLoc
[
0
]
=
zero_mask
?
-
1
:
maxloc
/
cols
;
maxLoc
[
1
]
=
zero_mask
?
-
1
:
maxloc
%
cols
;
}
}
typedef
void
(
*
getMinMaxResFunc
)(
const
Mat
&
minv
,
const
Mat
&
maxv
,
const
Mat
&
minl
,
const
Mat
&
maxl
,
double
*
min
Val
,
double
*
maxVal
,
int
*
minLoc
,
int
*
maxLoc
,
const
int
gropunum
,
const
int
cn
,
const
int
cols
);
typedef
void
(
*
getMinMaxResFunc
)(
const
Mat
&
db
,
double
*
minVal
,
double
*
max
Val
,
int
*
minLoc
,
int
*
maxLoc
,
int
gropunum
,
int
cols
,
double
*
maxVal2
);
static
bool
ocl_minMaxIdx
(
InputArray
_src
,
double
*
minVal
,
double
*
maxVal
,
int
*
minLoc
,
int
*
maxLoc
,
InputArray
_mask
)
static
bool
ocl_minMaxIdx
(
InputArray
_src
,
double
*
minVal
,
double
*
maxVal
,
int
*
minLoc
,
int
*
maxLoc
,
InputArray
_mask
,
int
ddepth
=
-
1
,
bool
absValues
=
false
,
InputArray
_src2
=
noArray
(),
double
*
maxVal2
=
NULL
)
{
CV_Assert
(
(
_src
.
channels
()
==
1
&&
(
_mask
.
empty
()
||
_mask
.
type
()
==
CV_8U
))
||
(
_src
.
channels
()
>=
1
&&
_mask
.
empty
()
&&
!
minLoc
&&
!
maxLoc
)
);
const
ocl
::
Device
&
dev
=
ocl
::
Device
::
getDefault
();
bool
doubleSupport
=
dev
.
doubleFPConfig
()
>
0
,
haveMask
=
!
_mask
.
empty
(),
haveSrc2
=
_src2
.
kind
()
!=
_InputArray
::
NONE
;
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
kercn
=
haveMask
?
cn
:
std
::
min
(
4
,
ocl
::
predictOptimalVectorWidth
(
_src
));
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
kercn
=
1
;
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
CV_Assert
(
(
cn
==
1
&&
(
!
haveMask
||
_mask
.
type
()
==
CV_8U
))
||
(
cn
>=
1
&&
!
minLoc
&&
!
maxLoc
)
)
;
if
(
depth
==
CV_64F
&&
!
doubleSupport
)
if
(
ddepth
<
0
)
ddepth
=
depth
;
CV_Assert
(
!
haveSrc2
||
_src2
.
type
()
==
type
);
if
((
depth
==
CV_64F
||
ddepth
==
CV_64F
)
&&
!
doubleSupport
)
return
false
;
int
groupnum
=
ocl
::
Device
::
getDefault
()
.
maxComputeUnits
();
size_t
wgs
=
ocl
::
Device
::
getDefault
()
.
maxWorkGroupSize
();
int
groupnum
=
dev
.
maxComputeUnits
();
size_t
wgs
=
dev
.
maxWorkGroupSize
();
int
wgs2_aligned
=
1
;
while
(
wgs2_aligned
<
(
int
)
wgs
)
wgs2_aligned
<<=
1
;
wgs2_aligned
>>=
1
;
String
opts
=
format
(
"-D DEPTH_%d -D srcT=%s -D OP_MIN_MAX_LOC%s -D WGS=%d"
" -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d"
,
depth
,
ocl
::
typeToStr
(
depth
),
_mask
.
empty
()
?
""
:
"_MASK"
,
(
int
)
wgs
,
wgs2_aligned
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
_src
.
isContinuous
()
?
" -D HAVE_SRC_CONT"
:
""
,
_mask
.
isContinuous
()
?
" -D HAVE_MASK_CONT"
:
""
,
kercn
);
bool
needMinVal
=
minVal
||
minLoc
,
needMinLoc
=
minLoc
!=
NULL
,
needMaxVal
=
maxVal
||
maxLoc
,
needMaxLoc
=
maxLoc
!=
NULL
;
ocl
::
Kernel
k
(
"reduce"
,
ocl
::
core
::
reduce_oclsrc
,
opts
);
// in case of mask we must know whether mask is filled with zeros or not
// so let's calculate min or max location, if it's undefined, so mask is zeros
if
(
!
(
needMaxLoc
||
needMinLoc
)
&&
haveMask
)
{
if
(
needMinVal
)
needMinLoc
=
true
;
else
needMaxLoc
=
true
;
}
char
cvt
[
40
];
String
opts
=
format
(
"-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s"
" -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s"
" -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s"
,
depth
,
ocl
::
typeToStr
(
depth
),
haveMask
?
" -D HAVE_MASK"
:
""
,
(
int
)
wgs
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
kercn
)),
wgs2_aligned
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
_src
.
isContinuous
()
?
" -D HAVE_SRC_CONT"
:
""
,
_mask
.
isContinuous
()
?
" -D HAVE_MASK_CONT"
:
""
,
kercn
,
needMinVal
?
" -D NEED_MINVAL"
:
""
,
needMaxVal
?
" -D NEED_MAXVAL"
:
""
,
needMinLoc
?
" -D NEED_MINLOC"
:
""
,
needMaxLoc
?
" -D NEED_MAXLOC"
:
""
,
ocl
::
typeToStr
(
ddepth
),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
ddepth
,
kercn
)),
ocl
::
convertTypeStr
(
depth
,
ddepth
,
kercn
,
cvt
),
absValues
?
" -D OP_ABS"
:
""
,
haveSrc2
?
" -D HAVE_SRC2"
:
""
,
maxVal2
?
" -D OP_CALC2"
:
""
,
haveSrc2
&&
_src2
.
isContinuous
()
?
" -D HAVE_SRC2_CONT"
:
""
);
ocl
::
Kernel
k
(
"minmaxloc"
,
ocl
::
core
::
minmaxloc_oclsrc
,
opts
);
if
(
k
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
(),
minval
(
1
,
groupnum
,
src
.
type
()
),
maxval
(
1
,
groupnum
,
src
.
type
()),
minloc
(
1
,
groupnum
,
CV_32SC1
),
maxloc
(
1
,
groupnum
,
CV_32SC1
),
mask
;
if
(
!
_mask
.
empty
())
mask
=
_mask
.
getUMat
();
int
esz
=
CV_ELEM_SIZE
(
ddepth
),
esz32s
=
CV_ELEM_SIZE1
(
CV_32S
),
dbsize
=
groupnum
*
((
needMinVal
?
esz
:
0
)
+
(
needMaxVal
?
esz
:
0
)
+
(
needMinLoc
?
esz32s
:
0
)
+
(
needMaxLoc
?
esz32s
:
0
)
+
(
maxVal2
?
esz
:
0
));
UMat
src
=
_src
.
getUMat
(),
src2
=
_src2
.
getUMat
(),
db
(
1
,
dbsize
,
CV_8UC1
),
mask
=
_mask
.
getUMat
();
if
(
src
.
channels
()
>
1
)
if
(
cn
>
1
&&
!
haveMask
)
{
src
=
src
.
reshape
(
1
);
src2
=
src2
.
reshape
(
1
);
}
if
(
mask
.
empty
())
if
(
haveSrc2
)
{
if
(
!
haveMask
)
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
src
.
cols
,
(
int
)
src
.
total
(),
groupnum
,
ocl
::
KernelArg
::
PtrWriteOnly
(
db
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src2
));
else
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
src
.
cols
,
(
int
)
src
.
total
(),
groupnum
,
ocl
::
KernelArg
::
PtrWriteOnly
(
db
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mask
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src2
));
}
else
{
if
(
!
haveMask
)
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
src
.
cols
,
(
int
)
src
.
total
(),
groupnum
,
ocl
::
KernelArg
::
PtrWriteOnly
(
minval
),
ocl
::
KernelArg
::
PtrWriteOnly
(
maxval
),
ocl
::
KernelArg
::
PtrWriteOnly
(
minloc
),
ocl
::
KernelArg
::
PtrWriteOnly
(
maxloc
));
groupnum
,
ocl
::
KernelArg
::
PtrWriteOnly
(
db
));
else
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
src
.
cols
,
(
int
)
src
.
total
(),
groupnum
,
ocl
::
KernelArg
::
PtrWriteOnly
(
minval
),
ocl
::
KernelArg
::
PtrWriteOnly
(
maxval
),
ocl
::
KernelArg
::
PtrWriteOnly
(
minloc
),
ocl
::
KernelArg
::
PtrWriteOnly
(
maxloc
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mask
));
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
src
.
cols
,
(
int
)
src
.
total
()
,
groupnum
,
ocl
::
KernelArg
::
PtrWriteOnly
(
db
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mask
));
}
size_t
globalsize
=
groupnum
*
wgs
;
if
(
!
k
.
run
(
1
,
&
globalsize
,
&
wgs
,
false
))
return
false
;
Mat
minv
=
minval
.
getMat
(
ACCESS_READ
),
maxv
=
maxval
.
getMat
(
ACCESS_READ
),
minl
=
minloc
.
getMat
(
ACCESS_READ
),
maxl
=
maxloc
.
getMat
(
ACCESS_READ
);
static
getMinMaxResFunc
functab
[
7
]
=
static
const
getMinMaxResFunc
functab
[
7
]
=
{
getMinMaxRes
<
uchar
>
,
getMinMaxRes
<
char
>
,
...
...
@@ -1419,10 +1535,13 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
getMinMaxRes
<
double
>
};
getMinMaxResFunc
func
;
getMinMaxResFunc
func
=
functab
[
ddepth
]
;
func
=
functab
[
depth
];
func
(
minv
,
maxv
,
minl
,
maxl
,
minVal
,
maxVal
,
minLoc
,
maxLoc
,
groupnum
,
src
.
channels
(),
src
.
cols
);
int
locTemp
[
2
];
func
(
db
.
getMat
(
ACCESS_READ
),
minVal
,
maxVal
,
needMinLoc
?
minLoc
?
minLoc
:
locTemp
:
minLoc
,
needMaxLoc
?
maxLoc
?
maxLoc
:
locTemp
:
maxLoc
,
groupnum
,
src
.
cols
,
maxVal2
);
return
true
;
}
...
...
@@ -2060,66 +2179,9 @@ static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double &
if
(
normType
==
NORM_INF
)
{
if
(
cn
==
1
||
!
haveMask
)
{
UMat
abssrc
;
if
(
depth
!=
CV_8U
&&
depth
!=
CV_16U
)
{
int
wdepth
=
std
::
max
(
CV_32S
,
depth
),
rowsPerWI
=
d
.
isIntel
()
?
4
:
1
;
char
cvt
[
50
];
ocl
::
Kernel
kabs
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D UNARY_OP -D OP_ABS_NOSAT -D dstT=%s -D srcT1=%s"
" -D convertToDT=%s -D rowsPerWI=%d%s"
,
ocl
::
typeToStr
(
wdepth
),
ocl
::
typeToStr
(
depth
),
ocl
::
convertTypeStr
(
depth
,
wdepth
,
1
,
cvt
),
rowsPerWI
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
kabs
.
empty
())
if
(
!
ocl_minMaxIdx
(
_src
,
NULL
,
&
result
,
NULL
,
NULL
,
_mask
,
std
::
max
(
depth
,
CV_32S
),
depth
!=
CV_8U
&&
depth
!=
CV_16U
))
return
false
;
abssrc
.
create
(
src
.
size
(),
CV_MAKE_TYPE
(
wdepth
,
cn
));
kabs
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
abssrc
,
cn
));
size_t
globalsize
[
2
]
=
{
src
.
cols
*
cn
,
(
src
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
if
(
!
kabs
.
run
(
2
,
globalsize
,
NULL
,
false
))
return
false
;
}
else
abssrc
=
src
;
cv
::
minMaxIdx
(
haveMask
?
abssrc
:
abssrc
.
reshape
(
1
),
NULL
,
&
result
,
NULL
,
NULL
,
_mask
);
}
else
{
int
dbsize
=
d
.
maxComputeUnits
();
size_t
wgs
=
d
.
maxWorkGroupSize
();
int
wgs2_aligned
=
1
;
while
(
wgs2_aligned
<
(
int
)
wgs
)
wgs2_aligned
<<=
1
;
wgs2_aligned
>>=
1
;
ocl
::
Kernel
k
(
"reduce"
,
ocl
::
core
::
reduce_oclsrc
,
format
(
"-D OP_NORM_INF_MASK -D HAVE_MASK -D DEPTH_%d"
" -D srcT=%s -D srcT1=%s -D WGS=%d -D cn=%d -D WGS2_ALIGNED=%d%s%s%s"
,
depth
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
wgs
,
cn
,
wgs2_aligned
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
src
.
isContinuous
()
?
" -D HAVE_CONT_SRC"
:
""
,
_mask
.
isContinuous
()
?
" -D HAVE_MASK_CONT"
:
""
));
if
(
k
.
empty
())
return
false
;
UMat
db
(
1
,
dbsize
,
type
),
mask
=
_mask
.
getUMat
();
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
src
.
cols
,
(
int
)
src
.
total
(),
dbsize
,
ocl
::
KernelArg
::
PtrWriteOnly
(
db
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mask
));
size_t
globalsize
=
dbsize
*
wgs
;
if
(
!
k
.
run
(
1
,
&
globalsize
,
&
wgs
,
true
))
return
false
;
minMaxIdx
(
db
.
getMat
(
ACCESS_READ
),
NULL
,
&
result
,
NULL
,
NULL
,
noArray
());
}
}
else
if
(
normType
==
NORM_L1
||
normType
==
NORM_L2
||
normType
==
NORM_L2SQR
)
{
...
...
@@ -2462,38 +2524,46 @@ namespace cv {
static
bool
ocl_norm
(
InputArray
_src1
,
InputArray
_src2
,
int
normType
,
InputArray
_mask
,
double
&
result
)
{
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
int
type
=
_src1
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
rowsPerWI
=
d
.
isIntel
()
?
4
:
1
;
bool
doubleSupport
=
d
.
doubleFPConfig
()
>
0
;
Scalar
sc1
,
sc2
;
int
type
=
_src1
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
bool
relative
=
(
normType
&
NORM_RELATIVE
)
!=
0
;
normType
&=
~
NORM_RELATIVE
;
bool
normsum
=
normType
==
NORM_L1
||
normType
==
NORM_L2
||
normType
==
NORM_L2SQR
;
if
(
!
(
normType
==
NORM_INF
||
normType
==
NORM_L1
||
normType
==
NORM_L2
||
normType
==
NORM_L2SQR
)
||
(
!
doubleSupport
&&
depth
==
CV_64F
))
if
(
!
(
normType
==
NORM_INF
||
normsum
)
)
return
false
;
int
wdepth
=
std
::
max
(
CV_32S
,
depth
);
char
cvt
[
50
];
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D BINARY_OP -D OP_ABSDIFF -D dstT=%s -D workT=dstT -D srcT1=%s -D srcT2=srcT1"
" -D convertToDT=%s -D convertToWT1=convertToDT -D convertToWT2=convertToDT -D rowsPerWI=%d%s"
,
ocl
::
typeToStr
(
wdepth
),
ocl
::
typeToStr
(
depth
),
ocl
::
convertTypeStr
(
depth
,
wdepth
,
1
,
cvt
),
rowsPerWI
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
if
(
normsum
)
{
if
(
!
ocl_sum
(
_src1
,
sc1
,
normType
==
NORM_L2
||
normType
==
NORM_L2SQR
?
OCL_OP_SUM_SQR
:
OCL_OP_SUM
,
_mask
,
_src2
,
relative
,
sc2
))
return
false
;
}
else
{
if
(
!
ocl_minMaxIdx
(
_src1
,
NULL
,
&
sc1
[
0
],
NULL
,
NULL
,
_mask
,
std
::
max
(
CV_32S
,
depth
),
false
,
_src2
,
relative
?
&
sc2
[
0
]
:
NULL
))
return
false
;
cn
=
1
;
}
UMat
src1
=
_src1
.
getUMat
(),
src2
=
_src2
.
getUMat
(),
diff
(
src1
.
size
(),
CV_MAKE_TYPE
(
wdepth
,
cn
));
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src1
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src2
),
ocl
::
KernelArg
::
WriteOnly
(
diff
,
cn
));
double
s2
=
0
;
for
(
int
i
=
0
;
i
<
cn
;
++
i
)
{
result
+=
sc1
[
i
];
if
(
relative
)
s2
+=
sc2
[
i
];
}
size_t
globalsize
[
2
]
=
{
diff
.
cols
*
cn
,
(
diff
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
if
(
!
k
.
run
(
2
,
globalsize
,
NULL
,
false
))
return
false
;
if
(
normType
==
NORM_L2
)
{
result
=
std
::
sqrt
(
result
);
if
(
relative
)
s2
=
std
::
sqrt
(
s2
);
}
result
=
cv
::
norm
(
diff
,
normType
,
_mask
);
if
(
relative
)
result
/=
cv
::
norm
(
src2
,
normType
,
_mask
)
+
DBL_EPSILON
;
result
/=
(
s2
+
DBL_EPSILON
)
;
return
true
;
}
...
...
@@ -2508,8 +2578,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
#ifdef HAVE_OPENCL
double
_result
=
0
;
CV_OCL_RUN_
(
_src1
.
isUMat
()
&&
_src2
.
isUMat
()
&&
_src1
.
dims
()
<=
2
&&
_src2
.
dims
()
<=
2
,
CV_OCL_RUN_
(
_src1
.
isUMat
(),
ocl_norm
(
_src1
,
_src2
,
normType
,
_mask
,
_result
),
_result
)
#endif
...
...
modules/core/test/ocl/test_arithm.cpp
View file @
88ceee05
...
...
@@ -1293,6 +1293,8 @@ OCL_TEST_P(Norm, NORM_INF_2args)
{
generateTestData
();
SCOPED_TRACE
(
relative
?
"NORM_RELATIVE"
:
""
);
int
type
=
NORM_INF
;
if
(
relative
==
1
)
type
|=
NORM_RELATIVE
;
...
...
@@ -1311,6 +1313,8 @@ OCL_TEST_P(Norm, NORM_INF_2args_mask)
{
generateTestData
();
SCOPED_TRACE
(
relative
?
"NORM_RELATIVE"
:
""
);
int
type
=
NORM_INF
;
if
(
relative
==
1
)
type
|=
NORM_RELATIVE
;
...
...
@@ -1329,6 +1333,8 @@ OCL_TEST_P(Norm, NORM_L1_2args)
{
generateTestData
();
SCOPED_TRACE
(
relative
?
"NORM_RELATIVE"
:
""
);
int
type
=
NORM_L1
;
if
(
relative
==
1
)
type
|=
NORM_RELATIVE
;
...
...
@@ -1347,6 +1353,8 @@ OCL_TEST_P(Norm, NORM_L1_2args_mask)
{
generateTestData
();
SCOPED_TRACE
(
relative
?
"NORM_RELATIVE"
:
""
);
int
type
=
NORM_L1
;
if
(
relative
==
1
)
type
|=
NORM_RELATIVE
;
...
...
@@ -1365,6 +1373,8 @@ OCL_TEST_P(Norm, NORM_L2_2args)
{
generateTestData
();
SCOPED_TRACE
(
relative
?
"NORM_RELATIVE"
:
""
);
int
type
=
NORM_L2
;
if
(
relative
==
1
)
type
|=
NORM_RELATIVE
;
...
...
@@ -1383,6 +1393,8 @@ OCL_TEST_P(Norm, NORM_L2_2args_mask)
{
generateTestData
();
SCOPED_TRACE
(
relative
?
"NORM_RELATIVE"
:
""
);
int
type
=
NORM_L2
;
if
(
relative
==
1
)
type
|=
NORM_RELATIVE
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment