Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
88ceee05
Commit
88ceee05
authored
Jun 10, 2014
by
Alexander Alekhin
Committed by
OpenCV Buildbot
Jun 10, 2014
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2837 from ilya-lavrenov:tapi_norm_relative
parents
96ce9810
634da9f3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
382 additions
and
0 deletions
+382
-0
minmaxloc.cl
modules/core/src/opencl/minmaxloc.cl
+370
-0
reduce.cl
modules/core/src/opencl/reduce.cl
+0
-0
stat.cpp
modules/core/src/stat.cpp
+0
-0
test_arithm.cpp
modules/core/test/ocl/test_arithm.cpp
+12
-0
No files found.
modules/core/src/opencl/minmaxloc.cl
0 → 100644
View file @
88ceee05
//
This
file
is
part
of
OpenCV
project.
//
It
is
subject
to
the
license
terms
in
the
LICENSE
file
found
in
the
top-level
directory
//
of
this
distribution
and
at
http://opencv.org/license.html.
//
Copyright
(
C
)
2014
,
Itseez,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
ifdef
DEPTH_0
#
define
MIN_VAL
0
#
define
MAX_VAL
UCHAR_MAX
#
elif
defined
DEPTH_1
#
define
MIN_VAL
SCHAR_MIN
#
define
MAX_VAL
SCHAR_MAX
#
elif
defined
DEPTH_2
#
define
MIN_VAL
0
#
define
MAX_VAL
USHRT_MAX
#
elif
defined
DEPTH_3
#
define
MIN_VAL
SHRT_MIN
#
define
MAX_VAL
SHRT_MAX
#
elif
defined
DEPTH_4
#
define
MIN_VAL
INT_MIN
#
define
MAX_VAL
INT_MAX
#
elif
defined
DEPTH_5
#
define
MIN_VAL
(
-FLT_MAX
)
#
define
MAX_VAL
FLT_MAX
#
elif
defined
DEPTH_6
#
define
MIN_VAL
(
-DBL_MAX
)
#
define
MAX_VAL
DBL_MAX
#
endif
#
define
noconvert
#
define
INDEX_MAX
UINT_MAX
#
if
kercn
!=
3
#
define
loadpix
(
addr
)
*
(
__global
const
srcT
*
)(
addr
)
#
define
srcTSIZE
(
int
)
sizeof
(
srcT
)
#
else
#
define
loadpix
(
addr
)
vload3
(
0
,
(
__global
const
srcT1
*
)(
addr
))
#
define
srcTSIZE
((
int
)
sizeof
(
srcT1
)
*
3
)
#
endif
#
ifndef
HAVE_MASK
#
undef
srcTSIZE
#
define
srcTSIZE
(
int
)
sizeof
(
srcT1
)
#
endif
#
ifdef
NEED_MINLOC
#
define
CALC_MINLOC
(
inc
)
minloc
=
id
+
inc
#
else
#
define
CALC_MINLOC
(
inc
)
#
endif
#
ifdef
NEED_MAXLOC
#
define
CALC_MAXLOC
(
inc
)
maxloc
=
id
+
inc
#
else
#
define
CALC_MAXLOC
(
inc
)
#
endif
#
ifdef
NEED_MINVAL
#
define
CALC_MIN
(
p,
inc
)
\
if
(
minval
>
temp.p
)
\
{
\
minval
=
temp.p
; \
CALC_MINLOC
(
inc
)
; \
}
#
else
#
define
CALC_MIN
(
p,
inc
)
#
endif
#
ifdef
NEED_MAXVAL
#
define
CALC_MAX
(
p,
inc
)
\
if
(
maxval
<
temp.p
)
\
{
\
maxval
=
temp.p
; \
CALC_MAXLOC
(
inc
)
; \
}
#
else
#
define
CALC_MAX
(
p,
inc
)
#
endif
#
ifdef
OP_CALC2
#
define
CALC_MAX2
(
p
)
\
if
(
maxval2
<
temp.p
)
\
maxval2
=
temp.p
;
#
else
#
define
CALC_MAX2
(
p
)
#
endif
#
define
CALC_P
(
p,
inc
)
\
CALC_MIN
(
p,
inc
)
\
CALC_MAX
(
p,
inc
)
\
CALC_MAX2
(
p
)
__kernel
void
minmaxloc
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
int
cols,
int
total,
int
groupnum,
__global
uchar
*
dstptr
#
ifdef
HAVE_MASK
,
__global
const
uchar
*
mask,
int
mask_step,
int
mask_offset
#
endif
#
ifdef
HAVE_SRC2
,
__global
const
uchar
*
src2ptr,
int
src2_step,
int
src2_offset
#
endif
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
id
=
get_global_id
(
0
)
#
ifndef
HAVE_MASK
*
kercn
;
#
else
;
#
endif
srcptr
+=
src_offset
;
#
ifdef
HAVE_MASK
mask
+=
mask_offset
;
#
endif
#
ifdef
HAVE_SRC2
src2ptr
+=
src2_offset
;
#
endif
#
ifdef
NEED_MINVAL
__local
dstT1
localmem_min[WGS2_ALIGNED]
;
dstT1
minval
=
MAX_VAL
;
#
ifdef
NEED_MINLOC
__local
uint
localmem_minloc[WGS2_ALIGNED]
;
uint
minloc
=
INDEX_MAX
;
#
endif
#
endif
#
ifdef
NEED_MAXVAL
dstT1
maxval
=
MIN_VAL
;
__local
dstT1
localmem_max[WGS2_ALIGNED]
;
#
ifdef
NEED_MAXLOC
__local
uint
localmem_maxloc[WGS2_ALIGNED]
;
uint
maxloc
=
INDEX_MAX
;
#
endif
#
endif
#
ifdef
OP_CALC2
__local
dstT1
localmem_max2[WGS2_ALIGNED]
;
dstT1
maxval2
=
MIN_VAL
;
#
endif
int
src_index
;
#
ifdef
HAVE_MASK
int
mask_index
;
#
endif
#
ifdef
HAVE_SRC2
int
src2_index
;
#
endif
dstT
temp
;
#
ifdef
HAVE_SRC2
dstT
temp2
;
#
endif
for
(
int
grain
=
groupnum
*
WGS
#
ifndef
HAVE_MASK
*
kercn
#
endif
; id < total; id += grain)
{
#
ifdef
HAVE_MASK
#
ifdef
HAVE_MASK_CONT
mask_index
=
id
;
#
else
mask_index
=
mad24
(
id
/
cols,
mask_step,
id
%
cols
)
;
#
endif
if
(
mask[mask_index]
)
#
endif
{
#
ifdef
HAVE_SRC_CONT
src_index
=
mul24
(
id,
srcTSIZE
)
;
#
else
src_index
=
mad24
(
id
/
cols,
src_step,
mul24
(
id
%
cols,
srcTSIZE
))
;
#
endif
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
;
#
ifdef
OP_ABS
temp
=
temp
>=
(
dstT
)(
0
)
?
temp
:
-temp
;
#
endif
#
ifdef
HAVE_SRC2
#
ifdef
HAVE_SRC2_CONT
src2_index
=
mul24
(
id,
srcTSIZE
)
;
#
else
src2_index
=
mad24
(
id
/
cols,
src2_step,
mul24
(
id
%
cols,
srcTSIZE
))
;
#
endif
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
;
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
;
#
ifdef
OP_CALC2
temp2
=
temp2
>=
(
dstT
)(
0
)
?
temp2
:
-temp2
;
#
endif
#
endif
#
if
kercn
==
1
#
ifdef
NEED_MINVAL
if
(
minval
>
temp
)
{
minval
=
temp
;
#
ifdef
NEED_MINLOC
minloc
=
id
;
#
endif
}
#
endif
#
ifdef
NEED_MAXVAL
if
(
maxval
<
temp
)
{
maxval
=
temp
;
#
ifdef
NEED_MAXLOC
maxloc
=
id
;
#
endif
}
#
ifdef
OP_CALC2
if
(
maxval2
<
temp2
)
maxval2
=
temp2
;
#
endif
#
endif
#
elif
kercn
>=
2
CALC_P
(
s0,
0
)
CALC_P
(
s1,
1
)
#
if
kercn
>=
3
CALC_P
(
s2,
2
)
#
if
kercn
>=
4
CALC_P
(
s3,
3
)
#
if
kercn
>=
8
CALC_P
(
s4,
4
)
CALC_P
(
s5,
5
)
CALC_P
(
s6,
6
)
CALC_P
(
s7,
7
)
#
if
kercn
==
16
CALC_P
(
s8,
8
)
CALC_P
(
s9,
9
)
CALC_P
(
sA,
10
)
CALC_P
(
sB,
11
)
CALC_P
(
sC,
12
)
CALC_P
(
sD,
13
)
CALC_P
(
sE,
14
)
CALC_P
(
sF,
15
)
#
endif
#
endif
#
endif
#
endif
#
endif
}
}
if
(
lid
<
WGS2_ALIGNED
)
{
#
ifdef
NEED_MINVAL
localmem_min[lid]
=
minval
;
#
endif
#
ifdef
NEED_MAXVAL
localmem_max[lid]
=
maxval
;
#
endif
#
ifdef
NEED_MINLOC
localmem_minloc[lid]
=
minloc
;
#
endif
#
ifdef
NEED_MAXLOC
localmem_maxloc[lid]
=
maxloc
;
#
endif
#
ifdef
OP_CALC2
localmem_max2[lid]
=
maxval2
;
#
endif
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
>=
WGS2_ALIGNED
&&
total
>=
WGS2_ALIGNED
)
{
int
lid3
=
lid
-
WGS2_ALIGNED
;
#
ifdef
NEED_MINVAL
if
(
localmem_min[lid3]
>=
minval
)
{
#
ifdef
NEED_MINLOC
if
(
localmem_min[lid3]
==
minval
)
localmem_minloc[lid3]
=
min
(
localmem_minloc[lid3],
minloc
)
;
else
localmem_minloc[lid3]
=
minloc,
#
endif
localmem_min[lid3]
=
minval
;
}
#
endif
#
ifdef
NEED_MAXVAL
if
(
localmem_max[lid3]
<=
maxval
)
{
#
ifdef
NEED_MAXLOC
if
(
localmem_max[lid3]
==
maxval
)
localmem_maxloc[lid3]
=
min
(
localmem_maxloc[lid3],
maxloc
)
;
else
localmem_maxloc[lid3]
=
maxloc,
#
endif
localmem_max[lid3]
=
maxval
;
}
#
endif
#
ifdef
OP_CALC2
if
(
localmem_max2[lid3]
<
maxval2
)
localmem_max2[lid3]
=
maxval2
;
#
endif
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
WGS2_ALIGNED
>>
1
; lsize > 0; lsize >>= 1)
{
if
(
lid
<
lsize
)
{
int
lid2
=
lsize
+
lid
;
#
ifdef
NEED_MINVAL
if
(
localmem_min[lid]
>=
localmem_min[lid2]
)
{
#
ifdef
NEED_MINLOC
if
(
localmem_min[lid]
==
localmem_min[lid2]
)
localmem_minloc[lid]
=
min
(
localmem_minloc[lid2],
localmem_minloc[lid]
)
;
else
localmem_minloc[lid]
=
localmem_minloc[lid2],
#
endif
localmem_min[lid]
=
localmem_min[lid2]
;
}
#
endif
#
ifdef
NEED_MAXVAL
if
(
localmem_max[lid]
<=
localmem_max[lid2]
)
{
#
ifdef
NEED_MAXLOC
if
(
localmem_max[lid]
==
localmem_max[lid2]
)
localmem_maxloc[lid]
=
min
(
localmem_maxloc[lid2],
localmem_maxloc[lid]
)
;
else
localmem_maxloc[lid]
=
localmem_maxloc[lid2],
#
endif
localmem_max[lid]
=
localmem_max[lid2]
;
}
#
endif
#
ifdef
OP_CALC2
if
(
localmem_max2[lid]
<
localmem_max2[lid2]
)
localmem_max2[lid]
=
localmem_max2[lid2]
;
#
endif
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lid
==
0
)
{
int
pos
=
0
;
#
ifdef
NEED_MINVAL
*
(
__global
dstT1
*
)(
dstptr
+
mad24
(
gid,
(
int
)
sizeof
(
dstT1
)
,
pos
))
=
localmem_min[0]
;
pos
=
mad24
(
groupnum,
(
int
)
sizeof
(
dstT1
)
,
pos
)
;
#
endif
#
ifdef
NEED_MAXVAL
*
(
__global
dstT1
*
)(
dstptr
+
mad24
(
gid,
(
int
)
sizeof
(
dstT1
)
,
pos
))
=
localmem_max[0]
;
pos
=
mad24
(
groupnum,
(
int
)
sizeof
(
dstT1
)
,
pos
)
;
#
endif
#
ifdef
NEED_MINLOC
*
(
__global
uint
*
)(
dstptr
+
mad24
(
gid,
(
int
)
sizeof
(
uint
)
,
pos
))
=
localmem_minloc[0]
;
pos
=
mad24
(
groupnum,
(
int
)
sizeof
(
uint
)
,
pos
)
;
#
endif
#
ifdef
NEED_MAXLOC
*
(
__global
uint
*
)(
dstptr
+
mad24
(
gid,
(
int
)
sizeof
(
uint
)
,
pos
))
=
localmem_maxloc[0]
;
#
ifdef
OP_CALC2
pos
=
mad24
(
groupnum,
(
int
)
sizeof
(
uint
)
,
pos
)
;
#
endif
#
endif
#
ifdef
OP_CALC2
*
(
__global
dstT1
*
)(
dstptr
+
mad24
(
gid,
(
int
)
sizeof
(
dstT1
)
,
pos
))
=
localmem_max2[0]
;
#
endif
}
}
modules/core/src/opencl/reduce.cl
View file @
88ceee05
This diff is collapsed.
Click to expand it.
modules/core/src/stat.cpp
View file @
88ceee05
This diff is collapsed.
Click to expand it.
modules/core/test/ocl/test_arithm.cpp
View file @
88ceee05
...
...
@@ -1293,6 +1293,8 @@ OCL_TEST_P(Norm, NORM_INF_2args)
{
generateTestData
();
SCOPED_TRACE
(
relative
?
"NORM_RELATIVE"
:
""
);
int
type
=
NORM_INF
;
if
(
relative
==
1
)
type
|=
NORM_RELATIVE
;
...
...
@@ -1311,6 +1313,8 @@ OCL_TEST_P(Norm, NORM_INF_2args_mask)
{
generateTestData
();
SCOPED_TRACE
(
relative
?
"NORM_RELATIVE"
:
""
);
int
type
=
NORM_INF
;
if
(
relative
==
1
)
type
|=
NORM_RELATIVE
;
...
...
@@ -1329,6 +1333,8 @@ OCL_TEST_P(Norm, NORM_L1_2args)
{
generateTestData
();
SCOPED_TRACE
(
relative
?
"NORM_RELATIVE"
:
""
);
int
type
=
NORM_L1
;
if
(
relative
==
1
)
type
|=
NORM_RELATIVE
;
...
...
@@ -1347,6 +1353,8 @@ OCL_TEST_P(Norm, NORM_L1_2args_mask)
{
generateTestData
();
SCOPED_TRACE
(
relative
?
"NORM_RELATIVE"
:
""
);
int
type
=
NORM_L1
;
if
(
relative
==
1
)
type
|=
NORM_RELATIVE
;
...
...
@@ -1365,6 +1373,8 @@ OCL_TEST_P(Norm, NORM_L2_2args)
{
generateTestData
();
SCOPED_TRACE
(
relative
?
"NORM_RELATIVE"
:
""
);
int
type
=
NORM_L2
;
if
(
relative
==
1
)
type
|=
NORM_RELATIVE
;
...
...
@@ -1383,6 +1393,8 @@ OCL_TEST_P(Norm, NORM_L2_2args_mask)
{
generateTestData
();
SCOPED_TRACE
(
relative
?
"NORM_RELATIVE"
:
""
);
int
type
=
NORM_L2
;
if
(
relative
==
1
)
type
|=
NORM_RELATIVE
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment