Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
7f2662b3
Commit
7f2662b3
authored
Jun 08, 2014
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixes
parent
5403bdd2
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
62 additions
and
46 deletions
+62
-46
ocl_defs.hpp
modules/core/include/opencv2/core/opencl/ocl_defs.hpp
+1
-1
minmaxloc.cl
modules/core/src/opencl/minmaxloc.cl
+26
-14
reduce.cl
modules/core/src/opencl/reduce.cl
+25
-22
stat.cpp
modules/core/src/stat.cpp
+10
-9
No files found.
modules/core/include/opencv2/core/opencl/ocl_defs.hpp
View file @
7f2662b3
...
...
@@ -5,7 +5,7 @@
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
#define CV_OPENCL_RUN_ASSERT
#define CV_OPENCL_RUN_ASSERT
#ifdef HAVE_OPENCL
...
...
modules/core/src/opencl/minmaxloc.cl
View file @
7f2662b3
...
...
@@ -15,16 +15,16 @@
#
ifdef
DEPTH_0
#
define
MIN_VAL
0
#
define
MAX_VAL
255
#
define
MAX_VAL
UCHAR_MAX
#
elif
defined
DEPTH_1
#
define
MIN_VAL
-128
#
define
MAX_VAL
127
#
define
MIN_VAL
SCHAR_MIN
#
define
MAX_VAL
SCHAR_MAX
#
elif
defined
DEPTH_2
#
define
MIN_VAL
0
#
define
MAX_VAL
65535
#
define
MAX_VAL
USHRT_MAX
#
elif
defined
DEPTH_3
#
define
MIN_VAL
-32768
#
define
MAX_VAL
32767
#
define
MIN_VAL
SHRT_MIN
#
define
MAX_VAL
SHRT_MAX
#
elif
defined
DEPTH_4
#
define
MIN_VAL
INT_MIN
#
define
MAX_VAL
INT_MAX
...
...
@@ -39,6 +39,14 @@
#
define
noconvert
#
define
INDEX_MAX
UINT_MAX
#
if
kercn
!=
3
#
define
loadpix
(
addr
)
*
(
__global
const
srcT
*
)(
addr
)
#
define
srcTSIZE
(
int
)
sizeof
(
srcT1
)
#
else
#
define
loadpix
(
addr
)
vload3
(
0
,
(
__global
const
srcT1
*
)(
addr
))
#
define
srcTSIZE
((
int
)
sizeof
(
srcT1
))
#
endif
#
ifdef
NEED_MINLOC
#
define
CALC_MINLOC
(
inc
)
minloc
=
id
+
inc
#
else
...
...
@@ -154,22 +162,22 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
#
endif
{
#
ifdef
HAVE_SRC_CONT
src_index
=
mul24
(
id,
(
int
)
sizeof
(
srcT1
)
)
;
src_index
=
mul24
(
id,
srcTSIZE
)
;
#
else
src_index
=
mad24
(
id
/
cols,
src_step,
mul24
(
id
%
cols,
(
int
)
sizeof
(
srcT1
)
))
;
src_index
=
mad24
(
id
/
cols,
src_step,
mul24
(
id
%
cols,
srcTSIZE
))
;
#
endif
temp
=
convertToDT
(
*
(
__global
const
srcT
*
)
(
srcptr
+
src_index
))
;
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
;
#
ifdef
OP_ABS
temp
=
temp
>=
(
dstT
)(
0
)
?
temp
:
-temp
;
#
endif
#
ifdef
HAVE_SRC2
#
ifdef
HAVE_SRC2_CONT
src2_index
=
mul24
(
id,
(
int
)
sizeof
(
srcT1
)
)
;
src2_index
=
mul24
(
id,
srcTSIZE
)
;
#
else
src2_index
=
mad24
(
id
/
cols,
src2_step,
mul24
(
id
%
cols,
(
int
)
sizeof
(
srcT1
)
))
;
src2_index
=
mad24
(
id
/
cols,
src2_step,
mul24
(
id
%
cols,
srcTSIZE
))
;
#
endif
temp2
=
convertToDT
(
*
(
__global
const
srcT
*
)
(
src2ptr
+
src2_index
))
;
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
;
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
;
#
ifdef
OP_CALC2
temp2
=
temp2
>=
(
dstT
)(
0
)
?
temp2
:
-temp2
;
...
...
@@ -202,8 +210,9 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
#
elif
kercn
>=
2
CALC_P
(
s0,
0
)
CALC_P
(
s1,
1
)
#
if
kercn
>=
4
#
if
kercn
>=
3
CALC_P
(
s2,
2
)
#
if
kercn
>=
4
CALC_P
(
s3,
3
)
#
if
kercn
>=
8
CALC_P
(
s4,
4
)
...
...
@@ -222,6 +231,7 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
#
endif
#
endif
#
endif
#
endif
#
endif
}
}
...
...
@@ -335,9 +345,11 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
#
endif
#
ifdef
NEED_MAXLOC
*
(
__global
uint
*
)(
dstptr
+
mad24
(
gid,
(
int
)
sizeof
(
uint
)
,
pos
))
=
localmem_maxloc[0]
;
#
endif
#
ifdef
OP_CALC2
pos
=
mad24
(
groupnum,
(
int
)
sizeof
(
uint
)
,
pos
)
;
#
endif
#
endif
#
ifdef
OP_CALC2
*
(
__global
dstT1
*
)(
dstptr
+
mad24
(
gid,
(
int
)
sizeof
(
dstT1
)
,
pos
))
=
localmem_max2[0]
;
#
endif
}
...
...
modules/core/src/opencl/reduce.cl
View file @
7f2662b3
...
...
@@ -148,11 +148,9 @@
#
ifdef
OP_CALC2
#
define
DECLARE_LOCAL_MEM
\
__local
dstT
localmem[WGS2_ALIGNED]
; \
__local
dstT
localmem2[WGS2_ALIGNED]
__local
dstT
localmem[WGS2_ALIGNED],
localmem2[WGS2_ALIGNED]
#
define
DEFINE_ACCUMULATOR
\
dstT
accumulator
=
(
dstT
)(
0
)
; \
dstT
accumulator2
=
(
dstT
)(
0
)
dstT
accumulator
=
(
dstT
)(
0
)
,
accumulator2
=
(
dstT
)(
0
)
#
else
#
define
DECLARE_LOCAL_MEM
\
__local
dstT
localmem[WGS2_ALIGNED]
...
...
@@ -163,10 +161,10 @@
#
ifdef
HAVE_SRC2
#
ifdef
OP_CALC2
#
define
PROCESS_ELEMS
\
dstT
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
-
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
dstT
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstT
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
-=
temp2
; \
temp
=
temp
>
(
dstT
)(
0
)
?
temp
:
-temp
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
temp
2
=
temp2
>=
(
dstT
)(
0
)
?
temp2
:
-temp2
; \
FUNC
(
accumulator2,
temp2
)
; \
FUNC
(
accumulator,
temp
)
#
else
...
...
@@ -258,6 +256,7 @@
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
temp2
=
temp2
>=
(
dstT
)(
0
)
?
temp2
:
-temp2
; \
FUNC
(
accumulator,
temp
)
; \
FUNC
(
accumulator2,
temp2
)
#
elif
kercn
==
2
...
...
@@ -265,6 +264,7 @@
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
temp2
=
temp2
>=
(
dstT
)(
0
)
?
temp2
:
-temp2
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
; \
FUNC
(
accumulator2,
temp2.s0
)
; \
...
...
@@ -274,6 +274,7 @@
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
temp2
=
temp2
>=
(
dstT
)(
0
)
?
temp2
:
-temp2
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
; \
FUNC
(
accumulator,
temp.s2
)
; \
...
...
@@ -287,6 +288,7 @@
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
temp2
=
temp2
>=
(
dstT
)(
0
)
?
temp2
:
-temp2
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
; \
FUNC
(
accumulator,
temp.s2
)
; \
...
...
@@ -308,6 +310,7 @@
dstTK
temp
=
convertToDT
(
loadpix
(
srcptr
+
src_index
))
; \
dstTK
temp2
=
convertToDT
(
loadpix
(
src2ptr
+
src2_index
))
; \
temp
=
temp
>
temp2
?
temp
-
temp2
:
(
temp2
-
temp
)
; \
temp2
=
temp2
>=
(
dstT
)(
0
)
?
temp2
:
-temp2
; \
FUNC
(
accumulator,
temp.s0
)
; \
FUNC
(
accumulator,
temp.s1
)
; \
FUNC
(
accumulator,
temp.s2
)
; \
...
...
@@ -452,6 +455,20 @@
#
endif
#
endif
#
ifdef
OP_CALC2
#
define
SET_LOCAL_1
\
localmem[lid]
=
accumulator
; \
localmem2[lid]
=
accumulator2
#
define
REDUCE_LOCAL_1
\
localmem[lid
-
WGS2_ALIGNED]
+=
accumulator
; \
localmem2[lid
-
WGS2_ALIGNED]
+=
accumulator2
#
define
REDUCE_LOCAL_2
\
localmem[lid]
+=
localmem[lid2]
; \
localmem2[lid]
+=
localmem2[lid2]
#
define
CALC_RESULT
\
storepix
(
localmem[0],
dstptr
+
dstTSIZE
*
gid
)
; \
storepix
(
localmem2[0],
dstptr
+
mad24
(
groupnum,
dstTSIZE,
dstTSIZE
*
gid
))
#
else
#
define
SET_LOCAL_1
\
localmem[lid]
=
accumulator
#
define
REDUCE_LOCAL_1
\
...
...
@@ -460,6 +477,7 @@
localmem[lid]
+=
localmem[lid2]
#
define
CALC_RESULT
\
storepix
(
localmem[0],
dstptr
+
dstTSIZE
*
gid
)
#
endif
//
countNonZero
stuff
#
elif
defined
OP_COUNT_NON_ZERO
...
...
@@ -516,20 +534,6 @@
accumulator
+=
value.sF
==
zero
?
zero
:
one
#
endif
#
ifdef
OP_CALC2
#
define
SET_LOCAL_1
\
localmem[lid]
=
accumulator
; \
localmem2[lid]
=
accumulator2
; \
#
define
REDUCE_LOCAL_1
\
localmem[lid
-
WGS2_ALIGNED]
+=
accumulator
; \
localmem2[lid
-
WGS2_ALIGNED]
+=
accumulator2
#
define
REDUCE_LOCAL_2
\
localmem[lid]
+=
localmem[lid2]
; \
localmem2[lid]
+=
localmem2[lid2]
#
define
CALC_RESULT
\
storepix
(
localmem[0],
dstptr
+
dstTSIZE
*
gid
)
; \
storepix
(
localmem2[0],
dstptr
+
mad24
(
groupnum,
srcTSIZE,
dstTSIZE
*
gid
))
#
else
#
define
SET_LOCAL_1
\
localmem[lid]
=
accumulator
#
define
REDUCE_LOCAL_1
\
...
...
@@ -538,7 +542,6 @@
localmem[lid]
+=
localmem[lid2]
#
define
CALC_RESULT
\
storepix
(
localmem[0],
dstptr
+
dstTSIZE
*
gid
)
#
endif
//
norm
(
NORM_INF
)
with
cn
>
1
and
mask
#
elif
defined
OP_NORM_INF_MASK
...
...
modules/core/src/stat.cpp
View file @
7f2662b3
...
...
@@ -550,9 +550,9 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask
Mat
mres
=
db
.
getMat
(
ACCESS_READ
);
if
(
calc2
)
const_cast
<
Scalar
&>
(
res2
)
=
func
(
mres
.
colRange
(
dbsize
,
dbsize
));
const_cast
<
Scalar
&>
(
res2
)
=
func
(
mres
.
colRange
(
ngroups
,
dbsize
));
res
=
func
(
mres
.
colRange
(
0
,
dbsize
));
res
=
func
(
mres
.
colRange
(
0
,
ngroups
));
return
true
;
}
return
false
;
...
...
@@ -1434,10 +1434,10 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
bool
doubleSupport
=
dev
.
doubleFPConfig
()
>
0
,
haveMask
=
!
_mask
.
empty
(),
haveSrc2
=
_src2
.
kind
()
!=
_InputArray
::
NONE
;
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
kercn
=
haveMask
?
1
:
std
::
min
(
4
,
ocl
::
predictOptimalVectorWidth
(
_src
));
kercn
=
haveMask
?
cn
:
std
::
min
(
4
,
ocl
::
predictOptimalVectorWidth
(
_src
));
CV_Assert
(
(
cn
==
1
&&
(
_mask
.
empty
()
||
_mask
.
type
()
==
CV_8U
))
||
(
cn
>=
1
&&
_mask
.
empty
(
)
&&
!
minLoc
&&
!
maxLoc
)
);
CV_Assert
(
(
cn
==
1
&&
(
!
haveMask
||
_mask
.
type
()
==
CV_8U
))
||
(
cn
>=
1
&&
(
!
haveMask
||
haveSrc2
)
&&
!
minLoc
&&
!
maxLoc
)
);
if
(
ddepth
<
0
)
ddepth
=
depth
;
...
...
@@ -1484,6 +1484,8 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
haveSrc2
?
" -D HAVE_SRC2"
:
""
,
maxVal2
?
" -D OP_CALC2"
:
""
,
haveSrc2
&&
_src2
.
isContinuous
()
?
" -D HAVE_SRC2_CONT"
:
""
);
printf
(
"%s
\n
"
,
opts
.
c_str
());
ocl
::
Kernel
k
(
"minmaxloc"
,
ocl
::
core
::
minmaxloc_oclsrc
,
opts
);
if
(
k
.
empty
())
return
false
;
...
...
@@ -2556,9 +2558,9 @@ static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArr
{
Scalar
sc1
,
sc2
;
int
type
=
_src1
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
bool
relative
=
(
normType
&
NORM_RELATIVE
)
!=
0
,
normsum
=
normType
==
NORM_L1
||
normType
==
NORM_L2
||
normType
==
NORM_L2SQR
;
bool
relative
=
(
normType
&
NORM_RELATIVE
)
!=
0
;
normType
&=
~
NORM_RELATIVE
;
bool
normsum
=
normType
==
NORM_L1
||
normType
==
NORM_L2
||
normType
==
NORM_L2SQR
;
if
(
!
(
normType
==
NORM_INF
||
normsum
)
)
return
false
;
...
...
@@ -2608,8 +2610,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
#ifdef HAVE_OPENCL
double
_result
=
0
;
CV_OCL_RUN_
(
_src1
.
isUMat
()
&&
_src2
.
isUMat
()
&&
_src1
.
dims
()
<=
2
&&
_src2
.
dims
()
<=
2
,
CV_OCL_RUN_
(
_src1
.
isUMat
(),
ocl_norm
(
_src1
,
_src2
,
normType
,
_mask
,
_result
),
_result
)
#endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment