Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
c00f0faf
Commit
c00f0faf
authored
Mar 04, 2014
by
Andrey Pavlenko
Committed by
OpenCV Buildbot
Mar 04, 2014
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2408 from ilya-lavrenov:tapi_core_opt
parents
5527f287
fe38aab8
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
21 changed files
with
120 additions
and
103 deletions
+120
-103
ocl_defs.hpp
modules/core/include/opencv2/core/opencl/ocl_defs.hpp
+1
-1
arithm.cpp
modules/core/src/arithm.cpp
+2
-2
convert.cpp
modules/core/src/convert.cpp
+5
-10
mathfuncs.cpp
modules/core/src/mathfuncs.cpp
+4
-4
matmul.cpp
modules/core/src/matmul.cpp
+2
-2
arithm.cl
modules/core/src/opencl/arithm.cl
+0
-0
convert.cl
modules/core/src/opencl/convert.cl
+4
-4
copymakeborder.cl
modules/core/src/opencl/copymakeborder.cl
+7
-7
copyset.cl
modules/core/src/opencl/copyset.cl
+7
-7
flip.cl
modules/core/src/opencl/flip.cl
+14
-12
inrange.cl
modules/core/src/opencl/inrange.cl
+4
-5
lut.cl
modules/core/src/opencl/lut.cl
+3
-11
mixchannels.cl
modules/core/src/opencl/mixchannels.cl
+2
-2
mulspectrums.cl
modules/core/src/opencl/mulspectrums.cl
+4
-4
reduce.cl
modules/core/src/opencl/reduce.cl
+26
-7
reduce2.cl
modules/core/src/opencl/reduce2.cl
+1
-1
set_identity.cl
modules/core/src/opencl/set_identity.cl
+1
-1
split_merge.cl
modules/core/src/opencl/split_merge.cl
+4
-4
transpose.cl
modules/core/src/opencl/transpose.cl
+11
-11
stat.cpp
modules/core/src/stat.cpp
+2
-2
umatrix.cpp
modules/core/src/umatrix.cpp
+16
-6
No files found.
modules/core/include/opencv2/core/opencl/ocl_defs.hpp
View file @
c00f0faf
...
...
@@ -5,7 +5,7 @@
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//#define CV_OPENCL_RUN_
VERBOSE
//#define CV_OPENCL_RUN_
ASSERT
#ifdef HAVE_OPENCL
...
...
modules/core/src/arithm.cpp
View file @
c00f0faf
...
...
@@ -1318,7 +1318,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
char
cvtstr
[
4
][
32
],
opts
[
1024
];
sprintf
(
opts
,
"-D %s%s -D %s -D srcT1=%s -D srcT1_C1=%s -D srcT2=%s -D srcT2_C1=%s "
"-D dstT=%s -D dstT_C1=%s -D workT=%s -D workST=%s -D scaleT=%s -D convertToWT1=%s "
"-D dstT=%s -D dstT_C1=%s -D workT=%s -D workST=%s -D scaleT=%s -D
wdepth=%d -D
convertToWT1=%s "
"-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d"
,
(
haveMask
?
"MASK_"
:
""
),
(
haveScalar
?
"UNARY_OP"
:
"BINARY_OP"
),
oclop2str
[
oclop
],
ocl
::
typeToStr
(
CV_MAKETYPE
(
depth1
,
kercn
)),
...
...
@@ -1329,7 +1329,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
ocl
::
typeToStr
(
CV_MAKETYPE
(
ddepth
,
1
)),
ocl
::
typeToStr
(
CV_MAKETYPE
(
wdepth
,
kercn
)),
ocl
::
typeToStr
(
CV_MAKETYPE
(
wdepth
,
scalarcn
)),
ocl
::
typeToStr
(
CV_MAKETYPE
(
wdepth
,
1
)),
ocl
::
typeToStr
(
CV_MAKETYPE
(
wdepth
,
1
)),
wdepth
,
ocl
::
convertTypeStr
(
depth1
,
wdepth
,
kercn
,
cvtstr
[
0
]),
ocl
::
convertTypeStr
(
depth2
,
wdepth
,
kercn
,
cvtstr
[
1
]),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
kercn
,
cvtstr
[
2
]),
...
...
modules/core/src/convert.cpp
View file @
c00f0faf
...
...
@@ -1320,8 +1320,8 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha
int
wdepth
=
std
::
max
(
depth
,
CV_32F
);
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=uchar -D srcT1=%s"
" -D workT=%s -D convertToWT1=%s -D convertToDT=%s%s"
,
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
wdepth
),
" -D workT=%s -D
wdepth=%d -D
convertToWT1=%s -D convertToDT=%s%s"
,
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
wdepth
),
wdepth
,
ocl
::
convertTypeStr
(
depth
,
wdepth
,
1
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
CV_8U
,
1
,
cvt
[
1
]),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
...
...
@@ -1492,19 +1492,14 @@ static LUTFunc lutTab[] =
static
bool
ocl_LUT
(
InputArray
_src
,
InputArray
_lut
,
OutputArray
_dst
)
{
int
dtype
=
_dst
.
type
(),
lcn
=
_lut
.
channels
(),
dcn
=
CV_MAT_CN
(
dtype
),
ddepth
=
CV_MAT_DEPTH
(
dtype
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
_src
.
dims
()
>
2
||
(
!
doubleSupport
&&
ddepth
==
CV_64F
))
return
false
;
UMat
src
=
_src
.
getUMat
(),
lut
=
_lut
.
getUMat
();
_dst
.
create
(
src
.
size
(),
dtype
);
UMat
dst
=
_dst
.
getUMat
();
ocl
::
Kernel
k
(
"LUT"
,
ocl
::
core
::
lut_oclsrc
,
format
(
"-D dcn=%d -D lcn=%d -D srcT=%s -D dstT=%s%s"
,
dcn
,
lcn
,
ocl
::
typeToStr
(
src
.
depth
()),
ocl
::
typeToStr
(
ddepth
),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
format
(
"-D dcn=%d -D lcn=%d -D srcT=%s -D dstT=%s"
,
dcn
,
lcn
,
ocl
::
typeToStr
(
src
.
depth
()),
ocl
::
memopTypeToStr
(
ddepth
)));
if
(
k
.
empty
())
return
false
;
...
...
@@ -1528,7 +1523,7 @@ void cv::LUT( InputArray _src, InputArray _lut, OutputArray _dst )
_lut
.
total
()
==
256
&&
_lut
.
isContinuous
()
&&
(
depth
==
CV_8U
||
depth
==
CV_8S
)
);
CV_OCL_RUN
(
_dst
.
isUMat
(),
CV_OCL_RUN
(
_dst
.
isUMat
()
&&
_src
.
dims
()
<=
2
,
ocl_LUT
(
_src
,
_lut
,
_dst
))
Mat
src
=
_src
.
getMat
(),
lut
=
_lut
.
getMat
();
...
...
modules/core/src/mathfuncs.cpp
View file @
c00f0faf
...
...
@@ -508,9 +508,9 @@ static bool ocl_cartToPolar( InputArray _src1, InputArray _src2,
return
false
;
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D BINARY_OP -D dstT=%s -D OP_CTP_%s%s"
,
format
(
"-D BINARY_OP -D dstT=%s -D
depth=%d -D
OP_CTP_%s%s"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
1
)),
angleInDegrees
?
"AD"
:
"AR"
,
depth
,
angleInDegrees
?
"AD"
:
"AR"
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
return
false
;
...
...
@@ -695,8 +695,8 @@ static bool ocl_polarToCart( InputArray _mag, InputArray _angle,
return
false
;
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D dstT=%s -D BINARY_OP -D OP_PTC_%s%s"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
1
)),
format
(
"-D dstT=%s -D
depth=%d -D
BINARY_OP -D OP_PTC_%s%s"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
1
)),
depth
,
angleInDegrees
?
"AD"
:
"AR"
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
...
...
modules/core/src/matmul.cpp
View file @
c00f0faf
...
...
@@ -2166,9 +2166,9 @@ static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, Outp
char
cvt
[
2
][
50
];
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D workT=%s -D convertToWT1=%s"
format
(
"-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D workT=%s -D
wdepth=%d -D
convertToWT1=%s"
" -D srcT1=dstT -D srcT2=dstT -D convertToDT=%s%s"
,
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
wdepth
),
ocl
::
convertTypeStr
(
depth
,
wdepth
,
1
,
cvt
[
0
]),
ocl
::
typeToStr
(
wdepth
),
wdepth
,
ocl
::
convertTypeStr
(
depth
,
wdepth
,
1
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
depth
,
1
,
cvt
[
1
]),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
...
...
modules/core/src/opencl/arithm.cl
View file @
c00f0faf
This diff is collapsed.
Click to expand it.
modules/core/src/opencl/convert.cl
View file @
c00f0faf
...
...
@@ -53,19 +53,19 @@
__kernel
void
convertTo
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
float
alpha,
float
beta
)
WT
alpha,
WT
beta
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
int
src_index
=
mad24
(
y,
src_step,
src_offset
+
x
*
(
int
)
sizeof
(
srcT
)
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
(
int
)
sizeof
(
dstT
)
)
;
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
srcT
)
,
src_offset
)
)
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
dstT
)
,
dst_offset
)
)
;
__global
const
srcT
*
src
=
(
__global
const
srcT
*
)(
srcptr
+
src_index
)
;
__global
dstT
*
dst
=
(
__global
dstT
*
)(
dstptr
+
dst_index
)
;
dst[0]
=
convertToDT
(
src[0]
*
alpha
+
beta
)
;
dst[0]
=
convertToDT
(
mad
(
convertToWT
(
src[0]
)
,
alpha,
beta
)
)
;
}
}
modules/core/src/opencl/copymakeborder.cl
View file @
c00f0faf
...
...
@@ -47,9 +47,9 @@
#
elif
defined
BORDER_REPLICATE
#
define
EXTRAPOLATE
(
x,
y,
v
)
\
{
\
x
=
max
(
min
(
x,
src_cols
-
1
)
,
0
)
; \
y
=
max
(
min
(
y,
src_rows
-
1
)
,
0
)
; \
v
=
*
(
__global
const
T
*
)(
srcptr
+
mad24
(
y,
src_step,
x
*
(
int
)
sizeof
(
T
)
+
src_offset
))
; \
x
=
clamp
(
x,
0
,
src_cols
-
1
)
; \
y
=
clamp
(
y,
0
,
src_rows
-
1
)
; \
v
=
*
(
__global
const
T
*
)(
srcptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src_offset
)
))
; \
}
#
elif
defined
BORDER_WRAP
#
define
EXTRAPOLATE
(
x,
y,
v
)
\
...
...
@@ -63,7 +63,7 @@
y
-=
((
y
-
src_rows
+
1
)
/
src_rows
)
*
src_rows
; \
if
(
y
>=
src_rows
)
\
y
%=
src_rows
; \
v
=
*
(
__global
const
T
*
)(
srcptr
+
mad24
(
y,
src_step,
x
*
(
int
)
sizeof
(
T
)
+
src_offset
))
; \
v
=
*
(
__global
const
T
*
)(
srcptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src_offset
)
))
; \
}
#
elif
defined
(
BORDER_REFLECT
)
|
| defined(BORDER_REFLECT_101)
#ifdef BORDER_REFLECT
...
...
@@ -97,7 +97,7 @@
y = src_rows - 1 - (y - src_rows) - delta; \
} \
while (y >= src_rows || y < 0); \
v = *(__global const T *)(srcptr + mad24(y, src_step,
x * (int)sizeof(T) + src_offset
)); \
v = *(__global const T *)(srcptr + mad24(y, src_step,
mad24(x, (int)sizeof(T), src_offset)
)); \
}
#else
#error No extrapolation method
...
...
@@ -117,14 +117,14 @@ __kernel void copyMakeBorder(__global const uchar * srcptr, int src_step, int sr
int
src_x
=
x
-
left
;
int
src_y
=
y
-
top
;
int
dst_index
=
mad24
(
y,
dst_step,
x
*
(
int
)
sizeof
(
T
)
+
dst_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
dst_offset
)
)
;
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
dst_index
)
;
if
(
NEED_EXTRAPOLATION
(
src_x,
src_y
))
EXTRAPOLATE
(
src_x,
src_y,
dst[0]
)
else
{
int
src_index
=
mad24
(
src_y,
src_step,
src_x
*
(
int
)
sizeof
(
T
)
+
src_offset
)
;
int
src_index
=
mad24
(
src_y,
src_step,
mad24
(
src_x,
(
int
)
sizeof
(
T
)
,
src_offset
)
)
;
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
src_index
)
;
dst[0]
=
src[0]
;
}
...
...
modules/core/src/opencl/copyset.cl
View file @
c00f0faf
...
...
@@ -44,8 +44,8 @@
#
ifdef
COPY_TO_MASK
#
define
DEFINE_DATA
\
int
src_index
=
mad24
(
y,
src_step,
x*
(
int
)
sizeof
(
T
)
*scn
+
src_offset
)
; \
int
dst_index
=
mad24
(
y,
dst_step,
x*
(
int
)
sizeof
(
T
)
*scn
+
dst_offset
)
; \
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
scn,
src_offset
)
)
; \
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
scn,
dst_offset
)
)
; \
\
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
src_index
)
; \
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
dst_index
)
...
...
@@ -60,7 +60,7 @@ __kernel void copyToMask(__global const uchar * srcptr, int src_step, int src_of
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
int
mask_index
=
mad24
(
y,
mask_step,
x
*
mcn
+
mask_offset
)
;
int
mask_index
=
mad24
(
y,
mask_step,
mad24
(
x,
mcn,
mask_offset
)
)
;
__global
const
uchar
*
mask
=
(
__global
const
uchar
*
)(
maskptr
+
mask_index
)
;
#
if
mcn
==
1
...
...
@@ -93,10 +93,10 @@ __kernel void copyToMask(__global const uchar * srcptr, int src_step, int src_of
#
if
cn
!=
3
#
define
value
value_
#
define
storedst
(
val
)
*
(
__global
dstT*
)(
dstptr
+
dst_index
)
=
val
#
define
storedst
(
val
)
*
(
__global
dstT
*
)(
dstptr
+
dst_index
)
=
val
#
else
#
define
value
(
dstT
)(
value_.x,
value_.y,
value_.z
)
#
define
storedst
(
val
)
vstore3
(
val,
0
,
(
__global
dstT1*
)(
dstptr
+
dst_index
))
#
define
storedst
(
val
)
vstore3
(
val,
0
,
(
__global
dstT1
*
)(
dstptr
+
dst_index
))
#
endif
__kernel
void
setMask
(
__global
const
uchar*
mask,
int
maskstep,
int
maskoffset,
...
...
@@ -111,7 +111,7 @@ __kernel void setMask(__global const uchar* mask, int maskstep, int maskoffset,
int
mask_index
=
mad24
(
y,
maskstep,
x
+
maskoffset
)
;
if
(
mask[mask_index]
)
{
int
dst_index
=
mad24
(
y,
dststep,
x*
(
int
)
sizeof
(
dstT1
)
*cn
+
dstoffset
)
;
int
dst_index
=
mad24
(
y,
dststep,
mad24
(
x,
(
int
)
sizeof
(
dstT1
)
*
cn,
dstoffset
)
)
;
storedst
(
value
)
;
}
}
...
...
@@ -125,7 +125,7 @@ __kernel void set(__global uchar* dstptr, int dststep, int dstoffset,
if
(
x
<
cols
&&
y
<
rows
)
{
int
dst_index
=
mad24
(
y,
dststep,
x*
(
int
)
sizeof
(
dstT1
)
*cn
+
dstoffset
)
;
int
dst_index
=
mad24
(
y,
dststep,
mad24
(
x,
(
int
)
sizeof
(
dstT1
)
*
cn,
dstoffset
)
)
;
storedst
(
value
)
;
}
}
...
...
modules/core/src/opencl/flip.cl
View file @
c00f0faf
...
...
@@ -50,11 +50,11 @@ __kernel void arithm_flip_rows(__global const uchar* srcptr, int srcstep, int sr
if
(
x
<
cols
&&
y
<
thread_rows
)
{
__global
const
type*
src0
=
(
__global
const
type*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
sizeoftype
))
;
__global
const
type*
src1
=
(
__global
const
type*
)(
srcptr
+
mad24
(
rows
-
y
-
1
,
srcstep,
srcoffset
+
x
*
sizeoftype
))
;
__global
const
type*
src0
=
(
__global
const
type*
)(
srcptr
+
mad24
(
y,
srcstep,
mad24
(
x,
sizeoftype,
srcoffset
)
))
;
__global
const
type*
src1
=
(
__global
const
type*
)(
srcptr
+
mad24
(
rows
-
y
-
1
,
srcstep,
mad24
(
x,
sizeoftype,
srcoffset
)
))
;
__global
type*
dst0
=
(
__global
type*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
sizeoftype
))
;
__global
type*
dst1
=
(
__global
type*
)(
dstptr
+
mad24
(
rows
-
y
-
1
,
dststep,
dstoffset
+
x
*
sizeoftype
))
;
__global
type*
dst0
=
(
__global
type*
)(
dstptr
+
mad24
(
y,
dststep,
mad24
(
x,
sizeoftype,
dstoffset
)
))
;
__global
type*
dst1
=
(
__global
type*
)(
dstptr
+
mad24
(
rows
-
y
-
1
,
dststep,
mad24
(
x,
sizeoftype,
dstoffset
)
))
;
dst0[0]
=
src1[0]
;
dst1[0]
=
src0[0]
;
...
...
@@ -70,11 +70,12 @@ __kernel void arithm_flip_rows_cols(__global const uchar* srcptr, int srcstep, i
if
(
x
<
cols
&&
y
<
thread_rows
)
{
__global
const
type*
src0
=
(
__global
const
type*
)(
srcptr
+
mad24
(
y,
srcstep,
x*sizeoftype
+
srcoffset
))
;
__global
const
type*
src1
=
(
__global
const
type*
)(
srcptr
+
mad24
(
rows
-
y
-
1
,
srcstep,
(
cols
-
x
-
1
)
*sizeoftype
+
srcoffset
))
;
int
x1
=
cols
-
x
-
1
;
__global
const
type*
src0
=
(
__global
const
type*
)(
srcptr
+
mad24
(
y,
srcstep,
mad24
(
x,
sizeoftype,
srcoffset
)))
;
__global
const
type*
src1
=
(
__global
const
type*
)(
srcptr
+
mad24
(
rows
-
y
-
1
,
srcstep,
mad24
(
x1,
sizeoftype,
srcoffset
)))
;
__global
type*
dst0
=
(
__global
type*
)(
dstptr
+
mad24
(
rows
-
y
-
1
,
dststep,
(
cols
-
x
-
1
)
*sizeoftype
+
dstoffset
))
;
__global
type*
dst1
=
(
__global
type*
)(
dstptr
+
mad24
(
y,
dststep,
x
*
sizeoftype
+
dstoffset
))
;
__global
type*
dst0
=
(
__global
type*
)(
dstptr
+
mad24
(
rows
-
y
-
1
,
dststep,
mad24
(
x1,
sizeoftype,
dstoffset
)
))
;
__global
type*
dst1
=
(
__global
type*
)(
dstptr
+
mad24
(
y,
dststep,
mad24
(
x,
sizeoftype,
dstoffset
)
))
;
dst0[0]
=
src0[0]
;
dst1[0]
=
src1[0]
;
...
...
@@ -90,11 +91,12 @@ __kernel void arithm_flip_cols(__global const uchar* srcptr, int srcstep, int sr
if
(
x
<
thread_cols
&&
y
<
rows
)
{
__global
const
type*
src0
=
(
__global
const
type*
)(
srcptr
+
mad24
(
y,
srcstep,
x
*
sizeoftype
+
srcoffset
))
;
__global
const
type*
src1
=
(
__global
const
type*
)(
srcptr
+
mad24
(
y,
srcstep,
(
cols
-
x
-
1
)
*sizeoftype
+
srcoffset
))
;
int
x1
=
cols
-
x
-
1
;
__global
const
type*
src0
=
(
__global
const
type*
)(
srcptr
+
mad24
(
y,
srcstep,
mad24
(
x,
sizeoftype,
srcoffset
)))
;
__global
const
type*
src1
=
(
__global
const
type*
)(
srcptr
+
mad24
(
y,
srcstep,
mad24
(
x1,
sizeoftype,
srcoffset
)))
;
__global
type*
dst0
=
(
__global
type*
)(
dstptr
+
mad24
(
y,
dststep,
(
cols
-
x
-
1
)
*sizeoftype
+
dstoffset
))
;
__global
type*
dst1
=
(
__global
type*
)(
dstptr
+
mad24
(
y,
dststep,
x
*
sizeoftype
+
dstoffset
))
;
__global
type*
dst0
=
(
__global
type*
)(
dstptr
+
mad24
(
y,
dststep,
mad24
(
x1,
sizeoftype,
dstoffset
)
))
;
__global
type*
dst1
=
(
__global
type*
)(
dstptr
+
mad24
(
y,
dststep,
mad24
(
x,
sizeoftype,
dstoffset
)
))
;
dst1[0]
=
src1[0]
;
dst0[0]
=
src0[0]
;
...
...
modules/core/src/opencl/inrange.cl
View file @
c00f0faf
...
...
@@ -64,23 +64,22 @@ __kernel void inrange(__global const uchar * src1ptr, int src1_step, int src1_of
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
int
src1_index
=
mad24
(
y,
src1_step,
x*
(
int
)
sizeof
(
T
)
*cn
+
src1_offset
)
;
int
src1_index
=
mad24
(
y,
src1_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
cn,
src1_offset
)
)
;
int
dst_index
=
mad24
(
y,
dst_step,
x
+
dst_offset
)
;
__global
const
T
*
src1
=
(
__global
const
T
*
)(
src1ptr
+
src1_index
)
;
__global
uchar
*
dst
=
dstptr
+
dst_index
;
#
ifndef
HAVE_SCALAR
int
src2_index
=
mad24
(
y,
src2_step,
x*
(
int
)
sizeof
(
T
)
*cn
+
src2_offset
)
;
int
src3_index
=
mad24
(
y,
src3_step,
x*
(
int
)
sizeof
(
T
)
*cn
+
src3_offset
)
;
int
src2_index
=
mad24
(
y,
src2_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
cn,
src2_offset
)
)
;
int
src3_index
=
mad24
(
y,
src3_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
cn,
src3_offset
)
)
;
__global
const
T
*
src2
=
(
__global
const
T
*
)(
src2ptr
+
src2_index
)
;
__global
const
T
*
src3
=
(
__global
const
T
*
)(
src3ptr
+
src3_index
)
;
#
endif
dst[0]
=
255
;
#
pragma
unroll
for
(
int
c
=
0
; c < cn; ++c)
if
(
src2[c]
>
src1[c]
||
src3[c]
<
src1[c]
)
if
(
src2[c]
>
src1[c]
||
src3[c]
<
src1[c]
)
{
dst[0]
=
0
;
break
;
...
...
modules/core/src/opencl/lut.cl
View file @
c00f0faf
...
...
@@ -34,14 +34,6 @@
//
//
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
__kernel
void
LUT
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
__global
const
uchar
*
lutptr,
int
lut_step,
int
lut_offset,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
rows,
int
cols
)
...
...
@@ -51,8 +43,8 @@ __kernel void LUT(__global const uchar * srcptr, int src_step, int src_offset,
if
(
x
<
cols
&&
y
<
rows
)
{
int
src_index
=
mad24
(
y,
src_step,
src_offset
+
x
*
(
int
)
sizeof
(
srcT
)
*
dcn
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
(
int
)
sizeof
(
dstT
)
*
dcn
)
;
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
srcT
)
*
dcn,
src_offset
)
)
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
dstT
)
*
dcn,
dst_offset
)
)
;
__global
const
srcT
*
src
=
(
__global
const
srcT
*
)(
srcptr
+
src_index
)
;
__global
const
dstT
*
lut
=
(
__global
const
dstT
*
)(
lutptr
+
lut_offset
)
;
...
...
@@ -65,7 +57,7 @@ __kernel void LUT(__global const uchar * srcptr, int src_step, int src_offset,
#
else
#
pragma
unroll
for
(
int
cn
=
0
; cn < dcn; ++cn)
dst[cn]
=
lut[
src[cn]
*
dcn
+
cn
]
;
dst[cn]
=
lut[
mad24
(
src[cn],
dcn,
cn
)
]
;
#
endif
}
}
modules/core/src/opencl/mixchannels.cl
View file @
c00f0faf
...
...
@@ -46,9 +46,9 @@
#
define
DECLARE_OUTPUT_MAT
(
i
)
\
__global
uchar
*
dst##i##ptr,
int
dst##i##_step,
int
dst##i##_offset,
#
define
PROCESS_ELEM
(
i
)
\
int
src##i##_index
=
mad24
(
src##i##_step,
y,
x
*
(
int
)
sizeof
(
T
)
*
scn##i
+
src##i##_offset
)
; \
int
src##i##_index
=
mad24
(
src##i##_step,
y,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
scn##i,
src##i##_offset
)
)
; \
__global
const
T
*
src##i
=
(
__global
const
T
*
)(
src##i##ptr
+
src##i##_index
)
; \
int
dst##i##_index
=
mad24
(
dst##i##_step,
y,
x
*
(
int
)
sizeof
(
T
)
*
dcn##i
+
dst##i##_offset
)
; \
int
dst##i##_index
=
mad24
(
dst##i##_step,
y,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
dcn##i,
dst##i##_offset
)
)
; \
__global
T
*
dst##i
=
(
__global
T
*
)(
dst##i##ptr
+
dst##i##_index
)
; \
dst##i[0]
=
src##i[0]
;
...
...
modules/core/src/opencl/mulspectrums.cl
View file @
c00f0faf
...
...
@@ -45,7 +45,7 @@
inline
float2
cmulf
(
float2
a,
float2
b
)
{
return
(
float2
)(
a.x
*
b.x
-
a.y
*
b.y,
a.x
*
b.y
+
a.y
*
b.x
)
;
return
(
float2
)(
mad
(
a.x,
b.x,
-
a.y
*
b.y
)
,
mad
(
a.x,
b.y,
a.y
*
b.x
)
)
;
}
inline
float2
conjf
(
float2
a
)
...
...
@@ -63,9 +63,9 @@ __kernel void mulAndScaleSpectrums(__global const uchar * src1ptr, int src1_step
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
int
src1_index
=
mad24
(
y,
src1_step,
x
*
(
int
)
sizeof
(
float2
)
+
src1_offset
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
*
(
int
)
sizeof
(
float2
)
+
src2_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
x
*
(
int
)
sizeof
(
float2
)
+
dst_offset
)
;
int
src1_index
=
mad24
(
y,
src1_step,
mad24
(
x,
(
int
)
sizeof
(
float2
)
,
src1_offset
)
)
;
int
src2_index
=
mad24
(
y,
src2_step,
mad24
(
x,
(
int
)
sizeof
(
float2
)
,
src2_offset
)
)
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float2
)
,
dst_offset
)
)
;
float2
src0
=
*
(
__global
const
float2
*
)(
src1ptr
+
src1_index
)
;
float2
src1
=
*
(
__global
const
float2
*
)(
src2ptr
+
src2_index
)
;
...
...
modules/core/src/opencl/reduce.cl
View file @
c00f0faf
...
...
@@ -58,20 +58,34 @@
#
define
EXTRA_PARAMS
#
endif
//
accumulative
reduction
stuff
#
if
defined
OP_SUM
|
| defined OP_SUM_ABS || defined OP_SUM_SQR || defined OP_DOT
#ifdef OP_DOT
#define FUNC(a, b, c) a += b * c
#if ddepth <= 4
#define FUNC(a, b, c) a = mad24(b, c, a)
#else
#define FUNC(a, b, c) a = mad(b, c, a)
#endif
#elif defined OP_SUM
#define FUNC(a, b) a += b
#elif defined OP_SUM_ABS
#define FUNC(a, b) a += b >= (dstT)(0) ? b : -b
#elif defined OP_SUM_SQR
#define FUNC(a, b) a += b * b
#if ddepth <= 4
#define FUNC(a, b) a = mad24(b, b, a)
#else
#define FUNC(a, b) a = mad(b, b, a)
#endif
#endif
#define DECLARE_LOCAL_MEM \
__local dstT localmem[WGS2_ALIGNED]
#define DEFINE_ACCUMULATOR \
dstT accumulator = (dstT)(0)
#ifdef HAVE_MASK
#define REDUCE_GLOBAL \
dstT temp = convertToDT(src[0]); \
...
...
@@ -80,7 +94,7 @@
FUNC(accumulator, temp)
#elif defined OP_DOT
#define REDUCE_GLOBAL \
int src2_index = mad24(id / cols, src2_step,
src2_offset + (id % cols) * (int)sizeof(srcT
)); \
int src2_index = mad24(id / cols, src2_step,
mad24(id % cols, (int)sizeof(srcT), src2_offset
)); \
__global const srcT * src2 = (__global const srcT *)(src2ptr + src2_index); \
dstT temp = convertToDT(src[0]), temp2 = convertToDT(src2[0]); \
FUNC(accumulator, temp, temp2)
...
...
@@ -89,6 +103,7 @@
dstT temp = convertToDT(src[0]); \
FUNC(accumulator, temp)
#endif
#define SET_LOCAL_1 \
localmem[lid] = accumulator
#define REDUCE_LOCAL_1 \
...
...
@@ -99,6 +114,7 @@
__global dstT * dst = (__global dstT *)(dstptr + (int)sizeof(dstT) * gid); \
dst[0] = localmem[0]
// countNonZero stuff
#elif defined OP_COUNT_NON_ZERO
#define dstT int
#define DECLARE_LOCAL_MEM \
...
...
@@ -118,6 +134,7 @@
__global dstT * dst = (__global dstT *)(dstptr + (int)sizeof(dstT) * gid); \
dst[0] = localmem[0]
// minMaxLoc stuff
#elif defined OP_MIN_MAX_LOC |
|
defined
OP_MIN_MAX_LOC_MASK
#
ifdef
DEPTH_0
...
...
@@ -179,8 +196,8 @@
#
define
REDUCE_LOCAL_1
\
srcT
oldmin
=
localmem_min[lid-WGS2_ALIGNED]
; \
srcT
oldmax
=
localmem_max[lid-WGS2_ALIGNED]
; \
localmem_min[lid
-
WGS2_ALIGNED]
=
min
(
minval,localmem_min[lid-WGS2_ALIGNED]
)
; \
localmem_max[lid
-
WGS2_ALIGNED]
=
max
(
maxval,localmem_max[lid-WGS2_ALIGNED]
)
; \
localmem_min[lid
-
WGS2_ALIGNED]
=
min
(
minval,
localmem_min[lid-WGS2_ALIGNED]
)
; \
localmem_max[lid
-
WGS2_ALIGNED]
=
max
(
maxval,
localmem_max[lid-WGS2_ALIGNED]
)
; \
srcT
minv
=
localmem_min[lid
-
WGS2_ALIGNED],
maxv
=
localmem_max[lid
-
WGS2_ALIGNED]
; \
localmem_minloc[lid
-
WGS2_ALIGNED]
=
(
minv
==
minval
)
?
(
minv
==
oldmin
)
?
\
min
(
minloc,
localmem_minloc[lid-WGS2_ALIGNED]
)
:
minloc
:
localmem_minloc[lid-WGS2_ALIGNED]
; \
...
...
@@ -233,15 +250,17 @@
#
else
#
error
"No operation"
#
endif
#
endif
//
end
of
minMaxLoc
stuff
#
ifdef
OP_MIN_MAX_LOC
#
undef
EXTRA_PARAMS
#
define
EXTRA_PARAMS
,
__global
uchar
*
dstptr2,
__global
int
*
dstlocptr,
__global
int
*
dstlocptr2
#
elif
defined
OP_MIN_MAX_LOC_MASK
#
undef
EXTRA_PARAMS
#
define
EXTRA_PARAMS
,
__global
uchar
*
dstptr2,
__global
int
*
dstlocptr,
__global
int
*
dstlocptr2,
\
__global
const
uchar
*
maskptr,
int
mask_step,
int
mask_offset
#
elif
defined
OP_DOT
#
undef
EXTRA_PARAMS
#
define
EXTRA_PARAMS
,
__global
uchar
*
src2ptr,
int
src2_step,
int
src2_offset
...
...
@@ -259,7 +278,7 @@ __kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset
for
(
int
grain
=
groupnum
*
WGS
; id < total; id += grain)
{
int
src_index
=
mad24
(
id
/
cols,
src_step,
src_offset
+
(
id
%
cols
)
*
(
int
)
sizeof
(
srcT
))
;
int
src_index
=
mad24
(
id
/
cols,
src_step,
mad24
(
id
%
cols,
(
int
)
sizeof
(
srcT
)
,
src_offset
))
;
__global
const
srcT
*
src
=
(
__global
const
srcT
*
)(
srcptr
+
src_index
)
;
REDUCE_GLOBAL
;
}
...
...
modules/core/src/opencl/reduce2.cl
View file @
c00f0faf
...
...
@@ -98,7 +98,7 @@ __kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset
int
x
=
get_global_id
(
0
)
;
if
(
x
<
cols
)
{
int
src_index
=
x
*
(
int
)
sizeof
(
srcT
)
*
cn
+
src_offset
;
int
src_index
=
mad24
(
x,
(
int
)
sizeof
(
srcT
)
*
cn,
src_offset
)
;
__global
dstT
*
dst
=
(
__global
dstT
*
)(
dstptr
+
dst_offset
)
+
x
*
cn
;
dstT
tmp[cn]
=
{
INIT_VALUE
}
;
...
...
modules/core/src/opencl/set_identity.cl
View file @
c00f0faf
...
...
@@ -51,7 +51,7 @@ __kernel void setIdentity(__global uchar * srcptr, int src_step, int src_offset,
if
(
x
<
cols
&&
y
<
rows
)
{
int
src_index
=
mad24
(
y,
src_step,
src_offset
+
x
*
(
int
)
sizeof
(
T
))
;
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src_offset
))
;
__global
T
*
src
=
(
__global
T
*
)(
srcptr
+
src_index
)
;
src[0]
=
x
==
y
?
scalar
:
(
T
)(
0
)
;
...
...
modules/core/src/opencl/split_merge.cl
View file @
c00f0faf
...
...
@@ -45,7 +45,7 @@
#
define
DECLARE_SRC_PARAM
(
index
)
__global
const
uchar
*
src##index##ptr,
int
src##index##_step,
int
src##index##_offset,
#
define
DECLARE_DATA
(
index
)
__global
const
T
*
src##index
=
\
(
__global
T
*
)(
src##index##ptr
+
mad24
(
src##index##_step,
y,
x
*
(
int
)
sizeof
(
T
)
+
src##index##_offset
))
;
(
__global
T
*
)(
src##index##ptr
+
mad24
(
src##index##_step,
y,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src##index##_offset
)
))
;
#
define
PROCESS_ELEM
(
index
)
dst[index]
=
src##index[0]
;
__kernel
void
merge
(
DECLARE_SRC_PARAMS_N
...
...
@@ -58,7 +58,7 @@ __kernel void merge(DECLARE_SRC_PARAMS_N
if
(
x
<
cols
&&
y
<
rows
)
{
DECLARE_DATA_N
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
mad24
(
dst_step,
y,
x
*
(
int
)
sizeof
(
T
)
*
cn
+
dst_offset
))
;
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
mad24
(
dst_step,
y,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
cn,
dst_offset
)
))
;
PROCESS_ELEMS_N
}
}
...
...
@@ -67,7 +67,7 @@ __kernel void merge(DECLARE_SRC_PARAMS_N
#
define
DECLARE_DST_PARAM
(
index
)
,
__global
uchar
*
dst##index##ptr,
int
dst##index##_step,
int
dst##index##_offset
#
define
DECLARE_DATA
(
index
)
__global
T
*
dst##index
=
\
(
__global
T
*
)(
dst##index##ptr
+
mad24
(
y,
dst##index##_step,
x
*
(
int
)
sizeof
(
T
)
+
dst##index##_offset
))
;
(
__global
T
*
)(
dst##index##ptr
+
mad24
(
y,
dst##index##_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
dst##index##_offset
)
))
;
#
define
PROCESS_ELEM
(
index
)
dst##index[0]
=
src[index]
;
__kernel
void
split
(
__global
uchar*
srcptr,
int
src_step,
int
src_offset,
int
rows,
int
cols
DECLARE_DST_PARAMS
)
...
...
@@ -78,7 +78,7 @@ __kernel void split(__global uchar* srcptr, int src_step, int src_offset, int ro
if
(
x
<
cols
&&
y
<
rows
)
{
DECLARE_DATA_N
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
mad24
(
y,
src_step,
x
*
cn
*
(
int
)
sizeof
(
T
)
+
src_offset
))
;
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
mad24
(
y,
src_step,
mad24
(
x,
cn
*
(
int
)
sizeof
(
T
)
,
src_offset
)
))
;
PROCESS_ELEMS_N
}
}
...
...
modules/core/src/opencl/transpose.cl
View file @
c00f0faf
...
...
@@ -60,7 +60,7 @@ __kernel void transpose(__global const uchar * srcptr, int src_step, int src_off
}
else
{
int
bid
=
gp_x
+
gs_x
*
gp_y
;
int
bid
=
mad24
(
gs_x,
gp_y,
gp_x
)
;
groupId_y
=
bid
%
gs_y
;
groupId_x
=
((
bid
/
gs_y
)
+
groupId_y
)
%
gs_x
;
}
...
...
@@ -68,23 +68,23 @@ __kernel void transpose(__global const uchar * srcptr, int src_step, int src_off
int
lx
=
get_local_id
(
0
)
;
int
ly
=
get_local_id
(
1
)
;
int
x
=
groupId_x
*
TILE_DIM
+
lx
;
int
y
=
groupId_y
*
TILE_DIM
+
ly
;
int
x
=
mad24
(
groupId_x,
TILE_DIM,
lx
)
;
int
y
=
mad24
(
groupId_y,
TILE_DIM,
ly
)
;
int
x_index
=
groupId_y
*
TILE_DIM
+
lx
;
int
y_index
=
groupId_x
*
TILE_DIM
+
ly
;
int
x_index
=
mad24
(
groupId_y,
TILE_DIM,
lx
)
;
int
y_index
=
mad24
(
groupId_x,
TILE_DIM,
ly
)
;
__local
T
title[TILE_DIM
*
LDS_STEP]
;
if
(
x
<
src_cols
&&
y
<
src_rows
)
{
int
index_src
=
mad24
(
y,
src_step,
x
*
(
int
)
sizeof
(
T
)
+
src_offset
)
;
int
index_src
=
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src_offset
)
)
;
for
(
int
i
=
0
; i < TILE_DIM; i += BLOCK_ROWS)
if
(
y
+
i
<
src_rows
)
{
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
index_src
)
;
title[
(
ly
+
i
)
*
LDS_STEP
+
lx
]
=
src[0]
;
title[
mad24
(
ly
+
i,
LDS_STEP,
lx
)
]
=
src[0]
;
index_src
=
mad24
(
BLOCK_ROWS,
src_step,
index_src
)
;
}
}
...
...
@@ -92,13 +92,13 @@ __kernel void transpose(__global const uchar * srcptr, int src_step, int src_off
if
(
x_index
<
src_rows
&&
y_index
<
src_cols
)
{
int
index_dst
=
mad24
(
y_index,
dst_step,
x_index
*
(
int
)
sizeof
(
T
)
+
dst_offset
)
;
int
index_dst
=
mad24
(
y_index,
dst_step,
mad24
(
x_index,
(
int
)
sizeof
(
T
)
,
dst_offset
)
)
;
for
(
int
i
=
0
; i < TILE_DIM; i += BLOCK_ROWS)
if
((
y_index
+
i
)
<
src_cols
)
{
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
index_dst
)
;
dst[0]
=
title[
lx
*
LDS_STEP
+
ly
+
i
]
;
dst[0]
=
title[
mad24
(
lx,
LDS_STEP,
ly
+
i
)
]
;
index_dst
=
mad24
(
BLOCK_ROWS,
dst_step,
index_dst
)
;
}
}
...
...
@@ -111,8 +111,8 @@ __kernel void transpose_inplace(__global uchar * srcptr, int src_step, int src_o
if
(
y
<
src_rows
&&
x
<
y
)
{
int
src_index
=
mad24
(
y,
src_step,
src_offset
+
x
*
(
int
)
sizeof
(
T
))
;
int
dst_index
=
mad24
(
x,
src_step,
src_offset
+
y
*
(
int
)
sizeof
(
T
))
;
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src_offset
))
;
int
dst_index
=
mad24
(
x,
src_step,
mad24
(
y,
(
int
)
sizeof
(
T
)
,
src_offset
))
;
__global
T
*
src
=
(
__global
T
*
)(
srcptr
+
src_index
)
;
__global
T
*
dst
=
(
__global
T
*
)(
srcptr
+
dst_index
)
;
...
...
modules/core/src/stat.cpp
View file @
c00f0faf
...
...
@@ -494,8 +494,8 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask
static
const
char
*
const
opMap
[
3
]
=
{
"OP_SUM"
,
"OP_SUM_ABS"
,
"OP_SUM_SQR"
};
char
cvt
[
40
];
ocl
::
Kernel
k
(
"reduce"
,
ocl
::
core
::
reduce_oclsrc
,
format
(
"-D srcT=%s -D dstT=%s -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
dtype
),
ocl
::
convertTypeStr
(
depth
,
ddepth
,
cn
,
cvt
),
format
(
"-D srcT=%s -D dstT=%s -D
ddepth=%d -D
convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
dtype
),
ddepth
,
ocl
::
convertTypeStr
(
depth
,
ddepth
,
cn
,
cvt
),
opMap
[
sum_op
],
(
int
)
wgs
,
wgs2_aligned
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
haveMask
?
" -D HAVE_MASK"
:
""
));
...
...
modules/core/src/umatrix.cpp
View file @
c00f0faf
...
...
@@ -719,10 +719,14 @@ void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) con
if
(
dims
<=
2
&&
cn
&&
_dst
.
isUMat
()
&&
ocl
::
useOpenCL
()
&&
((
needDouble
&&
doubleSupport
)
||
!
needDouble
)
)
{
char
cvt
[
40
];
int
wdepth
=
std
::
max
(
CV_32F
,
sdepth
);
char
cvt
[
2
][
40
];
ocl
::
Kernel
k
(
"convertTo"
,
ocl
::
core
::
convert_oclsrc
,
format
(
"-D srcT=%s -D dstT=%s -D convertToDT=%s%s"
,
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
ddepth
),
ocl
::
convertTypeStr
(
CV_32F
,
ddepth
,
1
,
cvt
),
format
(
"-D srcT=%s -D WT=%s -D dstT=%s -D convertToWT=%s -D convertToDT=%s%s"
,
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
wdepth
),
ocl
::
typeToStr
(
ddepth
),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
1
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
1
,
cvt
[
1
]),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
!
k
.
empty
())
{
...
...
@@ -731,7 +735,13 @@ void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) con
UMat
dst
=
_dst
.
getUMat
();
float
alphaf
=
(
float
)
alpha
,
betaf
=
(
float
)
beta
;
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
,
cn
),
alphaf
,
betaf
);
ocl
::
KernelArg
srcarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
dstarg
=
ocl
::
KernelArg
::
WriteOnly
(
dst
,
cn
);
if
(
wdepth
==
CV_32F
)
k
.
args
(
srcarg
,
dstarg
,
alphaf
,
betaf
);
else
k
.
args
(
srcarg
,
dstarg
,
alpha
,
beta
);
size_t
globalsize
[
2
]
=
{
dst
.
cols
*
cn
,
dst
.
rows
};
if
(
k
.
run
(
2
,
globalsize
,
NULL
,
false
))
...
...
@@ -838,8 +848,8 @@ static bool ocl_dot( InputArray _src1, InputArray _src2, double & res )
char
cvt
[
40
];
ocl
::
Kernel
k
(
"reduce"
,
ocl
::
core
::
reduce_oclsrc
,
format
(
"-D srcT=%s -D dstT=%s -D convertToDT=%s -D OP_DOT -D WGS=%d -D WGS2_ALIGNED=%d%s"
,
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
ddepth
),
ocl
::
convertTypeStr
(
depth
,
ddepth
,
1
,
cvt
),
format
(
"-D srcT=%s -D dstT=%s -D
ddepth=%d -D
convertToDT=%s -D OP_DOT -D WGS=%d -D WGS2_ALIGNED=%d%s"
,
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
ddepth
),
ddepth
,
ocl
::
convertTypeStr
(
depth
,
ddepth
,
1
,
cvt
),
(
int
)
wgs
,
wgs2_aligned
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
return
false
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment