Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
c00f0faf
Commit
c00f0faf
authored
11 years ago
by
Andrey Pavlenko
Committed by
OpenCV Buildbot
11 years ago
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2408 from ilya-lavrenov:tapi_core_opt
parents
5527f287
fe38aab8
Hide whitespace changes
Inline
Side-by-side
Showing
21 changed files
with
191 additions
and
146 deletions
+191
-146
ocl_defs.hpp
modules/core/include/opencv2/core/opencl/ocl_defs.hpp
+1
-1
arithm.cpp
modules/core/src/arithm.cpp
+2
-2
convert.cpp
modules/core/src/convert.cpp
+5
-10
mathfuncs.cpp
modules/core/src/mathfuncs.cpp
+4
-4
matmul.cpp
modules/core/src/matmul.cpp
+2
-2
arithm.cl
modules/core/src/opencl/arithm.cl
+71
-43
convert.cl
modules/core/src/opencl/convert.cl
+4
-4
copymakeborder.cl
modules/core/src/opencl/copymakeborder.cl
+7
-7
copyset.cl
modules/core/src/opencl/copyset.cl
+7
-7
flip.cl
modules/core/src/opencl/flip.cl
+14
-12
inrange.cl
modules/core/src/opencl/inrange.cl
+4
-5
lut.cl
modules/core/src/opencl/lut.cl
+3
-11
mixchannels.cl
modules/core/src/opencl/mixchannels.cl
+2
-2
mulspectrums.cl
modules/core/src/opencl/mulspectrums.cl
+4
-4
reduce.cl
modules/core/src/opencl/reduce.cl
+26
-7
reduce2.cl
modules/core/src/opencl/reduce2.cl
+1
-1
set_identity.cl
modules/core/src/opencl/set_identity.cl
+1
-1
split_merge.cl
modules/core/src/opencl/split_merge.cl
+4
-4
transpose.cl
modules/core/src/opencl/transpose.cl
+11
-11
stat.cpp
modules/core/src/stat.cpp
+2
-2
umatrix.cpp
modules/core/src/umatrix.cpp
+16
-6
No files found.
modules/core/include/opencv2/core/opencl/ocl_defs.hpp
View file @
c00f0faf
...
...
@@ -5,7 +5,7 @@
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//#define CV_OPENCL_RUN_
VERBOSE
//#define CV_OPENCL_RUN_
ASSERT
#ifdef HAVE_OPENCL
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/arithm.cpp
View file @
c00f0faf
...
...
@@ -1318,7 +1318,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
char
cvtstr
[
4
][
32
],
opts
[
1024
];
sprintf
(
opts
,
"-D %s%s -D %s -D srcT1=%s -D srcT1_C1=%s -D srcT2=%s -D srcT2_C1=%s "
"-D dstT=%s -D dstT_C1=%s -D workT=%s -D workST=%s -D scaleT=%s -D convertToWT1=%s "
"-D dstT=%s -D dstT_C1=%s -D workT=%s -D workST=%s -D scaleT=%s -D
wdepth=%d -D
convertToWT1=%s "
"-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d"
,
(
haveMask
?
"MASK_"
:
""
),
(
haveScalar
?
"UNARY_OP"
:
"BINARY_OP"
),
oclop2str
[
oclop
],
ocl
::
typeToStr
(
CV_MAKETYPE
(
depth1
,
kercn
)),
...
...
@@ -1329,7 +1329,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
ocl
::
typeToStr
(
CV_MAKETYPE
(
ddepth
,
1
)),
ocl
::
typeToStr
(
CV_MAKETYPE
(
wdepth
,
kercn
)),
ocl
::
typeToStr
(
CV_MAKETYPE
(
wdepth
,
scalarcn
)),
ocl
::
typeToStr
(
CV_MAKETYPE
(
wdepth
,
1
)),
ocl
::
typeToStr
(
CV_MAKETYPE
(
wdepth
,
1
)),
wdepth
,
ocl
::
convertTypeStr
(
depth1
,
wdepth
,
kercn
,
cvtstr
[
0
]),
ocl
::
convertTypeStr
(
depth2
,
wdepth
,
kercn
,
cvtstr
[
1
]),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
kercn
,
cvtstr
[
2
]),
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/convert.cpp
View file @
c00f0faf
...
...
@@ -1320,8 +1320,8 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha
int
wdepth
=
std
::
max
(
depth
,
CV_32F
);
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=uchar -D srcT1=%s"
" -D workT=%s -D convertToWT1=%s -D convertToDT=%s%s"
,
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
wdepth
),
" -D workT=%s -D
wdepth=%d -D
convertToWT1=%s -D convertToDT=%s%s"
,
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
wdepth
),
wdepth
,
ocl
::
convertTypeStr
(
depth
,
wdepth
,
1
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
CV_8U
,
1
,
cvt
[
1
]),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
...
...
@@ -1492,19 +1492,14 @@ static LUTFunc lutTab[] =
static
bool
ocl_LUT
(
InputArray
_src
,
InputArray
_lut
,
OutputArray
_dst
)
{
int
dtype
=
_dst
.
type
(),
lcn
=
_lut
.
channels
(),
dcn
=
CV_MAT_CN
(
dtype
),
ddepth
=
CV_MAT_DEPTH
(
dtype
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
_src
.
dims
()
>
2
||
(
!
doubleSupport
&&
ddepth
==
CV_64F
))
return
false
;
UMat
src
=
_src
.
getUMat
(),
lut
=
_lut
.
getUMat
();
_dst
.
create
(
src
.
size
(),
dtype
);
UMat
dst
=
_dst
.
getUMat
();
ocl
::
Kernel
k
(
"LUT"
,
ocl
::
core
::
lut_oclsrc
,
format
(
"-D dcn=%d -D lcn=%d -D srcT=%s -D dstT=%s%s"
,
dcn
,
lcn
,
ocl
::
typeToStr
(
src
.
depth
()),
ocl
::
typeToStr
(
ddepth
),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
format
(
"-D dcn=%d -D lcn=%d -D srcT=%s -D dstT=%s"
,
dcn
,
lcn
,
ocl
::
typeToStr
(
src
.
depth
()),
ocl
::
memopTypeToStr
(
ddepth
)));
if
(
k
.
empty
())
return
false
;
...
...
@@ -1528,7 +1523,7 @@ void cv::LUT( InputArray _src, InputArray _lut, OutputArray _dst )
_lut
.
total
()
==
256
&&
_lut
.
isContinuous
()
&&
(
depth
==
CV_8U
||
depth
==
CV_8S
)
);
CV_OCL_RUN
(
_dst
.
isUMat
(),
CV_OCL_RUN
(
_dst
.
isUMat
()
&&
_src
.
dims
()
<=
2
,
ocl_LUT
(
_src
,
_lut
,
_dst
))
Mat
src
=
_src
.
getMat
(),
lut
=
_lut
.
getMat
();
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/mathfuncs.cpp
View file @
c00f0faf
...
...
@@ -508,9 +508,9 @@ static bool ocl_cartToPolar( InputArray _src1, InputArray _src2,
return
false
;
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D BINARY_OP -D dstT=%s -D OP_CTP_%s%s"
,
format
(
"-D BINARY_OP -D dstT=%s -D
depth=%d -D
OP_CTP_%s%s"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
1
)),
angleInDegrees
?
"AD"
:
"AR"
,
depth
,
angleInDegrees
?
"AD"
:
"AR"
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
return
false
;
...
...
@@ -695,8 +695,8 @@ static bool ocl_polarToCart( InputArray _mag, InputArray _angle,
return
false
;
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D dstT=%s -D BINARY_OP -D OP_PTC_%s%s"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
1
)),
format
(
"-D dstT=%s -D
depth=%d -D
BINARY_OP -D OP_PTC_%s%s"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
1
)),
depth
,
angleInDegrees
?
"AD"
:
"AR"
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/matmul.cpp
View file @
c00f0faf
...
...
@@ -2166,9 +2166,9 @@ static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, Outp
char
cvt
[
2
][
50
];
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D workT=%s -D convertToWT1=%s"
format
(
"-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D workT=%s -D
wdepth=%d -D
convertToWT1=%s"
" -D srcT1=dstT -D srcT2=dstT -D convertToDT=%s%s"
,
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
wdepth
),
ocl
::
convertTypeStr
(
depth
,
wdepth
,
1
,
cvt
[
0
]),
ocl
::
typeToStr
(
wdepth
),
wdepth
,
ocl
::
convertTypeStr
(
depth
,
wdepth
,
1
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
depth
,
1
,
cvt
[
1
]),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/opencl/arithm.cl
View file @
c00f0faf
...
...
@@ -63,11 +63,12 @@
#elif defined cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#define CV_EPSILON DBL_EPSILON
#define CV_PI M_PI
#else
#define CV_EPSILON FLT_EPSILON
#endif
#if depth <= 5
#define CV_PI M_PI_F
#else
#define CV_PI M_PI
#endif
#ifndef cn
...
...
@@ -84,11 +85,11 @@
#endif
#if cn != 3
#define storedst(val) *(__global dstT*)(dstptr + dst_index) = val
#define storedst2(val) *(__global dstT*)(dstptr2 + dst_index2) = val
#define storedst(val) *(__global dstT
*)(dstptr + dst_index) = val
#define storedst2(val) *(__global dstT
*)(dstptr2 + dst_index2) = val
#else
#define storedst(val) vstore3(val, 0, (__global dstT_C1*)(dstptr + dst_index))
#define storedst2(val) vstore3(val, 0, (__global dstT_C1*)(dstptr2 + dst_index2))
#define storedst(val) vstore3(val, 0, (__global dstT_C1
*)(dstptr + dst_index))
#define storedst2(val) vstore3(val, 0, (__global dstT_C1
*)(dstptr2 + dst_index2))
#endif
#define noconvert
...
...
@@ -97,19 +98,27 @@
#ifndef srcT1
#define srcT1 dstT
#endif
#ifndef srcT1_C1
#define srcT1_C1 dstT_C1
#endif
#ifndef srcT2
#define srcT2 dstT
#endif
#ifndef srcT2_C1
#define srcT2_C1 dstT_C1
#endif
#define workT dstT
#if cn != 3
#define srcelem1 *(__global srcT1*)(srcptr1 + src1_index)
#define srcelem2 *(__global srcT2*)(srcptr2 + src2_index)
#define srcelem1 *(__global srcT1
*)(srcptr1 + src1_index)
#define srcelem2 *(__global srcT2
*)(srcptr2 + src2_index)
#else
#define srcelem1 vload3(0, (__global srcT1_C1*)(srcptr1 + src1_index))
#define srcelem2 vload3(0, (__global srcT2_C1*)(srcptr2 + src2_index))
#define srcelem1 vload3(0, (__global srcT1_C1
*)(srcptr1 + src1_index))
#define srcelem2 vload3(0, (__global srcT2_C1
*)(srcptr2 + src2_index))
#endif
#ifndef convertToDT
#define convertToDT noconvert
...
...
@@ -121,11 +130,11 @@
#define convertToWT2 convertToWT1
#endif
#if cn != 3
#define srcelem1 convertToWT1(*(__global srcT1*)(srcptr1 + src1_index))
#define srcelem2 convertToWT2(*(__global srcT2*)(srcptr2 + src2_index))
#define srcelem1 convertToWT1(*(__global srcT1
*)(srcptr1 + src1_index))
#define srcelem2 convertToWT2(*(__global srcT2
*)(srcptr2 + src2_index))
#else
#define srcelem1 convertToWT1(vload3(0, (__global srcT1_C1*)(srcptr1 + src1_index)))
#define srcelem2 convertToWT2(vload3(0, (__global srcT2_C1*)(srcptr2 + src2_index)))
#define srcelem1 convertToWT1(vload3(0, (__global srcT1_C1
*)(srcptr1 + src1_index)))
#define srcelem2 convertToWT2(vload3(0, (__global srcT2_C1
*)(srcptr2 + src2_index)))
#endif
#endif
...
...
@@ -224,7 +233,11 @@
#elif defined OP_ADDW
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , scaleT alpha, scaleT beta, scaleT gamma
#define PROCESS_ELEM storedst(convertToDT(srcelem1*alpha + srcelem2*beta + gamma))
#if wdepth <= 4
#define PROCESS_ELEM storedst(convertToDT(mad24(srcelem1, alpha, mad24(srcelem2, beta, gamma))))
#else
#define PROCESS_ELEM storedst(convertToDT(mad(srcelem1, alpha, mad(srcelem2, beta, gamma))))
#endif
#elif defined OP_MAG
#define PROCESS_ELEM storedst(hypot(srcelem1, srcelem2))
...
...
@@ -274,16 +287,31 @@
#elif defined OP_CONVERT_SCALE_ABS
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT alpha, workT beta
#if wdepth <= 4
#define PROCESS_ELEM \
workT value =
srcelem1 * alpha + beta
; \
workT value =
mad24(srcelem1, alpha, beta)
; \
storedst(convertToDT(value >= 0 ? value : -value))
#else
#define PROCESS_ELEM \
workT value = mad(srcelem1, alpha, beta); \
storedst(convertToDT(value >= 0 ? value : -value))
#endif
#elif defined OP_SCALE_ADD
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT alpha
#define PROCESS_ELEM storedst(convertToDT(srcelem1 * alpha + srcelem2))
#if wdepth <= 4
#define PROCESS_ELEM storedst(convertToDT(mad24(srcelem1, alpha, srcelem2)))
#else
#define PROCESS_ELEM storedst(convertToDT(mad(srcelem1, alpha, srcelem2)))
#endif
#elif defined OP_CTP_AD || defined OP_CTP_AR
#if depth <= 5
#define CV_EPSILON FLT_EPSILON
#else
#define CV_EPSILON DBL_EPSILON
#endif
#ifdef OP_CTP_AD
#define TO_DEGREE cartToPolar *= (180 / CV_PI);
#elif defined OP_CTP_AR
...
...
@@ -296,7 +324,7 @@
dstT tmp = y >= 0 ? 0 : CV_PI * 2; \
tmp = x < 0 ? CV_PI : tmp; \
dstT tmp1 = y >= 0 ? CV_PI * 0.5f : CV_PI * 1.5f; \
dstT cartToPolar = y2 <= x2 ? x * y /
(x2 + 0.28f * y2 + CV_EPSILON) + tmp : (tmp1 - x * y / (y2 + 0.28f * x
2 + CV_EPSILON)); \
dstT cartToPolar = y2 <= x2 ? x * y /
mad((dstT)(0.28f), y2, x2 + CV_EPSILON) + tmp : (tmp1 - x * y / mad((dstT)(0.28f), x2, y
2 + CV_EPSILON)); \
TO_DEGREE \
storedst(magnitude); \
storedst2(cartToPolar)
...
...
@@ -331,7 +359,7 @@
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , __global uchar* dstptr2, int dststep2, int dstoffset2
#undef EXTRA_INDEX
#define EXTRA_INDEX int dst_index2 = mad24(y, dststep2,
x*(int)sizeof(dstT_C1)*cn + dstoffset2
)
#define EXTRA_INDEX int dst_index2 = mad24(y, dststep2,
mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset2)
)
#endif
#if defined UNARY_OP || defined MASK_UNARY_OP
...
...
@@ -355,9 +383,9 @@
#if defined BINARY_OP
__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
__global const uchar* srcptr2, int srcstep2, int srcoffset2,
__global uchar* dstptr, int dststep, int dstoffset,
__kernel void KF(__global const uchar
* srcptr1, int srcstep1, int srcoffset1,
__global const uchar
* srcptr2, int srcstep2, int srcoffset2,
__global uchar
* dstptr, int dststep, int dstoffset,
int rows, int cols EXTRA_PARAMS )
{
int x = get_global_id(0);
...
...
@@ -365,11 +393,11 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
if (x < cols && y < rows)
{
int src1_index = mad24(y, srcstep1,
x*(int)sizeof(srcT1_C1)*cn + srcoffset1
);
int src1_index = mad24(y, srcstep1,
mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1)
);
#if !(defined(OP_RECIP_SCALE) || defined(OP_NOT))
int src2_index = mad24(y, srcstep2,
x*(int)sizeof(srcT2_C1)*cn + srcoffset2
);
int src2_index = mad24(y, srcstep2,
mad24(x, (int)sizeof(srcT2_C1) * cn, srcoffset2)
);
#endif
int dst_index = mad24(y, dststep,
x*(int)sizeof(dstT_C1)*cn + dstoffset
);
int dst_index = mad24(y, dststep,
mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset)
);
EXTRA_INDEX;
PROCESS_ELEM;
...
...
@@ -378,10 +406,10 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
#elif defined MASK_BINARY_OP
__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
__global const uchar* srcptr2, int srcstep2, int srcoffset2,
__global const uchar* mask, int maskstep, int maskoffset,
__global uchar* dstptr, int dststep, int dstoffset,
__kernel void KF(__global const uchar
* srcptr1, int srcstep1, int srcoffset1,
__global const uchar
* srcptr2, int srcstep2, int srcoffset2,
__global const uchar
* mask, int maskstep, int maskoffset,
__global uchar
* dstptr, int dststep, int dstoffset,
int rows, int cols EXTRA_PARAMS )
{
int x = get_global_id(0);
...
...
@@ -392,9 +420,9 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
int mask_index = mad24(y, maskstep, x + maskoffset);
if( mask[mask_index] )
{
int src1_index = mad24(y, srcstep1,
x*(int)sizeof(srcT1_C1)*cn + srcoffset1
);
int src2_index = mad24(y, srcstep2,
x*(int)sizeof(srcT2_C1)*cn + srcoffset2
);
int dst_index = mad24(y, dststep,
x*(int)sizeof(dstT_C1)*cn + dstoffset
);
int src1_index = mad24(y, srcstep1,
mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1)
);
int src2_index = mad24(y, srcstep2,
mad24(x, (int)sizeof(srcT2_C1) * cn, srcoffset2)
);
int dst_index = mad24(y, dststep,
mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset)
);
PROCESS_ELEM;
}
...
...
@@ -403,8 +431,8 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
#elif defined UNARY_OP
__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
__global uchar* dstptr, int dststep, int dstoffset,
__kernel void KF(__global const uchar
* srcptr1, int srcstep1, int srcoffset1,
__global uchar
* dstptr, int dststep, int dstoffset,
int rows, int cols EXTRA_PARAMS )
{
int x = get_global_id(0);
...
...
@@ -412,8 +440,8 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
if (x < cols && y < rows)
{
int src1_index = mad24(y, srcstep1,
x*(int)sizeof(srcT1_C1)*cn + srcoffset1
);
int dst_index = mad24(y, dststep,
x*(int)sizeof(dstT_C1)*cn + dstoffset
);
int src1_index = mad24(y, srcstep1,
mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1)
);
int dst_index = mad24(y, dststep,
mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset)
);
PROCESS_ELEM;
}
...
...
@@ -421,9 +449,9 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
#elif defined MASK_UNARY_OP
__kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
__global const uchar* mask, int maskstep, int maskoffset,
__global uchar* dstptr, int dststep, int dstoffset,
__kernel void KF(__global const uchar
* srcptr1, int srcstep1, int srcoffset1,
__global const uchar
* mask, int maskstep, int maskoffset,
__global uchar
* dstptr, int dststep, int dstoffset,
int rows, int cols EXTRA_PARAMS )
{
int x = get_global_id(0);
...
...
@@ -434,8 +462,8 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
int mask_index = mad24(y, maskstep, x + maskoffset);
if( mask[mask_index] )
{
int src1_index = mad24(y, srcstep1,
x*(int)sizeof(srcT1_C1)*cn + srcoffset1
);
int dst_index = mad24(y, dststep,
x*(int)sizeof(dstT_C1)*cn + dstoffset
);
int src1_index = mad24(y, srcstep1,
mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1)
);
int dst_index = mad24(y, dststep,
mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset)
);
PROCESS_ELEM;
}
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/opencl/convert.cl
View file @
c00f0faf
...
...
@@ -53,19 +53,19 @@
__kernel
void
convertTo
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
float
alpha,
float
beta
)
WT
alpha,
WT
beta
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
int
src_index
=
mad24
(
y,
src_step,
src_offset
+
x
*
(
int
)
sizeof
(
srcT
)
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
(
int
)
sizeof
(
dstT
)
)
;
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
srcT
)
,
src_offset
)
)
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
dstT
)
,
dst_offset
)
)
;
__global
const
srcT
*
src
=
(
__global
const
srcT
*
)(
srcptr
+
src_index
)
;
__global
dstT
*
dst
=
(
__global
dstT
*
)(
dstptr
+
dst_index
)
;
dst[0]
=
convertToDT
(
src[0]
*
alpha
+
beta
)
;
dst[0]
=
convertToDT
(
mad
(
convertToWT
(
src[0]
)
,
alpha,
beta
)
)
;
}
}
This diff is collapsed.
Click to expand it.
modules/core/src/opencl/copymakeborder.cl
View file @
c00f0faf
...
...
@@ -47,9 +47,9 @@
#
elif
defined
BORDER_REPLICATE
#
define
EXTRAPOLATE
(
x,
y,
v
)
\
{
\
x
=
max
(
min
(
x,
src_cols
-
1
)
,
0
)
; \
y
=
max
(
min
(
y,
src_rows
-
1
)
,
0
)
; \
v
=
*
(
__global
const
T
*
)(
srcptr
+
mad24
(
y,
src_step,
x
*
(
int
)
sizeof
(
T
)
+
src_offset
))
; \
x
=
clamp
(
x,
0
,
src_cols
-
1
)
; \
y
=
clamp
(
y,
0
,
src_rows
-
1
)
; \
v
=
*
(
__global
const
T
*
)(
srcptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src_offset
)
))
; \
}
#
elif
defined
BORDER_WRAP
#
define
EXTRAPOLATE
(
x,
y,
v
)
\
...
...
@@ -63,7 +63,7 @@
y
-=
((
y
-
src_rows
+
1
)
/
src_rows
)
*
src_rows
; \
if
(
y
>=
src_rows
)
\
y
%=
src_rows
; \
v
=
*
(
__global
const
T
*
)(
srcptr
+
mad24
(
y,
src_step,
x
*
(
int
)
sizeof
(
T
)
+
src_offset
))
; \
v
=
*
(
__global
const
T
*
)(
srcptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src_offset
)
))
; \
}
#
elif
defined
(
BORDER_REFLECT
)
|
| defined(BORDER_REFLECT_101)
#ifdef BORDER_REFLECT
...
...
@@ -97,7 +97,7 @@
y = src_rows - 1 - (y - src_rows) - delta; \
} \
while (y >= src_rows || y < 0); \
v = *(__global const T *)(srcptr + mad24(y, src_step,
x * (int)sizeof(T) + src_offset
)); \
v = *(__global const T *)(srcptr + mad24(y, src_step,
mad24(x, (int)sizeof(T), src_offset)
)); \
}
#else
#error No extrapolation method
...
...
@@ -117,14 +117,14 @@ __kernel void copyMakeBorder(__global const uchar * srcptr, int src_step, int sr
int
src_x
=
x
-
left
;
int
src_y
=
y
-
top
;
int
dst_index
=
mad24
(
y,
dst_step,
x
*
(
int
)
sizeof
(
T
)
+
dst_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
dst_offset
)
)
;
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
dst_index
)
;
if
(
NEED_EXTRAPOLATION
(
src_x,
src_y
))
EXTRAPOLATE
(
src_x,
src_y,
dst[0]
)
else
{
int
src_index
=
mad24
(
src_y,
src_step,
src_x
*
(
int
)
sizeof
(
T
)
+
src_offset
)
;
int
src_index
=
mad24
(
src_y,
src_step,
mad24
(
src_x,
(
int
)
sizeof
(
T
)
,
src_offset
)
)
;
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
src_index
)
;
dst[0]
=
src[0]
;
}
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/opencl/copyset.cl
View file @
c00f0faf
...
...
@@ -44,8 +44,8 @@
#
ifdef
COPY_TO_MASK
#
define
DEFINE_DATA
\
int
src_index
=
mad24
(
y,
src_step,
x*
(
int
)
sizeof
(
T
)
*scn
+
src_offset
)
; \
int
dst_index
=
mad24
(
y,
dst_step,
x*
(
int
)
sizeof
(
T
)
*scn
+
dst_offset
)
; \
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
scn,
src_offset
)
)
; \
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
scn,
dst_offset
)
)
; \
\
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
src_index
)
; \
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
dst_index
)
...
...
@@ -60,7 +60,7 @@ __kernel void copyToMask(__global const uchar * srcptr, int src_step, int src_of
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
int
mask_index
=
mad24
(
y,
mask_step,
x
*
mcn
+
mask_offset
)
;
int
mask_index
=
mad24
(
y,
mask_step,
mad24
(
x,
mcn,
mask_offset
)
)
;
__global
const
uchar
*
mask
=
(
__global
const
uchar
*
)(
maskptr
+
mask_index
)
;
#
if
mcn
==
1
...
...
@@ -93,10 +93,10 @@ __kernel void copyToMask(__global const uchar * srcptr, int src_step, int src_of
#
if
cn
!=
3
#
define
value
value_
#
define
storedst
(
val
)
*
(
__global
dstT*
)(
dstptr
+
dst_index
)
=
val
#
define
storedst
(
val
)
*
(
__global
dstT
*
)(
dstptr
+
dst_index
)
=
val
#
else
#
define
value
(
dstT
)(
value_.x,
value_.y,
value_.z
)
#
define
storedst
(
val
)
vstore3
(
val,
0
,
(
__global
dstT1*
)(
dstptr
+
dst_index
))
#
define
storedst
(
val
)
vstore3
(
val,
0
,
(
__global
dstT1
*
)(
dstptr
+
dst_index
))
#
endif
__kernel
void
setMask
(
__global
const
uchar*
mask,
int
maskstep,
int
maskoffset,
...
...
@@ -111,7 +111,7 @@ __kernel void setMask(__global const uchar* mask, int maskstep, int maskoffset,
int
mask_index
=
mad24
(
y,
maskstep,
x
+
maskoffset
)
;
if
(
mask[mask_index]
)
{
int
dst_index
=
mad24
(
y,
dststep,
x*
(
int
)
sizeof
(
dstT1
)
*cn
+
dstoffset
)
;
int
dst_index
=
mad24
(
y,
dststep,
mad24
(
x,
(
int
)
sizeof
(
dstT1
)
*
cn,
dstoffset
)
)
;
storedst
(
value
)
;
}
}
...
...
@@ -125,7 +125,7 @@ __kernel void set(__global uchar* dstptr, int dststep, int dstoffset,
if
(
x
<
cols
&&
y
<
rows
)
{
int
dst_index
=
mad24
(
y,
dststep,
x*
(
int
)
sizeof
(
dstT1
)
*cn
+
dstoffset
)
;
int
dst_index
=
mad24
(
y,
dststep,
mad24
(
x,
(
int
)
sizeof
(
dstT1
)
*
cn,
dstoffset
)
)
;
storedst
(
value
)
;
}
}
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/opencl/flip.cl
View file @
c00f0faf
...
...
@@ -50,11 +50,11 @@ __kernel void arithm_flip_rows(__global const uchar* srcptr, int srcstep, int sr
if
(
x
<
cols
&&
y
<
thread_rows
)
{
__global
const
type*
src0
=
(
__global
const
type*
)(
srcptr
+
mad24
(
y,
srcstep,
srcoffset
+
x
*
sizeoftype
))
;
__global
const
type*
src1
=
(
__global
const
type*
)(
srcptr
+
mad24
(
rows
-
y
-
1
,
srcstep,
srcoffset
+
x
*
sizeoftype
))
;
__global
const
type*
src0
=
(
__global
const
type*
)(
srcptr
+
mad24
(
y,
srcstep,
mad24
(
x,
sizeoftype,
srcoffset
)
))
;
__global
const
type*
src1
=
(
__global
const
type*
)(
srcptr
+
mad24
(
rows
-
y
-
1
,
srcstep,
mad24
(
x,
sizeoftype,
srcoffset
)
))
;
__global
type*
dst0
=
(
__global
type*
)(
dstptr
+
mad24
(
y,
dststep,
dstoffset
+
x
*
sizeoftype
))
;
__global
type*
dst1
=
(
__global
type*
)(
dstptr
+
mad24
(
rows
-
y
-
1
,
dststep,
dstoffset
+
x
*
sizeoftype
))
;
__global
type*
dst0
=
(
__global
type*
)(
dstptr
+
mad24
(
y,
dststep,
mad24
(
x,
sizeoftype,
dstoffset
)
))
;
__global
type*
dst1
=
(
__global
type*
)(
dstptr
+
mad24
(
rows
-
y
-
1
,
dststep,
mad24
(
x,
sizeoftype,
dstoffset
)
))
;
dst0[0]
=
src1[0]
;
dst1[0]
=
src0[0]
;
...
...
@@ -70,11 +70,12 @@ __kernel void arithm_flip_rows_cols(__global const uchar* srcptr, int srcstep, i
if
(
x
<
cols
&&
y
<
thread_rows
)
{
__global
const
type*
src0
=
(
__global
const
type*
)(
srcptr
+
mad24
(
y,
srcstep,
x*sizeoftype
+
srcoffset
))
;
__global
const
type*
src1
=
(
__global
const
type*
)(
srcptr
+
mad24
(
rows
-
y
-
1
,
srcstep,
(
cols
-
x
-
1
)
*sizeoftype
+
srcoffset
))
;
int
x1
=
cols
-
x
-
1
;
__global
const
type*
src0
=
(
__global
const
type*
)(
srcptr
+
mad24
(
y,
srcstep,
mad24
(
x,
sizeoftype,
srcoffset
)))
;
__global
const
type*
src1
=
(
__global
const
type*
)(
srcptr
+
mad24
(
rows
-
y
-
1
,
srcstep,
mad24
(
x1,
sizeoftype,
srcoffset
)))
;
__global
type*
dst0
=
(
__global
type*
)(
dstptr
+
mad24
(
rows
-
y
-
1
,
dststep,
(
cols
-
x
-
1
)
*sizeoftype
+
dstoffset
))
;
__global
type*
dst1
=
(
__global
type*
)(
dstptr
+
mad24
(
y,
dststep,
x
*
sizeoftype
+
dstoffset
))
;
__global
type*
dst0
=
(
__global
type*
)(
dstptr
+
mad24
(
rows
-
y
-
1
,
dststep,
mad24
(
x1,
sizeoftype,
dstoffset
)
))
;
__global
type*
dst1
=
(
__global
type*
)(
dstptr
+
mad24
(
y,
dststep,
mad24
(
x,
sizeoftype,
dstoffset
)
))
;
dst0[0]
=
src0[0]
;
dst1[0]
=
src1[0]
;
...
...
@@ -90,11 +91,12 @@ __kernel void arithm_flip_cols(__global const uchar* srcptr, int srcstep, int sr
if
(
x
<
thread_cols
&&
y
<
rows
)
{
__global
const
type*
src0
=
(
__global
const
type*
)(
srcptr
+
mad24
(
y,
srcstep,
x
*
sizeoftype
+
srcoffset
))
;
__global
const
type*
src1
=
(
__global
const
type*
)(
srcptr
+
mad24
(
y,
srcstep,
(
cols
-
x
-
1
)
*sizeoftype
+
srcoffset
))
;
int
x1
=
cols
-
x
-
1
;
__global
const
type*
src0
=
(
__global
const
type*
)(
srcptr
+
mad24
(
y,
srcstep,
mad24
(
x,
sizeoftype,
srcoffset
)))
;
__global
const
type*
src1
=
(
__global
const
type*
)(
srcptr
+
mad24
(
y,
srcstep,
mad24
(
x1,
sizeoftype,
srcoffset
)))
;
__global
type*
dst0
=
(
__global
type*
)(
dstptr
+
mad24
(
y,
dststep,
(
cols
-
x
-
1
)
*sizeoftype
+
dstoffset
))
;
__global
type*
dst1
=
(
__global
type*
)(
dstptr
+
mad24
(
y,
dststep,
x
*
sizeoftype
+
dstoffset
))
;
__global
type*
dst0
=
(
__global
type*
)(
dstptr
+
mad24
(
y,
dststep,
mad24
(
x1,
sizeoftype,
dstoffset
)
))
;
__global
type*
dst1
=
(
__global
type*
)(
dstptr
+
mad24
(
y,
dststep,
mad24
(
x,
sizeoftype,
dstoffset
)
))
;
dst1[0]
=
src1[0]
;
dst0[0]
=
src0[0]
;
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/opencl/inrange.cl
View file @
c00f0faf
...
...
@@ -64,23 +64,22 @@ __kernel void inrange(__global const uchar * src1ptr, int src1_step, int src1_of
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
int
src1_index
=
mad24
(
y,
src1_step,
x*
(
int
)
sizeof
(
T
)
*cn
+
src1_offset
)
;
int
src1_index
=
mad24
(
y,
src1_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
cn,
src1_offset
)
)
;
int
dst_index
=
mad24
(
y,
dst_step,
x
+
dst_offset
)
;
__global
const
T
*
src1
=
(
__global
const
T
*
)(
src1ptr
+
src1_index
)
;
__global
uchar
*
dst
=
dstptr
+
dst_index
;
#
ifndef
HAVE_SCALAR
int
src2_index
=
mad24
(
y,
src2_step,
x*
(
int
)
sizeof
(
T
)
*cn
+
src2_offset
)
;
int
src3_index
=
mad24
(
y,
src3_step,
x*
(
int
)
sizeof
(
T
)
*cn
+
src3_offset
)
;
int
src2_index
=
mad24
(
y,
src2_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
cn,
src2_offset
)
)
;
int
src3_index
=
mad24
(
y,
src3_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
cn,
src3_offset
)
)
;
__global
const
T
*
src2
=
(
__global
const
T
*
)(
src2ptr
+
src2_index
)
;
__global
const
T
*
src3
=
(
__global
const
T
*
)(
src3ptr
+
src3_index
)
;
#
endif
dst[0]
=
255
;
#
pragma
unroll
for
(
int
c
=
0
; c < cn; ++c)
if
(
src2[c]
>
src1[c]
||
src3[c]
<
src1[c]
)
if
(
src2[c]
>
src1[c]
||
src3[c]
<
src1[c]
)
{
dst[0]
=
0
;
break
;
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/opencl/lut.cl
View file @
c00f0faf
...
...
@@ -34,14 +34,6 @@
//
//
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
__kernel
void
LUT
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
__global
const
uchar
*
lutptr,
int
lut_step,
int
lut_offset,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
rows,
int
cols
)
...
...
@@ -51,8 +43,8 @@ __kernel void LUT(__global const uchar * srcptr, int src_step, int src_offset,
if
(
x
<
cols
&&
y
<
rows
)
{
int
src_index
=
mad24
(
y,
src_step,
src_offset
+
x
*
(
int
)
sizeof
(
srcT
)
*
dcn
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
(
int
)
sizeof
(
dstT
)
*
dcn
)
;
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
srcT
)
*
dcn,
src_offset
)
)
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
dstT
)
*
dcn,
dst_offset
)
)
;
__global
const
srcT
*
src
=
(
__global
const
srcT
*
)(
srcptr
+
src_index
)
;
__global
const
dstT
*
lut
=
(
__global
const
dstT
*
)(
lutptr
+
lut_offset
)
;
...
...
@@ -65,7 +57,7 @@ __kernel void LUT(__global const uchar * srcptr, int src_step, int src_offset,
#
else
#
pragma
unroll
for
(
int
cn
=
0
; cn < dcn; ++cn)
dst[cn]
=
lut[
src[cn]
*
dcn
+
cn
]
;
dst[cn]
=
lut[
mad24
(
src[cn],
dcn,
cn
)
]
;
#
endif
}
}
This diff is collapsed.
Click to expand it.
modules/core/src/opencl/mixchannels.cl
View file @
c00f0faf
...
...
@@ -46,9 +46,9 @@
#
define
DECLARE_OUTPUT_MAT
(
i
)
\
__global
uchar
*
dst##i##ptr,
int
dst##i##_step,
int
dst##i##_offset,
#
define
PROCESS_ELEM
(
i
)
\
int
src##i##_index
=
mad24
(
src##i##_step,
y,
x
*
(
int
)
sizeof
(
T
)
*
scn##i
+
src##i##_offset
)
; \
int
src##i##_index
=
mad24
(
src##i##_step,
y,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
scn##i,
src##i##_offset
)
)
; \
__global
const
T
*
src##i
=
(
__global
const
T
*
)(
src##i##ptr
+
src##i##_index
)
; \
int
dst##i##_index
=
mad24
(
dst##i##_step,
y,
x
*
(
int
)
sizeof
(
T
)
*
dcn##i
+
dst##i##_offset
)
; \
int
dst##i##_index
=
mad24
(
dst##i##_step,
y,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
dcn##i,
dst##i##_offset
)
)
; \
__global
T
*
dst##i
=
(
__global
T
*
)(
dst##i##ptr
+
dst##i##_index
)
; \
dst##i[0]
=
src##i[0]
;
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/opencl/mulspectrums.cl
View file @
c00f0faf
...
...
@@ -45,7 +45,7 @@
inline
float2
cmulf
(
float2
a,
float2
b
)
{
return
(
float2
)(
a.x
*
b.x
-
a.y
*
b.y,
a.x
*
b.y
+
a.y
*
b.x
)
;
return
(
float2
)(
mad
(
a.x,
b.x,
-
a.y
*
b.y
)
,
mad
(
a.x,
b.y,
a.y
*
b.x
)
)
;
}
inline
float2
conjf
(
float2
a
)
...
...
@@ -63,9 +63,9 @@ __kernel void mulAndScaleSpectrums(__global const uchar * src1ptr, int src1_step
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
{
int
src1_index
=
mad24
(
y,
src1_step,
x
*
(
int
)
sizeof
(
float2
)
+
src1_offset
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
*
(
int
)
sizeof
(
float2
)
+
src2_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
x
*
(
int
)
sizeof
(
float2
)
+
dst_offset
)
;
int
src1_index
=
mad24
(
y,
src1_step,
mad24
(
x,
(
int
)
sizeof
(
float2
)
,
src1_offset
)
)
;
int
src2_index
=
mad24
(
y,
src2_step,
mad24
(
x,
(
int
)
sizeof
(
float2
)
,
src2_offset
)
)
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float2
)
,
dst_offset
)
)
;
float2
src0
=
*
(
__global
const
float2
*
)(
src1ptr
+
src1_index
)
;
float2
src1
=
*
(
__global
const
float2
*
)(
src2ptr
+
src2_index
)
;
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/opencl/reduce.cl
View file @
c00f0faf
...
...
@@ -58,20 +58,34 @@
#
define
EXTRA_PARAMS
#
endif
//
accumulative
reduction
stuff
#
if
defined
OP_SUM
|
| defined OP_SUM_ABS || defined OP_SUM_SQR || defined OP_DOT
#ifdef OP_DOT
#define FUNC(a, b, c) a += b * c
#if ddepth <= 4
#define FUNC(a, b, c) a = mad24(b, c, a)
#else
#define FUNC(a, b, c) a = mad(b, c, a)
#endif
#elif defined OP_SUM
#define FUNC(a, b) a += b
#elif defined OP_SUM_ABS
#define FUNC(a, b) a += b >= (dstT)(0) ? b : -b
#elif defined OP_SUM_SQR
#define FUNC(a, b) a += b * b
#if ddepth <= 4
#define FUNC(a, b) a = mad24(b, b, a)
#else
#define FUNC(a, b) a = mad(b, b, a)
#endif
#endif
#define DECLARE_LOCAL_MEM \
__local dstT localmem[WGS2_ALIGNED]
#define DEFINE_ACCUMULATOR \
dstT accumulator = (dstT)(0)
#ifdef HAVE_MASK
#define REDUCE_GLOBAL \
dstT temp = convertToDT(src[0]); \
...
...
@@ -80,7 +94,7 @@
FUNC(accumulator, temp)
#elif defined OP_DOT
#define REDUCE_GLOBAL \
int src2_index = mad24(id / cols, src2_step,
src2_offset + (id % cols) * (int)sizeof(srcT
)); \
int src2_index = mad24(id / cols, src2_step,
mad24(id % cols, (int)sizeof(srcT), src2_offset
)); \
__global const srcT * src2 = (__global const srcT *)(src2ptr + src2_index); \
dstT temp = convertToDT(src[0]), temp2 = convertToDT(src2[0]); \
FUNC(accumulator, temp, temp2)
...
...
@@ -89,6 +103,7 @@
dstT temp = convertToDT(src[0]); \
FUNC(accumulator, temp)
#endif
#define SET_LOCAL_1 \
localmem[lid] = accumulator
#define REDUCE_LOCAL_1 \
...
...
@@ -99,6 +114,7 @@
__global dstT * dst = (__global dstT *)(dstptr + (int)sizeof(dstT) * gid); \
dst[0] = localmem[0]
// countNonZero stuff
#elif defined OP_COUNT_NON_ZERO
#define dstT int
#define DECLARE_LOCAL_MEM \
...
...
@@ -118,6 +134,7 @@
__global dstT * dst = (__global dstT *)(dstptr + (int)sizeof(dstT) * gid); \
dst[0] = localmem[0]
// minMaxLoc stuff
#elif defined OP_MIN_MAX_LOC |
|
defined
OP_MIN_MAX_LOC_MASK
#
ifdef
DEPTH_0
...
...
@@ -179,8 +196,8 @@
#
define
REDUCE_LOCAL_1
\
srcT
oldmin
=
localmem_min[lid-WGS2_ALIGNED]
; \
srcT
oldmax
=
localmem_max[lid-WGS2_ALIGNED]
; \
localmem_min[lid
-
WGS2_ALIGNED]
=
min
(
minval,localmem_min[lid-WGS2_ALIGNED]
)
; \
localmem_max[lid
-
WGS2_ALIGNED]
=
max
(
maxval,localmem_max[lid-WGS2_ALIGNED]
)
; \
localmem_min[lid
-
WGS2_ALIGNED]
=
min
(
minval,
localmem_min[lid-WGS2_ALIGNED]
)
; \
localmem_max[lid
-
WGS2_ALIGNED]
=
max
(
maxval,
localmem_max[lid-WGS2_ALIGNED]
)
; \
srcT
minv
=
localmem_min[lid
-
WGS2_ALIGNED],
maxv
=
localmem_max[lid
-
WGS2_ALIGNED]
; \
localmem_minloc[lid
-
WGS2_ALIGNED]
=
(
minv
==
minval
)
?
(
minv
==
oldmin
)
?
\
min
(
minloc,
localmem_minloc[lid-WGS2_ALIGNED]
)
:
minloc
:
localmem_minloc[lid-WGS2_ALIGNED]
; \
...
...
@@ -233,15 +250,17 @@
#
else
#
error
"No operation"
#
endif
#
endif
//
end
of
minMaxLoc
stuff
#
ifdef
OP_MIN_MAX_LOC
#
undef
EXTRA_PARAMS
#
define
EXTRA_PARAMS
,
__global
uchar
*
dstptr2,
__global
int
*
dstlocptr,
__global
int
*
dstlocptr2
#
elif
defined
OP_MIN_MAX_LOC_MASK
#
undef
EXTRA_PARAMS
#
define
EXTRA_PARAMS
,
__global
uchar
*
dstptr2,
__global
int
*
dstlocptr,
__global
int
*
dstlocptr2,
\
__global
const
uchar
*
maskptr,
int
mask_step,
int
mask_offset
#
elif
defined
OP_DOT
#
undef
EXTRA_PARAMS
#
define
EXTRA_PARAMS
,
__global
uchar
*
src2ptr,
int
src2_step,
int
src2_offset
...
...
@@ -259,7 +278,7 @@ __kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset
for
(
int
grain
=
groupnum
*
WGS
; id < total; id += grain)
{
int
src_index
=
mad24
(
id
/
cols,
src_step,
src_offset
+
(
id
%
cols
)
*
(
int
)
sizeof
(
srcT
))
;
int
src_index
=
mad24
(
id
/
cols,
src_step,
mad24
(
id
%
cols,
(
int
)
sizeof
(
srcT
)
,
src_offset
))
;
__global
const
srcT
*
src
=
(
__global
const
srcT
*
)(
srcptr
+
src_index
)
;
REDUCE_GLOBAL
;
}
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/opencl/reduce2.cl
View file @
c00f0faf
...
...
@@ -98,7 +98,7 @@ __kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset
int
x
=
get_global_id
(
0
)
;
if
(
x
<
cols
)
{
int
src_index
=
x
*
(
int
)
sizeof
(
srcT
)
*
cn
+
src_offset
;
int
src_index
=
mad24
(
x,
(
int
)
sizeof
(
srcT
)
*
cn,
src_offset
)
;
__global
dstT
*
dst
=
(
__global
dstT
*
)(
dstptr
+
dst_offset
)
+
x
*
cn
;
dstT
tmp[cn]
=
{
INIT_VALUE
}
;
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/opencl/set_identity.cl
View file @
c00f0faf
...
...
@@ -51,7 +51,7 @@ __kernel void setIdentity(__global uchar * srcptr, int src_step, int src_offset,
if
(
x
<
cols
&&
y
<
rows
)
{
int
src_index
=
mad24
(
y,
src_step,
src_offset
+
x
*
(
int
)
sizeof
(
T
))
;
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src_offset
))
;
__global
T
*
src
=
(
__global
T
*
)(
srcptr
+
src_index
)
;
src[0]
=
x
==
y
?
scalar
:
(
T
)(
0
)
;
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/opencl/split_merge.cl
View file @
c00f0faf
...
...
@@ -45,7 +45,7 @@
#
define
DECLARE_SRC_PARAM
(
index
)
__global
const
uchar
*
src##index##ptr,
int
src##index##_step,
int
src##index##_offset,
#
define
DECLARE_DATA
(
index
)
__global
const
T
*
src##index
=
\
(
__global
T
*
)(
src##index##ptr
+
mad24
(
src##index##_step,
y,
x
*
(
int
)
sizeof
(
T
)
+
src##index##_offset
))
;
(
__global
T
*
)(
src##index##ptr
+
mad24
(
src##index##_step,
y,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src##index##_offset
)
))
;
#
define
PROCESS_ELEM
(
index
)
dst[index]
=
src##index[0]
;
__kernel
void
merge
(
DECLARE_SRC_PARAMS_N
...
...
@@ -58,7 +58,7 @@ __kernel void merge(DECLARE_SRC_PARAMS_N
if
(
x
<
cols
&&
y
<
rows
)
{
DECLARE_DATA_N
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
mad24
(
dst_step,
y,
x
*
(
int
)
sizeof
(
T
)
*
cn
+
dst_offset
))
;
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
mad24
(
dst_step,
y,
mad24
(
x,
(
int
)
sizeof
(
T
)
*
cn,
dst_offset
)
))
;
PROCESS_ELEMS_N
}
}
...
...
@@ -67,7 +67,7 @@ __kernel void merge(DECLARE_SRC_PARAMS_N
#
define
DECLARE_DST_PARAM
(
index
)
,
__global
uchar
*
dst##index##ptr,
int
dst##index##_step,
int
dst##index##_offset
#
define
DECLARE_DATA
(
index
)
__global
T
*
dst##index
=
\
(
__global
T
*
)(
dst##index##ptr
+
mad24
(
y,
dst##index##_step,
x
*
(
int
)
sizeof
(
T
)
+
dst##index##_offset
))
;
(
__global
T
*
)(
dst##index##ptr
+
mad24
(
y,
dst##index##_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
dst##index##_offset
)
))
;
#
define
PROCESS_ELEM
(
index
)
dst##index[0]
=
src[index]
;
__kernel
void
split
(
__global
uchar*
srcptr,
int
src_step,
int
src_offset,
int
rows,
int
cols
DECLARE_DST_PARAMS
)
...
...
@@ -78,7 +78,7 @@ __kernel void split(__global uchar* srcptr, int src_step, int src_offset, int ro
if
(
x
<
cols
&&
y
<
rows
)
{
DECLARE_DATA_N
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
mad24
(
y,
src_step,
x
*
cn
*
(
int
)
sizeof
(
T
)
+
src_offset
))
;
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
mad24
(
y,
src_step,
mad24
(
x,
cn
*
(
int
)
sizeof
(
T
)
,
src_offset
)
))
;
PROCESS_ELEMS_N
}
}
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/opencl/transpose.cl
View file @
c00f0faf
...
...
@@ -60,7 +60,7 @@ __kernel void transpose(__global const uchar * srcptr, int src_step, int src_off
}
else
{
int
bid
=
gp_x
+
gs_x
*
gp_y
;
int
bid
=
mad24
(
gs_x,
gp_y,
gp_x
)
;
groupId_y
=
bid
%
gs_y
;
groupId_x
=
((
bid
/
gs_y
)
+
groupId_y
)
%
gs_x
;
}
...
...
@@ -68,23 +68,23 @@ __kernel void transpose(__global const uchar * srcptr, int src_step, int src_off
int
lx
=
get_local_id
(
0
)
;
int
ly
=
get_local_id
(
1
)
;
int
x
=
groupId_x
*
TILE_DIM
+
lx
;
int
y
=
groupId_y
*
TILE_DIM
+
ly
;
int
x
=
mad24
(
groupId_x,
TILE_DIM,
lx
)
;
int
y
=
mad24
(
groupId_y,
TILE_DIM,
ly
)
;
int
x_index
=
groupId_y
*
TILE_DIM
+
lx
;
int
y_index
=
groupId_x
*
TILE_DIM
+
ly
;
int
x_index
=
mad24
(
groupId_y,
TILE_DIM,
lx
)
;
int
y_index
=
mad24
(
groupId_x,
TILE_DIM,
ly
)
;
__local
T
title[TILE_DIM
*
LDS_STEP]
;
if
(
x
<
src_cols
&&
y
<
src_rows
)
{
int
index_src
=
mad24
(
y,
src_step,
x
*
(
int
)
sizeof
(
T
)
+
src_offset
)
;
int
index_src
=
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src_offset
)
)
;
for
(
int
i
=
0
; i < TILE_DIM; i += BLOCK_ROWS)
if
(
y
+
i
<
src_rows
)
{
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
index_src
)
;
title[
(
ly
+
i
)
*
LDS_STEP
+
lx
]
=
src[0]
;
title[
mad24
(
ly
+
i,
LDS_STEP,
lx
)
]
=
src[0]
;
index_src
=
mad24
(
BLOCK_ROWS,
src_step,
index_src
)
;
}
}
...
...
@@ -92,13 +92,13 @@ __kernel void transpose(__global const uchar * srcptr, int src_step, int src_off
if
(
x_index
<
src_rows
&&
y_index
<
src_cols
)
{
int
index_dst
=
mad24
(
y_index,
dst_step,
x_index
*
(
int
)
sizeof
(
T
)
+
dst_offset
)
;
int
index_dst
=
mad24
(
y_index,
dst_step,
mad24
(
x_index,
(
int
)
sizeof
(
T
)
,
dst_offset
)
)
;
for
(
int
i
=
0
; i < TILE_DIM; i += BLOCK_ROWS)
if
((
y_index
+
i
)
<
src_cols
)
{
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
index_dst
)
;
dst[0]
=
title[
lx
*
LDS_STEP
+
ly
+
i
]
;
dst[0]
=
title[
mad24
(
lx,
LDS_STEP,
ly
+
i
)
]
;
index_dst
=
mad24
(
BLOCK_ROWS,
dst_step,
index_dst
)
;
}
}
...
...
@@ -111,8 +111,8 @@ __kernel void transpose_inplace(__global uchar * srcptr, int src_step, int src_o
if
(
y
<
src_rows
&&
x
<
y
)
{
int
src_index
=
mad24
(
y,
src_step,
src_offset
+
x
*
(
int
)
sizeof
(
T
))
;
int
dst_index
=
mad24
(
x,
src_step,
src_offset
+
y
*
(
int
)
sizeof
(
T
))
;
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src_offset
))
;
int
dst_index
=
mad24
(
x,
src_step,
mad24
(
y,
(
int
)
sizeof
(
T
)
,
src_offset
))
;
__global
T
*
src
=
(
__global
T
*
)(
srcptr
+
src_index
)
;
__global
T
*
dst
=
(
__global
T
*
)(
srcptr
+
dst_index
)
;
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/stat.cpp
View file @
c00f0faf
...
...
@@ -494,8 +494,8 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask
static
const
char
*
const
opMap
[
3
]
=
{
"OP_SUM"
,
"OP_SUM_ABS"
,
"OP_SUM_SQR"
};
char
cvt
[
40
];
ocl
::
Kernel
k
(
"reduce"
,
ocl
::
core
::
reduce_oclsrc
,
format
(
"-D srcT=%s -D dstT=%s -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
dtype
),
ocl
::
convertTypeStr
(
depth
,
ddepth
,
cn
,
cvt
),
format
(
"-D srcT=%s -D dstT=%s -D
ddepth=%d -D
convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
dtype
),
ddepth
,
ocl
::
convertTypeStr
(
depth
,
ddepth
,
cn
,
cvt
),
opMap
[
sum_op
],
(
int
)
wgs
,
wgs2_aligned
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
haveMask
?
" -D HAVE_MASK"
:
""
));
...
...
This diff is collapsed.
Click to expand it.
modules/core/src/umatrix.cpp
View file @
c00f0faf
...
...
@@ -719,10 +719,14 @@ void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) con
if
(
dims
<=
2
&&
cn
&&
_dst
.
isUMat
()
&&
ocl
::
useOpenCL
()
&&
((
needDouble
&&
doubleSupport
)
||
!
needDouble
)
)
{
char
cvt
[
40
];
int
wdepth
=
std
::
max
(
CV_32F
,
sdepth
);
char
cvt
[
2
][
40
];
ocl
::
Kernel
k
(
"convertTo"
,
ocl
::
core
::
convert_oclsrc
,
format
(
"-D srcT=%s -D dstT=%s -D convertToDT=%s%s"
,
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
ddepth
),
ocl
::
convertTypeStr
(
CV_32F
,
ddepth
,
1
,
cvt
),
format
(
"-D srcT=%s -D WT=%s -D dstT=%s -D convertToWT=%s -D convertToDT=%s%s"
,
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
wdepth
),
ocl
::
typeToStr
(
ddepth
),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
1
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
1
,
cvt
[
1
]),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
!
k
.
empty
())
{
...
...
@@ -731,7 +735,13 @@ void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) con
UMat
dst
=
_dst
.
getUMat
();
float
alphaf
=
(
float
)
alpha
,
betaf
=
(
float
)
beta
;
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
,
cn
),
alphaf
,
betaf
);
ocl
::
KernelArg
srcarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
dstarg
=
ocl
::
KernelArg
::
WriteOnly
(
dst
,
cn
);
if
(
wdepth
==
CV_32F
)
k
.
args
(
srcarg
,
dstarg
,
alphaf
,
betaf
);
else
k
.
args
(
srcarg
,
dstarg
,
alpha
,
beta
);
size_t
globalsize
[
2
]
=
{
dst
.
cols
*
cn
,
dst
.
rows
};
if
(
k
.
run
(
2
,
globalsize
,
NULL
,
false
))
...
...
@@ -838,8 +848,8 @@ static bool ocl_dot( InputArray _src1, InputArray _src2, double & res )
char
cvt
[
40
];
ocl
::
Kernel
k
(
"reduce"
,
ocl
::
core
::
reduce_oclsrc
,
format
(
"-D srcT=%s -D dstT=%s -D convertToDT=%s -D OP_DOT -D WGS=%d -D WGS2_ALIGNED=%d%s"
,
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
ddepth
),
ocl
::
convertTypeStr
(
depth
,
ddepth
,
1
,
cvt
),
format
(
"-D srcT=%s -D dstT=%s -D
ddepth=%d -D
convertToDT=%s -D OP_DOT -D WGS=%d -D WGS2_ALIGNED=%d%s"
,
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
ddepth
),
ddepth
,
ocl
::
convertTypeStr
(
depth
,
ddepth
,
1
,
cvt
),
(
int
)
wgs
,
wgs2_aligned
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
return
false
;
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment