Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
15738bf7
Commit
15738bf7
authored
May 14, 2014
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
multiple rows in KF kernel
parent
5ee398bf
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
126 additions
and
90 deletions
+126
-90
arithm.cpp
modules/core/src/arithm.cpp
+16
-13
convert.cpp
modules/core/src/convert.cpp
+7
-5
mathfuncs.cpp
modules/core/src/mathfuncs.cpp
+32
-22
matmul.cpp
modules/core/src/matmul.cpp
+7
-5
arithm.cl
modules/core/src/opencl/arithm.cl
+49
-33
stat.cpp
modules/core/src/stat.cpp
+15
-12
No files found.
modules/core/src/arithm.cpp
View file @
15738bf7
...
...
@@ -1008,7 +1008,8 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
int
srcdepth
=
CV_MAT_DEPTH
(
srctype
);
int
cn
=
CV_MAT_CN
(
srctype
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
const
ocl
::
Device
d
=
ocl
::
Device
::
getDefault
();
bool
doubleSupport
=
d
.
doubleFPConfig
()
>
0
;
if
(
oclop
<
0
||
((
haveMask
||
haveScalar
)
&&
cn
>
4
)
||
(
!
doubleSupport
&&
srcdepth
==
CV_64F
&&
!
bitwise
))
return
false
;
...
...
@@ -1016,8 +1017,9 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
char
opts
[
1024
];
int
kercn
=
haveMask
||
haveScalar
?
cn
:
ocl
::
predictOptimalVectorWidth
(
_src1
,
_src2
,
_dst
);
int
scalarcn
=
kercn
==
3
?
4
:
kercn
;
int
rowsPerWI
=
d
.
isIntel
()
?
4
:
1
;
sprintf
(
opts
,
"-D %s%s -D %s -D dstT=%s%s -D dstT_C1=%s -D workST=%s -D cn=%d"
,
sprintf
(
opts
,
"-D %s%s -D %s -D dstT=%s%s -D dstT_C1=%s -D workST=%s -D cn=%d
-D rowsPerWI=%d
"
,
haveMask
?
"MASK_"
:
""
,
haveScalar
?
"UNARY_OP"
:
"BINARY_OP"
,
oclop2str
[
oclop
],
bitwise
?
ocl
::
memopTypeToStr
(
CV_MAKETYPE
(
srcdepth
,
kercn
))
:
ocl
::
typeToStr
(
CV_MAKETYPE
(
srcdepth
,
kercn
)),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
...
...
@@ -1025,7 +1027,7 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
ocl
::
typeToStr
(
CV_MAKETYPE
(
srcdepth
,
1
)),
bitwise
?
ocl
::
memopTypeToStr
(
CV_MAKETYPE
(
srcdepth
,
scalarcn
))
:
ocl
::
typeToStr
(
CV_MAKETYPE
(
srcdepth
,
scalarcn
)),
kercn
);
kercn
,
rowsPerWI
);
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
opts
);
if
(
k
.
empty
())
...
...
@@ -1068,7 +1070,7 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
k
.
args
(
src1arg
,
src2arg
,
maskarg
,
dstarg
);
}
size_t
globalsize
[]
=
{
src1
.
cols
*
cn
/
kercn
,
src1
.
rows
};
size_t
globalsize
[]
=
{
src1
.
cols
*
cn
/
kercn
,
(
src1
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
0
,
false
);
}
...
...
@@ -1371,7 +1373,8 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
void
*
usrdata
,
int
oclop
,
bool
haveScalar
)
{
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
const
ocl
::
Device
d
=
ocl
::
Device
::
getDefault
();
bool
doubleSupport
=
d
.
doubleFPConfig
()
>
0
;
int
type1
=
_src1
.
type
(),
depth1
=
CV_MAT_DEPTH
(
type1
),
cn
=
CV_MAT_CN
(
type1
);
bool
haveMask
=
!
_mask
.
empty
();
...
...
@@ -1388,12 +1391,12 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
return
false
;
int
kercn
=
haveMask
||
haveScalar
?
cn
:
ocl
::
predictOptimalVectorWidth
(
_src1
,
_src2
,
_dst
);
int
scalarcn
=
kercn
==
3
?
4
:
kercn
;
int
scalarcn
=
kercn
==
3
?
4
:
kercn
,
rowsPerWI
=
d
.
isIntel
()
?
4
:
1
;
char
cvtstr
[
4
][
32
],
opts
[
1024
];
sprintf
(
opts
,
"-D %s%s -D %s -D srcT1=%s -D srcT1_C1=%s -D srcT2=%s -D srcT2_C1=%s "
"-D dstT=%s -D dstT_C1=%s -D workT=%s -D workST=%s -D scaleT=%s -D wdepth=%d -D convertToWT1=%s "
"-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d"
,
"-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d
-D rowsPerWI=%d
"
,
(
haveMask
?
"MASK_"
:
""
),
(
haveScalar
?
"UNARY_OP"
:
"BINARY_OP"
),
oclop2str
[
oclop
],
ocl
::
typeToStr
(
CV_MAKETYPE
(
depth1
,
kercn
)),
ocl
::
typeToStr
(
depth1
),
ocl
::
typeToStr
(
CV_MAKETYPE
(
depth2
,
kercn
)),
...
...
@@ -1404,7 +1407,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
ocl
::
convertTypeStr
(
depth1
,
wdepth
,
kercn
,
cvtstr
[
0
]),
ocl
::
convertTypeStr
(
depth2
,
wdepth
,
kercn
,
cvtstr
[
1
]),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
kercn
,
cvtstr
[
2
]),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
kercn
);
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
kercn
,
rowsPerWI
);
size_t
usrdata_esz
=
CV_ELEM_SIZE
(
wdepth
);
const
uchar
*
usrdata_p
=
(
const
uchar
*
)
usrdata
;
...
...
@@ -1478,7 +1481,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
k
.
args
(
src1arg
,
src2arg
,
maskarg
,
dstarg
);
}
size_t
globalsize
[]
=
{
src1
.
cols
*
cn
/
kercn
,
src1
.
rows
};
size_t
globalsize
[]
=
{
src1
.
cols
*
cn
/
kercn
,
(
src1
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
...
...
@@ -2764,7 +2767,7 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in
if
(
!
haveScalar
&&
(
!
_src1
.
sameSize
(
_src2
)
||
type1
!=
type2
))
return
false
;
int
kercn
=
haveScalar
?
cn
:
ocl
::
predictOptimalVectorWidth
(
_src1
,
_src2
,
_dst
);
int
kercn
=
haveScalar
?
cn
:
ocl
::
predictOptimalVectorWidth
(
_src1
,
_src2
,
_dst
)
,
rowsPerWI
=
dev
.
isIntel
()
?
4
:
1
;
// Workaround for bug with "?:" operator in AMD OpenCL compiler
if
(
depth1
>=
CV_16U
)
kercn
=
1
;
...
...
@@ -2775,14 +2778,14 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in
String
opts
=
format
(
"-D %s -D srcT1=%s -D dstT=%s -D workT=srcT1 -D cn=%d"
" -D convertToDT=%s -D OP_CMP -D CMP_OPERATOR=%s -D srcT1_C1=%s"
" -D srcT2_C1=%s -D dstT_C1=%s -D workST=%s%s"
,
" -D srcT2_C1=%s -D dstT_C1=%s -D workST=%s
-D rowsPerWI=%d
%s"
,
haveScalar
?
"UNARY_OP"
:
"BINARY_OP"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth1
,
kercn
)),
ocl
::
typeToStr
(
CV_8UC
(
kercn
)),
kercn
,
ocl
::
convertTypeStr
(
depth1
,
CV_8U
,
kercn
,
cvt
),
operationMap
[
op
],
ocl
::
typeToStr
(
depth1
),
ocl
::
typeToStr
(
depth1
),
ocl
::
typeToStr
(
CV_8U
),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth1
,
scalarcn
)),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth1
,
scalarcn
)),
rowsPerWI
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
);
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
opts
);
...
...
@@ -2839,7 +2842,7 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in
ocl
::
KernelArg
::
WriteOnly
(
dst
,
cn
,
kercn
));
}
size_t
globalsize
[
2
]
=
{
dst
.
cols
*
cn
/
kercn
,
dst
.
rows
};
size_t
globalsize
[
2
]
=
{
dst
.
cols
*
cn
/
kercn
,
(
dst
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
...
...
modules/core/src/convert.cpp
View file @
15738bf7
...
...
@@ -1357,9 +1357,10 @@ static BinaryFunc getConvertScaleFunc(int sdepth, int ddepth)
static
bool
ocl_convertScaleAbs
(
InputArray
_src
,
OutputArray
_dst
,
double
alpha
,
double
beta
)
{
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
kercn
=
ocl
::
predictOptimalVectorWidth
(
_src
,
_dst
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
()
.
doubleFPConfig
()
>
0
;
kercn
=
ocl
::
predictOptimalVectorWidth
(
_src
,
_dst
)
,
rowsPerWI
=
d
.
isIntel
()
?
4
:
1
;
bool
doubleSupport
=
d
.
doubleFPConfig
()
>
0
;
if
(
!
doubleSupport
&&
depth
==
CV_64F
)
return
false
;
...
...
@@ -1368,13 +1369,14 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha
int
wdepth
=
std
::
max
(
depth
,
CV_32F
);
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D srcT1=%s"
" -D workT=%s -D wdepth=%d -D convertToWT1=%s -D convertToDT=%s -D workT1=%s%s"
,
" -D workT=%s -D wdepth=%d -D convertToWT1=%s -D convertToDT=%s"
" -D workT1=%s -D rowsPerWI=%d%s"
,
ocl
::
typeToStr
(
CV_8UC
(
kercn
)),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
kercn
)),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
kercn
)),
wdepth
,
ocl
::
convertTypeStr
(
depth
,
wdepth
,
kercn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
CV_8U
,
kercn
,
cvt
[
1
]),
ocl
::
typeToStr
(
wdepth
),
ocl
::
typeToStr
(
wdepth
),
rowsPerWI
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
return
false
;
...
...
@@ -1391,7 +1393,7 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha
else
if
(
wdepth
==
CV_64F
)
k
.
args
(
srcarg
,
dstarg
,
alpha
,
beta
);
size_t
globalsize
[
2
]
=
{
src
.
cols
*
cn
/
kercn
,
src
.
rows
};
size_t
globalsize
[
2
]
=
{
src
.
cols
*
cn
/
kercn
,
(
src
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
...
...
modules/core/src/mathfuncs.cpp
View file @
15738bf7
...
...
@@ -65,13 +65,15 @@ static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, in
int
kercn
=
oclop
==
OCL_OP_PHASE_DEGREES
||
oclop
==
OCL_OP_PHASE_RADIANS
?
1
:
ocl
::
predictOptimalVectorWidth
(
_src1
,
_src2
,
_dst
);
bool
double_support
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
const
ocl
::
Device
d
=
ocl
::
Device
::
getDefault
();
bool
double_support
=
d
.
doubleFPConfig
()
>
0
;
if
(
!
double_support
&&
depth
==
CV_64F
)
return
false
;
int
rowsPerWI
=
d
.
isIntel
()
?
4
:
1
;
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D %s -D %s -D dstT=%s%s"
,
_src2
.
empty
()
?
"UNARY_OP"
:
"BINARY_OP"
,
oclop2str
[
oclop
],
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
kercn
)),
format
(
"-D %s -D %s -D dstT=%s
-D rowsPerWI=%d
%s"
,
_src2
.
empty
()
?
"UNARY_OP"
:
"BINARY_OP"
,
oclop2str
[
oclop
],
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
kercn
)),
rowsPerWI
,
double_support
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
return
false
;
...
...
@@ -89,7 +91,7 @@ static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, in
else
k
.
args
(
src1arg
,
src2arg
,
dstarg
);
size_t
globalsize
[]
=
{
src1
.
cols
*
cn
/
kercn
,
src1
.
rows
};
size_t
globalsize
[]
=
{
src1
.
cols
*
cn
/
kercn
,
(
src1
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
0
,
false
);
}
...
...
@@ -524,8 +526,10 @@ void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegre
static
bool
ocl_cartToPolar
(
InputArray
_src1
,
InputArray
_src2
,
OutputArray
_dst1
,
OutputArray
_dst2
,
bool
angleInDegrees
)
{
int
type
=
_src1
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
int
type
=
_src1
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
rowsPerWI
=
d
.
isIntel
()
?
4
:
1
;
bool
doubleSupport
=
d
.
doubleFPConfig
()
>
0
;
if
(
!
(
_src1
.
dims
()
<=
2
&&
_src2
.
dims
()
<=
2
&&
(
depth
==
CV_32F
||
depth
==
CV_64F
)
&&
type
==
_src2
.
type
())
||
...
...
@@ -533,9 +537,9 @@ static bool ocl_cartToPolar( InputArray _src1, InputArray _src2,
return
false
;
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D BINARY_OP -D dstT=%s -D depth=%d -D OP_CTP_%s%s"
,
format
(
"-D BINARY_OP -D dstT=%s -D depth=%d -D
rowsPerWI=%d -D
OP_CTP_%s%s"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
1
)),
depth
,
angleInDegrees
?
"AD"
:
"AR"
,
depth
,
rowsPerWI
,
angleInDegrees
?
"AD"
:
"AR"
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
return
false
;
...
...
@@ -553,7 +557,7 @@ static bool ocl_cartToPolar( InputArray _src1, InputArray _src2,
ocl
::
KernelArg
::
WriteOnly
(
dst1
,
cn
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
dst2
));
size_t
globalsize
[
2
]
=
{
dst1
.
cols
*
cn
,
dst1
.
rows
};
size_t
globalsize
[
2
]
=
{
dst1
.
cols
*
cn
,
(
dst1
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
...
...
@@ -713,16 +717,18 @@ static void SinCos_32f( const float *angle, float *sinval, float* cosval,
static
bool
ocl_polarToCart
(
InputArray
_mag
,
InputArray
_angle
,
OutputArray
_dst1
,
OutputArray
_dst2
,
bool
angleInDegrees
)
{
int
type
=
_angle
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
int
type
=
_angle
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
rowsPerWI
=
d
.
isIntel
()
?
4
:
1
;
bool
doubleSupport
=
d
.
doubleFPConfig
()
>
0
;
if
(
!
doubleSupport
&&
depth
==
CV_64F
)
return
false
;
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D dstT=%s -D depth=%d -D BINARY_OP -D OP_PTC_%s%s"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
1
)),
depth
,
angleInDegrees
?
"AD"
:
"AR"
,
format
(
"-D dstT=%s -D
rowsPerWI=%d -D
depth=%d -D BINARY_OP -D OP_PTC_%s%s"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
1
)),
rowsPerWI
,
depth
,
angleInDegrees
?
"AD"
:
"AR"
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
return
false
;
...
...
@@ -738,7 +744,7 @@ static bool ocl_polarToCart( InputArray _mag, InputArray _angle,
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mag
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
angle
),
ocl
::
KernelArg
::
WriteOnly
(
dst1
,
cn
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
dst2
));
size_t
globalsize
[
2
]
=
{
dst1
.
cols
*
cn
,
dst1
.
rows
};
size_t
globalsize
[
2
]
=
{
dst1
.
cols
*
cn
,
(
dst1
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
...
...
@@ -2103,8 +2109,10 @@ static IPowFunc ipowTab[] =
static
bool
ocl_pow
(
InputArray
_src
,
double
power
,
OutputArray
_dst
,
bool
is_ipower
,
int
ipower
)
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
rowsPerWI
=
d
.
isIntel
()
?
4
:
1
;
bool
doubleSupport
=
d
.
doubleFPConfig
()
>
0
;
if
(
depth
==
CV_64F
&&
!
doubleSupport
)
return
false
;
...
...
@@ -2113,8 +2121,8 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
const
char
*
const
op
=
issqrt
?
"OP_SQRT"
:
is_ipower
?
"OP_POWN"
:
"OP_POW"
;
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D dstT=%s -D %s -D UNARY_OP%s"
,
ocl
::
typeToStr
(
depth
),
op
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
format
(
"-D dstT=%s -D
rowsPerWI=%d -D
%s -D UNARY_OP%s"
,
ocl
::
typeToStr
(
depth
),
rowsPerWI
,
op
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
return
false
;
...
...
@@ -2137,7 +2145,7 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
k
.
args
(
srcarg
,
dstarg
,
power
);
}
size_t
globalsize
[
2
]
=
{
dst
.
cols
*
cn
,
dst
.
rows
};
size_t
globalsize
[
2
]
=
{
dst
.
cols
*
cn
,
(
dst
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
...
...
@@ -2491,8 +2499,10 @@ bool checkRange(InputArray _src, bool quiet, Point* pt, double minVal, double ma
static
bool
ocl_patchNaNs
(
InputOutputArray
_a
,
float
value
)
{
int
rowsPerWI
=
ocl
::
Device
::
getDefault
().
isIntel
()
?
4
:
1
;
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D UNARY_OP -D OP_PATCH_NANS -D dstT=int"
));
format
(
"-D UNARY_OP -D OP_PATCH_NANS -D dstT=int -D rowsPerWI=%d"
,
rowsPerWI
));
if
(
k
.
empty
())
return
false
;
...
...
@@ -2502,7 +2512,7 @@ static bool ocl_patchNaNs( InputOutputArray _a, float value )
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
a
),
ocl
::
KernelArg
::
WriteOnly
(
a
,
cn
),
(
float
)
value
);
size_t
globalsize
[
2
]
=
{
a
.
cols
*
cn
,
a
.
rows
};
size_t
globalsize
[
2
]
=
{
a
.
cols
*
cn
,
(
a
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
...
...
modules/core/src/matmul.cpp
View file @
15738bf7
...
...
@@ -2153,9 +2153,10 @@ typedef void (*ScaleAddFunc)(const uchar* src1, const uchar* src2, uchar* dst, i
static
bool
ocl_scaleAdd
(
InputArray
_src1
,
double
alpha
,
InputArray
_src2
,
OutputArray
_dst
,
int
type
)
{
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
int
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
wdepth
=
std
::
max
(
depth
,
CV_32F
),
kercn
=
ocl
::
predictOptimalVectorWidth
(
_src1
,
_src2
,
_dst
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
()
.
doubleFPConfig
()
>
0
;
kercn
=
ocl
::
predictOptimalVectorWidth
(
_src1
,
_src2
,
_dst
)
,
rowsPerWI
=
d
.
isIntel
()
?
4
:
1
;
bool
doubleSupport
=
d
.
doubleFPConfig
()
>
0
;
Size
size
=
_src1
.
size
();
if
(
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
size
!=
_src2
.
size
()
)
...
...
@@ -2164,13 +2165,14 @@ static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, Outp
char
cvt
[
2
][
50
];
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D workT=%s -D convertToWT1=%s"
" -D srcT1=dstT -D srcT2=dstT -D convertToDT=%s -D workT1=%s -D wdepth=%d%s"
,
" -D srcT1=dstT -D srcT2=dstT -D convertToDT=%s -D workT1=%s"
" -D wdepth=%d%s -D rowsPerWI=%d"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
kercn
)),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
kercn
)),
ocl
::
convertTypeStr
(
depth
,
wdepth
,
kercn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
depth
,
kercn
,
cvt
[
1
]),
ocl
::
typeToStr
(
wdepth
),
wdepth
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
rowsPerWI
));
if
(
k
.
empty
())
return
false
;
...
...
@@ -2187,7 +2189,7 @@ static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, Outp
else
k
.
args
(
src1arg
,
src2arg
,
dstarg
,
alpha
);
size_t
globalsize
[
2
]
=
{
dst
.
cols
*
cn
/
kercn
,
dst
.
rows
};
size_t
globalsize
[
2
]
=
{
dst
.
cols
*
cn
/
kercn
,
(
dst
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
...
...
modules/core/src/opencl/arithm.cl
View file @
15738bf7
...
...
@@ -145,6 +145,7 @@
#define EXTRA_PARAMS
#define EXTRA_INDEX
#define EXTRA_INDEX_ADD
#if defined OP_ADD
#define PROCESS_ELEM storedst(convertToDT(srcelem1 + srcelem2))
...
...
@@ -363,7 +364,9 @@
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , __global uchar* dstptr2, int dststep2, int dstoffset2
#undef EXTRA_INDEX
#define EXTRA_INDEX int dst_index2 = mad24(y, dststep2, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset2))
#define EXTRA_INDEX int dst_index2 = mad24(y0, dststep2, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset2))
#undef EXTRA_INDEX_ADD
#define EXTRA_INDEX_ADD dst_index2 += dststep2
#endif
#if defined UNARY_OP || defined MASK_UNARY_OP
...
...
@@ -393,18 +396,25 @@ __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
int rows, int cols EXTRA_PARAMS )
{
int x = get_global_id(0);
int y
= get_global_id(1)
;
int y
0 = get_global_id(1) * rowsPerWI
;
if (x < cols
&& y < rows
)
if (x < cols)
{
int src1_index = mad24(y, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
int src1_index = mad24(y
0
, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
#if !(defined(OP_RECIP_SCALE) || defined(OP_NOT))
int src2_index = mad24(y, srcstep2, mad24(x, (int)sizeof(srcT2_C1) * cn, srcoffset2));
int src2_index = mad24(y
0
, srcstep2, mad24(x, (int)sizeof(srcT2_C1) * cn, srcoffset2));
#endif
int dst_index = mad24(y, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
int dst_index = mad24(y
0
, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
EXTRA_INDEX;
PROCESS_ELEM;
for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y, src1_index += srcstep1, dst_index += dststep)
{
PROCESS_ELEM;
#if !(defined(OP_RECIP_SCALE) || defined(OP_NOT))
src2_index += srcstep2;
#endif
EXTRA_INDEX_ADD;
}
}
}
...
...
@@ -417,19 +427,21 @@ __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
int rows, int cols EXTRA_PARAMS )
{
int x = get_global_id(0);
int y
= get_global_id(1)
;
int y
0 = get_global_id(1) * rowsPerWI
;
if (x < cols
&& y < rows
)
if (x < cols)
{
int mask_index = mad24(y, maskstep, x + maskoffset);
if( mask[mask_index] )
{
int src1_index = mad24(y, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
int src2_index = mad24(y, srcstep2, mad24(x, (int)sizeof(srcT2_C1) * cn, srcoffset2));
int dst_index = mad24(y, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
PROCESS_ELEM;
}
int mask_index = mad24(y0, maskstep, x + maskoffset);
int src1_index = mad24(y0, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
int src2_index = mad24(y0, srcstep2, mad24(x, (int)sizeof(srcT2_C1) * cn, srcoffset2));
int dst_index = mad24(y0, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y, src1_index += srcstep1, src2_index += srcstep2,
mask_index += maskstep, dst_index += dststep)
if (mask[mask_index])
{
PROCESS_ELEM;
}
}
}
...
...
@@ -440,14 +452,17 @@ __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
int rows, int cols EXTRA_PARAMS )
{
int x = get_global_id(0);
int y
= get_global_id(1)
;
int y
0 = get_global_id(1) * rowsPerWI
;
if (x < cols
&& y < rows
)
if (x < cols)
{
int src1_index = mad24(y, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
int dst_index = mad24(y, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
int src1_index = mad24(y
0
, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
int dst_index = mad24(y
0
, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
PROCESS_ELEM;
for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y, src1_index += srcstep1, dst_index += dststep)
{
PROCESS_ELEM;
}
}
}
...
...
@@ -459,18 +474,19 @@ __kernel void KF(__global const uchar * srcptr1, int srcstep1, int srcoffset1,
int rows, int cols EXTRA_PARAMS )
{
int x = get_global_id(0);
int y = get_global_id(1);
int y
0
= get_global_id(1);
if (x < cols
&& y < rows
)
if (x < cols)
{
int mask_index = mad24(y, maskstep, x + maskoffset);
if( mask[mask_index] )
{
int src1_index = mad24(y, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
int dst_index = mad24(y, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
PROCESS_ELEM;
}
int mask_index = mad24(y0, maskstep, x + maskoffset);
int src1_index = mad24(y0, srcstep1, mad24(x, (int)sizeof(srcT1_C1) * cn, srcoffset1));
int dst_index = mad24(y0, dststep, mad24(x, (int)sizeof(dstT_C1) * cn, dstoffset));
for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y, src1_index += srcstep1, mask_index += maskstep, dst_index += dststep)
if (mask[mask_index])
{
PROCESS_ELEM;
}
}
}
...
...
modules/core/src/stat.cpp
View file @
15738bf7
...
...
@@ -1973,8 +1973,9 @@ static NormDiffFunc getNormDiffFunc(int normType, int depth)
static
bool
ocl_norm
(
InputArray
_src
,
int
normType
,
InputArray
_mask
,
double
&
result
)
{
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
()
.
doubleFPConfig
()
>
0
,
bool
doubleSupport
=
d
.
doubleFPConfig
()
>
0
,
haveMask
=
_mask
.
kind
()
!=
_InputArray
::
NONE
;
if
(
!
(
normType
==
NORM_INF
||
normType
==
NORM_L1
||
normType
==
NORM_L2
||
normType
==
NORM_L2SQR
)
||
...
...
@@ -1991,13 +1992,14 @@ static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double &
if
(
depth
!=
CV_8U
&&
depth
!=
CV_16U
)
{
int
wdepth
=
std
::
max
(
CV_32S
,
depth
);
int
wdepth
=
std
::
max
(
CV_32S
,
depth
)
,
rowsPerWI
=
d
.
isIntel
()
?
4
:
1
;
char
cvt
[
50
];
ocl
::
Kernel
kabs
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D UNARY_OP -D OP_ABS_NOSAT -D dstT=%s -D srcT1=%s -D convertToDT=%s%s"
,
format
(
"-D UNARY_OP -D OP_ABS_NOSAT -D dstT=%s -D srcT1=%s"
" -D convertToDT=%s -D rowsPerWI=%d%s"
,
ocl
::
typeToStr
(
wdepth
),
ocl
::
typeToStr
(
depth
),
ocl
::
convertTypeStr
(
depth
,
wdepth
,
1
,
cvt
),
ocl
::
convertTypeStr
(
depth
,
wdepth
,
1
,
cvt
),
rowsPerWI
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
kabs
.
empty
())
return
false
;
...
...
@@ -2005,7 +2007,7 @@ static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double &
abssrc
.
create
(
src
.
size
(),
CV_MAKE_TYPE
(
wdepth
,
cn
));
kabs
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
abssrc
,
cn
));
size_t
globalsize
[
2
]
=
{
src
.
cols
*
cn
,
src
.
rows
};
size_t
globalsize
[
2
]
=
{
src
.
cols
*
cn
,
(
src
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
if
(
!
kabs
.
run
(
2
,
globalsize
,
NULL
,
false
))
return
false
;
}
...
...
@@ -2016,8 +2018,8 @@ static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double &
}
else
{
int
dbsize
=
ocl
::
Device
::
getDefault
()
.
maxComputeUnits
();
size_t
wgs
=
ocl
::
Device
::
getDefault
()
.
maxWorkGroupSize
();
int
dbsize
=
d
.
maxComputeUnits
();
size_t
wgs
=
d
.
maxWorkGroupSize
();
int
wgs2_aligned
=
1
;
while
(
wgs2_aligned
<
(
int
)
wgs
)
...
...
@@ -2384,8 +2386,9 @@ namespace cv {
static
bool
ocl_norm
(
InputArray
_src1
,
InputArray
_src2
,
int
normType
,
InputArray
_mask
,
double
&
result
)
{
int
type
=
_src1
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
int
type
=
_src1
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
rowsPerWI
=
d
.
isIntel
()
?
4
:
1
;
bool
doubleSupport
=
d
.
doubleFPConfig
()
>
0
;
bool
relative
=
(
normType
&
NORM_RELATIVE
)
!=
0
;
normType
&=
~
NORM_RELATIVE
;
...
...
@@ -2397,9 +2400,9 @@ static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArr
char
cvt
[
50
];
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D BINARY_OP -D OP_ABSDIFF -D dstT=%s -D workT=dstT -D srcT1=%s -D srcT2=srcT1"
" -D convertToDT=%s -D convertToWT1=convertToDT -D convertToWT2=convertToDT%s"
,
" -D convertToDT=%s -D convertToWT1=convertToDT -D convertToWT2=convertToDT
-D rowsPerWI=%d
%s"
,
ocl
::
typeToStr
(
wdepth
),
ocl
::
typeToStr
(
depth
),
ocl
::
convertTypeStr
(
depth
,
wdepth
,
1
,
cvt
),
ocl
::
convertTypeStr
(
depth
,
wdepth
,
1
,
cvt
),
rowsPerWI
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
return
false
;
...
...
@@ -2408,7 +2411,7 @@ static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArr
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src1
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src2
),
ocl
::
KernelArg
::
WriteOnly
(
diff
,
cn
));
size_t
globalsize
[
2
]
=
{
diff
.
cols
*
cn
,
diff
.
rows
};
size_t
globalsize
[
2
]
=
{
diff
.
cols
*
cn
,
(
diff
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
if
(
!
k
.
run
(
2
,
globalsize
,
NULL
,
false
))
return
false
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment