Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
001aa705
Commit
001aa705
authored
Dec 04, 2013
by
Andrey Pavlenko
Committed by
OpenCV Buildbot
Dec 04, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1902 from ilya-lavrenov:tapi_arithm
parents
90c23067
49474903
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
787 additions
and
33 deletions
+787
-33
arithm.cpp
modules/core/src/arithm.cpp
+63
-16
mathfuncs.cpp
modules/core/src/mathfuncs.cpp
+112
-2
matrix.cpp
modules/core/src/matrix.cpp
+77
-3
ocl.cpp
modules/core/src/ocl.cpp
+1
-1
arithm.cl
modules/core/src/opencl/arithm.cl
+62
-7
reduce.cl
modules/core/src/opencl/reduce.cl
+130
-0
set_identity.cl
modules/core/src/opencl/set_identity.cl
+59
-0
transpose.cl
modules/core/src/opencl/transpose.cl
+124
-0
stat.cpp
modules/core/src/stat.cpp
+159
-4
test_arithm.cpp
modules/core/test/ocl/test_arithm.cpp
+0
-0
No files found.
modules/core/src/arithm.cpp
View file @
001aa705
...
...
@@ -929,11 +929,11 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
int
srcdepth
=
CV_MAT_DEPTH
(
srctype
);
int
cn
=
CV_MAT_CN
(
srctype
);
if
(
oclop
<
0
||
((
haveMask
||
haveScalar
)
&&
cn
>
4
)
)
return
false
;
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
UMat
src1
=
_src1
.
getUMat
(),
src2
;
UMat
dst
=
_dst
.
getUMat
(),
mask
=
_mask
.
getUMat
();
if
(
oclop
<
0
||
((
haveMask
||
haveScalar
)
&&
(
cn
>
4
||
cn
==
3
))
||
(
!
doubleSupport
&&
srcdepth
==
CV_64F
))
return
false
;
char
opts
[
1024
];
int
kercn
=
haveMask
||
haveScalar
?
cn
:
1
;
...
...
@@ -946,6 +946,9 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
if
(
k
.
empty
()
)
return
false
;
UMat
src1
=
_src1
.
getUMat
(),
src2
;
UMat
dst
=
_dst
.
getUMat
(),
mask
=
_mask
.
getUMat
();
int
cscale
=
cn
/
kercn
;
ocl
::
KernelArg
src1arg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src1
,
cscale
);
ocl
::
KernelArg
dstarg
=
haveMask
?
ocl
::
KernelArg
::
ReadWrite
(
dst
,
cscale
)
:
...
...
@@ -1280,24 +1283,28 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
void
*
usrdata
,
int
oclop
,
bool
haveScalar
)
{
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
int
type1
=
_src1
.
type
(),
depth1
=
CV_MAT_DEPTH
(
type1
),
cn
=
CV_MAT_CN
(
type1
);
bool
haveMask
=
!
_mask
.
empty
();
if
(
((
haveMask
||
haveScalar
)
&&
cn
>
4
)
||
cn
==
3
)
// TODO need fix for 3 channels
if
(
((
haveMask
||
haveScalar
)
&&
(
cn
>
4
||
cn
==
3
))
)
return
false
;
int
dtype
=
_dst
.
type
(),
ddepth
=
CV_MAT_DEPTH
(
dtype
),
wdepth
=
std
::
max
(
CV_32S
,
CV_MAT_DEPTH
(
wtype
));
if
(
!
doubleSupport
)
wdepth
=
std
::
min
(
wdepth
,
CV_32F
);
wtype
=
CV_MAKETYPE
(
wdepth
,
cn
);
int
type2
=
haveScalar
?
wtype
:
_src2
.
type
(),
depth2
=
CV_MAT_DEPTH
(
type2
);
int
kercn
=
haveMask
||
haveScalar
?
cn
:
1
;
if
(
!
doubleSupport
&&
(
depth2
==
CV_64F
||
depth1
==
CV_64F
))
return
false
;
UMat
src1
=
_src1
.
getUMat
(),
src2
;
UMat
dst
=
_dst
.
getUMat
(),
mask
=
_mask
.
getUMat
();
int
kercn
=
haveMask
||
haveScalar
?
cn
:
1
;
char
cvtstr
[
3
][
32
],
opts
[
1024
];
sprintf
(
opts
,
"-D %s%s -D %s -D srcT1=%s -D srcT2=%s "
"-D dstT=%s -D workT=%s -D convertToWT1=%s "
"-D convertToWT2=%s -D convertToDT=%s"
,
"-D convertToWT2=%s -D convertToDT=%s
%s
"
,
(
haveMask
?
"MASK_"
:
""
),
(
haveScalar
?
"UNARY_OP"
:
"BINARY_OP"
),
oclop2str
[
oclop
],
ocl
::
typeToStr
(
CV_MAKETYPE
(
depth1
,
kercn
)),
ocl
::
typeToStr
(
CV_MAKETYPE
(
depth2
,
kercn
)),
...
...
@@ -1305,7 +1312,8 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
ocl
::
typeToStr
(
CV_MAKETYPE
(
wdepth
,
kercn
)),
ocl
::
convertTypeStr
(
depth1
,
wdepth
,
kercn
,
cvtstr
[
0
]),
ocl
::
convertTypeStr
(
depth2
,
wdepth
,
kercn
,
cvtstr
[
1
]),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
kercn
,
cvtstr
[
2
]));
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
kercn
,
cvtstr
[
2
]),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
);
const
uchar
*
usrdata_p
=
(
const
uchar
*
)
usrdata
;
const
double
*
usrdata_d
=
(
const
double
*
)
usrdata
;
...
...
@@ -1323,6 +1331,9 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
if
(
k
.
empty
()
)
return
false
;
UMat
src1
=
_src1
.
getUMat
(),
src2
;
UMat
dst
=
_dst
.
getUMat
(),
mask
=
_mask
.
getUMat
();
int
cscale
=
cn
/
kercn
;
ocl
::
KernelArg
src1arg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src1
,
cscale
);
...
...
@@ -1337,9 +1348,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
Mat
src2sc
=
_src2
.
getMat
();
if
(
!
src2sc
.
empty
()
)
{
convertAndUnrollScalar
(
src2sc
,
wtype
,
(
uchar
*
)
buf
,
1
);
}
ocl
::
KernelArg
scalararg
=
ocl
::
KernelArg
(
0
,
0
,
0
,
buf
,
esz
);
if
(
!
haveMask
)
...
...
@@ -1369,12 +1378,10 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
CV_Error
(
Error
::
StsNotImplemented
,
"unsupported number of extra parameters"
);
}
else
{
k
.
args
(
src1arg
,
src2arg
,
maskarg
,
dstarg
);
}
}
size_t
globalsize
[]
=
{
src1
.
cols
*
cscale
,
src1
.
rows
};
size_t
globalsize
[]
=
{
src1
.
cols
*
cscale
,
src1
.
rows
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
...
...
@@ -2075,7 +2082,7 @@ void cv::multiply(InputArray src1, InputArray src2,
OutputArray
dst
,
double
scale
,
int
dtype
)
{
arithm_op
(
src1
,
src2
,
dst
,
noArray
(),
dtype
,
getMulTab
(),
true
,
&
scale
,
s
cale
==
1.
?
OCL_OP_MUL
:
OCL_OP_MUL_SCALE
);
true
,
&
scale
,
s
td
::
abs
(
scale
-
1.0
)
<
DBL_EPSILON
?
OCL_OP_MUL
:
OCL_OP_MUL_SCALE
);
}
void
cv
::
divide
(
InputArray
src1
,
InputArray
src2
,
...
...
@@ -2581,6 +2588,42 @@ static double getMaxVal(int depth)
return
tab
[
depth
];
}
static
bool
ocl_compare
(
InputArray
_src1
,
InputArray
_src2
,
OutputArray
_dst
,
int
op
)
{
if
(
!
((
_src1
.
isMat
()
||
_src1
.
isUMat
())
&&
(
_src2
.
isMat
()
||
_src2
.
isUMat
()))
)
return
false
;
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
int
type
=
_src1
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
type2
=
_src2
.
type
();
if
(
!
doubleSupport
&&
(
depth
==
CV_64F
||
_src2
.
depth
()
==
CV_64F
))
return
false
;
const
char
*
const
operationMap
[]
=
{
"=="
,
">"
,
">="
,
"<"
,
"<="
,
"!="
};
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D BINARY_OP -D srcT1=%s -D workT=srcT1"
" -D OP_CMP -D CMP_OPERATOR=%s%s"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
1
)),
operationMap
[
op
],
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
return
false
;
CV_Assert
(
type
==
type2
);
UMat
src1
=
_src1
.
getUMat
(),
src2
=
_src2
.
getUMat
();
Size
size
=
src1
.
size
();
CV_Assert
(
size
==
src2
.
size
());
_dst
.
create
(
size
,
CV_8UC
(
cn
));
UMat
dst
=
_dst
.
getUMat
();
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src1
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src2
),
ocl
::
KernelArg
::
WriteOnly
(
dst
,
cn
));
size_t
globalsize
[
2
]
=
{
dst
.
cols
*
cn
,
dst
.
rows
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
}
void
cv
::
compare
(
InputArray
_src1
,
InputArray
_src2
,
OutputArray
_dst
,
int
op
)
...
...
@@ -2588,6 +2631,10 @@ void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op)
CV_Assert
(
op
==
CMP_LT
||
op
==
CMP_LE
||
op
==
CMP_EQ
||
op
==
CMP_NE
||
op
==
CMP_GE
||
op
==
CMP_GT
);
if
(
ocl
::
useOpenCL
()
&&
_src1
.
dims
()
<=
2
&&
_src2
.
dims
()
<=
2
&&
_dst
.
isUMat
()
&&
ocl_compare
(
_src1
,
_src2
,
_dst
,
op
))
return
;
int
kind1
=
_src1
.
kind
(),
kind2
=
_src2
.
kind
();
Mat
src1
=
_src1
.
getMat
(),
src2
=
_src2
.
getMat
();
...
...
modules/core/src/mathfuncs.cpp
View file @
001aa705
...
...
@@ -497,10 +497,49 @@ void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegre
}
}
static
bool
ocl_cartToPolar
(
InputArray
_src1
,
InputArray
_src2
,
OutputArray
_dst1
,
OutputArray
_dst2
,
bool
angleInDegrees
)
{
int
type
=
_src1
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
!
(
_src1
.
dims
()
<=
2
&&
_src2
.
dims
()
<=
2
&&
(
depth
==
CV_32F
||
depth
==
CV_64F
)
&&
type
==
_src2
.
type
())
||
(
depth
==
CV_64F
&&
!
doubleSupport
)
)
return
false
;
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D BINARY_OP -D dstT=%s -D OP_CTP_%s%s"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
1
)),
angleInDegrees
?
"AD"
:
"AR"
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
return
false
;
UMat
src1
=
_src1
.
getUMat
(),
src2
=
_src2
.
getUMat
();
Size
size
=
src1
.
size
();
CV_Assert
(
size
==
src2
.
size
()
);
_dst1
.
create
(
size
,
type
);
_dst2
.
create
(
size
,
type
);
UMat
dst1
=
_dst1
.
getUMat
(),
dst2
=
_dst2
.
getUMat
();
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src1
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src2
),
ocl
::
KernelArg
::
WriteOnly
(
dst1
,
cn
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
dst2
));
size_t
globalsize
[
2
]
=
{
dst1
.
cols
*
cn
,
dst1
.
rows
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
void
cartToPolar
(
InputArray
src1
,
InputArray
src2
,
OutputArray
dst1
,
OutputArray
dst2
,
bool
angleInDegrees
)
{
if
(
ocl
::
useOpenCL
()
&&
dst1
.
isUMat
()
&&
dst2
.
isUMat
()
&&
ocl_cartToPolar
(
src1
,
src2
,
dst1
,
dst2
,
angleInDegrees
))
return
;
Mat
X
=
src1
.
getMat
(),
Y
=
src2
.
getMat
();
int
type
=
X
.
type
(),
depth
=
X
.
depth
(),
cn
=
X
.
channels
();
CV_Assert
(
X
.
size
==
Y
.
size
&&
type
==
Y
.
type
()
&&
(
depth
==
CV_32F
||
depth
==
CV_64F
));
...
...
@@ -644,12 +683,50 @@ static void SinCos_32f( const float *angle, float *sinval, float* cosval,
}
static
bool
ocl_polarToCart
(
InputArray
_mag
,
InputArray
_angle
,
OutputArray
_dst1
,
OutputArray
_dst2
,
bool
angleInDegrees
)
{
int
type
=
_angle
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
!
doubleSupport
&&
depth
==
CV_64F
)
return
false
;
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D dstT=%s -D BINARY_OP -D OP_PTC_%s%s"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
1
)),
angleInDegrees
?
"AD"
:
"AR"
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
return
false
;
UMat
mag
=
_mag
.
getUMat
(),
angle
=
_angle
.
getUMat
();
Size
size
=
angle
.
size
();
CV_Assert
(
mag
.
size
()
==
size
);
_dst1
.
create
(
size
,
type
);
_dst2
.
create
(
size
,
type
);
UMat
dst1
=
_dst1
.
getUMat
(),
dst2
=
_dst2
.
getUMat
();
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mag
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
angle
),
ocl
::
KernelArg
::
WriteOnly
(
dst1
,
cn
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
dst2
));
size_t
globalsize
[
2
]
=
{
dst1
.
cols
*
cn
,
dst1
.
rows
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
void
polarToCart
(
InputArray
src1
,
InputArray
src2
,
OutputArray
dst1
,
OutputArray
dst2
,
bool
angleInDegrees
)
{
int
type
=
src2
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
CV_Assert
((
depth
==
CV_32F
||
depth
==
CV_64F
)
&&
(
src1
.
empty
()
||
src1
.
type
()
==
type
));
if
(
ocl
::
useOpenCL
()
&&
!
src1
.
empty
()
&&
src2
.
dims
()
<=
2
&&
dst1
.
isUMat
()
&&
dst2
.
isUMat
()
&&
ocl_polarToCart
(
src1
,
src2
,
dst1
,
dst2
,
angleInDegrees
))
return
;
Mat
Mag
=
src1
.
getMat
(),
Angle
=
src2
.
getMat
();
int
type
=
Angle
.
type
(),
depth
=
Angle
.
depth
(),
cn
=
Angle
.
channels
();
CV_Assert
(
Mag
.
empty
()
||
(
Angle
.
size
==
Mag
.
size
&&
type
==
Mag
.
type
()
&&
(
depth
==
CV_32F
||
depth
==
CV_64F
)));
CV_Assert
(
Mag
.
empty
()
||
Angle
.
size
==
Mag
.
size
);
dst1
.
create
(
Angle
.
dims
,
Angle
.
size
,
type
);
dst2
.
create
(
Angle
.
dims
,
Angle
.
size
,
type
);
Mat
X
=
dst1
.
getMat
(),
Y
=
dst2
.
getMat
();
...
...
@@ -1955,9 +2032,42 @@ static IPowFunc ipowTab[] =
(
IPowFunc
)
iPow32s
,
(
IPowFunc
)
iPow32f
,
(
IPowFunc
)
iPow64f
,
0
};
static
bool
ocl_pow
(
InputArray
_src
,
double
power
,
OutputArray
_dst
)
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
!
(
_src
.
dims
()
<=
2
&&
(
depth
==
CV_32F
||
depth
==
CV_64F
))
||
(
depth
==
CV_64F
&&
!
doubleSupport
)
)
return
false
;
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
format
(
"-D dstT=%s -D OP_POW -D UNARY_OP%s"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
1
)),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
();
_dst
.
create
(
src
.
size
(),
type
);
UMat
dst
=
_dst
.
getUMat
();
ocl
::
KernelArg
srcarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
dstarg
=
ocl
::
KernelArg
::
WriteOnly
(
dst
,
cn
);
if
(
depth
==
CV_32F
)
k
.
args
(
srcarg
,
dstarg
,
(
float
)
power
);
else
k
.
args
(
srcarg
,
dstarg
,
power
);
size_t
globalsize
[
2
]
=
{
dst
.
cols
*
cn
,
dst
.
rows
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
void
pow
(
InputArray
_src
,
double
power
,
OutputArray
_dst
)
{
if
(
ocl
::
useOpenCL
()
&&
_dst
.
isUMat
()
&&
ocl_pow
(
_src
,
power
,
_dst
))
return
;
Mat
src
=
_src
.
getMat
();
int
type
=
src
.
type
(),
depth
=
src
.
depth
(),
cn
=
src
.
channels
();
...
...
modules/core/src/matrix.cpp
View file @
001aa705
...
...
@@ -41,6 +41,7 @@
//M*/
#include "precomp.hpp"
#include "opencl_kernels.hpp"
/****************************************************************************************\
* [scaled] Identity matrix initialization *
...
...
@@ -2368,10 +2369,37 @@ void cv::vconcat(InputArray _src, OutputArray dst)
}
//////////////////////////////////////// set identity ////////////////////////////////////////////
namespace
cv
{
static
bool
ocl_setIdentity
(
InputOutputArray
_m
,
const
Scalar
&
s
)
{
int
type
=
_m
.
type
(),
cn
=
CV_MAT_CN
(
type
);
if
(
cn
==
3
)
return
false
;
ocl
::
Kernel
k
(
"setIdentity"
,
ocl
::
core
::
set_identity_oclsrc
,
format
(
"-D T=%s"
,
ocl
::
memopTypeToStr
(
type
)));
if
(
k
.
empty
())
return
false
;
UMat
m
=
_m
.
getUMat
();
k
.
args
(
ocl
::
KernelArg
::
WriteOnly
(
m
),
ocl
::
KernelArg
::
Constant
(
Mat
(
1
,
1
,
type
,
s
)));
size_t
globalsize
[
2
]
=
{
m
.
cols
,
m
.
rows
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
}
void
cv
::
setIdentity
(
InputOutputArray
_m
,
const
Scalar
&
s
)
{
CV_Assert
(
_m
.
dims
()
<=
2
);
if
(
ocl
::
useOpenCL
()
&&
_m
.
isUMat
()
&&
ocl_setIdentity
(
_m
,
s
))
return
;
Mat
m
=
_m
.
getMat
();
CV_Assert
(
m
.
dims
<=
2
);
int
i
,
j
,
rows
=
m
.
rows
,
cols
=
m
.
cols
,
type
=
m
.
type
();
if
(
type
==
CV_32FC1
)
...
...
@@ -2548,18 +2576,63 @@ static TransposeInplaceFunc transposeInplaceTab[] =
0
,
0
,
0
,
0
,
0
,
0
,
0
,
transposeI_32sC6
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
transposeI_32sC8
};
static
inline
int
divUp
(
int
a
,
int
b
)
{
return
(
a
+
b
-
1
)
/
b
;
}
static
bool
ocl_transpose
(
InputArray
_src
,
OutputArray
_dst
)
{
const
int
TILE_DIM
=
32
,
BLOCK_ROWS
=
8
;
int
type
=
_src
.
type
(),
cn
=
CV_MAT_CN
(
type
);
if
(
cn
==
3
)
return
false
;
UMat
src
=
_src
.
getUMat
();
_dst
.
create
(
src
.
cols
,
src
.
rows
,
type
);
UMat
dst
=
_dst
.
getUMat
();
String
kernelName
(
"transpose"
);
bool
inplace
=
dst
.
u
==
src
.
u
;
if
(
inplace
)
{
CV_Assert
(
dst
.
cols
==
dst
.
rows
);
kernelName
+=
"_inplace"
;
}
ocl
::
Kernel
k
(
kernelName
.
c_str
(),
ocl
::
core
::
transpose_oclsrc
,
format
(
"-D T=%s -D TILE_DIM=%d -D BLOCK_ROWS=%d"
,
ocl
::
memopTypeToStr
(
type
),
TILE_DIM
,
BLOCK_ROWS
));
if
(
inplace
)
k
.
args
(
ocl
::
KernelArg
::
ReadWriteNoSize
(
dst
),
dst
.
rows
);
else
k
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
src
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
dst
));
size_t
localsize
[
3
]
=
{
TILE_DIM
,
BLOCK_ROWS
,
1
};
size_t
globalsize
[
3
]
=
{
src
.
cols
,
inplace
?
src
.
rows
:
divUp
(
src
.
rows
,
TILE_DIM
)
*
BLOCK_ROWS
,
1
};
return
k
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
}
void
cv
::
transpose
(
InputArray
_src
,
OutputArray
_dst
)
{
int
type
=
_src
.
type
(),
esz
=
CV_ELEM_SIZE
(
type
);
CV_Assert
(
_src
.
dims
()
<=
2
&&
esz
<=
32
);
if
(
ocl
::
useOpenCL
()
&&
_dst
.
isUMat
()
&&
ocl_transpose
(
_src
,
_dst
))
return
;
Mat
src
=
_src
.
getMat
();
if
(
src
.
empty
()
)
{
_dst
.
release
();
return
;
}
size_t
esz
=
src
.
elemSize
();
CV_Assert
(
src
.
dims
<=
2
&&
esz
<=
(
size_t
)
32
);
_dst
.
create
(
src
.
cols
,
src
.
rows
,
src
.
type
());
Mat
dst
=
_dst
.
getMat
();
...
...
@@ -2576,6 +2649,7 @@ void cv::transpose( InputArray _src, OutputArray _dst )
{
TransposeInplaceFunc
func
=
transposeInplaceTab
[
esz
];
CV_Assert
(
func
!=
0
);
CV_Assert
(
dst
.
cols
==
dst
.
rows
);
func
(
dst
.
data
,
dst
.
step
,
dst
.
rows
);
}
else
...
...
modules/core/src/ocl.cpp
View file @
001aa705
...
...
@@ -3145,7 +3145,7 @@ const char* memopTypeToStr(int t)
"ushort"
,
"ushort2"
,
"ushort3"
,
"ushort4"
,
"int"
,
"int2"
,
"int3"
,
"int4"
,
"int"
,
"int2"
,
"int3"
,
"int4"
,
"
long"
,
"long2"
,
"long3"
,
"long4
"
,
"
int2"
,
"int4"
,
"?"
,
"int8
"
,
"?"
,
"?"
,
"?"
,
"?"
};
int
cn
=
CV_MAT_CN
(
t
);
...
...
modules/core/src/opencl/arithm.cl
View file @
001aa705
...
...
@@ -57,19 +57,22 @@
-D workDepth=<work depth> [-D cn=<num channels>]" - for mixed-type operations
*/
#if
defined (DOUBLE_SUPPORT)
#if
def DOUBLE_SUPPORT
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#define CV_EPSILON DBL_EPSILON
#define CV_PI M_PI
#else
#define CV_EPSILON FLT_EPSILON
#define CV_PI M_PI_F
#endif
#define CV_32S 4
#define CV_32F 5
#define dstelem *(__global dstT*)(dstptr + dst_index)
#define noconvert(x) x
#define dstelem2 *(__global dstT*)(dstptr2 + dst_index2)
#define noconvert
#ifndef workT
...
...
@@ -88,6 +91,7 @@
#endif
#define EXTRA_PARAMS
#define EXTRA_INDEX
#if defined OP_ADD
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 + srcelem2)
...
...
@@ -99,7 +103,9 @@
#define PROCESS_ELEM dstelem = convertToDT(srcelem2 - srcelem1)
#elif defined OP_ABSDIFF
#define PROCESS_ELEM dstelem = abs_diff(srcelem1, srcelem2)
#define PROCESS_ELEM \
workT v = srcelem1 - srcelem2; \
dstelem = convertToDT(v >= (workT)(0) ? v : -v);
#elif defined OP_AND
#define PROCESS_ELEM dstelem = srcelem1 & srcelem2
...
...
@@ -169,6 +175,9 @@
#elif defined OP_EXP
#define PROCESS_ELEM dstelem = exp(srcelem1)
#elif defined OP_POW
#define PROCESS_ELEM dstelem = pow(srcelem1, srcelem2)
#elif defined OP_SQRT
#define PROCESS_ELEM dstelem = sqrt(srcelem1)
...
...
@@ -178,6 +187,10 @@ dstT v = (dstT)(srcelem1);\
dstelem = v > (dstT)(0) ? log(v) : log(-v)
#elif defined OP_CMP
#define dstT uchar
#define srcT2 srcT1
#define convertToWT1
#define convertToWT2
#define PROCESS_ELEM dstelem = convert_uchar(srcelem1 CMP_OPERATOR srcelem2 ? 255 : 0)
#elif defined OP_CONVERT
...
...
@@ -188,15 +201,55 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v)
#define EXTRA_PARAMS , workT alpha, workT beta
#define PROCESS_ELEM dstelem = convertToDT(srcelem1*alpha + beta)
#elif defined OP_CTP_AD || defined OP_CTP_AR
#ifdef OP_CTP_AD
#define TO_DEGREE cartToPolar *= (180 / CV_PI);
#elif defined OP_CTP_AR
#define TO_DEGREE
#endif
#define PROCESS_ELEM \
dstT x = srcelem1, y = srcelem2; \
dstT x2 = x * x, y2 = y * y; \
dstT magnitude = sqrt(x2 + y2); \
dstT tmp = y >= 0 ? 0 : CV_PI * 2; \
tmp = x < 0 ? CV_PI : tmp; \
dstT tmp1 = y >= 0 ? CV_PI * 0.5f : CV_PI * 1.5f; \
dstT cartToPolar = y2 <= x2 ? x * y / (x2 + 0.28f * y2 + CV_EPSILON) + tmp : (tmp1 - x * y / (y2 + 0.28f * x2 + CV_EPSILON)); \
TO_DEGREE \
dstelem = magnitude; \
dstelem2 = cartToPolar
#elif defined OP_PTC_AD || defined OP_PTC_AR
#ifdef OP_PTC_AD
#define FROM_DEGREE \
dstT ascale = CV_PI/180.0f; \
dstT alpha = y * ascale
#else
#define FROM_DEGREE \
dstT alpha = y
#endif
#define PROCESS_ELEM \
dstT x = srcelem1, y = srcelem2; \
FROM_DEGREE; \
dstelem = cos(alpha) * x; \
dstelem2 = sin(alpha) * x
#else
#error "unknown op type"
#endif
#if defined OP_CTP_AD || defined OP_CTP_AR || defined OP_PTC_AD || defined OP_PTC_AR
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , __global uchar* dstptr2, int dststep2, int dstoffset2
#undef EXTRA_INDEX
#define EXTRA_INDEX int dst_index2 = mad24(y, dststep2, x*(int)sizeof(dstT) + dstoffset2)
#endif
#if defined UNARY_OP || defined MASK_UNARY_OP
#undef srcelem2
#if defined OP_AND || defined OP_OR || defined OP_XOR || defined OP_ADD || defined OP_SAT_ADD || \
defined OP_SUB || defined OP_SAT_SUB || defined OP_RSUB || defined OP_SAT_RSUB || \
defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX
defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX
|| defined OP_POW
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT srcelem2
#endif
...
...
@@ -217,6 +270,7 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1) + srcoffset1);
int src2_index = mad24(y, srcstep2, x*(int)sizeof(srcT2) + srcoffset2);
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT) + dstoffset);
EXTRA_INDEX;
PROCESS_ELEM;
}
...
...
@@ -260,6 +314,7 @@ __kernel void KF(__global const uchar* srcptr1, int srcstep1, int srcoffset1,
{
int src1_index = mad24(y, srcstep1, x*(int)sizeof(srcT1) + srcoffset1);
int dst_index = mad24(y, dststep, x*(int)sizeof(dstT) + dstoffset);
EXTRA_INDEX;
PROCESS_ELEM;
}
...
...
modules/core/src/opencl/reduce.cl
0 → 100644
View file @
001aa705
////////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Institute
Of
Software
Chinese
Academy
Of
Science,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Shengen
Yan,yanshengen@gmail.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
define
noconvert
#
if
defined
OP_SUM
|
| defined OP_SUM_ABS |
|
defined
OP_SUM_SQR
#
if
OP_SUM
#
define
FUNC
(
a,
b
)
a
+=
b
#
elif
OP_SUM_ABS
#
define
FUNC
(
a,
b
)
a
+=
b
>=
(
dstT
)(
0
)
?
b
:
-b
#
elif
OP_SUM_SQR
#
define
FUNC
(
a,
b
)
a
+=
b
*
b
#
endif
#
define
DEFINE_ACCUMULATOR
\
dstT
accumulator
=
(
dstT
)(
0
)
#
define
REDUCE_GLOBAL
\
dstT
temp
=
convertToDT
(
src[0]
)
; \
FUNC
(
accumulator,
temp
)
#
define
REDUCE_LOCAL_1
\
localmem[lid
-
WGS2_ALIGNED]
+=
accumulator
#
define
REDUCE_LOCAL_2
\
localmem[lid]
+=
localmem[lid2]
#
elif
defined
OP_COUNT_NON_ZERO
#
define
dstT
int
#
define
DEFINE_ACCUMULATOR
\
dstT
accumulator
=
(
dstT
)(
0
)
; \
srcT
zero
=
(
srcT
)(
0
)
,
one
=
(
srcT
)(
1
)
#
define
REDUCE_GLOBAL
\
accumulator
+=
src[0]
==
zero
?
zero
:
one
#
define
REDUCE_LOCAL_1
\
localmem[lid
-
WGS2_ALIGNED]
+=
accumulator
#
define
REDUCE_LOCAL_2
\
localmem[lid]
+=
localmem[lid2]
#
else
#
error
"No operation"
#
endif
__kernel
void
reduce
(
__global
const
uchar
*
srcptr,
int
step,
int
offset,
int
cols,
int
total,
int
groupnum,
__global
uchar
*
dstptr
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
__local
dstT
localmem[WGS2_ALIGNED]
;
DEFINE_ACCUMULATOR
;
for
(
int
grain
=
groupnum
*
WGS
; id < total; id += grain)
{
int
src_index
=
mad24
(
id
/
cols,
step,
offset
+
(
id
%
cols
)
*
(
int
)
sizeof
(
srcT
))
;
__global
const
srcT
*
src
=
(
__global
const
srcT
*
)(
srcptr
+
src_index
)
;
REDUCE_GLOBAL
;
}
if
(
lid
<
WGS2_ALIGNED
)
localmem[lid]
=
accumulator
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
>=
WGS2_ALIGNED
)
REDUCE_LOCAL_1
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
WGS2_ALIGNED
>>
1
; lsize > 0; lsize >>= 1)
{
if
(
lid
<
lsize
)
{
int
lid2
=
lsize
+
lid
;
REDUCE_LOCAL_2
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lid
==
0
)
{
__global
dstT
*
dst
=
(
__global
dstT
*
)(
dstptr
+
(
int
)
sizeof
(
dstT
)
*
gid
)
;
dst[0]
=
localmem[0]
;
}
}
modules/core/src/opencl/set_identity.cl
0 → 100644
View file @
001aa705
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Multicoreware,
Inc.,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Jin
Ma
jin@multicorewareinc.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
__kernel
void
setIdentity
(
__global
uchar
*
srcptr,
int
src_step,
int
src_offset,
int
rows,
int
cols,
T
scalar
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
int
src_index
=
mad24
(
y,
src_step,
src_offset
+
x
*
(
int
)
sizeof
(
T
))
;
__global
T
*
src
=
(
__global
T
*
)(
srcptr
+
src_index
)
;
src[0]
=
x
==
y
?
scalar
:
(
T
)(
0
)
;
}
}
modules/core/src/opencl/transpose.cl
0 → 100644
View file @
001aa705
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Institute
Of
Software
Chinese
Academy
Of
Science,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Jia
Haipeng,
jiahaipeng95@gmail.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
define
LDS_STEP
TILE_DIM
__kernel
void
transpose
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset
)
{
int
gp_x
=
get_group_id
(
0
)
,
gp_y
=
get_group_id
(
1
)
;
int
gs_x
=
get_num_groups
(
0
)
,
gs_y
=
get_num_groups
(
1
)
;
int
groupId_x,
groupId_y
;
if
(
src_rows
==
src_cols
)
{
groupId_y
=
gp_x
;
groupId_x
=
(
gp_x
+
gp_y
)
%
gs_x
;
}
else
{
int
bid
=
gp_x
+
gs_x
*
gp_y
;
groupId_y
=
bid
%
gs_y
;
groupId_x
=
((
bid
/
gs_y
)
+
groupId_y
)
%
gs_x
;
}
int
lx
=
get_local_id
(
0
)
;
int
ly
=
get_local_id
(
1
)
;
int
x
=
groupId_x
*
TILE_DIM
+
lx
;
int
y
=
groupId_y
*
TILE_DIM
+
ly
;
int
x_index
=
groupId_y
*
TILE_DIM
+
lx
;
int
y_index
=
groupId_x
*
TILE_DIM
+
ly
;
__local
T
title[TILE_DIM
*
LDS_STEP]
;
if
(
x
<
src_cols
&&
y
<
src_rows
)
{
int
index_src
=
mad24
(
y,
src_step,
x
*
(
int
)
sizeof
(
T
)
+
src_offset
)
;
for
(
int
i
=
0
; i < TILE_DIM; i += BLOCK_ROWS)
if
(
y
+
i
<
src_rows
)
{
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
index_src
)
;
title[
(
ly
+
i
)
*
LDS_STEP
+
lx]
=
src[0]
;
index_src
=
mad24
(
BLOCK_ROWS,
src_step,
index_src
)
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
x_index
<
src_rows
&&
y_index
<
src_cols
)
{
int
index_dst
=
mad24
(
y_index,
dst_step,
x_index
*
(
int
)
sizeof
(
T
)
+
dst_offset
)
;
for
(
int
i
=
0
; i < TILE_DIM; i += BLOCK_ROWS)
if
((
y_index
+
i
)
<
src_cols
)
{
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
index_dst
)
;
dst[0]
=
title[lx
*
LDS_STEP
+
ly
+
i]
;
index_dst
=
mad24
(
BLOCK_ROWS,
dst_step,
index_dst
)
;
}
}
}
__kernel
void
transpose_inplace
(
__global
uchar
*
srcptr,
int
src_step,
int
src_offset,
int
src_rows
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
y
<
src_rows
&&
x
<
y
)
{
int
src_index
=
mad24
(
y,
src_step,
src_offset
+
x
*
(
int
)
sizeof
(
T
))
;
int
dst_index
=
mad24
(
x,
src_step,
src_offset
+
y
*
(
int
)
sizeof
(
T
))
;
__global
T
*
src
=
(
__global
T
*
)(
srcptr
+
src_index
)
;
__global
T
*
dst
=
(
__global
T
*
)(
srcptr
+
dst_index
)
;
T
tmp
=
dst[0]
;
dst[0]
=
src[0]
;
src[0]
=
tmp
;
}
}
modules/core/src/stat.cpp
View file @
001aa705
...
...
@@ -41,6 +41,7 @@
//M*/
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#include <climits>
#include <limits>
...
...
@@ -448,10 +449,77 @@ static SumSqrFunc getSumSqrTab(int depth)
return
sumSqrTab
[
depth
];
}
template
<
typename
T
>
Scalar
ocl_part_sum
(
Mat
m
)
{
CV_Assert
(
m
.
rows
==
1
);
Scalar
s
=
Scalar
::
all
(
0
);
int
cn
=
m
.
channels
();
const
T
*
const
ptr
=
m
.
ptr
<
T
>
(
0
);
for
(
int
x
=
0
,
w
=
m
.
cols
*
cn
;
x
<
w
;
)
for
(
int
c
=
0
;
c
<
cn
;
++
c
,
++
x
)
s
[
c
]
+=
ptr
[
x
];
return
s
;
}
enum
{
OCL_OP_SUM
=
0
,
OCL_OP_SUM_ABS
=
1
,
OCL_OP_SUM_SQR
=
2
};
static
bool
ocl_sum
(
InputArray
_src
,
Scalar
&
res
,
int
sum_op
)
{
CV_Assert
(
sum_op
==
OCL_OP_SUM
||
sum_op
==
OCL_OP_SUM_ABS
||
sum_op
==
OCL_OP_SUM_SQR
);
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
cn
>
4
||
cn
==
3
||
_src
.
dims
()
>
2
)
return
false
;
int
dbsize
=
ocl
::
Device
::
getDefault
().
maxComputeUnits
();
size_t
wgs
=
ocl
::
Device
::
getDefault
().
maxWorkGroupSize
();
int
ddepth
=
std
::
max
(
CV_32S
,
depth
),
dtype
=
CV_MAKE_TYPE
(
ddepth
,
cn
);
int
wgs2_aligned
=
1
;
while
(
wgs2_aligned
<
(
int
)
wgs
)
wgs2_aligned
<<=
1
;
wgs2_aligned
>>=
1
;
static
const
char
*
const
opMap
[
3
]
=
{
"OP_SUM"
,
"OP_SUM_ABS"
,
"OP_SUM_SQR"
};
char
cvt
[
40
];
ocl
::
Kernel
k
(
"reduce"
,
ocl
::
core
::
reduce_oclsrc
,
format
(
"-D srcT=%s -D dstT=%s -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
dtype
),
ocl
::
convertTypeStr
(
depth
,
ddepth
,
cn
,
cvt
),
opMap
[
sum_op
],
(
int
)
wgs
,
wgs2_aligned
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
(),
db
(
1
,
dbsize
,
dtype
);
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
src
.
cols
,
(
int
)
src
.
total
(),
dbsize
,
ocl
::
KernelArg
::
PtrWriteOnly
(
db
));
size_t
globalsize
=
dbsize
*
wgs
;
if
(
k
.
run
(
1
,
&
globalsize
,
&
wgs
,
true
))
{
typedef
Scalar
(
*
part_sum
)(
Mat
m
);
part_sum
funcs
[
3
]
=
{
ocl_part_sum
<
int
>
,
ocl_part_sum
<
float
>
,
ocl_part_sum
<
double
>
},
func
=
funcs
[
ddepth
-
CV_32S
];
res
=
func
(
db
.
getMat
(
ACCESS_READ
));
return
true
;
}
return
false
;
}
}
cv
::
Scalar
cv
::
sum
(
InputArray
_src
)
{
Scalar
_res
;
if
(
ocl
::
useOpenCL
()
&&
_src
.
isUMat
()
&&
ocl_sum
(
_src
,
_res
,
OCL_OP_SUM
))
return
_res
;
Mat
src
=
_src
.
getMat
();
int
k
,
cn
=
src
.
channels
(),
depth
=
src
.
depth
();
...
...
@@ -542,12 +610,55 @@ cv::Scalar cv::sum( InputArray _src )
return
s
;
}
namespace
cv
{
static
bool
ocl_countNonZero
(
InputArray
_src
,
int
&
res
)
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
depth
==
CV_64F
&&
!
doubleSupport
)
return
false
;
int
dbsize
=
ocl
::
Device
::
getDefault
().
maxComputeUnits
();
size_t
wgs
=
ocl
::
Device
::
getDefault
().
maxWorkGroupSize
();
int
wgs2_aligned
=
1
;
while
(
wgs2_aligned
<
(
int
)
wgs
)
wgs2_aligned
<<=
1
;
wgs2_aligned
>>=
1
;
ocl
::
Kernel
k
(
"reduce"
,
ocl
::
core
::
reduce_oclsrc
,
format
(
"-D srcT=%s -D OP_COUNT_NON_ZERO -D WGS=%d -D WGS2_ALIGNED=%d%s"
,
ocl
::
typeToStr
(
type
),
(
int
)
wgs
,
wgs2_aligned
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
if
(
k
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
(),
db
(
1
,
dbsize
,
CV_32SC1
);
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
src
.
cols
,
(
int
)
src
.
total
(),
dbsize
,
ocl
::
KernelArg
::
PtrWriteOnly
(
db
));
size_t
globalsize
=
dbsize
*
wgs
;
if
(
k
.
run
(
1
,
&
globalsize
,
&
wgs
,
true
))
return
res
=
saturate_cast
<
int
>
(
cv
::
sum
(
db
.
getMat
(
ACCESS_READ
))[
0
]),
true
;
return
false
;
}
}
int
cv
::
countNonZero
(
InputArray
_src
)
{
CV_Assert
(
_src
.
channels
()
==
1
);
int
res
=
-
1
;
if
(
ocl
::
useOpenCL
()
&&
_src
.
isUMat
()
&&
ocl_countNonZero
(
_src
,
res
))
return
res
;
Mat
src
=
_src
.
getMat
();
CountNonZeroFunc
func
=
getCountNonZeroTab
(
src
.
depth
());
CV_Assert
(
src
.
channels
()
==
1
&&
func
!=
0
);
CV_Assert
(
func
!=
0
);
const
Mat
*
arrays
[]
=
{
&
src
,
0
};
uchar
*
ptrs
[
1
];
...
...
@@ -693,9 +804,54 @@ cv::Scalar cv::mean( InputArray _src, InputArray _mask )
return
s
*
(
nz0
?
1.
/
nz0
:
0
);
}
namespace
cv
{
static
bool
ocl_meanStdDev
(
InputArray
_src
,
OutputArray
_mean
,
OutputArray
_sdv
)
{
Scalar
mean
,
stddev
;
if
(
!
ocl_sum
(
_src
,
mean
,
OCL_OP_SUM
))
return
false
;
if
(
!
ocl_sum
(
_src
,
stddev
,
OCL_OP_SUM_SQR
))
return
false
;
double
total
=
1.0
/
_src
.
total
();
int
k
,
j
,
cn
=
_src
.
channels
();
for
(
int
i
=
0
;
i
<
cn
;
++
i
)
{
mean
[
i
]
*=
total
;
stddev
[
i
]
=
std
::
sqrt
(
std
::
max
(
stddev
[
i
]
*
total
-
mean
[
i
]
*
mean
[
i
]
,
0.
));
}
for
(
j
=
0
;
j
<
2
;
j
++
)
{
const
double
*
const
sptr
=
j
==
0
?
&
mean
[
0
]
:
&
stddev
[
0
];
_OutputArray
_dst
=
j
==
0
?
_mean
:
_sdv
;
if
(
!
_dst
.
needed
()
)
continue
;
if
(
!
_dst
.
fixedSize
()
)
_dst
.
create
(
cn
,
1
,
CV_64F
,
-
1
,
true
);
Mat
dst
=
_dst
.
getMat
();
int
dcn
=
(
int
)
dst
.
total
();
CV_Assert
(
dst
.
type
()
==
CV_64F
&&
dst
.
isContinuous
()
&&
(
dst
.
cols
==
1
||
dst
.
rows
==
1
)
&&
dcn
>=
cn
);
double
*
dptr
=
dst
.
ptr
<
double
>
();
for
(
k
=
0
;
k
<
cn
;
k
++
)
dptr
[
k
]
=
sptr
[
k
];
for
(
;
k
<
dcn
;
k
++
)
dptr
[
k
]
=
0
;
}
return
true
;
}
}
void
cv
::
meanStdDev
(
InputArray
_src
,
OutputArray
_mean
,
OutputArray
_sdv
,
InputArray
_mask
)
{
if
(
ocl
::
useOpenCL
()
&&
_src
.
isUMat
()
&&
_mask
.
empty
()
&&
ocl_meanStdDev
(
_src
,
_mean
,
_sdv
))
return
;
Mat
src
=
_src
.
getMat
(),
mask
=
_mask
.
getMat
();
CV_Assert
(
mask
.
empty
()
||
mask
.
type
()
==
CV_8U
);
...
...
@@ -2602,9 +2758,8 @@ void cv::findNonZero( InputArray _src, OutputArray _idx )
double
cv
::
PSNR
(
InputArray
_src1
,
InputArray
_src2
)
{
Mat
src1
=
_src1
.
getMat
(),
src2
=
_src2
.
getMat
();
CV_Assert
(
src1
.
depth
()
==
CV_8U
);
double
diff
=
std
::
sqrt
(
norm
(
src1
,
src2
,
NORM_L2SQR
)
/
(
src1
.
total
()
*
src1
.
channels
()));
CV_Assert
(
_src1
.
depth
()
==
CV_8U
);
double
diff
=
std
::
sqrt
(
norm
(
_src1
,
_src2
,
NORM_L2SQR
)
/
(
_src1
.
total
()
*
_src1
.
channels
()));
return
20
*
log10
(
255.
/
(
diff
+
DBL_EPSILON
));
}
...
...
modules/core/test/ocl/test_arithm.cpp
View file @
001aa705
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment