Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
b6b190df
Commit
b6b190df
authored
Oct 29, 2013
by
Jin Ma
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Rewrote moments of opencl version.
parent
957c85e9
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
617 additions
and
1100 deletions
+617
-1100
ocl.hpp
modules/ocl/include/opencv2/ocl/ocl.hpp
+6
-1
perf_moments.cpp
modules/ocl/perf/perf_moments.cpp
+22
-21
moments.cpp
modules/ocl/src/moments.cpp
+306
-257
moments.cl
modules/ocl/src/opencl/moments.cl
+271
-805
test_moments.cpp
modules/ocl/test/test_moments.cpp
+12
-16
No files found.
modules/ocl/include/opencv2/ocl/ocl.hpp
View file @
b6b190df
...
@@ -1518,7 +1518,12 @@ namespace cv
...
@@ -1518,7 +1518,12 @@ namespace cv
float
pos
,
oclMat
&
newFrame
,
oclMat
&
buf
);
float
pos
,
oclMat
&
newFrame
,
oclMat
&
buf
);
//! computes moments of the rasterized shape or a vector of points
//! computes moments of the rasterized shape or a vector of points
CV_EXPORTS
Moments
ocl_moments
(
InputArray
_array
,
bool
binaryImage
);
//! _array should be a vector a points standing for the contour
CV_EXPORTS
Moments
ocl_moments
(
InputArray
contour
);
//! src should be a general image uploaded to the GPU.
//! the supported oclMat type are CV_8UC1, CV_16UC1, CV_16SC1, CV_32FC1 and CV_64FC1
//! to use type of CV_64FC1, the GPU should support CV_64FC1
CV_EXPORTS
Moments
ocl_moments
(
oclMat
&
src
,
bool
binary
);
class
CV_EXPORTS
StereoBM_OCL
class
CV_EXPORTS
StereoBM_OCL
{
{
...
...
modules/ocl/perf/perf_moments.cpp
View file @
b6b190df
...
@@ -26,7 +26,7 @@
...
@@ -26,7 +26,7 @@
//
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// this list of conditions and the following disclaimer in the documentation
// and/or other
m
aterials provided with the distribution.
// and/or other
oclM
aterials provided with the distribution.
//
//
// * The name of the copyright holders may not be used to endorse or promote products
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// derived from this software without specific prior written permission.
...
@@ -49,41 +49,42 @@
...
@@ -49,41 +49,42 @@
using
namespace
perf
;
using
namespace
perf
;
using
std
::
tr1
::
tuple
;
using
std
::
tr1
::
tuple
;
using
std
::
tr1
::
get
;
using
std
::
tr1
::
get
;
using
namespace
cv
;
using
namespace
cv
::
ocl
;
using
namespace
cvtest
;
using
namespace
testing
;
using
namespace
std
;
///////////// Moments ////////////////////////
typedef
Size_MatType
MomentsFixture
;
///////////// Moments ////////////////////////
//*! performance of image
typedef
tuple
<
Size
,
MatType
,
bool
>
MomentsParamType
;
typedef
TestBaseWithParam
<
MomentsParamType
>
MomentsFixture
;
PERF_TEST_P
(
MomentsFixture
,
DISABLED_
Moments
,
PERF_TEST_P
(
MomentsFixture
,
Moments
,
::
testing
::
Combine
(
OCL_TYPICAL_MAT_SIZES
,
::
testing
::
Combine
(
OCL_TYPICAL_MAT_SIZES
,
OCL_PERF_ENUM
(
CV_8UC1
,
CV_16SC1
,
CV_32FC1
,
CV_64FC1
)))
// TODO does not work properly (see below
)
OCL_PERF_ENUM
(
CV_8UC1
,
CV_16SC1
,
CV_16UC1
,
CV_32FC1
,
CV_64FC1
),
::
testing
::
Values
(
false
,
true
))
)
{
{
const
Size_MatType_t
params
=
GetParam
();
const
MomentsParamType
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
const
Size
srcSize
=
get
<
0
>
(
params
);
const
int
type
=
get
<
1
>
(
params
);
const
int
type
=
get
<
1
>
(
params
);
const
bool
binaryImage
=
get
<
2
>
(
params
);
Mat
src
(
srcSize
,
type
),
dst
(
7
,
1
,
CV_64F
);
Mat
src
(
srcSize
,
type
),
dst
(
7
,
1
,
CV_64F
);
const
bool
binaryImage
=
false
;
randu
(
src
,
0
,
255
);
cv
::
Moments
mom
;
declare
.
in
(
src
,
WARMUP_RNG
).
out
(
dst
);
oclMat
src_d
(
src
);
cv
::
Moments
mom
;
if
(
RUN_OCL_IMPL
)
if
(
RUN_OCL_IMPL
)
{
{
ocl
::
oclMat
oclSrc
(
src
);
OCL_TEST_CYCLE
()
mom
=
cv
::
ocl
::
ocl_moments
(
src_d
,
binaryImage
);
OCL_TEST_CYCLE
()
mom
=
cv
::
ocl
::
ocl_moments
(
oclSrc
,
binaryImage
);
// TODO Use oclSrc
cv
::
HuMoments
(
mom
,
dst
);
SANITY_CHECK
(
dst
);
}
}
else
if
(
RUN_PLAIN_IMPL
)
else
if
(
RUN_PLAIN_IMPL
)
{
{
TEST_CYCLE
()
mom
=
cv
::
moments
(
src
,
binaryImage
);
TEST_CYCLE
()
mom
=
cv
::
moments
(
src
,
binaryImage
);
cv
::
HuMoments
(
mom
,
dst
);
SANITY_CHECK
(
dst
);
}
}
else
else
OCL_PERF_ELSE
OCL_PERF_ELSE
cv
::
HuMoments
(
mom
,
dst
);
SANITY_CHECK
(
dst
,
1e-3
);
}
}
modules/ocl/src/moments.cpp
View file @
b6b190df
...
@@ -26,7 +26,7 @@
...
@@ -26,7 +26,7 @@
//
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// this list of conditions and the following disclaimer in the documentation
// and/or other
m
aterials provided with the distribution.
// and/or other
oclM
aterials provided with the distribution.
//
//
// * The name of the copyright holders may not be used to endorse or promote products
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// derived from this software without specific prior written permission.
...
@@ -46,294 +46,342 @@
...
@@ -46,294 +46,342 @@
#include "precomp.hpp"
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#include "opencl_kernels.hpp"
#if defined _MSC_VER
#define snprintf sprintf_s
#endif
namespace
cv
namespace
cv
{
{
namespace
ocl
namespace
ocl
{
// The function calculates center of gravity and the central second order moments
static
void
icvCompleteMomentState
(
CvMoments
*
moments
)
{
double
cx
=
0
,
cy
=
0
;
double
mu20
,
mu11
,
mu02
;
assert
(
moments
!=
0
);
moments
->
inv_sqrt_m00
=
0
;
if
(
fabs
(
moments
->
m00
)
>
DBL_EPSILON
)
{
double
inv_m00
=
1.
/
moments
->
m00
;
cx
=
moments
->
m10
*
inv_m00
;
cy
=
moments
->
m01
*
inv_m00
;
moments
->
inv_sqrt_m00
=
std
::
sqrt
(
fabs
(
inv_m00
)
);
}
// mu20 = m20 - m10*cx
mu20
=
moments
->
m20
-
moments
->
m10
*
cx
;
// mu11 = m11 - m10*cy
mu11
=
moments
->
m11
-
moments
->
m10
*
cy
;
// mu02 = m02 - m01*cy
mu02
=
moments
->
m02
-
moments
->
m01
*
cy
;
moments
->
mu20
=
mu20
;
moments
->
mu11
=
mu11
;
moments
->
mu02
=
mu02
;
// mu30 = m30 - cx*(3*mu20 + cx*m10)
moments
->
mu30
=
moments
->
m30
-
cx
*
(
3
*
mu20
+
cx
*
moments
->
m10
);
mu11
+=
mu11
;
// mu21 = m21 - cx*(2*mu11 + cx*m01) - cy*mu20
moments
->
mu21
=
moments
->
m21
-
cx
*
(
mu11
+
cx
*
moments
->
m01
)
-
cy
*
mu20
;
// mu12 = m12 - cy*(2*mu11 + cy*m10) - cx*mu02
moments
->
mu12
=
moments
->
m12
-
cy
*
(
mu11
+
cy
*
moments
->
m10
)
-
cx
*
mu02
;
// mu03 = m03 - cy*(3*mu02 + cy*m01)
moments
->
mu03
=
moments
->
m03
-
cy
*
(
3
*
mu02
+
cy
*
moments
->
m01
);
}
static
void
icvContourMoments
(
CvSeq
*
contour
,
CvMoments
*
mom
)
{
if
(
contour
->
total
)
{
{
CvSeqReader
reader
;
// The function calculates center of gravity and the central second order moments
int
lpt
=
contour
->
total
;
static
void
icvCompleteMomentState
(
CvMoments
*
moments
)
double
a00
,
a10
,
a01
,
a20
,
a11
,
a02
,
a30
,
a21
,
a12
,
a03
;
{
double
cx
=
0
,
cy
=
0
;
cvStartReadSeq
(
contour
,
&
reader
,
0
)
;
double
mu20
,
mu11
,
mu02
;
size_t
reader_size
=
lpt
<<
1
;
assert
(
moments
!=
0
)
;
cv
::
Mat
reader_mat
(
1
,
reader_size
,
CV_32FC1
)
;
moments
->
inv_sqrt_m00
=
0
;
bool
is_float
=
CV_SEQ_ELTYPE
(
contour
)
==
CV_32FC2
;
if
(
fabs
(
moments
->
m00
)
>
DBL_EPSILON
)
{
double
inv_m00
=
1.
/
moments
->
m00
;
cx
=
moments
->
m10
*
inv_m00
;
cy
=
moments
->
m01
*
inv_m00
;
moments
->
inv_sqrt_m00
=
std
::
sqrt
(
fabs
(
inv_m00
)
);
}
if
(
!
cv
::
ocl
::
Context
::
getContext
()
->
supportsFeature
(
FEATURE_CL_DOUBLE
)
&&
is_float
)
// mu20 = m20 - m10*cx
{
mu20
=
moments
->
m20
-
moments
->
m10
*
cx
;
CV_Error
(
CV_StsUnsupportedFormat
,
"Moments - double is not supported by your GPU!"
);
// mu11 = m11 - m10*cy
mu11
=
moments
->
m11
-
moments
->
m10
*
cy
;
// mu02 = m02 - m01*cy
mu02
=
moments
->
m02
-
moments
->
m01
*
cy
;
moments
->
mu20
=
mu20
;
moments
->
mu11
=
mu11
;
moments
->
mu02
=
mu02
;
// mu30 = m30 - cx*(3*mu20 + cx*m10)
moments
->
mu30
=
moments
->
m30
-
cx
*
(
3
*
mu20
+
cx
*
moments
->
m10
);
mu11
+=
mu11
;
// mu21 = m21 - cx*(2*mu11 + cx*m01) - cy*mu20
moments
->
mu21
=
moments
->
m21
-
cx
*
(
mu11
+
cx
*
moments
->
m01
)
-
cy
*
mu20
;
// mu12 = m12 - cy*(2*mu11 + cy*m10) - cx*mu02
moments
->
mu12
=
moments
->
m12
-
cy
*
(
mu11
+
cy
*
moments
->
m10
)
-
cx
*
mu02
;
// mu03 = m03 - cy*(3*mu02 + cy*m01)
moments
->
mu03
=
moments
->
m03
-
cy
*
(
3
*
mu02
+
cy
*
moments
->
m01
);
}
}
if
(
is_float
)
static
void
icvContourMoments
(
CvSeq
*
contour
,
CvMoments
*
mom
)
{
{
for
(
size_t
i
=
0
;
i
<
reader_size
;
++
i
)
if
(
contour
->
total
)
{
{
reader_mat
.
at
<
float
>
(
0
,
i
++
)
=
((
CvPoint2D32f
*
)(
reader
.
ptr
))
->
x
;
CvSeqReader
reader
;
reader_mat
.
at
<
float
>
(
0
,
i
)
=
((
CvPoint2D32f
*
)(
reader
.
ptr
))
->
y
;
int
lpt
=
contour
->
total
;
CV_NEXT_SEQ_ELEM
(
contour
->
elem_size
,
reader
);
double
a00
,
a10
,
a01
,
a20
,
a11
,
a02
,
a30
,
a21
,
a12
,
a03
;
cvStartReadSeq
(
contour
,
&
reader
,
0
);
size_t
reader_size
=
lpt
<<
1
;
cv
::
Mat
reader_mat
(
1
,
reader_size
,
CV_32FC1
);
bool
is_float
=
CV_SEQ_ELTYPE
(
contour
)
==
CV_32FC2
;
if
(
!
cv
::
ocl
::
Context
::
getContext
()
->
supportsFeature
(
FEATURE_CL_DOUBLE
)
&&
is_float
)
{
CV_Error
(
CV_StsUnsupportedFormat
,
"Moments - double is not supported by your GPU!"
);
}
if
(
is_float
)
{
for
(
size_t
i
=
0
;
i
<
reader_size
;
++
i
)
{
reader_mat
.
at
<
float
>
(
0
,
i
++
)
=
((
CvPoint2D32f
*
)(
reader
.
ptr
))
->
x
;
reader_mat
.
at
<
float
>
(
0
,
i
)
=
((
CvPoint2D32f
*
)(
reader
.
ptr
))
->
y
;
CV_NEXT_SEQ_ELEM
(
contour
->
elem_size
,
reader
);
}
}
else
{
for
(
size_t
i
=
0
;
i
<
reader_size
;
++
i
)
{
reader_mat
.
at
<
float
>
(
0
,
i
++
)
=
((
CvPoint
*
)(
reader
.
ptr
))
->
x
;
reader_mat
.
at
<
float
>
(
0
,
i
)
=
((
CvPoint
*
)(
reader
.
ptr
))
->
y
;
CV_NEXT_SEQ_ELEM
(
contour
->
elem_size
,
reader
);
}
}
cv
::
ocl
::
oclMat
dst_a
(
10
,
lpt
,
CV_64FC1
);
cv
::
ocl
::
oclMat
reader_oclmat
(
reader_mat
);
int
llength
=
std
::
min
(
lpt
,
128
);
size_t
localThreads
[
3
]
=
{
llength
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
lpt
,
1
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
contour
->
total
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
reader_oclmat
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_a
.
data
));
cl_int
dst_step
=
(
cl_int
)
dst_a
.
step
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_step
));
char
builOption
[
128
];
snprintf
(
builOption
,
128
,
"-D CV_8UC1"
);
openCLExecuteKernel
(
dst_a
.
clCxt
,
&
moments
,
"icvContourMoments"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
builOption
);
cv
::
Mat
dst
(
dst_a
);
a00
=
a10
=
a01
=
a20
=
a11
=
a02
=
a30
=
a21
=
a12
=
a03
=
0.0
;
if
(
!
cv
::
ocl
::
Context
::
getContext
()
->
supportsFeature
(
FEATURE_CL_DOUBLE
))
{
for
(
int
i
=
0
;
i
<
contour
->
total
;
++
i
)
{
a00
+=
dst
.
at
<
cl_long
>
(
0
,
i
);
a10
+=
dst
.
at
<
cl_long
>
(
1
,
i
);
a01
+=
dst
.
at
<
cl_long
>
(
2
,
i
);
a20
+=
dst
.
at
<
cl_long
>
(
3
,
i
);
a11
+=
dst
.
at
<
cl_long
>
(
4
,
i
);
a02
+=
dst
.
at
<
cl_long
>
(
5
,
i
);
a30
+=
dst
.
at
<
cl_long
>
(
6
,
i
);
a21
+=
dst
.
at
<
cl_long
>
(
7
,
i
);
a12
+=
dst
.
at
<
cl_long
>
(
8
,
i
);
a03
+=
dst
.
at
<
cl_long
>
(
9
,
i
);
}
}
else
{
a00
=
cv
::
sum
(
dst
.
row
(
0
))[
0
];
a10
=
cv
::
sum
(
dst
.
row
(
1
))[
0
];
a01
=
cv
::
sum
(
dst
.
row
(
2
))[
0
];
a20
=
cv
::
sum
(
dst
.
row
(
3
))[
0
];
a11
=
cv
::
sum
(
dst
.
row
(
4
))[
0
];
a02
=
cv
::
sum
(
dst
.
row
(
5
))[
0
];
a30
=
cv
::
sum
(
dst
.
row
(
6
))[
0
];
a21
=
cv
::
sum
(
dst
.
row
(
7
))[
0
];
a12
=
cv
::
sum
(
dst
.
row
(
8
))[
0
];
a03
=
cv
::
sum
(
dst
.
row
(
9
))[
0
];
}
double
db1_2
,
db1_6
,
db1_12
,
db1_24
,
db1_20
,
db1_60
;
if
(
fabs
(
a00
)
>
FLT_EPSILON
)
{
if
(
a00
>
0
)
{
db1_2
=
0.5
;
db1_6
=
0.16666666666666666666666666666667
;
db1_12
=
0.083333333333333333333333333333333
;
db1_24
=
0.041666666666666666666666666666667
;
db1_20
=
0.05
;
db1_60
=
0.016666666666666666666666666666667
;
}
else
{
db1_2
=
-
0.5
;
db1_6
=
-
0.16666666666666666666666666666667
;
db1_12
=
-
0.083333333333333333333333333333333
;
db1_24
=
-
0.041666666666666666666666666666667
;
db1_20
=
-
0.05
;
db1_60
=
-
0.016666666666666666666666666666667
;
}
// spatial moments
mom
->
m00
=
a00
*
db1_2
;
mom
->
m10
=
a10
*
db1_6
;
mom
->
m01
=
a01
*
db1_6
;
mom
->
m20
=
a20
*
db1_12
;
mom
->
m11
=
a11
*
db1_24
;
mom
->
m02
=
a02
*
db1_12
;
mom
->
m30
=
a30
*
db1_20
;
mom
->
m21
=
a21
*
db1_60
;
mom
->
m12
=
a12
*
db1_60
;
mom
->
m03
=
a03
*
db1_20
;
icvCompleteMomentState
(
mom
);
}
}
}
}
}
else
Moments
ocl_moments
(
oclMat
&
src
,
bool
binary
)
//for image
{
{
for
(
size_t
i
=
0
;
i
<
reader_size
;
++
i
)
CV_Assert
(
src
.
oclchannels
()
==
1
);
if
(
src
.
type
()
==
CV_64FC1
&&
Context
::
getContext
()
->
supportsFeature
(
FEATURE_CL_DOUBLE
))
{
{
reader_mat
.
at
<
float
>
(
0
,
i
++
)
=
((
CvPoint
*
)(
reader
.
ptr
))
->
x
;
CV_Error
(
CV_StsUnsupportedFormat
,
"Moments - double is not supported by your GPU!"
);
reader_mat
.
at
<
float
>
(
0
,
i
)
=
((
CvPoint
*
)(
reader
.
ptr
))
->
y
;
CV_NEXT_SEQ_ELEM
(
contour
->
elem_size
,
reader
);
}
}
}
cv
::
ocl
::
oclMat
dst_a
(
10
,
lpt
,
CV_64FC1
);
if
(
binary
)
cv
::
ocl
::
oclMat
reader_oclmat
(
reader_mat
);
int
llength
=
std
::
min
(
lpt
,
128
);
size_t
localThreads
[
3
]
=
{
llength
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
lpt
,
1
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
contour
->
total
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
reader_oclmat
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_a
.
data
));
cl_int
dst_step
=
(
cl_int
)
dst_a
.
step
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_step
));
openCLExecuteKernel
(
dst_a
.
clCxt
,
&
moments
,
"icvContourMoments"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
cv
::
Mat
dst
(
dst_a
);
a00
=
a10
=
a01
=
a20
=
a11
=
a02
=
a30
=
a21
=
a12
=
a03
=
0.0
;
if
(
!
cv
::
ocl
::
Context
::
getContext
()
->
supportsFeature
(
FEATURE_CL_DOUBLE
))
{
for
(
int
i
=
0
;
i
<
contour
->
total
;
++
i
)
{
{
a00
+=
dst
.
at
<
cl_long
>
(
0
,
i
);
oclMat
mask
;
a10
+=
dst
.
at
<
cl_long
>
(
1
,
i
);
if
(
src
.
type
()
!=
CV_8UC1
)
a01
+=
dst
.
at
<
cl_long
>
(
2
,
i
);
{
a20
+=
dst
.
at
<
cl_long
>
(
3
,
i
);
src
.
convertTo
(
mask
,
CV_8UC1
);
a11
+=
dst
.
at
<
cl_long
>
(
4
,
i
);
}
a02
+=
dst
.
at
<
cl_long
>
(
5
,
i
);
oclMat
src8u
(
src
.
size
(),
CV_8UC1
);
a30
+=
dst
.
at
<
cl_long
>
(
6
,
i
);
src8u
.
setTo
(
Scalar
(
255
),
mask
);
a21
+=
dst
.
at
<
cl_long
>
(
7
,
i
);
src
=
src8u
;
a12
+=
dst
.
at
<
cl_long
>
(
8
,
i
);
a03
+=
dst
.
at
<
cl_long
>
(
9
,
i
);
}
}
}
const
int
TILE_SIZE
=
256
;
else
{
a00
=
cv
::
sum
(
dst
.
row
(
0
))[
0
];
a10
=
cv
::
sum
(
dst
.
row
(
1
))[
0
];
a01
=
cv
::
sum
(
dst
.
row
(
2
))[
0
];
a20
=
cv
::
sum
(
dst
.
row
(
3
))[
0
];
a11
=
cv
::
sum
(
dst
.
row
(
4
))[
0
];
a02
=
cv
::
sum
(
dst
.
row
(
5
))[
0
];
a30
=
cv
::
sum
(
dst
.
row
(
6
))[
0
];
a21
=
cv
::
sum
(
dst
.
row
(
7
))[
0
];
a12
=
cv
::
sum
(
dst
.
row
(
8
))[
0
];
a03
=
cv
::
sum
(
dst
.
row
(
9
))[
0
];
}
double
db1_2
,
db1_6
,
db1_12
,
db1_24
,
db1_20
,
db1_60
;
CvMoments
mom
;
if
(
fabs
(
a00
)
>
FLT_EPSILON
)
memset
(
&
mom
,
0
,
sizeof
(
mom
));
{
if
(
a00
>
0
)
cv
::
Size
size
=
src
.
size
();
int
blockx
,
blocky
;
blockx
=
(
size
.
width
+
TILE_SIZE
-
1
)
/
TILE_SIZE
;
blocky
=
(
size
.
height
+
TILE_SIZE
-
1
)
/
TILE_SIZE
;
oclMat
dst_m
;
int
tile_height
=
TILE_SIZE
;
size_t
localThreads
[
3
]
=
{
1
,
tile_height
,
1
};
size_t
globalThreads
[
3
]
=
{
blockx
,
size
.
height
,
1
};
if
(
Context
::
getContext
()
->
supportsFeature
(
FEATURE_CL_DOUBLE
))
{
dst_m
.
create
(
blocky
*
10
,
blockx
,
CV_64FC1
);
}
else
{
{
db1_2
=
0.5
;
dst_m
.
create
(
blocky
*
10
,
blockx
,
CV_32FC1
);
db1_6
=
0.16666666666666666666666666666667
;
db1_12
=
0.083333333333333333333333333333333
;
db1_24
=
0.041666666666666666666666666666667
;
db1_20
=
0.05
;
db1_60
=
0.016666666666666666666666666666667
;
}
}
int
src_step
=
(
int
)(
src
.
step
/
src
.
elemSize
());
int
dstm_step
=
(
int
)(
dst_m
.
step
/
dst_m
.
elemSize
());
vector
<
pair
<
size_t
,
const
void
*>
>
args
,
args_sum
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_m
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dstm_step
));
int
binary_
;
if
(
binary
)
binary_
=
1
;
else
else
binary_
=
0
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
binary_
));
char
builOption
[
128
];
if
(
binary
||
src
.
type
()
==
CV_8UC1
)
{
snprintf
(
builOption
,
128
,
"-D CV_8UC1"
);
}
else
if
(
src
.
type
()
==
CV_16UC1
)
{
{
db1_2
=
-
0.5
;
snprintf
(
builOption
,
128
,
"-D CV_16UC1"
);
db1_6
=
-
0.16666666666666666666666666666667
;
}
else
if
(
src
.
type
()
==
CV_16SC1
)
db1_12
=
-
0.083333333333333333333333333333333
;
{
db1_24
=
-
0.041666666666666666666666666666667
;
snprintf
(
builOption
,
128
,
"-D CV_16SC1"
);
db1_20
=
-
0.05
;
}
else
if
(
src
.
type
()
==
CV_32FC1
)
db1_60
=
-
0.016666666666666666666666666666667
;
{
snprintf
(
builOption
,
128
,
"-D CV_32FC1"
);
}
else
if
(
src
.
type
()
==
CV_64FC1
)
{
snprintf
(
builOption
,
128
,
"-D CV_64FC1"
);
}
else
{
CV_Error
(
CV_StsUnsupportedFormat
,
""
);
}
openCLExecuteKernel
(
Context
::
getContext
(),
&
moments
,
"CvMoments"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
builOption
);
Mat
tmp
(
dst_m
);
tmp
.
convertTo
(
tmp
,
CV_64FC1
);
double
tmp_m
[
10
]
=
{
0
};
for
(
int
j
=
0
;
j
<
tmp
.
rows
;
j
+=
10
)
{
for
(
int
i
=
0
;
i
<
tmp
.
cols
;
i
++
)
{
tmp_m
[
0
]
+=
tmp
.
at
<
double
>
(
j
,
i
);
tmp_m
[
1
]
+=
tmp
.
at
<
double
>
(
j
+
1
,
i
);
tmp_m
[
2
]
+=
tmp
.
at
<
double
>
(
j
+
2
,
i
);
tmp_m
[
3
]
+=
tmp
.
at
<
double
>
(
j
+
3
,
i
);
tmp_m
[
4
]
+=
tmp
.
at
<
double
>
(
j
+
4
,
i
);
tmp_m
[
5
]
+=
tmp
.
at
<
double
>
(
j
+
5
,
i
);
tmp_m
[
6
]
+=
tmp
.
at
<
double
>
(
j
+
6
,
i
);
tmp_m
[
7
]
+=
tmp
.
at
<
double
>
(
j
+
7
,
i
);
tmp_m
[
8
]
+=
tmp
.
at
<
double
>
(
j
+
8
,
i
);
tmp_m
[
9
]
+=
tmp
.
at
<
double
>
(
j
+
9
,
i
);
}
}
}
// spatial moments
mom
.
m00
=
tmp_m
[
0
];
mom
->
m00
=
a00
*
db1_2
;
mom
.
m10
=
tmp_m
[
1
];
mom
->
m10
=
a10
*
db1_6
;
mom
.
m01
=
tmp_m
[
2
];
mom
->
m01
=
a01
*
db1_6
;
mom
.
m20
=
tmp_m
[
3
];
mom
->
m20
=
a20
*
db1_12
;
mom
.
m11
=
tmp_m
[
4
];
mom
->
m11
=
a11
*
db1_24
;
mom
.
m02
=
tmp_m
[
5
];
mom
->
m02
=
a02
*
db1_12
;
mom
.
m30
=
tmp_m
[
6
];
mom
->
m30
=
a30
*
db1_20
;
mom
.
m21
=
tmp_m
[
7
];
mom
->
m21
=
a21
*
db1_60
;
mom
.
m12
=
tmp_m
[
8
];
mom
->
m12
=
a12
*
db1_60
;
mom
.
m03
=
tmp_m
[
9
];
mom
->
m03
=
a03
*
db1_20
;
icvCompleteMomentState
(
&
mom
);
return
mom
;
icvCompleteMomentState
(
mom
);
}
}
}
}
static
void
ocl_cvMoments
(
const
void
*
array
,
CvMoments
*
mom
,
int
binary
)
Moments
ocl_moments
(
InputArray
_contour
)
//for contour
{
{
const
int
TILE_SIZE
=
256
;
CvMoments
mom
;
int
type
,
depth
,
cn
,
coi
=
0
;
memset
(
&
mom
,
0
,
sizeof
(
mom
));
CvMat
stub
,
*
mat
=
(
CvMat
*
)
array
;
CvContour
contourHeader
;
CvSeq
*
contour
=
0
;
CvSeqBlock
block
;
if
(
CV_IS_SEQ
(
array
))
{
contour
=
(
CvSeq
*
)
array
;
if
(
!
CV_IS_SEQ_POINT_SET
(
contour
))
CV_Error
(
CV_StsBadArg
,
"The passed sequence is not a valid contour"
);
}
if
(
!
mom
)
Mat
arr
=
_contour
.
getMat
();
CV_Error
(
CV_StsNullPtr
,
""
)
;
CvMat
c_array
=
arr
;
memset
(
mom
,
0
,
sizeof
(
*
mom
))
;
const
void
*
array
=
&
c_array
;
if
(
!
contour
)
CvSeq
*
contour
=
0
;
{
if
(
CV_IS_SEQ
(
array
))
{
contour
=
(
CvSeq
*
)(
array
);
if
(
!
CV_IS_SEQ_POINT_SET
(
contour
))
CV_Error
(
CV_StsBadArg
,
"The passed sequence is not a valid contour"
);
}
mat
=
cvGetMat
(
mat
,
&
stub
,
&
coi
);
int
type
,
coi
=
0
;
type
=
CV_MAT_TYPE
(
mat
->
type
);
if
(
type
==
CV_32SC2
||
type
==
CV_32FC2
)
CvMat
stub
,
*
mat
=
(
CvMat
*
)(
array
);
{
CvContour
contourHeader
;
contour
=
cvPointSeqFromMat
(
CvSeqBlock
block
;
CV_SEQ_KIND_CURVE
|
CV_SEQ_FLAG_CLOSED
,
mat
,
&
contourHeader
,
&
block
);
}
}
if
(
contour
)
{
icvContourMoments
(
contour
,
mom
);
return
;
}
type
=
CV_MAT_TYPE
(
mat
->
type
);
if
(
!
contour
)
depth
=
CV_MAT_DEPTH
(
type
);
{
cn
=
CV_MAT_CN
(
type
);
mat
=
cvGetMat
(
mat
,
&
stub
,
&
coi
);
type
=
CV_MAT_TYPE
(
mat
->
type
);
cv
::
Size
size
=
cvGetMatSize
(
mat
);
if
(
cn
>
1
&&
coi
==
0
)
if
(
type
==
CV_32SC2
||
type
==
CV_32FC2
)
CV_Error
(
CV_StsBadArg
,
"Invalid image type"
);
{
contour
=
cvPointSeqFromMat
(
if
(
size
.
width
<=
0
||
size
.
height
<=
0
)
CV_SEQ_KIND_CURVE
|
CV_SEQ_FLAG_CLOSED
,
return
;
mat
,
&
contourHeader
,
&
block
);
}
cv
::
Mat
src0
(
mat
);
}
cv
::
ocl
::
oclMat
src
(
src0
);
cv
::
Size
tileSize
;
int
blockx
,
blocky
;
if
(
size
.
width
%
TILE_SIZE
==
0
)
blockx
=
size
.
width
/
TILE_SIZE
;
else
blockx
=
size
.
width
/
TILE_SIZE
+
1
;
if
(
size
.
height
%
TILE_SIZE
==
0
)
blocky
=
size
.
height
/
TILE_SIZE
;
else
blocky
=
size
.
height
/
TILE_SIZE
+
1
;
oclMat
dst_m
(
blocky
*
10
,
blockx
,
CV_64FC1
);
oclMat
sum
(
1
,
10
,
CV_64FC1
);
int
tile_width
=
std
::
min
(
size
.
width
,
TILE_SIZE
);
int
tile_height
=
std
::
min
(
size
.
height
,
TILE_SIZE
);
size_t
localThreads
[
3
]
=
{
tile_height
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
size
.
height
,
blockx
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
,
args_sum
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_m
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_m
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
blocky
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
depth
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
cn
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
coi
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
binary
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
TILE_SIZE
));
openCLExecuteKernel
(
Context
::
getContext
(),
&
moments
,
"CvMoments"
,
globalThreads
,
localThreads
,
args
,
-
1
,
depth
);
size_t
localThreadss
[
3
]
=
{
128
,
1
,
1
};
size_t
globalThreadss
[
3
]
=
{
128
,
1
,
1
};
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
rows
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
tile_height
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
tile_width
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
TILE_SIZE
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
sum
.
data
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m
.
data
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_m
.
step
));
openCLExecuteKernel
(
Context
::
getContext
(),
&
moments
,
"dst_sum"
,
globalThreadss
,
localThreadss
,
args_sum
,
-
1
,
-
1
);
Mat
dstsum
(
sum
);
mom
->
m00
=
dstsum
.
at
<
double
>
(
0
,
0
);
mom
->
m10
=
dstsum
.
at
<
double
>
(
0
,
1
);
mom
->
m01
=
dstsum
.
at
<
double
>
(
0
,
2
);
mom
->
m20
=
dstsum
.
at
<
double
>
(
0
,
3
);
mom
->
m11
=
dstsum
.
at
<
double
>
(
0
,
4
);
mom
->
m02
=
dstsum
.
at
<
double
>
(
0
,
5
);
mom
->
m30
=
dstsum
.
at
<
double
>
(
0
,
6
);
mom
->
m21
=
dstsum
.
at
<
double
>
(
0
,
7
);
mom
->
m12
=
dstsum
.
at
<
double
>
(
0
,
8
);
mom
->
m03
=
dstsum
.
at
<
double
>
(
0
,
9
);
icvCompleteMomentState
(
mom
);
}
Moments
ocl_moments
(
InputArray
_array
,
bool
binaryImage
)
{
CvMoments
om
;
Mat
arr
=
_array
.
getMat
();
CvMat
c_array
=
arr
;
ocl_cvMoments
(
&
c_array
,
&
om
,
binaryImage
);
return
om
;
}
}
CV_Assert
(
contour
);
}
icvContourMoments
(
contour
,
&
mom
);
return
mom
;
}
}
}
\ No newline at end of file
modules/ocl/src/opencl/moments.cl
View file @
b6b190df
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
//
@Authors
//
@Authors
//
Jin
Ma,
jin@multicorewareinc.com
//
Sen
Liu,
swjtuls1987@126.com
//
Sen
Liu,
swjtuls1987@126.com
//
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
...
@@ -44,22 +45,14 @@
...
@@ -44,22 +45,14 @@
//M*/
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
#
ifdef
cl_khr_fp64
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
elif
defined
(
cl_amd_fp64
)
#
elif
defined
(
cl_amd_fp64
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
endif
#
endif
typedef
double
T
;
typedef
double
T
;
typedef
double
F
;
typedef
double4
F4
;
#
define
convert_F4
convert_double4
#
else
#
else
typedef
float
F
;
typedef
float4
F4
;
typedef
long
T
;
typedef
long
T
;
#
define
convert_F4
convert_float4
#
endif
#
endif
#
define
DST_ROW_00
0
#
define
DST_ROW_00
0
...
@@ -99,7 +92,6 @@ __kernel void icvContourMoments(int contour_total,
...
@@ -99,7 +92,6 @@ __kernel void icvContourMoments(int contour_total,
xi
=
(
T
)(
*
(
reader_oclmat_data
+
(
idx
+
1
)
*
2
))
;
xi
=
(
T
)(
*
(
reader_oclmat_data
+
(
idx
+
1
)
*
2
))
;
yi
=
(
T
)(
*
(
reader_oclmat_data
+
(
idx
+
1
)
*
2
+
1
))
;
yi
=
(
T
)(
*
(
reader_oclmat_data
+
(
idx
+
1
)
*
2
+
1
))
;
}
}
xi2
=
xi
*
xi
;
xi2
=
xi
*
xi
;
yi2
=
yi
*
yi
;
yi2
=
yi
*
yi
;
dxy
=
xi_1
*
yi
-
xi
*
yi_1
;
dxy
=
xi_1
*
yi
-
xi
*
yi_1
;
...
@@ -117,864 +109,338 @@ __kernel void icvContourMoments(int contour_total,
...
@@ -117,864 +109,338 @@ __kernel void icvContourMoments(int contour_total,
*
(
dst_a
+
DST_ROW_03
*
dst_step
+
idx
)
=
dxy
*
yii_1
*
(
yi_12
+
yi2
)
;
*
(
dst_a
+
DST_ROW_03
*
dst_step
+
idx
)
=
dxy
*
yii_1
*
(
yi_12
+
yi2
)
;
*
(
dst_a
+
DST_ROW_21
*
dst_step
+
idx
)
=
*
(
dst_a
+
DST_ROW_21
*
dst_step
+
idx
)
=
dxy
*
(
xi_12
*
(
3
*
yi_1
+
yi
)
+
2
*
xi
*
xi_1
*
yii_1
+
dxy
*
(
xi_12
*
(
3
*
yi_1
+
yi
)
+
2
*
xi
*
xi_1
*
yii_1
+
xi2
*
(
yi_1
+
3
*
yi
))
;
xi2
*
(
yi_1
+
3
*
yi
))
;
*
(
dst_a
+
DST_ROW_12
*
dst_step
+
idx
)
=
*
(
dst_a
+
DST_ROW_12
*
dst_step
+
idx
)
=
dxy
*
(
yi_12
*
(
3
*
xi_1
+
xi
)
+
2
*
yi
*
yi_1
*
xii_1
+
dxy
*
(
yi_12
*
(
3
*
xi_1
+
xi
)
+
2
*
yi
*
yi_1
*
xii_1
+
yi2
*
(
xi_1
+
3
*
xi
))
;
yi2
*
(
xi_1
+
3
*
xi
))
;
}
}
__kernel
void
dst_sum
(
int
src_rows,
int
src_cols,
int
tile_height,
int
tile_width,
int
TILE_SIZE,
#
if
defined
(
DOUBLE_SUPPORT
)
__global
F*
sum,
__global
F*
dst_m,
int
dst_step
)
#
define
WT
double
#
define
WT4
double4
#
define
convert_T4
convert_double4
#
define
convert_T
convert_double
#
else
#
define
WT
float
#
define
WT4
float4
#
define
convert_T4
convert_float4
#
define
convert_T
convert_float
#
endif
#
ifdef
CV_8UC1
#
define
TT
uchar
#
elif
defined
CV_16UC1
#
define
TT
ushort
#
elif
defined
CV_16SC1
#
define
TT
short
#
elif
defined
CV_32FC1
#
define
TT
float
#
elif
defined
CV_64FC1
#
ifdef
DOUBLE_SUPPORT
#
define
TT
double
#
else
#
define
TT
float
#
endif
#
endif
__kernel
void
CvMoments
(
__global
TT*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
__global
WT*
dst_m,
int
dst_cols,
int
dst_step,
int
binary
)
{
{
int
gidy
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
int
gidx
=
get_global_id
(
1
)
;
int
ly
=
get_local_id
(
1
)
;
int
block_y
=
src_rows/tile_height
;
int
gidx
=
get_group_id
(
0
)
;
int
block_x
=
src_cols/tile_width
;
int
gidy
=
get_group_id
(
1
)
;
int
block_num
;
int
x_rest
=
src_cols
%
256
;
int
y_rest
=
src_rows
%
256
;
if
(
src_rows
>
TILE_SIZE
&&
src_rows
%
TILE_SIZE
!=
0
)
__local
int
codxy[256]
;
block_y
++
;
codxy[ly]
=
ly
;
if
(
src_cols
>
TILE_SIZE
&&
src_cols
%
TILE_SIZE
!=
0
)
block_x
++
;
block_num
=
block_y
*
block_x
;
__local
F
dst_sum[10][128]
;
if
(
gidy<128-block_num
)
for
(
int
i=0
; i<10; i++)
dst_sum[i][gidy+block_num]=0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
dst_step
/=
sizeof
(
F
)
;
WT4
x0
=
(
WT4
)(
0.f
)
;
if
(
gidy<block_num
)
WT4
x1
=
(
WT4
)(
0.f
)
;
{
WT4
x2
=
(
WT4
)(
0.f
)
;
dst_sum[0][gidy]
=
*
(
dst_m
+
mad24
(
DST_ROW_00
*
block_y,
dst_step,
gidy
))
;
WT4
x3
=
(
WT4
)(
0.f
)
;
dst_sum[1][gidy]
=
*
(
dst_m
+
mad24
(
DST_ROW_10
*
block_y,
dst_step,
gidy
))
;
dst_sum[2][gidy]
=
*
(
dst_m
+
mad24
(
DST_ROW_01
*
block_y,
dst_step,
gidy
))
;
dst_sum[3][gidy]
=
*
(
dst_m
+
mad24
(
DST_ROW_20
*
block_y,
dst_step,
gidy
))
;
dst_sum[4][gidy]
=
*
(
dst_m
+
mad24
(
DST_ROW_11
*
block_y,
dst_step,
gidy
))
;
dst_sum[5][gidy]
=
*
(
dst_m
+
mad24
(
DST_ROW_02
*
block_y,
dst_step,
gidy
))
;
dst_sum[6][gidy]
=
*
(
dst_m
+
mad24
(
DST_ROW_30
*
block_y,
dst_step,
gidy
))
;
dst_sum[7][gidy]
=
*
(
dst_m
+
mad24
(
DST_ROW_21
*
block_y,
dst_step,
gidy
))
;
dst_sum[8][gidy]
=
*
(
dst_m
+
mad24
(
DST_ROW_12
*
block_y,
dst_step,
gidy
))
;
dst_sum[9][gidy]
=
*
(
dst_m
+
mad24
(
DST_ROW_03
*
block_y,
dst_step,
gidy
))
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize=64
; lsize>0; lsize>>=1)
{
if
(
gidy<lsize
)
{
int
lsize2
=
gidy
+
lsize
;
for
(
int
i=0
; i<10; i++)
dst_sum[i][gidy]
+=
dst_sum[i][lsize2]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
gidy==0
)
for
(
int
i=0
; i<10; i++)
sum[i]
=
dst_sum[i][0]
;
}
__kernel
void
CvMoments_D0
(
__global
uchar16*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
__global
TT*
row
=
src_data
+
gidy
*
src_step
+
ly
*
src_step
+
gidx
*
256
;
__global
F*
dst_m,
bool
switchFlag
=
false
;
int
dst_cols,
int
dst_step,
int
blocky,
int
depth,
int
cn,
int
coi,
int
binary,
int
TILE_SIZE
)
{
uchar
tmp_coi[16]
; // get the coi data
uchar16
tmp[16]
;
int
VLEN_C
=
16
; // vector length of uchar
int
gidy
=
get_global_id
(
0
)
;
int
gidx
=
get_global_id
(
1
)
;
int
wgidy
=
get_group_id
(
0
)
;
int
wgidx
=
get_group_id
(
1
)
;
int
lidy
=
get_local_id
(
0
)
;
int
lidx
=
get_local_id
(
1
)
;
int
y
=
wgidy*TILE_SIZE
; // vector length of uchar
int
x
=
wgidx*TILE_SIZE
; // vector length of uchar
int
kcn
=
(
cn==2
)
?2:4
;
int
rstep
=
min
(
src_step,
TILE_SIZE
)
;
int
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
int
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-
x
)
;
if
(
y+lidy
<
src_rows
)
{
if
(
tileSize_width
<
TILE_SIZE
)
for
(
int
i
=
tileSize_width
; i < rstep && (x+i) < src_cols; i++ )
*
((
__global
uchar*
)
src_data+
(
y+lidy
)
*src_step+x+i
)
=
0
;
if
(
coi
>
0
)
//channel
of
interest
WT4
p
;
for
(
int
i
=
0
; i < tileSize_width; i += VLEN_C)
WT4
x
;
{
WT4
xp
;
for
(
int
j=0
; j<VLEN_C; j++)
WT4
xxp
;
tmp_coi[j]
=
*
((
__global
uchar*
)
src_data+
(
y+lidy
)
*src_step+
(
x+i+j
)
*kcn+coi-1
)
;
tmp[i/VLEN_C]
=
(
uchar16
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7],
tmp_coi[8],tmp_coi[9],tmp_coi[10],tmp_coi[11],tmp_coi[12],tmp_coi[13],tmp_coi[14],tmp_coi[15]
)
;
}
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_C)
tmp[i/VLEN_C]
=
*
(
src_data+
(
y+lidy
)
*src_step/VLEN_C+
(
x+i
)
/VLEN_C
)
;
}
uchar16
zero
=
(
uchar16
)(
0
)
;
WT
py
=
0.f,
sy
=
0.f
;
uchar16
full
=
(
uchar16
)(
255
)
;
if
(
binary
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_C)
tmp[i/VLEN_C]
=
(
tmp[i/VLEN_C]!=zero
)
?full:zero
;
F
mom[10]
;
if
(
dy
<
src_rows
)
__local
int
m[10][128]
;
if
(
lidy
<
128
)
{
{
for
(
int
i=0
; i<10; i++)
if
((
x_rest
>
0
)
&&
(
gidx
==
(
get_num_groups
(
0
)
-
1
)))
m[i][lidy]=0
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
lm[10]
=
{0}
;
int16
x0
=
(
int16
)(
0
)
;
int16
x1
=
(
int16
)(
0
)
;
int16
x2
=
(
int16
)(
0
)
;
int16
x3
=
(
int16
)(
0
)
;
for
(
int
xt
=
0
; xt < tileSize_width; xt+=(VLEN_C) )
{
int16
v_xt
=
(
int16
)(
xt,
xt+1,
xt+2,
xt+3,
xt+4,
xt+5,
xt+6,
xt+7,
xt+8,
xt+9,
xt+10,
xt+11,
xt+12,
xt+13,
xt+14,
xt+15
)
;
int16
p
=
convert_int16
(
tmp[xt/VLEN_C]
)
;
int16
xp
=
v_xt
*
p,
xxp
=
xp
*v_xt
;
x0
+=
p
;
x1
+=
xp
;
x2
+=
xxp
;
x3
+=
xxp
*
v_xt
;
}
x0.s0
+=
x0.s1
+
x0.s2
+
x0.s3
+
x0.s4
+
x0.s5
+
x0.s6
+
x0.s7
+
x0.s8
+
x0.s9
+
x0.sa
+
x0.sb
+
x0.sc
+
x0.sd
+
x0.se
+
x0.sf
;
x1.s0
+=
x1.s1
+
x1.s2
+
x1.s3
+
x1.s4
+
x1.s5
+
x1.s6
+
x1.s7
+
x1.s8
+
x1.s9
+
x1.sa
+
x1.sb
+
x1.sc
+
x1.sd
+
x1.se
+
x1.sf
;
x2.s0
+=
x2.s1
+
x2.s2
+
x2.s3
+
x2.s4
+
x2.s5
+
x2.s6
+
x2.s7
+
x2.s8
+
x2.s9
+
x2.sa
+
x2.sb
+
x2.sc
+
x2.sd
+
x2.se
+
x2.sf
;
x3.s0
+=
x3.s1
+
x3.s2
+
x3.s3
+
x3.s4
+
x3.s5
+
x3.s6
+
x3.s7
+
x3.s8
+
x3.s9
+
x3.sa
+
x3.sb
+
x3.sc
+
x3.sd
+
x3.se
+
x3.sf
;
int
py
=
lidy
*
((
int
)
x0.s0
)
;
int
sy
=
lidy*lidy
;
int
bheight
=
min
(
tileSize_height,
TILE_SIZE/2
)
;
if
(
bheight
>=
TILE_SIZE/2&&lidy
>
bheight-1&&lidy
<
tileSize_height
)
{
m[9][lidy-bheight]
=
((
int
)
py
)
*
sy
; // m03
m[8][lidy-bheight]
=
((
int
)
x1.s0
)
*
sy
; // m12
m[7][lidy-bheight]
=
((
int
)
x2.s0
)
*
lidy
; // m21
m[6][lidy-bheight]
=
x3.s0
; // m30
m[5][lidy-bheight]
=
x0.s0
*
sy
; // m02
m[4][lidy-bheight]
=
x1.s0
*
lidy
; // m11
m[3][lidy-bheight]
=
x2.s0
; // m20
m[2][lidy-bheight]
=
py
; // m01
m[1][lidy-bheight]
=
x1.s0
; // m10
m[0][lidy-bheight]
=
x0.s0
; // m00
}
else
if
(
lidy
<
bheight
)
{
lm[9]
=
((
int
)
py
)
*
sy
; // m03
lm[8]
=
((
int
)
x1.s0
)
*
sy
; // m12
lm[7]
=
((
int
)
x2.s0
)
*
lidy
; // m21
lm[6]
=
x3.s0
; // m30
lm[5]
=
x0.s0
*
sy
; // m02
lm[4]
=
x1.s0
*
lidy
; // m11
lm[3]
=
x2.s0
; // m20
lm[2]
=
py
; // m01
lm[1]
=
x1.s0
; // m10
lm[0]
=
x0.s0
; // m00
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
j
=
bheight
; j >= 1; j = j/2 )
{
if
(
lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
lm[i]
=
lm[i]
+
m[i][lidy]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lidy
>=
j/2&&lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
m[i][lidy-j/2]
=
lm[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lidy
==
0&&lidx
==
0
)
{
for
(
int
mt
=
0
; mt < 10; mt++ )
mom[mt]
=
(
F
)
lm[mt]
;
if
(
binary
)
{
{
F
s
=
1./255
;
int
i
;
for
(
int
mt
=
0
; mt < 10; mt++ )
for
(
i
=
0
; i < x_rest - 4; i += 4)
mom[mt]
*=
s
;
{
}
p
=
convert_T4
(
vload4
(
0
,
row
+
i
))
;
F
xm
=
x
*
mom[0],
ym
=
y
*
mom[0]
;
x
=
convert_T4
(
vload4
(
0
,
codxy
+
i
))
;
xp
=
x
*
p
;
//
accumulate
moments
computed
in
each
tile
xxp
=
xp
*
x
;
dst_step
/=
sizeof
(
F
)
;
x0
+=
p
;
//
+
m00
(
=
m00
'
)
x1
+=
xp
;
*
(
dst_m
+
mad24
(
DST_ROW_00
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[0]
;
x2
+=
xxp
;
x3
+=
convert_T4
(
xxp
*
x
)
;
}
//
+
m10
(
=
m10
'
+
x*m00
'
)
x0.s0
=
x0.s0
+
x0.s1
+
x0.s2
+
x0.s3
;
*
(
dst_m
+
mad24
(
DST_ROW_10
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[1]
+
xm
;
//
+
m01
(
=
m01
'
+
y*m00
'
)
x1.s0
=
x1.s0
+
x1.s1
+
x1.s2
+
x1.s3
;
*
(
dst_m
+
mad24
(
DST_ROW_01
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[2]
+
ym
;
//
+
m20
(
=
m20
'
+
2*x*m10
'
+
x*x*m00
'
)
x2.s0
=
x2.s0
+
x2.s1
+
x2.s2
+
x2.s3
;
*
(
dst_m
+
mad24
(
DST_ROW_20
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[3]
+
x
*
(
mom[1]
*
2
+
xm
)
;
//
+
m11
(
=
m11
'
+
x*m01
'
+
y*m10
'
+
x*y*m00
'
)
x3.s0
=
x3.s0
+
x3.s1
+
x3.s2
+
x3.s3
;
*
(
dst_m
+
mad24
(
DST_ROW_11
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[4]
+
x
*
(
mom[2]
+
ym
)
+
y
*
mom[1]
;
//
+
m02
(
=
m02
'
+
2*y*m01
'
+
y*y*m00
'
)
WT
x0_
=
0
;
*
(
dst_m
+
mad24
(
DST_ROW_02
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[5]
+
y
*
(
mom[2]
*
2
+
ym
)
;
WT
x1_
=
0
;
WT
x2_
=
0
;
WT
x3_
=
0
;
//
+
m30
(
=
m30
'
+
3*x*m20
'
+
3*x*x*m10
'
+
x*x*x*m00
'
)
for
(
; i < x_rest; i++)
*
(
dst_m
+
mad24
(
DST_ROW_30
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[6]
+
x
*
(
3.
*
mom[3]
+
x
*
(
3.
*
mom[1]
+
xm
))
;
{
WT
p_
=
0
;
p_
=
row[i]
;
WT
x_
=
convert_T
(
codxy[i]
)
;
//
+
m21
(
=
m21
'
+
x*
(
2*m11
'
+
2*y*m10
'
+
x*m01
'
+
x*y*m00
'
)
+
y*m20
'
)
*
(
dst_m
+
mad24
(
DST_ROW_21
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[7]
+
x
*
(
2
*
(
mom[4]
+
y
*
mom[1]
)
+
x
*
(
mom[2]
+
ym
))
+
y
*
mom[3]
;
//
+
m12
(
=
m12
'
+
y*
(
2*m11
'
+
2*x*m01
'
+
y*m10
'
+
x*y*m00
'
)
+
x*m02
'
)
WT
xp_
=
x_
*
p_
;
*
(
dst_m
+
mad24
(
DST_ROW_12
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[8]
+
y
*
(
2
*
(
mom[4]
+
x
*
mom[2]
)
+
y
*
(
mom[1]
+
xm
))
+
x
*
mom[5]
;
WT
xxp_
=
xp_
*
x_
;
//
+
m03
(
=
m03
'
+
3*y*m02
'
+
3*y*y*m01
'
+
y*y*y*m00
'
)
x0_
+=
p_
;
*
(
dst_m
+
mad24
(
DST_ROW_03
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[9]
+
y
*
(
3.
*
mom[5]
+
y
*
(
3.
*
mom[2]
+
ym
))
;
x1_
+=
xp_
;
}
x2_
+=
xxp_
;
}
x3_
+=
xxp_
*
x_
;
}
__kernel
void
CvMoments_D2
(
__global
ushort8*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
x0.s0
+=
x0_
;
__global
F*
dst_m,
x1.s0
+=
x1_
;
int
dst_cols,
int
dst_step,
int
blocky,
x2.s0
+=
x2_
;
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
x3.s0
+=
x3_
;
{
}else
ushort
tmp_coi[8]
; // get the coi data
{
ushort8
tmp[32]
;
for
(
int
i
=
0
; i < 256; i += 4)
int
VLEN_US
=
8
; // vector length of ushort
int
gidy
=
get_global_id
(
0
)
;
int
gidx
=
get_global_id
(
1
)
;
int
wgidy
=
get_group_id
(
0
)
;
int
wgidx
=
get_group_id
(
1
)
;
int
lidy
=
get_local_id
(
0
)
;
int
lidx
=
get_local_id
(
1
)
;
int
y
=
wgidy*TILE_SIZE
; // real Y index of pixel
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
kcn
=
(
cn==2
)
?2:4
;
int
rstep
=
min
(
src_step/2,
TILE_SIZE
)
;
int
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
int
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-x
)
;
if
(
y+lidy
<
src_rows
)
{
if
(
src_cols
>
TILE_SIZE
&&
tileSize_width
<
TILE_SIZE
)
for
(
int
i=tileSize_width
; i < rstep && (x+i) < src_cols; i++ )
*
((
__global
ushort*
)
src_data+
(
y+lidy
)
*src_step/2+x+i
)
=
0
;
if
(
coi
>
0
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_US)
{
{
for
(
int
j=0
; j<VLEN_US; j++)
p
=
convert_T4
(
vload4
(
0
,
row
+
i
))
;
tmp_coi[j]
=
*
((
__global
ushort*
)
src_data+
(
y+lidy
)
*
(
int
)
src_step/2+
(
x+i+j
)
*kcn+coi-1
)
;
x
=
convert_T4
(
vload4
(
0
,
codxy
+
i
))
;
tmp[i/VLEN_US]
=
(
ushort8
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7]
)
;
xp
=
x
*
p
;
xxp
=
xp
*
x
;
x0
+=
p
;
x1
+=
xp
;
x2
+=
xxp
;
x3
+=
convert_T4
(
xxp
*
x
)
;
}
}
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_US)
tmp[i/VLEN_US]
=
*
(
src_data+
(
y+lidy
)
*src_step/
(
2*VLEN_US
)
+
(
x+i
)
/VLEN_US
)
;
}
ushort8
zero
=
(
ushort8
)(
0
)
;
x0.s0
=
x0.s0
+
x0.s1
+
x0.s2
+
x0.s3
;
ushort8
full
=
(
ushort8
)(
255
)
;
if
(
binary
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_US)
tmp[i/VLEN_US]
=
(
tmp[i/VLEN_US]!=zero
)
?full:zero
;
F
mom[10]
;
__local
long
m[10][128]
;
if
(
lidy
<
128
)
for
(
int
i=0
; i<10; i++)
m[i][lidy]=0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
long
lm[10]
=
{0}
;
int8
x0
=
(
int8
)(
0
)
;
int8
x1
=
(
int8
)(
0
)
;
int8
x2
=
(
int8
)(
0
)
;
long8
x3
=
(
long8
)(
0
)
;
for
(
int
xt
=
0
; xt < tileSize_width; xt+=(VLEN_US) )
{
int8
v_xt
=
(
int8
)(
xt,
xt+1,
xt+2,
xt+3,
xt+4,
xt+5,
xt+6,
xt+7
)
;
int8
p
=
convert_int8
(
tmp[xt/VLEN_US]
)
;
int8
xp
=
v_xt
*
p,
xxp
=
xp
*
v_xt
;
x0
+=
p
;
x1
+=
xp
;
x2
+=
xxp
;
x3
+=
convert_long8
(
xxp
)
*convert_long8
(
v_xt
)
;
}
x0.s0
+=
x0.s1
+
x0.s2
+
x0.s3
+
x0.s4
+
x0.s5
+
x0.s6
+
x0.s7
;
x1.s0
+=
x1.s1
+
x1.s2
+
x1.s3
+
x1.s4
+
x1.s5
+
x1.s6
+
x1.s7
;
x2.s0
+=
x2.s1
+
x2.s2
+
x2.s3
+
x2.s4
+
x2.s5
+
x2.s6
+
x2.s7
;
x3.s0
+=
x3.s1
+
x3.s2
+
x3.s3
+
x3.s4
+
x3.s5
+
x3.s6
+
x3.s7
;
int
py
=
lidy
*
x0.s0,
sy
=
lidy*lidy
;
int
bheight
=
min
(
tileSize_height,
TILE_SIZE/2
)
;
if
(
bheight
>=
TILE_SIZE/2&&lidy
>
bheight-1&&lidy
<
tileSize_height
)
{
m[9][lidy-bheight]
=
((
long
)
py
)
*
sy
; // m03
m[8][lidy-bheight]
=
((
long
)
x1.s0
)
*
sy
; // m12
m[7][lidy-bheight]
=
((
long
)
x2.s0
)
*
lidy
; // m21
m[6][lidy-bheight]
=
x3.s0
; // m30
m[5][lidy-bheight]
=
x0.s0
*
sy
; // m02
m[4][lidy-bheight]
=
x1.s0
*
lidy
; // m11
m[3][lidy-bheight]
=
x2.s0
; // m20
m[2][lidy-bheight]
=
py
; // m01
m[1][lidy-bheight]
=
x1.s0
; // m10
m[0][lidy-bheight]
=
x0.s0
; // m00
}
else
if
(
lidy
<
bheight
)
{
lm[9]
=
((
long
)
py
)
*
sy
; // m03
lm[8]
=
((
long
)
x1.s0
)
*
sy
; // m12
lm[7]
=
((
long
)
x2.s0
)
*
lidy
; // m21
lm[6]
=
x3.s0
; // m30
lm[5]
=
x0.s0
*
sy
; // m02
lm[4]
=
x1.s0
*
lidy
; // m11
lm[3]
=
x2.s0
; // m20
lm[2]
=
py
; // m01
lm[1]
=
x1.s0
; // m10
lm[0]
=
x0.s0
; // m00
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
j
=
TILE_SIZE/2
; j >= 1; j = j/2 )
x1.s0
=
x1.s0
+
x1.s1
+
x1.s2
+
x1.s3
;
{
if
(
lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
lm[i]
=
lm[i]
+
m[i][lidy]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
j
=
TILE_SIZE/2
; j >= 1; j = j/2 )
{
if
(
lidy
>=
j/2&&lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
m[i][lidy-j/2]
=
lm[i]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lidy
==
0&&lidx
==
0
)
x2.s0
=
x2.s0
+
x2.s1
+
x2.s2
+
x2.s3
;
{
for
(
int
mt
=
0
; mt < 10; mt++ )
mom[mt]
=
(
F
)
lm[mt]
;
if
(
binary
)
x3.s0
=
x3.s0
+
x3.s1
+
x3.s2
+
x3.s3
;
{
F
s
=
1./255
;
for
(
int
mt
=
0
; mt < 10; mt++ )
mom[mt]
*=
s
;
}
}
F
xm
=
x
*mom[0],
ym
=
y
*
mom[0]
;
py
=
ly
*
x0.s0
;
sy
=
ly
*
ly
;
//
accumulate
moments
computed
in
each
tile
dst_step
/=
sizeof
(
F
)
;
//
+
m00
(
=
m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_00
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[0]
;
//
+
m10
(
=
m10
'
+
x*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_10
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[1]
+
xm
;
//
+
m01
(
=
m01
'
+
y*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_01
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[2]
+
ym
;
//
+
m20
(
=
m20
'
+
2*x*m10
'
+
x*x*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_20
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[3]
+
x
*
(
mom[1]
*
2
+
xm
)
;
//
+
m11
(
=
m11
'
+
x*m01
'
+
y*m10
'
+
x*y*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_11
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[4]
+
x
*
(
mom[2]
+
ym
)
+
y
*
mom[1]
;
//
+
m02
(
=
m02
'
+
2*y*m01
'
+
y*y*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_02
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[5]
+
y
*
(
mom[2]
*
2
+
ym
)
;
//
+
m30
(
=
m30
'
+
3*x*m20
'
+
3*x*x*m10
'
+
x*x*x*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_30
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[6]
+
x
*
(
3.
*
mom[3]
+
x
*
(
3.
*
mom[1]
+
xm
))
;
//
+
m21
(
=
m21
'
+
x*
(
2*m11
'
+
2*y*m10
'
+
x*m01
'
+
x*y*m00
'
)
+
y*m20
'
)
*
(
dst_m
+
mad24
(
DST_ROW_21
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[7]
+
x
*
(
2
*
(
mom[4]
+
y
*
mom[1]
)
+
x
*
(
mom[2]
+
ym
))
+
y
*
mom[3]
;
//
+
m12
(
=
m12
'
+
y*
(
2*m11
'
+
2*x*m01
'
+
y*m10
'
+
x*y*m00
'
)
+
x*m02
'
)
*
(
dst_m
+
mad24
(
DST_ROW_12
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[8]
+
y
*
(
2
*
(
mom[4]
+
x
*
mom[2]
)
+
y
*
(
mom[1]
+
xm
))
+
x
*
mom[5]
;
//
+
m03
(
=
m03
'
+
3*y*m02
'
+
3*y*y*m01
'
+
y*y*y*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_03
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[9]
+
y
*
(
3.
*
mom[5]
+
y
*
(
3.
*
mom[2]
+
ym
))
;
}
}
}
__local
WT
mom[10][256]
;
__kernel
void
CvMoments_D3
(
__global
short8*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
if
((
y_rest
>
0
)
&&
(
gidy
==
(
get_num_groups
(
1
)
-
1
)))
__global
F*
dst_m,
int
dst_cols,
int
dst_step,
int
blocky,
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
{
short
tmp_coi[8]
; // get the coi data
short8
tmp[32]
;
int
VLEN_S
=8
; // vector length of short
int
gidy
=
get_global_id
(
0
)
;
int
gidx
=
get_global_id
(
1
)
;
int
wgidy
=
get_group_id
(
0
)
;
int
wgidx
=
get_group_id
(
1
)
;
int
lidy
=
get_local_id
(
0
)
;
int
lidx
=
get_local_id
(
1
)
;
int
y
=
wgidy*TILE_SIZE
; // real Y index of pixel
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
kcn
=
(
cn==2
)
?2:4
;
int
rstep
=
min
(
src_step/2,
TILE_SIZE
)
;
int
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
int
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-x
)
;
if
(
y+lidy
<
src_rows
)
{
{
if
(
tileSize_width
<
TILE_SIZE
)
if
(
ly
<
y_rest
)
for
(
int
i
=
tileSize_width
; i < rstep && (x+i) < src_cols; i++ )
{
*
((
__global
short*
)
src_data+
(
y+lidy
)
*src_step/2+x+i
)
=
0
;
mom[9][ly]
=
py
*
sy
;
if
(
coi
>
0
)
mom[8][ly]
=
x1.s0
*
sy
;
for
(
int
i=0
; i < tileSize_width; i+=VLEN_S)
mom[7][ly]
=
x2.s0
*
ly
;
mom[6][ly]
=
x3.s0
;
mom[5][ly]
=
x0.s0
*
sy
;
mom[4][ly]
=
x1.s0
*
ly
;
mom[3][ly]
=
x2.s0
;
mom[2][ly]
=
py
;
mom[1][ly]
=
x1.s0
;
mom[0][ly]
=
x0.s0
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
ly
<
10
)
{
for
(
int
i
=
1
; i < y_rest; i++)
{
{
for
(
int
j=0
; j<VLEN_S; j++)
mom[ly][0]
=
mom[ly][i]
+
mom[ly][0]
;
tmp_coi[j]
=
*
((
__global
short*
)
src_data+
(
y+lidy
)
*src_step/2+
(
x+i+j
)
*kcn+coi-1
)
;
tmp[i/VLEN_S]
=
(
short8
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7]
)
;
}
}
else
}
for
(
int
i=0
; i < tileSize_width; i+=VLEN_S)
}else
tmp[i/VLEN_S]
=
*
(
src_data+
(
y+lidy
)
*src_step/
(
2*VLEN_S
)
+
(
x+i
)
/VLEN_S
)
;
{
}
mom[9][ly]
=
py
*
sy
;
mom[8][ly]
=
x1.s0
*
sy
;
mom[7][ly]
=
x2.s0
*
ly
;
mom[6][ly]
=
x3.s0
;
mom[5][ly]
=
x0.s0
*
sy
;
mom[4][ly]
=
x1.s0
*
ly
;
mom[3][ly]
=
x2.s0
;
mom[2][ly]
=
py
;
mom[1][ly]
=
x1.s0
;
mom[0][ly]
=
x0.s0
;
short8
zero
=
(
short8
)(
0
)
;
short8
full
=
(
short8
)(
255
)
;
if
(
binary
)
for
(
int
i=0
; i < tileSize_width; i+=(VLEN_S))
tmp[i/VLEN_S]
=
(
tmp[i/VLEN_S]!=zero
)
?full:zero
;
F
mom[10]
;
__local
long
m[10][128]
;
if
(
lidy
<
128
)
for
(
int
i=0
; i<10; i++)
m[i][lidy]=0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
long
lm[10]
=
{0}
;
int8
x0
=
(
int8
)(
0
)
;
int8
x1
=
(
int8
)(
0
)
;
int8
x2
=
(
int8
)(
0
)
;
long8
x3
=
(
long8
)(
0
)
;
for
(
int
xt
=
0
; xt < tileSize_width; xt+= (VLEN_S))
{
int8
v_xt
=
(
int8
)(
xt,
xt+1,
xt+2,
xt+3,
xt+4,
xt+5,
xt+6,
xt+7
)
;
int8
p
=
convert_int8
(
tmp[xt/VLEN_S]
)
;
int8
xp
=
v_xt
*
p,
xxp
=
xp
*
v_xt
;
x0
+=
p
;
x1
+=
xp
;
x2
+=
xxp
;
x3
+=
convert_long8
(
xxp
)
*
convert_long8
(
v_xt
)
;
}
x0.s0
+=
x0.s1
+
x0.s2
+
x0.s3
+
x0.s4
+
x0.s5
+
x0.s6
+
x0.s7
;
x1.s0
+=
x1.s1
+
x1.s2
+
x1.s3
+
x1.s4
+
x1.s5
+
x1.s6
+
x1.s7
;
x2.s0
+=
x2.s1
+
x2.s2
+
x2.s3
+
x2.s4
+
x2.s5
+
x2.s6
+
x2.s7
;
x3.s0
+=
x3.s1
+
x3.s2
+
x3.s3
+
x3.s4
+
x3.s5
+
x3.s6
+
x3.s7
;
int
py
=
lidy
*
x0.s0,
sy
=
lidy*lidy
;
int
bheight
=
min
(
tileSize_height,
TILE_SIZE/2
)
;
if
(
bheight
>=
TILE_SIZE/2&&lidy
>
bheight-1&&lidy
<
tileSize_height
)
{
m[9][lidy-bheight]
=
((
long
)
py
)
*
sy
; // m03
m[8][lidy-bheight]
=
((
long
)
x1.s0
)
*
sy
; // m12
m[7][lidy-bheight]
=
((
long
)
x2.s0
)
*
lidy
; // m21
m[6][lidy-bheight]
=
x3.s0
; // m30
m[5][lidy-bheight]
=
x0.s0
*
sy
; // m02
m[4][lidy-bheight]
=
x1.s0
*
lidy
; // m11
m[3][lidy-bheight]
=
x2.s0
; // m20
m[2][lidy-bheight]
=
py
; // m01
m[1][lidy-bheight]
=
x1.s0
; // m10
m[0][lidy-bheight]
=
x0.s0
; // m00
}
else
if
(
lidy
<
bheight
)
{
lm[9]
=
((
long
)
py
)
*
sy
; // m03
lm[8]
=
((
long
)(
x1.s0
))
*
sy
; // m12
lm[7]
=
((
long
)(
x2.s0
))
*
lidy
; // m21
lm[6]
=
x3.s0
; // m30
lm[5]
=
x0.s0
*
sy
; // m02
lm[4]
=
x1.s0
*
lidy
; // m11
lm[3]
=
x2.s0
; // m20
lm[2]
=
py
; // m01
lm[1]
=
x1.s0
; // m10
lm[0]
=
x0.s0
; // m00
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
j
=
TILE_SIZE/2
; j >=1; j = j/2 )
{
if
(
lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
lm[i]
=
lm[i]
+
m[i][lidy]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lidy
>=
j/2&&lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
m[i][lidy-j/2]
=
lm[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lidy
==0
&&lidx
==0
)
{
for
(
int
mt
=
0
; mt < 10; mt++ )
mom[mt]
=
(
F
)
lm[mt]
;
if
(
binary
)
if
(
ly
<
128
)
{
{
F
s
=
1./255
;
mom[0][ly]
=
mom[0][ly]
+
mom[0][ly
+
128]
;
for
(
int
mt
=
0
; mt < 10; mt++ )
mom[1][ly]
=
mom[1][ly]
+
mom[1][ly
+
128]
;
mom[mt]
*=
s
;
mom[2][ly]
=
mom[2][ly]
+
mom[2][ly
+
128]
;
mom[3][ly]
=
mom[3][ly]
+
mom[3][ly
+
128]
;
mom[4][ly]
=
mom[4][ly]
+
mom[4][ly
+
128]
;
mom[5][ly]
=
mom[5][ly]
+
mom[5][ly
+
128]
;
mom[6][ly]
=
mom[6][ly]
+
mom[6][ly
+
128]
;
mom[7][ly]
=
mom[7][ly]
+
mom[7][ly
+
128]
;
mom[8][ly]
=
mom[8][ly]
+
mom[8][ly
+
128]
;
mom[9][ly]
=
mom[9][ly]
+
mom[9][ly
+
128]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
F
xm
=
x
*
mom[0],
ym
=
y*mom[0]
;
if
(
ly
<
64
)
{
//
accumulate
moments
computed
in
each
tile
mom[0][ly]
=
mom[0][ly]
+
mom[0][ly
+
64]
;
dst_step
/=
sizeof
(
F
)
;
mom[1][ly]
=
mom[1][ly]
+
mom[1][ly
+
64]
;
mom[2][ly]
=
mom[2][ly]
+
mom[2][ly
+
64]
;
//
+
m00
(
=
m00
'
)
mom[3][ly]
=
mom[3][ly]
+
mom[3][ly
+
64]
;
*
(
dst_m
+
mad24
(
DST_ROW_00
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[0]
;
mom[4][ly]
=
mom[4][ly]
+
mom[4][ly
+
64]
;
mom[5][ly]
=
mom[5][ly]
+
mom[5][ly
+
64]
;
//
+
m10
(
=
m10
'
+
x*m00
'
)
mom[6][ly]
=
mom[6][ly]
+
mom[6][ly
+
64]
;
*
(
dst_m
+
mad24
(
DST_ROW_10
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[1]
+
xm
;
mom[7][ly]
=
mom[7][ly]
+
mom[7][ly
+
64]
;
mom[8][ly]
=
mom[8][ly]
+
mom[8][ly
+
64]
;
//
+
m01
(
=
m01
'
+
y*m00
'
)
mom[9][ly]
=
mom[9][ly]
+
mom[9][ly
+
64]
;
*
(
dst_m
+
mad24
(
DST_ROW_01
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[2]
+
ym
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
+
m20
(
=
m20
'
+
2*x*m10
'
+
x*x*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_20
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[3]
+
x
*
(
mom[1]
*
2
+
xm
)
;
//
+
m11
(
=
m11
'
+
x*m01
'
+
y*m10
'
+
x*y*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_11
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[4]
+
x
*
(
mom[2]
+
ym
)
+
y
*
mom[1]
;
//
+
m02
(
=
m02
'
+
2*y*m01
'
+
y*y*m00
'
)
if
(
ly
<
32
)
*
(
dst_m
+
mad24
(
DST_ROW_02
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[5]
+
y
*
(
mom[2]
*
2
+
ym
)
;
{
mom[0][ly]
=
mom[0][ly]
+
mom[0][ly
+
32]
;
mom[1][ly]
=
mom[1][ly]
+
mom[1][ly
+
32]
;
mom[2][ly]
=
mom[2][ly]
+
mom[2][ly
+
32]
;
mom[3][ly]
=
mom[3][ly]
+
mom[3][ly
+
32]
;
mom[4][ly]
=
mom[4][ly]
+
mom[4][ly
+
32]
;
mom[5][ly]
=
mom[5][ly]
+
mom[5][ly
+
32]
;
mom[6][ly]
=
mom[6][ly]
+
mom[6][ly
+
32]
;
mom[7][ly]
=
mom[7][ly]
+
mom[7][ly
+
32]
;
mom[8][ly]
=
mom[8][ly]
+
mom[8][ly
+
32]
;
mom[9][ly]
=
mom[9][ly]
+
mom[9][ly
+
32]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
+
m30
(
=
m30
'
+
3*x*m20
'
+
3*x*x*m10
'
+
x*x*x*m00
'
)
if
(
ly
<
16
)
*
(
dst_m
+
mad24
(
DST_ROW_30
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[6]
+
x
*
(
3.
*
mom[3]
+
x
*
(
3.
*
mom[1]
+
xm
))
;
{
mom[0][ly]
=
mom[0][ly]
+
mom[0][ly
+
16]
;
mom[1][ly]
=
mom[1][ly]
+
mom[1][ly
+
16]
;
mom[2][ly]
=
mom[2][ly]
+
mom[2][ly
+
16]
;
mom[3][ly]
=
mom[3][ly]
+
mom[3][ly
+
16]
;
mom[4][ly]
=
mom[4][ly]
+
mom[4][ly
+
16]
;
mom[5][ly]
=
mom[5][ly]
+
mom[5][ly
+
16]
;
mom[6][ly]
=
mom[6][ly]
+
mom[6][ly
+
16]
;
mom[7][ly]
=
mom[7][ly]
+
mom[7][ly
+
16]
;
mom[8][ly]
=
mom[8][ly]
+
mom[8][ly
+
16]
;
mom[9][ly]
=
mom[9][ly]
+
mom[9][ly
+
16]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
+
m21
(
=
m21
'
+
x*
(
2*m11
'
+
2*y*m10
'
+
x*m01
'
+
x*y*m00
'
)
+
y*m20
'
)
if
(
ly
<
8
)
*
(
dst_m
+
mad24
(
DST_ROW_21
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[7]
+
x
*
(
2
*
(
mom[4]
+
y
*
mom[1]
)
+
x
*
(
mom[2]
+
ym
))
+
y
*
mom[3]
;
{
mom[0][ly]
=
mom[0][ly]
+
mom[0][ly
+
8]
;
mom[1][ly]
=
mom[1][ly]
+
mom[1][ly
+
8]
;
mom[2][ly]
=
mom[2][ly]
+
mom[2][ly
+
8]
;
mom[3][ly]
=
mom[3][ly]
+
mom[3][ly
+
8]
;
mom[4][ly]
=
mom[4][ly]
+
mom[4][ly
+
8]
;
mom[5][ly]
=
mom[5][ly]
+
mom[5][ly
+
8]
;
mom[6][ly]
=
mom[6][ly]
+
mom[6][ly
+
8]
;
mom[7][ly]
=
mom[7][ly]
+
mom[7][ly
+
8]
;
mom[8][ly]
=
mom[8][ly]
+
mom[8][ly
+
8]
;
mom[9][ly]
=
mom[9][ly]
+
mom[9][ly
+
8]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
+
m12
(
=
m12
'
+
y*
(
2*m11
'
+
2*x*m01
'
+
y*m10
'
+
x*y*m00
'
)
+
x*m02
'
)
if
(
ly
<
4
)
*
(
dst_m
+
mad24
(
DST_ROW_12
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[8]
+
y
*
(
2
*
(
mom[4]
+
x
*
mom[2]
)
+
y
*
(
mom[1]
+
xm
))
+
x
*
mom[5]
;
{
mom[0][ly]
=
mom[0][ly]
+
mom[0][ly
+
4]
;
mom[1][ly]
=
mom[1][ly]
+
mom[1][ly
+
4]
;
mom[2][ly]
=
mom[2][ly]
+
mom[2][ly
+
4]
;
mom[3][ly]
=
mom[3][ly]
+
mom[3][ly
+
4]
;
mom[4][ly]
=
mom[4][ly]
+
mom[4][ly
+
4]
;
mom[5][ly]
=
mom[5][ly]
+
mom[5][ly
+
4]
;
mom[6][ly]
=
mom[6][ly]
+
mom[6][ly
+
4]
;
mom[7][ly]
=
mom[7][ly]
+
mom[7][ly
+
4]
;
mom[8][ly]
=
mom[8][ly]
+
mom[8][ly
+
4]
;
mom[9][ly]
=
mom[9][ly]
+
mom[9][ly
+
4]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
+
m03
(
=
m03
'
+
3*y*m02
'
+
3*y*y*m01
'
+
y*y*y*m00
'
)
if
(
ly
<
2
)
*
(
dst_m
+
mad24
(
DST_ROW_03
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[9]
+
y
*
(
3.
*
mom[5]
+
y
*
(
3.
*
mom[2]
+
ym
))
;
{
}
mom[0][ly]
=
mom[0][ly]
+
mom[0][ly
+
2]
;
}
mom[1][ly]
=
mom[1][ly]
+
mom[1][ly
+
2]
;
mom[2][ly]
=
mom[2][ly]
+
mom[2][ly
+
2]
;
mom[3][ly]
=
mom[3][ly]
+
mom[3][ly
+
2]
;
mom[4][ly]
=
mom[4][ly]
+
mom[4][ly
+
2]
;
mom[5][ly]
=
mom[5][ly]
+
mom[5][ly
+
2]
;
mom[6][ly]
=
mom[6][ly]
+
mom[6][ly
+
2]
;
mom[7][ly]
=
mom[7][ly]
+
mom[7][ly
+
2]
;
mom[8][ly]
=
mom[8][ly]
+
mom[8][ly
+
2]
;
mom[9][ly]
=
mom[9][ly]
+
mom[9][ly
+
2]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
__kernel
void
CvMoments_D5
(
__global
float*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
if
(
ly
<
1
)
__global
F*
dst_m,
{
int
dst_cols,
int
dst_step,
int
blocky,
mom[0][ly]
=
mom[0][ly]
+
mom[0][ly
+
1]
;
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
mom[1][ly]
=
mom[1][ly]
+
mom[1][ly
+
1]
;
{
mom[2][ly]
=
mom[2][ly]
+
mom[2][ly
+
1]
;
float
tmp_coi[4]
; // get the coi data
mom[3][ly]
=
mom[3][ly]
+
mom[3][ly
+
1]
;
float4
tmp[64]
;
mom[4][ly]
=
mom[4][ly]
+
mom[4][ly
+
1]
;
int
VLEN_F
=
4
; // vector length of float
mom[5][ly]
=
mom[5][ly]
+
mom[5][ly
+
1]
;
int
gidy
=
get_global_id
(
0
)
;
mom[6][ly]
=
mom[6][ly]
+
mom[6][ly
+
1]
;
int
gidx
=
get_global_id
(
1
)
;
mom[7][ly]
=
mom[7][ly]
+
mom[7][ly
+
1]
;
int
wgidy
=
get_group_id
(
0
)
;
mom[8][ly]
=
mom[8][ly]
+
mom[8][ly
+
1]
;
int
wgidx
=
get_group_id
(
1
)
;
mom[9][ly]
=
mom[9][ly]
+
mom[9][ly
+
1]
;
int
lidy
=
get_local_id
(
0
)
;
}
int
lidx
=
get_local_id
(
1
)
;
int
y
=
wgidy*TILE_SIZE
; // real Y index of pixel
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
kcn
=
(
cn==2
)
?2:4
;
int
rstep
=
min
(
src_step/4,
TILE_SIZE
)
;
int
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
int
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-x
)
;
int
maxIdx
=
mul24
(
src_rows,
src_cols
)
;
int
yOff
=
(
y+lidy
)
*src_step
;
int
index
;
if
(
y+lidy
<
src_rows
)
{
if
(
tileSize_width
<
TILE_SIZE
)
for
(
int
i
=
tileSize_width
; i < rstep && (x+i) < src_cols; i++ )
*
((
__global
float*
)
src_data+
(
y+lidy
)
*src_step/4+x+i
)
=
0
;
if
(
coi
>
0
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_F)
{
for
(
int
j=0
; j<4; j++)
tmp_coi[j]
=
*
(
src_data+
(
y+lidy
)
*src_step/4+
(
x+i+j
)
*kcn+coi-1
)
;
tmp[i/VLEN_F]
=
(
float4
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]
)
;
}
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_F)
tmp[i/VLEN_F]
=
(
float4
)(
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i
)
,
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i+1
)
,
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i+2
)
,
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i+3
))
;
}
}
float4
zero
=
(
float4
)(
0
)
;
float4
full
=
(
float4
)(
255
)
;
if
(
binary
)
for
(
int
i=0
; i < tileSize_width; i+=4)
tmp[i/VLEN_F]
=
(
tmp[i/VLEN_F]!=zero
)
?full:zero
;
F
mom[10]
;
__local
F
m[10][128]
;
if
(
lidy
<
128
)
for
(
int
i
=
0
; i < 10; i ++)
m[i][lidy]
=
0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
F
lm[10]
=
{0}
;
F4
x0
=
(
F4
)(
0
)
;
F4
x1
=
(
F4
)(
0
)
;
F4
x2
=
(
F4
)(
0
)
;
F4
x3
=
(
F4
)(
0
)
;
for
(
int
xt
=
0
; xt < tileSize_width; xt+=VLEN_F )
{
F4
v_xt
=
(
F4
)(
xt,
xt+1,
xt+2,
xt+3
)
;
F4
p
=
convert_F4
(
tmp[xt/VLEN_F]
)
;
F4
xp
=
v_xt
*
p,
xxp
=
xp
*
v_xt
;
x0
+=
p
;
x1
+=
xp
;
x2
+=
xxp
;
x3
+=
xxp
*
v_xt
;
}
x0.s0
+=
x0.s1
+
x0.s2
+
x0.s3
;
x1.s0
+=
x1.s1
+
x1.s2
+
x1.s3
;
x2.s0
+=
x2.s1
+
x2.s2
+
x2.s3
;
x3.s0
+=
x3.s1
+
x3.s2
+
x3.s3
;
F
py
=
lidy
*
x0.s0,
sy
=
lidy*lidy
;
int
bheight
=
min
(
tileSize_height,
TILE_SIZE/2
)
;
if
(
bheight
>=
TILE_SIZE/2&&lidy
>
bheight-1&&lidy
<
tileSize_height
)
{
m[9][lidy-bheight]
=
((
F
)
py
)
*
sy
; // m03
m[8][lidy-bheight]
=
((
F
)
x1.s0
)
*
sy
; // m12
m[7][lidy-bheight]
=
((
F
)
x2.s0
)
*
lidy
; // m21
m[6][lidy-bheight]
=
x3.s0
; // m30
m[5][lidy-bheight]
=
x0.s0
*
sy
; // m02
m[4][lidy-bheight]
=
x1.s0
*
lidy
; // m11
m[3][lidy-bheight]
=
x2.s0
; // m20
m[2][lidy-bheight]
=
py
; // m01
m[1][lidy-bheight]
=
x1.s0
; // m10
m[0][lidy-bheight]
=
x0.s0
; // m00
}
else
if
(
lidy
<
bheight
)
if
(
binary
)
{
lm[9]
=
((
F
)
py
)
*
sy
; // m03
lm[8]
=
((
F
)
x1.s0
)
*
sy
; // m12
lm[7]
=
((
F
)
x2.s0
)
*
lidy
; // m21
lm[6]
=
x3.s0
; // m30
lm[5]
=
x0.s0
*
sy
; // m02
lm[4]
=
x1.s0
*
lidy
; // m11
lm[3]
=
x2.s0
; // m20
lm[2]
=
py
; // m01
lm[1]
=
x1.s0
; // m10
lm[0]
=
x0.s0
; // m00
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
j
=
TILE_SIZE/2
; j >= 1; j = j/2 )
{
{
if
(
lidy
<
j
)
WT
s
=
1./255
;
for
(
int
i
=
0
; i < 10; i++ )
if
(
ly
<
10
)
lm[i]
=
lm[i]
+
m[i][lidy]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lidy
>=
j/2&&lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
m[i][lidy-j/2]
=
lm[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lidy
==
0&&lidx
==
0
)
{
for
(
int
mt
=
0
; mt < 10; mt++ )
mom[mt]
=
(
F
)
lm[mt]
;
if
(
binary
)
{
{
F
s
=
1./255
;
mom[ly][0]
*=
s
;
for
(
int
mt
=
0
; mt < 10; mt++ )
mom[mt]
*=
s
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
F
xm
=
x
*
mom[0],
ym
=
y
*
mom[0]
;
//
accumulate
moments
computed
in
each
tile
dst_step
/=
sizeof
(
F
)
;
//
+
m00
(
=
m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_00
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[0]
;
//
+
m10
(
=
m10
'
+
x*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_10
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[1]
+
xm
;
//
+
m01
(
=
m01
'
+
y*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_01
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[2]
+
ym
;
//
+
m20
(
=
m20
'
+
2*x*m10
'
+
x*x*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_20
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[3]
+
x
*
(
mom[1]
*
2
+
xm
)
;
//
+
m11
(
=
m11
'
+
x*m01
'
+
y*m10
'
+
x*y*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_11
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[4]
+
x
*
(
mom[2]
+
ym
)
+
y
*
mom[1]
;
//
+
m02
(
=
m02
'
+
2*y*m01
'
+
y*y*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_02
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[5]
+
y
*
(
mom[2]
*
2
+
ym
)
;
//
+
m30
(
=
m30
'
+
3*x*m20
'
+
3*x*x*m10
'
+
x*x*x*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_30
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[6]
+
x
*
(
3.
*
mom[3]
+
x
*
(
3.
*
mom[1]
+
xm
))
;
//
+
m21
(
=
m21
'
+
x*
(
2*m11
'
+
2*y*m10
'
+
x*m01
'
+
x*y*m00
'
)
+
y*m20
'
)
*
(
dst_m
+
mad24
(
DST_ROW_21
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[7]
+
x
*
(
2
*
(
mom[4]
+
y
*
mom[1]
)
+
x
*
(
mom[2]
+
ym
))
+
y
*
mom[3]
;
//
+
m12
(
=
m12
'
+
y*
(
2*m11
'
+
2*x*m01
'
+
y*m10
'
+
x*y*m00
'
)
+
x*m02
'
)
*
(
dst_m
+
mad24
(
DST_ROW_12
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[8]
+
y
*
(
2
*
(
mom[4]
+
x
*
mom[2]
)
+
y
*
(
mom[1]
+
xm
))
+
x
*
mom[5]
;
//
+
m03
(
=
m03
'
+
3*y*m02
'
+
3*y*y*m01
'
+
y*y*y*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_03
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[9]
+
y
*
(
3.
*
mom[5]
+
y
*
(
3.
*
mom[2]
+
ym
))
;
}
}
}
WT
xm
=
(
gidx
*
256
)
*
mom[0][0]
;
WT
ym
=
(
gidy
*
256
)
*
mom[0][0]
;
__kernel
void
CvMoments_D6
(
__global
F*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
if
(
ly
==
0
)
__global
F*
dst_m,
int
dst_cols,
int
dst_step,
int
blocky,
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
{
F
tmp_coi[4]
; // get the coi data
F4
tmp[64]
;
int
VLEN_D
=
4
; // length of vetor
int
gidy
=
get_global_id
(
0
)
;
int
gidx
=
get_global_id
(
1
)
;
int
wgidy
=
get_group_id
(
0
)
;
int
wgidx
=
get_group_id
(
1
)
;
int
lidy
=
get_local_id
(
0
)
;
int
lidx
=
get_local_id
(
1
)
;
int
y
=
wgidy*TILE_SIZE
; // real Y index of pixel
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
kcn
=
(
cn==2
)
?2:4
;
int
rstep
=
min
(
src_step/8,
TILE_SIZE
)
;
int
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
int
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-
x
)
;
if
(
y+lidy
<
src_rows
)
{
{
if
(
tileSize_width
<
TILE_SIZE
)
mom[0][1]
=
mom[0][0]
;
for
(
int
i
=
tileSize_width
; i < rstep && (x+i) < src_cols; i++ )
mom[1][1]
=
mom[1][0]
+
xm
;
*
((
__global
F*
)
src_data+
(
y+lidy
)
*src_step/8+x+i
)
=
0
;
mom[2][1]
=
mom[2][0]
+
ym
;
if
(
coi
>
0
)
mom[3][1]
=
mom[3][0]
+
gidx
*
256
*
(
mom[1][0]
*
2
+
xm
)
;
for
(
int
i=0
; i < tileSize_width; i+=VLEN_D)
mom[4][1]
=
mom[4][0]
+
gidx
*
256
*
(
mom[2][0]
+
ym
)
+
gidy
*
256
*
mom[1][0]
;
{
mom[5][1]
=
mom[5][0]
+
gidy
*
256
*
(
mom[2][0]
*
2
+
ym
)
;
for
(
int
j=0
; j<4 && ((x+i+j)*kcn+coi-1)<src_cols; j++)
mom[6][1]
=
mom[6][0]
+
gidx
*
256
*
(
3
*
mom[3][0]
+
256
*
gidx
*
(
3
*
mom[1][0]
+
xm
))
;
tmp_coi[j]
=
*
(
src_data+
(
y+lidy
)
*src_step/8+
(
x+i+j
)
*kcn+coi-1
)
;
mom[7][1]
=
mom[7][0]
+
gidx
*
256
*
(
2
*
(
mom[4][0]
+
256
*
gidy
*
mom[1][0]
)
+
256
*
gidx
*
(
mom[2][0]
+
ym
))
+
256
*
gidy
*
mom[3][0]
;
tmp[i/VLEN_D]
=
(
F4
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]
)
;
mom[8][1]
=
mom[8][0]
+
gidy
*
256
*
(
2
*
(
mom[4][0]
+
256
*
gidx
*
mom[2][0]
)
+
256
*
gidy
*
(
mom[1][0]
+
xm
))
+
256
*
gidx
*
mom[5][0]
;
}
mom[9][1]
=
mom[9][0]
+
gidy
*
256
*
(
3
*
mom[5][0]
+
256
*
gidy
*
(
3
*
mom[2][0]
+
ym
))
;
else
for
(
int
i=0
; i < tileSize_width && (x+i+3) < src_cols; i+=VLEN_D)
tmp[i/VLEN_D]
=
(
F4
)(
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i
)
,
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i+1
)
,
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i+2
)
,
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i+3
))
;
}
}
F4
zero
=
(
F4
)(
0
)
;
F4
full
=
(
F4
)(
255
)
;
if
(
binary
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_D)
tmp[i/VLEN_D]
=
(
tmp[i/VLEN_D]!=zero
)
?full:zero
;
F
mom[10]
;
__local
F
m[10][128]
;
if
(
lidy
<
128
)
for
(
int
i=0
; i<10; i++)
m[i][lidy]=0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
F
lm[10]
=
{0}
;
F4
x0
=
(
F4
)(
0
)
;
F4
x1
=
(
F4
)(
0
)
;
F4
x2
=
(
F4
)(
0
)
;
F4
x3
=
(
F4
)(
0
)
;
for
(
int
xt
=
0
; xt < tileSize_width; xt+=VLEN_D )
{
F4
v_xt
=
(
F4
)(
xt,
xt+1,
xt+2,
xt+3
)
;
F4
p
=
tmp[xt/VLEN_D]
;
F4
xp
=
v_xt
*
p,
xxp
=
xp
*
v_xt
;
x0
+=
p
;
x1
+=
xp
;
x2
+=
xxp
;
x3
+=
xxp
*v_xt
;
}
x0.s0
+=
x0.s1
+
x0.s2
+
x0.s3
;
x1.s0
+=
x1.s1
+
x1.s2
+
x1.s3
;
x2.s0
+=
x2.s1
+
x2.s2
+
x2.s3
;
x3.s0
+=
x3.s1
+
x3.s2
+
x3.s3
;
F
py
=
lidy
*
x0.s0,
sy
=
lidy*lidy
;
int
bheight
=
min
(
tileSize_height,
TILE_SIZE/2
)
;
if
(
bheight
>=
TILE_SIZE/2&&lidy
>
bheight-1&&lidy
<
tileSize_height
)
{
m[9][lidy-bheight]
=
((
F
)
py
)
*
sy
; // m03
m[8][lidy-bheight]
=
((
F
)
x1.s0
)
*
sy
; // m12
m[7][lidy-bheight]
=
((
F
)
x2.s0
)
*
lidy
; // m21
m[6][lidy-bheight]
=
x3.s0
; // m30
m[5][lidy-bheight]
=
x0.s0
*
sy
; // m02
m[4][lidy-bheight]
=
x1.s0
*
lidy
; // m11
m[3][lidy-bheight]
=
x2.s0
; // m20
m[2][lidy-bheight]
=
py
; // m01
m[1][lidy-bheight]
=
x1.s0
; // m10
m[0][lidy-bheight]
=
x0.s0
; // m00
}
else
if
(
lidy
<
bheight
)
{
lm[9]
=
((
F
)
py
)
*
sy
; // m03
lm[8]
=
((
F
)
x1.s0
)
*
sy
; // m12
lm[7]
=
((
F
)
x2.s0
)
*
lidy
; // m21
lm[6]
=
x3.s0
; // m30
lm[5]
=
x0.s0
*
sy
; // m02
lm[4]
=
x1.s0
*
lidy
; // m11
lm[3]
=
x2.s0
; // m20
lm[2]
=
py
; // m01
lm[1]
=
x1.s0
; // m10
lm[0]
=
x0.s0
; // m00
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
j
=
TILE_SIZE/2
; j >= 1; j = j/2 )
if
(
ly
<
10
)
{
if
(
lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
lm[i]
=
lm[i]
+
m[i][lidy]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lidy
>=
j/2&&lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
m[i][lidy-j/2]
=
lm[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lidy
==
0&&lidx
==
0
)
{
{
for
(
int
mt
=
0
; mt < 10; mt++ )
dst_m[10
*
gidy
*
dst_step
+
ly
*
dst_step
+
gidx]
=
mom[ly][1]
;
mom[mt]
=
(
F
)
lm[mt]
;
if
(
binary
)
{
F
s
=
1./255
;
for
(
int
mt
=
0
; mt < 10; mt++ )
mom[mt]
*=
s
;
}
F
xm
=
x
*
mom[0],
ym
=
y
*
mom[0]
;
//
accumulate
moments
computed
in
each
tile
dst_step
/=
sizeof
(
F
)
;
//
+
m00
(
=
m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_00
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[0]
;
//
+
m10
(
=
m10
'
+
x*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_10
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[1]
+
xm
;
//
+
m01
(
=
m01
'
+
y*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_01
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[2]
+
ym
;
//
+
m20
(
=
m20
'
+
2*x*m10
'
+
x*x*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_20
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[3]
+
x
*
(
mom[1]
*
2
+
xm
)
;
//
+
m11
(
=
m11
'
+
x*m01
'
+
y*m10
'
+
x*y*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_11
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[4]
+
x
*
(
mom[2]
+
ym
)
+
y
*
mom[1]
;
//
+
m02
(
=
m02
'
+
2*y*m01
'
+
y*y*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_02
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[5]
+
y
*
(
mom[2]
*
2
+
ym
)
;
//
+
m30
(
=
m30
'
+
3*x*m20
'
+
3*x*x*m10
'
+
x*x*x*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_30
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[6]
+
x
*
(
3.
*
mom[3]
+
x
*
(
3.
*
mom[1]
+
xm
))
;
//
+
m21
(
=
m21
'
+
x*
(
2*m11
'
+
2*y*m10
'
+
x*m01
'
+
x*y*m00
'
)
+
y*m20
'
)
*
(
dst_m
+
mad24
(
DST_ROW_21
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[7]
+
x
*
(
2
*
(
mom[4]
+
y
*
mom[1]
)
+
x
*
(
mom[2]
+
ym
))
+
y
*
mom[3]
;
//
+
m12
(
=
m12
'
+
y*
(
2*m11
'
+
2*x*m01
'
+
y*m10
'
+
x*y*m00
'
)
+
x*m02
'
)
*
(
dst_m
+
mad24
(
DST_ROW_12
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[8]
+
y
*
(
2
*
(
mom[4]
+
x
*
mom[2]
)
+
y
*
(
mom[1]
+
xm
))
+
x
*
mom[5]
;
//
+
m03
(
=
m03
'
+
3*y*m02
'
+
3*y*y*m01
'
+
y*y*y*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_03
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[9]
+
y
*
(
3.
*
mom[5]
+
y
*
(
3.
*
mom[2]
+
ym
))
;
}
}
}
}
modules/ocl/test/test_moments.cpp
View file @
b6b190df
...
@@ -10,18 +10,19 @@ using namespace cvtest;
...
@@ -10,18 +10,19 @@ using namespace cvtest;
using
namespace
testing
;
using
namespace
testing
;
using
namespace
std
;
using
namespace
std
;
PARAM_TEST_CASE
(
MomentsTest
,
MatType
,
bool
)
PARAM_TEST_CASE
(
MomentsTest
,
MatType
,
bool
,
bool
)
{
{
int
type
;
int
type
;
cv
::
Mat
mat
1
;
cv
::
Mat
mat
;
bool
test_contours
;
bool
test_contours
;
bool
binaryImage
;
virtual
void
SetUp
()
virtual
void
SetUp
()
{
{
type
=
GET_PARAM
(
0
);
type
=
GET_PARAM
(
0
);
test_contours
=
GET_PARAM
(
1
);
test_contours
=
GET_PARAM
(
1
);
cv
::
Size
size
(
10
*
MWIDTH
,
10
*
MHEIGHT
);
cv
::
Size
size
(
10
*
MWIDTH
,
10
*
MHEIGHT
);
mat1
=
randomMat
(
size
,
type
,
5
,
16
,
false
);
mat
=
randomMat
(
size
,
type
,
0
,
256
,
false
);
binaryImage
=
GET_PARAM
(
2
);
}
}
void
Compare
(
Moments
&
cpu
,
Moments
&
gpu
)
void
Compare
(
Moments
&
cpu
,
Moments
&
gpu
)
...
@@ -29,16 +30,13 @@ PARAM_TEST_CASE(MomentsTest, MatType, bool)
...
@@ -29,16 +30,13 @@ PARAM_TEST_CASE(MomentsTest, MatType, bool)
Mat
gpu_dst
,
cpu_dst
;
Mat
gpu_dst
,
cpu_dst
;
HuMoments
(
cpu
,
cpu_dst
);
HuMoments
(
cpu
,
cpu_dst
);
HuMoments
(
gpu
,
gpu_dst
);
HuMoments
(
gpu
,
gpu_dst
);
EXPECT_MAT_NEAR
(
gpu_dst
,
cpu_dst
,
.5
);
EXPECT_MAT_NEAR
(
gpu_dst
,
cpu_dst
,
1e-3
);
}
}
};
};
OCL_TEST_P
(
MomentsTest
,
Mat
)
OCL_TEST_P
(
MomentsTest
,
Mat
)
{
{
bool
binaryImage
=
0
;
oclMat
src_d
(
mat
);
for
(
int
j
=
0
;
j
<
LOOP_TIMES
;
j
++
)
for
(
int
j
=
0
;
j
<
LOOP_TIMES
;
j
++
)
{
{
if
(
test_contours
)
if
(
test_contours
)
...
@@ -53,18 +51,16 @@ OCL_TEST_P(MomentsTest, Mat)
...
@@ -53,18 +51,16 @@ OCL_TEST_P(MomentsTest, Mat)
for
(
size_t
i
=
0
;
i
<
contours
.
size
();
i
++
)
for
(
size_t
i
=
0
;
i
<
contours
.
size
();
i
++
)
{
{
Moments
m
=
moments
(
contours
[
i
],
false
);
Moments
m
=
moments
(
contours
[
i
],
false
);
Moments
dm
=
ocl
::
ocl_moments
(
contours
[
i
]
,
false
);
Moments
dm
=
ocl
::
ocl_moments
(
contours
[
i
]);
Compare
(
m
,
dm
);
Compare
(
m
,
dm
);
}
}
}
}
cv
::
_InputArray
_array
(
mat1
);
cv
::
Moments
CvMom
=
cv
::
moments
(
mat
,
binaryImage
);
cv
::
Moments
CvMom
=
cv
::
moments
(
_array
,
binaryImage
);
cv
::
Moments
oclMom
=
cv
::
ocl
::
ocl_moments
(
src_d
,
binaryImage
);
cv
::
Moments
oclMom
=
cv
::
ocl
::
ocl_moments
(
_array
,
binaryImage
);
Compare
(
CvMom
,
oclMom
);
Compare
(
CvMom
,
oclMom
);
}
}
}
}
INSTANTIATE_TEST_CASE_P
(
OCL_ImgProc
,
MomentsTest
,
Combine
(
INSTANTIATE_TEST_CASE_P
(
OCL_ImgProc
,
MomentsTest
,
Combine
(
Values
(
CV_8UC1
,
CV_16UC1
,
CV_16SC1
,
CV_64FC1
),
Values
(
true
,
fals
e
)));
Values
(
CV_8UC1
,
CV_16UC1
,
CV_16SC1
,
CV_32FC1
,
CV_64FC1
),
Values
(
false
,
true
),
Values
(
false
,
tru
e
)));
#endif // HAVE_OPENCL
#endif // HAVE_OPENCL
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment