Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
af13c860
Commit
af13c860
authored
Feb 27, 2013
by
Andrey Kamaev
Committed by
OpenCV Buildbot
Feb 27, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #568 from bitwangyaoyao:2.4_moments
parents
5d43ce85
dba2ec05
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
1392 additions
and
0 deletions
+1392
-0
ocl.hpp
modules/ocl/include/opencv2/ocl/ocl.hpp
+2
-0
moments.cl
modules/ocl/src/kernels/moments.cl
+938
-0
moments.cpp
modules/ocl/src/moments.cpp
+380
-0
test_moments.cpp
modules/ocl/test/test_moments.cpp
+72
-0
No files found.
modules/ocl/include/opencv2/ocl/ocl.hpp
View file @
af13c860
...
@@ -1773,6 +1773,8 @@ namespace cv
...
@@ -1773,6 +1773,8 @@ namespace cv
const
oclMat
&
bu
,
const
oclMat
&
bv
,
const
oclMat
&
bu
,
const
oclMat
&
bv
,
float
pos
,
oclMat
&
newFrame
,
oclMat
&
buf
);
float
pos
,
oclMat
&
newFrame
,
oclMat
&
buf
);
//! computes moments of the rasterized shape or a vector of points
CV_EXPORTS
Moments
ocl_moments
(
InputArray
_array
,
bool
binaryImage
);
}
}
}
}
#if defined _MSC_VER && _MSC_VER >= 1200
#if defined _MSC_VER && _MSC_VER >= 1200
...
...
modules/ocl/src/kernels/moments.cl
0 → 100644
View file @
af13c860
#
if
defined
(
DOUBLE_SUPPORT
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
else
typedef
float
double
;
typedef
float4
double4
;
#
define
convert_double4
convert_float4
#
endif
//#pragma
OPENCL
EXTENSION
cl_amd_printf:enable
//#if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
icvContourMoments
(
int
contour_total,
__global
float*
reader_oclmat_data,
__global
double*
dst_a00,
__global
double*
dst_a10,
__global
double*
dst_a01,
__global
double*
dst_a20,
__global
double*
dst_a11,
__global
double*
dst_a02,
__global
double*
dst_a30,
__global
double*
dst_a21,
__global
double*
dst_a12,
__global
double*
dst_a03
)
{
double
xi_1,
yi_1,
xi_12,
yi_12,
xi,
yi,
xi2,
yi2,
dxy,
xii_1,
yii_1
;
int
idx
=
get_global_id
(
0
)
;
xi_1
=
*
(
reader_oclmat_data
+
(
get_global_id
(
0
)
<<
1
))
;
yi_1
=
*
(
reader_oclmat_data
+
(
get_global_id
(
0
)
<<
1
)
+
1
)
;
xi_12
=
xi_1
*
xi_1
;
yi_12
=
yi_1
*
yi_1
;
if
(
idx
==
contour_total
-
1
)
{
xi
=
*
(
reader_oclmat_data
)
;
yi
=
*
(
reader_oclmat_data
+
1
)
;
}
else
{
xi
=
*
(
reader_oclmat_data
+
(
idx
+
1
)
*
2
)
;
yi
=
*
(
reader_oclmat_data
+
(
idx
+
1
)
*
2
+
1
)
;
}
xi2
=
xi
*
xi
;
yi2
=
yi
*
yi
;
dxy
=
xi_1
*
yi
-
xi
*
yi_1
;
xii_1
=
xi_1
+
xi
;
yii_1
=
yi_1
+
yi
;
dst_a00[idx]
=
dxy
;
dst_a10[idx]
=
dxy
*
xii_1
;
dst_a01[idx]
=
dxy
*
yii_1
;
dst_a20[idx]
=
dxy
*
(
xi_1
*
xii_1
+
xi2
)
;
dst_a11[idx]
=
dxy
*
(
xi_1
*
(
yii_1
+
yi_1
)
+
xi
*
(
yii_1
+
yi
))
;
dst_a02[idx]
=
dxy
*
(
yi_1
*
yii_1
+
yi2
)
;
dst_a30[idx]
=
dxy
*
xii_1
*
(
xi_12
+
xi2
)
;
dst_a03[idx]
=
dxy
*
yii_1
*
(
yi_12
+
yi2
)
;
dst_a21[idx]
=
dxy
*
(
xi_12
*
(
3
*
yi_1
+
yi
)
+
2
*
xi
*
xi_1
*
yii_1
+
xi2
*
(
yi_1
+
3
*
yi
))
;
dst_a12[idx]
=
dxy
*
(
yi_12
*
(
3
*
xi_1
+
xi
)
+
2
*
yi
*
yi_1
*
xii_1
+
yi2
*
(
xi_1
+
3
*
xi
))
;
}
//#endif
//#if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
CvMoments_D0
(
__global
uchar16*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
int
tileSize_width,
int
tileSize_height,
__global
double*
dst_m00,
__global
double*
dst_m10,
__global
double*
dst_m01,
__global
double*
dst_m20,
__global
double*
dst_m11,
__global
double*
dst_m02,
__global
double*
dst_m30,
__global
double*
dst_m21,
__global
double*
dst_m12,
__global
double*
dst_m03,
int
dst_cols,
int
dst_step,
int
type,
int
depth,
int
cn,
int
coi,
int
binary,
int
TILE_SIZE
)
{
uchar
tmp_coi[16]
; // get the coi data
uchar16
tmp[16]
;
int
VLEN_C
=
16
; // vector length of uchar
int
gidy
=
get_global_id
(
0
)
;
int
gidx
=
get_global_id
(
1
)
;
int
wgidy
=
get_group_id
(
0
)
;
int
wgidx
=
get_group_id
(
1
)
;
int
lidy
=
get_local_id
(
0
)
;
int
lidx
=
get_local_id
(
1
)
;
int
y
=
wgidy*TILE_SIZE
; // vector length of uchar
int
x
=
wgidx*TILE_SIZE
; // vector length of uchar
int
kcn
=
(
cn==2
)
?2:4
;
int
rstep
=
min
(
src_step,
TILE_SIZE
)
;
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-
x
)
;
if
(
tileSize_width
<
TILE_SIZE
)
for
(
int
i
=
tileSize_width
; i < rstep; i++ )
*
((
__global
uchar*
)
src_data+
(
y+lidy
)
*src_step+x+i
)
=
0
;
if
(
coi
>
0
)
//channel
of
interest
for
(
int
i
=
0
; i < tileSize_width; i += VLEN_C)
{
for
(
int
j=0
; j<VLEN_C; j++)
tmp_coi[j]
=
*
((
__global
uchar*
)
src_data+
(
y+lidy
)
*src_step+
(
x+i+j
)
*kcn+coi-1
)
;
tmp[i/VLEN_C]
=
(
uchar16
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7],
tmp_coi[8],tmp_coi[9],tmp_coi[10],tmp_coi[11],tmp_coi[12],tmp_coi[13],tmp_coi[14],tmp_coi[15]
)
;
}
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_C)
tmp[i/VLEN_C]
=
*
(
src_data+
(
y+lidy
)
*src_step/VLEN_C+
(
x+i
)
/VLEN_C
)
;
uchar16
zero
=
(
uchar16
)(
0
)
;
uchar16
full
=
(
uchar16
)(
255
)
;
if
(
binary
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_C)
tmp[i/VLEN_C]
=
(
tmp[i/VLEN_C]!=zero
)
?full:zero
;
double
mom[10]
;
__local
int
m[10][128]
;
if
(
lidy
==
0
)
for
(
int
i=0
; i<10; i++)
for
(
int
j=0
; j<128; j++)
m[i][j]=0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
lm[10]
=
{0}
;
int16
x0
=
(
int16
)(
0
)
;
int16
x1
=
(
int16
)(
0
)
;
int16
x2
=
(
int16
)(
0
)
;
int16
x3
=
(
int16
)(
0
)
;
for
(
int
xt
=
0
; xt < tileSize_width; xt+=(VLEN_C) )
{
int16
v_xt
=
(
int16
)(
xt,
xt+1,
xt+2,
xt+3,
xt+4,
xt+5,
xt+6,
xt+7,
xt+8,
xt+9,
xt+10,
xt+11,
xt+12,
xt+13,
xt+14,
xt+15
)
;
int16
p
=
convert_int16
(
tmp[xt/VLEN_C]
)
;
int16
xp
=
v_xt
*
p,
xxp
=
xp
*v_xt
;
x0
+=
p
;
x1
+=
xp
;
x2
+=
xxp
;
x3
+=
xxp
*
v_xt
;
}
x0.s0
+=
x0.s1
+
x0.s2
+
x0.s3
+
x0.s4
+
x0.s5
+
x0.s6
+
x0.s7
+
x0.s8
+
x0.s9
+
x0.sa
+
x0.sb
+
x0.sc
+
x0.sd
+
x0.se
+
x0.sf
;
x1.s0
+=
x1.s1
+
x1.s2
+
x1.s3
+
x1.s4
+
x1.s5
+
x1.s6
+
x1.s7
+
x1.s8
+
x1.s9
+
x1.sa
+
x1.sb
+
x1.sc
+
x1.sd
+
x1.se
+
x1.sf
;
x2.s0
+=
x2.s1
+
x2.s2
+
x2.s3
+
x2.s4
+
x2.s5
+
x2.s6
+
x2.s7
+
x2.s8
+
x2.s9
+
x2.sa
+
x2.sb
+
x2.sc
+
x2.sd
+
x2.se
+
x2.sf
;
x3.s0
+=
x3.s1
+
x3.s2
+
x3.s3
+
x3.s4
+
x3.s5
+
x3.s6
+
x3.s7
+
x3.s8
+
x3.s9
+
x3.sa
+
x3.sb
+
x3.sc
+
x3.sd
+
x3.se
+
x3.sf
;
int
py
=
lidy
*
((
int
)
x0.s0
)
;
int
sy
=
lidy*lidy
;
int
bheight
=
min
(
tileSize_height,
TILE_SIZE/2
)
;
if
(
bheight
>=
TILE_SIZE/2&&lidy
>
bheight-1&&lidy
<
tileSize_height
)
{
m[9][lidy-bheight]
=
((
int
)
py
)
*
sy
; // m03
m[8][lidy-bheight]
=
((
int
)
x1.s0
)
*
sy
; // m12
m[7][lidy-bheight]
=
((
int
)
x2.s0
)
*
lidy
; // m21
m[6][lidy-bheight]
=
x3.s0
; // m30
m[5][lidy-bheight]
=
x0.s0
*
sy
; // m02
m[4][lidy-bheight]
=
x1.s0
*
lidy
; // m11
m[3][lidy-bheight]
=
x2.s0
; // m20
m[2][lidy-bheight]
=
py
; // m01
m[1][lidy-bheight]
=
x1.s0
; // m10
m[0][lidy-bheight]
=
x0.s0
; // m00
}
else
if
(
lidy
<
bheight
)
{
lm[9]
=
((
int
)
py
)
*
sy
; // m03
lm[8]
=
((
int
)
x1.s0
)
*
sy
; // m12
lm[7]
=
((
int
)
x2.s0
)
*
lidy
; // m21
lm[6]
=
x3.s0
; // m30
lm[5]
=
x0.s0
*
sy
; // m02
lm[4]
=
x1.s0
*
lidy
; // m11
lm[3]
=
x2.s0
; // m20
lm[2]
=
py
; // m01
lm[1]
=
x1.s0
; // m10
lm[0]
=
x0.s0
; // m00
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
j
=
bheight
; j >= 1; j = j/2 )
{
if
(
lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
lm[i]
=
lm[i]
+
m[i][lidy]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lidy
>=
j/2&&lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
m[i][lidy-j/2]
=
lm[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lidy
==
0&&lidx
==
0
)
{
for
(
int
mt
=
0
; mt < 10; mt++ )
mom[mt]
=
(
double
)
lm[mt]
;
if
(
binary
)
{
double
s
=
1./255
;
for
(
int
mt
=
0
; mt < 10; mt++ )
mom[mt]
*=
s
;
}
double
xm
=
x
*
mom[0],
ym
=
y
*
mom[0]
;
//
accumulate
moments
computed
in
each
tile
//
+
m00
(
=
m00
'
)
dst_m00[wgidy*dst_cols+wgidx]
=
mom[0]
;
//
+
m10
(
=
m10
'
+
x*m00
'
)
dst_m10[wgidy*dst_cols+wgidx]
=
mom[1]
+
xm
;
//
+
m01
(
=
m01
'
+
y*m00
'
)
dst_m01[wgidy*dst_cols+wgidx]
=
mom[2]
+
ym
;
//
+
m20
(
=
m20
'
+
2*x*m10
'
+
x*x*m00
'
)
dst_m20[wgidy*dst_cols+wgidx]
=
mom[3]
+
x
*
(
mom[1]
*
2
+
xm
)
;
//
+
m11
(
=
m11
'
+
x*m01
'
+
y*m10
'
+
x*y*m00
'
)
dst_m11[wgidy*dst_cols+wgidx]
=
mom[4]
+
x
*
(
mom[2]
+
ym
)
+
y
*
mom[1]
;
//
+
m02
(
=
m02
'
+
2*y*m01
'
+
y*y*m00
'
)
dst_m02[wgidy*dst_cols+wgidx]
=
mom[5]
+
y
*
(
mom[2]
*
2
+
ym
)
;
//
+
m30
(
=
m30
'
+
3*x*m20
'
+
3*x*x*m10
'
+
x*x*x*m00
'
)
dst_m30[wgidy*dst_cols+wgidx]
=
mom[6]
+
x
*
(
3.
*
mom[3]
+
x
*
(
3.
*
mom[1]
+
xm
))
;
//
+
m21
(
=
m21
'
+
x*
(
2*m11
'
+
2*y*m10
'
+
x*m01
'
+
x*y*m00
'
)
+
y*m20
'
)
dst_m21[wgidy*dst_cols+wgidx]
=
mom[7]
+
x
*
(
2
*
(
mom[4]
+
y
*
mom[1]
)
+
x
*
(
mom[2]
+
ym
))
+
y
*
mom[3]
;
//
+
m12
(
=
m12
'
+
y*
(
2*m11
'
+
2*x*m01
'
+
y*m10
'
+
x*y*m00
'
)
+
x*m02
'
)
dst_m12[wgidy*dst_cols+wgidx]
=
mom[8]
+
y
*
(
2
*
(
mom[4]
+
x
*
mom[2]
)
+
y
*
(
mom[1]
+
xm
))
+
x
*
mom[5]
;
//
+
m03
(
=
m03
'
+
3*y*m02
'
+
3*y*y*m01
'
+
y*y*y*m00
'
)
dst_m03[wgidy*dst_cols+wgidx]
=
mom[9]
+
y
*
(
3.
*
mom[5]
+
y
*
(
3.
*
mom[2]
+
ym
))
;
}
}
//#endif
//#if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
dst_sum
(
int
src_rows,
int
src_cols,
int
tile_height,
int
tile_width,
int
TILE_SIZE,
__global
double*
sum,
__global
double*
dst_m00,
__global
double*
dst_m10,
__global
double*
dst_m01,
__global
double*
dst_m20,
__global
double*
dst_m11,
__global
double*
dst_m02,
__global
double*
dst_m30,
__global
double*
dst_m21,
__global
double*
dst_m12,
__global
double*
dst_m03
)
{
int
gidy
=
get_global_id
(
0
)
;
int
gidx
=
get_global_id
(
1
)
;
int
block_y
=
src_rows/tile_height
;
int
block_x
=
src_cols/tile_width
;
int
block_num
;
if
(
src_rows
>
TILE_SIZE
&&
src_rows
%
TILE_SIZE
!=
0
)
block_y
++
;
if
(
src_cols
>
TILE_SIZE
&&
src_cols
%
TILE_SIZE
!=
0
)
block_x
++
;
block_num
=
block_y
*
block_x
;
__local
double
dst_sum[10][128]
;
if
(
gidy<128-block_num
)
for
(
int
i=0
; i<10; i++)
dst_sum[i][gidy+block_num]=0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
gidy<block_num
)
{
dst_sum[0][gidy]
=
dst_m00[gidy]
;
dst_sum[1][gidy]
=
dst_m10[gidy]
;
dst_sum[2][gidy]
=
dst_m01[gidy]
;
dst_sum[3][gidy]
=
dst_m20[gidy]
;
dst_sum[4][gidy]
=
dst_m11[gidy]
;
dst_sum[5][gidy]
=
dst_m02[gidy]
;
dst_sum[6][gidy]
=
dst_m30[gidy]
;
dst_sum[7][gidy]
=
dst_m21[gidy]
;
dst_sum[8][gidy]
=
dst_m12[gidy]
;
dst_sum[9][gidy]
=
dst_m03[gidy]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize=64
; lsize>0; lsize>>=1)
{
if
(
gidy<lsize
)
{
int
lsize2
=
gidy
+
lsize
;
for
(
int
i=0
; i<10; i++)
dst_sum[i][gidy]
+=
dst_sum[i][lsize2]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
gidy==0
)
for
(
int
i=0
; i<10; i++)
sum[i]
=
dst_sum[i][0]
;
}
//#endif
//#if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
CvMoments_D2
(
__global
ushort8*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
int
tileSize_width,
int
tileSize_height,
__global
double*
dst_m00,
__global
double*
dst_m10,
__global
double*
dst_m01,
__global
double*
dst_m20,
__global
double*
dst_m11,
__global
double*
dst_m02,
__global
double*
dst_m30,
__global
double*
dst_m21,
__global
double*
dst_m12,
__global
double*
dst_m03,
int
dst_cols,
int
dst_step,
int
type,
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
{
ushort
tmp_coi[8]
; // get the coi data
ushort8
tmp[32]
;
int
VLEN_US
=
8
; // vector length of ushort
int
gidy
=
get_global_id
(
0
)
;
int
gidx
=
get_global_id
(
1
)
;
int
wgidy
=
get_group_id
(
0
)
;
int
wgidx
=
get_group_id
(
1
)
;
int
lidy
=
get_local_id
(
0
)
;
int
lidx
=
get_local_id
(
1
)
;
int
y
=
wgidy*TILE_SIZE
; // real Y index of pixel
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
kcn
=
(
cn==2
)
?2:4
;
int
rstep
=
min
(
src_step/2,
TILE_SIZE
)
;
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-x
)
;
if
(
src_cols
>
TILE_SIZE
&&
tileSize_width
<
TILE_SIZE
)
for
(
int
i=tileSize_width
; i < rstep; i++ )
*
((
__global
ushort*
)
src_data+
(
y+lidy
)
*src_step/2+x+i
)
=
0
;
if
(
coi
>
0
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_US)
{
for
(
int
j=0
; j<VLEN_US; j++)
tmp_coi[j]
=
*
((
__global
ushort*
)
src_data+
(
y+lidy
)
*
(
int
)
src_step/2+
(
x+i+j
)
*kcn+coi-1
)
;
tmp[i/VLEN_US]
=
(
ushort8
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7]
)
;
}
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_US)
tmp[i/VLEN_US]
=
*
(
src_data+
(
y+lidy
)
*src_step/
(
2*VLEN_US
)
+
(
x+i
)
/VLEN_US
)
;
ushort8
zero
=
(
ushort8
)(
0
)
;
ushort8
full
=
(
ushort8
)(
255
)
;
if
(
binary
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_US)
tmp[i/VLEN_US]
=
(
tmp[i/VLEN_US]!=zero
)
?full:zero
;
double
mom[10]
;
__local
long
m[10][128]
;
if
(
lidy
==
0
)
for
(
int
i=0
; i<10; i++)
for
(
int
j=0
; j<128; j++)
m[i][j]=0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
long
lm[10]
=
{0}
;
int8
x0
=
(
int8
)(
0
)
;
int8
x1
=
(
int8
)(
0
)
;
int8
x2
=
(
int8
)(
0
)
;
long8
x3
=
(
long8
)(
0
)
;
for
(
int
xt
=
0
; xt < tileSize_width; xt+=(VLEN_US) )
{
int8
v_xt
=
(
int8
)(
xt,
xt+1,
xt+2,
xt+3,
xt+4,
xt+5,
xt+6,
xt+7
)
;
int8
p
=
convert_int8
(
tmp[xt/VLEN_US]
)
;
int8
xp
=
v_xt
*
p,
xxp
=
xp
*
v_xt
;
x0
+=
p
;
x1
+=
xp
;
x2
+=
xxp
;
x3
+=
convert_long8
(
xxp
)
*convert_long8
(
v_xt
)
;
}
x0.s0
+=
x0.s1
+
x0.s2
+
x0.s3
+
x0.s4
+
x0.s5
+
x0.s6
+
x0.s7
;
x1.s0
+=
x1.s1
+
x1.s2
+
x1.s3
+
x1.s4
+
x1.s5
+
x1.s6
+
x1.s7
;
x2.s0
+=
x2.s1
+
x2.s2
+
x2.s3
+
x2.s4
+
x2.s5
+
x2.s6
+
x2.s7
;
x3.s0
+=
x3.s1
+
x3.s2
+
x3.s3
+
x3.s4
+
x3.s5
+
x3.s6
+
x3.s7
;
int
py
=
lidy
*
x0.s0,
sy
=
lidy*lidy
;
int
bheight
=
min
(
tileSize_height,
TILE_SIZE/2
)
;
if
(
bheight
>=
TILE_SIZE/2&&lidy
>
bheight-1&&lidy
<
tileSize_height
)
{
m[9][lidy-bheight]
=
((
long
)
py
)
*
sy
; // m03
m[8][lidy-bheight]
=
((
long
)
x1.s0
)
*
sy
; // m12
m[7][lidy-bheight]
=
((
long
)
x2.s0
)
*
lidy
; // m21
m[6][lidy-bheight]
=
x3.s0
; // m30
m[5][lidy-bheight]
=
x0.s0
*
sy
; // m02
m[4][lidy-bheight]
=
x1.s0
*
lidy
; // m11
m[3][lidy-bheight]
=
x2.s0
; // m20
m[2][lidy-bheight]
=
py
; // m01
m[1][lidy-bheight]
=
x1.s0
; // m10
m[0][lidy-bheight]
=
x0.s0
; // m00
}
else
if
(
lidy
<
bheight
)
{
lm[9]
=
((
long
)
py
)
*
sy
; // m03
lm[8]
=
((
long
)
x1.s0
)
*
sy
; // m12
lm[7]
=
((
long
)
x2.s0
)
*
lidy
; // m21
lm[6]
=
x3.s0
; // m30
lm[5]
=
x0.s0
*
sy
; // m02
lm[4]
=
x1.s0
*
lidy
; // m11
lm[3]
=
x2.s0
; // m20
lm[2]
=
py
; // m01
lm[1]
=
x1.s0
; // m10
lm[0]
=
x0.s0
; // m00
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
j
=
TILE_SIZE/2
; j >= 1; j = j/2 )
{
if
(
lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
lm[i]
=
lm[i]
+
m[i][lidy]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lidy
>=
j/2&&lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
m[i][lidy-j/2]
=
lm[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lidy
==
0&&lidx
==
0
)
{
for
(
int
mt
=
0
; mt < 10; mt++ )
mom[mt]
=
(
double
)
lm[mt]
;
if
(
binary
)
{
double
s
=
1./255
;
for
(
int
mt
=
0
; mt < 10; mt++ )
mom[mt]
*=
s
;
}
double
xm
=
x
*mom[0],
ym
=
y
*
mom[0]
;
//
accumulate
moments
computed
in
each
tile
//
+
m00
(
=
m00
'
)
dst_m00[wgidy*dst_cols+wgidx]
=
mom[0]
;
//
+
m10
(
=
m10
'
+
x*m00
'
)
dst_m10[wgidy*dst_cols+wgidx]
=
mom[1]
+
xm
;
//
+
m01
(
=
m01
'
+
y*m00
'
)
dst_m01[wgidy*dst_cols+wgidx]
=
mom[2]
+
ym
;
//
+
m20
(
=
m20
'
+
2*x*m10
'
+
x*x*m00
'
)
dst_m20[wgidy*dst_cols+wgidx]
=
mom[3]
+
x
*
(
mom[1]
*
2
+
xm
)
;
//
+
m11
(
=
m11
'
+
x*m01
'
+
y*m10
'
+
x*y*m00
'
)
dst_m11[wgidy*dst_cols+wgidx]
=
mom[4]
+
x
*
(
mom[2]
+
ym
)
+
y
*
mom[1]
;
//
+
m02
(
=
m02
'
+
2*y*m01
'
+
y*y*m00
'
)
dst_m02[wgidy*dst_cols+wgidx]
=
mom[5]
+
y
*
(
mom[2]
*
2
+
ym
)
;
//
+
m30
(
=
m30
'
+
3*x*m20
'
+
3*x*x*m10
'
+
x*x*x*m00
'
)
dst_m30[wgidy*dst_cols+wgidx]
=
mom[6]
+
x
*
(
3.
*
mom[3]
+
x
*
(
3.
*
mom[1]
+
xm
))
;
//
+
m21
(
=
m21
'
+
x*
(
2*m11
'
+
2*y*m10
'
+
x*m01
'
+
x*y*m00
'
)
+
y*m20
'
)
dst_m21[wgidy*dst_cols+wgidx]
=
mom[7]
+
x
*
(
2
*
(
mom[4]
+
y
*
mom[1]
)
+
x
*
(
mom[2]
+
ym
))
+
y
*
mom[3]
;
//
+
m12
(
=
m12
'
+
y*
(
2*m11
'
+
2*x*m01
'
+
y*m10
'
+
x*y*m00
'
)
+
x*m02
'
)
dst_m12[wgidy*dst_cols+wgidx]
=
mom[8]
+
y
*
(
2
*
(
mom[4]
+
x
*
mom[2]
)
+
y
*
(
mom[1]
+
xm
))
+
x
*
mom[5]
;
//
+
m03
(
=
m03
'
+
3*y*m02
'
+
3*y*y*m01
'
+
y*y*y*m00
'
)
dst_m03[wgidy*dst_cols+wgidx]
=
mom[9]
+
y
*
(
3.
*
mom[5]
+
y
*
(
3.
*
mom[2]
+
ym
))
;
}
}
//#endif
//#if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
CvMoments_D3
(
__global
short8*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
int
tileSize_width,
int
tileSize_height,
__global
double*
dst_m00,
__global
double*
dst_m10,
__global
double*
dst_m01,
__global
double*
dst_m20,
__global
double*
dst_m11,
__global
double*
dst_m02,
__global
double*
dst_m30,
__global
double*
dst_m21,
__global
double*
dst_m12,
__global
double*
dst_m03,
int
dst_cols,
int
dst_step,
int
type,
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
{
short
tmp_coi[8]
; // get the coi data
short8
tmp[32]
;
int
VLEN_S
=8
; // vector length of short
int
gidy
=
get_global_id
(
0
)
;
int
gidx
=
get_global_id
(
1
)
;
int
wgidy
=
get_group_id
(
0
)
;
int
wgidx
=
get_group_id
(
1
)
;
int
lidy
=
get_local_id
(
0
)
;
int
lidx
=
get_local_id
(
1
)
;
int
y
=
wgidy*TILE_SIZE
; // real Y index of pixel
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
kcn
=
(
cn==2
)
?2:4
;
int
rstep
=
min
(
src_step/2,
TILE_SIZE
)
;
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-x
)
;
if
(
tileSize_width
<
TILE_SIZE
)
for
(
int
i
=
tileSize_width
; i < rstep; i++ )
*
((
__global
short*
)
src_data+
(
y+lidy
)
*src_step/2+x+i
)
=
0
;
if
(
coi
>
0
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_S)
{
for
(
int
j=0
; j<VLEN_S; j++)
tmp_coi[j]
=
*
((
__global
short*
)
src_data+
(
y+lidy
)
*src_step/2+
(
x+i+j
)
*kcn+coi-1
)
;
tmp[i/VLEN_S]
=
(
short8
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7]
)
;
}
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_S)
tmp[i/VLEN_S]
=
*
(
src_data+
(
y+lidy
)
*src_step/
(
2*VLEN_S
)
+
(
x+i
)
/VLEN_S
)
;
short8
zero
=
(
short8
)(
0
)
;
short8
full
=
(
short8
)(
255
)
;
if
(
binary
)
for
(
int
i=0
; i < tileSize_width; i+=(VLEN_S))
tmp[i/VLEN_S]
=
(
tmp[i/VLEN_S]!=zero
)
?full:zero
;
double
mom[10]
;
__local
long
m[10][128]
;
if
(
lidy
==
0
)
for
(
int
i=0
; i<10; i++)
for
(
int
j=0
; j<128; j++)
m[i][j]=0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
long
lm[10]
=
{0}
;
int8
x0
=
(
int8
)(
0
)
;
int8
x1
=
(
int8
)(
0
)
;
int8
x2
=
(
int8
)(
0
)
;
long8
x3
=
(
long8
)(
0
)
;
for
(
int
xt
=
0
; xt < tileSize_width; xt+= (VLEN_S))
{
int8
v_xt
=
(
int8
)(
xt,
xt+1,
xt+2,
xt+3,
xt+4,
xt+5,
xt+6,
xt+7
)
;
int8
p
=
convert_int8
(
tmp[xt/VLEN_S]
)
;
int8
xp
=
v_xt
*
p,
xxp
=
xp
*
v_xt
;
x0
+=
p
;
x1
+=
xp
;
x2
+=
xxp
;
x3
+=
convert_long8
(
xxp
)
*
convert_long8
(
v_xt
)
;
}
x0.s0
+=
x0.s1
+
x0.s2
+
x0.s3
+
x0.s4
+
x0.s5
+
x0.s6
+
x0.s7
;
x1.s0
+=
x1.s1
+
x1.s2
+
x1.s3
+
x1.s4
+
x1.s5
+
x1.s6
+
x1.s7
;
x2.s0
+=
x2.s1
+
x2.s2
+
x2.s3
+
x2.s4
+
x2.s5
+
x2.s6
+
x2.s7
;
x3.s0
+=
x3.s1
+
x3.s2
+
x3.s3
+
x3.s4
+
x3.s5
+
x3.s6
+
x3.s7
;
int
py
=
lidy
*
x0.s0,
sy
=
lidy*lidy
;
int
bheight
=
min
(
tileSize_height,
TILE_SIZE/2
)
;
if
(
bheight
>=
TILE_SIZE/2&&lidy
>
bheight-1&&lidy
<
tileSize_height
)
{
m[9][lidy-bheight]
=
((
long
)
py
)
*
sy
; // m03
m[8][lidy-bheight]
=
((
long
)
x1.s0
)
*
sy
; // m12
m[7][lidy-bheight]
=
((
long
)
x2.s0
)
*
lidy
; // m21
m[6][lidy-bheight]
=
x3.s0
; // m30
m[5][lidy-bheight]
=
x0.s0
*
sy
; // m02
m[4][lidy-bheight]
=
x1.s0
*
lidy
; // m11
m[3][lidy-bheight]
=
x2.s0
; // m20
m[2][lidy-bheight]
=
py
; // m01
m[1][lidy-bheight]
=
x1.s0
; // m10
m[0][lidy-bheight]
=
x0.s0
; // m00
}
else
if
(
lidy
<
bheight
)
{
lm[9]
=
((
long
)
py
)
*
sy
; // m03
lm[8]
=
((
long
)(
x1.s0
))
*
sy
; // m12
lm[7]
=
((
long
)(
x2.s0
))
*
lidy
; // m21
lm[6]
=
x3.s0
; // m30
lm[5]
=
x0.s0
*
sy
; // m02
lm[4]
=
x1.s0
*
lidy
; // m11
lm[3]
=
x2.s0
; // m20
lm[2]
=
py
; // m01
lm[1]
=
x1.s0
; // m10
lm[0]
=
x0.s0
; // m00
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
j
=
TILE_SIZE/2
; j >=1; j = j/2 )
{
if
(
lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
lm[i]
=
lm[i]
+
m[i][lidy]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lidy
>=
j/2&&lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
m[i][lidy-j/2]
=
lm[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lidy
==0
&&lidx
==0
)
{
for
(
int
mt
=
0
; mt < 10; mt++ )
mom[mt]
=
(
double
)
lm[mt]
;
if
(
binary
)
{
double
s
=
1./255
;
for
(
int
mt
=
0
; mt < 10; mt++ )
mom[mt]
*=
s
;
}
double
xm
=
x
*
mom[0],
ym
=
y*mom[0]
;
//
accumulate
moments
computed
in
each
tile
//
+
m00
(
=
m00
'
)
dst_m00[wgidy*dst_cols+wgidx]
=
mom[0]
;
//
+
m10
(
=
m10
'
+
x*m00
'
)
dst_m10[wgidy*dst_cols+wgidx]
=
mom[1]
+
xm
;
//
+
m01
(
=
m01
'
+
y*m00
'
)
dst_m01[wgidy*dst_cols+wgidx]
=
mom[2]
+
ym
;
//
+
m20
(
=
m20
'
+
2*x*m10
'
+
x*x*m00
'
)
dst_m20[wgidy*dst_cols+wgidx]
=
mom[3]
+
x
*
(
mom[1]
*
2
+
xm
)
;
//
+
m11
(
=
m11
'
+
x*m01
'
+
y*m10
'
+
x*y*m00
'
)
dst_m11[wgidy*dst_cols+wgidx]
=
mom[4]
+
x
*
(
mom[2]
+
ym
)
+
y
*
mom[1]
;
//
+
m02
(
=
m02
'
+
2*y*m01
'
+
y*y*m00
'
)
dst_m02[wgidy*dst_cols+wgidx]
=
mom[5]
+
y
*
(
mom[2]
*
2
+
ym
)
;
//
+
m30
(
=
m30
'
+
3*x*m20
'
+
3*x*x*m10
'
+
x*x*x*m00
'
)
dst_m30[wgidy*dst_cols+wgidx]
=
mom[6]
+
x
*
(
3.
*
mom[3]
+
x
*
(
3.
*
mom[1]
+
xm
))
;
//
+
m21
(
=
m21
'
+
x*
(
2*m11
'
+
2*y*m10
'
+
x*m01
'
+
x*y*m00
'
)
+
y*m20
'
)
dst_m21[wgidy*dst_cols+wgidx]
=
mom[7]
+
x
*
(
2
*
(
mom[4]
+
y
*
mom[1]
)
+
x
*
(
mom[2]
+
ym
))
+
y
*
mom[3]
;
//
+
m12
(
=
m12
'
+
y*
(
2*m11
'
+
2*x*m01
'
+
y*m10
'
+
x*y*m00
'
)
+
x*m02
'
)
dst_m12[wgidy*dst_cols+wgidx]
=
mom[8]
+
y
*
(
2
*
(
mom[4]
+
x
*
mom[2]
)
+
y
*
(
mom[1]
+
xm
))
+
x
*
mom[5]
;
//
+
m03
(
=
m03
'
+
3*y*m02
'
+
3*y*y*m01
'
+
y*y*y*m00
'
)
dst_m03[wgidy*dst_cols+wgidx]
=
mom[9]
+
y
*
(
3.
*
mom[5]
+
y
*
(
3.
*
mom[2]
+
ym
))
;
}
}
//#endif
//#if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
CvMoments_D5
(
__global
float*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
int
tileSize_width,
int
tileSize_height,
__global
double*
dst_m00,
__global
double*
dst_m10,
__global
double*
dst_m01,
__global
double*
dst_m20,
__global
double*
dst_m11,
__global
double*
dst_m02,
__global
double*
dst_m30,
__global
double*
dst_m21,
__global
double*
dst_m12,
__global
double*
dst_m03,
int
dst_cols,
int
dst_step,
int
type,
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
{
float
tmp_coi[4]
; // get the coi data
float4
tmp[64]
;
int
VLEN_F
=
4
; // vector length of float
int
gidy
=
get_global_id
(
0
)
;
int
gidx
=
get_global_id
(
1
)
;
int
wgidy
=
get_group_id
(
0
)
;
int
wgidx
=
get_group_id
(
1
)
;
int
lidy
=
get_local_id
(
0
)
;
int
lidx
=
get_local_id
(
1
)
;
int
y
=
wgidy*TILE_SIZE
; // real Y index of pixel
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
kcn
=
(
cn==2
)
?2:4
;
int
rstep
=
min
(
src_step/4,
TILE_SIZE
)
;
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-x
)
;
if
(
tileSize_width
<
TILE_SIZE
)
for
(
int
i
=
tileSize_width
; i < rstep; i++ )
*
((
__global
float*
)
src_data+
(
y+lidy
)
*src_step/4+x+i
)
=
0
;
if
(
coi
>
0
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_F)
{
for
(
int
j=0
; j<4; j++)
tmp_coi[j]
=
*
(
src_data+
(
y+lidy
)
*src_step/4+
(
x+i+j
)
*kcn+coi-1
)
;
tmp[i/VLEN_F]
=
(
float4
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]
)
;
}
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_F)
tmp[i/VLEN_F]
=
(
float4
)(
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i
)
,
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i+1
)
,
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i+2
)
,
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i+3
))
;
float4
zero
=
(
float4
)(
0
)
;
float4
full
=
(
float4
)(
255
)
;
if
(
binary
)
for
(
int
i=0
; i < tileSize_width; i+=4)
tmp[i/VLEN_F]
=
(
tmp[i/VLEN_F]!=zero
)
?full:zero
;
double
mom[10]
;
__local
double
m[10][128]
;
if
(
lidy
==
0
)
for
(
int
i
=
0
; i < 10; i ++)
for
(
int
j
=
0
; j < 128; j ++)
m[i][j]
=
0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
double
lm[10]
=
{0}
;
double4
x0
=
(
double4
)(
0
)
;
double4
x1
=
(
double4
)(
0
)
;
double4
x2
=
(
double4
)(
0
)
;
double4
x3
=
(
double4
)(
0
)
;
for
(
int
xt
=
0
; xt < tileSize_width; xt+=VLEN_F )
{
double4
v_xt
=
(
double4
)(
xt,
xt+1,
xt+2,
xt+3
)
;
double4
p
=
convert_double4
(
tmp[xt/VLEN_F]
)
;
double4
xp
=
v_xt
*
p,
xxp
=
xp
*
v_xt
;
x0
+=
p
;
x1
+=
xp
;
x2
+=
xxp
;
x3
+=
xxp
*
v_xt
;
}
x0.s0
+=
x0.s1
+
x0.s2
+
x0.s3
;
x1.s0
+=
x1.s1
+
x1.s2
+
x1.s3
;
x2.s0
+=
x2.s1
+
x2.s2
+
x2.s3
;
x3.s0
+=
x3.s1
+
x3.s2
+
x3.s3
;
/*
double
py
=
lidy
*
x0.s0,
sy
=
lidy*lidy
;
int
bheight
=
min
(
tileSize_height,
TILE_SIZE/2
)
;
if
(
bheight
>=
TILE_SIZE/2&&lidy
>
bheight-1&&lidy
<
tileSize_height
)
{
m[9][lidy-bheight]
=
((
double
)
py
)
*
sy
; // m03
m[8][lidy-bheight]
=
((
double
)
x1.s0
)
*
sy
; // m12
m[7][lidy-bheight]
=
((
double
)
x2.s0
)
*
lidy
; // m21
m[6][lidy-bheight]
=
x3.s0
; // m30
m[5][lidy-bheight]
=
x0.s0
*
sy
; // m02
m[4][lidy-bheight]
=
x1.s0
*
lidy
; // m11
m[3][lidy-bheight]
=
x2.s0
; // m20
m[2][lidy-bheight]
=
py
; // m01
m[1][lidy-bheight]
=
x1.s0
; // m10
m[0][lidy-bheight]
=
x0.s0
; // m00
}
else
if
(
lidy
<
bheight
)
{
lm[9]
=
((
double
)
py
)
*
sy
; // m03
lm[8]
=
((
double
)
x1.s0
)
*
sy
; // m12
lm[7]
=
((
double
)
x2.s0
)
*
lidy
; // m21
lm[6]
=
x3.s0
; // m30
lm[5]
=
x0.s0
*
sy
; // m02
lm[4]
=
x1.s0
*
lidy
; // m11
lm[3]
=
x2.s0
; // m20
lm[2]
=
py
; // m01
lm[1]
=
x1.s0
; // m10
lm[0]
=
x0.s0
; // m00
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
j
=
TILE_SIZE/2
; j >= 1; j = j/2 )
{
if
(
lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
lm[i]
=
lm[i]
+
m[i][lidy]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lidy
>=
j/2&&lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
m[i][lidy-j/2]
=
lm[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lidy
==
0&&lidx
==
0
)
{
for
(
int
mt
=
0
; mt < 10; mt++ )
mom[mt]
=
(
double
)
lm[mt]
;
if
(
binary
)
{
double
s
=
1./255
;
for
(
int
mt
=
0
; mt < 10; mt++ )
mom[mt]
*=
s
;
}
double
xm
=
x
*
mom[0],
ym
=
y
*
mom[0]
;
//
accumulate
moments
computed
in
each
tile
//
+
m00
(
=
m00
'
)
dst_m00[wgidy*dst_cols+wgidx]=
mom[0]
;
//
+
m10
(
=
m10
'
+
x*m00
'
)
dst_m10[wgidy*dst_cols+wgidx]
=
mom[1]
+
xm
;
//
+
m01
(
=
m01
'
+
y*m00
'
)
dst_m01[wgidy*dst_cols+wgidx]
=
mom[2]
+
ym
;
//
+
m20
(
=
m20
'
+
2*x*m10
'
+
x*x*m00
'
)
dst_m20[wgidy*dst_cols+wgidx]
=
mom[3]
+
x
*
(
mom[1]
*
2
+
xm
)
;
//
+
m11
(
=
m11
'
+
x*m01
'
+
y*m10
'
+
x*y*m00
'
)
dst_m11[wgidy*dst_cols+wgidx]
=
mom[4]
+
x
*
(
mom[2]
+
ym
)
+
y
*
mom[1]
;
//
+
m02
(
=
m02
'
+
2*y*m01
'
+
y*y*m00
'
)
dst_m02[wgidy*dst_cols+wgidx]=
mom[5]
+
y
*
(
mom[2]
*
2
+
ym
)
;
//
+
m30
(
=
m30
'
+
3*x*m20
'
+
3*x*x*m10
'
+
x*x*x*m00
'
)
dst_m30[wgidy*dst_cols+wgidx]=
mom[6]
+
x
*
(
3.
*
mom[3]
+
x
*
(
3.
*
mom[1]
+
xm
))
;
//
+
m21
(
=
m21
'
+
x*
(
2*m11
'
+
2*y*m10
'
+
x*m01
'
+
x*y*m00
'
)
+
y*m20
'
)
dst_m21[wgidy*dst_cols+wgidx]
=
mom[7]
+
x
*
(
2
*
(
mom[4]
+
y
*
mom[1]
)
+
x
*
(
mom[2]
+
ym
))
+
y
*
mom[3]
;
//
+
m12
(
=
m12
'
+
y*
(
2*m11
'
+
2*x*m01
'
+
y*m10
'
+
x*y*m00
'
)
+
x*m02
'
)
dst_m12[wgidy*dst_cols+wgidx]
=
mom[8]
+
y
*
(
2
*
(
mom[4]
+
x
*
mom[2]
)
+
y
*
(
mom[1]
+
xm
))
+
x
*
mom[5]
;
//
+
m03
(
=
m03
'
+
3*y*m02
'
+
3*y*y*m01
'
+
y*y*y*m00
'
)
dst_m03[wgidy*dst_cols+wgidx]=
mom[9]
+
y
*
(
3.
*
mom[5]
+
y
*
(
3.
*
mom[2]
+
ym
))
;
}*/
}
//#endif
//#if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
CvMoments_D6
(
__global
double*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
int
tileSize_width,
int
tileSize_height,
__global
double*
dst_m00,
__global
double*
dst_m10,
__global
double*
dst_m01,
__global
double*
dst_m20,
__global
double*
dst_m11,
__global
double*
dst_m02,
__global
double*
dst_m30,
__global
double*
dst_m21,
__global
double*
dst_m12,
__global
double*
dst_m03,
int
dst_cols,
int
dst_step,
int
type,
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
{
double
tmp_coi[4]
; // get the coi data
double4
tmp[64]
;
int
VLEN_D
=
4
; // length of vetor
int
gidy
=
get_global_id
(
0
)
;
int
gidx
=
get_global_id
(
1
)
;
int
wgidy
=
get_group_id
(
0
)
;
int
wgidx
=
get_group_id
(
1
)
;
int
lidy
=
get_local_id
(
0
)
;
int
lidx
=
get_local_id
(
1
)
;
int
y
=
wgidy*TILE_SIZE
; // real Y index of pixel
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
kcn
=
(
cn==2
)
?2:4
;
int
rstep
=
min
(
src_step/8,
TILE_SIZE
)
;
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-
x
)
;
if
(
tileSize_width
<
TILE_SIZE
)
for
(
int
i
=
tileSize_width
; i < rstep; i++ )
*
((
__global
double*
)
src_data+
(
y+lidy
)
*src_step/8+x+i
)
=
0
;
if
(
coi
>
0
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_D)
{
for
(
int
j=0
; j<4; j++)
tmp_coi[j]
=
*
(
src_data+
(
y+lidy
)
*src_step/8+
(
x+i+j
)
*kcn+coi-1
)
;
tmp[i/VLEN_D]
=
(
double4
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]
)
;
}
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_D)
tmp[i/VLEN_D]
=
(
double4
)(
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i
)
,
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i+1
)
,
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i+2
)
,
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i+3
))
;
double4
zero
=
(
double4
)(
0
)
;
double4
full
=
(
double4
)(
255
)
;
if
(
binary
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_D)
tmp[i/VLEN_D]
=
(
tmp[i/VLEN_D]!=zero
)
?full:zero
;
double
mom[10]
;
__local
double
m[10][128]
;
if
(
lidy
==
0
)
for
(
int
i=0
; i<10; i++)
for
(
int
j=0
; j<128; j++)
m[i][j]=0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
double
lm[10]
=
{0}
;
double4
x0
=
(
double4
)(
0
)
;
double4
x1
=
(
double4
)(
0
)
;
double4
x2
=
(
double4
)(
0
)
;
double4
x3
=
(
double4
)(
0
)
;
for
(
int
xt
=
0
; xt < tileSize_width; xt+=VLEN_D )
{
double4
v_xt
=
(
double4
)(
xt,
xt+1,
xt+2,
xt+3
)
;
double4
p
=
tmp[xt/VLEN_D]
;
double4
xp
=
v_xt
*
p,
xxp
=
xp
*
v_xt
;
x0
+=
p
;
x1
+=
xp
;
x2
+=
xxp
;
x3
+=
xxp
*v_xt
;
}
x0.s0
+=
x0.s1
+
x0.s2
+
x0.s3
;
x1.s0
+=
x1.s1
+
x1.s2
+
x1.s3
;
x2.s0
+=
x2.s1
+
x2.s2
+
x2.s3
;
x3.s0
+=
x3.s1
+
x3.s2
+
x3.s3
;
double
py
=
lidy
*
x0.s0,
sy
=
lidy*lidy
;
int
bheight
=
min
(
tileSize_height,
TILE_SIZE/2
)
;
if
(
bheight
>=
TILE_SIZE/2&&lidy
>
bheight-1&&lidy
<
tileSize_height
)
{
m[9][lidy-bheight]
=
((
double
)
py
)
*
sy
; // m03
m[8][lidy-bheight]
=
((
double
)
x1.s0
)
*
sy
; // m12
m[7][lidy-bheight]
=
((
double
)
x2.s0
)
*
lidy
; // m21
m[6][lidy-bheight]
=
x3.s0
; // m30
m[5][lidy-bheight]
=
x0.s0
*
sy
; // m02
m[4][lidy-bheight]
=
x1.s0
*
lidy
; // m11
m[3][lidy-bheight]
=
x2.s0
; // m20
m[2][lidy-bheight]
=
py
; // m01
m[1][lidy-bheight]
=
x1.s0
; // m10
m[0][lidy-bheight]
=
x0.s0
; // m00
}
else
if
(
lidy
<
bheight
)
{
lm[9]
=
((
double
)
py
)
*
sy
; // m03
lm[8]
=
((
double
)
x1.s0
)
*
sy
; // m12
lm[7]
=
((
double
)
x2.s0
)
*
lidy
; // m21
lm[6]
=
x3.s0
; // m30
lm[5]
=
x0.s0
*
sy
; // m02
lm[4]
=
x1.s0
*
lidy
; // m11
lm[3]
=
x2.s0
; // m20
lm[2]
=
py
; // m01
lm[1]
=
x1.s0
; // m10
lm[0]
=
x0.s0
; // m00
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
j
=
TILE_SIZE/2
; j >= 1; j = j/2 )
{
if
(
lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
lm[i]
=
lm[i]
+
m[i][lidy]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lidy
>=
j/2&&lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
m[i][lidy-j/2]
=
lm[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lidy
==
0&&lidx
==
0
)
{
for
(
int
mt
=
0
; mt < 10; mt++ )
mom[mt]
=
(
double
)
lm[mt]
;
if
(
binary
)
{
double
s
=
1./255
;
for
(
int
mt
=
0
; mt < 10; mt++ )
mom[mt]
*=
s
;
}
double
xm
=
x
*
mom[0],
ym
=
y
*
mom[0]
;
//
accumulate
moments
computed
in
each
tile
//
+
m00
(
=
m00
'
)
dst_m00[wgidy*dst_cols+wgidx]
=
mom[0]
;
//
+
m10
(
=
m10
'
+
x*m00
'
)
dst_m10[wgidy*dst_cols+wgidx]
=
mom[1]
+
xm
;
//
+
m01
(
=
m01
'
+
y*m00
'
)
dst_m01[wgidy*dst_cols+wgidx]
=
mom[2]
+
ym
;
//
+
m20
(
=
m20
'
+
2*x*m10
'
+
x*x*m00
'
)
dst_m20[wgidy*dst_cols+wgidx]
=
mom[3]
+
x
*
(
mom[1]
*
2
+
xm
)
;
//
+
m11
(
=
m11
'
+
x*m01
'
+
y*m10
'
+
x*y*m00
'
)
dst_m11[wgidy*dst_cols+wgidx]
=
mom[4]
+
x
*
(
mom[2]
+
ym
)
+
y
*
mom[1]
;
//
+
m02
(
=
m02
'
+
2*y*m01
'
+
y*y*m00
'
)
dst_m02[wgidy*dst_cols+wgidx]
=
mom[5]
+
y
*
(
mom[2]
*
2
+
ym
)
;
//
+
m30
(
=
m30
'
+
3*x*m20
'
+
3*x*x*m10
'
+
x*x*x*m00
'
)
dst_m30[wgidy*dst_cols+wgidx]
=
mom[6]
+
x
*
(
3.
*
mom[3]
+
x
*
(
3.
*
mom[1]
+
xm
))
;
//
+
m21
(
=
m21
'
+
x*
(
2*m11
'
+
2*y*m10
'
+
x*m01
'
+
x*y*m00
'
)
+
y*m20
'
)
dst_m21[wgidy*dst_cols+wgidx]
=
mom[7]
+
x
*
(
2
*
(
mom[4]
+
y
*
mom[1]
)
+
x
*
(
mom[2]
+
ym
))
+
y
*
mom[3]
;
//
+
m12
(
=
m12
'
+
y*
(
2*m11
'
+
2*x*m01
'
+
y*m10
'
+
x*y*m00
'
)
+
x*m02
'
)
dst_m12[wgidy*dst_cols+wgidx]
=
mom[8]
+
y
*
(
2
*
(
mom[4]
+
x
*
mom[2]
)
+
y
*
(
mom[1]
+
xm
))
+
x
*
mom[5]
;
//
+
m03
(
=
m03
'
+
3*y*m02
'
+
3*y*y*m01
'
+
y*y*y*m00
'
)
dst_m03[wgidy*dst_cols+wgidx]
=
mom[9]
+
y
*
(
3.
*
mom[5]
+
y
*
(
3.
*
mom[2]
+
ym
))
;
}
}
//#endif
modules/ocl/src/moments.cpp
0 → 100644
View file @
af13c860
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Sen Liu, sen@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <iostream>
namespace
cv
{
namespace
ocl
{
extern
const
char
*
moments
;
// The function calculates center of gravity and the central second order moments
static
void
icvCompleteMomentState
(
CvMoments
*
moments
)
{
double
cx
=
0
,
cy
=
0
;
double
mu20
,
mu11
,
mu02
;
assert
(
moments
!=
0
);
moments
->
inv_sqrt_m00
=
0
;
if
(
fabs
(
moments
->
m00
)
>
DBL_EPSILON
)
{
double
inv_m00
=
1.
/
moments
->
m00
;
cx
=
moments
->
m10
*
inv_m00
;
cy
=
moments
->
m01
*
inv_m00
;
moments
->
inv_sqrt_m00
=
std
::
sqrt
(
fabs
(
inv_m00
)
);
}
// mu20 = m20 - m10*cx
mu20
=
moments
->
m20
-
moments
->
m10
*
cx
;
// mu11 = m11 - m10*cy
mu11
=
moments
->
m11
-
moments
->
m10
*
cy
;
// mu02 = m02 - m01*cy
mu02
=
moments
->
m02
-
moments
->
m01
*
cy
;
moments
->
mu20
=
mu20
;
moments
->
mu11
=
mu11
;
moments
->
mu02
=
mu02
;
// mu30 = m30 - cx*(3*mu20 + cx*m10)
moments
->
mu30
=
moments
->
m30
-
cx
*
(
3
*
mu20
+
cx
*
moments
->
m10
);
mu11
+=
mu11
;
// mu21 = m21 - cx*(2*mu11 + cx*m01) - cy*mu20
moments
->
mu21
=
moments
->
m21
-
cx
*
(
mu11
+
cx
*
moments
->
m01
)
-
cy
*
mu20
;
// mu12 = m12 - cy*(2*mu11 + cy*m10) - cx*mu02
moments
->
mu12
=
moments
->
m12
-
cy
*
(
mu11
+
cy
*
moments
->
m10
)
-
cx
*
mu02
;
// mu03 = m03 - cy*(3*mu02 + cy*m01)
moments
->
mu03
=
moments
->
m03
-
cy
*
(
3
*
mu02
+
cy
*
moments
->
m01
);
}
static
void
icvContourMoments
(
CvSeq
*
contour
,
CvMoments
*
mom
)
{
if
(
contour
->
total
)
{
CvSeqReader
reader
;
int
lpt
=
contour
->
total
;
double
a00
,
a10
,
a01
,
a20
,
a11
,
a02
,
a30
,
a21
,
a12
,
a03
;
int
dst_type
=
cv
::
ocl
::
Context
::
getContext
()
->
impl
->
double_support
?
CV_64FC1
:
CV_32FC1
;
cvStartReadSeq
(
contour
,
&
reader
,
0
);
cv
::
ocl
::
oclMat
dst_a00
(
1
,
lpt
,
dst_type
);
cv
::
ocl
::
oclMat
dst_a10
(
1
,
lpt
,
dst_type
);
cv
::
ocl
::
oclMat
dst_a01
(
1
,
lpt
,
dst_type
);
cv
::
ocl
::
oclMat
dst_a20
(
1
,
lpt
,
dst_type
);
cv
::
ocl
::
oclMat
dst_a11
(
1
,
lpt
,
dst_type
);
cv
::
ocl
::
oclMat
dst_a02
(
1
,
lpt
,
dst_type
);
cv
::
ocl
::
oclMat
dst_a30
(
1
,
lpt
,
dst_type
);
cv
::
ocl
::
oclMat
dst_a21
(
1
,
lpt
,
dst_type
);
cv
::
ocl
::
oclMat
dst_a12
(
1
,
lpt
,
dst_type
);
cv
::
ocl
::
oclMat
dst_a03
(
1
,
lpt
,
dst_type
);
size_t
reader_size
=
lpt
<<
1
;
cv
::
Mat
reader_mat
(
1
,
reader_size
,
CV_32FC1
);
bool
is_float
=
CV_SEQ_ELTYPE
(
contour
)
==
CV_32FC2
;
if
(
is_float
)
{
for
(
size_t
i
=
0
;
i
<
reader_size
;
++
i
)
{
reader_mat
.
at
<
float
>
(
0
,
i
++
)
=
((
CvPoint2D32f
*
)(
reader
.
ptr
))
->
x
;
reader_mat
.
at
<
float
>
(
0
,
i
)
=
((
CvPoint2D32f
*
)(
reader
.
ptr
))
->
y
;
CV_NEXT_SEQ_ELEM
(
contour
->
elem_size
,
reader
);
}
}
else
{
for
(
size_t
i
=
0
;
i
<
reader_size
;
++
i
)
{
reader_mat
.
at
<
float
>
(
0
,
i
++
)
=
((
CvPoint
*
)(
reader
.
ptr
))
->
x
;
reader_mat
.
at
<
float
>
(
0
,
i
)
=
((
CvPoint
*
)(
reader
.
ptr
))
->
y
;
CV_NEXT_SEQ_ELEM
(
contour
->
elem_size
,
reader
);
}
}
cv
::
ocl
::
oclMat
reader_oclmat
(
reader_mat
);
int
llength
=
std
::
min
(
lpt
,
128
);
size_t
localThreads
[
3
]
=
{
llength
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
lpt
,
1
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
contour
->
total
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
reader_oclmat
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_a00
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_a10
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_a01
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_a20
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_a11
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_a02
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_a30
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_a21
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_a12
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_a03
.
data
));
openCLExecuteKernel
(
dst_a00
.
clCxt
,
&
moments
,
"icvContourMoments"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
cv
::
Mat
dst
(
dst_a00
);
cv
::
Scalar
s
=
cv
::
sum
(
dst
);
a00
=
s
[
0
];
dst
=
dst_a10
;
s
=
cv
::
sum
(
dst
);
a10
=
s
[
0
];
//dstsum[1];
dst
=
dst_a01
;
s
=
cv
::
sum
(
dst
);
a01
=
s
[
0
];
//dstsum[2];
dst
=
dst_a20
;
s
=
cv
::
sum
(
dst
);
a20
=
s
[
0
];
//dstsum[3];
dst
=
dst_a11
;
s
=
cv
::
sum
(
dst
);
a11
=
s
[
0
];
//dstsum[4];
dst
=
dst_a02
;
s
=
cv
::
sum
(
dst
);
a02
=
s
[
0
];
//dstsum[5];
dst
=
dst_a30
;
s
=
cv
::
sum
(
dst
);
a30
=
s
[
0
];
//dstsum[6];
dst
=
dst_a21
;
s
=
cv
::
sum
(
dst
);
a21
=
s
[
0
];
//dstsum[7];
dst
=
dst_a12
;
s
=
cv
::
sum
(
dst
);
a12
=
s
[
0
];
//dstsum[8];
dst
=
dst_a03
;
s
=
cv
::
sum
(
dst
);
a03
=
s
[
0
];
//dstsum[9];
double
db1_2
,
db1_6
,
db1_12
,
db1_24
,
db1_20
,
db1_60
;
if
(
fabs
(
a00
)
>
FLT_EPSILON
)
{
if
(
a00
>
0
)
{
db1_2
=
0.5
;
db1_6
=
0.16666666666666666666666666666667
;
db1_12
=
0.083333333333333333333333333333333
;
db1_24
=
0.041666666666666666666666666666667
;
db1_20
=
0.05
;
db1_60
=
0.016666666666666666666666666666667
;
}
else
{
db1_2
=
-
0.5
;
db1_6
=
-
0.16666666666666666666666666666667
;
db1_12
=
-
0.083333333333333333333333333333333
;
db1_24
=
-
0.041666666666666666666666666666667
;
db1_20
=
-
0.05
;
db1_60
=
-
0.016666666666666666666666666666667
;
}
// spatial moments
mom
->
m00
=
a00
*
db1_2
;
mom
->
m10
=
a10
*
db1_6
;
mom
->
m01
=
a01
*
db1_6
;
mom
->
m20
=
a20
*
db1_12
;
mom
->
m11
=
a11
*
db1_24
;
mom
->
m02
=
a02
*
db1_12
;
mom
->
m30
=
a30
*
db1_20
;
mom
->
m21
=
a21
*
db1_60
;
mom
->
m12
=
a12
*
db1_60
;
mom
->
m03
=
a03
*
db1_20
;
icvCompleteMomentState
(
mom
);
}
}
}
static
void
ocl_cvMoments
(
const
void
*
array
,
CvMoments
*
mom
,
int
binary
)
{
const
int
TILE_SIZE
=
256
;
int
type
,
depth
,
cn
,
coi
=
0
;
CvMat
stub
,
*
mat
=
(
CvMat
*
)
array
;
CvContour
contourHeader
;
CvSeq
*
contour
=
0
;
CvSeqBlock
block
;
if
(
CV_IS_SEQ
(
array
))
{
contour
=
(
CvSeq
*
)
array
;
if
(
!
CV_IS_SEQ_POINT_SET
(
contour
))
CV_Error
(
CV_StsBadArg
,
"The passed sequence is not a valid contour"
);
}
if
(
!
moments
)
CV_Error
(
CV_StsNullPtr
,
""
);
memset
(
mom
,
0
,
sizeof
(
*
mom
));
if
(
!
contour
)
{
mat
=
cvGetMat
(
mat
,
&
stub
,
&
coi
);
type
=
CV_MAT_TYPE
(
mat
->
type
);
if
(
type
==
CV_32SC2
||
type
==
CV_32FC2
)
{
contour
=
cvPointSeqFromMat
(
CV_SEQ_KIND_CURVE
|
CV_SEQ_FLAG_CLOSED
,
mat
,
&
contourHeader
,
&
block
);
}
}
if
(
contour
)
{
icvContourMoments
(
contour
,
mom
);
return
;
}
type
=
CV_MAT_TYPE
(
mat
->
type
);
depth
=
CV_MAT_DEPTH
(
type
);
cn
=
CV_MAT_CN
(
type
);
cv
::
Size
size
=
cvGetMatSize
(
mat
);
if
(
cn
>
1
&&
coi
==
0
)
CV_Error
(
CV_StsBadArg
,
"Invalid image type"
);
if
(
size
.
width
<=
0
||
size
.
height
<=
0
)
return
;
cv
::
Mat
src0
(
mat
);
cv
::
ocl
::
oclMat
src
(
src0
);
cv
::
Size
tileSize
;
int
blockx
,
blocky
;
if
(
size
.
width
%
TILE_SIZE
==
0
)
blockx
=
size
.
width
/
TILE_SIZE
;
else
blockx
=
size
.
width
/
TILE_SIZE
+
1
;
if
(
size
.
height
%
TILE_SIZE
==
0
)
blocky
=
size
.
height
/
TILE_SIZE
;
else
blocky
=
size
.
height
/
TILE_SIZE
+
1
;
cv
::
ocl
::
oclMat
dst_m00
(
blocky
,
blockx
,
CV_64FC1
);
cv
::
ocl
::
oclMat
dst_m10
(
blocky
,
blockx
,
CV_64FC1
);
cv
::
ocl
::
oclMat
dst_m01
(
blocky
,
blockx
,
CV_64FC1
);
cv
::
ocl
::
oclMat
dst_m20
(
blocky
,
blockx
,
CV_64FC1
);
cv
::
ocl
::
oclMat
dst_m11
(
blocky
,
blockx
,
CV_64FC1
);
cv
::
ocl
::
oclMat
dst_m02
(
blocky
,
blockx
,
CV_64FC1
);
cv
::
ocl
::
oclMat
dst_m30
(
blocky
,
blockx
,
CV_64FC1
);
cv
::
ocl
::
oclMat
dst_m21
(
blocky
,
blockx
,
CV_64FC1
);
cv
::
ocl
::
oclMat
dst_m12
(
blocky
,
blockx
,
CV_64FC1
);
cv
::
ocl
::
oclMat
dst_m03
(
blocky
,
blockx
,
CV_64FC1
);
cl_mem
sum
=
openCLCreateBuffer
(
src
.
clCxt
,
CL_MEM_READ_WRITE
,
10
*
sizeof
(
double
));
int
tile_width
=
std
::
min
(
size
.
width
,
TILE_SIZE
);
int
tile_height
=
std
::
min
(
size
.
height
,
TILE_SIZE
);
size_t
localThreads
[
3
]
=
{
tile_height
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
size
.
height
,
blockx
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
,
args_sum
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
tileSize
.
width
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
tileSize
.
height
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m00
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m10
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m01
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m20
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m11
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m02
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m30
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m21
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m12
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m03
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_m00
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_m00
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
type
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
depth
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
cn
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
coi
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
binary
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
TILE_SIZE
));
openCLExecuteKernel
(
dst_m00
.
clCxt
,
&
moments
,
"CvMoments"
,
globalThreads
,
localThreads
,
args
,
-
1
,
depth
);
size_t
localThreadss
[
3
]
=
{
128
,
1
,
1
};
size_t
globalThreadss
[
3
]
=
{
128
,
1
,
1
};
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
rows
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
tile_height
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
tile_width
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
TILE_SIZE
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
sum
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m00
.
data
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m10
.
data
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m01
.
data
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m20
.
data
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m11
.
data
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m02
.
data
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m30
.
data
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m21
.
data
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m12
.
data
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m03
.
data
));
openCLExecuteKernel
(
dst_m00
.
clCxt
,
&
moments
,
"dst_sum"
,
globalThreadss
,
localThreadss
,
args_sum
,
-
1
,
-
1
);
double
*
dstsum
=
new
double
[
10
];
memset
(
dstsum
,
0
,
10
*
sizeof
(
double
));
openCLReadBuffer
(
dst_m00
.
clCxt
,
sum
,(
void
*
)
dstsum
,
10
*
sizeof
(
double
));
mom
->
m00
=
dstsum
[
0
];
mom
->
m10
=
dstsum
[
1
];
mom
->
m01
=
dstsum
[
2
];
mom
->
m20
=
dstsum
[
3
];
mom
->
m11
=
dstsum
[
4
];
mom
->
m02
=
dstsum
[
5
];
mom
->
m30
=
dstsum
[
6
];
mom
->
m21
=
dstsum
[
7
];
mom
->
m12
=
dstsum
[
8
];
mom
->
m03
=
dstsum
[
9
];
icvCompleteMomentState
(
mom
);
}
Moments
ocl_moments
(
InputArray
_array
,
bool
binaryImage
)
{
CvMoments
om
;
Mat
arr
=
_array
.
getMat
();
CvMat
c_array
=
arr
;
ocl_cvMoments
(
&
c_array
,
&
om
,
binaryImage
);
return
om
;
}
}
}
modules/ocl/test/test_moments.cpp
0 → 100644
View file @
af13c860
#include "precomp.hpp"
#include <iomanip>
#include "opencv2/imgproc/imgproc_c.h"
#ifdef HAVE_OPENCL
using
namespace
cv
;
using
namespace
cv
::
ocl
;
using
namespace
cvtest
;
using
namespace
testing
;
using
namespace
std
;
extern
string
workdir
;
PARAM_TEST_CASE
(
MomentsTestBase
,
MatType
,
bool
)
{
int
type
;
cv
::
Mat
mat1
;
bool
test_contours
;
virtual
void
SetUp
()
{
type
=
GET_PARAM
(
0
);
test_contours
=
GET_PARAM
(
1
);
cv
::
RNG
&
rng
=
TS
::
ptr
()
->
get_rng
();
cv
::
Size
size
(
10
*
MWIDTH
,
10
*
MHEIGHT
);
mat1
=
randomMat
(
rng
,
size
,
type
,
5
,
16
,
false
);
}
void
Compare
(
Moments
&
cpu
,
Moments
&
gpu
)
{
Mat
gpu_dst
,
cpu_dst
;
HuMoments
(
cpu
,
cpu_dst
);
HuMoments
(
gpu
,
gpu_dst
);
EXPECT_MAT_NEAR
(
gpu_dst
,
cpu_dst
,
.5
,
""
);
}
};
struct
ocl_Moments
:
MomentsTestBase
{};
TEST_P
(
ocl_Moments
,
Mat
)
{
bool
binaryImage
=
0
;
SetUp
();
for
(
int
j
=
0
;
j
<
LOOP_TIMES
;
j
++
)
{
if
(
test_contours
)
{
Mat
src
=
imread
(
workdir
+
"../cpp/pic3.png"
,
1
);
Mat
src_gray
,
canny_output
;
cvtColor
(
src
,
src_gray
,
CV_BGR2GRAY
);
vector
<
vector
<
Point
>
>
contours
;
vector
<
Vec4i
>
hierarchy
;
Canny
(
src_gray
,
canny_output
,
100
,
200
,
3
);
findContours
(
canny_output
,
contours
,
hierarchy
,
CV_RETR_TREE
,
CV_CHAIN_APPROX_SIMPLE
,
Point
(
0
,
0
)
);
for
(
size_t
i
=
0
;
i
<
contours
.
size
();
i
++
)
{
Moments
m
=
moments
(
contours
[
i
],
false
);
Moments
dm
=
ocl
::
ocl_moments
(
contours
[
i
],
false
);
Compare
(
m
,
dm
);
}
}
cv
::
_InputArray
_array
(
mat1
);
cv
::
Moments
CvMom
=
cv
::
moments
(
_array
,
binaryImage
);
cv
::
Moments
oclMom
=
cv
::
ocl
::
ocl_moments
(
_array
,
binaryImage
);
Compare
(
CvMom
,
oclMom
);
}
}
INSTANTIATE_TEST_CASE_P
(
Moments
,
ocl_Moments
,
Combine
(
Values
(
CV_8UC1
,
CV_16UC1
,
CV_16SC1
,
CV_64FC1
),
Values
(
true
,
false
)));
#endif // HAVE_OPENCL
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment