Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
bf6b119a
Commit
bf6b119a
authored
Jun 26, 2013
by
Roman Donchenko
Committed by
OpenCV Buildbot
Jun 26, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1045 from bitwangyaoyao:2.4_fix
parents
1cfe5aa4
2ec1140b
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
202 additions
and
206 deletions
+202
-206
main.cpp
modules/ocl/perf/main.cpp
+32
-29
mcwutil.cpp
modules/ocl/src/mcwutil.cpp
+1
-1
moments.cpp
modules/ocl/src/moments.cpp
+19
-24
moments.cl
modules/ocl/src/opencl/moments.cl
+144
-146
stereobm.cl
modules/ocl/src/opencl/stereobm.cl
+2
-2
test_moments.cpp
modules/ocl/test/test_moments.cpp
+4
-4
No files found.
modules/ocl/perf/main.cpp
View file @
bf6b119a
...
...
@@ -44,43 +44,21 @@
int
main
(
int
argc
,
const
char
*
argv
[])
{
vector
<
ocl
::
Info
>
oclinfo
;
int
num_devices
=
getDevice
(
oclinfo
);
if
(
num_devices
<
1
)
{
cerr
<<
"no device found
\n
"
;
return
-
1
;
}
// set this to overwrite binary cache every time the test starts
ocl
::
setBinaryDiskCache
(
ocl
::
CACHE_UPDATE
);
int
devidx
=
0
;
for
(
size_t
i
=
0
;
i
<
oclinfo
.
size
();
i
++
)
{
for
(
size_t
j
=
0
;
j
<
oclinfo
[
i
].
DeviceName
.
size
();
j
++
)
{
printf
(
"device %d: %s
\n
"
,
devidx
++
,
oclinfo
[
i
].
DeviceName
[
j
].
c_str
());
}
}
redirectError
(
cvErrorCallback
);
const
char
*
keys
=
"{ h | help | false | print help message }"
"{ f | filter | | filter for test }"
"{ w | workdir | | set working directory }"
"{ l | list | false | show all tests }"
"{ d | device | 0 | device id }"
"{ c | cpu_ocl | false | use cpu as ocl device}"
"{ i | iters | 10 | iteration count }"
"{ m | warmup | 1 | gpu warm up iteration count}"
"{ t | xtop | 1.1
| xfactor top boundary}"
"{ b | xbottom | 0.9
| xfactor bottom boundary}"
"{ t | xtop | 1.1
| xfactor top boundary}"
"{ b | xbottom | 0.9
| xfactor bottom boundary}"
"{ v | verify | false | only run gpu once to verify if problems occur}"
;
redirectError
(
cvErrorCallback
);
CommandLineParser
cmd
(
argc
,
argv
,
keys
);
if
(
cmd
.
get
<
bool
>
(
"help"
))
{
cout
<<
"Avaible options:"
<<
endl
;
...
...
@@ -88,14 +66,40 @@ int main(int argc, const char *argv[])
return
0
;
}
int
device
=
cmd
.
get
<
int
>
(
"device"
);
// get ocl devices
bool
use_cpu
=
cmd
.
get
<
bool
>
(
"c"
);
vector
<
ocl
::
Info
>
oclinfo
;
int
num_devices
=
0
;
if
(
use_cpu
)
num_devices
=
getDevice
(
oclinfo
,
ocl
::
CVCL_DEVICE_TYPE_CPU
);
else
num_devices
=
getDevice
(
oclinfo
);
if
(
num_devices
<
1
)
{
cerr
<<
"no device found
\n
"
;
return
-
1
;
}
// show device info
int
devidx
=
0
;
for
(
size_t
i
=
0
;
i
<
oclinfo
.
size
();
i
++
)
{
for
(
size_t
j
=
0
;
j
<
oclinfo
[
i
].
DeviceName
.
size
();
j
++
)
{
cout
<<
"device "
<<
devidx
++
<<
": "
<<
oclinfo
[
i
].
DeviceName
[
j
]
<<
endl
;
}
}
int
device
=
cmd
.
get
<
int
>
(
"device"
);
if
(
device
<
0
||
device
>=
num_devices
)
{
cerr
<<
"Invalid device ID"
<<
endl
;
return
-
1
;
}
// set this to overwrite binary cache every time the test starts
ocl
::
setBinaryDiskCache
(
ocl
::
CACHE_UPDATE
);
if
(
cmd
.
get
<
bool
>
(
"verify"
))
{
TestSystem
::
instance
().
setNumIters
(
1
);
...
...
@@ -104,7 +108,6 @@ int main(int argc, const char *argv[])
}
devidx
=
0
;
for
(
size_t
i
=
0
;
i
<
oclinfo
.
size
();
i
++
)
{
for
(
size_t
j
=
0
;
j
<
oclinfo
[
i
].
DeviceName
.
size
();
j
++
,
devidx
++
)
...
...
@@ -113,7 +116,7 @@ int main(int argc, const char *argv[])
{
ocl
::
setDevice
(
oclinfo
[
i
],
(
int
)
j
);
TestSystem
::
instance
().
setRecordName
(
oclinfo
[
i
].
DeviceName
[
j
]);
printf
(
"
\n
use %d: %s
\n
"
,
devidx
,
oclinfo
[
i
].
DeviceName
[
j
].
c_str
())
;
cout
<<
"use "
<<
devidx
<<
": "
<<
oclinfo
[
i
].
DeviceName
[
j
]
<<
endl
;
goto
END_DEV
;
}
}
...
...
modules/ocl/src/mcwutil.cpp
View file @
bf6b119a
...
...
@@ -149,7 +149,7 @@ namespace cv
cl_image_format
format
;
int
err
;
int
depth
=
mat
.
depth
();
int
channels
=
mat
.
channels
();
int
channels
=
mat
.
ocl
channels
();
switch
(
depth
)
{
...
...
modules/ocl/src/moments.cpp
View file @
bf6b119a
...
...
@@ -16,7 +16,7 @@
// Third party copyrights are property of their respective owners.
//
// @Authors
// Sen Liu, s
en@multicorewareinc
.com
// Sen Liu, s
wjtuls1987@126
.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
...
...
@@ -277,8 +277,8 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
blocky
=
size
.
height
/
TILE_SIZE
;
else
blocky
=
size
.
height
/
TILE_SIZE
+
1
;
cv
::
ocl
::
oclMat
dst_m
(
blocky
*
10
,
blockx
,
CV_64FC1
);
cl_mem
sum
=
openCLCreateBuffer
(
src
.
clCxt
,
CL_MEM_READ_WRITE
,
10
*
sizeof
(
double
)
);
oclMat
dst_m
(
blocky
*
10
,
blockx
,
CV_64FC1
);
oclMat
sum
(
1
,
10
,
CV_64FC1
);
int
tile_width
=
std
::
min
(
size
.
width
,
TILE_SIZE
);
int
tile_height
=
std
::
min
(
size
.
height
,
TILE_SIZE
);
size_t
localThreads
[
3
]
=
{
tile_height
,
1
,
1
};
...
...
@@ -288,19 +288,16 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
tileSize
.
width
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
tileSize
.
height
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_m
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_m
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
blocky
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
type
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
depth
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
cn
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
coi
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
binary
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
TILE_SIZE
));
openCLExecuteKernel
(
dst_m
.
clCxt
,
&
moments
,
"CvMoments"
,
globalThreads
,
localThreads
,
args
,
-
1
,
depth
);
openCLExecuteKernel
(
Context
::
getContext
()
,
&
moments
,
"CvMoments"
,
globalThreads
,
localThreads
,
args
,
-
1
,
depth
);
size_t
localThreadss
[
3
]
=
{
128
,
1
,
1
};
size_t
globalThreadss
[
3
]
=
{
128
,
1
,
1
};
...
...
@@ -309,25 +306,23 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
tile_height
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
tile_width
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
TILE_SIZE
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
sum
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
sum
.
data
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m
.
data
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_m
.
step
));
openCLExecuteKernel
(
dst_m
.
clCxt
,
&
moments
,
"dst_sum"
,
globalThreadss
,
localThreadss
,
args_sum
,
-
1
,
-
1
);
double
*
dstsum
=
new
double
[
10
];
memset
(
dstsum
,
0
,
10
*
sizeof
(
double
));
openCLReadBuffer
(
dst_m
.
clCxt
,
sum
,(
void
*
)
dstsum
,
10
*
sizeof
(
double
));
mom
->
m00
=
dstsum
[
0
];
mom
->
m10
=
dstsum
[
1
];
mom
->
m01
=
dstsum
[
2
];
mom
->
m20
=
dstsum
[
3
];
mom
->
m11
=
dstsum
[
4
];
mom
->
m02
=
dstsum
[
5
];
mom
->
m30
=
dstsum
[
6
];
mom
->
m21
=
dstsum
[
7
];
mom
->
m12
=
dstsum
[
8
];
mom
->
m03
=
dstsum
[
9
];
delete
[]
dstsum
;
openCLSafeCall
(
clReleaseMemObject
(
sum
));
openCLExecuteKernel
(
Context
::
getContext
(),
&
moments
,
"dst_sum"
,
globalThreadss
,
localThreadss
,
args_sum
,
-
1
,
-
1
);
Mat
dstsum
(
sum
);
mom
->
m00
=
dstsum
.
at
<
double
>
(
0
,
0
);
mom
->
m10
=
dstsum
.
at
<
double
>
(
0
,
1
);
mom
->
m01
=
dstsum
.
at
<
double
>
(
0
,
2
);
mom
->
m20
=
dstsum
.
at
<
double
>
(
0
,
3
);
mom
->
m11
=
dstsum
.
at
<
double
>
(
0
,
4
);
mom
->
m02
=
dstsum
.
at
<
double
>
(
0
,
5
);
mom
->
m30
=
dstsum
.
at
<
double
>
(
0
,
6
);
mom
->
m21
=
dstsum
.
at
<
double
>
(
0
,
7
);
mom
->
m12
=
dstsum
.
at
<
double
>
(
0
,
8
);
mom
->
m03
=
dstsum
.
at
<
double
>
(
0
,
9
);
icvCompleteMomentState
(
mom
);
}
...
...
modules/ocl/src/opencl/moments.cl
View file @
bf6b119a
...
...
@@ -173,10 +173,10 @@ __kernel void dst_sum(int src_rows, int src_cols, int tile_height, int tile_widt
sum[i]
=
dst_sum[i][0]
;
}
__kernel
void
CvMoments_D0
(
__global
uchar16*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
int
tileSize_width,
int
tileSize_height,
__kernel
void
CvMoments_D0
(
__global
uchar16*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
__global
F*
dst_m,
int
dst_cols,
int
dst_step,
int
blocky,
int
type,
int
depth,
int
cn,
int
coi,
int
binary,
int
TILE_SIZE
)
int
depth,
int
cn,
int
coi,
int
binary,
int
TILE_SIZE
)
{
uchar
tmp_coi[16]
; // get the coi data
uchar16
tmp[16]
;
...
...
@@ -192,35 +192,43 @@ __kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_col
int
x
=
wgidx*TILE_SIZE
; // vector length of uchar
int
kcn
=
(
cn==2
)
?2:4
;
int
rstep
=
min
(
src_step,
TILE_SIZE
)
;
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-
x
)
;
if
(
tileSize_width
<
TILE_SIZE
)
for
(
int
i
=
tileSize_width
; i < rstep; i++ )
*
((
__global
uchar*
)
src_data+
(
y+lidy
)
*src_step+x+i
)
=
0
;
if
(
coi
>
0
)
//channel
of
interest
for
(
int
i
=
0
; i < tileSize_width; i += VLEN_C)
{
for
(
int
j=0
; j<VLEN_C; j++)
tmp_coi[j]
=
*
((
__global
uchar*
)
src_data+
(
y+lidy
)
*src_step+
(
x+i+j
)
*kcn+coi-1
)
;
tmp[i/VLEN_C]
=
(
uchar16
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7],
tmp_coi[8],tmp_coi[9],tmp_coi[10],tmp_coi[11],tmp_coi[12],tmp_coi[13],tmp_coi[14],tmp_coi[15]
)
;
}
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_C)
tmp[i/VLEN_C]
=
*
(
src_data+
(
y+lidy
)
*src_step/VLEN_C+
(
x+i
)
/VLEN_C
)
;
int
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
int
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-
x
)
;
if
(
y+lidy
<
src_rows
)
{
if
(
tileSize_width
<
TILE_SIZE
)
for
(
int
i
=
tileSize_width
; i < rstep && (x+i) < src_cols; i++ )
*
((
__global
uchar*
)
src_data+
(
y+lidy
)
*src_step+x+i
)
=
0
;
if
(
coi
>
0
)
//channel
of
interest
for
(
int
i
=
0
; i < tileSize_width; i += VLEN_C)
{
for
(
int
j=0
; j<VLEN_C; j++)
tmp_coi[j]
=
*
((
__global
uchar*
)
src_data+
(
y+lidy
)
*src_step+
(
x+i+j
)
*kcn+coi-1
)
;
tmp[i/VLEN_C]
=
(
uchar16
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7],
tmp_coi[8],tmp_coi[9],tmp_coi[10],tmp_coi[11],tmp_coi[12],tmp_coi[13],tmp_coi[14],tmp_coi[15]
)
;
}
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_C)
tmp[i/VLEN_C]
=
*
(
src_data+
(
y+lidy
)
*src_step/VLEN_C+
(
x+i
)
/VLEN_C
)
;
}
uchar16
zero
=
(
uchar16
)(
0
)
;
uchar16
full
=
(
uchar16
)(
255
)
;
if
(
binary
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_C)
tmp[i/VLEN_C]
=
(
tmp[i/VLEN_C]!=zero
)
?full:zero
;
F
mom[10]
;
__local
int
m[10][128]
;
if
(
lidy
==
0
)
if
(
lidy
<
128
)
{
for
(
int
i=0
; i<10; i++)
for
(
int
j=0
; j<128; j++)
m[i][j]=0
;
m[i][lidy]=0
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
lm[10]
=
{0}
;
int16
x0
=
(
int16
)(
0
)
;
int16
x1
=
(
int16
)(
0
)
;
...
...
@@ -281,6 +289,7 @@ __kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_col
m[i][lidy-j/2]
=
lm[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lidy
==
0&&lidx
==
0
)
{
for
(
int
mt
=
0
; mt < 10; mt++ )
...
...
@@ -328,10 +337,10 @@ __kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_col
}
}
__kernel
void
CvMoments_D2
(
__global
ushort8*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
int
tileSize_width,
int
tileSize_height,
__kernel
void
CvMoments_D2
(
__global
ushort8*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
__global
F*
dst_m,
int
dst_cols,
int
dst_step,
int
blocky,
int
type,
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
{
ushort
tmp_coi[8]
; // get the coi data
ushort8
tmp[32]
;
...
...
@@ -346,21 +355,26 @@ __kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_col
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
kcn
=
(
cn==2
)
?2:4
;
int
rstep
=
min
(
src_step/2,
TILE_SIZE
)
;
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-x
)
;
if
(
src_cols
>
TILE_SIZE
&&
tileSize_width
<
TILE_SIZE
)
for
(
int
i=tileSize_width
; i < rstep; i++ )
*
((
__global
ushort*
)
src_data+
(
y+lidy
)
*src_step/2+x+i
)
=
0
;
if
(
coi
>
0
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_US)
{
for
(
int
j=0
; j<VLEN_US; j++)
tmp_coi[j]
=
*
((
__global
ushort*
)
src_data+
(
y+lidy
)
*
(
int
)
src_step/2+
(
x+i+j
)
*kcn+coi-1
)
;
tmp[i/VLEN_US]
=
(
ushort8
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7]
)
;
}
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_US)
tmp[i/VLEN_US]
=
*
(
src_data+
(
y+lidy
)
*src_step/
(
2*VLEN_US
)
+
(
x+i
)
/VLEN_US
)
;
int
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
int
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-x
)
;
if
(
y+lidy
<
src_rows
)
{
if
(
src_cols
>
TILE_SIZE
&&
tileSize_width
<
TILE_SIZE
)
for
(
int
i=tileSize_width
; i < rstep && (x+i) < src_cols; i++ )
*
((
__global
ushort*
)
src_data+
(
y+lidy
)
*src_step/2+x+i
)
=
0
;
if
(
coi
>
0
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_US)
{
for
(
int
j=0
; j<VLEN_US; j++)
tmp_coi[j]
=
*
((
__global
ushort*
)
src_data+
(
y+lidy
)
*
(
int
)
src_step/2+
(
x+i+j
)
*kcn+coi-1
)
;
tmp[i/VLEN_US]
=
(
ushort8
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7]
)
;
}
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_US)
tmp[i/VLEN_US]
=
*
(
src_data+
(
y+lidy
)
*src_step/
(
2*VLEN_US
)
+
(
x+i
)
/VLEN_US
)
;
}
ushort8
zero
=
(
ushort8
)(
0
)
;
ushort8
full
=
(
ushort8
)(
255
)
;
if
(
binary
)
...
...
@@ -368,11 +382,11 @@ __kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_col
tmp[i/VLEN_US]
=
(
tmp[i/VLEN_US]!=zero
)
?full:zero
;
F
mom[10]
;
__local
long
m[10][128]
;
if
(
lidy
==
0
)
if
(
lidy
<
128
)
for
(
int
i=0
; i<10; i++)
for
(
int
j=0
; j<128; j++)
m[i][j]=0
;
m[i][lidy]=0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
long
lm[10]
=
{0}
;
int8
x0
=
(
int8
)(
0
)
;
int8
x1
=
(
int8
)(
0
)
;
...
...
@@ -422,17 +436,22 @@ __kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_col
lm[0]
=
x0.s0
; // m00
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
j
=
TILE_SIZE/2
; j >= 1; j = j/2 )
{
if
(
lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
lm[i]
=
lm[i]
+
m[i][lidy]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
j
=
TILE_SIZE/2
; j >= 1; j = j/2 )
{
if
(
lidy
>=
j/2&&lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
m[i][lidy-j/2]
=
lm[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lidy
==
0&&lidx
==
0
)
{
for
(
int
mt
=
0
; mt < 10; mt++ )
...
...
@@ -482,10 +501,10 @@ __kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_col
}
}
__kernel
void
CvMoments_D3
(
__global
short8*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
int
tileSize_width,
int
tileSize_height,
__kernel
void
CvMoments_D3
(
__global
short8*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
__global
F*
dst_m,
int
dst_cols,
int
dst_step,
int
blocky,
int
type,
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
{
short
tmp_coi[8]
; // get the coi data
short8
tmp[32]
;
...
...
@@ -500,21 +519,26 @@ __kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
kcn
=
(
cn==2
)
?2:4
;
int
rstep
=
min
(
src_step/2,
TILE_SIZE
)
;
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-x
)
;
if
(
tileSize_width
<
TILE_SIZE
)
for
(
int
i
=
tileSize_width
; i < rstep; i++ )
*
((
__global
short*
)
src_data+
(
y+lidy
)
*src_step/2+x+i
)
=
0
;
if
(
coi
>
0
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_S)
{
for
(
int
j=0
; j<VLEN_S; j++)
tmp_coi[j]
=
*
((
__global
short*
)
src_data+
(
y+lidy
)
*src_step/2+
(
x+i+j
)
*kcn+coi-1
)
;
tmp[i/VLEN_S]
=
(
short8
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7]
)
;
}
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_S)
tmp[i/VLEN_S]
=
*
(
src_data+
(
y+lidy
)
*src_step/
(
2*VLEN_S
)
+
(
x+i
)
/VLEN_S
)
;
int
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
int
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-x
)
;
if
(
y+lidy
<
src_rows
)
{
if
(
tileSize_width
<
TILE_SIZE
)
for
(
int
i
=
tileSize_width
; i < rstep && (x+i) < src_cols; i++ )
*
((
__global
short*
)
src_data+
(
y+lidy
)
*src_step/2+x+i
)
=
0
;
if
(
coi
>
0
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_S)
{
for
(
int
j=0
; j<VLEN_S; j++)
tmp_coi[j]
=
*
((
__global
short*
)
src_data+
(
y+lidy
)
*src_step/2+
(
x+i+j
)
*kcn+coi-1
)
;
tmp[i/VLEN_S]
=
(
short8
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3],tmp_coi[4],tmp_coi[5],tmp_coi[6],tmp_coi[7]
)
;
}
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_S)
tmp[i/VLEN_S]
=
*
(
src_data+
(
y+lidy
)
*src_step/
(
2*VLEN_S
)
+
(
x+i
)
/VLEN_S
)
;
}
short8
zero
=
(
short8
)(
0
)
;
short8
full
=
(
short8
)(
255
)
;
if
(
binary
)
...
...
@@ -523,10 +547,9 @@ __kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols
F
mom[10]
;
__local
long
m[10][128]
;
if
(
lidy
==
0
)
if
(
lidy
<
128
)
for
(
int
i=0
; i<10; i++)
for
(
int
j=0
; j<128; j++)
m[i][j]=0
;
m[i][lidy]=0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
long
lm[10]
=
{0}
;
int8
x0
=
(
int8
)(
0
)
;
...
...
@@ -637,10 +660,10 @@ __kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols
}
}
__kernel
void
CvMoments_D5
(
__global
float*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
int
tileSize_width,
int
tileSize_height,
__kernel
void
CvMoments_D5
(
__global
float*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
__global
F*
dst_m,
int
dst_cols,
int
dst_step,
int
blocky,
int
type,
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
{
float
tmp_coi[4]
; // get the coi data
float4
tmp[64]
;
...
...
@@ -654,33 +677,30 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
int
y
=
wgidy*TILE_SIZE
; // real Y index of pixel
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
kcn
=
(
cn==2
)
?2:4
;
src_step
/=
sizeof
(
*src_data
)
;
int
rstep
=
min
(
src_step,
TILE_SIZE
)
;
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-x
)
;
int
rstep
=
min
(
src_step/4,
TILE_SIZE
)
;
int
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
int
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-x
)
;
int
maxIdx
=
mul24
(
src_rows,
src_cols
)
;
int
yOff
=
(
y+lidy
)
*src_step
;
int
index
;
if
(
tileSize_width
<
TILE_SIZE
&&
yOff
<
src_rows
)
for
(
int
i
=
tileSize_width
; i < rstep && (yOff+x+i) < maxIdx; i++
)
*
(
src_data+yOff+x+i
)
=
0
;
if
(
coi
>
0
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_F
)
{
#
pragma
unroll
for
(
int
j=0
; j<4; j++
)
if
(
y+lidy
<
src_rows
)
{
if
(
tileSize_width
<
TILE_SIZE
)
for
(
int
i
=
tileSize_width
; i < rstep && (x+i) < src_cols; i++
)
*
((
__global
float*
)
src_data+
(
y+lidy
)
*src_step/4+x+i
)
=
0
;
if
(
coi
>
0
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_F
)
{
index
=
yOff+
(
x+i+j
)
*kcn+coi-1
;
if
(
index
<
maxIdx
)
tmp_coi[j]
=
*
(
src_data+index
)
;
else
tmp_coi[j]
=
0
;
for
(
int
j=0
; j<4; j++)
tmp_coi[j]
=
*
(
src_data+
(
y+lidy
)
*src_step/4+
(
x+i+j
)
*kcn+coi-1
)
;
tmp[i/VLEN_F]
=
(
float4
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]
)
;
}
tmp[i/VLEN_F]
=
(
float4
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]
)
;
}
else
for
(
int
i=0
; i < tileSize_width && (yOff+x+i) < maxIdx; i+=VLEN_F)
tmp[i/VLEN_F]
=
(
*
(
__global
float4
*
)(
src_data+yOff+x+i
))
;
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_F)
tmp[i/VLEN_F]
=
(
float4
)(
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i
)
,
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i+1
)
,
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i+2
)
,
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i+3
))
;
}
float4
zero
=
(
float4
)(
0
)
;
float4
full
=
(
float4
)(
255
)
;
if
(
binary
)
...
...
@@ -688,10 +708,9 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
tmp[i/VLEN_F]
=
(
tmp[i/VLEN_F]!=zero
)
?full:zero
;
F
mom[10]
;
__local
F
m[10][128]
;
if
(
lidy
==
0
)
if
(
lidy
<
128
)
for
(
int
i
=
0
; i < 10; i ++)
for
(
int
j
=
0
; j < 128; j ++)
m[i][j]
=
0
;
m[i][lidy]
=
0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
F
lm[10]
=
{0}
;
F4
x0
=
(
F4
)(
0
)
;
...
...
@@ -770,66 +789,42 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
//
accumulate
moments
computed
in
each
tile
dst_step
/=
sizeof
(
F
)
;
int
dst_x_off
=
mad24
(
wgidy,
dst_cols,
wgidx
)
;
int
dst_off
=
0
;
int
max_dst_index
=
10
*
blocky
*
get_global_size
(
1
)
;
//
+
m00
(
=
m00
'
)
dst_off
=
mad24
(
DST_ROW_00
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[0]
;
*
(
dst_m
+
mad24
(
DST_ROW_00
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[0]
;
//
+
m10
(
=
m10
'
+
x*m00
'
)
dst_off
=
mad24
(
DST_ROW_10
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[1]
+
xm
;
*
(
dst_m
+
mad24
(
DST_ROW_10
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[1]
+
xm
;
//
+
m01
(
=
m01
'
+
y*m00
'
)
dst_off
=
mad24
(
DST_ROW_01
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[2]
+
ym
;
*
(
dst_m
+
mad24
(
DST_ROW_01
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[2]
+
ym
;
//
+
m20
(
=
m20
'
+
2*x*m10
'
+
x*x*m00
'
)
dst_off
=
mad24
(
DST_ROW_20
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[3]
+
x
*
(
mom[1]
*
2
+
xm
)
;
*
(
dst_m
+
mad24
(
DST_ROW_20
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[3]
+
x
*
(
mom[1]
*
2
+
xm
)
;
//
+
m11
(
=
m11
'
+
x*m01
'
+
y*m10
'
+
x*y*m00
'
)
dst_off
=
mad24
(
DST_ROW_11
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[4]
+
x
*
(
mom[2]
+
ym
)
+
y
*
mom[1]
;
*
(
dst_m
+
mad24
(
DST_ROW_11
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[4]
+
x
*
(
mom[2]
+
ym
)
+
y
*
mom[1]
;
//
+
m02
(
=
m02
'
+
2*y*m01
'
+
y*y*m00
'
)
dst_off
=
mad24
(
DST_ROW_02
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[5]
+
y
*
(
mom[2]
*
2
+
ym
)
;
*
(
dst_m
+
mad24
(
DST_ROW_02
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[5]
+
y
*
(
mom[2]
*
2
+
ym
)
;
//
+
m30
(
=
m30
'
+
3*x*m20
'
+
3*x*x*m10
'
+
x*x*x*m00
'
)
dst_off
=
mad24
(
DST_ROW_30
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[6]
+
x
*
(
3.
*
mom[3]
+
x
*
(
3.
*
mom[1]
+
xm
))
;
*
(
dst_m
+
mad24
(
DST_ROW_30
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[6]
+
x
*
(
3.
*
mom[3]
+
x
*
(
3.
*
mom[1]
+
xm
))
;
//
+
m21
(
=
m21
'
+
x*
(
2*m11
'
+
2*y*m10
'
+
x*m01
'
+
x*y*m00
'
)
+
y*m20
'
)
dst_off
=
mad24
(
DST_ROW_21
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[7]
+
x
*
(
2
*
(
mom[4]
+
y
*
mom[1]
)
+
x
*
(
mom[2]
+
ym
))
+
y
*
mom[3]
;
*
(
dst_m
+
mad24
(
DST_ROW_21
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[7]
+
x
*
(
2
*
(
mom[4]
+
y
*
mom[1]
)
+
x
*
(
mom[2]
+
ym
))
+
y
*
mom[3]
;
//
+
m12
(
=
m12
'
+
y*
(
2*m11
'
+
2*x*m01
'
+
y*m10
'
+
x*y*m00
'
)
+
x*m02
'
)
dst_off
=
mad24
(
DST_ROW_12
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[8]
+
y
*
(
2
*
(
mom[4]
+
x
*
mom[2]
)
+
y
*
(
mom[1]
+
xm
))
+
x
*
mom[5]
;
*
(
dst_m
+
mad24
(
DST_ROW_12
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[8]
+
y
*
(
2
*
(
mom[4]
+
x
*
mom[2]
)
+
y
*
(
mom[1]
+
xm
))
+
x
*
mom[5]
;
//
+
m03
(
=
m03
'
+
3*y*m02
'
+
3*y*y*m01
'
+
y*y*y*m00
'
)
dst_off
=
mad24
(
DST_ROW_03
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[9]
+
y
*
(
3.
*
mom[5]
+
y
*
(
3.
*
mom[2]
+
ym
))
;
*
(
dst_m
+
mad24
(
DST_ROW_03
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[9]
+
y
*
(
3.
*
mom[5]
+
y
*
(
3.
*
mom[2]
+
ym
))
;
}
}
__kernel
void
CvMoments_D6
(
__global
F*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
int
tileSize_width,
int
tileSize_height,
__kernel
void
CvMoments_D6
(
__global
F*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
__global
F*
dst_m,
int
dst_cols,
int
dst_step,
int
blocky,
int
type,
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
{
F
tmp_coi[4]
; // get the coi data
F4
tmp[64]
;
...
...
@@ -844,22 +839,26 @@ __kernel void CvMoments_D6(__global F* src_data, int src_rows, int src_cols, in
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
kcn
=
(
cn==2
)
?2:4
;
int
rstep
=
min
(
src_step/8,
TILE_SIZE
)
;
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-
x
)
;
int
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
int
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-
x
)
;
if
(
y+lidy
<
src_rows
)
{
if
(
tileSize_width
<
TILE_SIZE
)
for
(
int
i
=
tileSize_width
; i < rstep && (x+i) < src_cols; i++ )
*
((
__global
F*
)
src_data+
(
y+lidy
)
*src_step/8+x+i
)
=
0
;
if
(
coi
>
0
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_D)
{
for
(
int
j=0
; j<4 && ((x+i+j)*kcn+coi-1)<src_cols; j++)
tmp_coi[j]
=
*
(
src_data+
(
y+lidy
)
*src_step/8+
(
x+i+j
)
*kcn+coi-1
)
;
tmp[i/VLEN_D]
=
(
F4
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]
)
;
}
else
for
(
int
i=0
; i < tileSize_width && (x+i+3) < src_cols; i+=VLEN_D)
tmp[i/VLEN_D]
=
(
F4
)(
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i
)
,
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i+1
)
,
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i+2
)
,
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i+3
))
;
}
if
(
tileSize_width
<
TILE_SIZE
)
for
(
int
i
=
tileSize_width
; i < rstep; i++ )
*
((
__global
F*
)
src_data+
(
y+lidy
)
*src_step/8+x+i
)
=
0
;
if
(
coi
>
0
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_D)
{
for
(
int
j=0
; j<4; j++)
tmp_coi[j]
=
*
(
src_data+
(
y+lidy
)
*src_step/8+
(
x+i+j
)
*kcn+coi-1
)
;
tmp[i/VLEN_D]
=
(
F4
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]
)
;
}
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_D)
tmp[i/VLEN_D]
=
(
F4
)(
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i
)
,
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i+1
)
,
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i+2
)
,
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i+3
))
;
F4
zero
=
(
F4
)(
0
)
;
F4
full
=
(
F4
)(
255
)
;
if
(
binary
)
...
...
@@ -867,10 +866,9 @@ __kernel void CvMoments_D6(__global F* src_data, int src_rows, int src_cols, in
tmp[i/VLEN_D]
=
(
tmp[i/VLEN_D]!=zero
)
?full:zero
;
F
mom[10]
;
__local
F
m[10][128]
;
if
(
lidy
==
0
)
if
(
lidy
<
128
)
for
(
int
i=0
; i<10; i++)
for
(
int
j=0
; j<128; j++)
m[i][j]=0
;
m[i][lidy]=0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
F
lm[10]
=
{0}
;
F4
x0
=
(
F4
)(
0
)
;
...
...
@@ -907,7 +905,6 @@ __kernel void CvMoments_D6(__global F* src_data, int src_rows, int src_cols, in
m[1][lidy-bheight]
=
x1.s0
; // m10
m[0][lidy-bheight]
=
x0.s0
; // m00
}
else
if
(
lidy
<
bheight
)
{
lm[9]
=
((
F
)
py
)
*
sy
; // m03
...
...
@@ -922,6 +919,7 @@ __kernel void CvMoments_D6(__global F* src_data, int src_rows, int src_cols, in
lm[0]
=
x0.s0
; // m00
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
j
=
TILE_SIZE/2
; j >= 1; j = j/2 )
{
if
(
lidy
<
j
)
...
...
modules/ocl/src/opencl/stereobm.cl
View file @
bf6b119a
...
...
@@ -162,8 +162,8 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
int
y_tex
;
int
x_tex
=
X
-
radius
;
if
(
x_tex
>=
cwidth
)
return
;
//
if
(
x_tex
>=
cwidth
)
//
return
;
for
(
int
d
=
STEREO_MIND
; d < maxdisp; d += STEREO_DISP_STEP)
{
...
...
modules/ocl/test/test_moments.cpp
View file @
bf6b119a
...
...
@@ -45,12 +45,12 @@ TEST_P(MomentsTest, Mat)
{
if
(
test_contours
)
{
Mat
src
=
imread
(
workdir
+
"../cpp/pic3.png"
,
1
);
Mat
src_gray
,
canny_output
;
cvtColor
(
src
,
src_gray
,
CV_BGR2GRAY
)
;
Mat
src
=
imread
(
workdir
+
"../cpp/pic3.png"
,
IMREAD_GRAYSCALE
);
ASSERT_FALSE
(
src
.
empty
())
;
Mat
canny_output
;
vector
<
vector
<
Point
>
>
contours
;
vector
<
Vec4i
>
hierarchy
;
Canny
(
src
_gray
,
canny_output
,
100
,
200
,
3
);
Canny
(
src
,
canny_output
,
100
,
200
,
3
);
findContours
(
canny_output
,
contours
,
hierarchy
,
CV_RETR_TREE
,
CV_CHAIN_APPROX_SIMPLE
,
Point
(
0
,
0
)
);
for
(
size_t
i
=
0
;
i
<
contours
.
size
();
i
++
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment