Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
bf6b119a
Commit
bf6b119a
authored
Jun 26, 2013
by
Roman Donchenko
Committed by
OpenCV Buildbot
Jun 26, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1045 from bitwangyaoyao:2.4_fix
parents
1cfe5aa4
2ec1140b
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
146 additions
and
150 deletions
+146
-150
main.cpp
modules/ocl/perf/main.cpp
+30
-27
mcwutil.cpp
modules/ocl/src/mcwutil.cpp
+1
-1
moments.cpp
modules/ocl/src/moments.cpp
+19
-24
moments.cl
modules/ocl/src/opencl/moments.cl
+90
-92
stereobm.cl
modules/ocl/src/opencl/stereobm.cl
+2
-2
test_moments.cpp
modules/ocl/test/test_moments.cpp
+4
-4
No files found.
modules/ocl/perf/main.cpp
View file @
bf6b119a
...
@@ -44,43 +44,21 @@
...
@@ -44,43 +44,21 @@
int
main
(
int
argc
,
const
char
*
argv
[])
int
main
(
int
argc
,
const
char
*
argv
[])
{
{
vector
<
ocl
::
Info
>
oclinfo
;
int
num_devices
=
getDevice
(
oclinfo
);
if
(
num_devices
<
1
)
{
cerr
<<
"no device found
\n
"
;
return
-
1
;
}
// set this to overwrite binary cache every time the test starts
ocl
::
setBinaryDiskCache
(
ocl
::
CACHE_UPDATE
);
int
devidx
=
0
;
for
(
size_t
i
=
0
;
i
<
oclinfo
.
size
();
i
++
)
{
for
(
size_t
j
=
0
;
j
<
oclinfo
[
i
].
DeviceName
.
size
();
j
++
)
{
printf
(
"device %d: %s
\n
"
,
devidx
++
,
oclinfo
[
i
].
DeviceName
[
j
].
c_str
());
}
}
redirectError
(
cvErrorCallback
);
const
char
*
keys
=
const
char
*
keys
=
"{ h | help | false | print help message }"
"{ h | help | false | print help message }"
"{ f | filter | | filter for test }"
"{ f | filter | | filter for test }"
"{ w | workdir | | set working directory }"
"{ w | workdir | | set working directory }"
"{ l | list | false | show all tests }"
"{ l | list | false | show all tests }"
"{ d | device | 0 | device id }"
"{ d | device | 0 | device id }"
"{ c | cpu_ocl | false | use cpu as ocl device}"
"{ i | iters | 10 | iteration count }"
"{ i | iters | 10 | iteration count }"
"{ m | warmup | 1 | gpu warm up iteration count}"
"{ m | warmup | 1 | gpu warm up iteration count}"
"{ t | xtop | 1.1 | xfactor top boundary}"
"{ t | xtop | 1.1 | xfactor top boundary}"
"{ b | xbottom | 0.9 | xfactor bottom boundary}"
"{ b | xbottom | 0.9 | xfactor bottom boundary}"
"{ v | verify | false | only run gpu once to verify if problems occur}"
;
"{ v | verify | false | only run gpu once to verify if problems occur}"
;
redirectError
(
cvErrorCallback
);
CommandLineParser
cmd
(
argc
,
argv
,
keys
);
CommandLineParser
cmd
(
argc
,
argv
,
keys
);
if
(
cmd
.
get
<
bool
>
(
"help"
))
if
(
cmd
.
get
<
bool
>
(
"help"
))
{
{
cout
<<
"Avaible options:"
<<
endl
;
cout
<<
"Avaible options:"
<<
endl
;
...
@@ -88,14 +66,40 @@ int main(int argc, const char *argv[])
...
@@ -88,14 +66,40 @@ int main(int argc, const char *argv[])
return
0
;
return
0
;
}
}
int
device
=
cmd
.
get
<
int
>
(
"device"
);
// get ocl devices
bool
use_cpu
=
cmd
.
get
<
bool
>
(
"c"
);
vector
<
ocl
::
Info
>
oclinfo
;
int
num_devices
=
0
;
if
(
use_cpu
)
num_devices
=
getDevice
(
oclinfo
,
ocl
::
CVCL_DEVICE_TYPE_CPU
);
else
num_devices
=
getDevice
(
oclinfo
);
if
(
num_devices
<
1
)
{
cerr
<<
"no device found
\n
"
;
return
-
1
;
}
// show device info
int
devidx
=
0
;
for
(
size_t
i
=
0
;
i
<
oclinfo
.
size
();
i
++
)
{
for
(
size_t
j
=
0
;
j
<
oclinfo
[
i
].
DeviceName
.
size
();
j
++
)
{
cout
<<
"device "
<<
devidx
++
<<
": "
<<
oclinfo
[
i
].
DeviceName
[
j
]
<<
endl
;
}
}
int
device
=
cmd
.
get
<
int
>
(
"device"
);
if
(
device
<
0
||
device
>=
num_devices
)
if
(
device
<
0
||
device
>=
num_devices
)
{
{
cerr
<<
"Invalid device ID"
<<
endl
;
cerr
<<
"Invalid device ID"
<<
endl
;
return
-
1
;
return
-
1
;
}
}
// set this to overwrite binary cache every time the test starts
ocl
::
setBinaryDiskCache
(
ocl
::
CACHE_UPDATE
);
if
(
cmd
.
get
<
bool
>
(
"verify"
))
if
(
cmd
.
get
<
bool
>
(
"verify"
))
{
{
TestSystem
::
instance
().
setNumIters
(
1
);
TestSystem
::
instance
().
setNumIters
(
1
);
...
@@ -104,7 +108,6 @@ int main(int argc, const char *argv[])
...
@@ -104,7 +108,6 @@ int main(int argc, const char *argv[])
}
}
devidx
=
0
;
devidx
=
0
;
for
(
size_t
i
=
0
;
i
<
oclinfo
.
size
();
i
++
)
for
(
size_t
i
=
0
;
i
<
oclinfo
.
size
();
i
++
)
{
{
for
(
size_t
j
=
0
;
j
<
oclinfo
[
i
].
DeviceName
.
size
();
j
++
,
devidx
++
)
for
(
size_t
j
=
0
;
j
<
oclinfo
[
i
].
DeviceName
.
size
();
j
++
,
devidx
++
)
...
@@ -113,7 +116,7 @@ int main(int argc, const char *argv[])
...
@@ -113,7 +116,7 @@ int main(int argc, const char *argv[])
{
{
ocl
::
setDevice
(
oclinfo
[
i
],
(
int
)
j
);
ocl
::
setDevice
(
oclinfo
[
i
],
(
int
)
j
);
TestSystem
::
instance
().
setRecordName
(
oclinfo
[
i
].
DeviceName
[
j
]);
TestSystem
::
instance
().
setRecordName
(
oclinfo
[
i
].
DeviceName
[
j
]);
printf
(
"
\n
use %d: %s
\n
"
,
devidx
,
oclinfo
[
i
].
DeviceName
[
j
].
c_str
())
;
cout
<<
"use "
<<
devidx
<<
": "
<<
oclinfo
[
i
].
DeviceName
[
j
]
<<
endl
;
goto
END_DEV
;
goto
END_DEV
;
}
}
}
}
...
...
modules/ocl/src/mcwutil.cpp
View file @
bf6b119a
...
@@ -149,7 +149,7 @@ namespace cv
...
@@ -149,7 +149,7 @@ namespace cv
cl_image_format
format
;
cl_image_format
format
;
int
err
;
int
err
;
int
depth
=
mat
.
depth
();
int
depth
=
mat
.
depth
();
int
channels
=
mat
.
channels
();
int
channels
=
mat
.
ocl
channels
();
switch
(
depth
)
switch
(
depth
)
{
{
...
...
modules/ocl/src/moments.cpp
View file @
bf6b119a
...
@@ -16,7 +16,7 @@
...
@@ -16,7 +16,7 @@
// Third party copyrights are property of their respective owners.
// Third party copyrights are property of their respective owners.
//
//
// @Authors
// @Authors
// Sen Liu, s
en@multicorewareinc
.com
// Sen Liu, s
wjtuls1987@126
.com
//
//
// Redistribution and use in source and binary forms, with or without modification,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// are permitted provided that the following conditions are met:
...
@@ -277,8 +277,8 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
...
@@ -277,8 +277,8 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
blocky
=
size
.
height
/
TILE_SIZE
;
blocky
=
size
.
height
/
TILE_SIZE
;
else
else
blocky
=
size
.
height
/
TILE_SIZE
+
1
;
blocky
=
size
.
height
/
TILE_SIZE
+
1
;
cv
::
ocl
::
oclMat
dst_m
(
blocky
*
10
,
blockx
,
CV_64FC1
);
oclMat
dst_m
(
blocky
*
10
,
blockx
,
CV_64FC1
);
cl_mem
sum
=
openCLCreateBuffer
(
src
.
clCxt
,
CL_MEM_READ_WRITE
,
10
*
sizeof
(
double
)
);
oclMat
sum
(
1
,
10
,
CV_64FC1
);
int
tile_width
=
std
::
min
(
size
.
width
,
TILE_SIZE
);
int
tile_width
=
std
::
min
(
size
.
width
,
TILE_SIZE
);
int
tile_height
=
std
::
min
(
size
.
height
,
TILE_SIZE
);
int
tile_height
=
std
::
min
(
size
.
height
,
TILE_SIZE
);
size_t
localThreads
[
3
]
=
{
tile_height
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
tile_height
,
1
,
1
};
...
@@ -288,19 +288,16 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
...
@@ -288,19 +288,16 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
tileSize
.
width
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
tileSize
.
height
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_m
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_m
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_m
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_m
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
blocky
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
blocky
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
type
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
depth
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
depth
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
cn
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
cn
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
coi
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
coi
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
binary
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
binary
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
TILE_SIZE
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
TILE_SIZE
));
openCLExecuteKernel
(
dst_m
.
clCxt
,
&
moments
,
"CvMoments"
,
globalThreads
,
localThreads
,
args
,
-
1
,
depth
);
openCLExecuteKernel
(
Context
::
getContext
()
,
&
moments
,
"CvMoments"
,
globalThreads
,
localThreads
,
args
,
-
1
,
depth
);
size_t
localThreadss
[
3
]
=
{
128
,
1
,
1
};
size_t
localThreadss
[
3
]
=
{
128
,
1
,
1
};
size_t
globalThreadss
[
3
]
=
{
128
,
1
,
1
};
size_t
globalThreadss
[
3
]
=
{
128
,
1
,
1
};
...
@@ -309,25 +306,23 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
...
@@ -309,25 +306,23 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
tile_height
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
tile_height
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
tile_width
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
tile_width
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
TILE_SIZE
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
TILE_SIZE
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
sum
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
sum
.
data
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m
.
data
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_m
.
data
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_m
.
step
));
args_sum
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
dst_m
.
step
));
openCLExecuteKernel
(
dst_m
.
clCxt
,
&
moments
,
"dst_sum"
,
globalThreadss
,
localThreadss
,
args_sum
,
-
1
,
-
1
);
openCLExecuteKernel
(
Context
::
getContext
(),
&
moments
,
"dst_sum"
,
globalThreadss
,
localThreadss
,
args_sum
,
-
1
,
-
1
);
double
*
dstsum
=
new
double
[
10
];
memset
(
dstsum
,
0
,
10
*
sizeof
(
double
));
Mat
dstsum
(
sum
);
openCLReadBuffer
(
dst_m
.
clCxt
,
sum
,(
void
*
)
dstsum
,
10
*
sizeof
(
double
));
mom
->
m00
=
dstsum
.
at
<
double
>
(
0
,
0
);
mom
->
m00
=
dstsum
[
0
];
mom
->
m10
=
dstsum
.
at
<
double
>
(
0
,
1
);
mom
->
m10
=
dstsum
[
1
];
mom
->
m01
=
dstsum
.
at
<
double
>
(
0
,
2
);
mom
->
m01
=
dstsum
[
2
];
mom
->
m20
=
dstsum
.
at
<
double
>
(
0
,
3
);
mom
->
m20
=
dstsum
[
3
];
mom
->
m11
=
dstsum
.
at
<
double
>
(
0
,
4
);
mom
->
m11
=
dstsum
[
4
];
mom
->
m02
=
dstsum
.
at
<
double
>
(
0
,
5
);
mom
->
m02
=
dstsum
[
5
];
mom
->
m30
=
dstsum
.
at
<
double
>
(
0
,
6
);
mom
->
m30
=
dstsum
[
6
];
mom
->
m21
=
dstsum
.
at
<
double
>
(
0
,
7
);
mom
->
m21
=
dstsum
[
7
];
mom
->
m12
=
dstsum
.
at
<
double
>
(
0
,
8
);
mom
->
m12
=
dstsum
[
8
];
mom
->
m03
=
dstsum
.
at
<
double
>
(
0
,
9
);
mom
->
m03
=
dstsum
[
9
];
delete
[]
dstsum
;
openCLSafeCall
(
clReleaseMemObject
(
sum
));
icvCompleteMomentState
(
mom
);
icvCompleteMomentState
(
mom
);
}
}
...
...
modules/ocl/src/opencl/moments.cl
View file @
bf6b119a
...
@@ -173,10 +173,10 @@ __kernel void dst_sum(int src_rows, int src_cols, int tile_height, int tile_widt
...
@@ -173,10 +173,10 @@ __kernel void dst_sum(int src_rows, int src_cols, int tile_height, int tile_widt
sum[i]
=
dst_sum[i][0]
;
sum[i]
=
dst_sum[i][0]
;
}
}
__kernel
void
CvMoments_D0
(
__global
uchar16*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
int
tileSize_width,
int
tileSize_height,
__kernel
void
CvMoments_D0
(
__global
uchar16*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
__global
F*
dst_m,
__global
F*
dst_m,
int
dst_cols,
int
dst_step,
int
blocky,
int
dst_cols,
int
dst_step,
int
blocky,
int
type,
int
depth,
int
cn,
int
coi,
int
binary,
int
TILE_SIZE
)
int
depth,
int
cn,
int
coi,
int
binary,
int
TILE_SIZE
)
{
{
uchar
tmp_coi[16]
; // get the coi data
uchar
tmp_coi[16]
; // get the coi data
uchar16
tmp[16]
;
uchar16
tmp[16]
;
...
@@ -192,12 +192,15 @@ __kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_col
...
@@ -192,12 +192,15 @@ __kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_col
int
x
=
wgidx*TILE_SIZE
; // vector length of uchar
int
x
=
wgidx*TILE_SIZE
; // vector length of uchar
int
kcn
=
(
cn==2
)
?2:4
;
int
kcn
=
(
cn==2
)
?2:4
;
int
rstep
=
min
(
src_step,
TILE_SIZE
)
;
int
rstep
=
min
(
src_step,
TILE_SIZE
)
;
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
int
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-
x
)
;
int
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-
x
)
;
if
(
y+lidy
<
src_rows
)
{
if
(
tileSize_width
<
TILE_SIZE
)
if
(
tileSize_width
<
TILE_SIZE
)
for
(
int
i
=
tileSize_width
; i < rstep
; i++ )
for
(
int
i
=
tileSize_width
; i < rstep && (x+i) < src_cols
; i++ )
*
((
__global
uchar*
)
src_data+
(
y+lidy
)
*src_step+x+i
)
=
0
;
*
((
__global
uchar*
)
src_data+
(
y+lidy
)
*src_step+x+i
)
=
0
;
if
(
coi
>
0
)
//channel
of
interest
if
(
coi
>
0
)
//channel
of
interest
for
(
int
i
=
0
; i < tileSize_width; i += VLEN_C)
for
(
int
i
=
0
; i < tileSize_width; i += VLEN_C)
{
{
...
@@ -209,18 +212,23 @@ __kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_col
...
@@ -209,18 +212,23 @@ __kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_col
else
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_C)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_C)
tmp[i/VLEN_C]
=
*
(
src_data+
(
y+lidy
)
*src_step/VLEN_C+
(
x+i
)
/VLEN_C
)
;
tmp[i/VLEN_C]
=
*
(
src_data+
(
y+lidy
)
*src_step/VLEN_C+
(
x+i
)
/VLEN_C
)
;
}
uchar16
zero
=
(
uchar16
)(
0
)
;
uchar16
zero
=
(
uchar16
)(
0
)
;
uchar16
full
=
(
uchar16
)(
255
)
;
uchar16
full
=
(
uchar16
)(
255
)
;
if
(
binary
)
if
(
binary
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_C)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_C)
tmp[i/VLEN_C]
=
(
tmp[i/VLEN_C]!=zero
)
?full:zero
;
tmp[i/VLEN_C]
=
(
tmp[i/VLEN_C]!=zero
)
?full:zero
;
F
mom[10]
;
F
mom[10]
;
__local
int
m[10][128]
;
__local
int
m[10][128]
;
if
(
lidy
==
0
)
if
(
lidy
<
128
)
{
for
(
int
i=0
; i<10; i++)
for
(
int
i=0
; i<10; i++)
for
(
int
j=0
; j<128; j++)
m[i][lidy]=0
;
m[i][j]=0
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
lm[10]
=
{0}
;
int
lm[10]
=
{0}
;
int16
x0
=
(
int16
)(
0
)
;
int16
x0
=
(
int16
)(
0
)
;
int16
x1
=
(
int16
)(
0
)
;
int16
x1
=
(
int16
)(
0
)
;
...
@@ -281,6 +289,7 @@ __kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_col
...
@@ -281,6 +289,7 @@ __kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_col
m[i][lidy-j/2]
=
lm[i]
;
m[i][lidy-j/2]
=
lm[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
if
(
lidy
==
0&&lidx
==
0
)
if
(
lidy
==
0&&lidx
==
0
)
{
{
for
(
int
mt
=
0
; mt < 10; mt++ )
for
(
int
mt
=
0
; mt < 10; mt++ )
...
@@ -328,10 +337,10 @@ __kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_col
...
@@ -328,10 +337,10 @@ __kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_col
}
}
}
}
__kernel
void
CvMoments_D2
(
__global
ushort8*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
int
tileSize_width,
int
tileSize_height,
__kernel
void
CvMoments_D2
(
__global
ushort8*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
__global
F*
dst_m,
__global
F*
dst_m,
int
dst_cols,
int
dst_step,
int
blocky,
int
dst_cols,
int
dst_step,
int
blocky,
int
type,
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
{
{
ushort
tmp_coi[8]
; // get the coi data
ushort
tmp_coi[8]
; // get the coi data
ushort8
tmp[32]
;
ushort8
tmp[32]
;
...
@@ -346,10 +355,13 @@ __kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_col
...
@@ -346,10 +355,13 @@ __kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_col
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
kcn
=
(
cn==2
)
?2:4
;
int
kcn
=
(
cn==2
)
?2:4
;
int
rstep
=
min
(
src_step/2,
TILE_SIZE
)
;
int
rstep
=
min
(
src_step/2,
TILE_SIZE
)
;
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
int
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-x
)
;
int
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-x
)
;
if
(
y+lidy
<
src_rows
)
{
if
(
src_cols
>
TILE_SIZE
&&
tileSize_width
<
TILE_SIZE
)
if
(
src_cols
>
TILE_SIZE
&&
tileSize_width
<
TILE_SIZE
)
for
(
int
i=tileSize_width
; i < rstep
; i++ )
for
(
int
i=tileSize_width
; i < rstep && (x+i) < src_cols
; i++ )
*
((
__global
ushort*
)
src_data+
(
y+lidy
)
*src_step/2+x+i
)
=
0
;
*
((
__global
ushort*
)
src_data+
(
y+lidy
)
*src_step/2+x+i
)
=
0
;
if
(
coi
>
0
)
if
(
coi
>
0
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_US)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_US)
...
@@ -361,6 +373,8 @@ __kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_col
...
@@ -361,6 +373,8 @@ __kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_col
else
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_US)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_US)
tmp[i/VLEN_US]
=
*
(
src_data+
(
y+lidy
)
*src_step/
(
2*VLEN_US
)
+
(
x+i
)
/VLEN_US
)
;
tmp[i/VLEN_US]
=
*
(
src_data+
(
y+lidy
)
*src_step/
(
2*VLEN_US
)
+
(
x+i
)
/VLEN_US
)
;
}
ushort8
zero
=
(
ushort8
)(
0
)
;
ushort8
zero
=
(
ushort8
)(
0
)
;
ushort8
full
=
(
ushort8
)(
255
)
;
ushort8
full
=
(
ushort8
)(
255
)
;
if
(
binary
)
if
(
binary
)
...
@@ -368,11 +382,11 @@ __kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_col
...
@@ -368,11 +382,11 @@ __kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_col
tmp[i/VLEN_US]
=
(
tmp[i/VLEN_US]!=zero
)
?full:zero
;
tmp[i/VLEN_US]
=
(
tmp[i/VLEN_US]!=zero
)
?full:zero
;
F
mom[10]
;
F
mom[10]
;
__local
long
m[10][128]
;
__local
long
m[10][128]
;
if
(
lidy
==
0
)
if
(
lidy
<
128
)
for
(
int
i=0
; i<10; i++)
for
(
int
i=0
; i<10; i++)
for
(
int
j=0
; j<128; j++)
m[i][lidy]=0
;
m[i][j]=0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
long
lm[10]
=
{0}
;
long
lm[10]
=
{0}
;
int8
x0
=
(
int8
)(
0
)
;
int8
x0
=
(
int8
)(
0
)
;
int8
x1
=
(
int8
)(
0
)
;
int8
x1
=
(
int8
)(
0
)
;
...
@@ -422,17 +436,22 @@ __kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_col
...
@@ -422,17 +436,22 @@ __kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_col
lm[0]
=
x0.s0
; // m00
lm[0]
=
x0.s0
; // m00
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
j
=
TILE_SIZE/2
; j >= 1; j = j/2 )
for
(
int
j
=
TILE_SIZE/2
; j >= 1; j = j/2 )
{
{
if
(
lidy
<
j
)
if
(
lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
for
(
int
i
=
0
; i < 10; i++ )
lm[i]
=
lm[i]
+
m[i][lidy]
;
lm[i]
=
lm[i]
+
m[i][lidy]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
j
=
TILE_SIZE/2
; j >= 1; j = j/2 )
{
if
(
lidy
>=
j/2&&lidy
<
j
)
if
(
lidy
>=
j/2&&lidy
<
j
)
for
(
int
i
=
0
; i < 10; i++ )
for
(
int
i
=
0
; i < 10; i++ )
m[i][lidy-j/2]
=
lm[i]
;
m[i][lidy-j/2]
=
lm[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lidy
==
0&&lidx
==
0
)
if
(
lidy
==
0&&lidx
==
0
)
{
{
for
(
int
mt
=
0
; mt < 10; mt++ )
for
(
int
mt
=
0
; mt < 10; mt++ )
...
@@ -482,10 +501,10 @@ __kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_col
...
@@ -482,10 +501,10 @@ __kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_col
}
}
}
}
__kernel
void
CvMoments_D3
(
__global
short8*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
int
tileSize_width,
int
tileSize_height,
__kernel
void
CvMoments_D3
(
__global
short8*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
__global
F*
dst_m,
__global
F*
dst_m,
int
dst_cols,
int
dst_step,
int
blocky,
int
dst_cols,
int
dst_step,
int
blocky,
int
type,
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
{
{
short
tmp_coi[8]
; // get the coi data
short
tmp_coi[8]
; // get the coi data
short8
tmp[32]
;
short8
tmp[32]
;
...
@@ -500,10 +519,13 @@ __kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols
...
@@ -500,10 +519,13 @@ __kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
kcn
=
(
cn==2
)
?2:4
;
int
kcn
=
(
cn==2
)
?2:4
;
int
rstep
=
min
(
src_step/2,
TILE_SIZE
)
;
int
rstep
=
min
(
src_step/2,
TILE_SIZE
)
;
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
int
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-x
)
;
int
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-x
)
;
if
(
y+lidy
<
src_rows
)
{
if
(
tileSize_width
<
TILE_SIZE
)
if
(
tileSize_width
<
TILE_SIZE
)
for
(
int
i
=
tileSize_width
; i < rstep
; i++ )
for
(
int
i
=
tileSize_width
; i < rstep && (x+i) < src_cols
; i++ )
*
((
__global
short*
)
src_data+
(
y+lidy
)
*src_step/2+x+i
)
=
0
;
*
((
__global
short*
)
src_data+
(
y+lidy
)
*src_step/2+x+i
)
=
0
;
if
(
coi
>
0
)
if
(
coi
>
0
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_S)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_S)
...
@@ -515,6 +537,8 @@ __kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols
...
@@ -515,6 +537,8 @@ __kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols
else
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_S)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_S)
tmp[i/VLEN_S]
=
*
(
src_data+
(
y+lidy
)
*src_step/
(
2*VLEN_S
)
+
(
x+i
)
/VLEN_S
)
;
tmp[i/VLEN_S]
=
*
(
src_data+
(
y+lidy
)
*src_step/
(
2*VLEN_S
)
+
(
x+i
)
/VLEN_S
)
;
}
short8
zero
=
(
short8
)(
0
)
;
short8
zero
=
(
short8
)(
0
)
;
short8
full
=
(
short8
)(
255
)
;
short8
full
=
(
short8
)(
255
)
;
if
(
binary
)
if
(
binary
)
...
@@ -523,10 +547,9 @@ __kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols
...
@@ -523,10 +547,9 @@ __kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols
F
mom[10]
;
F
mom[10]
;
__local
long
m[10][128]
;
__local
long
m[10][128]
;
if
(
lidy
==
0
)
if
(
lidy
<
128
)
for
(
int
i=0
; i<10; i++)
for
(
int
i=0
; i<10; i++)
for
(
int
j=0
; j<128; j++)
m[i][lidy]=0
;
m[i][j]=0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
long
lm[10]
=
{0}
;
long
lm[10]
=
{0}
;
int8
x0
=
(
int8
)(
0
)
;
int8
x0
=
(
int8
)(
0
)
;
...
@@ -637,10 +660,10 @@ __kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols
...
@@ -637,10 +660,10 @@ __kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols
}
}
}
}
__kernel
void
CvMoments_D5
(
__global
float*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
int
tileSize_width,
int
tileSize_height,
__kernel
void
CvMoments_D5
(
__global
float*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
__global
F*
dst_m,
__global
F*
dst_m,
int
dst_cols,
int
dst_step,
int
blocky,
int
dst_cols,
int
dst_step,
int
blocky,
int
type,
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
{
{
float
tmp_coi[4]
; // get the coi data
float
tmp_coi[4]
; // get the coi data
float4
tmp[64]
;
float4
tmp[64]
;
...
@@ -654,33 +677,30 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
...
@@ -654,33 +677,30 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
int
y
=
wgidy*TILE_SIZE
; // real Y index of pixel
int
y
=
wgidy*TILE_SIZE
; // real Y index of pixel
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
kcn
=
(
cn==2
)
?2:4
;
int
kcn
=
(
cn==2
)
?2:4
;
src_step
/=
sizeof
(
*src_data
)
;
int
rstep
=
min
(
src_step/4,
TILE_SIZE
)
;
int
rstep
=
min
(
src_step,
TILE_SIZE
)
;
int
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
int
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-x
)
;
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-x
)
;
int
maxIdx
=
mul24
(
src_rows,
src_cols
)
;
int
maxIdx
=
mul24
(
src_rows,
src_cols
)
;
int
yOff
=
(
y+lidy
)
*src_step
;
int
yOff
=
(
y+lidy
)
*src_step
;
int
index
;
int
index
;
if
(
tileSize_width
<
TILE_SIZE
&&
yOff
<
src_rows
)
for
(
int
i
=
tileSize_width
; i < rstep && (yOff+x+i) < maxIdx; i++ )
if
(
y+lidy
<
src_rows
)
*
(
src_data+yOff+x+i
)
=
0
;
{
if
(
tileSize_width
<
TILE_SIZE
)
for
(
int
i
=
tileSize_width
; i < rstep && (x+i) < src_cols; i++ )
*
((
__global
float*
)
src_data+
(
y+lidy
)
*src_step/4+x+i
)
=
0
;
if
(
coi
>
0
)
if
(
coi
>
0
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_F)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_F)
{
{
#
pragma
unroll
for
(
int
j=0
; j<4; j++)
for
(
int
j=0
; j<4; j++)
{
tmp_coi[j]
=
*
(
src_data+
(
y+lidy
)
*src_step/4+
(
x+i+j
)
*kcn+coi-1
)
;
index
=
yOff+
(
x+i+j
)
*kcn+coi-1
;
if
(
index
<
maxIdx
)
tmp_coi[j]
=
*
(
src_data+index
)
;
else
tmp_coi[j]
=
0
;
}
tmp[i/VLEN_F]
=
(
float4
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]
)
;
tmp[i/VLEN_F]
=
(
float4
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]
)
;
}
}
else
else
for
(
int
i=0
; i < tileSize_width && (yOff+x+i) < maxIdx; i+=VLEN_F)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_F)
tmp[i/VLEN_F]
=
(
*
(
__global
float4
*
)(
src_data+yOff+x+i
))
;
tmp[i/VLEN_F]
=
(
float4
)(
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i
)
,
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i+1
)
,
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i+2
)
,
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i+3
))
;
}
float4
zero
=
(
float4
)(
0
)
;
float4
zero
=
(
float4
)(
0
)
;
float4
full
=
(
float4
)(
255
)
;
float4
full
=
(
float4
)(
255
)
;
if
(
binary
)
if
(
binary
)
...
@@ -688,10 +708,9 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
...
@@ -688,10 +708,9 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
tmp[i/VLEN_F]
=
(
tmp[i/VLEN_F]!=zero
)
?full:zero
;
tmp[i/VLEN_F]
=
(
tmp[i/VLEN_F]!=zero
)
?full:zero
;
F
mom[10]
;
F
mom[10]
;
__local
F
m[10][128]
;
__local
F
m[10][128]
;
if
(
lidy
==
0
)
if
(
lidy
<
128
)
for
(
int
i
=
0
; i < 10; i ++)
for
(
int
i
=
0
; i < 10; i ++)
for
(
int
j
=
0
; j < 128; j ++)
m[i][lidy]
=
0
;
m[i][j]
=
0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
F
lm[10]
=
{0}
;
F
lm[10]
=
{0}
;
F4
x0
=
(
F4
)(
0
)
;
F4
x0
=
(
F4
)(
0
)
;
...
@@ -770,66 +789,42 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
...
@@ -770,66 +789,42 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
//
accumulate
moments
computed
in
each
tile
//
accumulate
moments
computed
in
each
tile
dst_step
/=
sizeof
(
F
)
;
dst_step
/=
sizeof
(
F
)
;
int
dst_x_off
=
mad24
(
wgidy,
dst_cols,
wgidx
)
;
int
dst_off
=
0
;
int
max_dst_index
=
10
*
blocky
*
get_global_size
(
1
)
;
//
+
m00
(
=
m00
'
)
//
+
m00
(
=
m00
'
)
dst_off
=
mad24
(
DST_ROW_00
*
blocky,
dst_step,
dst_x_off
)
;
*
(
dst_m
+
mad24
(
DST_ROW_00
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[0]
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[0]
;
//
+
m10
(
=
m10
'
+
x*m00
'
)
//
+
m10
(
=
m10
'
+
x*m00
'
)
dst_off
=
mad24
(
DST_ROW_10
*
blocky,
dst_step,
dst_x_off
)
;
*
(
dst_m
+
mad24
(
DST_ROW_10
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[1]
+
xm
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[1]
+
xm
;
//
+
m01
(
=
m01
'
+
y*m00
'
)
//
+
m01
(
=
m01
'
+
y*m00
'
)
dst_off
=
mad24
(
DST_ROW_01
*
blocky,
dst_step,
dst_x_off
)
;
*
(
dst_m
+
mad24
(
DST_ROW_01
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[2]
+
ym
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[2]
+
ym
;
//
+
m20
(
=
m20
'
+
2*x*m10
'
+
x*x*m00
'
)
//
+
m20
(
=
m20
'
+
2*x*m10
'
+
x*x*m00
'
)
dst_off
=
mad24
(
DST_ROW_20
*
blocky,
dst_step,
dst_x_off
)
;
*
(
dst_m
+
mad24
(
DST_ROW_20
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[3]
+
x
*
(
mom[1]
*
2
+
xm
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[3]
+
x
*
(
mom[1]
*
2
+
xm
)
;
//
+
m11
(
=
m11
'
+
x*m01
'
+
y*m10
'
+
x*y*m00
'
)
//
+
m11
(
=
m11
'
+
x*m01
'
+
y*m10
'
+
x*y*m00
'
)
dst_off
=
mad24
(
DST_ROW_11
*
blocky,
dst_step,
dst_x_off
)
;
*
(
dst_m
+
mad24
(
DST_ROW_11
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[4]
+
x
*
(
mom[2]
+
ym
)
+
y
*
mom[1]
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[4]
+
x
*
(
mom[2]
+
ym
)
+
y
*
mom[1]
;
//
+
m02
(
=
m02
'
+
2*y*m01
'
+
y*y*m00
'
)
//
+
m02
(
=
m02
'
+
2*y*m01
'
+
y*y*m00
'
)
dst_off
=
mad24
(
DST_ROW_02
*
blocky,
dst_step,
dst_x_off
)
;
*
(
dst_m
+
mad24
(
DST_ROW_02
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[5]
+
y
*
(
mom[2]
*
2
+
ym
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[5]
+
y
*
(
mom[2]
*
2
+
ym
)
;
//
+
m30
(
=
m30
'
+
3*x*m20
'
+
3*x*x*m10
'
+
x*x*x*m00
'
)
//
+
m30
(
=
m30
'
+
3*x*m20
'
+
3*x*x*m10
'
+
x*x*x*m00
'
)
dst_off
=
mad24
(
DST_ROW_30
*
blocky,
dst_step,
dst_x_off
)
;
*
(
dst_m
+
mad24
(
DST_ROW_30
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[6]
+
x
*
(
3.
*
mom[3]
+
x
*
(
3.
*
mom[1]
+
xm
))
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[6]
+
x
*
(
3.
*
mom[3]
+
x
*
(
3.
*
mom[1]
+
xm
))
;
//
+
m21
(
=
m21
'
+
x*
(
2*m11
'
+
2*y*m10
'
+
x*m01
'
+
x*y*m00
'
)
+
y*m20
'
)
//
+
m21
(
=
m21
'
+
x*
(
2*m11
'
+
2*y*m10
'
+
x*m01
'
+
x*y*m00
'
)
+
y*m20
'
)
dst_off
=
mad24
(
DST_ROW_21
*
blocky,
dst_step,
dst_x_off
)
;
*
(
dst_m
+
mad24
(
DST_ROW_21
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[7]
+
x
*
(
2
*
(
mom[4]
+
y
*
mom[1]
)
+
x
*
(
mom[2]
+
ym
))
+
y
*
mom[3]
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[7]
+
x
*
(
2
*
(
mom[4]
+
y
*
mom[1]
)
+
x
*
(
mom[2]
+
ym
))
+
y
*
mom[3]
;
//
+
m12
(
=
m12
'
+
y*
(
2*m11
'
+
2*x*m01
'
+
y*m10
'
+
x*y*m00
'
)
+
x*m02
'
)
//
+
m12
(
=
m12
'
+
y*
(
2*m11
'
+
2*x*m01
'
+
y*m10
'
+
x*y*m00
'
)
+
x*m02
'
)
dst_off
=
mad24
(
DST_ROW_12
*
blocky,
dst_step,
dst_x_off
)
;
*
(
dst_m
+
mad24
(
DST_ROW_12
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[8]
+
y
*
(
2
*
(
mom[4]
+
x
*
mom[2]
)
+
y
*
(
mom[1]
+
xm
))
+
x
*
mom[5]
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[8]
+
y
*
(
2
*
(
mom[4]
+
x
*
mom[2]
)
+
y
*
(
mom[1]
+
xm
))
+
x
*
mom[5]
;
//
+
m03
(
=
m03
'
+
3*y*m02
'
+
3*y*y*m01
'
+
y*y*y*m00
'
)
//
+
m03
(
=
m03
'
+
3*y*m02
'
+
3*y*y*m01
'
+
y*y*y*m00
'
)
dst_off
=
mad24
(
DST_ROW_03
*
blocky,
dst_step,
dst_x_off
)
;
*
(
dst_m
+
mad24
(
DST_ROW_03
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[9]
+
y
*
(
3.
*
mom[5]
+
y
*
(
3.
*
mom[2]
+
ym
))
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[9]
+
y
*
(
3.
*
mom[5]
+
y
*
(
3.
*
mom[2]
+
ym
))
;
}
}
}
}
__kernel
void
CvMoments_D6
(
__global
F*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
int
tileSize_width,
int
tileSize_height,
__kernel
void
CvMoments_D6
(
__global
F*
src_data,
int
src_rows,
int
src_cols,
int
src_step,
__global
F*
dst_m,
__global
F*
dst_m,
int
dst_cols,
int
dst_step,
int
blocky,
int
dst_cols,
int
dst_step,
int
blocky,
int
type,
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
int
depth,
int
cn,
int
coi,
int
binary,
const
int
TILE_SIZE
)
{
{
F
tmp_coi[4]
; // get the coi data
F
tmp_coi[4]
; // get the coi data
F4
tmp[64]
;
F4
tmp[64]
;
...
@@ -844,22 +839,26 @@ __kernel void CvMoments_D6(__global F* src_data, int src_rows, int src_cols, in
...
@@ -844,22 +839,26 @@ __kernel void CvMoments_D6(__global F* src_data, int src_rows, int src_cols, in
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
kcn
=
(
cn==2
)
?2:4
;
int
kcn
=
(
cn==2
)
?2:4
;
int
rstep
=
min
(
src_step/8,
TILE_SIZE
)
;
int
rstep
=
min
(
src_step/8,
TILE_SIZE
)
;
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
int
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-
x
)
;
int
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-
x
)
;
if
(
y+lidy
<
src_rows
)
{
if
(
tileSize_width
<
TILE_SIZE
)
if
(
tileSize_width
<
TILE_SIZE
)
for
(
int
i
=
tileSize_width
; i < rstep
; i++ )
for
(
int
i
=
tileSize_width
; i < rstep && (x+i) < src_cols
; i++ )
*
((
__global
F*
)
src_data+
(
y+lidy
)
*src_step/8+x+i
)
=
0
;
*
((
__global
F*
)
src_data+
(
y+lidy
)
*src_step/8+x+i
)
=
0
;
if
(
coi
>
0
)
if
(
coi
>
0
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_D)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_D)
{
{
for
(
int
j=0
; j<4
; j++)
for
(
int
j=0
; j<4 && ((x+i+j)*kcn+coi-1)<src_cols
; j++)
tmp_coi[j]
=
*
(
src_data+
(
y+lidy
)
*src_step/8+
(
x+i+j
)
*kcn+coi-1
)
;
tmp_coi[j]
=
*
(
src_data+
(
y+lidy
)
*src_step/8+
(
x+i+j
)
*kcn+coi-1
)
;
tmp[i/VLEN_D]
=
(
F4
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]
)
;
tmp[i/VLEN_D]
=
(
F4
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]
)
;
}
}
else
else
for
(
int
i=0
; i < tileSize_width
; i+=VLEN_D)
for
(
int
i=0
; i < tileSize_width && (x+i+3) < src_cols
; i+=VLEN_D)
tmp[i/VLEN_D]
=
(
F4
)(
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i
)
,
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i+1
)
,
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i+2
)
,
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i+3
))
;
tmp[i/VLEN_D]
=
(
F4
)(
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i
)
,
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i+1
)
,
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i+2
)
,
*
(
src_data+
(
y+lidy
)
*src_step/8+x+i+3
))
;
}
F4
zero
=
(
F4
)(
0
)
;
F4
zero
=
(
F4
)(
0
)
;
F4
full
=
(
F4
)(
255
)
;
F4
full
=
(
F4
)(
255
)
;
if
(
binary
)
if
(
binary
)
...
@@ -867,10 +866,9 @@ __kernel void CvMoments_D6(__global F* src_data, int src_rows, int src_cols, in
...
@@ -867,10 +866,9 @@ __kernel void CvMoments_D6(__global F* src_data, int src_rows, int src_cols, in
tmp[i/VLEN_D]
=
(
tmp[i/VLEN_D]!=zero
)
?full:zero
;
tmp[i/VLEN_D]
=
(
tmp[i/VLEN_D]!=zero
)
?full:zero
;
F
mom[10]
;
F
mom[10]
;
__local
F
m[10][128]
;
__local
F
m[10][128]
;
if
(
lidy
==
0
)
if
(
lidy
<
128
)
for
(
int
i=0
; i<10; i++)
for
(
int
i=0
; i<10; i++)
for
(
int
j=0
; j<128; j++)
m[i][lidy]=0
;
m[i][j]=0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
F
lm[10]
=
{0}
;
F
lm[10]
=
{0}
;
F4
x0
=
(
F4
)(
0
)
;
F4
x0
=
(
F4
)(
0
)
;
...
@@ -907,7 +905,6 @@ __kernel void CvMoments_D6(__global F* src_data, int src_rows, int src_cols, in
...
@@ -907,7 +905,6 @@ __kernel void CvMoments_D6(__global F* src_data, int src_rows, int src_cols, in
m[1][lidy-bheight]
=
x1.s0
; // m10
m[1][lidy-bheight]
=
x1.s0
; // m10
m[0][lidy-bheight]
=
x0.s0
; // m00
m[0][lidy-bheight]
=
x0.s0
; // m00
}
}
else
if
(
lidy
<
bheight
)
else
if
(
lidy
<
bheight
)
{
{
lm[9]
=
((
F
)
py
)
*
sy
; // m03
lm[9]
=
((
F
)
py
)
*
sy
; // m03
...
@@ -922,6 +919,7 @@ __kernel void CvMoments_D6(__global F* src_data, int src_rows, int src_cols, in
...
@@ -922,6 +919,7 @@ __kernel void CvMoments_D6(__global F* src_data, int src_rows, int src_cols, in
lm[0]
=
x0.s0
; // m00
lm[0]
=
x0.s0
; // m00
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
j
=
TILE_SIZE/2
; j >= 1; j = j/2 )
for
(
int
j
=
TILE_SIZE/2
; j >= 1; j = j/2 )
{
{
if
(
lidy
<
j
)
if
(
lidy
<
j
)
...
...
modules/ocl/src/opencl/stereobm.cl
View file @
bf6b119a
...
@@ -162,8 +162,8 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
...
@@ -162,8 +162,8 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
int
y_tex
;
int
y_tex
;
int
x_tex
=
X
-
radius
;
int
x_tex
=
X
-
radius
;
if
(
x_tex
>=
cwidth
)
//
if
(
x_tex
>=
cwidth
)
return
;
//
return
;
for
(
int
d
=
STEREO_MIND
; d < maxdisp; d += STEREO_DISP_STEP)
for
(
int
d
=
STEREO_MIND
; d < maxdisp; d += STEREO_DISP_STEP)
{
{
...
...
modules/ocl/test/test_moments.cpp
View file @
bf6b119a
...
@@ -45,12 +45,12 @@ TEST_P(MomentsTest, Mat)
...
@@ -45,12 +45,12 @@ TEST_P(MomentsTest, Mat)
{
{
if
(
test_contours
)
if
(
test_contours
)
{
{
Mat
src
=
imread
(
workdir
+
"../cpp/pic3.png"
,
1
);
Mat
src
=
imread
(
workdir
+
"../cpp/pic3.png"
,
IMREAD_GRAYSCALE
);
Mat
src_gray
,
canny_output
;
ASSERT_FALSE
(
src
.
empty
())
;
cvtColor
(
src
,
src_gray
,
CV_BGR2GRAY
)
;
Mat
canny_output
;
vector
<
vector
<
Point
>
>
contours
;
vector
<
vector
<
Point
>
>
contours
;
vector
<
Vec4i
>
hierarchy
;
vector
<
Vec4i
>
hierarchy
;
Canny
(
src
_gray
,
canny_output
,
100
,
200
,
3
);
Canny
(
src
,
canny_output
,
100
,
200
,
3
);
findContours
(
canny_output
,
contours
,
hierarchy
,
CV_RETR_TREE
,
CV_CHAIN_APPROX_SIMPLE
,
Point
(
0
,
0
)
);
findContours
(
canny_output
,
contours
,
hierarchy
,
CV_RETR_TREE
,
CV_CHAIN_APPROX_SIMPLE
,
Point
(
0
,
0
)
);
for
(
size_t
i
=
0
;
i
<
contours
.
size
();
i
++
)
for
(
size_t
i
=
0
;
i
<
contours
.
size
();
i
++
)
{
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment