Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
bd1d7cd2
Commit
bd1d7cd2
authored
May 14, 2013
by
Vadim Pisarevsky
Committed by
OpenCV Buildbot
May 14, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #839 from pengx17:2.4_ocl_csbp
parents
87765c0f
9cfa24e5
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
2263 additions
and
0 deletions
+2263
-0
ocl.hpp
modules/ocl/include/opencv2/ocl/ocl.hpp
+38
-0
stereocsbp.cl
modules/ocl/src/opencl/stereocsbp.cl
+1402
-0
stereo_csbp.cpp
modules/ocl/src/stereo_csbp.cpp
+763
-0
test_calib3d.cpp
modules/ocl/test/test_calib3d.cpp
+60
-0
No files found.
modules/ocl/include/opencv2/ocl/ocl.hpp
View file @
bd1d7cd2
...
@@ -1769,6 +1769,44 @@ namespace cv
...
@@ -1769,6 +1769,44 @@ namespace cv
std
::
vector
<
oclMat
>
datas
;
std
::
vector
<
oclMat
>
datas
;
oclMat
out
;
oclMat
out
;
};
};
class
CV_EXPORTS
StereoConstantSpaceBP
{
public
:
enum
{
DEFAULT_NDISP
=
128
};
enum
{
DEFAULT_ITERS
=
8
};
enum
{
DEFAULT_LEVELS
=
4
};
enum
{
DEFAULT_NR_PLANE
=
4
};
static
void
estimateRecommendedParams
(
int
width
,
int
height
,
int
&
ndisp
,
int
&
iters
,
int
&
levels
,
int
&
nr_plane
);
explicit
StereoConstantSpaceBP
(
int
ndisp
=
DEFAULT_NDISP
,
int
iters
=
DEFAULT_ITERS
,
int
levels
=
DEFAULT_LEVELS
,
int
nr_plane
=
DEFAULT_NR_PLANE
,
int
msg_type
=
CV_32F
);
StereoConstantSpaceBP
(
int
ndisp
,
int
iters
,
int
levels
,
int
nr_plane
,
float
max_data_term
,
float
data_weight
,
float
max_disc_term
,
float
disc_single_jump
,
int
min_disp_th
=
0
,
int
msg_type
=
CV_32F
);
void
operator
()(
const
oclMat
&
left
,
const
oclMat
&
right
,
oclMat
&
disparity
);
int
ndisp
;
int
iters
;
int
levels
;
int
nr_plane
;
float
max_data_term
;
float
data_weight
;
float
max_disc_term
;
float
disc_single_jump
;
int
min_disp_th
;
int
msg_type
;
bool
use_local_init_data_cost
;
private
:
oclMat
u
[
2
],
d
[
2
],
l
[
2
],
r
[
2
];
oclMat
disp_selected_pyr
[
2
];
oclMat
data_cost
;
oclMat
data_cost_selected
;
oclMat
temp
;
oclMat
out
;
};
}
}
}
}
#if defined _MSC_VER && _MSC_VER >= 1200
#if defined _MSC_VER && _MSC_VER >= 1200
...
...
modules/ocl/src/opencl/stereocsbp.cl
0 → 100644
View file @
bd1d7cd2
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Multicoreware,
Inc.,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Institute
Of
Software
Chinese
Academy
Of
Science,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Jia
Haipeng,
jiahaipeng95@gmail.com
//
Jin
Ma,
jin@multicorewareinc.com
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
oclMaterials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
"as is"
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
ifndef
FLT_MAX
#
define
FLT_MAX
CL_FLT_MAX
#
endif
#
ifndef
SHRT_MAX
#
define
SHRT_MAX
CL_SHORT_MAX
#
endif
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////get_first_k_initial_global//////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
get_first_k_initial_global_0
(
__global
short
*data_cost_selected_,
__global
short
*selected_disp_pyr,
__global
short
*ctemp,
int
h,
int
w,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
y
<
h
&&
x
<
w
)
{
__global
short
*selected_disparity
=
selected_disp_pyr
+
y
*
cmsg_step1
+
x
;
__global
short
*data_cost_selected
=
data_cost_selected_
+
y
*
cmsg_step1
+
x
;
__global
short
*data_cost
=
ctemp
+
y
*
cmsg_step1
+
x
;
for
(
int
i
=
0
; i < nr_plane; i++)
{
short
minimum
=
SHRT_MAX
;
int
id
=
0
;
for
(
int
d
=
0
; d < cndisp; d++)
{
short
cur
=
data_cost[d
*
cdisp_step1]
;
if
(
cur
<
minimum
)
{
minimum
=
cur
;
id
=
d
;
}
}
data_cost_selected[i
*
cdisp_step1]
=
minimum
;
selected_disparity[i
*
cdisp_step1]
=
id
;
data_cost
[id
*
cdisp_step1]
=
SHRT_MAX
;
}
}
}
__kernel
void
get_first_k_initial_global_1
(
__global
float
*data_cost_selected_,
__global
float
*selected_disp_pyr,
__global
float
*ctemp,
int
h,
int
w,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
y
<
h
&&
x
<
w
)
{
__global
float
*selected_disparity
=
selected_disp_pyr
+
y
*
cmsg_step1
+
x
;
__global
float
*data_cost_selected
=
data_cost_selected_
+
y
*
cmsg_step1
+
x
;
__global
float
*data_cost
=
ctemp
+
y
*
cmsg_step1
+
x
;
for
(
int
i
=
0
; i < nr_plane; i++)
{
float
minimum
=
FLT_MAX
;
int
id
=
0
;
for
(
int
d
=
0
; d < cndisp; d++)
{
float
cur
=
data_cost[d
*
cdisp_step1]
;
if
(
cur
<
minimum
)
{
minimum
=
cur
;
id
=
d
;
}
}
data_cost_selected[i
*
cdisp_step1]
=
minimum
;
selected_disparity[i
*
cdisp_step1]
=
id
;
data_cost
[id
*
cdisp_step1]
=
FLT_MAX
;
}
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////get_first_k_initial_local////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
get_first_k_initial_local_0
(
__global
short
*data_cost_selected_,
__global
short
*selected_disp_pyr,
__global
short
*ctemp,int
h,
int
w,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
y
<
h
&&
x
<
w
)
{
__global
short
*selected_disparity
=
selected_disp_pyr
+
y
*
cmsg_step1
+
x
;
__global
short
*data_cost_selected
=
data_cost_selected_
+
y
*
cmsg_step1
+
x
;
__global
short
*data_cost
=
ctemp
+
y
*
cmsg_step1
+
x
;
int
nr_local_minimum
=
0
;
short
prev
=
data_cost[0
*
cdisp_step1]
;
short
cur
=
data_cost[1
*
cdisp_step1]
;
short
next
=
data_cost[2
*
cdisp_step1]
;
for
(
int
d
=
1
; d < cndisp - 1 && nr_local_minimum < nr_plane; d++)
{
if
(
cur
<
prev
&&
cur
<
next
)
{
data_cost_selected[nr_local_minimum
*
cdisp_step1]
=
cur
;
selected_disparity[nr_local_minimum
*
cdisp_step1]
=
d
;
data_cost[d
*
cdisp_step1]
=
SHRT_MAX
;
nr_local_minimum++
;
}
prev
=
cur
;
cur
=
next
;
next
=
data_cost[
(
d
+
1
)
*
cdisp_step1]
;
}
for
(
int
i
=
nr_local_minimum
; i < nr_plane; i++)
{
short
minimum
=
SHRT_MAX
;
int
id
=
0
;
for
(
int
d
=
0
; d < cndisp; d++)
{
cur
=
data_cost[d
*
cdisp_step1]
;
if
(
cur
<
minimum
)
{
minimum
=
cur
;
id
=
d
;
}
}
data_cost_selected[i
*
cdisp_step1]
=
minimum
;
selected_disparity[i
*
cdisp_step1]
=
id
;
data_cost[id
*
cdisp_step1]
=
SHRT_MAX
;
}
}
}
__kernel
void
get_first_k_initial_local_1
(
__global
float
*data_cost_selected_,
__global
float
*selected_disp_pyr,
__global
float
*ctemp,int
h,
int
w,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1,
int
cndisp
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
y
<
h
&&
x
<
w
)
{
__global
float
*selected_disparity
=
selected_disp_pyr
+
y
*
cmsg_step1
+
x
;
__global
float
*data_cost_selected
=
data_cost_selected_
+
y
*
cmsg_step1
+
x
;
__global
float
*data_cost
=
ctemp
+
y
*
cmsg_step1
+
x
;
int
nr_local_minimum
=
0
;
float
prev
=
data_cost[0
*
cdisp_step1]
;
float
cur
=
data_cost[1
*
cdisp_step1]
;
float
next
=
data_cost[2
*
cdisp_step1]
;
for
(
int
d
=
1
; d < cndisp - 1 && nr_local_minimum < nr_plane; d++)
{
if
(
cur
<
prev
&&
cur
<
next
)
{
data_cost_selected[nr_local_minimum
*
cdisp_step1]
=
cur
;
selected_disparity[nr_local_minimum
*
cdisp_step1]
=
d
;
data_cost[d
*
cdisp_step1]
=
FLT_MAX
;
nr_local_minimum++
;
}
prev
=
cur
;
cur
=
next
;
next
=
data_cost[
(
d
+
1
)
*
cdisp_step1]
;
}
for
(
int
i
=
nr_local_minimum
; i < nr_plane; i++)
{
float
minimum
=
FLT_MAX
;
int
id
=
0
;
for
(
int
d
=
0
; d < cndisp; d++)
{
cur
=
data_cost[d
*
cdisp_step1]
;
if
(
cur
<
minimum
)
{
minimum
=
cur
;
id
=
d
;
}
}
data_cost_selected[i
*
cdisp_step1]
=
minimum
;
selected_disparity[i
*
cdisp_step1]
=
id
;
data_cost[id
*
cdisp_step1]
=
FLT_MAX
;
}
}
}
///////////////////////////////////////////////////////////////
///////////////////////
init
data
cost
////////////////////////
///////////////////////////////////////////////////////////////
float
compute_3
(
__global
uchar*
left,
__global
uchar*
right,
float
cdata_weight,
float
cmax_data_term
)
{
float
tb
=
0.114f
*
abs
((
int
)
left[0]
-
right[0]
)
;
float
tg
=
0.587f
*
abs
((
int
)
left[1]
-
right[1]
)
;
float
tr
=
0.299f
*
abs
((
int
)
left[2]
-
right[2]
)
;
return
fmin
(
cdata_weight
*
(
tr
+
tg
+
tb
)
,
cdata_weight
*
cmax_data_term
)
;
}
float
compute_1
(
__global
uchar*
left,
__global
uchar*
right,
float
cdata_weight,
float
cmax_data_term
)
{
return
fmin
(
cdata_weight
*
abs
((
int
)
*left
-
(
int
)
*right
)
,
cdata_weight
*
cmax_data_term
)
;
}
short
round_short
(
float
v
)
{
return
convert_short_sat_rte
(
v
)
;
}
///////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////init_data_cost///////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
init_data_cost_0
(
__global
short
*ctemp,
__global
uchar
*cleft,
__global
uchar
*cright,
int
h,
int
w,
int
level,
int
channels,
int
cmsg_step1,
float
cdata_weight,
float
cmax_data_term,
int
cdisp_step1,
int
cth,
int
cimg_step,
int
cndisp
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
y
<
h
&&
x
<
w
)
{
int
y0
=
y
<<
level
;
int
yt
=
(
y
+
1
)
<<
level
;
int
x0
=
x
<<
level
;
int
xt
=
(
x
+
1
)
<<
level
;
__global
short
*data_cost
=
ctemp
+
y
*
cmsg_step1
+
x
;
for
(
int
d
=
0
; d < cndisp; ++d)
{
float
val
=
0.0f
;
for
(
int
yi
=
y0
; yi < yt; yi++)
{
for
(
int
xi
=
x0
; xi < xt; xi++)
{
int
xr
=
xi
-
d
;
if
(
d
<
cth
|
| xr < 0)
val += cdata_weight * cmax_data_term;
else
{
__global uchar *lle = cleft + yi * cimg_step + xi * channels;
__global uchar *lri = cright + yi * cimg_step + xr * channels;
if(channels == 1)
val += compute_1(lle, lri, cdata_weight, cmax_data_term);
else
val += compute_3(lle, lri, cdata_weight, cmax_data_term);
}
}
}
data_cost[cdisp_step1 * d] = round_short(val);
}
}
}
__kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
int h, int w, int level, int channels,
int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
int cth, int cimg_step, int cndisp)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (y < h && x < w)
{
int y0 = y << level;
int yt = (y + 1) << level;
int x0 = x << level;
int xt = (x + 1) << level;
__global float *data_cost = ctemp + y * cmsg_step1 + x;
for(int d = 0; d < cndisp; ++d)
{
float val = 0.0f;
for(int yi = y0; yi < yt; yi++)
{
for(int xi = x0; xi < xt; xi++)
{
int xr = xi - d;
if(d < cth || xr < 0)
val += cdata_weight * cmax_data_term;
else
{
__global uchar* lle = cleft + yi * cimg_step + xi * channels;
__global uchar* lri = cright + yi * cimg_step + xr * channels;
if(channels == 1)
val += compute_1(lle, lri, cdata_weight, cmax_data_term);
else
val += compute_3(lle, lri, cdata_weight, cmax_data_term);
}
}
}
data_cost[cdisp_step1 * d] = val;
}
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////init_data_cost_reduce//////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
__local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
int cdisp_step1, int cmsg_step1)
{
int x_out = get_group_id(0);
int y_out = get_group_id(1) % h;
//int d = (blockIdx.y / h) * blockDim.z + threadIdx.z;
int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2);
int tid = get_local_id(0);
if (d < cndisp)
{
int x0 = x_out << level;
int y0 = y_out << level;
int len = min(y0 + winsz, rows) - y0;
float val = 0.0f;
if (x0 + tid < cols)
{
if (x0 + tid - d < 0 || d < cth)
val = cdata_weight * cmax_data_term * len;
else
{
__global uchar* lle = cleft + y0 * cimg_step + channels * (x0 + tid );
__global uchar* lri = cright + y0 * cimg_step + channels * (x0 + tid - d);
for(int y = 0; y < len; ++y)
{
if(channels == 1)
val += compute_1(lle, lri, cdata_weight, cmax_data_term);
else
val += compute_3(lle, lri, cdata_weight, cmax_data_term);
lle += cimg_step;
lri += cimg_step;
}
}
}
__local float* dline = smem + winsz * get_local_id(2);
dline[tid] = val;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local float* dline = smem + winsz * get_local_id(2);
if (winsz >= 256)
{
if (tid < 128)
dline[tid] += dline[tid + 128];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local float* dline = smem + winsz * get_local_id(2);
if (winsz >= 128)
{
if (tid < 64)
dline[tid] += dline[tid + 64];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 64)
if (tid < 32)
vdline[tid] += vdline[tid + 32];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 32)
if (tid < 16)
vdline[tid] += vdline[tid + 16];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d<cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 16)
if (tid < 8)
vdline[tid] += vdline[tid + 8];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d<cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 8)
if (tid < 4)
vdline[tid] += vdline[tid + 4];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d<cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 4)
if (tid < 2)
vdline[tid] += vdline[tid + 2];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d<cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 2)
if (tid < 1)
vdline[tid] += vdline[tid + 1];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local float* dline = smem + winsz * get_local_id(2);
__global short* data_cost = ctemp + y_out * cmsg_step1 + x_out;
if (tid == 0)
data_cost[cdisp_step1 * d] = convert_short_sat_rte(dline[0]);
}
}
__kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
__local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
int cdisp_step1, int cmsg_step1)
{
int x_out = get_group_id(0);
int y_out = get_group_id(1) % h;
int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2);
int tid = get_local_id(0);
if (d < cndisp)
{
int x0 = x_out << level;
int y0 = y_out << level;
int len = min(y0 + winsz, rows) - y0;
float val = 0.0f;
//float val = 528.0f;
if (x0 + tid < cols)
{
if (x0 + tid - d < 0 || d < cth)
val = cdata_weight * cmax_data_term * len;
else
{
__global uchar* lle = cleft + y0 * cimg_step + channels * (x0 + tid );
__global uchar* lri = cright + y0 * cimg_step + channels * (x0 + tid - d);
for(int y = 0; y < len; ++y)
{
if(channels == 1)
val += compute_1(lle, lri, cdata_weight, cmax_data_term);
else
val += compute_3(lle, lri, cdata_weight, cmax_data_term);
lle += cimg_step;
lri += cimg_step;
}
}
}
__local float* dline = smem + winsz * get_local_id(2);
dline[tid] = val;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local float* dline = smem + winsz * get_local_id(2);
if (winsz >= 256)
if (tid < 128)
dline[tid] += dline[tid + 128];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local float* dline = smem + winsz * get_local_id(2);
if (winsz >= 128)
if (tid < 64)
dline[tid] += dline[tid + 64];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 64)
if (tid < 32)
vdline[tid] += vdline[tid + 32];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 32)
if (tid < 16)
vdline[tid] += vdline[tid + 16];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 16)
if (tid < 8)
vdline[tid] += vdline[tid + 8];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 8)
if (tid < 4)
vdline[tid] += vdline[tid + 4];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 4)
if (tid < 2)
vdline[tid] += vdline[tid + 2];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 2)
if (tid < 1)
vdline[tid] += vdline[tid + 1];
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < cndisp)
{
__global float *data_cost = ctemp + y_out * cmsg_step1 + x_out;
__local float* dline = smem + winsz * get_local_id(2);
if (tid == 0)
data_cost[cdisp_step1 * d] = dline[0];
}
}
///////////////////////////////////////////////////////////////
////////////////////// compute data cost //////////////////////
///////////////////////////////////////////////////////////////
__kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_,
__global uchar *cleft, __global uchar *cright,
int h, int w, int level, int nr_plane, int channels,
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
float cmax_data_term, int cimg_step, int cth)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (y < h && x < w)
{
int y0 = y << level;
int yt = (y + 1) << level;
int x0 = x << level;
int xt = (x + 1) << level;
__global const short *selected_disparity = selected_disp_pyr + y/2 * cmsg_step2 + x/2;
__global short *data_cost = data_cost_ + y * cmsg_step1 + x;
for(int d = 0; d < nr_plane; d++)
{
float val = 0.0f;
for(int yi = y0; yi < yt; yi++)
{
for(int xi = x0; xi < xt; xi++)
{
int sel_disp = selected_disparity[d * cdisp_step2];
int xr = xi - sel_disp;
if (xr < 0 || sel_disp < cth)
val += cdata_weight * cmax_data_term;
else
{
__global uchar* left_x = cleft + yi * cimg_step + xi * channels;
__global uchar* right_x = cright + yi * cimg_step + xr * channels;
if(channels == 1)
val += compute_1(left_x, right_x, cdata_weight, cmax_data_term);
else
val += compute_3(left_x, right_x, cdata_weight, cmax_data_term);
}
}
}
data_cost[cdisp_step1 * d] = convert_short_sat_rte(val);
}
}
}
__kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_,
__global uchar *cleft, __global uchar *cright,
int h, int w, int level, int nr_plane, int channels,
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
float cmax_data_term, int cimg_step, int cth)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (y < h && x < w)
{
int y0 = y << level;
int yt = (y + 1) << level;
int x0 = x << level;
int xt = (x + 1) << level;
__global const float *selected_disparity = selected_disp_pyr + y/2 * cmsg_step2 + x/2;
__global float *data_cost = data_cost_ + y * cmsg_step1 + x;
for(int d = 0; d < nr_plane; d++)
{
float val = 0.0f;
for(int yi = y0; yi < yt; yi++)
{
for(int xi = x0; xi < xt; xi++)
{
int sel_disp = selected_disparity[d * cdisp_step2];
int xr = xi - sel_disp;
if (xr < 0 || sel_disp < cth)
val += cdata_weight * cmax_data_term;
else
{
__global uchar* left_x = cleft + yi * cimg_step + xi * channels;
__global uchar* right_x = cright + yi * cimg_step + xr * channels;
if(channels == 1)
val += compute_1(left_x, right_x, cdata_weight, cmax_data_term);
else
val += compute_3(left_x, right_x, cdata_weight, cmax_data_term);
}
}
}
data_cost[cdisp_step1 * d] = val;
}
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////compute_data_cost_reduce//////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_,
__global uchar *cleft, __global uchar *cright,__local float *smem,
int level, int rows, int cols, int h, int nr_plane,
int channels, int winsz,
int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2,
float cdata_weight, float cmax_data_term, int cimg_step,int cth)
{
int x_out = get_group_id(0);
int y_out = get_group_id(1) % h;
int d = (get_group_id(1)/ h) * get_local_size(2) + get_local_id(2);
int tid = get_local_id(0);
__global const short* selected_disparity = selected_disp_pyr + y_out/2 * cmsg_step2 + x_out/2;
__global short* data_cost = data_cost_ + y_out * cmsg_step1 + x_out;
if (d < nr_plane)
{
int sel_disp = selected_disparity[d * cdisp_step2];
int x0 = x_out << level;
int y0 = y_out << level;
int len = min(y0 + winsz, rows) - y0;
float val = 0.0f;
if (x0 + tid < cols)
{
if (x0 + tid - sel_disp < 0 || sel_disp < cth)
val = cdata_weight * cmax_data_term * len;
else
{
__global uchar* lle = cleft + y0 * cimg_step + channels * (x0 + tid );
__global uchar* lri = cright + y0 * cimg_step + channels * (x0 + tid - sel_disp);
for(int y = 0; y < len; ++y)
{
if(channels == 1)
val += compute_1(lle, lri, cdata_weight, cmax_data_term);
else
val += compute_3(lle, lri, cdata_weight, cmax_data_term);
lle += cimg_step;
lri += cimg_step;
}
}
}
__local float* dline = smem + winsz * get_local_id(2);
dline[tid] = val;
}
barrier(CLK_LOCAL_MEM_FENCE);
// if (winsz >= 256) { if (tid < 128) { dline[tid] += dline[tid + 128]; } barrier(CLK_LOCAL_MEM_FENCE); }
//if (winsz >= 128) { if (tid < 64) { dline[tid] += dline[tid + 64]; } barrier(CLK_LOCAL_MEM_FENCE); }
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 64)
{
if (tid < 32)
vdline[tid] += vdline[tid + 32];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 32)
{
if (tid < 16)
vdline[tid] += vdline[tid + 16];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 16)
{
if (tid < 8)
vdline[tid] += vdline[tid + 8];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 8)
{
if (tid < 4)
vdline[tid] += vdline[tid + 4];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 4)
{
if (tid < 2)
vdline[tid] += vdline[tid + 2];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (winsz >= 2)
{
if (tid < 1)
vdline[tid] += vdline[tid + 1];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if(d < nr_plane)
{
__local volatile float* vdline = smem + winsz * get_local_id(2);
if (tid == 0)
data_cost[cdisp_step1 * d] = convert_short_sat_rte(vdline[0]);
}
}
__kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr, __global float *data_cost_,
__global uchar *cleft, __global uchar *cright, __local float *smem,
int level, int rows, int cols, int h, int nr_plane,
int channels, int winsz,
int cmsg_step1, int cmsg_step2, int cdisp_step1,int cdisp_step2, float cdata_weight,
float cmax_data_term, int cimg_step, int cth)
{
int x_out = get_group_id(0);
int y_out = get_group_id(1) % h;
int d = (get_group_id(1)/ h) * get_local_size(2) + get_local_id(2);
int tid = get_local_id(0);
__global const float *selected_disparity = selected_disp_pyr + y_out/2 * cmsg_step2 + x_out/2;
__global float *data_cost = data_cost_ + y_out * cmsg_step1 + x_out;
if (d < nr_plane)
{
int sel_disp = selected_disparity[d * cdisp_step2];
int x0 = x_out << level;
int y0 = y_out << level;
int len = min(y0 + winsz, rows) - y0;
float val = 0.0f;
if (x0 + tid < cols)
{
if (x0 + tid - sel_disp < 0 |
|
sel_disp
<
cth
)
val
=
cdata_weight
*
cmax_data_term
*
len
;
else
{
__global
uchar*
lle
=
cleft
+
y0
*
cimg_step
+
channels
*
(
x0
+
tid
)
;
__global
uchar*
lri
=
cright
+
y0
*
cimg_step
+
channels
*
(
x0
+
tid
-
sel_disp
)
;
for
(
int
y
=
0
; y < len; ++y)
{
if
(
channels
==
1
)
val
+=
compute_1
(
lle,
lri,
cdata_weight,
cmax_data_term
)
;
else
val
+=
compute_3
(
lle,
lri,
cdata_weight,
cmax_data_term
)
;
lle
+=
cimg_step
;
lri
+=
cimg_step
;
}
}
}
__local
float*
dline
=
smem
+
winsz
*
get_local_id
(
2
)
;
dline[tid]
=
val
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
d
<
nr_plane
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
winsz
>=
64
)
{
if
(
tid
<
32
)
vdline[tid]
+=
vdline[tid
+
32]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
d
<
nr_plane
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
winsz
>=
32
)
{
if
(
tid
<
16
)
vdline[tid]
+=
vdline[tid
+
16]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
d
<
nr_plane
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
winsz
>=
16
)
{
if
(
tid
<
8
)
vdline[tid]
+=
vdline[tid
+
8]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
d
<
nr_plane
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
winsz
>=
8
)
{
if
(
tid
<
4
)
vdline[tid]
+=
vdline[tid
+
4]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
d
<
nr_plane
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
winsz
>=
4
)
{
if
(
tid
<
2
)
vdline[tid]
+=
vdline[tid
+
2]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
d
<
nr_plane
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
winsz
>=
2
)
{
if
(
tid
<
1
)
vdline[tid]
+=
vdline[tid
+
1]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
d
<
nr_plane
)
{
__local
volatile
float*
vdline
=
smem
+
winsz
*
get_local_id
(
2
)
;
if
(
tid
==
0
)
data_cost[cdisp_step1
*
d]
=
vdline[0]
;
}
}
///////////////////////////////////////////////////////////////
////////////////////////
init
message
/////////////////////////
///////////////////////////////////////////////////////////////
void
get_first_k_element_increase_0
(
__global
short*
u_new,
__global
short
*d_new,
__global
short
*l_new,
__global
short
*r_new,
__global
const
short
*u_cur,
__global
const
short
*d_cur,
__global
const
short
*l_cur,
__global
const
short
*r_cur,
__global
short
*data_cost_selected,
__global
short
*disparity_selected_new,
__global
short
*data_cost_new,
__global
const
short*
data_cost_cur,
__global
const
short
*disparity_selected_cur,
int
nr_plane,
int
nr_plane2,
int
cdisp_step1,
int
cdisp_step2
)
{
for
(
int
i
=
0
; i < nr_plane; i++)
{
short
minimum
=
SHRT_MAX
;
int
id
=
0
;
for
(
int
j
=
0
; j < nr_plane2; j++)
{
short
cur
=
data_cost_new[j
*
cdisp_step1]
;
if
(
cur
<
minimum
)
{
minimum
=
cur
;
id
=
j
;
}
}
data_cost_selected[i
*
cdisp_step1]
=
data_cost_cur[id
*
cdisp_step1]
;
disparity_selected_new[i
*
cdisp_step1]
=
disparity_selected_cur[id
*
cdisp_step2]
;
u_new[i
*
cdisp_step1]
=
u_cur[id
*
cdisp_step2]
;
d_new[i
*
cdisp_step1]
=
d_cur[id
*
cdisp_step2]
;
l_new[i
*
cdisp_step1]
=
l_cur[id
*
cdisp_step2]
;
r_new[i
*
cdisp_step1]
=
r_cur[id
*
cdisp_step2]
;
data_cost_new[id
*
cdisp_step1]
=
SHRT_MAX
;
}
}
void
get_first_k_element_increase_1
(
__global
float
*u_new,
__global
float
*d_new,
__global
float
*l_new,
__global
float
*r_new,
__global
const
float
*u_cur,
__global
const
float
*d_cur,
__global
const
float
*l_cur,
__global
const
float
*r_cur,
__global
float
*data_cost_selected,
__global
float
*disparity_selected_new,
__global
float
*data_cost_new,
__global
const
float
*data_cost_cur,
__global
const
float
*disparity_selected_cur,
int
nr_plane,
int
nr_plane2,
int
cdisp_step1,
int
cdisp_step2
)
{
for
(
int
i
=
0
; i < nr_plane; i++)
{
float
minimum
=
FLT_MAX
;
int
id
=
0
;
for
(
int
j
=
0
; j < nr_plane2; j++)
{
float
cur
=
data_cost_new[j
*
cdisp_step1]
;
if
(
cur
<
minimum
)
{
minimum
=
cur
;
id
=
j
;
}
}
data_cost_selected[i
*
cdisp_step1]
=
data_cost_cur[id
*
cdisp_step1]
;
disparity_selected_new[i
*
cdisp_step1]
=
disparity_selected_cur[id
*
cdisp_step2]
;
u_new[i
*
cdisp_step1]
=
u_cur[id
*
cdisp_step2]
;
d_new[i
*
cdisp_step1]
=
d_cur[id
*
cdisp_step2]
;
l_new[i
*
cdisp_step1]
=
l_cur[id
*
cdisp_step2]
;
r_new[i
*
cdisp_step1]
=
r_cur[id
*
cdisp_step2]
;
data_cost_new[id
*
cdisp_step1]
=
FLT_MAX
;
}
}
__kernel
void
init_message_0
(
__global
short
*u_new_,
__global
short
*d_new_,
__global
short
*l_new_,
__global
short
*r_new_,
__global
short
*u_cur_,
__global
const
short
*d_cur_,
__global
const
short
*l_cur_,
__global
const
short
*r_cur_,
__global
short
*ctemp,
__global
short
*selected_disp_pyr_new,
__global
const
short
*selected_disp_pyr_cur,
__global
short
*data_cost_selected_,
__global
const
short
*data_cost_,
int
h,
int
w,
int
nr_plane,
int
h2,
int
w2,
int
nr_plane2,
int
cdisp_step1,
int
cdisp_step2,
int
cmsg_step1,
int
cmsg_step2
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
y
<
h
&&
x
<
w
)
{
__global
const
short
*u_cur
=
u_cur_
+
min
(
h2-1,
y/2
+
1
)
*
cmsg_step2
+
x/2
;
__global
const
short
*d_cur
=
d_cur_
+
max
(
0
,
y/2
-
1
)
*
cmsg_step2
+
x/2
;
__global
const
short
*l_cur
=
l_cur_
+
y/2
*
cmsg_step2
+
min
(
w2-1,
x/2
+
1
)
;
__global
const
short
*r_cur
=
r_cur_
+
y/2
*
cmsg_step2
+
max
(
0
,
x/2
-
1
)
;
__global
short
*data_cost_new
=
ctemp
+
y
*
cmsg_step1
+
x
;
__global
const
short
*disparity_selected_cur
=
selected_disp_pyr_cur
+
y/2
*
cmsg_step2
+
x/2
;
__global
const
short
*data_cost
=
data_cost_
+
y
*
cmsg_step1
+
x
;
for
(
int
d
=
0
; d < nr_plane2; d++)
{
int
idx2
=
d
*
cdisp_step2
;
short
val
=
data_cost[d
*
cdisp_step1]
+
u_cur[idx2]
+
d_cur[idx2]
+
l_cur[idx2]
+
r_cur[idx2]
;
data_cost_new[d
*
cdisp_step1]
=
val
;
}
__global
short
*data_cost_selected
=
data_cost_selected_
+
y
*
cmsg_step1
+
x
;
__global
short
*disparity_selected_new
=
selected_disp_pyr_new
+
y
*
cmsg_step1
+
x
;
__global
short
*u_new
=
u_new_
+
y
*
cmsg_step1
+
x
;
__global
short
*d_new
=
d_new_
+
y
*
cmsg_step1
+
x
;
__global
short
*l_new
=
l_new_
+
y
*
cmsg_step1
+
x
;
__global
short
*r_new
=
r_new_
+
y
*
cmsg_step1
+
x
;
u_cur
=
u_cur_
+
y/2
*
cmsg_step2
+
x/2
;
d_cur
=
d_cur_
+
y/2
*
cmsg_step2
+
x/2
;
l_cur
=
l_cur_
+
y/2
*
cmsg_step2
+
x/2
;
r_cur
=
r_cur_
+
y/2
*
cmsg_step2
+
x/2
;
get_first_k_element_increase_0
(
u_new,
d_new,
l_new,
r_new,
u_cur,
d_cur,
l_cur,
r_cur,
data_cost_selected,
disparity_selected_new,
data_cost_new,
data_cost,
disparity_selected_cur,
nr_plane,
nr_plane2,
cdisp_step1,
cdisp_step2
)
;
}
}
__kernel
void
init_message_1
(
__global
float
*u_new_,
__global
float
*d_new_,
__global
float
*l_new_,
__global
float
*r_new_,
__global
const
float
*u_cur_,
__global
const
float
*d_cur_,
__global
const
float
*l_cur_,
__global
const
float
*r_cur_,
__global
float
*ctemp,
__global
float
*selected_disp_pyr_new,
__global
const
float
*selected_disp_pyr_cur,
__global
float
*data_cost_selected_,
__global
const
float
*data_cost_,
int
h,
int
w,
int
nr_plane,
int
h2,
int
w2,
int
nr_plane2,
int
cdisp_step1,
int
cdisp_step2,
int
cmsg_step1,
int
cmsg_step2
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
__global
const
float
*u_cur
=
u_cur_
+
min
(
h2-1,
y/2
+
1
)
*
cmsg_step2
+
x/2
;
__global
const
float
*d_cur
=
d_cur_
+
max
(
0
,
y/2
-
1
)
*
cmsg_step2
+
x/2
;
__global
const
float
*l_cur
=
l_cur_
+
y/2
*
cmsg_step2
+
min
(
w2-1,
x/2
+
1
)
;
__global
const
float
*r_cur
=
r_cur_
+
y/2
*
cmsg_step2
+
max
(
0
,
x/2
-
1
)
;
__global
float
*data_cost_new
=
ctemp
+
y
*
cmsg_step1
+
x
;
__global
const
float
*disparity_selected_cur
=
selected_disp_pyr_cur
+
y/2
*
cmsg_step2
+
x/2
;
__global
const
float
*data_cost
=
data_cost_
+
y
*
cmsg_step1
+
x
;
if
(
y
<
h
&&
x
<
w
)
{
for
(
int
d
=
0
; d < nr_plane2; d++)
{
int
idx2
=
d
*
cdisp_step2
;
float
val
=
data_cost[d
*
cdisp_step1]
+
u_cur[idx2]
+
d_cur[idx2]
+
l_cur[idx2]
+
r_cur[idx2]
;
data_cost_new[d
*
cdisp_step1]
=
val
;
}
}
__global
float
*data_cost_selected
=
data_cost_selected_
+
y
*
cmsg_step1
+
x
;
__global
float
*disparity_selected_new
=
selected_disp_pyr_new
+
y
*
cmsg_step1
+
x
;
__global
float
*u_new
=
u_new_
+
y
*
cmsg_step1
+
x
;
__global
float
*d_new
=
d_new_
+
y
*
cmsg_step1
+
x
;
__global
float
*l_new
=
l_new_
+
y
*
cmsg_step1
+
x
;
__global
float
*r_new
=
r_new_
+
y
*
cmsg_step1
+
x
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
y
<
h
&&
x
<
w
)
{
u_cur
=
u_cur_
+
y/2
*
cmsg_step2
+
x/2
;
d_cur
=
d_cur_
+
y/2
*
cmsg_step2
+
x/2
;
l_cur
=
l_cur_
+
y/2
*
cmsg_step2
+
x/2
;
r_cur
=
r_cur_
+
y/2
*
cmsg_step2
+
x/2
;
for
(
int
i
=
0
; i < nr_plane; i++)
{
float
minimum
=
FLT_MAX
;
int
id
=
0
;
for
(
int
j
=
0
; j < nr_plane2; j++)
{
float
cur
=
data_cost_new[j
*
cdisp_step1]
;
if
(
cur
<
minimum
)
{
minimum
=
cur
;
id
=
j
;
}
}
data_cost_selected[i
*
cdisp_step1]
=
data_cost[id
*
cdisp_step1]
;
disparity_selected_new[i
*
cdisp_step1]
=
disparity_selected_cur[id
*
cdisp_step2]
;
u_new[i
*
cdisp_step1]
=
u_cur[id
*
cdisp_step2]
;
d_new[i
*
cdisp_step1]
=
d_cur[id
*
cdisp_step2]
;
l_new[i
*
cdisp_step1]
=
l_cur[id
*
cdisp_step2]
;
r_new[i
*
cdisp_step1]
=
r_cur[id
*
cdisp_step2]
;
data_cost_new[id
*
cdisp_step1]
=
FLT_MAX
;
}
}
}
///////////////////////////////////////////////////////////////
////////////////////
calc
all
iterations
/////////////////////
///////////////////////////////////////////////////////////////
void
message_per_pixel_0
(
__global
const
short
*data,
__global
short
*msg_dst,
__global
const
short
*msg1,
__global
const
short
*msg2,
__global
const
short
*msg3,
__global
const
short
*dst_disp,
__global
const
short
*src_disp,
int
nr_plane,
__global
short
*temp,
float
cmax_disc_term,
int
cdisp_step1,
float
cdisc_single_jump
)
{
short
minimum
=
SHRT_MAX
;
for
(
int
d
=
0
; d < nr_plane; d++)
{
int
idx
=
d
*
cdisp_step1
;
short
val
=
data[idx]
+
msg1[idx]
+
msg2[idx]
+
msg3[idx]
;
if
(
val
<
minimum
)
minimum
=
val
;
msg_dst[idx]
=
val
;
}
float
sum
=
0
;
for
(
int
d
=
0
; d < nr_plane; d++)
{
float
cost_min
=
minimum
+
cmax_disc_term
;
short
src_disp_reg
=
src_disp[d
*
cdisp_step1]
;
for
(
int
d2
=
0
; d2 < nr_plane; d2++)
cost_min
=
fmin
(
cost_min,
(
msg_dst[d2
*
cdisp_step1]
+
cdisc_single_jump
*
abs
(
dst_disp[d2
*
cdisp_step1]
-
src_disp_reg
)))
;
temp[d
*
cdisp_step1]
=
convert_short_sat_rte
(
cost_min
)
;
sum
+=
cost_min
;
}
sum
/=
nr_plane
;
for
(
int
d
=
0
; d < nr_plane; d++)
msg_dst[d
*
cdisp_step1]
=
convert_short_sat_rte
(
temp[d
*
cdisp_step1]
-
sum
)
;
}
void
message_per_pixel_1
(
__global
const
float
*data,
__global
float
*msg_dst,
__global
const
float
*msg1,
__global
const
float
*msg2,
__global
const
float
*msg3,
__global
const
float
*dst_disp,
__global
const
float
*src_disp,
int
nr_plane,
__global
float
*temp,
float
cmax_disc_term,
int
cdisp_step1,
float
cdisc_single_jump
)
{
float
minimum
=
FLT_MAX
;
for
(
int
d
=
0
; d < nr_plane; d++)
{
int
idx
=
d
*
cdisp_step1
;
float
val
=
data[idx]
+
msg1[idx]
+
msg2[idx]
+
msg3[idx]
;
if
(
val
<
minimum
)
minimum
=
val
;
msg_dst[idx]
=
val
;
}
float
sum
=
0
;
for
(
int
d
=
0
; d < nr_plane; d++)
{
float
cost_min
=
minimum
+
cmax_disc_term
;
float
src_disp_reg
=
src_disp[d
*
cdisp_step1]
;
for
(
int
d2
=
0
; d2 < nr_plane; d2++)
cost_min
=
fmin
(
cost_min,
(
msg_dst[d2
*
cdisp_step1]
+
cdisc_single_jump
*
fabs
(
dst_disp[d2
*
cdisp_step1]
-
src_disp_reg
)))
;
temp[d
*
cdisp_step1]
=
cost_min
;
sum
+=
cost_min
;
}
sum
/=
nr_plane
;
for
(
int
d
=
0
; d < nr_plane; d++)
msg_dst[d
*
cdisp_step1]
=
temp[d
*
cdisp_step1]
-
sum
;
}
__kernel
void
compute_message_0
(
__global
short
*u_,
__global
short
*d_,
__global
short
*l_,
__global
short
*r_,
__global
const
short
*data_cost_selected,
__global
const
short
*selected_disp_pyr_cur,
__global
short
*ctemp,
int
h,
int
w,
int
nr_plane,
int
i,
float
cmax_disc_term,
int
cdisp_step1,
int
cmsg_step1,
float
cdisc_single_jump
)
{
int
y
=
get_global_id
(
1
)
;
int
x
=
((
get_global_id
(
0
))
<<
1
)
+
((
y
+
i
)
&
1
)
;
if
(
y
>
0
&&
y
<
h
-
1
&&
x
>
0
&&
x
<
w
-
1
)
{
__global
const
short
*data
=
data_cost_selected
+
y
*
cmsg_step1
+
x
;
__global
short
*u
=
u_
+
y
*
cmsg_step1
+
x
;
__global
short
*d
=
d_
+
y
*
cmsg_step1
+
x
;
__global
short
*l
=
l_
+
y
*
cmsg_step1
+
x
;
__global
short
*r
=
r_
+
y
*
cmsg_step1
+
x
;
__global
const
short
*disp
=
selected_disp_pyr_cur
+
y
*
cmsg_step1
+
x
;
__global
short
*temp
=
ctemp
+
y
*
cmsg_step1
+
x
;
message_per_pixel_0
(
data,
u,
r
-
1
,
u
+
cmsg_step1,
l
+
1
,
disp,
disp
-
cmsg_step1,
nr_plane,
temp,
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
message_per_pixel_0
(
data,
d,
d
-
cmsg_step1,
r
-
1
,
l
+
1
,
disp,
disp
+
cmsg_step1,
nr_plane,
temp,
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
message_per_pixel_0
(
data,
l,
u
+
cmsg_step1,
d
-
cmsg_step1,
l
+
1
,
disp,
disp
-
1
,
nr_plane,
temp,
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
message_per_pixel_0
(
data,
r,
u
+
cmsg_step1,
d
-
cmsg_step1,
r
-
1
,
disp,
disp
+
1
,
nr_plane,
temp,
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
}
}
__kernel
void
compute_message_1
(
__global
float
*u_,
__global
float
*d_,
__global
float
*l_,
__global
float
*r_,
__global
const
float
*data_cost_selected,
__global
const
float
*selected_disp_pyr_cur,
__global
float
*ctemp,
int
h,
int
w,
int
nr_plane,
int
i,
float
cmax_disc_term,
int
cdisp_step1,
int
cmsg_step1,
float
cdisc_single_jump
)
{
int
y
=
get_global_id
(
1
)
;
int
x
=
((
get_global_id
(
0
))
<<
1
)
+
((
y
+
i
)
&
1
)
;
if
(
y
>
0
&&
y
<
h
-
1
&&
x
>
0
&&
x
<
w
-
1
)
{
__global
const
float
*data
=
data_cost_selected
+
y
*
cmsg_step1
+
x
;
__global
float
*u
=
u_
+
y
*
cmsg_step1
+
x
;
__global
float
*d
=
d_
+
y
*
cmsg_step1
+
x
;
__global
float
*l
=
l_
+
y
*
cmsg_step1
+
x
;
__global
float
*r
=
r_
+
y
*
cmsg_step1
+
x
;
__global
const
float
*disp
=
selected_disp_pyr_cur
+
y
*
cmsg_step1
+
x
;
__global
float
*temp
=
ctemp
+
y
*
cmsg_step1
+
x
;
message_per_pixel_1
(
data,
u,
r
-
1
,
u
+
cmsg_step1,
l
+
1
,
disp,
disp
-
cmsg_step1,
nr_plane,
temp,
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
message_per_pixel_1
(
data,
d,
d
-
cmsg_step1,
r
-
1
,
l
+
1
,
disp,
disp
+
cmsg_step1,
nr_plane,
temp,
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
message_per_pixel_1
(
data,
l,
u
+
cmsg_step1,
d
-
cmsg_step1,
l
+
1
,
disp,
disp
-
1
,
nr_plane,
temp,
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
message_per_pixel_1
(
data,
r,
u
+
cmsg_step1,
d
-
cmsg_step1,
r
-
1
,
disp,
disp
+
1
,
nr_plane,
temp,
cmax_disc_term,
cdisp_step1,
cdisc_single_jump
)
;
}
}
///////////////////////////////////////////////////////////////
///////////////////////////
output
////////////////////////////
///////////////////////////////////////////////////////////////
__kernel
void
compute_disp_0
(
__global
const
short
*u_,
__global
const
short
*d_,
__global
const
short
*l_,
__global
const
short
*r_,
__global
const
short
*
data_cost_selected,
__global
const
short
*disp_selected_pyr,
__global
short*
disp,
int
res_step,
int
cols,
int
rows,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
y
>
0
&&
y
<
rows
-
1
&&
x
>
0
&&
x
<
cols
-
1
)
{
__global
const
short
*data
=
data_cost_selected
+
y
*
cmsg_step1
+
x
;
__global
const
short
*disp_selected
=
disp_selected_pyr
+
y
*
cmsg_step1
+
x
;
__global
const
short
*u
=
u_
+
(
y+1
)
*
cmsg_step1
+
(
x+0
)
;
__global
const
short
*d
=
d_
+
(
y-1
)
*
cmsg_step1
+
(
x+0
)
;
__global
const
short
*l
=
l_
+
(
y+0
)
*
cmsg_step1
+
(
x+1
)
;
__global
const
short
*r
=
r_
+
(
y+0
)
*
cmsg_step1
+
(
x-1
)
;
short
best
=
0
;
short
best_val
=
SHRT_MAX
;
for
(
int
i
=
0
; i < nr_plane; ++i)
{
int
idx
=
i
*
cdisp_step1
;
short
val
=
data[idx]+
u[idx]
+
d[idx]
+
l[idx]
+
r[idx]
;
if
(
val
<
best_val
)
{
best_val
=
val
;
best
=
disp_selected[idx]
;
}
}
disp[res_step
*
y
+
x]
=
best
;
}
}
__kernel
void
compute_disp_1
(
__global
const
float
*u_,
__global
const
float
*d_,
__global
const
float
*l_,
__global
const
float
*r_,
__global
const
float
*data_cost_selected,
__global
const
float
*disp_selected_pyr,
__global
short
*disp,
int
res_step,
int
cols,
int
rows,
int
nr_plane,
int
cmsg_step1,
int
cdisp_step1
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
y
>
0
&&
y
<
rows
-
1
&&
x
>
0
&&
x
<
cols
-
1
)
{
__global
const
float
*data
=
data_cost_selected
+
y
*
cmsg_step1
+
x
;
__global
const
float
*disp_selected
=
disp_selected_pyr
+
y
*
cmsg_step1
+
x
;
__global
const
float
*u
=
u_
+
(
y+1
)
*
cmsg_step1
+
(
x+0
)
;
__global
const
float
*d
=
d_
+
(
y-1
)
*
cmsg_step1
+
(
x+0
)
;
__global
const
float
*l
=
l_
+
(
y+0
)
*
cmsg_step1
+
(
x+1
)
;
__global
const
float
*r
=
r_
+
(
y+0
)
*
cmsg_step1
+
(
x-1
)
;
short
best
=
0
;
short
best_val
=
SHRT_MAX
;
for
(
int
i
=
0
; i < nr_plane; ++i)
{
int
idx
=
i
*
cdisp_step1
;
float
val
=
data[idx]+
u[idx]
+
d[idx]
+
l[idx]
+
r[idx]
;
if
(
val
<
best_val
)
{
best_val
=
val
;
best
=
convert_short_sat_rte
(
disp_selected[idx]
)
;
}
}
disp[res_step
*
y
+
x]
=
best
;
}
}
modules/ocl/src/stereo_csbp.cpp
0 → 100644
View file @
bd1d7cd2
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
// Jin Ma, jin@multicorewareinc.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using
namespace
cv
;
using
namespace
cv
::
ocl
;
using
namespace
std
;
#if !defined (HAVE_OPENCL)
namespace
cv
{
namespace
ocl
{
void
cv
::
ocl
::
StereoConstantSpaceBP
::
estimateRecommendedParams
(
int
,
int
,
int
&
,
int
&
,
int
&
,
int
&
)
{
throw_nogpu
();
}
cv
::
ocl
::
StereoConstantSpaceBP
::
StereoConstantSpaceBP
(
int
,
int
,
int
,
int
,
int
)
{
throw_nogpu
();
}
cv
::
ocl
::
StereoConstantSpaceBP
::
StereoConstantSpaceBP
(
int
,
int
,
int
,
int
,
float
,
float
,
float
,
float
,
int
,
int
)
{
throw_nogpu
();
}
void
cv
::
ocl
::
StereoConstantSpaceBP
::
operator
()(
const
oclMat
&
,
const
oclMat
&
,
oclMat
&
)
{
throw_nogpu
();
}
}
}
#else
/* !defined (HAVE_OPENCL) */
namespace
cv
{
namespace
ocl
{
///////////////////////////OpenCL kernel strings///////////////////////////
extern
const
char
*
stereocsbp
;
}
}
namespace
cv
{
namespace
ocl
{
namespace
stereoCSBP
{
//////////////////////////////////////////////////////////////////////////
//////////////////////////////common////////////////////////////////////
////////////////////////////////////////////////////////////////////////
static
inline
int
divUp
(
int
total
,
int
grain
)
{
return
(
total
+
grain
-
1
)
/
grain
;
}
static
string
get_kernel_name
(
string
kernel_name
,
int
data_type
)
{
stringstream
idxStr
;
if
(
data_type
==
CV_16S
)
idxStr
<<
"0"
;
else
idxStr
<<
"1"
;
kernel_name
+=
idxStr
.
str
();
return
kernel_name
;
}
using
cv
::
ocl
::
StereoConstantSpaceBP
;
//////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////init_data_cost//////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////
static
void
init_data_cost_caller
(
const
oclMat
&
left
,
const
oclMat
&
right
,
oclMat
&
temp
,
StereoConstantSpaceBP
&
rthis
,
int
msg_step
,
int
h
,
int
w
,
int
level
)
{
Context
*
clCxt
=
left
.
clCxt
;
int
data_type
=
rthis
.
msg_type
;
int
channels
=
left
.
oclchannels
();
string
kernelName
=
get_kernel_name
(
"init_data_cost_"
,
data_type
);
cl_kernel
kernel
=
openCLGetKernelFromSource
(
clCxt
,
&
stereocsbp
,
kernelName
);
//size_t blockSize = 256;
size_t
localThreads
[]
=
{
32
,
8
,
1
};
size_t
globalThreads
[]
=
{
divUp
(
w
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
h
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
int
cdisp_step1
=
msg_step
*
h
;
openCLVerifyKernel
(
clCxt
,
kernel
,
localThreads
);
openCLSafeCall
(
clSetKernelArg
(
kernel
,
0
,
sizeof
(
cl_mem
),
(
void
*
)
&
temp
.
data
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
1
,
sizeof
(
cl_mem
),
(
void
*
)
&
left
.
data
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
2
,
sizeof
(
cl_mem
),
(
void
*
)
&
right
.
data
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_int
),
(
void
*
)
&
h
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_int
),
(
void
*
)
&
w
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_int
),
(
void
*
)
&
level
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_int
),
(
void
*
)
&
channels
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
7
,
sizeof
(
cl_int
),
(
void
*
)
&
msg_step
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
8
,
sizeof
(
cl_float
),
(
void
*
)
&
rthis
.
data_weight
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
9
,
sizeof
(
cl_float
),
(
void
*
)
&
rthis
.
max_data_term
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
10
,
sizeof
(
cl_int
),
(
void
*
)
&
cdisp_step1
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
11
,
sizeof
(
cl_int
),
(
void
*
)
&
rthis
.
min_disp_th
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
12
,
sizeof
(
cl_int
),
(
void
*
)
&
left
.
step
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
13
,
sizeof
(
cl_int
),
(
void
*
)
&
rthis
.
ndisp
));
openCLSafeCall
(
clEnqueueNDRangeKernel
(
*
(
cl_command_queue
*
)
getoclCommandQueue
(),
kernel
,
2
,
NULL
,
globalThreads
,
localThreads
,
0
,
NULL
,
NULL
));
clFinish
(
*
(
cl_command_queue
*
)
getoclCommandQueue
());
openCLSafeCall
(
clReleaseKernel
(
kernel
));
}
static
void
init_data_cost_reduce_caller
(
const
oclMat
&
left
,
const
oclMat
&
right
,
oclMat
&
temp
,
StereoConstantSpaceBP
&
rthis
,
int
msg_step
,
int
h
,
int
w
,
int
level
)
{
Context
*
clCxt
=
left
.
clCxt
;
int
data_type
=
rthis
.
msg_type
;
int
channels
=
left
.
oclchannels
();
int
win_size
=
(
int
)
std
::
pow
(
2.
f
,
level
);
string
kernelName
=
get_kernel_name
(
"init_data_cost_reduce_"
,
data_type
);
cl_kernel
kernel
=
openCLGetKernelFromSource
(
clCxt
,
&
stereocsbp
,
kernelName
);
const
int
threadsNum
=
256
;
//size_t blockSize = threadsNum;
size_t
localThreads
[
3
]
=
{
win_size
,
1
,
threadsNum
/
win_size
};
size_t
globalThreads
[
3
]
=
{
w
*
localThreads
[
0
],
h
*
divUp
(
rthis
.
ndisp
,
localThreads
[
2
])
*
localThreads
[
1
],
1
*
localThreads
[
2
]
};
int
local_mem_size
=
threadsNum
*
sizeof
(
float
);
int
cdisp_step1
=
msg_step
*
h
;
openCLVerifyKernel
(
clCxt
,
kernel
,
localThreads
);
openCLSafeCall
(
clSetKernelArg
(
kernel
,
0
,
sizeof
(
cl_mem
),
(
void
*
)
&
temp
.
data
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
1
,
sizeof
(
cl_mem
),
(
void
*
)
&
left
.
data
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
2
,
sizeof
(
cl_mem
),
(
void
*
)
&
right
.
data
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
3
,
local_mem_size
,
(
void
*
)
NULL
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_int
),
(
void
*
)
&
level
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_int
),
(
void
*
)
&
left
.
rows
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_int
),
(
void
*
)
&
left
.
cols
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
7
,
sizeof
(
cl_int
),
(
void
*
)
&
h
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
8
,
sizeof
(
cl_int
),
(
void
*
)
&
win_size
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
9
,
sizeof
(
cl_int
),
(
void
*
)
&
channels
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
10
,
sizeof
(
cl_int
),
(
void
*
)
&
rthis
.
ndisp
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
11
,
sizeof
(
cl_int
),
(
void
*
)
&
left
.
step
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
12
,
sizeof
(
cl_float
),
(
void
*
)
&
rthis
.
data_weight
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
13
,
sizeof
(
cl_float
),
(
void
*
)
&
rthis
.
max_data_term
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
14
,
sizeof
(
cl_int
),
(
void
*
)
&
rthis
.
min_disp_th
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
15
,
sizeof
(
cl_int
),
(
void
*
)
&
cdisp_step1
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
16
,
sizeof
(
cl_int
),
(
void
*
)
&
msg_step
));
openCLSafeCall
(
clEnqueueNDRangeKernel
(
*
(
cl_command_queue
*
)
getoclCommandQueue
(),
kernel
,
3
,
NULL
,
globalThreads
,
localThreads
,
0
,
NULL
,
NULL
));
clFinish
(
*
(
cl_command_queue
*
)
getoclCommandQueue
());
openCLSafeCall
(
clReleaseKernel
(
kernel
));
}
static
void
get_first_initial_local_caller
(
uchar
*
data_cost_selected
,
uchar
*
disp_selected_pyr
,
oclMat
&
temp
,
StereoConstantSpaceBP
&
rthis
,
int
h
,
int
w
,
int
nr_plane
,
int
msg_step
)
{
Context
*
clCxt
=
temp
.
clCxt
;
int
data_type
=
rthis
.
msg_type
;
string
kernelName
=
get_kernel_name
(
"get_first_k_initial_local_"
,
data_type
);
cl_kernel
kernel
=
openCLGetKernelFromSource
(
clCxt
,
&
stereocsbp
,
kernelName
);
//size_t blockSize = 256;
size_t
localThreads
[]
=
{
32
,
8
,
1
};
size_t
globalThreads
[]
=
{
divUp
(
w
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
h
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
int
disp_step
=
msg_step
*
h
;
openCLVerifyKernel
(
clCxt
,
kernel
,
localThreads
);
openCLSafeCall
(
clSetKernelArg
(
kernel
,
0
,
sizeof
(
cl_mem
),
(
void
*
)
&
data_cost_selected
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
1
,
sizeof
(
cl_mem
),
(
void
*
)
&
disp_selected_pyr
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
2
,
sizeof
(
cl_mem
),
(
void
*
)
&
temp
.
data
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_int
),
(
void
*
)
&
h
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_int
),
(
void
*
)
&
w
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_int
),
(
void
*
)
&
nr_plane
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_int
),
(
void
*
)
&
msg_step
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
7
,
sizeof
(
cl_int
),
(
void
*
)
&
disp_step
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
8
,
sizeof
(
cl_int
),
(
void
*
)
&
rthis
.
ndisp
));
openCLSafeCall
(
clEnqueueNDRangeKernel
(
*
(
cl_command_queue
*
)
getoclCommandQueue
(),
kernel
,
2
,
NULL
,
globalThreads
,
localThreads
,
0
,
NULL
,
NULL
));
clFinish
(
*
(
cl_command_queue
*
)
getoclCommandQueue
());
openCLSafeCall
(
clReleaseKernel
(
kernel
));
}
static
void
get_first_initial_global_caller
(
uchar
*
data_cost_selected
,
uchar
*
disp_selected_pyr
,
oclMat
&
temp
,
StereoConstantSpaceBP
&
rthis
,
int
h
,
int
w
,
int
nr_plane
,
int
msg_step
)
{
Context
*
clCxt
=
temp
.
clCxt
;
int
data_type
=
rthis
.
msg_type
;
string
kernelName
=
get_kernel_name
(
"get_first_k_initial_global_"
,
data_type
);
cl_kernel
kernel
=
openCLGetKernelFromSource
(
clCxt
,
&
stereocsbp
,
kernelName
);
//size_t blockSize = 256;
size_t
localThreads
[]
=
{
32
,
8
,
1
};
size_t
globalThreads
[]
=
{
divUp
(
w
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
h
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
int
disp_step
=
msg_step
*
h
;
openCLVerifyKernel
(
clCxt
,
kernel
,
localThreads
);
openCLSafeCall
(
clSetKernelArg
(
kernel
,
0
,
sizeof
(
cl_mem
),
(
void
*
)
&
data_cost_selected
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
1
,
sizeof
(
cl_mem
),
(
void
*
)
&
disp_selected_pyr
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
2
,
sizeof
(
cl_mem
),
(
void
*
)
&
temp
.
data
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_int
),
(
void
*
)
&
h
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_int
),
(
void
*
)
&
w
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_int
),
(
void
*
)
&
nr_plane
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_int
),
(
void
*
)
&
msg_step
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
7
,
sizeof
(
cl_int
),
(
void
*
)
&
disp_step
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
8
,
sizeof
(
cl_int
),
(
void
*
)
&
rthis
.
ndisp
));
openCLSafeCall
(
clEnqueueNDRangeKernel
(
*
(
cl_command_queue
*
)
getoclCommandQueue
(),
kernel
,
2
,
NULL
,
globalThreads
,
localThreads
,
0
,
NULL
,
NULL
));
clFinish
(
*
(
cl_command_queue
*
)
getoclCommandQueue
());
openCLSafeCall
(
clReleaseKernel
(
kernel
));
}
static
void
init_data_cost
(
const
oclMat
&
left
,
const
oclMat
&
right
,
oclMat
&
temp
,
StereoConstantSpaceBP
&
rthis
,
uchar
*
disp_selected_pyr
,
uchar
*
data_cost_selected
,
size_t
msg_step
,
int
h
,
int
w
,
int
level
,
int
nr_plane
)
{
if
(
level
<=
1
)
init_data_cost_caller
(
left
,
right
,
temp
,
rthis
,
msg_step
,
h
,
w
,
level
);
else
init_data_cost_reduce_caller
(
left
,
right
,
temp
,
rthis
,
msg_step
,
h
,
w
,
level
);
if
(
rthis
.
use_local_init_data_cost
==
true
)
{
get_first_initial_local_caller
(
data_cost_selected
,
disp_selected_pyr
,
temp
,
rthis
,
h
,
w
,
nr_plane
,
msg_step
);
}
else
{
get_first_initial_global_caller
(
data_cost_selected
,
disp_selected_pyr
,
temp
,
rthis
,
h
,
w
,
nr_plane
,
msg_step
);
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////compute_data_cost//////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
static
void
compute_data_cost_caller
(
uchar
*
disp_selected_pyr
,
uchar
*
data_cost
,
StereoConstantSpaceBP
&
rthis
,
int
msg_step1
,
int
msg_step2
,
const
oclMat
&
left
,
const
oclMat
&
right
,
int
h
,
int
w
,
int
h2
,
int
level
,
int
nr_plane
)
{
Context
*
clCxt
=
left
.
clCxt
;
int
channels
=
left
.
oclchannels
();
int
data_type
=
rthis
.
msg_type
;
string
kernelName
=
get_kernel_name
(
"compute_data_cost_"
,
data_type
);
cl_kernel
kernel
=
openCLGetKernelFromSource
(
clCxt
,
&
stereocsbp
,
kernelName
);
//size_t blockSize = 256;
size_t
localThreads
[]
=
{
32
,
8
,
1
};
size_t
globalThreads
[]
=
{
divUp
(
w
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
h
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
int
disp_step1
=
msg_step1
*
h
;
int
disp_step2
=
msg_step2
*
h2
;
openCLVerifyKernel
(
clCxt
,
kernel
,
localThreads
);
openCLSafeCall
(
clSetKernelArg
(
kernel
,
0
,
sizeof
(
cl_mem
),
(
void
*
)
&
disp_selected_pyr
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
1
,
sizeof
(
cl_mem
),
(
void
*
)
&
data_cost
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
2
,
sizeof
(
cl_mem
),
(
void
*
)
&
left
.
data
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
(
void
*
)
&
right
.
data
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_int
),
(
void
*
)
&
h
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_int
),
(
void
*
)
&
w
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_int
),
(
void
*
)
&
level
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
7
,
sizeof
(
cl_int
),
(
void
*
)
&
nr_plane
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
8
,
sizeof
(
cl_int
),
(
void
*
)
&
channels
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
9
,
sizeof
(
cl_int
),
(
void
*
)
&
msg_step1
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
10
,
sizeof
(
cl_int
),
(
void
*
)
&
msg_step2
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
11
,
sizeof
(
cl_int
),
(
void
*
)
&
disp_step1
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
12
,
sizeof
(
cl_int
),
(
void
*
)
&
disp_step2
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
13
,
sizeof
(
cl_float
),
(
void
*
)
&
rthis
.
data_weight
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
14
,
sizeof
(
cl_float
),
(
void
*
)
&
rthis
.
max_data_term
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
15
,
sizeof
(
cl_int
),
(
void
*
)
&
left
.
step
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
16
,
sizeof
(
cl_int
),
(
void
*
)
&
rthis
.
min_disp_th
));
openCLSafeCall
(
clEnqueueNDRangeKernel
(
*
(
cl_command_queue
*
)
getoclCommandQueue
(),
kernel
,
2
,
NULL
,
globalThreads
,
localThreads
,
0
,
NULL
,
NULL
));
clFinish
(
*
(
cl_command_queue
*
)
getoclCommandQueue
());
openCLSafeCall
(
clReleaseKernel
(
kernel
));
}
static
void
compute_data_cost_reduce_caller
(
uchar
*
disp_selected_pyr
,
uchar
*
data_cost
,
StereoConstantSpaceBP
&
rthis
,
int
msg_step1
,
int
msg_step2
,
const
oclMat
&
left
,
const
oclMat
&
right
,
int
h
,
int
w
,
int
h2
,
int
level
,
int
nr_plane
)
{
Context
*
clCxt
=
left
.
clCxt
;
int
data_type
=
rthis
.
msg_type
;
int
channels
=
left
.
oclchannels
();
int
win_size
=
(
int
)
std
::
pow
(
2.
f
,
level
);
string
kernelName
=
get_kernel_name
(
"compute_data_cost_reduce_"
,
data_type
);
cl_kernel
kernel
=
openCLGetKernelFromSource
(
clCxt
,
&
stereocsbp
,
kernelName
);
const
size_t
threadsNum
=
256
;
//size_t blockSize = threadsNum;
size_t
localThreads
[
3
]
=
{
win_size
,
1
,
threadsNum
/
win_size
};
size_t
globalThreads
[
3
]
=
{
w
*
localThreads
[
0
],
h
*
divUp
(
nr_plane
,
localThreads
[
2
])
*
localThreads
[
1
],
1
*
localThreads
[
2
]
};
int
disp_step1
=
msg_step1
*
h
;
int
disp_step2
=
msg_step2
*
h2
;
size_t
local_mem_size
=
threadsNum
*
sizeof
(
float
);
openCLVerifyKernel
(
clCxt
,
kernel
,
localThreads
);
openCLSafeCall
(
clSetKernelArg
(
kernel
,
0
,
sizeof
(
cl_mem
),
(
void
*
)
&
disp_selected_pyr
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
1
,
sizeof
(
cl_mem
),
(
void
*
)
&
data_cost
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
2
,
sizeof
(
cl_mem
),
(
void
*
)
&
left
.
data
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
(
void
*
)
&
right
.
data
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
4
,
local_mem_size
,
(
void
*
)
NULL
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_int
),
(
void
*
)
&
level
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_int
),
(
void
*
)
&
left
.
rows
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
7
,
sizeof
(
cl_int
),
(
void
*
)
&
left
.
cols
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
8
,
sizeof
(
cl_int
),
(
void
*
)
&
h
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
9
,
sizeof
(
cl_int
),
(
void
*
)
&
nr_plane
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
10
,
sizeof
(
cl_int
),
(
void
*
)
&
channels
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
11
,
sizeof
(
cl_int
),
(
void
*
)
&
win_size
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
12
,
sizeof
(
cl_int
),
(
void
*
)
&
msg_step1
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
13
,
sizeof
(
cl_int
),
(
void
*
)
&
msg_step2
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
14
,
sizeof
(
cl_int
),
(
void
*
)
&
disp_step1
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
15
,
sizeof
(
cl_int
),
(
void
*
)
&
disp_step2
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
16
,
sizeof
(
cl_float
),
(
void
*
)
&
rthis
.
data_weight
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
17
,
sizeof
(
cl_float
),
(
void
*
)
&
rthis
.
max_data_term
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
18
,
sizeof
(
cl_int
),
(
void
*
)
&
left
.
step
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
19
,
sizeof
(
cl_int
),
(
void
*
)
&
rthis
.
min_disp_th
));
openCLSafeCall
(
clEnqueueNDRangeKernel
(
*
(
cl_command_queue
*
)
getoclCommandQueue
(),
kernel
,
3
,
NULL
,
globalThreads
,
localThreads
,
0
,
NULL
,
NULL
));
clFinish
(
*
(
cl_command_queue
*
)
getoclCommandQueue
());
openCLSafeCall
(
clReleaseKernel
(
kernel
));
}
static
void
compute_data_cost
(
uchar
*
disp_selected_pyr
,
uchar
*
data_cost
,
StereoConstantSpaceBP
&
rthis
,
int
msg_step1
,
int
msg_step2
,
const
oclMat
&
left
,
const
oclMat
&
right
,
int
h
,
int
w
,
int
h2
,
int
level
,
int
nr_plane
)
{
if
(
level
<=
1
)
compute_data_cost_caller
(
disp_selected_pyr
,
data_cost
,
rthis
,
msg_step1
,
msg_step2
,
left
,
right
,
h
,
w
,
h2
,
level
,
nr_plane
);
else
compute_data_cost_reduce_caller
(
disp_selected_pyr
,
data_cost
,
rthis
,
msg_step1
,
msg_step2
,
left
,
right
,
h
,
w
,
h2
,
level
,
nr_plane
);
}
////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////init message//////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////
static
void
init_message
(
uchar
*
u_new
,
uchar
*
d_new
,
uchar
*
l_new
,
uchar
*
r_new
,
uchar
*
u_cur
,
uchar
*
d_cur
,
uchar
*
l_cur
,
uchar
*
r_cur
,
uchar
*
disp_selected_pyr_new
,
uchar
*
disp_selected_pyr_cur
,
uchar
*
data_cost_selected
,
uchar
*
data_cost
,
oclMat
&
temp
,
StereoConstantSpaceBP
rthis
,
size_t
msg_step1
,
size_t
msg_step2
,
int
h
,
int
w
,
int
nr_plane
,
int
h2
,
int
w2
,
int
nr_plane2
)
{
Context
*
clCxt
=
temp
.
clCxt
;
int
data_type
=
rthis
.
msg_type
;
string
kernelName
=
get_kernel_name
(
"init_message_"
,
data_type
);
cl_kernel
kernel
=
openCLGetKernelFromSource
(
clCxt
,
&
stereocsbp
,
kernelName
);
//size_t blockSize = 256;
size_t
localThreads
[]
=
{
32
,
8
,
1
};
size_t
globalThreads
[]
=
{
divUp
(
w
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
h
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
int
disp_step1
=
msg_step1
*
h
;
int
disp_step2
=
msg_step2
*
h2
;
openCLVerifyKernel
(
clCxt
,
kernel
,
localThreads
);
openCLSafeCall
(
clSetKernelArg
(
kernel
,
0
,
sizeof
(
cl_mem
),
(
void
*
)
&
u_new
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
1
,
sizeof
(
cl_mem
),
(
void
*
)
&
d_new
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
2
,
sizeof
(
cl_mem
),
(
void
*
)
&
l_new
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
(
void
*
)
&
r_new
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
(
void
*
)
&
u_cur
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
(
void
*
)
&
d_cur
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_mem
),
(
void
*
)
&
l_cur
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
7
,
sizeof
(
cl_mem
),
(
void
*
)
&
r_cur
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
8
,
sizeof
(
cl_mem
),
(
void
*
)
&
temp
.
data
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
9
,
sizeof
(
cl_mem
),
(
void
*
)
&
disp_selected_pyr_new
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
10
,
sizeof
(
cl_mem
),
(
void
*
)
&
disp_selected_pyr_cur
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
11
,
sizeof
(
cl_mem
),
(
void
*
)
&
data_cost_selected
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
12
,
sizeof
(
cl_mem
),
(
void
*
)
&
data_cost
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
13
,
sizeof
(
cl_int
),
(
void
*
)
&
h
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
14
,
sizeof
(
cl_int
),
(
void
*
)
&
w
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
15
,
sizeof
(
cl_int
),
(
void
*
)
&
nr_plane
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
16
,
sizeof
(
cl_int
),
(
void
*
)
&
h2
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
17
,
sizeof
(
cl_int
),
(
void
*
)
&
w2
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
18
,
sizeof
(
cl_int
),
(
void
*
)
&
nr_plane2
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
19
,
sizeof
(
cl_int
),
(
void
*
)
&
disp_step1
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
20
,
sizeof
(
cl_int
),
(
void
*
)
&
disp_step2
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
21
,
sizeof
(
cl_int
),
(
void
*
)
&
msg_step1
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
22
,
sizeof
(
cl_int
),
(
void
*
)
&
msg_step2
));
openCLSafeCall
(
clEnqueueNDRangeKernel
(
*
(
cl_command_queue
*
)
getoclCommandQueue
(),
kernel
,
2
,
NULL
,
globalThreads
,
localThreads
,
0
,
NULL
,
NULL
));
clFinish
(
*
(
cl_command_queue
*
)
getoclCommandQueue
());
openCLSafeCall
(
clReleaseKernel
(
kernel
));
}
////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////calc_all_iterations////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////
static
void
calc_all_iterations_caller
(
uchar
*
u
,
uchar
*
d
,
uchar
*
l
,
uchar
*
r
,
uchar
*
data_cost_selected
,
uchar
*
disp_selected_pyr
,
oclMat
&
temp
,
StereoConstantSpaceBP
rthis
,
int
msg_step
,
int
h
,
int
w
,
int
nr_plane
,
int
i
)
{
Context
*
clCxt
=
temp
.
clCxt
;
int
data_type
=
rthis
.
msg_type
;
string
kernelName
=
get_kernel_name
(
"compute_message_"
,
data_type
);
cl_kernel
kernel
=
openCLGetKernelFromSource
(
clCxt
,
&
stereocsbp
,
kernelName
);
size_t
localThreads
[]
=
{
32
,
8
,
1
};
size_t
globalThreads
[]
=
{
divUp
(
w
,
(
localThreads
[
0
])
<<
1
)
*
localThreads
[
0
],
divUp
(
h
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
int
disp_step
=
msg_step
*
h
;
openCLVerifyKernel
(
clCxt
,
kernel
,
localThreads
);
openCLSafeCall
(
clSetKernelArg
(
kernel
,
0
,
sizeof
(
cl_mem
),
(
void
*
)
&
u
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
1
,
sizeof
(
cl_mem
),
(
void
*
)
&
d
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
2
,
sizeof
(
cl_mem
),
(
void
*
)
&
l
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
(
void
*
)
&
r
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
(
void
*
)
&
data_cost_selected
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
(
void
*
)
&
disp_selected_pyr
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_mem
),
(
void
*
)
&
temp
.
data
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
7
,
sizeof
(
cl_int
),
(
void
*
)
&
h
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
8
,
sizeof
(
cl_int
),
(
void
*
)
&
w
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
9
,
sizeof
(
cl_int
),
(
void
*
)
&
nr_plane
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
10
,
sizeof
(
cl_int
),
(
void
*
)
&
i
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
11
,
sizeof
(
cl_float
),
(
void
*
)
&
rthis
.
max_disc_term
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
12
,
sizeof
(
cl_int
),
(
void
*
)
&
disp_step
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
13
,
sizeof
(
cl_int
),
(
void
*
)
&
msg_step
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
14
,
sizeof
(
cl_float
),
(
void
*
)
&
rthis
.
disc_single_jump
));
openCLSafeCall
(
clEnqueueNDRangeKernel
(
*
(
cl_command_queue
*
)
getoclCommandQueue
(),
kernel
,
2
,
NULL
,
globalThreads
,
localThreads
,
0
,
NULL
,
NULL
));
clFinish
(
*
(
cl_command_queue
*
)
getoclCommandQueue
());
openCLSafeCall
(
clReleaseKernel
(
kernel
));
}
static
void
calc_all_iterations
(
uchar
*
u
,
uchar
*
d
,
uchar
*
l
,
uchar
*
r
,
uchar
*
data_cost_selected
,
uchar
*
disp_selected_pyr
,
oclMat
&
temp
,
StereoConstantSpaceBP
rthis
,
int
msg_step
,
int
h
,
int
w
,
int
nr_plane
)
{
for
(
int
t
=
0
;
t
<
rthis
.
iters
;
t
++
)
calc_all_iterations_caller
(
u
,
d
,
l
,
r
,
data_cost_selected
,
disp_selected_pyr
,
temp
,
rthis
,
msg_step
,
h
,
w
,
nr_plane
,
t
&
1
);
}
///////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////compute_disp////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////
static
void
compute_disp
(
uchar
*
u
,
uchar
*
d
,
uchar
*
l
,
uchar
*
r
,
uchar
*
data_cost_selected
,
uchar
*
disp_selected_pyr
,
StereoConstantSpaceBP
&
rthis
,
size_t
msg_step
,
oclMat
&
disp
,
int
nr_plane
)
{
Context
*
clCxt
=
disp
.
clCxt
;
int
data_type
=
rthis
.
msg_type
;
string
kernelName
=
get_kernel_name
(
"compute_disp_"
,
data_type
);
cl_kernel
kernel
=
openCLGetKernelFromSource
(
clCxt
,
&
stereocsbp
,
kernelName
);
//size_t blockSize = 256;
size_t
localThreads
[]
=
{
32
,
8
,
1
};
size_t
globalThreads
[]
=
{
divUp
(
disp
.
cols
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
disp
.
rows
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
int
step_size
=
disp
.
step
/
disp
.
elemSize
();
int
disp_step
=
disp
.
rows
*
msg_step
;
openCLVerifyKernel
(
clCxt
,
kernel
,
localThreads
);
openCLSafeCall
(
clSetKernelArg
(
kernel
,
0
,
sizeof
(
cl_mem
),
(
void
*
)
&
u
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
1
,
sizeof
(
cl_mem
),
(
void
*
)
&
d
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
2
,
sizeof
(
cl_mem
),
(
void
*
)
&
l
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
(
void
*
)
&
r
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
(
void
*
)
&
data_cost_selected
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
(
void
*
)
&
disp_selected_pyr
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_mem
),
(
void
*
)
&
disp
.
data
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
7
,
sizeof
(
cl_int
),
(
void
*
)
&
step_size
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
8
,
sizeof
(
cl_int
),
(
void
*
)
&
disp
.
cols
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
9
,
sizeof
(
cl_int
),
(
void
*
)
&
disp
.
rows
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
10
,
sizeof
(
cl_int
),
(
void
*
)
&
nr_plane
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
11
,
sizeof
(
cl_int
),
(
void
*
)
&
msg_step
));
openCLSafeCall
(
clSetKernelArg
(
kernel
,
12
,
sizeof
(
cl_int
),
(
void
*
)
&
disp_step
));
openCLSafeCall
(
clEnqueueNDRangeKernel
(
*
(
cl_command_queue
*
)
getoclCommandQueue
(),
kernel
,
2
,
NULL
,
globalThreads
,
localThreads
,
0
,
NULL
,
NULL
));
clFinish
(
*
(
cl_command_queue
*
)
getoclCommandQueue
());
openCLSafeCall
(
clReleaseKernel
(
kernel
));
}
}
}
}
namespace
{
const
float
DEFAULT_MAX_DATA_TERM
=
30.0
f
;
const
float
DEFAULT_DATA_WEIGHT
=
1.0
f
;
const
float
DEFAULT_MAX_DISC_TERM
=
160.0
f
;
const
float
DEFAULT_DISC_SINGLE_JUMP
=
10.0
f
;
}
void
cv
::
ocl
::
StereoConstantSpaceBP
::
estimateRecommendedParams
(
int
width
,
int
height
,
int
&
ndisp
,
int
&
iters
,
int
&
levels
,
int
&
nr_plane
)
{
ndisp
=
(
int
)
((
float
)
width
/
3.14
f
);
if
((
ndisp
&
1
)
!=
0
)
ndisp
++
;
int
mm
=
::
max
(
width
,
height
);
iters
=
mm
/
100
+
((
mm
>
1200
)
?
-
4
:
4
);
levels
=
(
int
)
::
log
(
static_cast
<
double
>
(
mm
))
*
2
/
3
;
if
(
levels
==
0
)
levels
++
;
nr_plane
=
(
int
)
((
float
)
ndisp
/
std
::
pow
(
2.0
,
levels
+
1
));
}
cv
::
ocl
::
StereoConstantSpaceBP
::
StereoConstantSpaceBP
(
int
ndisp_
,
int
iters_
,
int
levels_
,
int
nr_plane_
,
int
msg_type_
)
:
ndisp
(
ndisp_
),
iters
(
iters_
),
levels
(
levels_
),
nr_plane
(
nr_plane_
),
max_data_term
(
DEFAULT_MAX_DATA_TERM
),
data_weight
(
DEFAULT_DATA_WEIGHT
),
max_disc_term
(
DEFAULT_MAX_DISC_TERM
),
disc_single_jump
(
DEFAULT_DISC_SINGLE_JUMP
),
min_disp_th
(
0
),
msg_type
(
msg_type_
),
use_local_init_data_cost
(
true
)
{
CV_Assert
(
msg_type_
==
CV_32F
||
msg_type_
==
CV_16S
);
}
cv
::
ocl
::
StereoConstantSpaceBP
::
StereoConstantSpaceBP
(
int
ndisp_
,
int
iters_
,
int
levels_
,
int
nr_plane_
,
float
max_data_term_
,
float
data_weight_
,
float
max_disc_term_
,
float
disc_single_jump_
,
int
min_disp_th_
,
int
msg_type_
)
:
ndisp
(
ndisp_
),
iters
(
iters_
),
levels
(
levels_
),
nr_plane
(
nr_plane_
),
max_data_term
(
max_data_term_
),
data_weight
(
data_weight_
),
max_disc_term
(
max_disc_term_
),
disc_single_jump
(
disc_single_jump_
),
min_disp_th
(
min_disp_th_
),
msg_type
(
msg_type_
),
use_local_init_data_cost
(
true
)
{
CV_Assert
(
msg_type_
==
CV_32F
||
msg_type_
==
CV_16S
);
}
template
<
class
T
>
static
void
csbp_operator
(
StereoConstantSpaceBP
&
rthis
,
oclMat
u
[
2
],
oclMat
d
[
2
],
oclMat
l
[
2
],
oclMat
r
[
2
],
oclMat
disp_selected_pyr
[
2
],
oclMat
&
data_cost
,
oclMat
&
data_cost_selected
,
oclMat
&
temp
,
oclMat
&
out
,
const
oclMat
&
left
,
const
oclMat
&
right
,
oclMat
&
disp
)
{
CV_DbgAssert
(
0
<
rthis
.
ndisp
&&
0
<
rthis
.
iters
&&
0
<
rthis
.
levels
&&
0
<
rthis
.
nr_plane
&&
left
.
rows
==
right
.
rows
&&
left
.
cols
==
right
.
cols
&&
left
.
type
()
==
right
.
type
());
CV_Assert
(
rthis
.
levels
<=
8
&&
(
left
.
type
()
==
CV_8UC1
||
left
.
type
()
==
CV_8UC3
));
const
Scalar
zero
=
Scalar
::
all
(
0
);
////////////////////////////////////Init///////////////////////////////////////////////////
int
rows
=
left
.
rows
;
int
cols
=
left
.
cols
;
rthis
.
levels
=
min
(
rthis
.
levels
,
int
(
log
((
double
)
rthis
.
ndisp
)
/
log
(
2.0
)));
int
levels
=
rthis
.
levels
;
AutoBuffer
<
int
>
buf
(
levels
*
4
);
int
*
cols_pyr
=
buf
;
int
*
rows_pyr
=
cols_pyr
+
levels
;
int
*
nr_plane_pyr
=
rows_pyr
+
levels
;
int
*
step_pyr
=
nr_plane_pyr
+
levels
;
cols_pyr
[
0
]
=
cols
;
rows_pyr
[
0
]
=
rows
;
nr_plane_pyr
[
0
]
=
rthis
.
nr_plane
;
const
int
n
=
64
;
step_pyr
[
0
]
=
alignSize
(
cols
*
sizeof
(
T
),
n
)
/
sizeof
(
T
);
for
(
int
i
=
1
;
i
<
levels
;
i
++
)
{
cols_pyr
[
i
]
=
cols_pyr
[
i
-
1
]
/
2
;
rows_pyr
[
i
]
=
rows_pyr
[
i
-
1
]
/
2
;
nr_plane_pyr
[
i
]
=
nr_plane_pyr
[
i
-
1
]
*
2
;
step_pyr
[
i
]
=
alignSize
(
cols_pyr
[
i
]
*
sizeof
(
T
),
n
)
/
sizeof
(
T
);
}
Size
msg_size
(
step_pyr
[
0
],
rows
*
nr_plane_pyr
[
0
]);
Size
data_cost_size
(
step_pyr
[
0
],
rows
*
nr_plane_pyr
[
0
]
*
2
);
u
[
0
].
create
(
msg_size
,
DataType
<
T
>::
type
);
d
[
0
].
create
(
msg_size
,
DataType
<
T
>::
type
);
l
[
0
].
create
(
msg_size
,
DataType
<
T
>::
type
);
r
[
0
].
create
(
msg_size
,
DataType
<
T
>::
type
);
u
[
1
].
create
(
msg_size
,
DataType
<
T
>::
type
);
d
[
1
].
create
(
msg_size
,
DataType
<
T
>::
type
);
l
[
1
].
create
(
msg_size
,
DataType
<
T
>::
type
);
r
[
1
].
create
(
msg_size
,
DataType
<
T
>::
type
);
disp_selected_pyr
[
0
].
create
(
msg_size
,
DataType
<
T
>::
type
);
disp_selected_pyr
[
1
].
create
(
msg_size
,
DataType
<
T
>::
type
);
data_cost
.
create
(
data_cost_size
,
DataType
<
T
>::
type
);
data_cost_selected
.
create
(
msg_size
,
DataType
<
T
>::
type
);
Size
temp_size
=
data_cost_size
;
if
(
data_cost_size
.
width
*
data_cost_size
.
height
<
step_pyr
[
0
]
*
rows_pyr
[
levels
-
1
]
*
rthis
.
ndisp
)
temp_size
=
Size
(
step_pyr
[
0
],
rows_pyr
[
levels
-
1
]
*
rthis
.
ndisp
);
temp
.
create
(
temp_size
,
DataType
<
T
>::
type
);
temp
=
zero
;
///////////////////////////////// Compute////////////////////////////////////////////////
//csbp::load_constants(rthis.ndisp, rthis.max_data_term, rthis.data_weight,
// rthis.max_disc_term, rthis.disc_single_jump, rthis.min_disp_th, left, right, temp);
l
[
0
]
=
zero
;
d
[
0
]
=
zero
;
r
[
0
]
=
zero
;
u
[
0
]
=
zero
;
disp_selected_pyr
[
0
]
=
zero
;
l
[
1
]
=
zero
;
d
[
1
]
=
zero
;
r
[
1
]
=
zero
;
u
[
1
]
=
zero
;
disp_selected_pyr
[
1
]
=
zero
;
data_cost
=
zero
;
data_cost_selected
=
zero
;
int
cur_idx
=
0
;
for
(
int
i
=
levels
-
1
;
i
>=
0
;
i
--
)
{
if
(
i
==
levels
-
1
)
{
cv
::
ocl
::
stereoCSBP
::
init_data_cost
(
left
,
right
,
temp
,
rthis
,
disp_selected_pyr
[
cur_idx
].
data
,
data_cost_selected
.
data
,
step_pyr
[
0
],
rows_pyr
[
i
],
cols_pyr
[
i
],
i
,
nr_plane_pyr
[
i
]);
}
else
{
cv
::
ocl
::
stereoCSBP
::
compute_data_cost
(
disp_selected_pyr
[
cur_idx
].
data
,
data_cost
.
data
,
rthis
,
step_pyr
[
0
],
step_pyr
[
0
],
left
,
right
,
rows_pyr
[
i
],
cols_pyr
[
i
],
rows_pyr
[
i
+
1
],
i
,
nr_plane_pyr
[
i
+
1
]);
int
new_idx
=
(
cur_idx
+
1
)
&
1
;
cv
::
ocl
::
stereoCSBP
::
init_message
(
u
[
new_idx
].
data
,
d
[
new_idx
].
data
,
l
[
new_idx
].
data
,
r
[
new_idx
].
data
,
u
[
cur_idx
].
data
,
d
[
cur_idx
].
data
,
l
[
cur_idx
].
data
,
r
[
cur_idx
].
data
,
disp_selected_pyr
[
new_idx
].
data
,
disp_selected_pyr
[
cur_idx
].
data
,
data_cost_selected
.
data
,
data_cost
.
data
,
temp
,
rthis
,
step_pyr
[
0
],
step_pyr
[
0
],
rows_pyr
[
i
],
cols_pyr
[
i
],
nr_plane_pyr
[
i
],
rows_pyr
[
i
+
1
],
cols_pyr
[
i
+
1
],
nr_plane_pyr
[
i
+
1
]);
cur_idx
=
new_idx
;
}
cv
::
ocl
::
stereoCSBP
::
calc_all_iterations
(
u
[
cur_idx
].
data
,
d
[
cur_idx
].
data
,
l
[
cur_idx
].
data
,
r
[
cur_idx
].
data
,
data_cost_selected
.
data
,
disp_selected_pyr
[
cur_idx
].
data
,
temp
,
rthis
,
step_pyr
[
0
],
rows_pyr
[
i
],
cols_pyr
[
i
],
nr_plane_pyr
[
i
]);
}
if
(
disp
.
empty
())
disp
.
create
(
rows
,
cols
,
CV_16S
);
out
=
((
disp
.
type
()
==
CV_16S
)
?
disp
:
(
out
.
create
(
rows
,
cols
,
CV_16S
),
out
));
out
=
zero
;
stereoCSBP
::
compute_disp
(
u
[
cur_idx
].
data
,
d
[
cur_idx
].
data
,
l
[
cur_idx
].
data
,
r
[
cur_idx
].
data
,
data_cost_selected
.
data
,
disp_selected_pyr
[
cur_idx
].
data
,
rthis
,
step_pyr
[
0
],
out
,
nr_plane_pyr
[
0
]);
if
(
disp
.
type
()
!=
CV_16S
)
out
.
convertTo
(
disp
,
disp
.
type
());
}
typedef
void
(
*
csbp_operator_t
)(
StereoConstantSpaceBP
&
rthis
,
oclMat
u
[
2
],
oclMat
d
[
2
],
oclMat
l
[
2
],
oclMat
r
[
2
],
oclMat
disp_selected_pyr
[
2
],
oclMat
&
data_cost
,
oclMat
&
data_cost_selected
,
oclMat
&
temp
,
oclMat
&
out
,
const
oclMat
&
left
,
const
oclMat
&
right
,
oclMat
&
disp
);
const
static
csbp_operator_t
operators
[]
=
{
0
,
0
,
0
,
csbp_operator
<
short
>
,
0
,
csbp_operator
<
float
>
,
0
,
0
};
void
cv
::
ocl
::
StereoConstantSpaceBP
::
operator
()(
const
oclMat
&
left
,
const
oclMat
&
right
,
oclMat
&
disp
)
{
CV_Assert
(
msg_type
==
CV_32F
||
msg_type
==
CV_16S
);
operators
[
msg_type
](
*
this
,
u
,
d
,
l
,
r
,
disp_selected_pyr
,
data_cost
,
data_cost_selected
,
temp
,
out
,
left
,
right
,
disp
);
}
#endif
/* !defined (HAVE_OPENCL) */
modules/ocl/test/test_calib3d.cpp
View file @
bd1d7cd2
...
@@ -134,4 +134,64 @@ TEST_P(StereoMatchBP, Regression)
...
@@ -134,4 +134,64 @@ TEST_P(StereoMatchBP, Regression)
INSTANTIATE_TEST_CASE_P
(
OCL_Calib3D
,
StereoMatchBP
,
testing
::
Combine
(
testing
::
Values
(
64
),
INSTANTIATE_TEST_CASE_P
(
OCL_Calib3D
,
StereoMatchBP
,
testing
::
Combine
(
testing
::
Values
(
64
),
testing
::
Values
(
8
),
testing
::
Values
(
2
),
testing
::
Values
(
25.0
f
),
testing
::
Values
(
8
),
testing
::
Values
(
2
),
testing
::
Values
(
25.0
f
),
testing
::
Values
(
0.1
f
),
testing
::
Values
(
15.0
f
),
testing
::
Values
(
1.0
f
)));
testing
::
Values
(
0.1
f
),
testing
::
Values
(
15.0
f
),
testing
::
Values
(
1.0
f
)));
//////////////////////////////////////////////////////////////////////////
// ConstSpaceBeliefPropagation
PARAM_TEST_CASE
(
StereoMatchConstSpaceBP
,
int
,
int
,
int
,
int
,
float
,
float
,
float
,
float
,
int
,
int
)
{
int
ndisp_
;
int
iters_
;
int
levels_
;
int
nr_plane_
;
float
max_data_term_
;
float
data_weight_
;
float
max_disc_term_
;
float
disc_single_jump_
;
int
min_disp_th_
;
int
msg_type_
;
virtual
void
SetUp
()
{
ndisp_
=
GET_PARAM
(
0
);
iters_
=
GET_PARAM
(
1
);
levels_
=
GET_PARAM
(
2
);
nr_plane_
=
GET_PARAM
(
3
);
max_data_term_
=
GET_PARAM
(
4
);
data_weight_
=
GET_PARAM
(
5
);
max_disc_term_
=
GET_PARAM
(
6
);
disc_single_jump_
=
GET_PARAM
(
7
);
min_disp_th_
=
GET_PARAM
(
8
);
msg_type_
=
GET_PARAM
(
9
);
}
};
TEST_P
(
StereoMatchConstSpaceBP
,
Regression
)
{
Mat
left_image
=
readImage
(
"csstereobp/aloe-L.png"
);
Mat
right_image
=
readImage
(
"csstereobp/aloe-R.png"
);
Mat
disp_gold
=
readImage
(
"csstereobp/aloe-disp.png"
,
IMREAD_GRAYSCALE
);
ocl
::
oclMat
d_left
,
d_right
;
ocl
::
oclMat
d_disp
;
Mat
disp
;
ASSERT_FALSE
(
left_image
.
empty
());
ASSERT_FALSE
(
right_image
.
empty
());
ASSERT_FALSE
(
disp_gold
.
empty
());
d_left
.
upload
(
left_image
);
d_right
.
upload
(
right_image
);
ocl
::
StereoConstantSpaceBP
bp
(
ndisp_
,
iters_
,
levels_
,
nr_plane_
,
max_data_term_
,
data_weight_
,
max_disc_term_
,
disc_single_jump_
,
0
,
CV_32F
);
bp
(
d_left
,
d_right
,
d_disp
);
d_disp
.
download
(
disp
);
disp
.
convertTo
(
disp
,
disp_gold
.
depth
());
EXPECT_MAT_SIMILAR
(
disp_gold
,
disp
,
1e-4
);
//EXPECT_MAT_NEAR(disp_gold, disp, 1.0, "");
}
INSTANTIATE_TEST_CASE_P
(
OCL_Calib3D
,
StereoMatchConstSpaceBP
,
testing
::
Combine
(
testing
::
Values
(
128
),
testing
::
Values
(
16
),
testing
::
Values
(
4
),
testing
::
Values
(
4
),
testing
::
Values
(
30.0
f
),
testing
::
Values
(
1.0
f
),
testing
::
Values
(
160.0
f
),
testing
::
Values
(
10.0
f
),
testing
::
Values
(
0
),
testing
::
Values
(
CV_32F
)));
#endif // HAVE_OPENCL
#endif // HAVE_OPENCL
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment