Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
ee104c27
Commit
ee104c27
authored
Aug 12, 2010
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added gpu implementation of constant space belief propagation stereo matching.
some refactoring of StereoBeliefPropagation.
parent
53057afc
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
1227 additions
and
182 deletions
+1227
-182
gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+68
-24
beliefpropagation_gpu.cpp
modules/gpu/src/beliefpropagation_gpu.cpp
+44
-131
constantspacebp_gpu.cpp
modules/gpu/src/constantspacebp_gpu.cpp
+272
-0
beliefpropagation.cu
modules/gpu/src/cuda/beliefpropagation.cu
+28
-27
constantspacebp.cu
modules/gpu/src/cuda/constantspacebp.cu
+815
-0
No files found.
modules/gpu/include/opencv2/gpu/gpu.hpp
View file @
ee104c27
...
...
@@ -375,34 +375,28 @@ namespace cv
GpuMat
minSSD
,
leBuf
,
riBuf
;
};
////////////////////////
StereoBeliefPropagation_GPU
/////////////////////////
////////////////////////
// StereoBeliefPropagation //
/////////////////////////
class
CV_EXPORTS
StereoBeliefPropagation
_GPU
class
CV_EXPORTS
StereoBeliefPropagation
{
public
:
enum
{
MSG_TYPE_AUTO
,
MSG_TYPE_FLOAT
,
MSG_TYPE_SHORT_SCALE_AUTO
,
MSG_TYPE_SHORT_SCALE_MANUAL
};
enum
{
DEFAULT_NDISP
=
64
};
enum
{
DEFAULT_ITERS
=
5
};
enum
{
DEFAULT_LEVELS
=
5
};
//! the default constructor
explicit
StereoBeliefPropagation
_GPU
(
int
ndisp
=
DEFAULT_NDISP
,
int
iters
=
DEFAULT_ITERS
,
int
levels
=
DEFAULT_LEVELS
,
int
msg_type
=
MSG_TYPE_AUTO
,
float
msg_scale
=
1.0
f
);
explicit
StereoBeliefPropagation
(
int
ndisp
=
DEFAULT_NDISP
,
int
iters
=
DEFAULT_ITERS
,
int
levels
=
DEFAULT_LEVELS
,
int
msg_type
=
CV_32F
);
//! the full constructor taking the number of disparities, number of BP iterations on each level,
//! number of levels, truncation of data cost, data weight,
//! truncation of discontinuity cost and discontinuity single jump
StereoBeliefPropagation_GPU
(
int
ndisp
,
int
iters
,
int
levels
,
float
max_data_term
,
float
data_weight
,
float
max_disc_term
,
float
disc_single_jump
,
int
msg_type
=
MSG_TYPE_AUTO
,
float
msg_scale
=
1.0
f
);
StereoBeliefPropagation
(
int
ndisp
,
int
iters
,
int
levels
,
float
max_data_term
,
float
data_weight
,
float
max_disc_term
,
float
disc_single_jump
,
int
msg_type
=
CV_32F
);
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
//! if disparity is empty output type will be CV_16S else output type will be disparity.type().
...
...
@@ -410,11 +404,6 @@ namespace cv
//! Acync version
void
operator
()(
const
GpuMat
&
left
,
const
GpuMat
&
right
,
GpuMat
&
disparity
,
const
Stream
&
stream
);
//! Some heuristics that tries to estmate
//! if current GPU will be faster then CPU in this algorithm.
//! It queries current active device.
static
bool
checkIfGpuCallReasonable
();
int
ndisp
;
...
...
@@ -427,12 +416,67 @@ namespace cv
float
disc_single_jump
;
int
msg_type
;
float
msg_scale
;
private
:
GpuMat
u
,
d
,
l
,
r
,
u2
,
d2
,
l2
,
r2
;
std
::
vector
<
GpuMat
>
datas
;
GpuMat
out
;
};
};
/////////////////////////// StereoConstantSpaceBP ///////////////////////////
class
CV_EXPORTS
StereoConstantSpaceBP
{
public
:
enum
{
DEFAULT_NDISP
=
64
};
enum
{
DEFAULT_ITERS
=
5
};
enum
{
DEFAULT_LEVELS
=
5
};
enum
{
DEFAULT_NR_PLANE
=
2
};
//! the default constructor
explicit
StereoConstantSpaceBP
(
int
ndisp
=
DEFAULT_NDISP
,
int
iters
=
DEFAULT_ITERS
,
int
levels
=
DEFAULT_LEVELS
,
int
nr_plane
=
DEFAULT_NR_PLANE
,
int
msg_type
=
CV_32F
);
//! the full constructor taking the number of disparities, number of BP iterations on each level,
//! number of levels, number of active disparity on the first level, truncation of data cost, data weight,
//! truncation of discontinuity cost and discontinuity single jump
StereoConstantSpaceBP
(
int
ndisp
,
int
iters
,
int
levels
,
int
nr_plane
,
float
max_data_term
,
float
data_weight
,
float
max_disc_term
,
float
disc_single_jump
,
int
msg_type
=
CV_32F
);
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
//! if disparity is empty output type will be CV_16S else output type will be disparity.type().
void
operator
()(
const
GpuMat
&
left
,
const
GpuMat
&
right
,
GpuMat
&
disparity
);
//! Acync version
void
operator
()(
const
GpuMat
&
left
,
const
GpuMat
&
right
,
GpuMat
&
disparity
,
const
Stream
&
stream
);
int
ndisp
;
int
iters
;
int
levels
;
int
nr_plane
;
float
max_data_term
;
float
data_weight
;
float
max_disc_term
;
float
disc_single_jump
;
int
msg_type
;
private
:
GpuMat
u
[
2
],
d
[
2
],
l
[
2
],
r
[
2
];
GpuMat
disp_selected_pyr
[
2
];
GpuMat
data_cost
;
GpuMat
data_cost_selected
;
GpuMat
temp1
,
temp2
;
GpuMat
out
;
};
}
//! Speckle filtering - filters small connected components on diparity image.
...
...
modules/gpu/src/beliefpropagation_gpu.cpp
View file @
ee104c27
...
...
@@ -28,7 +28,7 @@
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or
implied warranties, including, but not limited to, the impl
ied
// any express or
bpied warranties, including, but not limited to, the bp
ied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
...
...
@@ -48,22 +48,16 @@ using namespace std;
#if !defined (HAVE_CUDA)
cv
::
gpu
::
StereoBeliefPropagation
_GPU
::
StereoBeliefPropagation_GPU
(
int
,
int
,
int
,
int
,
floa
t
)
{
throw_nogpu
();
}
cv
::
gpu
::
StereoBeliefPropagation
_GPU
::
StereoBeliefPropagation_GPU
(
int
,
int
,
int
,
float
,
float
,
float
,
float
,
int
,
floa
t
)
{
throw_nogpu
();
}
cv
::
gpu
::
StereoBeliefPropagation
::
StereoBeliefPropagation
(
int
,
int
,
int
,
in
t
)
{
throw_nogpu
();
}
cv
::
gpu
::
StereoBeliefPropagation
::
StereoBeliefPropagation
(
int
,
int
,
int
,
float
,
float
,
float
,
float
,
in
t
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
StereoBeliefPropagation_GPU
::
operator
()(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
StereoBeliefPropagation_GPU
::
operator
()(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
const
Stream
&
)
{
throw_nogpu
();
}
bool
cv
::
gpu
::
StereoBeliefPropagation_GPU
::
checkIfGpuCallReasonable
()
{
throw_nogpu
();
return
false
;
}
void
cv
::
gpu
::
StereoBeliefPropagation
::
operator
()(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
StereoBeliefPropagation
::
operator
()(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
const
Stream
&
)
{
throw_nogpu
();
}
#else
/* !defined (HAVE_CUDA) */
const
float
DEFAULT_MAX_DATA_TERM
=
10.0
f
;
const
float
DEFAULT_DATA_WEIGHT
=
0.07
f
;
const
float
DEFAULT_MAX_DISC_TERM
=
1.7
f
;
const
float
DEFAULT_DISC_SINGLE_JUMP
=
1.0
f
;
namespace
cv
{
namespace
gpu
{
namespace
impl
{
namespace
cv
{
namespace
gpu
{
namespace
bp
{
void
load_constants
(
int
ndisp
,
float
max_data_term
,
float
data_weight
,
float
max_disc_term
,
float
disc_single_jump
);
void
comp_data
(
int
msg_type
,
const
DevMem2D
&
l
,
const
DevMem2D
&
r
,
int
channels
,
DevMem2D
mdata
,
const
cudaStream_t
&
stream
);
void
data_step_down
(
int
dst_cols
,
int
dst_rows
,
int
src_rows
,
int
msg_type
,
const
DevMem2D
&
src
,
DevMem2D
dst
,
const
cudaStream_t
&
stream
);
...
...
@@ -72,48 +66,49 @@ namespace cv { namespace gpu { namespace impl {
void
output
(
int
msg_type
,
const
DevMem2D
&
u
,
const
DevMem2D
&
d
,
const
DevMem2D
&
l
,
const
DevMem2D
&
r
,
const
DevMem2D
&
data
,
DevMem2D
disp
,
const
cudaStream_t
&
stream
);
}}}
cv
::
gpu
::
StereoBeliefPropagation_GPU
::
StereoBeliefPropagation_GPU
(
int
ndisp_
,
int
iters_
,
int
levels_
,
int
msg_type_
,
float
msg_scale_
)
namespace
{
const
float
DEFAULT_MAX_DATA_TERM
=
10.0
f
;
const
float
DEFAULT_DATA_WEIGHT
=
0.07
f
;
const
float
DEFAULT_MAX_DISC_TERM
=
1.7
f
;
const
float
DEFAULT_DISC_SINGLE_JUMP
=
1.0
f
;
}
cv
::
gpu
::
StereoBeliefPropagation
::
StereoBeliefPropagation
(
int
ndisp_
,
int
iters_
,
int
levels_
,
int
msg_type_
)
:
ndisp
(
ndisp_
),
iters
(
iters_
),
levels
(
levels_
),
max_data_term
(
DEFAULT_MAX_DATA_TERM
),
data_weight
(
DEFAULT_DATA_WEIGHT
),
max_disc_term
(
DEFAULT_MAX_DISC_TERM
),
disc_single_jump
(
DEFAULT_DISC_SINGLE_JUMP
),
msg_type
(
msg_type_
),
msg_scale
(
msg_scale_
),
datas
(
levels_
)
msg_type
(
msg_type_
),
datas
(
levels_
)
{
CV_Assert
(
0
<
ndisp
&&
0
<
iters
&&
0
<
levels
);
}
cv
::
gpu
::
StereoBeliefPropagation
_GPU
::
StereoBeliefPropagation_GPU
(
int
ndisp_
,
int
iters_
,
int
levels_
,
float
max_data_term_
,
float
data_weight_
,
float
max_disc_term_
,
float
disc_single_jump_
,
int
msg_type_
,
float
msg_scal
e_
)
cv
::
gpu
::
StereoBeliefPropagation
::
StereoBeliefPropagation
(
int
ndisp_
,
int
iters_
,
int
levels_
,
float
max_data_term_
,
float
data_weight_
,
float
max_disc_term_
,
float
disc_single_jump_
,
int
msg_typ
e_
)
:
ndisp
(
ndisp_
),
iters
(
iters_
),
levels
(
levels_
),
max_data_term
(
max_data_term_
),
data_weight
(
data_weight_
),
max_disc_term
(
max_disc_term_
),
disc_single_jump
(
disc_single_jump_
),
msg_type
(
msg_type_
),
msg_scale
(
msg_scale_
),
datas
(
levels_
)
msg_type
(
msg_type_
),
datas
(
levels_
)
{
CV_Assert
(
0
<
ndisp
&&
0
<
iters
&&
0
<
levels
);
}
static
bool
checkMsgOverflow
(
int
levels
,
float
max_data_term
,
float
data_weight
,
float
max_disc_term
,
float
msg_scale
)
{
float
maxV
=
ceil
(
max_disc_term
*
msg_scale
);
float
maxD
=
ceil
(
max_data_term
*
data_weight
*
msg_scale
);
float
maxMsg
=
maxV
+
(
maxD
*
pow
(
4.0
f
,
(
float
)
levels
));
maxMsg
=
maxV
+
(
maxD
*
pow
(
4.0
f
,
(
float
)
levels
))
+
3
*
maxMsg
;
return
(
maxMsg
>
numeric_limits
<
short
>::
max
());
}
static
void
stereo_bp_gpu_operator
(
int
ndisp
,
int
iters
,
int
levels
,
float
max_data_term
,
float
data_weight
,
float
max_disc_term
,
float
disc_single_jump
,
int
msg_type
,
float
&
msg_scal
e
,
static
void
stereo_bp_gpu_operator
(
int
&
ndisp
,
int
&
iters
,
int
&
levels
,
float
&
max_data_term
,
float
&
data_weight
,
float
&
max_disc_term
,
float
&
disc_single_jump
,
int
&
msg_typ
e
,
GpuMat
&
u
,
GpuMat
&
d
,
GpuMat
&
l
,
GpuMat
&
r
,
GpuMat
&
u2
,
GpuMat
&
d2
,
GpuMat
&
l2
,
GpuMat
&
r2
,
vector
<
GpuMat
>&
datas
,
GpuMat
&
out
,
const
GpuMat
&
left
,
const
GpuMat
&
right
,
GpuMat
&
disp
,
const
cudaStream_t
&
stream
)
{
CV_DbgAssert
(
left
.
cols
==
right
.
cols
&&
left
.
rows
==
right
.
rows
&&
left
.
type
()
==
right
.
type
()
&&
left
.
type
()
==
CV_8U
);
CV_DbgAssert
(
0
<
ndisp
&&
0
<
iters
&&
0
<
levels
&&
(
msg_type
==
CV_32F
||
msg_type
==
CV_16S
)
&&
left
.
rows
==
right
.
rows
&&
left
.
cols
==
right
.
cols
&&
left
.
type
()
==
right
.
type
());
CV_Assert
((
left
.
type
()
==
CV_8UC1
||
left
.
type
()
==
CV_8UC3
));
const
Scalar
zero
=
Scalar
::
all
(
0
);
const
float
scale
=
((
msg_type
==
CV_32F
)
?
1.0
f
:
10.0
f
);
int
rows
=
left
.
rows
;
int
cols
=
left
.
cols
;
...
...
@@ -121,65 +116,7 @@ static void stereo_bp_gpu_operator(int ndisp, int iters, int levels,
int
lowest_cols
=
cols
/
divisor
;
int
lowest_rows
=
rows
/
divisor
;
const
int
min_image_dim_size
=
2
;
CV_Assert
(
min
(
lowest_cols
,
lowest_rows
)
>
min_image_dim_size
);
switch
(
msg_type
)
{
case
StereoBeliefPropagation_GPU
:
:
MSG_TYPE_AUTO
:
if
(
!
checkMsgOverflow
(
levels
,
max_data_term
,
data_weight
,
max_disc_term
,
100.0
f
))
{
msg_type
=
CV_16S
;
msg_scale
=
100.0
f
;
}
else
if
(
!
checkMsgOverflow
(
levels
,
max_data_term
,
data_weight
,
max_disc_term
,
64.0
f
))
{
msg_type
=
CV_16S
;
msg_scale
=
64.0
f
;
}
else
if
(
!
checkMsgOverflow
(
levels
,
max_data_term
,
data_weight
,
max_disc_term
,
32.0
f
))
{
msg_type
=
CV_16S
;
msg_scale
=
32.0
f
;
}
else
if
(
!
checkMsgOverflow
(
levels
,
max_data_term
,
data_weight
,
max_disc_term
,
16.0
f
))
{
msg_type
=
CV_16S
;
msg_scale
=
16.0
f
;
}
else
if
(
!
checkMsgOverflow
(
levels
,
max_data_term
,
data_weight
,
max_disc_term
,
10.0
f
))
{
msg_type
=
CV_16S
;
msg_scale
=
10.0
f
;
}
else
{
msg_type
=
CV_32F
;
msg_scale
=
1.0
f
;
}
break
;
case
StereoBeliefPropagation_GPU
:
:
MSG_TYPE_FLOAT
:
msg_type
=
CV_32F
;
msg_scale
=
1.0
f
;
break
;
case
StereoBeliefPropagation_GPU
:
:
MSG_TYPE_SHORT_SCALE_AUTO
:
msg_type
=
CV_16S
;
if
(
!
checkMsgOverflow
(
levels
,
max_data_term
,
data_weight
,
max_disc_term
,
100.0
f
))
msg_scale
=
100.0
f
;
else
if
(
!
checkMsgOverflow
(
levels
,
max_data_term
,
data_weight
,
max_disc_term
,
64.0
f
))
msg_scale
=
64.0
f
;
else
if
(
!
checkMsgOverflow
(
levels
,
max_data_term
,
data_weight
,
max_disc_term
,
32.0
f
))
msg_scale
=
32.0
f
;
else
if
(
!
checkMsgOverflow
(
levels
,
max_data_term
,
data_weight
,
max_disc_term
,
16.0
f
))
msg_scale
=
16.0
f
;
else
msg_scale
=
10.0
f
;
break
;
case
StereoBeliefPropagation_GPU
:
:
MSG_TYPE_SHORT_SCALE_MANUAL
:
msg_type
=
CV_16S
;
break
;
default
:
cv
::
gpu
::
error
(
"Unsupported message type"
,
__FILE__
,
__LINE__
);
}
CV_Assert
(
min
(
lowest_cols
,
lowest_rows
)
>
min_image_dim_size
);
u
.
create
(
rows
*
ndisp
,
cols
,
msg_type
);
d
.
create
(
rows
*
ndisp
,
cols
,
msg_type
);
...
...
@@ -214,7 +151,7 @@ static void stereo_bp_gpu_operator(int ndisp, int iters, int levels,
}
}
impl
::
load_constants
(
ndisp
,
max_data_term
,
msg_scale
*
data_weight
,
msg_scale
*
max_disc_term
,
msg_
scale
*
disc_single_jump
);
bp
::
load_constants
(
ndisp
,
max_data_term
,
scale
*
data_weight
,
scale
*
max_disc_term
,
scale
*
disc_single_jump
);
datas
.
resize
(
levels
);
...
...
@@ -228,7 +165,7 @@ static void stereo_bp_gpu_operator(int ndisp, int iters, int levels,
datas
[
0
].
create
(
rows
*
ndisp
,
cols
,
msg_type
);
impl
::
comp_data
(
msg_type
,
left
,
right
,
left
.
channels
(),
datas
.
front
(),
stream
);
bp
::
comp_data
(
msg_type
,
left
,
right
,
left
.
channels
(),
datas
.
front
(),
stream
);
for
(
int
i
=
1
;
i
<
levels
;
i
++
)
{
...
...
@@ -237,7 +174,7 @@ static void stereo_bp_gpu_operator(int ndisp, int iters, int levels,
datas
[
i
].
create
(
rows_all
[
i
]
*
ndisp
,
cols_all
[
i
],
msg_type
);
impl
::
data_step_down
(
cols_all
[
i
],
rows_all
[
i
],
rows_all
[
i
-
1
],
msg_type
,
datas
[
i
-
1
],
datas
[
i
],
stream
);
bp
::
data_step_down
(
cols_all
[
i
],
rows_all
[
i
],
rows_all
[
i
-
1
],
msg_type
,
datas
[
i
-
1
],
datas
[
i
],
stream
);
}
DevMem2D
mus
[]
=
{
u
,
u2
};
...
...
@@ -251,9 +188,9 @@ static void stereo_bp_gpu_operator(int ndisp, int iters, int levels,
{
// for lower level we have already computed messages by setting to zero
if
(
i
!=
levels
-
1
)
impl
::
level_up_messages
(
mem_idx
,
cols_all
[
i
],
rows_all
[
i
],
rows_all
[
i
+
1
],
msg_type
,
mus
,
mds
,
mls
,
mrs
,
stream
);
bp
::
level_up_messages
(
mem_idx
,
cols_all
[
i
],
rows_all
[
i
],
rows_all
[
i
+
1
],
msg_type
,
mus
,
mds
,
mls
,
mrs
,
stream
);
impl
::
calc_all_iterations
(
cols_all
[
i
],
rows_all
[
i
],
iters
,
msg_type
,
mus
[
mem_idx
],
mds
[
mem_idx
],
mls
[
mem_idx
],
mrs
[
mem_idx
],
datas
[
i
],
stream
);
bp
::
calc_all_iterations
(
cols_all
[
i
],
rows_all
[
i
],
iters
,
msg_type
,
mus
[
mem_idx
],
mds
[
mem_idx
],
mls
[
mem_idx
],
mrs
[
mem_idx
],
datas
[
i
],
stream
);
mem_idx
=
(
mem_idx
+
1
)
&
1
;
}
...
...
@@ -261,47 +198,23 @@ static void stereo_bp_gpu_operator(int ndisp, int iters, int levels,
if
(
disp
.
empty
())
disp
.
create
(
rows
,
cols
,
CV_16S
);
if
(
disp
.
type
()
==
CV_16S
)
{
disp
=
zero
;
impl
::
output
(
msg_type
,
u
,
d
,
l
,
r
,
datas
.
front
(),
disp
,
stream
);
}
else
{
out
.
create
(
rows
,
cols
,
CV_16S
);
out
=
zero
;
impl
::
output
(
msg_type
,
u
,
d
,
l
,
r
,
datas
.
front
(),
out
,
stream
);
out
=
((
disp
.
type
()
==
CV_16S
)
?
disp
:
GpuMat
(
rows
,
cols
,
CV_16S
));
out
=
zero
;
bp
::
output
(
msg_type
,
u
,
d
,
l
,
r
,
datas
.
front
(),
disp
,
stream
);
if
(
disp
.
type
()
!=
CV_16S
)
out
.
convertTo
(
disp
,
disp
.
type
());
}
}
void
cv
::
gpu
::
StereoBeliefPropagation_GPU
::
operator
()(
const
GpuMat
&
left
,
const
GpuMat
&
right
,
GpuMat
&
disp
)
{
::
stereo_bp_gpu_operator
(
ndisp
,
iters
,
levels
,
max_data_term
,
data_weight
,
max_disc_term
,
disc_single_jump
,
msg_type
,
msg_scale
,
u
,
d
,
l
,
r
,
u2
,
d2
,
l2
,
r2
,
datas
,
out
,
left
,
right
,
disp
,
0
);
}
void
cv
::
gpu
::
StereoBeliefPropagation
_GPU
::
operator
()(
const
GpuMat
&
left
,
const
GpuMat
&
right
,
GpuMat
&
disp
,
const
Stream
&
stream
)
void
cv
::
gpu
::
StereoBeliefPropagation
::
operator
()(
const
GpuMat
&
left
,
const
GpuMat
&
right
,
GpuMat
&
disp
)
{
::
stereo_bp_gpu_operator
(
ndisp
,
iters
,
levels
,
max_data_term
,
data_weight
,
max_disc_term
,
disc_single_jump
,
msg_type
,
msg_scale
,
u
,
d
,
l
,
r
,
u2
,
d2
,
l2
,
r2
,
datas
,
out
,
left
,
right
,
disp
,
StreamAccessor
::
getStream
(
stream
)
);
::
stereo_bp_gpu_operator
(
ndisp
,
iters
,
levels
,
max_data_term
,
data_weight
,
max_disc_term
,
disc_single_jump
,
msg_type
,
u
,
d
,
l
,
r
,
u2
,
d2
,
l2
,
r2
,
datas
,
out
,
left
,
right
,
disp
,
0
);
}
bool
cv
::
gpu
::
StereoBeliefPropagation_GPU
::
checkIfGpuCallReasonable
(
)
void
cv
::
gpu
::
StereoBeliefPropagation
::
operator
()(
const
GpuMat
&
left
,
const
GpuMat
&
right
,
GpuMat
&
disp
,
const
Stream
&
stream
)
{
if
(
0
==
getCudaEnabledDeviceCount
())
return
false
;
int
device
=
getDevice
();
int
minor
,
major
;
getComputeCapability
(
device
,
&
major
,
&
minor
);
int
numSM
=
getNumberOfSMs
(
device
);
if
(
major
>
1
||
numSM
>
16
)
return
true
;
return
false
;
::
stereo_bp_gpu_operator
(
ndisp
,
iters
,
levels
,
max_data_term
,
data_weight
,
max_disc_term
,
disc_single_jump
,
msg_type
,
u
,
d
,
l
,
r
,
u2
,
d2
,
l2
,
r2
,
datas
,
out
,
left
,
right
,
disp
,
StreamAccessor
::
getStream
(
stream
));
}
#endif
/* !defined (HAVE_CUDA) */
modules/gpu/src/constantspacebp_gpu.cpp
0 → 100644
View file @
ee104c27
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other GpuMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using
namespace
cv
;
using
namespace
cv
::
gpu
;
using
namespace
std
;
#if !defined (HAVE_CUDA)
cv
::
gpu
::
StereoConstantSpaceBP
::
StereoConstantSpaceBP
(
int
,
int
,
int
,
int
,
int
)
{
throw_nogpu
();
}
cv
::
gpu
::
StereoConstantSpaceBP
::
StereoConstantSpaceBP
(
int
,
int
,
int
,
int
,
float
,
float
,
float
,
float
,
int
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
StereoConstantSpaceBP
::
operator
()(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
StereoConstantSpaceBP
::
operator
()(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
const
Stream
&
)
{
throw_nogpu
();
}
#else
/* !defined (HAVE_CUDA) */
namespace
cv
{
namespace
gpu
{
namespace
csbp
{
void
load_constants
(
int
ndisp
,
float
max_data_term
,
float
data_weight
,
float
max_disc_term
,
float
disc_single_jump
,
const
DevMem2D
&
left
,
const
DevMem2D
&
right
,
const
DevMem2D
&
temp1
,
const
DevMem2D
&
temp2
);
void
init_data_cost
(
int
rows
,
int
cols
,
const
DevMem2D
&
disp_selected_pyr
,
const
DevMem2D
&
data_cost_selected
,
size_t
msg_step
,
int
msg_type
,
int
h
,
int
w
,
int
level
,
int
nr_plane
,
int
ndisp
,
int
channels
,
const
cudaStream_t
&
stream
);
void
compute_data_cost
(
const
DevMem2D
&
disp_selected_pyr
,
const
DevMem2D
&
data_cost
,
size_t
msg_step1
,
size_t
msg_step2
,
int
msg_type
,
int
h
,
int
w
,
int
h2
,
int
level
,
int
nr_plane
,
int
channels
,
const
cudaStream_t
&
stream
);
void
init_message
(
const
DevMem2D
&
u_new
,
const
DevMem2D
&
d_new
,
const
DevMem2D
&
l_new
,
const
DevMem2D
&
r_new
,
const
DevMem2D
&
u_cur
,
const
DevMem2D
&
d_cur
,
const
DevMem2D
&
l_cur
,
const
DevMem2D
&
r_cur
,
const
DevMem2D
&
selected_disp_pyr_new
,
const
DevMem2D
&
selected_disp_pyr_cur
,
const
DevMem2D
&
data_cost_selected
,
const
DevMem2D
&
data_cost
,
size_t
msg_step1
,
size_t
msg_step2
,
int
msg_type
,
int
h
,
int
w
,
int
nr_plane
,
int
h2
,
int
w2
,
int
nr_plane2
,
const
cudaStream_t
&
stream
);
void
calc_all_iterations
(
const
DevMem2D
&
u
,
const
DevMem2D
&
d
,
const
DevMem2D
&
l
,
const
DevMem2D
&
r
,
const
DevMem2D
&
data_cost_selected
,
const
DevMem2D
&
selected_disp_pyr_cur
,
size_t
msg_step
,
int
msg_type
,
int
h
,
int
w
,
int
nr_plane
,
int
iters
,
const
cudaStream_t
&
stream
);
void
compute_disp
(
const
DevMem2D
&
u
,
const
DevMem2D
&
d
,
const
DevMem2D
&
l
,
const
DevMem2D
&
r
,
const
DevMem2D
&
data_cost_selected
,
const
DevMem2D
&
disp_selected
,
size_t
msg_step
,
int
msg_type
,
const
DevMem2D
&
disp
,
int
nr_plane
,
const
cudaStream_t
&
stream
);
}}}
namespace
{
const
float
DEFAULT_MAX_DATA_TERM
=
10.0
f
;
const
float
DEFAULT_DATA_WEIGHT
=
0.07
f
;
const
float
DEFAULT_MAX_DISC_TERM
=
1.7
f
;
const
float
DEFAULT_DISC_SINGLE_JUMP
=
1.0
f
;
}
cv
::
gpu
::
StereoConstantSpaceBP
::
StereoConstantSpaceBP
(
int
ndisp_
,
int
iters_
,
int
levels_
,
int
nr_plane_
,
int
msg_type_
)
:
ndisp
(
ndisp_
),
iters
(
iters_
),
levels
(
levels_
),
nr_plane
(
nr_plane_
),
max_data_term
(
DEFAULT_MAX_DATA_TERM
),
data_weight
(
DEFAULT_DATA_WEIGHT
),
max_disc_term
(
DEFAULT_MAX_DISC_TERM
),
disc_single_jump
(
DEFAULT_DISC_SINGLE_JUMP
),
msg_type
(
msg_type_
)
{
}
cv
::
gpu
::
StereoConstantSpaceBP
::
StereoConstantSpaceBP
(
int
ndisp_
,
int
iters_
,
int
levels_
,
int
nr_plane_
,
float
max_data_term_
,
float
data_weight_
,
float
max_disc_term_
,
float
disc_single_jump_
,
int
msg_type_
)
:
ndisp
(
ndisp_
),
iters
(
iters_
),
levels
(
levels_
),
nr_plane
(
nr_plane_
),
max_data_term
(
max_data_term_
),
data_weight
(
data_weight_
),
max_disc_term
(
max_disc_term_
),
disc_single_jump
(
disc_single_jump_
),
msg_type
(
msg_type_
)
{
}
static
void
stereo_csbp_gpu_operator
(
int
&
ndisp
,
int
&
iters
,
int
&
levels
,
int
&
nr_plane
,
float
&
max_data_term
,
float
&
data_weight
,
float
&
max_disc_term
,
float
&
disc_single_jump
,
int
&
msg_type
,
GpuMat
u
[
2
],
GpuMat
d
[
2
],
GpuMat
l
[
2
],
GpuMat
r
[
2
],
GpuMat
disp_selected_pyr
[
2
],
GpuMat
&
data_cost
,
GpuMat
&
data_cost_selected
,
GpuMat
&
temp1
,
GpuMat
&
temp2
,
GpuMat
&
out
,
const
GpuMat
&
left
,
const
GpuMat
&
right
,
GpuMat
&
disp
,
const
cudaStream_t
&
stream
)
{
CV_DbgAssert
(
0
<
ndisp
&&
0
<
iters
&&
0
<
levels
&&
0
<
nr_plane
&&
(
msg_type
==
CV_32F
||
msg_type
==
CV_16S
)
&&
left
.
rows
==
right
.
rows
&&
left
.
cols
==
right
.
cols
&&
left
.
type
()
==
right
.
type
());
CV_Assert
(
levels
<=
8
&&
(
left
.
type
()
==
CV_8UC1
||
left
.
type
()
==
CV_8UC3
));
const
Scalar
zero
=
Scalar
::
all
(
0
);
const
float
scale
=
((
msg_type
==
CV_32F
)
?
1.0
f
:
10.0
f
);
const
size_t
type_size
=
((
msg_type
==
CV_32F
)
?
sizeof
(
float
)
:
sizeof
(
short
));
////////////////////////////////////////////////////////////////////////////////////////////
// Init
int
rows
=
left
.
rows
;
int
cols
=
left
.
cols
;
levels
=
min
(
levels
,
int
(
log
((
double
)
ndisp
)
/
log
(
2.0
)));
AutoBuffer
<
int
>
buf
(
levels
*
4
);
int
*
cols_pyr
=
buf
;
int
*
rows_pyr
=
cols_pyr
+
levels
;
int
*
nr_plane_pyr
=
rows_pyr
+
levels
;
int
*
step_pyr
=
nr_plane_pyr
+
levels
;
cols_pyr
[
0
]
=
cols
;
rows_pyr
[
0
]
=
rows
;
nr_plane_pyr
[
0
]
=
nr_plane
;
const
int
n
=
64
;
step_pyr
[
0
]
=
alignSize
(
cols
*
type_size
,
n
)
/
type_size
;
for
(
int
i
=
1
;
i
<
levels
;
i
++
)
{
cols_pyr
[
i
]
=
(
cols_pyr
[
i
-
1
]
+
1
)
/
2
;
rows_pyr
[
i
]
=
(
rows_pyr
[
i
-
1
]
+
1
)
/
2
;
nr_plane_pyr
[
i
]
=
nr_plane_pyr
[
i
-
1
]
*
2
;
step_pyr
[
i
]
=
alignSize
(
cols_pyr
[
i
]
*
type_size
,
n
)
/
type_size
;
}
Size
msg_size
(
step_pyr
[
0
],
rows
*
nr_plane_pyr
[
0
]);
Size
data_cost_size
(
step_pyr
[
0
],
rows
*
nr_plane_pyr
[
0
]
*
2
);
u
[
0
].
create
(
msg_size
,
msg_type
);
d
[
0
].
create
(
msg_size
,
msg_type
);
l
[
0
].
create
(
msg_size
,
msg_type
);
r
[
0
].
create
(
msg_size
,
msg_type
);
u
[
1
].
create
(
msg_size
,
msg_type
);
d
[
1
].
create
(
msg_size
,
msg_type
);
l
[
1
].
create
(
msg_size
,
msg_type
);
r
[
1
].
create
(
msg_size
,
msg_type
);
disp_selected_pyr
[
0
].
create
(
msg_size
,
msg_type
);
disp_selected_pyr
[
1
].
create
(
msg_size
,
msg_type
);
data_cost
.
create
(
data_cost_size
,
msg_type
);
data_cost_selected
.
create
(
msg_size
,
msg_type
);
step_pyr
[
0
]
=
data_cost
.
step
/
type_size
;
Size
temp_size
=
data_cost_size
;
if
(
data_cost
.
step
*
data_cost_size
.
height
<
static_cast
<
size_t
>
(
step_pyr
[
levels
-
1
])
*
rows_pyr
[
levels
-
1
]
*
ndisp
)
{
temp_size
=
Size
(
step_pyr
[
levels
-
1
],
rows_pyr
[
levels
-
1
]
*
nr_plane
);
}
temp1
.
create
(
temp_size
,
msg_type
);
temp2
.
create
(
temp_size
,
msg_type
);
////////////////////////////////////////////////////////////////////////////
// Compute
csbp
::
load_constants
(
ndisp
,
max_data_term
,
scale
*
data_weight
,
scale
*
max_disc_term
,
scale
*
disc_single_jump
,
left
,
right
,
temp1
,
temp2
);
l
[
0
]
=
zero
;
d
[
0
]
=
zero
;
r
[
0
]
=
zero
;
u
[
0
]
=
zero
;
l
[
1
]
=
zero
;
d
[
1
]
=
zero
;
r
[
1
]
=
zero
;
u
[
1
]
=
zero
;
data_cost
=
zero
;
data_cost_selected
=
zero
;
int
cur_idx
=
0
;
for
(
int
i
=
levels
-
1
;
i
>=
0
;
i
--
)
{
if
(
i
==
levels
-
1
)
{
csbp
::
init_data_cost
(
left
.
rows
,
left
.
cols
,
disp_selected_pyr
[
cur_idx
],
data_cost_selected
,
step_pyr
[
i
],
msg_type
,
rows_pyr
[
i
],
cols_pyr
[
i
],
i
,
nr_plane_pyr
[
i
],
ndisp
,
left
.
channels
(),
stream
);
}
else
{
csbp
::
compute_data_cost
(
disp_selected_pyr
[
cur_idx
],
data_cost
,
step_pyr
[
i
],
step_pyr
[
i
+
1
],
msg_type
,
rows_pyr
[
i
],
cols_pyr
[
i
],
rows_pyr
[
i
+
1
],
i
,
nr_plane_pyr
[
i
+
1
],
left
.
channels
(),
stream
);
int
new_idx
=
(
cur_idx
+
1
)
&
1
;
csbp
::
init_message
(
u
[
new_idx
],
d
[
new_idx
],
l
[
new_idx
],
r
[
new_idx
],
u
[
cur_idx
],
d
[
cur_idx
],
l
[
cur_idx
],
r
[
cur_idx
],
disp_selected_pyr
[
new_idx
],
disp_selected_pyr
[
cur_idx
],
data_cost_selected
,
data_cost
,
step_pyr
[
i
],
step_pyr
[
i
+
1
],
msg_type
,
rows_pyr
[
i
],
cols_pyr
[
i
],
nr_plane_pyr
[
i
],
rows_pyr
[
i
+
1
],
cols_pyr
[
i
+
1
],
nr_plane_pyr
[
i
+
1
],
stream
);
cur_idx
=
new_idx
;
}
csbp
::
calc_all_iterations
(
u
[
cur_idx
],
d
[
cur_idx
],
l
[
cur_idx
],
r
[
cur_idx
],
data_cost_selected
,
disp_selected_pyr
[
cur_idx
],
step_pyr
[
i
],
msg_type
,
rows_pyr
[
i
],
cols_pyr
[
i
],
nr_plane_pyr
[
i
],
iters
,
stream
);
}
if
(
disp
.
empty
())
disp
.
create
(
rows
,
cols
,
CV_16S
);
out
=
((
disp
.
type
()
==
CV_16S
)
?
disp
:
GpuMat
(
rows
,
cols
,
CV_16S
));
out
=
zero
;
csbp
::
compute_disp
(
u
[
cur_idx
],
d
[
cur_idx
],
l
[
cur_idx
],
r
[
cur_idx
],
data_cost_selected
,
disp_selected_pyr
[
cur_idx
],
step_pyr
[
0
],
msg_type
,
out
,
nr_plane_pyr
[
0
],
stream
);
if
(
disp
.
type
()
!=
CV_16S
)
out
.
convertTo
(
disp
,
disp
.
type
());
}
void
cv
::
gpu
::
StereoConstantSpaceBP
::
operator
()(
const
GpuMat
&
left
,
const
GpuMat
&
right
,
GpuMat
&
disp
)
{
::
stereo_csbp_gpu_operator
(
ndisp
,
iters
,
levels
,
nr_plane
,
max_data_term
,
data_weight
,
max_disc_term
,
disc_single_jump
,
msg_type
,
u
,
d
,
l
,
r
,
disp_selected_pyr
,
data_cost
,
data_cost_selected
,
temp1
,
temp2
,
out
,
left
,
right
,
disp
,
0
);
}
void
cv
::
gpu
::
StereoConstantSpaceBP
::
operator
()(
const
GpuMat
&
left
,
const
GpuMat
&
right
,
GpuMat
&
disp
,
const
Stream
&
stream
)
{
::
stereo_csbp_gpu_operator
(
ndisp
,
iters
,
levels
,
nr_plane
,
max_data_term
,
data_weight
,
max_disc_term
,
disc_single_jump
,
msg_type
,
u
,
d
,
l
,
r
,
disp_selected_pyr
,
data_cost
,
data_cost_selected
,
temp1
,
temp2
,
out
,
left
,
right
,
disp
,
StreamAccessor
::
getStream
(
stream
));
}
#endif
/* !defined (HAVE_CUDA) */
modules/gpu/src/cuda/beliefpropagation.cu
View file @
ee104c27
...
...
@@ -28,7 +28,7 @@
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or
implied warranties, including, but not limited to, the impl
ied
// any express or
bpied warranties, including, but not limited to, the bp
ied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
...
...
@@ -45,6 +45,7 @@
#include "safe_call.hpp"
using namespace cv::gpu;
using namespace cv::gpu::impl;
#ifndef FLT_MAX
#define FLT_MAX 3.402823466e+38F
...
...
@@ -54,7 +55,7 @@ using namespace cv::gpu;
/////////////////////// load constants ////////////////////////
///////////////////////////////////////////////////////////////
namespace b
eliefpropagation_gpu
namespace b
p_kernels
{
__constant__ int cndisp;
__constant__ float cmax_data_term;
...
...
@@ -63,14 +64,14 @@ namespace beliefpropagation_gpu
__constant__ float cdisc_single_jump;
};
namespace cv { namespace gpu { namespace
impl
{
namespace cv { namespace gpu { namespace
bp
{
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump)
{
cudaSafeCall( cudaMemcpyToSymbol(b
eliefpropagation_gpu
::cndisp, &ndisp, sizeof(int )) );
cudaSafeCall( cudaMemcpyToSymbol(b
eliefpropagation_gpu
::cmax_data_term, &max_data_term, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(b
eliefpropagation_gpu
::cdata_weight, &data_weight, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(b
eliefpropagation_gpu
::cmax_disc_term, &max_disc_term, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(b
eliefpropagation_gpu
::cdisc_single_jump, &disc_single_jump, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(b
p_kernels
::cndisp, &ndisp, sizeof(int )) );
cudaSafeCall( cudaMemcpyToSymbol(b
p_kernels
::cmax_data_term, &max_data_term, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(b
p_kernels
::cdata_weight, &data_weight, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(b
p_kernels
::cmax_disc_term, &max_disc_term, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(b
p_kernels
::cdisc_single_jump, &disc_single_jump, sizeof(float)) );
}
}}}
...
...
@@ -78,7 +79,7 @@ namespace cv { namespace gpu { namespace impl {
////////////////////////// comp data //////////////////////////
///////////////////////////////////////////////////////////////
namespace b
eliefpropagation_gpu
namespace b
p_kernels
{
template <typename T>
__global__ void comp_data_gray(const uchar* l, const uchar* r, size_t step, T* data, size_t data_step, int cols, int rows)
...
...
@@ -147,7 +148,7 @@ namespace beliefpropagation_gpu
}
}
namespace cv { namespace gpu { namespace
impl
{
namespace cv { namespace gpu { namespace
bp
{
typedef void (*CompDataFunc)(const DevMem2D& l, const DevMem2D& r, int channels, DevMem2D mdata, const cudaStream_t& stream);
template<typename T>
...
...
@@ -160,9 +161,9 @@ namespace cv { namespace gpu { namespace impl {
grid.y = divUp(l.rows, threads.y);
if (channels == 1)
b
eliefpropagation_gpu
::comp_data_gray<T><<<grid, threads, 0, stream>>>(l.ptr, r.ptr, l.step, (T*)mdata.ptr, mdata.step/sizeof(T), l.cols, l.rows);
b
p_kernels
::comp_data_gray<T><<<grid, threads, 0, stream>>>(l.ptr, r.ptr, l.step, (T*)mdata.ptr, mdata.step/sizeof(T), l.cols, l.rows);
else
b
eliefpropagation_gpu
::comp_data_bgr<T><<<grid, threads, 0, stream>>>(l.ptr, r.ptr, l.step, (T*)mdata.ptr, mdata.step/sizeof(T), l.cols, l.rows);
b
p_kernels
::comp_data_bgr<T><<<grid, threads, 0, stream>>>(l.ptr, r.ptr, l.step, (T*)mdata.ptr, mdata.step/sizeof(T), l.cols, l.rows);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
...
...
@@ -193,7 +194,7 @@ namespace cv { namespace gpu { namespace impl {
//////////////////////// data step down ///////////////////////
///////////////////////////////////////////////////////////////
namespace b
eliefpropagation_gpu
namespace b
p_kernels
{
template <typename T>
__global__ void data_step_down(int dst_cols, int dst_rows, int src_rows, const T* src, size_t src_step, T* dst, size_t dst_step)
...
...
@@ -219,7 +220,7 @@ namespace beliefpropagation_gpu
}
}
namespace cv { namespace gpu { namespace
impl
{
namespace cv { namespace gpu { namespace
bp
{
typedef void (*DataStepDownFunc)(int dst_cols, int dst_rows, int src_rows, const DevMem2D& src, DevMem2D dst, const cudaStream_t& stream);
template<typename T>
...
...
@@ -231,7 +232,7 @@ namespace cv { namespace gpu { namespace impl {
grid.x = divUp(dst_cols, threads.x);
grid.y = divUp(dst_rows, threads.y);
b
eliefpropagation_gpu
::data_step_down<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)src.ptr, src.step/sizeof(T), (T*)dst.ptr, dst.step/sizeof(T));
b
p_kernels
::data_step_down<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)src.ptr, src.step/sizeof(T), (T*)dst.ptr, dst.step/sizeof(T));
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
...
...
@@ -262,7 +263,7 @@ namespace cv { namespace gpu { namespace impl {
/////////////////// level up messages ////////////////////////
///////////////////////////////////////////////////////////////
namespace b
eliefpropagation_gpu
namespace b
p_kernels
{
template <typename T>
__global__ void level_up_message(int dst_cols, int dst_rows, int src_rows, const T* src, size_t src_step, T* dst, size_t dst_step)
...
...
@@ -284,7 +285,7 @@ namespace beliefpropagation_gpu
}
}
namespace cv { namespace gpu { namespace
impl
{
namespace cv { namespace gpu { namespace
bp
{
typedef void (*LevelUpMessagesFunc)(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2D* mus, DevMem2D* mds, DevMem2D* mls, DevMem2D* mrs, const cudaStream_t& stream);
template<typename T>
...
...
@@ -298,10 +299,10 @@ namespace cv { namespace gpu { namespace impl {
int src_idx = (dst_idx + 1) & 1;
b
eliefpropagation_gpu
::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mus[src_idx].ptr, mus[src_idx].step/sizeof(T), (T*)mus[dst_idx].ptr, mus[dst_idx].step/sizeof(T));
b
eliefpropagation_gpu
::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mds[src_idx].ptr, mds[src_idx].step/sizeof(T), (T*)mds[dst_idx].ptr, mds[dst_idx].step/sizeof(T));
b
eliefpropagation_gpu
::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mls[src_idx].ptr, mls[src_idx].step/sizeof(T), (T*)mls[dst_idx].ptr, mls[dst_idx].step/sizeof(T));
b
eliefpropagation_gpu
::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mrs[src_idx].ptr, mrs[src_idx].step/sizeof(T), (T*)mrs[dst_idx].ptr, mrs[dst_idx].step/sizeof(T));
b
p_kernels
::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mus[src_idx].ptr, mus[src_idx].step/sizeof(T), (T*)mus[dst_idx].ptr, mus[dst_idx].step/sizeof(T));
b
p_kernels
::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mds[src_idx].ptr, mds[src_idx].step/sizeof(T), (T*)mds[dst_idx].ptr, mds[dst_idx].step/sizeof(T));
b
p_kernels
::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mls[src_idx].ptr, mls[src_idx].step/sizeof(T), (T*)mls[dst_idx].ptr, mls[dst_idx].step/sizeof(T));
b
p_kernels
::level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (const T*)mrs[src_idx].ptr, mrs[src_idx].step/sizeof(T), (T*)mrs[dst_idx].ptr, mrs[dst_idx].step/sizeof(T));
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
...
...
@@ -332,7 +333,7 @@ namespace cv { namespace gpu { namespace impl {
//////////////////// calc all iterations /////////////////////
///////////////////////////////////////////////////////////////
namespace b
eliefpropagation_gpu
namespace b
p_kernels
{
template <typename T>
__device__ void calc_min_linear_penalty(T* dst, size_t step)
...
...
@@ -429,7 +430,7 @@ namespace beliefpropagation_gpu
}
}
namespace cv { namespace gpu { namespace
impl
{
namespace cv { namespace gpu { namespace
bp
{
typedef void (*CalcAllIterationFunc)(int cols, int rows, int iters, DevMem2D& u, DevMem2D& d, DevMem2D& l, DevMem2D& r, const DevMem2D& data, const cudaStream_t& stream);
template<typename T>
...
...
@@ -443,7 +444,7 @@ namespace cv { namespace gpu { namespace impl {
for(int t = 0; t < iters; ++t)
{
b
eliefpropagation_gpu
::one_iteration<T><<<grid, threads, 0, stream>>>(t, (T*)u.ptr, (T*)d.ptr, (T*)l.ptr, (T*)r.ptr, u.step/sizeof(T), (const T*)data.ptr, data.step/sizeof(T), cols, rows);
b
p_kernels
::one_iteration<T><<<grid, threads, 0, stream>>>(t, (T*)u.ptr, (T*)d.ptr, (T*)l.ptr, (T*)r.ptr, u.step/sizeof(T), (const T*)data.ptr, data.step/sizeof(T), cols, rows);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
...
...
@@ -475,7 +476,7 @@ namespace cv { namespace gpu { namespace impl {
/////////////////////////// output ////////////////////////////
///////////////////////////////////////////////////////////////
namespace b
eliefpropagation_gpu
namespace b
p_kernels
{
template <typename T>
__global__ void output(int cols, int rows, const T* u, const T* d, const T* l, const T* r, const T* data, size_t step, short* disp, size_t res_step)
...
...
@@ -515,7 +516,7 @@ namespace beliefpropagation_gpu
}
}
namespace cv { namespace gpu { namespace
impl
{
namespace cv { namespace gpu { namespace
bp
{
typedef void (*OutputFunc)(const DevMem2D& u, const DevMem2D& d, const DevMem2D& l, const DevMem2D& r, const DevMem2D& data, DevMem2D disp, const cudaStream_t& stream);
template<typename T>
...
...
@@ -527,7 +528,7 @@ namespace cv { namespace gpu { namespace impl {
grid.x = divUp(disp.cols, threads.x);
grid.y = divUp(disp.rows, threads.y);
b
eliefpropagation_gpu
::output<T><<<grid, threads, 0, stream>>>(disp.cols, disp.rows, (const T*)u.ptr, (const T*)d.ptr, (const T*)l.ptr, (const T*)r.ptr, (const T*)data.ptr, u.step/sizeof(T), (short*)disp.ptr, disp.step/sizeof(short));
b
p_kernels
::output<T><<<grid, threads, 0, stream>>>(disp.cols, disp.rows, (const T*)u.ptr, (const T*)d.ptr, (const T*)l.ptr, (const T*)r.ptr, (const T*)data.ptr, u.step/sizeof(T), (short*)disp.ptr, disp.step/sizeof(short));
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
...
...
modules/gpu/src/cuda/constantspacebp.cu
0 → 100644
View file @
ee104c27
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/gpu/devmem2d.hpp"
#include "saturate_cast.hpp"
#include "safe_call.hpp"
using namespace cv::gpu;
using namespace cv::gpu::impl;
#ifndef FLT_MAX
#define FLT_MAX 3.402823466e+38F
#endif
#ifndef SHRT_MAX
#define SHRT_MAX 32767
#endif
template <typename T>
struct TypeLimits {};
template <>
struct TypeLimits<short>
{
static __device__ short max() {return SHRT_MAX;}
};
template <>
struct TypeLimits<float>
{
static __device__ float max() {return FLT_MAX;}
};
///////////////////////////////////////////////////////////////
/////////////////////// load constants ////////////////////////
///////////////////////////////////////////////////////////////
namespace csbp_kernels
{
__constant__ int cndisp;
__constant__ float cmax_data_term;
__constant__ float cdata_weight;
__constant__ float cmax_disc_term;
__constant__ float cdisc_single_jump;
__constant__ size_t cimg_step;
__constant__ size_t cmsg_step1;
__constant__ size_t cmsg_step2;
__constant__ size_t cdisp_step1;
__constant__ size_t cdisp_step2;
__constant__ uchar* cleft;
__constant__ uchar* cright;
__constant__ uchar* ctemp1;
__constant__ uchar* ctemp2;
}
namespace cv { namespace gpu { namespace csbp
{
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
const DevMem2D& left, const DevMem2D& right, const DevMem2D& temp1, const DevMem2D& temp2)
{
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cndisp, &ndisp, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cmax_data_term, &max_data_term, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cdata_weight, &data_weight, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cmax_disc_term, &max_disc_term, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cdisc_single_jump, &disc_single_jump, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cimg_step, &left.step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cleft, &left.ptr, sizeof(left.ptr)) );
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cright, &right.ptr, sizeof(right.ptr)) );
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::ctemp1, &temp1.ptr, sizeof(temp1.ptr)) );
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::ctemp2, &temp2.ptr, sizeof(temp2.ptr)) );
}
}}}
///////////////////////////////////////////////////////////////
/////////////////////// init data cost ////////////////////////
///////////////////////////////////////////////////////////////
namespace csbp_kernels
{
template <int channels>
struct DataCostPerPixel
{
static __device__ float compute(const uchar* left, const uchar* right)
{
float tb = 0.114f * abs((int)left[0] - right[0]);
float tg = 0.587f * abs((int)left[1] - right[1]);
float tr = 0.299f * abs((int)left[2] - right[2]);
return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
}
};
template <>
struct DataCostPerPixel<1>
{
static __device__ float compute(const uchar* left, const uchar* right)
{
return fmin(cdata_weight * abs((int)*left - *right), cdata_weight * cmax_data_term);
}
};
template <typename T>
__global__ void get_first_k_initial_local(T* data_cost_selected_, T* selected_disp_pyr, int h, int w, int nr_plane)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < h && x < w)
{
T* selected_disparity = selected_disp_pyr + y * cmsg_step1 + x;
T* data_cost_selected = data_cost_selected_ + y * cmsg_step1 + x;
T* data_cost = (T*)ctemp1 + y * cmsg_step1 + x;
int nr_local_minimum = 0;
T prev = data_cost[0 * cdisp_step1];
T cur = data_cost[1 * cdisp_step1];
T next = data_cost[2 * cdisp_step1];
for (int d = 1; d < cndisp - 1 && nr_local_minimum < nr_plane; d++)
{
if (cur < prev && cur < next)
{
data_cost_selected[nr_local_minimum * cdisp_step1] = cur;
selected_disparity[nr_local_minimum * cdisp_step1] = d;
data_cost[d * cdisp_step1] = TypeLimits<T>::max();
nr_local_minimum++;
}
prev = cur;
cur = next;
next = data_cost[(d + 1) * cdisp_step1];
}
for (int i = nr_local_minimum; i < nr_plane; i++)
{
T minimum = TypeLimits<T>::max();
int id = 0;
for (int d = 0; d < cndisp; d++)
{
cur = data_cost[d * cdisp_step1];
if (cur < minimum)
{
minimum = cur;
id = d;
}
}
data_cost_selected[i * cdisp_step1] = minimum;
selected_disparity[i * cdisp_step1] = id;
data_cost[id * cdisp_step1] = TypeLimits<T>::max();
}
}
}
template <typename T, int winsz, int channels>
__global__ void data_init(int level, int rows, int cols, int h)
{
int x_out = blockIdx.x;
int y_out = blockIdx.y % h;
int d = (blockIdx.y / h) * blockDim.z + threadIdx.z;
int tid = threadIdx.x;
if (d < cndisp)
{
int x0 = x_out << level;
int y0 = y_out << level;
int len = min(y0 + winsz, rows) - y0;
float val = 0.0f;
if (x0 + tid < cols)
{
if (x0 + tid - d < 0)
val = cdata_weight * cmax_data_term * len;
else
{
const uchar* lle = cleft + y0 * cimg_step + channels * (x0 + tid );
const uchar* lri = cright + y0 * cimg_step + channels * (x0 + tid - d);
for(int y = 0; y < len; ++y)
{
val += DataCostPerPixel<channels>::compute(lle, lri);
lle += cimg_step;
lri += cimg_step;
}
}
}
extern __shared__ float smem[];
float* dline = smem + winsz * threadIdx.z;
dline[tid] = val;
__syncthreads();
if (winsz >= 256) { if (tid < 128) { dline[tid] += dline[tid + 128]; } __syncthreads(); }
if (winsz >= 128) { if (tid < 64) { dline[tid] += dline[tid + 64]; } __syncthreads(); }
if (winsz >= 64) if (tid < 32) dline[tid] += dline[tid + 32];
if (winsz >= 32) if (tid < 16) dline[tid] += dline[tid + 16];
if (winsz >= 16) if (tid < 8) dline[tid] += dline[tid + 8];
if (winsz >= 8) if (tid < 4) dline[tid] += dline[tid + 4];
if (winsz >= 4) if (tid < 2) dline[tid] += dline[tid + 2];
if (winsz >= 2) if (tid < 1) dline[tid] += dline[tid + 1];
T* data_cost = (T*)ctemp1 + y_out * cmsg_step1 + x_out;
if (tid == 0)
data_cost[cdisp_step1 * d] = saturate_cast<T>(dline[0]);
}
}
}
namespace cv { namespace gpu { namespace csbp
{
template <typename T, int winsz>
void data_init_caller(int rows, int cols, int h, int w, int level, int ndisp, int channels, const cudaStream_t& stream)
{
const int threadsNum = 256;
const size_t smem_size = threadsNum * sizeof(float);
dim3 threads(winsz, 1, threadsNum/winsz);
dim3 grid(w, h, 1);
grid.y *= divUp(ndisp, threads.z);
switch (channels)
{
case 1: csbp_kernels::data_init<T, winsz, 1><<<grid, threads, smem_size, stream>>>(level, rows, cols, h); break;
case 3: csbp_kernels::data_init<T, winsz, 3><<<grid, threads, smem_size, stream>>>(level, rows, cols, h); break;
default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
}
}
typedef void (*DataInitCaller)(int cols, int rows, int w, int h, int level, int ndisp, int channels, const cudaStream_t& stream);
template <typename T>
void get_first_k_initial_local_caller(const DevMem2D& disp_selected_pyr, const DevMem2D& data_cost_selected, int h, int w, int nr_plane, const cudaStream_t& stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(w, threads.x);
grid.y = divUp(h, threads.y);
csbp_kernels::get_first_k_initial_local<T><<<grid, threads, 0, stream>>>((T*)data_cost_selected.ptr, (T*)disp_selected_pyr.ptr, h, w, nr_plane);
}
typedef void (*GetFirstKInitialLocalCaller)(const DevMem2D& disp_selected_pyr, const DevMem2D& data_cost_selected, int h, int w, int nr_plane, const cudaStream_t& stream);
void init_data_cost(int rows, int cols, const DevMem2D& disp_selected_pyr, const DevMem2D& data_cost_selected,
size_t msg_step, int msg_type, int h, int w, int level, int nr_plane, int ndisp, int channels, const cudaStream_t& stream)
{
static const DataInitCaller data_init_callers[8][9] =
{
{0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0},
{data_init_caller<short, 1>, data_init_caller<short, 2>, data_init_caller<short, 4>, data_init_caller<short, 8>,
data_init_caller<short, 16>, data_init_caller<short, 32>, data_init_caller<short, 64>, data_init_caller<short, 128>,
data_init_caller<short, 256>},
{0, 0, 0, 0, 0, 0, 0, 0, 0},
{data_init_caller<float, 1>, data_init_caller<float, 2>, data_init_caller<float, 4>, data_init_caller<float, 8>,
data_init_caller<float, 16>, data_init_caller<float, 32>, data_init_caller<float, 64>, data_init_caller<float, 128>,
data_init_caller<float, 256>},
{0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0}
};
static const GetFirstKInitialLocalCaller get_first_k_initial_local_callers[8] =
{
0, 0, 0,
get_first_k_initial_local_caller<short>,
0,
get_first_k_initial_local_caller<float>,
0, 0
};
DataInitCaller data_init_caller = data_init_callers[msg_type][level];
GetFirstKInitialLocalCaller get_first_k_initial_local_caller = get_first_k_initial_local_callers[msg_type];
if (!data_init_caller || !get_first_k_initial_local_caller)
cv::gpu::error("Unsupported message type or levels count", __FILE__, __LINE__);
size_t disp_step = msg_step * h;
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cdisp_step1, &disp_step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cmsg_step1, &msg_step, sizeof(size_t)) );
data_init_caller(rows, cols, h, w, level, ndisp, channels, stream);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
get_first_k_initial_local_caller(disp_selected_pyr, data_cost_selected, h, w, nr_plane, stream);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
}}}
///////////////////////////////////////////////////////////////
////////////////////// compute data cost //////////////////////
///////////////////////////////////////////////////////////////
namespace csbp_kernels
{
template <typename T, int channels>
__global__ void compute_data_cost(T* selected_disp_pyr, T* data_cost_, int h, int w, int level, int nr_plane)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < h && x < w)
{
int y0 = y << level;
int yt = (y + 1) << level;
int x0 = x << level;
int xt = (x + 1) << level;
T* selected_disparity = selected_disp_pyr + y/2 * cmsg_step2 + x/2;
T* data_cost = data_cost_ + y * cmsg_step1 + x;
for(int d = 0; d < nr_plane; d++)
{
float val = 0.0f;
for(int yi = y0; yi < yt; yi++)
{
for(int xi = x0; xi < xt; xi++)
{
int sel_disp = selected_disparity[d * cdisp_step2];
int xr = xi - sel_disp;
if (xr < 0)
val += cdata_weight * cmax_data_term;
else
{
const uchar* left_x = cleft + yi * cimg_step + xi * channels;
const uchar* right_x = cright + yi * cimg_step + xr * channels;
val += DataCostPerPixel<channels>::compute(left_x, right_x);
}
}
}
data_cost[cdisp_step1 * d] = saturate_cast<T>(val);
}
}
}
}
namespace cv { namespace gpu { namespace csbp
{
template <typename T>
void compute_data_cost_caller(const DevMem2D& disp_selected_pyr, const DevMem2D& data_cost,
int h, int w, int level, int nr_plane, int channels, const cudaStream_t& stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(w, threads.x);
grid.y = divUp(h, threads.y);
switch(channels)
{
case 1: csbp_kernels::compute_data_cost<T, 1><<<grid, threads, 0, stream>>>((T*)disp_selected_pyr.ptr, (T*)data_cost.ptr, h, w, level, nr_plane); break;
case 3: csbp_kernels::compute_data_cost<T, 3><<<grid, threads, 0, stream>>>((T*)disp_selected_pyr.ptr, (T*)data_cost.ptr, h, w, level, nr_plane); break;
default: cv::gpu::error("Unsupported channels count", __FILE__, __LINE__);
}
}
typedef void (*ComputeDataCostCaller)(const DevMem2D& disp_selected_pyr, const DevMem2D& data_cost,
int h, int w, int level, int nr_plane, int channels, const cudaStream_t& stream);
void compute_data_cost(const DevMem2D& disp_selected_pyr, const DevMem2D& data_cost, size_t msg_step1, size_t msg_step2, int msg_type,
int h, int w, int h2, int level, int nr_plane, int channels, const cudaStream_t& stream)
{
static const ComputeDataCostCaller callers[8] =
{
0, 0, 0,
compute_data_cost_caller<short>,
0,
compute_data_cost_caller<float>,
0, 0
};
size_t disp_step1 = msg_step1 * h;
size_t disp_step2 = msg_step2 * h2;
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cdisp_step1, &disp_step1, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cdisp_step2, &disp_step2, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cmsg_step1, &msg_step1, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cmsg_step2, &msg_step2, sizeof(size_t)) );
ComputeDataCostCaller caller = callers[msg_type];
if (!caller)
cv::gpu::error("Unsopported message type", __FILE__, __LINE__);
caller(disp_selected_pyr, data_cost, h, w, level, nr_plane, channels, stream);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
}}}
///////////////////////////////////////////////////////////////
//////////////////////// init message /////////////////////////
///////////////////////////////////////////////////////////////
namespace csbp_kernels
{
template <typename T>
__device__ void get_first_k_element_increase(T* u_new, T* d_new, T* l_new, T* r_new,
const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur,
T* data_cost_selected, T* disparity_selected_new, T* data_cost_new,
const T* data_cost_cur, const T* disparity_selected_cur,
int nr_plane, int nr_plane2)
{
for(int i = 0; i < nr_plane; i++)
{
T minimum = TypeLimits<T>::max();
int id = 0;
for(int j = 0; j < nr_plane2; j++)
{
T cur = data_cost_new[j * cdisp_step1];
if(cur < minimum)
{
minimum = cur;
id = j;
}
}
data_cost_selected[i * cdisp_step1] = data_cost_cur[id * cdisp_step1];
disparity_selected_new[i * cdisp_step1] = disparity_selected_cur[id * cdisp_step1];
u_new[i * cdisp_step1] = u_cur[id * cdisp_step2];
d_new[i * cdisp_step1] = d_cur[id * cdisp_step2];
l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
data_cost_new[id * cdisp_step1] = TypeLimits<T>::max();
}
}
template <typename T>
__global__ void init_message(T* u_new_, T* d_new_, T* l_new_, T* r_new_,
const T* u_cur_, const T* d_cur_, const T* l_cur_, const T* r_cur_,
T* selected_disp_pyr_new, const T* selected_disp_pyr_cur,
T* data_cost_selected_, T* data_cost_,
int h, int w, int nr_plane, int h2, int w2, int nr_plane2)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < h && x < w)
{
const T* u_cur = u_cur_ + min(h2-1, y/2 + 1) * cmsg_step2 + x/2;
const T* d_cur = d_cur_ + max(0, y/2 - 1) * cmsg_step2 + x/2;
const T* l_cur = l_cur_ + y/2 * cmsg_step2 + min(w2-1, x/2 + 1);
const T* r_cur = r_cur_ + y/2 * cmsg_step2 + max(0, x/2 - 1);
T* disparity_selected_cur_backup = (T*)ctemp2 + y * cmsg_step1 + x;
T* data_cost_new = (T*)ctemp1 + y * cmsg_step1 + x;
const T* disparity_selected_cur = selected_disp_pyr_cur + y/2 * cmsg_step2 + x/2;
T* data_cost = data_cost_ + y * cmsg_step1 + x;
for(int d = 0; d < nr_plane2; d++)
{
int idx2 = d * cdisp_step2;
disparity_selected_cur_backup[d * cdisp_step1] = disparity_selected_cur[idx2];
T val = data_cost[d * cdisp_step1] + u_cur[idx2] + d_cur[idx2] + l_cur[idx2] + r_cur[idx2];
data_cost_new[d * cdisp_step1] = val;
}
T* data_cost_selected = data_cost_selected_ + y * cmsg_step1 + x;
T* disparity_selected_new = selected_disp_pyr_new + y * cmsg_step1 + x;
T* u_new = u_new_ + y * cmsg_step1 + x;
T* d_new = d_new_ + y * cmsg_step1 + x;
T* l_new = l_new_ + y * cmsg_step1 + x;
T* r_new = r_new_ + y * cmsg_step1 + x;
u_cur = u_cur_ + y/2 * cmsg_step2 + x/2;
d_cur = d_cur_ + y/2 * cmsg_step2 + x/2;
l_cur = l_cur_ + y/2 * cmsg_step2 + x/2;
r_cur = r_cur_ + y/2 * cmsg_step2 + x/2;
get_first_k_element_increase(u_new, d_new, l_new, r_new, u_cur, d_cur, l_cur, r_cur,
data_cost_selected, disparity_selected_new, data_cost_new,
data_cost, disparity_selected_cur_backup, nr_plane, nr_plane2);
}
}
}
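// Coarse-to-fine note: a fine-level pixel (x, y) inherits its candidate
// planes from coarse pixel (x/2, y/2); the four incoming messages are read
// from that coarse pixel's neighbours, with the row/column clamped at the
// border (the min/max expressions above). A sketch of the same clamped
// addressing (illustrative helper, not used by the kernels):
static __host__ __device__ inline int clamp_index(int v, int lo, int hi)
{
    return v < lo ? lo : (v > hi ? hi : v);
}
// e.g. the "up" source row min(h2 - 1, y/2 + 1) equals
// clamp_index(y/2 + 1, 0, h2 - 1), since y/2 + 1 is always positive.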
namespace cv { namespace gpu { namespace csbp
{
template <typename T>
void init_message_caller(const DevMem2D& u_new, const DevMem2D& d_new, const DevMem2D& l_new, const DevMem2D& r_new,
const DevMem2D& u_cur, const DevMem2D& d_cur, const DevMem2D& l_cur, const DevMem2D& r_cur,
const DevMem2D& selected_disp_pyr_new, const DevMem2D& selected_disp_pyr_cur,
const DevMem2D& data_cost_selected, const DevMem2D& data_cost,
int h, int w, int nr_plane, int h2, int w2, int nr_plane2, const cudaStream_t& stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(w, threads.x);
grid.y = divUp(h, threads.y);
csbp_kernels::init_message<T><<<grid, threads, 0, stream>>>((T*)u_new.ptr, (T*)d_new.ptr, (T*)l_new.ptr, (T*)r_new.ptr,
(const T*)u_cur.ptr, (const T*)d_cur.ptr, (const T*)l_cur.ptr, (const T*)r_cur.ptr,
(T*)selected_disp_pyr_new.ptr, (const T*)selected_disp_pyr_cur.ptr,
(T*)data_cost_selected.ptr, (T*)data_cost.ptr,
h, w, nr_plane, h2, w2, nr_plane2);
}
typedef void (*InitMessageCaller)(const DevMem2D& u_new, const DevMem2D& d_new, const DevMem2D& l_new, const DevMem2D& r_new,
const DevMem2D& u_cur, const DevMem2D& d_cur, const DevMem2D& l_cur, const DevMem2D& r_cur,
const DevMem2D& selected_disp_pyr_new, const DevMem2D& selected_disp_pyr_cur,
const DevMem2D& data_cost_selected, const DevMem2D& data_cost,
int h, int w, int nr_plane, int h2, int w2, int nr_plane2, const cudaStream_t& stream);
void init_message(const DevMem2D& u_new, const DevMem2D& d_new, const DevMem2D& l_new, const DevMem2D& r_new,
const DevMem2D& u_cur, const DevMem2D& d_cur, const DevMem2D& l_cur, const DevMem2D& r_cur,
const DevMem2D& selected_disp_pyr_new, const DevMem2D& selected_disp_pyr_cur,
const DevMem2D& data_cost_selected, const DevMem2D& data_cost, size_t msg_step1, size_t msg_step2, int msg_type,
int h, int w, int nr_plane, int h2, int w2, int nr_plane2, const cudaStream_t& stream)
{
static const InitMessageCaller callers[8] =
{
0, 0, 0,
init_message_caller<short>,
0,
init_message_caller<float>,
0, 0
};
size_t disp_step1 = msg_step1 * h;
size_t disp_step2 = msg_step2 * h2;
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cdisp_step1, &disp_step1, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cdisp_step2, &disp_step2, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cmsg_step1, &msg_step1, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cmsg_step2, &msg_step2, sizeof(size_t)) );
InitMessageCaller caller = callers[msg_type];
if (!caller)
cv::gpu::error("Unsupported message type", __FILE__, __LINE__);
caller(u_new, d_new, l_new, r_new, u_cur, d_cur, l_cur, r_cur,
selected_disp_pyr_new, selected_disp_pyr_cur, data_cost_selected, data_cost,
h, w, nr_plane, h2, w2, nr_plane2, stream);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
}}}
///////////////////////////////////////////////////////////////
//////////////////// calc all iterations /////////////////////
///////////////////////////////////////////////////////////////
namespace csbp_kernels
{
template <typename T>
__device__ void message_per_pixel(const T* data, T* msg_dst, const T* msg1, const T* msg2, const T* msg3,
const T* dst_disp, const T* src_disp, int nr_plane, T* temp)
{
T minimum = TypeLimits<T>::max();
for(int d = 0; d < nr_plane; d++)
{
int idx = d * cdisp_step1;
T val = data[idx] + msg1[idx] + msg2[idx] + msg3[idx];
if(val < minimum)
minimum = val;
msg_dst[idx] = val;
}
float sum = 0;
for(int d = 0; d < nr_plane; d++)
{
float cost_min = minimum + cmax_disc_term;
T src_disp_reg = src_disp[d * cdisp_step1];
for(int d2 = 0; d2 < nr_plane; d2++)
cost_min = fmin(cost_min, msg_dst[d2 * cdisp_step1] + cdisc_single_jump * abs(dst_disp[d2 * cdisp_step1] - src_disp_reg));
temp[d * cdisp_step1] = saturate_cast<T>(cost_min);
sum += cost_min;
}
sum /= nr_plane;
for(int d = 0; d < nr_plane; d++)
msg_dst[d * cdisp_step1] = saturate_cast<T>(temp[d * cdisp_step1] - sum);
}
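// message_per_pixel is the max-product update with a truncated linear
// smoothness model: writing h(d2) = data(d2) + msg1(d2) + msg2(d2) + msg3(d2),
// the outgoing message for candidate disparity src_disp(d) is
//     m(d) = min_d2( h(d2) + cdisc_single_jump * |dst_disp(d2) - src_disp(d)| )
// truncated at min(h) + cmax_disc_term, and finally made zero-mean so the
// values stay bounded over many iterations. A host-side scalar reference
// under those assumptions (contiguous arrays, illustrative names):
template <typename T>
void message_update_reference(const T* h, const T* dst_disp, const T* src_disp,
                              int nr_plane, float jump, float trunc, T* out)
{
    float hmin = (float)h[0];
    for (int d2 = 1; d2 < nr_plane; ++d2)
        hmin = fminf(hmin, (float)h[d2]);

    float sum = 0.f;
    for (int d = 0; d < nr_plane; ++d)
    {
        float m = hmin + trunc;                               // truncation ceiling
        for (int d2 = 0; d2 < nr_plane; ++d2)
            m = fminf(m, (float)h[d2] + jump * fabsf((float)(dst_disp[d2] - src_disp[d])));
        out[d] = (T)m;
        sum += m;
    }

    float mean = sum / nr_plane;
    for (int d = 0; d < nr_plane; ++d)
        out[d] = (T)((float)out[d] - mean);                   // zero-mean normalisation
}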
template <typename T>
__global__ void compute_message(T* u_, T* d_, T* l_, T* r_, const T* data_cost_selected, const T* selected_disp_pyr_cur,
int h, int w, int nr_plane, int i)
{
int y = blockIdx.y * blockDim.y + threadIdx.y;
int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + i) & 1);
if (y > 0 && y < h - 1 && x > 0 && x < w - 1)
{
const T* data = data_cost_selected + y * cmsg_step1 + x;
T* u = u_ + y * cmsg_step1 + x;
T* d = d_ + y * cmsg_step1 + x;
T* l = l_ + y * cmsg_step1 + x;
T* r = r_ + y * cmsg_step1 + x;
const T* disp = selected_disp_pyr_cur + y * cmsg_step1 + x;
T* temp = (T*)ctemp1 + y * cmsg_step1 + x;
message_per_pixel(data, u, r - 1, u + cmsg_step1, l + 1, disp, disp - cmsg_step1, nr_plane, temp);
message_per_pixel(data, d, d - cmsg_step1, r - 1, l + 1, disp, disp + cmsg_step1, nr_plane, temp);
message_per_pixel(data, l, u + cmsg_step1, d - cmsg_step1, l + 1, disp, disp - 1, nr_plane, temp);
message_per_pixel(data, r, u + cmsg_step1, d - cmsg_step1, r - 1, disp, disp + 1, nr_plane, temp);
}
}
}
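// Scheduling note: compute_message performs a red-black (checkerboard) sweep;
// each launch updates only the pixels where (x + y) has one parity, and the
// parity flips with the iteration index, which is also why the caller below
// sizes the grid with threads.x << 1. A host-side sketch of the same index
// mapping (illustrative, no real message update inside):
static void checkerboard_pass_reference(int w, int h, int iter)
{
    for (int y = 1; y < h - 1; ++y)
        for (int x0 = 0; x0 < (w >> 1); ++x0)
        {
            int x = (x0 << 1) + ((y + iter) & 1);   // same parity trick as the kernel
            if (x > 0 && x < w - 1)
            {
                /* update the four outgoing messages at (x, y) */
            }
        }
}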
namespace cv { namespace gpu { namespace csbp
{
template <typename T>
void compute_message_caller(const DevMem2D& u, const DevMem2D& d, const DevMem2D& l, const DevMem2D& r, const DevMem2D& data_cost_selected,
const DevMem2D& selected_disp_pyr_cur, int h, int w, int nr_plane, int t, const cudaStream_t& stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(w, threads.x << 1);
grid.y = divUp(h, threads.y);
csbp_kernels::compute_message<T><<<grid, threads, 0, stream>>>((T*)u.ptr, (T*)d.ptr, (T*)l.ptr, (T*)r.ptr,
(const T*)data_cost_selected.ptr, (const T*)selected_disp_pyr_cur.ptr,
h, w, nr_plane, t & 1);
}
typedef void (*ComputeMessageCaller)(const DevMem2D& u, const DevMem2D& d, const DevMem2D& l, const DevMem2D& r, const DevMem2D& data_cost_selected,
const DevMem2D& selected_disp_pyr_cur, int h, int w, int nr_plane, int t, const cudaStream_t& stream);
void calc_all_iterations(const DevMem2D& u, const DevMem2D& d, const DevMem2D& l, const DevMem2D& r, const DevMem2D& data_cost_selected,
const DevMem2D& selected_disp_pyr_cur, size_t msg_step, int msg_type, int h, int w, int nr_plane, int iters, const cudaStream_t& stream)
{
static const ComputeMessageCaller callers[8] =
{
0, 0, 0,
compute_message_caller<short>,
0,
compute_message_caller<float>,
0, 0
};
size_t disp_step = msg_step * h;
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cdisp_step1, &disp_step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cmsg_step1, &msg_step, sizeof(size_t)) );
ComputeMessageCaller caller = callers[msg_type];
if (!caller)
cv::gpu::error("Unsupported message type", __FILE__, __LINE__);
for(int t = 0; t < iters; ++t)
{
caller(u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t, stream);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
}
}}}
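// Hedged usage sketch: one pyramid level is relaxed by a single call, with
// each of the `iters` launches covering one checkerboard colour (t & 1
// supplies the parity). Assuming float messages, i.e. msg_type == CV_32F:
//
//     cv::gpu::csbp::calc_all_iterations(u, d, l, r, data_cost_selected,
//                                        disp_selected_pyr, msg_step, CV_32F,
//                                        h, w, nr_plane, iters, stream);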
///////////////////////////////////////////////////////////////
/////////////////////////// output ////////////////////////////
///////////////////////////////////////////////////////////////
namespace csbp_kernels
{
template <typename T>
__global__ void compute_disp(const T* u_, const T* d_, const T* l_, const T* r_,
const T* data_cost_selected, const T* disp_selected_pyr,
short* disp, size_t res_step, int cols, int rows, int nr_plane)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y > 0 && y < rows - 1 && x > 0 && x < cols - 1)
{
const T* data = data_cost_selected + y * cmsg_step1 + x;
const T* disp_selected = disp_selected_pyr + y * cmsg_step1 + x;
const T* u = u_ + (y+1) * cmsg_step1 + (x+0);
const T* d = d_ + (y-1) * cmsg_step1 + (x+0);
const T* l = l_ + (y+0) * cmsg_step1 + (x+1);
const T* r = r_ + (y+0) * cmsg_step1 + (x-1);
int best = 0;
T best_val = TypeLimits<T>::max();
for (int i = 0; i < nr_plane; ++i)
{
int idx = i * cdisp_step1;
T val = data[idx] + u[idx] + d[idx] + l[idx] + r[idx];
if (val < best_val)
{
best_val = val;
best = saturate_cast<short>(disp_selected[idx]);
}
}
disp[res_step * y + x] = best;
}
}
}
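// Readout note: the final disparity is winner-take-all over the selected
// planes. The belief for plane i is data[i] + u[i] + d[i] + l[i] + r[i],
// with the messages gathered from the four neighbours, and the disparity
// stored for the winning plane is written to the output. A scalar reference
// (hypothetical contiguous arrays):
template <typename T>
short wta_disparity_reference(const T* data, const T* u, const T* d, const T* l,
                              const T* r, const T* disp_selected, int nr_plane)
{
    int best = 0;
    T best_val = (T)(data[0] + u[0] + d[0] + l[0] + r[0]);
    for (int i = 1; i < nr_plane; ++i)
    {
        T val = (T)(data[i] + u[i] + d[i] + l[i] + r[i]);   // belief for plane i
        if (val < best_val)
        {
            best_val = val;
            best = i;
        }
    }
    return (short)disp_selected[best];
}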
namespace cv { namespace gpu { namespace csbp
{
template <typename T>
void compute_disp_caller(const DevMem2D& u, const DevMem2D& d, const DevMem2D& l, const DevMem2D& r, const DevMem2D& data_cost_selected,
const DevMem2D& disp_selected, const DevMem2D& disp, int nr_plane, const cudaStream_t& stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(disp.cols, threads.x);
grid.y = divUp(disp.rows, threads.y);
csbp_kernels::compute_disp<T><<<grid, threads, 0, stream>>>((const T*)u.ptr, (const T*)d.ptr, (const T*)l.ptr, (const T*)r.ptr,
(const T*)data_cost_selected.ptr, (const T*)disp_selected.ptr,
(short*)disp.ptr, disp.step / sizeof(short), disp.cols, disp.rows, nr_plane);
}
typedef void (*ComputeDispCaller)(const DevMem2D& u, const DevMem2D& d, const DevMem2D& l, const DevMem2D& r, const DevMem2D& data_cost_selected,
const DevMem2D& disp_selected, const DevMem2D& disp, int nr_plane, const cudaStream_t& stream);
void compute_disp(const DevMem2D& u, const DevMem2D& d, const DevMem2D& l, const DevMem2D& r, const DevMem2D& data_cost_selected,
const DevMem2D& disp_selected, size_t msg_step, int msg_type, const DevMem2D& disp, int nr_plane, const cudaStream_t& stream)
{
static const ComputeDispCaller callers[8] =
{
0, 0, 0,
compute_disp_caller<short>,
0,
compute_disp_caller<float>,
0, 0
};
size_t disp_step = disp.rows * msg_step;
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cdisp_step1, &disp_step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(csbp_kernels::cmsg_step1, &msg_step, sizeof(size_t)) );
ComputeDispCaller caller = callers[msg_type];
if (!caller)
cv::gpu::error("Unsupported message type", __FILE__, __LINE__);
caller(u, d, l, r, data_cost_selected, disp_selected, disp, nr_plane, stream);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
}}}
\ No newline at end of file
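// Pipeline summary (hedged; the actual driver is the host wrapper added in
// constantspacebp_gpu.cpp by this commit): the host walks the pyramid from
// coarsest to finest, roughly
//
//     for (int level = levels - 1; level >= 0; --level)
//     {
//         // coarsest level: build data costs and pick the initial planes;
//         // finer levels: compute_data_cost over the kept planes, then
//         // init_message to upsample messages and re-select planes
//         calc_all_iterations(...);   // red-black BP sweeps at this level
//     }
//     compute_disp(...);              // winner-take-all at the finest level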