Commit 84f51332 authored Jul 29, 2010 by Vladislav Vinogradov
implemented asynchronous call for StereoBeliefPropagation_GPU
parent 70a2c8f5
Showing 3 changed files with 139 additions and 49 deletions
modules/gpu/include/opencv2/gpu/gpu.hpp       +17 -10
modules/gpu/src/beliefpropagation_gpu.cpp     +63 -18
modules/gpu/src/cuda/beliefpropagation.cu     +59 -21
modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -379,19 +379,26 @@ namespace cv
        enum { DEFAULT_ITERS  = 5 };
        enum { DEFAULT_LEVELS = 5 };

        static const float DEFAULT_DISC_COST;
        static const float DEFAULT_DATA_COST;
        static const float DEFAULT_LAMBDA_COST;

        explicit StereoBeliefPropagation_GPU(int ndisp = DEFAULT_NDISP, int iters = DEFAULT_ITERS, int levels = DEFAULT_LEVELS, float disc_cost = DEFAULT_DISC_COST, float data_cost = DEFAULT_DATA_COST, float lambda = DEFAULT_LAMBDA_COST);

        //! the default constructor
        explicit StereoBeliefPropagation_GPU(int ndisp = DEFAULT_NDISP, int iters = DEFAULT_ITERS, int levels = DEFAULT_LEVELS);

        //! the full constructor taking the number of disparities, number of BP iterations on first level,
        //! number of levels, truncation of discontinuity cost, truncation of data cost and weighting of data cost.
        StereoBeliefPropagation_GPU(int ndisp, int iters, int levels, float disc_cost, float data_cost, float lambda);

        //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
        //! Output disparity has CV_8U type.
        void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity);

        //! Acync version
        void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream);

        //! Some heuristics that tries to estmate
        //! if current GPU will be faster then CPU in this algorithm.
        //! It queries current active device.
        static bool checkIfGpuCallReasonable();

        int ndisp;
        int iters;
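For orientation, here is a minimal usage sketch of the two call forms declared above; it is not part of the commit. It assumes the 2010-era cv::gpu API, in particular that CudaStream exposes a waitForCompletion() method as the caller-side synchronization point.

#include <opencv2/gpu/gpu.hpp>

void match(const cv::gpu::GpuMat& left, const cv::gpu::GpuMat& right)
{
    cv::gpu::StereoBeliefPropagation_GPU bp;   // ndisp/iters/levels take their defaults
    cv::gpu::GpuMat disp;

    // Blocking form: returns once the disparity map is ready.
    bp(left, right, disp);

    // Asynchronous form added by this commit: the work is only enqueued on 'stream'.
    cv::gpu::CudaStream stream;
    bp(left, right, disp, stream);
    // ... do unrelated CPU work here ...
    stream.waitForCompletion();                // assumed CudaStream API: wait before reading 'disp'
}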
modules/gpu/src/beliefpropagation_gpu.cpp
@@ -46,29 +46,42 @@ using namespace cv;
using namespace cv::gpu;
using namespace std;

const float cv::gpu::StereoBeliefPropagation_GPU::DEFAULT_DISC_COST   = 1.7f;
const float cv::gpu::StereoBeliefPropagation_GPU::DEFAULT_DATA_COST   = 10.0f;
const float cv::gpu::StereoBeliefPropagation_GPU::DEFAULT_LAMBDA_COST = 0.07f;
#if !defined (HAVE_CUDA)
cv::gpu::StereoBeliefPropagation_GPU::StereoBeliefPropagation_GPU(int, int, int) { throw_nogpu(); }
cv::gpu::StereoBeliefPropagation_GPU::StereoBeliefPropagation_GPU(int, int, int, float, float, float) { throw_nogpu(); }
void cv::gpu::StereoBeliefPropagation_GPU::operator() (const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&, const CudaStream&) { throw_nogpu(); }
bool cv::gpu::StereoBeliefPropagation_GPU::checkIfGpuCallReasonable() { throw_nogpu(); return false; }
#else /* !defined (HAVE_CUDA) */
static const float DEFAULT_DISC_COST   = 1.7f;
static const float DEFAULT_DATA_COST   = 10.0f;
static const float DEFAULT_LAMBDA_COST = 0.07f;

typedef DevMem2D_<float> DevMem2Df;

namespace cv { namespace gpu { namespace impl {
    extern "C" void load_constants(int ndisp, float disc_cost, float data_cost, float lambda);
    extern "C" void comp_data_caller(const DevMem2D& l, const DevMem2D& r, DevMem2Df mdata);
    extern "C" void data_down_kernel_caller(int dst_cols, int dst_rows, int src_rows, const DevMem2Df& src, DevMem2Df dst);
    extern "C" void level_up(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2Df* mu, DevMem2Df* md, DevMem2Df* ml, DevMem2Df* mr);
    extern "C" void call_all_iterations(int cols, int rows, int iters, DevMem2Df& u, DevMem2Df& d, DevMem2Df& l, DevMem2Df& r, const DevMem2Df& data);
    extern "C" void output_caller(const DevMem2Df& u, const DevMem2Df& d, const DevMem2Df& l, const DevMem2Df& r, const DevMem2Df& data, DevMem2D disp);
    extern "C" void comp_data_caller(const DevMem2D& l, const DevMem2D& r, DevMem2Df mdata, const cudaStream_t& stream);
    extern "C" void data_down_kernel_caller(int dst_cols, int dst_rows, int src_rows, const DevMem2Df& src, DevMem2Df dst, const cudaStream_t& stream);
    extern "C" void level_up(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2Df* mu, DevMem2Df* md, DevMem2Df* ml, DevMem2Df* mr, const cudaStream_t& stream);
    extern "C" void call_all_iterations(int cols, int rows, int iters, DevMem2Df& u, DevMem2Df& d, DevMem2Df& l, DevMem2Df& r, const DevMem2Df& data, const cudaStream_t& stream);
    extern "C" void output_caller(const DevMem2Df& u, const DevMem2Df& d, const DevMem2Df& l, const DevMem2Df& r, const DevMem2Df& data, DevMem2D disp, const cudaStream_t& stream);
}}}
cv::gpu::StereoBeliefPropagation_GPU::StereoBeliefPropagation_GPU(int ndisp_, int iters_, int levels_)
    : ndisp(ndisp_), iters(iters_), levels(levels_), disc_cost(DEFAULT_DISC_COST), data_cost(DEFAULT_DATA_COST), lambda(DEFAULT_LAMBDA_COST), datas(levels_)
{
    const int max_supported_ndisp = 1 << (sizeof(unsigned char) * 8);
    CV_Assert(0 < ndisp && ndisp <= max_supported_ndisp);
    CV_Assert(ndisp % 8 == 0);
}
cv::gpu::StereoBeliefPropagation_GPU::StereoBeliefPropagation_GPU(int ndisp_, int iters_, int levels_, float disc_cost_, float data_cost_, float lambda_)
    : ndisp(ndisp_), iters(iters_), levels(levels_), disc_cost(disc_cost_), data_cost(data_cost_), lambda(lambda_), datas(levels_)
{
@@ -78,8 +91,13 @@ cv::gpu::StereoBeliefPropagation_GPU::StereoBeliefPropagation_GPU(int ndisp_, in
    CV_Assert(ndisp % 8 == 0);
}

void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp)
{
static void stereo_bp_gpu_operator(int ndisp, int iters, int levels, float disc_cost, float data_cost, float lambda,
                                   GpuMat& u, GpuMat& d, GpuMat& l, GpuMat& r,
                                   GpuMat& u2, GpuMat& d2, GpuMat& l2, GpuMat& r2,
                                   vector<GpuMat>& datas, const GpuMat& left, const GpuMat& right, GpuMat& disp,
                                   const cudaStream_t& stream)
{
    CV_DbgAssert(left.cols == right.cols && left.rows == right.rows && left.type() == right.type() && left.type() == CV_8U);

    const Scalar zero = Scalar::all(0);
@@ -98,7 +116,7 @@ void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat& left, const
    u.create(rows * ndisp, cols, CV_32F);
    d.create(rows * ndisp, cols, CV_32F);
    l.create(rows * ndisp, cols, CV_32F);
    r.create(rows * ndisp, cols, CV_32F);
    r.create(rows * ndisp, cols, CV_32F);

    if (levels & 1)
    {
@@ -140,7 +158,7 @@ void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat& left, const
    datas[0].create(rows * ndisp, cols, CV_32F);
    //datas[0] = Scalar(data_cost); //DOTO did in kernel, but not sure if correct
    impl::comp_data_caller(left, right, datas.front());
    impl::comp_data_caller(left, right, datas.front(), stream);

    for (int i = 1; i < levels; i++)
    {
@@ -153,7 +171,7 @@ void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat& left, const
        datas[i].create(rows_all[i] * ndisp, cols_all[i], CV_32F);
        impl::data_down_kernel_caller(cols_all[i], rows_all[i], rows_all[i-1], datas[i-1], datas[i]);
        impl::data_down_kernel_caller(cols_all[i], rows_all[i], rows_all[i-1], datas[i-1], datas[i], stream);
    }

    DevMem2D_<float> mus[] = {u, u2};
@@ -166,14 +184,41 @@ void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat& left, const
    for (int i = levels - 1; i >= 0; i--) // for lower level we have already computed messages by setting to zero
    {
        if (i != levels - 1)
            impl::level_up(mem_idx, cols_all[i], rows_all[i], rows_all[i+1], mus, mds, mls, mrs);
            impl::level_up(mem_idx, cols_all[i], rows_all[i], rows_all[i+1], mus, mds, mls, mrs, stream);

        impl::call_all_iterations(cols_all[i], rows_all[i], iters_all[i], mus[mem_idx], mds[mem_idx], mls[mem_idx], mrs[mem_idx], datas[i]);
        impl::call_all_iterations(cols_all[i], rows_all[i], iters_all[i], mus[mem_idx], mds[mem_idx], mls[mem_idx], mrs[mem_idx], datas[i], stream);

        mem_idx = (mem_idx + 1) & 1;
    }

    impl::output_caller(u, d, l, r, datas.front(), disp);
    impl::output_caller(u, d, l, r, datas.front(), disp, stream);
}

void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp)
{
    ::stereo_bp_gpu_operator(ndisp, iters, levels, disc_cost, data_cost, lambda, u, d, l, r, u2, d2, l2, r2, datas, left, right, disp, 0);
}

void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, const CudaStream& stream)
{
    ::stereo_bp_gpu_operator(ndisp, iters, levels, disc_cost, data_cost, lambda, u, d, l, r, u2, d2, l2, r2, datas, left, right, disp, StreamAccessor::getStream(stream));
}

bool cv::gpu::StereoBeliefPropagation_GPU::checkIfGpuCallReasonable()
{
    if (0 == getCudaEnabledDeviceCount())
        return false;

    int device = getDevice();

    int minor, major;
    getComputeCapability(device, &major, &minor);
    int numSM = getNumberOfSMs(device);

    if (major > 1 || numSM > 16)
        return true;

    return false;
}
#endif /* !defined (HAVE_CUDA) */
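checkIfGpuCallReasonable() above answers "true" only when the active device has compute capability above 1.x or more than 16 multiprocessors. A caller could use it to pick between this matcher and a CPU algorithm; a rough sketch follows. cv::StereoBM, its header, and the chosen parameters are assumptions for illustration and are not part of this commit.

#include <opencv2/gpu/gpu.hpp>
#include <opencv2/calib3d/calib3d.hpp>

void computeDisparity(const cv::Mat& left, const cv::Mat& right, cv::Mat& disp)
{
    if (cv::gpu::StereoBeliefPropagation_GPU::checkIfGpuCallReasonable())
    {
        // GPU path: upload, run belief propagation, download the CV_8U disparity.
        cv::gpu::GpuMat d_left(left), d_right(right), d_disp;
        cv::gpu::StereoBeliefPropagation_GPU bp;
        bp(d_left, d_right, d_disp);
        d_disp.download(disp);
    }
    else
    {
        // CPU fallback (assumed alternative, not referenced by this commit).
        cv::StereoBM bm(cv::StereoBM::BASIC_PRESET, 64 /*ndisparities*/, 21 /*SADWindowSize*/);
        bm(left, right, disp);
    }
}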
modules/gpu/src/cuda/beliefpropagation.cu
@@ -108,7 +108,7 @@ namespace cv { namespace gpu { namespace impl {
cudaSafeCall( cudaMemcpyToSymbol(beliefpropagation_gpu::clambda, &lambda, sizeof(lambda)) );
}
extern "C" void comp_data_caller(const DevMem2D& l, const DevMem2D& r, DevMem2D_<float> mdata)
extern "C" void comp_data_caller(const DevMem2D& l, const DevMem2D& r, DevMem2D_<float> mdata
, const cudaStream_t& stream
)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
@@ -116,8 +116,15 @@ namespace cv { namespace gpu { namespace impl {
grid.x = divUp(l.cols, threads.x);
grid.y = divUp(l.rows, threads.y);
beliefpropagation_gpu::comp_data_kernel<<<grid, threads>>>(l.ptr, r.ptr, l.step, mdata.ptr, mdata.step/sizeof(float), l.cols, l.rows);
cudaSafeCall( cudaThreadSynchronize() );
if (stream == 0)
{
beliefpropagation_gpu::comp_data_kernel<<<grid, threads>>>(l.ptr, r.ptr, l.step, mdata.ptr, mdata.step/sizeof(float), l.cols, l.rows);
//cudaSafeCall( cudaThreadSynchronize() );
}
else
{
beliefpropagation_gpu::comp_data_kernel<<<grid, threads, 0, stream>>>(l.ptr, r.ptr, l.step, mdata.ptr, mdata.step/sizeof(float), l.cols, l.rows);
}
}
}}}
@@ -151,7 +158,7 @@ namespace beliefpropagation_gpu
}
namespace cv { namespace gpu { namespace impl {
extern "C" void data_down_kernel_caller(int dst_cols, int dst_rows, int src_rows, const DevMem2D_<float>& src, DevMem2D_<float> dst)
extern "C" void data_down_kernel_caller(int dst_cols, int dst_rows, int src_rows, const DevMem2D_<float>& src, DevMem2D_<float> dst
, const cudaStream_t& stream
)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
@@ -159,8 +166,15 @@ namespace cv { namespace gpu { namespace impl {
grid.x = divUp(dst_cols, threads.x);
grid.y = divUp(dst_rows, threads.y);
beliefpropagation_gpu::data_down_kernel<<<grid, threads>>>(dst_cols, dst_rows, src_rows, src.ptr, src.step/sizeof(float), dst.ptr, dst.step/sizeof(float));
cudaSafeCall( cudaThreadSynchronize() );
if (stream == 0)
{
beliefpropagation_gpu::data_down_kernel<<<grid, threads>>>(dst_cols, dst_rows, src_rows, src.ptr, src.step/sizeof(float), dst.ptr, dst.step/sizeof(float));
//cudaSafeCall( cudaThreadSynchronize() );
}
else
{
beliefpropagation_gpu::data_down_kernel<<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, src.ptr, src.step/sizeof(float), dst.ptr, dst.step/sizeof(float));
}
}
}}}
@@ -191,7 +205,7 @@ namespace beliefpropagation_gpu
}
namespace cv { namespace gpu { namespace impl {
extern "C" void level_up(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2D_<float>* mu, DevMem2D_<float>* md, DevMem2D_<float>* ml, DevMem2D_<float>* mr)
extern "C" void level_up(int dst_idx, int dst_cols, int dst_rows, int src_rows, DevMem2D_<float>* mu, DevMem2D_<float>* md, DevMem2D_<float>* ml, DevMem2D_<float>* mr
, const cudaStream_t& stream
)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
@@ -201,12 +215,21 @@ namespace cv { namespace gpu { namespace impl {
int src_idx = (dst_idx + 1) & 1;
beliefpropagation_gpu::level_up_kernel<<<grid, threads>>>(dst_cols, dst_rows, src_rows, mu[src_idx].ptr, mu[src_idx].step/sizeof(float), mu[dst_idx].ptr, mu[dst_idx].step/sizeof(float));
beliefpropagation_gpu::level_up_kernel<<<grid, threads>>>(dst_cols, dst_rows, src_rows, md[src_idx].ptr, md[src_idx].step/sizeof(float), md[dst_idx].ptr, md[dst_idx].step/sizeof(float));
beliefpropagation_gpu::level_up_kernel<<<grid, threads>>>(dst_cols, dst_rows, src_rows, ml[src_idx].ptr, ml[src_idx].step/sizeof(float), ml[dst_idx].ptr, ml[dst_idx].step/sizeof(float));
beliefpropagation_gpu::level_up_kernel<<<grid, threads>>>(dst_cols, dst_rows, src_rows, mr[src_idx].ptr, mr[src_idx].step/sizeof(float), mr[dst_idx].ptr, mr[dst_idx].step/sizeof(float));
cudaSafeCall( cudaThreadSynchronize() );
if (stream == 0)
{
beliefpropagation_gpu::level_up_kernel<<<grid, threads>>>(dst_cols, dst_rows, src_rows, mu[src_idx].ptr, mu[src_idx].step/sizeof(float), mu[dst_idx].ptr, mu[dst_idx].step/sizeof(float));
beliefpropagation_gpu::level_up_kernel<<<grid, threads>>>(dst_cols, dst_rows, src_rows, md[src_idx].ptr, md[src_idx].step/sizeof(float), md[dst_idx].ptr, md[dst_idx].step/sizeof(float));
beliefpropagation_gpu::level_up_kernel<<<grid, threads>>>(dst_cols, dst_rows, src_rows, ml[src_idx].ptr, ml[src_idx].step/sizeof(float), ml[dst_idx].ptr, ml[dst_idx].step/sizeof(float));
beliefpropagation_gpu::level_up_kernel<<<grid, threads>>>(dst_cols, dst_rows, src_rows, mr[src_idx].ptr, mr[src_idx].step/sizeof(float), mr[dst_idx].ptr, mr[dst_idx].step/sizeof(float));
//cudaSafeCall( cudaThreadSynchronize() );
}
else
{
beliefpropagation_gpu::level_up_kernel<<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, mu[src_idx].ptr, mu[src_idx].step/sizeof(float), mu[dst_idx].ptr, mu[dst_idx].step/sizeof(float));
beliefpropagation_gpu::level_up_kernel<<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, md[src_idx].ptr, md[src_idx].step/sizeof(float), md[dst_idx].ptr, md[dst_idx].step/sizeof(float));
beliefpropagation_gpu::level_up_kernel<<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, ml[src_idx].ptr, ml[src_idx].step/sizeof(float), ml[dst_idx].ptr, ml[dst_idx].step/sizeof(float));
beliefpropagation_gpu::level_up_kernel<<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, mr[src_idx].ptr, mr[src_idx].step/sizeof(float), mr[dst_idx].ptr, mr[dst_idx].step/sizeof(float));
}
}
}}}
@@ -301,7 +324,7 @@ namespace beliefpropagation_gpu
}
namespace cv { namespace gpu { namespace impl {
extern "C" void call_all_iterations(int cols, int rows, int iters, DevMem2D_<float>& u, DevMem2D_<float>& d, DevMem2D_<float>& l, DevMem2D_<float>& r, const DevMem2D_<float>& data)
extern "C" void call_all_iterations(int cols, int rows, int iters, DevMem2D_<float>& u, DevMem2D_<float>& d, DevMem2D_<float>& l, DevMem2D_<float>& r, const DevMem2D_<float>& data
, const cudaStream_t& stream
)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
@@ -309,10 +332,17 @@ namespace cv { namespace gpu { namespace impl {
grid.x = divUp(cols, threads.x << 1);
grid.y = divUp(rows, threads.y);
for(int t = 0; t < iters; ++t)
beliefpropagation_gpu::one_iteration<<<grid, threads>>>(t, u.ptr, d.ptr, l.ptr, r.ptr, u.step/sizeof(float), data.ptr, data.step/sizeof(float), cols, rows);
cudaSafeCall( cudaThreadSynchronize() );
if (stream == 0)
{
for(int t = 0; t < iters; ++t)
beliefpropagation_gpu::one_iteration<<<grid, threads>>>(t, u.ptr, d.ptr, l.ptr, r.ptr, u.step/sizeof(float), data.ptr, data.step/sizeof(float), cols, rows);
//cudaSafeCall( cudaThreadSynchronize() );
}
else
{
for(int t = 0; t < iters; ++t)
beliefpropagation_gpu::one_iteration<<<grid, threads, 0, stream>>>(t, u.ptr, d.ptr, l.ptr, r.ptr, u.step/sizeof(float), data.ptr, data.step/sizeof(float), cols, rows);
}
}
}}}
@@ -358,7 +388,7 @@ namespace beliefpropagation_gpu
}
namespace cv { namespace gpu { namespace impl {
extern "C" void output_caller(const DevMem2D_<float>& u, const DevMem2D_<float>& d, const DevMem2D_<float>& l, const DevMem2D_<float>& r, const DevMem2D_<float>& data, DevMem2D disp)
extern "C" void output_caller(const DevMem2D_<float>& u, const DevMem2D_<float>& d, const DevMem2D_<float>& l, const DevMem2D_<float>& r, const DevMem2D_<float>& data, DevMem2D disp
, const cudaStream_t& stream
)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
@@ -366,7 +396,14 @@ namespace cv { namespace gpu { namespace impl {
grid.x = divUp(disp.cols, threads.x);
grid.y = divUp(disp.rows, threads.y);
beliefpropagation_gpu::output<<<grid, threads>>>(disp.cols, disp.rows, u.ptr, d.ptr, l.ptr, r.ptr, data.ptr, u.step/sizeof(float), disp.ptr, disp.step);
cudaSafeCall( cudaThreadSynchronize() );
if (stream == 0)
{
beliefpropagation_gpu::output<<<grid, threads>>>(disp.cols, disp.rows, u.ptr, d.ptr, l.ptr, r.ptr, data.ptr, u.step/sizeof(float), disp.ptr, disp.step);
cudaSafeCall( cudaThreadSynchronize() );
}
else
{
beliefpropagation_gpu::output<<<grid, threads, 0, stream>>>(disp.cols, disp.rows, u.ptr, d.ptr, l.ptr, r.ptr, data.ptr, u.step/sizeof(float), disp.ptr, disp.step);
}
}
}}}
\ No newline at end of file
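Every caller in this file now follows the same dispatch shape: with no stream, the kernel is launched on the default stream and the old blocking synchronization is kept or commented out; with a stream, the launch is only enqueued and synchronization is left to the caller. A stripped-down illustration of that shape follows; dummy_kernel and launch_dummy are placeholders, not part of the commit.

// Illustrative sketch of the stream-dispatch pattern used above.
__global__ void dummy_kernel(int* data, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        data[i] += 1;
}

void launch_dummy(int* data, int n, const cudaStream_t& stream)
{
    dim3 threads(256, 1, 1);
    dim3 grid((n + threads.x - 1) / threads.x, 1, 1);

    if (stream == 0)
    {
        // Default-stream path: behaves like the original synchronous code.
        dummy_kernel<<<grid, threads>>>(data, n);
        cudaSafeCall( cudaThreadSynchronize() );
    }
    else
    {
        // Stream path: enqueue only; the caller synchronizes via the stream it owns.
        dummy_kernel<<<grid, threads, 0, stream>>>(data, n);
    }
}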