Commit 12dc52c2 authored by Andrey Morozov

implemented asynchronous call for StereoBM()

parent dc0f3139
...
@@ -349,7 +349,7 @@ namespace cv
         void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity);
         //! Acync version
-        void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream);
+        void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream & stream);
         //! Some heuristics that tries to estmate
         // if current GPU will be faster then CPU in this algorithm.
...
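For orientation, here is a minimal usage sketch of the two overloads declared above. The blocking form synchronizes internally; the stream form only enqueues work, so the caller has to wait before reading the result. This is a sketch, not part of the commit: the CudaStream default constructor, the cv::gpu namespace for StreamAccessor, and the final cudaStreamSynchronize call are assumptions inferred from the declarations and the StreamAccessor::getStream bridge shown further down.

// Hedged sketch: calling the blocking and the asynchronous StereoBM_GPU overloads.
// Assumptions (not shown in this commit): CudaStream is default-constructible,
// StreamAccessor lives in cv::gpu, and waiting on the raw handle is sufficient.
#include <cuda_runtime.h>
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/gpu/stream_accessor.hpp"

void match(const cv::gpu::GpuMat& left, const cv::gpu::GpuMat& right)
{
    cv::gpu::StereoBM_GPU bm;
    cv::gpu::GpuMat disparity;

    bm(left, right, disparity);              // blocking version: synchronizes internally

    cv::gpu::CudaStream stream;              // assumption: default constructor exists
    bm(left, right, disparity, stream);      // async version: kernel is only enqueued
    cudaStreamSynchronize(cv::gpu::StreamAccessor::getStream(stream)); // wait before reading disparity
}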
...
@@ -311,7 +311,7 @@ __global__ void stereoKernel(unsigned char *left, unsigned char *right, size_t i
 namespace cv { namespace gpu { namespace impl
 {
-    template<int RADIUS> void kernel_caller(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp)
+    template<int RADIUS> void kernel_caller(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, const cudaStream_t & stream)
     {
         dim3 grid(1,1,1);
         dim3 threads(BLOCK_W, 1, 1);
@@ -322,11 +322,19 @@ namespace cv { namespace gpu { namespace impl
         //See above: #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
         size_t smem_size = (BLOCK_W + N_DISPARITIES * (BLOCK_W + 2 * RADIUS)) * sizeof(unsigned int);
+        if (stream == 0)
+        {
             stereobm_gpu::stereoKernel<RADIUS><<<grid, threads, smem_size>>>(left.ptr, right.ptr, left.step, disp.ptr, disp.step, maxdisp);
             cudaSafeCall( cudaThreadSynchronize() );
+        }
+        else
+        {
+            stereobm_gpu::stereoKernel<RADIUS><<<grid, threads, smem_size, stream>>>(left.ptr, right.ptr, left.step, disp.ptr, disp.step, maxdisp);
+        }
     };
-    typedef void (*kernel_caller_t)(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp);
+    typedef void (*kernel_caller_t)(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, const cudaStream_t & stream);
     const static kernel_caller_t callers[] =
     {
@@ -341,7 +349,7 @@ namespace cv { namespace gpu { namespace impl
     };
     const int calles_num = sizeof(callers)/sizeof(callers[0]);
-    extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, int winsz, const DevMem2D_<unsigned int>& minSSD_buf)
+    extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int maxdisp, int winsz, const DevMem2D_<unsigned int>& minSSD_buf, const cudaStream_t & stream)
     {
         int winsz2 = winsz >> 1;
@@ -361,7 +369,7 @@ namespace cv { namespace gpu { namespace impl
         size_t minssd_step = minSSD_buf.step/minSSD_buf.elemSize();
         cudaSafeCall( cudaMemcpyToSymbol( stereobm_gpu::cminSSD_step, &minssd_step, sizeof(minssd_step) ) );
-        callers[winsz2](left, right, disp, maxdisp);
+        callers[winsz2](left, right, disp, maxdisp, stream);
     }
 }}}
...
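The new kernel_caller keeps both behaviours behind one entry point: with no stream (stream == 0) it launches on the default stream and calls cudaThreadSynchronize(), so the existing blocking path is unchanged; with a user stream it passes the stream as the fourth launch parameter and returns without synchronizing. Below is a standalone sketch of that dispatch pattern using a hypothetical scale_kernel, not the OpenCV stereo kernel.

// Illustration of the optional-stream launch pattern used in kernel_caller above.
// scale_kernel and scale() are hypothetical; only the dispatch shape mirrors the commit.
#include <cuda_runtime.h>

__global__ void scale_kernel(float* data, int n, float s)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        data[i] *= s;
}

void scale(float* dev_data, int n, float s, cudaStream_t stream)
{
    dim3 threads(256, 1, 1);
    dim3 grid((n + threads.x - 1) / threads.x, 1, 1);

    if (stream == 0)
    {
        // No user stream: launch on the default stream and block until the
        // kernel finishes, preserving the original synchronous behaviour.
        scale_kernel<<<grid, threads>>>(dev_data, n, s);
        cudaThreadSynchronize();   // era-appropriate call, as in the commit
    }
    else
    {
        // User stream: enqueue the kernel (4th launch parameter) and return
        // immediately; the caller synchronizes the stream when needed.
        scale_kernel<<<grid, threads, 0, stream>>>(dev_data, n, s);
    }
}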
...
@@ -54,6 +54,7 @@
 #include <limits>
 #include "opencv2/gpu/gpu.hpp"
+#include "opencv2/gpu/stream_accessor.hpp"
 #if defined(HAVE_CUDA)
...
...
@@ -60,7 +60,8 @@ namespace cv { namespace gpu
 {
     namespace impl
     {
-        extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf);
+        //extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf);
+        extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const DevMem2D& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf, const cudaStream_t & stream);
         extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output, int prefilterCap = 31);
         extern "C" void postfilter_textureness(const DevMem2D& input, int winsz, float avergeTexThreshold, const DevMem2D& disp);
     }
@@ -97,7 +98,7 @@ bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()
     return false;
 }
-void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity)
+void stereo_gpu_operator ( GpuMat& minSSD, GpuMat& leBuf, GpuMat& riBuf, int preset, int ndisp, int winSize, float avergeTexThreshold, const GpuMat& left, const GpuMat& right, GpuMat& disparity, const cudaStream_t & stream)
 {
     CV_DbgAssert(left.rows == right.rows && left.cols == right.cols);
     CV_DbgAssert(left.type() == CV_8UC1);
@@ -109,7 +110,7 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right
     GpuMat le_for_bm = left;
     GpuMat ri_for_bm = right;
-    if (preset == PREFILTER_XSOBEL)
+    if (preset == StereoBM_GPU::PREFILTER_XSOBEL)
     {
         leBuf.create( left.size(), left.type());
         riBuf.create(right.size(), right.type());
@@ -120,15 +121,22 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right
         le_for_bm = leBuf;
         ri_for_bm = riBuf;
     }
-    impl::stereoBM_GPU(le_for_bm, ri_for_bm, disparity, ndisp, winSize, minSSD);
+    impl::stereoBM_GPU(le_for_bm, ri_for_bm, disparity, ndisp, winSize, minSSD, stream);
     if (avergeTexThreshold)
         impl::postfilter_textureness(le_for_bm, winSize, avergeTexThreshold, disparity);
 }
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity)
+{
+    ::stereo_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, 0);
+}
 void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream)
 {
-    CV_Assert(!"Not implemented");
+    ::stereo_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, StreamAccessor::getStream(stream));
 }
 #endif /* !defined (HAVE_CUDA) */