Commit d1fc3e6b authored by Anatoly Baksheev

cv::gpu::CudaStream -> cv::gpu::Stream

some refactoring
added gpu module to compilation
parent c5608591
@@ -43,7 +43,7 @@ SET(OpenCV_LIB_DIR "@CMAKE_LIB_DIRS_CONFIGCMAKE@")
 # ====================================================================
 # Link libraries: e.g. opencv_core220.so, opencv_imgproc220d.lib, etc...
 # ====================================================================
-set(OPENCV_LIB_COMPONENTS opencv_core opencv_imgproc opencv_features2d opencv_calib3d opencv_objdetect opencv_video opencv_highgui opencv_ml opencv_legacy opencv_contrib)
+set(OPENCV_LIB_COMPONENTS opencv_core opencv_imgproc opencv_features2d opencv_gpu opencv_calib3d opencv_objdetect opencv_video opencv_highgui opencv_ml opencv_legacy opencv_contrib)
 SET(OpenCV_LIBS "")
 foreach(__CVLIB ${OPENCV_LIB_COMPONENTS})
     # CMake>=2.6 supports the notation "debug XXd optimized XX"
......
@@ -24,4 +24,4 @@ add_subdirectory(haartraining)
 add_subdirectory(traincascade)
-#add_subdirectory(gpu)
+add_subdirectory(gpu)
@@ -67,7 +67,7 @@ namespace cv
         CV_EXPORTS void getGpuMemInfo(size_t *free, size_t* total);

         //////////////////////////////// GpuMat ////////////////////////////////

-        class CudaStream;
+        class Stream;
         class MatPL;

         //! Smart pointer for GPU memory with reference counting. Its interface is mostly similar with cv::Mat.
@@ -111,12 +111,12 @@ namespace cv
             //! performs a blocking upload of data to the GpuMat.
             void upload(const cv::Mat& m);
-            void upload(const MatPL& m, CudaStream& stream);
+            void upload(const MatPL& m, Stream& stream);

             //! Downloads data from device to host memory. Blocking calls.
             operator Mat() const;
             void download(cv::Mat& m) const;
-            void download(MatPL& m, CudaStream& stream) const;
+            void download(MatPL& m, Stream& stream) const;

             //! returns a new GpuMatrix header for the specified row
             GpuMat row(int y) const;
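Taken together with the MatPL declaration above, these pairs give both a blocking and a queued transfer path. A minimal sketch of the two (assuming MatPL mirrors cv::Mat's (rows, cols, type) constructor, which this diff does not show):

    cv::Mat host(480, 640, CV_8UC1);
    cv::gpu::GpuMat dev;
    dev.upload(host);                          // blocking copy from pageable memory

    cv::gpu::MatPL pinned(480, 640, CV_8UC1);  // page-locked host buffer (assumed ctor)
    cv::gpu::Stream stream;
    dev.upload(pinned, stream);                // queued on the stream, returns at once
    stream.waitForCompletion();                // synchronize before reading results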
@@ -291,14 +291,14 @@ namespace cv
         // Passed to each function that supports async kernel execution.
         // Reference counting is enabled.
-        class CV_EXPORTS CudaStream
+        class CV_EXPORTS Stream
         {
         public:
-            CudaStream();
-            ~CudaStream();
+            Stream();
+            ~Stream();

-            CudaStream(const CudaStream&);
-            CudaStream& operator=(const CudaStream&);
+            Stream(const Stream&);
+            Stream& operator=(const Stream&);

             bool queryIfComplete();
             void waitForCompletion();
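As declared, the class is copyable with shared ownership, and completion can either be polled or awaited. A sketch of the polling pattern (the queued operations are illustrative; the enqueue* methods appear later in this commit):

    cv::gpu::MatPL pinnedSrc;                // assumed filled elsewhere
    cv::gpu::GpuMat dSrc, dDst;
    cv::gpu::Stream stream;
    stream.enqueueUpload(pinnedSrc, dSrc);
    stream.enqueueCopy(dSrc, dDst);
    while (!stream.queryIfComplete())
    {
        // overlap independent CPU work with the queued GPU operations
    }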
@@ -355,7 +355,7 @@ namespace cv
             void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity);

             //! Async version
-            void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream);
+            void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const Stream& stream);

             //! Some heuristics that try to estimate
             // if the current GPU will be faster than the CPU in this algorithm.
@@ -390,18 +390,18 @@ namespace cv
             enum { DEFAULT_LEVELS = 5 };

             //! the default constructor
-            explicit StereoBeliefPropagation_GPU(int ndisp_  = DEFAULT_NDISP,
-                                                 int iters_  = DEFAULT_ITERS,
-                                                 int levels_ = DEFAULT_LEVELS,
-                                                 int msg_type_ = MSG_TYPE_AUTO,
+            explicit StereoBeliefPropagation_GPU(int ndisp  = DEFAULT_NDISP,
+                                                 int iters  = DEFAULT_ITERS,
+                                                 int levels = DEFAULT_LEVELS,
+                                                 int msg_type = MSG_TYPE_AUTO,
                                                  float msg_scale = 1.0f);

             //! the full constructor taking the number of disparities, number of BP iterations on each level,
             //! number of levels, truncation of data cost, data weight,
             //! truncation of discontinuity cost and discontinuity single jump
-            StereoBeliefPropagation_GPU(int ndisp_, int iters_, int levels_,
-                                        float max_data_term_, float data_weight_,
-                                        float max_disc_term_, float disc_single_jump_,
-                                        int msg_type_ = MSG_TYPE_AUTO,
+            StereoBeliefPropagation_GPU(int ndisp, int iters, int levels,
+                                        float max_data_term, float data_weight,
+                                        float max_disc_term, float disc_single_jump,
+                                        int msg_type = MSG_TYPE_AUTO,
                                         float msg_scale = 1.0f);
             //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
@@ -409,7 +409,7 @@ namespace cv
             void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity);

             //! Async version
-            void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream);
+            void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, const Stream& stream);

             //! Some heuristics that try to estimate
             //! if the current GPU will be faster than the CPU in this algorithm.
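A usage sketch for the renamed async overload (parameter values are illustrative; the inputs are assumed to hold a rectified pair):

    cv::gpu::StereoBeliefPropagation_GPU bp(64 /*ndisp*/, 5 /*iters*/, 5 /*levels*/);
    cv::gpu::GpuMat left, right, disparity;
    cv::gpu::Stream stream;
    bp(left, right, disparity, stream);   // returns immediately
    stream.waitForCompletion();           // disparity is valid only after this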
......
@@ -56,7 +56,7 @@ namespace cv
         // In this case you have to install Cuda Toolkit.
         struct StreamAccessor
         {
-            CV_EXPORTS static cudaStream_t getStream(const CudaStream& stream);
+            CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
         };
     }
 }
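This keeps CUDA types out of the public gpu header: only code that includes stream_accessor.hpp (and hence the CUDA Toolkit headers) can reach the raw handle. A sketch of the intended interop, where my_kernel is a hypothetical user kernel:

    #include "opencv2/gpu/stream_accessor.hpp"

    void enqueueUserWork(const cv::gpu::Stream& s)
    {
        // Queue user CUDA work on the same stream OpenCV uses:
        cudaStream_t raw = cv::gpu::StreamAccessor::getStream(s);
        // my_kernel<<<grid, block, 0, raw>>>(...);   // hypothetical kernel launch
    }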
......
@@ -52,7 +52,7 @@ cv::gpu::StereoBeliefPropagation_GPU::StereoBeliefPropagation_GPU(int, int, int,
 cv::gpu::StereoBeliefPropagation_GPU::StereoBeliefPropagation_GPU(int, int, int, float, float, float, float, int, float) { throw_nogpu(); }

 void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&, const CudaStream&) { throw_nogpu(); }
+void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }
 bool cv::gpu::StereoBeliefPropagation_GPU::checkIfGpuCallReasonable() { throw_nogpu(); return false; }
@@ -282,7 +282,7 @@ void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat& left, const
     ::stereo_bp_gpu_operator(ndisp, iters, levels, max_data_term, data_weight, max_disc_term, disc_single_jump, msg_type, msg_scale, u, d, l, r, u2, d2, l2, r2, datas, out, left, right, disp, 0);
 }

-void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, const CudaStream& stream)
+void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, const Stream& stream)
 {
     ::stereo_bp_gpu_operator(ndisp, iters, levels, max_data_term, data_weight, max_disc_term, disc_single_jump, msg_type, msg_scale, u, d, l, r, u2, d2, l2, r2, datas, out, left, right, disp, StreamAccessor::getStream(stream));
 }
......
@@ -44,9 +44,11 @@
 using namespace cv::gpu;

+/////////////////////////////////// Remap ///////////////////////////////////////////////
+
 namespace imgproc
 {
-    texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex1;
+    texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_remap;

     __global__ void kernel_remap(const float *mapx, const float *mapy, size_t map_step, unsigned char* out, size_t out_step, int width, int height)
     {
@@ -58,12 +60,40 @@ namespace imgproc
             float xcoo = mapx[idx];
             float ycoo = mapy[idx];

-            out[y * out_step + x] = (unsigned char)(255.f * tex2D(tex1, xcoo, ycoo));
+            out[y * out_step + x] = (unsigned char)(255.f * tex2D(tex_remap, xcoo, ycoo));
         }
     }
-
-    texture< uchar4, 2, cudaReadModeElementType > tex_meanshift;
+}
+
+namespace cv { namespace gpu { namespace impl
+{
+    extern "C" void remap_gpu(const DevMem2D& src, const DevMem2D_<float>& xmap, const DevMem2D_<float>& ymap, DevMem2D dst)
+    {
+        dim3 block(16, 16, 1);
+        dim3 grid(1, 1, 1);
+        grid.x = divUp(dst.cols, block.x);
+        grid.y = divUp(dst.rows, block.y);
+
+        imgproc::tex_remap.filterMode = cudaFilterModeLinear;
+        imgproc::tex_remap.addressMode[0] = imgproc::tex_remap.addressMode[1] = cudaAddressModeWrap;
+        cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
+        cudaSafeCall( cudaBindTexture2D(0, imgproc::tex_remap, src.ptr, desc, dst.cols, dst.rows, src.step) );
+
+        imgproc::kernel_remap<<<grid, block>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);
+
+        cudaSafeCall( cudaThreadSynchronize() );
+        cudaSafeCall( cudaUnbindTexture(imgproc::tex_remap) );
+    }
+}}}
+
+/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
+
+namespace imgproc
+{
+    texture<uchar4, 2> tex_meanshift;

     extern "C" __global__ void meanshift_kernel( unsigned char* out, int out_step, int cols, int rows, int sp, int sr, int maxIter, float eps )
     {
@@ -72,9 +102,8 @@ namespace imgproc
         if( x0 < cols && y0 < rows )
         {
             int isr2 = sr*sr;
-            uchar4 c = tex2D( tex_meanshift, x0, y0 );
+            uchar4 c = tex2D(tex_meanshift, x0, y0 );

             // iterate meanshift procedure
             for( int iter = 0; iter < maxIter; iter++ )
             {
@@ -137,26 +166,6 @@ namespace imgproc
 namespace cv { namespace gpu { namespace impl
 {
-    using namespace imgproc;
-
-    extern "C" void remap_gpu(const DevMem2D& src, const DevMem2D_<float>& xmap, const DevMem2D_<float>& ymap, DevMem2D dst)
-    {
-        dim3 block(16, 16, 1);
-        dim3 grid(1, 1, 1);
-        grid.x = divUp(dst.cols, block.x);
-        grid.y = divUp(dst.rows, block.y);
-
-        tex1.filterMode = cudaFilterModeLinear;
-        tex1.addressMode[0] = tex1.addressMode[1] = cudaAddressModeWrap;
-        cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
-        cudaSafeCall( cudaBindTexture2D(0, tex1, src.ptr, desc, dst.cols, dst.rows, src.step) );
-
-        kernel_remap<<<grid, block>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);
-
-        cudaSafeCall( cudaThreadSynchronize() );
-        cudaSafeCall( cudaUnbindTexture(tex1) );
-    }
-
     extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, float sp, float sr, int maxIter, float eps)
     {
         dim3 grid(1, 1, 1);
@@ -165,11 +174,11 @@ namespace cv { namespace gpu { namespace impl
         grid.y = divUp(src.rows, threads.y);

         cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
-        cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );
+        cudaSafeCall( cudaBindTexture2D( 0, imgproc::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );

-        meanshift_kernel<<< grid, threads >>>( dst.ptr, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
+        imgproc::meanshift_kernel<<< grid, threads >>>( dst.ptr, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );

         cudaSafeCall( cudaThreadSynchronize() );
-        cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
+        cudaSafeCall( cudaUnbindTexture( imgproc::tex_meanshift ) );
     }
 }}}
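Both host wrappers size their launch grids with divUp, which this diff does not define. Presumably it is the usual round-up integer division, along these lines:

    // Assumed helper: smallest n such that n * b >= a.
    static inline int divUp(int a, int b) { return (a + b - 1) / b; }

    // With 16x16 blocks, a 640x480 image maps to a 40x30 grid; 641 columns would
    // give divUp(641, 16) = 41 blocks, the partial block being masked off by the
    // in-kernel bounds check (x0 < cols && y0 < rows).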
......
@@ -48,28 +48,28 @@ using namespace cv::gpu;

 #if !defined (HAVE_CUDA)

-void cv::gpu::CudaStream::create() { throw_nogpu(); }
-void cv::gpu::CudaStream::release() { throw_nogpu(); }
-cv::gpu::CudaStream::CudaStream() : impl(0) { throw_nogpu(); }
-cv::gpu::CudaStream::~CudaStream() { throw_nogpu(); }
-cv::gpu::CudaStream::CudaStream(const CudaStream& /*stream*/) { throw_nogpu(); }
-CudaStream& cv::gpu::CudaStream::operator=(const CudaStream& /*stream*/) { throw_nogpu(); return *this; }
-bool cv::gpu::CudaStream::queryIfComplete() { throw_nogpu(); return true; }
-void cv::gpu::CudaStream::waitForCompletion() { throw_nogpu(); }
-void cv::gpu::CudaStream::enqueueDownload(const GpuMat& /*src*/, Mat& /*dst*/) { throw_nogpu(); }
-void cv::gpu::CudaStream::enqueueDownload(const GpuMat& /*src*/, MatPL& /*dst*/) { throw_nogpu(); }
-void cv::gpu::CudaStream::enqueueUpload(const MatPL& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
-void cv::gpu::CudaStream::enqueueUpload(const Mat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
-void cv::gpu::CudaStream::enqueueCopy(const GpuMat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
-void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& /*src*/, Scalar /*val*/) { throw_nogpu(); }
-void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& /*src*/, Scalar /*val*/, const GpuMat& /*mask*/) { throw_nogpu(); }
-void cv::gpu::CudaStream::enqueueConvert(const GpuMat& /*src*/, GpuMat& /*dst*/, int /*type*/, double /*a*/, double /*b*/) { throw_nogpu(); }
+void cv::gpu::Stream::create() { throw_nogpu(); }
+void cv::gpu::Stream::release() { throw_nogpu(); }
+cv::gpu::Stream::Stream() : impl(0) { throw_nogpu(); }
+cv::gpu::Stream::~Stream() { throw_nogpu(); }
+cv::gpu::Stream::Stream(const Stream& /*stream*/) { throw_nogpu(); }
+Stream& cv::gpu::Stream::operator=(const Stream& /*stream*/) { throw_nogpu(); return *this; }
+bool cv::gpu::Stream::queryIfComplete() { throw_nogpu(); return true; }
+void cv::gpu::Stream::waitForCompletion() { throw_nogpu(); }
+void cv::gpu::Stream::enqueueDownload(const GpuMat& /*src*/, Mat& /*dst*/) { throw_nogpu(); }
+void cv::gpu::Stream::enqueueDownload(const GpuMat& /*src*/, MatPL& /*dst*/) { throw_nogpu(); }
+void cv::gpu::Stream::enqueueUpload(const MatPL& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
+void cv::gpu::Stream::enqueueUpload(const Mat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
+void cv::gpu::Stream::enqueueCopy(const GpuMat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
+void cv::gpu::Stream::enqueueMemSet(const GpuMat& /*src*/, Scalar /*val*/) { throw_nogpu(); }
+void cv::gpu::Stream::enqueueMemSet(const GpuMat& /*src*/, Scalar /*val*/, const GpuMat& /*mask*/) { throw_nogpu(); }
+void cv::gpu::Stream::enqueueConvert(const GpuMat& /*src*/, GpuMat& /*dst*/, int /*type*/, double /*a*/, double /*b*/) { throw_nogpu(); }

 #else /* !defined (HAVE_CUDA) */

 #include "opencv2/gpu/stream_accessor.hpp"

-struct CudaStream::Impl
+struct Stream::Impl
 {
     cudaStream_t stream;
     int ref_counter;
@@ -85,9 +85,9 @@ namespace
 };
 }

-CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const CudaStream& stream) { return stream.impl->stream; };
+CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream) { return stream.impl->stream; };

-void cv::gpu::CudaStream::create()
+void cv::gpu::Stream::create()
 {
     if (impl)
         release();
@@ -95,13 +95,13 @@ void cv::gpu::CudaStream::create()
     cudaStream_t stream;
     cudaSafeCall( cudaStreamCreate( &stream ) );

-    impl = (CudaStream::Impl*)fastMalloc(sizeof(CudaStream::Impl));
+    impl = (Stream::Impl*)fastMalloc(sizeof(Stream::Impl));

     impl->stream = stream;
     impl->ref_counter = 1;
 }

-void cv::gpu::CudaStream::release()
+void cv::gpu::Stream::release()
 {
     if( impl && CV_XADD(&impl->ref_counter, -1) == 1 )
     {
@@ -110,15 +110,15 @@ void cv::gpu::CudaStream::release()
     }
 }

-cv::gpu::CudaStream::CudaStream() : impl(0) { create(); }
-cv::gpu::CudaStream::~CudaStream() { release(); }
+cv::gpu::Stream::Stream() : impl(0) { create(); }
+cv::gpu::Stream::~Stream() { release(); }

-cv::gpu::CudaStream::CudaStream(const CudaStream& stream) : impl(stream.impl)
+cv::gpu::Stream::Stream(const Stream& stream) : impl(stream.impl)
 {
     if( impl )
         CV_XADD(&impl->ref_counter, 1);
 }

-CudaStream& cv::gpu::CudaStream::operator=(const CudaStream& stream)
+Stream& cv::gpu::Stream::operator=(const Stream& stream)
 {
     if( this != &stream )
     {
@@ -131,7 +131,7 @@ CudaStream& cv::gpu::CudaStream::operator=(const CudaStream& stream)
     return *this;
 }

-bool cv::gpu::CudaStream::queryIfComplete()
+bool cv::gpu::Stream::queryIfComplete()
 {
     cudaError_t err = cudaStreamQuery( impl->stream );
@@ -142,31 +142,31 @@ bool cv::gpu::CudaStream::queryIfComplete()
     return false;
 }

-void cv::gpu::CudaStream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( impl->stream ) ); }
+void cv::gpu::Stream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( impl->stream ) ); }

-void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst)
+void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
 {
     // dst must already match src; otherwise an allocation would be done here,
     // and dst would no longer point to page-locked memory.
     CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type());
     devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost);
 }
-void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, MatPL& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); }
+void cv::gpu::Stream::enqueueDownload(const GpuMat& src, MatPL& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); }

-void cv::gpu::CudaStream::enqueueUpload(const MatPL& src, GpuMat& dst){ devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }
-void cv::gpu::CudaStream::enqueueUpload(const Mat& src, GpuMat& dst)  { devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }
+void cv::gpu::Stream::enqueueUpload(const MatPL& src, GpuMat& dst){ devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }
+void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst)  { devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }

-void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToDevice); }
+void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToDevice); }
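The devcopy helper these one-liners call is not shown in this diff; judging from the (src, dst, stream, kind) call sites, it is presumably a thin wrapper over cudaMemcpy2DAsync, roughly:

    // Sketch under that assumption; M1/M2 stand in for the Mat/MatPL/GpuMat
    // combinations actually used (the real helper is likely a template).
    template <typename M1, typename M2>
    void devcopy(const M1& src, M2& dst, cudaStream_t s, cudaMemcpyKind k)
    {
        dst.create(src.size(), src.type());   // no-op when dst already matches
        cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step,
                                        src.cols * src.elemSize(), src.rows, k, s) );
    }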
-void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val)
+void cv::gpu::Stream::enqueueMemSet(const GpuMat& src, Scalar val)
 {
     impl::set_to_without_mask(src, src.depth(), val.val, src.channels(), impl->stream);
 }

-void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask)
+void cv::gpu::Stream::enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask)
 {
     impl::set_to_with_mask(src, src.depth(), val.val, mask, src.channels(), impl->stream);
 }

-void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta)
+void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta)
 {
     bool noScale = fabs(alpha-1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon();
......
@@ -82,7 +82,7 @@ void cv::gpu::GpuMat::upload(const Mat& m)
     cudaSafeCall( cudaMemcpy2D(data, step, m.data, m.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) );
 }

-void cv::gpu::GpuMat::upload(const MatPL& m, CudaStream& stream)
+void cv::gpu::GpuMat::upload(const MatPL& m, Stream& stream)
 {
     CV_DbgAssert(!m.empty());
     stream.enqueueUpload(m, *this);
@@ -95,7 +95,7 @@ void cv::gpu::GpuMat::download(cv::Mat& m) const
     cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) );
 }

-void cv::gpu::GpuMat::download(MatPL& m, CudaStream& stream) const
+void cv::gpu::GpuMat::download(MatPL& m, Stream& stream) const
 {
     CV_DbgAssert(!m.empty());
     stream.enqueueDownload(*this, m);
......
@@ -52,7 +52,7 @@ cv::gpu::StereoBM_GPU::StereoBM_GPU(int, int, int) { throw_nogpu(); }
 bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable() { throw_nogpu(); return false; }
 void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
-void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&, const CudaStream&) { throw_nogpu(); }
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }

 #else /* !defined (HAVE_CUDA) */

@@ -134,7 +134,7 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right
     ::stereo_bm_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, 0);
 }

-void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream)
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const Stream& stream)
 {
     ::stereo_bm_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, StreamAccessor::getStream(stream));
 }
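With the rename in place, a fully queued block-matching pipeline might look like the following sketch (the constructor arguments and the CV_8UC1 disparity type are assumptions, not shown in this diff):

    int w = 640, h = 480;
    cv::gpu::MatPL hLeft(h, w, CV_8UC1), hRight(h, w, CV_8UC1), hDisp(h, w, CV_8UC1);
    cv::gpu::GpuMat dLeft, dRight, dDisp;
    cv::gpu::StereoBM_GPU bm(0 /*preset*/, 64 /*ndisp*/, 19 /*winSize*/);
    cv::gpu::Stream stream;

    stream.enqueueUpload(hLeft, dLeft);
    stream.enqueueUpload(hRight, dRight);
    bm(dLeft, dRight, dDisp, stream);
    stream.enqueueDownload(dDisp, hDisp);
    stream.waitForCompletion();          // all five operations run in queue order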
......
@@ -58,7 +58,7 @@ bool CV_GpuMatASyncCall::compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat)
     //int64 time = getTickCount();

-    CudaStream stream;
+    Stream stream;
     stream.enqueueCopy(gmat0, gmat1);
     stream.enqueueCopy(gmat0, gmat2);
     stream.enqueueCopy(gmat0, gmat3);
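The commented-out getTickCount() call suggests timing was on the author's mind; a sketch of how the queued copies could be timed (keeping in mind that the enqueues return before the copies actually run):

    int64 t0 = cv::getTickCount();
    stream.enqueueCopy(gmat0, gmat1);
    stream.waitForCompletion();   // the copy finishes here, not at the enqueue
    double ms = (cv::getTickCount() - t0) * 1000.0 / cv::getTickFrequency();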
......