Commit 769564c1 authored by Andrey Morozov

Implemented asynchronous calls for GpuMat::setTo(), GpuMat::copyTo(), GpuMat::convertTo()

parent 1ead3a5b
@@ -49,24 +49,24 @@
namespace cv
{
namespace gpu
{
//////////////////////////////// Initialization ////////////////////////
//! This is the only function that does not throw exceptions if the library is compiled without CUDA.
CV_EXPORTS int getCudaEnabledDeviceCount();
//! The functions below throw cv::Exception if the library is compiled without CUDA.
CV_EXPORTS string getDeviceName(int device);
CV_EXPORTS void setDevice(int device);
CV_EXPORTS int getDevice();
CV_EXPORTS void getComputeCapability(int device, int* major, int* minor);
CV_EXPORTS int getNumberOfSMs(int device);
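The initialization functions above are enough for a minimal device check. A short sketch, assuming the public header path is opencv2/gpu/gpu.hpp (the include guard __OPENCV_GPU_HPP__ at the end of the file suggests so):

#include <iostream>
#include "opencv2/gpu/gpu.hpp"

int main()
{
    // The one call that is safe even in a build without CUDA.
    int n = cv::gpu::getCudaEnabledDeviceCount();
    for (int i = 0; i < n; ++i)
    {
        int major, minor;
        cv::gpu::getComputeCapability(i, &major, &minor);
        std::cout << cv::gpu::getDeviceName(i) << ": sm_" << major << minor
                  << ", " << cv::gpu::getNumberOfSMs(i) << " SMs\n";
    }
    return 0;
}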
//////////////////////////////// GpuMat ////////////////////////////////
-class CudaStrem;
+class CudaStream;
//! Smart pointer for GPU memory with reference counting. Its interface is mostly similar to cv::Mat.
class CV_EXPORTS GpuMat
{
public:
@@ -81,7 +81,7 @@ namespace cv
GpuMat(Size _size, int _type, const Scalar& _s);
//! copy constructor
GpuMat(const GpuMat& m);
//! constructor for GpuMatrix headers pointing to user-allocated data
GpuMat(int _rows, int _cols, int _type, void* _data, size_t _step = Mat::AUTO_STEP);
GpuMat(Size _size, int _type, void* _data, size_t _step = Mat::AUTO_STEP);
@@ -89,7 +89,7 @@ namespace cv
//! creates a matrix header for a part of the bigger matrix
GpuMat(const GpuMat& m, const Range& rowRange, const Range& colRange);
GpuMat(const GpuMat& m, const Rect& roi);
//! builds GpuMat from Mat. Performs a blocking upload to the device.
explicit GpuMat (const Mat& m);
@@ -99,7 +99,7 @@ namespace cv
//! assignment operators
GpuMat& operator = (const GpuMat& m);
//! assignment operator. Performs a blocking upload to the device.
GpuMat& operator = (const Mat& m);
//! returns lightweight DevMem2D_ structure for passing to nvcc-compiled code.
// Contains just image size, data ptr and step.
@@ -110,7 +110,7 @@ namespace cv
//! Downloads data from device to host memory. Blocking calls.
operator Mat() const;
void download(cv::Mat& m) const;
//! returns a new GpuMatrix header for the specified row
GpuMat row(int y) const;
@@ -161,7 +161,7 @@ namespace cv
//! extracts a rectangular sub-GpuMatrix
// (this is a generalized form of row, rowRange etc.)
GpuMat operator()( Range rowRange, Range colRange ) const;
GpuMat operator()( const Rect& roi ) const;
//! returns true iff the GpuMatrix data is continuous
// (i.e. when there are no gaps between successive rows).
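Taken together, the constructors, ROI operators, and download methods above give a blocking round trip. A minimal sketch (header path assumed as before; the input is assumed to be at least 16x16):

#include "opencv2/gpu/gpu.hpp"

void roundtrip(const cv::Mat& host_src)
{
    cv::gpu::GpuMat d_img(host_src);                        // blocking upload (explicit ctor above)
    cv::gpu::GpuMat d_roi = d_img(cv::Rect(0, 0, 16, 16));  // sub-matrix header, no data copy

    cv::Mat host_dst;
    d_img.download(host_dst);                               // blocking download back to the host
}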
@@ -222,33 +222,33 @@
// Page-locked memory is only needed for async and faster copying to the GPU.
// It is convertible to a cv::Mat header without reference counting
// so you can use it with other OpenCV functions.
class CV_EXPORTS MatPL
{
public:
//Not supported. The current behaviour is like ALLOC_DEFAULT.
//enum { ALLOC_DEFAULT = 0, ALLOC_PORTABLE = 1, ALLOC_WRITE_COMBINED = 4 }
MatPL();
MatPL(const MatPL& m);
MatPL(int _rows, int _cols, int _type);
MatPL(Size _size, int _type);
//! creates from cv::Mat, copying its data
explicit MatPL(const Mat& m);
~MatPL();
MatPL& operator = (const MatPL& m);
//! returns deep copy of the matrix, i.e. the data is copied
MatPL clone() const;
//! allocates new matrix data unless the matrix already has the specified size and type.
void create(int _rows, int _cols, int _type);
void create(Size _size, int _type);
//! decrements the reference counter and releases memory if needed.
void release();
@@ -256,25 +256,25 @@
//! returns matrix header with disabled reference counting for MatPL data.
Mat createMatHeader() const;
operator Mat() const;
// Please see cv::Mat for descriptions
bool isContinuous() const;
size_t elemSize() const;
size_t elemSize1() const;
int type() const;
int depth() const;
int channels() const;
size_t step1() const;
Size size() const;
bool empty() const;
// Please see cv::Mat for descriptions
int flags;
int rows, cols;
size_t step;
uchar* data;
int* refcount;
uchar* datastart;
uchar* dataend;
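A short sketch of the intended MatPL workflow, as far as these declarations show it:

#include "opencv2/gpu/gpu.hpp"

void pinned_demo()
{
    cv::gpu::MatPL pinned(480, 640, CV_8UC1);    // page-locked (pinned) host allocation
    cv::Mat header = pinned.createMatHeader();   // plain Mat view, no reference counting
    header.setTo(cv::Scalar::all(0));            // usable with ordinary OpenCV routines
}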
@@ -288,37 +288,37 @@ namespace cv
class CV_EXPORTS CudaStream
{
public:
CudaStream();
~CudaStream();
CudaStream(const CudaStream&);
CudaStream& operator=(const CudaStream&);
bool queryIfComplete();
void waitForCompletion();
//! downloads asynchronously.
// Warning! cv::Mat must point to page-locked memory (i.e. to MatPL data or to its subMat)
void enqueueDownload(const GpuMat& src, MatPL& dst);
void enqueueDownload(const GpuMat& src, Mat& dst);
//! uploads asynchronously.
// Warning! cv::Mat must point to page-locked memory (i.e. to MatPL data or to its ROI)
void enqueueUpload(const MatPL& src, GpuMat& dst);
void enqueueUpload(const Mat& src, GpuMat& dst);
void enqueueCopy(const GpuMat& src, GpuMat& dst);
void enqueueMemSet(const GpuMat& src, Scalar val);
void enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask);
// converts matrix type, e.g. from float to uchar, depending on type
void enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a = 1, double b = 0);
private:
void create();
void release();
struct Impl;
Impl *impl;
friend struct StreamAccessor;
};
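The stream interface composes into the usual upload-process-download pipeline. A hedged sketch (it is not visible here whether the enqueue calls allocate their destinations, so nothing below depends on a pre-sized GpuMat, and the pinned destination is sized up front):

#include "opencv2/gpu/gpu.hpp"

void async_pipeline(const cv::gpu::MatPL& h_src)
{
    cv::gpu::CudaStream stream;
    cv::gpu::GpuMat d_src, d_dst;
    cv::gpu::MatPL h_dst(h_src.size(), CV_32FC1);           // pinned destination buffer

    stream.enqueueUpload(h_src, d_src);                     // async H2D: source is page-locked
    stream.enqueueConvert(d_src, d_dst, CV_32F, 1.0/255);   // async scale + type conversion
    stream.enqueueDownload(d_dst, h_dst);                   // async D2H into pinned memory

    // ... overlap independent CPU work here ...
    stream.waitForCompletion();                             // block until everything lands
}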
@@ -348,7 +348,7 @@ namespace cv
//! Async version
void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream);
//! A heuristic that tries to estimate
// whether the current GPU will be faster than the CPU for this algorithm.
// It queries the current active device.
static bool checkIfGpuCallReasonable();
@@ -356,11 +356,11 @@ namespace cv
int ndisp;
int winSize;
int preset;
// If avergeTexThreshold == 0 => post-processing is disabled.
// If avergeTexThreshold != 0 then disparity is set to 0 at each point (x, y) where, for the left image,
// SumOfHorizontalGradientsInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold,
// i.e. the input left image is low-textured.
float avergeTexThreshold;
private:
GpuMat minSSD, leBuf, riBuf;
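The enclosing class name is cut off by this hunk; assuming it is StereoBM_GPU (the block-matching stereo class of this module), the async overload and the heuristic combine roughly like this:

#include "opencv2/gpu/gpu.hpp"

void stereo_async(const cv::gpu::GpuMat& left, const cv::gpu::GpuMat& right)
{
    if (!cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable())
        return;                            // heuristic: CPU is likely faster on this device

    cv::gpu::StereoBM_GPU bm;              // default preset / ndisp / winSize
    cv::gpu::GpuMat disparity;
    cv::gpu::CudaStream stream;

    bm(left, right, disparity, stream);    // async version: just enqueues the work
    stream.waitForCompletion();
}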
@@ -369,4 +369,4 @@ namespace cv
}
#include "opencv2/gpu/matrix_operations.hpp"
#endif /* __OPENCV_GPU_HPP__ */
\ No newline at end of file
@@ -61,12 +61,12 @@ namespace cv
{
static inline int divUp(int a, int b) { return (a % b == 0) ? a/b : a/b + 1; }
-extern "C" void copy_to_with_mask(const DevMem2D& mat_src, const DevMem2D& mat_dst, int depth, const DevMem2D& mask, int channels);
+extern "C" void copy_to_with_mask(const DevMem2D& mat_src, const DevMem2D& mat_dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
-extern "C" void set_to_without_mask (const DevMem2D& mat, int depth, const double * scalar, int channels);
+extern "C" void set_to_without_mask (const DevMem2D& mat, int depth, const double * scalar, int channels, const cudaStream_t & stream = 0);
-extern "C" void set_to_with_mask (const DevMem2D& mat, int depth, const double * scalar, const DevMem2D& mask, int channels);
+extern "C" void set_to_with_mask (const DevMem2D& mat, int depth, const double * scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
-extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, size_t width, size_t height, double alpha, double beta);
+extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream = 0);
}
}
}
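These internal entry points now take an optional stream. A hedged sketch of how a caller in the same (elided) implementation namespace might forward one, plus the launch-geometry role of divUp; the wrapper and kernel names here are illustrative, not taken from the (collapsed) .cu file:

// Illustrative only: unpacks a Scalar and forwards the stream to the declared helper.
static void enqueue_set(const DevMem2D& mat, int depth, const cv::Scalar& s,
                        int channels, cudaStream_t stream)
{
    double scalar[4] = { s[0], s[1], s[2], s[3] };
    set_to_without_mask(mat, depth, scalar, channels, stream);
}

// Inside such a helper, divUp typically sizes the grid so every pixel is covered:
//     dim3 block(16, 16);
//     dim3 grid(divUp(mat.cols, block.x), divUp(mat.rows, block.y));
//     set_to_kernel<<<grid, block, 0, stream>>>(mat, ...);   // enqueues and returns immediately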
This diff is collapsed.
@@ -74,6 +74,7 @@ struct CudaStream::Impl
cudaStream_t stream;
int ref_counter;
};

namespace
{
template<class S, class D> void devcopy(const S& src, D& dst, cudaStream_t s, cudaMemcpyKind k)
@@ -147,7 +148,7 @@ void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst)
{
// if not -> allocation will be done, but after that dst will not point to page locked memory
CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type());
devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost);
}
void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, MatPL& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); }
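The devcopy body is not shown in this view. A plausible reconstruction, assuming it is a pitched 2D copy enqueued on the given stream (the dst.create() call and the cudaSafeCall error-check macro are assumptions consistent with the size-check comment above):

template<class S, class D> void devcopy(const S& src, D& dst, cudaStream_t s, cudaMemcpyKind k)
{
    dst.create(src.size(), src.type());          // assumed: (re)allocate the destination
    size_t bwidth = src.cols * src.elemSize();   // bytes per row actually copied
    cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step,
                                    bwidth, src.rows, k, s) );
}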