Commit 35ebeb21 authored by Andrey Morozov's avatar Andrey Morozov

added MatPL implementation with zero-copy and write-combined allocation support

parent 024283ce
...@@ -233,17 +233,18 @@ namespace cv ...@@ -233,17 +233,18 @@ namespace cv
{ {
public: public:
//Not supported. Now behaviour is like ALLOC_DEFAULT. //Supported. Now behaviour is like ALLOC_DEFAULT.
//enum { ALLOC_DEFAULT = 0, ALLOC_PORTABLE = 1, ALLOC_WRITE_COMBINED = 4 } enum { ALLOC_PAGE_LOCKED = 0, ALLOC_ZEROCOPY = 1, ALLOC_WRITE_COMBINED = 4 };
MatPL(); MatPL();
MatPL(const MatPL& m); MatPL(const MatPL& m);
MatPL(int _rows, int _cols, int _type); MatPL(int _rows, int _cols, int _type, int type_alloc = ALLOC_PAGE_LOCKED);
MatPL(Size _size, int _type); MatPL(Size _size, int _type, int type_alloc = ALLOC_PAGE_LOCKED);
//! creates from cv::Mat with coping data //! creates from cv::Mat with coping data
explicit MatPL(const Mat& m); explicit MatPL(const Mat& m, int type_alloc = ALLOC_PAGE_LOCKED);
~MatPL(); ~MatPL();
...@@ -253,8 +254,8 @@ namespace cv ...@@ -253,8 +254,8 @@ namespace cv
MatPL clone() const; MatPL clone() const;
//! allocates new matrix data unless the matrix already has specified size and type. //! allocates new matrix data unless the matrix already has specified size and type.
void create(int _rows, int _cols, int _type); void create(int _rows, int _cols, int _type, int type_alloc = ALLOC_PAGE_LOCKED);
void create(Size _size, int _type); void create(Size _size, int _type, int type_alloc = ALLOC_PAGE_LOCKED);
//! decrements reference counter and released memory if needed. //! decrements reference counter and released memory if needed.
void release(); void release();
...@@ -263,6 +264,11 @@ namespace cv ...@@ -263,6 +264,11 @@ namespace cv
Mat createMatHeader() const; Mat createMatHeader() const;
operator Mat() const; operator Mat() const;
operator GpuMat() const;
static bool can_device_map_to_host();
// Please see cv::Mat for descriptions // Please see cv::Mat for descriptions
bool isContinuous() const; bool isContinuous() const;
size_t elemSize() const; size_t elemSize() const;
...@@ -274,16 +280,20 @@ namespace cv ...@@ -274,16 +280,20 @@ namespace cv
Size size() const; Size size() const;
bool empty() const; bool empty() const;
// Please see cv::Mat for descriptions // Please see cv::Mat for descriptions
int flags; int flags;
int rows, cols; int rows, cols;
size_t step; size_t step;
int alloc_type;
uchar* data; uchar* data;
int* refcount; int* refcount;
uchar* datastart; uchar* datastart;
uchar* dataend; uchar* dataend;
}; };
//////////////////////////////// CudaStream //////////////////////////////// //////////////////////////////// CudaStream ////////////////////////////////
......
...@@ -343,29 +343,28 @@ static inline void swap( GpuMat& a, GpuMat& b ) { a.swap(b); } ...@@ -343,29 +343,28 @@ static inline void swap( GpuMat& a, GpuMat& b ) { a.swap(b); }
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
inline MatPL::MatPL() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {} inline MatPL::MatPL() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {}
inline MatPL::MatPL(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) inline MatPL::MatPL(int _rows, int _cols, int _type, int type_alloc) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{ {
if( _rows > 0 && _cols > 0 ) if( _rows > 0 && _cols > 0 )
create( _rows, _cols, _type ); create( _rows, _cols, _type , type_alloc);
} }
inline MatPL::MatPL(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) inline MatPL::MatPL(Size _size, int _type, int type_alloc) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{ {
if( _size.height > 0 && _size.width > 0 ) if( _size.height > 0 && _size.width > 0 )
create( _size.height, _size.width, _type ); create( _size.height, _size.width, _type, type_alloc );
} }
inline MatPL::MatPL(const MatPL& m) : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(0), dataend(0) inline MatPL::MatPL(const MatPL& m) : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(0), dataend(0)
{ {
if( refcount ) if( refcount )
CV_XADD(refcount, 1); CV_XADD(refcount, 1);
} }
inline MatPL::MatPL(const Mat& m) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) inline MatPL::MatPL(const Mat& m, int type_alloc) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{ {
if( m.rows > 0 && m.cols > 0 ) if( m.rows > 0 && m.cols > 0 )
create( m.size(), m.type() ); create( m.size(), m.type() , type_alloc);
Mat tmp = createMatHeader(); Mat tmp = createMatHeader();
m.copyTo(tmp); m.copyTo(tmp);
...@@ -375,6 +374,7 @@ inline MatPL::~MatPL() ...@@ -375,6 +374,7 @@ inline MatPL::~MatPL()
{ {
release(); release();
} }
inline MatPL& MatPL::operator = (const MatPL& m) inline MatPL& MatPL::operator = (const MatPL& m)
{ {
if( this != &m ) if( this != &m )
...@@ -388,6 +388,7 @@ inline MatPL& MatPL::operator = (const MatPL& m) ...@@ -388,6 +388,7 @@ inline MatPL& MatPL::operator = (const MatPL& m)
datastart = m.datastart; datastart = m.datastart;
dataend = m.dataend; dataend = m.dataend;
refcount = m.refcount; refcount = m.refcount;
alloc_type = m.alloc_type;
} }
return *this; return *this;
} }
...@@ -401,7 +402,7 @@ inline MatPL MatPL::clone() const ...@@ -401,7 +402,7 @@ inline MatPL MatPL::clone() const
return m; return m;
} }
inline void MatPL::create(Size _size, int _type) { create(_size.height, _size.width, _type); } inline void MatPL::create(Size _size, int _type, int type_alloc) { create(_size.height, _size.width, _type, type_alloc); }
//CCP void MatPL::create(int _rows, int _cols, int _type); //CCP void MatPL::create(int _rows, int _cols, int _type);
//CPP void MatPL::release(); //CPP void MatPL::release();
......
...@@ -67,7 +67,8 @@ namespace cv ...@@ -67,7 +67,8 @@ namespace cv
void GpuMat::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); } void GpuMat::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); }
void GpuMat::release() { throw_nogpu(); } void GpuMat::release() { throw_nogpu(); }
void MatPL::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); } void MatPL::create(int /*_rows*/, int /*_cols*/, int /*_type*/, int /*type_alloc*/) { throw_nogpu(); }
void MatPL::get_property_device() { throw_nogpu(); }
void MatPL::release() { throw_nogpu(); } void MatPL::release() { throw_nogpu(); }
} }
...@@ -209,6 +210,15 @@ GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const ...@@ -209,6 +210,15 @@ GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const
return hdr; return hdr;
} }
// Returns true if the current CUDA device can map page-locked host memory
// into its address space (required for ALLOC_ZEROCOPY allocations).
// Queries the canMapHostMemory capability of the active device rather than
// assuming device 0.
bool cv::gpu::MatPL::can_device_map_to_host()
{
    int device = 0;
    cudaSafeCall( cudaGetDevice(&device) );

    cudaDeviceProp prop;
    cudaSafeCall( cudaGetDeviceProperties(&prop, device) );

    return prop.canMapHostMemory != 0;
}
void cv::gpu::GpuMat::create(int _rows, int _cols, int _type) void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
{ {
_type &= TYPE_MASK; _type &= TYPE_MASK;
...@@ -259,8 +269,9 @@ void cv::gpu::GpuMat::release() ...@@ -259,8 +269,9 @@ void cv::gpu::GpuMat::release()
//////////////////////////////// MatPL //////////////////////////////// //////////////////////////////// MatPL ////////////////////////////////
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
void cv::gpu::MatPL::create(int _rows, int _cols, int _type) void cv::gpu::MatPL::create(int _rows, int _cols, int _type, int type_alloc)
{ {
alloc_type = type_alloc;
_type &= TYPE_MASK; _type &= TYPE_MASK;
if( rows == _rows && cols == _cols && type() == _type && data ) if( rows == _rows && cols == _cols && type() == _type && data )
return; return;
...@@ -281,7 +292,24 @@ void cv::gpu::MatPL::create(int _rows, int _cols, int _type) ...@@ -281,7 +292,24 @@ void cv::gpu::MatPL::create(int _rows, int _cols, int _type)
//datastart = data = (uchar*)fastMalloc(datasize + sizeof(*refcount)); //datastart = data = (uchar*)fastMalloc(datasize + sizeof(*refcount));
void *ptr; void *ptr;
cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) );
switch (type_alloc)
{
case ALLOC_PAGE_LOCKED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) ); break;
case ALLOC_ZEROCOPY:
if (can_device_map_to_host() == true)
{
cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocMapped) );
}
else
cv::gpu::error("ZeroCopy is not supported by current device", __FILE__, __LINE__);
break;
case ALLOC_WRITE_COMBINED: cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocWriteCombined) ); break;
default:
cv::gpu::error("Invalid alloc type", __FILE__, __LINE__);
}
datastart = data = (uchar*)ptr; datastart = data = (uchar*)ptr;
dataend = data + nettosize; dataend = data + nettosize;
...@@ -291,6 +319,19 @@ void cv::gpu::MatPL::create(int _rows, int _cols, int _type) ...@@ -291,6 +319,19 @@ void cv::gpu::MatPL::create(int _rows, int _cols, int _type)
} }
} }
// Creates a GpuMat header aliasing this matrix's data through the device-side
// pointer of a mapped (zero-copy) host allocation. No data is copied; the
// returned GpuMat shares storage with this MatPL.
// Only valid for matrices created with ALLOC_ZEROCOPY; raises an error otherwise.
inline MatPL::operator GpuMat() const
{
    if (alloc_type != ALLOC_ZEROCOPY)
        cv::gpu::error("Zero-copy mapping to GpuMat requires ALLOC_ZEROCOPY allocation", __FILE__, __LINE__);

    // cudaHostGetDevicePointer writes the device alias into pdev, so we pass
    // the address of a void* (the original passed an uninitialized void**,
    // writing through a garbage pointer).
    void* pdev = 0;
    cudaSafeCall( cudaHostGetDevicePointer( &pdev, this->data, 0 ) );

    return GpuMat(this->rows, this->cols, this->type(), pdev, this->step);
}
void cv::gpu::MatPL::release() void cv::gpu::MatPL::release()
{ {
if( refcount && CV_XADD(refcount, -1) == 1 ) if( refcount && CV_XADD(refcount, -1) == 1 )
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment