Commit 29f37fc1 authored by Roman Donchenko, committed by OpenCV Buildbot

Merge pull request #1575 from jet47:gpu-buffer-pool

parents 21233656 342e007d
@@ -61,16 +61,30 @@ namespace cv { namespace cuda {
 class CV_EXPORTS GpuMat
 {
 public:
+    class CV_EXPORTS Allocator
+    {
+    public:
+        virtual ~Allocator() {}
+
+        // allocator must fill data, step and refcount fields
+        virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
+        virtual void free(GpuMat* mat) = 0;
+    };
+
+    //! default allocator
+    static Allocator* defaultAllocator();
+    static void setDefaultAllocator(Allocator* allocator);
+
     //! default constructor
-    GpuMat();
+    explicit GpuMat(Allocator* allocator = defaultAllocator());

     //! constructs GpuMat of the specified size and type
-    GpuMat(int rows, int cols, int type);
-    GpuMat(Size size, int type);
+    GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
+    GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());

     //! constructs GpuMat and fills it with the specified value _s
-    GpuMat(int rows, int cols, int type, Scalar s);
-    GpuMat(Size size, int type, Scalar s);
+    GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator());
+    GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator());

     //! copy constructor
     GpuMat(const GpuMat& m);
@@ -84,7 +98,7 @@ public:
     GpuMat(const GpuMat& m, Rect roi);

     //! builds GpuMat from host memory (Blocking call)
-    explicit GpuMat(InputArray arr);
+    explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator());

     //! destructor - calls release()
     ~GpuMat();
@@ -249,6 +263,9 @@ public:
     //! helper fields used in locateROI and adjustROI
     uchar* datastart;
     uchar* dataend;
+
+    //! allocator
+    Allocator* allocator;
 };

 //! creates continuous matrix
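With the Allocator interface in place, a GpuMat's storage policy becomes pluggable. As a reading aid, and not part of this commit, here is a minimal sketch of a user-defined allocator; the name CountingAllocator and its statistics are hypothetical. Per the interface comment, allocate() must fill the data, step and refcount fields, and it may return false to decline a request.

#include <opencv2/core/cuda.hpp>
#include <cuda_runtime.h>

// Hypothetical pass-through allocator that counts device allocations.
// It allocates the same way the library's default allocator does.
class CountingAllocator : public cv::cuda::GpuMat::Allocator
{
public:
    CountingAllocator() : numAllocs(0) {}

    bool allocate(cv::cuda::GpuMat* mat, int rows, int cols, size_t elemSize)
    {
        void* devPtr = 0;

        if (rows > 1 && cols > 1)
        {
            // pitched allocation keeps rows aligned for 2D access
            if (cudaMallocPitch(&devPtr, &mat->step, elemSize * cols, rows) != cudaSuccess)
                return false;
        }
        else
        {
            // a single row or column must be continuous
            if (cudaMalloc(&devPtr, elemSize * cols * rows) != cudaSuccess)
                return false;
            mat->step = elemSize * cols;
        }

        mat->data = static_cast<uchar*>(devPtr);
        mat->refcount = (int*) cv::fastMalloc(sizeof(int));
        ++numAllocs;
        return true;
    }

    void free(cv::cuda::GpuMat* mat)
    {
        cudaFree(mat->datastart);
        cv::fastFree(mat->refcount);
    }

    int numAllocs;
};

Such an allocator can be installed process-wide with GpuMat::setDefaultAllocator(&counting), or passed to an individual matrix through the new constructor parameter, e.g. GpuMat m(rows, cols, type, &counting) (variable names hypothetical).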
@@ -260,6 +277,10 @@ CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr
 CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat);

+//! BufferPool management (must be called before Stream creation)
+CV_EXPORTS void setBufferPoolUsage(bool on);
+CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
+
 //////////////////////////////// CudaMem ////////////////////////////////

 // CudaMem is a limited cv::Mat with page-locked memory allocation.
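The ordering constraint in that comment matters: each Stream builds its StackAllocator when the stream is constructed (see the stream.cpp changes further down), so the pool must be enabled and sized first. A minimal sketch of the intended call order, with illustrative sizes; note that in this patch BufferPool is declared in opencv2/core/private.cuda.hpp, which is why the new tests include that header:

#include <opencv2/core/cuda.hpp>
#include <opencv2/core/private.cuda.hpp>

using namespace cv::cuda;

int main()
{
    // 1. enable and size the pool before any Stream exists
    setBufferPoolUsage(true);
    setBufferPoolConfig(getDevice(), 8 * 1024 * 1024, 4); // 4 stacks of 8 MB (illustrative numbers)

    // 2. streams created afterwards draw their StackAllocator from the pool
    Stream stream;
    BufferPool pool(stream);

    // 3. pool-backed temporary, valid for work enqueued on 'stream'
    GpuMat buf = pool.getBuffer(480, 640, CV_32FC1);

    return 0;
}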
@@ -382,6 +403,7 @@ private:
     Stream(const Ptr<Impl>& impl);

     friend struct StreamAccessor;
+    friend class BufferPool;
 };

 class CV_EXPORTS Event
......
@@ -51,29 +51,29 @@ namespace cv { namespace cuda {
 //////////////////////////////// GpuMat ///////////////////////////////

 inline
-GpuMat::GpuMat()
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+GpuMat::GpuMat(Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
 {}

 inline
-GpuMat::GpuMat(int rows_, int cols_, int type_)
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+GpuMat::GpuMat(int rows_, int cols_, int type_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
 {
     if (rows_ > 0 && cols_ > 0)
         create(rows_, cols_, type_);
 }

 inline
-GpuMat::GpuMat(Size size_, int type_)
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+GpuMat::GpuMat(Size size_, int type_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
 {
     if (size_.height > 0 && size_.width > 0)
         create(size_.height, size_.width, type_);
 }

 inline
-GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_)
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
 {
     if (rows_ > 0 && cols_ > 0)
     {
@@ -83,8 +83,8 @@ GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_)
 }

 inline
-GpuMat::GpuMat(Size size_, int type_, Scalar s_)
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+GpuMat::GpuMat(Size size_, int type_, Scalar s_, Allocator* allocator_)
+    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
 {
     if (size_.height > 0 && size_.width > 0)
     {
@@ -95,15 +95,15 @@ GpuMat::GpuMat(Size size_, int type_, Scalar s_)
 inline
 GpuMat::GpuMat(const GpuMat& m)
-    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend)
+    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), allocator(m.allocator)
 {
     if (refcount)
         CV_XADD(refcount, 1);
 }

 inline
-GpuMat::GpuMat(InputArray arr) :
-    flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+GpuMat::GpuMat(InputArray arr, Allocator* allocator_) :
+    flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
 {
     upload(arr);
 }
......
@@ -90,6 +90,38 @@ static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The
 namespace cv { namespace cuda
 {
+    class MemoryStack;
+
+    class CV_EXPORTS StackAllocator : public GpuMat::Allocator
+    {
+    public:
+        explicit StackAllocator(cudaStream_t stream);
+        ~StackAllocator();
+
+        bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize);
+        void free(GpuMat* mat);
+
+    private:
+        StackAllocator(const StackAllocator&);
+        StackAllocator& operator =(const StackAllocator&);
+
+        cudaStream_t stream_;
+        MemoryStack* memStack_;
+        size_t alignment_;
+    };
+
+    class CV_EXPORTS BufferPool
+    {
+    public:
+        explicit BufferPool(Stream& stream);
+
+        GpuMat getBuffer(int rows, int cols, int type);
+        GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }
+
+    private:
+        GpuMat::Allocator* allocator_;
+    };
+
     static inline void checkNppError(int code, const char* file, const int line, const char* func)
     {
         if (code < 0)
......
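One caveat, which is my reading of the stack-based design rather than something this patch states explicitly: StackAllocator hands out memory from a per-stream stack, so buffers should be released in reverse order of allocation. Scoping pool buffers so that the most recently acquired one is destroyed first keeps the releases LIFO, as in this sketch ('stream' is an existing cv::cuda::Stream):

// Sketch of the LIFO usage pattern the stack design implies.
{
    cv::cuda::BufferPool pool(stream);

    cv::cuda::GpuMat a = pool.getBuffer(480, 640, CV_32FC1); // pushed first
    cv::cuda::GpuMat b = pool.getBuffer(480, 640, CV_32FC1); // pushed second

    // ... use a and b with operations enqueued on 'stream' ...

} // locals are destroyed in reverse declaration order: b pops, then a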
@@ -55,6 +55,54 @@ using namespace cv;
 using namespace cv::cuda;
 using namespace cv::cudev;

+namespace
+{
+    class DefaultAllocator : public GpuMat::Allocator
+    {
+    public:
+        bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize);
+        void free(GpuMat* mat);
+    };
+
+    bool DefaultAllocator::allocate(GpuMat* mat, int rows, int cols, size_t elemSize)
+    {
+        if (rows > 1 && cols > 1)
+        {
+            CV_CUDEV_SAFE_CALL( cudaMallocPitch(&mat->data, &mat->step, elemSize * cols, rows) );
+        }
+        else
+        {
+            // Single row or single column must be continuous
+            CV_CUDEV_SAFE_CALL( cudaMalloc(&mat->data, elemSize * cols * rows) );
+            mat->step = elemSize * cols;
+        }
+
+        mat->refcount = (int*) fastMalloc(sizeof(int));
+
+        return true;
+    }
+
+    void DefaultAllocator::free(GpuMat* mat)
+    {
+        cudaFree(mat->datastart);
+        fastFree(mat->refcount);
+    }
+
+    DefaultAllocator cudaDefaultAllocator;
+    GpuMat::Allocator* g_defaultAllocator = &cudaDefaultAllocator;
+}
+
+GpuMat::Allocator* cv::cuda::GpuMat::defaultAllocator()
+{
+    return g_defaultAllocator;
+}
+
+void cv::cuda::GpuMat::setDefaultAllocator(Allocator* allocator)
+{
+    CV_Assert( allocator != 0 );
+    g_defaultAllocator = allocator;
+}
+
 /////////////////////////////////////////////////////
 /// create

@@ -76,19 +124,16 @@ void cv::cuda::GpuMat::create(int _rows, int _cols, int _type)
     rows = _rows;
     cols = _cols;

-    size_t esz = elemSize();
+    const size_t esz = elemSize();

-    void* devPtr;
-
-    if (rows > 1 && cols > 1)
-    {
-        CV_CUDEV_SAFE_CALL( cudaMallocPitch(&devPtr, &step, esz * cols, rows) );
-    }
-    else
+    bool allocSuccess = allocator->allocate(this, rows, cols, esz);
+
+    if (!allocSuccess)
     {
-        // Single row or single column must be continuous
-        CV_CUDEV_SAFE_CALL( cudaMalloc(&devPtr, esz * cols * rows) );
-        step = esz * cols;
+        // custom allocator fails, try default allocator
+        allocator = defaultAllocator();
+        allocSuccess = allocator->allocate(this, rows, cols, esz);
+        CV_Assert( allocSuccess );
     }

     if (esz * cols == step)
@@ -97,11 +142,11 @@ void cv::cuda::GpuMat::create(int _rows, int _cols, int _type)
     int64 _nettosize = static_cast<int64>(step) * rows;
     size_t nettosize = static_cast<size_t>(_nettosize);

-    datastart = data = static_cast<uchar*>(devPtr);
+    datastart = data;
     dataend = data + nettosize;

-    refcount = static_cast<int*>(fastMalloc(sizeof(*refcount)));
-    *refcount = 1;
+    if (refcount)
+        *refcount = 1;
 }

@@ -110,11 +155,10 @@ void cv::cuda::GpuMat::create(int _rows, int _cols, int _type)
 void cv::cuda::GpuMat::release()
 {
+    CV_DbgAssert( allocator != 0 );
+
     if (refcount && CV_XADD(refcount, -1) == 1)
-    {
-        cudaFree(datastart);
-        fastFree(refcount);
-    }
+        allocator->free(this);

     data = datastart = dataend = 0;
     step = rows = cols = 0;
......
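A consequence of the rewritten create() above: a custom allocator can decline a request by returning false, and GpuMat retries with the default allocator before asserting. A hedged sketch of an allocator exploiting that behavior; the class name and the size threshold are hypothetical:

#include <opencv2/core/cuda.hpp>

// Hypothetical allocator that only serves small requests; anything larger
// returns false, so GpuMat::create() transparently falls back to
// defaultAllocator() (and only CV_Asserts if that fails too).
class SmallOnlyAllocator : public cv::cuda::GpuMat::Allocator
{
public:
    bool allocate(cv::cuda::GpuMat* mat, int rows, int cols, size_t elemSize)
    {
        if (static_cast<size_t>(rows) * cols * elemSize > 1024 * 1024)
            return false; // decline; create() swaps in the default allocator
        return cv::cuda::GpuMat::defaultAllocator()->allocate(mat, rows, cols, elemSize);
    }

    void free(cv::cuda::GpuMat* mat)
    {
        cv::cuda::GpuMat::defaultAllocator()->free(mat);
    }
};

Note that create() also repoints the matrix's allocator field at the default allocator when it falls back, so release() frees through the allocator that actually produced the buffer.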
This diff is collapsed.
@@ -49,7 +49,8 @@ using namespace cv::cuda;
 cv::cuda::GpuMat::GpuMat(int rows_, int cols_, int type_, void* data_, size_t step_) :
     flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(rows_), cols(cols_),
     step(step_), data((uchar*)data_), refcount(0),
-    datastart((uchar*)data_), dataend((uchar*)data_)
+    datastart((uchar*)data_), dataend((uchar*)data_),
+    allocator(defaultAllocator())
 {
     size_t minstep = cols * elemSize();
@@ -74,7 +75,8 @@ cv::cuda::GpuMat::GpuMat(int rows_, int cols_, int type_, void* data_, size_t st
 cv::cuda::GpuMat::GpuMat(Size size_, int type_, void* data_, size_t step_) :
     flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(size_.height), cols(size_.width),
     step(step_), data((uchar*)data_), refcount(0),
-    datastart((uchar*)data_), dataend((uchar*)data_)
+    datastart((uchar*)data_), dataend((uchar*)data_),
+    allocator(defaultAllocator())
 {
     size_t minstep = cols * elemSize();
@@ -92,6 +94,7 @@ cv::cuda::GpuMat::GpuMat(Size size_, int type_, void* data_, size_t step_) :
         flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0;
     }

     dataend += step * (rows - 1) + minstep;
 }
@@ -100,6 +103,7 @@ cv::cuda::GpuMat::GpuMat(const GpuMat& m, Range rowRange_, Range colRange_)
     flags = m.flags;
     step = m.step; refcount = m.refcount;
     data = m.data; datastart = m.datastart; dataend = m.dataend;
+    allocator = m.allocator;

     if (rowRange_ == Range::all())
     {
@@ -139,7 +143,8 @@ cv::cuda::GpuMat::GpuMat(const GpuMat& m, Range rowRange_, Range colRange_)
 cv::cuda::GpuMat::GpuMat(const GpuMat& m, Rect roi) :
     flags(m.flags), rows(roi.height), cols(roi.width),
     step(m.step), data(m.data + roi.y*step), refcount(m.refcount),
-    datastart(m.datastart), dataend(m.dataend)
+    datastart(m.datastart), dataend(m.dataend),
+    allocator(m.allocator)
 {
     flags &= roi.width < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
     data += roi.x * elemSize();
@@ -347,6 +352,17 @@ GpuMat cv::cuda::allocMatFromBuf(int rows, int cols, int type, GpuMat& mat)
 #ifndef HAVE_CUDA

+GpuMat::Allocator* cv::cuda::GpuMat::defaultAllocator()
+{
+    return 0;
+}
+
+void cv::cuda::GpuMat::setDefaultAllocator(Allocator* allocator)
+{
+    (void) allocator;
+    throw_no_cuda();
+}
+
 void cv::cuda::GpuMat::create(int _rows, int _cols, int _type)
 {
     (void) _rows;
......
@@ -66,6 +66,7 @@ class cv::cuda::Stream::Impl
 {
 public:
     cudaStream_t stream;
+    Ptr<StackAllocator> stackAllocator_;

     Impl();
     Impl(cudaStream_t stream);
@@ -73,17 +74,26 @@ public:
     ~Impl();
 };

+cv::cuda::BufferPool::BufferPool(Stream& stream) : allocator_(stream.impl_->stackAllocator_.get())
+{
+}
+
 cv::cuda::Stream::Impl::Impl() : stream(0)
 {
     cudaSafeCall( cudaStreamCreate(&stream) );
+
+    stackAllocator_ = makePtr<StackAllocator>(stream);
 }

 cv::cuda::Stream::Impl::Impl(cudaStream_t stream_) : stream(stream_)
 {
+    stackAllocator_ = makePtr<StackAllocator>(stream);
 }

 cv::cuda::Stream::Impl::~Impl()
 {
+    stackAllocator_.release();
+
     if (stream)
         cudaStreamDestroy(stream);
 }
@@ -197,7 +207,7 @@ cv::cuda::Stream::operator bool_type() const
 ////////////////////////////////////////////////////////////////
-// Stream
+// Event

 #ifndef HAVE_CUDA
......
#include "perf_precomp.hpp"
#ifdef HAVE_CUDA
#include "opencv2/cudaarithm.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace testing;
using namespace perf;
using namespace cv;
using namespace cv::cuda;
namespace
{
void func1(const GpuMat& src, GpuMat& dst, Stream& stream)
{
BufferPool pool(stream);
GpuMat buf = pool.getBuffer(src.size(), CV_32FC(src.channels()));
src.convertTo(buf, CV_32F, 1.0 / 255.0, stream);
cuda::exp(buf, dst, stream);
}
void func2(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
BufferPool pool(stream);
GpuMat buf1 = pool.getBuffer(src1.size(), CV_32FC(src1.channels()));
func1(src1, buf1, stream);
GpuMat buf2 = pool.getBuffer(src2.size(), CV_32FC(src2.channels()));
func1(src2, buf2, stream);
cuda::add(buf1, buf2, dst, noArray(), -1, stream);
}
}
PERF_TEST_P(Sz, BufferPool, CUDA_TYPICAL_MAT_SIZES)
{
static bool first = true;
const Size size = GetParam();
const bool useBufferPool = PERF_RUN_CUDA();
Mat host_src(size, CV_8UC1);
declare.in(host_src, WARMUP_RNG);
GpuMat src1(host_src), src2(host_src);
GpuMat dst;
setBufferPoolUsage(useBufferPool);
if (useBufferPool && first)
{
setBufferPoolConfig(-1, 25 * 1024 * 1024, 2);
first = false;
}
TEST_CYCLE()
{
func2(src1, src2, dst, Stream::Null());
}
Mat h_dst(dst);
SANITY_CHECK(h_dst);
}
#endif
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudawarping.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace testing;
using namespace cv;
using namespace cv::cuda;
struct BufferPoolTest : TestWithParam<DeviceInfo>
{
};
namespace
{
void func1(const GpuMat& src, GpuMat& dst, Stream& stream)
{
BufferPool pool(stream);
GpuMat buf = pool.getBuffer(src.size(), CV_32FC(src.channels()));
src.convertTo(buf, CV_32F, 1.0 / 255.0, stream);
cuda::exp(buf, dst, stream);
}
void func2(const GpuMat& src, GpuMat& dst, Stream& stream)
{
BufferPool pool(stream);
GpuMat buf1 = pool.getBuffer(saturate_cast<int>(src.rows * 0.5), saturate_cast<int>(src.cols * 0.5), src.type());
cuda::resize(src, buf1, Size(), 0.5, 0.5, cv::INTER_NEAREST, stream);
GpuMat buf2 = pool.getBuffer(buf1.size(), CV_32FC(buf1.channels()));
func1(buf1, buf2, stream);
GpuMat buf3 = pool.getBuffer(src.size(), buf2.type());
cuda::resize(buf2, buf3, src.size(), 0, 0, cv::INTER_NEAREST, stream);
buf3.convertTo(dst, CV_8U, stream);
}
}
CUDA_TEST_P(BufferPoolTest, SimpleUsage)
{
DeviceInfo devInfo = GetParam();
setDevice(devInfo.deviceID());
GpuMat src(200, 200, CV_8UC1);
GpuMat dst;
Stream stream;
func2(src, dst, stream);
stream.waitForCompletion();
GpuMat buf, buf1, buf2, buf3;
GpuMat dst_gold;
cuda::resize(src, buf1, Size(), 0.5, 0.5, cv::INTER_NEAREST);
buf1.convertTo(buf, CV_32F, 1.0 / 255.0);
cuda::exp(buf, buf2);
cuda::resize(buf2, buf3, src.size(), 0, 0, cv::INTER_NEAREST);
buf3.convertTo(dst_gold, CV_8U);
ASSERT_MAT_NEAR(dst_gold, dst, 0);
}
INSTANTIATE_TEST_CASE_P(CUDA_Stream, BufferPoolTest, ALL_DEVICES);
#endif // HAVE_CUDA