Commit 05d40946 authored by Vladislav Vinogradov

move StackAllocator to cpp file

it is an internal class, no need to export it
parent 7ed38b97
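
The practical effect: StackAllocator is gone from the exported header, so client code reaches the per-stream memory stacks only through the public BufferPool interface, and the newly exported getAllocator() accessor hands out the pool's allocator for use with a plain GpuMat. A minimal usage sketch, assuming the OpenCV 3.x-era API (the BufferPool(Stream&) constructor and the GpuMat constructor taking an Allocator* are not part of this diff):

#include <opencv2/core/cuda.hpp>

void bufferPoolExample()
{
    cv::cuda::setBufferPoolUsage(true);      // opt in; see the setter near the end of this diff

    cv::cuda::Stream stream;                 // the stream's Impl owns a StackAllocator
    cv::cuda::BufferPool pool(stream);

    // Scratch buffer carved out of the stream's memory stack.
    cv::cuda::GpuMat buf = pool.getBuffer(1024, 1024, CV_8UC1);

    // Same pool, installed explicitly through the new getAllocator() accessor.
    cv::cuda::GpuMat own(64, 64, CV_32FC1, pool.getAllocator());
}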
@@ -92,26 +92,6 @@ static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The
 namespace cv { namespace cuda
 {
-    class MemoryStack;
-
-    class CV_EXPORTS StackAllocator : public GpuMat::Allocator
-    {
-    public:
-        explicit StackAllocator(cudaStream_t stream);
-        ~StackAllocator();
-
-        bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize);
-        void free(GpuMat* mat);
-
-    private:
-        StackAllocator(const StackAllocator&);
-        StackAllocator& operator =(const StackAllocator&);
-
-        cudaStream_t stream_;
-        MemoryStack* memStack_;
-        size_t alignment_;
-    };
-
     class CV_EXPORTS BufferPool
     {
     public:
@@ -120,6 +100,8 @@ namespace cv { namespace cuda
         GpuMat getBuffer(int rows, int cols, int type);
         GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }

+        GpuMat::Allocator* getAllocator() const { return allocator_; }
+
     private:
         GpuMat::Allocator* allocator_;
     };
...
@@ -53,55 +53,55 @@ using namespace cv::cuda;
 namespace
 {
     class MemoryPool;
-}

-class cv::cuda::MemoryStack
+    class MemoryStack
     {
     public:
         uchar* requestMemory(size_t size);
         void returnMemory(uchar* ptr);

         uchar* datastart;
         uchar* dataend;
         uchar* tip;

         bool isFree;

         MemoryPool* pool;

 #if !defined(NDEBUG)
         std::vector<size_t> allocations;
 #endif
     };

-uchar* cv::cuda::MemoryStack::requestMemory(size_t size)
+    uchar* MemoryStack::requestMemory(size_t size)
     {
         const size_t freeMem = dataend - tip;

         if (size > freeMem)
             return 0;

         uchar* ptr = tip;
         tip += size;

 #if !defined(NDEBUG)
         allocations.push_back(size);
 #endif

         return ptr;
     }

-void cv::cuda::MemoryStack::returnMemory(uchar* ptr)
+    void MemoryStack::returnMemory(uchar* ptr)
     {
         CV_DbgAssert( ptr >= datastart && ptr < dataend );

 #if !defined(NDEBUG)
         const size_t allocSize = tip - ptr;
         CV_Assert( allocSize == allocations.back() );
         allocations.pop_back();
 #endif

         tip = ptr;
     }
+}

 #endif
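
MemoryStack, now private to the .cpp file, is a plain bump allocator: requestMemory() advances tip, returnMemory() rewinds it, so blocks must come back in reverse order of allocation, which the NDEBUG-only allocations vector asserts. A host-side analogue of that discipline (an illustration, not OpenCV code):

#include <cassert>
#include <cstddef>

struct BumpStack
{
    unsigned char* datastart;
    unsigned char* dataend;
    unsigned char* tip;

    unsigned char* request(std::size_t size)
    {
        if (size > static_cast<std::size_t>(dataend - tip))
            return nullptr;                  // stack exhausted; caller must fall back
        unsigned char* ptr = tip;
        tip += size;                         // bump: O(1), no per-block metadata
        return ptr;
    }

    void giveBack(unsigned char* ptr)
    {
        assert(ptr >= datastart && ptr < dataend);
        tip = ptr;                           // rewind; releases everything above ptr
    }
};

int main()
{
    unsigned char storage[256];
    BumpStack s = { storage, storage + sizeof(storage), storage };

    unsigned char* a = s.request(64);
    unsigned char* b = s.request(64);
    s.giveBack(b);                           // LIFO: b must be released before a
    s.giveBack(a);
    return (a && b) ? 0 : 1;
}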
@@ -271,6 +271,11 @@ public:
 #else

+namespace
+{
+    class StackAllocator;
+}
+
 class cv::cuda::Stream::Impl
 {
 public:
@@ -540,29 +545,44 @@ cudaStream_t cv::cuda::StreamAccessor::getStream(const Stream& stream)
 namespace
 {
     bool enableMemoryPool = true;
-}

-cv::cuda::StackAllocator::StackAllocator(cudaStream_t stream) : stream_(stream), memStack_(0)
-{
-    if (enableMemoryPool)
-    {
-        const int deviceId = getDevice();
-        memStack_ = initializer.getMemoryPool(deviceId)->getFreeMemStack();
-        DeviceInfo devInfo(deviceId);
-        alignment_ = devInfo.textureAlignment();
-    }
-}
+    class StackAllocator : public GpuMat::Allocator
+    {
+    public:
+        explicit StackAllocator(cudaStream_t stream);
+        ~StackAllocator();
+
+        bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize);
+        void free(GpuMat* mat);
+
+    private:
+        StackAllocator(const StackAllocator&);
+        StackAllocator& operator =(const StackAllocator&);
+
+        cudaStream_t stream_;
+        MemoryStack* memStack_;
+        size_t alignment_;
+    };
+
+    StackAllocator::StackAllocator(cudaStream_t stream) : stream_(stream), memStack_(0)
+    {
+        if (enableMemoryPool)
+        {
+            const int deviceId = getDevice();
+            memStack_ = initializer.getMemoryPool(deviceId)->getFreeMemStack();
+
+            DeviceInfo devInfo(deviceId);
+            alignment_ = devInfo.textureAlignment();
+        }
+    }

-cv::cuda::StackAllocator::~StackAllocator()
+    StackAllocator::~StackAllocator()
     {
         cudaStreamSynchronize(stream_);

         if (memStack_ != 0)
             memStack_->pool->returnMemStack(memStack_);
     }

-namespace
-{
     size_t alignUp(size_t what, size_t alignment)
     {
         size_t alignMask = alignment-1;
@@ -570,55 +590,71 @@ namespace
         size_t res = (what + alignMask) & inverseAlignMask;
         return res;
     }
-}
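
alignUp() rounds a size up to the next multiple of a power-of-two alignment with a mask instead of a divide. A worked copy of the arithmetic (a standalone demo, not the function above):

#include <cstddef>

constexpr std::size_t alignUpDemo(std::size_t what, std::size_t alignment)
{
    return (what + (alignment - 1)) & ~(alignment - 1);
}

static_assert(alignUpDemo(100, 64) == 128, "(100 + 63) & ~63 == 128");
static_assert(alignUpDemo(128, 64) == 128, "aligned input is returned unchanged");
static_assert(alignUpDemo(1, 512) == 512, "one byte still occupies a full 512-byte slot");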
-bool cv::cuda::StackAllocator::allocate(GpuMat* mat, int rows, int cols, size_t elemSize)
+    bool StackAllocator::allocate(GpuMat* mat, int rows, int cols, size_t elemSize)
     {
         if (memStack_ == 0)
             return false;

         size_t pitch, memSize;

         if (rows > 1 && cols > 1)
         {
             pitch = alignUp(cols * elemSize, alignment_);
             memSize = pitch * rows;
         }
         else
         {
             // Single row or single column must be continuous
             pitch = elemSize * cols;
             memSize = alignUp(elemSize * cols * rows, 64);
         }

         uchar* ptr = memStack_->requestMemory(memSize);

         if (ptr == 0)
             return false;

         mat->data = ptr;
         mat->step = pitch;
         mat->refcount = (int*) fastMalloc(sizeof(int));

         return true;
     }
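
So for genuinely 2-D mats every row is padded out to the texture alignment (mat->step becomes the pitch), while a single row or column stays dense and only the total is rounded up to 64 bytes. A numeric sketch, taking alignment_ = 512 as an assumed textureAlignment() value:

#include <cassert>
#include <cstddef>

std::size_t alignUpDemo(std::size_t what, std::size_t alignment)
{
    return (what + (alignment - 1)) & ~(alignment - 1);
}

int main()
{
    const std::size_t alignment = 512;                    // assumed textureAlignment()

    // 2-D case: each of the 480 rows starts on a 512-byte boundary.
    std::size_t pitch = alignUpDemo(100 * 3, alignment);  // 300 bytes per row -> 512
    std::size_t memSize = pitch * 480;                    // 245760 bytes total
    assert(pitch == 512 && memSize == 245760);

    // 1xN case: the row must stay continuous, so the pitch is the exact 4000
    // bytes and only the block size is rounded (to the 64-byte granularity).
    std::size_t memSize1xN = alignUpDemo(1000 * 4, 64);   // 4000 -> 4032
    assert(memSize1xN == 4032);
    return 0;
}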
-void cv::cuda::StackAllocator::free(GpuMat* mat)
+    void StackAllocator::free(GpuMat* mat)
     {
         if (memStack_ == 0)
             return;

         memStack_->returnMemory(mat->datastart);
         fastFree(mat->refcount);
     }
+}
+
+#endif
+
+/////////////////////////////////////////////////////////////
+/// BufferPool
 void cv::cuda::setBufferPoolUsage(bool on)
 {
+#ifndef HAVE_CUDA
+    (void)on;
+    throw_no_cuda();
+#else
     enableMemoryPool = on;
+#endif
 }

 void cv::cuda::setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount)
 {
+#ifndef HAVE_CUDA
+    (void)deviceId;
+    (void)stackSize;
+    (void)stackCount;
+    throw_no_cuda();
+#else
     const int currentDevice = getDevice();

     if (deviceId >= 0)
@@ -638,12 +674,8 @@ void cv::cuda::setBufferPoolConfig(int deviceId, size_t stackSize, int stackCoun
     }

     setDevice(currentDevice);
-}
 #endif
+}

-/////////////////////////////////////////////////////////////
-/// BufferPool

 #ifdef HAVE_CUDA
...
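
With the HAVE_CUDA branches folded into the function bodies, setBufferPoolUsage() and setBufferPoolConfig() now compile in CUDA-less builds and fail at run time through throw_no_cuda(). A configuration sketch, assuming (from the deviceId >= 0 test above) that a negative deviceId applies the settings to every device, and that both setters should run before the first Stream is constructed:

#include <opencv2/core/cuda.hpp>

void configurePools()
{
    cv::cuda::setBufferPoolUsage(true);             // enable the StackAllocator-backed pools
    cv::cuda::setBufferPoolConfig(-1,               // assumed: negative id = all devices
                                  64 * 1024 * 1024, // stackSize: 64 MB per memory stack
                                  3);               // stackCount: stacks kept per device
}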