Commit 6a769c92 authored by Ernest Galbrun's avatar Ernest Galbrun

Modified default stream initialization to allow concurrent calls; modified surf.cuda.cpp to allow concurrent calls.

Modified default stream initialization to allow concurrent calls; modified surf.cuda.cpp to allow concurrent calls.
parent 964b2609
...@@ -207,7 +207,6 @@ namespace ...@@ -207,7 +207,6 @@ namespace
MemoryStack* MemoryPool::getFreeMemStack() MemoryStack* MemoryPool::getFreeMemStack()
{ {
AutoLock lock(mtx_); AutoLock lock(mtx_);
if (!initialized_) if (!initialized_)
initilizeImpl(); initilizeImpl();
...@@ -256,22 +255,31 @@ namespace ...@@ -256,22 +255,31 @@ namespace
namespace namespace
{ {
Mutex mtx_;
bool memory_pool_manager_initialized;
class MemoryPoolManager class MemoryPoolManager
{ {
public: public:
MemoryPoolManager(); MemoryPoolManager();
~MemoryPoolManager(); ~MemoryPoolManager();
void Init();
MemoryPool* getPool(int deviceId); MemoryPool* getPool(int deviceId);
private: private:
std::vector<MemoryPool> pools_; std::vector<MemoryPool> pools_;
}; } manager;
//MemoryPoolManager ;
MemoryPoolManager::MemoryPoolManager() MemoryPoolManager::MemoryPoolManager()
{ {
int deviceCount = getCudaEnabledDeviceCount(); }
void MemoryPoolManager::Init()
{
int deviceCount = getCudaEnabledDeviceCount();
if (deviceCount > 0) if (deviceCount > 0)
pools_.resize(deviceCount); pools_.resize(deviceCount);
} }
...@@ -280,7 +288,7 @@ namespace ...@@ -280,7 +288,7 @@ namespace
{ {
for (size_t i = 0; i < pools_.size(); ++i) for (size_t i = 0; i < pools_.size(); ++i)
{ {
cudaSetDevice(i); cudaSetDevice(static_cast<int>(i));
pools_[i].release(); pools_[i].release();
} }
} }
...@@ -293,7 +301,14 @@ namespace ...@@ -293,7 +301,14 @@ namespace
MemoryPool* memPool(int deviceId) MemoryPool* memPool(int deviceId)
{ {
static MemoryPoolManager manager; {
AutoLock lock(mtx_);
if (!memory_pool_manager_initialized)
{
memory_pool_manager_initialized = true;
manager.Init();
}
}
return manager.getPool(deviceId); return manager.getPool(deviceId);
} }
} }
...@@ -311,8 +326,10 @@ cv::cuda::StackAllocator::StackAllocator(cudaStream_t stream) : stream_(stream), ...@@ -311,8 +326,10 @@ cv::cuda::StackAllocator::StackAllocator(cudaStream_t stream) : stream_(stream),
if (enableMemoryPool) if (enableMemoryPool)
{ {
const int deviceId = getDevice(); const int deviceId = getDevice();
memStack_ = memPool(deviceId)->getFreeMemStack(); {
AutoLock lock(mtx_);
memStack_ = memPool(deviceId)->getFreeMemStack();
}
DeviceInfo devInfo(deviceId); DeviceInfo devInfo(deviceId);
alignment_ = devInfo.textureAlignment(); alignment_ = devInfo.textureAlignment();
} }
......
...@@ -190,10 +190,22 @@ void cv::cuda::Stream::enqueueHostCallback(StreamCallback callback, void* userDa ...@@ -190,10 +190,22 @@ void cv::cuda::Stream::enqueueHostCallback(StreamCallback callback, void* userDa
#endif #endif
} }
namespace
{
bool default_stream_is_initialized;
Mutex mtx;
Ptr<Stream> default_stream;
}
Stream& cv::cuda::Stream::Null() Stream& cv::cuda::Stream::Null()
{ {
static Stream s(Ptr<Impl>(new Impl(0))); AutoLock lock(mtx);
return s; if (!default_stream_is_initialized)
{
default_stream = Ptr<Stream>(new Stream(Ptr<Impl>(new Impl(0))));
default_stream_is_initialized = true;
}
return *default_stream;
} }
cv::cuda::Stream::operator bool_type() const cv::cuda::Stream::operator bool_type() const
......
...@@ -93,6 +93,8 @@ using namespace ::cv::cuda::device::surf; ...@@ -93,6 +93,8 @@ using namespace ::cv::cuda::device::surf;
namespace namespace
{ {
Mutex mtx;
int calcSize(int octave, int layer) int calcSize(int octave, int layer)
{ {
/* Wavelet size at first layer of first octave. */ /* Wavelet size at first layer of first octave. */
...@@ -166,7 +168,6 @@ namespace ...@@ -166,7 +168,6 @@ namespace
{ {
const int layer_rows = img_rows >> octave; const int layer_rows = img_rows >> octave;
const int layer_cols = img_cols >> octave; const int layer_cols = img_cols >> octave;
loadOctaveConstants(octave, layer_rows, layer_cols); loadOctaveConstants(octave, layer_rows, layer_cols);
icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, img_rows, img_cols, octave, surf_.nOctaveLayers); icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, img_rows, img_cols, octave, surf_.nOctaveLayers);
...@@ -354,6 +355,7 @@ void cv::cuda::SURF_CUDA::downloadDescriptors(const GpuMat& descriptorsGPU, std: ...@@ -354,6 +355,7 @@ void cv::cuda::SURF_CUDA::downloadDescriptors(const GpuMat& descriptorsGPU, std:
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints) void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints)
{ {
AutoLock lock(mtx);
if (!img.empty()) if (!img.empty())
{ {
SURF_CUDA_Invoker surf(*this, img, mask); SURF_CUDA_Invoker surf(*this, img, mask);
...@@ -365,6 +367,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuM ...@@ -365,6 +367,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuM
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors, void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors,
bool useProvidedKeypoints) bool useProvidedKeypoints)
{ {
AutoLock lock(mtx);
if (!img.empty()) if (!img.empty())
{ {
SURF_CUDA_Invoker surf(*this, img, mask); SURF_CUDA_Invoker surf(*this, img, mask);
...@@ -382,6 +385,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuM ...@@ -382,6 +385,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuM
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints) void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
{ {
AutoLock lock(mtx);
GpuMat keypointsGPU; GpuMat keypointsGPU;
(*this)(img, mask, keypointsGPU); (*this)(img, mask, keypointsGPU);
...@@ -392,6 +396,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std: ...@@ -392,6 +396,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std:
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints,
GpuMat& descriptors, bool useProvidedKeypoints) GpuMat& descriptors, bool useProvidedKeypoints)
{ {
AutoLock lock(mtx);
GpuMat keypointsGPU; GpuMat keypointsGPU;
if (useProvidedKeypoints) if (useProvidedKeypoints)
...@@ -405,6 +410,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std: ...@@ -405,6 +410,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std:
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints,
std::vector<float>& descriptors, bool useProvidedKeypoints) std::vector<float>& descriptors, bool useProvidedKeypoints)
{ {
AutoLock lock(mtx);
GpuMat descriptorsGPU; GpuMat descriptorsGPU;
(*this)(img, mask, keypoints, descriptorsGPU, useProvidedKeypoints); (*this)(img, mask, keypoints, descriptorsGPU, useProvidedKeypoints);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment