Commit 6a769c92 authored by Ernest Galbrun's avatar Ernest Galbrun

modified default stream initialization to allow concurrent calls modified cuda…

modified default stream initialization to allow concurrent calls modified cuda surf.cuda.cpp to allow concurrent call
parent 964b2609
......@@ -207,7 +207,6 @@ namespace
MemoryStack* MemoryPool::getFreeMemStack()
{
AutoLock lock(mtx_);
if (!initialized_)
initilizeImpl();
......@@ -256,22 +255,31 @@ namespace
namespace
{
Mutex mtx_;
bool memory_pool_manager_initialized;
class MemoryPoolManager
{
public:
MemoryPoolManager();
~MemoryPoolManager();
void Init();
MemoryPool* getPool(int deviceId);
private:
std::vector<MemoryPool> pools_;
};
} manager;
//MemoryPoolManager ;
MemoryPoolManager::MemoryPoolManager()
{
int deviceCount = getCudaEnabledDeviceCount();
}
void MemoryPoolManager::Init()
{
int deviceCount = getCudaEnabledDeviceCount();
if (deviceCount > 0)
pools_.resize(deviceCount);
}
......@@ -280,7 +288,7 @@ namespace
{
for (size_t i = 0; i < pools_.size(); ++i)
{
cudaSetDevice(i);
cudaSetDevice(static_cast<int>(i));
pools_[i].release();
}
}
......@@ -293,7 +301,14 @@ namespace
MemoryPool* memPool(int deviceId)
{
static MemoryPoolManager manager;
{
AutoLock lock(mtx_);
if (!memory_pool_manager_initialized)
{
memory_pool_manager_initialized = true;
manager.Init();
}
}
return manager.getPool(deviceId);
}
}
......@@ -311,8 +326,10 @@ cv::cuda::StackAllocator::StackAllocator(cudaStream_t stream) : stream_(stream),
if (enableMemoryPool)
{
const int deviceId = getDevice();
memStack_ = memPool(deviceId)->getFreeMemStack();
{
AutoLock lock(mtx_);
memStack_ = memPool(deviceId)->getFreeMemStack();
}
DeviceInfo devInfo(deviceId);
alignment_ = devInfo.textureAlignment();
}
......
......@@ -190,10 +190,22 @@ void cv::cuda::Stream::enqueueHostCallback(StreamCallback callback, void* userDa
#endif
}
namespace
{
bool default_stream_is_initialized;
Mutex mtx;
Ptr<Stream> default_stream;
}
Stream& cv::cuda::Stream::Null()
{
static Stream s(Ptr<Impl>(new Impl(0)));
return s;
AutoLock lock(mtx);
if (!default_stream_is_initialized)
{
default_stream = Ptr<Stream>(new Stream(Ptr<Impl>(new Impl(0))));
default_stream_is_initialized = true;
}
return *default_stream;
}
cv::cuda::Stream::operator bool_type() const
......
......@@ -93,6 +93,8 @@ using namespace ::cv::cuda::device::surf;
namespace
{
Mutex mtx;
int calcSize(int octave, int layer)
{
/* Wavelet size at first layer of first octave. */
......@@ -166,7 +168,6 @@ namespace
{
const int layer_rows = img_rows >> octave;
const int layer_cols = img_cols >> octave;
loadOctaveConstants(octave, layer_rows, layer_cols);
icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, img_rows, img_cols, octave, surf_.nOctaveLayers);
......@@ -354,6 +355,7 @@ void cv::cuda::SURF_CUDA::downloadDescriptors(const GpuMat& descriptorsGPU, std:
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints)
{
AutoLock lock(mtx);
if (!img.empty())
{
SURF_CUDA_Invoker surf(*this, img, mask);
......@@ -365,6 +367,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuM
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors,
bool useProvidedKeypoints)
{
AutoLock lock(mtx);
if (!img.empty())
{
SURF_CUDA_Invoker surf(*this, img, mask);
......@@ -382,6 +385,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuM
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
{
AutoLock lock(mtx);
GpuMat keypointsGPU;
(*this)(img, mask, keypointsGPU);
......@@ -392,6 +396,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std:
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints,
GpuMat& descriptors, bool useProvidedKeypoints)
{
AutoLock lock(mtx);
GpuMat keypointsGPU;
if (useProvidedKeypoints)
......@@ -405,6 +410,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std:
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints,
std::vector<float>& descriptors, bool useProvidedKeypoints)
{
AutoLock lock(mtx);
GpuMat descriptorsGPU;
(*this)(img, mask, keypoints, descriptorsGPU, useProvidedKeypoints);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment