Commit 6a769c92 authored by Ernest Galbrun's avatar Ernest Galbrun

Modified default stream initialization to allow concurrent calls; modified surf.cuda.cpp to allow concurrent calls.

Modified default stream initialization to allow concurrent calls; modified surf.cuda.cpp to allow concurrent calls.
parent 964b2609
...@@ -207,7 +207,6 @@ namespace ...@@ -207,7 +207,6 @@ namespace
MemoryStack* MemoryPool::getFreeMemStack() MemoryStack* MemoryPool::getFreeMemStack()
{ {
AutoLock lock(mtx_); AutoLock lock(mtx_);
if (!initialized_) if (!initialized_)
initilizeImpl(); initilizeImpl();
...@@ -256,22 +255,31 @@ namespace ...@@ -256,22 +255,31 @@ namespace
namespace namespace
{ {
Mutex mtx_;
bool memory_pool_manager_initialized;
class MemoryPoolManager class MemoryPoolManager
{ {
public: public:
MemoryPoolManager(); MemoryPoolManager();
~MemoryPoolManager(); ~MemoryPoolManager();
void Init();
MemoryPool* getPool(int deviceId); MemoryPool* getPool(int deviceId);
private: private:
std::vector<MemoryPool> pools_; std::vector<MemoryPool> pools_;
}; } manager;
//MemoryPoolManager ;
MemoryPoolManager::MemoryPoolManager() MemoryPoolManager::MemoryPoolManager()
{ {
int deviceCount = getCudaEnabledDeviceCount(); }
void MemoryPoolManager::Init()
{
int deviceCount = getCudaEnabledDeviceCount();
if (deviceCount > 0) if (deviceCount > 0)
pools_.resize(deviceCount); pools_.resize(deviceCount);
} }
...@@ -280,7 +288,7 @@ namespace ...@@ -280,7 +288,7 @@ namespace
{ {
for (size_t i = 0; i < pools_.size(); ++i) for (size_t i = 0; i < pools_.size(); ++i)
{ {
cudaSetDevice(i); cudaSetDevice(static_cast<int>(i));
pools_[i].release(); pools_[i].release();
} }
} }
...@@ -293,7 +301,14 @@ namespace ...@@ -293,7 +301,14 @@ namespace
MemoryPool* memPool(int deviceId) MemoryPool* memPool(int deviceId)
{ {
static MemoryPoolManager manager; {
AutoLock lock(mtx_);
if (!memory_pool_manager_initialized)
{
memory_pool_manager_initialized = true;
manager.Init();
}
}
return manager.getPool(deviceId); return manager.getPool(deviceId);
} }
} }
...@@ -311,8 +326,10 @@ cv::cuda::StackAllocator::StackAllocator(cudaStream_t stream) : stream_(stream), ...@@ -311,8 +326,10 @@ cv::cuda::StackAllocator::StackAllocator(cudaStream_t stream) : stream_(stream),
if (enableMemoryPool) if (enableMemoryPool)
{ {
const int deviceId = getDevice(); const int deviceId = getDevice();
memStack_ = memPool(deviceId)->getFreeMemStack(); {
AutoLock lock(mtx_);
memStack_ = memPool(deviceId)->getFreeMemStack();
}
DeviceInfo devInfo(deviceId); DeviceInfo devInfo(deviceId);
alignment_ = devInfo.textureAlignment(); alignment_ = devInfo.textureAlignment();
} }
......
...@@ -190,10 +190,22 @@ void cv::cuda::Stream::enqueueHostCallback(StreamCallback callback, void* userDa ...@@ -190,10 +190,22 @@ void cv::cuda::Stream::enqueueHostCallback(StreamCallback callback, void* userDa
#endif #endif
} }
namespace
{
bool default_stream_is_initialized;
Mutex mtx;
Ptr<Stream> default_stream;
}
Stream& cv::cuda::Stream::Null() Stream& cv::cuda::Stream::Null()
{ {
static Stream s(Ptr<Impl>(new Impl(0))); AutoLock lock(mtx);
return s; if (!default_stream_is_initialized)
{
default_stream = Ptr<Stream>(new Stream(Ptr<Impl>(new Impl(0))));
default_stream_is_initialized = true;
}
return *default_stream;
} }
cv::cuda::Stream::operator bool_type() const cv::cuda::Stream::operator bool_type() const
......
...@@ -93,6 +93,8 @@ using namespace ::cv::cuda::device::surf; ...@@ -93,6 +93,8 @@ using namespace ::cv::cuda::device::surf;
namespace namespace
{ {
Mutex mtx;
int calcSize(int octave, int layer) int calcSize(int octave, int layer)
{ {
/* Wavelet size at first layer of first octave. */ /* Wavelet size at first layer of first octave. */
...@@ -166,7 +168,6 @@ namespace ...@@ -166,7 +168,6 @@ namespace
{ {
const int layer_rows = img_rows >> octave; const int layer_rows = img_rows >> octave;
const int layer_cols = img_cols >> octave; const int layer_cols = img_cols >> octave;
loadOctaveConstants(octave, layer_rows, layer_cols); loadOctaveConstants(octave, layer_rows, layer_cols);
icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, img_rows, img_cols, octave, surf_.nOctaveLayers); icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, img_rows, img_cols, octave, surf_.nOctaveLayers);
...@@ -354,6 +355,7 @@ void cv::cuda::SURF_CUDA::downloadDescriptors(const GpuMat& descriptorsGPU, std: ...@@ -354,6 +355,7 @@ void cv::cuda::SURF_CUDA::downloadDescriptors(const GpuMat& descriptorsGPU, std:
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints) void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints)
{ {
AutoLock lock(mtx);
if (!img.empty()) if (!img.empty())
{ {
SURF_CUDA_Invoker surf(*this, img, mask); SURF_CUDA_Invoker surf(*this, img, mask);
...@@ -365,6 +367,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuM ...@@ -365,6 +367,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuM
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors, void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors,
bool useProvidedKeypoints) bool useProvidedKeypoints)
{ {
AutoLock lock(mtx);
if (!img.empty()) if (!img.empty())
{ {
SURF_CUDA_Invoker surf(*this, img, mask); SURF_CUDA_Invoker surf(*this, img, mask);
...@@ -382,6 +385,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuM ...@@ -382,6 +385,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuM
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints) void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
{ {
AutoLock lock(mtx);
GpuMat keypointsGPU; GpuMat keypointsGPU;
(*this)(img, mask, keypointsGPU); (*this)(img, mask, keypointsGPU);
...@@ -392,6 +396,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std: ...@@ -392,6 +396,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std:
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints,
GpuMat& descriptors, bool useProvidedKeypoints) GpuMat& descriptors, bool useProvidedKeypoints)
{ {
AutoLock lock(mtx);
GpuMat keypointsGPU; GpuMat keypointsGPU;
if (useProvidedKeypoints) if (useProvidedKeypoints)
...@@ -405,6 +410,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std: ...@@ -405,6 +410,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std:
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints,
std::vector<float>& descriptors, bool useProvidedKeypoints) std::vector<float>& descriptors, bool useProvidedKeypoints)
{ {
AutoLock lock(mtx);
GpuMat descriptorsGPU; GpuMat descriptorsGPU;
(*this)(img, mask, keypoints, descriptorsGPU, useProvidedKeypoints); (*this)(img, mask, keypoints, descriptorsGPU, useProvidedKeypoints);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment