Merge pull request #10456 from dkurt:dnn_allocate_mem_for_optimized_concat

8e7af7f0 · Alexander Alekhin · a65b5df5 · a9807d8f · 8e7af7f0 · 8e7af7f0
Commit 8e7af7f0 authored Dec 28, 2017 by Alexander Alekhin
Show whitespace changes
Inline Side-by-side

Showing with 20 additions and 17 deletions

CMakeLists.txt modules/dnn/CMakeLists.txt +5 -0

dnn.cpp modules/dnn/src/dnn.cpp +15 -17

No files found.
--- a/modules/dnn/CMakeLists.txt
+++ b/modules/dnn/CMakeLists.txt
@@ -97,3 +97,8 @@ if(BUILD_PERF_TESTS)
    endif()
  endif()
 endif()
+ocv_option(${the_module}_REUSE_MEMORY "Enable reusing strategy of memory management" ON)
+if (${the_module}_REUSE_MEMORY)
+  add_definitions(-DREUSE_DNN_MEMORY=1)
+endif()
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -367,13 +367,12 @@ public:
        }
    }
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool force)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst)
    {
+#ifdef REUSE_DNN_MEMORY
        Mat bestBlob;
        LayerPin bestBlobPin;
-        if( !force )
-        {
        std::map<LayerPin, Mat>::iterator hostIt;
        std::map<LayerPin, int>::iterator refIt;
@@ -397,13 +396,13 @@ public:
                }
            }
        }
-        }
        if (!bestBlob.empty())
        {
            reuse(bestBlobPin, lp);
-            dst = Mat(shape, CV_32F, bestBlob.data);
+            dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
        }
        else
+#endif  // REUSE_DNN_MEMORY
        {
            // if dst already has been allocated with total(shape) elements,
            // it won't be recreated and the pointer dst.data remains the same.
@@ -412,13 +411,12 @@ public:
        }
    }
-    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst, bool force)
+    void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst)
    {
+#ifdef REUSE_DNN_MEMORY
        UMat bestBlob;
        LayerPin bestBlobPin;
-        if( !force )
-        {
        std::map<LayerPin, UMat>::iterator hostIt;
        std::map<LayerPin, int>::iterator refIt;
@@ -442,13 +440,13 @@ public:
                }
            }
        }
-        }
        if (!bestBlob.empty())
        {
            reuse(bestBlobPin, lp);
            umat_dst.create(shape, CV_32F);
        }
        else
+#endif  // REUSE_DNN_MEMORY
        {
            // if dst already has been allocated with total(shape) elements,
            // it won't be recreated and the pointer dst.data remains the same.
@@ -458,8 +456,7 @@ public:
    }
    void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
-                               std::vector<LayerPin>& pinsForInternalBlobs,
+                               std::vector<LayerPin>& pinsForInternalBlobs)
-                               bool maximizeReuse)
    {
        CV_TRACE_FUNCTION();
        bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
@@ -530,7 +527,6 @@ public:
        }
        std::map<int, std::vector<int> >::reverse_iterator it;
-        bool force = !maximizeReuse && ld.inputBlobsId.size() > 1;
        for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
        {
            for(int j = 0; j < it->second.size(); j++)
@@ -539,7 +535,7 @@ public:
                if (total(shapes[index]))
                {
                    LayerPin blobPin(ld.id, index);
-                    if (index < outShapes.size() && inPlace && !force)
+                    if (index < outShapes.size() && inPlace)
                    {
                        if (use_umat)
                        {
@@ -558,9 +554,9 @@ public:
                    else
                    {
                        if (use_umat)
-                            reuseOrCreate(shapes[index], blobPin, *umat_blobs[index], force);
+                            reuseOrCreate(shapes[index], blobPin, *umat_blobs[index]);
                        else
-                            reuseOrCreate(shapes[index], blobPin, *blobs[index], force);
+                            reuseOrCreate(shapes[index], blobPin, *blobs[index]);
                    }
                }
            }
@@ -1111,8 +1107,7 @@ struct Net::Impl
        CV_Assert(layerShapesIt != layersShapes.end());
        std::vector<LayerPin> pinsForInternalBlobs;
-        bool maximizeReuse = preferableBackend == DNN_BACKEND_HALIDE;
+        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs);
-        blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs, maximizeReuse);
        ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
        for (int i = 0; i < ld.outputBlobs.size(); ++i)
        {
@@ -1415,6 +1410,9 @@ struct Net::Impl
                    if( i >= ninputs )
                    {
+                        // Allocate new memory to prevent collisions when memory is
+                        // reused (see https://github.com/opencv/opencv/pull/10456).
+                        output = output.clone();
                        Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
                        int ofs = 0;
                        for( i = 0; i < ninputs; i++ )