Commit 8e7af7f0 authored by Alexander Alekhin

Merge pull request #10456 from dkurt:dnn_allocate_mem_for_optimized_concat

parents a65b5df5 a9807d8f
...@@ -97,3 +97,8 @@ if(BUILD_PERF_TESTS) ...@@ -97,3 +97,8 @@ if(BUILD_PERF_TESTS)
endif() endif()
endif() endif()
endif() endif()
ocv_option(${the_module}_REUSE_MEMORY "Enable reusing strategy of memory management" ON)
if (${the_module}_REUSE_MEMORY)
add_definitions(-DREUSE_DNN_MEMORY=1)
endif()
...@@ -367,13 +367,12 @@ public: ...@@ -367,13 +367,12 @@ public:
} }
} }
void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool force) void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst)
{ {
#ifdef REUSE_DNN_MEMORY
Mat bestBlob; Mat bestBlob;
LayerPin bestBlobPin; LayerPin bestBlobPin;
if( !force )
{
std::map<LayerPin, Mat>::iterator hostIt; std::map<LayerPin, Mat>::iterator hostIt;
std::map<LayerPin, int>::iterator refIt; std::map<LayerPin, int>::iterator refIt;
...@@ -397,13 +396,13 @@ public: ...@@ -397,13 +396,13 @@ public:
} }
} }
} }
}
if (!bestBlob.empty()) if (!bestBlob.empty())
{ {
reuse(bestBlobPin, lp); reuse(bestBlobPin, lp);
dst = Mat(shape, CV_32F, bestBlob.data); dst = bestBlob.reshape(1, 1).colRange(0, targetTotal).reshape(1, shape);
} }
else else
#endif // REUSE_DNN_MEMORY
{ {
// if dst already has been allocated with total(shape) elements, // if dst already has been allocated with total(shape) elements,
// it won't be recreated and pointer of dst.data remains the same. // it won't be recreated and pointer of dst.data remains the same.
...@@ -412,13 +411,12 @@ public: ...@@ -412,13 +411,12 @@ public:
} }
} }
void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst, bool force) void reuseOrCreate(const MatShape& shape, const LayerPin& lp, UMat &umat_dst)
{ {
#ifdef REUSE_DNN_MEMORY
UMat bestBlob; UMat bestBlob;
LayerPin bestBlobPin; LayerPin bestBlobPin;
if( !force )
{
std::map<LayerPin, UMat>::iterator hostIt; std::map<LayerPin, UMat>::iterator hostIt;
std::map<LayerPin, int>::iterator refIt; std::map<LayerPin, int>::iterator refIt;
...@@ -442,13 +440,13 @@ public: ...@@ -442,13 +440,13 @@ public:
} }
} }
} }
}
if (!bestBlob.empty()) if (!bestBlob.empty())
{ {
reuse(bestBlobPin, lp); reuse(bestBlobPin, lp);
umat_dst.create(shape, CV_32F); umat_dst.create(shape, CV_32F);
} }
else else
#endif // REUSE_DNN_MEMORY
{ {
// if dst already has been allocated with total(shape) elements, // if dst already has been allocated with total(shape) elements,
// it won't be recreated and pointer of dst.data remains the same. // it won't be recreated and pointer of dst.data remains the same.
...@@ -458,8 +456,7 @@ public: ...@@ -458,8 +456,7 @@ public:
} }
void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes, void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
std::vector<LayerPin>& pinsForInternalBlobs, std::vector<LayerPin>& pinsForInternalBlobs)
bool maximizeReuse)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT && bool use_umat = (preferableBackend == DNN_BACKEND_DEFAULT &&
...@@ -530,7 +527,6 @@ public: ...@@ -530,7 +527,6 @@ public:
} }
std::map<int, std::vector<int> >::reverse_iterator it; std::map<int, std::vector<int> >::reverse_iterator it;
bool force = !maximizeReuse && ld.inputBlobsId.size() > 1;
for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++) for(it = idxSizes.rbegin(); it != idxSizes.rend(); it++)
{ {
for(int j = 0; j < it->second.size(); j++) for(int j = 0; j < it->second.size(); j++)
...@@ -539,7 +535,7 @@ public: ...@@ -539,7 +535,7 @@ public:
if (total(shapes[index])) if (total(shapes[index]))
{ {
LayerPin blobPin(ld.id, index); LayerPin blobPin(ld.id, index);
if (index < outShapes.size() && inPlace && !force) if (index < outShapes.size() && inPlace)
{ {
if (use_umat) if (use_umat)
{ {
...@@ -558,9 +554,9 @@ public: ...@@ -558,9 +554,9 @@ public:
else else
{ {
if (use_umat) if (use_umat)
reuseOrCreate(shapes[index], blobPin, *umat_blobs[index], force); reuseOrCreate(shapes[index], blobPin, *umat_blobs[index]);
else else
reuseOrCreate(shapes[index], blobPin, *blobs[index], force); reuseOrCreate(shapes[index], blobPin, *blobs[index]);
} }
} }
} }
...@@ -1111,8 +1107,7 @@ struct Net::Impl ...@@ -1111,8 +1107,7 @@ struct Net::Impl
CV_Assert(layerShapesIt != layersShapes.end()); CV_Assert(layerShapesIt != layersShapes.end());
std::vector<LayerPin> pinsForInternalBlobs; std::vector<LayerPin> pinsForInternalBlobs;
bool maximizeReuse = preferableBackend == DNN_BACKEND_HALIDE; blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs);
blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs, maximizeReuse);
ld.outputBlobsWrappers.resize(ld.outputBlobs.size()); ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
for (int i = 0; i < ld.outputBlobs.size(); ++i) for (int i = 0; i < ld.outputBlobs.size(); ++i)
{ {
...@@ -1415,6 +1410,9 @@ struct Net::Impl ...@@ -1415,6 +1410,9 @@ struct Net::Impl
if( i >= ninputs ) if( i >= ninputs )
{ {
// Allocate new memory to prevent collisions during memory
// reusing (see https://github.com/opencv/opencv/pull/10456).
output = output.clone();
Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() }; Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
int ofs = 0; int ofs = 0;
for( i = 0; i < ninputs; i++ ) for( i = 0; i < ninputs; i++ )
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment