Commit 09b73b2d authored by Aleksandr Rybnikov's avatar Aleksandr Rybnikov Committed by Vadim Pisarevsky

Blobs reuse improvement (#1205)

* Reuse deep learning output blobs

* Changed order for iterating through blobs while seeking memory. Refactored a little.
parent 1c8809ff
...@@ -369,6 +369,21 @@ namespace dnn //! This namespace is used for dnn module functionlaity. ...@@ -369,6 +369,21 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
CV_WRAP void getMemoryConsumption(const int layerId, CV_WRAP void getMemoryConsumption(const int layerId,
const MatShape& netInputShape, const MatShape& netInputShape,
size_t& weights, size_t& blobs) const; size_t& weights, size_t& blobs) const;
/** @brief Computes the number of bytes required to store
* all weights and intermediate blobs for each layer.
* @param netInputShapes vector of shapes for all net inputs.
* @param layerIds output vector to save layer IDs.
* @param weights output parameter to store resulting bytes for weights.
* @param blobs output parameter to store resulting bytes for intermediate blobs.
*/
CV_WRAP void getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
std::vector<int>& layerIds, std::vector<size_t>& weights,
std::vector<size_t>& blobs) const;
/** @overload */
CV_WRAP void getMemoryConsumption(const MatShape& netInputShape,
std::vector<int>& layerIds, std::vector<size_t>& weights,
std::vector<size_t>& blobs) const;
private: private:
struct Impl; struct Impl;
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
typedef dnn::DictValue LayerId; typedef dnn::DictValue LayerId;
typedef std::vector<dnn::MatShape> vector_MatShape; typedef std::vector<dnn::MatShape> vector_MatShape;
typedef std::vector<std::vector<dnn::MatShape> > vector_vector_MatShape; typedef std::vector<std::vector<dnn::MatShape> > vector_vector_MatShape;
typedef std::vector<size_t> vector_size_t;
template<> template<>
bool pyopencv_to(PyObject *o, dnn::DictValue &dv, const char *name) bool pyopencv_to(PyObject *o, dnn::DictValue &dv, const char *name)
......
...@@ -55,6 +55,22 @@ using std::map; ...@@ -55,6 +55,22 @@ using std::map;
using std::make_pair; using std::make_pair;
using std::set; using std::set;
namespace
{
    // A list of blob shapes.
    typedef std::vector<MatShape> ShapesVec;

    // Shapes of the input, output and internal blobs of one layer, together
    // with the layer's answer to "can you work in-place?".
    struct LayerShapes
    {
        LayerShapes() : supportInPlace(false) {}

        ShapesVec in;
        ShapesVec out;
        ShapesVec internal;

        // A layer that supports in-place computation is not guaranteed to be
        // computed in-place (input.data_ptr == output.data_ptr).
        // Only when the layer reports that it can work in-place and no later
        // layer still uses its input blob do we set output = input.
        bool supportInPlace;
    };
}
namespace cv namespace cv
{ {
namespace dnn namespace dnn
...@@ -154,6 +170,11 @@ struct LayerPin ...@@ -154,6 +170,11 @@ struct LayerPin
{ {
return (lid == r.lid && oid == r.oid); return (lid == r.lid && oid == r.oid);
} }
// Lexicographic ordering: compare the layer id first and fall back to the
// output index only when the layer ids are equal.
bool operator<(const LayerPin &r) const
{
    if (lid != r.lid)
        return lid < r.lid;
    return oid < r.oid;
}
}; };
struct LayerData struct LayerData
...@@ -219,16 +240,222 @@ private: ...@@ -219,16 +240,222 @@ private:
std::vector<String> outNames; std::vector<String> outNames;
}; };
struct Net::Impl struct BlobManager
{ {
typedef std::vector<MatShape> ShapesVec; public:
struct LayerShapes // Increase references counter to layer output.
void addReference(const LayerPin& lp)
{ {
ShapesVec in, out, internal; std::map<LayerPin, int>::iterator it = refCounter.find(lp);
bool inplace; if (it == refCounter.end())
LayerShapes() {inplace = false;} refCounter[lp] = 1;
}; else
it->second += 1;
}
void addReferences(const std::vector<LayerPin>& pins)
{
for (int i = 0; i < pins.size(); i++)
{
addReference(pins[i]);
}
}
// Returns the reference count of the allocated memory that backs the given
// layer blob. The pin is first translated to its origin blob through
// reuseMap; both that mapping and the origin's counter must exist.
int numReferences(const LayerPin& lp)
{
    const std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
    CV_Assert(mapIt != reuseMap.end());

    const std::map<LayerPin, int>::iterator refIt = refCounter.find(mapIt->second);
    CV_Assert(refIt != refCounter.end());

    return refIt->second;
}
// Reuse data allocated in <host> inside the <user> blob.
void reuse(const LayerPin& host, const LayerPin& user)
{
CV_Assert(reuseMap.find(user) == reuseMap.end());
CV_Assert(reuseMap.find(host) != reuseMap.end());
LayerPin memHost = reuseMap[host];
reuseMap[user] = memHost;
if (refCounter.find(memHost) != refCounter.end())
{
std::map<LayerPin, int>::iterator userRefIt = refCounter.find(user);
if (userRefIt != refCounter.end())
{
refCounter[memHost] += userRefIt->second;
refCounter.erase(userRefIt);
}
else
refCounter[memHost] += 1;
}
}
// Drops one reference from the allocated memory that backs the given blob.
// The blob must be registered, its origin must have a counter, and that
// counter must still be positive.
void releaseReference(const LayerPin& lp)
{
    const std::map<LayerPin, LayerPin>::iterator mapIt = reuseMap.find(lp);
    CV_Assert(mapIt != reuseMap.end());

    const std::map<LayerPin, int>::iterator refIt = refCounter.find(mapIt->second);
    CV_Assert(refIt != refCounter.end());
    CV_Assert(refIt->second > 0);

    --refIt->second;
}
void releaseReferences(const std::vector<LayerPin>& pins)
{
for (int i = 0; i < pins.size(); i++)
{
releaseReference(pins[i]);
}
}
void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst)
{
std::map<LayerPin, Mat>::iterator hostIt;
std::map<LayerPin, int>::iterator refIt;
const int targetTotal = total(shape);
Mat bestBlob;
int bestBlobTotal = INT_MAX;
LayerPin bestBlobPin;
for (hostIt = memHosts.begin(); hostIt != memHosts.end(); ++hostIt)
{
refIt = refCounter.find(hostIt->first);
// Use only blobs that had references before because if not,
// it might be used as output.
if (refIt != refCounter.end() && refIt->second == 0)
{
Mat& unusedBlob = hostIt->second;
if (unusedBlob.total() >= targetTotal &&
unusedBlob.total() < bestBlobTotal)
{
bestBlobPin = hostIt->first;
bestBlob = unusedBlob;
bestBlobTotal = unusedBlob.total();
}
}
}
if (!bestBlob.empty())
{
reuse(bestBlobPin, lp);
dst = Mat(shape, CV_32F, bestBlob.data);
}
else
{
dst.create(shape, CV_32F);
addHost(lp, dst);
}
}
// Provides memory for all output and internal blobs of the layer `ld`.
//
// ld                   - layer whose outputBlobs / internals are (re)assigned.
// layerShapes          - shapes of the layer's blobs and its in-place flag.
// pinsForInternalBlobs - output: pins of the non-empty internal blobs. One
//                        reference is added for each of them here.
//
// Outputs are mapped onto the single input blob when the layer supports
// in-place work and is the only remaining user of that input; every other
// non-empty blob goes through reuseOrCreate(), largest blobs first.
void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes,
                           std::vector<LayerPin>& pinsForInternalBlobs)
{
    pinsForInternalBlobs.clear();

    std::vector<Mat>& outputBlobs = ld.outputBlobs;
    std::vector<Mat>& internalBlobs = ld.internals;

    // Declared separately so that BOTH are references. In a combined
    // declaration the '&' applies to the first declarator only, which made
    // the second one a full copy of the shapes vector.
    const ShapesVec& outShapes = layerShapes.out;
    const ShapesVec& internalShapes = layerShapes.internal;

    outputBlobs.resize(std::max((size_t)1, outShapes.size())); //layer produce at least one output blob
    internalBlobs.resize(internalShapes.size());

    CV_Assert(ld.requiredOutputs.size() <= outShapes.size());

    // Check that layer could work in-place.
    bool inPlace = false;
    if (layerShapes.supportInPlace)
    {
        if (ld.inputBlobs.size() == 1)
        {
            // Get number of references to the input memory.
            int numRef = numReferences(ld.inputBlobsId[0]);
            // If current layer is one and only customer of this blob.
            inPlace = numRef == 1;
        }
    }

    ShapesVec shapes(outShapes);
    shapes.insert(shapes.end(), internalShapes.begin(), internalShapes.end());

    // blobs[k] must describe the same blob as shapes[k], so the output part
    // is sized by outShapes, not by outputBlobs (which always holds at least
    // one element, even when the layer reports no output shapes).
    const size_t numOutputs = outShapes.size();
    std::vector<Mat*> blobs;
    blobs.reserve(shapes.size());
    for (size_t i = 0; i < numOutputs; i++)
    {
        blobs.push_back(&outputBlobs[i]);
    }
    for (size_t i = 0; i < internalBlobs.size(); i++)
    {
        blobs.push_back(&internalBlobs[i]);
        if (total(internalShapes[i]))
        {
            // Same numbering as blobPin below: internal blobs follow outputs.
            pinsForInternalBlobs.push_back(LayerPin(ld.id, (int)(numOutputs + i)));
        }
    }

    addReferences(pinsForInternalBlobs);

    // Group blob indices by element count so that allocation can proceed
    // from the largest blobs to the smallest.
    std::map<int, std::vector<int> > idxSizes;
    for (size_t i = 0; i < shapes.size(); i++)
    {
        idxSizes[total(shapes[i])].push_back((int)i);
    }

    std::map<int, std::vector<int> >::reverse_iterator it;
    for (it = idxSizes.rbegin(); it != idxSizes.rend(); ++it)
    {
        for (size_t j = 0; j < it->second.size(); j++)
        {
            int index = it->second[j];
            if (total(shapes[index]))
            {
                LayerPin blobPin(ld.id, index);
                if (index < (int)numOutputs && inPlace)
                {
                    // In-place: the output is a reshaped view of the input.
                    CV_Assert(ld.inputBlobs[0]->total() == total(shapes[index]));
                    ld.outputBlobs[index] = ld.inputBlobs[0]->reshape(1, shapes[index]);
                    reuse(ld.inputBlobsId[0], blobPin);
                }
                else
                {
                    reuseOrCreate(shapes[index], blobPin, *blobs[index]);
                }
            }
        }
    }
}
// Clears all internal bookkeeping (registered hosts, pin-to-origin mapping
// and reference counters). Called before every reallocation.
void reset()
{
    memHosts.clear();
    reuseMap.clear();
    refCounter.clear();
}
private:
// Registers freshly allocated memory: `mat` is stored as the host for `lp`
// and `lp` is recorded as its own origin. `lp` must not be a host already.
void addHost(const LayerPin& lp, const Mat& mat)
{
    CV_Assert(memHosts.find(lp) == memHosts.end());

    memHosts[lp] = mat;
    reuseMap[lp] = lp;
}
std::map<LayerPin, int> refCounter;
// Maps each pin to its origin blob (the blob for which the memory was first allocated).
// For origin blobs, key == value.
std::map<LayerPin, LayerPin> reuseMap;
std::map<LayerPin, Mat> memHosts;
};
struct Net::Impl
{
typedef std::map<int, LayerShapes> LayersShapesMap; typedef std::map<int, LayerShapes> LayersShapesMap;
typedef std::map<int, LayerData> MapIdToLayerData; typedef std::map<int, LayerData> MapIdToLayerData;
...@@ -252,6 +479,7 @@ struct Net::Impl ...@@ -252,6 +479,7 @@ struct Net::Impl
MapIdToLayerData layers; MapIdToLayerData layers;
std::map<String, int> layerNameToId; std::map<String, int> layerNameToId;
BlobManager blobManager;
int lastLayerId; int lastLayerId;
...@@ -469,37 +697,11 @@ struct Net::Impl ...@@ -469,37 +697,11 @@ struct Net::Impl
LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid); LayersShapesMap::const_iterator layerShapesIt = layersShapes.find(lid);
CV_Assert(layerShapesIt != layersShapes.end()); CV_Assert(layerShapesIt != layersShapes.end());
const ShapesVec& outShapes = layerShapesIt->second.out;
CV_Assert(ld.requiredOutputs.size() <= outShapes.size());
ld.outputBlobs.resize(std::max((size_t)1, outShapes.size())); //layer produce at least one output blob std::vector<LayerPin> pinsForInternalBlobs;
for(int i = 0; i < outShapes.size(); i++) blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs);
{
if (shape(ld.outputBlobs[i]) != outShapes[i])
{
if (layerShapesIt->second.inplace)
{
CV_Assert(ld.inputBlobs.size() == ld.outputBlobs.size());
CV_Assert(ld.inputBlobs[i]->total() == total(outShapes[i]));
ld.outputBlobs[i] = ld.inputBlobs[i]->reshape(1, outShapes[i]);
}
else
{
ld.outputBlobs[i].create(outShapes[i], CV_32F);
}
}
}
const ShapesVec& intShapes = layerShapesIt->second.internal;
ld.internals.resize(intShapes.size());
for(int i = 0; i < intShapes.size(); i++)
{
if (shape(ld.internals[i]) != intShapes[i] && total(intShapes[i]))
ld.internals[i].create(intShapes[i], CV_32F);
}
Ptr<Layer> layerPtr = ld.getLayerInstance(); Ptr<Layer> layerPtr = ld.getLayerInstance();
//try
{ {
layerPtr->finalize(ld.inputBlobs, ld.outputBlobs); layerPtr->finalize(ld.inputBlobs, ld.outputBlobs);
#if 0 #if 0
...@@ -512,10 +714,10 @@ struct Net::Impl ...@@ -512,10 +714,10 @@ struct Net::Impl
std::cout << "\n"; std::cout << "\n";
#endif #endif
} }
/*catch (const cv::Exception &err)
{ // After allocation of layer, we decrease counters to it's input blobs.
CV_RETHROW_ERROR(err, format("The following error occured while making allocate() for layer \"%s\": %s", ld.name.c_str(), err.err.c_str())); blobManager.releaseReferences(ld.inputBlobsId);
}*/ blobManager.releaseReferences(pinsForInternalBlobs);
ld.flag = 1; ld.flag = 1;
} }
...@@ -536,6 +738,13 @@ struct Net::Impl ...@@ -536,6 +738,13 @@ struct Net::Impl
LayersShapesMap layersShapes; LayersShapesMap layersShapes;
getLayersShapes(inputShapes, layersShapes); getLayersShapes(inputShapes, layersShapes);
blobManager.reset();
for (it = layers.begin(); it != layers.end(); ++it)
{
const LayerData& ld = it->second;
blobManager.addReferences(ld.inputBlobsId);
}
for (it = layers.begin(); it != layers.end(); it++) for (it = layers.begin(); it != layers.end(); it++)
{ {
int lid = it->first; int lid = it->first;
...@@ -609,7 +818,7 @@ struct Net::Impl ...@@ -609,7 +818,7 @@ struct Net::Impl
ShapesVec& os = inOutShapes[id].out; ShapesVec& os = inOutShapes[id].out;
ShapesVec& ints = inOutShapes[id].internal; ShapesVec& ints = inOutShapes[id].internal;
int requiredOutputs = layers[id].requiredOutputs.size(); int requiredOutputs = layers[id].requiredOutputs.size();
inOutShapes[id].inplace = inOutShapes[id].supportInPlace =
layers[id].getLayerInstance()->getMemoryShapes(is, requiredOutputs, os, ints); layers[id].getLayerInstance()->getMemoryShapes(is, requiredOutputs, os, ints);
} }
...@@ -718,9 +927,13 @@ void Net::setBlob(String outputName, const Mat &blob_) ...@@ -718,9 +927,13 @@ void Net::setBlob(String outputName, const Mat &blob_)
LayerData &ld = impl->layers[pin.lid]; LayerData &ld = impl->layers[pin.lid];
ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) ); ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
MatShape prevShape = shape(ld.outputBlobs[pin.oid]); MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
bool oldShape = prevShape == shape(blob_);
if (oldShape)
blob_.copyTo(ld.outputBlobs[pin.oid]);
else
ld.outputBlobs[pin.oid] = blob_.clone(); ld.outputBlobs[pin.oid] = blob_.clone();
impl->netWasAllocated = impl->netWasAllocated && prevShape == shape(blob_); impl->netWasAllocated = impl->netWasAllocated && oldShape;
} }
Mat Net::getBlob(String outputName) Mat Net::getBlob(String outputName)
...@@ -827,10 +1040,10 @@ std::vector<int> Net::getUnconnectedOutLayers() const ...@@ -827,10 +1040,10 @@ std::vector<int> Net::getUnconnectedOutLayers() const
return layersIds; return layersIds;
} }
void Net::getLayersShapes(const Net::Impl::ShapesVec& netInputShapes, void Net::getLayersShapes(const ShapesVec& netInputShapes,
std::vector<int>* layersIds, std::vector<int>* layersIds,
std::vector<Net::Impl::ShapesVec>* inLayersShapes, std::vector<ShapesVec>* inLayersShapes,
std::vector<Net::Impl::ShapesVec>* outLayersShapes) const std::vector<ShapesVec>* outLayersShapes) const
{ {
if ((layersIds || inLayersShapes || outLayersShapes) == false) if ((layersIds || inLayersShapes || outLayersShapes) == false)
return; return;
...@@ -856,29 +1069,29 @@ void Net::getLayersShapes(const Net::Impl::ShapesVec& netInputShapes, ...@@ -856,29 +1069,29 @@ void Net::getLayersShapes(const Net::Impl::ShapesVec& netInputShapes,
void Net::getLayersShapes(const MatShape& netInputShape, void Net::getLayersShapes(const MatShape& netInputShape,
std::vector<int>* layerIds, std::vector<int>* layerIds,
std::vector<Net::Impl::ShapesVec>* inLayersShapes, std::vector<ShapesVec>* inLayersShapes,
std::vector<Net::Impl::ShapesVec>* outLayersShapes) const std::vector<ShapesVec>* outLayersShapes) const
{ {
getLayersShapes(Net::Impl::ShapesVec(1, netInputShape), getLayersShapes(ShapesVec(1, netInputShape),
layerIds, inLayersShapes, outLayersShapes); layerIds, inLayersShapes, outLayersShapes);
} }
void Net::getLayerShapes(const MatShape& netInputShape, void Net::getLayerShapes(const MatShape& netInputShape,
const int layerId, const int layerId,
Net::Impl::ShapesVec* inLayerShapes, ShapesVec* inLayerShapes,
Net::Impl::ShapesVec* outLayerShapes) const ShapesVec* outLayerShapes) const
{ {
getLayerShapes(Net::Impl::ShapesVec(1, netInputShape), getLayerShapes(ShapesVec(1, netInputShape),
layerId, inLayerShapes, outLayerShapes); layerId, inLayerShapes, outLayerShapes);
} }
void Net::getLayerShapes(const Net::Impl::ShapesVec& netInputShapes, void Net::getLayerShapes(const ShapesVec& netInputShapes,
const int layerId, const int layerId,
Net::Impl::ShapesVec* inLayerShapes, ShapesVec* inLayerShapes,
Net::Impl::ShapesVec* outLayerShapes) const ShapesVec* outLayerShapes) const
{ {
Impl::LayerShapes shapes; LayerShapes shapes;
impl->getLayerShapes(netInputShapes, layerId, shapes); impl->getLayerShapes(netInputShapes, layerId, shapes);
if (inLayerShapes) if (inLayerShapes)
*inLayerShapes = shapes.in; *inLayerShapes = shapes.in;
...@@ -915,7 +1128,7 @@ int64 Net::getFLOPS(const int layerId, ...@@ -915,7 +1128,7 @@ int64 Net::getFLOPS(const int layerId,
Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId); Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerId);
CV_Assert(layer != impl->layers.end()); CV_Assert(layer != impl->layers.end());
Impl::LayerShapes shapes; LayerShapes shapes;
impl->getLayerShapes(netInputShapes, layerId, shapes); impl->getLayerShapes(netInputShapes, layerId, shapes);
return layer->second.getLayerInstance()->getFLOPS(shapes.in, shapes.out); return layer->second.getLayerInstance()->getFLOPS(shapes.in, shapes.out);
...@@ -986,41 +1199,70 @@ void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes, ...@@ -986,41 +1199,70 @@ void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
size_t& weights, size_t& blobs) const size_t& weights, size_t& blobs) const
{ {
std::vector<int> layerIds; std::vector<int> layerIds;
std::vector<size_t> w, b;
getMemoryConsumption(netInputShapes, layerIds, w, b);
weights = blobs = 0;
for(int i = 0; i < layerIds.size(); i++)
{
weights += w[i];
blobs += b[i];
}
}
void Net::getMemoryConsumption(const int layerId,
const MatShape& netInputShape,
size_t& weights, size_t& blobs) const
{
getMemoryConsumption(layerId, std::vector<MatShape>(1, netInputShape),
weights, blobs);
}
void Net::getMemoryConsumption(const MatShape& netInputShape,
size_t& weights, size_t& blobs) const
{
getMemoryConsumption(std::vector<MatShape>(1, netInputShape),
weights, blobs);
}
void Net::getMemoryConsumption(const std::vector<MatShape>& netInputShapes,
std::vector<int>& layerIds, std::vector<size_t>& weights,
std::vector<size_t>& blobs) const
{
layerIds.clear();
weights.clear();
blobs.clear();
std::vector<std::vector<MatShape> > outLayerShapes; std::vector<std::vector<MatShape> > outLayerShapes;
getLayersShapes(netInputShapes, &layerIds, 0, &outLayerShapes); getLayersShapes(netInputShapes, &layerIds, 0, &outLayerShapes);
weights = blobs = 0;
for(int i = 0; i < layerIds.size(); i++) for(int i = 0; i < layerIds.size(); i++)
{ {
int w = 0, b = 0;
Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerIds[i]); Impl::MapIdToLayerData::iterator layer = impl->layers.find(layerIds[i]);
CV_Assert(layer != impl->layers.end()); CV_Assert(layer != impl->layers.end());
for(int j = 0; j < layer->second.params.blobs.size(); j++) for(int j = 0; j < layer->second.params.blobs.size(); j++)
{ {
const Mat& weightsBlob = layer->second.params.blobs[j]; const Mat& weightsBlob = layer->second.params.blobs[j];
weights += weightsBlob.total()*weightsBlob.elemSize(); w += weightsBlob.total()*weightsBlob.elemSize();
} }
for(int j = 0; j < outLayerShapes[i].size(); j++) for(int j = 0; j < outLayerShapes[i].size(); j++)
{ {
blobs += total(outLayerShapes[i][j]) * sizeof(float); b += total(outLayerShapes[i][j]) * sizeof(float);
} }
}
}
void Net::getMemoryConsumption(const int layerId, weights.push_back(w);
const MatShape& netInputShape, blobs.push_back(b);
size_t& weights, size_t& blobs) const }
{
getMemoryConsumption(layerId, std::vector<MatShape>(1, netInputShape),
weights, blobs);
} }
void Net::getMemoryConsumption(const MatShape& netInputShape, void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector<int>& layerIds,
size_t& weights, size_t& blobs) const std::vector<size_t>& weights, std::vector<size_t>& blobs) const
{ {
getMemoryConsumption(std::vector<MatShape>(1, netInputShape), getMemoryConsumption(std::vector<MatShape>(1, netInputShape), layerIds,
weights, blobs); weights, blobs);
} }
......
...@@ -30,6 +30,15 @@ public: ...@@ -30,6 +30,15 @@ public:
epsilon = params.get<float>("eps", 1E-5); epsilon = params.get<float>("eps", 1E-5);
} }
// Lets the base Layer implementation fill in `outputs` / `internals` and
// ignores its return value: this layer always returns true, which the
// network stores as the layer's "supports in-place" flag.
bool getMemoryShapes(const std::vector<MatShape> &inputs,
                     const int requiredOutputs,
                     std::vector<MatShape> &outputs,
                     std::vector<MatShape> &internals) const
{
    Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);

    const bool canWorkInPlace = true;
    return canWorkInPlace;
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_Assert(blobs.size() >= 2); CV_Assert(blobs.size() >= 2);
......
...@@ -61,7 +61,12 @@ public: ...@@ -61,7 +61,12 @@ public:
return true; return true;
} }
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) {} void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{
for (int i = 0, n = outputs.size(); i < n; ++i)
if (outputs[i].data != inputs[i]->data)
inputs[i]->copyTo(outputs[i]);
}
}; };
Ptr<BlankLayer> BlankLayer::create(const LayerParams& params) Ptr<BlankLayer> BlankLayer::create(const LayerParams& params)
......
...@@ -20,17 +20,17 @@ public: ...@@ -20,17 +20,17 @@ public:
class PBody : public cv::ParallelLoopBody class PBody : public cv::ParallelLoopBody
{ {
Func &func; Func &func;
Dtype *data; Dtype *src, *dst;
public: public:
PBody(Mat &mat, Func &func_) : PBody(Mat &src, Mat &dst, Func &func_) :
func(func_), data(mat.ptr<Dtype>()) func(func_), src(src.ptr<Dtype>()), dst(dst.ptr<Dtype>())
{} {}
void operator()(const Range &r) const void operator()(const Range &r) const
{ {
for (int i = r.start; i < r.end; i++) for (int i = r.start; i < r.end; i++)
data[i] = func(data[i]); dst[i] = func(src[i]);
} }
}; };
...@@ -49,13 +49,13 @@ public: ...@@ -49,13 +49,13 @@ public:
{ {
for (size_t i = 0; i < inputs.size(); i++) for (size_t i = 0; i < inputs.size(); i++)
{ {
const Mat &src = *inputs[i]; Mat &src = *inputs[i];
Mat &dst = outputs[i]; Mat &dst = outputs[i];
CV_Assert(src.ptr() == dst.ptr() && src.isContinuous()); CV_Assert(src.isContinuous() && dst.isContinuous());
Range sizeRange = Range(0, dst.total()); Range sizeRange = Range(0, dst.total());
CV_Assert(src.type() == CV_32F); CV_Assert(src.type() == CV_32F);
PBody<float> body(dst, func); PBody<float> body(src, dst, func);
if( run_parallel ) if( run_parallel )
cv::parallel_for_(sizeRange, body); cv::parallel_for_(sizeRange, body);
else else
......
...@@ -178,7 +178,7 @@ public: ...@@ -178,7 +178,7 @@ public:
for (size_t i = 0; i < inputs.size(); i++) for (size_t i = 0; i < inputs.size(); i++)
{ {
Mat srcBlob = *inputs[i]; Mat srcBlob = *inputs[i];
MatShape inputShape = shape(srcBlob); MatShape inputShape = shape(srcBlob), outShape = shape(outputs[i]);
if (performReordering) if (performReordering)
{ {
...@@ -204,6 +204,11 @@ public: ...@@ -204,6 +204,11 @@ public:
} }
internals[i].copyTo(outputs[i]); internals[i].copyTo(outputs[i]);
} }
else
{
if (outputs[i].data != srcBlob.data)
srcBlob.reshape(1, outShape).copyTo(outputs[i]);
}
} }
} }
......
...@@ -27,6 +27,15 @@ public: ...@@ -27,6 +27,15 @@ public:
hasBias = params.get<bool>("bias_term", false); hasBias = params.get<bool>("bias_term", false);
} }
// Lets the base Layer implementation fill in `outputs` / `internals` and
// ignores its return value: this layer always returns true, which the
// network stores as the layer's "supports in-place" flag.
bool getMemoryShapes(const std::vector<MatShape> &inputs,
                     const int requiredOutputs,
                     std::vector<MatShape> &outputs,
                     std::vector<MatShape> &internals) const
{
    Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);

    const bool canWorkInPlace = true;
    return canWorkInPlace;
}
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
CV_Assert(blobs.size() == 1 + hasBias); CV_Assert(blobs.size() == 1 + hasBias);
......
...@@ -72,16 +72,16 @@ public: ...@@ -72,16 +72,16 @@ public:
{ {
CV_Assert(inputs.size() == 1); CV_Assert(inputs.size() == 1);
outputs.resize(outputsCount >= 0 ? outputsCount : requiredOutputs, Layer::getMemoryShapes(inputs, outputsCount >= 0 ? outputsCount : requiredOutputs,
inputs[0]); outputs, internals);
return true;
return false;
} }
void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
{ {
for (size_t i = 0; i < outputs.size(); i++) for (size_t i = 0; i < outputs.size(); i++)
{ {
if (outputs[i].data != inputs[0]->data)
inputs[0]->copyTo(outputs[i]); inputs[0]->copyTo(outputs[i]);
} }
} }
......
...@@ -121,6 +121,10 @@ TEST(Reproducibility_FCN, Accuracy) ...@@ -121,6 +121,10 @@ TEST(Reproducibility_FCN, Accuracy)
if (sample.size() != inputSize) if (sample.size() != inputSize)
resize(sample, sample, inputSize); resize(sample, sample, inputSize);
std::vector<int> layerIds;
std::vector<size_t> weights, blobs;
net.getMemoryConsumption(shape(1,3,227,227), layerIds, weights, blobs);
net.setBlob(".data", blobFromImage(sample, 1.)); net.setBlob(".data", blobFromImage(sample, 1.));
net.forward(); net.forward();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment