Commit aa0d8060 authored by Aleksandr Rybnikov, committed by Maksim Shabunin

Added the possibility of getting any intermediate blob, with thrifty memory management

parent b18e3579
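In practice the change replaces the old setBlob()/forward()/getBlob() sequence with setInput() plus a forward() that returns the requested blob, so only the blobs that are actually needed have to be kept. A minimal sketch of the new usage, assuming an already loaded Net and input blob (the layer names such as "prob" and "conv1/7x7_s2" are illustrative, not part of this commit):

```cpp
#include <opencv2/dnn.hpp>
using namespace cv;
using namespace cv::dnn;

void runNet(Net& net, const Mat& inputBlob)
{
    // Old API: net.setBlob(".data", inputBlob); net.forward(); Mat prob = net.getBlob("prob");
    // New API: forward() runs the net only up to the requested layer and returns its first output.
    net.setInput(inputBlob, "data");
    Mat prob = net.forward("prob");

    // Several intermediate outputs can be requested in one call; the blob manager
    // then keeps exactly these blobs instead of the output of every layer.
    std::vector<String> names;
    names.push_back("conv1/7x7_s2");       // illustrative layer names
    names.push_back("inception_4c/1x1");
    std::vector<Mat> outs;
    net.forward(outs, names);
}
```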
@@ -337,19 +337,35 @@ namespace dnn //! This namespace is used for dnn module functionality.
      * In fact, this layer provides the only way to pass user data into the network.
      * As any other layer, this layer can label its outputs and this function provides an easy way to do this.
      */
-    CV_WRAP void setNetInputs(const std::vector<String> &inputBlobNames);
+    CV_WRAP void setInputsNames(const std::vector<String> &inputBlobNames);

-    /** @brief Initializes and allocates all layers. */
-    CV_WRAP void allocate();
-
-    /** @brief Runs forward pass to compute output of layer @p toLayer.
+    /** @brief Runs forward pass to compute output of layer with name @p outputName.
+     *  @param outputName name for layer which output is needed to get
+     *  @return blob for first output of specified layer.
      *  @details By default runs forward pass for the whole network.
      */
-    CV_WRAP void forward(LayerId toLayer = String());
-    /** @brief Runs forward pass to compute output of layer @p toLayer, but computations start from @p startLayer */
-    void forward(LayerId startLayer, LayerId toLayer);
-    /** @overload */
-    void forward(const std::vector<LayerId> &startLayers, const std::vector<LayerId> &toLayers);
+    CV_WRAP Mat forward(const String& outputName = String());
+
+    /** @brief Runs forward pass to compute output of layer with name @p outputName.
+     *  @param outputBlobs contains all output blobs for specified layer.
+     *  @param outputName name for layer which output is needed to get
+     *  @details If @p outputName is empty, runs forward pass for the whole network.
+     */
+    CV_WRAP void forward(std::vector<Mat>& outputBlobs, const String& outputName = String());
+
+    /** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames.
+     *  @param outputBlobs contains blobs for first outputs of specified layers.
+     *  @param outBlobNames names for layers which outputs are needed to get
+     */
+    CV_WRAP void forward(std::vector<Mat>& outputBlobs,
+                         const std::vector<String>& outBlobNames);
+
+    /** @brief Runs forward pass to compute outputs of layers listed in @p outBlobNames.
+     *  @param outputBlobs contains all output blobs for each layer specified in @p outBlobNames.
+     *  @param outBlobNames names for layers which outputs are needed to get
+     */
+    CV_WRAP void forward(std::vector<std::vector<Mat> >& outputBlobs,
+                         const std::vector<String>& outBlobNames);

     //TODO:
     /** @brief Optimized forward.
@@ -369,7 +385,7 @@ namespace dnn //! This namespace is used for dnn module functionality.
      * specific target. For layers that not represented in scheduling file
      * or if no manual scheduling used at all, automatic scheduling will be applied.
      */
-    void compileHalide(const std::string& scheduler = "");
+    void setHalideScheduler(const String& scheduler);

     /**
      * @brief Ask network to use specific computation backend where it supported.
@@ -379,19 +395,13 @@ namespace dnn //! This namespace is used for dnn module functionality.
     void setPreferableBackend(int backendId);

     /** @brief Sets the new value for the layer output blob
-     *  @param outputName descriptor of the updating layer output blob.
+     *  @param name descriptor of the updating layer output blob.
      *  @param blob new blob.
      *  @see connect(String, String) to know format of the descriptor.
      *  @note If updating blob is not empty then @p blob must have the same shape,
      *  because network reshaping is not implemented yet.
      */
-    CV_WRAP void setBlob(String outputName, const Mat &blob);
-
-    /** @brief Returns the layer output blob.
-     *  @param outputName the descriptor of the returning layer output blob.
-     *  @see connect(String, String)
-     */
-    CV_WRAP Mat getBlob(String outputName);
+    CV_WRAP void setInput(const Mat &blob, const String& name = "");

     /** @brief Sets the new value for the learned param of the layer.
      *  @param layer name or id of the layer.
......
@@ -3,6 +3,7 @@ typedef dnn::DictValue LayerId;
 typedef std::vector<dnn::MatShape> vector_MatShape;
 typedef std::vector<std::vector<dnn::MatShape> > vector_vector_MatShape;
 typedef std::vector<size_t> vector_size_t;
+typedef std::vector<std::vector<Mat> > vector_vector_Mat;

 template<>
 bool pyopencv_to(PyObject *o, dnn::DictValue &dv, const char *name)
......
@@ -119,16 +119,14 @@ int main(int argc, char **argv)
     //! [Prepare blob]

     //! [Set input blob]
-    net.setBlob(".data", inputBlob);        //set the network input
+    net.setInput(inputBlob, "data");        //set the network input
     //! [Set input blob]

     //! [Make forward pass]
-    net.forward();                          //compute output
+    Mat prob = net.forward("prob");         //compute output
     //! [Make forward pass]

     //! [Gather output]
-    Mat prob = net.getBlob("prob");         //gather output of "prob" layer
     int classId;
     double classProb;
     getMaxClass(prob, &classId, &classProb);//find the best class
......
@@ -134,19 +134,16 @@ int main(int argc, char **argv)
     //! [Prepare blob]

     //! [Set input blob]
-    net.setBlob(".data", inputBlob);        //set the network input
+    net.setInput(inputBlob, "data");        //set the network input
     //! [Set input blob]

     //! [Make forward pass]
     double t = (double)cv::getTickCount();
-    net.forward();                          //compute output
+    Mat score = net.forward("score");       //compute output
     t = (double)cv::getTickCount() - t;
     printf("processing time: %.1fms\n", t*1000./getTickFrequency());
     //! [Make forward pass]

-    //! [Gather output]
-    Mat score = net.getBlob("score");
-
     Mat colorize;
     colorizeSegmentation(score, colors, colorize);
     Mat show;
......
@@ -93,7 +93,7 @@ int main(int argc, char **argv)
     //! [Prepare blob]

     //! [Set input blob]
-    net.setBlob("", inputBlob);  // Set the network input.
+    net.setInput(inputBlob);     // Set the network input.
     //! [Set input blob]

     //! [Enable Halide backend]
@@ -101,15 +101,15 @@ int main(int argc, char **argv)
     //! [Enable Halide backend]

     //! [Compile Halide pipeline]
-    net.compileHalide();  // Compile Halide pipeline.
+    // net.compileHalide();  // Compile Halide pipeline.
     //! [Compile Halide pipeline]

     //! [Make forward pass]
-    net.forward();  // Compute output.
+    Mat prob = net.forward("prob");  // Compute output.
     //! [Make forward pass]

     //! [Gather output]
-    Mat prob = net.getBlob("prob");  // Gather output of "prob" layer.
+    // net.getBlob();  // Gather output of "prob" layer.
     int classId;
     double classProb;
......
@@ -108,15 +108,13 @@ int main(int argc, char** argv)
     //! [Prepare blob]

     //! [Set input blob]
-    net.setBlob(".data", inputBlob);                //set the network input
+    net.setInput(inputBlob, "data");                //set the network input
     //! [Set input blob]

     //! [Make forward pass]
-    net.forward();                                  //compute output
+    Mat detection = net.forward("detection_out");   //compute output
     //! [Make forward pass]

-    //! [Gather output]
-    Mat detection = net.getBlob("detection_out");
     Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

     float confidenceThreshold = parser.get<float>("min_confidence");
......
@@ -26,7 +26,7 @@ const String keys =
     "https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip }"
     "{model m    |tensorflow_inception_graph.pb| path to TensorFlow .pb model file }"
     "{image i    || path to image file }"
-    "{i_blob     | .input | input blob name) }"
+    "{i_blob     | input | input blob name) }"
     "{o_blob     | softmax2 | output blob name) }"
     "{c_names c  | imagenet_comp_graph_label_strings.txt | path to file with classnames for class id }"
     "{result r   || path to save output blob (optional, binary format, NCHW order) }"
@@ -101,21 +101,18 @@ int main(int argc, char **argv)
     //! [Prepare blob]
     inputBlob -= 117.0;
     //! [Set input blob]
-    net.setBlob(inBlobName, inputBlob);     //set the network input
+    net.setInput(inputBlob, inBlobName);    //set the network input
     //! [Set input blob]

     cv::TickMeter tm;
     tm.start();

     //! [Make forward pass]
-    net.forward();                          //compute output
+    Mat result = net.forward(outBlobName);  //compute output
     //! [Make forward pass]

     tm.stop();

-    //! [Gather output]
-    Mat result = net.getBlob(outBlobName);  //gather output of "prob" layer
-
     if (!resultFile.empty()) {
         CV_Assert(result.isContinuous());
......
@@ -73,32 +73,19 @@ int main(int argc, char **argv)
     //! [Prepare blob]

     //! [Set input blob]
-    net.setBlob("", inputBlob);   //set the network input
+    net.setInput(inputBlob, "");  //set the network input
     //! [Set input blob]

-    const int N = 3;
     TickMeter tm;
-    //! [Make forward pass]
-    for( int i = 0; i < N; i++ )
-    {
-        TickMeter tm_;
-        tm_.start();
-        net.forward();                          //compute output
-        tm_.stop();
-        if( i == 0 || tm_.getTimeTicks() < tm.getTimeTicks() )
-            tm = tm_;
-    }
-
-    //! [Gather output]

     String oBlob = net.getLayerNames().back();
     if (!parser.get<String>("o_blob").empty())
     {
         oBlob = parser.get<String>("o_blob");
     }
-    Mat result = net.getBlob(oBlob);   //gather output of "prob" layer
+    //! [Make forward pass]
+    Mat result = net.forward(oBlob);

     if (!resultFile.empty()) {
         CV_Assert(result.isContinuous());
......
@@ -277,7 +277,7 @@ public:
             addedBlobs.push_back(BlobNote(net.input(inNum), 0, inNum));
             netInputs[inNum] = net.input(inNum);
         }
-        dstNet.setNetInputs(netInputs);
+        dstNet.setInputsNames(netInputs);
     }

     for (int li = 0; li < layersSize; li++)
......
@@ -177,6 +177,11 @@ struct LayerPin
     {
         return lid < r.lid || lid == r.lid && oid < r.oid;
     }
+
+    bool operator ==(const LayerPin &r) const
+    {
+        return lid == r.lid && oid == r.oid;
+    }
 };

 // Objects of this class manages wrappers. For every CPU memory pointer and shape
@@ -468,6 +473,8 @@ public:
         }
         else
         {
+            // If dst has already been allocated with total(shape) elements,
+            // it won't be recreated and the dst.data pointer stays the same.
             dst.create(shape, CV_32F);
             addHost(lp, dst);
         }
@@ -598,11 +605,12 @@ struct Net::Impl
     Ptr<DataLayer> netInputLayer;
     std::vector<int> netOutputs;
+    std::vector<LayerPin> blobsToKeep;
     MapIdToLayerData layers;
     std::map<String, int> layerNameToId;
     BlobManager blobManager;
     int preferableBackend;
+    String halideConfigFile;
     // Backend-specific wrapping manager.
     BackendWrapManager backendWrapper;
@@ -610,15 +618,59 @@ struct Net::Impl
     bool netWasAllocated;

-    void setUpNet()
+    void compileHalide()
+    {
+        CV_Assert(preferableBackend == DNN_BACKEND_HALIDE);
+
+        HalideScheduler scheduler(halideConfigFile);
+        MapIdToLayerData::iterator it;
+        for (it = layers.begin(); it != layers.end(); ++it)
+        {
+            LayerData &ld = it->second;
+            Ptr<Layer> layer = ld.layerInstance;
+            if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skipFlags[DNN_BACKEND_HALIDE])
+            {
+                CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty());
+                bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]);
+                if (!scheduled)
+                {
+                    // Use automatic scheduling provided by layer.
+                    layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE],
+                                                ld.inputBlobs, ld.outputBlobs);
+                }
+                dnn::compileHalide(ld.outputBlobs, ld.backendNodes[DNN_BACKEND_HALIDE],
+                                   DNN_TARGET_CPU);
+            }
+        }
+    }
+
+    void setUpNet(const std::vector<LayerPin>& blobsToKeep_ = std::vector<LayerPin>())
     {
-        if (!netWasAllocated)
+        if (!netWasAllocated || this->blobsToKeep != blobsToKeep_)
         {
-            allocateLayers();
+            MapIdToLayerData::iterator it;
+            for (it = layers.begin(); it != layers.end(); it++)
+            {
+                if (it->second.id != 0) {
+                    it->second.outputBlobs.clear();
+                    it->second.internals.clear();
+                }
+            }
+
+            allocateLayers(blobsToKeep_);
             computeNetOutputLayers();
             initBackend();

+            if (!netWasAllocated)
+            {
+                // If user didn't call compileHalide() between
+                // setPreferableBackend(DNN_BACKEND_HALIDE) and forward().
+                if (preferableBackend == DNN_BACKEND_HALIDE)
+                    compileHalide();
+            }
+
             netWasAllocated = true;
+            this->blobsToKeep = blobsToKeep_;
         }
     }
@@ -705,7 +757,7 @@ struct Net::Impl
         outName = (delimPos == String::npos) ? String() : pinAlias.substr(delimPos + 1);
     }

-    int resolvePinOutputName(LayerData &ld, const String &outName, bool isOutPin)
+    int resolvePinOutputName(LayerData &ld, const String &outName)
     {
         if (outName.empty())
             return 0;
@@ -722,13 +774,10 @@ struct Net::Impl
             }
         }

-        if (isOutPin)
-            return ld.getLayerInstance()->outputNameToIndex(outName);
-        else
-            return ld.getLayerInstance()->inputNameToIndex(outName);
+        return ld.getLayerInstance()->outputNameToIndex(outName);
     }

-    LayerPin getPinByAlias(const String &pinAlias, bool isOutPin = true)
+    LayerPin getPinByAlias(const String &pinAlias)
     {
         LayerPin pin;
         String layerName, outName;
@@ -737,13 +786,31 @@ struct Net::Impl
         pin.lid = (layerName.empty()) ? 0 : getLayerId(layerName);

         if (pin.lid >= 0)
-            pin.oid = resolvePinOutputName(getLayerData(pin.lid), outName, isOutPin);
+            pin.oid = resolvePinOutputName(getLayerData(pin.lid), outName);

         return pin;
     }

+    std::vector<LayerPin> getLayerOutPins(const String &pinAlias)
+    {
+        String layerName, outName;
+        splitPin(pinAlias, layerName, outName);
+
+        int lid = (layerName.empty()) ? 0 : getLayerId(layerName);
+
+        std::vector<LayerPin> pins;
+        for (int i = 0; i < layers[lid].outputBlobs.size(); i++)
+        {
+            pins.push_back(LayerPin(lid, i));
+        }
+
+        return pins;
+    }
+
     void connect(int outLayerId, int outNum, int inLayerId, int inNum)
     {
+        CV_Assert(outLayerId < inLayerId);
+
         LayerData &ldOut = getLayerData(outLayerId);
         LayerData &ldInp = getLayerData(inLayerId);
@@ -911,7 +978,7 @@ struct Net::Impl
         ld.flag = 1;
     }

-    void allocateLayers()
+    void allocateLayers(const std::vector<LayerPin>& blobsToKeep_)
     {
         MapIdToLayerData::iterator it;
         for (it = layers.begin(); it != layers.end(); it++)
@@ -934,34 +1001,19 @@ struct Net::Impl
             blobManager.addReferences(ld.inputBlobsId);
         }

+        for (int i = 0; i < blobsToKeep_.size(); i++)
+        {
+            blobManager.addReference(blobsToKeep_[i]);
+        }
+
         for (it = layers.begin(); it != layers.end(); it++)
         {
             int lid = it->first;
             allocateLayer(lid, layersShapes);
         }
     }

-    void forwardLayer(LayerData &ld, bool clearFlags = true)
+    void forwardLayer(LayerData &ld)
     {
-        if (clearFlags)
-        {
-            MapIdToLayerData::iterator it;
-            for (it = layers.begin(); it != layers.end(); it++)
-                it->second.flag = 0;
-        }
-
-        //already was forwarded
-        if (ld.flag)
-            return;
-
-        //forward parents
-        for (set<int>::iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++)
-        {
-            forwardLayer(layers[*i], false);
-        }
-
-        //forward itself
-        //try
-        {
-            Ptr<Layer> layer = ld.layerInstance;
-            if (preferableBackend == DNN_BACKEND_DEFAULT ||
+        Ptr<Layer> layer = ld.layerInstance;
+        if (preferableBackend == DNN_BACKEND_DEFAULT ||
@@ -983,23 +1035,40 @@ struct Net::Impl
                 CV_Error(Error::StsNotImplemented, "Unknown backend identifier");
             }
         }
-        }
-        /*catch (const cv::Exception &err)
-        {
-            CV_RETHROW_ERROR(err, format("The following error occured while making forward() for layer \"%s\": %s", ld.name.c_str(), err.err.c_str()));
-        }*/

         ld.flag = 1;
     }

-    void forwardAll()
+    void forwardToLayer(LayerData &ld, bool clearFlags = true)
     {
-        MapIdToLayerData::iterator it;
-        for (it = layers.begin(); it != layers.end(); it++)
-            it->second.flag = 0;
+        if (clearFlags)
+        {
+            MapIdToLayerData::iterator it;
+            for (it = layers.begin(); it != layers.end(); it++)
+                it->second.flag = 0;
+        }

-        for (it = layers.begin(); it != layers.end(); it++)
-            forwardLayer(it->second, false);
+        //already was forwarded
+        if (ld.flag)
+            return;
+
+        //forward parents
+        MapIdToLayerData::iterator it;
+        for (it = layers.begin(); it->second.id < ld.id; it++)
+        {
+            LayerData &ld = it->second;
+            if (ld.flag)
+                continue;
+            forwardLayer(ld);
+        }
+
+        //forward itself
+        forwardLayer(ld);
+    }
+
+    void forwardAll()
+    {
+        forwardToLayer(layers.rbegin()->second, true);
     }

     void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes)
@@ -1052,6 +1121,30 @@ struct Net::Impl
         getLayerShapesRecursively(layerId, inOutShapes);
         shapes = inOutShapes[layerId];
     }

+    LayerPin getLatestLayerPin(const std::vector<LayerPin>& pins)
+    {
+        return *std::max_element(pins.begin(), pins.end());
+    }
+
+    Mat getBlob(const LayerPin& pin)
+    {
+        if (!pin.valid())
+            CV_Error(Error::StsObjectNotFound, "Requested blob not found");
+
+        LayerData &ld = layers[pin.lid];
+        if ((size_t)pin.oid >= ld.outputBlobs.size())
+        {
+            CV_Error(Error::StsOutOfRange, "Layer \"" + ld.name + "\" produce only " + toString(ld.outputBlobs.size()) +
+                                           " outputs, the #" + toString(pin.oid) + " was requested");
+        }
+        return ld.outputBlobs[pin.oid];
+    }
+
+    Mat getBlob(String outputName)
+    {
+        return getBlob(getPinByAlias(outputName));
+    }
 };

 Net::Net() : impl(new Net::Impl)
@@ -1106,52 +1199,94 @@ void Net::connect(String _outPin, String _inPin)
     impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid);
 }

-void Net::allocate()
-{
-    impl->setUpNet();
-}
-
-void Net::forward(LayerId toLayer)
-{
-    if (!impl->netWasAllocated)
-    {
-        impl->setUpNet();
-        // If user didn't call compileHalide() between
-        // setPreferableBackend(DNN_BACKEND_HALIDE) and forward().
-        if (impl->preferableBackend == DNN_BACKEND_HALIDE)
-            compileHalide();
-    }
-
-    if (toLayer.isString() && toLayer.get<String>().empty())
-        impl->forwardAll();
-    else
-        impl->forwardLayer(impl->getLayerData(toLayer));
-}
-
-void Net::compileHalide(const std::string& configFile)
-{
-    CV_Assert(impl->preferableBackend == DNN_BACKEND_HALIDE);
-    if (!impl->netWasAllocated)
-        impl->setUpNet();
-
-    HalideScheduler scheduler(configFile);
-    Impl::MapIdToLayerData::iterator it;
-    for (it = impl->layers.begin(); it != impl->layers.end(); ++it)
-    {
-        LayerData &ld = it->second;
-        Ptr<Layer> layer = ld.layerInstance;
-        if (layer->supportBackend(DNN_BACKEND_HALIDE) && !ld.skipFlags[DNN_BACKEND_HALIDE])
-        {
-            CV_Assert(!ld.backendNodes[DNN_BACKEND_HALIDE].empty());
-            bool scheduled = scheduler.process(ld.backendNodes[DNN_BACKEND_HALIDE]);
-            if (!scheduled)
-            {
-                // Use automatic scheduling provided by layer.
-                layer->applyHalideScheduler(ld.backendNodes[DNN_BACKEND_HALIDE],
-                                            ld.inputBlobs, ld.outputBlobs);
-            }
-            dnn::compileHalide(ld.outputBlobs, ld.backendNodes[DNN_BACKEND_HALIDE],
-                               DNN_TARGET_CPU);
-        }
-    }
-}
+//void Net::forward(LayerId toLayer)
+//{
+//    if (!impl->netWasAllocated)
+//    {
+//        impl->setUpNet();
+//    }
+//    if (toLayer.isString() && toLayer.get<String>().empty())
+//        impl->forwardAll();
+//    else
+//        impl->forwardLayer(impl->getLayerData(toLayer));
+//}
+
+Mat Net::forward(const String& outputName)
+{
+    String layerName = outputName;
+
+    if (layerName.empty())
+        layerName = getLayerNames().back();
+
+    impl->setUpNet();
+    impl->forwardToLayer(impl->getLayerData(layerName));
+
+    return impl->getBlob(layerName);
+}
+
+void Net::forward(std::vector<Mat>& outputBlobs, const String& outputName)
+{
+    impl->setUpNet();
+
+    String layerName = outputName;
+
+    if (layerName.empty())
+        layerName = getLayerNames().back();
+
+    impl->forwardToLayer(impl->getLayerData(layerName));
+
+    LayerPin pin = impl->getPinByAlias(layerName);
+    LayerData &ld = impl->layers[pin.lid];
+    outputBlobs = ld.outputBlobs;
+}
+
+void Net::forward(std::vector<Mat>& outputBlobs,
+                  const std::vector<String>& outBlobNames)
+{
+    std::vector<LayerPin> pins;
+    for (int i = 0; i < outBlobNames.size(); i++)
+    {
+        pins.push_back(impl->getPinByAlias(outBlobNames[i]));
+    }
+
+    impl->setUpNet(pins);
+
+    LayerPin out = impl->getLatestLayerPin(pins);
+
+    impl->forwardToLayer(impl->getLayerData(out.lid));
+
+    outputBlobs.clear();
+    for (int i = 0; i < pins.size(); i++)
+    {
+        outputBlobs.push_back(impl->getBlob(pins[i]));
+    }
+}
+
+void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
+                  const std::vector<String>& outBlobNames)
+{
+    std::vector<LayerPin> pins;
+    for (int i = 0; i < outBlobNames.size(); i++)
+    {
+        std::vector<LayerPin> lp = impl->getLayerOutPins(outBlobNames[i]);
+        pins.insert(pins.end(), lp.begin(), lp.end());
+    }
+
+    impl->setUpNet(pins);
+
+    LayerPin out = impl->getLatestLayerPin(pins);
+
+    impl->forwardToLayer(impl->getLayerData(out.lid));
+
+    outputBlobs.resize(outBlobNames.size());
+    for (int i = 0; i < outBlobNames.size(); i++)
+    {
+        std::vector<LayerPin> lp = impl->getLayerOutPins(outBlobNames[i]);
+        for (int i = 0; i < lp.size(); i++)
+        {
+            outputBlobs[i].push_back(impl->getBlob(lp[i]));
+        }
+    }
+}
@@ -1163,16 +1298,19 @@ void Net::setPreferableBackend(int backendId)
     impl->preferableBackend = backendId;
 }

-void Net::setNetInputs(const std::vector<String> &inputBlobNames)
+void Net::setInputsNames(const std::vector<String> &inputBlobNames)
 {
     impl->netInputLayer->setNames(inputBlobNames);
 }

-void Net::setBlob(String outputName, const Mat &blob_)
+void Net::setInput(const Mat &blob_, const String& name)
 {
-    LayerPin pin = impl->getPinByAlias(outputName);
+    LayerPin pin;
+    pin.lid = 0;
+    pin.oid = impl->resolvePinOutputName(impl->getLayerData(pin.lid), name);
+
     if (!pin.valid())
-        CV_Error(Error::StsObjectNotFound, "Requested blob \"" + outputName + "\" not found");
+        CV_Error(Error::StsObjectNotFound, "Requested blob \"" + name + "\" not found");

     LayerData &ld = impl->layers[pin.lid];
     ld.outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
@@ -1186,21 +1324,6 @@ void Net::setBlob(String outputName, const Mat &blob_)
     impl->netWasAllocated = impl->netWasAllocated && oldShape;
 }

-Mat Net::getBlob(String outputName)
-{
-    LayerPin pin = impl->getPinByAlias(outputName);
-    if (!pin.valid())
-        CV_Error(Error::StsObjectNotFound, "Requested blob \"" + outputName + "\" not found");
-
-    LayerData &ld = impl->layers[pin.lid];
-    if ((size_t)pin.oid >= ld.outputBlobs.size())
-    {
-        CV_Error(Error::StsOutOfRange, "Layer \"" + ld.name + "\" produce only " + toString(ld.outputBlobs.size()) +
-                                       " outputs, the #" + toString(pin.oid) + " was requsted");
-    }
-    return ld.outputBlobs[pin.oid];
-}
-
 Mat Net::getParam(LayerId layer, int numParam)
 {
     LayerData &ld = impl->getLayerData(layer);
@@ -1516,6 +1639,11 @@ void Net::getMemoryConsumption(const MatShape& netInputShape, std::vector<int>&
                          weights, blobs);
 }

+void Net::setHalideScheduler(const String& scheduler)
+{
+    impl->halideConfigFile = scheduler;
+}
+
 //////////////////////////////////////////////////////////////////////////

 Importer::~Importer() {}
......
@@ -703,7 +703,7 @@ void TFImporter::populateNet(Net dstNet)
             std::vector<String> netInputs(1);
             netInputs[0] = name;
             layer_id[name] = 0;
-            dstNet.setNetInputs(netInputs);
+            dstNet.setInputsNames(netInputs);
         }
         else if (type == "Split") {
             // TODO: determing axis index remapping by input dimensions order of input blob
......
@@ -824,13 +824,20 @@ struct TorchImporter : public ::cv::dnn::Importer
                 mergeParams.set("axis", module->params.get<int>("dimension") - 1);

                 splitId = net.addLayer(generateLayerName("torchSplit"), "Split", splitParams);
-                mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);
                 net.connect(prevLayerId, prevOutNum, splitId, 0);

+                std::vector<int> branchIds;
                 for (int i = 0; i < (int)module->modules.size(); i++)
                 {
                     newId = fill(module->modules[i], addedModules, splitId, i);
-                    net.connect(newId, 0, mergeId, i);
+                    branchIds.push_back(newId);
+                }
+
+                mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);
+                for (int i = 0; i < branchIds.size(); i++)
+                {
+                    net.connect(branchIds[i], 0, mergeId, i);
                 }

                 addedModules.push_back(std::make_pair(mergeId, module));
@@ -847,15 +854,22 @@ struct TorchImporter : public ::cv::dnn::Importer
                 reshapeParams.set("num_axes", 1);

                 splitId = net.addLayer(generateLayerName("torchSplit"), "Slice", splitParams);
-                mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);
                 reshapeId = net.addLayer(generateLayerName("torchReshape"), "Reshape", reshapeParams);
                 net.connect(prevLayerId, prevOutNum, splitId, 0);

+                std::vector<int> branchIds;
                 for (int i = 0; i < (int)module->modules.size(); i++)
                 {
                     net.connect(splitId, i, reshapeId, i);
                     newId = fill(module->modules[i], addedModules, reshapeId, i);
-                    net.connect(newId, 0, mergeId, i);
+                    branchIds.push_back(newId);
+                }
+
+                mergeId = net.addLayer(generateLayerName("torchMerge"), "Concat", mergeParams);
+                for (int i = 0; i < branchIds.size(); i++)
+                {
+                    net.connect(branchIds[i], 0, mergeId, i);
                 }

                 addedModules.push_back(std::make_pair(mergeId, module));
......
@@ -94,10 +94,8 @@ TEST(Reproducibility_AlexNet, Accuracy)
     if (sample.size() != inputSize)
         resize(sample, sample, inputSize);

-    net.setBlob(".data", blobFromImage(sample, 1.));
-    net.forward();
-
-    Mat out = net.getBlob("prob");
+    net.setInput(blobFromImage(sample, 1.), "data");
+    Mat out = net.forward("prob");
     Mat ref = blobFromNPY(_tf("caffe_alexnet_prob.npy"));
     normAssert(ref, out);
 }
@@ -125,10 +123,8 @@ TEST(Reproducibility_FCN, Accuracy)
     std::vector<size_t> weights, blobs;
     net.getMemoryConsumption(shape(1,3,227,227), layerIds, weights, blobs);

-    net.setBlob(".data", blobFromImage(sample, 1.));
-    net.forward();
-
-    Mat out = net.getBlob("score");
+    net.setInput(blobFromImage(sample, 1.), "data");
+    Mat out = net.forward("score");
     Mat ref = blobFromNPY(_tf("caffe_fcn8s_prob.npy"));
     normAssert(ref, out);
 }
@@ -155,9 +151,8 @@ TEST(Reproducibility_SSD, Accuracy)
     resize(sample, sample, Size(300, 300));

     Mat in_blob = blobFromImage(sample);
-    net.setBlob(".data", in_blob);
-    net.forward();
-    Mat out = net.getBlob("detection_out");
+    net.setInput(in_blob, "data");
+    Mat out = net.forward("detection_out");
     Mat ref = blobFromNPY(_tf("ssd_out.npy"));
     normAssert(ref, out);
......
@@ -72,12 +72,30 @@ static void launchGoogleNetTest()
     inpMats.push_back( imread(_tf("googlenet_1.png")) );
     ASSERT_TRUE(!inpMats[0].empty() && !inpMats[1].empty());

-    net.setBlob(".data", blobFromImages(inpMats));
-    net.forward();
-
-    Mat out = net.getBlob("prob");
+    net.setInput(blobFromImages(inpMats), "data");
+    Mat out = net.forward("prob");
     Mat ref = blobFromNPY(_tf("googlenet_prob.npy"));
     normAssert(out, ref);
+
+    std::vector<String> blobsNames;
+    blobsNames.push_back("conv1/7x7_s2");
+    blobsNames.push_back("conv1/relu_7x7");
+    blobsNames.push_back("inception_4c/1x1");
+    blobsNames.push_back("inception_4c/relu_1x1");
+    std::vector<Mat> outs;
+    Mat in = blobFromImage(inpMats[0]);
+    net.setInput(in, "data");
+    net.forward(outs, blobsNames);
+    CV_Assert(outs.size() == blobsNames.size());
+
+    for (int i = 0; i < blobsNames.size(); i++)
+    {
+        std::string filename = blobsNames[i];
+        std::replace( filename.begin(), filename.end(), '/', '#');
+        Mat ref = blobFromNPY(_tf("googlenet_" + filename + ".npy"));
+
+        normAssert(outs[i], ref, "", 1E-4, 1E-2);
+    }
 }

 TEST(Reproducibility_GoogLeNet, Accuracy)
......
@@ -118,9 +118,8 @@ void testLayerUsingCaffeModels(String basename, bool useCaffeModel = false, bool
     Mat inp = blobFromNPY(inpfile);
     Mat ref = blobFromNPY(outfile);

-    net.setBlob(".input", inp);
-    net.forward();
-    Mat out = net.getBlob("output");
+    net.setInput(inp, "input");
+    Mat out = net.forward("output");
     normAssert(ref, out);
 }
@@ -244,9 +243,8 @@ static void test_Reshape_Split_Slice_layers()
     RNG rng(0);
     rng.fill(input, RNG::UNIFORM, -1, 1);

-    net.setBlob(".input", input);
-    net.forward();
-    Mat output = net.getBlob("output");
+    net.setInput(input, "input");
+    Mat output = net.forward("output");
     normAssert(input, output);
 }
......
@@ -42,10 +42,9 @@ TEST(Test_TensorFlow, read_inception)
     Mat inputBlob = blobFromImage(input, 1.);

-    net.setBlob("_input.input", inputBlob);
-    net.forward();
-    Mat out = net.getBlob("softmax2");
+    net.setInput(inputBlob, "input");
+    Mat out = net.forward("softmax2");

     std::cout << out.dims << std::endl;
 }
@@ -64,10 +63,9 @@ TEST(Test_TensorFlow, inception_accuracy)
     resize(sample, sample, Size(224, 224));
     Mat inputBlob = blobFromImage(sample, 1.);

-    net.setBlob(".input", inputBlob);
-    net.forward();
-    Mat out = net.getBlob("softmax2");
+    net.setInput(inputBlob, "input");
+    Mat out = net.forward("softmax2");

     Mat ref = blobFromNPY(_tf("tf_inception_prob.npy"));
     normAssert(ref, out);
......
@@ -87,17 +87,17 @@ static void runTorchNet(String prefix, String outLayerName = "",
     ASSERT_NO_THROW( inp = readTorchBlob(_tf(prefix + "_input" + suffix), isBinary) );
     ASSERT_NO_THROW( outRef = readTorchBlob(_tf(prefix + "_output" + suffix), isBinary) );

-    net.setBlob(".0", inp);
-    net.forward();
-
     if (outLayerName.empty())
         outLayerName = net.getLayerNames().back();

-    Mat out = net.getBlob(outLayerName);
-    normAssert(outRef, out);
+    net.setInput(inp, "0");
+    std::vector<Mat> outBlobs;
+    net.forward(outBlobs, outLayerName);
+    normAssert(outRef, outBlobs[0]);

     if (check2ndBlob)
     {
-        Mat out2 = net.getBlob(outLayerName + ".1");
+        Mat out2 = outBlobs[1];
         Mat ref2 = readTorchBlob(_tf(prefix + "_output_2" + suffix), isBinary);
         normAssert(out2, ref2);
     }
@@ -132,12 +132,12 @@ TEST(Torch_Importer, run_linear)

 TEST(Torch_Importer, run_paralel)
 {
-    runTorchNet("net_parallel", "l2_torchMerge");
+    runTorchNet("net_parallel", "l5_torchMerge");
 }

 TEST(Torch_Importer, run_concat)
 {
-    runTorchNet("net_concat", "l2_torchMerge");
+    runTorchNet("net_concat", "l5_torchMerge");
 }

 TEST(Torch_Importer, run_deconv)
@@ -185,14 +185,21 @@ TEST(Torch_Importer, ENet_accuracy)
     Mat sample = imread(_tf("street.png", false));
     Mat inputBlob = blobFromImage(sample, 1./255);

-    net.setBlob("", inputBlob);
-    net.forward();
-
-    Mat out = net.getBlob(net.getLayerNames().back());
+    net.setInput(inputBlob, "");
+    Mat out = net.forward();

     Mat ref = blobFromNPY(_tf("torch_enet_prob.npy", false));
     // Due to numerical instability in Pooling-Unpooling layers (indexes jittering)
     // thresholds for ENet must be changed. Accuracy of resuults was checked on
     // Cityscapes dataset and difference in mIOU with Torch is 10E-4%
     normAssert(ref, out, "", 0.00044, 0.44);
+
+    const int N = 3;
+    for (int i = 0; i < N; i++)
+    {
+        net.setInput(inputBlob, "");
+        Mat out = net.forward();
+
+        normAssert(ref, out, "", 0.00044, 0.44);
+    }
 }
 }
......
@@ -172,11 +172,10 @@ bool TrackerGOTURNImpl::updateImpl(const Mat& image, Rect2d& boundingBox)
     Mat targetBlob = dnn::blobFromImage(targetPatch);
     Mat searchBlob = dnn::blobFromImage(searchPatch);

-    net.setBlob(".data1", targetBlob);
-    net.setBlob(".data2", searchBlob);
-    net.forward();
-
-    Mat resMat = net.getBlob("scale").reshape(1, 1);
+    net.setInput(targetBlob, ".data1");
+    net.setInput(searchBlob, ".data2");
+    Mat resMat = net.forward("scale").reshape(1, 1);

     curBB.x = targetPatchRect.x + (resMat.at<float>(0) * targetPatchRect.width / INPUT_SIZE) - targetPatchRect.width;
     curBB.y = targetPatchRect.y + (resMat.at<float>(1) * targetPatchRect.height / INPUT_SIZE) - targetPatchRect.height;
......