submodule / opencv / Commits / d5b95632

Commit d5b95632 authored Apr 25, 2018 by Dmitry Kurtaev
Custom deep learning layers in Python
parent ca1975ca
Showing 5 changed files with 286 additions and 1 deletion:

- doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md (+34, -0)
- doc/tutorials/dnn/images/lena_hed.jpg (+0, -0)
- modules/dnn/misc/python/pyopencv_dnn.hpp (+179, -1)
- modules/python/src2/cv2.cpp (+4, -0)
- samples/dnn/edge_detection.py (+69, -0)
doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md
@@ -190,3 +190,37 @@ In our case resize's output shape will be stored in layer's `blobs[0]`.
Next we register a layer and try to import the model.
@snippet dnn/custom_layers.cpp Register ResizeBilinearLayer
## Define a custom layer in Python

The following example shows how to customize OpenCV's layers in Python.

Let's consider the [Holistically-Nested Edge Detection](https://arxiv.org/abs/1504.06375) deep learning model. It was trained with a single difference compared to the current version of the [Caffe framework](http://caffe.berkeleyvision.org/): its `Crop` layers, which receive two input blobs and crop the first one to match the spatial dimensions of the second, used to crop from the center. Nowadays Caffe's layer crops from the top-left corner, so with the latest version of Caffe or OpenCV you'll get shifted results with filled borders. For example, cropping a 500x500 target out of a 512x512 blob starts at offset 6 when taken from the center but at offset 0 when taken from the top-left corner.

Next we're going to replace OpenCV's `Crop` layer, which crops from the top-left corner, with a centric one.
- Create a class with `getMemoryShapes` and `forward` methods
@snippet dnn/edge_detection.py CropLayer
@note Both methods should return lists.

- Register a new layer.
@snippet dnn/edge_detection.py Register
That's it! We've replaced a built-in OpenCV layer with a custom one. You may find the full script in the [source code](https://github.com/opencv/opencv/tree/master/samples/dnn/edge_detection.py).
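If you only want the general shape of the interface without opening the sample, here is a minimal, hedged sketch; the class name `MyLayer` and the layer type `'MyType'` are placeholders for illustration, not part of this commit (the real implementation is the `CropLayer` snippet above):

```python
import cv2 as cv

class MyLayer(object):
    # OpenCV constructs the class with the layer parameters (a dict)
    # and its blobs (a list of arrays).
    def __init__(self, params, blobs):
        pass

    # Receives a list of input shapes; must return a list of output shapes.
    def getMemoryShapes(self, inputs):
        return [inputs[0]]

    # Receives a list of input blobs; must return a list of output blobs.
    def forward(self, inputs):
        return [inputs[0]]

cv.dnn_registerLayer('MyType', MyLayer)    # bind the class to a layer type
cv.dnn_unregisterLayer('MyType')           # remove the binding when it is no longer needed
```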
<table border="0">
<tr>
<td>![](js_tutorials/js_assets/lena.jpg)</td>
<td>![](images/lena_hed.jpg)</td>
</tr>
</table>
doc/tutorials/dnn/images/lena_hed.jpg
new file (mode 100644), 38 KB
modules/dnn/misc/python/pyopencv_dnn.hpp
@@ -40,4 +40,182 @@ bool pyopencv_to(PyObject *o, std::vector<Mat> &blobs, const char *name) //requi
```cpp
    return pyopencvVecConverter<Mat>::to(o, blobs, ArgInfo(name, false));
}
#endif

template <typename T>
PyObject* pyopencv_from(const dnn::DictValue &dv)
{
    if (dv.size() > 1)
    {
        std::vector<T> vec(dv.size());
        for (int i = 0; i < dv.size(); ++i)
            vec[i] = dv.get<T>(i);
        return pyopencv_from_generic_vec(vec);
    }
    else
        return pyopencv_from(dv.get<T>());
}

template <>
PyObject* pyopencv_from(const dnn::DictValue &dv)
{
    if (dv.isInt()) return pyopencv_from<int>(dv);
    if (dv.isReal()) return pyopencv_from<float>(dv);
    if (dv.isString()) return pyopencv_from<String>(dv);
    CV_Error(Error::StsNotImplemented, "Unknown value type");
    return NULL;
}

template <>
PyObject* pyopencv_from(const dnn::LayerParams& lp)
{
    PyObject* dict = PyDict_New();
    for (std::map<String, dnn::DictValue>::const_iterator it = lp.begin(); it != lp.end(); ++it)
    {
        CV_Assert(!PyDict_SetItemString(dict, it->first.c_str(), pyopencv_from(it->second)));
    }
    return dict;
}

class pycvLayer CV_FINAL : public dnn::Layer
{
public:
    pycvLayer(const dnn::LayerParams &params, PyObject* pyLayer) : Layer(params)
    {
        PyGILState_STATE gstate;
        gstate = PyGILState_Ensure();

        PyObject* args = PyTuple_New(2);
        CV_Assert(!PyTuple_SetItem(args, 0, pyopencv_from(params)));
        CV_Assert(!PyTuple_SetItem(args, 1, pyopencv_from(params.blobs)));

        o = PyObject_CallObject(pyLayer, args);

        Py_DECREF(args);
        PyGILState_Release(gstate);
        if (!o)
            CV_Error(Error::StsError, "Failed to create an instance of custom layer");
    }

    static void registerLayer(const std::string& type, PyObject* o)
    {
        std::map<std::string, std::vector<PyObject*> >::iterator it = pyLayers.find(type);
        if (it != pyLayers.end())
            it->second.push_back(o);
        else
            pyLayers[type] = std::vector<PyObject*>(1, o);
    }

    static void unregisterLayer(const std::string& type)
    {
        std::map<std::string, std::vector<PyObject*> >::iterator it = pyLayers.find(type);
        if (it != pyLayers.end())
        {
            if (it->second.size() > 1)
                it->second.pop_back();
            else
                pyLayers.erase(it);
        }
    }

    static Ptr<dnn::Layer> create(dnn::LayerParams &params)
    {
        std::map<std::string, std::vector<PyObject*> >::iterator it = pyLayers.find(params.type);
        if (it == pyLayers.end())
            CV_Error(Error::StsNotImplemented, "Layer with a type \"" + params.type +
                                               "\" is not implemented");
        CV_Assert(!it->second.empty());
        return Ptr<dnn::Layer>(new pycvLayer(params, it->second.back()));
    }

    virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
                                 const int,
                                 std::vector<std::vector<int> > &outputs,
                                 std::vector<std::vector<int> > &) const CV_OVERRIDE
    {
        PyGILState_STATE gstate;
        gstate = PyGILState_Ensure();

        PyObject* args = PyList_New(inputs.size());
        for (size_t i = 0; i < inputs.size(); ++i)
            PyList_SET_ITEM(args, i, pyopencv_from_generic_vec(inputs[i]));

        PyObject* res = PyObject_CallMethodObjArgs(o, PyString_FromString("getMemoryShapes"), args, NULL);
        Py_DECREF(args);
        PyGILState_Release(gstate);
        if (!res)
            CV_Error(Error::StsNotImplemented, "Failed to call \"getMemoryShapes\" method");

        pyopencv_to_generic_vec(res, outputs, ArgInfo("", 0));
        return false;
    }

    virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &) CV_OVERRIDE
    {
        PyGILState_STATE gstate;
        gstate = PyGILState_Ensure();

        std::vector<Mat> inps(inputs.size());
        for (size_t i = 0; i < inputs.size(); ++i)
            inps[i] = *inputs[i];

        PyObject* args = pyopencv_from(inps);
        PyObject* res = PyObject_CallMethodObjArgs(o, PyString_FromString("forward"), args, NULL);
        Py_DECREF(args);
        PyGILState_Release(gstate);
        if (!res)
            CV_Error(Error::StsNotImplemented, "Failed to call \"forward\" method");

        std::vector<Mat> pyOutputs;
        pyopencv_to(res, pyOutputs, ArgInfo("", 0));

        CV_Assert(pyOutputs.size() == outputs.size());
        for (size_t i = 0; i < outputs.size(); ++i)
        {
            CV_Assert(pyOutputs[i].size == outputs[i].size);
            CV_Assert(pyOutputs[i].type() == outputs[i].type());
            pyOutputs[i].copyTo(outputs[i]);
        }
    }

    virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE
    {
        CV_Error(Error::StsNotImplemented, "");
    }

private:
    // Map layers types to python classes.
    static std::map<std::string, std::vector<PyObject*> > pyLayers;
    PyObject* o;  // Instance of implemented python layer.
};

std::map<std::string, std::vector<PyObject*> > pycvLayer::pyLayers;

static PyObject *pyopencv_cv_dnn_registerLayer(PyObject*, PyObject *args, PyObject *kw)
{
    const char *keywords[] = { "type", "class", NULL };
    char* layerType;
    PyObject *classInstance;
    if (!PyArg_ParseTupleAndKeywords(args, kw, "sO", (char**)keywords, &layerType, &classInstance))
        return NULL;
    if (!PyCallable_Check(classInstance))
    {
        PyErr_SetString(PyExc_TypeError, "class must be callable");
        return NULL;
    }

    pycvLayer::registerLayer(layerType, classInstance);
    dnn::LayerFactory::registerLayer(layerType, pycvLayer::create);
    Py_RETURN_NONE;
}

static PyObject *pyopencv_cv_dnn_unregisterLayer(PyObject*, PyObject *args, PyObject *kw)
{
    const char *keywords[] = { "type", NULL };
    char* layerType;
    if (!PyArg_ParseTupleAndKeywords(args, kw, "s", (char**)keywords, &layerType))
        return NULL;

    pycvLayer::unregisterLayer(layerType);
    dnn::LayerFactory::unregisterLayer(layerType);
    Py_RETURN_NONE;
}
#endif // HAVE_OPENCV_DNN
```
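For reference, the conversions above determine what the Python side receives: `params` arrives as a plain `dict` (each `DictValue` becomes an `int`, `float`, or `str`, or a list of them when it holds more than one element), `blobs` arrives as a list of numpy arrays, and every call into Python is made while holding the GIL. A small sketch of reading parameters in a custom layer's constructor; the `'axis'` attribute, the example values, and the class name are purely illustrative:

```python
class ParamAwareLayer(object):
    def __init__(self, params, blobs):
        # params: dict built by pyopencv_from(LayerParams), e.g. {'axis': 1, 'slice_point': [1, 2]}
        # blobs:  list of numpy arrays holding the layer's trained weights (may be empty)
        self.axis = int(params.get('axis', 1))   # 'axis' is a hypothetical attribute
        self.weights = blobs
    # getMemoryShapes() and forward() are still required, as in the tutorial above.
```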
modules/python/src2/cv2.cpp
@@ -1783,6 +1783,10 @@ static PyMethodDef special_methods[] = {
```cpp
  {"createTrackbar", pycvCreateTrackbar, METH_VARARGS, "createTrackbar(trackbarName, windowName, value, count, onChange) -> None"},
  {"createButton", (PyCFunction)pycvCreateButton, METH_VARARGS | METH_KEYWORDS, "createButton(buttonName, onChange [, userData, buttonType, initialButtonState]) -> None"},
  {"setMouseCallback", (PyCFunction)pycvSetMouseCallback, METH_VARARGS | METH_KEYWORDS, "setMouseCallback(windowName, onMouse [, param]) -> None"},
#endif
#ifdef HAVE_OPENCV_DNN
  {"dnn_registerLayer", (PyCFunction)pyopencv_cv_dnn_registerLayer, METH_VARARGS | METH_KEYWORDS, "registerLayer(type, class) -> None"},
  {"dnn_unregisterLayer", (PyCFunction)pyopencv_cv_dnn_unregisterLayer, METH_VARARGS | METH_KEYWORDS, "unregisterLayer(type) -> None"},
#endif
  {NULL, NULL},
};
```
samples/dnn/edge_detection.py
new file (mode 100644)
```python
import cv2 as cv
import argparse

parser = argparse.ArgumentParser(
        description='This sample shows how to define custom OpenCV deep learning layers in Python. '
                    'Holistically-Nested Edge Detection (https://arxiv.org/abs/1504.06375) neural network '
                    'is used as an example model. Find a pre-trained model at https://github.com/s9xie/hed.')
parser.add_argument('--input', help='Path to image or video. Skip to capture frames from camera')
parser.add_argument('--prototxt', help='Path to deploy.prototxt', required=True)
parser.add_argument('--caffemodel', help='Path to hed_pretrained_bsds.caffemodel', required=True)
parser.add_argument('--width', help='Resize input image to a specific width', default=500, type=int)
parser.add_argument('--height', help='Resize input image to a specific height', default=500, type=int)
args = parser.parse_args()

#! [CropLayer]
class CropLayer(object):
    def __init__(self, params, blobs):
        self.xstart = 0
        self.xend = 0
        self.ystart = 0
        self.yend = 0

    # Our layer receives two inputs. We need to crop the first input blob
    # to match a shape of the second one (keeping batch size and number of channels)
    def getMemoryShapes(self, inputs):
        inputShape, targetShape = inputs[0], inputs[1]
        batchSize, numChannels = inputShape[0], inputShape[1]
        height, width = targetShape[2], targetShape[3]

        self.ystart = (inputShape[2] - targetShape[2]) / 2
        self.xstart = (inputShape[3] - targetShape[3]) / 2
        self.yend = self.ystart + height
        self.xend = self.xstart + width

        return [[batchSize, numChannels, height, width]]

    def forward(self, inputs):
        return [inputs[0][:,:,self.ystart:self.yend,self.xstart:self.xend]]
#! [CropLayer]

#! [Register]
cv.dnn_registerLayer('Crop', CropLayer)
#! [Register]

# Load the model.
net = cv.dnn.readNet(args.prototxt, args.caffemodel)

kWinName = 'Holistically-Nested Edge Detection'
cv.namedWindow('Input', cv.WINDOW_NORMAL)
cv.namedWindow(kWinName, cv.WINDOW_NORMAL)

cap = cv.VideoCapture(args.input if args.input else 0)
while cv.waitKey(1) < 0:
    hasFrame, frame = cap.read()
    if not hasFrame:
        cv.waitKey()
        break

    cv.imshow('Input', frame)

    inp = cv.dnn.blobFromImage(frame, scalefactor=1.0, size=(args.width, args.height),
                               mean=(104.00698793, 116.66876762, 122.67891434),
                               swapRB=False, crop=False)
    net.setInput(inp)
    out = net.forward()
    out = out[0, 0]
    out = cv.resize(out, (frame.shape[1], frame.shape[0]))
    cv.imshow(kWinName, out)
```
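A caveat when running this sample under Python 3: `getMemoryShapes` computes the crop offsets with `/ 2`, which produces a `float` there, and those offsets are then used as slice indices in `forward`, where NumPy rejects non-integer indices. A hedged, Python 3-friendly variant of that one method (not part of this commit) is:

```python
    # Python 3-friendly variant of CropLayer.getMemoryShapes: floor division keeps
    # the crop offsets integral so they remain valid slice indices in forward().
    def getMemoryShapes(self, inputs):
        inputShape, targetShape = inputs[0], inputs[1]
        batchSize, numChannels = inputShape[0], inputShape[1]
        height, width = targetShape[2], targetShape[3]
        self.ystart = (inputShape[2] - targetShape[2]) // 2
        self.xstart = (inputShape[3] - targetShape[3]) // 2
        self.yend = self.ystart + height
        self.xend = self.xstart + width
        return [[batchSize, numChannels, height, width]]
```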