Commit f2070c9f authored by Alexander Alekhin

Merge pull request #10255 from dkurt:dnn_roi_pooling

parents 34e414dc 17dcf0e8
...@@ -242,7 +242,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN ...@@ -242,7 +242,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
{ {
MAX, MAX,
AVE, AVE,
STOCHASTIC STOCHASTIC,
ROI
}; };
int type; int type;
...@@ -251,6 +252,9 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN ...@@ -251,6 +252,9 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
bool computeMaxIdx; bool computeMaxIdx;
String padMode; String padMode;
bool ceilMode; bool ceilMode;
// ROIPooling parameters.
Size pooledSize;
float spatialScale;
static Ptr<PoolingLayer> create(const LayerParams& params); static Ptr<PoolingLayer> create(const LayerParams& params);
}; };
......
This diff is collapsed.
This diff is collapsed.
...@@ -552,6 +552,7 @@ message LayerParameter { ...@@ -552,6 +552,7 @@ message LayerParameter {
optional ReductionParameter reduction_param = 136; optional ReductionParameter reduction_param = 136;
optional ReLUParameter relu_param = 123; optional ReLUParameter relu_param = 123;
optional ReshapeParameter reshape_param = 133; optional ReshapeParameter reshape_param = 133;
optional ROIPoolingParameter roi_pooling_param = 8266711; // https://github.com/rbgirshick/caffe-fast-rcnn/tree/fast-rcnn
optional ScaleParameter scale_param = 142; optional ScaleParameter scale_param = 142;
optional SigmoidParameter sigmoid_param = 124; optional SigmoidParameter sigmoid_param = 124;
optional SoftmaxParameter softmax_param = 125; optional SoftmaxParameter softmax_param = 125;
...@@ -1605,3 +1606,15 @@ message NormalizedBBox { ...@@ -1605,3 +1606,15 @@ message NormalizedBBox {
optional float score = 7; optional float score = 7;
optional float size = 8; optional float size = 8;
} }
// origin: https://github.com/rbgirshick/caffe-fast-rcnn/tree/fast-rcnn
// Message that stores parameters used by ROIPoolingLayer
message ROIPoolingParameter {
// The pooled output has a fixed pooled_h x pooled_w spatial extent; every
// ROI is max-pooled into a grid of this size regardless of its own size.
optional uint32 pooled_h = 1 [default = 0]; // The pooled output height
optional uint32 pooled_w = 2 [default = 0]; // The pooled output width
// Multiplicative spatial scale factor to translate ROI coords from their
// input scale to the scale used when pooling
optional float spatial_scale = 3 [default = 1];
}
...@@ -88,6 +88,7 @@ void initializeLayerFactory() ...@@ -88,6 +88,7 @@ void initializeLayerFactory()
CV_DNN_REGISTER_LAYER_CLASS(Convolution, ConvolutionLayer); CV_DNN_REGISTER_LAYER_CLASS(Convolution, ConvolutionLayer);
CV_DNN_REGISTER_LAYER_CLASS(Deconvolution, DeconvolutionLayer); CV_DNN_REGISTER_LAYER_CLASS(Deconvolution, DeconvolutionLayer);
CV_DNN_REGISTER_LAYER_CLASS(Pooling, PoolingLayer); CV_DNN_REGISTER_LAYER_CLASS(Pooling, PoolingLayer);
CV_DNN_REGISTER_LAYER_CLASS(ROIPooling, PoolingLayer);
CV_DNN_REGISTER_LAYER_CLASS(LRN, LRNLayer); CV_DNN_REGISTER_LAYER_CLASS(LRN, LRNLayer);
CV_DNN_REGISTER_LAYER_CLASS(InnerProduct, InnerProductLayer); CV_DNN_REGISTER_LAYER_CLASS(InnerProduct, InnerProductLayer);
CV_DNN_REGISTER_LAYER_CLASS(Softmax, SoftmaxLayer); CV_DNN_REGISTER_LAYER_CLASS(Softmax, SoftmaxLayer);
......
This diff is collapsed.
...@@ -560,4 +560,20 @@ TEST(Layer_Test_Reorg, Accuracy) ...@@ -560,4 +560,20 @@ TEST(Layer_Test_Reorg, Accuracy)
testLayerUsingDarknetModels("reorg", false, false); testLayerUsingDarknetModels("reorg", false, false);
} }
// Accuracy check for ROI pooling: run a Caffe-defined net over stored
// feature-map and ROI blobs and compare against a precomputed reference.
TEST(Layer_Test_ROIPooling, Accuracy)
{
    Net net = readNetFromCaffe(_tf("net_roi_pooling.prototxt"));

    // The network consumes two named inputs: the feature map ("input")
    // and the region proposals ("rois").
    Mat featureMap = blobFromNPY(_tf("net_roi_pooling.input.npy"));
    Mat proposals = blobFromNPY(_tf("net_roi_pooling.rois.npy"));
    net.setInput(featureMap, "input");
    net.setInput(proposals, "rois");

    // Forward pass must match the stored reference blob.
    Mat ref = blobFromNPY(_tf("net_roi_pooling.npy"));
    normAssert(net.forward(), ref);
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment