Commit 66c728ae authored by Anna Petrovicheva's avatar Anna Petrovicheva

Implemented global_pooling

parent 42b9f08f
...@@ -142,8 +142,6 @@ message DetectionOutputParameter { ...@@ -142,8 +142,6 @@ message DetectionOutputParameter {
// Background label id. If there is no background class, // Background label id. If there is no background class,
// set it as -1. // set it as -1.
optional int32 background_label_id = 3 [default = 0]; optional int32 background_label_id = 3 [default = 0];
// Parameters used for non maximum suppression.
optional NonMaximumSuppressionParameter nms_param = 4;
// Type of coding method for bbox. // Type of coding method for bbox.
optional PriorBoxParameter.CodeType code_type = 6 [default = CORNER]; optional PriorBoxParameter.CodeType code_type = 6 [default = CORNER];
// If true, variance is encoded in target; otherwise we need to adjust the // If true, variance is encoded in target; otherwise we need to adjust the
...@@ -155,17 +153,11 @@ message DetectionOutputParameter { ...@@ -155,17 +153,11 @@ message DetectionOutputParameter {
// Only consider detections whose confidences are larger than a threshold. // Only consider detections whose confidences are larger than a threshold.
// If not provided, consider all boxes. // If not provided, consider all boxes.
optional float confidence_threshold = 9; optional float confidence_threshold = 9;
// If true, visualize the detection results. // Parameters used for non maximum suppression.
optional bool visualize = 10 [default = false];
// The threshold used to visualize the detection results.
optional float visualize_threshold = 11;
}
message NonMaximumSuppressionParameter {
// Threshold to be used in nms. // Threshold to be used in nms.
optional float nms_threshold = 1 [default = 0.3]; optional float nms_threshold = 10 [default = 0.3];
// Maximum number of results to be kept. // Maximum number of results to be kept.
optional int32 top_k = 2; optional int32 top_k = 11;
} }
message Datum { message Datum {
...@@ -605,6 +597,12 @@ message ConvolutionParameter { ...@@ -605,6 +597,12 @@ message ConvolutionParameter {
CUDNN = 2; CUDNN = 2;
} }
optional Engine engine = 15 [default = DEFAULT]; optional Engine engine = 15 [default = DEFAULT];
// Factor used to dilate the kernel, (implicitly) zero-filling the resulting
// holes. (Kernel dilation is sometimes referred to by its use in the
// algorithme à trous from Holschneider et al. 1987.)
optional uint32 dilation_h = 18; // The dilation height
optional uint32 dilation_w = 19; // The dilation width
optional uint32 dilation = 20; // The dilation; defaults to 1
} }
message DataParameter { message DataParameter {
......
...@@ -53,7 +53,7 @@ namespace dnn ...@@ -53,7 +53,7 @@ namespace dnn
{ {
ConvolutionLayer::ConvolutionLayer(LayerParams &params) : Layer(params) ConvolutionLayer::ConvolutionLayer(LayerParams &params) : Layer(params)
{ {
getKernelParams(params, kerH, kerW, padH, padW, strideH, strideW, dilationH, dilationW); getConvolutionKernelParams(params, kerH, kerW, padH, padW, strideH, strideW, dilationH, dilationW);
numOutput = params.get<int>("num_output"); numOutput = params.get<int>("num_output");
bias = params.get<bool>("bias_term", true); bias = params.get<bool>("bias_term", true);
......
...@@ -46,53 +46,101 @@ namespace cv ...@@ -46,53 +46,101 @@ namespace cv
namespace dnn namespace dnn
{ {
void getKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW, int &dilationH, int &dilationW) namespace util
{ {
if (params.has("kernel_h") && params.has("kernel_w")) std::string makeName(const std::string& str1, const std::string& str2)
{ {
kernelH = params.get<int>("kernel_h"); return str1 + str2;
kernelW = params.get<int>("kernel_w"); }
}
else if (params.has("kernel_size")) bool getParameter(LayerParams &params, const std::string& nameBase, const std::string& nameAll, int &parameterH, int &parameterW, bool hasDefault = false, const int& defaultValue = 0)
{ {
kernelH = kernelW = params.get<int>("kernel_size"); std::string nameH = makeName(nameBase, std::string("_h"));
} std::string nameW = makeName(nameBase, std::string("_w"));
else std::string nameAll_ = nameAll;
if(nameAll_ == "")
{ {
CV_Error(cv::Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified"); nameAll_ = nameBase;
} }
if (params.has("pad_h") && params.has("pad_w")) if (params.has(nameH) && params.has(nameW))
{ {
padH = params.get<int>("pad_h"); parameterH = params.get<int>(nameH);
padW = params.get<int>("pad_w"); parameterW = params.get<int>(nameW);
return true;
} }
else else
{ {
padH = padW = params.get<int>("pad", 0); if (params.has(nameAll_))
{
if(hasDefault)
{
parameterH = parameterW = params.get<int>(nameAll_, defaultValue);
}
else
{
parameterH = parameterW = params.get<int>(nameAll_);
}
return true;
}
else
{
return false;
}
} }
}
if (params.has("stride_h") && params.has("stride_w")) void getKernelSize(LayerParams &params, int &kernelH, int &kernelW)
{ {
strideH = params.get<int>("stride_h"); if(!util::getParameter(params, "kernel", "kernel_size", kernelH, kernelW))
strideW = params.get<int>("stride_w");
}
else
{ {
strideH = strideW = params.get<int>("stride", 1); CV_Error(cv::Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");
} }
if (params.has("dilation_h") && params.has("dilation_w")) CV_Assert(kernelH > 0 && kernelW > 0);
}
void getStrideAndPadding(LayerParams &params, int &padH, int &padW, int &strideH, int &strideW)
{
util::getParameter(params, "pad", "pad", padH, padW, true, 0);
util::getParameter(params, "stride", "stride", strideH, strideW, true, 1);
CV_Assert(padH >= 0 && padW >= 0 && strideH > 0 && strideW > 0);
}
}
// Parses pooling geometry from `params`. Sets `globalPooling` from the
// presence of the "global_pooling" flag; in that mode the kernel size is
// derived from the input at allocation time, so an explicit kernel size,
// non-zero padding, or non-unit stride is rejected as contradictory.
// Otherwise the kernel size is read (and required) from `params`.
void getPoolingKernelParams(LayerParams &params, int &kernelH, int &kernelW, bool &globalPooling, int &padH, int &padW, int &strideH, int &strideW)
{
    util::getStrideAndPadding(params, padH, padW, strideH, strideW);

    globalPooling = params.has("global_pooling");
    if (!globalPooling)
    {
        // Regular pooling: an explicit kernel size is mandatory.
        util::getKernelSize(params, kernelH, kernelW);
        return;
    }

    const bool kernelGiven = params.has("kernel_h") || params.has("kernel_w") || params.has("kernel_size");
    if (kernelGiven)
    {
        CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified");
    }

    const bool geometryNeutral = (padH == 0 && padW == 0 && strideH == 1 && strideW == 1);
    if (!geometryNeutral)
    {
        CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pad_h and pad_w must be = 0, and stride_h and stride_w must be = 1");
    }
}
// Parses convolution geometry from `params`: mandatory kernel size,
// padding (default 0), stride (default 1) and dilation (default 1),
// each accepted either as a single value or as an _h/_w pair.
void getConvolutionKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW, int &dilationH, int &dilationW)
{
    util::getKernelSize(params, kernelH, kernelW);
    util::getStrideAndPadding(params, padH, padW, strideH, strideW);
    // Dilation is optional and defaults to 1 (no dilation) in both dims.
    util::getParameter(params, "dilation", "dilation", dilationH, dilationW, true, 1);

    // Kernel/pad/stride are validated inside the helpers; only dilation
    // still needs a range check here.
    CV_Assert(dilationH > 0 && dilationW > 0);
}
} }
......
...@@ -48,7 +48,9 @@ namespace cv ...@@ -48,7 +48,9 @@ namespace cv
namespace dnn namespace dnn
{ {
void getKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW, int &dilationH, int &dilationW); void getConvolutionKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW, int &dilationH, int &dilationW);
void getPoolingKernelParams(LayerParams &params, int &kernelH, int &kernelW, bool &globalPooling, int &padH, int &padW, int &strideH, int &strideW);
} }
} }
......
...@@ -72,8 +72,7 @@ namespace dnn ...@@ -72,8 +72,7 @@ namespace dnn
type = MAX; type = MAX;
} }
int defaultDilation = 1; getPoolingKernelParams(params, kernelH, kernelW, globalPooling, padH, padW, strideH, strideW);
getKernelParams(params, kernelH, kernelW, padH, padW, strideH, strideW, defaultDilation, defaultDilation);
} }
void PoolingLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs) void PoolingLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
...@@ -82,6 +81,13 @@ namespace dnn ...@@ -82,6 +81,13 @@ namespace dnn
inpW = inputs[0]->cols(); inpW = inputs[0]->cols();
inpH = inputs[0]->rows(); inpH = inputs[0]->rows();
if(globalPooling)
{
kernelH = inpH;
kernelW = inpW;
}
computeOutputShape(inpH, inpW); computeOutputShape(inpH, inpW);
outputs.resize(inputs.size()); outputs.resize(inputs.size());
......
...@@ -60,6 +60,7 @@ namespace dnn ...@@ -60,6 +60,7 @@ namespace dnn
int padH, padW; int padH, padW;
int strideH, strideW; int strideH, strideW;
int kernelH, kernelW; int kernelH, kernelW;
bool globalPooling;
int inpH, inpW; int inpH, inpW;
int outH, outW; int outH, outW;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment