Commit 8e68d837 authored by Vitaliy Lyudvichenko's avatar Vitaliy Lyudvichenko

Merge branch 'object_detection_sample_ssd' into dnn*

# Conflicts:
#	modules/dnn/include/opencv2/dnn/blob.hpp
#	modules/dnn/src/init.cpp
#	modules/dnn/src/layers/concat_layer.cpp
#	modules/dnn/src/layers/convolution_layer.cpp
#	modules/dnn/src/layers/convolution_layer.hpp
#	modules/dnn/src/layers/layers_common.cpp
#	modules/dnn/src/layers/layers_common.hpp
#	modules/dnn/src/layers/op_im2col.hpp
#	modules/dnn/src/layers/pooling_layer.cpp
#	modules/dnn/src/layers/pooling_layer.hpp
parents 266692e1 e88b0618
......@@ -209,21 +209,21 @@ namespace dnn
{
public:
CV_PROP_RW Size kernel, stride, pad;
CV_PROP_RW Size kernel, stride, pad, dilation;
};
class CV_EXPORTS_W ConvolutionLayer : public BaseConvolutionLayer
{
public:
static CV_WRAP Ptr<BaseConvolutionLayer> create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0));
static CV_WRAP Ptr<BaseConvolutionLayer> create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0), Size dilation = Size(1, 1));
};
class CV_EXPORTS_W DeconvolutionLayer : public BaseConvolutionLayer
{
public:
static CV_WRAP Ptr<BaseConvolutionLayer> create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0));
static CV_WRAP Ptr<BaseConvolutionLayer> create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0), Size dilation = Size(1, 1));
};
class CV_EXPORTS_W LRNLayer : public Layer
......@@ -256,8 +256,10 @@ namespace dnn
CV_PROP_RW int type;
CV_PROP_RW Size kernel, stride, pad;
CV_PROP_RW bool globalPooling;
static CV_WRAP Ptr<PoolingLayer> create(int type = PoolingLayer::MAX, Size kernel = Size(2, 2), Size stride = Size(1, 1), Size pad = Size(0, 0));
static CV_WRAP Ptr<PoolingLayer> createGlobal(int type = PoolingLayer::MAX);
};
class CV_EXPORTS_W SoftmaxLayer : public Layer
......
......@@ -229,6 +229,18 @@ namespace dnn
/** @brief Checks equality of two blobs shapes. */
bool equalShape(const Blob &other) const;
/** @brief Returns slice of first two dimensions.
* @details The behaviour is similar to the following numpy code: blob[n, cn, ...]
*/
Mat getPlane(int n, int cn);
/** @brief Returns slice of first dimension.
* @details The behaviour is similar to getPlane(), but returns all
* channels * rows * cols values, corresponding to the n-th value
* of the first dimension.
*/
Mat getPlanes(int n);
/* Shape getters of 4-dimensional blobs. */
int cols() const; //!< Returns size of the fourth axis blob.
int rows() const; //!< Returns size of the third axis blob.
......@@ -262,12 +274,6 @@ namespace dnn
float *ptrf(int n = 0, int cn = 0, int row = 0, int col = 0);
//TODO: add const ptr methods
/** @brief Returns slice of first two dimensions.
* @details The behaviour is similar to the following numpy code: blob[n, cn, ...]
* @todo Method will be removed. Use slice() from shape_utils.hpp.
*/
Mat getPlane(int n, int cn);
/** @brief Shares data from other @p blob.
* @returns *this
*/
......
......@@ -456,6 +456,12 @@ inline Mat Blob::getPlane(int n, int cn)
return Mat(dims() - 2, sizes() + 2, type(), ptr(n, cn));
}
inline Mat Blob::getPlanes(int n)
{
CV_Assert(dims() > 3);
return Mat(dims() - 1, sizes() + 1, type(), ptr(n));
}
inline int Blob::cols() const
{
return xsize(3);
......
......@@ -112,7 +112,7 @@ class CV_EXPORTS Dict
public:
//! Checks a presence of the @p key in the dictionary.
bool has(const String &key);
bool has(const String &key) const;
//! If the @p key in the dictionary then returns pointer to its value, else returns NULL.
DictValue *ptr(const String &key);
......
......@@ -287,7 +287,7 @@ inline std::ostream &operator<<(std::ostream &stream, const DictValue &dictv)
/////////////////////////////////////////////////////////////////
inline bool Dict::has(const String &key)
inline bool Dict::has(const String &key) const
{
return dict.count(key) != 0;
}
......
name: "VGG_VOC0712_SSD_300x300_deploy"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 300
input_dim: 300
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data"
top: "conv1_1"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu1_1"
type: "ReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "conv1_2"
type: "Convolution"
bottom: "conv1_1"
top: "conv1_2"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu1_2"
type: "ReLU"
bottom: "conv1_2"
top: "conv1_2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1_2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1"
top: "conv2_1"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "conv2_1"
top: "conv2_2"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu2_2"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2"
top: "conv3_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3_1"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "conv3_1"
top: "conv3_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3_2"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_3"
type: "Convolution"
bottom: "conv3_2"
top: "conv3_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3_3"
type: "ReLU"
bottom: "conv3_3"
top: "conv3_3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3_3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4_1"
type: "Convolution"
bottom: "pool3"
top: "conv4_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu4_1"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2"
type: "Convolution"
bottom: "conv4_1"
top: "conv4_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu4_2"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_3"
type: "Convolution"
bottom: "conv4_2"
top: "conv4_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu4_3"
type: "ReLU"
bottom: "conv4_3"
top: "conv4_3"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4_3"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv5_1"
type: "Convolution"
bottom: "pool4"
top: "conv5_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu5_1"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_2"
type: "Convolution"
bottom: "conv5_1"
top: "conv5_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu5_2"
type: "ReLU"
bottom: "conv5_2"
top: "conv5_2"
}
layer {
name: "conv5_3"
type: "Convolution"
bottom: "conv5_2"
top: "conv5_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu5_3"
type: "ReLU"
bottom: "conv5_3"
top: "conv5_3"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5_3"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride: 1
pad: 1
}
}
layer {
name: "fc6"
type: "Convolution"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 1024
pad: 6
kernel_size: 3
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
dilation: 6
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "fc7"
type: "Convolution"
bottom: "fc6"
top: "fc7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 1024
kernel_size: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "conv6_1"
type: "Convolution"
bottom: "fc7"
top: "conv6_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_1_relu"
type: "ReLU"
bottom: "conv6_1"
top: "conv6_1"
}
layer {
name: "conv6_2"
type: "Convolution"
bottom: "conv6_1"
top: "conv6_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_2_relu"
type: "ReLU"
bottom: "conv6_2"
top: "conv6_2"
}
layer {
name: "conv7_1"
type: "Convolution"
bottom: "conv6_2"
top: "conv7_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_1_relu"
type: "ReLU"
bottom: "conv7_1"
top: "conv7_1"
}
layer {
name: "conv7_2"
type: "Convolution"
bottom: "conv7_1"
top: "conv7_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_2_relu"
type: "ReLU"
bottom: "conv7_2"
top: "conv7_2"
}
layer {
name: "conv8_1"
type: "Convolution"
bottom: "conv7_2"
top: "conv8_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_1_relu"
type: "ReLU"
bottom: "conv8_1"
top: "conv8_1"
}
layer {
name: "conv8_2"
type: "Convolution"
bottom: "conv8_1"
top: "conv8_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_2_relu"
type: "ReLU"
bottom: "conv8_2"
top: "conv8_2"
}
layer {
name: "pool6"
type: "Pooling"
bottom: "conv8_2"
top: "pool6"
pooling_param {
pool: AVE
global_pooling: true
}
}
layer {
name: "conv4_3_norm"
type: "NormalizeBBox"
bottom: "conv4_3"
top: "conv4_3_norm"
normalize_bbox_param {
across_spatial: false
scale_filler {
type: "constant"
value: 20
}
channel_shared: false
}
}
layer {
name: "conv4_3_norm_mbox_loc"
type: "Convolution"
bottom: "conv4_3_norm"
top: "conv4_3_norm_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 12
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv4_3_norm_mbox_loc_perm"
type: "Permute"
bottom: "conv4_3_norm_mbox_loc"
top: "conv4_3_norm_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv4_3_norm_mbox_loc_flat"
type: "Flatten"
bottom: "conv4_3_norm_mbox_loc_perm"
top: "conv4_3_norm_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv4_3_norm_mbox_conf"
type: "Convolution"
bottom: "conv4_3_norm"
top: "conv4_3_norm_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 63
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv4_3_norm_mbox_conf_perm"
type: "Permute"
bottom: "conv4_3_norm_mbox_conf"
top: "conv4_3_norm_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv4_3_norm_mbox_conf_flat"
type: "Flatten"
bottom: "conv4_3_norm_mbox_conf_perm"
top: "conv4_3_norm_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv4_3_norm_mbox_priorbox"
type: "PriorBox"
bottom: "conv4_3_norm"
bottom: "data"
top: "conv4_3_norm_mbox_priorbox"
prior_box_param {
min_size: 30.0
aspect_ratio: 2
flip: true
clip: true
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
}
}
layer {
name: "fc7_mbox_loc"
type: "Convolution"
bottom: "fc7"
top: "fc7_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "fc7_mbox_loc_perm"
type: "Permute"
bottom: "fc7_mbox_loc"
top: "fc7_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "fc7_mbox_loc_flat"
type: "Flatten"
bottom: "fc7_mbox_loc_perm"
top: "fc7_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "fc7_mbox_conf"
type: "Convolution"
bottom: "fc7"
top: "fc7_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "fc7_mbox_conf_perm"
type: "Permute"
bottom: "fc7_mbox_conf"
top: "fc7_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "fc7_mbox_conf_flat"
type: "Flatten"
bottom: "fc7_mbox_conf_perm"
top: "fc7_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "fc7_mbox_priorbox"
type: "PriorBox"
bottom: "fc7"
bottom: "data"
top: "fc7_mbox_priorbox"
prior_box_param {
min_size: 60.0
max_size: 114.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: true
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
}
}
layer {
name: "conv6_2_mbox_loc"
type: "Convolution"
bottom: "conv6_2"
top: "conv6_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_2_mbox_loc_perm"
type: "Permute"
bottom: "conv6_2_mbox_loc"
top: "conv6_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv6_2_mbox_loc_flat"
type: "Flatten"
bottom: "conv6_2_mbox_loc_perm"
top: "conv6_2_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv6_2_mbox_conf"
type: "Convolution"
bottom: "conv6_2"
top: "conv6_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv6_2_mbox_conf_perm"
type: "Permute"
bottom: "conv6_2_mbox_conf"
top: "conv6_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv6_2_mbox_conf_flat"
type: "Flatten"
bottom: "conv6_2_mbox_conf_perm"
top: "conv6_2_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv6_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv6_2"
bottom: "data"
top: "conv6_2_mbox_priorbox"
prior_box_param {
min_size: 114.0
max_size: 168.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: true
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
}
}
layer {
name: "conv7_2_mbox_loc"
type: "Convolution"
bottom: "conv7_2"
top: "conv7_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_2_mbox_loc_perm"
type: "Permute"
bottom: "conv7_2_mbox_loc"
top: "conv7_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv7_2_mbox_loc_flat"
type: "Flatten"
bottom: "conv7_2_mbox_loc_perm"
top: "conv7_2_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv7_2_mbox_conf"
type: "Convolution"
bottom: "conv7_2"
top: "conv7_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv7_2_mbox_conf_perm"
type: "Permute"
bottom: "conv7_2_mbox_conf"
top: "conv7_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv7_2_mbox_conf_flat"
type: "Flatten"
bottom: "conv7_2_mbox_conf_perm"
top: "conv7_2_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv7_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv7_2"
bottom: "data"
top: "conv7_2_mbox_priorbox"
prior_box_param {
min_size: 168.0
max_size: 222.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: true
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
}
}
layer {
name: "conv8_2_mbox_loc"
type: "Convolution"
bottom: "conv8_2"
top: "conv8_2_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_2_mbox_loc_perm"
type: "Permute"
bottom: "conv8_2_mbox_loc"
top: "conv8_2_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv8_2_mbox_loc_flat"
type: "Flatten"
bottom: "conv8_2_mbox_loc_perm"
top: "conv8_2_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv8_2_mbox_conf"
type: "Convolution"
bottom: "conv8_2"
top: "conv8_2_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "conv8_2_mbox_conf_perm"
type: "Permute"
bottom: "conv8_2_mbox_conf"
top: "conv8_2_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "conv8_2_mbox_conf_flat"
type: "Flatten"
bottom: "conv8_2_mbox_conf_perm"
top: "conv8_2_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "conv8_2_mbox_priorbox"
type: "PriorBox"
bottom: "conv8_2"
bottom: "data"
top: "conv8_2_mbox_priorbox"
prior_box_param {
min_size: 222.0
max_size: 276.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: true
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
}
}
layer {
name: "pool6_mbox_loc"
type: "Convolution"
bottom: "pool6"
top: "pool6_mbox_loc"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 24
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "pool6_mbox_loc_perm"
type: "Permute"
bottom: "pool6_mbox_loc"
top: "pool6_mbox_loc_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "pool6_mbox_loc_flat"
type: "Flatten"
bottom: "pool6_mbox_loc_perm"
top: "pool6_mbox_loc_flat"
flatten_param {
axis: 1
}
}
layer {
name: "pool6_mbox_conf"
type: "Convolution"
bottom: "pool6"
top: "pool6_mbox_conf"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 126
pad: 1
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "pool6_mbox_conf_perm"
type: "Permute"
bottom: "pool6_mbox_conf"
top: "pool6_mbox_conf_perm"
permute_param {
order: 0
order: 2
order: 3
order: 1
}
}
layer {
name: "pool6_mbox_conf_flat"
type: "Flatten"
bottom: "pool6_mbox_conf_perm"
top: "pool6_mbox_conf_flat"
flatten_param {
axis: 1
}
}
layer {
name: "pool6_mbox_priorbox"
type: "PriorBox"
bottom: "pool6"
bottom: "data"
top: "pool6_mbox_priorbox"
prior_box_param {
min_size: 276.0
max_size: 330.0
aspect_ratio: 2
aspect_ratio: 3
flip: true
clip: true
variance: 0.1
variance: 0.1
variance: 0.2
variance: 0.2
}
}
layer {
name: "mbox_loc"
type: "Concat"
bottom: "conv4_3_norm_mbox_loc_flat"
bottom: "fc7_mbox_loc_flat"
bottom: "conv6_2_mbox_loc_flat"
bottom: "conv7_2_mbox_loc_flat"
bottom: "conv8_2_mbox_loc_flat"
bottom: "pool6_mbox_loc_flat"
top: "mbox_loc"
concat_param {
axis: 1
}
}
layer {
name: "mbox_conf"
type: "Concat"
bottom: "conv4_3_norm_mbox_conf_flat"
bottom: "fc7_mbox_conf_flat"
bottom: "conv6_2_mbox_conf_flat"
bottom: "conv7_2_mbox_conf_flat"
bottom: "conv8_2_mbox_conf_flat"
bottom: "pool6_mbox_conf_flat"
top: "mbox_conf"
concat_param {
axis: 1
}
}
layer {
name: "mbox_priorbox"
type: "Concat"
bottom: "conv4_3_norm_mbox_priorbox"
bottom: "fc7_mbox_priorbox"
bottom: "conv6_2_mbox_priorbox"
bottom: "conv7_2_mbox_priorbox"
bottom: "conv8_2_mbox_priorbox"
bottom: "pool6_mbox_priorbox"
top: "mbox_priorbox"
concat_param {
axis: 2
}
}
layer {
name: "mbox_conf_reshape"
type: "Reshape"
bottom: "mbox_conf"
top: "mbox_conf_reshape"
reshape_param {
shape {
dim: 0
dim: -1
dim: 21
}
}
}
layer {
name: "mbox_conf_softmax"
type: "Softmax"
bottom: "mbox_conf_reshape"
top: "mbox_conf_softmax"
softmax_param {
axis: 2
}
}
layer {
name: "mbox_conf_flatten"
type: "Flatten"
bottom: "mbox_conf_softmax"
top: "mbox_conf_flatten"
flatten_param {
axis: 1
}
}
layer {
name: "detection_out"
type: "DetectionOutput"
bottom: "mbox_loc"
bottom: "mbox_conf_flatten"
bottom: "mbox_priorbox"
top: "detection_out"
include {
phase: TEST
}
detection_output_param {
num_classes: 21
share_location: true
background_label_id: 0
nms_threshold: 0.45
top_k: 400
code_type: CENTER_SIZE
keep_top_k: 200
confidence_threshold: 0.01
}
}
#
# This prototxt is based on voc-fcn32s/val.prototxt file from
# https://github.com/shelhamer/fcn.berkeleyvision.org, which is distributed under
# Caffe (BSD) license:
# http://caffe.berkeleyvision.org/model_zoo.html#bvlc-model-license
#
name: "voc-fcn32s"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 500
input_dim: 500
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data"
top: "conv1_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 100
kernel_size: 3
stride: 1
}
}
layer {
name: "relu1_1"
type: "ReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "conv1_2"
type: "Convolution"
bottom: "conv1_1"
top: "conv1_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu1_2"
type: "ReLU"
bottom: "conv1_2"
top: "conv1_2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1_2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1"
top: "conv2_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "conv2_1"
top: "conv2_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu2_2"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2"
top: "conv3_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_1"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "conv3_1"
top: "conv3_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_2"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_3"
type: "Convolution"
bottom: "conv3_2"
top: "conv3_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_3"
type: "ReLU"
bottom: "conv3_3"
top: "conv3_3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3_3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4_1"
type: "Convolution"
bottom: "pool3"
top: "conv4_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_1"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2"
type: "Convolution"
bottom: "conv4_1"
top: "conv4_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_2"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_3"
type: "Convolution"
bottom: "conv4_2"
top: "conv4_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_3"
type: "ReLU"
bottom: "conv4_3"
top: "conv4_3"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4_3"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv5_1"
type: "Convolution"
bottom: "pool4"
top: "conv5_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_1"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_2"
type: "Convolution"
bottom: "conv5_1"
top: "conv5_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_2"
type: "ReLU"
bottom: "conv5_2"
top: "conv5_2"
}
layer {
name: "conv5_3"
type: "Convolution"
bottom: "conv5_2"
top: "conv5_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_3"
type: "ReLU"
bottom: "conv5_3"
top: "conv5_3"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5_3"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "fc6"
type: "Convolution"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 4096
pad: 0
kernel_size: 7
stride: 1
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "fc7"
type: "Convolution"
bottom: "fc6"
top: "fc7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 4096
pad: 0
kernel_size: 1
stride: 1
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "score_fr"
type: "Convolution"
bottom: "fc7"
top: "score_fr"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 21
pad: 0
kernel_size: 1
}
}
layer {
name: "upscore"
type: "Deconvolution"
bottom: "score_fr"
top: "upscore"
param {
lr_mult: 0
}
convolution_param {
num_output: 21
bias_term: false
kernel_size: 64
stride: 32
}
}
layer {
name: "score"
type: "Crop"
bottom: "upscore"
bottom: "data"
top: "score"
crop_param {
axis: 2
offset: 19
}
}
#
# This prototxt is based on voc-fcn8s/val.prototxt file from
# https://github.com/shelhamer/fcn.berkeleyvision.org, which is distributed under
# Caffe (BSD) license:
# http://caffe.berkeleyvision.org/model_zoo.html#bvlc-model-license
#
name: "voc-fcn8s"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 500
input_dim: 500
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data"
top: "conv1_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 100
kernel_size: 3
stride: 1
}
}
layer {
name: "relu1_1"
type: "ReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "conv1_2"
type: "Convolution"
bottom: "conv1_1"
top: "conv1_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu1_2"
type: "ReLU"
bottom: "conv1_2"
top: "conv1_2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1_2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1"
top: "conv2_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "conv2_1"
top: "conv2_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu2_2"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2"
top: "conv3_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_1"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "conv3_1"
top: "conv3_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_2"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_3"
type: "Convolution"
bottom: "conv3_2"
top: "conv3_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu3_3"
type: "ReLU"
bottom: "conv3_3"
top: "conv3_3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3_3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4_1"
type: "Convolution"
bottom: "pool3"
top: "conv4_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_1"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2"
type: "Convolution"
bottom: "conv4_1"
top: "conv4_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_2"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_3"
type: "Convolution"
bottom: "conv4_2"
top: "conv4_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu4_3"
type: "ReLU"
bottom: "conv4_3"
top: "conv4_3"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4_3"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv5_1"
type: "Convolution"
bottom: "pool4"
top: "conv5_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_1"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_2"
type: "Convolution"
bottom: "conv5_1"
top: "conv5_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_2"
type: "ReLU"
bottom: "conv5_2"
top: "conv5_2"
}
layer {
name: "conv5_3"
type: "Convolution"
bottom: "conv5_2"
top: "conv5_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
stride: 1
}
}
layer {
name: "relu5_3"
type: "ReLU"
bottom: "conv5_3"
top: "conv5_3"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5_3"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "fc6"
type: "Convolution"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 4096
pad: 0
kernel_size: 7
stride: 1
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "fc7"
type: "Convolution"
bottom: "fc6"
top: "fc7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 4096
pad: 0
kernel_size: 1
stride: 1
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "score_fr"
type: "Convolution"
bottom: "fc7"
top: "score_fr"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 21
pad: 0
kernel_size: 1
}
}
layer {
name: "upscore2"
type: "Deconvolution"
bottom: "score_fr"
top: "upscore2"
param {
lr_mult: 0
}
convolution_param {
num_output: 21
bias_term: false
kernel_size: 4
stride: 2
}
}
layer {
name: "score_pool4"
type: "Convolution"
bottom: "pool4"
top: "score_pool4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 21
pad: 0
kernel_size: 1
}
}
layer {
name: "score_pool4c"
type: "Crop"
bottom: "score_pool4"
bottom: "upscore2"
top: "score_pool4c"
crop_param {
axis: 2
offset: 5
}
}
layer {
name: "fuse_pool4"
type: "Eltwise"
bottom: "upscore2"
bottom: "score_pool4c"
top: "fuse_pool4"
eltwise_param {
operation: SUM
}
}
layer {
name: "upscore_pool4"
type: "Deconvolution"
bottom: "fuse_pool4"
top: "upscore_pool4"
param {
lr_mult: 0
}
convolution_param {
num_output: 21
bias_term: false
kernel_size: 4
stride: 2
}
}
layer {
name: "score_pool3"
type: "Convolution"
bottom: "pool3"
top: "score_pool3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 21
pad: 0
kernel_size: 1
}
}
layer {
name: "score_pool3c"
type: "Crop"
bottom: "score_pool3"
bottom: "upscore_pool4"
top: "score_pool3c"
crop_param {
axis: 2
offset: 9
}
}
layer {
name: "fuse_pool3"
type: "Eltwise"
bottom: "upscore_pool4"
bottom: "score_pool3c"
top: "fuse_pool3"
eltwise_param {
operation: SUM
}
}
layer {
name: "upscore8"
type: "Deconvolution"
bottom: "fuse_pool3"
top: "upscore8"
param {
lr_mult: 0
}
convolution_param {
num_output: 21
bias_term: false
kernel_size: 16
stride: 8
}
}
layer {
name: "score"
type: "Crop"
bottom: "upscore8"
bottom: "data"
top: "score"
crop_param {
axis: 2
offset: 31
}
}
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core/ocl.hpp>
using namespace cv;
using namespace cv::dnn;
#include <fstream>
#include <iostream>
#include <cstdlib>
using namespace std;
static const string fcnType = "fcn8s";
//Reads the class-color table ("name B G R" per line) and returns the colors
//indexed by class id. Exits the process if the file cannot be opened.
static vector<cv::Vec3b> readColors(const string &filename = "pascal-classes.txt")
{
    vector<cv::Vec3b> colors;

    ifstream fp(filename.c_str());
    if (!fp.is_open())
    {
        cerr << "File with colors not found: " << filename << endl;
        exit(-1);
    }

    string line;
    //Use getline as the loop condition: the original `while (!fp.eof())`
    //pattern tests EOF before the read, which is a classic off-by-one trap.
    while (getline(fp, line))
    {
        if (line.length())
        {
            stringstream ss(line);

            string name; ss >> name; //class name (used only to skip the token)
            int temp;
            cv::Vec3b color;
            ss >> temp; color[0] = temp;
            ss >> temp; color[1] = temp;
            ss >> temp; color[2] = temp;
            colors.push_back(color);
        }
    }

    fp.close();
    return colors;
}
//Converts the per-class score blob into a color image: each pixel receives the
//color of the class with the maximal score (argmax over channels).
static void colorizeSegmentation(dnn::Blob &score, const vector<cv::Vec3b> &colors, cv::Mat &segm)
{
    const int rows = score.rows();
    const int cols = score.cols();
    const int chns = score.channels();

    cv::Mat maxCl(rows, cols, CV_8UC1);
    cv::Mat maxVal(rows, cols, CV_32FC1);

    //Seed the running maximum with channel 0. The original started comparing
    //against the *uninitialized* contents of maxVal, which made the argmax
    //(and therefore the output colors) non-deterministic.
    for (int row = 0; row < rows; row++)
    {
        const float *ptrScore = score.ptrf(0, 0, row);
        uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
        float *ptrMaxVal = maxVal.ptr<float>(row);
        for (int col = 0; col < cols; col++)
        {
            ptrMaxVal[col] = ptrScore[col];
            ptrMaxCl[col] = 0;
        }
    }

    //Now fold in the remaining channels.
    for (int ch = 1; ch < chns; ch++)
    {
        for (int row = 0; row < rows; row++)
        {
            const float *ptrScore = score.ptrf(0, ch, row);
            uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
            float *ptrMaxVal = maxVal.ptr<float>(row);
            for (int col = 0; col < cols; col++)
            {
                if (ptrScore[col] > ptrMaxVal[col])
                {
                    ptrMaxVal[col] = ptrScore[col];
                    ptrMaxCl[col] = (uchar)ch;
                }
            }
        }
    }

    //Map each winning class id to its color.
    segm.create(rows, cols, CV_8UC3);
    for (int row = 0; row < rows; row++)
    {
        const uchar *ptrMaxCl = maxCl.ptr<uchar>(row);
        cv::Vec3b *ptrSegm = segm.ptr<cv::Vec3b>(row);
        for (int col = 0; col < cols; col++)
        {
            ptrSegm[col] = colors[ptrMaxCl[col]];
        }
    }
}
//Sample entry point: runs FCN-8s semantic segmentation on an input image and
//shows the colorized class map blended over the source image.
//Usage: fcn_semsegm [image]   (defaults to "rgb.jpg")
int main(int argc, char **argv)
{
    cv::ocl::setUseOpenCL(false); //OpenCL path disabled for this sample

    String modelTxt = fcnType + "-heavy-pascal.prototxt";
    String modelBin = fcnType + "-heavy-pascal.caffemodel";
    String imageFile = (argc > 1) ? argv[1] : "rgb.jpg"; //optional CLI argument

    vector<cv::Vec3b> colors = readColors(); //class-id -> color lookup table

    //! [Create the importer of Caffe model]
    Ptr<dnn::Importer> importer;
    try //Try to import Caffe FCN model
    {
        importer = dnn::createCaffeImporter(modelTxt, modelBin);
    }
    catch (const cv::Exception &err) //Importer can throw errors, we will catch them
    {
        cerr << err.msg << endl;
    }
    //! [Create the importer of Caffe model]

    if (!importer)
    {
        cerr << "Can't load network by using the following files: " << endl;
        cerr << "prototxt: " << modelTxt << endl;
        cerr << "caffemodel: " << modelBin << endl;
        cerr << fcnType << "-heavy-pascal.caffemodel can be downloaded here:" << endl;
        cerr << "http://dl.caffe.berkeleyvision.org/" << fcnType << "-heavy-pascal.caffemodel" << endl;
        exit(-1);
    }

    //! [Initialize network]
    dnn::Net net;
    importer->populateNet(net);
    importer.release(); //We don't need importer anymore
    //! [Initialize network]

    //! [Prepare blob]
    Mat img = imread(imageFile);
    if (img.empty())
    {
        cerr << "Can't read image from the file: " << imageFile << endl;
        exit(-1);
    }

    resize(img, img, Size(500, 500)); //FCN accepts 500x500 RGB-images
    dnn::Blob inputBlob = dnn::Blob::fromImages(img); //Convert Mat to dnn::Blob image batch
    //! [Prepare blob]

    //! [Set input blob]
    net.setBlob(".data", inputBlob); //set the network input
    //! [Set input blob]

    //! [Make forward pass]
    net.forward(); //compute output
    //! [Make forward pass]

    //! [Gather output]
    dnn::Blob score = net.getBlob("score"); //per-class score maps

    cv::Mat colorize;
    colorizeSegmentation(score, colors, colorize); //argmax over classes -> color map
    cv::Mat show;
    cv::addWeighted(img, 0.4, colorize, 0.6, 0.0, show); //overlay on source image
    cv::imshow("show", show);
    cv::waitKey(0);
    return 0;
} //main
background 0 0 0
aeroplane 128 0 0
bicycle 0 128 0
bird 128 128 0
boat 0 0 128
bottle 128 0 128
bus 0 128 128
car 128 128 128
cat 64 0 0
chair 192 0 0
cow 64 128 0
diningtable 192 128 0
dog 64 0 128
horse 192 0 128
motorbike 64 128 128
person 192 128 128
pottedplant 0 64 0
sheep 128 64 0
sofa 0 192 0
train 128 192 0
tvmonitor 0 64 128
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace cv::dnn;
#include <fstream>
#include <iostream>
#include <cstdlib>
using namespace std;
const size_t width = 300;  //fixed SSD network input width in pixels
const size_t height = 300; //fixed SSD network input height in pixels
//Builds a 3-channel CV_32F image of the given size filled with the per-channel
//mean values (BGR) used for SSD input preprocessing.
Mat getMean(const size_t& imageHeight, const size_t& imageWidth)
{
    const int meanValues[3] = {104, 117, 123};

    vector<Mat> meanChannels;
    for (int c = 0; c < 3; ++c)
        meanChannels.push_back(Mat(imageHeight, imageWidth, CV_32F, Scalar(meanValues[c])));

    Mat mean;
    cv::merge(meanChannels, mean);
    return mean;
}
//Converts the frame to float, resizes it to the fixed network input size and
//subtracts the per-channel training mean.
Mat preprocess(const Mat& frame)
{
    Mat preprocessed;
    frame.convertTo(preprocessed, CV_32FC3);
    resize(preprocessed, preprocessed, Size(width, height)); //SSD accepts 300x300 RGB-images

    //Pass (height, width) to match getMean(imageHeight, imageWidth); the
    //original swapped the arguments, which was harmless only because the
    //network input is square (width == height == 300).
    Mat mean = getMean(height, width);
    cv::subtract(preprocessed, mean, preprocessed);

    return preprocessed;
}
//Help text printed for --help. The original concatenation was missing a space
//after the URL parenthesis, producing ")to detect" in the output.
const char* about = "This sample uses Single-Shot Detector "
                    "(https://arxiv.org/abs/1512.02325) "
                    "to detect objects on image\n"; // TODO: link

//CommandLineParser option specification: "{ name | default | help }".
const char* params
    = "{ help | false | print usage }"
      "{ proto | | model configuration }"
      "{ model | | model weights }"
      "{ image | | image for detection }"
      "{ min_confidence | 0.5 | min confidence }";
int main(int argc, char** argv)
{
cv::CommandLineParser parser(argc, argv, params);
if (parser.get<bool>("help"))
{
std::cout << about << std::endl;
parser.printMessage();
return 0;
}
String modelConfiguration = parser.get<string>("proto");
String modelBinary = parser.get<string>("model");
//! [Create the importer of Caffe model]
Ptr<dnn::Importer> importer;
// Import Caffe SSD model
try
{
importer = dnn::createCaffeImporter(modelConfiguration, modelBinary);
}
catch (const cv::Exception &err) //Importer can throw errors, we will catch them
{
cerr << err.msg << endl;
}
//! [Create the importer of Caffe model]
if (!importer)
{
cerr << "Can't load network by using the following files: " << endl;
cerr << "prototxt: " << modelConfiguration << endl;
cerr << "caffemodel: " << modelBinary << endl;
cerr << "Models can be downloaded here:" << endl;
cerr << "https://github.com/weiliu89/caffe/tree/ssd#models" << endl;
exit(-1);
}
//! [Initialize network]
dnn::Net net;
importer->populateNet(net);
importer.release(); //We don't need importer anymore
//! [Initialize network]
cv::Mat frame = cv::imread(parser.get<string>("image"), -1);
//! [Prepare blob]
Mat preprocessedFrame = preprocess(frame);
dnn::Blob inputBlob = dnn::Blob::fromImages(preprocessedFrame); //Convert Mat to dnn::Blob image
//! [Prepare blob]
//! [Set input blob]
net.setBlob(".data", inputBlob); //set the network input
//! [Set input blob]
//! [Make forward pass]
net.forward(); //compute output
//! [Make forward pass]
//! [Gather output]
dnn::Blob detection = net.getBlob("detection_out");
Mat detectionMat(detection.rows(), detection.cols(), CV_32F, detection.ptrf());
float confidenceThreshold = parser.get<float>("min_confidence");
for(int i = 0; i < detectionMat.rows; i++)
{
float confidence = detectionMat.at<float>(i, 2);
if(confidence > confidenceThreshold)
{
size_t objectClass = detectionMat.at<float>(i, 1);
float xLeftBottom = detectionMat.at<float>(i, 3) * frame.cols;
float yLeftBottom = detectionMat.at<float>(i, 4) * frame.rows;
float xRightTop = detectionMat.at<float>(i, 5) * frame.cols;
float yRightTop = detectionMat.at<float>(i, 6) * frame.rows;
std::cout << "Class: " << objectClass << std::endl;
std::cout << "Confidence: " << confidence << std::endl;
std::cout << " " << xLeftBottom
<< " " << yLeftBottom
<< " " << xRightTop
<< " " << yRightTop << std::endl;
Rect object(xLeftBottom, yLeftBottom,
xRightTop - xLeftBottom,
yRightTop - yLeftBottom);
rectangle(frame, object, Scalar(0, 255, 0));
}
}
imshow("detections", frame);
waitKey();
return 0;
} // main
......@@ -73,6 +73,93 @@ message BlobProtoVector {
repeated BlobProto blobs = 1;
}
message CropParameter {
  // To crop, elements of the first bottom are selected to fit the dimensions
  // of the second, reference bottom. The crop is configured by
  // - the crop `axis` to pick the dimensions for cropping
  // - the crop `offset` to set the shift for all/each dimension
  // to align the cropped bottom with the reference bottom.
  // All dimensions up to but excluding `axis` are preserved, while
  // the dimensions including and trailing `axis` are cropped.
  // If only one `offset` is set, then all dimensions are offset by this amount.
  // Otherwise, the number of offsets must equal the number of cropped axes to
  // shift the crop in each dimension accordingly.
  // Note: standard dimensions are N,C,H,W so the default is a spatial crop,
  // and `axis` may be negative to index from the end (e.g., -1 for the last
  // axis).

  // First dimension to crop (see the explanation above).
  optional int32 axis = 1 [default = 2];
  // Crop offset(s): either one value for all cropped axes, or one per axis.
  repeated uint32 offset = 2;
}
message PermuteParameter {
  // The new order of the axes of the data. Notice it should be within
  // the same range as the input data, and it starts from 0.
  // Do not provide a repeated order.
  repeated uint32 order = 1;
}
// Message that stores parameters used by NormalizeBBoxLayer
message NormalizeBBoxParameter {
  // If true, normalize over the whole spatial extent; otherwise per location
  // (NOTE(review): inferred from the field name — confirm against the layer).
  optional bool across_spatial = 1 [default = true];
  // Initial value of scale. Default is 1.0 for all
  optional FillerParameter scale_filler = 2;
  // Whether or not scale parameters are shared across channels.
  optional bool channel_shared = 3 [default = true];
  // Epsilon for not dividing by zero while normalizing variance
  optional float eps = 4 [default = 1e-10];
}
// Message that stores parameters used by PriorBoxLayer
message PriorBoxParameter {
  // Encode/decode type.
  enum CodeType {
    CORNER = 1;
    CENTER_SIZE = 2;
  }
  // Minimum box size (in pixels). Required!
  optional float min_size = 1;
  // Maximum box size (in pixels). Required!
  optional float max_size = 2;
  // Various aspect ratios. Duplicate ratios will be ignored.
  // If none is provided, we use default ratio 1.
  repeated float aspect_ratio = 3;
  // If true, will flip each aspect ratio.
  // For example, if there is aspect ratio "r",
  // we will generate aspect ratio "1.0/r" as well.
  optional bool flip = 4 [default = true];
  // If true, will clip the prior so that it is within [0, 1]
  optional bool clip = 5 [default = true];
  // Variance for adjusting the prior bboxes.
  repeated float variance = 6;
}
// Message that stores parameters used by DetectionOutputLayer
message DetectionOutputParameter {
  // Number of classes to be predicted. Required!
  optional uint32 num_classes = 1;
  // If true, bounding boxes are shared among different classes.
  optional bool share_location = 2 [default = true];
  // Background label id. If there is no background class,
  // set it as -1.
  optional int32 background_label_id = 3 [default = 0];
  // Type of coding method for bbox.
  optional PriorBoxParameter.CodeType code_type = 6 [default = CORNER];
  // If true, variance is encoded in target; otherwise we need to adjust the
  // predicted offset accordingly.
  optional bool variance_encoded_in_target = 8 [default = false];
  // Number of total bboxes to be kept per image after nms step.
  // -1 means keeping all bboxes after nms step.
  optional int32 keep_top_k = 7 [default = -1];
  // Only consider detections whose confidences are larger than a threshold.
  // If not provided, consider all boxes.
  optional float confidence_threshold = 9;
  // Parameters used for non maximum suppression.
  // Overlap threshold to be used in nms.
  optional float nms_threshold = 10 [default = 0.3];
  // Maximum number of results to be kept (before keep_top_k is applied).
  optional int32 top_k = 11;
}
message Datum {
optional int32 channels = 1;
optional int32 height = 2;
......@@ -317,7 +404,7 @@ message ParamSpec {
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 137 (last added: reduction_param)
// LayerParameter next available layer-specific ID: 142 (last added: detection_output_param)
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
......@@ -369,7 +456,9 @@ message LayerParameter {
optional ConcatParameter concat_param = 104;
optional ContrastiveLossParameter contrastive_loss_param = 105;
optional ConvolutionParameter convolution_param = 106;
optional CropParameter crop_param = 137;
optional DataParameter data_param = 107;
optional DetectionOutputParameter detection_output_param = 141;
optional DropoutParameter dropout_param = 108;
optional DummyDataParameter dummy_data_param = 109;
optional EltwiseParameter eltwise_param = 110;
......@@ -385,17 +474,20 @@ message LayerParameter {
optional LRNParameter lrn_param = 118;
optional MemoryDataParameter memory_data_param = 119;
optional MVNParameter mvn_param = 120;
optional NormalizeBBoxParameter normalize_bbox_param = 139;
optional PermuteParameter permute_param = 138;
optional PoolingParameter pooling_param = 121;
optional PowerParameter power_param = 122;
optional PReLUParameter prelu_param = 131;
optional PriorBoxParameter prior_box_param = 140;
optional PythonParameter python_param = 130;
optional ReductionParameter reduction_param = 136;
optional ReLUParameter relu_param = 123;
optional ReshapeParameter reshape_param = 133;
optional SigmoidParameter sigmoid_param = 124;
optional SliceParameter slice_param = 126;
optional SoftmaxParameter softmax_param = 125;
optional SPPParameter spp_param = 132;
optional SliceParameter slice_param = 126;
optional TanHParameter tanh_param = 127;
optional ThresholdParameter threshold_param = 128;
optional WindowDataParameter window_data_param = 129;
......@@ -505,6 +597,12 @@ message ConvolutionParameter {
CUDNN = 2;
}
optional Engine engine = 15 [default = DEFAULT];
// Factor used to dilate the kernel, (implicitly) zero-filling the resulting
// holes. (Kernel dilation is sometimes referred to by its use in the
// algorithme à trous from Holschneider et al. 1987.)
optional uint32 dilation_h = 18; // The dilation height
optional uint32 dilation_w = 19; // The dilation width
optional uint32 dilation = 20; // The dilation; defaults to 1
}
message DataParameter {
......@@ -1155,3 +1253,15 @@ message PReLUParameter {
// Whether or not slope paramters are shared across channels.
optional bool channel_shared = 2 [default = false];
}
// The normalized bounding box [0, 1] w.r.t. the input image size.
message NormalizedBBox {
  // Box corners, normalized to [0, 1] of the image width/height.
  optional float xmin = 1;
  optional float ymin = 2;
  optional float xmax = 3;
  optional float ymax = 4;
  // Object class id.
  optional int32 label = 5;
  // "Difficult" annotation flag (presumably PASCAL VOC semantics — confirm
  // against the consumer of this message).
  optional bool difficult = 6;
  // Detection confidence score.
  optional float score = 7;
  // Cached box size (presumably the normalized area — confirm in bbox utils).
  optional float size = 8;
}
......@@ -2,6 +2,7 @@
#include "layer_loaders.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <climits>
#include "layers/layers_common.hpp"
namespace cv
{
......@@ -57,7 +58,8 @@ static void getCaffeConvParams(LayerParams &params, Size &kernel, Size &pad, Siz
static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer> l, LayerParams &params)
{
l->setParamsFrom(params);
getCaffeConvParams(params, l->kernel, l->pad, l->stride);
//getCaffeConvParams(params, l->kernel, l->pad, l->stride);
getConvolutionKernelParams(params, l->kernel.height, l->kernel.width, l->pad.height, l->pad.width, l->stride.height, l->stride.width, l->dilation.height, l->dilation.width);
bool bias = params.get<bool>("bias_term", true);
int numOutput = params.get<int>("num_output");
......@@ -88,6 +90,7 @@ Ptr<Layer> createLayerFromCaffe<PoolingLayer>(LayerParams &params)
{
int type;
Size kernel, stride, pad;
bool globalPooling;
if (params.has("pool"))
{
......@@ -106,9 +109,13 @@ Ptr<Layer> createLayerFromCaffe<PoolingLayer>(LayerParams &params)
type = PoolingLayer::MAX;
}
getCaffeConvParams(params, kernel, pad, stride);
getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling, pad.height, pad.width, stride.height, stride.width);
//getCaffeConvParams(params, kernel, pad, stride);
return Ptr<Layer>(PoolingLayer::create(type, kernel, stride, pad));
if (!globalPooling)
return Ptr<Layer>(PoolingLayer::create(type, kernel, stride, pad));
else
return Ptr<Layer>(PoolingLayer::createGlobal(type));
}
template<>
......
......@@ -43,6 +43,14 @@
#include "caffe/layer_loaders.hpp"
#include "layers/blank_layer.hpp"
#include "layers/crop_layer.hpp"
#include "layers/eltwise_layer.hpp"
#include "layers/flatten_layer.hpp"
#include "layers/permute_layer.hpp"
#include "layers/prior_box_layer.hpp"
#include "layers/detection_output_layer.hpp"
#include "layers/normalize_bbox_layer.hpp"
namespace cv
{
namespace dnn
......@@ -87,6 +95,15 @@ void initModule()
REG_RUNTIME_LAYER_FUNC(Power, createLayerFromCaffe<PowerLayer>);
REG_RUNTIME_LAYER_CLASS(Dropout, BlankLayer)
REG_RUNTIME_LAYER_CLASS(Crop, CropLayer)
REG_RUNTIME_LAYER_CLASS(Eltwise, EltwiseLayer)
REG_RUNTIME_LAYER_CLASS(Permute, PermuteLayer)
//REG_RUNTIME_LAYER_CLASS(Flatten, FlattenLayer)
REG_RUNTIME_LAYER_CLASS(PriorBox, PriorBoxLayer)
REG_RUNTIME_LAYER_CLASS(DetectionOutput, DetectionOutputLayer)
REG_RUNTIME_LAYER_CLASS(NormalizeBBox, NormalizeBBoxLayer)
init.status = true;
}
......
......@@ -77,7 +77,8 @@ void ConvolutionLayerImpl::init()
CV_Assert(blobs[0].dims() == 4 && blobs[0].cols() == kernel.width && blobs[0].rows() == kernel.height);
CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].num());
useOpenCL = ocl::useOpenCL() && tryUseOpenCL;
//TODO: dilation in OCL mode
useOpenCL = ocl::useOpenCL() && tryUseOpenCL && dilation == Size(1, 1);
}
void ConvolutionLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
......@@ -127,7 +128,8 @@ void ConvolutionLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vecto
bool ConvolutionLayerImpl::is1x1() const
{
return (kernel.height == 1 && kernel.width == 1) &&
(stride.height == 1 && stride.width == 1);
(stride.height == 1 && stride.width == 1) &&
(dilation.height == 1 && dilation.width == 1);
}
template<typename XMat>
......@@ -182,7 +184,7 @@ void ConvolutionLayerImpl::im2col(const UMat &srcImg, UMat &dstCol)
return;
}
#ifdef HAVE_OPENCL
CV_Assert(im2col_ocl(srcImg, inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, this->colBlob.umatRef()));
CV_Assert(im2col_ocl(srcImg, inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dilation.height, dilation.width, this->colBlob.umatRef()));
dstCol = this->colBlob.umatRefConst();
#else
CV_Error(Error::StsInternal, "");
......@@ -200,9 +202,9 @@ void ConvolutionLayerImpl::im2col(const Mat &srcImg, Mat &dstCol)
Mat &colMat = colBlob.matRef();
if (srcImg.type() == CV_32F)
im2col_CpuPBody<float>::run(srcImg.ptr<float>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, colMat.ptr<float>());
im2col_CpuPBody<float>::run(srcImg.ptr<float>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dilation.height, dilation.width, colMat.ptr<float>());
if (srcImg.type() == CV_64F)
im2col_CpuPBody<double>::run(srcImg.ptr<double>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, colMat.ptr<double>());
im2col_CpuPBody<double>::run(srcImg.ptr<double>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dilation.height, dilation.width, colMat.ptr<double>());
dstCol = colMat;
}
......@@ -213,8 +215,8 @@ void ConvolutionLayerImpl::computeInpOutShape(const Blob &input)
inpW = input.cols();
inpCn = input.channels();
outH = (inpH + 2 * pad.height - kernel.height) / stride.height + 1;
outW = (inpW + 2 * pad.width - kernel.width) / stride.width + 1;
outH = (inpH + 2 * pad.height - (dilation.height * (kernel.height - 1) + 1)) / stride.height + 1;
outW = (inpW + 2 * pad.width - (dilation.width * (kernel.width - 1) + 1)) / stride.width + 1;
outCn = numOutput;
topH = outH; topW = outW; topCn = outCn;
......@@ -252,7 +254,7 @@ template<typename XMat>
void DeConvolutionLayerImpl::forward_(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
XMat weightsMat = reshaped(blobs[0].getRefConst<XMat>(), Shape(outCn, ksize));
XMat biasesMat = reshaped(blobs[1].getRefConst<XMat>(), Shape(outCn, 1));
XMat biasesMat = (bias) ? reshaped(blobs[1].getRefConst<XMat>(), Shape(outCn, 1)) : XMat();
for (size_t ii = 0; ii < outputs.size(); ii++)
{
......@@ -315,21 +317,23 @@ void DeConvolutionLayerImpl::col2im(const UMat &colMat, UMat &dstImg)
//Initializers
Ptr<BaseConvolutionLayer> ConvolutionLayer::create(Size kernel, Size stride, Size pad)
Ptr<BaseConvolutionLayer> ConvolutionLayer::create(Size kernel, Size stride, Size pad, Size dilation)
{
ConvolutionLayerImpl *l = new ConvolutionLayerImpl();
l->kernel = kernel;
l->pad = pad;
l->stride = stride;
l->dilation = dilation;
return Ptr<BaseConvolutionLayer>(l);
}
Ptr<BaseConvolutionLayer> DeconvolutionLayer::create(Size kernel, Size stride, Size pad)
Ptr<BaseConvolutionLayer> DeconvolutionLayer::create(Size kernel, Size stride, Size pad, Size dilation)
{
DeConvolutionLayerImpl *l = new DeConvolutionLayerImpl();
l->kernel = kernel;
l->pad = pad;
l->stride = stride;
l->dilation = dilation;
return Ptr<BaseConvolutionLayer>(l);
}
......
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "crop_layer.hpp"
namespace cv
{
namespace dnn
{
//Crop layer (Caffe semantics): crops the first input to the size of the second
//("reference") input, starting from dimension `axis`, shifted by per-dimension
//offsets. Accepted params: "axis" (default 2 = spatial crop) and "offset"
//(one value for all cropped axes, or one value per cropped axis; default 0).
CropLayer::CropLayer(LayerParams &params) : Layer(params)
{
    start_axis = params.get<int>("axis", 2); //Caffe's CropParameter defaults axis to 2
    if (start_axis < 0)
        CV_Error(Error::StsBadArg, "negative crop axis is not supported");
    if (4 <= start_axis)
        CV_Error(Error::StsBadArg, "crop axis bigger than input dim");

    offset.resize(4, 0); //per-dimension offsets; Caffe defaults them to zero
    if (params.has("offset"))
    {
        DictValue paramOffset = params.get("offset");

        if (1 < paramOffset.size())
        {
            if (4 - start_axis != paramOffset.size())
                CV_Error(Error::StsBadArg, "number of offset values specified must be equal to the number of dimensions following axis.");

            //The offset list is 0-based; the original indexed it with the
            //absolute dimension (paramOffset.get(i)), reading past its end
            //whenever start_axis > 0.
            for (size_t i = start_axis; i < offset.size(); i++)
            {
                offset[i] = paramOffset.get<int>((int)(i - start_axis));
            }
        }
        else
        {
            //A single offset value applies to every cropped dimension.
            const int offset_val = paramOffset.get<int>(0);
            for (size_t i = start_axis; i < offset.size(); i++)
            {
                offset[i] = offset_val;
            }
        }
    }
}
//Computes the output shape: dimensions before start_axis come from the data
//input, the remaining ones from the reference (size) input; validates that the
//crop window fits inside the data input.
void CropLayer::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
    CV_Assert(2 == inputs.size());

    const Blob &srcBlob = *inputs[0];
    CV_Assert(srcBlob.dims() == 4 && srcBlob.type() == CV_32F);
    const Blob &refBlob = *inputs[1];

    outSizes.resize(4, 0);
    for (int dim = 0; dim < 4; dim++)
    {
        outSizes[dim] = (dim < start_axis) ? srcBlob.size(dim) : refBlob.size(dim);

        if (offset[dim] + outSizes[dim] > srcBlob.size(dim))
            CV_Error(Error::StsBadArg, "invalid crop parameters");
    }

    outputs.resize(1);
    outputs[0].create(BlobShape(outSizes));
}
//Copies the cropped window row by row (the innermost dimension of a 4-D blob
//is contiguous, so each row is a single memcpy).
void CropLayer::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
    Blob &input = *inputs[0];  //reference: avoids constructing a Blob copy per pass
    Blob &output = outputs[0]; //reference: also guarantees writes land in the real output

    for (int num = 0; num < outSizes[0]; ++num)
    {
        for (int ch = 0; ch < outSizes[1]; ++ch)
        {
            for (int row = 0; row < outSizes[2]; ++row)
            {
                const float *srcData = input.ptrf(num + offset[0], ch + offset[1], row + offset[2]);
                float *dstData = output.ptrf(num, ch, row);
                memcpy(dstData, srcData + offset[3], sizeof(float) * outSizes[3]);
            }
        }
    }
}
}
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_CROP_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_CROP_LAYER_HPP__
#include "../precomp.hpp"
namespace cv
{
namespace dnn
{
// Crops the first input blob to the size of the second (reference) input,
// following Caffe's Crop layer semantics ("axis" / "offset" params).
class CropLayer : public Layer
{
    int start_axis;            // first of the 4 dimensions that gets cropped
    std::vector<int> offset;   // per-dimension crop offsets (always size 4)
    std::vector<int> outSizes; // output shape computed in allocate()
public:
    CropLayer(LayerParams& params);
    // Computes the output shape from the two inputs (data, size reference).
    void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
    // Copies the cropped region of inputs[0] into outputs[0].
    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
}
}
#endif
/*M ///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "detection_output_layer.hpp"
#include <float.h>
#include <string>
namespace cv
{
namespace dnn
{
namespace util
{
template <typename T>
std::string to_string(T value)
{
std::ostringstream stream;
stream << value;
return stream.str();
}
template <typename T>
void make_error(const std::string& message1, const T& message2)
{
std::string error(message1);
error += std::string(util::to_string<int>(message2));
CV_Error(Error::StsBadArg, error.c_str());
}
template <typename T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
const std::pair<float, T>& pair2)
{
return pair1.first > pair2.first;
}
}
const std::string DetectionOutputLayer::_layerName = std::string("DetectionOutput");
/* Fetch parameter `parameterName` from `params` into `result`.
 * Returns false (leaving `result` untouched) when the parameter is absent. */
bool DetectionOutputLayer::getParameterDict(const LayerParams &params,
                                            const std::string &parameterName,
                                            DictValue& result)
{
    const bool present = params.has(parameterName);
    if (present)
        result = params.get(parameterName);
    return present;
}
/* Read element `idx` of parameter `parameterName` as type T.
 * If the parameter is missing: raises CV_Error when `required`,
 * otherwise returns `defaultValue`. */
template<typename T>
T DetectionOutputLayer::getParameter(const LayerParams &params,
                                     const std::string &parameterName,
                                     const size_t &idx,
                                     const bool required,
                                     const T& defaultValue)
{
    DictValue dictValue;
    if (getParameterDict(params, parameterName, dictValue))
        return dictValue.get<T>(idx);

    if (!required)
        return defaultValue;

    std::string message = _layerName;
    message += " layer parameter does not contain ";
    message += parameterName;
    message += " parameter.";
    CV_Error(Error::StsBadArg, message);
    return defaultValue; // unreachable: CV_Error throws
}
/* Parse the "code_type" parameter into _codeType.
 * "center_size" selects CENTER_SIZE; anything else (including "corner")
 * falls back to CORNER. */
void DetectionOutputLayer::getCodeType(LayerParams &params)
{
    const String typeName = params.get<String>("code_type").toLowerCase();
    _codeType = (typeName == "center_size")
                ? caffe::PriorBoxParameter_CodeType_CENTER_SIZE
                : caffe::PriorBoxParameter_CodeType_CORNER;
}
// Reads all DetectionOutput parameters from `params`.
// Required: num_classes, share_location, background_label_id, keep_top_k,
// code_type, nms_threshold. Optional with defaults:
// variance_encoded_in_target (false), confidence_threshold (-FLT_MAX, i.e.
// no score filtering), top_k (-1, i.e. unlimited per-class NMS input).
DetectionOutputLayer::DetectionOutputLayer(LayerParams &params) : Layer(params)
{
    _numClasses = getParameter<unsigned>(params, "num_classes");
    _shareLocation = getParameter<bool>(params, "share_location");
    // One decoded box set per class, unless locations are shared.
    _numLocClasses = _shareLocation ? 1 : _numClasses;
    _backgroundLabelId = getParameter<int>(params, "background_label_id");
    _varianceEncodedInTarget = getParameter<bool>(params, "variance_encoded_in_target", 0, false, false);
    _keepTopK = getParameter<int>(params, "keep_top_k");
    _confidenceThreshold = getParameter<float>(params, "confidence_threshold", 0, false, -FLT_MAX);
    _topK = getParameter<int>(params, "top_k", 0, false, -1);
    getCodeType(params);

    // Parameters used in nms.
    _nmsThreshold = getParameter<float>(params, "nms_threshold");
    CV_Assert(_nmsThreshold > 0.);
}
/* Assert that every input blob matches the first one along all
 * _numAxes axes. */
void DetectionOutputLayer::checkInputs(const std::vector<Blob*> &inputs)
{
    const size_t count = inputs.size();
    for (size_t blobIdx = 1; blobIdx < count; blobIdx++)
        for (size_t axis = 0; axis < _numAxes; axis++)
            CV_Assert(inputs[blobIdx]->shape()[axis] == inputs[0]->shape()[axis]);
}
/* Validates input blob shapes and allocates a placeholder output.
 * inputs[0]: location predictions, inputs[1]: confidences,
 * inputs[2]: prior boxes (+ variances). */
void DetectionOutputLayer::allocate(const std::vector<Blob*> &inputs,
                                    std::vector<Blob> &outputs)
{
    // Fix: the method dereferences inputs[1] and inputs[2] below, so the
    // old `inputs.size() > 0` check was insufficient.
    CV_Assert(inputs.size() >= 3);
    CV_Assert(inputs[0]->num() == inputs[1]->num());
    _num = inputs[0]->num();

    // The prior blob stores numPriors boxes of 4 coordinates each.
    _numPriors = inputs[2]->rows() / 4;
    CV_Assert((_numPriors * _numLocClasses * 4) == inputs[0]->channels());
    CV_Assert(int(_numPriors * _numClasses) == inputs[1]->channels());

    // num() and channels() are 1.
    // Since the number of bboxes to be kept is unknown before nms, we manually
    // set it to (fake) 1.
    // Each row is a 7 dimension std::vector, which stores
    // [image_id, label, confidence, xmin, ymin, xmax, ymax]
    outputs[0].create(BlobShape(1, 1, 1, 7));
}
/* Runs the full SSD detection-output pipeline:
 * decode predicted boxes against priors, apply per-class NMS per image,
 * optionally keep only the best _keepTopK detections per image, then emit
 * one [image_id, label, confidence, xmin, ymin, xmax, ymax] row per kept
 * detection. */
void DetectionOutputLayer::forward(std::vector<Blob*> &inputs,
                                   std::vector<Blob> &outputs)
{
    // inputs[0]: locations, inputs[1]: confidences, inputs[2]: priors.
    const float* locationData = inputs[0]->ptrf();
    const float* confidenceData = inputs[1]->ptrf();
    const float* priorData = inputs[2]->ptrf();

    // Retrieve all location predictions.
    std::vector<LabelBBox> allLocationPredictions;
    GetLocPredictions(locationData, _num, _numPriors, _numLocClasses,
                      _shareLocation, &allLocationPredictions);

    // Retrieve all confidences.
    std::vector<std::map<int, std::vector<float> > > allConfidenceScores;
    GetConfidenceScores(confidenceData, _num, _numPriors, _numClasses,
                        &allConfidenceScores);

    // Retrieve all prior bboxes. It is same within a batch since we assume all
    // images in a batch are of same dimension.
    std::vector<caffe::NormalizedBBox> priorBBoxes;
    std::vector<std::vector<float> > priorVariances;
    GetPriorBBoxes(priorData, _numPriors, &priorBBoxes, &priorVariances);

    // Decode all loc predictions to bboxes.
    std::vector<LabelBBox> allDecodedBBoxes;
    DecodeBBoxesAll(allLocationPredictions, priorBBoxes, priorVariances, _num,
                    _shareLocation, _numLocClasses, _backgroundLabelId,
                    _codeType, _varianceEncodedInTarget, &allDecodedBBoxes);

    // Per-image NMS; allIndices[i] maps label -> indices of kept boxes.
    int numKept = 0;
    std::vector<std::map<int, std::vector<int> > > allIndices;
    for (int i = 0; i < _num; ++i)
    {
        const LabelBBox& decodeBBoxes = allDecodedBBoxes[i];
        const std::map<int, std::vector<float> >& confidenceScores =
            allConfidenceScores[i];
        std::map<int, std::vector<int> > indices;
        int numDetections = 0;
        for (int c = 0; c < (int)_numClasses; ++c)
        {
            if (c == _backgroundLabelId)
            {
                // Ignore background class.
                continue;
            }
            if (confidenceScores.find(c) == confidenceScores.end())
            {
                // Something bad happened if there are no predictions for current label.
                // (make_error raises CV_Error, so execution does not continue.)
                util::make_error<int>("Could not find confidence predictions for label ", c);
            }
            const std::vector<float>& scores = confidenceScores.find(c)->second;
            int label = _shareLocation ? -1 : c;
            if (decodeBBoxes.find(label) == decodeBBoxes.end())
            {
                // Something bad happened if there are no predictions for current label.
                util::make_error<int>("Could not find location predictions for label ", label);
                continue;
            }
            const std::vector<caffe::NormalizedBBox>& bboxes =
                decodeBBoxes.find(label)->second;
            // Keep at most _topK boxes per class scoring above _confidenceThreshold.
            ApplyNMSFast(bboxes, scores, _confidenceThreshold, _nmsThreshold,
                         _topK, &(indices[c]));
            numDetections += indices[c].size();
        }
        if (_keepTopK > -1 && numDetections > _keepTopK)
        {
            // Too many detections: rank them across classes and keep the best.
            std::vector<std::pair<float, std::pair<int, int> > > scoreIndexPairs;
            for (std::map<int, std::vector<int> >::iterator it = indices.begin();
                 it != indices.end(); ++it)
            {
                int label = it->first;
                const std::vector<int>& labelIndices = it->second;
                if (confidenceScores.find(label) == confidenceScores.end())
                {
                    // Something bad happened for current label.
                    util::make_error<int>("Could not find location predictions for label ", label);
                    continue;
                }
                const std::vector<float>& scores = confidenceScores.find(label)->second;
                for (size_t j = 0; j < labelIndices.size(); ++j)
                {
                    size_t idx = labelIndices[j];
                    CV_Assert(idx < scores.size());
                    scoreIndexPairs.push_back(
                        std::make_pair(scores[idx], std::make_pair(label, idx)));
                }
            }
            // Keep outputs k results per image.
            std::sort(scoreIndexPairs.begin(), scoreIndexPairs.end(),
                      util::SortScorePairDescend<std::pair<int, int> >);
            scoreIndexPairs.resize(_keepTopK);
            // Store the new indices.
            std::map<int, std::vector<int> > newIndices;
            for (size_t j = 0; j < scoreIndexPairs.size(); ++j)
            {
                int label = scoreIndexPairs[j].second.first;
                int idx = scoreIndexPairs[j].second.second;
                newIndices[label].push_back(idx);
            }
            allIndices.push_back(newIndices);
            numKept += _keepTopK;
        }
        else
        {
            allIndices.push_back(indices);
            numKept += numDetections;
        }
    }
    if (numKept == 0)
    {
        CV_ErrorNoReturn(Error::StsError, "Couldn't find any detections");
        return; // unreachable: CV_ErrorNoReturn throws
    }

    // Output shape: 1 x 1 x numKept x 7.
    std::vector<int> outputsShape(2, 1);
    outputsShape.push_back(numKept);
    outputsShape.push_back(7);
    outputs[0].create(outputsShape);
    float* outputsData = outputs[0].ptrf();

    // Emit one 7-float row per kept detection:
    // [image_id, label, confidence, xmin, ymin, xmax, ymax].
    int count = 0;
    for (int i = 0; i < _num; ++i)
    {
        const std::map<int, std::vector<float> >& confidenceScores =
            allConfidenceScores[i];
        const LabelBBox& decodeBBoxes = allDecodedBBoxes[i];
        for (std::map<int, std::vector<int> >::iterator it = allIndices[i].begin();
             it != allIndices[i].end(); ++it)
        {
            int label = it->first;
            if (confidenceScores.find(label) == confidenceScores.end())
            {
                // Something bad happened if there are no predictions for current label.
                util::make_error<int>("Could not find confidence predictions for label ", label);
                continue;
            }
            const std::vector<float>& scores = confidenceScores.find(label)->second;
            int locLabel = _shareLocation ? -1 : label;
            if (decodeBBoxes.find(locLabel) == decodeBBoxes.end())
            {
                // Something bad happened if there are no predictions for current label.
                util::make_error<int>("Could not find location predictions for label ", locLabel);
                continue;
            }
            const std::vector<caffe::NormalizedBBox>& bboxes =
                decodeBBoxes.find(locLabel)->second;
            std::vector<int>& indices = it->second;
            for (size_t j = 0; j < indices.size(); ++j)
            {
                int idx = indices[j];
                outputsData[count * 7] = i;
                outputsData[count * 7 + 1] = label;
                outputsData[count * 7 + 2] = scores[idx];
                caffe::NormalizedBBox clipBBox;
                // Clamp coordinates to [0, 1] before emitting.
                ClipBBox(bboxes[idx], &clipBBox);
                outputsData[count * 7 + 3] = clipBBox.xmin();
                outputsData[count * 7 + 4] = clipBBox.ymin();
                outputsData[count * 7 + 5] = clipBBox.xmax();
                outputsData[count * 7 + 6] = clipBBox.ymax();
                ++count;
            }
        }
    }
}
/* Area of a bbox. Degenerate boxes (max < min) have size 0; a cached
 * size on the message is trusted when present. In pixel (non-normalized)
 * coordinates each side counts as inclusive, hence the +1 terms. */
float DetectionOutputLayer::BBoxSize(const caffe::NormalizedBBox& bbox,
                                     const bool normalized)
{
    // If bbox is invalid (e.g. xmax < xmin or ymax < ymin), return 0.
    if (bbox.xmax() < bbox.xmin() || bbox.ymax() < bbox.ymin())
        return 0;

    if (bbox.has_size())
        return bbox.size();

    const float width = bbox.xmax() - bbox.xmin();
    const float height = bbox.ymax() - bbox.ymin();
    // If bbox is not within range [0, 1], treat coordinates as inclusive pixels.
    return normalized ? width * height : (width + 1) * (height + 1);
}
/* Clamp every corner of `bbox` into [0, 1], recompute the cached size,
 * and carry over the difficult flag. */
void DetectionOutputLayer::ClipBBox(const caffe::NormalizedBBox& bbox,
                                    caffe::NormalizedBBox* clipBBox)
{
    const float clippedXMin = std::max(std::min(bbox.xmin(), 1.f), 0.f);
    const float clippedYMin = std::max(std::min(bbox.ymin(), 1.f), 0.f);
    const float clippedXMax = std::max(std::min(bbox.xmax(), 1.f), 0.f);
    const float clippedYMax = std::max(std::min(bbox.ymax(), 1.f), 0.f);

    clipBBox->set_xmin(clippedXMin);
    clipBBox->set_ymin(clippedYMin);
    clipBBox->set_xmax(clippedXMax);
    clipBBox->set_ymax(clippedYMax);

    // Size is invalidated by clipping; recompute it from the new corners.
    clipBBox->clear_size();
    clipBBox->set_size(BBoxSize(*clipBBox));
    clipBBox->set_difficult(bbox.difficult());
}
/* Decode one predicted bbox against its prior box and variance vector.
 * CORNER: prediction is an additive offset on the prior's corners.
 * CENTER_SIZE: prediction is (dcx, dcy, dw, dh) relative to the prior's
 * center and size, with exp() applied to the size deltas.
 * When varianceEncodedInTarget is true the variance is already folded
 * into the prediction; otherwise each coordinate is scaled by the
 * corresponding priorVariance entry. */
void DetectionOutputLayer::DecodeBBox(
    const caffe::NormalizedBBox& priorBBox, const std::vector<float>& priorVariance,
    const CodeType codeType, const bool varianceEncodedInTarget,
    const caffe::NormalizedBBox& bbox, caffe::NormalizedBBox* decodeBBox)
{
    if (codeType == caffe::PriorBoxParameter_CodeType_CORNER)
    {
        if (varianceEncodedInTarget)
        {
            // variance is encoded in target, we simply need to add the offset
            // predictions.
            decodeBBox->set_xmin(priorBBox.xmin() + bbox.xmin());
            decodeBBox->set_ymin(priorBBox.ymin() + bbox.ymin());
            decodeBBox->set_xmax(priorBBox.xmax() + bbox.xmax());
            decodeBBox->set_ymax(priorBBox.ymax() + bbox.ymax());
        }
        else
        {
            // variance is encoded in bbox, we need to scale the offset accordingly.
            decodeBBox->set_xmin(
                priorBBox.xmin() + priorVariance[0] * bbox.xmin());
            decodeBBox->set_ymin(
                priorBBox.ymin() + priorVariance[1] * bbox.ymin());
            decodeBBox->set_xmax(
                priorBBox.xmax() + priorVariance[2] * bbox.xmax());
            decodeBBox->set_ymax(
                priorBBox.ymax() + priorVariance[3] * bbox.ymax());
        }
    }
    else
    if (codeType == caffe::PriorBoxParameter_CodeType_CENTER_SIZE)
    {
        // Recover the prior's center and extents; priors must be non-degenerate.
        float priorWidth = priorBBox.xmax() - priorBBox.xmin();
        CV_Assert(priorWidth > 0);
        float priorHeight = priorBBox.ymax() - priorBBox.ymin();
        CV_Assert(priorHeight > 0);
        float priorCenterX = (priorBBox.xmin() + priorBBox.xmax()) / 2.;
        float priorCenterY = (priorBBox.ymin() + priorBBox.ymax()) / 2.;
        float decodeBBoxCenterX, decodeBBoxCenterY;
        float decodeBBoxWidth, decodeBBoxHeight;
        if (varianceEncodedInTarget)
        {
            // variance is encoded in target, we simply need to retore the offset
            // predictions.
            decodeBBoxCenterX = bbox.xmin() * priorWidth + priorCenterX;
            decodeBBoxCenterY = bbox.ymin() * priorHeight + priorCenterY;
            decodeBBoxWidth = exp(bbox.xmax()) * priorWidth;
            decodeBBoxHeight = exp(bbox.ymax()) * priorHeight;
        }
        else
        {
            // variance is encoded in bbox, we need to scale the offset accordingly.
            decodeBBoxCenterX =
                priorVariance[0] * bbox.xmin() * priorWidth + priorCenterX;
            decodeBBoxCenterY =
                priorVariance[1] * bbox.ymin() * priorHeight + priorCenterY;
            decodeBBoxWidth =
                exp(priorVariance[2] * bbox.xmax()) * priorWidth;
            decodeBBoxHeight =
                exp(priorVariance[3] * bbox.ymax()) * priorHeight;
        }
        // Convert center/size form back to corner form.
        decodeBBox->set_xmin(decodeBBoxCenterX - decodeBBoxWidth / 2.);
        decodeBBox->set_ymin(decodeBBoxCenterY - decodeBBoxHeight / 2.);
        decodeBBox->set_xmax(decodeBBoxCenterX + decodeBBoxWidth / 2.);
        decodeBBox->set_ymax(decodeBBoxCenterY + decodeBBoxHeight / 2.);
    }
    else
    {
        CV_Error(Error::StsBadArg, "Unknown LocLossType.");
    }
    // Cache the decoded box's area on the message.
    float bboxSize = BBoxSize(*decodeBBox);
    decodeBBox->set_size(bboxSize);
}
/* Decode a full set of predicted bboxes against a parallel set of priors
 * and variances. Priors, variances and predictions must line up 1:1. */
void DetectionOutputLayer::DecodeBBoxes(
    const std::vector<caffe::NormalizedBBox>& priorBBoxes,
    const std::vector<std::vector<float> >& priorVariances,
    const CodeType codeType, const bool varianceEncodedInTarget,
    const std::vector<caffe::NormalizedBBox>& bboxes,
    std::vector<caffe::NormalizedBBox>* decodeBBoxes)
{
    CV_Assert(priorBBoxes.size() == priorVariances.size());
    CV_Assert(priorBBoxes.size() == bboxes.size());
    const int numBBoxes = (int)priorBBoxes.size();
    if (numBBoxes >= 1)
        CV_Assert(priorVariances[0].size() == 4);

    // Decode directly into the output vector instead of copying temporaries.
    decodeBBoxes->clear();
    decodeBBoxes->resize(numBBoxes);
    for (int i = 0; i < numBBoxes; ++i)
        DecodeBBox(priorBBoxes[i], priorVariances[i], codeType,
                   varianceEncodedInTarget, bboxes[i], &(*decodeBBoxes)[i]);
}
/* Decode predictions for every image in the batch, class by class.
 * The background label is skipped; a missing per-label prediction set is
 * a fatal error. */
void DetectionOutputLayer::DecodeBBoxesAll(
    const std::vector<LabelBBox>& allLocPreds,
    const std::vector<caffe::NormalizedBBox>& priorBBoxes,
    const std::vector<std::vector<float> >& priorVariances,
    const size_t num, const bool shareLocation,
    const int numLocClasses, const int backgroundLabelId,
    const CodeType codeType, const bool varianceEncodedInTarget,
    std::vector<LabelBBox>* allDecodeBBoxes)
{
    CV_Assert(allLocPreds.size() == num);
    allDecodeBBoxes->clear();
    allDecodeBBoxes->resize(num);
    for (size_t imgIdx = 0; imgIdx < num; ++imgIdx)
    {
        const LabelBBox& locPreds = allLocPreds[imgIdx];
        LabelBBox& decoded = (*allDecodeBBoxes)[imgIdx];
        for (int cls = 0; cls < numLocClasses; ++cls)
        {
            const int label = shareLocation ? -1 : cls;
            if (label == backgroundLabelId)
            {
                // Background boxes are never decoded.
                continue;
            }
            LabelBBox::const_iterator found = locPreds.find(label);
            if (found == locPreds.end())
            {
                // Something bad happened if there are no predictions for current label.
                util::make_error<int>("Could not find location predictions for label ", label);
            }
            DecodeBBoxes(priorBBoxes, priorVariances,
                         codeType, varianceEncodedInTarget,
                         found->second, &decoded[label]);
        }
    }
}
/* Unpack prior boxes and their variances from the raw prior blob.
 * Layout of priorData: numPriors boxes of 4 floats, followed by
 * numPriors variance quadruples. */
void DetectionOutputLayer::GetPriorBBoxes(const float* priorData, const int& numPriors,
                                          std::vector<caffe::NormalizedBBox>* priorBBoxes,
                                          std::vector<std::vector<float> >* priorVariances)
{
    priorBBoxes->clear();
    priorVariances->clear();
    const float* varianceData = priorData + numPriors * 4;
    for (int i = 0; i < numPriors; ++i)
    {
        const float* box = priorData + i * 4;
        caffe::NormalizedBBox bbox;
        bbox.set_xmin(box[0]);
        bbox.set_ymin(box[1]);
        bbox.set_xmax(box[2]);
        bbox.set_ymax(box[3]);
        bbox.set_size(BBoxSize(bbox));
        priorBBoxes->push_back(bbox);

        const float* var = varianceData + i * 4;
        priorVariances->push_back(std::vector<float>(var, var + 4));
    }
}
/* Scale a normalized bbox into `width` x `height` coordinates, refresh
 * its cached size, and carry over the difficult flag. */
void DetectionOutputLayer::ScaleBBox(const caffe::NormalizedBBox& bbox,
                                     const int height, const int width,
                                     caffe::NormalizedBBox* scaleBBox)
{
    // Coordinates remain "normalized" only when neither dimension exceeds 1.
    const bool normalized = (width <= 1 && height <= 1);

    scaleBBox->set_xmin(bbox.xmin() * width);
    scaleBBox->set_ymin(bbox.ymin() * height);
    scaleBBox->set_xmax(bbox.xmax() * width);
    scaleBBox->set_ymax(bbox.ymax() * height);

    scaleBBox->clear_size();
    scaleBBox->set_size(BBoxSize(*scaleBBox, normalized));
    scaleBBox->set_difficult(bbox.difficult());
}
void DetectionOutputLayer::GetLocPredictions(
const float* locData, const int num,
const int numPredsPerClass, const int numLocClasses,
const bool shareLocation, std::vector<LabelBBox>* locPreds)
{
locPreds->clear();
if (shareLocation)
{
CV_Assert(numLocClasses == 1);
}
locPreds->resize(num);
for (int i = 0; i < num; ++i)
{
LabelBBox& labelBBox = (*locPreds)[i];
for (int p = 0; p < numPredsPerClass; ++p)
{
int startIdx = p * numLocClasses * 4;
for (int c = 0; c < numLocClasses; ++c)
{
int label = shareLocation ? -1 : c;
if (labelBBox.find(label) == labelBBox.end())
{
labelBBox[label].resize(numPredsPerClass);
}
labelBBox[label][p].set_xmin(locData[startIdx + c * 4]);
labelBBox[label][p].set_ymin(locData[startIdx + c * 4 + 1]);
labelBBox[label][p].set_xmax(locData[startIdx + c * 4 + 2]);
labelBBox[label][p].set_ymax(locData[startIdx + c * 4 + 3]);
}
}
locData += numPredsPerClass * numLocClasses * 4;
}
}
/* Unpack class confidences from confData into one label->scores map per
 * image. confData layout per image: numPredsPerClass rows of numClasses
 * scores. */
void DetectionOutputLayer::GetConfidenceScores(
    const float* confData, const int num,
    const int numPredsPerClass, const int numClasses,
    std::vector<std::map<int, std::vector<float> > >* confPreds)
{
    confPreds->clear();
    confPreds->resize(num);
    const int imageStride = numPredsPerClass * numClasses;
    for (int imgIdx = 0; imgIdx < num; ++imgIdx, confData += imageStride)
    {
        std::map<int, std::vector<float> >& labelScores = (*confPreds)[imgIdx];
        for (int cls = 0; cls < numClasses; ++cls)
        {
            std::vector<float>& scores = labelScores[cls];
            scores.reserve(numPredsPerClass);
            for (int pred = 0; pred < numPredsPerClass; ++pred)
                scores.push_back(confData[pred * numClasses + cls]);
        }
    }
}
/* Greedy non-maximum suppression.
 * Candidates above score_threshold are visited best-first (at most top_k
 * of them); a box is kept only if its Jaccard overlap with every
 * already-kept box is <= nms_threshold. Kept original-array indices are
 * appended to *indices. */
void DetectionOutputLayer::ApplyNMSFast(const std::vector<caffe::NormalizedBBox>& bboxes,
                                        const std::vector<float>& scores,
                                        const float score_threshold,
                                        const float nms_threshold, const int top_k,
                                        std::vector<int>* indices)
{
    // Sanity check.
    CV_Assert(bboxes.size() == scores.size());

    // Get top_k scores (with corresponding indices), sorted descending.
    std::vector<std::pair<float, int> > score_index_vec;
    GetMaxScoreIndex(scores, score_threshold, top_k, &score_index_vec);

    // Do nms. Iterating by position instead of erasing the vector front
    // avoids the O(n^2) element shifting of the previous erase-based loop.
    indices->clear();
    for (size_t n = 0; n < score_index_vec.size(); ++n)
    {
        const int idx = score_index_vec[n].second;
        bool keep = true;
        for (size_t k = 0; k < indices->size() && keep; ++k)
        {
            const int kept_idx = (*indices)[k];
            const float overlap = JaccardOverlap(bboxes[idx], bboxes[kept_idx]);
            keep = overlap <= nms_threshold;
        }
        if (keep)
            indices->push_back(idx);
    }
}
/* Collect (score, index) pairs for all scores strictly above `threshold`,
 * sort them by descending score (stable, so ties keep index order), and
 * truncate to the best top_k when top_k > -1. */
void DetectionOutputLayer::GetMaxScoreIndex(
    const std::vector<float>& scores, const float threshold, const int top_k,
    std::vector<std::pair<float, int> >* score_index_vec)
{
    for (size_t i = 0; i < scores.size(); ++i)
    {
        if (scores[i] > threshold)
            score_index_vec->push_back(std::make_pair(scores[i], (int)i));
    }

    // Stable sort preserves original index order among equal scores.
    std::stable_sort(score_index_vec->begin(), score_index_vec->end(),
                     util::SortScorePairDescend<int>);

    if (top_k > -1 && top_k < (int)score_index_vec->size())
        score_index_vec->resize(top_k);
}
/* Compute the intersection rectangle of two bboxes; disjoint boxes yield
 * the degenerate box [0, 0, 0, 0]. */
void DetectionOutputLayer::IntersectBBox(const caffe::NormalizedBBox& bbox1,
                                         const caffe::NormalizedBBox& bbox2,
                                         caffe::NormalizedBBox* intersect_bbox)
{
    const bool disjoint =
        bbox2.xmin() > bbox1.xmax() || bbox2.xmax() < bbox1.xmin() ||
        bbox2.ymin() > bbox1.ymax() || bbox2.ymax() < bbox1.ymin();
    if (disjoint)
    {
        // Return [0, 0, 0, 0] if there is no intersection.
        intersect_bbox->set_xmin(0);
        intersect_bbox->set_ymin(0);
        intersect_bbox->set_xmax(0);
        intersect_bbox->set_ymax(0);
    }
    else
    {
        // Overlap region: inner corners of the two boxes.
        intersect_bbox->set_xmin(std::max(bbox1.xmin(), bbox2.xmin()));
        intersect_bbox->set_ymin(std::max(bbox1.ymin(), bbox2.ymin()));
        intersect_bbox->set_xmax(std::min(bbox1.xmax(), bbox2.xmax()));
        intersect_bbox->set_ymax(std::min(bbox1.ymax(), bbox2.ymax()));
    }
}
/* Jaccard (intersection-over-union) overlap of two bboxes.
 * In pixel (non-normalized) coordinates the sides are inclusive, hence
 * the +1 on each extent. Non-overlapping boxes return 0. */
float DetectionOutputLayer::JaccardOverlap(const caffe::NormalizedBBox& bbox1,
                                           const caffe::NormalizedBBox& bbox2,
                                           const bool normalized)
{
    caffe::NormalizedBBox inter;
    IntersectBBox(bbox1, bbox2, &inter);

    const float pad = normalized ? 0.f : 1.f;
    const float interWidth = inter.xmax() - inter.xmin() + pad;
    const float interHeight = inter.ymax() - inter.ymin() + pad;
    if (interWidth <= 0 || interHeight <= 0)
        return 0.;

    const float interSize = interWidth * interHeight;
    // IoU = |A n B| / (|A| + |B| - |A n B|).
    return interSize / (BBoxSize(bbox1) + BBoxSize(bbox2) - interSize);
}
}
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_DETECTION_OUTPUT_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_DETECTION_OUTPUT_LAYER_HPP__
#include "../precomp.hpp"
#include "caffe.pb.h"
namespace cv
{
namespace dnn
{
/* SSD DetectionOutput layer.
 * Consumes three bottom blobs -- location predictions, per-class
 * confidences and prior boxes with variances -- and produces final
 * detection rows [image_id, label, confidence, xmin, ymin, xmax, ymax]
 * after bbox decoding and per-class non-maximum suppression. */
class DetectionOutputLayer : public Layer
{
    unsigned _numClasses;          // number of classes to predict
    bool _shareLocation;           // true: one box set shared by all classes
    int _numLocClasses;            // 1 when _shareLocation, else _numClasses
    int _backgroundLabelId;        // label skipped during decoding and NMS
    typedef caffe::PriorBoxParameter_CodeType CodeType;
    CodeType _codeType;            // bbox encoding (CORNER or CENTER_SIZE)
    bool _varianceEncodedInTarget; // true: variance already folded into predictions
    int _keepTopK;                 // max detections kept per image (-1: keep all)
    float _confidenceThreshold;    // minimum score considered by NMS
    int _num;                      // batch size, set in allocate()
    int _numPriors;                // number of prior boxes, set in allocate()
    float _nmsThreshold;           // IoU threshold used by NMS
    int _topK;                     // max boxes fed into NMS per class (-1: all)
    static const size_t _numAxes = 4;
    static const std::string _layerName;
public:
    DetectionOutputLayer(LayerParams &params);
    void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
    void checkInputs(const std::vector<Blob*> &inputs);
    void getCodeType(LayerParams &params);
    // Read element `idx` of parameter `parameterName`; error if `required`
    // and missing, otherwise fall back to `defaultValue`.
    template<typename T>
    T getParameter(const LayerParams &params,
                   const std::string &parameterName,
                   const size_t &idx = 0,
                   const bool required = true,
                   const T& defaultValue = T());
    // Fetch a raw DictValue; returns false when the parameter is absent.
    bool getParameterDict(const LayerParams &params,
                          const std::string &parameterName,
                          DictValue& result);
    // Maps a class label (-1 when locations are shared) to its boxes.
    typedef std::map<int, std::vector<caffe::NormalizedBBox> > LabelBBox;
    // Clip the caffe::NormalizedBBox such that the range for each corner is [0, 1].
    void ClipBBox(const caffe::NormalizedBBox& bbox, caffe::NormalizedBBox* clip_bbox);
    // Decode a bbox according to a prior bbox.
    void DecodeBBox(const caffe::NormalizedBBox& prior_bbox,
                    const std::vector<float>& prior_variance, const CodeType code_type,
                    const bool variance_encoded_in_target, const caffe::NormalizedBBox& bbox,
                    caffe::NormalizedBBox* decode_bbox);
    // Decode a set of bboxes according to a set of prior bboxes.
    void DecodeBBoxes(const std::vector<caffe::NormalizedBBox>& prior_bboxes,
                      const std::vector<std::vector<float> >& prior_variances,
                      const CodeType code_type, const bool variance_encoded_in_target,
                      const std::vector<caffe::NormalizedBBox>& bboxes,
                      std::vector<caffe::NormalizedBBox>* decode_bboxes);
    // Decode all bboxes in a batch.
    void DecodeBBoxesAll(const std::vector<LabelBBox>& all_loc_pred,
                         const std::vector<caffe::NormalizedBBox>& prior_bboxes,
                         const std::vector<std::vector<float> >& prior_variances,
                         const size_t num, const bool share_location,
                         const int num_loc_classes, const int background_label_id,
                         const CodeType code_type, const bool variance_encoded_in_target,
                         std::vector<LabelBBox>* all_decode_bboxes);
    // Get prior bounding boxes from prior_data.
    // prior_data: 1 x 2 x num_priors * 4 x 1 blob.
    // num_priors: number of priors.
    // prior_bboxes: stores all the prior bboxes in the format of caffe::NormalizedBBox.
    // prior_variances: stores all the variances needed by prior bboxes.
    void GetPriorBBoxes(const float* priorData, const int& numPriors,
                        std::vector<caffe::NormalizedBBox>* priorBBoxes,
                        std::vector<std::vector<float> >* priorVariances);
    // Scale the caffe::NormalizedBBox w.r.t. height and width.
    void ScaleBBox(const caffe::NormalizedBBox& bbox, const int height, const int width,
                   caffe::NormalizedBBox* scale_bbox);
    // Do non maximum suppression given bboxes and scores.
    // Inspired by Piotr Dollar's NMS implementation in EdgeBox.
    // https://goo.gl/jV3JYS
    // bboxes: a set of bounding boxes.
    // scores: a set of corresponding confidences.
    // score_threshold: a threshold used to filter detection results.
    // nms_threshold: a threshold used in non maximum suppression.
    // top_k: if not -1, keep at most top_k picked indices.
    // indices: the kept indices of bboxes after nms.
    void ApplyNMSFast(const std::vector<caffe::NormalizedBBox>& bboxes,
                      const std::vector<float>& scores, const float score_threshold,
                      const float nms_threshold, const int top_k, std::vector<int>* indices);
    // Do non maximum suppression given bboxes and scores.
    // bboxes: a set of bounding boxes.
    // scores: a set of corresponding confidences.
    // threshold: the threshold used in non maximu suppression.
    // top_k: if not -1, keep at most top_k picked indices.
    // reuse_overlaps: if true, use and update overlaps; otherwise, always
    // compute overlap.
    // overlaps: a temp place to optionally store the overlaps between pairs of
    // bboxes if reuse_overlaps is true.
    // indices: the kept indices of bboxes after nms.
    // NOTE(review): declared but no definition visible in this file — confirm
    // these ApplyNMS overloads are implemented or remove the declarations.
    void ApplyNMS(const std::vector<caffe::NormalizedBBox>& bboxes,
                  const std::vector<float>& scores,
                  const float threshold, const int top_k, const bool reuse_overlaps,
                  std::map<int, std::map<int, float> >* overlaps, std::vector<int>* indices);
    void ApplyNMS(const bool* overlapped, const int num, std::vector<int>* indices);
    // Get confidence predictions from conf_data.
    // conf_data: num x num_preds_per_class * num_classes blob.
    // num: the number of images.
    // num_preds_per_class: number of predictions per class.
    // num_classes: number of classes.
    // conf_preds: stores the confidence prediction, where each item contains
    // confidence prediction for an image.
    void GetConfidenceScores(const float* conf_data, const int num,
                             const int num_preds_per_class, const int num_classes,
                             std::vector<std::map<int, std::vector<float> > >* conf_scores);
    // Get confidence predictions from conf_data.
    // conf_data: num x num_preds_per_class * num_classes blob.
    // num: the number of images.
    // num_preds_per_class: number of predictions per class.
    // num_classes: number of classes.
    // class_major: if true, data layout is
    // num x num_classes x num_preds_per_class; otherwise, data layerout is
    // num x num_preds_per_class * num_classes.
    // conf_preds: stores the confidence prediction, where each item contains
    // confidence prediction for an image.
    void GetConfidenceScores(const float* conf_data, const int num,
                             const int num_preds_per_class, const int num_classes,
                             const bool class_major,
                             std::vector<std::map<int, std::vector<float> > >* conf_scores);
    // Get location predictions from loc_data.
    // loc_data: num x num_preds_per_class * num_loc_classes * 4 blob.
    // num: the number of images.
    // num_preds_per_class: number of predictions per class.
    // num_loc_classes: number of location classes. It is 1 if share_location is
    // true; and is equal to number of classes needed to predict otherwise.
    // share_location: if true, all classes share the same location prediction.
    // loc_preds: stores the location prediction, where each item contains
    // location prediction for an image.
    void GetLocPredictions(const float* loc_data, const int num,
                           const int num_preds_per_class, const int num_loc_classes,
                           const bool share_location, std::vector<LabelBBox>* loc_preds);
    // Get max scores with corresponding indices.
    // scores: a set of scores.
    // threshold: only consider scores higher than the threshold.
    // top_k: if -1, keep all; otherwise, keep at most top_k.
    // score_index_vec: store the sorted (score, index) pair.
    void GetMaxScoreIndex(const std::vector<float>& scores, const float threshold,
                          const int top_k, std::vector<std::pair<float, int> >* score_index_vec);
    // Compute the jaccard (intersection over union IoU) overlap between two bboxes.
    float JaccardOverlap(const caffe::NormalizedBBox& bbox1, const caffe::NormalizedBBox& bbox2,
                         const bool normalized = true);
    // Compute the intersection between two bboxes.
    void IntersectBBox(const caffe::NormalizedBBox& bbox1, const caffe::NormalizedBBox& bbox2,
                       caffe::NormalizedBBox* intersect_bbox);
    // Compute bbox size.
    float BBoxSize(const caffe::NormalizedBBox& bbox, const bool normalized = true);
};
}
}
#endif
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "eltwise_layer.hpp"
namespace cv
{
namespace dnn
{
/**
 * Constructs an element-wise layer from Caffe-style parameters.
 * "operation" selects "prod", "sum" (default) or "max";
 * "coeff" supplies optional per-input coefficients (used only for SUM,
 * validated in allocate()).
 */
EltwiseLayer::EltwiseLayer(LayerParams &params) : Layer(params)
{
    if (params.has("operation"))
    {
        String operation = params.get<String>("operation").toLowerCase();
        if (operation == "prod")
            op = PROD;
        else if (operation == "sum")
            op = SUM;
        else if (operation == "max")
            op = MAX;
        else
            // Fix: error message previously read "operaticon".
            CV_Error(cv::Error::StsBadArg, "Unknown operation type \"" + operation + "\"");
    }
    else
    {
        op = SUM; // Caffe's default eltwise operation
    }

    if (params.has("coeff"))
    {
        DictValue paramCoeff = params.get("coeff");
        coeffs.resize(paramCoeff.size(), 1);
        for (int i = 0; i < paramCoeff.size(); i++)
        {
            // NOTE(review): coefficients are stored as int (see header member),
            // while Caffe's EltwiseParameter allows float coeffs — TODO confirm.
            coeffs[i] = paramCoeff.get<int>(i);
        }
    }
}
// Validates input compatibility and creates the single output blob.
// Requires at least two inputs, all with identical shapes; coefficients
// (if any) must match the input count and are only valid for SUM.
void EltwiseLayer::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
    CV_Assert(2 <= inputs.size());
    CV_Assert(coeffs.size() == 0 || coeffs.size() == inputs.size());
    CV_Assert(op == SUM || coeffs.size() == 0);

    const BlobShape &refShape = inputs[0]->shape();
    for (size_t idx = 1; idx < inputs.size(); ++idx)
        CV_Assert(refShape == inputs[idx]->shape());

    outputs.resize(1);
    outputs[0].create(refShape);
}
// Applies the configured element-wise operation across all inputs,
// writing the combined result into the single output blob.
void EltwiseLayer::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
    Mat &dst = outputs[0].matRef();

    if (op == SUM)
    {
        CV_Assert(coeffs.size() == 0 || coeffs.size() == inputs.size());
        dst.setTo(0.);
        const bool weighted = !coeffs.empty();
        for (size_t i = 0; i < inputs.size(); i++)
        {
            if (weighted)
                dst += inputs[i]->matRefConst() * coeffs[i]; // weighted accumulation
            else
                dst += inputs[i]->matRefConst();             // plain accumulation
        }
    }
    else if (op == PROD)
    {
        dst.setTo(1.);
        for (size_t i = 0; i < inputs.size(); i++)
            dst = dst.mul(inputs[i]->matRefConst()); // element-wise product
    }
    else if (op == MAX)
    {
        // Seed with the pairwise max of the first two inputs, then fold in the rest.
        cv::max(inputs[0]->matRefConst(), inputs[1]->matRefConst(), dst);
        for (size_t i = 2; i < inputs.size(); i++)
            cv::max(dst, inputs[i]->matRefConst(), dst);
    }
    else
    {
        CV_Assert(0); // unreachable: op is validated in the constructor
    }
}
}
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_ELTWISE_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_ELTWISE_LAYER_HPP__
#include "../precomp.hpp"
namespace cv
{
namespace dnn
{
// Element-wise layer: combines several equally-shaped input blobs into one
// output blob using a per-element operation (product, sum or maximum).
class EltwiseLayer : public Layer
{
// Supported element-wise operations (values match Caffe's EltwiseOp enum).
enum EltwiseOp
{
PROD = 0,
SUM = 1,
MAX = 2,
};
EltwiseOp op;            // operation selected from LayerParams ("operation"); SUM by default
std::vector<int> coeffs; // optional per-input coefficients; used only when op == SUM
public:
EltwiseLayer(LayerParams& params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
}
}
#endif
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "flatten_layer.hpp"
#include <float.h>
#include <algorithm>
namespace cv
{
namespace dnn
{
const std::string FlattenLayer::_layerName = std::string("Flatten");
// Fetches the raw DictValue for parameterName from params.
// Returns false (leaving result untouched) when the parameter is absent.
bool FlattenLayer::getParameterDict(const LayerParams &params,
                                    const std::string &parameterName,
                                    DictValue& result)
{
    const bool present = params.has(parameterName);
    if (present)
        result = params.get(parameterName);
    return present;
}
// Typed parameter lookup. When the parameter is missing, either raises
// (required == true) or yields defaultValue. idx selects the element for
// multi-valued parameters.
template<typename T>
T FlattenLayer::getParameter(const LayerParams &params,
                             const std::string &parameterName,
                             const size_t &idx,
                             const bool required,
                             const T& defaultValue)
{
    DictValue dictValue;
    if (!getParameterDict(params, parameterName, dictValue))
    {
        if (!required)
            return defaultValue;

        // Build the same diagnostic message as before and raise.
        std::string message = _layerName;
        message += " layer parameter does not contain ";
        message += parameterName;
        message += " parameter.";
        CV_Error(Error::StsBadArg, message);
    }
    return dictValue.get<T>(idx);
}
// Reads the flattening range from Caffe-style parameters.
FlattenLayer::FlattenLayer(LayerParams &params) : Layer(params)
{
    // First axis to flatten. Caffe's FlattenParameter defaults axis to 1,
    // so a missing "axis" now defaults to 1 instead of raising an error
    // (previously this parameter was treated as required).
    _startAxis = getParameter<int>(params, "axis", 0, false, 1);
    // Last axis to flatten (inclusive); -1 (the default) means the last axis.
    _endAxis = getParameter<int>(params, "end_axis", 0, false, -1);
}
// Asserts that all inputs share the shape of inputs[0].
//
// Fix: the previous implementation iterated j over _numAxes, but allocate()
// calls checkInputs() *before* assigning _numAxes, so the loop bound was
// read uninitialized on the first call. Use the actual dimension count of
// the first input instead.
void FlattenLayer::checkInputs(const std::vector<Blob*> &inputs)
{
    CV_Assert(inputs.size() > 0);
    const size_t dims = (size_t)inputs[0]->shape().dims();
    for (size_t i = 1; i < inputs.size(); i++)
    {
        for (size_t j = 0; j < dims; j++)
        {
            CV_Assert(inputs[i]->shape()[j] == inputs[0]->shape()[j]);
        }
    }
}
// Computes the flattened output shape and creates one output blob per input.
void FlattenLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    CV_Assert(inputs.size() > 0);
    // Fix: record dimensionality *before* validation — checkInputs() iterates
    // over _numAxes, which was previously read uninitialized here.
    _numAxes = inputs[0]->shape().dims();
    checkInputs(inputs);

    // Resolve a non-positive end axis relative to the number of axes
    // (e.g. the default -1 selects the last axis).
    if (_endAxis <= 0)
    {
        _endAxis += _numAxes;
    }
    CV_Assert(_startAxis >= 0);
    CV_Assert(_endAxis >= _startAxis && _endAxis < (int)_numAxes);

    // Product of the sizes of all axes being collapsed into one.
    size_t flattenedDimensionSize = 1;
    for (int i = _startAxis; i <= _endAxis; i++)
    {
        flattenedDimensionSize *= inputs[0]->shape()[i];
    }

    // Output shape: untouched leading axes, then the single flattened axis,
    // then the untouched trailing axes.
    std::vector<int> outputShape;
    for (int i = 0; i < _startAxis; i++)
    {
        outputShape.push_back(inputs[0]->shape()[i]);
    }
    outputShape.push_back(flattenedDimensionSize);
    for (size_t i = _endAxis + 1; i < _numAxes; i++)
    {
        outputShape.push_back(inputs[0]->shape()[i]);
    }
    CV_Assert(outputShape.size() <= 4);

    // Fix: size the outputs vector before creating blobs in it; sibling
    // layers (e.g. Eltwise, Permute) resize outputs in allocate() as well.
    outputs.resize(inputs.size());
    for (size_t i = 0; i < inputs.size(); i++)
    {
        outputs[i].create(BlobShape(outputShape));
    }
}
// Flattening does not move data: each output Mat header is assigned from
// the corresponding input, so output and input share the same buffer.
// NOTE(review): this copies the Mat *header* only; the output blob's shape
// was established in allocate() — confirm Blob keeps its own shape metadata.
void FlattenLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
for (size_t j = 0; j < inputs.size(); j++)
{
outputs[j].matRef() = inputs[j]->matRef();
}
}
}
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_FLATTEN_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_FLATTEN_LAYER_HPP__
#include "../precomp.hpp"
namespace cv
{
namespace dnn
{
// Flatten layer: collapses the axes in [_startAxis, _endAxis] of each input
// blob into a single axis, leaving all other axes untouched (Caffe Flatten).
class FlattenLayer : public Layer
{
int _startAxis;  // first axis to flatten (from the "axis" parameter)
int _endAxis;    // last axis to flatten, inclusive; <= 0 is resolved relative to dims in allocate()
size_t _numAxes; // dimensionality of the inputs; assigned in allocate()
static const std::string _layerName; // layer name used in error messages
public:
FlattenLayer(LayerParams &params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
// Asserts that all inputs share the shape of inputs[0].
void checkInputs(const std::vector<Blob*> &inputs);
// Typed parameter lookup; raises when required and absent,
// otherwise returns defaultValue. idx selects the element for
// multi-valued parameters.
template<typename T>
T getParameter(const LayerParams &params,
const std::string &parameterName,
const size_t &idx = 0,
const bool required = true,
const T& defaultValue = T());
// Fetches the raw DictValue; returns false when the parameter is absent.
bool getParameterDict(const LayerParams &params,
const std::string &parameterName, DictValue &result);
};
}
}
#endif
......@@ -46,5 +46,104 @@ namespace cv
namespace dnn
{
namespace util
{
// Concatenates two name fragments, e.g. ("kernel", "_h") -> "kernel_h".
std::string makeName(const std::string& str1, const std::string& str2)
{
    std::string joined(str1);
    joined += str2;
    return joined;
}
// Reads an H/W parameter pair from params.
// Priority: explicit "<nameBase>_h"/"<nameBase>_w" pair, then the combined
// parameter nameAll (or nameBase itself when nameAll is empty), then
// defaultValue when hasDefault is set. Returns false when nothing matched.
bool getParameter(LayerParams &params, const std::string& nameBase, const std::string& nameAll, int &parameterH, int &parameterW, bool hasDefault = false, const int& defaultValue = 0)
{
    const std::string nameH = makeName(nameBase, std::string("_h"));
    const std::string nameW = makeName(nameBase, std::string("_w"));
    const std::string combinedName = nameAll.empty() ? nameBase : nameAll;

    // Separate H/W values take precedence over the combined form.
    if (params.has(nameH) && params.has(nameW))
    {
        parameterH = params.get<int>(nameH);
        parameterW = params.get<int>(nameW);
        return true;
    }
    if (params.has(combinedName))
    {
        parameterH = parameterW = params.get<int>(combinedName);
        return true;
    }
    if (hasDefault)
    {
        parameterH = parameterW = defaultValue;
        return true;
    }
    return false;
}
// Reads kernel dimensions; raises when neither kernel_size nor the
// kernel_h/kernel_w pair is given, and validates that both are positive.
void getKernelSize(LayerParams &params, int &kernelH, int &kernelW)
{
    const bool found = util::getParameter(params, "kernel", "kernel_size", kernelH, kernelW);
    if (!found)
        CV_Error(cv::Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");
    CV_Assert(kernelH > 0 && kernelW > 0);
}
// Reads pad (default 0) and stride (default 1) pairs and validates ranges.
void getStrideAndPadding(LayerParams &params, int &padH, int &padW, int &strideH, int &strideW)
{
    util::getParameter(params, "pad", "pad", padH, padW, true, 0);
    util::getParameter(params, "stride", "stride", strideH, strideW, true, 1);
    CV_Assert(padH >= 0 && padW >= 0);
    CV_Assert(strideH > 0 && strideW > 0);
}
}
// Parses pooling geometry. In global_pooling mode the kernel covers the whole
// input, so an explicit kernel size is forbidden and pad/stride must keep
// their defaults (0 and 1 respectively); otherwise a kernel size is required.
void getPoolingKernelParams(LayerParams &params, int &kernelH, int &kernelW, bool &globalPooling, int &padH, int &padW, int &strideH, int &strideW)
{
    util::getStrideAndPadding(params, padH, padW, strideH, strideW);

    globalPooling = params.has("global_pooling");
    if (!globalPooling)
    {
        util::getKernelSize(params, kernelH, kernelW);
        return;
    }

    const bool kernelGiven = params.has("kernel_h") || params.has("kernel_w") || params.has("kernel_size");
    if (kernelGiven)
    {
        CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified");
    }
    if (padH != 0 || padW != 0 || strideH != 1 || strideW != 1)
    {
        CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pad_h and pad_w must be = 0, and stride_h and stride_w must be = 1");
    }
}
// Parses convolution geometry: kernel (required), pad/stride (defaulted to
// 0 and 1), and dilation (default 1; must be strictly positive).
void getConvolutionKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW, int &dilationH, int &dilationW)
{
    util::getKernelSize(params, kernelH, kernelW);
    util::getStrideAndPadding(params, padH, padW, strideH, strideW);
    util::getParameter(params, "dilation", "dilation", dilationH, dilationW, true, 1);
    CV_Assert(dilationH > 0);
    CV_Assert(dilationW > 0);
}
}
}
......@@ -50,6 +50,10 @@ namespace cv
namespace dnn
{
void getConvolutionKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW, int &dilationH, int &dilationW);
void getPoolingKernelParams(LayerParams &params, int &kernelH, int &kernelW, bool &globalPooling, int &padH, int &padW, int &strideH, int &strideW);
}
}
......
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "normalize_bbox_layer.hpp"
#include "op_blas.hpp"
#include <float.h>
#include <algorithm>
namespace cv
{
namespace dnn
{
const std::string NormalizeBBoxLayer::_layerName = std::string("NormalizeBBox");
// Fetches the raw DictValue for parameterName from params.
// Returns false (leaving result untouched) when the parameter is absent.
bool NormalizeBBoxLayer::getParameterDict(const LayerParams &params,
                                          const std::string &parameterName,
                                          DictValue& result)
{
    const bool present = params.has(parameterName);
    if (present)
        result = params.get(parameterName);
    return present;
}
// Typed parameter lookup. When the parameter is missing, either raises
// (required == true) or yields defaultValue. idx selects the element for
// multi-valued parameters.
template<typename T>
T NormalizeBBoxLayer::getParameter(const LayerParams &params,
                                   const std::string &parameterName,
                                   const size_t &idx,
                                   const bool required,
                                   const T& defaultValue)
{
    DictValue dictValue;
    if (!getParameterDict(params, parameterName, dictValue))
    {
        if (!required)
            return defaultValue;

        // Build the same diagnostic message as before and raise.
        std::string message = _layerName;
        message += " layer parameter does not contain ";
        message += parameterName;
        message += " parameter.";
        CV_Error(Error::StsBadArg, message);
    }
    return dictValue.get<T>(idx);
}
// Reads NormalizeBBox options: eps guards the norm against degenerate
// (all-zero) inputs, default 1e-10; across_spatial and channel_shared
// are required boolean flags (getParameter raises when they are absent).
NormalizeBBoxLayer::NormalizeBBoxLayer(LayerParams &params) : Layer(params)
{
_eps = getParameter<float>(params, "eps", 0, false, 1e-10f);
_across_spatial = getParameter<bool>(params, "across_spatial");
_channel_shared = getParameter<bool>(params, "channel_shared");
}
// Validates inputs: every blob must match inputs[0] along the first
// _numAxes (4) dimensions, and inputs must have more than two dimensions.
void NormalizeBBoxLayer::checkInputs(const std::vector<Blob*> &inputs)
{
    CV_Assert(inputs.size() > 0);
    const Blob* first = inputs[0];
    for (size_t i = 1; i < inputs.size(); i++)
    {
        const Blob* current = inputs[i];
        for (size_t j = 0; j < _numAxes; j++)
            CV_Assert(current->shape()[j] == first->shape()[j]);
    }
    CV_Assert(first->dims() > 2);
}
// Caches input geometry, builds the GEMM work buffers, and creates the
// output blobs (one per input, same shape as inputs[0]).
void NormalizeBBoxLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    checkInputs(inputs);

    // Geometry of the (NCHW) input.
    _num = inputs[0]->num();
    _channels = inputs[0]->shape()[1];
    _rows = inputs[0]->shape()[2];
    _cols = inputs[0]->shape()[3];
    _channelSize = _rows * _cols;
    _imageSize = _channelSize * _channels;

    // Work buffer and all-ones multiplier vectors used by the GEMM-based
    // per-channel / per-pixel norm reductions in forward().
    _buffer = Mat(_channels, _channelSize, CV_32F);
    _sumChannelMultiplier = Mat(_channels, 1, CV_32F, Scalar(1.0));
    _sumSpatialMultiplier = Mat(1, _channelSize, CV_32F, Scalar(1.0));

    // Learned scale blob (1x1 when channel_shared, otherwise per-channel).
    _scale = blobs[0];

    // Fix: size the outputs vector before creating blobs in it; sibling
    // layers (e.g. Eltwise, Permute) resize outputs in allocate() as well.
    outputs.resize(inputs.size());
    for(size_t i = 0; i < inputs.size(); i++)
    {
        outputs[i].create(BlobShape(inputs[0]->shape()));
    }
}
// L2-normalizes each image of each input blob (either one norm over the whole
// image, or one norm per pixel across channels), then applies the learned scale.
void NormalizeBBoxLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
Mat zeroBuffer(_channels, _channelSize, CV_32F, Scalar(0));
Mat absDiff;
for (size_t j = 0; j < inputs.size(); j++)
{
for (size_t n = 0; n < _num; ++n)
{
// View one image as a (_channels x _channelSize) float matrix.
Mat src = Mat(_channels, _channelSize, CV_32F, inputs[j]->ptrf(n));
Mat dst = Mat(_channels, _channelSize, CV_32F, outputs[j].ptrf(n));
// Element-wise squares; the reductions below sum these to get squared norms.
_buffer = src.mul(src);
if (_across_spatial)
{
// Single L2 norm over the entire image.
// absdiff against zero yields |x| (the squares are already non-negative).
absdiff(_buffer, zeroBuffer, absDiff);
// add eps so the norm is never zero for an all-zero image
double absSum = sum(absDiff)[0] + _eps;
float norm = sqrt(absSum);
dst = src / norm;
}
else
{
// Per-pixel L2 norm across channels.
Mat norm(_channelSize, 1, _buffer.type()); // _channelSize x 1
// (_channels x _channelSize)^T * (_channels x 1) -> _channelSize x 1:
// sums the squared values over channels for every pixel.
gemmCPU(_buffer, _sumChannelMultiplier, 1, norm, 0, GEMM_1_T);
// element-wise square root gives the per-pixel norms
pow(norm, 0.5f, norm);
// broadcast the per-pixel norm back to every channel:
// (_channels x 1) * (_channelSize x 1)^T -> _channels x _channelSize
gemmCPU(_sumChannelMultiplier, norm, 1, _buffer, 0, GEMM_2_T);
dst = src / _buffer;
// NOTE(review): unlike the across_spatial branch, _eps is not added
// here, so an all-zero pixel divides by zero — TODO confirm intended.
}
// scale the normalized output
if (_channel_shared)
{
// _scale: 1 x 1 — a single scalar for all channels
dst *= _scale.matRefConst().at<float>(0, 0);
}
else
{
// _scale: _channels x 1, broadcast across the spatial dimension:
// (_channels x 1) * (1 x _channelSize) -> _channels x _channelSize
gemmCPU(_scale.matRefConst(), _sumSpatialMultiplier, 1, _buffer, 0);
dst = dst.mul(_buffer);
}
}
}
}
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_NORMALIZEBBOX_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_NORMALIZEBBOX_LAYER_HPP__
#include "../precomp.hpp"
namespace cv
{
namespace dnn
{
// NormalizeBBox layer (SSD): L2-normalizes activations either over the whole
// image or per pixel across channels, then multiplies by a learned scale.
class NormalizeBBoxLayer : public Layer
{
Mat _buffer;                // (_channels x _channelSize) scratch for squares / broadcasts
Mat _sumChannelMultiplier;  // (_channels x 1) all-ones vector for channel reductions
Mat _sumSpatialMultiplier;  // (1 x _channelSize) all-ones vector for spatial broadcasts
Blob _scale;                // learned scale: 1x1 if _channel_shared, else per-channel
float _eps;                 // added to the squared sum to avoid a zero norm
bool _across_spatial;       // true: one norm per image; false: one norm per pixel
bool _channel_shared;       // true: one scale scalar shared by all channels
// Cached input geometry (set in allocate()).
size_t _num;
size_t _channels;
size_t _rows;
size_t _cols;
size_t _channelSize;        // _rows * _cols
size_t _imageSize;          // _channelSize * _channels
static const size_t _numAxes = 4; // inputs are expected to be 4-D (NCHW)
static const std::string _layerName; // layer name used in error messages
public:
NormalizeBBoxLayer(LayerParams &params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
// Asserts that all inputs agree along the first _numAxes dimensions.
void checkInputs(const std::vector<Blob*> &inputs);
// Typed parameter lookup; raises when required and absent,
// otherwise returns defaultValue.
template<typename T>
T getParameter(const LayerParams &params,
const std::string &parameterName,
const size_t &idx = 0,
const bool required = true,
const T& defaultValue = T());
// Fetches the raw DictValue; returns false when the parameter is absent.
bool getParameterDict(const LayerParams &params,
const std::string &parameterName,
DictValue& result);
};
}
}
#endif
......@@ -56,8 +56,12 @@ bool im2col_ocl(const UMat &img,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
int dilation_h, int dilation_w,
UMat &col)
{
//TODO
CV_Assert(dilation_h == 1 && dilation_w == 1);
int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
int channels_col = channels * kernel_h * kernel_w;
......
......@@ -41,8 +41,8 @@
#ifndef __OPENCV_DNN_LAYERS_IM2COL_HPP__
#define __OPENCV_DNN_LAYERS_IM2COL_HPP__
#include "../precomp.hpp"
#include <iostream>
#include <opencv2/core.hpp>
#include <cstdlib>
namespace cv
{
......@@ -57,6 +57,7 @@ class im2col_CpuPBody : public cv::ParallelLoopBody
int kernel_h, kernel_w;
int pad_h, pad_w;
int stride_h, stride_w;
int dilation_h, dilation_w;
Dtype* data_col;
int height_col, width_col, channels_col;
......@@ -68,17 +69,21 @@ public:
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
int dilation_h, int dilation_w,
Dtype* data_col)
{
im2col_CpuPBody<Dtype> t;
t.data_im = data_im;
t.data_col = data_col;
t.channels = channels; t.height = height; t.width = width;
t.kernel_h = kernel_h; t.kernel_w = kernel_w;
t.pad_h = pad_h; t.pad_w = pad_w;
t.stride_h = stride_h; t.stride_w = stride_w;
t.height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
t.width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
t.dilation_h = dilation_h; t.dilation_w = dilation_w;
t.height_col = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
t.width_col = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
t.channels_col = channels * kernel_h * kernel_w;
cv::parallel_for_(Range(0, t.channels_col), t);
......@@ -86,17 +91,20 @@ public:
virtual void operator ()(const Range &r) const
{
for (int c = r.start; c < r.end; ++c) {
for (int c = r.start; c < r.end; ++c)
{
int w_offset = c % kernel_w;
int h_offset = (c / kernel_w) % kernel_h;
int c_im = c / kernel_h / kernel_w;
for (int h = 0; h < height_col; ++h) {
for (int w = 0; w < width_col; ++w) {
int h_pad = h * stride_h - pad_h + h_offset;
int w_pad = w * stride_w - pad_w + w_offset;
for (int h = 0; h < height_col; ++h)
{
for (int w = 0; w < width_col; ++w)
{
int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
data_col[(c * height_col + h) * width_col + w] =
data_im[(c_im * height + h_pad) * width + w_pad];
data_im[(c_im * height + h_pad) * width + w_pad];
else
data_col[(c * height_col + h) * width_col + w] = 0;
}
......@@ -180,11 +188,12 @@ void col2im_cpu(const Dtype* data_col,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
int dilation_h, int dilation_w,
Dtype* data_im)
{
int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
int channels_col = channels * kernel_h * kernel_w;
int height_col = (height + 2 * pad_h - (dilation_h * (patch_h - 1) + 1)) / stride_h + 1;
int width_col = (width + 2 * pad_w - (dilation_w * (patch_w - 1) + 1)) / stride_w + 1;
int channels_col = channels * patch_h * patch_w;
std::memset(data_im, 0, height * width * channels * sizeof(Dtype));
......@@ -198,12 +207,12 @@ void col2im_cpu(const Dtype* data_col,
{
for (int w = 0; w < width_col; ++w)
{
int h_pad = h * stride_h - pad_h + h_offset;
int w_pad = w * stride_w - pad_w + w_offset;
int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
data_im[(c_im * height + h_pad) * width + w_pad] +=
data_col[(c * height_col + h) * width_col + w];
data_col[(c * height_col + h) * width_col + w];
}
}
}
......@@ -215,6 +224,7 @@ bool im2col_ocl(const UMat &img,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
int dilation_h, int dilation_w,
UMat &col);
bool col2im_ocl(const UMat &col,
......
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "permute_layer.hpp"
#include <float.h>
#include <algorithm>
namespace cv
{
namespace dnn
{
// Validates a single axis index from the "order" parameter: it must lie in
// [0, 3] and must not repeat an index already stored in _order.
void PermuteLayer::checkCurrentOrder(int currentOrder)
{
    if(currentOrder < 0 || currentOrder > 3)
    {
        // Fix: the two concatenated string literals previously produced
        // "...parametermust be..." (missing space).
        CV_Error(
            Error::StsBadArg,
            "Orders of dimensions in Permute layer parameter"
            " must be in [0...3] interval");
    }

    if(std::find(_order.begin(), _order.end(), currentOrder) != _order.end())
    {
        CV_Error(Error::StsBadArg,
            "Permute layer parameter contains duplicated orders.");
    }
}
void PermuteLayer::checkNeedForPermutation()
{
_needsPermute = false;
for (size_t i = 0; i < _numAxes; ++i)
{
if (_order[i] != i)
{
_needsPermute = true;
break;
}
}
}
// Reads the "order" parameter (up to 4 axis indices). Without it the layer
// degenerates to a pass-through.
PermuteLayer::PermuteLayer(LayerParams &params) : Layer(params)
{
    if (!params.has("order"))
    {
        _needsPermute = false;
        return;
    }

    DictValue paramOrder = params.get("order");
    if(paramOrder.size() > 4)
    {
        CV_Error(
            Error::StsBadArg,
            "Too many (> 4) orders of dimensions in Permute layer");
    }

    _numAxes = paramOrder.size();
    for (size_t i = 0; i < _numAxes; i++)
    {
        const int axisIndex = paramOrder.get<int>(i);
        checkCurrentOrder(axisIndex); // range + duplicate validation
        _order.push_back(axisIndex);
    }

    checkNeedForPermutation();
}
// Precomputes row-major strides for the input (_oldStride) and output
// (_newStride) shapes, and caches the total element count in _count.
void PermuteLayer::computeStrides()
{
_oldStride.resize(_numAxes);
_newStride.resize(_numAxes);
// The innermost axis always has stride 1.
_oldStride[_numAxes - 1] = 1;
_newStride[_numAxes - 1] = 1;
// Each outer stride is the next stride times the next dimension's size.
for(int i = _numAxes - 2; i >= 0; i--)
{
_oldStride[i] = _oldStride[i + 1] * _oldDimensionSize[i + 1];
_newStride[i] = _newStride[i + 1] * _newDimensionSize[i + 1];
}
// Total number of elements in the blob.
_count = _oldStride[0] * _oldDimensionSize[0];
}
// Sizes the output blobs (input shape with axes rearranged per _order) and
// precomputes the strides that forward() uses for index remapping.
void PermuteLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
// Identity permutation: forward() will just share the input data, so no
// output allocation is needed here.
if(!_needsPermute)
{
return;
}
CV_Assert(inputs.size() > 0);
CV_Assert((int)_numAxes == inputs[0]->shape().dims());
outputs.resize(inputs.size());
_oldDimensionSize = inputs[0]->shape();
// NOTE(review): _newDimensionSize is written by index without an explicit
// resize to _numAxes — presumably BlobShape default-constructs with enough
// dimensions; verify against BlobShape's definition.
for (size_t i = 0; i < _numAxes; i++)
{
_newDimensionSize[i] = _oldDimensionSize[_order[i]];
}
for (size_t i = 0; i < inputs.size(); i++)
{
// Hard-coded indices [2]/[3] assume the last two axes are rows/cols
// (4-D input); every input must match the first input's spatial size.
CV_Assert(inputs[i]->rows() == _oldDimensionSize[2] && inputs[i]->cols() == _oldDimensionSize[3]);
outputs[i].create(BlobShape(_newDimensionSize));
}
computeStrides();
}
// Copies each input blob into its output with axes reordered per _order.
void PermuteLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
// Pass-through case: Mat assignment is shallow, so the outputs share the
// inputs' data buffers — no elements are copied.
if(!_needsPermute)
{
for (size_t j = 0; j < inputs.size(); j++)
{
outputs[j].matRef() = inputs[j]->matRef();
}
return;
}
for (size_t k = 0; k < inputs.size(); k++)
{
float *srcData = inputs[k]->ptrf();
float *dstData = outputs[k].ptrf();
// For each flat destination index i: decompose i into per-axis
// coordinates using the output strides (_newStride), then rebuild the
// flat source offset by weighting each coordinate with the stride of
// the corresponding source axis (_oldStride[_order[j]]).
for (size_t i = 0; i < _count; ++i)
{
int oldPosition = 0;
int newPosition = i;
for (size_t j = 0; j < _numAxes; ++j)
{
oldPosition += (newPosition / _newStride[j]) * _oldStride[_order[j]];
newPosition %= _newStride[j];
}
dstData[i] = srcData[oldPosition];
}
}
}
}
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_PERMUTE_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_PERMUTE_LAYER_HPP__
#include "../precomp.hpp"
namespace cv
{
namespace dnn
{
// Permutes the axes of its input blobs according to the "order" parameter
// (e.g. order = 0,2,3,1 turns NCHW into NHWC). When "order" is absent or
// is the identity permutation, the layer acts as a pass-through.
class PermuteLayer : public Layer
{
// Total number of elements in one input blob.
size_t _count;
// _order[i] is the source axis that becomes output axis i.
std::vector<size_t> _order;
// Input shape and the permuted output shape (filled in allocate()).
BlobShape _oldDimensionSize;
BlobShape _newDimensionSize;
// Row-major strides for the input and the output shapes.
std::vector<size_t> _oldStride;
std::vector<size_t> _newStride;
// False when no data movement is required (missing or identity order).
bool _needsPermute;
// Number of axes in the permutation (at most 4).
size_t _numAxes;
// Rejects out-of-range or duplicated axis indices.
void checkCurrentOrder(int currentOrder);
// Sets _needsPermute by comparing _order with the identity permutation.
void checkNeedForPermutation();
// Computes _oldStride/_newStride and _count from the dimension sizes.
void computeStrides();
public:
PermuteLayer(LayerParams &params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
}
}
#endif
......@@ -57,11 +57,12 @@ namespace dnn
// Default constructor: pooling with an explicit kernel window; global
// pooling (kernel covering the whole input plane) is off by default.
PoolingLayerImpl::PoolingLayerImpl()
{
globalPooling = false;
}
PoolingLayerImpl::PoolingLayerImpl(int type_, Size kernel_, Size stride_, Size pad_)
{
globalPooling = false;
type = type_;
kernel = kernel_;
pad = pad_;
......@@ -73,6 +74,12 @@ void PoolingLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Bl
CV_Assert(inputs.size() > 0);
inp = inputs[0]->size2();
if(globalPooling)
{
kernel = inp;
}
computeOutputShape(inp);
useOpenCL = ocl::useOpenCL();
......@@ -266,5 +273,12 @@ Ptr<PoolingLayer> PoolingLayer::create(int type, Size kernel, Size stride, Size
return Ptr<PoolingLayer>(new PoolingLayerImpl(type, kernel, stride, pad));
}
// Builds a "global" pooling layer: the kernel size is taken from the whole
// input plane at allocate() time, so the layer is created with default
// geometry and only the globalPooling flag raised.
Ptr<PoolingLayer> PoolingLayer::createGlobal(int type)
{
    Ptr<PoolingLayer> layer = PoolingLayer::create(type);
    layer->globalPooling = true;
    return layer;
}
}
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "prior_box_layer.hpp"
#include <float.h>
#include <algorithm>
#include <cmath>
namespace cv
{
namespace dnn
{
// Human-readable layer name used when composing parameter-error messages.
const std::string PriorBoxLayer::_layerName = std::string("PriorBox");
// Looks up parameterName in params. On success stores the value in result
// and returns true; returns false (result untouched) when absent.
bool PriorBoxLayer::getParameterDict(const LayerParams &params,
                                     const std::string &parameterName,
                                     DictValue& result)
{
    if (params.has(parameterName))
    {
        result = params.get(parameterName);
        return true;
    }
    return false;
}
// Fetches element idx of the named parameter. When the parameter is absent:
// returns defaultValue if it is optional, otherwise raises StsBadArg.
template<typename T>
T PriorBoxLayer::getParameter(const LayerParams &params,
                              const std::string &parameterName,
                              const size_t &idx,
                              const bool required,
                              const T& defaultValue)
{
    DictValue dictValue;
    if (!getParameterDict(params, parameterName, dictValue))
    {
        if (!required)
        {
            return defaultValue;
        }
        // Same message text as before, assembled in one expression.
        std::string message = _layerName
            + " layer parameter does not contain "
            + parameterName
            + " parameter.";
        CV_Error(Error::StsBadArg, message);
    }
    return dictValue.get<T>(idx);
}
void PriorBoxLayer::getAspectRatios(const LayerParams &params)
{
DictValue aspectRatioParameter;
bool aspectRatioRetieved = getParameterDict(params, "aspect_ratio", aspectRatioParameter);
CV_Assert(aspectRatioRetieved);
for (int i = 0; i < aspectRatioParameter.size(); ++i)
{
float aspectRatio = aspectRatioParameter.get<float>(i);
bool alreadyExists = false;
for (size_t j = 0; j < _aspectRatios.size(); ++j)
{
if (fabs(aspectRatio - _aspectRatios[j]) < 1e-6)
{
alreadyExists = true;
break;
}
}
if (!alreadyExists)
{
_aspectRatios.push_back(aspectRatio);
if (_flip)
{
_aspectRatios.push_back(1./aspectRatio);
}
}
}
}
// Reads the "variance" parameter into _variance: either exactly four
// positive values (one per box coordinate), a single shared positive value,
// or the default 0.1 when the list is empty.
void PriorBoxLayer::getVariance(const LayerParams &params)
{
    DictValue varianceParameter;
    bool varianceParameterRetrieved = getParameterDict(params, "variance", varianceParameter);
    CV_Assert(varianceParameterRetrieved);

    const int varianceSize = varianceParameter.size();
    if (varianceSize > 1)
    {
        // More than one value supplied: it must be the full set of four.
        CV_Assert(varianceSize == 4);
        for (int i = 0; i < varianceSize; ++i)
        {
            float variance = varianceParameter.get<float>(i);
            CV_Assert(variance > 0);
            _variance.push_back(variance);
        }
    }
    else if (varianceSize == 1)
    {
        float variance = varianceParameter.get<float>(0);
        CV_Assert(variance > 0);
        _variance.push_back(variance);
    }
    else
    {
        // Nothing supplied: fall back to the conventional default.
        _variance.push_back(0.1f);
    }
}
// Reads the PriorBox parameters. Statement order matters: _flip must be set
// before getAspectRatios() so reciprocal ratios can be added there.
PriorBoxLayer::PriorBoxLayer(LayerParams &params) : Layer(params)
{
_minSize = getParameter<unsigned>(params, "min_size");
CV_Assert(_minSize > 0);
_flip = getParameter<bool>(params, "flip");
_clip = getParameter<bool>(params, "clip");
// Aspect ratio 1 is always generated; the rest come from the parameters.
_aspectRatios.clear();
_aspectRatios.push_back(1.);
getAspectRatios(params);
getVariance(params);
// One prior per aspect ratio at each feature-map cell.
_numPriors = _aspectRatios.size();
// max_size is optional; a negative _maxSize marks "not set". When present
// it adds one extra prior of size sqrt(min_size * max_size) in forward().
_maxSize = -1;
if (params.has("max_size"))
{
_maxSize = params.get("max_size").get<float>(0);
CV_Assert(_maxSize > _minSize);
_numPriors += 1;
}
}
// Computes the prior-box grid geometry from the two inputs: inputs[0] is
// the feature map the priors are tiled over, inputs[1] is the image blob
// that supplies the absolute width/height used for normalization.
void PriorBoxLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() == 2);
_layerWidth = inputs[0]->cols();
_layerHeight = inputs[0]->rows();
_imageWidth = inputs[1]->cols();
_imageHeight = inputs[1]->rows();
// Distance (in image pixels) between the centers of adjacent priors.
_stepX = static_cast<float>(_imageWidth) / _layerWidth;
_stepY = static_cast<float>(_imageHeight) / _layerHeight;
// Since all images in a batch has same height and width, we only need to
// generate one set of priors which can be shared across all images.
size_t outNum = 1;
// 2 channels. First channel stores the mean of each prior coordinate.
// Second channel stores the variance of each prior coordinate.
size_t outChannels = 2;
// 4 normalized coordinates (xmin, ymin, xmax, ymax) per prior.
_outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;
// NOTE(review): outputs[0] is accessed without a preceding
// outputs.resize(); presumably the caller pre-sizes the vector — verify
// against the network's allocation logic.
outputs[0].create(BlobShape(outNum, outChannels, _outChannelSize));
outputs[0].matRef() = 0;
}
// Fills the output blob with prior boxes. Channel 0 holds the normalized
// (xmin, ymin, xmax, ymax) of every prior, laid out cell-by-cell in row-major
// order over the feature map; channel 1 holds the matching variances.
void PriorBoxLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
(void)inputs; // to suppress unused parameter warning
float* outputPtr = outputs[0].ptrf();
// first prior: aspect_ratio = 1, size = min_size
int idx = 0;
for (size_t h = 0; h < _layerHeight; ++h)
{
for (size_t w = 0; w < _layerWidth; ++w)
{
_boxWidth = _boxHeight = _minSize;
// Prior centers sit at the middle of each feature-map cell, scaled to
// image coordinates via the per-axis steps computed in allocate().
float center_x = (w + 0.5) * _stepX;
float center_y = (h + 0.5) * _stepY;
// xmin
outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth;
// ymin
outputPtr[idx++] = (center_y - _boxHeight / 2.) / _imageHeight;
// xmax
outputPtr[idx++] = (center_x + _boxWidth / 2.) / _imageWidth;
// ymax
outputPtr[idx++] = (center_y + _boxHeight / 2.) / _imageHeight;
// _maxSize < 0 means "max_size" was not given (see the constructor).
if (_maxSize > 0)
{
// second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
_boxWidth = _boxHeight = sqrt(_minSize * _maxSize);
// xmin
outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth;
// ymin
outputPtr[idx++] = (center_y - _boxHeight / 2.) / _imageHeight;
// xmax
outputPtr[idx++] = (center_x + _boxWidth / 2.) / _imageWidth;
// ymax
outputPtr[idx++] = (center_y + _boxHeight / 2.) / _imageHeight;
}
// rest of priors: one box per non-unity aspect ratio (ratio 1 was
// already emitted above).
for (size_t r = 0; r < _aspectRatios.size(); ++r)
{
float ar = _aspectRatios[r];
if (fabs(ar - 1.) < 1e-6)
{
continue;
}
_boxWidth = _minSize * sqrt(ar);
_boxHeight = _minSize / sqrt(ar);
// xmin
outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth;
// ymin
outputPtr[idx++] = (center_y - _boxHeight / 2.) / _imageHeight;
// xmax
outputPtr[idx++] = (center_x + _boxWidth / 2.) / _imageWidth;
// ymax
outputPtr[idx++] = (center_y + _boxHeight / 2.) / _imageHeight;
}
}
}
// clip the prior's coordidate such that it is within [0, 1]
if (_clip)
{
for (size_t d = 0; d < _outChannelSize; ++d)
{
outputPtr[d] = std::min<float>(std::max<float>(outputPtr[d], 0.), 1.);
}
}
// set the variance.
// Switch to channel 1 of the output blob.
outputPtr = outputs[0].ptrf(0, 1);
if(_variance.size() == 1)
{
// Single shared variance: fill the whole channel with one value.
Mat secondChannel(outputs[0].rows(), outputs[0].cols(), CV_32F, outputPtr);
secondChannel.setTo(Scalar(_variance[0]));
}
else
{
// Four per-coordinate variances, repeated for every prior of every cell
// in the same (xmin, ymin, xmax, ymax) order as channel 0.
int count = 0;
for (size_t h = 0; h < _layerHeight; ++h)
{
for (size_t w = 0; w < _layerWidth; ++w)
{
for (size_t i = 0; i < _numPriors; ++i)
{
for (int j = 0; j < 4; ++j)
{
outputPtr[count] = _variance[j];
++count;
}
}
}
}
}
}
}
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_PRIOR_BOX_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_PRIOR_BOX_LAYER_HPP__
#include "../precomp.hpp"
namespace cv
{
namespace dnn
{
// SSD PriorBox layer: generates a fixed grid of default ("prior") bounding
// boxes over a feature map, plus their localization variances.
class PriorBoxLayer : public Layer
{
// Feature-map size the priors are tiled over (from inputs[0]).
size_t _layerWidth;
size_t _layerHeight;
// Image size used to normalize coordinates (from inputs[1]).
size_t _imageWidth;
size_t _imageHeight;
// Number of floats per output channel: layerH * layerW * numPriors * 4.
size_t _outChannelSize;
// Pixel distance between centers of adjacent priors.
float _stepX;
float _stepY;
// Box sizes from "min_size"/"max_size"; _maxSize < 0 means "not set".
float _minSize;
float _maxSize;
// Scratch values for the box currently being emitted in forward().
float _boxWidth;
float _boxHeight;
// Aspect ratios (always contains 1) and coordinate variances.
std::vector<float> _aspectRatios;
std::vector<float> _variance;
// Whether to add reciprocal aspect ratios / clip boxes to [0, 1].
bool _flip;
bool _clip;
// Priors generated per feature-map cell.
size_t _numPriors;
static const size_t _numAxes = 4;
// Layer name used in parameter-error messages.
static const std::string _layerName;
public:
PriorBoxLayer(LayerParams &params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
// Returns element idx of a named parameter, or defaultValue when the
// parameter is optional and absent; raises when required and absent.
template<typename T>
T getParameter(const LayerParams &params,
               const std::string &parameterName,
               const size_t &idx = 0,
               const bool required = true,
               const T& defaultValue = T());
// Fetches a raw DictValue; returns false when the parameter is absent.
bool getParameterDict(const LayerParams &params,
                      const std::string &parameterName,
                      DictValue& result);
// Parsers for the "aspect_ratio" and "variance" parameter lists.
void getAspectRatios(const LayerParams &params);
void getVariance(const LayerParams &params);
};
}
}
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment