Commit b1346e5a authored by Vadim Pisarevsky

Merge pull request #750 from ludv1x:dnn-python-bindings

parents 67a305ad cd029345
@@ -52,12 +52,10 @@ endif()
 # ----------------------------------------------------------------------------
 # Download pre-trained models for complex testing on GoogLeNet and AlexNet
 # ----------------------------------------------------------------------------
-OCV_OPTION(${the_module}_DOWNLOAD_CAFFE_MODELS "Use GoogLeNet Caffe model for testing" OFF IF BUILD_TESTS AND PYTHON2_EXECUTABLE AND DEFINED ENV{OPENCV_TEST_DATA_PATH})
-if(BUILD_TESTS AND PYTHON2_EXECUTABLE AND DEFINED ENV{OPENCV_TEST_DATA_PATH}
-   AND (DOWNLOAD_EXTERNAL_TEST_DATA OR ${the_module}_DOWNLOAD_CAFFE_MODELS))
+OCV_OPTION(${the_module}_DOWNLOAD_CAFFE_MODELS "Use GoogLeNet Caffe model for testing" OFF IF BUILD_TESTS AND DEFINED ENV{OPENCV_TEST_DATA_PATH})
+if(BUILD_TESTS AND DEFINED ENV{OPENCV_TEST_DATA_PATH} AND (DOWNLOAD_EXTERNAL_TEST_DATA OR ${the_module}_DOWNLOAD_CAFFE_MODELS))
     add_custom_command( TARGET opencv_test_${name} POST_BUILD
-                        COMMAND ${PYTHON2_EXECUTABLE} download_model.py test_models.json
-                        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/scripts )
+                        COMMAND ${CMAKE_COMMAND} -Dmodel=GoogleNet -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/download_model.cmake)
     add_definitions(-DENABLE_CAFFE_MODEL_TESTS=1)
 endif()
@@ -68,21 +66,29 @@ OCV_OPTION(${the_module}_BUILD_TORCH_IMPORTER "Build Torch model importer (exper
 if(${the_module}_BUILD_TORCH_IMPORTER)
     add_definitions(-DENABLE_TORCH_IMPORTER=1)
     ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4702 /wd4127 /wd4267) #suppress warnings in original torch files
+
+    if(NOT DEFINED HAVE_TORCH_EXE)
+        execute_process(COMMAND th ${CMAKE_CURRENT_SOURCE_DIR}/testdata/dnn/torch/torch_nn_echo.lua RESULT_VARIABLE TORCH_EXE_STATUS)
+        set(HAVE_TORCH_EXE OFF)
+        if(${TORCH_EXE_STATUS} EQUAL 0)
+            set(HAVE_TORCH_EXE ON)
+        endif()
+        set(HAVE_TORCH_EXE ${HAVE_TORCH_EXE} CACHE INTERNAL "Have torch binary")
+    endif()
 endif()

 # ----------------------------------------------------------------------------
 # Generating test data for Torch importer
 # ----------------------------------------------------------------------------
-OCV_OPTION(${the_module}_BUILD_TORCH_TESTS "Build Torch tests (installed torch7 with nn module is required)" ON IF BUILD_TESTS AND ${the_module}_BUILD_TORCH_IMPORTER)
+OCV_OPTION(${the_module}_BUILD_TORCH_TESTS "Build Torch tests (installed torch7 with nn module is required)" ON IF BUILD_TESTS AND ${the_module}_BUILD_TORCH_IMPORTER AND HAVE_TORCH_EXE)
 if(${the_module}_BUILD_TORCH_TESTS)
     if(NOT DEFINED ENV{OPENCV_TEST_DATA_PATH})
         message(FATAL_ERROR "OPENCV_TEST_DATA_PATH environment variable was not specified")
     endif()
-    execute_process(COMMAND th ${CMAKE_CURRENT_SOURCE_DIR}/testdata/dnn/torch/torch_nn_echo.lua RESULT_VARIABLE TORCH_STATUS)
-    if(TORCH_STATUS)
-        message(FATAL_ERROR "Torch executable \"th\" not found (status: ${TORCH_STATUS}) or nn module not found")
+    if(NOT HAVE_TORCH_EXE)
+        message(FATAL_ERROR "Torch executable \"th\" not found or nn module not found")
     endif()
     add_custom_command( TARGET opencv_test_${name} POST_BUILD
...
@@ -23,20 +23,11 @@ else()
 endif()
 set(PROTOBUF_LIBRARIES libprotobuf)
-set(PROTOBUF_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/protobuf/src ${CMAKE_CURRENT_BINARY_DIR})
-set(PROTOBUF_SRCS ${CMAKE_CURRENT_BINARY_DIR}/caffe.pb.cc)
-set(PROTOBUF_HDRS ${CMAKE_CURRENT_BINARY_DIR}/caffe.pb.h)
+set(PROTOBUF_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/protobuf/src ${CMAKE_CURRENT_SOURCE_DIR}/misc/caffe)
+set(PROTOBUF_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/misc/caffe/caffe.pb.cc)
+set(PROTOBUF_HDRS ${CMAKE_CURRENT_SOURCE_DIR}/misc/caffe/caffe.pb.h)
 add_definitions(-DHAVE_PROTOBUF=1)
-add_custom_command(
-    OUTPUT ${PROTOBUF_SRCS} ${PROTOBUF_HDRS}
-    COMMAND ${CMAKE_COMMAND} -E tar xzf ${CMAKE_CURRENT_SOURCE_DIR}/src/caffe/compiled/caffe.tar.gz
-    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
-    COMMENT "Unpacking compiled caffe protobuf files"
-    VERBATIM
-)
-set_source_files_properties(${PROTOBUF_SRCS} ${PROTOBUF_HDRS} PROPERTIES GENERATED TRUE)

 #suppress warnings in autogenerated caffe.pb.* files
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wunused-parameter)
 ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4125 /wd4267 /wd4127 /wd4244 /wd4512 /wd4702)
...
@@ -13,11 +13,18 @@
 # MKL_LIBRARIES - IPP libraries that are used by OpenCV
 #

+macro (mkl_find_lib VAR NAME DIRS)
+    find_path(${VAR} ${NAME} ${DIRS} NO_DEFAULT_PATH)
+    set(${VAR} ${${VAR}}/${NAME})
+    unset(${VAR} CACHE)
+endmacro()
+
 macro(mkl_fail)
     set(HAVE_MKL OFF CACHE BOOL "True if MKL found")
     set(MKL_ROOT_DIR ${MKL_ROOT_DIR} CACHE PATH "Path to MKL directory")
     unset(MKL_INCLUDE_DIRS CACHE)
     unset(MKL_LIBRARIES CACHE)
+    return()
 endmacro()

 macro(get_mkl_version VERSION_FILE)
@@ -42,28 +49,27 @@ endif()

 #check current MKL_ROOT_DIR
 if(NOT MKL_ROOT_DIR OR NOT EXISTS ${MKL_ROOT_DIR}/include/mkl.h)
-    set(MKLROOT_PATHS ${MKL_ROOT_DIR})
+    set(mkl_root_paths ${MKL_ROOT_DIR})
     if(DEFINED $ENV{MKLROOT})
-        list(APPEND MKLROOT_PATHS $ENV{MKLROOT})
+        list(APPEND mkl_root_paths $ENV{MKLROOT})
     endif()
     if(WIN32)
         set(ProgramFilesx86 "ProgramFiles(x86)")
-        list(APPEND MKLROOT_PATHS $ENV{${ProgramFilesx86}}/IntelSWTools/compilers_and_libraries/windows/mkl)
+        list(APPEND mkl_root_paths $ENV{${ProgramFilesx86}}/IntelSWTools/compilers_and_libraries/windows/mkl)
     endif()
     if(UNIX)
-        list(APPEND MKLROOT_PATHS "/opt/intel/mkl")
+        list(APPEND mkl_root_paths "/opt/intel/mkl")
     endif()

-    find_path(MKL_ROOT_DIR include/mkl.h PATHS ${MKLROOT_PATHS})
+    find_path(MKL_ROOT_DIR include/mkl.h PATHS ${mkl_root_paths})
 endif()

 if(NOT MKL_ROOT_DIR)
     mkl_fail()
-    return()
 endif()

 set(MKL_INCLUDE_DIRS ${MKL_ROOT_DIR}/include)
-set(MKL_INCLUDE_HEADERS ${MKL_INCLUDE_DIRS}/mkl.h ${MKL_INCLUDE_DIRS}/mkl_version.h)
+get_mkl_version(${MKL_INCLUDE_DIRS}/mkl_version.h)

 #determine arch
 if(CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8)
@@ -81,43 +87,50 @@ else()
     set(MKL_ARCH "ia32")
 endif()

-if(MSVC)
-    set(MKL_EXT ".lib")
-    set(MKL_PRE "")
-else()
-    set(MKL_EXT ".a")
-    set(MKL_PRE "lib")
-endif()
-
-set(MKL_LIB_DIR ${MKL_ROOT_DIR}/lib/${MKL_ARCH})
-set(MKL_LIBRARIES ${MKL_LIB_DIR}/${MKL_PRE}mkl_core${MKL_EXT} ${MKL_LIB_DIR}/${MKL_PRE}mkl_intel_${MKL_LP64}${MKL_EXT})
-
-if(MKL_WITH_TBB)
-    list(APPEND MKL_LIBRARIES ${MKL_LIB_DIR}/${MKL_PRE}mkl_tbb_thread${MKL_EXT})
-    list(APPEND MKL_LIBRARIES ${MKL_ROOT_DIR}/../tbb/lib/${MKL_ARCH}/tbb${MKL_EXT})
-elseif(MKL_WITH_OPENMP)
-    message(FATAL_ERROR "Multithreaded MKL is not supported yet")
-else()
-    list(APPEND MKL_LIBRARIES ${MKL_LIB_DIR}/${MKL_PRE}mkl_sequential${MKL_EXT})
-endif()
+if(${MKL_VERSION_STR} VERSION_GREATER "11.3.0" OR ${MKL_VERSION_STR} VERSION_EQUAL "11.3.0")
+    set(mkl_lib_find_paths
+        ${MKL_ROOT_DIR}/lib
+        ${MKL_ROOT_DIR}/lib/${MKL_ARCH} ${MKL_ROOT_DIR}/../tbb/lib/${MKL_ARCH})
+
+    set(mkl_lib_list
+        mkl_core
+        mkl_intel_${MKL_LP64})
+
+    if(MKL_WITH_TBB)
+        list(APPEND mkl_lib_list mkl_tbb_thread tbb)
+    elseif(MKL_WITH_OPENMP)
+        if(MSVC)
+            list(APPEND mkl_lib_list mkl_intel_thread libiomp5md)
+        else()
+            list(APPEND mkl_lib_list libmkl_gnu_thread)
+        endif()
+    else()
+        list(APPEND mkl_lib_list mkl_sequential)
+    endif()
+else()
+    message(STATUS "MKL version ${MKL_VERSION_STR} is not supported")
+    mkl_fail()
+endif()

-include(FindPackageHandleStandardArgs)
-find_package_handle_standard_args(MKL MKL_INCLUDE_HEADERS MKL_LIBRARIES)
-
-if(MKL_FOUND)
-    get_mkl_version(${MKL_INCLUDE_DIRS}/mkl_version.h)
-    message(STATUS "Found MKL ${MKL_VERSION_STR} at: ${MKL_ROOT_DIR}")
-
-    set(HAVE_MKL ON CACHE BOOL "True if MKL found")
-    set(MKL_ROOT_DIR ${MKL_ROOT_DIR} CACHE PATH "Path to MKL directory")
-    set(MKL_INCLUDE_DIRS ${MKL_INCLUDE_DIRS} CACHE PATH "Path to MKL include directory")
-    if(NOT UNIX)
-        set(MKL_LIBRARIES ${MKL_LIBRARIES} CACHE FILEPATH "MKL libraries")
-    else()
-        #it's ugly but helps to avoid cyclic lib problem
-        set(MKL_LIBRARIES ${MKL_LIBRARIES} ${MKL_LIBRARIES} ${MKL_LIBRARIES} "-lpthread" "-lm" "-ldl")
-        set(MKL_LIBRARIES ${MKL_LIBRARIES} CACHE STRING "MKL libraries")
-    endif()
-endif()
+set(MKL_LIBRARIES "")
+foreach(lib ${mkl_lib_list})
+    find_library(${lib} ${lib} ${mkl_lib_find_paths})
+    mark_as_advanced(${lib})
+    if(NOT ${lib})
+        mkl_fail()
+    endif()
+    list(APPEND MKL_LIBRARIES ${${lib}})
+endforeach()
+
+message(STATUS "Found MKL ${MKL_VERSION_STR} at: ${MKL_ROOT_DIR}")
+set(HAVE_MKL ON CACHE BOOL "True if MKL found")
+set(MKL_ROOT_DIR ${MKL_ROOT_DIR} CACHE PATH "Path to MKL directory")
+set(MKL_INCLUDE_DIRS ${MKL_INCLUDE_DIRS} CACHE PATH "Path to MKL include directory")
+if(NOT UNIX)
+    set(MKL_LIBRARIES ${MKL_LIBRARIES} CACHE FILEPATH "MKL libraries")
+else()
+    #it's ugly but helps to avoid cyclic lib problem
+    set(MKL_LIBRARIES ${MKL_LIBRARIES} ${MKL_LIBRARIES} ${MKL_LIBRARIES} "-lpthread" "-lm" "-ldl")
+    set(MKL_LIBRARIES ${MKL_LIBRARIES} CACHE STRING "MKL libraries")
+endif()
\ No newline at end of file
set(GoogleNet_url "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel")
set(GoogleNet_dst "$ENV{OPENCV_TEST_DATA_PATH}/dnn/bvlc_googlenet.caffemodel")
set(GoogleNet_sha "405fc5acd08a3bb12de8ee5e23a96bec22f08204")

set(VGG16_url "http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_16_layers.caffemodel")
set(VGG16_dst "$ENV{OPENCV_TEST_DATA_PATH}/dnn/VGG_ILSVRC_16_layers.caffemodel")

set(voc-fcn32s_url "http://dl.caffe.berkeleyvision.org/fcn32s-heavy-pascal.caffemodel")
set(voc-fcn32s_dst "$ENV{OPENCV_TEST_DATA_PATH}/dnn/fcn32s-heavy-pascal.caffemodel")

if(NOT model)
    set(model "GoogleNet")
endif()

if(NOT EXISTS ${${model}_dst})
    message(STATUS "Downloading ${${model}_url} to ${${model}_dst}")
    if(DEFINED ${model}_sha)
        file(DOWNLOAD ${${model}_url} ${${model}_dst} SHOW_PROGRESS EXPECTED_HASH SHA1=${${model}_sha} STATUS status_vec)
    else()
        file(DOWNLOAD ${${model}_url} ${${model}_dst} SHOW_PROGRESS STATUS status_vec)
    endif()

    list(GET status_vec 0 status)
    list(GET status_vec 1 status_msg)
    if(status EQUAL 0)
        message(STATUS "Ok! ${status_msg}")
    else()
        message(STATUS "Fail! ${status_msg}")
    endif()
endif()
@@ -54,7 +54,7 @@ namespace dnn
 //! @{

     /** @brief Lightweight class for storing and processing a shape of blob (or anything else). */
-    struct BlobShape
+    struct CV_EXPORTS_W BlobShape
     {
         BlobShape(); //!< Creates [1, 1, 1, 1] shape. @todo Make the behavior clearer.
         explicit BlobShape(int s0); //!< Creates 1-dim shape [@p s0]
@@ -154,7 +154,7 @@ namespace dnn
         /** @brief Constructs Blob from existing Mat or UMat. */
         Blob(InputArray data);

-        /** @brief Constucts 4-dimensional blob (so-called batch) from image or array of images.
+        /** @brief Constructs 4-dimensional blob (so-called batch) from image or array of images.
          * @param image 2-dimensional multi-channel or 3-dimensional single-channel image (or array of such images)
          * @param dstCn specifies size of second axis of output blob
          */
@@ -229,6 +229,18 @@ namespace dnn
         /** @brief Checks equality of two blobs shapes. */
         bool equalShape(const Blob &other) const;

+        /** @brief Returns slice of first two dimensions.
+         * @details The behaviour is similar to the following numpy code: blob[n, cn, ...]
+         */
+        Mat getPlane(int n, int cn);
+
+        /** @brief Returns slice of first dimension.
+         * @details The behaviour is similar to getPlane(), but returns all
+         * channels * rows * cols values, corresponding to the n-th value
+         * of the first dimension.
+         */
+        Mat getPlanes(int n);
+
         /* Shape getters of 4-dimensional blobs. */
         int cols() const; //!< Returns size of the fourth axis of the blob.
         int rows() const; //!< Returns size of the third axis of the blob.
@@ -262,12 +274,6 @@ namespace dnn
         float *ptrf(int n = 0, int cn = 0, int row = 0, int col = 0);
         //TODO: add const ptr methods

-        /** @brief Returns slice of first two dimensions.
-         * @details The behaviour is similar to the following numpy code: blob[n, cn, ...]
-         * @todo Method will be removed. Use slice() from shape_utils.hpp.
-         */
-        Mat getPlane(int n, int cn);
-
         /** @brief Shares data from other @p blob.
          * @returns *this
          */
@@ -312,17 +318,17 @@ namespace dnn
     public:
         enum DataState
         {
-            UNINITIALIZED,
-            HEAD_AT_MAT,
-            HEAD_AT_UMAT,
-            SYNCED
+            UNINITIALIZED = 0,
+            HEAD_AT_MAT = 1 << 0,
+            HEAD_AT_UMAT = 1 << 1,
+            SYNCED = HEAD_AT_MAT | HEAD_AT_UMAT
         };

         enum AllocFlag
         {
-            ALLOC_MAT = 1,
-            ALLOC_UMAT = 2,
-            ALLOC_BOTH = 3
+            ALLOC_MAT = HEAD_AT_MAT,
+            ALLOC_UMAT = HEAD_AT_UMAT,
+            ALLOC_BOTH = SYNCED
         };
     };
...
@@ -456,6 +456,12 @@ inline Mat Blob::getPlane(int n, int cn)
     return Mat(dims() - 2, sizes() + 2, type(), ptr(n, cn));
 }

+inline Mat Blob::getPlanes(int n)
+{
+    CV_Assert(dims() > 3);
+    return Mat(dims() - 1, sizes() + 1, type(), ptr(n));
+}
+
 inline int Blob::cols() const
 {
     return xsize(3);
...
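The getPlane()/getPlanes() documentation above describes the slicing in numpy terms; a minimal numpy sketch of the intended semantics, assuming a 4-dimensional blob laid out as [num, channels, rows, cols]:

    import numpy as np

    blob = np.zeros((2, 3, 4, 5), dtype=np.float32)  # [n, cn, rows, cols]

    plane = blob[0, 1, ...]   # getPlane(0, 1): a single 2-D channel plane, shape (4, 5)
    planes = blob[0]          # getPlanes(0): all channels*rows*cols values, shape (3, 4, 5)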
@@ -59,10 +59,12 @@ namespace dnn
 struct DictValue
 {
     DictValue(const DictValue &r);
-    DictValue(int p = 0)        : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = p; }      //!< Constructs integer scalar
+    DictValue(int64 i = 0)      : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = i; }      //!< Constructs integer scalar
+    DictValue(int i)            : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = i; }      //!< Constructs integer scalar
     DictValue(unsigned p)       : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = p; }      //!< Constructs integer scalar
     DictValue(double p)         : type(Param::REAL), pd(new AutoBuffer<double,1>) { (*pd)[0] = p; }    //!< Constructs floating point scalar
-    DictValue(const String &p)  : type(Param::STRING), ps(new AutoBuffer<String,1>) { (*ps)[0] = p; }  //!< Constructs string scalar
+    DictValue(const String &s)  : type(Param::STRING), ps(new AutoBuffer<String,1>) { (*ps)[0] = s; }  //!< Constructs string scalar
+    DictValue(const char *s)    : type(Param::STRING), ps(new AutoBuffer<String,1>) { (*ps)[0] = s; }  //!< @overload

     template<typename TypeIter>
     static DictValue arrayInt(TypeIter begin, int size);    //!< Constructs integer array
@@ -111,7 +113,7 @@ class CV_EXPORTS Dict
 public:

     //! Checks a presence of the @p key in the dictionary.
-    bool has(const String &key);
+    bool has(const String &key) const;

     //! If the @p key in the dictionary then returns pointer to its value, else returns NULL.
     DictValue *ptr(const String &key);
...
@@ -86,7 +86,7 @@ namespace dnn //! This namespace is used for dnn module functionality.
     public:

         //! List of learned parameters; they must be stored here to allow reading them via Net::getParam().
-        std::vector<Blob> blobs;
+        CV_PROP_RW std::vector<Blob> blobs;

         /** @brief Allocates internal buffers and output blobs with respect to the shape of inputs.
          *  @param[in] input vector of already allocated input blobs
@@ -104,6 +104,18 @@ namespace dnn //! This namespace is used for dnn module functionality.
          */
         virtual void forward(std::vector<Blob*> &input, std::vector<Blob> &output) = 0;

+        /** @brief @overload */
+        CV_WRAP void allocate(const std::vector<Blob> &inputs, CV_OUT std::vector<Blob> &outputs);
+
+        /** @brief @overload */
+        CV_WRAP std::vector<Blob> allocate(const std::vector<Blob> &inputs);
+
+        /** @brief @overload */
+        CV_WRAP void forward(const std::vector<Blob> &inputs, CV_IN_OUT std::vector<Blob> &outputs);
+
+        /** @brief Allocates layer and computes output. */
+        CV_WRAP void run(const std::vector<Blob> &inputs, CV_OUT std::vector<Blob> &outputs);
+
         /** @brief Returns index of input blob into the input array.
          *  @param inputName label of input blob
          *
@@ -116,8 +128,8 @@ namespace dnn //! This namespace is used for dnn module functionality.
          */
         virtual int outputNameToIndex(String outputName);

-        String name; //!< Name of the layer instance, can be used for logging or other internal purposes.
-        String type; //!< Type name which was used for creating layer by layer factory.
+        CV_PROP String name; //!< Name of the layer instance, can be used for logging or other internal purposes.
+        CV_PROP String type; //!< Type name which was used for creating layer by layer factory.

         Layer();
         explicit Layer(const LayerParams &params); //!< Initializes only #name, #type and #blobs fields.
@@ -135,12 +147,15 @@ namespace dnn //! This namespace is used for dnn module functionality.
      *
      * This class supports reference counting of its instances, i. e. copies point to the same instance.
      */
-    class CV_EXPORTS_W Net
+    class CV_EXPORTS_W_SIMPLE Net
    {
    public:

-        Net();  //!< Default constructor.
-        ~Net(); //!< Destructor frees the net only if there aren't references to the net anymore.
+        CV_WRAP Net();  //!< Default constructor.
+        CV_WRAP ~Net(); //!< Destructor frees the net only if there aren't references to the net anymore.
+
+        /** Returns true if there are no layers in the network. */
+        CV_WRAP bool empty() const;

         /** @brief Adds new layer to the net.
          *  @param name unique name of the adding layer.
@@ -157,13 +172,18 @@ namespace dnn //! This namespace is used for dnn module functionality.
         /** @brief Converts string name of the layer to the integer identifier.
          *  @returns id of the layer, or -1 if the layer wasn't found.
          */
-        int getLayerId(const String &layer);
+        CV_WRAP int getLayerId(const String &layer);
+
+        CV_WRAP std::vector<String> getLayerNames() const;

         /** @brief Container for strings and integers. */
         typedef DictValue LayerId;

+        /** @brief Returns pointer to layer with specified name which the network uses. */
+        CV_WRAP Ptr<Layer> getLayer(LayerId layerId);
+
         /** @brief Delete layer for the network (not implemented yet) */
-        void deleteLayer(LayerId layer);
+        CV_WRAP void deleteLayer(LayerId layer);

         /** @brief Connects output of the first layer to input of the second layer.
          *  @param outPin descriptor of the first layer output.
@@ -178,7 +198,7 @@ namespace dnn //! This namespace is used for dnn module functionality.
          *
          * @see setNetInputs(), Layer::inputNameToIndex(), Layer::outputNameToIndex()
          */
-        void connect(String outPin, String inpPin);
+        CV_WRAP void connect(String outPin, String inpPin);

         /** @brief Connects #@p outNum output of the first layer to #@p inNum input of the second layer.
          *  @param outLayerId identifier of the first layer
@@ -188,19 +208,22 @@ namespace dnn //! This namespace is used for dnn module functionality.
          */
         void connect(int outLayerId, int outNum, int inpLayerId, int inpNum);

-        /** @brief Sets ouputs names of the network input pseudo layer.
+        /** @brief Sets outputs names of the network input pseudo layer.
          *
          * Each net always has its own special network input pseudo layer with id=0.
          * This layer stores the user blobs only and doesn't make any computations.
          * In fact, this layer provides the only way to pass user data into the network.
          * As any other layer, this layer can label its outputs and this function provides an easy way to do this.
          */
-        void setNetInputs(const std::vector<String> &inputBlobNames);
+        CV_WRAP void setNetInputs(const std::vector<String> &inputBlobNames);
+
+        /** @brief Initializes and allocates all layers. */
+        CV_WRAP void allocate();

-        /** @brief Runs forward pass for the whole network */
-        void forward();
-        /** @brief Runs forward pass to compute output of layer @p toLayer */
-        void forward(LayerId toLayer);
+        /** @brief Runs forward pass to compute output of layer @p toLayer.
+          * @details By default runs forward pass for the whole network.
+          */
+        CV_WRAP void forward(LayerId toLayer = String());
         /** @brief Runs forward pass to compute output of layer @p toLayer, but computations start from @p startLayer */
         void forward(LayerId startLayer, LayerId toLayer);
         /** @overload */
@@ -222,12 +245,13 @@ namespace dnn //! This namespace is used for dnn module functionality.
          *  @note If updating blob is not empty then @p blob must have the same shape,
          *  because network reshaping is not implemented yet.
          */
-        void setBlob(String outputName, const Blob &blob);
+        CV_WRAP void setBlob(String outputName, const Blob &blob);

         /** @brief Returns the layer output blob.
          *  @param outputName the descriptor of the returning layer output blob.
          *  @see connect(String, String)
          */
-        Blob getBlob(String outputName);
+        CV_WRAP Blob getBlob(String outputName);

         /** @brief Sets the new value for the learned param of the layer.
          *  @param layer name or id of the layer.
@@ -237,13 +261,14 @@ namespace dnn //! This namespace is used for dnn module functionality.
          *  @note If shape of the new blob differs from the previous shape,
          *  then the following forward pass may fail.
          */
-        void setParam(LayerId layer, int numParam, const Blob &blob);
+        CV_WRAP void setParam(LayerId layer, int numParam, const Blob &blob);

         /** @brief Returns parameter blob of the layer.
          *  @param layer name or id of the layer.
          *  @param numParam index of the layer parameter in the Layer::blobs array.
          *  @see Layer::blobs
          */
-        Blob getParam(LayerId layer, int numParam = 0);
+        CV_WRAP Blob getParam(LayerId layer, int numParam = 0);

    private:
@@ -252,12 +277,12 @@ namespace dnn //! This namespace is used for dnn module functionality.
    };

     /** @brief Small interface class for loading trained serialized models of different dnn-frameworks. */
-    class Importer
+    class CV_EXPORTS_W Importer
    {
    public:

-        /** @brief Adds loaded layers into the @p net and sets connetions between them. */
-        virtual void populateNet(Net net) = 0;
+        /** @brief Adds loaded layers into the @p net and sets connections between them. */
+        CV_WRAP virtual void populateNet(Net net) = 0;

         virtual ~Importer();
    };
@@ -267,7 +292,12 @@ namespace dnn //! This namespace is used for dnn module functionality.
      *  @param caffeModel path to the .caffemodel file with learned network.
      *  @returns Pointer to the created importer, NULL in failure cases.
      */
-    CV_EXPORTS Ptr<Importer> createCaffeImporter(const String &prototxt, const String &caffeModel = String());
+    CV_EXPORTS_W Ptr<Importer> createCaffeImporter(const String &prototxt, const String &caffeModel = String());
+
+    /** @brief Reads a network model stored in Caffe model files.
+      * @details This is a shortcut that combines the createCaffeImporter() and Importer::populateNet() calls.
+      */
+    CV_EXPORTS_W Net readNetFromCaffe(const String &prototxt, const String &caffeModel = String());

     /** @brief Creates the importer of <a href="http://torch.ch">Torch7</a> framework network.
      *  @param filename path to the file, dumped from Torch by using torch.save() function.
@@ -294,12 +324,12 @@ namespace dnn //! This namespace is used for dnn module functionality.
      *
      * Also some equivalents of these classes from cunn, cudnn, and fbcunn may be successfully imported.
      */
-    CV_EXPORTS Ptr<Importer> createTorchImporter(const String &filename, bool isBinary = true);
+    CV_EXPORTS_W Ptr<Importer> createTorchImporter(const String &filename, bool isBinary = true);

     /** @brief Loads blob which was serialized as torch.Tensor object of Torch7 framework.
      *  @warning This function has the same limitations as createTorchImporter().
      */
-    CV_EXPORTS Blob readTorchBlob(const String &filename, bool isBinary = true);
+    CV_EXPORTS_W Blob readTorchBlob(const String &filename, bool isBinary = true);
 //! @}
 }
...
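The CV_WRAP/CV_EXPORTS_W markup above is what drives the generated Python bindings; a minimal sketch of the resulting Python-side calls, with model file names borrowed from the GoogLeNet sample later in this diff:

    from cv2 import dnn

    net = dnn.readNetFromCaffe('bvlc_googlenet.prototxt', 'bvlc_googlenet.caffemodel')
    assert not net.empty()      # empty() is newly wrapped above

    # setBlob/forward/getBlob are the CV_WRAP-ed entry points; forward()
    # defaults to running the whole network (toLayer = String()):
    # net.setBlob('.data', blob); net.forward(); prob = net.getBlob('prob')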
@@ -287,7 +287,7 @@ inline std::ostream &operator<<(std::ostream &stream, const DictValue &dictv)

 /////////////////////////////////////////////////////////////////

-inline bool Dict::has(const String &key)
+inline bool Dict::has(const String &key) const
 {
     return dict.count(key) != 0;
 }
...
...
...
#ifdef HAVE_OPENCV_DNN
typedef dnn::DictValue LayerId;
typedef std::vector<cv::dnn::Blob> vector_Blob;

template<>
bool pyopencv_to(PyObject *o, dnn::Blob &blob, const char *name);

template<> struct pyopencvVecConverter<dnn::Blob>
{
    static bool to(PyObject* obj, std::vector<dnn::Blob>& value, const ArgInfo info)
    {
        if (PyArray_Check(obj))
        {
            value.resize(1);
            return pyopencv_to(obj, value[0], info.name);
        }
        return pyopencv_to_generic_vec(obj, value, info);
    }

    static PyObject* from(const std::vector<dnn::Blob>& value)
    {
        return pyopencv_from_generic_vec(value);
    }
};

template<>
bool pyopencv_to(PyObject *o, std::vector<dnn::Blob> &blobs, const char *name) //required for Layer::blobs RW
{
    return pyopencvVecConverter<dnn::Blob>::to(o, blobs, ArgInfo(name, false));
}

template<>
bool pyopencv_to(PyObject *o, dnn::Blob &blob, const char *name)
{
    Mat &dst = blob.matRef();
    if (!pyopencv_to(o, dst, name))
        return false;

    if (PyArray_Check(o)) //try to fix channels
    {
        PyArrayObject* oarr = (PyArrayObject*) o;
        if (PyArray_NDIM(oarr) == dst.dims)
            return true;

        int ndims = PyArray_NDIM(oarr);
        std::vector<int> shape(ndims);
        const npy_intp* _sizes = PyArray_DIMS(oarr);
        for (int i = 0; i < ndims; i++)
            shape[i] = (int)_sizes[i];

        dst = dst.reshape(1, ndims, &shape[0]);
    }
    return true;
}
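The converter above routes a numpy array through Mat and then restores the array's own dimensionality, so a caller can feed a 4-D batch directly; a hedged sketch of the expected input, mirroring prepare_image() from the Python sample later in this diff:

    import numpy as np

    img = np.random.rand(224, 224, 3).astype(np.float32)  # HxWxC stand-in image
    # interleaved HxWxC -> planar CxHxW, plus a leading batch axis:
    blob = np.moveaxis(img, 2, 0).reshape(-1, 3, 224, 224)
    # a float32 array shaped [n, c, h, w] converts to a 4-D dnn::Blob as-is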
template<>
PyObject *pyopencv_from(const dnn::Blob &blob)
{
    return pyopencv_from(blob.matRefConst());
}

template<>
bool pyopencv_to(PyObject *o, dnn::DictValue &dv, const char *name)
{
    (void)name;
    if (!o || o == Py_None)
        return true; //Current state will be used
    else if (PyLong_Check(o))
    {
        dv = dnn::DictValue((int64)PyLong_AsLongLong(o));
        return true;
    }
    else if (PyFloat_Check(o))
    {
        dv = dnn::DictValue(PyFloat_AS_DOUBLE(o));
        return true;
    }
    else if (PyString_Check(o))
    {
        dv = dnn::DictValue(String(PyString_AsString(o)));
        return true;
    }
    else
        return false;
}
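For reference, the branches above define the following mapping between Python values and DictValue kinds (a sketch of the conversion contract, not code from the commit):

    # None            -> conversion succeeds, the current DictValue is kept
    # PyLong   42     -> DictValue((int64)...)  (Param::INT)
    # PyFloat  0.5    -> DictValue(double)      (Param::REAL)
    # PyString 'relu' -> DictValue(String)      (Param::STRING)
    # anything else   -> conversion fails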
template<>
bool pyopencv_to(PyObject *o, dnn::BlobShape &shape, const char *name)
{
    std::vector<int> data;
    if (!pyopencv_to_generic_vec(o, data, ArgInfo(name, false)))
        return false;

    shape = data.size() ? dnn::BlobShape((int)data.size(), &data[0]) : dnn::BlobShape::empty();
    return true;
}

template<>
PyObject *pyopencv_from(const dnn::BlobShape &shape)
{
    std::vector<int> data(shape.ptr(), shape.ptr() + shape.dims());
    return pyopencv_from_generic_vec(data);
}
#endif
\ No newline at end of file
@@ -84,23 +84,18 @@ std::vector<String> readClassNames(const char *filename = "synset_words.txt")
 int main(int argc, char **argv)
 {
+    cv::dnn::initModule(); //Required if OpenCV is built as static libs

     String modelTxt = "bvlc_googlenet.prototxt";
     String modelBin = "bvlc_googlenet.caffemodel";
     String imageFile = (argc > 1) ? argv[1] : "space_shuttle.jpg";

-    //! [Create the importer of Caffe model]
-    Ptr<dnn::Importer> importer;
-    try //Try to import Caffe GoogleNet model
-    {
-        importer = dnn::createCaffeImporter(modelTxt, modelBin);
-    }
-    catch (const cv::Exception &err) //Importer can throw errors, we will catch them
-    {
-        std::cerr << err.msg << std::endl;
-    }
-    //! [Create the importer of Caffe model]
+    //! [Read and initialize network]
+    Net net = dnn::readNetFromCaffe(modelTxt, modelBin);
+    //! [Read and initialize network]

-    if (!importer)
+    //! [Check that network was read successfully]
+    if (net.empty())
     {
         std::cerr << "Can't load network by using the following files: " << std::endl;
         std::cerr << "prototxt: " << modelTxt << std::endl;
@@ -109,12 +104,7 @@ int main(int argc, char **argv)
         std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
         exit(-1);
     }
+    //! [Check that network was read successfully]

-    //! [Initialize network]
-    dnn::Net net;
-    importer->populateNet(net);
-    importer.release(); //We don't need importer anymore
-    //! [Initialize network]

     //! [Prepare blob]
     Mat img = imread(imageFile);
...
 #include <opencv2/dnn.hpp>
 #include <opencv2/imgproc.hpp>
 #include <opencv2/highgui.hpp>
+#include <opencv2/core/ocl.hpp>
 using namespace cv;
 using namespace cv::dnn;
@@ -85,6 +86,9 @@ static void colorizeSegmentation(dnn::Blob &score, const vector<cv::Vec3b> &colo
 int main(int argc, char **argv)
 {
+    cv::dnn::initModule();          //Required if OpenCV is built as static libs
+    cv::ocl::setUseOpenCL(false);   //OpenCL switcher
+
     String modelTxt = fcnType + "-heavy-pascal.prototxt";
     String modelBin = fcnType + "-heavy-pascal.caffemodel";
     String imageFile = (argc > 1) ? argv[1] : "rgb.jpg";
...
from __future__ import print_function
import numpy as np
import cv2
from cv2 import dnn
import timeit

def prepare_image(img):
    img = cv2.resize(img, (224, 224))
    #convert interleaved image (RGBRGB) to planar(RRGGBB)
    blob = np.moveaxis(img, 2, 0)
    blob = np.reshape(blob.astype(np.float32), (-1, 3, 224, 224))
    return blob

def timeit_forward(net):
    print("OpenCL:", cv2.ocl.useOpenCL())
    print("Runtime:", timeit.timeit(lambda: net.forward(), number=10))

def get_class_list():
    with open('synset_words.txt', 'rt') as f:
        return [x[x.find(" ") + 1:].rstrip() for x in f]

blob = prepare_image(cv2.imread('space_shuttle.jpg'))
print("Input:", blob.shape, blob.dtype)

cv2.ocl.setUseOpenCL(True)  #Set to False to disable OpenCL
net = dnn.readNetFromCaffe('bvlc_googlenet.prototxt', 'bvlc_googlenet.caffemodel')
net.setBlob(".data", blob)
net.forward()
#timeit_forward(net)        #Uncomment to check performance

prob = net.getBlob("prob")
print("Output:", prob.shape, prob.dtype)
classes = get_class_list()
print("Best match", classes[prob.argmax()])
\ No newline at end of file
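A hedged extension of the script above: the same prob array also yields a top-5 report (pure numpy, reusing prob and classes from the sample):

    flat = prob.flatten()
    for idx in flat.argsort()[-5:][::-1]:
        print("%.4f  %s" % (flat[idx], classes[idx]))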
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace cv::dnn;

#include <fstream>
#include <iostream>
#include <cstdlib>
using namespace std;

const size_t width = 300;
const size_t height = 300;

Mat getMean(const size_t& imageHeight, const size_t& imageWidth)
{
    Mat mean;

    const int meanValues[3] = {104, 117, 123};
    vector<Mat> meanChannels;
    for(size_t i = 0; i < 3; i++)
    {
        Mat channel(imageHeight, imageWidth, CV_32F, Scalar(meanValues[i]));
        meanChannels.push_back(channel);
    }
    cv::merge(meanChannels, mean);
    return mean;
}

Mat preprocess(const Mat& frame)
{
    Mat preprocessed;
    frame.convertTo(preprocessed, CV_32FC3);
    resize(preprocessed, preprocessed, Size(width, height)); //SSD accepts 300x300 RGB-images

    Mat mean = getMean(width, height);
    cv::subtract(preprocessed, mean, preprocessed);

    return preprocessed;
}

const char* about = "This sample uses Single-Shot Detector "
                    "(https://arxiv.org/abs/1512.02325) "
                    "to detect objects in an image\n"; // TODO: link

const char* params
    = "{ help           | false | print usage         }"
      "{ proto          |       | model configuration }"
      "{ model          |       | model weights       }"
      "{ image          |       | image for detection }"
      "{ min_confidence | 0.5   | min confidence      }";

int main(int argc, char** argv)
{
    cv::CommandLineParser parser(argc, argv, params);

    if (parser.get<bool>("help"))
    {
        std::cout << about << std::endl;
        parser.printMessage();
        return 0;
    }

    cv::dnn::initModule(); //Required if OpenCV is built as static libs

    String modelConfiguration = parser.get<string>("proto");
    String modelBinary = parser.get<string>("model");

    //! [Create the importer of Caffe model]
    Ptr<dnn::Importer> importer;
    // Import Caffe SSD model
    try
    {
        importer = dnn::createCaffeImporter(modelConfiguration, modelBinary);
    }
    catch (const cv::Exception &err) //Importer can throw errors, we will catch them
    {
        cerr << err.msg << endl;
    }
    //! [Create the importer of Caffe model]

    if (!importer)
    {
        cerr << "Can't load network by using the following files: " << endl;
        cerr << "prototxt: " << modelConfiguration << endl;
        cerr << "caffemodel: " << modelBinary << endl;
        cerr << "Models can be downloaded here:" << endl;
        cerr << "https://github.com/weiliu89/caffe/tree/ssd#models" << endl;
        exit(-1);
    }

    //! [Initialize network]
    dnn::Net net;
    importer->populateNet(net);
    importer.release(); //We don't need importer anymore
    //! [Initialize network]

    cv::Mat frame = cv::imread(parser.get<string>("image"), -1);

    //! [Prepare blob]
    Mat preprocessedFrame = preprocess(frame);
    dnn::Blob inputBlob = dnn::Blob::fromImages(preprocessedFrame); //Convert Mat to dnn::Blob image
    //! [Prepare blob]

    //! [Set input blob]
    net.setBlob(".data", inputBlob); //set the network input
    //! [Set input blob]

    //! [Make forward pass]
    net.forward(); //compute output
    //! [Make forward pass]

    //! [Gather output]
    dnn::Blob detection = net.getBlob("detection_out");
    Mat detectionMat(detection.rows(), detection.cols(), CV_32F, detection.ptrf());

    float confidenceThreshold = parser.get<float>("min_confidence");
    for(int i = 0; i < detectionMat.rows; i++)
    {
        float confidence = detectionMat.at<float>(i, 2);

        if(confidence > confidenceThreshold)
        {
            size_t objectClass = detectionMat.at<float>(i, 1);

            float xLeftBottom = detectionMat.at<float>(i, 3) * frame.cols;
            float yLeftBottom = detectionMat.at<float>(i, 4) * frame.rows;
            float xRightTop = detectionMat.at<float>(i, 5) * frame.cols;
            float yRightTop = detectionMat.at<float>(i, 6) * frame.rows;

            std::cout << "Class: " << objectClass << std::endl;
            std::cout << "Confidence: " << confidence << std::endl;
            std::cout << " " << xLeftBottom
                      << " " << yLeftBottom
                      << " " << xRightTop
                      << " " << yRightTop << std::endl;

            Rect object(xLeftBottom, yLeftBottom,
                        xRightTop - xLeftBottom,
                        yRightTop - yLeftBottom);

            rectangle(frame, object, Scalar(0, 255, 0));
        }
    }

    imshow("detections", frame);
    waitKey();

    return 0;
} // main
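The detection_out parsing loop above translates almost mechanically to the new Python bindings; a minimal sketch, assuming a net loaded from the same SSD files via the Python API, the row layout used above ([image_id, label, confidence, xmin, ymin, xmax, ymax] with coordinates normalized to [0, 1]), and w/h holding the source image size:

    detections = net.getBlob('detection_out').reshape(-1, 7)
    for _, label, conf, xmin, ymin, xmax, ymax in detections:
        if conf > 0.5:
            print(int(label), conf, xmin * w, ymin * h, xmax * w, ymax * h)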
#!/usr/bin/env python
import os
import sys
import time
import urllib
import hashlib
import argparse
import json


def reporthook(count, block_size, total_size):
    """
    From http://blog.moleculea.com/2012/10/04/urlretrieve-progres-indicator/
    """
    global start_time
    global prev_duration
    if count == 0:
        start_time = time.time()
        prev_duration = -1
        return
    duration = max(1, time.time() - start_time)
    if int(duration) == int(prev_duration):
        return

    progress_size = int(count * block_size)
    speed = int(progress_size / (1024 * duration))
    percent = int(count * block_size * 100 / total_size)
    sys.stdout.write("\r...%d%%, %d MB, %d KB/s, %d seconds passed" %
                     (percent, progress_size / (1024 * 1024), speed, duration))
    sys.stdout.flush()
    prev_duration = duration


# Function for checking SHA1.
def model_checks_out(filename, sha1):
    with open(filename, 'rb') as f:
        return hashlib.sha1(f.read()).hexdigest() == sha1


def model_download(filename, url, sha1):
    # Check if model exists.
    if os.path.exists(filename) and model_checks_out(filename, sha1):
        print("Model {} already exists.".format(filename))
        return

    # Download and verify model.
    urllib.urlretrieve(url, filename, reporthook)
    if not model_checks_out(filename, sha1):
        print("ERROR: model {} did not download correctly!".format(url))
        sys.exit(1)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Downloading trained model binaries.")
    parser.add_argument("download_list")
    args = parser.parse_args()

    test_dir = os.environ.get("OPENCV_TEST_DATA_PATH")
    if not test_dir:
        print("ERROR: OPENCV_TEST_DATA_PATH environment not specified")
        sys.exit(1)

    try:
        with open(args.download_list, 'r') as f:
            models_to_download = json.load(f)
    except:
        print("ERROR: Can't parse {}".format(args.download_list))
        sys.exit(1)

    for model_name in models_to_download:
        model = models_to_download[model_name]

        dst_dir = os.path.join(test_dir, os.path.dirname(model['file']))
        dst_file = os.path.join(test_dir, model['file'])
        if not os.path.exists(dst_dir):
            print("ERROR: Can't find module testdata path '{}'".format(dst_dir))
            sys.exit(1)

        print("Downloading model '{}' to {} from {} ...".format(model_name, dst_file, model['url']))
        model_download(dst_file, model['url'], model['sha1'])
\ No newline at end of file
{
    "googlenet": {
        "file": "dnn/bvlc_googlenet.caffemodel",
        "url": "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel",
        "sha1": "405fc5acd08a3bb12de8ee5e23a96bec22f08204"
    }
}
\ No newline at end of file
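The JSON above is the input that download_model.py iterates over; a short sketch of reading the same schema (the file entry is relative to OPENCV_TEST_DATA_PATH):

    import json

    with open('test_models.json') as f:
        models = json.load(f)
    entry = models['googlenet']
    # entry['file'] -> destination relative to OPENCV_TEST_DATA_PATH
    # entry['url']  -> download source; entry['sha1'] -> expected checksum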
@@ -63,16 +63,15 @@ Blob::Blob(InputArray data)
 #ifndef CV_DNN_UMAT
     m = data.getMat();
 #else
-    CV_Assert(data.isMat() || data.isUMat());
-    if (data.isMat())
+    if (data.isUMat())
     {
-        m = data.getMat();
-        state = HEAD_AT_MAT;
+        um = data.getUMat();
+        state = HEAD_AT_UMAT;
     }
     else
     {
-        um = data.getUMat();
-        state = HEAD_AT_UMAT;
+        m = data.getMat();
+        state = HEAD_AT_MAT;
     }
 #endif
 }
...
@@ -91,6 +91,75 @@ message CropParameter {
   repeated uint32 offset = 2;
 }
+message PermuteParameter {
+  // The new orders of the axes of data. Notice it should be within
+  // the same range as the input data, and it starts from 0.
+  // Do not provide repeated order.
+  repeated uint32 order = 1;
+}
+
+// Message that stores parameters used by NormalizeBBoxLayer
+message NormalizeBBoxParameter {
+  optional bool across_spatial = 1 [default = true];
+  // Initial value of scale. Default is 1.0 for all
+  optional FillerParameter scale_filler = 2;
+  // Whether or not scale parameters are shared across channels.
+  optional bool channel_shared = 3 [default = true];
+  // Epsilon for not dividing by zero while normalizing variance
+  optional float eps = 4 [default = 1e-10];
+}
+
+// Message that stores parameters used by PriorBoxLayer
+message PriorBoxParameter {
+  // Encode/decode type.
+  enum CodeType {
+    CORNER = 1;
+    CENTER_SIZE = 2;
+  }
+  // Minimum box size (in pixels). Required!
+  optional float min_size = 1;
+  // Maximum box size (in pixels). Required!
+  optional float max_size = 2;
+  // Various aspect ratios. Duplicate ratios will be ignored.
+  // If none is provided, we use default ratio 1.
+  repeated float aspect_ratio = 3;
+  // If true, will flip each aspect ratio.
+  // For example, if there is aspect ratio "r",
+  // we will generate aspect ratio "1.0/r" as well.
+  optional bool flip = 4 [default = true];
+  // If true, will clip the prior so that it is within [0, 1]
+  optional bool clip = 5 [default = true];
+  // Variance for adjusting the prior bboxes.
+  repeated float variance = 6;
+}
+
+// Message that stores parameters used by DetectionOutputLayer
+message DetectionOutputParameter {
+  // Number of classes to be predicted. Required!
+  optional uint32 num_classes = 1;
+  // If true, bounding boxes are shared among different classes.
+  optional bool share_location = 2 [default = true];
+  // Background label id. If there is no background class,
+  // set it as -1.
+  optional int32 background_label_id = 3 [default = 0];
+  // Type of coding method for bbox.
+  optional PriorBoxParameter.CodeType code_type = 6 [default = CORNER];
+  // If true, variance is encoded in target; otherwise we need to adjust the
+  // predicted offset accordingly.
+  optional bool variance_encoded_in_target = 8 [default = false];
+  // Number of total bboxes to be kept per image after nms step.
+  // -1 means keeping all bboxes after nms step.
+  optional int32 keep_top_k = 7 [default = -1];
+  // Only consider detections whose confidences are larger than a threshold.
+  // If not provided, consider all boxes.
+  optional float confidence_threshold = 9;
+  // Parameters used for non maximum suppression.
+  // Threshold to be used in nms.
+  optional float nms_threshold = 10 [default = 0.3];
+  // Maximum number of results to be kept.
+  optional int32 top_k = 11;
+}
 message Datum {
   optional int32 channels = 1;
   optional int32 height = 2;
@@ -335,7 +404,7 @@ message ParamSpec {
 // NOTE
 // Update the next available ID when you add a new LayerParameter field.
 //
-// LayerParameter next available layer-specific ID: 138 (last added: crop_param)
+// LayerParameter next available layer-specific ID: 142 (last added: detection_output_param)
 message LayerParameter {
   optional string name = 1; // the layer name
   optional string type = 2; // the layer type
@@ -389,6 +458,7 @@ message LayerParameter {
   optional ConvolutionParameter convolution_param = 106;
   optional CropParameter crop_param = 137;
   optional DataParameter data_param = 107;
+  optional DetectionOutputParameter detection_output_param = 141;
   optional DropoutParameter dropout_param = 108;
   optional DummyDataParameter dummy_data_param = 109;
   optional EltwiseParameter eltwise_param = 110;
@@ -404,17 +474,20 @@ message LayerParameter {
   optional LRNParameter lrn_param = 118;
   optional MemoryDataParameter memory_data_param = 119;
   optional MVNParameter mvn_param = 120;
+  optional NormalizeBBoxParameter normalize_bbox_param = 139;
+  optional PermuteParameter permute_param = 138;
   optional PoolingParameter pooling_param = 121;
   optional PowerParameter power_param = 122;
   optional PReLUParameter prelu_param = 131;
+  optional PriorBoxParameter prior_box_param = 140;
   optional PythonParameter python_param = 130;
   optional ReductionParameter reduction_param = 136;
   optional ReLUParameter relu_param = 123;
   optional ReshapeParameter reshape_param = 133;
   optional SigmoidParameter sigmoid_param = 124;
-  optional SliceParameter slice_param = 126;
   optional SoftmaxParameter softmax_param = 125;
   optional SPPParameter spp_param = 132;
+  optional SliceParameter slice_param = 126;
   optional TanHParameter tanh_param = 127;
   optional ThresholdParameter threshold_param = 128;
   optional WindowDataParameter window_data_param = 129;
@@ -524,6 +597,12 @@ message ConvolutionParameter {
     CUDNN = 2;
   }
   optional Engine engine = 15 [default = DEFAULT];
+  // Factor used to dilate the kernel, (implicitly) zero-filling the resulting
+  // holes. (Kernel dilation is sometimes referred to by its use in the
+  // algorithme a trous from Holschneider et al. 1987.)
+  optional uint32 dilation_h = 18; // The dilation height
+  optional uint32 dilation_w = 19; // The dilation width
+  optional uint32 dilation = 20; // The dilation; defaults to 1
 }

 message DataParameter {
@@ -1174,3 +1253,15 @@ message PReLUParameter {
   // Whether or not slope parameters are shared across channels.
   optional bool channel_shared = 2 [default = false];
 }
+// The normalized bounding box [0, 1] w.r.t. the input image size.
+message NormalizedBBox {
+  optional float xmin = 1;
+  optional float ymin = 2;
+  optional float xmax = 3;
+  optional float ymax = 4;
+  optional int32 label = 5;
+  optional bool difficult = 6;
+  optional float score = 7;
+  optional float size = 8;
+}
...
@@ -46,52 +46,59 @@
 #include <sstream>
 #include <opencv2/core.hpp>

-#define CHECK(cond)     cv::GLogWrapper(__FILE__, CV_Func, __LINE__, "CHECK", #cond, cond)
-#define CHECK_EQ(a, b)  cv::GLogWrapper(__FILE__, CV_Func, __LINE__, "CHECK", #a"="#b, ((a) == (b)))
-#define LOG(TYPE)       cv::GLogWrapper(__FILE__, CV_Func, __LINE__, #TYPE)
+#define CHECK(cond)     for(cv::dnn::GLogWrapper _logger(__FILE__, CV_Func, __LINE__, "CHECK", #cond, cond); _logger.exit(); _logger.check()) _logger.stream()
+#define CHECK_EQ(a, b)  for(cv::dnn::GLogWrapper _logger(__FILE__, CV_Func, __LINE__, "CHECK", #a"="#b, ((a) == (b))); _logger.exit(); _logger.check()) _logger.stream()
+#define LOG(TYPE)       for(cv::dnn::GLogWrapper _logger(__FILE__, CV_Func, __LINE__, #TYPE); _logger.exit(); _logger.check()) _logger.stream()

 namespace cv
 {
+namespace dnn
+{

 class GLogWrapper
 {
-    std::stringstream stream;
     const char *file, *func, *type, *cond_str;
     int line;
-    bool cond_staus;
+    bool cond_staus, exit_loop;
+    std::stringstream sstream;

 public:

     GLogWrapper(const char *_file, const char *_func, int _line,
                 const char *_type,
                 const char *_cond_str = NULL, bool _cond_status = true
                ) :
                file(_file), func(_func), type(_type), cond_str(_cond_str),
-               line(_line), cond_staus(_cond_status) {}
+               line(_line), cond_staus(_cond_status), exit_loop(true) {}

+    std::iostream &stream()
+    {
+        return sstream;
+    }
+
-    template<typename T>
-    GLogWrapper &operator<<(const T &v)
+    bool exit()
     {
-        if (!cond_str || cond_str && !cond_staus)
-            stream << v;
-        return *this;
+        return exit_loop;
     }

-    ~GLogWrapper()
+    void check()
     {
+        exit_loop = false;
+
         if (cond_str && !cond_staus)
         {
-            cv::error(cv::Error::StsError, "FAILED: " + String(cond_str) + "." + stream.str(), func, file, line);
+            cv::error(cv::Error::StsError, "FAILED: " + String(cond_str) + ". " + sstream.str(), func, file, line);
         }
         else if (!cond_str && strcmp(type, "CHECK"))
         {
             if (!std::strcmp(type, "INFO"))
-                std::cout << stream.str() << std::endl;
+                std::cout << sstream.str() << std::endl;
             else
-                std::cerr << stream.str() << std::endl;
+                std::cerr << sstream.str() << std::endl;
         }
     }
 };

+}
 }

 #endif
...@@ -2,62 +2,20 @@ ...@@ -2,62 +2,20 @@
#include "layer_loaders.hpp" #include "layer_loaders.hpp"
#include <opencv2/dnn/shape_utils.hpp> #include <opencv2/dnn/shape_utils.hpp>
#include <climits> #include <climits>
#include "layers/layers_common.hpp"
namespace cv namespace cv
{ {
namespace dnn namespace dnn
{ {
//Utils
//Extracts params used into Conv, Deconv and Pooling layers
static void getCaffeConvParams(LayerParams &params, Size &kernel, Size &pad, Size &stride)
{
if (params.has("kernel_h") && params.has("kernel_w"))
{
kernel.height = params.get<int>("kernel_h");
kernel.width = params.get<int>("kernel_w");
}
else if (params.has("kernel_size"))
{
kernel.height = kernel.width = params.get<int>("kernel_size");
}
else
{
CV_Error(Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");
}
CV_Assert(kernel.height > 0 && kernel.width > 0);
if (params.has("pad_h") && params.has("pad_w"))
{
pad.height = params.get<int>("pad_h");
pad.width = params.get<int>("pad_w");
}
else
{
pad.height = pad.width = params.get<int>("pad", 0);
}
CV_Assert(pad.height >= 0 && pad.width >= 0);
if (params.has("stride_h") && params.has("stride_w"))
{
stride.height = params.get<int>("stride_h");
stride.width = params.get<int>("stride_w");
}
else
{
stride.height = stride.width = params.get<int>("stride", 1);
}
CV_Assert(stride.height > 0 && stride.width > 0);
}
//Layers //Layers
//Convolution and Deconvolution //Convolution and Deconvolution
static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer> l, LayerParams &params) static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer> l, LayerParams &params)
{ {
l->setParamsFrom(params); l->setParamsFrom(params);
getCaffeConvParams(params, l->kernel, l->pad, l->stride); getConvolutionKernelParams(params, l->kernel.height, l->kernel.width, l->pad.height, l->pad.width, l->stride.height, l->stride.width, l->dilation.height, l->dilation.width);
bool bias = params.get<bool>("bias_term", true); bool bias = params.get<bool>("bias_term", true);
int numOutput = params.get<int>("num_output"); int numOutput = params.get<int>("num_output");
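The loader now delegates to the shared getConvolutionKernelParams, which additionally reports dilation. A hedged sketch of what the parser yields, with LayerParams values standing in for prototxt fields and defaults assumed to mirror the removed getCaffeConvParams:

    LayerParams p;          // as if parsed from "kernel_size: 3 pad: 1 dilation: 2"
    p.set("kernel_size", 3);
    p.set("pad", 1);
    p.set("dilation", 2);
    int kH, kW, padH, padW, strideH, strideW, dilH, dilW;
    getConvolutionKernelParams(p, kH, kW, padH, padW, strideH, strideW, dilH, dilW);
    // expected: kH == kW == 3, padH == padW == 1,
    // strideH == strideW == 1 (default), dilH == dilW == 2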
...@@ -88,6 +46,7 @@ Ptr<Layer> createLayerFromCaffe<PoolingLayer>(LayerParams &params) ...@@ -88,6 +46,7 @@ Ptr<Layer> createLayerFromCaffe<PoolingLayer>(LayerParams &params)
{ {
int type; int type;
Size kernel, stride, pad; Size kernel, stride, pad;
bool globalPooling;
if (params.has("pool")) if (params.has("pool"))
{ {
...@@ -106,9 +65,13 @@ Ptr<Layer> createLayerFromCaffe<PoolingLayer>(LayerParams &params) ...@@ -106,9 +65,13 @@ Ptr<Layer> createLayerFromCaffe<PoolingLayer>(LayerParams &params)
type = PoolingLayer::MAX; type = PoolingLayer::MAX;
} }
getCaffeConvParams(params, kernel, pad, stride); getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling, pad.height, pad.width, stride.height, stride.width);
return Ptr<Layer>(PoolingLayer::create(type, kernel, stride, pad)); if (!globalPooling)
return Ptr<Layer>(PoolingLayer::create(type, kernel, stride, pad));
else
return Ptr<Layer>(PoolingLayer::createGlobal(type));
} }
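With global_pooling set, kernel geometry is ignored and each channel is pooled over its full spatial extent. Sketch (MAX is the constant used above; an AVE counterpart is assumed to exist alongside it):

    // Collapses every channel to a single 1x1 value, whatever the input size.
    Ptr<Layer> gmp = PoolingLayer::createGlobal(PoolingLayer::MAX);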
template<> template<>
...@@ -197,11 +160,6 @@ Ptr<Layer> createLayerFromCaffe<ReshapeLayer>(LayerParams &params) ...@@ -197,11 +160,6 @@ Ptr<Layer> createLayerFromCaffe<ReshapeLayer>(LayerParams &params)
return Ptr<Layer>(ReshapeLayer::create(newShape, applyingRange)); return Ptr<Layer>(ReshapeLayer::create(newShape, applyingRange));
} }
Ptr<Layer> createFlattenLayerFromCaffe(LayerParams&)
{
return Ptr<Layer>(ReshapeLayer::create(Shape(0, -1)));
}
template<> template<>
Ptr<Layer> createLayerFromCaffe<ConcatLayer>(LayerParams& params) Ptr<Layer> createLayerFromCaffe<ConcatLayer>(LayerParams& params)
{ {
...@@ -274,30 +232,16 @@ Ptr<Layer> createLayerFromCaffe<PowerLayer>(LayerParams& params) ...@@ -274,30 +232,16 @@ Ptr<Layer> createLayerFromCaffe<PowerLayer>(LayerParams& params)
template<> //CropLayer specialization template<> //CropLayer specialization
Ptr<Layer> createLayerFromCaffe<CropLayer>(LayerParams& params) Ptr<Layer> createLayerFromCaffe<CropLayer>(LayerParams& params)
{ {
int start_axis = params.get<int>("axis"); int start_axis = params.get<int>("axis", 2);
if (4 <= start_axis) DictValue *paramOffset = params.ptr("offset");
CV_Error(Error::StsBadArg, "crop axis bigger than input dim");
DictValue paramOffset = params.get("offset"); std::vector<int> offset;
if (paramOffset)
std::vector<int> offset(4, 0);
if (1 < paramOffset.size())
{ {
if (4 - start_axis != paramOffset.size()) for (int i = 0; i < paramOffset->size(); i++)
CV_Error(Error::StsBadArg, "number of offset values specified must be equal to the number of dimensions following axis."); offset.push_back(paramOffset->get<int>(i));
for (size_t i = start_axis; i < offset.size(); i++)
{
offset[i] = paramOffset.get<int>(i);
}
}
else
{
const int offset_val = paramOffset.get<int>(0);
for (size_t i = start_axis; i < offset.size(); i++)
{
offset[i] = offset_val;
}
} }
return Ptr<Layer>(CropLayer::create(start_axis, offset)); return Ptr<Layer>(CropLayer::create(start_axis, offset));
} }
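The Crop loader now follows Caffe's defaults: axis 2 when unspecified, and offsets are simply collected, leaving validation and centering to allocate(). Usage sketch:

    std::vector<int> offset;                 // empty: centered crop, resolved at allocate()
    Ptr<Layer> crop = CropLayer::create(/*start_axis=*/2, offset);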
......
...@@ -44,6 +44,7 @@ ...@@ -44,6 +44,7 @@
#include <algorithm> #include <algorithm>
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include <iterator>
using namespace cv; using namespace cv;
using namespace cv::dnn; using namespace cv::dnn;
...@@ -59,7 +60,7 @@ namespace dnn ...@@ -59,7 +60,7 @@ namespace dnn
{ {
template<typename T> template<typename T>
String toString(const T &v) static String toString(const T &v)
{ {
std::ostringstream ss; std::ostringstream ss;
ss << v; ss << v;
...@@ -127,7 +128,7 @@ struct LayerData ...@@ -127,7 +128,7 @@ struct LayerData
}; };
//fake layer containing network input blobs //fake layer containing network input blobs
struct NetInputLayer : public Layer struct DataLayer : public Layer
{ {
void allocate(const std::vector<Blob*>&, std::vector<Blob>&) {} void allocate(const std::vector<Blob*>&, std::vector<Blob>&) {}
void forward(std::vector<Blob*>&, std::vector<Blob>&) {} void forward(std::vector<Blob*>&, std::vector<Blob>&) {}
...@@ -152,7 +153,7 @@ struct Net::Impl ...@@ -152,7 +153,7 @@ struct Net::Impl
Impl() Impl()
{ {
//allocate fake net input layer //allocate fake net input layer
netInputLayer = Ptr<NetInputLayer>(new NetInputLayer()); netInputLayer = Ptr<DataLayer>(new DataLayer());
LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second; LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second;
inpl.id = 0; inpl.id = 0;
inpl.name = "_input"; inpl.name = "_input";
...@@ -163,7 +164,7 @@ struct Net::Impl ...@@ -163,7 +164,7 @@ struct Net::Impl
netWasAllocated = false; netWasAllocated = false;
} }
Ptr<NetInputLayer> netInputLayer; Ptr<DataLayer> netInputLayer;
std::vector<int> netOutputs; std::vector<int> netOutputs;
typedef std::map<int, LayerData> MapIdToLayerData; typedef std::map<int, LayerData> MapIdToLayerData;
...@@ -328,11 +329,16 @@ struct Net::Impl ...@@ -328,11 +329,16 @@ struct Net::Impl
netOutputs.push_back(lid); netOutputs.push_back(lid);
} }
#ifndef NDEBUG
std::cout << "\nNet Outputs(" << netOutputs.size() << "):\n"; std::cout << "\nNet Outputs(" << netOutputs.size() << "):\n";
for (size_t i = 0; i < netOutputs.size(); i++) for (size_t i = 0; i < netOutputs.size(); i++)
std::cout << layers[netOutputs[i]].name << std::endl; std::cout << layers[netOutputs[i]].name << "\n";
#endif
} }
#define CV_RETHROW_ERROR(err, newmsg)\
cv::error(err.code, newmsg, err.func.c_str(), err.file.c_str(), err.line)
void allocateLayer(int lid) void allocateLayer(int lid)
{ {
LayerData &ld = layers[lid]; LayerData &ld = layers[lid];
...@@ -361,7 +367,15 @@ struct Net::Impl ...@@ -361,7 +367,15 @@ struct Net::Impl
//allocate layer //allocate layer
ld.outputBlobs.resize(std::max((size_t)1, ld.requiredOutputs.size())); //layer produces at least one output blob ld.getLayerInstance()->allocate(ld.inputBlobs, ld.outputBlobs); try
ld.getLayerInstance()->allocate(ld.inputBlobs, ld.outputBlobs); try
{
Ptr<Layer> layerPtr = ld.getLayerInstance();
layerPtr->allocate(ld.inputBlobs, ld.outputBlobs);
}
catch (const cv::Exception &err)
{
CV_RETHROW_ERROR(err, format("The following error occured while making allocate() for layer \"%s\": %s", ld.name.c_str(), err.err.c_str()));
}
ld.flag = 1; ld.flag = 1;
} }
...@@ -399,7 +413,14 @@ struct Net::Impl ...@@ -399,7 +413,14 @@ struct Net::Impl
} }
//forward itself //forward itself
ld.layerInstance->forward(ld.inputBlobs, ld.outputBlobs); try
{
ld.layerInstance->forward(ld.inputBlobs, ld.outputBlobs);
}
catch (const cv::Exception &err)
{
CV_RETHROW_ERROR(err, format("The following error occured while making forward() for layer \"%s\": %s", ld.name.c_str(), err.err.c_str()));
}
ld.flag = 1; ld.flag = 1;
} }
...@@ -417,12 +438,10 @@ struct Net::Impl ...@@ -417,12 +438,10 @@ struct Net::Impl
Net::Net() : impl(new Net::Impl) Net::Net() : impl(new Net::Impl)
{ {
} }
Net::~Net() Net::~Net()
{ {
} }
int Net::addLayer(const String &name, const String &type, LayerParams &params) int Net::addLayer(const String &name, const String &type, LayerParams &params)
...@@ -469,16 +488,19 @@ void Net::connect(String _outPin, String _inPin) ...@@ -469,16 +488,19 @@ void Net::connect(String _outPin, String _inPin)
impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid); impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid);
} }
void Net::forward() void Net::allocate()
{ {
impl->setUpNet(); impl->setUpNet();
impl->forwardAll();
} }
void Net::forward(LayerId toLayer) void Net::forward(LayerId toLayer)
{ {
impl->setUpNet(); impl->setUpNet();
impl->forwardLayer(impl->getLayerData(toLayer));
if (toLayer.isString() && toLayer.get<String>().empty())
impl->forwardAll();
else
impl->forwardLayer(impl->getLayerData(toLayer));
} }
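allocate() now exposes buffer setup on its own, and forward() treats an empty layer name as "run the whole network". A hedged usage sketch ("prob" is a hypothetical layer name):

    net.allocate();          // set up all blobs without computing anything
    net.forward(String());   // empty name: forward the entire network
    net.forward("prob");     // forward only up to the layer named "prob"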
void Net::setNetInputs(const std::vector<String> &inputBlobNames) void Net::setNetInputs(const std::vector<String> &inputBlobNames)
...@@ -521,6 +543,16 @@ Blob Net::getParam(LayerId layer, int numParam) ...@@ -521,6 +543,16 @@ Blob Net::getParam(LayerId layer, int numParam)
return layerBlobs[numParam]; return layerBlobs[numParam];
} }
void Net::setParam(LayerId layer, int numParam, const Blob &blob)
{
LayerData &ld = impl->getLayerData(layer);
std::vector<Blob> &layerBlobs = ld.layerInstance->blobs;
CV_Assert(numParam < (int)layerBlobs.size());
//we don't perform strong checks; use this function carefully
layerBlobs[numParam] = blob;
}
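A short sketch pairing the new setter with getParam ("conv1" is a hypothetical layer name); as the comment above warns, shape checks are minimal:

    Blob w = net.getParam("conv1", 0);   // weights blob of a convolution layer
    // ... modify w in place, keeping its shape ...
    net.setParam("conv1", 0, w);         // write it back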
int Net::getLayerId(const String &layer) int Net::getLayerId(const String &layer)
{ {
return impl->getLayerId(layer); return impl->getLayerId(layer);
...@@ -531,6 +563,34 @@ void Net::deleteLayer(LayerId) ...@@ -531,6 +563,34 @@ void Net::deleteLayer(LayerId)
CV_Error(Error::StsNotImplemented, ""); CV_Error(Error::StsNotImplemented, "");
} }
Ptr<Layer> Net::getLayer(LayerId layerId)
{
LayerData &ld = impl->getLayerData(layerId);
if (!ld.layerInstance)
CV_Error(Error::StsNullPtr, format("Requested layer \"%s\" was not initialized", ld.name.c_str()));
return ld.layerInstance;
}
std::vector<String> Net::getLayerNames() const
{
std::vector<String> res;
res.reserve(impl->layers.size());
Impl::MapIdToLayerData::iterator it;
for (it = impl->layers.begin(); it != impl->layers.end(); it++)
{
if (it->second.id) //skip Data layer
res.push_back(it->second.name);
}
return res;
}
bool Net::empty() const
{
return impl->layers.size() <= 1; //first layer is default Data layer
}
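Together these give basic introspection over a loaded net; a sketch, assuming Layer's public name/type fields as set by setParamsFrom:

    if (!net.empty())
    {
        std::vector<String> names = net.getLayerNames();  // fake Data layer skipped
        for (size_t i = 0; i < names.size(); i++)
            std::cout << names[i] << ": " << net.getLayer(names[i])->type << "\n";
    }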
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
Importer::~Importer() {} Importer::~Importer() {}
...@@ -560,6 +620,43 @@ int Layer::outputNameToIndex(String) ...@@ -560,6 +620,43 @@ int Layer::outputNameToIndex(String)
return -1; return -1;
} }
template <typename T>
static void vecToPVec(const std::vector<T> &v, std::vector<T*> &pv)
{
pv.resize(v.size());
for (size_t i = 0; i < v.size(); i++)
pv[i] = const_cast<T*>(&v[i]);
}
void Layer::allocate(const std::vector<Blob> &inputs, std::vector<Blob> &outputs)
{
std::vector<Blob*> inputsp;
vecToPVec(inputs, inputsp);
this->allocate(inputsp, outputs);
}
std::vector<Blob> Layer::allocate(const std::vector<Blob> &inputs)
{
std::vector<Blob> outputs;
this->allocate(inputs, outputs);
return outputs;
}
void Layer::forward(const std::vector<Blob> &inputs, std::vector<Blob> &outputs)
{
std::vector<Blob*> inputsp;
vecToPVec(inputs, inputsp);
this->forward(inputsp, outputs);
}
void Layer::run(const std::vector<Blob> &inputs, std::vector<Blob> &outputs)
{
std::vector<Blob*> inputsp;
vecToPVec(inputs, inputsp);
this->allocate(inputsp, outputs);
this->forward(inputsp, outputs);
}
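The value-based overloads convert to the pointer interface through vecToPVec, and run() chains allocate() with forward(). Sketch using the pooling factory shown earlier (blob contents left uninitialized, which is fine for a shape check):

    std::vector<Blob> inputs(1), outputs;
    inputs[0].create(BlobShape(1, 3, 224, 224));
    Ptr<Layer> pool = PoolingLayer::create(PoolingLayer::MAX, Size(2, 2), Size(2, 2), Size(0, 0));
    pool->run(inputs, outputs);          // allocate() + forward() in one call
    // outputs[0] now has shape 1x3x112x112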
Layer::~Layer() {} Layer::~Layer() {}
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
......
...@@ -43,6 +43,14 @@ ...@@ -43,6 +43,14 @@
#include "caffe/layer_loaders.hpp" #include "caffe/layer_loaders.hpp"
#include "layers/blank_layer.hpp" #include "layers/blank_layer.hpp"
#include "layers/crop_layer.hpp"
#include "layers/eltwise_layer.hpp"
#include "layers/flatten_layer.hpp"
#include "layers/permute_layer.hpp"
#include "layers/prior_box_layer.hpp"
#include "layers/detection_output_layer.hpp"
#include "layers/normalize_bbox_layer.hpp"
namespace cv namespace cv
{ {
namespace dnn namespace dnn
...@@ -69,7 +77,7 @@ void initModule() ...@@ -69,7 +77,7 @@ void initModule()
REG_RUNTIME_LAYER_FUNC(Split, createLayerFromCaffe<SplitLayer>); REG_RUNTIME_LAYER_FUNC(Split, createLayerFromCaffe<SplitLayer>);
REG_RUNTIME_LAYER_FUNC(Concat, createLayerFromCaffe<ConcatLayer>); REG_RUNTIME_LAYER_FUNC(Concat, createLayerFromCaffe<ConcatLayer>);
REG_RUNTIME_LAYER_FUNC(Reshape, createLayerFromCaffe<ReshapeLayer>); REG_RUNTIME_LAYER_FUNC(Reshape, createLayerFromCaffe<ReshapeLayer>);
REG_RUNTIME_LAYER_FUNC(Flatten, createFlattenLayerFromCaffe); REG_RUNTIME_LAYER_CLASS(Flatten, FlattenLayer);
REG_RUNTIME_LAYER_FUNC(Convolution, createLayerFromCaffe<ConvolutionLayer>); REG_RUNTIME_LAYER_FUNC(Convolution, createLayerFromCaffe<ConvolutionLayer>);
REG_RUNTIME_LAYER_FUNC(Deconvolution, createLayerFromCaffe<DeconvolutionLayer>); REG_RUNTIME_LAYER_FUNC(Deconvolution, createLayerFromCaffe<DeconvolutionLayer>);
...@@ -89,6 +97,10 @@ void initModule() ...@@ -89,6 +97,10 @@ void initModule()
REG_RUNTIME_LAYER_FUNC(Crop, createLayerFromCaffe<CropLayer>); REG_RUNTIME_LAYER_FUNC(Crop, createLayerFromCaffe<CropLayer>);
REG_RUNTIME_LAYER_FUNC(Eltwise, createLayerFromCaffe<EltwiseLayer>); REG_RUNTIME_LAYER_FUNC(Eltwise, createLayerFromCaffe<EltwiseLayer>);
REG_RUNTIME_LAYER_CLASS(Permute, PermuteLayer)
REG_RUNTIME_LAYER_CLASS(PriorBox, PriorBoxLayer)
REG_RUNTIME_LAYER_CLASS(DetectionOutput, DetectionOutputLayer)
REG_RUNTIME_LAYER_CLASS(NormalizeBBox, NormalizeBBoxLayer)
init.status = true; init.status = true;
} }
......
...@@ -77,7 +77,8 @@ void ConvolutionLayerImpl::init() ...@@ -77,7 +77,8 @@ void ConvolutionLayerImpl::init()
CV_Assert(blobs[0].dims() == 4 && blobs[0].cols() == kernel.width && blobs[0].rows() == kernel.height); CV_Assert(blobs[0].dims() == 4 && blobs[0].cols() == kernel.width && blobs[0].rows() == kernel.height);
CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].num()); CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].num());
useOpenCL = ocl::useOpenCL() && tryUseOpenCL; //TODO: dilation in OCL mode
useOpenCL = ocl::useOpenCL() && tryUseOpenCL && dilation == Size(1, 1);
} }
void ConvolutionLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs) void ConvolutionLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
...@@ -127,14 +128,15 @@ void ConvolutionLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vecto ...@@ -127,14 +128,15 @@ void ConvolutionLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vecto
bool ConvolutionLayerImpl::is1x1() const bool ConvolutionLayerImpl::is1x1() const
{ {
return (kernel.height == 1 && kernel.width == 1) && return (kernel.height == 1 && kernel.width == 1) &&
(stride.height == 1 && stride.width == 1); (stride.height == 1 && stride.width == 1) &&
(dilation.height == 1 && dilation.width == 1);
} }
template<typename XMat> template<typename XMat>
void ConvolutionLayerImpl::forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs) void ConvolutionLayerImpl::forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{ {
XMat weightsMat = reshaped(blobs[0].getRefConst<XMat>(), Shape(outCn, ksize)); XMat weightsMat = reshaped(blobs[0].getRefConst<XMat>(), Shape(outCn, ksize));
XMat biasesMat = reshaped(blobs[1].getRefConst<XMat>(), Shape(outCn, 1)); XMat biasesMat = (bias) ? reshaped(blobs[1].getRefConst<XMat>(), Shape(outCn, 1)) : XMat();
for (size_t ii = 0; ii < outputs.size(); ii++) for (size_t ii = 0; ii < outputs.size(); ii++)
{ {
...@@ -182,7 +184,7 @@ void ConvolutionLayerImpl::im2col(const UMat &srcImg, UMat &dstCol) ...@@ -182,7 +184,7 @@ void ConvolutionLayerImpl::im2col(const UMat &srcImg, UMat &dstCol)
return; return;
} }
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
CV_Assert(im2col_ocl(srcImg, inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, this->colBlob.umatRef())); CV_Assert(im2col_ocl(srcImg, inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dilation.height, dilation.width, this->colBlob.umatRef()));
dstCol = this->colBlob.umatRefConst(); dstCol = this->colBlob.umatRefConst();
#else #else
CV_Error(Error::StsInternal, ""); CV_Error(Error::StsInternal, "");
...@@ -200,9 +202,9 @@ void ConvolutionLayerImpl::im2col(const Mat &srcImg, Mat &dstCol) ...@@ -200,9 +202,9 @@ void ConvolutionLayerImpl::im2col(const Mat &srcImg, Mat &dstCol)
Mat &colMat = colBlob.matRef(); Mat &colMat = colBlob.matRef();
if (srcImg.type() == CV_32F) if (srcImg.type() == CV_32F)
im2col_CpuPBody<float>::run(srcImg.ptr<float>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, colMat.ptr<float>()); im2col_CpuPBody<float>::run(srcImg.ptr<float>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dilation.height, dilation.width, colMat.ptr<float>());
if (srcImg.type() == CV_64F) if (srcImg.type() == CV_64F)
im2col_CpuPBody<double>::run(srcImg.ptr<double>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, colMat.ptr<double>()); im2col_CpuPBody<double>::run(srcImg.ptr<double>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dilation.height, dilation.width, colMat.ptr<double>());
dstCol = colMat; dstCol = colMat;
} }
...@@ -213,8 +215,8 @@ void ConvolutionLayerImpl::computeInpOutShape(const Blob &input) ...@@ -213,8 +215,8 @@ void ConvolutionLayerImpl::computeInpOutShape(const Blob &input)
inpW = input.cols(); inpW = input.cols();
inpCn = input.channels(); inpCn = input.channels();
outH = (inpH + 2 * pad.height - kernel.height) / stride.height + 1; outH = (inpH + 2 * pad.height - (dilation.height * (kernel.height - 1) + 1)) / stride.height + 1;
outW = (inpW + 2 * pad.width - kernel.width) / stride.width + 1; outW = (inpW + 2 * pad.width - (dilation.width * (kernel.width - 1) + 1)) / stride.width + 1;
outCn = numOutput; outCn = numOutput;
topH = outH; topW = outW; topCn = outCn; topH = outH; topW = outW; topCn = outCn;
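The effective kernel extent under dilation d is d*(k - 1) + 1, which the new formulas substitute for the plain kernel size; with d = 1 they reduce to the old expressions. A quick worked check:

    inpH = 7, pad = 0, kernel = 3, dilation = 2, stride = 1
    effective kernel = 2*(3 - 1) + 1 = 5
    outH = (7 + 2*0 - 5)/1 + 1 = 3    (versus (7 - 3)/1 + 1 = 5 undilated)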
...@@ -252,7 +254,7 @@ template<typename XMat> ...@@ -252,7 +254,7 @@ template<typename XMat>
void DeConvolutionLayerImpl::forward_(std::vector<Blob *> &inputs, std::vector<Blob> &outputs) void DeConvolutionLayerImpl::forward_(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{ {
XMat weightsMat = reshaped(blobs[0].getRefConst<XMat>(), Shape(outCn, ksize)); XMat weightsMat = reshaped(blobs[0].getRefConst<XMat>(), Shape(outCn, ksize));
XMat biasesMat = reshaped(blobs[1].getRefConst<XMat>(), Shape(outCn, 1)); XMat biasesMat = (bias) ? reshaped(blobs[1].getRefConst<XMat>(), Shape(outCn, 1)) : XMat();
for (size_t ii = 0; ii < outputs.size(); ii++) for (size_t ii = 0; ii < outputs.size(); ii++)
{ {
...@@ -315,21 +317,23 @@ void DeConvolutionLayerImpl::col2im(const UMat &colMat, UMat &dstImg) ...@@ -315,21 +317,23 @@ void DeConvolutionLayerImpl::col2im(const UMat &colMat, UMat &dstImg)
//Initializers //Initializers
Ptr<BaseConvolutionLayer> ConvolutionLayer::create(Size kernel, Size stride, Size pad) Ptr<BaseConvolutionLayer> ConvolutionLayer::create(Size kernel, Size stride, Size pad, Size dilation)
{ {
ConvolutionLayerImpl *l = new ConvolutionLayerImpl(); ConvolutionLayerImpl *l = new ConvolutionLayerImpl();
l->kernel = kernel; l->kernel = kernel;
l->pad = pad; l->pad = pad;
l->stride = stride; l->stride = stride;
l->dilation = dilation;
return Ptr<BaseConvolutionLayer>(l); return Ptr<BaseConvolutionLayer>(l);
} }
Ptr<BaseConvolutionLayer> DeconvolutionLayer::create(Size kernel, Size stride, Size pad) Ptr<BaseConvolutionLayer> DeconvolutionLayer::create(Size kernel, Size stride, Size pad, Size dilation)
{ {
DeConvolutionLayerImpl *l = new DeConvolutionLayerImpl(); DeConvolutionLayerImpl *l = new DeConvolutionLayerImpl();
l->kernel = kernel; l->kernel = kernel;
l->pad = pad; l->pad = pad;
l->stride = stride; l->stride = stride;
l->dilation = dilation;
return Ptr<BaseConvolutionLayer>(l); return Ptr<BaseConvolutionLayer>(l);
} }
......
...@@ -47,57 +47,82 @@ namespace cv ...@@ -47,57 +47,82 @@ namespace cv
{ {
namespace dnn namespace dnn
{ {
CropLayerImpl::CropLayerImpl(int start_axis_, const std::vector<int> &offset_)
CropLayerImpl::CropLayerImpl(int start_axis_, const std::vector<int> &offset_)
{
startAxis = start_axis_;
offset = offset_;
}
void CropLayerImpl::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(2 == inputs.size());
const Blob &inpBlob = *inputs[0];
const Blob &inpSzBlob = *inputs[1];
int start_axis = inpBlob.canonicalAxis(startAxis);
int dims = inpBlob.dims();
std::vector<int> offset_final(dims, 0);
if (offset.size() == 1)
{ {
start_axis = start_axis_; for (int i = start_axis; i < dims; i++)
offset = offset_; offset_final[i] = offset[0];
} }
else if (offset.size() > 1)
void CropLayerImpl::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{ {
CV_Assert(2 == inputs.size()); if ((int)offset.size() != dims - start_axis)
CV_Error(Error::StsBadArg, "number of offset values specified must be equal to the number of dimensions following axis.");
const Blob &inpBlob = *inputs[0]; for (int i = start_axis; i < dims; i++)
CV_Assert(inpBlob.dims() == 4 && inpBlob.type() == CV_32F); offset_final[i] = offset[i - start_axis];
}
const Blob &inpSzBlob = *inputs[1]; BlobShape dstShape = inpBlob.shape();
crop_ranges.resize(dims, Range::all());
for (int i = start_axis; i < dims; i++)
{
dstShape[i] = inpSzBlob.size(i);
outSizes.resize(4, 0); if (!offset.empty()) //normal case
for (int i = 0; i < 4; i++)
{ {
if (i < start_axis) if (offset_final[i] < 0 || offset_final[i] + inpSzBlob.size(i) > inpBlob.size(i))
outSizes[i] = inpBlob.size(i);
else
outSizes[i] = inpSzBlob.size(i);
if (offset[i] + outSizes[i] > inpBlob.size(i))
CV_Error(Error::StsBadArg, "invalid crop parameters"); CV_Error(Error::StsBadArg, "invalid crop parameters");
}
outputs.resize(1);
outputs[0].create(BlobShape(outSizes));
}
void CropLayerImpl::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs) crop_ranges[i] = Range(offset_final[i], offset_final[i] + inpSzBlob.size(i));
{ }
Blob input = *inputs[0]; else //detect offset automatically so that the cropped image is centered on the original one
Blob output = outputs[0];
for (int num = 0; num < outSizes[0]; ++num)
{ {
for (int ch = 0; ch < outSizes[1]; ++ch) if (inpSzBlob.size(i) > inpBlob.size(i))
{ CV_Error(Error::StsBadArg, "invalid output blob size");
for (int row = 0; row < outSizes[2]; ++row)
{ int cur_crop = (inpBlob.size(i) - inpSzBlob.size(i)) / 2;
float *srcData = input.ptrf(num + offset[0], ch + offset[1], row + offset[2]); crop_ranges[i] = Range(cur_crop, cur_crop + inpSzBlob.size(i));
float *dstData = output.ptrf(num, ch, row);
memcpy(dstData, srcData + offset[3], sizeof(float) * outSizes[3]);
}
}
} }
} }
Ptr<CropLayer> CropLayer::create(int start_axis, const std::vector<int> &offset) outputs.resize(1);
{ outputs[0].create(dstShape);
return Ptr<CropLayer>(new CropLayerImpl(start_axis, offset)); }
}
void CropLayerImpl::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
Blob &input = *inputs[0];
Blob &output = outputs[0];
#ifdef HAVE_OPENCL
if (input.getState() == Blob::HEAD_AT_UMAT)
input.umatRefConst()(&crop_ranges[0]).copyTo(output.umatRef());
else
#endif
input.matRefConst()(&crop_ranges[0]).copyTo(output.matRef());
}
Ptr<CropLayer> CropLayer::create(int start_axis, const std::vector<int> &offset)
{
return Ptr<CropLayer>(new CropLayerImpl(start_axis, offset));
}
} }
} }
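When the prototxt carries no offsets, allocate() centers the crop: each trailing axis starts at (input - target)/2. Illustrative numbers (hypothetical sizes):

    input blob 1x3x224x224, size blob 1x3x200x200, start_axis = 2, offset empty
    cur_crop = (224 - 200)/2 = 12 on each spatial axis
    crop_ranges = { all, all, [12, 212), [12, 212) }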
...@@ -50,9 +50,7 @@ namespace dnn ...@@ -50,9 +50,7 @@ namespace dnn
{ {
class CropLayerImpl : public CropLayer class CropLayerImpl : public CropLayer
{ {
int start_axis; std::vector<Range> crop_ranges;
std::vector<int> offset;
std::vector<int> outSizes;
public: public:
CropLayerImpl(int start_axis, const std::vector<int> &offset); CropLayerImpl(int start_axis, const std::vector<int> &offset);
......
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_FLATTEN_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_FLATTEN_LAYER_HPP__
#include "../precomp.hpp"
namespace cv
{
namespace dnn
{
class FlattenLayer : public Layer
{
int _startAxis;
int _endAxis;
size_t _numAxes;
BlobShape resultShape;
public:
FlattenLayer(LayerParams &params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void checkInputs(const std::vector<Blob*> &inputs);
};
}
}
#endif
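Registered through REG_RUNTIME_LAYER_CLASS above, the layer collapses the axis range [_startAxis, _endAxis] into one dimension. A hedged sketch of the expected behaviour, assuming Caffe's defaults (axis 1 through the last):

    LayerParams p;                           // no fields set: library defaults apply
    Ptr<Layer> flatten(new FlattenLayer(p));
    // default Flatten maps N x C x H x W to N x (C*H*W), e.g. 2x3x4x5 -> 2x60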
...@@ -50,6 +50,10 @@ namespace cv ...@@ -50,6 +50,10 @@ namespace cv
namespace dnn namespace dnn
{ {
void getConvolutionKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW, int &dilationH, int &dilationW);
void getPoolingKernelParams(LayerParams &params, int &kernelH, int &kernelW, bool &globalPooling, int &padH, int &padW, int &strideH, int &strideW);
} }
} }
......