Commit 7f0260c1 authored by Vitaliy Lyudvichenko

Adding OCL versions of Concat, Split, Slice layers

Moving more Caffe loaders to a separate file
parent d0a9683f
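For readers of this diff, a minimal usage sketch of the new public factory methods introduced here (not part of the commit; the axis and output-count values are illustrative assumptions):

#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>
using namespace cv;
using namespace cv::dnn;

void factorySketch()
{
    Ptr<ConcatLayer> concat = ConcatLayer::create(1);  // concatenate inputs along axis 1 (channels)
    Ptr<SplitLayer> split = SplitLayer::create(-1);    // negative count: deduced from output connections
    // The Net drives each layer through Layer::allocate()/Layer::forward(); when OpenCL is
    // available and the input Blobs already live in UMat memory, the new OCL paths are taken.
}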
......@@ -275,9 +275,37 @@ namespace dnn
static Ptr<InnerProductLayer> create(int axis = 1);
};
/* Reshaping */
class CV_EXPORTS_W ConcatLayer : public Layer
{
public:
int axis;
static Ptr<ConcatLayer> create(int axis = 1);
};
class CV_EXPORTS_W SplitLayer : public Layer
{
public:
int outputsCount; //!< Number of copies that will be produced (is ignored when negative).
static Ptr<SplitLayer> create(int outputsCount = -1);
};
class CV_EXPORTS_W SliceLayer : public Layer
{
public:
int axis;
std::vector<int> sliceIndices;
static Ptr<SliceLayer> create(int axis);
static Ptr<SliceLayer> create(int axis, const std::vector<int> &sliceIndices);
};
/* Activations */
class ReLULayer : public Layer
class CV_EXPORTS_W ReLULayer : public Layer
{
public:
double negativeSlope;
......@@ -285,31 +313,31 @@ namespace dnn
static Ptr<ReLULayer> create(double negativeSlope = 0);
};
class TanHLayer : public Layer
class CV_EXPORTS_W TanHLayer : public Layer
{
public:
static Ptr<TanHLayer> create();
};
class SigmoidLayer : public Layer
class CV_EXPORTS_W SigmoidLayer : public Layer
{
public:
static Ptr<SigmoidLayer> create();
};
class BNLLLayer : public Layer
class CV_EXPORTS_W BNLLLayer : public Layer
{
public:
static Ptr<BNLLLayer> create();
};
class AbsLayer : public Layer
class CV_EXPORTS_W AbsLayer : public Layer
{
public:
static Ptr<AbsLayer> create();
};
class PowerLayer : public Layer
class CV_EXPORTS_W PowerLayer : public Layer
{
public:
double power, scale, shift;
......
......@@ -262,6 +262,7 @@ namespace dnn
/** @brief Returns slice of first two dimensions.
* @details The behaviour is similar to the following numpy code: blob[n, cn, ...]
* @todo Method will be removed. Use slice() from shape_utils.hpp.
*/
Mat getPlane(int n, int cn);
......@@ -282,6 +283,7 @@ namespace dnn
int type() const; //!< Returns type of the blob.
int elemSize() const; //!< Returns size of single element in bytes.
int getState() const; //!< Returns current state of the blob, @see DataState.
private:
const int *sizes() const;
......
......@@ -507,6 +507,15 @@ inline int Blob::elemSize() const
return CV_ELEM_SIZE(type());
}
inline int Blob::getState() const
{
#ifdef CV_DNN_UMAT
return this->state;
#else
return m.empty() ? UNINITIALIZED : HEAD_AT_MAT;
#endif
}
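Illustrative only (not in the commit): the pattern the layers below use to combine getState() with ocl::useOpenCL() when choosing between the Mat and UMat code paths; 'blob' and 'out' are hypothetical cv::dnn::Blob instances:

// Take the OpenCL path only if the runtime allows it and the data already resides on the device.
bool useOpenCL = cv::ocl::useOpenCL() && blob.getState() == Blob::HEAD_AT_UMAT;
if (useOpenCL)
    blob.getRefConst<cv::UMat>().copyTo(out.getRef<cv::UMat>());  // stays on the device
else
    blob.getRefConst<cv::Mat>().copyTo(out.getRef<cv::Mat>());    // stays on the host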
}
}
......
#include "../precomp.hpp"
#include "layer_loaders.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include "../layers/layers_common.hpp"
namespace cv
{
namespace dnn
{
//Utils
//Extracts params used by Conv, Deconv and Pooling layers
static void getCaffeConvParams(LayerParams &params, Size &kernel, Size &pad, Size &stride)
{
if (params.has("kernel_h") && params.has("kernel_w"))
{
kernel.height = params.get<int>("kernel_h");
kernel.width = params.get<int>("kernel_w");
}
else if (params.has("kernel_size"))
{
kernel.height = kernel.width = params.get<int>("kernel_size");
}
else
{
CV_Error(Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");
}
CV_Assert(kernel.height > 0 && kernel.width > 0);
if (params.has("pad_h") && params.has("pad_w"))
{
pad.height = params.get<int>("pad_h");
pad.width = params.get<int>("pad_w");
}
else
{
pad.height = pad.width = params.get<int>("pad", 0);
}
CV_Assert(pad.height >= 0 && pad.width >= 0);
if (params.has("stride_h") && params.has("stride_w"))
{
stride.height = params.get<int>("stride_h");
stride.width = params.get<int>("stride_w");
}
else
{
stride.height = stride.width = params.get<int>("stride", 1);
}
CV_Assert(stride.height > 0 && stride.width > 0);
}
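A hedged example of the parameter resolution above, assuming LayerParams::set() from cv::dnn::Dict; the keys mirror Caffe's convolution_param and the concrete values are made up:

LayerParams p;
p.set("kernel_size", 3);   // no kernel_h/kernel_w given -> square 3x3 kernel
p.set("pad", 1);           // defaults to 0 when absent
p.set("stride", 2);        // defaults to 1 when absent
Size kernel, pad, stride;
getCaffeConvParams(p, kernel, pad, stride);
// kernel == Size(3, 3), pad == Size(1, 1), stride == Size(2, 2)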
//Layers
//Convolution and Deconvolution
static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer> l, LayerParams &params)
{
l->setParamsFrom(params);
getCaffeConvParams(params, l->kernel, l->pad, l->stride);
bool bias = params.get<bool>("bias_term", true);
int numOutput = params.get<int>("num_output");
int group = params.get<int>("group", 1);
CV_Assert(numOutput % group == 0);
CV_Assert((bias && l->blobs.size() == 2) || (!bias && l->blobs.size() == 1));
}
template<>
Ptr<Layer> createLayerFromCaffe<ConvolutionLayer>(LayerParams &params)
{
Ptr<BaseConvolutionLayer> l = ConvolutionLayer::create();
initConvDeconvLayerFromCaffe(l, params);
return Ptr<Layer>(l);
}
template<>
Ptr<Layer> createLayerFromCaffe<DeconvolutionLayer>(LayerParams &params)
{
Ptr<BaseConvolutionLayer> l = DeconvolutionLayer::create();
initConvDeconvLayerFromCaffe(l, params);
return Ptr<Layer>(l);
}
template<>
Ptr<Layer> createLayerFromCaffe<PoolingLayer>(LayerParams &params)
{
......@@ -88,7 +162,54 @@ Ptr<Layer> createLayerFromCaffe<LRNLayer>(LayerParams& params)
return Ptr<Layer>(LRNLayer::create(type, size, alpha, beta));
}
//Activation layers
/* Reshape layers */
template<>
Ptr<Layer> createLayerFromCaffe<ConcatLayer>(LayerParams& params)
{
return Ptr<Layer>(ConcatLayer::create(params.get<int>("axis", 1)));
}
template<>
Ptr<Layer> createLayerFromCaffe<SplitLayer>(LayerParams &params)
{
int outputsCount;
//TODO: maybe "top_count" param is useless because it can be determined by output connections number
if (params.has("top_count"))
{
outputsCount = params.get<int>("top_count");
CV_Assert(outputsCount >= 0);
}
else
{
outputsCount = -1;
}
return Ptr<Layer>(SplitLayer::create(outputsCount));
}
template<>
Ptr<Layer> createLayerFromCaffe<SliceLayer>(LayerParams& params)
{
int axis = params.get<int>("axis", 1);
if (!params.has("slice_point"))
{
return Ptr<Layer>(SliceLayer::create(axis));
}
else
{
const DictValue &indicesValue = params.get("slice_point");
std::vector<int> sliceIndices(indicesValue.size());
for (int i = 0; i < indicesValue.size(); i++)
sliceIndices[i] = indicesValue.get<int>(i);
return Ptr<Layer>(SliceLayer::create(axis, sliceIndices));
}
}
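For reference, a hedged sketch of how the slice_point values shape the outputs (the input shape and indices are hypothetical):

std::vector<int> points;
points.push_back(3);
points.push_back(7);
Ptr<SliceLayer> slice = SliceLayer::create(1, points);
// For an input Blob of shape (1, 10, 4, 4), allocate() produces three outputs of
// shapes (1, 3, 4, 4), (1, 4, 4, 4) and (1, 3, 4, 4), and forward() copies the
// ranges [0,3), [3,7) and [7,10) along axis 1.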
/* Activation layers */
template <typename ActivationLayer> //Intended for parameters-free activations
Ptr<Layer> createLayerFromCaffe(LayerParams&)
......@@ -113,6 +234,8 @@ Ptr<Layer> createLayerFromCaffe<PowerLayer>(LayerParams& params)
}
//Explicit instantiation
template Ptr<Layer> createLayerFromCaffe<ConvolutionLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<DeconvolutionLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<SoftmaxLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<InnerProductLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<LRNLayer>(LayerParams&);
......
......@@ -40,21 +40,12 @@
//M*/
#include "precomp.hpp"
#include "caffe/layer_loaders.hpp"
#include "layers/concat_layer.hpp"
#include "layers/convolution_layer.hpp"
#include "layers/blank_layer.hpp"
#include "layers/elementwise_layers.hpp"
#include "layers/fully_connected_layer.hpp"
#include "layers/lrn_layer.hpp"
#include "layers/mvn_layer.hpp"
#include "layers/pooling_layer.hpp"
#include "layers/reshape_layer.hpp"
#include "layers/slice_layer.hpp"
#include "layers/softmax_layer.hpp"
#include "layers/split_layer.hpp"
#include "caffe/layer_loaders.hpp"
namespace cv
{
......@@ -78,15 +69,20 @@ void initModule()
if (init.status)
return;
REG_RUNTIME_LAYER_CLASS(Slice, SliceLayer)
REG_RUNTIME_LAYER_CLASS(Split, SplitLayer)
REG_RUNTIME_LAYER_FUNC(Slice, createLayerFromCaffe<SliceLayer>);
REG_RUNTIME_LAYER_FUNC(Split, createLayerFromCaffe<SplitLayer>);
REG_RUNTIME_LAYER_FUNC(Concat, createLayerFromCaffe<ConcatLayer>);
REG_RUNTIME_LAYER_CLASS(Reshape, ReshapeLayer)
REG_RUNTIME_LAYER_CLASS(MVN, MVNLayer)
REG_RUNTIME_LAYER_FUNC(Flatten, createFlattenLayer);
REG_RUNTIME_LAYER_CLASS(Dropout, BlankLayer)
REG_RUNTIME_LAYER_CLASS(MVN, MVNLayer)
REG_RUNTIME_LAYER_FUNC(Convolution, createLayerFromCaffe<ConvolutionLayer>);
REG_RUNTIME_LAYER_FUNC(Deconvolution, createLayerFromCaffe<DeconvolutionLayer>);
REG_RUNTIME_LAYER_FUNC(Pooling, createLayerFromCaffe<PoolingLayer>);
REG_RUNTIME_LAYER_FUNC(LRN, createLayerFromCaffe<LRNLayer>);
REG_RUNTIME_LAYER_FUNC(InnerProduct, createLayerFromCaffe<InnerProductLayer>);
REG_STATIC_LAYER_FUNC(Softmax, createLayerFromCaffe<SoftmaxLayer>);
REG_RUNTIME_LAYER_FUNC(Softmax, createLayerFromCaffe<SoftmaxLayer>);
REG_RUNTIME_LAYER_FUNC(ReLU, createLayerFromCaffe<ReLULayer>);
REG_RUNTIME_LAYER_FUNC(Sigmoid, createLayerFromCaffe<SigmoidLayer>);
......@@ -94,11 +90,6 @@ void initModule()
REG_RUNTIME_LAYER_FUNC(BNLL, createLayerFromCaffe<BNLLLayer>);
REG_RUNTIME_LAYER_FUNC(AbsVal, createLayerFromCaffe<AbsLayer>);
REG_RUNTIME_LAYER_FUNC(Power, createLayerFromCaffe<PowerLayer>);
REG_RUNTIME_LAYER_CLASS(Dropout, BlankLayer)
REG_RUNTIME_LAYER_FUNC(Convolution, createConvolutionLayerFromCaffe)
REG_RUNTIME_LAYER_FUNC(Deconvolution, createDeconvolutionLayerFromCaffe)
REG_RUNTIME_LAYER_CLASS(Concat, ConcatLayer)
init.status = true;
}
......
......@@ -42,60 +42,80 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "concat_layer.hpp"
#include <opencv2/core/ocl.hpp>
namespace cv
{
namespace dnn
{
ConcatLayer::ConcatLayer(LayerParams &params) : Layer(params)
{
axis = params.get<int>("axis", 1);
CV_Assert(axis >= 0);
}
void ConcatLayer::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() > 0);
ConcatLayerImpl::ConcatLayerImpl(int axis_ /*= 1*/)
{
axis = axis_;
}
int refType = inputs[0]->type();
BlobShape refShape = inputs[0]->shape();
CV_Assert(axis < refShape.dims());
void ConcatLayerImpl::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() > 0);
int axisSum = 0;
for (size_t i = 0; i < inputs.size(); i++)
{
BlobShape curShape = inputs[i]->shape();
BlobShape refShape = inputs[0]->shape();
axisIdx = inputs[0]->canonicalAxis(axis);
CV_Assert(curShape.dims() == refShape.dims() && inputs[i]->type() == refType);
for (int axisId = 0; axisId < refShape.dims(); axisId++)
{
if (axisId != axis && refShape[axisId] != curShape[axisId])
CV_Error(Error::StsBadSize, "Inconsitent shape for ConcatLayer");
}
int axisSum = 0;
useOpenCL = false;
for (size_t i = 0; i < inputs.size(); i++)
{
BlobShape curShape = inputs[i]->shape();
axisSum += curShape[axis];
CV_Assert(curShape.dims() == refShape.dims() && inputs[i]->type() == inputs[0]->type());
for (int curAxis = 0; curAxis < refShape.dims(); curAxis++)
{
if (curAxis != axisIdx && refShape[curAxis] != curShape[curAxis])
CV_Error(Error::StsBadSize, "Inconsitent shape for ConcatLayer");
}
refShape[axis] = axisSum;
outputs.resize(1);
outputs[0].create(refShape);
axisSum += curShape[axisIdx];
useOpenCL |= inputs[i]->getState() == Blob::HEAD_AT_UMAT;
}
void ConcatLayer::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
const Mat& outMat = outputs[0].matRef();
std::vector<Range> ranges(outputs[0].dims(), Range::all());
int sizeStart = 0;
for (size_t i = 0; i < inputs.size(); i++)
{
int sizeEnd = sizeStart + inputs[i]->size(axis);
ranges[axis] = Range(sizeStart, sizeEnd);
refShape[axisIdx] = axisSum;
useOpenCL &= ocl::useOpenCL();
int allocFlags = (useOpenCL) ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
Mat outSubMat = outMat(&ranges[0]);
inputs[i]->matRef().copyTo(outSubMat);
outputs.resize(1);
outputs[0].create(refShape, inputs[0]->type(), allocFlags);
}
sizeStart = sizeEnd;
}
void ConcatLayerImpl::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
#ifdef HAVE_OPENCL
if (useOpenCL)
forward_<UMat>(inputs, outputs);
else
#endif
forward_<Mat>(inputs, outputs);
}
template<typename XMat>
void ConcatLayerImpl::forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
XMat& outMat = outputs[0].getRef<XMat>();
std::vector<Range> ranges(outputs[0].dims(), Range::all());
ranges[axisIdx].start = 0;
for (size_t i = 0; i < inputs.size(); i++)
{
ranges[axisIdx].end = ranges[axisIdx].start + inputs[i]->size(axisIdx);
inputs[i]->getRefConst<XMat>().copyTo(outMat(&ranges[0]));
ranges[axisIdx].start = ranges[axisIdx].end;
}
}
Ptr<ConcatLayer> ConcatLayer::create(int axis)
{
return Ptr<ConcatLayer>(new ConcatLayerImpl(axis));
}
}
}
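A worked example of the bookkeeping in ConcatLayerImpl::allocate()/forward_() above (shapes are hypothetical):

// inputs[0]: shape (1, 3, 4, 4), inputs[1]: shape (1, 5, 4, 4), axisIdx = 1
// axisSum = 3 + 5 = 8  ->  output shape (1, 8, 4, 4)
// forward_(): inputs[0] is copied into ranges[1] = [0, 3),
//             inputs[1] is copied into ranges[1] = [3, 8).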
......@@ -42,20 +42,29 @@
#ifndef __OPENCV_DNN_LAYERS_CONCAT_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_CONCAT_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
class ConcatLayer : public Layer
{
int axis;
public:
ConcatLayer(LayerParams& params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
class ConcatLayerImpl : public ConcatLayer
{
bool useOpenCL;
int axisIdx;
template<typename XMat>
void forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
public:
ConcatLayerImpl(int axis_ = 1);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
}
}
#endif
......@@ -82,6 +82,8 @@ void ConvolutionLayerImpl::init()
void ConvolutionLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
init();
CV_Assert(inputs.size() > 0);
const Blob &input = *inputs[0];
CV_Assert(input.dims() == 4 && (input.type() == CV_32F || input.type() == CV_64F));
......@@ -331,37 +333,5 @@ Ptr<BaseConvolutionLayer> DeconvolutionLayer::create(Size kernel, Size stride, S
return Ptr<BaseConvolutionLayer>(l);
}
//Importers
template<typename CLayer>
static void initConvDeconvLayerFromCaffe(CLayer *l, LayerParams &params)
{
l->setParamsFrom(params);
getCaffeConvParams(params, l->kernel, l->pad, l->stride);
bool bias = params.get<bool>("bias_term", true);
int numOutput = params.get<int>("num_output");
int group = params.get<int>("group", 1);
CV_Assert(numOutput % group == 0);
CV_Assert((bias && l->blobs.size() == 2) || (!bias && l->blobs.size() == 1));
}
Ptr<Layer> createConvolutionLayerFromCaffe(LayerParams &params)
{
ConvolutionLayerImpl *l = new ConvolutionLayerImpl();
initConvDeconvLayerFromCaffe(l, params);
l->init();
return Ptr<Layer>(l);
}
Ptr<Layer> createDeconvolutionLayerFromCaffe(LayerParams &params)
{
ConvolutionLayerImpl *l = new DeConvolutionLayerImpl();
initConvDeconvLayerFromCaffe(l, params);
l->init();
return Ptr<Layer>(l);
}
}
}
......@@ -46,45 +46,5 @@ namespace cv
namespace dnn
{
void getCaffeConvParams(LayerParams &params, Size &kernel, Size &pad, Size &stride)
{
if (params.has("kernel_h") && params.has("kernel_w"))
{
kernel.height = params.get<int>("kernel_h");
kernel.width = params.get<int>("kernel_w");
}
else if (params.has("kernel_size"))
{
kernel.height = kernel.width = params.get<int>("kernel_size");
}
else
{
CV_Error(Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");
}
CV_Assert(kernel.height > 0 && kernel.width > 0);
if (params.has("pad_h") && params.has("pad_w"))
{
pad.height = params.get<int>("pad_h");
pad.width = params.get<int>("pad_w");
}
else
{
pad.height = pad.width = params.get<int>("pad", 0);
}
CV_Assert(pad.height >= 0 && pad.width >= 0);
if (params.has("stride_h") && params.has("stride_w"))
{
stride.height = params.get<int>("stride_h");
stride.width = params.get<int>("stride_w");
}
else
{
stride.height = stride.width = params.get<int>("stride", 1);
}
CV_Assert(stride.height > 0 && stride.width > 0);
}
}
}
......@@ -42,14 +42,14 @@
#ifndef __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__
#define __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__
#include <opencv2/dnn.hpp>
#include "op_blas.hpp"
#include "op_im2col.hpp"
namespace cv
{
namespace dnn
{
void getCaffeConvParams(LayerParams &params, Size &kernel, Size &pad, Size &stride);
}
}
......
......@@ -42,55 +42,57 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "slice_layer.hpp"
#include <opencv2/core/ocl.hpp>
#include <opencv2/dnn/shape_utils.hpp>
namespace cv
{
namespace dnn
{
SliceLayer::SliceLayer(LayerParams &params) : Layer(params)
SliceLayerImpl::SliceLayerImpl(int axis_ /*= 1*/)
{
inAxis = params.get<int>("axis", 1);
if (!params.has("slice_point"))
return;
axis = axis_;
}
const DictValue &_slicePoints = params.get("slice_point");
slicePoints.resize(_slicePoints.size());
for (int i = 0; i < _slicePoints.size(); i++)
{
slicePoints[i] = _slicePoints.get<int>(i);
CV_Assert(slicePoints[i] > 0 && (i == 0 || slicePoints[i-1] < slicePoints[i]));
}
SliceLayerImpl::SliceLayerImpl(int axis_, const std::vector<int> &sliceIndices_)
{
axis = axis_;
sliceIndices = sliceIndices_;
}
void SliceLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
void SliceLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() == 1);
const Blob inpBlob = *inputs[0];
int axis = inpBlob.canonicalAxis(inAxis);
int axisSize = inpBlob.size(axis);
const Blob &inpBlob = *inputs[0];
useOpenCL = ocl::useOpenCL() && inpBlob.getState() == Blob::HEAD_AT_UMAT;
axisIdx = inpBlob.canonicalAxis(axis);
int axisSize = inpBlob.size(axisIdx);
BlobShape inpShape = inpBlob.shape();
int allocFlags = useOpenCL ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
if (slicePoints.size()) //divide blob with respect to passed parameters
if (sliceIndices.size()) //divide blob with respect to passed parameters
{
std::vector<int> outAxisSize;
int prevSlice = 0;
for (size_t i = 0; i < slicePoints.size(); i++)
for (size_t i = 0; i < sliceIndices.size(); i++)
{
CV_Assert(prevSlice < slicePoints[i] && slicePoints[i] < axisSize);
outAxisSize.push_back(slicePoints[i] - prevSlice);
prevSlice = slicePoints[i];
if (!(prevSlice < sliceIndices[i] && sliceIndices[i] < axisSize))
CV_Error(Error::StsBadArg, "Slice indices should be positive, increased and don't exceed size of sliced dimension");
outAxisSize.push_back(sliceIndices[i] - prevSlice);
prevSlice = sliceIndices[i];
}
outAxisSize.push_back(axisSize - prevSlice);
outputs.resize(outAxisSize.size());
for (size_t i = 0; i < outAxisSize.size(); i++)
{
inpShape[axis] = outAxisSize[i];
outputs[i].create(inpShape, inpBlob.type());
inpShape[axisIdx] = outAxisSize[i];
outputs[i].create(inpShape, inpBlob.type(), allocFlags);
}
}
else //divide blob with respect to count of output blobs
......@@ -100,30 +102,45 @@ void SliceLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &o
for (size_t i = 0; i < outputs.size(); i++)
{
inpShape[axis] = outAxisSize;
outputs[i].create(inpShape, inpBlob.type());
inpShape[axisIdx] = outAxisSize;
outputs[i].create(inpShape, inpBlob.type(), allocFlags);
}
}
}
void SliceLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
void SliceLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
#ifdef HAVE_OPENCL
if (useOpenCL)
forward_<UMat>(inputs, outputs);
else
#endif
forward_<Mat>(inputs, outputs);
}
template<typename XMat>
void SliceLayerImpl::forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
Blob &inpBlob = *inputs[0];
const int axis = inpBlob.canonicalAxis(inAxis);
const Mat& inpMat = inpBlob.matRef();
const XMat& inpMat = inputs[0]->getRefConst<XMat>();
std::vector<Range> ranges(inputs[0]->dims(), Range::all());
std::vector<Range> ranges(inpBlob.dims(), Range::all());
int sizeStart = 0;
ranges[axisIdx].start = 0;
for (size_t i = 0; i < outputs.size(); i++)
{
int sizeEnd = sizeStart + outputs[i].size(axis);
ranges[axis] = Range(sizeStart, sizeEnd);
ranges[axisIdx].end = ranges[axisIdx].start + outputs[i].size(axisIdx);
inpMat(&ranges[0]).copyTo(outputs[i].getRef<XMat>());
ranges[axisIdx].start = ranges[axisIdx].end;
}
}
Mat inpSubMat = inpMat(&ranges[0]);
inpSubMat.copyTo(outputs[i].matRef());
Ptr<SliceLayer> SliceLayer::create(int axis)
{
return Ptr<SliceLayer>(new SliceLayerImpl(axis));
}
sizeStart = sizeEnd;
}
Ptr<SliceLayer> SliceLayer::create(int axis, const std::vector<int> &sliceIndices)
{
return Ptr<SliceLayer>(new SliceLayerImpl(axis, sliceIndices));
}
}
......
......@@ -42,24 +42,28 @@
#ifndef __OPENCV_DNN_LAYERS_SLICE_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_SLICE_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
class SliceLayer : public Layer
class SliceLayerImpl : public SliceLayer
{
bool useOpenCL;
int axisIdx;
template<typename XMat>
void forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
public:
SliceLayer(LayerParams &params);
SliceLayerImpl(int axis_ = 1);
SliceLayerImpl(int axis_, const std::vector<int> &sliceIndices_);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
private:
int inAxis;
std::vector<int> slicePoints;
};
}
......
......@@ -42,41 +42,46 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "split_layer.hpp"
#include <opencv2/core/ocl.hpp>
namespace cv
{
namespace dnn
{
//TODO: maybe "top_count" param is useless because it can be determined by output connections number?
SplitLayer::SplitLayer(LayerParams &params) : Layer(params)
SplitLayerImpl::SplitLayerImpl(int outputsCount_ /*= -1*/)
{
if (params.has("top_count"))
{
outputsNum = params.get<int>("top_count");
CV_Assert(outputsNum >= 0);
}
else
{
outputsNum = -1;
}
outputsCount = outputsCount_;
}
void SplitLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
void SplitLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() == 1);
useOpenCL = ocl::useOpenCL() && inputs[0]->getState() == Blob::HEAD_AT_UMAT;
int allocFlags = useOpenCL ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
if (outputsNum >= 0)
outputs.resize(outputsNum);
if (outputsCount >= 0)
outputs.resize(outputsCount);
for (size_t i = 0; i < outputs.size(); i++)
outputs[i].create(inputs[0]->shape(), inputs[0]->type());
outputs[i].create(inputs[0]->shape(), inputs[0]->type(), allocFlags);
}
void SplitLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
void SplitLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
for (size_t i = 0; i < outputs.size(); i++)
inputs[0]->matRefConst().copyTo(outputs[i].matRef());
{
if (useOpenCL)
inputs[0]->umatRefConst().copyTo(outputs[i].umatRef());
else
inputs[0]->matRefConst().copyTo(outputs[i].matRef());
}
}
Ptr<SplitLayer> SplitLayer::create(int outputsCount)
{
return Ptr<SplitLayer>(new SplitLayerImpl(outputsCount));
}
}
......
......@@ -42,23 +42,23 @@
#ifndef __OPENCV_DNN_LAYERS_SPLIT_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_SPLIT_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
class SplitLayer : public Layer
class SplitLayerImpl : public SplitLayer
{
bool useOpenCL;
public:
SplitLayer(LayerParams &params);
SplitLayerImpl(int outputsCount_ = -1);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
private:
int outputsNum;
};
}
......
......@@ -5,7 +5,7 @@ __kernel void ReLUForward(const int count, __global const T* in, __global T* out
) {
int index = get_global_id(0);
if(index < count)
#ifndef RELU_NO_SLOPE
#ifndef RELU_NO_SLOPE
out[index] = in[index] > 0 ? in[index] : in[index] * negative_slope;
#else
out[index] = in[index] > 0 ? in[index] : 0;
......@@ -34,7 +34,7 @@ __kernel void BNLLForward(const int n, __global const T* in, __global T* out) {
__kernel void AbsValForward(const int n, __global const T* in, __global T* out) {
int index = get_global_id(0);
if (index < n)
out[index] = abs(in[index]);
out[index] = fabs(in[index]);
}
__kernel void PowForward(const int n, __global const T* in, __global T* out, const T power, const T scale, const T shift) {
......
......@@ -58,6 +58,31 @@ static String _tf(TString filename)
return (getOpenCVExtraDir() + "/dnn/layers/") + filename;
}
enum RunLayerMode
{
ALLOC_ONLY = 1,
FORWARD_ONLY = 2,
ALLOC_AND_FORWARD = ALLOC_ONLY | FORWARD_ONLY
};
typedef Ptr<std::vector<Blob*> > PtrToVecPtrBlob;
PtrToVecPtrBlob
runLayer(Ptr<Layer> layer, std::vector<Blob> &inpBlobs, std::vector<Blob> &outBlobs, int mode = ALLOC_AND_FORWARD)
{
PtrToVecPtrBlob inpPtrs(new std::vector<Blob*>());
inpPtrs->reserve(inpBlobs.size());
for (size_t i = 0; i < inpBlobs.size(); i++)
inpPtrs->push_back(&inpBlobs[i]);
if (mode & ALLOC_ONLY) layer->allocate(*inpPtrs, outBlobs);
if (mode & FORWARD_ONLY) layer->forward(*inpPtrs, outBlobs);
return inpPtrs;
}
void testLayerUsingCaffeModels(String basename, bool useCaffeModel = false, bool useCommonInputBlob = true)
{
String prototxt = _tf(basename + ".prototxt");
......@@ -137,7 +162,12 @@ OCL_TEST(Layer_Test_DeConvolution, Accuracy)
TEST(Layer_Test_InnerProduct, Accuracy)
{
testLayerUsingCaffeModels("layer_inner_product", true);
OCL_OFF(testLayerUsingCaffeModels("layer_inner_product", true));
}
OCL_TEST(Layer_Test_InnerProduct, Accuracy)
{
OCL_ON(testLayerUsingCaffeModels("layer_inner_product", true));
OCL_OFF();
}
TEST(Layer_Test_Pooling_max, Accuracy)
......@@ -164,7 +194,7 @@ OCL_TEST(Layer_Test_Pooling_ave, Accuracy)
TEST(Layer_Test_MVN, Accuracy)
{
testLayerUsingCaffeModels("layer_mvn");
OCL_OFF(testLayerUsingCaffeModels("layer_mvn"));
}
TEST(Layer_Test_Reshape, squeeze)
......@@ -184,7 +214,28 @@ TEST(Layer_Test_Reshape, squeeze)
EXPECT_EQ(outVec[0].shape(), BlobShape(4, 3, 2));
}
TEST(Layer_Test_Reshape_Split_Slice, Accuracy)
template<typename XMat>
static void test_Layer_Concat()
{
Matx21f a(1.f, 1.f), b(2.f, 2.f), c(3.f, 3.f);
std::vector<Blob> res(1), src = { Blob(XMat(a)), Blob(XMat(b)), Blob(XMat(c)) };
Blob ref(XMat(Matx23f(1.f, 2.f, 3.f, 1.f, 2.f, 3.f)));
runLayer(ConcatLayer::create(1), src, res);
normAssert(ref, res[0]);
}
TEST(Layer_Concat, Accuracy)
{
OCL_OFF(test_Layer_Concat<Mat>());
}
OCL_TEST(Layer_Concat, Accuracy)
{
OCL_ON(test_Layer_Concat<Mat>());
OCL_OFF();
}
template<typename XMat>
void test_Reshape_Split_Slice_layers()
{
Net net;
{
......@@ -193,9 +244,9 @@ TEST(Layer_Test_Reshape_Split_Slice, Accuracy)
importer->populateNet(net);
}
Blob input(BlobShape(Vec2i(6, 12)));
Blob input(BlobShape(6, 12));
RNG rng(0);
rng.fill(input.matRef(), RNG::UNIFORM, -1, 1);
rng.fill(input.getRef<XMat>(), RNG::UNIFORM, -1, 1);
net.setBlob(".input", input);
net.forward();
......@@ -203,28 +254,14 @@ TEST(Layer_Test_Reshape_Split_Slice, Accuracy)
normAssert(input, output);
}
enum RunLayerMode
TEST(Layer_Test_Reshape_Split_Slice, Accuracy)
{
ALLOC_ONLY = 1,
FORWARD_ONLY = 2,
ALLOC_AND_FORWARD = ALLOC_ONLY | FORWARD_ONLY
};
typedef Ptr<std::vector<Blob*> > PtrToVecPtrBlob;
PtrToVecPtrBlob
runLayer(Ptr<Layer> layer, std::vector<Blob> &inpBlobs, std::vector<Blob> &outBlobs, int mode=ALLOC_AND_FORWARD)
OCL_OFF(test_Reshape_Split_Slice_layers<Mat>());
}
OCL_TEST(Layer_Test_Reshape_Split_Slice, Accuracy)
{
PtrToVecPtrBlob inpPtrs( new std::vector<Blob*>() );
inpPtrs->reserve(inpBlobs.size());
for (size_t i = 0; i < inpBlobs.size(); i++)
inpPtrs->push_back(&inpBlobs[i]);
if (mode & ALLOC_ONLY) layer->allocate(*inpPtrs, outBlobs);
if (mode & FORWARD_ONLY) layer->forward(*inpPtrs, outBlobs);
return inpPtrs;
OCL_ON(test_Reshape_Split_Slice_layers<UMat>());
OCL_OFF();
}
class Layer_LSTM_Test : public ::testing::Test
......