Commit b70a9dc1 authored by Vitaliy Lyudvichenko

Adding a templated GPU/CPU implementation of the Convolution layer

parent b26896c0
@@ -297,6 +297,7 @@ namespace dnn
     mutable uchar state;
 #endif
 public:
     enum DataState
     {
         UNINITIALIZED,
......
@@ -346,12 +346,12 @@ inline size_t Blob::offset(int n, int cn, int row, int col) const

 inline float *Blob::ptrf(int n, int cn, int row, int col)
 {
-    return matRef().ptr<float>() + offset(n, cn, row, col);
+    return matRef(false).ptr<float>() + offset(n, cn, row, col);
 }

 inline uchar *Blob::ptr(int n, int cn, int row, int col)
 {
-    Mat &mat = matRef();
+    Mat &mat = matRef(false);
     return mat.ptr() + mat.elemSize() * offset(n, cn, row, col);
 }
......
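The two hunks above switch the read accessors to matRef(false). Presumably the new boolean tells the Blob whether the caller only intends to overwrite the data: a read access must first sync the freshest copy back to the host, while a write-only access may skip that copy. A minimal sketch of what this could look like under the new DataState tracking; the member names m/um and the state values beyond UNINITIALIZED are assumptions, not shown in this diff:

```cpp
// Hypothetical sketch, not the commit's exact code.
inline Mat &Blob::matRef(bool writeOnly)
{
#ifdef HAVE_OPENCL
    if (!writeOnly && (state & HEAD_AT_UMAT)) // freshest data lives on the device
        um.copyTo(m);                         // sync it back before reading
    state = HEAD_AT_MAT;                      // the CPU copy is now the head
#endif
    return m;
}
```

Under this reading, ptrf/ptr pass false because they touch existing elements, whereas a producer that fully overwrites the blob could pass true and avoid the device-to-host transfer.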
@@ -63,18 +63,23 @@ namespace dnn
     int inpGroupCn, outGroupCn;
     int ksize;

-    bool useOpenCL;
-    Mat colMat, biasOnesMat;
+    bool tryUseOpenCL, useOpenCL;
+    Blob colBlob, biasOnesBlob;

     inline bool is1x1() const;
     virtual void computeInpOutShape(const Blob &inpBlob);
-    void im2col(Blob &inpBlob, int imNum, int cnGroup);
+    void im2col(Blob &inpBlob, int imNum, int cnGroup, Blob &colBlob);

 public:
     ConvolutionLayer() {}
     ConvolutionLayer(LayerParams &params);
     void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
     void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
+
+    template<typename XMat>
+    void forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
 };

 class DeConvolutionLayer : public ConvolutionLayer
......
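The new forward_<XMat> template is the heart of the commit: the same im2col + GEMM pipeline can be instantiated once for cv::Mat (CPU) and once for cv::UMat (OpenCL), with the scratch buffers held in Blobs so either backing store works. A plausible sketch of how forward() would dispatch on the flags declared above; the body is illustrative, not the commit's exact code:

```cpp
void ConvolutionLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    // useOpenCL is presumably resolved in allocate(): tryUseOpenCL records the
    // request, useOpenCL whether the OpenCL runtime actually accepted it.
    if (useOpenCL)
        forward_<UMat>(inputs, outputs); // device path: Blob data stays in UMats
    else
        forward_<Mat>(inputs, outputs);  // host path: plain cv::Mat arithmetic
}
```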
@@ -11,9 +11,15 @@ namespace cv
 namespace dnn
 {

-void gemm(InputArray A, InputArray B, double alpha, InputOutputArray C, double beta, int flags /*= 0*/)
+void gemm(InputArray A, InputArray B, double alpha, InputOutputArray C, double beta, int flags)
 {
-    cv::gemm(A, B, alpha, C, beta, C, flags);
+    if (C.isMat())
+        gemmCPU(A.getMat(), B.getMat(), alpha, C.getMatRef(), beta, flags);
+    else
+    {
+        cv::gemm(A, B, alpha, C, beta, C, flags);
+        std::cout << "OCL gemm\n";
+    }
 }

 inline void SwapRowCols(const Mat &A, int &rows, int &cols, bool isTrans)
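The wrapper now routes by the dynamic kind of C: a Mat destination goes to the hand-rolled, BLAS-backed gemmCPU below, while anything else (i.e. a UMat) falls through to cv::gemm, which can run its OpenCL kernel transparently; the std::cout line reads like temporary tracing. A hypothetical call site inside the dnn module (the zeros initialization is only there to give C a valid size and type before the in-place accumulate):

```cpp
// Hypothetical usage of the internal dnn::gemm wrapper above.
Mat A = Mat::ones(2, 3, CV_32F), B = Mat::ones(3, 4, CV_32F);
Mat C = Mat::zeros(2, 4, CV_32F);
gemm(A, B, 1.0, C, 0.0, 0);      // C.isMat() == true  -> gemmCPU (BLAS path)

UMat uA, uB, uC = UMat::zeros(2, 4, CV_32F);
A.copyTo(uA); B.copyTo(uB);
gemm(uA, uB, 1.0, uC, 0.0, 0);   // C.isMat() == false -> cv::gemm ("OCL gemm")
```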
@@ -35,10 +41,9 @@ void gemmCPU(const Mat &A, const Mat &B, double alpha, Mat &C, double beta, int flags)
     SwapRowCols(B, Brows, Bcols, transB);
     SwapRowCols(C, Crows, Ccols, transC);

-    CV_DbgAssert(!(flags & GEMM_3_T));
+    CV_Assert(!(flags & GEMM_3_T));
     CV_Assert(Acols == Brows && Arows == Crows && Bcols == Ccols);
     CV_Assert(A.isContinuous() && B.isContinuous() && C.isContinuous());
-    CV_Assert(A.type() == CV_32F || A.type() == CV_64F);
     CV_Assert(A.type() == B.type() && B.type() == C.type());
     CV_Assert(A.data != C.data && B.data != C.data);
@@ -59,6 +64,10 @@ void gemmCPU(const Mat &A, const Mat &B, double alpha, Mat &C, double beta, int flags)
                     B.ptr<double>(), B.cols,
                     beta, C.ptr<double>(), C.cols);
     }
+    else
+    {
+        CV_Error(Error::BadDepth, "Only floating point types are supported");
+    }
 #else
     cv::gemm(A, B, alpha, C, beta, C, flags);
 #endif
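The visible arguments belong to the double-precision branch (cblas_dgemm); the single-precision branch above it presumably has the same shape, with the new else clause catching every other depth. For reference, the row-major CBLAS call pattern these lines are part of looks roughly like this; variable names are taken from the surrounding function (transA by analogy with transB), and the exact code is truncated here:

```cpp
// Sketch of the CV_32F branch assumed to sit above the CV_64F one.
if (A.type() == CV_32F)
{
    cblas_sgemm(CblasRowMajor,
                transA ? CblasTrans : CblasNoTrans,
                transB ? CblasTrans : CblasNoTrans,
                Arows, Bcols, Acols,                  // M, N, K after SwapRowCols
                (float)alpha, A.ptr<float>(), A.cols, // lda = physical row stride
                B.ptr<float>(), B.cols,
                (float)beta, C.ptr<float>(), C.cols);
}
```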
@@ -70,7 +79,6 @@ int getBlasThreads()
     return openblas_get_num_threads();
 #else
     return 1;
 #endif
 }
@@ -81,7 +89,6 @@ void setBlasThreads(int numThreads)
     goto_set_num_threads(numThreads);
 #else
     (void)numThreads; //suppress compilers' warning
-    numThreads = 0;
 #endif
 }
......
@@ -42,6 +42,7 @@
 #if defined(ENABLE_CAFFE_MODEL_TESTS)
 #include "test_precomp.hpp"
 #include "npy_blob.hpp"
+#include <opencv2/core/ocl.hpp>

 namespace cvtest
 {
......
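The new ocl.hpp include suggests the Caffe model tests now exercise both execution paths. A hypothetical shape for such a test; the test name and body are illustrative, not taken from the commit:

```cpp
// Hypothetical: run the same accuracy check with OpenCL on and off.
TEST(Reproducibility_Convolution, Accuracy_OCL)
{
    cv::ocl::setUseOpenCL(true);   // let UMat operations use the OCL device
    // ... import the Caffe net and compare outputs against the .npy reference ...
    cv::ocl::setUseOpenCL(false);  // restore the default for subsequent tests
}
```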