Commit b70a9dc1 authored by Vitaliy Lyudvichenko

Add templated GPU/CPU implementation of Convolution layer

parent b26896c0
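The changes below thread one implementation through both backends: the layer body is written once over a generic matrix type XMat and instantiated for cv::Mat (CPU) and cv::UMat (OpenCL). A minimal sketch of that dispatch pattern, assuming a useOpenCL flag like the one this commit adds to ConvolutionLayer (names here are illustrative, not the committed code):

    #include <opencv2/core.hpp>
    #include <opencv2/core/ocl.hpp>

    struct ExampleLayer
    {
        bool useOpenCL;
        ExampleLayer() : useOpenCL(cv::ocl::useOpenCL()) {}

        template<typename XMat>            // XMat is cv::Mat or cv::UMat
        void forward_(const XMat &src, XMat &dst)
        {
            cv::add(src, src, dst);        // any T-API operation compiles for both types
        }

        void forward(const cv::Mat &src, cv::Mat &dst)
        {
            if (useOpenCL)
            {
                cv::UMat usrc = src.getUMat(cv::ACCESS_READ), udst;
                forward_<cv::UMat>(usrc, udst);   // OpenCL path
                udst.copyTo(dst);
            }
            else
                forward_<cv::Mat>(src, dst);      // plain CPU path
        }
    };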
@@ -297,6 +297,7 @@ namespace dnn
     mutable uchar state;
 #endif

+public:
     enum DataState
     {
         UNINITIALIZED,
...
@@ -346,12 +346,12 @@ inline size_t Blob::offset(int n, int cn, int row, int col) const

 inline float *Blob::ptrf(int n, int cn, int row, int col)
 {
-    return matRef().ptr<float>() + offset(n, cn, row, col);
+    return matRef(false).ptr<float>() + offset(n, cn, row, col);
 }

 inline uchar *Blob::ptr(int n, int cn, int row, int col)
 {
-    Mat &mat = matRef();
+    Mat &mat = matRef(false);
     return mat.ptr() + mat.elemSize() * offset(n, cn, row, col);
 }
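Note on matRef(false): the boolean parameter is not defined in this diff; presumably it tells the blob whether the caller will only write the data, so element accessors like ptrf() pass false to force a sync from the device first. A hypothetical sketch of such an accessor (member names and states are illustrative, not the actual Blob internals):

    #include <opencv2/core.hpp>

    class SyncedBlob
    {
        cv::Mat m;                                   // host copy
        cv::UMat um;                                 // device copy
        enum State { UNINITIALIZED, HEAD_AT_MAT, HEAD_AT_UMAT } state;

    public:
        SyncedBlob() : state(UNINITIALIZED) {}

        // writeOnly == true: caller overwrites everything, skip the download
        cv::Mat& matRef(bool writeOnly = true)
        {
            if (!writeOnly && state == HEAD_AT_UMAT)
                um.copyTo(m);                        // fetch fresh device data first
            state = HEAD_AT_MAT;                     // host copy becomes authoritative
            return m;
        }
    };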
...
@@ -63,18 +63,23 @@ namespace dnn
         int inpGroupCn, outGroupCn;
         int ksize;

-        bool useOpenCL;
+        bool tryUseOpenCL, useOpenCL;
+        Blob colBlob, biasOnesBlob;
         Mat colMat, biasOnesMat;

         inline bool is1x1() const;
         virtual void computeInpOutShape(const Blob &inpBlob);
-        void im2col(Blob &inpBlob, int imNum, int cnGroup);
+        void im2col(Blob &inpBlob, int imNum, int cnGroup, Blob &colBlob);

     public:
         ConvolutionLayer() {}
         ConvolutionLayer(LayerParams &params);
         void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
         void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
+
+        template<typename XMat>
+        void forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
     };

     class DeConvolutionLayer : public ConvolutionLayer
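im2col now writes into a caller-provided colBlob, so the same unrolling step can feed either backend's buffer. For reference, im2col copies each kernel-sized patch into a column so that convolution reduces to a single GEMM against the weight matrix; a minimal CPU-only sketch (stride 1, no padding; the committed helper is more general):

    #include <opencv2/core.hpp>

    // img: CV_32F data laid out as channels x H x W; col gets one row per
    // (channel, ky, kx) triple and one column per output pixel.
    void im2colSimple(const cv::Mat &img, int channels, int H, int W,
                      int kh, int kw, cv::Mat &col)
    {
        int outH = H - kh + 1, outW = W - kw + 1;
        col.create(channels * kh * kw, outH * outW, CV_32F);
        const float *src = img.ptr<float>();
        for (int c = 0; c < col.rows; ++c)
        {
            int ch = c / (kh * kw), ky = (c / kw) % kh, kx = c % kw;
            float *dst = col.ptr<float>(c);
            for (int y = 0; y < outH; ++y)
                for (int x = 0; x < outW; ++x)
                    dst[y * outW + x] = src[(ch * H + y + ky) * W + (x + kx)];
        }
    }

Multiplying the outCn x (channels*kh*kw) weight matrix by col then produces all output channels of one image in a single GEMM.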
...
@@ -11,9 +11,15 @@ namespace cv
 namespace dnn
 {

-void gemm(InputArray A, InputArray B, double alpha, InputOutputArray C, double beta, int flags /*= 0*/)
+void gemm(InputArray A, InputArray B, double alpha, InputOutputArray C, double beta, int flags)
 {
-    cv::gemm(A, B, alpha, C, beta, C, flags);
+    if (C.isMat())
+        gemmCPU(A.getMat(), B.getMat(), alpha, C.getMatRef(), beta, flags);
+    else
+    {
+        cv::gemm(A, B, alpha, C, beta, C, flags);
+        std::cout << "OCL gemm\n";
+    }
 }

 inline void SwapRowCols(const Mat &A, int &rows, int &cols, bool isTrans)
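With this change, dnn::gemm routes cv::Mat outputs through the BLAS-backed gemmCPU and falls back to cv::gemm (the T-API, hence the OpenCL path) for cv::UMat outputs. A usage sketch, assuming the module-internal declaration from this file's header:

    #include <opencv2/core.hpp>
    using namespace cv;

    void gemmDispatchExample()
    {
        Mat A = Mat::ones(2, 3, CV_32F), B = Mat::ones(3, 4, CV_32F);
        Mat C = Mat::zeros(2, 4, CV_32F);
        dnn::gemm(A, B, 1.0, C, 0.0, 0);     // C is a Mat  -> gemmCPU (BLAS) branch

        UMat uA = A.getUMat(ACCESS_READ), uB = B.getUMat(ACCESS_READ);
        UMat uC = C.getUMat(ACCESS_RW);
        dnn::gemm(uA, uB, 1.0, uC, 0.0, 0);  // C is a UMat -> cv::gemm (OpenCL) branch
    }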
@@ -35,10 +41,9 @@ void gemmCPU(const Mat &A, const Mat &B, double alpha, Mat &C, double beta, int flags)
     SwapRowCols(B, Brows, Bcols, transB);
     SwapRowCols(C, Crows, Ccols, transC);

-    CV_DbgAssert(!(flags & GEMM_3_T));
+    CV_Assert(!(flags & GEMM_3_T));
     CV_Assert(Acols == Brows && Arows == Crows && Bcols == Ccols);
     CV_Assert(A.isContinuous() && B.isContinuous() && C.isContinuous());
-    CV_Assert(A.type() == CV_32F || A.type() == CV_64F);
     CV_Assert(A.type() == B.type() && B.type() == C.type());
     CV_Assert(A.data != C.data && B.data != C.data);
@@ -59,6 +64,10 @@ void gemmCPU(const Mat &A, const Mat &B, double alpha, Mat &C, double beta, int flags)
                 B.ptr<double>(), B.cols,
                 beta, C.ptr<double>(), C.cols);
     }
+    else
+    {
+        CV_Error(Error::BadDepth, "Only floating point types are supported");
+    }
 #else
     cv::gemm(A, B, alpha, C, beta, C, flags);
 #endif
@@ -70,7 +79,6 @@ int getBlasThreads()
     return openblas_get_num_threads();
 #else
     return 1;
-
 #endif
 }
@@ -81,7 +89,6 @@ void setBlasThreads(int numThreads)
     goto_set_num_threads(numThreads);
 #else
     (void)numThreads; //suppress compilers' warning
-    numThreads = 0;
 #endif
 }
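getBlasThreads/setBlasThreads wrap the OpenBLAS thread controls and degrade to a fixed single-thread report and a no-op when OpenCV is built without OpenBLAS. A typical pattern is to cap BLAS threads around code that already parallelizes at a higher level; purely illustrative:

    // Save, restrict, and restore the BLAS thread count.
    int saved = cv::dnn::getBlasThreads();
    cv::dnn::setBlasThreads(1);             // avoid oversubscription in per-sample loops
    // ... run GEMM-heavy forward passes ...
    cv::dnn::setBlasThreads(saved);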
...
@@ -42,6 +42,7 @@
 #if defined(ENABLE_CAFFE_MODEL_TESTS)
 #include "test_precomp.hpp"
 #include "npy_blob.hpp"
+#include <opencv2/core/ocl.hpp>

 namespace cvtest
 {
...
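The new <opencv2/core/ocl.hpp> include gives the Caffe model tests access to the global OpenCL switch, presumably so one test body can exercise both backends. A hypothetical test pattern:

    #include <opencv2/core/ocl.hpp>

    // Run the same network twice and compare the two backends' outputs.
    cv::ocl::setUseOpenCL(false);           // force the Mat (CPU) path
    // ... forward pass, keep the result as reference ...
    cv::ocl::setUseOpenCL(true);            // allow the UMat (OpenCL) path
    // ... forward pass again, compare against the reference ...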