Commit e713af1a authored by Vitaliy Lyudvichenko

Adding of OCL activations

parent e33da71a
@@ -275,6 +275,48 @@ namespace dnn
    static Ptr<InnerProductLayer> create(int axis = 1);
};
+
+/* Activations */
+
+class ReLULayer : public Layer
+{
+public:
+    double negativeSlope;
+
+    static Ptr<ReLULayer> create(double negativeSlope = 0);
+};
+
+class TanHLayer : public Layer
+{
+public:
+    static Ptr<TanHLayer> create();
+};
+
+class SigmoidLayer : public Layer
+{
+public:
+    static Ptr<SigmoidLayer> create();
+};
+
+class BNLLLayer : public Layer
+{
+public:
+    static Ptr<BNLLLayer> create();
+};
+
+class AbsLayer : public Layer
+{
+public:
+    static Ptr<AbsLayer> create();
+};
+
+class PowerLayer : public Layer
+{
+public:
+    double power, scale, shift;
+
+    static Ptr<PowerLayer> create(double power = 1, double scale = 1, double shift = 0);
+};
//! @}
//! @}
......
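For reference, a minimal sketch of how the new factories can be driven directly, outside the Caffe importer. The helper function and the way the input Blob is prepared are illustrative assumptions, not part of this commit:

#include <opencv2/dnn/all_layers.hpp>
#include <vector>

using namespace cv::dnn;

// Hypothetical driver: apply a leaky ReLU in place on an already-filled blob.
// Activation layers share data with their input, so no extra copy is made.
void applyReLUInPlace(Blob &blob, double negativeSlope = 0.01)
{
    Ptr<ReLULayer> relu = ReLULayer::create(negativeSlope);

    std::vector<Blob*> inputs(1, &blob);
    std::vector<Blob> outputs;

    relu->allocate(inputs, outputs);  // outputs[0] aliases blob's data
    relu->forward(inputs, outputs);   // x -> (x >= 0) ? x : negativeSlope * x
}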
@@ -86,12 +86,12 @@ void initModule()
    REG_RUNTIME_LAYER_FUNC(LRN, createLRNLayerFromCaffe)
    REG_RUNTIME_LAYER_FUNC(InnerProduct, createInnerProductLayerFromCaffe)
-   REG_RUNTIME_LAYER_CLASS(ReLU, ElementWiseLayer<ReLUFunctor>)
-   REG_RUNTIME_LAYER_CLASS(TanH, ElementWiseLayer<TanHFunctor>)
-   REG_RUNTIME_LAYER_CLASS(BNLL, ElementWiseLayer<BNLLFunctor>)
-   REG_RUNTIME_LAYER_CLASS(Power, ElementWiseLayer<PowerFunctor>)
-   REG_RUNTIME_LAYER_CLASS(AbsVal, ElementWiseLayer<AbsValFunctor>)
-   REG_RUNTIME_LAYER_CLASS(Sigmoid, ElementWiseLayer<SigmoidFunctor>)
+   REG_RUNTIME_LAYER_FUNC(ReLU, createReLULayerFromCaffe)
+   REG_RUNTIME_LAYER_FUNC(Sigmoid, createSigmoidLayerFromCaffe)
+   REG_RUNTIME_LAYER_FUNC(TanH, createTanHLayerFromCaffe)
+   REG_RUNTIME_LAYER_FUNC(BNLL, createBNLLLayerFromCaffe)
+   REG_RUNTIME_LAYER_FUNC(AbsVal, createAbsLayerFromCaffe)
+   REG_RUNTIME_LAYER_FUNC(Power, createPowerLayerFromCaffe)
    REG_RUNTIME_LAYER_CLASS(Dropout, BlankLayer)
    REG_RUNTIME_LAYER_FUNC(Convolution, createConvolutionLayerFromCaffe)
......
#include "../precomp.hpp"
#include "elementwise_layers.hpp"
namespace cv
{
namespace dnn
{
#define ACTIVATION_CREATOR_FOR(_Layer, _Functor, ...) \
Ptr<_Layer> _Layer::create() { \
    return Ptr<_Layer>( new ElementWiseLayer<_Functor>(_Functor()) ); }
Ptr<ReLULayer> ReLULayer::create(double negativeSlope)
{
return Ptr<ReLULayer>(new ElementWiseLayer<ReLUFunctor>(ReLUFunctor(negativeSlope)));
}
Ptr<TanHLayer> TanHLayer::create()
{
return Ptr<TanHLayer>(new ElementWiseLayer<TanHFunctor>());
}
Ptr<SigmoidLayer> SigmoidLayer::create()
{
return Ptr<SigmoidLayer>(new ElementWiseLayer<SigmoidFunctor>());
}
Ptr<AbsLayer> AbsLayer::create()
{
return Ptr<AbsLayer>(new ElementWiseLayer<AbsValFunctor>());
}
Ptr<BNLLLayer> BNLLLayer::create()
{
return Ptr<BNLLLayer>(new ElementWiseLayer<BNLLFunctor>());
}
Ptr<PowerLayer> PowerLayer::create(double power /*= 1*/, double scale /*= 1*/, double shift /*= 0*/)
{
const PowerFunctor f(power, scale, shift);
return Ptr<PowerLayer>(new ElementWiseLayer<PowerFunctor>(f));
}
Ptr<Layer> createReLULayerFromCaffe(LayerParams &params)
{
float negative_slope;
if (params.has("negative_slope"))
negative_slope = params.get<float>("negative_slope");
else
negative_slope = 0.f;
return Ptr<Layer>(ReLULayer::create(negative_slope));
}
Ptr<Layer> createSigmoidLayerFromCaffe(LayerParams&)
{
return Ptr<Layer>(SigmoidLayer::create());
}
Ptr<Layer> createTanHLayerFromCaffe(LayerParams&)
{
return Ptr<Layer>(TanHLayer::create());
}
Ptr<Layer> createAbsLayerFromCaffe(LayerParams&)
{
return Ptr<Layer>(AbsLayer::create());
}
Ptr<Layer> createBNLLLayerFromCaffe(LayerParams&)
{
return Ptr<Layer>(BNLLLayer::create());
}
Ptr<Layer> createPowerLayerFromCaffe(LayerParams &params)
{
float power = params.get<float>("power", 1.0f);
float scale = params.get<float>("scale", 1.0f);
float shift = params.get<float>("shift", 0.0f);
return Ptr<Layer>(PowerLayer::create(power, scale, shift));
}
}
}
\ No newline at end of file
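As a usage note, the importer-facing factories defined above consume a LayerParams dictionary. A small illustrative snippet (the parameter names follow the Caffe conventions already used in this file; the standalone driver itself is hypothetical):

// Illustrative only: build activations the same way the Caffe importer would.
LayerParams reluParams;
reluParams.set("negative_slope", 0.1f);                 // optional, defaults to 0
Ptr<Layer> relu = createReLULayerFromCaffe(reluParams);

LayerParams powParams;
powParams.set("power", 2.0f);                           // out = (shift + scale*x)^power
powParams.set("scale", 0.5f);
powParams.set("shift", 1.0f);
Ptr<Layer> power = createPowerLayerFromCaffe(powParams);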
@@ -44,6 +44,11 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include <cmath>
+#include <opencv2/dnn/all_layers.hpp>
+#include <opencv2/core/ocl.hpp>
+#ifdef HAVE_OPENCL
+#include "modules/dnn/opencl_kernels_dnn.hpp"
+#endif
 namespace cv
 {
@@ -56,8 +61,9 @@ using std::tanh;
using std::pow;
template<typename Func>
-class ElementWiseLayer : public Layer
+class ElementWiseLayer : public Func::Layer
{
+    bool useOpenCL;
    Func func;
    template<typename Dtype>
@@ -67,8 +73,8 @@ class ElementWiseLayer : public Layer
        Dtype *data;
    public:
-       PBody(Blob &blob, Func &func_) :
-           func(func_), data(blob.ptr<Dtype>())
+       PBody(Mat &mat, Func &func_) :
+           func(func_), data(mat.ptr<Dtype>())
        {}
        void operator()(const Range &r) const
@@ -80,35 +86,75 @@ class ElementWiseLayer : public Layer
public:
-   ElementWiseLayer(LayerParams &_params) : func(_params) {}
+   ElementWiseLayer() {}
+   ElementWiseLayer(const Func &f) : func(f) {}

    void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
+       useOpenCL = ocl::useOpenCL();
        outputs.resize(inputs.size());
        for (size_t i = 0; i < inputs.size(); i++)
        {
            outputs[i].shareFrom(*inputs[i]); //no data copy
            //hotfix: shareFrom doesn't provide properly Mat/UMat switching
+           if (!useOpenCL)
                outputs[i].matRef() = inputs[i]->matRefConst();
+           else
+               outputs[i].umatRef() = inputs[i]->umatRefConst();
        }
    }

    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
+       #ifdef HAVE_OPENCL
+       if (useOpenCL)
+           forwardOCL(inputs, outputs);
+       else
+       #endif
+           forwardCPU(inputs, outputs);
+   }
+
+   #ifdef HAVE_OPENCL
+   void forwardOCL(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+   {
+       size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
        for (size_t i = 0; i < inputs.size(); i++)
        {
-           CV_Assert(inputs[i]->ptr() == outputs[i].ptr() && inputs[i]->type() == outputs[i].type());
-           CV_Assert(inputs[i]->matRefConst().isContinuous());
+           const UMat &src = inputs[i]->umatRefConst();
+           UMat &dst = outputs[i].umatRef();
+           CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset);
+           ocl::Kernel ker;
+           CV_Assert(func.initKernel(ker, src));
+           ker.set(0, (int)src.total());
+           ker.set(1, ocl::KernelArg::PtrReadOnly(src));
+           ker.set(2, ocl::KernelArg::PtrWriteOnly(dst));
+           size_t gSize = src.total();
+           CV_Assert(ker.run(1, &gSize, &wgSize, true));
+       }
+   }
+   #endif
+
+   void forwardCPU(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+   {
+       for (size_t i = 0; i < inputs.size(); i++)
+       {
+           const Mat &src = inputs[i]->matRefConst();
+           Mat &dst = outputs[i].matRef();
+           CV_Assert(src.ptr() == dst.ptr() && src.isContinuous());
-           Range sizeRange = Range(0, outputs[i].total());
-           if (outputs[i].type() == CV_32F)
+           Range sizeRange = Range(0, dst.total());
+           if (dst.type() == CV_32F)
            {
-               cv::parallel_for_(sizeRange, PBody<float>(outputs[i], func));
+               cv::parallel_for_(sizeRange, PBody<float>(dst, func));
            }
-           else if (outputs[i].type() == CV_64F)
+           else if (dst.type() == CV_64F)
            {
-               cv::parallel_for_(sizeRange, PBody<double>(outputs[i], func));
+               cv::parallel_for_(sizeRange, PBody<double>(dst, func));
            }
            else
            {
@@ -118,88 +164,170 @@ public:
    }
};

+static String oclGetTMacro(const UMat &m)
+{
+    return String("-DT=") + ocl::typeToStr(m.type()) + String(" ");
+}

struct ReLUFunctor
{
-   float negative_slope;
-   ReLUFunctor(LayerParams &params)
-   {
-       if (params.has("negative_slope"))
-           negative_slope = params.get<float>("negative_slope");
-       else
-           negative_slope = 0.f;
-   }
+   typedef ReLULayer Layer;
+   double slope;
+   ReLUFunctor(double slope_)
+       : slope(slope_) {}
    template<typename TFloat>
    inline TFloat operator()(TFloat x) const
    {
-       return (x >= (TFloat)0) ? x : negative_slope * x;
+       return (x >= (TFloat)0) ? x : (TFloat)slope * x;
    }
+#ifdef HAVE_OPENCL
+   bool initKernel(ocl::Kernel &ker, const UMat &src) const
+   {
+       const char *buildoptSlope = (slope == 0) ? "-DRELU_NO_SLOPE" : "";
+       String buildopt = oclGetTMacro(src) + buildoptSlope;
+       if (!ker.create("ReLUForward", ocl::dnn::activations_oclsrc, buildopt))
+           return false;
+       if (slope != 0)
+           ker.set(3, (float)slope);
+       return true;
+   }
+#endif
};

struct TanHFunctor
{
-   TanHFunctor(LayerParams&) {}
+   typedef TanHLayer Layer;
    template<typename TFloat>
    inline TFloat operator()(TFloat x) const
    {
        return tanh(x);
    }
+#ifdef HAVE_OPENCL
+   bool initKernel(ocl::Kernel &ker, const UMat &src) const
+   {
+       if (!ker.create("TanHForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
+           return false;
+       return true;
+   }
+#endif
};

struct SigmoidFunctor
{
-   SigmoidFunctor(LayerParams&) {}
+   typedef SigmoidLayer Layer;
    template<typename TFloat>
    inline TFloat operator()(TFloat x) const
    {
        return (TFloat)1 / ((TFloat)1 + exp(-x));
    }
+#ifdef HAVE_OPENCL
+   bool initKernel(ocl::Kernel &ker, const UMat &src) const
+   {
+       if (!ker.create("SigmoidForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
+           return false;
+       return true;
+   }
+#endif
};

struct AbsValFunctor
{
-   AbsValFunctor(LayerParams&) {}
+   typedef AbsLayer Layer;
    template<typename TFloat>
    inline TFloat operator()(TFloat x) const
    {
        return abs(x);
    }
+#ifdef HAVE_OPENCL
+   bool initKernel(ocl::Kernel &ker, const UMat &src) const
+   {
+       if (!ker.create("AbsValForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
+           return false;
+       return true;
+   }
+#endif
};

-struct PowerFunctor
-{
-   float power, scale, shift;
-   PowerFunctor(LayerParams &params)
-   {
-       power = params.get<float>("power", 1.0f);
-       scale = params.get<float>("scale", 1.0f);
-       shift = params.get<float>("shift", 0.0f);
-   }
-   template<typename TFloat>
-   inline TFloat operator()(TFloat x) const
-   {
-       return pow((TFloat)shift + (TFloat)scale * x, (TFloat)power);
-   }
-};
-
-struct BNLLFunctor
-{
-   BNLLFunctor(LayerParams&) {}
-   template<typename TFloat>
-   inline TFloat operator()(TFloat x) const
-   {
-       return log((TFloat)1 + exp(-abs(x)));
-   }
-};
+struct BNLLFunctor
+{
+   typedef BNLLLayer Layer;
+   template<typename TFloat>
+   inline TFloat operator()(TFloat x) const
+   {
+       return log((TFloat)1 + exp(-abs(x)));
+   }
+#ifdef HAVE_OPENCL
+   bool initKernel(ocl::Kernel &ker, const UMat &src) const
+   {
+       if (!ker.create("BNLLForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
+           return false;
+       return true;
+   }
+#endif
+};
+
+struct PowerFunctor
+{
+   typedef PowerLayer Layer;
+   double power, scale, shift;
+   PowerFunctor(double power_, double scale_ = 1, double shift_ = 0)
+       : power(power_), scale(scale_), shift(shift_) {}
+   template<typename TFloat>
+   inline TFloat operator()(TFloat x) const
+   {
+       return pow((TFloat)shift + (TFloat)scale * x, (TFloat)power);
+   }
+#ifdef HAVE_OPENCL
+   bool initKernel(ocl::Kernel &ker, const UMat &src) const
+   {
+       if (!ker.create("PowForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
+           return false;
+       ker.set(3, (float)power);
+       ker.set(4, (float)scale);
+       ker.set(5, (float)shift);
+       return true;
+   }
+#endif
+};

+template <typename ActivationLayer>
+Ptr<Layer> createLayerFromCaffe(LayerParams&)
+{
+    return Ptr<Layer>(ActivationLayer::create());
+}

+Ptr<Layer> createReLULayerFromCaffe(LayerParams &params);
+Ptr<Layer> createSigmoidLayerFromCaffe(LayerParams&);
+Ptr<Layer> createTanHLayerFromCaffe(LayerParams&);
+Ptr<Layer> createAbsLayerFromCaffe(LayerParams&);
+Ptr<Layer> createBNLLLayerFromCaffe(LayerParams&);
+Ptr<Layer> createPowerLayerFromCaffe(LayerParams &params);
}
}
#endif
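The functor interface introduced above (a Layer typedef, a templated operator(), and an optional initKernel for the OpenCL path) is all a future activation would need to implement. A purely hypothetical ExpFunctor sketch, with ExpLayer and the "ExpForward" kernel assumed rather than provided by this commit:

// Hypothetical example of the functor pattern; ExpLayer and the "ExpForward"
// kernel do not exist in this commit and would have to be added separately.
struct ExpFunctor
{
    typedef ExpLayer Layer;   // assumed declaration in all_layers.hpp

    template<typename TFloat>
    inline TFloat operator()(TFloat x) const
    {
        return exp(x);        // element-wise exponential on the CPU path
    }

#ifdef HAVE_OPENCL
    bool initKernel(ocl::Kernel &ker, const UMat &src) const
    {
        // An "ExpForward" entry point would need to be added to activations.cl.
        return ker.create("ExpForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src));
    }
#endif
};

Registration could then reuse the generic createLayerFromCaffe<ExpLayer> helper declared above.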
@@ -99,7 +99,7 @@ template<typename XMat>
void FullyConnectedLayerImpl::forward_(std::vector<Blob *> &input, std::vector<Blob> &output)
{
    const XMat &weight = blobs[0].getRefConst<XMat>();
-   const XMat *biasMat, *biasOnesMat;
+   const XMat *biasMat = NULL, *biasOnesMat = NULL;
    if (bias)
    {
        biasOnesMat = &biasOnesBlob.getRefConst<XMat>();
......
@@ -190,6 +190,8 @@ bool LRNLayerImpl::channelNoramlization_ocl(const UMat &src, UMat &dst)
    return true;
#else
+   (void)src;
+   (void)dst;
    return false;
#endif
}
......
__kernel void ReLUForward(const int count, __global const T* in, __global T* out
#ifndef RELU_NO_SLOPE
, T negative_slope
#endif
) {
int index = get_global_id(0);
if(index < count)
#ifndef RELU_NO_SLOPE
out[index] = in[index] > 0 ? in[index] : in[index] * negative_slope;
#else
out[index] = in[index] > 0 ? in[index] : 0;
#endif
}
__kernel void TanHForward(const int count, __global T* in, __global T* out) {
int index = get_global_id(0);
if(index < count)
out[index] = tanh(in[index]);
}
__kernel void SigmoidForward(const int count, __global const T* in, __global T* out) {
int index = get_global_id(0);
if(index < count)
out[index] = 1. / (1. + exp(-in[index]));
}
__kernel void BNLLForward(const int n, __global const T* in, __global T* out) {
int index = get_global_id(0);
if (index < n) {
out[index] = in[index] > 0 ? in[index] + log(1. + exp(-in[index])) : log(1. + exp(in[index]));
}
}
__kernel void AbsValForward(const int n, __global const T* in, __global T* out) {
int index = get_global_id(0);
if (index < n)
out[index] = abs(in[index]);
}
__kernel void PowForward(const int n, __global const T* in, __global T* out, const T power, const T scale, const T shift) {
int index = get_global_id(0);
if (index < n)
out[index] = pow(shift + scale * in[index], power);
}
\ No newline at end of file
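One note on BNLLForward above: it evaluates the softplus log(1 + exp(x)) in its numerically stable form, branching on the sign of x so that exp() is only ever called with a non-positive argument:

log(1 + exp(x)) = max(x, 0) + log(1 + exp(-|x|))

For large positive x the naive expression would overflow in single precision, while the rewritten form stays bounded.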