Commit e713af1a authored by Vitaliy Lyudvichenko

Adding OpenCL activations

parent e33da71a
......@@ -275,6 +275,48 @@ namespace dnn
static Ptr<InnerProductLayer> create(int axis = 1);
};
/* Activations */
class ReLULayer : public Layer
{
public:
double negativeSlope;
static Ptr<ReLULayer> create(double negativeSlope = 0);
};
class TanHLayer : public Layer
{
public:
static Ptr<TanHLayer> create();
};
class SigmoidLayer : public Layer
{
public:
static Ptr<SigmoidLayer> create();
};
class BNLLLayer : public Layer
{
public:
static Ptr<BNLLLayer> create();
};
class AbsLayer : public Layer
{
public:
static Ptr<AbsLayer> create();
};
class PowerLayer : public Layer
{
public:
double power, scale, shift;
static Ptr<PowerLayer> create(double power = 1, double scale = 1, double shift = 0);
};
//! @}
//! @}
......
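The declarations above add public create() factories for the element-wise activation layers. The following is a minimal usage sketch, not part of this commit, assuming cv::dnn::Blob can be constructed directly from a Mat; the allocate/forward signatures match the Layer interface used in this patch.
#include <opencv2/core.hpp>
#include <opencv2/dnn.hpp>
#include <vector>
void activationFactorySketch()
{
    using namespace cv;
    using namespace cv::dnn;
    Mat data(1, 10, CV_32F);
    randu(data, Scalar(-1), Scalar(1));
    Blob input(data);                  // assumption: Blob wraps a Mat directly
    // Leaky ReLU with the new negativeSlope parameter, and y = (1 + 0.5*x)^2 via PowerLayer.
    Ptr<ReLULayer>  relu = ReLULayer::create(/*negativeSlope=*/0.1);
    Ptr<PowerLayer> pw   = PowerLayer::create(/*power=*/2, /*scale=*/0.5, /*shift=*/1);
    std::vector<Blob*> inputs(1, &input);
    std::vector<Blob>  outputs;
    relu->allocate(inputs, outputs);   // element-wise layers run in place: output shares the input buffer
    relu->forward(inputs, outputs);
}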
......@@ -86,12 +86,12 @@ void initModule()
REG_RUNTIME_LAYER_FUNC(LRN, createLRNLayerFromCaffe)
REG_RUNTIME_LAYER_FUNC(InnerProduct, createInnerProductLayerFromCaffe)
REG_RUNTIME_LAYER_CLASS(ReLU, ElementWiseLayer<ReLUFunctor>)
REG_RUNTIME_LAYER_CLASS(TanH, ElementWiseLayer<TanHFunctor>)
REG_RUNTIME_LAYER_CLASS(BNLL, ElementWiseLayer<BNLLFunctor>)
REG_RUNTIME_LAYER_CLASS(Power, ElementWiseLayer<PowerFunctor>)
REG_RUNTIME_LAYER_CLASS(AbsVal, ElementWiseLayer<AbsValFunctor>)
REG_RUNTIME_LAYER_CLASS(Sigmoid, ElementWiseLayer<SigmoidFunctor>)
REG_RUNTIME_LAYER_FUNC(ReLU, createReLULayerFromCaffe)
REG_RUNTIME_LAYER_FUNC(Sigmoid, createSigmoidLayerFromCaffe)
REG_RUNTIME_LAYER_FUNC(TanH, createTanHLayerFromCaffe)
REG_RUNTIME_LAYER_FUNC(BNLL, createBNLLLayerFromCaffe)
REG_RUNTIME_LAYER_FUNC(AbsVal, createAbsLayerFromCaffe)
REG_RUNTIME_LAYER_FUNC(Power, createPowerLayerFromCaffe)
REG_RUNTIME_LAYER_CLASS(Dropout, BlankLayer)
REG_RUNTIME_LAYER_FUNC(Convolution, createConvolutionLayerFromCaffe)
......
#include "../precomp.hpp"
#include "elementwise_layers.hpp"
namespace cv
{
namespace dnn
{
#define ACTIVATION_CREATOR_FOR(_Layer, _Functor, ...) \
Ptr<_Layer> _Layer::create() { \
return Ptr<_Layer>( new ElementWiseLayer<_Functor>(_Functor()) ); }
Ptr<ReLULayer> ReLULayer::create(double negativeSlope)
{
return Ptr<ReLULayer>(new ElementWiseLayer<ReLUFunctor>(ReLUFunctor(negativeSlope)));
}
Ptr<TanHLayer> TanHLayer::create()
{
return Ptr<TanHLayer>(new ElementWiseLayer<TanHFunctor>());
}
Ptr<SigmoidLayer> SigmoidLayer::create()
{
return Ptr<SigmoidLayer>(new ElementWiseLayer<SigmoidFunctor>());
}
Ptr<AbsLayer> AbsLayer::create()
{
return Ptr<AbsLayer>(new ElementWiseLayer<AbsValFunctor>());
}
Ptr<BNLLLayer> BNLLLayer::create()
{
return Ptr<BNLLLayer>(new ElementWiseLayer<BNLLFunctor>());
}
Ptr<PowerLayer> PowerLayer::create(double power /*= 1*/, double scale /*= 1*/, double shift /*= 0*/)
{
const PowerFunctor f(power, scale, shift);
return Ptr<PowerLayer>(new ElementWiseLayer<PowerFunctor>(f));
}
Ptr<Layer> createReLULayerFromCaffe(LayerParams &params)
{
float negative_slope;
if (params.has("negative_slope"))
negative_slope = params.get<float>("negative_slope");
else
negative_slope = 0.f;
return Ptr<Layer>(ReLULayer::create(negative_slope));
}
Ptr<Layer> createSigmoidLayerFromCaffe(LayerParams&)
{
return Ptr<Layer>(SigmoidLayer::create());
}
Ptr<Layer> createTanHLayerFromCaffe(LayerParams&)
{
return Ptr<Layer>(TanHLayer::create());
}
Ptr<Layer> createAbsLayerFromCaffe(LayerParams&)
{
return Ptr<Layer>(AbsLayer::create());
}
Ptr<Layer> createBNLLLayerFromCaffe(LayerParams&)
{
return Ptr<Layer>(BNLLLayer::create());
}
Ptr<Layer> createPowerLayerFromCaffe(LayerParams &params)
{
float power = params.get<float>("power", 1.0f);
float scale = params.get<float>("scale", 1.0f);
float shift = params.get<float>("shift", 0.0f);
return Ptr<Layer>(PowerLayer::create(power, scale, shift));
}
}
}
\ No newline at end of file
......@@ -44,6 +44,11 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include <cmath>
#include <opencv2/dnn/all_layers.hpp>
#include <opencv2/core/ocl.hpp>
#ifdef HAVE_OPENCL
#include "modules/dnn/opencl_kernels_dnn.hpp"
#endif
namespace cv
{
......@@ -56,8 +61,9 @@ using std::tanh;
using std::pow;
template<typename Func>
class ElementWiseLayer : public Layer
class ElementWiseLayer : public Func::Layer
{
bool useOpenCL;
Func func;
template<typename Dtype>
......@@ -67,8 +73,8 @@ class ElementWiseLayer : public Layer
Dtype *data;
public:
PBody(Blob &blob, Func &func_) :
func(func_), data(blob.ptr<Dtype>())
PBody(Mat &mat, Func &func_) :
func(func_), data(mat.ptr<Dtype>())
{}
void operator()(const Range &r) const
......@@ -80,35 +86,75 @@ class ElementWiseLayer : public Layer
public:
ElementWiseLayer(LayerParams &_params) : func(_params) {}
ElementWiseLayer() {}
ElementWiseLayer(const Func &f) : func(f) {}
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
useOpenCL = ocl::useOpenCL();
outputs.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); i++)
{
outputs[i].shareFrom(*inputs[i]); //no data copy
//hotfix: shareFrom doesn't properly handle Mat/UMat switching
outputs[i].matRef() = inputs[i]->matRefConst();
if (!useOpenCL)
outputs[i].matRef() = inputs[i]->matRefConst();
else
outputs[i].umatRef() = inputs[i]->umatRefConst();
}
}
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
#ifdef HAVE_OPENCL
if (useOpenCL)
forwardOCL(inputs, outputs);
else
#endif
forwardCPU(inputs, outputs);
}
#ifdef HAVE_OPENCL
void forwardOCL(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
for (size_t i = 0; i < inputs.size(); i++)
{
CV_Assert(inputs[i]->ptr() == outputs[i].ptr() && inputs[i]->type() == outputs[i].type());
CV_Assert(inputs[i]->matRefConst().isContinuous());
const UMat &src = inputs[i]->umatRefConst();
UMat &dst = outputs[i].umatRef();
CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset);
ocl::Kernel ker;
CV_Assert(func.initKernel(ker, src));
ker.set(0, (int)src.total());
ker.set(1, ocl::KernelArg::PtrReadOnly(src));
ker.set(2, ocl::KernelArg::PtrWriteOnly(dst));
size_t gSize = src.total();
CV_Assert(ker.run(1, &gSize, &wgSize, true));
}
}
#endif
Range sizeRange = Range(0, outputs[i].total());
void forwardCPU(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
for (size_t i = 0; i < inputs.size(); i++)
{
const Mat &src = inputs[i]->matRefConst();
Mat &dst = outputs[i].matRef();
CV_Assert(src.ptr() == dst.ptr() && src.isContinuous());
if (outputs[i].type() == CV_32F)
Range sizeRange = Range(0, dst.total());
if (dst.type() == CV_32F)
{
cv::parallel_for_(sizeRange, PBody<float>(outputs[i], func));
cv::parallel_for_(sizeRange, PBody<float>(dst, func));
}
else if (outputs[i].type() == CV_64F)
else if (dst.type() == CV_64F)
{
cv::parallel_for_(sizeRange, PBody<double>(outputs[i], func));
cv::parallel_for_(sizeRange, PBody<double>(dst, func));
}
else
{
......@@ -118,88 +164,170 @@ public:
}
};
static String oclGetTMacro(const UMat &m)
{
return String("-DT=") + ocl::typeToStr(m.type()) + String(" ");
}
struct ReLUFunctor
{
float negative_slope;
typedef ReLULayer Layer;
ReLUFunctor(LayerParams &params)
{
if (params.has("negative_slope"))
negative_slope = params.get<float>("negative_slope");
else
negative_slope = 0.f;
}
double slope;
ReLUFunctor(double slope_)
: slope(slope_) {}
template<typename TFloat>
inline TFloat operator()(TFloat x) const
{
return (x >= (TFloat)0) ? x : negative_slope * x;
return (x >= (TFloat)0) ? x : (TFloat)slope * x;
}
#ifdef HAVE_OPENCL
bool initKernel(ocl::Kernel &ker, const UMat &src) const
{
const char *buildoptSlope = (slope == 0) ? "-DRELU_NO_SLOPE" : "";
String buildopt = oclGetTMacro(src) + buildoptSlope;
if (!ker.create("ReLUForward", ocl::dnn::activations_oclsrc, buildopt))
return false;
if (slope != 0)
ker.set(3, (float)slope);
return true;
}
#endif
};
struct TanHFunctor
{
TanHFunctor(LayerParams&) {}
typedef TanHLayer Layer;
template<typename TFloat>
inline TFloat operator()(TFloat x) const
{
return tanh(x);
}
#ifdef HAVE_OPENCL
bool initKernel(ocl::Kernel &ker, const UMat &src) const
{
if (!ker.create("TanHForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
return false;
return true;
}
#endif
};
struct SigmoidFunctor
{
SigmoidFunctor(LayerParams&) {}
typedef SigmoidLayer Layer;
template<typename TFloat>
inline TFloat operator()(TFloat x) const
{
return (TFloat)1 / ((TFloat)1 + exp(-x));
}
#ifdef HAVE_OPENCL
bool initKernel(ocl::Kernel &ker, const UMat &src) const
{
if (!ker.create("SigmoidForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
return false;
return true;
}
#endif
};
struct AbsValFunctor
{
AbsValFunctor(LayerParams&) {}
typedef AbsLayer Layer;
template<typename TFloat>
inline TFloat operator()(TFloat x) const
{
return abs(x);
}
#ifdef HAVE_OPENCL
bool initKernel(ocl::Kernel &ker, const UMat &src) const
{
if (!ker.create("AbsValForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
return false;
return true;
}
#endif
};
struct PowerFunctor
struct BNLLFunctor
{
float power, scale, shift;
typedef BNLLLayer Layer;
PowerFunctor(LayerParams &params)
template<typename TFloat>
inline TFloat operator()(TFloat x) const
{
power = params.get<float>("power", 1.0f);
scale = params.get<float>("scale", 1.0f);
shift = params.get<float>("shift", 0.0f);
return log((TFloat)1 + exp(-abs(x)));
}
template<typename TFloat>
inline TFloat operator()(TFloat x) const
#ifdef HAVE_OPENCL
bool initKernel(ocl::Kernel &ker, const UMat &src) const
{
return pow((TFloat)shift + (TFloat)scale * x, (TFloat)power);
if (!ker.create("BNLLForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
return false;
return true;
}
#endif
};
struct BNLLFunctor
struct PowerFunctor
{
BNLLFunctor(LayerParams&) {}
typedef PowerLayer Layer;
double power, scale, shift;
PowerFunctor(double power_, double scale_ = 1, double shift_ = 0)
: power(power_), scale(scale_), shift(shift_) {}
template<typename TFloat>
inline TFloat operator()(TFloat x) const
{
return log((TFloat)1 + exp(-abs(x)));
return pow((TFloat)shift + (TFloat)scale * x, (TFloat)power);
}
#ifdef HAVE_OPENCL
bool initKernel(ocl::Kernel &ker, const UMat &src) const
{
if (!ker.create("PowForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
return false;
ker.set(3, (float)power);
ker.set(4, (float)scale);
ker.set(5, (float)shift);
return true;
}
#endif
};
template <typename ActivationLayer>
Ptr<Layer> createLayerFromCaffe(LayerParams&)
{
return Ptr<Layer>(ActivationLayer::create());
}
Ptr<Layer> createReLULayerFromCaffe(LayerParams &params);
Ptr<Layer> createSigmoidLayerFromCaffe(LayerParams&);
Ptr<Layer> createTanHLayerFromCaffe(LayerParams&);
Ptr<Layer> createAbsLayerFromCaffe(LayerParams&);
Ptr<Layer> createBNLLLayerFromCaffe(LayerParams&);
Ptr<Layer> createPowerLayerFromCaffe(LayerParams &params);
}
}
#endif
......@@ -99,7 +99,7 @@ template<typename XMat>
void FullyConnectedLayerImpl::forward_(std::vector<Blob *> &input, std::vector<Blob> &output)
{
const XMat &weight = blobs[0].getRefConst<XMat>();
const XMat *biasMat, *biasOnesMat;
const XMat *biasMat = NULL, *biasOnesMat = NULL;
if (bias)
{
biasOnesMat = &biasOnesBlob.getRefConst<XMat>();
......
......@@ -190,6 +190,8 @@ bool LRNLayerImpl::channelNoramlization_ocl(const UMat &src, UMat &dst)
return true;
#else
(void)src;
(void)dst;
return false;
#endif
}
......
__kernel void ReLUForward(const int count, __global const T* in, __global T* out
#ifndef RELU_NO_SLOPE
, T negative_slope
#endif
) {
int index = get_global_id(0);
if(index < count)
#ifndef RELU_NO_SLOPE
out[index] = in[index] > 0 ? in[index] : in[index] * negative_slope;
#else
out[index] = in[index] > 0 ? in[index] : 0;
#endif
}
__kernel void TanHForward(const int count, __global const T* in, __global T* out) {
int index = get_global_id(0);
if(index < count)
out[index] = tanh(in[index]);
}
__kernel void SigmoidForward(const int count, __global const T* in, __global T* out) {
int index = get_global_id(0);
if(index < count)
out[index] = 1. / (1. + exp(-in[index]));
}
__kernel void BNLLForward(const int n, __global const T* in, __global T* out) {
int index = get_global_id(0);
if (index < n) {
out[index] = in[index] > 0 ? in[index] + log(1. + exp(-in[index])) : log(1. + exp(in[index]));
}
}
__kernel void AbsValForward(const int n, __global const T* in, __global T* out) {
int index = get_global_id(0);
if (index < n)
out[index] = abs(in[index]);
}
__kernel void PowForward(const int n, __global const T* in, __global T* out, const T power, const T scale, const T shift) {
int index = get_global_id(0);
if (index < n)
out[index] = pow(shift + scale * in[index], power);
}
\ No newline at end of file
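Below is a host-side sketch, not part of this commit, of how one of the kernels above could be compiled and launched through cv::ocl::Kernel, mirroring ElementWiseLayer::forwardOCL: the -DT=<type> build option fixes the element type, and the argument order is (count, in, out[, extra scalars]). It assumes the generated ocl::dnn::activations_oclsrc program source from opencl_kernels_dnn.hpp and a continuous 2D UMat input.
#include <opencv2/core/ocl.hpp>
#include "modules/dnn/opencl_kernels_dnn.hpp"   // generated header defining ocl::dnn::activations_oclsrc
static bool runTanHForward(const cv::UMat &src, cv::UMat &dst)
{
    using namespace cv;
    CV_Assert(src.isContinuous() && src.offset == 0);
    dst.create(src.size(), src.type());          // separate output here; the layer itself runs in place
    String buildopt = String("-DT=") + ocl::typeToStr(src.type()) + " ";
    ocl::Kernel ker("TanHForward", ocl::dnn::activations_oclsrc, buildopt);
    if (ker.empty())
        return false;
    ker.set(0, (int)src.total());
    ker.set(1, ocl::KernelArg::PtrReadOnly(src));
    ker.set(2, ocl::KernelArg::PtrWriteOnly(dst));
    size_t globalSize = src.total();
    size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
    return ker.run(1, &globalSize, &wgSize, true);   // blocking run, as in forwardOCL
}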