Commit 8c2563be authored by Alexander Alekhin

Merge pull request #10672 from pengli:dnn

parents c401168b 24930839
@@ -1190,7 +1190,8 @@ struct Net::Impl
             // TODO: OpenCL target support more fusion styles.
             if ( preferableTarget == DNN_TARGET_OPENCL &&
-                 (!cv::ocl::useOpenCL() || ld.layerInstance->type.compare("Convolution")) )
+                 (!cv::ocl::useOpenCL() || (ld.layerInstance->type != "Convolution" &&
+                                            ld.layerInstance->type != "MVN")) )
                 continue;
 
             Ptr<Layer>& currLayer = ld.layerInstance;
@@ -1239,13 +1240,14 @@ struct Net::Impl
                 }
             }
 
-            // For now, OpenCL target only support fusion with activation of ReLU/ChannelsPReLU/Power
+            // For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh
             if ( preferableTarget != DNN_TARGET_OPENCL ||
                  (preferableTarget == DNN_TARGET_OPENCL &&
                   nextData &&
-                  (!nextData->type.compare("ReLU") ||
-                   !nextData->type.compare("ChannelsPReLU") ||
-                   !nextData->type.compare("Power"))) )
+                  ((nextData->type == "ReLU") ||
+                   (nextData->type == "ChannelsPReLU") ||
+                   (nextData->type == "TanH") ||
+                   (nextData->type == "Power"))) )
             {
                 Ptr<ActivationLayer> nextActivLayer;
......
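A note on the condition rewrite above: std::string::compare() returns 0 when the strings are equal, so the old test `!ld.layerInstance->type.compare("Convolution")` was an inverted-looking way of writing equality. The new form spells the check out directly and adds MVN to the set of layers eligible for OpenCL fusion. A minimal standalone check of the equivalence (the local variable `type` here is purely illustrative):

    #include <cassert>
    #include <string>

    int main()
    {
        std::string type = "MVN";
        // compare() == 0 means "equal", so !type.compare(s) is the same as type == s.
        assert((type.compare("Convolution") == 0) == (type == "Convolution"));
        // With the new condition, a layer is skipped only if it is neither
        // Convolution nor MVN.
        bool eligibleForOclFusion = (type == "Convolution" || type == "MVN");
        assert(eligibleForOclFusion);
        return 0;
    }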
@@ -81,9 +81,6 @@ public:
             dstWeightsData[i] = w;
             dstBiasData[i] = (hasBias ? biasData[i] : 0.0f) - w * meanData[i] * varMeanScale;
         }
-
-        umat_weight = weights_.getUMat(ACCESS_READ);
-        umat_bias = bias_.getUMat(ACCESS_READ);
     }
 
     void getScaleShift(Mat& scale, Mat& shift) const
@@ -119,6 +116,12 @@ public:
         CV_Assert(blobs.size() >= 2);
         CV_Assert(inputs.size() == 1);
 
+        if (umat_weight.empty())
+        {
+            umat_weight = weights_.getUMat(ACCESS_READ);
+            umat_bias = bias_.getUMat(ACCESS_READ);
+        }
+
         UMat &inpBlob = inputs[0];
         CV_Assert(inpBlob.dims == 2 || inpBlob.dims == 4);
         int groups = inpBlob.size[0];
......
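The batch-norm change above moves the getUMat() calls out of the weight-folding code and into the OpenCL forward path, so the UMat copies are created only the first time the OpenCL branch actually runs. A minimal sketch of that lazy-initialization pattern, wrapped in a hypothetical holder struct that reuses the member names from the diff:

    #include <opencv2/core.hpp>

    // Hypothetical holder illustrating the lazy getUMat() pattern used above.
    struct LazyParams
    {
        cv::Mat weights_, bias_;          // CPU-side parameters, always available
        cv::UMat umat_weight, umat_bias;  // OpenCL-side copies, created on demand

        void ensureUMats()
        {
            if (umat_weight.empty())      // first OpenCL invocation only
            {
                umat_weight = weights_.getUMat(cv::ACCESS_READ);
                umat_bias = bias_.getUMat(cv::ACCESS_READ);
            }
        }
    };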
@@ -246,6 +246,11 @@ public:
                 power = activ_power->power;
                 activType = OCL4DNN_CONV_FUSED_ACTIV_POWER;
             }
+            Ptr<TanHLayer> activ_tanh = activ.dynamicCast<TanHLayer>();
+            if (!activ_tanh.empty())
+            {
+                activType = OCL4DNN_CONV_FUSED_ACTIV_TANH;
+            }
         }
 #endif
         return !activ.empty();
@@ -877,11 +882,16 @@ public:
                 {
                     convolutionOp->setActivPower(true, power);
                 }
+                else if ( activType == OCL4DNN_CONV_FUSED_ACTIV_TANH)
+                {
+                    convolutionOp->setActivTanh(true);
+                }
                 else
                 {
                     convolutionOp->setActivReLU(false, 0);
                     convolutionOp->setActivPReLU(false, reluslope);
                     convolutionOp->setActivPower(false, 1.f);
+                    convolutionOp->setActivTanh(false);
                 }
                 newActiv = false;
             }
......
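For reference, the arithmetic the fused Convolution+TanH path is expected to perform per output element, written as a scalar C++ sketch (assumed semantics for illustration, not the ocl4dnn kernel code itself):

    #include <cmath>

    // Scalar sketch: convolution dot product followed by the fused tanh activation.
    float fusedConvTanh(const float* weights, const float* patch, int n, float bias)
    {
        float acc = bias;
        for (int i = 0; i < n; ++i)
            acc += weights[i] * patch[i];  // the convolution accumulation
        return std::tanh(acc);             // activation applied in the same kernel pass
    }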
@@ -60,6 +60,36 @@ public:
         normVariance = params.get<bool>("normalize_variance", true);
         acrossChannels = params.get<bool>("across_channels", false);
         eps = params.get<double>("eps", 1e-9);
+        fuse_batch_norm = false;
+        fuse_relu = false;
+        relu_slope = 0.f;
+    }
+
+    Ptr<BatchNormLayer> bnorm;
+    Mat scale, shift;
+    UMat bnorm_weight, bnorm_bias;
+    bool fuse_batch_norm;
+
+    bool setBatchNorm(const Ptr<BatchNormLayer>& layer )
+    {
+        bnorm = layer;
+        fuse_batch_norm = !bnorm.empty() && (preferableTarget == DNN_TARGET_OPENCL);
+        return fuse_batch_norm;
+    }
+
+    Ptr<ReLULayer> activ_relu;
+    float relu_slope;
+    bool fuse_relu;
+
+    bool setActivation(const Ptr<ActivationLayer>& layer)
+    {
+        if (!layer.empty() && preferableTarget == DNN_TARGET_OPENCL)
+        {
+            activ_relu = layer.dynamicCast<ReLULayer>();
+            if( !activ_relu.empty() )
+                relu_slope = activ_relu->negativeSlope;
+        }
+        fuse_relu = !activ_relu.empty();
+        return fuse_relu;
+    }
     }
 
 #ifdef HAVE_OPENCL
@@ -71,19 +101,24 @@ public:
         inputs_.getUMatVector(inputs);
         outputs_.getUMatVector(outputs);
 
+        if( fuse_batch_norm && scale.empty())
+        {
+            bnorm->getScaleShift(scale, shift);
+            bnorm_weight = scale.getUMat(ACCESS_READ);
+            bnorm_bias = shift.getUMat(ACCESS_READ);
+        }
+
         for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
         {
-            UMat &inpBlob = inputs[inpIdx];
-            UMat &outBlob = outputs[inpIdx];
+            UMat &inpMat = inputs[inpIdx];
+            UMat &outMat = outputs[inpIdx];
 
             int splitDim = (acrossChannels) ? 1 : 2;
             int i, newRows = 1;
             for( i = 0; i < splitDim; i++ )
-                newRows *= inpBlob.size[i];
-            MatShape s = shape(newRows, inpBlob.total() / newRows);
+                newRows *= inpMat.size[i];
+            MatShape s = shape(newRows, inpMat.total() / newRows);
 
-            UMat& inpMat = inpBlob;
-            UMat& outMat = outBlob;
             UMat oneMat = UMat::ones(s[1], 1, CV_32F);
             UMat meanMat = UMat(s[0], 1, CV_32F);
             UMat devMat = UMat(s[0], 1, CV_32F);
@@ -121,8 +156,9 @@ public:
             }
 
             String kname = format("mvn%d", number);
-            if (normVariance)
-                buildopt += "-DNORM_VARIANCE";
+            buildopt += format("%s %s %s ", (normVariance) ? "-DNORM_VARIANCE" : "",
+                               (fuse_batch_norm) ? "-DFUSE_BATCH_NORM" : "",
+                               (fuse_relu) ? "-DFUSE_RELU" : "");
             ocl::Kernel kernel1(kname.c_str(), ocl::dnn::mvn_oclsrc, buildopt);
             if (kernel1.empty())
                 return false;
@@ -132,7 +168,11 @@ public:
             kernel1.set(3, (float)eps);
             kernel1.set(4, ocl::KernelArg::PtrReadOnly(meanMat));
             kernel1.set(5, ocl::KernelArg::PtrReadOnly(devMat));
-            kernel1.set(6, ocl::KernelArg::PtrWriteOnly(outMat));
+            kernel1.set(6, ocl::KernelArg::PtrReadOnly(bnorm_weight));
+            kernel1.set(7, ocl::KernelArg::PtrReadOnly(bnorm_bias));
+            kernel1.set(8, (int)inpMat.size[1]);
+            kernel1.set(9, (float)relu_slope);
+            kernel1.set(10, ocl::KernelArg::PtrWriteOnly(outMat));
             ret = kernel1.run(2, global, NULL, false);
             if (!ret)
                 return false;
......
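Put together, the fused MVN path computes the normalization, the folded BatchNorm scale/shift, and an optional (leaky) ReLU in a single kernel launch. A scalar C++ sketch of the per-element arithmetic, where `alpha` stands for the normalization factor the kernel derives from `dev` (roughly 1/sqrt(var + eps) when variance normalization is enabled) and the parameter names are illustrative:

    // Scalar sketch of the fused MVN + BatchNorm + ReLU arithmetic (assumed semantics).
    float fusedMVN(float x, float mean, float alpha,
                   float bnWeight, float bnBias, float reluSlope)
    {
        float y = (x - mean) * alpha;         // mean subtraction / variance normalization
        y = y * bnWeight + bnBias;            // folded BatchNorm scale and shift
        return (y > 0.f) ? y : y * reluSlope; // ReLU (reluSlope == 0) or leaky ReLU
    }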
@@ -77,7 +77,8 @@ typedef enum {
     OCL4DNN_CONV_FUSED_ACTIV_NONE = 0,
     OCL4DNN_CONV_FUSED_ACTIV_RELU = 1,
     OCL4DNN_CONV_FUSED_ACTIV_PRELU = 2,
-    OCL4DNN_CONV_FUSED_ACTIV_POWER = 3
+    OCL4DNN_CONV_FUSED_ACTIV_POWER = 3,
+    OCL4DNN_CONV_FUSED_ACTIV_TANH = 4
 } ocl4dnnFusedActiv_t;
 
 template<typename Dtype>
@@ -94,6 +95,7 @@ class OCL4DNNConvSpatial
     void setActivReLU(bool fuse_activ, float slope);
     void setActivPReLU(bool fuse_activ, std::vector<float> &slope);
     void setActivPower(bool fuse_activ, float power);
+    void setActivTanh(bool fuse_activ);
     void setBias(bool bias_term);
 private:
......
@@ -159,6 +159,9 @@ void OCL4DNNConvSpatial<Dtype>::setFusionDefine(ocl4dnnFusedActiv_t fused_activ,
         case OCL4DNN_CONV_FUSED_ACTIV_POWER:
             addDef("FUSED_CONV_POWER", 1);
             break;
+        case OCL4DNN_CONV_FUSED_ACTIV_TANH:
+            addDef("FUSED_CONV_TANH", 1);
+            break;
         default:
             ;
     }
@@ -415,6 +418,17 @@ void OCL4DNNConvSpatial<Dtype>::setActivPower(bool fuse_activ, float power)
         fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
 }
 
+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::setActivTanh(bool fuse_activ)
+{
+    if ( fuse_activ )
+    {
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_TANH;
+    }
+    else
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+}
+
 template<typename Dtype>
 bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom,
                                         const UMat& bottom2,
......
@@ -55,6 +55,9 @@
 #elif defined(FUSED_CONV_POWER)
 #define ACTIVATION_RELU_FUNCTION(x, c) pow(x, power)
 #define NEGATIVE_SLOPE_ARG Dtype power,
+#elif defined(FUSED_CONV_TANH)
+#define ACTIVATION_RELU_FUNCTION(x, c) tanh(x)
+#define NEGATIVE_SLOPE_ARG
 #else
 #define ACTIVATION_RELU_FUNCTION(x, c) (x)
 #define NEGATIVE_SLOPE_ARG
......
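The activation here is chosen at kernel compile time: setFusionDefine() injects exactly one FUSED_CONV_* define through the OpenCL build options, and the preprocessor chain above collapses ACTIVATION_RELU_FUNCTION to a single expression with no runtime branch. A small C++ analogue of that dispatch (the FUSED_CONV_TANH define is assumed here to stand in for the one added via addDef("FUSED_CONV_TANH", 1)):

    #include <cmath>

    #define FUSED_CONV_TANH 1  // assumed: normally injected through build options

    #if defined(FUSED_CONV_TANH)
    #define ACTIVATION(x) std::tanh(x)
    #else
    #define ACTIVATION(x) (x)  // no fused activation: identity
    #endif

    float activate(float x) { return ACTIVATION(x); }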
@@ -89,6 +89,10 @@ __kernel void MVN(__global const Dtype* src,
                   const Dtype eps,
                   __global const Dtype* mean,
                   __global const Dtype* dev,
+                  __global const Dtype* bnorm_weight,
+                  __global const Dtype* bnorm_bias,
+                  const int channels,
+                  const float relu_slope,
                   __global Dtype* dst)
 {
     int x = get_global_id(0);
@@ -106,7 +110,21 @@ __kernel void MVN(__global const Dtype* src,
 #else
     alpha = 1;
 #endif
+
+    Dtype w = 1.f, b = 0.f;
+#ifdef FUSE_BATCH_NORM
+    w = bnorm_weight[x % channels];
+    b = bnorm_bias[x % channels];
+#endif
+
     vec_type src_vec = load(src, index) - (vec_type)mean_val;
     vec_type dst_vec = src_vec * alpha;
+    dst_vec = dst_vec * w + (vec_type)b;
+
+#ifdef FUSE_RELU
+    vec_type new_val = dst_vec * relu_slope;
+    dst_vec = select(new_val, dst_vec, dst_vec > (vec_type)0.f);
+#endif
+
     store(dst_vec, dst, index);
 }