Commit 59b91d6c authored by Alexander Alekhin

Merge pull request #10821 from dkurt:dnn_layers_fusion

parents a91a11e5 514e6df4
@@ -472,7 +472,6 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     bool hasWeights, hasBias;
     float epsilon;

-    virtual void getScaleShift(Mat& scale, Mat& shift) const = 0;
     static Ptr<BatchNormLayer> create(const LayerParams &params);
 };
...
@@ -281,20 +281,26 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     virtual bool setActivation(const Ptr<ActivationLayer>& layer);

     /**
-     * @brief Tries to attach to the layer the subsequent batch normalization layer, i.e. do the layer fusion in a partial case.
-     * @param[in] layer The subsequent batch normalization layer.
-     *
-     * Returns true if the batch normalization layer has been attached successfully.
+     * @brief Try to fuse current layer with a next one
+     * @param[in] top Next layer to be fused.
+     * @returns True if fusion was performed.
      */
-    virtual bool setBatchNorm(const Ptr<BatchNormLayer>& layer);
+    virtual bool tryFuse(Ptr<Layer>& top);

     /**
-     * @brief Tries to attach to the layer the subsequent scaling layer, i.e. do the layer fusion in a partial case.
-     * @param[in] layer The subsequent scaling layer.
+     * @brief Returns parameters of layers with channel-wise multiplication and addition.
+     * @param[out] scale Channel-wise multipliers. Total number of values should
+     *                   be equal to number of channels.
+     * @param[out] shift Channel-wise offsets. Total number of values should
+     *                   be equal to number of channels.
      *
-     * Returns true if the scaling layer has been attached successfully.
+     * Some layers can fuse their transformations with further layers.
+     * In example, convolution + batch normalization. This way base layer
+     * use weights from layer after it. Fused layer is skipped.
+     * By default, @p scale and @p shift are empty that means layer has no
+     * element-wise multiplications or additions.
      */
-    virtual bool setScale(const Ptr<ScaleLayer>& layer);
+    virtual void getScaleShift(Mat& scale, Mat& shift) const;

     /**
      * @brief "Deattaches" all the layers, attached to particular layer.
...
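The two hooks above replace the BatchNorm- and Scale-specific setters: a producer layer asks its consumer for per-channel parameters via getScaleShift() and, if they are non-empty, folds them into its own weights and returns true from tryFuse(). Below is a minimal standalone sketch of that folding step; the function name foldScaleShift and the assumed parameter layout (one weight row and one bias value per output channel, CV_32F) are illustrative assumptions, not OpenCV API.

    #include <opencv2/core.hpp>

    // Hypothetical helper, not part of OpenCV: fold the per-channel 'scale' and
    // 'shift' reported by a consumer's getScaleShift() into a producer's own
    // parameters. Empty Mats follow the documented convention: nothing to
    // multiply and/or nothing to add.
    static void foldScaleShift(cv::Mat& weights, cv::Mat& bias,
                               const cv::Mat& scale, const cv::Mat& shift)
    {
        CV_Assert(weights.type() == CV_32F && bias.type() == CV_32F);
        for (int c = 0; c < weights.rows; ++c)   // one output channel per row
        {
            const float s = scale.empty() ? 1.f : scale.at<float>(c);
            const float b = shift.empty() ? 0.f : shift.at<float>(c);
            cv::Mat row = weights.row(c);        // header sharing data with 'weights'
            row *= s;                                          // w'_c = s_c * w_c
            bias.at<float>(c) = s * bias.at<float>(c) + b;     // b'_c = s_c * b_c + shift_c
        }
    }

Convolution followed by batch normalization is the canonical case named in the doc comment: the BN layer reports its scale/shift, the convolution rescales its kernels and bias, and the BN layer is then skipped.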
@@ -1407,46 +1407,30 @@ struct Net::Impl
         if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 )
         {
             LayerData* nextData = &layers[ld.consumers[0].lid];
-            Ptr<BatchNormLayer> nextBNormLayer =
-                nextData->layerInstance.dynamicCast<BatchNormLayer>();
             LayerPin lpNext(ld.consumers[0].lid, 0);
-            if( !nextBNormLayer.empty() && pinsToKeep.count(lpNext) == 0 )
+            while (nextData)
             {
-                LayerData* bnormData = nextData;
-                nextData = 0;
-                if( currLayer->setBatchNorm(nextBNormLayer) )
+                Ptr<Layer> nextLayer = nextData->layerInstance;
+                if (currLayer->tryFuse(nextLayer))
                 {
-                    printf_(("\tfused with %s\n", nextBNormLayer->name.c_str()));
-                    bnormData->skip = true;
+                    printf_(("\tfused with %s\n", nextLayer->name.c_str()));
+                    nextData->skip = true;
                     ld.outputBlobs = layers[lpNext.lid].outputBlobs;
                     ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
-                    if( bnormData->consumers.size() == 1 )
+                    if (nextData->consumers.size() == 1)
                     {
-                        nextData = &layers[bnormData->consumers[0].lid];
-                        lpNext = LayerPin(bnormData->consumers[0].lid, 0);
+                        int nextLayerId = nextData->consumers[0].lid;
+                        nextData = &layers[nextLayerId];
+                        lpNext = LayerPin(nextLayerId, 0);
                     }
-                }
-            }
-            Ptr<ScaleLayer> nextScaleLayer;
-            if( nextData )
-                nextScaleLayer = nextData->layerInstance.dynamicCast<ScaleLayer>();
-            if( !nextScaleLayer.empty() && pinsToKeep.count(lpNext) == 0 )
-            {
-                LayerData* scaleData = nextData;
-                nextData = 0;
-                if( currLayer->setScale(nextScaleLayer) )
-                {
-                    printf_(("\tfused with %s\n", nextScaleLayer->name.c_str()));
-                    scaleData->skip = true;
-                    ld.outputBlobs = layers[lpNext.lid].outputBlobs;
-                    ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
-                    if( scaleData->consumers.size() == 1 )
-                    {
-                        nextData = &layers[scaleData->consumers[0].lid];
-                        lpNext = LayerPin(scaleData->consumers[0].lid, 0);
-                    }
-                }
+                    else
+                    {
+                        nextData = 0;
+                        break;
+                    }
+                }
+                else
+                    break;
             }

             // For now, OpenCL target support fusion with activation of ReLU/ChannelsPReLU/Power/Tanh
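The rewritten block walks the consumer chain greedily: as long as the layer has a single consumer and tryFuse() keeps succeeding, each fused consumer is marked skip and the walk moves one layer further; it stops at the first refusal or when the chain branches. The toy sketch below reproduces only that control flow with an invented Node type (name, fusable, consumers, skip are stand-ins, not OpenCV's LayerData):

    #include <cstdio>
    #include <vector>

    struct Node
    {
        const char* name;
        bool fusable;                    // stand-in for currLayer->tryFuse(next)
        std::vector<Node*> consumers;
        bool skip = false;
    };

    // Keep fusing the single consumer until fusion fails or the chain branches.
    static void fuseChain(Node& base)
    {
        Node* next = base.consumers.size() == 1 ? base.consumers[0] : nullptr;
        while (next)
        {
            if (!next->fusable)
                break;                        // tryFuse() returned false
            std::printf("\tfused with %s\n", next->name);
            next->skip = true;                // fused layer is skipped at run time
            if (next->consumers.size() == 1)  // only walk a linear chain
                next = next->consumers[0];
            else
                break;
        }
    }

For a Conv -> BatchNorm -> Scale -> ReLU chain this is what lets a single convolution absorb both following channel-wise layers before the activation fusion below runs.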
@@ -2627,13 +2611,16 @@ Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
 }

 bool Layer::setActivation(const Ptr<ActivationLayer>&) { return false; }
-bool Layer::setBatchNorm(const Ptr<BatchNormLayer>&) { return false; }
-bool Layer::setScale(const Ptr<ScaleLayer>&) { return false; }
+bool Layer::tryFuse(Ptr<Layer>&) { return false; }
+
+void Layer::getScaleShift(Mat& scale, Mat& shift) const
+{
+    scale = Mat();
+    shift = Mat();
+}

 void Layer::unsetAttached()
 {
     setActivation(Ptr<ActivationLayer>());
-    setBatchNorm(Ptr<BatchNormLayer>());
-    setScale(Ptr<ScaleLayer>());
 }

 template <typename T>
...
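With these defaults, tryFuse() refuses fusion and getScaleShift() hands back empty Mats, so only layers that override them take part. A small hedged helper shows how a caller can interpret the empty-Mat convention after this change (hasChannelwiseParams is an invented name, not OpenCV API):

    #include <opencv2/dnn.hpp>

    // Invented helper for illustration: does 'top' expose anything that a
    // preceding layer could fold in via tryFuse()?
    static bool hasChannelwiseParams(const cv::Ptr<cv::dnn::Layer>& top)
    {
        cv::Mat scale, shift;
        top->getScaleShift(scale, shift);   // base implementation leaves both empty
        return !scale.empty() || !shift.empty();
    }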
@@ -65,16 +65,18 @@ public:
         relu_slope = 0.f;
     }

-    Ptr<BatchNormLayer> bnorm;
     Mat scale, shift;
-    UMat bnorm_weight, bnorm_bias;
     bool fuse_batch_norm;

-    bool setBatchNorm(const Ptr<BatchNormLayer>& layer )
+    virtual bool tryFuse(Ptr<Layer>& top)
     {
-        bnorm = layer;
-        fuse_batch_norm = !bnorm.empty() && (preferableTarget == DNN_TARGET_OPENCL);
-        return fuse_batch_norm;
+        if (preferableTarget == DNN_TARGET_OPENCL && !fuse_batch_norm)
+        {
+            top->getScaleShift(scale, shift);
+            fuse_batch_norm = !scale.empty() || !shift.empty();
+            return fuse_batch_norm;
+        }
+        return false;
     }

     Ptr<ReLULayer> activ_relu;
@@ -95,12 +97,8 @@ public:
 #ifdef HAVE_OPENCL
     bool fast_forward_ocl(std::vector<UMat> &inputs, std::vector<UMat> &outputs)
     {
-        if( fuse_batch_norm && scale.empty())
-        {
-            bnorm->getScaleShift(scale, shift);
-            bnorm_weight = scale.getUMat(ACCESS_READ);
-            bnorm_bias = shift.getUMat(ACCESS_READ);
-        }
+        UMat bnorm_weight = scale.empty() ? UMat() : scale.getUMat(ACCESS_READ);
+        UMat bnorm_bias = shift.empty() ? UMat() : shift.getUMat(ACCESS_READ);
         int splitDim = (acrossChannels) ? 1 : 2;
         for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
@@ -171,12 +169,8 @@ public:
             return ret;
         }

-        if( fuse_batch_norm && scale.empty())
-        {
-            bnorm->getScaleShift(scale, shift);
-            bnorm_weight = scale.getUMat(ACCESS_READ);
-            bnorm_bias = shift.getUMat(ACCESS_READ);
-        }
+        UMat bnorm_weight = scale.empty() ? UMat() : scale.getUMat(ACCESS_READ);
+        UMat bnorm_bias = shift.empty() ? UMat() : shift.getUMat(ACCESS_READ);
         for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
         {
...
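In this layer the fused parameters are applied after mean/variance normalization, so per channel the fused operation amounts to y = s * (x - mean) / sqrt(var + eps) + b, which is exactly what absorbing a following batch-normalization layer means. The CPU reference below is only an illustration of that formula for a single channel, not the OpenCL path touched by the diff; the epsilon value and the assumption that variance normalization is enabled are mine.

    #include <cmath>
    #include <vector>

    // Illustrative single-channel reference of MVN with a fused per-channel
    // scale 's' and shift 'b' (the values a following layer reported through
    // getScaleShift()). Not the kernel used by fast_forward_ocl().
    static void mvnFusedChannel(std::vector<float>& x, float s, float b, float eps = 1e-9f)
    {
        if (x.empty())
            return;
        double mean = 0.0, var = 0.0;
        for (float v : x) mean += v;
        mean /= x.size();
        for (float v : x) var += (v - mean) * (v - mean);
        var /= x.size();
        const float norm = 1.f / std::sqrt((float)var + eps);
        for (float& v : x)
            v = s * (v - (float)mean) * norm + b;   // normalize, then fused scale/shift
    }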
@@ -201,6 +201,12 @@ public:
         return Ptr<BackendNode>();
     }

+    void getScaleShift(Mat& scale, Mat& shift) const
+    {
+        scale = !blobs.empty() ? blobs[0] : Mat();
+        shift = hasBias ? blobs[1] : Mat();
+    }
+
     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                            const std::vector<MatShape> &outputs) const
     {
...
@@ -136,6 +136,12 @@ public:
         return Ptr<BackendNode>();
     }

+    void getScaleShift(Mat& scale, Mat& shift) const
+    {
+        scale = Mat();
+        shift = blobs[0];
+    }
+
     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                            const std::vector<MatShape> &outputs) const
     {
...
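Both overrides simply report the layer's existing blobs, with an empty Mat standing in for the missing half (no multipliers for a shift-only layer, no offsets when hasBias is false). The sketch below, again only illustrative and not OpenCV internals, applies such per-channel parameters to an NCHW float blob the way a fused consumer effectively would; applyScaleShift is an invented name.

    #include <opencv2/core.hpp>

    // Apply per-channel scale/shift (as returned by getScaleShift()) to a
    // 4D NCHW CV_32F blob. Empty 'scale' means multiply by 1, empty 'shift'
    // means add 0, matching the documented contract.
    static void applyScaleShift(cv::Mat& blob, const cv::Mat& scale, const cv::Mat& shift)
    {
        CV_Assert(blob.dims == 4 && blob.type() == CV_32F && blob.isContinuous());
        const int N = blob.size[0], C = blob.size[1];
        const int planeSize = blob.size[2] * blob.size[3];
        float* data = blob.ptr<float>();
        for (int n = 0; n < N; ++n)
            for (int c = 0; c < C; ++c)
            {
                const float s = scale.empty() ? 1.f : scale.at<float>(c);
                const float b = shift.empty() ? 0.f : shift.at<float>(c);
                float* plane = data + ((size_t)n * C + c) * planeSize;
                for (int i = 0; i < planeSize; ++i)
                    plane[i] = s * plane[i] + b;    // channel-wise affine transform
            }
    }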