Commit 59b91d6c authored by Alexander Alekhin

Merge pull request #10821 from dkurt:dnn_layers_fusion

parents a91a11e5 514e6df4
......@@ -472,7 +472,6 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
bool hasWeights, hasBias;
float epsilon;
virtual void getScaleShift(Mat& scale, Mat& shift) const = 0;
static Ptr<BatchNormLayer> create(const LayerParams &params);
};
......
......@@ -281,20 +281,26 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
virtual bool setActivation(const Ptr<ActivationLayer>& layer);
/**
* @brief Tries to attach the subsequent batch normalization layer to this layer, i.e. performs a partial layer fusion.
* @param[in] layer The subsequent batch normalization layer.
*
* Returns true if the batch normalization layer has been attached successfully.
* @brief Tries to fuse the current layer with the next one.
* @param[in] top The next layer to be fused.
* @returns True if fusion was performed.
*/
virtual bool setBatchNorm(const Ptr<BatchNormLayer>& layer);
virtual bool tryFuse(Ptr<Layer>& top);
/**
* @brief Tries to attach the subsequent scaling layer to this layer, i.e. performs a partial layer fusion.
* @param[in] layer The subsequent scaling layer.
* @brief Returns the parameters of a layer with channel-wise multiplication and addition.
* @param[out] scale Channel-wise multipliers. The total number of values should
* equal the number of channels.
* @param[out] shift Channel-wise offsets. The total number of values should
* equal the number of channels.
*
* Returns true if the scaling layer has been attached successfully.
* Some layers can fuse their transformations with subsequent layers,
* for example convolution + batch normalization: the base layer then
* absorbs the weights of the layer that follows it, and the fused layer is skipped.
* By default, @p scale and @p shift are empty, which means the layer has no
* element-wise multiplications or additions.
*/
virtual bool setScale(const Ptr<ScaleLayer>& layer);
virtual void getScaleShift(Mat& scale, Mat& shift) const;
/**
* @brief "Deattaches" all the layers, attached to particular layer.
......
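The new interface replaces the type-specific setBatchNorm()/setScale() hooks with a generic handshake: a layer asks the following layer for its channel-wise transform via getScaleShift() and reports through tryFuse() whether it absorbed it. Below is a minimal standalone sketch of that handshake; MiniLayer, MiniBatchNorm and MiniConv are simplified stand-ins for illustration, not the real cv::dnn::Layer hierarchy.

#include <opencv2/core.hpp>
#include <cstdio>
#include <string>

using cv::Mat;
using cv::Ptr;

// Simplified stand-in for cv::dnn::Layer (illustration only).
struct MiniLayer
{
    std::string name;
    // By default a layer exposes no channel-wise transform...
    virtual void getScaleShift(Mat& scale, Mat& shift) const { scale = Mat(); shift = Mat(); }
    // ...and fuses nothing.
    virtual bool tryFuse(Ptr<MiniLayer>& /*top*/) { return false; }
    virtual ~MiniLayer() {}
};

// A batch-norm-like layer: publishes per-channel multipliers and offsets.
struct MiniBatchNorm : MiniLayer
{
    Mat weights, bias;  // one value per channel
    void getScaleShift(Mat& scale, Mat& shift) const override { scale = weights; shift = bias; }
};

// A convolution-like layer: absorbs whatever scale/shift the next layer offers.
struct MiniConv : MiniLayer
{
    Mat fusedScale, fusedShift;
    bool tryFuse(Ptr<MiniLayer>& top) override
    {
        Mat scale, shift;
        top->getScaleShift(scale, shift);
        if (scale.empty() && shift.empty())
            return false;                 // nothing to fold in
        fusedScale = scale;
        fusedShift = shift;
        return true;                      // the caller will mark `top` as skipped
    }
};

int main()
{
    Ptr<MiniBatchNorm> bn = cv::makePtr<MiniBatchNorm>();
    bn->name = "bn1";
    bn->weights = Mat::ones(1, 3, CV_32F);
    bn->bias = Mat::zeros(1, 3, CV_32F);

    MiniConv conv;
    Ptr<MiniLayer> top = bn;              // Ptr converts to the base type
    std::printf("fused: %s\n", conv.tryFuse(top) ? "yes" : "no");
    return 0;
}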
......@@ -1407,46 +1407,30 @@ struct Net::Impl
if( ld.consumers.size() == 1 && pinsToKeep.count(LayerPin(lid, 0)) == 0 )
{
LayerData* nextData = &layers[ld.consumers[0].lid];
Ptr<BatchNormLayer> nextBNormLayer =
nextData->layerInstance.dynamicCast<BatchNormLayer>();
LayerPin lpNext(ld.consumers[0].lid, 0);
if( !nextBNormLayer.empty() && pinsToKeep.count(lpNext) == 0 )
while (nextData)
{
LayerData* bnormData = nextData;
nextData = 0;
if( currLayer->setBatchNorm(nextBNormLayer) )
Ptr<Layer> nextLayer = nextData->layerInstance;
if (currLayer->tryFuse(nextLayer))
{
printf_(("\tfused with %s\n", nextBNormLayer->name.c_str()));
bnormData->skip = true;
printf_(("\tfused with %s\n", nextLayer->name.c_str()));
nextData->skip = true;
ld.outputBlobs = layers[lpNext.lid].outputBlobs;
ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
if( bnormData->consumers.size() == 1 )
if (nextData->consumers.size() == 1)
{
nextData = &layers[bnormData->consumers[0].lid];
lpNext = LayerPin(bnormData->consumers[0].lid, 0);
int nextLayerId = nextData->consumers[0].lid;
nextData = &layers[nextLayerId];
lpNext = LayerPin(nextLayerId, 0);
}
}
}
Ptr<ScaleLayer> nextScaleLayer;
if( nextData )
nextScaleLayer = nextData->layerInstance.dynamicCast<ScaleLayer>();
if( !nextScaleLayer.empty() && pinsToKeep.count(lpNext) == 0 )
{
LayerData* scaleData = nextData;
nextData = 0;
if( currLayer->setScale(nextScaleLayer) )
{
printf_(("\tfused with %s\n", nextScaleLayer->name.c_str()));
scaleData->skip = true;
ld.outputBlobs = layers[lpNext.lid].outputBlobs;
ld.outputBlobsWrappers = layers[lpNext.lid].outputBlobsWrappers;
if( scaleData->consumers.size() == 1 )
else
{
nextData = &layers[scaleData->consumers[0].lid];
lpNext = LayerPin(scaleData->consumers[0].lid, 0);
nextData = 0;
break;
}
}
else
break;
}
// For now, the OpenCL target supports fusion with activation of ReLU/ChannelsPReLU/Power/Tanh
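The two near-identical attach-and-skip blocks above (one for BatchNormLayer, one for ScaleLayer) collapse into a single loop that keeps offering the next single-consumer layer to tryFuse() until it declines. A condensed, hypothetical sketch of that walk over a plain layer chain follows; ChainLayer, GreedyLayer and fuseChain are illustrative names, and the real Net::Impl code additionally tracks LayerPins, honors pinsToKeep and rewires output blobs.

#include <cstdio>
#include <memory>
#include <string>
#include <vector>

// Hypothetical minimal layer: only what the fusion walk needs.
struct ChainLayer
{
    std::string name;
    bool skip = false;
    // Returns true if this layer absorbed `top`'s transform.
    virtual bool tryFuse(ChainLayer& top) { (void)top; return false; }
    virtual ~ChainLayer() = default;
};

// Keep asking layer `cur` to fuse with its successor; every absorbed layer
// is marked `skip` so it will not be executed.
static void fuseChain(std::vector<std::unique_ptr<ChainLayer> >& chain, size_t cur)
{
    for (size_t next = cur + 1; next < chain.size(); ++next)
    {
        if (!chain[cur]->tryFuse(*chain[next]))
            break;                                    // successor declined: stop walking
        std::printf("\tfused with %s\n", chain[next]->name.c_str());
        chain[next]->skip = true;                     // skipped at execution time
    }
}

// Example: a layer that always absorbs its successor fuses the whole tail.
struct GreedyLayer : ChainLayer
{
    bool tryFuse(ChainLayer&) override { return true; }
};

int main()
{
    std::vector<std::unique_ptr<ChainLayer> > chain;
    chain.emplace_back(new GreedyLayer());  chain.back()->name = "conv";
    chain.emplace_back(new ChainLayer());   chain.back()->name = "bn";
    chain.emplace_back(new ChainLayer());   chain.back()->name = "scale";
    fuseChain(chain, 0);                    // conv absorbs bn, then scale
    return 0;
}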
......@@ -2627,13 +2611,16 @@ Ptr<BackendNode> Layer::tryAttach(const Ptr<BackendNode>& node)
}
bool Layer::setActivation(const Ptr<ActivationLayer>&) { return false; }
bool Layer::setBatchNorm(const Ptr<BatchNormLayer>&) { return false; }
bool Layer::setScale(const Ptr<ScaleLayer>&) { return false; }
bool Layer::tryFuse(Ptr<Layer>&) { return false; }
void Layer::getScaleShift(Mat& scale, Mat& shift) const
{
scale = Mat();
shift = Mat();
}
void Layer::unsetAttached()
{
setActivation(Ptr<ActivationLayer>());
setBatchNorm(Ptr<BatchNormLayer>());
setScale(Ptr<ScaleLayer>());
}
template <typename T>
......
......@@ -65,16 +65,18 @@ public:
relu_slope = 0.f;
}
Ptr<BatchNormLayer> bnorm;
Mat scale, shift;
UMat bnorm_weight, bnorm_bias;
bool fuse_batch_norm;
bool setBatchNorm(const Ptr<BatchNormLayer>& layer )
virtual bool tryFuse(Ptr<Layer>& top)
{
bnorm = layer;
fuse_batch_norm = !bnorm.empty() && (preferableTarget == DNN_TARGET_OPENCL);
return fuse_batch_norm;
if (preferableTarget == DNN_TARGET_OPENCL && !fuse_batch_norm)
{
top->getScaleShift(scale, shift);
fuse_batch_norm = !scale.empty() || !shift.empty();
return fuse_batch_norm;
}
return false;
}
Ptr<ReLULayer> activ_relu;
......@@ -95,12 +97,8 @@ public:
#ifdef HAVE_OPENCL
bool fast_forward_ocl(std::vector<UMat> &inputs, std::vector<UMat> &outputs)
{
if( fuse_batch_norm && scale.empty())
{
bnorm->getScaleShift(scale, shift);
bnorm_weight = scale.getUMat(ACCESS_READ);
bnorm_bias = shift.getUMat(ACCESS_READ);
}
UMat bnorm_weight = scale.empty() ? UMat() : scale.getUMat(ACCESS_READ);
UMat bnorm_bias = shift.empty() ? UMat() : shift.getUMat(ACCESS_READ);
int splitDim = (acrossChannels) ? 1 : 2;
for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
......@@ -171,12 +169,8 @@ public:
return ret;
}
if( fuse_batch_norm && scale.empty())
{
bnorm->getScaleShift(scale, shift);
bnorm_weight = scale.getUMat(ACCESS_READ);
bnorm_bias = shift.getUMat(ACCESS_READ);
}
UMat bnorm_weight = scale.empty() ? UMat() : scale.getUMat(ACCESS_READ);
UMat bnorm_bias = shift.empty() ? UMat() : shift.getUMat(ACCESS_READ);
for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
{
......
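With this change the MVN layer no longer holds a BatchNormLayer pointer: tryFuse() caches the scale and shift Mats, and the OpenCL path turns them into UMats on demand. Conceptually the fused transform is simply applied after normalization, as in the rough sketch below; mvnThenScaleShift is a hypothetical helper using plain cv::Mat arithmetic, not the actual OpenCL kernel, and it ignores the acrossChannels/normVariance options.

#include <opencv2/core.hpp>

// `row` holds the values of one normalization group as a 1xN CV_32F row;
// scaleC/shiftC are the fused per-channel multiplier and offset
// (1 and 0 when nothing was fused).
static void mvnThenScaleShift(cv::Mat& row, float scaleC, float shiftC, float eps)
{
    cv::Scalar mean, stddev;
    cv::meanStdDev(row, mean, stddev);                 // group statistics
    row = (row - mean[0]) / (stddev[0] + eps);         // mean-variance normalization
    row = row * scaleC + shiftC;                       // fused batch-norm transform
}

int main()
{
    cv::Mat row = (cv::Mat_<float>(1, 4) << 1.f, 2.f, 3.f, 4.f);
    mvnThenScaleShift(row, 2.f, 0.5f, 1e-9f);          // normalize, then scale by 2 and shift by 0.5
    return 0;
}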
......@@ -201,6 +201,12 @@ public:
return Ptr<BackendNode>();
}
void getScaleShift(Mat& scale, Mat& shift) const
{
scale = !blobs.empty() ? blobs[0] : Mat();
shift = hasBias ? blobs[1] : Mat();
}
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const
{
......
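Since the Scale layer's getScaleShift() simply hands out its blobs, a preceding layer can fold the per-channel transform directly into its own parameters: W'_c = W_c * s_c and b'_c = b_c * s_c + t_c. A hedged sketch of that folding, assuming CV_32F weights laid out as one row per output channel and a 1xC bias row; foldScaleShift is a hypothetical helper, not an OpenCV function.

#include <opencv2/core.hpp>

// Fold a channel-wise scale/shift (as returned by getScaleShift) into
// convolution weights and bias: W'_c = W_c * s_c, b'_c = b_c * s_c + t_c.
static void foldScaleShift(cv::Mat& weights, cv::Mat& bias,
                           const cv::Mat& scale, const cv::Mat& shift)
{
    CV_Assert(weights.type() == CV_32F && bias.type() == CV_32F);
    for (int c = 0; c < weights.rows; ++c)              // one row per output channel
    {
        const float s = scale.empty() ? 1.f : scale.at<float>(c);
        const float t = shift.empty() ? 0.f : shift.at<float>(c);
        float* w = weights.ptr<float>(c);
        for (int i = 0; i < weights.cols; ++i)
            w[i] *= s;                                  // scale this channel's weights
        bias.at<float>(c) = bias.at<float>(c) * s + t;  // fold the offset into the bias
    }
}

int main()
{
    cv::Mat W = cv::Mat::ones(2, 4, CV_32F);            // 2 output channels, 4 weights each
    cv::Mat b = cv::Mat::zeros(1, 2, CV_32F);
    cv::Mat scale = (cv::Mat_<float>(1, 2) << 2.f, 3.f);
    cv::Mat shift = (cv::Mat_<float>(1, 2) << 0.5f, -1.f);
    foldScaleShift(W, b, scale, shift);                 // W rows become 2s and 3s; b = {0.5, -1}
    return 0;
}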
......@@ -136,6 +136,12 @@ public:
return Ptr<BackendNode>();
}
void getScaleShift(Mat& scale, Mat& shift) const
{
scale = Mat();
shift = blobs[0];
}
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const
{
......
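The Shift layer is the degenerate case: offsets only, so it reports an empty scale. A consumer that treats an empty scale as 1 and an empty shift as 0, as in the folding sketch above, handles both layers uniformly; a tiny hypothetical check:

#include <opencv2/core.hpp>
#include <cassert>

int main()
{
    cv::Mat scale;                                      // empty: no channel-wise multiplication
    cv::Mat shift = (cv::Mat_<float>(1, 3) << 1.f, 2.f, 3.f);

    const int c = 1;                                    // pick channel 1
    const float x = 5.f;                                // one activation in that channel
    const float s = scale.empty() ? 1.f : scale.at<float>(c);
    const float t = shift.empty() ? 0.f : shift.at<float>(c);
    assert(x * s + t == 7.f);                           // only the offset is applied
    return 0;
}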