// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2016, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.

/*
Implementation of Batch Normalization layer.
*/

#include "../precomp.hpp"
#include "op_halide.hpp"
#include <opencv2/dnn/shape_utils.hpp>

namespace cv
{
namespace dnn
{

class BatchNormLayerImpl : public BatchNormLayer
{
public:
    Mat weights_, bias_;

    BatchNormLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
        CV_Assert(blobs.size() >= 3);

        hasWeights = params.get<bool>("has_weight", false);
        hasBias = params.get<bool>("has_bias", false);
        epsilon = params.get<float>("eps", 1E-5);

        size_t n = blobs[0].total();
        CV_Assert(blobs[1].total() == n &&
                  blobs[0].isContinuous() && blobs[1].isContinuous() &&
                  blobs[0].type() == CV_32F && blobs[1].type() == CV_32F);

        float varMeanScale = 1.f;
        if (!hasWeights && !hasBias)
        {
            CV_Assert(blobs[2].type() == CV_32F);
            varMeanScale = blobs[2].at<float>(0);
            if (varMeanScale != 0)
                varMeanScale = 1/varMeanScale;
        }

        const int weightsBlobIndex = 2;
        const int biasBlobIndex = weightsBlobIndex + hasWeights;

        if( hasWeights )
        {
            CV_Assert((size_t)weightsBlobIndex < blobs.size());
            const Mat& w = blobs[weightsBlobIndex];
            CV_Assert(w.isContinuous() && w.type() == CV_32F && w.total() == n);
        }

        if( hasBias )
        {
            CV_Assert((size_t)biasBlobIndex < blobs.size());
            const Mat& b = blobs[biasBlobIndex];
            CV_Assert(b.isContinuous() && b.type() == CV_32F && b.total() == n);
        }

        const float* meanData = blobs[0].ptr<float>();
        const float* stdData = blobs[1].ptr<float>();
        const float* weightsData = hasWeights ? blobs[weightsBlobIndex].ptr<float>() : 0;
        const float* biasData = hasBias ? blobs[biasBlobIndex].ptr<float>() : 0;

        weights_.create(1, (int)n, CV_32F);
        bias_.create(1, (int)n, CV_32F);
        float* dstWeightsData = weights_.ptr<float>();
        float* dstBiasData = bias_.ptr<float>();
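        // Fold the normalization statistics and the optional scale/shift
        // parameters into a single per-channel affine transform y = w*x + b:
        //   w = gamma / sqrt(var * varMeanScale + eps)
        //   b = beta  - w * mean * varMeanScale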
        for (size_t i = 0; i < n; ++i)
        {
            float w = (hasWeights ? weightsData[i] : 1.0f) / sqrt(stdData[i] * varMeanScale + epsilon);
            dstWeightsData[i] = w;
            dstBiasData[i] = (hasBias ? biasData[i] : 0.0f) - w * meanData[i] * varMeanScale;
        }
    }

    void getScaleShift(Mat& scale, Mat& shift) const
    {
        scale = weights_;
        shift = bias_;
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const
    {
        Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
        return true;
    }

    virtual bool supportBackend(int backendId)
    {
        return backendId == DNN_BACKEND_DEFAULT ||
               (backendId == DNN_BACKEND_HALIDE && haveHalide());
    }

    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_Assert(blobs.size() >= 2);
        CV_Assert(inputs.size() == 1);

        Mat &inpBlob = *inputs[0];
        int rows = inpBlob.size[2];
        int cols = inpBlob.size[3];

        for (size_t ii = 0; ii < outputs.size(); ii++)
        {
            Mat &outBlob = outputs[ii];

            for (int num = 0; num < outBlob.size[0]; num++)
            {
                for (int n = 0; n < outBlob.size[1]; n++)
                {
                    // Apply the folded affine transform plane by plane.
                    float w = weights_.at<float>(n);
                    float b = bias_.at<float>(n);
                    Mat inpBlobPlane(rows, cols, CV_32F, inpBlob.ptr<float>(num, n));
                    Mat outBlobPlane(rows, cols, CV_32F, outBlob.ptr<float>(num, n));
                    inpBlobPlane.convertTo(outBlobPlane, CV_32F, w, b);
                }
            }
        }
    }

    virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node)
    {
        switch (node->backendId)
        {
            case DNN_BACKEND_HALIDE:
            {
#ifdef HAVE_HALIDE
                auto base = node.dynamicCast<HalideBackendNode>();
                Halide::Func& input = base->funcs.back();
                Halide::Var x("x"), y("y"), c("c"), n("n");
                Halide::Func top = attachHalide(input(x, y, c, n));
                return Ptr<BackendNode>(new HalideBackendNode(base, top));
#endif  // HAVE_HALIDE
                break;
            }
        }
        return Ptr<BackendNode>();
    }

    virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
    {
#ifdef HAVE_HALIDE
        Halide::Buffer<float> input = halideBuffer(inputs[0]);
        Halide::Var x("x"), y("y"), c("c"), n("n");
        Halide::Func top = attachHalide(input(x, y, c, n));
        return Ptr<BackendNode>(new HalideBackendNode(top));
#endif  // HAVE_HALIDE
        return Ptr<BackendNode>();
    }

#ifdef HAVE_HALIDE
    // attachHalide can work both with Halide::Buffer and Halide::Func. In the
    // second case it will be a fusion.
    Halide::Func attachHalide(const Halide::Expr& input)
    {
        Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
        Halide::Var x("x"), y("y"), c("c"), n("n");

        const int numChannels = weights_.total();
        auto weights = wrapToHalideBuffer(weights_, {numChannels});
        auto bias = wrapToHalideBuffer(bias_, {numChannels});
        top(x, y, c, n) = input * weights(c) + bias(c);
        return top;
    }
#endif  // HAVE_HALIDE

    virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                           const std::vector<MatShape> &outputs) const
    {
        (void)outputs;  // suppress unused variable warning

        int64 flops = 0;
        for (size_t i = 0; i < inputs.size(); i++)
        {
            flops += 3*total(inputs[i]);
        }
        return flops;
    }
};

Ptr<BatchNormLayer> BatchNormLayer::create(const LayerParams& params)
{
    return Ptr<BatchNormLayer>(new BatchNormLayerImpl(params));
}

}  // namespace dnn
}  // namespace cv
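// A minimal usage sketch (illustrative only, not part of this file): building
// the layer from Caffe-style blobs — running mean, running variance, and a
// scale factor — then reading back the folded per-channel affine terms. The
// channel count (3) and blob values below are assumptions for the example.
//
//   LayerParams lp;
//   lp.type = "BatchNorm";
//   lp.blobs.push_back(Mat::zeros(1, 3, CV_32F));  // blobs[0]: running mean
//   lp.blobs.push_back(Mat::ones(1, 3, CV_32F));   // blobs[1]: running variance
//   lp.blobs.push_back(Mat::ones(1, 1, CV_32F));   // blobs[2]: scale factor
//   lp.set("eps", 1e-5);
//   Ptr<BatchNormLayer> bn = BatchNormLayer::create(lp);
//   Mat scale, shift;
//   bn->getScaleShift(scale, shift);  // here w = 1/sqrt(var + eps), b = -w*mean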