Commit 533bb898 authored by Vadim Pisarevsky

Merge pull request #11236 from dkurt:dnn_fuse_l2_norm

parents c80a168d 1ba72ca0
@@ -559,7 +559,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     {
     public:
         float pnorm, epsilon;
-        bool acrossSpatial;
+        CV_DEPRECATED bool acrossSpatial;
         static Ptr<NormalizeBBoxLayer> create(const LayerParams& params);
     };
...
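For orientation (an editor's sketch, not text from the patch): with p = pnorm and the epsilon guard used by the implementation below, every slice x selected by the new axis range is rescaled by the inverse of its p-norm:

```latex
y = \frac{x}{\lVert x \rVert_p}, \qquad
\lVert x \rVert_p = \Bigl(\sum_i \lvert x_i \rvert^{p}\Bigr)^{1/p}
% epsilon is added inside the sum ("add eps to avoid overflow" below) so the
% denominator never vanishes; the optional per-channel scale blob is omitted.
```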
@@ -318,6 +318,7 @@ for node in graph_def.node:
         node.input.pop()
         node.input.pop()
         node.input.append(layer_256_1_relu1.name)
+        node.input.append('conv4_3_norm/l2_normalize/Sum/reduction_indices')
         break
 softmaxShape = NodeDef()
...
@@ -54,6 +54,9 @@ public:
         pnorm = params.get<float>("p", 2);
         epsilon = params.get<float>("eps", 1e-10f);
         acrossSpatial = params.get<bool>("across_spatial", true);
+        startAxis = params.get<int>("start_axis", 1);
+        CV_Assert(!params.has("across_spatial") || !params.has("end_axis"));
+        endAxis = params.get<int>("end_axis", acrossSpatial ? -1 : startAxis);
         CV_Assert(pnorm > 0);
     }
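The assert makes the two parameterizations mutually exclusive: a model may use either the legacy `across_spatial` flag or the new `end_axis`, never both. A hypothetical helper (name assumed, not part of the patch) spelling out what these defaults encode for a 4D NCHW blob:

```cpp
// Maps the deprecated across_spatial flag onto the new [start_axis, end_axis]
// range for a 4-dimensional NCHW input (illustrative sketch only).
void legacyToAxisRange(bool acrossSpatial, int& startAxis, int& endAxis)
{
    startAxis = 1;                            // always skip the batch axis
    endAxis = acrossSpatial ? 3 : startAxis;  // end_axis = -1 clamps to 3, the last axis
}
// across_spatial = true  -> axes [1, 3]: one norm per sample over all C*H*W values.
// across_spatial = false -> axes [1, 1]: one norm per spatial position over C channels.
```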
@@ -85,20 +88,26 @@ public:
         const UMat& inp0 = inputs[0];
         UMat& buffer = internals[0];
-        size_t num = inp0.size[0];
-        size_t channels = inp0.size[1];
-        size_t channelSize = inp0.total() / (num * channels);
+        startAxis = clamp(startAxis, inp0.dims);
+        endAxis = clamp(endAxis, inp0.dims);
+        size_t num = total(shape(inp0.size), 0, startAxis);
+        size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
+        size_t planeSize = inp0.total() / (num * numPlanes);
+        MatShape s = shape(1, inputs[0].total());
+        UMat inp = inputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
+        UMat out = outputs[0].reshape(1, s.size(), &s[0]).reshape(1, num);
         for (size_t i = 0; i < num; ++i)
         {
-            MatShape s = shape(channels, channelSize);
-            UMat src = inputs[i].reshape(1, s.size(), &s[0]);
-            UMat dst = outputs[i].reshape(1, s.size(), &s[0]);
+            s = shape(numPlanes, planeSize);
+            UMat src = inp.row(i).reshape(1, s.size(), &s[0]);
+            UMat dst = out.row(i).reshape(1, s.size(), &s[0]);
             UMat abs_mat;
             absdiff(src, cv::Scalar::all(0), abs_mat);
             pow(abs_mat, pnorm, buffer);
-            if (acrossSpatial)
+            if (planeSize == 1)
             {
                 // add eps to avoid overflow
                 float absSum = sum(buffer)[0] + epsilon;
@@ -114,7 +123,7 @@ public:
                 // compute inverted norm to call multiply instead divide
                 cv::pow(norm, -1.0f / pnorm, norm);
-                repeat(norm, channels, 1, buffer);
+                repeat(norm, numPlanes, 1, buffer);
                 multiply(src, buffer, dst);
             }
@@ -130,7 +139,7 @@ public:
                 else
                 {
                     // _scale: _channels x 1
-                    CV_Assert(scale.total() == channels);
+                    CV_Assert(scale.total() == numPlanes);
                     repeat(scale, 1, dst.cols, buffer);
                     multiply(dst, buffer, dst);
                 }
@@ -162,17 +171,22 @@ public:
         const Mat& inp0 = *inputs[0];
         Mat& buffer = internals[0];
-        size_t num = inp0.size[0];
-        size_t channels = inp0.size[1];
-        size_t channelSize = inp0.total() / (num * channels);
+        startAxis = clamp(startAxis, inp0.dims);
+        endAxis = clamp(endAxis, inp0.dims);
+        const float* inpData = inp0.ptr<float>();
+        float* outData = outputs[0].ptr<float>();
+        size_t num = total(shape(inp0.size), 0, startAxis);
+        size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
+        size_t planeSize = inp0.total() / (num * numPlanes);
         for (size_t n = 0; n < num; ++n)
         {
-            Mat src = Mat(channels, channelSize, CV_32F, (void*)inp0.ptr<float>(n));
-            Mat dst = Mat(channels, channelSize, CV_32F, (void*)outputs[0].ptr<float>(n));
+            Mat src = Mat(numPlanes, planeSize, CV_32F, (void*)inpData);
+            Mat dst = Mat(numPlanes, planeSize, CV_32F, (void*)outData);
             cv::pow(abs(src), pnorm, buffer);
-            if (acrossSpatial)
+            if (planeSize == 1)
             {
                 // add eps to avoid overflow
                 float absSum = sum(buffer)[0] + epsilon;
@@ -188,7 +202,7 @@ public:
                 // compute inverted norm to call multiply instead divide
                 cv::pow(norm, -1.0f / pnorm, norm);
-                repeat(norm, channels, 1, buffer);
+                repeat(norm, numPlanes, 1, buffer);
                 multiply(src, buffer, dst);
             }
@@ -204,13 +218,18 @@ public:
                 else
                 {
                     // _scale: _channels x 1
-                    CV_Assert(scale.total() == channels);
+                    CV_Assert(scale.total() == numPlanes);
                     repeat(scale, 1, dst.cols, buffer);
                     multiply(dst, buffer, dst);
                 }
             }
+            inpData += numPlanes * planeSize;
+            outData += numPlanes * planeSize;
         }
     }
+
+private:
+    int startAxis, endAxis;
 };
...
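To make the new index arithmetic concrete, here is a standalone sketch (plain C++, not patch code) reproducing the `num` / `numPlanes` / `planeSize` split for SSD's conv4_3 blob of shape 1x512x38x38 with `start_axis = end_axis = 1`, i.e. channel-wise normalization:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
    std::vector<size_t> dims = {1, 512, 38, 38};  // NCHW
    int startAxis = 1, endAxis = 1;

    size_t total = 1;
    for (size_t d : dims) total *= d;

    size_t num = 1;                                    // product of dims [0, startAxis)
    for (int i = 0; i < startAxis; ++i) num *= dims[i];

    size_t numPlanes = 1;                              // product of dims [startAxis, endAxis]
    for (int i = startAxis; i <= endAxis; ++i) numPlanes *= dims[i];

    size_t planeSize = total / (num * numPlanes);      // remaining inner dims

    // The forward pass normalizes `num` matrices of numPlanes x planeSize:
    // here a single 512 x 1444 matrix, one L2 norm per spatial position.
    assert(num == 1 && numPlanes == 512 && planeSize == 38 * 38);
    return 0;
}
```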
@@ -80,14 +80,16 @@ public:
     {
         CV_Assert(inpId < node.input_size());
         std::string name = node.input(inpId);
+        // If operation produces several tensors, they are specified by index
+        // after ':' character. In example, "input:0".
+        name = name.substr(0, name.rfind(':'));
         const int numNodes = net.node_size();
         for (int i = 0; i < numNodes; ++i)
         {
             if (net.node(i).name() == name)
                 return net.node(i);
         }
-        CV_Error(Error::StsParseError, "Input node with name " + name + " not found");
-        return net.node(0);  // just return something
+        CV_ErrorNoReturn(Error::StsParseError, "Input node with name " + name + " not found");
     }
     // Match TensorFlow subgraph starting from <nodeId> with a set of nodes to be fused.
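The trimmed suffix is TensorFlow's output-port notation (a node producing several tensors is referenced as `name:index`). A small self-contained check (illustrative only) of the `substr`/`rfind` idiom used above:

```cpp
#include <cassert>
#include <string>

int main()
{
    // "Relu_4:0" refers to output port 0 of node "Relu_4".
    std::string name = "Relu_4:0";
    name = name.substr(0, name.rfind(':'));
    assert(name == "Relu_4");

    // Names without a port are left intact: rfind returns npos,
    // and substr(0, npos) copies the whole string.
    std::string plain = "conv1";
    assert(plain.substr(0, plain.rfind(':')) == "conv1");
    return 0;
}
```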
@@ -400,6 +402,23 @@ private:
     int numOutDims;
 };

+class L2NormalizeSubgraph : public Subgraph
+{
+public:
+    L2NormalizeSubgraph()
+    {
+        int input = addNodeToMatch("");
+        int square = addNodeToMatch("Square", input);
+        int reductionIndices = addNodeToMatch("Const");
+        int sum = addNodeToMatch("Sum", square, reductionIndices);
+        int y = addNodeToMatch("Const");
+        int maximum = addNodeToMatch("Maximum", sum, y);
+        int rsqrt = addNodeToMatch("Rsqrt", maximum);
+        addNodeToMatch("Mul", input, rsqrt);
+        setFusedNode("L2Normalize", input, reductionIndices);
+    }
+};
+
 void simplifySubgraphs(tensorflow::GraphDef& net)
 {
     std::vector<Ptr<Subgraph> > subgraphs;
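For reference, the Square → Sum → Maximum → Rsqrt → Mul chain matched above is the standard expansion of `tf.nn.l2_normalize`: y = x * rsqrt(max(sum(x^2, axis), epsilon)). A minimal sketch of the same computation with plain OpenCV calls (not part of the patch; row-wise reduction assumed for simplicity):

```cpp
#include <opencv2/core.hpp>

// L2-normalizes each row of a CV_32F matrix, mirroring the fused TF subgraph.
cv::Mat l2NormalizeRows(const cv::Mat& x, float epsilon = 1e-12f)
{
    CV_Assert(x.type() == CV_32F);
    cv::Mat squared, sums;
    cv::multiply(x, x, squared);                   // Square
    cv::reduce(squared, sums, 1, cv::REDUCE_SUM);  // Sum over the reduced axis
    sums = cv::max(sums, epsilon);                 // Maximum: guard against /0
    cv::sqrt(sums, sums);                          // sqrt; Rsqrt + Mul == divide
    cv::Mat norms;
    cv::repeat(sums, 1, x.cols, norms);            // broadcast norms over columns
    return x / norms;                              // Mul by reciprocal norm
}
```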
@@ -410,6 +429,7 @@ void simplifySubgraphs(tensorflow::GraphDef& net)
     subgraphs.push_back(Ptr<Subgraph>(new SoftMaxKerasSubgraph()));
     subgraphs.push_back(Ptr<Subgraph>(new ReLU6KerasSubgraph()));
     subgraphs.push_back(Ptr<Subgraph>(new ReshapeKerasSubgraph(3)));
+    subgraphs.push_back(Ptr<Subgraph>(new L2NormalizeSubgraph()));
     int numNodes = net.node_size();
     std::vector<int> matchedNodesIds;
...
@@ -37,7 +37,13 @@ using ::google::protobuf::Reflection;
 namespace
 {
-static int toNCHW[] = {0, 2, 3, 1};
+static int toNCHW(int idx)
+{
+    CV_Assert(-4 <= idx && idx < 4);
+    if (idx == 0) return 0;
+    else if (idx > 0) return idx % 3 + 1;
+    else return (4 + idx) % 3 + 1;
+}
 // This values are used to indicate layer output's data layout where it's possible.
 enum DataLayout
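Turning the lookup table into a function lets negative (counted-from-the-end) TensorFlow axes map to NCHW positions as well. A standalone copy with spot checks (illustrative) of the mapping:

```cpp
#include <cassert>

// NHWC axes {0:N, 1:H, 2:W, 3:C} map to NCHW positions {0, 2, 3, 1}.
static int toNCHW(int idx)
{
    assert(-4 <= idx && idx < 4);
    if (idx == 0) return 0;               // batch axis stays first
    else if (idx > 0) return idx % 3 + 1;
    else return (4 + idx) % 3 + 1;        // normalize negative index first
}

int main()
{
    assert(toNCHW(1) == 2 && toNCHW(2) == 3 && toNCHW(3) == 1);    // H, W, C
    assert(toNCHW(-1) == 1 && toNCHW(-2) == 3 && toNCHW(-3) == 2); // C, W, H
    return 0;
}
```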
@@ -556,11 +562,23 @@ static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& cons
 // this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise.
 static int predictOutputDataLayout(const tensorflow::NodeDef& layer, const std::map<String, int>& data_layouts)
 {
+    if (hasLayerAttr(layer, "data_format"))
+    {
+        std::string format = getLayerAttr(layer, "data_format").s();
+        if (format == "NHWC" || format == "channels_last")
+            return DATA_LAYOUT_NHWC;
+        else if (format == "NCHW" || format == "channels_first")
+            return DATA_LAYOUT_NCHW;
+        else
+            CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
+    }
+
+    // Determine layout by layer's inputs
     int layout = DATA_LAYOUT_UNKNOWN;
     std::map<String, int>::const_iterator it;
     for (int i = 0, n = layer.input_size(); i < n; ++i)
     {
-        it = data_layouts.find(layer.input(i));
+        it = data_layouts.find(layer.input(i).substr(0, layer.input(i).rfind(':')));
         if (it != data_layouts.end())
         {
             if (it->second == DATA_LAYOUT_UNKNOWN)
@@ -708,17 +726,7 @@ void TFImporter::populateNet(Net dstNet)
             // one input only
             connect(layer_id, dstNet, parsePin(input), id, 0);
-            if (hasLayerAttr(layer, "data_format"))
-            {
-                std::string format = getLayerAttr(layer, "data_format").s();
-                if (format == "NHWC" || format == "channels_last")
-                    data_layouts[name] = DATA_LAYOUT_NHWC;
-                else if (format == "NCHW" || format == "channels_first")
-                    data_layouts[name] = DATA_LAYOUT_NCHW;
-                else
-                    CV_Error(Error::StsParseError, "Unknown data_format value: " + format);
-            }
-            else
+            if (data_layouts[name] == DATA_LAYOUT_UNKNOWN)
                 data_layouts[name] = DATA_LAYOUT_NHWC;
         }
         else if (type == "BiasAdd" || type == "Add")
@@ -956,7 +964,7 @@ void TFImporter::populateNet(Net dstNet)
         {
             int axisId = (type == "Concat" ? 0 : layer.input_size() - 1);
             int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0);
-            layerParams.set("axis", 0 <= axis && axis < 4 ? toNCHW[axis] : axis);
+            layerParams.set("axis", 0 <= axis && axis < 4 ? toNCHW(axis) : axis);
             int id = dstNet.addLayer(name, "Concat", layerParams);
             layer_id[name] = id;
@@ -1017,7 +1025,7 @@ void TFImporter::populateNet(Net dstNet)
             // num_split
             // 1st blob is dims tensor
             int axis = getConstBlob(layer, value_id, 0).int_val().Get(0);
-            layerParams.set("axis", toNCHW[axis]);
+            layerParams.set("axis", toNCHW(axis));
             int id = dstNet.addLayer(name, "Slice", layerParams);
             layer_id[name] = id;
@@ -1410,9 +1418,26 @@ void TFImporter::populateNet(Net dstNet)
         {
             // op: "L2Normalize"
             // input: "input"
-            CV_Assert(layer.input_size() == 1);
-            layerParams.set("across_spatial", false);
-            layerParams.set("channel_shared", false);
+            // input: "reduction_indices" (axis)
+            CV_Assert(layer.input_size() == 2);
+            Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1));
+            CV_Assert(reductionIndices.type() == CV_32SC1);
+
+            const int numAxes = reductionIndices.total();
+            if (data_layouts[name] == DATA_LAYOUT_NHWC)
+                for (int i = 0; i < numAxes; ++i)
+                    reductionIndices.at<int>(i) = toNCHW(reductionIndices.at<int>(i));
+
+            cv::sort(reductionIndices, reductionIndices, SORT_ASCENDING);
+            for (int i = 1; i < numAxes; ++i)
+            {
+                CV_Assert(reductionIndices.at<int>(i) == reductionIndices.at<int>(i - 1) + 1);
+                // Axes have the same sign.
+                CV_Assert(reductionIndices.at<int>(i) * reductionIndices.at<int>(i - 1) >= 0);
+            }
+            layerParams.set("start_axis", reductionIndices.at<int>(0));
+            layerParams.set("end_axis", reductionIndices.at<int>(numAxes - 1));
             int id = dstNet.addLayer(name, "Normalize", layerParams);
             layer_id[name] = id;
             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
...
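A worked trace (values assumed, not from the patch) for the common `tf.nn.l2_normalize(x, axis=3)` in an NHWC graph: the stored reduction index 3 (NHWC channels) becomes NCHW axis 1, so the importer emits `start_axis = end_axis = 1`, reproducing the old `across_spatial = false` channel-wise behaviour:

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

int main()
{
    std::vector<int> reductionIndices = {3};  // NHWC channel axis
    for (int& idx : reductionIndices)         // inline toNCHW (batch axis 0 omitted)
        idx = idx > 0 ? idx % 3 + 1 : (4 + idx) % 3 + 1;
    std::sort(reductionIndices.begin(), reductionIndices.end());

    // Single axis: trivially consecutive and same-signed, so both asserts pass.
    assert(reductionIndices.front() == 1 && reductionIndices.back() == 1);
    // -> start_axis = 1, end_axis = 1: channel-wise L2 normalization in NCHW.
    return 0;
}
```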
@@ -193,6 +193,13 @@ TEST_P(Test_TensorFlow_layers, reshape)
     runTensorFlowNet("unfused_flatten_unknown_batch", targetId);
 }

+TEST_P(Test_TensorFlow_layers, l2_normalize)
+{
+    int targetId = GetParam();
+    runTensorFlowNet("l2_normalize", targetId);
+    runTensorFlowNet("l2_normalize_3d", targetId);
+}
+
 INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_layers, availableDnnTargets());

 typedef testing::TestWithParam<DNNTarget> Test_TensorFlow_nets;
...
@@ -482,6 +482,7 @@ node {
   name: "conv4_3_norm/l2_normalize"
   op: "L2Normalize"
   input: "Relu_4:0"
+  input: "conv4_3_norm/l2_normalize/Sum/reduction_indices"
 }
 node {
   name: "conv4_3_norm/mul_1"
...