Commit 0c261acf authored by Alexander Alekhin

Merge pull request #13065 from dkurt:dnn_update_tf_faster_rcnn

parents 4b298a74 dc9e6d3a
@@ -1794,44 +1794,46 @@ struct Net::Impl
             }

             // fuse convolution layer followed by eltwise + relu
-            if ( IS_DNN_OPENCL_TARGET(preferableTarget) )
+            if ( IS_DNN_OPENCL_TARGET(preferableTarget) && ld.layerInstance->type == "Convolution" )
             {
                 Ptr<EltwiseLayer> nextEltwiseLayer;
                 if( nextData )
                     nextEltwiseLayer = nextData->layerInstance.dynamicCast<EltwiseLayer>();

-                if( !nextEltwiseLayer.empty() && pinsToKeep.count(lpNext) == 0 )
+                if( !nextEltwiseLayer.empty() && pinsToKeep.count(lpNext) == 0 &&
+                    nextData->inputBlobsId.size() == 2 )
                 {
                     LayerData *eltwiseData = nextData;
-                    // go down from the second input and find the first non-skipped layer.
-                    LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[1].lid];
-                    CV_Assert(downLayerData);
-                    while (downLayerData->skip)
-                    {
-                        downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
-                    }
-                    CV_Assert(downLayerData);
-
-                    // second input layer is current layer.
-                    if ( ld.id == downLayerData->id )
-                    {
-                        // go down from the first input and find the first non-skipped layer
-                        downLayerData = &layers[eltwiseData->inputBlobsId[0].lid];
-                        while (downLayerData->skip)
-                        {
-                            if ( !downLayerData->type.compare("Eltwise") )
-                                downLayerData = &layers[downLayerData->inputBlobsId[1].lid];
-                            else
-                                downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
-                        }
-                    }
-
-                    Ptr<ConvolutionLayer> convLayer = downLayerData->layerInstance.dynamicCast<ConvolutionLayer>();
-
-                    // first input layer is convolution layer
-                    if( !convLayer.empty() && eltwiseData->consumers.size() == 1 )
-                    {
-                        // fuse eltwise + activation layer
-                        LayerData *firstConvLayerData = downLayerData;
-                        {
+
+                    // Eltwise layer has two inputs. We need to determine which
+                    // is a base convolution layer and which could be used as its bias.
+                    LayerData* biasLayerData = 0;
+                    for (int i = 0; i < 2; ++i)
+                    {
+                        LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[i].lid];
+                        CV_Assert(downLayerData);
+                        while (downLayerData->skip)
+                        {
+                            if (downLayerData->inputBlobsId.size() == 1)
+                                downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
+                            else
+                            {
+                                downLayerData = 0;
+                                break;
+                            }
+                        }
+                        if (downLayerData && ld.id == downLayerData->id)
+                        {
+                            biasLayerData = &layers[eltwiseData->inputBlobsId[1 - i].lid];
+                            break;
+                        }
+                    }
+                    CV_Assert(biasLayerData);
+                    {
+                        if( eltwiseData->consumers.size() == 1 )
+                        {
+                            // fuse eltwise + activation layer
+                            if (biasLayerData->id < ld.id)
+                            {
                                 nextData = &layers[eltwiseData->consumers[0].lid];
                                 lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
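The rewritten search generalizes the old one-sided walk: either Eltwise input may resolve, through skipped layers, to the current convolution, and the opposite input then becomes the bias candidate. A minimal Python sketch of the same walk over a toy graph; the dictionary layout (ids, skip flags, input lists) is an assumption for illustration, not the OpenCV data structures:

# Toy illustration (plain Python, assumed structures): resolve each Eltwise
# input through single-input skipped layers; if one input resolves to the
# current convolution, the other input is the bias candidate.
def find_bias_input(eltwise_inputs, layers, conv_id):
    for i, lid in enumerate(eltwise_inputs):
        down = layers[lid]
        while down is not None and down['skip']:
            # only follow single-input skipped layers, as in the C++ loop
            down = layers[down['inputs'][0]] if len(down['inputs']) == 1 else None
        if down is not None and down['id'] == conv_id:
            return eltwise_inputs[1 - i]  # the other input acts as the bias
    return None

# conv (id 1), a skipped in-place ReLU (id 2) on top of it, and a second branch (id 3)
layers = {1: {'id': 1, 'skip': False, 'inputs': []},
          2: {'id': 2, 'skip': True,  'inputs': [1]},
          3: {'id': 3, 'skip': False, 'inputs': []}}
print(find_bias_input([2, 3], layers, conv_id=1))  # -> 3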
@@ -1845,8 +1847,8 @@ struct Net::Impl
                                      !nextData->type.compare("Power")) &&
                                     currLayer->setActivation(nextActivLayer) )
                                 {
-                                    CV_Assert(firstConvLayerData->outputBlobsWrappers.size() == 1 && ld.inputBlobsWrappers.size() == 1);
-                                    ld.inputBlobsWrappers.push_back(firstConvLayerData->outputBlobsWrappers[0]);
+                                    CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1);
+                                    ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]);
                                     printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
                                     printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
                                     eltwiseData->skip = true;
@@ -1897,9 +1899,6 @@ struct Net::Impl
             }
         }

-        if (preferableBackend != DNN_BACKEND_OPENCV)
-            continue;  // Go to the next layer.
-
         // the optimization #2. if there is no layer that takes max pooling layer's computed
         // max indices (and only some semantic segmentation networks might need this;
         // many others only take the maximum values), then we switch the max pooling
...
@@ -95,7 +95,6 @@ public:
         else if (params.has("pooled_w") || params.has("pooled_h"))
         {
             type = ROI;
-            computeMaxIdx = false;
             pooledSize.width = params.get<uint32_t>("pooled_w", 1);
             pooledSize.height = params.get<uint32_t>("pooled_h", 1);
         }
@@ -141,6 +140,7 @@ public:
 #ifdef HAVE_OPENCL
         poolOp.release();
 #endif
+        computeMaxIdx = type == MAX;
     }

     virtual bool supportBackend(int backendId) CV_OVERRIDE
@@ -190,19 +190,14 @@ public:
             poolOp = Ptr<OCL4DNNPool<float> >(new OCL4DNNPool<float>(config));
         }

-        for (size_t ii = 0; ii < inputs.size(); ii++)
-        {
-            UMat& inpMat = inputs[ii];
-            int out_index = (type == MAX) ? 2 : 1;
-            UMat& outMat = outputs[out_index * ii];
-            UMat maskMat = (type == MAX) ? outputs[2 * ii + 1] : UMat();
-
-            CV_Assert(inpMat.offset == 0 && outMat.offset == 0);
-
-            if (!poolOp->Forward(inpMat, outMat, maskMat))
-                return false;
-        }
-        return true;
+        CV_Assert_N(inputs.size() == 1, !outputs.empty(), !computeMaxIdx || outputs.size() == 2);
+        UMat& inpMat = inputs[0];
+        UMat& outMat = outputs[0];
+        UMat maskMat = computeMaxIdx ? outputs[1] : UMat();
+
+        CV_Assert(inpMat.offset == 0 && outMat.offset == 0);
+
+        return poolOp->Forward(inpMat, outMat, maskMat);
     }
 #endif
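For intuition about the new computeMaxIdx contract (the mask output exists only when index computation was requested), here is a hedged NumPy sketch of a max pooling with an optional second output; this is my own illustration, not the OCL4DNN kernel:

import numpy as np

def max_pool2d(x, k=2, s=2, compute_max_idx=False):
    # Single-channel 2D max pooling; optionally also return the flat input
    # index of each maximum, mirroring the optional "mask" output above.
    h, w = x.shape
    oh, ow = (h - k) // s + 1, (w - k) // s + 1
    out = np.empty((oh, ow), dtype=x.dtype)
    mask = np.empty((oh, ow), dtype=np.int64) if compute_max_idx else None
    for i in range(oh):
        for j in range(ow):
            win = x[i*s:i*s+k, j*s:j*s+k]
            out[i, j] = win.max()
            if compute_max_idx:
                di, dj = np.unravel_index(win.argmax(), win.shape)
                mask[i, j] = (i*s + di) * w + (j*s + dj)  # flat index in the input
    return (out, mask) if compute_max_idx else (out,)

x = np.arange(16, dtype=np.float32).reshape(4, 4)
print(max_pool2d(x))                        # one output: values only
print(max_pool2d(x, compute_max_idx=True))  # two outputs: values + indices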
@@ -229,9 +224,12 @@ public:
         switch (type)
         {
             case MAX:
-                CV_Assert_N(inputs.size() == 1, outputs.size() == 2);
-                maxPooling(inputs[0], outputs[0], outputs[1]);
+            {
+                CV_Assert_N(inputs.size() == 1, !computeMaxIdx || outputs.size() == 2);
+                Mat mask = computeMaxIdx ? outputs[1] : Mat();
+                maxPooling(inputs[0], outputs[0], mask);
                 break;
+            }
             case AVE:
                 CV_Assert_N(inputs.size() == 1, outputs.size() == 1);
                 avePooling(inputs[0], outputs[0]);
@@ -912,7 +910,10 @@ public:
             dims[0] = inputs[1][0];  // Number of proposals;
             dims[1] = psRoiOutChannels;
         }
-        outputs.assign(type == MAX ? 2 : 1, shape(dims, 4));
+
+        int numOutputs = requiredOutputs ? requiredOutputs : (type == MAX ? 2 : 1);
+        CV_Assert(numOutputs == 1 || (numOutputs == 2 && type == MAX));
+        outputs.assign(numOutputs, shape(dims, 4));
         return false;
     }
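The shape-inference change reads as a small rule: honor the number of outputs the consumer asked for, defaulting to two (values plus max indices) only for MAX pooling. A plain-Python sketch of that rule, with assumed argument names rather than the C++ signature:

def num_pool_outputs(required_outputs, pool_type):
    # default: MAX pooling exposes values + max indices, everything else one blob
    n = required_outputs if required_outputs else (2 if pool_type == 'MAX' else 1)
    assert n == 1 or (n == 2 and pool_type == 'MAX')
    return n

print(num_pool_outputs(0, 'MAX'))  # -> 2: values + max indices by default
print(num_pool_outputs(1, 'MAX'))  # -> 1: indices not requested downstream
print(num_pool_outputs(0, 'AVE'))  # -> 1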
...
@@ -358,7 +358,7 @@ TEST_P(Test_TensorFlow_nets, Faster_RCNN)
         (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
         throw SkipTestException("");

-    for (int i = 1; i < 2; ++i)
+    for (int i = 0; i < 2; ++i)
     {
         std::string proto = findDataFile("dnn/" + names[i] + ".pbtxt", false);
         std::string model = findDataFile("dnn/" + names[i] + ".pb", false);
...
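Outside the test harness, the converted graphs can be exercised with the standard OpenCV dnn Python API. A hedged usage sketch; the file names, input size, and score threshold are assumptions, not values from this commit:

import cv2 as cv

# Load a frozen TF Faster R-CNN model together with the generated .pbtxt
# (file names here are placeholders).
net = cv.dnn.readNetFromTensorflow('faster_rcnn_model.pb', 'faster_rcnn_model.pbtxt')

img = cv.imread('example.jpg')
# Input size is an assumption; Faster R-CNN graphs are commonly fed 800x600 blobs.
blob = cv.dnn.blobFromImage(img, size=(800, 600), swapRB=True, crop=False)
net.setInput(blob)
out = net.forward()  # shape [1, 1, N, 7]: imageId, classId, score, x1, y1, x2, y2

h, w = img.shape[:2]
for det in out[0, 0]:
    score = float(det[2])
    if score > 0.5:  # assumed confidence threshold
        # box coordinates are typically normalized to [0, 1]
        x1, y1, x2, y2 = det[3] * w, det[4] * h, det[5] * w, det[6] * h
        print('class %d, score %.2f, box (%.0f, %.0f, %.0f, %.0f)' %
              (int(det[1]), score, x1, y1, x2, y2))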
@@ -32,6 +32,8 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
     width_stride = float(grid_anchor_generator['width_stride'][0])
     height_stride = float(grid_anchor_generator['height_stride'][0])
     features_stride = float(config['feature_extractor'][0]['first_stage_features_stride'][0])
+    first_stage_nms_iou_threshold = float(config['first_stage_nms_iou_threshold'][0])
+    first_stage_max_proposals = int(config['first_stage_max_proposals'][0])

     print('Number of classes: %d' % num_classes)
     print('Scales:            %s' % str(scales))
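The two new values are read from the TF Object Detection API pipeline config by the conversion entry point whose signature appears in the hunk header. A hedged usage sketch with placeholder paths, assuming the sample script is importable (e.g. run from samples/dnn):

from tf_text_graph_faster_rcnn import createFasterRCNNGraph

# Placeholder paths; createFasterRCNNGraph is the function shown in this diff.
createFasterRCNNGraph(modelPath='frozen_inference_graph.pb',
                      configPath='pipeline.config',
                      outputPath='faster_rcnn.pbtxt')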
@@ -47,7 +49,8 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
     removeIdentity(graph_def)

     def to_remove(name, op):
-        return name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep)
+        return name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep) or \
+               (name.startswith('CropAndResize') and op != 'CropAndResize')

     removeUnusedNodesAndAttrs(to_remove, graph_def)
@@ -114,10 +117,10 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
     detectionOut.addAttr('num_classes', 2)
     detectionOut.addAttr('share_location', True)
     detectionOut.addAttr('background_label_id', 0)
-    detectionOut.addAttr('nms_threshold', 0.7)
+    detectionOut.addAttr('nms_threshold', first_stage_nms_iou_threshold)
     detectionOut.addAttr('top_k', 6000)
     detectionOut.addAttr('code_type', "CENTER_SIZE")
-    detectionOut.addAttr('keep_top_k', 100)
+    detectionOut.addAttr('keep_top_k', first_stage_max_proposals)
     detectionOut.addAttr('clip', False)
     graph_def.node.extend([detectionOut])
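To make the data flow concrete: with the parser's dict-of-lists representation used above, the config values now propagate into the proposal DetectionOutput node instead of the previously hard-coded 0.7 and 100. A small illustrative sketch with assumed values:

# Assumed values in the dict-of-lists form produced by the config parser:
config = {'first_stage_nms_iou_threshold': ['0.7'],
          'first_stage_max_proposals': ['300']}

nms_threshold = float(config['first_stage_nms_iou_threshold'][0])  # -> 0.7
keep_top_k = int(config['first_stage_max_proposals'][0])           # -> 300
# these would feed detectionOut.addAttr('nms_threshold', ...) and
# detectionOut.addAttr('keep_top_k', ...) as in the hunk above
print(nms_threshold, keep_top_k)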
@@ -147,9 +150,11 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
                  'SecondStageBoxPredictor/Reshape_1/Reshape', [1, -1], graph_def)

     # Replace the Flatten subgraph with a single node.
+    cropAndResizeNodeName = ''
     for i in reversed(range(len(graph_def.node))):
         if graph_def.node[i].op == 'CropAndResize':
             graph_def.node[i].input.insert(1, 'detection_out/clip_by_value')
+            cropAndResizeNodeName = graph_def.node[i].name

         if graph_def.node[i].name == 'SecondStageBoxPredictor/Reshape':
             addConstNode('SecondStageBoxPredictor/Reshape/shape2', [1, -1, 4], graph_def)
@@ -159,11 +164,15 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
         if graph_def.node[i].name in ['SecondStageBoxPredictor/Flatten/flatten/Shape',
                                       'SecondStageBoxPredictor/Flatten/flatten/strided_slice',
-                                      'SecondStageBoxPredictor/Flatten/flatten/Reshape/shape']:
+                                      'SecondStageBoxPredictor/Flatten/flatten/Reshape/shape',
+                                      'SecondStageBoxPredictor/Flatten_1/flatten/Shape',
+                                      'SecondStageBoxPredictor/Flatten_1/flatten/strided_slice',
+                                      'SecondStageBoxPredictor/Flatten_1/flatten/Reshape/shape']:
             del graph_def.node[i]

     for node in graph_def.node:
-        if node.name == 'SecondStageBoxPredictor/Flatten/flatten/Reshape':
+        if node.name == 'SecondStageBoxPredictor/Flatten/flatten/Reshape' or \
+           node.name == 'SecondStageBoxPredictor/Flatten_1/flatten/Reshape':
             node.op = 'Flatten'
             node.input.pop()
@@ -171,6 +180,11 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath):
                          'SecondStageBoxPredictor/BoxEncodingPredictor/MatMul']:
             node.addAttr('loc_pred_transposed', True)

+        if node.name.startswith('MaxPool2D'):
+            assert(node.op == 'MaxPool')
+            assert(cropAndResizeNodeName)
+            node.input = [cropAndResizeNodeName]
+
 ################################################################################
 ### Postprocessing
 ################################################################################
...