Commit 6c196d30 authored by Vadim Pisarevsky

Merge pull request #11852 from dkurt:dnn_dldt_ir_outs

parents e4b51fa8 346871e2
@@ -1993,11 +1993,17 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin)
     backendNode->net = Ptr<InfEngineBackendNet>(new InfEngineBackendNet(ieNet));
     for (auto& it : ieNet.getOutputsInfo())
     {
+        Ptr<Layer> cvLayer(new InfEngineBackendLayer(it.second));
+        InferenceEngine::CNNLayerPtr ieLayer = ieNet.getLayerByName(it.first.c_str());
+        CV_Assert(ieLayer);
         LayerParams lp;
         int lid = cvNet.addLayer(it.first, "", lp);
         LayerData& ld = cvNet.impl->layers[lid];
-        ld.layerInstance = Ptr<Layer>(new InfEngineBackendLayer(it.second));
+        cvLayer->name = it.first;
+        cvLayer->type = ieLayer->type;
+        ld.layerInstance = cvLayer;
         ld.backendNodes[DNN_BACKEND_INFERENCE_ENGINE] = backendNode;
         for (int i = 0; i < inputsNames.size(); ++i)
......
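With the hunk above, a network imported from an Intel Model Optimizer IR exposes the real name and type of its output layers through the regular Net API instead of empty placeholders. A minimal sketch of how that can be observed, assuming OpenCV is built with Inference Engine support; the "model.xml"/"model.bin" paths are placeholders, not files from this commit:

#include <opencv2/dnn.hpp>
#include <iostream>

int main()
{
    // Import an Inference Engine IR (placeholder paths).
    cv::dnn::Net net = cv::dnn::readNetFromModelOptimizer("model.xml", "model.bin");

    // After this change the unconnected outputs report the original layer name and type.
    std::vector<int> outLayers = net.getUnconnectedOutLayers();
    for (int id : outLayers)
    {
        cv::Ptr<cv::dnn::Layer> layer = net.getLayer(id);
        std::cout << layer->name << " (" << layer->type << ")" << std::endl;
    }
    return 0;
}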
@@ -925,6 +925,10 @@ TEST(Layer_Test_Convolution_DLDT, Accuracy)
     Mat out = net.forward();
     normAssert(outDefault, out);
+    std::vector<int> outLayers = net.getUnconnectedOutLayers();
+    ASSERT_EQ(net.getLayer(outLayers[0])->name, "output_merge");
+    ASSERT_EQ(net.getLayer(outLayers[0])->type, "Concat");
 }
 // 1. Create a .prototxt file with the following network:
......
@@ -22,6 +22,7 @@ const char* keys =
     "{ height | -1 | Preprocess input image by resizing to a specific height. }"
     "{ rgb | | Indicate that model works with RGB input images instead BGR ones. }"
     "{ thr | .5 | Confidence threshold. }"
"{ thr | .4 | Non-maximum suppression threshold. }"
"{ backend | 0 | Choose one of computation backends: " "{ backend | 0 | Choose one of computation backends: "
"0: automatically (by default), " "0: automatically (by default), "
"1: Halide language (http://halide-lang.org/), " "1: Halide language (http://halide-lang.org/), "
@@ -37,7 +38,7 @@ const char* keys =
 using namespace cv;
 using namespace dnn;
-float confThreshold;
+float confThreshold, nmsThreshold;
 std::vector<std::string> classes;
 void postprocess(Mat& frame, const std::vector<Mat>& out, Net& net);
@@ -59,6 +60,7 @@ int main(int argc, char** argv)
     }
     confThreshold = parser.get<float>("thr");
+    nmsThreshold = parser.get<float>("nms");
     float scale = parser.get<float>("scale");
     Scalar mean = parser.get<Scalar>("mean");
     bool swapRB = parser.get<bool>("rgb");
@@ -144,6 +146,9 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
     static std::vector<int> outLayers = net.getUnconnectedOutLayers();
     static std::string outLayerType = net.getLayer(outLayers[0])->type;
+    std::vector<int> classIds;
+    std::vector<float> confidences;
+    std::vector<Rect> boxes;
     if (net.getLayer(0)->outputNameToIndex("im_info") != -1)  // Faster-RCNN or R-FCN
     {
         // Network produces output blob with a shape 1x1xNx7 where N is a number of
@@ -160,8 +165,11 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
                 int top = (int)data[i + 4];
                 int right = (int)data[i + 5];
                 int bottom = (int)data[i + 6];
-                int classId = (int)(data[i + 1]) - 1;  // Skip 0th background class id.
-                drawPred(classId, confidence, left, top, right, bottom, frame);
+                int width = right - left + 1;
+                int height = bottom - top + 1;
+                classIds.push_back((int)(data[i + 1]) - 1);  // Skip 0th background class id.
+                boxes.push_back(Rect(left, top, width, height));
+                confidences.push_back(confidence);
             }
         }
     }
@@ -181,16 +189,16 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
                 int top = (int)(data[i + 4] * frame.rows);
                 int right = (int)(data[i + 5] * frame.cols);
                 int bottom = (int)(data[i + 6] * frame.rows);
-                int classId = (int)(data[i + 1]) - 1;  // Skip 0th background class id.
-                drawPred(classId, confidence, left, top, right, bottom, frame);
+                int width = right - left + 1;
+                int height = bottom - top + 1;
+                classIds.push_back((int)(data[i + 1]) - 1);  // Skip 0th background class id.
+                boxes.push_back(Rect(left, top, width, height));
+                confidences.push_back(confidence);
             }
         }
     }
     else if (outLayerType == "Region")
     {
-        std::vector<int> classIds;
-        std::vector<float> confidences;
-        std::vector<Rect> boxes;
         for (size_t i = 0; i < outs.size(); ++i)
         {
             // Network produces output blob with a shape NxC where N is a number of
@@ -218,18 +226,19 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
                 }
             }
         }
-        std::vector<int> indices;
-        NMSBoxes(boxes, confidences, confThreshold, 0.4f, indices);
-        for (size_t i = 0; i < indices.size(); ++i)
-        {
-            int idx = indices[i];
-            Rect box = boxes[idx];
-            drawPred(classIds[idx], confidences[idx], box.x, box.y,
-                     box.x + box.width, box.y + box.height, frame);
-        }
     }
     else
         CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);
+    std::vector<int> indices;
+    NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
+    for (size_t i = 0; i < indices.size(); ++i)
+    {
+        int idx = indices[i];
+        Rect box = boxes[idx];
+        drawPred(classIds[idx], confidences[idx], box.x, box.y,
+                 box.x + box.width, box.y + box.height, frame);
+    }
 }
 void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)
......
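Both samples now accumulate classIds, confidences, and boxes for every output layer type and run a single class-agnostic non-maximum suppression pass afterwards. A self-contained sketch of that final step, using made-up boxes rather than real detections, for illustration only:

#include <opencv2/dnn.hpp>
#include <iostream>

int main()
{
    // Two overlapping candidates for the same object plus one separate detection.
    std::vector<cv::Rect> boxes = { cv::Rect(10, 10, 100, 100),
                                    cv::Rect(12, 12, 100, 100),
                                    cv::Rect(300, 300, 80, 80) };
    std::vector<float> confidences = { 0.9f, 0.8f, 0.7f };
    std::vector<int> indices;
    // Drop boxes below the confidence threshold, then suppress overlaps above the NMS threshold.
    cv::dnn::NMSBoxes(boxes, confidences, 0.5f, 0.4f, indices);
    for (size_t i = 0; i < indices.size(); ++i)
        std::cout << "kept box " << indices[i]
                  << " (confidence " << confidences[indices[i]] << ")" << std::endl;
    return 0;
}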
@@ -31,6 +31,7 @@ parser.add_argument('--height', type=int,
 parser.add_argument('--rgb', action='store_true',
                     help='Indicate that model works with RGB input images instead BGR ones.')
 parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold')
+parser.add_argument('--nms', type=float, default=0.4, help='Non-maximum suppression threshold')
 parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
                     help="Choose one of computation backends: "
                          "%d: automatically (by default), "
@@ -57,6 +58,7 @@ net.setPreferableBackend(args.backend)
 net.setPreferableTarget(args.target)
 confThreshold = args.thr
+nmsThreshold = args.nms
 def getOutputsNames(net):
     layersNames = net.getLayerNames()
@@ -86,36 +88,43 @@ def postprocess(frame, outs):
     lastLayerId = net.getLayerId(layerNames[-1])
     lastLayer = net.getLayer(lastLayerId)
+    classIds = []
+    confidences = []
+    boxes = []
     if net.getLayer(0).outputNameToIndex('im_info') != -1:  # Faster-RCNN or R-FCN
         # Network produces output blob with a shape 1x1xNx7 where N is a number of
         # detections and an every detection is a vector of values
         # [batchId, classId, confidence, left, top, right, bottom]
-        assert(len(outs) == 1)
-        out = outs[0]
-        for detection in out[0, 0]:
-            confidence = detection[2]
-            if confidence > confThreshold:
-                left = int(detection[3])
-                top = int(detection[4])
-                right = int(detection[5])
-                bottom = int(detection[6])
-                classId = int(detection[1]) - 1  # Skip background label
-                drawPred(classId, confidence, left, top, right, bottom)
+        for out in outs:
+            for detection in out[0, 0]:
+                confidence = detection[2]
+                if confidence > confThreshold:
+                    left = int(detection[3])
+                    top = int(detection[4])
+                    right = int(detection[5])
+                    bottom = int(detection[6])
+                    width = right - left + 1
+                    height = bottom - top + 1
+                    classIds.append(int(detection[1]) - 1)  # Skip background label
+                    confidences.append(float(confidence))
+                    boxes.append([left, top, width, height])
     elif lastLayer.type == 'DetectionOutput':
         # Network produces output blob with a shape 1x1xNx7 where N is a number of
         # detections and an every detection is a vector of values
         # [batchId, classId, confidence, left, top, right, bottom]
-        assert(len(outs) == 1)
-        out = outs[0]
-        for detection in out[0, 0]:
-            confidence = detection[2]
-            if confidence > confThreshold:
-                left = int(detection[3] * frameWidth)
-                top = int(detection[4] * frameHeight)
-                right = int(detection[5] * frameWidth)
-                bottom = int(detection[6] * frameHeight)
-                classId = int(detection[1]) - 1  # Skip background label
-                drawPred(classId, confidence, left, top, right, bottom)
+        for out in outs:
+            for detection in out[0, 0]:
+                confidence = detection[2]
+                if confidence > confThreshold:
+                    left = int(detection[3] * frameWidth)
+                    top = int(detection[4] * frameHeight)
+                    right = int(detection[5] * frameWidth)
+                    bottom = int(detection[6] * frameHeight)
+                    width = right - left + 1
+                    height = bottom - top + 1
+                    classIds.append(int(detection[1]) - 1)  # Skip background label
+                    confidences.append(float(confidence))
+                    boxes.append([left, top, width, height])
     elif lastLayer.type == 'Region':
         # Network produces output blob with a shape NxC where N is a number of
         # detected objects and C is a number of classes + 4 where the first 4
@@ -138,15 +147,19 @@ def postprocess(frame, outs):
                 classIds.append(classId)
                 confidences.append(float(confidence))
                 boxes.append([left, top, width, height])
-        indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, 0.4)
-        for i in indices:
-            i = i[0]
-            box = boxes[i]
-            left = box[0]
-            top = box[1]
-            width = box[2]
-            height = box[3]
-            drawPred(classIds[i], confidences[i], left, top, left + width, top + height)
+    else:
+        print('Unknown output layer type: ' + lastLayer.type)
+        exit()
+    indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
+    for i in indices:
+        i = i[0]
+        box = boxes[i]
+        left = box[0]
+        top = box[1]
+        width = box[2]
+        height = box[3]
+        drawPred(classIds[i], confidences[i], left, top, left + width, top + height)
 # Process inputs
 winName = 'Deep learning object detection in OpenCV'
......