Merge pull request #13670 from allnes:dnn_fix_obj_detect_sample

f46cd9db · Alexander Alekhin · 37a5af36 · cca2c4b3 · f46cd9db · f46cd9db
Commit f46cd9db authored Feb 05, 2019 by Alexander Alekhin
Hide whitespace changes
Inline Side-by-side

Showing with 35 additions and 57 deletions

object_detection.cpp samples/dnn/object_detection.cpp +27 -39

object_detection.py samples/dnn/object_detection.py +8 -18

No files found.
--- a/samples/dnn/object_detection.cpp
+++ b/samples/dnn/object_detection.cpp
@@ -153,51 +153,39 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
    std::vector<int> classIds;
    std::vector<float> confidences;
    std::vector<Rect> boxes;
-    if (net.getLayer(0)->outputNameToIndex("im_info") != -1)  // Faster-RCNN or R-FCN
+    if (outLayerType == "DetectionOutput")
    {
        // Network produces an output blob with a shape 1x1xNx7 where N is the number of
        // detections and every detection is a vector of values
        // [batchId, classId, confidence, left, top, right, bottom]
-        CV_Assert(outs.size() == 1);
+        CV_Assert(outs.size() > 0);
-        float* data = (float*)outs[0].data;
+        for (size_t k = 0; k < outs.size(); k++)
-        for (size_t i = 0; i < outs[0].total(); i += 7)
        {
-            float confidence = data[i + 2];
+            float* data = (float*)outs[k].data;
-            if (confidence > confThreshold)
+            for (size_t i = 0; i < outs[k].total(); i += 7)
            {
-                int left = (int)data[i + 3];
+                float confidence = data[i + 2];
-                int top = (int)data[i + 4];
+                if (confidence > confThreshold)
-                int right = (int)data[i + 5];
+                {
-                int bottom = (int)data[i + 6];
+                    int left   = (int)data[i + 3];
-                int width = right - left + 1;
+                    int top    = (int)data[i + 4];
-                int height = bottom - top + 1;
+                    int right  = (int)data[i + 5];
-                classIds.push_back((int)(data[i + 1]) - 1);  // Skip 0th background class id.
+                    int bottom = (int)data[i + 6];
-                boxes.push_back(Rect(left, top, width, height));
+                    int width  = right - left + 1;
-                confidences.push_back(confidence);
+                    int height = bottom - top + 1;
-            }
+                    if (width * height <= 1)
-        }
+                    {
-    }
+                        left   = (int)(data[i + 3] * frame.cols);
-    else if (outLayerType == "DetectionOutput")
+                        top    = (int)(data[i + 4] * frame.rows);
-    {
+                        right  = (int)(data[i + 5] * frame.cols);
-        // Network produces output blob with a shape 1x1xNx7 where N is a number of
+                        bottom = (int)(data[i + 6] * frame.rows);
-        // detections and an every detection is a vector of values
+                        width  = right - left + 1;
-        // [batchId, classId, confidence, left, top, right, bottom]
+                        height = bottom - top + 1;
-        CV_Assert(outs.size() == 1);
+                    }
-        float* data = (float*)outs[0].data;
+                    classIds.push_back((int)(data[i + 1]) - 1);  // Skip 0th background class id.
-        for (size_t i = 0; i < outs[0].total(); i += 7)
+                    boxes.push_back(Rect(left, top, width, height));
-        {
+                    confidences.push_back(confidence);
-            float confidence = data[i + 2];
+                }
-            if (confidence > confThreshold)
-            {
-                int left = (int)(data[i + 3] * frame.cols);
-                int top = (int)(data[i + 4] * frame.rows);
-                int right = (int)(data[i + 5] * frame.cols);
-                int bottom = (int)(data[i + 6] * frame.rows);
-                int width = right - left + 1;
-                int height = bottom - top + 1;
-                classIds.push_back((int)(data[i + 1]) - 1);  // Skip 0th background class id.
-                boxes.push_back(Rect(left, top, width, height));
-                confidences.push_back(confidence);
            }
        }
    }

--- a/samples/dnn/object_detection.py
+++ b/samples/dnn/object_detection.py
@@ -102,7 +102,7 @@ def postprocess(frame, outs):
    classIds = []
    confidences = []
    boxes = []
-    if net.getLayer(0).outputNameToIndex('im_info') != -1:  # Faster-RCNN or R-FCN
+    if lastLayer.type == 'DetectionOutput':
        # Network produces an output blob with a shape 1x1xNx7 where N is the number of
        # detections and every detection is a vector of values
        # [batchId, classId, confidence, left, top, right, bottom]
@@ -116,23 +116,13 @@ def postprocess(frame, outs):
                    bottom = int(detection[6])
                    width = right - left + 1
                    height = bottom - top + 1
-                    classIds.append(int(detection[1]) - 1)  # Skip background label
+                    if width * height <= 1:
-                    confidences.append(float(confidence))
+                        left = int(detection[3] * frameWidth)
-                    boxes.append([left, top, width, height])
+                        top = int(detection[4] * frameHeight)
-    elif lastLayer.type == 'DetectionOutput':
+                        right = int(detection[5] * frameWidth)
-        # Network produces output blob with a shape 1x1xNx7 where N is a number of
+                        bottom = int(detection[6] * frameHeight)
-        # detections and an every detection is a vector of values
+                        width = right - left + 1
-        # [batchId, classId, confidence, left, top, right, bottom]
+                        height = bottom - top + 1
-        for out in outs:
-            for detection in out[0, 0]:
-                confidence = detection[2]
-                if confidence > confThreshold:
-                    left = int(detection[3] * frameWidth)
-                    top = int(detection[4] * frameHeight)
-                    right = int(detection[5] * frameWidth)
-                    bottom = int(detection[6] * frameHeight)
-                    width = right - left + 1
-                    height = bottom - top + 1
                    classIds.append(int(detection[1]) - 1)  # Skip background label
                    confidences.append(float(confidence))
                    boxes.append([left, top, width, height])