Commit 6f4179d4 authored by Vadim Pisarevsky's avatar Vadim Pisarevsky

Merge pull request #1214 from arrybn:ssd_test

parents 0b845df0 692ba7ba
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
......@@ -30,7 +31,7 @@ Mat getMean(const size_t& imageHeight, const size_t& imageWidth)
Mat preprocess(const Mat& frame)
{
Mat preprocessed;
frame.convertTo(preprocessed, CV_32FC3);
frame.convertTo(preprocessed, CV_32F);
resize(preprocessed, preprocessed, Size(width, height)); //SSD accepts 300x300 RGB-images
Mat mean = getMean(width, height);
......@@ -98,6 +99,8 @@ int main(int argc, char** argv)
cv::Mat frame = cv::imread(parser.get<string>("image"), -1);
if (frame.channels() == 4)
cvtColor(frame, frame, COLOR_BGRA2BGR);
//! [Prepare blob]
Mat preprocessedFrame = preprocess(frame);
......
......@@ -115,6 +115,21 @@ message PriorBoxParameter {
optional bool clip = 5 [default = true];
// Variance for adjusting the prior bboxes.
repeated float variance = 6;
// By default, we calculate img_height, img_width, step_x, step_y based on
// bottom[0] (feat) and bottom[1] (img). Unless these values are explicitely
// provided.
// Explicitly provide the img_size.
optional uint32 img_size = 7;
// Either img_size or img_h/img_w should be specified; not both.
optional uint32 img_h = 8;
optional uint32 img_w = 9;
// Explicitly provide the step size.
optional float step = 10;
// Either step or step_h/step_w should be specified; not both.
optional float step_h = 11;
optional float step_w = 12;
// Offset to the top left corner of each cell.
optional float offset = 13 [default = 0.5];
}
// Message that store parameters used by DetectionOutputLayer
......@@ -126,6 +141,10 @@ message DetectionOutputParameter {
// Background label id. If there is no background class,
// set it as -1.
optional int32 background_label_id = 3 [default = 0];
// Parameters used for non maximum suppression.
optional NonMaximumSuppressionParameter nms_param = 4;
// Parameters used for saving detection results.
optional SaveOutputParameter save_output_param = 5;
// Type of coding method for bbox.
optional PriorBoxParameter.CodeType code_type = 6 [default = CORNER];
// If true, variance is encoded in target; otherwise we need to adjust the
......@@ -137,11 +156,6 @@ message DetectionOutputParameter {
// Only consider detections whose confidences are larger than a threshold.
// If not provided, consider all boxes.
optional float confidence_threshold = 9;
// Parameters used for non maximum suppression.
// Threshold to be used in nms.
optional float nms_threshold = 10 [default = 0.3];
// Maximum number of results to be kept.
optional int32 top_k = 11;
}
message Datum {
......@@ -503,7 +517,7 @@ message LayerParameter {
optional LRNParameter lrn_param = 118;
optional MemoryDataParameter memory_data_param = 119;
optional MVNParameter mvn_param = 120;
optional NormalizeBBoxParameter normalize_bbox_param = 149;
optional NormalizeBBoxParameter norm_param = 149;
optional PermuteParameter permute_param = 148;
optional ParameterParameter parameter_param = 145;
optional PoolingParameter pooling_param = 121;
......@@ -781,6 +795,39 @@ message DataParameter {
optional uint32 prefetch = 10 [default = 4];
}
// Message that store parameters used by non maximum suppression.
// NOTE(review): field numbers and defaults are part of the wire format of the
// serialized .caffemodel/.prototxt — never renumber or change defaults.
message NonMaximumSuppressionParameter {
// Threshold to be used in nms.
// Boxes overlapping an already-kept box by more than this are suppressed
// (presumably an IoU/Jaccard overlap — confirm against the layer implementation).
optional float nms_threshold = 1 [default = 0.3];
// Maximum number of results to be kept.
optional int32 top_k = 2;
// Parameter for adaptive nms.
// NOTE(review): semantics of eta are defined by the NMS implementation; the
// default of 1.0 appears to mean "non-adaptive" — verify in the layer code.
optional float eta = 3 [default = 1.0];
}
// Message that store parameters used for saving detection results to disk.
// All fields are optional; when output_directory is empty, nothing is saved.
// NOTE(review): field numbers are part of the serialized wire format — never renumber.
message SaveOutputParameter {
// Output directory. If not empty, we will save the results.
optional string output_directory = 1;
// Output name prefix.
optional string output_name_prefix = 2;
// Output format.
// VOC - PASCAL VOC output format.
// COCO - MS COCO output format.
optional string output_format = 3;
// If you want to output results, must also provide the following two files.
// Otherwise, we will ignore saving results.
// label map file.
optional string label_map_file = 4;
// A file which contains a list of names and sizes with same order
// of the input DB. The file is in the following format:
// name height width
// ...
optional string name_size_file = 5;
// Number of test images. It can be less than the lines specified in
// name_size_file. For example, when we only want to evaluate on part
// of the test images.
optional uint32 num_test_image = 6;
}
// Message that store parameters used by DropoutLayer.
message DropoutParameter {
optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio
}
......
......@@ -95,6 +95,7 @@ void initModule()
REG_RUNTIME_LAYER_CLASS(PriorBox, PriorBoxLayer);
REG_RUNTIME_LAYER_CLASS(DetectionOutput, DetectionOutputLayer);
REG_RUNTIME_LAYER_CLASS(NormalizeBBox, NormalizeBBoxLayer);
REG_RUNTIME_LAYER_CLASS(Normalize, NormalizeBBoxLayer);
REG_RUNTIME_LAYER_CLASS(Shift, ShiftLayer);
REG_RUNTIME_LAYER_CLASS(Padding, PaddingLayer);
REG_RUNTIME_LAYER_CLASS(Scale, ScaleLayer);
......
......@@ -84,7 +84,7 @@ public:
CV_Assert(startAxis >= 0);
CV_Assert(endAxis >= startAxis && endAxis < (int)numAxes);
size_t flattenedDimensionSize = total(inputs[0], startAxis, endAxis);
size_t flattenedDimensionSize = total(inputs[0], startAxis, endAxis + 1);
MatShape outputShapeVec;
for (int i = 0; i < startAxis; i++)
......
......@@ -124,7 +124,7 @@ public:
MatShape shapeBefore = inputs[0], shapeAfter;
for (size_t i = 0; i < _numAxes; i++)
{
shapeAfter[i] = shapeBefore[_order[i]];
shapeAfter.push_back(shapeBefore[_order[i]]);
}
outputs.clear();
......@@ -132,6 +132,7 @@ public:
for (size_t i = 0; i < inputs.size(); i++)
{
CV_Assert(inputs[i][2] == shapeBefore[2] && inputs[i][3] == shapeBefore[3]);
CV_Assert(total(inputs[i]) == total(shapeAfter));
outputs.push_back(shapeAfter);
}
......@@ -192,11 +193,11 @@ public:
CV_Assert(inp.dims == numAxes && inp.size == inputs[0]->size);
CV_Assert(out.dims == numAxes && out.size == outputs[0].size);
for( i = 0; i < numAxes; i++ )
{
CV_Assert(inp.size[i] == _oldDimensionSize[i]);
CV_Assert(out.size[i] == _newDimensionSize[i]);
}
// for( i = 0; i < numAxes; i++ )
// {
// CV_Assert(inp.size[i] == _oldDimensionSize[i]);
// CV_Assert(out.size[i] == _newDimensionSize[i]);
// }
CV_Assert(inp.isContinuous() && out.isContinuous());
CV_Assert(inp.type() == CV_32F && out.type() == CV_32F);
......
......@@ -183,6 +183,22 @@ public:
_numPriors += 1;
}
if (params.has("step_h") || params.has("step_w")) {
CV_Assert(!params.has("step"));
_stepY = getParameter<float>(params, "step_h");
CV_Assert(_stepY > 0.);
_stepX = getParameter<float>(params, "step_w");
CV_Assert(_stepX > 0.);
} else if (params.has("step")) {
const float step = getParameter<float>(params, "step");
CV_Assert(step > 0);
_stepY = step;
_stepX = step;
} else {
_stepY = 0;
_stepX = 0;
}
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
......@@ -216,8 +232,14 @@ public:
int _imageWidth = inputs[1]->size[3];
int _imageHeight = inputs[1]->size[2];
float _stepX = static_cast<float>(_imageWidth) / _layerWidth;
float _stepY = static_cast<float>(_imageHeight) / _layerHeight;
float stepX, stepY;
if (_stepX == 0 || _stepY == 0) {
stepX = static_cast<float>(_imageWidth) / _layerWidth;
stepY = static_cast<float>(_imageHeight) / _layerHeight;
} else {
stepX = _stepX;
stepY = _stepY;
}
int _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;
......@@ -231,8 +253,8 @@ public:
{
_boxWidth = _boxHeight = _minSize;
float center_x = (w + 0.5) * _stepX;
float center_y = (h + 0.5) * _stepY;
float center_x = (w + 0.5) * stepX;
float center_y = (h + 0.5) * stepY;
// xmin
outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth;
// ymin
......@@ -332,6 +354,8 @@ public:
float _boxWidth;
float _boxHeight;
float _stepX, _stepY;
std::vector<float> _aspectRatios;
std::vector<float> _variance;
......
......@@ -134,4 +134,32 @@ TEST(Reproducibility_FCN, Accuracy)
}
#endif
// Regression test: runs the SSD (VGG16, 300x300) Caffe model on a fixed image
// and compares the "detection_out" blob against a pre-computed reference dump.
TEST(Reproducibility_SSD, Accuracy)
{
Net net;
{
// Load network topology + weights through the Caffe importer.
// findDataFile(..., false): the files are looked up in the test data path
// and are required (the test cannot run without them).
const string proto = findDataFile("dnn/ssd_vgg16.prototxt", false);
const string model = findDataFile("dnn/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel", false);
Ptr<Importer> importer = createCaffeImporter(proto, model);
ASSERT_TRUE(importer != NULL);
importer->populateNet(net);
}
Mat sample = imread(_tf("street.png"));
ASSERT_TRUE(!sample.empty());
// Drop a possible alpha channel so the input matches the 3-channel model input.
if (sample.channels() == 4)
cvtColor(sample, sample, COLOR_BGRA2BGR);
// Preprocess: float conversion + resize to the fixed 300x300 SSD input size.
// NOTE(review): no mean subtraction here — the reference ssd_out.npy must have
// been produced with the identical preprocessing for the comparison to hold.
sample.convertTo(sample, CV_32F);
resize(sample, sample, Size(300, 300));
Mat in_blob = blobFromImage(sample);
net.setBlob(".data", in_blob);
net.forward();
Mat out = net.getBlob("detection_out");
// Compare against the reference detections saved as a NumPy array.
Mat ref = blobFromNPY(_tf("ssd_out.npy"));
normAssert(ref, out);
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment