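// Object detection sample based on OpenCV's dnn and dnn_objdetect (contrib) modules.
// It loads a Caffe model, runs a forward pass on a 416x416 input image, decodes the
// predictions with cv::dnn_objdetect::InferBbox, prints and draws the detections on
// the original image, and saves the result to "image.png".
// The model is assumed to follow the SqueezeDet-style layout that dnn_objdetect
// expects: a 23x23 grid, 9 anchors per cell, and 20 object classes.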
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

#include <fstream>
#include <iostream>
#include <cstdlib>

#include <opencv2/core_detect.hpp>

using namespace cv;
using namespace std;
using namespace cv::dnn;
using namespace cv::dnn_objdetect;

int main(int argc, char **argv)
{
    if (argc < 4)
    {
        std::cerr << "Usage: " << argv[0] << " "
                  << "<model-definition-file> "
                  << "<model-weights-file> "
                  << "<test-image> "
                  << "[threshold]\n";
        return -1;
    }

    std::string model_prototxt = argv[1];
    std::string model_binary = argv[2];
    std::string test_input_image = argv[3];
    double threshold = 0.7;

    if (argc == 5)
    {
      threshold = atof(argv[4]);
      if (threshold > 1.0 || threshold < 0.0)
      {
        std::cerr << "Threshold should belong to [0, 1]\n";
        return -1;
      }
    }

    // Load the network
    std::cout << "Loading the network...\n";
    Net net = dnn::readNetFromCaffe(model_prototxt, model_binary);
    if (net.empty())
    {
        std::cerr << "Couldn't load the model!\n";
        return -2;
    }
    else
    {
        std::cout << "Done loading the network!\n\n";
    }

    // Load the test image
    Mat img = cv::imread(test_input_image);
    if (img.empty())
    {
        std::cerr << "Couldn't load image: " << test_input_image << "\n";
        return -3;
    }
    // Keep an untouched copy at the original resolution for drawing the detections
    Mat original_img = img.clone();

    cv::namedWindow("Initial Image", WINDOW_AUTOSIZE);
    cv::imshow("Initial Image", img);

    cv::resize(img, img, cv::Size(416, 416));
    Mat img_copy(img);
    img.convertTo(img, CV_32FC3);
    Mat input_blob = blobFromImage(img, 1.0, Size(), cv::Scalar(104, 117, 123), false);

    // Set the input blob

    // Set the output layers
    std::cout << "Getting the output of all the three blobs...\n";
    std::vector<Mat> outblobs(3);
    std::vector<cv::String> out_layers;
    out_layers.push_back("slice");
    out_layers.push_back("softmax");
    out_layers.push_back("sigmoid");

    // Bbox delta blob
    std::vector<Mat> temp_blob;
    net.setInput(input_blob);
    cv::TickMeter t;

    t.start();
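    // forward() with an output-layer name runs the network up to that layer
    // and returns its output blob(s)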
    net.forward(temp_blob, out_layers[0]);
    t.stop();
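    // The 'slice' layer emits several blobs; index 2 is assumed to hold the
    // bounding-box deltas, following the layout used by the original sample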
    outblobs[0] = temp_blob[2];

    // class_scores blob
    net.setInput(input_blob);
    t.start();
    outblobs[1] = net.forward(out_layers[1]);
    t.stop();

    // conf_scores blob
    net.setInput(input_blob);
    t.start();
    outblobs[2] = net.forward(out_layers[2]);
    t.stop();

    // Check that the blobs are valid
    for (size_t i = 0; i < outblobs.size(); ++i)
    {
        if (outblobs[i].empty())
        {
          std::cerr << "Blob: " << i << " is empty !\n";
        }
    }
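
    // Reshape the raw network outputs into their logical dimensions: a 23x23
    // anchor grid with 9 anchors per cell and 4 box deltas per anchor (23x23x36),
    // 23*23*9 = 4761 anchor scores over 20 classes, and one confidence score
    // per anchor (23x23x9)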

    int delta_bbox_size[3] = {23, 23, 36};
    Mat delta_bbox(3, delta_bbox_size, CV_32F, outblobs[0].ptr<float>());

    int class_scores_size[2] = {4761, 20};
    Mat class_scores(2, class_scores_size, CV_32F, outblobs[1].ptr<float>());

    int conf_scores_size[3] = {23, 23, 9};
    Mat conf_scores(3, conf_scores_size, CV_32F, outblobs[2].ptr<float>());

    // InferBbox decodes the deltas and scores into boxes and keeps only the
    // detections whose confidence exceeds the threshold
    InferBbox inf(delta_bbox, class_scores, conf_scores);
    inf.filter(threshold);

    double average_time = t.getTimeSec() / t.getCounter();
    std::cout << "\nTotal objects detected: " << inf.detections.size()
              << " (average forward-pass time: " << average_time << " seconds)\n";
    std::cout << "------\n";
    float x_ratio = (float)original_img.cols / img_copy.cols;
    float y_ratio = (float)original_img.rows / img_copy.rows;
    for (size_t i = 0; i < inf.detections.size(); ++i)
    {
      int xmin = inf.detections[i].xmin;
      int ymin = inf.detections[i].ymin;
      int xmax = inf.detections[i].xmax;
      int ymax = inf.detections[i].ymax;
      cv::String class_name = inf.detections[i].label_name;
      std::cout << "Class: " << class_name << "\n"
                << "Probability: " << inf.detections[i].class_prob << "\n"
                << "Co-ordinates: " << xmin << " " << ymin << " "
                << xmax << " " << ymax << "\n";
      std::cout << "------\n";
      // Draw the corresponding bounding box and class label, scaled back to
      // the original image resolution
      cv::rectangle(original_img, cv::Point((int)(xmin * x_ratio), (int)(ymin * y_ratio)),
          cv::Point((int)(xmax * x_ratio), (int)(ymax * y_ratio)), cv::Scalar(255, 0, 0), 2);
      cv::putText(original_img, class_name, cv::Point((int)(xmin * x_ratio), (int)(ymin * y_ratio)),
        cv::FONT_HERSHEY_SIMPLEX, 0.7, cv::Scalar(255, 0, 0), 1);
    }

    try
    {
      cv::namedWindow("Final Detections", WINDOW_AUTOSIZE);
      cv::imshow("Final Detections", original_img);
      cv::imwrite("image.png", original_img);
      cv::waitKey(0);
    }
    catch (const cv::Exception& e)
    {
      // OpenCV signals errors via cv::Exception, so catch that type here
      std::cerr << e.what() << "\n";
      return -4;
    }

    return 0;
}