Unite deep learning image classification samples

8e4fe30d · Dmitry Kurtaev · e8d94ea8 · e8d94ea8 · e8d94ea8 · e8d94ea8
Commit 8e4fe30d authored Mar 03, 2018 by Dmitry Kurtaev
21 changed files
--- a/samples/data/dnn/.gitignore
+++ b/samples/data/dnn/.gitignore
-*.caffemodel
--- a/samples/data/dnn/MobileNetSSD_300x300.prototxt
+++ b/samples/data/dnn/MobileNetSSD_300x300.prototxt
--- a/samples/data/dnn/VGG_VOC0712_SSD_300x300_iter_60000.prototxt
+++ b/samples/data/dnn/VGG_VOC0712_SSD_300x300_iter_60000.prototxt
--- a/samples/data/dnn/bvlc_googlenet.prototxt
+++ b/samples/data/dnn/bvlc_googlenet.prototxt
--- a/samples/data/dnn/enet-classes.txt
+++ b/samples/data/dnn/enet-classes.txt
-Unlabeled    0   0   0
-Road         128  64 128
-Sidewalk     244  35 232
-Building     70  70  70
-Wall         102 102 156
-Fence        190 153 153
-Pole         153 153 153
-TrafficLight 250 170  30
-TrafficSign  220 220   0
-Vegetation   107 142  35
-Terrain      152 251 152
-Sky          70 130 180
-Person       220  20  60
-Rider        255   0   0
-Car          0   0 142
-Truck        0   0  70
-Bus          0  60 100
-Train        0  80 100
-Motorcycle   0   0 230
-Bicycle      119  11  32
\ No newline at end of file
--- a/samples/data/dnn/fcn32s-heavy-pascal.prototxt
+++ b/samples/data/dnn/fcn32s-heavy-pascal.prototxt
--- a/samples/data/dnn/fcn8s-heavy-pascal.prototxt
+++ b/samples/data/dnn/fcn8s-heavy-pascal.prototxt
--- a/samples/data/dnn/pascal-classes.txt
+++ b/samples/data/dnn/pascal-classes.txt
-background 0 0 0
-aeroplane 128 0 0
-bicycle 0 128 0
-bird 128 128 0
-boat 0 0 128
-bottle 128 0 128
-bus 0 128 128
-car 128 128 128
-cat 64 0 0
-chair 192 0 0
-cow 64 128 0
-diningtable 192 128 0
-dog 64 0 128
-horse 192 0 128
-motorbike 64 128 128
-person 192 128 128
-pottedplant 0 64 0
-sheep 128 64 0
-sofa 0 192 0
-train 128 192 0
-tvmonitor 0 64 128
--- a/samples/data/dnn/rgb.jpg
+++ b/samples/data/dnn/rgb.jpg
--- a/samples/data/dnn/space_shuttle.jpg
+++ b/samples/data/dnn/space_shuttle.jpg
--- a/samples/data/dnn/synset_words.txt
+++ b/samples/data/dnn/synset_words.txt
--- a/samples/dnn/README.md
+++ b/samples/dnn/README.md
@@ -14,6 +14,14 @@
 | [Faster-RCNN](https://github.com/rbgirshick/py-faster-rcnn) | `1.0` | `800x600` | `102.9801, 115.9465, 122.7717` | BGR |
 | [R-FCN](https://github.com/YuwenXiong/py-R-FCN) | `1.0` | `800x600` | `102.9801 115.9465 122.7717` | BGR |

+
+### Classification
+|    Model | Scale |   Size WxH|   Mean subtraction | Channels order |
+|---------------|-------|-----------|--------------------|-------|
+| GoogLeNet | `1.0` | `224x224` | `104 117 123` | BGR |
+| [SqueezeNet](https://github.com/DeepScale/SqueezeNet) | `1.0` | `227x227` | `0 0 0` | BGR |
+
+
 ## References
 * [Models downloading script](https://github.com/opencv/opencv_extra/blob/master/testdata/dnn/download_models.py)
 * [Configuration files adopted for OpenCV](https://github.com/opencv/opencv_extra/tree/master/testdata/dnn)

--- a/samples/dnn/caffe_googlenet.cpp
+++ b/samples/dnn/caffe_googlenet.cpp
-/**M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-#include <opencv2/dnn.hpp>
-#include <opencv2/imgproc.hpp>
-#include <opencv2/highgui.hpp>
-#include <opencv2/core/utils/trace.hpp>
-using namespace cv;
-using namespace cv::dnn;
-
-#include <fstream>
-#include <iostream>
-#include <cstdlib>
-using namespace std;
-
-/* Find best class for the blob (i. e. class with maximal probability) */
-static void getMaxClass(const Mat &probBlob, int *classId, double *classProb)
-{
-    Mat probMat = probBlob.reshape(1, 1); //reshape the blob to 1x1000 matrix
-    Point classNumber;
-
-    minMaxLoc(probMat, NULL, classProb, NULL, &classNumber);
-    *classId = classNumber.x;
-}
-
-static std::vector<String> readClassNames(const char *filename )
-{
-    std::vector<String> classNames;
-
-    std::ifstream fp(filename);
-    if (!fp.is_open())
-    {
-        std::cerr << "File with classes labels not found: " << filename << std::endl;
-        exit(-1);
-    }
-
-    std::string name;
-    while (!fp.eof())
-    {
-        std::getline(fp, name);
-        if (name.length())
-            classNames.push_back( name.substr(name.find(' ')+1) );
-    }
-
-    fp.close();
-    return classNames;
-}
-
-const char* params
-    = "{ help           | false | Sample app for loading googlenet model }"
-      "{ proto          | bvlc_googlenet.prototxt | model configuration }"
-      "{ model          | bvlc_googlenet.caffemodel | model weights }"
-      "{ label          | synset_words.txt | names of ILSVRC2012 classes }"
-      "{ image          | space_shuttle.jpg | path to image file }"
-      "{ opencl         | false | enable OpenCL }"
-;
-
-int main(int argc, char **argv)
-{
-    CV_TRACE_FUNCTION();
-
-    CommandLineParser parser(argc, argv, params);
-
-    if (parser.get<bool>("help"))
-    {
-        parser.printMessage();
-        return 0;
-    }
-
-    String modelTxt = parser.get<string>("proto");
-    String modelBin = parser.get<string>("model");
-    String imageFile = parser.get<String>("image");
-    String classNameFile = parser.get<String>("label");
-
-    Net net;
-    try {
-        //! [Read and initialize network]
-        net = dnn::readNetFromCaffe(modelTxt, modelBin);
-        //! [Read and initialize network]
-    }
-    catch (const cv::Exception& e) {
-        std::cerr << "Exception: " << e.what() << std::endl;
-        //! [Check that network was read successfully]
-        if (net.empty())
-        {
-            std::cerr << "Can't load network by using the following files: " << std::endl;
-            std::cerr << "prototxt:   " << modelTxt << std::endl;
-            std::cerr << "caffemodel: " << modelBin << std::endl;
-            std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl;
-            std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
-            exit(-1);
-        }
-        //! [Check that network was read successfully]
-    }
-
-    if (parser.get<bool>("opencl"))
-    {
-        net.setPreferableTarget(DNN_TARGET_OPENCL);
-    }
-
-    //! [Prepare blob]
-    Mat img = imread(imageFile);
-    if (img.empty())
-    {
-        std::cerr << "Can't read image from the file: " << imageFile << std::endl;
-        exit(-1);
-    }
-
-    //GoogLeNet accepts only 224x224 BGR-images
-    Mat inputBlob = blobFromImage(img, 1.0f, Size(224, 224),
-                                  Scalar(104, 117, 123), false);   //Convert Mat to batch of images
-    //! [Prepare blob]
-    net.setInput(inputBlob, "data");        //set the network input
-    Mat prob = net.forward("prob");         //compute output
-
-    cv::TickMeter t;
-    for (int i = 0; i < 10; i++)
-    {
-        CV_TRACE_REGION("forward");
-        //! [Set input blob]
-        net.setInput(inputBlob, "data");        //set the network input
-        //! [Set input blob]
-        t.start();
-        //! [Make forward pass]
-        prob = net.forward("prob");                          //compute output
-        //! [Make forward pass]
-        t.stop();
-    }
-
-    //! [Gather output]
-    int classId;
-    double classProb;
-    getMaxClass(prob, &classId, &classProb);//find the best class
-    //! [Gather output]
-
-    //! [Print results]
-    std::vector<String> classNames = readClassNames(classNameFile.c_str());
-    std::cout << "Best class: #" << classId << " '" << classNames.at(classId) << "'" << std::endl;
-    std::cout << "Probability: " << classProb * 100 << "%" << std::endl;
-    //! [Print results]
-    std::cout << "Time: " << (double)t.getTimeMilli() / t.getCounter() << " ms (average from " << t.getCounter() << " iterations)" << std::endl;
-
-    return 0;
-} //main
--- a/samples/dnn/classification.cpp
+++ b/samples/dnn/classification.cpp
--- a/samples/dnn/classification.py
+++ b/samples/dnn/classification.py
+import cv2 as cv
+import argparse
+import numpy as np
+import sys
+
+# Integer ids accepted by --backend and --target; the same tuples fill the
+# %d placeholders of the corresponding help strings below.
+backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE,
+            cv.dnn.DNN_BACKEND_INFERENCE_ENGINE)
+targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL)
+
+parser = argparse.ArgumentParser(
+    description='Use this script to run classification deep learning networks using OpenCV.')
+
+# Command-line options as (flag, add_argument keyword arguments) pairs.
+# They are registered in exactly this order.
+options = [
+    ('--input', dict(
+        help='Path to input image or video file. Skip this argument to capture frames from a camera.')),
+    ('--model', dict(
+        required=True,
+        help='Path to a binary file of model contains trained weights. '
+             'It could be a file with extensions .caffemodel (Caffe), '
+             '.pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet)')),
+    ('--config', dict(
+        help='Path to a text file of model contains network configuration. '
+             'It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet)')),
+    ('--framework', dict(
+        choices=['caffe', 'tensorflow', 'torch', 'darknet'],
+        help='Optional name of an origin framework of the model. '
+             'Detect it automatically if it does not set.')),
+    ('--classes', dict(
+        help='Optional path to a text file with names of classes.')),
+    ('--mean', dict(
+        nargs='+', type=float, default=[0, 0, 0],
+        help='Preprocess input image by subtracting mean values. '
+             'Mean values should be in BGR order.')),
+    ('--scale', dict(
+        type=float, default=1.0,
+        help='Preprocess input image by multiplying on a scale factor.')),
+    ('--width', dict(
+        type=int, required=True,
+        help='Preprocess input image by resizing to a specific width.')),
+    ('--height', dict(
+        type=int, required=True,
+        help='Preprocess input image by resizing to a specific height.')),
+    ('--rgb', dict(
+        action='store_true',
+        help='Indicate that model works with RGB input images instead BGR ones.')),
+    ('--backend', dict(
+        choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
+        help="Choose one of computation backends: "
+             "%d: default C++ backend, "
+             "%d: Halide language (http://halide-lang.org/), "
+             "%d: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)" % backends)),
+    ('--target', dict(
+        choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int,
+        help='Choose one of target computation devices: '
+             '%d: CPU target (by default), '
+             '%d: OpenCL' % targets)),
+]
+for flag, kwargs in options:
+    parser.add_argument(flag, **kwargs)
+
+args = parser.parse_args()
+
+# Read the optional class-name file. Trailing newlines are dropped and the
+# remaining text is split on '\n', giving one list entry per line.
+# `classes` stays None when --classes is not given.
+classes = None
+if args.classes:
+    with open(args.classes, 'rt') as labelFile:
+        labelText = labelFile.read()
+    classes = labelText.rstrip('\n').split('\n')
+
+# Load a network.
+# An explicit --framework always takes precedence. The extension of the
+# weights file is only a fallback when --framework is not given.
+modelExt = args.model[args.model.rfind('.'):]
+extToFramework = {
+    '.caffemodel': 'caffe',
+    '.pb': 'tensorflow',
+    '.t7': 'torch',
+    '.net': 'torch',
+    '.weights': 'darknet',
+}
+framework = args.framework or extToFramework.get(modelExt)
+
+# Note the argument order: the Caffe and Darknet readers take
+# (config, weights), the TensorFlow reader takes (weights, config).
+if framework == 'caffe':
+    net = cv.dnn.readNetFromCaffe(args.config, args.model)
+elif framework == 'tensorflow':
+    net = cv.dnn.readNetFromTensorflow(args.model, args.config)
+elif framework == 'torch':
+    net = cv.dnn.readNetFromTorch(args.model)
+elif framework == 'darknet':
+    net = cv.dnn.readNetFromDarknet(args.config, args.model)
+else:
+    # Passing a string to sys.exit prints it to stderr and exits with
+    # status 1, so callers can tell that the script failed.
+    sys.exit('Cannot determine an origin framework of model from file %s' % args.model)
+
+net.setPreferableBackend(args.backend)
+net.setPreferableTarget(args.target)
+
+winName = 'Deep learning image classification in OpenCV'
+cv.namedWindow(winName, cv.WINDOW_NORMAL)
+
+# Read frames from --input, or from capture device 0 when --input is omitted.
+cap = cv.VideoCapture(args.input if args.input else 0)
+
+# Process frames until waitKey returns a non-negative value (a key press,
+# per the OpenCV waitKey documentation) or the source runs out of frames.
+while cv.waitKey(1) < 0:
+    hasFrame, frame = cap.read()
+    if not hasFrame:
+        # No more frames: keep the window open until a key is pressed.
+        cv.waitKey()
+        break
+
+    # Create a 4D blob from a frame.
+    blob = cv.dnn.blobFromImage(frame, args.scale, (args.width, args.height), args.mean, args.rgb, crop=False)
+
+    # Run a model
+    net.setInput(blob)
+    out = net.forward()
+
+    # Get a class with a highest score.
+    out = out.flatten()
+    classId = np.argmax(out)
+    confidence = out[classId]
+
+    # Put efficiency information: the tick count reported by the network is
+    # converted to milliseconds.
+    t, _ = net.getPerfProfile()
+    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
+    cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
+
+    # Print predicted class. Fall back to the numeric id when no class names
+    # were loaded or the names file has fewer entries than the network has
+    # outputs, instead of raising IndexError.
+    if classes and classId < len(classes):
+        className = classes[classId]
+    else:
+        className = 'Class #%d' % classId
+    label = '%s: %.4f' % (className, confidence)
+    cv.putText(frame, label, (0, 40), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
+
+    cv.imshow(winName, frame)
--- a/samples/dnn/classification_classes_ILSVRC2012.txt
+++ b/samples/dnn/classification_classes_ILSVRC2012.txt
--- a/samples/dnn/googlenet_python.py
+++ b/samples/dnn/googlenet_python.py
--- a/samples/dnn/object_detection.cpp
+++ b/samples/dnn/object_detection.cpp
--- a/samples/dnn/object_detection.py
+++ b/samples/dnn/object_detection.py
--- a/samples/dnn/squeezenet_halide.cpp
+++ b/samples/dnn/squeezenet_halide.cpp
--- a/samples/dnn/tf_inception.cpp
+++ b/samples/dnn/tf_inception.cpp