Commit eab556e1 authored by Dmitry Kurtaev

OpenCV face detection network in TensorFlow

parent 53305d4a
# This script is used to estimate the accuracy of different face detection models.
# The COCO evaluation tool is used to compute the accuracy metric (Average Precision).
# The script works with different face detection datasets.
import os
import json
from fnmatch import fnmatch
from math import pi
import cv2 as cv
import argparse
import sys

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
parser = argparse.ArgumentParser(
    description='Evaluate OpenCV face detection algorithms '
                'using COCO evaluation tool, http://cocodataset.org/#detections-eval')
parser.add_argument('--proto', help='Path to .prototxt of Caffe model or .pbtxt of TensorFlow graph')
parser.add_argument('--model', help='Path to .caffemodel trained in Caffe or .pb from TensorFlow')
parser.add_argument('--caffe', help='Indicate that the tested model is from Caffe. Otherwise a model from TensorFlow is expected.', action='store_true')
parser.add_argument('--cascade', help='Optional path to trained Haar cascade as '
                                      'an additional model for evaluation')
parser.add_argument('--ann', help='Path to text file with ground truth annotations')
parser.add_argument('--pics', help='Path to images root directory')
parser.add_argument('--fddb', help='Evaluate FDDB dataset, http://vis-www.cs.umass.edu/fddb/', action='store_true')
parser.add_argument('--wider', help='Evaluate WIDER FACE dataset, http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/', action='store_true')
args = parser.parse_args()
dataset = {}
dataset['images'] = []
dataset['categories'] = [{ 'id': 0, 'name': 'face' }]
dataset['annotations'] = []
def ellipse2Rect(params):
    rad_x = params[0]
    rad_y = params[1]
    angle = params[2] * 180.0 / pi
    center_x = params[3]
    center_y = params[4]
    pts = cv.ellipse2Poly((int(center_x), int(center_y)), (int(rad_x), int(rad_y)),
                          int(angle), 0, 360, 10)
    rect = cv.boundingRect(pts)
    left = rect[0]
    top = rect[1]
    right = rect[0] + rect[2]
    bottom = rect[1] + rect[3]
    return left, top, right, bottom
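# FDDB annotates faces as rotated ellipses: [major radius, minor radius,
# angle in radians, center x, center y]. ellipse2Rect rasterizes the ellipse
# with cv.ellipse2Poly and returns the axis-aligned bounding box of the points.
# Illustrative call with made-up values (not taken from the dataset):
#   left, top, right, bottom = ellipse2Rect([40.0, 25.0, pi / 4, 100.0, 120.0])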
def addImage(imagePath):
    assert('images' in dataset)
    imageId = len(dataset['images'])
    dataset['images'].append({
        'id': int(imageId),
        'file_name': imagePath
    })
    return imageId
def addBBox(imageId, left, top, width, height):
    assert('annotations' in dataset)
    dataset['annotations'].append({
        'id': len(dataset['annotations']),
        'image_id': int(imageId),
        'category_id': 0,  # Face
        'bbox': [int(left), int(top), int(width), int(height)],
        'iscrowd': 0,
        'area': float(width * height)
    })
def addDetection(detections, imageId, left, top, width, height, score):
    detections.append({
        'image_id': int(imageId),
        'category_id': 0,  # Face
        'bbox': [int(left), int(top), int(width), int(height)],
        'score': float(score)
    })
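# Together these helpers produce JSON in the COCO detection format; the ground
# truth file ends up shaped roughly like this (field values are illustrative):
#   {"images": [{"id": 0, "file_name": "/path/img.jpg"}],
#    "categories": [{"id": 0, "name": "face"}],
#    "annotations": [{"id": 0, "image_id": 0, "category_id": 0,
#                     "bbox": [left, top, width, height],
#                     "iscrowd": 0, "area": width * height}]}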
def fddb_dataset(annotations, images):
    for d in os.listdir(annotations):
        if fnmatch(d, 'FDDB-fold-*-ellipseList.txt'):
            with open(os.path.join(annotations, d), 'rt') as f:
                lines = [line.rstrip('\n') for line in f]
                lineId = 0
                while lineId < len(lines):
                    # Image
                    imgPath = lines[lineId]
                    lineId += 1
                    imageId = addImage(os.path.join(images, imgPath) + '.jpg')
                    # Faces
                    numFaces = int(lines[lineId])
                    lineId += 1
                    for i in range(numFaces):
                        params = [float(v) for v in lines[lineId].split()]
                        lineId += 1
                        left, top, right, bottom = ellipse2Rect(params)
                        addBBox(imageId, left, top, width=right - left + 1,
                                height=bottom - top + 1)
def wider_dataset(annotations, images):
    with open(annotations, 'rt') as f:
        lines = [line.rstrip('\n') for line in f]
        lineId = 0
        while lineId < len(lines):
            # Image
            imgPath = lines[lineId]
            lineId += 1
            imageId = addImage(os.path.join(images, imgPath))
            # Faces
            numFaces = int(lines[lineId])
            lineId += 1
            for i in range(numFaces):
                params = [int(v) for v in lines[lineId].split()]
                lineId += 1
                left, top, width, height = params[0], params[1], params[2], params[3]
                addBBox(imageId, left, top, width, height)
def evaluate():
    cocoGt = COCO('annotations.json')
    cocoDt = cocoGt.loadRes('detections.json')
    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
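# cocoEval.summarize() prints the standard COCO metrics table, with lines like
#   Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = ...
# so the headline number this script reports is COCO-style AP for the face class.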
### Convert to COCO annotations format #########################################
assert(args.fddb or args.wider)
if args.fddb:
    fddb_dataset(args.ann, args.pics)
elif args.wider:
    wider_dataset(args.ann, args.pics)

with open('annotations.json', 'wt') as f:
    json.dump(dataset, f)
### Obtain detections ##########################################################
detections = []
if args.proto and args.model:
    if args.caffe:
        net = cv.dnn.readNetFromCaffe(args.proto, args.model)
    else:
        net = cv.dnn.readNetFromTensorflow(args.model, args.proto)

    def detect(img, imageId):
        imgWidth = img.shape[1]
        imgHeight = img.shape[0]
        net.setInput(cv.dnn.blobFromImage(img, 1.0, (300, 300), (104., 177., 123.), False, False))
        out = net.forward()

        for i in range(out.shape[2]):
            confidence = out[0, 0, i, 2]
            left = int(out[0, 0, i, 3] * imgWidth)
            top = int(out[0, 0, i, 4] * imgHeight)
            right = int(out[0, 0, i, 5] * imgWidth)
            bottom = int(out[0, 0, i, 6] * imgHeight)
            addDetection(detections, imageId, left, top, width=right - left + 1,
                         height=bottom - top + 1, score=confidence)
elif args.cascade:
    cascade = cv.CascadeClassifier(args.cascade)

    def detect(img, imageId):
        srcImgGray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
        faces = cascade.detectMultiScale(srcImgGray)
        for rect in faces:
            left, top, width, height = rect[0], rect[1], rect[2], rect[3]
            addDetection(detections, imageId, left, top, width, height, score=1.0)

for i in range(len(dataset['images'])):
    sys.stdout.write('\r%d / %d' % (i + 1, len(dataset['images'])))
    sys.stdout.flush()
    img = cv.imread(dataset['images'][i]['file_name'])
    imageId = int(dataset['images'][i]['id'])
    detect(img, imageId)

with open('detections.json', 'wt') as f:
    json.dump(detections, f)

evaluate()
def rm(f):
    if os.path.exists(f):
        os.remove(f)

rm('annotations.json')
rm('detections.json')
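For reference, a typical WIDER FACE evaluation run of this script could look like the following; the script file name and all paths are hypothetical placeholders, not files shipped with the commit:

python face_detector_accuracy.py \
    --proto opencv_face_detector.pbtxt \
    --model opencv_face_detector_uint8.pb \
    --ann /data/wider/wider_face_val_bbx_gt.txt \
    --pics /data/wider/WIDER_val/images \
    --wider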
@@ -651,7 +651,8 @@ static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& cons
             tensor->set_dtype(tensorflow::DT_FLOAT);
             tensor->set_tensor_content(content.data, content.total() * content.elemSize1());
             ExcludeLayer(net, li, 0, false);
+            net.mutable_node(tensorId)->set_name(name);
             CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second);
             layers_to_ignore.insert(name);
             continue;
         }
@@ -1477,6 +1478,17 @@ void TFImporter::populateNet(Net dstNet)
             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
         }
+        else if (type == "L2Normalize")
+        {
+            // op: "L2Normalize"
+            // input: "input"
+            CV_Assert(layer.input_size() == 1);
+            layerParams.set("across_spatial", false);
+            layerParams.set("channel_shared", false);
+            int id = dstNet.addLayer(name, "Normalize", layerParams);
+            layer_id[name] = id;
+            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
+        }
         else if (type == "PriorBox")
         {
             if (hasLayerAttr(layer, "min_size"))
@@ -1489,6 +1501,8 @@ void TFImporter::populateNet(Net dstNet)
             if (hasLayerAttr(layer, "clip"))
                 layerParams.set("clip", getLayerAttr(layer, "clip").b());
             if (hasLayerAttr(layer, "offset"))
                 layerParams.set("offset", getLayerAttr(layer, "offset").f());
+            if (hasLayerAttr(layer, "step"))
+                layerParams.set("step", getLayerAttr(layer, "step").f());
             const std::string paramNames[] = {"variance", "aspect_ratio", "scales",
                                               "width", "height"};
@@ -1538,8 +1552,17 @@ void TFImporter::populateNet(Net dstNet)
                 connect(layer_id, dstNet, parsePin(layer.input(i)), id, i);
             data_layouts[name] = DATA_LAYOUT_UNKNOWN;
         }
+        else if (type == "Softmax")
+        {
+            if (hasLayerAttr(layer, "axis"))
+                layerParams.set("axis", getLayerAttr(layer, "axis").i());
+
+            int id = dstNet.addLayer(name, "Softmax", layerParams);
+            layer_id[name] = id;
+            connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
+        }
         else if (type == "Abs" || type == "Tanh" || type == "Sigmoid" ||
-                 type == "Relu" || type == "Elu" || type == "Softmax" ||
+                 type == "Relu" || type == "Elu" ||
                  type == "Identity" || type == "Relu6")
         {
             std::string dnnType = type;
...
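The new L2Normalize branch above maps a single-input graph node onto OpenCV's Normalize layer with across_spatial=false and channel_shared=false, i.e. SSD-style per-channel L2 normalization. A hypothetical .pbtxt excerpt of the kind of node it would match (the node and input names are illustrative, not taken from the commit):

node {
  name: "conv4_3_norm"
  op: "L2Normalize"
  input: "conv4_3/Relu"
}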
@@ -353,4 +353,28 @@ TEST(Test_TensorFlow, memory_read)
     runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true);
 }
+
+TEST(Test_TensorFlow, opencv_face_detector_uint8)
+{
+    std::string proto = findDataFile("dnn/opencv_face_detector.pbtxt", false);
+    std::string model = findDataFile("dnn/opencv_face_detector_uint8.pb", false);
+
+    Net net = readNetFromTensorflow(model, proto);
+    Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false));
+    Mat blob = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);
+
+    net.setInput(blob);
+    // Output has shape 1x1xNx7 where N is the number of detections.
+    // Every detection is a vector of values [id, classId, confidence, left, top, right, bottom].
+    Mat out = net.forward();
+
+    // Reference values are from the test for the Caffe model.
+    Mat ref = (Mat_<float>(6, 5) << 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631,
+                                    0.9934696, 0.2831718, 0.50738752, 0.345781, 0.5985168,
+                                    0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290,
+                                    0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477,
+                                    0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494,
+                                    0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801);
+    normAssert(out.reshape(1, out.total() / 7).rowRange(0, 6).colRange(2, 7), ref, "", 2.8e-4, 3.4e-3);
+}
 }
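For illustration, a minimal Python sketch that mirrors the C++ test above, assuming the two model files are available locally; the 300x300 input size is borrowed from the evaluation script (the test itself passes Size(), i.e. no resize), and the 0.5 display threshold is an arbitrary choice:

import numpy as np
import cv2 as cv

net = cv.dnn.readNetFromTensorflow('opencv_face_detector_uint8.pb',
                                   'opencv_face_detector.pbtxt')
img = cv.imread('er.png')
blob = cv.dnn.blobFromImage(img, 1.0, (300, 300), (104., 177., 123.), False, False)
net.setInput(blob)
out = net.forward()  # shape 1x1xNx7: [id, classId, confidence, left, top, right, bottom]
h, w = img.shape[:2]
for det in out[0, 0]:
    confidence = float(det[2])
    if confidence > 0.5:
        # Coordinates are normalized to [0, 1]; scale back to image size.
        left, top, right, bottom = (det[3:7] * np.array([w, h, w, h])).astype(int)
        print('face %.3f at (%d, %d)-(%d, %d)' % (confidence, left, top, right, bottom))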