Commit eab556e1 authored by Dmitry Kurtaev

OpenCV face detection network in TensorFlow

parent 53305d4a
# This script is used to estimate the accuracy of different face detection models.
# The COCO evaluation tool is used to compute the accuracy metric (Average Precision).
# The script works with different face detection datasets.
import os
import json
from fnmatch import fnmatch
from math import pi
import cv2 as cv
import argparse
import sys

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
parser = argparse.ArgumentParser(
    description='Evaluate OpenCV face detection algorithms '
                'using COCO evaluation tool, http://cocodataset.org/#detections-eval')
parser.add_argument('--proto', help='Path to .prototxt of Caffe model or .pbtxt of TensorFlow graph')
parser.add_argument('--model', help='Path to .caffemodel trained in Caffe or .pb from TensorFlow')
parser.add_argument('--caffe', help='Indicate that the tested model is from Caffe. Otherwise a model from TensorFlow is expected.', action='store_true')
parser.add_argument('--cascade', help='Optional path to trained Haar cascade as '
                                      'an additional model for evaluation')
parser.add_argument('--ann', help='Path to text file with ground truth annotations')
parser.add_argument('--pics', help='Path to images root directory')
parser.add_argument('--fddb', help='Evaluate FDDB dataset, http://vis-www.cs.umass.edu/fddb/', action='store_true')
parser.add_argument('--wider', help='Evaluate WIDER FACE dataset, http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/', action='store_true')
args = parser.parse_args()
dataset = {}
dataset['images'] = []
dataset['categories'] = [{ 'id': 0, 'name': 'face' }]
dataset['annotations'] = []
def ellipse2Rect(params):
    rad_x = params[0]
    rad_y = params[1]
    angle = params[2] * 180.0 / pi
    center_x = params[3]
    center_y = params[4]
    pts = cv.ellipse2Poly((int(center_x), int(center_y)), (int(rad_x), int(rad_y)),
                          int(angle), 0, 360, 10)
    rect = cv.boundingRect(pts)
    left = rect[0]
    top = rect[1]
    right = rect[0] + rect[2]
    bottom = rect[1] + rect[3]
    return left, top, right, bottom
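# FDDB annotates faces as rotated ellipses: [major radius, minor radius,
# angle in radians, center x, center y]. ellipse2Rect rasterizes the ellipse
# with cv.ellipse2Poly and returns the axis-aligned bounding box of the points.
# Illustrative call with made-up values (not taken from the dataset):
#   left, top, right, bottom = ellipse2Rect([40.0, 25.0, pi / 4, 100.0, 120.0])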
def addImage(imagePath):
    assert('images' in dataset)
    imageId = len(dataset['images'])
    dataset['images'].append({
        'id': int(imageId),
        'file_name': imagePath
    })
    return imageId
def addBBox(imageId, left, top, width, height):
    assert('annotations' in dataset)
    dataset['annotations'].append({
        'id': len(dataset['annotations']),
        'image_id': int(imageId),
        'category_id': 0,  # Face
        'bbox': [int(left), int(top), int(width), int(height)],
        'iscrowd': 0,
        'area': float(width * height)
    })
def addDetection(detections, imageId, left, top, width, height, score):
    detections.append({
        'image_id': int(imageId),
        'category_id': 0,  # Face
        'bbox': [int(left), int(top), int(width), int(height)],
        'score': float(score)
    })
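# Together these helpers produce JSON in the COCO detection format; the ground
# truth file ends up shaped roughly like this (field values are illustrative):
#   {"images": [{"id": 0, "file_name": "/path/img.jpg"}],
#    "categories": [{"id": 0, "name": "face"}],
#    "annotations": [{"id": 0, "image_id": 0, "category_id": 0,
#                     "bbox": [left, top, width, height],
#                     "iscrowd": 0, "area": width * height}]}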
def fddb_dataset(annotations, images):
    for d in os.listdir(annotations):
        if fnmatch(d, 'FDDB-fold-*-ellipseList.txt'):
            with open(os.path.join(annotations, d), 'rt') as f:
                lines = [line.rstrip('\n') for line in f]
                lineId = 0
                while lineId < len(lines):
                    # Image
                    imgPath = lines[lineId]
                    lineId += 1
                    imageId = addImage(os.path.join(images, imgPath) + '.jpg')
                    # Faces
                    numFaces = int(lines[lineId])
                    lineId += 1
                    for i in range(numFaces):
                        params = [float(v) for v in lines[lineId].split()]
                        lineId += 1
                        left, top, right, bottom = ellipse2Rect(params)
                        addBBox(imageId, left, top, width=right - left + 1,
                                height=bottom - top + 1)
def wider_dataset(annotations, images):
    with open(annotations, 'rt') as f:
        lines = [line.rstrip('\n') for line in f]
        lineId = 0
        while lineId < len(lines):
            # Image
            imgPath = lines[lineId]
            lineId += 1
            imageId = addImage(os.path.join(images, imgPath))
            # Faces
            numFaces = int(lines[lineId])
            lineId += 1
            for i in range(numFaces):
                params = [int(v) for v in lines[lineId].split()]
                lineId += 1
                left, top, width, height = params[0], params[1], params[2], params[3]
                addBBox(imageId, left, top, width, height)
def evaluate():
    cocoGt = COCO('annotations.json')
    cocoDt = cocoGt.loadRes('detections.json')
    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
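# cocoEval.summarize() prints the standard COCO metrics table, with lines like
#   Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = ...
# so the headline number this script reports is COCO-style AP for the face class.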
### Convert to COCO annotations format #########################################
assert(args.fddb or args.wider)
if args.fddb:
    fddb_dataset(args.ann, args.pics)
elif args.wider:
    wider_dataset(args.ann, args.pics)

with open('annotations.json', 'wt') as f:
    json.dump(dataset, f)
### Obtain detections ##########################################################
detections = []
if args.proto and args.model:
    if args.caffe:
        net = cv.dnn.readNetFromCaffe(args.proto, args.model)
    else:
        net = cv.dnn.readNetFromTensorflow(args.model, args.proto)

    def detect(img, imageId):
        imgWidth = img.shape[1]
        imgHeight = img.shape[0]
        net.setInput(cv.dnn.blobFromImage(img, 1.0, (300, 300), (104., 177., 123.), False, False))
        out = net.forward()

        for i in range(out.shape[2]):
            confidence = out[0, 0, i, 2]
            left = int(out[0, 0, i, 3] * imgWidth)
            top = int(out[0, 0, i, 4] * imgHeight)
            right = int(out[0, 0, i, 5] * imgWidth)
            bottom = int(out[0, 0, i, 6] * imgHeight)
            addDetection(detections, imageId, left, top, width=right - left + 1,
                         height=bottom - top + 1, score=confidence)
elif args.cascade:
    cascade = cv.CascadeClassifier(args.cascade)

    def detect(img, imageId):
        srcImgGray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
        faces = cascade.detectMultiScale(srcImgGray)
        for rect in faces:
            left, top, width, height = rect[0], rect[1], rect[2], rect[3]
            addDetection(detections, imageId, left, top, width, height, score=1.0)

for i in range(len(dataset['images'])):
    sys.stdout.write('\r%d / %d' % (i + 1, len(dataset['images'])))
    sys.stdout.flush()
    img = cv.imread(dataset['images'][i]['file_name'])
    imageId = int(dataset['images'][i]['id'])
    detect(img, imageId)

with open('detections.json', 'wt') as f:
    json.dump(detections, f)

evaluate()
def rm(f):
    if os.path.exists(f):
        os.remove(f)

rm('annotations.json')
rm('detections.json')
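For reference, a typical WIDER FACE evaluation run of this script could look like the following; the script file name and all paths are hypothetical placeholders, not files shipped with the commit:

python face_detector_accuracy.py \
    --proto opencv_face_detector.pbtxt \
    --model opencv_face_detector_uint8.pb \
    --ann /data/wider/wider_face_val_bbx_gt.txt \
    --pics /data/wider/WIDER_val/images \
    --wider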
@@ -651,7 +651,8 @@ static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& cons
             tensor->set_dtype(tensorflow::DT_FLOAT);
             tensor->set_tensor_content(content.data, content.total() * content.elemSize1());
             ExcludeLayer(net, li, 0, false);
+            net.mutable_node(tensorId)->set_name(name);
             CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second);
             layers_to_ignore.insert(name);
             continue;
         }
@@ -1477,6 +1478,17 @@ void TFImporter::populateNet(Net dstNet)
             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
         }
+        else if (type == "L2Normalize")
+        {
+            // op: "L2Normalize"
+            // input: "input"
+            CV_Assert(layer.input_size() == 1);
+            layerParams.set("across_spatial", false);
+            layerParams.set("channel_shared", false);
+            int id = dstNet.addLayer(name, "Normalize", layerParams);
+            layer_id[name] = id;
+            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
+        }
         else if (type == "PriorBox")
         {
             if (hasLayerAttr(layer, "min_size"))
@@ -1489,6 +1501,8 @@ void TFImporter::populateNet(Net dstNet)
             if (hasLayerAttr(layer, "clip"))
                 layerParams.set("clip", getLayerAttr(layer, "clip").b());
             if (hasLayerAttr(layer, "offset"))
                 layerParams.set("offset", getLayerAttr(layer, "offset").f());
+            if (hasLayerAttr(layer, "step"))
+                layerParams.set("step", getLayerAttr(layer, "step").f());
             const std::string paramNames[] = {"variance", "aspect_ratio", "scales",
                                               "width", "height"};
@@ -1538,8 +1552,17 @@ void TFImporter::populateNet(Net dstNet)
                 connect(layer_id, dstNet, parsePin(layer.input(i)), id, i);
             data_layouts[name] = DATA_LAYOUT_UNKNOWN;
         }
+        else if (type == "Softmax")
+        {
+            if (hasLayerAttr(layer, "axis"))
+                layerParams.set("axis", getLayerAttr(layer, "axis").i());
+
+            int id = dstNet.addLayer(name, "Softmax", layerParams);
+            layer_id[name] = id;
+            connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
+        }
         else if (type == "Abs" || type == "Tanh" || type == "Sigmoid" ||
-                 type == "Relu" || type == "Elu" || type == "Softmax" ||
+                 type == "Relu" || type == "Elu" ||
                  type == "Identity" || type == "Relu6")
         {
             std::string dnnType = type;
...
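The new L2Normalize branch above maps a single-input graph node onto OpenCV's Normalize layer with across_spatial=false and channel_shared=false, i.e. SSD-style per-channel L2 normalization. A hypothetical .pbtxt excerpt of the kind of node it would match (the node and input names are illustrative, not taken from the commit):

node {
  name: "conv4_3_norm"
  op: "L2Normalize"
  input: "conv4_3/Relu"
}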
@@ -353,4 +353,28 @@ TEST(Test_TensorFlow, memory_read)
     runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true);
 }
+
+TEST(Test_TensorFlow, opencv_face_detector_uint8)
+{
+    std::string proto = findDataFile("dnn/opencv_face_detector.pbtxt", false);
+    std::string model = findDataFile("dnn/opencv_face_detector_uint8.pb", false);
+
+    Net net = readNetFromTensorflow(model, proto);
+    Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false));
+    Mat blob = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);
+
+    net.setInput(blob);
+    // Output has shape 1x1xNx7 where N is the number of detections.
+    // Every detection is a vector of values [id, classId, confidence, left, top, right, bottom].
+    Mat out = net.forward();
+
+    // Reference values are from the test for the Caffe model.
+    Mat ref = (Mat_<float>(6, 5) << 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631,
+                                    0.9934696, 0.2831718, 0.50738752, 0.345781, 0.5985168,
+                                    0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290,
+                                    0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477,
+                                    0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494,
+                                    0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801);
+    normAssert(out.reshape(1, out.total() / 7).rowRange(0, 6).colRange(2, 7), ref, "", 2.8e-4, 3.4e-3);
+}
 }
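For illustration, a minimal Python sketch that mirrors the C++ test above, assuming the two model files are available locally; the 300x300 input size is borrowed from the evaluation script (the test itself passes Size(), i.e. no resize), and the 0.5 display threshold is an arbitrary choice:

import numpy as np
import cv2 as cv

net = cv.dnn.readNetFromTensorflow('opencv_face_detector_uint8.pb',
                                   'opencv_face_detector.pbtxt')
img = cv.imread('er.png')
blob = cv.dnn.blobFromImage(img, 1.0, (300, 300), (104., 177., 123.), False, False)
net.setInput(blob)
out = net.forward()  # shape 1x1xNx7: [id, classId, confidence, left, top, right, bottom]
h, w = img.shape[:2]
for det in out[0, 0]:
    confidence = float(det[2])
    if confidence > 0.5:
        # Coordinates are normalized to [0, 1]; scale back to image size.
        left, top, right, bottom = (det[3:7] * np.array([w, h, w, h])).astype(int)
        print('face %.3f at (%d, %d)-(%d, %d)' % (confidence, left, top, right, bottom))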