Commit 5e0f95b9 authored Feb 22, 2018 by Vadim Pisarevsky
Merge pull request #9708 from dkurt:tf_face_detector
parents a11b7a82 eab556e1
Showing 4 changed files with 592 additions and 2 deletions
modules/dnn/misc/face_detector_accuracy.py   +195 -0
modules/dnn/misc/quantize_face_detector.py   +348 -0
modules/dnn/src/tensorflow/tf_importer.cpp   +25 -2
modules/dnn/test/test_tf_importer.cpp        +24 -0
modules/dnn/misc/face_detector_accuracy.py (new file, mode 100644)
# This script is used to estimate the accuracy of different face detection models.
# The COCO evaluation tool is used to compute accuracy metrics (Average Precision).
# The script works with different face detection datasets.
import os
import json
from fnmatch import fnmatch
from math import pi
import cv2 as cv
import argparse
import sys

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

parser = argparse.ArgumentParser(
        description='Evaluate OpenCV face detection algorithms '
                    'using COCO evaluation tool, http://cocodataset.org/#detections-eval')
parser.add_argument('--proto', help='Path to .prototxt of Caffe model or .pbtxt of TensorFlow graph')
parser.add_argument('--model', help='Path to .caffemodel trained in Caffe or .pb from TensorFlow')
parser.add_argument('--caffe', help='Indicate that tested model is from Caffe. '
                                    'Otherwise model from TensorFlow is expected.',
                    action='store_true')
parser.add_argument('--cascade', help='Optional path to trained Haar cascade as '
                                      'an additional model for evaluation')
parser.add_argument('--ann', help='Path to text file with ground truth annotations')
parser.add_argument('--pics', help='Path to images root directory')
parser.add_argument('--fddb', help='Evaluate FDDB dataset, http://vis-www.cs.umass.edu/fddb/',
                    action='store_true')
parser.add_argument('--wider', help='Evaluate WIDER FACE dataset, http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/',
                    action='store_true')
args = parser.parse_args()

dataset = {}
dataset['images'] = []
dataset['categories'] = [{ 'id': 0, 'name': 'face' }]
dataset['annotations'] = []

def ellipse2Rect(params):
    rad_x = params[0]
    rad_y = params[1]
    angle = params[2] * 180.0 / pi
    center_x = params[3]
    center_y = params[4]
    pts = cv.ellipse2Poly((int(center_x), int(center_y)), (int(rad_x), int(rad_y)),
                          int(angle), 0, 360, 10)
    rect = cv.boundingRect(pts)
    left = rect[0]
    top = rect[1]
    right = rect[0] + rect[2]
    bottom = rect[1] + rect[3]
    return left, top, right, bottom

def addImage(imagePath):
    assert('images' in dataset)
    imageId = len(dataset['images'])
    dataset['images'].append({
        'id': int(imageId),
        'file_name': imagePath
    })
    return imageId

def addBBox(imageId, left, top, width, height):
    assert('annotations' in dataset)
    dataset['annotations'].append({
        'id': len(dataset['annotations']),
        'image_id': int(imageId),
        'category_id': 0,  # Face
        'bbox': [int(left), int(top), int(width), int(height)],
        'iscrowd': 0,
        'area': float(width * height)
    })

def addDetection(detections, imageId, left, top, width, height, score):
    detections.append({
        'image_id': int(imageId),
        'category_id': 0,  # Face
        'bbox': [int(left), int(top), int(width), int(height)],
        'score': float(score)
    })

def fddb_dataset(annotations, images):
    for d in os.listdir(annotations):
        if fnmatch(d, 'FDDB-fold-*-ellipseList.txt'):
            with open(os.path.join(annotations, d), 'rt') as f:
                lines = [line.rstrip('\n') for line in f]
                lineId = 0
                while lineId < len(lines):
                    # Image
                    imgPath = lines[lineId]
                    lineId += 1
                    imageId = addImage(os.path.join(images, imgPath) + '.jpg')

                    img = cv.imread(os.path.join(images, imgPath) + '.jpg')

                    # Faces
                    numFaces = int(lines[lineId])
                    lineId += 1
                    for i in range(numFaces):
                        params = [float(v) for v in lines[lineId].split()]
                        lineId += 1
                        left, top, right, bottom = ellipse2Rect(params)
                        addBBox(imageId, left, top, width=right - left + 1,
                                height=bottom - top + 1)

def wider_dataset(annotations, images):
    with open(annotations, 'rt') as f:
        lines = [line.rstrip('\n') for line in f]
        lineId = 0
        while lineId < len(lines):
            # Image
            imgPath = lines[lineId]
            lineId += 1
            imageId = addImage(os.path.join(images, imgPath))

            # Faces
            numFaces = int(lines[lineId])
            lineId += 1
            for i in range(numFaces):
                params = [int(v) for v in lines[lineId].split()]
                lineId += 1
                left, top, width, height = params[0], params[1], params[2], params[3]
                addBBox(imageId, left, top, width, height)

def evaluate():
    cocoGt = COCO('annotations.json')
    cocoDt = cocoGt.loadRes('detections.json')
    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()


### Convert to COCO annotations format #########################################
assert(args.fddb or args.wider)
if args.fddb:
    fddb_dataset(args.ann, args.pics)
elif args.wider:
    wider_dataset(args.ann, args.pics)

with open('annotations.json', 'wt') as f:
    json.dump(dataset, f)

### Obtain detections ##########################################################
detections = []
if args.proto and args.model:
    if args.caffe:
        net = cv.dnn.readNetFromCaffe(args.proto, args.model)
    else:
        net = cv.dnn.readNetFromTensorflow(args.model, args.proto)

    def detect(img, imageId):
        imgWidth = img.shape[1]
        imgHeight = img.shape[0]
        net.setInput(cv.dnn.blobFromImage(img, 1.0, (300, 300), (104., 177., 123.), False, False))
        out = net.forward()

        for i in range(out.shape[2]):
            confidence = out[0, 0, i, 2]
            left = int(out[0, 0, i, 3] * img.shape[1])
            top = int(out[0, 0, i, 4] * img.shape[0])
            right = int(out[0, 0, i, 5] * img.shape[1])
            bottom = int(out[0, 0, i, 6] * img.shape[0])
            addDetection(detections, imageId, left, top, width=right - left + 1,
                         height=bottom - top + 1, score=confidence)

elif args.cascade:
    cascade = cv.CascadeClassifier(args.cascade)

    def detect(img, imageId):
        srcImgGray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
        faces = cascade.detectMultiScale(srcImgGray)

        for rect in faces:
            left, top, width, height = rect[0], rect[1], rect[2], rect[3]
            addDetection(detections, imageId, left, top, width, height, score=1.0)

for i in range(len(dataset['images'])):
    sys.stdout.write('\r%d / %d' % (i + 1, len(dataset['images'])))
    sys.stdout.flush()

    img = cv.imread(dataset['images'][i]['file_name'])
    imageId = int(dataset['images'][i]['id'])

    detect(img, imageId)

with open('detections.json', 'wt') as f:
    json.dump(detections, f)

evaluate()


def rm(f):
    if os.path.exists(f):
        os.remove(f)

rm('annotations.json')
rm('detections.json')
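A typical invocation (all paths are placeholders, not part of the commit) evaluates the TensorFlow model on FDDB: python face_detector_accuracy.py --model opencv_face_detector_uint8.pb --proto opencv_face_detector.pbtxt --fddb --ann <FDDB-folds dir> --pics <originalPics dir>. The script writes intermediate annotations.json and detections.json files, prints COCO Average Precision via pycocotools, and removes the intermediate files at the end.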
modules/dnn/misc/quantize_face_detector.py (new file, mode 100644)
import argparse
import cv2 as cv
import tensorflow as tf
import numpy as np
import struct

from tensorflow.python.tools import optimize_for_inference_lib
from tensorflow.tools.graph_transforms import TransformGraph
from tensorflow.core.framework.node_def_pb2 import NodeDef
from google.protobuf import text_format

parser = argparse.ArgumentParser(
        description="Use this script to create TensorFlow graph "
                    "with weights from OpenCV's face detection network. "
                    "Only backbone part of SSD model is converted this way. "
                    "Look for .pbtxt configuration file at "
                    "https://github.com/opencv/opencv_extra/tree/master/testdata/dnn/opencv_face_detector.pbtxt")
parser.add_argument('--model', help='Path to .caffemodel weights', required=True)
parser.add_argument('--proto', help='Path to .prototxt Caffe model definition', required=True)
parser.add_argument('--pb', help='Path to output .pb TensorFlow model', required=True)
parser.add_argument('--pbtxt', help='Path to output .pbtxt TensorFlow graph', required=True)
parser.add_argument('--quantize', help='Quantize weights to uint8', action='store_true')
parser.add_argument('--fp16', help='Convert weights to half precision floats', action='store_true')
args = parser.parse_args()

assert(not args.quantize or not args.fp16)

dtype = tf.float16 if args.fp16 else tf.float32

################################################################################
cvNet = cv.dnn.readNetFromCaffe(args.proto, args.model)

def dnnLayer(name):
    return cvNet.getLayer(long(cvNet.getLayerId(name)))

def scale(x, name):
    with tf.variable_scope(name):
        layer = dnnLayer(name)
        w = tf.Variable(layer.blobs[0].flatten(), dtype=dtype, name='mul')
        if len(layer.blobs) > 1:
            b = tf.Variable(layer.blobs[1].flatten(), dtype=dtype, name='add')
            return tf.nn.bias_add(tf.multiply(x, w), b)
        else:
            return tf.multiply(x, w, name)

def conv(x, name, stride=1, pad='SAME', dilation=1, activ=None):
    with tf.variable_scope(name):
        layer = dnnLayer(name)
        w = tf.Variable(layer.blobs[0].transpose(2, 3, 1, 0), dtype=dtype, name='weights')
        if dilation == 1:
            conv = tf.nn.conv2d(x, filter=w, strides=(1, stride, stride, 1), padding=pad)
        else:
            assert(stride == 1)
            conv = tf.nn.atrous_conv2d(x, w, rate=dilation, padding=pad)

        if len(layer.blobs) > 1:
            b = tf.Variable(layer.blobs[1].flatten(), dtype=dtype, name='bias')
            conv = tf.nn.bias_add(conv, b)
        return activ(conv) if activ else conv

def batch_norm(x, name):
    with tf.variable_scope(name):
        # Unfortunately, TensorFlow's batch normalization layer doesn't work with fp16 input.
        # Here we do a cast to fp32 but remove it in the frozen graph.
        if x.dtype != tf.float32:
            x = tf.cast(x, tf.float32)

        layer = dnnLayer(name)
        assert(len(layer.blobs) >= 3)

        mean = layer.blobs[0].flatten()
        std = layer.blobs[1].flatten()
        scale = layer.blobs[2].flatten()

        eps = 1e-5
        hasBias = len(layer.blobs) > 3
        hasWeights = scale.shape != (1,)

        if not hasWeights and not hasBias:
            mean /= scale[0]
            std /= scale[0]

        mean = tf.Variable(mean, dtype=tf.float32, name='mean')
        std = tf.Variable(std, dtype=tf.float32, name='std')
        gamma = tf.Variable(scale if hasWeights else np.ones(mean.shape),
                            dtype=tf.float32, name='gamma')
        beta = tf.Variable(layer.blobs[3].flatten() if hasBias else np.zeros(mean.shape),
                           dtype=tf.float32, name='beta')

        bn = tf.nn.fused_batch_norm(x, gamma, beta, mean, std, eps,
                                    is_training=False)[0]
        if bn.dtype != dtype:
            bn = tf.cast(bn, dtype)
        return bn

def l2norm(x, name):
    with tf.variable_scope(name):
        layer = dnnLayer(name)
        w = tf.Variable(layer.blobs[0].flatten(), dtype=dtype, name='mul')
        return tf.nn.l2_normalize(x, 3, epsilon=1e-10) * w

### Graph definition ###########################################################

inp = tf.placeholder(dtype, [1, 300, 300, 3], 'data')
data_bn = batch_norm(inp, 'data_bn')
data_scale = scale(data_bn, 'data_scale')
data_scale = tf.pad(data_scale, [[0, 0], [3, 3], [3, 3], [0, 0]])
conv1_h = conv(data_scale, stride=2, pad='VALID', name='conv1_h')
conv1_bn_h = batch_norm(conv1_h, 'conv1_bn_h')
conv1_scale_h = scale(conv1_bn_h, 'conv1_scale_h')
conv1_relu = tf.nn.relu(conv1_scale_h)
conv1_pool = tf.layers.max_pooling2d(conv1_relu, pool_size=(3, 3), strides=(2, 2),
                                     padding='SAME', name='conv1_pool')

layer_64_1_conv1_h = conv(conv1_pool, 'layer_64_1_conv1_h')
layer_64_1_bn2_h = batch_norm(layer_64_1_conv1_h, 'layer_64_1_bn2_h')
layer_64_1_scale2_h = scale(layer_64_1_bn2_h, 'layer_64_1_scale2_h')
layer_64_1_relu2 = tf.nn.relu(layer_64_1_scale2_h)
layer_64_1_conv2_h = conv(layer_64_1_relu2, 'layer_64_1_conv2_h')
layer_64_1_sum = layer_64_1_conv2_h + conv1_pool

layer_128_1_bn1_h = batch_norm(layer_64_1_sum, 'layer_128_1_bn1_h')
layer_128_1_scale1_h = scale(layer_128_1_bn1_h, 'layer_128_1_scale1_h')
layer_128_1_relu1 = tf.nn.relu(layer_128_1_scale1_h)
layer_128_1_conv1_h = conv(layer_128_1_relu1, stride=2, name='layer_128_1_conv1_h')
layer_128_1_bn2 = batch_norm(layer_128_1_conv1_h, 'layer_128_1_bn2')
layer_128_1_scale2 = scale(layer_128_1_bn2, 'layer_128_1_scale2')
layer_128_1_relu2 = tf.nn.relu(layer_128_1_scale2)
layer_128_1_conv2 = conv(layer_128_1_relu2, 'layer_128_1_conv2')
layer_128_1_conv_expand_h = conv(layer_128_1_relu1, stride=2, name='layer_128_1_conv_expand_h')
layer_128_1_sum = layer_128_1_conv2 + layer_128_1_conv_expand_h

layer_256_1_bn1 = batch_norm(layer_128_1_sum, 'layer_256_1_bn1')
layer_256_1_scale1 = scale(layer_256_1_bn1, 'layer_256_1_scale1')
layer_256_1_relu1 = tf.nn.relu(layer_256_1_scale1)
layer_256_1_conv1 = tf.pad(layer_256_1_relu1, [[0, 0], [1, 1], [1, 1], [0, 0]])
layer_256_1_conv1 = conv(layer_256_1_conv1, stride=2, pad='VALID', name='layer_256_1_conv1')
layer_256_1_bn2 = batch_norm(layer_256_1_conv1, 'layer_256_1_bn2')
layer_256_1_scale2 = scale(layer_256_1_bn2, 'layer_256_1_scale2')
layer_256_1_relu2 = tf.nn.relu(layer_256_1_scale2)
layer_256_1_conv2 = conv(layer_256_1_relu2, 'layer_256_1_conv2')
layer_256_1_conv_expand = conv(layer_256_1_relu1, stride=2, name='layer_256_1_conv_expand')
layer_256_1_sum = layer_256_1_conv2 + layer_256_1_conv_expand

layer_512_1_bn1 = batch_norm(layer_256_1_sum, 'layer_512_1_bn1')
layer_512_1_scale1 = scale(layer_512_1_bn1, 'layer_512_1_scale1')
layer_512_1_relu1 = tf.nn.relu(layer_512_1_scale1)
layer_512_1_conv1_h = conv(layer_512_1_relu1, 'layer_512_1_conv1_h')
layer_512_1_bn2_h = batch_norm(layer_512_1_conv1_h, 'layer_512_1_bn2_h')
layer_512_1_scale2_h = scale(layer_512_1_bn2_h, 'layer_512_1_scale2_h')
layer_512_1_relu2 = tf.nn.relu(layer_512_1_scale2_h)
layer_512_1_conv2_h = conv(layer_512_1_relu2, dilation=2, name='layer_512_1_conv2_h')
layer_512_1_conv_expand_h = conv(layer_512_1_relu1, 'layer_512_1_conv_expand_h')
layer_512_1_sum = layer_512_1_conv2_h + layer_512_1_conv_expand_h

last_bn_h = batch_norm(layer_512_1_sum, 'last_bn_h')
last_scale_h = scale(last_bn_h, 'last_scale_h')
fc7 = tf.nn.relu(last_scale_h, name='last_relu')

conv6_1_h = conv(fc7, 'conv6_1_h', activ=tf.nn.relu)
conv6_2_h = conv(conv6_1_h, stride=2, name='conv6_2_h', activ=tf.nn.relu)
conv7_1_h = conv(conv6_2_h, 'conv7_1_h', activ=tf.nn.relu)
conv7_2_h = tf.pad(conv7_1_h, [[0, 0], [1, 1], [1, 1], [0, 0]])
conv7_2_h = conv(conv7_2_h, stride=2, pad='VALID', name='conv7_2_h', activ=tf.nn.relu)
conv8_1_h = conv(conv7_2_h, pad='SAME', name='conv8_1_h', activ=tf.nn.relu)
conv8_2_h = conv(conv8_1_h, pad='SAME', name='conv8_2_h', activ=tf.nn.relu)
conv9_1_h = conv(conv8_2_h, 'conv9_1_h', activ=tf.nn.relu)
conv9_2_h = conv(conv9_1_h, pad='SAME', name='conv9_2_h', activ=tf.nn.relu)

conv4_3_norm = l2norm(layer_256_1_relu1, 'conv4_3_norm')

### Locations and confidences ##################################################
locations = []
confidences = []
flattenLayersNames = []  # Collect names of all reshape layers that should be replaced with flattens.
for top, suffix in zip([locations, confidences], ['_mbox_loc', '_mbox_conf']):
    for bottom, name in zip([conv4_3_norm, fc7, conv6_2_h, conv7_2_h, conv8_2_h, conv9_2_h],
                            ['conv4_3_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2']):
        name += suffix
        flat = tf.layers.flatten(conv(bottom, name))
        flattenLayersNames.append(flat.name[:flat.name.find(':')])
        top.append(flat)

mbox_loc = tf.concat(locations, axis=-1, name='mbox_loc')
mbox_conf = tf.concat(confidences, axis=-1, name='mbox_conf')

total = int(np.prod(mbox_conf.shape[1:]))
mbox_conf_reshape = tf.reshape(mbox_conf, [-1, 2], name='mbox_conf_reshape')
mbox_conf_softmax = tf.nn.softmax(mbox_conf_reshape, name='mbox_conf_softmax')
mbox_conf_flatten = tf.reshape(mbox_conf_softmax, [-1, total], name='mbox_conf_flatten')
flattenLayersNames.append('mbox_conf_flatten')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    ### Check correctness ######################################################
    out_nodes = ['mbox_loc', 'mbox_conf_flatten']
    inp_nodes = [inp.name[:inp.name.find(':')]]

    np.random.seed(2701)
    inputData = np.random.standard_normal([1, 3, 300, 300]).astype(np.float32)

    cvNet.setInput(inputData)
    outDNN = cvNet.forward(out_nodes)

    outTF = sess.run([mbox_loc, mbox_conf_flatten],
                     feed_dict={inp: inputData.transpose(0, 2, 3, 1)})

    print 'Max diff @ locations: %e' % np.max(np.abs(outDNN[0] - outTF[0]))
    print 'Max diff @ confidence: %e' % np.max(np.abs(outDNN[1] - outTF[1]))

    # Save a graph
    graph_def = sess.graph.as_graph_def()

    # Freeze graph. Replaces variables with constants.
    graph_def = tf.graph_util.convert_variables_to_constants(sess, graph_def, out_nodes)
    # Optimize graph. Removes training-only ops, unused nodes.
    graph_def = optimize_for_inference_lib.optimize_for_inference(graph_def, inp_nodes,
                                                                  out_nodes, dtype.as_datatype_enum)
    # Fuse constant operations.
    transforms = ["fold_constants(ignore_errors=True)"]
    if args.quantize:
        transforms += ["quantize_weights(minimum_size=0)"]
    transforms += ["sort_by_execution_order"]
    graph_def = TransformGraph(graph_def, inp_nodes, out_nodes, transforms)
    # By default, float16 weights are stored in a repeated tensor field called
    # `half_val`. It has type int32, with leading zeros for the unused bytes.
    # This type is varint-encoded: only 7 bits of every byte carry the value,
    # and the last bit indicates whether the encoding continues. This way a
    # float16 value may take 1, 2 or 3 bytes depending on its value. To improve
    # compression, we move all `half_val` values into `tensor_content`, which uses exactly 2 bytes per value.
    for node in graph_def.node:
        if 'value' in node.attr:
            halfs = node.attr["value"].tensor.half_val
            if not node.attr["value"].tensor.tensor_content and halfs:
                node.attr["value"].tensor.tensor_content = struct.pack('H' * len(halfs), *halfs)
                node.attr["value"].tensor.ClearField('half_val')
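    # Illustration of the repacking (hypothetical values, not executed by the script):
    #   halfs = np.array([0.5, -1.25], dtype=np.float16).view(np.uint16)  # raw 16-bit patterns
    #   struct.pack('H' * len(halfs), *halfs)  # 4 bytes total, exactly 2 per value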
    # Serialize
    with tf.gfile.FastGFile(args.pb, 'wb') as f:
        f.write(graph_def.SerializeToString())
################################################################################
# Write a text graph representation
################################################################################
def tensorMsg(values):
    msg = 'tensor { dtype: DT_FLOAT tensor_shape { dim { size: %d } }' % len(values)
    for value in values:
        msg += 'float_val: %f ' % value
    return msg + '}'
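# For instance (illustrative), tensorMsg([2.0, 3.0]) returns
# 'tensor { dtype: DT_FLOAT tensor_shape { dim { size: 2 } }float_val: 2.000000 float_val: 3.000000 }',
# which text_format.Merge parses into a tensor-valued node attribute below.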
# Remove Const nodes and unused attributes.
for i in reversed(range(len(graph_def.node))):
    if graph_def.node[i].op in ['Const', 'Dequantize']:
        del graph_def.node[i]
    for attr in ['T', 'data_format', 'Tshape', 'N', 'Tidx', 'Tdim',
                 'use_cudnn_on_gpu', 'Index', 'Tperm', 'is_training', 'Tpaddings']:
        if attr in graph_def.node[i].attr:
            del graph_def.node[i].attr[attr]

# Append prior box generators
min_sizes = [30, 60, 111, 162, 213, 264]
max_sizes = [60, 111, 162, 213, 264, 315]
steps = [8, 16, 32, 64, 100, 300]
aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
layers = [conv4_3_norm, fc7, conv6_2_h, conv7_2_h, conv8_2_h, conv9_2_h]
for i in range(6):
    priorBox = NodeDef()
    priorBox.name = 'PriorBox_%d' % i
    priorBox.op = 'PriorBox'
    priorBox.input.append(layers[i].name[:layers[i].name.find(':')])
    priorBox.input.append(inp_nodes[0])  # data

    text_format.Merge('i: %d' % min_sizes[i], priorBox.attr["min_size"])
    text_format.Merge('i: %d' % max_sizes[i], priorBox.attr["max_size"])
    text_format.Merge('b: true', priorBox.attr["flip"])
    text_format.Merge('b: false', priorBox.attr["clip"])
    text_format.Merge(tensorMsg(aspect_ratios[i]), priorBox.attr["aspect_ratio"])
    text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), priorBox.attr["variance"])
    text_format.Merge('f: %f' % steps[i], priorBox.attr["step"])
    text_format.Merge('f: 0.5', priorBox.attr["offset"])
    graph_def.node.extend([priorBox])

# Concatenate prior boxes
concat = NodeDef()
concat.name = 'mbox_priorbox'
concat.op = 'ConcatV2'
for i in range(6):
    concat.input.append('PriorBox_%d' % i)
concat.input.append('mbox_loc/axis')
graph_def.node.extend([concat])

# DetectionOutput layer
detectionOut = NodeDef()
detectionOut.name = 'detection_out'
detectionOut.op = 'DetectionOutput'

detectionOut.input.append('mbox_loc')
detectionOut.input.append('mbox_conf_flatten')
detectionOut.input.append('mbox_priorbox')

text_format.Merge('i: 2', detectionOut.attr['num_classes'])
text_format.Merge('b: true', detectionOut.attr['share_location'])
text_format.Merge('i: 0', detectionOut.attr['background_label_id'])
text_format.Merge('f: 0.45', detectionOut.attr['nms_threshold'])
text_format.Merge('i: 400', detectionOut.attr['top_k'])
text_format.Merge('s: "CENTER_SIZE"', detectionOut.attr['code_type'])
text_format.Merge('i: 200', detectionOut.attr['keep_top_k'])
text_format.Merge('f: 0.01', detectionOut.attr['confidence_threshold'])

graph_def.node.extend([detectionOut])

# Replace L2Normalization subgraph with a single node.
for i in reversed(range(len(graph_def.node))):
    if graph_def.node[i].name in ['conv4_3_norm/l2_normalize/Square',
                                  'conv4_3_norm/l2_normalize/Sum',
                                  'conv4_3_norm/l2_normalize/Maximum',
                                  'conv4_3_norm/l2_normalize/Rsqrt']:
        del graph_def.node[i]
for node in graph_def.node:
    if node.name == 'conv4_3_norm/l2_normalize':
        node.op = 'L2Normalize'
        node.input.pop()
        node.input.pop()
        node.input.append(layer_256_1_relu1.name)
        break

softmaxShape = NodeDef()
softmaxShape.name = 'reshape_before_softmax'
softmaxShape.op = 'Const'
text_format.Merge(
    'tensor {'
    '  dtype: DT_INT32'
    '  tensor_shape { dim { size: 3 } }'
    '  int_val: 0'
    '  int_val: -1'
    '  int_val: 2'
    '}', softmaxShape.attr["value"])
graph_def.node.extend([softmaxShape])

for node in graph_def.node:
    if node.name == 'mbox_conf_reshape':
        node.input[1] = softmaxShape.name
    elif node.name == 'mbox_conf_softmax':
        text_format.Merge('i: 2', node.attr['axis'])
    elif node.name in flattenLayersNames:
        node.op = 'Flatten'
        inpName = node.input[0]
        node.input.pop()
        node.input.pop()
        node.input.append(inpName)

tf.train.write_graph(graph_def, "", args.pbtxt, as_text=True)
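A typical conversion run (file names are placeholders chosen to match the script's description and the test below): python quantize_face_detector.py --model <face.caffemodel> --proto <face.prototxt> --pb opencv_face_detector_uint8.pb --pbtxt opencv_face_detector.pbtxt --quantize. As a sanity check it prints the maximum difference between the OpenCV and TensorFlow outputs before writing the frozen binary graph (--pb) and its text representation (--pbtxt).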
modules/dnn/src/tensorflow/tf_importer.cpp
...
@@ -651,7 +651,8 @@ static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& cons
                tensor->set_dtype(tensorflow::DT_FLOAT);
                tensor->set_tensor_content(content.data, content.total() * content.elemSize1());

                ExcludeLayer(net, li, 0, false);
                net.mutable_node(tensorId)->set_name(name);
                CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second);
                layers_to_ignore.insert(name);
                continue;
            }
...
@@ -1477,6 +1478,17 @@ void TFImporter::populateNet(Net dstNet)
            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
        }
        else if (type == "L2Normalize")
        {
            // op: "L2Normalize"
            // input: "input"
            CV_Assert(layer.input_size() == 1);
            layerParams.set("across_spatial", false);
            layerParams.set("channel_shared", false);
            int id = dstNet.addLayer(name, "Normalize", layerParams);
            layer_id[name] = id;
            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
        }
        else if (type == "PriorBox")
        {
            if (hasLayerAttr(layer, "min_size"))
...
@@ -1489,6 +1501,8 @@ void TFImporter::populateNet(Net dstNet)
                layerParams.set("clip", getLayerAttr(layer, "clip").b());
            if (hasLayerAttr(layer, "offset"))
                layerParams.set("offset", getLayerAttr(layer, "offset").f());
            if (hasLayerAttr(layer, "step"))
                layerParams.set("step", getLayerAttr(layer, "step").f());

            const std::string paramNames[] = {"variance", "aspect_ratio", "scales", "width", "height"};
...
@@ -1538,8 +1552,17 @@ void TFImporter::populateNet(Net dstNet)
                connect(layer_id, dstNet, parsePin(layer.input(i)), id, i);
            data_layouts[name] = DATA_LAYOUT_UNKNOWN;
        }
        else if (type == "Softmax")
        {
            if (hasLayerAttr(layer, "axis"))
                layerParams.set("axis", getLayerAttr(layer, "axis").i());

            int id = dstNet.addLayer(name, "Softmax", layerParams);
            layer_id[name] = id;
            connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
        }
        else if (type == "Abs" || type == "Tanh" || type == "Sigmoid" ||
                 type == "Relu" || type == "Elu" ||
                 type == "Identity" || type == "Relu6")
        {
            std::string dnnType = type;
...
modules/dnn/test/test_tf_importer.cpp
...
@@ -386,4 +386,28 @@ TEST(Test_TensorFlow, memory_read)
    runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true);
}

TEST(Test_TensorFlow, opencv_face_detector_uint8)
{
    std::string proto = findDataFile("dnn/opencv_face_detector.pbtxt", false);
    std::string model = findDataFile("dnn/opencv_face_detector_uint8.pb", false);

    Net net = readNetFromTensorflow(model, proto);
    Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false));
    Mat blob = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);

    net.setInput(blob);
    // Output has shape 1x1xNx7 where N is the number of detections.
    // Every detection is a vector of values [id, classId, confidence, left, top, right, bottom].
    Mat out = net.forward();

    // References are from the test for the Caffe model.
    Mat ref = (Mat_<float>(6, 5) << 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631,
                                    0.9934696, 0.2831718, 0.50738752, 0.345781, 0.5985168,
                                    0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290,
                                    0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477,
                                    0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494,
                                    0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801);
    normAssert(out.reshape(1, out.total() / 7).rowRange(0, 6).colRange(2, 7), ref, "", 2.8e-4, 3.4e-3);
}

}