Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
6e33769e
Commit
6e33769e
authored
Dec 23, 2019
by
Liubov Batanina
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add human parsing demo
parent
5e2bcc91
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
265 additions
and
20 deletions
+265
-20
tf_importer.cpp
modules/dnn/src/tensorflow/tf_importer.cpp
+100
-20
human_parsing.py
samples/dnn/human_parsing.py
+165
-0
No files found.
modules/dnn/src/tensorflow/tf_importer.cpp
View file @
6e33769e
...
@@ -1935,34 +1935,114 @@ void TFImporter::populateNet(Net dstNet)
...
@@ -1935,34 +1935,114 @@ void TFImporter::populateNet(Net dstNet)
Mat
indices
=
getTensorContent
(
getConstBlob
(
layer
,
value_id
,
1
));
Mat
indices
=
getTensorContent
(
getConstBlob
(
layer
,
value_id
,
1
));
CV_Assert
(
indices
.
type
()
==
CV_32SC1
);
CV_Assert
(
indices
.
type
()
==
CV_32SC1
);
if
(
indices
.
total
()
!=
2
||
indices
.
at
<
int
>
(
0
)
!=
1
||
indices
.
at
<
int
>
(
1
)
!=
2
)
if
(
indices
.
total
()
==
1
&&
indices
.
at
<
int
>
(
0
)
==
0
)
CV_Error
(
Error
::
StsNotImplemented
,
"Unsupported mode of reduce_mean operation."
);
layerParams
.
set
(
"pool"
,
"ave"
);
layerParams
.
set
(
"global_pooling"
,
true
);
int
id
=
dstNet
.
addLayer
(
name
,
"Pooling"
,
layerParams
);
layer_id
[
name
]
=
id
;
connect
(
layer_id
,
dstNet
,
parsePin
(
layer
.
input
(
0
)),
id
,
0
);
// There are two attributes, "keepdims" and a deprecated "keep_dims".
bool
keepDims
=
false
;
if
(
hasLayerAttr
(
layer
,
"keepdims"
))
keepDims
=
getLayerAttr
(
layer
,
"keepdims"
).
b
();
else
if
(
hasLayerAttr
(
layer
,
"keep_dims"
))
keepDims
=
getLayerAttr
(
layer
,
"keep_dims"
).
b
();
if
(
!
keepDims
)
{
{
LayerParams
flattenLp
;
LayerParams
flattenLp
;
std
::
string
flattenName
=
name
+
"/flatten"
;
std
::
string
flattenName
=
name
+
"/flatten"
;
CV_Assert
(
layer_id
.
find
(
flattenName
)
==
layer_id
.
end
());
CV_Assert
(
layer_id
.
find
(
flattenName
)
==
layer_id
.
end
());
int
flattenId
=
dstNet
.
addLayer
(
flattenName
,
"Flatten"
,
flattenLp
);
int
flattenId
=
dstNet
.
addLayer
(
flattenName
,
"Flatten"
,
flattenLp
);
layer_id
[
flattenName
]
=
flattenId
;
layer_id
[
flattenName
]
=
flattenId
;
connect
(
layer_id
,
dstNet
,
Pin
(
name
),
flattenId
,
0
);
connect
(
layer_id
,
dstNet
,
parsePin
(
layer
.
input
(
0
)),
flattenId
,
0
);
LayerParams
reshapeLp
;
std
::
string
reshapeName
=
name
+
"/reshape"
;
CV_Assert
(
layer_id
.
find
(
reshapeName
)
==
layer_id
.
end
());
reshapeLp
.
set
(
"axis"
,
0
);
reshapeLp
.
set
(
"num_axes"
,
1
);
std
::
vector
<
int
>
newShape
=
{
1
,
1
,
-
1
};
reshapeLp
.
set
(
"dim"
,
DictValue
::
arrayInt
(
&
newShape
[
0
],
newShape
.
size
()));
int
reshapeId
=
dstNet
.
addLayer
(
reshapeName
,
"Reshape"
,
reshapeLp
);
layer_id
[
reshapeName
]
=
reshapeId
;
connect
(
layer_id
,
dstNet
,
Pin
(
flattenName
),
reshapeId
,
0
);
LayerParams
avgLp
;
std
::
string
avgName
=
name
+
"/avg"
;
CV_Assert
(
layer_id
.
find
(
avgName
)
==
layer_id
.
end
());
avgLp
.
set
(
"pool"
,
"ave"
);
avgLp
.
set
(
"kernel_h"
,
3
);
// TODO: node.shape[0]
avgLp
.
set
(
"kernel_w"
,
1
);
int
avgId
=
dstNet
.
addLayer
(
avgName
,
"Pooling"
,
avgLp
);
layer_id
[
avgName
]
=
avgId
;
// one input only
connect
(
layer_id
,
dstNet
,
Pin
(
reshapeName
),
avgId
,
0
);
LayerParams
reshapeLp2
;
std
::
string
reshapeName2
=
name
;
CV_Assert
(
layer_id
.
find
(
reshapeName2
)
==
layer_id
.
end
());
newShape
=
{
2
,
20
,
314
,
253
};
// TODO: remove out shapes
reshapeLp2
.
set
(
"dim"
,
DictValue
::
arrayInt
<
int
*>
(
&
newShape
[
0
],
newShape
.
size
()));
int
reshapeId2
=
dstNet
.
addLayer
(
reshapeName2
,
"Reshape"
,
reshapeLp2
);
layer_id
[
reshapeName2
]
=
reshapeId2
;
connect
(
layer_id
,
dstNet
,
Pin
(
avgName
),
reshapeId2
,
0
);
}
else
{
if
(
indices
.
total
()
!=
2
||
indices
.
at
<
int
>
(
0
)
!=
1
||
indices
.
at
<
int
>
(
1
)
!=
2
)
CV_Error
(
Error
::
StsNotImplemented
,
"Unsupported mode of reduce_mean operation."
);
layerParams
.
set
(
"pool"
,
"ave"
);
layerParams
.
set
(
"global_pooling"
,
true
);
int
id
=
dstNet
.
addLayer
(
name
,
"Pooling"
,
layerParams
);
layer_id
[
name
]
=
id
;
connect
(
layer_id
,
dstNet
,
parsePin
(
layer
.
input
(
0
)),
id
,
0
);
// There are two attributes, "keepdims" and a deprecated "keep_dims".
bool
keepDims
=
false
;
if
(
hasLayerAttr
(
layer
,
"keepdims"
))
keepDims
=
getLayerAttr
(
layer
,
"keepdims"
).
b
();
else
if
(
hasLayerAttr
(
layer
,
"keep_dims"
))
keepDims
=
getLayerAttr
(
layer
,
"keep_dims"
).
b
();
if
(
!
keepDims
)
{
LayerParams
flattenLp
;
std
::
string
flattenName
=
name
+
"/flatten"
;
CV_Assert
(
layer_id
.
find
(
flattenName
)
==
layer_id
.
end
());
int
flattenId
=
dstNet
.
addLayer
(
flattenName
,
"Flatten"
,
flattenLp
);
layer_id
[
flattenName
]
=
flattenId
;
connect
(
layer_id
,
dstNet
,
Pin
(
name
),
flattenId
,
0
);
}
}
}
}
}
else
if
(
type
==
"Pack"
)
{
CV_Assert
(
hasLayerAttr
(
layer
,
"axis"
));
int
dim
=
(
int
)
getLayerAttr
(
layer
,
"axis"
).
i
();
if
(
dim
!=
0
)
CV_Error
(
Error
::
StsNotImplemented
,
"Unsupported mode of pack operation."
);
CV_Assert
(
hasLayerAttr
(
layer
,
"N"
));
int
num
=
(
int
)
getLayerAttr
(
layer
,
"N"
).
i
();
CV_Assert
(
layer
.
input_size
()
==
num
);
std
::
string
base_name
=
name
+
"/reshape_"
;
std
::
vector
<
std
::
string
>
reshape_names
;
for
(
int
i
=
0
;
i
<
num
;
i
++
)
{
std
::
string
reshape_name
=
base_name
+
std
::
to_string
(
i
);
reshape_names
.
push_back
(
reshape_name
);
LayerParams
reshapeLP
;
reshapeLP
.
set
(
"axis"
,
dim
);
reshapeLP
.
set
(
"num_axes"
,
1
);
std
::
vector
<
int
>
outShape
=
{
1
,
-
1
};
reshapeLP
.
set
(
"dim"
,
DictValue
::
arrayInt
(
&
outShape
[
0
],
outShape
.
size
()));
int
id
=
dstNet
.
addLayer
(
reshape_name
,
"Reshape"
,
reshapeLP
);
layer_id
[
reshape_name
]
=
id
;
connect
(
layer_id
,
dstNet
,
parsePin
(
layer
.
input
(
i
)),
id
,
0
);
}
layerParams
.
set
(
"axis"
,
dim
);
int
id
=
dstNet
.
addLayer
(
name
,
"Concat"
,
layerParams
);
layer_id
[
name
]
=
id
;
for
(
int
li
=
0
;
li
<
num
;
li
++
)
{
Pin
inp
=
parsePin
(
reshape_names
[
li
]);
connect
(
layer_id
,
dstNet
,
inp
,
id
,
li
);
}
}
else
if
(
type
==
"ClipByValue"
)
else
if
(
type
==
"ClipByValue"
)
{
{
// op: "ClipByValue"
// op: "ClipByValue"
...
...
samples/dnn/human_parsing.py
0 → 100644
View file @
6e33769e
import
cv2
as
cv
import
numpy
as
np
import
argparse
# Computation backends accepted by --backend. The tuple order must match the
# order of the "%d" placeholders in the --backend help text below, otherwise
# the printed ids are attached to the wrong backend names.
backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE,
            cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV)
# Computation targets accepted by --target, in the order used by its help text.
targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL,
           cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD)

parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet',
                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
# The script only processes a single image read from disk, so the path is mandatory.
parser.add_argument('--input', '-i', required=True, help='Path to input image.')
parser.add_argument('--model', '-m', required=True, help='Path to pb model.')
parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
                    help="Choose one of computation backends: "
                         "%d: automatically (by default), "
                         "%d: Halide language (http://halide-lang.org/), "
                         "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
                         "%d: OpenCV implementation" % backends)
parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int,
                    help='Choose one of target computation devices: '
                         '%d: CPU target (by default), '
                         '%d: OpenCL, '
                         '%d: OpenCL fp16 (half-float precision), '
                         '%d: VPU' % targets)
# To get the pre-trained model, download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view
# To convert the .meta checkpoint to a .pb model correctly, download the original repository https://github.com/Engineering-Course/LIP_JPPNet
# and change the script evaluate_parsing_JPPNet-s2.py for human parsing as follows:
# 1. Remove preprocessing to create image_batch_origin:
# - with tf.name_scope("create_inputs"):
# ...
# Add
# - image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input')
#
# 2. Create input
# image = cv2.imread(path/to/image)
# image_rev = np.flip(image, axis=1)
# image_h, image_w = image.shape[:2]
# input = np.stack([image, image_rev], axis=0)
#
# 3. Hardcode image_h and image_w shapes to determine output shapes
# - parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, [image_h, image_w]),
# tf.image.resize_images(parsing_out1_075, [image_h, image_w]),
# tf.image.resize_images(parsing_out1_125, [image_h, image_w])]), axis=0)
# Do similarly with parsing_out2, parsing_out3
# 4. Remove postprocessing
# - parsing_ = sess.run(raw_output, feed_dict={'input:0': input})
#
# 5. To save model after sess.run(...) add:
# - input_graph_def = tf.get_default_graph().as_graph_def()
# - output_node = "Mean_3"
# - output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_node)
# -
# - output_graph = "LIP_JPPNet.pb"
# - with tf.gfile.GFile(output_graph, "wb") as f:
# - f.write(output_graph_def.SerializeToString())
def preprocess(image_path):
    """
    Create 4-dimensional blob from image and flip image
    :param image_path: path to input image
    :return: blob produced by cv.dnn.blobFromImages from the image and a copy
             of it flipped along axis 1, in that order
    :raises IOError: if cv.imread could not load the image
    """
    image = cv.imread(image_path)
    # cv.imread reports failure by returning None instead of raising, which would
    # otherwise surface later as an unrelated error inside np.flip.
    if image is None:
        raise IOError("Cannot read input image: {}".format(image_path))
    image_rev = np.flip(image, axis=1)
    # The mean is passed to blobFromImages as in the original sample;
    # presumably these are the per-channel means the model was trained with.
    blob = cv.dnn.blobFromImages([image, image_rev],
                                 mean=(104.00698793, 116.66876762, 122.67891434))
    return blob
def run_net(input, model_path, backend, target):
    """
    Load the model, configure it and run a single forward pass
    :param input: blob handed to the network through setInput
    :param model_path: path to JPPNet model
    :param backend: computation backend passed to setPreferableBackend
    :param target: computation target passed to setPreferableTarget
    :return: result of the network's forward() call
    """
    network = cv.dnn.readNet(model_path)
    network.setPreferableBackend(backend)
    network.setPreferableTarget(target)
    network.setInput(input)
    return network.forward()
def postprocess(out):
    """
    Create a grayscale human segmentation
    :param out: network output; it is unpacked into exactly two score maps
                along axis 0, each with 20 class channels on its first axis
    :return: array of per-pixel class indices with a trailing axis of size 1
    """
    # LIP classes
    # 0 Background
    # 1 Hat
    # 2 Hair
    # 3 Glove
    # 4 Sunglasses
    # 5 UpperClothes
    # 6 Dress
    # 7 Coat
    # 8 Socks
    # 9 Pants
    # 10 Jumpsuits
    # 11 Scarf
    # 12 Skirt
    # 13 Face
    # 14 LeftArm
    # 15 RightArm
    # 16 LeftLeg
    # 17 RightLeg
    # 18 LeftShoe
    # 19 RightShoe
    direct_scores, mirrored_scores = out

    # In the second score map the left/right class pairs (14<->15, 16<->17,
    # 18<->19) are exchanged and the last axis is reversed before averaging.
    channel_order = list(range(14)) + [15, 14, 17, 16, 19, 18]
    unmirrored_scores = np.flip(mirrored_scores[channel_order], axis=2)

    averaged_scores = np.mean(np.stack([direct_scores, unmirrored_scores], axis=0),
                              axis=0, keepdims=False)
    # Add a leading axis so the class axis is axis 1, then pick the best class per pixel.
    class_map = np.argmax(averaged_scores[np.newaxis], axis=1)
    return class_map.transpose(1, 2, 0)
def decode_labels(gray_image):
    """
    Colorize image according to labels
    :param gray_image: grayscale human segmentation result, an integer array of
                       shape (height, width, 1) holding indices into the palette
    :return: uint8 array of shape (height, width, 3) where every pixel holds the
             palette color of its label with the channel order reversed
    """
    height, width, _ = gray_image.shape
    colors = [(0, 0, 0), (128, 0, 0), (255, 0, 0), (0, 85, 0), (170, 0, 51), (255, 85, 0),
              (0, 0, 85), (0, 119, 221), (85, 85, 0), (0, 85, 85), (85, 51, 0), (52, 86, 128),
              (0, 128, 0), (0, 0, 255), (51, 170, 221), (0, 255, 255), (85, 255, 170),
              (170, 255, 85), (255, 255, 0), (255, 170, 0)]

    # Reverse the channel order of the palette once (the first and last channels
    # swap, as the former cv.cvtColor(..., cv.COLOR_BGR2RGB) call did per image).
    palette = np.ascontiguousarray(np.array(colors, dtype=np.uint8)[:, ::-1])

    # Vectorized lookup: one indexing operation instead of a Python-level loop
    # over every pixel.
    segm = palette[gray_image.reshape(height, width)]
    return segm
def parse_human(image_path, model_path, backend, target):
    """
    Run the whole pipeline: build the input blob, infer the network,
    turn its output into a label map and colorize that map.
    :param image_path: path to input image
    :param model_path: path to JPPNet model
    :param backend: name of computation backend
    :param target: name of computation target
    :return: colorized segmentation returned by decode_labels
    """
    blob = preprocess(image_path)
    net_output = run_net(blob, model_path, backend, target)
    label_map = postprocess(net_output)
    return decode_labels(label_map)
if __name__ == '__main__':
    # Unknown command-line options are tolerated and ignored.
    cli_args, _ = parser.parse_known_args()
    segmentation = parse_human(cli_args.input, cli_args.model, cli_args.backend, cli_args.target)

    # Display the colorized segmentation in a window and wait for a key press.
    window_title = 'Deep learning human parsing in OpenCV'
    cv.namedWindow(window_title, cv.WINDOW_AUTOSIZE)
    cv.imshow(window_title, segmentation)
    cv.waitKey()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment