Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
130546e1
Commit
130546e1
authored
Mar 06, 2018
by
Dmitry Kurtaev
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Semantic segmentation sample.
parent
f2440cea
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
418 additions
and
353 deletions
+418
-353
core.hpp
modules/core/include/opencv2/core.hpp
+9
-1
command_line_parser.cpp
modules/core/src/command_line_parser.cpp
+6
-0
classification_classes_ILSVRC2012.txt
samples/data/dnn/classification_classes_ILSVRC2012.txt
+0
-0
enet-classes.txt
samples/data/dnn/enet-classes.txt
+20
-0
object_detection_classes_coco.txt
samples/data/dnn/object_detection_classes_coco.txt
+0
-0
object_detection_classes_pascal_voc.txt
samples/data/dnn/object_detection_classes_pascal_voc.txt
+0
-0
README.md
samples/dnn/README.md
+7
-0
classification.cpp
samples/dnn/classification.cpp
+13
-25
fcn_semsegm.cpp
samples/dnn/fcn_semsegm.cpp
+0
-138
object_detection.cpp
samples/dnn/object_detection.cpp
+1
-14
segmentation.cpp
samples/dnn/segmentation.cpp
+237
-0
segmentation.py
samples/dnn/segmentation.py
+125
-0
torch_enet.cpp
samples/dnn/torch_enet.cpp
+0
-175
No files found.
modules/core/include/opencv2/core.hpp
View file @
130546e1
...
...
@@ -3159,7 +3159,7 @@ protected:
struct
Param
{
enum
{
INT
=
0
,
BOOLEAN
=
1
,
REAL
=
2
,
STRING
=
3
,
MAT
=
4
,
MAT_VECTOR
=
5
,
ALGORITHM
=
6
,
FLOAT
=
7
,
UNSIGNED_INT
=
8
,
UINT64
=
9
,
UCHAR
=
11
};
UNSIGNED_INT
=
8
,
UINT64
=
9
,
UCHAR
=
11
,
SCALAR
=
12
};
};
...
...
@@ -3252,6 +3252,14 @@ template<> struct ParamType<uchar>
enum
{
type
=
Param
::
UCHAR
};
};
/** ParamType specialization that tags cv::Scalar values with the Param::SCALAR type id. */
template<> struct ParamType<Scalar>
{
    typedef const Scalar& const_param_type;  // how a Scalar parameter is passed
    typedef Scalar member_type;              // how a Scalar parameter is stored

    enum { type = Param::SCALAR };
};
//! @} core_basic
}
//namespace cv
...
...
modules/core/src/command_line_parser.cpp
View file @
130546e1
...
...
@@ -104,6 +104,12 @@ static void from_str(const String& str, int type, void* dst)
ss
>>
*
(
double
*
)
dst
;
else
if
(
type
==
Param
::
STRING
)
*
(
String
*
)
dst
=
str
;
else
if
(
type
==
Param
::
SCALAR
)
{
Scalar
&
scalar
=
*
(
Scalar
*
)
dst
;
for
(
int
i
=
0
;
i
<
4
&&
!
ss
.
eof
();
++
i
)
ss
>>
scalar
[
i
];
}
else
CV_Error
(
Error
::
StsBadArg
,
"unknown/unsupported parameter type"
);
...
...
samples/dnn/classification_classes_ILSVRC2012.txt
→
samples/d
ata/d
nn/classification_classes_ILSVRC2012.txt
View file @
130546e1
File moved
samples/data/dnn/enet-classes.txt
0 → 100644
View file @
130546e1
Unlabeled
Road
Sidewalk
Building
Wall
Fence
Pole
TrafficLight
TrafficSign
Vegetation
Terrain
Sky
Person
Rider
Car
Truck
Bus
Train
Motorcycle
Bicycle
samples/dnn/object_detection_classes_coco.txt
→
samples/d
ata/d
nn/object_detection_classes_coco.txt
View file @
130546e1
File moved
samples/dnn/object_detection_classes_pascal_voc.txt
→
samples/d
ata/d
nn/object_detection_classes_pascal_voc.txt
View file @
130546e1
File moved
samples/dnn/README.md
View file @
130546e1
...
...
@@ -20,7 +20,14 @@
| GoogLeNet |
`1.0`
|
`224x224`
|
`104 117 123`
| BGR |
|
[
SqueezeNet
](
https://github.com/DeepScale/SqueezeNet
)
|
`1.0`
|
`227x227`
|
`0 0 0`
| BGR |
### Semantic segmentation
| Model | Scale | Size WxH| Mean subtraction | Channels order |
|---------------|-------|-----------|--------------------|-------|
|
[
ENet
](
https://github.com/e-lab/ENet-training
)
|
`0.00392 (1/255)`
|
`1024x512`
|
`0 0 0`
| RGB |
| FCN8s |
`1.0`
|
`500x500`
|
`0 0 0`
| BGR |
## References
*
[
Models downloading script
](
https://github.com/opencv/opencv_extra/blob/master/testdata/dnn/download_models.py
)
*
[
Configuration files adapted for OpenCV
](
https://github.com/opencv/opencv_extra/tree/master/testdata/dnn
)
*
[
How to import models from TensorFlow Object Detection API
](
https://github.com/opencv/opencv/wiki/TensorFlow-Object-Detection-API
)
*
[
Names of classes from different datasets
](
https://github.com/opencv/opencv/tree/master/samples/data/dnn
)
samples/dnn/classification.cpp
View file @
130546e1
#include <fstream>
#include <iostream>
#include <sstream>
#include <opencv2/dnn.hpp>
...
...
@@ -17,17 +16,17 @@ const char* keys =
"{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }"
"{ classes | | Optional path to a text file with names of classes. }"
"{ mean | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }"
"{ scale |
1 | Preprocess input image by multiplying on a scale factor. }"
"{ width |
-1
| Preprocess input image by resizing to a specific width. }"
"{ height |
-1
| Preprocess input image by resizing to a specific height. }"
"{ rgb |
| Indicate that model works with RGB input images instead BGR ones. }"
"{ backend |
0 | Choose one of computation backends: "
"0: default C++ backend, "
"1: Halide language (http://halide-lang.org/), "
"2: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)}"
"{ target |
0 | Choose one of target computation devices: "
"0: CPU target (by default),"
"1: OpenCL }"
;
"{ scale | 1 | Preprocess input image by multiplying on a scale factor. }"
"{ width |
| Preprocess input image by resizing to a specific width. }"
"{ height |
| Preprocess input image by resizing to a specific height. }"
"{ rgb | | Indicate that model works with RGB input images instead BGR ones. }"
"{ backend | 0 | Choose one of computation backends: "
"0: default C++ backend, "
"1: Halide language (http://halide-lang.org/), "
"2: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)}"
"{ target | 0 | Choose one of target computation devices: "
"0: CPU target (by default),"
"1: OpenCL }"
;
using
namespace
cv
;
using
namespace
dnn
;
...
...
@@ -45,7 +44,9 @@ int main(int argc, char** argv)
}
float
scale
=
parser
.
get
<
float
>
(
"scale"
);
Scalar
mean
=
parser
.
get
<
Scalar
>
(
"mean"
);
bool
swapRB
=
parser
.
get
<
bool
>
(
"rgb"
);
CV_Assert
(
parser
.
has
(
"width"
),
parser
.
has
(
"height"
));
int
inpWidth
=
parser
.
get
<
int
>
(
"width"
);
int
inpHeight
=
parser
.
get
<
int
>
(
"height"
);
String
model
=
parser
.
get
<
String
>
(
"model"
);
...
...
@@ -54,19 +55,6 @@ int main(int argc, char** argv)
int
backendId
=
parser
.
get
<
int
>
(
"backend"
);
int
targetId
=
parser
.
get
<
int
>
(
"target"
);
// Parse mean values.
Scalar
mean
;
if
(
parser
.
has
(
"mean"
))
{
std
::
istringstream
meanStr
(
parser
.
get
<
String
>
(
"mean"
));
std
::
vector
<
float
>
meanValues
;
float
val
;
while
(
meanStr
>>
val
)
meanValues
.
push_back
(
val
);
CV_Assert
(
meanValues
.
size
()
==
3
);
mean
=
Scalar
(
meanValues
[
0
],
meanValues
[
1
],
meanValues
[
2
]);
}
// Open file with classes names.
if
(
parser
.
has
(
"classes"
))
{
...
...
samples/dnn/fcn_semsegm.cpp
deleted
100644 → 0
View file @
f2440cea
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using
namespace
cv
;
using
namespace
cv
::
dnn
;
#include <fstream>
#include <iostream>
#include <cstdlib>
using
namespace
std
;
static const string fcnType = "fcn8s";

/**
 * Reads a color table from a text file.
 *
 * Every non-empty line is parsed as one name token followed by three integers;
 * the integers become, in file order, the three components of a Vec3b.
 * If the file cannot be opened a message is written to cerr and the process
 * terminates through exit(-1).
 *
 * @param filename Path of the color file (defaults to "pascal-classes.txt").
 * @return One color per non-empty line, in file order.
 */
static vector<cv::Vec3b> readColors(const string &filename = "pascal-classes.txt")
{
    ifstream fp(filename.c_str());
    if (!fp.is_open())
    {
        cerr << "File with colors not found: " << filename << endl;
        exit(-1);
    }

    vector<cv::Vec3b> colors;
    string line;
    // Drive the loop by the read itself: testing eof() before reading never
    // terminates when the stream fails without reaching end-of-file.
    while (getline(fp, line))
    {
        if (line.empty())
            continue;

        stringstream ss(line);
        string name;
        ss >> name;

        cv::Vec3b color;
        for (int i = 0; i < 3; i++)
        {
            int temp = 0;  // a missing or malformed component yields 0
            ss >> temp;
            color[i] = static_cast<uchar>(temp);
        }
        colors.push_back(color);
    }

    return colors;  // fp is closed by its destructor
}
static
void
colorizeSegmentation
(
const
Mat
&
score
,
const
vector
<
cv
::
Vec3b
>
&
colors
,
cv
::
Mat
&
segm
)
{
const
int
rows
=
score
.
size
[
2
];
const
int
cols
=
score
.
size
[
3
];
const
int
chns
=
score
.
size
[
1
];
cv
::
Mat
maxCl
=
cv
::
Mat
::
zeros
(
rows
,
cols
,
CV_8UC1
);
cv
::
Mat
maxVal
(
rows
,
cols
,
CV_32FC1
,
cv
::
Scalar
(
-
FLT_MAX
));
for
(
int
ch
=
0
;
ch
<
chns
;
ch
++
)
{
for
(
int
row
=
0
;
row
<
rows
;
row
++
)
{
const
float
*
ptrScore
=
score
.
ptr
<
float
>
(
0
,
ch
,
row
);
uchar
*
ptrMaxCl
=
maxCl
.
ptr
<
uchar
>
(
row
);
float
*
ptrMaxVal
=
maxVal
.
ptr
<
float
>
(
row
);
for
(
int
col
=
0
;
col
<
cols
;
col
++
)
{
if
(
ptrScore
[
col
]
>
ptrMaxVal
[
col
])
{
ptrMaxVal
[
col
]
=
ptrScore
[
col
];
ptrMaxCl
[
col
]
=
(
uchar
)
ch
;
}
}
}
}
segm
.
create
(
rows
,
cols
,
CV_8UC3
);
for
(
int
row
=
0
;
row
<
rows
;
row
++
)
{
const
uchar
*
ptrMaxCl
=
maxCl
.
ptr
<
uchar
>
(
row
);
cv
::
Vec3b
*
ptrSegm
=
segm
.
ptr
<
cv
::
Vec3b
>
(
row
);
for
(
int
col
=
0
;
col
<
cols
;
col
++
)
{
ptrSegm
[
col
]
=
colors
[
ptrMaxCl
[
col
]];
}
}
}
int
main
(
int
argc
,
char
**
argv
)
{
String
modelTxt
=
fcnType
+
"-heavy-pascal.prototxt"
;
String
modelBin
=
fcnType
+
"-heavy-pascal.caffemodel"
;
String
imageFile
=
(
argc
>
1
)
?
argv
[
1
]
:
"rgb.jpg"
;
vector
<
cv
::
Vec3b
>
colors
=
readColors
();
//! [Initialize network]
dnn
::
Net
net
=
readNetFromCaffe
(
modelTxt
,
modelBin
);
//! [Initialize network]
if
(
net
.
empty
())
{
cerr
<<
"Can't load network by using the following files: "
<<
endl
;
cerr
<<
"prototxt: "
<<
modelTxt
<<
endl
;
cerr
<<
"caffemodel: "
<<
modelBin
<<
endl
;
cerr
<<
fcnType
<<
"-heavy-pascal.caffemodel can be downloaded here:"
<<
endl
;
cerr
<<
"http://dl.caffe.berkeleyvision.org/"
<<
fcnType
<<
"-heavy-pascal.caffemodel"
<<
endl
;
exit
(
-
1
);
}
//! [Prepare blob]
Mat
img
=
imread
(
imageFile
);
if
(
img
.
empty
())
{
cerr
<<
"Can't read image from the file: "
<<
imageFile
<<
endl
;
exit
(
-
1
);
}
resize
(
img
,
img
,
Size
(
500
,
500
),
0
,
0
,
INTER_LINEAR_EXACT
);
//FCN accepts 500x500 BGR-images
Mat
inputBlob
=
blobFromImage
(
img
,
1
,
Size
(),
Scalar
(),
false
);
//Convert Mat to batch of images
//! [Prepare blob]
//! [Set input blob]
net
.
setInput
(
inputBlob
,
"data"
);
//set the network input
//! [Set input blob]
//! [Make forward pass]
double
t
=
(
double
)
cv
::
getTickCount
();
Mat
score
=
net
.
forward
(
"score"
);
//compute output
t
=
(
double
)
cv
::
getTickCount
()
-
t
;
printf
(
"processing time: %.1fms
\n
"
,
t
*
1000.
/
getTickFrequency
());
//! [Make forward pass]
Mat
colorize
;
colorizeSegmentation
(
score
,
colors
,
colorize
);
Mat
show
;
addWeighted
(
img
,
0.4
,
colorize
,
0.6
,
0.0
,
show
);
imshow
(
"show"
,
show
);
waitKey
(
0
);
return
0
;
}
//main
samples/dnn/object_detection.cpp
View file @
130546e1
#include <fstream>
#include <iostream>
#include <sstream>
#include <opencv2/dnn.hpp>
...
...
@@ -54,23 +53,11 @@ int main(int argc, char** argv)
confThreshold
=
parser
.
get
<
float
>
(
"thr"
);
float
scale
=
parser
.
get
<
float
>
(
"scale"
);
Scalar
mean
=
parser
.
get
<
Scalar
>
(
"mean"
);
bool
swapRB
=
parser
.
get
<
bool
>
(
"rgb"
);
int
inpWidth
=
parser
.
get
<
int
>
(
"width"
);
int
inpHeight
=
parser
.
get
<
int
>
(
"height"
);
// Parse mean values.
Scalar
mean
;
if
(
parser
.
has
(
"mean"
))
{
std
::
istringstream
meanStr
(
parser
.
get
<
String
>
(
"mean"
));
std
::
vector
<
float
>
meanValues
;
float
val
;
while
(
meanStr
>>
val
)
meanValues
.
push_back
(
val
);
CV_Assert
(
meanValues
.
size
()
==
3
);
mean
=
Scalar
(
meanValues
[
0
],
meanValues
[
1
],
meanValues
[
2
]);
}
// Open file with classes names.
if
(
parser
.
has
(
"classes"
))
{
...
...
samples/dnn/segmentation.cpp
0 → 100644
View file @
130546e1
#include <fstream>
#include <sstream>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
// Option table handed to cv::CommandLineParser in main(): one "{ names | default | help }"
// entry per option, concatenated into a single string.
const char* keys =
    // General
    "{ help h | | Print help message. }"
    "{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}"
    // Model description
    "{ model m | | Path to a binary file of model contains trained weights. "
    "It could be a file with extensions .caffemodel (Caffe), "
    ".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet) }"
    "{ config c | | Path to a text file of model contains network configuration. "
    "It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet) }"
    "{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }"
    // Visualization
    "{ classes | | Optional path to a text file with names of classes. }"
    "{ colors | | Optional path to a text file with colors for an every class. "
    "An every color is represented with three values from 0 to 255 in BGR channels order. }"
    // Input preprocessing
    "{ mean | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }"
    "{ scale | 1 | Preprocess input image by multiplying on a scale factor. }"
    "{ width | | Preprocess input image by resizing to a specific width. }"
    "{ height | | Preprocess input image by resizing to a specific height. }"
    "{ rgb | | Indicate that model works with RGB input images instead BGR ones. }"
    // Execution
    "{ backend | 0 | Choose one of computation backends: "
    "0: default C++ backend, "
    "1: Halide language (http://halide-lang.org/), "
    "2: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)}"
    "{ target | 0 | Choose one of target computation devices: "
    "0: CPU target (by default),"
    "1: OpenCL }";
using
namespace
cv
;
using
namespace
dnn
;
// State shared between main(), colorizeSegmentation() and showLegend().
std::vector<std::string> classes;  // class names read from the --classes file
std::vector<Vec3b> colors;         // one color per class: read from --colors or generated

// Shows the legend window built from `classes` and `colors`.
void showLegend();

// Fills `segm` with the color of the best-scoring class of every pixel of `score`.
void colorizeSegmentation(const Mat& score, Mat& segm);
int
main
(
int
argc
,
char
**
argv
)
{
CommandLineParser
parser
(
argc
,
argv
,
keys
);
parser
.
about
(
"Use this script to run semantic segmentation deep learning networks using OpenCV."
);
if
(
argc
==
1
||
parser
.
has
(
"help"
))
{
parser
.
printMessage
();
return
0
;
}
float
scale
=
parser
.
get
<
float
>
(
"scale"
);
Scalar
mean
=
parser
.
get
<
Scalar
>
(
"mean"
);
bool
swapRB
=
parser
.
get
<
bool
>
(
"rgb"
);
CV_Assert
(
parser
.
has
(
"width"
),
parser
.
has
(
"height"
));
int
inpWidth
=
parser
.
get
<
int
>
(
"width"
);
int
inpHeight
=
parser
.
get
<
int
>
(
"height"
);
String
model
=
parser
.
get
<
String
>
(
"model"
);
String
config
=
parser
.
get
<
String
>
(
"config"
);
String
framework
=
parser
.
get
<
String
>
(
"framework"
);
int
backendId
=
parser
.
get
<
int
>
(
"backend"
);
int
targetId
=
parser
.
get
<
int
>
(
"target"
);
// Open file with classes names.
if
(
parser
.
has
(
"classes"
))
{
std
::
string
file
=
parser
.
get
<
String
>
(
"classes"
);
std
::
ifstream
ifs
(
file
.
c_str
());
if
(
!
ifs
.
is_open
())
CV_Error
(
Error
::
StsError
,
"File "
+
file
+
" not found"
);
std
::
string
line
;
while
(
std
::
getline
(
ifs
,
line
))
{
classes
.
push_back
(
line
);
}
}
// Open file with colors.
if
(
parser
.
has
(
"colors"
))
{
std
::
string
file
=
parser
.
get
<
String
>
(
"colors"
);
std
::
ifstream
ifs
(
file
.
c_str
());
if
(
!
ifs
.
is_open
())
CV_Error
(
Error
::
StsError
,
"File "
+
file
+
" not found"
);
std
::
string
line
;
while
(
std
::
getline
(
ifs
,
line
))
{
std
::
istringstream
colorStr
(
line
.
c_str
());
Vec3b
color
;
for
(
int
i
=
0
;
i
<
3
&&
!
colorStr
.
eof
();
++
i
)
colorStr
>>
color
[
i
];
colors
.
push_back
(
color
);
}
}
CV_Assert
(
parser
.
has
(
"model"
));
//! [Read and initialize network]
Net
net
=
readNet
(
model
,
config
,
framework
);
net
.
setPreferableBackend
(
backendId
);
net
.
setPreferableTarget
(
targetId
);
//! [Read and initialize network]
// Create a window
static
const
std
::
string
kWinName
=
"Deep learning semantic segmentation in OpenCV"
;
namedWindow
(
kWinName
,
WINDOW_NORMAL
);
//! [Open a video file or an image file or a camera stream]
VideoCapture
cap
;
if
(
parser
.
has
(
"input"
))
cap
.
open
(
parser
.
get
<
String
>
(
"input"
));
else
cap
.
open
(
0
);
//! [Open a video file or an image file or a camera stream]
// Process frames.
Mat
frame
,
blob
;
while
(
waitKey
(
1
)
<
0
)
{
cap
>>
frame
;
if
(
frame
.
empty
())
{
waitKey
();
break
;
}
//! [Create a 4D blob from a frame]
blobFromImage
(
frame
,
blob
,
scale
,
Size
(
inpWidth
,
inpHeight
),
mean
,
swapRB
,
false
);
//! [Create a 4D blob from a frame]
//! [Set input blob]
net
.
setInput
(
blob
);
//! [Set input blob]
//! [Make forward pass]
Mat
score
=
net
.
forward
();
//! [Make forward pass]
Mat
segm
;
colorizeSegmentation
(
score
,
segm
);
resize
(
segm
,
segm
,
frame
.
size
(),
0
,
0
,
INTER_NEAREST
);
addWeighted
(
frame
,
0.1
,
segm
,
0.9
,
0.0
,
frame
);
// Put efficiency information.
std
::
vector
<
double
>
layersTimes
;
double
freq
=
getTickFrequency
()
/
1000
;
double
t
=
net
.
getPerfProfile
(
layersTimes
)
/
freq
;
std
::
string
label
=
format
(
"Inference time: %.2f ms"
,
t
);
putText
(
frame
,
label
,
Point
(
0
,
15
),
FONT_HERSHEY_SIMPLEX
,
0.5
,
Scalar
(
0
,
255
,
0
));
imshow
(
kWinName
,
frame
);
if
(
!
classes
.
empty
())
showLegend
();
}
return
0
;
}
void
colorizeSegmentation
(
const
Mat
&
score
,
Mat
&
segm
)
{
const
int
rows
=
score
.
size
[
2
];
const
int
cols
=
score
.
size
[
3
];
const
int
chns
=
score
.
size
[
1
];
if
(
colors
.
empty
())
{
// Generate colors.
colors
.
push_back
(
Vec3b
());
for
(
int
i
=
1
;
i
<
chns
;
++
i
)
{
Vec3b
color
;
for
(
int
j
=
0
;
j
<
3
;
++
j
)
color
[
j
]
=
(
colors
[
i
-
1
][
j
]
+
rand
()
%
256
)
/
2
;
colors
.
push_back
(
color
);
}
}
else
if
(
chns
!=
(
int
)
colors
.
size
())
{
CV_Error
(
Error
::
StsError
,
format
(
"Number of output classes does not match "
"number of colors (%d != %d)"
,
chns
,
colors
.
size
()));
}
Mat
maxCl
=
Mat
::
zeros
(
rows
,
cols
,
CV_8UC1
);
Mat
maxVal
(
rows
,
cols
,
CV_32FC1
,
score
.
data
);
for
(
int
ch
=
1
;
ch
<
chns
;
ch
++
)
{
for
(
int
row
=
0
;
row
<
rows
;
row
++
)
{
const
float
*
ptrScore
=
score
.
ptr
<
float
>
(
0
,
ch
,
row
);
uint8_t
*
ptrMaxCl
=
maxCl
.
ptr
<
uint8_t
>
(
row
);
float
*
ptrMaxVal
=
maxVal
.
ptr
<
float
>
(
row
);
for
(
int
col
=
0
;
col
<
cols
;
col
++
)
{
if
(
ptrScore
[
col
]
>
ptrMaxVal
[
col
])
{
ptrMaxVal
[
col
]
=
ptrScore
[
col
];
ptrMaxCl
[
col
]
=
(
uchar
)
ch
;
}
}
}
}
segm
.
create
(
rows
,
cols
,
CV_8UC3
);
for
(
int
row
=
0
;
row
<
rows
;
row
++
)
{
const
uchar
*
ptrMaxCl
=
maxCl
.
ptr
<
uchar
>
(
row
);
Vec3b
*
ptrSegm
=
segm
.
ptr
<
Vec3b
>
(
row
);
for
(
int
col
=
0
;
col
<
cols
;
col
++
)
{
ptrSegm
[
col
]
=
colors
[
ptrMaxCl
[
col
]];
}
}
}
void
showLegend
()
{
static
const
int
kBlockHeight
=
30
;
static
Mat
legend
;
if
(
legend
.
empty
())
{
const
int
numClasses
=
(
int
)
classes
.
size
();
if
((
int
)
colors
.
size
()
!=
numClasses
)
{
CV_Error
(
Error
::
StsError
,
format
(
"Number of output classes does not match "
"number of labels (%d != %d)"
,
colors
.
size
(),
classes
.
size
()));
}
legend
.
create
(
kBlockHeight
*
numClasses
,
200
,
CV_8UC3
);
for
(
int
i
=
0
;
i
<
numClasses
;
i
++
)
{
Mat
block
=
legend
.
rowRange
(
i
*
kBlockHeight
,
(
i
+
1
)
*
kBlockHeight
);
block
.
setTo
(
colors
[
i
]);
putText
(
block
,
classes
[
i
],
Point
(
0
,
kBlockHeight
/
2
),
FONT_HERSHEY_SIMPLEX
,
0.5
,
Vec3b
(
255
,
255
,
255
));
}
namedWindow
(
"Legend"
,
WINDOW_NORMAL
);
imshow
(
"Legend"
,
legend
);
}
}
samples/dnn/segmentation.py
0 → 100644
View file @
130546e1
import
cv2
as
cv
import
argparse
import
numpy
as
np
import
sys
# Computation backends and target devices that can be selected on the command line.
backends = (cv.dnn.DNN_BACKEND_DEFAULT,
            cv.dnn.DNN_BACKEND_HALIDE,
            cv.dnn.DNN_BACKEND_INFERENCE_ENGINE)
targets = (cv.dnn.DNN_TARGET_CPU,
           cv.dnn.DNN_TARGET_OPENCL)

parser = argparse.ArgumentParser(
    description='Use this script to run semantic segmentation deep learning networks using OpenCV.')

# Input source.
parser.add_argument(
    '--input',
    help='Path to input image or video file. Skip this argument to capture frames from a camera.')

# Model description.
parser.add_argument(
    '--model', required=True,
    help='Path to a binary file of model contains trained weights. '
         'It could be a file with extensions .caffemodel (Caffe), '
         '.pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet)')
parser.add_argument(
    '--config',
    help='Path to a text file of model contains network configuration. '
         'It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet)')
parser.add_argument(
    '--framework', choices=['caffe', 'tensorflow', 'torch', 'darknet'],
    help='Optional name of an origin framework of the model. '
         'Detect it automatically if it does not set.')

# Visualization.
parser.add_argument(
    '--classes',
    help='Optional path to a text file with names of classes.')
parser.add_argument(
    '--colors',
    help='Optional path to a text file with colors for an every class. '
         'An every color is represented with three values from 0 to 255 in BGR channels order.')

# Input preprocessing.
parser.add_argument(
    '--mean', nargs='+', type=float, default=[0, 0, 0],
    help='Preprocess input image by subtracting mean values. '
         'Mean values should be in BGR order.')
parser.add_argument(
    '--scale', type=float, default=1.0,
    help='Preprocess input image by multiplying on a scale factor.')
parser.add_argument(
    '--width', type=int, required=True,
    help='Preprocess input image by resizing to a specific width.')
parser.add_argument(
    '--height', type=int, required=True,
    help='Preprocess input image by resizing to a specific height.')
parser.add_argument(
    '--rgb', action='store_true',
    help='Indicate that model works with RGB input images instead BGR ones.')

# Execution. The numeric ids in the help texts are filled in from the tuples above.
parser.add_argument(
    '--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
    help="Choose one of computation backends: "
         "%d: default C++ backend, "
         "%d: Halide language (http://halide-lang.org/), "
         "%d: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)" % backends)
parser.add_argument(
    '--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int,
    help='Choose one of target computation devices: '
         '%d: CPU target (by default), '
         '%d: OpenCL' % targets)

args = parser.parse_args()
# Fixed seed: the colors generated later with np.random are the same on every run.
np.random.seed(324)

# Load names of classes: one name per line of the --classes file.
classes = None
if args.classes:
    with open(args.classes, 'rt') as f:
        classes = f.read().rstrip('\n').split('\n')

# Load colors: every line of the --colors file holds space-separated values
# that are converted to one uint8 array.
colors = None
if args.colors:
    with open(args.colors, 'rt') as f:
        colors = []
        for color in f.read().rstrip('\n').split('\n'):
            colors.append(np.array(color.split(' '), np.uint8))

# Legend image; stays None until showLegend() builds it.
legend = None
def showLegend(classes):
    """Show a 'Legend' window with one colored, labelled block per class.

    The legend image is built only while the global ``legend`` is still None and
    ``classes`` is not None; otherwise the call does nothing.

    :param classes: list of class names, or None when no names were loaded.
    :raises AssertionError: if the number of names differs from the number of
        entries in the global ``colors``.
    """
    global legend
    if classes is None or legend is not None:
        return

    blockHeight = 30
    assert(len(classes) == len(colors))

    legend = np.zeros((blockHeight * len(colors), 200, 3), np.uint8)
    for i in range(len(classes)):
        # Horizontal stripe of the legend that belongs to class i.
        block = legend[i * blockHeight:(i + 1) * blockHeight]
        block[:, :] = colors[i]
        # Integer division: the text origin must consist of integers, and "/"
        # yields a float under Python 3.
        cv.putText(block, classes[i], (0, blockHeight // 2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))

    cv.namedWindow('Legend', cv.WINDOW_NORMAL)
    cv.imshow('Legend', legend)
# Load a network
net = cv.dnn.readNet(args.model, args.config, args.framework)
net.setPreferableBackend(args.backend)
net.setPreferableTarget(args.target)

# Window title matches the C++ segmentation sample.
winName = 'Deep learning semantic segmentation in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)

# Read from the given file, or from capture device 0 when --input is omitted.
cap = cv.VideoCapture(args.input if args.input else 0)
legend = None
while cv.waitKey(1) < 0:
    hasFrame, frame = cap.read()
    if not hasFrame:
        # No more frames: keep the last result on screen until a key is pressed.
        cv.waitKey()
        break

    # Create a 4D blob from a frame.
    blob = cv.dnn.blobFromImage(frame, args.scale, (args.width, args.height), args.mean, args.rgb, crop=False)

    # Run a model
    net.setInput(blob)
    score = net.forward()

    numClasses = score.shape[1]

    # Draw segmentation
    if not colors:
        # Generate colors: black for class 0, then the average of the previous
        # color and a random one. The sum is formed in int32 so that it cannot
        # wrap around in uint8, and the result is stored as uint8 again.
        colors = [np.array([0, 0, 0], np.uint8)]
        for i in range(1, numClasses):
            randomColor = np.random.randint(0, 256, [3], np.uint8)
            mixed = (colors[i - 1].astype(np.int32) + randomColor) // 2
            colors.append(mixed.astype(np.uint8))

    # Best class per pixel, then a single palette lookup for the whole map.
    classIds = np.argmax(score[0], axis=0)
    segm = np.array(colors, dtype=np.uint8)[classIds]

    # Bring the class map back to the frame size without mixing class colors.
    segm = cv.resize(segm, (frame.shape[1], frame.shape[0]), interpolation=cv.INTER_NEAREST)
    frame = (0.1 * frame + 0.9 * segm).astype(np.uint8)

    # Put efficiency information.
    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
    cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

    showLegend(classes)

    cv.imshow(winName, frame)
samples/dnn/torch_enet.cpp
deleted
100644 → 0
View file @
f2440cea
/*
Sample of using OpenCV dnn module with Torch ENet model.
*/
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using
namespace
cv
;
using
namespace
cv
::
dnn
;
#include <fstream>
#include <iostream>
#include <cstdlib>
#include <sstream>
using
namespace
std
;
// Option table handed to cv::CommandLineParser in main(): one "{names | default | help}"
// entry per option, concatenated into a single string.
const String keys =
    "{help h    || Sample app for loading ENet Torch model. "
    "The model and class names list can be downloaded here: "
    "https://www.dropbox.com/sh/dywzk3gyb12hpe5/AAD5YkUa8XgMpHs2gCRgmCVCa }"
    "{model m   || path to Torch .net model file (model_best.net) }"
    "{image i   || path to image file }"
    "{result r  || path to save output blob (optional, binary format, NCHW order) }"
    "{show s    || whether to show all output channels or not}"
    "{o_blob    || output blob's name. If empty, last blob's name in net is used}";
// Number of entries in the `classes` and `colors` tables below.
static const int kNumClasses = 20;

// Class names shown in the legend, indexed by class id.
static const String classes[] = {
    "Background", "Road", "Sidewalk", "Building", "Wall",
    "Fence", "Pole", "TrafficLight", "TrafficSign", "Vegetation",
    "Terrain", "Sky", "Person", "Rider", "Car",
    "Truck", "Bus", "Train", "Motorcycle", "Bicycle"
};

// Color used for each class, indexed by class id.
static const Vec3b colors[] = {
    Vec3b(0, 0, 0),       Vec3b(244, 126, 205), Vec3b(254, 83, 132),  Vec3b(192, 200, 189),
    Vec3b(50, 56, 251),   Vec3b(65, 199, 228),  Vec3b(240, 178, 193), Vec3b(201, 67, 188),
    Vec3b(85, 32, 33),    Vec3b(116, 25, 18),   Vec3b(162, 33, 72),   Vec3b(101, 150, 210),
    Vec3b(237, 19, 16),   Vec3b(149, 197, 72),  Vec3b(80, 182, 21),   Vec3b(141, 5, 207),
    Vec3b(189, 156, 39),  Vec3b(235, 170, 186), Vec3b(133, 109, 144), Vec3b(231, 160, 96)
};

// Shows the "Legend" window built from `classes` and `colors`.
static void showLegend();

// Fills `segm` with the color of the best-scoring class of every pixel of `score`.
static void colorizeSegmentation(const Mat &score, Mat &segm);
/**
 * Entry point of the ENet sample: loads a Torch model, runs it on one image,
 * optionally dumps the raw output blob to a file and, with --show, displays the
 * color-coded segmentation blended over the input together with a legend.
 */
int main(int argc, char **argv)
{
    CommandLineParser parser(argc, argv, keys);

    if (parser.has("help") || argc == 1)
    {
        parser.printMessage();
        return 0;
    }

    String modelFile = parser.get<String>("model");
    String imageFile = parser.get<String>("image");

    if (!parser.check())
    {
        parser.printErrors();
        return 0;
    }

    String resultFile = parser.get<String>("result");

    //! [Read model and initialize network]
    dnn::Net net = dnn::readNetFromTorch(modelFile);

    //! [Prepare blob]
    Mat img = imread(imageFile), input;
    if (img.empty())
    {
        std::cerr << "Can't read image from the file: " << imageFile << std::endl;
        exit(-1);
    }

    // Convert Mat to batch of images
    Mat inputBlob = blobFromImage(img, 1./255, Size(1024, 512), Scalar(), true, false);
    //! [Prepare blob]

    //! [Set input blob]
    net.setInput(inputBlob);  // set the network input
    //! [Set input blob]

    TickMeter tm;

    // Default to the last layer of the net unless --o_blob names another output.
    String oBlob = net.getLayerNames().back();
    if (!parser.get<String>("o_blob").empty())
    {
        oBlob = parser.get<String>("o_blob");
    }

    //! [Make forward pass]
    tm.start();
    Mat result = net.forward(oBlob);
    tm.stop();

    if (!resultFile.empty())
    {
        // The blob is written as one raw run of floats, so it must be continuous.
        CV_Assert(result.isContinuous());

        ofstream fout(resultFile.c_str(), ios::out | ios::binary);
        fout.write((char*)result.data, result.total() * sizeof(float));
        fout.close();
    }

    std::cout << "Output blob: " << result.size[0] << " x " << result.size[1]
              << " x " << result.size[2] << " x " << result.size[3] << "\n";
    std::cout << "Inference time, ms: " << tm.getTimeMilli() << std::endl;

    if (parser.has("show"))
    {
        Mat segm, show;
        colorizeSegmentation(result, segm);
        showLegend();

        // Bring the class map back to the image size without mixing class colors.
        cv::resize(segm, segm, img.size(), 0, 0, cv::INTER_NEAREST);
        addWeighted(img, 0.1, segm, 0.9, 0.0, show);

        imshow("Result", show);
        waitKey();
    }
    return 0;
} //main
static
void
showLegend
()
{
static
const
int
kBlockHeight
=
30
;
cv
::
Mat
legend
(
kBlockHeight
*
kNumClasses
,
200
,
CV_8UC3
);
for
(
int
i
=
0
;
i
<
kNumClasses
;
i
++
)
{
cv
::
Mat
block
=
legend
.
rowRange
(
i
*
kBlockHeight
,
(
i
+
1
)
*
kBlockHeight
);
block
.
setTo
(
colors
[
i
]);
putText
(
block
,
classes
[
i
],
Point
(
0
,
kBlockHeight
/
2
),
FONT_HERSHEY_SIMPLEX
,
0.5
,
Vec3b
(
255
,
255
,
255
));
}
imshow
(
"Legend"
,
legend
);
}
static
void
colorizeSegmentation
(
const
Mat
&
score
,
Mat
&
segm
)
{
const
int
rows
=
score
.
size
[
2
];
const
int
cols
=
score
.
size
[
3
];
const
int
chns
=
score
.
size
[
1
];
Mat
maxCl
=
Mat
::
zeros
(
rows
,
cols
,
CV_8UC1
);
Mat
maxVal
(
rows
,
cols
,
CV_32FC1
,
score
.
data
);
for
(
int
ch
=
1
;
ch
<
chns
;
ch
++
)
{
for
(
int
row
=
0
;
row
<
rows
;
row
++
)
{
const
float
*
ptrScore
=
score
.
ptr
<
float
>
(
0
,
ch
,
row
);
uint8_t
*
ptrMaxCl
=
maxCl
.
ptr
<
uint8_t
>
(
row
);
float
*
ptrMaxVal
=
maxVal
.
ptr
<
float
>
(
row
);
for
(
int
col
=
0
;
col
<
cols
;
col
++
)
{
if
(
ptrScore
[
col
]
>
ptrMaxVal
[
col
])
{
ptrMaxVal
[
col
]
=
ptrScore
[
col
];
ptrMaxCl
[
col
]
=
(
uchar
)
ch
;
}
}
}
}
segm
.
create
(
rows
,
cols
,
CV_8UC3
);
for
(
int
row
=
0
;
row
<
rows
;
row
++
)
{
const
uchar
*
ptrMaxCl
=
maxCl
.
ptr
<
uchar
>
(
row
);
Vec3b
*
ptrSegm
=
segm
.
ptr
<
Vec3b
>
(
row
);
for
(
int
col
=
0
;
col
<
cols
;
col
++
)
{
ptrSegm
[
col
]
=
colors
[
ptrMaxCl
[
col
]];
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment