Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
2959e7ab
Commit
2959e7ab
authored
7 years ago
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #9188 from arrybn:mobilenet_ssd_sample
parents
d34eec3a
ce1cc352
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
249 additions
and
1 deletion
+249
-1
detection_output_layer.cpp
modules/dnn/src/layers/detection_output_layer.cpp
+1
-1
MobileNetSSD_300x300.prototxt
samples/data/dnn/MobileNetSSD_300x300.prototxt
+0
-0
mobilenet_ssd_python.py
samples/dnn/mobilenet_ssd_python.py
+87
-0
ssd_mobilenet_object_detection.cpp
samples/dnn/ssd_mobilenet_object_detection.cpp
+161
-0
No files found.
modules/dnn/src/layers/detection_output_layer.cpp
View file @
2959e7ab
...
...
@@ -234,7 +234,7 @@ public:
if
(
numKept
==
0
)
{
CV_ErrorNoReturn
(
Error
::
StsError
,
"Couldn't find any detections"
)
;
return
;
}
int
outputShape
[]
=
{
1
,
1
,
(
int
)
numKept
,
7
};
outputs
[
0
].
create
(
4
,
outputShape
,
CV_32F
);
...
...
This diff is collapsed.
Click to expand it.
samples/data/dnn/MobileNetSSD_300x300.prototxt
0 → 100644
View file @
2959e7ab
This diff is collapsed.
Click to expand it.
samples/dnn/mobilenet_ssd_python.py
0 → 100644
View file @
2959e7ab
import
numpy
as
np
import
argparse
try
:
import
cv2
as
cv
except
ImportError
:
raise
ImportError
(
'Can
\'
t find OpenCV Python module. If you
\'
ve built it from sources without installation, '
'configure environemnt variable PYTHONPATH to "opencv_build_dir/lib" directory (with "python3" subdirectory if required)'
)
# Input geometry and preprocessing constants handed to blobFromImage below.
inWidth = 300
inHeight = 300
WHRatio = inWidth / float(inHeight)
inScaleFactor = 0.007843  # approximately 1 / 127.5
meanVal = 127.5

# Label table indexed by the class id read from column 1 of each detection.
classNames = ('background',
              'aeroplane', 'bicycle', 'bird', 'boat',
              'bottle', 'bus', 'car', 'cat', 'chair',
              'cow', 'diningtable', 'dog', 'horse',
              'motorbike', 'person', 'pottedplant',
              'sheep', 'sofa', 'train', 'tvmonitor')

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--video", help="path to video file. If empty, camera's stream will be used")
    parser.add_argument("--prototxt", default="MobileNetSSD_300x300.prototxt",
                        help="path to caffe prototxt")
    parser.add_argument("-c", "--caffemodel", help="path to caffemodel file, download it here: "
                        "https://github.com/chuanqi305/MobileNet-SSD/blob/master/MobileNetSSD_train.caffemodel")
    # type=float: without it a value given on the command line stays a string
    # and the comparison against the detection confidence is meaningless.
    parser.add_argument("--thr", default=0.2, type=float,
                        help="confidence threshold to filter out weak detections")
    args = parser.parse_args()

    # OpenCV is imported as `cv` at the top of the file; the dnn API lives in cv.dnn.
    net = cv.dnn.readNetFromCaffe(args.prototxt, args.caffemodel)

    # args.video is None when the option is omitted, so test truthiness, not len().
    if args.video:
        cap = cv.VideoCapture(args.video)
    else:
        cap = cv.VideoCapture(0)

    while True:
        # Capture frame-by-frame; stop when the source has no more frames.
        ret, frame = cap.read()
        if not ret or frame is None:
            break

        # Pass the mean for every channel: a single number is converted to a
        # scalar whose remaining components are zero, so only the first
        # channel would have the mean subtracted.
        blob = cv.dnn.blobFromImage(frame, inScaleFactor, (inWidth, inHeight),
                                    (meanVal, meanVal, meanVal))
        net.setInput(blob)
        detections = net.forward()

        cols = frame.shape[1]
        rows = frame.shape[0]

        # Largest centered crop that has the network's width/height ratio.
        if cols / float(rows) > WHRatio:
            cropSize = (int(rows * WHRatio), rows)
        else:
            cropSize = (cols, int(cols / WHRatio))

        # Floor division keeps the slice bounds integral on Python 3 as well.
        y1 = (rows - cropSize[1]) // 2
        y2 = y1 + cropSize[1]
        x1 = (cols - cropSize[0]) // 2
        x2 = x1 + cropSize[0]
        frame = frame[y1:y2, x1:x2]

        cols = frame.shape[1]
        rows = frame.shape[0]

        # Each detection row is read as: [1] class id, [2] confidence,
        # [3..6] box corners, which are scaled by the cropped frame size.
        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence > args.thr:
                class_id = int(detections[0, 0, i, 1])

                xLeftBottom = int(detections[0, 0, i, 3] * cols)
                yLeftBottom = int(detections[0, 0, i, 4] * rows)
                xRightTop = int(detections[0, 0, i, 5] * cols)
                yRightTop = int(detections[0, 0, i, 6] * rows)

                cv.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),
                             (0, 255, 0))

                label = classNames[class_id] + ": " + str(confidence)
                labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)

                # White background box so the black label text stays readable.
                cv.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]),
                             (xLeftBottom + labelSize[0], yLeftBottom + baseLine),
                             (255, 255, 255), cv.FILLED)
                cv.putText(frame, label, (xLeftBottom, yLeftBottom),
                           cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

        cv.imshow("detections", frame)
        # Any key press ends the loop.
        if cv.waitKey(1) >= 0:
            break
This diff is collapsed.
Click to expand it.
samples/dnn/ssd_mobilenet_object_detection.cpp
0 → 100644
View file @
2959e7ab
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using
namespace
cv
;
using
namespace
cv
::
dnn
;
#include <fstream>
#include <iostream>
#include <cstdlib>
using
namespace
std
;
// Input geometry and preprocessing constants passed to blobFromImage() in main().
const size_t inWidth = 300;
const size_t inHeight = 300;
const float WHRatio = inWidth / (float)inHeight;
const float inScaleFactor = 0.007843f; // approximately 1 / 127.5
const float meanVal = 127.5;

// Label table indexed by the class id read from column 1 of each detection row.
const char* classNames[] = {"background",
                            "aeroplane", "bicycle", "bird", "boat",
                            "bottle", "bus", "car", "cat", "chair",
                            "cow", "diningtable", "dog", "horse",
                            "motorbike", "person", "pottedplant",
                            "sheep", "sofa", "train", "tvmonitor"};

// Banner printed before the usage message when "help" is requested.
// Adjacent literals are concatenated verbatim, so each piece carries its own
// trailing space where one is needed.
const char* about = "This sample uses Single-Shot Detector "
                    "(https://arxiv.org/abs/1512.02325) "
                    "to detect objects on image.\n"
                    ".caffemodel model's file is available here: "
                    "https://github.com/chuanqi305/MobileNet-SSD/blob/master/MobileNetSSD_train.caffemodel\n";

// Key table for cv::CommandLineParser: "{ name | default | description }".
const char* params
    = "{ help | false | print usage }"
      "{ proto | MobileNetSSD_300x300.prototxt | model configuration }"
      "{ model | | model weights }"
      "{ video | | video for detection }"
      "{ out | | path to output video file}"
      "{ min_confidence | 0.2 | min confidence }";
int
main
(
int
argc
,
char
**
argv
)
{
cv
::
CommandLineParser
parser
(
argc
,
argv
,
params
);
if
(
parser
.
get
<
bool
>
(
"help"
))
{
cout
<<
about
<<
endl
;
parser
.
printMessage
();
return
0
;
}
String
modelConfiguration
=
parser
.
get
<
string
>
(
"proto"
);
String
modelBinary
=
parser
.
get
<
string
>
(
"model"
);
//! [Initialize network]
dnn
::
Net
net
=
readNetFromCaffe
(
modelConfiguration
,
modelBinary
);
//! [Initialize network]
VideoCapture
cap
(
parser
.
get
<
String
>
(
"video"
));
if
(
!
cap
.
isOpened
())
// check if we succeeded
{
cap
=
VideoCapture
(
0
);
if
(
!
cap
.
isOpened
())
{
cout
<<
"Couldn't find camera"
<<
endl
;
return
-
1
;
}
}
Size
inVideoSize
=
Size
((
int
)
cap
.
get
(
CV_CAP_PROP_FRAME_WIDTH
),
//Acquire input size
(
int
)
cap
.
get
(
CV_CAP_PROP_FRAME_HEIGHT
));
Size
cropSize
;
if
(
inVideoSize
.
width
/
(
float
)
inVideoSize
.
height
>
WHRatio
)
{
cropSize
=
Size
(
static_cast
<
int
>
(
inVideoSize
.
height
*
WHRatio
),
inVideoSize
.
height
);
}
else
{
cropSize
=
Size
(
inVideoSize
.
width
,
static_cast
<
int
>
(
inVideoSize
.
width
/
WHRatio
));
}
Rect
crop
(
Point
((
inVideoSize
.
width
-
cropSize
.
width
)
/
2
,
(
inVideoSize
.
height
-
cropSize
.
height
)
/
2
),
cropSize
);
VideoWriter
outputVideo
;
outputVideo
.
open
(
parser
.
get
<
String
>
(
"out"
)
,
static_cast
<
int
>
(
cap
.
get
(
CV_CAP_PROP_FOURCC
)),
cap
.
get
(
CV_CAP_PROP_FPS
),
cropSize
,
true
);
for
(;;)
{
Mat
frame
;
cap
>>
frame
;
// get a new frame from camera
//! [Prepare blob]
Mat
inputBlob
=
blobFromImage
(
frame
,
inScaleFactor
,
Size
(
inWidth
,
inHeight
),
meanVal
);
//Convert Mat to batch of images
//! [Prepare blob]
//! [Set input blob]
net
.
setInput
(
inputBlob
,
"data"
);
//set the network input
//! [Set input blob]
TickMeter
tm
;
tm
.
start
();
//! [Make forward pass]
Mat
detection
=
net
.
forward
(
"detection_out"
);
//compute output
tm
.
stop
();
cout
<<
"Inference time, ms: "
<<
tm
.
getTimeMilli
()
<<
endl
;
//! [Make forward pass]
Mat
detectionMat
(
detection
.
size
[
2
],
detection
.
size
[
3
],
CV_32F
,
detection
.
ptr
<
float
>
());
frame
=
frame
(
crop
);
float
confidenceThreshold
=
parser
.
get
<
float
>
(
"min_confidence"
);
for
(
int
i
=
0
;
i
<
detectionMat
.
rows
;
i
++
)
{
float
confidence
=
detectionMat
.
at
<
float
>
(
i
,
2
);
if
(
confidence
>
confidenceThreshold
)
{
size_t
objectClass
=
(
size_t
)(
detectionMat
.
at
<
float
>
(
i
,
1
));
int
xLeftBottom
=
static_cast
<
int
>
(
detectionMat
.
at
<
float
>
(
i
,
3
)
*
frame
.
cols
);
int
yLeftBottom
=
static_cast
<
int
>
(
detectionMat
.
at
<
float
>
(
i
,
4
)
*
frame
.
rows
);
int
xRightTop
=
static_cast
<
int
>
(
detectionMat
.
at
<
float
>
(
i
,
5
)
*
frame
.
cols
);
int
yRightTop
=
static_cast
<
int
>
(
detectionMat
.
at
<
float
>
(
i
,
6
)
*
frame
.
rows
);
ostringstream
ss
;
ss
<<
confidence
;
String
conf
(
ss
.
str
());
Rect
object
((
int
)
xLeftBottom
,
(
int
)
yLeftBottom
,
(
int
)(
xRightTop
-
xLeftBottom
),
(
int
)(
yRightTop
-
yLeftBottom
));
rectangle
(
frame
,
object
,
Scalar
(
0
,
255
,
0
));
String
label
=
String
(
classNames
[
objectClass
])
+
": "
+
conf
;
int
baseLine
=
0
;
Size
labelSize
=
getTextSize
(
label
,
FONT_HERSHEY_SIMPLEX
,
0.5
,
1
,
&
baseLine
);
rectangle
(
frame
,
Rect
(
Point
(
xLeftBottom
,
yLeftBottom
-
labelSize
.
height
),
Size
(
labelSize
.
width
,
labelSize
.
height
+
baseLine
)),
Scalar
(
255
,
255
,
255
),
CV_FILLED
);
putText
(
frame
,
label
,
Point
(
xLeftBottom
,
yLeftBottom
),
FONT_HERSHEY_SIMPLEX
,
0.5
,
Scalar
(
0
,
0
,
0
));
}
}
if
(
outputVideo
.
isOpened
())
outputVideo
<<
frame
;
imshow
(
"detections"
,
frame
);
if
(
waitKey
(
1
)
>=
0
)
break
;
}
return
0
;
}
// main
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment