Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv_contrib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv_contrib
Commits
1a9a3fdc
Commit
1a9a3fdc
authored
Aug 28, 2017
by
Suleyman TURKMEN
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Update webcam_demo.cpp
parent
e7547d61
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
134 additions
and
136 deletions
+134
-136
webcam_demo.cpp
modules/text/samples/webcam_demo.cpp
+134
-136
No files found.
modules/text/samples/webcam_demo.cpp
View file @
1a9a3fdc
/*
* webcam-demo.cpp
*
* A demo program of End-to-end Scene Text Detection and Recognition.
* A demo program of End-to-end Scene Text Detection and Recognition
using webcam or video
.
*
* Created on: Jul 31, 2014
* Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es>
*/
#include "opencv2/text.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/features2d.hpp"
#include <iostream>
using
namespace
std
;
using
namespace
cv
;
using
namespace
cv
::
text
;
...
...
@@ -32,7 +30,7 @@ private:
public
:
Parallel_extractCSER
(
vector
<
Mat
>
&
_channels
,
vector
<
vector
<
ERStat
>
>
&
_regions
,
vector
<
Ptr
<
ERFilter
>
>
_er_filter1
,
vector
<
Ptr
<
ERFilter
>
>
_er_filter2
)
:
channels
(
_channels
),
regions
(
_regions
),
er_filter1
(
_er_filter1
),
er_filter2
(
_er_filter2
){}
:
channels
(
_channels
),
regions
(
_regions
),
er_filter1
(
_er_filter1
),
er_filter2
(
_er_filter2
)
{}
virtual
void
operator
()(
const
cv
::
Range
&
r
)
const
{
...
...
@@ -75,34 +73,81 @@ public:
Parallel_OCR
&
operator
=
(
const
Parallel_OCR
&
a
);
};
//Discard wrongly recognised strings
bool
isRepetitive
(
const
string
&
s
);
//Draw ER's in an image via floodFill
void
er_draw
(
vector
<
Mat
>
&
channels
,
vector
<
vector
<
ERStat
>
>
&
regions
,
vector
<
Vec2i
>
group
,
Mat
&
segmentation
);
//Perform text detection and recognition from webcam
const
char
*
keys
=
{
"{@input | 0 | camera index or video file name}"
"{ image i | | specify input image}"
};
//Perform text detection and recognition from webcam or video
int
main
(
int
argc
,
char
*
argv
[])
{
cout
<<
endl
<<
argv
[
0
]
<<
endl
<<
endl
;
cout
<<
"A demo program of End-to-end Scene Text Detection and Recognition using webcam."
<<
endl
<<
endl
;
cout
<<
" Usage: "
<<
argv
[
0
]
<<
" [camera_index]"
<<
endl
<<
endl
;
CommandLineParser
parser
(
argc
,
argv
,
keys
);
cout
<<
"A demo program of End-to-end Scene Text Detection and Recognition using webcam or video."
<<
endl
<<
endl
;
cout
<<
" Keys: "
<<
endl
;
cout
<<
" Press 'r' to switch between MSER/CSER regions."
<<
endl
;
cout
<<
" Press 'g' to switch between Horizontal and Arbitrary oriented grouping."
<<
endl
;
cout
<<
" Press 'o' to switch between OCRTesseract/OCRHMMDecoder recognition."
<<
endl
;
cout
<<
" Press 's' to scale down frame size to 320x240."
<<
endl
;
cout
<<
" Press 'ESC' to exit."
<<
endl
<<
endl
;
parser
.
printMessage
();
VideoCapture
cap
;
Mat
frame
,
image
,
gray
,
out_img
;
String
input
=
parser
.
get
<
String
>
(
"@input"
);
String
image_file_name
=
parser
.
get
<
String
>
(
"image"
);
if
(
image_file_name
!=
""
)
{
image
=
imread
(
image_file_name
);
if
(
image
.
empty
())
{
cout
<<
"
\n
unable to open "
<<
image_file_name
<<
"
\n
program terminated!
\n
"
;
return
1
;
}
else
{
cout
<<
"
\n
image "
<<
image_file_name
<<
" loaded!
\n
"
;
frame
=
image
.
clone
();
}
}
else
{
cout
<<
"
\n
Initializing capturing... "
;
if
(
input
.
size
()
==
1
&&
isdigit
(
input
[
0
]))
cap
.
open
(
input
[
0
]
-
'0'
);
else
cap
.
open
(
input
);
if
(
!
cap
.
isOpened
())
{
cout
<<
"
\n
Could not initialize capturing!
\n
"
;
return
1
;
}
cout
<<
" Done!"
<<
endl
;
cap
.
read
(
frame
);
}
namedWindow
(
"recognition"
,
WINDOW_NORMAL
);
imshow
(
"recognition"
,
frame
);
waitKey
(
1
);
bool
downsize
=
false
;
int
REGION_TYPE
=
1
;
int
GROUPING_ALGORITHM
=
0
;
int
RECOGNITION
=
0
;
char
*
region_types_str
[
2
]
=
{
const_cast
<
char
*>
(
"ERStats"
),
const_cast
<
char
*>
(
"MSER"
)};
char
*
grouping_algorithms_str
[
2
]
=
{
const_cast
<
char
*>
(
"exhaustive_search"
),
const_cast
<
char
*>
(
"multioriented"
)};
char
*
recognitions_str
[
2
]
=
{
const_cast
<
char
*>
(
"Tesseract"
),
const_cast
<
char
*>
(
"NM_chain_features + KNN"
)};
Mat
frame
,
grey
,
orig_grey
,
out_img
;
String
region_types_str
[
2
]
=
{
"ERStats"
,
"MSER"
};
String
grouping_algorithms_str
[
2
]
=
{
"exhaustive_search"
,
"multioriented"
};
String
recognitions_str
[
2
]
=
{
"Tesseract"
,
"NM_chain_features + KNN"
};
vector
<
Mat
>
channels
;
vector
<
vector
<
ERStat
>
>
regions
(
2
);
//two channels
...
...
@@ -118,15 +163,13 @@ int main(int argc, char* argv[])
er_filters2
.
push_back
(
er_filter2
);
}
//double t_r = getTickCount();
//Initialize OCR engine (we initialize 10 instances in order to work several recognitions in parallel)
cout
<<
"Initializing OCR engines ...
"
<<
endl
;
cout
<<
"Initializing OCR engines ...
"
;
int
num_ocrs
=
10
;
vector
<
Ptr
<
OCRTesseract
>
>
ocrs
;
for
(
int
o
=
0
;
o
<
num_ocrs
;
o
++
)
{
ocrs
.
push_back
(
OCRTesseract
::
create
());
ocrs
.
push_back
(
OCRTesseract
::
create
());
}
Mat
transition_p
;
...
...
@@ -140,26 +183,12 @@ int main(int argc, char* argv[])
vector
<
Ptr
<
OCRHMMDecoder
>
>
decoders
;
for
(
int
o
=
0
;
o
<
num_ocrs
;
o
++
)
{
decoders
.
push_back
(
OCRHMMDecoder
::
create
(
loadOCRHMMClassifierNM
(
"OCRHMM_knn_model_data.xml.gz"
),
voc
,
transition_p
,
emission_p
));
decoders
.
push_back
(
OCRHMMDecoder
::
create
(
loadOCRHMMClassifierNM
(
"OCRHMM_knn_model_data.xml.gz"
),
voc
,
transition_p
,
emission_p
));
}
cout
<<
" Done!"
<<
endl
;
//cout << "TIME_OCR_INITIALIZATION_ALT = "<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << endl;
int
cam_idx
=
0
;
if
(
argc
>
1
)
cam_idx
=
atoi
(
argv
[
1
]);
VideoCapture
cap
(
cam_idx
);
if
(
!
cap
.
isOpened
())
{
cout
<<
"ERROR: Cannot open default camera (0)."
<<
endl
;
return
-
1
;
}
while
(
cap
.
read
(
frame
))
while
(
true
)
{
double
t_all
=
(
double
)
getTickCount
();
...
...
@@ -167,93 +196,65 @@ int main(int argc, char* argv[])
resize
(
frame
,
frame
,
Size
(
320
,
240
));
/*Text Detection*/
cvtColor
(
frame
,
grey
,
COLOR_RGB2GRAY
);
grey
.
copyTo
(
orig_grey
);
cvtColor
(
frame
,
gray
,
COLOR_BGR2GRAY
);
// Extract channels to be processed individually
channels
.
clear
();
channels
.
push_back
(
grey
);
channels
.
push_back
(
255
-
grey
);
channels
.
push_back
(
gray
);
channels
.
push_back
(
255
-
gray
);
regions
[
0
].
clear
();
regions
[
1
].
clear
();
//double t_d = (double)getTickCount();
switch
(
REGION_TYPE
)
{
case
0
:
{
parallel_for_
(
cv
::
Range
(
0
,(
int
)
channels
.
size
()),
Parallel_extractCSER
(
channels
,
regions
,
er_filters1
,
er_filters2
));
case
0
:
// ERStats
parallel_for_
(
cv
::
Range
(
0
,
(
int
)
channels
.
size
()),
Parallel_extractCSER
(
channels
,
regions
,
er_filters1
,
er_filters2
));
break
;
}
case
1
:
{
//Extract MSER
case
1
:
// MSER
vector
<
vector
<
Point
>
>
contours
;
vector
<
Rect
>
bboxes
;
Ptr
<
MSER
>
mser
=
MSER
::
create
(
21
,
(
int
)(
0.00002
*
grey
.
cols
*
grey
.
rows
),(
int
)(
0.05
*
grey
.
cols
*
grey
.
rows
),
1
,
0.7
);
mser
->
detectRegions
(
gr
e
y
,
contours
,
bboxes
);
Ptr
<
MSER
>
mser
=
MSER
::
create
(
21
,
(
int
)(
0.00002
*
gray
.
cols
*
gray
.
rows
),
(
int
)(
0.05
*
gray
.
cols
*
gray
.
rows
),
1
,
0.7
);
mser
->
detectRegions
(
gr
a
y
,
contours
,
bboxes
);
//Convert the output of MSER to suitable input for the grouping/recognition algorithms
if
(
contours
.
size
()
>
0
)
MSERsToERStats
(
grey
,
contours
,
regions
);
MSERsToERStats
(
gray
,
contours
,
regions
);
break
;
}
case
2
:
{
break
;
}
}
//cout << "TIME_REGION_DETECTION_ALT = " << ((double)getTickCount() - t_d)*1000/getTickFrequency() << endl;
// Detect character groups
//double t_g = getTickCount();
vector
<
vector
<
Vec2i
>
>
nm_region_groups
;
vector
<
Rect
>
nm_boxes
;
switch
(
GROUPING_ALGORITHM
)
{
case
0
:
{
case
0
:
// exhaustive_search
erGrouping
(
frame
,
channels
,
regions
,
nm_region_groups
,
nm_boxes
,
ERGROUPING_ORIENTATION_HORIZ
);
break
;
}
case
1
:
{
case
1
:
//multioriented
erGrouping
(
frame
,
channels
,
regions
,
nm_region_groups
,
nm_boxes
,
ERGROUPING_ORIENTATION_ANY
,
"./trained_classifier_erGrouping.xml"
,
0.5
);
break
;
}
}
//cout << "TIME_GROUPING_ALT = " << ((double)getTickCount() - t_g)*1000/getTickFrequency() << endl;
/*Text Recognition (OCR)*/
frame
.
copyTo
(
out_img
);
int
scale
=
downsize
?
2
:
1
;
float
scale_img
=
(
float
)((
600.
f
/
frame
.
rows
)
/
scale
);
float
scale_font
=
(
float
)(
2
-
scale_img
)
/
1.4
f
;
int
bottom_bar_height
=
out_img
.
rows
/
7
;
copyMakeBorder
(
frame
,
out_img
,
0
,
bottom_bar_height
,
0
,
0
,
BORDER_CONSTANT
,
Scalar
(
150
,
150
,
150
));
float
scale_font
=
(
float
)(
bottom_bar_height
/
85.0
);
vector
<
string
>
words_detection
;
float
min_confidence1
=
0.
f
,
min_confidence2
=
0.
f
;
if
(
RECOGNITION
==
0
)
{
min_confidence1
=
51.
f
;
min_confidence2
=
60.
f
;
min_confidence1
=
51.
f
;
min_confidence2
=
60.
f
;
}
vector
<
Mat
>
detections
;
//t_r = getTickCount();
for
(
int
i
=
0
;
i
<
(
int
)
nm_boxes
.
size
();
i
++
)
{
rectangle
(
out_img
,
nm_boxes
[
i
].
tl
(),
nm_boxes
[
i
].
br
(),
Scalar
(
255
,
255
,
0
),
3
);
Mat
group_img
=
Mat
::
zeros
(
frame
.
rows
+
2
,
frame
.
cols
+
2
,
CV_8UC1
);
er_draw
(
channels
,
regions
,
nm_region_groups
[
i
],
group_img
);
group_img
(
nm_boxes
[
i
]).
copyTo
(
group_img
);
...
...
@@ -268,27 +269,25 @@ int main(int argc, char* argv[])
// parallel process detections in batches of ocrs.size() (== num_ocrs)
for
(
int
i
=
0
;
i
<
(
int
)
detections
.
size
();
i
=
i
+
(
int
)
num_ocrs
)
{
Range
r
;
if
(
i
+
(
int
)
num_ocrs
<=
(
int
)
detections
.
size
())
r
=
Range
(
i
,
i
+
(
int
)
num_ocrs
);
else
r
=
Range
(
i
,(
int
)
detections
.
size
());
switch
(
RECOGNITION
)
{
case
0
:
parallel_for_
(
r
,
Parallel_OCR
<
OCRTesseract
>
(
detections
,
outputs
,
boxes
,
words
,
confidences
,
ocrs
));
break
;
case
1
:
parallel_for_
(
r
,
Parallel_OCR
<
OCRHMMDecoder
>
(
detections
,
outputs
,
boxes
,
words
,
confidences
,
decoders
));
break
;
}
}
Range
r
;
if
(
i
+
(
int
)
num_ocrs
<=
(
int
)
detections
.
size
())
r
=
Range
(
i
,
i
+
(
int
)
num_ocrs
);
else
r
=
Range
(
i
,(
int
)
detections
.
size
());
switch
(
RECOGNITION
)
{
case
0
:
// Tesseract
parallel_for_
(
r
,
Parallel_OCR
<
OCRTesseract
>
(
detections
,
outputs
,
boxes
,
words
,
confidences
,
ocrs
));
break
;
case
1
:
// NM_chain_features + KNN
parallel_for_
(
r
,
Parallel_OCR
<
OCRHMMDecoder
>
(
detections
,
outputs
,
boxes
,
words
,
confidences
,
decoders
));
break
;
}
}
for
(
int
i
=
0
;
i
<
(
int
)
detections
.
size
();
i
++
)
{
outputs
[
i
].
erase
(
remove
(
outputs
[
i
].
begin
(),
outputs
[
i
].
end
(),
'\n'
),
outputs
[
i
].
end
());
//cout << "OCR output = \"" << outputs[i] << "\" length = " << outputs[i].size() << endl;
if
(
outputs
[
i
].
size
()
<
3
)
...
...
@@ -311,56 +310,57 @@ int main(int argc, char* argv[])
rectangle
(
out_img
,
boxes
[
i
][
j
].
tl
()
-
Point
(
3
,
word_size
.
height
+
3
),
boxes
[
i
][
j
].
tl
()
+
Point
(
word_size
.
width
,
0
),
Scalar
(
255
,
0
,
255
),
-
1
);
putText
(
out_img
,
words
[
i
][
j
],
boxes
[
i
][
j
].
tl
()
-
Point
(
1
,
1
),
FONT_HERSHEY_SIMPLEX
,
scale_font
,
Scalar
(
255
,
255
,
255
),(
int
)(
3
*
scale_font
));
}
}
//cout << "TIME_OCR_ALT = " << ((double)getTickCount() - t_r)*1000/getTickFrequency() << endl;
t_all
=
((
double
)
getTickCount
()
-
t_all
)
*
1000
/
getTickFrequency
();
char
buff
[
100
];
sprintf
(
buff
,
"%2.1f Fps. @ %dx%d"
,
(
float
)(
1000
/
t_all
),
out_img
.
cols
,
out_img
.
rows
);
string
fps_info
=
buff
;
rectangle
(
out_img
,
Point
(
out_img
.
rows
-
(
160
/
scale
),
out_img
.
rows
-
(
70
/
scale
)
),
Point
(
out_img
.
cols
,
out_img
.
rows
),
Scalar
(
255
,
255
,
255
),
-
1
);
putText
(
out_img
,
fps_info
,
Point
(
10
,
out_img
.
rows
-
(
10
/
scale
)
),
FONT_HERSHEY_DUPLEX
,
scale_font
,
Scalar
(
255
,
0
,
0
));
putText
(
out_img
,
region_types_str
[
REGION_TYPE
],
Point
(
out_img
.
rows
-
(
150
/
scale
),
out_img
.
rows
-
(
50
/
scale
)
),
FONT_HERSHEY_DUPLEX
,
scale_font
,
Scalar
(
255
,
0
,
0
));
putText
(
out_img
,
grouping_algorithms_str
[
GROUPING_ALGORITHM
],
Point
(
out_img
.
rows
-
(
150
/
scale
),
out_img
.
rows
-
(
30
/
scale
)
),
FONT_HERSHEY_DUPLEX
,
scale_font
,
Scalar
(
255
,
0
,
0
));
putText
(
out_img
,
recognitions_str
[
RECOGNITION
],
Point
(
out_img
.
rows
-
(
150
/
scale
),
out_img
.
rows
-
(
10
/
scale
)
),
FONT_HERSHEY_DUPLEX
,
scale_font
,
Scalar
(
255
,
0
,
0
));
int
text_thickness
=
1
+
(
out_img
.
rows
/
500
);
string
fps_info
=
format
(
"%2.1f Fps. %dx%d"
,
(
float
)(
1000
/
t_all
),
frame
.
cols
,
frame
.
rows
);
putText
(
out_img
,
fps_info
,
Point
(
10
,
out_img
.
rows
-
5
),
FONT_HERSHEY_DUPLEX
,
scale_font
,
Scalar
(
255
,
0
,
0
),
text_thickness
);
putText
(
out_img
,
region_types_str
[
REGION_TYPE
],
Point
((
int
)(
out_img
.
cols
*
0.5
),
out_img
.
rows
-
(
int
)(
bottom_bar_height
/
1.5
)),
FONT_HERSHEY_DUPLEX
,
scale_font
,
Scalar
(
255
,
0
,
0
),
text_thickness
);
putText
(
out_img
,
grouping_algorithms_str
[
GROUPING_ALGORITHM
],
Point
((
int
)(
out_img
.
cols
*
0.5
),
out_img
.
rows
-
((
int
)(
bottom_bar_height
/
3
)
+
4
)
),
FONT_HERSHEY_DUPLEX
,
scale_font
,
Scalar
(
255
,
0
,
0
),
text_thickness
);
putText
(
out_img
,
recognitions_str
[
RECOGNITION
],
Point
((
int
)(
out_img
.
cols
*
0.5
),
out_img
.
rows
-
5
),
FONT_HERSHEY_DUPLEX
,
scale_font
,
Scalar
(
255
,
0
,
0
),
text_thickness
);
imshow
(
"recognition"
,
out_img
);
//imwrite("recognition_alt.jpg", out_img);
int
key
=
waitKey
(
30
);
if
(
key
==
27
)
//wait for key
if
((
image_file_name
==
""
)
&&
!
cap
.
read
(
frame
))
{
cout
<<
"esc key pressed"
<<
endl
;
break
;
cout
<<
"Capturing ended! press any key to exit."
<<
endl
;
waitKey
();
return
0
;
}
else
int
key
=
waitKey
(
30
);
//wait for a key press
switch
(
key
)
{
switch
(
key
)
case
27
:
//ESC
cout
<<
"ESC key pressed and exited."
<<
endl
;
return
0
;
case
32
:
//SPACE
imwrite
(
"recognition_alt.jpg"
,
out_img
);
break
;
case
103
:
//'g'
GROUPING_ALGORITHM
=
(
GROUPING_ALGORITHM
+
1
)
%
2
;
cout
<<
"Grouping switched to "
<<
grouping_algorithms_str
[
GROUPING_ALGORITHM
]
<<
endl
;
break
;
case
111
:
//'o'
RECOGNITION
=
(
RECOGNITION
+
1
)
%
2
;
cout
<<
"OCR switched to "
<<
recognitions_str
[
RECOGNITION
]
<<
endl
;
break
;
case
114
:
//'r'
REGION_TYPE
=
(
REGION_TYPE
+
1
)
%
2
;
cout
<<
"Regions switched to "
<<
region_types_str
[
REGION_TYPE
]
<<
endl
;
break
;
case
115
:
//'s'
downsize
=
!
downsize
;
if
(
!
image
.
empty
())
{
case
103
:
//g
GROUPING_ALGORITHM
=
(
GROUPING_ALGORITHM
+
1
)
%
2
;
cout
<<
"Grouping switched to "
<<
grouping_algorithms_str
[
GROUPING_ALGORITHM
]
<<
endl
;
break
;
case
111
:
//o
RECOGNITION
=
(
RECOGNITION
+
1
)
%
2
;
cout
<<
"OCR switched to "
<<
recognitions_str
[
RECOGNITION
]
<<
endl
;
break
;
case
114
:
//r
REGION_TYPE
=
(
REGION_TYPE
+
1
)
%
2
;
cout
<<
"Regions switched to "
<<
region_types_str
[
REGION_TYPE
]
<<
endl
;
break
;
case
115
:
//s
downsize
=
!
downsize
;
break
;
default:
break
;
frame
=
image
.
clone
();
}
break
;
default:
break
;
}
}
return
0
;
...
...
@@ -389,11 +389,9 @@ bool isRepetitive(const string& s)
return
true
;
}
return
false
;
}
void
er_draw
(
vector
<
Mat
>
&
channels
,
vector
<
vector
<
ERStat
>
>
&
regions
,
vector
<
Vec2i
>
group
,
Mat
&
segmentation
)
{
for
(
int
r
=
0
;
r
<
(
int
)
group
.
size
();
r
++
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment