Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv_contrib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv_contrib
Commits
fd2e37da
Commit
fd2e37da
authored
Oct 30, 2017
by
Vladislav Sovrasov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
text: improve DL-based samples
parent
27961cd8
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
81 additions
and
58 deletions
+81
-58
textDetector.hpp
modules/text/include/opencv2/text/textDetector.hpp
+8
-2
text_recognition_cnn.cpp
modules/text/samples/text_recognition_cnn.cpp
+40
-26
textbox_demo.cpp
modules/text/samples/textbox_demo.cpp
+24
-15
text_detectorCNN.cpp
modules/text/src/text_detectorCNN.cpp
+9
-15
No files found.
modules/text/include/opencv2/text/textDetector.hpp
View file @
fd2e37da
...
@@ -54,9 +54,15 @@ public:
...
@@ -54,9 +54,15 @@ public:
@param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture.
@param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture.
@param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.
@param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.
@param detectMultiscale if true, multiple scales of the input image will be used as network input
@param detectionSizes a list of sizes for multiscale detection. The values`[(300,300),(700,500),(700,300),(700,700),(1600,1600)]` are
recommended in @cite LiaoSBWL17 to achieve the best quality.
*/
*/
CV_WRAP
static
Ptr
<
TextDetectorCNN
>
create
(
const
String
&
modelArchFilename
,
const
String
&
modelWeightsFilename
,
bool
detectMultiscale
=
false
);
static
Ptr
<
TextDetectorCNN
>
create
(
const
String
&
modelArchFilename
,
const
String
&
modelWeightsFilename
,
std
::
vector
<
Size
>
detectionSizes
);
/**
@overload
*/
CV_WRAP
static
Ptr
<
TextDetectorCNN
>
create
(
const
String
&
modelArchFilename
,
const
String
&
modelWeightsFilename
);
};
};
//! @}
//! @}
...
...
modules/text/samples/text_recognition_cnn.cpp
View file @
fd2e37da
#include <opencv2/text.hpp>
#include <opencv2/text.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <iostream>
#include <fstream>
#include <fstream>
...
@@ -29,22 +30,27 @@ bool fileExists (const string& filename)
...
@@ -29,22 +30,27 @@ bool fileExists (const string& filename)
return
f
.
good
();
return
f
.
good
();
}
}
void
textbox_draw
(
Mat
src
,
vector
<
Rect
>&
groups
,
vector
<
float
>&
probs
,
float
thr
es
)
void
textbox_draw
(
Mat
src
,
std
::
vector
<
Rect
>&
groups
,
std
::
vector
<
float
>&
probs
,
std
::
vector
<
int
>&
index
es
)
{
{
for
(
size_t
i
=
0
;
i
<
group
s
.
size
();
i
++
)
for
(
size_t
i
=
0
;
i
<
indexe
s
.
size
();
i
++
)
{
{
if
(
probs
[
i
]
>
thres
)
if
(
src
.
type
()
==
CV_8UC3
)
{
{
if
(
src
.
type
()
==
CV_8UC3
)
Rect
currrentBox
=
groups
[
indexes
[
i
]];
{
rectangle
(
src
,
currrentBox
,
Scalar
(
0
,
255
,
255
),
2
,
LINE_AA
);
rectangle
(
src
,
groups
[
i
],
Scalar
(
0
,
255
,
255
),
2
,
LINE_AA
);
String
label
=
format
(
"%.2f"
,
probs
[
indexes
[
i
]]);
String
label
=
format
(
"%.2f"
,
probs
[
i
]);
std
::
cout
<<
"text box: "
<<
currrentBox
<<
" confidence: "
<<
probs
[
indexes
[
i
]]
<<
"
\n
"
;
cout
<<
"text box: "
<<
groups
[
i
]
<<
" confidence: "
<<
probs
[
i
]
<<
"
\n
"
;
putText
(
src
,
label
,
groups
.
at
(
i
).
tl
(),
FONT_HERSHEY_PLAIN
,
1
,
Scalar
(
0
,
0
,
255
),
1
,
LINE_AA
);
int
baseLine
=
0
;
}
Size
labelSize
=
getTextSize
(
label
,
FONT_HERSHEY_PLAIN
,
1
,
1
,
&
baseLine
);
else
int
yLeftBottom
=
std
::
max
(
currrentBox
.
y
,
labelSize
.
height
);
rectangle
(
src
,
groups
[
i
],
Scalar
(
255
),
3
,
8
);
rectangle
(
src
,
Point
(
currrentBox
.
x
,
yLeftBottom
-
labelSize
.
height
),
Point
(
currrentBox
.
x
+
labelSize
.
width
,
yLeftBottom
+
baseLine
),
Scalar
(
255
,
255
,
255
),
FILLED
);
putText
(
src
,
label
,
Point
(
currrentBox
.
x
,
yLeftBottom
),
FONT_HERSHEY_PLAIN
,
1
,
Scalar
(
0
,
0
,
0
),
1
,
LINE_AA
);
}
}
else
rectangle
(
src
,
groups
[
i
],
Scalar
(
255
),
3
,
8
);
}
}
}
}
...
@@ -73,33 +79,41 @@ int main(int argc, const char * argv[])
...
@@ -73,33 +79,41 @@ int main(int argc, const char * argv[])
cout
<<
"Starting Text Box Demo"
<<
endl
;
cout
<<
"Starting Text Box Demo"
<<
endl
;
Ptr
<
text
::
TextDetectorCNN
>
textSpotter
=
Ptr
<
text
::
TextDetectorCNN
>
textSpotter
=
text
::
TextDetectorCNN
::
create
(
modelArch
,
moddelWeights
,
false
);
text
::
TextDetectorCNN
::
create
(
modelArch
,
moddelWeights
);
vector
<
Rect
>
bbox
;
vector
<
Rect
>
bbox
;
vector
<
float
>
outProbabillities
;
vector
<
float
>
outProbabillities
;
textSpotter
->
detect
(
image
,
bbox
,
outProbabillities
);
textSpotter
->
detect
(
image
,
bbox
,
outProbabillities
);
std
::
vector
<
int
>
indexes
;
cv
::
dnn
::
NMSBoxes
(
bbox
,
outProbabillities
,
0.4
f
,
0.5
f
,
indexes
);
float
prob_threshold
=
0.6
f
;
Mat
image_copy
=
image
.
clone
();
Mat
image_copy
=
image
.
clone
();
textbox_draw
(
image_copy
,
bbox
,
outProbabillities
,
prob_threshold
);
textbox_draw
(
image_copy
,
bbox
,
outProbabillities
,
indexes
);
imshow
(
"Text detection"
,
image_copy
);
imshow
(
"Text detection"
,
image_copy
);
image_copy
=
image
.
clone
();
image_copy
=
image
.
clone
();
Ptr
<
text
::
OCRHolisticWordRecognizer
>
wordSpotter
=
Ptr
<
text
::
OCRHolisticWordRecognizer
>
wordSpotter
=
text
::
OCRHolisticWordRecognizer
::
create
(
"dictnet_vgg_deploy.prototxt"
,
"dictnet_vgg.caffemodel"
,
"dictnet_vgg_labels.txt"
);
text
::
OCRHolisticWordRecognizer
::
create
(
"dictnet_vgg_deploy.prototxt"
,
"dictnet_vgg.caffemodel"
,
"dictnet_vgg_labels.txt"
);
for
(
size_t
i
=
0
;
i
<
bbox
.
size
();
i
++
)
for
(
size_t
i
=
0
;
i
<
indexes
.
size
();
i
++
)
{
{
if
(
outProbabillities
[
i
]
>
prob_threshold
)
Mat
wordImg
;
{
cvtColor
(
image
(
bbox
[
indexes
[
i
]]),
wordImg
,
COLOR_BGR2GRAY
);
Mat
wordImg
;
string
word
;
cvtColor
(
image
(
bbox
[
i
]),
wordImg
,
COLOR_BGR2GRAY
);
vector
<
float
>
confs
;
string
word
;
wordSpotter
->
run
(
wordImg
,
word
,
NULL
,
NULL
,
&
confs
);
vector
<
float
>
confs
;
wordSpotter
->
run
(
wordImg
,
word
,
NULL
,
NULL
,
&
confs
);
Rect
currrentBox
=
bbox
[
indexes
[
i
]];
rectangle
(
image_copy
,
bbox
[
i
],
Scalar
(
0
,
255
,
255
),
1
,
LINE_AA
);
rectangle
(
image_copy
,
currrentBox
,
Scalar
(
0
,
255
,
255
),
2
,
LINE_AA
);
putText
(
image_copy
,
word
,
bbox
[
i
].
tl
(),
FONT_HERSHEY_PLAIN
,
1
,
Scalar
(
0
,
0
,
255
),
1
,
LINE_AA
);
}
int
baseLine
=
0
;
Size
labelSize
=
getTextSize
(
word
,
FONT_HERSHEY_PLAIN
,
1
,
1
,
&
baseLine
);
int
yLeftBottom
=
std
::
max
(
currrentBox
.
y
,
labelSize
.
height
);
rectangle
(
image_copy
,
Point
(
currrentBox
.
x
,
yLeftBottom
-
labelSize
.
height
),
Point
(
currrentBox
.
x
+
labelSize
.
width
,
yLeftBottom
+
baseLine
),
Scalar
(
255
,
255
,
255
),
FILLED
);
putText
(
image_copy
,
word
,
Point
(
currrentBox
.
x
,
yLeftBottom
),
FONT_HERSHEY_PLAIN
,
1
,
Scalar
(
0
,
0
,
0
),
1
,
LINE_AA
);
}
}
imshow
(
"Text recognition"
,
image_copy
);
imshow
(
"Text recognition"
,
image_copy
);
cout
<<
"Recognition finished. Press any key to exit.
\n
"
;
cout
<<
"Recognition finished. Press any key to exit.
\n
"
;
...
...
modules/text/samples/textbox_demo.cpp
View file @
fd2e37da
#include <opencv2/text.hpp>
#include <opencv2/text.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/dnn.hpp>
#include <sstream>
#include <sstream>
#include <iostream>
#include <iostream>
...
@@ -27,22 +28,27 @@ bool fileExists (const std::string& filename)
...
@@ -27,22 +28,27 @@ bool fileExists (const std::string& filename)
return
f
.
good
();
return
f
.
good
();
}
}
void
textbox_draw
(
Mat
src
,
std
::
vector
<
Rect
>&
groups
,
std
::
vector
<
float
>&
probs
,
float
thr
es
)
void
textbox_draw
(
Mat
src
,
std
::
vector
<
Rect
>&
groups
,
std
::
vector
<
float
>&
probs
,
std
::
vector
<
int
>&
index
es
)
{
{
for
(
size_t
i
=
0
;
i
<
group
s
.
size
();
i
++
)
for
(
size_t
i
=
0
;
i
<
indexe
s
.
size
();
i
++
)
{
{
if
(
probs
[
i
]
>
thres
)
if
(
src
.
type
()
==
CV_8UC3
)
{
{
if
(
src
.
type
()
==
CV_8UC3
)
Rect
currrentBox
=
groups
[
indexes
[
i
]];
{
rectangle
(
src
,
currrentBox
,
Scalar
(
0
,
255
,
255
),
2
,
LINE_AA
);
rectangle
(
src
,
groups
[
i
],
Scalar
(
0
,
255
,
255
),
2
,
LINE_AA
);
String
label
=
format
(
"%.2f"
,
probs
[
indexes
[
i
]]);
String
label
=
format
(
"%.2f"
,
probs
[
i
]);
std
::
cout
<<
"text box: "
<<
currrentBox
<<
" confidence: "
<<
probs
[
indexes
[
i
]]
<<
"
\n
"
;
std
::
cout
<<
"text box: "
<<
groups
[
i
]
<<
" confidence: "
<<
probs
[
i
]
<<
"
\n
"
;
putText
(
src
,
label
,
groups
.
at
(
i
).
tl
(),
FONT_HERSHEY_PLAIN
,
1
,
Scalar
(
0
,
0
,
255
),
1
,
LINE_AA
);
int
baseLine
=
0
;
}
Size
labelSize
=
getTextSize
(
label
,
FONT_HERSHEY_PLAIN
,
1
,
1
,
&
baseLine
);
else
int
yLeftBottom
=
std
::
max
(
currrentBox
.
y
,
labelSize
.
height
);
rectangle
(
src
,
groups
[
i
],
Scalar
(
255
),
3
,
8
);
rectangle
(
src
,
Point
(
currrentBox
.
x
,
yLeftBottom
-
labelSize
.
height
),
Point
(
currrentBox
.
x
+
labelSize
.
width
,
yLeftBottom
+
baseLine
),
Scalar
(
255
,
255
,
255
),
FILLED
);
putText
(
src
,
label
,
Point
(
currrentBox
.
x
,
yLeftBottom
),
FONT_HERSHEY_PLAIN
,
1
,
Scalar
(
0
,
0
,
0
),
1
,
LINE_AA
);
}
}
else
rectangle
(
src
,
groups
[
i
],
Scalar
(
255
),
3
,
8
);
}
}
}
}
...
@@ -62,7 +68,7 @@ int main(int argc, const char * argv[])
...
@@ -62,7 +68,7 @@ int main(int argc, const char * argv[])
if
(
!
fileExists
(
modelArch
)
||
!
fileExists
(
moddelWeights
))
if
(
!
fileExists
(
modelArch
)
||
!
fileExists
(
moddelWeights
))
{
{
std
::
cout
<<
getHelpStr
(
argv
[
0
]);
std
::
cout
<<
getHelpStr
(
argv
[
0
]);
std
::
cout
<<
"Model files not found in the current directory. Aborting!"
<<
std
::
endl
;
std
::
cout
<<
"Model files not found in the current directory. Aborting!"
<<
std
::
endl
;
exit
(
1
);
exit
(
1
);
}
}
...
@@ -71,13 +77,16 @@ int main(int argc, const char * argv[])
...
@@ -71,13 +77,16 @@ int main(int argc, const char * argv[])
std
::
cout
<<
"Starting Text Box Demo"
<<
std
::
endl
;
std
::
cout
<<
"Starting Text Box Demo"
<<
std
::
endl
;
Ptr
<
text
::
TextDetectorCNN
>
textSpotter
=
Ptr
<
text
::
TextDetectorCNN
>
textSpotter
=
text
::
TextDetectorCNN
::
create
(
modelArch
,
moddelWeights
,
false
);
text
::
TextDetectorCNN
::
create
(
modelArch
,
moddelWeights
);
std
::
vector
<
Rect
>
bbox
;
std
::
vector
<
Rect
>
bbox
;
std
::
vector
<
float
>
outProbabillities
;
std
::
vector
<
float
>
outProbabillities
;
textSpotter
->
detect
(
image
,
bbox
,
outProbabillities
);
textSpotter
->
detect
(
image
,
bbox
,
outProbabillities
);
textbox_draw
(
image
,
bbox
,
outProbabillities
,
0.5
f
);
std
::
vector
<
int
>
indexes
;
cv
::
dnn
::
NMSBoxes
(
bbox
,
outProbabillities
,
0.3
f
,
0.4
f
,
indexes
);
textbox_draw
(
image
,
bbox
,
outProbabillities
,
indexes
);
imshow
(
"TextBox Demo"
,
image
);
imshow
(
"TextBox Demo"
,
image
);
std
::
cout
<<
"Done!"
<<
std
::
endl
<<
std
::
endl
;
std
::
cout
<<
"Done!"
<<
std
::
endl
<<
std
::
endl
;
...
...
modules/text/src/text_detectorCNN.cpp
View file @
fd2e37da
...
@@ -23,8 +23,6 @@ protected:
...
@@ -23,8 +23,6 @@ protected:
Net
net_
;
Net
net_
;
std
::
vector
<
Size
>
sizes_
;
std
::
vector
<
Size
>
sizes_
;
int
inputChannelCount_
;
int
inputChannelCount_
;
bool
detectMultiscale_
;
void
getOutputs
(
const
float
*
buffer
,
int
nbrTextBoxes
,
int
nCol
,
void
getOutputs
(
const
float
*
buffer
,
int
nbrTextBoxes
,
int
nCol
,
std
::
vector
<
Rect
>&
Bbox
,
std
::
vector
<
float
>&
confidence
,
Size
inputShape
)
std
::
vector
<
Rect
>&
Bbox
,
std
::
vector
<
float
>&
confidence
,
Size
inputShape
)
...
@@ -54,21 +52,12 @@ protected:
...
@@ -54,21 +52,12 @@ protected:
}
}
public
:
public
:
TextDetectorCNNImpl
(
const
String
&
modelArchFilename
,
const
String
&
modelWeightsFilename
,
bool
detectMultiscale
)
:
TextDetectorCNNImpl
(
const
String
&
modelArchFilename
,
const
String
&
modelWeightsFilename
,
std
::
vector
<
Size
>
detectionSizes
)
:
detectMultiscale_
(
detectMultiscale
)
sizes_
(
detectionSizes
)
{
{
net_
=
readNetFromCaffe
(
modelArchFilename
,
modelWeightsFilename
);
net_
=
readNetFromCaffe
(
modelArchFilename
,
modelWeightsFilename
);
CV_Assert
(
!
net_
.
empty
());
CV_Assert
(
!
net_
.
empty
());
inputChannelCount_
=
3
;
inputChannelCount_
=
3
;
sizes_
.
push_back
(
Size
(
700
,
700
));
if
(
detectMultiscale_
)
{
sizes_
.
push_back
(
Size
(
300
,
300
));
sizes_
.
push_back
(
Size
(
700
,
500
));
sizes_
.
push_back
(
Size
(
700
,
300
));
sizes_
.
push_back
(
Size
(
1600
,
1600
));
}
}
}
void
detect
(
InputArray
inputImage_
,
std
::
vector
<
Rect
>&
Bbox
,
std
::
vector
<
float
>&
confidence
)
void
detect
(
InputArray
inputImage_
,
std
::
vector
<
Rect
>&
Bbox
,
std
::
vector
<
float
>&
confidence
)
...
@@ -92,9 +81,14 @@ public:
...
@@ -92,9 +81,14 @@ public:
}
}
};
};
Ptr
<
TextDetectorCNN
>
TextDetectorCNN
::
create
(
const
String
&
modelArchFilename
,
const
String
&
modelWeightsFilename
,
bool
detectMultiscale
)
Ptr
<
TextDetectorCNN
>
TextDetectorCNN
::
create
(
const
String
&
modelArchFilename
,
const
String
&
modelWeightsFilename
,
std
::
vector
<
Size
>
detectionSizes
)
{
return
makePtr
<
TextDetectorCNNImpl
>
(
modelArchFilename
,
modelWeightsFilename
,
detectionSizes
);
}
Ptr
<
TextDetectorCNN
>
TextDetectorCNN
::
create
(
const
String
&
modelArchFilename
,
const
String
&
modelWeightsFilename
)
{
{
return
makePtr
<
TextDetectorCNNImpl
>
(
modelArchFilename
,
modelWeightsFilename
,
detectMultiscale
);
return
create
(
modelArchFilename
,
modelWeightsFilename
,
std
::
vector
<
Size
>
(
1
,
Size
(
300
,
300
))
);
}
}
}
//namespace text
}
//namespace text
}
//namespace cv
}
//namespace cv
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment