Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv_contrib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv_contrib
Commits
3aa88889
Commit
3aa88889
authored
Sep 28, 2017
by
Maksim Shabunin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Reworked HolisticWordspotter to work with dnn module
parent
bad02f37
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
122 additions
and
658 deletions
+122
-658
CMakeLists.txt
modules/text/CMakeLists.txt
+1
-61
FindCaffe.cmake
modules/text/FindCaffe.cmake
+0
-14
FindGlog.cmake
modules/text/FindGlog.cmake
+0
-10
FindProtobuf.cmake
modules/text/FindProtobuf.cmake
+0
-10
ocr.hpp
modules/text/include/opencv2/text/ocr.hpp
+23
-154
dictnet_demo.cpp
modules/text/samples/dictnet_demo.cpp
+30
-59
dictnet_demo.py
modules/text/samples/dictnet_demo.py
+0
-82
ocr_holistic.cpp
modules/text/src/ocr_holistic.cpp
+68
-262
text_config.hpp.in
modules/text/text_config.hpp.in
+0
-6
No files found.
modules/text/CMakeLists.txt
View file @
3aa88889
set
(
the_description
"Text Detection and Recognition"
)
ocv_define_module
(
text opencv_ml opencv_imgproc opencv_core opencv_features2d OPTIONAL opencv_highgui WRAP python java
)
ocv_define_module
(
text opencv_ml opencv_imgproc opencv_core opencv_features2d
opencv_dnn
OPTIONAL opencv_highgui WRAP python java
)
if
(
NOT CMAKE_CROSSCOMPILING OR OPENCV_FIND_TESSERACT
)
set
(
CMAKE_MODULE_PATH
${
CMAKE_MODULE_PATH
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/cmake
)
...
...
@@ -22,62 +22,3 @@ ocv_include_directories(${CMAKE_CURRENT_BINARY_DIR})
ocv_add_testdata
(
samples/ contrib/text
FILES_MATCHING PATTERN
"*.xml"
PATTERN
"*.xml.gz"
REGEX
"scenetext[0-9]+.jpg"
)
#Principal source from which adaptation came is the cnn_3dobj module
find_package
(
Caffe
)
if
(
Caffe_FOUND
)
message
(
STATUS
"Caffe: YES"
)
set
(
HAVE_CAFFE 1
)
else
()
message
(
STATUS
"Caffe: NO"
)
endif
()
find_package
(
Protobuf
)
if
(
Protobuf_FOUND
)
message
(
STATUS
"Protobuf: YES"
)
set
(
HAVE_PROTOBUF 1
)
else
()
message
(
STATUS
"Protobuf: NO"
)
endif
()
find_package
(
Glog
)
if
(
Glog_FOUND
)
message
(
STATUS
"Glog: YES"
)
set
(
HAVE_GLOG 1
)
else
()
message
(
STATUS
"Glog: NO"
)
endif
()
if
(
HAVE_CAFFE
)
message
(
STATUS
"HAVE CAFFE!!!"
)
configure_file
(
${
CMAKE_CURRENT_SOURCE_DIR
}
/text_config.hpp.in
${
CMAKE_CURRENT_SOURCE_DIR
}
/include/opencv2/text_config.hpp @ONLY
)
include_directories
(
${
CMAKE_CURRENT_BINARY_DIR
}
)
if
(
${
Caffe_FOUND
}
)
include_directories
(
${
Caffe_INCLUDE_DIR
}
)
#taken from caffe's cmake
find_package
(
HDF5 COMPONENTS HL REQUIRED
)
include_directories
(
SYSTEM
${
HDF5_INCLUDE_DIRS
}
${
HDF5_HL_INCLUDE_DIR
}
)
list
(
APPEND Caffe_LINKER_LIBS
${
HDF5_LIBRARIES
}
)
find_package
(
Boost 1.46 REQUIRED COMPONENTS system thread filesystem
)
include_directories
(
SYSTEM
${
Boost_INCLUDE_DIR
}
)
include_directories
(
SYSTEM /usr/local/cuda-7.5/targets/x86_64-linux/include/
)
list
(
APPEND Caffe_LINKER_LIBS
${
Boost_LIBRARIES
}
)
endif
()
if
(
${
Caffe_FOUND
}
)
#taken from caffe's cmake
target_link_libraries
(
opencv_text
${
Caffe_LIBS
}
${
Glog_LIBS
}
${
Protobuf_LIBS
}
${
HDF5_LIBRARIES
}
${
Boost_LIBRARIES
}
)
endif
()
endif
()
configure_file
(
${
CMAKE_CURRENT_SOURCE_DIR
}
/text_config.hpp.in
${
CMAKE_BINARY_DIR
}
/text_config.hpp @ONLY
)
\ No newline at end of file
modules/text/FindCaffe.cmake
deleted
100644 → 0
View file @
bad02f37
# Caffe package for CNN Triplet training
unset
(
Caffe_FOUND
)
find_path
(
Caffe_INCLUDE_DIR NAMES caffe/caffe.hpp caffe/common.hpp caffe/net.hpp caffe/proto/caffe.pb.h caffe/util/io.hpp caffe/vision_layers.hpp
HINTS
/usr/local/include
)
find_library
(
Caffe_LIBS NAMES caffe
HINTS
/usr/local/lib
)
if
(
Caffe_LIBS AND Caffe_INCLUDE_DIR
)
set
(
Caffe_FOUND 1
)
endif
()
modules/text/FindGlog.cmake
deleted
100755 → 0
View file @
bad02f37
#Required for Caffe
unset
(
Glog_FOUND
)
find_library
(
Glog_LIBS NAMES glog
HINTS
/usr/local/lib
)
if
(
Glog_LIBS
)
set
(
Glog_FOUND 1
)
endif
()
modules/text/FindProtobuf.cmake
deleted
100755 → 0
View file @
bad02f37
#Protobuf package required for Caffe
unset
(
Protobuf_FOUND
)
find_library
(
Protobuf_LIBS NAMES protobuf
HINTS
/usr/local/lib
)
if
(
Protobuf_LIBS
)
set
(
Protobuf_FOUND 1
)
endif
()
modules/text/include/opencv2/text/ocr.hpp
View file @
3aa88889
...
...
@@ -46,10 +46,6 @@
#include <vector>
#include <string>
#include <iostream>
#include <sstream>
namespace
cv
{
...
...
@@ -540,107 +536,24 @@ at each window location.
CV_EXPORTS_W
Ptr
<
OCRBeamSearchDecoder
::
ClassifierCallback
>
loadOCRBeamSearchClassifierCNN
(
const
String
&
filename
);
//! @}
//Classifiers should provide diferent backends
//For the moment only caffe is implemeted
enum
{
OCR_HOLISTIC_BACKEND_NONE
,
OCR_HOLISTIC_BACKEND_CAFFE
};
/** @brief Abstract class that implements the classifcation of text images.
*
* The interface is generic enough to describe any image classifier. And allows
* to take advantage of compouting in batches. While word classifiers are the default
* networks, any image classifers should work.
*
*/
class
CV_EXPORTS_W
TextImageClassifier
{
protected
:
Size
inputSz_
;
int
channelCount_
;
/** @brief all image preprocessing is handled here including whitening etc.
*
* @param input the image to be preprocessed for the classifier. If the depth
* is CV_U8 values should be in [0,255] otherwise values are assumed to be in [0,1]
*
* @param output reference to the image to be fed to the classifier, the preprocessor will
* resize the image to the apropriate size and convert it to the apropriate depth\
*
* The method preprocess should never be used externally, it is up to classify and classifyBatch
* methods to employ it.
*/
virtual
void
preprocess
(
Mat
&
input
,
Mat
&
output
)
=
0
;
public
:
virtual
~
TextImageClassifier
()
{}
/** @brief produces a class confidence row-vector given an image
*/
CV_WRAP
virtual
void
classify
(
InputArray
image
,
OutputArray
classProbabilities
)
=
0
;
/** @brief produces a matrix containing class confidence row-vectors given an collection of images
*/
CV_WRAP
virtual
void
classifyBatch
(
InputArrayOfArrays
image
,
OutputArray
classProbabilities
)
=
0
;
/** @brief simple getter method returning the size of the oputput row-vector
*/
CV_WRAP
virtual
int
getOutputSize
()
=
0
;
/** @brief simple getter method returning the size of the minibatches for this classifier.
* If not applicabe this method should return 1
*/
CV_WRAP
virtual
int
getMinibatchSize
()
=
0
;
/** @brief simple getter method returning a value describing the framework beeing employed to implement the classifier
*/
CV_WRAP
virtual
int
getBackend
(){
return
OCR_HOLISTIC_BACKEND_NONE
;}
};
class
CV_EXPORTS_W
DictNet
:
public
TextImageClassifier
{
/** @brief Class that uses a pretrained caffe model for word classification.
*
* This network is described in detail in:
* Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015
* http://arxiv.org/abs/1412.1842
*/
public
:
virtual
~
DictNet
()
{};
CV_WRAP
virtual
bool
usingGpu
()
=
0
;
/** @brief Constructs a DictNet object from a caffe pretrained model
*
* @param archFilename is the path to the prototxt file containing the deployment model architecture description.
*
* @param weightsFilename is the path to the pretrained weights of the model in binary fdorm. This file can be
* very large, up to 2GB.
*
* @param minibatchSz the maximum number of samples that can processed in parallel. In practice this parameter
* has an effect only when computing in the GPU and should be set with respect to the memory available in the GPU.
*
* @param useGpu boolean flag setting GPU or CPU computation
*
* @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
* the only option
*/
CV_WRAP
static
Ptr
<
DictNet
>
create
(
String
archFilename
,
String
weightsFilename
,
int
minibatchSz
=
100
,
bool
useGpu
=
0
,
int
backEnd
=
OCR_HOLISTIC_BACKEND_CAFFE
);
};
/** @brief OCRHolisticWordRecognizer class provides the functionallity of segmented wordspotting.
* Given a predefined vocabulary , a
TextImageClassifier
is employed to select the most probable
* Given a predefined vocabulary , a
DictNet
is employed to select the most probable
* word given an input image.
*
* This class implements the logic of providing transcriptions given a vocabulary and and an image
* classifer.
* DictNet is described in detail in:
* Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015
* http://arxiv.org/abs/1412.1842
*/
class
CV_EXPORTS
_W
OCRHolisticWordRecognizer
:
public
BaseOCR
class
CV_EXPORTS
OCRHolisticWordRecognizer
:
public
BaseOCR
{
public
:
virtual
void
run
(
Mat
&
image
,
std
::
string
&
output_text
,
std
::
vector
<
Rect
>*
component_rects
=
NULL
,
std
::
vector
<
std
::
string
>*
component_texts
=
NULL
,
std
::
vector
<
float
>*
component_confidences
=
NULL
,
int
component_level
=
OCR_LEVEL_WORD
)
=
0
;
virtual
void
run
(
Mat
&
image
,
std
::
string
&
output_text
,
std
::
vector
<
Rect
>*
component_rects
=
NULL
,
std
::
vector
<
std
::
string
>*
component_texts
=
NULL
,
std
::
vector
<
float
>*
component_confidences
=
NULL
,
int
component_level
=
OCR_LEVEL_WORD
)
=
0
;
/** @brief Recognize text using a segmentation based word-spotting/classifier cnn.
...
...
@@ -665,68 +578,24 @@ public:
@param component_level must be OCR_LEVEL_WORD.
*/
virtual
void
run
(
Mat
&
image
,
Mat
&
mask
,
std
::
string
&
output_text
,
std
::
vector
<
Rect
>*
component_rects
=
NULL
,
std
::
vector
<
std
::
string
>*
component_texts
=
NULL
,
std
::
vector
<
float
>*
component_confidences
=
NULL
,
int
component_level
=
OCR_LEVEL_WORD
)
=
0
;
/**
@brief Method that provides a quick and simple interface to a single word image classifcation
@param inputImage an image expected to be a CV_U8C1 or CV_U8C3 of any size assumed to contain a single word
@param transcription an opencv string that will store the detected word transcription
@param confidence a double that will be updated with the confidence the classifier has for the selected word
*/
CV_WRAP
virtual
void
recogniseImage
(
InputArray
inputImage
,
CV_OUT
String
&
transcription
,
CV_OUT
double
&
confidence
)
=
0
;
/**
@brief Method that provides a quick and simple interface to a multiple word image classifcation taking advantage
the classifiers parallel capabilities.
@param inputImageList an list of images expected to be a CV_U8C1 or CV_U8C3 each image can be of any size and is assumed
to contain a single word.
@param transcriptions a vector of opencv strings that will store the detected word transcriptions, one for each
input image
@param confidences a vector of double that will be updated with the confidence the classifier has for each of the
selected words.
*/
CV_WRAP
virtual
void
recogniseImageBatch
(
InputArrayOfArrays
inputImageList
,
CV_OUT
std
::
vector
<
String
>&
transcriptions
,
CV_OUT
std
::
vector
<
double
>&
confidences
)
=
0
;
/**
@brief simple getted for the vocabulary employed
*/
CV_WRAP
virtual
const
std
::
vector
<
String
>&
getVocabulary
()
=
0
;
virtual
void
run
(
Mat
&
image
,
Mat
&
mask
,
std
::
string
&
output_text
,
std
::
vector
<
Rect
>*
component_rects
=
NULL
,
std
::
vector
<
std
::
string
>*
component_texts
=
NULL
,
std
::
vector
<
float
>*
component_confidences
=
NULL
,
int
component_level
=
OCR_LEVEL_WORD
)
=
0
;
/** @brief Creates an instance of the OCRHolisticWordRecognizer class.
@param classifierPtr an instance of TextImageClassifier, normaly a DictNet instance
@param vocabullaryFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line
in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize
of the classifier.
*/
CV_WRAP
static
Ptr
<
OCRHolisticWordRecognizer
>
create
(
Ptr
<
TextImageClassifier
>
classifierPtr
,
String
vocabullaryFilename
);
/** @brief Creates an instance of the OCRHolisticWordRecognizer class and implicitly also a DictNet classifier.
@param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture.
@param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.
@param vocabullaryFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line
in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize
of the classifier.
*/
CV_WRAP
static
Ptr
<
OCRHolisticWordRecognizer
>
create
(
String
modelArchFilename
,
String
modelWeightsFilename
,
String
vocabullaryFilename
);
static
Ptr
<
OCRHolisticWordRecognizer
>
create
(
const
std
::
string
&
archFilename
,
const
std
::
string
&
weightsFilename
,
const
std
::
string
&
wordsFilename
);
};
//! @}
}
}
}}
// cv::text::
#endif // _OPENCV_TEXT_OCR_HPP_
modules/text/samples/dictnet_demo.cpp
View file @
3aa88889
...
...
@@ -12,79 +12,50 @@
#include "opencv2/imgproc.hpp"
#include <sstream>
#include <vector>
#include <iostream>
#include <iomanip>
#include <fstream>
inline
std
::
string
getHelpStr
(
std
::
string
progFname
){
std
::
stringstream
out
;
out
<<
" Demo of wordspotting CNN for text recognition."
<<
std
::
endl
;
out
<<
" Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"
<<
std
::
endl
<<
std
::
endl
;
using
namespace
std
;
using
namespace
cv
;
using
namespace
cv
::
text
;
out
<<
" Usage: "
<<
progFname
<<
" <output_file> <input_image1> <input_image2> ... <input_imageN>"
<<
std
::
endl
;
out
<<
" Caffe Model files (dictnet_vgg.caffemodel, dictnet_vgg_deploy.prototxt, dictnet_vgg_labels.txt)"
<<
std
::
endl
;
out
<<
" must be in the current directory."
<<
std
::
endl
<<
std
::
endl
;
inline
void
printHelp
()
{
cout
<<
" Demo of wordspotting CNN for text recognition."
<<
endl
;
cout
<<
" Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"
<<
std
::
endl
<<
std
::
endl
;
out
<<
" Obtaining Caffe Model files in linux shell:"
<<
std
::
endl
;
out
<<
" wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel"
<<
std
::
endl
;
out
<<
" wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt"
<<
std
::
endl
;
out
<<
" wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt"
<<
std
::
endl
<<
std
::
endl
;
return
out
.
str
();
}
cout
<<
" Usage: program <input_image>"
<<
endl
;
cout
<<
" Caffe Model files (dictnet_vgg.caffemodel, dictnet_vgg_deploy.prototxt, dictnet_vgg_labels.txt)"
<<
endl
;
cout
<<
" must be in the current directory."
<<
endl
<<
endl
;
inline
bool
fileExists
(
std
::
string
filename
)
{
std
::
ifstream
f
(
filename
.
c_str
());
return
f
.
good
();
cout
<<
" Obtaining Caffe Model files in linux shell:"
<<
endl
;
cout
<<
" wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel"
<<
endl
;
cout
<<
" wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt"
<<
endl
;
cout
<<
" wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt"
<<
endl
<<
endl
;
}
int
main
(
int
argc
,
const
char
*
argv
[]){
const
int
USE_GPU
=
0
;
if
(
argc
<
3
){
std
::
cout
<<
getHelpStr
(
argv
[
0
]);
std
::
cout
<<
"Insufiecient parameters. Aborting!"
<<
std
::
endl
;
int
main
(
int
argc
,
const
char
*
argv
[])
{
if
(
argc
!=
2
)
{
printHelp
();
exit
(
1
);
}
if
(
!
fileExists
(
"dictnet_vgg.caffemodel"
)
||
!
fileExists
(
"dictnet_vgg_deploy.prototxt"
)
||
!
fileExists
(
"dictnet_vgg_labels.txt"
)){
std
::
cout
<<
getHelpStr
(
argv
[
0
]);
std
::
cout
<<
"Model files not found in the current directory. Aborting!"
<<
std
::
endl
;
exit
(
1
);
}
Mat
image
=
imread
(
argv
[
1
],
IMREAD_GRAYSCALE
);
if
(
fileExists
(
argv
[
1
])){
std
::
cout
<<
getHelpStr
(
argv
[
0
]);
std
::
cout
<<
"Output file must not exist. Aborting!"
<<
std
::
endl
;
exit
(
1
);
}
cout
<<
"Read image ("
<<
argv
[
1
]
<<
"): "
<<
image
.
size
<<
", channels: "
<<
image
.
channels
()
<<
", depth: "
<<
image
.
depth
()
<<
endl
;
std
::
vector
<
cv
::
Mat
>
imageList
;
for
(
int
imageIdx
=
2
;
imageIdx
<
argc
;
imageIdx
++
){
if
(
fileExists
(
argv
[
imageIdx
])){
imageList
.
push_back
(
cv
::
imread
(
cv
::
String
(
argv
[
imageIdx
])));
}
else
{
std
::
cout
<<
getHelpStr
(
argv
[
0
]);
std
::
cout
<<
argv
[
imageIdx
]
<<
" doesn't exist. Aborting"
;
}
if
(
image
.
empty
())
{
printHelp
();
exit
(
1
);
}
cv
::
Ptr
<
cv
::
text
::
DictNet
>
cnn
=
cv
::
text
::
DictNet
::
create
(
"dictnet_vgg_deploy.prototxt"
,
"dictnet_vgg.caffemodel"
,
100
,
USE_GPU
);
cv
::
Ptr
<
cv
::
text
::
OCRHolisticWordRecognizer
>
wordSpotter
=
cv
::
text
::
OCRHolisticWordRecognizer
::
create
(
cnn
,
"dictnet_vgg_labels.txt"
);
Ptr
<
OCRHolisticWordRecognizer
>
wordSpotter
=
OCRHolisticWordRecognizer
::
create
(
"dictnet_vgg_deploy.prototxt"
,
"dictnet_vgg.caffemodel"
,
"dictnet_vgg_labels.txt"
);
std
::
vector
<
cv
::
String
>
wordList
;
std
::
vector
<
double
>
outProbabillitie
s
;
wordSpotter
->
r
ecogniseImageBatch
(
imageList
,
wordList
,
outProbabillitie
s
);
std
::
string
word
;
vector
<
float
>
conf
s
;
wordSpotter
->
r
un
(
image
,
word
,
0
,
0
,
&
conf
s
);
std
::
ofstream
out
;
out
.
open
(
argv
[
1
]);
for
(
int
imgIdx
=
0
;
imgIdx
<
int
(
imageList
.
size
());
imgIdx
++
){
out
<<
argv
[
imgIdx
+
2
]
<<
","
<<
wordList
[
imgIdx
]
<<
","
<<
outProbabillities
[
imgIdx
]
<<
std
::
endl
;
}
out
.
close
();
cout
<<
"Detected word: '"
<<
word
<<
"', confidence: "
<<
confs
[
0
]
<<
endl
;
}
modules/text/samples/dictnet_demo.py
deleted
100644 → 0
View file @
bad02f37
#!/usr/bin/env python
import
cv2
import
sys
import
os.path
#Global variable shared between the Mouse callback and main
refPt
=
[]
cropping
=
False
image
=
None
drawImage
=
None
dictNet
=
None
wordSpotter
=
None
def
mouseCallback
(
event
,
x
,
y
,
flags
,
param
):
# grab references to the global variables
global
refPt
,
cropping
,
wordSpotter
,
drawImage
,
image
# if the left mouse button was clicked, record the starting
# (x, y) coordinates and indicate that cropping is being
# performed
if
event
==
cv2
.
EVENT_LBUTTONDOWN
:
refPt
=
[(
x
,
y
)]
cropping
=
True
# check to see if the left mouse button was released
elif
event
==
cv2
.
EVENT_LBUTTONUP
:
# record the ending (x, y) coordinates and indicate that
# the cropping operation is finished
refPt
.
append
((
x
,
y
))
cropping
=
False
# draw a rectangle around the region of interest
roi
=
image
[
refPt
[
0
][
1
]:
refPt
[
1
][
1
],
refPt
[
0
][
0
]:
refPt
[
1
][
0
]]
res
=
wordSpotter
.
recogniseImage
(
roi
)
drawImage
=
image
.
copy
()
cv2
.
rectangle
(
drawImage
,
refPt
[
0
],
refPt
[
1
],
(
0
,
255
,
0
),
2
)
cv2
.
putText
(
drawImage
,
"
%
s:
%
f"
%
(
res
[
0
],
res
[
1
]),
refPt
[
0
],
cv2
.
FONT_HERSHEY_SIMPLEX
,
1
,(
255
,
255
,
255
),
2
)
cv2
.
imshow
(
"Select A Region"
,
drawImage
)
if
__name__
==
'__main__'
:
USEGPU
=
False
helpStr
=
"""Usage: """
+
sys
.
argv
[
0
]
+
""" IMAGE_FILENAME
Press 'q' or 'Q' exit
The modelFiles must be available in the current directory.
In linux shell they can be downloaded (~2GB) with the following commands:
wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel
wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt
wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt
"""
if
((
len
(
sys
.
argv
)
!=
2
)
or
not
(
os
.
path
.
isfile
(
sys
.
argv
[
1
])
)):
print
helpStr
print
'No image file given Aborting!'
sys
.
exit
(
1
)
if
not
(
os
.
path
.
isfile
(
'dictnet_vgg_deploy.prototxt'
)
and
os
.
path
.
isfile
(
'dictnet_vgg.caffemodel'
)
and
os
.
path
.
isfile
(
'dictnet_vgg_labels.txt'
)):
print
helpStr
print
'Model files not present, Aborting!'
sys
.
exit
(
1
)
dictNet
=
cv2
.
text
.
DictNet_create
(
'./dictnet_vgg_deploy.prototxt'
,
'./dictnet_vgg.caffemodel'
,
100
,
USEGPU
)
wordSpotter
=
cv2
.
text
.
OCRHolisticWordRecognizer_create
(
dictNet
,
"./dictnet_vgg_labels.txt"
)
image
=
cv2
.
imread
(
sys
.
argv
[
1
])
drawImage
=
image
.
copy
()
cv2
.
namedWindow
(
"Select A Region"
)
cv2
.
setMouseCallback
(
"Select A Region"
,
mouseCallback
)
while
True
:
cv2
.
imshow
(
"Select A Region"
,
drawImage
)
key
=
cv2
.
waitKey
(
1
)
&
0xFF
# if the 'q' key is pressed, break from the loop
if
key
==
ord
(
"q"
)
or
key
==
ord
(
"Q"
):
break
cv2
.
destroyAllWindows
()
modules/text/src/ocr_holistic.cpp
View file @
3aa88889
#include "precomp.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/core.hpp"
#include "opencv2/dnn.hpp"
#include <iostream>
#include <fstream>
#include <sstream>
#include <queue>
#include <algorithm>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>
//should this be moved elsewhere?
//In precomp.hpp It doesn't work
#ifdef HAVE_CAFFE
#include "caffe/caffe.hpp"
#endif
using
namespace
std
;
namespace
cv
{
namespace
text
{
//Maybe OpenCV has a routine better suited
inline
bool
fileExists
(
String
filename
)
{
std
::
ifstream
f
(
filename
.
c_str
());
return
f
.
good
();
}
class
DictNetCaffeImpl
:
public
DictNet
{
protected
:
void
preprocess
(
Mat
&
input
,
Mat
&
output
){
if
(
input
.
channels
()
==
3
){
Mat
tmpInput
;
cvtColor
(
input
,
tmpInput
,
COLOR_BGR2GRAY
);
if
(
input
.
depth
()
==
CV_8U
){
tmpInput
.
convertTo
(
output
,
CV_32FC1
,
1
/
255.0
);
}
else
{
//Assuming values are at the desired [0,1] range
tmpInput
.
convertTo
(
output
,
CV_32FC1
);
}
}
else
{
if
(
input
.
channels
()
==
1
){
if
(
input
.
depth
()
==
CV_8U
){
input
.
convertTo
(
output
,
CV_32FC1
,
1
/
255.0
);
}
else
{
//Assuming values are at the desired [0,1] range
input
.
convertTo
(
output
,
CV_32FC1
);
}
}
else
{
CV_Error
(
Error
::
StsError
,
"Expecting images with either 1 or 3 channels"
);
}
}
resize
(
output
,
output
,
this
->
inputGeometry_
);
Scalar
dev
,
mean
;
meanStdDev
(
output
,
mean
,
dev
);
subtract
(
output
,
mean
[
0
],
output
);
divide
(
output
,(
dev
[
0
]
/
128.0
),
output
);
}
void
classifyMiniBatch
(
std
::
vector
<
Mat
>
inputImageList
,
Mat
outputMat
){
//Classifies a list of images containing at most minibatchSz_ images
CV_Assert
(
int
(
inputImageList
.
size
())
<=
this
->
minibatchSz_
);
CV_Assert
(
outputMat
.
isContinuous
());
#ifdef HAVE_CAFFE
net_
->
input_blobs
()[
0
]
->
Reshape
(
inputImageList
.
size
(),
1
,
this
->
inputGeometry_
.
height
,
this
->
inputGeometry_
.
width
);
net_
->
Reshape
();
float
*
inputBuffer
=
net_
->
input_blobs
()[
0
]
->
mutable_cpu_data
();
float
*
inputData
=
inputBuffer
;
for
(
size_t
imgNum
=
0
;
imgNum
<
inputImageList
.
size
();
imgNum
++
){
Mat
preprocessed
;
cv
::
Mat
netInputWraped
(
this
->
inputGeometry_
.
height
,
this
->
inputGeometry_
.
width
,
CV_32FC1
,
inputData
);
this
->
preprocess
(
inputImageList
[
imgNum
],
preprocessed
);
preprocessed
.
copyTo
(
netInputWraped
);
inputData
+=
(
this
->
inputGeometry_
.
height
*
this
->
inputGeometry_
.
width
);
}
this
->
net_
->
ForwardPrefilled
();
const
float
*
outputNetData
=
net_
->
output_blobs
()[
0
]
->
cpu_data
();
float
*
outputMatData
=
(
float
*
)(
outputMat
.
data
);
memcpy
(
outputMatData
,
outputNetData
,
sizeof
(
float
)
*
this
->
outputSize_
*
inputImageList
.
size
());
#endif
}
#ifdef HAVE_CAFFE
Ptr
<
caffe
::
Net
<
float
>
>
net_
;
#endif
Size
inputGeometry_
;
int
minibatchSz_
;
//The existence of the assignment operator mandates this to be nonconst
bool
gpuBackend_
;
//The existence of the assignment operator mandates this to be nonconst
int
outputSize_
;
public
:
DictNetCaffeImpl
(
const
DictNetCaffeImpl
&
dn
)
:
inputGeometry_
(
dn
.
inputGeometry_
),
minibatchSz_
(
dn
.
minibatchSz_
),
gpuBackend_
(
dn
.
gpuBackend_
),
outputSize_
(
dn
.
outputSize_
){
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
#ifdef HAVE_CAFFE
this
->
net_
=
dn
.
net_
;
#endif
}
DictNetCaffeImpl
&
operator
=
(
const
DictNetCaffeImpl
&
dn
){
#ifdef HAVE_CAFFE
this
->
net_
=
dn
.
net_
;
#endif
this
->
inputGeometry_
=
dn
.
inputGeometry_
;
this
->
minibatchSz_
=
dn
.
minibatchSz_
;
this
->
gpuBackend_
=
dn
.
gpuBackend_
;
this
->
outputSize_
=
dn
.
outputSize_
;
return
*
this
;
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
}
DictNetCaffeImpl
(
String
modelArchFilename
,
String
modelWeightsFilename
,
int
maxMinibatchSz
,
bool
useGpu
)
:
minibatchSz_
(
maxMinibatchSz
),
gpuBackend_
(
useGpu
){
CV_Assert
(
this
->
minibatchSz_
>
0
);
CV_Assert
(
fileExists
(
modelArchFilename
));
CV_Assert
(
fileExists
(
modelWeightsFilename
));
#ifdef HAVE_CAFFE
if
(
this
->
gpuBackend_
){
caffe
::
Caffe
::
set_mode
(
caffe
::
Caffe
::
GPU
);
}
else
{
caffe
::
Caffe
::
set_mode
(
caffe
::
Caffe
::
CPU
);
}
this
->
net_
.
reset
(
new
caffe
::
Net
<
float
>
(
modelArchFilename
,
caffe
::
TEST
));
CV_Assert
(
net_
->
num_inputs
()
==
1
);
CV_Assert
(
net_
->
num_outputs
()
==
1
);
CV_Assert
(
this
->
net_
->
input_blobs
()[
0
]
->
channels
()
==
1
);
this
->
net_
->
CopyTrainedLayersFrom
(
modelWeightsFilename
);
caffe
::
Blob
<
float
>*
inputLayer
=
this
->
net_
->
input_blobs
()[
0
];
this
->
inputGeometry_
=
Size
(
inputLayer
->
width
(),
inputLayer
->
height
());
inputLayer
->
Reshape
(
this
->
minibatchSz_
,
1
,
this
->
inputGeometry_
.
height
,
this
->
inputGeometry_
.
width
);
net_
->
Reshape
();
this
->
outputSize_
=
net_
->
output_blobs
()[
0
]
->
channels
();
#else
CV_Error
(
Error
::
StsError
,
"Caffe not available during compilation!"
);
#endif
}
void
classify
(
InputArray
image
,
OutputArray
classProbabilities
){
std
::
vector
<
Mat
>
inputImageList
;
inputImageList
.
push_back
(
image
.
getMat
());
classifyBatch
(
inputImageList
,
classProbabilities
);
}
void
classifyBatch
(
InputArrayOfArrays
inputImageList
,
OutputArray
classProbabilities
){
std
::
vector
<
Mat
>
allImageVector
;
inputImageList
.
getMatVector
(
allImageVector
);
size_t
outputSize
=
size_t
(
this
->
outputSize_
);
//temporary variable to avoid int to size_t arithmentic
size_t
minibatchSize
=
size_t
(
this
->
minibatchSz_
);
//temporary variable to avoid int to size_t arithmentic
classProbabilities
.
create
(
Size
(
int
(
outputSize
),
int
(
allImageVector
.
size
())),
CV_32F
);
Mat
outputMat
=
classProbabilities
.
getMat
();
for
(
size_t
imgNum
=
0
;
imgNum
<
allImageVector
.
size
();
imgNum
+=
minibatchSize
){
size_t
rangeEnd
=
imgNum
+
std
::
min
<
size_t
>
(
allImageVector
.
size
()
-
imgNum
,
minibatchSize
);
std
::
vector
<
Mat
>::
const_iterator
from
=
std
::
vector
<
Mat
>::
const_iterator
(
allImageVector
.
begin
()
+
imgNum
);
std
::
vector
<
Mat
>::
const_iterator
to
=
std
::
vector
<
Mat
>::
const_iterator
(
allImageVector
.
begin
()
+
rangeEnd
);
std
::
vector
<
Mat
>
minibatchInput
(
from
,
to
);
classifyMiniBatch
(
minibatchInput
,
outputMat
.
rowRange
(
int
(
imgNum
),
int
(
rangeEnd
)));
}
}
int
getOutputSize
(){
return
this
->
outputSize_
;
}
int
getMinibatchSize
(){
return
this
->
minibatchSz_
;
}
bool
usingGpu
(){
return
this
->
gpuBackend_
;
}
int
getBackend
(){
return
OCR_HOLISTIC_BACKEND_CAFFE
;
}
};
Ptr
<
DictNet
>
DictNet
::
create
(
String
archFilename
,
String
weightsFilename
,
int
minibatchSz
,
bool
useGpu
,
int
backEnd
){
switch
(
backEnd
){
case
OCR_HOLISTIC_BACKEND_CAFFE
:
return
Ptr
<
DictNet
>
(
new
DictNetCaffeImpl
(
archFilename
,
weightsFilename
,
minibatchSz
,
useGpu
));
break
;
case
OCR_HOLISTIC_BACKEND_NONE
:
default
:
CV_Error
(
Error
::
StsError
,
"DictNet::create backend not implemented"
);
return
Ptr
<
DictNet
>
();
break
;
}
}
class
OCRHolisticWordRecognizerImpl
:
public
OCRHolisticWordRecognizer
{
class
OCRHolisticWordRecognizerImpl
:
public
OCRHolisticWordRecognizer
{
private
:
struct
NetOutput
{
//Auxiliary structure that handles the logic of getting class ids and probabillities from
//the raw outputs of caffe
int
wordIdx
;
float
probabillity
;
dnn
::
Net
net
;
vector
<
string
>
words
;
static
bool
sorter
(
const
NetOutput
&
o1
,
const
NetOutput
&
o2
){
//used with std::sort to provide the most probable class
return
o1
.
probabillity
>
o2
.
probabillity
;
}
static
void
getOutputs
(
const
float
*
buffer
,
int
nbOutputs
,
std
::
vector
<
NetOutput
>&
res
){
res
.
resize
(
nbOutputs
);
for
(
int
k
=
0
;
k
<
nbOutputs
;
k
++
){
res
[
k
].
wordIdx
=
k
;
res
[
k
].
probabillity
=
buffer
[
k
];
}
std
::
sort
(
res
.
begin
(),
res
.
end
(),
NetOutput
::
sorter
);
}
static
void
getClassification
(
const
float
*
buffer
,
int
nbOutputs
,
int
&
classNum
,
double
&
confidence
){
std
::
vector
<
NetOutput
>
tmp
;
getOutputs
(
buffer
,
nbOutputs
,
tmp
);
classNum
=
tmp
[
0
].
wordIdx
;
confidence
=
tmp
[
0
].
probabillity
;
}
};
protected
:
std
::
vector
<
String
>
labels_
;
Ptr
<
TextImageClassifier
>
classifier_
;
public
:
OCRHolisticWordRecognizerImpl
(
Ptr
<
TextImageClassifier
>
classifierPtr
,
String
vocabullaryFilename
)
:
classifier_
(
classifierPtr
){
CV_Assert
(
fileExists
(
vocabullaryFilename
));
//this fails for some rason
std
::
ifstream
labelsFile
(
vocabullaryFilename
.
c_str
());
if
(
!
labelsFile
){
CV_Error
(
Error
::
StsError
,
"Could not read Labels from file"
);
OCRHolisticWordRecognizerImpl
(
const
string
&
archFilename
,
const
string
&
weightsFilename
,
const
string
&
wordsFilename
)
{
net
=
dnn
::
readNetFromCaffe
(
archFilename
,
weightsFilename
);
std
::
ifstream
in
(
wordsFilename
.
c_str
());
if
(
!
in
)
{
CV_Error
(
Error
::
StsError
,
"Could not read Labels from file"
);
}
std
::
string
line
;
while
(
std
::
getline
(
labelsFile
,
line
)){
labels_
.
push_back
(
std
::
string
(
line
));
}
CV_Assert
(
this
->
classifier_
->
getOutputSize
()
==
int
(
this
->
labels_
.
size
()));
}
void
recogniseImage
(
InputArray
inputImage
,
CV_OUT
String
&
transcription
,
CV_OUT
double
&
confidence
){
Mat
netOutput
;
this
->
classifier_
->
classify
(
inputImage
,
netOutput
);
int
classNum
;
NetOutput
::
getClassification
((
float
*
)(
netOutput
.
data
),
this
->
classifier_
->
getOutputSize
(),
classNum
,
confidence
);
transcription
=
this
->
labels_
[
classNum
];
while
(
std
::
getline
(
in
,
line
))
words
.
push_back
(
line
);
CV_Assert
(
getClassCount
()
==
words
.
size
());
}
void
recogniseImageBatch
(
InputArrayOfArrays
inputImageList
,
CV_OUT
std
::
vector
<
String
>&
transcriptionVec
,
CV_OUT
std
::
vector
<
double
>&
confidenceVec
){
Mat
netOutput
;
this
->
classifier_
->
classifyBatch
(
inputImageList
,
netOutput
);
for
(
int
k
=
0
;
k
<
netOutput
.
rows
;
k
++
){
int
classNum
;
double
confidence
;
NetOutput
::
getClassification
((
float
*
)(
netOutput
.
row
(
k
).
data
),
this
->
classifier_
->
getOutputSize
(),
classNum
,
confidence
);
transcriptionVec
.
push_back
(
this
->
labels_
[
classNum
]);
confidenceVec
.
push_back
(
confidence
);
}
}
void
run
(
Mat
&
image
,
std
::
string
&
output_text
,
std
::
vector
<
Rect
>*
component_rects
=
NULL
,
std
::
vector
<
std
::
string
>*
component_texts
=
NULL
,
std
::
vector
<
float
>*
component_confidences
=
NULL
,
int
component_level
=
0
){
CV_Assert
(
component_level
==
OCR_LEVEL_WORD
);
//Componnents not applicable for word spotting
void
run
(
Mat
&
image
,
std
::
string
&
output_text
,
std
::
vector
<
Rect
>*
component_rects
=
NULL
,
std
::
vector
<
std
::
string
>*
component_texts
=
NULL
,
std
::
vector
<
float
>*
component_confidences
=
NULL
,
int
component_level
=
0
)
{
CV_Assert
(
component_level
==
OCR_LEVEL_WORD
);
//Components not applicable for word spotting
double
confidence
;
String
transcription
;
recogniseImage
(
image
,
transcription
,
confidence
);
output_text
=
transcription
.
c_str
();
output_text
=
classify
(
image
,
confidence
);
if
(
component_rects
!=
NULL
){
component_rects
->
resize
(
1
);
(
*
component_rects
)[
0
]
=
Rect
(
0
,
0
,
image
.
size
().
width
,
image
.
size
().
height
);
}
if
(
component_texts
!=
NULL
){
component_texts
->
resize
(
1
);
(
*
component_texts
)[
0
]
=
transcription
.
c_str
()
;
(
*
component_texts
)[
0
]
=
output_text
;
}
if
(
component_confidences
!=
NULL
){
component_confidences
->
resize
(
1
);
(
*
component_confidences
)[
0
]
=
float
(
confidence
);
(
*
component_confidences
)[
0
]
=
float
(
confidence
);
}
}
void
run
(
Mat
&
image
,
Mat
&
mask
,
std
::
string
&
output_text
,
std
::
vector
<
Rect
>*
component_rects
=
NULL
,
std
::
vector
<
std
::
string
>*
component_texts
=
NULL
,
std
::
vector
<
float
>*
component_confidences
=
NULL
,
int
component_level
=
0
){
CV_Assert
(
mask
.
cols
==
image
.
cols
&&
mask
.
rows
==
image
.
rows
);
//Mask is ignored because the CNN operates on a full image
this
->
run
(
image
,
output_text
,
component_rects
,
component_texts
,
component_confidences
,
component_level
);
/** Mask-taking overload of run().
 *  The mask is only checked to have the same number of columns and rows as the
 *  image; it is not used otherwise, and the call is forwarded to the overload
 *  without a mask using the same remaining arguments. */
void run(Mat& image,
         Mat& mask,
         std::string& output_text,
         std::vector<Rect>* component_rects = NULL,
         std::vector<std::string>* component_texts = NULL,
         std::vector<float>* component_confidences = NULL,
         int component_level = 0)
{
    // Mask is ignored because the CNN operates on a full image;
    // only its dimensions are validated.
    CV_Assert(mask.cols == image.cols && mask.rows == image.rows);

    run(image,
        output_text,
        component_rects,
        component_texts,
        component_confidences,
        component_level);
}
std
::
vector
<
String
>&
getVocabulary
(){
return
this
->
labels_
;
protected
:
/** Returns the fixed size (width 100, height 32) that this class uses both for
 *  resizing images in classify() and for the input shape in getClassCount(). */
Size getPerceptiveField() const
{
    const int fieldWidth = 100;
    const int fieldHeight = 32;
    return Size(fieldWidth, fieldHeight);
}
size_t
getClassCount
()
{
int
id
=
net
.
getLayerId
(
"prob"
);
dnn
::
MatShape
inputShape
;
inputShape
.
push_back
(
1
);
inputShape
.
push_back
(
1
);
inputShape
.
push_back
(
getPerceptiveField
().
height
);
inputShape
.
push_back
(
getPerceptiveField
().
width
);
vector
<
dnn
::
MatShape
>
inShapes
,
outShapes
;
net
.
getLayerShapes
(
inputShape
,
id
,
inShapes
,
outShapes
);
CV_Assert
(
outShapes
.
size
()
==
1
&&
outShapes
[
0
].
size
()
==
4
);
CV_Assert
(
outShapes
[
0
][
0
]
==
1
&&
outShapes
[
0
][
2
]
==
1
&&
outShapes
[
0
][
3
]
==
1
);
return
outShapes
[
0
][
1
];
}
/** Runs the network on one image and returns the matching vocabulary word.
 *  @param image single-channel CV_8U image (asserted).
 *  @param conf  receives the maximum value found in the "prob" output.
 *  @return the element of `words` at the index of that maximum along dimension 1.
 *  The image is resized to getPerceptiveField(), converted to a blob and fed to
 *  the "data" input; the "prob" output must be non-empty, 4-dimensional and have
 *  getClassCount() entries along dimension 1. */
string classify(InputArray image, double& conf)
{
    CV_Assert(image.channels() == 1 && image.depth() == CV_8U);

    // Bring the image to the network input size.
    Mat scaled;
    resize(image, scaled, getPerceptiveField());

    // Forward pass: "data" in, "prob" out.
    Mat inputBlob = dnn::blobFromImage(scaled);
    net.setInput(inputBlob, "data");
    Mat prob = net.forward("prob");
    CV_Assert(prob.dims == 4 && !prob.empty() && prob.size[1] == (int)getClassCount());

    // Only the maximum value and its position are requested; idx[1] is the class index.
    int idx[4] = {0};
    minMaxIdx(prob, 0, &conf, 0, idx);
    CV_Assert(0 <= idx[1] && idx[1] < (int)words.size());

    const string& bestWord = words[idx[1]];
    return bestWord;
}
};
/** Factory: wraps a new OCRHolisticWordRecognizerImpl, constructed from the given
 *  classifier and vocabulary file name, in a Ptr to the public interface. */
Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(Ptr<TextImageClassifier> classifierPtr,
                                                                 String vocabullaryFilename)
{
    Ptr<OCRHolisticWordRecognizer> recognizer(
        new OCRHolisticWordRecognizerImpl(classifierPtr, vocabullaryFilename));
    return recognizer;
}
};
Ptr
<
OCRHolisticWordRecognizer
>
OCRHolisticWordRecognizer
::
create
(
String
modelArchFilename
,
String
modelWeightsFilename
,
String
vocabullaryFilename
){
Ptr
<
TextImageClassifier
>
classifierPtr
(
new
DictNetCaffeImpl
(
modelArchFilename
,
modelWeightsFilename
,
100
,
0
));
return
Ptr
<
OCRHolisticWordRecognizer
>
(
new
OCRHolisticWordRecognizerImpl
(
classifierPtr
,
vocabullaryFilename
)
);
/** Factory: builds an OCRHolisticWordRecognizerImpl from the three file names
 *  (architecture, weights, word list) and returns it through the public
 *  interface pointer type. */
Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(const string& archFilename,
                                                                 const string& weightsFilename,
                                                                 const string& wordsFilename)
{
    Ptr<OCRHolisticWordRecognizerImpl> impl =
        makePtr<OCRHolisticWordRecognizerImpl>(archFilename, weightsFilename, wordsFilename);
    return impl;
}
}
}
//namespace text namespace cv
}
}
// cv::text::
modules/text/text_config.hpp.in
View file @
3aa88889
#ifndef __OPENCV_TEXT_CONFIG_HPP__
#define __OPENCV_TEXT_CONFIG_HPP__
// HAVE CAFFE
#cmakedefine HAVE_CAFFE
// HAVE OCR Tesseract
#cmakedefine HAVE_TESSERACT
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment