Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv_contrib
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv_contrib
Commits
be395e59
Commit
be395e59
authored
Jul 19, 2017
by
sghoshcvc
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Modified the class hierarchy
parent
2b8ed124
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
166 additions
and
56 deletions
+166
-56
ocr.hpp
modules/text/include/opencv2/text/ocr.hpp
+0
-4
textDetector.hpp
modules/text/include/opencv2/text/textDetector.hpp
+118
-6
ocr_holistic.cpp
modules/text/src/ocr_holistic.cpp
+41
-41
text_detector.cpp
modules/text/src/text_detector.cpp
+7
-5
text_detectorCNN.cpp
modules/text/src/text_detectorCNN.cpp
+0
-0
No files found.
modules/text/include/opencv2/text/ocr.hpp
View file @
be395e59
...
...
@@ -716,10 +716,6 @@ public:
/** @brief produces a class confidence row-vector given an image
*/
CV_WRAP
virtual
void
classify
(
InputArray
image
,
OutputArray
classProbabilities
)
=
0
;
/** @brief produces a list of bounding boxes given an image
*/
CV_WRAP
virtual
void
detect
(
InputArray
image
,
OutputArray
classProbabilities
)
=
0
;
/** @brief produces a matrix containing class confidence row-vectors given a collection of images
*/
...
...
modules/text/include/opencv2/text/textDetector.hpp
View file @
be395e59
...
...
@@ -65,19 +65,131 @@ namespace text
//detection scenario
class
CV_EXPORTS_W
BaseDetector
{
public
:
public
:
virtual
~
BaseDetector
()
{};
virtual
void
run
(
Mat
&
image
,
std
::
vector
<
Rect
>*
component_rects
=
NULL
,
std
::
vector
<
Rect
>*
component_rects
=
NULL
,
std
::
vector
<
float
>*
component_confidences
=
NULL
,
int
component_level
=
0
)
=
0
;
virtual
void
run
(
Mat
&
image
,
Mat
&
mask
,
std
::
vector
<
Rect
>*
component_rects
=
NULL
,
std
::
vector
<
Rect
>*
component_rects
=
NULL
,
std
::
vector
<
float
>*
component_confidences
=
NULL
,
int
component_level
=
0
)
=
0
;
};
/** Abstract base class for the different models of text detection (including CNN based deep models).
 */
class CV_EXPORTS_W TextRegionDetector
{
protected:
    /** Input and output sizes, and input and output channel counts, kept by the detector.
     */
    //netGeometry inputGeometry_;
    //netGeometry outputGeometry_;
    Size inputGeometry_;
    Size outputGeometry_;
    int inputChannelCount_;
    int outputChannelCount_;

public:
    virtual ~TextRegionDetector() {}

    /** @brief Produces a list of bounding boxes together with an estimate of the
     * text-ness confidence of each bounding box.
     *
     * @param image the input image.
     * @param bboxProb output array receiving the bounding boxes and their confidences.
     */
    CV_WRAP virtual void detect(InputArray image, OutputArray bboxProb) = 0;

    /** @brief Simple getter returning the stored size of the input sample.
     */
    CV_WRAP virtual Size getInputGeometry()
    {
        return inputGeometry_;
    }

    /** @brief Simple getter returning the stored shape of the output.
     *
     * Any text detector should output a number of text regions along with a score of text-ness.
     * From the shape, the number of text regions and the number of values returned
     * for each region can be inferred.
     */
    CV_WRAP virtual Size getOutputGeometry()
    {
        return outputGeometry_;
    }
};
/** Generic structure of deep CNN based text detectors.
 */
class CV_EXPORTS_W DeepCNNTextDetector : public TextRegionDetector
{
    /** @brief Class that uses a pretrained caffe model for text detection.
     *
     * This network is described in detail in:
     * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network
     * https://arxiv.org/abs/1611.06779
     */
protected:
    /** All deep CNN based text detectors (normally) have a preprocessor.
     */
    Ptr<ImagePreprocessor> preprocessor_;

    /** @brief All image preprocessing is handled here, including whitening etc.
     *
     * @param input the image to be preprocessed. If the depth is 8-bit unsigned (CV_8U),
     * values should be in [0,255]; otherwise values are assumed to be in [0,1].
     *
     * @param output reference to the image to be fed to the network; the preprocessor will
     * resize the image to the appropriate size and convert it to the appropriate depth.
     *
     * This method should never be used externally; the class's own processing methods are
     * expected to employ it (the note this was derived from names classify and classifyBatch).
     */
    virtual void preprocess(const Mat& input, Mat& output);

public:
    virtual ~DeepCNNTextDetector() {}

    /** @brief Constructs a DeepCNNTextDetector object from a caffe pretrained model.
     *
     * @param archFilename the path to the prototxt file containing the deployment model
     * architecture description.
     *
     * @param weightsFilename the path to the pretrained weights of the model in binary form.
     *
     * @param preprocessor a pointer to the instance of an ImagePreprocessor implementing the
     * preprocess_ protected method.
     *
     * @param minibatchSz the maximum number of samples that can be processed in parallel. In
     * practice this parameter has an effect only when computing on the GPU and should be set
     * with respect to the memory available on the GPU.
     *
     * @param backEnd integer parameter selecting the computation framework. For now
     * OCR_HOLISTIC_BACKEND_CAFFE is the only option.
     */
    CV_WRAP static Ptr<DeepCNNTextDetector> create(String archFilename,
                                                   String weightsFilename,
                                                   Ptr<ImagePreprocessor> preprocessor,
                                                   int minibatchSz = 100,
                                                   int backEnd = OCR_HOLISTIC_BACKEND_CAFFE);

    /** @brief Constructs a DeepCNNTextDetector intended to be used for text area detection.
     *
     * This method loads a pretrained model and couples it with a preprocessor that applies
     * mean subtraction to the image.
     * The architecture and model weights can be downloaded from:
     * https://github.com/sghoshcvc/TextBox-Models.git (size is around 100 MB)
     *
     * @param archFilename the path to the prototxt file containing the deployment model
     * architecture description. When employing OCR_HOLISTIC_BACKEND_CAFFE this is the path to
     * the deploy ".prototxt".
     *
     * @param weightsFilename the path to the pretrained weights of the model. When employing
     * OCR_HOLISTIC_BACKEND_CAFFE this is the path to the ".caffemodel" file.
     *
     * @param backEnd integer parameter selecting the computation framework. For now
     * OCR_HOLISTIC_BACKEND_CAFFE is the only option.
     */
    CV_WRAP static Ptr<DeepCNNTextDetector> createTextBoxNet(String archFilename,
                                                             String weightsFilename,
                                                             int backEnd = OCR_HOLISTIC_BACKEND_CAFFE);

    friend class ImagePreprocessor;
};
/** @brief textDetector class provides the functionality of text bounding box detection.
* A TextRegionDetector is employed to find bounding boxes of text
* words given an input image.
*
* This class implements the logic of providing text bounding boxes in a vector of rects given a TextRegionDetector.
* The TextRegionDetector can be any text detector.
*
*/
class
CV_EXPORTS_W
textDetector
:
public
BaseDetector
{
...
...
@@ -125,9 +237,9 @@ public:
/** @brief simple getter for the preprocessing functor
/** @brief simple getter for the preprocessing functor
*/
CV_WRAP
virtual
Ptr
<
Text
ImageClassifie
r
>
getClassifier
()
=
0
;
CV_WRAP
virtual
Ptr
<
Text
RegionDetecto
r
>
getClassifier
()
=
0
;
/** @brief Creates an instance of the textDetector class.
...
...
@@ -135,7 +247,7 @@ public:
*/
CV_WRAP
static
Ptr
<
textDetector
>
create
(
Ptr
<
Text
ImageClassifie
r
>
classifierPtr
);
CV_WRAP
static
Ptr
<
textDetector
>
create
(
Ptr
<
Text
RegionDetecto
r
>
classifierPtr
);
/** @brief Creates an instance of the textDetector class and implicitly also a DeepCNN classifier.
...
...
modules/text/src/ocr_holistic.cpp
View file @
be395e59
...
...
@@ -459,53 +459,53 @@ protected:
#endif
}
void
process_
(
Mat
inputImage
,
Mat
&
outputMat
)
{
// do forward pass and stores the output in outputMat
//Process one image
CV_Assert
(
this
->
minibatchSz_
==
1
);
//CV_Assert(outputMat.isContinuous());
//
void process_(Mat inputImage, Mat &outputMat)
//
{
//
// do forward pass and stores the output in outputMat
//
//Process one image
//
CV_Assert(this->minibatchSz_==1);
//
//CV_Assert(outputMat.isContinuous());
#ifdef HAVE_CAFFE
net_
->
input_blobs
()[
0
]
->
Reshape
(
1
,
this
->
channelCount_
,
this
->
inputGeometry_
.
height
,
this
->
inputGeometry_
.
width
);
net_
->
Reshape
();
float
*
inputBuffer
=
net_
->
input_blobs
()[
0
]
->
mutable_cpu_data
();
float
*
inputData
=
inputBuffer
;
//
#ifdef HAVE_CAFFE
//
net_->input_blobs()[0]->Reshape(1, this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
//
net_->Reshape();
//
float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
//
float* inputData=inputBuffer;
std
::
vector
<
Mat
>
input_channels
;
Mat
preprocessed
;
// if the image have multiple color channels the input layer should be populated accordingly
for
(
int
channel
=
0
;
channel
<
this
->
channelCount_
;
channel
++
){
//
std::vector<Mat> input_channels;
//
Mat preprocessed;
//
// if the image have multiple color channels the input layer should be populated accordingly
//
for (int channel=0;channel < this->channelCount_;channel++){
cv
::
Mat
netInputWraped
(
this
->
inputGeometry_
.
height
,
this
->
inputGeometry_
.
width
,
CV_32FC1
,
inputData
);
input_channels
.
push_back
(
netInputWraped
);
//input_data += width * height;
inputData
+=
(
this
->
inputGeometry_
.
height
*
this
->
inputGeometry_
.
width
);
}
this
->
preprocess
(
inputImage
,
preprocessed
);
split
(
preprocessed
,
input_channels
);
//
cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
//
input_channels.push_back(netInputWraped);
//
//input_data += width * height;
//
inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
//
}
//
this->preprocess(inputImage,preprocessed);
//
split(preprocessed, input_channels);
//preprocessed.copyTo(netInputWraped);
//
//preprocessed.copyTo(netInputWraped);
this
->
net_
->
Forward
();
const
float
*
outputNetData
=
net_
->
output_blobs
()[
0
]
->
cpu_data
();
// const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
//
this->net_->Forward();
//
const float* outputNetData=net_->output_blobs()[0]->cpu_data();
//
// const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
this
->
outputGeometry_
=
Size
(
net_
->
output_blobs
()[
0
]
->
width
(),
net_
->
output_blobs
()[
0
]
->
height
());
int
outputSz
=
this
->
outputSize_
*
this
->
outputGeometry_
.
height
*
this
->
outputGeometry_
.
width
;
outputMat
.
create
(
this
->
outputGeometry_
.
height
,
this
->
outputGeometry_
.
width
,
CV_32FC1
);
float
*
outputMatData
=
(
float
*
)(
outputMat
.
data
);
//
this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
//
int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width;
//
outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
//
float*outputMatData=(float*)(outputMat.data);
memcpy
(
outputMatData
,
outputNetData
,
sizeof
(
float
)
*
outputSz
);
//
memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
#endif
}
//
#endif
//
}
...
...
@@ -587,15 +587,15 @@ public:
inputImageList
.
push_back
(
image
.
getMat
());
classifyBatch
(
inputImageList
,
classProbabilities
);
}
void
detect
(
InputArray
image
,
OutputArray
Bbox_prob
)
{
//
void detect(InputArray image, OutputArray Bbox_prob)
//
{
Bbox_prob
.
create
(
this
->
outputGeometry_
,
CV_32F
);
// dummy initialization is it needed
Mat
outputMat
=
Bbox_prob
.
getMat
();
process_
(
image
.
getMat
(),
outputMat
);
//copy back to outputArray
outputMat
.
copyTo
(
Bbox_prob
);
}
//
Bbox_prob.create(this->outputGeometry_,CV_32F); // dummy initialization is it needed
//
Mat outputMat = Bbox_prob.getMat();
//
process_(image.getMat(),outputMat);
//
//copy back to outputArray
//
outputMat.copyTo(Bbox_prob);
//
}
void
classifyBatch
(
InputArrayOfArrays
inputImageList
,
OutputArray
classProbabilities
)
{
...
...
modules/text/src/text_detector.cpp
View file @
be395e59
...
...
@@ -23,6 +23,8 @@
namespace
cv
{
namespace
text
{
class
textDetectImpl
:
public
textDetector
{
private
:
struct
NetOutput
{
...
...
@@ -60,9 +62,9 @@ private:
};
protected
:
Ptr
<
Text
ImageClassifie
r
>
classifier_
;
Ptr
<
Text
RegionDetecto
r
>
classifier_
;
public
:
textDetectImpl
(
Ptr
<
Text
ImageClassifie
r
>
classifierPtr
)
:
classifier_
(
classifierPtr
)
textDetectImpl
(
Ptr
<
Text
RegionDetecto
r
>
classifierPtr
)
:
classifier_
(
classifierPtr
)
{
}
...
...
@@ -131,13 +133,13 @@ public:
Ptr
<
Text
ImageClassifie
r
>
getClassifier
()
Ptr
<
Text
RegionDetecto
r
>
getClassifier
()
{
return
this
->
classifier_
;
}
};
Ptr
<
textDetector
>
textDetector
::
create
(
Ptr
<
Text
ImageClassifie
r
>
classifierPtr
)
Ptr
<
textDetector
>
textDetector
::
create
(
Ptr
<
Text
RegionDetecto
r
>
classifierPtr
)
{
return
Ptr
<
textDetector
>
(
new
textDetectImpl
(
classifierPtr
));
}
...
...
@@ -155,7 +157,7 @@ Ptr<textDetector> textDetector::create(String modelArchFilename, String modelWei
textbox_mean
.
at
<
uchar
>
(
0
,
2
)
=
123
;
preprocessor
->
set_mean
(
textbox_mean
);
// create a pointer to text box detector(textDetector)
Ptr
<
Text
ImageClassifier
>
classifierPtr
(
DeepCNN
::
create
(
modelArchFilename
,
modelWeightsFilename
,
preprocessor
,
1
));
Ptr
<
Text
RegionDetector
>
classifierPtr
(
DeepCNNTextDetector
::
create
(
modelArchFilename
,
modelWeightsFilename
,
preprocessor
,
1
));
return
Ptr
<
textDetector
>
(
new
textDetectImpl
(
classifierPtr
));
}
...
...
modules/text/src/text_detectorCNN.cpp
0 → 100644
View file @
be395e59
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment