Commit 6c9d6d50 authored by Jcrist99's avatar Jcrist99 Committed by Alexander Alekhin

Merge pull request #1210 from abratchik:contrib.java.wrapper.fix.3.2

fix java wrappers for ERFilter, OCRHMMDecoder, OCRBeamSearchDecoder (#1210)

* fix java wrappers for ERFilter, OCRHMMDecoder, OCRBeamSearchDecoder

* fix comments
parent 9638b145
...@@ -371,6 +371,24 @@ CV_EXPORTS void MSERsToERStats(InputArray image, std::vector<std::vector<Point> ...@@ -371,6 +371,24 @@ CV_EXPORTS void MSERsToERStats(InputArray image, std::vector<std::vector<Point>
// Utility function for scripting // Utility function for scripting
CV_EXPORTS_W void detectRegions(InputArray image, const Ptr<ERFilter>& er_filter1, const Ptr<ERFilter>& er_filter2, CV_OUT std::vector< std::vector<Point> >& regions); CV_EXPORTS_W void detectRegions(InputArray image, const Ptr<ERFilter>& er_filter1, const Ptr<ERFilter>& er_filter2, CV_OUT std::vector< std::vector<Point> >& regions);
/** @brief Extracts text regions from an image.

Convenience overload that returns the bounding rectangles of the detected text groups
instead of the raw region contours.

@param image Source image from which text blocks need to be extracted. Must be CV_8UC3 (color).
@param er_filter1 Extremal Region Filter for the 1st stage classifier of N&M algorithm [Neumann12]. Must not be empty.
@param er_filter2 Extremal Region Filter for the 2nd stage classifier of N&M algorithm [Neumann12]. Must not be empty.
@param groups_rects Output list of rectangle blocks with text.
@param method Grouping method (see text::erGrouping_Modes). Can be one of ERGROUPING_ORIENTATION_HORIZ, ERGROUPING_ORIENTATION_ANY.
@param filename The XML or YAML file with the classifier model (e.g. samples/trained_classifier_erGrouping.xml). Only used when the grouping method is ERGROUPING_ORIENTATION_ANY.
@param minProbability The minimum probability for accepting a group. Only used when the grouping method is ERGROUPING_ORIENTATION_ANY.
*/
CV_EXPORTS_W void detectRegions(InputArray image, const Ptr<ERFilter>& er_filter1, const Ptr<ERFilter>& er_filter2, CV_OUT std::vector<Rect> &groups_rects,
int method = ERGROUPING_ORIENTATION_HORIZ,
const String& filename = String(),
float minProbability = (float)0.5);
//! @} //! @}
} }
......
...@@ -172,6 +172,13 @@ enum decoder_mode ...@@ -172,6 +172,13 @@ enum decoder_mode
OCR_DECODER_VITERBI = 0 // Other algorithms may be added OCR_DECODER_VITERBI = 0 // Other algorithms may be added
}; };
/* OCR character classifier type, used to choose which classifier loader
   loadOCRHMMClassifier() dispatches to. */
enum classifier_type
{
OCR_KNN_CLASSIFIER = 0, // KNN classifier, loaded through loadOCRHMMClassifierNM
OCR_CNN_CLASSIFIER = 1  // CNN classifier, loaded through loadOCRHMMClassifierCNN
};
/** @brief OCRHMMDecoder class provides an interface for OCR using Hidden Markov Models. /** @brief OCRHMMDecoder class provides an interface for OCR using Hidden Markov Models.
@note @note
...@@ -299,6 +306,21 @@ public: ...@@ -299,6 +306,21 @@ public:
// cols == rows == vocabulari.size() // cols == rows == vocabulari.size()
int mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment) int mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment)
/** @brief Creates an instance of the OCRHMMDecoder class. Loads the character classifier from the specified
file and initializes the decoder with it.
@overload
@param filename Path of the character classifier model file; it is handed to loadOCRHMMClassifier.
@param vocabulary The language vocabulary (chars when ascii english text).
@param transition_probabilities_table Table with transition probabilities between character pairs.
@param emission_probabilities_table Table with observation emission probabilities.
@param mode HMM decoding algorithm (only OCR_DECODER_VITERBI for the moment).
@param classifier The character classifier type, one of the classifier_type enum values.
*/
CV_WRAP static Ptr<OCRHMMDecoder> create(const String& filename,
const String& vocabulary, // The language vocabulary (chars when ascii english text)
// size() must be equal to the number of classes
InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
// cols == rows == vocabulary.size()
InputArray emission_probabilities_table, // Table with observation emission probabilities
// cols == rows == vocabulary.size()
int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
int classifier = OCR_KNN_CLASSIFIER); // The character classifier type (see classifier_type)
protected: protected:
Ptr<OCRHMMDecoder::ClassifierCallback> classifier; Ptr<OCRHMMDecoder::ClassifierCallback> classifier;
...@@ -318,6 +340,8 @@ fixed size, while retaining the centroid and aspect ratio, in order to extract a ...@@ -318,6 +340,8 @@ fixed size, while retaining the centroid and aspect ratio, in order to extract a
based on gradient orientations along the chain-code of its perimeter. Then, the region is classified based on gradient orientations along the chain-code of its perimeter. Then, the region is classified
using a KNN model trained with synthetic data of rendered characters with different standard font using a KNN model trained with synthetic data of rendered characters with different standard font
types. types.
@deprecated use loadOCRHMMClassifier instead
*/ */
CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename); CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename);
...@@ -330,9 +354,19 @@ The CNN default classifier is based in the scene text recognition method propose ...@@ -330,9 +354,19 @@ The CNN default classifier is based in the scene text recognition method propose
Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Convolutional Neural Network and Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Convolutional Neural Network and
a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
at each window location. at each window location.
@deprecated use loadOCRHMMClassifier instead
*/ */
CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename); CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename);
/** @brief Loads a character classifier of the requested type, so that the default classifier can be
selected by an integer id when creating an OCRHMMDecoder object.
@param filename The XML or YAML file with the classifier model (e.g. OCRBeamSearch_CNN_model_data.xml.gz)
@param classifier Can be one of classifier_type enum values (OCR_KNN_CLASSIFIER or OCR_CNN_CLASSIFIER).
@return The loaded classifier callback.
*/
CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifier(const String& filename, int classifier);
//! @} //! @}
/** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon). /** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
...@@ -466,6 +500,20 @@ public: ...@@ -466,6 +500,20 @@ public:
int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment) int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
int beam_size = 500); // Size of the beam in Beam Search algorithm int beam_size = 500); // Size of the beam in Beam Search algorithm
/** @brief Creates an instance of the OCRBeamSearchDecoder class. Loads the character classifier from the
specified file and initializes the decoder with it.
@overload
@param filename Path of the character classifier model file; it is handed to loadOCRBeamSearchClassifierCNN.
@param vocabulary The language vocabulary (chars when ascii english text).
@param transition_probabilities_table Table with transition probabilities between character pairs.
@param emission_probabilities_table Table with observation emission probabilities.
@param mode HMM decoding algorithm (only OCR_DECODER_VITERBI for the moment).
@param beam_size Size of the beam in the Beam Search algorithm.
*/
CV_WRAP static Ptr<OCRBeamSearchDecoder> create(const String& filename, // The character classifier file
const String& vocabulary, // The language vocabulary (chars when ascii english text)
// size() must be equal to the number of classes
InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
// cols == rows == vocabulary.size()
InputArray emission_probabilities_table, // Table with observation emission probabilities
// cols == rows == vocabulary.size()
int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
int beam_size = 500); // Size of the beam in Beam Search algorithm
protected: protected:
Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier; Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier;
......
...@@ -4219,5 +4219,41 @@ void detectRegions(InputArray image, const Ptr<ERFilter>& er_filter1, const Ptr< ...@@ -4219,5 +4219,41 @@ void detectRegions(InputArray image, const Ptr<ERFilter>& er_filter1, const Ptr<
} }
} }
// Rectangle-returning overload of detectRegions: runs both ER filter stages on the
// grey image and on its inverse, then groups the surviving regions into text boxes.
void detectRegions(InputArray image, const Ptr<ERFilter>& er_filter1, const Ptr<ERFilter>& er_filter2,
                   CV_OUT std::vector<Rect> &groups_rects,
                   int method,
                   const String& filename,
                   float minProbability)
{
    // Preconditions: 8-bit 3-channel input and two valid filter stages.
    CV_Assert( image.type() == CV_8UC3 );
    CV_Assert( !er_filter1.empty() );
    CV_Assert( !er_filter2.empty() );

    // Only the grey projection of the image is analysed.
    Mat gray_plane;
    cvtColor(image, gray_plane, COLOR_RGB2GRAY);

    // Two planes are processed independently: the grey image and its inverse.
    vector<Mat> planes;
    planes.push_back(gray_plane);
    planes.push_back(255-gray_plane);

    // One ERStat list per plane; both filter stages work on the same list
    // (iterations are independent and could be done in parallel).
    vector<vector<ERStat> > stats_per_plane(planes.size());
    for (size_t idx = 0; idx < planes.size(); ++idx)
    {
        vector<ERStat>& plane_stats = stats_per_plane[idx];
        er_filter1->run(planes[idx], plane_stats);
        er_filter2->run(planes[idx], plane_stats);
    }

    // Group the character candidates; only the rectangles are handed back to the caller.
    vector< vector<Vec2i> > group_members;
    erGrouping(image, planes, stats_per_plane, group_members, groups_rects, method, filename, minProbability);
}
} }
} }
...@@ -499,7 +499,7 @@ Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create( Ptr<OCRBeamSearchDecoder ...@@ -499,7 +499,7 @@ Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create( Ptr<OCRBeamSearchDecoder
return makePtr<OCRBeamSearchDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, _mode, _beam_size); return makePtr<OCRBeamSearchDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, _mode, _beam_size);
} }
CV_EXPORTS_W Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create(Ptr<OCRBeamSearchDecoder::ClassifierCallback> _classifier, Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create(Ptr<OCRBeamSearchDecoder::ClassifierCallback> _classifier,
const String& _vocabulary, const String& _vocabulary,
InputArray transition_p, InputArray transition_p,
InputArray emission_p, InputArray emission_p,
...@@ -509,8 +509,17 @@ CV_EXPORTS_W Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create(Ptr<OCRBeamS ...@@ -509,8 +509,17 @@ CV_EXPORTS_W Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create(Ptr<OCRBeamS
return makePtr<OCRBeamSearchDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, (decoder_mode)_mode, _beam_size); return makePtr<OCRBeamSearchDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, (decoder_mode)_mode, _beam_size);
} }
// Overload taking a classifier file name: the CNN character classifier is loaded
// from _filename and then used to build the decoder implementation.
Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create(const String& _filename,
                                                       const String& _vocabulary,
                                                       InputArray transition_p,
                                                       InputArray emission_p,
                                                       int _mode,
                                                       int _beam_size)
{
    Ptr<OCRBeamSearchDecoder::ClassifierCallback> cnn_classifier = loadOCRBeamSearchClassifierCNN(_filename);
    // The wrapper-friendly int is converted back to the decoder_mode enum here.
    const decoder_mode decoding = static_cast<decoder_mode>(_mode);
    return makePtr<OCRBeamSearchDecoderImpl>(cnn_classifier, _vocabulary, transition_p, emission_p, decoding, _beam_size);
}
class CV_EXPORTS OCRBeamSearchClassifierCNN : public OCRBeamSearchDecoder::ClassifierCallback class OCRBeamSearchClassifierCNN : public OCRBeamSearchDecoder::ClassifierCallback
{ {
public: public:
//constructor //constructor
......
...@@ -90,7 +90,7 @@ void OCRHMMDecoder::run(Mat& image, Mat& mask, string& output_text, vector<Rect> ...@@ -90,7 +90,7 @@ void OCRHMMDecoder::run(Mat& image, Mat& mask, string& output_text, vector<Rect>
component_confidences->clear(); component_confidences->clear();
} }
CV_WRAP String OCRHMMDecoder::run(InputArray image, int min_confidence, int component_level) String OCRHMMDecoder::run(InputArray image, int min_confidence, int component_level)
{ {
std::string output1; std::string output1;
std::string output2; std::string output2;
...@@ -109,7 +109,7 @@ CV_WRAP String OCRHMMDecoder::run(InputArray image, int min_confidence, int comp ...@@ -109,7 +109,7 @@ CV_WRAP String OCRHMMDecoder::run(InputArray image, int min_confidence, int comp
return String(output2); return String(output2);
} }
CV_WRAP cv::String OCRHMMDecoder::run(InputArray image, InputArray mask, int min_confidence, int component_level) cv::String OCRHMMDecoder::run(InputArray image, InputArray mask, int min_confidence, int component_level)
{ {
std::string output1; std::string output1;
std::string output2; std::string output2;
...@@ -684,8 +684,17 @@ Ptr<OCRHMMDecoder> OCRHMMDecoder::create( Ptr<OCRHMMDecoder::ClassifierCallback> ...@@ -684,8 +684,17 @@ Ptr<OCRHMMDecoder> OCRHMMDecoder::create( Ptr<OCRHMMDecoder::ClassifierCallback>
return makePtr<OCRHMMDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, (decoder_mode)_mode); return makePtr<OCRHMMDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, (decoder_mode)_mode);
} }
// Overload taking a classifier file name: the character classifier of the requested
// type is loaded from _filename and then used to build the decoder implementation.
Ptr<OCRHMMDecoder> OCRHMMDecoder::create( const String& _filename,
                                          const String& _vocabulary,
                                          InputArray transition_p,
                                          InputArray emission_p,
                                          int _mode,
                                          int _classifier)
{
    Ptr<OCRHMMDecoder::ClassifierCallback> char_classifier = loadOCRHMMClassifier(_filename, _classifier);
    // The wrapper-friendly int is converted back to the decoder_mode enum here.
    const decoder_mode decoding = static_cast<decoder_mode>(_mode);
    return makePtr<OCRHMMDecoderImpl>(char_classifier, _vocabulary, transition_p, emission_p, decoding);
}
class CV_EXPORTS OCRHMMClassifierKNN : public OCRHMMDecoder::ClassifierCallback class OCRHMMClassifierKNN : public OCRHMMDecoder::ClassifierCallback
{ {
public: public:
//constructor //constructor
...@@ -916,6 +925,22 @@ void OCRHMMClassifierKNN::eval( InputArray _mask, vector<int>& out_class, vector ...@@ -916,6 +925,22 @@ void OCRHMMClassifierKNN::eval( InputArray _mask, vector<int>& out_class, vector
} }
// Loads the character classifier selected by _classifier from _filename.
//   OCR_KNN_CLASSIFIER -> loadOCRHMMClassifierNM
//   OCR_CNN_CLASSIFIER -> loadOCRHMMClassifierCNN
// Any other value raises Error::StsBadArg through CV_Error.
Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifier(const String& _filename, int _classifier)
{
    Ptr<OCRHMMDecoder::ClassifierCallback> pt;

    switch(_classifier) {
        case OCR_KNN_CLASSIFIER:
            pt = loadOCRHMMClassifierNM(_filename);
            break;
        case OCR_CNN_CLASSIFIER:
            pt = loadOCRHMMClassifierCNN(_filename);
            // This break is required: without it control falls through into
            // `default` and a valid CNN request is rejected as unsupported.
            break;
        default:
            CV_Error(Error::StsBadArg, "Specified HMM classifier is not supported!");
            break;
    }

    return pt;
}
Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename) Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename)
...@@ -923,7 +948,7 @@ Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& file ...@@ -923,7 +948,7 @@ Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& file
return makePtr<OCRHMMClassifierKNN>(std::string(filename)); return makePtr<OCRHMMClassifierKNN>(std::string(filename));
} }
class CV_EXPORTS OCRHMMClassifierCNN : public OCRHMMDecoder::ClassifierCallback class OCRHMMClassifierCNN : public OCRHMMDecoder::ClassifierCallback
{ {
public: public:
//constructor //constructor
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment