Adds a first implementation of the OCRBeamSearchDecoder class using the Single…

Adds a first implementation of the OCRBeamSearchDecoder class using the Single Layer CNN character classifier described in Coates, Adam, et al. paper: Text detection and character recognition in scene images with unsupervised feature learning, ICDAR 2011

Adds a first implementation of the OCRBeamSearchDecoder class using the Single…
Adds a first implementation of the OCRBeamSearchDecoder class using the Single Layer CNN character classifier described in Coates, Adam, et al. paper: Text detection and character recognition in scene images with unsupervised feature learning, ICDAR 2011
52cca0dd · lluis · c05a7e01 · 52cca0dd · 52cca0dd
Commit 52cca0dd authored Jun 26, 2015 by lluis
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 111 additions and 0 deletions

ocr.hpp modules/text/include/opencv2/text/ocr.hpp +111 -0

ocr_beamsearch_decoder.cpp modules/text/src/ocr_beamsearch_decoder.cpp +0 -0

No files found.
--- a/modules/text/include/opencv2/text/ocr.hpp
+++ b/modules/text/include/opencv2/text/ocr.hpp
@@ -240,6 +240,117 @@ types.
 */
 CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string& filename);

+
+/* OCR BeamSearch Decoder */
+
+/** @brief OCRBeamSearchDecoder class provides an interface for OCR using the Beam Search algorithm.
+
+@note
+   -   (C++) An example of using OCRBeamSearchDecoder recognition combined with scene text detection can
+        be found at the demo sample:
+        <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
+ */
+class CV_EXPORTS OCRBeamSearchDecoder : public BaseOCR
+{
+public:
+
+    /** @brief Callback with the character classifier is made a class.
+
+    This way it hides the feature extractor and the classifier itself, so developers can write
+    their own OCR code.
+
+    The default character classifier and feature extractor can be loaded using the utility function
+    loadOCRBeamSearchClassifierCNN with all its parameters provided in
+    <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
+     */
+    class CV_EXPORTS ClassifierCallback
+    {
+    public:
+        virtual ~ClassifierCallback() { }
+        /** @brief Evaluates the character classifier on the given input image.
+
+        @param image Input image CV_8UC1 or CV_8UC3 (NOTE(review): was documented as a single letter; confirm).
+        @param recognition_probabilities Output. Presumably one vector of per-class probabilities
+        for each evaluated window location (TODO confirm against the implementation).
+        @param oversegmentation Output. Presumably identifies the window locations that the
+        entries of recognition_probabilities refer to (TODO confirm against the implementation).
+         */
+        virtual void eval( InputArray image, std::vector< std::vector<double> >& recognition_probabilities, std::vector<int>& oversegmentation );
+    };
+
+public:
+    /** @brief Recognize text using Beam Search.
+
+    Takes image on input and returns recognized text in the output_text parameter. Optionally
+    provides also the Rects for individual text elements found (e.g. words), and the list of those
+    text elements with their confidence values.
+
+    @param image Input image CV_8UC1 with a single text line (or word).
+
+    @param output_text Output text. Most likely character sequence found by the decoder.
+
+    @param component_rects If provided the method will output a list of Rects for the individual
+    text elements found (e.g. words).
+
+    @param component_texts If provided the method will output a list of text strings for the
+    recognition of individual text elements found (e.g. words).
+
+    @param component_confidences If provided the method will output a list of confidence values
+    for the recognition of individual text elements found (e.g. words).
+
+    @param component_level Only OCR_LEVEL_WORD is supported.
+     */
+    virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
+                     int component_level=0);
+
+    /** @brief Creates an instance of the OCRBeamSearchDecoder class.
+
+    @param classifier The character classifier with built in feature extractor.
+
+    @param vocabulary The language vocabulary (chars when ascii english text). vocabulary.size()
+    must be equal to the number of classes of the classifier.
+
+    @param transition_probabilities_table Table with transition probabilities between character
+    pairs. cols == rows == vocabulary.size().
+
+    @param emission_probabilities_table Table with observation emission probabilities. cols ==
+    rows == vocabulary.size().
+
+    @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available for the moment (<http://en.wikipedia.org/wiki/Viterbi_algorithm>).
+    @param beam_size Size of the beam in the Beam Search algorithm.
+     */
+    static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
+                                     const std::string& vocabulary,                    // The language vocabulary (chars when ascii english text)
+                                                                                       //     size() must be equal to the number of classes
+                                     InputArray transition_probabilities_table,        // Table with transition probabilities between character pairs
+                                                                                       //     cols == rows == vocabulary.size()
+                                     InputArray emission_probabilities_table,          // Table with observation emission probabilities
+                                                                                       //     cols == rows == vocabulary.size()
+                                     decoder_mode mode = OCR_DECODER_VITERBI,          // HMM Decoding algorithm (only Viterbi for the moment)
+                                     int beam_size = 50);                              // Size of the beam in Beam Search algorithm
+
+protected:
+    // NOTE(review): these members presumably hold the corresponding create() arguments; confirm in the .cpp.
+    Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier;
+    std::string vocabulary;
+    Mat transition_p;
+    Mat emission_p;
+    decoder_mode mode;
+    int beam_size;
+};
+
+/** @brief Allows implicitly loading the default character classifier when creating an OCRBeamSearchDecoder object.
+
+@param filename The XML or YAML file with the classifier model (e.g. OCRBeamSearch_CNN_model_data.xml.gz)
+
+The default classifier is based on the scene text recognition method proposed by Adam Coates &
+Andrew Ng in [Coates11a]. The character classifier consists of a Single Layer Convolutional Neural Network and
+a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
+at each window location.
+ */
+CV_EXPORTS Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const std::string& filename);
+
 //! @}

 }

--- a/modules/text/src/ocr_beamsearch_decoder.cpp
+++ b/modules/text/src/ocr_beamsearch_decoder.cpp