Merge pull request #283 from lluisgomez/master

Adds a first implementation of the OCRBeamSearchDecoder class

Merge pull request #283 from lluisgomez/master
Adds a first implementation of the OCRBeamSearchDecoder class
cf1cc737 · Vadim Pisarevsky · 052de5d3 · c5bfcb95 · cf1cc737 · cf1cc737
Commit cf1cc737 authored Jul 01, 2015 by Vadim Pisarevsky
6 changed files
--- a/modules/text/include/opencv2/text/ocr.hpp
+++ b/modules/text/include/opencv2/text/ocr.hpp
@@ -240,6 +240,119 @@ types.
 */
 CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string& filename);

+
+/* OCR BeamSearch Decoder */
+
+/** @brief OCRBeamSearchDecoder class provides an interface for OCR using Beam Search algorithm.
+
+@note
+   -   (C++) An example on using OCRBeamSearchDecoder recognition combined with scene text detection can
+        be found at the demo sample:
+        <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
+ */
+class CV_EXPORTS OCRBeamSearchDecoder : public BaseOCR
+{
+public:
+
+    /** @brief Callback with the character classifier is made a class.
+
+    This way it hides the feature extractor and the classifier itself, so developers can write
+    their own OCR code.
+
+    The default character classifier and feature extractor can be loaded using the utility function
+    loadOCRBeamSearchClassifierCNN with all its parameters provided in
+    <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
+     */
+    class CV_EXPORTS ClassifierCallback
+    {
+    public:
+        virtual ~ClassifierCallback() { }
+        /** @brief The character classifier must return a (ranked list of) class(es) id('s)
+
+        @param image Input image CV_8UC1 or CV_8UC3 with a single letter.
+        @param recognition_probabilities For each of the N characters found the classifier returns a list with
+        class probabilities for each class.
+        @param oversegmentation The classifier returns a list of N+1 character locations' x-coordinates,
+        including 0 as start-sequence location.
+         */
+        virtual void eval( InputArray image, std::vector< std::vector<double> >& recognition_probabilities, std::vector<int>& oversegmentation );
+    };
+
+public:
+    /** @brief Recognize text using Beam Search.
+
+    Takes image on input and returns recognized text in the output_text parameter. Optionally
+    provides also the Rects for individual text elements found (e.g. words), and the list of those
+    text elements with their confidence values.
+
+    @param image Input image CV_8UC1 with a single text line (or word).
+
+    @param output_text Output text. Most likely character sequence found by the Beam Search decoder.
+
+    @param component_rects If provided the method will output a list of Rects for the individual
+    text elements found (e.g. words).
+
+    @param component_texts If provided the method will output a list of text strings for the
+    recognition of individual text elements found (e.g. words).
+
+    @param component_confidences If provided the method will output a list of confidence values
+    for the recognition of individual text elements found (e.g. words).
+
+    @param component_level Only OCR_LEVEL_WORD is supported.
+     */
+    virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
+                     int component_level=0);
+
+    /** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder.
+
+    @param classifier The character classifier with built in feature extractor.
+
+    @param vocabulary The language vocabulary (chars when ascii english text). vocabulary.size()
+    must be equal to the number of classes of the classifier.
+
+    @param transition_probabilities_table Table with transition probabilities between character
+    pairs. cols == rows == vocabulary.size().
+
+    @param emission_probabilities_table Table with observation emission probabilities. cols ==
+    rows == vocabulary.size().
+
+    @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available for the moment
+    (<http://en.wikipedia.org/wiki/Viterbi_algorithm>).
+
+    @param beam_size Size of the beam in Beam Search algorithm.
+     */
+    static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
+                                     const std::string& vocabulary,                    // The language vocabulary (chars when ascii english text)
+                                                                                       //     size() must be equal to the number of classes
+                                     InputArray transition_probabilities_table,        // Table with transition probabilities between character pairs
+                                                                                       //     cols == rows == vocabulary.size()
+                                     InputArray emission_probabilities_table,          // Table with observation emission probabilities
+                                                                                       //     cols == rows == vocabulary.size()
+                                     decoder_mode mode = OCR_DECODER_VITERBI,          // HMM Decoding algorithm (only Viterbi for the moment)
+                                     int beam_size = 50);                              // Size of the beam in Beam Search algorithm
+
+protected: // NOTE(review): these members presumably store the arguments passed to create() - confirm in ocr_beamsearch_decoder.cpp
+
+    Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier;
+    std::string vocabulary;
+    Mat transition_p;
+    Mat emission_p;
+    decoder_mode mode;
+    int beam_size;
+};
+
+/** @brief Allows loading the default character classifier when creating an OCRBeamSearchDecoder object.
+
+@param filename The XML or YAML file with the classifier model (e.g. OCRBeamSearch_CNN_model_data.xml.gz)
+
+The default classifier is based on the scene text recognition method proposed by Adam Coates &
+Andrew Ng in [Coates11a]. The character classifier consists of a Single Layer Convolutional Neural Network and
+a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
+at each window location.
+ */
+CV_EXPORTS Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const std::string& filename);
+
 //! @}

 }

--- a/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz
+++ b/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz
--- a/modules/text/samples/cropped_word_recognition.cpp
+++ b/modules/text/samples/cropped_word_recognition.cpp
+/*
+ * cropped_word_recognition.cpp
+ *
+ * A demo program of Scene Text cropped word Recognition:
+ * Shows the use of the OCRBeamSearchDecoder class using the Single Layer CNN character classifier described in:
+ * Coates, Adam, et al. "Text detection and character recognition in scene images with unsupervised feature learning." ICDAR 2011.
+ *
+ * Created on: Jul 31, 2014
+ *     Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es>
+ */
+
+#include "opencv2/text.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/imgproc.hpp"
+
+#include <iostream>
+
+using namespace std;
+using namespace cv;
+using namespace cv::text;
+
+// Performs text recognition on a given cropped word image.
+//
+// Usage: <program> <input_image>
+//
+// Expects "OCRHMM_transitions_table.xml" and "OCRBeamSearch_CNN_model_data.xml.gz"
+// to be reachable from the current working directory.
+// Returns 0 on success (or when only the usage text is printed) and -1 when an
+// input (image or transition table) cannot be loaded.
+int main(int argc, char* argv[])
+{
+    cout << endl << argv[0] << endl << endl;
+    cout << "A demo program of Scene Text cropped word Recognition: " << endl;
+    cout << "Shows the use of the OCRBeamSearchDecoder class using the Single Layer CNN character classifier described in:" << endl;
+    cout << "Coates, Adam, et al. \"Text detection and character recognition in scene images with unsupervised feature learning.\" ICDAR 2011." << endl << endl;
+
+    if(argc<2)
+    {
+        cout << "    Usage: " << argv[0] << " <input_image>" << endl << endl;
+        return(0);
+    }
+
+    // imread() does not throw on failure; it returns an empty Mat, so check explicitly.
+    Mat image = imread(argv[1]);
+    if(image.empty())
+    {
+        cerr << "ERROR: could not read input image \"" << argv[1] << "\"" << endl;
+        return -1;
+    }
+
+    Mat transition_p;
+    string filename = "OCRHMM_transitions_table.xml"; // TODO this table was done with a different vocabulary order?
+                                                      // TODO add a new function in ocr.cpp to create transition tab
+                                                      // for a given lexicon
+    FileStorage fs(filename, FileStorage::READ);
+    if(!fs.isOpened())
+    {
+        cerr << "ERROR: could not open transitions table \"" << filename << "\"" << endl;
+        return -1;
+    }
+    fs["transition_probabilities"] >> transition_p;
+    fs.release();
+    if(transition_p.empty())
+    {
+        cerr << "ERROR: no \"transition_probabilities\" table found in \"" << filename << "\"" << endl;
+        return -1;
+    }
+
+    // One entry per classifier class: A-Z, a-z, 0-9.
+    // (The lowercase alphabet previously ended in "xyx", so 'z' was reported as 'x'.)
+    string voc = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+
+    // Identity emission table, sized from the vocabulary instead of a hard-coded 62
+    // so both always agree (cols == rows == voc.size()).
+    const int num_classes = static_cast<int>(voc.size());
+    Mat emission_p = Mat::eye(num_classes,num_classes,CV_64FC1);
+
+    Ptr<OCRBeamSearchDecoder> ocr = OCRBeamSearchDecoder::create(
+                loadOCRBeamSearchClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"),
+                voc, transition_p, emission_p);
+
+    double t_r = (double)getTickCount();
+    string output;
+
+    vector<Rect>   boxes;
+    vector<string> words;
+    vector<float>  confidences;
+    ocr->run(image, output, &boxes, &words, &confidences, OCR_LEVEL_WORD);
+
+    cout << "OCR output = \"" << output << "\". Decoded in "
+         << ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl << endl;
+
+    return 0;
+}
--- a/modules/text/samples/scenetext_word01.jpg
+++ b/modules/text/samples/scenetext_word01.jpg
--- a/modules/text/samples/scenetext_word02.jpg
+++ b/modules/text/samples/scenetext_word02.jpg
--- a/modules/text/src/ocr_beamsearch_decoder.cpp
+++ b/modules/text/src/ocr_beamsearch_decoder.cpp