segmented_word_recognition.cpp 4.15 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
/*
 * segmented_word_recognition.cpp
 *
 * A demo program on segmented word recognition.
 * Shows the use of the OCRHMMDecoder API with the two provided default character classifiers.
 *
 * Created on: Jul 31, 2015
 *     Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es>
 */

#include "opencv2/text.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"

#include <iostream>

using namespace std;
using namespace cv;
using namespace text;


int main(int argc, char* argv[]) {

    const String keys =
      "{help h usage ? |      | print this message.}"
      "{@image         |      | source image for recognition.}"
      "{@mask          |      | binary segmentation mask where each contour is a character.}"
      "{lexicon lex l  |      | (optional) lexicon provided as a list of comma separated words.}"
      ;
    CommandLineParser parser(argc, argv, keys);

    parser.about("\nSegmented word recognition.\nA demo program on segmented word recognition. Shows the use of the OCRHMMDecoder API with the two provided default character classifiers.\n");

    String filename1 = parser.get<String>(0);
    String filename2 = parser.get<String>(1);

    parser.printMessage();
    cout << endl << endl;
    if ((parser.has("help")) || (filename1.size()==0))
    {
        return 0;
    }
    if (!parser.check())
    {
        parser.printErrors();
        return 0;
    }

    Mat image = imread(filename1);
    Mat mask;
    if (filename2.size() > 0)
      mask = imread(filename2);
    else
      image.copyTo(mask);

    // be sure the mask is a binry image
    cvtColor(mask, mask, COLOR_BGR2GRAY);
    threshold(mask, mask, 128., 255, THRESH_BINARY);

    // character recognition vocabulary
    string voc = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    // Emission probabilities for the HMM language model (identity matrix by default)
    Mat emissionProbabilities = Mat::eye((int)voc.size(), (int)voc.size(), CV_64FC1);
    // Bigram transition probabilities for the HMM language model
    Mat transitionProbabilities;

    string lex = parser.get<string>("lex");
    if (lex.size()>0)
    {
        // Build tailored language model for the provided lexicon
        vector<string> lexicon;
        size_t pos = 0;
        string delimiter = ",";
        std::string token;
        while ((pos = lex.find(delimiter)) != std::string::npos) {
            token = lex.substr(0, pos);
            lexicon.push_back(token);
            lex.erase(0, pos + delimiter.length());
        }
        lexicon.push_back(lex);
        createOCRHMMTransitionsTable(voc,lexicon,transitionProbabilities);
    } else {
        // Or load the generic language model (from Aspell English dictionary)
        FileStorage fs("./OCRHMM_transitions_table.xml", FileStorage::READ);
        fs["transition_probabilities"] >> transitionProbabilities;
        fs.release();
    }

    Ptr<OCRTesseract>  ocrTes = OCRTesseract::create();

    Ptr<OCRHMMDecoder> ocrNM  = OCRHMMDecoder::create(
                                 loadOCRHMMClassifierNM("./OCRHMM_knn_model_data.xml.gz"),
                                 voc, transitionProbabilities, emissionProbabilities);

    Ptr<OCRHMMDecoder> ocrCNN = OCRHMMDecoder::create(
                                 loadOCRHMMClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"),
                                 voc, transitionProbabilities, emissionProbabilities);

    std::string output;
Lluis Gomez-Bigorda's avatar
Lluis Gomez-Bigorda committed
101
    double t_r = (double)getTickCount();
102 103 104 105 106
    ocrTes->run(mask, output);
    output.erase(remove(output.begin(), output.end(), '\n'), output.end());
    cout << " OCR_Tesseract  output \"" << output << "\". Done in "
         << ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl;

Lluis Gomez-Bigorda's avatar
Lluis Gomez-Bigorda committed
107
    t_r = (double)getTickCount();
108 109 110 111
    ocrNM->run(mask, output);
    cout << " OCR_NM         output \"" << output << "\". Done in "
         << ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl;

Lluis Gomez-Bigorda's avatar
Lluis Gomez-Bigorda committed
112
    t_r = (double)getTickCount();
113 114 115 116
    ocrCNN->run(image, mask, output);
    cout << " OCR_CNN        output \"" << output << "\". Done in "
         << ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl;
}