Commit 2538bf74 authored by Lluis Gomez-Bigorda

Adds example on segmented word recognition. Shows the use of the OCRHMMDecoder…

Adds example on segmented word recognition. Shows the use of the OCRHMMDecoder with the NM and CNN default classifiers.
parent ee677a25
/*
 * segmented_word_recognition.cpp
 *
 * A demo program on segmented word recognition.
 * Shows the use of the OCRHMMDecoder API with the two provided default character classifiers.
 *
 * Created on: Jul 31, 2015
 * Author: Lluis Gomez i Bigorda <lgomez AT>
 */
#include "opencv2/text.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include <iostream>
using namespace std;
using namespace cv;
using namespace text;
// Entry point of the segmented word recognition demo.
//
// Reads an image path and a mask path from the command line, binarizes the
// mask, prepares the vocabulary and the HMM emission/transition matrices, and
// then runs three recognizers (one OCRTesseract and two OCRHMMDecoder
// instances), printing each recognized string together with its run time in
// milliseconds.
//
// NOTE(review): this listing looks like it lost lines during extraction (no
// closing brace for main, unbalanced braces around the lexicon branch, no
// final return). Confirm against the upstream sample before building.
int main(int argc, char* argv[]) {
// Command line specification: two positional arguments (@image, @mask) plus
// the optional "lexicon" option (aliases "lex" and "l").
const String keys =
"{help h usage ? | | print this message.}"
"{@image | | source image for recognition.}"
"{@mask | | binary segmentation mask where each contour is a character.}"
"{lexicon lex l | | (optional) lexicon provided as a list of comma separated words.}"
// NOTE(review): no terminating ';' is visible after the key strings above —
// presumably dropped together with other punctuation-only lines; confirm.
CommandLineParser parser(argc, argv, keys);
parser.about("\nSegmented word recognition.\nA demo program on segmented word recognition. Shows the use of the OCRHMMDecoder API with the two provided default character classifiers.\n");
// Positional argument 0 is the image path, positional argument 1 the mask path.
String filename1 = parser.get<String>(0);
String filename2 = parser.get<String>(1);
cout << endl << endl;
// Exit early when help was requested or no image path was given.
if ((parser.has("help")) || (filename1.size()==0))
return 0;
// Exit as well when the parser reports an argument error.
if (!parser.check())
return 0;
Mat image = imread(filename1);
Mat mask;
// The mask is only loaded when a second positional argument was supplied.
// NOTE(review): in the visible code `mask` stays empty otherwise and is still
// passed to cvtColor below — presumably a fallback branch was lost; confirm.
if (filename2.size() > 0)
mask = imread(filename2);
// be sure the mask is a binary image: convert to grayscale in place, then
// apply a binary threshold at 128 with a maximum value of 255
cvtColor(mask, mask, COLOR_BGR2GRAY);
threshold(mask, mask, 128., 255, THRESH_BINARY);
// character recognition vocabulary (lowercase letters, uppercase letters, digits)
string voc = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
// Emission probabilities for the HMM language model (identity matrix by default),
// sized voc.size() x voc.size() with CV_64FC1 elements
Mat emissionProbabilities = Mat::eye((int)voc.size(), (int)voc.size(), CV_64FC1);
// Bigram transition probabilities for the HMM language model
Mat transitionProbabilities;
// Optional comma separated lexicon passed on the command line.
string lex = parser.get<string>("lex");
if (lex.size()>0)
// Build tailored language model for the provided lexicon
// NOTE(review): the opening brace of this branch is not visible although the
// matching "} else {" appears below — likely an extraction artifact.
vector<string> lexicon;
size_t pos = 0;
string delimiter = ",";
std::string token;
// Consume `lex` from the front, one delimiter-terminated token per iteration.
// NOTE(review): the visible loop never stores `token` in `lexicon`, the text
// remaining after the last delimiter is not handled, and nothing in this
// branch fills `transitionProbabilities` — presumably those lines were lost;
// confirm against the upstream sample.
while ((pos = lex.find(delimiter)) != std::string::npos) {
token = lex.substr(0, pos);
lex.erase(0, pos + delimiter.length());
} else {
// Or load the generic language model (from Aspell English dictionary):
// read the "transition_probabilities" node of ./OCRHMM_transitions_table.xml
FileStorage fs("./OCRHMM_transitions_table.xml", FileStorage::READ);
fs["transition_probabilities"] >> transitionProbabilities;
// NOTE(review): the closing brace of the else branch is not visible here.
Ptr<OCRTesseract> ocrTes = OCRTesseract::create();
// NOTE(review): both decoders below are created with identical arguments and
// no character classifier argument is visible, although the variable names
// suggest an NM and a CNN classifier — presumably a line was lost in each
// call; confirm.
Ptr<OCRHMMDecoder> ocrNM = OCRHMMDecoder::create(
voc, transitionProbabilities, emissionProbabilities);
Ptr<OCRHMMDecoder> ocrCNN = OCRHMMDecoder::create(
voc, transitionProbabilities, emissionProbabilities);
// `output` is reused for the recognized text of every recognizer.
std::string output;
// t_r holds the tick count at the start of each timed run; the elapsed ticks
// are converted to milliseconds via getTickFrequency() when printing.
double t_r = getTickCount();
ocrTes->run(mask, output);
// Strip newline characters from the Tesseract result before printing it.
output.erase(remove(output.begin(), output.end(), '\n'), output.end());
cout << " OCR_Tesseract output \"" << output << "\". Done in "
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl;
// Second run: first HMM decoder, fed with the binarized mask only.
t_r = getTickCount();
ocrNM->run(mask, output);
cout << " OCR_NM output \"" << output << "\". Done in "
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl;
// Third run: second HMM decoder, fed with both the source image and the mask.
t_r = getTickCount();
ocrCNN->run(image, mask, output);
cout << " OCR_CNN output \"" << output << "\". Done in "
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl;
