Commit 37af0432 authored by Vadim Pisarevsky's avatar Vadim Pisarevsky

Merge pull request #60 from lluisgomez/master

refactors OCRTesseract interface
parents 47f61f1c 36a31161
......@@ -5,39 +5,19 @@ Scene Text Recognition
OCRTesseract
------------
.. ocv:class:: OCRTesseract
.. ocv:class:: OCRTesseract : public BaseOCR
The OCRTesseract class provides a C++ interface to the tesseract-ocr API (v3.02.02). Note that it is compiled only when tesseract-ocr is correctly installed. ::
class CV_EXPORTS OCRTesseract
{
private:
tesseract::TessBaseAPI tess;
public:
//! Default constructor
OCRTesseract(const char* datapath=NULL, const char* language=NULL, const char* char_whitelist=NULL,
tesseract::OcrEngineMode oem=tesseract::OEM_DEFAULT, tesseract::PageSegMode psmode=tesseract::PSM_AUTO);
~OCRTesseract();
/*!
the key method. Takes image on input and returns recognized text in the output_text parameter
optionally provides also the Rects for individual text elements (e.g. words) and a list of
ranked recognition alternatives.
*/
void run(Mat& image, string& output_text, vector<Rect>* component_rects=NULL,
vector<string>* component_texts=NULL, vector<float>* component_confidences=NULL,
int component_level=0);
};
To see the OCRTesseract combined with scene text detection, have a look at the end_to_end_recognition demo: https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/end_to_end_recognition.cpp
OCRTesseract::OCRTesseract
--------------------------
Constructor.
.. ocv:function:: void OCRTesseract::OCRTesseract(const char* datapath=NULL, const char* language=NULL, const char* char_whitelist=NULL, tesseract::OcrEngineMode oem=tesseract::OEM_DEFAULT, tesseract::PageSegMode psmode=tesseract::PSM_AUTO)
.. note::
* (C++) An example of OCRTesseract recognition combined with scene text detection can be found at the end_to_end_recognition demo: https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/end_to_end_recognition.cpp
OCRTesseract::create
--------------------
Creates an instance of the OCRTesseract class. Initializes Tesseract.
.. ocv:function:: Ptr<OCRTesseract> OCRTesseract::create(const char* datapath=NULL, const char* language=NULL, const char* char_whitelist=NULL, int oem=(int)tesseract::OEM_DEFAULT, int psmode=(int)tesseract::PSM_AUTO)
:param datapath: the path of the parent directory of tessdata, ending with "/", or NULL to use the system's default directory.
:param language: an ISO 639-3 language code; if NULL, it defaults to "eng".
......
......@@ -44,24 +44,14 @@
#ifndef __OPENCV_TEXT_OCR_HPP__
#define __OPENCV_TEXT_OCR_HPP__
#include "text_config.hpp"
#ifdef HAVE_TESSERACT
#include <tesseract/baseapi.h>
#include <tesseract/resultiterator.h>
#endif
#include "opencv2/core.hpp"
#include <vector>
#include <string>
namespace cv
{
namespace text
{
using namespace std;
enum
{
......@@ -69,40 +59,26 @@ enum
OCR_LEVEL_TEXTLINE
};
#ifdef HAVE_TESSERACT
class CV_EXPORTS OCRTesseract
//base class BaseOCR declares a common API that would be used in a typical text recognition scenario
class CV_EXPORTS BaseOCR
{
private:
tesseract::TessBaseAPI tess;
public:
//Default constructor
OCRTesseract(const char* datapath=NULL, const char* language=NULL, const char* char_whitelist=NULL,
tesseract::OcrEngineMode oem=tesseract::OEM_DEFAULT, tesseract::PageSegMode psmode=tesseract::PSM_AUTO);
~OCRTesseract();
void run(Mat& image, string& output_text, vector<Rect>* component_rects=NULL,
vector<string>* component_texts=NULL, vector<float>* component_confidences=NULL,
int component_level=0);
virtual ~BaseOCR() {};
virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0) = 0;
};
#else
//stub
class CV_EXPORTS OCRTesseract
class CV_EXPORTS OCRTesseract : public BaseOCR
{
public:
//Default constructor
OCRTesseract(const char* datapath=NULL, const char* language=NULL, const char* char_whitelist=NULL,
int oem=0, int psmode=0);
virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0);
~OCRTesseract();
void run(Mat& image, string& output_text, vector<Rect>* component_rects=NULL,
vector<string>* component_texts=NULL, vector<float>* component_confidences=NULL,
int component_level=0);
static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
const char* char_whitelist=NULL, int oem=3, int psmode=3);
};
#endif
}
......
......@@ -102,7 +102,7 @@ int main(int argc, char* argv[])
/*Text Recognition (OCR)*/
double t_r = (double)getTickCount();
OCRTesseract* ocr = new OCRTesseract();
Ptr<OCRTesseract> ocr = OCRTesseract::create();
cout << "TIME_OCR_INITIALIZATION = " << ((double)getTickCount() - t_r)*1000/getTickFrequency() << endl;
string output;
......
......@@ -54,12 +54,12 @@ private:
vector< vector<Rect> > &boxes;
vector< vector<string> > &words;
vector< vector<float> > &confidences;
vector< OCRTesseract* > &ocrs;
vector< Ptr<OCRTesseract> > &ocrs;
public:
Parallel_OCR(vector<Mat> &_detections, vector<string> &_outputs, vector< vector<Rect> > &_boxes,
vector< vector<string> > &_words, vector< vector<float> > &_confidences,
vector< OCRTesseract* > &_ocrs)
vector< Ptr<OCRTesseract> > &_ocrs)
: detections(_detections), outputs(_outputs), boxes(_boxes), words(_words),
confidences(_confidences), ocrs(_ocrs)
{}
......@@ -120,11 +120,10 @@ int main(int argc, char* argv[])
//Initialize OCR engines (we create 10 instances so that several recognitions can run in parallel)
int num_ocrs = 10;
vector<OCRTesseract*> ocrs;
vector< Ptr<OCRTesseract> > ocrs;
for (int o=0; o<num_ocrs; o++)
{
OCRTesseract* ocr = new OCRTesseract();
ocrs.push_back(ocr);
ocrs.push_back(OCRTesseract::create());
}
//cout << "TIME_OCR_INITIALIZATION_ALT = "<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << endl;
......
......@@ -55,122 +55,145 @@ namespace text
using namespace std;
#ifdef HAVE_TESSERACT
//Default constructor
OCRTesseract::OCRTesseract(const char* datapath, const char* language, const char* char_whitelist, tesseract::OcrEngineMode oemode, tesseract::PageSegMode psmode)
// Default implementation of OCRTesseract::run.
// It validates the arguments and resets every output container; it performs no
// recognition itself.
//   image                 - input image; must be 8-bit with one or three channels.
//   output_text           - cleared on return.
//   component_rects       - optional; cleared when non-NULL.
//   component_texts       - optional; cleared when non-NULL.
//   component_confidences - optional; cleared when non-NULL.
//   component_level       - must be OCR_LEVEL_WORD or OCR_LEVEL_TEXTLINE.
void OCRTesseract::run(Mat& image, string& output_text, vector<Rect>* component_rects,
                       vector<string>* component_texts, vector<float>* component_confidences,
                       int component_level)
{
    // The previous check compared against CV_8UC1 twice, which made the second
    // operand dead code; the intended alternative is the 3-channel 8-bit type.
    CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC3) );
    CV_Assert( (component_level == OCR_LEVEL_TEXTLINE) || (component_level == OCR_LEVEL_WORD) );

    output_text.clear();
    if (component_rects != NULL)
        component_rects->clear();
    if (component_texts != NULL)
        component_texts->clear();
    if (component_confidences != NULL)
        component_confidences->clear();
}
const char *lang = "eng";
if (language != NULL)
lang = language;
class OCRTesseractImpl : public OCRTesseract
{
private:
#ifdef HAVE_TESSERACT
tesseract::TessBaseAPI tess;
#endif
if (tess.Init(datapath, lang, oemode))
public:
//Default constructor
OCRTesseractImpl(const char* datapath, const char* language, const char* char_whitelist, int oemode, int psmode)
{
cout << "OCRTesseract: Could not initialize tesseract." << endl;
throw 1;
}
//cout << "OCRTesseract: tesseract version " << tess.Version() << endl;
#ifdef HAVE_TESSERACT
const char *lang = "eng";
if (language != NULL)
lang = language;
if (tess.Init(datapath, lang, (tesseract::OcrEngineMode)oemode))
{
cout << "OCRTesseract: Could not initialize tesseract." << endl;
throw 1;
}
tesseract::PageSegMode pagesegmode = psmode;
tess.SetPageSegMode(pagesegmode);
//cout << "OCRTesseract: tesseract version " << tess.Version() << endl;
if(char_whitelist != NULL)
tess.SetVariable("tessedit_char_whitelist", char_whitelist);
else
tess.SetVariable("tessedit_char_whitelist", "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
tesseract::PageSegMode pagesegmode = (tesseract::PageSegMode)psmode;
tess.SetPageSegMode(pagesegmode);
tess.SetVariable("save_best_choices", "T");
if(char_whitelist != NULL)
tess.SetVariable("tessedit_char_whitelist", char_whitelist);
else
tess.SetVariable("tessedit_char_whitelist", "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
}
tess.SetVariable("save_best_choices", "T");
#else
cout << "OCRTesseract("<<oemode<<psmode<<"): Tesseract not found." << endl;
if (datapath != NULL)
cout << " " << datapath << endl;
if (language != NULL)
cout << " " << language << endl;
if (char_whitelist != NULL)
cout << " " << char_whitelist << endl;
#endif
}
OCRTesseract::~OCRTesseract()
{
tess.End();
}
~OCRTesseractImpl()
{
#ifdef HAVE_TESSERACT
tess.End();
#endif
}
void OCRTesseract::run(Mat& image, string& output, vector<Rect>* component_rects,
vector<string>* component_texts, vector<float>* component_confidences, int component_level)
{
CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC1) );
if (component_texts != 0)
component_texts->clear();
if (component_rects != 0)
component_rects->clear();
if (component_confidences != 0)
component_confidences->clear();
void run(Mat& image, string& output, vector<Rect>* component_rects=NULL,
vector<string>* component_texts=NULL, vector<float>* component_confidences=NULL,
int component_level=0)
{
tess.SetImage((uchar*)image.data, image.size().width, image.size().height, image.channels(), image.step1());
tess.Recognize(0);
output = string(tess.GetUTF8Text());
CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC1) );
if ( (component_rects != NULL) || (component_texts != NULL) || (component_confidences != NULL) )
{
tesseract::ResultIterator* ri = tess.GetIterator();
tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
if (component_level == OCR_LEVEL_TEXTLINE)
level = tesseract::RIL_TEXTLINE;
if (ri != 0) {
do {
const char* word = ri->GetUTF8Text(level);
if (word == NULL)
continue;
float conf = ri->Confidence(level);
int x1, y1, x2, y2;
ri->BoundingBox(level, &x1, &y1, &x2, &y2);
if (component_texts != 0)
component_texts->push_back(string(word));
if (component_rects != 0)
component_rects->push_back(Rect(x1,y1,x2-x1,y2-y1));
if (component_confidences != 0)
component_confidences->push_back(conf);
delete[] word;
} while (ri->Next(level));
#ifdef HAVE_TESSERACT
if (component_texts != 0)
component_texts->clear();
if (component_rects != 0)
component_rects->clear();
if (component_confidences != 0)
component_confidences->clear();
tess.SetImage((uchar*)image.data, image.size().width, image.size().height, image.channels(), image.step1());
tess.Recognize(0);
output = string(tess.GetUTF8Text());
if ( (component_rects != NULL) || (component_texts != NULL) || (component_confidences != NULL) )
{
tesseract::ResultIterator* ri = tess.GetIterator();
tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
if (component_level == OCR_LEVEL_TEXTLINE)
level = tesseract::RIL_TEXTLINE;
if (ri != 0) {
do {
const char* word = ri->GetUTF8Text(level);
if (word == NULL)
continue;
float conf = ri->Confidence(level);
int x1, y1, x2, y2;
ri->BoundingBox(level, &x1, &y1, &x2, &y2);
if (component_texts != 0)
component_texts->push_back(string(word));
if (component_rects != 0)
component_rects->push_back(Rect(x1,y1,x2-x1,y2-y1));
if (component_confidences != 0)
component_confidences->push_back(conf);
delete[] word;
} while (ri->Next(level));
}
delete ri;
}
delete ri;
}
tess.Clear();
}
tess.Clear();
#else
//Stub constructor
OCRTesseract::OCRTesseract(const char* datapath, const char* language, const char* char_whitelist, int oemode, int psmode)
{
cout << "OCRTesseract("<<oemode<<psmode<<"): Tesseract not found." << endl;
if (datapath != NULL)
cout << " " << datapath << endl;
if (language != NULL)
cout << " " << language << endl;
if (char_whitelist != NULL)
cout << " " << char_whitelist << endl;
}
//Stub destructor
OCRTesseract::~OCRTesseract()
{
}
cout << "OCRTesseract(" << component_level << image.type() <<"): Tesseract not found." << endl;
output.clear();
if(component_rects)
component_rects->clear();
if(component_texts)
component_texts->clear();
if(component_confidences)
component_confidences->clear();
#endif
}
//Stub method, does nothing
void OCRTesseract::run(Mat& image, string& output, vector<Rect>* component_rects,
vector<string>* component_texts, vector<float>* component_confidences, int component_level)
{
CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC1) );
cout << "OCRTesseract(" << component_level << image.type() <<"): Tesseract not found." << endl;
output.clear();
if(component_rects)
component_rects->clear();
if(component_texts)
component_texts->clear();
if(component_confidences)
component_confidences->clear();
}
#endif
};
// Factory for OCRTesseract: builds an OCRTesseractImpl from the given arguments
// and hands it back through a Ptr to the public OCRTesseract interface.
Ptr<OCRTesseract> OCRTesseract::create(const char* datapath, const char* language,
                                       const char* char_whitelist, int oem, int psmode)
{
    Ptr<OCRTesseract> engine =
        makePtr<OCRTesseractImpl>(datapath, language, char_whitelist, oem, psmode);
    return engine;
}
}
......
......@@ -45,4 +45,11 @@
#include "opencv2/text.hpp"
#include "text_config.hpp"
#ifdef HAVE_TESSERACT
#include <tesseract/baseapi.h>
#include <tesseract/resultiterator.h>
#endif
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment