Commit 1445a29e authored by Alexander Smorkalov's avatar Alexander Smorkalov Committed by OpenCV Buildbot

Merge pull request #1469 from lluisgomez:scene_text_detection_erGrouping

parents 0ecd7913 2837bfd9
...@@ -236,5 +236,28 @@ enum { ERFILTER_NM_RGBLGrad = 0, ...@@ -236,5 +236,28 @@ enum { ERFILTER_NM_RGBLGrad = 0,
*/ */
CV_EXPORTS void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad); CV_EXPORTS void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad);
/*!
Find groups of Extremal Regions that are organized as text blocks. This function implements
the grouping algorithm described in:
Gomez L. and Karatzas D.: Multi-script Text Extraction from Natural Scenes, ICDAR 2013.
Notice that this implementation constrains the results to horizontally-aligned text and
Latin script (since ERFilter classifiers are trained only for Latin script detection).
The algorithm combines two different clustering techniques in a single parameter-free procedure
to detect groups of regions organized as text. The maximally meaningful groups are first detected
in several feature spaces, where each feature space is a combination of proximity information
(x,y coordinates) and a similarity measure (intensity, color, size, gradient magnitude, etc.),
thus providing a set of hypotheses of text groups. An Evidence Accumulation framework is used to
combine all these hypotheses to get the final estimate. Each of the resulting groups is finally
heuristically validated in order to assess whether it forms a valid horizontally-aligned text block.
\param src Vector of single-channel CV_8UC1 images from which the regions were extracted.
\param regions Vector of ERs retrieved from the ERFilter algorithm for each channel.
\param groups The output of the algorithm is stored in this parameter as a list of rectangles.
*/
CV_EXPORTS void erGrouping(InputArrayOfArrays src, std::vector<std::vector<ERStat> > &regions,
std::vector<Rect> &groups);
} }
#endif // _OPENCV_ERFILTER_HPP_ #endif // _OPENCV_ERFILTER_HPP_
This diff is collapsed.
...@@ -16,105 +16,90 @@ ...@@ -16,105 +16,90 @@
using namespace std; using namespace std;
using namespace cv; using namespace cv;
void er_draw(Mat &src, Mat &dst, ERStat& er); void show_help_and_exit(const char *cmd);
void groups_draw(Mat &src, vector<Rect> &groups);
void er_draw(Mat &src, Mat &dst, ERStat& er);
void er_draw(Mat &src, Mat &dst, ERStat& er) int main(int argc, const char * argv[])
{ {
if (er.parent != NULL) // deprecate the root region if (argc < 2) show_help_and_exit(argv[0]);
{
int newMaskVal = 255;
int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY;
floodFill(src,dst,Point(er.pixel%src.cols,er.pixel/src.cols),Scalar(255),0,Scalar(er.level),Scalar(0),flags);
}
} Mat src = imread(argv[1]);
int main(int argc, const char * argv[])
{
// Extract channels to be processed individually
vector<Mat> channels;
computeNMChannels(src, channels);
vector<ERStat> regions; int cn = (int)channels.size();
// Append negative channels to detect ER- (bright regions over dark background)
for (int c = 0; c < cn-1; c++)
channels.push_back(255-channels[c]);
if (argc < 2) { // Create ERFilter objects with the 1st and 2nd stage default classifiers
cout << "Demo program of the Extremal Region Filter algorithm described in " << endl; Ptr<ERFilter> er_filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"),8,0.00025,0.13,0.4,true,0.1);
cout << "Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012" << endl << endl; Ptr<ERFilter> er_filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"),0.3);
cout << " Usage: " << argv[0] << " input_image <optional_groundtruth_image>" << endl;
cout << " Default classifier files (trained_classifierNM*.xml) should be in ./" << endl;
return -1;
}
Mat original = imread(argv[1]); vector<vector<ERStat> > regions(channels.size());
Mat gt; // Apply the default cascade classifier to each independent channel (could be done in parallel)
if (argc > 2) for (int c=0; c<(int)channels.size(); c++)
{ {
gt = imread(argv[2]); er_filter1->run(channels[c], regions[c]);
cvtColor(gt, gt, COLOR_RGB2GRAY); er_filter2->run(channels[c], regions[c]);
threshold(gt, gt, 254, 255, THRESH_BINARY);
} }
Mat grey(original.size(),CV_8UC1);
cvtColor(original,grey,COLOR_RGB2GRAY);
double t = (double)getTickCount();
// Build ER tree and filter with the 1st stage default classifier // Detect character groups
Ptr<ERFilter> er_filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml")); vector<Rect> groups;
erGrouping(channels, regions, groups);
er_filter1->run(grey, regions); // draw groups
groups_draw(src, groups);
t = (double)getTickCount() - t; imshow("grouping",src);
cout << " --------------------------------------------------------------------------------------------------" << endl; waitKey(-1);
cout << "\t FIRST STAGE CLASSIFIER done in " << t * 1000. / getTickFrequency() << " ms." << endl;
cout << " --------------------------------------------------------------------------------------------------" << endl;
cout << setw(9) << regions.size()+er_filter1->getNumRejected() << "\t Extremal Regions extracted " << endl;
cout << setw(9) << regions.size() << "\t Extremal Regions selected by the first stage of the sequential classifier." << endl;
cout << "\t \t (saving into out_second_stage.jpg)" << endl;
cout << " --------------------------------------------------------------------------------------------------" << endl;
// memory clean-up
er_filter1.release(); er_filter1.release();
er_filter2.release();
// draw regions regions.clear();
Mat mask = Mat::zeros(grey.rows+2,grey.cols+2,CV_8UC1); if (!groups.empty())
for (int r=0; r<(int)regions.size(); r++)
er_draw(grey, mask, regions.at(r));
mask = 255-mask;
imwrite("out_first_stage.jpg", mask);
if (argc > 2)
{ {
Mat tmp_mask = (255-gt) & (255-mask(Rect(Point(1,1),Size(mask.cols-2,mask.rows-2)))); groups.clear();
cout << "Recall for the 1st stage filter = " << (float)countNonZero(tmp_mask) / countNonZero(255-gt) << endl;
} }
}
t = (double)getTickCount();
// Default second stage classifier
Ptr<ERFilter> er_filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"));
er_filter2->run(grey, regions);
t = (double)getTickCount() - t; // helper functions
cout << " --------------------------------------------------------------------------------------------------" << endl;
cout << "\t SECOND STAGE CLASSIFIER done in " << t * 1000. / getTickFrequency() << " ms." << endl;
cout << " --------------------------------------------------------------------------------------------------" << endl;
cout << setw(9) << regions.size() << "\t Extremal Regions selected by the second stage of the sequential classifier." << endl;
cout << "\t \t (saving into out_second_stage.jpg)" << endl;
cout << " --------------------------------------------------------------------------------------------------" << endl;
er_filter2.release(); void show_help_and_exit(const char *cmd)
{
// draw regions cout << endl << cmd << endl << endl;
mask = mask*0; cout << "Demo program of the Extremal Region Filter algorithm described in " << endl;
for (int r=0; r<(int)regions.size(); r++) cout << "Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012" << endl << endl;
er_draw(grey, mask, regions.at(r)); cout << " Usage: " << cmd << " <input_image> " << endl;
mask = 255-mask; cout << " Default classifier files (trained_classifierNM*.xml) must be in current directory" << endl << endl;
imwrite("out_second_stage.jpg", mask); exit(-1);
}
if (argc > 2) void groups_draw(Mat &src, vector<Rect> &groups)
{
for (int i=groups.size()-1; i>=0; i--)
{ {
Mat tmp_mask = (255-gt) & (255-mask(Rect(Point(1,1),Size(mask.cols-2,mask.rows-2)))); if (src.type() == CV_8UC3)
cout << "Recall for the 2nd stage filter = " << (float)countNonZero(tmp_mask) / countNonZero(255-gt) << endl; rectangle(src,groups.at(i).tl(),groups.at(i).br(),Scalar( 0, 255, 255 ), 3, 8 );
else
rectangle(src,groups.at(i).tl(),groups.at(i).br(),Scalar( 255 ), 3, 8 );
} }
}
regions.clear(); void er_draw(Mat &src, Mat &dst, ERStat& er)
{
if (er.parent != NULL) // deprecate the root region
{
int newMaskVal = 255;
int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY;
floodFill(src,dst,Point(er.pixel%src.cols,er.pixel/src.cols),Scalar(255),0,Scalar(er.level),Scalar(0),flags);
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment