Commit 4171767d authored by Nghia Ho

Merge remote-tracking branch 'upstream/master'

parents 8e8ff4dd c0c575d6
<?xml version="1.0"?>
<!--
This is 20x34 detector of profile faces using LBP features.
It was created by Attila Novak during GSoC 2012.
@@ -5,7 +6,6 @@
so you may want to run it on the original and on
the flipped image to detect different profile faces.
-->
<?xml version="1.0"?>
<opencv_storage>
<cascade>
  <stageType>BOOST</stageType>
...
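Since this cascade only fires on one profile orientation, a minimal sketch of how such a one-sided detector is typically applied to both the original and the mirrored image is shown below; the command-line layout, variable names, and coordinate remapping are illustrative assumptions, not part of this commit.

#include <opencv2/core/core.hpp>
#include <opencv2/objdetect/objdetect.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <vector>

int main(int argc, char** argv)
{
    if (argc < 3) return -1;                          // usage: <cascade_xml> <image>
    cv::CascadeClassifier cascade(argv[1]);           // e.g. the LBP profile-face cascade
    cv::Mat img = cv::imread(argv[2], 0);             // load as grayscale
    if (cascade.empty() || img.empty()) return -1;

    std::vector<cv::Rect> faces, faces_mirrored;
    cascade.detectMultiScale(img, faces);             // profiles facing one direction

    cv::Mat mirrored;
    cv::flip(img, mirrored, 1);                       // flip around the vertical axis
    cascade.detectMultiScale(mirrored, faces_mirrored);

    // Map detections from the mirrored image back to original image coordinates
    for (size_t i = 0; i < faces_mirrored.size(); i++)
    {
        cv::Rect r = faces_mirrored[i];
        faces.push_back(cv::Rect(img.cols - r.x - r.width, r.y, r.width, r.height));
    }
    return 0;
}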
<?xml version="1.0"?>
<!--
This is 12x80 detector of the silverware (forks, spoons, knives) using LBP features.
It was created by Attila Novak during GSoC 2012.
@@ -6,7 +7,6 @@
(probably should run detector several times).
It also assumes the "top view" when the camera optical axis is orthogonal to the table plane.
-->
<?xml version="1.0"?>
<opencv_storage>
<cascade>
  <stageType>BOOST</stageType>
...
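Because the cascade assumes one fixed in-plane orientation under a top view, one hedged reading of the "run detector several times" hint is to rotate the image in coarse steps and detect on each copy, as sketched below; the helper name and step size are assumptions for illustration only.

#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/objdetect/objdetect.hpp>
#include <vector>

// Detect on several in-plane rotations of a top-view image (illustrative sketch).
std::vector<cv::Rect> detectAllOrientations(const cv::Mat& gray,
                                            cv::CascadeClassifier& cascade,
                                            double stepDeg)
{
    std::vector<cv::Rect> all;
    cv::Point2f center(gray.cols / 2.0f, gray.rows / 2.0f);
    for (double angle = 0.0; angle < 360.0; angle += stepDeg)
    {
        cv::Mat rot = cv::getRotationMatrix2D(center, angle, 1.0);
        cv::Mat rotated;
        cv::warpAffine(gray, rotated, rot, gray.size());

        std::vector<cv::Rect> found;
        cascade.detectMultiScale(rotated, found);
        // Boxes are in the rotated frame; a real application would map them back
        // through the inverse rotation before merging the results.
        all.insert(all.end(), found.begin(), found.end());
    }
    return all;
}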
@@ -236,5 +236,28 @@ enum { ERFILTER_NM_RGBLGrad = 0,
 */
CV_EXPORTS void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels, int _mode = ERFILTER_NM_RGBLGrad);
/*!
    Find groups of Extremal Regions that are organized as text blocks. This function implements
    the grouping algorithm described in:
    Gomez L. and Karatzas D.: Multi-script Text Extraction from Natural Scenes, ICDAR 2013.
    Notice that this implementation constrains the results to horizontally-aligned text and
    Latin script (since the ERFilter classifiers are trained only for Latin script detection).

    The algorithm combines two different clustering techniques in a single parameter-free procedure
    to detect groups of regions organized as text. The maximally meaningful groups are first detected
    in several feature spaces, where each feature space is a combination of proximity information
    (x,y coordinates) and a similarity measure (intensity, color, size, gradient magnitude, etc.),
    thus providing a set of hypotheses of text groups. An Evidence Accumulation framework is used to
    combine all these hypotheses to get the final estimate. Each of the resulting groups is finally
    validated heuristically in order to assess whether it forms a valid horizontally-aligned text block.

    \param src      Vector of single-channel images (CV_8UC1) from which the regions were extracted.
    \param regions  Vector of ERs retrieved from the ERFilter algorithm, one vector per channel.
    \param groups   The output of the algorithm is stored in this parameter as a list of rectangles.
*/
CV_EXPORTS void erGrouping(InputArrayOfArrays src, std::vector<std::vector<ERStat> > &regions,
std::vector<Rect> &groups);
}
#endif // _OPENCV_ERFILTER_HPP_
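As a compressed sketch of the expected call pattern (the full updated sample follows below; the variable names mirror that sample, and the region vectors must stay index-aligned with the channel vector, including any appended negative channels):

std::vector<cv::Mat> channels;                                    // filled by computeNMChannels (plus negative channels)
std::vector<std::vector<cv::ERStat> > regions(channels.size());  // one ER list per channel, index-aligned with channels
// ... run er_filter1->run(channels[c], regions[c]) and er_filter2->run(channels[c], regions[c]) for each c ...
std::vector<cv::Rect> groups;
cv::erGrouping(channels, regions, groups);                        // groups receives the detected text-block rectangles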
@@ -16,105 +16,90 @@
using namespace std;
using namespace cv;

void show_help_and_exit(const char *cmd);
void groups_draw(Mat &src, vector<Rect> &groups);
void er_draw(Mat &src, Mat &dst, ERStat& er);

int main(int argc, const char * argv[])
{
    if (argc < 2) show_help_and_exit(argv[0]);

    Mat src = imread(argv[1]);

    // Extract channels to be processed individually
    vector<Mat> channels;
    computeNMChannels(src, channels);

    int cn = (int)channels.size();
    // Append negative channels to detect ER- (bright regions over dark background)
    for (int c = 0; c < cn-1; c++)
        channels.push_back(255-channels[c]);

    // Create ERFilter objects with the 1st and 2nd stage default classifiers
    Ptr<ERFilter> er_filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"),8,0.00025,0.13,0.4,true,0.1);
    Ptr<ERFilter> er_filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"),0.3);

    vector<vector<ERStat> > regions(channels.size());
    // Apply the default cascade classifier to each independent channel (could be done in parallel)
    for (int c=0; c<(int)channels.size(); c++)
    {
        er_filter1->run(channels[c], regions[c]);
        er_filter2->run(channels[c], regions[c]);
    }

    // Detect character groups
    vector<Rect> groups;
    erGrouping(channels, regions, groups);

    // draw groups
    groups_draw(src, groups);
    imshow("grouping",src);
    waitKey(-1);

    // memory clean-up
    er_filter1.release();
    er_filter2.release();
    regions.clear();
    if (!groups.empty())
    {
        groups.clear();
    }
}

// helper functions

void show_help_and_exit(const char *cmd)
{
    cout << endl << cmd << endl << endl;
    cout << "Demo program of the Extremal Region Filter algorithm described in " << endl;
    cout << "Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012" << endl << endl;
    cout << "    Usage: " << cmd << " <input_image> " << endl;
    cout << "    Default classifier files (trained_classifierNM*.xml) must be in current directory" << endl << endl;
    exit(-1);
}

void groups_draw(Mat &src, vector<Rect> &groups)
{
    for (int i=(int)groups.size()-1; i>=0; i--)
    {
        if (src.type() == CV_8UC3)
            rectangle(src,groups.at(i).tl(),groups.at(i).br(),Scalar( 0, 255, 255 ), 3, 8 );
        else
            rectangle(src,groups.at(i).tl(),groups.at(i).br(),Scalar( 255 ), 3, 8 );
    }
}

void er_draw(Mat &src, Mat &dst, ERStat& er)
{
    if (er.parent != NULL) // deprecate the root region
    {
        int newMaskVal = 255;
        int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY;
        floodFill(src,dst,Point(er.pixel%src.cols,er.pixel/src.cols),Scalar(255),0,Scalar(er.level),Scalar(0),flags);
    }
}