Commit ffcda05c authored by Vadim Pisarevsky

Merge pull request #59 from lluisgomez/master

Update to the new OpenCV 3.0 ml API and add parallel_for_ OCR
parents 62eda671 820f2e08
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -45,6 +45,35 @@ public: ...@@ -45,6 +45,35 @@ public:
Parallel_extractCSER & operator=(const Parallel_extractCSER &a); Parallel_extractCSER & operator=(const Parallel_extractCSER &a);
}; };
//OCR recognition is done in parallel for different detections
class Parallel_OCR: public cv::ParallelLoopBody
{
private:
vector<Mat> &detections;
vector<string> &outputs;
vector< vector<Rect> > &boxes;
vector< vector<string> > &words;
vector< vector<float> > &confidences;
vector< OCRTesseract* > &ocrs;
public:
Parallel_OCR(vector<Mat> &_detections, vector<string> &_outputs, vector< vector<Rect> > &_boxes,
vector< vector<string> > &_words, vector< vector<float> > &_confidences,
vector< OCRTesseract* > &_ocrs)
: detections(_detections), outputs(_outputs), boxes(_boxes), words(_words),
confidences(_confidences), ocrs(_ocrs)
{}
virtual void operator()( const cv::Range &r ) const
{
for (int c=r.start; c < r.end; c++)
{
ocrs[c%ocrs.size()]->run(detections[c], outputs[c], &boxes[c], &words[c], &confidences[c], OCR_LEVEL_WORD);
}
}
Parallel_OCR & operator=(const Parallel_OCR &a);
};
//Discard wrongly recognised strings //Discard wrongly recognised strings
bool isRepetitive(const string& s); bool isRepetitive(const string& s);
...@@ -87,10 +116,16 @@ int main(int argc, char* argv[]) ...@@ -87,10 +116,16 @@ int main(int argc, char* argv[])
er_filters2.push_back(er_filter2); er_filters2.push_back(er_filter2);
} }
//Initialize OCR engine
//double t_r = getTickCount(); //double t_r = getTickCount();
OCRTesseract *ocr_tess = new OCRTesseract(); //Initialize OCR engine (we initialize 10 instances in order to work several recognitions in parallel)
int num_ocrs = 10;
vector<OCRTesseract*> ocrs;
for (int o=0; o<num_ocrs; o++)
{
OCRTesseract* ocr = new OCRTesseract();
ocrs.push_back(ocr);
}
//cout << "TIME_OCR_INITIALIZATION_ALT = "<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << endl; //cout << "TIME_OCR_INITIALIZATION_ALT = "<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << endl;
...@@ -182,7 +217,9 @@ int main(int argc, char* argv[]) ...@@ -182,7 +217,9 @@ int main(int argc, char* argv[])
float scale_img = (float)(600.f/frame.rows); float scale_img = (float)(600.f/frame.rows);
float scale_font = (float)(2-scale_img)/1.4f; float scale_font = (float)(2-scale_img)/1.4f;
vector<string> words_detection; vector<string> words_detection;
string output; float min_confidence1 = 51.f, min_confidence2 = 60.f;
vector<Mat> detections;
//t_r = getTickCount(); //t_r = getTickCount();
...@@ -195,42 +232,51 @@ int main(int argc, char* argv[]) ...@@ -195,42 +232,51 @@ int main(int argc, char* argv[])
er_draw(channels, regions, nm_region_groups[i], group_img); er_draw(channels, regions, nm_region_groups[i], group_img);
group_img(nm_boxes[i]).copyTo(group_img); group_img(nm_boxes[i]).copyTo(group_img);
copyMakeBorder(group_img,group_img,15,15,15,15,BORDER_CONSTANT,Scalar(0)); copyMakeBorder(group_img,group_img,15,15,15,15,BORDER_CONSTANT,Scalar(0));
detections.push_back(group_img);
}
vector<string> outputs((int)detections.size());
vector< vector<Rect> > boxes((int)detections.size());
vector< vector<string> > words((int)detections.size());
vector< vector<float> > confidences((int)detections.size());
// parallel process detections in batches of ocrs.size()
for (int i=0; i<(int)detections.size(); i=i+(int)ocrs.size())
{
Range r;
if (i+(int)ocrs.size() <= (int)detections.size())
r = Range(i,i+(int)ocrs.size());
else
r = Range(i,(int)detections.size());
vector<Rect> boxes; parallel_for_(r, Parallel_OCR(detections, outputs, boxes, words, confidences, ocrs));
vector<string> words; }
vector<float> confidences;
float min_confidence1 = 0.f, min_confidence2 = 0.f; for (int i=0; i<(int)detections.size(); i++)
{
if (RECOGNITION == 0)
{
ocr_tess->run(group_img, output, &boxes, &words, &confidences, OCR_LEVEL_WORD);
min_confidence1 = 51.f;
min_confidence2 = 60.f;
}
output.erase(remove(output.begin(), output.end(), '\n'), output.end()); outputs[i].erase(remove(outputs[i].begin(), outputs[i].end(), '\n'), outputs[i].end());
//cout << "OCR output = \"" << output << "\" lenght = " << output.size() << endl; //cout << "OCR output = \"" << output << "\" lenght = " << output.size() << endl;
if (output.size() < 3) if (outputs[i].size() < 3)
continue; continue;
for (int j=0; j<(int)boxes.size(); j++) for (int j=0; j<(int)boxes[i].size(); j++)
{ {
boxes[j].x += nm_boxes[i].x-15; boxes[i][j].x += nm_boxes[i].x-15;
boxes[j].y += nm_boxes[i].y-15; boxes[i][j].y += nm_boxes[i].y-15;
//cout << " word = " << words[j] << "\t confidence = " << confidences[j] << endl; //cout << " word = " << words[j] << "\t confidence = " << confidences[j] << endl;
if ((words[j].size() < 2) || (confidences[j] < min_confidence1) || if ((words[i][j].size() < 2) || (confidences[i][j] < min_confidence1) ||
((words[j].size()==2) && (words[j][0] == words[j][1])) || ((words[i][j].size()==2) && (words[i][j][0] == words[i][j][1])) ||
((words[j].size()< 4) && (confidences[j] < min_confidence2)) || ((words[i][j].size()< 4) && (confidences[i][j] < min_confidence2)) ||
isRepetitive(words[j])) isRepetitive(words[i][j]))
continue; continue;
words_detection.push_back(words[j]); words_detection.push_back(words[i][j]);
rectangle(out_img, boxes[j].tl(), boxes[j].br(), Scalar(255,0,255),3); rectangle(out_img, boxes[i][j].tl(), boxes[i][j].br(), Scalar(255,0,255),3);
Size word_size = getTextSize(words[j], FONT_HERSHEY_SIMPLEX, (double)scale_font, (int)(3*scale_font), NULL); Size word_size = getTextSize(words[i][j], FONT_HERSHEY_SIMPLEX, (double)scale_font, (int)(3*scale_font), NULL);
rectangle(out_img, boxes[j].tl()-Point(3,word_size.height+3), boxes[j].tl()+Point(word_size.width,0), Scalar(255,0,255),-1); rectangle(out_img, boxes[i][j].tl()-Point(3,word_size.height+3), boxes[i][j].tl()+Point(word_size.width,0), Scalar(255,0,255),-1);
putText(out_img, words[j], boxes[j].tl()-Point(1,1), FONT_HERSHEY_SIMPLEX, scale_font, Scalar(255,255,255),(int)(3*scale_font)); putText(out_img, words[i][j], boxes[i][j].tl()-Point(1,1), FONT_HERSHEY_SIMPLEX, scale_font, Scalar(255,255,255),(int)(3*scale_font));
} }
} }
......
...@@ -62,6 +62,7 @@ namespace text ...@@ -62,6 +62,7 @@ namespace text
using namespace cv::ml; using namespace cv::ml;
using namespace std; using namespace std;
using namespace cv::ml;
// Deletes a tree of ERStat regions starting at root. Used only // Deletes a tree of ERStat regions starting at root. Used only
// internally to this implementation. // internally to this implementation.
...@@ -1017,7 +1018,14 @@ ERClassifierNM1::ERClassifierNM1(const string& filename) ...@@ -1017,7 +1018,14 @@ ERClassifierNM1::ERClassifierNM1(const string& filename)
{ {
if (ifstream(filename.c_str())) if (ifstream(filename.c_str()))
{
boost = StatModel::load<Boost>( filename.c_str() ); boost = StatModel::load<Boost>( filename.c_str() );
if( boost.empty() )
{
cout << "Could not read the classifier " << filename.c_str() << endl;
CV_Error(Error::StsBadArg, "Could not read the default classifier!");
}
}
else else
CV_Error(Error::StsBadArg, "Default classifier file not found!"); CV_Error(Error::StsBadArg, "Default classifier file not found!");
} }
...@@ -1025,14 +1033,12 @@ ERClassifierNM1::ERClassifierNM1(const string& filename) ...@@ -1025,14 +1033,12 @@ ERClassifierNM1::ERClassifierNM1(const string& filename)
double ERClassifierNM1::eval(const ERStat& stat) double ERClassifierNM1::eval(const ERStat& stat)
{ {
//Classify //Classify
float arr[] = {0,(float)(stat.rect.width)/(stat.rect.height), // aspect ratio Mat sample = (Mat_<float>(1,4) << (float)(stat.rect.width)/(stat.rect.height), // aspect ratio
sqrt((float)(stat.area))/stat.perimeter, // compactness sqrt((float)(stat.area))/stat.perimeter, // compactness
(float)(1-stat.euler), //number of holes (float)(1-stat.euler), //number of holes
stat.med_crossings}; stat.med_crossings);
vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) );
float votes = boost->predict( Mat(sample), noArray(), StatModel::RAW_OUTPUT ); float votes = boost->predict( sample, noArray(), DTrees::PREDICT_SUM | StatModel::RAW_OUTPUT);
// Logistic Correction returns a probability value (in the range(0,1)) // Logistic Correction returns a probability value (in the range(0,1))
return (double)1-(double)1/(1+exp(-2*votes)); return (double)1-(double)1/(1+exp(-2*votes));
...@@ -1043,7 +1049,14 @@ double ERClassifierNM1::eval(const ERStat& stat) ...@@ -1043,7 +1049,14 @@ double ERClassifierNM1::eval(const ERStat& stat)
ERClassifierNM2::ERClassifierNM2(const string& filename) ERClassifierNM2::ERClassifierNM2(const string& filename)
{ {
if (ifstream(filename.c_str())) if (ifstream(filename.c_str()))
{
boost = StatModel::load<Boost>( filename.c_str() ); boost = StatModel::load<Boost>( filename.c_str() );
if( boost.empty() )
{
cout << "Could not read the classifier " << filename.c_str() << endl;
CV_Error(Error::StsBadArg, "Could not read the default classifier!");
}
}
else else
CV_Error(Error::StsBadArg, "Default classifier file not found!"); CV_Error(Error::StsBadArg, "Default classifier file not found!");
} }
...@@ -1051,15 +1064,13 @@ ERClassifierNM2::ERClassifierNM2(const string& filename) ...@@ -1051,15 +1064,13 @@ ERClassifierNM2::ERClassifierNM2(const string& filename)
double ERClassifierNM2::eval(const ERStat& stat) double ERClassifierNM2::eval(const ERStat& stat)
{ {
//Classify //Classify
float arr[] = {0,(float)(stat.rect.width)/(stat.rect.height), // aspect ratio Mat sample = (Mat_<float>(1,7) << (float)(stat.rect.width)/(stat.rect.height), // aspect ratio
sqrt((float)(stat.area))/stat.perimeter, // compactness sqrt((float)(stat.area))/stat.perimeter, // compactness
(float)(1-stat.euler), //number of holes (float)(1-stat.euler), //number of holes
stat.med_crossings, stat.hole_area_ratio, stat.med_crossings, stat.hole_area_ratio,
stat.convex_hull_ratio, stat.num_inflexion_points}; stat.convex_hull_ratio, stat.num_inflexion_points);
vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) ); float votes = boost->predict( sample, noArray(), DTrees::PREDICT_SUM | StatModel::RAW_OUTPUT);
float votes = boost->predict( Mat(sample), noArray(), StatModel::RAW_OUTPUT );
// Logistic Correction returns a probability value (in the range(0,1)) // Logistic Correction returns a probability value (in the range(0,1))
return (double)1-(double)1/(1+exp(-2*votes)); return (double)1-(double)1/(1+exp(-2*votes));
...@@ -2231,7 +2242,14 @@ MaxMeaningfulClustering::MaxMeaningfulClustering(unsigned char _method, unsigned ...@@ -2231,7 +2242,14 @@ MaxMeaningfulClustering::MaxMeaningfulClustering(unsigned char _method, unsigned
minProbability = _minProbability; minProbability = _minProbability;
if (ifstream(filename.c_str())) if (ifstream(filename.c_str()))
group_boost = StatModel::load<Boost>(filename.c_str()); {
group_boost = StatModel::load<Boost>( filename.c_str() );
if( group_boost.empty() )
{
cout << "Could not read the classifier " << filename.c_str() << endl;
CV_Error(Error::StsBadArg, "Could not read the default classifier!");
}
}
else else
CV_Error(Error::StsBadArg, "erGrouping: Default classifier file not found!"); CV_Error(Error::StsBadArg, "erGrouping: Default classifier file not found!");
} }
...@@ -2542,7 +2560,6 @@ double MaxMeaningfulClustering::probability(vector<int> &cluster) ...@@ -2542,7 +2560,6 @@ double MaxMeaningfulClustering::probability(vector<int> &cluster)
return 0.; return 0.;
vector<float> sample; vector<float> sample;
sample.push_back(0);
sample.push_back((float)cluster.size()); sample.push_back((float)cluster.size());
Mat diameters ( (int)cluster.size(), 1, CV_32F, 1 ); Mat diameters ( (int)cluster.size(), 1, CV_32F, 1 );
...@@ -2724,7 +2741,7 @@ double MaxMeaningfulClustering::probability(vector<int> &cluster) ...@@ -2724,7 +2741,7 @@ double MaxMeaningfulClustering::probability(vector<int> &cluster)
sample.push_back((float)mean[0]); sample.push_back((float)mean[0]);
sample.push_back((float)std[0]); sample.push_back((float)std[0]);
float votes_group = group_boost->predict( Mat(sample), noArray(), StatModel::RAW_OUTPUT ); float votes_group = group_boost->predict( Mat(sample), noArray(), DTrees::PREDICT_SUM | StatModel::RAW_OUTPUT);
return (double)1-(double)1/(1+exp(-2*votes_group)); return (double)1-(double)1/(1+exp(-2*votes_group));
} }
...@@ -3022,7 +3039,7 @@ static void erGroupingGK(InputArray _image, InputArrayOfArrays _src, vector<vect ...@@ -3022,7 +3039,7 @@ static void erGroupingGK(InputArray _image, InputArrayOfArrays _src, vector<vect
// assert correct image type // assert correct image type
CV_Assert( channel.type() == CV_8UC1 ); CV_Assert( channel.type() == CV_8UC1 );
CV_Assert( !regions.at(c).empty() ); //CV_Assert( !regions.at(c).empty() );
if ( regions.at(c).size() < 3 ) if ( regions.at(c).size() < 3 )
continue; continue;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment