Commit ffcda05c authored by Vadim Pisarevsky

Merge pull request #59 from lluisgomez/master

Update to the new OpenCV 3.0 ml API and add parallel_for_ OCR
parents 62eda671 820f2e08
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -45,6 +45,35 @@ public:
Parallel_extractCSER & operator=(const Parallel_extractCSER &a);
};
//Loop body for parallel_for_: runs word-level OCR on every detection of the
//given range, so that different detections can be recognised in parallel
class Parallel_OCR: public cv::ParallelLoopBody
{
private:
    //All members are references to containers supplied at construction;
    //entry c of each container belongs to detection c.
    vector<Mat> &detections;               //images handed to the OCR engines
    vector<string> &outputs;               //text output written by run()
    vector< vector<Rect> > &boxes;         //per-detection box lists filled by run()
    vector< vector<string> > &words;       //per-detection word lists filled by run()
    vector< vector<float> > &confidences;  //per-detection confidence lists filled by run()
    vector< OCRTesseract* > &ocrs;         //pool of OCR engines to pick from

public:
    //Stores references to the given containers; nothing is copied
    Parallel_OCR(vector<Mat> &_detections,
                 vector<string> &_outputs,
                 vector< vector<Rect> > &_boxes,
                 vector< vector<string> > &_words,
                 vector< vector<float> > &_confidences,
                 vector< OCRTesseract* > &_ocrs)
        : detections(_detections),
          outputs(_outputs),
          boxes(_boxes),
          words(_words),
          confidences(_confidences),
          ocrs(_ocrs)
    {}

    //Processes detections r.start .. r.end-1
    virtual void operator()( const cv::Range &r ) const
    {
        int idx = r.start;
        while (idx < r.end)
        {
            //The engine is chosen by index modulo pool size, so indices that
            //are ocrs.size() apart share the same engine
            OCRTesseract *engine = ocrs[idx%ocrs.size()];
            engine->run(detections[idx], outputs[idx], &boxes[idx], &words[idx], &confidences[idx], OCR_LEVEL_WORD);
            ++idx;
        }
    }

    //Assignment operator is only declared here
    Parallel_OCR & operator=(const Parallel_OCR &a);
};
//Discard wrongly recognised strings
bool isRepetitive(const string& s);
......@@ -87,10 +116,16 @@ int main(int argc, char* argv[])
er_filters2.push_back(er_filter2);
}
//Initialize OCR engine
//double t_r = getTickCount();
OCRTesseract *ocr_tess = new OCRTesseract();
//Initialize OCR engine (we initialize 10 instances in order to work several recognitions in parallel)
int num_ocrs = 10;
vector<OCRTesseract*> ocrs;
for (int o=0; o<num_ocrs; o++)
{
OCRTesseract* ocr = new OCRTesseract();
ocrs.push_back(ocr);
}
//cout << "TIME_OCR_INITIALIZATION_ALT = "<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << endl;
......@@ -182,7 +217,9 @@ int main(int argc, char* argv[])
float scale_img = (float)(600.f/frame.rows);
float scale_font = (float)(2-scale_img)/1.4f;
vector<string> words_detection;
string output;
float min_confidence1 = 51.f, min_confidence2 = 60.f;
vector<Mat> detections;
//t_r = getTickCount();
......@@ -195,42 +232,51 @@ int main(int argc, char* argv[])
er_draw(channels, regions, nm_region_groups[i], group_img);
group_img(nm_boxes[i]).copyTo(group_img);
copyMakeBorder(group_img,group_img,15,15,15,15,BORDER_CONSTANT,Scalar(0));
detections.push_back(group_img);
}
vector<string> outputs((int)detections.size());
vector< vector<Rect> > boxes((int)detections.size());
vector< vector<string> > words((int)detections.size());
vector< vector<float> > confidences((int)detections.size());
// parallel process detections in batches of ocrs.size()
for (int i=0; i<(int)detections.size(); i=i+(int)ocrs.size())
{
Range r;
if (i+(int)ocrs.size() <= (int)detections.size())
r = Range(i,i+(int)ocrs.size());
else
r = Range(i,(int)detections.size());
vector<Rect> boxes;
vector<string> words;
vector<float> confidences;
parallel_for_(r, Parallel_OCR(detections, outputs, boxes, words, confidences, ocrs));
}
float min_confidence1 = 0.f, min_confidence2 = 0.f;
for (int i=0; i<(int)detections.size(); i++)
{
if (RECOGNITION == 0)
{
ocr_tess->run(group_img, output, &boxes, &words, &confidences, OCR_LEVEL_WORD);
min_confidence1 = 51.f;
min_confidence2 = 60.f;
}
output.erase(remove(output.begin(), output.end(), '\n'), output.end());
outputs[i].erase(remove(outputs[i].begin(), outputs[i].end(), '\n'), outputs[i].end());
//cout << "OCR output = \"" << output << "\" lenght = " << output.size() << endl;
if (output.size() < 3)
if (outputs[i].size() < 3)
continue;
for (int j=0; j<(int)boxes.size(); j++)
for (int j=0; j<(int)boxes[i].size(); j++)
{
boxes[j].x += nm_boxes[i].x-15;
boxes[j].y += nm_boxes[i].y-15;
boxes[i][j].x += nm_boxes[i].x-15;
boxes[i][j].y += nm_boxes[i].y-15;
//cout << " word = " << words[j] << "\t confidence = " << confidences[j] << endl;
if ((words[j].size() < 2) || (confidences[j] < min_confidence1) ||
((words[j].size()==2) && (words[j][0] == words[j][1])) ||
((words[j].size()< 4) && (confidences[j] < min_confidence2)) ||
isRepetitive(words[j]))
if ((words[i][j].size() < 2) || (confidences[i][j] < min_confidence1) ||
((words[i][j].size()==2) && (words[i][j][0] == words[i][j][1])) ||
((words[i][j].size()< 4) && (confidences[i][j] < min_confidence2)) ||
isRepetitive(words[i][j]))
continue;
words_detection.push_back(words[j]);
rectangle(out_img, boxes[j].tl(), boxes[j].br(), Scalar(255,0,255),3);
Size word_size = getTextSize(words[j], FONT_HERSHEY_SIMPLEX, (double)scale_font, (int)(3*scale_font), NULL);
rectangle(out_img, boxes[j].tl()-Point(3,word_size.height+3), boxes[j].tl()+Point(word_size.width,0), Scalar(255,0,255),-1);
putText(out_img, words[j], boxes[j].tl()-Point(1,1), FONT_HERSHEY_SIMPLEX, scale_font, Scalar(255,255,255),(int)(3*scale_font));
words_detection.push_back(words[i][j]);
rectangle(out_img, boxes[i][j].tl(), boxes[i][j].br(), Scalar(255,0,255),3);
Size word_size = getTextSize(words[i][j], FONT_HERSHEY_SIMPLEX, (double)scale_font, (int)(3*scale_font), NULL);
rectangle(out_img, boxes[i][j].tl()-Point(3,word_size.height+3), boxes[i][j].tl()+Point(word_size.width,0), Scalar(255,0,255),-1);
putText(out_img, words[i][j], boxes[i][j].tl()-Point(1,1), FONT_HERSHEY_SIMPLEX, scale_font, Scalar(255,255,255),(int)(3*scale_font));
}
}
......
......@@ -62,6 +62,7 @@ namespace text
using namespace cv::ml;
using namespace std;
using namespace cv::ml;
// Deletes a tree of ERStat regions starting at root. Used only
// internally to this implementation.
......@@ -1017,7 +1018,14 @@ ERClassifierNM1::ERClassifierNM1(const string& filename)
{
if (ifstream(filename.c_str()))
{
boost = StatModel::load<Boost>( filename.c_str() );
if( boost.empty() )
{
cout << "Could not read the classifier " << filename.c_str() << endl;
CV_Error(Error::StsBadArg, "Could not read the default classifier!");
}
}
else
CV_Error(Error::StsBadArg, "Default classifier file not found!");
}
......@@ -1025,14 +1033,12 @@ ERClassifierNM1::ERClassifierNM1(const string& filename)
double ERClassifierNM1::eval(const ERStat& stat)
{
//Classify
float arr[] = {0,(float)(stat.rect.width)/(stat.rect.height), // aspect ratio
Mat sample = (Mat_<float>(1,4) << (float)(stat.rect.width)/(stat.rect.height), // aspect ratio
sqrt((float)(stat.area))/stat.perimeter, // compactness
(float)(1-stat.euler), //number of holes
stat.med_crossings};
vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) );
stat.med_crossings);
float votes = boost->predict( Mat(sample), noArray(), StatModel::RAW_OUTPUT );
float votes = boost->predict( sample, noArray(), DTrees::PREDICT_SUM | StatModel::RAW_OUTPUT);
// Logistic Correction returns a probability value (in the range(0,1))
return (double)1-(double)1/(1+exp(-2*votes));
......@@ -1043,7 +1049,14 @@ double ERClassifierNM1::eval(const ERStat& stat)
ERClassifierNM2::ERClassifierNM2(const string& filename)
{
if (ifstream(filename.c_str()))
{
boost = StatModel::load<Boost>( filename.c_str() );
if( boost.empty() )
{
cout << "Could not read the classifier " << filename.c_str() << endl;
CV_Error(Error::StsBadArg, "Could not read the default classifier!");
}
}
else
CV_Error(Error::StsBadArg, "Default classifier file not found!");
}
......@@ -1051,15 +1064,13 @@ ERClassifierNM2::ERClassifierNM2(const string& filename)
double ERClassifierNM2::eval(const ERStat& stat)
{
//Classify
float arr[] = {0,(float)(stat.rect.width)/(stat.rect.height), // aspect ratio
Mat sample = (Mat_<float>(1,7) << (float)(stat.rect.width)/(stat.rect.height), // aspect ratio
sqrt((float)(stat.area))/stat.perimeter, // compactness
(float)(1-stat.euler), //number of holes
stat.med_crossings, stat.hole_area_ratio,
stat.convex_hull_ratio, stat.num_inflexion_points};
stat.convex_hull_ratio, stat.num_inflexion_points);
vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) );
float votes = boost->predict( Mat(sample), noArray(), StatModel::RAW_OUTPUT );
float votes = boost->predict( sample, noArray(), DTrees::PREDICT_SUM | StatModel::RAW_OUTPUT);
// Logistic Correction returns a probability value (in the range(0,1))
return (double)1-(double)1/(1+exp(-2*votes));
......@@ -2231,7 +2242,14 @@ MaxMeaningfulClustering::MaxMeaningfulClustering(unsigned char _method, unsigned
minProbability = _minProbability;
if (ifstream(filename.c_str()))
group_boost = StatModel::load<Boost>(filename.c_str());
{
group_boost = StatModel::load<Boost>( filename.c_str() );
if( group_boost.empty() )
{
cout << "Could not read the classifier " << filename.c_str() << endl;
CV_Error(Error::StsBadArg, "Could not read the default classifier!");
}
}
else
CV_Error(Error::StsBadArg, "erGrouping: Default classifier file not found!");
}
......@@ -2542,7 +2560,6 @@ double MaxMeaningfulClustering::probability(vector<int> &cluster)
return 0.;
vector<float> sample;
sample.push_back(0);
sample.push_back((float)cluster.size());
Mat diameters ( (int)cluster.size(), 1, CV_32F, 1 );
......@@ -2724,7 +2741,7 @@ double MaxMeaningfulClustering::probability(vector<int> &cluster)
sample.push_back((float)mean[0]);
sample.push_back((float)std[0]);
float votes_group = group_boost->predict( Mat(sample), noArray(), StatModel::RAW_OUTPUT );
float votes_group = group_boost->predict( Mat(sample), noArray(), DTrees::PREDICT_SUM | StatModel::RAW_OUTPUT);
return (double)1-(double)1/(1+exp(-2*votes_group));
}
......@@ -3022,7 +3039,7 @@ static void erGroupingGK(InputArray _image, InputArrayOfArrays _src, vector<vect
// assert correct image type
CV_Assert( channel.type() == CV_8UC1 );
CV_Assert( !regions.at(c).empty() );
//CV_Assert( !regions.at(c).empty() );
if ( regions.at(c).size() < 3 )
continue;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment