Merge pull request #59 from lluisgomez/master

update to new OpenCV 3.0 ml API and adds parallell_for_ OCR

Merge pull request #59 from lluisgomez/master
update to new OpenCV 3.0 ml API and adds parallell_for_ OCR
ffcda05c · Vadim Pisarevsky · 62eda671 · 820f2e08 · ffcda05c · ffcda05c
Commit ffcda05c authored Aug 13, 2014 by Vadim Pisarevsky
5 changed files
--- a/modules/text/samples/trained_classifierNM1.xml
+++ b/modules/text/samples/trained_classifierNM1.xml
--- a/modules/text/samples/trained_classifierNM2.xml
+++ b/modules/text/samples/trained_classifierNM2.xml
--- a/modules/text/samples/trained_classifier_erGrouping.xml
+++ b/modules/text/samples/trained_classifier_erGrouping.xml
--- a/modules/text/samples/webcam_demo.cpp
+++ b/modules/text/samples/webcam_demo.cpp
@@ -45,6 +45,35 @@ public:
    Parallel_extractCSER & operator=(const Parallel_extractCSER &a);
 };
+//OCR recognition is done in parallel for different detections
+class Parallel_OCR: public cv::ParallelLoopBody
+{
+private:
+    vector<Mat> &detections;
+    vector<string> &outputs;
+    vector< vector<Rect> > &boxes;
+    vector< vector<string> > &words;
+    vector< vector<float> > &confidences;
+    vector< OCRTesseract* > &ocrs;
+public:
+    Parallel_OCR(vector<Mat> &_detections, vector<string> &_outputs, vector< vector<Rect> > &_boxes,
+                 vector< vector<string> > &_words, vector< vector<float> > &_confidences, 
+                 vector< OCRTesseract* > &_ocrs)
+        : detections(_detections), outputs(_outputs), boxes(_boxes), words(_words), 
+          confidences(_confidences), ocrs(_ocrs)
+    {}
+    virtual void operator()( const cv::Range &r ) const
+    {
+        for (int c=r.start; c < r.end; c++)
+        {
+            ocrs[c%ocrs.size()]->run(detections[c], outputs[c], &boxes[c], &words[c], &confidences[c], OCR_LEVEL_WORD);
+        }
+    }
+    Parallel_OCR & operator=(const Parallel_OCR &a);
+};
 //Discard wrongly recognised strings
 bool   isRepetitive(const string& s);
@@ -87,10 +116,16 @@ int main(int argc, char* argv[])
        er_filters2.push_back(er_filter2);
    }
-    //Initialize OCR engine
    //double t_r = getTickCount();
-    OCRTesseract *ocr_tess = new OCRTesseract();
+    //Initialize OCR engine (we initialize 10 instances in order to work several recognitions in parallel)
+    int num_ocrs = 10;
+    vector<OCRTesseract*> ocrs;
+    for (int o=0; o<num_ocrs; o++)
+    {
+      OCRTesseract* ocr = new OCRTesseract();
+      ocrs.push_back(ocr);
+    }
    //cout << "TIME_OCR_INITIALIZATION_ALT = "<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << endl;
@@ -182,7 +217,9 @@ int main(int argc, char* argv[])
        float scale_img  = (float)(600.f/frame.rows);
        float scale_font = (float)(2-scale_img)/1.4f;
        vector<string> words_detection;
-        string output;
+        float min_confidence1 = 51.f, min_confidence2 = 60.f;
+        vector<Mat> detections;
        //t_r = getTickCount();
@@ -195,42 +232,51 @@ int main(int argc, char* argv[])
            er_draw(channels, regions, nm_region_groups[i], group_img);
            group_img(nm_boxes[i]).copyTo(group_img);
            copyMakeBorder(group_img,group_img,15,15,15,15,BORDER_CONSTANT,Scalar(0));
+            detections.push_back(group_img);
+        }
+        vector<string> outputs((int)detections.size());
+        vector< vector<Rect> > boxes((int)detections.size());
+        vector< vector<string> > words((int)detections.size());
+        vector< vector<float> > confidences((int)detections.size());
+        // parallel process detections in batches of ocrs.size()
+        for (int i=0; i<(int)detections.size(); i=i+(int)ocrs.size()) 
+        {
+          Range r;
+          if (i+(int)ocrs.size() <= (int)detections.size())
+            r = Range(i,i+(int)ocrs.size());
+          else
+            r = Range(i,(int)detections.size());
-            vector<Rect>   boxes;
+          parallel_for_(r, Parallel_OCR(detections, outputs, boxes, words, confidences, ocrs));
-            vector<string> words;
+        }
-            vector<float>  confidences;
-            float min_confidence1 = 0.f, min_confidence2 = 0.f;
+        for (int i=0; i<(int)detections.size(); i++)
+        {
-            if (RECOGNITION == 0)
-            {
-                ocr_tess->run(group_img, output, &boxes, &words, &confidences, OCR_LEVEL_WORD);
-                min_confidence1 = 51.f;
-                min_confidence2 = 60.f;
-            }
-            output.erase(remove(output.begin(), output.end(), '\n'), output.end());
+            outputs[i].erase(remove(outputs[i].begin(), outputs[i].end(), '\n'), outputs[i].end());
            //cout << "OCR output = \"" << output << "\" lenght = " << output.size() << endl;
-            if (output.size() < 3)
+            if (outputs[i].size() < 3)
                continue;
-            for (int j=0; j<(int)boxes.size(); j++)
+            for (int j=0; j<(int)boxes[i].size(); j++)
            {
-                boxes[j].x += nm_boxes[i].x-15;
+                boxes[i][j].x += nm_boxes[i].x-15;
-                boxes[j].y += nm_boxes[i].y-15;
+                boxes[i][j].y += nm_boxes[i].y-15;
                //cout << "  word = " << words[j] << "\t confidence = " << confidences[j] << endl;
-                if ((words[j].size() < 2) || (confidences[j] < min_confidence1) ||
+                if ((words[i][j].size() < 2) || (confidences[i][j] < min_confidence1) ||
-                        ((words[j].size()==2) && (words[j][0] == words[j][1])) ||
+                        ((words[i][j].size()==2) && (words[i][j][0] == words[i][j][1])) ||
-                        ((words[j].size()< 4) && (confidences[j] < min_confidence2)) ||
+                        ((words[i][j].size()< 4) && (confidences[i][j] < min_confidence2)) ||
-                        isRepetitive(words[j]))
+                        isRepetitive(words[i][j]))
                    continue;
-                words_detection.push_back(words[j]);
+                words_detection.push_back(words[i][j]);
-                rectangle(out_img, boxes[j].tl(), boxes[j].br(), Scalar(255,0,255),3);
+                rectangle(out_img, boxes[i][j].tl(), boxes[i][j].br(), Scalar(255,0,255),3);
-                Size word_size = getTextSize(words[j], FONT_HERSHEY_SIMPLEX, (double)scale_font, (int)(3*scale_font), NULL);
+                Size word_size = getTextSize(words[i][j], FONT_HERSHEY_SIMPLEX, (double)scale_font, (int)(3*scale_font), NULL);
-                rectangle(out_img, boxes[j].tl()-Point(3,word_size.height+3), boxes[j].tl()+Point(word_size.width,0), Scalar(255,0,255),-1);
+                rectangle(out_img, boxes[i][j].tl()-Point(3,word_size.height+3), boxes[i][j].tl()+Point(word_size.width,0), Scalar(255,0,255),-1);
-                putText(out_img, words[j], boxes[j].tl()-Point(1,1), FONT_HERSHEY_SIMPLEX, scale_font, Scalar(255,255,255),(int)(3*scale_font));
+                putText(out_img, words[i][j], boxes[i][j].tl()-Point(1,1), FONT_HERSHEY_SIMPLEX, scale_font, Scalar(255,255,255),(int)(3*scale_font));
            }
        }

--- a/modules/text/src/erfilter.cpp
+++ b/modules/text/src/erfilter.cpp
@@ -62,6 +62,7 @@ namespace text
 using namespace cv::ml;
 using namespace std;
+using namespace cv::ml;
 // Deletes a tree of ERStat regions starting at root. Used only
 // internally to this implementation.
@@ -1017,7 +1018,14 @@ ERClassifierNM1::ERClassifierNM1(const string& filename)
 {
    if (ifstream(filename.c_str()))
+    {
        boost = StatModel::load<Boost>( filename.c_str() );
+        if( boost.empty() )
+        {
+            cout << "Could not read the classifier " << filename.c_str() << endl;
+            CV_Error(Error::StsBadArg, "Could not read the default classifier!");
+        }
+    }
    else
        CV_Error(Error::StsBadArg, "Default classifier file not found!");
 }
@@ -1025,14 +1033,12 @@ ERClassifierNM1::ERClassifierNM1(const string& filename)
 double ERClassifierNM1::eval(const ERStat& stat)
 {
    //Classify
-    float arr[] = {0,(float)(stat.rect.width)/(stat.rect.height), // aspect ratio
+    Mat sample = (Mat_<float>(1,4) <<  (float)(stat.rect.width)/(stat.rect.height), // aspect ratio
                     sqrt((float)(stat.area))/stat.perimeter, // compactness
                     (float)(1-stat.euler), //number of holes
-                     stat.med_crossings};
+                     stat.med_crossings);
-    vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) );
-    float votes = boost->predict( Mat(sample), noArray(), StatModel::RAW_OUTPUT );
+    float votes = boost->predict( sample, noArray(), DTrees::PREDICT_SUM | StatModel::RAW_OUTPUT);
    // Logistic Correction returns a probability value (in the range(0,1))
    return (double)1-(double)1/(1+exp(-2*votes));
@@ -1043,7 +1049,14 @@ double ERClassifierNM1::eval(const ERStat& stat)
 ERClassifierNM2::ERClassifierNM2(const string& filename)
 {
    if (ifstream(filename.c_str()))
+    {
        boost = StatModel::load<Boost>( filename.c_str() );
+        if( boost.empty() )
+        {
+            cout << "Could not read the classifier " << filename.c_str() << endl;
+            CV_Error(Error::StsBadArg, "Could not read the default classifier!");
+        }
+    }
    else
        CV_Error(Error::StsBadArg, "Default classifier file not found!");
 }
@@ -1051,15 +1064,13 @@ ERClassifierNM2::ERClassifierNM2(const string& filename)
 double ERClassifierNM2::eval(const ERStat& stat)
 {
    //Classify
-    float arr[] = {0,(float)(stat.rect.width)/(stat.rect.height), // aspect ratio
+    Mat sample = (Mat_<float>(1,7) << (float)(stat.rect.width)/(stat.rect.height), // aspect ratio
                     sqrt((float)(stat.area))/stat.perimeter, // compactness
                     (float)(1-stat.euler), //number of holes
                     stat.med_crossings, stat.hole_area_ratio,
-                     stat.convex_hull_ratio, stat.num_inflexion_points};
+                     stat.convex_hull_ratio, stat.num_inflexion_points);
-    vector<float> sample (arr, arr + sizeof(arr) / sizeof(arr[0]) );
+    float votes = boost->predict( sample, noArray(), DTrees::PREDICT_SUM | StatModel::RAW_OUTPUT);
-    float votes = boost->predict( Mat(sample), noArray(), StatModel::RAW_OUTPUT );
    // Logistic Correction returns a probability value (in the range(0,1))
    return (double)1-(double)1/(1+exp(-2*votes));
@@ -2231,7 +2242,14 @@ MaxMeaningfulClustering::MaxMeaningfulClustering(unsigned char _method, unsigned
    minProbability = _minProbability;
    if (ifstream(filename.c_str()))
-        group_boost = StatModel::load<Boost>(filename.c_str());
+    {
+        group_boost = StatModel::load<Boost>( filename.c_str() );
+        if( group_boost.empty() )
+        {
+            cout << "Could not read the classifier " << filename.c_str() << endl;
+            CV_Error(Error::StsBadArg, "Could not read the default classifier!");
+        }
+    }
    else
        CV_Error(Error::StsBadArg, "erGrouping: Default classifier file not found!");
 }
@@ -2542,7 +2560,6 @@ double MaxMeaningfulClustering::probability(vector<int> &cluster)
        return 0.;
    vector<float> sample;
-    sample.push_back(0);
    sample.push_back((float)cluster.size());
    Mat diameters      ( (int)cluster.size(), 1, CV_32F, 1 );
@@ -2724,7 +2741,7 @@ double MaxMeaningfulClustering::probability(vector<int> &cluster)
    sample.push_back((float)mean[0]);
    sample.push_back((float)std[0]);
-    float votes_group = group_boost->predict( Mat(sample), noArray(), StatModel::RAW_OUTPUT );
+    float votes_group = group_boost->predict( Mat(sample), noArray(), DTrees::PREDICT_SUM | StatModel::RAW_OUTPUT);
    return (double)1-(double)1/(1+exp(-2*votes_group));
 }
@@ -3022,7 +3039,7 @@ static void erGroupingGK(InputArray _image, InputArrayOfArrays _src, vector<vect
        // assert correct image type
        CV_Assert( channel.type() == CV_8UC1 );
-        CV_Assert( !regions.at(c).empty() );
+        //CV_Assert( !regions.at(c).empty() );
        if ( regions.at(c).size() < 3 )
            continue;