text: improvements in samples and module interface

3253fe9f · Vladislav Sovrasov · 1306621f · 3253fe9f · 3253fe9f · 3253fe9f
Commit 3253fe9f authored Oct 10, 2017 by Vladislav Sovrasov
6 changed files
--- a/modules/text/doc/text.bib
+++ b/modules/text/doc/text.bib
@@ -31,4 +31,14 @@
  journal   = {CoRR},
  volume    = {abs/1407.7504},
  year      = {2014},
 }
\ No newline at end of file
+@inproceedings{LiaoSBWL17,
+  author    = {Minghui Liao and
+               Baoguang Shi and
+               Xiang Bai and
+               Xinggang Wang and
+               Wenyu Liu},
+  title     = {TextBoxes: {A} Fast Text Detector with a Single Deep Neural Network},
+  booktitle = {AAAI},
+  year      = {2017}
+}
--- a/modules/text/include/opencv2/text/textDetector.hpp
+++ b/modules/text/include/opencv2/text/textDetector.hpp
@@ -27,12 +27,16 @@ public:
    @param Bbox a vector of Rect that will store the detected word bounding box
    @param confidence a vector of float that will be updated with the confidence the classifier has for the selected bounding box
    */
-    virtual void textDetectInImage(InputArray inputImage, CV_OUT std::vector<Rect>& Bbox, CV_OUT std::vector<float>& confidence) = 0;
+    CV_WRAP virtual void detect(InputArray inputImage, CV_OUT std::vector<Rect>& Bbox, CV_OUT std::vector<float>& confidence) = 0;
    virtual ~TextDetector() {}
 };
 /** @brief TextDetectorCNN class provides the functionality of text bounding box detection.
- * A TextDetectorCNN is employed to find bounding boxes of text words given an input image.
+ This class is used to find bounding boxes of text words given an input image.
+ It uses the OpenCV dnn module to load the pre-trained model described in @cite LiaoSBWL17.
+ The original repository with the modified SSD Caffe version: https://github.com/MhLiao/TextBoxes.
+ The model can be downloaded from [DropBox](https://www.dropbox.com/s/g8pjzv2de9gty8g/TextBoxes_icdar13.caffemodel?dl=0).
+ The modified .prototxt file with the model description can be found in `opencv_contrib/modules/text/samples/textbox.prototxt`.
 */
 class CV_EXPORTS_W TextDetectorCNN : public TextDetector
 {
@@ -44,9 +48,9 @@ public:
    @param Bbox a vector of Rect that will store the detected word bounding box
    @param confidence a vector of float that will be updated with the confidence the classifier has for the selected bounding box
    */
-    CV_WRAP virtual void textDetectInImage(InputArray inputImage, CV_OUT std::vector<Rect>& Bbox, CV_OUT std::vector<float>& confidence) = 0;
+    CV_WRAP virtual void detect(InputArray inputImage, CV_OUT std::vector<Rect>& Bbox, CV_OUT std::vector<float>& confidence) = 0;
-    /** @brief Creates an instance of the textDetector class and implicitly also a DeepCNN classifier.
+    /** @brief Creates an instance of the TextDetectorCNN class using the provided parameters.
    @param modelArchFilename the relative or absolute path to the prototxt file describing the classifier's architecture.
    @param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.

--- a/modules/text/samples/deeptextdetection.py
+++ b/modules/text/samples/deeptextdetection.py
@@ -14,14 +14,14 @@ def main():
        print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n')
        quit()
-    if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'):
+    if not os.path.isfile('TextBoxes_icdar13.caffemodel') or not os.path.isfile('textbox.prototxt'):
        print " Model files not found in current directory. Aborting"
-        print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models"
+        print " See the documentation of text::TextDetectorCNN class to get download links."
        quit()
    img = cv2.imread(str(sys.argv[1]))
-    textSpotter = cv2.text.TextDetectorCNN_create("textbox_deploy.prototxt","textbox.caffemodel")
+    textSpotter = cv2.text.TextDetectorCNN_create("textbox.prototxt", "TextBoxes_icdar13.caffemodel")
-    rects, outProbs = textSpotter.textDetectInImage(img);
+    rects, outProbs = textSpotter.detect(img);
    vis = img.copy()
    thres = 0.6

--- a/modules/text/samples/textbox.prototxt
+++ b/modules/text/samples/textbox.prototxt
@@ -885,6 +885,7 @@ layer {
    variance: 0.1
    variance: 0.2
    variance: 0.2
+    additional_y_offset: true
  }
 }
 layer {
@@ -1009,6 +1010,7 @@ layer {
    variance: 0.1
    variance: 0.2
    variance: 0.2
+    additional_y_offset: true
  }
 }
 layer {
@@ -1133,6 +1135,7 @@ layer {
    variance: 0.1
    variance: 0.2
    variance: 0.2
+    additional_y_offset: true
  }
 }
 layer {
@@ -1257,6 +1260,7 @@ layer {
    variance: 0.1
    variance: 0.2
    variance: 0.2
+    additional_y_offset: true
  }
 }
 layer {
@@ -1381,6 +1385,7 @@ layer {
    variance: 0.1
    variance: 0.2
    variance: 0.2
+    additional_y_offset: true
  }
 }
 layer {
@@ -1505,6 +1510,7 @@ layer {
    variance: 0.1
    variance: 0.2
    variance: 0.2
+    additional_y_offset: true
  }
 }
 layer {

--- a/modules/text/samples/textbox_demo.cpp
+++ b/modules/text/samples/textbox_demo.cpp
@@ -10,15 +10,14 @@ using namespace cv;
 namespace
 {
-std::string getHelpStr(std::string progFname)
+std::string getHelpStr(const std::string& progFname)
 {
    std::stringstream out;
    out << "    Demo of text detection CNN for text detection." << std::endl
        << "    Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"<<std::endl<<std::endl
        << "    Usage: " << progFname << " <output_file> <input_image>" << std::endl
-        << "    Caffe Model files  (textbox.caffemodel, textbox_deploy.prototxt)"<<std::endl
+        << "    Caffe Model files  (textbox.prototxt, TextBoxes_icdar13.caffemodel)"<<std::endl
-        << "      must be in the current directory." << std::endl
+        << "      must be in the current directory. See the documentation of text::TextDetectorCNN class to get download links." << std::endl;
-        << "    These files can be downloaded from https://github.com/sghoshcvc/TextBox-Models.git" << std::endl;
    return out.str();
 }
@@ -58,8 +57,10 @@ int main(int argc, const char * argv[])
        exit(1);
    }
-    if (!fileExists("textbox.caffemodel") ||
+    const std::string modelArch = "textbox.prototxt";
-            !fileExists("textbox_deploy.prototxt"))
+    const std::string moddelWeights = "TextBoxes_icdar13.caffemodel";
+    if (!fileExists(modelArch) || !fileExists(moddelWeights))
    {
        std::cout<<getHelpStr(argv[0]);
        std::cout << "Model files not found in the current directory. Aborting!" << std::endl;
@@ -70,11 +71,11 @@ int main(int argc, const char * argv[])
    std::cout << "Starting Text Box Demo" << std::endl;
    Ptr<text::TextDetectorCNN> textSpotter =
-            text::TextDetectorCNN::create("textbox_deploy.prototxt","textbox.caffemodel", false);
+            text::TextDetectorCNN::create(modelArch, moddelWeights, false);
    std::vector<Rect> bbox;
    std::vector<float> outProbabillities;
-    textSpotter->textDetectInImage(image, bbox, outProbabillities);
+    textSpotter->detect(image, bbox, outProbabillities);
    textbox_draw(image, bbox, outProbabillities, 0.5f);

--- a/modules/text/src/text_detectorCNN.cpp
+++ b/modules/text/src/text_detectorCNN.cpp
@@ -72,7 +72,7 @@ public:
        }
    }
-    void textDetectInImage(InputArray inputImage_, std::vector<Rect>& Bbox, std::vector<float>& confidence)
+    void detect(InputArray inputImage_, std::vector<Rect>& Bbox, std::vector<float>& confidence)
    {
        CV_Assert(inputImage_.channels() == inputChannelCount_);
        Mat inputImage = inputImage_.getMat().clone();