Commit 3253fe9f authored by Vladislav Sovrasov's avatar Vladislav Sovrasov

text: improvements in samples and module interface

parent 1306621f
......@@ -32,3 +32,13 @@
volume = {abs/1407.7504},
year = {2014},
}
@inproceedings{LiaoSBWL17,
author = {Minghui Liao and
Baoguang Shi and
Xiang Bai and
Xinggang Wang and
Wenyu Liu},
title = {TextBoxes: {A} Fast Text Detector with a Single Deep Neural Network},
booktitle = {AAAI},
year = {2017}
}
......@@ -27,12 +27,16 @@ public:
@param Bbox a vector of Rect that will store the detected word bounding box
@param confidence a vector of float that will be updated with the confidence the classifier has for the selected bounding box
*/
virtual void textDetectInImage(InputArray inputImage, CV_OUT std::vector<Rect>& Bbox, CV_OUT std::vector<float>& confidence) = 0;
CV_WRAP virtual void detect(InputArray inputImage, CV_OUT std::vector<Rect>& Bbox, CV_OUT std::vector<float>& confidence) = 0;
virtual ~TextDetector() {}
};
/** @brief TextDetectorCNN class provides the functionality of text bounding box detection.
* A TextDetectorCNN is employed to find bounding boxes of text words given an input image.
This class finds bounding boxes of text words given an input image.
This class uses OpenCV dnn module to load pre-trained model described in @cite LiaoSBWL17.
The original repository with the modified SSD Caffe version: https://github.com/MhLiao/TextBoxes.
Model can be downloaded from [DropBox](https://www.dropbox.com/s/g8pjzv2de9gty8g/TextBoxes_icdar13.caffemodel?dl=0).
Modified .prototxt file with the model description can be found in `opencv_contrib/modules/text/samples/textbox.prototxt`.
*/
class CV_EXPORTS_W TextDetectorCNN : public TextDetector
{
......@@ -44,9 +48,9 @@ public:
@param Bbox a vector of Rect that will store the detected word bounding box
@param confidence a vector of float that will be updated with the confidence the classifier has for the selected bounding box
*/
CV_WRAP virtual void textDetectInImage(InputArray inputImage, CV_OUT std::vector<Rect>& Bbox, CV_OUT std::vector<float>& confidence) = 0;
CV_WRAP virtual void detect(InputArray inputImage, CV_OUT std::vector<Rect>& Bbox, CV_OUT std::vector<float>& confidence) = 0;
/** @brief Creates an instance of the textDetector class and implicitly also a DeepCNN classifier.
/** @brief Creates an instance of the TextDetectorCNN class using the provided parameters.
@param modelArchFilename the relative or absolute path to the prototxt file describing the classifier's architecture.
@param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.
......
......@@ -14,14 +14,14 @@ def main():
print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n')
quit()
if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'):
if not os.path.isfile('TextBoxes_icdar13.caffemodel') or not os.path.isfile('textbox.prototxt'):
print " Model files not found in current directory. Aborting"
print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models"
print " See the documentation of text::TextDetectorCNN class to get download links."
quit()
img = cv2.imread(str(sys.argv[1]))
textSpotter = cv2.text.TextDetectorCNN_create("textbox_deploy.prototxt","textbox.caffemodel")
rects, outProbs = textSpotter.textDetectInImage(img);
textSpotter = cv2.text.TextDetectorCNN_create("textbox.prototxt", "TextBoxes_icdar13.caffemodel")
rects, outProbs = textSpotter.detect(img);
vis = img.copy()
thres = 0.6
......
......@@ -885,6 +885,7 @@ layer {
variance: 0.1
variance: 0.2
variance: 0.2
additional_y_offset: true
}
}
layer {
......@@ -1009,6 +1010,7 @@ layer {
variance: 0.1
variance: 0.2
variance: 0.2
additional_y_offset: true
}
}
layer {
......@@ -1133,6 +1135,7 @@ layer {
variance: 0.1
variance: 0.2
variance: 0.2
additional_y_offset: true
}
}
layer {
......@@ -1257,6 +1260,7 @@ layer {
variance: 0.1
variance: 0.2
variance: 0.2
additional_y_offset: true
}
}
layer {
......@@ -1381,6 +1385,7 @@ layer {
variance: 0.1
variance: 0.2
variance: 0.2
additional_y_offset: true
}
}
layer {
......@@ -1505,6 +1510,7 @@ layer {
variance: 0.1
variance: 0.2
variance: 0.2
additional_y_offset: true
}
}
layer {
......
......@@ -10,15 +10,14 @@ using namespace cv;
namespace
{
std::string getHelpStr(std::string progFname)
std::string getHelpStr(const std::string& progFname)
{
std::stringstream out;
out << " Demo of text detection CNN for text detection." << std::endl
<< " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"<<std::endl<<std::endl
<< " Usage: " << progFname << " <output_file> <input_image>" << std::endl
<< " Caffe Model files (textbox.caffemodel, textbox_deploy.prototxt)"<<std::endl
<< " must be in the current directory." << std::endl
<< " These files can be downloaded from https://github.com/sghoshcvc/TextBox-Models.git" << std::endl;
<< " Caffe Model files (textbox.prototxt, TextBoxes_icdar13.caffemodel)"<<std::endl
<< " must be in the current directory. See the documentation of text::TextDetectorCNN class to get download links." << std::endl;
return out.str();
}
......@@ -58,8 +57,10 @@ int main(int argc, const char * argv[])
exit(1);
}
if (!fileExists("textbox.caffemodel") ||
!fileExists("textbox_deploy.prototxt"))
const std::string modelArch = "textbox.prototxt";
const std::string moddelWeights = "TextBoxes_icdar13.caffemodel";
if (!fileExists(modelArch) || !fileExists(moddelWeights))
{
std::cout<<getHelpStr(argv[0]);
std::cout << "Model files not found in the current directory. Aborting!" << std::endl;
......@@ -70,11 +71,11 @@ int main(int argc, const char * argv[])
std::cout << "Starting Text Box Demo" << std::endl;
Ptr<text::TextDetectorCNN> textSpotter =
text::TextDetectorCNN::create("textbox_deploy.prototxt","textbox.caffemodel", false);
text::TextDetectorCNN::create(modelArch, moddelWeights, false);
std::vector<Rect> bbox;
std::vector<float> outProbabillities;
textSpotter->textDetectInImage(image, bbox, outProbabillities);
textSpotter->detect(image, bbox, outProbabillities);
textbox_draw(image, bbox, outProbabillities, 0.5f);
......
......@@ -72,7 +72,7 @@ public:
}
}
void textDetectInImage(InputArray inputImage_, std::vector<Rect>& Bbox, std::vector<float>& confidence)
void detect(InputArray inputImage_, std::vector<Rect>& Bbox, std::vector<float>& confidence)
{
CV_Assert(inputImage_.channels() == inputChannelCount_);
Mat inputImage = inputImage_.getMat().clone();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment