Commit be395e59 authored by sghoshcvc

Modified the class hierarchy

parent 2b8ed124
......@@ -716,10 +716,6 @@ public:
/** @brief produces a class confidence row-vector given an image
*/
CV_WRAP virtual void classify(InputArray image, OutputArray classProbabilities) = 0;
/** @brief produces a list of bounding boxes given an image
*/
CV_WRAP virtual void detect(InputArray image, OutputArray classProbabilities) = 0;
/** @brief produces a matrix containing class confidence row-vectors given a collection of images
*/
......
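For orientation, a minimal usage sketch of the classify() contract kept by this hunk (the detect() method removed here reappears below on TextRegionDetector); the helper function, classifier instance and image path are illustrative assumptions, not part of the commit:

#include <opencv2/text.hpp>
#include <opencv2/imgcodecs.hpp>

// Illustrative helper: obtain per-class confidences for a single word image.
static void classifyWord(cv::Ptr<cv::text::TextImageClassifier> classifier, const std::string& path)
{
    cv::Mat wordImage = cv::imread(path);       // placeholder input image
    cv::Mat classProbabilities;                 // filled with one confidence row-vector
    classifier->classify(wordImage, classProbabilities);
}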
......@@ -65,19 +65,131 @@ namespace text
//detection scenario
class CV_EXPORTS_W BaseDetector
{
public:
virtual ~BaseDetector() {};
virtual void run(Mat& image,
std::vector<Rect>* component_rects=NULL,
std::vector<float>* component_confidences=NULL,
int component_level=0) = 0;
virtual void run(Mat& image, Mat& mask,
std::vector<Rect>* component_rects=NULL,
std::vector<float>* component_confidences=NULL,
int component_level=0) = 0;
};
/** A virtual class for different models of text detection (including CNN based deep models)
*/
class CV_EXPORTS_W TextRegionDetector
{
protected:
/** Stores input and output size
*/
//netGeometry inputGeometry_;
//netGeometry outputGeometry_;
Size inputGeometry_;
Size outputGeometry_;
int inputChannelCount_;
int outputChannelCount_;
public:
virtual ~TextRegionDetector() {}
/** @brief produces a list of bounding boxes and an estimate of text-ness confidence for each bounding box
*/
CV_WRAP virtual void detect(InputArray image, OutputArray bboxProb ) = 0;
/** @brief simple getter method returning the size (height, width) of the input sample
*/
CV_WRAP virtual Size getInputGeometry(){return this->inputGeometry_;}
/** @brief simple getter method returning the shape of the output
* Any text detector should output a number of text regions along with a score of text-ness.
* From the shape, the number of text regions and the number of values returned
* for each region can be inferred
*/
CV_WRAP virtual Size getOutputGeometry(){return this->outputGeometry_;}
};
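To make the contract above concrete, a minimal sketch of how a caller might drive this interface; the helper function and image path are illustrative assumptions, and a concrete detector instance would come from the factory methods further down:

#include <opencv2/text.hpp>
#include <opencv2/imgcodecs.hpp>

// Illustrative helper: run any TextRegionDetector on one image and inspect the output shape.
static void detectRegions(cv::Ptr<cv::text::TextRegionDetector> detector, const std::string& path)
{
    cv::Mat image = cv::imread(path);                 // placeholder input image
    cv::Mat bboxProb;
    detector->detect(image, bboxProb);                // candidate regions with text-ness scores
    cv::Size outGeom = detector->getOutputGeometry();
    // outGeom encodes how many regions are returned and how many values describe each one.
    (void)outGeom;
}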
/** Generic structure of Deep CNN based Text Detectors
* */
class CV_EXPORTS_W DeepCNNTextDetector : public TextRegionDetector
{
/** @brief Class that uses a pretrained caffe model for text detection.
* Any text detection model used here is expected to output text regions along with text-ness scores.
* This network is described in detail in:
* Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network
* https://arxiv.org/abs/1611.06779
*/
protected:
/** all deep CNN based text detectors have a preprocessor (normally)
*/
Ptr<ImagePreprocessor> preprocessor_;
/** @brief all image preprocessing is handled here including whitening etc.
*
* @param input the image to be preprocessed for the classifier. If the depth
* is CV_8U, values should be in [0,255]; otherwise values are assumed to be in [0,1]
*
* @param output reference to the image to be fed to the classifier; the preprocessor will
* resize the image to the appropriate size and convert it to the appropriate depth
*
* The method preprocess should never be used externally; it is up to the classify and classifyBatch
* methods to employ it.
*/
virtual void preprocess(const Mat& input,Mat& output);
public:
virtual ~DeepCNNTextDetector() {};
/** @brief Constructs a DeepCNNTextDetector object from a caffe pretrained model
*
* @param archFilename is the path to the prototxt file containing the deployment model architecture description.
*
* @param weightsFilename is the path to the pretrained weights of the model in binary form.
*
* @param preprocessor is a pointer to the instance of an ImagePreprocessor implementing the protected preprocess_ method;
*
* @param minibatchSz the maximum number of samples that can be processed in parallel. In practice this parameter
* has an effect only when computing on the GPU and should be set with respect to the memory available on the GPU.
*
* @param backEnd integer parameter selecting the computation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
* the only option
*/
CV_WRAP static Ptr<DeepCNNTextDetector> create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE);
/** @brief Constructs a DeepCNNTextDetector intended to be used for text area detection.
*
* This method loads a pretrained classifier and couples it with a preprocessor that preprocesses the image with mean subtraction of ()
* The architecture and models weights can be downloaded from:
* https://github.com/sghoshcvc/TextBox-Models.git (size is around 100 MB)
* @param archFilename is the path to the prototxt file containing the deployment model architecture description.
* When employing OCR_HOLISTIC_BACKEND_CAFFE this is the path to the deploy ".prototxt".
*
* @param weightsFilename is the path to the pretrained weights of the model. When employing
* OCR_HOLISTIC_BACKEND_CAFFE this is the path to the ".caffemodel" file.
*
* @param backEnd integer parameter selecting the computation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
* the only option
*/
CV_WRAP static Ptr<DeepCNNTextDetector> createTextBoxNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE);
friend class ImagePreprocessor;
};
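A hedged construction sketch for the two factories declared above; every file name is a placeholder and the actual models come from the TextBox-Models repository referenced in the documentation:

#include <opencv2/text.hpp>

static void buildDetectors(cv::Ptr<cv::text::ImagePreprocessor> preprocessor)
{
    // Variant 1: explicit preprocessor (passed in, e.g. the mean-subtracting one
    // set up in textDetector::create further below) and a minibatch size of 1.
    cv::Ptr<cv::text::DeepCNNTextDetector> net1 = cv::text::DeepCNNTextDetector::create(
            "deploy.prototxt", "weights.caffemodel", preprocessor, /*minibatchSz=*/1);

    // Variant 2: convenience factory for the TextBoxes model, which couples the
    // network with its own mean-subtracting preprocessor.
    cv::Ptr<cv::text::DeepCNNTextDetector> net2 = cv::text::DeepCNNTextDetector::createTextBoxNet(
            "textbox_deploy.prototxt", "textbox.caffemodel");
    (void)net1; (void)net2;
}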
/** @brief textDetector class provides the functionality of text bounding box detection.
* A TextRegionDetector is employed to find bounding boxes of text
* words given an input image.
*
* This class implements the logic of providing text bounding boxes in a vector of rects given a TextRegionDetector.
* The TextRegionDetector can be any text detector.
*
*/
class CV_EXPORTS_W textDetector : public BaseDetector
{
......@@ -125,9 +237,9 @@ public:
/** @brief simple getter for the underlying TextRegionDetector
*/
CV_WRAP virtual Ptr<TextImageClassifier> getClassifier()=0;
CV_WRAP virtual Ptr<TextRegionDetector> getClassifier()=0;
/** @brief Creates an instance of the textDetector class.
......@@ -135,7 +247,7 @@ public:
*/
CV_WRAP static Ptr<textDetector> create(Ptr<TextImageClassifier> classifierPtr);
CV_WRAP static Ptr<textDetector> create(Ptr<TextRegionDetector> classifierPtr);
/** @brief Creates an instance of the textDetector class and implicitly also a DeepCNN classifier.
......
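An illustrative end-to-end sketch combining the classes changed in this header, assuming the run() overload inherited from BaseDetector above; the model and image file names are placeholders:

#include <opencv2/text.hpp>
#include <opencv2/imgcodecs.hpp>
#include <vector>

int main()
{
    cv::Ptr<cv::text::TextRegionDetector> regionDetector =
            cv::text::DeepCNNTextDetector::createTextBoxNet("textbox_deploy.prototxt",
                                                            "textbox.caffemodel");
    cv::Ptr<cv::text::textDetector> detector = cv::text::textDetector::create(regionDetector);

    cv::Mat image = cv::imread("scene.jpg");   // placeholder input image
    std::vector<cv::Rect> boxes;               // candidate word bounding boxes
    std::vector<float> confidences;            // text-ness score per box
    detector->run(image, &boxes, &confidences);
    return 0;
}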
......@@ -459,53 +459,53 @@ protected:
#endif
}
void process_(Mat inputImage, Mat &outputMat)
{
// do forward pass and stores the output in outputMat
//Process one image
CV_Assert(this->minibatchSz_==1);
//CV_Assert(outputMat.isContinuous());
// void process_(Mat inputImage, Mat &outputMat)
// {
// // do forward pass and stores the output in outputMat
// //Process one image
// CV_Assert(this->minibatchSz_==1);
// //CV_Assert(outputMat.isContinuous());
#ifdef HAVE_CAFFE
net_->input_blobs()[0]->Reshape(1, this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
net_->Reshape();
float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
float* inputData=inputBuffer;
//#ifdef HAVE_CAFFE
// net_->input_blobs()[0]->Reshape(1, this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
// net_->Reshape();
// float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
// float* inputData=inputBuffer;
std::vector<Mat> input_channels;
Mat preprocessed;
// if the image has multiple color channels the input layer should be populated accordingly
for (int channel=0;channel < this->channelCount_;channel++){
// std::vector<Mat> input_channels;
// Mat preprocessed;
// // if the image has multiple color channels the input layer should be populated accordingly
// for (int channel=0;channel < this->channelCount_;channel++){
cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
input_channels.push_back(netInputWraped);
//input_data += width * height;
inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
}
this->preprocess(inputImage,preprocessed);
split(preprocessed, input_channels);
// cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
// input_channels.push_back(netInputWraped);
// //input_data += width * height;
// inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
// }
// this->preprocess(inputImage,preprocessed);
// split(preprocessed, input_channels);
//preprocessed.copyTo(netInputWraped);
// //preprocessed.copyTo(netInputWraped);
this->net_->Forward();
const float* outputNetData=net_->output_blobs()[0]->cpu_data();
// const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
// this->net_->Forward();
// const float* outputNetData=net_->output_blobs()[0]->cpu_data();
// // const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width;
outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
float*outputMatData=(float*)(outputMat.data);
// this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
// int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width;
// outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
// float*outputMatData=(float*)(outputMat.data);
memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
// memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
#endif
}
//#endif
// }
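The (now commented-out) body of process_ above relies on a common Caffe-interop trick: wrap the network's input buffer with per-channel Mat headers and split() the preprocessed image directly into it. A self-contained sketch of just that trick, with an ordinary float vector standing in for the Caffe input blob:

#include <opencv2/core.hpp>
#include <vector>

static void fillPlanarBuffer(const cv::Mat& preprocessed, std::vector<float>& buffer)
{
    CV_Assert(preprocessed.depth() == CV_32F);
    const int channels = preprocessed.channels();
    const int height = preprocessed.rows;
    const int width = preprocessed.cols;
    buffer.resize((size_t)channels * height * width);

    // Mat headers pointing into the buffer: split() writes each channel plane
    // straight into the contiguous memory, so no extra copy is needed.
    std::vector<cv::Mat> inputChannels;
    float* inputData = buffer.data();
    for (int channel = 0; channel < channels; channel++)
    {
        inputChannels.push_back(cv::Mat(height, width, CV_32FC1, inputData));
        inputData += (size_t)height * width;
    }
    cv::split(preprocessed, inputChannels);
}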
......@@ -587,15 +587,15 @@ public:
inputImageList.push_back(image.getMat());
classifyBatch(inputImageList,classProbabilities);
}
void detect(InputArray image, OutputArray Bbox_prob)
{
// void detect(InputArray image, OutputArray Bbox_prob)
// {
Bbox_prob.create(this->outputGeometry_,CV_32F); // dummy initialization; is it needed?
Mat outputMat = Bbox_prob.getMat();
process_(image.getMat(),outputMat);
//copy back to outputArray
outputMat.copyTo(Bbox_prob);
}
// Bbox_prob.create(this->outputGeometry_,CV_32F); // dummy initialization; is it needed?
// Mat outputMat = Bbox_prob.getMat();
// process_(image.getMat(),outputMat);
// //copy back to outputArray
// outputMat.copyTo(Bbox_prob);
// }
void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities)
{
......
......@@ -23,6 +23,8 @@
namespace cv { namespace text {
class textDetectImpl: public textDetector{
private:
struct NetOutput{
......@@ -60,9 +62,9 @@ private:
};
protected:
Ptr<TextImageClassifier> classifier_;
Ptr<TextRegionDetector> classifier_;
public:
textDetectImpl(Ptr<TextImageClassifier> classifierPtr):classifier_(classifierPtr)
textDetectImpl(Ptr<TextRegionDetector> classifierPtr):classifier_(classifierPtr)
{
}
......@@ -131,13 +133,13 @@ public:
Ptr<TextImageClassifier> getClassifier()
Ptr<TextRegionDetector> getClassifier()
{
return this->classifier_;
}
};
Ptr<textDetector> textDetector::create(Ptr<TextImageClassifier> classifierPtr)
Ptr<textDetector> textDetector::create(Ptr<TextRegionDetector> classifierPtr)
{
return Ptr<textDetector>(new textDetectImpl(classifierPtr));
}
......@@ -155,7 +157,7 @@ Ptr<textDetector> textDetector::create(String modelArchFilename, String modelWei
textbox_mean.at<uchar>(0,2)=123;
preprocessor->set_mean(textbox_mean);
// create a pointer to the text box detector (textDetector)
Ptr<TextImageClassifier> classifierPtr(DeepCNN::create(modelArchFilename,modelWeightsFilename,preprocessor,1));
Ptr<TextRegionDetector> classifierPtr(DeepCNNTextDetector::create(modelArchFilename,modelWeightsFilename,preprocessor,1));
return Ptr<textDetector>(new textDetectImpl(classifierPtr));
}
......