added calculation of output size

c697e41b · sghoshcvc · a2cab071 · c697e41b · c697e41b · c697e41b
Commit c697e41b authored Aug 28, 2017 by sghoshcvc
6 changed files
--- a/modules/text/include/opencv2/text/ocr.hpp
+++ b/modules/text/include/opencv2/text/ocr.hpp
@@ -861,6 +861,15 @@ public:
 };
 namespace cnn_config{
+/** @brief runtime backend information
+ *
+ * this function finds the status of backends compiled with this module
+ *
+ * @return a list of backends (caffe,opencv-dnn etc.)
+ * */
+CV_EXPORTS_W std::vector<std::string> getAvailableBackends();
 namespace caffe_backend{
 /** @brief Prompts Caffe on the computation device beeing used
@@ -897,6 +906,21 @@ CV_EXPORTS_W void setCaffeGpuMode(bool useGpu);
 CV_EXPORTS_W bool getCaffeAvailable();
 }//caffe
+namespace dnn_backend {
+/** @brief Provides runtime information on whether DNN module was compiled in.
+ *
+ * The text module API is the same regardless of whether the DNN module was available or not
+ * during compilation. When methods that require a backend are invoked while no backend support
+ * is compiled, exceptions are thrown. This method allows to test whether the
+ * text module was built with dnn_backend during runtime.
+ *
+ * @return true if opencv_dnn support for the text module was provided during compilation,
+ * false if opencv_dnn was unavailable.
+ */
+CV_EXPORTS_W bool getDNNAvailable();
+}//dnn_backend
 }//cnn_config
 /** @brief OCRHolisticWordRecognizer class provides the functionallity of segmented wordspotting.

--- a/modules/text/include/opencv2/text/textDetector.hpp
+++ b/modules/text/include/opencv2/text/textDetector.hpp
@@ -56,7 +56,7 @@ namespace cv
 namespace text
 {
-//! @addtogroup text_recognize
+//! @addtogroup text_detect
 //! @{
@@ -263,7 +263,7 @@ public:
 };
+//! @}
 }//namespace text
 }//namespace cv

--- a/modules/text/samples/textbox_demo.cpp
+++ b/modules/text/samples/textbox_demo.cpp
@@ -61,6 +61,12 @@ int main(int argc, const char * argv[]){
        std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n";
        //exit(1);
    }
+    std::vector<std::string> backends=cv::text::cnn_config::getAvailableBackends();
+    std::cout << "The Following backends are available" << "\n";
+    for (int i=0;i<backends.size();i++)
+       std::cout << backends[i] << "\n";
+   // printf("%s",x);
    //set to true if you have a GPU with more than 3GB
     if(cv::text::cnn_config::caffe_backend::getCaffeAvailable())
    cv::text::cnn_config::caffe_backend::setCaffeGpuMode(true);
@@ -112,7 +118,7 @@ int main(int argc, const char * argv[]){
    }
    // call dict net here for all detected parts
    cv::Ptr<cv::text::DeepCNN> cnn=cv::text::DeepCNN::createDictNet(
-                "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel");
+                "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel",cv::text::OCR_HOLISTIC_BACKEND_DNN);
    cv::Ptr<cv::text::OCRHolisticWordRecognizer> wordSpotter=
            cv::text::OCRHolisticWordRecognizer::create(cnn,"dictnet_vgg_labels.txt");
@@ -130,7 +136,7 @@ int main(int argc, const char * argv[]){
        cv::Point tl_ = bbox.at(i).tl();
        cv::Point br_ = bbox.at(i).br();
-        out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<tl_.y<<","<<tl_.y<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl;
+        out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl;
    }
    out.close();

--- a/modules/text/src/ocr_holistic.cpp
+++ b/modules/text/src/ocr_holistic.cpp
@@ -122,6 +122,7 @@ protected:
    //void set_mean_(Mat M){}
    void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
        //TODO put all the logic of channel and depth conversions in ImageProcessor class
        CV_Assert(outputChannels==1 || outputChannels==3);
        CV_Assert(input.channels()==1 || input.channels()==3);
@@ -433,6 +434,7 @@ protected:
        CV_Assert(int(inputImageList.size())<=this->minibatchSz_);
        CV_Assert(outputMat.isContinuous());
 #ifdef HAVE_CAFFE
        net_->input_blobs()[0]->Reshape(inputImageList.size(), this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
        net_->Reshape();
@@ -450,16 +452,19 @@ protected:
                input_channels.push_back(netInputWraped);
                //input_data += width * height;
                inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
            }
            this->preprocess(inputImageList[imgNum],preprocessed);
            split(preprocessed, input_channels);
        }
        this->net_->ForwardPrefilled();
        const float* outputNetData=net_->output_blobs()[0]->cpu_data();
        this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
        int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width;
        //outputMat.resize(this->outputGeometry_.height * this->outputGeometry_.width);
        float*outputMatData=(float*)(outputMat.data);
        memcpy(outputMatData,outputNetData,sizeof(float)*outputSz*inputImageList.size());
@@ -470,9 +475,10 @@ protected:
 #ifdef HAVE_CAFFE
    Ptr<caffe::Net<float> > net_;
 #endif
-    //Size inputGeometry_;
+    //Size inputGeometry_;//=Size(100,32);
    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
    int outputSize_;
+    //Size outputGeometry_;
 public:
    DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn):
        minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
@@ -608,7 +614,7 @@ protected:
            preProcessedImList.push_back(preprocessed);
        }
        // set input data blob in dnn::net
-        net_->setInput(blobFromImages(preProcessedImList,1, Size(100, 32)), "data");
+        net_->setInput(blobFromImages(preProcessedImList,1, this->inputGeometry_), "data");
        float*outputMatData=(float*)(outputMat.data);
       //Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ;
@@ -625,9 +631,16 @@ protected:
 #ifdef HAVE_DNN
    Ptr<Net> net_;
 #endif
-    //Size inputGeometry_;
+    // The input image size is hard-coded. TODO: is there anything in the DNN library to read it from the prototxt?
+   // Size inputGeometry_;//=Size(100,32);
    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
    int outputSize_;
+    //Size outputGeometry_;//= Size(1,1);
+    //int channelCount_;
+   // int inputChannel_ ;//=1;
+    const int _inputHeight =32;
+    const int _inputWidth =100;
+    const int _inputChannel =1;
 public:
    DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn):
        minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
@@ -678,33 +691,17 @@ public:
            //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
            exit(-1);
        }
-// find a wa to check the followings in cv::dnn ???
-//        CV_Assert(net_->num_inputs()==1);
-//        CV_Assert(net_->num_outputs()==1);
-//        CV_Assert(this->net_->input_blobs()[0]->channels()==1
-//                ||this->net_->input_blobs()[0]->channels()==3);
-//        this->channelCount_=this->net_->input_blobs()[0]->channels();
-        //this->net_->CopyTrainedLayersFrom(modelWeightsFilename);
-        //caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0];
+        this->inputGeometry_=Size(_inputWidth,_inputHeight);// Size(inputLayer->width(), inputLayer->height());
-        //inputLayerId = net_->getLayerId('data');
+        this->channelCount_ = _inputChannel;//inputLayer->channels();
-      //  inputLayerShape = net_->getLayerShapes(const MatShape& netInputShape,
-       //                                     inputLayerId,
-      //                                      std::vector<MatShape>* inLayerShapes,
-      //  std::vector<MatShape>* outLayerShapes) const;
-        // should not be hard coded ideally
-        this->inputGeometry_=Size(100,32);// Size(inputLayer->width(), inputLayer->height());
-        this->channelCount_ = 1;//inputLayer->channels();
        //inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
-        //net_->Reshape();
+        Ptr< Layer > outLayer=	net_->getLayer (net_->getLayerId (net_->getLayerNames()[net_->getLayerNames().size()-2]));
-        this->outputSize_=88172 ;//net_->output_blobs()[0]->channels();
+        //std::vector<Mat> blobs = outLayer->blobs;
-        this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
+        this->outputSize_=(outLayer->blobs)[1].size[0] ;//net_->output_blobs()[0]->channels();
+        //this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
@@ -732,7 +729,7 @@ public:
        size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic
        classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F);
        Mat outputMat = classProbabilities.getMat();
-        printf("ekhane");
        for(size_t imgNum=0;imgNum<allImageVector.size();imgNum+=minibatchSize)
        {
            size_t rangeEnd=imgNum+std::min<size_t>(allImageVector.size()-imgNum,minibatchSize);
@@ -832,6 +829,22 @@ Ptr<DeepCNN> DeepCNN::createDictNet(String archFilename,String weightsFilename,i
 }
 namespace cnn_config{
+std::vector<std::string> getAvailableBackends()
+{
+    std::vector<std::string> backends;
+#ifdef HAVE_CAFFE
+    backends.push_back("CAFFE, OCR_HOLISTIC_BACKEND_CAFFE"); // Caffe-based backend
+#endif
+#ifdef HAVE_DNN
+    backends.push_back("DNN, OCR_HOLISTIC_BACKEND_DNN");// opencv_dnn-based backend
+#endif
+    return backends;
+}
 namespace caffe_backend{
 #ifdef HAVE_CAFFE
@@ -856,7 +869,7 @@ bool getCaffeAvailable()
 {
    return true;
 }
-#elif defined(HAVE_DNN)
+#else
 bool getCaffeGpuMode()
 {
@@ -873,32 +886,23 @@ void setCaffeGpuMode(bool useGpu)
 bool getCaffeAvailable(){
    return 0;
 }
-bool getDNNAvailable(){
-    return true;
-}
+#endif
-#else
+}//namespace caffe
+namespace dnn_backend{
+#ifdef  HAVE_DNN
-bool getCaffeGpuMode()
-{
-    CV_Error(Error::StsError,"Caffe not available during compilation!");
-    return 0;
-}
-void setCaffeGpuMode(bool useGpu)
+bool getDNNAvailable(){
-{
+    return true;
-    CV_Error(Error::StsError,"Caffe not available during compilation!");
-    CV_Assert(useGpu==1);//Compilation directives force
 }
+#else
-bool getCaffeAvailable(){
+bool getDNNAvailable(){
    return 0;
 }
 #endif
+}//namespace dnn_backend
-}//namespace caffe
 }//namespace cnn_config
 class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{
@@ -931,6 +935,7 @@ private:
            getOutputs(buffer,nbOutputs,tmp);
            classNum=tmp[0].wordIdx;
            confidence=tmp[0].probabillity;
        }
    };
 protected:
@@ -972,6 +977,7 @@ public:
    {
        Mat netOutput;
        this->classifier_->classifyBatch(inputImageList,netOutput);
        for(int k=0;k<netOutput.rows;k++)
        {
            int classNum;

--- a/modules/text/src/text_detector.cpp
+++ b/modules/text/src/text_detector.cpp
@@ -16,9 +16,9 @@
 #include <vector>
-#ifdef HAVE_CAFFE
+//#ifdef HAVE_CAFFE
-#include "caffe/caffe.hpp"
+//#include "caffe/caffe.hpp"
-#endif
+//#endif
 namespace cv { namespace text {

--- a/modules/text/src/text_detectorCNN.cpp
+++ b/modules/text/src/text_detectorCNN.cpp
@@ -225,75 +225,25 @@ protected:
 #ifdef HAVE_DNN
-        //std::vector<Mat> preProcessedImList; // to store preprocessed images, should it be handled inside preprocessing class?
        Mat preprocessed;
        this->preprocess(inputImage,preprocessed);
-        printf("After preprocess");
-        // preprocesses each image in the inputImageList and push to preprocessedImList
+        net_->setInput(blobFromImage(preprocessed,1,  this->inputGeometry_), "data");
-//        for(size_t imgNum=0;imgNum<inputImageList.size();imgNum++)
-//        {
+       Mat outputNet = this->net_->forward( );
-//            this->preprocess(inputImageList[imgNum],preprocessed);
-//            preProcessedImList.push_back(preprocessed);
-//        }
-        // set input data blob in dnn::net
-        //Mat temp =blobFromImage(preprocessed,1, Size(700, 700));
-        //printf("%d %d %d ",temp.size[1],temp.size[2],temp.size[3]);
-        net_->setInput(blobFromImage(preprocessed,1, Size(700, 700)), "data");
-        printf("Input layer");
-       //Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ;
-       Mat outputNet = this->net_->forward( );//"mbox_priorbox");
-       printf("After forward");
-       //outputNet = outputNet.reshape(1, 1);
       this->outputGeometry_.height = outputNet.size[2];
       this->outputGeometry_.width = outputNet.size[3];
       this->outputChannelCount_ = outputNet.size[1];
-       printf("%d %d %d ",outputNet.size[1],outputNet.size[2],outputNet.size[3]);
       outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
        float*outputMatData=(float*)(outputMat.data);
       float*outputNetData=(float*)(outputNet.data);
       int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width;
       memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
-//        net_->input_blobs()[0]->Reshape(1, this->inputChannelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
-//        net_->Reshape();
-//        float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
-//        float* inputData=inputBuffer;
-//        std::vector<Mat> input_channels;
-//        Mat preprocessed;
-//        // if the image have multiple color channels the input layer should be populated accordingly
-//        for (int channel=0;channel < this->inputChannelCount_;channel++){
-//            cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
-//            input_channels.push_back(netInputWraped);
-//            //input_data += width * height;
-//            inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
-//        }
-//        this->preprocess(inputImage,preprocessed);
-//        split(preprocessed, input_channels);
-//        //preprocessed.copyTo(netInputWraped);
-//        this->net_->Forward();
-//        const float* outputNetData=net_->output_blobs()[0]->cpu_data();
-//        // const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
-//        this->outputGeometry_.height = net_->output_blobs()[0]->height();
-//        this->outputGeometry_.width = net_->output_blobs()[0]->width();
-//        this->outputChannelCount_ = net_->output_blobs()[0]->channels();
-//        int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width;
-//        outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
-//        float*outputMatData=(float*)(outputMat.data);
-//        memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
 #endif
@@ -307,6 +257,9 @@ protected:
    //Size inputGeometry_;
    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
    //int outputSize_;
+    const int _inputHeight =700;
+    const int _inputWidth =700;
+    const int _inputChannel =3;
 public:
    DeepCNNTextDetectorDNNImpl(const DeepCNNTextDetectorDNNImpl& dn):
        minibatchSz_(dn.minibatchSz_){
@@ -355,28 +308,10 @@ public:
            //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
            exit(-1);
        }
-//        this->net_.reset(new caffe::Net<float>(modelArchFilename, caffe::TEST));
-//        CV_Assert(net_->num_inputs()==1);
+        this->inputGeometry_.height =_inputHeight;
-//        CV_Assert(net_->num_outputs()==1);
+        this->inputGeometry_.width = _inputWidth ;//inputLayer->width();
-//        CV_Assert(this->net_->input_blobs()[0]->channels()==1
+        this->inputChannelCount_ = _inputChannel ;//inputLayer->channels();
-//                ||this->net_->input_blobs()[0]->channels()==3);
-//        //        this->channelCount_=this->net_->input_blobs()[0]->channels();
-//        this->inputGeometry_.height = inputLayer->height();
-//        this->inputGeometry_.width = inputLayer->width();
-//        this->inputChannelCount_ = inputLayer->channels();
-//        //this->inputGeometry_.batchSize =1;
-//        inputLayer->Reshape(this->minibatchSz_,this->inputChannelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
-//        net_->Reshape();
-//        this->outputChannelCount_ = net_->output_blobs()[0]->channels();
-//        //this->outputGeometry_.batchSize =1;
-//        this->outputGeometry_.height =net_->output_blobs()[0]->height();
-//        this->outputGeometry_.width = net_->output_blobs()[0]->width();
-        this->inputGeometry_.height =700;
-        this->inputGeometry_.width = 700 ;//inputLayer->width();
-        this->inputChannelCount_ = 3 ;//inputLayer->channels();
 #else
        CV_Error(Error::StsError,"DNN module not available during compilation!");
@@ -389,7 +324,7 @@ public:
        Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width);
        Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed
        Mat outputMat = Bbox_prob.getMat();
-        printf("calling");
        process_(image.getMat(),outputMat);
        //copy back to outputArray
        outputMat.copyTo(Bbox_prob);
@@ -487,20 +422,20 @@ Ptr<DeepCNNTextDetector> DeepCNNTextDetector::createTextBoxNet(String archFilena
    case OCR_HOLISTIC_BACKEND_DEFAULT:
 #ifdef HAVE_CAFFE
-        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 100));
+        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1));
 #elif defined(HAVE_DNN)
-        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 100));
+        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1));
 #else
        CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented");
        return Ptr<DeepCNNTextDetector>();
 #endif
        break;
    case OCR_HOLISTIC_BACKEND_CAFFE:
-        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 100));
+        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1));
        break;
    case OCR_HOLISTIC_BACKEND_DNN:
-         return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 100));
+         return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1));
         break;
    case OCR_HOLISTIC_BACKEND_NONE:
    default: