Commit be395e59 authored by sghoshcvc's avatar sghoshcvc

Modified the class hierarchy

parent 2b8ed124
...@@ -716,10 +716,6 @@ public: ...@@ -716,10 +716,6 @@ public:
/** @brief produces a class confidence row-vector given an image /** @brief produces a class confidence row-vector given an image
*/ */
CV_WRAP virtual void classify(InputArray image, OutputArray classProbabilities) = 0; CV_WRAP virtual void classify(InputArray image, OutputArray classProbabilities) = 0;
/** @brief produces a list of bounding box given an image
*/
CV_WRAP virtual void detect(InputArray image, OutputArray classProbabilities) = 0;
/** @brief produces a matrix containing class confidence row-vectors given an collection of images /** @brief produces a matrix containing class confidence row-vectors given an collection of images
*/ */
......
...@@ -65,19 +65,131 @@ namespace text ...@@ -65,19 +65,131 @@ namespace text
//detection scenario //detection scenario
class CV_EXPORTS_W BaseDetector class CV_EXPORTS_W BaseDetector
{ {
public: public:
virtual ~BaseDetector() {}; virtual ~BaseDetector() {};
virtual void run(Mat& image, virtual void run(Mat& image,
std::vector<Rect>* component_rects=NULL, std::vector<Rect>* component_rects=NULL,
std::vector<float>* component_confidences=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0) = 0; int component_level=0) = 0;
virtual void run(Mat& image, Mat& mask, virtual void run(Mat& image, Mat& mask,
std::vector<Rect>* component_rects=NULL, std::vector<Rect>* component_rects=NULL,
std::vector<float>* component_confidences=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0) = 0; int component_level=0) = 0;
};
/** @brief Abstract interface for different text detection models (including CNN based deep models).
 */
class CV_EXPORTS_W TextRegionDetector
{
protected:
    /** Input and output sizes, plus input and output channel counts, kept for the model. */
    Size inputGeometry_;
    Size outputGeometry_;
    int inputChannelCount_;
    int outputChannelCount_;

public:
    virtual ~TextRegionDetector() {}

    /** @brief Produces a list of bounding boxes and an estimate of the text-ness confidence of each box.
     */
    CV_WRAP virtual void detect(InputArray image, OutputArray bboxProb) = 0;

    /** @brief Simple getter returning the stored size of the input sample.
     */
    CV_WRAP virtual Size getInputGeometry() { return inputGeometry_; }

    /** @brief Simple getter returning the stored shape of the output.
     *
     * Any text detector should output a number of text regions along with a text-ness score.
     * The number of text regions and the number of values returned for each region
     * can be inferred from this shape.
     */
    CV_WRAP virtual Size getOutputGeometry() { return outputGeometry_; }
};
/** @brief Generic structure of deep CNN based text detectors.
 *
 * Class that uses a pretrained caffe model for text detection.
 * The network is described in detail in:
 * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network
 * https://arxiv.org/abs/1611.06779
 */
class CV_EXPORTS_W DeepCNNTextDetector : public TextRegionDetector
{
protected:
    /** All deep CNN based text detectors (normally) have a preprocessor. */
    Ptr<ImagePreprocessor> preprocessor_;

    /** @brief All image preprocessing is handled here, including whitening etc.
     *
     * @param input the image to be preprocessed. If the depth is CV_U8, values should be
     * in [0,255]; otherwise values are assumed to be in [0,1].
     *
     * @param output reference to the image that will be fed to the network; the preprocessor
     * resizes the image to the appropriate size and converts it to the appropriate depth.
     *
     * This method should never be used externally.
     * NOTE(review): the original text delegated its use to "classify and classifyBatch",
     * neither of which is declared in this class -- confirm the intended callers.
     */
    virtual void preprocess(const Mat& input, Mat& output);

public:
    virtual ~DeepCNNTextDetector() {}

    /** @brief Constructs a DeepCNNTextDetector object from a caffe pretrained model.
     *
     * @param archFilename path to the prototxt file containing the deployment model
     * architecture description.
     *
     * @param weightsFilename path to the pretrained weights of the model in binary form.
     *
     * @param preprocessor pointer to an instance of an ImagePreprocessor implementing the
     * preprocess_ protected method.
     *
     * @param minibatchSz the maximum number of samples that can be processed in parallel.
     * In practice this parameter has an effect only when computing on the GPU and should be
     * set with respect to the memory available on the GPU.
     *
     * @param backEnd integer parameter selecting the computation framework. For now
     * OCR_HOLISTIC_BACKEND_CAFFE is the only option.
     */
    CV_WRAP static Ptr<DeepCNNTextDetector> create(String archFilename,
                                                   String weightsFilename,
                                                   Ptr<ImagePreprocessor> preprocessor,
                                                   int minibatchSz = 100,
                                                   int backEnd = OCR_HOLISTIC_BACKEND_CAFFE);

    /** @brief Constructs a DeepCNNTextDetector intended to be used for text area detection.
     *
     * This method loads a pretrained model and couples it with a preprocessor that
     * preprocesses the image with mean subtraction.
     * The architecture and model weights can be downloaded from:
     * https://github.com/sghoshcvc/TextBox-Models.git (size is around 100 MB)
     *
     * @param archFilename path to the prototxt file containing the deployment model
     * architecture description. When employing OCR_HOLISTIC_BACKEND_CAFFE this is the path
     * to the deploy ".prototxt".
     *
     * @param weightsFilename path to the pretrained weights of the model. When employing
     * OCR_HOLISTIC_BACKEND_CAFFE this is the path to the ".caffemodel" file.
     *
     * @param backEnd integer parameter selecting the computation framework. For now
     * OCR_HOLISTIC_BACKEND_CAFFE is the only option.
     */
    CV_WRAP static Ptr<DeepCNNTextDetector> createTextBoxNet(String archFilename,
                                                             String weightsFilename,
                                                             int backEnd = OCR_HOLISTIC_BACKEND_CAFFE);

    friend class ImagePreprocessor;
};
/** @brief textDetector class provides the functionality of text bounding box detection.
 * A TextRegionDetector is employed to find bounding boxes of text
 * words given an input image.
 *
 * This class implements the logic of providing text bounding boxes in a vector of rects given a TextRegionDetector.
 * The TextRegionDetector can be any text detector.
 *
 */
class CV_EXPORTS_W textDetector : public BaseDetector class CV_EXPORTS_W textDetector : public BaseDetector
{ {
...@@ -125,9 +237,9 @@ public: ...@@ -125,9 +237,9 @@ public:
/** @brief simple getter for the preprocessing functor /** @brief simple getter for the preprocessing functor
*/ */
CV_WRAP virtual Ptr<TextImageClassifier> getClassifier()=0; CV_WRAP virtual Ptr<TextRegionDetector> getClassifier()=0;
/** @brief Creates an instance of the textDetector class. /** @brief Creates an instance of the textDetector class.
...@@ -135,7 +247,7 @@ public: ...@@ -135,7 +247,7 @@ public:
*/ */
CV_WRAP static Ptr<textDetector> create(Ptr<TextImageClassifier> classifierPtr); CV_WRAP static Ptr<textDetector> create(Ptr<TextRegionDetector> classifierPtr);
/** @brief Creates an instance of the textDetector class and implicitly also a DeepCNN classifier. /** @brief Creates an instance of the textDetector class and implicitly also a DeepCNN classifier.
......
...@@ -459,53 +459,53 @@ protected: ...@@ -459,53 +459,53 @@ protected:
#endif #endif
} }
void process_(Mat inputImage, Mat &outputMat) // void process_(Mat inputImage, Mat &outputMat)
{ // {
// do forward pass and stores the output in outputMat // // do forward pass and stores the output in outputMat
//Process one image // //Process one image
CV_Assert(this->minibatchSz_==1); // CV_Assert(this->minibatchSz_==1);
//CV_Assert(outputMat.isContinuous()); // //CV_Assert(outputMat.isContinuous());
#ifdef HAVE_CAFFE //#ifdef HAVE_CAFFE
net_->input_blobs()[0]->Reshape(1, this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width); // net_->input_blobs()[0]->Reshape(1, this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
net_->Reshape(); // net_->Reshape();
float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data(); // float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
float* inputData=inputBuffer; // float* inputData=inputBuffer;
std::vector<Mat> input_channels; // std::vector<Mat> input_channels;
Mat preprocessed; // Mat preprocessed;
// if the image have multiple color channels the input layer should be populated accordingly // // if the image have multiple color channels the input layer should be populated accordingly
for (int channel=0;channel < this->channelCount_;channel++){ // for (int channel=0;channel < this->channelCount_;channel++){
cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData); // cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
input_channels.push_back(netInputWraped); // input_channels.push_back(netInputWraped);
//input_data += width * height; // //input_data += width * height;
inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); // inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
} // }
this->preprocess(inputImage,preprocessed); // this->preprocess(inputImage,preprocessed);
split(preprocessed, input_channels); // split(preprocessed, input_channels);
//preprocessed.copyTo(netInputWraped); // //preprocessed.copyTo(netInputWraped);
this->net_->Forward(); // this->net_->Forward();
const float* outputNetData=net_->output_blobs()[0]->cpu_data(); // const float* outputNetData=net_->output_blobs()[0]->cpu_data();
// const float* outputNetData1=net_->output_blobs()[1]->cpu_data(); // // const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); // this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width; // int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width;
outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1); // outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
float*outputMatData=(float*)(outputMat.data); // float*outputMatData=(float*)(outputMat.data);
memcpy(outputMatData,outputNetData,sizeof(float)*outputSz); // memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
#endif //#endif
} // }
...@@ -587,15 +587,15 @@ public: ...@@ -587,15 +587,15 @@ public:
inputImageList.push_back(image.getMat()); inputImageList.push_back(image.getMat());
classifyBatch(inputImageList,classProbabilities); classifyBatch(inputImageList,classProbabilities);
} }
void detect(InputArray image, OutputArray Bbox_prob) // void detect(InputArray image, OutputArray Bbox_prob)
{ // {
Bbox_prob.create(this->outputGeometry_,CV_32F); // dummy initialization is it needed // Bbox_prob.create(this->outputGeometry_,CV_32F); // dummy initialization is it needed
Mat outputMat = Bbox_prob.getMat(); // Mat outputMat = Bbox_prob.getMat();
process_(image.getMat(),outputMat); // process_(image.getMat(),outputMat);
//copy back to outputArray // //copy back to outputArray
outputMat.copyTo(Bbox_prob); // outputMat.copyTo(Bbox_prob);
} // }
void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities) void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities)
{ {
......
...@@ -23,6 +23,8 @@ ...@@ -23,6 +23,8 @@
namespace cv { namespace text { namespace cv { namespace text {
class textDetectImpl: public textDetector{ class textDetectImpl: public textDetector{
private: private:
struct NetOutput{ struct NetOutput{
...@@ -60,9 +62,9 @@ private: ...@@ -60,9 +62,9 @@ private:
}; };
protected: protected:
Ptr<TextImageClassifier> classifier_; Ptr<TextRegionDetector> classifier_;
public: public:
textDetectImpl(Ptr<TextImageClassifier> classifierPtr):classifier_(classifierPtr) textDetectImpl(Ptr<TextRegionDetector> classifierPtr):classifier_(classifierPtr)
{ {
} }
...@@ -131,13 +133,13 @@ public: ...@@ -131,13 +133,13 @@ public:
Ptr<TextImageClassifier> getClassifier() Ptr<TextRegionDetector> getClassifier()
{ {
return this->classifier_; return this->classifier_;
} }
}; };
Ptr<textDetector> textDetector::create(Ptr<TextImageClassifier> classifierPtr) Ptr<textDetector> textDetector::create(Ptr<TextRegionDetector> classifierPtr)
{ {
return Ptr<textDetector>(new textDetectImpl(classifierPtr)); return Ptr<textDetector>(new textDetectImpl(classifierPtr));
} }
...@@ -155,7 +157,7 @@ Ptr<textDetector> textDetector::create(String modelArchFilename, String modelWei ...@@ -155,7 +157,7 @@ Ptr<textDetector> textDetector::create(String modelArchFilename, String modelWei
textbox_mean.at<uchar>(0,2)=123; textbox_mean.at<uchar>(0,2)=123;
preprocessor->set_mean(textbox_mean); preprocessor->set_mean(textbox_mean);
// create a pointer to text box detector(textDetector) // create a pointer to text box detector(textDetector)
Ptr<TextImageClassifier> classifierPtr(DeepCNN::create(modelArchFilename,modelWeightsFilename,preprocessor,1)); Ptr<TextRegionDetector> classifierPtr(DeepCNNTextDetector::create(modelArchFilename,modelWeightsFilename,preprocessor,1));
return Ptr<textDetector>(new textDetectImpl(classifierPtr)); return Ptr<textDetector>(new textDetectImpl(classifierPtr));
} }
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment