Commit 951e1827 authored by Vladislav Sovrasov's avatar Vladislav Sovrasov

text: cleanup dnn text detection part

parent c33629e0
set(the_description "Text Detection and Recognition")
# CMP0023 controls whether the plain and keyword signatures of
# target_link_libraries() may be mixed on one target. The module pins the
# legacy behaviour and announces that it does so.
if(POLICY CMP0023)
  message(STATUS "Explicitly setting policy CMP0023 to OLD")
  cmake_policy(SET CMP0023 OLD)
endif()
# Using cmake scripts and modules
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR})
set(TEXT_DEPS opencv_ml opencv_highgui opencv_imgproc opencv_core opencv_features2d opencv_calib3d)
# Probe the optional Caffe backend and the libraries it depends on.
# Each probe reports its result and, on success, sets the matching HAVE_* flag
# that is tested later in this file.

# Caffe itself
find_package(Caffe)
if(NOT Caffe_FOUND)
  message(STATUS "Caffe: NO")
else()
  message(STATUS "Caffe: YES")
  set(HAVE_CAFFE 1)
endif()

# Protobuf
find_package(Protobuf)
if(NOT Protobuf_FOUND)
  message(STATUS "Protobuf: NO")
else()
  message(STATUS "Protobuf: YES")
  set(HAVE_PROTOBUF 1)
endif()

# Glog
find_package(Glog)
if(NOT Glog_FOUND)
  message(STATUS "Glog: NO")
else()
  message(STATUS "Glog: YES")
  set(HAVE_GLOG 1)
endif()
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d OPTIONAL opencv_dnn WRAP python)
#ocv_define_module(text ${TEXT_DEPS} WRAP python)
#set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR})
find_package(Tesseract)
if(${Tesseract_FOUND})
message(STATUS "Tesseract: YES")
include_directories(${Tesseract_INCLUDE_DIR})
target_link_libraries(opencv_text ${Tesseract_LIBS})
add_definitions(-DHAVE_TESSERACT)
else()
message(STATUS "Tesseract: NO")
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_dnn OPTIONAL opencv_highgui WRAP python java)
# Look for Tesseract on native builds, or when the user explicitly asks for it
# through OPENCV_FIND_TESSERACT. The module's own cmake/ directory is added to
# the module search path before the lookup.
if(OPENCV_FIND_TESSERACT OR NOT CMAKE_CROSSCOMPILING)
  list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
  find_package(Tesseract QUIET)
  if(NOT Tesseract_FOUND)
    message(STATUS "Tesseract: NO")
  else()
    message(STATUS "Tesseract: YES")
    set(HAVE_TESSERACT 1)
    ocv_include_directories(${Tesseract_INCLUDE_DIR})
    ocv_target_link_libraries(${the_module} ${Tesseract_LIBRARIES})
  endif()
endif()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/text_config.hpp.in
${CMAKE_BINARY_DIR}/text_config.hpp @ONLY)
# Wire the Caffe backend into opencv_text. This only happens when Caffe and
# both of its probed prerequisites (Glog, Protobuf) were found. HDF5 and Boost
# are then REQUIRED, so configuration fails if either is missing.
if(HAVE_CAFFE AND HAVE_GLOG AND HAVE_PROTOBUF)
  include_directories(${Caffe_INCLUDE_DIR})

  find_package(HDF5 COMPONENTS HL REQUIRED)
  include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR})
  list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES})

  find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem)
  include_directories(SYSTEM ${Boost_INCLUDE_DIR})

  include_directories(SYSTEM ${CUDA_INCLUDE_DIR})
  # link_directories() has no SYSTEM keyword: passing it would register a
  # directory literally named "SYSTEM". Only forward real entries.
  # NOTE(review): CUDA_LIBS is assumed to hold library directories - confirm.
  if(CUDA_LIBS)
    link_directories(${CUDA_LIBS})
  endif()

  list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES})
  target_link_libraries(opencv_text atlas blas ${Caffe_LIBS} ${Glog_LIBS} ${Protobuf_LIBS} ${HDF5_LIBRARIES} ${Boost_LIBRARIES})
  add_definitions(-DHAVE_CAFFE)
endif() #HAVE_CAFFE
# Diagnostic trace printed during configuration.
message(STATUS "TEXT CAFFE SEARCH")
# NOTE(review): the condition below is empty. CMake appears to evaluate an
# empty if() as false, which would make the else() branch the only one that
# ever runs - confirm which variable was meant to be tested here.
if()
message(STATUS "TEXT NO CAFFE CONFLICT")
else()
message(STATUS "TEXT CAFFE CONFLICT")
endif()
ocv_include_directories(${CMAKE_CURRENT_BINARY_DIR})
# Report whether the dnn module is available. When it is, expose that to the
# sources (-DHAVE_DNN) and to the rest of this script (HAVE_DNN).
if(NOT HAVE_opencv_dnn)
  message(STATUS "dnn module not found")
else()
  message(STATUS "dnn module found")
  add_definitions(-DHAVE_DNN)
  set(HAVE_DNN 1)
endif()
ocv_add_testdata(samples/ contrib/text
FILES_MATCHING PATTERN "*.xml" PATTERN "*.xml.gz" REGEX "scenetext[0-9]+.jpg"
)
# Caffe package for CNN Triplet training
# Locates the Caffe library and its header directory. Caffe_FOUND is set only
# when both lookups succeed.
unset(Caffe_FOUND)

find_library(Caffe_LIBS
  NAMES caffe
  HINTS /usr/local/lib)

find_path(Caffe_INCLUDE_DIR
  NAMES
    caffe/caffe.hpp
    caffe/common.hpp
    caffe/net.hpp
    caffe/proto/caffe.pb.h
    caffe/util/io.hpp
    caffe/vision_layers.hpp
  HINTS /usr/local/include)

if(Caffe_INCLUDE_DIR AND Caffe_LIBS)
  set(Caffe_FOUND 1)
endif()
# Glog, required for Caffe.
# Only the library is searched for; Glog_FOUND mirrors the result.
unset(Glog_FOUND)

find_library(Glog_LIBS
  NAMES glog
  HINTS /usr/local/lib)

if(Glog_LIBS)
  set(Glog_FOUND 1)
endif()
# Protobuf, required for Caffe.
# Only the library is searched for; Protobuf_FOUND mirrors the result.
unset(Protobuf_FOUND)

find_library(Protobuf_LIBS
  NAMES protobuf
  HINTS /usr/local/lib)

if(Protobuf_LIBS)
  set(Protobuf_FOUND 1)
endif()
# Tesseract OCR
# Locates the Tesseract headers, the Tesseract library and the Leptonica
# library it is linked with. Results:
#   Tesseract_INCLUDE_DIR - directory containing tesseract/baseapi.h
#   Tesseract_LIBS        - Tesseract and Leptonica libraries, in link order
#   Tesseract_FOUND       - set to 1 only when all three lookups succeeded
unset(Tesseract_FOUND)

find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h
  HINTS
    /usr/include
    /usr/local/include)

find_library(Tesseract_LIBRARY NAMES tesseract
  HINTS
    /usr/lib
    /usr/local/lib)

find_library(Lept_LIBRARY NAMES lept
  HINTS
    /usr/lib
    /usr/local/lib)

set(Tesseract_LIBS ${Tesseract_LIBRARY} ${Lept_LIBRARY})

# Test each lookup result on its own. Tesseract_LIBS is a two-element list, and
# if() only treats a value as false when the whole string ends in -NOTFOUND.
# With libtesseract missing and liblept present, the combined value ends with
# the Leptonica path and would wrongly count as "found".
if(Tesseract_INCLUDE_DIR AND Tesseract_LIBRARY AND Lept_LIBRARY)
  set(Tesseract_FOUND 1)
endif()
......@@ -56,74 +56,3 @@ Intro
-----
The text module now has text detection and recognition using deep CNNs. The text detector is a deep CNN that takes an image which may contain multiple words and outputs a list of Rects with bounding boxes and the probability of text being there. The text recognizer provides a probability over a given vocabulary for each of these rects.
Two backends are supported 1) caffe 2) opencv-dnn
Installation of Caffe backend
----------------------------
* Please note a custom caffe based on SSD branch is required, the link of the custom caffe is provided below
The caffe wrapping backend has the requirements caffe does.
* Caffe can be built against OpenCV; if the caffe backend is enabled, a circular dependency arises.
The simplest solution is to build caffe without support for OpenCV.
* Only the operating systems supported by Caffe are supported by the backend.
The scripts describing the module have been developed in ubuntu 16.04 and assume such a system.
Other UNIX systems including OSX should be easy to adapt.
Sample script for building Caffe
```bash
#!/bin/bash
SRCROOT="${HOME}/caffe_inst/"
mkdir -p "$SRCROOT"
cd "$SRCROOT"
git clone https://github.com/sghoshcvc/TextBoxes.git
cd TextBoxes
cat Makefile.config.example > Makefile.config
echo 'USE_OPENCV := 0' >> Makefile.config
echo 'INCLUDE_DIRS += /usr/include/hdf5/serial/' >> Makefile.config
echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config
echo "--- /tmp/caffe/include/caffe/net.hpp 2017-05-28 04:55:47.929623902 +0200
+++ caffe/distribute/include/caffe/net.hpp 2017-05-28 04:51:33.437090768 +0200
@@ -234,6 +234,7 @@
template <typename T>
friend class Net;
+ virtual ~Callback(){}
};
const vector<Callback*>& before_forward() const { return before_forward_; }
void add_before_forward(Callback* value) {
">/tmp/cleanup_caffe.diff
patch < /tmp/cleanup_caffe.diff
make -j 6
make pycaffe
make distribute
```
```bash
#!/bin/bash
cd $OPENCV_BUILD_DIR #You must set this
CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe in ubuntu 16.04
cmake -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0" -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="$OPENCV_CONTRIB/modules" ./
```
where $OPENCV_CONTRIB is the root directory containing the opencv_contrib modules
Installation of opencv-dnn backend
----------------------------
Use of opencv-dnn does not need any additional library.
The recent opencv-3.3.0 needs to be built with extra modules to use the text module.
......@@ -5,14 +5,17 @@ endif()
if(NOT Tesseract_FOUND)
find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h
HINTS
/usr/include
/usr/local/include)
find_library(Tesseract_LIBRARY NAMES tesseract
HINTS
/usr/lib
/usr/local/lib)
find_library(Lept_LIBRARY NAMES lept
HINTS
/usr/lib
/usr/local/lib)
if(Tesseract_INCLUDE_DIR AND Tesseract_LIBRARY AND Lept_LIBRARY)
......
......@@ -93,7 +93,7 @@ grouping horizontally aligned text, and the method proposed by Lluis Gomez and D
in @cite Gomez13 @cite Gomez14 for grouping arbitrary oriented text (see erGrouping).
To see the text detector at work, have a look at the textdetection demo:
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>
<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>
@defgroup text_recognize Scene Text Recognition
@}
......
......@@ -65,7 +65,6 @@ component tree of the image. :
*/
struct CV_EXPORTS ERStat
{
public:
//! Constructor
explicit ERStat(int level = 256, int pixel = 0, int x = 0, int y = 0);
......
This diff is collapsed.
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 19 17:54:00 2017
@author: sgnosh
"""
#!/usr/bin/python
import sys
import os
import cv2
import numpy as np
print('\nDeeptextdetection.py')
print(' A demo script of text box alogorithm of the paper:')
print(' * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network https://arxiv.org/abs/1611.06779\n')
if (len(sys.argv) < 2):
print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n')
quit()
#if not cv2.text.cnn_config.caffe_backend.getCaffeAvailable():
# print"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n"
#
# quit()
# check model and architecture file existance
# Both Caffe model files must be present in the current working directory.
if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'):
    # Use the print() call form, like the rest of this script; the bare
    # print statement is a syntax error under Python 3.
    print(" Model files not found in current directory. Aborting")
    print(" Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models")
    quit()
cv2.text.cnn_config.caffe_backend.setCaffeGpuMode(True);
pathname = os.path.dirname(sys.argv[0])
def main():
print('\nDeeptextdetection.py')
print(' A demo script of text box alogorithm of the paper:')
print(' * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network https://arxiv.org/abs/1611.06779\n')
if (len(sys.argv) < 2):
print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n')
quit()
img = cv2.imread(str(sys.argv[1]))
textSpotter=cv2.text.textDetector_create(
"textbox_deploy.prototxt","textbox.caffemodel")
rects,outProbs = textSpotter.textDetectInImage(img);
# for visualization
vis = img.copy()
# Threshold to select rectangles : All rectangles for which outProbs is more than this threshold will be shown
thres = 0.6
# Both Caffe model files must be present in the current working directory.
if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'):
    # Use the print() call form, like the rest of this script; the bare
    # print statement is a syntax error under Python 3.
    print(" Model files not found in current directory. Aborting")
    print(" Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models")
    quit()
img = cv2.imread(str(sys.argv[1]))
textSpotter = cv2.text.TextDetectorCNN_create("textbox_deploy.prototxt","textbox.caffemodel")
rects, outProbs = textSpotter.textDetectInImage(img);
vis = img.copy()
thres = 0.6
#Visualization
for r in range(0,np.shape(rects)[0]):
if outProbs[r] >thres:
rect = rects[r]
cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 0, 0), 2)
# cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 255, 255), 1)
for r in range(np.shape(rects)[0]):
if outProbs[r] > thres:
rect = rects[r]
cv2.rectangle(vis, (rect[0],rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (255, 0, 0), 2)
cv2.imshow("Text detection result", vis)
cv2.waitKey()
#Visualization
cv2.imshow("Text detection result", vis)
cv2.waitKey(0)
\ No newline at end of file
if __name__ == "__main__":
main()
/*
* dictnet_demo.cpp
*
* Demonstrates simple use of the holistic word classifier in C++
*
* Created on: June 26, 2016
* Author: Anguelos Nicolaou <anguelos.nicolaou AT gmail.com>
*/
#include "opencv2/text.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include <opencv2/text.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <sstream>
#include <vector>
#include <iostream>
#include <iomanip>
#include <fstream>
void textbox_draw(cv::Mat &src, std::vector<cv::Rect> &groups,std::vector<float> &probs,std::vector<cv::String> wordList,float thres);
inline std::string getHelpStr(std::string progFname){
std::stringstream out;
out << " Demo of text detection CNN for text detection." << std::endl;
out << " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"<<std::endl<<std::endl;
out << " Usage: " << progFname << " <output_file> <input_image>" << std::endl;
out << " Caffe Model files (textbox.caffemodel, textbox_deploy.prototxt)"<<std::endl;
out << " must be in the current directory." << std::endl << std::endl;
using namespace cv;
out << " Obtaining Caffe Model files in linux shell:"<<std::endl;
out << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel"<<std::endl;
out << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt"<<std::endl;
out << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt"<<std::endl<<std::endl;
namespace
{
/**
 * Builds the help text printed when the demo is started incorrectly.
 *
 * @param progFname name of the executable, inserted into the usage line.
 * @return the complete multi-line help message.
 */
std::string getHelpStr(std::string progFname)
{
    std::stringstream out;
    out << " Demo of text detection CNN for text detection." << std::endl
        << " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"<<std::endl<<std::endl
        // The demo reads only the input image from argv[1]; it no longer
        // takes an output file, so the usage line lists a single argument.
        << " Usage: " << progFname << " <input_image>" << std::endl
        << " Caffe Model files (textbox.caffemodel, textbox_deploy.prototxt)"<<std::endl
        << " must be in the current directory." << std::endl
        << " These files can be downloaded from https://github.com/sghoshcvc/TextBox-Models.git" << std::endl;
    return out.str();
}
inline bool fileExists (std::string filename) {
/**
 * Reports whether a file can be opened for reading.
 *
 * @param filename path to probe.
 * @return true when an input stream opened on the path is in a good state.
 */
bool fileExists (std::string filename)
{
    std::ifstream probe;
    probe.open(filename.c_str());
    const bool opened = probe.good();
    return opened;
}
void textbox_draw(cv::Mat &src, std::vector<cv::Rect> &groups,std::vector<float> &probs,std::vector<cv::String> wordList,float thres=0.6)
void textbox_draw(Mat src, std::vector<Rect>& groups, std::vector<float>& probs, float thres)
{
for (int i=0;i<(int)groups.size(); i++)
for (size_t i = 0; i < groups.size(); i++)
{
if(probs[i]>thres)
if(probs[i] > thres)
{
if (src.type() == CV_8UC3)
{
cv::rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 0, 255, 255 ), 3, 8 );
cv::putText(src, wordList[i],groups.at(i).tl() , cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar( 0,0,255 ));
rectangle(src, groups[i], Scalar( 0, 255, 255 ), 2, LINE_AA);
String label = format("%.2f", probs[i]);
std::cout << "text box: " << groups[i] << " confidence: " << probs[i] << "\n";
putText(src, label, groups.at(i).tl(), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,255 ), 1, LINE_AA);
}
else
rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 255 ), 3, 8 );
rectangle(src, groups[i], Scalar( 255 ), 3, 8 );
}
}
}
}
int main(int argc, const char * argv[]){
if(!cv::text::cnn_config::caffe_backend::getCaffeAvailable()){
std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n";
//exit(1);
}
std::vector<std::string> backends=cv::text::cnn_config::getAvailableBackends();
std::cout << "The Following backends are available" << "\n";
for (int i=0;i<backends.size();i++)
std::cout << backends[i] << "\n";
// printf("%s",x);
//set to true if you have a GPU with more than 3GB
if(cv::text::cnn_config::caffe_backend::getCaffeAvailable())
cv::text::cnn_config::caffe_backend::setCaffeGpuMode(true);
if (argc < 3){
std::cout<<getHelpStr(argv[0]);
std::cout<<"Insufiecient parameters. Aborting!"<<std::endl;
int main(int argc, const char * argv[])
{
if (argc < 2)
{
std::cout << getHelpStr(argv[0]);
std::cout << "Insufiecient parameters. Aborting!" << std::endl;
exit(1);
}
if (!fileExists("textbox.caffemodel") ||
!fileExists("textbox_deploy.prototxt")){
// !fileExists("dictnet_vgg_labels.txt"))
std::cout<<getHelpStr(argv[0]);
std::cout<<"Model files not found in the current directory. Aborting!"<<std::endl;
exit(1);
}
if (fileExists(argv[1])){
!fileExists("textbox_deploy.prototxt"))
{
std::cout<<getHelpStr(argv[0]);
std::cout<<"Output file must not exist. Aborting!"<<std::endl;
std::cout << "Model files not found in the current directory. Aborting!" << std::endl;
exit(1);
}
cv::Mat image;
image = cv::imread(cv::String(argv[2]));
Mat image = imread(String(argv[1]), IMREAD_COLOR);
std::cout<<"Starting Text Box Demo"<<std::endl;
cv::Ptr<cv::text::textDetector> textSpotter=cv::text::textDetector::create(
"textbox_deploy.prototxt","textbox.caffemodel");
std::cout << "Starting Text Box Demo" << std::endl;
Ptr<text::TextDetectorCNN> textSpotter =
text::TextDetectorCNN::create("textbox_deploy.prototxt","textbox.caffemodel", false);
//cv::Ptr<cv::text::textDetector> wordSpotter=
// cv::text::textDetector::create(cnn);
std::cout<<"Created Text Spotter with text Boxes";
std::vector<cv::Rect> bbox;
std::vector<Rect> bbox;
std::vector<float> outProbabillities;
textSpotter->textDetectInImage(image,bbox,outProbabillities);
// textbox_draw(image, bbox,outProbabillities);
float thres =0.6f;
std::vector<cv::Mat> imageList;
for(int imageIdx=0;imageIdx<(int)bbox.size();imageIdx++){
if(outProbabillities[imageIdx]>thres){
imageList.push_back(image(bbox.at(imageIdx)));
}
}
// call dict net here for all detected parts
cv::Ptr<cv::text::DeepCNN> cnn=cv::text::DeepCNN::createDictNet(
"dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel",cv::text::OCR_HOLISTIC_BACKEND_DNN);
cv::Ptr<cv::text::OCRHolisticWordRecognizer> wordSpotter=
cv::text::OCRHolisticWordRecognizer::create(cnn,"dictnet_vgg_labels.txt");
std::vector<cv::String> wordList;
std::vector<double> wordProbabillities;
wordSpotter->recogniseImageBatch(imageList,wordList,wordProbabillities);
// write the output in file
std::ofstream out;
out.open(argv[1]);
for (int i=0;i<(int)wordList.size(); i++)
{
cv::Point tl_ = bbox.at(i).tl();
cv::Point br_ = bbox.at(i).br();
out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl;
}
out.close();
textbox_draw(image, bbox,outProbabillities,wordList);
textSpotter->textDetectInImage(image, bbox, outProbabillities);
textbox_draw(image, bbox, outProbabillities, 0.5f);
cv::imshow("TextBox Demo",image);
imshow("TextBox Demo",image);
std::cout << "Done!" << std::endl << std::endl;
std::cout << "Press any key to exit." << std::endl << std::endl;
if ((cv::waitKey()&0xff) == ' ')
return 0;
waitKey();
return 0;
}
This diff is collapsed.
This diff is collapsed.
......@@ -45,6 +45,8 @@
#include "opencv2/text.hpp"
#include "text_config.hpp"
#ifdef HAVE_TESSERACT
#if !defined(USE_STD_NAMESPACE)
#define USE_STD_NAMESPACE
......
#include "precomp.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/core.hpp"
#include <iostream>
#include <fstream>
#include <sstream>
#include <queue>
#include <algorithm>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>
//#ifdef HAVE_CAFFE
//#include "caffe/caffe.hpp"
//#endif
namespace cv { namespace text {
class textDetectImpl: public textDetector{
private:
struct NetOutput{
//Auxiliary structure that handles the logic of getting bounding box and confidences of textness from
//the raw outputs of caffe
Rect bbox;
float probability;
static void getOutputs(const float* buffer,int nbrTextBoxes,int nCol,std::vector<NetOutput>& res,Size inputShape)
{
res.resize(nbrTextBoxes);
for(int k=0;k<nbrTextBoxes;k++)
{
float x_min = buffer[k*nCol+3]*inputShape.width;
float y_min = buffer[k*nCol+4]*inputShape.height;
float x_max = buffer[k*nCol+5]*inputShape.width;
float y_max = buffer[k*nCol +6]*inputShape.height;
x_min = x_min<0?0:x_min;
y_min = y_min<0?0:y_min;
x_max = x_max> inputShape.width?inputShape.width-1:x_max;
y_max = y_max > inputShape.height?inputShape.height-1:y_max;
float wd = x_max-x_min+1;
float ht = y_max-y_min+1;
res[k].bbox=Rect(int(x_min),int(y_min),int(wd),int(ht));
res[k].probability=buffer[k*nCol+2];
}
}
};
protected:
Ptr<TextRegionDetector> classifier_;
public:
textDetectImpl(Ptr<TextRegionDetector> classifierPtr):classifier_(classifierPtr)
{
}
void textDetectInImage(InputArray inputImage,CV_OUT std::vector<Rect>& Bbox,CV_OUT std::vector<float>& confidence)
{
Mat netOutput;
// call the detect function of deepTextCNN class
this->classifier_->detect(inputImage,netOutput);
// get the output geometry i.e height and width of output blob from caffe
Size OutputGeometry_ = this->classifier_->getOutputGeometry();
int nbrTextBoxes = OutputGeometry_.height;
int nCol = OutputGeometry_.width;
std::vector<NetOutput> tmp;
// the output bounding box needs to be resized by the input height and width
Size inputImageShape = Size(inputImage.cols(),inputImage.rows());
NetOutput::getOutputs((float*)(netOutput.data),nbrTextBoxes,nCol,tmp,inputImageShape);
// put the output in CV_OUT
for (int k=0;k<nbrTextBoxes;k++)
{
Bbox.push_back(tmp[k].bbox);
confidence.push_back(tmp[k].probability);
}
}
void run(Mat& image, std::vector<Rect>* component_rects=NULL,
std::vector<float>* component_confidences=NULL,
int component_level=0)
{
CV_Assert(component_level==OCR_LEVEL_WORD);//Componnents not applicable for word spotting
std::vector<Rect> bbox;
std::vector<float> score;
textDetectInImage(image,bbox,score);
if(component_rects!=NULL)
{
component_rects->resize(bbox.size()); // should be a user behavior
component_rects = &bbox;
}
if(component_confidences!=NULL)
{
component_confidences->resize(score.size()); // shoub be a user behavior
component_confidences = &score;
}
}
void run(Mat& image, Mat& mask, std::vector<Rect>* component_rects=NULL,
std::vector<float>* component_confidences=NULL,
int component_level=0)
{
CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image
this->run(image,component_rects,component_confidences,component_level);
}
Ptr<TextRegionDetector> getClassifier()
{
return this->classifier_;
}
};
/**
 * Creates a text detector around an existing region detector.
 *
 * @param classifierPtr region detector that performs the network forward pass.
 * @return the new detector.
 */
Ptr<textDetector> textDetector::create(Ptr<TextRegionDetector> classifierPtr)
{
    Ptr<textDetector> detector(new textDetectImpl(classifierPtr));
    return detector;
}
/**
 * Creates a text detector from model files.
 *
 * Builds a custom image preprocessor constructed with the value 255 and a
 * per-channel mean of (104, 117, 123), then wraps a DeepCNNTextDetector
 * loaded from the given files.
 *
 * @param modelArchFilename    network architecture file.
 * @param modelWeightsFilename trained weights file.
 * @return the new detector.
 */
Ptr<textDetector> textDetector::create(String modelArchFilename, String modelWeightsFilename)
{
    Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255);

    // 1x3 single-channel 8-bit matrix holding the mean of each channel.
    Mat channelMean(1,3,CV_8U);
    uchar* meanRow = channelMean.ptr<uchar>(0);
    meanRow[0]=104;
    meanRow[1]=117;
    meanRow[2]=123;
    preprocessor->set_mean(channelMean);

    Ptr<TextRegionDetector> regionDetector(
        DeepCNNTextDetector::create(modelArchFilename,modelWeightsFilename,preprocessor,1));
    Ptr<textDetector> detector(new textDetectImpl(regionDetector));
    return detector;
}
} } //namespace text namespace cv
This diff is collapsed.
#ifndef __OPENCV_TEXT_CONFIG_HPP__
#define __OPENCV_TEXT_CONFIG_HPP__
// HAVE OCR Tesseract
#cmakedefine HAVE_TESSERACT
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment