Commit 951e1827 authored by Vladislav Sovrasov's avatar Vladislav Sovrasov

text: cleanup dnn text detection part

parent c33629e0
set(the_description "Text Detection and Recognition") set(the_description "Text Detection and Recognition")
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_dnn OPTIONAL opencv_highgui WRAP python java)
if(POLICY CMP0023) if(NOT CMAKE_CROSSCOMPILING OR OPENCV_FIND_TESSERACT)
message(STATUS "Explicitly setting policy CMP0023 to OLD") set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
cmake_policy(SET CMP0023 OLD) find_package(Tesseract QUIET)
endif(POLICY CMP0023) if(Tesseract_FOUND)
# Using cmake scripts and modules
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR})
set(TEXT_DEPS opencv_ml opencv_highgui opencv_imgproc opencv_core opencv_features2d opencv_calib3d)
find_package(Caffe)
if(Caffe_FOUND)
message(STATUS "Caffe: YES")
set(HAVE_CAFFE 1)
else()
message(STATUS "Caffe: NO")
# list(APPEND TEXT_DEPS opencv_dnn)
endif()
#internal dependencies
find_package(Protobuf)
if(Protobuf_FOUND)
message(STATUS "Protobuf: YES")
set(HAVE_PROTOBUF 1)
else()
message(STATUS "Protobuf: NO")
endif()
find_package(Glog)
if(Glog_FOUND)
message(STATUS "Glog: YES")
set(HAVE_GLOG 1)
else()
message(STATUS "Glog: NO")
endif()
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d OPTIONAL opencv_dnn WRAP python)
#ocv_define_module(text ${TEXT_DEPS} WRAP python)
#set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR})
find_package(Tesseract)
if(${Tesseract_FOUND})
message(STATUS "Tesseract: YES") message(STATUS "Tesseract: YES")
include_directories(${Tesseract_INCLUDE_DIR}) set(HAVE_TESSERACT 1)
target_link_libraries(opencv_text ${Tesseract_LIBS}) ocv_include_directories(${Tesseract_INCLUDE_DIR})
add_definitions(-DHAVE_TESSERACT) ocv_target_link_libraries(${the_module} ${Tesseract_LIBRARIES})
else() else()
message(STATUS "Tesseract: NO") message(STATUS "Tesseract: NO")
endif() endif()
endif()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/text_config.hpp.in
${CMAKE_BINARY_DIR}/text_config.hpp @ONLY)
if(HAVE_CAFFE AND HAVE_GLOG AND HAVE_PROTOBUF) ocv_include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${Caffe_INCLUDE_DIR})
find_package(HDF5 COMPONENTS HL REQUIRED)
include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR})
list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES})
find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem)
include_directories(SYSTEM ${Boost_INCLUDE_DIR})
include_directories(SYSTEM ${CUDA_INCLUDE_DIR})
link_directories(SYSTEM ${CUDA_LIBS})
# include_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/include/ usr/local/cuda-8.0/include/ /usr/local/cuda-7.5/targets/x86_64-linux/include/ )
#link_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/lib/ usr/local/cuda-8.0/lib/ /usr/local/cuda-7.5/targets/x86_64-linux/lib/ /usr/lib/openblas-base/lib /usr/local/cuda-8.0/lib64)
list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES})
target_link_libraries(opencv_text atlas blas ${Caffe_LIBS} ${Glog_LIBS} ${Protobuf_LIBS} ${HDF5_LIBRARIES} ${Boost_LIBRARIES})
add_definitions(-DHAVE_CAFFE)
endif() #HAVE_CAFFE
message(STATUS "TEXT CAFFE SEARCH")
if()
message(STATUS "TEXT NO CAFFE CONFLICT")
else()
message(STATUS "TEXT CAFFE CONFLICT")
endif()
if(HAVE_opencv_dnn) ocv_add_testdata(samples/ contrib/text
message(STATUS "dnn module found") FILES_MATCHING PATTERN "*.xml" PATTERN "*.xml.gz" REGEX "scenetext[0-9]+.jpg"
add_definitions(-DHAVE_DNN) )
set(HAVE_DNN 1)
else()
message(STATUS "dnn module not found")
endif()
# Locate Caffe (used for CNN Triplet training).
# Results: Caffe_INCLUDE_DIR, Caffe_LIBS, and Caffe_FOUND (set to 1 only when
# both the include directory and the library were located).
unset(Caffe_FOUND)

find_library(Caffe_LIBS
  NAMES caffe
  HINTS /usr/local/lib)

find_path(Caffe_INCLUDE_DIR
  NAMES
    caffe/caffe.hpp
    caffe/common.hpp
    caffe/net.hpp
    caffe/proto/caffe.pb.h
    caffe/util/io.hpp
    caffe/vision_layers.hpp
  HINTS /usr/local/include)

if(Caffe_INCLUDE_DIR AND Caffe_LIBS)
  set(Caffe_FOUND 1)
endif()
# Locate the glog library, which Caffe requires.
# Results: Glog_LIBS, and Glog_FOUND (set to 1 only when the library was located).
unset(Glog_FOUND)

find_library(Glog_LIBS
  NAMES glog
  HINTS /usr/local/lib)

if(Glog_LIBS)
  set(Glog_FOUND 1)
endif()
# Locate the protobuf library, which Caffe requires.
# Results: Protobuf_LIBS, and Protobuf_FOUND (set to 1 only when the library
# was located).
unset(Protobuf_FOUND)

find_library(Protobuf_LIBS
  NAMES protobuf
  HINTS /usr/local/lib)

if(Protobuf_LIBS)
  set(Protobuf_FOUND 1)
endif()
# Tesseract OCR
# Locates the Tesseract headers plus the tesseract and lept libraries.
# Results: Tesseract_INCLUDE_DIR, Tesseract_LIBRARY, Lept_LIBRARY,
# Tesseract_LIBS (both libraries as a list) and Tesseract_FOUND.
unset(Tesseract_FOUND)
find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h
  HINTS
  /usr/include
  /usr/local/include)
find_library(Tesseract_LIBRARY NAMES tesseract
  HINTS
  /usr/lib
  /usr/local/lib)
find_library(Lept_LIBRARY NAMES lept
  HINTS
  /usr/lib
  /usr/local/lib)
set(Tesseract_LIBS ${Tesseract_LIBRARY} ${Lept_LIBRARY})
# Test each result on its own. Testing the combined Tesseract_LIBS list is
# unreliable: "Tesseract_LIBRARY-NOTFOUND;<lept path>" does not end in
# -NOTFOUND, so if() would treat it as true even though tesseract is missing.
if(Tesseract_INCLUDE_DIR AND Tesseract_LIBRARY AND Lept_LIBRARY)
  set(Tesseract_FOUND 1)
endif()
...@@ -56,74 +56,3 @@ Intro ...@@ -56,74 +56,3 @@ Intro
----- -----
The text module now have a text detection and recognition using deep CNN. The text detector deep CNN that takes an image which may contain multiple words. This outputs a list of Rects with bounding boxes and probability of text there. The text recognizer provides a probabillity over a given vocabulary for each of these rects. The text module now have a text detection and recognition using deep CNN. The text detector deep CNN that takes an image which may contain multiple words. This outputs a list of Rects with bounding boxes and probability of text there. The text recognizer provides a probabillity over a given vocabulary for each of these rects.
Two backends are supported 1) caffe 2) opencv-dnn
Installation of Caffe backend
----------------------------
* Please note a custom caffe based on SSD branch is required, the link of the custom caffe is provided below
The caffe wrapping backend has the requirements caffe does.
* Caffe can be built against OpenCV; if the caffe backend is enabled, a circular dependency arises.
The simplest solution is to build caffe without support for OpenCV.
* Only the operating systems supported by Caffe are supported by the backend.
The scripts describing the module have been developed in ubuntu 16.04 and assume such a system.
Other UNIX systems including OSX should be easy to adapt.
Sample script for building Caffe
```bash
#!/bin/bash
SRCROOT="${HOME}/caffe_inst/"
mkdir -p "$SRCROOT"
cd "$SRCROOT"
git clone https://github.com/sghoshcvc/TextBoxes.git
cd TextBoxes
cat Makefile.config.example > Makefile.config
echo 'USE_OPENCV := 0' >> Makefile.config
echo 'INCLUDE_DIRS += /usr/include/hdf5/serial/' >> Makefile.config
echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config
echo "--- /tmp/caffe/include/caffe/net.hpp 2017-05-28 04:55:47.929623902 +0200
+++ caffe/distribute/include/caffe/net.hpp 2017-05-28 04:51:33.437090768 +0200
@@ -234,6 +234,7 @@
template <typename T>
friend class Net;
+ virtual ~Callback(){}
};
const vector<Callback*>& before_forward() const { return before_forward_; }
void add_before_forward(Callback* value) {
">/tmp/cleanup_caffe.diff
patch < /tmp/cleanup_caffe.diff
make -j 6
make pycaffe
make distribute
```
```bash
#!/bin/bash
cd $OPENCV_BUILD_DIR #You must set this
CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe in ubuntu 16.04
cmake -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0" -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="$OPENCV_CONTRIB/modules" ./
```
where $OPENCV_CONTRIB is the root directory containing the opencv_contrib modules
Installation of opencv-dnn backend
----------------------------
Use of opencv-dnn does not need any additional library.
The recent opencv-3.3.0 needs to be built with the extra modules to use the text module.
...@@ -5,14 +5,17 @@ endif() ...@@ -5,14 +5,17 @@ endif()
if(NOT Tesseract_FOUND) if(NOT Tesseract_FOUND)
find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h
HINTS HINTS
/usr/include
/usr/local/include) /usr/local/include)
find_library(Tesseract_LIBRARY NAMES tesseract find_library(Tesseract_LIBRARY NAMES tesseract
HINTS HINTS
/usr/lib
/usr/local/lib) /usr/local/lib)
find_library(Lept_LIBRARY NAMES lept find_library(Lept_LIBRARY NAMES lept
HINTS HINTS
/usr/lib
/usr/local/lib) /usr/local/lib)
if(Tesseract_INCLUDE_DIR AND Tesseract_LIBRARY AND Lept_LIBRARY) if(Tesseract_INCLUDE_DIR AND Tesseract_LIBRARY AND Lept_LIBRARY)
......
...@@ -93,7 +93,7 @@ grouping horizontally aligned text, and the method proposed by Lluis Gomez and D ...@@ -93,7 +93,7 @@ grouping horizontally aligned text, and the method proposed by Lluis Gomez and D
in @cite Gomez13 @cite Gomez14 for grouping arbitrary oriented text (see erGrouping). in @cite Gomez13 @cite Gomez14 for grouping arbitrary oriented text (see erGrouping).
To see the text detector at work, have a look at the textdetection demo: To see the text detector at work, have a look at the textdetection demo:
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp> <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>
@defgroup text_recognize Scene Text Recognition @defgroup text_recognize Scene Text Recognition
@} @}
......
...@@ -65,7 +65,6 @@ component tree of the image. : ...@@ -65,7 +65,6 @@ component tree of the image. :
*/ */
struct CV_EXPORTS ERStat struct CV_EXPORTS ERStat
{ {
public: public:
//! Constructor //! Constructor
explicit ERStat(int level = 256, int pixel = 0, int x = 0, int y = 0); explicit ERStat(int level = 256, int pixel = 0, int x = 0, int y = 0);
......
This diff is collapsed.
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
"""
Created on Wed Jul 19 17:54:00 2017
@author: sgnosh
"""
#!/usr/bin/python #!/usr/bin/python
import sys import sys
import os import os
import cv2 import cv2
import numpy as np import numpy as np
print('\nDeeptextdetection.py') def main():
print(' A demo script of text box alogorithm of the paper:') print('\nDeeptextdetection.py')
print(' * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network https://arxiv.org/abs/1611.06779\n') print(' A demo script of text box alogorithm of the paper:')
print(' * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network https://arxiv.org/abs/1611.06779\n')
if (len(sys.argv) < 2):
if (len(sys.argv) < 2):
print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n') print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n')
quit() quit()
#if not cv2.text.cnn_config.caffe_backend.getCaffeAvailable():
# print"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n" if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'):
#
# quit()
# check model and architecture file existance
if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'):
print " Model files not found in current directory. Aborting" print " Model files not found in current directory. Aborting"
print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models" print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models"
quit() quit()
cv2.text.cnn_config.caffe_backend.setCaffeGpuMode(True);
pathname = os.path.dirname(sys.argv[0])
img = cv2.imread(str(sys.argv[1]))
textSpotter=cv2.text.textDetector_create(
"textbox_deploy.prototxt","textbox.caffemodel")
rects,outProbs = textSpotter.textDetectInImage(img);
# for visualization
vis = img.copy()
# Threshold to select rectangles : All rectangles for which outProbs is more than this threshold will be shown
thres = 0.6
img = cv2.imread(str(sys.argv[1]))
textSpotter = cv2.text.TextDetectorCNN_create("textbox_deploy.prototxt","textbox.caffemodel")
rects, outProbs = textSpotter.textDetectInImage(img);
vis = img.copy()
thres = 0.6
#Visualization for r in range(np.shape(rects)[0]):
for r in range(0,np.shape(rects)[0]): if outProbs[r] > thres:
if outProbs[r] >thres:
rect = rects[r] rect = rects[r]
cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 0, 0), 2) cv2.rectangle(vis, (rect[0],rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (255, 0, 0), 2)
# cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 255, 255), 1)
cv2.imshow("Text detection result", vis)
cv2.waitKey()
#Visualization if __name__ == "__main__":
cv2.imshow("Text detection result", vis) main()
cv2.waitKey(0)
\ No newline at end of file
/* #include <opencv2/text.hpp>
* dictnet_demo.cpp #include <opencv2/highgui.hpp>
* #include <opencv2/imgproc.hpp>
* Demonstrates simple use of the holistic word classifier in C++
*
* Created on: June 26, 2016
* Author: Anguelos Nicolaou <anguelos.nicolaou AT gmail.com>
*/
#include "opencv2/text.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include <sstream> #include <sstream>
#include <vector>
#include <iostream> #include <iostream>
#include <iomanip>
#include <fstream> #include <fstream>
void textbox_draw(cv::Mat &src, std::vector<cv::Rect> &groups,std::vector<float> &probs,std::vector<cv::String> wordList,float thres); using namespace cv;
inline std::string getHelpStr(std::string progFname){
std::stringstream out;
out << " Demo of text detection CNN for text detection." << std::endl;
out << " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"<<std::endl<<std::endl;
out << " Usage: " << progFname << " <output_file> <input_image>" << std::endl;
out << " Caffe Model files (textbox.caffemodel, textbox_deploy.prototxt)"<<std::endl;
out << " must be in the current directory." << std::endl << std::endl;
out << " Obtaining Caffe Model files in linux shell:"<<std::endl; namespace
out << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel"<<std::endl; {
out << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt"<<std::endl; std::string getHelpStr(std::string progFname)
out << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt"<<std::endl<<std::endl; {
std::stringstream out;
out << " Demo of text detection CNN for text detection." << std::endl
<< " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"<<std::endl<<std::endl
<< " Usage: " << progFname << " <output_file> <input_image>" << std::endl
<< " Caffe Model files (textbox.caffemodel, textbox_deploy.prototxt)"<<std::endl
<< " must be in the current directory." << std::endl
<< " These files can be downloaded from https://github.com/sghoshcvc/TextBox-Models.git" << std::endl;
return out.str(); return out.str();
} }
inline bool fileExists (std::string filename) { bool fileExists (std::string filename)
{
std::ifstream f(filename.c_str()); std::ifstream f(filename.c_str());
return f.good(); return f.good();
} }
void textbox_draw(cv::Mat &src, std::vector<cv::Rect> &groups,std::vector<float> &probs,std::vector<cv::String> wordList,float thres=0.6)
void textbox_draw(Mat src, std::vector<Rect>& groups, std::vector<float>& probs, float thres)
{ {
for (int i=0;i<(int)groups.size(); i++) for (size_t i = 0; i < groups.size(); i++)
{ {
if(probs[i]>thres) if(probs[i] > thres)
{ {
if (src.type() == CV_8UC3) if (src.type() == CV_8UC3)
{ {
cv::rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 0, 255, 255 ), 3, 8 ); rectangle(src, groups[i], Scalar( 0, 255, 255 ), 2, LINE_AA);
cv::putText(src, wordList[i],groups.at(i).tl() , cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar( 0,0,255 )); String label = format("%.2f", probs[i]);
std::cout << "text box: " << groups[i] << " confidence: " << probs[i] << "\n";
putText(src, label, groups.at(i).tl(), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,255 ), 1, LINE_AA);
} }
else else
rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 255 ), 3, 8 ); rectangle(src, groups[i], Scalar( 255 ), 3, 8 );
} }
} }
} }
}
int main(int argc, const char * argv[]){ int main(int argc, const char * argv[])
if(!cv::text::cnn_config::caffe_backend::getCaffeAvailable()){ {
std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n"; if (argc < 2)
//exit(1); {
} std::cout << getHelpStr(argv[0]);
std::vector<std::string> backends=cv::text::cnn_config::getAvailableBackends(); std::cout << "Insufiecient parameters. Aborting!" << std::endl;
std::cout << "The Following backends are available" << "\n";
for (int i=0;i<backends.size();i++)
std::cout << backends[i] << "\n";
// printf("%s",x);
//set to true if you have a GPU with more than 3GB
if(cv::text::cnn_config::caffe_backend::getCaffeAvailable())
cv::text::cnn_config::caffe_backend::setCaffeGpuMode(true);
if (argc < 3){
std::cout<<getHelpStr(argv[0]);
std::cout<<"Insufiecient parameters. Aborting!"<<std::endl;
exit(1); exit(1);
} }
if (!fileExists("textbox.caffemodel") || if (!fileExists("textbox.caffemodel") ||
!fileExists("textbox_deploy.prototxt")){ !fileExists("textbox_deploy.prototxt"))
// !fileExists("dictnet_vgg_labels.txt")) {
std::cout<<getHelpStr(argv[0]);
std::cout<<"Model files not found in the current directory. Aborting!"<<std::endl;
exit(1);
}
if (fileExists(argv[1])){
std::cout<<getHelpStr(argv[0]); std::cout<<getHelpStr(argv[0]);
std::cout<<"Output file must not exist. Aborting!"<<std::endl; std::cout << "Model files not found in the current directory. Aborting!" << std::endl;
exit(1); exit(1);
} }
cv::Mat image; Mat image = imread(String(argv[1]), IMREAD_COLOR);
image = cv::imread(cv::String(argv[2]));
std::cout<<"Starting Text Box Demo"<<std::endl; std::cout << "Starting Text Box Demo" << std::endl;
cv::Ptr<cv::text::textDetector> textSpotter=cv::text::textDetector::create( Ptr<text::TextDetectorCNN> textSpotter =
"textbox_deploy.prototxt","textbox.caffemodel"); text::TextDetectorCNN::create("textbox_deploy.prototxt","textbox.caffemodel", false);
//cv::Ptr<cv::text::textDetector> wordSpotter= std::vector<Rect> bbox;
// cv::text::textDetector::create(cnn);
std::cout<<"Created Text Spotter with text Boxes";
std::vector<cv::Rect> bbox;
std::vector<float> outProbabillities; std::vector<float> outProbabillities;
textSpotter->textDetectInImage(image,bbox,outProbabillities); textSpotter->textDetectInImage(image, bbox, outProbabillities);
// textbox_draw(image, bbox,outProbabillities);
float thres =0.6f;
std::vector<cv::Mat> imageList;
for(int imageIdx=0;imageIdx<(int)bbox.size();imageIdx++){
if(outProbabillities[imageIdx]>thres){
imageList.push_back(image(bbox.at(imageIdx)));
}
}
// call dict net here for all detected parts
cv::Ptr<cv::text::DeepCNN> cnn=cv::text::DeepCNN::createDictNet(
"dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel",cv::text::OCR_HOLISTIC_BACKEND_DNN);
cv::Ptr<cv::text::OCRHolisticWordRecognizer> wordSpotter=
cv::text::OCRHolisticWordRecognizer::create(cnn,"dictnet_vgg_labels.txt");
std::vector<cv::String> wordList;
std::vector<double> wordProbabillities;
wordSpotter->recogniseImageBatch(imageList,wordList,wordProbabillities);
// write the output in file
std::ofstream out;
out.open(argv[1]);
for (int i=0;i<(int)wordList.size(); i++)
{
cv::Point tl_ = bbox.at(i).tl();
cv::Point br_ = bbox.at(i).br();
out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl;
}
out.close();
textbox_draw(image, bbox,outProbabillities,wordList);
textbox_draw(image, bbox, outProbabillities, 0.5f);
cv::imshow("TextBox Demo",image); imshow("TextBox Demo",image);
std::cout << "Done!" << std::endl << std::endl; std::cout << "Done!" << std::endl << std::endl;
std::cout << "Press any key to exit." << std::endl << std::endl; std::cout << "Press any key to exit." << std::endl << std::endl;
if ((cv::waitKey()&0xff) == ' ') waitKey();
return 0; return 0;
} }
This diff is collapsed.
This diff is collapsed.
...@@ -45,6 +45,8 @@ ...@@ -45,6 +45,8 @@
#include "opencv2/text.hpp" #include "opencv2/text.hpp"
#include "text_config.hpp"
#ifdef HAVE_TESSERACT #ifdef HAVE_TESSERACT
#if !defined(USE_STD_NAMESPACE) #if !defined(USE_STD_NAMESPACE)
#define USE_STD_NAMESPACE #define USE_STD_NAMESPACE
......
#include "precomp.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/core.hpp"
#include <iostream>
#include <fstream>
#include <sstream>
#include <queue>
#include <algorithm>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>
//#ifdef HAVE_CAFFE
//#include "caffe/caffe.hpp"
//#endif
namespace cv { namespace text {
/** @brief textDetector implementation that delegates detection to a TextRegionDetector.
 *
 * The wrapped classifier produces a raw output matrix; this class converts each
 * row of that matrix into a bounding box (in input-image pixels) and a confidence.
 */
class textDetectImpl: public textDetector{
private:
    struct NetOutput{
        //Auxiliary structure that handles the logic of getting bounding box and confidences of textness from
        //the raw outputs of caffe
        Rect bbox;
        float probability;

        /** Clamps a pixel coordinate into [0, extent-1] (0 when extent is not positive). */
        static float clampToImage(float value, int extent)
        {
            return std::max(0.f, std::min(value, static_cast<float>(extent - 1)));
        }

        /** @brief Decodes the raw network buffer into boxes and probabilities.
         *
         * @param buffer        row-major buffer with nbrTextBoxes rows of nCol floats; columns 2..6 of
         *                      each row are read as probability, x_min, y_min, x_max, y_max
         * @param nbrTextBoxes  number of rows to decode
         * @param nCol          number of floats per row
         * @param res           resized to nbrTextBoxes and filled with the decoded entries
         * @param inputShape    size of the input image; coordinates are multiplied by its width/height
         *                      (presumably they are normalised to [0,1] -- confirm against the model)
         */
        static void getOutputs(const float* buffer,int nbrTextBoxes,int nCol,std::vector<NetOutput>& res,Size inputShape)
        {
            res.resize(nbrTextBoxes);
            for(int k=0;k<nbrTextBoxes;k++)
            {
                const float* row = buffer + k*nCol;

                // Clamp every corner to the last valid pixel. The previous test
                // (x_max > width ? width-1 : x_max) let values in (width-1, width]
                // through, so the resulting Rect could reach one pixel past the image.
                const float x_min = clampToImage(row[3]*inputShape.width,  inputShape.width);
                const float y_min = clampToImage(row[4]*inputShape.height, inputShape.height);
                const float x_max = clampToImage(row[5]*inputShape.width,  inputShape.width);
                const float y_max = clampToImage(row[6]*inputShape.height, inputShape.height);

                const float wd = x_max-x_min+1;
                const float ht = y_max-y_min+1;

                res[k].bbox=Rect(int(x_min),int(y_min),int(wd),int(ht));
                res[k].probability=row[2];
            }
        }
    };
protected:
    Ptr<TextRegionDetector> classifier_;
public:
    /** Wraps the given region detector; every detection call is forwarded to it. */
    textDetectImpl(Ptr<TextRegionDetector> classifierPtr):classifier_(classifierPtr)
    {
    }

    /** @brief Runs the classifier on an image and returns all boxes with their confidences.
     *
     * @param inputImage  image handed to the classifier
     * @param Bbox        output: one rectangle per network output row (previous content is discarded)
     * @param confidence  output: probability for each rectangle (previous content is discarded)
     */
    void textDetectInImage(InputArray inputImage,CV_OUT std::vector<Rect>& Bbox,CV_OUT std::vector<float>& confidence)
    {
        Mat netOutput;
        // call the detect function of deepTextCNN class
        this->classifier_->detect(inputImage,netOutput);
        // get the output geometry i.e height and width of output blob from caffe
        Size OutputGeometry_ = this->classifier_->getOutputGeometry();
        int nbrTextBoxes = OutputGeometry_.height;
        int nCol = OutputGeometry_.width;

        std::vector<NetOutput> tmp;
        // the output bounding box needs to be resized by the input height and width
        Size inputImageShape = Size(inputImage.cols(),inputImage.rows());
        NetOutput::getOutputs(reinterpret_cast<const float*>(netOutput.data),nbrTextBoxes,nCol,tmp,inputImageShape);

        // These are pure outputs: drop stale entries so that reusing the same
        // vectors across calls does not accumulate detections.
        Bbox.clear();
        confidence.clear();
        Bbox.reserve(tmp.size());
        confidence.reserve(tmp.size());
        for (size_t k=0;k<tmp.size();k++)
        {
            Bbox.push_back(tmp[k].bbox);
            confidence.push_back(tmp[k].probability);
        }
    }

    /** @brief Detects text and optionally copies the results into caller-provided vectors.
     *
     * @param image                  image to process
     * @param component_rects        if not NULL, receives the detected rectangles
     * @param component_confidences  if not NULL, receives the matching confidences
     * @param component_level        must be OCR_LEVEL_WORD
     */
    void run(Mat& image, std::vector<Rect>* component_rects=NULL,
             std::vector<float>* component_confidences=NULL,
             int component_level=0)
    {
        CV_Assert(component_level==OCR_LEVEL_WORD);//Componnents not applicable for word spotting
        std::vector<Rect> bbox;
        std::vector<float> score;
        textDetectInImage(image,bbox,score);

        // Copy into the caller's storage. Assigning "component_rects = &bbox" only
        // rebinds the local pointer and leaves the caller's vector untouched.
        if(component_rects!=NULL)
        {
            *component_rects = bbox;
        }
        if(component_confidences!=NULL)
        {
            *component_confidences = score;
        }
    }

    /** @brief Same as the mask-less overload; the mask is only checked for matching size.
     *
     * @param image                  image to process
     * @param mask                   must have the same rows/cols as image; otherwise ignored
     * @param component_rects        if not NULL, receives the detected rectangles
     * @param component_confidences  if not NULL, receives the matching confidences
     * @param component_level        must be OCR_LEVEL_WORD
     */
    void run(Mat& image, Mat& mask, std::vector<Rect>* component_rects=NULL,
             std::vector<float>* component_confidences=NULL,
             int component_level=0)
    {
        CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image
        this->run(image,component_rects,component_confidences,component_level);
    }

    /** Returns the wrapped region detector. */
    Ptr<TextRegionDetector> getClassifier()
    {
        return this->classifier_;
    }
};
/** @brief Builds a text detector around an already constructed region detector.
 *
 * @param classifierPtr  region detector that the returned object forwards to
 * @return a textDetector backed by textDetectImpl
 */
Ptr<textDetector> textDetector::create(Ptr<TextRegionDetector> classifierPtr)
{
    Ptr<textDetector> detector(new textDetectImpl(classifierPtr));
    return detector;
}
/** @brief Builds a text detector from model description and weight files.
 *
 * A custom image preprocessor (created with 255) is given the per-channel
 * mean (104, 117, 123) and handed to DeepCNNTextDetector::create together
 * with the two file names.
 *
 * @param modelArchFilename     path of the network architecture file
 * @param modelWeightsFilename  path of the trained weights file
 * @return a textDetector backed by textDetectImpl
 */
Ptr<textDetector> textDetector::create(String modelArchFilename, String modelWeightsFilename)
{
    // 1x3 single-byte matrix holding the mean value of each channel
    Mat channelMean(1,3,CV_8U);
    uchar* meanRow = channelMean.ptr<uchar>(0);
    meanRow[0] = 104;
    meanRow[1] = 117;
    meanRow[2] = 123;

    // custom preprocessor with rawval 255, using the mean defined above
    Ptr<ImagePreprocessor> prep = ImagePreprocessor::createImageCustomPreprocessor(255);
    prep->set_mean(channelMean);

    // the region detector doing the actual work, wrapped by the implementation class
    Ptr<TextRegionDetector> regionDetector(
        DeepCNNTextDetector::create(modelArchFilename,modelWeightsFilename,prep,1));
    return Ptr<textDetector>(new textDetectImpl(regionDetector));
}
} } //namespace text namespace cv
This diff is collapsed.
#ifndef __OPENCV_TEXT_CONFIG_HPP__ #ifndef __OPENCV_TEXT_CONFIG_HPP__
#define __OPENCV_TEXT_CONFIG_HPP__ #define __OPENCV_TEXT_CONFIG_HPP__
// HAVE OCR Tesseract
#cmakedefine HAVE_TESSERACT
#endif #endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment