text: cleanup dnn text detection part

951e1827 · Vladislav Sovrasov · c33629e0 · 951e1827 · c33629e0 · c33629e0
Commit 951e1827 authored Oct 05, 2017 by Vladislav Sovrasov
19 changed files
--- a/modules/text/CMakeLists.txt
+++ b/modules/text/CMakeLists.txt
 set(the_description "Text Detection and Recognition")
+ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_dnn OPTIONAL opencv_highgui WRAP python java)
-if(POLICY CMP0023)
+if(NOT CMAKE_CROSSCOMPILING OR OPENCV_FIND_TESSERACT)
-  message(STATUS "Explicitly setting policy CMP0023 to OLD")
+  set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
-  cmake_policy(SET CMP0023 OLD)
+  find_package(Tesseract QUIET)
-endif(POLICY CMP0023)
+  if(Tesseract_FOUND)
-# Using cmake scripts and modules
-list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR})
-set(TEXT_DEPS opencv_ml opencv_highgui opencv_imgproc opencv_core opencv_features2d opencv_calib3d)
-find_package(Caffe)
-if(Caffe_FOUND)
-  message(STATUS "Caffe:   YES")
-  set(HAVE_CAFFE 1)
-else()
-  message(STATUS "Caffe:   NO")
-#  list(APPEND TEXT_DEPS opencv_dnn)
-endif()
-#internal dependencies
-find_package(Protobuf)
-if(Protobuf_FOUND)
-  message(STATUS "Protobuf:   YES")
-  set(HAVE_PROTOBUF 1)
-else()
-  message(STATUS "Protobuf:   NO")
-endif()
-find_package(Glog)
-if(Glog_FOUND)
-  message(STATUS "Glog:   YES")
-  set(HAVE_GLOG 1)
-else()
-  message(STATUS "Glog:   NO")
-endif()
-ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d OPTIONAL opencv_dnn WRAP python)
-#ocv_define_module(text ${TEXT_DEPS} WRAP python)
-#set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR})
-find_package(Tesseract)
-if(${Tesseract_FOUND})
    message(STATUS "Tesseract:   YES")
-  include_directories(${Tesseract_INCLUDE_DIR})
+    set(HAVE_TESSERACT 1)
-  target_link_libraries(opencv_text ${Tesseract_LIBS})
+    ocv_include_directories(${Tesseract_INCLUDE_DIR})
-  add_definitions(-DHAVE_TESSERACT)
+    ocv_target_link_libraries(${the_module} ${Tesseract_LIBRARIES})
-else()
+  else()
    message(STATUS "Tesseract:   NO")
  endif()
+endif()
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/text_config.hpp.in
+               ${CMAKE_BINARY_DIR}/text_config.hpp @ONLY)
-if(HAVE_CAFFE AND HAVE_GLOG AND HAVE_PROTOBUF)
+ocv_include_directories(${CMAKE_CURRENT_BINARY_DIR})
-  include_directories(${Caffe_INCLUDE_DIR})
-  find_package(HDF5 COMPONENTS HL REQUIRED)
-  include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR})
-  list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES})
-  find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem)
-  include_directories(SYSTEM ${Boost_INCLUDE_DIR})
-  include_directories(SYSTEM ${CUDA_INCLUDE_DIR})
-  link_directories(SYSTEM ${CUDA_LIBS})
- # include_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/include/ usr/local/cuda-8.0/include/ /usr/local/cuda-7.5/targets/x86_64-linux/include/ )
-  #link_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/lib/ usr/local/cuda-8.0/lib/ /usr/local/cuda-7.5/targets/x86_64-linux/lib/ /usr/lib/openblas-base/lib /usr/local/cuda-8.0/lib64)
-  list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES})
-  target_link_libraries(opencv_text atlas blas ${Caffe_LIBS} ${Glog_LIBS} ${Protobuf_LIBS} ${HDF5_LIBRARIES} ${Boost_LIBRARIES})
-  add_definitions(-DHAVE_CAFFE)
-endif() #HAVE_CAFFE
-message(STATUS "TEXT CAFFE SEARCH")
-if()
-  message(STATUS "TEXT NO CAFFE CONFLICT")
-else()
-  message(STATUS "TEXT CAFFE CONFLICT")
-endif()
-if(HAVE_opencv_dnn)
+ocv_add_testdata(samples/ contrib/text
-	message(STATUS "dnn module found")
+    FILES_MATCHING PATTERN "*.xml" PATTERN "*.xml.gz" REGEX "scenetext[0-9]+.jpg"
-	add_definitions(-DHAVE_DNN)
+)
-	set(HAVE_DNN 1)
-else()
-	message(STATUS "dnn module not found")
-endif()
--- a/modules/text/FindCaffe.cmake
+++ b/modules/text/FindCaffe.cmake
-# Caffe package for CNN Triplet training
-unset(Caffe_FOUND)
-find_path(Caffe_INCLUDE_DIR NAMES caffe/caffe.hpp caffe/common.hpp caffe/net.hpp caffe/proto/caffe.pb.h caffe/util/io.hpp caffe/vision_layers.hpp
-  HINTS
-  /usr/local/include)
-find_library(Caffe_LIBS NAMES caffe
-  HINTS
-  /usr/local/lib)
-if(Caffe_LIBS AND Caffe_INCLUDE_DIR)
-    set(Caffe_FOUND 1)
-endif()
--- a/modules/text/FindGlog.cmake
+++ b/modules/text/FindGlog.cmake
-#Required for Caffe
-unset(Glog_FOUND)
-find_library(Glog_LIBS NAMES glog
-  HINTS
-  /usr/local/lib)
-if(Glog_LIBS)
-    set(Glog_FOUND 1)
-endif()
--- a/modules/text/FindProtobuf.cmake
+++ b/modules/text/FindProtobuf.cmake
-#Protobuf package required for Caffe
-unset(Protobuf_FOUND)
-find_library(Protobuf_LIBS NAMES protobuf
-  HINTS
-  /usr/local/lib)
-if(Protobuf_LIBS)
-    set(Protobuf_FOUND 1)
-endif()
--- a/modules/text/FindTesseract.cmake
+++ b/modules/text/FindTesseract.cmake
-# Tesseract OCR
-unset(Tesseract_FOUND)
-find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h
-  HINTS
-  /usr/include
-  /usr/local/include)
-find_library(Tesseract_LIBRARY NAMES tesseract
-  HINTS
-  /usr/lib
-  /usr/local/lib)
-find_library(Lept_LIBRARY NAMES lept
-  HINTS
-  /usr/lib
-  /usr/local/lib)
-set(Tesseract_LIBS ${Tesseract_LIBRARY} ${Lept_LIBRARY})
-if(Tesseract_LIBS AND Tesseract_INCLUDE_DIR)
-    set(Tesseract_FOUND 1)
-endif()
--- a/modules/text/README.md
+++ b/modules/text/README.md
@@ -56,74 +56,3 @@ Intro
 -----
 The text module now has text detection and recognition using deep CNNs. The text detector is a deep CNN that takes an image which may contain multiple words and outputs a list of Rects with bounding boxes and the probability of text in each of them. The text recognizer provides a probability over a given vocabulary for each of these rects.
-Two backends are supported 1) caffe 2) opencv-dnn
-Instalation of Caffe backend
----------------------------
-* Please note a custom caffe based on SSD branch is required, the link of the custom caffe is provided below
-The caffe wrapping backend has the requirements caffe does.
-* Caffe can be built against OpenCV, if the caffe backend is enabled, a circular bependency arises.
-The simplest solution is to build caffe without support for OpenCV.
-* Only the OS supported by Caffe are supported by the backend.
-The scripts describing the module have been developed in ubuntu 16.04 and assume such a system.
-Other UNIX systems including OSX should be easy to adapt.
-Sample script for building Caffe
-```bash
-#!/bin/bash
-SRCROOT="${HOME}/caffe_inst/"
-mkdir -p "$SRCROOT"
-cd "$SRCROOT"
-git clone https://github.com/sghoshcvc/TextBoxes.git
-cd TextBoxes
-cat Makefile.config.example  > Makefile.config
-echo 'USE_OPENCV := 0' >> Makefile.config
-echo 'INCLUDE_DIRS += /usr/include/hdf5/serial/' >> Makefile.config
-echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config
-echo "--- /tmp/caffe/include/caffe/net.hpp	2017-05-28 04:55:47.929623902 +0200
-+++ caffe/distribute/include/caffe/net.hpp	2017-05-28 04:51:33.437090768 +0200
-@@ -234,6 +234,7 @@
-     template <typename T>
-     friend class Net;
-+    virtual ~Callback(){}
-   };
-   const vector<Callback*>& before_forward() const { return before_forward_; }
-   void add_before_forward(Callback* value) {
-">/tmp/cleanup_caffe.diff
-patch < /tmp/cleanup_caffe.diff
-make -j 6
-make pycaffe
-make distribute
-```
-```bash
-#!/bin/bash
-cd $OPENCV_BUILD_DIR #You must set this
-CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe in ubuntu 16.04
-cmake  -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0"  -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="$OPENCV_CONTRIB/modules"   ./
-```
-where $OPECV_CONTRIB is the root directory containing opencv_contrib module
-Instalation of Caffe backend
----------------------------
-Use of opencv-dnn does not need any additional library.
-The recent opencv-3.3.0 needs to be build with extra modules to use text module.
--- a/modules/text/cmake/FindTesseract.cmake
+++ b/modules/text/cmake/FindTesseract.cmake
@@ -5,14 +5,17 @@ endif()
 if(NOT Tesseract_FOUND)
  find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h
    HINTS
+    /usr/include
    /usr/local/include)
  find_library(Tesseract_LIBRARY NAMES tesseract
    HINTS
+    /usr/lib
    /usr/local/lib)
  find_library(Lept_LIBRARY NAMES lept
    HINTS
+    /usr/lib
    /usr/local/lib)
  if(Tesseract_INCLUDE_DIR AND Tesseract_LIBRARY AND Lept_LIBRARY)

--- a/modules/text/include/opencv2/text.hpp
+++ b/modules/text/include/opencv2/text.hpp
@@ -93,7 +93,7 @@ grouping horizontally aligned text, and the method proposed by Lluis Gomez and D
 in @cite Gomez13 @cite Gomez14 for grouping arbitrarily oriented text (see erGrouping).
 To see the text detector at work, have a look at the textdetection demo:
-<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>
+<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>
    @defgroup text_recognize Scene Text Recognition
  @}

--- a/modules/text/include/opencv2/text/erfilter.hpp
+++ b/modules/text/include/opencv2/text/erfilter.hpp
@@ -65,7 +65,6 @@ component tree of the image. :
 */
 struct CV_EXPORTS ERStat
 {
 public:
    //! Constructor
    explicit ERStat(int level = 256, int pixel = 0, int x = 0, int y = 0);

--- a/modules/text/include/opencv2/text/ocr.hpp
+++ b/modules/text/include/opencv2/text/ocr.hpp
--- a/modules/text/include/opencv2/text/textDetector.hpp
+++ b/modules/text/include/opencv2/text/textDetector.hpp
--- a/modules/text/samples/deeptextdetection.py
+++ b/modules/text/samples/deeptextdetection.py
 # -*- coding: utf-8 -*-
-"""
-Created on Wed Jul 19 17:54:00 2017
-@author: sgnosh
-"""
 #!/usr/bin/python
 import sys
 import os
 import cv2
 import numpy as np
-print('\nDeeptextdetection.py')
+def main():
-print('       A demo script of text box alogorithm of the paper:')
+    print('\nDeeptextdetection.py')
-print('       * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network https://arxiv.org/abs/1611.06779\n')
+    print('       A demo script of text box alogorithm of the paper:')
+    print('       * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network https://arxiv.org/abs/1611.06779\n')
+    if (len(sys.argv) < 2):
-if (len(sys.argv) < 2):
        print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n')
        quit()
-#if not cv2.text.cnn_config.caffe_backend.getCaffeAvailable():
-#        print"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n"
+    if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'):
-#
-#        quit()
-# check model and architecture file existance
-if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'):
        print " Model files not found in current directory. Aborting"
        print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models"
        quit()
-cv2.text.cnn_config.caffe_backend.setCaffeGpuMode(True);
-pathname = os.path.dirname(sys.argv[0])
-img      = cv2.imread(str(sys.argv[1]))
-textSpotter=cv2.text.textDetector_create(
-                "textbox_deploy.prototxt","textbox.caffemodel")
-rects,outProbs = textSpotter.textDetectInImage(img);
-# for visualization
-vis      = img.copy()
-# Threshold to select rectangles : All rectangles for which outProbs is more than this threshold will be shown
-thres = 0.6
+    img = cv2.imread(str(sys.argv[1]))
+    textSpotter = cv2.text.TextDetectorCNN_create("textbox_deploy.prototxt","textbox.caffemodel")
+    rects, outProbs = textSpotter.textDetectInImage(img);
+    vis = img.copy()
+    thres = 0.6
-  #Visualization
+    for r in range(np.shape(rects)[0]):
-for r in range(0,np.shape(rects)[0]):
+        if outProbs[r] > thres:
-    if outProbs[r] >thres:
            rect = rects[r]
-        cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 0, 0), 2)
+            cv2.rectangle(vis, (rect[0],rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (255, 0, 0), 2)
-       # cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 255, 255), 1)
+    cv2.imshow("Text detection result", vis)
+    cv2.waitKey()
-#Visualization
+if __name__ == "__main__":
-cv2.imshow("Text detection result", vis)
+    main()
-cv2.waitKey(0)
\ No newline at end of file
--- a/modules/text/samples/textbox_demo.cpp
+++ b/modules/text/samples/textbox_demo.cpp
-/*
+#include <opencv2/text.hpp>
- * dictnet_demo.cpp
+#include <opencv2/highgui.hpp>
- *
+#include <opencv2/imgproc.hpp>
- * Demonstrates simple use of the holistic word classifier in C++
- *
- * Created on: June 26, 2016
- *     Author: Anguelos Nicolaou <anguelos.nicolaou AT gmail.com>
- */
-#include  "opencv2/text.hpp"
-#include  "opencv2/highgui.hpp"
-#include  "opencv2/imgproc.hpp"
 #include  <sstream>
-#include  <vector>
 #include  <iostream>
-#include  <iomanip>
 #include  <fstream>
-void textbox_draw(cv::Mat &src, std::vector<cv::Rect>  &groups,std::vector<float> &probs,std::vector<cv::String> wordList,float thres);
+using namespace cv;
-inline std::string getHelpStr(std::string progFname){
-    std::stringstream out;
-    out << "    Demo of text detection CNN for text detection." << std::endl;
-    out << "    Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"<<std::endl<<std::endl;
-    out << "    Usage: " << progFname << " <output_file> <input_image>" << std::endl;
-    out << "    Caffe Model files  (textbox.caffemodel, textbox_deploy.prototxt)"<<std::endl;
-    out << "      must be in the current directory." << std::endl << std::endl;
-    out << "    Obtaining Caffe Model files in linux shell:"<<std::endl;
+namespace
-    out << "    wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel"<<std::endl;
+{
-    out << "    wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt"<<std::endl;
+std::string getHelpStr(std::string progFname)
-    out << "    wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt"<<std::endl<<std::endl;
+{
+    std::stringstream out;
+    out << "    Demo of text detection CNN for text detection." << std::endl
+        << "    Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"<<std::endl<<std::endl
+        << "    Usage: " << progFname << " <output_file> <input_image>" << std::endl
+        << "    Caffe Model files  (textbox.caffemodel, textbox_deploy.prototxt)"<<std::endl
+        << "      must be in the current directory." << std::endl
+        << "    These files can be downloaded from https://github.com/sghoshcvc/TextBox-Models.git" << std::endl;
    return out.str();
 }
-inline bool fileExists (std::string filename) {
+bool fileExists (std::string filename)
+{
    std::ifstream f(filename.c_str());
    return f.good();
 }
-void textbox_draw(cv::Mat &src, std::vector<cv::Rect>  &groups,std::vector<float> &probs,std::vector<cv::String> wordList,float thres=0.6)
+void textbox_draw(Mat src, std::vector<Rect>& groups, std::vector<float>& probs, float thres)
 {
-    for (int i=0;i<(int)groups.size(); i++)
+    for (size_t i = 0; i < groups.size(); i++)
    {
-        if(probs[i]>thres)
+        if(probs[i] > thres)
        {
            if (src.type() == CV_8UC3)
            {
-                cv::rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 0, 255, 255 ), 3, 8 );
+                rectangle(src, groups[i], Scalar( 0, 255, 255 ), 2, LINE_AA);
-                cv::putText(src, wordList[i],groups.at(i).tl() , cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar( 0,0,255 ));
+                String label = format("%.2f", probs[i]);
+                std::cout << "text box: " << groups[i] << " confidence: " << probs[i] << "\n";
+                putText(src, label, groups.at(i).tl(), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,255 ), 1, LINE_AA);
            }
            else
-                rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 255 ), 3, 8 );
+                rectangle(src, groups[i], Scalar( 255 ), 3, 8 );
        }
    }
 }
+}
-int main(int argc, const char * argv[]){
+int main(int argc, const char * argv[])
-    if(!cv::text::cnn_config::caffe_backend::getCaffeAvailable()){
+{
-        std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n";
+    if (argc < 2)
-        //exit(1);
+    {
-    }
+        std::cout << getHelpStr(argv[0]);
-    std::vector<std::string> backends=cv::text::cnn_config::getAvailableBackends();
+        std::cout << "Insufiecient parameters. Aborting!" << std::endl;
-    std::cout << "The Following backends are available" << "\n";
-    for (int i=0;i<backends.size();i++)
-       std::cout << backends[i] << "\n";
-   // printf("%s",x);
-    //set to true if you have a GPU with more than 3GB
-     if(cv::text::cnn_config::caffe_backend::getCaffeAvailable())
-    cv::text::cnn_config::caffe_backend::setCaffeGpuMode(true);
-    if (argc < 3){
-        std::cout<<getHelpStr(argv[0]);
-        std::cout<<"Insufiecient parameters. Aborting!"<<std::endl;
        exit(1);
    }
    if (!fileExists("textbox.caffemodel") ||
-            !fileExists("textbox_deploy.prototxt")){
+            !fileExists("textbox_deploy.prototxt"))
-           // !fileExists("dictnet_vgg_labels.txt"))
+    {
-        std::cout<<getHelpStr(argv[0]);
-        std::cout<<"Model files not found in the current directory. Aborting!"<<std::endl;
-        exit(1);
-    }
-    if (fileExists(argv[1])){
        std::cout<<getHelpStr(argv[0]);
-        std::cout<<"Output file must not exist. Aborting!"<<std::endl;
+        std::cout << "Model files not found in the current directory. Aborting!" << std::endl;
        exit(1);
    }
-    cv::Mat image;
+    Mat image = imread(String(argv[1]), IMREAD_COLOR);
-    image = cv::imread(cv::String(argv[2]));
-    std::cout<<"Starting Text Box Demo"<<std::endl;
+    std::cout << "Starting Text Box Demo" << std::endl;
-    cv::Ptr<cv::text::textDetector> textSpotter=cv::text::textDetector::create(
+    Ptr<text::TextDetectorCNN> textSpotter =
-                "textbox_deploy.prototxt","textbox.caffemodel");
+            text::TextDetectorCNN::create("textbox_deploy.prototxt","textbox.caffemodel", false);
-    //cv::Ptr<cv::text::textDetector> wordSpotter=
+    std::vector<Rect> bbox;
-      //      cv::text::textDetector::create(cnn);
-    std::cout<<"Created Text Spotter with text Boxes";
-    std::vector<cv::Rect> bbox;
    std::vector<float> outProbabillities;
-    textSpotter->textDetectInImage(image,bbox,outProbabillities);
+    textSpotter->textDetectInImage(image, bbox, outProbabillities);
-   // textbox_draw(image, bbox,outProbabillities);
-    float thres =0.6f;
-    std::vector<cv::Mat> imageList;
-    for(int imageIdx=0;imageIdx<(int)bbox.size();imageIdx++){
-        if(outProbabillities[imageIdx]>thres){
-            imageList.push_back(image(bbox.at(imageIdx)));
-        }
-    }
-    // call dict net here for all detected parts
-    cv::Ptr<cv::text::DeepCNN> cnn=cv::text::DeepCNN::createDictNet(
-                "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel",cv::text::OCR_HOLISTIC_BACKEND_DNN);
-    cv::Ptr<cv::text::OCRHolisticWordRecognizer> wordSpotter=
-            cv::text::OCRHolisticWordRecognizer::create(cnn,"dictnet_vgg_labels.txt");
-    std::vector<cv::String> wordList;
-    std::vector<double> wordProbabillities;
-    wordSpotter->recogniseImageBatch(imageList,wordList,wordProbabillities);
-    // write the output in file
-    std::ofstream out;
-    out.open(argv[1]);
-    for (int i=0;i<(int)wordList.size(); i++)
-    {
-        cv::Point tl_ = bbox.at(i).tl();
-        cv::Point br_ = bbox.at(i).br();
-        out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl;
-    }
-    out.close();
-    textbox_draw(image, bbox,outProbabillities,wordList);
+    textbox_draw(image, bbox, outProbabillities, 0.5f);
-    cv::imshow("TextBox Demo",image);
+    imshow("TextBox Demo",image);
    std::cout << "Done!" << std::endl << std::endl;
    std::cout << "Press any key to exit." << std::endl << std::endl;
-    if ((cv::waitKey()&0xff) == ' ')
+    waitKey();
    return 0;
 }
--- a/modules/text/src/image_preprocessor.cpp
+++ b/modules/text/src/image_preprocessor.cpp
--- a/modules/text/src/ocr_holistic.cpp
+++ b/modules/text/src/ocr_holistic.cpp
--- a/modules/text/src/precomp.hpp
+++ b/modules/text/src/precomp.hpp
@@ -45,6 +45,8 @@
 #include "opencv2/text.hpp"
+#include "text_config.hpp"
 #ifdef HAVE_TESSERACT
 #if !defined(USE_STD_NAMESPACE)
 #define USE_STD_NAMESPACE

--- a/modules/text/src/text_detector.cpp
+++ b/modules/text/src/text_detector.cpp
-#include "precomp.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/core.hpp"
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <queue>
-#include <algorithm>
-#include <iosfwd>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-//#ifdef HAVE_CAFFE
-//#include "caffe/caffe.hpp"
-//#endif
-namespace cv { namespace text {
-class textDetectImpl: public textDetector{
-private:
-    struct NetOutput{
-        //Auxiliary structure that handles the logic of getting bounding box and confidences of textness from
-        //the raw outputs of caffe
-        Rect bbox;
-        float probability;
-        static void getOutputs(const float* buffer,int nbrTextBoxes,int nCol,std::vector<NetOutput>& res,Size inputShape)
-        {
-            res.resize(nbrTextBoxes);
-            for(int k=0;k<nbrTextBoxes;k++)
-            {
-                float x_min = buffer[k*nCol+3]*inputShape.width;
-                float y_min = buffer[k*nCol+4]*inputShape.height;
-                float x_max = buffer[k*nCol+5]*inputShape.width;
-                float y_max = buffer[k*nCol +6]*inputShape.height;
-                x_min = x_min<0?0:x_min;
-                y_min = y_min<0?0:y_min;
-                x_max = x_max> inputShape.width?inputShape.width-1:x_max;
-                y_max = y_max > inputShape.height?inputShape.height-1:y_max;
-                float wd = x_max-x_min+1;
-                float ht = y_max-y_min+1;
-                res[k].bbox=Rect(int(x_min),int(y_min),int(wd),int(ht));
-                res[k].probability=buffer[k*nCol+2];
-            }
-        }
-    };
-protected:
-    Ptr<TextRegionDetector> classifier_;
-public:
-    textDetectImpl(Ptr<TextRegionDetector> classifierPtr):classifier_(classifierPtr)
-    {
-    }
-    void textDetectInImage(InputArray inputImage,CV_OUT std::vector<Rect>& Bbox,CV_OUT std::vector<float>& confidence)
-    {
-                Mat netOutput;
-                // call the detect function of deepTextCNN class
-                this->classifier_->detect(inputImage,netOutput);
-               // get the output geometry i.e height and width of output blob from caffe
-                Size OutputGeometry_ = this->classifier_->getOutputGeometry();
-                int nbrTextBoxes = OutputGeometry_.height;
-                int nCol = OutputGeometry_.width;
-                std::vector<NetOutput> tmp;
-                // the output bounding box needs to be resized by the input height and width
-                Size inputImageShape = Size(inputImage.cols(),inputImage.rows());
-                NetOutput::getOutputs((float*)(netOutput.data),nbrTextBoxes,nCol,tmp,inputImageShape);
-                // put the output in CV_OUT
-                for (int k=0;k<nbrTextBoxes;k++)
-                {
-                    Bbox.push_back(tmp[k].bbox);
-                    confidence.push_back(tmp[k].probability);
-                }
-     }
-    void run(Mat& image, std::vector<Rect>* component_rects=NULL,
-             std::vector<float>* component_confidences=NULL,
-             int component_level=0)
-    {
-        CV_Assert(component_level==OCR_LEVEL_WORD);//Componnents not applicable for word spotting
-        std::vector<Rect> bbox;
-        std::vector<float> score;
-        textDetectInImage(image,bbox,score);
-        if(component_rects!=NULL)
-        {
-            component_rects->resize(bbox.size());  // should be a user behavior
-            component_rects = &bbox;
-        }
-        if(component_confidences!=NULL)
-        {
-            component_confidences->resize(score.size()); // shoub be a user behavior
-            component_confidences = &score;
-        }
-    }
-    void run(Mat& image, Mat& mask, std::vector<Rect>* component_rects=NULL,
-             std::vector<float>* component_confidences=NULL,
-             int component_level=0)
-    {
-        CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image
-        this->run(image,component_rects,component_confidences,component_level);
-    }
-    Ptr<TextRegionDetector> getClassifier()
-    {
-        return this->classifier_;
-    }
-};
-Ptr<textDetector> textDetector::create(Ptr<TextRegionDetector> classifierPtr)
-{
-    return Ptr<textDetector>(new textDetectImpl(classifierPtr));
-}
-Ptr<textDetector> textDetector::create(String modelArchFilename, String modelWeightsFilename)
-{
-// create a custom preprocessor with rawval
-    Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255);
-// set the mean for the preprocessor
-    Mat textbox_mean(1,3,CV_8U);
-    textbox_mean.at<uchar>(0,0)=104;
-    textbox_mean.at<uchar>(0,1)=117;
-    textbox_mean.at<uchar>(0,2)=123;
-    preprocessor->set_mean(textbox_mean);
-// create a pointer to text box detector(textDetector)
-    Ptr<TextRegionDetector> classifierPtr(DeepCNNTextDetector::create(modelArchFilename,modelWeightsFilename,preprocessor,1));
-    return Ptr<textDetector>(new textDetectImpl(classifierPtr));
-}
-}  } //namespace text namespace cv
--- a/modules/text/src/text_detectorCNN.cpp
+++ b/modules/text/src/text_detectorCNN.cpp
--- a/modules/text/text_config.hpp.in
+++ b/modules/text/text_config.hpp.in
 #ifndef __OPENCV_TEXT_CONFIG_HPP__
 #define __OPENCV_TEXT_CONFIG_HPP__
+// Defined when the Tesseract OCR library was found at configure time
+#cmakedefine HAVE_TESSERACT
 #endif