text: cleanup dnn text detection part

951e1827 · Vladislav Sovrasov · c33629e0 · 951e1827 · c33629e0 · c33629e0
Commit 951e1827 authored Oct 05, 2017 by Vladislav Sovrasov
19 changed files
--- a/modules/text/CMakeLists.txt
+++ b/modules/text/CMakeLists.txt
 set(the_description "Text Detection and Recognition")
+ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_dnn OPTIONAL opencv_highgui WRAP python java)

-if(POLICY CMP0023)
-  message(STATUS "Explicitly setting policy CMP0023 to OLD")
-  cmake_policy(SET CMP0023 OLD)
-endif(POLICY CMP0023)
-
-# Using cmake scripts and modules
-list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR})
-
-set(TEXT_DEPS opencv_ml opencv_highgui opencv_imgproc opencv_core opencv_features2d opencv_calib3d)
-
-find_package(Caffe)
-if(Caffe_FOUND)
-  message(STATUS "Caffe:   YES")
-  set(HAVE_CAFFE 1)
-else()
-  message(STATUS "Caffe:   NO")
-#  list(APPEND TEXT_DEPS opencv_dnn)
-endif()
-
-#internal dependencies
-find_package(Protobuf)
-if(Protobuf_FOUND)
-  message(STATUS "Protobuf:   YES")
-  set(HAVE_PROTOBUF 1)
-else()
-  message(STATUS "Protobuf:   NO")
-endif()
-
-find_package(Glog)
-if(Glog_FOUND)
-  message(STATUS "Glog:   YES")
-  set(HAVE_GLOG 1)
-else()
-  message(STATUS "Glog:   NO")
-endif()
-
-ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d OPTIONAL opencv_dnn WRAP python)
-#ocv_define_module(text ${TEXT_DEPS} WRAP python)
-
-#set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR})
-
-find_package(Tesseract)
-if(${Tesseract_FOUND})
+if(NOT CMAKE_CROSSCOMPILING OR OPENCV_FIND_TESSERACT)
+  set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
+  find_package(Tesseract QUIET)
+  if(Tesseract_FOUND)
    message(STATUS "Tesseract:   YES")
-  include_directories(${Tesseract_INCLUDE_DIR})
-  target_link_libraries(opencv_text ${Tesseract_LIBS})
-  add_definitions(-DHAVE_TESSERACT)
-else()
+    set(HAVE_TESSERACT 1)
+    ocv_include_directories(${Tesseract_INCLUDE_DIR})
+    ocv_target_link_libraries(${the_module} ${Tesseract_LIBRARIES})
+  else()
    message(STATUS "Tesseract:   NO")
  endif()
+endif()

+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/text_config.hpp.in
+               ${CMAKE_BINARY_DIR}/text_config.hpp @ONLY)

-if(HAVE_CAFFE AND HAVE_GLOG AND HAVE_PROTOBUF)
-  include_directories(${Caffe_INCLUDE_DIR})
-  find_package(HDF5 COMPONENTS HL REQUIRED)
-  include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR})
-  list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES})
-  find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem)
-  include_directories(SYSTEM ${Boost_INCLUDE_DIR})
-  include_directories(SYSTEM ${CUDA_INCLUDE_DIR})
-  link_directories(SYSTEM ${CUDA_LIBS})
- # include_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/include/ usr/local/cuda-8.0/include/ /usr/local/cuda-7.5/targets/x86_64-linux/include/ )
-  #link_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/lib/ usr/local/cuda-8.0/lib/ /usr/local/cuda-7.5/targets/x86_64-linux/lib/ /usr/lib/openblas-base/lib /usr/local/cuda-8.0/lib64)
-  list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES})
-  target_link_libraries(opencv_text atlas blas ${Caffe_LIBS} ${Glog_LIBS} ${Protobuf_LIBS} ${HDF5_LIBRARIES} ${Boost_LIBRARIES})
-  add_definitions(-DHAVE_CAFFE)
-endif() #HAVE_CAFFE
-
-message(STATUS "TEXT CAFFE SEARCH")
-if()
-  message(STATUS "TEXT NO CAFFE CONFLICT")
-else()
-  message(STATUS "TEXT CAFFE CONFLICT")
-endif()
+ocv_include_directories(${CMAKE_CURRENT_BINARY_DIR})

-if(HAVE_opencv_dnn)
-	message(STATUS "dnn module found")
-	add_definitions(-DHAVE_DNN)
-	set(HAVE_DNN 1)
-else()
-	message(STATUS "dnn module not found")
-endif()
+ocv_add_testdata(samples/ contrib/text
+    FILES_MATCHING PATTERN "*.xml" PATTERN "*.xml.gz" REGEX "scenetext[0-9]+.jpg"
+)
--- a/modules/text/FindCaffe.cmake
+++ b/modules/text/FindCaffe.cmake
-# Caffe package for CNN Triplet training
-unset(Caffe_FOUND)
-
-find_path(Caffe_INCLUDE_DIR NAMES caffe/caffe.hpp caffe/common.hpp caffe/net.hpp caffe/proto/caffe.pb.h caffe/util/io.hpp caffe/vision_layers.hpp
-  HINTS
-  /usr/local/include)
-
-find_library(Caffe_LIBS NAMES caffe
-  HINTS
-  /usr/local/lib)
-
-if(Caffe_LIBS AND Caffe_INCLUDE_DIR)
-    set(Caffe_FOUND 1)
-endif()
--- a/modules/text/FindGlog.cmake
+++ b/modules/text/FindGlog.cmake
-#Required for Caffe
-unset(Glog_FOUND)
-
-find_library(Glog_LIBS NAMES glog
-  HINTS
-  /usr/local/lib)
-
-if(Glog_LIBS)
-    set(Glog_FOUND 1)
-endif()
--- a/modules/text/FindProtobuf.cmake
+++ b/modules/text/FindProtobuf.cmake
-#Protobuf package required for Caffe
-unset(Protobuf_FOUND)
-
-find_library(Protobuf_LIBS NAMES protobuf
-  HINTS
-  /usr/local/lib)
-
-if(Protobuf_LIBS)
-    set(Protobuf_FOUND 1)
-endif()
--- a/modules/text/FindTesseract.cmake
+++ b/modules/text/FindTesseract.cmake
-# Tesseract OCR
-unset(Tesseract_FOUND)
-
-find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h
-  HINTS
-  /usr/include
-  /usr/local/include)
-
-find_library(Tesseract_LIBRARY NAMES tesseract
-  HINTS
-  /usr/lib
-  /usr/local/lib)
-
-find_library(Lept_LIBRARY NAMES lept
-  HINTS
-  /usr/lib
-  /usr/local/lib)
-
-set(Tesseract_LIBS ${Tesseract_LIBRARY} ${Lept_LIBRARY})
-if(Tesseract_LIBS AND Tesseract_INCLUDE_DIR)
-    set(Tesseract_FOUND 1)
-endif()
--- a/modules/text/README.md
+++ b/modules/text/README.md
@@ -56,74 +56,3 @@ Intro
 -----

 The text module now has text detection and recognition using deep CNNs. The text detector is a deep CNN that takes an image which may contain multiple words and outputs a list of Rects with bounding boxes and the probability of text there. The text recognizer provides a probability over a given vocabulary for each of these rects.
-
-Two backends are supported 1) caffe 2) opencv-dnn
-
-
-
-
-Instalation of Caffe backend
----------------------------
-* Please note a custom caffe based on SSD branch is required, the link of the custom caffe is provided below
-The caffe wrapping backend has the requirements caffe does.
-* Caffe can be built against OpenCV, if the caffe backend is enabled, a circular bependency arises.
-The simplest solution is to build caffe without support for OpenCV.
-* Only the OS supported by Caffe are supported by the backend.
-The scripts describing the module have been developed in ubuntu 16.04 and assume such a system.
-Other UNIX systems including OSX should be easy to adapt.
-
-Sample script for building Caffe
-
-```bash
-#!/bin/bash
-SRCROOT="${HOME}/caffe_inst/"
-mkdir -p "$SRCROOT"
-cd "$SRCROOT"
-git clone https://github.com/sghoshcvc/TextBoxes.git
-cd TextBoxes
-cat Makefile.config.example  > Makefile.config
-echo 'USE_OPENCV := 0' >> Makefile.config
-echo 'INCLUDE_DIRS += /usr/include/hdf5/serial/' >> Makefile.config
-echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config
-
-
-echo "--- /tmp/caffe/include/caffe/net.hpp	2017-05-28 04:55:47.929623902 +0200
-+++ caffe/distribute/include/caffe/net.hpp	2017-05-28 04:51:33.437090768 +0200
-@@ -234,6 +234,7 @@
-
-     template <typename T>
-     friend class Net;
-+    virtual ~Callback(){}
-   };
-   const vector<Callback*>& before_forward() const { return before_forward_; }
-   void add_before_forward(Callback* value) {
-">/tmp/cleanup_caffe.diff
-
-patch < /tmp/cleanup_caffe.diff
-
-
-make -j 6
-
-make pycaffe
-
-make distribute
-```
-
-
-```bash
-#!/bin/bash
-cd $OPENCV_BUILD_DIR #You must set this
-CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe in ubuntu 16.04
-
-cmake  -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0"  -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="$OPENCV_CONTRIB/modules"   ./
-
-
-```
-where $OPECV_CONTRIB is the root directory containing opencv_contrib module
-
-Instalation of Caffe backend
----------------------------
-
-Use of opencv-dnn does not need any additional library.
-
-The recent opencv-3.3.0 needs to be build with extra modules to use text module.
--- a/modules/text/cmake/FindTesseract.cmake
+++ b/modules/text/cmake/FindTesseract.cmake
@@ -5,14 +5,17 @@ endif()
 if(NOT Tesseract_FOUND)
  find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h
    HINTS
+    /usr/include
    /usr/local/include)

  find_library(Tesseract_LIBRARY NAMES tesseract
    HINTS
+    /usr/lib
    /usr/local/lib)

  find_library(Lept_LIBRARY NAMES lept
    HINTS
+    /usr/lib
    /usr/local/lib)

  if(Tesseract_INCLUDE_DIR AND Tesseract_LIBRARY AND Lept_LIBRARY)

--- a/modules/text/include/opencv2/text.hpp
+++ b/modules/text/include/opencv2/text.hpp
@@ -93,7 +93,7 @@ grouping horizontally aligned text, and the method proposed by Lluis Gomez and D
 in @cite Gomez13 @cite Gomez14 for grouping arbitrary oriented text (see erGrouping).

 To see the text detector at work, have a look at the textdetection demo:
-<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>
+<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>

    @defgroup text_recognize Scene Text Recognition
  @}

--- a/modules/text/include/opencv2/text/erfilter.hpp
+++ b/modules/text/include/opencv2/text/erfilter.hpp
@@ -65,7 +65,6 @@ component tree of the image. :
 */
 struct CV_EXPORTS ERStat
 {
-
 public:
    //! Constructor
    explicit ERStat(int level = 256, int pixel = 0, int x = 0, int y = 0);

--- a/modules/text/include/opencv2/text/ocr.hpp
+++ b/modules/text/include/opencv2/text/ocr.hpp
@@ -44,12 +44,10 @@
 #ifndef __OPENCV_TEXT_OCR_HPP__
 #define __OPENCV_TEXT_OCR_HPP__

+#include <opencv2/core.hpp>
+
 #include <vector>
 #include <string>
-#include <iostream>
-#include <sstream>
-
-

 namespace cv
 {
@@ -91,100 +89,61 @@ enum ocr_engine_mode
 };

 //base class BaseOCR declares a common API that would be used in a typical text recognition scenario
-
 class CV_EXPORTS_W BaseOCR
 {
- public:
+public:
    virtual ~BaseOCR() {};
-
-    virtual void run(Mat& image, std::string& output_text,
-                     std::vector<Rect>* component_rects=NULL,
-                     std::vector<std::string>* component_texts=NULL,
-                     std::vector<float>* component_confidences=NULL,
+    virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
                     int component_level=0) = 0;
-
-    virtual void run(Mat& image, Mat& mask, std::string& output_text,
-                     std::vector<Rect>* component_rects=NULL,
-                     std::vector<std::string>* component_texts=NULL,
-                     std::vector<float>* component_confidences=NULL,
+    virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
                     int component_level=0) = 0;
-
-    /** @brief Main functionality of the OCR Hierarchy. Subclasses provide
-     * default parameters for all parameters other than the input image.
-     */
-    virtual String run(InputArray image){
-        std::string res;
-        std::vector<Rect> component_rects;
-        std::vector<float> component_confidences;
-        std::vector<std::string> component_texts;
-        Mat inputImage=image.getMat();
-        this->run(inputImage,res,&component_rects,&component_texts,
-                  &component_confidences,OCR_LEVEL_WORD);
-        return res;
-    }
-
 };

-/** @brief OCRTesseract class provides an interface with the tesseract-ocr API
- * (v3.02.02) in C++.
+/** @brief OCRTesseract class provides an interface with the tesseract-ocr API (v3.02.02) in C++.

 Notice that it is compiled only when tesseract-ocr is correctly installed.

 @note
-   -   (C++) An example of OCRTesseract recognition combined with scene text
-        detection can be found at the end_to_end_recognition demo:
-        <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/end_to_end_recognition.cpp>
-    -   (C++) Another example of OCRTesseract recognition combined with scene
-        text detection can be found at the webcam_demo:
-        <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
+   -   (C++) An example of OCRTesseract recognition combined with scene text detection can be found
+        at the end_to_end_recognition demo:
+        <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/end_to_end_recognition.cpp>
+    -   (C++) Another example of OCRTesseract recognition combined with scene text detection can be
+        found at the webcam_demo:
+        <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
 */
 class CV_EXPORTS_W OCRTesseract : public BaseOCR
 {
 public:
    /** @brief Recognize text using the tesseract-ocr API.

-    Takes image on input and returns recognized text in the output_text
-    parameter. Optionally provides also the Rects for individual text elements
-    found (e.g. words), and the list of those text elements with their
-    confidence values.
+    Takes image on input and returns recognized text in the output_text parameter. Optionally
+    provides also the Rects for individual text elements found (e.g. words), and the list of those
+    text elements with their confidence values.

    @param image Input image CV_8UC1 or CV_8UC3
-
    @param output_text Output text of the tesseract-ocr.
-
-    @param component_rects If provided the method will output a list of Rects
-    for the individual text elements found (e.g. words or text lines).
-
-    @param component_texts If provided the method will output a list of text
-    strings for the recognition of individual text elements found (e.g. words or
-    text lines).
-
-    @param component_confidences If provided the method will output a list of
-    confidence values for the recognition of individual text elements found
-    (e.g. words or text lines).
-
-    @param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXT_LINE.
-
+    @param component_rects If provided the method will output a list of Rects for the individual
+    text elements found (e.g. words or text lines).
+    @param component_texts If provided the method will output a list of text strings for the
+    recognition of individual text elements found (e.g. words or text lines).
+    @param component_confidences If provided the method will output a list of confidence values
+    for the recognition of individual text elements found (e.g. words or text lines).
+    @param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXTLINE.
     */
-    using BaseOCR::run;
-    virtual void run (Mat& image, std::string& output_text,
-                     std::vector<Rect>* component_rects=NULL,
-                     std::vector<std::string>* component_texts=NULL,
-                     std::vector<float>* component_confidences=NULL,
+    virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
                     int component_level=0);

-    virtual void run (Mat& image, Mat& mask, std::string& output_text,
-                      std::vector<Rect>* component_rects=NULL,
-                      std::vector<std::string>* component_texts=NULL,
-                      std::vector<float>* component_confidences=NULL,
+    virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
                     int component_level=0);

    // aliases for scripting
-    CV_WRAP String run (InputArray image, int min_confidence,
-                        int component_level=0);
+    CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);

-    CV_WRAP String run(InputArray image, InputArray mask,
-                       int min_confidence, int component_level=0);
+    CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);

    CV_WRAP virtual void setWhiteList(const String& char_whitelist) = 0;

@@ -205,7 +164,6 @@ public:
     */
    CV_WRAP static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
                                    const char* char_whitelist=NULL, int oem=OEM_DEFAULT, int psmode=PSM_AUTO);
-
 };


@@ -225,19 +183,19 @@ enum classifier_type

 /** @brief OCRHMMDecoder class provides an interface for OCR using Hidden Markov Models.

-
- * @note
- * -   (C++) An example on using OCRHMMDecoder recognition combined with scene
- *     text detection can be found at the webcam_demo sample:
- *      <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
+@note
+   -   (C++) An example on using OCRHMMDecoder recognition combined with scene text detection can
+        be found at the webcam_demo sample:
+        <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
 */
-class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR {
- public:
+class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR
+{
+public:

    /** @brief Callback with the character classifier is made a class.

-    * This way it hides the feature extractor and the classifier itself, so
-    * developers can write their own OCR code.
+    This way it hides the feature extractor and the classifier itself, so developers can write
+    their own OCR code.

    The default character classifier and feature extractor can be loaded using the utility function
    loadOCRHMMClassifierNM and KNN model provided in
@@ -246,120 +204,92 @@ class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR {
    class CV_EXPORTS_W ClassifierCallback
    {
    public:
-
        virtual ~ClassifierCallback() { }
-        /** @brief The character classifier must return a (ranked list of)
-         * class(es) id('s)
-
-         * @param image Input image CV_8UC1 or CV_8UC3 with a single letter.
-         * @param out_class The classifier returns the character class
-         * categorical label, or list of class labels, to which the input image
-         * corresponds.
+        /** @brief The character classifier must return a (ranked list of) class(es) id('s)

-         * @param out_confidence The classifier returns the probability of the
-         * input image corresponding to each classes in out_class.
+        @param image Input image CV_8UC1 or CV_8UC3 with a single letter.
+        @param out_class The classifier returns the character class categorical label, or list of
+        class labels, to which the input image corresponds.
+        @param out_confidence The classifier returns the probability of the input image
+        corresponding to each class in out_class.
         */
-        virtual void eval (InputArray image, std::vector<int>& out_class,
-                           std::vector<double>& out_confidence);
+        virtual void eval( InputArray image, std::vector<int>& out_class, std::vector<double>& out_confidence);
    };

+public:
    /** @brief Recognize text using HMM.

-    * Takes binary image on input and returns recognized text in the output_text
-    * parameter. Optionally provides also the Rects for individual text elements
-    * found (e.g. words), and the list of those text elements with their
-    * confidence values.
+    Takes binary image on input and returns recognized text in the output_text parameter. Optionally
+    provides also the Rects for individual text elements found (e.g. words), and the list of those
+    text elements with their confidence values.

-    * @param image Input binary image CV_8UC1 with a single text line (or word).
+    @param image Input binary image CV_8UC1 with a single text line (or word).

-    * @param output_text Output text. Most likely character sequence found by
-    * the HMM decoder.
+    @param output_text Output text. Most likely character sequence found by the HMM decoder.

-    * @param component_rects If provided the method will output a list of Rects
-    * for the individual text elements found (e.g. words).
+    @param component_rects If provided the method will output a list of Rects for the individual
+    text elements found (e.g. words).

-    * @param component_texts If provided the method will output a list of text
-    * strings for the recognition of individual text elements found (e.g. words).
+    @param component_texts If provided the method will output a list of text strings for the
+    recognition of individual text elements found (e.g. words).

-    * @param component_confidences If provided the method will output a list of
-    * confidence values for the recognition of individual text elements found
-    * (e.g. words).
+    @param component_confidences If provided the method will output a list of confidence values
+    for the recognition of individual text elements found (e.g. words).

-    * @param component_level Only OCR_LEVEL_WORD is supported.
+    @param component_level Only OCR_LEVEL_WORD is supported.
     */
-    using BaseOCR::run;
-    virtual void run (Mat& image, std::string& output_text,
-                      std::vector<Rect>* component_rects=NULL,
-                      std::vector<std::string>* component_texts=NULL,
-                      std::vector<float>* component_confidences=NULL,
+    virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
                     int component_level=0);

    /** @brief Recognize text using HMM.

-    * Takes an image and a mask (where each connected component corresponds to a
-    * segmented character) on input and returns recognized text in the
-    * output_text parameter. Optionally provides also the Rects for individual
-    * text elements found (e.g. words), and the list of those text elements with
-    * their confidence values.
-
-    * @param image Input image CV_8UC1 or CV_8UC3 with a single text line
-    * (or word).
+    Takes an image and a mask (where each connected component corresponds to a segmented character)
+    on input and returns recognized text in the output_text parameter. Optionally
+    provides also the Rects for individual text elements found (e.g. words), and the list of those
+    text elements with their confidence values.

-    * @param mask Input binary image CV_8UC1 same size as input image. Each
-    * connected component in mask corresponds to a segmented character in the
-    * input image.
+    @param image Input image CV_8UC1 or CV_8UC3 with a single text line (or word).
+    @param mask Input binary image CV_8UC1 same size as input image. Each connected component in mask corresponds to a segmented character in the input image.

-    * @param output_text Output text. Most likely character sequence found by
-    * the HMM decoder.
+    @param output_text Output text. Most likely character sequence found by the HMM decoder.

-    * @param component_rects If provided the method will output a list of Rects
-    * for the individual text elements found (e.g. words).
+    @param component_rects If provided the method will output a list of Rects for the individual
+    text elements found (e.g. words).

-    * @param component_texts If provided the method will output a list of text
-    * strings for the recognition of individual text elements found (e.g. words).
+    @param component_texts If provided the method will output a list of text strings for the
+    recognition of individual text elements found (e.g. words).

-    * @param component_confidences If provided the method will output a list of
-    * confidence values for the recognition of individual text elements found
-    * (e.g. words).
+    @param component_confidences If provided the method will output a list of confidence values
+    for the recognition of individual text elements found (e.g. words).

-    * @param component_level Only OCR_LEVEL_WORD is supported.
+    @param component_level Only OCR_LEVEL_WORD is supported.
     */
-    virtual void run(Mat& image, Mat& mask, std::string& output_text,
-                     std::vector<Rect>* component_rects=NULL,
-                     std::vector<std::string>* component_texts=NULL,
-                     std::vector<float>* component_confidences=NULL,
+    virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
                     int component_level=0);

    // aliases for scripting
-    CV_WRAP String run(InputArray image,
-                       int min_confidence,
-                       int component_level=0);
+    CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);

-    CV_WRAP String run(InputArray image,
-                       InputArray mask,
-                       int min_confidence,
-                       int component_level=0);
+    CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);

-    /** @brief Creates an instance of the OCRHMMDecoder class. Initializes
-     * HMMDecoder.
+    /** @brief Creates an instance of the OCRHMMDecoder class. Initializes HMMDecoder.

-     * @param classifier The character classifier with built in feature
-     * extractor.
+    @param classifier The character classifier with built in feature extractor.

-     * @param vocabulary The language vocabulary (chars when ascii english text)
-     * . vocabulary.size() must be equal to the number of classes of the
-     * classifier.
+    @param vocabulary The language vocabulary (chars when ascii english text). vocabulary.size()
+    must be equal to the number of classes of the classifier.

-     * @param transition_probabilities_table Table with transition probabilities
-     * between character pairs. cols == rows == vocabulary.size().
+    @param transition_probabilities_table Table with transition probabilities between character
+    pairs. cols == rows == vocabulary.size().

-     * @param emission_probabilities_table Table with observation emission
-     * probabilities. cols == rows == vocabulary.size().
+    @param emission_probabilities_table Table with observation emission probabilities. cols ==
+    rows == vocabulary.size().

-     * @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available
-     * for the moment (<http://en.wikipedia.org/wiki/Viterbi_algorithm>).
+    @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available for the moment
+    (<http://en.wikipedia.org/wiki/Viterbi_algorithm>).
     */
-
    static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
                                     const std::string& vocabulary,                    // The language vocabulary (chars when ASCII English text)
                                                                                       //     size() must be equal to the number of classes
@@ -402,11 +332,9 @@ protected:
    decoder_mode mode;
 };

-/** @brief Allow to implicitly load the default character classifier when
- * creating an OCRHMMDecoder object.
-
- @param filename The XML or YAML file with the classifier model (e.g.OCRHMM_knn_model_data.xml)
+/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object.

+@param filename The XML or YAML file with the classifier model (e.g. OCRHMM_knn_model_data.xml)

 The KNN default classifier is based on the scene text recognition method proposed by Lukás Neumann &
 Jiri Matas in [Neumann11b]. Basically, the region (contour) in the input image is normalized to a
@@ -416,16 +344,11 @@ using a KNN model trained with synthetic data of rendered characters with differ
 types.

 @deprecated use loadOCRHMMClassifier instead
-
 */
-CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM (
-        const String& filename);

-/** @brief Allow to implicitly load the default character classifier when
- * creating an OCRHMMDecoder object.
-
- @param filename The XML or YAML file with the classifier model (e.g.OCRBeamSearch_CNN_model_data.xml.gz)
+CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename);

+/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object.

 @param filename The XML or YAML file with the classifier model (e.g. OCRBeamSearch_CNN_model_data.xml.gz)

@@ -435,10 +358,8 @@ a linear classifier. It is applied to the input image in a sliding window fashio
 at each window location.

 @deprecated use loadOCRHMMClassifier instead
-
 */
-CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN (
-        const String& filename);
+CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename);

 /** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object.

@@ -450,63 +371,48 @@ CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN (
 CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifier(const String& filename, int classifier);
 //! @}

-
 /** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
 *
 * @param vocabulary The language vocabulary (chars when ASCII English text).
 *
 * @param lexicon The list of words that are expected to be found in a particular image.
-
- * @param transition_probabilities_table Output table with transition
- * probabilities between character pairs. cols == rows == vocabulary.size().
-
- * The function calculate frequency statistics of character pairs from the given
- * lexicon and fills the output transition_probabilities_table with them. The
- * transition_probabilities_table can be used as input in the
- * OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods.
+ *
+ * @param transition_probabilities_table Output table with transition probabilities between character pairs. cols == rows == vocabulary.size().
+ *
+ * The function calculates frequency statistics of character pairs from the given lexicon and fills the output transition_probabilities_table with them. The transition_probabilities_table can be used as input in the OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods.
 * @note
- *    -   (C++) An alternative would be to load the default generic language
- *        transition table provided in the text module samples folder (created
- *        from ispell 42869 english words list) :
- *            <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
+ *    -   (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from the ispell list of 42869 English words):
+ *            <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
 **/
-CV_EXPORTS void createOCRHMMTransitionsTable (
-        std::string& vocabulary, std::vector<std::string>& lexicon,
-        OutputArray transition_probabilities_table);
+CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector<std::string>& lexicon, OutputArray transition_probabilities_table);
+
+CV_EXPORTS_W Mat createOCRHMMTransitionsTable(const String& vocabulary, std::vector<cv::String>& lexicon);

-CV_EXPORTS_W Mat createOCRHMMTransitionsTable (
-        const String& vocabulary, std::vector<cv::String>& lexicon);

 /* OCR BeamSearch Decoder */

-/** @brief OCRBeamSearchDecoder class provides an interface for OCR using Beam
- * Search algorithm.
+/** @brief OCRBeamSearchDecoder class provides an interface for OCR using Beam Search algorithm.

 @note
-   -   (C++) An example on using OCRBeamSearchDecoder recognition combined with
-        scene text detection can be found at the demo sample:
-        <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
+   -   (C++) An example on using OCRBeamSearchDecoder recognition combined with scene text detection can
+        be found at the demo sample:
+        <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
 */
-
-
-/* Forward declaration of class that can be used to generate an OCRBeamSearchDecoder::ClassifierCallbac */
-class TextImageClassifier;
-
-class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR{
-
- public:
+class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR
+{
+public:

    /** @brief Callback with the character classifier is made a class.

-     * This way it hides the feature extractor and the classifier itself, so
-     * developers can write their own OCR code.
+    This way it hides the feature extractor and the classifier itself, so developers can write
+    their own OCR code.

-     * The default character classifier and feature extractor can be loaded
-     * using the utility funtion loadOCRBeamSearchClassifierCNN with all its
-     * parameters provided in
-     * <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
+    The default character classifier and feature extractor can be loaded using the utility function
+    loadOCRBeamSearchClassifierCNN with all its parameters provided in
+    <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
     */
-    class CV_EXPORTS_W ClassifierCallback{
+    class CV_EXPORTS_W ClassifierCallback
+    {
    public:
        virtual ~ClassifierCallback() { }
        /** @brief The character classifier must return a (ranked list of) class(es) id('s)
@@ -519,8 +425,8 @@ class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR{
         */
        virtual void eval( InputArray image, std::vector< std::vector<double> >& recognition_probabilities, std::vector<int>& oversegmentation );

-        virtual int getWindowSize() {return 0;}
-        virtual int getStepSize() {return 0;}
+        int getWindowSize() {return 0;}
+        int getStepSize() {return 0;}
    };

 public:
@@ -545,7 +451,6 @@ public:

    @param component_level Only OCR_LEVEL_WORD is supported.
     */
-    using BaseOCR::run;
    virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
                     int component_level=0);
@@ -577,7 +482,6 @@ public:

    @param beam_size Size of the beam in Beam Search algorithm.
     */
-
    static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
                                     const std::string& vocabulary,                    // The language vocabulary (chars when ASCII English text)
                                                                                       //     size() must be equal to the number of classes
@@ -598,29 +502,10 @@ public:
                                     int mode = OCR_DECODER_VITERBI,          // HMM Decoding algorithm (only Viterbi for the moment)
                                     int beam_size = 500);                              // Size of the beam in Beam Search algorithm

-
-
-
-
    /** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder from the specified path.

    @overload

-    @param filename path to a character classifier file
-
-    @param vocabulary The language vocabulary (chars when ASCII English text). vocabulary.size()
-    must be equal to the number of classes of the classifier..
-
-    @param transition_probabilities_table Table with transition probabilities between character
-    pairs. cols == rows == vocabulary.size().
-
-    @param emission_probabilities_table Table with observation emission probabilities. cols ==
-    rows == vocabulary.size().
-
-    @param mode HMM Decoding algorithm (only Viterbi for the moment)
-
-    @param beam_size Size of the beam in Beam Search algorithm
-
     */
    CV_WRAP static Ptr<OCRBeamSearchDecoder> create(const String& filename, // The character classifier file
                                     const String& vocabulary,                    // The language vocabulary (chars when ASCII English text)
@@ -631,7 +516,6 @@ public:
                                                                                       //     cols == rows == vocabulary.size()
                                     int mode = OCR_DECODER_VITERBI,          // HMM Decoding algorithm (only Viterbi for the moment)
                                     int beam_size = 500);
-
 protected:

    Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier;
@@ -656,402 +540,6 @@ CV_EXPORTS_W Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClas

 //! @}

-
-//Classifiers should provide diferent backends
-
-enum{
-    OCR_HOLISTIC_BACKEND_NONE, //No back end
-    OCR_HOLISTIC_BACKEND_DNN, // dnn backend opencv_dnn
-    OCR_HOLISTIC_BACKEND_CAFFE, // caffe based backend
-    OCR_HOLISTIC_BACKEND_DEFAULT // to store default value based on environment
-};
-
-class TextImageClassifier;
-
-/**
- * @brief The ImagePreprocessor class
- */
-class CV_EXPORTS_W ImagePreprocessor{
-protected:
-    virtual void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels)=0;
-    virtual void set_mean_(Mat){}
-
-public:
-    virtual ~ImagePreprocessor(){}
-
-    /** @brief this method in provides public acces to the preprocessing with respect to a specific
-     * classifier
-     *
-     * This method's main use would be to use the preprocessor without feeding it to a classifier.
-     * Determining the exact behavior of a preprocessor is the main motivation for this.
-     *
-     * @param input an image without any constraints
-     *
-     * @param output in most cases an image of fixed depth size and whitened
-     *
-     * @param sz the size to which the image would be resize if the preprocessor resizes inputs
-     *
-     * @param outputChannels the number of channels for the output image
-     */
-    CV_WRAP void preprocess(InputArray input,OutputArray output,Size sz,int outputChannels);
-
-    /** @brief this method in provides public acces to set the mean of the input images
-     * mean can be a mat either of same size of the image or one value per color channel
-     * A preprocessor can be created without the mean( the pre processor will calculate mean for every image
-     * in that case
-     *
-
-     * @param mean which will be subtracted from the images
-     *
-     */
-
-    CV_WRAP void set_mean(Mat mean);
-
-    /** @brief Creates a functor that only resizes and changes the channels of the input
-     *  without further processing.
-     *
-     * @return shared pointer to the generated preprocessor
-     */
-    CV_WRAP static Ptr<ImagePreprocessor> createResizer();
-
-    /** @brief
-     *
-     * @param sigma
-     *
-     * @return shared pointer to generated preprocessor
-     */
-    CV_WRAP static Ptr<ImagePreprocessor> createImageStandarizer(double sigma);
-
-    /** @brief
-     *
-     * @return shared pointer to generated preprocessor
-     */
-    CV_WRAP static Ptr<ImagePreprocessor> createImageMeanSubtractor(InputArray meanImg);
-    /** @brief
-     * create a functor with the parameters, parameters can be changes by corresponding set functions
-     * @return shared pointer to generated preprocessor
-     */
-
-    CV_WRAP static Ptr<ImagePreprocessor>createImageCustomPreprocessor(double rawval=1.0,String channel_order="BGR");
-
-    friend class TextImageClassifier;
-
-};
-
-/** @brief Abstract class that implements the classifcation of text images.
- *
- * The interface is generic enough to describe any image classifier. And allows
- * to take advantage of compouting in batches. While word classifiers are the default
- * networks, any image classifers should work.
- *
- */
-class CV_EXPORTS_W TextImageClassifier
-{
-protected:
-    Size inputGeometry_;
-    Size outputGeometry_;
-    int channelCount_;
-    Ptr<ImagePreprocessor> preprocessor_;
-    /** @brief all image preprocessing is handled here including whitening etc.
-     *
-         *  @param input the image to be preprocessed for the classifier. If the depth
-     * is CV_U8 values should be in [0,255] otherwise values are assumed to be in [0,1]
-     *
-     * @param output reference to the image to be fed to the classifier, the preprocessor will
-     * resize the image to the apropriate size and convert it to the apropriate depth\
-     *
-     * The method preprocess should never be used externally, it is up to classify and classifyBatch
-     * methods to employ it.
-     */
-    virtual void preprocess(const Mat& input,Mat& output);
-public:
-    virtual ~TextImageClassifier() {}
-
-    /** @brief
-     */
-    CV_WRAP virtual void setPreprocessor(Ptr<ImagePreprocessor> ptr);
-
-    /** @brief
-     */
-    CV_WRAP Ptr<ImagePreprocessor> getPreprocessor();
-
-    /** @brief produces a class confidence row-vector given an image
-     */
-    CV_WRAP virtual void classify(InputArray image, OutputArray classProbabilities) = 0;
-
-    /** @brief produces a matrix containing class confidence row-vectors given an collection of images
-     */
-    CV_WRAP virtual void classifyBatch(InputArrayOfArrays image, OutputArray classProbabilities) = 0;
-
-    /** @brief simple getter method returning the number of channels each input sample has
-     */
-    CV_WRAP virtual int getInputChannelCount(){return this->channelCount_;}
-
-    /** @brief simple getter method returning the size of the input sample
-     */
-    CV_WRAP virtual Size getInputSize(){return this->inputGeometry_;}
-
-    /** @brief simple getter method returning the size of the oputput row-vector
-     */
-    CV_WRAP virtual int getOutputSize()=0;
-    /** @brief simple getter method returning the shape of the oputput from caffe
-     */
-    CV_WRAP virtual Size getOutputGeometry()=0;
-
-    /** @brief simple getter method returning the size of the minibatches for this classifier.
-     * If not applicabe this method should return 1
-     */
-    CV_WRAP virtual int getMinibatchSize()=0;
-
-    friend class ImagePreprocessor;
-};
-
-
-
-class CV_EXPORTS_W DeepCNN:public TextImageClassifier
-{
-    /** @brief Class that uses a pretrained caffe model for word classification.
-     *
-     * This network is described in detail in:
-     * Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015
-     * http://arxiv.org/abs/1412.1842
-     */
-public:
-    virtual ~DeepCNN() {};
-
-    /** @brief Constructs a DeepCNN object from a caffe pretrained model
-     *
-     * @param archFilename is the path to the prototxt file containing the deployment model architecture description.
-     *
-     * @param weightsFilename is the path to the pretrained weights of the model in binary fdorm. This file can be
-     * very large, up to 2GB.
-     *
-     * @param preprocessor is a pointer to the instance of a ImagePreprocessor implementing the preprocess_ protecteed method;
-     *
-     * @param minibatchSz the maximum number of samples that can processed in parallel. In practice this parameter
-     * has an effect only when computing in the GPU and should be set with respect to the memory available in the GPU.
-     *
-     * @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
-     * the only option
-     */
-    CV_WRAP static Ptr<DeepCNN> create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT);
-
-    /** @brief Constructs a DeepCNN intended to be used for word spotting.
-     *
-     * This method loads a pretrained classifier and couples him with a preprocessor that standarises pixels with a
-     * deviation of 113. The architecture file can be downloaded from:
-     * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt>
-     * While the weights can be downloaded from:
-     * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel>
-     * The words assigned to the network outputs are available at:
-     * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt>
-     *
-     * @param archFilename is the path to the prototxt file containing the deployment model architecture description.
-     * When employing OCR_HOLISTIC_BACKEND_CAFFE this is the path to the deploy ".prototxt".
-     *
-     * @param weightsFilename is the path to the pretrained weights of the model. When employing
-     * OCR_HOLISTIC_BACKEND_CAFFE this is the path to the ".caffemodel" file. This file can be very large, the
-     * pretrained DictNet uses 2GB.
-     *
-     * @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
-     * the only option
-     */
-    CV_WRAP static Ptr<DeepCNN> createDictNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT);
-
-};
-
-namespace cnn_config{
-
-/** @brief runtime backend information
- *
- * this function finds the status of backends compiled with this module
- *
- * @return a list of backends (caffe,opencv-dnn etc.)
- * */
-CV_EXPORTS_W std::vector<std::string> getAvailableBackends();
-
-namespace caffe_backend{
-
-/** @brief Prompts Caffe on the computation device beeing used
- *
- * Caffe can only be controlled globally on whether the GPU or the CPU is used has a
- * global behavior. This function queries the current state of caffe.
- * If the module is built without caffe, this method throws an exception.
- *
- * @return true if caffe is computing on the GPU, false if caffe is computing on the CPU
- */
-CV_EXPORTS_W bool getCaffeGpuMode();
-
-/** @brief Sets the computation device beeing used by Caffe
- *
- * Caffe can only be controlled globally on whether the GPU or the CPU is used has a
- * global behavior. This function queries the current state of caffe.
- * If the module is built without caffe, this method throws an exception.
- *
- * @param useGpu  set to true for caffe to be computing on the GPU, false if caffe is
- * computing on the CPU
- */
-CV_EXPORTS_W void setCaffeGpuMode(bool useGpu);
-
-/** @brief Provides runtime information on whether Caffe support was compiled in.
- *
- * The text module API is the same regardless of whether CAffe was available or not
- * During compilation. When methods that require Caffe are invocked while Caffe support
- * is not compiled in, exceptions are thrown. This method allows to test whether the
- * text module was built with caffe during runtime.
- *
- * @return true if Caffe support for the the text module was provided during compilation,
- * false if Caffe was unavailable.
- */
-CV_EXPORTS_W bool getCaffeAvailable();
-
-}//caffe
-namespace dnn_backend {
-
-/** @brief Provides runtime information on whether DNN module was compiled in.
- *
- * The text module API is the same regardless of whether DNN module was available or not
- * During compilation. When methods that require backend are invocked while no backend support
- * is compiled, exceptions are thrown. This method allows to test whether the
- * text module was built with dnn_backend during runtime.
- *
- * @return true if opencv_dnn support for the the text module was provided during compilation,
- * false if opencv_dnn was unavailable.
- */
-CV_EXPORTS_W bool getDNNAvailable();
-
-}//dnn_backend
-}//cnn_config
-
-/** @brief OCRHolisticWordRecognizer class provides the functionallity of segmented wordspotting.
- * Given a predefined vocabulary , a TextImageClassifier is employed to select the most probable
- * word given an input image.
- *
- * This class implements the logic of providing transcriptions given a vocabulary and and an image
- * classifer. The classifier has to be any TextImageClassifier but the classifier for which this
- * class was built is the DictNet. In order to load it the following files should be downloaded:
-
- * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt>
- * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel>
- * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt>
- */
-class CV_EXPORTS_W OCRHolisticWordRecognizer : public BaseOCR
-{
-public:
-    virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
-                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
-                     int component_level=OCR_LEVEL_WORD)=0;
-
-    /** @brief Recognize text using a segmentation based word-spotting/classifier cnn.
-
-    Takes image on input and returns recognized text in the output_text parameter. Optionally
-    provides also the Rects for individual text elements found (e.g. words), and the list of those
-    text elements with their confidence values.
-
-    @param image Input image CV_8UC1 or CV_8UC3
-
-    @param mask is totally ignored and is only available for compatibillity reasons
-
-    @param output_text Output text of the the word spoting, always one that exists in the dictionary.
-
-    @param component_rects Not applicable for word spotting can be be NULL if not, a single elemnt will
-        be put in the vector.
-
-    @param component_texts Not applicable for word spotting can be be NULL if not, a single elemnt will
-        be put in the vector.
-
-    @param component_confidences Not applicable for word spotting can be be NULL if not, a single elemnt will
-        be put in the vector.
-
-    @param component_level must be OCR_LEVEL_WORD.
-     */
-
-    virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
-                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
-                     int component_level=OCR_LEVEL_WORD)=0;
-
-
-    /**
-    @brief Method that provides a quick and simple interface to a single word image classifcation
-
-    @param inputImage an image expected to be a CV_U8C1 or CV_U8C3 of any size assumed to contain a single word
-
-    @param transcription an opencv string that will store the detected word transcription
-
-    @param confidence a double that will be updated with the confidence the classifier has for the selected word
-    */
-    CV_WRAP virtual void recogniseImage(InputArray inputImage,CV_OUT String& transcription,CV_OUT double& confidence)=0;
-
-    /**
-    @brief Method that provides a quick and simple interface to a multiple word image classifcation taking advantage
-    the classifiers parallel capabilities.
-
-    @param inputImageList an list of images expected to be a CV_U8C1 or CV_U8C3 each image can be of any size and is assumed
-    to contain a single word.
-
-    @param transcriptions a vector of opencv strings that will store the detected word transcriptions, one for each
-    input image
-
-    @param confidences a vector of double that will be updated with the confidence the classifier has for each of the
-    selected words.
-    */
-    CV_WRAP virtual void recogniseImageBatch(InputArrayOfArrays inputImageList,CV_OUT std::vector<String>& transcriptions,CV_OUT std::vector<double>& confidences)=0;
-
-
-    /**
-    @brief simple getter for the vocabulary employed
-    */
-    CV_WRAP virtual const std::vector<String>& getVocabulary()=0;
-
-    /** @brief simple getter for the preprocessing functor
-     */
-    CV_WRAP virtual Ptr<TextImageClassifier> getClassifier()=0;
-
-    /** @brief Creates an instance of the OCRHolisticWordRecognizer class.
-
-    @param classifierPtr an instance of TextImageClassifier, normaly a DeepCNN instance
-
-    @param vocabularyFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line
-    in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize
-    of the classifier.
-     */
-    CV_WRAP static Ptr<OCRHolisticWordRecognizer> create(Ptr<TextImageClassifier> classifierPtr,String vocabularyFilename);
-
-
-    /** @brief Creates an instance of the OCRHolisticWordRecognizer class and implicitly also a DeepCNN classifier.
-
-    @param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture.
-
-    @param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.
-
-    @param vocabularyFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line
-    in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize
-    of the classifier.
-    */
-    CV_WRAP static Ptr<OCRHolisticWordRecognizer> create(String modelArchFilename, String modelWeightsFilename, String vocabularyFilename);
-
-    /** @brief
-     *
-     * @param classifierPtr
-     *
-     * @param vocabulary
-     */
-    CV_WRAP static Ptr<OCRHolisticWordRecognizer> create(Ptr<TextImageClassifier> classifierPtr,const std::vector<String>& vocabulary);
-
-    /** @brief
-     *
-     * @param modelArchFilename
-     *
-     * @param modelWeightsFilename
-     *
-     * @param vocabulary
-     */
-    CV_WRAP static Ptr<OCRHolisticWordRecognizer> create (String modelArchFilename, String modelWeightsFilename, const std::vector<String>& vocabulary);
-};
-
-
-}//namespace text
-}//namespace cv
-
-
+}
+}
 #endif // _OPENCV_TEXT_OCR_HPP_
--- a/modules/text/include/opencv2/text/textDetector.hpp
+++ b/modules/text/include/opencv2/text/textDetector.hpp
-/*M//////////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                          License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.

 #ifndef __OPENCV_TEXT_TEXTDETECTOR_HPP__
 #define __OPENCV_TEXT_TEXTDETECTOR_HPP__

-#include <vector>
-#include <string>
-#include <iostream>
-#include <sstream>
 #include"ocr.hpp"

-
 namespace cv
 {
 namespace text
@@ -59,208 +15,44 @@ namespace text
 //! @addtogroup text_detect
 //! @{

-
-
-//base class BaseDetector declares a common API that would be used in a typical text
-//detection scenario
-class CV_EXPORTS_W BaseDetector
-{
-public:
-    virtual ~BaseDetector() {};
-
-    virtual void run(Mat& image,
-                     std::vector<Rect>* component_rects=NULL,
-                     std::vector<float>* component_confidences=NULL,
-                     int component_level=0) = 0;
-
-    virtual void run(Mat& image, Mat& mask,
-                     std::vector<Rect>* component_rects=NULL,
-                     std::vector<float>* component_confidences=NULL,
-                     int component_level=0) = 0;
-
-};
-/** A virtual class for different models of text detection (including CNN based deep models)
+/** @brief An abstract class providing an interface for text detection algorithms
 */
-
-class CV_EXPORTS_W TextRegionDetector
+class CV_EXPORTS_W TextDetector
 {
-protected:
-    /** Stores input and output size
-     */
-    //netGeometry inputGeometry_;
-    //netGeometry outputGeometry_;
-    Size inputGeometry_;
-    Size outputGeometry_;
-    int inputChannelCount_;
-    int outputChannelCount_;
-
 public:
-    virtual ~TextRegionDetector() {}
-
-    /** @brief produces a list of Bounding boxes and an estimate of text-ness confidence of Bounding Boxes
-     */
-    CV_WRAP virtual void detect(InputArray image, OutputArray bboxProb ) = 0;
-
-
-    /** @brief simple getter method returning the size (height, width) of the input sample
-     */
-    CV_WRAP virtual Size  getInputGeometry(){return this->inputGeometry_;}
+    /**
+    @brief Method that provides a quick and simple interface to detect text inside an image

-    /** @brief simple getter method returning the shape of the oputput
-     *   Any text detector should output a number of text regions alongwith a score of text-ness
-     *   From the shape it can be inferred the number of text regions and number of returned value
-     *   for each region
+    @param inputImage an image to process
+    @param Bbox a vector of Rect that will store the detected word bounding box
+    @param confidence a vector of float that will be updated with the confidence the classifier has for the selected bounding box
    */
-    CV_WRAP virtual Size getOutputGeometry(){return this->outputGeometry_;}
-
-
-
+    virtual void textDetectInImage(InputArray inputImage, CV_OUT std::vector<Rect>& Bbox, CV_OUT std::vector<float>& confidence) = 0;
+    virtual ~TextDetector() {}
 };

-/** Generic structure of Deep CNN based Text Detectors
- * */
-class CV_EXPORTS_W  DeepCNNTextDetector : public TextRegionDetector
-{
-    /** @brief Class that uses a pretrained caffe model for text detection.
-     * Any text detection should
-     * This network is described in detail in:
-     * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network
-     * https://arxiv.org/abs/1611.06779
-     */
-protected:
-    /** all deep CNN based text detectors have a preprocessor (normally)
-         */
-    Ptr<ImagePreprocessor> preprocessor_;
-    /** @brief all image preprocessing is handled here including whitening etc.
-         *
-         *  @param input the image to be preprocessed for the classifier. If the depth
-         * is CV_U8 values should be in [0,255] otherwise values are assumed to be in [0,1]
-         *
-         * @param output reference to the image to be fed to the classifier, the preprocessor will
-         * resize the image to the apropriate size and convert it to the apropriate depth\
-         *
-         * The method preprocess should never be used externally, it is up to classify and classifyBatch
-         * methods to employ it.
-         */
-    virtual void preprocess(const Mat& input,Mat& output);
-public:
-    virtual ~DeepCNNTextDetector() {};
-
-    /** @brief Constructs a DeepCNNTextDetector object from a caffe pretrained model
-     *
-     * @param archFilename is the path to the prototxt file containing the deployment model architecture description.
-     *
-     * @param weightsFilename is the path to the pretrained weights of the model in binary fdorm.
-     *
-     * @param preprocessor is a pointer to the instance of a ImagePreprocessor implementing the preprocess_ protecteed method;
-     *
-     * @param minibatchSz the maximum number of samples that can processed in parallel. In practice this parameter
-     * has an effect only when computing in the GPU and should be set with respect to the memory available in the GPU.
-     *
-     * @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
-     * the only option
+/** @brief TextDetectorCNN class provides the functionality of text bounding box detection.
+ * A TextDetectorCNN is employed to find bounding boxes of text words given an input image.
 */
-    CV_WRAP static Ptr<DeepCNNTextDetector> create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT);
-
-    /** @brief Constructs a DeepCNNTextDetector intended to be used for text area detection.
-     *
-     * This method loads a pretrained classifier and couples with a preprocessor that preprocess the image with mean subtraction of ()
-     * The architecture and models weights can be downloaded from:
-     * https://github.com/sghoshcvc/TextBox-Models.git (size is around 100 MB)
-
-     * @param archFilename is the path to the prototxt file containing the deployment model architecture description.
-     * When employing OCR_HOLISTIC_BACKEND_CAFFE this is the path to the deploy ".prototxt".
-     *
-     * @param weightsFilename is the path to the pretrained weights of the model. When employing
-     * OCR_HOLISTIC_BACKEND_CAFFE this is the path to the ".caffemodel" file.
-     *
-     * @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
-     * the only option
-     */
-    CV_WRAP static Ptr<DeepCNNTextDetector> createTextBoxNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT);
-    friend class ImagePreprocessor;
-
-};
-
-/** @brief textDetector class provides the functionallity of text bounding box detection.
- * A TextRegionDetector is employed to find bounding boxes of text
- * words given an input image.
- *
- * This class implements the logic of providing text bounding boxes in a vector of rects given an TextRegionDetector
- * The TextRegionDetector can be any text detector
- *
- */
-
-class CV_EXPORTS_W textDetector : public BaseDetector
+class CV_EXPORTS_W TextDetectorCNN : public TextDetector
 {
 public:
-    virtual void run(Mat& image,  std::vector<Rect>* component_rects=NULL,
-                     std::vector<float>* component_confidences=NULL,
-                     int component_level=OCR_LEVEL_WORD)=0;
-
-    /** @brief detect text with a cnn, input is one image with (multiple) ocuurance of text.
-
-    Takes image on input and returns recognized text in the output_text parameter. Optionally
-    provides also the Rects for individual text elements found (e.g. words), and the list of those
-    text elements with their confidence values.
-
-    @param image Input image CV_8UC1 or CV_8UC3
-
-    @param mask is totally ignored and is only available for compatibillity reasons
-
-
-    @param component_rects a vector of Rects, each rect is one text bounding box.
-
-
-
-    @param component_confidences A vector of float returns confidence of text bounding boxes
-
-    @param component_level must be OCR_LEVEL_WORD.
-     */
-
-    virtual void run(Mat& image, Mat& mask, std::vector<Rect>* component_rects=NULL,
-                     std::vector<float>* component_confidences=NULL,
-                     int component_level=OCR_LEVEL_WORD)=0;
-
-
    /**
-    @brief Method that provides a quick and simple interface to detect text inside an image
+    @overload

    @param inputImage an image expected to be a CV_8UC3 of any size
-
    @param Bbox a vector of Rect that will store the detected word bounding box
-
    @param confidence a vector of float that will be updated with the confidence the classifier has for the selected bounding box
    */
-    CV_WRAP virtual void textDetectInImage(InputArray inputImage,CV_OUT std::vector<Rect>& Bbox,CV_OUT std::vector<float>& confidence)=0;
-
-
-
-
-    /** @brief simple getter for the preprocessing functor
-     */
-    CV_WRAP virtual Ptr<TextRegionDetector> getClassifier()=0;
-
-    /** @brief Creates an instance of the textDetector class.
-
-    @param classifierPtr an instance of TextImageClassifier, normaly a DeepCNN instance
-
-
-     */
-    CV_WRAP static Ptr<textDetector> create(Ptr<TextRegionDetector> classifierPtr);
-
+    CV_WRAP virtual void textDetectInImage(InputArray inputImage, CV_OUT std::vector<Rect>& Bbox, CV_OUT std::vector<float>& confidence) = 0;

    /** @brief Creates an instance of the TextDetectorCNN class and implicitly also a DeepCNN classifier.

    @param modelArchFilename the relative or absolute path to the prototxt file describing the classifier's architecture.
-
    @param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.
-
-
+    @param detectMultiscale if true, multiple scales of the input image will be used as network input
    */
-    CV_WRAP static Ptr<textDetector> create(String modelArchFilename, String modelWeightsFilename);
-
-
+    CV_WRAP static Ptr<TextDetectorCNN> create(const String& modelArchFilename, const String& modelWeightsFilename, bool detectMultiscale = false);
 };

 //! @}

--- a/modules/text/samples/deeptextdetection.py
+++ b/modules/text/samples/deeptextdetection.py
 # -*- coding: utf-8 -*-
-"""
-Created on Wed Jul 19 17:54:00 2017
-
-@author: sgnosh
-"""
-
 #!/usr/bin/python
-
 import sys
 import os
-
 import cv2
 import numpy as np

-print('\nDeeptextdetection.py')
-print('       A demo script of text box alogorithm of the paper:')
-print('       * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network https://arxiv.org/abs/1611.06779\n')
+def main():
+    print('\nDeeptextdetection.py')
+    print('       A demo script of text box alogorithm of the paper:')
+    print('       * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network https://arxiv.org/abs/1611.06779\n')

-
-if (len(sys.argv) < 2):
+    if (len(sys.argv) < 2):
        print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n')
        quit()
-#if not cv2.text.cnn_config.caffe_backend.getCaffeAvailable():
-#        print"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n"
-#
-#        quit()
-# check model and architecture file existance
-if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'):
+
+    if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'):
        print " Model files not found in current directory. Aborting"
        print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models"
        quit()
-cv2.text.cnn_config.caffe_backend.setCaffeGpuMode(True);
-pathname = os.path.dirname(sys.argv[0])
-
-
-img      = cv2.imread(str(sys.argv[1]))
-textSpotter=cv2.text.textDetector_create(
-                "textbox_deploy.prototxt","textbox.caffemodel")
-rects,outProbs = textSpotter.textDetectInImage(img);
-# for visualization
-vis      = img.copy()
-# Threshold to select rectangles : All rectangles for which outProbs is more than this threshold will be shown
-thres = 0.6

+    img = cv2.imread(str(sys.argv[1]))
+    textSpotter = cv2.text.TextDetectorCNN_create("textbox_deploy.prototxt","textbox.caffemodel")
+    rects, outProbs = textSpotter.textDetectInImage(img);
+    vis = img.copy()
+    thres = 0.6

-  #Visualization
-for r in range(0,np.shape(rects)[0]):
-    if outProbs[r] >thres:
+    for r in range(np.shape(rects)[0]):
+        if outProbs[r] > thres:
            rect = rects[r]
-        cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 0, 0), 2)
-       # cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 255, 255), 1)
+            cv2.rectangle(vis, (rect[0],rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (255, 0, 0), 2)

+    cv2.imshow("Text detection result", vis)
+    cv2.waitKey()

-#Visualization
-cv2.imshow("Text detection result", vis)
-cv2.waitKey(0)
\ No newline at end of file
+if __name__ == "__main__":
+    main()
--- a/modules/text/samples/textbox_demo.cpp
+++ b/modules/text/samples/textbox_demo.cpp
-/*
- * dictnet_demo.cpp
- *
- * Demonstrates simple use of the holistic word classifier in C++
- *
- * Created on: June 26, 2016
- *     Author: Anguelos Nicolaou <anguelos.nicolaou AT gmail.com>
- */
-
-#include  "opencv2/text.hpp"
-#include  "opencv2/highgui.hpp"
-#include  "opencv2/imgproc.hpp"
+#include <opencv2/text.hpp>
+#include <opencv2/highgui.hpp>
+#include <opencv2/imgproc.hpp>

 #include  <sstream>
-#include  <vector>
 #include  <iostream>
-#include  <iomanip>
 #include  <fstream>

-void textbox_draw(cv::Mat &src, std::vector<cv::Rect>  &groups,std::vector<float> &probs,std::vector<cv::String> wordList,float thres);
-inline std::string getHelpStr(std::string progFname){
-    std::stringstream out;
-    out << "    Demo of text detection CNN for text detection." << std::endl;
-    out << "    Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"<<std::endl<<std::endl;
-
-    out << "    Usage: " << progFname << " <output_file> <input_image>" << std::endl;
-    out << "    Caffe Model files  (textbox.caffemodel, textbox_deploy.prototxt)"<<std::endl;
-    out << "      must be in the current directory." << std::endl << std::endl;
+using namespace cv;

-    out << "    Obtaining Caffe Model files in linux shell:"<<std::endl;
-    out << "    wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel"<<std::endl;
-    out << "    wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt"<<std::endl;
-    out << "    wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt"<<std::endl<<std::endl;
+namespace
+{
+std::string getHelpStr(std::string progFname)
+{
+    std::stringstream out;
+    out << "    Demo of text detection CNN for text detection." << std::endl
+        << "    Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"<<std::endl<<std::endl
+        << "    Usage: " << progFname << " <output_file> <input_image>" << std::endl
+        << "    Caffe Model files  (textbox.caffemodel, textbox_deploy.prototxt)"<<std::endl
+        << "      must be in the current directory." << std::endl
+        << "    These files can be downloaded from https://github.com/sghoshcvc/TextBox-Models.git" << std::endl;
    return out.str();
 }

-inline bool fileExists (std::string filename) {
+bool fileExists (std::string filename)
+{
    std::ifstream f(filename.c_str());
    return f.good();
 }
-void textbox_draw(cv::Mat &src, std::vector<cv::Rect>  &groups,std::vector<float> &probs,std::vector<cv::String> wordList,float thres=0.6)
+
+void textbox_draw(Mat src, std::vector<Rect>& groups, std::vector<float>& probs, float thres)
 {
-    for (int i=0;i<(int)groups.size(); i++)
+    for (size_t i = 0; i < groups.size(); i++)
    {
-        if(probs[i]>thres)
+        if(probs[i] > thres)
        {
            if (src.type() == CV_8UC3)
            {
-                cv::rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 0, 255, 255 ), 3, 8 );
-                cv::putText(src, wordList[i],groups.at(i).tl() , cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar( 0,0,255 ));
+                rectangle(src, groups[i], Scalar( 0, 255, 255 ), 2, LINE_AA);
+                String label = format("%.2f", probs[i]);
+                std::cout << "text box: " << groups[i] << " confidence: " << probs[i] << "\n";
+                putText(src, label, groups.at(i).tl(), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,255 ), 1, LINE_AA);
            }
            else
-                rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 255 ), 3, 8 );
+                rectangle(src, groups[i], Scalar( 255 ), 3, 8 );
        }
    }
 }

+}

-int main(int argc, const char * argv[]){
-    if(!cv::text::cnn_config::caffe_backend::getCaffeAvailable()){
-        std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n";
-        //exit(1);
-    }
-    std::vector<std::string> backends=cv::text::cnn_config::getAvailableBackends();
-    std::cout << "The Following backends are available" << "\n";
-    for (int i=0;i<backends.size();i++)
-       std::cout << backends[i] << "\n";
-
-   // printf("%s",x);
-    //set to true if you have a GPU with more than 3GB
-     if(cv::text::cnn_config::caffe_backend::getCaffeAvailable())
-    cv::text::cnn_config::caffe_backend::setCaffeGpuMode(true);
-
-    if (argc < 3){
-        std::cout<<getHelpStr(argv[0]);
-        std::cout<<"Insufiecient parameters. Aborting!"<<std::endl;
+int main(int argc, const char * argv[])
+{
+    if (argc < 2)
+    {
+        std::cout << getHelpStr(argv[0]);
+        std::cout << "Insufiecient parameters. Aborting!" << std::endl;
        exit(1);
    }

    if (!fileExists("textbox.caffemodel") ||
-            !fileExists("textbox_deploy.prototxt")){
-           // !fileExists("dictnet_vgg_labels.txt"))
-
-        std::cout<<getHelpStr(argv[0]);
-        std::cout<<"Model files not found in the current directory. Aborting!"<<std::endl;
-        exit(1);
-    }
-
-    if (fileExists(argv[1])){
+            !fileExists("textbox_deploy.prototxt"))
+    {
        std::cout<<getHelpStr(argv[0]);
-        std::cout<<"Output file must not exist. Aborting!"<<std::endl;
+        std::cout << "Model files not found in the current directory. Aborting!" << std::endl;
        exit(1);
    }

-    cv::Mat image;
-    image = cv::imread(cv::String(argv[2]));
-
+    Mat image = imread(String(argv[1]), IMREAD_COLOR);

-    std::cout<<"Starting Text Box Demo"<<std::endl;
-    cv::Ptr<cv::text::textDetector> textSpotter=cv::text::textDetector::create(
-                "textbox_deploy.prototxt","textbox.caffemodel");
+    std::cout << "Starting Text Box Demo" << std::endl;
+    Ptr<text::TextDetectorCNN> textSpotter =
+            text::TextDetectorCNN::create("textbox_deploy.prototxt","textbox.caffemodel", false);

-    //cv::Ptr<cv::text::textDetector> wordSpotter=
-      //      cv::text::textDetector::create(cnn);
-    std::cout<<"Created Text Spotter with text Boxes";
-
-    std::vector<cv::Rect> bbox;
+    std::vector<Rect> bbox;
    std::vector<float> outProbabillities;
-    textSpotter->textDetectInImage(image,bbox,outProbabillities);
-   // textbox_draw(image, bbox,outProbabillities);
-    float thres =0.6f;
-    std::vector<cv::Mat> imageList;
-    for(int imageIdx=0;imageIdx<(int)bbox.size();imageIdx++){
-        if(outProbabillities[imageIdx]>thres){
-            imageList.push_back(image(bbox.at(imageIdx)));
-        }
-
-    }
-    // call dict net here for all detected parts
-    cv::Ptr<cv::text::DeepCNN> cnn=cv::text::DeepCNN::createDictNet(
-                "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel",cv::text::OCR_HOLISTIC_BACKEND_DNN);
-
-    cv::Ptr<cv::text::OCRHolisticWordRecognizer> wordSpotter=
-            cv::text::OCRHolisticWordRecognizer::create(cnn,"dictnet_vgg_labels.txt");
-
-    std::vector<cv::String> wordList;
-    std::vector<double> wordProbabillities;
-    wordSpotter->recogniseImageBatch(imageList,wordList,wordProbabillities);
-    // write the output in file
-    std::ofstream out;
-    out.open(argv[1]);
-
-
-    for (int i=0;i<(int)wordList.size(); i++)
-    {
-        cv::Point tl_ = bbox.at(i).tl();
-        cv::Point br_ = bbox.at(i).br();
-
-        out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl;
-
-    }
-    out.close();
-    textbox_draw(image, bbox,outProbabillities,wordList);
+    textSpotter->textDetectInImage(image, bbox, outProbabillities);

+    textbox_draw(image, bbox, outProbabillities, 0.5f);

-    cv::imshow("TextBox Demo",image);
+    imshow("TextBox Demo",image);
    std::cout << "Done!" << std::endl << std::endl;
    std::cout << "Press any key to exit." << std::endl << std::endl;
-    if ((cv::waitKey()&0xff) == ' ')
+    waitKey();
    return 0;
 }
--- a/modules/text/src/image_preprocessor.cpp
+++ b/modules/text/src/image_preprocessor.cpp
-#include "precomp.hpp"
-#include "opencv2/imgproc.hpp"
-#include  "opencv2/highgui.hpp"
-#include "opencv2/core.hpp"
-
-
-
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <queue>
-#include <algorithm>
-#include <iosfwd>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-namespace cv { namespace text {
-//************************************************************************************
-//******************   ImagePreprocessor   *******************************************
-//************************************************************************************
-
-void ImagePreprocessor::preprocess(InputArray input,OutputArray output,Size sz,int outputChannels){
-    Mat inpImg=input.getMat();
-    Mat outImg;
-    this->preprocess_(inpImg,outImg,sz,outputChannels);
-    outImg.copyTo(output);
-}
-void ImagePreprocessor::set_mean(Mat mean){
-
-
-    this->set_mean_(mean);
-
-}
-
-
-
-class ResizerPreprocessor: public ImagePreprocessor{
-protected:
-    void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
-        //TODO put all the logic of channel and depth conversions in ImageProcessor class
-        CV_Assert(outputChannels==1 || outputChannels==3);
-        CV_Assert(input.channels()==1 || input.channels()==3);
-        if(input.channels()!=outputChannels)
-        {
-            Mat tmpInput;
-            if(outputChannels==1){
-                cvtColor(input,tmpInput,COLOR_BGR2GRAY);
-                if(input.depth()==CV_8U)
-                {
-                    tmpInput.convertTo(output,CV_32FC1,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    tmpInput.convertTo(output, CV_32FC1);
-                }
-            }else
-            {
-                cvtColor(input,tmpInput,COLOR_GRAY2BGR);
-                if(input.depth()==CV_8U)
-                {
-                    tmpInput.convertTo(output,CV_32FC3,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    tmpInput.convertTo(output, CV_32FC3);
-                }
-            }
-        }else
-        {
-            if(input.channels()==1)
-            {
-                if(input.depth()==CV_8U)
-                {
-                    input.convertTo(output, CV_32FC1,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    input.convertTo(output, CV_32FC1);
-                }
-            }else
-            {
-                if(input.depth()==CV_8U){
-                    input.convertTo(output, CV_32FC3,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    input.convertTo(output, CV_32FC3);
-                }
-            }
-        }
-        if(outputSize.width!=0 && outputSize.height!=0)
-        {
-            resize(output,output,outputSize);
-        }
-    }
-    //void set_mean_(Mat m){}
-public:
-    ResizerPreprocessor(){}
-    ~ResizerPreprocessor(){}
-};
-
-class StandarizerPreprocessor: public ImagePreprocessor{
-protected:
-    double sigma_;
-    //void set_mean_(Mat M){}
-
-    void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
-
-        //TODO put all the logic of channel and depth conversions in ImageProcessor class
-        CV_Assert(outputChannels==1 || outputChannels==3);
-        CV_Assert(input.channels()==1 || input.channels()==3);
-        if(input.channels()!=outputChannels)
-        {
-            Mat tmpInput;
-            if(outputChannels==1)
-            {
-                cvtColor(input,tmpInput,COLOR_BGR2GRAY);
-                if(input.depth()==CV_8U)
-                {
-                    tmpInput.convertTo(output,CV_32FC1,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    tmpInput.convertTo(output, CV_32FC1);
-                }
-            }else
-            {
-                cvtColor(input,tmpInput,COLOR_GRAY2BGR);
-                if(input.depth()==CV_8U)
-                {
-                    tmpInput.convertTo(output,CV_32FC3,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    tmpInput.convertTo(output, CV_32FC3);
-                }
-            }
-        }else
-        {
-            if(input.channels()==1)
-            {
-                if(input.depth()==CV_8U)
-                {
-                    input.convertTo(output, CV_32FC1,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    input.convertTo(output, CV_32FC1);
-                }
-            }else
-            {
-                if(input.depth()==CV_8U)
-                {
-                    input.convertTo(output, CV_32FC3,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    input.convertTo(output, CV_32FC3);
-                }
-            }
-        }
-        if(outputSize.width!=0 && outputSize.height!=0)
-        {
-            resize(output,output,outputSize);
-        }
-
-        Scalar mean,dev;
-        meanStdDev(output,mean,dev);
-        subtract(output,mean[0],output);
-        divide(output,(dev[0]/sigma_),output);
-    }
-public:
-    StandarizerPreprocessor(double sigma):sigma_(sigma){}
-    ~StandarizerPreprocessor(){}
-
-};
-
-class customPreprocessor:public ImagePreprocessor{
-protected:
-
-    double rawval_;
-    Mat mean_;
-    String channel_order_;
-
-    void set_mean_(Mat imMean_){
-
-        imMean_.copyTo(this->mean_);
-
-
-    }
-
-    void set_raw_scale(int rawval){
-        rawval_ = rawval;
-
-    }
-    void set_channels(String channel_order){
-        channel_order_=channel_order;
-    }
-
-
-    void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
-        //TODO put all the logic of channel and depth conversions in ImageProcessor class
-
-        CV_Assert(outputChannels==1 || outputChannels==3);
-        CV_Assert(input.channels()==1 || input.channels()==3);
-        if(input.channels()!=outputChannels)
-        {
-            Mat tmpInput;
-            if(outputChannels==1)
-            {
-                cvtColor(input,tmpInput,COLOR_BGR2GRAY);
-                if(input.depth()==CV_8U)
-                {
-                    if (rawval_ == 1)
-                        tmpInput.convertTo(output,CV_32FC3,1/255.0);
-                    else
-                        tmpInput.convertTo(output,CV_32FC1);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    if (rawval_ ==1)
-                        tmpInput.convertTo(output, CV_32FC1);
-                    else
-                        tmpInput.convertTo(output, CV_32FC1,rawval_);
-                }
-            }else
-            {
-                cvtColor(input,tmpInput,COLOR_GRAY2BGR);
-                if(input.depth()==CV_8U)
-                {
-                    if (rawval_ == 1)
-                        tmpInput.convertTo(output,CV_32FC3,1/255.0);
-                    else
-                        tmpInput.convertTo(output,CV_32FC1);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    if (rawval_ ==1)
-                        tmpInput.convertTo(output, CV_32FC1);
-                    else
-                        tmpInput.convertTo(output, CV_32FC1,rawval_);
-                }
-            }
-        }else
-        {
-            if(input.channels()==1)
-            {
-                if(input.depth()==CV_8U)
-                {
-                    if (rawval_ == 1)
-                        input.convertTo(output,CV_32FC1,1/255.0);
-                    else
-                        input.convertTo(output,CV_32FC1);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    if (rawval_ ==1)
-                        input.convertTo(output, CV_32FC1);
-                    else
-                        input.convertTo(output, CV_32FC1,rawval_);
-                }
-            }else
-            {
-                if(input.depth()==CV_8U)
-                {
-                    if (rawval_ == 1)
-                        input.convertTo(output,CV_32FC3,1/255.0);
-                    else
-                        input.convertTo(output,CV_32FC3);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    if (rawval_ ==1)
-                        input.convertTo(output, CV_32FC3);
-                    else
-                        input.convertTo(output, CV_32FC3,rawval_);
-                }
-            }
-        }
-        if(outputSize.width!=0 && outputSize.height!=0)
-        {
-            resize(output,output,outputSize);
-        }
-
-        if (!this->mean_.empty()){
-
-            Scalar mean_s(this->mean_.at<uchar>(0,0),this->mean_.at<uchar>(0,1),this->mean_.at<uchar>(0,2));
-            subtract(output,mean_s,output);
-        }
-        else{
-            Scalar mean_s;
-            mean_s = mean(output);
-            subtract(output,mean_s,output);
-        }
-
-    }
-
-public:
-    customPreprocessor( double rawval,String channel_order):rawval_(rawval),channel_order_(channel_order){}
-    ~customPreprocessor(){}
-
-};
-
-class MeanSubtractorPreprocessor: public ImagePreprocessor{
-protected:
-    Mat mean_;
-    //void set_mean_(Mat m){}
-    void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
-        //TODO put all the logic of channel and depth conversions in ImageProcessor class
-        CV_Assert(this->mean_.cols==outputSize.width && this->mean_.rows ==outputSize.height);
-        CV_Assert(outputChannels==1 || outputChannels==3);
-        CV_Assert(input.channels()==1 || input.channels()==3);
-        if(input.channels()!=outputChannels)
-        {
-            Mat tmpInput;
-            if(outputChannels==1)
-            {
-                cvtColor(input,tmpInput,COLOR_BGR2GRAY);
-                if(input.depth()==CV_8U)
-                {
-                    tmpInput.convertTo(output,CV_32FC1,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    tmpInput.convertTo(output, CV_32FC1);
-                }
-            }else
-            {
-                cvtColor(input,tmpInput,COLOR_GRAY2BGR);
-                if(input.depth()==CV_8U)
-                {
-                    tmpInput.convertTo(output,CV_32FC3,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    tmpInput.convertTo(output, CV_32FC3);
-                }
-            }
-        }else
-        {
-            if(input.channels()==1)
-            {
-                if(input.depth()==CV_8U)
-                {
-                    input.convertTo(output, CV_32FC1,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    input.convertTo(output, CV_32FC1);
-                }
-            }else
-            {
-                if(input.depth()==CV_8U)
-                {
-                    input.convertTo(output, CV_32FC3,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    input.convertTo(output, CV_32FC3);
-                }
-            }
-        }
-        if(outputSize.width!=0 && outputSize.height!=0)
-        {
-            resize(output,output,outputSize);
-        }
-        subtract(output,this->mean_,output);
-    }
-public:
-    MeanSubtractorPreprocessor(Mat mean)
-    {
-        mean.copyTo(this->mean_);
-    }
-
-    ~MeanSubtractorPreprocessor(){}
-};
-
-
-
-Ptr<ImagePreprocessor> ImagePreprocessor::createResizer()
-{
-    return Ptr<ImagePreprocessor>(new ResizerPreprocessor);
-}
-
-Ptr<ImagePreprocessor> ImagePreprocessor::createImageStandarizer(double sigma)
-{
-    return Ptr<ImagePreprocessor>(new StandarizerPreprocessor(sigma));
-}
-Ptr<ImagePreprocessor> ImagePreprocessor::createImageCustomPreprocessor(double rawval,String channel_order)
-{
-
-    return Ptr<ImagePreprocessor>(new customPreprocessor(rawval,channel_order));
-}
-
-Ptr<ImagePreprocessor> ImagePreprocessor::createImageMeanSubtractor(InputArray meanImg)
-{
-    Mat tmp=meanImg.getMat();
-    return Ptr<ImagePreprocessor>(new MeanSubtractorPreprocessor(tmp));
-}
-}
-}
--- a/modules/text/src/ocr_holistic.cpp
+++ b/modules/text/src/ocr_holistic.cpp
-#include "precomp.hpp"
-#include "opencv2/imgproc.hpp"
-#include  "opencv2/highgui.hpp"
-#include "opencv2/core.hpp"
-
-
-
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <queue>
-#include <algorithm>
-#include <iosfwd>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-
-#ifdef HAVE_CAFFE
-#include "caffe/caffe.hpp"
-#endif
-
-#ifdef HAVE_DNN
-#include "opencv2/dnn.hpp"
-#endif
-
-using namespace cv;
-using namespace cv::dnn;
-using namespace std;
-namespace cv { namespace text {
-
-//Maybe OpenCV has a routine better suited
-inline bool fileExists (String filename) {
-    std::ifstream f(filename.c_str());
-    return f.good();
-}
-
-
-
-//************************************************************************************
-//******************   TextImageClassifier   *****************************************
-//************************************************************************************
-
-void TextImageClassifier::preprocess(const Mat& input,Mat& output)
-{
-    this->preprocessor_->preprocess_(input,output,this->inputGeometry_,this->channelCount_);
-}
-
-void TextImageClassifier::setPreprocessor(Ptr<ImagePreprocessor> ptr)
-{
-    CV_Assert(!ptr.empty());
-    preprocessor_=ptr;
-}
-
-Ptr<ImagePreprocessor> TextImageClassifier::getPreprocessor()
-{
-    return preprocessor_;
-}
-
-
-class DeepCNNCaffeImpl: public DeepCNN{
-protected:
-    void classifyMiniBatch(std::vector<Mat> inputImageList, Mat outputMat)
-    {
-        //Classifies a list of images containing at most minibatchSz_ images
-        CV_Assert(int(inputImageList.size())<=this->minibatchSz_);
-        CV_Assert(outputMat.isContinuous());
-
-
-#ifdef HAVE_CAFFE
-        net_->input_blobs()[0]->Reshape(inputImageList.size(), this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
-        net_->Reshape();
-        float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
-        float* inputData=inputBuffer;
-
-        for(size_t imgNum=0;imgNum<inputImageList.size();imgNum++)
-        {
-            std::vector<Mat> input_channels;
-            Mat preprocessed;
-            // if the image have multiple color channels the input layer should be populated accordingly
-            for (int channel=0;channel < this->channelCount_;channel++){
-
-                cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
-                input_channels.push_back(netInputWraped);
-                //input_data += width * height;
-                inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
-
-            }
-            this->preprocess(inputImageList[imgNum],preprocessed);
-            split(preprocessed, input_channels);
-
-
-        }
-        this->net_->ForwardPrefilled();
-        const float* outputNetData=net_->output_blobs()[0]->cpu_data();
-        this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
-        int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width;
-
-
-        //outputMat.resize(this->outputGeometry_.height * this->outputGeometry_.width);
-        float*outputMatData=(float*)(outputMat.data);
-        memcpy(outputMatData,outputNetData,sizeof(float)*outputSz*inputImageList.size());
-
-#endif
-    }
-
-#ifdef HAVE_CAFFE
-    Ptr<caffe::Net<float> > net_;
-#endif
-    //Size inputGeometry_;//=Size(100,32);
-    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
-    int outputSize_;
-    //Size outputGeometry_;
-public:
-    DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn):
-        minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
-        channelCount_=dn.channelCount_;
-        inputGeometry_=dn.inputGeometry_;
-        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
-#ifdef HAVE_CAFFE
-        this->net_=dn.net_;
-#endif
-    }
-    DeepCNNCaffeImpl& operator=(const DeepCNNCaffeImpl &dn)
-    {
-#ifdef HAVE_CAFFE
-        this->net_=dn.net_;
-#endif
-        this->setPreprocessor(dn.preprocessor_);
-        this->inputGeometry_=dn.inputGeometry_;
-        this->channelCount_=dn.channelCount_;
-        this->minibatchSz_=dn.minibatchSz_;
-        this->outputSize_=dn.outputSize_;
-        this->preprocessor_=dn.preprocessor_;
-        this->outputGeometry_=dn.outputGeometry_;
-        return *this;
-        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
-    }
-
-    DeepCNNCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz)
-        :minibatchSz_(maxMinibatchSz)
-    {
-
-        CV_Assert(this->minibatchSz_>0);
-        CV_Assert(fileExists(modelArchFilename));
-        CV_Assert(fileExists(modelWeightsFilename));
-        CV_Assert(!preprocessor.empty());
-        this->setPreprocessor(preprocessor);
-#ifdef HAVE_CAFFE
-        this->net_.reset(new caffe::Net<float>(modelArchFilename, caffe::TEST));
-        CV_Assert(net_->num_inputs()==1);
-        CV_Assert(net_->num_outputs()==1);
-        CV_Assert(this->net_->input_blobs()[0]->channels()==1
-                ||this->net_->input_blobs()[0]->channels()==3);
-        this->channelCount_=this->net_->input_blobs()[0]->channels();
-
-
-
-        this->net_->CopyTrainedLayersFrom(modelWeightsFilename);
-
-        caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0];
-
-        this->inputGeometry_=Size(inputLayer->width(), inputLayer->height());
-        this->channelCount_ = inputLayer->channels();
-
-        inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
-        net_->Reshape();
-        this->outputSize_=net_->output_blobs()[0]->channels();
-        this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
-
-
-
-
-
-#else
-        CV_Error(Error::StsError,"Caffe not available during compilation!");
-#endif
-    }
-
-    void classify(InputArray image, OutputArray classProbabilities)
-    {
-        std::vector<Mat> inputImageList;
-        inputImageList.push_back(image.getMat());
-        classifyBatch(inputImageList,classProbabilities);
-    }
-
-    void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities)
-    {
-        std::vector<Mat> allImageVector;
-        inputImageList.getMatVector(allImageVector);
-        size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmentic
-
-        size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic
-        classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F);
-        Mat outputMat = classProbabilities.getMat();
-        for(size_t imgNum=0;imgNum<allImageVector.size();imgNum+=minibatchSize)
-        {
-            size_t rangeEnd=imgNum+std::min<size_t>(allImageVector.size()-imgNum,minibatchSize);
-            std::vector<Mat>::const_iterator from=std::vector<Mat>::const_iterator(allImageVector.begin()+imgNum);
-            std::vector<Mat>::const_iterator to=std::vector<Mat>::const_iterator(allImageVector.begin()+rangeEnd);
-            std::vector<Mat> minibatchInput(from,to);
-            classifyMiniBatch(minibatchInput,outputMat.rowRange(int(imgNum),int(rangeEnd)));
-
-        }
-
-    }
-
-    int getOutputSize()
-    {
-        return this->outputSize_;
-    }
-    Size getOutputGeometry()
-    {
-        return this->outputGeometry_;
-    }
-
-    int getMinibatchSize()
-    {
-        return this->minibatchSz_;
-    }
-
-    int getBackend()
-    {
-        return OCR_HOLISTIC_BACKEND_CAFFE;
-    }
-};
-
-class DeepCNNOpenCvDNNImpl: public DeepCNN{
-protected:
-
-    void classifyMiniBatch(std::vector<Mat> inputImageList, Mat outputMat)
-    {
-        //Classifies a list of images containing at most minibatchSz_ images
-        CV_Assert(int(inputImageList.size())<=this->minibatchSz_);
-        CV_Assert(outputMat.isContinuous());
-
-#ifdef HAVE_DNN
-
-        std::vector<Mat> preProcessedImList; // to store preprocessed images, should it be handled inside preprocessing class?
-
-        Mat preprocessed;
-        // preprocesses each image in the inputImageList and push to preprocessedImList
-        for(size_t imgNum=0;imgNum<inputImageList.size();imgNum++)
-        {
-            this->preprocess(inputImageList[imgNum],preprocessed);
-            preProcessedImList.push_back(preprocessed);
-        }
-        // set input data blob in dnn::net
-        net_->setInput(blobFromImages(preProcessedImList,1, this->inputGeometry_), "data");
-
-        float*outputMatData=(float*)(outputMat.data);
-       //Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ;
-       Mat outputNet = this->net_->forward();
-       outputNet = outputNet.reshape(1, 1);
-
-       float*outputNetData=(float*)(outputNet.data);
-
-       memcpy(outputMatData,outputNetData,sizeof(float)*this->outputSize_*inputImageList.size());
-
-#endif
-    }
-
-#ifdef HAVE_DNN
-    Ptr<Net> net_;
-#endif
-    // hard coding input image size. anything in DNN library to get that from prototxt??
-   // Size inputGeometry_;//=Size(100,32);
-    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
-    int outputSize_;
-    //Size outputGeometry_;//= Size(1,1);
-    //int channelCount_;
-   // int inputChannel_ ;//=1;
-  //  int _inputHeight;
-    //int _inputWidth ;
-    //int _inputChannel ;
-public:
-    DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn):
-        minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
-        channelCount_=dn.channelCount_;
-        inputGeometry_=dn.inputGeometry_;
-        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
-#ifdef HAVE_DNN
-        this->net_=dn.net_;
-#endif
-    }
-    DeepCNNOpenCvDNNImpl& operator=(const DeepCNNOpenCvDNNImpl &dn)
-    {
-#ifdef HAVE_DNN
-        this->net_=dn.net_;
-#endif
-        this->setPreprocessor(dn.preprocessor_);
-        this->inputGeometry_=dn.inputGeometry_;
-        this->channelCount_=dn.channelCount_;
-        this->minibatchSz_=dn.minibatchSz_;
-        this->outputSize_=dn.outputSize_;
-        this->preprocessor_=dn.preprocessor_;
-        this->outputGeometry_=dn.outputGeometry_;
-        return *this;
-        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
-    }
-
-    DeepCNNOpenCvDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz,int inputWidth ,int inputHeight ,int inputChannel )
-        :minibatchSz_(maxMinibatchSz)
-    {
-
-        CV_Assert(this->minibatchSz_>0);
-        CV_Assert(fileExists(modelArchFilename));
-        CV_Assert(fileExists(modelWeightsFilename));
-        CV_Assert(!preprocessor.empty());
-        this->setPreprocessor(preprocessor);
-#ifdef HAVE_DNN
-
-        this->net_ = makePtr<Net>(readNetFromCaffe(modelArchFilename,modelWeightsFilename));
-
-
-
-        if (this->net_.empty())
-        {
-            std::cerr << "Can't load network by using the following files: " << std::endl;
-            std::cerr << "prototxt:   " << modelArchFilename << std::endl;
-            std::cerr << "caffemodel: " << modelWeightsFilename << std::endl;
-            //std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl;
-            //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
-            exit(-1);
-        }
-
-
-        this->inputGeometry_=Size(inputWidth,inputHeight);// Size(inputLayer->width(), inputLayer->height());
-        this->channelCount_ = inputChannel;//inputLayer->channels();
-
-        //inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
-        Ptr< Layer > outLayer=	net_->getLayer (net_->getLayerId (net_->getLayerNames()[net_->getLayerNames().size()-2]));
-        //std::vector<Mat> blobs = outLayer->blobs;
-
-        this->outputSize_=(outLayer->blobs)[1].size[0] ;//net_->output_blobs()[0]->channels();
-        //this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
-
-
-
-
-
-
-#else
-        CV_Error(Error::StsError,"DNN module not available during compilation!");
-#endif
-    }
-
-    void classify(InputArray image, OutputArray classProbabilities)
-    {
-        std::vector<Mat> inputImageList;
-        inputImageList.push_back(image.getMat());
-        classifyBatch(inputImageList,classProbabilities);
-    }
-
-    void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities)
-    {
-        std::vector<Mat> allImageVector;
-        inputImageList.getMatVector(allImageVector);
-        size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmentic
-
-        size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic
-        classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F);
-        Mat outputMat = classProbabilities.getMat();
-
-        for(size_t imgNum=0;imgNum<allImageVector.size();imgNum+=minibatchSize)
-        {
-            size_t rangeEnd=imgNum+std::min<size_t>(allImageVector.size()-imgNum,minibatchSize);
-            std::vector<Mat>::const_iterator from=std::vector<Mat>::const_iterator(allImageVector.begin()+imgNum);
-            std::vector<Mat>::const_iterator to=std::vector<Mat>::const_iterator(allImageVector.begin()+rangeEnd);
-            std::vector<Mat> minibatchInput(from,to);
-            classifyMiniBatch(minibatchInput,outputMat.rowRange(int(imgNum),int(rangeEnd)));
-
-        }
-
-    }
-
-    int getOutputSize()
-    {
-        return this->outputSize_;
-    }
-    Size getOutputGeometry()
-    {
-        return this->outputGeometry_;
-    }
-
-    int getMinibatchSize()
-    {
-        return this->minibatchSz_;
-    }
-
-    int getBackend()
-    {
-        return OCR_HOLISTIC_BACKEND_DNN;
-    }
-};
-
-Ptr<DeepCNN> DeepCNN::create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz,int backEnd)
-{
-    if(preprocessor.empty())
-    {
-        preprocessor=ImagePreprocessor::createResizer();
-    }
-    switch(backEnd){
-    case OCR_HOLISTIC_BACKEND_DEFAULT:
-
-#ifdef HAVE_CAFFE
-        return Ptr<DeepCNN>(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz));
-
-#elif defined(HAVE_DNN)
-        return Ptr<DeepCNN>(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,100,32,1));
-#else
-        CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
-        return Ptr<DeepCNN>();
-#endif
-        break;
-
-    case OCR_HOLISTIC_BACKEND_CAFFE:
-        return Ptr<DeepCNN>(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz));
-        break;
-  case OCR_HOLISTIC_BACKEND_DNN:
-        return Ptr<DeepCNN>(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,100,32,1));
-        break;
-    case OCR_HOLISTIC_BACKEND_NONE:
-    default:
-        CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
-        return Ptr<DeepCNN>();
-        break;
-    }
-}
-
-
-Ptr<DeepCNN> DeepCNN::createDictNet(String archFilename,String weightsFilename,int backEnd)
-{
-    Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageStandarizer(113);
-    switch(backEnd){
-    case OCR_HOLISTIC_BACKEND_DEFAULT:
-
-#ifdef HAVE_CAFFE
-        return Ptr<DeepCNN>(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, 100));
-
-#elif defined(HAVE_DNN)
-        return Ptr<DeepCNN>(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, 100,100,32,1));
-#else
-        CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
-        return Ptr<DeepCNN>();
-#endif
-        break;
-
-    case OCR_HOLISTIC_BACKEND_CAFFE:
-        return Ptr<DeepCNN>(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, 100));
-        break;
-   case OCR_HOLISTIC_BACKEND_DNN:
-        return Ptr<DeepCNN>(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, 100,100,32,1));
-        break;
-    case OCR_HOLISTIC_BACKEND_NONE:
-    default:
-        CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
-        return Ptr<DeepCNN>();
-        break;
-    }
-}
-
-namespace cnn_config{
-std::vector<std::string> getAvailableBackends()
-{
-    std::vector<std::string> backends;
-
-#ifdef HAVE_CAFFE
-    backends.push_back("CAFFE, OCR_HOLISTIC_BACKEND_CAFFE"); // dnn backend opencv_dnn
-
-#endif
-#ifdef HAVE_DNN
-    backends.push_back("DNN, OCR_HOLISTIC_BACKEND_DNN");// opencv_dnn based backend"
-#endif
-    return backends;
-
-
-}
-
-namespace caffe_backend{
-
-#ifdef HAVE_CAFFE
-
-bool getCaffeGpuMode()
-{
-    return caffe::Caffe::mode()==caffe::Caffe::GPU;
-}
-
-void setCaffeGpuMode(bool useGpu)
-{
-    if(useGpu)
-    {
-        caffe::Caffe::set_mode(caffe::Caffe::GPU);
-    }else
-    {
-        caffe::Caffe::set_mode(caffe::Caffe::CPU);
-    }
-}
-
-bool getCaffeAvailable()
-{
-    return true;
-}
-#else
-
-bool getCaffeGpuMode()
-{
-    CV_Error(Error::StsError,"Caffe not available during compilation!");
-    return 0;
-}
-
-void setCaffeGpuMode(bool useGpu)
-{
-    CV_Error(Error::StsError,"Caffe not available during compilation!");
-    CV_Assert(useGpu==1);//Compilation directives force
-}
-
-bool getCaffeAvailable(){
-    return 0;
-}
-
-#endif
-
-}//namespace caffe
-namespace dnn_backend{
-#ifdef  HAVE_DNN
-
-
-bool getDNNAvailable(){
-    return true;
-}
-#else
-bool getDNNAvailable(){
-    return 0;
-}
-#endif
-}//namspace dnn_backend
-}//namespace cnn_config
-
-class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{
-private:
-    struct NetOutput{
-        //Auxiliary structure that handles the logic of getting class ids and probabillities from
-        //the raw outputs of caffe
-        int wordIdx;
-        float probabillity;
-
-        static bool sorter(const NetOutput& o1,const NetOutput& o2)
-        {//used with std::sort to provide the most probable class
-            return o1.probabillity>o2.probabillity;
-        }
-
-        static void getOutputs(const float* buffer,int nbOutputs,std::vector<NetOutput>& res)
-        {
-            res.resize(nbOutputs);
-            for(int k=0;k<nbOutputs;k++)
-            {
-                res[k].wordIdx=k;
-                res[k].probabillity=buffer[k];
-            }
-            std::sort(res.begin(),res.end(),NetOutput::sorter);
-        }
-
-        static void getClassification(const float* buffer,int nbOutputs,int &classNum,double& confidence)
-        {
-            std::vector<NetOutput> tmp;
-            getOutputs(buffer,nbOutputs,tmp);
-            classNum=tmp[0].wordIdx;
-            confidence=tmp[0].probabillity;
-
-        }
-    };
-protected:
-    std::vector<String> labels_;
-    Ptr<TextImageClassifier> classifier_;
-public:
-    OCRHolisticWordRecognizerImpl(Ptr<TextImageClassifier> classifierPtr,String vocabularyFilename):classifier_(classifierPtr)
-    {
-        CV_Assert(fileExists(vocabularyFilename));//this fails for some rason
-        std::ifstream labelsFile(vocabularyFilename.c_str());
-        if(!labelsFile)
-        {
-            CV_Error(Error::StsError,"Could not read Labels from file");
-        }
-        std::string line;
-        while (std::getline(labelsFile, line))
-        {
-            labels_.push_back(std::string(line));
-        }
-        CV_Assert(this->classifier_->getOutputSize()==int(this->labels_.size()));
-    }
-
-    OCRHolisticWordRecognizerImpl(Ptr<TextImageClassifier> classifierPtr,const std::vector<String>& vocabulary):classifier_(classifierPtr)
-    {
-        this->labels_=vocabulary;
-        CV_Assert(this->classifier_->getOutputSize()==int(this->labels_.size()));
-    }
-
-    void recogniseImage(InputArray inputImage,CV_OUT String& transcription,CV_OUT double& confidence)
-    {
-        Mat netOutput;
-        this->classifier_->classify(inputImage,netOutput);
-        int classNum;
-        NetOutput::getClassification((float*)(netOutput.data),this->classifier_->getOutputSize(),classNum,confidence);
-        transcription=this->labels_[classNum];
-    }
-
-    void recogniseImageBatch(InputArrayOfArrays inputImageList,CV_OUT std::vector<String>& transcriptionVec,CV_OUT std::vector<double>& confidenceVec)
-    {
-        Mat netOutput;
-        this->classifier_->classifyBatch(inputImageList,netOutput);
-
-        for(int k=0;k<netOutput.rows;k++)
-        {
-            int classNum;
-            double confidence;
-            NetOutput::getClassification((float*)(netOutput.row(k).data),this->classifier_->getOutputSize(),classNum,confidence);
-            transcriptionVec.push_back(this->labels_[classNum]);
-            confidenceVec.push_back(confidence);
-        }
-    }
-
-
-    void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
-             std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
-             int component_level=0)
-    {
-        CV_Assert(component_level==OCR_LEVEL_WORD);//Componnents not applicable for word spotting
-        double confidence;
-        String transcription;
-        recogniseImage(image,transcription,confidence);
-        output_text=transcription.c_str();
-        if(component_rects!=NULL)
-        {
-            component_rects->resize(1);
-            (*component_rects)[0]=Rect(0,0,image.size().width,image.size().height);
-        }
-        if(component_texts!=NULL)
-        {
-            component_texts->resize(1);
-            (*component_texts)[0]=transcription.c_str();
-        }
-        if(component_confidences!=NULL)
-        {
-            component_confidences->resize(1);
-            (*component_confidences)[0]=float(confidence);
-        }
-    }
-
-    void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
-             std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
-             int component_level=0)
-    {
-        CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image
-        this->run(image,output_text,component_rects,component_texts,component_confidences,component_level);
-    }
-
-    std::vector<String>& getVocabulary()
-    {
-        return this->labels_;
-    }
-
-    Ptr<TextImageClassifier> getClassifier()
-    {
-        return this->classifier_;
-    }
-};
-
-Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(Ptr<TextImageClassifier> classifierPtr,String vocabularyFilename )
-{
-    return Ptr<OCRHolisticWordRecognizer>(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabularyFilename));
-}
-
-Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(String modelArchFilename, String modelWeightsFilename, String vocabularyFilename)
-{
-    Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageStandarizer(113);
-    Ptr<TextImageClassifier> classifierPtr(new DeepCNNCaffeImpl(modelArchFilename,modelWeightsFilename,preprocessor,100));
-    return Ptr<OCRHolisticWordRecognizer>(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabularyFilename));
-}
-
-Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(Ptr<TextImageClassifier> classifierPtr,const std::vector<String>& vocabulary)
-{
-    return Ptr<OCRHolisticWordRecognizer>(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabulary));
-}
-
-Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(String modelArchFilename, String modelWeightsFilename,const std::vector<String>& vocabulary){
-    Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageStandarizer(113);
-    Ptr<TextImageClassifier> classifierPtr(new DeepCNNCaffeImpl(modelArchFilename,modelWeightsFilename,preprocessor,100));
-    return Ptr<OCRHolisticWordRecognizer>(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabulary));
-}
-
-
-
-
-
-}  } //namespace text namespace cv
--- a/modules/text/src/precomp.hpp
+++ b/modules/text/src/precomp.hpp
@@ -45,6 +45,8 @@

 #include "opencv2/text.hpp"

+#include "text_config.hpp"
+
 #ifdef HAVE_TESSERACT
 #if !defined(USE_STD_NAMESPACE)
 #define USE_STD_NAMESPACE

--- a/modules/text/src/text_detector.cpp
+++ b/modules/text/src/text_detector.cpp
-#include "precomp.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/core.hpp"
-
-
-
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <queue>
-#include <algorithm>
-#include <iosfwd>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-
-//#ifdef HAVE_CAFFE
-//#include "caffe/caffe.hpp"
-//#endif
-
-namespace cv { namespace text {
-
-
-
-
-class textDetectImpl: public textDetector{
-private:
-    struct NetOutput{
-        //Auxiliary structure that handles the logic of getting bounding box and confidences of textness from
-        //the raw outputs of caffe
-        Rect bbox;
-        float probability;
-
-
-        static void getOutputs(const float* buffer,int nbrTextBoxes,int nCol,std::vector<NetOutput>& res,Size inputShape)
-        {
-
-            res.resize(nbrTextBoxes);
-            for(int k=0;k<nbrTextBoxes;k++)
-            {
-                float x_min = buffer[k*nCol+3]*inputShape.width;
-                float y_min = buffer[k*nCol+4]*inputShape.height;
-                float x_max = buffer[k*nCol+5]*inputShape.width;
-                float y_max = buffer[k*nCol +6]*inputShape.height;
-                x_min = x_min<0?0:x_min;
-                y_min = y_min<0?0:y_min;
-                x_max = x_max> inputShape.width?inputShape.width-1:x_max;
-                y_max = y_max > inputShape.height?inputShape.height-1:y_max;
-                float wd = x_max-x_min+1;
-                float ht = y_max-y_min+1;
-
-                res[k].bbox=Rect(int(x_min),int(y_min),int(wd),int(ht));
-
-                res[k].probability=buffer[k*nCol+2];
-            }
-
-        }
-
-
-    };
-protected:
-
-    Ptr<TextRegionDetector> classifier_;
-public:
-    textDetectImpl(Ptr<TextRegionDetector> classifierPtr):classifier_(classifierPtr)
-    {
-
-    }
-
-
-
-    void textDetectInImage(InputArray inputImage,CV_OUT std::vector<Rect>& Bbox,CV_OUT std::vector<float>& confidence)
-    {
-                Mat netOutput;
-                // call the detect function of deepTextCNN class
-                this->classifier_->detect(inputImage,netOutput);
-               // get the output geometry i.e height and width of output blob from caffe
-                Size OutputGeometry_ = this->classifier_->getOutputGeometry();
-                int nbrTextBoxes = OutputGeometry_.height;
-                int nCol = OutputGeometry_.width;
-
-                std::vector<NetOutput> tmp;
-                // the output bounding box needs to be resized by the input height and width
-                Size inputImageShape = Size(inputImage.cols(),inputImage.rows());
-                NetOutput::getOutputs((float*)(netOutput.data),nbrTextBoxes,nCol,tmp,inputImageShape);
-                // put the output in CV_OUT
-
-                for (int k=0;k<nbrTextBoxes;k++)
-                {
-                    Bbox.push_back(tmp[k].bbox);
-                    confidence.push_back(tmp[k].probability);
-                }
-
-     }
-
-
-
-    void run(Mat& image, std::vector<Rect>* component_rects=NULL,
-             std::vector<float>* component_confidences=NULL,
-             int component_level=0)
-    {
-        CV_Assert(component_level==OCR_LEVEL_WORD);//Componnents not applicable for word spotting
-
-        std::vector<Rect> bbox;
-        std::vector<float> score;
-        textDetectInImage(image,bbox,score);
-
-        if(component_rects!=NULL)
-        {
-            component_rects->resize(bbox.size());  // should be a user behavior
-
-            component_rects = &bbox;
-        }
-
-        if(component_confidences!=NULL)
-        {
-            component_confidences->resize(score.size()); // shoub be a user behavior
-
-            component_confidences = &score;
-        }
-    }
-
-    void run(Mat& image, Mat& mask, std::vector<Rect>* component_rects=NULL,
-             std::vector<float>* component_confidences=NULL,
-             int component_level=0)
-    {
-        CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image
-        this->run(image,component_rects,component_confidences,component_level);
-    }
-
-
-
-    Ptr<TextRegionDetector> getClassifier()
-    {
-        return this->classifier_;
-    }
-};
-
-Ptr<textDetector> textDetector::create(Ptr<TextRegionDetector> classifierPtr)
-{
-    return Ptr<textDetector>(new textDetectImpl(classifierPtr));
-}
-
-Ptr<textDetector> textDetector::create(String modelArchFilename, String modelWeightsFilename)
-{
-
-// create a custom preprocessor with rawval
-    Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255);
-// set the mean for the preprocessor
-
-    Mat textbox_mean(1,3,CV_8U);
-    textbox_mean.at<uchar>(0,0)=104;
-    textbox_mean.at<uchar>(0,1)=117;
-    textbox_mean.at<uchar>(0,2)=123;
-    preprocessor->set_mean(textbox_mean);
-// create a pointer to text box detector(textDetector)
-    Ptr<TextRegionDetector> classifierPtr(DeepCNNTextDetector::create(modelArchFilename,modelWeightsFilename,preprocessor,1));
-    return Ptr<textDetector>(new textDetectImpl(classifierPtr));
-}
-
-
-
-
-
-
-
-}  } //namespace text namespace cv
--- a/modules/text/src/text_detectorCNN.cpp
+++ b/modules/text/src/text_detectorCNN.cpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
 #include "precomp.hpp"
 #include "opencv2/imgproc.hpp"
 #include "opencv2/core.hpp"

-
-
-#include <iostream>
 #include <fstream>
-#include <sstream>
-#include <queue>
 #include <algorithm>
-#include <iosfwd>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-

-#ifdef HAVE_CAFFE
-#include "caffe/caffe.hpp"
-#endif
-
-#ifdef HAVE_DNN
 #include "opencv2/dnn.hpp"
-#endif

 using namespace cv::dnn;

-#define CV_WARN(message) fprintf(stderr, "warning: %s (%s:%d)\n", message, __FILE__, __LINE__)
-
-namespace cv { namespace text {
-
-inline bool fileExists (String filename) {
-    std::ifstream f(filename.c_str());
-    return f.good();
-}
+namespace cv
+{
+namespace text
+{

-class DeepCNNTextDetectorCaffeImpl: public DeepCNNTextDetector{
+class TextDetectorCNNImpl : public TextDetectorCNN
+{
 protected:
+    Net net_;
+    std::vector<Size> sizes_;
+    int inputChannelCount_;
+    bool detectMultiscale_;


-    void process_(Mat inputImage, Mat &outputMat)
+    // Converts raw detection rows into pixel rectangles and scores, appending one
+    // entry per row to Bbox and confidence (existing contents are kept).
+    // buffer holds nbrTextBoxes rows of nCol floats; in each row element 2 is the
+    // score and elements 3..6 are x_min, y_min, x_max, y_max, which are multiplied
+    // by the width/height of inputShape. Elements 0 and 1 of a row are not read.
+    // Raises via CV_Assert if a row is too short or a box has x_min >= x_max or
+    // y_min >= y_max before clamping.
+    void getOutputs(const float* buffer,int nbrTextBoxes,int nCol,
+                               std::vector<Rect>& Bbox, std::vector<float>& confidence, Size inputShape)
    {
-        // do forward pass and stores the output in outputMat
-        CV_Assert(outputMat.isContinuous());
-        if (inputImage.channels() != this->inputChannelCount_)
-            CV_WARN("Number of input channel(s) in the model is not same as input");
-
-
-#ifdef HAVE_CAFFE
-        net_->input_blobs()[0]->Reshape(1, this->inputChannelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
-        net_->Reshape();
-        float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
-        float* inputData=inputBuffer;
-
-        std::vector<Mat> input_channels;
-        Mat preprocessed;
-        // if the image have multiple color channels the input layer should be populated accordingly
-        for (int channel=0;channel < this->inputChannelCount_;channel++){
-
-            cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
-            input_channels.push_back(netInputWraped);
-            //input_data += width * height;
-            inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
-        }
-        this->preprocess(inputImage,preprocessed);
-        split(preprocessed, input_channels);
-
-        //preprocessed.copyTo(netInputWraped);
-
-
-        this->net_->Forward();
-        const float* outputNetData=net_->output_blobs()[0]->cpu_data();
-        // const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
-
-
-
-
-        this->outputGeometry_.height = net_->output_blobs()[0]->height();
-        this->outputGeometry_.width = net_->output_blobs()[0]->width();
-        this->outputChannelCount_ = net_->output_blobs()[0]->channels();
-        int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width;
-        outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
-        float*outputMatData=(float*)(outputMat.data);
-
-        memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
-
-
-
-#endif
-    }
-
-
-#ifdef HAVE_CAFFE
-    Ptr<caffe::Net<float> > net_;
-#endif
-    //Size inputGeometry_;
-    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
-    //int outputSize_;
-public:
-    DeepCNNTextDetectorCaffeImpl(const DeepCNNTextDetectorCaffeImpl& dn):
-        minibatchSz_(dn.minibatchSz_){
-        outputGeometry_=dn.outputGeometry_;
-        inputGeometry_=dn.inputGeometry_;
-        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
-#ifdef HAVE_CAFFE
-        this->net_=dn.net_;
-#endif
-    }
-    DeepCNNTextDetectorCaffeImpl& operator=(const DeepCNNTextDetectorCaffeImpl &dn)
+        // Each row is indexed up to element 6, so shorter rows would be read out of bounds.
+        CV_Assert(nbrTextBoxes <= 0 || nCol >= 7);
+
+        // Largest valid pixel coordinate along each axis.
+        const float x_limit = inputShape.width - 1.f;
+        const float y_limit = inputShape.height - 1.f;
+
+        for(int k = 0; k < nbrTextBoxes; k++)
        {
-#ifdef HAVE_CAFFE
-        this->net_=dn.net_;
-#endif
-        this->setPreprocessor(dn.preprocessor_);
-        this->inputGeometry_=dn.inputGeometry_;
-        this->inputChannelCount_=dn.inputChannelCount_;
-        this->outputChannelCount_ = dn.outputChannelCount_;
-        // this->minibatchSz_=dn.minibatchSz_;
-        //this->outputGeometry_=dn.outputSize_;
-        this->preprocessor_=dn.preprocessor_;
-        this->outputGeometry_=dn.outputGeometry_;
-        return *this;
-        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
-    }
-
-    DeepCNNTextDetectorCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz)
-        :minibatchSz_(maxMinibatchSz)
-    {
-
-        CV_Assert(this->minibatchSz_>0);
-        CV_Assert(fileExists(modelArchFilename));
-        CV_Assert(fileExists(modelWeightsFilename));
-        CV_Assert(!preprocessor.empty());
-        this->setPreprocessor(preprocessor);
-#ifdef HAVE_CAFFE
-        this->net_.reset(new caffe::Net<float>(modelArchFilename, caffe::TEST));
-        CV_Assert(net_->num_inputs()==1);
-        CV_Assert(net_->num_outputs()==1);
-        CV_Assert(this->net_->input_blobs()[0]->channels()==1
-                ||this->net_->input_blobs()[0]->channels()==3);
-        //        this->channelCount_=this->net_->input_blobs()[0]->channels();
-
-
-
-        this->net_->CopyTrainedLayersFrom(modelWeightsFilename);
-
-        caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0];
+            float x_min = buffer[k*nCol + 3]*inputShape.width;
+            float y_min = buffer[k*nCol + 4]*inputShape.height;

-        this->inputGeometry_.height = inputLayer->height();
-        this->inputGeometry_.width = inputLayer->width();
-        this->inputChannelCount_ = inputLayer->channels();
-        //this->inputGeometry_.batchSize =1;
+            float x_max = buffer[k*nCol + 5]*inputShape.width;
+            float y_max = buffer[k*nCol + 6]*inputShape.height;

-        inputLayer->Reshape(this->minibatchSz_,this->inputChannelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
-        net_->Reshape();
-        this->outputChannelCount_ = net_->output_blobs()[0]->channels();
-        //this->outputGeometry_.batchSize =1;
-        this->outputGeometry_.height =net_->output_blobs()[0]->height();
-        this->outputGeometry_.width = net_->output_blobs()[0]->width();
+            // Single expression so the check does not rely on a variadic CV_Assert.
+            CV_Assert(x_min < x_max && y_min < y_max);

-#else
-        CV_Error(Error::StsError,"Caffe not available during compilation!");
-#endif
-    }
+            // Clamp the min corner into [0, limit] and keep the max corner at or beyond it.
+            // Clamping only the lower bound of the min corner and the upper bound of the
+            // max corner would leave x_min > x_max for a box wholly outside the image,
+            // giving a negative width or height.
+            x_min = std::min(std::max(0.f, x_min), x_limit);
+            y_min = std::min(std::max(0.f, y_min), y_limit);

+            x_max = std::max(x_min, std::min(x_limit, x_max));
+            y_max = std::max(y_min, std::min(y_limit, y_max));

-    void detect(InputArray image, OutputArray Bbox_prob)
-    {
-        Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width);
-        Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed
-        Mat outputMat = Bbox_prob.getMat();
-        process_(image.getMat(),outputMat);
-        //copy back to outputArray
-        outputMat.copyTo(Bbox_prob);
-    }
+            int wd = cvRound(x_max - x_min);
+            int ht = cvRound(y_max - y_min);

-    Size getOutputGeometry()
-    {
-        return this->outputGeometry_;
+            Bbox.push_back(Rect(cvRound(x_min), cvRound(y_min), wd, ht));
+            confidence.push_back(buffer[k*nCol + 2]);
        }
-    Size getinputGeometry()
-    {
-        return this->inputGeometry_;
-    }
-
-    int getMinibatchSize()
-    {
-        return this->minibatchSz_;
-    }
-
-    int getBackend()
-    {
-        return OCR_HOLISTIC_BACKEND_CAFFE;
-    }
-    void setPreprocessor(Ptr<ImagePreprocessor> ptr)
-    {
-        CV_Assert(!ptr.empty());
-        preprocessor_=ptr;
-    }
-
-    Ptr<ImagePreprocessor> getPreprocessor()
-    {
-        return preprocessor_;
-    }
-};
-
-
-class DeepCNNTextDetectorDNNImpl: public DeepCNNTextDetector{
-protected:
-
-
-    void process_(Mat inputImage, Mat &outputMat)
-    {
-        // do forward pass and stores the output in outputMat
-        CV_Assert(outputMat.isContinuous());
-        if (inputImage.channels() != this->inputChannelCount_)
-            CV_WARN("Number of input channel(s) in the model is not same as input");
-
-
-#ifdef HAVE_DNN
-
-        Mat preprocessed;
-        this->preprocess(inputImage,preprocessed);
-
-        net_->setInput(blobFromImage(preprocessed,1,  this->inputGeometry_), "data");
-
-       Mat outputNet = this->net_->forward( );
-
-       this->outputGeometry_.height = outputNet.size[2];
-       this->outputGeometry_.width = outputNet.size[3];
-       this->outputChannelCount_ = outputNet.size[1];
-
-       outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
-        float*outputMatData=(float*)(outputMat.data);
-       float*outputNetData=(float*)(outputNet.data);
-       int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width;
-
-       memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
-
-
-
-
-#endif
    }

-
-
-#ifdef HAVE_DNN
-    Ptr<Net> net_;
-#endif
-    //Size inputGeometry_;
-    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
-    //int outputSize_;
-    //int inputHeight_;
-    //int inputWidth_;
-    //int inputChannel_;
 public:
-    DeepCNNTextDetectorDNNImpl(const DeepCNNTextDetectorDNNImpl& dn):
-        minibatchSz_(dn.minibatchSz_){
-        outputGeometry_=dn.outputGeometry_;
-        inputGeometry_=dn.inputGeometry_;
-        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
-#ifdef HAVE_DNN
-        this->net_=dn.net_;
-#endif
-    }
-    DeepCNNTextDetectorDNNImpl& operator=(const DeepCNNTextDetectorDNNImpl &dn)
+    // Loads the Caffe network described by the two files and fixes the list of
+    // input sizes the detector runs at: 700x700 always, plus four further sizes
+    // when detectMultiscale is true. Raises via CV_Assert if the loaded net is empty.
+    TextDetectorCNNImpl(const String& modelArchFilename, const String& modelWeightsFilename, bool detectMultiscale) :
+        inputChannelCount_(3), detectMultiscale_(detectMultiscale)
    {
-#ifdef HAVE_DNN
-        this->net_=dn.net_;
-#endif
-        this->setPreprocessor(dn.preprocessor_);
-        this->inputGeometry_=dn.inputGeometry_;
-        this->inputChannelCount_=dn.inputChannelCount_;
-        this->outputChannelCount_ = dn.outputChannelCount_;
-        // this->minibatchSz_=dn.minibatchSz_;
-        //this->outputGeometry_=dn.outputSize_;
-        this->preprocessor_=dn.preprocessor_;
-        this->outputGeometry_=dn.outputGeometry_;
-        return *this;
-        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
-    }
-
-    DeepCNNTextDetectorDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz,int inputHeight=700,int inputWidth =700,int inputChannel =3)
-        :minibatchSz_(maxMinibatchSz)
-    {
-
-        CV_Assert(this->minibatchSz_>0);
-        CV_Assert(fileExists(modelArchFilename));
-        CV_Assert(fileExists(modelWeightsFilename));
-        CV_Assert(!preprocessor.empty());
-        this->setPreprocessor(preprocessor);
-#ifdef HAVE_DNN
-        this->net_ = makePtr<Net>(readNetFromCaffe(modelArchFilename,modelWeightsFilename));
+        net_ = readNetFromCaffe(modelArchFilename, modelWeightsFilename);
+        CV_Assert(!net_.empty());
+        // The first entry is used in both single-scale and multiscale mode.
+        sizes_.push_back(Size(700, 700));

-        if (this->net_.empty())
+        if(detectMultiscale_)
        {
-            std::cerr << "Can't load network by using the following files: " << std::endl;
-            std::cerr << "prototxt:   " << modelArchFilename << std::endl;
-            std::cerr << "caffemodel: " << modelWeightsFilename << std::endl;
-            //std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl;
-            //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
-            exit(-1);
+            // Additional (width, height) pairs, appended in this order.
+            static const int extraScales[][2] = { {300, 300}, {700, 500}, {700, 300}, {1600, 1600} };
+            const size_t extraCount = sizeof(extraScales) / sizeof(extraScales[0]);
+            for(size_t s = 0; s < extraCount; s++)
+                sizes_.push_back(Size(extraScales[s][0], extraScales[s][1]));
        }
-
-        this->inputGeometry_.height =inputHeight;
-        this->inputGeometry_.width = inputWidth ;//inputLayer->width();
-        this->inputChannelCount_ = inputChannel ;//inputLayer->channels();
-
-#else
-        CV_Error(Error::StsError,"DNN module not available during compilation!");
-#endif
-    }
-
-
-    void detect(InputArray image, OutputArray Bbox_prob)
-    {
-        Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width);
-        Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed
-        Mat outputMat = Bbox_prob.getMat();
-
-        process_(image.getMat(),outputMat);
-        //copy back to outputArray
-        outputMat.copyTo(Bbox_prob);
    }

-    Size getOutputGeometry()
-    {
-        return this->outputGeometry_;
-    }
-    Size getinputGeometry()
+    // Runs the network on the image once per entry of sizes_ and collects every
+    // reported box. Bbox and confidence are emptied first; results from all sizes
+    // are then appended one after another, and no merging of overlapping boxes is
+    // done here. Box coordinates are scaled to the size of the input image, not to
+    // the size the network ran at.
+    // Raises via CV_Assert if the image channel count differs from
+    // inputChannelCount_ or the network output is not a 4-D CV_32F blob with
+    // size[1] == 1.
+    void textDetectInImage(InputArray inputImage_, std::vector<Rect>& Bbox, std::vector<float>& confidence)
    {
-        return this->inputGeometry_;
-    }
+        CV_Assert(inputImage_.channels() == inputChannelCount_);
+        // NOTE(review): presumably cloned so blob conversion can never touch the caller's pixels - confirm.
+        Mat inputImage = inputImage_.getMat().clone();
+        Bbox.clear();
+        confidence.clear();

-    int getMinibatchSize()
+        for(size_t i = 0; i < sizes_.size(); i++)
        {
-        return this->minibatchSz_;
+            Size inputGeometry = sizes_[i];
+            net_.setInput(blobFromImage(inputImage, 1, inputGeometry, Scalar(123, 117, 104)), "data");
+            Mat outputNet = net_.forward();
+            // size[1..3] and the float view below are only valid for a 4-D CV_32F blob.
+            CV_Assert(outputNet.dims == 4 && outputNet.type() == CV_32F);
+            int nbrTextBoxes = outputNet.size[2];
+            int nCol = outputNet.size[3];
+            int outputChannelCount = outputNet.size[1];
+            CV_Assert(outputChannelCount == 1);
+            getOutputs(outputNet.ptr<float>(), nbrTextBoxes, nCol, Bbox, confidence, inputImage.size());
        }
-
-    int getBackend()
-    {
-        return OCR_HOLISTIC_BACKEND_DNN;
-    }
-    void setPreprocessor(Ptr<ImagePreprocessor> ptr)
-    {
-        CV_Assert(!ptr.empty());
-        preprocessor_=ptr;
-    }
-
-    Ptr<ImagePreprocessor> getPreprocessor()
-    {
-        return preprocessor_;
     }
 };

-Ptr<DeepCNNTextDetector> DeepCNNTextDetector::create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz,int backEnd)
+// Factory for the CNN text detector: builds a TextDetectorCNNImpl from the given
+// model description and weights files and returns it through the interface type.
+Ptr<TextDetectorCNN> TextDetectorCNN::create(const String &modelArchFilename, const String &modelWeightsFilename, bool detectMultiscale)
 {
-    if(preprocessor.empty())
-    {
-        // create a custom preprocessor with rawval
-        preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255);
-        // set the mean for the preprocessor
-
-        Mat textbox_mean(1,3,CV_8U);
-        textbox_mean.at<uchar>(0,0)=104;
-        textbox_mean.at<uchar>(0,1)=117;
-        textbox_mean.at<uchar>(0,2)=123;
-        preprocessor->set_mean(textbox_mean);
-    }
-    switch(backEnd){
-    case OCR_HOLISTIC_BACKEND_DEFAULT:
-
-#ifdef HAVE_CAFFE
-        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz));
-
-#elif defined(HAVE_DNN)
-        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,700,700,3));
-#else
-        CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented");
-        return Ptr<DeepCNNTextDetector>();
-#endif
-    case OCR_HOLISTIC_BACKEND_CAFFE:
-
-        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz));
-        break;
-
-    case OCR_HOLISTIC_BACKEND_DNN:
-        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,700,700,3));
-        break;
-
-    case OCR_HOLISTIC_BACKEND_NONE:
-    default:
-        CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented");
-        return Ptr<DeepCNNTextDetector>();
-        break;
-    }
-    //return Ptr<DeepCNNTextDetector>();
-
+    // Construct the concrete implementation first, then hand it back as the base interface.
+    Ptr<TextDetectorCNNImpl> impl = makePtr<TextDetectorCNNImpl>(modelArchFilename, modelWeightsFilename, detectMultiscale);
+    return impl;
 }
-
-
-Ptr<DeepCNNTextDetector> DeepCNNTextDetector::createTextBoxNet(String archFilename,String weightsFilename,int backEnd)
-{
-
-    // create a custom preprocessor with rawval
-    Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255);
-    // set the mean for the preprocessor
-
-    Mat textbox_mean(1,3,CV_8U);
-    textbox_mean.at<uchar>(0,0)=104;
-    textbox_mean.at<uchar>(0,1)=117;
-    textbox_mean.at<uchar>(0,2)=123;
-    preprocessor->set_mean(textbox_mean);
-    switch(backEnd){
-    case OCR_HOLISTIC_BACKEND_DEFAULT:
-
-#ifdef HAVE_CAFFE
-        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1));
-
-#elif defined(HAVE_DNN)
-        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1,700,700,3));
-#else
-        CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented");
-        return Ptr<DeepCNNTextDetector>();
-#endif
-        break;
-    case OCR_HOLISTIC_BACKEND_CAFFE:
-        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1));
-        break;
-    case OCR_HOLISTIC_BACKEND_DNN:
-         return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1,700,700,3));
-         break;
-    case OCR_HOLISTIC_BACKEND_NONE:
-    default:
-        CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented");
-        return Ptr<DeepCNNTextDetector>();
-        break;
-    }
-    //return Ptr<DeepCNNTextDetector>();
-
-}
-
-void DeepCNNTextDetector::preprocess(const Mat& input,Mat& output)
-{
-    Size inputHtWd = Size(this->inputGeometry_.height,this->inputGeometry_.width);
-    this->preprocessor_->preprocess(input,output,inputHtWd,this->inputChannelCount_);
-}
-
-
-
-}  } //namespace text namespace cv
+} //namespace text
+} //namespace cv
--- a/modules/text/text_config.hpp.in
+++ b/modules/text/text_config.hpp.in
 #ifndef __OPENCV_TEXT_CONFIG_HPP__
 #define __OPENCV_TEXT_CONFIG_HPP__

+// HAVE OCR Tesseract
+#cmakedefine HAVE_TESSERACT
+
 #endif