Text detector class and Custom Image processor Class

9ae765a1 · sghoshcvc · fa94c160 · 9ae765a1 · 9ae765a1 · 9ae765a1
Commit 9ae765a1 authored Jun 22, 2017 by sghoshcvc
12 changed files
--- a/modules/text/CMakeLists.txt
+++ b/modules/text/CMakeLists.txt
 set(the_description "Text Detection and Recognition")
-ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d OPTIONAL opencv_highgui WRAP python)
+# Put this module's own directory on CMAKE_MODULE_PATH so that find_package()
+# can pick up the Find*.cmake scripts stored next to this CMakeLists.txt.
+list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR})

-if(NOT CMAKE_CROSSCOMPILING OR OPENCV_FIND_TESSERACT)
-  set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
-  find_package(Tesseract QUIET)
-  if(Tesseract_FOUND)
+# Candidate dependency list for the module.
+# NOTE(review): in this file the variable is only referenced by commented-out
+# code, so it currently has no effect on the module definition.
+set(TEXT_DEPS opencv_ml opencv_highgui opencv_imgproc opencv_core opencv_features2d opencv_calib3d)
+
+# Probe the optional Caffe backend and the two libraries it is linked with
+# further down (Protobuf, Glog). For each package that is found,
+# HAVE_<UPPERCASE NAME> is set to 1; the outcome is reported with a STATUS
+# message either way.
+foreach(text_pkg Caffe Protobuf Glog)
+  find_package(${text_pkg})
+  if(${text_pkg}_FOUND)
+    message(STATUS "${text_pkg}:   YES")
+    string(TOUPPER "${text_pkg}" text_pkg_upper)
+    set(HAVE_${text_pkg_upper} 1)
+  else()
+    message(STATUS "${text_pkg}:   NO")
+  endif()
+endforeach()
+
+# Declare the "text" module with its OpenCV module dependencies. Compared with
+# the previous definition, opencv_highgui is no longer listed as an OPTIONAL
+# dependency and opencv_calib3d has been added.
+ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d WRAP python)
+#ocv_define_module(text ${TEXT_DEPS} WRAP python)
+
+#set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR})
+
+# Optional Tesseract OCR support. FindTesseract.cmake (next to this file) sets
+# Tesseract_FOUND, Tesseract_INCLUDE_DIR and Tesseract_LIBS.
+find_package(Tesseract)
+# Test the variable by name: if(${Tesseract_FOUND}) expands to an empty if()
+# whenever the variable is unset.
+if(Tesseract_FOUND)
  message(STATUS "Tesseract:   YES")
-    set(HAVE_TESSERACT 1)
-    ocv_include_directories(${Tesseract_INCLUDE_DIR})
-    ocv_target_link_libraries(${the_module} ${Tesseract_LIBRARIES})
-  else()
+  # Use the OpenCV helpers and ${the_module}, as the previous version of this
+  # file did, instead of hard-coding the opencv_text target name.
+  ocv_include_directories(${Tesseract_INCLUDE_DIR})
+  ocv_target_link_libraries(${the_module} ${Tesseract_LIBS})
+  # The sources see Tesseract support through this compile definition.
+  add_definitions(-DHAVE_TESSERACT)
+else()
  message(STATUS "Tesseract:   NO")
-  endif()
 endif()

-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/text_config.hpp.in
-               ${CMAKE_BINARY_DIR}/text_config.hpp @ONLY)

-ocv_include_directories(${CMAKE_CURRENT_BINARY_DIR})

-ocv_add_testdata(samples/ contrib/text
-    FILES_MATCHING PATTERN "*.xml" PATTERN "*.xml.gz" REGEX "scenetext[0-9]+.jpg"
-)
+
+# Wire in the Caffe backend only when Caffe, Glog and Protobuf were all found.
+if(HAVE_CAFFE AND HAVE_GLOG AND HAVE_PROTOBUF)
+  include_directories(${Caffe_INCLUDE_DIR})
+  # HDF5 and Boost are linked together with Caffe below; both are mandatory
+  # once the Caffe backend is enabled.
+  find_package(HDF5 COMPONENTS HL REQUIRED)
+  include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR})
+  list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES})
+  find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem)
+  include_directories(SYSTEM ${Boost_INCLUDE_DIR})
+  # Every CUDA path is absolute: an entry without the leading "/" is resolved
+  # relative to the current source directory.
+  # NOTE(review): these locations are specific to a CUDA 7.5/8.0 Linux install.
+  include_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/include/ /usr/local/cuda-8.0/include/ /usr/local/cuda-7.5/targets/x86_64-linux/include/)
+  # link_directories() has no SYSTEM keyword; the word was being added as a
+  # directory called "SYSTEM".
+  # NOTE(review): link_directories() only affects targets created after the
+  # call -- confirm these directories actually reach the module target.
+  link_directories(/usr/local/cuda-8.0/targets/x86_64-linux/lib/ /usr/local/cuda-8.0/lib/ /usr/local/cuda-7.5/targets/x86_64-linux/lib/ /usr/lib/openblas-base/lib /usr/local/cuda-8.0/lib64)
+  list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES})
+  # Link through the OpenCV helper and ${the_module} rather than a hard-coded
+  # target name.
+  ocv_target_link_libraries(${the_module} atlas blas ${Caffe_LIBS} ${Glog_LIBS} ${Protobuf_LIBS} ${HDF5_LIBRARIES} ${Boost_LIBRARIES})
+  # The sources see the Caffe backend through this compile definition.
+  add_definitions(-DHAVE_CAFFE)
+endif() #HAVE_CAFFE
+
+message(STATUS "TEXT CAFFE SEARCH")
+# NOTE(review): the condition below is empty. CMake evaluates an if() without
+# arguments as false, so the else() branch ("TEXT CAFFE CONFLICT") is the one
+# that is always reported. The intended check is not visible here -- TODO
+# supply it or remove the block.
+if()
+  message(STATUS "TEXT NO CAFFE CONFLICT")
+else()
+  message(STATUS "TEXT CAFFE CONFLICT")
+endif()
+
--- a/modules/text/FindCaffe.cmake
+++ b/modules/text/FindCaffe.cmake
+# Find module for Caffe (headers and library).
+#
+# Result variables:
+#   Caffe_INCLUDE_DIR - directory under which one of the probed caffe/ headers
+#                       was found
+#   Caffe_LIBS        - path of the caffe library
+#   Caffe_FOUND       - set to 1 only when both of the above were located
+unset(Caffe_FOUND)
+
+find_path(Caffe_INCLUDE_DIR
+  NAMES
+    caffe/caffe.hpp
+    caffe/common.hpp
+    caffe/net.hpp
+    caffe/proto/caffe.pb.h
+    caffe/util/io.hpp
+    caffe/vision_layers.hpp
+  HINTS /usr/local/include)
+
+find_library(Caffe_LIBS
+  NAMES caffe
+  HINTS /usr/local/lib)
+
+if(Caffe_INCLUDE_DIR AND Caffe_LIBS)
+  set(Caffe_FOUND 1)
+endif()
--- a/modules/text/FindGlog.cmake
+++ b/modules/text/FindGlog.cmake
+# Find module for the glog library, which is linked together with Caffe.
+#
+# Result variables:
+#   Glog_LIBS  - path of the glog library
+#   Glog_FOUND - set to 1 only when the library was located
+unset(Glog_FOUND)
+
+find_library(Glog_LIBS
+  NAMES glog
+  HINTS /usr/local/lib)
+
+if(Glog_LIBS)
+  set(Glog_FOUND 1)
+endif()
--- a/modules/text/FindProtobuf.cmake
+++ b/modules/text/FindProtobuf.cmake
+# Find module for the protobuf library, which is linked together with Caffe.
+#
+# Result variables:
+#   Protobuf_LIBS  - path of the protobuf library
+#   Protobuf_FOUND - set to 1 only when the library was located
+#
+# NOTE(review): this file has the same name as the FindProtobuf module shipped
+# with CMake; while its directory is on CMAKE_MODULE_PATH it is the one that
+# find_package(Protobuf) picks up.
+unset(Protobuf_FOUND)
+
+find_library(Protobuf_LIBS
+  NAMES protobuf
+  HINTS /usr/local/lib)
+
+if(Protobuf_LIBS)
+  set(Protobuf_FOUND 1)
+endif()
--- a/modules/text/FindTesseract.cmake
+++ b/modules/text/FindTesseract.cmake
+# Find module for the Tesseract OCR engine together with the lept (Leptonica)
+# library that is linked alongside it.
+#
+# Result variables:
+#   Tesseract_INCLUDE_DIR - directory containing tesseract/baseapi.h
+#   Tesseract_LIBRARY     - path of the tesseract library
+#   Lept_LIBRARY          - path of the lept library
+#   Tesseract_LIBS        - both libraries, in link order
+#   Tesseract_FOUND       - set to 1 only when the header and both libraries
+#                           were located
+unset(Tesseract_FOUND)
+
+find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h
+  HINTS
+  /usr/include
+  /usr/local/include)
+
+find_library(Tesseract_LIBRARY NAMES tesseract
+  HINTS
+  /usr/lib
+  /usr/local/lib)
+
+find_library(Lept_LIBRARY NAMES lept
+  HINTS
+  /usr/lib
+  /usr/local/lib)
+
+set(Tesseract_LIBS ${Tesseract_LIBRARY} ${Lept_LIBRARY})
+# Check every search result on its own. Testing the combined list is not
+# enough: if() only treats a value as false when the whole string ends in
+# "-NOTFOUND", so a missing tesseract library went unnoticed whenever lept
+# was found.
+if(Tesseract_INCLUDE_DIR AND Tesseract_LIBRARY AND Lept_LIBRARY)
+    set(Tesseract_FOUND 1)
+endif()
+
+        
--- a/modules/text/README.md
+++ b/modules/text/README.md
@@ -47,3 +47,75 @@ Notes
 2. Tesseract configure script may fail to detect leptonica, so you may have to edit the configure script - comment off some if's around this message and retain only "then" branch.

 3. You are encouraged to search the Net for some better pre-trained classifiers, as well as classifiers for other languages.
+
+
+Word spotting CNN
+=================
+
+Intro
+-----
+
+A word spotting CNN is a CNN that takes an image assumed to contain a single word and provides a probability distribution over a given vocabulary.
+Although other backends will be supported, for the moment only the Caffe backend is supported.
+
+
+
+
+Installation of Caffe backend
+-----------------------------
+The Caffe wrapping backend has the same requirements as Caffe itself.
+* Caffe can be built against OpenCV; if the Caffe backend is enabled, a circular dependency arises.
+The simplest solution is to build Caffe without support for OpenCV.
+* Only the operating systems supported by Caffe are supported by the backend.
+The scripts describing the module have been developed on Ubuntu 16.04 and assume such a system.
+Other UNIX systems, including OSX, should be easy to adapt to.
+
+Sample script for building Caffe
+
+```bash
+#!/bin/bash
+SRCROOT="${HOME}/caffe_inst/"
+mkdir -p "$SRCROOT"
+cd "$SRCROOT"
+git clone https://github.com/BVLC/caffe.git
+cd caffe
+git checkout 91b09280f5233cafc62954c98ce8bc4c204e7475
+git branch 91b09280f5233cafc62954c98ce8bc4c204e7475
+cat Makefile.config.example  > Makefile.config
+echo 'USE_OPENCV := 0' >> Makefile.config
+echo 'INCLUDE_DIRS += /usr/include/hdf5/serial/' >> Makefile.config
+echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config
+
+
+echo "--- /tmp/caffe/include/caffe/net.hpp	2017-05-28 04:55:47.929623902 +0200
+++ caffe/distribute/include/caffe/net.hpp	2017-05-28 04:51:33.437090768 +0200
+@@ -234,6 +234,7 @@
+ 
+     template <typename T>
+     friend class Net;
+    virtual ~Callback(){}
+   };
+   const vector<Callback*>& before_forward() const { return before_forward_; }
+   void add_before_forward(Callback* value) {
+">/tmp/cleanup_caffe.diff
+
+patch < /tmp/cleanup_caffe.diff
+
+
+make -j 6
+
+make pycaffe
+
+make distribute
+```
+
+
+```bash
+#!/bin/bash
+cd $OPENCV_BUILD_DIR #You must set this
+CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe in ubuntu 16.04
+
+cmake  -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0"  -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="/home/anguelos/work/projects/opencv_gsoc/opencv_contrib/modules"   ./
+
+
+```
--- a/modules/text/include/opencv2/text.hpp
+++ b/modules/text/include/opencv2/text.hpp
@@ -41,6 +41,7 @@ the use of this software, even if advised of the possibility of such damage.

 #include "opencv2/text/erfilter.hpp"
 #include "opencv2/text/ocr.hpp"
+#include "opencv2/text/textDetector.hpp"

 /** @defgroup text Scene Text Detection and Recognition

@@ -92,7 +93,7 @@ grouping horizontally aligned text, and the method proposed by Lluis Gomez and D
 in [Gomez13][Gomez14] for grouping arbitrary oriented text (see erGrouping).

 To see the text detector at work, have a look at the textdetection demo:
-<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>
+<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>

    @defgroup text_recognize Scene Text Recognition
  @}

--- a/modules/text/include/opencv2/text/ocr.hpp
+++ b/modules/text/include/opencv2/text/ocr.hpp
--- a/modules/text/include/opencv2/text/textDetector.hpp
+++ b/modules/text/include/opencv2/text/textDetector.hpp
+/*M//////////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                          License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_TEXT_TEXTDETECTOR_HPP__
+#define __OPENCV_TEXT_TEXTDETECTOR_HPP__
+
+#include <vector>
+#include <string>
+#include <iostream>
+#include <sstream>
+#include"ocr.hpp"
+
+
+namespace cv
+{
+namespace text
+{
+
+//! @addtogroup text_recognize
+//! @{
+
+
+
+/** @brief Abstract base class declaring the common API of text detectors.
+ *
+ * Both run() overloads are pure virtual and have to be provided by concrete detectors.
+ */
+class CV_EXPORTS_W BaseDetector
+{
+ public:
+    virtual ~BaseDetector() {}
+
+    /** @brief Runs the detector on an image.
+     *
+     * @param image input image.
+     * @param component_rects optional output, may be NULL; presumably receives the bounding
+     *        boxes of the detected text -- confirm against the implementing class.
+     * @param component_confidences optional output, may be NULL; presumably receives one
+     *        confidence value per rectangle -- confirm against the implementing class.
+     * @param component_level level selector, 0 by default; its meaning is defined by the
+     *        implementing class.
+     */
+    virtual void run(Mat& image,
+                     std::vector<Rect>* component_rects=NULL,
+                     std::vector<float>* component_confidences=NULL,
+                     int component_level=0) = 0;
+
+    /** @brief Runs the detector on an image; overload that additionally takes a mask.
+     *
+     * @param image input image.
+     * @param mask mask image; how it is used is up to the implementing class.
+     * @param component_rects optional output, may be NULL (see the overload without mask).
+     * @param component_confidences optional output, may be NULL (see the overload without mask).
+     * @param component_level level selector, 0 by default.
+     */
+    virtual void run(Mat& image, Mat& mask,
+                     std::vector<Rect>* component_rects=NULL,
+                     std::vector<float>* component_confidences=NULL,
+                     int component_level=0) = 0;
+
+};
+
+
+//Classifiers should provide different backends
+//For the moment only caffe is implemented
+//enum{
+//    OCR_HOLISTIC_BACKEND_NONE,
+//    OCR_HOLISTIC_BACKEND_CAFFE
+//};
+
+
+
+
+
+/** @brief Abstract interface of a text detector that works on top of a TextImageClassifier.
+ *
+ * The class only declares pure virtual methods; instances are obtained through the static
+ * create() functions.
+ *
+ * NOTE(review): the earlier description of this class was copied from OCRHolisticWordRecognizer
+ * (segmented word spotting with DictNet). The model links below come from that description and
+ * are kept for reference -- confirm which files a textDetector actually expects:
+
+ * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt>
+ * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel>
+ * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt>
+ */
+class CV_EXPORTS_W textDetector : public BaseDetector
+{
+public:
+    /** @brief Runs the detector on an image (overload without a mask).
+
+    @param image Input image CV_8UC1 or CV_8UC3
+
+    @param component_rects May be NULL; otherwise presumably filled with the rectangles of the
+        detected text -- confirm against the implementation.
+
+    @param component_confidences May be NULL; otherwise presumably filled with one confidence
+        value per rectangle -- confirm against the implementation.
+
+    @param component_level defaults to OCR_LEVEL_WORD.
+     */
+    virtual void run(Mat& image,  std::vector<Rect>* component_rects=NULL,
+                     std::vector<float>* component_confidences=NULL,
+                     int component_level=OCR_LEVEL_WORD)=0;
+
+    /** @brief Runs the detector on an image; overload that additionally accepts a mask.
+
+    @param image Input image CV_8UC1 or CV_8UC3
+
+    @param mask is totally ignored and is only available for compatibility reasons
+
+    @param component_rects May be NULL; otherwise presumably filled with the rectangles of the
+        detected text -- confirm against the implementation.
+
+    @param component_confidences May be NULL; otherwise presumably filled with one confidence
+        value per rectangle -- confirm against the implementation.
+
+    @param component_level must be OCR_LEVEL_WORD.
+     */
+
+    virtual void run(Mat& image, Mat& mask, std::vector<Rect>* component_rects=NULL,
+                     std::vector<float>* component_confidences=NULL,
+                     int component_level=OCR_LEVEL_WORD)=0;
+
+
+    /**
+    @brief Method that provides a quick and simple interface to text detection on a single image
+
+    @param inputImage an image expected to be a CV_8UC1 or CV_8UC3 of any size
+
+    @param Bbox output vector of rectangles; presumably the bounding boxes of the detected text
+    regions -- confirm against the implementation
+
+    @param confidence output vector of floats; presumably one confidence value per entry of Bbox
+    -- confirm against the implementation
+    */
+    CV_WRAP virtual void textDetectInImage(InputArray inputImage,CV_OUT std::vector<Rect>& Bbox,CV_OUT std::vector<float>& confidence)=0;
+
+    // Disabled batch interface, kept for reference. Its documentation described a list of
+    // single-word images (CV_8UC1 or CV_8UC3, any size) classified in one call, returning one
+    // transcription and one confidence per input image.
+    //CV_WRAP virtual void recogniseImageBatch(InputArrayOfArrays inputImageList,CV_OUT std::vector<String>& transcriptions,CV_OUT std::vector<double>& confidences)=0;
+
+
+   /** @brief simple getter for the TextImageClassifier
+     */
+    CV_WRAP virtual Ptr<TextImageClassifier> getClassifier()=0;
+
+    /** @brief Creates an instance of the textDetector class.
+
+    @param classifierPtr an instance of TextImageClassifier, normally a DeepCNN instance
+     */
+    CV_WRAP static Ptr<textDetector> create(Ptr<TextImageClassifier> classifierPtr);
+
+
+    /** @brief Creates an instance of the textDetector class from model files.
+
+    NOTE(review): the earlier text stated that a DeepCNN classifier is created implicitly; that
+    is not visible in this header -- confirm against the implementation.
+
+    @param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture.
+
+    @param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.
+    */
+    CV_WRAP static Ptr<textDetector> create(String modelArchFilename, String modelWeightsFilename);
+
+    // Disabled factory overloads that additionally take a vocabulary, kept for reference.
+ //   CV_WRAP static Ptr<textDetectImage> create(Ptr<TextImageClassifier> classifierPtr,const std::vector<String>& vocabulary);
+
+ //   CV_WRAP static Ptr<textDetectImage> create (String modelArchFilename, String modelWeightsFilename, const std::vector<String>& vocabulary);
+};
+
+
+}//namespace text
+}//namespace cv
+
+
+#endif // __OPENCV_TEXT_TEXTDETECTOR_HPP__
--- a/modules/text/src/ocr_holistic.cpp
+++ b/modules/text/src/ocr_holistic.cpp
--- a/modules/text/src/text_detector.cpp
+++ b/modules/text/src/text_detector.cpp
--- a/modules/text/text_config.hpp.in
+++ b/modules/text/text_config.hpp.in
 #ifndef __OPENCV_TEXT_CONFIG_HPP__
 #define __OPENCV_TEXT_CONFIG_HPP__

+// NOTE: every #cmakedefine in this template is commented out. In this change the
+// HAVE_TESSERACT and HAVE_CAFFE macros are passed as compiler definitions from
+// CMakeLists.txt (add_definitions) instead.
+
+// HAVE QT5
+//#cmakedefine HAVE_QT5GUI
+
+// HAVE CAFFE
+//#cmakedefine HAVE_CAFFE
+
 // HAVE OCR Tesseract
-#cmakedefine HAVE_TESSERACT
+//#cmakedefine HAVE_TESSERACT

 #endif