Commit 9ae765a1 authored by sghoshcvc's avatar sghoshcvc

Text detector class and Custom Image processor Class

parent fa94c160
# Build description for the OpenCV "text" module: declares the module, then probes for the
# optional Tesseract, Caffe, Protobuf and Glog dependencies and records the results in
# HAVE_* variables.
# NOTE(review): this listing appears to interleave lines removed and lines added by the
# commit. It contains two ocv_define_module() calls, two find_package(Tesseract) calls,
# an if() with two else() branches, and six if() against five endif(). Confirm against
# the real file before relying on it.
set(the_description "Text Detection and Recognition")
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d OPTIONAL opencv_highgui WRAP python)
# Using cmake scripts and modules
# Adds this directory to CMAKE_MODULE_PATH so find_package() also searches it for
# Find<Name>.cmake modules.
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR})
if(NOT CMAKE_CROSSCOMPILING OR OPENCV_FIND_TESSERACT)
# Same idea as above, for the cmake/ sub-directory.
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
find_package(Tesseract QUIET)
if(Tesseract_FOUND)
# NOTE(review): TEXT_DEPS is only consumed by the commented-out ocv_define_module() call
# further down, so in the visible code it has no effect.
set(TEXT_DEPS opencv_ml opencv_highgui opencv_imgproc opencv_core opencv_features2d opencv_calib3d)
# Optional dependency probes: each one prints a YES/NO status line and sets HAVE_<NAME> to 1
# when the package was found.
find_package(Caffe)
if(Caffe_FOUND)
message(STATUS "Caffe: YES")
set(HAVE_CAFFE 1)
else()
message(STATUS "Caffe: NO")
# list(APPEND TEXT_DEPS opencv_dnn)
endif()
#internal dependencies
find_package(Protobuf)
if(Protobuf_FOUND)
message(STATUS "Protobuf: YES")
set(HAVE_PROTOBUF 1)
else()
message(STATUS "Protobuf: NO")
endif()
find_package(Glog)
if(Glog_FOUND)
message(STATUS "Glog: YES")
set(HAVE_GLOG 1)
else()
message(STATUS "Glog: NO")
endif()
# NOTE(review): second definition of the same module (the first is near the top of this block).
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d WRAP python)
#ocv_define_module(text ${TEXT_DEPS} WRAP python)
#set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR})
# NOTE(review): second Tesseract lookup, this time without QUIET.
find_package(Tesseract)
if(${Tesseract_FOUND})
message(STATUS "Tesseract: YES")
set(HAVE_TESSERACT 1)
# Variant 1: OpenCV helper macros, linking ${Tesseract_LIBRARIES}.
ocv_include_directories(${Tesseract_INCLUDE_DIR})
ocv_target_link_libraries(${the_module} ${Tesseract_LIBRARIES})
else()
# Variant 2: plain CMake commands, linking ${Tesseract_LIBS} and defining HAVE_TESSERACT on
# the command line.
# NOTE(review): as listed, this sits in an else() branch and is followed by a second else();
# presumably variants 1 and 2 are the old and new bodies of the same "found" branch. Confirm.
include_directories(${Tesseract_INCLUDE_DIR})
target_link_libraries(opencv_text ${Tesseract_LIBS})
add_definitions(-DHAVE_TESSERACT)
else()
message(STATUS "Tesseract: NO")
endif()
endif()
# Generate text_config.hpp from its template. The output goes into this module's own
# build directory, which is exactly the directory added to the include path on the next
# line, so the generated header is found without relying on any other include path.
# @ONLY restricts substitution to @VAR@ references.
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/text_config.hpp.in
               ${CMAKE_CURRENT_BINARY_DIR}/text_config.hpp @ONLY)
ocv_include_directories(${CMAKE_CURRENT_BINARY_DIR})

# Register the sample files used as test data: XML / gzipped XML files and the
# scenetext<N>.jpg images from samples/.
ocv_add_testdata(samples/ contrib/text
  FILES_MATCHING PATTERN "*.xml" PATTERN "*.xml.gz" REGEX "scenetext[0-9]+.jpg"
)
# Optional Caffe backend: wired in only when HAVE_CAFFE, HAVE_GLOG and HAVE_PROTOBUF were
# all set earlier in this file.
if(HAVE_CAFFE AND HAVE_GLOG AND HAVE_PROTOBUF)
  include_directories(${Caffe_INCLUDE_DIR})

  # HDF5 and Boost are REQUIRED here, so configuration stops if either is missing.
  find_package(HDF5 COMPONENTS HL REQUIRED)
  include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR})
  list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES})

  find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem)
  include_directories(SYSTEM ${Boost_INCLUDE_DIR})

  # Hard-coded CUDA 8.0 / 7.5 and OpenBLAS locations. Every entry is an absolute path:
  # a relative entry would be resolved against the current source directory.
  include_directories(SYSTEM
    /usr/local/cuda-8.0/targets/x86_64-linux/include/
    /usr/local/cuda-8.0/include/
    /usr/local/cuda-7.5/targets/x86_64-linux/include/
  )
  # link_directories() has no SYSTEM keyword; any such word would be taken as a directory.
  # NOTE(review): link_directories() only applies to targets created after the call —
  # confirm that opencv_text actually picks these directories up.
  link_directories(
    /usr/local/cuda-8.0/targets/x86_64-linux/lib/
    /usr/local/cuda-8.0/lib/
    /usr/local/cuda-7.5/targets/x86_64-linux/lib/
    /usr/lib/openblas-base/lib
    /usr/local/cuda-8.0/lib64
  )
  list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES})

  target_link_libraries(opencv_text atlas blas ${Caffe_LIBS} ${Glog_LIBS} ${Protobuf_LIBS} ${HDF5_LIBRARIES} ${Boost_LIBRARIES})
  add_definitions(-DHAVE_CAFFE)
endif() #HAVE_CAFFE
# Diagnostic output only: these statements print status lines and set nothing.
message(STATUS "TEXT CAFFE SEARCH")
# NOTE(review): the condition is empty. An if() without arguments is expected to evaluate
# as false, so only the else() branch ("TEXT CAFFE CONFLICT") would ever be printed.
# Confirm and supply the intended condition.
if()
message(STATUS "TEXT NO CAFFE CONFLICT")
else()
message(STATUS "TEXT CAFFE CONFLICT")
endif()
# Caffe lookup (Caffe package for CNN Triplet training).
#
# Results:
#   Caffe_INCLUDE_DIR - directory located by find_path() for the Caffe headers
#   Caffe_LIBS        - the caffe library located by find_library()
#   Caffe_FOUND       - set to 1 only when both of the above were located
unset(Caffe_FOUND)

find_library(Caffe_LIBS
  NAMES caffe
  HINTS /usr/local/lib
)

find_path(Caffe_INCLUDE_DIR
  NAMES
    caffe/caffe.hpp
    caffe/common.hpp
    caffe/net.hpp
    caffe/proto/caffe.pb.h
    caffe/util/io.hpp
    caffe/vision_layers.hpp
  HINTS /usr/local/include
)

if(Caffe_INCLUDE_DIR AND Caffe_LIBS)
  set(Caffe_FOUND 1)
endif()
# Glog lookup (required for Caffe).
#
# Results:
#   Glog_LIBS  - the glog library located by find_library()
#   Glog_FOUND - set to 1 only when the library was located
unset(Glog_FOUND)

find_library(Glog_LIBS
  NAMES glog
  HINTS /usr/local/lib
)

if(NOT Glog_LIBS)
  # Nothing located: Glog_FOUND stays unset.
else()
  set(Glog_FOUND 1)
endif()
# Protobuf lookup (Protobuf package required for Caffe).
#
# Results:
#   Protobuf_LIBS  - the protobuf library located by find_library()
#   Protobuf_FOUND - set to 1 only when the library was located
unset(Protobuf_FOUND)

find_library(Protobuf_LIBS
  NAMES protobuf
  HINTS /usr/local/lib
)

if(NOT Protobuf_LIBS)
  # Nothing located: Protobuf_FOUND stays unset.
else()
  set(Protobuf_FOUND 1)
endif()
# Tesseract OCR
#
# Locates the Tesseract header directory, the tesseract library and the lept library.
#
# Results:
#   Tesseract_INCLUDE_DIR - directory containing tesseract/baseapi.h
#   Tesseract_LIBRARY     - the tesseract library
#   Lept_LIBRARY          - the lept library
#   Tesseract_LIBS        - both libraries, in link order
#   Tesseract_FOUND       - set to 1 only when the headers and BOTH libraries were located
unset(Tesseract_FOUND)

find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h
  HINTS
    /usr/include
    /usr/local/include)

find_library(Tesseract_LIBRARY NAMES tesseract
  HINTS
    /usr/lib
    /usr/local/lib)

find_library(Lept_LIBRARY NAMES lept
  HINTS
    /usr/lib
    /usr/local/lib)

set(Tesseract_LIBS ${Tesseract_LIBRARY} ${Lept_LIBRARY})

# Test every result on its own. The combined list is not a reliable check: a value such as
# "Tesseract_LIBRARY-NOTFOUND;/usr/lib/liblept.so" does not end in -NOTFOUND, so if() would
# treat it as true even though the tesseract library is missing.
if(Tesseract_LIBRARY AND Lept_LIBRARY AND Tesseract_INCLUDE_DIR)
  set(Tesseract_FOUND 1)
endif()
......@@ -47,3 +47,75 @@ Notes
2. Tesseract configure script may fail to detect leptonica, so you may have to edit the configure script - comment off some if's around this message and retain only "then" branch.
3. You are encouraged to search the Net for some better pre-trained classifiers, as well as classifiers for other languages.
Word spotting CNN
=================
Intro
-----
A word spotting CNN is a CNN that takes an image assumed to contain a single word and provides a probability distribution over a given vocabulary.
Although other backends will be supported, for the moment only the Caffe backend is supported.
Installation of Caffe backend
-----------------------------
The Caffe wrapping backend has the same requirements as Caffe itself.
* Caffe can be built against OpenCV; if the Caffe backend is enabled, a circular dependency arises.
The simplest solution is to build Caffe without support for OpenCV.
* Only the operating systems supported by Caffe are supported by the backend.
The scripts describing the module have been developed on Ubuntu 16.04 and assume such a system.
Other UNIX systems, including OSX, should be easy to adapt to.
Sample script for building Caffe
```bash
#!/bin/bash
# Sample script: fetch Caffe, configure it without OpenCV support, patch net.hpp and build it.

# Everything is placed under this directory; Caffe is cloned into $SRCROOT/caffe.
SRCROOT="${HOME}/caffe_inst/"
mkdir -p "$SRCROOT"
cd "$SRCROOT"
git clone https://github.com/BVLC/caffe.git
cd caffe
# Pin the sources to one specific commit, then create a branch named after that commit hash.
git checkout 91b09280f5233cafc62954c98ce8bc4c204e7475
git branch 91b09280f5233cafc62954c98ce8bc4c204e7475
# Start from the example configuration and append overrides: OpenCV support off, plus the
# include and library directories of the serial HDF5 package.
cat Makefile.config.example > Makefile.config
echo 'USE_OPENCV := 0' >> Makefile.config
echo 'INCLUDE_DIRS += /usr/include/hdf5/serial/' >> Makefile.config
echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config
# Write a unified diff that adds a virtual destructor to Callback in caffe/net.hpp.
# The quoted text below is the patch content itself, so it must not be edited or commented.
echo "--- /tmp/caffe/include/caffe/net.hpp 2017-05-28 04:55:47.929623902 +0200
+++ caffe/distribute/include/caffe/net.hpp 2017-05-28 04:51:33.437090768 +0200
@@ -234,6 +234,7 @@
template <typename T>
friend class Net;
+ virtual ~Callback(){}
};
const vector<Callback*>& before_forward() const { return before_forward_; }
void add_before_forward(Callback* value) {
">/tmp/cleanup_caffe.diff
# NOTE(review): patch is run without -p and the diff header names
# caffe/distribute/include/caffe/net.hpp; confirm that it locates the intended file from
# this directory at this point of the script.
patch < /tmp/cleanup_caffe.diff
# Build with 6 parallel jobs, then build the pycaffe and distribute targets.
make -j 6
make pycaffe
make distribute
```
```bash
#!/bin/bash
# Sample script: re-run cmake in an existing OpenCV build directory so that it uses the
# Caffe build found under $CAFFEROOT.
cd $OPENCV_BUILD_DIR #You must set this
CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe in ubuntu 16.04
# Caffe_LIBS and Caffe_INCLUDE_DIR point at the "distribute" output of the Caffe build.
# NOTE: the OPENCV_EXTRA_MODULES_PATH value is an absolute path under /home/anguelos;
# replace it with the location of your own opencv_contrib/modules checkout.
cmake -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0" -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="/home/anguelos/work/projects/opencv_gsoc/opencv_contrib/modules" ./
```
......@@ -41,6 +41,7 @@ the use of this software, even if advised of the possibility of such damage.
#include "opencv2/text/erfilter.hpp"
#include "opencv2/text/ocr.hpp"
#include "opencv2/text/textDetector.hpp"
/** @defgroup text Scene Text Detection and Recognition
......@@ -92,7 +93,7 @@ grouping horizontally aligned text, and the method proposed by Lluis Gomez and D
in [Gomez13][Gomez14] for grouping arbitrary oriented text (see erGrouping).
To see the text detector at work, have a look at the textdetection demo:
<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>
@defgroup text_recognize Scene Text Recognition
@}
......
This diff is collapsed.
/*M//////////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_TEXT_TEXTDETECTOR_HPP__
#define __OPENCV_TEXT_TEXTDETECTOR_HPP__
#include <vector>
#include <string>
#include <iostream>
#include <sstream>
#include"ocr.hpp"
namespace cv
{
namespace text
{
//! @addtogroup text_recognize
//! @{
/** @brief Abstract base class declaring the common API used in a typical text
 * detection scenario.
 *
 * Both run() overloads are pure virtual; subclasses provide the implementation and may
 * choose their own default values for every parameter other than the input image.
 */
class CV_EXPORTS_W BaseDetector
{
public:
    virtual ~BaseDetector() {}

    /** @brief Runs the detector on an image.
     *
     * @param image input image.
     * @param component_rects optional output (NULL by default); presumably receives the
     * bounding boxes of the detected components - confirm against the implementations.
     * @param component_confidences optional output (NULL by default); presumably receives
     * one confidence value per detected component - confirm against the implementations.
     * @param component_level granularity selector, 0 by default.
     */
    virtual void run(Mat& image,
                     std::vector<Rect>* component_rects=NULL,
                     std::vector<float>* component_confidences=NULL,
                     int component_level=0) = 0;

    /** @brief Overload of run() that additionally takes a mask.
     *
     * @param image input image.
     * @param mask mask image; how it is interpreted is left to the implementation.
     * @param component_rects optional output (NULL by default), see the overload without mask.
     * @param component_confidences optional output (NULL by default), see the overload
     * without mask.
     * @param component_level granularity selector, 0 by default.
     */
    virtual void run(Mat& image, Mat& mask,
                     std::vector<Rect>* component_rects=NULL,
                     std::vector<float>* component_confidences=NULL,
                     int component_level=0) = 0;
};
//Classifiers should provide different backends
//For the moment only caffe is implemented
//enum{
// OCR_HOLISTIC_BACKEND_NONE,
// OCR_HOLISTIC_BACKEND_CAFFE
//};
/** @brief Abstract text detector interface.
*
* Extends BaseDetector with a wrapped entry point (textDetectInImage), access to the
* underlying TextImageClassifier and two create() factories.
*
* NOTE(review): the comments previously attached to this class were copied from
* OCRHolisticWordRecognizer (word spotting over a vocabulary, DictNet model files) and
* described parameters that do not exist here. Which model files a detector needs is
* not stated in this header.
*/
class CV_EXPORTS_W textDetector : public BaseDetector
{
public:
/** @brief Runs the detector on an image.
@param image input image; the original note states CV_8UC1 or CV_8UC3.
@param component_rects optional output (NULL by default); presumably receives the
rectangles of the detected text elements - confirm against the implementation.
@param component_confidences optional output (NULL by default); presumably receives one
confidence value per rectangle - confirm against the implementation.
@param component_level defaults to OCR_LEVEL_WORD.
*/
virtual void run(Mat& image, std::vector<Rect>* component_rects=NULL,
std::vector<float>* component_confidences=NULL,
int component_level=OCR_LEVEL_WORD)=0;
/** @brief Overload of run() that additionally takes a mask.
@param image input image; the original note states CV_8UC1 or CV_8UC3.
@param mask the original note states that it is ignored and only present for
compatibility reasons - confirm against the implementation.
@param component_rects optional output (NULL by default), see the overload without mask.
@param component_confidences optional output (NULL by default), see the overload without mask.
@param component_level defaults to OCR_LEVEL_WORD; the original note states it must be
OCR_LEVEL_WORD.
*/
virtual void run(Mat& image, Mat& mask, std::vector<Rect>* component_rects=NULL,
std::vector<float>* component_confidences=NULL,
int component_level=OCR_LEVEL_WORD)=0;
/**
@brief Wrapped (CV_WRAP) entry point for detecting text in a single image.
@param inputImage input image; the original note states CV_8UC1 or CV_8UC3, of any size.
@param Bbox output vector of rectangles; presumably one per detected text region.
@param confidence output vector of floats; presumably one value per entry of Bbox.
*/
CV_WRAP virtual void textDetectInImage(InputArray inputImage,CV_OUT std::vector<Rect>& Bbox,CV_OUT std::vector<float>& confidence)=0;
/** @brief Returns the TextImageClassifier associated with this detector.
*/
CV_WRAP virtual Ptr<TextImageClassifier> getClassifier()=0;
/** @brief Creates a textDetector instance from an existing classifier.
@param classifierPtr an instance of TextImageClassifier, normally a DeepCNN instance
*/
CV_WRAP static Ptr<textDetector> create(Ptr<TextImageClassifier> classifierPtr);
/** @brief Creates a textDetector instance from model files; presumably the classifier is
constructed internally - confirm against the implementation.
@param modelArchFilename the relative or absolute path to the prototxt file describing the classifier's architecture.
@param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.
*/
CV_WRAP static Ptr<textDetector> create(String modelArchFilename, String modelWeightsFilename);
};
}//namespace text
}//namespace cv
#endif // __OPENCV_TEXT_TEXTDETECTOR_HPP__
This diff is collapsed.
This diff is collapsed.
// Template for the generated text_config.hpp: each active #cmakedefine line is rewritten
// by CMake's configure_file() according to whether the named variable is set.
#ifndef __OPENCV_TEXT_CONFIG_HPP__
#define __OPENCV_TEXT_CONFIG_HPP__
// HAVE QT5
//#cmakedefine HAVE_QT5GUI
// HAVE CAFFE
//#cmakedefine HAVE_CAFFE
// HAVE OCR Tesseract
// NOTE(review): HAVE_TESSERACT is listed both active and commented out, which looks like
// the old and the new line of a diff shown together - keep only the intended one.
#cmakedefine HAVE_TESSERACT
//#cmakedefine HAVE_TESSERACT
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment