From 951e18272dcf13ecede1e5c3b7d9f2b2b0e3c456 Mon Sep 17 00:00:00 2001 From: Vladislav Sovrasov Date: Thu, 5 Oct 2017 16:42:30 +0300 Subject: [PATCH] text: cleanup dnn text detection part --- modules/text/CMakeLists.txt | 98 +-- modules/text/FindCaffe.cmake | 14 - modules/text/FindGlog.cmake | 10 - modules/text/FindProtobuf.cmake | 10 - modules/text/FindTesseract.cmake | 22 - modules/text/README.md | 71 -- modules/text/cmake/FindTesseract.cmake | 3 + modules/text/include/opencv2/text.hpp | 2 +- .../text/include/opencv2/text/erfilter.hpp | 1 - modules/text/include/opencv2/text/ocr.hpp | 764 +++--------------- .../include/opencv2/text/textDetector.hpp | 248 +----- modules/text/samples/deeptextdetection.py | 68 +- modules/text/samples/textbox_demo.cpp | 157 ++-- modules/text/src/image_preprocessor.cpp | 387 --------- modules/text/src/ocr_holistic.cpp | 697 ---------------- modules/text/src/precomp.hpp | 2 + modules/text/src/text_detector.cpp | 169 ---- modules/text/src/text_detectorCNN.cpp | 480 ++--------- modules/text/text_config.hpp.in | 3 + 19 files changed, 308 insertions(+), 2898 deletions(-) delete mode 100644 modules/text/FindCaffe.cmake delete mode 100755 modules/text/FindGlog.cmake delete mode 100644 modules/text/FindProtobuf.cmake delete mode 100644 modules/text/FindTesseract.cmake delete mode 100644 modules/text/src/image_preprocessor.cpp delete mode 100644 modules/text/src/ocr_holistic.cpp delete mode 100644 modules/text/src/text_detector.cpp diff --git a/modules/text/CMakeLists.txt b/modules/text/CMakeLists.txt index b58fd41cf..5d0f89f0d 100644 --- a/modules/text/CMakeLists.txt +++ b/modules/text/CMakeLists.txt @@ -1,84 +1,24 @@ set(the_description "Text Detection and Recognition") - -if(POLICY CMP0023) - message(STATUS "Explicitly setting policy CMP0023 to OLD") - cmake_policy(SET CMP0023 OLD) -endif(POLICY CMP0023) - -# Using cmake scripts and modules -list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}) - -set(TEXT_DEPS opencv_ml opencv_highgui 
opencv_imgproc opencv_core opencv_features2d opencv_calib3d) - -find_package(Caffe) -if(Caffe_FOUND) - message(STATUS "Caffe: YES") - set(HAVE_CAFFE 1) -else() - message(STATUS "Caffe: NO") -# list(APPEND TEXT_DEPS opencv_dnn) -endif() - -#internal dependencies -find_package(Protobuf) -if(Protobuf_FOUND) - message(STATUS "Protobuf: YES") - set(HAVE_PROTOBUF 1) -else() - message(STATUS "Protobuf: NO") -endif() - -find_package(Glog) -if(Glog_FOUND) - message(STATUS "Glog: YES") - set(HAVE_GLOG 1) -else() - message(STATUS "Glog: NO") -endif() - -ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d OPTIONAL opencv_dnn WRAP python) -#ocv_define_module(text ${TEXT_DEPS} WRAP python) - -#set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}) - -find_package(Tesseract) -if(${Tesseract_FOUND}) - message(STATUS "Tesseract: YES") - include_directories(${Tesseract_INCLUDE_DIR}) - target_link_libraries(opencv_text ${Tesseract_LIBS}) - add_definitions(-DHAVE_TESSERACT) -else() - message(STATUS "Tesseract: NO") +ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_dnn OPTIONAL opencv_highgui WRAP python java) + +if(NOT CMAKE_CROSSCOMPILING OR OPENCV_FIND_TESSERACT) + set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake) + find_package(Tesseract QUIET) + if(Tesseract_FOUND) + message(STATUS "Tesseract: YES") + set(HAVE_TESSERACT 1) + ocv_include_directories(${Tesseract_INCLUDE_DIR}) + ocv_target_link_libraries(${the_module} ${Tesseract_LIBRARIES}) + else() + message(STATUS "Tesseract: NO") endif() +endif() +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/text_config.hpp.in + ${CMAKE_BINARY_DIR}/text_config.hpp @ONLY) -if(HAVE_CAFFE AND HAVE_GLOG AND HAVE_PROTOBUF) - include_directories(${Caffe_INCLUDE_DIR}) - find_package(HDF5 COMPONENTS HL REQUIRED) - include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR}) - list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES}) - 
find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem) - include_directories(SYSTEM ${Boost_INCLUDE_DIR}) - include_directories(SYSTEM ${CUDA_INCLUDE_DIR}) - link_directories(SYSTEM ${CUDA_LIBS}) - # include_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/include/ usr/local/cuda-8.0/include/ /usr/local/cuda-7.5/targets/x86_64-linux/include/ ) - #link_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/lib/ usr/local/cuda-8.0/lib/ /usr/local/cuda-7.5/targets/x86_64-linux/lib/ /usr/lib/openblas-base/lib /usr/local/cuda-8.0/lib64) - list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES}) - target_link_libraries(opencv_text atlas blas ${Caffe_LIBS} ${Glog_LIBS} ${Protobuf_LIBS} ${HDF5_LIBRARIES} ${Boost_LIBRARIES}) - add_definitions(-DHAVE_CAFFE) -endif() #HAVE_CAFFE - -message(STATUS "TEXT CAFFE SEARCH") -if() - message(STATUS "TEXT NO CAFFE CONFLICT") -else() - message(STATUS "TEXT CAFFE CONFLICT") -endif() +ocv_include_directories(${CMAKE_CURRENT_BINARY_DIR}) -if(HAVE_opencv_dnn) - message(STATUS "dnn module found") - add_definitions(-DHAVE_DNN) - set(HAVE_DNN 1) -else() - message(STATUS "dnn module not found") -endif() +ocv_add_testdata(samples/ contrib/text + FILES_MATCHING PATTERN "*.xml" PATTERN "*.xml.gz" REGEX "scenetext[0-9]+.jpg" +) diff --git a/modules/text/FindCaffe.cmake b/modules/text/FindCaffe.cmake deleted file mode 100644 index 12948f629..000000000 --- a/modules/text/FindCaffe.cmake +++ /dev/null @@ -1,14 +0,0 @@ -# Caffe package for CNN Triplet training -unset(Caffe_FOUND) - -find_path(Caffe_INCLUDE_DIR NAMES caffe/caffe.hpp caffe/common.hpp caffe/net.hpp caffe/proto/caffe.pb.h caffe/util/io.hpp caffe/vision_layers.hpp - HINTS - /usr/local/include) - -find_library(Caffe_LIBS NAMES caffe - HINTS - /usr/local/lib) - -if(Caffe_LIBS AND Caffe_INCLUDE_DIR) - set(Caffe_FOUND 1) -endif() diff --git a/modules/text/FindGlog.cmake b/modules/text/FindGlog.cmake deleted file mode 100755 index c30e9f4a6..000000000 --- 
a/modules/text/FindGlog.cmake +++ /dev/null @@ -1,10 +0,0 @@ -#Required for Caffe -unset(Glog_FOUND) - -find_library(Glog_LIBS NAMES glog - HINTS - /usr/local/lib) - -if(Glog_LIBS) - set(Glog_FOUND 1) -endif() diff --git a/modules/text/FindProtobuf.cmake b/modules/text/FindProtobuf.cmake deleted file mode 100644 index 6d0ad56a1..000000000 --- a/modules/text/FindProtobuf.cmake +++ /dev/null @@ -1,10 +0,0 @@ -#Protobuf package required for Caffe -unset(Protobuf_FOUND) - -find_library(Protobuf_LIBS NAMES protobuf - HINTS - /usr/local/lib) - -if(Protobuf_LIBS) - set(Protobuf_FOUND 1) -endif() diff --git a/modules/text/FindTesseract.cmake b/modules/text/FindTesseract.cmake deleted file mode 100644 index 01835e61b..000000000 --- a/modules/text/FindTesseract.cmake +++ /dev/null @@ -1,22 +0,0 @@ -# Tesseract OCR -unset(Tesseract_FOUND) - -find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h - HINTS - /usr/include - /usr/local/include) - -find_library(Tesseract_LIBRARY NAMES tesseract - HINTS - /usr/lib - /usr/local/lib) - -find_library(Lept_LIBRARY NAMES lept - HINTS - /usr/lib - /usr/local/lib) - -set(Tesseract_LIBS ${Tesseract_LIBRARY} ${Lept_LIBRARY}) -if(Tesseract_LIBS AND Tesseract_INCLUDE_DIR) - set(Tesseract_FOUND 1) -endif() diff --git a/modules/text/README.md b/modules/text/README.md index fd33980e8..b6955fd98 100644 --- a/modules/text/README.md +++ b/modules/text/README.md @@ -56,74 +56,3 @@ Intro ----- The text module now have a text detection and recognition using deep CNN. The text detector deep CNN that takes an image which may contain multiple words. This outputs a list of Rects with bounding boxes and probability of text there. The text recognizer provides a probabillity over a given vocabulary for each of these rects. 
- -Two backends are supported 1) caffe 2) opencv-dnn - - - - -Instalation of Caffe backend ----------------------------- -* Please note a custom caffe based on SSD branch is required, the link of the custom caffe is provided below -The caffe wrapping backend has the requirements caffe does. -* Caffe can be built against OpenCV, if the caffe backend is enabled, a circular bependency arises. -The simplest solution is to build caffe without support for OpenCV. -* Only the OS supported by Caffe are supported by the backend. -The scripts describing the module have been developed in ubuntu 16.04 and assume such a system. -Other UNIX systems including OSX should be easy to adapt. - -Sample script for building Caffe - -```bash -#!/bin/bash -SRCROOT="${HOME}/caffe_inst/" -mkdir -p "$SRCROOT" -cd "$SRCROOT" -git clone https://github.com/sghoshcvc/TextBoxes.git -cd TextBoxes -cat Makefile.config.example > Makefile.config -echo 'USE_OPENCV := 0' >> Makefile.config -echo 'INCLUDE_DIRS += /usr/include/hdf5/serial/' >> Makefile.config -echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config - - -echo "--- /tmp/caffe/include/caffe/net.hpp 2017-05-28 04:55:47.929623902 +0200 -+++ caffe/distribute/include/caffe/net.hpp 2017-05-28 04:51:33.437090768 +0200 -@@ -234,6 +234,7 @@ - - template - friend class Net; -+ virtual ~Callback(){} - }; - const vector& before_forward() const { return before_forward_; } - void add_before_forward(Callback* value) { -">/tmp/cleanup_caffe.diff - -patch < /tmp/cleanup_caffe.diff - - -make -j 6 - -make pycaffe - -make distribute -``` - - -```bash -#!/bin/bash -cd $OPENCV_BUILD_DIR #You must set this -CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe in ubuntu 16.04 - -cmake -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" 
-DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0" -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="$OPENCV_CONTRIB/modules" ./ - - -``` -where $OPECV_CONTRIB is the root directory containing opencv_contrib module - -Instalation of Caffe backend ----------------------------- - -Use of opencv-dnn does not need any additional library. - -The recent opencv-3.3.0 needs to be build with extra modules to use text module. diff --git a/modules/text/cmake/FindTesseract.cmake b/modules/text/cmake/FindTesseract.cmake index 2a5d868f9..5bdbe2436 100644 --- a/modules/text/cmake/FindTesseract.cmake +++ b/modules/text/cmake/FindTesseract.cmake @@ -5,14 +5,17 @@ endif() if(NOT Tesseract_FOUND) find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h HINTS + /usr/include /usr/local/include) find_library(Tesseract_LIBRARY NAMES tesseract HINTS + /usr/lib /usr/local/lib) find_library(Lept_LIBRARY NAMES lept HINTS + /usr/lib /usr/local/lib) if(Tesseract_INCLUDE_DIR AND Tesseract_LIBRARY AND Lept_LIBRARY) diff --git a/modules/text/include/opencv2/text.hpp b/modules/text/include/opencv2/text.hpp index 85b8b7419..c06c88983 100644 --- a/modules/text/include/opencv2/text.hpp +++ b/modules/text/include/opencv2/text.hpp @@ -93,7 +93,7 @@ grouping horizontally aligned text, and the method proposed by Lluis Gomez and D in @cite Gomez13 @cite Gomez14 for grouping arbitrary oriented text (see erGrouping). To see the text detector at work, have a look at the textdetection demo: - + @defgroup text_recognize Scene Text Recognition @} diff --git a/modules/text/include/opencv2/text/erfilter.hpp b/modules/text/include/opencv2/text/erfilter.hpp index 2bd1c56a3..c9bac2b32 100644 --- a/modules/text/include/opencv2/text/erfilter.hpp +++ b/modules/text/include/opencv2/text/erfilter.hpp @@ -65,7 +65,6 @@ component tree of the image. 
: */ struct CV_EXPORTS ERStat { - public: //! Constructor explicit ERStat(int level = 256, int pixel = 0, int x = 0, int y = 0); diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp index df9c2b4aa..22c98448c 100644 --- a/modules/text/include/opencv2/text/ocr.hpp +++ b/modules/text/include/opencv2/text/ocr.hpp @@ -44,12 +44,10 @@ #ifndef __OPENCV_TEXT_OCR_HPP__ #define __OPENCV_TEXT_OCR_HPP__ +#include + #include #include -#include -#include - - namespace cv { @@ -91,100 +89,61 @@ enum ocr_engine_mode }; //base class BaseOCR declares a common API that would be used in a typical text recognition scenario - class CV_EXPORTS_W BaseOCR { - public: +public: virtual ~BaseOCR() {}; - - virtual void run(Mat& image, std::string& output_text, - std::vector* component_rects=NULL, - std::vector* component_texts=NULL, - std::vector* component_confidences=NULL, + virtual void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, + std::vector* component_texts=NULL, std::vector* component_confidences=NULL, int component_level=0) = 0; - - virtual void run(Mat& image, Mat& mask, std::string& output_text, - std::vector* component_rects=NULL, - std::vector* component_texts=NULL, - std::vector* component_confidences=NULL, + virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector* component_rects=NULL, + std::vector* component_texts=NULL, std::vector* component_confidences=NULL, int component_level=0) = 0; - - /** @brief Main functionality of the OCR Hierarchy. Subclasses provide - * default parameters for all parameters other than the input image. 
- */ - virtual String run(InputArray image){ - std::string res; - std::vector component_rects; - std::vector component_confidences; - std::vector component_texts; - Mat inputImage=image.getMat(); - this->run(inputImage,res,&component_rects,&component_texts, - &component_confidences,OCR_LEVEL_WORD); - return res; - } - }; -/** @brief OCRTesseract class provides an interface with the tesseract-ocr API - * (v3.02.02) in C++. +/** @brief OCRTesseract class provides an interface with the tesseract-ocr API (v3.02.02) in C++. Notice that it is compiled only when tesseract-ocr is correctly installed. @note - - (C++) An example of OCRTesseract recognition combined with scene text - detection can be found at the end_to_end_recognition demo: - - - (C++) Another example of OCRTesseract recognition combined with scene - text detection can be found at the webcam_demo: - + - (C++) An example of OCRTesseract recognition combined with scene text detection can be found + at the end_to_end_recognition demo: + + - (C++) Another example of OCRTesseract recognition combined with scene text detection can be + found at the webcam_demo: + */ class CV_EXPORTS_W OCRTesseract : public BaseOCR { public: /** @brief Recognize text using the tesseract-ocr API. - Takes image on input and returns recognized text in the output_text - parameter. Optionally provides also the Rects for individual text elements - found (e.g. words), and the list of those text elements with their - confidence values. + Takes image on input and returns recognized text in the output_text parameter. Optionally + provides also the Rects for individual text elements found (e.g. words), and the list of those + text elements with their confidence values. @param image Input image CV_8UC1 or CV_8UC3 - @param output_text Output text of the tesseract-ocr. - - @param component_rects If provided the method will output a list of Rects - for the individual text elements found (e.g. words or text lines). 
- - @param component_texts If provided the method will output a list of text - strings for the recognition of individual text elements found (e.g. words or - text lines). - - @param component_confidences If provided the method will output a list of - confidence values for the recognition of individual text elements found - (e.g. words or text lines). - - @param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXT_LINE. - + @param component_rects If provided the method will output a list of Rects for the individual + text elements found (e.g. words or text lines). + @param component_texts If provided the method will output a list of text strings for the + recognition of individual text elements found (e.g. words or text lines). + @param component_confidences If provided the method will output a list of confidence values + for the recognition of individual text elements found (e.g. words or text lines). + @param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXTLINE. 
*/ - using BaseOCR::run; - virtual void run (Mat& image, std::string& output_text, - std::vector* component_rects=NULL, - std::vector* component_texts=NULL, - std::vector* component_confidences=NULL, + virtual void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, + std::vector* component_texts=NULL, std::vector* component_confidences=NULL, int component_level=0); - virtual void run (Mat& image, Mat& mask, std::string& output_text, - std::vector* component_rects=NULL, - std::vector* component_texts=NULL, - std::vector* component_confidences=NULL, - int component_level=0); + virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector* component_rects=NULL, + std::vector* component_texts=NULL, std::vector* component_confidences=NULL, + int component_level=0); // aliases for scripting - CV_WRAP String run (InputArray image, int min_confidence, - int component_level=0); + CV_WRAP String run(InputArray image, int min_confidence, int component_level=0); - CV_WRAP String run(InputArray image, InputArray mask, - int min_confidence, int component_level=0); + CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0); CV_WRAP virtual void setWhiteList(const String& char_whitelist) = 0; @@ -205,7 +164,6 @@ public: */ CV_WRAP static Ptr create(const char* datapath=NULL, const char* language=NULL, const char* char_whitelist=NULL, int oem=OEM_DEFAULT, int psmode=PSM_AUTO); - }; @@ -225,19 +183,19 @@ enum classifier_type /** @brief OCRHMMDecoder class provides an interface for OCR using Hidden Markov Models. 
- - * @note - * - (C++) An example on using OCRHMMDecoder recognition combined with scene - * text detection can be found at the webcam_demo sample: - * +@note + - (C++) An example on using OCRHMMDecoder recognition combined with scene text detection can + be found at the webcam_demo sample: + */ -class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR { - public: +class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR +{ +public: /** @brief Callback with the character classifier is made a class. - * This way it hides the feature extractor and the classifier itself, so - * developers can write their own OCR code. + This way it hides the feature extractor and the classifier itself, so developers can write + their own OCR code. The default character classifier and feature extractor can be loaded using the utility function loadOCRHMMClassifierNM and KNN model provided in @@ -246,120 +204,92 @@ class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR { class CV_EXPORTS_W ClassifierCallback { public: - virtual ~ClassifierCallback() { } - /** @brief The character classifier must return a (ranked list of) - * class(es) id('s) - - * @param image Input image CV_8UC1 or CV_8UC3 with a single letter. - * @param out_class The classifier returns the character class - * categorical label, or list of class labels, to which the input image - * corresponds. + /** @brief The character classifier must return a (ranked list of) class(es) id('s) - * @param out_confidence The classifier returns the probability of the - * input image corresponding to each classes in out_class. + @param image Input image CV_8UC1 or CV_8UC3 with a single letter. + @param out_class The classifier returns the character class categorical label, or list of + class labels, to which the input image corresponds. + @param out_confidence The classifier returns the probability of the input image + corresponding to each classes in out_class. 
*/ - virtual void eval (InputArray image, std::vector& out_class, - std::vector& out_confidence); + virtual void eval( InputArray image, std::vector& out_class, std::vector& out_confidence); }; +public: /** @brief Recognize text using HMM. - * Takes binary image on input and returns recognized text in the output_text - * parameter. Optionally provides also the Rects for individual text elements - * found (e.g. words), and the list of those text elements with their - * confidence values. + Takes binary image on input and returns recognized text in the output_text parameter. Optionally + provides also the Rects for individual text elements found (e.g. words), and the list of those + text elements with their confidence values. - * @param image Input binary image CV_8UC1 with a single text line (or word). + @param image Input binary image CV_8UC1 with a single text line (or word). - * @param output_text Output text. Most likely character sequence found by - * the HMM decoder. + @param output_text Output text. Most likely character sequence found by the HMM decoder. - * @param component_rects If provided the method will output a list of Rects - * for the individual text elements found (e.g. words). + @param component_rects If provided the method will output a list of Rects for the individual + text elements found (e.g. words). - * @param component_texts If provided the method will output a list of text - * strings for the recognition of individual text elements found (e.g. words). + @param component_texts If provided the method will output a list of text strings for the + recognition of individual text elements found (e.g. words). - * @param component_confidences If provided the method will output a list of - * confidence values for the recognition of individual text elements found - * (e.g. words). + @param component_confidences If provided the method will output a list of confidence values + for the recognition of individual text elements found (e.g. words). 
- * @param component_level Only OCR_LEVEL_WORD is supported. - */ - using BaseOCR::run; - virtual void run (Mat& image, std::string& output_text, - std::vector* component_rects=NULL, - std::vector* component_texts=NULL, - std::vector* component_confidences=NULL, - int component_level=0); + @param component_level Only OCR_LEVEL_WORD is supported. + */ + virtual void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, + std::vector* component_texts=NULL, std::vector* component_confidences=NULL, + int component_level=0); /** @brief Recognize text using HMM. - * Takes an image and a mask (where each connected component corresponds to a - * segmented character) on input and returns recognized text in the - * output_text parameter. Optionally provides also the Rects for individual - * text elements found (e.g. words), and the list of those text elements with - * their confidence values. - - * @param image Input image CV_8UC1 or CV_8UC3 with a single text line - * (or word). + Takes an image and a mask (where each connected component corresponds to a segmented character) + on input and returns recognized text in the output_text parameter. Optionally + provides also the Rects for individual text elements found (e.g. words), and the list of those + text elements with their confidence values. - * @param mask Input binary image CV_8UC1 same size as input image. Each - * connected component in mask corresponds to a segmented character in the - * input image. + @param image Input image CV_8UC1 or CV_8UC3 with a single text line (or word). + @param mask Input binary image CV_8UC1 same size as input image. Each connected component in mask corresponds to a segmented character in the input image. - * @param output_text Output text. Most likely character sequence found by - * the HMM decoder. + @param output_text Output text. Most likely character sequence found by the HMM decoder. 
- * @param component_rects If provided the method will output a list of Rects - * for the individual text elements found (e.g. words). + @param component_rects If provided the method will output a list of Rects for the individual + text elements found (e.g. words). - * @param component_texts If provided the method will output a list of text - * strings for the recognition of individual text elements found (e.g. words). + @param component_texts If provided the method will output a list of text strings for the + recognition of individual text elements found (e.g. words). - * @param component_confidences If provided the method will output a list of - * confidence values for the recognition of individual text elements found - * (e.g. words). + @param component_confidences If provided the method will output a list of confidence values + for the recognition of individual text elements found (e.g. words). - * @param component_level Only OCR_LEVEL_WORD is supported. - */ - virtual void run(Mat& image, Mat& mask, std::string& output_text, - std::vector* component_rects=NULL, - std::vector* component_texts=NULL, - std::vector* component_confidences=NULL, + @param component_level Only OCR_LEVEL_WORD is supported. + */ + virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector* component_rects=NULL, + std::vector* component_texts=NULL, std::vector* component_confidences=NULL, int component_level=0); // aliases for scripting - CV_WRAP String run(InputArray image, - int min_confidence, - int component_level=0); + CV_WRAP String run(InputArray image, int min_confidence, int component_level=0); - CV_WRAP String run(InputArray image, - InputArray mask, - int min_confidence, - int component_level=0); + CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0); - /** @brief Creates an instance of the OCRHMMDecoder class. Initializes - * HMMDecoder. + /** @brief Creates an instance of the OCRHMMDecoder class. 
Initializes HMMDecoder. - * @param classifier The character classifier with built in feature - * extractor. + @param classifier The character classifier with built in feature extractor. - * @param vocabulary The language vocabulary (chars when ascii english text) - * . vocabulary.size() must be equal to the number of classes of the - * classifier. + @param vocabulary The language vocabulary (chars when ascii english text). vocabulary.size() + must be equal to the number of classes of the classifier. - * @param transition_probabilities_table Table with transition probabilities - * between character pairs. cols == rows == vocabulary.size(). + @param transition_probabilities_table Table with transition probabilities between character + pairs. cols == rows == vocabulary.size(). - * @param emission_probabilities_table Table with observation emission - * probabilities. cols == rows == vocabulary.size(). + @param emission_probabilities_table Table with observation emission probabilities. cols == + rows == vocabulary.size(). - * @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available - * for the moment (). + @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available for the moment + (). */ - static Ptr create(const Ptr classifier,// The character classifier with built in feature extractor const std::string& vocabulary, // The language vocabulary (chars when ASCII English text) // size() must be equal to the number of classes @@ -402,11 +332,9 @@ protected: decoder_mode mode; }; -/** @brief Allow to implicitly load the default character classifier when - * creating an OCRHMMDecoder object. - - @param filename The XML or YAML file with the classifier model (e.g.OCRHMM_knn_model_data.xml) +/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object. +@param filename The XML or YAML file with the classifier model (e.g. 
OCRHMM_knn_model_data.xml) The KNN default classifier is based in the scene text recognition method proposed by Lukás Neumann & Jiri Matas in [Neumann11b]. Basically, the region (contour) in the input image is normalized to a @@ -416,16 +344,11 @@ using a KNN model trained with synthetic data of rendered characters with differ types. @deprecated loadOCRHMMClassifier instead - */ -CV_EXPORTS_W Ptr loadOCRHMMClassifierNM ( - const String& filename); -/** @brief Allow to implicitly load the default character classifier when - * creating an OCRHMMDecoder object. - - @param filename The XML or YAML file with the classifier model (e.g.OCRBeamSearch_CNN_model_data.xml.gz) +CV_EXPORTS_W Ptr loadOCRHMMClassifierNM(const String& filename); +/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object. @param filename The XML or YAML file with the classifier model (e.g. OCRBeamSearch_CNN_model_data.xml.gz) @@ -435,10 +358,8 @@ a linear classifier. It is applied to the input image in a sliding window fashio at each window location. @deprecated use loadOCRHMMClassifier instead - */ -CV_EXPORTS_W Ptr loadOCRHMMClassifierCNN ( - const String& filename); +CV_EXPORTS_W Ptr loadOCRHMMClassifierCNN(const String& filename); /** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object. @@ -450,64 +371,49 @@ CV_EXPORTS_W Ptr loadOCRHMMClassifierCNN ( CV_EXPORTS_W Ptr loadOCRHMMClassifier(const String& filename, int classifier); //! @} - /** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon). * * @param vocabulary The language vocabulary (chars when ASCII English text). * * @param lexicon The list of words that are expected to be found in a particular image. - - * @param transition_probabilities_table Output table with transition - * probabilities between character pairs. cols == rows == vocabulary.size(). 
- - * The function calculate frequency statistics of character pairs from the given - * lexicon and fills the output transition_probabilities_table with them. The - * transition_probabilities_table can be used as input in the - * OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods. + * + * @param transition_probabilities_table Output table with transition probabilities between character pairs. cols == rows == vocabulary.size(). + * + * The function calculate frequency statistics of character pairs from the given lexicon and fills the output transition_probabilities_table with them. The transition_probabilities_table can be used as input in the OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods. * @note - * - (C++) An alternative would be to load the default generic language - * transition table provided in the text module samples folder (created - * from ispell 42869 english words list) : - * + * - (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) : + * **/ -CV_EXPORTS void createOCRHMMTransitionsTable ( - std::string& vocabulary, std::vector& lexicon, - OutputArray transition_probabilities_table); +CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector& lexicon, OutputArray transition_probabilities_table); + +CV_EXPORTS_W Mat createOCRHMMTransitionsTable(const String& vocabulary, std::vector& lexicon); -CV_EXPORTS_W Mat createOCRHMMTransitionsTable ( - const String& vocabulary, std::vector& lexicon); /* OCR BeamSearch Decoder */ -/** @brief OCRBeamSearchDecoder class provides an interface for OCR using Beam - * Search algorithm. +/** @brief OCRBeamSearchDecoder class provides an interface for OCR using Beam Search algorithm. 
@note - - (C++) An example on using OCRBeamSearchDecoder recognition combined with - scene text detection can be found at the demo sample: - + - (C++) An example on using OCRBeamSearchDecoder recognition combined with scene text detection can + be found at the demo sample: + */ - - -/* Forward declaration of class that can be used to generate an OCRBeamSearchDecoder::ClassifierCallbac */ -class TextImageClassifier; - -class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR{ - - public: +class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR +{ +public: /** @brief Callback with the character classifier is made a class. - * This way it hides the feature extractor and the classifier itself, so - * developers can write their own OCR code. + This way it hides the feature extractor and the classifier itself, so developers can write + their own OCR code. - * The default character classifier and feature extractor can be loaded - * using the utility funtion loadOCRBeamSearchClassifierCNN with all its - * parameters provided in - * . + The default character classifier and feature extractor can be loaded using the utility function + loadOCRBeamSearchClassifierCNN with all its parameters provided in + . */ - class CV_EXPORTS_W ClassifierCallback{ - public: + class CV_EXPORTS_W ClassifierCallback + { + public: virtual ~ClassifierCallback() { } /** @brief The character classifier must return a (ranked list of) class(es) id('s) @@ -519,8 +425,8 @@ class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR{ */ virtual void eval( InputArray image, std::vector< std::vector >& recognition_probabilities, std::vector& oversegmentation ); - virtual int getWindowSize() {return 0;} - virtual int getStepSize() {return 0;} + int getWindowSize() {return 0;} + int getStepSize() {return 0;} }; public: @@ -545,7 +451,6 @@ public: @param component_level Only OCR_LEVEL_WORD is supported.
*/ - using BaseOCR::run; virtual void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, std::vector* component_texts=NULL, std::vector* component_confidences=NULL, int component_level=0); @@ -577,7 +482,6 @@ public: @param beam_size Size of the beam in Beam Search algorithm. */ - static Ptr create(const Ptr classifier,// The character classifier with built in feature extractor const std::string& vocabulary, // The language vocabulary (chars when ASCII English text) // size() must be equal to the number of classes @@ -598,29 +502,10 @@ public: int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment) int beam_size = 500); // Size of the beam in Beam Search algorithm - - - - /** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder from the specified path. @overload - @param filename path to a character classifier file - - @param vocabulary The language vocabulary (chars when ASCII English text). vocabulary.size() - must be equal to the number of classes of the classifier.. - - @param transition_probabilities_table Table with transition probabilities between character - pairs. cols == rows == vocabulary.size(). - - @param emission_probabilities_table Table with observation emission probabilities. cols == - rows == vocabulary.size(). - - @param mode HMM Decoding algorithm (only Viterbi for the moment) - - @param beam_size Size of the beam in Beam Search algorithm - */ CV_WRAP static Ptr create(const String& filename, // The character classifier file const String& vocabulary, // The language vocabulary (chars when ASCII English text) @@ -631,7 +516,6 @@ public: // cols == rows == vocabulary.size() int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment) int beam_size = 500); - protected: Ptr classifier; @@ -656,402 +540,6 @@ CV_EXPORTS_W Ptr loadOCRBeamSearchClas //! 
@} - -//Classifiers should provide diferent backends - -enum{ - OCR_HOLISTIC_BACKEND_NONE, //No back end - OCR_HOLISTIC_BACKEND_DNN, // dnn backend opencv_dnn - OCR_HOLISTIC_BACKEND_CAFFE, // caffe based backend - OCR_HOLISTIC_BACKEND_DEFAULT // to store default value based on environment -}; - -class TextImageClassifier; - -/** - * @brief The ImagePreprocessor class - */ -class CV_EXPORTS_W ImagePreprocessor{ -protected: - virtual void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels)=0; - virtual void set_mean_(Mat){} - -public: - virtual ~ImagePreprocessor(){} - - /** @brief this method in provides public acces to the preprocessing with respect to a specific - * classifier - * - * This method's main use would be to use the preprocessor without feeding it to a classifier. - * Determining the exact behavior of a preprocessor is the main motivation for this. - * - * @param input an image without any constraints - * - * @param output in most cases an image of fixed depth size and whitened - * - * @param sz the size to which the image would be resize if the preprocessor resizes inputs - * - * @param outputChannels the number of channels for the output image - */ - CV_WRAP void preprocess(InputArray input,OutputArray output,Size sz,int outputChannels); - - /** @brief this method in provides public acces to set the mean of the input images - * mean can be a mat either of same size of the image or one value per color channel - * A preprocessor can be created without the mean( the pre processor will calculate mean for every image - * in that case - * - - * @param mean which will be subtracted from the images - * - */ - - CV_WRAP void set_mean(Mat mean); - - /** @brief Creates a functor that only resizes and changes the channels of the input - * without further processing. 
- * - * @return shared pointer to the generated preprocessor - */ - CV_WRAP static Ptr createResizer(); - - /** @brief - * - * @param sigma - * - * @return shared pointer to generated preprocessor - */ - CV_WRAP static Ptr createImageStandarizer(double sigma); - - /** @brief - * - * @return shared pointer to generated preprocessor - */ - CV_WRAP static Ptr createImageMeanSubtractor(InputArray meanImg); - /** @brief - * create a functor with the parameters, parameters can be changes by corresponding set functions - * @return shared pointer to generated preprocessor - */ - - CV_WRAP static PtrcreateImageCustomPreprocessor(double rawval=1.0,String channel_order="BGR"); - - friend class TextImageClassifier; - -}; - -/** @brief Abstract class that implements the classifcation of text images. - * - * The interface is generic enough to describe any image classifier. And allows - * to take advantage of compouting in batches. While word classifiers are the default - * networks, any image classifers should work. - * - */ -class CV_EXPORTS_W TextImageClassifier -{ -protected: - Size inputGeometry_; - Size outputGeometry_; - int channelCount_; - Ptr preprocessor_; - /** @brief all image preprocessing is handled here including whitening etc. - * - * @param input the image to be preprocessed for the classifier. If the depth - * is CV_U8 values should be in [0,255] otherwise values are assumed to be in [0,1] - * - * @param output reference to the image to be fed to the classifier, the preprocessor will - * resize the image to the apropriate size and convert it to the apropriate depth\ - * - * The method preprocess should never be used externally, it is up to classify and classifyBatch - * methods to employ it. 
- */ - virtual void preprocess(const Mat& input,Mat& output); -public: - virtual ~TextImageClassifier() {} - - /** @brief - */ - CV_WRAP virtual void setPreprocessor(Ptr ptr); - - /** @brief - */ - CV_WRAP Ptr getPreprocessor(); - - /** @brief produces a class confidence row-vector given an image - */ - CV_WRAP virtual void classify(InputArray image, OutputArray classProbabilities) = 0; - - /** @brief produces a matrix containing class confidence row-vectors given an collection of images - */ - CV_WRAP virtual void classifyBatch(InputArrayOfArrays image, OutputArray classProbabilities) = 0; - - /** @brief simple getter method returning the number of channels each input sample has - */ - CV_WRAP virtual int getInputChannelCount(){return this->channelCount_;} - - /** @brief simple getter method returning the size of the input sample - */ - CV_WRAP virtual Size getInputSize(){return this->inputGeometry_;} - - /** @brief simple getter method returning the size of the oputput row-vector - */ - CV_WRAP virtual int getOutputSize()=0; - /** @brief simple getter method returning the shape of the oputput from caffe - */ - CV_WRAP virtual Size getOutputGeometry()=0; - - /** @brief simple getter method returning the size of the minibatches for this classifier. - * If not applicabe this method should return 1 - */ - CV_WRAP virtual int getMinibatchSize()=0; - - friend class ImagePreprocessor; -}; - - - -class CV_EXPORTS_W DeepCNN:public TextImageClassifier -{ - /** @brief Class that uses a pretrained caffe model for word classification. - * - * This network is described in detail in: - * Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015 - * http://arxiv.org/abs/1412.1842 - */ -public: - virtual ~DeepCNN() {}; - - /** @brief Constructs a DeepCNN object from a caffe pretrained model - * - * @param archFilename is the path to the prototxt file containing the deployment model architecture description. 
- * - * @param weightsFilename is the path to the pretrained weights of the model in binary fdorm. This file can be - * very large, up to 2GB. - * - * @param preprocessor is a pointer to the instance of a ImagePreprocessor implementing the preprocess_ protecteed method; - * - * @param minibatchSz the maximum number of samples that can processed in parallel. In practice this parameter - * has an effect only when computing in the GPU and should be set with respect to the memory available in the GPU. - * - * @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is - * the only option - */ - CV_WRAP static Ptr create(String archFilename,String weightsFilename,Ptr preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT); - - /** @brief Constructs a DeepCNN intended to be used for word spotting. - * - * This method loads a pretrained classifier and couples him with a preprocessor that standarises pixels with a - * deviation of 113. The architecture file can be downloaded from: - * - * While the weights can be downloaded from: - * - * The words assigned to the network outputs are available at: - * - * - * @param archFilename is the path to the prototxt file containing the deployment model architecture description. - * When employing OCR_HOLISTIC_BACKEND_CAFFE this is the path to the deploy ".prototxt". - * - * @param weightsFilename is the path to the pretrained weights of the model. When employing - * OCR_HOLISTIC_BACKEND_CAFFE this is the path to the ".caffemodel" file. This file can be very large, the - * pretrained DictNet uses 2GB. - * - * @param backEnd integer parameter selecting the coputation framework. 
For now OCR_HOLISTIC_BACKEND_CAFFE is - * the only option - */ - CV_WRAP static Ptr createDictNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT); - -}; - -namespace cnn_config{ - -/** @brief runtime backend information - * - * this function finds the status of backends compiled with this module - * - * @return a list of backends (caffe,opencv-dnn etc.) - * */ -CV_EXPORTS_W std::vector getAvailableBackends(); - -namespace caffe_backend{ - -/** @brief Prompts Caffe on the computation device beeing used - * - * Caffe can only be controlled globally on whether the GPU or the CPU is used has a - * global behavior. This function queries the current state of caffe. - * If the module is built without caffe, this method throws an exception. - * - * @return true if caffe is computing on the GPU, false if caffe is computing on the CPU - */ -CV_EXPORTS_W bool getCaffeGpuMode(); - -/** @brief Sets the computation device beeing used by Caffe - * - * Caffe can only be controlled globally on whether the GPU or the CPU is used has a - * global behavior. This function queries the current state of caffe. - * If the module is built without caffe, this method throws an exception. - * - * @param useGpu set to true for caffe to be computing on the GPU, false if caffe is - * computing on the CPU - */ -CV_EXPORTS_W void setCaffeGpuMode(bool useGpu); - -/** @brief Provides runtime information on whether Caffe support was compiled in. - * - * The text module API is the same regardless of whether CAffe was available or not - * During compilation. When methods that require Caffe are invocked while Caffe support - * is not compiled in, exceptions are thrown. This method allows to test whether the - * text module was built with caffe during runtime. - * - * @return true if Caffe support for the the text module was provided during compilation, - * false if Caffe was unavailable. 
- */ -CV_EXPORTS_W bool getCaffeAvailable(); - -}//caffe -namespace dnn_backend { - -/** @brief Provides runtime information on whether DNN module was compiled in. - * - * The text module API is the same regardless of whether DNN module was available or not - * During compilation. When methods that require backend are invocked while no backend support - * is compiled, exceptions are thrown. This method allows to test whether the - * text module was built with dnn_backend during runtime. - * - * @return true if opencv_dnn support for the the text module was provided during compilation, - * false if opencv_dnn was unavailable. - */ -CV_EXPORTS_W bool getDNNAvailable(); - -}//dnn_backend -}//cnn_config - -/** @brief OCRHolisticWordRecognizer class provides the functionallity of segmented wordspotting. - * Given a predefined vocabulary , a TextImageClassifier is employed to select the most probable - * word given an input image. - * - * This class implements the logic of providing transcriptions given a vocabulary and and an image - * classifer. The classifier has to be any TextImageClassifier but the classifier for which this - * class was built is the DictNet. In order to load it the following files should be downloaded: - - * - * - * - */ -class CV_EXPORTS_W OCRHolisticWordRecognizer : public BaseOCR -{ -public: - virtual void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, - std::vector* component_texts=NULL, std::vector* component_confidences=NULL, - int component_level=OCR_LEVEL_WORD)=0; - - /** @brief Recognize text using a segmentation based word-spotting/classifier cnn. - - Takes image on input and returns recognized text in the output_text parameter. Optionally - provides also the Rects for individual text elements found (e.g. words), and the list of those - text elements with their confidence values. 
- - @param image Input image CV_8UC1 or CV_8UC3 - - @param mask is totally ignored and is only available for compatibillity reasons - - @param output_text Output text of the the word spoting, always one that exists in the dictionary. - - @param component_rects Not applicable for word spotting can be be NULL if not, a single elemnt will - be put in the vector. - - @param component_texts Not applicable for word spotting can be be NULL if not, a single elemnt will - be put in the vector. - - @param component_confidences Not applicable for word spotting can be be NULL if not, a single elemnt will - be put in the vector. - - @param component_level must be OCR_LEVEL_WORD. - */ - - virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector* component_rects=NULL, - std::vector* component_texts=NULL, std::vector* component_confidences=NULL, - int component_level=OCR_LEVEL_WORD)=0; - - - /** - @brief Method that provides a quick and simple interface to a single word image classifcation - - @param inputImage an image expected to be a CV_U8C1 or CV_U8C3 of any size assumed to contain a single word - - @param transcription an opencv string that will store the detected word transcription - - @param confidence a double that will be updated with the confidence the classifier has for the selected word - */ - CV_WRAP virtual void recogniseImage(InputArray inputImage,CV_OUT String& transcription,CV_OUT double& confidence)=0; - - /** - @brief Method that provides a quick and simple interface to a multiple word image classifcation taking advantage - the classifiers parallel capabilities. - - @param inputImageList an list of images expected to be a CV_U8C1 or CV_U8C3 each image can be of any size and is assumed - to contain a single word. 
- - @param transcriptions a vector of opencv strings that will store the detected word transcriptions, one for each - input image - - @param confidences a vector of double that will be updated with the confidence the classifier has for each of the - selected words. - */ - CV_WRAP virtual void recogniseImageBatch(InputArrayOfArrays inputImageList,CV_OUT std::vector& transcriptions,CV_OUT std::vector& confidences)=0; - - - /** - @brief simple getter for the vocabulary employed - */ - CV_WRAP virtual const std::vector& getVocabulary()=0; - - /** @brief simple getter for the preprocessing functor - */ - CV_WRAP virtual Ptr getClassifier()=0; - - /** @brief Creates an instance of the OCRHolisticWordRecognizer class. - - @param classifierPtr an instance of TextImageClassifier, normaly a DeepCNN instance - - @param vocabularyFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line - in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize - of the classifier. - */ - CV_WRAP static Ptr create(Ptr classifierPtr,String vocabularyFilename); - - - /** @brief Creates an instance of the OCRHolisticWordRecognizer class and implicitly also a DeepCNN classifier. - - @param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture. - - @param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form. - - @param vocabularyFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line - in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize - of the classifier. 
- */ - CV_WRAP static Ptr create(String modelArchFilename, String modelWeightsFilename, String vocabularyFilename); - - /** @brief - * - * @param classifierPtr - * - * @param vocabulary - */ - CV_WRAP static Ptr create(Ptr classifierPtr,const std::vector& vocabulary); - - /** @brief - * - * @param modelArchFilename - * - * @param modelWeightsFilename - * - * @param vocabulary - */ - CV_WRAP static Ptr create (String modelArchFilename, String modelWeightsFilename, const std::vector& vocabulary); -}; - - -}//namespace text -}//namespace cv - - +} +} #endif // _OPENCV_TEXT_OCR_HPP_ diff --git a/modules/text/include/opencv2/text/textDetector.hpp b/modules/text/include/opencv2/text/textDetector.hpp index eda748014..0e51df39f 100644 --- a/modules/text/include/opencv2/text/textDetector.hpp +++ b/modules/text/include/opencv2/text/textDetector.hpp @@ -1,56 +1,12 @@ -/*M////////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Copyright (C) 2013, OpenCV Foundation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. #ifndef __OPENCV_TEXT_TEXTDETECTOR_HPP__ #define __OPENCV_TEXT_TEXTDETECTOR_HPP__ -#include -#include -#include -#include #include"ocr.hpp" - namespace cv { namespace text @@ -59,208 +15,44 @@ namespace text //! @addtogroup text_detect //! 
@{ - - -//base class BaseDetector declares a common API that would be used in a typical text -//detection scenario -class CV_EXPORTS_W BaseDetector -{ -public: - virtual ~BaseDetector() {}; - - virtual void run(Mat& image, - std::vector* component_rects=NULL, - std::vector* component_confidences=NULL, - int component_level=0) = 0; - - virtual void run(Mat& image, Mat& mask, - std::vector* component_rects=NULL, - std::vector* component_confidences=NULL, - int component_level=0) = 0; - -}; -/** A virtual class for different models of text detection (including CNN based deep models) +/** @brief An abstract class providing interface for text detection algorithms */ - -class CV_EXPORTS_W TextRegionDetector +class CV_EXPORTS_W TextDetector { -protected: - /** Stores input and output size - */ - //netGeometry inputGeometry_; - //netGeometry outputGeometry_; - Size inputGeometry_; - Size outputGeometry_; - int inputChannelCount_; - int outputChannelCount_; - public: - virtual ~TextRegionDetector() {} - - /** @brief produces a list of Bounding boxes and an estimate of text-ness confidence of Bounding Boxes - */ - CV_WRAP virtual void detect(InputArray image, OutputArray bboxProb ) = 0; - - - /** @brief simple getter method returning the size (height, width) of the input sample - */ - CV_WRAP virtual Size getInputGeometry(){return this->inputGeometry_;} - - /** @brief simple getter method returning the shape of the oputput - * Any text detector should output a number of text regions alongwith a score of text-ness - * From the shape it can be inferred the number of text regions and number of returned value - * for each region - */ - CV_WRAP virtual Size getOutputGeometry(){return this->outputGeometry_;} - - - -}; - -/** Generic structure of Deep CNN based Text Detectors - * */ -class CV_EXPORTS_W DeepCNNTextDetector : public TextRegionDetector -{ - /** @brief Class that uses a pretrained caffe model for text detection. 
- * Any text detection should - * This network is described in detail in: - * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network - * https://arxiv.org/abs/1611.06779 - */ -protected: - /** all deep CNN based text detectors have a preprocessor (normally) - */ - Ptr preprocessor_; - /** @brief all image preprocessing is handled here including whitening etc. - * - * @param input the image to be preprocessed for the classifier. If the depth - * is CV_U8 values should be in [0,255] otherwise values are assumed to be in [0,1] - * - * @param output reference to the image to be fed to the classifier, the preprocessor will - * resize the image to the apropriate size and convert it to the apropriate depth\ - * - * The method preprocess should never be used externally, it is up to classify and classifyBatch - * methods to employ it. - */ - virtual void preprocess(const Mat& input,Mat& output); -public: - virtual ~DeepCNNTextDetector() {}; - - /** @brief Constructs a DeepCNNTextDetector object from a caffe pretrained model - * - * @param archFilename is the path to the prototxt file containing the deployment model architecture description. - * - * @param weightsFilename is the path to the pretrained weights of the model in binary fdorm. - * - * @param preprocessor is a pointer to the instance of a ImagePreprocessor implementing the preprocess_ protecteed method; - * - * @param minibatchSz the maximum number of samples that can processed in parallel. In practice this parameter - * has an effect only when computing in the GPU and should be set with respect to the memory available in the GPU. - * - * @param backEnd integer parameter selecting the coputation framework. 
For now OCR_HOLISTIC_BACKEND_CAFFE is - * the only option - */ - CV_WRAP static Ptr create(String archFilename,String weightsFilename,Ptr preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT); - - /** @brief Constructs a DeepCNNTextDetector intended to be used for text area detection. - * - * This method loads a pretrained classifier and couples with a preprocessor that preprocess the image with mean subtraction of () - * The architecture and models weights can be downloaded from: - * https://github.com/sghoshcvc/TextBox-Models.git (size is around 100 MB) - - * @param archFilename is the path to the prototxt file containing the deployment model architecture description. - * When employing OCR_HOLISTIC_BACKEND_CAFFE this is the path to the deploy ".prototxt". - * - * @param weightsFilename is the path to the pretrained weights of the model. When employing - * OCR_HOLISTIC_BACKEND_CAFFE this is the path to the ".caffemodel" file. - * - * @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is - * the only option - */ - CV_WRAP static Ptr createTextBoxNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT); - friend class ImagePreprocessor; + /** + @brief Method that provides a quick and simple interface to detect text inside an image + @param inputImage an image to process + @param Bbox a vector of Rect that will store the detected word bounding box + @param confidence a vector of float that will be updated with the confidence the classifier has for the selected bounding box + */ + virtual void textDetectInImage(InputArray inputImage, CV_OUT std::vector& Bbox, CV_OUT std::vector& confidence) = 0; + virtual ~TextDetector() {} }; -/** @brief textDetector class provides the functionallity of text bounding box detection. - * A TextRegionDetector is employed to find bounding boxes of text - * words given an input image. 
- * - * This class implements the logic of providing text bounding boxes in a vector of rects given an TextRegionDetector - * The TextRegionDetector can be any text detector - * +/** @brief TextDetectorCNN class provides the functionality of text bounding box detection. + * A TextDetectorCNN is employed to find bounding boxes of text words given an input image. */ - -class CV_EXPORTS_W textDetector : public BaseDetector +class CV_EXPORTS_W TextDetectorCNN : public TextDetector { public: - virtual void run(Mat& image, std::vector* component_rects=NULL, - std::vector* component_confidences=NULL, - int component_level=OCR_LEVEL_WORD)=0; - - /** @brief detect text with a cnn, input is one image with (multiple) ocuurance of text. - - Takes image on input and returns recognized text in the output_text parameter. Optionally - provides also the Rects for individual text elements found (e.g. words), and the list of those - text elements with their confidence values. - - @param image Input image CV_8UC1 or CV_8UC3 - - @param mask is totally ignored and is only available for compatibillity reasons - - - @param component_rects a vector of Rects, each rect is one text bounding box. - - - - @param component_confidences A vector of float returns confidence of text bounding boxes - - @param component_level must be OCR_LEVEL_WORD.
- */ - - virtual void run(Mat& image, Mat& mask, std::vector* component_rects=NULL, - std::vector* component_confidences=NULL, - int component_level=OCR_LEVEL_WORD)=0; - - /** - @brief Method that provides a quick and simple interface to detect text inside an image + @overload @param inputImage an image expected to be a CV_U8C3 of any size - @param Bbox a vector of Rect that will store the detected word bounding box - @param confidence a vector of float that will be updated with the confidence the classifier has for the selected bounding box */ - CV_WRAP virtual void textDetectInImage(InputArray inputImage,CV_OUT std::vector& Bbox,CV_OUT std::vector& confidence)=0; - - - - - /** @brief simple getter for the preprocessing functor - */ - CV_WRAP virtual Ptr getClassifier()=0; - - /** @brief Creates an instance of the textDetector class. - - @param classifierPtr an instance of TextImageClassifier, normaly a DeepCNN instance - - - */ - CV_WRAP static Ptr create(Ptr classifierPtr); - + CV_WRAP virtual void textDetectInImage(InputArray inputImage, CV_OUT std::vector& Bbox, CV_OUT std::vector& confidence) = 0; /** @brief Creates an instance of the textDetector class and implicitly also a DeepCNN classifier. @param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture. - @param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form. - - + @param detectMultiscale if true, multiple scales of the input image will be used as network input */ - CV_WRAP static Ptr create(String modelArchFilename, String modelWeightsFilename); - - + CV_WRAP static Ptr create(const String& modelArchFilename, const String& modelWeightsFilename, bool detectMultiscale = false); }; //! 
@} diff --git a/modules/text/samples/deeptextdetection.py b/modules/text/samples/deeptextdetection.py index 2e8395b60..09dcb2492 100644 --- a/modules/text/samples/deeptextdetection.py +++ b/modules/text/samples/deeptextdetection.py @@ -1,57 +1,37 @@ # -*- coding: utf-8 -*- -""" -Created on Wed Jul 19 17:54:00 2017 - -@author: sgnosh -""" - #!/usr/bin/python - import sys import os - import cv2 import numpy as np -print('\nDeeptextdetection.py') -print(' A demo script of text box alogorithm of the paper:') -print(' * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network https://arxiv.org/abs/1611.06779\n') - - -if (len(sys.argv) < 2): - print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n') - quit() -#if not cv2.text.cnn_config.caffe_backend.getCaffeAvailable(): -# print"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n" -# -# quit() -# check model and architecture file existance -if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'): - print " Model files not found in current directory. 
Aborting" - print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models" - quit() -cv2.text.cnn_config.caffe_backend.setCaffeGpuMode(True); -pathname = os.path.dirname(sys.argv[0]) +def main(): + print('\nDeeptextdetection.py') + print(' A demo script of text box alogorithm of the paper:') + print(' * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network https://arxiv.org/abs/1611.06779\n') + if (len(sys.argv) < 2): + print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n') + quit() -img = cv2.imread(str(sys.argv[1])) -textSpotter=cv2.text.textDetector_create( - "textbox_deploy.prototxt","textbox.caffemodel") -rects,outProbs = textSpotter.textDetectInImage(img); -# for visualization -vis = img.copy() -# Threshold to select rectangles : All rectangles for which outProbs is more than this threshold will be shown -thres = 0.6 + if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'): + print " Model files not found in current directory. 
Aborting" + print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models" + quit() + img = cv2.imread(str(sys.argv[1])) + textSpotter = cv2.text.TextDetectorCNN_create("textbox_deploy.prototxt","textbox.caffemodel") + rects, outProbs = textSpotter.textDetectInImage(img); + vis = img.copy() + thres = 0.6 - #Visualization -for r in range(0,np.shape(rects)[0]): - if outProbs[r] >thres: - rect = rects[r] - cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 0, 0), 2) - # cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 255, 255), 1) + for r in range(np.shape(rects)[0]): + if outProbs[r] > thres: + rect = rects[r] + cv2.rectangle(vis, (rect[0],rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (255, 0, 0), 2) + cv2.imshow("Text detection result", vis) + cv2.waitKey() -#Visualization -cv2.imshow("Text detection result", vis) -cv2.waitKey(0) \ No newline at end of file +if __name__ == "__main__": + main() diff --git a/modules/text/samples/textbox_demo.cpp b/modules/text/samples/textbox_demo.cpp index b76658e1b..9975c3947 100644 --- a/modules/text/samples/textbox_demo.cpp +++ b/modules/text/samples/textbox_demo.cpp @@ -1,151 +1,86 @@ -/* - * dictnet_demo.cpp - * - * Demonstrates simple use of the holistic word classifier in C++ - * - * Created on: June 26, 2016 - * Author: Anguelos Nicolaou - */ - -#include "opencv2/text.hpp" -#include "opencv2/highgui.hpp" -#include "opencv2/imgproc.hpp" +#include +#include +#include #include -#include #include -#include #include -void textbox_draw(cv::Mat &src, std::vector &groups,std::vector &probs,std::vector wordList,float thres); -inline std::string getHelpStr(std::string progFname){ - std::stringstream out; - out << " Demo of text detection CNN for text detection." 
<< std::endl; - out << " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"< " << std::endl; - out << " Caffe Model files (textbox.caffemodel, textbox_deploy.prototxt)"< " << std::endl + << " Caffe Model files (textbox.caffemodel, textbox_deploy.prototxt)"< &groups,std::vector &probs,std::vector wordList,float thres=0.6) + +void textbox_draw(Mat src, std::vector& groups, std::vector& probs, float thres) { - for (int i=0;i<(int)groups.size(); i++) + for (size_t i = 0; i < groups.size(); i++) { - if(probs[i]>thres) + if(probs[i] > thres) { if (src.type() == CV_8UC3) { - cv::rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 0, 255, 255 ), 3, 8 ); - cv::putText(src, wordList[i],groups.at(i).tl() , cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar( 0,0,255 )); + rectangle(src, groups[i], Scalar( 0, 255, 255 ), 2, LINE_AA); + String label = format("%.2f", probs[i]); + std::cout << "text box: " << groups[i] << " confidence: " << probs[i] << "\n"; + putText(src, label, groups.at(i).tl(), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,255 ), 1, LINE_AA); } else - rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 255 ), 3, 8 ); + rectangle(src, groups[i], Scalar( 255 ), 3, 8 ); } } } +} -int main(int argc, const char * argv[]){ - if(!cv::text::cnn_config::caffe_backend::getCaffeAvailable()){ - std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n"; - //exit(1); - } - std::vector backends=cv::text::cnn_config::getAvailableBackends(); - std::cout << "The Following backends are available" << "\n"; - for (int i=0;i textSpotter=cv::text::textDetector::create( - "textbox_deploy.prototxt","textbox.caffemodel"); + std::cout << "Starting Text Box Demo" << std::endl; + Ptr textSpotter = + text::TextDetectorCNN::create("textbox_deploy.prototxt","textbox.caffemodel", false); - //cv::Ptr wordSpotter= - // cv::text::textDetector::create(cnn); - std::cout<<"Created Text Spotter with text 
Boxes"; - - std::vector bbox; + std::vector bbox; std::vector outProbabillities; - textSpotter->textDetectInImage(image,bbox,outProbabillities); - // textbox_draw(image, bbox,outProbabillities); - float thres =0.6f; - std::vector imageList; - for(int imageIdx=0;imageIdx<(int)bbox.size();imageIdx++){ - if(outProbabillities[imageIdx]>thres){ - imageList.push_back(image(bbox.at(imageIdx))); - } - - } - // call dict net here for all detected parts - cv::Ptr cnn=cv::text::DeepCNN::createDictNet( - "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel",cv::text::OCR_HOLISTIC_BACKEND_DNN); - - cv::Ptr wordSpotter= - cv::text::OCRHolisticWordRecognizer::create(cnn,"dictnet_vgg_labels.txt"); - - std::vector wordList; - std::vector wordProbabillities; - wordSpotter->recogniseImageBatch(imageList,wordList,wordProbabillities); - // write the output in file - std::ofstream out; - out.open(argv[1]); - - - for (int i=0;i<(int)wordList.size(); i++) - { - cv::Point tl_ = bbox.at(i).tl(); - cv::Point br_ = bbox.at(i).br(); - - out<textDetectInImage(image, bbox, outProbabillities); + textbox_draw(image, bbox, outProbabillities, 0.5f); - cv::imshow("TextBox Demo",image); + imshow("TextBox Demo",image); std::cout << "Done!" << std::endl << std::endl; std::cout << "Press any key to exit." 
<< std::endl << std::endl; - if ((cv::waitKey()&0xff) == ' ') - return 0; + waitKey(); + return 0; } diff --git a/modules/text/src/image_preprocessor.cpp b/modules/text/src/image_preprocessor.cpp deleted file mode 100644 index 3a65a2108..000000000 --- a/modules/text/src/image_preprocessor.cpp +++ /dev/null @@ -1,387 +0,0 @@ -#include "precomp.hpp" -#include "opencv2/imgproc.hpp" -#include "opencv2/highgui.hpp" -#include "opencv2/core.hpp" - - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace cv { namespace text { -//************************************************************************************ -//****************** ImagePreprocessor ******************************************* -//************************************************************************************ - -void ImagePreprocessor::preprocess(InputArray input,OutputArray output,Size sz,int outputChannels){ - Mat inpImg=input.getMat(); - Mat outImg; - this->preprocess_(inpImg,outImg,sz,outputChannels); - outImg.copyTo(output); -} -void ImagePreprocessor::set_mean(Mat mean){ - - - this->set_mean_(mean); - -} - - - -class ResizerPreprocessor: public ImagePreprocessor{ -protected: - void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ - //TODO put all the logic of channel and depth conversions in ImageProcessor class - CV_Assert(outputChannels==1 || outputChannels==3); - CV_Assert(input.channels()==1 || input.channels()==3); - if(input.channels()!=outputChannels) - { - Mat tmpInput; - if(outputChannels==1){ - cvtColor(input,tmpInput,COLOR_BGR2GRAY); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC1); - } - }else - { - cvtColor(input,tmpInput,COLOR_GRAY2BGR); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] 
range - tmpInput.convertTo(output, CV_32FC3); - } - } - }else - { - if(input.channels()==1) - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, CV_32FC1); - } - }else - { - if(input.depth()==CV_8U){ - input.convertTo(output, CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, CV_32FC3); - } - } - } - if(outputSize.width!=0 && outputSize.height!=0) - { - resize(output,output,outputSize); - } - } - //void set_mean_(Mat m){} -public: - ResizerPreprocessor(){} - ~ResizerPreprocessor(){} -}; - -class StandarizerPreprocessor: public ImagePreprocessor{ -protected: - double sigma_; - //void set_mean_(Mat M){} - - void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ - - //TODO put all the logic of channel and depth conversions in ImageProcessor class - CV_Assert(outputChannels==1 || outputChannels==3); - CV_Assert(input.channels()==1 || input.channels()==3); - if(input.channels()!=outputChannels) - { - Mat tmpInput; - if(outputChannels==1) - { - cvtColor(input,tmpInput,COLOR_BGR2GRAY); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC1); - } - }else - { - cvtColor(input,tmpInput,COLOR_GRAY2BGR); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC3); - } - } - }else - { - if(input.channels()==1) - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, CV_32FC1); - } - }else - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - 
input.convertTo(output, CV_32FC3); - } - } - } - if(outputSize.width!=0 && outputSize.height!=0) - { - resize(output,output,outputSize); - } - - Scalar mean,dev; - meanStdDev(output,mean,dev); - subtract(output,mean[0],output); - divide(output,(dev[0]/sigma_),output); - } -public: - StandarizerPreprocessor(double sigma):sigma_(sigma){} - ~StandarizerPreprocessor(){} - -}; - -class customPreprocessor:public ImagePreprocessor{ -protected: - - double rawval_; - Mat mean_; - String channel_order_; - - void set_mean_(Mat imMean_){ - - imMean_.copyTo(this->mean_); - - - } - - void set_raw_scale(int rawval){ - rawval_ = rawval; - - } - void set_channels(String channel_order){ - channel_order_=channel_order; - } - - - void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ - //TODO put all the logic of channel and depth conversions in ImageProcessor class - - CV_Assert(outputChannels==1 || outputChannels==3); - CV_Assert(input.channels()==1 || input.channels()==3); - if(input.channels()!=outputChannels) - { - Mat tmpInput; - if(outputChannels==1) - { - cvtColor(input,tmpInput,COLOR_BGR2GRAY); - if(input.depth()==CV_8U) - { - if (rawval_ == 1) - tmpInput.convertTo(output,CV_32FC3,1/255.0); - else - tmpInput.convertTo(output,CV_32FC1); - }else - {//Assuming values are at the desired [0,1] range - if (rawval_ ==1) - tmpInput.convertTo(output, CV_32FC1); - else - tmpInput.convertTo(output, CV_32FC1,rawval_); - } - }else - { - cvtColor(input,tmpInput,COLOR_GRAY2BGR); - if(input.depth()==CV_8U) - { - if (rawval_ == 1) - tmpInput.convertTo(output,CV_32FC3,1/255.0); - else - tmpInput.convertTo(output,CV_32FC1); - }else - {//Assuming values are at the desired [0,1] range - if (rawval_ ==1) - tmpInput.convertTo(output, CV_32FC1); - else - tmpInput.convertTo(output, CV_32FC1,rawval_); - } - } - }else - { - if(input.channels()==1) - { - if(input.depth()==CV_8U) - { - if (rawval_ == 1) - input.convertTo(output,CV_32FC1,1/255.0); - else - 
input.convertTo(output,CV_32FC1); - }else - {//Assuming values are at the desired [0,1] range - if (rawval_ ==1) - input.convertTo(output, CV_32FC1); - else - input.convertTo(output, CV_32FC1,rawval_); - } - }else - { - if(input.depth()==CV_8U) - { - if (rawval_ == 1) - input.convertTo(output,CV_32FC3,1/255.0); - else - input.convertTo(output,CV_32FC3); - }else - {//Assuming values are at the desired [0,1] range - if (rawval_ ==1) - input.convertTo(output, CV_32FC3); - else - input.convertTo(output, CV_32FC3,rawval_); - } - } - } - if(outputSize.width!=0 && outputSize.height!=0) - { - resize(output,output,outputSize); - } - - if (!this->mean_.empty()){ - - Scalar mean_s(this->mean_.at(0,0),this->mean_.at(0,1),this->mean_.at(0,2)); - subtract(output,mean_s,output); - } - else{ - Scalar mean_s; - mean_s = mean(output); - subtract(output,mean_s,output); - } - - } - -public: - customPreprocessor( double rawval,String channel_order):rawval_(rawval),channel_order_(channel_order){} - ~customPreprocessor(){} - -}; - -class MeanSubtractorPreprocessor: public ImagePreprocessor{ -protected: - Mat mean_; - //void set_mean_(Mat m){} - void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ - //TODO put all the logic of channel and depth conversions in ImageProcessor class - CV_Assert(this->mean_.cols==outputSize.width && this->mean_.rows ==outputSize.height); - CV_Assert(outputChannels==1 || outputChannels==3); - CV_Assert(input.channels()==1 || input.channels()==3); - if(input.channels()!=outputChannels) - { - Mat tmpInput; - if(outputChannels==1) - { - cvtColor(input,tmpInput,COLOR_BGR2GRAY); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - tmpInput.convertTo(output, CV_32FC1); - } - }else - { - cvtColor(input,tmpInput,COLOR_GRAY2BGR); - if(input.depth()==CV_8U) - { - tmpInput.convertTo(output,CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired 
[0,1] range - tmpInput.convertTo(output, CV_32FC3); - } - } - }else - { - if(input.channels()==1) - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC1,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, CV_32FC1); - } - }else - { - if(input.depth()==CV_8U) - { - input.convertTo(output, CV_32FC3,1/255.0); - }else - {//Assuming values are at the desired [0,1] range - input.convertTo(output, CV_32FC3); - } - } - } - if(outputSize.width!=0 && outputSize.height!=0) - { - resize(output,output,outputSize); - } - subtract(output,this->mean_,output); - } -public: - MeanSubtractorPreprocessor(Mat mean) - { - mean.copyTo(this->mean_); - } - - ~MeanSubtractorPreprocessor(){} -}; - - - -Ptr ImagePreprocessor::createResizer() -{ - return Ptr(new ResizerPreprocessor); -} - -Ptr ImagePreprocessor::createImageStandarizer(double sigma) -{ - return Ptr(new StandarizerPreprocessor(sigma)); -} -Ptr ImagePreprocessor::createImageCustomPreprocessor(double rawval,String channel_order) -{ - - return Ptr(new customPreprocessor(rawval,channel_order)); -} - -Ptr ImagePreprocessor::createImageMeanSubtractor(InputArray meanImg) -{ - Mat tmp=meanImg.getMat(); - return Ptr(new MeanSubtractorPreprocessor(tmp)); -} -} -} diff --git a/modules/text/src/ocr_holistic.cpp b/modules/text/src/ocr_holistic.cpp deleted file mode 100644 index 035f104f2..000000000 --- a/modules/text/src/ocr_holistic.cpp +++ /dev/null @@ -1,697 +0,0 @@ -#include "precomp.hpp" -#include "opencv2/imgproc.hpp" -#include "opencv2/highgui.hpp" -#include "opencv2/core.hpp" - - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#ifdef HAVE_CAFFE -#include "caffe/caffe.hpp" -#endif - -#ifdef HAVE_DNN -#include "opencv2/dnn.hpp" -#endif - -using namespace cv; -using namespace cv::dnn; -using namespace std; -namespace cv { namespace text { - -//Maybe OpenCV has a routine better suited -inline bool fileExists (String 
filename) { - std::ifstream f(filename.c_str()); - return f.good(); -} - - - -//************************************************************************************ -//****************** TextImageClassifier ***************************************** -//************************************************************************************ - -void TextImageClassifier::preprocess(const Mat& input,Mat& output) -{ - this->preprocessor_->preprocess_(input,output,this->inputGeometry_,this->channelCount_); -} - -void TextImageClassifier::setPreprocessor(Ptr ptr) -{ - CV_Assert(!ptr.empty()); - preprocessor_=ptr; -} - -Ptr TextImageClassifier::getPreprocessor() -{ - return preprocessor_; -} - - -class DeepCNNCaffeImpl: public DeepCNN{ -protected: - void classifyMiniBatch(std::vector inputImageList, Mat outputMat) - { - //Classifies a list of images containing at most minibatchSz_ images - CV_Assert(int(inputImageList.size())<=this->minibatchSz_); - CV_Assert(outputMat.isContinuous()); - - -#ifdef HAVE_CAFFE - net_->input_blobs()[0]->Reshape(inputImageList.size(), this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width); - net_->Reshape(); - float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data(); - float* inputData=inputBuffer; - - for(size_t imgNum=0;imgNum input_channels; - Mat preprocessed; - // if the image have multiple color channels the input layer should be populated accordingly - for (int channel=0;channel < this->channelCount_;channel++){ - - cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData); - input_channels.push_back(netInputWraped); - //input_data += width * height; - inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); - - } - this->preprocess(inputImageList[imgNum],preprocessed); - split(preprocessed, input_channels); - - - } - this->net_->ForwardPrefilled(); - const float* outputNetData=net_->output_blobs()[0]->cpu_data(); - this->outputGeometry_ = 
Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); - int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width; - - - //outputMat.resize(this->outputGeometry_.height * this->outputGeometry_.width); - float*outputMatData=(float*)(outputMat.data); - memcpy(outputMatData,outputNetData,sizeof(float)*outputSz*inputImageList.size()); - -#endif - } - -#ifdef HAVE_CAFFE - Ptr > net_; -#endif - //Size inputGeometry_;//=Size(100,32); - int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst - int outputSize_; - //Size outputGeometry_; -public: - DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn): - minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){ - channelCount_=dn.channelCount_; - inputGeometry_=dn.inputGeometry_; - //Implemented to supress Visual Studio warning "assignment operator could not be generated" -#ifdef HAVE_CAFFE - this->net_=dn.net_; -#endif - } - DeepCNNCaffeImpl& operator=(const DeepCNNCaffeImpl &dn) - { -#ifdef HAVE_CAFFE - this->net_=dn.net_; -#endif - this->setPreprocessor(dn.preprocessor_); - this->inputGeometry_=dn.inputGeometry_; - this->channelCount_=dn.channelCount_; - this->minibatchSz_=dn.minibatchSz_; - this->outputSize_=dn.outputSize_; - this->preprocessor_=dn.preprocessor_; - this->outputGeometry_=dn.outputGeometry_; - return *this; - //Implemented to supress Visual Studio warning "assignment operator could not be generated" - } - - DeepCNNCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz) - :minibatchSz_(maxMinibatchSz) - { - - CV_Assert(this->minibatchSz_>0); - CV_Assert(fileExists(modelArchFilename)); - CV_Assert(fileExists(modelWeightsFilename)); - CV_Assert(!preprocessor.empty()); - this->setPreprocessor(preprocessor); -#ifdef HAVE_CAFFE - this->net_.reset(new caffe::Net(modelArchFilename, caffe::TEST)); - CV_Assert(net_->num_inputs()==1); - CV_Assert(net_->num_outputs()==1); - 
CV_Assert(this->net_->input_blobs()[0]->channels()==1 - ||this->net_->input_blobs()[0]->channels()==3); - this->channelCount_=this->net_->input_blobs()[0]->channels(); - - - - this->net_->CopyTrainedLayersFrom(modelWeightsFilename); - - caffe::Blob* inputLayer = this->net_->input_blobs()[0]; - - this->inputGeometry_=Size(inputLayer->width(), inputLayer->height()); - this->channelCount_ = inputLayer->channels(); - - inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width); - net_->Reshape(); - this->outputSize_=net_->output_blobs()[0]->channels(); - this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); - - - - - -#else - CV_Error(Error::StsError,"Caffe not available during compilation!"); -#endif - } - - void classify(InputArray image, OutputArray classProbabilities) - { - std::vector inputImageList; - inputImageList.push_back(image.getMat()); - classifyBatch(inputImageList,classProbabilities); - } - - void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities) - { - std::vector allImageVector; - inputImageList.getMatVector(allImageVector); - size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmentic - - size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic - classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F); - Mat outputMat = classProbabilities.getMat(); - for(size_t imgNum=0;imgNum(allImageVector.size()-imgNum,minibatchSize); - std::vector::const_iterator from=std::vector::const_iterator(allImageVector.begin()+imgNum); - std::vector::const_iterator to=std::vector::const_iterator(allImageVector.begin()+rangeEnd); - std::vector minibatchInput(from,to); - classifyMiniBatch(minibatchInput,outputMat.rowRange(int(imgNum),int(rangeEnd))); - - } - - } - - int getOutputSize() - { - return this->outputSize_; - } - Size 
getOutputGeometry() - { - return this->outputGeometry_; - } - - int getMinibatchSize() - { - return this->minibatchSz_; - } - - int getBackend() - { - return OCR_HOLISTIC_BACKEND_CAFFE; - } -}; - -class DeepCNNOpenCvDNNImpl: public DeepCNN{ -protected: - - void classifyMiniBatch(std::vector inputImageList, Mat outputMat) - { - //Classifies a list of images containing at most minibatchSz_ images - CV_Assert(int(inputImageList.size())<=this->minibatchSz_); - CV_Assert(outputMat.isContinuous()); - -#ifdef HAVE_DNN - - std::vector preProcessedImList; // to store preprocessed images, should it be handled inside preprocessing class? - - Mat preprocessed; - // preprocesses each image in the inputImageList and push to preprocessedImList - for(size_t imgNum=0;imgNumpreprocess(inputImageList[imgNum],preprocessed); - preProcessedImList.push_back(preprocessed); - } - // set input data blob in dnn::net - net_->setInput(blobFromImages(preProcessedImList,1, this->inputGeometry_), "data"); - - float*outputMatData=(float*)(outputMat.data); - //Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ; - Mat outputNet = this->net_->forward(); - outputNet = outputNet.reshape(1, 1); - - float*outputNetData=(float*)(outputNet.data); - - memcpy(outputMatData,outputNetData,sizeof(float)*this->outputSize_*inputImageList.size()); - -#endif - } - -#ifdef HAVE_DNN - Ptr net_; -#endif - // hard coding input image size. anything in DNN library to get that from prototxt?? 
- // Size inputGeometry_;//=Size(100,32); - int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst - int outputSize_; - //Size outputGeometry_;//= Size(1,1); - //int channelCount_; - // int inputChannel_ ;//=1; - // int _inputHeight; - //int _inputWidth ; - //int _inputChannel ; -public: - DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn): - minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){ - channelCount_=dn.channelCount_; - inputGeometry_=dn.inputGeometry_; - //Implemented to supress Visual Studio warning "assignment operator could not be generated" -#ifdef HAVE_DNN - this->net_=dn.net_; -#endif - } - DeepCNNOpenCvDNNImpl& operator=(const DeepCNNOpenCvDNNImpl &dn) - { -#ifdef HAVE_DNN - this->net_=dn.net_; -#endif - this->setPreprocessor(dn.preprocessor_); - this->inputGeometry_=dn.inputGeometry_; - this->channelCount_=dn.channelCount_; - this->minibatchSz_=dn.minibatchSz_; - this->outputSize_=dn.outputSize_; - this->preprocessor_=dn.preprocessor_; - this->outputGeometry_=dn.outputGeometry_; - return *this; - //Implemented to supress Visual Studio warning "assignment operator could not be generated" - } - - DeepCNNOpenCvDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz,int inputWidth ,int inputHeight ,int inputChannel ) - :minibatchSz_(maxMinibatchSz) - { - - CV_Assert(this->minibatchSz_>0); - CV_Assert(fileExists(modelArchFilename)); - CV_Assert(fileExists(modelWeightsFilename)); - CV_Assert(!preprocessor.empty()); - this->setPreprocessor(preprocessor); -#ifdef HAVE_DNN - - this->net_ = makePtr(readNetFromCaffe(modelArchFilename,modelWeightsFilename)); - - - - if (this->net_.empty()) - { - std::cerr << "Can't load network by using the following files: " << std::endl; - std::cerr << "prototxt: " << modelArchFilename << std::endl; - std::cerr << "caffemodel: " << modelWeightsFilename << std::endl; - //std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << 
std::endl; - //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl; - exit(-1); - } - - - this->inputGeometry_=Size(inputWidth,inputHeight);// Size(inputLayer->width(), inputLayer->height()); - this->channelCount_ = inputChannel;//inputLayer->channels(); - - //inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width); - Ptr< Layer > outLayer= net_->getLayer (net_->getLayerId (net_->getLayerNames()[net_->getLayerNames().size()-2])); - //std::vector blobs = outLayer->blobs; - - this->outputSize_=(outLayer->blobs)[1].size[0] ;//net_->output_blobs()[0]->channels(); - //this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); - - - - - - -#else - CV_Error(Error::StsError,"DNN module not available during compilation!"); -#endif - } - - void classify(InputArray image, OutputArray classProbabilities) - { - std::vector inputImageList; - inputImageList.push_back(image.getMat()); - classifyBatch(inputImageList,classProbabilities); - } - - void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities) - { - std::vector allImageVector; - inputImageList.getMatVector(allImageVector); - size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmentic - - size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic - classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F); - Mat outputMat = classProbabilities.getMat(); - - for(size_t imgNum=0;imgNum(allImageVector.size()-imgNum,minibatchSize); - std::vector::const_iterator from=std::vector::const_iterator(allImageVector.begin()+imgNum); - std::vector::const_iterator to=std::vector::const_iterator(allImageVector.begin()+rangeEnd); - std::vector minibatchInput(from,to); - classifyMiniBatch(minibatchInput,outputMat.rowRange(int(imgNum),int(rangeEnd))); - - } - - } 
- - int getOutputSize() - { - return this->outputSize_; - } - Size getOutputGeometry() - { - return this->outputGeometry_; - } - - int getMinibatchSize() - { - return this->minibatchSz_; - } - - int getBackend() - { - return OCR_HOLISTIC_BACKEND_DNN; - } -}; - -Ptr DeepCNN::create(String archFilename,String weightsFilename,Ptr preprocessor,int minibatchSz,int backEnd) -{ - if(preprocessor.empty()) - { - preprocessor=ImagePreprocessor::createResizer(); - } - switch(backEnd){ - case OCR_HOLISTIC_BACKEND_DEFAULT: - -#ifdef HAVE_CAFFE - return Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); - -#elif defined(HAVE_DNN) - return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,100,32,1)); -#else - CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); - return Ptr(); -#endif - break; - - case OCR_HOLISTIC_BACKEND_CAFFE: - return Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); - break; - case OCR_HOLISTIC_BACKEND_DNN: - return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,100,32,1)); - break; - case OCR_HOLISTIC_BACKEND_NONE: - default: - CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); - return Ptr(); - break; - } -} - - -Ptr DeepCNN::createDictNet(String archFilename,String weightsFilename,int backEnd) -{ - Ptr preprocessor=ImagePreprocessor::createImageStandarizer(113); - switch(backEnd){ - case OCR_HOLISTIC_BACKEND_DEFAULT: - -#ifdef HAVE_CAFFE - return Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, 100)); - -#elif defined(HAVE_DNN) - return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, 100,100,32,1)); -#else - CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); - return Ptr(); -#endif - break; - - case OCR_HOLISTIC_BACKEND_CAFFE: - return Ptr(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, 100)); - break; - 
case OCR_HOLISTIC_BACKEND_DNN: - return Ptr(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, 100,100,32,1)); - break; - case OCR_HOLISTIC_BACKEND_NONE: - default: - CV_Error(Error::StsError,"DeepCNN::create backend not implemented"); - return Ptr(); - break; - } -} - -namespace cnn_config{ -std::vector getAvailableBackends() -{ - std::vector backends; - -#ifdef HAVE_CAFFE - backends.push_back("CAFFE, OCR_HOLISTIC_BACKEND_CAFFE"); // dnn backend opencv_dnn - -#endif -#ifdef HAVE_DNN - backends.push_back("DNN, OCR_HOLISTIC_BACKEND_DNN");// opencv_dnn based backend" -#endif - return backends; - - -} - -namespace caffe_backend{ - -#ifdef HAVE_CAFFE - -bool getCaffeGpuMode() -{ - return caffe::Caffe::mode()==caffe::Caffe::GPU; -} - -void setCaffeGpuMode(bool useGpu) -{ - if(useGpu) - { - caffe::Caffe::set_mode(caffe::Caffe::GPU); - }else - { - caffe::Caffe::set_mode(caffe::Caffe::CPU); - } -} - -bool getCaffeAvailable() -{ - return true; -} -#else - -bool getCaffeGpuMode() -{ - CV_Error(Error::StsError,"Caffe not available during compilation!"); - return 0; -} - -void setCaffeGpuMode(bool useGpu) -{ - CV_Error(Error::StsError,"Caffe not available during compilation!"); - CV_Assert(useGpu==1);//Compilation directives force -} - -bool getCaffeAvailable(){ - return 0; -} - -#endif - -}//namespace caffe -namespace dnn_backend{ -#ifdef HAVE_DNN - - -bool getDNNAvailable(){ - return true; -} -#else -bool getDNNAvailable(){ - return 0; -} -#endif -}//namspace dnn_backend -}//namespace cnn_config - -class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{ -private: - struct NetOutput{ - //Auxiliary structure that handles the logic of getting class ids and probabillities from - //the raw outputs of caffe - int wordIdx; - float probabillity; - - static bool sorter(const NetOutput& o1,const NetOutput& o2) - {//used with std::sort to provide the most probable class - return o1.probabillity>o2.probabillity; - } - - static void getOutputs(const 
float* buffer,int nbOutputs,std::vector& res) - { - res.resize(nbOutputs); - for(int k=0;k tmp; - getOutputs(buffer,nbOutputs,tmp); - classNum=tmp[0].wordIdx; - confidence=tmp[0].probabillity; - - } - }; -protected: - std::vector labels_; - Ptr classifier_; -public: - OCRHolisticWordRecognizerImpl(Ptr classifierPtr,String vocabularyFilename):classifier_(classifierPtr) - { - CV_Assert(fileExists(vocabularyFilename));//this fails for some rason - std::ifstream labelsFile(vocabularyFilename.c_str()); - if(!labelsFile) - { - CV_Error(Error::StsError,"Could not read Labels from file"); - } - std::string line; - while (std::getline(labelsFile, line)) - { - labels_.push_back(std::string(line)); - } - CV_Assert(this->classifier_->getOutputSize()==int(this->labels_.size())); - } - - OCRHolisticWordRecognizerImpl(Ptr classifierPtr,const std::vector& vocabulary):classifier_(classifierPtr) - { - this->labels_=vocabulary; - CV_Assert(this->classifier_->getOutputSize()==int(this->labels_.size())); - } - - void recogniseImage(InputArray inputImage,CV_OUT String& transcription,CV_OUT double& confidence) - { - Mat netOutput; - this->classifier_->classify(inputImage,netOutput); - int classNum; - NetOutput::getClassification((float*)(netOutput.data),this->classifier_->getOutputSize(),classNum,confidence); - transcription=this->labels_[classNum]; - } - - void recogniseImageBatch(InputArrayOfArrays inputImageList,CV_OUT std::vector& transcriptionVec,CV_OUT std::vector& confidenceVec) - { - Mat netOutput; - this->classifier_->classifyBatch(inputImageList,netOutput); - - for(int k=0;kclassifier_->getOutputSize(),classNum,confidence); - transcriptionVec.push_back(this->labels_[classNum]); - confidenceVec.push_back(confidence); - } - } - - - void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, - std::vector* component_texts=NULL, std::vector* component_confidences=NULL, - int component_level=0) - { - CV_Assert(component_level==OCR_LEVEL_WORD);//Componnents not 
applicable for word spotting - double confidence; - String transcription; - recogniseImage(image,transcription,confidence); - output_text=transcription.c_str(); - if(component_rects!=NULL) - { - component_rects->resize(1); - (*component_rects)[0]=Rect(0,0,image.size().width,image.size().height); - } - if(component_texts!=NULL) - { - component_texts->resize(1); - (*component_texts)[0]=transcription.c_str(); - } - if(component_confidences!=NULL) - { - component_confidences->resize(1); - (*component_confidences)[0]=float(confidence); - } - } - - void run(Mat& image, Mat& mask, std::string& output_text, std::vector* component_rects=NULL, - std::vector* component_texts=NULL, std::vector* component_confidences=NULL, - int component_level=0) - { - CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image - this->run(image,output_text,component_rects,component_texts,component_confidences,component_level); - } - - std::vector& getVocabulary() - { - return this->labels_; - } - - Ptr getClassifier() - { - return this->classifier_; - } -}; - -Ptr OCRHolisticWordRecognizer::create(Ptr classifierPtr,String vocabularyFilename ) -{ - return Ptr(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabularyFilename)); -} - -Ptr OCRHolisticWordRecognizer::create(String modelArchFilename, String modelWeightsFilename, String vocabularyFilename) -{ - Ptr preprocessor=ImagePreprocessor::createImageStandarizer(113); - Ptr classifierPtr(new DeepCNNCaffeImpl(modelArchFilename,modelWeightsFilename,preprocessor,100)); - return Ptr(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabularyFilename)); -} - -Ptr OCRHolisticWordRecognizer::create(Ptr classifierPtr,const std::vector& vocabulary) -{ - return Ptr(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabulary)); -} - -Ptr OCRHolisticWordRecognizer::create(String modelArchFilename, String modelWeightsFilename,const std::vector& vocabulary){ - Ptr 
preprocessor=ImagePreprocessor::createImageStandarizer(113); - Ptr classifierPtr(new DeepCNNCaffeImpl(modelArchFilename,modelWeightsFilename,preprocessor,100)); - return Ptr(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabulary)); -} - - - - - -} } //namespace text namespace cv diff --git a/modules/text/src/precomp.hpp b/modules/text/src/precomp.hpp index e85e4eb85..7ccda150f 100644 --- a/modules/text/src/precomp.hpp +++ b/modules/text/src/precomp.hpp @@ -45,6 +45,8 @@ #include "opencv2/text.hpp" +#include "text_config.hpp" + #ifdef HAVE_TESSERACT #if !defined(USE_STD_NAMESPACE) #define USE_STD_NAMESPACE diff --git a/modules/text/src/text_detector.cpp b/modules/text/src/text_detector.cpp deleted file mode 100644 index 949f5f86d..000000000 --- a/modules/text/src/text_detector.cpp +++ /dev/null @@ -1,169 +0,0 @@ -#include "precomp.hpp" -#include "opencv2/imgproc.hpp" -#include "opencv2/core.hpp" - - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -//#ifdef HAVE_CAFFE -//#include "caffe/caffe.hpp" -//#endif - -namespace cv { namespace text { - - - - -class textDetectImpl: public textDetector{ -private: - struct NetOutput{ - //Auxiliary structure that handles the logic of getting bounding box and confidences of textness from - //the raw outputs of caffe - Rect bbox; - float probability; - - - static void getOutputs(const float* buffer,int nbrTextBoxes,int nCol,std::vector& res,Size inputShape) - { - - res.resize(nbrTextBoxes); - for(int k=0;k inputShape.width?inputShape.width-1:x_max; - y_max = y_max > inputShape.height?inputShape.height-1:y_max; - float wd = x_max-x_min+1; - float ht = y_max-y_min+1; - - res[k].bbox=Rect(int(x_min),int(y_min),int(wd),int(ht)); - - res[k].probability=buffer[k*nCol+2]; - } - - } - - - }; -protected: - - Ptr classifier_; -public: - textDetectImpl(Ptr classifierPtr):classifier_(classifierPtr) - { - - } - - - - void textDetectInImage(InputArray inputImage,CV_OUT std::vector& 
Bbox,CV_OUT std::vector& confidence) - { - Mat netOutput; - // call the detect function of deepTextCNN class - this->classifier_->detect(inputImage,netOutput); - // get the output geometry i.e height and width of output blob from caffe - Size OutputGeometry_ = this->classifier_->getOutputGeometry(); - int nbrTextBoxes = OutputGeometry_.height; - int nCol = OutputGeometry_.width; - - std::vector tmp; - // the output bounding box needs to be resized by the input height and width - Size inputImageShape = Size(inputImage.cols(),inputImage.rows()); - NetOutput::getOutputs((float*)(netOutput.data),nbrTextBoxes,nCol,tmp,inputImageShape); - // put the output in CV_OUT - - for (int k=0;k* component_rects=NULL, - std::vector* component_confidences=NULL, - int component_level=0) - { - CV_Assert(component_level==OCR_LEVEL_WORD);//Componnents not applicable for word spotting - - std::vector bbox; - std::vector score; - textDetectInImage(image,bbox,score); - - if(component_rects!=NULL) - { - component_rects->resize(bbox.size()); // should be a user behavior - - component_rects = &bbox; - } - - if(component_confidences!=NULL) - { - component_confidences->resize(score.size()); // shoub be a user behavior - - component_confidences = &score; - } - } - - void run(Mat& image, Mat& mask, std::vector* component_rects=NULL, - std::vector* component_confidences=NULL, - int component_level=0) - { - CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image - this->run(image,component_rects,component_confidences,component_level); - } - - - - Ptr getClassifier() - { - return this->classifier_; - } -}; - -Ptr textDetector::create(Ptr classifierPtr) -{ - return Ptr(new textDetectImpl(classifierPtr)); -} - -Ptr textDetector::create(String modelArchFilename, String modelWeightsFilename) -{ - -// create a custom preprocessor with rawval - Ptr preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255); -// set the mean for the 
preprocessor - - Mat textbox_mean(1,3,CV_8U); - textbox_mean.at(0,0)=104; - textbox_mean.at(0,1)=117; - textbox_mean.at(0,2)=123; - preprocessor->set_mean(textbox_mean); -// create a pointer to text box detector(textDetector) - Ptr classifierPtr(DeepCNNTextDetector::create(modelArchFilename,modelWeightsFilename,preprocessor,1)); - return Ptr(new textDetectImpl(classifierPtr)); -} - - - - - - - -} } //namespace text namespace cv diff --git a/modules/text/src/text_detectorCNN.cpp b/modules/text/src/text_detectorCNN.cpp index 5267b390f..1c3933fda 100644 --- a/modules/text/src/text_detectorCNN.cpp +++ b/modules/text/src/text_detectorCNN.cpp @@ -1,453 +1,101 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + #include "precomp.hpp" #include "opencv2/imgproc.hpp" #include "opencv2/core.hpp" - - -#include #include -#include -#include #include -#include -#include -#include -#include -#include - - -#ifdef HAVE_CAFFE -#include "caffe/caffe.hpp" -#endif -#ifdef HAVE_DNN #include "opencv2/dnn.hpp" -#endif using namespace cv::dnn; -#define CV_WARN(message) fprintf(stderr, "warning: %s (%s:%d)\n", message, __FILE__, __LINE__) - -namespace cv { namespace text { - -inline bool fileExists (String filename) { - std::ifstream f(filename.c_str()); - return f.good(); -} - -class DeepCNNTextDetectorCaffeImpl: public DeepCNNTextDetector{ -protected: - - - void process_(Mat inputImage, Mat &outputMat) - { - // do forward pass and stores the output in outputMat - CV_Assert(outputMat.isContinuous()); - if (inputImage.channels() != this->inputChannelCount_) - CV_WARN("Number of input channel(s) in the model is not same as input"); - - -#ifdef HAVE_CAFFE - net_->input_blobs()[0]->Reshape(1, this->inputChannelCount_,this->inputGeometry_.height,this->inputGeometry_.width); - net_->Reshape(); - float* 
inputBuffer=net_->input_blobs()[0]->mutable_cpu_data(); - float* inputData=inputBuffer; - - std::vector input_channels; - Mat preprocessed; - // if the image have multiple color channels the input layer should be populated accordingly - for (int channel=0;channel < this->inputChannelCount_;channel++){ - - cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData); - input_channels.push_back(netInputWraped); - //input_data += width * height; - inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); - } - this->preprocess(inputImage,preprocessed); - split(preprocessed, input_channels); - - //preprocessed.copyTo(netInputWraped); - - - this->net_->Forward(); - const float* outputNetData=net_->output_blobs()[0]->cpu_data(); - // const float* outputNetData1=net_->output_blobs()[1]->cpu_data(); - - - - - this->outputGeometry_.height = net_->output_blobs()[0]->height(); - this->outputGeometry_.width = net_->output_blobs()[0]->width(); - this->outputChannelCount_ = net_->output_blobs()[0]->channels(); - int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width; - outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1); - float*outputMatData=(float*)(outputMat.data); - - memcpy(outputMatData,outputNetData,sizeof(float)*outputSz); - - - -#endif - } - - -#ifdef HAVE_CAFFE - Ptr > net_; -#endif - //Size inputGeometry_; - int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst - //int outputSize_; -public: - DeepCNNTextDetectorCaffeImpl(const DeepCNNTextDetectorCaffeImpl& dn): - minibatchSz_(dn.minibatchSz_){ - outputGeometry_=dn.outputGeometry_; - inputGeometry_=dn.inputGeometry_; - //Implemented to supress Visual Studio warning "assignment operator could not be generated" -#ifdef HAVE_CAFFE - this->net_=dn.net_; -#endif - } - DeepCNNTextDetectorCaffeImpl& operator=(const DeepCNNTextDetectorCaffeImpl &dn) - { -#ifdef 
HAVE_CAFFE - this->net_=dn.net_; -#endif - this->setPreprocessor(dn.preprocessor_); - this->inputGeometry_=dn.inputGeometry_; - this->inputChannelCount_=dn.inputChannelCount_; - this->outputChannelCount_ = dn.outputChannelCount_; - // this->minibatchSz_=dn.minibatchSz_; - //this->outputGeometry_=dn.outputSize_; - this->preprocessor_=dn.preprocessor_; - this->outputGeometry_=dn.outputGeometry_; - return *this; - //Implemented to supress Visual Studio warning "assignment operator could not be generated" - } - - DeepCNNTextDetectorCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz) - :minibatchSz_(maxMinibatchSz) - { - - CV_Assert(this->minibatchSz_>0); - CV_Assert(fileExists(modelArchFilename)); - CV_Assert(fileExists(modelWeightsFilename)); - CV_Assert(!preprocessor.empty()); - this->setPreprocessor(preprocessor); -#ifdef HAVE_CAFFE - this->net_.reset(new caffe::Net(modelArchFilename, caffe::TEST)); - CV_Assert(net_->num_inputs()==1); - CV_Assert(net_->num_outputs()==1); - CV_Assert(this->net_->input_blobs()[0]->channels()==1 - ||this->net_->input_blobs()[0]->channels()==3); - // this->channelCount_=this->net_->input_blobs()[0]->channels(); - - - - this->net_->CopyTrainedLayersFrom(modelWeightsFilename); - - caffe::Blob* inputLayer = this->net_->input_blobs()[0]; - - this->inputGeometry_.height = inputLayer->height(); - this->inputGeometry_.width = inputLayer->width(); - this->inputChannelCount_ = inputLayer->channels(); - //this->inputGeometry_.batchSize =1; - - inputLayer->Reshape(this->minibatchSz_,this->inputChannelCount_,this->inputGeometry_.height, this->inputGeometry_.width); - net_->Reshape(); - this->outputChannelCount_ = net_->output_blobs()[0]->channels(); - //this->outputGeometry_.batchSize =1; - this->outputGeometry_.height =net_->output_blobs()[0]->height(); - this->outputGeometry_.width = net_->output_blobs()[0]->width(); - -#else - CV_Error(Error::StsError,"Caffe not available during compilation!"); 
-#endif - } - - - void detect(InputArray image, OutputArray Bbox_prob) - { - Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width); - Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed - Mat outputMat = Bbox_prob.getMat(); - process_(image.getMat(),outputMat); - //copy back to outputArray - outputMat.copyTo(Bbox_prob); - } - - Size getOutputGeometry() - { - return this->outputGeometry_; - } - Size getinputGeometry() - { - return this->inputGeometry_; - } - - int getMinibatchSize() - { - return this->minibatchSz_; - } - - int getBackend() - { - return OCR_HOLISTIC_BACKEND_CAFFE; - } - void setPreprocessor(Ptr ptr) - { - CV_Assert(!ptr.empty()); - preprocessor_=ptr; - } - - Ptr getPreprocessor() - { - return preprocessor_; - } -}; - +namespace cv +{ +namespace text +{ -class DeepCNNTextDetectorDNNImpl: public DeepCNNTextDetector{ +class TextDetectorCNNImpl : public TextDetectorCNN +{ protected: + Net net_; + std::vector sizes_; + int inputChannelCount_; + bool detectMultiscale_; - void process_(Mat inputImage, Mat &outputMat) + void getOutputs(const float* buffer,int nbrTextBoxes,int nCol, + std::vector& Bbox, std::vector& confidence, Size inputShape) { - // do forward pass and stores the output in outputMat - CV_Assert(outputMat.isContinuous()); - if (inputImage.channels() != this->inputChannelCount_) - CV_WARN("Number of input channel(s) in the model is not same as input"); - - -#ifdef HAVE_DNN - - Mat preprocessed; - this->preprocess(inputImage,preprocessed); - - net_->setInput(blobFromImage(preprocessed,1, this->inputGeometry_), "data"); - - Mat outputNet = this->net_->forward( ); - - this->outputGeometry_.height = outputNet.size[2]; - this->outputGeometry_.width = outputNet.size[3]; - this->outputChannelCount_ = outputNet.size[1]; + for(int k = 0; k < nbrTextBoxes; k++) + { + float x_min = buffer[k*nCol + 3]*inputShape.width; + float y_min = buffer[k*nCol + 4]*inputShape.height; - 
outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1); - float*outputMatData=(float*)(outputMat.data); - float*outputNetData=(float*)(outputNet.data); - int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width; + float x_max = buffer[k*nCol + 5]*inputShape.width; + float y_max = buffer[k*nCol + 6]*inputShape.height; - memcpy(outputMatData,outputNetData,sizeof(float)*outputSz); + CV_Assert(x_min < x_max, y_min < y_max); + x_min = std::max(0.f, x_min); + y_min = std::max(0.f, y_min); + x_max = std::min(inputShape.width - 1.f, x_max); + y_max = std::min(inputShape.height - 1.f, y_max); + int wd = cvRound(x_max - x_min); + int ht = cvRound(y_max - y_min); -#endif + Bbox.push_back(Rect(cvRound(x_min), cvRound(y_min), wd, ht)); + confidence.push_back(buffer[k*nCol + 2]); + } } - - -#ifdef HAVE_DNN - Ptr net_; -#endif - //Size inputGeometry_; - int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst - //int outputSize_; - //int inputHeight_; - //int inputWidth_; - //int inputChannel_; public: - DeepCNNTextDetectorDNNImpl(const DeepCNNTextDetectorDNNImpl& dn): - minibatchSz_(dn.minibatchSz_){ - outputGeometry_=dn.outputGeometry_; - inputGeometry_=dn.inputGeometry_; - //Implemented to supress Visual Studio warning "assignment operator could not be generated" -#ifdef HAVE_DNN - this->net_=dn.net_; -#endif - } - DeepCNNTextDetectorDNNImpl& operator=(const DeepCNNTextDetectorDNNImpl &dn) - { -#ifdef HAVE_DNN - this->net_=dn.net_; -#endif - this->setPreprocessor(dn.preprocessor_); - this->inputGeometry_=dn.inputGeometry_; - this->inputChannelCount_=dn.inputChannelCount_; - this->outputChannelCount_ = dn.outputChannelCount_; - // this->minibatchSz_=dn.minibatchSz_; - //this->outputGeometry_=dn.outputSize_; - this->preprocessor_=dn.preprocessor_; - this->outputGeometry_=dn.outputGeometry_; - return *this; - //Implemented to supress Visual Studio warning "assignment 
operator could not be generated" - } - - DeepCNNTextDetectorDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr preprocessor, int maxMinibatchSz,int inputHeight=700,int inputWidth =700,int inputChannel =3) - :minibatchSz_(maxMinibatchSz) + TextDetectorCNNImpl(const String& modelArchFilename, const String& modelWeightsFilename, bool detectMultiscale) : + detectMultiscale_(detectMultiscale) { + net_ = readNetFromCaffe(modelArchFilename, modelWeightsFilename); + CV_Assert(!net_.empty()); + inputChannelCount_ = 3; + sizes_.push_back(Size(700, 700)); - CV_Assert(this->minibatchSz_>0); - CV_Assert(fileExists(modelArchFilename)); - CV_Assert(fileExists(modelWeightsFilename)); - CV_Assert(!preprocessor.empty()); - this->setPreprocessor(preprocessor); -#ifdef HAVE_DNN - this->net_ = makePtr(readNetFromCaffe(modelArchFilename,modelWeightsFilename)); - - if (this->net_.empty()) + if(detectMultiscale_) { - std::cerr << "Can't load network by using the following files: " << std::endl; - std::cerr << "prototxt: " << modelArchFilename << std::endl; - std::cerr << "caffemodel: " << modelWeightsFilename << std::endl; - //std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl; - //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl; - exit(-1); + sizes_.push_back(Size(300, 300)); + sizes_.push_back(Size(700,500)); + sizes_.push_back(Size(700,300)); + sizes_.push_back(Size(1600,1600)); } - - this->inputGeometry_.height =inputHeight; - this->inputGeometry_.width = inputWidth ;//inputLayer->width(); - this->inputChannelCount_ = inputChannel ;//inputLayer->channels(); - -#else - CV_Error(Error::StsError,"DNN module not available during compilation!"); -#endif } - - void detect(InputArray image, OutputArray Bbox_prob) + void textDetectInImage(InputArray inputImage_, std::vector& Bbox, std::vector& confidence) { - Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width); - 
Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed - Mat outputMat = Bbox_prob.getMat(); + CV_Assert(inputImage_.channels() == inputChannelCount_); + Mat inputImage = inputImage_.getMat().clone(); + Bbox.resize(0); + confidence.resize(0); - process_(image.getMat(),outputMat); - //copy back to outputArray - outputMat.copyTo(Bbox_prob); - } - - Size getOutputGeometry() - { - return this->outputGeometry_; - } - Size getinputGeometry() - { - return this->inputGeometry_; - } - - int getMinibatchSize() - { - return this->minibatchSz_; - } - - int getBackend() - { - return OCR_HOLISTIC_BACKEND_DNN; - } - void setPreprocessor(Ptr ptr) - { - CV_Assert(!ptr.empty()); - preprocessor_=ptr; - } - - Ptr getPreprocessor() - { - return preprocessor_; - } + for(size_t i = 0; i < sizes_.size(); i++) + { + Size inputGeometry = sizes_[i]; + net_.setInput(blobFromImage(inputImage, 1, inputGeometry, Scalar(123, 117, 104)), "data"); + Mat outputNet = net_.forward(); + int nbrTextBoxes = outputNet.size[2]; + int nCol = outputNet.size[3]; + int outputChannelCount = outputNet.size[1]; + CV_Assert(outputChannelCount == 1); + getOutputs((float*)(outputNet.data), nbrTextBoxes, nCol, Bbox, confidence, inputImage.size()); + } + } }; -Ptr DeepCNNTextDetector::create(String archFilename,String weightsFilename,Ptr preprocessor,int minibatchSz,int backEnd) +Ptr TextDetectorCNN::create(const String &modelArchFilename, const String &modelWeightsFilename, bool detectMultiscale) { - if(preprocessor.empty()) - { - // create a custom preprocessor with rawval - preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255); - // set the mean for the preprocessor - - Mat textbox_mean(1,3,CV_8U); - textbox_mean.at(0,0)=104; - textbox_mean.at(0,1)=117; - textbox_mean.at(0,2)=123; - preprocessor->set_mean(textbox_mean); - } - switch(backEnd){ - case OCR_HOLISTIC_BACKEND_DEFAULT: - -#ifdef HAVE_CAFFE - return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, 
weightsFilename,preprocessor, minibatchSz)); - -#elif defined(HAVE_DNN) - return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,700,700,3)); -#else - CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); - return Ptr(); -#endif - case OCR_HOLISTIC_BACKEND_CAFFE: - - return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); - break; - - case OCR_HOLISTIC_BACKEND_DNN: - return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,700,700,3)); - break; - - case OCR_HOLISTIC_BACKEND_NONE: - default: - CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); - return Ptr(); - break; - } - //return Ptr(); - + return makePtr(modelArchFilename, modelWeightsFilename, detectMultiscale); } - - -Ptr DeepCNNTextDetector::createTextBoxNet(String archFilename,String weightsFilename,int backEnd) -{ - - // create a custom preprocessor with rawval - Ptr preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255); - // set the mean for the preprocessor - - Mat textbox_mean(1,3,CV_8U); - textbox_mean.at(0,0)=104; - textbox_mean.at(0,1)=117; - textbox_mean.at(0,2)=123; - preprocessor->set_mean(textbox_mean); - switch(backEnd){ - case OCR_HOLISTIC_BACKEND_DEFAULT: - -#ifdef HAVE_CAFFE - return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1)); - -#elif defined(HAVE_DNN) - return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1,700,700,3)); -#else - CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); - return Ptr(); -#endif - break; - case OCR_HOLISTIC_BACKEND_CAFFE: - return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1)); - break; - case OCR_HOLISTIC_BACKEND_DNN: - return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1,700,700,3)); - break; - 
case OCR_HOLISTIC_BACKEND_NONE: - default: - CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); - return Ptr(); - break; - } - //return Ptr(); - -} - -void DeepCNNTextDetector::preprocess(const Mat& input,Mat& output) -{ - Size inputHtWd = Size(this->inputGeometry_.height,this->inputGeometry_.width); - this->preprocessor_->preprocess(input,output,inputHtWd,this->inputChannelCount_); -} - - - -} } //namespace text namespace cv +} //namespace text +} //namespace cv diff --git a/modules/text/text_config.hpp.in b/modules/text/text_config.hpp.in index 81e624bab..ec5120a41 100644 --- a/modules/text/text_config.hpp.in +++ b/modules/text/text_config.hpp.in @@ -1,4 +1,7 @@ #ifndef __OPENCV_TEXT_CONFIG_HPP__ #define __OPENCV_TEXT_CONFIG_HPP__ +// HAVE OCR Tesseract +#cmakedefine HAVE_TESSERACT + #endif