text: cleanup dnn text detection part

7 years ago · 951e18272d
parent c33629e053
commit 951e18272d
19 changed files with 308 additions and 2898 deletions
--- a/modules/text/CMakeLists.txt
+++ b/modules/text/CMakeLists.txt
@ -1,84 +1,24 @@
 set(the_description "Text Detection and Recognition")
-
+ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_dnn OPTIONAL opencv_highgui WRAP python java)
-if(POLICY CMP0023)
+
-  message(STATUS "Explicitly setting policy CMP0023 to OLD")
+if(NOT CMAKE_CROSSCOMPILING OR OPENCV_FIND_TESSERACT)
-  cmake_policy(SET CMP0023 OLD)
+  set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
-endif(POLICY CMP0023)
+  find_package(Tesseract QUIET)
-
+  if(Tesseract_FOUND)
-# Using cmake scripts and modules
+    message(STATUS "Tesseract:   YES")
-list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR})
+    set(HAVE_TESSERACT 1)
-
+    ocv_include_directories(${Tesseract_INCLUDE_DIR})
-set(TEXT_DEPS opencv_ml opencv_highgui opencv_imgproc opencv_core opencv_features2d opencv_calib3d)
+    ocv_target_link_libraries(${the_module} ${Tesseract_LIBRARIES})
-
+  else()
-find_package(Caffe)
+    message(STATUS "Tesseract:   NO")
 if(Caffe_FOUND)
  message(STATUS "Caffe:   YES")
  set(HAVE_CAFFE 1)
 else()
  message(STATUS "Caffe:   NO")
 #  list(APPEND TEXT_DEPS opencv_dnn)
 endif()
 #internal dependencies
 find_package(Protobuf)
 if(Protobuf_FOUND)
  message(STATUS "Protobuf:   YES")
  set(HAVE_PROTOBUF 1)
 else()
  message(STATUS "Protobuf:   NO")
 endif()
 find_package(Glog)
 if(Glog_FOUND)
  message(STATUS "Glog:   YES")
  set(HAVE_GLOG 1)
 else()
  message(STATUS "Glog:   NO")
 endif()
 ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d OPTIONAL opencv_dnn WRAP python)
 #ocv_define_module(text ${TEXT_DEPS} WRAP python)
 #set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR})
 find_package(Tesseract)
 if(${Tesseract_FOUND})
  message(STATUS "Tesseract:   YES")
  include_directories(${Tesseract_INCLUDE_DIR})
  target_link_libraries(opencv_text ${Tesseract_LIBS})
  add_definitions(-DHAVE_TESSERACT)
 else()
  message(STATUS "Tesseract:   NO")
  endif()
 endif()
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/text_config.hpp.in
               ${CMAKE_BINARY_DIR}/text_config.hpp @ONLY)
-if(HAVE_CAFFE AND HAVE_GLOG AND HAVE_PROTOBUF)
+ocv_include_directories(${CMAKE_CURRENT_BINARY_DIR})
  include_directories(${Caffe_INCLUDE_DIR})
  find_package(HDF5 COMPONENTS HL REQUIRED)
  include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR})
  list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES})
  find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem)
  include_directories(SYSTEM ${Boost_INCLUDE_DIR})
  include_directories(SYSTEM ${CUDA_INCLUDE_DIR})
  link_directories(SYSTEM ${CUDA_LIBS})
 # include_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/include/ usr/local/cuda-8.0/include/ /usr/local/cuda-7.5/targets/x86_64-linux/include/ )
  #link_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/lib/ usr/local/cuda-8.0/lib/ /usr/local/cuda-7.5/targets/x86_64-linux/lib/ /usr/lib/openblas-base/lib /usr/local/cuda-8.0/lib64)
  list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES})
  target_link_libraries(opencv_text atlas blas ${Caffe_LIBS} ${Glog_LIBS} ${Protobuf_LIBS} ${HDF5_LIBRARIES} ${Boost_LIBRARIES})
  add_definitions(-DHAVE_CAFFE)
 endif() #HAVE_CAFFE
 message(STATUS "TEXT CAFFE SEARCH")
 if()
  message(STATUS "TEXT NO CAFFE CONFLICT")
 else()
  message(STATUS "TEXT CAFFE CONFLICT")
 endif()
-if(HAVE_opencv_dnn)
+ocv_add_testdata(samples/ contrib/text
-	message(STATUS "dnn module found")
+    FILES_MATCHING PATTERN "*.xml" PATTERN "*.xml.gz" REGEX "scenetext[0-9]+.jpg"
-	add_definitions(-DHAVE_DNN)
+)
 	set(HAVE_DNN 1)
 else()
 	message(STATUS "dnn module not found")
 endif()
--- a/modules/text/FindCaffe.cmake
+++ b/modules/text/FindCaffe.cmake
@ -1,14 +0,0 @@
 # Caffe package for CNN Triplet training
 #
 # Find-module script that looks for a Caffe installation.
 # Variables produced:
 #   Caffe_INCLUDE_DIR - result of the header search below
 #   Caffe_LIBS        - result of the library search below
 #   Caffe_FOUND       - set to 1 only when both searches succeeded
 #
 # Forget any value Caffe_FOUND already has in this scope.
 unset(Caffe_FOUND)
 # Header search; /usr/local/include is passed as a search hint.
 find_path(Caffe_INCLUDE_DIR
   NAMES
     caffe/caffe.hpp
     caffe/common.hpp
     caffe/net.hpp
     caffe/proto/caffe.pb.h
     caffe/util/io.hpp
     caffe/vision_layers.hpp
   HINTS /usr/local/include)
 # Library search; /usr/local/lib is passed as a search hint.
 find_library(Caffe_LIBS NAMES caffe HINTS /usr/local/lib)
 # Report success only when the library and the headers were both located.
 if(Caffe_LIBS AND Caffe_INCLUDE_DIR)
   set(Caffe_FOUND 1)
 endif()
--- a/modules/text/FindGlog.cmake
+++ b/modules/text/FindGlog.cmake
@ -1,10 +0,0 @@
 # Required for Caffe
 #
 # Find-module script that looks for the glog library.
 # Variables produced:
 #   Glog_LIBS  - result of the library search below
 #   Glog_FOUND - set to 1 only when the library was located
 #
 # Forget any value Glog_FOUND already has in this scope.
 unset(Glog_FOUND)
 # Library search; /usr/local/lib is passed as a search hint.
 find_library(Glog_LIBS NAMES glog HINTS /usr/local/lib)
 # Glog_LIBS is a false value (<var>-NOTFOUND) when the search failed.
 if(Glog_LIBS)
   set(Glog_FOUND 1)
 endif()
--- a/modules/text/FindProtobuf.cmake
+++ b/modules/text/FindProtobuf.cmake
@ -1,10 +0,0 @@
 # Protobuf package required for Caffe
 #
 # Find-module script that looks for the protobuf library.
 # Variables produced:
 #   Protobuf_LIBS  - result of the library search below
 #   Protobuf_FOUND - set to 1 only when the library was located
 #
 # Forget any value Protobuf_FOUND already has in this scope.
 unset(Protobuf_FOUND)
 # Library search; /usr/local/lib is passed as a search hint.
 find_library(Protobuf_LIBS NAMES protobuf HINTS /usr/local/lib)
 # Protobuf_LIBS is a false value (<var>-NOTFOUND) when the search failed.
 if(Protobuf_LIBS)
   set(Protobuf_FOUND 1)
 endif()
--- a/modules/text/FindTesseract.cmake
+++ b/modules/text/FindTesseract.cmake
@ -1,22 +0,0 @@
 # Tesseract OCR
 #
 # Find-module script that looks for the Tesseract OCR headers and library,
 # plus the "lept" library that is searched for alongside it.
 # Variables produced:
 #   Tesseract_INCLUDE_DIR - directory that contains tesseract/baseapi.h
 #   Tesseract_LIBRARY     - result of the tesseract library search
 #   Lept_LIBRARY          - result of the lept library search
 #   Tesseract_LIBS        - list built from the two library results above
 #   Tesseract_FOUND       - set to 1 only when the header and both libraries
 #                           were located
 #
 # Forget any value Tesseract_FOUND already has in this scope.
 unset(Tesseract_FOUND)
 # Header search; the listed directories are passed as search hints.
 find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h
  HINTS
  /usr/include
  /usr/local/include)
 # Library searches; the listed directories are passed as search hints.
 find_library(Tesseract_LIBRARY NAMES tesseract
  HINTS
  /usr/lib
  /usr/local/lib)
 find_library(Lept_LIBRARY NAMES lept
  HINTS
  /usr/lib
  /usr/local/lib)
 set(Tesseract_LIBS ${Tesseract_LIBRARY} ${Lept_LIBRARY})
 # Test each search result on its own instead of the combined list: if() only
 # treats a value as false when the whole string ends in -NOTFOUND, so a list
 # such as "Tesseract_LIBRARY-NOTFOUND;/usr/lib/liblept.so" would be true and
 # Tesseract would be reported as found although its library is missing.
 if(Tesseract_INCLUDE_DIR AND Tesseract_LIBRARY AND Lept_LIBRARY)
    set(Tesseract_FOUND 1)
 endif()
--- a/modules/text/README.md
+++ b/modules/text/README.md
@ -56,74 +56,3 @@ Intro
 -----
 The text module now has text detection and recognition using deep CNNs. The text detector is a deep CNN that takes an image which may contain multiple words and outputs a list of Rects with bounding boxes and the probability of text in each of them. The text recognizer provides a probability over a given vocabulary for each of these Rects.
 Two backends are supported: 1) Caffe and 2) opencv-dnn.
 Installation of Caffe backend
 ----------------------------
 * Please note a custom caffe based on SSD branch is required, the link of the custom caffe is provided below
 The caffe wrapping backend has the requirements caffe does.
 * Caffe can be built against OpenCV; if the Caffe backend is enabled, a circular dependency arises.
 The simplest solution is to build caffe without support for OpenCV.
 * Only the OS supported by Caffe are supported by the backend.
 The scripts describing the module have been developed in ubuntu 16.04 and assume such a system.
 Other UNIX systems including OSX should be easy to adapt.
 Sample script for building Caffe
 ```bash
 #!/bin/bash
 SRCROOT="${HOME}/caffe_inst/"
 mkdir -p "$SRCROOT"
 cd "$SRCROOT"
 git clone https://github.com/sghoshcvc/TextBoxes.git
 cd TextBoxes
 cat Makefile.config.example  > Makefile.config
 echo 'USE_OPENCV := 0' >> Makefile.config
 echo 'INCLUDE_DIRS += /usr/include/hdf5/serial/' >> Makefile.config
 echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config
 echo "--- /tmp/caffe/include/caffe/net.hpp	2017-05-28 04:55:47.929623902 +0200
 +++ caffe/distribute/include/caffe/net.hpp	2017-05-28 04:51:33.437090768 +0200
@@ -234,6 +234,7 @@
     template <typename T>
     friend class Net;
 +    virtual ~Callback(){}
   };
   const vector<Callback*>& before_forward() const { return before_forward_; }
   void add_before_forward(Callback* value) {
 ">/tmp/cleanup_caffe.diff
 patch < /tmp/cleanup_caffe.diff
 make -j 6
 make pycaffe
 make distribute
 ```
 ```bash
 #!/bin/bash
 cd $OPENCV_BUILD_DIR #You must set this
 CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe in ubuntu 16.04
 cmake  -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0"  -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="$OPENCV_CONTRIB/modules"   ./
 ```
 where $OPENCV_CONTRIB is the root directory containing the opencv_contrib modules
 Installation of opencv-dnn backend
 ----------------------------
 Use of opencv-dnn does not need any additional library.
 The recent opencv-3.3.0 needs to be built with the extra modules to use the text module.
--- a/modules/text/cmake/FindTesseract.cmake
+++ b/modules/text/cmake/FindTesseract.cmake
@ -5,14 +5,17 @@ endif()
 if(NOT Tesseract_FOUND)
  find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h
    HINTS
    /usr/include
    /usr/local/include)
  find_library(Tesseract_LIBRARY NAMES tesseract
    HINTS
    /usr/lib
    /usr/local/lib)
  find_library(Lept_LIBRARY NAMES lept
    HINTS
    /usr/lib
    /usr/local/lib)
  if(Tesseract_INCLUDE_DIR AND Tesseract_LIBRARY AND Lept_LIBRARY)
--- a/modules/text/include/opencv2/text.hpp
+++ b/modules/text/include/opencv2/text.hpp
@ -93,7 +93,7 @@ grouping horizontally aligned text, and the method proposed by Lluis Gomez and D
 in @cite Gomez13 @cite Gomez14 for grouping arbitrary oriented text (see erGrouping).
 To see the text detector at work, have a look at the textdetection demo:
-<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>
+<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>
    @defgroup text_recognize Scene Text Recognition
  @}
--- a/modules/text/include/opencv2/text/erfilter.hpp
+++ b/modules/text/include/opencv2/text/erfilter.hpp
@ -65,7 +65,6 @@ component tree of the image. :
 */
 struct CV_EXPORTS ERStat
 {
 public:
    //! Constructor
    explicit ERStat(int level = 256, int pixel = 0, int x = 0, int y = 0);
--- a/modules/text/include/opencv2/text/ocr.hpp
+++ b/modules/text/include/opencv2/text/ocr.hpp
@ -44,12 +44,10 @@
 #ifndef __OPENCV_TEXT_OCR_HPP__
 #define __OPENCV_TEXT_OCR_HPP__
 #include <opencv2/core.hpp>
 #include <vector>
 #include <string>
 #include <iostream>
 #include <sstream>
 namespace cv
 {
@ -91,100 +89,61 @@ enum ocr_engine_mode
 };
 //base class BaseOCR declares a common API that would be used in a typical text recognition scenario
 class CV_EXPORTS_W BaseOCR
 {
- public:
+public:
    virtual ~BaseOCR() {};
-
+    virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
-    virtual void run(Mat& image, std::string& output_text,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
                     std::vector<Rect>* component_rects=NULL,
                     std::vector<std::string>* component_texts=NULL,
                     std::vector<float>* component_confidences=NULL,
                     int component_level=0) = 0;
-
+    virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
-    virtual void run(Mat& image, Mat& mask, std::string& output_text,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
                     std::vector<Rect>* component_rects=NULL,
                     std::vector<std::string>* component_texts=NULL,
                     std::vector<float>* component_confidences=NULL,
                     int component_level=0) = 0;
    /** @brief Main functionality of the OCR Hierarchy. Subclasses provide
     * default parameters for all parameters other than the input image.
     */
    virtual String run(InputArray image){
        std::string res;
        std::vector<Rect> component_rects;
        std::vector<float> component_confidences;
        std::vector<std::string> component_texts;
        Mat inputImage=image.getMat();
        this->run(inputImage,res,&component_rects,&component_texts,
                  &component_confidences,OCR_LEVEL_WORD);
        return res;
    }
 };
-/** @brief OCRTesseract class provides an interface with the tesseract-ocr API
+/** @brief OCRTesseract class provides an interface with the tesseract-ocr API (v3.02.02) in C++.
 * (v3.02.02) in C++.
 Notice that it is compiled only when tesseract-ocr is correctly installed.
@note
-   -   (C++) An example of OCRTesseract recognition combined with scene text
+   -   (C++) An example of OCRTesseract recognition combined with scene text detection can be found
-        detection can be found at the end_to_end_recognition demo:
+        at the end_to_end_recognition demo:
-        <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/end_to_end_recognition.cpp>
+        <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/end_to_end_recognition.cpp>
-    -   (C++) Another example of OCRTesseract recognition combined with scene
+    -   (C++) Another example of OCRTesseract recognition combined with scene text detection can be
-        text detection can be found at the webcam_demo:
+        found at the webcam_demo:
-        <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
+        <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
 */
 class CV_EXPORTS_W OCRTesseract : public BaseOCR
 {
 public:
    /** @brief Recognize text using the tesseract-ocr API.
-    Takes image on input and returns recognized text in the output_text
+    Takes image on input and returns recognized text in the output_text parameter. Optionally
-    parameter. Optionally provides also the Rects for individual text elements
+    provides also the Rects for individual text elements found (e.g. words), and the list of those
-    found (e.g. words), and the list of those text elements with their
+    text elements with their confidence values.
    confidence values.
    @param image Input image CV_8UC1 or CV_8UC3
    @param output_text Output text of the tesseract-ocr.
-
+    @param component_rects If provided the method will output a list of Rects for the individual
-    @param component_rects If provided the method will output a list of Rects
+    text elements found (e.g. words or text lines).
-    for the individual text elements found (e.g. words or text lines).
+    @param component_texts If provided the method will output a list of text strings for the
-
+    recognition of individual text elements found (e.g. words or text lines).
-    @param component_texts If provided the method will output a list of text
+    @param component_confidences If provided the method will output a list of confidence values
-    strings for the recognition of individual text elements found (e.g. words or
+    for the recognition of individual text elements found (e.g. words or text lines).
-    text lines).
+    @param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXTLINE.
    @param component_confidences If provided the method will output a list of
    confidence values for the recognition of individual text elements found
    (e.g. words or text lines).
    @param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXT_LINE.
     */
-    using BaseOCR::run;
+    virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
-    virtual void run (Mat& image, std::string& output_text,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
                     std::vector<Rect>* component_rects=NULL,
                     std::vector<std::string>* component_texts=NULL,
                     std::vector<float>* component_confidences=NULL,
                     int component_level=0);
-    virtual void run (Mat& image, Mat& mask, std::string& output_text,
+    virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
-                      std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
-                      std::vector<std::string>* component_texts=NULL,
+                     int component_level=0);
                      std::vector<float>* component_confidences=NULL,
                      int component_level=0);
    // aliases for scripting
-    CV_WRAP String run (InputArray image, int min_confidence,
+    CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
                        int component_level=0);
-    CV_WRAP String run(InputArray image, InputArray mask,
+    CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
                       int min_confidence, int component_level=0);
    CV_WRAP virtual void setWhiteList(const String& char_whitelist) = 0;
@ -205,7 +164,6 @@ public:
     */
    CV_WRAP static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
                                    const char* char_whitelist=NULL, int oem=OEM_DEFAULT, int psmode=PSM_AUTO);
 };
@ -225,19 +183,19 @@ enum classifier_type
 /** @brief OCRHMMDecoder class provides an interface for OCR using Hidden Markov Models.
-
+@note
- * @note
+   -   (C++) An example on using OCRHMMDecoder recognition combined with scene text detection can
- * -   (C++) An example on using OCRHMMDecoder recognition combined with scene
+        be found at the webcam_demo sample:
- *     text detection can be found at the webcam_demo sample:
+        <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
 *      <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
 */
-class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR {
+class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR
- public:
+{
 public:
    /** @brief Callback with the character classifier is made a class.
-    * This way it hides the feature extractor and the classifier itself, so
+    This way it hides the feature extractor and the classifier itself, so developers can write
-    * developers can write their own OCR code.
+    their own OCR code.
    The default character classifier and feature extractor can be loaded using the utility function
    loadOCRHMMClassifierNM and KNN model provided in
@ -246,120 +204,92 @@ class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR {
    class CV_EXPORTS_W ClassifierCallback
    {
    public:
        virtual ~ClassifierCallback() { }
-        /** @brief The character classifier must return a (ranked list of)
+        /** @brief The character classifier must return a (ranked list of) class(es) id('s)
         * class(es) id('s)
         * @param image Input image CV_8UC1 or CV_8UC3 with a single letter.
         * @param out_class The classifier returns the character class
         * categorical label, or list of class labels, to which the input image
         * corresponds.
-         * @param out_confidence The classifier returns the probability of the
+        @param image Input image CV_8UC1 or CV_8UC3 with a single letter.
-         * input image corresponding to each classes in out_class.
+        @param out_class The classifier returns the character class categorical label, or list of
        class labels, to which the input image corresponds.
        @param out_confidence The classifier returns the probability of the input image
        corresponding to each classes in out_class.
         */
-        virtual void eval (InputArray image, std::vector<int>& out_class,
+        virtual void eval( InputArray image, std::vector<int>& out_class, std::vector<double>& out_confidence);
                           std::vector<double>& out_confidence);
    };
 public:
    /** @brief Recognize text using HMM.
-    * Takes binary image on input and returns recognized text in the output_text
+    Takes binary image on input and returns recognized text in the output_text parameter. Optionally
-    * parameter. Optionally provides also the Rects for individual text elements
+    provides also the Rects for individual text elements found (e.g. words), and the list of those
-    * found (e.g. words), and the list of those text elements with their
+    text elements with their confidence values.
    * confidence values.
-    * @param image Input binary image CV_8UC1 with a single text line (or word).
+    @param image Input binary image CV_8UC1 with a single text line (or word).
-    * @param output_text Output text. Most likely character sequence found by
+    @param output_text Output text. Most likely character sequence found by the HMM decoder.
    * the HMM decoder.
-    * @param component_rects If provided the method will output a list of Rects
+    @param component_rects If provided the method will output a list of Rects for the individual
-    * for the individual text elements found (e.g. words).
+    text elements found (e.g. words).
-    * @param component_texts If provided the method will output a list of text
+    @param component_texts If provided the method will output a list of text strings for the
-    * strings for the recognition of individual text elements found (e.g. words).
+    recognition of individual text elements found (e.g. words).
-    * @param component_confidences If provided the method will output a list of
+    @param component_confidences If provided the method will output a list of confidence values
-    * confidence values for the recognition of individual text elements found
+    for the recognition of individual text elements found (e.g. words).
    * (e.g. words).
-    * @param component_level Only OCR_LEVEL_WORD is supported.
+    @param component_level Only OCR_LEVEL_WORD is supported.
-    */
+     */
-    using BaseOCR::run;
+    virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
-    virtual void run (Mat& image, std::string& output_text,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
-                      std::vector<Rect>* component_rects=NULL,
+                     int component_level=0);
                      std::vector<std::string>* component_texts=NULL,
                      std::vector<float>* component_confidences=NULL,
                      int component_level=0);
    /** @brief Recognize text using HMM.
-    * Takes an image and a mask (where each connected component corresponds to a
+    Takes an image and a mask (where each connected component corresponds to a segmented character)
-    * segmented character) on input and returns recognized text in the
+    on input and returns recognized text in the output_text parameter. Optionally
-    * output_text parameter. Optionally provides also the Rects for individual
+    provides also the Rects for individual text elements found (e.g. words), and the list of those
-    * text elements found (e.g. words), and the list of those text elements with
+    text elements with their confidence values.
    * their confidence values.
    * @param image Input image CV_8UC1 or CV_8UC3 with a single text line
    * (or word).
-    * @param mask Input binary image CV_8UC1 same size as input image. Each
+    @param image Input image CV_8UC1 or CV_8UC3 with a single text line (or word).
-    * connected component in mask corresponds to a segmented character in the
+    @param mask Input binary image CV_8UC1 same size as input image. Each connected component in mask corresponds to a segmented character in the input image.
    * input image.
-    * @param output_text Output text. Most likely character sequence found by
+    @param output_text Output text. Most likely character sequence found by the HMM decoder.
    * the HMM decoder.
-    * @param component_rects If provided the method will output a list of Rects
+    @param component_rects If provided the method will output a list of Rects for the individual
-    * for the individual text elements found (e.g. words).
+    text elements found (e.g. words).
-    * @param component_texts If provided the method will output a list of text
+    @param component_texts If provided the method will output a list of text strings for the
-    * strings for the recognition of individual text elements found (e.g. words).
+    recognition of individual text elements found (e.g. words).
-    * @param component_confidences If provided the method will output a list of
+    @param component_confidences If provided the method will output a list of confidence values
-    * confidence values for the recognition of individual text elements found
+    for the recognition of individual text elements found (e.g. words).
    * (e.g. words).
-    * @param component_level Only OCR_LEVEL_WORD is supported.
+    @param component_level Only OCR_LEVEL_WORD is supported.
-    */
+     */
-    virtual void run(Mat& image, Mat& mask, std::string& output_text,
+    virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
-                     std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
                     std::vector<std::string>* component_texts=NULL,
                     std::vector<float>* component_confidences=NULL,
                     int component_level=0);
    // aliases for scripting
-    CV_WRAP String run(InputArray image,
+    CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
                       int min_confidence,
                       int component_level=0);
-    CV_WRAP String run(InputArray image,
+    CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
                       InputArray mask,
                       int min_confidence,
                       int component_level=0);
-    /** @brief Creates an instance of the OCRHMMDecoder class. Initializes
+    /** @brief Creates an instance of the OCRHMMDecoder class. Initializes HMMDecoder.
     * HMMDecoder.
-     * @param classifier The character classifier with built in feature
+    @param classifier The character classifier with built in feature extractor.
     * extractor.
-     * @param vocabulary The language vocabulary (chars when ascii english text)
+    @param vocabulary The language vocabulary (chars when ascii english text). vocabulary.size()
-     * . vocabulary.size() must be equal to the number of classes of the
+    must be equal to the number of classes of the classifier.
     * classifier.
-     * @param transition_probabilities_table Table with transition probabilities
+    @param transition_probabilities_table Table with transition probabilities between character
-     * between character pairs. cols == rows == vocabulary.size().
+    pairs. cols == rows == vocabulary.size().
-     * @param emission_probabilities_table Table with observation emission
+    @param emission_probabilities_table Table with observation emission probabilities. cols ==
-     * probabilities. cols == rows == vocabulary.size().
+    rows == vocabulary.size().
-     * @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available
+    @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available for the moment
-     * for the moment (<http://en.wikipedia.org/wiki/Viterbi_algorithm>).
+    (<http://en.wikipedia.org/wiki/Viterbi_algorithm>).
     */
    static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
                                     const std::string& vocabulary,                    // The language vocabulary (chars when ASCII English text)
                                                                                       //     size() must be equal to the number of classes
@ -402,11 +332,9 @@ protected:
    decoder_mode mode;
 };
-/** @brief Allow to implicitly load the default character classifier when
+/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object.
 * creating an OCRHMMDecoder object.
 @param filename The XML or YAML file with the classifier model (e.g.OCRHMM_knn_model_data.xml)
@param filename The XML or YAML file with the classifier model (e.g. OCRHMM_knn_model_data.xml)
 The KNN default classifier is based in the scene text recognition method proposed by Lukás Neumann &
 Jiri Matas in [Neumann11b]. Basically, the region (contour) in the input image is normalized to a
@ -416,16 +344,11 @@ using a KNN model trained with synthetic data of rendered characters with differ
 types.
@deprecated use loadOCRHMMClassifier instead
 */
 CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM (
        const String& filename);
-/** @brief Allow to implicitly load the default character classifier when
+CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename);
 * creating an OCRHMMDecoder object.
 @param filename The XML or YAML file with the classifier model (e.g.OCRBeamSearch_CNN_model_data.xml.gz)
 /** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object.
@param filename The XML or YAML file with the classifier model (e.g. OCRBeamSearch_CNN_model_data.xml.gz)
@ -435,10 +358,8 @@ a linear classifier. It is applied to the input image in a sliding window fashio
 at each window location.
@deprecated use loadOCRHMMClassifier instead
 */
-CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN (
+CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename);
        const String& filename);
 /** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object.
@ -450,64 +371,49 @@ CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN (
 CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifier(const String& filename, int classifier);
 //! @}
 /** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
 *
 * @param vocabulary The language vocabulary (chars when ASCII English text).
 *
 * @param lexicon The list of words that are expected to be found in a particular image.
-
+ *
- * @param transition_probabilities_table Output table with transition
+ * @param transition_probabilities_table Output table with transition probabilities between character pairs. cols == rows == vocabulary.size().
- * probabilities between character pairs. cols == rows == vocabulary.size().
+ *
-
+ * The function calculate frequency statistics of character pairs from the given lexicon and fills the output transition_probabilities_table with them. The transition_probabilities_table can be used as input in the OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods.
 * The function calculate frequency statistics of character pairs from the given
 * lexicon and fills the output transition_probabilities_table with them. The
 * transition_probabilities_table can be used as input in the
 * OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods.
 * @note
- *    -   (C++) An alternative would be to load the default generic language
+ *    -   (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) :
- *        transition table provided in the text module samples folder (created
+ *            <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
 *        from ispell 42869 english words list) :
 *            <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
 **/
-CV_EXPORTS void createOCRHMMTransitionsTable (
+CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector<std::string>& lexicon, OutputArray transition_probabilities_table);
-        std::string& vocabulary, std::vector<std::string>& lexicon,
+
-        OutputArray transition_probabilities_table);
+CV_EXPORTS_W Mat createOCRHMMTransitionsTable(const String& vocabulary, std::vector<cv::String>& lexicon);
 CV_EXPORTS_W Mat createOCRHMMTransitionsTable (
        const String& vocabulary, std::vector<cv::String>& lexicon);
 /* OCR BeamSearch Decoder */
-/** @brief OCRBeamSearchDecoder class provides an interface for OCR using Beam
+/** @brief OCRBeamSearchDecoder class provides an interface for OCR using Beam Search algorithm.
 * Search algorithm.
@note
-   -   (C++) An example on using OCRBeamSearchDecoder recognition combined with
+   -   (C++) An example on using OCRBeamSearchDecoder recognition combined with scene text detection can
-        scene text detection can be found at the demo sample:
+        be found at the demo sample:
-        <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
+        <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
 */
-
+class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR
-
+{
-/* Forward declaration of class that can be used to generate an OCRBeamSearchDecoder::ClassifierCallbac */
+public:
 class TextImageClassifier;
 class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR{
 public:
    /** @brief Callback with the character classifier is made a class.
-     * This way it hides the feature extractor and the classifier itself, so
+    This way it hides the feature extractor and the classifier itself, so developers can write
-     * developers can write their own OCR code.
+    their own OCR code.
-     * The default character classifier and feature extractor can be loaded
+    The default character classifier and feature extractor can be loaded using the utility function
-     * using the utility function loadOCRBeamSearchClassifierCNN with all its
+    loadOCRBeamSearchClassifierCNN with all its parameters provided in
-     * parameters provided in
+    <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
     * <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
     */
-    class CV_EXPORTS_W ClassifierCallback{
+    class CV_EXPORTS_W ClassifierCallback
-     public:
+    {
    public:
        virtual ~ClassifierCallback() { }
        /** @brief The character classifier must return a (ranked list of) class(es) id('s)
@ -519,8 +425,8 @@ class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR{
         */
        virtual void eval( InputArray image, std::vector< std::vector<double> >& recognition_probabilities, std::vector<int>& oversegmentation );
-        virtual int getWindowSize() {return 0;}
+        int getWindowSize() {return 0;}
-        virtual int getStepSize() {return 0;}
+        int getStepSize() {return 0;}
    };
 public:
@ -545,7 +451,6 @@ public:
    @param component_level Only OCR_LEVEL_WORD is supported.
     */
    using BaseOCR::run;
    virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
                     int component_level=0);
@ -577,7 +482,6 @@ public:
    @param beam_size Size of the beam in Beam Search algorithm.
     */
    static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
                                     const std::string& vocabulary,                    // The language vocabulary (chars when ASCII English text)
                                                                                       //     size() must be equal to the number of classes
@ -598,29 +502,10 @@ public:
                                     int mode = OCR_DECODER_VITERBI,          // HMM Decoding algorithm (only Viterbi for the moment)
                                     int beam_size = 500);                              // Size of the beam in Beam Search algorithm
    /** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder from the specified path.
    @overload
    @param filename path to a character classifier file
    @param vocabulary The language vocabulary (chars when ASCII English text). vocabulary.size()
    must be equal to the number of classes of the classifier..
    @param transition_probabilities_table Table with transition probabilities between character
    pairs. cols == rows == vocabulary.size().
    @param emission_probabilities_table Table with observation emission probabilities. cols ==
    rows == vocabulary.size().
    @param mode HMM Decoding algorithm (only Viterbi for the moment)
    @param beam_size Size of the beam in Beam Search algorithm
     */
    CV_WRAP static Ptr<OCRBeamSearchDecoder> create(const String& filename, // The character classifier file
                                     const String& vocabulary,                    // The language vocabulary (chars when ASCII English text)
@ -631,7 +516,6 @@ public:
                                                                                       //     cols == rows == vocabulary.size()
                                     int mode = OCR_DECODER_VITERBI,          // HMM Decoding algorithm (only Viterbi for the moment)
                                     int beam_size = 500);
 protected:
    Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier;
@ -656,402 +540,6 @@ CV_EXPORTS_W Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClas
 //! @}
-
+}
-//Classifiers should provide diferent backends
+}
 /** @brief Identifiers of the computation backends a holistic (DeepCNN-style) classifier can use.
  *
  * These values are the ones accepted by the `backEnd` parameter of the DeepCNN factory
  * functions, whose default is OCR_HOLISTIC_BACKEND_DEFAULT.
  */
 enum{
    OCR_HOLISTIC_BACKEND_NONE, //!< no backend
    OCR_HOLISTIC_BACKEND_DNN, //!< opencv_dnn based backend
    OCR_HOLISTIC_BACKEND_CAFFE, //!< Caffe based backend
    OCR_HOLISTIC_BACKEND_DEFAULT //!< placeholder for the default backend, chosen based on the environment
 };
 class TextImageClassifier;
 /**
 * @brief Abstract interface for the image preprocessing applied before an image is fed to a classifier.
 *
 * Concrete preprocessors implement the protected preprocess_() hook (and may override
 * set_mean_()). Instances are obtained through the static create*() factories declared below.
 */
 class CV_EXPORTS_W ImagePreprocessor{
 protected:
    /** Preprocessing hook that every concrete preprocessor must implement (pure virtual). */
    virtual void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels)=0;
    /** Mean-setting hook; this default implementation ignores its argument and does nothing. */
    virtual void set_mean_(Mat){}
 public:
    virtual ~ImagePreprocessor(){}
    /** @brief Provides public access to the preprocessing with respect to a specific
     * classifier.
     *
     * This method's main use is to run the preprocessor without feeding its result to a classifier.
     * Determining the exact behavior of a preprocessor is the main motivation for this.
     *
     * @param input an image without any constraints
     *
     * @param output in most cases an image of fixed depth and size, and whitened
     *
     * @param sz the size to which the image would be resized if the preprocessor resizes inputs
     *
     * @param outputChannels the number of channels for the output image
     */
    CV_WRAP void preprocess(InputArray input,OutputArray output,Size sz,int outputChannels);
    /** @brief Provides public access to set the mean of the input images.
     *
     * The mean can be a Mat either of the same size as the image or with one value per color channel.
     * A preprocessor can be created without the mean (the preprocessor will calculate the mean for
     * every image in that case).
     *
     * @param mean the mean which will be subtracted from the images
     *
     */
    CV_WRAP void set_mean(Mat mean);
    /** @brief Creates a functor that only resizes and changes the channels of the input
     *  without further processing.
     *
     * @return shared pointer to the generated preprocessor
     */
    CV_WRAP static Ptr<ImagePreprocessor> createResizer();
    /** @brief Creates a preprocessor that standardizes the input image.
     *
     * NOTE(review): the exact operation is not visible in this header; DeepCNN::createDictNet
     * documents a preprocessor that "standarises pixels with a deviation of 113", so sigma is
     * presumably that deviation -- confirm against the implementation.
     *
     * @param sigma presumably the standard deviation used for the standardization (TODO confirm)
     *
     * @return shared pointer to generated preprocessor
     */
    CV_WRAP static Ptr<ImagePreprocessor> createImageStandarizer(double sigma);
    /** @brief Creates a preprocessor parameterized by a mean image.
     *
     * NOTE(review): judging by the name, meanImg is presumably subtracted from every input
     * image -- confirm against the implementation.
     *
     * @param meanImg the mean image handed to the preprocessor
     *
     * @return shared pointer to generated preprocessor
     */
    CV_WRAP static Ptr<ImagePreprocessor> createImageMeanSubtractor(InputArray meanImg);
    /** @brief Creates a functor with the given parameters; the parameters can be changed later
     * through the corresponding set functions.
     *
     * @param rawval defaults to 1.0; its meaning is not visible in this header (TODO document)
     *
     * @param channel_order defaults to "BGR"; presumably the channel order of the images (TODO confirm)
     *
     * @return shared pointer to generated preprocessor
     */
    CV_WRAP static Ptr<ImagePreprocessor>createImageCustomPreprocessor(double rawval=1.0,String channel_order="BGR");
    // TextImageClassifier is granted access to the protected hooks above.
    friend class TextImageClassifier;
 };
 /** @brief Abstract class that implements the classification of text images.
 *
 * The interface is generic enough to describe any image classifier, and allows
 * taking advantage of computing in batches. While word classifiers are the default
 * networks, any image classifier should work.
 *
 */
 class CV_EXPORTS_W TextImageClassifier
 {
 protected:
    Size inputGeometry_;                  //!< value returned by getInputSize()
    Size outputGeometry_;                 //!< output size storage; not read by any inline code in this class
    int channelCount_;                    //!< value returned by getInputChannelCount()
    Ptr<ImagePreprocessor> preprocessor_; //!< presumably the object behind setPreprocessor()/getPreprocessor() -- TODO confirm
    /** @brief All image preprocessing is handled here, including whitening etc.
     *
     * @param input the image to be preprocessed for the classifier. If the depth
     * is CV_U8 values should be in [0,255], otherwise values are assumed to be in [0,1]
     *
     * @param output reference to the image to be fed to the classifier; the preprocessor will
     * resize the image to the appropriate size and convert it to the appropriate depth
     *
     * The method preprocess should never be used externally; it is up to the classify and
     * classifyBatch methods to employ it.
     */
    virtual void preprocess(const Mat& input,Mat& output);
 public:
    virtual ~TextImageClassifier() {}
    /** @brief Sets the preprocessor of this classifier.
     *
     * @param ptr the preprocessor to use
     */
    CV_WRAP virtual void setPreprocessor(Ptr<ImagePreprocessor> ptr);
    /** @brief Returns the preprocessor of this classifier.
     */
    CV_WRAP Ptr<ImagePreprocessor> getPreprocessor();
    /** @brief Produces a class confidence row-vector given an image.
     */
    CV_WRAP virtual void classify(InputArray image, OutputArray classProbabilities) = 0;
    /** @brief Produces a matrix containing class confidence row-vectors given a collection of images.
     */
    CV_WRAP virtual void classifyBatch(InputArrayOfArrays image, OutputArray classProbabilities) = 0;
    /** @brief Simple getter method returning the number of channels each input sample has.
     */
    CV_WRAP virtual int getInputChannelCount(){return this->channelCount_;}
    /** @brief Simple getter method returning the size of the input sample.
     */
    CV_WRAP virtual Size getInputSize(){return this->inputGeometry_;}
    /** @brief Simple getter method returning the size of the output row-vector.
     */
    CV_WRAP virtual int getOutputSize()=0;
    /** @brief Simple getter method returning the shape of the output from caffe.
     */
    CV_WRAP virtual Size getOutputGeometry()=0;
    /** @brief Simple getter method returning the size of the minibatches for this classifier.
     * If not applicable this method should return 1.
     */
    CV_WRAP virtual int getMinibatchSize()=0;
    // ImagePreprocessor is granted access to the protected members above.
    friend class ImagePreprocessor;
 };
 class CV_EXPORTS_W DeepCNN:public TextImageClassifier
 {
    /** @brief Class that uses a pretrained caffe model for word classification.
     *
     * This network is described in detail in:
     * Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015
     * http://arxiv.org/abs/1412.1842
     */
 public:
    virtual ~DeepCNN() {};
    /** @brief Constructs a DeepCNN object from a caffe pretrained model.
     *
     * @param archFilename is the path to the prototxt file containing the deployment model architecture description.
     *
     * @param weightsFilename is the path to the pretrained weights of the model in binary form. This file can be
     * very large, up to 2GB.
     *
     * @param preprocessor is a pointer to the instance of an ImagePreprocessor implementing the preprocess_ protected method.
     *
     * @param minibatchSz the maximum number of samples that can be processed in parallel. In practice this parameter
     * has an effect only when computing on the GPU and should be set with respect to the memory available on the GPU.
     *
     * @param backEnd integer parameter selecting the computation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
     * the only option.
     */
    CV_WRAP static Ptr<DeepCNN> create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT);
    /** @brief Constructs a DeepCNN intended to be used for word spotting.
     *
     * This method loads a pretrained classifier and couples it with a preprocessor that standardizes pixels with a
     * deviation of 113. The architecture file can be downloaded from:
     * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt>
     * While the weights can be downloaded from:
     * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel>
     * The words assigned to the network outputs are available at:
     * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt>
     *
     * @param archFilename is the path to the prototxt file containing the deployment model architecture description.
     * When employing OCR_HOLISTIC_BACKEND_CAFFE this is the path to the deploy ".prototxt".
     *
     * @param weightsFilename is the path to the pretrained weights of the model. When employing
     * OCR_HOLISTIC_BACKEND_CAFFE this is the path to the ".caffemodel" file. This file can be very large, the
     * pretrained DictNet uses 2GB.
     *
     * @param backEnd integer parameter selecting the computation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
     * the only option.
     */
    CV_WRAP static Ptr<DeepCNN> createDictNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT);
 };
 namespace cnn_config{
 /** @brief Runtime backend information.
 *
 * This function reports the status of the backends compiled with this module.
 *
 * @return a list of backends (caffe, opencv-dnn etc.)
 * */
 CV_EXPORTS_W std::vector<std::string> getAvailableBackends();
 namespace caffe_backend{
 /** @brief Queries Caffe for the computation device being used.
 *
 * Whether Caffe uses the GPU or the CPU can only be controlled globally.
 * This function queries the current state of caffe.
 * If the module is built without caffe, this method throws an exception.
 *
 * @return true if caffe is computing on the GPU, false if caffe is computing on the CPU
 */
 CV_EXPORTS_W bool getCaffeGpuMode();
 /** @brief Sets the computation device being used by Caffe.
 *
 * Whether Caffe uses the GPU or the CPU can only be controlled globally.
 * This function sets that global state of caffe.
 * If the module is built without caffe, this method throws an exception.
 *
 * @param useGpu  set to true for caffe to be computing on the GPU, false if caffe is
 * computing on the CPU
 */
 CV_EXPORTS_W void setCaffeGpuMode(bool useGpu);
 /** @brief Provides runtime information on whether Caffe support was compiled in.
 *
 * The text module API is the same regardless of whether Caffe was available or not
 * during compilation. When methods that require Caffe are invoked while Caffe support
 * is not compiled in, exceptions are thrown. This method allows testing at runtime whether
 * the text module was built with caffe.
 *
 * @return true if Caffe support for the text module was provided during compilation,
 * false if Caffe was unavailable.
 */
 CV_EXPORTS_W bool getCaffeAvailable();
 }//caffe
 namespace dnn_backend {
 /** @brief Provides runtime information on whether the DNN module was compiled in.
 *
 * The text module API is the same regardless of whether the DNN module was available or not
 * during compilation. When methods that require a backend are invoked while no backend support
 * is compiled, exceptions are thrown. This method allows testing at runtime whether the
 * text module was built with dnn_backend.
 *
 * @return true if opencv_dnn support for the text module was provided during compilation,
 * false if opencv_dnn was unavailable.
 */
 CV_EXPORTS_W bool getDNNAvailable();
 }//dnn_backend
 }//cnn_config
 /** @brief OCRHolisticWordRecognizer class provides the functionality of segmented wordspotting.
 * Given a predefined vocabulary, a TextImageClassifier is employed to select the most probable
 * word given an input image.
 *
 * This class implements the logic of providing transcriptions given a vocabulary and an image
 * classifier. The classifier can be any TextImageClassifier, but the classifier for which this
 * class was built is the DictNet. In order to load it the following files should be downloaded:
 * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt>
 * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel>
 * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt>
 */
 class CV_EXPORTS_W OCRHolisticWordRecognizer : public BaseOCR
 {
 public:
    /** @brief Overload of run() that takes no mask; the parameters are documented on the
    mask-taking overload below.
     */
    virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
                     int component_level=OCR_LEVEL_WORD)=0;
    /** @brief Recognize text using a segmentation based word-spotting/classifier cnn.
    Takes image on input and returns recognized text in the output_text parameter. Optionally
    provides also the Rects for individual text elements found (e.g. words), and the list of those
    text elements with their confidence values.
    @param image Input image CV_8UC1 or CV_8UC3
    @param mask is totally ignored and is only available for compatibility reasons
    @param output_text Output text of the word spotting, always one that exists in the dictionary.
    @param component_rects Not applicable for word spotting; can be NULL. If not NULL, a single element will
        be put in the vector.
    @param component_texts Not applicable for word spotting; can be NULL. If not NULL, a single element will
        be put in the vector.
    @param component_confidences Not applicable for word spotting; can be NULL. If not NULL, a single element will
        be put in the vector.
    @param component_level must be OCR_LEVEL_WORD.
     */
    virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
                     int component_level=OCR_LEVEL_WORD)=0;
    /**
    @brief Method that provides a quick and simple interface to single word image classification
    @param inputImage an image expected to be a CV_U8C1 or CV_U8C3 of any size, assumed to contain a single word
    @param transcription an opencv string that will store the detected word transcription
    @param confidence a double that will be updated with the confidence the classifier has for the selected word
    */
    CV_WRAP virtual void recogniseImage(InputArray inputImage,CV_OUT String& transcription,CV_OUT double& confidence)=0;
    /**
    @brief Method that provides a quick and simple interface to multiple word image classification, taking advantage
    of the classifier's parallel capabilities.
    @param inputImageList a list of images expected to be CV_U8C1 or CV_U8C3; each image can be of any size and is assumed
    to contain a single word.
    @param transcriptions a vector of opencv strings that will store the detected word transcriptions, one for each
    input image
    @param confidences a vector of double that will be updated with the confidence the classifier has for each of the
    selected words.
    */
    CV_WRAP virtual void recogniseImageBatch(InputArrayOfArrays inputImageList,CV_OUT std::vector<String>& transcriptions,CV_OUT std::vector<double>& confidences)=0;
    /**
    @brief simple getter for the vocabulary employed
    */
    CV_WRAP virtual const std::vector<String>& getVocabulary()=0;
    /** @brief simple getter for the classifier (returns a Ptr<TextImageClassifier>)
     */
    CV_WRAP virtual Ptr<TextImageClassifier> getClassifier()=0;
    /** @brief Creates an instance of the OCRHolisticWordRecognizer class.
    @param classifierPtr an instance of TextImageClassifier, normally a DeepCNN instance
    @param vocabularyFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line
    in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize
    of the classifier.
     */
    CV_WRAP static Ptr<OCRHolisticWordRecognizer> create(Ptr<TextImageClassifier> classifierPtr,String vocabularyFilename);
    /** @brief Creates an instance of the OCRHolisticWordRecognizer class and implicitly also a DeepCNN classifier.
    @param modelArchFilename the relative or absolute path to the prototxt file describing the classifier's architecture.
    @param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.
    @param vocabularyFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line
    in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize
    of the classifier.
    */
    CV_WRAP static Ptr<OCRHolisticWordRecognizer> create(String modelArchFilename, String modelWeightsFilename, String vocabularyFilename);
    /** @brief Creates an instance of the OCRHolisticWordRecognizer class from a classifier and an
     * in-memory vocabulary.
     *
     * @param classifierPtr an instance of TextImageClassifier
     *
     * @param vocabulary the words of the vocabulary; presumably its size must match the classifier's
     * output size, as for the file-based overloads (TODO confirm)
     */
    CV_WRAP static Ptr<OCRHolisticWordRecognizer> create(Ptr<TextImageClassifier> classifierPtr,const std::vector<String>& vocabulary);
    /** @brief Creates an instance of the OCRHolisticWordRecognizer class from model files and an
     * in-memory vocabulary.
     *
     * @param modelArchFilename path to the file describing the classifier's architecture
     *
     * @param modelWeightsFilename path to the file containing the pretrained weights of the model
     *
     * @param vocabulary the words of the vocabulary; presumably its size must match the classifier's
     * output size, as for the file-based overloads (TODO confirm)
     */
    CV_WRAP static Ptr<OCRHolisticWordRecognizer> create (String modelArchFilename, String modelWeightsFilename, const std::vector<String>& vocabulary);
 };
 }//namespace text
 }//namespace cv
 #endif // _OPENCV_TEXT_OCR_HPP_
--- a/modules/text/include/opencv2/text/textDetector.hpp
+++ b/modules/text/include/opencv2/text/textDetector.hpp
@ -1,56 +1,12 @@
-/*M//////////////////////////////////////////////////////////////////////////////////////////
+// This file is part of OpenCV project.
-//
+// It is subject to the license terms in the LICENSE file found in the top-level directory
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+// of this distribution and at http://opencv.org/license.html.
 //
 //  By downloading, copying, installing or using the software you agree to this license.
 //  If you do not agree to this license, do not download, install,
 //  copy or use the software.
 //
 //
 //                          License Agreement
 //                For Open Source Computer Vision Library
 //
 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
 // Third party copyrights are property of their respective owners.
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
 //
 //   * Redistribution's of source code must retain the above copyright notice,
 //     this list of conditions and the following disclaimer.
 //
 //   * Redistribution's in binary form must reproduce the above copyright notice,
 //     this list of conditions and the following disclaimer in the documentation
 //     and/or other materials provided with the distribution.
 //
 //   * The name of the copyright holders may not be used to endorse or promote products
 //     derived from this software without specific prior written permission.
 //
 // This software is provided by the copyright holders and contributors "as is" and
 // any express or implied warranties, including, but not limited to, the implied
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
 // In no event shall the Intel Corporation or contributors be liable for any direct,
 // indirect, incidental, special, exemplary, or consequential damages
 // (including, but not limited to, procurement of substitute goods or services;
 // loss of use, data, or profits; or business interruption) however caused
 // and on any theory of liability, whether in contract, strict liability,
 // or tort (including negligence or otherwise) arising in any way out of
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
 #ifndef __OPENCV_TEXT_TEXTDETECTOR_HPP__
 #define __OPENCV_TEXT_TEXTDETECTOR_HPP__
 #include <vector>
 #include <string>
 #include <iostream>
 #include <sstream>
 #include"ocr.hpp"
 namespace cv
 {
 namespace text
@ -59,208 +15,44 @@ namespace text
 //! @addtogroup text_detect
 //! @{
-
+/** @brief An abstract class providing interface for text detection algorithms
 // Base class BaseDetector declares a common API that would be used in a typical text
 // detection scenario.
 // Both run() overloads are pure virtual; the output vectors are passed as pointers that
 // default to NULL, and component_level defaults to 0.
 class CV_EXPORTS_W BaseDetector
 {
 public:
    virtual ~BaseDetector() {};
    // Detection entry point taking only the image.
    virtual void run(Mat& image,
                     std::vector<Rect>* component_rects=NULL,
                     std::vector<float>* component_confidences=NULL,
                     int component_level=0) = 0;
    // Same as above with an additional mask argument.
    virtual void run(Mat& image, Mat& mask,
                     std::vector<Rect>* component_rects=NULL,
                     std::vector<float>* component_confidences=NULL,
                     int component_level=0) = 0;
 };
 /** A virtual class for different models of text detection (including CNN based deep models)
 */
-
+class CV_EXPORTS_W TextDetector
 class CV_EXPORTS_W TextRegionDetector
 {
 protected:
    /** Stores input and output size
     */
    //netGeometry inputGeometry_;
    //netGeometry outputGeometry_;
    Size inputGeometry_;
    Size outputGeometry_;
    int inputChannelCount_;
    int outputChannelCount_;
 public:
-    virtual ~TextRegionDetector() {}
+    /**
-
+    @brief Method that provides a quick and simple interface to detect text inside an image
    /** @brief produces a list of Bounding boxes and an estimate of text-ness confidence of Bounding Boxes
     */
    CV_WRAP virtual void detect(InputArray image, OutputArray bboxProb ) = 0;
    /** @brief simple getter method returning the size (height, width) of the input sample
     */
    CV_WRAP virtual Size  getInputGeometry(){return this->inputGeometry_;}
    /** @brief simple getter method returning the shape of the oputput
     *   Any text detector should output a number of text regions alongwith a score of text-ness
     *   From the shape it can be inferred the number of text regions and number of returned value
     *   for each region
     */
    CV_WRAP virtual Size getOutputGeometry(){return this->outputGeometry_;}
 };
 /** Generic structure of Deep CNN based Text Detectors
 * */
 class CV_EXPORTS_W  DeepCNNTextDetector : public TextRegionDetector
 {
    /** @brief Class that uses a pretrained caffe model for text detection.
     * Any text detection should
     * This network is described in detail in:
     * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network
     * https://arxiv.org/abs/1611.06779
     */
 protected:
    /** all deep CNN based text detectors have a preprocessor (normally)
         */
    Ptr<ImagePreprocessor> preprocessor_;
    /** @brief all image preprocessing is handled here including whitening etc.
         *
         *  @param input the image to be preprocessed for the classifier. If the depth
         * is CV_U8 values should be in [0,255] otherwise values are assumed to be in [0,1]
         *
         * @param output reference to the image to be fed to the classifier, the preprocessor will
         * resize the image to the apropriate size and convert it to the apropriate depth\
         *
         * The method preprocess should never be used externally, it is up to classify and classifyBatch
         * methods to employ it.
         */
    virtual void preprocess(const Mat& input,Mat& output);
 public:
    virtual ~DeepCNNTextDetector() {};
    /** @brief Constructs a DeepCNNTextDetector object from a caffe pretrained model
     *
     * @param archFilename is the path to the prototxt file containing the deployment model architecture description.
     *
     * @param weightsFilename is the path to the pretrained weights of the model in binary fdorm.
     *
     * @param preprocessor is a pointer to the instance of a ImagePreprocessor implementing the preprocess_ protecteed method;
     *
     * @param minibatchSz the maximum number of samples that can processed in parallel. In practice this parameter
     * has an effect only when computing in the GPU and should be set with respect to the memory available in the GPU.
     *
     * @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
     * the only option
     */
    CV_WRAP static Ptr<DeepCNNTextDetector> create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT);
    /** @brief Constructs a DeepCNNTextDetector intended to be used for text area detection.
     *
     * This method loads a pretrained classifier and couples with a preprocessor that preprocess the image with mean subtraction of ()
     * The architecture and models weights can be downloaded from:
     * https://github.com/sghoshcvc/TextBox-Models.git (size is around 100 MB)
     * @param archFilename is the path to the prototxt file containing the deployment model architecture description.
     * When employing OCR_HOLISTIC_BACKEND_CAFFE this is the path to the deploy ".prototxt".
     *
     * @param weightsFilename is the path to the pretrained weights of the model. When employing
     * OCR_HOLISTIC_BACKEND_CAFFE this is the path to the ".caffemodel" file.
     *
     * @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
     * the only option
     */
    CV_WRAP static Ptr<DeepCNNTextDetector> createTextBoxNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_DEFAULT);
    friend class ImagePreprocessor;
    @param inputImage an image to process
    @param Bbox a vector of Rect that will store the detected word bounding box
    @param confidence a vector of float that will be updated with the confidence the classifier has for the selected bounding box
    */
    virtual void textDetectInImage(InputArray inputImage, CV_OUT std::vector<Rect>& Bbox, CV_OUT std::vector<float>& confidence) = 0;
    virtual ~TextDetector() {}
 };
-/** @brief textDetector class provides the functionallity of text bounding box detection.
+/** @brief TextDetectorCNN class provides the functionallity of text bounding box detection.
- * A TextRegionDetector is employed to find bounding boxes of text
+ * A TextDetectorCNN is employed to find bounding boxes of text words given an input image.
 * words given an input image.
 *
 * This class implements the logic of providing text bounding boxes in a vector of rects given an TextRegionDetector
 * The TextRegionDetector can be any text detector
 *
 */
-
+class CV_EXPORTS_W TextDetectorCNN : public TextDetector
 class CV_EXPORTS_W textDetector : public BaseDetector
 {
 public:
    virtual void run(Mat& image,  std::vector<Rect>* component_rects=NULL,
                     std::vector<float>* component_confidences=NULL,
                     int component_level=OCR_LEVEL_WORD)=0;
    /** @brief Detect text with a cnn; the input is one image with (multiple) occurrences of text.
    Takes an image as input and provides the Rects of the individual text elements found
    (e.g. words) in component_rects, optionally together with their confidence values in
    component_confidences.
    @param image Input image CV_8UC1 or CV_8UC3
    @param mask is totally ignored and is only available for compatibility reasons
    @param component_rects a vector of Rects, each rect is one text bounding box.
    @param component_confidences A vector of float that returns the confidence of the text bounding boxes
    @param component_level must be OCR_LEVEL_WORD.
     */
    virtual void run(Mat& image, Mat& mask, std::vector<Rect>* component_rects=NULL,
                     std::vector<float>* component_confidences=NULL,
                     int component_level=OCR_LEVEL_WORD)=0;
    /**
-    @brief Method that provides a quick and simple interface to detect text inside an image
+    @overload
    @param inputImage an image expected to be a CV_U8C3 of any size
    @param Bbox a vector of Rect that will store the detected word bounding box
    @param confidence a vector of float that will be updated with the confidence the classifier has for the selected bounding box
    */
-    CV_WRAP virtual void textDetectInImage(InputArray inputImage,CV_OUT std::vector<Rect>& Bbox,CV_OUT std::vector<float>& confidence)=0;
+    CV_WRAP virtual void textDetectInImage(InputArray inputImage, CV_OUT std::vector<Rect>& Bbox, CV_OUT std::vector<float>& confidence) = 0;
    /** @brief simple getter for the preprocessing functor
     */
    CV_WRAP virtual Ptr<TextRegionDetector> getClassifier()=0;
    /** @brief Creates an instance of the textDetector class.
    @param classifierPtr an instance of TextImageClassifier, normaly a DeepCNN instance
     */
    CV_WRAP static Ptr<textDetector> create(Ptr<TextRegionDetector> classifierPtr);
    /** @brief Creates an instance of the textDetector class and implicitly also a DeepCNN classifier.
    @param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture.
    @param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.
-
+    @param detectMultiscale if true, multiple scales of the input image will be used as network input
    */
-    CV_WRAP static Ptr<textDetector> create(String modelArchFilename, String modelWeightsFilename);
+    CV_WRAP static Ptr<TextDetectorCNN> create(const String& modelArchFilename, const String& modelWeightsFilename, bool detectMultiscale = false);
 };
 //! @}
--- a/modules/text/samples/deeptextdetection.py
+++ b/modules/text/samples/deeptextdetection.py
@ -1,57 +1,37 @@
 # -*- coding: utf-8 -*-
 """
 Created on Wed Jul 19 17:54:00 2017
@author: sgnosh
 """
 #!/usr/bin/python
 import sys
 import os
 import cv2
 import numpy as np
-print('\nDeeptextdetection.py')
+def main():
-print('       A demo script of text box alogorithm of the paper:')
+    print('\nDeeptextdetection.py')
-print('       * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network https://arxiv.org/abs/1611.06779\n')
+    print('       A demo script of text box alogorithm of the paper:')
-
+    print('       * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network https://arxiv.org/abs/1611.06779\n')
 if (len(sys.argv) < 2):
  print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n')
  quit()
 #if not cv2.text.cnn_config.caffe_backend.getCaffeAvailable():
 #        print"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n"
 #
 #        quit()
 # check model and architecture file existance
 if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'):
    print " Model files not found in current directory. Aborting"
    print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models"
    quit()
 cv2.text.cnn_config.caffe_backend.setCaffeGpuMode(True);
 pathname = os.path.dirname(sys.argv[0])
    if (len(sys.argv) < 2):
        print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n')
        quit()
-img      = cv2.imread(str(sys.argv[1]))
+    if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'):
-textSpotter=cv2.text.textDetector_create(
+        print " Model files not found in current directory. Aborting"
-                "textbox_deploy.prototxt","textbox.caffemodel")
+        print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models"
-rects,outProbs = textSpotter.textDetectInImage(img);
+        quit()
 # for visualization
 vis      = img.copy()
 # Threshold to select rectangles : All rectangles for which outProbs is more than this threshold will be shown
 thres = 0.6
    img = cv2.imread(str(sys.argv[1]))
    textSpotter = cv2.text.TextDetectorCNN_create("textbox_deploy.prototxt","textbox.caffemodel")
    rects, outProbs = textSpotter.textDetectInImage(img);
    vis = img.copy()
    thres = 0.6
-  #Visualization
+    for r in range(np.shape(rects)[0]):
-for r in range(0,np.shape(rects)[0]):
+        if outProbs[r] > thres:
-    if outProbs[r] >thres:
+            rect = rects[r]
-        rect = rects[r]
+            cv2.rectangle(vis, (rect[0],rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (255, 0, 0), 2)
        cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 0, 0), 2)
       # cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 255, 255), 1)
    cv2.imshow("Text detection result", vis)
    cv2.waitKey()
-#Visualization
+if __name__ == "__main__":
-cv2.imshow("Text detection result", vis)
+    main()
 cv2.waitKey(0)
--- a/modules/text/samples/textbox_demo.cpp
+++ b/modules/text/samples/textbox_demo.cpp
@ -1,151 +1,86 @@
-/*
+#include <opencv2/text.hpp>
- * dictnet_demo.cpp
+#include <opencv2/highgui.hpp>
- *
+#include <opencv2/imgproc.hpp>
 * Demonstrates simple use of the holistic word classifier in C++
 *
 * Created on: June 26, 2016
 *     Author: Anguelos Nicolaou <anguelos.nicolaou AT gmail.com>
 */
 #include  "opencv2/text.hpp"
 #include  "opencv2/highgui.hpp"
 #include  "opencv2/imgproc.hpp"
 #include  <sstream>
 #include  <vector>
 #include  <iostream>
 #include  <iomanip>
 #include  <fstream>
-void textbox_draw(cv::Mat &src, std::vector<cv::Rect>  &groups,std::vector<float> &probs,std::vector<cv::String> wordList,float thres);
+using namespace cv;
 inline std::string getHelpStr(std::string progFname){
    std::stringstream out;
    out << "    Demo of text detection CNN for text detection." << std::endl;
    out << "    Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"<<std::endl<<std::endl;
    out << "    Usage: " << progFname << " <output_file> <input_image>" << std::endl;
    out << "    Caffe Model files  (textbox.caffemodel, textbox_deploy.prototxt)"<<std::endl;
    out << "      must be in the current directory." << std::endl << std::endl;
-    out << "    Obtaining Caffe Model files in linux shell:"<<std::endl;
+namespace
-    out << "    wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel"<<std::endl;
+{
-    out << "    wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt"<<std::endl;
+std::string getHelpStr(std::string progFname)
-    out << "    wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt"<<std::endl<<std::endl;
+{
    std::stringstream out;
    out << "    Demo of text detection CNN for text detection." << std::endl
        << "    Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"<<std::endl<<std::endl
        << "    Usage: " << progFname << " <output_file> <input_image>" << std::endl
        << "    Caffe Model files  (textbox.caffemodel, textbox_deploy.prototxt)"<<std::endl
        << "      must be in the current directory." << std::endl
        << "    These files can be downloaded from https://github.com/sghoshcvc/TextBox-Models.git" << std::endl;
    return out.str();
 }
-inline bool fileExists (std::string filename) {
+bool fileExists (std::string filename)
 {
    std::ifstream f(filename.c_str());
    return f.good();
 }
-void textbox_draw(cv::Mat &src, std::vector<cv::Rect>  &groups,std::vector<float> &probs,std::vector<cv::String> wordList,float thres=0.6)
+
 void textbox_draw(Mat src, std::vector<Rect>& groups, std::vector<float>& probs, float thres)
 {
-    for (int i=0;i<(int)groups.size(); i++)
+    for (size_t i = 0; i < groups.size(); i++)
    {
-        if(probs[i]>thres)
+        if(probs[i] > thres)
        {
            if (src.type() == CV_8UC3)
            {
-                cv::rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 0, 255, 255 ), 3, 8 );
+                rectangle(src, groups[i], Scalar( 0, 255, 255 ), 2, LINE_AA);
-                cv::putText(src, wordList[i],groups.at(i).tl() , cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar( 0,0,255 ));
+                String label = format("%.2f", probs[i]);
                std::cout << "text box: " << groups[i] << " confidence: " << probs[i] << "\n";
                putText(src, label, groups.at(i).tl(), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,255 ), 1, LINE_AA);
            }
            else
-                rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 255 ), 3, 8 );
+                rectangle(src, groups[i], Scalar( 255 ), 3, 8 );
        }
    }
 }
 }
-int main(int argc, const char * argv[]){
+int main(int argc, const char * argv[])
-    if(!cv::text::cnn_config::caffe_backend::getCaffeAvailable()){
+{
-        std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n";
+    if (argc < 2)
-        //exit(1);
+    {
-    }
+        std::cout << getHelpStr(argv[0]);
-    std::vector<std::string> backends=cv::text::cnn_config::getAvailableBackends();
+        std::cout << "Insufiecient parameters. Aborting!" << std::endl;
    std::cout << "The Following backends are available" << "\n";
    for (int i=0;i<backends.size();i++)
       std::cout << backends[i] << "\n";
   // printf("%s",x);
    //set to true if you have a GPU with more than 3GB
     if(cv::text::cnn_config::caffe_backend::getCaffeAvailable())
    cv::text::cnn_config::caffe_backend::setCaffeGpuMode(true);
    if (argc < 3){
        std::cout<<getHelpStr(argv[0]);
        std::cout<<"Insufiecient parameters. Aborting!"<<std::endl;
        exit(1);
    }
    if (!fileExists("textbox.caffemodel") ||
-            !fileExists("textbox_deploy.prototxt")){
+            !fileExists("textbox_deploy.prototxt"))
-           // !fileExists("dictnet_vgg_labels.txt"))
+    {
        std::cout<<getHelpStr(argv[0]);
        std::cout<<"Model files not found in the current directory. Aborting!"<<std::endl;
        exit(1);
    }
    if (fileExists(argv[1])){
        std::cout<<getHelpStr(argv[0]);
-        std::cout<<"Output file must not exist. Aborting!"<<std::endl;
+        std::cout << "Model files not found in the current directory. Aborting!" << std::endl;
        exit(1);
    }
-    cv::Mat image;
+    Mat image = imread(String(argv[1]), IMREAD_COLOR);
    image = cv::imread(cv::String(argv[2]));
-    std::cout<<"Starting Text Box Demo"<<std::endl;
+    std::cout << "Starting Text Box Demo" << std::endl;
-    cv::Ptr<cv::text::textDetector> textSpotter=cv::text::textDetector::create(
+    Ptr<text::TextDetectorCNN> textSpotter =
-                "textbox_deploy.prototxt","textbox.caffemodel");
+            text::TextDetectorCNN::create("textbox_deploy.prototxt","textbox.caffemodel", false);
-    //cv::Ptr<cv::text::textDetector> wordSpotter=
+    std::vector<Rect> bbox;
      //      cv::text::textDetector::create(cnn);
    std::cout<<"Created Text Spotter with text Boxes";
    std::vector<cv::Rect> bbox;
    std::vector<float> outProbabillities;
-    textSpotter->textDetectInImage(image,bbox,outProbabillities);
+    textSpotter->textDetectInImage(image, bbox, outProbabillities);
   // textbox_draw(image, bbox,outProbabillities);
    float thres =0.6f;
    std::vector<cv::Mat> imageList;
    for(int imageIdx=0;imageIdx<(int)bbox.size();imageIdx++){
        if(outProbabillities[imageIdx]>thres){
            imageList.push_back(image(bbox.at(imageIdx)));
        }
    }
    // call dict net here for all detected parts
    cv::Ptr<cv::text::DeepCNN> cnn=cv::text::DeepCNN::createDictNet(
                "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel",cv::text::OCR_HOLISTIC_BACKEND_DNN);
    cv::Ptr<cv::text::OCRHolisticWordRecognizer> wordSpotter=
            cv::text::OCRHolisticWordRecognizer::create(cnn,"dictnet_vgg_labels.txt");
    std::vector<cv::String> wordList;
    std::vector<double> wordProbabillities;
    wordSpotter->recogniseImageBatch(imageList,wordList,wordProbabillities);
    // write the output in file
    std::ofstream out;
    out.open(argv[1]);
    for (int i=0;i<(int)wordList.size(); i++)
    {
        cv::Point tl_ = bbox.at(i).tl();
        cv::Point br_ = bbox.at(i).br();
        out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl;
    }
    out.close();
    textbox_draw(image, bbox,outProbabillities,wordList);
    textbox_draw(image, bbox, outProbabillities, 0.5f);
-    cv::imshow("TextBox Demo",image);
+    imshow("TextBox Demo",image);
    std::cout << "Done!" << std::endl << std::endl;
    std::cout << "Press any key to exit." << std::endl << std::endl;
-    if ((cv::waitKey()&0xff) == ' ')
+    waitKey();
-        return 0;
+    return 0;
 }
--- a/modules/text/src/image_preprocessor.cpp
+++ b/modules/text/src/image_preprocessor.cpp
@ -1,387 +0,0 @@
 #include "precomp.hpp"
 #include "opencv2/imgproc.hpp"
 #include  "opencv2/highgui.hpp"
 #include "opencv2/core.hpp"
 #include <iostream>
 #include <fstream>
 #include <sstream>
 #include <queue>
 #include <algorithm>
 #include <iosfwd>
 #include <memory>
 #include <string>
 #include <utility>
 #include <vector>
 namespace cv { namespace text {
 //************************************************************************************
 //******************   ImagePreprocessor   *******************************************
 //************************************************************************************
 /** Public entry point of the preprocessor.
  *  Unwraps the input array into a Mat, delegates the actual work to the
  *  implementation-specific preprocess_() and copies the result into output.
  *  @param input image to preprocess
  *  @param output receives a copy of the preprocessed image
  *  @param sz size forwarded to preprocess_()
  *  @param outputChannels channel count forwarded to preprocess_()
  */
 void ImagePreprocessor::preprocess(InputArray input, OutputArray output, Size sz, int outputChannels)
 {
    const Mat source = input.getMat();
    Mat result;
    preprocess_(source, result, sz, outputChannels);
    result.copyTo(output);
 }
 /** Forwards the given mean image to the implementation hook set_mean_(). */
 void ImagePreprocessor::set_mean(Mat mean)
 {
    set_mean_(mean);
 }
 class ResizerPreprocessor: public ImagePreprocessor{
 protected:
    void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
        //TODO put all the logic of channel and depth conversions in ImageProcessor class
        CV_Assert(outputChannels==1 || outputChannels==3);
        CV_Assert(input.channels()==1 || input.channels()==3);
        if(input.channels()!=outputChannels)
        {
            Mat tmpInput;
            if(outputChannels==1){
                cvtColor(input,tmpInput,COLOR_BGR2GRAY);
                if(input.depth()==CV_8U)
                {
                    tmpInput.convertTo(output,CV_32FC1,1/255.0);
                }else
                {//Assuming values are at the desired [0,1] range
                    tmpInput.convertTo(output, CV_32FC1);
                }
            }else
            {
                cvtColor(input,tmpInput,COLOR_GRAY2BGR);
                if(input.depth()==CV_8U)
                {
                    tmpInput.convertTo(output,CV_32FC3,1/255.0);
                }else
                {//Assuming values are at the desired [0,1] range
                    tmpInput.convertTo(output, CV_32FC3);
                }
            }
        }else
        {
            if(input.channels()==1)
            {
                if(input.depth()==CV_8U)
                {
                    input.convertTo(output, CV_32FC1,1/255.0);
                }else
                {//Assuming values are at the desired [0,1] range
                    input.convertTo(output, CV_32FC1);
                }
            }else
            {
                if(input.depth()==CV_8U){
                    input.convertTo(output, CV_32FC3,1/255.0);
                }else
                {//Assuming values are at the desired [0,1] range
                    input.convertTo(output, CV_32FC3);
                }
            }
        }
        if(outputSize.width!=0 && outputSize.height!=0)
        {
            resize(output,output,outputSize);
        }
    }
    //void set_mean_(Mat m){}
 public:
    ResizerPreprocessor(){}
    ~ResizerPreprocessor(){}
 };
 class StandarizerPreprocessor: public ImagePreprocessor{
 protected:
    double sigma_;
    //void set_mean_(Mat M){}
    void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
        //TODO put all the logic of channel and depth conversions in ImageProcessor class
        CV_Assert(outputChannels==1 || outputChannels==3);
        CV_Assert(input.channels()==1 || input.channels()==3);
        if(input.channels()!=outputChannels)
        {
            Mat tmpInput;
            if(outputChannels==1)
            {
                cvtColor(input,tmpInput,COLOR_BGR2GRAY);
                if(input.depth()==CV_8U)
                {
                    tmpInput.convertTo(output,CV_32FC1,1/255.0);
                }else
                {//Assuming values are at the desired [0,1] range
                    tmpInput.convertTo(output, CV_32FC1);
                }
            }else
            {
                cvtColor(input,tmpInput,COLOR_GRAY2BGR);
                if(input.depth()==CV_8U)
                {
                    tmpInput.convertTo(output,CV_32FC3,1/255.0);
                }else
                {//Assuming values are at the desired [0,1] range
                    tmpInput.convertTo(output, CV_32FC3);
                }
            }
        }else
        {
            if(input.channels()==1)
            {
                if(input.depth()==CV_8U)
                {
                    input.convertTo(output, CV_32FC1,1/255.0);
                }else
                {//Assuming values are at the desired [0,1] range
                    input.convertTo(output, CV_32FC1);
                }
            }else
            {
                if(input.depth()==CV_8U)
                {
                    input.convertTo(output, CV_32FC3,1/255.0);
                }else
                {//Assuming values are at the desired [0,1] range
                    input.convertTo(output, CV_32FC3);
                }
            }
        }
        if(outputSize.width!=0 && outputSize.height!=0)
        {
            resize(output,output,outputSize);
        }
        Scalar mean,dev;
        meanStdDev(output,mean,dev);
        subtract(output,mean[0],output);
        divide(output,(dev[0]/sigma_),output);
    }
 public:
    StandarizerPreprocessor(double sigma):sigma_(sigma){}
    ~StandarizerPreprocessor(){}
 };
 class customPreprocessor:public ImagePreprocessor{
 protected:
    double rawval_;
    Mat mean_;
    String channel_order_;
    void set_mean_(Mat imMean_){
        imMean_.copyTo(this->mean_);
    }
    void set_raw_scale(int rawval){
        rawval_ = rawval;
    }
    void set_channels(String channel_order){
        channel_order_=channel_order;
    }
    void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
        //TODO put all the logic of channel and depth conversions in ImageProcessor class
        CV_Assert(outputChannels==1 || outputChannels==3);
        CV_Assert(input.channels()==1 || input.channels()==3);
        if(input.channels()!=outputChannels)
        {
            Mat tmpInput;
            if(outputChannels==1)
            {
                cvtColor(input,tmpInput,COLOR_BGR2GRAY);
                if(input.depth()==CV_8U)
                {
                    if (rawval_ == 1)
                        tmpInput.convertTo(output,CV_32FC3,1/255.0);
                    else
                        tmpInput.convertTo(output,CV_32FC1);
                }else
                {//Assuming values are at the desired [0,1] range
                    if (rawval_ ==1)
                        tmpInput.convertTo(output, CV_32FC1);
                    else
                        tmpInput.convertTo(output, CV_32FC1,rawval_);
                }
            }else
            {
                cvtColor(input,tmpInput,COLOR_GRAY2BGR);
                if(input.depth()==CV_8U)
                {
                    if (rawval_ == 1)
                        tmpInput.convertTo(output,CV_32FC3,1/255.0);
                    else
                        tmpInput.convertTo(output,CV_32FC1);
                }else
                {//Assuming values are at the desired [0,1] range
                    if (rawval_ ==1)
                        tmpInput.convertTo(output, CV_32FC1);
                    else
                        tmpInput.convertTo(output, CV_32FC1,rawval_);
                }
            }
        }else
        {
            if(input.channels()==1)
            {
                if(input.depth()==CV_8U)
                {
                    if (rawval_ == 1)
                        input.convertTo(output,CV_32FC1,1/255.0);
                    else
                        input.convertTo(output,CV_32FC1);
                }else
                {//Assuming values are at the desired [0,1] range
                    if (rawval_ ==1)
                        input.convertTo(output, CV_32FC1);
                    else
                        input.convertTo(output, CV_32FC1,rawval_);
                }
            }else
            {
                if(input.depth()==CV_8U)
                {
                    if (rawval_ == 1)
                        input.convertTo(output,CV_32FC3,1/255.0);
                    else
                        input.convertTo(output,CV_32FC3);
                }else
                {//Assuming values are at the desired [0,1] range
                    if (rawval_ ==1)
                        input.convertTo(output, CV_32FC3);
                    else
                        input.convertTo(output, CV_32FC3,rawval_);
                }
            }
        }
        if(outputSize.width!=0 && outputSize.height!=0)
        {
            resize(output,output,outputSize);
        }
        if (!this->mean_.empty()){
            Scalar mean_s(this->mean_.at<uchar>(0,0),this->mean_.at<uchar>(0,1),this->mean_.at<uchar>(0,2));
            subtract(output,mean_s,output);
        }
        else{
            Scalar mean_s;
            mean_s = mean(output);
            subtract(output,mean_s,output);
        }
    }
 public:
    customPreprocessor( double rawval,String channel_order):rawval_(rawval),channel_order_(channel_order){}
    ~customPreprocessor(){}
 };
 class MeanSubtractorPreprocessor: public ImagePreprocessor{
 protected:
    Mat mean_;
    //void set_mean_(Mat m){}
    void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
        //TODO put all the logic of channel and depth conversions in ImageProcessor class
        CV_Assert(this->mean_.cols==outputSize.width && this->mean_.rows ==outputSize.height);
        CV_Assert(outputChannels==1 || outputChannels==3);
        CV_Assert(input.channels()==1 || input.channels()==3);
        if(input.channels()!=outputChannels)
        {
            Mat tmpInput;
            if(outputChannels==1)
            {
                cvtColor(input,tmpInput,COLOR_BGR2GRAY);
                if(input.depth()==CV_8U)
                {
                    tmpInput.convertTo(output,CV_32FC1,1/255.0);
                }else
                {//Assuming values are at the desired [0,1] range
                    tmpInput.convertTo(output, CV_32FC1);
                }
            }else
            {
                cvtColor(input,tmpInput,COLOR_GRAY2BGR);
                if(input.depth()==CV_8U)
                {
                    tmpInput.convertTo(output,CV_32FC3,1/255.0);
                }else
                {//Assuming values are at the desired [0,1] range
                    tmpInput.convertTo(output, CV_32FC3);
                }
            }
        }else
        {
            if(input.channels()==1)
            {
                if(input.depth()==CV_8U)
                {
                    input.convertTo(output, CV_32FC1,1/255.0);
                }else
                {//Assuming values are at the desired [0,1] range
                    input.convertTo(output, CV_32FC1);
                }
            }else
            {
                if(input.depth()==CV_8U)
                {
                    input.convertTo(output, CV_32FC3,1/255.0);
                }else
                {//Assuming values are at the desired [0,1] range
                    input.convertTo(output, CV_32FC3);
                }
            }
        }
        if(outputSize.width!=0 && outputSize.height!=0)
        {
            resize(output,output,outputSize);
        }
        subtract(output,this->mean_,output);
    }
 public:
    MeanSubtractorPreprocessor(Mat mean)
    {
        mean.copyTo(this->mean_);
    }
    ~MeanSubtractorPreprocessor(){}
 };
 /** Factory returning a new ResizerPreprocessor behind the ImagePreprocessor interface. */
 Ptr<ImagePreprocessor> ImagePreprocessor::createResizer()
 {
    Ptr<ImagePreprocessor> resizer(new ResizerPreprocessor);
    return resizer;
 }
 /** Factory returning a new StandarizerPreprocessor constructed with the given sigma. */
 Ptr<ImagePreprocessor> ImagePreprocessor::createImageStandarizer(double sigma)
 {
    Ptr<ImagePreprocessor> standarizer(new StandarizerPreprocessor(sigma));
    return standarizer;
 }
 /** Factory returning a new customPreprocessor built from a raw scale and a channel order string. */
 Ptr<ImagePreprocessor> ImagePreprocessor::createImageCustomPreprocessor(double rawval,String channel_order)
 {
    Ptr<ImagePreprocessor> custom(new customPreprocessor(rawval, channel_order));
    return custom;
 }
 /** Factory returning a new MeanSubtractorPreprocessor; the preprocessor keeps its own copy of meanImg. */
 Ptr<ImagePreprocessor> ImagePreprocessor::createImageMeanSubtractor(InputArray meanImg)
 {
    Ptr<ImagePreprocessor> subtractor(new MeanSubtractorPreprocessor(meanImg.getMat()));
    return subtractor;
 }
 }
 }
--- a/modules/text/src/ocr_holistic.cpp
+++ b/modules/text/src/ocr_holistic.cpp
@ -1,697 +0,0 @@
 #include "precomp.hpp"
 #include "opencv2/imgproc.hpp"
 #include  "opencv2/highgui.hpp"
 #include "opencv2/core.hpp"
 #include <iostream>
 #include <fstream>
 #include <sstream>
 #include <queue>
 #include <algorithm>
 #include <iosfwd>
 #include <memory>
 #include <string>
 #include <utility>
 #include <vector>
 #ifdef HAVE_CAFFE
 #include "caffe/caffe.hpp"
 #endif
 #ifdef HAVE_DNN
 #include "opencv2/dnn.hpp"
 #endif
 using namespace cv;
 using namespace cv::dnn;
 using namespace std;
 namespace cv { namespace text {
 //Maybe OpenCV has a routine better suited
 /** Returns true when the file can be opened for reading and the stream is in a good state. */
 inline bool fileExists(String filename)
 {
    std::ifstream probe(filename.c_str());
    const bool readable = probe.good();
    return readable;
 }
 //************************************************************************************
 //******************   TextImageClassifier   *****************************************
 //************************************************************************************
 void TextImageClassifier::preprocess(const Mat& input,Mat& output)
 {
    this->preprocessor_->preprocess_(input,output,this->inputGeometry_,this->channelCount_);
 }
 /** Replaces the preprocessor used by preprocess(); an empty pointer is rejected by assertion. */
 void TextImageClassifier::setPreprocessor(Ptr<ImagePreprocessor> ptr)
 {
    CV_Assert(!ptr.empty());
    this->preprocessor_ = ptr;
 }
 /** Returns the preprocessor currently attached to this classifier. */
 Ptr<ImagePreprocessor> TextImageClassifier::getPreprocessor()
 {
    return this->preprocessor_;
 }
 class DeepCNNCaffeImpl: public DeepCNN{
 protected:
    void classifyMiniBatch(std::vector<Mat> inputImageList, Mat outputMat)
    {
        //Classifies a list of images containing at most minibatchSz_ images
        CV_Assert(int(inputImageList.size())<=this->minibatchSz_);
        CV_Assert(outputMat.isContinuous());
 #ifdef HAVE_CAFFE
        net_->input_blobs()[0]->Reshape(inputImageList.size(), this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
        net_->Reshape();
        float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
        float* inputData=inputBuffer;
        for(size_t imgNum=0;imgNum<inputImageList.size();imgNum++)
        {
            std::vector<Mat> input_channels;
            Mat preprocessed;
            // if the image have multiple color channels the input layer should be populated accordingly
            for (int channel=0;channel < this->channelCount_;channel++){
                cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
                input_channels.push_back(netInputWraped);
                //input_data += width * height;
                inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
            }
            this->preprocess(inputImageList[imgNum],preprocessed);
            split(preprocessed, input_channels);
        }
        this->net_->ForwardPrefilled();
        const float* outputNetData=net_->output_blobs()[0]->cpu_data();
        this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
        int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width;
        //outputMat.resize(this->outputGeometry_.height * this->outputGeometry_.width);
        float*outputMatData=(float*)(outputMat.data);
        memcpy(outputMatData,outputNetData,sizeof(float)*outputSz*inputImageList.size());
 #endif
    }
 #ifdef HAVE_CAFFE
    Ptr<caffe::Net<float> > net_;
 #endif
    //Size inputGeometry_;//=Size(100,32);
    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
    int outputSize_;
    //Size outputGeometry_;
 public:
    DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn):
        minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
        channelCount_=dn.channelCount_;
        inputGeometry_=dn.inputGeometry_;
        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
 #ifdef HAVE_CAFFE
        this->net_=dn.net_;
 #endif
    }
    DeepCNNCaffeImpl& operator=(const DeepCNNCaffeImpl &dn)
    {
 #ifdef HAVE_CAFFE
        this->net_=dn.net_;
 #endif
        this->setPreprocessor(dn.preprocessor_);
        this->inputGeometry_=dn.inputGeometry_;
        this->channelCount_=dn.channelCount_;
        this->minibatchSz_=dn.minibatchSz_;
        this->outputSize_=dn.outputSize_;
        this->preprocessor_=dn.preprocessor_;
        this->outputGeometry_=dn.outputGeometry_;
        return *this;
        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
    }
    DeepCNNCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz)
        :minibatchSz_(maxMinibatchSz)
    {
        CV_Assert(this->minibatchSz_>0);
        CV_Assert(fileExists(modelArchFilename));
        CV_Assert(fileExists(modelWeightsFilename));
        CV_Assert(!preprocessor.empty());
        this->setPreprocessor(preprocessor);
 #ifdef HAVE_CAFFE
        this->net_.reset(new caffe::Net<float>(modelArchFilename, caffe::TEST));
        CV_Assert(net_->num_inputs()==1);
        CV_Assert(net_->num_outputs()==1);
        CV_Assert(this->net_->input_blobs()[0]->channels()==1
                ||this->net_->input_blobs()[0]->channels()==3);
        this->channelCount_=this->net_->input_blobs()[0]->channels();
        this->net_->CopyTrainedLayersFrom(modelWeightsFilename);
        caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0];
        this->inputGeometry_=Size(inputLayer->width(), inputLayer->height());
        this->channelCount_ = inputLayer->channels();
        inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
        net_->Reshape();
        this->outputSize_=net_->output_blobs()[0]->channels();
        this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
 #else
        CV_Error(Error::StsError,"Caffe not available during compilation!");
 #endif
    }
    void classify(InputArray image, OutputArray classProbabilities)
    {
        std::vector<Mat> inputImageList;
        inputImageList.push_back(image.getMat());
        classifyBatch(inputImageList,classProbabilities);
    }
    void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities)
    {
        std::vector<Mat> allImageVector;
        inputImageList.getMatVector(allImageVector);
        size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmentic
        size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic
        classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F);
        Mat outputMat = classProbabilities.getMat();
        for(size_t imgNum=0;imgNum<allImageVector.size();imgNum+=minibatchSize)
        {
            size_t rangeEnd=imgNum+std::min<size_t>(allImageVector.size()-imgNum,minibatchSize);
            std::vector<Mat>::const_iterator from=std::vector<Mat>::const_iterator(allImageVector.begin()+imgNum);
            std::vector<Mat>::const_iterator to=std::vector<Mat>::const_iterator(allImageVector.begin()+rangeEnd);
            std::vector<Mat> minibatchInput(from,to);
            classifyMiniBatch(minibatchInput,outputMat.rowRange(int(imgNum),int(rangeEnd)));
        }
    }
    int getOutputSize()
    {
        return this->outputSize_;
    }
    Size getOutputGeometry()
    {
        return this->outputGeometry_;
    }
    int getMinibatchSize()
    {
        return this->minibatchSz_;
    }
    int getBackend()
    {
        return OCR_HOLISTIC_BACKEND_CAFFE;
    }
 };
 class DeepCNNOpenCvDNNImpl: public DeepCNN{
 protected:
    void classifyMiniBatch(std::vector<Mat> inputImageList, Mat outputMat)
    {
        //Classifies a list of images containing at most minibatchSz_ images
        CV_Assert(int(inputImageList.size())<=this->minibatchSz_);
        CV_Assert(outputMat.isContinuous());
 #ifdef HAVE_DNN
        std::vector<Mat> preProcessedImList; // to store preprocessed images, should it be handled inside preprocessing class?
        Mat preprocessed;
        // preprocesses each image in the inputImageList and push to preprocessedImList
        for(size_t imgNum=0;imgNum<inputImageList.size();imgNum++)
        {
            this->preprocess(inputImageList[imgNum],preprocessed);
            preProcessedImList.push_back(preprocessed);
        }
        // set input data blob in dnn::net
        net_->setInput(blobFromImages(preProcessedImList,1, this->inputGeometry_), "data");
        float*outputMatData=(float*)(outputMat.data);
       //Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ;
       Mat outputNet = this->net_->forward();
       outputNet = outputNet.reshape(1, 1);
       float*outputNetData=(float*)(outputNet.data);
       memcpy(outputMatData,outputNetData,sizeof(float)*this->outputSize_*inputImageList.size());
 #endif
    }
 #ifdef HAVE_DNN
    Ptr<Net> net_;
 #endif
    // hard coding input image size. anything in DNN library to get that from prototxt??
   // Size inputGeometry_;//=Size(100,32);
    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
    int outputSize_;
    //Size outputGeometry_;//= Size(1,1);
    //int channelCount_;
   // int inputChannel_ ;//=1;
  //  int _inputHeight;
    //int _inputWidth ;
    //int _inputChannel ;
 public:
    DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn):
        minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
        channelCount_=dn.channelCount_;
        inputGeometry_=dn.inputGeometry_;
        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
 #ifdef HAVE_DNN
        this->net_=dn.net_;
 #endif
    }
    DeepCNNOpenCvDNNImpl& operator=(const DeepCNNOpenCvDNNImpl &dn)
    {
 #ifdef HAVE_DNN
        this->net_=dn.net_;
 #endif
        this->setPreprocessor(dn.preprocessor_);
        this->inputGeometry_=dn.inputGeometry_;
        this->channelCount_=dn.channelCount_;
        this->minibatchSz_=dn.minibatchSz_;
        this->outputSize_=dn.outputSize_;
        this->preprocessor_=dn.preprocessor_;
        this->outputGeometry_=dn.outputGeometry_;
        return *this;
        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
    }
    DeepCNNOpenCvDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz,int inputWidth ,int inputHeight ,int inputChannel )
        :minibatchSz_(maxMinibatchSz)
    {
        CV_Assert(this->minibatchSz_>0);
        CV_Assert(fileExists(modelArchFilename));
        CV_Assert(fileExists(modelWeightsFilename));
        CV_Assert(!preprocessor.empty());
        this->setPreprocessor(preprocessor);
 #ifdef HAVE_DNN
        this->net_ = makePtr<Net>(readNetFromCaffe(modelArchFilename,modelWeightsFilename));
        if (this->net_.empty())
        {
            std::cerr << "Can't load network by using the following files: " << std::endl;
            std::cerr << "prototxt:   " << modelArchFilename << std::endl;
            std::cerr << "caffemodel: " << modelWeightsFilename << std::endl;
            //std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl;
            //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
            exit(-1);
        }
        this->inputGeometry_=Size(inputWidth,inputHeight);// Size(inputLayer->width(), inputLayer->height());
        this->channelCount_ = inputChannel;//inputLayer->channels();
        //inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
        Ptr< Layer > outLayer=	net_->getLayer (net_->getLayerId (net_->getLayerNames()[net_->getLayerNames().size()-2]));
        //std::vector<Mat> blobs = outLayer->blobs;
        this->outputSize_=(outLayer->blobs)[1].size[0] ;//net_->output_blobs()[0]->channels();
        //this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
 #else
        CV_Error(Error::StsError,"DNN module not available during compilation!");
 #endif
    }
    void classify(InputArray image, OutputArray classProbabilities)
    {
        std::vector<Mat> inputImageList;
        inputImageList.push_back(image.getMat());
        classifyBatch(inputImageList,classProbabilities);
    }
    void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities)
    {
        std::vector<Mat> allImageVector;
        inputImageList.getMatVector(allImageVector);
        size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmentic
        size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic
        classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F);
        Mat outputMat = classProbabilities.getMat();
        for(size_t imgNum=0;imgNum<allImageVector.size();imgNum+=minibatchSize)
        {
            size_t rangeEnd=imgNum+std::min<size_t>(allImageVector.size()-imgNum,minibatchSize);
            std::vector<Mat>::const_iterator from=std::vector<Mat>::const_iterator(allImageVector.begin()+imgNum);
            std::vector<Mat>::const_iterator to=std::vector<Mat>::const_iterator(allImageVector.begin()+rangeEnd);
            std::vector<Mat> minibatchInput(from,to);
            classifyMiniBatch(minibatchInput,outputMat.rowRange(int(imgNum),int(rangeEnd)));
        }
    }
    int getOutputSize()
    {
        return this->outputSize_;
    }
    Size getOutputGeometry()
    {
        return this->outputGeometry_;
    }
    int getMinibatchSize()
    {
        return this->minibatchSz_;
    }
    int getBackend()
    {
        return OCR_HOLISTIC_BACKEND_DNN;
    }
 };
 // Factory for DeepCNN objects. An empty preprocessor is replaced by
 // ImagePreprocessor::createResizer(); backEnd selects the implementation, and
 // OCR_HOLISTIC_BACKEND_DEFAULT picks Caffe when compiled in, otherwise opencv_dnn.
 // Raises a cv::Exception for any backend that cannot be served.
 Ptr<DeepCNN> DeepCNN::create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz,int backEnd)
 {
    if(preprocessor.empty())
        preprocessor=ImagePreprocessor::createResizer();

    // Turn the "default" request into the concrete backend this build prefers.
    int resolvedBackend=backEnd;
    if(resolvedBackend==OCR_HOLISTIC_BACKEND_DEFAULT)
    {
 #ifdef HAVE_CAFFE
        resolvedBackend=OCR_HOLISTIC_BACKEND_CAFFE;
 #elif defined(HAVE_DNN)
        resolvedBackend=OCR_HOLISTIC_BACKEND_DNN;
 #else
        resolvedBackend=OCR_HOLISTIC_BACKEND_NONE;
 #endif
    }

    if(resolvedBackend==OCR_HOLISTIC_BACKEND_CAFFE)
        return Ptr<DeepCNN>(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz));
    if(resolvedBackend==OCR_HOLISTIC_BACKEND_DNN)
        return Ptr<DeepCNN>(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,100,32,1));

    // OCR_HOLISTIC_BACKEND_NONE and every unknown value end up here.
    CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
    return Ptr<DeepCNN>();
 }
 // Factory preconfigured with ImagePreprocessor::createImageStandarizer(113) and a
 // minibatch size of 100. backEnd is interpreted as in DeepCNN::create:
 // OCR_HOLISTIC_BACKEND_DEFAULT picks Caffe when compiled in, otherwise opencv_dnn.
 Ptr<DeepCNN> DeepCNN::createDictNet(String archFilename,String weightsFilename,int backEnd)
 {
    Ptr<ImagePreprocessor> standardizer=ImagePreprocessor::createImageStandarizer(113);

    // Turn the "default" request into the concrete backend this build prefers.
    int resolvedBackend=backEnd;
    if(resolvedBackend==OCR_HOLISTIC_BACKEND_DEFAULT)
    {
 #ifdef HAVE_CAFFE
        resolvedBackend=OCR_HOLISTIC_BACKEND_CAFFE;
 #elif defined(HAVE_DNN)
        resolvedBackend=OCR_HOLISTIC_BACKEND_DNN;
 #else
        resolvedBackend=OCR_HOLISTIC_BACKEND_NONE;
 #endif
    }

    if(resolvedBackend==OCR_HOLISTIC_BACKEND_CAFFE)
        return Ptr<DeepCNN>(new DeepCNNCaffeImpl(archFilename, weightsFilename,standardizer, 100));
    if(resolvedBackend==OCR_HOLISTIC_BACKEND_DNN)
        return Ptr<DeepCNN>(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,standardizer, 100,100,32,1));

    // OCR_HOLISTIC_BACKEND_NONE and every unknown value end up here.
    CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
    return Ptr<DeepCNN>();
 }
 namespace cnn_config{
 // Lists the backends compiled into this build, one "<name>, <enum identifier>" string each.
 std::vector<std::string> getAvailableBackends()
 {
    std::vector<std::string> available;
 #ifdef HAVE_CAFFE
    available.push_back("CAFFE, OCR_HOLISTIC_BACKEND_CAFFE"); // Caffe library backend
 #endif
 #ifdef HAVE_DNN
    available.push_back("DNN, OCR_HOLISTIC_BACKEND_DNN"); // opencv_dnn based backend
 #endif
    return available;
 }
 namespace caffe_backend{
 #ifdef HAVE_CAFFE
 // True when Caffe's global mode is GPU.
 bool getCaffeGpuMode()
 {
    const bool gpuActive=(caffe::Caffe::mode()==caffe::Caffe::GPU);
    return gpuActive;
 }
 // Switches Caffe's global mode to GPU (useGpu==true) or CPU.
 void setCaffeGpuMode(bool useGpu)
 {
    caffe::Caffe::set_mode(useGpu ? caffe::Caffe::GPU : caffe::Caffe::CPU);
 }
 // Caffe support was compiled in.
 bool getCaffeAvailable()
 {
    return true;
 }
 #else
 // Without Caffe every mode query or change raises an error.
 bool getCaffeGpuMode()
 {
    CV_Error(Error::StsError,"Caffe not available during compilation!");
    return false;
 }
 void setCaffeGpuMode(bool useGpu)
 {
    CV_Error(Error::StsError,"Caffe not available during compilation!");
    CV_Assert(useGpu==1);//Keeps the otherwise unused parameter referenced
 }
 // Caffe support was not compiled in.
 bool getCaffeAvailable(){
    return false;
 }
 #endif
 }//namespace caffe_backend
 namespace dnn_backend{
 // Reports whether the opencv_dnn backend was compiled in.
 #ifdef  HAVE_DNN
 bool getDNNAvailable(){
    return true;
 }
 #else
 bool getDNNAvailable(){
    return false;
 }
 #endif
 }//namespace dnn_backend
 }//namespace cnn_config
 class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{
 private:
    struct NetOutput{
        //Auxiliary structure that handles the logic of getting class ids and probabillities from
        //the raw outputs of caffe
        int wordIdx;
        float probabillity;
        static bool sorter(const NetOutput& o1,const NetOutput& o2)
        {//used with std::sort to provide the most probable class
            return o1.probabillity>o2.probabillity;
        }
        static void getOutputs(const float* buffer,int nbOutputs,std::vector<NetOutput>& res)
        {
            res.resize(nbOutputs);
            for(int k=0;k<nbOutputs;k++)
            {
                res[k].wordIdx=k;
                res[k].probabillity=buffer[k];
            }
            std::sort(res.begin(),res.end(),NetOutput::sorter);
        }
        static void getClassification(const float* buffer,int nbOutputs,int &classNum,double& confidence)
        {
            std::vector<NetOutput> tmp;
            getOutputs(buffer,nbOutputs,tmp);
            classNum=tmp[0].wordIdx;
            confidence=tmp[0].probabillity;
        }
    };
 protected:
    std::vector<String> labels_;
    Ptr<TextImageClassifier> classifier_;
 public:
    OCRHolisticWordRecognizerImpl(Ptr<TextImageClassifier> classifierPtr,String vocabularyFilename):classifier_(classifierPtr)
    {
        CV_Assert(fileExists(vocabularyFilename));//this fails for some rason
        std::ifstream labelsFile(vocabularyFilename.c_str());
        if(!labelsFile)
        {
            CV_Error(Error::StsError,"Could not read Labels from file");
        }
        std::string line;
        while (std::getline(labelsFile, line))
        {
            labels_.push_back(std::string(line));
        }
        CV_Assert(this->classifier_->getOutputSize()==int(this->labels_.size()));
    }
    OCRHolisticWordRecognizerImpl(Ptr<TextImageClassifier> classifierPtr,const std::vector<String>& vocabulary):classifier_(classifierPtr)
    {
        this->labels_=vocabulary;
        CV_Assert(this->classifier_->getOutputSize()==int(this->labels_.size()));
    }
    void recogniseImage(InputArray inputImage,CV_OUT String& transcription,CV_OUT double& confidence)
    {
        Mat netOutput;
        this->classifier_->classify(inputImage,netOutput);
        int classNum;
        NetOutput::getClassification((float*)(netOutput.data),this->classifier_->getOutputSize(),classNum,confidence);
        transcription=this->labels_[classNum];
    }
    void recogniseImageBatch(InputArrayOfArrays inputImageList,CV_OUT std::vector<String>& transcriptionVec,CV_OUT std::vector<double>& confidenceVec)
    {
        Mat netOutput;
        this->classifier_->classifyBatch(inputImageList,netOutput);
        for(int k=0;k<netOutput.rows;k++)
        {
            int classNum;
            double confidence;
            NetOutput::getClassification((float*)(netOutput.row(k).data),this->classifier_->getOutputSize(),classNum,confidence);
            transcriptionVec.push_back(this->labels_[classNum]);
            confidenceVec.push_back(confidence);
        }
    }
    void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
             std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
             int component_level=0)
    {
        CV_Assert(component_level==OCR_LEVEL_WORD);//Componnents not applicable for word spotting
        double confidence;
        String transcription;
        recogniseImage(image,transcription,confidence);
        output_text=transcription.c_str();
        if(component_rects!=NULL)
        {
            component_rects->resize(1);
            (*component_rects)[0]=Rect(0,0,image.size().width,image.size().height);
        }
        if(component_texts!=NULL)
        {
            component_texts->resize(1);
            (*component_texts)[0]=transcription.c_str();
        }
        if(component_confidences!=NULL)
        {
            component_confidences->resize(1);
            (*component_confidences)[0]=float(confidence);
        }
    }
    void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
             std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
             int component_level=0)
    {
        CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image
        this->run(image,output_text,component_rects,component_texts,component_confidences,component_level);
    }
    std::vector<String>& getVocabulary()
    {
        return this->labels_;
    }
    Ptr<TextImageClassifier> getClassifier()
    {
        return this->classifier_;
    }
 };
 // Builds a recognizer from an existing classifier and a vocabulary file.
 Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(Ptr<TextImageClassifier> classifierPtr,String vocabularyFilename )
 {
    Ptr<OCRHolisticWordRecognizer> recognizer(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabularyFilename));
    return recognizer;
 }
 // Builds a recognizer from model files and a vocabulary file, using a
 // createImageStandarizer(113) preprocessor and a minibatch size of 100.
 // NOTE(review): the Caffe implementation is instantiated unconditionally here, while
 // DeepCNN::createDictNet can also select the opencv_dnn backend -- confirm this is intended.
 Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(String modelArchFilename, String modelWeightsFilename, String vocabularyFilename)
 {
    Ptr<ImagePreprocessor> standardizer=ImagePreprocessor::createImageStandarizer(113);
    Ptr<TextImageClassifier> dictNet(new DeepCNNCaffeImpl(modelArchFilename,modelWeightsFilename,standardizer,100));
    Ptr<OCRHolisticWordRecognizer> recognizer(new OCRHolisticWordRecognizerImpl(dictNet,vocabularyFilename));
    return recognizer;
 }
 // Builds a recognizer from an existing classifier and an in-memory vocabulary.
 Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(Ptr<TextImageClassifier> classifierPtr,const std::vector<String>& vocabulary)
 {
    Ptr<OCRHolisticWordRecognizer> recognizer(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabulary));
    return recognizer;
 }
 // Builds a recognizer from model files and an in-memory vocabulary, using a
 // createImageStandarizer(113) preprocessor and a minibatch size of 100.
 // NOTE(review): the Caffe implementation is instantiated unconditionally here, while
 // DeepCNN::createDictNet can also select the opencv_dnn backend -- confirm this is intended.
 Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(String modelArchFilename, String modelWeightsFilename,const std::vector<String>& vocabulary){
    Ptr<ImagePreprocessor> standardizer=ImagePreprocessor::createImageStandarizer(113);
    Ptr<TextImageClassifier> dictNet(new DeepCNNCaffeImpl(modelArchFilename,modelWeightsFilename,standardizer,100));
    Ptr<OCRHolisticWordRecognizer> recognizer(new OCRHolisticWordRecognizerImpl(dictNet,vocabulary));
    return recognizer;
 }
 }  } //namespace text namespace cv
--- a/modules/text/src/precomp.hpp
+++ b/modules/text/src/precomp.hpp
@ -45,6 +45,8 @@
 #include "opencv2/text.hpp"
 #include "text_config.hpp"
 #ifdef HAVE_TESSERACT
 #if !defined(USE_STD_NAMESPACE)
 #define USE_STD_NAMESPACE
--- a/modules/text/src/text_detector.cpp
+++ b/modules/text/src/text_detector.cpp
@ -1,169 +0,0 @@
 #include "precomp.hpp"
 #include "opencv2/imgproc.hpp"
 #include "opencv2/core.hpp"
 #include <iostream>
 #include <fstream>
 #include <sstream>
 #include <queue>
 #include <algorithm>
 #include <iosfwd>
 #include <memory>
 #include <string>
 #include <utility>
 #include <vector>
 //#ifdef HAVE_CAFFE
 //#include "caffe/caffe.hpp"
 //#endif
 namespace cv { namespace text {
 class textDetectImpl: public textDetector{
 private:
    struct NetOutput{
        //Auxiliary structure that handles the logic of getting bounding box and confidences of textness from
        //the raw outputs of caffe
        Rect bbox;
        float probability;
        static void getOutputs(const float* buffer,int nbrTextBoxes,int nCol,std::vector<NetOutput>& res,Size inputShape)
        {
            res.resize(nbrTextBoxes);
            for(int k=0;k<nbrTextBoxes;k++)
            {
                float x_min = buffer[k*nCol+3]*inputShape.width;
                float y_min = buffer[k*nCol+4]*inputShape.height;
                float x_max = buffer[k*nCol+5]*inputShape.width;
                float y_max = buffer[k*nCol +6]*inputShape.height;
                x_min = x_min<0?0:x_min;
                y_min = y_min<0?0:y_min;
                x_max = x_max> inputShape.width?inputShape.width-1:x_max;
                y_max = y_max > inputShape.height?inputShape.height-1:y_max;
                float wd = x_max-x_min+1;
                float ht = y_max-y_min+1;
                res[k].bbox=Rect(int(x_min),int(y_min),int(wd),int(ht));
                res[k].probability=buffer[k*nCol+2];
            }
        }
    };
 protected:
    Ptr<TextRegionDetector> classifier_;
 public:
    textDetectImpl(Ptr<TextRegionDetector> classifierPtr):classifier_(classifierPtr)
    {
    }
    void textDetectInImage(InputArray inputImage,CV_OUT std::vector<Rect>& Bbox,CV_OUT std::vector<float>& confidence)
    {
                Mat netOutput;
                // call the detect function of deepTextCNN class
                this->classifier_->detect(inputImage,netOutput);
               // get the output geometry i.e height and width of output blob from caffe
                Size OutputGeometry_ = this->classifier_->getOutputGeometry();
                int nbrTextBoxes = OutputGeometry_.height;
                int nCol = OutputGeometry_.width;
                std::vector<NetOutput> tmp;
                // the output bounding box needs to be resized by the input height and width
                Size inputImageShape = Size(inputImage.cols(),inputImage.rows());
                NetOutput::getOutputs((float*)(netOutput.data),nbrTextBoxes,nCol,tmp,inputImageShape);
                // put the output in CV_OUT
                for (int k=0;k<nbrTextBoxes;k++)
                {
                    Bbox.push_back(tmp[k].bbox);
                    confidence.push_back(tmp[k].probability);
                }
     }
    void run(Mat& image, std::vector<Rect>* component_rects=NULL,
             std::vector<float>* component_confidences=NULL,
             int component_level=0)
    {
        CV_Assert(component_level==OCR_LEVEL_WORD);//Componnents not applicable for word spotting
        std::vector<Rect> bbox;
        std::vector<float> score;
        textDetectInImage(image,bbox,score);
        if(component_rects!=NULL)
        {
            component_rects->resize(bbox.size());  // should be a user behavior
            component_rects = &bbox;
        }
        if(component_confidences!=NULL)
        {
            component_confidences->resize(score.size()); // shoub be a user behavior
            component_confidences = &score;
        }
    }
    void run(Mat& image, Mat& mask, std::vector<Rect>* component_rects=NULL,
             std::vector<float>* component_confidences=NULL,
             int component_level=0)
    {
        CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image
        this->run(image,component_rects,component_confidences,component_level);
    }
    Ptr<TextRegionDetector> getClassifier()
    {
        return this->classifier_;
    }
 };
 // Wraps an existing region detector in the default textDetector implementation.
 Ptr<textDetector> textDetector::create(Ptr<TextRegionDetector> classifierPtr)
 {
    Ptr<textDetector> detector(new textDetectImpl(classifierPtr));
    return detector;
 }
 // Builds a textDetector from model files: a custom preprocessor created with the value 255
 // and a three-entry mean (104, 117, 123) feeds a DeepCNNTextDetector with minibatch size 1.
 Ptr<textDetector> textDetector::create(String modelArchFilename, String modelWeightsFilename)
 {
    Ptr<ImagePreprocessor> customPreprocessor=ImagePreprocessor::createImageCustomPreprocessor(255);
    // The mean is stored as a single row of three 8-bit values.
    const uchar meanValues[3]={104,117,123};
    Mat meanRow(1,3,CV_8U);
    for(int c=0;c<3;c++)
    {
        meanRow.at<uchar>(0,c)=meanValues[c];
    }
    customPreprocessor->set_mean(meanRow);
    Ptr<TextRegionDetector> regionDetector(DeepCNNTextDetector::create(modelArchFilename,modelWeightsFilename,customPreprocessor,1));
    Ptr<textDetector> detector(new textDetectImpl(regionDetector));
    return detector;
 }
 }  } //namespace text namespace cv
--- a/modules/text/src/text_detectorCNN.cpp
+++ b/modules/text/src/text_detectorCNN.cpp
@ -1,453 +1,101 @@
 // This file is part of OpenCV project.
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 #include "precomp.hpp"
 #include "opencv2/imgproc.hpp"
 #include "opencv2/core.hpp"
 #include <iostream>
 #include <fstream>
 #include <sstream>
 #include <queue>
 #include <algorithm>
 #include <iosfwd>
 #include <memory>
 #include <string>
 #include <utility>
 #include <vector>
 #ifdef HAVE_CAFFE
 #include "caffe/caffe.hpp"
 #endif
 #ifdef HAVE_DNN
 #include "opencv2/dnn.hpp"
 #endif
 using namespace cv::dnn;
-#define CV_WARN(message) fprintf(stderr, "warning: %s (%s:%d)\n", message, __FILE__, __LINE__)
+namespace cv
-
+{
-namespace cv { namespace text {
+namespace text
-
+{
 // Returns true when an input stream opened on `filename` is in a good state.
 inline bool fileExists (String filename) {
    std::ifstream probe(filename.c_str());
    const bool streamIsGood = probe.good();
    return streamIsGood;
 }
 class DeepCNNTextDetectorCaffeImpl: public DeepCNNTextDetector{
 protected:
    void process_(Mat inputImage, Mat &outputMat)
    {
        // do forward pass and stores the output in outputMat
        CV_Assert(outputMat.isContinuous());
        if (inputImage.channels() != this->inputChannelCount_)
            CV_WARN("Number of input channel(s) in the model is not same as input");
 #ifdef HAVE_CAFFE
        net_->input_blobs()[0]->Reshape(1, this->inputChannelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
        net_->Reshape();
        float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
        float* inputData=inputBuffer;
        std::vector<Mat> input_channels;
        Mat preprocessed;
        // if the image have multiple color channels the input layer should be populated accordingly
        for (int channel=0;channel < this->inputChannelCount_;channel++){
            cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
            input_channels.push_back(netInputWraped);
            //input_data += width * height;
            inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
        }
        this->preprocess(inputImage,preprocessed);
        split(preprocessed, input_channels);
        //preprocessed.copyTo(netInputWraped);
        this->net_->Forward();
        const float* outputNetData=net_->output_blobs()[0]->cpu_data();
        // const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
        this->outputGeometry_.height = net_->output_blobs()[0]->height();
        this->outputGeometry_.width = net_->output_blobs()[0]->width();
        this->outputChannelCount_ = net_->output_blobs()[0]->channels();
        int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width;
        outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
        float*outputMatData=(float*)(outputMat.data);
        memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
 #endif
    }
 #ifdef HAVE_CAFFE
    Ptr<caffe::Net<float> > net_;
 #endif
    //Size inputGeometry_;
    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
    //int outputSize_;
 public:
    DeepCNNTextDetectorCaffeImpl(const DeepCNNTextDetectorCaffeImpl& dn):
        minibatchSz_(dn.minibatchSz_){
        outputGeometry_=dn.outputGeometry_;
        inputGeometry_=dn.inputGeometry_;
        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
 #ifdef HAVE_CAFFE
        this->net_=dn.net_;
 #endif
    }
    DeepCNNTextDetectorCaffeImpl& operator=(const DeepCNNTextDetectorCaffeImpl &dn)
    {
 #ifdef HAVE_CAFFE
        this->net_=dn.net_;
 #endif
        this->setPreprocessor(dn.preprocessor_);
        this->inputGeometry_=dn.inputGeometry_;
        this->inputChannelCount_=dn.inputChannelCount_;
        this->outputChannelCount_ = dn.outputChannelCount_;
        // this->minibatchSz_=dn.minibatchSz_;
        //this->outputGeometry_=dn.outputSize_;
        this->preprocessor_=dn.preprocessor_;
        this->outputGeometry_=dn.outputGeometry_;
        return *this;
        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
    }
    DeepCNNTextDetectorCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz)
        :minibatchSz_(maxMinibatchSz)
    {
        CV_Assert(this->minibatchSz_>0);
        CV_Assert(fileExists(modelArchFilename));
        CV_Assert(fileExists(modelWeightsFilename));
        CV_Assert(!preprocessor.empty());
        this->setPreprocessor(preprocessor);
 #ifdef HAVE_CAFFE
        this->net_.reset(new caffe::Net<float>(modelArchFilename, caffe::TEST));
        CV_Assert(net_->num_inputs()==1);
        CV_Assert(net_->num_outputs()==1);
        CV_Assert(this->net_->input_blobs()[0]->channels()==1
                ||this->net_->input_blobs()[0]->channels()==3);
        //        this->channelCount_=this->net_->input_blobs()[0]->channels();
        this->net_->CopyTrainedLayersFrom(modelWeightsFilename);
        caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0];
        this->inputGeometry_.height = inputLayer->height();
        this->inputGeometry_.width = inputLayer->width();
        this->inputChannelCount_ = inputLayer->channels();
        //this->inputGeometry_.batchSize =1;
        inputLayer->Reshape(this->minibatchSz_,this->inputChannelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
        net_->Reshape();
        this->outputChannelCount_ = net_->output_blobs()[0]->channels();
        //this->outputGeometry_.batchSize =1;
        this->outputGeometry_.height =net_->output_blobs()[0]->height();
        this->outputGeometry_.width = net_->output_blobs()[0]->width();
 #else
        CV_Error(Error::StsError,"Caffe not available during compilation!");
 #endif
    }
    void detect(InputArray image, OutputArray Bbox_prob)
    {
        Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width);
        Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed
        Mat outputMat = Bbox_prob.getMat();
        process_(image.getMat(),outputMat);
        //copy back to outputArray
        outputMat.copyTo(Bbox_prob);
    }
    Size getOutputGeometry()
    {
        return this->outputGeometry_;
    }
    Size getinputGeometry()
    {
        return this->inputGeometry_;
    }
    int getMinibatchSize()
    {
        return this->minibatchSz_;
    }
    int getBackend()
    {
        return OCR_HOLISTIC_BACKEND_CAFFE;
    }
    void setPreprocessor(Ptr<ImagePreprocessor> ptr)
    {
        CV_Assert(!ptr.empty());
        preprocessor_=ptr;
    }
    Ptr<ImagePreprocessor> getPreprocessor()
    {
        return preprocessor_;
    }
 };
-class DeepCNNTextDetectorDNNImpl: public DeepCNNTextDetector{
+class TextDetectorCNNImpl : public TextDetectorCNN
 {
 protected:
    Net net_;
    std::vector<Size> sizes_;
    int inputChannelCount_;
    bool detectMultiscale_;
-    void process_(Mat inputImage, Mat &outputMat)
+    void getOutputs(const float* buffer,int nbrTextBoxes,int nCol,
                               std::vector<Rect>& Bbox, std::vector<float>& confidence, Size inputShape)
    {
-        // do forward pass and stores the output in outputMat
+        for(int k = 0; k < nbrTextBoxes; k++)
-        CV_Assert(outputMat.isContinuous());
+        {
-        if (inputImage.channels() != this->inputChannelCount_)
+            float x_min = buffer[k*nCol + 3]*inputShape.width;
-            CV_WARN("Number of input channel(s) in the model is not same as input");
+            float y_min = buffer[k*nCol + 4]*inputShape.height;
 #ifdef HAVE_DNN
        Mat preprocessed;
        this->preprocess(inputImage,preprocessed);
        net_->setInput(blobFromImage(preprocessed,1,  this->inputGeometry_), "data");
       Mat outputNet = this->net_->forward( );
       this->outputGeometry_.height = outputNet.size[2];
       this->outputGeometry_.width = outputNet.size[3];
       this->outputChannelCount_ = outputNet.size[1];
-       outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
+            float x_max = buffer[k*nCol + 5]*inputShape.width;
-        float*outputMatData=(float*)(outputMat.data);
+            float y_max = buffer[k*nCol + 6]*inputShape.height;
       float*outputNetData=(float*)(outputNet.data);
       int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width;
-       memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
+            CV_Assert(x_min < x_max, y_min < y_max);
            x_min = std::max(0.f, x_min);
            y_min = std::max(0.f, y_min);
            x_max = std::min(inputShape.width - 1.f,  x_max);
            y_max = std::min(inputShape.height - 1.f,  y_max);
            int wd = cvRound(x_max - x_min);
            int ht = cvRound(y_max - y_min);
-#endif
+            Bbox.push_back(Rect(cvRound(x_min), cvRound(y_min), wd, ht));
            confidence.push_back(buffer[k*nCol + 2]);
        }
    }
 #ifdef HAVE_DNN
    Ptr<Net> net_;
 #endif
    //Size inputGeometry_;
    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
    //int outputSize_;
    //int inputHeight_;
    //int inputWidth_;
    //int inputChannel_;
 public:
-    DeepCNNTextDetectorDNNImpl(const DeepCNNTextDetectorDNNImpl& dn):
+    TextDetectorCNNImpl(const String& modelArchFilename, const String& modelWeightsFilename, bool detectMultiscale) :
-        minibatchSz_(dn.minibatchSz_){
+        detectMultiscale_(detectMultiscale)
        outputGeometry_=dn.outputGeometry_;
        inputGeometry_=dn.inputGeometry_;
        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
 #ifdef HAVE_DNN
        this->net_=dn.net_;
 #endif
    }
    DeepCNNTextDetectorDNNImpl& operator=(const DeepCNNTextDetectorDNNImpl &dn)
    {
 #ifdef HAVE_DNN
        this->net_=dn.net_;
 #endif
        this->setPreprocessor(dn.preprocessor_);
        this->inputGeometry_=dn.inputGeometry_;
        this->inputChannelCount_=dn.inputChannelCount_;
        this->outputChannelCount_ = dn.outputChannelCount_;
        // this->minibatchSz_=dn.minibatchSz_;
        //this->outputGeometry_=dn.outputSize_;
        this->preprocessor_=dn.preprocessor_;
        this->outputGeometry_=dn.outputGeometry_;
        return *this;
        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
    }
    DeepCNNTextDetectorDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz,int inputHeight=700,int inputWidth =700,int inputChannel =3)
        :minibatchSz_(maxMinibatchSz)
    {
        net_ = readNetFromCaffe(modelArchFilename, modelWeightsFilename);
        CV_Assert(!net_.empty());
        inputChannelCount_ = 3;
        sizes_.push_back(Size(700, 700));
-        CV_Assert(this->minibatchSz_>0);
+        if(detectMultiscale_)
        CV_Assert(fileExists(modelArchFilename));
        CV_Assert(fileExists(modelWeightsFilename));
        CV_Assert(!preprocessor.empty());
        this->setPreprocessor(preprocessor);
 #ifdef HAVE_DNN
        this->net_ = makePtr<Net>(readNetFromCaffe(modelArchFilename,modelWeightsFilename));
        if (this->net_.empty())
        {
-            std::cerr << "Can't load network by using the following files: " << std::endl;
+            sizes_.push_back(Size(300, 300));
-            std::cerr << "prototxt:   " << modelArchFilename << std::endl;
+            sizes_.push_back(Size(700,500));
-            std::cerr << "caffemodel: " << modelWeightsFilename << std::endl;
+            sizes_.push_back(Size(700,300));
-            //std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl;
+            sizes_.push_back(Size(1600,1600));
            //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
            exit(-1);
        }
        this->inputGeometry_.height =inputHeight;
        this->inputGeometry_.width = inputWidth ;//inputLayer->width();
        this->inputChannelCount_ = inputChannel ;//inputLayer->channels();
 #else
        CV_Error(Error::StsError,"DNN module not available during compilation!");
 #endif
    }
-
+    void textDetectInImage(InputArray inputImage_, std::vector<Rect>& Bbox, std::vector<float>& confidence)
    void detect(InputArray image, OutputArray Bbox_prob)
    {
-        Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width);
+        CV_Assert(inputImage_.channels() == inputChannelCount_);
-        Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed
+        Mat inputImage = inputImage_.getMat().clone();
-        Mat outputMat = Bbox_prob.getMat();
+        Bbox.resize(0);
        confidence.resize(0);
-        process_(image.getMat(),outputMat);
+        for(size_t i = 0; i < sizes_.size(); i++)
-        //copy back to outputArray
+        {
-        outputMat.copyTo(Bbox_prob);
+            Size inputGeometry = sizes_[i];
-    }
+            net_.setInput(blobFromImage(inputImage, 1, inputGeometry, Scalar(123, 117, 104)), "data");
-
+            Mat outputNet = net_.forward();
-    Size getOutputGeometry()
+            int nbrTextBoxes = outputNet.size[2];
-    {
+            int nCol = outputNet.size[3];
-        return this->outputGeometry_;
+            int outputChannelCount = outputNet.size[1];
-    }
+            CV_Assert(outputChannelCount == 1);
-    Size getinputGeometry()
+            getOutputs((float*)(outputNet.data), nbrTextBoxes, nCol, Bbox, confidence, inputImage.size());
-    {
+        }
-        return this->inputGeometry_;
+     }
    }
    int getMinibatchSize()
    {
        return this->minibatchSz_;
    }
    int getBackend()
    {
        return OCR_HOLISTIC_BACKEND_DNN;
    }
    void setPreprocessor(Ptr<ImagePreprocessor> ptr)
    {
        CV_Assert(!ptr.empty());
        preprocessor_=ptr;
    }
    Ptr<ImagePreprocessor> getPreprocessor()
    {
        return preprocessor_;
    }
 };
-Ptr<DeepCNNTextDetector> DeepCNNTextDetector::create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz,int backEnd)
+Ptr<TextDetectorCNN> TextDetectorCNN::create(const String &modelArchFilename, const String &modelWeightsFilename, bool detectMultiscale)
 {
-    if(preprocessor.empty())
+    return makePtr<TextDetectorCNNImpl>(modelArchFilename, modelWeightsFilename, detectMultiscale);
    {
        // create a custom preprocessor with rawval
        preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255);
        // set the mean for the preprocessor
        Mat textbox_mean(1,3,CV_8U);
        textbox_mean.at<uchar>(0,0)=104;
        textbox_mean.at<uchar>(0,1)=117;
        textbox_mean.at<uchar>(0,2)=123;
        preprocessor->set_mean(textbox_mean);
    }
    switch(backEnd){
    case OCR_HOLISTIC_BACKEND_DEFAULT:
 #ifdef HAVE_CAFFE
        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz));
 #elif defined(HAVE_DNN)
        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,700,700,3));
 #else
        CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented");
        return Ptr<DeepCNNTextDetector>();
 #endif
    case OCR_HOLISTIC_BACKEND_CAFFE:
        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz));
        break;
    case OCR_HOLISTIC_BACKEND_DNN:
        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,700,700,3));
        break;
    case OCR_HOLISTIC_BACKEND_NONE:
    default:
        CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented");
        return Ptr<DeepCNNTextDetector>();
        break;
    }
    //return Ptr<DeepCNNTextDetector>();
 }
-
+} //namespace text
-
+} //namespace cv
 Ptr<DeepCNNTextDetector> DeepCNNTextDetector::createTextBoxNet(String archFilename,String weightsFilename,int backEnd)
 {
    // create a custom preprocessor with rawval
    Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255);
    // set the mean for the preprocessor
    Mat textbox_mean(1,3,CV_8U);
    textbox_mean.at<uchar>(0,0)=104;
    textbox_mean.at<uchar>(0,1)=117;
    textbox_mean.at<uchar>(0,2)=123;
    preprocessor->set_mean(textbox_mean);
    switch(backEnd){
    case OCR_HOLISTIC_BACKEND_DEFAULT:
 #ifdef HAVE_CAFFE
        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1));
 #elif defined(HAVE_DNN)
        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1,700,700,3));
 #else
        CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented");
        return Ptr<DeepCNNTextDetector>();
 #endif
        break;
    case OCR_HOLISTIC_BACKEND_CAFFE:
        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1));
        break;
    case OCR_HOLISTIC_BACKEND_DNN:
         return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1,700,700,3));
         break;
    case OCR_HOLISTIC_BACKEND_NONE:
    default:
        CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented");
        return Ptr<DeepCNNTextDetector>();
        break;
    }
    //return Ptr<DeepCNNTextDetector>();
 }
 void DeepCNNTextDetector::preprocess(const Mat& input,Mat& output)
 {
    Size inputHtWd = Size(this->inputGeometry_.height,this->inputGeometry_.width);
    this->preprocessor_->preprocess(input,output,inputHtWd,this->inputChannelCount_);
 }
 }  } //namespace text namespace cv
--- a/modules/text/text_config.hpp.in
+++ b/modules/text/text_config.hpp.in
@ -1,4 +1,7 @@
 #ifndef __OPENCV_TEXT_CONFIG_HPP__
 #define __OPENCV_TEXT_CONFIG_HPP__
 // HAVE OCR Tesseract
 #cmakedefine HAVE_TESSERACT
 #endif