Text detector class and Custom Image processor Class

pull/1285/head
sghoshcvc 8 years ago
parent fa94c16065
commit 9ae765a197
  1. 85 modules/text/CMakeLists.txt
  2. 14 modules/text/FindCaffe.cmake
  3. 10 modules/text/FindGlog.cmake
  4. 10 modules/text/FindProtobuf.cmake
  5. 24 modules/text/FindTesseract.cmake
  6. 72 modules/text/README.md
  7. 3 modules/text/include/opencv2/text.hpp
  8. 849 modules/text/include/opencv2/text/ocr.hpp
  9. 235 modules/text/include/opencv2/text/textDetector.hpp
  10. 879 modules/text/src/ocr_holistic.cpp
  11. 643 modules/text/src/text_detector.cpp
  12. 10 modules/text/text_config.hpp.in

@@ -1,24 +1,71 @@
set(the_description "Text Detection and Recognition")
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d OPTIONAL opencv_highgui WRAP python)
if(NOT CMAKE_CROSSCOMPILING OR OPENCV_FIND_TESSERACT)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
find_package(Tesseract QUIET)
if(Tesseract_FOUND)
message(STATUS "Tesseract: YES")
set(HAVE_TESSERACT 1)
ocv_include_directories(${Tesseract_INCLUDE_DIR})
ocv_target_link_libraries(${the_module} ${Tesseract_LIBRARIES})
else()
message(STATUS "Tesseract: NO")
endif()
# Using cmake scripts and modules
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR})
set(TEXT_DEPS opencv_ml opencv_highgui opencv_imgproc opencv_core opencv_features2d opencv_calib3d)
find_package(Caffe)
if(Caffe_FOUND)
message(STATUS "Caffe: YES")
set(HAVE_CAFFE 1)
else()
message(STATUS "Caffe: NO")
# list(APPEND TEXT_DEPS opencv_dnn)
endif()
#internal dependencies
find_package(Protobuf)
if(Protobuf_FOUND)
message(STATUS "Protobuf: YES")
set(HAVE_PROTOBUF 1)
else()
message(STATUS "Protobuf: NO")
endif()
find_package(Glog)
if(Glog_FOUND)
message(STATUS "Glog: YES")
set(HAVE_GLOG 1)
else()
message(STATUS "Glog: NO")
endif()
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d WRAP python)
#ocv_define_module(text ${TEXT_DEPS} WRAP python)
#set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR})
find_package(Tesseract)
if(${Tesseract_FOUND})
message(STATUS "Tesseract: YES")
include_directories(${Tesseract_INCLUDE_DIR})
target_link_libraries(opencv_text ${Tesseract_LIBS})
add_definitions(-DHAVE_TESSERACT)
else()
message(STATUS "Tesseract: NO")
endif()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/text_config.hpp.in
${CMAKE_BINARY_DIR}/text_config.hpp @ONLY)
ocv_include_directories(${CMAKE_CURRENT_BINARY_DIR})
ocv_add_testdata(samples/ contrib/text
FILES_MATCHING PATTERN "*.xml" PATTERN "*.xml.gz" REGEX "scenetext[0-9]+.jpg"
)
if(HAVE_CAFFE AND HAVE_GLOG AND HAVE_PROTOBUF)
include_directories(${Caffe_INCLUDE_DIR})
find_package(HDF5 COMPONENTS HL REQUIRED)
include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR})
list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES})
find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem)
include_directories(SYSTEM ${Boost_INCLUDE_DIR})
include_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/include/ /usr/local/cuda-8.0/include/ /usr/local/cuda-7.5/targets/x86_64-linux/include/ )
link_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/lib/ /usr/local/cuda-8.0/lib/ /usr/local/cuda-7.5/targets/x86_64-linux/lib/ /usr/lib/openblas-base/lib /usr/local/cuda-8.0/lib64)
list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES})
target_link_libraries(opencv_text atlas blas ${Caffe_LIBS} ${Glog_LIBS} ${Protobuf_LIBS} ${HDF5_LIBRARIES} ${Boost_LIBRARIES})
add_definitions(-DHAVE_CAFFE)
endif() #HAVE_CAFFE
message(STATUS "TEXT CAFFE SEARCH")
if()
message(STATUS "TEXT NO CAFFE CONFLICT")
else()
message(STATUS "TEXT CAFFE CONFLICT")
endif()

@@ -0,0 +1,14 @@
# Caffe package for CNN Triplet training
unset(Caffe_FOUND)
find_path(Caffe_INCLUDE_DIR NAMES caffe/caffe.hpp caffe/common.hpp caffe/net.hpp caffe/proto/caffe.pb.h caffe/util/io.hpp caffe/vision_layers.hpp
HINTS
/usr/local/include)
find_library(Caffe_LIBS NAMES caffe
HINTS
/usr/local/lib)
if(Caffe_LIBS AND Caffe_INCLUDE_DIR)
set(Caffe_FOUND 1)
endif()

@@ -0,0 +1,10 @@
#Required for Caffe
unset(Glog_FOUND)
find_library(Glog_LIBS NAMES glog
HINTS
/usr/local/lib)
if(Glog_LIBS)
set(Glog_FOUND 1)
endif()

@@ -0,0 +1,10 @@
#Protobuf package required for Caffe
unset(Protobuf_FOUND)
find_library(Protobuf_LIBS NAMES protobuf
HINTS
/usr/local/lib)
if(Protobuf_LIBS)
set(Protobuf_FOUND 1)
endif()

@@ -0,0 +1,24 @@
# Tesseract OCR
unset(Tesseract_FOUND)
find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h
HINTS
/usr/include
/usr/local/include)
find_library(Tesseract_LIBRARY NAMES tesseract
HINTS
/usr/lib
/usr/local/lib)
find_library(Lept_LIBRARY NAMES lept
HINTS
/usr/lib
/usr/local/lib)
set(Tesseract_LIBS ${Tesseract_LIBRARY} ${Lept_LIBRARY})
if(Tesseract_LIBS AND Tesseract_INCLUDE_DIR)
set(Tesseract_FOUND 1)
endif()

@@ -47,3 +47,75 @@ Notes
2. Tesseract configure script may fail to detect leptonica, so you may have to edit the configure script - comment out some if's around this message and retain only the "then" branch.
3. You are encouraged to search the Net for some better pre-trained classifiers, as well as classifiers for other languages.
Word spotting CNN
=================
Intro
-----
A word spotting CNN is a CNN that takes an image assumed to contain a single word and provides a probability distribution over a given vocabulary.
Although other backends are planned, for the moment only the Caffe backend is supported.
Installation of Caffe backend
-----------------------------
The Caffe wrapping backend has the same requirements as Caffe itself.
* Caffe can be built against OpenCV; if the Caffe backend is enabled, a circular dependency arises.
The simplest solution is to build Caffe without OpenCV support.
* Only the operating systems supported by Caffe are supported by the backend.
The scripts describing the module have been developed on Ubuntu 16.04 and assume such a system.
Other UNIX systems, including OSX, should be easy to adapt.
Sample script for building Caffe:
```bash
#!/bin/bash
SRCROOT="${HOME}/caffe_inst/"
mkdir -p "$SRCROOT"
cd "$SRCROOT"
git clone https://github.com/BVLC/caffe.git
cd caffe
git checkout 91b09280f5233cafc62954c98ce8bc4c204e7475
git branch 91b09280f5233cafc62954c98ce8bc4c204e7475
cat Makefile.config.example > Makefile.config
echo 'USE_OPENCV := 0' >> Makefile.config
echo 'INCLUDE_DIRS += /usr/include/hdf5/serial/' >> Makefile.config
echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config
echo "--- /tmp/caffe/include/caffe/net.hpp 2017-05-28 04:55:47.929623902 +0200
+++ caffe/distribute/include/caffe/net.hpp 2017-05-28 04:51:33.437090768 +0200
@@ -234,6 +234,7 @@
template <typename T>
friend class Net;
+ virtual ~Callback(){}
};
const vector<Callback*>& before_forward() const { return before_forward_; }
void add_before_forward(Callback* value) {
">/tmp/cleanup_caffe.diff
patch < /tmp/cleanup_caffe.diff
make -j 6
make pycaffe
make distribute
```
```bash
#!/bin/bash
cd $OPENCV_BUILD_DIR #You must set this
CAFFEROOT="${HOME}/caffe_inst/" #If you used the previous code to compile Caffe in ubuntu 16.04
cmake -DCaffe_LIBS:FILEPATH="$CAFFEROOT/caffe/distribute/lib/libcaffe.so" -DBUILD_opencv_ts:BOOL="0" -DBUILD_opencv_dnn:BOOL="0" -DBUILD_opencv_dnn_modern:BOOL="0" -DCaffe_INCLUDE_DIR:PATH="$CAFFEROOT/caffe/distribute/include" -DWITH_MATLAB:BOOL="0" -DBUILD_opencv_cudabgsegm:BOOL="0" -DWITH_QT:BOOL="1" -DBUILD_opencv_cudaoptflow:BOOL="0" -DBUILD_opencv_cudastereo:BOOL="0" -DBUILD_opencv_cudafilters:BOOL="0" -DBUILD_opencv_cudev:BOOL="1" -DOPENCV_EXTRA_MODULES_PATH:PATH="/home/anguelos/work/projects/opencv_gsoc/opencv_contrib/modules" ./
```
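For completeness, a minimal word spotting program in C++ could look like the sketch below. It assumes the module was built with the Caffe backend as described above, and that the DictNet files referenced in the module documentation have been downloaded; all file names here are placeholders.
```cpp
#include <opencv2/text.hpp>
#include <opencv2/imgcodecs.hpp>
#include <iostream>

int main()
{
    // Placeholder file names: the DictNet deploy prototxt, weights and labels.
    cv::Ptr<cv::text::OCRHolisticWordRecognizer> wordSpotter =
        cv::text::OCRHolisticWordRecognizer::create(
            "dictnet_vgg_deploy.prototxt",
            "dictnet_vgg.caffemodel",
            "dictnet_vgg_labels.txt");

    cv::Mat wordImage = cv::imread("scenetext_word.jpg"); // hypothetical word crop
    cv::String transcription;
    double confidence;
    wordSpotter->recogniseImage(wordImage, transcription, confidence);
    std::cout << transcription << " (" << confidence << ")" << std::endl;
    return 0;
}
```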

@@ -41,6 +41,7 @@ the use of this software, even if advised of the possibility of such damage.
#include "opencv2/text/erfilter.hpp"
#include "opencv2/text/ocr.hpp"
#include "opencv2/text/textDetector.hpp"
/** @defgroup text Scene Text Detection and Recognition
@@ -92,7 +93,7 @@ grouping horizontally aligned text, and the method proposed by Lluis Gomez and D
in [Gomez13][Gomez14] for grouping arbitrary oriented text (see erGrouping).
To see the text detector at work, have a look at the textdetection demo:
<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>
@defgroup text_recognize Scene Text Recognition
@}

@@ -46,6 +46,10 @@
#include <vector>
#include <string>
#include <iostream>
#include <sstream>
namespace cv
{
@@ -61,82 +65,126 @@ enum
OCR_LEVEL_TEXTLINE
};
//base class BaseOCR declares a common API that would be used in a typical text recognition scenario
//base class BaseOCR declares a common API that would be used in a typical text
//recognition scenario
class CV_EXPORTS_W BaseOCR
{
public:
public:
virtual ~BaseOCR() {};
virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
virtual void run(Mat& image, std::string& output_text,
std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL,
std::vector<float>* component_confidences=NULL,
int component_level=0) = 0;
virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
virtual void run(Mat& image, Mat& mask, std::string& output_text,
std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL,
std::vector<float>* component_confidences=NULL,
int component_level=0) = 0;
/** @brief Main functionality of the OCR Hierarchy. Subclasses provide
* default parameters for all parameters other than the input image.
*/
virtual String run(InputArray image){
std::string res;
std::vector<Rect> component_rects;
std::vector<float> component_confidences;
std::vector<std::string> component_texts;
Mat inputImage=image.getMat();
this->run(inputImage,res,&component_rects,&component_texts,
&component_confidences,OCR_LEVEL_WORD);
return res;
}
};
/** @brief OCRTesseract class provides an interface with the tesseract-ocr API (v3.02.02) in C++.
/** @brief OCRTesseract class provides an interface with the tesseract-ocr API
* (v3.02.02) in C++.
Notice that it is compiled only when tesseract-ocr is correctly installed.
@note
- (C++) An example of OCRTesseract recognition combined with scene text detection can be found
at the end_to_end_recognition demo:
<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/end_to_end_recognition.cpp>
- (C++) Another example of OCRTesseract recognition combined with scene text detection can be
found at the webcam_demo:
<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
- (C++) An example of OCRTesseract recognition combined with scene text
detection can be found at the end_to_end_recognition demo:
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/end_to_end_recognition.cpp>
- (C++) Another example of OCRTesseract recognition combined with scene
text detection can be found at the webcam_demo:
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
*/
class CV_EXPORTS_W OCRTesseract : public BaseOCR
{
public:
/** @brief Recognize text using the tesseract-ocr API.
Takes image on input and returns recognized text in the output_text parameter. Optionally
provides also the Rects for individual text elements found (e.g. words), and the list of those
text elements with their confidence values.
Takes image on input and returns recognized text in the output_text
parameter. Optionally provides also the Rects for individual text elements
found (e.g. words), and the list of those text elements with their
confidence values.
@param image Input image CV_8UC1 or CV_8UC3
@param output_text Output text of the tesseract-ocr.
@param component_rects If provided the method will output a list of Rects for the individual
text elements found (e.g. words or text lines).
@param component_texts If provided the method will output a list of text strings for the
recognition of individual text elements found (e.g. words or text lines).
@param component_confidences If provided the method will output a list of confidence values
for the recognition of individual text elements found (e.g. words or text lines).
@param component_rects If provided the method will output a list of Rects
for the individual text elements found (e.g. words or text lines).
@param component_texts If provided the method will output a list of text
strings for the recognition of individual text elements found (e.g. words or
text lines).
@param component_confidences If provided the method will output a list of
confidence values for the recognition of individual text elements found
(e.g. words or text lines).
@param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXTLINE.
*/
virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
virtual void run (Mat& image, std::string& output_text,
std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL,
std::vector<float>* component_confidences=NULL,
int component_level=0);
virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0);
virtual void run (Mat& image, Mat& mask, std::string& output_text,
std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL,
std::vector<float>* component_confidences=NULL,
int component_level=0);
// aliases for scripting
CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
CV_WRAP String run (InputArray image, int min_confidence,
int component_level=0);
CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
CV_WRAP String run(InputArray image, InputArray mask,
int min_confidence, int component_level=0);
CV_WRAP virtual void setWhiteList(const String& char_whitelist) = 0;
/** @brief Creates an instance of the OCRTesseract class. Initializes Tesseract.
/** @brief Creates an instance of the OCRTesseract class. Initializes
* Tesseract.
* @param datapath the name of the parent directory of tessdata ended with
* "/", or NULL to use the system's default directory.
* @param language an ISO 639-3 code or NULL will default to "eng".
* @param char_whitelist specifies the list of characters used for
* recognition. NULL defaults to "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".
@param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the
system's default directory.
@param language an ISO 639-3 code or NULL will default to "eng".
@param char_whitelist specifies the list of characters used for recognition. NULL defaults to
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".
@param oem tesseract-ocr offers different OCR Engine Modes (OEM), by default
tesseract::OEM_DEFAULT is used. See the tesseract-ocr API documentation for other possible
values.
@param psmode tesseract-ocr offers different Page Segmentation Modes (PSM) tesseract::PSM_AUTO
(fully automatic layout analysis) is used. See the tesseract-ocr API documentation for other
possible values.
* @param oem tesseract-ocr offers different OCR Engine Modes (OEM), by
* default tesseract::OEM_DEFAULT is used. See the tesseract-ocr API
* documentation for other possible values.
* @param psmode tesseract-ocr offers different Page Segmentation Modes
* (PSM) tesseract::PSM_AUTO (fully automatic layout analysis) is used. See
* the tesseract-ocr API documentation for other possible values.
*/
CV_WRAP static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
const char* char_whitelist=NULL, int oem=3, int psmode=3);
CV_WRAP static Ptr<OCRTesseract> create (const char* datapath=NULL,
const char* language=NULL,
const char* char_whitelist=NULL,
int oem=3, int psmode=3);
};
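For reference, a minimal usage sketch of the class above; it assumes OpenCV was built with Tesseract support, and the image path is a placeholder:
```cpp
#include <opencv2/text.hpp>
#include <opencv2/imgcodecs.hpp>
#include <iostream>

int main()
{
    cv::Mat image = cv::imread("scenetext01.jpg"); // placeholder input image

    // Default parameters: system tessdata directory, "eng", no whitelist.
    cv::Ptr<cv::text::OCRTesseract> ocr = cv::text::OCRTesseract::create();

    std::string text;
    std::vector<cv::Rect> rects;
    std::vector<std::string> words;
    std::vector<float> confidences;
    ocr->run(image, text, &rects, &words, &confidences, cv::text::OCR_LEVEL_WORD);
    std::cout << text << std::endl;
    return 0;
}
```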
@@ -147,134 +195,156 @@ enum decoder_mode
OCR_DECODER_VITERBI = 0 // Other algorithms may be added
};
/** @brief OCRHMMDecoder class provides an interface for OCR using Hidden Markov Models.
/** @brief OCRHMMDecoder class provides an interface for OCR using Hidden Markov
* Models.
@note
- (C++) An example on using OCRHMMDecoder recognition combined with scene text detection can
be found at the webcam_demo sample:
<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
* @note
* - (C++) An example on using OCRHMMDecoder recognition combined with scene
* text detection can be found at the webcam_demo sample:
* <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
*/
class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR
{
public:
class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR {
public:
/** @brief Callback with the character classifier is made a class.
This way it hides the feature extractor and the classifier itself, so developers can write
their own OCR code.
* This way it hides the feature extractor and the classifier itself, so
* developers can write their own OCR code.
The default character classifier and feature extractor can be loaded using the utility function
loadOCRHMMClassifierNM and KNN model provided in
<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/OCRHMM_knn_model_data.xml.gz>.
*/
class CV_EXPORTS_W ClassifierCallback
{
public:
* The default character classifier and feature extractor can be loaded using
* the utility function loadOCRHMMClassifierNM and KNN model provided in
* <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_knn_model_data.xml.gz>.
*/
class CV_EXPORTS_W ClassifierCallback{
public:
virtual ~ClassifierCallback() { }
/** @brief The character classifier must return a (ranked list of) class(es) id('s)
/** @brief The character classifier must return a (ranked list of)
* class(es) id('s)
@param image Input image CV_8UC1 or CV_8UC3 with a single letter.
@param out_class The classifier returns the character class categorical label, or list of
class labels, to which the input image corresponds.
@param out_confidence The classifier returns the probability of the input image
corresponding to each classes in out_class.
* @param image Input image CV_8UC1 or CV_8UC3 with a single letter.
* @param out_class The classifier returns the character class
* categorical label, or list of class labels, to which the input image
* corresponds.
* @param out_confidence The classifier returns the probability of the
* input image corresponding to each classes in out_class.
*/
virtual void eval( InputArray image, std::vector<int>& out_class, std::vector<double>& out_confidence);
virtual void eval (InputArray image, std::vector<int>& out_class,
std::vector<double>& out_confidence);
};
public:
/** @brief Recognize text using HMM.
Takes binary image on input and returns recognized text in the output_text parameter. Optionally
provides also the Rects for individual text elements found (e.g. words), and the list of those
text elements with their confidence values.
* Takes binary image on input and returns recognized text in the output_text
* parameter. Optionally provides also the Rects for individual text elements
* found (e.g. words), and the list of those text elements with their
* confidence values.
@param image Input binary image CV_8UC1 with a single text line (or word).
* @param image Input binary image CV_8UC1 with a single text line (or word).
@param output_text Output text. Most likely character sequence found by the HMM decoder.
* @param output_text Output text. Most likely character sequence found by
* the HMM decoder.
@param component_rects If provided the method will output a list of Rects for the individual
text elements found (e.g. words).
* @param component_rects If provided the method will output a list of Rects
* for the individual text elements found (e.g. words).
@param component_texts If provided the method will output a list of text strings for the
recognition of individual text elements found (e.g. words).
* @param component_texts If provided the method will output a list of text
* strings for the recognition of individual text elements found (e.g. words).
@param component_confidences If provided the method will output a list of confidence values
for the recognition of individual text elements found (e.g. words).
* @param component_confidences If provided the method will output a list of
* confidence values for the recognition of individual text elements found
* (e.g. words).
@param component_level Only OCR_LEVEL_WORD is supported.
*/
virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0);
* @param component_level Only OCR_LEVEL_WORD is supported.
*/
virtual void run (Mat& image, std::string& output_text,
std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL,
std::vector<float>* component_confidences=NULL,
int component_level=0);
/** @brief Recognize text using HMM.
Takes an image and a mask (where each connected component corresponds to a segmented character)
on input and returns recognized text in the output_text parameter. Optionally
provides also the Rects for individual text elements found (e.g. words), and the list of those
text elements with their confidence values.
* Takes an image and a mask (where each connected component corresponds to a
* segmented character) on input and returns recognized text in the
* output_text parameter. Optionally provides also the Rects for individual
* text elements found (e.g. words), and the list of those text elements with
* their confidence values.
@param image Input image CV_8UC1 or CV_8UC3 with a single text line (or word).
@param mask Input binary image CV_8UC1 same size as input image. Each connected component in mask corresponds to a segmented character in the input image.
* @param image Input image CV_8UC1 or CV_8UC3 with a single text line
* (or word).
@param output_text Output text. Most likely character sequence found by the HMM decoder.
* @param mask Input binary image CV_8UC1 same size as input image. Each
* connected component in mask corresponds to a segmented character in the
* input image.
@param component_rects If provided the method will output a list of Rects for the individual
text elements found (e.g. words).
* @param output_text Output text. Most likely character sequence found by
* the HMM decoder.
@param component_texts If provided the method will output a list of text strings for the
recognition of individual text elements found (e.g. words).
* @param component_rects If provided the method will output a list of Rects
* for the individual text elements found (e.g. words).
@param component_confidences If provided the method will output a list of confidence values
for the recognition of individual text elements found (e.g. words).
* @param component_texts If provided the method will output a list of text
* strings for the recognition of individual text elements found (e.g. words).
@param component_level Only OCR_LEVEL_WORD is supported.
*/
virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
* @param component_confidences If provided the method will output a list of
* confidence values for the recognition of individual text elements found
* (e.g. words).
* @param component_level Only OCR_LEVEL_WORD is supported.
*/
virtual void run(Mat& image, Mat& mask, std::string& output_text,
std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL,
std::vector<float>* component_confidences=NULL,
int component_level=0);
// aliases for scripting
CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
CV_WRAP String run(InputArray image,
int min_confidence,
int component_level=0);
CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
CV_WRAP String run(InputArray image,
InputArray mask,
int min_confidence,
int component_level=0);
/** @brief Creates an instance of the OCRHMMDecoder class. Initializes HMMDecoder.
/** @brief Creates an instance of the OCRHMMDecoder class. Initializes
* HMMDecoder.
@param classifier The character classifier with built in feature extractor.
* @param classifier The character classifier with built in feature
* extractor.
@param vocabulary The language vocabulary (chars when ascii english text). vocabulary.size()
must be equal to the number of classes of the classifier.
* @param vocabulary The language vocabulary (chars when ascii english text).
* vocabulary.size() must be equal to the number of classes of the
* classifier.
@param transition_probabilities_table Table with transition probabilities between character
pairs. cols == rows == vocabulary.size().
* @param transition_probabilities_table Table with transition probabilities
* between character pairs. cols == rows == vocabulary.size().
@param emission_probabilities_table Table with observation emission probabilities. cols ==
rows == vocabulary.size().
* @param emission_probabilities_table Table with observation emission
* probabilities. cols == rows == vocabulary.size().
@param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available for the moment
(<http://en.wikipedia.org/wiki/Viterbi_algorithm>).
* @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available
* for the moment (<http://en.wikipedia.org/wiki/Viterbi_algorithm>).
*/
static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
const std::string& vocabulary, // The language vocabulary (chars when ascii english text)
// size() must be equal to the number of classes
InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
// cols == rows == vocabulari.size()
InputArray emission_probabilities_table, // Table with observation emission probabilities
// cols == rows == vocabulari.size()
decoder_mode mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment)
CV_WRAP static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
const String& vocabulary, // The language vocabulary (chars when ascii english text)
// size() must be equal to the number of classes
InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
// cols == rows == vocabulari.size()
InputArray emission_probabilities_table, // Table with observation emission probabilities
// cols == rows == vocabulari.size()
int mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment)
protected:
static Ptr<OCRHMMDecoder> create(
const Ptr<OCRHMMDecoder::ClassifierCallback> classifier, // The character classifier with built in feature extractor
const std::string& vocabulary, // The language vocabulary (chars when ascii english text) size() must be equal to the number of classes
InputArray transition_probabilities_table, // Table with transition probabilities between character pairs cols == rows == vocabulari.size()
InputArray emission_probabilities_table, // Table with observation emission probabilities cols == rows == vocabulari.size()
decoder_mode mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment)
CV_WRAP static Ptr<OCRHMMDecoder> create(
const Ptr<OCRHMMDecoder::ClassifierCallback> classifier, // The character classifier with built in feature extractor
const String& vocabulary, // The language vocabulary (chars when ascii english text) size() must be equal to the number of classes
InputArray transition_probabilities_table, // Table with transition probabilities between character pairs cols == rows == vocabulari.size()
InputArray emission_probabilities_table, // Table with observation emission probabilities cols == rows == vocabulari.size()
int mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment)
protected:
Ptr<OCRHMMDecoder::ClassifierCallback> classifier;
std::string vocabulary;
@@ -283,76 +353,98 @@ protected:
decoder_mode mode;
};
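A minimal usage sketch of the decoder, assuming the sample classifier and transition table shipped with the module; the FileStorage key name and the 62-character vocabulary follow the module samples and are assumptions here:
```cpp
#include <opencv2/text.hpp>
#include <opencv2/imgcodecs.hpp>
#include <iostream>

int main()
{
    cv::Ptr<cv::text::OCRHMMDecoder::ClassifierCallback> classifier =
        cv::text::loadOCRHMMClassifierNM("OCRHMM_knn_model_data.xml.gz");

    // Load the generic transition table provided with the module samples.
    cv::Mat transitions;
    cv::FileStorage fs("OCRHMM_transitions_table.xml", cv::FileStorage::READ);
    fs["transition_probabilities"] >> transitions; // key name as in the samples
    fs.release();

    const std::string vocabulary =
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    cv::Ptr<cv::text::OCRHMMDecoder> decoder = cv::text::OCRHMMDecoder::create(
        classifier, vocabulary, transitions,
        cv::Mat::eye(62, 62, CV_64FC1)); // uniform emission probabilities

    // The decoder expects a binary single-word image; this path is a placeholder.
    cv::Mat word = cv::imread("scenetext_word.jpg", cv::IMREAD_GRAYSCALE);
    std::string output;
    decoder->run(word, output);
    std::cout << output << std::endl;
    return 0;
}
```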
/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object.
/** @brief Allow to implicitly load the default character classifier when
* creating an OCRHMMDecoder object.
@param filename The XML or YAML file with the classifier model (e.g. OCRHMM_knn_model_data.xml)
* @param filename The XML or YAML file with the classifier model (e.g.
* OCRHMM_knn_model_data.xml)
The KNN default classifier is based in the scene text recognition method proposed by Lukás Neumann &
Jiri Matas in [Neumann11b]. Basically, the region (contour) in the input image is normalized to a
fixed size, while retaining the centroid and aspect ratio, in order to extract a feature vector
based on gradient orientations along the chain-code of its perimeter. Then, the region is classified
using a KNN model trained with synthetic data of rendered characters with different standard font
types.
* The KNN default classifier is based in the scene text recognition method
* proposed by Lukás Neumann & Jiri Matas in [Neumann11b]. Basically, the region
* (contour) in the input image is normalized to a fixed size, while retaining
* the centroid and aspect ratio, in order to extract a feature vector based on
* gradient orientations along the chain-code of its perimeter. Then, the region
* is classified using a KNN model trained with synthetic data of rendered
* characters with different standard font types.
*/
CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM (
const String& filename);
CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename);
/** @brief Allow to implicitly load the default character classifier when
* creating an OCRHMMDecoder object.
/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object.
* @param filename The XML or YAML file with the classifier model (e.g.
* OCRBeamSearch_CNN_model_data.xml.gz)
@param filename The XML or YAML file with the classifier model (e.g. OCRBeamSearch_CNN_model_data.xml.gz)
The CNN default classifier is based in the scene text recognition method proposed by Adam Coates &
Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Convolutional Neural Network and
a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
at each window location.
* The CNN default classifier is based in the scene text recognition method
* proposed by Adam Coates & Andrew NG in [Coates11a]. The character classifier
* consists in a Single Layer Convolutional Neural Network and a linear
* classifier. It is applied to the input image in a sliding window fashion,
* providing a set of recognitions at each window location.
*/
CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename);
CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN (
const String& filename);
//! @}
/** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
*
/** @brief Utility function to create a tailored language model transitions
* table from a given list of words (lexicon).
* @param vocabulary The language vocabulary (chars when ascii english text).
*
* @param lexicon The list of words that are expected to be found in a particular image.
*
* @param transition_probabilities_table Output table with transition probabilities between character pairs. cols == rows == vocabulary.size().
*
* The function calculates frequency statistics of character pairs from the given lexicon and fills the output transition_probabilities_table with them. The transition_probabilities_table can be used as input in the OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods.
* @param transition_probabilities_table Output table with transition
* probabilities between character pairs. cols == rows == vocabulary.size().
* The function calculates frequency statistics of character pairs from the given
* lexicon and fills the output transition_probabilities_table with them. The
* transition_probabilities_table can be used as input in the
* OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods.
* @note
* - (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) :
* <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
* - (C++) An alternative would be to load the default generic language
* transition table provided in the text module samples folder (created
* from ispell 42869 english words list) :
* <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
**/
CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector<std::string>& lexicon, OutputArray transition_probabilities_table);
CV_EXPORTS_W Mat createOCRHMMTransitionsTable(const String& vocabulary, std::vector<cv::String>& lexicon);
CV_EXPORTS void createOCRHMMTransitionsTable (
std::string& vocabulary, std::vector<std::string>& lexicon,
OutputArray transition_probabilities_table);
CV_EXPORTS_W Mat createOCRHMMTransitionsTable (
const String& vocabulary, std::vector<cv::String>& lexicon);
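As an illustration, the wrapped overload can build a tailored table from a small in-memory lexicon; the vocabulary and words below are arbitrary:
```cpp
#include <opencv2/text.hpp>
#include <iostream>

int main()
{
    cv::String vocabulary = "abcdefghijklmnopqrstuvwxyz";
    std::vector<cv::String> lexicon;
    lexicon.push_back("open");
    lexicon.push_back("text");
    lexicon.push_back("vision");

    // The returned table satisfies cols == rows == vocabulary.size().
    cv::Mat transitions =
        cv::text::createOCRHMMTransitionsTable(vocabulary, lexicon);
    std::cout << transitions.rows << "x" << transitions.cols << std::endl;
    return 0;
}
```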
/* OCR BeamSearch Decoder */
/** @brief OCRBeamSearchDecoder class provides an interface for OCR using Beam Search algorithm.
/** @brief OCRBeamSearchDecoder class provides an interface for OCR using Beam
* Search algorithm.
@note
- (C++) An example on using OCRBeamSearchDecoder recognition combined with scene text detection can
be found at the demo sample:
<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
- (C++) An example on using OCRBeamSearchDecoder recognition combined with
scene text detection can be found at the demo sample:
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
*/
class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR
{
public:
/* Forward declaration of class that can be used to generate an OCRBeamSearchDecoder::ClassifierCallback */
class TextImageClassifier;
class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR{
public:
/** @brief Callback with the character classifier is made a class.
This way it hides the feature extractor and the classifier itself, so developers can write
their own OCR code.
* This way it hides the feature extractor and the classifier itself, so
* developers can write their own OCR code.
The default character classifier and feature extractor can be loaded using the utility function
loadOCRBeamSearchClassifierCNN with all its parameters provided in
<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
* The default character classifier and feature extractor can be loaded
* using the utility function loadOCRBeamSearchClassifierCNN with all its
* parameters provided in
* <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
*/
class CV_EXPORTS_W ClassifierCallback
{
public:
class CV_EXPORTS_W ClassifierCallback{
public:
virtual ~ClassifierCallback() { }
/** @brief The character classifier must return a (ranked list of) class(es) id('s)
@@ -364,8 +456,8 @@ public:
*/
virtual void eval( InputArray image, std::vector< std::vector<double> >& recognition_probabilities, std::vector<int>& oversegmentation );
int getWindowSize() {return 0;}
int getStepSize() {return 0;}
virtual int getWindowSize() {return 0;}
virtual int getStepSize() {return 0;}
};
public:
@@ -421,6 +513,7 @@ public:
@param beam_size Size of the beam in Beam Search algorithm.
*/
static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
const std::string& vocabulary, // The language vocabulary (chars when ascii english text)
// size() must be equal to the number of classes
@@ -441,6 +534,44 @@ public:
int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
int beam_size = 500); // Size of the beam in Beam Search algorithm
/** @brief This method allows to plug a classifier that is derived from TextImageClassifier into
* OCRBeamSearchDecoder as a ClassifierCallback.
@param classifier A pointer to a TextImageClassifier descendant
@param alphabet The language alphabet, one char per symbol. alphabet.size() must be equal to the number of classes
of the classifier. In future editions it should be replaced with a vector of strings.
@param transition_probabilities_table Table with transition probabilities between character
pairs. cols == rows == alphabet.size().
@param emission_probabilities_table Table with observation emission probabilities. cols ==
rows == alphabet.size().
@param windowWidth The width of the windows to which the sliding window will be iterated. The height will
be the height of the image. The windows might be resized to fit the classifier's input by the classifier's
preprocessor.
@param windowStep The step for the sliding window
@param mode HMM Decoding algorithm (only Viterbi for the moment)
@param beam_size Size of the beam in Beam Search algorithm
*/
// CV_WRAP static Ptr<OCRBeamSearchDecoder> create(const Ptr<TextImageClassifier> classifier, // The character classifier with built in feature extractor
// String alphabet, // The language alphabet one char per symbol
// // size() must be equal to the number of classes
// InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
// // cols == rows == alphabet.size()
// InputArray emission_probabilities_table, // Table with observation emission probabilities
// // cols == rows == alphabet.size()
// int windowWidth, // The width of the windows to which the sliding window will be iterated.
// // The height will be the height of the image. The windows might be resized to
// // fit the classifiers input by the classifiers preprocessor
// int windowStep = 1 , // The step for the sliding window
// int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
// int beam_size = 500); // Size of the beam in Beam Search algorithm
protected:
Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier;
@@ -465,6 +596,364 @@ CV_EXPORTS_W Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClas
//! @}
}
}
//Classifiers should provide different backends
//For the moment only Caffe is implemented
enum{
OCR_HOLISTIC_BACKEND_NONE,
OCR_HOLISTIC_BACKEND_CAFFE
};
class TextImageClassifier;
/**
* @brief The ImagePreprocessor class
*/
class CV_EXPORTS_W ImagePreprocessor{
protected:
virtual void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels)=0;
virtual void set_mean_(Mat){}
public:
virtual ~ImagePreprocessor(){}
/** @brief this method provides public access to the preprocessing with respect to a specific
* classifier
*
* This method's main use would be to use the preprocessor without feeding it to a classifier.
* Determining the exact behavior of a preprocessor is the main motivation for this.
*
* @param input an image without any constraints
*
* @param output in most cases an image of fixed depth and size, whitened
*
* @param sz the size to which the image will be resized if the preprocessor resizes inputs
*
* @param outputChannels the number of channels for the output image
*/
CV_WRAP void preprocess(InputArray input,OutputArray output,Size sz,int outputChannels);
CV_WRAP void set_mean(Mat mean);
/** @brief Creates a functor that only resizes and changes the channels of the input
* without further processing.
*
* @return shared pointer to the generated preprocessor
*/
CV_WRAP static Ptr<ImagePreprocessor> createResizer();
/** @brief Creates a functor that standardizes the input image by scaling its pixel values, e.g. to a fixed standard deviation.
*
* @param sigma the deviation to which pixel values are standardized
*
* @return shared pointer to generated preprocessor
*/
CV_WRAP static Ptr<ImagePreprocessor> createImageStandarizer(double sigma);
/** @brief Creates a functor that subtracts a given mean image from the input.
*
* @param meanImg the mean image to be subtracted from inputs
* @return shared pointer to generated preprocessor
*/
CV_WRAP static Ptr<ImagePreprocessor> createImageMeanSubtractor(InputArray meanImg);
CV_WRAP static Ptr<ImagePreprocessor> createImageCustomPreprocessor(double rawval=1.0, String channel_order="BGR");
friend class TextImageClassifier;
};
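A short sketch of using a preprocessor standalone, e.g. to inspect exactly what a classifier would receive; the sigma value and the 100x32 geometry mirror the DictNet defaults mentioned below and are assumptions here:
```cpp
#include <opencv2/text.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    // Standardizer scaling pixels to a deviation of 113, as used for DictNet.
    cv::Ptr<cv::text::ImagePreprocessor> prep =
        cv::text::ImagePreprocessor::createImageStandarizer(113);

    cv::Mat input = cv::imread("scenetext_word.jpg"); // placeholder input
    cv::Mat output;
    prep->preprocess(input, output, cv::Size(100, 32), 1); // single channel, DictNet-like size
    return 0;
}
```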
/** @brief Abstract class that implements the classification of text images.
*
* The interface is generic enough to describe any image classifier, and allows
* taking advantage of computing in batches. While word classifiers are the default
* networks, any image classifier should work.
*
*/
class CV_EXPORTS_W TextImageClassifier
{
protected:
Size inputGeometry_;
Size outputGeometry_;
int channelCount_;
Ptr<ImagePreprocessor> preprocessor_;
/** @brief all image preprocessing is handled here including whitening etc.
*
* @param input the image to be preprocessed for the classifier. If the depth
* is CV_8U, values should be in [0,255]; otherwise values are assumed to be in [0,1]
*
* @param output reference to the image to be fed to the classifier; the preprocessor will
* resize the image to the appropriate size and convert it to the appropriate depth
*
* The method preprocess should never be used externally; it is up to the classify and
* classifyBatch methods to employ it.
*/
virtual void preprocess(const Mat& input,Mat& output);
public:
virtual ~TextImageClassifier() {}
/** @brief sets the preprocessor employed by the classifier
*/
CV_WRAP virtual void setPreprocessor(Ptr<ImagePreprocessor> ptr);
/** @brief simple getter method returning the preprocessor employed by the classifier
*/
CV_WRAP Ptr<ImagePreprocessor> getPreprocessor();
/** @brief produces a class confidence row-vector given an image
*/
CV_WRAP virtual void classify(InputArray image, OutputArray classProbabilities) = 0;
/** @brief produces a list of bounding boxes given an image
*/
CV_WRAP virtual void detect(InputArray image, OutputArray classProbabilities) = 0;
/** @brief produces a matrix containing class confidence row-vectors given a collection of images
*/
CV_WRAP virtual void classifyBatch(InputArrayOfArrays image, OutputArray classProbabilities) = 0;
/** @brief simple getter method returning the number of channels each input sample has
*/
CV_WRAP virtual int getInputChannelCount(){return this->channelCount_;}
/** @brief simple getter method returning the size of the input sample
*/
CV_WRAP virtual Size getInputSize(){return this->inputGeometry_;}
/** @brief simple getter method returning the size of the output row-vector
*/
CV_WRAP virtual int getOutputSize()=0;
CV_WRAP virtual Size getOutputGeometry()=0;
/** @brief simple getter method returning the size of the minibatches for this classifier.
* If not applicable this method should return 1
*/
CV_WRAP virtual int getMinibatchSize()=0;
friend class ImagePreprocessor;
};
class CV_EXPORTS_W DeepCNN:public TextImageClassifier
{
/** @brief Class that uses a pretrained caffe model for word classification.
*
* This network is described in detail in:
* Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015
* http://arxiv.org/abs/1412.1842
*/
public:
virtual ~DeepCNN() {};
/** @brief Constructs a DeepCNN object from a caffe pretrained model
*
* @param archFilename is the path to the prototxt file containing the deployment model architecture description.
*
* @param weightsFilename is the path to the pretrained weights of the model in binary form. This file can be
* very large, up to 2GB.
*
* @param preprocessor is a pointer to an instance of an ImagePreprocessor implementing the protected preprocess_ method
*
* @param minibatchSz the maximum number of samples that can be processed in parallel. In practice this parameter
* has an effect only when computing on the GPU and should be set with respect to the memory available on the GPU.
*
* @param backEnd integer parameter selecting the computation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
* the only option
*/
CV_WRAP static Ptr<DeepCNN> create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE);
/** @brief Constructs a DeepCNN intended to be used for word spotting.
*
* This method loads a pretrained classifier and couples it with a preprocessor that standardizes pixels with a
* deviation of 113. The architecture file can be downloaded from:
* <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt>
* While the weights can be downloaded from:
* <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel>
* The words assigned to the network outputs are available at:
* <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt>
*
* @param archFilename is the path to the prototxt file containing the deployment model architecture description.
* When employing OCR_HOLISTIC_BACKEND_CAFFE this is the path to the deploy ".prototxt".
*
* @param weightsFilename is the path to the pretrained weights of the model. When employing
* OCR_HOLISTIC_BACKEND_CAFFE this is the path to the ".caffemodel" file. This file can be very large, the
* pretrained DictNet uses 2GB.
*
* @param backEnd integer parameter selecting the computation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
* the only option
*/
CV_WRAP static Ptr<DeepCNN> createDictNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE);
};
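A minimal sketch of constructing and querying the classifier; it requires a build with the Caffe backend, and the file names are the DictNet downloads listed above:
```cpp
#include <opencv2/text.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    cv::Ptr<cv::text::DeepCNN> cnn = cv::text::DeepCNN::createDictNet(
        "dictnet_vgg_deploy.prototxt", "dictnet_vgg.caffemodel");

    cv::Mat word = cv::imread("scenetext_word.jpg"); // placeholder word crop
    cv::Mat probabilities; // one row vector, one column per dictionary word
    cnn->classify(word, probabilities);
    return 0;
}
```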
namespace cnn_config{
namespace caffe_backend{
/** @brief Queries the computation device being used by Caffe
*
* Whether Caffe uses the GPU or the CPU can only be controlled globally;
* this function queries the current state of Caffe.
* If the module is built without caffe, this method throws an exception.
*
* @return true if caffe is computing on the GPU, false if caffe is computing on the CPU
*/
CV_EXPORTS_W bool getCaffeGpuMode();
/** @brief Sets the computation device being used by Caffe
*
* Whether Caffe uses the GPU or the CPU can only be controlled globally;
* this function sets that global state.
* If the module is built without caffe, this method throws an exception.
*
* @param useGpu set to true for caffe to be computing on the GPU, false if caffe is
* computing on the CPU
*/
CV_EXPORTS_W void setCaffeGpuMode(bool useGpu);
/** @brief Provides runtime information on whether Caffe support was compiled in.
*
* The text module API is the same regardless of whether Caffe was available
* during compilation. When methods that require Caffe are invoked while Caffe support
* is not compiled in, exceptions are thrown. This method allows testing at runtime
* whether the text module was built with Caffe.
*
* @return true if Caffe support for the text module was provided during compilation,
* false if Caffe was unavailable.
*/
CV_EXPORTS_W bool getCaffeAvailable();
}//caffe
}//cnn_config
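The intended runtime-guard pattern, sketched:
```cpp
#include <opencv2/text.hpp>
#include <iostream>

int main()
{
    using namespace cv::text::cnn_config::caffe_backend;

    if (getCaffeAvailable())
    {
        setCaffeGpuMode(true); // request GPU computation globally
        std::cout << "Caffe GPU mode: " << getCaffeGpuMode() << std::endl;
    }
    else
    {
        std::cout << "text module was built without Caffe" << std::endl;
    }
    return 0;
}
```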
/** @brief OCRHolisticWordRecognizer class provides the functionality of segmented word spotting.
* Given a predefined vocabulary, a TextImageClassifier is employed to select the most probable
* word given an input image.
*
* This class implements the logic of providing transcriptions given a vocabulary and an image
* classifier. The classifier can be any TextImageClassifier, but the classifier for which this
* class was built is the DictNet. In order to load it the following files should be downloaded:
* <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt>
* <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel>
* <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt>
*/
class CV_EXPORTS_W OCRHolisticWordRecognizer : public BaseOCR
{
public:
virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=OCR_LEVEL_WORD)=0;
/** @brief Recognize text using a segmentation based word-spotting/classifier CNN.
Takes image on input and returns recognized text in the output_text parameter. Optionally
provides also the Rects for individual text elements found (e.g. words), and the list of those
text elements with their confidence values.
@param image Input image CV_8UC1 or CV_8UC3
@param mask is totally ignored and is only available for compatibility reasons
@param output_text Output text of the word spotting, always one that exists in the dictionary.
@param component_rects Not applicable for word spotting; can be NULL. If not, a single element will
be put in the vector.
@param component_texts Not applicable for word spotting; can be NULL. If not, a single element will
be put in the vector.
@param component_confidences Not applicable for word spotting; can be NULL. If not, a single element will
be put in the vector.
@param component_level must be OCR_LEVEL_WORD.
*/
virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=OCR_LEVEL_WORD)=0;
/**
@brief Method that provides a quick and simple interface to a single word image classification
@param inputImage an image expected to be CV_8UC1 or CV_8UC3 of any size, assumed to contain a single word
@param transcription an opencv string that will store the detected word transcription
@param confidence a double that will be updated with the confidence the classifier has for the selected word
*/
CV_WRAP virtual void recogniseImage(InputArray inputImage,CV_OUT String& transcription,CV_OUT double& confidence)=0;
/**
@brief Method that provides a quick and simple interface to multiple word image classification, taking advantage of
the classifier's parallel capabilities.
@param inputImageList a list of images expected to be CV_8UC1 or CV_8UC3; each image can be of any size and is assumed
to contain a single word.
@param transcriptions a vector of opencv strings that will store the detected word transcriptions, one for each
input image
@param confidences a vector of doubles that will be updated with the confidence the classifier has for each of the
selected words.
*/
CV_WRAP virtual void recogniseImageBatch(InputArrayOfArrays inputImageList,CV_OUT std::vector<String>& transcriptions,CV_OUT std::vector<double>& confidences)=0;
/**
@brief simple getter for the vocabulary employed
*/
CV_WRAP virtual const std::vector<String>& getVocabulary()=0;
/** @brief simple getter for the classifier employed
*/
CV_WRAP virtual Ptr<TextImageClassifier> getClassifier()=0;
/** @brief Creates an instance of the OCRHolisticWordRecognizer class.
@param classifierPtr an instance of TextImageClassifier, normally a DeepCNN instance
@param vocabularyFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line
in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize
of the classifier.
*/
CV_WRAP static Ptr<OCRHolisticWordRecognizer> create(Ptr<TextImageClassifier> classifierPtr,String vocabularyFilename);
/** @brief Creates an instance of the OCRHolisticWordRecognizer class and implicitly also a DeepCNN classifier.
@param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture.
@param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.
@param vocabularyFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line
in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize
of the classifier.
*/
CV_WRAP static Ptr<OCRHolisticWordRecognizer> create(String modelArchFilename, String modelWeightsFilename, String vocabularyFilename);
/** @brief Creates an instance of the OCRHolisticWordRecognizer class from an in-memory vocabulary.
*
* @param classifierPtr an instance of TextImageClassifier, normally a DeepCNN instance
*
* @param vocabulary a vector of words; its size must equal the output size of the classifier
*/
CV_WRAP static Ptr<OCRHolisticWordRecognizer> create(Ptr<TextImageClassifier> classifierPtr,const std::vector<String>& vocabulary);
/** @brief Creates an instance of the OCRHolisticWordRecognizer class and implicitly also a DeepCNN classifier.
*
* @param modelArchFilename the relative or absolute path to the prototxt file describing the classifier's architecture
*
* @param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form
*
* @param vocabulary a vector of words; its size must equal the output size of the classifier
*/
CV_WRAP static Ptr<OCRHolisticWordRecognizer> create (String modelArchFilename, String modelWeightsFilename, const std::vector<String>& vocabulary);
};
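A batch recognition sketch using the class above; the Caffe backend and DictNet files are assumed, and image names are placeholders:
```cpp
#include <opencv2/text.hpp>
#include <opencv2/imgcodecs.hpp>
#include <iostream>

int main()
{
    cv::Ptr<cv::text::OCRHolisticWordRecognizer> spotter =
        cv::text::OCRHolisticWordRecognizer::create(
            "dictnet_vgg_deploy.prototxt", "dictnet_vgg.caffemodel",
            "dictnet_vgg_labels.txt");

    std::vector<cv::Mat> words;
    words.push_back(cv::imread("word1.jpg")); // placeholder word crops
    words.push_back(cv::imread("word2.jpg"));

    std::vector<cv::String> transcriptions;
    std::vector<double> confidences;
    spotter->recogniseImageBatch(words, transcriptions, confidences);
    for (size_t i = 0; i < transcriptions.size(); ++i)
        std::cout << transcriptions[i] << " " << confidences[i] << std::endl;
    return 0;
}
```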
}//namespace text
}//namespace cv
#endif // _OPENCV_TEXT_OCR_HPP_

@@ -0,0 +1,235 @@
/*M//////////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_TEXT_TEXTDETECTOR_HPP__
#define __OPENCV_TEXT_TEXTDETECTOR_HPP__
#include <vector>
#include <string>
#include <iostream>
#include <sstream>
#include"ocr.hpp"
namespace cv
{
namespace text
{
//! @addtogroup text_recognize
//! @{
//base class BaseDetector declares a common API that would be used in a typical text
//detection scenario
class CV_EXPORTS_W BaseDetector
{
public:
virtual ~BaseDetector() {};
virtual void run(Mat& image,
std::vector<Rect>* component_rects=NULL,
std::vector<float>* component_confidences=NULL,
int component_level=0) = 0;
virtual void run(Mat& image, Mat& mask,
std::vector<Rect>* component_rects=NULL,
std::vector<float>* component_confidences=NULL,
int component_level=0) = 0;
/** @brief Main functionality of the detector hierarchy. Subclasses provide
* default parameters for all parameters other than the input image.
*/
// virtual std::vector<Rect>* run(InputArray image){
// //std::string res;
// std::vector<Rect> component_rects;
// std::vector<float> component_confidences;
// //std::vector<std::string> component_texts;
// Mat inputImage=image.getMat();
// this->run(inputImage,&component_rects,
// &component_confidences,OCR_LEVEL_WORD);
// return *component_rects;
// }
};
//Classifiers should provide different backends
//For the moment only Caffe is implemented
//enum{
// OCR_HOLISTIC_BACKEND_NONE,
// OCR_HOLISTIC_BACKEND_CAFFE
//};
/** @brief textDetector class provides the functionality of text bounding box detection.
* A TextImageClassifier is employed to find the most probable text regions
* given an input image.
*
* This class implements the logic of providing text detections given an image
* classifier. The classifier can be any TextImageClassifier, but the classifier for which this
* class was built is the DictNet. In order to load it the following files should be downloaded:
* <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt>
* <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel>
* <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt>
*/
class CV_EXPORTS_W textDetector : public BaseDetector
{
public:
virtual void run(Mat& image, std::vector<Rect>* component_rects=NULL,
std::vector<float>* component_confidences=NULL,
int component_level=OCR_LEVEL_WORD)=0;
/** @brief Detect text in an image using a segmentation based word-spotting/classifier cnn.
Takes an image as input and optionally returns the Rects of the individual text elements
found (e.g. words), together with their confidence values.
@param image Input image CV_8UC1 or CV_8UC3
@param mask is totally ignored and is only available for compatibility reasons
@param component_rects if not NULL, one Rect per detected text element will be put in the vector.
@param component_confidences if not NULL, one confidence value per detected text element will be put in the vector.
@param component_level must be OCR_LEVEL_WORD.
*/
virtual void run(Mat& image, Mat& mask, std::vector<Rect>* component_rects=NULL,
std::vector<float>* component_confidences=NULL,
int component_level=OCR_LEVEL_WORD)=0;
/**
@brief Method that provides a quick and simple interface to detect text inside an image
@param inputImage an image expected to be a CV_U8C1 or CV_U8C3 of any size
@param Bbox a vector of Rect that will store the bounding boxes of the detected words
@param confidence a vector of float that will be updated with the confidence the classifier has for each detected box
*/
CV_WRAP virtual void textDetectInImage(InputArray inputImage,CV_OUT std::vector<Rect>& Bbox,CV_OUT std::vector<float>& confidence)=0;
/** @brief simple getter for the image classifier used by the detector
*/
CV_WRAP virtual Ptr<TextImageClassifier> getClassifier()=0;
/** @brief Creates an instance of the textDetector class.
@param classifierPtr an instance of TextImageClassifier, normally a DeepCNN instance
*/
CV_WRAP static Ptr<textDetector> create(Ptr<TextImageClassifier> classifierPtr);
/** @brief Creates an instance of the textDetector class and implicitly also a DeepCNN classifier.
@param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture.
@param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.
*/
CV_WRAP static Ptr<textDetector> create(String modelArchFilename, String modelWeightsFilename);
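/** @brief Example of use (a minimal sketch; the prototxt/caffemodel/image names below are
placeholders for a TextBoxes-style detection model, not files shipped with the module):
@code
    Ptr<textDetector> detector = textDetector::create("textbox_deploy.prototxt",
                                                      "textbox.caffemodel");
    Mat image = imread("scene.jpg");
    std::vector<Rect> boxes;
    std::vector<float> scores;
    detector->textDetectInImage(image, boxes, scores);
@endcode
*/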
};
//! @}
}//namespace text
}//namespace cv
#endif // __OPENCV_TEXT_TEXTDETECTOR_HPP__

@ -0,0 +1,879 @@
#include "precomp.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/core.hpp"
#include <iostream>
#include <fstream>
#include <sstream>
#include <queue>
#include <algorithm>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#ifdef HAVE_CAFFE
#include "caffe/caffe.hpp"
#endif
namespace cv { namespace text {
//Maybe OpenCV has a routine better suited
inline bool fileExists (String filename) {
std::ifstream f(filename.c_str());
return f.good();
}
//************************************************************************************
//****************** ImagePreprocessor *******************************************
//************************************************************************************
void ImagePreprocessor::preprocess(InputArray input,OutputArray output,Size sz,int outputChannels){
Mat inpImg=input.getMat();
Mat outImg;
this->preprocess_(inpImg,outImg,sz,outputChannels);
outImg.copyTo(output);
}
void ImagePreprocessor::set_mean(Mat mean){
this->set_mean_(mean);
}
class ResizerPreprocessor: public ImagePreprocessor{
protected:
void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
//TODO put all the logic of channel and depth conversions in ImageProcessor class
CV_Assert(outputChannels==1 || outputChannels==3);
CV_Assert(input.channels()==1 || input.channels()==3);
if(input.channels()!=outputChannels)
{
Mat tmpInput;
if(outputChannels==1){
cvtColor(input,tmpInput,COLOR_BGR2GRAY);
if(input.depth()==CV_8U)
{
tmpInput.convertTo(output,CV_32FC1,1/255.0);
}else
{//Assuming values are at the desired [0,1] range
tmpInput.convertTo(output, CV_32FC1);
}
}else
{
cvtColor(input,tmpInput,COLOR_GRAY2BGR);
if(input.depth()==CV_8U)
{
tmpInput.convertTo(output,CV_32FC3,1/255.0);
}else
{//Assuming values are at the desired [0,1] range
tmpInput.convertTo(output, CV_32FC3);
}
}
}else
{
if(input.channels()==1)
{
if(input.depth()==CV_8U)
{
input.convertTo(output, CV_32FC1,1/255.0);
}else
{//Assuming values are at the desired [0,1] range
input.convertTo(output, CV_32FC1);
}
}else
{
if(input.depth()==CV_8U){
input.convertTo(output, CV_32FC3,1/255.0);
}else
{//Assuming values are at the desired [0,1] range
input.convertTo(output, CV_32FC3);
}
}
}
if(outputSize.width!=0 && outputSize.height!=0)
{
resize(output,output,outputSize);
}
}
//void set_mean_(Mat m){}
public:
ResizerPreprocessor(){}
~ResizerPreprocessor(){}
};
class StandarizerPreprocessor: public ImagePreprocessor{
protected:
double sigma_;
//void set_mean_(Mat M){}
void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
//TODO put all the logic of channel and depth conversions in ImageProcessor class
CV_Assert(outputChannels==1 || outputChannels==3);
CV_Assert(input.channels()==1 || input.channels()==3);
if(input.channels()!=outputChannels)
{
Mat tmpInput;
if(outputChannels==1)
{
cvtColor(input,tmpInput,COLOR_BGR2GRAY);
if(input.depth()==CV_8U)
{
tmpInput.convertTo(output,CV_32FC1,1/255.0);
}else
{//Assuming values are at the desired [0,1] range
tmpInput.convertTo(output, CV_32FC1);
}
}else
{
cvtColor(input,tmpInput,COLOR_GRAY2BGR);
if(input.depth()==CV_8U)
{
tmpInput.convertTo(output,CV_32FC3,1/255.0);
}else
{//Assuming values are at the desired [0,1] range
tmpInput.convertTo(output, CV_32FC3);
}
}
}else
{
if(input.channels()==1)
{
if(input.depth()==CV_8U)
{
input.convertTo(output, CV_32FC1,1/255.0);
}else
{//Assuming values are at the desired [0,1] range
input.convertTo(output, CV_32FC1);
}
}else
{
if(input.depth()==CV_8U)
{
input.convertTo(output, CV_32FC3,1/255.0);
}else
{//Assuming values are at the desired [0,1] range
input.convertTo(output, CV_32FC3);
}
}
}
if(outputSize.width!=0 && outputSize.height!=0)
{
resize(output,output,outputSize);
}
Scalar mean,dev;
meanStdDev(output,mean,dev);
subtract(output,mean[0],output);
divide(output,(dev[0]/sigma_),output);
}
public:
StandarizerPreprocessor(double sigma):sigma_(sigma){}
~StandarizerPreprocessor(){}
};
class customPreprocessor:public ImagePreprocessor{
protected:
double rawval_;
Mat mean_;
String channel_order_;
void set_mean_(Mat imMean_){
imMean_.copyTo(this->mean_);
}
void set_raw_scale(int rawval){
rawval_ = rawval;
}
void set_channels(String channel_order){
channel_order_=channel_order;
}
void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
//TODO put all the logic of channel and depth conversions in ImageProcessor class
CV_Assert(outputChannels==1 || outputChannels==3);
CV_Assert(input.channels()==1 || input.channels()==3);
if(input.channels()!=outputChannels)
{
Mat tmpInput;
if(outputChannels==1)
{
cvtColor(input,tmpInput,COLOR_BGR2GRAY);
if(input.depth()==CV_8U)
{
if (rawval_ == 1)
tmpInput.convertTo(output,CV_32FC1,1/255.0);
else
tmpInput.convertTo(output,CV_32FC1);
}else
{//Assuming values are at the desired [0,1] range
if (rawval_ ==1)
tmpInput.convertTo(output, CV_32FC1);
else
tmpInput.convertTo(output, CV_32FC1,rawval_);
}
}else
{
cvtColor(input,tmpInput,COLOR_GRAY2BGR);
if(input.depth()==CV_8U)
{
if (rawval_ == 1)
tmpInput.convertTo(output,CV_32FC3,1/255.0);
else
tmpInput.convertTo(output,CV_32FC3);
}else
{//Assuming values are at the desired [0,1] range
if (rawval_ ==1)
tmpInput.convertTo(output, CV_32FC3);
else
tmpInput.convertTo(output, CV_32FC3,rawval_);
}
}
}else
{
if(input.channels()==1)
{
if(input.depth()==CV_8U)
{
if (rawval_ == 1)
input.convertTo(output,CV_32FC1,1/255.0);
else
input.convertTo(output,CV_32FC1);
}else
{//Assuming values are at the desired [0,1] range
if (rawval_ ==1)
input.convertTo(output, CV_32FC1);
else
input.convertTo(output, CV_32FC1,rawval_);
}
}else
{
if(input.depth()==CV_8U)
{
if (rawval_ == 1)
input.convertTo(output,CV_32FC3,1/255.0);
else
input.convertTo(output,CV_32FC3);
}else
{//Assuming values are at the desired [0,1] range
if (rawval_ ==1)
input.convertTo(output, CV_32FC3);
else
input.convertTo(output, CV_32FC3,rawval_);
}
}
}
if(outputSize.width!=0 && outputSize.height!=0)
{
resize(output,output,outputSize);
}
if (!this->mean_.empty()){
Scalar mean_s(this->mean_.at<uchar>(0,0),this->mean_.at<uchar>(0,1),this->mean_.at<uchar>(0,2));
subtract(output,mean_s,output);
}
else{
Scalar mean_s;
mean_s = mean(output);
subtract(output,mean_s,output);
}
}
public:
customPreprocessor( double rawval,String channel_order):rawval_(rawval),channel_order_(channel_order){}
~customPreprocessor(){}
};
class MeanSubtractorPreprocessor: public ImagePreprocessor{
protected:
Mat mean_;
//void set_mean_(Mat m){}
void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
//TODO put all the logic of channel and depth conversions in ImageProcessor class
CV_Assert(this->mean_.cols==outputSize.width && this->mean_.rows ==outputSize.height);
CV_Assert(outputChannels==1 || outputChannels==3);
CV_Assert(input.channels()==1 || input.channels()==3);
if(input.channels()!=outputChannels)
{
Mat tmpInput;
if(outputChannels==1)
{
cvtColor(input,tmpInput,COLOR_BGR2GRAY);
if(input.depth()==CV_8U)
{
tmpInput.convertTo(output,CV_32FC1,1/255.0);
}else
{//Assuming values are at the desired [0,1] range
tmpInput.convertTo(output, CV_32FC1);
}
}else
{
cvtColor(input,tmpInput,COLOR_GRAY2BGR);
if(input.depth()==CV_8U)
{
tmpInput.convertTo(output,CV_32FC3,1/255.0);
}else
{//Assuming values are at the desired [0,1] range
tmpInput.convertTo(output, CV_32FC3);
}
}
}else
{
if(input.channels()==1)
{
if(input.depth()==CV_8U)
{
input.convertTo(output, CV_32FC1,1/255.0);
}else
{//Assuming values are at the desired [0,1] range
input.convertTo(output, CV_32FC1);
}
}else
{
if(input.depth()==CV_8U)
{
input.convertTo(output, CV_32FC3,1/255.0);
}else
{//Assuming values are at the desired [0,1] range
input.convertTo(output, CV_32FC3);
}
}
}
if(outputSize.width!=0 && outputSize.height!=0)
{
resize(output,output,outputSize);
}
subtract(output,this->mean_,output);
}
public:
MeanSubtractorPreprocessor(Mat mean)
{
mean.copyTo(this->mean_);
}
~MeanSubtractorPreprocessor(){}
};
Ptr<ImagePreprocessor> ImagePreprocessor::createResizer()
{
return Ptr<ImagePreprocessor>(new ResizerPreprocessor);
}
Ptr<ImagePreprocessor> ImagePreprocessor::createImageStandarizer(double sigma)
{
return Ptr<ImagePreprocessor>(new StandarizerPreprocessor(sigma));
}
Ptr<ImagePreprocessor> ImagePreprocessor::createImageCustomPreprocessor(double rawval,String channel_order)
{
return Ptr<ImagePreprocessor>(new customPreprocessor(rawval,channel_order));
}
Ptr<ImagePreprocessor> ImagePreprocessor::createImageMeanSubtractor(InputArray meanImg)
{
Mat tmp=meanImg.getMat();
return Ptr<ImagePreprocessor>(new MeanSubtractorPreprocessor(tmp));
}
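//Example of wiring a preprocessor to a classifier (a minimal sketch; the file names are
//placeholders, and DeepCNN::create is declared in ocr.hpp):
//  Ptr<ImagePreprocessor> prep = ImagePreprocessor::createImageStandarizer(113);
//  Ptr<DeepCNN> cnn = DeepCNN::create("arch.prototxt", "weights.caffemodel", prep, 100);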
//************************************************************************************
//****************** TextImageClassifier *****************************************
//************************************************************************************
void TextImageClassifier::preprocess(const Mat& input,Mat& output)
{
this->preprocessor_->preprocess_(input,output,this->inputGeometry_,this->channelCount_);
}
void TextImageClassifier::setPreprocessor(Ptr<ImagePreprocessor> ptr)
{
CV_Assert(!ptr.empty());
preprocessor_=ptr;
}
Ptr<ImagePreprocessor> TextImageClassifier::getPreprocessor()
{
return preprocessor_;
}
class DeepCNNCaffeImpl: public DeepCNN{
protected:
void classifyMiniBatch(std::vector<Mat> inputImageList, Mat outputMat)
{
//Classifies a list of images containing at most minibatchSz_ images
CV_Assert(int(inputImageList.size())<=this->minibatchSz_);
CV_Assert(outputMat.isContinuous());
#ifdef HAVE_CAFFE
net_->input_blobs()[0]->Reshape(inputImageList.size(), this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
net_->Reshape();
float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
float* inputData=inputBuffer;
for(size_t imgNum=0;imgNum<inputImageList.size();imgNum++)
{
std::vector<Mat> input_channels;
Mat preprocessed;
// if the image has multiple color channels the input layer should be populated accordingly
for (int channel=0;channel < this->channelCount_;channel++){
cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
input_channels.push_back(netInputWraped);
//input_data += width * height;
inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
}
this->preprocess(inputImageList[imgNum],preprocessed);
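//split() scatters the preprocessed channels into the Mats created above, which wrap
//the network's own input buffer, so no extra copy into the blob is needed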
split(preprocessed, input_channels);
}
this->net_->ForwardPrefilled();
const float* outputNetData=net_->output_blobs()[0]->cpu_data();
this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width;
//outputMat.resize(this->outputGeometry_.height * this->outputGeometry_.width);
float*outputMatData=(float*)(outputMat.data);
memcpy(outputMatData,outputNetData,sizeof(float)*outputSz*inputImageList.size());
#endif
}
void process_(Mat inputImage, Mat &outputMat)
{
// do forward pass and stores the output in outputMat
//Process one image
CV_Assert(this->minibatchSz_==1);
//CV_Assert(outputMat.isContinuous());
#ifdef HAVE_CAFFE
net_->input_blobs()[0]->Reshape(1, this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
net_->Reshape();
float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
float* inputData=inputBuffer;
std::vector<Mat> input_channels;
Mat preprocessed;
// if the image has multiple color channels the input layer should be populated accordingly
for (int channel=0;channel < this->channelCount_;channel++){
cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
input_channels.push_back(netInputWraped);
//input_data += width * height;
inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
}
this->preprocess(inputImage,preprocessed);
split(preprocessed, input_channels);
//preprocessed.copyTo(netInputWraped);
this->net_->Forward();
const float* outputNetData=net_->output_blobs()[0]->cpu_data();
// const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width;
outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
float*outputMatData=(float*)(outputMat.data);
memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
#endif
}
#ifdef HAVE_CAFFE
Ptr<caffe::Net<float> > net_;
#endif
//Size inputGeometry_;
int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
int outputSize_;
public:
DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn):
minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
channelCount_=dn.channelCount_;
inputGeometry_=dn.inputGeometry_;
//Implemented to suppress Visual Studio warning "assignment operator could not be generated"
#ifdef HAVE_CAFFE
this->net_=dn.net_;
#endif
}
DeepCNNCaffeImpl& operator=(const DeepCNNCaffeImpl &dn)
{
#ifdef HAVE_CAFFE
this->net_=dn.net_;
#endif
this->setPreprocessor(dn.preprocessor_);
this->inputGeometry_=dn.inputGeometry_;
this->channelCount_=dn.channelCount_;
this->minibatchSz_=dn.minibatchSz_;
this->outputSize_=dn.outputSize_;
this->preprocessor_=dn.preprocessor_;
this->outputGeometry_=dn.outputGeometry_;
return *this;
//Implemented to suppress Visual Studio warning "assignment operator could not be generated"
}
DeepCNNCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz)
:minibatchSz_(maxMinibatchSz)
{
CV_Assert(this->minibatchSz_>0);
CV_Assert(fileExists(modelArchFilename));
CV_Assert(fileExists(modelWeightsFilename));
CV_Assert(!preprocessor.empty());
this->setPreprocessor(preprocessor);
#ifdef HAVE_CAFFE
this->net_.reset(new caffe::Net<float>(modelArchFilename, caffe::TEST));
CV_Assert(net_->num_inputs()==1);
CV_Assert(net_->num_outputs()==1);
CV_Assert(this->net_->input_blobs()[0]->channels()==1
||this->net_->input_blobs()[0]->channels()==3);
this->channelCount_=this->net_->input_blobs()[0]->channels();
this->net_->CopyTrainedLayersFrom(modelWeightsFilename);
caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0];
this->inputGeometry_=Size(inputLayer->width(), inputLayer->height());
this->channelCount_ = inputLayer->channels();
inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
net_->Reshape();
this->outputSize_=net_->output_blobs()[0]->channels();
this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
#else
CV_Error(Error::StsError,"Caffe not available during compilation!");
#endif
}
void classify(InputArray image, OutputArray classProbabilities)
{
std::vector<Mat> inputImageList;
inputImageList.push_back(image.getMat());
classifyBatch(inputImageList,classProbabilities);
}
void detect(InputArray image, OutputArray Bbox_prob)
{
Bbox_prob.create(this->outputGeometry_,CV_32F);//allocate the output; process_() recreates the Mat with the geometry reported by the net
Mat outputMat = Bbox_prob.getMat();
process_(image.getMat(),outputMat);
//copy back to outputArray
outputMat.copyTo(Bbox_prob);
}
void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities)
{
std::vector<Mat> allImageVector;
inputImageList.getMatVector(allImageVector);
size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmetic
size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmetic
classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F);
Mat outputMat = classProbabilities.getMat();
for(size_t imgNum=0;imgNum<allImageVector.size();imgNum+=minibatchSize)
{
size_t rangeEnd=imgNum+std::min<size_t>(allImageVector.size()-imgNum,minibatchSize);
std::vector<Mat>::const_iterator from=std::vector<Mat>::const_iterator(allImageVector.begin()+imgNum);
std::vector<Mat>::const_iterator to=std::vector<Mat>::const_iterator(allImageVector.begin()+rangeEnd);
std::vector<Mat> minibatchInput(from,to);
classifyMiniBatch(minibatchInput,outputMat.rowRange(int(imgNum),int(rangeEnd)));
}
}
int getOutputSize()
{
return this->outputSize_;
}
Size getOutputGeometry()
{
return this->outputGeometry_;
}
int getMinibatchSize()
{
return this->minibatchSz_;
}
int getBackend()
{
return OCR_HOLISTIC_BACKEND_CAFFE;
}
};
Ptr<DeepCNN> DeepCNN::create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz,int backEnd)
{
if(preprocessor.empty())
{
preprocessor=ImagePreprocessor::createResizer();
}
switch(backEnd){
case OCR_HOLISTIC_BACKEND_CAFFE:
return Ptr<DeepCNN>(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz));
break;
case OCR_HOLISTIC_BACKEND_NONE:
default:
CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
return Ptr<DeepCNN>();
break;
}
}
Ptr<DeepCNN> DeepCNN::createDictNet(String archFilename,String weightsFilename,int backEnd)
{
Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageStandarizer(113);
switch(backEnd){
case OCR_HOLISTIC_BACKEND_CAFFE:
return Ptr<DeepCNN>(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, 100));
break;
case OCR_HOLISTIC_BACKEND_NONE:
default:
CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
return Ptr<DeepCNN>();
break;
}
}
namespace cnn_config{
namespace caffe_backend{
#ifdef HAVE_CAFFE
bool getCaffeGpuMode()
{
return caffe::Caffe::mode()==caffe::Caffe::GPU;
}
void setCaffeGpuMode(bool useGpu)
{
if(useGpu)
{
caffe::Caffe::set_mode(caffe::Caffe::GPU);
}else
{
caffe::Caffe::set_mode(caffe::Caffe::CPU);
}
}
bool getCaffeAvailable()
{
return true;
}
#else
bool getCaffeGpuMode()
{
CV_Error(Error::StsError,"Caffe not available during compilation!");
return 0;
}
void setCaffeGpuMode(bool useGpu)
{
CV_Error(Error::StsError,"Caffe not available during compilation!");
CV_Assert(useGpu==1);//Compilation directives force
}
bool getCaffeAvailable(){
return 0;
}
#endif
}//namespace caffe
}//namespace cnn_config
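//Typical use of the caffe_backend helpers (a sketch): enable GPU mode only when caffe
//support was compiled in, e.g.
//  if(cnn_config::caffe_backend::getCaffeAvailable())
//      cnn_config::caffe_backend::setCaffeGpuMode(true);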
class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{
private:
struct NetOutput{
//Auxiliary structure that handles the logic of getting class ids and probabillities from
//the raw outputs of caffe
int wordIdx;
float probabillity;
static bool sorter(const NetOutput& o1,const NetOutput& o2)
{//used with std::sort to provide the most probable class
return o1.probabillity>o2.probabillity;
}
static void getOutputs(const float* buffer,int nbOutputs,std::vector<NetOutput>& res)
{
res.resize(nbOutputs);
for(int k=0;k<nbOutputs;k++)
{
res[k].wordIdx=k;
res[k].probabillity=buffer[k];
}
std::sort(res.begin(),res.end(),NetOutput::sorter);
}
static void getClassification(const float* buffer,int nbOutputs,int &classNum,double& confidence)
{
std::vector<NetOutput> tmp;
getOutputs(buffer,nbOutputs,tmp);
classNum=tmp[0].wordIdx;
confidence=tmp[0].probabillity;
}
};
protected:
std::vector<String> labels_;
Ptr<TextImageClassifier> classifier_;
public:
OCRHolisticWordRecognizerImpl(Ptr<TextImageClassifier> classifierPtr,String vocabularyFilename):classifier_(classifierPtr)
{
CV_Assert(fileExists(vocabularyFilename));
std::ifstream labelsFile(vocabularyFilename.c_str());
if(!labelsFile)
{
CV_Error(Error::StsError,"Could not read Labels from file");
}
std::string line;
while (std::getline(labelsFile, line))
{
labels_.push_back(std::string(line));
}
CV_Assert(this->classifier_->getOutputSize()==int(this->labels_.size()));
}
OCRHolisticWordRecognizerImpl(Ptr<TextImageClassifier> classifierPtr,const std::vector<String>& vocabulary):classifier_(classifierPtr)
{
this->labels_=vocabulary;
CV_Assert(this->classifier_->getOutputSize()==int(this->labels_.size()));
}
void recogniseImage(InputArray inputImage,CV_OUT String& transcription,CV_OUT double& confidence)
{
Mat netOutput;
this->classifier_->classify(inputImage,netOutput);
int classNum;
NetOutput::getClassification((float*)(netOutput.data),this->classifier_->getOutputSize(),classNum,confidence);
transcription=this->labels_[classNum];
}
void recogniseImageBatch(InputArrayOfArrays inputImageList,CV_OUT std::vector<String>& transcriptionVec,CV_OUT std::vector<double>& confidenceVec)
{
Mat netOutput;
this->classifier_->classifyBatch(inputImageList,netOutput);
for(int k=0;k<netOutput.rows;k++)
{
int classNum;
double confidence;
NetOutput::getClassification((float*)(netOutput.row(k).data),this->classifier_->getOutputSize(),classNum,confidence);
transcriptionVec.push_back(this->labels_[classNum]);
confidenceVec.push_back(confidence);
}
}
void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0)
{
CV_Assert(component_level==OCR_LEVEL_WORD);//Components are not applicable for word spotting
double confidence;
String transcription;
recogniseImage(image,transcription,confidence);
output_text=transcription.c_str();
if(component_rects!=NULL)
{
component_rects->resize(1);
(*component_rects)[0]=Rect(0,0,image.size().width,image.size().height);
}
if(component_texts!=NULL)
{
component_texts->resize(1);
(*component_texts)[0]=transcription.c_str();
}
if(component_confidences!=NULL)
{
component_confidences->resize(1);
(*component_confidences)[0]=float(confidence);
}
}
void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0)
{
CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image
this->run(image,output_text,component_rects,component_texts,component_confidences,component_level);
}
std::vector<String>& getVocabulary()
{
return this->labels_;
}
Ptr<TextImageClassifier> getClassifier()
{
return this->classifier_;
}
};
Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(Ptr<TextImageClassifier> classifierPtr,String vocabularyFilename )
{
return Ptr<OCRHolisticWordRecognizer>(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabularyFilename));
}
Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(String modelArchFilename, String modelWeightsFilename, String vocabularyFilename)
{
Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageStandarizer(113);
Ptr<TextImageClassifier> classifierPtr(new DeepCNNCaffeImpl(modelArchFilename,modelWeightsFilename,preprocessor,100));
return Ptr<OCRHolisticWordRecognizer>(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabularyFilename));
}
Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(Ptr<TextImageClassifier> classifierPtr,const std::vector<String>& vocabulary)
{
return Ptr<OCRHolisticWordRecognizer>(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabulary));
}
Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(String modelArchFilename, String modelWeightsFilename,const std::vector<String>& vocabulary){
Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageStandarizer(113);
Ptr<TextImageClassifier> classifierPtr(new DeepCNNCaffeImpl(modelArchFilename,modelWeightsFilename,preprocessor,100));
return Ptr<OCRHolisticWordRecognizer>(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabulary));
}
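//Example of use (a minimal sketch; the DictNet prototxt/caffemodel/label files must be
//obtained separately, see the documentation in ocr.hpp):
//  Ptr<OCRHolisticWordRecognizer> wordSpotter = OCRHolisticWordRecognizer::create(
//      "dictnet_vgg_deploy.prototxt", "dictnet_vgg.caffemodel", "dictnet_vgg_labels.txt");
//  String word; double conf;
//  wordSpotter->recogniseImage(wordImage, word, conf);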
} } //namespace text namespace cv

@ -0,0 +1,643 @@
#include "precomp.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/core.hpp"
#include <iostream>
#include <fstream>
#include <sstream>
#include <queue>
#include <algorithm>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#ifdef HAVE_CAFFE
#include "caffe/caffe.hpp"
#endif
namespace cv { namespace text {
//The ImagePreprocessor, TextImageClassifier and DeepCNN implementations used by this
//detector are shared with ocr_holistic.cpp and are therefore not repeated here.
class textDetectImpl: public textDetector{
private:
struct NetOutput{
//Auxiliary structure that handles the logic of decoding bounding boxes and their
//text/no-text confidences from the raw output blob of the caffe detection network
Rect bbox;
float probability;
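//getOutputs() below assumes an SSD/TextBoxes-style detection_output layout: nCol values
//per candidate box, with the confidence score at column 2 and the normalised
//[xmin,ymin,xmax,ymax] coordinates at columns 3-6 (this reading is inferred from the
//indexing in the code, not from an external spec)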
static void getOutputs(const float* buffer,int nbrTextBoxes,int nCol,std::vector<NetOutput>& res,Size inputShape)
{
res.resize(nbrTextBoxes);
for(int k=0;k<nbrTextBoxes;k++)
{
float x_min = buffer[k*nCol+3]*inputShape.width;
float y_min = buffer[k*nCol+4]*inputShape.height;
float x_max = buffer[k*nCol+5]*inputShape.width;
float y_max = buffer[k*nCol +6]*inputShape.height;
x_min = x_min<0?0:x_min;
y_min = y_min<0?0:y_min;
x_max = x_max>=inputShape.width?inputShape.width-1:x_max;
y_max = y_max>=inputShape.height?inputShape.height-1:y_max;
float wd = x_max-x_min+1;
float ht = y_max-y_min+1;
res[k].bbox=Rect(int(x_min),int(y_min),int(wd),int(ht));
res[k].probability=buffer[k*nCol+2];
}
};
protected:
Ptr<TextImageClassifier> classifier_;
public:
textDetectImpl(Ptr<TextImageClassifier> classifierPtr):classifier_(classifierPtr)
{
}
void textDetectInImage(InputArray inputImage,CV_OUT std::vector<Rect>& Bbox,CV_OUT std::vector<float>& confidence)
{
Mat netOutput;
this->classifier_->detect(inputImage,netOutput);
Size outputGeometry = this->classifier_->getOutputGeometry();
int nbrTextBoxes = outputGeometry.height;
int nCol = outputGeometry.width;
std::vector<NetOutput> tmp;
Size inputImageShape = Size(inputImage.cols(),inputImage.rows());
NetOutput::getOutputs((float*)(netOutput.data),nbrTextBoxes,nCol,tmp,inputImageShape);
for (int k=0;k<nbrTextBoxes;k++)
{
Bbox.push_back(tmp[k].bbox);
confidence.push_back(tmp[k].probability);
}
}
void run(Mat& image, std::vector<Rect>* component_rects=NULL,
std::vector<float>* component_confidences=NULL,
int component_level=0)
{
CV_Assert(component_level==OCR_LEVEL_WORD);//Components are not applicable for word spotting
std::vector<Rect> bbox;
std::vector<float> score;
textDetectInImage(image,bbox,score);
if(component_rects!=NULL)
{
*component_rects = bbox;//copy the detections out; reassigning the pointer itself would never reach the caller
}
if(component_confidences!=NULL)
{
*component_confidences = score;
}
}
void run(Mat& image, Mat& mask, std::vector<Rect>* component_rects=NULL,
std::vector<float>* component_confidences=NULL,
int component_level=0)
{
CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image
this->run(image,component_rects,component_confidences,component_level);
}
Ptr<TextImageClassifier> getClassifier()
{
return this->classifier_;
}
};
Ptr<textDetector> textDetector::create(Ptr<TextImageClassifier> classifierPtr)
{
return Ptr<textDetector>(new textDetectImpl(classifierPtr));
}
Ptr<textDetector> textDetector::create(String modelArchFilename, String modelWeightsFilename)
{
Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255);
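//per-channel BGR means subtracted by the preprocessor; 104/117/123 are the values
//commonly used with VGG-derived caffe detection models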
Mat textbox_mean(1,3,CV_8U);
textbox_mean.at<uchar>(0,0)=104;
textbox_mean.at<uchar>(0,1)=117;
textbox_mean.at<uchar>(0,2)=123;
preprocessor->set_mean(textbox_mean);
Ptr<TextImageClassifier> classifierPtr(DeepCNN::create(modelArchFilename,modelWeightsFilename,preprocessor,1));
return Ptr<textDetector>(new textDetectImpl(classifierPtr));
}
} } //namespace text namespace cv

@ -1,7 +1,13 @@
#ifndef __OPENCV_TEXT_CONFIG_HPP__
#define __OPENCV_TEXT_CONFIG_HPP__
// HAVE QT5
//#cmakedefine HAVE_QT5GUI
// HAVE CAFFE
//#cmakedefine HAVE_CAFFE
// HAVE OCR Tesseract
//#cmakedefine HAVE_TESSERACT
#endif
