Merge pull request #60 from lluisgomez/master

refactors OCRTesseract interface
11 years ago · 37af04328a
parent 47f61f1c51 36a3116191
commit 37af04328a
7 changed files with 235 additions and 250 deletions
--- a/modules/text/doc/ocr.rst
+++ b/modules/text/doc/ocr.rst
@ -5,39 +5,19 @@ Scene Text Recognition

 OCRTesseract
 ------------
-.. ocv:class:: OCRTesseract
+.. ocv:class:: OCRTesseract : public BaseOCR

 OCRTesseract class provides an interface with the tesseract-ocr API (v3.02.02) in C++. Notice that it is compiled only when tesseract-ocr is correctly installed. ::

-    class CV_EXPORTS OCRTesseract
-    {
-    private:
-        tesseract::TessBaseAPI tess;
-    
-    public:
-        //! Default constructor
-        OCRTesseract(const char* datapath=NULL, const char* language=NULL, const char* char_whitelist=NULL,
-                     tesseract::OcrEngineMode oem=tesseract::OEM_DEFAULT, tesseract::PageSegMode psmode=tesseract::PSM_AUTO);
-    
-        ~OCRTesseract();
-    
-        /*!
-        the key method. Takes image on input and returns recognized text in the output_text parameter
-        optionally provides also the Rects for individual text elements (e.g. words) and a list of 
-        ranked recognition alternatives.
-        */
-        void run(Mat& image, string& output_text, vector<Rect>* component_rects=NULL,
-                 vector<string>* component_texts=NULL, vector<float>* component_confidences=NULL,
-                 int component_level=0);
-    };
-
-To see the OCRTesseract combined with scene text detection, have a look at the end_to_end_recognition demo: https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/end_to_end_recognition.cpp
-
-OCRTesseract::OCRTesseract
--------------------------
-Constructor.
-
-.. ocv:function:: void OCRTesseract::OCRTesseract(const char* datapath=NULL, const char* language=NULL, const char* char_whitelist=NULL, tesseract::OcrEngineMode oem=tesseract::OEM_DEFAULT, tesseract::PageSegMode psmode=tesseract::PSM_AUTO)
+.. note::
+
+    * (C++) An example of OCRTesseract recognition combined with scene text detection can be found in the end_to_end_recognition demo: https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/end_to_end_recognition.cpp
+
+OCRTesseract::create
+--------------------
+Creates an instance of the OCRTesseract class. Initializes Tesseract.
+
+.. ocv:function:: Ptr<OCRTesseract> OCRTesseract::create(const char* datapath=NULL, const char* language=NULL, const char* char_whitelist=NULL, int oem=(int)tesseract::OEM_DEFAULT, int psmode=(int)tesseract::PSM_AUTO)

    :param datapath: the name of the parent directory of tessdata ended with "/", or NULL to use the system's default directory.
    :param language: an ISO 639-3 code or NULL will default to "eng".
--- a/modules/text/include/opencv2/text/ocr.hpp
+++ b/modules/text/include/opencv2/text/ocr.hpp
@ -44,24 +44,14 @@
 #ifndef __OPENCV_TEXT_OCR_HPP__
 #define __OPENCV_TEXT_OCR_HPP__

-#include "text_config.hpp"
-
-#ifdef HAVE_TESSERACT
-#include <tesseract/baseapi.h>
-#include <tesseract/resultiterator.h>
-#endif
-
-#include "opencv2/core.hpp"
 #include <vector>
 #include <string>

-
 namespace cv
 {
 namespace text
 {

-using namespace std;

 enum
 {
@ -69,40 +59,26 @@ enum
    OCR_LEVEL_TEXTLINE
 };

-#ifdef HAVE_TESSERACT
-class CV_EXPORTS OCRTesseract
+// Base class BaseOCR declares the common API used in a typical text recognition scenario.
+class CV_EXPORTS BaseOCR
 {
-private:
-    tesseract::TessBaseAPI tess;
-
 public:
-    //Default constructor
-    OCRTesseract(const char* datapath=NULL, const char* language=NULL, const char* char_whitelist=NULL,
-                 tesseract::OcrEngineMode oem=tesseract::OEM_DEFAULT, tesseract::PageSegMode psmode=tesseract::PSM_AUTO);
-
-    ~OCRTesseract();
-
-    void run(Mat& image, string& output_text, vector<Rect>* component_rects=NULL,
-             vector<string>* component_texts=NULL, vector<float>* component_confidences=NULL,
-             int component_level=0);
+    virtual ~BaseOCR() {};
+    virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
+                     int component_level=0) = 0;
 };
-#else
-//stub
-class CV_EXPORTS OCRTesseract
+
+class CV_EXPORTS OCRTesseract : public BaseOCR
 {
 public:
-    //Default constructor
-    OCRTesseract(const char* datapath=NULL, const char* language=NULL, const char* char_whitelist=NULL,
-                 int oem=0, int psmode=0);
+    virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
+                     std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
+                     int component_level=0);

-    ~OCRTesseract();
-
-    void run(Mat& image, string& output_text, vector<Rect>* component_rects=NULL,
-             vector<string>* component_texts=NULL, vector<float>* component_confidences=NULL,
-             int component_level=0);
+    static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
+                                    const char* char_whitelist=NULL, int oem=3, int psmode=3);
 };
-#endif
-


 }
--- a/modules/text/samples/end_to_end_recognition.cpp
+++ b/modules/text/samples/end_to_end_recognition.cpp
@ -102,7 +102,7 @@ int main(int argc, char* argv[])
    /*Text Recognition (OCR)*/

    double t_r = (double)getTickCount();
-    OCRTesseract* ocr = new OCRTesseract();
+    Ptr<OCRTesseract> ocr = OCRTesseract::create();
    cout << "TIME_OCR_INITIALIZATION = " << ((double)getTickCount() - t_r)*1000/getTickFrequency() << endl;
    string output;

--- a/modules/text/samples/webcam_demo.cpp
+++ b/modules/text/samples/webcam_demo.cpp
@ -54,12 +54,12 @@ private:
    vector< vector<Rect> > &boxes;
    vector< vector<string> > &words;
    vector< vector<float> > &confidences;
-    vector< OCRTesseract* > &ocrs;
+    vector< Ptr<OCRTesseract> > &ocrs;

 public:
    Parallel_OCR(vector<Mat> &_detections, vector<string> &_outputs, vector< vector<Rect> > &_boxes,
                 vector< vector<string> > &_words, vector< vector<float> > &_confidences, 
-                 vector< OCRTesseract* > &_ocrs)
+                 vector< Ptr<OCRTesseract> > &_ocrs)
        : detections(_detections), outputs(_outputs), boxes(_boxes), words(_words), 
          confidences(_confidences), ocrs(_ocrs)
    {}
@ -120,11 +120,10 @@ int main(int argc, char* argv[])

    //Initialize OCR engine (we initialize 10 instances in order to work several recognitions in parallel)
    int num_ocrs = 10;
-    vector<OCRTesseract*> ocrs;
+    vector< Ptr<OCRTesseract> > ocrs;
    for (int o=0; o<num_ocrs; o++)
    {
-      OCRTesseract* ocr = new OCRTesseract();
-      ocrs.push_back(ocr);
+      ocrs.push_back(OCRTesseract::create());
    }

    //cout << "TIME_OCR_INITIALIZATION_ALT = "<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << endl;
--- a/modules/text/src/ocr.cpp
+++ b/modules/text/src/ocr.cpp
@ -1,177 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include "precomp.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/ml.hpp"
-
-#include <iostream>
-#include <fstream>
-#include <queue>
-
-namespace cv
-{
-namespace text
-{
-
-using namespace std;
-
-
-#ifdef HAVE_TESSERACT
-//Default constructor
-OCRTesseract::OCRTesseract(const char* datapath, const char* language, const char* char_whitelist, tesseract::OcrEngineMode oemode, tesseract::PageSegMode psmode)
-{
-
-    const char *lang = "eng";
-    if (language != NULL)
-        lang = language;
-
-    if (tess.Init(datapath, lang, oemode))
-    {
-        cout << "OCRTesseract: Could not initialize tesseract." << endl;
-        throw 1;
-    }
-
-    //cout << "OCRTesseract: tesseract version " << tess.Version() << endl;
-
-    tesseract::PageSegMode pagesegmode = psmode;
-    tess.SetPageSegMode(pagesegmode);
-
-    if(char_whitelist != NULL)
-        tess.SetVariable("tessedit_char_whitelist", char_whitelist);
-    else
-        tess.SetVariable("tessedit_char_whitelist", "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
-
-    tess.SetVariable("save_best_choices", "T");
-
-}
-
-OCRTesseract::~OCRTesseract()
-{
-    tess.End();
-}
-
-void OCRTesseract::run(Mat& image, string& output, vector<Rect>* component_rects,
-                       vector<string>* component_texts, vector<float>* component_confidences, int component_level)
-{
-    CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC1) );
-    if (component_texts != 0)
-        component_texts->clear();
-    if (component_rects != 0)
-        component_rects->clear();
-    if (component_confidences != 0)
-        component_confidences->clear();
-
-    tess.SetImage((uchar*)image.data, image.size().width, image.size().height, image.channels(), image.step1());
-    tess.Recognize(0);
-    output = string(tess.GetUTF8Text());
-
-    if ( (component_rects != NULL) || (component_texts != NULL) || (component_confidences != NULL) )
-    {
-        tesseract::ResultIterator* ri = tess.GetIterator();
-        tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
-        if (component_level == OCR_LEVEL_TEXTLINE)
-            level = tesseract::RIL_TEXTLINE;
-
-        if (ri != 0) {
-            do {
-                const char* word = ri->GetUTF8Text(level);
-                if (word == NULL)
-                    continue;
-                float conf = ri->Confidence(level);
-                int x1, y1, x2, y2;
-                ri->BoundingBox(level, &x1, &y1, &x2, &y2);
-
-                if (component_texts != 0)
-                    component_texts->push_back(string(word));
-                if (component_rects != 0)
-                    component_rects->push_back(Rect(x1,y1,x2-x1,y2-y1));
-                if (component_confidences != 0)
-                    component_confidences->push_back(conf);
-
-                delete[] word;
-            } while (ri->Next(level));
-        }
-        delete ri;
-    }
-
-    tess.Clear();
-}
-#else
-//Stub constructor
-OCRTesseract::OCRTesseract(const char* datapath, const char* language, const char* char_whitelist, int oemode, int psmode)
-{
-    cout << "OCRTesseract("<<oemode<<psmode<<"): Tesseract not found." << endl;
-    if (datapath != NULL)
-      cout << "            " << datapath << endl;
-    if (language != NULL)
-      cout << "            " << language << endl;
-    if (char_whitelist != NULL)
-      cout << "            " << char_whitelist << endl;
-}
-
-//Stub destructor
-OCRTesseract::~OCRTesseract()
-{
-}
-
-//Stub method, does nothing
-void OCRTesseract::run(Mat& image, string& output, vector<Rect>* component_rects,
-                       vector<string>* component_texts, vector<float>* component_confidences, int component_level)
-{
-    CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC1) );
-
-    cout << "OCRTesseract(" << component_level << image.type() <<"): Tesseract not found." << endl;
-    output.clear();
-    if(component_rects)
-        component_rects->clear();
-    if(component_texts)
-        component_texts->clear();
-    if(component_confidences)
-        component_confidences->clear();
-}
-#endif
-
-
-
-}
-}
--- a/modules/text/src/ocr_tesseract.cpp
+++ b/modules/text/src/ocr_tesseract.cpp
@ -0,0 +1,200 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/ml.hpp"
+
+#include <iostream>
+#include <fstream>
+#include <queue>
+
+namespace cv
+{
+namespace text
+{
+
+using namespace std;
+
+/**
+ * Default implementation of OCRTesseract::run.
+ *
+ * It performs no recognition: it only validates its arguments and resets
+ * every output that was supplied.
+ *
+ * @param image                 input image; must be of type CV_8UC1 or CV_8UC3.
+ * @param output_text           cleared.
+ * @param component_rects       cleared when not NULL.
+ * @param component_texts       cleared when not NULL.
+ * @param component_confidences cleared when not NULL.
+ * @param component_level       must be OCR_LEVEL_WORD or OCR_LEVEL_TEXTLINE.
+ */
+void OCRTesseract::run(Mat& image, string& output_text, vector<Rect>* component_rects,
+                       vector<string>* component_texts, vector<float>* component_confidences,
+                       int component_level)
+{
+    // The second alternative previously repeated CV_8UC1, which made the
+    // check reject 3-channel images.
+    CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC3) );
+    CV_Assert( (component_level == OCR_LEVEL_TEXTLINE) || (component_level == OCR_LEVEL_WORD) );
+
+    output_text.clear();
+    if (component_rects != NULL)
+        component_rects->clear();
+    if (component_texts != NULL)
+        component_texts->clear();
+    if (component_confidences != NULL)
+        component_confidences->clear();
+}
+
+/**
+ * Concrete OCRTesseract implementation.
+ *
+ * When HAVE_TESSERACT is defined it owns a tesseract::TessBaseAPI instance and
+ * forwards recognition to it. Otherwise every method is a stub that prints a
+ * "Tesseract not found" message and produces empty results.
+ */
+class OCRTesseractImpl : public OCRTesseract
+{
+private:
+#ifdef HAVE_TESSERACT
+    tesseract::TessBaseAPI tess;
+#endif
+
+public:
+    /**
+     * Initializes the Tesseract engine.
+     *
+     * @param datapath       passed unchanged to TessBaseAPI::Init (may be NULL).
+     * @param language       language code; "eng" is used when NULL.
+     * @param char_whitelist characters the engine may output; when NULL the
+     *                       whitelist is set to digits plus ASCII letters.
+     * @param oemode         converted to tesseract::OcrEngineMode.
+     * @param psmode         converted to tesseract::PageSegMode.
+     *
+     * Throws the int value 1 when TessBaseAPI::Init reports failure. Without
+     * HAVE_TESSERACT nothing is initialized; the arguments are only printed.
+     */
+    OCRTesseractImpl(const char* datapath, const char* language, const char* char_whitelist, int oemode, int psmode)
+    {
+
+#ifdef HAVE_TESSERACT
+        const char *lang = (language != NULL) ? language : "eng";
+
+        // Init() returns non-zero on failure.
+        if (tess.Init(datapath, lang, static_cast<tesseract::OcrEngineMode>(oemode)))
+        {
+            cout << "OCRTesseract: Could not initialize tesseract." << endl;
+            // Exception type kept as-is so existing callers see the same failure.
+            throw 1;
+        }
+
+        //cout << "OCRTesseract: tesseract version " << tess.Version() << endl;
+
+        tess.SetPageSegMode(static_cast<tesseract::PageSegMode>(psmode));
+
+        if (char_whitelist != NULL)
+            tess.SetVariable("tessedit_char_whitelist", char_whitelist);
+        else
+            tess.SetVariable("tessedit_char_whitelist", "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
+
+        tess.SetVariable("save_best_choices", "T");
+#else
+        cout << "OCRTesseract("<<oemode<<psmode<<"): Tesseract not found." << endl;
+        if (datapath != NULL)
+            cout << "            " << datapath << endl;
+        if (language != NULL)
+            cout << "            " << language << endl;
+        if (char_whitelist != NULL)
+            cout << "            " << char_whitelist << endl;
+#endif
+    }
+
+    //! Shuts the engine down (no-op without HAVE_TESSERACT).
+    ~OCRTesseractImpl()
+    {
+#ifdef HAVE_TESSERACT
+        tess.End();
+#endif
+    }
+
+    /**
+     * Recognizes the text in an image.
+     *
+     * @param image                 input image; must be of type CV_8UC1 or CV_8UC3.
+     * @param output                receives the full recognized text (UTF-8).
+     * @param component_rects       if not NULL, receives one bounding box per component.
+     * @param component_texts       if not NULL, receives the text of each component.
+     * @param component_confidences if not NULL, receives the confidence of each component.
+     * @param component_level       OCR_LEVEL_TEXTLINE selects text lines; any other
+     *                              value selects words.
+     *
+     * Without HAVE_TESSERACT a message is printed and all outputs are cleared.
+     */
+    void run(Mat& image, string& output, vector<Rect>* component_rects=NULL,
+             vector<string>* component_texts=NULL, vector<float>* component_confidences=NULL,
+             int component_level=0)
+    {
+
+        // The second alternative previously repeated CV_8UC1, which made the
+        // check reject 3-channel images.
+        CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC3) );
+
+#ifdef HAVE_TESSERACT
+
+        if (component_texts != NULL)
+            component_texts->clear();
+        if (component_rects != NULL)
+            component_rects->clear();
+        if (component_confidences != NULL)
+            component_confidences->clear();
+
+        tess.SetImage((uchar*)image.data, image.size().width, image.size().height, image.channels(), image.step1());
+        tess.Recognize(0);
+
+        // GetUTF8Text() hands back a heap buffer owned by the caller: copy it
+        // and release it so it is not leaked on every call. A NULL result is
+        // mapped to an empty string.
+        char* full_text = tess.GetUTF8Text();
+        if (full_text != NULL)
+            output = string(full_text);
+        else
+            output.clear();
+        delete[] full_text;
+
+        if ( (component_rects != NULL) || (component_texts != NULL) || (component_confidences != NULL) )
+        {
+            tesseract::ResultIterator* ri = tess.GetIterator();
+            tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
+            if (component_level == OCR_LEVEL_TEXTLINE)
+                level = tesseract::RIL_TEXTLINE;
+
+            if (ri != NULL) {
+                do {
+                    const char* word = ri->GetUTF8Text(level);
+                    // Components without text are skipped.
+                    if (word != NULL)
+                    {
+                        float conf = ri->Confidence(level);
+                        int x1, y1, x2, y2;
+                        ri->BoundingBox(level, &x1, &y1, &x2, &y2);
+
+                        if (component_texts != NULL)
+                            component_texts->push_back(string(word));
+                        if (component_rects != NULL)
+                            component_rects->push_back(Rect(x1,y1,x2-x1,y2-y1));
+                        if (component_confidences != NULL)
+                            component_confidences->push_back(conf);
+
+                        delete[] word;
+                    }
+                } while (ri->Next(level));
+            }
+            delete ri;
+        }
+
+        tess.Clear();
+
+#else
+
+        cout << "OCRTesseract(" << component_level << image.type() <<"): Tesseract not found." << endl;
+        output.clear();
+        if(component_rects)
+            component_rects->clear();
+        if(component_texts)
+            component_texts->clear();
+        if(component_confidences)
+            component_confidences->clear();
+#endif
+    }
+
+
+};
+
+// Factory: builds an OCRTesseractImpl from the given settings and returns it
+// through the OCRTesseract interface.
+Ptr<OCRTesseract> OCRTesseract::create(const char* datapath, const char* language,
+                                       const char* char_whitelist, int oem, int psmode)
+{
+    Ptr<OCRTesseractImpl> impl =
+        makePtr<OCRTesseractImpl>(datapath, language, char_whitelist, oem, psmode);
+    return impl;
+}
+
+
+}
+}
--- a/modules/text/src/precomp.hpp
+++ b/modules/text/src/precomp.hpp
@ -45,4 +45,11 @@

 #include "opencv2/text.hpp"

+#include "text_config.hpp"
+
+#ifdef HAVE_TESSERACT
+#include <tesseract/baseapi.h>
+#include <tesseract/resultiterator.h>
+#endif
+
 #endif