diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp index 7d8149672..651934b0c 100644 --- a/modules/text/include/opencv2/text/ocr.hpp +++ b/modules/text/include/opencv2/text/ocr.hpp @@ -1,4 +1,4 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// +/*M////////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // @@ -62,7 +62,7 @@ enum }; //base class BaseOCR declares a common API that would be used in a typical text recognition scenario -class CV_EXPORTS BaseOCR +class CV_EXPORTS_W BaseOCR { public: virtual ~BaseOCR() {}; @@ -86,7 +86,7 @@ Notice that it is compiled only when tesseract-ocr is correctly installed. found at the webcam_demo: */ -class CV_EXPORTS OCRTesseract : public BaseOCR +class CV_EXPORTS_W OCRTesseract : public BaseOCR { public: /** @brief Recognize text using the tesseract-ocr API. @@ -113,6 +113,14 @@ public: std::vector* component_texts=NULL, std::vector* component_confidences=NULL, int component_level=0); + // run() aliases for scripting: return the concatenated component texts whose confidence is strictly above min_confidence + CV_WRAP String run(InputArray image, int min_confidence, int component_level=0); + + CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0); + + CV_WRAP virtual void setWhiteList(const String& char_whitelist) = 0; + + /** @brief Creates an instance of the OCRTesseract class. Initializes Tesseract. @param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the @@ -127,7 +135,7 @@ public: (fully automatic layout analysis) is used. See the tesseract-ocr API documentation for other possible values. 
*/ - static Ptr create(const char* datapath=NULL, const char* language=NULL, + CV_WRAP static Ptr create(const char* datapath=NULL, const char* language=NULL, const char* char_whitelist=NULL, int oem=3, int psmode=3); }; @@ -146,7 +154,7 @@ enum decoder_mode be found at the webcam_demo sample: */ -class CV_EXPORTS OCRHMMDecoder : public BaseOCR +class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR { public: @@ -159,7 +167,7 @@ public: loadOCRHMMClassifierNM and KNN model provided in . */ - class CV_EXPORTS ClassifierCallback + class CV_EXPORTS_W ClassifierCallback { public: virtual ~ClassifierCallback() { } @@ -227,6 +235,11 @@ public: std::vector* component_texts=NULL, std::vector* component_confidences=NULL, int component_level=0); + // run() aliases for scripting: return the concatenated component texts whose confidence is strictly above min_confidence + CV_WRAP String run(InputArray image, int min_confidence, int component_level=0); + + CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0); + /** @brief Creates an instance of the OCRHMMDecoder class. Initializes HMMDecoder. @param classifier The character classifier with built in feature extractor. 
@@ -252,6 +265,15 @@ public: // cols == rows == vocabulari.size() decoder_mode mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment) + CV_WRAP static Ptr create(const Ptr classifier,// The character classifier with built in feature extractor + const String& vocabulary, // The language vocabulary (chars when ascii english text) + // size() must be equal to the number of classes + InputArray transition_probabilities_table, // Table with transition probabilities between character pairs + // cols == rows == vocabulary.size() + InputArray emission_probabilities_table, // Table with observation emission probabilities + // cols == rows == vocabulary.size() + int mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment) + protected: Ptr classifier; @@ -272,7 +294,8 @@ based on gradient orientations along the chain-code of its perimeter. Then, the using a KNN model trained with synthetic data of rendered characters with different standard font types. */ -CV_EXPORTS Ptr loadOCRHMMClassifierNM(const std::string& filename); + +CV_EXPORTS_W Ptr loadOCRHMMClassifierNM(const String& filename); /** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object. @@ -283,7 +306,7 @@ Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Co a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions at each window location. */ -CV_EXPORTS Ptr loadOCRHMMClassifierCNN(const std::string& filename); +CV_EXPORTS_W Ptr loadOCRHMMClassifierCNN(const String& filename); //! 
@} @@ -299,9 +322,11 @@ CV_EXPORTS Ptr loadOCRHMMClassifierCNN(const * @note * - (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) : * - * */ + **/ CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector& lexicon, OutputArray transition_probabilities_table); +CV_EXPORTS_W Mat createOCRHMMTransitionsTable(const String& vocabulary, std::vector& lexicon); + /* OCR BeamSearch Decoder */ @@ -312,7 +337,7 @@ CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vecto be found at the demo sample: */ -class CV_EXPORTS OCRBeamSearchDecoder : public BaseOCR +class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR { public: @@ -325,7 +350,7 @@ public: loadOCRBeamSearchClassifierCNN with all its parameters provided in . */ - class CV_EXPORTS ClassifierCallback + class CV_EXPORTS_W ClassifierCallback { public: virtual ~ClassifierCallback() { } @@ -350,7 +375,7 @@ public: provides also the Rects for individual text elements found (e.g. words), and the list of those text elements with their confidence values. - @param image Input image CV_8UC1 with a single text line (or word). + @param image Input binary image CV_8UC1 with a single text line (or word). @param output_text Output text. Most likely character sequence found by the HMM decoder. @@ -373,6 +398,11 @@ public: std::vector* component_texts=NULL, std::vector* component_confidences=NULL, int component_level=0); + // run() aliases for scripting: return the concatenated component texts whose confidence is strictly above min_confidence + CV_WRAP String run(InputArray image, int min_confidence, int component_level=0); + + CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0); + /** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder. @param classifier The character classifier with built in feature extractor. 
@@ -401,6 +431,16 @@ public: decoder_mode mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment) int beam_size = 500); // Size of the beam in Beam Search algorithm + CV_WRAP static Ptr create(const Ptr classifier, // The character classifier with built in feature extractor + const String& vocabulary, // The language vocabulary (chars when ascii english text) + // size() must be equal to the number of classes + InputArray transition_probabilities_table, // Table with transition probabilities between character pairs + // cols == rows == vocabulary.size() + InputArray emission_probabilities_table, // Table with observation emission probabilities + // cols == rows == vocabulary.size() + int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment) + int beam_size = 500); // Size of the beam in Beam Search algorithm + protected: Ptr classifier; @@ -420,7 +460,8 @@ Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Co a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions at each window location. */ -CV_EXPORTS Ptr loadOCRBeamSearchClassifierCNN(const std::string& filename); + +CV_EXPORTS_W Ptr loadOCRBeamSearchClassifierCNN(const String& filename); //! 
@} diff --git a/modules/text/src/ocr_beamsearch_decoder.cpp b/modules/text/src/ocr_beamsearch_decoder.cpp index 0a87285f1..3ab24d2a1 100644 --- a/modules/text/src/ocr_beamsearch_decoder.cpp +++ b/modules/text/src/ocr_beamsearch_decoder.cpp @@ -88,6 +88,45 @@ void OCRBeamSearchDecoder::run(Mat& image, Mat& mask, string& output_text, vecto component_confidences->clear(); } +CV_WRAP String OCRBeamSearchDecoder::run(InputArray image, int min_confidence, int component_level) +{ + std::string output1; + std::string output2; + vector component_texts; + vector component_confidences; + Mat image_m = image.getMat(); + run(image_m, output1, NULL, &component_texts, &component_confidences, component_level); + for(unsigned int i = 0; i < component_texts.size(); i++) + { + // keep only the components whose confidence is strictly above min_confidence + if(component_confidences[i] > min_confidence) + { + output2 += component_texts[i]; + } + } + return String(output2); +} + +CV_WRAP String OCRBeamSearchDecoder::run(InputArray image, InputArray mask, int min_confidence, int component_level) +{ + std::string output1; + std::string output2; + vector component_texts; + vector component_confidences; + Mat image_m = image.getMat(); + Mat mask_m = mask.getMat(); + run(image_m, mask_m, output1, NULL, &component_texts, &component_confidences, component_level); + for(unsigned int i = 0; i < component_texts.size(); i++) + { + // keep only the components whose confidence is strictly above min_confidence + if(component_confidences[i] > min_confidence) + { + output2 += component_texts[i]; + } + } + return String(output2); +} + void OCRBeamSearchDecoder::ClassifierCallback::eval( InputArray image, vector< vector >& recognition_probabilities, vector& oversegmentation) { @@ -460,6 +499,16 @@ Ptr OCRBeamSearchDecoder::create( Ptr(_classifier, _vocabulary, transition_p, emission_p, _mode, _beam_size); } +Ptr OCRBeamSearchDecoder::create(Ptr _classifier, 
+ const String& _vocabulary, + InputArray transition_p, + InputArray emission_p, + int _mode, + int _beam_size) +{ + return makePtr(_classifier, _vocabulary, transition_p, emission_p, (decoder_mode)_mode, _beam_size); +} + class CV_EXPORTS OCRBeamSearchClassifierCNN : public OCRBeamSearchDecoder::ClassifierCallback { @@ -727,11 +776,10 @@ double OCRBeamSearchClassifierCNN::eval_feature(Mat& feature, double* prob_estim return dec_max_idx; } - -Ptr loadOCRBeamSearchClassifierCNN(const std::string& filename) +Ptr loadOCRBeamSearchClassifierCNN(const String& filename) { - return makePtr(filename); + return makePtr(std::string(filename)); } } diff --git a/modules/text/src/ocr_hmm_decoder.cpp b/modules/text/src/ocr_hmm_decoder.cpp index 8a0a74b9d..002dc8613 100644 --- a/modules/text/src/ocr_hmm_decoder.cpp +++ b/modules/text/src/ocr_hmm_decoder.cpp @@ -90,6 +90,46 @@ void OCRHMMDecoder::run(Mat& image, Mat& mask, string& output_text, vector component_confidences->clear(); } +CV_WRAP String OCRHMMDecoder::run(InputArray image, int min_confidence, int component_level) +{ + std::string output1; + std::string output2; + vector component_texts; + vector component_confidences; + Mat image_m = image.getMat(); + run(image_m, output1, NULL, &component_texts, &component_confidences, component_level); + for(unsigned int i = 0; i < component_texts.size(); i++) + { + // keep only the components whose confidence is strictly above min_confidence + if(component_confidences[i] > min_confidence) + { + output2 += component_texts[i]; + } + } + return String(output2); +} + +CV_WRAP cv::String OCRHMMDecoder::run(InputArray image, InputArray mask, int min_confidence, int component_level) +{ + std::string output1; + std::string output2; + vector component_texts; + vector component_confidences; + Mat image_m = image.getMat(); + Mat mask_m = mask.getMat(); + run(image_m, mask_m, output1, NULL, &component_texts, &component_confidences, component_level); + for(unsigned int i = 0; 
i < component_texts.size(); i++) + { + // keep only the components whose confidence is strictly above min_confidence + + if(component_confidences[i] > min_confidence) + { + output2 += component_texts[i]; + } + } + return String(output2); +} + void OCRHMMDecoder::ClassifierCallback::eval( InputArray image, vector& out_class, vector& out_confidence) { CV_Assert(( image.getMat().type() == CV_8UC3 ) || ( image.getMat().type() == CV_8UC1 )); @@ -635,6 +675,16 @@ Ptr OCRHMMDecoder::create( Ptr } +Ptr OCRHMMDecoder::create( Ptr _classifier, + const String& _vocabulary, + InputArray transition_p, + InputArray emission_p, + int _mode) +{ + return makePtr(_classifier, _vocabulary, transition_p, emission_p, (decoder_mode)_mode); +} + + class CV_EXPORTS OCRHMMClassifierKNN : public OCRHMMDecoder::ClassifierCallback { public: @@ -867,14 +917,12 @@ void OCRHMMClassifierKNN::eval( InputArray _mask, vector& out_class, vector } -Ptr loadOCRHMMClassifierNM(const std::string& filename) +Ptr loadOCRHMMClassifierNM(const String& filename) { - return makePtr(filename); + return makePtr(std::string(filename)); } - - class CV_EXPORTS OCRHMMClassifierCNN : public OCRHMMDecoder::ClassifierCallback { public: @@ -1139,10 +1187,10 @@ double OCRHMMClassifierCNN::eval_feature(Mat& feature, double* prob_estimates) } -Ptr loadOCRHMMClassifierCNN(const std::string& filename) +Ptr loadOCRHMMClassifierCNN(const String& filename) { - return makePtr(filename); + return makePtr(std::string(filename)); } /** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon). 
@@ -1201,5 +1249,17 @@ void createOCRHMMTransitionsTable(string& vocabulary, vector& lexicon, O return; } +Mat createOCRHMMTransitionsTable(const String& vocabulary, vector& lexicon) +{ + std::string voc(vocabulary); + vector lex; + for(vector::iterator l = lexicon.begin(); l != lexicon.end(); l++) + lex.push_back(std::string(*l)); + + Mat _transitions; + createOCRHMMTransitionsTable(voc, lex, _transitions); + return _transitions; +} + } } diff --git a/modules/text/src/ocr_tesseract.cpp b/modules/text/src/ocr_tesseract.cpp index 79695f0d7..accf47761 100644 --- a/modules/text/src/ocr_tesseract.cpp +++ b/modules/text/src/ocr_tesseract.cpp @@ -86,6 +86,47 @@ void OCRTesseract::run(Mat& image, Mat& mask, string& output_text, vector* component_confidences->clear(); } +CV_WRAP String OCRTesseract::run(InputArray image, int min_confidence, int component_level) +{ + std::string output1; + std::string output2; + vector component_texts; + vector component_confidences; + Mat image_m = image.getMat(); + run(image_m, output1, NULL, &component_texts, &component_confidences, component_level); + for(unsigned int i = 0; i < component_texts.size(); i++) + { + // keep only the components whose confidence is strictly above min_confidence + if(component_confidences[i] > min_confidence) + { + output2 += component_texts[i]; + } + } + return String(output2); +} + +CV_WRAP String OCRTesseract::run(InputArray image, InputArray mask, int min_confidence, int component_level) +{ + std::string output1; + std::string output2; + vector component_texts; + vector component_confidences; + Mat image_m = image.getMat(); + Mat mask_m = mask.getMat(); + run(image_m, mask_m, output1, NULL, &component_texts, &component_confidences, component_level); + for(unsigned int i = 0; i < component_texts.size(); i++) + { + // keep only the components whose confidence is strictly above min_confidence + + if(component_confidences[i] > min_confidence) + { + output2 += 
component_texts[i]; + } + } + return String(output2); +} + + class OCRTesseractImpl : public OCRTesseract { private: @@ -215,13 +256,20 @@ public: run( mask, output, component_rects, component_texts, component_confidences, component_level); } - + void setWhiteList(const String& char_whitelist) + { + #ifdef HAVE_TESSERACT + tess.SetVariable("tessedit_char_whitelist", char_whitelist.c_str()); + #else + (void)char_whitelist; + #endif + } }; Ptr OCRTesseract::create(const char* datapath, const char* language, const char* char_whitelist, int oem, int psmode) { - return makePtr(datapath, language, char_whitelist, oem, psmode); + return makePtr(datapath, language, char_whitelist, oem, psmode); }