From 6c9d6d507b8b8f1454cdbe02aef38cedff592ea5 Mon Sep 17 00:00:00 2001 From: Jcrist99 Date: Thu, 8 Jun 2017 06:54:10 +0400 Subject: [PATCH] Merge pull request #1210 from abratchik:contrib.java.wrapper.fix.3.2 fix java wrappers for ERFilter, OCRHMMDecoder, OCRBeamSearchDecoder (#1210) * fix java wrappers for ERFilter, OCRHMMDecoder, OCRBeamSearchDecoder * fix comments --- .../text/include/opencv2/text/erfilter.hpp | 18 +++++++ modules/text/include/opencv2/text/ocr.hpp | 48 +++++++++++++++++++ modules/text/src/erfilter.cpp | 36 ++++++++++++++ modules/text/src/ocr_beamsearch_decoder.cpp | 13 ++++- modules/text/src/ocr_hmm_decoder.cpp | 33 +++++++++++-- 5 files changed, 142 insertions(+), 6 deletions(-) diff --git a/modules/text/include/opencv2/text/erfilter.hpp b/modules/text/include/opencv2/text/erfilter.hpp index 9cfd246c6..7d7717335 100644 --- a/modules/text/include/opencv2/text/erfilter.hpp +++ b/modules/text/include/opencv2/text/erfilter.hpp @@ -371,6 +371,24 @@ CV_EXPORTS void MSERsToERStats(InputArray image, std::vector // Utility funtion for scripting CV_EXPORTS_W void detectRegions(InputArray image, const Ptr& er_filter1, const Ptr& er_filter2, CV_OUT std::vector< std::vector >& regions); + +/** @brief Extracts text regions from image. + +@param image Source image where text blocks needs to be extracted from. Should be CV_8UC3 (color). +@param er_filter1 Extremal Region Filter for the 1st stage classifier of N&M algorithm [Neumann12] +@param er_filter2 Extremal Region Filter for the 2nd stage classifier of N&M algorithm [Neumann12] +@param groups_rects Output list of rectangle blocks with text +@param method Grouping method (see text::erGrouping_Modes). Can be one of ERGROUPING_ORIENTATION_HORIZ, ERGROUPING_ORIENTATION_ANY. +@param filename The XML or YAML file with the classifier model (e.g. samples/trained_classifier_erGrouping.xml). Only to use when grouping method is ERGROUPING_ORIENTATION_ANY. 
+@param minProbability The minimum probability for accepting a group. Only to use when grouping method is ERGROUPING_ORIENTATION_ANY. + + + */ +CV_EXPORTS_W void detectRegions(InputArray image, const Ptr& er_filter1, const Ptr& er_filter2, CV_OUT std::vector &groups_rects, + int method = ERGROUPING_ORIENTATION_HORIZ, + const String& filename = String(), + float minProbability = (float)0.5); + //! @} } diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp index bb948f8c3..67992306d 100644 --- a/modules/text/include/opencv2/text/ocr.hpp +++ b/modules/text/include/opencv2/text/ocr.hpp @@ -172,6 +172,13 @@ enum decoder_mode OCR_DECODER_VITERBI = 0 // Other algorithms may be added }; +/* OCR classifier type*/ +enum classifier_type +{ + OCR_KNN_CLASSIFIER = 0, + OCR_CNN_CLASSIFIER = 1 +}; + /** @brief OCRHMMDecoder class provides an interface for OCR using Hidden Markov Models. @note @@ -299,6 +306,21 @@ public: // cols == rows == vocabulari.size() int mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment) + /** @brief Creates an instance of the OCRHMMDecoder class. 
Loads and initializes HMMDecoder from the specified path + + @overload + */ + CV_WRAP static Ptr create(const String& filename, + + const String& vocabulary, // The language vocabulary (chars when ascii english text) + // size() must be equal to the number of classes + InputArray transition_probabilities_table, // Table with transition probabilities between character pairs + // cols == rows == vocabulary.size() + InputArray emission_probabilities_table, // Table with observation emission probabilities + // cols == rows == vocabulary.size() + int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment) + + int classifier = OCR_KNN_CLASSIFIER); // The character classifier type protected: Ptr classifier; @@ -318,6 +340,8 @@ fixed size, while retaining the centroid and aspect ratio, in order to extract a based on gradient orientations along the chain-code of its perimeter. Then, the region is classified using a KNN model trained with synthetic data of rendered characters with different standard font types. + +@deprecated use loadOCRHMMClassifier instead */ CV_EXPORTS_W Ptr loadOCRHMMClassifierNM(const String& filename); @@ -330,9 +354,19 @@ The CNN default classifier is based in the scene text recognition method propose Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Convolutional Neural Network and a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions at each window location. + +@deprecated use loadOCRHMMClassifier instead */ CV_EXPORTS_W Ptr loadOCRHMMClassifierCNN(const String& filename); +/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object. + + @param filename The XML or YAML file with the classifier model (e.g. OCRBeamSearch_CNN_model_data.xml.gz) + + @param classifier Can be one of classifier_type enum values. 
+ + */ +CV_EXPORTS_W Ptr loadOCRHMMClassifier(const String& filename, int classifier); //! @} /** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon). @@ -466,6 +500,20 @@ public: int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment) int beam_size = 500); // Size of the beam in Beam Search algorithm + /** @brief Creates an instance of the OCRBeamSearchDecoder class. Loads the character classifier from the specified file. + + @overload + + */ + CV_WRAP static Ptr create(const String& filename, // The character classifier file + const String& vocabulary, // The language vocabulary (chars when ascii english text) + // size() must be equal to the number of classes + InputArray transition_probabilities_table, // Table with transition probabilities between character pairs + // cols == rows == vocabulary.size() + InputArray emission_probabilities_table, // Table with observation emission probabilities + // cols == rows == vocabulary.size() + int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment) + int beam_size = 500); // Size of the beam in Beam Search algorithm protected: Ptr classifier; diff --git a/modules/text/src/erfilter.cpp b/modules/text/src/erfilter.cpp index efff9de9f..af3348c9e 100644 --- a/modules/text/src/erfilter.cpp +++ b/modules/text/src/erfilter.cpp @@ -4219,5 +4219,41 @@ void detectRegions(InputArray image, const Ptr& er_filter1, const Ptr< } } + +void detectRegions(InputArray image, const Ptr& er_filter1, const Ptr& er_filter2, + CV_OUT std::vector &groups_rects, + int method, + const String& filename, + float minProbability) +{ + // assert correct image type + CV_Assert( image.type() == CV_8UC3 ); + + CV_Assert( !er_filter1.empty() ); + CV_Assert( !er_filter2.empty() ); + + // Extract channels to be processed individually + vector channels; + + Mat grey; + cvtColor(image,grey,COLOR_RGB2GRAY); + + // here we are only using grey channel + channels.push_back(grey); + 
channels.push_back(255-grey); + + vector > regions(channels.size()); + + // Apply the default cascade classifier to each independent channel (could be done in parallel) + for (int c=0; c<(int)channels.size(); c++) + { + er_filter1->run(channels[c], regions[c]); + er_filter2->run(channels[c], regions[c]); + } + // Detect character groups + vector< vector > nm_region_groups; + erGrouping(image, channels, regions, nm_region_groups, groups_rects, method, filename, minProbability); +} + } } diff --git a/modules/text/src/ocr_beamsearch_decoder.cpp b/modules/text/src/ocr_beamsearch_decoder.cpp index b067e0ebc..70da85417 100644 --- a/modules/text/src/ocr_beamsearch_decoder.cpp +++ b/modules/text/src/ocr_beamsearch_decoder.cpp @@ -499,7 +499,7 @@ Ptr OCRBeamSearchDecoder::create( Ptr(_classifier, _vocabulary, transition_p, emission_p, _mode, _beam_size); } -CV_EXPORTS_W Ptr OCRBeamSearchDecoder::create(Ptr _classifier, +Ptr OCRBeamSearchDecoder::create(Ptr _classifier, const String& _vocabulary, InputArray transition_p, InputArray emission_p, @@ -509,8 +509,17 @@ CV_EXPORTS_W Ptr OCRBeamSearchDecoder::create(Ptr(_classifier, _vocabulary, transition_p, emission_p, (decoder_mode)_mode, _beam_size); } +Ptr OCRBeamSearchDecoder::create(const String& _filename, + const String& _vocabulary, + InputArray transition_p, + InputArray emission_p, + int _mode, + int _beam_size) +{ + return makePtr(loadOCRBeamSearchClassifierCNN(_filename), _vocabulary, transition_p, emission_p, (decoder_mode)_mode, _beam_size); +} -class CV_EXPORTS OCRBeamSearchClassifierCNN : public OCRBeamSearchDecoder::ClassifierCallback +class OCRBeamSearchClassifierCNN : public OCRBeamSearchDecoder::ClassifierCallback { public: //constructor diff --git a/modules/text/src/ocr_hmm_decoder.cpp b/modules/text/src/ocr_hmm_decoder.cpp index ea4c62d00..c0e9af42f 100644 --- a/modules/text/src/ocr_hmm_decoder.cpp +++ b/modules/text/src/ocr_hmm_decoder.cpp @@ -90,7 +90,7 @@ void OCRHMMDecoder::run(Mat& image, Mat& mask, 
string& output_text, vector component_confidences->clear(); } -CV_WRAP String OCRHMMDecoder::run(InputArray image, int min_confidence, int component_level) +String OCRHMMDecoder::run(InputArray image, int min_confidence, int component_level) { std::string output1; std::string output2; @@ -109,7 +109,7 @@ CV_WRAP String OCRHMMDecoder::run(InputArray image, int min_confidence, int comp return String(output2); } -CV_WRAP cv::String OCRHMMDecoder::run(InputArray image, InputArray mask, int min_confidence, int component_level) +cv::String OCRHMMDecoder::run(InputArray image, InputArray mask, int min_confidence, int component_level) { std::string output1; std::string output2; @@ -684,8 +684,17 @@ Ptr OCRHMMDecoder::create( Ptr return makePtr(_classifier, _vocabulary, transition_p, emission_p, (decoder_mode)_mode); } +Ptr OCRHMMDecoder::create( const String& _filename, + const String& _vocabulary, + InputArray transition_p, + InputArray emission_p, + int _mode, + int _classifier) +{ + return makePtr(loadOCRHMMClassifier(_filename, _classifier), _vocabulary, transition_p, emission_p, (decoder_mode)_mode); +} -class CV_EXPORTS OCRHMMClassifierKNN : public OCRHMMDecoder::ClassifierCallback +class OCRHMMClassifierKNN : public OCRHMMDecoder::ClassifierCallback { public: //constructor @@ -916,6 +925,22 @@ void OCRHMMClassifierKNN::eval( InputArray _mask, vector& out_class, vector } +Ptr loadOCRHMMClassifier(const String& _filename, int _classifier) +/* Dispatches on classifier_type: OCR_KNN_CLASSIFIER -> loadOCRHMMClassifierNM, OCR_CNN_CLASSIFIER -> loadOCRHMMClassifierCNN; any other value raises Error::StsBadArg. */ +{ + Ptr pt; + switch(_classifier) { + case OCR_KNN_CLASSIFIER: + pt = loadOCRHMMClassifierNM(_filename); + break; + case OCR_CNN_CLASSIFIER: + pt = loadOCRHMMClassifierCNN(_filename); + break; /* keep the CNN case from falling through into the error branch */ + default: + CV_Error(Error::StsBadArg, "Specified HMM classifier is not supported!"); + } + return pt; +} Ptr loadOCRHMMClassifierNM(const String& filename) @@ -923,7 +948,7 @@ Ptr loadOCRHMMClassifierNM(const String& file return makePtr(std::string(filename)); } -class CV_EXPORTS OCRHMMClassifierCNN : public 
OCRHMMDecoder::ClassifierCallback +class OCRHMMClassifierCNN : public OCRHMMDecoder::ClassifierCallback { public: //constructor