text python bindings

pull/416/head
previ 9 years ago
parent c8053da4f2
commit fe05681627
  1. 67
      modules/text/include/opencv2/text/ocr.hpp
  2. 54
      modules/text/src/ocr_beamsearch_decoder.cpp
  3. 72
      modules/text/src/ocr_hmm_decoder.cpp
  4. 52
      modules/text/src/ocr_tesseract.cpp

@ -1,4 +1,4 @@
/*M/////////////////////////////////////////////////////////////////////////////////////// /*M//////////////////////////////////////////////////////////////////////////////////////////
// //
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
// //
@ -62,7 +62,7 @@ enum
}; };
//base class BaseOCR declares a common API that would be used in a typical text recognition scenario //base class BaseOCR declares a common API that would be used in a typical text recognition scenario
class CV_EXPORTS BaseOCR class CV_EXPORTS_W BaseOCR
{ {
public: public:
virtual ~BaseOCR() {}; virtual ~BaseOCR() {};
@ -86,7 +86,7 @@ Notice that it is compiled only when tesseract-ocr is correctly installed.
found at the webcam_demo: found at the webcam_demo:
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp> <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
*/ */
class CV_EXPORTS OCRTesseract : public BaseOCR class CV_EXPORTS_W OCRTesseract : public BaseOCR
{ {
public: public:
/** @brief Recognize text using the tesseract-ocr API. /** @brief Recognize text using the tesseract-ocr API.
@ -113,6 +113,14 @@ public:
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0); int component_level=0);
// Aliases for scripting: CV_WRAP-marked overloads that return the recognized
// text as a String instead of filling caller-supplied output parameters.

/** @brief Runs recognition on image and returns the filtered text.
@param image Input image; converted with getMat() before recognition.
@param min_confidence Only component texts whose confidence is greater than this value are kept.
@param component_level Forwarded unchanged to the output-parameter run() overload.
@return Concatenation, in order, of the component texts that passed the confidence filter.
*/
CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);

/** @brief Same as above, additionally forwarding mask to the masked run() overload.
@param mask Mask image; converted with getMat() and passed alongside image.
*/
CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);

/** @brief Sets the character whitelist used by the recognizer.
Pure virtual: concrete OCRTesseract implementations must provide it.
@param char_whitelist The whitelist characters, given as a single string.
*/
CV_WRAP virtual void setWhiteList(const String& char_whitelist) = 0;
/** @brief Creates an instance of the OCRTesseract class. Initializes Tesseract. /** @brief Creates an instance of the OCRTesseract class. Initializes Tesseract.
@param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the @param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the
@ -127,7 +135,7 @@ public:
(fully automatic layout analysis) is used. See the tesseract-ocr API documentation for other (fully automatic layout analysis) is used. See the tesseract-ocr API documentation for other
possible values. possible values.
*/ */
static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL, CV_WRAP static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
const char* char_whitelist=NULL, int oem=3, int psmode=3); const char* char_whitelist=NULL, int oem=3, int psmode=3);
}; };
@ -146,7 +154,7 @@ enum decoder_mode
be found at the webcam_demo sample: be found at the webcam_demo sample:
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp> <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
*/ */
class CV_EXPORTS OCRHMMDecoder : public BaseOCR class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR
{ {
public: public:
@ -159,7 +167,7 @@ public:
loadOCRHMMClassifierNM and KNN model provided in loadOCRHMMClassifierNM and KNN model provided in
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_knn_model_data.xml.gz>. <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_knn_model_data.xml.gz>.
*/ */
class CV_EXPORTS ClassifierCallback class CV_EXPORTS_W ClassifierCallback
{ {
public: public:
virtual ~ClassifierCallback() { } virtual ~ClassifierCallback() { }
@ -227,6 +235,11 @@ public:
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0); int component_level=0);
// Aliases for scripting: CV_WRAP-marked overloads that return the recognized
// text as a String instead of filling caller-supplied output parameters.

/** @brief Runs recognition on image and returns the filtered text.
@param image Input image; converted with getMat() before recognition.
@param min_confidence Only component texts whose confidence is greater than this value are kept.
@param component_level Forwarded unchanged to the output-parameter run() overload.
@return Concatenation, in order, of the component texts that passed the confidence filter.
*/
CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);

/** @brief Same as above, additionally forwarding mask to the masked run() overload.
@param mask Mask image; converted with getMat() and passed alongside image.
*/
CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
/** @brief Creates an instance of the OCRHMMDecoder class. Initializes HMMDecoder. /** @brief Creates an instance of the OCRHMMDecoder class. Initializes HMMDecoder.
@param classifier The character classifier with built in feature extractor. @param classifier The character classifier with built in feature extractor.
@ -252,6 +265,15 @@ public:
// cols == rows == vocabulari.size() // cols == rows == vocabulari.size()
decoder_mode mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment) decoder_mode mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment)
// Overload marked CV_WRAP: takes the vocabulary as a String and the decoding
// mode as a plain int (the definition casts it to decoder_mode).
CV_WRAP static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
const String& vocabulary, // The language vocabulary (chars when ascii english text)
// size() must be equal to the number of classes
InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
// cols == rows == vocabulary.size()
InputArray emission_probabilities_table, // Table with observation emission probabilities
// cols == rows == vocabulary.size()
int mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment)
protected: protected:
Ptr<OCRHMMDecoder::ClassifierCallback> classifier; Ptr<OCRHMMDecoder::ClassifierCallback> classifier;
@ -272,7 +294,8 @@ based on gradient orientations along the chain-code of its perimeter. Then, the
using a KNN model trained with synthetic data of rendered characters with different standard font using a KNN model trained with synthetic data of rendered characters with different standard font
types. types.
*/ */
CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string& filename);
CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename);
/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object. /** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object.
@ -283,7 +306,7 @@ Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Co
a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
at each window location. at each window location.
*/ */
CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const std::string& filename); CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename);
//! @} //! @}
@ -299,9 +322,11 @@ CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const
* @note * @note
* - (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) : * - (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) :
* <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml> * <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
* */ **/
CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector<std::string>& lexicon, OutputArray transition_probabilities_table); CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector<std::string>& lexicon, OutputArray transition_probabilities_table);
/** @brief Overload that takes cv::String inputs and returns the transitions table as a Mat.

It copies vocabulary and lexicon into std::string containers and delegates to the
std::string / OutputArray overload declared above.
@param vocabulary The language vocabulary.
@param lexicon The list of words used to build the table.
@return The table produced by the delegated call.
*/
CV_EXPORTS_W Mat createOCRHMMTransitionsTable(const String& vocabulary, std::vector<cv::String>& lexicon);
/* OCR BeamSearch Decoder */ /* OCR BeamSearch Decoder */
@ -312,7 +337,7 @@ CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vecto
be found at the demo sample: be found at the demo sample:
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp> <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
*/ */
class CV_EXPORTS OCRBeamSearchDecoder : public BaseOCR class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR
{ {
public: public:
@ -325,7 +350,7 @@ public:
loadOCRBeamSearchClassifierCNN with all its parameters provided in loadOCRBeamSearchClassifierCNN with all its parameters provided in
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>. <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
*/ */
class CV_EXPORTS ClassifierCallback class CV_EXPORTS_W ClassifierCallback
{ {
public: public:
virtual ~ClassifierCallback() { } virtual ~ClassifierCallback() { }
@ -350,7 +375,7 @@ public:
provides also the Rects for individual text elements found (e.g. words), and the list of those provides also the Rects for individual text elements found (e.g. words), and the list of those
text elements with their confidence values. text elements with their confidence values.
@param image Input image CV_8UC1 with a single text line (or word). @param image Input binary image CV_8UC1 with a single text line (or word).
@param output_text Output text. Most likely character sequence found by the HMM decoder. @param output_text Output text. Most likely character sequence found by the HMM decoder.
@ -373,6 +398,11 @@ public:
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0); int component_level=0);
// Aliases for scripting: CV_WRAP-marked overloads that return the recognized
// text as a String instead of filling caller-supplied output parameters.

/** @brief Runs recognition on image and returns the filtered text.
@param image Input image; converted with getMat() before recognition.
@param min_confidence Only component texts whose confidence is greater than this value are kept.
@param component_level Forwarded unchanged to the output-parameter run() overload.
@return Concatenation, in order, of the component texts that passed the confidence filter.
*/
CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);

/** @brief Same as above, additionally forwarding mask to the masked run() overload.
@param mask Mask image; converted with getMat() and passed alongside image.
*/
CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
/** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder. /** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder.
@param classifier The character classifier with built in feature extractor. @param classifier The character classifier with built in feature extractor.
@ -401,6 +431,16 @@ public:
decoder_mode mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment) decoder_mode mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
int beam_size = 500); // Size of the beam in Beam Search algorithm int beam_size = 500); // Size of the beam in Beam Search algorithm
// Overload marked CV_WRAP: takes the vocabulary as a String and the decoding
// mode as a plain int (the definition casts it to decoder_mode).
CV_WRAP static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier, // The character classifier with built in feature extractor
const String& vocabulary, // The language vocabulary (chars when ascii english text)
// size() must be equal to the number of classes
InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
// cols == rows == vocabulary.size()
InputArray emission_probabilities_table, // Table with observation emission probabilities
// cols == rows == vocabulary.size()
int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
int beam_size = 500); // Size of the beam in Beam Search algorithm
protected: protected:
Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier; Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier;
@ -420,7 +460,8 @@ Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Co
a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
at each window location. at each window location.
*/ */
CV_EXPORTS Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const std::string& filename);
CV_EXPORTS_W Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename);
//! @} //! @}

@ -88,6 +88,45 @@ void OCRBeamSearchDecoder::run(Mat& image, Mat& mask, string& output_text, vecto
component_confidences->clear(); component_confidences->clear();
} }
// Scripting-friendly wrapper around the output-parameter run() overload.
// Recognizes `image` and returns, concatenated in order, every component text
// whose confidence is greater than `min_confidence`.
CV_WRAP String OCRBeamSearchDecoder::run(InputArray image, int min_confidence, int component_level)
{
    Mat input = image.getMat();

    std::string full_text;          // filled by the delegated call; not part of the returned value
    vector<string> texts;
    vector<float> confidences;
    run(input, full_text, NULL, &texts, &confidences, component_level);

    std::string accepted;
    const size_t count = texts.size();
    for (size_t idx = 0; idx != count; ++idx)
    {
        // Skip components that do not clear the confidence threshold.
        if (!(confidences[idx] > min_confidence))
            continue;
        accepted += texts[idx];
    }
    return String(accepted);
}
// Scripting-friendly wrapper around the masked output-parameter run() overload.
// Recognizes `image` using `mask` and returns, concatenated in order, every
// component text whose confidence is greater than `min_confidence`.
CV_WRAP String OCRBeamSearchDecoder::run(InputArray image, InputArray mask, int min_confidence, int component_level)
{
    Mat input = image.getMat();
    Mat input_mask = mask.getMat();

    std::string full_text;          // filled by the delegated call; not part of the returned value
    vector<string> texts;
    vector<float> confidences;
    run(input, input_mask, full_text, NULL, &texts, &confidences, component_level);

    std::string accepted;
    const size_t count = texts.size();
    for (size_t idx = 0; idx != count; ++idx)
    {
        // Skip components that do not clear the confidence threshold.
        if (!(confidences[idx] > min_confidence))
            continue;
        accepted += texts[idx];
    }
    return String(accepted);
}
void OCRBeamSearchDecoder::ClassifierCallback::eval( InputArray image, vector< vector<double> >& recognition_probabilities, vector<int>& oversegmentation) void OCRBeamSearchDecoder::ClassifierCallback::eval( InputArray image, vector< vector<double> >& recognition_probabilities, vector<int>& oversegmentation)
{ {
@ -460,6 +499,16 @@ Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create( Ptr<OCRBeamSearchDecoder
return makePtr<OCRBeamSearchDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, _mode, _beam_size); return makePtr<OCRBeamSearchDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, _mode, _beam_size);
} }
CV_EXPORTS_W Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create(Ptr<OCRBeamSearchDecoder::ClassifierCallback> _classifier,
const String& _vocabulary,
InputArray transition_p,
InputArray emission_p,
int _mode,
int _beam_size)
{
return makePtr<OCRBeamSearchDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, (decoder_mode)_mode, _beam_size);
}
class CV_EXPORTS OCRBeamSearchClassifierCNN : public OCRBeamSearchDecoder::ClassifierCallback class CV_EXPORTS OCRBeamSearchClassifierCNN : public OCRBeamSearchDecoder::ClassifierCallback
{ {
@ -727,11 +776,10 @@ double OCRBeamSearchClassifierCNN::eval_feature(Mat& feature, double* prob_estim
return dec_max_idx; return dec_max_idx;
} }
Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename)
Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const std::string& filename)
{ {
return makePtr<OCRBeamSearchClassifierCNN>(filename); return makePtr<OCRBeamSearchClassifierCNN>(std::string(filename));
} }
} }

@ -90,6 +90,46 @@ void OCRHMMDecoder::run(Mat& image, Mat& mask, string& output_text, vector<Rect>
component_confidences->clear(); component_confidences->clear();
} }
// Scripting-friendly wrapper around the output-parameter run() overload.
// Recognizes `image` and returns, concatenated in order, every component text
// whose confidence is greater than `min_confidence`.
CV_WRAP String OCRHMMDecoder::run(InputArray image, int min_confidence, int component_level)
{
    Mat input = image.getMat();

    std::string full_text;          // filled by the delegated call; not part of the returned value
    vector<string> texts;
    vector<float> confidences;
    run(input, full_text, NULL, &texts, &confidences, component_level);

    std::string accepted;
    const size_t count = texts.size();
    for (size_t idx = 0; idx != count; ++idx)
    {
        // Skip components that do not clear the confidence threshold.
        if (!(confidences[idx] > min_confidence))
            continue;
        accepted += texts[idx];
    }
    return String(accepted);
}
// Scripting-friendly wrapper around the masked output-parameter run() overload.
//
// image           Input image; converted with getMat() before recognition.
// mask            Mask image; converted with getMat() and passed alongside image.
// min_confidence  Only component texts whose confidence is greater than this
//                 value are kept.
// component_level Forwarded unchanged to the delegated run() call.
//
// Returns the kept component texts concatenated in order.
//
// Note: this function deliberately writes nothing to stdout; a per-component
// debug print would pollute the output of every program using the library.
CV_WRAP cv::String OCRHMMDecoder::run(InputArray image, InputArray mask, int min_confidence, int component_level)
{
    std::string output1;            // filled by the delegated call; not part of the returned value
    std::string output2;            // accumulates the accepted component texts
    vector<string> component_texts;
    vector<float> component_confidences;
    Mat image_m = image.getMat();
    Mat mask_m = mask.getMat();
    run(image_m, mask_m, output1, NULL, &component_texts, &component_confidences, component_level);
    for(size_t i = 0; i < component_texts.size(); i++)
    {
        if(component_confidences[i] > min_confidence)
        {
            output2 += component_texts[i];
        }
    }
    return String(output2);
}
void OCRHMMDecoder::ClassifierCallback::eval( InputArray image, vector<int>& out_class, vector<double>& out_confidence) void OCRHMMDecoder::ClassifierCallback::eval( InputArray image, vector<int>& out_class, vector<double>& out_confidence)
{ {
CV_Assert(( image.getMat().type() == CV_8UC3 ) || ( image.getMat().type() == CV_8UC1 )); CV_Assert(( image.getMat().type() == CV_8UC3 ) || ( image.getMat().type() == CV_8UC1 ));
@ -635,6 +675,16 @@ Ptr<OCRHMMDecoder> OCRHMMDecoder::create( Ptr<OCRHMMDecoder::ClassifierCallback>
} }
// Factory overload taking the vocabulary as a cv::String and the decoding mode
// as a plain int; the mode is converted to decoder_mode and everything is
// handed to the OCRHMMDecoderImpl constructor.
Ptr<OCRHMMDecoder> OCRHMMDecoder::create( Ptr<OCRHMMDecoder::ClassifierCallback> _classifier,
                                          const String& _vocabulary,
                                          InputArray transition_p,
                                          InputArray emission_p,
                                          int _mode)
{
    const decoder_mode decoding = static_cast<decoder_mode>(_mode);
    Ptr<OCRHMMDecoder> decoder = makePtr<OCRHMMDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, decoding);
    return decoder;
}
class CV_EXPORTS OCRHMMClassifierKNN : public OCRHMMDecoder::ClassifierCallback class CV_EXPORTS OCRHMMClassifierKNN : public OCRHMMDecoder::ClassifierCallback
{ {
public: public:
@ -867,14 +917,12 @@ void OCRHMMClassifierKNN::eval( InputArray _mask, vector<int>& out_class, vector
} }
Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string& filename) Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename)
{ {
return makePtr<OCRHMMClassifierKNN>(filename); return makePtr<OCRHMMClassifierKNN>(std::string(filename));
} }
class CV_EXPORTS OCRHMMClassifierCNN : public OCRHMMDecoder::ClassifierCallback class CV_EXPORTS OCRHMMClassifierCNN : public OCRHMMDecoder::ClassifierCallback
{ {
public: public:
@ -1139,10 +1187,10 @@ double OCRHMMClassifierCNN::eval_feature(Mat& feature, double* prob_estimates)
} }
Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const std::string& filename) Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename)
{ {
return makePtr<OCRHMMClassifierCNN>(filename); return makePtr<OCRHMMClassifierCNN>(std::string(filename));
} }
/** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon). /** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
@ -1201,5 +1249,17 @@ void createOCRHMMTransitionsTable(string& vocabulary, vector<string>& lexicon, O
return; return;
} }
// Overload for callers working with cv::String: copies the inputs into
// std::string containers, delegates to the std::string / OutputArray overload
// and returns the resulting table by value.
Mat createOCRHMMTransitionsTable(const String& vocabulary, vector<cv::String>& lexicon)
{
    // The delegated overload takes non-const std::string references, so it
    // needs mutable std::string copies of both inputs.
    vector<string> words;
    for (size_t i = 0; i < lexicon.size(); ++i)
        words.push_back(std::string(lexicon[i]));

    std::string alphabet(vocabulary);

    Mat table;
    createOCRHMMTransitionsTable(alphabet, words, table);
    return table;
}
} }
} }

@ -86,6 +86,47 @@ void OCRTesseract::run(Mat& image, Mat& mask, string& output_text, vector<Rect>*
component_confidences->clear(); component_confidences->clear();
} }
// Scripting-friendly wrapper around the output-parameter run() overload.
// Recognizes `image` and returns, concatenated in order, every component text
// whose confidence is greater than `min_confidence`.
CV_WRAP String OCRTesseract::run(InputArray image, int min_confidence, int component_level)
{
    Mat input = image.getMat();

    std::string full_text;          // filled by the delegated call; not part of the returned value
    vector<string> texts;
    vector<float> confidences;
    run(input, full_text, NULL, &texts, &confidences, component_level);

    std::string accepted;
    const size_t count = texts.size();
    for (size_t idx = 0; idx != count; ++idx)
    {
        // Skip components that do not clear the confidence threshold.
        if (!(confidences[idx] > min_confidence))
            continue;
        accepted += texts[idx];
    }
    return String(accepted);
}
// Scripting-friendly wrapper around the masked output-parameter run() overload.
//
// image           Input image; converted with getMat() before recognition.
// mask            Mask image; converted with getMat() and passed alongside image.
// min_confidence  Only component texts whose confidence is greater than this
//                 value are kept.
// component_level Forwarded unchanged to the delegated run() call.
//
// Returns the kept component texts concatenated in order.
//
// Note: this function deliberately writes nothing to stdout; a per-component
// debug print would pollute the output of every program using the library.
CV_WRAP String OCRTesseract::run(InputArray image, InputArray mask, int min_confidence, int component_level)
{
    std::string output1;            // filled by the delegated call; not part of the returned value
    std::string output2;            // accumulates the accepted component texts
    vector<string> component_texts;
    vector<float> component_confidences;
    Mat image_m = image.getMat();
    Mat mask_m = mask.getMat();
    run(image_m, mask_m, output1, NULL, &component_texts, &component_confidences, component_level);
    for(size_t i = 0; i < component_texts.size(); i++)
    {
        if(component_confidences[i] > min_confidence)
        {
            output2 += component_texts[i];
        }
    }
    return String(output2);
}
class OCRTesseractImpl : public OCRTesseract class OCRTesseractImpl : public OCRTesseract
{ {
private: private:
@ -215,13 +256,20 @@ public:
run( mask, output, component_rects, component_texts, component_confidences, component_level); run( mask, output, component_rects, component_texts, component_confidences, component_level);
} }
// Forwards char_whitelist to Tesseract's "tessedit_char_whitelist" variable.
// When the module is built without Tesseract this is a no-op.
void setWhiteList(const String& char_whitelist)
{
#ifndef HAVE_TESSERACT
    // Nothing to configure; reference the parameter to avoid an unused warning.
    (void)char_whitelist;
#else
    const char* allowed_chars = char_whitelist.c_str();
    tess.SetVariable("tessedit_char_whitelist", allowed_chars);
#endif
}
}; };
Ptr<OCRTesseract> OCRTesseract::create(const char* datapath, const char* language, Ptr<OCRTesseract> OCRTesseract::create(const char* datapath, const char* language,
const char* char_whitelist, int oem, int psmode) const char* char_whitelist, int oem, int psmode)
{ {
return makePtr<OCRTesseractImpl>(datapath,language,char_whitelist,oem,psmode); return makePtr<OCRTesseractImpl>(datapath, language, char_whitelist, oem, psmode);
} }

Loading…
Cancel
Save