text python bindings

pull/416/head
previ 9 years ago
parent c8053da4f2
commit fe05681627
  1. 67
      modules/text/include/opencv2/text/ocr.hpp
  2. 54
      modules/text/src/ocr_beamsearch_decoder.cpp
  3. 72
      modules/text/src/ocr_hmm_decoder.cpp
  4. 52
      modules/text/src/ocr_tesseract.cpp

@ -1,4 +1,4 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
/*M//////////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
@ -62,7 +62,7 @@ enum
};
//base class BaseOCR declares a common API that would be used in a typical text recognition scenario
class CV_EXPORTS BaseOCR
class CV_EXPORTS_W BaseOCR
{
public:
virtual ~BaseOCR() {};
@ -86,7 +86,7 @@ Notice that it is compiled only when tesseract-ocr is correctly installed.
found at the webcam_demo:
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
*/
class CV_EXPORTS OCRTesseract : public BaseOCR
class CV_EXPORTS_W OCRTesseract : public BaseOCR
{
public:
/** @brief Recognize text using the tesseract-ocr API.
@ -113,6 +113,14 @@ public:
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0);
// aliases for scripting
CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
CV_WRAP virtual void setWhiteList(const String& char_whitelist) = 0;
/** @brief Creates an instance of the OCRTesseract class. Initializes Tesseract.
@param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the
@ -127,7 +135,7 @@ public:
(fully automatic layout analysis) is used. See the tesseract-ocr API documentation for other
possible values.
*/
static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
CV_WRAP static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
const char* char_whitelist=NULL, int oem=3, int psmode=3);
};
@ -146,7 +154,7 @@ enum decoder_mode
be found at the webcam_demo sample:
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
*/
class CV_EXPORTS OCRHMMDecoder : public BaseOCR
class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR
{
public:
@ -159,7 +167,7 @@ public:
loadOCRHMMClassifierNM and KNN model provided in
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_knn_model_data.xml.gz>.
*/
class CV_EXPORTS ClassifierCallback
class CV_EXPORTS_W ClassifierCallback
{
public:
virtual ~ClassifierCallback() { }
@ -227,6 +235,11 @@ public:
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0);
// aliases for scripting
CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
/** @brief Creates an instance of the OCRHMMDecoder class. Initializes HMMDecoder.
@param classifier The character classifier with built in feature extractor.
@ -252,6 +265,15 @@ public:
// cols == rows == vocabulari.size()
decoder_mode mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment)
CV_WRAP static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
const String& vocabulary, // The language vocabulary (chars when ascii english text)
// size() must be equal to the number of classes
InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
// cols == rows == vocabulari.size()
InputArray emission_probabilities_table, // Table with observation emission probabilities
// cols == rows == vocabulari.size()
int mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment)
protected:
Ptr<OCRHMMDecoder::ClassifierCallback> classifier;
@ -272,7 +294,8 @@ based on gradient orientations along the chain-code of its perimeter. Then, the
using a KNN model trained with synthetic data of rendered characters with different standard font
types.
*/
CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string& filename);
CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename);
/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object.
@ -283,7 +306,7 @@ Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Co
a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
at each window location.
*/
CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const std::string& filename);
CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename);
//! @}
@ -299,9 +322,11 @@ CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const
* @note
* - (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) :
* <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
* */
**/
CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector<std::string>& lexicon, OutputArray transition_probabilities_table);
CV_EXPORTS_W Mat createOCRHMMTransitionsTable(const String& vocabulary, std::vector<cv::String>& lexicon);
/* OCR BeamSearch Decoder */
@ -312,7 +337,7 @@ CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vecto
be found at the demo sample:
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
*/
class CV_EXPORTS OCRBeamSearchDecoder : public BaseOCR
class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR
{
public:
@ -325,7 +350,7 @@ public:
loadOCRBeamSearchClassifierCNN with all its parameters provided in
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
*/
class CV_EXPORTS ClassifierCallback
class CV_EXPORTS_W ClassifierCallback
{
public:
virtual ~ClassifierCallback() { }
@ -350,7 +375,7 @@ public:
provides also the Rects for individual text elements found (e.g. words), and the list of those
text elements with their confidence values.
@param image Input image CV_8UC1 with a single text line (or word).
@param image Input binary image CV_8UC1 with a single text line (or word).
@param output_text Output text. Most likely character sequence found by the HMM decoder.
@ -373,6 +398,11 @@ public:
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0);
// aliases for scripting
CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
/** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder.
@param classifier The character classifier with built in feature extractor.
@ -401,6 +431,16 @@ public:
decoder_mode mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
int beam_size = 500); // Size of the beam in Beam Search algorithm
CV_WRAP static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier, // The character classifier with built in feature extractor
const String& vocabulary, // The language vocabulary (chars when ascii english text)
// size() must be equal to the number of classes
InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
// cols == rows == vocabulari.size()
InputArray emission_probabilities_table, // Table with observation emission probabilities
// cols == rows == vocabulari.size()
int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
int beam_size = 500); // Size of the beam in Beam Search algorithm
protected:
Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier;
@ -420,7 +460,8 @@ Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Co
a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
at each window location.
*/
CV_EXPORTS Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const std::string& filename);
CV_EXPORTS_W Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename);
//! @}

@ -88,6 +88,45 @@ void OCRBeamSearchDecoder::run(Mat& image, Mat& mask, string& output_text, vecto
component_confidences->clear();
}
CV_WRAP String OCRBeamSearchDecoder::run(InputArray image, int min_confidence, int component_level)
{
std::string output1;
std::string output2;
vector<string> component_texts;
vector<float> component_confidences;
Mat image_m = image.getMat();
run(image_m, output1, NULL, &component_texts, &component_confidences, component_level);
for(unsigned int i = 0; i < component_texts.size(); i++)
{
//cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;
if(component_confidences[i] > min_confidence)
{
output2 += component_texts[i];
}
}
return String(output2);
}
CV_WRAP String OCRBeamSearchDecoder::run(InputArray image, InputArray mask, int min_confidence, int component_level)
{
std::string output1;
std::string output2;
vector<string> component_texts;
vector<float> component_confidences;
Mat image_m = image.getMat();
Mat mask_m = mask.getMat();
run(image_m, mask_m, output1, NULL, &component_texts, &component_confidences, component_level);
for(unsigned int i = 0; i < component_texts.size(); i++)
{
//cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;
if(component_confidences[i] > min_confidence)
{
output2 += component_texts[i];
}
}
return String(output2);
}
void OCRBeamSearchDecoder::ClassifierCallback::eval( InputArray image, vector< vector<double> >& recognition_probabilities, vector<int>& oversegmentation)
{
@ -460,6 +499,16 @@ Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create( Ptr<OCRBeamSearchDecoder
return makePtr<OCRBeamSearchDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, _mode, _beam_size);
}
CV_EXPORTS_W Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create(Ptr<OCRBeamSearchDecoder::ClassifierCallback> _classifier,
const String& _vocabulary,
InputArray transition_p,
InputArray emission_p,
int _mode,
int _beam_size)
{
return makePtr<OCRBeamSearchDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, (decoder_mode)_mode, _beam_size);
}
class CV_EXPORTS OCRBeamSearchClassifierCNN : public OCRBeamSearchDecoder::ClassifierCallback
{
@ -727,11 +776,10 @@ double OCRBeamSearchClassifierCNN::eval_feature(Mat& feature, double* prob_estim
return dec_max_idx;
}
Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const std::string& filename)
Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename)
{
return makePtr<OCRBeamSearchClassifierCNN>(filename);
return makePtr<OCRBeamSearchClassifierCNN>(std::string(filename));
}
}

@ -90,6 +90,46 @@ void OCRHMMDecoder::run(Mat& image, Mat& mask, string& output_text, vector<Rect>
component_confidences->clear();
}
CV_WRAP String OCRHMMDecoder::run(InputArray image, int min_confidence, int component_level)
{
std::string output1;
std::string output2;
vector<string> component_texts;
vector<float> component_confidences;
Mat image_m = image.getMat();
run(image_m, output1, NULL, &component_texts, &component_confidences, component_level);
for(unsigned int i = 0; i < component_texts.size(); i++)
{
//cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;
if(component_confidences[i] > min_confidence)
{
output2 += component_texts[i];
}
}
return String(output2);
}
CV_WRAP cv::String OCRHMMDecoder::run(InputArray image, InputArray mask, int min_confidence, int component_level)
{
std::string output1;
std::string output2;
vector<string> component_texts;
vector<float> component_confidences;
Mat image_m = image.getMat();
Mat mask_m = mask.getMat();
run(image_m, mask_m, output1, NULL, &component_texts, &component_confidences, component_level);
for(unsigned int i = 0; i < component_texts.size(); i++)
{
cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;
if(component_confidences[i] > min_confidence)
{
output2 += component_texts[i];
}
}
return String(output2);
}
void OCRHMMDecoder::ClassifierCallback::eval( InputArray image, vector<int>& out_class, vector<double>& out_confidence)
{
CV_Assert(( image.getMat().type() == CV_8UC3 ) || ( image.getMat().type() == CV_8UC1 ));
@ -635,6 +675,16 @@ Ptr<OCRHMMDecoder> OCRHMMDecoder::create( Ptr<OCRHMMDecoder::ClassifierCallback>
}
Ptr<OCRHMMDecoder> OCRHMMDecoder::create( Ptr<OCRHMMDecoder::ClassifierCallback> _classifier,
const String& _vocabulary,
InputArray transition_p,
InputArray emission_p,
int _mode)
{
return makePtr<OCRHMMDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, (decoder_mode)_mode);
}
class CV_EXPORTS OCRHMMClassifierKNN : public OCRHMMDecoder::ClassifierCallback
{
public:
@ -867,14 +917,12 @@ void OCRHMMClassifierKNN::eval( InputArray _mask, vector<int>& out_class, vector
}
Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string& filename)
Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename)
{
return makePtr<OCRHMMClassifierKNN>(filename);
return makePtr<OCRHMMClassifierKNN>(std::string(filename));
}
class CV_EXPORTS OCRHMMClassifierCNN : public OCRHMMDecoder::ClassifierCallback
{
public:
@ -1139,10 +1187,10 @@ double OCRHMMClassifierCNN::eval_feature(Mat& feature, double* prob_estimates)
}
Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const std::string& filename)
Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename)
{
return makePtr<OCRHMMClassifierCNN>(filename);
return makePtr<OCRHMMClassifierCNN>(std::string(filename));
}
/** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
@ -1201,5 +1249,17 @@ void createOCRHMMTransitionsTable(string& vocabulary, vector<string>& lexicon, O
return;
}
Mat createOCRHMMTransitionsTable(const String& vocabulary, vector<cv::String>& lexicon)
{
std::string voc(vocabulary);
vector<string> lex;
for(vector<cv::String>::iterator l = lexicon.begin(); l != lexicon.end(); l++)
lex.push_back(std::string(*l));
Mat _transitions;
createOCRHMMTransitionsTable(voc, lex, _transitions);
return _transitions;
}
}
}

@ -86,6 +86,47 @@ void OCRTesseract::run(Mat& image, Mat& mask, string& output_text, vector<Rect>*
component_confidences->clear();
}
CV_WRAP String OCRTesseract::run(InputArray image, int min_confidence, int component_level)
{
std::string output1;
std::string output2;
vector<string> component_texts;
vector<float> component_confidences;
Mat image_m = image.getMat();
run(image_m, output1, NULL, &component_texts, &component_confidences, component_level);
for(unsigned int i = 0; i < component_texts.size(); i++)
{
// cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;
if(component_confidences[i] > min_confidence)
{
output2 += component_texts[i];
}
}
return String(output2);
}
CV_WRAP String OCRTesseract::run(InputArray image, InputArray mask, int min_confidence, int component_level)
{
std::string output1;
std::string output2;
vector<string> component_texts;
vector<float> component_confidences;
Mat image_m = image.getMat();
Mat mask_m = mask.getMat();
run(image_m, mask_m, output1, NULL, &component_texts, &component_confidences, component_level);
for(unsigned int i = 0; i < component_texts.size(); i++)
{
cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;
if(component_confidences[i] > min_confidence)
{
output2 += component_texts[i];
}
}
return String(output2);
}
class OCRTesseractImpl : public OCRTesseract
{
private:
@ -215,13 +256,20 @@ public:
run( mask, output, component_rects, component_texts, component_confidences, component_level);
}
void setWhiteList(const String& char_whitelist)
{
#ifdef HAVE_TESSERACT
tess.SetVariable("tessedit_char_whitelist", char_whitelist.c_str());
#else
(void)char_whitelist;
#endif
}
};
Ptr<OCRTesseract> OCRTesseract::create(const char* datapath, const char* language,
const char* char_whitelist, int oem, int psmode)
{
return makePtr<OCRTesseractImpl>(datapath,language,char_whitelist,oem,psmode);
return makePtr<OCRTesseractImpl>(datapath, language, char_whitelist, oem, psmode);
}

Loading…
Cancel
Save