Merge pull request #298 from lluisgomez:master

pull/320/head
Vadim Pisarevsky 9 years ago
commit f9d42886f1
  1. 15
      modules/text/include/opencv2/text/ocr.hpp
  2. 38
      modules/text/samples/cropped_word_recognition.cpp
  3. 62
      modules/text/src/ocr_hmm_decoder.cpp

@ -240,6 +240,21 @@ types.
*/
CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string& filename);
/** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
@param vocabulary The language vocabulary (chars when ascii english text).
@param lexicon The list of words that are expected to be found in a particular image.
@param transition_probabilities_table Output table with transition probabilities between character pairs. cols == rows == vocabulary.size().
The function calculates frequency statistics of character pairs from the given lexicon and fills the output transition_probabilities_table with them. The transition_probabilities_table can be used as input in the OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods.
@note
- (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) :
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
*/
CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector<std::string>& lexicon, OutputArray transition_probabilities_table);
/* OCR BeamSearch Decoder */

@ -1,11 +1,10 @@
/*
* textdetection.cpp
* cropped_word_recognition.cpp
*
* A demo program of End-to-end Scene Text Detection and Recognition:
* Shows the use of the Tesseract OCR API with the Extremal Region Filter algorithm described in:
* Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
* A demo program of text recognition in a given cropped word.
* Shows the use of the OCRBeamSearchDecoder class API using the provided default classifier.
*
* Created on: Jul 31, 2014
* Created on: Jul 9, 2015
* Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es>
*/
@ -20,9 +19,9 @@ using namespace std;
using namespace cv;
using namespace cv::text;
//Perform text recognition in a given cropped word
int main(int argc, char* argv[])
{
cout << endl << argv[0] << endl << endl;
cout << "A demo program of Scene Text cropped word Recognition: " << endl;
cout << "Shows the use of the OCRBeamSearchDecoder class using the Single Layer CNN character classifier described in:" << endl;
@ -37,19 +36,34 @@ int main(int argc, char* argv[])
return(0);
}
// The 62 character classes: 'A'-'Z', 'a'-'z', '0'-'9'.
// The lowercase run must end in "xyz" (not "xyx"), otherwise 'z' is missing and 'x' appears twice.
string vocabulary = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; // must have the same order as the classifier output classes
vector<string> lexicon; // a list of words expected to be found on the input image
lexicon.push_back(string("abb"));
lexicon.push_back(string("patata"));
lexicon.push_back(string("CHINA"));
lexicon.push_back(string("HERE"));
lexicon.push_back(string("President"));
lexicon.push_back(string("smash"));
lexicon.push_back(string("KUALA"));
lexicon.push_back(string("NINTENDO"));
// Create a tailored language model from a small given lexicon
Mat transition_p;
string filename = "OCRHMM_transitions_table.xml"; // TODO this table was done with a different vocabulary order?
// TODO add a new function in ocr.cpp to create transition tab
// for a given lexicon
createOCRHMMTransitionsTable(vocabulary,lexicon,transition_p);
// An alternative would be to load the default generic language model
// (created from ispell 42869 english words list)
/*Mat transition_p;
string filename = "OCRHMM_transitions_table.xml"; // TODO use same order for voc
FileStorage fs(filename, FileStorage::READ);
fs["transition_probabilities"] >> transition_p;
fs.release();
fs.release();*/
Mat emission_p = Mat::eye(62,62,CV_64FC1);
string voc = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyx0123456789";
Ptr<OCRBeamSearchDecoder> ocr = OCRBeamSearchDecoder::create(
loadOCRBeamSearchClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"),
voc, transition_p, emission_p);
vocabulary, transition_p, emission_p);
double t_r = (double)getTickCount();
string output;

@ -113,7 +113,7 @@ public:
vector<float>* component_confidences,
int component_level)
{
CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC3) );
CV_Assert( (image.cols > 0) && (image.rows > 0) );
CV_Assert( component_level == OCR_LEVEL_WORD );
@ -159,14 +159,14 @@ public:
{
if (vector_w.at<float>(0,s) == 0)
s_init = s+1;
else
else
break;
}
for (int s=vector_w.cols-1; s>=0; s--)
{
if (vector_w.at<float>(0,s) == 0)
s_end = s;
else
else
break;
}
@ -598,5 +598,61 @@ Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string&
return makePtr<OCRHMMClassifierKNN>(filename);
}
/** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).

@param vocabulary The language vocabulary (chars when ascii english text). Must not be empty.
@param lexicon The list of words that are expected to be found in a particular image. Must not be empty.
@param transition_probabilities_table Output table with transition probabilities between character pairs
(the _transitions argument). It is (re)allocated as CV_64F with cols == rows == vocabulary.size().

The function calculates frequency statistics of adjacent character pairs from the given lexicon and fills
the output transition_probabilities_table with them: entry (i,j) is the number of times vocabulary[j]
directly follows vocabulary[i], divided by the number of pairs that start with vocabulary[i].
Rows of characters that never start a pair are left as all zeros. Words with fewer than two
characters contain no pair and contribute nothing.

An error (Error::StsBadArg) is raised when a character pair contains a char that is not in vocabulary.

The transition_probabilities_table can be used as input in the OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods.

@note
- (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) :
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
*/
void createOCRHMMTransitionsTable(string& vocabulary, vector<string>& lexicon, OutputArray _transitions)
{
    CV_Assert( vocabulary.size() > 0 );
    CV_Assert( lexicon.size() > 0 );

    const int num_chars = (int)vocabulary.size();

    // create() keeps an already matching buffer and (re)allocates otherwise,
    // so no explicit size/type comparison is needed beforehand.
    _transitions.create(num_chars, num_chars, CV_64F);

    Mat transitions = _transitions.getMat();
    transitions = Scalar(0);

    // count_pairs(0,i): number of observed pairs whose first char is vocabulary[i]
    Mat count_pairs = Mat::zeros(1, num_chars, CV_64F);

    for (size_t w=0; w<lexicon.size(); w++)
    {
        const string& word = lexicon[w];

        // Bound the loop on j (second char of the pair) instead of size()-1:
        // size()-1 wraps around for an empty word and would read out of bounds.
        for (size_t i=0,j=1; j<word.size(); i++,j++)
        {
            size_t idx_i = vocabulary.find(word[i]);
            size_t idx_j = vocabulary.find(word[j]);
            if ((idx_i == string::npos) || (idx_j == string::npos))
            {
                CV_Error(Error::StsBadArg, "Found a non-vocabulary char in lexicon!");
            }
            transitions.at<double>((int)idx_i,(int)idx_j) += 1;
            count_pairs.at<double>(0,(int)idx_i) += 1;
        }
    }

    for (int i=0; i<transitions.rows; i++)
    {
        const double total = count_pairs.at<double>(0,i);

        // A char that never starts a pair has total == 0; dividing would fill the
        // row with NaN, so keep its all-zero row instead.
        if (total > 0)
        {
            transitions.row(i) = transitions.row(i) / total; //normalize
        }
    }
}
}
}

Loading…
Cancel
Save