Merge pull request #298 from lluisgomez:master

9 years ago · f9d42886f1
parent b63d80260b 2e787d6b42
commit f9d42886f1
3 changed files with 100 additions and 15 deletions
--- a/modules/text/include/opencv2/text/ocr.hpp
+++ b/modules/text/include/opencv2/text/ocr.hpp
@ -240,6 +240,21 @@ types.
 */
 CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string& filename);

+/** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
+
+@param vocabulary The language vocabulary (chars when ascii english text).
+
+@param lexicon The list of words that are expected to be found in a particular image.
+
+@param transition_probabilities_table Output table with transition probabilities between character pairs. cols == rows == vocabulary.size().
+
+The function calculates frequency statistics of character pairs from the given lexicon and fills the output transition_probabilities_table with them. The transition_probabilities_table can be used as input in the OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods.
+@note
+   -   (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) :
+        <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
+ */
+CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector<std::string>& lexicon, OutputArray transition_probabilities_table);
+

 /* OCR BeamSearch Decoder */

--- a/modules/text/samples/cropped_word_recognition.cpp
+++ b/modules/text/samples/cropped_word_recognition.cpp
@ -1,11 +1,10 @@
 /*
- * textdetection.cpp
+ * cropped_word_recognition.cpp
 *
- * A demo program of End-to-end Scene Text Detection and Recognition:
- * Shows the use of the Tesseract OCR API with the Extremal Region Filter algorithm described in:
- * Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
+ * A demo program of text recognition in a given cropped word.
+ * Shows the use of the OCRBeamSearchDecoder class API using the provided default classifier.
 *
- * Created on: Jul 31, 2014
+ * Created on: Jul 9, 2015
 *     Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es>
 */

@ -20,9 +19,9 @@ using namespace std;
 using namespace cv;
 using namespace cv::text;

-//Perform text recognition in a given cropped word
 int main(int argc, char* argv[])
 {
+
    cout << endl << argv[0] << endl << endl;
    cout << "A demo program of Scene Text cropped word Recognition: " << endl;
    cout << "Shows the use of the OCRBeamSearchDecoder class using the Single Layer CNN character classifier described in:" << endl;
@ -37,19 +36,34 @@ int main(int argc, char* argv[])
        return(0);
    }

+    string vocabulary = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyx0123456789"; // must have the same order as the classifier output classes
+    vector<string> lexicon;  // a list of words expected to be found on the input image
+    lexicon.push_back(string("abb"));
+    lexicon.push_back(string("patata"));
+    lexicon.push_back(string("CHINA"));
+    lexicon.push_back(string("HERE"));
+    lexicon.push_back(string("President"));
+    lexicon.push_back(string("smash"));
+    lexicon.push_back(string("KUALA"));
+    lexicon.push_back(string("NINTENDO"));
+
+    // Create a tailored language model from a small given lexicon
    Mat transition_p;
-    string filename = "OCRHMM_transitions_table.xml"; // TODO this table was done with a different vocabulary order?
-                                                      // TODO add a new function in ocr.cpp to create transition tab
-                                                      // for a given lexicon
+    createOCRHMMTransitionsTable(vocabulary,lexicon,transition_p);
+
+    // An alternative would be to load the default generic language model
+    //    (created from ispell 42869 english words list)
+    /*Mat transition_p;
+    string filename = "OCRHMM_transitions_table.xml"; // TODO use same order for voc
    FileStorage fs(filename, FileStorage::READ);
    fs["transition_probabilities"] >> transition_p;
-    fs.release();
+    fs.release();*/
+
    Mat emission_p = Mat::eye(62,62,CV_64FC1);
-    string voc = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyx0123456789";

    Ptr<OCRBeamSearchDecoder> ocr = OCRBeamSearchDecoder::create(
                loadOCRBeamSearchClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"),
-                voc, transition_p, emission_p);
+                vocabulary, transition_p, emission_p);

    double t_r = (double)getTickCount();
    string output;
--- a/modules/text/src/ocr_hmm_decoder.cpp
+++ b/modules/text/src/ocr_hmm_decoder.cpp
@ -113,7 +113,7 @@ public:
              vector<float>* component_confidences,
              int component_level)
    {
-    
+
        CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC3) );
        CV_Assert( (image.cols > 0) && (image.rows > 0) );
        CV_Assert( component_level == OCR_LEVEL_WORD );
@ -159,14 +159,14 @@ public:
            {
                if (vector_w.at<float>(0,s) == 0)
                   s_init = s+1;
-                else 
+                else
                  break;
            }
            for (int s=vector_w.cols-1; s>=0; s--)
            {
                if (vector_w.at<float>(0,s) == 0)
                   s_end = s;
-                else 
+                else
                  break;
            }

@ -598,5 +598,61 @@ Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string&
    return makePtr<OCRHMMClassifierKNN>(filename);
 }

+/** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
+
+@param vocabulary The language vocabulary (chars when ascii english text). Must not be empty.
+
+@param lexicon The list of words that are expected to be found in a particular image. Must not be empty.
+
+@param transition_probabilities_table Output table with transition probabilities between character pairs. cols == rows == vocabulary.size(), type CV_64F.
+
+The function calculates frequency statistics of character pairs from the given lexicon and fills
+the output transition_probabilities_table with them: entry (i,j) is the number of times the character
+with vocabulary index j immediately follows the character with vocabulary index i, divided by the
+total number of pairs that start with the character of index i.
+Rows of characters that never start a pair in the lexicon are left as all zeros, and words shorter
+than two characters contribute no pairs.
+An Error::StsBadArg error is raised if a character pair contains a character that is not in the vocabulary.
+The transition_probabilities_table can be used as input in the OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods.
+@note
+   -   (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) :
+        <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
+ */
+void createOCRHMMTransitionsTable(string& vocabulary, vector<string>& lexicon, OutputArray _transitions)
+{
+    CV_Assert( vocabulary.size() > 0 );
+    CV_Assert( lexicon.size() > 0 );
+
+    const int n_chars = (int)vocabulary.size();
+
+    // create() leaves the output untouched when it already has this size and type
+    _transitions.create(n_chars, n_chars, CV_64F);
+
+    Mat transitions = _transitions.getMat();
+    transitions = Scalar(0);
+    Mat count_pairs = Mat::zeros(1, n_chars, CV_64F); // number of pairs starting with each char
+
+    for (size_t w=0; w<lexicon.size(); w++)
+    {
+      const string& word = lexicon[w];
+
+      // compare i+1 against size(): size()-1 wraps around (size_t) for an empty word
+      for (size_t i=0; i+1<word.size(); i++)
+      {
+        // find() returns the first occurrence, so a char repeated in the vocabulary maps to its first index
+        size_t idx_i = vocabulary.find(word[i]);
+        size_t idx_j = vocabulary.find(word[i+1]);
+        if ((idx_i == string::npos) || (idx_j == string::npos))
+        {
+           CV_Error(Error::StsBadArg, "Found a non-vocabulary char in lexicon!");
+        }
+        transitions.at<double>((int)idx_i,(int)idx_j) += 1;
+        count_pairs.at<double>(0,(int)idx_i) += 1;
+      }
+    }
+
+    for (int i=0; i<transitions.rows; i++)
+    {
+      const double n_pairs = count_pairs.at<double>(0,i);
+      // skip rows without observations: dividing by zero would fill them with NaN
+      if (n_pairs > 0)
+      {
+        transitions.row(i) = transitions.row(i) / n_pairs; //normalize
+      }
+    }
+}
+
 }
 }