Adds createOCRHMMTransitionsTable() utility function to create a tailored language model transitions table from a given list of words (lexicon)

10 years ago · e4cc9a77a9
parent f9634fd8cd
commit e4cc9a77a9
2 changed files with 74 additions and 3 deletions
--- a/modules/text/include/opencv2/text/ocr.hpp
+++ b/modules/text/include/opencv2/text/ocr.hpp
@ -240,6 +240,21 @@ types.
 */
 CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string& filename);

+/** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
+
+@param vocabulary The language vocabulary (chars when ascii english text).
+
+@param lexicon The list of words that are expected to be found in a particular image.
+
+@param transition_probabilities_table Output table with transition probabilities between character pairs. cols == rows == vocabulary.size().
+
+The function calculate frequency statistics of character pairs from the given lexicon and fills the output transition_probabilities_table with them. The transition_probabilities_table can be used as input in the OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods.
+@note
+   -   (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) :
+        <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
+ */
+CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector<std::string>& lexicon, OutputArray transition_probabilities_table);
+

 /* OCR BeamSearch Decoder */

--- a/modules/text/src/ocr_hmm_decoder.cpp
+++ b/modules/text/src/ocr_hmm_decoder.cpp
@ -113,7 +113,7 @@ public:
              vector<float>* component_confidences,
              int component_level)
    {
-    
+
        CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC3) );
        CV_Assert( (image.cols > 0) && (image.rows > 0) );
        CV_Assert( component_level == OCR_LEVEL_WORD );
@ -159,14 +159,14 @@ public:
            {
                if (vector_w.at<float>(0,s) == 0)
                   s_init = s+1;
-                else 
+                else
                  break;
            }
            for (int s=vector_w.cols-1; s>=0; s--)
            {
                if (vector_w.at<float>(0,s) == 0)
                   s_end = s;
-                else 
+                else
                  break;
            }

@ -596,5 +596,61 @@ Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string&
    return makePtr<OCRHMMClassifierKNN>(filename);
 }

+/** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
+
+@param vocabulary The language vocabulary (chars when ascii english text).
+
+@param lexicon The list of words that are expected to be found in a particular image.
+
+@param transition_probabilities_table Output table with transition probabilities between character pairs. cols == rows == vocabulary.size().
+
+The function calculate frequency statistics of character pairs from the given lexicon and fills
+the output transition_probabilities_table with them.
+The transition_probabilities_table can be used as input in the OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods.
+@note
+   -   (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) :
+        <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
+ */
+void createOCRHMMTransitionsTable(string& vocabulary, vector<string>& lexicon, OutputArray _transitions)
+{
+
+
+    CV_Assert( vocabulary.size() > 0 );
+    CV_Assert( lexicon.size() > 0 );
+
+    if ( (_transitions.getMat().cols != (int)vocabulary.size()) ||
+         (_transitions.getMat().rows != (int)vocabulary.size()) ||
+         (_transitions.getMat().type() != CV_64F) )
+    {
+      _transitions.create((int)vocabulary.size(), (int)vocabulary.size(), CV_64F);
+    }
+
+    Mat transitions = _transitions.getMat();
+    transitions = Scalar(0);
+    Mat count_pairs = Mat::zeros(1, vocabulary.size(), CV_64F);
+
+    for (size_t w=0; w<lexicon.size(); w++)
+    {
+      for (size_t i=0,j=1; i<lexicon[w].size()-1; i++,j++)
+      {
+        size_t idx_i = vocabulary.find(lexicon[w][i]);
+        size_t idx_j = vocabulary.find(lexicon[w][j]);
+        if ((idx_i == string::npos) || (idx_j == string::npos))
+        {
+           CV_Error(Error::StsBadArg, "Found a non-vocabulary char in lexicon!");
+        }
+        transitions.at<double>(idx_i,idx_j) += 1;
+        count_pairs.at<double>(0,idx_i) += 1;
+      }
+    }
+
+    for (int i=0; i<transitions.rows; i++)
+    {
+      transitions.row(i) = transitions.row(i) / count_pairs.at<double>(0,i); //normalize
+    }
+
+    return;
+}
+
 }
 }