Adds example on segmented word recognition. Shows the use of the OCRHMMDecoder with the NM and CNN default classifiers.

pull/321/head
Lluis Gomez-Bigorda 10 years ago
parent ee677a255b
commit 2538bf74a6
  1. BIN
      modules/text/samples/scenetext_segmented_word01.jpg
  2. BIN
      modules/text/samples/scenetext_segmented_word01_mask.png
  3. BIN
      modules/text/samples/scenetext_segmented_word02.jpg
  4. BIN
      modules/text/samples/scenetext_segmented_word02_mask.png
  5. BIN
      modules/text/samples/scenetext_segmented_word03.jpg
  6. BIN
      modules/text/samples/scenetext_segmented_word03_mask.png
  7. BIN
      modules/text/samples/scenetext_segmented_word04.jpg
  8. BIN
      modules/text/samples/scenetext_segmented_word04_mask.png
  9. BIN
      modules/text/samples/scenetext_segmented_word05.jpg
  10. BIN
      modules/text/samples/scenetext_segmented_word05_mask.png
  11. 116
      modules/text/samples/segmented_word_recognition.cpp

Binary file not shown.

After

Width:  |  Height:  |  Size: 155 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 124 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 89 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 101 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 528 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 682 B

@ -0,0 +1,116 @@
/*
* segmented_word_recognition.cpp
*
* A demo program on segmented word recognition.
* Shows the use of the OCRHMMDecoder API with the two provided default character classifiers.
*
* Created on: Jul 31, 2015
* Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es>
*/
#include "opencv2/text.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
using namespace cv;
using namespace text;
int main(int argc, char* argv[]) {
const String keys =
"{help h usage ? | | print this message.}"
"{@image | | source image for recognition.}"
"{@mask | | binary segmentation mask where each contour is a character.}"
"{lexicon lex l | | (optional) lexicon provided as a list of comma separated words.}"
;
CommandLineParser parser(argc, argv, keys);
parser.about("\nSegmented word recognition.\nA demo program on segmented word recognition. Shows the use of the OCRHMMDecoder API with the two provided default character classifiers.\n");
String filename1 = parser.get<String>(0);
String filename2 = parser.get<String>(1);
parser.printMessage();
cout << endl << endl;
if ((parser.has("help")) || (filename1.size()==0))
{
return 0;
}
if (!parser.check())
{
parser.printErrors();
return 0;
}
Mat image = imread(filename1);
Mat mask;
if (filename2.size() > 0)
mask = imread(filename2);
else
image.copyTo(mask);
// be sure the mask is a binry image
cvtColor(mask, mask, COLOR_BGR2GRAY);
threshold(mask, mask, 128., 255, THRESH_BINARY);
// character recognition vocabulary
string voc = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
// Emission probabilities for the HMM language model (identity matrix by default)
Mat emissionProbabilities = Mat::eye((int)voc.size(), (int)voc.size(), CV_64FC1);
// Bigram transition probabilities for the HMM language model
Mat transitionProbabilities;
string lex = parser.get<string>("lex");
if (lex.size()>0)
{
// Build tailored language model for the provided lexicon
vector<string> lexicon;
size_t pos = 0;
string delimiter = ",";
std::string token;
while ((pos = lex.find(delimiter)) != std::string::npos) {
token = lex.substr(0, pos);
lexicon.push_back(token);
lex.erase(0, pos + delimiter.length());
}
lexicon.push_back(lex);
createOCRHMMTransitionsTable(voc,lexicon,transitionProbabilities);
} else {
// Or load the generic language model (from Aspell English dictionary)
FileStorage fs("./OCRHMM_transitions_table.xml", FileStorage::READ);
fs["transition_probabilities"] >> transitionProbabilities;
fs.release();
}
Ptr<OCRTesseract> ocrTes = OCRTesseract::create();
Ptr<OCRHMMDecoder> ocrNM = OCRHMMDecoder::create(
loadOCRHMMClassifierNM("./OCRHMM_knn_model_data.xml.gz"),
voc, transitionProbabilities, emissionProbabilities);
Ptr<OCRHMMDecoder> ocrCNN = OCRHMMDecoder::create(
loadOCRHMMClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"),
voc, transitionProbabilities, emissionProbabilities);
std::string output;
double t_r = getTickCount();
ocrTes->run(mask, output);
output.erase(remove(output.begin(), output.end(), '\n'), output.end());
cout << " OCR_Tesseract output \"" << output << "\". Done in "
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl;
t_r = getTickCount();
ocrNM->run(mask, output);
cout << " OCR_NM output \"" << output << "\". Done in "
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl;
t_r = getTickCount();
ocrCNN->run(image, mask, output);
cout << " OCR_CNN output \"" << output << "\". Done in "
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl;
}
Loading…
Cancel
Save