Adds example on segmented word recognition. Shows the use of the OCRHMMDecoder with the NM and CNN default classifiers.
After Width: | Height: | Size: 155 KiB |
After Width: | Height: | Size: 1.1 KiB |
After Width: | Height: | Size: 124 KiB |
After Width: | Height: | Size: 2.7 KiB |
After Width: | Height: | Size: 89 KiB |
After Width: | Height: | Size: 1.0 KiB |
After Width: | Height: | Size: 101 KiB |
After Width: | Height: | Size: 528 B |
After Width: | Height: | Size: 57 KiB |
After Width: | Height: | Size: 682 B |
@ -0,0 +1,116 @@ |
||||
/*
|
||||
* segmented_word_recognition.cpp |
||||
* |
||||
* A demo program on segmented word recognition. |
||||
* Shows the use of the OCRHMMDecoder API with the two provided default character classifiers. |
||||
* |
||||
* Created on: Jul 31, 2015 |
||||
* Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es> |
||||
*/ |
||||
|
||||
#include "opencv2/text.hpp" |
||||
#include "opencv2/core/utility.hpp" |
||||
#include "opencv2/highgui.hpp" |
||||
#include "opencv2/imgproc.hpp" |
||||
|
||||
#include <iostream> |
||||
|
||||
using namespace std; |
||||
using namespace cv; |
||||
using namespace text; |
||||
|
||||
|
||||
int main(int argc, char* argv[]) { |
||||
|
||||
const String keys = |
||||
"{help h usage ? | | print this message.}" |
||||
"{@image | | source image for recognition.}" |
||||
"{@mask | | binary segmentation mask where each contour is a character.}" |
||||
"{lexicon lex l | | (optional) lexicon provided as a list of comma separated words.}" |
||||
; |
||||
CommandLineParser parser(argc, argv, keys); |
||||
|
||||
parser.about("\nSegmented word recognition.\nA demo program on segmented word recognition. Shows the use of the OCRHMMDecoder API with the two provided default character classifiers.\n"); |
||||
|
||||
String filename1 = parser.get<String>(0); |
||||
String filename2 = parser.get<String>(1); |
||||
|
||||
parser.printMessage(); |
||||
cout << endl << endl; |
||||
if ((parser.has("help")) || (filename1.size()==0)) |
||||
{ |
||||
return 0; |
||||
} |
||||
if (!parser.check()) |
||||
{ |
||||
parser.printErrors(); |
||||
return 0; |
||||
} |
||||
|
||||
Mat image = imread(filename1); |
||||
Mat mask; |
||||
if (filename2.size() > 0) |
||||
mask = imread(filename2); |
||||
else |
||||
image.copyTo(mask); |
||||
|
||||
// be sure the mask is a binry image
|
||||
cvtColor(mask, mask, COLOR_BGR2GRAY); |
||||
threshold(mask, mask, 128., 255, THRESH_BINARY); |
||||
|
||||
// character recognition vocabulary
|
||||
string voc = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; |
||||
// Emission probabilities for the HMM language model (identity matrix by default)
|
||||
Mat emissionProbabilities = Mat::eye((int)voc.size(), (int)voc.size(), CV_64FC1); |
||||
// Bigram transition probabilities for the HMM language model
|
||||
Mat transitionProbabilities; |
||||
|
||||
string lex = parser.get<string>("lex"); |
||||
if (lex.size()>0) |
||||
{ |
||||
// Build tailored language model for the provided lexicon
|
||||
vector<string> lexicon; |
||||
size_t pos = 0; |
||||
string delimiter = ","; |
||||
std::string token; |
||||
while ((pos = lex.find(delimiter)) != std::string::npos) { |
||||
token = lex.substr(0, pos); |
||||
lexicon.push_back(token); |
||||
lex.erase(0, pos + delimiter.length()); |
||||
} |
||||
lexicon.push_back(lex); |
||||
createOCRHMMTransitionsTable(voc,lexicon,transitionProbabilities); |
||||
} else { |
||||
// Or load the generic language model (from Aspell English dictionary)
|
||||
FileStorage fs("./OCRHMM_transitions_table.xml", FileStorage::READ); |
||||
fs["transition_probabilities"] >> transitionProbabilities; |
||||
fs.release(); |
||||
} |
||||
|
||||
Ptr<OCRTesseract> ocrTes = OCRTesseract::create(); |
||||
|
||||
Ptr<OCRHMMDecoder> ocrNM = OCRHMMDecoder::create( |
||||
loadOCRHMMClassifierNM("./OCRHMM_knn_model_data.xml.gz"), |
||||
voc, transitionProbabilities, emissionProbabilities); |
||||
|
||||
Ptr<OCRHMMDecoder> ocrCNN = OCRHMMDecoder::create( |
||||
loadOCRHMMClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"), |
||||
voc, transitionProbabilities, emissionProbabilities); |
||||
|
||||
std::string output; |
||||
double t_r = getTickCount(); |
||||
ocrTes->run(mask, output); |
||||
output.erase(remove(output.begin(), output.end(), '\n'), output.end()); |
||||
cout << " OCR_Tesseract output \"" << output << "\". Done in " |
||||
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl; |
||||
|
||||
t_r = getTickCount(); |
||||
ocrNM->run(mask, output); |
||||
cout << " OCR_NM output \"" << output << "\". Done in " |
||||
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl; |
||||
|
||||
t_r = getTickCount(); |
||||
ocrCNN->run(image, mask, output); |
||||
cout << " OCR_CNN output \"" << output << "\". Done in " |
||||
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl; |
||||
} |