You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
116 lines
4.1 KiB
116 lines
4.1 KiB
/* |
|
* segmented_word_recognition.cpp |
|
* |
|
* A demo program on segmented word recognition. |
|
* Shows the use of the OCRHMMDecoder API with the two provided default character classifiers. |
|
* |
|
* Created on: Jul 31, 2015 |
|
* Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es> |
|
*/ |
|
|
|
#include "opencv2/text.hpp" |
|
#include "opencv2/core/utility.hpp" |
|
#include "opencv2/highgui.hpp" |
|
#include "opencv2/imgproc.hpp" |
|
|
|
#include <iostream> |
|
|
|
using namespace std; |
|
using namespace cv; |
|
using namespace text; |
|
|
|
|
|
int main(int argc, char* argv[]) { |
|
|
|
const String keys = |
|
"{help h usage ? | | print this message.}" |
|
"{@image | | source image for recognition.}" |
|
"{@mask | | binary segmentation mask where each contour is a character.}" |
|
"{lexicon lex l | | (optional) lexicon provided as a list of comma separated words.}" |
|
; |
|
CommandLineParser parser(argc, argv, keys); |
|
|
|
parser.about("\nSegmented word recognition.\nA demo program on segmented word recognition. Shows the use of the OCRHMMDecoder API with the two provided default character classifiers.\n"); |
|
|
|
String filename1 = parser.get<String>(0); |
|
String filename2 = parser.get<String>(1); |
|
|
|
parser.printMessage(); |
|
cout << endl << endl; |
|
if ((parser.has("help")) || (filename1.size()==0)) |
|
{ |
|
return 0; |
|
} |
|
if (!parser.check()) |
|
{ |
|
parser.printErrors(); |
|
return 0; |
|
} |
|
|
|
Mat image = imread(filename1); |
|
Mat mask; |
|
if (filename2.size() > 0) |
|
mask = imread(filename2); |
|
else |
|
image.copyTo(mask); |
|
|
|
// be sure the mask is a binry image |
|
cvtColor(mask, mask, COLOR_BGR2GRAY); |
|
threshold(mask, mask, 128., 255, THRESH_BINARY); |
|
|
|
// character recognition vocabulary |
|
string voc = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; |
|
// Emission probabilities for the HMM language model (identity matrix by default) |
|
Mat emissionProbabilities = Mat::eye((int)voc.size(), (int)voc.size(), CV_64FC1); |
|
// Bigram transition probabilities for the HMM language model |
|
Mat transitionProbabilities; |
|
|
|
string lex = parser.get<string>("lex"); |
|
if (lex.size()>0) |
|
{ |
|
// Build tailored language model for the provided lexicon |
|
vector<string> lexicon; |
|
size_t pos = 0; |
|
string delimiter = ","; |
|
std::string token; |
|
while ((pos = lex.find(delimiter)) != std::string::npos) { |
|
token = lex.substr(0, pos); |
|
lexicon.push_back(token); |
|
lex.erase(0, pos + delimiter.length()); |
|
} |
|
lexicon.push_back(lex); |
|
createOCRHMMTransitionsTable(voc,lexicon,transitionProbabilities); |
|
} else { |
|
// Or load the generic language model (from Aspell English dictionary) |
|
FileStorage fs("./OCRHMM_transitions_table.xml", FileStorage::READ); |
|
fs["transition_probabilities"] >> transitionProbabilities; |
|
fs.release(); |
|
} |
|
|
|
Ptr<OCRTesseract> ocrTes = OCRTesseract::create(); |
|
|
|
Ptr<OCRHMMDecoder> ocrNM = OCRHMMDecoder::create( |
|
loadOCRHMMClassifierNM("./OCRHMM_knn_model_data.xml.gz"), |
|
voc, transitionProbabilities, emissionProbabilities); |
|
|
|
Ptr<OCRHMMDecoder> ocrCNN = OCRHMMDecoder::create( |
|
loadOCRHMMClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"), |
|
voc, transitionProbabilities, emissionProbabilities); |
|
|
|
std::string output; |
|
double t_r = (double)getTickCount(); |
|
ocrTes->run(mask, output); |
|
output.erase(remove(output.begin(), output.end(), '\n'), output.end()); |
|
cout << " OCR_Tesseract output \"" << output << "\". Done in " |
|
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl; |
|
|
|
t_r = (double)getTickCount(); |
|
ocrNM->run(mask, output); |
|
cout << " OCR_NM output \"" << output << "\". Done in " |
|
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl; |
|
|
|
t_r = (double)getTickCount(); |
|
ocrCNN->run(image, mask, output); |
|
cout << " OCR_CNN output \"" << output << "\". Done in " |
|
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl; |
|
}
|
|
|