Merge pull request #350 from lluisgomez:ocrbeamsearch_refactor

Vadim Pisarevsky 9 years ago
commit e2f9c66671
Files changed:
  modules/text/include/opencv2/text/ocr.hpp (5 changes)
  modules/text/samples/cropped_word_recognition.cpp (9 changes)
  modules/text/samples/scenetext_word03.jpg (BIN)
  modules/text/samples/scenetext_word04.jpg (BIN)
  modules/text/src/ocr_beamsearch_decoder.cpp (324 changes)

modules/text/include/opencv2/text/ocr.hpp

@@ -338,6 +338,9 @@ public:
        including 0 as start-sequence location.
     */
        virtual void eval( InputArray image, std::vector< std::vector<double> >& recognition_probabilities, std::vector<int>& oversegmentation );
+
+        int getWindowSize() {return 0;}
+        int getStepSize() {return 0;}
    };

public:
@@ -396,7 +399,7 @@ public:
                 InputArray emission_probabilities_table, // Table with observation emission probabilities
                                                          // cols == rows == vocabulari.size()
                 decoder_mode mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
-                int beam_size = 50);                     // Size of the beam in Beam Search algorithm
+                int beam_size = 500);                    // Size of the beam in Beam Search algorithm

protected:
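Review note: the default beam width goes from 50 to 500, trading speed for a search that is much less likely to prune the best segmentation. Callers with a small lexicon can keep the old speed by passing the width explicitly; a minimal call-site sketch (it mirrors the sample change further down in this diff):

    Ptr<OCRBeamSearchDecoder> ocr = OCRBeamSearchDecoder::create(
            loadOCRBeamSearchClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"),
            vocabulary, transition_p, emission_p,
            OCR_DECODER_VITERBI,
            50); // narrow beam: faster, adequate for tiny lexicons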

modules/text/samples/cropped_word_recognition.cpp

@@ -39,12 +39,13 @@ int main(int argc, char* argv[])
     string vocabulary = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; // must have the same order as the clasifier output classes
     vector<string> lexicon;  // a list of words expected to be found on the input image
     lexicon.push_back(string("abb"));
-    lexicon.push_back(string("patata"));
+    lexicon.push_back(string("riser"));
     lexicon.push_back(string("CHINA"));
     lexicon.push_back(string("HERE"));
     lexicon.push_back(string("President"));
     lexicon.push_back(string("smash"));
     lexicon.push_back(string("KUALA"));
+    lexicon.push_back(string("Produkt"));
     lexicon.push_back(string("NINTENDO"));

     // Create tailored language model a small given lexicon
@@ -54,16 +55,18 @@ int main(int argc, char* argv[])
     // An alternative would be to load the default generic language model
     // (created from ispell 42869 english words list)
     /*Mat transition_p;
-    string filename = "OCRHMM_transitions_table.xml"; // TODO use same order for voc
+    string filename = "OCRHMM_transitions_table.xml";
     FileStorage fs(filename, FileStorage::READ);
     fs["transition_probabilities"] >> transition_p;
     fs.release();*/

     Mat emission_p = Mat::eye(62,62,CV_64FC1);

+    // Notice we set here a beam size of 50. This is much faster than using the default value (500).
+    // 50 works well with our tiny lexicon example, but may not with larger dictionaries.
     Ptr<OCRBeamSearchDecoder> ocr = OCRBeamSearchDecoder::create(
                     loadOCRBeamSearchClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"),
-                    vocabulary, transition_p, emission_p);
+                    vocabulary, transition_p, emission_p, OCR_DECODER_VITERBI, 50);

     double t_r = (double)getTickCount();
     string output;
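Review note: t_r starts a timer just before recognition; the elapsed time in seconds is recovered with the usual OpenCV tick idiom. A sketch of the completion (the run() call shown is hypothetical, the sample's actual arguments may differ):

    ocr->run(image, output); // hypothetical call using the decoder created above
    t_r = ((double)getTickCount() - t_r) / getTickFrequency();
    cout << "Recognition took " << t_r << " s." << endl;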

modules/text/samples/scenetext_word03.jpg: new binary file, 17 KiB (not shown)

modules/text/samples/scenetext_word04.jpg: new binary file, 46 KiB (not shown)

modules/text/src/ocr_beamsearch_decoder.cpp

@@ -72,13 +72,12 @@ void OCRBeamSearchDecoder::run(Mat& image, string& output_text, vector<Rect>* co
     if (component_confidences != NULL)
         component_confidences->clear();
}

void OCRBeamSearchDecoder::run(Mat& image, Mat& mask, string& output_text, vector<Rect>* component_rects,
                               vector<string>* component_texts, vector<float>* component_confidences,
                               int component_level)
{
+    CV_Assert(mask.type() == CV_8UC1);
     CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC3) );
-    CV_Assert( mask.type() == CV_8UC1 );
     CV_Assert( (component_level == OCR_LEVEL_TEXTLINE) || (component_level == OCR_LEVEL_WORD) );
     output_text.clear();
     if (component_rects != NULL)
@@ -102,11 +101,18 @@ void OCRBeamSearchDecoder::ClassifierCallback::eval( InputArray image, vector< v
     oversegmentation.clear();
}

+struct beamSearch_node {
+    double score;
+    vector<int> segmentation;
+    bool expanded;
+    // TODO calculating score of its childs would be much faster if we store the last column
+    //      of their "root" path.
+};
+
-bool beam_sort_function ( pair< double,vector<int> > i, pair< double,vector<int> > j );
+bool beam_sort_function ( beamSearch_node a, beamSearch_node b );

-bool beam_sort_function ( pair< double,vector<int> > i, pair< double,vector<int> > j )
+bool beam_sort_function ( beamSearch_node a, beamSearch_node b )
{
-    return (i.first > j.first);
+    return (a.score > b.score);
}
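Review note: replacing the pair< double,vector<int> > with a named node makes the beam self-describing, and the comparator sorts descending by score, so beam[0] is always the best hypothesis and pruning only ever drops the tail. A minimal usage sketch (beam_size is assumed to be the decoder's beam width):

    #include <algorithm>
    #include <vector>
    const int beam_size = 500;            // decoder's beam width (assumed)
    std::vector<beamSearch_node> beam;
    // ... push candidate nodes ...
    std::sort(beam.begin(), beam.end(), beam_sort_function); // beam[0] = highest score
    if ((int)beam.size() > beam_size)
        beam.erase(beam.begin() + beam_size, beam.end());    // keep only the best beam_size nodes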
@@ -122,17 +128,43 @@ public:
               int _beam_size)
    {
        classifier = _classifier;
-        transition_p = transition_probabilities_table.getMat();
+        step_size = classifier->getStepSize();
+        win_size = classifier->getWindowSize();
        emission_p = emission_probabilities_table.getMat();
        vocabulary = _vocabulary;
        mode = _mode;
        beam_size = _beam_size;
+
+        transition_probabilities_table.getMat().copyTo(transition_p);
+        for (int i=0; i<transition_p.rows; i++)
+        {
+            for (int j=0; j<transition_p.cols; j++)
+            {
+                if (transition_p.at<double>(i,j) == 0)
+                    transition_p.at<double>(i,j) = -DBL_MAX;
+                else
+                    transition_p.at<double>(i,j) = log(transition_p.at<double>(i,j));
+            }
+        }
    }

    ~OCRBeamSearchDecoderImpl()
    {
    }

+    void run( Mat& src,
+              Mat& mask,
+              string& out_sequence,
+              vector<Rect>* component_rects,
+              vector<string>* component_texts,
+              vector<float>* component_confidences,
+              int component_level)
+    {
+        CV_Assert(mask.type() == CV_8UC1);
+        //nothing to do with a mask here
+        run( src, out_sequence, component_rects, component_texts, component_confidences,
+             component_level);
+    }
+
    void run( Mat& src,
              string& out_sequence,
              vector<Rect>* component_rects,
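Review note: moving the log conversion of the transition table into the constructor means it runs once per decoder instead of once per run() call, and copyTo avoids log-transforming the caller's matrix in place (and re-logging it on a second run). In log space a chain probability P(c1)·P(c2|c1)·... becomes a sum, which avoids floating-point underflow on long words. A standalone sketch of the mapping:

    #include <cfloat>
    #include <cmath>
    // Convert a probability to a log score; impossible transitions map to -DBL_MAX
    // so they can never win a max() comparison in the Viterbi recursion.
    inline double log_score(double p) { return (p == 0) ? -DBL_MAX : std::log(p); }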
@@ -152,20 +184,62 @@ public:
        if (component_confidences != NULL)
            component_confidences->clear();

-        // TODO We must split a line into words or specify we only work with words
-
        if(src.type() == CV_8UC3)
        {
            cvtColor(src,src,COLOR_RGB2GRAY);
        }

-        vector< vector<double> > recognition_probabilities;
-        vector<int> oversegmentation;
+        // TODO if input is a text line (not a word) we may need to split into words here!

+        // do sliding window classification along a croped word image
        classifier->eval(src, recognition_probabilities, oversegmentation);

-        /*Now we go here with the beam search algorithm to optimize the recognition score*/
+        // if the number of oversegmentation points found is less than 2 we can not do nothing!!
+        if (oversegmentation.size() < 2) return;
+
+        //NMS of recognitions
+        double last_best_p   = 0;
+        int    last_best_idx = -1;
+        for (size_t i=0; i<recognition_probabilities.size(); )
+        {
+            double best_p   = 0;
+            int    best_idx = -1;
+            for (size_t j=0; j<recognition_probabilities[i].size(); j++)
+            {
+                if (recognition_probabilities[i][j] > best_p)
+                {
+                    best_p   = recognition_probabilities[i][j];
+                    best_idx = (int)j;
+                }
+            }
+            if ( (i>0) && (best_idx == last_best_idx)
+                 && (oversegmentation[i]*step_size < oversegmentation[i-1]*step_size + win_size) )
+            {
+                if (last_best_p > best_p)
+                {
+                    //remove i'th elements and do not increment i
+                    recognition_probabilities.erase (recognition_probabilities.begin()+i);
+                    oversegmentation.erase (oversegmentation.begin()+i);
+                    continue;
+                } else {
+                    //remove (i-1)'th elements and do not increment i
+                    recognition_probabilities.erase (recognition_probabilities.begin()+i-1);
+                    oversegmentation.erase (oversegmentation.begin()+i-1);
+                    last_best_idx = best_idx;
+                    last_best_p   = best_p;
+                    continue;
+                }
+            }
+            last_best_idx = best_idx;
+            last_best_p   = best_p;
+            i++;
+        }
+
+        /*Now we go with the beam search algorithm to optimize the recognition score*/

        //convert probabilities to log probabilities
        for (size_t i=0; i<recognition_probabilities.size(); i++)
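Review note: the new block above is a 1-D non-maximum suppression over the sliding-window responses. Two consecutive windows count as duplicates when they predict the same class and their left edges are closer than one window width; only the stronger response survives. The overlap test, isolated as a sketch (names mirror the members used above):

    // Windows i-1 and i overlap iff x_i < x_{i-1} + win_size,
    // where x_k = oversegmentation[k] * step_size is the window's left edge in pixels.
    bool overlaps(const std::vector<int>& seg, size_t i, int step_size, int win_size)
    {
        return seg[i]*step_size < seg[i-1]*step_size + win_size;
    }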
@@ -178,170 +252,160 @@ public:
                recognition_probabilities[i][j] = log(recognition_probabilities[i][j]);
            }
        }

-        for (int i=0; i<transition_p.rows; i++)
-        {
-            for (int j=0; j<transition_p.cols; j++)
-            {
-                if (transition_p.at<double>(i,j) == 0)
-                    transition_p.at<double>(i,j) = -DBL_MAX;
-                else
-                    transition_p.at<double>(i,j) = log(transition_p.at<double>(i,j));
-            }
-        }
-
-        set<unsigned long long int> visited_nodes; //TODO make it member of class
-        vector<int> start_segmentation;
-        start_segmentation.push_back(oversegmentation[0]);
-        start_segmentation.push_back(oversegmentation[oversegmentation.size()-1]);
-
-        vector< pair< double,vector<int> > > beam;
-        beam.push_back( pair< double,vector<int> > (score_segmentation(start_segmentation, recognition_probabilities, out_sequence), start_segmentation) );
-
-        vector< vector<int> > childs = generate_childs(start_segmentation,oversegmentation, visited_nodes);
-        if (!childs.empty())
-            update_beam( beam, childs, recognition_probabilities);
-
-        //cout << "beam size " << beam.size() << " best score " << beam[0].first<< endl;
-
-        int generated_chids = (int)childs.size();
+        // initialize the beam with all possible character's pairs
+        int generated_chids = 0;
+        for (size_t i=0; i<recognition_probabilities.size()-1; i++)
+        {
+            for (size_t j=i+1; j<recognition_probabilities.size(); j++)
+            {
+                beamSearch_node node;
+                node.segmentation.push_back((int)i);
+                node.segmentation.push_back((int)j);
+                node.score = score_segmentation(node.segmentation, out_sequence);
+
+                vector< vector<int> > childs = generate_childs( node.segmentation );
+                node.expanded = true;
+
+                beam.push_back( node );
+
+                if (!childs.empty())
+                    update_beam( childs );
+
+                generated_chids += (int)childs.size();
+            }
+        }

        while (generated_chids != 0)
        {
            generated_chids = 0;
-            vector< pair< double,vector<int> > > old_beam = beam;
-            for (size_t i=0; i<old_beam.size(); i++)
+            for (size_t i=0; i<beam.size(); i++)
            {
-                childs = generate_childs(old_beam[i].second,oversegmentation, visited_nodes);
+                vector< vector<int> > childs;
+                if (!beam[i].expanded)
+                {
+                    childs = generate_childs( beam[i].segmentation );
+                    beam[i].expanded = true;
+                }
                if (!childs.empty())
-                    update_beam( beam, childs, recognition_probabilities);
+                    update_beam( childs );
                generated_chids += (int)childs.size();
            }
-            //cout << "beam size " << beam.size() << " best score " << beam[0].first << endl;
        }

-        // FINISHED ! Get the best prediction found into out_sequence
-        score_segmentation(beam[0].second, recognition_probabilities, out_sequence);
+        // Done! Get the best prediction found into out_sequence
+        double lp = score_segmentation( beam[0].segmentation, out_sequence );

-        // TODO fill other output parameters
+        // fill other (dummy) output parameters
+        component_rects->push_back(Rect(0,0,src.cols,src.rows));
+        component_texts->push_back(out_sequence);
+        component_confidences->push_back((float)exp(lp));

        return;
    }
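Review note: the refactored search has three phases. Every pair (i,j) of cut points seeds a two-cut hypothesis, each beam node is expanded at most once (the expanded flag replaces the old visited_nodes set), and the loop terminates when a full pass over the beam adds no children. Schematically:

    // seed:   push every 2-cut node {i,j}, scored by score_segmentation()
    // expand: while new children appear, expand each unexpanded beam node once;
    //         update_beam() keeps only the best beam_size hypotheses
    // read:   beam[0] holds the best segmentation; its Viterbi log score lp is
    //         surfaced to the caller as exp(lp) in component_confidences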
-    void run( Mat& src,
-              Mat& mask,
-              string& out_sequence,
-              vector<Rect>* component_rects,
-              vector<string>* component_texts,
-              vector<float>* component_confidences,
-              int component_level)
-    {
-        CV_Assert( mask.type() == CV_8UC1 );
-        // Nothing to do with a mask here. We do slidding window anyway.
-        run( src, out_sequence, component_rects, component_texts, component_confidences, component_level );
-    }
private:
+        int win_size;
+        int step_size;

-        ////////////////////////////////////////////////////////////
-
-        // TODO the way we expand nodes makes the recognition score heuristic not monotonic
-        // it should start from left node 0 and grow always to the right.
-
-        vector< vector<int> > generate_childs(vector<int> &segmentation, vector<int> &oversegmentation, set<unsigned long long int> &visited_nodes)
+        vector< beamSearch_node > beam;
+        vector< vector<double> > recognition_probabilities;
+        vector<int> oversegmentation;
+
+        vector< vector<int> > generate_childs( vector<int> &segmentation )
        {
-            /*cout << " generate childs for [";
-            for (size_t i = 0 ; i < segmentation .size(); i++)
-                cout << segmentation[i] << ",";
-            cout << "] ";*/
-
            vector< vector<int> > childs;
-            for (size_t i=0; i<oversegmentation.size(); i++)
+            for (size_t i=segmentation[segmentation.size()-1]+1; i<oversegmentation.size(); i++)
            {
-                int seg_point = oversegmentation[i];
+                int seg_point = (int)i;
                if (find(segmentation.begin(), segmentation.end(), seg_point) == segmentation.end())
                {
-                    //cout << seg_point << " " ;
                    vector<int> child = segmentation;
                    child.push_back(seg_point);
-                    sort(child.begin(), child.end());
-                    unsigned long long int key = 0;
-                    for (size_t j=0; j<child.size(); j++)
-                    {
-                        key += (unsigned long long int)pow(2,oversegmentation.size()-(oversegmentation.end()-find(oversegmentation.begin(), oversegmentation.end(), child[j])));
-                    }
-                    //if (!visited_nodes[key])
-                    if (visited_nodes.find(key) == visited_nodes.end())
-                    {
-                        childs.push_back(child);
-                        //visited_nodes[key] = true;
-                        visited_nodes.insert(key);
-                    }
+                    childs.push_back(child);
                }
            }
-            //cout << endl;
            return childs;
        }
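Review note: children are now generated only to the right of the segmentation's last cut point, so every segmentation is built left-to-right exactly once. That makes the old visited_nodes bookkeeping (and its pow(2,...) bit-set key, which could overflow an unsigned long long once there were more than 64 cut points) unnecessary, and it resolves the old TODO about the heuristic not being monotonic. A worked example of the new behaviour:

    // With 6 cut points (indices 0..5), segmentation {0,3} generates exactly:
    //   {0,3} -> {0,3,4}, {0,3,5}
    // Nothing left of index 3 is ever revisited, so no duplicate nodes can arise.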
-        ////////////////////////////////////////////////////////////
-
-        //TODO shall the beam itself be a member of the class?
-        void update_beam (vector< pair< double,vector<int> > > &beam, vector< vector<int> > &childs, vector< vector<double> > &recognition_probabilities)
+        void update_beam ( vector< vector<int> > &childs )
        {
            string out_sequence;
            double min_score = -DBL_MAX; //min score value to be part of the beam
-            if ((int)beam.size() == beam_size)
-                min_score = beam[beam.size()-1].first; //last element has the lowest score
+            if ((int)beam.size() >= beam_size)
+                min_score = beam[beam_size-1].score; //last element has the lowest score
            for (size_t i=0; i<childs.size(); i++)
            {
-                double score = score_segmentation(childs[i], recognition_probabilities, out_sequence);
+                double score = score_segmentation(childs[i], out_sequence);
                if (score > min_score)
                {
-                    beam.push_back(pair< double,vector<int> >(score,childs[i]));
+                    beamSearch_node node;
+                    node.score = score;
+                    node.segmentation = childs[i];
+                    node.expanded = false;
+                    beam.push_back(node);
                    sort(beam.begin(),beam.end(),beam_sort_function);
                    if ((int)beam.size() > beam_size)
                    {
-                        beam.pop_back();
-                        min_score = beam[beam.size()-1].first;
+                        beam.erase(beam.begin()+beam_size,beam.end());
+                        min_score = beam[beam.size()-1].score;
                    }
                }
            }
        }
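Review note: min_score acts as an admission threshold that tightens as better children arrive, so weak candidates are rejected without touching the beam. Re-sorting the whole beam on every admitted child is O(B log B); if that ever shows up in profiles, a bounded min-heap gives the same pruning with cheaper eviction. A sketch of that alternative (assumed to be semantically equivalent, not part of this patch):

    #include <queue>
    #include <vector>
    // Min-heap keyed on score: the top element is the worst kept node,
    // so evicting it when the heap exceeds beam_size is O(log B).
    struct worse {
        bool operator()(const beamSearch_node& a, const beamSearch_node& b) const
        { return a.score > b.score; }
    };
    std::priority_queue<beamSearch_node, std::vector<beamSearch_node>, worse> heap;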
-        ////////////////////////////////////////////////////////////
-
-        // TODO Add heuristics to the score function (see PhotoOCR paper)
-        // e.g.: in some cases we discard a segmentation because it includes a very large character
-        //       in other cases we do it because the overlapping between two chars is too large
-        //       etc.
-        double score_segmentation(vector<int> &segmentation, vector< vector<double> > &observations, string& outstring)
+        double score_segmentation( vector<int> &segmentation, string& outstring )
        {
-            //TODO This must be extracted from dictionary
+            // Score Heuristics:
+            // No need to use Viterbi to know a given segmentation is bad
+            // e.g.: in some cases we discard a segmentation because it includes a very large character
+            //       in other cases we do it because the overlapping between two chars is too large
+            // TODO Add more heuristics (e.g. penalize large inter-character variance)
+
+            Mat interdist ((int)segmentation.size()-1, 1, CV_32F, 1);
+            for (size_t i=0; i<segmentation.size()-1; i++)
+            {
+                interdist.at<float>((int)i,0) = (float)oversegmentation[segmentation[(int)i+1]]*step_size
+                                              - (float)oversegmentation[segmentation[(int)i]]*step_size;
+                if ((float)interdist.at<float>((int)i,0)/win_size > 2.25) // TODO explain how did you set this thrs
+                {
+                    return -DBL_MAX;
+                }
+                if ((float)interdist.at<float>((int)i,0)/win_size < 0.15) // TODO explain how did you set this thrs
+                {
+                    return -DBL_MAX;
+                }
+            }
+            Scalar m, std;
+            meanStdDev(interdist, m, std);
+            //double interdist_std = std[0];
+
+            //TODO Extracting start probs from lexicon (if we have it) may boost accuracy!
            vector<double> start_p(vocabulary.size());
            for (int i=0; i<(int)vocabulary.size(); i++)
                start_p[i] = log(1.0/vocabulary.size());

-            Mat V = Mat::ones((int)segmentation.size()-1,(int)vocabulary.size(),CV_64FC1);
+            Mat V = Mat::ones((int)segmentation.size(),(int)vocabulary.size(),CV_64FC1);
            V = V * -DBL_MAX;
            vector<string> path(vocabulary.size());

            // Initialize base cases (t == 0)
            for (int i=0; i<(int)vocabulary.size(); i++)
            {
-                V.at<double>(0,i) = start_p[i] + observations[segmentation[1]-1][i];
+                V.at<double>(0,i) = start_p[i] + recognition_probabilities[segmentation[0]][i];
                path[i] = vocabulary.at(i);
            }

            // Run Viterbi for t > 0
-            for (int t=1; t<(int)segmentation.size()-1; t++)
+            for (int t=1; t<(int)segmentation.size(); t++)
            {
                vector<string> newpath(vocabulary.size());
@ -352,7 +416,7 @@ private:
int best_idx = 0; int best_idx = 0;
for (int j=0; j<(int)vocabulary.size(); j++) for (int j=0; j<(int)vocabulary.size(); j++)
{ {
double prob = V.at<double>(t-1,j) + transition_p.at<double>(j,i) + observations[segmentation[t+1]-1][i]; double prob = V.at<double>(t-1,j) + transition_p.at<double>(j,i) + recognition_probabilities[segmentation[t]][i];
if ( prob > max_prob) if ( prob > max_prob)
{ {
max_prob = prob; max_prob = prob;
@@ -372,7 +436,7 @@ private:
            int best_idx = 0;
            for (int i=0; i<(int)vocabulary.size(); i++)
            {
-                double prob = V.at<double>((int)segmentation.size()-2,i);
+                double prob = V.at<double>((int)segmentation.size()-1,i);
                if ( prob > max_prob)
                {
                    max_prob = prob;
@@ -380,9 +444,8 @@ private:
                }
            }

-            //cout << " score " << max_prob / (segmentation.size()-1) << " " << path[best_idx] << endl;
            outstring = path[best_idx];
-            return max_prob / (segmentation.size()-1);
+            return (max_prob / (segmentation.size()-1));
        }
};
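Review note: with V resized to segmentation.size() rows and emissions indexed directly by segmentation[t] (segmentations now store window indices, not pixel cut positions), score_segmentation is the textbook log-space Viterbi recursion V(t,i) = max_j ( V(t-1,j) + logA(j,i) ) + logB(t,i); the returned score is divided by the path length so segmentations with different character counts stay comparable. A self-contained sketch of the same recursion (logA = log transitions, logB = log emissions per step; names are mine, not the patch's):

    #include <algorithm>
    #include <cfloat>
    #include <vector>
    // Best log score over all label sequences: logB[t][i] is the log probability
    // of label i at step t, logA[j][i] the log transition score from j to i.
    double viterbi_score(const std::vector<std::vector<double> >& logB,
                         const std::vector<std::vector<double> >& logA)
    {
        size_t N = logA.size();
        std::vector<double> V(logB[0]); // t == 0 (uniform start prior omitted for brevity)
        for (size_t t = 1; t < logB.size(); t++)
        {
            std::vector<double> Vn(N, -DBL_MAX);
            for (size_t i = 0; i < N; i++)
                for (size_t j = 0; j < N; j++)
                    Vn[i] = std::max(Vn[i], V[j] + logA[j][i] + logB[t][i]);
            V = Vn;
        }
        return *std::max_element(V.begin(), V.end());
    }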
@@ -408,21 +471,24 @@ public:
    void eval( InputArray src, vector< vector<double> >& recognition_probabilities, vector<int>& oversegmentation );

+    int getWindowSize() {return window_size;}
+    int getStepSize() {return step_size;}
+    void setStepSize(int _step_size) {step_size = _step_size;}
+
protected:
    void normalizeAndZCA(Mat& patches);
    double eval_feature(Mat& feature, double* prob_estimates);

private:
-    //TODO implement getters/setters for some of these members (if apply)
+    int window_size; // window size
+    int step_size;   // sliding window step
    int nr_class;    // number of classes
    int nr_feature;  // number of features
    Mat feature_min; // scale range
    Mat feature_max;
    Mat weights;     // Logistic Regression weights
    Mat kernels;     // CNN kernels
    Mat M, P;        // ZCA Whitening parameters
-    int step_size;   // sliding window step
-    int window_size; // window size
    int quad_size;
    int patch_size;
    int num_quads;   // extract 25 quads (12x12) from each image
@ -449,26 +515,15 @@ OCRBeamSearchClassifierCNN::OCRBeamSearchClassifierCNN (const string& filename)
else else
CV_Error(Error::StsBadArg, "Default classifier data file not found!"); CV_Error(Error::StsBadArg, "Default classifier data file not found!");
// check all matrix dimensions match correctly and no one is empty nr_feature = weights.rows;
CV_Assert( (M.cols > 0) && (M.rows > 0) ); nr_class = weights.cols;
CV_Assert( (P.cols > 0) && (P.rows > 0) );
CV_Assert( (kernels.cols > 0) && (kernels.rows > 0) );
CV_Assert( (weights.cols > 0) && (weights.rows > 0) );
CV_Assert( (feature_min.cols > 0) && (feature_min.rows > 0) );
CV_Assert( (feature_max.cols > 0) && (feature_max.rows > 0) );
nr_feature = weights.rows;
nr_class = weights.cols;
patch_size = (int)sqrt(kernels.cols); patch_size = (int)sqrt(kernels.cols);
// algorithm internal parameters window_size = 4*patch_size;
window_size = 32; step_size = 4;
quad_size = 12; quad_size = 12;
num_quads = 25; num_quads = 25;
num_tiles = 25; num_tiles = 25;
alpha = 0.5; alpha = 0.5; // used in non-linear activation function z = max(0, |D*a| - alpha)
step_size = 4; // TODO showld this be a parameter for the user?
} }
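Review note: the window geometry is now derived from the model file rather than hard-coded: patch_size = sqrt(kernels.cols) and window_size = 4*patch_size. Assuming the shipped model uses 8x8 patches (kernels.cols == 64), this reproduces the old 32-pixel window, while a retrained model with a different patch size keeps working without touching the decoder. For example:

    #include <cmath>
    int kernels_cols = 64;                               // assumed for the shipped model
    int patch_size   = (int)std::sqrt((double)kernels_cols); // 8x8 patches
    int window_size  = 4 * patch_size;                   // 32-pixel sliding window, as before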
void OCRBeamSearchClassifierCNN::eval( InputArray _src, vector< vector<double> >& recognition_probabilities, vector<int>& oversegmentation)

@@ -493,7 +548,6 @@ void OCRBeamSearchClassifierCNN::eval( InputArray _src, vector< vector<double> >
    resize(src,src,Size(window_size*src.cols/src.rows,window_size));

    int seg_points = 0;
-    oversegmentation.push_back(seg_points);

    Mat quad;
    Mat tmp;
@@ -584,19 +638,17 @@ void OCRBeamSearchClassifierCNN::eval( InputArray _src, vector< vector<double> >
            double *p = new double[nr_class];
            double predict_label = eval_feature(feature,p);
-            //cout << " Prediction: " << vocabulary[predict_label] << " with probability " << p[0] << endl;

-            if (predict_label < 0)
-                CV_Error(Error::StsInternal, "OCRBeamSearchClassifierCNN::eval Error: unexpected prediction in eval_feature()");
+            if ( (predict_label < 0) || (predict_label > nr_class) )
+                CV_Error(Error::StsOutOfRange, "OCRBeamSearchClassifierCNN::eval Error: unexpected prediction in eval_feature()");

-            seg_points++;
-            oversegmentation.push_back(seg_points);
-            vector<double> recognition_p(p, p+nr_class*sizeof(double));
-            recognition_probabilities.push_back(recognition_p);
+            vector<double> recognition_p(p, p+nr_class);
+            recognition_probabilities.push_back(recognition_p);
+            oversegmentation.push_back(seg_points);
+            seg_points++;
        }
    }
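Review note: this fixes a real out-of-bounds read. The old range constructor walked p + nr_class*sizeof(double) doubles, i.e. 8x past the end of the allocation, because std::vector's iterator-pair constructor counts elements, not bytes. The push/increment reordering also makes oversegmentation 0-based, consistent with dropping the unconditional initial push_back above. Minimal illustration of the constructor semantics:

    #include <vector>
    double p[3] = {0.1, 0.7, 0.2};
    std::vector<double> ok(p, p + 3);                    // 3 elements: correct
    // std::vector<double> bad(p, p + 3*sizeof(double)); // 24 elements: reads past the array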
// normalize for contrast and apply ZCA whitening to a set of image patches
