diff --git a/modules/text/README.md b/modules/text/README.md
index bbbad11a1..488518a28 100644
--- a/modules/text/README.md
+++ b/modules/text/README.md
@@ -12,7 +12,7 @@ Here are instructions on how to install Tesseract on your machine (Linux or Mac
 Tesseract installation instruction (Linux, Mac)
 -----------------------------------------------
 
-0. Linux users may try to install tesseract-3.03-rc1 (or later) and leptonica-1.70 (or later) with the corresponding developement packages using their package manager. Mac users may try brew. The instructions below are for those who wants to build tesseract from source.
+0. Linux users may try to install tesseract-3.03-rc1 (or later) and leptonica-1.70 (or later) with the corresponding development packages using their package manager. Mac users may try brew. The instructions below are for those who want to build tesseract from source.
 
 1. download leptonica 1.70 tarball (helper image processing library, used by tesseract. Later versions might work too):
 http://www.leptonica.com/download.html
@@ -33,9 +33,9 @@ mkdir build && cd build
 ../configure --with-extra-includes=/usr/local --with-extra-libraries=/usr/local
 make && sudo make install
 
-tessract will be installed to /usr/local.
+Tesseract will be installed to /usr/local.
 
-3. download the pre-trained classifier data for english language:
+3. download the pre-trained classifier data for the English language:
 https://code.google.com/p/tesseract-ocr/downloads/detail?name=eng.traineddata.gz
 unzip it (gzip -d eng.traineddata.gz) and copy to /usr/local/share/tessdata.
 
diff --git a/modules/text/include/opencv2/text.hpp b/modules/text/include/opencv2/text.hpp
index 945194a16..6bcaa0198 100644
--- a/modules/text/include/opencv2/text.hpp
+++ b/modules/text/include/opencv2/text.hpp
@@ -66,12 +66,12 @@ hierarchy by their inclusion relation:
 
 ![image](pics/component_tree.png)
 
-The component tree may conatain a huge number of regions even for a very simple image as shown in
+The component tree may contain a huge number of regions even for a very simple image as shown in
 the previous image. This number can easily reach the order of 1 x 10\^6 regions for an average
 1 Megapixel image. In order to efficiently select suitable regions among all the ERs the algorithm
 make use of a sequential classifier with two differentiated stages.
 
-In the first stage incrementally computable descriptors (area, perimeter, bounding box, and euler
+In the first stage incrementally computable descriptors (area, perimeter, bounding box, and Euler's
 number) are computed (in O(1)) for each region r and used as features for a classifier which
 estimates the class-conditional probability p(r|character). Only the ERs which correspond to local
 maximum of the probability p(r|character) are selected (if their probability is above a global limit
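Note: the two-stage filtering documented above is driven through the `createERFilterNM1`/`createERFilterNM2` factories declared in erfilter.hpp (patched next). A minimal usage sketch, assuming the trained classifier files shipped in modules/text/samples and a hypothetical input image path:

```cpp
#include <opencv2/text.hpp>
#include <opencv2/imgcodecs.hpp>
#include <vector>

using namespace cv;
using namespace cv::text;

int main()
{
    Mat image = imread("scene_text.jpg"); // hypothetical input image

    // Extract the channels processed independently by the N&M pipeline
    std::vector<Mat> channels;
    computeNMChannels(image, channels);

    // Stage 1: incrementally computable descriptors + sequential boosted classifier
    Ptr<ERFilter> er_filter1 = createERFilterNM1(
        loadClassifierNM1("trained_classifierNM1.xml"),
        16, 0.00015f, 0.13f, 0.2f, true, 0.1f);

    // Stage 2: whole-region features, re-scores only the stage-1 survivors
    Ptr<ERFilter> er_filter2 = createERFilterNM2(
        loadClassifierNM2("trained_classifierNM2.xml"), 0.5f);

    std::vector<std::vector<ERStat> > regions(channels.size());
    for (size_t c = 0; c < channels.size(); c++)
    {
        er_filter1->run(channels[c], regions[c]);
        er_filter2->run(channels[c], regions[c]);
    }
    return 0;
}
```

Running the expensive second stage only on the candidates that survive the O(1) first stage is what keeps the cascade usable on the ~10^6 regions of a 1 Megapixel component tree.
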
diff --git a/modules/text/include/opencv2/text/erfilter.hpp b/modules/text/include/opencv2/text/erfilter.hpp
index 7d7717335..0f903bc13 100644
--- a/modules/text/include/opencv2/text/erfilter.hpp
+++ b/modules/text/include/opencv2/text/erfilter.hpp
@@ -78,7 +78,7 @@ public:
     //! incrementally computable features
     int area;
     int perimeter;
-    int euler;                 //!< euler number
+    int euler;                 //!< Euler's number
     Rect rect;
     double raw_moments[2];     //!< order 1 raw moments to derive the centroid
     double central_moments[3]; //!< order 2 central moments to construct the covariance matrix
@@ -105,7 +105,7 @@ public:
     ERStat* next;
     ERStat* prev;
 
-    //! wenever the regions is a local maxima of the probability
+    //! whether the region is a local maximum of the probability
     bool local_maxima;
     ERStat* max_probability_ancestor;
     ERStat* min_probability_ancestor;
@@ -317,7 +317,7 @@ enum erGrouping_Modes {
 
 @param channels Vector of single channel images CV_8UC1 from wich the regions were extracted.
 
-@param regions Vector of ER's retreived from the ERFilter algorithm from each channel.
+@param regions Vector of ER's retrieved from the ERFilter algorithm from each channel.
 
 @param groups The output of the algorithm is stored in this parameter as set of lists of indexes to
 provided regions.
@@ -353,7 +353,7 @@ CV_EXPORTS_W void erGrouping(InputArray image, InputArray channel,
 
 @param image Source image CV_8UC1 from which the MSERs where extracted.
 
-@param contours Intput vector with all the contours (vector\<Point\>).
+@param contours Input vector with all the contours (vector\<Point\>).
 
 @param regions Output where the ERStat regions are stored.
 
diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp
index 67992306d..e151c15ee 100644
--- a/modules/text/include/opencv2/text/ocr.hpp
+++ b/modules/text/include/opencv2/text/ocr.hpp
@@ -153,7 +153,7 @@ public:
     @param language an ISO 639-3 code or NULL will default to "eng".
     @param char_whitelist specifies the list of characters used for recognition. NULL defaults to
     "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".
-    @param oem tesseract-ocr offers different OCR Engine Modes (OEM), by deffault
+    @param oem tesseract-ocr offers different OCR Engine Modes (OEM), by default
     tesseract::OEM_DEFAULT is used. See the tesseract-ocr API documentation for other possible
     values.
     @param psmode tesseract-ocr offers different Page Segmentation Modes (PSM) tesseract::PSM_AUTO
@@ -195,7 +195,7 @@ public:
     This way it hides the feature extractor and the classifier itself, so developers can write
     their own OCR code.
 
-    The default character classifier and feature extractor can be loaded using the utility funtion
+    The default character classifier and feature extractor can be loaded using the utility function
     loadOCRHMMClassifierNM and KNN model provided in .
 */
@@ -289,21 +289,21 @@ public:
     ().
     */
     static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
-                                     const std::string& vocabulary,               // The language vocabulary (chars when ascii english text)
+                                     const std::string& vocabulary,               // The language vocabulary (chars when ASCII English text)
                                                                                   //        size() must be equal to the number of classes
                                      InputArray transition_probabilities_table,  // Table with transition probabilities between character pairs
-                                                                                  //        cols == rows == vocabulari.size()
+                                                                                  //        cols == rows == vocabulary.size()
                                      InputArray emission_probabilities_table,    // Table with observation emission probabilities
-                                                                                  //        cols == rows == vocabulari.size()
+                                                                                  //        cols == rows == vocabulary.size()
                                      decoder_mode mode = OCR_DECODER_VITERBI);   // HMM Decoding algorithm (only Viterbi for the moment)
 
     CV_WRAP static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
-                                     const String& vocabulary,                   // The language vocabulary (chars when ascii english text)
+                                     const String& vocabulary,                   // The language vocabulary (chars when ASCII English text)
                                                                                   //        size() must be equal to the number of classes
                                      InputArray transition_probabilities_table,  // Table with transition probabilities between character pairs
-                                                                                  //        cols == rows == vocabulari.size()
+                                                                                  //        cols == rows == vocabulary.size()
                                      InputArray emission_probabilities_table,    // Table with observation emission probabilities
-                                                                                  //        cols == rows == vocabulari.size()
+                                                                                  //        cols == rows == vocabulary.size()
                                      int mode = OCR_DECODER_VITERBI);            // HMM Decoding algorithm (only Viterbi for the moment)
 
     /** @brief Creates an instance of the OCRHMMDecoder class. Loads and initializes HMMDecoder from the specified path
 
@@ -312,12 +312,12 @@
     */
     CV_WRAP static Ptr<OCRHMMDecoder> create(const String& filename,
-                                     const String& vocabulary,                   // The language vocabulary (chars when ascii english text)
+                                     const String& vocabulary,                   // The language vocabulary (chars when ASCII English text)
                                                                                   //        size() must be equal to the number of classes
                                      InputArray transition_probabilities_table,  // Table with transition probabilities between character pairs
-                                                                                  //        cols == rows == vocabulari.size()
+                                                                                  //        cols == rows == vocabulary.size()
                                      InputArray emission_probabilities_table,    // Table with observation emission probabilities
-                                                                                  //        cols == rows == vocabulari.size()
+                                                                                  //        cols == rows == vocabulary.size()
                                      int mode = OCR_DECODER_VITERBI,             // HMM Decoding algorithm (only Viterbi for the moment)
                                      int classifier = OCR_KNN_CLASSIFIER);       // The character classifier type
@@ -371,7 +371,7 @@ CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifier(const S
 /** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
  *
- * @param vocabulary The language vocabulary (chars when ascii english text).
+ * @param vocabulary The language vocabulary (chars when ASCII English text).
  *
  * @param lexicon The list of words that are expected to be found in a particular image.
  *
@@ -466,7 +466,7 @@ public:
 
     @param classifier The character classifier with built in feature extractor.
 
-    @param vocabulary The language vocabulary (chars when ascii english text). vocabulary.size()
+    @param vocabulary The language vocabulary (chars when ASCII English text). vocabulary.size()
     must be equal to the number of classes of the classifier.
 
     @param transition_probabilities_table Table with transition probabilities between character
@@ -481,22 +481,22 @@ public:
     @param beam_size Size of the beam in Beam Search algorithm.
     */
     static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
-                                     const std::string& vocabulary,              // The language vocabulary (chars when ascii english text)
+                                     const std::string& vocabulary,              // The language vocabulary (chars when ASCII English text)
                                                                                  //        size() must be equal to the number of classes
                                      InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
-                                                                                 //        cols == rows == vocabulari.size()
+                                                                                 //        cols == rows == vocabulary.size()
                                      InputArray emission_probabilities_table,   // Table with observation emission probabilities
-                                                                                 //        cols == rows == vocabulari.size()
+                                                                                 //        cols == rows == vocabulary.size()
                                      decoder_mode mode = OCR_DECODER_VITERBI,   // HMM Decoding algorithm (only Viterbi for the moment)
                                      int beam_size = 500);                      // Size of the beam in Beam Search algorithm
 
     CV_WRAP static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier, // The character classifier with built in feature extractor
-                                     const String& vocabulary,                   // The language vocabulary (chars when ascii english text)
+                                     const String& vocabulary,                   // The language vocabulary (chars when ASCII English text)
                                                                                  //        size() must be equal to the number of classes
                                      InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
-                                                                                 //        cols == rows == vocabulari.size()
+                                                                                 //        cols == rows == vocabulary.size()
                                      InputArray emission_probabilities_table,   // Table with observation emission probabilities
-                                                                                 //        cols == rows == vocabulari.size()
+                                                                                 //        cols == rows == vocabulary.size()
                                      int mode = OCR_DECODER_VITERBI,            // HMM Decoding algorithm (only Viterbi for the moment)
                                      int beam_size = 500);                      // Size of the beam in Beam Search algorithm
 
@@ -506,12 +506,12 @@ public:
     */
     CV_WRAP static Ptr<OCRBeamSearchDecoder> create(const String& filename,     // The character classifier file
-                                     const String& vocabulary,                   // The language vocabulary (chars when ascii english text)
+                                     const String& vocabulary,                   // The language vocabulary (chars when ASCII English text)
                                                                                  //        size() must be equal to the number of classes
                                      InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
-                                                                                 //        cols == rows == vocabulari.size()
+                                                                                 //        cols == rows == vocabulary.size()
                                      InputArray emission_probabilities_table,   // Table with observation emission probabilities
-                                                                                 //        cols == rows == vocabulari.size()
+                                                                                 //        cols == rows == vocabulary.size()
                                      int mode = OCR_DECODER_VITERBI,            // HMM Decoding algorithm (only Viterbi for the moment)
                                      int beam_size = 500);
 protected:
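To see how the parameters documented above fit together, here is a hedged sketch of a complete HMM decoder setup. It follows the module's segmented_word_recognition sample: the data file names and the "transition_probabilities" FileStorage key are the ones shipped with modules/text/samples, and the input image path is hypothetical:

```cpp
#include <opencv2/text.hpp>
#include <opencv2/imgcodecs.hpp>
#include <iostream>

using namespace cv;
using namespace cv::text;

int main()
{
    Mat word = imread("cropped_word.png"); // hypothetical cropped word image

    // Must follow the classifier's class order; size() == number of classes
    std::string vocabulary = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

    // vocabulary.size() x vocabulary.size() table of character-pair
    // transition probabilities, loaded from the sample data file
    Mat transition_p;
    FileStorage fs("OCRHMM_transitions_table.xml", FileStorage::READ);
    fs["transition_probabilities"] >> transition_p;
    fs.release();

    // Identity emission matrix (each class emits itself), as in the samples
    Mat emission_p = Mat::eye((int)vocabulary.size(), (int)vocabulary.size(), CV_64FC1);

    Ptr<OCRHMMDecoder> ocr = OCRHMMDecoder::create(
        loadOCRHMMClassifierNM("OCRHMM_knn_model_data.xml.gz"),
        vocabulary, transition_p, emission_p); // mode defaults to OCR_DECODER_VITERBI

    std::string output;
    ocr->run(word, output);
    std::cout << output << std::endl;
    return 0;
}
```
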
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; // must have the same order as the clasifier output classes + string vocabulary = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; // must have the same order as the classifier output classes vector lexicon; // a list of words expected to be found on the input image lexicon.push_back(string("abb")); lexicon.push_back(string("riser")); @@ -53,7 +53,7 @@ int main(int argc, char* argv[]) createOCRHMMTransitionsTable(vocabulary,lexicon,transition_p); // An alternative would be to load the default generic language model - // (created from ispell 42869 english words list) + // (created from ispell 42869 English words list) /*Mat transition_p; string filename = "OCRHMM_transitions_table.xml"; FileStorage fs(filename, FileStorage::READ); diff --git a/modules/text/samples/end_to_end_recognition.cpp b/modules/text/samples/end_to_end_recognition.cpp index 0117f801c..1b079067c 100644 --- a/modules/text/samples/end_to_end_recognition.cpp +++ b/modules/text/samples/end_to_end_recognition.cpp @@ -20,7 +20,7 @@ using namespace std; using namespace cv; using namespace cv::text; -//Calculate edit distance netween two words +//Calculate edit distance between two words size_t edit_distance(const string& A, const string& B); size_t min(size_t x, size_t y, size_t z); bool isRepetitive(const string& s); @@ -164,7 +164,7 @@ int main(int argc, char* argv[]) cout << "TIME_OCR = " << ((double)getTickCount() - t_r)*1000/getTickFrequency() << endl; - /* Recognition evaluation with (approximate) hungarian matching and edit distances */ + /* Recognition evaluation with (approximate) Hungarian matching and edit distances */ if(argc>2) { diff --git a/modules/text/samples/segmented_word_recognition.cpp b/modules/text/samples/segmented_word_recognition.cpp index d3b50d24f..18b102642 100644 --- a/modules/text/samples/segmented_word_recognition.cpp +++ b/modules/text/samples/segmented_word_recognition.cpp @@ -54,7 +54,7 @@ int main(int argc, char* argv[]) { else image.copyTo(mask); - // be sure the mask is a binry image + // be sure the mask is a binary image cvtColor(mask, mask, COLOR_BGR2GRAY); threshold(mask, mask, 128., 255, THRESH_BINARY); diff --git a/modules/text/samples/webcam_demo.cpp b/modules/text/samples/webcam_demo.cpp index 6f8d95ea7..6071b3c69 100644 --- a/modules/text/samples/webcam_demo.cpp +++ b/modules/text/samples/webcam_demo.cpp @@ -290,7 +290,7 @@ int main(int argc, char* argv[]) { outputs[i].erase(remove(outputs[i].begin(), outputs[i].end(), '\n'), outputs[i].end()); - //cout << "OCR output = \"" << outputs[i] << "\" lenght = " << outputs[i].size() << endl; + //cout << "OCR output = \"" << outputs[i] << "\" length = " << outputs[i].size() << endl; if (outputs[i].size() < 3) continue; diff --git a/modules/text/src/erfilter.cpp b/modules/text/src/erfilter.cpp index af3348c9e..8bb14de48 100644 --- a/modules/text/src/erfilter.cpp +++ b/modules/text/src/erfilter.cpp @@ -107,7 +107,7 @@ ERStat::ERStat(int init_level, int init_pixel, int init_x, int init_y) : pixel(i // derivative classes -// the classe implementing the interface for the 1st and 2nd stages of Neumann and Matas algorithm +// the classes implementing the interface for the 1st and 2nd stages of Neumann and Matas algorithm class CV_EXPORTS ERFilterNM : public ERFilter { public: @@ -277,7 +277,7 @@ void ERFilterNM::er_tree_extract( InputArray image ) // the component stack vector er_stack; - // the quads for euler number calculation + // the quads for Euler's number 
diff --git a/modules/text/samples/end_to_end_recognition.cpp b/modules/text/samples/end_to_end_recognition.cpp
index 0117f801c..1b079067c 100644
--- a/modules/text/samples/end_to_end_recognition.cpp
+++ b/modules/text/samples/end_to_end_recognition.cpp
@@ -20,7 +20,7 @@ using namespace std;
 using namespace cv;
 using namespace cv::text;
 
-//Calculate edit distance netween two words
+//Calculate edit distance between two words
 size_t edit_distance(const string& A, const string& B);
 size_t min(size_t x, size_t y, size_t z);
 bool   isRepetitive(const string& s);
@@ -164,7 +164,7 @@ int main(int argc, char* argv[])
 
     cout << "TIME_OCR = " << ((double)getTickCount() - t_r)*1000/getTickFrequency() << endl;
 
-    /* Recognition evaluation with (approximate) hungarian matching and edit distances */
+    /* Recognition evaluation with (approximate) Hungarian matching and edit distances */
 
     if(argc>2)
     {
diff --git a/modules/text/samples/segmented_word_recognition.cpp b/modules/text/samples/segmented_word_recognition.cpp
index d3b50d24f..18b102642 100644
--- a/modules/text/samples/segmented_word_recognition.cpp
+++ b/modules/text/samples/segmented_word_recognition.cpp
@@ -54,7 +54,7 @@ int main(int argc, char* argv[]) {
     else
         image.copyTo(mask);
 
-    // be sure the mask is a binry image
+    // be sure the mask is a binary image
     cvtColor(mask, mask, COLOR_BGR2GRAY);
     threshold(mask, mask, 128., 255, THRESH_BINARY);
 
diff --git a/modules/text/samples/webcam_demo.cpp b/modules/text/samples/webcam_demo.cpp
index 6f8d95ea7..6071b3c69 100644
--- a/modules/text/samples/webcam_demo.cpp
+++ b/modules/text/samples/webcam_demo.cpp
@@ -290,7 +290,7 @@ int main(int argc, char* argv[])
 
             outputs[i].erase(remove(outputs[i].begin(), outputs[i].end(), '\n'), outputs[i].end());
-            //cout << "OCR output = \"" << outputs[i] << "\" lenght = " << outputs[i].size() << endl;
+            //cout << "OCR output = \"" << outputs[i] << "\" length = " << outputs[i].size() << endl;
             if (outputs[i].size() < 3)
                 continue;
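The edit_distance helper declared in end_to_end_recognition.cpp above is the standard Levenshtein dynamic program. For reference, a self-contained version of the recurrence it evaluates (a sketch; not necessarily identical to the sample's implementation):

```cpp
#include <string>
#include <vector>
#include <algorithm>

// Levenshtein distance: minimum number of single-character insertions,
// deletions, and substitutions needed to turn A into B
size_t edit_distance(const std::string& A, const std::string& B)
{
    const size_t NA = A.size(), NB = B.size();
    std::vector<std::vector<size_t> > M(NA + 1, std::vector<size_t>(NB + 1));

    for (size_t a = 0; a <= NA; ++a) M[a][0] = a; // delete all of A[0..a)
    for (size_t b = 0; b <= NB; ++b) M[0][b] = b; // insert all of B[0..b)

    for (size_t a = 1; a <= NA; ++a)
        for (size_t b = 1; b <= NB; ++b)
        {
            size_t del = M[a-1][b] + 1;                              // delete A[a-1]
            size_t ins = M[a][b-1] + 1;                              // insert B[b-1]
            size_t sub = M[a-1][b-1] + (A[a-1] == B[b-1] ? 0 : 1);   // substitute
            M[a][b] = std::min(std::min(del, ins), sub);
        }
    return M[NA][NB];
}
```
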
diff --git a/modules/text/src/erfilter.cpp b/modules/text/src/erfilter.cpp
index af3348c9e..8bb14de48 100644
--- a/modules/text/src/erfilter.cpp
+++ b/modules/text/src/erfilter.cpp
@@ -107,7 +107,7 @@ ERStat::ERStat(int init_level, int init_pixel, int init_x, int init_y) : pixel(i
 // derivative classes
 
-// the classe implementing the interface for the 1st and 2nd stages of Neumann and Matas algorithm
+// the classes implementing the interface for the 1st and 2nd stages of Neumann and Matas algorithm
 class CV_EXPORTS ERFilterNM : public ERFilter
 {
 public:
@@ -277,7 +277,7 @@ void ERFilterNM::er_tree_extract( InputArray image )
     // the component stack
     vector<ERStat*> er_stack;
 
-    // the quads for euler number calculation
+    // the quads for Euler's number calculation
     // quads[2][2] and quads[2][3] are never used.
     // The four lowest bits in each quads[i][j] correspond to the 2x2 binary patterns
     // Q_1, Q_2, Q_3 in the Neumann and Matas CVPR 2012 paper
@@ -336,7 +336,7 @@ void ERFilterNM::er_tree_extract( InputArray image )
                 default: if (y > 0) neighbour_pixel = current_pixel - width; break;
             }
 
-            // if neighbour is not accessible, mark it accessible and retreive its grey-level value
+            // if neighbour is not accessible, mark it accessible and retrieve its grey-level value
             if ( !accessible_pixel_mask[neighbour_pixel] && (neighbour_pixel != current_pixel) )
             {
@@ -377,14 +377,14 @@ void ERFilterNM::er_tree_extract( InputArray image )
                 }
             }
 
-        } // else neigbor was already accessible
+        } // else neighbour was already accessible
 
         if (push_new_component) continue;
 
         // once here we can add the current pixel to the component at the top of the stack
         // but first we find how many of its neighbours are part of the region boundary (needed for
-        // perimeter and crossings calc.) and the increment in quads counts for euler number calc.
+        // perimeter and crossings calc.) and the increment in quads counts for Euler's number calc.
         int non_boundary_neighbours = 0;
         int non_boundary_neighbours_horiz = 0;
@@ -801,7 +801,7 @@ ERStat* ERFilterNM::er_tree_filter ( InputArray image, ERStat * stat, ERStat *pa
     vector<Point> contour_poly;
     vector<Vec4i> hierarchy;
     findContours( region, contours, hierarchy, RETR_TREE, CHAIN_APPROX_NONE, Point(0, 0) );
-    //TODO check epsilon parameter of approxPolyDP (set empirically) : we want more precission
+    //TODO check epsilon parameter of approxPolyDP (set empirically) : we want more precision
     //     if the region is very small because otherwise we'll loose all the convexities
     approxPolyDP( Mat(contours[0]), contour_poly, (float)min(rect.width,rect.height)/17, true );
@@ -1089,9 +1089,9 @@ double ERClassifierNM2::eval(const ERStat& stat)
                       default classifier can be implicitly load with function loadClassifierNM1()
                       from file in samples/cpp/trained_classifierNM1.xml
     \param  thresholdDelta    Threshold step in subsequent thresholds when extracting the component tree
-    \param  minArea   The minimum area (% of image size) allowed for retreived ER's
-    \param  minArea   The maximum area (% of image size) allowed for retreived ER's
-    \param  minProbability The minimum probability P(er|character) allowed for retreived ER's
+    \param  minArea   The minimum area (% of image size) allowed for retrieved ER's
+    \param  maxArea   The maximum area (% of image size) allowed for retrieved ER's
+    \param  minProbability The minimum probability P(er|character) allowed for retrieved ER's
     \param  nonMaxSuppression Whenever non-maximum suppression is done over the branch probabilities
     \param  minProbability The minimum probability difference between local maxima and local minima ERs
@@ -1222,12 +1222,12 @@ void get_gradient_magnitude(Mat& _grey_img, Mat& _gradient_magnitude)
 
 /*!
-    Compute the diferent channels to be processed independently in the N&M algorithm
+    Compute the different channels to be processed independently in the N&M algorithm
     Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
 
     In N&M algorithm, the combination of intensity (I), hue (H), saturation (S), and gradient
-    magnitude channels (Grad) are used in order to obatin high localization recall.
-    This implementation also the alternative combination of red (R), grren (G), blue (B),
+    magnitude channels (Grad) are used in order to obtain high localization recall.
+    This implementation also offers the alternative combination of red (R), green (G), blue (B),
     lightness (L), and gradient magnitude (Grad).
 
     \param  _src Source image. Must be RGB CV_8UC3.
@@ -1965,7 +1965,7 @@ public:
 static void generate_dendrogram(double * const Z, cluster_result & Z2, const int_fast32_t N)
 {
     // The array "nodes" is a union-find data structure for the cluster
-    // identites (only needed for unsorted cluster_result input).
+    // identities (only needed for unsorted cluster_result input).
     union_find nodes;
     stable_sort(Z2[0], Z2[N-1]);
     nodes.init(N);
@@ -2196,11 +2196,11 @@ struct HCluster{
     vector<int> elements;      // elements (contour ID)
     int nfa;                   // the number of false alarms for this merge
     float dist;                // distance of the merge
-    float dist_ext;            // distamce where this merge will merge with another
+    float dist_ext;            // distance where this merge will merge with another
     long double volume;        // volume of the bounding sphere (or bounding box)
     long double volume_ext;    // volume of the sphere(or box) + envolvent empty space
     vector< vector<float> > points; // nD points in this cluster
-    bool max_meaningful;       // is this merge max meaningul ?
+    bool max_meaningful;       // is this merge max meaningful ?
     vector<int> max_in_branch; // otherwise which merges are the max_meaningful in this branch
     int min_nfa_in_branch;     // min nfa detected within the chilhood
     int node1;
@@ -2285,7 +2285,7 @@ void MaxMeaningfulClustering::build_merge_info(double *Z, double *X, int N, int
                                                vector< vector<int> > *meaningful_clusters)
 {
 
-    // walk the whole dendogram
+    // walk the whole dendrogram
     for (int i=0; i<(N-1)*4; i=i+4)
     {
         HCluster cluster;
@@ -2964,7 +2964,7 @@ static float extract_features(Mat &grey, Mat& channel, vector<ERStat> &regions,
         f.convex_hull_ratio = (float)contourArea(hull)/contourArea(contours0[0]);
         vector<Vec4i> cx;
         vector<int> hull_idx;
-        //TODO check epsilon parameter of approxPolyDP (set empirically) : we want more precission
+        //TODO check epsilon parameter of approxPolyDP (set empirically) : we want more precision
         //     if the region is very small because otherwise we'll loose all the convexities
         approxPolyDP( Mat(contours0[0]), contours0[0], (float)min(rrect.size.width,rrect.size.height)/17, true );
         convexHull(contours0[0],hull_idx,false,false);
@@ -3007,7 +3007,7 @@ static float extract_features(Mat &grey, Mat& channel, vector<ERStat> &regions,
 
     \param  _image    Original RGB image from wich the regions were extracted.
     \param  _src      Vector of sinle channel images CV_8UC1 from wich the regions were extracted.
-    \param  regions   Vector of ER's retreived from the ERFilter algorithm from each channel
+    \param  regions   Vector of ER's retrieved from the ERFilter algorithm from each channel
     \param  groups    The output of the algorithm are stored in this parameter as list of indexes to provided regions.
     \param  text_boxes The output of the algorithm are stored in this parameter as list of rectangles.
     \param  filename  The XML or YAML file with the classifier model (e.g. trained_classifier_erGrouping.xml)
@@ -3158,7 +3158,7 @@ struct line_estimates
 };
 
 // distanceLinesEstimates
-// Calculates the distance between two line estimates defined as the largest 
+// Calculates the distance between two line estimates defined as the largest
 // normalized vertical difference of their top/bottom lines at their boundary points
 // out float distance
 float distanceLinesEstimates(line_estimates &a, line_estimates &b);
@@ -3328,7 +3328,7 @@ void fitLineOLS(Point p1, Point p2, Point p3, float &a0, float &a1)
     a1=(float)(3*sumxy-sumx*sumy) / (3*sumx2-sumx*sumx);
 }
 
-// Fit line from three points using (heutistic) Least-Median of Squares
+// Fit line from three points using (heuristic) Least-Median of Squares
 // out a0 is the intercept
 // out a1 is the slope
 // returns the error of the single point that doesn't fit the line
@@ -3339,7 +3339,7 @@ float fitLineLMS(Point p1, Point p2, Point p3, float &a0, float &a1)
     a1 = 0;
 
     //Least-Median of Squares does not make sense with only three points
-    //becuse any line passing by two of them has median_error = 0
+    //because any line passing by two of them has median_error = 0
     //So we'll take the one with smaller slope
 
     float l_a0, l_a1, best_slope=FLT_MAX, err=0;
@@ -3730,7 +3730,7 @@ bool sort_couples (Vec3i i,Vec3i j) { return (i[0]<j[0]); }
 
     Ptr<ERFilter> er_filter = createERFilterNM1(loadDummyClassifier(),1,0.005f,0.3f,0.f,false);
     for (int i=0; i<(int)valid_sequences.size(); i++)
     {
@@ -4172,7 +4172,7 @@ void MSERsToERStats(InputArray image, vector<vector<Point> > &contours, vector<
 void detectRegions(InputArray image, const Ptr<ERFilter>& er_filter1, const Ptr<ERFilter>& er_filter2, CV_OUT vector< vector<ERStat> >& regions)
 {
     // assert correct image type
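The quad bookkeeping referenced in the er_tree_extract comments above is an incremental form of the classic bit-quad method for Euler's number. As a reference point, here is a non-incremental, whole-image sketch of the same quantity (the module updates the pattern counts per added pixel instead, and its Q_1/Q_2/Q_3 naming follows the N&M paper rather than the Q1/Q3/QD convention used here):

```cpp
#include <opencv2/core.hpp>

// Euler's number (#components - #holes) of a binary image via bit-quad counts.
// Q1: 2x2 windows with exactly one foreground pixel, Q3: exactly three,
// QD: the two diagonal patterns. For 8-connectivity, E = (Q1 - Q3 - 2*QD) / 4.
int eulerNumber8(const cv::Mat& bin) // CV_8UC1, non-zero = foreground
{
    int q1 = 0, q3 = 0, qd = 0;
    for (int y = -1; y < bin.rows; ++y)
        for (int x = -1; x < bin.cols; ++x)
        {
            // sample a 2x2 window, padding outside the image with background
            int p[4];
            int k = 0;
            for (int dy = 0; dy <= 1; ++dy)
                for (int dx = 0; dx <= 1; ++dx)
                {
                    int yy = y + dy, xx = x + dx;
                    bool inside = (yy >= 0 && yy < bin.rows && xx >= 0 && xx < bin.cols);
                    p[k++] = (inside && bin.at<uchar>(yy, xx)) ? 1 : 0;
                }
            int s = p[0] + p[1] + p[2] + p[3];
            if (s == 1) q1++;
            else if (s == 3) q3++;
            else if (s == 2 && p[0] == p[3]) qd++; // the two set pixels are diagonal
        }
    return (q1 - q3 - 2 * qd) / 4;
}
```

A single isolated pixel produces four Q1 windows and nothing else, giving E = 4/4 = 1, which is a quick sanity check of the formula.
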
diff --git a/modules/text/src/ocr_beamsearch_decoder.cpp b/modules/text/src/ocr_beamsearch_decoder.cpp
index 70da85417..b746e4b39 100644
--- a/modules/text/src/ocr_beamsearch_decoder.cpp
+++ b/modules/text/src/ocr_beamsearch_decoder.cpp
@@ -144,7 +144,7 @@ struct beamSearch_node {
     double score;
     vector<int> segmentation;
     bool expanded;
-    // TODO calculating score of its childs would be much faster if we store the last column
+    // TODO calculating the scores of its children would be much faster if we store the last column
     //      of their "root" path.
 };
 
@@ -231,7 +231,7 @@ public:
 
         // TODO if input is a text line (not a word) we may need to split into words here!
 
-        // do sliding window classification along a croped word image
+        // do sliding window classification along a cropped word image
         classifier->eval(src, recognition_probabilities, oversegmentation);
 
         // if the number of oversegmentation points found is less than 2 we can not do nothing!!
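For completeness, the beam-search decoder patched above is typically exercised as in cropped_word_recognition.cpp. A condensed sketch (data file names are those shipped with modules/text/samples; the input image path and the beam width of 50 are taken from that sample):

```cpp
#include <opencv2/text.hpp>
#include <opencv2/imgcodecs.hpp>
#include <iostream>
#include <vector>

using namespace cv;
using namespace cv::text;

int main()
{
    Mat word = imread("cropped_word.png"); // hypothetical cropped word image

    std::string vocabulary = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    std::vector<std::string> lexicon; // words expected in the input image
    lexicon.push_back("abb");
    lexicon.push_back("riser");

    // Tailored bigram language model built from the lexicon
    Mat transition_p;
    createOCRHMMTransitionsTable(vocabulary, lexicon, transition_p);
    // Identity emission matrix, as in the sample
    Mat emission_p = Mat::eye((int)vocabulary.size(), (int)vocabulary.size(), CV_64FC1);

    Ptr<OCRBeamSearchDecoder> ocr = OCRBeamSearchDecoder::create(
        loadOCRBeamSearchClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"),
        vocabulary, transition_p, emission_p, OCR_DECODER_VITERBI, 50); // beam width 50

    std::string output;
    ocr->run(word, output); // sliding-window classification + beam search decoding
    std::cout << output << std::endl;
    return 0;
}
```
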