diff --git a/modules/text/doc/text.bib b/modules/text/doc/text.bib new file mode 100644 index 000000000..64a8f4a19 --- /dev/null +++ b/modules/text/doc/text.bib @@ -0,0 +1,34 @@ +@inproceedings{Neumann12, + title={Real-Time Scene Text Localization and Recognition}, + author={Neumann, L. and Matas, J.}, + booktitle={Computer Vision and Pattern Recognition (CVPR), 2012 IEEE Conference on}, + pages={3538--3545}, + year={2012}, + organization={IEEE} +} +@inproceedings{Neumann11, + author = {Lukáš Neumann and Jiří Matas}, + title = {Text localization in real-world images using efficiently pruned exhaustive search}, + booktitle = {Document Analysis and Recognition, 2011 International Conference on. IEEE, 2011}, + year = {2011}, + pages = {687--691} +} +@inproceedings{Gomez13, + author = {G\'{o}mez, Llu\'{\i}s and Karatzas, Dimosthenis}, + title={Multi-script Text Extraction from Natural Scenes}, + booktitle = {Proceedings of the 2013 12th International Conference on Document Analysis and Recognition}, + series = {ICDAR '13}, + year = {2013}, + isbn = {978-0-7695-4999-6}, + pages = {467--471}, + publisher = {IEEE Computer Society} + } +@article{Gomez14, + author = {Lluis Gomez i Bigorda and + Dimosthenis Karatzas}, + title = {A Fast Hierarchical Method for Multi-script and Arbitrary Oriented + Scene Text Extraction}, + journal = {CoRR}, + volume = {abs/1407.7504}, + year = {2014}, +} \ No newline at end of file diff --git a/modules/text/include/opencv2/text.hpp b/modules/text/include/opencv2/text.hpp index 6bcaa0198..cea49c69c 100644 --- a/modules/text/include/opencv2/text.hpp +++ b/modules/text/include/opencv2/text.hpp @@ -54,7 +54,7 @@ Class-specific Extremal Regions for Scene Text Detection -------------------------------------------------------- The scene text detection algorithm described below has been initially proposed by Lukás Neumann & -Jiri Matas [Neumann12]. The main idea behind Class-specific Extremal Regions is similar to the MSER +Jiri Matas @cite Neumann12. 
The main idea behind Class-specific Extremal Regions is similar to the MSER in that suitable Extremal Regions (ERs) are selected from the whole component tree of the image. However, this technique differs from MSER in that selection of suitable ERs is done by a sequential classifier trained for character detection, i.e. dropping the stability requirement of MSERs and @@ -87,9 +87,9 @@ order to increase the character localization recall. After the ER filtering is done on each input channel, character candidates must be grouped in high-level text blocks (i.e. words, text lines, paragraphs, ...). The opencv_text module implements -two different grouping algorithms: the Exhaustive Search algorithm proposed in [Neumann11] for +two different grouping algorithms: the Exhaustive Search algorithm proposed in @cite Neumann11 for grouping horizontally aligned text, and the method proposed by Lluis Gomez and Dimosthenis Karatzas -in [Gomez13][Gomez14] for grouping arbitrary oriented text (see erGrouping). +in @cite Gomez13 @cite Gomez14 for grouping arbitrary oriented text (see erGrouping). To see the text detector at work, have a look at the textdetection demo: diff --git a/modules/text/include/opencv2/text/erfilter.hpp b/modules/text/include/opencv2/text/erfilter.hpp index 0f903bc13..c9bac2b32 100644 --- a/modules/text/include/opencv2/text/erfilter.hpp +++ b/modules/text/include/opencv2/text/erfilter.hpp @@ -111,7 +111,7 @@ public: ERStat* min_probability_ancestor; }; -/** @brief Base class for 1st and 2nd stages of Neumann and Matas scene text detection algorithm [Neumann12]. : +/** @brief Base class for 1st and 2nd stages of Neumann and Matas scene text detection algorithm @cite Neumann12. : Extracts the component tree (if needed) and filter the extremal regions (ER's) by using a given classifier. */ @@ -163,31 +163,8 @@ public: }; -/*! 
- Create an Extremal Region Filter for the 1st stage classifier of N&M algorithm - Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012 - - The component tree of the image is extracted by a threshold increased step by step - from 0 to 255, incrementally computable descriptors (aspect_ratio, compactness, - number of holes, and number of horizontal crossings) are computed for each ER - and used as features for a classifier which estimates the class-conditional - probability P(er|character). The value of P(er|character) is tracked using the inclusion - relation of ER across all thresholds and only the ERs which correspond to local maximum - of the probability P(er|character) are selected (if the local maximum of the - probability is above a global limit pmin and the difference between local maximum and - local minimum is greater than minProbabilityDiff). - - @param cb – Callback with the classifier. Default classifier can be implicitly load with function - loadClassifierNM1(), e.g. from file in samples/cpp/trained_classifierNM1.xml - @param thresholdDelta – Threshold step in subsequent thresholds when extracting the component tree - @param minArea – The minimum area (% of image size) allowed for retreived ER’s - @param maxArea – The maximum area (% of image size) allowed for retreived ER’s - @param minProbability – The minimum probability P(er|character) allowed for retreived ER’s - @param nonMaxSuppression – Whenever non-maximum suppression is done over the branch probabilities - @param minProbabilityDiff – The minimum probability difference between local maxima and local minima ERs -*/ - -/** @brief Create an Extremal Region Filter for the 1st stage classifier of N&M algorithm [Neumann12]. + +/** @brief Create an Extremal Region Filter for the 1st stage classifier of N&M algorithm @cite Neumann12. @param cb : Callback with the classifier. Default classifier can be implicitly load with function loadClassifierNM1, e.g. 
from file in samples/cpp/trained_classifierNM1.xml @@ -213,7 +190,7 @@ CV_EXPORTS_W Ptr createERFilterNM1(const Ptr& cb, bool nonMaxSuppression = true, float minProbabilityDiff = (float)0.1); -/** @brief Create an Extremal Region Filter for the 2nd stage classifier of N&M algorithm [Neumann12]. +/** @brief Create an Extremal Region Filter for the 2nd stage classifier of N&M algorithm @cite Neumann12. @param cb : Callback with the classifier. Default classifier can be implicitly load with function loadClassifierNM2, e.g. from file in samples/cpp/trained_classifierNM2.xml @@ -268,7 +245,7 @@ enum { ERFILTER_NM_RGBLGrad, ERFILTER_NM_IHSGrad }; -/** @brief Compute the different channels to be processed independently in the N&M algorithm [Neumann12]. +/** @brief Compute the different channels to be processed independently in the N&M algorithm @cite Neumann12. @param _src Source image. Must be RGB CV_8UC3. @@ -289,7 +266,7 @@ CV_EXPORTS_W void computeNMChannels(InputArray _src, CV_OUT OutputArrayOfArrays //! text::erGrouping operation modes enum erGrouping_Modes { - /** Exhaustive Search algorithm proposed in [Neumann11] for grouping horizontally aligned text. + /** Exhaustive Search algorithm proposed in @cite Neumann11 for grouping horizontally aligned text. The algorithm models a verification function for all the possible ER sequences. The verification fuction for ER pairs consists in a set of threshold-based pairwise rules which compare measurements of two regions (height ratio, centroid angle, and region distance). The @@ -300,7 +277,7 @@ enum erGrouping_Modes { consistent. */ ERGROUPING_ORIENTATION_HORIZ, - /** Text grouping method proposed in [Gomez13][Gomez14] for grouping arbitrary oriented text. Regions + /** Text grouping method proposed in @cite Gomez13 @cite Gomez14 for grouping arbitrary oriented text. 
Regions are agglomerated by Single Linkage Clustering in a weighted feature space that combines proximity (x,y coordinates) and similarity measures (color, size, gradient magnitude, stroke width, etc.). SLC provides a dendrogram where each node represents a text group hypothesis. Then the algorithm @@ -375,8 +352,8 @@ CV_EXPORTS_W void detectRegions(InputArray image, const Ptr& er_filter /** @brief Extracts text regions from image. @param image Source image where text blocks needs to be extracted from. Should be CV_8UC3 (color). -@param er_filter1 Extremal Region Filter for the 1st stage classifier of N&M algorithm [Neumann12] -@param er_filter2 Extremal Region Filter for the 2nd stage classifier of N&M algorithm [Neumann12] +@param er_filter1 Extremal Region Filter for the 1st stage classifier of N&M algorithm @cite Neumann12 +@param er_filter2 Extremal Region Filter for the 2nd stage classifier of N&M algorithm @cite Neumann12 @param groups_rects Output list of rectangle blocks with text @param method Grouping method (see text::erGrouping_Modes). Can be one of ERGROUPING_ORIENTATION_HORIZ, ERGROUPING_ORIENTATION_ANY. @param filename The XML or YAML file with the classifier model (e.g. samples/trained_classifier_erGrouping.xml). Only to use when grouping method is ERGROUPING_ORIENTATION_ANY. diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp index e151c15ee..1ba37f03a 100644 --- a/modules/text/include/opencv2/text/ocr.hpp +++ b/modules/text/include/opencv2/text/ocr.hpp @@ -128,7 +128,7 @@ public: recognition of individual text elements found (e.g. words or text lines). @param component_confidences If provided the method will output a list of confidence values for the recognition of individual text elements found (e.g. words or text lines). - @param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXT_LINE. + @param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXTLINE. 
*/ virtual void run(Mat& image, std::string& output_text, std::vector* component_rects=NULL, std::vector* component_texts=NULL, std::vector* component_confidences=NULL,