@ -46,6 +46,10 @@
# include <vector>
# include <string>
# include <iostream>
# include <sstream>
namespace cv
{
@ -61,82 +65,126 @@ enum
OCR_LEVEL_TEXTLINE
} ;
//base class BaseOCR declares a common API that would be used in a typical text recognition scenario
//base class BaseOCR declares a common API that would be used in a typical text
//recognition scenario
class CV_EXPORTS_W BaseOCR
{
public :
public :
virtual ~ BaseOCR ( ) { } ;
virtual void run ( Mat & image , std : : string & output_text , std : : vector < Rect > * component_rects = NULL ,
std : : vector < std : : string > * component_texts = NULL , std : : vector < float > * component_confidences = NULL ,
virtual void run ( Mat & image , std : : string & output_text ,
std : : vector < Rect > * component_rects = NULL ,
std : : vector < std : : string > * component_texts = NULL ,
std : : vector < float > * component_confidences = NULL ,
int component_level = 0 ) = 0 ;
virtual void run ( Mat & image , Mat & mask , std : : string & output_text , std : : vector < Rect > * component_rects = NULL ,
std : : vector < std : : string > * component_texts = NULL , std : : vector < float > * component_confidences = NULL ,
virtual void run ( Mat & image , Mat & mask , std : : string & output_text ,
std : : vector < Rect > * component_rects = NULL ,
std : : vector < std : : string > * component_texts = NULL ,
std : : vector < float > * component_confidences = NULL ,
int component_level = 0 ) = 0 ;
/** @brief Main functionality of the OCR Hierarchy. Subclasses provide
* default parameters for all parameters other than the input image .
*/
virtual String run ( InputArray image ) {
std : : string res ;
std : : vector < Rect > component_rects ;
std : : vector < float > component_confidences ;
std : : vector < std : : string > component_texts ;
Mat inputImage = image . getMat ( ) ;
this - > run ( inputImage , res , & component_rects , & component_texts ,
& component_confidences , OCR_LEVEL_WORD ) ;
return res ;
}
} ;
/** @brief OCRTesseract class provides an interface with the tesseract-ocr API
 * (v3.02.02) in C++.
 *
 * Notice that it is compiled only when tesseract-ocr is correctly installed.
 *
 * @note
 *    - (C++) An example of OCRTesseract recognition combined with scene text
 *      detection can be found at the end_to_end_recognition demo:
 *      <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/end_to_end_recognition.cpp>
 *    - (C++) Another example of OCRTesseract recognition combined with scene
 *      text detection can be found at the webcam_demo:
 *      <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
 */
class CV_EXPORTS_W OCRTesseract : public BaseOCR
{
public:
    /** @brief Recognize text using the tesseract-ocr API.
     *
     * Takes image on input and returns recognized text in the output_text
     * parameter. Optionally provides also the Rects for individual text
     * elements found (e.g. words), and the list of those text elements with
     * their confidence values.
     *
     * @param image Input image CV_8UC1 or CV_8UC3.
     * @param output_text Output text of the tesseract-ocr.
     * @param component_rects If provided the method will output a list of
     * Rects for the individual text elements found (e.g. words or text lines).
     * @param component_texts If provided the method will output a list of text
     * strings for the recognition of individual text elements found (e.g.
     * words or text lines).
     * @param component_confidences If provided the method will output a list
     * of confidence values for the recognition of individual text elements
     * found (e.g. words or text lines).
     * @param component_level OCR_LEVEL_WORD (by default), or
     * OCR_LEVEL_TEXTLINE.
     */
    virtual void run(Mat& image, std::string& output_text,
                     std::vector<Rect>* component_rects = NULL,
                     std::vector<std::string>* component_texts = NULL,
                     std::vector<float>* component_confidences = NULL,
                     int component_level = 0);

    /** @brief Same as the overload above, but takes an additional mask that
     * segments the text components in the input image.
     */
    virtual void run(Mat& image, Mat& mask, std::string& output_text,
                     std::vector<Rect>* component_rects = NULL,
                     std::vector<std::string>* component_texts = NULL,
                     std::vector<float>* component_confidences = NULL,
                     int component_level = 0);

    // Aliases for scripting.
    CV_WRAP String run(InputArray image, int min_confidence,
                       int component_level = 0);

    CV_WRAP String run(InputArray image, InputArray mask, int min_confidence,
                       int component_level = 0);

    CV_WRAP virtual void setWhiteList(const String& char_whitelist) = 0;

    /** @brief Creates an instance of the OCRTesseract class. Initializes
     * Tesseract.
     *
     * @param datapath the name of the parent directory of tessdata ended with
     * "/", or NULL to use the system's default directory.
     * @param language an ISO 639-3 code or NULL will default to "eng".
     * @param char_whitelist specifies the list of characters used for
     * recognition. NULL defaults to
     * "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".
     * @param oem tesseract-ocr offers different OCR Engine Modes (OEM), by
     * default tesseract::OEM_DEFAULT is used. See the tesseract-ocr API
     * documentation for other possible values.
     * @param psmode tesseract-ocr offers different Page Segmentation Modes
     * (PSM). tesseract::PSM_AUTO (fully automatic layout analysis) is used.
     * See the tesseract-ocr API documentation for other possible values.
     */
    CV_WRAP static Ptr<OCRTesseract> create(const char* datapath = NULL,
                                            const char* language = NULL,
                                            const char* char_whitelist = NULL,
                                            int oem = 3, int psmode = 3);
};
@ -147,134 +195,156 @@ enum decoder_mode
OCR_DECODER_VITERBI = 0 // Other algorithms may be added
} ;
/** @brief OCRHMMDecoder class provides an interface for OCR using Hidden
 * Markov Models.
 *
 * @note
 *    - (C++) An example on using OCRHMMDecoder recognition combined with scene
 *      text detection can be found at the webcam_demo sample:
 *      <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
 */
class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR
{
public:
    /** @brief Callback with the character classifier is made a class.
     *
     * This way it hides the feature extractor and the classifier itself, so
     * developers can write their own OCR code.
     *
     * The default character classifier and feature extractor can be loaded
     * using the utility function loadOCRHMMClassifierNM and KNN model provided
     * in
     * <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/OCRHMM_knn_model_data.xml.gz>.
     */
    class CV_EXPORTS_W ClassifierCallback
    {
    public:
        virtual ~ClassifierCallback() {}

        /** @brief The character classifier must return a (ranked list of)
         * class(es) id('s).
         *
         * @param image Input image CV_8UC1 or CV_8UC3 with a single letter.
         * @param out_class The classifier returns the character class
         * categorical label, or list of class labels, to which the input
         * image corresponds.
         * @param out_confidence The classifier returns the probability of the
         * input image corresponding to each class in out_class.
         */
        virtual void eval(InputArray image, std::vector<int>& out_class,
                          std::vector<double>& out_confidence);
    };

public:
    /** @brief Recognize text using HMM.
     *
     * Takes binary image on input and returns recognized text in the
     * output_text parameter. Optionally provides also the Rects for individual
     * text elements found (e.g. words), and the list of those text elements
     * with their confidence values.
     *
     * @param image Input binary image CV_8UC1 with a single text line (or
     * word).
     * @param output_text Output text. Most likely character sequence found by
     * the HMM decoder.
     * @param component_rects If provided the method will output a list of
     * Rects for the individual text elements found (e.g. words).
     * @param component_texts If provided the method will output a list of text
     * strings for the recognition of individual text elements found (e.g.
     * words).
     * @param component_confidences If provided the method will output a list
     * of confidence values for the recognition of individual text elements
     * found (e.g. words).
     * @param component_level Only OCR_LEVEL_WORD is supported.
     */
    virtual void run(Mat& image, std::string& output_text,
                     std::vector<Rect>* component_rects = NULL,
                     std::vector<std::string>* component_texts = NULL,
                     std::vector<float>* component_confidences = NULL,
                     int component_level = 0);

    /** @brief Recognize text using HMM.
     *
     * Takes an image and a mask (where each connected component corresponds to
     * a segmented character) on input and returns recognized text in the
     * output_text parameter. Optionally provides also the Rects for individual
     * text elements found (e.g. words), and the list of those text elements
     * with their confidence values.
     *
     * @param image Input image CV_8UC1 or CV_8UC3 with a single text line (or
     * word).
     * @param mask Input binary image CV_8UC1 same size as input image. Each
     * connected component in mask corresponds to a segmented character in the
     * input image.
     * @param output_text Output text. Most likely character sequence found by
     * the HMM decoder.
     * @param component_rects If provided the method will output a list of
     * Rects for the individual text elements found (e.g. words).
     * @param component_texts If provided the method will output a list of text
     * strings for the recognition of individual text elements found (e.g.
     * words).
     * @param component_confidences If provided the method will output a list
     * of confidence values for the recognition of individual text elements
     * found (e.g. words).
     * @param component_level Only OCR_LEVEL_WORD is supported.
     */
    virtual void run(Mat& image, Mat& mask, std::string& output_text,
                     std::vector<Rect>* component_rects = NULL,
                     std::vector<std::string>* component_texts = NULL,
                     std::vector<float>* component_confidences = NULL,
                     int component_level = 0);

    // Aliases for scripting.
    CV_WRAP String run(InputArray image, int min_confidence,
                       int component_level = 0);

    CV_WRAP String run(InputArray image, InputArray mask, int min_confidence,
                       int component_level = 0);

    /** @brief Creates an instance of the OCRHMMDecoder class. Initializes
     * HMMDecoder.
     *
     * @param classifier The character classifier with built in feature
     * extractor.
     * @param vocabulary The language vocabulary (chars when ASCII English
     * text). vocabulary.size() must be equal to the number of classes of the
     * classifier.
     * @param transition_probabilities_table Table with transition
     * probabilities between character pairs.
     * cols == rows == vocabulary.size().
     * @param emission_probabilities_table Table with observation emission
     * probabilities. cols == rows == vocabulary.size().
     * @param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is
     * available for the moment
     * (<http://en.wikipedia.org/wiki/Viterbi_algorithm>).
     */
    static Ptr<OCRHMMDecoder> create(
        const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,
        const std::string& vocabulary,
        InputArray transition_probabilities_table,
        InputArray emission_probabilities_table,
        decoder_mode mode = OCR_DECODER_VITERBI);

    /** @overload Scripting-friendly variant taking a String vocabulary and an
     * int decoding mode.
     */
    CV_WRAP static Ptr<OCRHMMDecoder> create(
        const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,
        const String& vocabulary,
        InputArray transition_probabilities_table,
        InputArray emission_probabilities_table,
        int mode = OCR_DECODER_VITERBI);

protected:
    Ptr<OCRHMMDecoder::ClassifierCallback> classifier;
    std::string vocabulary;
    // NOTE(review): the source diff elides the member declarations between
    // `vocabulary` and `mode` (the transition/emission probability tables in
    // the upstream ocr.hpp header) — restore them from upstream and verify
    // before merging.
    decoder_mode mode;
};
/** @brief Allow to implicitly load the default character classifier when
 * creating an OCRHMMDecoder object.
 *
 * @param filename The XML or YAML file with the classifier model (e.g.
 * OCRHMM_knn_model_data.xml).
 *
 * The KNN default classifier is based in the scene text recognition method
 * proposed by Lukás Neumann & Jiri Matas in [Neumann11b]. Basically, the
 * region (contour) in the input image is normalized to a fixed size, while
 * retaining the centroid and aspect ratio, in order to extract a feature
 * vector based on gradient orientations along the chain-code of its
 * perimeter. Then, the region is classified using a KNN model trained with
 * synthetic data of rendered characters with different standard font types.
 */
CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(
    const String& filename);
/** @brief Allow to implicitly load the default character classifier when
 * creating an OCRHMMDecoder object.
 *
 * @param filename The XML or YAML file with the classifier model (e.g.
 * OCRBeamSearch_CNN_model_data.xml.gz).
 *
 * The CNN default classifier is based in the scene text recognition method
 * proposed by Adam Coates & Andrew NG in [Coates11a]. The character classifier
 * consists in a Single Layer Convolutional Neural Network and a linear
 * classifier. It is applied to the input image in a sliding window fashion,
 * providing a set of recognitions at each window location.
 */
CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(
    const String& filename);
//! @}
/** @brief Utility function to create a tailored language model transitions
 * table from a given list of words (lexicon).
 *
 * @param vocabulary The language vocabulary (chars when ASCII English text).
 * @param lexicon The list of words that are expected to be found in a
 * particular image.
 * @param transition_probabilities_table Output table with transition
 * probabilities between character pairs.
 * cols == rows == vocabulary.size().
 *
 * The function calculates frequency statistics of character pairs from the
 * given lexicon and fills the output transition_probabilities_table with
 * them. The transition_probabilities_table can be used as input in the
 * OCRHMMDecoder::create() and OCRBeamSearchDecoder::create() methods.
 * @note
 *    - (C++) An alternative would be to load the default generic language
 *      transition table provided in the text module samples folder (created
 *      from ispell 42869 english words list):
 *      <https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
 */
CV_EXPORTS void createOCRHMMTransitionsTable(
    std::string& vocabulary, std::vector<std::string>& lexicon,
    OutputArray transition_probabilities_table);

/** @overload Scripting-friendly variant that returns the transitions table as
 * a Mat instead of filling an OutputArray.
 */
CV_EXPORTS_W Mat createOCRHMMTransitionsTable(
    const String& vocabulary, std::vector<cv::String>& lexicon);
/* OCR BeamSearch Decoder */
/** @brief OCRBeamSearchDecoder class provides an interface for OCR using Beam Search algorithm.
/** @brief OCRBeamSearchDecoder class provides an interface for OCR using Beam
* Search algorithm .
@ note
- ( C + + ) An example on using OCRBeamSearchDecoder recognition combined with scene text detection can
be found at the demo sample :
< https : //github.com/opencv /opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
- ( C + + ) An example on using OCRBeamSearchDecoder recognition combined with
scene text detection can be found at the demo sample :
< https : //github.com/Itseez /opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
*/
class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR
{
public :
/* Forward declaration of class that can be used to generate an OCRBeamSearchDecoder::ClassifierCallback */
class TextImageClassifier ;
class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR {
public :
/** @brief Callback with the character classifier is made a class.
This way it hides the feature extractor and the classifier itself , so developers can write
their own OCR code .
* This way it hides the feature extractor and the classifier itself , so
* developers can write their own OCR code .
The default character classifier and feature extractor can be loaded using the utility function
loadOCRBeamSearchClassifierCNN with all its parameters provided in
< https : //github.com/opencv/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
* The default character classifier and feature extractor can be loaded
* using the utility function loadOCRBeamSearchClassifierCNN with all its
* parameters provided in
* < https : //github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
*/
class CV_EXPORTS_W ClassifierCallback
{
public :
class CV_EXPORTS_W ClassifierCallback {
public :
virtual ~ ClassifierCallback ( ) { }
/** @brief The character classifier must return a (ranked list of) class(es) id('s)
@ -364,8 +456,8 @@ public:
*/
virtual void eval ( InputArray image , std : : vector < std : : vector < double > > & recognition_probabilities , std : : vector < int > & oversegmentation ) ;
int getWindowSize ( ) { return 0 ; }
int getStepSize ( ) { return 0 ; }
virtual int getWindowSize ( ) { return 0 ; }
virtual int getStepSize ( ) { return 0 ; }
} ;
public :
@ -421,6 +513,7 @@ public:
@ param beam_size Size of the beam in Beam Search algorithm .
*/
static Ptr < OCRBeamSearchDecoder > create ( const Ptr < OCRBeamSearchDecoder : : ClassifierCallback > classifier , // The character classifier with built in feature extractor
const std : : string & vocabulary , // The language vocabulary (chars when ascii english text)
// size() must be equal to the number of classes
@ -441,6 +534,44 @@ public:
int mode = OCR_DECODER_VITERBI , // HMM Decoding algorithm (only Viterbi for the moment)
int beam_size = 500 ) ; // Size of the beam in Beam Search algorithm
/** @brief This method allows to plug a classifier that is derivative of TextImageClassifier in to
* OCRBeamSearchDecoder as a ClassifierCallback .
@ param classifier A pointer to a TextImageClassifier decendent
@ param alphabet The language alphabet one char per symbol . alphabet . size ( ) must be equal to the number of classes
of the classifier . In future editinons it should be replaced with a vector of strings .
@ param transition_probabilities_table Table with transition probabilities between character
pairs . cols = = rows = = alphabet . size ( ) .
@ param emission_probabilities_table Table with observation emission probabilities . cols = =
rows = = alphabet . size ( ) .
@ param windowWidth The width of the windows to which the sliding window will be iterated . The height will
be the height of the image . The windows might be resized to fit the classifiers input by the classifiers
preprocessor .
@ param windowStep The step for the sliding window
@ param mode HMM Decoding algorithm ( only Viterbi for the moment )
@ param beam_size Size of the beam in Beam Search algorithm
*/
// CV_WRAP static Ptr<OCRBeamSearchDecoder> create(const Ptr<TextImageClassifier> classifier, // The character classifier with built in feature extractor
// String alphabet, // The language alphabet one char per symbol
// // size() must be equal to the number of classes
// InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
// // cols == rows == alphabet.size()
// InputArray emission_probabilities_table, // Table with observation emission probabilities
// // cols == rows == alphabet.size()
// int windowWidth, // The width of the windows to which the sliding window will be iterated.
// // The height will be the height of the image. The windows might be resized to
// // fit the classifiers input by the classifiers preprocessor
// int windowStep = 1 , // The step for the sliding window
// int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
// int beam_size = 500); // Size of the beam in Beam Search algorithm
protected :
Ptr < OCRBeamSearchDecoder : : ClassifierCallback > classifier ;
} ;
// NOTE(review): a stray diff hunk marker ("@ -465,6 +596,364 @@ ...") swallowed the class's
// closing brace (restored above) and the declaration of loadOCRBeamSearchClassifierCNN that the
// hunk context names -- restore that declaration when merging:
// CV_EXPORTS_W Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename);
//! @}
}
}
//Classifiers should provide different backends
//For the moment only Caffe is implemented
enum {
OCR_HOLISTIC_BACKEND_NONE , //!< no deep-learning backend selected / compiled in
OCR_HOLISTIC_BACKEND_CAFFE //!< use the Caffe framework as the computation backend
} ;
class TextImageClassifier ;
/** @brief Base class for preprocessing functors that adapt an arbitrary input
 * image to the fixed geometry, depth and channel layout a TextImageClassifier
 * expects. Concrete preprocessors are obtained through the static factory
 * methods declared below.
 */
class CV_EXPORTS_W ImagePreprocessor {
protected :
// Subclass hook performing the actual preprocessing: resize to outputSize,
// channel conversion to outputChannels, plus any whitening the functor implements.
virtual void preprocess_ ( const Mat & input , Mat & output , Size outputSize , int outputChannels ) = 0 ;
// Subclass hook for preprocessors that support mean subtraction; default is a no-op.
virtual void set_mean_ ( Mat ) { }
public :
virtual ~ ImagePreprocessor ( ) { }
/** @brief Provides public access to the preprocessing with respect to a specific
 * classifier.
 *
 * This method's main use is running the preprocessor without feeding the result
 * to a classifier, e.g. to determine the exact behavior of a preprocessor.
 *
 * @param input an image without any constraints
 *
 * @param output in most cases an image of fixed depth, size and whitened
 *
 * @param sz the size to which the image will be resized if the preprocessor resizes inputs
 *
 * @param outputChannels the number of channels for the output image
 */
CV_WRAP void preprocess ( InputArray input , OutputArray output , Size sz , int outputChannels ) ;
/** @brief Sets the mean image used by mean-subtracting preprocessors (delegates to set_mean_). */
CV_WRAP void set_mean ( Mat mean ) ;
/** @brief Creates a functor that only resizes and changes the channels of the input
 * without further processing.
 *
 * @return shared pointer to the generated preprocessor
 */
CV_WRAP static Ptr < ImagePreprocessor > createResizer ( ) ;
/** @brief Creates a preprocessor that standardizes pixel values.
 *
 * @param sigma the deviation used when standardizing pixel values
 *   (presumably the target standard deviation of the output -- TODO confirm against the implementation)
 *
 * @return shared pointer to generated preprocessor
 */
CV_WRAP static Ptr < ImagePreprocessor > createImageStandarizer ( double sigma ) ;
/** @brief Creates a preprocessor that subtracts a fixed mean image from the input.
 *
 * @param meanImg the mean image to subtract
 *
 * @return shared pointer to generated preprocessor
 */
CV_WRAP static Ptr < ImagePreprocessor > createImageMeanSubtractor ( InputArray meanImg ) ;
/** @brief Creates a customizable preprocessor.
 *
 * @param rawval scaling applied to raw pixel values (default 1.0)
 * @param channel_order desired channel order of the output (default " BGR ")
 *
 * @return shared pointer to generated preprocessor
 */
CV_WRAP static Ptr < ImagePreprocessor > createImageCustomPreprocessor ( double rawval = 1.0 , String channel_order = " BGR " ) ;
// TextImageClassifier drives the protected preprocess_ hook directly.
friend class TextImageClassifier ;
} ;
/** @brief Abstract class that implements the classification of text images.
 *
 * The interface is generic enough to describe any image classifier and allows
 * taking advantage of computing in batches. While word classifiers are the
 * default networks, any image classifier should work.
 *
 */
class CV_EXPORTS_W TextImageClassifier
{
protected :
Size inputGeometry_ ; // fixed sample size expected at the classifier input
Size outputGeometry_ ; // geometry of the classifier response
int channelCount_ ; // number of channels of each input sample
Ptr < ImagePreprocessor > preprocessor_ ; // functor adapting raw images to the input geometry
/** @brief All image preprocessing is handled here, including whitening etc.
 *
 * @param input the image to be preprocessed for the classifier. If the depth
 * is CV_8U values should be in [0,255], otherwise values are assumed to be in [0,1]
 *
 * @param output reference to the image to be fed to the classifier; the preprocessor will
 * resize the image to the appropriate size and convert it to the appropriate depth.
 *
 * This method should never be used externally; it is up to the classify and classifyBatch
 * methods to employ it.
 */
virtual void preprocess ( const Mat & input , Mat & output ) ;
public :
virtual ~ TextImageClassifier ( ) { }
/** @brief Replaces the preprocessor employed by this classifier.
 */
CV_WRAP virtual void setPreprocessor ( Ptr < ImagePreprocessor > ptr ) ;
/** @brief Returns the preprocessor currently employed by this classifier.
 */
CV_WRAP Ptr < ImagePreprocessor > getPreprocessor ( ) ;
/** @brief Produces a class-confidence row-vector given an image.
 */
CV_WRAP virtual void classify ( InputArray image , OutputArray classProbabilities ) = 0 ;
/** @brief Produces a list of bounding boxes given an image.
 */
CV_WRAP virtual void detect ( InputArray image , OutputArray classProbabilities ) = 0 ;
/** @brief Produces a matrix containing class-confidence row-vectors given a collection of images.
 */
CV_WRAP virtual void classifyBatch ( InputArrayOfArrays image , OutputArray classProbabilities ) = 0 ;
/** @brief Simple getter returning the number of channels each input sample has.
 */
CV_WRAP virtual int getInputChannelCount ( ) { return this - > channelCount_ ; }
/** @brief Simple getter returning the size of the input sample.
 */
CV_WRAP virtual Size getInputSize ( ) { return this - > inputGeometry_ ; }
/** @brief Simple getter returning the size of the output row-vector.
 */
CV_WRAP virtual int getOutputSize ( ) = 0 ;
/** @brief Returns the geometry (rows x cols) of the classifier output. */
CV_WRAP virtual Size getOutputGeometry ( ) = 0 ;
/** @brief Simple getter returning the size of the minibatches for this classifier.
 * If not applicable this method should return 1.
 */
CV_WRAP virtual int getMinibatchSize ( ) = 0 ;
// Preprocessors cooperate with classifier internals through this friendship.
friend class ImagePreprocessor ;
} ;
/** @brief TextImageClassifier that uses a pretrained Caffe model for word classification.
 *
 * This network is described in detail in:
 * Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015
 * http://arxiv.org/abs/1412.1842
 */
class CV_EXPORTS_W DeepCNN : public TextImageClassifier
{
public:
    virtual ~DeepCNN() {}  // stray ';' after the body removed; empty virtual dtor for delete-through-base
    /** @brief Constructs a DeepCNN object from a Caffe pretrained model.
     *
     * @param archFilename is the path to the prototxt file containing the deployment model architecture description.
     *
     * @param weightsFilename is the path to the pretrained weights of the model in binary form. This file can be
     * very large, up to 2GB.
     *
     * @param preprocessor is a pointer to an instance of an ImagePreprocessor implementing the protected preprocess_ method.
     *
     * @param minibatchSz the maximum number of samples that can be processed in parallel. In practice this parameter
     * has an effect only when computing on the GPU and should be set with respect to the memory available on the GPU.
     *
     * @param backEnd integer parameter selecting the computation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
     * the only option.
     */
    CV_WRAP static Ptr<DeepCNN> create(String archFilename, String weightsFilename, Ptr<ImagePreprocessor> preprocessor, int minibatchSz = 100, int backEnd = OCR_HOLISTIC_BACKEND_CAFFE);
    /** @brief Constructs a DeepCNN intended to be used for word spotting.
     *
     * This method loads a pretrained classifier and couples it with a preprocessor that standardizes pixels with a
     * deviation of 113. The architecture file can be downloaded from:
     * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt>
     * While the weights can be downloaded from:
     * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel>
     * The words assigned to the network outputs are available at:
     * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt>
     *
     * @param archFilename is the path to the prototxt file containing the deployment model architecture description.
     * When employing OCR_HOLISTIC_BACKEND_CAFFE this is the path to the deploy ".prototxt".
     *
     * @param weightsFilename is the path to the pretrained weights of the model. When employing
     * OCR_HOLISTIC_BACKEND_CAFFE this is the path to the ".caffemodel" file. This file can be very large; the
     * pretrained DictNet uses 2GB.
     *
     * @param backEnd integer parameter selecting the computation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
     * the only option.
     */
    CV_WRAP static Ptr<DeepCNN> createDictNet(String archFilename, String weightsFilename, int backEnd = OCR_HOLISTIC_BACKEND_CAFFE);
};
namespace cnn_config {
namespace caffe_backend {
/** @brief Queries Caffe on the computation device being used.
 *
 * Caffe can only be controlled globally: whether the GPU or the CPU is used is a
 * global behavior. This function queries the current state of Caffe.
 * If the module is built without Caffe, this method throws an exception.
 *
 * @return true if Caffe is computing on the GPU, false if Caffe is computing on the CPU
 */
CV_EXPORTS_W bool getCaffeGpuMode ( ) ;
/** @brief Sets the computation device being used by Caffe.
 *
 * Caffe can only be controlled globally: whether the GPU or the CPU is used is a
 * global behavior. This function sets that global state of Caffe.
 * If the module is built without Caffe, this method throws an exception.
 *
 * @param useGpu set to true for Caffe to compute on the GPU, false for Caffe to
 * compute on the CPU
 */
CV_EXPORTS_W void setCaffeGpuMode ( bool useGpu ) ;
/** @brief Provides runtime information on whether Caffe support was compiled in.
 *
 * The text module API is the same regardless of whether Caffe was available
 * during compilation. When methods that require Caffe are invoked while Caffe support
 * is not compiled in, exceptions are thrown. This method allows testing at runtime
 * whether the text module was built with Caffe.
 *
 * @return true if Caffe support for the text module was provided during compilation,
 * false if Caffe was unavailable.
 */
CV_EXPORTS_W bool getCaffeAvailable ( ) ;
} //caffe
} //cnn_config
/** @brief OCRHolisticWordRecognizer class provides the functionality of segmented word spotting.
 * Given a predefined vocabulary, a TextImageClassifier is employed to select the most probable
 * word given an input image.
 *
 * This class implements the logic of providing transcriptions given a vocabulary and an image
 * classifier. The classifier can be any TextImageClassifier, but the classifier for which this
 * class was built is the DictNet. In order to load it the following files should be downloaded:
 * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt>
 * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel>
 * <http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt>
 */
class CV_EXPORTS_W OCRHolisticWordRecognizer : public BaseOCR
{
public :
// Mask-less variant of run(); see the documented overload below for the parameter semantics.
virtual void run ( Mat & image , std : : string & output_text , std : : vector < Rect > * component_rects = NULL ,
std : : vector < std : : string > * component_texts = NULL , std : : vector < float > * component_confidences = NULL ,
int component_level = OCR_LEVEL_WORD ) = 0 ;
/** @brief Recognize text using a segmentation-based word-spotting classifier CNN.

Takes an image on input and returns the recognized text in the output_text parameter. Optionally
provides also the Rects for individual text elements found (e.g. words), and the list of those
text elements with their confidence values.

@param image Input image CV_8UC1 or CV_8UC3
@param mask is totally ignored and is only available for compatibility reasons
@param output_text Output text of the word spotting, always one that exists in the dictionary.
@param component_rects Not applicable for word spotting; can be NULL. If not, a single element will
be put in the vector.
@param component_texts Not applicable for word spotting; can be NULL. If not, a single element will
be put in the vector.
@param component_confidences Not applicable for word spotting; can be NULL. If not, a single element will
be put in the vector.
@param component_level must be OCR_LEVEL_WORD.
*/
virtual void run ( Mat & image , Mat & mask , std : : string & output_text , std : : vector < Rect > * component_rects = NULL ,
std : : vector < std : : string > * component_texts = NULL , std : : vector < float > * component_confidences = NULL ,
int component_level = OCR_LEVEL_WORD ) = 0 ;
/**
@brief Method that provides a quick and simple interface to a single word-image classification.

@param inputImage an image expected to be CV_8UC1 or CV_8UC3 of any size, assumed to contain a single word
@param transcription an OpenCV string that will store the detected word transcription
@param confidence a double that will be updated with the confidence the classifier has for the selected word
*/
CV_WRAP virtual void recogniseImage ( InputArray inputImage , CV_OUT String & transcription , CV_OUT double & confidence ) = 0 ;
/**
@brief Method that provides a quick and simple interface to classify multiple word images, taking advantage
of the classifier's parallel capabilities.

@param inputImageList a list of images, each expected to be CV_8UC1 or CV_8UC3; each image can be of any size
and is assumed to contain a single word.
@param transcriptions a vector of OpenCV strings that will store the detected word transcriptions, one for each
input image
@param confidences a vector of doubles that will be updated with the confidence the classifier has for each of the
selected words.
*/
CV_WRAP virtual void recogniseImageBatch ( InputArrayOfArrays inputImageList , CV_OUT std : : vector < String > & transcriptions , CV_OUT std : : vector < double > & confidences ) = 0 ;
/**
@brief Simple getter for the vocabulary employed.
*/
CV_WRAP virtual const std : : vector < String > & getVocabulary ( ) = 0 ;
/** @brief Simple getter for the classifier employed.
 * NOTE(review): the original comment said "preprocessing functor", but the accessor
 * returns the TextImageClassifier -- confirm against the implementation.
 */
CV_WRAP virtual Ptr < TextImageClassifier > getClassifier ( ) = 0 ;
/** @brief Creates an instance of the OCRHolisticWordRecognizer class.
@param classifierPtr an instance of TextImageClassifier, normally a DeepCNN instance
@param vocabularyFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line
in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize
of the classifier.
*/
CV_WRAP static Ptr < OCRHolisticWordRecognizer > create ( Ptr < TextImageClassifier > classifierPtr , String vocabularyFilename ) ;
/** @brief Creates an instance of the OCRHolisticWordRecognizer class and implicitly also a DeepCNN classifier.
@param modelArchFilename the relative or absolute path to the prototxt file describing the classifier's architecture.
@param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.
@param vocabularyFilename the relative or absolute path to the file containing all words in the vocabulary. Each text line
in the file is assumed to be a single word. The number of words in the vocabulary must be exactly the same as the outputSize
of the classifier.
*/
CV_WRAP static Ptr < OCRHolisticWordRecognizer > create ( String modelArchFilename , String modelWeightsFilename , String vocabularyFilename ) ;
/** @brief Creates an instance from an in-memory vocabulary instead of a vocabulary file.
 *
 * @param classifierPtr an instance of TextImageClassifier, normally a DeepCNN instance
 *
 * @param vocabulary the words the recognizer may output; its size must match the classifier's output size
 */
CV_WRAP static Ptr < OCRHolisticWordRecognizer > create ( Ptr < TextImageClassifier > classifierPtr , const std : : vector < String > & vocabulary ) ;
/** @brief Creates an instance, implicitly building a DeepCNN classifier, from an in-memory vocabulary.
 *
 * @param modelArchFilename the relative or absolute path to the prototxt file describing the classifier's architecture.
 *
 * @param modelWeightsFilename the relative or absolute path to the file containing the pretrained model weights in caffe-binary form.
 *
 * @param vocabulary the words the recognizer may output; its size must match the classifier's output size
 */
CV_WRAP static Ptr < OCRHolisticWordRecognizer > create ( String modelArchFilename , String modelWeightsFilename , const std : : vector < String > & vocabulary ) ;
} ;
} //namespace text
} //namespace cv
# endif // _OPENCV_TEXT_OCR_HPP_