text: change default char_whitelist parameter.

pull/3462/head
Kumataro 2 years ago
parent c60fde02e0
commit 42fc4892d5
  1. 6
      modules/text/include/opencv2/text/ocr.hpp
  2. 4
      modules/text/src/ocr_tesseract.cpp

@ -153,14 +153,16 @@ public:
@param datapath the name of the parent directory of tessdata ending with "/", or NULL to use the
system's default directory.
@param language an ISO 639-3 code or NULL will default to "eng".
@param char_whitelist specifies the list of characters used for recognition. NULL defaults to
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".
@param char_whitelist specifies the list of characters used for recognition. NULL defaults to ""
(All characters will be used for recognition).
@param oem tesseract-ocr offers different OCR Engine Modes (OEM), by default
tesseract::OEM_DEFAULT is used. See the tesseract-ocr API documentation for other possible
values.
@param psmode tesseract-ocr offers different Page Segmentation Modes (PSM), by default tesseract::PSM_AUTO
(fully automatic layout analysis) is used. See the tesseract-ocr API documentation for other
possible values.
@note The char_whitelist default was changed after OpenCV 4.7.0/3.4.19 from "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" to "".
*/
CV_WRAP static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
const char* char_whitelist=NULL, int oem=OEM_DEFAULT, int psmode=PSM_AUTO);

@ -163,10 +163,12 @@ public:
tesseract::PageSegMode pagesegmode = (tesseract::PageSegMode)psmode;
tess.SetPageSegMode(pagesegmode);
// tessedit_char_whitelist default changes from [0-9a-zA-Z] to "".
// See https://github.com/opencv/opencv_contrib/issues/3457
if(char_whitelist != NULL)
tess.SetVariable("tessedit_char_whitelist", char_whitelist);
else
tess.SetVariable("tessedit_char_whitelist", "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
tess.SetVariable("tessedit_char_whitelist", "");
tess.SetVariable("save_best_choices", "T");
#else

Loading…
Cancel
Save