diff --git a/modules/text/doc/text.bib b/modules/text/doc/text.bib index 64a8f4a19..d2ed9f9b6 100644 --- a/modules/text/doc/text.bib +++ b/modules/text/doc/text.bib @@ -31,4 +31,14 @@ journal = {CoRR}, volume = {abs/1407.7504}, year = {2014}, -} \ No newline at end of file +} +@inproceedings{LiaoSBWL17, + author = {Minghui Liao and + Baoguang Shi and + Xiang Bai and + Xinggang Wang and + Wenyu Liu}, + title = {TextBoxes: {A} Fast Text Detector with a Single Deep Neural Network}, + booktitle = {AAAI}, + year = {2017} +} diff --git a/modules/text/include/opencv2/text/textDetector.hpp b/modules/text/include/opencv2/text/textDetector.hpp index 0e51df39f..9c780ae31 100644 --- a/modules/text/include/opencv2/text/textDetector.hpp +++ b/modules/text/include/opencv2/text/textDetector.hpp @@ -27,12 +27,16 @@ public: @param Bbox a vector of Rect that will store the detected word bounding box @param confidence a vector of float that will be updated with the confidence the classifier has for the selected bounding box */ - virtual void textDetectInImage(InputArray inputImage, CV_OUT std::vector& Bbox, CV_OUT std::vector& confidence) = 0; + CV_WRAP virtual void detect(InputArray inputImage, CV_OUT std::vector& Bbox, CV_OUT std::vector& confidence) = 0; virtual ~TextDetector() {} }; /** @brief TextDetectorCNN class provides the functionallity of text bounding box detection. - * A TextDetectorCNN is employed to find bounding boxes of text words given an input image. + This class finds bounding boxes of text words in a given input image. + This class uses the OpenCV dnn module to load the pre-trained model described in @cite LiaoSBWL17. + The original repository with the modified SSD Caffe version: https://github.com/MhLiao/TextBoxes. + The model can be downloaded from [DropBox](https://www.dropbox.com/s/g8pjzv2de9gty8g/TextBoxes_icdar13.caffemodel?dl=0). + The modified .prototxt file with the model description can be found in `opencv_contrib/modules/text/samples/textbox.prototxt`. 
*/ class CV_EXPORTS_W TextDetectorCNN : public TextDetector { @@ -44,9 +48,9 @@ public: @param Bbox a vector of Rect that will store the detected word bounding box @param confidence a vector of float that will be updated with the confidence the classifier has for the selected bounding box */ - CV_WRAP virtual void textDetectInImage(InputArray inputImage, CV_OUT std::vector& Bbox, CV_OUT std::vector& confidence) = 0; + CV_WRAP virtual void detect(InputArray inputImage, CV_OUT std::vector& Bbox, CV_OUT std::vector& confidence) = 0; - /** @brief Creates an instance of the textDetector class and implicitly also a DeepCNN classifier. + /** @brief Creates an instance of the TextDetectorCNN class using the provided parameters. @param modelArchFilename the relative or absolute path to the prototxt file describing the classifiers architecture. @param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form. diff --git a/modules/text/samples/deeptextdetection.py b/modules/text/samples/deeptextdetection.py index 09dcb2492..256a28e9e 100644 --- a/modules/text/samples/deeptextdetection.py +++ b/modules/text/samples/deeptextdetection.py @@ -14,14 +14,14 @@ def main(): print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n') quit() - if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'): + if not os.path.isfile('TextBoxes_icdar13.caffemodel') or not os.path.isfile('textbox.prototxt'): print " Model files not found in current directory. Aborting" - print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models" + print " See the documentation of text::TextDetectorCNN class to get download links." 
quit() img = cv2.imread(str(sys.argv[1])) - textSpotter = cv2.text.TextDetectorCNN_create("textbox_deploy.prototxt","textbox.caffemodel") - rects, outProbs = textSpotter.textDetectInImage(img); + textSpotter = cv2.text.TextDetectorCNN_create("textbox.prototxt", "TextBoxes_icdar13.caffemodel") + rects, outProbs = textSpotter.detect(img); vis = img.copy() thres = 0.6 diff --git a/modules/text/samples/textbox.prototxt b/modules/text/samples/textbox.prototxt index 6e8cb688e..bb8019828 100644 --- a/modules/text/samples/textbox.prototxt +++ b/modules/text/samples/textbox.prototxt @@ -885,6 +885,7 @@ layer { variance: 0.1 variance: 0.2 variance: 0.2 + additional_y_offset: true } } layer { @@ -1009,6 +1010,7 @@ layer { variance: 0.1 variance: 0.2 variance: 0.2 + additional_y_offset: true } } layer { @@ -1133,6 +1135,7 @@ layer { variance: 0.1 variance: 0.2 variance: 0.2 + additional_y_offset: true } } layer { @@ -1257,6 +1260,7 @@ layer { variance: 0.1 variance: 0.2 variance: 0.2 + additional_y_offset: true } } layer { @@ -1381,6 +1385,7 @@ layer { variance: 0.1 variance: 0.2 variance: 0.2 + additional_y_offset: true } } layer { @@ -1505,6 +1510,7 @@ layer { variance: 0.1 variance: 0.2 variance: 0.2 + additional_y_offset: true } } layer { diff --git a/modules/text/samples/textbox_demo.cpp b/modules/text/samples/textbox_demo.cpp index 9975c3947..f3c292836 100644 --- a/modules/text/samples/textbox_demo.cpp +++ b/modules/text/samples/textbox_demo.cpp @@ -10,15 +10,14 @@ using namespace cv; namespace { -std::string getHelpStr(std::string progFname) +std::string getHelpStr(const std::string& progFname) { std::stringstream out; out << " Demo of text detection CNN for text detection." 
<< std::endl << " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"< " << std::endl - << " Caffe Model files (textbox.caffemodel, textbox_deploy.prototxt)"< textSpotter = - text::TextDetectorCNN::create("textbox_deploy.prototxt","textbox.caffemodel", false); + text::TextDetectorCNN::create(modelArch, moddelWeights, false); std::vector bbox; std::vector outProbabillities; - textSpotter->textDetectInImage(image, bbox, outProbabillities); + textSpotter->detect(image, bbox, outProbabillities); textbox_draw(image, bbox, outProbabillities, 0.5f); diff --git a/modules/text/src/text_detectorCNN.cpp b/modules/text/src/text_detectorCNN.cpp index 1c3933fda..cd624985f 100644 --- a/modules/text/src/text_detectorCNN.cpp +++ b/modules/text/src/text_detectorCNN.cpp @@ -72,7 +72,7 @@ public: } } - void textDetectInImage(InputArray inputImage_, std::vector& Bbox, std::vector& confidence) + void detect(InputArray inputImage_, std::vector& Bbox, std::vector& confidence) { CV_Assert(inputImage_.channels() == inputChannelCount_); Mat inputImage = inputImage_.getMat().clone();