diff --git a/modules/text/CMakeLists.txt b/modules/text/CMakeLists.txt index a8a32326f..5d5a52b4a 100644 --- a/modules/text/CMakeLists.txt +++ b/modules/text/CMakeLists.txt @@ -67,4 +67,3 @@ if() else() message(STATUS "TEXT CAFFE CONFLICT") endif() - diff --git a/modules/text/FindTesseract.cmake b/modules/text/FindTesseract.cmake index 54c4a4929..4622ece14 100644 --- a/modules/text/FindTesseract.cmake +++ b/modules/text/FindTesseract.cmake @@ -19,6 +19,4 @@ find_library(Lept_LIBRARY NAMES lept set(Tesseract_LIBS ${Tesseract_LIBRARY} ${Lept_LIBRARY}) if(Tesseract_LIBS AND Tesseract_INCLUDE_DIR) set(Tesseract_FOUND 1) -endif() - - +endif() diff --git a/modules/text/README.md b/modules/text/README.md index a82bef20f..2caf58a1e 100644 --- a/modules/text/README.md +++ b/modules/text/README.md @@ -66,7 +66,7 @@ Instalation of Caffe backend The caffe wrapping backend has the requirements caffe does. * Caffe can be built against OpenCV, if the caffe backend is enabled, a circular bependency arises. The simplest solution is to build caffe without support for OpenCV. -* Only the OS supported by Caffe are supported by the backend. +* Only the OS supported by Caffe are supported by the backend. The scripts describing the module have been developed in ubuntu 16.04 and assume such a system. Other UNIX systems including OSX should be easy to adapt. @@ -90,7 +90,7 @@ echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config echo "--- /tmp/caffe/include/caffe/net.hpp 2017-05-28 04:55:47.929623902 +0200 +++ caffe/distribute/include/caffe/net.hpp 2017-05-28 04:51:33.437090768 +0200 @@ -234,6 +234,7 @@ - + template friend class Net; + virtual ~Callback(){} @@ -138,7 +138,7 @@ Instalation of Caffe backend The caffe wrapping backend has the requirements caffe does. * Caffe can be built against OpenCV, if the caffe backend is enabled, a circular bependency arises. The simplest solution is to build caffe without support for OpenCV. -* Only the OS supported by Caffe are supported by the backend. +* Only the OS supported by Caffe are supported by the backend. The scripts describing the module have been developed in ubuntu 16.04 and assume such a system. Other UNIX systems including OSX should be easy to adapt. @@ -160,7 +160,7 @@ echo 'LIBRARY_DIRS += /usr/lib/x86_64-linux-gnu/hdf5/serial/' >> Makefile.config echo "--- /tmp/caffe/include/caffe/net.hpp 2017-05-28 04:55:47.929623902 +0200 +++ caffe/distribute/include/caffe/net.hpp 2017-05-28 04:51:33.437090768 +0200 @@ -234,6 +234,7 @@ - + template friend class Net; + virtual ~Callback(){} diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp index 8030fcb63..e01a16f72 100644 --- a/modules/text/include/opencv2/text/ocr.hpp +++ b/modules/text/include/opencv2/text/ocr.hpp @@ -91,7 +91,7 @@ enum ocr_engine_mode }; //base class BaseOCR declares a common API that would be used in a typical text recognition scenario - + class CV_EXPORTS_W BaseOCR { public: @@ -188,7 +188,7 @@ public: /** @brief Creates an instance of the OCRTesseract class. Initializes Tesseract. - + @param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the system's default directory. @param language an ISO 639-3 code or NULL will default to "eng". @@ -277,8 +277,7 @@ class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR { * for the individual text elements found (e.g. words). * @param component_texts If provided the method will output a list of text - * strings for the recognition of individual text elements found (e.g. words) - * . + * strings for the recognition of individual text elements found (e.g. words). * @param component_confidences If provided the method will output a list of * confidence values for the recognition of individual text elements found @@ -314,8 +313,7 @@ class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR { * for the individual text elements found (e.g. words). * @param component_texts If provided the method will output a list of text - * strings for the recognition of individual text elements found (e.g. words) - * . + * strings for the recognition of individual text elements found (e.g. words). * @param component_confidences If provided the method will output a list of * confidence values for the recognition of individual text elements found @@ -596,34 +594,32 @@ public: int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment) int beam_size = 500); // Size of the beam in Beam Search algorithm - /** @brief This method allows to plug a classifier that is derivative of TextImageClassifier in to - * OCRBeamSearchDecoder as a ClassifierCallback. - @param classifier A pointer to a TextImageClassifier decendent - @param alphabet The language alphabet one char per symbol. alphabet.size() must be equal to the number of classes - of the classifier. In future editinons it should be replaced with a vector of strings. + + + /** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder from the specified path. + + @overload + + @param filename path to a character classifier file + + @param vocabulary The language vocabulary (chars when ASCII English text). vocabulary.size() + must be equal to the number of classes of the classifier.. @param transition_probabilities_table Table with transition probabilities between character - pairs. cols == rows == alphabet.size(). + pairs. cols == rows == vocabulary.size(). @param emission_probabilities_table Table with observation emission probabilities. cols == - rows == alphabet.size(). + rows == vocabulary.size(). @param windowWidth The width of the windows to which the sliding window will be iterated. The height will be the height of the image. The windows might be resized to fit the classifiers input by the classifiers preprocessor. - @param windowStep The step for the sliding window - @param mode HMM Decoding algorithm (only Viterbi for the moment) @param beam_size Size of the beam in Beam Search algorithm - */ - - /** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder from the specified path. - - @overload */ CV_WRAP static Ptr create(const String& filename, // The character classifier file diff --git a/modules/text/samples/deeptextdetection.py b/modules/text/samples/deeptextdetection.py index e2f67a3f1..8bc7a6422 100644 --- a/modules/text/samples/deeptextdetection.py +++ b/modules/text/samples/deeptextdetection.py @@ -25,11 +25,10 @@ if (len(sys.argv) < 2): # print"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n" # # quit() -# check model and architecture file existance +# check model and architecture file existance if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'): print " Model files not found in current directory. Aborting" print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models" - quit() cv2.text.cnn_config.caffe_backend.setCaffeGpuMode(True); diff --git a/modules/text/samples/textbox_demo.cpp b/modules/text/samples/textbox_demo.cpp index a41558935..e36015831 100644 --- a/modules/text/samples/textbox_demo.cpp +++ b/modules/text/samples/textbox_demo.cpp @@ -61,7 +61,7 @@ int main(int argc, const char * argv[]){ exit(1); } //set to true if you have a GPU with more than 3GB - cv::text::cnn_config::caffe_backend::setCaffeGpuMode(false); + cv::text::cnn_config::caffe_backend::setCaffeGpuMode(true); if (argc < 3){ std::cout< outProbabillities; textSpotter->textDetectInImage(image,bbox,outProbabillities); // textbox_draw(image, bbox,outProbabillities); - float thres =0.6; + float thres =0.6f; std::vector imageList; for(int imageIdx=0;imageIdx<(int)bbox.size();imageIdx++){ if(outProbabillities[imageIdx]>thres){ diff --git a/modules/text/src/text_detectorCNN.cpp b/modules/text/src/text_detectorCNN.cpp index 14cdaeb38..cf3a0c8ba 100644 --- a/modules/text/src/text_detectorCNN.cpp +++ b/modules/text/src/text_detectorCNN.cpp @@ -19,6 +19,9 @@ #ifdef HAVE_CAFFE #include "caffe/caffe.hpp" #endif + +#define CV_WARN(message) fprintf(stderr, "warning: %s (%s:%d)\n", message, __FILE__, __LINE__) + namespace cv { namespace text { inline bool fileExists (String filename) { @@ -33,6 +36,9 @@ protected: void process_(Mat inputImage, Mat &outputMat) { // do forward pass and stores the output in outputMat + CV_Assert(outputMat.isContinuous()); + if (inputImage.channels() != this->inputChannelCount_) + CV_WARN("Number of input channel(s) in the model is not same as input"); #ifdef HAVE_CAFFE @@ -204,7 +210,7 @@ Ptr DeepCNNTextDetector::create(String archFilename,String if(preprocessor.empty()) { // create a custom preprocessor with rawval - Ptr preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255); + preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255); // set the mean for the preprocessor Mat textbox_mean(1,3,CV_8U); @@ -264,4 +270,3 @@ void DeepCNNTextDetector::preprocess(const Mat& input,Mat& output) } } //namespace text namespace cv -