Modified the class hierarchy

8 years ago · be395e5981
parent 2b8ed124f2
commit be395e5981
5 changed files with 509 additions and 56 deletions
--- a/modules/text/include/opencv2/text/ocr.hpp
+++ b/modules/text/include/opencv2/text/ocr.hpp
@ -716,10 +716,6 @@ public:
    /** @brief produces a class confidence row-vector given an image
     */
    CV_WRAP virtual void classify(InputArray image, OutputArray classProbabilities) = 0;
    /** @brief produces a list of bounding box given an image
     */
    CV_WRAP virtual void detect(InputArray image, OutputArray classProbabilities) = 0;
    /** @brief produces a matrix containing class confidence row-vectors given an collection of images
     */
--- a/modules/text/include/opencv2/text/textDetector.hpp
+++ b/modules/text/include/opencv2/text/textDetector.hpp
@ -65,7 +65,7 @@ namespace text
 //detection scenario
 class CV_EXPORTS_W BaseDetector
 {
- public:
+public:
    virtual ~BaseDetector() {};
    virtual void run(Mat& image,
@ -78,6 +78,118 @@ class CV_EXPORTS_W BaseDetector
                     std::vector<float>* component_confidences=NULL,
                     int component_level=0) = 0;
 };
 /** A virtual class for different models of text detection (including CNN based deep models)
 */
 class CV_EXPORTS_W TextRegionDetector
 {
 protected:
    /** Stores input and output size
     */
    //netGeometry inputGeometry_;
    //netGeometry outputGeometry_;
    Size inputGeometry_;
    Size outputGeometry_;
    int inputChannelCount_;
    int outputChannelCount_;
 public:
    virtual ~TextRegionDetector() {}
    /** @brief produces a list of Bounding boxes and an estimate of text-ness confidence of Bounding Boxes
     */
    CV_WRAP virtual void detect(InputArray image, OutputArray bboxProb ) = 0;
    /** @brief simple getter method returning the size (height, width) of the input sample
     */
    CV_WRAP virtual Size  getInputGeometry(){return this->inputGeometry_;}
    /** @brief simple getter method returning the shape of the oputput
     *   Any text detector should output a number of text regions alongwith a score of text-ness
     *   From the shape it can be inferred the number of text regions and number of returned value
     *   for each region
     */
    CV_WRAP virtual Size getOutputGeometry(){return this->outputGeometry_;}
 };
 /** Generic structure of Deep CNN based Text Detectors.
  *
  * Instances are obtained through the static create() / createTextBoxNet()
  * factories declared below.
  */
 class CV_EXPORTS_W  DeepCNNTextDetector : public TextRegionDetector
 {
    /** @brief Class that uses a pretrained caffe model for text detection.
     * The TextBoxes network, targeted by createTextBoxNet(), is described in detail in:
     * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network
     * https://arxiv.org/abs/1611.06779
     */
 protected:
    /** all deep CNN based text detectors have a preprocessor (normally)
         */
    Ptr<ImagePreprocessor> preprocessor_;
    /** @brief all image preprocessing is handled here including whitening etc.
         *
         *  @param input the image to be preprocessed for the detector. If the depth
         * is CV_U8 values should be in [0,255] otherwise values are assumed to be in [0,1]
         *
         * @param output reference to the image to be fed to the detector, the preprocessor will
         * resize the image to the appropriate size and convert it to the appropriate depth
         *
         * The method preprocess should never be used externally (it is protected); it is up to the
         * detection implementation to employ it.
         */
    virtual void preprocess(const Mat& input,Mat& output);
 public:
    virtual ~DeepCNNTextDetector() {};
    /** @brief Constructs a DeepCNNTextDetector object from a caffe pretrained model
     *
     * @param archFilename is the path to the prototxt file containing the deployment model architecture description.
     *
     * @param weightsFilename is the path to the pretrained weights of the model in binary form.
     *
     * @param preprocessor is a pointer to the instance of an ImagePreprocessor implementing the preprocess_ protected method;
     *
     * @param minibatchSz the maximum number of samples that can be processed in parallel. In practice this parameter
     * has an effect only when computing in the GPU and should be set with respect to the memory available in the GPU.
     *
     * @param backEnd integer parameter selecting the computation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
     * the only option
     */
    CV_WRAP static Ptr<DeepCNNTextDetector> create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE);
    /** @brief Constructs a DeepCNNTextDetector intended to be used for text area detection.
     *
     * This method loads a pretrained model and couples it with a preprocessor that preprocesses the image
     * with mean subtraction of (104, 117, 123).
     * The architecture and models weights can be downloaded from:
     * https://github.com/sghoshcvc/TextBox-Models.git (size is around 100 MB)
     * @param archFilename is the path to the prototxt file containing the deployment model architecture description.
     * When employing OCR_HOLISTIC_BACKEND_CAFFE this is the path to the deploy ".prototxt".
     *
     * @param weightsFilename is the path to the pretrained weights of the model. When employing
     * OCR_HOLISTIC_BACKEND_CAFFE this is the path to the ".caffemodel" file.
     *
     * @param backEnd integer parameter selecting the computation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
     * the only option
     */
    CV_WRAP static Ptr<DeepCNNTextDetector> createTextBoxNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE);
    friend class ImagePreprocessor;
 };
 /** @brief textDetector class provides the functionality of text bounding box detection.
 * A TextRegionDetector is employed to find bounding boxes of text
 * words given an input image.
 *
 * This class implements the logic of providing text bounding boxes in a vector of rects given an TextRegionDetector
 * The TextRegionDetector can be any text detector
 *
 */
 class CV_EXPORTS_W textDetector : public BaseDetector
 {
@ -127,7 +239,7 @@ public:
    /** @brief simple getter for the preprocessing functor
     */
-    CV_WRAP virtual Ptr<TextImageClassifier> getClassifier()=0;
+    CV_WRAP virtual Ptr<TextRegionDetector> getClassifier()=0;
    /** @brief Creates an instance of the textDetector class.
@ -135,7 +247,7 @@ public:
     */
-    CV_WRAP static Ptr<textDetector> create(Ptr<TextImageClassifier> classifierPtr);
+    CV_WRAP static Ptr<textDetector> create(Ptr<TextRegionDetector> classifierPtr);
    /** @brief Creates an instance of the textDetector class and implicitly also a DeepCNN classifier.
--- a/modules/text/src/ocr_holistic.cpp
+++ b/modules/text/src/ocr_holistic.cpp
@ -459,53 +459,53 @@ protected:
 #endif
    }
-    void process_(Mat inputImage, Mat &outputMat)
+//    void process_(Mat inputImage, Mat &outputMat)
-    {
+//    {
-        // do forward pass and stores the output in outputMat
+//        // do forward pass and stores the output in outputMat
-        //Process one image
+//        //Process one image
-        CV_Assert(this->minibatchSz_==1);
+//        CV_Assert(this->minibatchSz_==1);
-        //CV_Assert(outputMat.isContinuous());
+//        //CV_Assert(outputMat.isContinuous());
-#ifdef HAVE_CAFFE
+//#ifdef HAVE_CAFFE
-        net_->input_blobs()[0]->Reshape(1, this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
+//        net_->input_blobs()[0]->Reshape(1, this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
-        net_->Reshape();
+//        net_->Reshape();
-        float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
+//        float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
-        float* inputData=inputBuffer;
+//        float* inputData=inputBuffer;
-        std::vector<Mat> input_channels;
+//        std::vector<Mat> input_channels;
-        Mat preprocessed;
+//        Mat preprocessed;
-        // if the image have multiple color channels the input layer should be populated accordingly
+//        // if the image have multiple color channels the input layer should be populated accordingly
-        for (int channel=0;channel < this->channelCount_;channel++){
+//        for (int channel=0;channel < this->channelCount_;channel++){
-            cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
+//            cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
-            input_channels.push_back(netInputWraped);
+//            input_channels.push_back(netInputWraped);
-            //input_data += width * height;
+//            //input_data += width * height;
-            inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
+//            inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
-        }
+//        }
-        this->preprocess(inputImage,preprocessed);
+//        this->preprocess(inputImage,preprocessed);
-        split(preprocessed, input_channels);
+//        split(preprocessed, input_channels);
-        //preprocessed.copyTo(netInputWraped);
+//        //preprocessed.copyTo(netInputWraped);
-        this->net_->Forward();
+//        this->net_->Forward();
-        const float* outputNetData=net_->output_blobs()[0]->cpu_data();
+//        const float* outputNetData=net_->output_blobs()[0]->cpu_data();
-        // const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
+//        // const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
-        this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
+//        this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
-        int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width;
+//        int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width;
-        outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
+//        outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
-        float*outputMatData=(float*)(outputMat.data);
+//        float*outputMatData=(float*)(outputMat.data);
-        memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
+//        memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
-#endif
+//#endif
-    }
+//    }
@ -587,15 +587,15 @@ public:
        inputImageList.push_back(image.getMat());
        classifyBatch(inputImageList,classProbabilities);
    }
-    void detect(InputArray image, OutputArray Bbox_prob)
+//    void detect(InputArray image, OutputArray Bbox_prob)
-    {
+//    {
-        Bbox_prob.create(this->outputGeometry_,CV_32F); // dummy initialization is it needed
+//        Bbox_prob.create(this->outputGeometry_,CV_32F); // dummy initialization is it needed
-        Mat outputMat = Bbox_prob.getMat();
+//        Mat outputMat = Bbox_prob.getMat();
-        process_(image.getMat(),outputMat);
+//        process_(image.getMat(),outputMat);
-        //copy back to outputArray
+//        //copy back to outputArray
-        outputMat.copyTo(Bbox_prob);
+//        outputMat.copyTo(Bbox_prob);
-    }
+//    }
    void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities)
    {
--- a/modules/text/src/text_detector.cpp
+++ b/modules/text/src/text_detector.cpp
@ -23,6 +23,8 @@
 namespace cv { namespace text {
 class textDetectImpl: public textDetector{
 private:
    struct NetOutput{
@ -60,9 +62,9 @@ private:
    };
 protected:
-    Ptr<TextImageClassifier> classifier_;
+    Ptr<TextRegionDetector> classifier_;
 public:
-    textDetectImpl(Ptr<TextImageClassifier> classifierPtr):classifier_(classifierPtr)
+    textDetectImpl(Ptr<TextRegionDetector> classifierPtr):classifier_(classifierPtr)
    {
    }
@ -131,13 +133,13 @@ public:
-    Ptr<TextImageClassifier> getClassifier()
+    Ptr<TextRegionDetector> getClassifier()
    {
        return this->classifier_;
    }
 };
-Ptr<textDetector> textDetector::create(Ptr<TextImageClassifier> classifierPtr)
+Ptr<textDetector> textDetector::create(Ptr<TextRegionDetector> classifierPtr)
 {
    return Ptr<textDetector>(new textDetectImpl(classifierPtr));
 }
@ -155,7 +157,7 @@ Ptr<textDetector> textDetector::create(String modelArchFilename, String modelWei
    textbox_mean.at<uchar>(0,2)=123;
    preprocessor->set_mean(textbox_mean);
 // create a pointer to text box detector(textDetector)
-    Ptr<TextImageClassifier> classifierPtr(DeepCNN::create(modelArchFilename,modelWeightsFilename,preprocessor,1));
+    Ptr<TextRegionDetector> classifierPtr(DeepCNNTextDetector::create(modelArchFilename,modelWeightsFilename,preprocessor,1));
    return Ptr<textDetector>(new textDetectImpl(classifierPtr));
 }
--- a/modules/text/src/text_detectorCNN.cpp
+++ b/modules/text/src/text_detectorCNN.cpp
@ -0,0 +1,343 @@
 #include "precomp.hpp"
 #include "opencv2/imgproc.hpp"
 #include "opencv2/core.hpp"
 #include <iostream>
 #include <fstream>
 #include <sstream>
 #include <queue>
 #include <algorithm>
 #include <iosfwd>
 #include <memory>
 #include <string>
 #include <utility>
 #include <vector>
 #ifdef HAVE_CAFFE
 #include "caffe/caffe.hpp"
 #endif
 namespace cv { namespace text {
 /** Returns true when a std::ifstream opened on filename reports good(),
  * i.e. the file could be opened for reading.
  *
  * NOTE(review): this is an inline function with external linkage inside
  * cv::text; if another translation unit defines the same helper the two
  * definitions presumably have to stay identical -- verify before changing
  * the body.
  */
 inline bool fileExists (String filename) {
    std::ifstream f(filename.c_str());
    return f.good();
 }
 //************************************************************************************
 //******************   TextImageClassifier   *****************************************
 //************************************************************************************
 //void TextImageClassifier::preprocess(const Mat& input,Mat& output)
 //{
 //    this->preprocessor_->preprocess_(input,output,this->inputGeometry_,this->channelCount_);
 //}
 //void TextImageClassifier::setPreprocessor(Ptr<ImagePreprocessor> ptr)
 //{
 //    CV_Assert(!ptr.empty());
 //    preprocessor_=ptr;
 //}
 //Ptr<ImagePreprocessor> TextImageClassifier::getPreprocessor()
 //{
 //    return preprocessor_;
 //}
 class DeepCNNTextDetectorCaffeImpl: public DeepCNNTextDetector{
 protected:
    void process_(Mat inputImage, Mat &outputMat)
    {
        // do forward pass and stores the output in outputMat
        //Process one image
        // CV_Assert(this->outputGeometry_.batchSize==1);
        //CV_Assert(outputMat.isContinuous());
 #ifdef HAVE_CAFFE
        net_->input_blobs()[0]->Reshape(1, this->inputChannelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
        net_->Reshape();
        float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
        float* inputData=inputBuffer;
        std::vector<Mat> input_channels;
        Mat preprocessed;
        // if the image have multiple color channels the input layer should be populated accordingly
        for (int channel=0;channel < this->inputChannelCount_;channel++){
            cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
            input_channels.push_back(netInputWraped);
            //input_data += width * height;
            inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
        }
        this->preprocess(inputImage,preprocessed);
        split(preprocessed, input_channels);
        //preprocessed.copyTo(netInputWraped);
        this->net_->Forward();
        const float* outputNetData=net_->output_blobs()[0]->cpu_data();
        // const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
        this->outputGeometry_.height = net_->output_blobs()[0]->height();
        this->outputGeometry_.width = net_->output_blobs()[0]->width();
        this->outputChannelCount_ = net_->output_blobs()[0]->channels();
        int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width;
        outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
        float*outputMatData=(float*)(outputMat.data);
        memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
 #endif
    }
 #ifdef HAVE_CAFFE
    Ptr<caffe::Net<float> > net_;
 #endif
    //Size inputGeometry_;
    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
    //int outputSize_;
 public:
    DeepCNNTextDetectorCaffeImpl(const DeepCNNTextDetectorCaffeImpl& dn):
        minibatchSz_(dn.minibatchSz_){
        outputGeometry_=dn.outputGeometry_;
        inputGeometry_=dn.inputGeometry_;
        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
 #ifdef HAVE_CAFFE
        this->net_=dn.net_;
 #endif
    }
    DeepCNNTextDetectorCaffeImpl& operator=(const DeepCNNTextDetectorCaffeImpl &dn)
    {
 #ifdef HAVE_CAFFE
        this->net_=dn.net_;
 #endif
        this->setPreprocessor(dn.preprocessor_);
        this->inputGeometry_=dn.inputGeometry_;
        this->inputChannelCount_=dn.inputChannelCount_;
        this->outputChannelCount_ = dn.outputChannelCount_;
        // this->minibatchSz_=dn.minibatchSz_;
        //this->outputGeometry_=dn.outputSize_;
        this->preprocessor_=dn.preprocessor_;
        this->outputGeometry_=dn.outputGeometry_;
        return *this;
        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
    }
    DeepCNNTextDetectorCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz)
        :minibatchSz_(maxMinibatchSz)
    {
        CV_Assert(this->minibatchSz_>0);
        CV_Assert(fileExists(modelArchFilename));
        CV_Assert(fileExists(modelWeightsFilename));
        CV_Assert(!preprocessor.empty());
        this->setPreprocessor(preprocessor);
 #ifdef HAVE_CAFFE
        this->net_.reset(new caffe::Net<float>(modelArchFilename, caffe::TEST));
        CV_Assert(net_->num_inputs()==1);
        CV_Assert(net_->num_outputs()==1);
        CV_Assert(this->net_->input_blobs()[0]->channels()==1
                ||this->net_->input_blobs()[0]->channels()==3);
        //        this->channelCount_=this->net_->input_blobs()[0]->channels();
        this->net_->CopyTrainedLayersFrom(modelWeightsFilename);
        caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0];
        this->inputGeometry_.height = inputLayer->height();
        this->inputGeometry_.width = inputLayer->width();
        this->inputChannelCount_ = inputLayer->channels();
        //this->inputGeometry_.batchSize =1;
        inputLayer->Reshape(this->minibatchSz_,this->inputChannelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
        net_->Reshape();
        this->outputChannelCount_ = net_->output_blobs()[0]->channels();
        //this->outputGeometry_.batchSize =1;
        this->outputGeometry_.height =net_->output_blobs()[0]->height();
        this->outputGeometry_.width = net_->output_blobs()[0]->width();
 #else
        CV_Error(Error::StsError,"Caffe not available during compilation!");
 #endif
    }
    void detect(InputArray image, OutputArray Bbox_prob)
    {
        Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width);
        Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed
        Mat outputMat = Bbox_prob.getMat();
        process_(image.getMat(),outputMat);
        //copy back to outputArray
        outputMat.copyTo(Bbox_prob);
    }
    //int getOutputSize()
    //{
    //  return this->outputSize_;
    //}
    Size getOutputGeometry()
    {
        return this->outputGeometry_;
    }
    Size getinputGeometry()
    {
        return this->inputGeometry_;
    }
    int getMinibatchSize()
    {
        return this->minibatchSz_;
    }
    int getBackend()
    {
        return OCR_HOLISTIC_BACKEND_CAFFE;
    }
    void setPreprocessor(Ptr<ImagePreprocessor> ptr)
    {
        CV_Assert(!ptr.empty());
        preprocessor_=ptr;
    }
    Ptr<ImagePreprocessor> getPreprocessor()
    {
        return preprocessor_;
    }
 };
 /** Builds a DeepCNNTextDetector for the requested backend.
  *
  * When preprocessor is empty a custom preprocessor created with
  * createImageCustomPreprocessor(255) and a mean of (104, 117, 123) is used instead.
  * Only OCR_HOLISTIC_BACKEND_CAFFE is implemented; any other backEnd raises an error.
  */
 Ptr<DeepCNNTextDetector> DeepCNNTextDetector::create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz,int backEnd)
 {
    if(preprocessor.empty())
    {
        // Assign to the parameter itself: declaring a new local of the same name here
        // would shadow it, and the default would be discarded at the closing brace.
        preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255);
        // set the mean for the preprocessor
        Mat textbox_mean(1,3,CV_8U);
        textbox_mean.at<uchar>(0,0)=104;
        textbox_mean.at<uchar>(0,1)=117;
        textbox_mean.at<uchar>(0,2)=123;
        preprocessor->set_mean(textbox_mean);
    }
    switch(backEnd){
    case OCR_HOLISTIC_BACKEND_CAFFE:
        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz));
    case OCR_HOLISTIC_BACKEND_NONE:
    default:
        CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
    }
    // Not reached when CV_Error throws; kept so that every path returns a value.
    return Ptr<DeepCNNTextDetector>();
 }
 /** Builds a detector for the TextBoxes model: a custom preprocessor created with
  * createImageCustomPreprocessor(255) and a mean of (104, 117, 123) is coupled with
  * a Caffe network using a batch size of 100.
  * Only OCR_HOLISTIC_BACKEND_CAFFE is implemented; any other backEnd raises an error.
  */
 Ptr<DeepCNNTextDetector> DeepCNNTextDetector::createTextBoxNet(String archFilename,String weightsFilename,int backEnd)
 {
    // custom preprocessor with rawval
    Ptr<ImagePreprocessor> meanSubtractor=ImagePreprocessor::createImageCustomPreprocessor(255);
    // per-channel mean handed to the preprocessor
    Mat channelMeans(1,3,CV_8U);
    uchar* meanRow=channelMeans.ptr<uchar>(0);
    meanRow[0]=104;
    meanRow[1]=117;
    meanRow[2]=123;
    meanSubtractor->set_mean(channelMeans);
    if(backEnd==OCR_HOLISTIC_BACKEND_CAFFE)
    {
        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,meanSubtractor, 100));
    }
    // OCR_HOLISTIC_BACKEND_NONE and every unknown backend end up here
    CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
    return Ptr<DeepCNNTextDetector>();
 }
 /** Hands input to the configured preprocessor, requesting an image of the network
  * input geometry with inputChannelCount_ channels in output.
  *
  * inputGeometry_ is passed as is: cv::Size is constructed as (width, height), so
  * building Size(height, width) would transpose the requested size for every
  * non-square network input.
  * NOTE(review): assumes ImagePreprocessor::preprocess interprets the Size as
  * (width, height), like the rest of OpenCV -- confirm against its implementation.
  */
 void DeepCNNTextDetector::preprocess(const Mat& input,Mat& output)
 {
    this->preprocessor_->preprocess(input,output,this->inputGeometry_,this->inputChannelCount_);
 }
 //namespace cnn_config{
 //namespace caffe_backend{
 //#ifdef HAVE_CAFFE
 //bool getCaffeGpuMode()
 //{
 //    return caffe::Caffe::mode()==caffe::Caffe::GPU;
 //}
 //void setCaffeGpuMode(bool useGpu)
 //{
 //    if(useGpu)
 //    {
 //        caffe::Caffe::set_mode(caffe::Caffe::GPU);
 //    }else
 //    {
 //        caffe::Caffe::set_mode(caffe::Caffe::CPU);
 //    }
 //}
 //bool getCaffeAvailable()
 //{
 //    return true;
 //}
 //#else
 //bool getCaffeGpuMode()
 //{
 //    CV_Error(Error::StsError,"Caffe not available during compilation!");
 //    return 0;
 //}
 //void setCaffeGpuMode(bool useGpu)
 //{
 //    CV_Error(Error::StsError,"Caffe not available during compilation!");
 //    CV_Assert(useGpu==1);//Compilation directives force
 //}
 //bool getCaffeAvailable(){
 //    return 0;
 //}
 //#endif
 //}//namespace caffe
 //}//namespace cnn_config
 }  } //namespace text namespace cv