Minor modification

8 years ago · fc9c41b8d4
parent 40db962641
commit fc9c41b8d4
2 changed files with 16 additions and 489 deletions
--- a/modules/text/include/opencv2/text/ocr.hpp
+++ b/modules/text/include/opencv2/text/ocr.hpp
@ -722,6 +722,8 @@ public:
    /** @brief simple getter method returning the size of the oputput row-vector
     */
    CV_WRAP virtual int getOutputSize()=0;
+    /** @brief simple getter method returning the shape of the oputput from caffe
+     */
    CV_WRAP virtual Size getOutputGeometry()=0;

    /** @brief simple getter method returning the size of the minibatches for this classifier.
--- a/modules/text/src/text_detector.cpp
+++ b/modules/text/src/text_detector.cpp
@ -22,468 +22,6 @@

 namespace cv { namespace text {

-//Maybe OpenCV has a routine better suited
-//inline bool fileExists (String filename) {
-//    std::ifstream f(filename.c_str());
-//    return f.good();
-//}
-
-//************************************************************************************
-//******************   ImagePreprocessor   *******************************************
-//************************************************************************************
-
-/*void ImagePreprocessor::preprocess(InputArray input,OutputArray output,Size sz,int outputChannels){
-    Mat inpImg=input.getMat();
-    Mat outImg;
-    this->preprocess_(inpImg,outImg,sz,outputChannels);
-    outImg.copyTo(output);
-}*/
-
-
-/*class ResizerPreprocessor: public ImagePreprocessor{
-protected:
-    void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
-        //TODO put all the logic of channel and depth conversions in ImageProcessor class
-        CV_Assert(outputChannels==1 || outputChannels==3);
-        CV_Assert(input.channels()==1 || input.channels()==3);
-        if(input.channels()!=outputChannels)
-        {
-            Mat tmpInput;
-            if(outputChannels==1){
-                cvtColor(input,tmpInput,COLOR_BGR2GRAY);
-                if(input.depth()==CV_8U)
-                {
-                    tmpInput.convertTo(output,CV_32FC1,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    tmpInput.convertTo(output, CV_32FC1);
-                }
-            }else
-            {
-                cvtColor(input,tmpInput,COLOR_GRAY2BGR);
-                if(input.depth()==CV_8U)
-                {
-                    tmpInput.convertTo(output,CV_32FC3,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    tmpInput.convertTo(output, CV_32FC3);
-                }
-            }
-        }else
-        {
-            if(input.channels()==1)
-            {
-                if(input.depth()==CV_8U)
-                {
-                    input.convertTo(output, CV_32FC1,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    input.convertTo(output, CV_32FC1);
-                }
-            }else
-            {
-                if(input.depth()==CV_8U){
-                    input.convertTo(output, CV_32FC3,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    input.convertTo(output, CV_32FC3);
-                }
-            }
-        }
-        if(outputSize.width!=0 && outputSize.height!=0)
-        {
-            resize(output,output,outputSize);
-        }
-    }
-public:
-    ResizerPreprocessor(){}
-    ~ResizerPreprocessor(){}
-};
-
-class StandarizerPreprocessor: public ImagePreprocessor{
-protected:
-    double sigma_;
-    void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
-        //TODO put all the logic of channel and depth conversions in ImageProcessor class
-        CV_Assert(outputChannels==1 || outputChannels==3);
-        CV_Assert(input.channels()==1 || input.channels()==3);
-        if(input.channels()!=outputChannels)
-        {
-            Mat tmpInput;
-            if(outputChannels==1)
-            {
-                cvtColor(input,tmpInput,COLOR_BGR2GRAY);
-                if(input.depth()==CV_8U)
-                {
-                    tmpInput.convertTo(output,CV_32FC1,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    tmpInput.convertTo(output, CV_32FC1);
-                }
-            }else
-            {
-                cvtColor(input,tmpInput,COLOR_GRAY2BGR);
-                if(input.depth()==CV_8U)
-                {
-                    tmpInput.convertTo(output,CV_32FC3,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    tmpInput.convertTo(output, CV_32FC3);
-                }
-            }
-        }else
-        {
-            if(input.channels()==1)
-            {
-                if(input.depth()==CV_8U)
-                {
-                    input.convertTo(output, CV_32FC1,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    input.convertTo(output, CV_32FC1);
-                }
-            }else
-            {
-                if(input.depth()==CV_8U)
-                {
-                    input.convertTo(output, CV_32FC3,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    input.convertTo(output, CV_32FC3);
-                }
-            }
-        }
-        if(outputSize.width!=0 && outputSize.height!=0)
-        {
-            resize(output,output,outputSize);
-        }
-        Scalar dev,mean;
-        meanStdDev(output,mean,dev);
-        subtract(output,mean[0],output);
-        divide(output,(dev[0]/sigma_),output);
-    }
-public:
-    StandarizerPreprocessor(double sigma):sigma_(sigma){}
-    ~StandarizerPreprocessor(){}
-};
-
-class MeanSubtractorPreprocessor: public ImagePreprocessor{
-protected:
-    Mat mean_;
-    void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
-        //TODO put all the logic of channel and depth conversions in ImageProcessor class
-        CV_Assert(this->mean_.cols==outputSize.width && this->mean_.rows ==outputSize.height);
-        CV_Assert(outputChannels==1 || outputChannels==3);
-        CV_Assert(input.channels()==1 || input.channels()==3);
-        if(input.channels()!=outputChannels)
-        {
-            Mat tmpInput;
-            if(outputChannels==1)
-            {
-                cvtColor(input,tmpInput,COLOR_BGR2GRAY);
-                if(input.depth()==CV_8U)
-                {
-                    tmpInput.convertTo(output,CV_32FC1,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    tmpInput.convertTo(output, CV_32FC1);
-                }
-            }else
-            {
-                cvtColor(input,tmpInput,COLOR_GRAY2BGR);
-                if(input.depth()==CV_8U)
-                {
-                    tmpInput.convertTo(output,CV_32FC3,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    tmpInput.convertTo(output, CV_32FC3);
-                }
-            }
-        }else
-        {
-            if(input.channels()==1)
-            {
-                if(input.depth()==CV_8U)
-                {
-                    input.convertTo(output, CV_32FC1,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    input.convertTo(output, CV_32FC1);
-                }
-            }else
-            {
-                if(input.depth()==CV_8U)
-                {
-                    input.convertTo(output, CV_32FC3,1/255.0);
-                }else
-                {//Assuming values are at the desired [0,1] range
-                    input.convertTo(output, CV_32FC3);
-                }
-            }
-        }
-        if(outputSize.width!=0 && outputSize.height!=0)
-        {
-            resize(output,output,outputSize);
-        }
-        subtract(output,this->mean_,output);
-    }
-public:
-    MeanSubtractorPreprocessor(Mat mean)
-    {
-        mean.copyTo(this->mean_);
-    }
-
-    ~MeanSubtractorPreprocessor(){}
-};
-
-
-Ptr<ImagePreprocessor> ImagePreprocessor::createResizer()
-{
-    return Ptr<ImagePreprocessor>(new ResizerPreprocessor);
-}
-
-Ptr<ImagePreprocessor> ImagePreprocessor::createImageStandarizer(double sigma)
-{
-    return Ptr<ImagePreprocessor>(new StandarizerPreprocessor(sigma));
-}
-
-Ptr<ImagePreprocessor> ImagePreprocessor::createImageMeanSubtractor(InputArray meanImg)
-{
-    Mat tmp=meanImg.getMat();
-    return Ptr<ImagePreprocessor>(new MeanSubtractorPreprocessor(tmp));
-}
-
-//************************************************************************************
-//******************   TextImageClassifier   *****************************************
-//************************************************************************************
-
-void TextImageClassifier::preprocess(const Mat& input,Mat& output)
-{
-    this->preprocessor_->preprocess_(input,output,this->inputGeometry_,this->channelCount_);
-}
-
-void TextImageClassifier::setPreprocessor(Ptr<ImagePreprocessor> ptr)
-{
-    CV_Assert(!ptr.empty());
-    preprocessor_=ptr;
-}
-
-Ptr<ImagePreprocessor> TextImageClassifier::getPreprocessor()
-{
-    return preprocessor_;
-}*/
-
-/*
-class DeepCNNCaffeImpl: public DeepCNN{
-protected:
-    void classifyMiniBatch(std::vector<Mat> inputImageList, Mat outputMat)
-    {
-        //Classifies a list of images containing at most minibatchSz_ images
-        CV_Assert(int(inputImageList.size())<=this->minibatchSz_);
-        CV_Assert(outputMat.isContinuous());
-#ifdef HAVE_CAFFE
-        net_->input_blobs()[0]->Reshape(inputImageList.size(), 1,this->inputGeometry_.height,this->inputGeometry_.width);
-        net_->Reshape();
-        float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
-        float* inputData=inputBuffer;
-        for(size_t imgNum=0;imgNum<inputImageList.size();imgNum++)
-        {
-            Mat preprocessed;
-            cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
-            this->preprocess(inputImageList[imgNum],preprocessed);
-            preprocessed.copyTo(netInputWraped);
-            inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
-        }
-        this->net_->ForwardPrefilled();
-        const float* outputNetData=net_->output_blobs()[0]->cpu_data();
-        float*outputMatData=(float*)(outputMat.data);
-        memcpy(outputMatData,outputNetData,sizeof(float)*this->outputSize_*inputImageList.size());
-#endif
-    }
-
-#ifdef HAVE_CAFFE
-    Ptr<caffe::Net<float> > net_;
-#endif
-    //Size inputGeometry_;
-    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
-    int outputSize_;
-public:
-    DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn):
-        minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
-        channelCount_=dn.channelCount_;
-        inputGeometry_=dn.inputGeometry_;
-        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
-#ifdef HAVE_CAFFE
-        this->net_=dn.net_;
-#endif
-    }
-    DeepCNNCaffeImpl& operator=(const DeepCNNCaffeImpl &dn)
-    {
-#ifdef HAVE_CAFFE
-        this->net_=dn.net_;
-#endif
-        this->setPreprocessor(dn.preprocessor_);
-        this->inputGeometry_=dn.inputGeometry_;
-        this->channelCount_=dn.channelCount_;
-        this->minibatchSz_=dn.minibatchSz_;
-        this->outputSize_=dn.outputSize_;
-        this->preprocessor_=dn.preprocessor_;
-        return *this;
-        //Implemented to supress Visual Studio warning "assignment operator could not be generated"
-    }
-
-    DeepCNNCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz)
-        :minibatchSz_(maxMinibatchSz)
-    {
-        CV_Assert(this->minibatchSz_>0);
-        CV_Assert(fileExists(modelArchFilename));
-        CV_Assert(fileExists(modelWeightsFilename));
-        CV_Assert(!preprocessor.empty());
-        this->setPreprocessor(preprocessor);
-#ifdef HAVE_CAFFE
-        this->net_.reset(new caffe::Net<float>(modelArchFilename, caffe::TEST));
-        CV_Assert(net_->num_inputs()==1);
-        CV_Assert(net_->num_outputs()==1);
-        CV_Assert(this->net_->input_blobs()[0]->channels()==1
-                ||this->net_->input_blobs()[0]->channels()==3);
-        this->channelCount_=this->net_->input_blobs()[0]->channels();
-        this->net_->CopyTrainedLayersFrom(modelWeightsFilename);
-        caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0];
-        this->inputGeometry_=Size(inputLayer->width(), inputLayer->height());
-        inputLayer->Reshape(this->minibatchSz_,1,this->inputGeometry_.height, this->inputGeometry_.width);
-        net_->Reshape();
-        this->outputSize_=net_->output_blobs()[0]->channels();
-
-#else
-        CV_Error(Error::StsError,"Caffe not available during compilation!");
-#endif
-    }
-
-    void classify(InputArray image, OutputArray classProbabilities)
-    {
-        std::vector<Mat> inputImageList;
-        inputImageList.push_back(image.getMat());
-        classifyBatch(inputImageList,classProbabilities);
-    }
-
-    void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities)
-    {
-        std::vector<Mat> allImageVector;
-        inputImageList.getMatVector(allImageVector);
-        size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmentic
-        size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic
-        classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F);
-        Mat outputMat = classProbabilities.getMat();
-        for(size_t imgNum=0;imgNum<allImageVector.size();imgNum+=minibatchSize)
-        {
-            size_t rangeEnd=imgNum+std::min<size_t>(allImageVector.size()-imgNum,minibatchSize);
-            std::vector<Mat>::const_iterator from=std::vector<Mat>::const_iterator(allImageVector.begin()+imgNum);
-            std::vector<Mat>::const_iterator to=std::vector<Mat>::const_iterator(allImageVector.begin()+rangeEnd);
-            std::vector<Mat> minibatchInput(from,to);
-            classifyMiniBatch(minibatchInput,outputMat.rowRange(int(imgNum),int(rangeEnd)));
-        }
-    }
-
-    int getOutputSize()
-    {
-        return this->outputSize_;
-    }
-
-    int getMinibatchSize()
-    {
-        return this->minibatchSz_;
-    }
-
-    int getBackend()
-    {
-        return OCR_HOLISTIC_BACKEND_CAFFE;
-    }
-};
-
-
-Ptr<DeepCNN> DeepCNN::create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz,int backEnd)
-{
-    if(preprocessor.empty())
-    {
-        preprocessor=ImagePreprocessor::createResizer();
-    }
-    switch(backEnd){
-    case OCR_HOLISTIC_BACKEND_CAFFE:
-        return Ptr<DeepCNN>(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz));
-        break;
-    case OCR_HOLISTIC_BACKEND_NONE:
-    default:
-        CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
-        return Ptr<DeepCNN>();
-        break;
-    }
-}
-
-
-Ptr<DeepCNN> DeepCNN::createDictNet(String archFilename,String weightsFilename,int backEnd)
-{
-    Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageStandarizer(113);
-    switch(backEnd){
-    case OCR_HOLISTIC_BACKEND_CAFFE:
-        return Ptr<DeepCNN>(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, 100));
-        break;
-    case OCR_HOLISTIC_BACKEND_NONE:
-    default:
-        CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
-        return Ptr<DeepCNN>();
-        break;
-    }
-}
-
-namespace cnn_config{
-namespace caffe_backend{
-
-#ifdef HAVE_CAFFE
-
-bool getCaffeGpuMode()
-{
-    return caffe::Caffe::mode()==caffe::Caffe::GPU;
-}
-
-void setCaffeGpuMode(bool useGpu)
-{
-    if(useGpu)
-    {
-        caffe::Caffe::set_mode(caffe::Caffe::GPU);
-    }else
-    {
-        caffe::Caffe::set_mode(caffe::Caffe::CPU);
-    }
-}
-
-bool getCaffeAvailable()
-{
-    return true;
-}
-
-#else
-
-bool getCaffeGpuMode()
-{
-    CV_Error(Error::StsError,"Caffe not available during compilation!");
-    return 0;
-}
-
-void setCaffeGpuMode(bool useGpu)
-{
-    CV_Error(Error::StsError,"Caffe not available during compilation!");
-    CV_Assert(useGpu==1);//Compilation directives force
-}
-
-bool getCaffeAvailable(){
-    return 0;
-}
-
-#endif
-
-}//namespace caffe
-}//namespace cnn_config
-*/

 class textDetectImpl: public textDetector{
 private:
@ -493,10 +31,6 @@ private:
        Rect bbox;
        float probability;

-//        static bool sorter(const NetOutput& o1,const NetOutput& o2)
-//        {//used with std::sort to provide the most probable class
-//            return o1.probabillity>o2.probabillity;
-//        }

        static void getOutputs(const float* buffer,int nbrTextBoxes,int nCol,std::vector<NetOutput>& res,Size inputShape)
        {
@ -516,22 +50,16 @@ private:
                float ht = y_max-y_min+1;

                res[k].bbox=Rect(int(x_min),int(y_min),int(wd),int(ht));
-               // printf("%f %f %f %f\n",buffer[k*nCol+3],buffer[k*nCol+4],buffer[k*nCol+5],buffer[k*nCol+6]);
+
                res[k].probability=buffer[k*nCol+2];
            }
-//            std::sort(res.begin(),res.end(),NetOutput::sorter);
+
        }

-//        static void getDetections(const float* buffer,int nbOutputs,int &classNum,double& confidence)
-//        {
-//            std::vector<NetOutput> tmp;
-//            getOutputs(buffer,nbOutputs,tmp);
-//            classNum=tmp[0].wordIdx;
-//            confidence=tmp[0].probabillity;
-//        }
+
    };
 protected:
-    //std::vector<String> labels_;
+
    Ptr<TextImageClassifier> classifier_;
 public:
    textDetectImpl(Ptr<TextImageClassifier> classifierPtr):classifier_(classifierPtr)
@ -544,25 +72,24 @@ public:
    void textDetectInImage(InputArray inputImage,CV_OUT std::vector<Rect>& Bbox,CV_OUT std::vector<float>& confidence)
    {
                Mat netOutput;
-                //std::cout<<"started detect"<<std::endl;
+                // call the detect function of deepCNN class
                this->classifier_->detect(inputImage,netOutput);
-                //std::cout<<"After Detect"<<std::endl;
+               // get the output geometry i.e height and width of output blob from caffe
                Size OutputGeometry_ = this->classifier_->getOutputGeometry();
                int nbrTextBoxes = OutputGeometry_.height;
                int nCol = OutputGeometry_.width;
-                //std::cout<<nbrTextBoxes<<std::endl;
+
                std::vector<NetOutput> tmp;
+                // the output bounding box needs to be resized by the input height and width
                Size inputImageShape = Size(inputImage.cols(),inputImage.rows());
                NetOutput::getOutputs((float*)(netOutput.data),nbrTextBoxes,nCol,tmp,inputImageShape);
-                //Bbox.resize(nbrTextBoxes);
-                //confidence.resize(nbrTextBoxes);
+                // put the output in CV_OUT
+
                for (int k=0;k<nbrTextBoxes;k++)
                {
                    Bbox.push_back(tmp[k].bbox);
                    confidence.push_back(tmp[k].probability);
                }
-                //Bbox = netOutput.data;
-                //confidence = netOutput.data;

     }

@ -602,10 +129,7 @@ public:
        this->run(image,component_rects,component_confidences,component_level);
    }

-//    std::vector<String>& getVocabulary()
-//    {
-//        return this->labels_;
-//    }
+

    Ptr<TextImageClassifier> getClassifier()
    {
@ -621,15 +145,16 @@ Ptr<textDetector> textDetector::create(Ptr<TextImageClassifier> classifierPtr)
 Ptr<textDetector> textDetector::create(String modelArchFilename, String modelWeightsFilename)
 {

-
+// create a custom preprocessor with rawval
    Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255);
+// set the mean for the preprocessor

    Mat textbox_mean(1,3,CV_8U);
    textbox_mean.at<uchar>(0,0)=104;
    textbox_mean.at<uchar>(0,1)=117;
    textbox_mean.at<uchar>(0,2)=123;
    preprocessor->set_mean(textbox_mean);
-
+// create a pointer to text box detector(textDetector)
    Ptr<TextImageClassifier> classifierPtr(DeepCNN::create(modelArchFilename,modelWeightsFilename,preprocessor,1));
    return Ptr<textDetector>(new textDetectImpl(classifierPtr));
 }