added calculation of output size

8 years ago · c697e41b8d
parent a2cab07193
commit c697e41b8d
6 changed files with 107 additions and 136 deletions
--- a/modules/text/include/opencv2/text/ocr.hpp
+++ b/modules/text/include/opencv2/text/ocr.hpp
@ -861,6 +861,15 @@ public:
 };

 namespace cnn_config{
+
+/** @brief Provides runtime information on which backends were compiled in.
+ *
+ * This function reports the backends that were available when this module was compiled.
+ *
+ * @return a list of descriptive strings, one per available backend (Caffe, opencv_dnn, etc.)
+ * */
+CV_EXPORTS_W std::vector<std::string> getAvailableBackends();
+
 namespace caffe_backend{

 /** @brief Prompts Caffe on the computation device beeing used
@ -897,6 +906,21 @@ CV_EXPORTS_W void setCaffeGpuMode(bool useGpu);
 CV_EXPORTS_W bool getCaffeAvailable();

 }//caffe
+namespace dnn_backend {
+
+/** @brief Provides runtime information on whether DNN module was compiled in.
+ *
+ * The text module API is the same regardless of whether the DNN module was available
+ * during compilation. When methods that require a backend are invoked while no backend support
+ * is compiled in, exceptions are thrown. This method allows testing at runtime whether the
+ * text module was built with the dnn_backend.
+ *
+ * @return true if opencv_dnn support for the text module was provided during compilation,
+ * false if opencv_dnn was unavailable.
+ */
+CV_EXPORTS_W bool getDNNAvailable();
+
+}//dnn_backend
 }//cnn_config

 /** @brief OCRHolisticWordRecognizer class provides the functionallity of segmented wordspotting.
--- a/modules/text/include/opencv2/text/textDetector.hpp
+++ b/modules/text/include/opencv2/text/textDetector.hpp
@ -56,7 +56,7 @@ namespace cv
 namespace text
 {

-//! @addtogroup text_recognize
+//! @addtogroup text_detect
 //! @{


@ -263,7 +263,7 @@ public:

 };

-
+//! @}
 }//namespace text
 }//namespace cv

--- a/modules/text/samples/textbox_demo.cpp
+++ b/modules/text/samples/textbox_demo.cpp
@ -61,6 +61,12 @@ int main(int argc, const char * argv[]){
        std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n";
        //exit(1);
    }
+    std::vector<std::string> backends=cv::text::cnn_config::getAvailableBackends();
+    std::cout << "The Following backends are available" << "\n";
+    for (int i=0;i<backends.size();i++)
+       std::cout << backends[i] << "\n";
+
+   // printf("%s",x);
    //set to true if you have a GPU with more than 3GB
     if(cv::text::cnn_config::caffe_backend::getCaffeAvailable())
    cv::text::cnn_config::caffe_backend::setCaffeGpuMode(true);
@ -112,7 +118,7 @@ int main(int argc, const char * argv[]){
    }
    // call dict net here for all detected parts
    cv::Ptr<cv::text::DeepCNN> cnn=cv::text::DeepCNN::createDictNet(
-                "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel");
+                "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel",cv::text::OCR_HOLISTIC_BACKEND_DNN);

    cv::Ptr<cv::text::OCRHolisticWordRecognizer> wordSpotter=
            cv::text::OCRHolisticWordRecognizer::create(cnn,"dictnet_vgg_labels.txt");
@ -130,7 +136,7 @@ int main(int argc, const char * argv[]){
        cv::Point tl_ = bbox.at(i).tl();
        cv::Point br_ = bbox.at(i).br();

-        out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<tl_.y<<","<<tl_.y<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl;
+        out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl;

    }
    out.close();
--- a/modules/text/src/ocr_holistic.cpp
+++ b/modules/text/src/ocr_holistic.cpp
@ -122,6 +122,7 @@ protected:
    //void set_mean_(Mat M){}

    void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
+
        //TODO put all the logic of channel and depth conversions in ImageProcessor class
        CV_Assert(outputChannels==1 || outputChannels==3);
        CV_Assert(input.channels()==1 || input.channels()==3);
@ -433,6 +434,7 @@ protected:
        CV_Assert(int(inputImageList.size())<=this->minibatchSz_);
        CV_Assert(outputMat.isContinuous());

+
 #ifdef HAVE_CAFFE
        net_->input_blobs()[0]->Reshape(inputImageList.size(), this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
        net_->Reshape();
@ -450,16 +452,19 @@ protected:
                input_channels.push_back(netInputWraped);
                //input_data += width * height;
                inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
+
            }
            this->preprocess(inputImageList[imgNum],preprocessed);
            split(preprocessed, input_channels);

+
        }
        this->net_->ForwardPrefilled();
        const float* outputNetData=net_->output_blobs()[0]->cpu_data();
        this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
        int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width;

+
        //outputMat.resize(this->outputGeometry_.height * this->outputGeometry_.width);
        float*outputMatData=(float*)(outputMat.data);
        memcpy(outputMatData,outputNetData,sizeof(float)*outputSz*inputImageList.size());
@ -470,9 +475,10 @@ protected:
 #ifdef HAVE_CAFFE
    Ptr<caffe::Net<float> > net_;
 #endif
-    //Size inputGeometry_;
+    //Size inputGeometry_;//=Size(100,32);
    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
    int outputSize_;
+    //Size outputGeometry_;
 public:
    DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn):
        minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
@ -608,7 +614,7 @@ protected:
            preProcessedImList.push_back(preprocessed);
        }
        // set input data blob in dnn::net
-        net_->setInput(blobFromImages(preProcessedImList,1, Size(100, 32)), "data");
+        net_->setInput(blobFromImages(preProcessedImList,1, this->inputGeometry_), "data");

        float*outputMatData=(float*)(outputMat.data);
       //Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ;
@ -625,9 +631,16 @@ protected:
 #ifdef HAVE_DNN
    Ptr<Net> net_;
 #endif
-    //Size inputGeometry_;
+    // TODO: the input image size is hard-coded; check whether the DNN module can read it from the prototxt.
+   // Size inputGeometry_;//=Size(100,32);
    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
    int outputSize_;
+    //Size outputGeometry_;//= Size(1,1);
+    //int channelCount_;
+   // int inputChannel_ ;//=1;
+    const int _inputHeight =32;
+    const int _inputWidth =100;
+    const int _inputChannel =1;
 public:
    DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn):
        minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
@ -678,33 +691,17 @@ public:
            //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
            exit(-1);
        }
-// find a wa to check the followings in cv::dnn ???
-//        CV_Assert(net_->num_inputs()==1);
-//        CV_Assert(net_->num_outputs()==1);
-//        CV_Assert(this->net_->input_blobs()[0]->channels()==1
-//                ||this->net_->input_blobs()[0]->channels()==3);
-//        this->channelCount_=this->net_->input_blobs()[0]->channels();
-
-

-        //this->net_->CopyTrainedLayersFrom(modelWeightsFilename);

-        //caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0];
-        //inputLayerId = net_->getLayerId('data');
-
-      //  inputLayerShape = net_->getLayerShapes(const MatShape& netInputShape,
-       //                                     inputLayerId,
-      //                                      std::vector<MatShape>* inLayerShapes,
-      //  std::vector<MatShape>* outLayerShapes) const;
-        // should not be hard coded ideally
-
-        this->inputGeometry_=Size(100,32);// Size(inputLayer->width(), inputLayer->height());
-        this->channelCount_ = 1;//inputLayer->channels();
+        this->inputGeometry_=Size(_inputWidth,_inputHeight);// Size(inputLayer->width(), inputLayer->height());
+        this->channelCount_ = _inputChannel;//inputLayer->channels();

        //inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
-        //net_->Reshape();
-        this->outputSize_=88172 ;//net_->output_blobs()[0]->channels();
-        this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
+        Ptr< Layer > outLayer=	net_->getLayer (net_->getLayerId (net_->getLayerNames()[net_->getLayerNames().size()-2]));
+        //std::vector<Mat> blobs = outLayer->blobs;
+
+        this->outputSize_=(outLayer->blobs)[1].size[0] ;//net_->output_blobs()[0]->channels();
+        //this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());



@ -732,7 +729,7 @@ public:
        size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic
        classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F);
        Mat outputMat = classProbabilities.getMat();
-        printf("ekhane");
+
        for(size_t imgNum=0;imgNum<allImageVector.size();imgNum+=minibatchSize)
        {
            size_t rangeEnd=imgNum+std::min<size_t>(allImageVector.size()-imgNum,minibatchSize);
@ -832,6 +829,22 @@ Ptr<DeepCNN> DeepCNN::createDictNet(String archFilename,String weightsFilename,i
 }

 namespace cnn_config{
+std::vector<std::string> getAvailableBackends()
+{
+    std::vector<std::string> backends;
+
+#ifdef HAVE_CAFFE
+    backends.push_back("CAFFE, OCR_HOLISTIC_BACKEND_CAFFE"); // Caffe based backend
+
+#endif
+#ifdef HAVE_DNN
+    backends.push_back("DNN, OCR_HOLISTIC_BACKEND_DNN");// opencv_dnn based backend
+#endif
+    return backends;
+
+
+}
+
 namespace caffe_backend{

 #ifdef HAVE_CAFFE
@ -856,7 +869,7 @@ bool getCaffeAvailable()
 {
    return true;
 }
-#elif defined(HAVE_DNN)
+#else

 bool getCaffeGpuMode()
 {
@ -873,32 +886,23 @@ void setCaffeGpuMode(bool useGpu)
 bool getCaffeAvailable(){
    return 0;
 }
-bool getDNNAvailable(){
-    return true;
-}

+#endif

-#else
+}//namespace caffe
+namespace dnn_backend{
+#ifdef  HAVE_DNN

-bool getCaffeGpuMode()
-{
-    CV_Error(Error::StsError,"Caffe not available during compilation!");
-    return 0;
-}

-void setCaffeGpuMode(bool useGpu)
-{
-    CV_Error(Error::StsError,"Caffe not available during compilation!");
-    CV_Assert(useGpu==1);//Compilation directives force
+bool getDNNAvailable(){
+    return true;
 }
-
-bool getCaffeAvailable(){
+#else
+bool getDNNAvailable(){
    return 0;
 }
-
 #endif
-
-}//namespace caffe
+}//namespace dnn_backend
 }//namespace cnn_config

 class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{
@ -931,6 +935,7 @@ private:
            getOutputs(buffer,nbOutputs,tmp);
            classNum=tmp[0].wordIdx;
            confidence=tmp[0].probabillity;
+
        }
    };
 protected:
@ -972,6 +977,7 @@ public:
    {
        Mat netOutput;
        this->classifier_->classifyBatch(inputImageList,netOutput);
+
        for(int k=0;k<netOutput.rows;k++)
        {
            int classNum;
--- a/modules/text/src/text_detector.cpp
+++ b/modules/text/src/text_detector.cpp
@ -16,9 +16,9 @@
 #include <vector>


-#ifdef HAVE_CAFFE
-#include "caffe/caffe.hpp"
-#endif
+//#ifdef HAVE_CAFFE
+//#include "caffe/caffe.hpp"
+//#endif

 namespace cv { namespace text {

--- a/modules/text/src/text_detectorCNN.cpp
+++ b/modules/text/src/text_detectorCNN.cpp
@ -225,75 +225,25 @@ protected:

 #ifdef HAVE_DNN

-        //std::vector<Mat> preProcessedImList; // to store preprocessed images, should it be handled inside preprocessing class?
-
        Mat preprocessed;
        this->preprocess(inputImage,preprocessed);
-        printf("After preprocess");
-        // preprocesses each image in the inputImageList and push to preprocessedImList
-//        for(size_t imgNum=0;imgNum<inputImageList.size();imgNum++)
-//        {
-//            this->preprocess(inputImageList[imgNum],preprocessed);
-//            preProcessedImList.push_back(preprocessed);
-//        }
-        // set input data blob in dnn::net
-        //Mat temp =blobFromImage(preprocessed,1, Size(700, 700));
-        //printf("%d %d %d ",temp.size[1],temp.size[2],temp.size[3]);
-        net_->setInput(blobFromImage(preprocessed,1, Size(700, 700)), "data");
-        printf("Input layer");
-
-
-       //Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ;
-       Mat outputNet = this->net_->forward( );//"mbox_priorbox");
-       printf("After forward");
-       //outputNet = outputNet.reshape(1, 1);
+
+        net_->setInput(blobFromImage(preprocessed,1,  this->inputGeometry_), "data");
+
+       Mat outputNet = this->net_->forward( );
+
       this->outputGeometry_.height = outputNet.size[2];
       this->outputGeometry_.width = outputNet.size[3];
       this->outputChannelCount_ = outputNet.size[1];
-       printf("%d %d %d ",outputNet.size[1],outputNet.size[2],outputNet.size[3]);
+
       outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
        float*outputMatData=(float*)(outputMat.data);
       float*outputNetData=(float*)(outputNet.data);
       int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width;

       memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
-//        net_->input_blobs()[0]->Reshape(1, this->inputChannelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
-//        net_->Reshape();
-//        float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
-//        float* inputData=inputBuffer;
-
-//        std::vector<Mat> input_channels;
-//        Mat preprocessed;
-//        // if the image have multiple color channels the input layer should be populated accordingly
-//        for (int channel=0;channel < this->inputChannelCount_;channel++){
-
-//            cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
-//            input_channels.push_back(netInputWraped);
-//            //input_data += width * height;
-//            inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
-//        }
-//        this->preprocess(inputImage,preprocessed);
-//        split(preprocessed, input_channels);
-
-//        //preprocessed.copyTo(netInputWraped);


-//        this->net_->Forward();
-//        const float* outputNetData=net_->output_blobs()[0]->cpu_data();
-//        // const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
-
-
-
-
-//        this->outputGeometry_.height = net_->output_blobs()[0]->height();
-//        this->outputGeometry_.width = net_->output_blobs()[0]->width();
-//        this->outputChannelCount_ = net_->output_blobs()[0]->channels();
-//        int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width;
-//        outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
-//        float*outputMatData=(float*)(outputMat.data);
-
-//        memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
-


 #endif
@ -307,6 +257,9 @@ protected:
    //Size inputGeometry_;
    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
    //int outputSize_;
+    const int _inputHeight =700;
+    const int _inputWidth =700;
+    const int _inputChannel =3;
 public:
    DeepCNNTextDetectorDNNImpl(const DeepCNNTextDetectorDNNImpl& dn):
        minibatchSz_(dn.minibatchSz_){
@ -355,28 +308,10 @@ public:
            //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
            exit(-1);
        }
-//        this->net_.reset(new caffe::Net<float>(modelArchFilename, caffe::TEST));
-//        CV_Assert(net_->num_inputs()==1);
-//        CV_Assert(net_->num_outputs()==1);
-//        CV_Assert(this->net_->input_blobs()[0]->channels()==1
-//                ||this->net_->input_blobs()[0]->channels()==3);
-//        //        this->channelCount_=this->net_->input_blobs()[0]->channels();
-
-
-//        this->inputGeometry_.height = inputLayer->height();
-//        this->inputGeometry_.width = inputLayer->width();
-//        this->inputChannelCount_ = inputLayer->channels();
-//        //this->inputGeometry_.batchSize =1;
-
-//        inputLayer->Reshape(this->minibatchSz_,this->inputChannelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
-//        net_->Reshape();
-//        this->outputChannelCount_ = net_->output_blobs()[0]->channels();
-//        //this->outputGeometry_.batchSize =1;
-//        this->outputGeometry_.height =net_->output_blobs()[0]->height();
-//        this->outputGeometry_.width = net_->output_blobs()[0]->width();
-        this->inputGeometry_.height =700;
-        this->inputGeometry_.width = 700 ;//inputLayer->width();
-        this->inputChannelCount_ = 3 ;//inputLayer->channels();
+
+        this->inputGeometry_.height =_inputHeight;
+        this->inputGeometry_.width = _inputWidth ;//inputLayer->width();
+        this->inputChannelCount_ = _inputChannel ;//inputLayer->channels();

 #else
        CV_Error(Error::StsError,"DNN module not available during compilation!");
@ -389,7 +324,7 @@ public:
        Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width);
        Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed
        Mat outputMat = Bbox_prob.getMat();
-        printf("calling");
+
        process_(image.getMat(),outputMat);
        //copy back to outputArray
        outputMat.copyTo(Bbox_prob);
@ -487,20 +422,20 @@ Ptr<DeepCNNTextDetector> DeepCNNTextDetector::createTextBoxNet(String archFilena
    case OCR_HOLISTIC_BACKEND_DEFAULT:

 #ifdef HAVE_CAFFE
-        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 100));
+        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1));

 #elif defined(HAVE_DNN)
-        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 100));
+        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1));
 #else
        CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented");
        return Ptr<DeepCNNTextDetector>();
 #endif
        break;
    case OCR_HOLISTIC_BACKEND_CAFFE:
-        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 100));
+        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1));
        break;
    case OCR_HOLISTIC_BACKEND_DNN:
-         return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 100));
+         return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1));
         break;
    case OCR_HOLISTIC_BACKEND_NONE:
    default: