added calculation of output size

8 years ago · c697e41b8d
parent a2cab07193
commit c697e41b8d
6 changed files with 107 additions and 136 deletions
--- a/modules/text/include/opencv2/text/ocr.hpp
+++ b/modules/text/include/opencv2/text/ocr.hpp
@ -861,6 +861,15 @@ public:
 };
 namespace cnn_config{
 /** @brief Runtime backend information.
 *
 * Reports which deep-learning backends were compiled into this module.
 *
 * @return a list with one descriptive string per available backend (Caffe, opencv_dnn etc.);
 * the list is empty when no backend was compiled in.
 * */
 CV_EXPORTS_W std::vector<std::string> getAvailableBackends();
 namespace caffe_backend{
 /** @brief Prompts Caffe on the computation device being used
@ -897,6 +906,21 @@ CV_EXPORTS_W void setCaffeGpuMode(bool useGpu);
 CV_EXPORTS_W bool getCaffeAvailable();
 }//caffe
 namespace dnn_backend {
 /** @brief Provides runtime information on whether DNN module was compiled in.
 *
 * The text module API is the same regardless of whether the DNN module was available or not
 * during compilation. When methods that require a backend are invoked while no backend support
 * is compiled, exceptions are thrown. This method allows testing at runtime whether the
 * text module was built with dnn_backend.
 *
 * @return true if opencv_dnn support for the text module was provided during compilation,
 * false if opencv_dnn was unavailable.
 */
 CV_EXPORTS_W bool getDNNAvailable();
 }//dnn_backend
 }//cnn_config
 /** @brief OCRHolisticWordRecognizer class provides the functionality of segmented wordspotting.
--- a/modules/text/include/opencv2/text/textDetector.hpp
+++ b/modules/text/include/opencv2/text/textDetector.hpp
@ -56,7 +56,7 @@ namespace cv
 namespace text
 {
-//! @addtogroup text_recognize
+//! @addtogroup text_detect
 //! @{
@ -263,7 +263,7 @@ public:
 };
-
+//! @}
 }//namespace text
 }//namespace cv
--- a/modules/text/samples/textbox_demo.cpp
+++ b/modules/text/samples/textbox_demo.cpp
@ -61,6 +61,12 @@ int main(int argc, const char * argv[]){
        std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n";
        //exit(1);
    }
    std::vector<std::string> backends=cv::text::cnn_config::getAvailableBackends();
    std::cout << "The Following backends are available" << "\n";
    for (int i=0;i<backends.size();i++)
       std::cout << backends[i] << "\n";
   // printf("%s",x);
    //set to true if you have a GPU with more than 3GB
     if(cv::text::cnn_config::caffe_backend::getCaffeAvailable())
    cv::text::cnn_config::caffe_backend::setCaffeGpuMode(true);
@ -112,7 +118,7 @@ int main(int argc, const char * argv[]){
    }
    // call dict net here for all detected parts
    cv::Ptr<cv::text::DeepCNN> cnn=cv::text::DeepCNN::createDictNet(
-                "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel");
+                "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel",cv::text::OCR_HOLISTIC_BACKEND_DNN);
    cv::Ptr<cv::text::OCRHolisticWordRecognizer> wordSpotter=
            cv::text::OCRHolisticWordRecognizer::create(cnn,"dictnet_vgg_labels.txt");
@ -130,7 +136,7 @@ int main(int argc, const char * argv[]){
        cv::Point tl_ = bbox.at(i).tl();
        cv::Point br_ = bbox.at(i).br();
-        out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<tl_.y<<","<<tl_.y<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl;
+        out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl;
    }
    out.close();
--- a/modules/text/src/ocr_holistic.cpp
+++ b/modules/text/src/ocr_holistic.cpp
@ -122,6 +122,7 @@ protected:
    //void set_mean_(Mat M){}
    void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
        //TODO put all the logic of channel and depth conversions in ImageProcessor class
        CV_Assert(outputChannels==1 || outputChannels==3);
        CV_Assert(input.channels()==1 || input.channels()==3);
@ -433,6 +434,7 @@ protected:
        CV_Assert(int(inputImageList.size())<=this->minibatchSz_);
        CV_Assert(outputMat.isContinuous());
 #ifdef HAVE_CAFFE
        net_->input_blobs()[0]->Reshape(inputImageList.size(), this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
        net_->Reshape();
@ -450,16 +452,19 @@ protected:
                input_channels.push_back(netInputWraped);
                //input_data += width * height;
                inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
            }
            this->preprocess(inputImageList[imgNum],preprocessed);
            split(preprocessed, input_channels);
        }
        this->net_->ForwardPrefilled();
        const float* outputNetData=net_->output_blobs()[0]->cpu_data();
        this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
        int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width;
        //outputMat.resize(this->outputGeometry_.height * this->outputGeometry_.width);
        float*outputMatData=(float*)(outputMat.data);
        memcpy(outputMatData,outputNetData,sizeof(float)*outputSz*inputImageList.size());
@ -470,9 +475,10 @@ protected:
 #ifdef HAVE_CAFFE
    Ptr<caffe::Net<float> > net_;
 #endif
-    //Size inputGeometry_;
+    //Size inputGeometry_;//=Size(100,32);
    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
    int outputSize_;
    //Size outputGeometry_;
 public:
    DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn):
        minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
@ -608,7 +614,7 @@ protected:
            preProcessedImList.push_back(preprocessed);
        }
        // set input data blob in dnn::net
-        net_->setInput(blobFromImages(preProcessedImList,1, Size(100, 32)), "data");
+        net_->setInput(blobFromImages(preProcessedImList,1, this->inputGeometry_), "data");
        float*outputMatData=(float*)(outputMat.data);
       //Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ;
@ -625,9 +631,16 @@ protected:
 #ifdef HAVE_DNN
    Ptr<Net> net_;
 #endif
-    //Size inputGeometry_;
+    // hard coding input image size. anything in DNN library to get that from prototxt??
   // Size inputGeometry_;//=Size(100,32);
    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
    int outputSize_;
    //Size outputGeometry_;//= Size(1,1);
    //int channelCount_;
   // int inputChannel_ ;//=1;
    const int _inputHeight =32;
    const int _inputWidth =100;
    const int _inputChannel =1;
 public:
    DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn):
        minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
@ -678,33 +691,17 @@ public:
            //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
            exit(-1);
        }
 // find a way to check the following in cv::dnn ???
 //        CV_Assert(net_->num_inputs()==1);
 //        CV_Assert(net_->num_outputs()==1);
 //        CV_Assert(this->net_->input_blobs()[0]->channels()==1
 //                ||this->net_->input_blobs()[0]->channels()==3);
 //        this->channelCount_=this->net_->input_blobs()[0]->channels();
        //this->net_->CopyTrainedLayersFrom(modelWeightsFilename);
-        //caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0];
+        this->inputGeometry_=Size(_inputWidth,_inputHeight);// Size(inputLayer->width(), inputLayer->height());
-        //inputLayerId = net_->getLayerId('data');
+        this->channelCount_ = _inputChannel;//inputLayer->channels();
      //  inputLayerShape = net_->getLayerShapes(const MatShape& netInputShape,
       //                                     inputLayerId,
      //                                      std::vector<MatShape>* inLayerShapes,
      //  std::vector<MatShape>* outLayerShapes) const;
        // should not be hard coded ideally
        this->inputGeometry_=Size(100,32);// Size(inputLayer->width(), inputLayer->height());
        this->channelCount_ = 1;//inputLayer->channels();
        //inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
-        //net_->Reshape();
+        Ptr< Layer > outLayer=	net_->getLayer (net_->getLayerId (net_->getLayerNames()[net_->getLayerNames().size()-2]));
-        this->outputSize_=88172 ;//net_->output_blobs()[0]->channels();
+        //std::vector<Mat> blobs = outLayer->blobs;
-        this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
+
        this->outputSize_=(outLayer->blobs)[1].size[0] ;//net_->output_blobs()[0]->channels();
        //this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
@ -732,7 +729,7 @@ public:
        size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic
        classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F);
        Mat outputMat = classProbabilities.getMat();
-        printf("ekhane");
+
        for(size_t imgNum=0;imgNum<allImageVector.size();imgNum+=minibatchSize)
        {
            size_t rangeEnd=imgNum+std::min<size_t>(allImageVector.size()-imgNum,minibatchSize);
@ -832,6 +829,22 @@ Ptr<DeepCNN> DeepCNN::createDictNet(String archFilename,String weightsFilename,i
 }
 namespace cnn_config{
 std::vector<std::string> getAvailableBackends()
 {
    // One descriptive entry per backend compiled into the text module, in the form
    // "<NAME>, <OCR_HOLISTIC_BACKEND_* identifier>"; stays empty when none was compiled in.
    std::vector<std::string> compiledBackends;
 #ifdef HAVE_CAFFE
    // Caffe based backend
    compiledBackends.push_back(std::string("CAFFE, OCR_HOLISTIC_BACKEND_CAFFE"));
 #endif
 #ifdef HAVE_DNN
    // opencv_dnn based backend
    compiledBackends.push_back(std::string("DNN, OCR_HOLISTIC_BACKEND_DNN"));
 #endif
    return compiledBackends;
 }
 namespace caffe_backend{
 #ifdef HAVE_CAFFE
@ -856,7 +869,7 @@ bool getCaffeAvailable()
 {
    return true;
 }
-#elif defined(HAVE_DNN)
+#else
 bool getCaffeGpuMode()
 {
@ -873,32 +886,23 @@ void setCaffeGpuMode(bool useGpu)
 bool getCaffeAvailable(){
    return 0;
 }
 bool getDNNAvailable(){
    return true;
 }
 #endif
-#else
+}//namespace caffe
 namespace dnn_backend{
 #ifdef  HAVE_DNN
 bool getCaffeGpuMode()
 {
    CV_Error(Error::StsError,"Caffe not available during compilation!");
    return 0;
 }
-void setCaffeGpuMode(bool useGpu)
+bool getDNNAvailable(){
-{
+    return true;
    CV_Error(Error::StsError,"Caffe not available during compilation!");
    CV_Assert(useGpu==1);//Compilation directives force
 }
-
+#else
-bool getCaffeAvailable(){
+bool getDNNAvailable(){
    return 0;
 }
 #endif
-
+}//namespace dnn_backend
 }//namespace caffe
 }//namespace cnn_config
 class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{
@ -931,6 +935,7 @@ private:
            getOutputs(buffer,nbOutputs,tmp);
            classNum=tmp[0].wordIdx;
            confidence=tmp[0].probabillity;
        }
    };
 protected:
@ -972,6 +977,7 @@ public:
    {
        Mat netOutput;
        this->classifier_->classifyBatch(inputImageList,netOutput);
        for(int k=0;k<netOutput.rows;k++)
        {
            int classNum;
--- a/modules/text/src/text_detector.cpp
+++ b/modules/text/src/text_detector.cpp
@ -16,9 +16,9 @@
 #include <vector>
-#ifdef HAVE_CAFFE
+//#ifdef HAVE_CAFFE
-#include "caffe/caffe.hpp"
+//#include "caffe/caffe.hpp"
-#endif
+//#endif
 namespace cv { namespace text {
--- a/modules/text/src/text_detectorCNN.cpp
+++ b/modules/text/src/text_detectorCNN.cpp
@ -225,75 +225,25 @@ protected:
 #ifdef HAVE_DNN
        //std::vector<Mat> preProcessedImList; // to store preprocessed images, should it be handled inside preprocessing class?
        Mat preprocessed;
        this->preprocess(inputImage,preprocessed);
-        printf("After preprocess");
+
-        // preprocesses each image in the inputImageList and push to preprocessedImList
+        net_->setInput(blobFromImage(preprocessed,1,  this->inputGeometry_), "data");
-//        for(size_t imgNum=0;imgNum<inputImageList.size();imgNum++)
+
-//        {
+       Mat outputNet = this->net_->forward( );
-//            this->preprocess(inputImageList[imgNum],preprocessed);
+
 //            preProcessedImList.push_back(preprocessed);
 //        }
        // set input data blob in dnn::net
        //Mat temp =blobFromImage(preprocessed,1, Size(700, 700));
        //printf("%d %d %d ",temp.size[1],temp.size[2],temp.size[3]);
        net_->setInput(blobFromImage(preprocessed,1, Size(700, 700)), "data");
        printf("Input layer");
       //Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ;
       Mat outputNet = this->net_->forward( );//"mbox_priorbox");
       printf("After forward");
       //outputNet = outputNet.reshape(1, 1);
       this->outputGeometry_.height = outputNet.size[2];
       this->outputGeometry_.width = outputNet.size[3];
       this->outputChannelCount_ = outputNet.size[1];
-       printf("%d %d %d ",outputNet.size[1],outputNet.size[2],outputNet.size[3]);
+
       outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
        float*outputMatData=(float*)(outputMat.data);
       float*outputNetData=(float*)(outputNet.data);
       int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width;
       memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
 //        net_->input_blobs()[0]->Reshape(1, this->inputChannelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
 //        net_->Reshape();
 //        float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
 //        float* inputData=inputBuffer;
 //        std::vector<Mat> input_channels;
 //        Mat preprocessed;
 //        // if the image have multiple color channels the input layer should be populated accordingly
 //        for (int channel=0;channel < this->inputChannelCount_;channel++){
 //            cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
 //            input_channels.push_back(netInputWraped);
 //            //input_data += width * height;
 //            inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
 //        }
 //        this->preprocess(inputImage,preprocessed);
 //        split(preprocessed, input_channels);
 //        //preprocessed.copyTo(netInputWraped);
 //        this->net_->Forward();
 //        const float* outputNetData=net_->output_blobs()[0]->cpu_data();
 //        // const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
 //        this->outputGeometry_.height = net_->output_blobs()[0]->height();
 //        this->outputGeometry_.width = net_->output_blobs()[0]->width();
 //        this->outputChannelCount_ = net_->output_blobs()[0]->channels();
 //        int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width;
 //        outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
 //        float*outputMatData=(float*)(outputMat.data);
 //        memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
 #endif
@ -307,6 +257,9 @@ protected:
    //Size inputGeometry_;
    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
    //int outputSize_;
    const int _inputHeight =700;
    const int _inputWidth =700;
    const int _inputChannel =3;
 public:
    DeepCNNTextDetectorDNNImpl(const DeepCNNTextDetectorDNNImpl& dn):
        minibatchSz_(dn.minibatchSz_){
@ -355,28 +308,10 @@ public:
            //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
            exit(-1);
        }
-//        this->net_.reset(new caffe::Net<float>(modelArchFilename, caffe::TEST));
+
-//        CV_Assert(net_->num_inputs()==1);
+        this->inputGeometry_.height =_inputHeight;
-//        CV_Assert(net_->num_outputs()==1);
+        this->inputGeometry_.width = _inputWidth ;//inputLayer->width();
-//        CV_Assert(this->net_->input_blobs()[0]->channels()==1
+        this->inputChannelCount_ = _inputChannel ;//inputLayer->channels();
 //                ||this->net_->input_blobs()[0]->channels()==3);
 //        //        this->channelCount_=this->net_->input_blobs()[0]->channels();
 //        this->inputGeometry_.height = inputLayer->height();
 //        this->inputGeometry_.width = inputLayer->width();
 //        this->inputChannelCount_ = inputLayer->channels();
 //        //this->inputGeometry_.batchSize =1;
 //        inputLayer->Reshape(this->minibatchSz_,this->inputChannelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
 //        net_->Reshape();
 //        this->outputChannelCount_ = net_->output_blobs()[0]->channels();
 //        //this->outputGeometry_.batchSize =1;
 //        this->outputGeometry_.height =net_->output_blobs()[0]->height();
 //        this->outputGeometry_.width = net_->output_blobs()[0]->width();
        this->inputGeometry_.height =700;
        this->inputGeometry_.width = 700 ;//inputLayer->width();
        this->inputChannelCount_ = 3 ;//inputLayer->channels();
 #else
        CV_Error(Error::StsError,"DNN module not available during compilation!");
@ -389,7 +324,7 @@ public:
        Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width);
        Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed
        Mat outputMat = Bbox_prob.getMat();
-        printf("calling");
+
        process_(image.getMat(),outputMat);
        //copy back to outputArray
        outputMat.copyTo(Bbox_prob);
@ -487,20 +422,20 @@ Ptr<DeepCNNTextDetector> DeepCNNTextDetector::createTextBoxNet(String archFilena
    case OCR_HOLISTIC_BACKEND_DEFAULT:
 #ifdef HAVE_CAFFE
-        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 100));
+        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1));
 #elif defined(HAVE_DNN)
-        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 100));
+        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1));
 #else
        CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented");
        return Ptr<DeepCNNTextDetector>();
 #endif
        break;
    case OCR_HOLISTIC_BACKEND_CAFFE:
-        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 100));
+        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1));
        break;
    case OCR_HOLISTIC_BACKEND_DNN:
-         return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 100));
+         return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1));
         break;
    case OCR_HOLISTIC_BACKEND_NONE:
    default: