From c697e41b8d8415084971e5e8dc1f73d2867eab37 Mon Sep 17 00:00:00 2001
From: sghoshcvc <sghosh@cvc.uab.es>
Date: Mon, 28 Aug 2017 19:25:58 +0200
Subject: [PATCH] added calculation of output size

---
 modules/text/include/opencv2/text/ocr.hpp     |  24 +++++
 .../include/opencv2/text/textDetector.hpp     |   4 +-
 modules/text/samples/textbox_demo.cpp         |  10 +-
 modules/text/src/ocr_holistic.cpp             |  98 +++++++++--------
 modules/text/src/text_detector.cpp            |   6 +-
 modules/text/src/text_detectorCNN.cpp         | 101 ++++--------------
 6 files changed, 107 insertions(+), 136 deletions(-)

diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp
index 3c7390935..14dfc0924 100644
--- a/modules/text/include/opencv2/text/ocr.hpp
+++ b/modules/text/include/opencv2/text/ocr.hpp
@@ -861,6 +861,15 @@ public:
 };
 
 namespace cnn_config{
+
+/** @brief Runtime backend information.
+ *
+ * Reports which DeepCNN backends were compiled into this module.
+ *
+ * @return a list of the available backends (Caffe, opencv_dnn, etc.); empty if none was compiled in.
+ * */
+CV_EXPORTS_W std::vector<std::string> getAvailableBackends();
+
 namespace caffe_backend{
 
 /** @brief Prompts Caffe on the computation device beeing used
@@ -897,6 +906,21 @@ CV_EXPORTS_W void setCaffeGpuMode(bool useGpu);
 CV_EXPORTS_W bool getCaffeAvailable();
 
 }//caffe
+namespace dnn_backend {
+
+/** @brief Provides runtime information on whether DNN module was compiled in.
+ *
+ * The text module API is the same regardless of whether the DNN module was available or not
+ * during compilation. When methods that require a backend are invoked while no backend support
+ * is compiled, exceptions are thrown. This method allows to test whether the
+ * text module was built with dnn_backend during runtime.
+ *
+ * @return true if opencv_dnn support for the text module was provided during compilation,
+ * false if opencv_dnn was unavailable.
+ */
+CV_EXPORTS_W bool getDNNAvailable();
+
+}//dnn_backend
 }//cnn_config
 
 /** @brief OCRHolisticWordRecognizer class provides the functionallity of segmented wordspotting.
diff --git a/modules/text/include/opencv2/text/textDetector.hpp b/modules/text/include/opencv2/text/textDetector.hpp
index ad1b53dee..eda748014 100644
--- a/modules/text/include/opencv2/text/textDetector.hpp
+++ b/modules/text/include/opencv2/text/textDetector.hpp
@@ -56,7 +56,7 @@ namespace cv
 namespace text
 {
 
-//! @addtogroup text_recognize
+//! @addtogroup text_detect
 //! @{
 
 
@@ -263,7 +263,7 @@ public:
 
 };
 
-
+//! @}
 }//namespace text
 }//namespace cv
 
diff --git a/modules/text/samples/textbox_demo.cpp b/modules/text/samples/textbox_demo.cpp
index 75a18a315..b76658e1b 100644
--- a/modules/text/samples/textbox_demo.cpp
+++ b/modules/text/samples/textbox_demo.cpp
@@ -61,6 +61,12 @@ int main(int argc, const char * argv[]){
         std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n";
         //exit(1);
     }
+    // Report which DeepCNN backends this build of the text module was compiled with.
+    const std::vector<std::string> backends=cv::text::cnn_config::getAvailableBackends();
+    std::cout << "The Following backends are available" << "\n";
+    for (size_t i=0;i<backends.size();i++) // size_t matches the unsigned type returned by size()
+       std::cout << backends[i] << "\n";
+
     //set to true if you have a GPU with more than 3GB
      if(cv::text::cnn_config::caffe_backend::getCaffeAvailable())
     cv::text::cnn_config::caffe_backend::setCaffeGpuMode(true);
@@ -112,7 +118,7 @@ int main(int argc, const char * argv[]){
     }
     // call dict net here for all detected parts
     cv::Ptr<cv::text::DeepCNN> cnn=cv::text::DeepCNN::createDictNet(
-                "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel");
+                "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel",cv::text::OCR_HOLISTIC_BACKEND_DNN);
 
     cv::Ptr<cv::text::OCRHolisticWordRecognizer> wordSpotter=
             cv::text::OCRHolisticWordRecognizer::create(cnn,"dictnet_vgg_labels.txt");
@@ -130,7 +136,7 @@ int main(int argc, const char * argv[]){
         cv::Point tl_ = bbox.at(i).tl();
         cv::Point br_ = bbox.at(i).br();
 
-        out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<tl_.y<<","<<tl_.y<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl;
+        out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl; // one CSV row: image,tl.x,tl.y,br.x,br.y,word
 
     }
     out.close();
diff --git a/modules/text/src/ocr_holistic.cpp b/modules/text/src/ocr_holistic.cpp
index d2a9f42ec..f41fb7eb1 100644
--- a/modules/text/src/ocr_holistic.cpp
+++ b/modules/text/src/ocr_holistic.cpp
@@ -122,6 +122,7 @@ protected:
     //void set_mean_(Mat M){}
 
     void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
+
         //TODO put all the logic of channel and depth conversions in ImageProcessor class
         CV_Assert(outputChannels==1 || outputChannels==3);
         CV_Assert(input.channels()==1 || input.channels()==3);
@@ -433,6 +434,7 @@ protected:
         CV_Assert(int(inputImageList.size())<=this->minibatchSz_);
         CV_Assert(outputMat.isContinuous());
 
+
 #ifdef HAVE_CAFFE
         net_->input_blobs()[0]->Reshape(inputImageList.size(), this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
         net_->Reshape();
@@ -450,16 +452,19 @@ protected:
                 input_channels.push_back(netInputWraped);
                 //input_data += width * height;
                 inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
+
             }
             this->preprocess(inputImageList[imgNum],preprocessed);
             split(preprocessed, input_channels);
 
+
         }
         this->net_->ForwardPrefilled();
         const float* outputNetData=net_->output_blobs()[0]->cpu_data();
         this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
         int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width;
 
+
         //outputMat.resize(this->outputGeometry_.height * this->outputGeometry_.width);
         float*outputMatData=(float*)(outputMat.data);
         memcpy(outputMatData,outputNetData,sizeof(float)*outputSz*inputImageList.size());
@@ -470,9 +475,10 @@ protected:
 #ifdef HAVE_CAFFE
     Ptr<caffe::Net<float> > net_;
 #endif
-    //Size inputGeometry_;
+    //Size inputGeometry_;//=Size(100,32);
     int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
     int outputSize_;
+    //Size outputGeometry_;
 public:
     DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn):
         minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
@@ -608,7 +614,7 @@ protected:
             preProcessedImList.push_back(preprocessed);
         }
         // set input data blob in dnn::net
-        net_->setInput(blobFromImages(preProcessedImList,1, Size(100, 32)), "data");
+        net_->setInput(blobFromImages(preProcessedImList,1, this->inputGeometry_), "data");
 
         float*outputMatData=(float*)(outputMat.data);
        //Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ;
@@ -625,9 +631,16 @@ protected:
 #ifdef HAVE_DNN
     Ptr<Net> net_;
 #endif
-    //Size inputGeometry_;
+    // hard coding input image size. anything in DNN library to get that from prototxt??
+   // Size inputGeometry_;//=Size(100,32);
     int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
     int outputSize_;
+    //Size outputGeometry_;//= Size(1,1);
+    //int channelCount_;
+   // int inputChannel_ ;//=1;
+    static const int _inputHeight =32;  // fixed network input height; static so it is shared by all instances and never copied/assigned
+    static const int _inputWidth =100;  // fixed network input width, paired with _inputHeight to build inputGeometry_
+    static const int _inputChannel =1;  // fixed number of input channels, copied into channelCount_
 public:
     DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn):
         minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
@@ -678,33 +691,17 @@ public:
             //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
             exit(-1);
         }
-// find a wa to check the followings in cv::dnn ???
-//        CV_Assert(net_->num_inputs()==1);
-//        CV_Assert(net_->num_outputs()==1);
-//        CV_Assert(this->net_->input_blobs()[0]->channels()==1
-//                ||this->net_->input_blobs()[0]->channels()==3);
-//        this->channelCount_=this->net_->input_blobs()[0]->channels();
-
-
 
-        //this->net_->CopyTrainedLayersFrom(modelWeightsFilename);
 
-        //caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0];
-        //inputLayerId = net_->getLayerId('data');
-
-      //  inputLayerShape = net_->getLayerShapes(const MatShape& netInputShape,
-       //                                     inputLayerId,
-      //                                      std::vector<MatShape>* inLayerShapes,
-      //  std::vector<MatShape>* outLayerShapes) const;
-        // should not be hard coded ideally
-
-        this->inputGeometry_=Size(100,32);// Size(inputLayer->width(), inputLayer->height());
-        this->channelCount_ = 1;//inputLayer->channels();
+        this->inputGeometry_=Size(_inputWidth,_inputHeight);// Size(inputLayer->width(), inputLayer->height());
+        this->channelCount_ = _inputChannel;//inputLayer->channels();
 
         //inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
-        //net_->Reshape();
-        this->outputSize_=88172 ;//net_->output_blobs()[0]->channels();
-        this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
+        const std::vector<String> layerNames=net_->getLayerNames();//queried once instead of twice
+        CV_Assert(layerNames.size()>=2);//size()-2 below would wrap around for a shorter net
+        Ptr< Layer > outLayer=net_->getLayer(net_->getLayerId(layerNames[layerNames.size()-2]));//second to last layer
+        CV_Assert(!outLayer.empty() && outLayer->blobs.size()>=2);//blobs[1] is read below, so it must exist
+        this->outputSize_=(outLayer->blobs)[1].size[0];//presumably the bias blob, one entry per output class -- TODO confirm
 
 
 
@@ -732,7 +729,7 @@ public:
         size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic
         classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F);
         Mat outputMat = classProbabilities.getMat();
-        printf("ekhane");
+
         for(size_t imgNum=0;imgNum<allImageVector.size();imgNum+=minibatchSize)
         {
             size_t rangeEnd=imgNum+std::min<size_t>(allImageVector.size()-imgNum,minibatchSize);
@@ -832,6 +829,22 @@ Ptr<DeepCNN> DeepCNN::createDictNet(String archFilename,String weightsFilename,i
 }
 
 namespace cnn_config{
+/* Lists the DeepCNN backends that were enabled when this module was compiled. */
+std::vector<std::string> getAvailableBackends()
+{
+    std::vector<std::string> compiledIn;
+#ifdef HAVE_CAFFE
+    // Caffe based backend
+    compiledIn.push_back(std::string("CAFFE, OCR_HOLISTIC_BACKEND_CAFFE"));
+#endif
+#ifdef HAVE_DNN
+    // opencv_dnn based backend
+    compiledIn.push_back(std::string("DNN, OCR_HOLISTIC_BACKEND_DNN"));
+#endif
+    // Stays empty when neither HAVE_CAFFE nor HAVE_DNN was defined at compile time.
+    return compiledIn;
+}
+
 namespace caffe_backend{
 
 #ifdef HAVE_CAFFE
@@ -856,7 +869,7 @@ bool getCaffeAvailable()
 {
     return true;
 }
-#elif defined(HAVE_DNN)
+#else
 
 bool getCaffeGpuMode()
 {
@@ -873,32 +886,23 @@ void setCaffeGpuMode(bool useGpu)
 bool getCaffeAvailable(){
     return 0;
 }
-bool getDNNAvailable(){
-    return true;
-}
 
+#endif
 
-#else
+}//namespace caffe
+namespace dnn_backend{
+#ifdef  HAVE_DNN
 
-bool getCaffeGpuMode()
-{
-    CV_Error(Error::StsError,"Caffe not available during compilation!");
-    return 0;
-}
 
-void setCaffeGpuMode(bool useGpu)
-{
-    CV_Error(Error::StsError,"Caffe not available during compilation!");
-    CV_Assert(useGpu==1);//Compilation directives force
+bool getDNNAvailable(){
+    return true;
 }
-
-bool getCaffeAvailable(){
+#else
+bool getDNNAvailable(){
     return 0;
 }
-
 #endif
-
-}//namespace caffe
+}//namespace dnn_backend
 }//namespace cnn_config
 
 class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{
@@ -931,6 +935,7 @@ private:
             getOutputs(buffer,nbOutputs,tmp);
             classNum=tmp[0].wordIdx;
             confidence=tmp[0].probabillity;
+
         }
     };
 protected:
@@ -972,6 +977,7 @@ public:
     {
         Mat netOutput;
         this->classifier_->classifyBatch(inputImageList,netOutput);
+
         for(int k=0;k<netOutput.rows;k++)
         {
             int classNum;
diff --git a/modules/text/src/text_detector.cpp b/modules/text/src/text_detector.cpp
index 9b6d4f966..949f5f86d 100644
--- a/modules/text/src/text_detector.cpp
+++ b/modules/text/src/text_detector.cpp
@@ -16,9 +16,9 @@
 #include <vector>
 
 
-#ifdef HAVE_CAFFE
-#include "caffe/caffe.hpp"
-#endif
+//#ifdef HAVE_CAFFE
+//#include "caffe/caffe.hpp"
+//#endif
 
 namespace cv { namespace text {
 
diff --git a/modules/text/src/text_detectorCNN.cpp b/modules/text/src/text_detectorCNN.cpp
index a2c583c7f..90d6fd9b8 100644
--- a/modules/text/src/text_detectorCNN.cpp
+++ b/modules/text/src/text_detectorCNN.cpp
@@ -225,75 +225,25 @@ protected:
 
 #ifdef HAVE_DNN
 
-        //std::vector<Mat> preProcessedImList; // to store preprocessed images, should it be handled inside preprocessing class?
-
         Mat preprocessed;
         this->preprocess(inputImage,preprocessed);
-        printf("After preprocess");
-        // preprocesses each image in the inputImageList and push to preprocessedImList
-//        for(size_t imgNum=0;imgNum<inputImageList.size();imgNum++)
-//        {
-//            this->preprocess(inputImageList[imgNum],preprocessed);
-//            preProcessedImList.push_back(preprocessed);
-//        }
-        // set input data blob in dnn::net
-        //Mat temp =blobFromImage(preprocessed,1, Size(700, 700));
-        //printf("%d %d %d ",temp.size[1],temp.size[2],temp.size[3]);
-        net_->setInput(blobFromImage(preprocessed,1, Size(700, 700)), "data");
-        printf("Input layer");
-
-
-       //Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ;
-       Mat outputNet = this->net_->forward( );//"mbox_priorbox");
-       printf("After forward");
-       //outputNet = outputNet.reshape(1, 1);
+
+        net_->setInput(blobFromImage(preprocessed,1,  this->inputGeometry_), "data");
+
+       Mat outputNet = this->net_->forward( );
+
        this->outputGeometry_.height = outputNet.size[2];
        this->outputGeometry_.width = outputNet.size[3];
        this->outputChannelCount_ = outputNet.size[1];
-       printf("%d %d %d ",outputNet.size[1],outputNet.size[2],outputNet.size[3]);
+
        outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
         float*outputMatData=(float*)(outputMat.data);
        float*outputNetData=(float*)(outputNet.data);
        int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width;
 
        memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
-//        net_->input_blobs()[0]->Reshape(1, this->inputChannelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
-//        net_->Reshape();
-//        float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
-//        float* inputData=inputBuffer;
-
-//        std::vector<Mat> input_channels;
-//        Mat preprocessed;
-//        // if the image have multiple color channels the input layer should be populated accordingly
-//        for (int channel=0;channel < this->inputChannelCount_;channel++){
-
-//            cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
-//            input_channels.push_back(netInputWraped);
-//            //input_data += width * height;
-//            inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
-//        }
-//        this->preprocess(inputImage,preprocessed);
-//        split(preprocessed, input_channels);
-
-//        //preprocessed.copyTo(netInputWraped);
 
 
-//        this->net_->Forward();
-//        const float* outputNetData=net_->output_blobs()[0]->cpu_data();
-//        // const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
-
-
-
-
-//        this->outputGeometry_.height = net_->output_blobs()[0]->height();
-//        this->outputGeometry_.width = net_->output_blobs()[0]->width();
-//        this->outputChannelCount_ = net_->output_blobs()[0]->channels();
-//        int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width;
-//        outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
-//        float*outputMatData=(float*)(outputMat.data);
-
-//        memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
-
 
 
 #endif
@@ -307,6 +257,9 @@ protected:
     //Size inputGeometry_;
     int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
     //int outputSize_;
+    static const int _inputHeight =700;  // fixed network input height; static so it is shared by all instances and never copied/assigned
+    static const int _inputWidth =700;   // fixed network input width, assigned to inputGeometry_.width
+    static const int _inputChannel =3;   // fixed number of input channels, assigned to inputChannelCount_
 public:
     DeepCNNTextDetectorDNNImpl(const DeepCNNTextDetectorDNNImpl& dn):
         minibatchSz_(dn.minibatchSz_){
@@ -355,28 +308,10 @@ public:
             //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
             exit(-1);
         }
-//        this->net_.reset(new caffe::Net<float>(modelArchFilename, caffe::TEST));
-//        CV_Assert(net_->num_inputs()==1);
-//        CV_Assert(net_->num_outputs()==1);
-//        CV_Assert(this->net_->input_blobs()[0]->channels()==1
-//                ||this->net_->input_blobs()[0]->channels()==3);
-//        //        this->channelCount_=this->net_->input_blobs()[0]->channels();
-
-
-//        this->inputGeometry_.height = inputLayer->height();
-//        this->inputGeometry_.width = inputLayer->width();
-//        this->inputChannelCount_ = inputLayer->channels();
-//        //this->inputGeometry_.batchSize =1;
-
-//        inputLayer->Reshape(this->minibatchSz_,this->inputChannelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
-//        net_->Reshape();
-//        this->outputChannelCount_ = net_->output_blobs()[0]->channels();
-//        //this->outputGeometry_.batchSize =1;
-//        this->outputGeometry_.height =net_->output_blobs()[0]->height();
-//        this->outputGeometry_.width = net_->output_blobs()[0]->width();
-        this->inputGeometry_.height =700;
-        this->inputGeometry_.width = 700 ;//inputLayer->width();
-        this->inputChannelCount_ = 3 ;//inputLayer->channels();
+
+        this->inputGeometry_.height =_inputHeight;
+        this->inputGeometry_.width = _inputWidth ;//inputLayer->width();
+        this->inputChannelCount_ = _inputChannel ;//inputLayer->channels();
 
 #else
         CV_Error(Error::StsError,"DNN module not available during compilation!");
@@ -389,7 +324,7 @@ public:
         Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width);
         Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed
         Mat outputMat = Bbox_prob.getMat();
-        printf("calling");
+
         process_(image.getMat(),outputMat);
         //copy back to outputArray
         outputMat.copyTo(Bbox_prob);
@@ -487,20 +422,20 @@ Ptr<DeepCNNTextDetector> DeepCNNTextDetector::createTextBoxNet(String archFilena
     case OCR_HOLISTIC_BACKEND_DEFAULT:
 
 #ifdef HAVE_CAFFE
-        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 100));
+        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1));
 
 #elif defined(HAVE_DNN)
-        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 100));
+        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1));
 #else
         CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented");
         return Ptr<DeepCNNTextDetector>();
 #endif
         break;
     case OCR_HOLISTIC_BACKEND_CAFFE:
-        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 100));
+        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1));
         break;
     case OCR_HOLISTIC_BACKEND_DNN:
-         return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 100));
+         return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1));
          break;
     case OCR_HOLISTIC_BACKEND_NONE:
     default: