From c697e41b8d8415084971e5e8dc1f73d2867eab37 Mon Sep 17 00:00:00 2001 From: sghoshcvc Date: Mon, 28 Aug 2017 19:25:58 +0200 Subject: [PATCH] added calculation of output size --- modules/text/include/opencv2/text/ocr.hpp | 24 +++++ .../include/opencv2/text/textDetector.hpp | 4 +- modules/text/samples/textbox_demo.cpp | 10 +- modules/text/src/ocr_holistic.cpp | 98 +++++++++-------- modules/text/src/text_detector.cpp | 6 +- modules/text/src/text_detectorCNN.cpp | 101 ++++-------------- 6 files changed, 107 insertions(+), 136 deletions(-) diff --git a/modules/text/include/opencv2/text/ocr.hpp b/modules/text/include/opencv2/text/ocr.hpp index 3c7390935..14dfc0924 100644 --- a/modules/text/include/opencv2/text/ocr.hpp +++ b/modules/text/include/opencv2/text/ocr.hpp @@ -861,6 +861,15 @@ public: }; namespace cnn_config{ + +/** @brief runtime backend information + * + * this function finds the status of backends compiled with this module + * + * @return a list of backends (caffe,opencv-dnn etc.) + * */ +CV_EXPORTS_W std::vector getAvailableBackends(); + namespace caffe_backend{ /** @brief Prompts Caffe on the computation device beeing used @@ -897,6 +906,21 @@ CV_EXPORTS_W void setCaffeGpuMode(bool useGpu); CV_EXPORTS_W bool getCaffeAvailable(); }//caffe +namespace dnn_backend { + +/** @brief Provides runtime information on whether DNN module was compiled in. + * + * The text module API is the same regardless of whether DNN module was available or not + * during compilation. When methods that require backend are invoked while no backend support + * is compiled, exceptions are thrown. This method allows to test whether the + * text module was built with dnn_backend during runtime. + * + * @return true if opencv_dnn support for the text module was provided during compilation, + * false if opencv_dnn was unavailable. 
+ */ +CV_EXPORTS_W bool getDNNAvailable(); + +}//dnn_backend }//cnn_config /** @brief OCRHolisticWordRecognizer class provides the functionallity of segmented wordspotting. diff --git a/modules/text/include/opencv2/text/textDetector.hpp b/modules/text/include/opencv2/text/textDetector.hpp index ad1b53dee..eda748014 100644 --- a/modules/text/include/opencv2/text/textDetector.hpp +++ b/modules/text/include/opencv2/text/textDetector.hpp @@ -56,7 +56,7 @@ namespace cv namespace text { -//! @addtogroup text_recognize +//! @addtogroup text_detect //! @{ @@ -263,7 +263,7 @@ public: }; - +//! @} }//namespace text }//namespace cv diff --git a/modules/text/samples/textbox_demo.cpp b/modules/text/samples/textbox_demo.cpp index 75a18a315..b76658e1b 100644 --- a/modules/text/samples/textbox_demo.cpp +++ b/modules/text/samples/textbox_demo.cpp @@ -61,6 +61,12 @@ int main(int argc, const char * argv[]){ std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n"; //exit(1); } + std::vector backends=cv::text::cnn_config::getAvailableBackends(); + std::cout << "The Following backends are available" << "\n"; + for (int i=0;i cnn=cv::text::DeepCNN::createDictNet( - "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel"); + "dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel",cv::text::OCR_HOLISTIC_BACKEND_DNN); cv::Ptr wordSpotter= cv::text::OCRHolisticWordRecognizer::create(cnn,"dictnet_vgg_labels.txt"); @@ -130,7 +136,7 @@ int main(int argc, const char * argv[]){ cv::Point tl_ = bbox.at(i).tl(); cv::Point br_ = bbox.at(i).br(); - out<minibatchSz_); CV_Assert(outputMat.isContinuous()); + #ifdef HAVE_CAFFE net_->input_blobs()[0]->Reshape(inputImageList.size(), this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width); net_->Reshape(); @@ -450,16 +452,19 @@ protected: input_channels.push_back(netInputWraped); //input_data += width * height; inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); + } 
this->preprocess(inputImageList[imgNum],preprocessed); split(preprocessed, input_channels); + } this->net_->ForwardPrefilled(); const float* outputNetData=net_->output_blobs()[0]->cpu_data(); this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width; + //outputMat.resize(this->outputGeometry_.height * this->outputGeometry_.width); float*outputMatData=(float*)(outputMat.data); memcpy(outputMatData,outputNetData,sizeof(float)*outputSz*inputImageList.size()); @@ -470,9 +475,10 @@ protected: #ifdef HAVE_CAFFE Ptr > net_; #endif - //Size inputGeometry_; + //Size inputGeometry_;//=Size(100,32); int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst int outputSize_; + //Size outputGeometry_; public: DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn): minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){ @@ -608,7 +614,7 @@ protected: preProcessedImList.push_back(preprocessed); } // set input data blob in dnn::net - net_->setInput(blobFromImages(preProcessedImList,1, Size(100, 32)), "data"); + net_->setInput(blobFromImages(preProcessedImList,1, this->inputGeometry_), "data"); float*outputMatData=(float*)(outputMat.data); //Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ; @@ -625,9 +631,16 @@ protected: #ifdef HAVE_DNN Ptr net_; #endif - //Size inputGeometry_; + // hard coding input image size. anything in DNN library to get that from prototxt?? 
+ // Size inputGeometry_;//=Size(100,32); int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst int outputSize_; + //Size outputGeometry_;//= Size(1,1); + //int channelCount_; + // int inputChannel_ ;//=1; + const int _inputHeight =32; + const int _inputWidth =100; + const int _inputChannel =1; public: DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn): minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){ @@ -678,33 +691,17 @@ public: //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl; exit(-1); } -// find a wa to check the followings in cv::dnn ??? -// CV_Assert(net_->num_inputs()==1); -// CV_Assert(net_->num_outputs()==1); -// CV_Assert(this->net_->input_blobs()[0]->channels()==1 -// ||this->net_->input_blobs()[0]->channels()==3); -// this->channelCount_=this->net_->input_blobs()[0]->channels(); - - - //this->net_->CopyTrainedLayersFrom(modelWeightsFilename); - //caffe::Blob* inputLayer = this->net_->input_blobs()[0]; - //inputLayerId = net_->getLayerId('data'); - - // inputLayerShape = net_->getLayerShapes(const MatShape& netInputShape, - // inputLayerId, - // std::vector* inLayerShapes, - // std::vector* outLayerShapes) const; - // should not be hard coded ideally - - this->inputGeometry_=Size(100,32);// Size(inputLayer->width(), inputLayer->height()); - this->channelCount_ = 1;//inputLayer->channels(); + this->inputGeometry_=Size(_inputWidth,_inputHeight);// Size(inputLayer->width(), inputLayer->height()); + this->channelCount_ = _inputChannel;//inputLayer->channels(); //inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width); - //net_->Reshape(); - this->outputSize_=88172 ;//net_->output_blobs()[0]->channels(); - this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); + Ptr< Layer > outLayer= net_->getLayer (net_->getLayerId 
(net_->getLayerNames()[net_->getLayerNames().size()-2])); + //std::vector blobs = outLayer->blobs; + + this->outputSize_=(outLayer->blobs)[1].size[0] ;//net_->output_blobs()[0]->channels(); + //this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); @@ -732,7 +729,7 @@ public: size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F); Mat outputMat = classProbabilities.getMat(); - printf("ekhane"); + for(size_t imgNum=0;imgNum(allImageVector.size()-imgNum,minibatchSize); @@ -832,6 +829,22 @@ Ptr DeepCNN::createDictNet(String archFilename,String weightsFilename,i } namespace cnn_config{ +std::vector getAvailableBackends() +{ + std::vector backends; + +#ifdef HAVE_CAFFE + backends.push_back("CAFFE, OCR_HOLISTIC_BACKEND_CAFFE"); // Caffe based backend + +#endif +#ifdef HAVE_DNN + backends.push_back("DNN, OCR_HOLISTIC_BACKEND_DNN");// opencv_dnn based backend +#endif + return backends; + + +} + namespace caffe_backend{ #ifdef HAVE_CAFFE @@ -856,7 +869,7 @@ bool getCaffeAvailable() { return true; } -#elif defined(HAVE_DNN) +#else bool getCaffeGpuMode() { @@ -873,32 +886,23 @@ void setCaffeGpuMode(bool useGpu) bool getCaffeAvailable(){ return 0; } -bool getDNNAvailable(){ - return true; -} +#endif -#else +}//namespace caffe +namespace dnn_backend{ +#ifdef HAVE_DNN -bool getCaffeGpuMode() -{ - CV_Error(Error::StsError,"Caffe not available during compilation!"); - return 0; -} -void setCaffeGpuMode(bool useGpu) -{ - CV_Error(Error::StsError,"Caffe not available during compilation!"); - CV_Assert(useGpu==1);//Compilation directives force +bool getDNNAvailable(){ + return true; } - -bool getCaffeAvailable(){ +#else +bool getDNNAvailable(){ return 0; } - #endif - -}//namespace caffe +}//namespace dnn_backend }//namespace cnn_config class OCRHolisticWordRecognizerImpl: public 
OCRHolisticWordRecognizer{ @@ -931,6 +935,7 @@ private: getOutputs(buffer,nbOutputs,tmp); classNum=tmp[0].wordIdx; confidence=tmp[0].probabillity; + } }; protected: @@ -972,6 +977,7 @@ public: { Mat netOutput; this->classifier_->classifyBatch(inputImageList,netOutput); + for(int k=0;k -#ifdef HAVE_CAFFE -#include "caffe/caffe.hpp" -#endif +//#ifdef HAVE_CAFFE +//#include "caffe/caffe.hpp" +//#endif namespace cv { namespace text { diff --git a/modules/text/src/text_detectorCNN.cpp b/modules/text/src/text_detectorCNN.cpp index a2c583c7f..90d6fd9b8 100644 --- a/modules/text/src/text_detectorCNN.cpp +++ b/modules/text/src/text_detectorCNN.cpp @@ -225,75 +225,25 @@ protected: #ifdef HAVE_DNN - //std::vector preProcessedImList; // to store preprocessed images, should it be handled inside preprocessing class? - Mat preprocessed; this->preprocess(inputImage,preprocessed); - printf("After preprocess"); - // preprocesses each image in the inputImageList and push to preprocessedImList -// for(size_t imgNum=0;imgNumpreprocess(inputImageList[imgNum],preprocessed); -// preProcessedImList.push_back(preprocessed); -// } - // set input data blob in dnn::net - //Mat temp =blobFromImage(preprocessed,1, Size(700, 700)); - //printf("%d %d %d ",temp.size[1],temp.size[2],temp.size[3]); - net_->setInput(blobFromImage(preprocessed,1, Size(700, 700)), "data"); - printf("Input layer"); - - - //Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ; - Mat outputNet = this->net_->forward( );//"mbox_priorbox"); - printf("After forward"); - //outputNet = outputNet.reshape(1, 1); + + net_->setInput(blobFromImage(preprocessed,1, this->inputGeometry_), "data"); + + Mat outputNet = this->net_->forward( ); + this->outputGeometry_.height = outputNet.size[2]; this->outputGeometry_.width = outputNet.size[3]; this->outputChannelCount_ = outputNet.size[1]; - printf("%d %d %d ",outputNet.size[1],outputNet.size[2],outputNet.size[3]); + outputMat.create(this->outputGeometry_.height , 
this->outputGeometry_.width,CV_32FC1); float*outputMatData=(float*)(outputMat.data); float*outputNetData=(float*)(outputNet.data); int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width; memcpy(outputMatData,outputNetData,sizeof(float)*outputSz); -// net_->input_blobs()[0]->Reshape(1, this->inputChannelCount_,this->inputGeometry_.height,this->inputGeometry_.width); -// net_->Reshape(); -// float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data(); -// float* inputData=inputBuffer; - -// std::vector input_channels; -// Mat preprocessed; -// // if the image have multiple color channels the input layer should be populated accordingly -// for (int channel=0;channel < this->inputChannelCount_;channel++){ - -// cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData); -// input_channels.push_back(netInputWraped); -// //input_data += width * height; -// inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); -// } -// this->preprocess(inputImage,preprocessed); -// split(preprocessed, input_channels); - -// //preprocessed.copyTo(netInputWraped); -// this->net_->Forward(); -// const float* outputNetData=net_->output_blobs()[0]->cpu_data(); -// // const float* outputNetData1=net_->output_blobs()[1]->cpu_data(); - - - - -// this->outputGeometry_.height = net_->output_blobs()[0]->height(); -// this->outputGeometry_.width = net_->output_blobs()[0]->width(); -// this->outputChannelCount_ = net_->output_blobs()[0]->channels(); -// int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width; -// outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1); -// float*outputMatData=(float*)(outputMat.data); - -// memcpy(outputMatData,outputNetData,sizeof(float)*outputSz); - #endif @@ -307,6 +257,9 @@ protected: //Size inputGeometry_; int minibatchSz_;//The existence of the assignment operator mandates this to 
be nonconst //int outputSize_; + const int _inputHeight =700; + const int _inputWidth =700; + const int _inputChannel =3; public: DeepCNNTextDetectorDNNImpl(const DeepCNNTextDetectorDNNImpl& dn): minibatchSz_(dn.minibatchSz_){ @@ -355,28 +308,10 @@ public: //std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl; exit(-1); } -// this->net_.reset(new caffe::Net(modelArchFilename, caffe::TEST)); -// CV_Assert(net_->num_inputs()==1); -// CV_Assert(net_->num_outputs()==1); -// CV_Assert(this->net_->input_blobs()[0]->channels()==1 -// ||this->net_->input_blobs()[0]->channels()==3); -// // this->channelCount_=this->net_->input_blobs()[0]->channels(); - - -// this->inputGeometry_.height = inputLayer->height(); -// this->inputGeometry_.width = inputLayer->width(); -// this->inputChannelCount_ = inputLayer->channels(); -// //this->inputGeometry_.batchSize =1; - -// inputLayer->Reshape(this->minibatchSz_,this->inputChannelCount_,this->inputGeometry_.height, this->inputGeometry_.width); -// net_->Reshape(); -// this->outputChannelCount_ = net_->output_blobs()[0]->channels(); -// //this->outputGeometry_.batchSize =1; -// this->outputGeometry_.height =net_->output_blobs()[0]->height(); -// this->outputGeometry_.width = net_->output_blobs()[0]->width(); - this->inputGeometry_.height =700; - this->inputGeometry_.width = 700 ;//inputLayer->width(); - this->inputChannelCount_ = 3 ;//inputLayer->channels(); + + this->inputGeometry_.height =_inputHeight; + this->inputGeometry_.width = _inputWidth ;//inputLayer->width(); + this->inputChannelCount_ = _inputChannel ;//inputLayer->channels(); #else CV_Error(Error::StsError,"DNN module not available during compilation!"); @@ -389,7 +324,7 @@ public: Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width); Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed Mat outputMat = Bbox_prob.getMat(); - printf("calling"); + process_(image.getMat(),outputMat); //copy back to 
outputArray outputMat.copyTo(Bbox_prob); @@ -487,20 +422,20 @@ Ptr DeepCNNTextDetector::createTextBoxNet(String archFilena case OCR_HOLISTIC_BACKEND_DEFAULT: #ifdef HAVE_CAFFE - return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 100)); + return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1)); #elif defined(HAVE_DNN) - return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 100)); + return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1)); #else CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); return Ptr(); #endif break; case OCR_HOLISTIC_BACKEND_CAFFE: - return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 100)); + return Ptr(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1)); break; case OCR_HOLISTIC_BACKEND_DNN: - return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 100)); + return Ptr(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1)); break; case OCR_HOLISTIC_BACKEND_NONE: default: