added calculation of output size

pull/1348/head
sghoshcvc 7 years ago
parent a2cab07193
commit c697e41b8d
  1. 24
      modules/text/include/opencv2/text/ocr.hpp
  2. 4
      modules/text/include/opencv2/text/textDetector.hpp
  3. 10
      modules/text/samples/textbox_demo.cpp
  4. 98
      modules/text/src/ocr_holistic.cpp
  5. 6
      modules/text/src/text_detector.cpp
  6. 101
      modules/text/src/text_detectorCNN.cpp

@ -861,6 +861,15 @@ public:
};
namespace cnn_config{
/** @brief Provides runtime information on the backends compiled into this module.
*
* Reports which DeepCNN backends were enabled when the text module was built.
*
* @return a list of descriptive strings, one per compiled-in backend (Caffe, opencv_dnn);
* the list is empty when no backend was compiled in.
* */
CV_EXPORTS_W std::vector<std::string> getAvailableBackends();
namespace caffe_backend{
/** @brief Prompts Caffe on the computation device being used
@ -897,6 +906,21 @@ CV_EXPORTS_W void setCaffeGpuMode(bool useGpu);
CV_EXPORTS_W bool getCaffeAvailable();
}//caffe
namespace dnn_backend {
/** @brief Provides runtime information on whether DNN module was compiled in.
*
* The text module API is the same regardless of whether the DNN module was available or not
* during compilation. When methods that require a backend are invoked while no backend support
* is compiled, exceptions are thrown. This method allows testing at runtime whether the
* text module was built with dnn_backend.
*
* @return true if opencv_dnn support for the text module was provided during compilation,
* false if opencv_dnn was unavailable.
*/
CV_EXPORTS_W bool getDNNAvailable();
}//dnn_backend
}//cnn_config
/** @brief OCRHolisticWordRecognizer class provides the functionality of segmented wordspotting.

@ -56,7 +56,7 @@ namespace cv
namespace text
{
//! @addtogroup text_recognize
//! @addtogroup text_detect
//! @{
@ -263,7 +263,7 @@ public:
};
//! @}
}//namespace text
}//namespace cv

@ -61,6 +61,12 @@ int main(int argc, const char * argv[]){
std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n";
//exit(1);
}
std::vector<std::string> backends=cv::text::cnn_config::getAvailableBackends();
std::cout << "The Following backends are available" << "\n";
for (int i=0;i<backends.size();i++)
std::cout << backends[i] << "\n";
// printf("%s",x);
//set to true if you have a GPU with more than 3GB
if(cv::text::cnn_config::caffe_backend::getCaffeAvailable())
cv::text::cnn_config::caffe_backend::setCaffeGpuMode(true);
@ -112,7 +118,7 @@ int main(int argc, const char * argv[]){
}
// call dict net here for all detected parts
cv::Ptr<cv::text::DeepCNN> cnn=cv::text::DeepCNN::createDictNet(
"dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel");
"dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel",cv::text::OCR_HOLISTIC_BACKEND_DNN);
cv::Ptr<cv::text::OCRHolisticWordRecognizer> wordSpotter=
cv::text::OCRHolisticWordRecognizer::create(cnn,"dictnet_vgg_labels.txt");
@ -130,7 +136,7 @@ int main(int argc, const char * argv[]){
cv::Point tl_ = bbox.at(i).tl();
cv::Point br_ = bbox.at(i).br();
out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<tl_.y<<","<<tl_.y<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl;
out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl;
}
out.close();

@ -122,6 +122,7 @@ protected:
//void set_mean_(Mat M){}
void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){
//TODO put all the logic of channel and depth conversions in ImageProcessor class
CV_Assert(outputChannels==1 || outputChannels==3);
CV_Assert(input.channels()==1 || input.channels()==3);
@ -433,6 +434,7 @@ protected:
CV_Assert(int(inputImageList.size())<=this->minibatchSz_);
CV_Assert(outputMat.isContinuous());
#ifdef HAVE_CAFFE
net_->input_blobs()[0]->Reshape(inputImageList.size(), this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
net_->Reshape();
@ -450,16 +452,19 @@ protected:
input_channels.push_back(netInputWraped);
//input_data += width * height;
inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
}
this->preprocess(inputImageList[imgNum],preprocessed);
split(preprocessed, input_channels);
}
this->net_->ForwardPrefilled();
const float* outputNetData=net_->output_blobs()[0]->cpu_data();
this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width;
//outputMat.resize(this->outputGeometry_.height * this->outputGeometry_.width);
float*outputMatData=(float*)(outputMat.data);
memcpy(outputMatData,outputNetData,sizeof(float)*outputSz*inputImageList.size());
@ -470,9 +475,10 @@ protected:
#ifdef HAVE_CAFFE
Ptr<caffe::Net<float> > net_;
#endif
//Size inputGeometry_;
//Size inputGeometry_;//=Size(100,32);
int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
int outputSize_;
//Size outputGeometry_;
public:
DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn):
minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
@ -608,7 +614,7 @@ protected:
preProcessedImList.push_back(preprocessed);
}
// set input data blob in dnn::net
net_->setInput(blobFromImages(preProcessedImList,1, Size(100, 32)), "data");
net_->setInput(blobFromImages(preProcessedImList,1, this->inputGeometry_), "data");
float*outputMatData=(float*)(outputMat.data);
//Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ;
@ -625,9 +631,16 @@ protected:
#ifdef HAVE_DNN
Ptr<Net> net_;
#endif
//Size inputGeometry_;
// hard coding input image size. anything in DNN library to get that from prototxt??
// Size inputGeometry_;//=Size(100,32);
int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
int outputSize_;
//Size outputGeometry_;//= Size(1,1);
//int channelCount_;
// int inputChannel_ ;//=1;
const int _inputHeight =32;
const int _inputWidth =100;
const int _inputChannel =1;
public:
DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn):
minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){
@ -678,33 +691,17 @@ public:
//std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
exit(-1);
}
// find a wa to check the followings in cv::dnn ???
// CV_Assert(net_->num_inputs()==1);
// CV_Assert(net_->num_outputs()==1);
// CV_Assert(this->net_->input_blobs()[0]->channels()==1
// ||this->net_->input_blobs()[0]->channels()==3);
// this->channelCount_=this->net_->input_blobs()[0]->channels();
//this->net_->CopyTrainedLayersFrom(modelWeightsFilename);
//caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0];
//inputLayerId = net_->getLayerId('data');
// inputLayerShape = net_->getLayerShapes(const MatShape& netInputShape,
// inputLayerId,
// std::vector<MatShape>* inLayerShapes,
// std::vector<MatShape>* outLayerShapes) const;
// should not be hard coded ideally
this->inputGeometry_=Size(100,32);// Size(inputLayer->width(), inputLayer->height());
this->channelCount_ = 1;//inputLayer->channels();
this->inputGeometry_=Size(_inputWidth,_inputHeight);// Size(inputLayer->width(), inputLayer->height());
this->channelCount_ = _inputChannel;//inputLayer->channels();
//inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
//net_->Reshape();
this->outputSize_=88172 ;//net_->output_blobs()[0]->channels();
this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
Ptr< Layer > outLayer= net_->getLayer (net_->getLayerId (net_->getLayerNames()[net_->getLayerNames().size()-2]));
//std::vector<Mat> blobs = outLayer->blobs;
this->outputSize_=(outLayer->blobs)[1].size[0] ;//net_->output_blobs()[0]->channels();
//this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
@ -732,7 +729,7 @@ public:
size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic
classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F);
Mat outputMat = classProbabilities.getMat();
printf("ekhane");
for(size_t imgNum=0;imgNum<allImageVector.size();imgNum+=minibatchSize)
{
size_t rangeEnd=imgNum+std::min<size_t>(allImageVector.size()-imgNum,minibatchSize);
@ -832,6 +829,22 @@ Ptr<DeepCNN> DeepCNN::createDictNet(String archFilename,String weightsFilename,i
}
namespace cnn_config{
// Collects one descriptive entry for every backend that was enabled at
// compile time. Each entry pairs a short backend name with the matching
// OCR_HOLISTIC_BACKEND_* identifier. The result is empty when neither
// HAVE_CAFFE nor HAVE_DNN is defined.
std::vector<std::string> getAvailableBackends()
{
    std::vector<std::string> compiledIn;
#if defined(HAVE_CAFFE)
    // Caffe based backend
    compiledIn.push_back(std::string("CAFFE, OCR_HOLISTIC_BACKEND_CAFFE"));
#endif
#if defined(HAVE_DNN)
    // opencv_dnn based backend
    compiledIn.push_back(std::string("DNN, OCR_HOLISTIC_BACKEND_DNN"));
#endif
    return compiledIn;
}
namespace caffe_backend{
#ifdef HAVE_CAFFE
@ -856,7 +869,7 @@ bool getCaffeAvailable()
{
return true;
}
#elif defined(HAVE_DNN)
#else
bool getCaffeGpuMode()
{
@ -873,32 +886,23 @@ void setCaffeGpuMode(bool useGpu)
// Fallback compiled when HAVE_CAFFE is not defined: reports that the Caffe
// backend is unavailable.
// @return always false in this build configuration.
bool getCaffeAvailable(){
    return false; // boolean literal instead of the integer 0; same value
}
bool getDNNAvailable(){
return true;
}
#endif
#else
}//namespace caffe
namespace dnn_backend{
#ifdef HAVE_DNN
bool getCaffeGpuMode()
{
CV_Error(Error::StsError,"Caffe not available during compilation!");
return 0;
}
void setCaffeGpuMode(bool useGpu)
{
CV_Error(Error::StsError,"Caffe not available during compilation!");
CV_Assert(useGpu==1);//Compilation directives force
// Variant compiled when HAVE_DNN is defined: the opencv_dnn backend is
// available to the text module.
// @return always true in this build configuration.
bool getDNNAvailable()
{
    const bool dnnCompiledIn = true;
    return dnnCompiledIn;
}
bool getCaffeAvailable(){
#else
// Fallback compiled when HAVE_DNN is not defined: reports that the
// opencv_dnn backend is unavailable.
// @return always false in this build configuration.
bool getDNNAvailable(){
    return false; // boolean literal instead of the integer 0; same value
}
#endif
}//namespace caffe
}//namspace dnn_backend
}//namespace cnn_config
class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{
@ -931,6 +935,7 @@ private:
getOutputs(buffer,nbOutputs,tmp);
classNum=tmp[0].wordIdx;
confidence=tmp[0].probabillity;
}
};
protected:
@ -972,6 +977,7 @@ public:
{
Mat netOutput;
this->classifier_->classifyBatch(inputImageList,netOutput);
for(int k=0;k<netOutput.rows;k++)
{
int classNum;

@ -16,9 +16,9 @@
#include <vector>
#ifdef HAVE_CAFFE
#include "caffe/caffe.hpp"
#endif
//#ifdef HAVE_CAFFE
//#include "caffe/caffe.hpp"
//#endif
namespace cv { namespace text {

@ -225,75 +225,25 @@ protected:
#ifdef HAVE_DNN
//std::vector<Mat> preProcessedImList; // to store preprocessed images, should it be handled inside preprocessing class?
Mat preprocessed;
this->preprocess(inputImage,preprocessed);
printf("After preprocess");
// preprocesses each image in the inputImageList and push to preprocessedImList
// for(size_t imgNum=0;imgNum<inputImageList.size();imgNum++)
// {
// this->preprocess(inputImageList[imgNum],preprocessed);
// preProcessedImList.push_back(preprocessed);
// }
// set input data blob in dnn::net
//Mat temp =blobFromImage(preprocessed,1, Size(700, 700));
//printf("%d %d %d ",temp.size[1],temp.size[2],temp.size[3]);
net_->setInput(blobFromImage(preprocessed,1, Size(700, 700)), "data");
printf("Input layer");
//Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ;
Mat outputNet = this->net_->forward( );//"mbox_priorbox");
printf("After forward");
//outputNet = outputNet.reshape(1, 1);
net_->setInput(blobFromImage(preprocessed,1, this->inputGeometry_), "data");
Mat outputNet = this->net_->forward( );
this->outputGeometry_.height = outputNet.size[2];
this->outputGeometry_.width = outputNet.size[3];
this->outputChannelCount_ = outputNet.size[1];
printf("%d %d %d ",outputNet.size[1],outputNet.size[2],outputNet.size[3]);
outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
float*outputMatData=(float*)(outputMat.data);
float*outputNetData=(float*)(outputNet.data);
int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width;
memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
// net_->input_blobs()[0]->Reshape(1, this->inputChannelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
// net_->Reshape();
// float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
// float* inputData=inputBuffer;
// std::vector<Mat> input_channels;
// Mat preprocessed;
// // if the image have multiple color channels the input layer should be populated accordingly
// for (int channel=0;channel < this->inputChannelCount_;channel++){
// cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
// input_channels.push_back(netInputWraped);
// //input_data += width * height;
// inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
// }
// this->preprocess(inputImage,preprocessed);
// split(preprocessed, input_channels);
// //preprocessed.copyTo(netInputWraped);
// this->net_->Forward();
// const float* outputNetData=net_->output_blobs()[0]->cpu_data();
// // const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
// this->outputGeometry_.height = net_->output_blobs()[0]->height();
// this->outputGeometry_.width = net_->output_blobs()[0]->width();
// this->outputChannelCount_ = net_->output_blobs()[0]->channels();
// int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width;
// outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
// float*outputMatData=(float*)(outputMat.data);
// memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
#endif
@ -307,6 +257,9 @@ protected:
//Size inputGeometry_;
int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
//int outputSize_;
const int _inputHeight =700;
const int _inputWidth =700;
const int _inputChannel =3;
public:
DeepCNNTextDetectorDNNImpl(const DeepCNNTextDetectorDNNImpl& dn):
minibatchSz_(dn.minibatchSz_){
@ -355,28 +308,10 @@ public:
//std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
exit(-1);
}
// this->net_.reset(new caffe::Net<float>(modelArchFilename, caffe::TEST));
// CV_Assert(net_->num_inputs()==1);
// CV_Assert(net_->num_outputs()==1);
// CV_Assert(this->net_->input_blobs()[0]->channels()==1
// ||this->net_->input_blobs()[0]->channels()==3);
// // this->channelCount_=this->net_->input_blobs()[0]->channels();
// this->inputGeometry_.height = inputLayer->height();
// this->inputGeometry_.width = inputLayer->width();
// this->inputChannelCount_ = inputLayer->channels();
// //this->inputGeometry_.batchSize =1;
// inputLayer->Reshape(this->minibatchSz_,this->inputChannelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
// net_->Reshape();
// this->outputChannelCount_ = net_->output_blobs()[0]->channels();
// //this->outputGeometry_.batchSize =1;
// this->outputGeometry_.height =net_->output_blobs()[0]->height();
// this->outputGeometry_.width = net_->output_blobs()[0]->width();
this->inputGeometry_.height =700;
this->inputGeometry_.width = 700 ;//inputLayer->width();
this->inputChannelCount_ = 3 ;//inputLayer->channels();
this->inputGeometry_.height =_inputHeight;
this->inputGeometry_.width = _inputWidth ;//inputLayer->width();
this->inputChannelCount_ = _inputChannel ;//inputLayer->channels();
#else
CV_Error(Error::StsError,"DNN module not available during compilation!");
@ -389,7 +324,7 @@ public:
Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width);
Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed
Mat outputMat = Bbox_prob.getMat();
printf("calling");
process_(image.getMat(),outputMat);
//copy back to outputArray
outputMat.copyTo(Bbox_prob);
@ -487,20 +422,20 @@ Ptr<DeepCNNTextDetector> DeepCNNTextDetector::createTextBoxNet(String archFilena
case OCR_HOLISTIC_BACKEND_DEFAULT:
#ifdef HAVE_CAFFE
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 100));
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1));
#elif defined(HAVE_DNN)
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 100));
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1));
#else
CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented");
return Ptr<DeepCNNTextDetector>();
#endif
break;
case OCR_HOLISTIC_BACKEND_CAFFE:
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 100));
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1));
break;
case OCR_HOLISTIC_BACKEND_DNN:
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 100));
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1));
break;
case OCR_HOLISTIC_BACKEND_NONE:
default:

Loading…
Cancel
Save