Repository for OpenCV's extra modules
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

170 lines
5.0 KiB

#include "precomp.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/core.hpp"
#include <iostream>
#include <fstream>
#include <sstream>
#include <queue>
#include <algorithm>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#ifdef HAVE_CAFFE
#include "caffe/caffe.hpp"
#endif
namespace cv { namespace text {
// Concrete textDetector that forwards an image to a TextRegionDetector and
// converts the raw detector output into bounding boxes and confidences.
class textDetectImpl: public textDetector{
private:
    // Auxiliary structure that turns the raw detector output buffer into
    // bounding boxes and textness confidences.
    struct NetOutput{
        Rect bbox;
        float probability;

        // Restricts a coordinate to the valid pixel range [0, extent-1].
        // A non-positive extent collapses the range to the single value 0.
        static float clampCoord(float value, int extent)
        {
            const float upper = extent > 0 ? static_cast<float>(extent - 1) : 0.f;
            return std::min(std::max(value, 0.f), upper);
        }

        // Decodes nbrTextBoxes rows of nCol floats from buffer into res.
        // For each row, column 2 is read as the confidence and columns 3..6 as
        // x_min, y_min, x_max, y_max, which are multiplied by the image
        // width/height (presumably normalized to [0,1] by the network --
        // confirm against the model definition).
        // The caller must guarantee nCol >= 7 and that buffer holds at least
        // nbrTextBoxes*nCol floats.
        static void getOutputs(const float* buffer,int nbrTextBoxes,int nCol,std::vector<NetOutput>& res,Size inputShape)
        {
            // A negative count would otherwise be converted to a huge size_t.
            if (nbrTextBoxes < 0)
                nbrTextBoxes = 0;
            res.resize(nbrTextBoxes);
            for (int k = 0; k < nbrTextBoxes; k++)
            {
                const float* row = buffer + k*nCol;

                // Scale to pixels and keep every corner inside the image; the
                // previous code let x_max == width (and x_min > width) through.
                float x_min = clampCoord(row[3]*inputShape.width,  inputShape.width);
                float y_min = clampCoord(row[4]*inputShape.height, inputShape.height);
                float x_max = clampCoord(row[5]*inputShape.width,  inputShape.width);
                float y_max = clampCoord(row[6]*inputShape.height, inputShape.height);

                // Guard against inverted corners so width/height stay positive.
                x_max = std::max(x_max, x_min);
                y_max = std::max(y_max, y_min);

                float wd = x_max - x_min + 1;
                float ht = y_max - y_min + 1;
                res[k].bbox = Rect(int(x_min), int(y_min), int(wd), int(ht));
                res[k].probability = row[2];
            }
        }
    };
protected:
    // Detector that produces the raw output decoded by this class.
    Ptr<TextRegionDetector> classifier_;
public:
    // Stores the detector used by all subsequent calls.
    textDetectImpl(Ptr<TextRegionDetector> classifierPtr):classifier_(classifierPtr)
    {
    }

    // Runs the detector on inputImage and appends one bounding box and one
    // confidence per detection to Bbox and confidence (existing elements are kept).
    void textDetectInImage(InputArray inputImage,CV_OUT std::vector<Rect>& Bbox,CV_OUT std::vector<float>& confidence)
    {
        Mat netOutput;
        // call the detect function of the classifier
        this->classifier_->detect(inputImage,netOutput);
        // get the output geometry: height = number of boxes, width = values per box
        Size OutputGeometry_ = this->classifier_->getOutputGeometry();
        int nbrTextBoxes = OutputGeometry_.height;
        int nCol = OutputGeometry_.width;
        if (nbrTextBoxes <= 0)
            return; // nothing detected, nothing to decode

        // getOutputs reads columns 2..6 of every row as float, so make sure
        // the buffer really is that large before touching it.
        CV_Assert(nCol >= 7);
        CV_Assert(netOutput.data != NULL);
        CV_Assert(netOutput.total()*netOutput.elemSize() >=
                  static_cast<size_t>(nbrTextBoxes)*static_cast<size_t>(nCol)*sizeof(float));

        std::vector<NetOutput> tmp;
        // the output bounding box needs to be resized by the input height and width
        Size inputImageShape = Size(inputImage.cols(),inputImage.rows());
        NetOutput::getOutputs(reinterpret_cast<const float*>(netOutput.data),nbrTextBoxes,nCol,tmp,inputImageShape);

        // put the output in CV_OUT
        Bbox.reserve(Bbox.size() + tmp.size());
        confidence.reserve(confidence.size() + tmp.size());
        for (size_t k = 0; k < tmp.size(); k++)
        {
            Bbox.push_back(tmp[k].bbox);
            confidence.push_back(tmp[k].probability);
        }
    }

    // Detects text in image. When non-NULL, *component_rects receives the
    // bounding boxes and *component_confidences the matching confidences.
    // component_level must equal OCR_LEVEL_WORD.
    void run(Mat& image, std::vector<Rect>* component_rects=NULL,
             std::vector<float>* component_confidences=NULL,
             int component_level=0)
    {
        CV_Assert(component_level==OCR_LEVEL_WORD);//Components not applicable for word spotting
        std::vector<Rect> bbox;
        std::vector<float> score;
        textDetectInImage(image,bbox,score);
        // Copy through the pointers: assigning to the pointer parameters
        // themselves (as before) never reached the caller's vectors.
        if (component_rects != NULL)
        {
            *component_rects = bbox;
        }
        if (component_confidences != NULL)
        {
            *component_confidences = score;
        }
    }

    // Same as run() above; mask must match the image size but is otherwise
    // ignored because the CNN operates on the full image.
    void run(Mat& image, Mat& mask, std::vector<Rect>* component_rects=NULL,
             std::vector<float>* component_confidences=NULL,
             int component_level=0)
    {
        CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image
        this->run(image,component_rects,component_confidences,component_level);
    }

    // Returns the detector this object was constructed with.
    Ptr<TextRegionDetector> getClassifier()
    {
        return this->classifier_;
    }
};
// Factory: wraps a caller-supplied region detector in the concrete
// textDetectImpl and hands it back through the textDetector interface.
Ptr<textDetector> textDetector::create(Ptr<TextRegionDetector> classifierPtr)
{
    Ptr<textDetector> detector(new textDetectImpl(classifierPtr));
    return detector;
}
// Factory: builds a DeepCNNTextDetector from the given model architecture and
// weights files, with a custom preprocessor and a fixed three-value mean, and
// wraps it in a textDetectImpl.
Ptr<textDetector> textDetector::create(String modelArchFilename, String modelWeightsFilename)
{
    // custom preprocessor created with the value 255
    Ptr<ImagePreprocessor> imagePrep = ImagePreprocessor::createImageCustomPreprocessor(255);

    // 1x3 8-bit mean handed to the preprocessor
    const uchar meanValues[3] = {104, 117, 123};
    Mat meanMat(1, 3, CV_8U);
    for (int c = 0; c < 3; c++)
    {
        meanMat.at<uchar>(0, c) = meanValues[c];
    }
    imagePrep->set_mean(meanMat);

    // region detector backing the returned textDetector
    Ptr<TextRegionDetector> regionDetector(
        DeepCNNTextDetector::create(modelArchFilename, modelWeightsFilename, imagePrep, 1));
    Ptr<textDetector> detector(new textDetectImpl(regionDetector));
    return detector;
}
} } //namespace text namespace cv