parent bad02f3797
commit 3aa88889aa
9 changed files with 122 additions and 657 deletions
@@ -1,14 +0,0 @@
-# Caffe package for CNN Triplet training
-unset(Caffe_FOUND)
-
-find_path(Caffe_INCLUDE_DIR NAMES caffe/caffe.hpp caffe/common.hpp caffe/net.hpp caffe/proto/caffe.pb.h caffe/util/io.hpp caffe/vision_layers.hpp
-          HINTS
-          /usr/local/include)
-
-find_library(Caffe_LIBS NAMES caffe
-             HINTS
-             /usr/local/lib)
-
-if(Caffe_LIBS AND Caffe_INCLUDE_DIR)
-    set(Caffe_FOUND 1)
-endif()
@@ -1,10 +0,0 @@
-#Required for Caffe
-unset(Glog_FOUND)
-
-find_library(Glog_LIBS NAMES glog
-             HINTS
-             /usr/local/lib)
-
-if(Glog_LIBS)
-    set(Glog_FOUND 1)
-endif()
@@ -1,10 +0,0 @@
-#Protobuf package required for Caffe
-unset(Protobuf_FOUND)
-
-find_library(Protobuf_LIBS NAMES protobuf
-             HINTS
-             /usr/local/lib)
-
-if(Protobuf_LIBS)
-    set(Protobuf_FOUND 1)
-endif()
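These three find-modules existed only to locate a locally built Caffe and its glog/protobuf dependencies. After this commit the DictNet model is loaded through OpenCV's own dnn module, so no external Caffe build is needed; the whole dependency chain reduces to a single call. A minimal sketch (the file names are the DictNet files referenced in the sample deleted below):

    // No FindCaffe/FindGlog/FindProtobuf needed: opencv_dnn parses the
    // .prototxt and .caffemodel files directly.
    #include <opencv2/dnn.hpp>

    cv::dnn::Net net = cv::dnn::readNetFromCaffe("dictnet_vgg_deploy.prototxt",
                                                 "dictnet_vgg.caffemodel");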
@@ -1,82 +0,0 @@
-#!/usr/bin/env python
-
-import cv2
-import sys
-import os.path
-
-# Global variables shared between the mouse callback and main
-refPt = []
-cropping = False
-image = None
-drawImage = None
-dictNet = None
-wordSpotter = None
-
-
-def mouseCallback(event, x, y, flags, param):
-    # grab references to the global variables
-    global refPt, cropping, wordSpotter, drawImage, image
-
-    # if the left mouse button was clicked, record the starting
-    # (x, y) coordinates and indicate that cropping is being
-    # performed
-    if event == cv2.EVENT_LBUTTONDOWN:
-        refPt = [(x, y)]
-        cropping = True
-
-    # check to see if the left mouse button was released
-    elif event == cv2.EVENT_LBUTTONUP:
-        # record the ending (x, y) coordinates and indicate that
-        # the cropping operation is finished
-        refPt.append((x, y))
-        cropping = False
-
-        # draw a rectangle around the region of interest
-        roi = image[refPt[0][1]:refPt[1][1], refPt[0][0]:refPt[1][0]]
-        res = wordSpotter.recogniseImage(roi)
-        drawImage = image.copy()
-        cv2.rectangle(drawImage, refPt[0], refPt[1], (0, 255, 0), 2)
-        cv2.putText(drawImage, "%s:%f" % (res[0], res[1]), refPt[0], cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
-        cv2.imshow("Select A Region", drawImage)
-
-
-if __name__ == '__main__':
-    USEGPU = False
-    helpStr = """Usage: """ + sys.argv[0] + """ IMAGE_FILENAME
-
-    Press 'q' or 'Q' to exit.
-
-    The model files must be available in the current directory.
-    In a Linux shell they can be downloaded (~2GB) with the following commands:
-    wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel
-    wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt
-    wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt
-    """
-    if len(sys.argv) != 2 or not os.path.isfile(sys.argv[1]):
-        print helpStr
-        print 'No image file given. Aborting!'
-        sys.exit(1)
-    if not (os.path.isfile('dictnet_vgg_deploy.prototxt') and
-            os.path.isfile('dictnet_vgg.caffemodel') and
-            os.path.isfile('dictnet_vgg_labels.txt')):
-        print helpStr
-        print 'Model files not present. Aborting!'
-        sys.exit(1)
-
-    dictNet = cv2.text.DictNet_create('./dictnet_vgg_deploy.prototxt', './dictnet_vgg.caffemodel', 100, USEGPU)
-    wordSpotter = cv2.text.OCRHolisticWordRecognizer_create(dictNet, "./dictnet_vgg_labels.txt")
-
-    image = cv2.imread(sys.argv[1])
-    drawImage = image.copy()
-    cv2.namedWindow("Select A Region")
-    cv2.setMouseCallback("Select A Region", mouseCallback)
-
-    while True:
-        cv2.imshow("Select A Region", drawImage)
-        key = cv2.waitKey(1) & 0xFF
-
-        # if the 'q' key is pressed, break from the loop
-        if key == ord("q") or key == ord("Q"):
-            break
-
-    cv2.destroyAllWindows()
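The deleted sample drove the old two-object API (`cv2.text.DictNet_create` plus `cv2.text.OCRHolisticWordRecognizer_create`). With the single-class API introduced in the next file, the equivalent create-and-recognise flow looks roughly like this (a hedged C++ sketch built from the API shown below; the interactive ROI selection and error handling are omitted):

    #include <opencv2/text.hpp>
    #include <opencv2/imgcodecs.hpp>
    #include <iostream>

    int main(int argc, char** argv)
    {
        using namespace cv;
        // One create() call replaces DictNet_create + OCRHolisticWordRecognizer_create.
        Ptr<text::OCRHolisticWordRecognizer> spotter =
            text::OCRHolisticWordRecognizer::create("dictnet_vgg_deploy.prototxt",
                                                    "dictnet_vgg.caffemodel",
                                                    "dictnet_vgg_labels.txt");
        Mat word = imread(argv[1], IMREAD_GRAYSCALE); // classify() requires a 1-channel CV_8U image
        std::string transcription;
        std::vector<float> confidences;
        spotter->run(word, transcription, NULL, NULL, &confidences, text::OCR_LEVEL_WORD);
        std::cout << transcription << ":" << confidences[0] << std::endl;
        return 0;
    }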
@@ -1,296 +1,102 @@
-#include "precomp.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/core.hpp"
-
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <queue>
-#include <algorithm>
-#include <iosfwd>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-//should this be moved elsewhere?
-//In precomp.hpp it doesn't work
-#ifdef HAVE_CAFFE
-#include "caffe/caffe.hpp"
-#endif
-
-namespace cv { namespace text {
-
-//Maybe OpenCV has a routine better suited
-inline bool fileExists (String filename) {
-    std::ifstream f(filename.c_str());
-    return f.good();
-}
-
-class DictNetCaffeImpl: public DictNet{
-protected:
-    void preprocess(Mat& input, Mat& output){
-        if(input.channels()==3){
-            Mat tmpInput;
-            cvtColor(input, tmpInput, COLOR_BGR2GRAY);
-            if(input.depth()==CV_8U){
-                tmpInput.convertTo(output, CV_32FC1, 1/255.0);
-            }else{//Assuming values are in the desired [0,1] range
-                tmpInput.convertTo(output, CV_32FC1);
-            }
-        }else{
-            if(input.channels()==1){
-                if(input.depth()==CV_8U){
-                    input.convertTo(output, CV_32FC1, 1/255.0);
-                }else{//Assuming values are in the desired [0,1] range
-                    input.convertTo(output, CV_32FC1);
-                }
-            }else{
-                CV_Error(Error::StsError, "Expecting images with either 1 or 3 channels");
-            }
-        }
-        resize(output, output, this->inputGeometry_);
-        Scalar dev, mean;
-        meanStdDev(output, mean, dev);
-        subtract(output, mean[0], output);
-        divide(output, (dev[0]/128.0), output);
-    }
-
-    void classifyMiniBatch(std::vector<Mat> inputImageList, Mat outputMat){
-        //Classifies a list of images containing at most minibatchSz_ images
-        CV_Assert(int(inputImageList.size())<=this->minibatchSz_);
-        CV_Assert(outputMat.isContinuous());
-#ifdef HAVE_CAFFE
-        net_->input_blobs()[0]->Reshape(inputImageList.size(), 1, this->inputGeometry_.height, this->inputGeometry_.width);
-        net_->Reshape();
-        float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
-        float* inputData=inputBuffer;
-        for(size_t imgNum=0; imgNum<inputImageList.size(); imgNum++){
-            Mat preprocessed;
-            cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
-            this->preprocess(inputImageList[imgNum], preprocessed);
-            preprocessed.copyTo(netInputWraped);
-            inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
-        }
-        this->net_->ForwardPrefilled();
-        const float* outputNetData=net_->output_blobs()[0]->cpu_data();
-        float* outputMatData=(float*)(outputMat.data);
-        memcpy(outputMatData, outputNetData, sizeof(float)*this->outputSize_*inputImageList.size());
-#endif
-    }
-
-#ifdef HAVE_CAFFE
-    Ptr<caffe::Net<float> > net_;
-#endif
-    Size inputGeometry_;
-    int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
-    bool gpuBackend_;//The existence of the assignment operator mandates this to be nonconst
-    int outputSize_;
-public:
-    DictNetCaffeImpl(const DictNetCaffeImpl& dn):inputGeometry_(dn.inputGeometry_), minibatchSz_(dn.minibatchSz_),
-        gpuBackend_(dn.gpuBackend_), outputSize_(dn.outputSize_){
-        //Implemented to suppress Visual Studio warning "assignment operator could not be generated"
-#ifdef HAVE_CAFFE
-        this->net_=dn.net_;
-#endif
-    }
-    DictNetCaffeImpl& operator=(const DictNetCaffeImpl &dn){
-#ifdef HAVE_CAFFE
-        this->net_=dn.net_;
-#endif
-        this->inputGeometry_=dn.inputGeometry_;
-        this->minibatchSz_=dn.minibatchSz_;
-        this->gpuBackend_=dn.gpuBackend_;
-        this->outputSize_=dn.outputSize_;
-        return *this;
-        //Implemented to suppress Visual Studio warning "assignment operator could not be generated"
-    }
-
-    DictNetCaffeImpl(String modelArchFilename, String modelWeightsFilename, int maxMinibatchSz, bool useGpu)
-        :minibatchSz_(maxMinibatchSz), gpuBackend_(useGpu){
-        CV_Assert(this->minibatchSz_>0);
-        CV_Assert(fileExists(modelArchFilename));
-        CV_Assert(fileExists(modelWeightsFilename));
-#ifdef HAVE_CAFFE
-        if(this->gpuBackend_){
-            caffe::Caffe::set_mode(caffe::Caffe::GPU);
-        }else{
-            caffe::Caffe::set_mode(caffe::Caffe::CPU);
-        }
-        this->net_.reset(new caffe::Net<float>(modelArchFilename, caffe::TEST));
-        CV_Assert(net_->num_inputs()==1);
-        CV_Assert(net_->num_outputs()==1);
-        CV_Assert(this->net_->input_blobs()[0]->channels()==1);
-        this->net_->CopyTrainedLayersFrom(modelWeightsFilename);
-        caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0];
-        this->inputGeometry_=Size(inputLayer->width(), inputLayer->height());
-        inputLayer->Reshape(this->minibatchSz_, 1, this->inputGeometry_.height, this->inputGeometry_.width);
-        net_->Reshape();
-        this->outputSize_=net_->output_blobs()[0]->channels();
-#else
-        CV_Error(Error::StsError, "Caffe not available during compilation!");
-#endif
-    }
-
-    void classify(InputArray image, OutputArray classProbabilities){
-        std::vector<Mat> inputImageList;
-        inputImageList.push_back(image.getMat());
-        classifyBatch(inputImageList, classProbabilities);
-    }
-
-    void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities){
-        std::vector<Mat> allImageVector;
-        inputImageList.getMatVector(allImageVector);
-        size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmetic
-        size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmetic
-        classProbabilities.create(Size(int(outputSize), int(allImageVector.size())), CV_32F);
-        Mat outputMat = classProbabilities.getMat();
-        for(size_t imgNum=0; imgNum<allImageVector.size(); imgNum+=minibatchSize){
-            size_t rangeEnd=imgNum+std::min<size_t>(allImageVector.size()-imgNum, minibatchSize);
-            std::vector<Mat>::const_iterator from=std::vector<Mat>::const_iterator(allImageVector.begin()+imgNum);
-            std::vector<Mat>::const_iterator to=std::vector<Mat>::const_iterator(allImageVector.begin()+rangeEnd);
-            std::vector<Mat> minibatchInput(from, to);
-            classifyMiniBatch(minibatchInput, outputMat.rowRange(int(imgNum), int(rangeEnd)));
-        }
-    }
-
-    int getOutputSize(){
-        return this->outputSize_;
-    }
-    int getMinibatchSize(){
-        return this->minibatchSz_;
-    }
-    bool usingGpu(){
-        return this->gpuBackend_;
-    }
-    int getBackend(){
-        return OCR_HOLISTIC_BACKEND_CAFFE;
-    }
-};
-
-Ptr<DictNet> DictNet::create(String archFilename, String weightsFilename, int minibatchSz, bool useGpu, int backEnd){
-    switch(backEnd){
-    case OCR_HOLISTIC_BACKEND_CAFFE:
-        return Ptr<DictNet>(new DictNetCaffeImpl(archFilename, weightsFilename, minibatchSz, useGpu));
-        break;
-    case OCR_HOLISTIC_BACKEND_NONE:
-    default:
-        CV_Error(Error::StsError, "DictNet::create backend not implemented");
-        return Ptr<DictNet>();
-        break;
-    }
-}
-
-class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{
-private:
-    struct NetOutput{
-        //Auxiliary structure that handles the logic of getting class ids and probabilities from
-        //the raw outputs of caffe
-        int wordIdx;
-        float probabillity;
-
-        static bool sorter(const NetOutput& o1, const NetOutput& o2){//used with std::sort to provide the most probable class
-            return o1.probabillity>o2.probabillity;
-        }
-
-        static void getOutputs(const float* buffer, int nbOutputs, std::vector<NetOutput>& res){
-            res.resize(nbOutputs);
-            for(int k=0; k<nbOutputs; k++){
-                res[k].wordIdx=k;
-                res[k].probabillity=buffer[k];
-            }
-            std::sort(res.begin(), res.end(), NetOutput::sorter);
-        }
-        static void getClassification(const float* buffer, int nbOutputs, int &classNum, double& confidence){
-            std::vector<NetOutput> tmp;
-            getOutputs(buffer, nbOutputs, tmp);
-            classNum=tmp[0].wordIdx;
-            confidence=tmp[0].probabillity;
-        }
-    };
-protected:
-    std::vector<String> labels_;
-    Ptr<TextImageClassifier> classifier_;
-public:
-    OCRHolisticWordRecognizerImpl(Ptr<TextImageClassifier> classifierPtr, String vocabullaryFilename):classifier_(classifierPtr){
-        CV_Assert(fileExists(vocabullaryFilename));//this fails for some reason
-        std::ifstream labelsFile(vocabullaryFilename.c_str());
-        if(!labelsFile){
-            CV_Error(Error::StsError, "Could not read Labels from file");
-        }
-        std::string line;
-        while (std::getline(labelsFile, line)){
-            labels_.push_back(std::string(line));
-        }
-        CV_Assert(this->classifier_->getOutputSize()==int(this->labels_.size()));
-    }
-
-    void recogniseImage(InputArray inputImage, CV_OUT String& transcription, CV_OUT double& confidence){
-        Mat netOutput;
-        this->classifier_->classify(inputImage, netOutput);
-        int classNum;
-        NetOutput::getClassification((float*)(netOutput.data), this->classifier_->getOutputSize(), classNum, confidence);
-        transcription=this->labels_[classNum];
-    }
-    void recogniseImageBatch(InputArrayOfArrays inputImageList, CV_OUT std::vector<String>& transcriptionVec, CV_OUT std::vector<double>& confidenceVec){
-        Mat netOutput;
-        this->classifier_->classifyBatch(inputImageList, netOutput);
-        for(int k=0; k<netOutput.rows; k++){
-            int classNum;
-            double confidence;
-            NetOutput::getClassification((float*)(netOutput.row(k).data), this->classifier_->getOutputSize(), classNum, confidence);
-            transcriptionVec.push_back(this->labels_[classNum]);
-            confidenceVec.push_back(confidence);
-        }
-    }
-
-    void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
-             std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
-             int component_level=0){
-        CV_Assert(component_level==OCR_LEVEL_WORD);//Components not applicable for word spotting
-        double confidence;
-        String transcription;
-        recogniseImage(image, transcription, confidence);
-        output_text=transcription.c_str();
-        if(component_rects!=NULL){
-            component_rects->resize(1);
-            (*component_rects)[0]=Rect(0, 0, image.size().width, image.size().height);
-        }
-        if(component_texts!=NULL){
-            component_texts->resize(1);
-            (*component_texts)[0]=transcription.c_str();
-        }
-        if(component_confidences!=NULL){
-            component_confidences->resize(1);
-            (*component_confidences)[0]=float(confidence);
-        }
-    }
-    void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
-             std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
-             int component_level=0){
-        CV_Assert(mask.cols==image.cols && mask.rows==image.rows);//Mask is ignored because the CNN operates on a full image
-        this->run(image, output_text, component_rects, component_texts, component_confidences, component_level);
-    }
-    std::vector<String>& getVocabulary(){
-        return this->labels_;
-    }
-};
-
-Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(Ptr<TextImageClassifier> classifierPtr, String vocabullaryFilename){
-    return Ptr<OCRHolisticWordRecognizer>(new OCRHolisticWordRecognizerImpl(classifierPtr, vocabullaryFilename));
-}
-
-Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(String modelArchFilename, String modelWeightsFilename, String vocabullaryFilename){
-    Ptr<TextImageClassifier> classifierPtr(new DictNetCaffeImpl(modelArchFilename, modelWeightsFilename, 100, 0));
-    return Ptr<OCRHolisticWordRecognizer>(new OCRHolisticWordRecognizerImpl(classifierPtr, vocabullaryFilename));
-}
-
-} } //namespace text namespace cv
+#include "precomp.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/core.hpp"
+#include "opencv2/dnn.hpp"
+
+#include <fstream>
+
+using namespace std;
+
+namespace cv { namespace text {
+
+class OCRHolisticWordRecognizerImpl : public OCRHolisticWordRecognizer
+{
+private:
+    dnn::Net net;
+    vector<string> words;
+
+public:
+    OCRHolisticWordRecognizerImpl(const string &archFilename, const string &weightsFilename, const string &wordsFilename)
+    {
+        net = dnn::readNetFromCaffe(archFilename, weightsFilename);
+        std::ifstream in(wordsFilename.c_str());
+        if (!in)
+        {
+            CV_Error(Error::StsError, "Could not read Labels from file");
+        }
+        std::string line;
+        while (std::getline(in, line))
+            words.push_back(line);
+        CV_Assert(getClassCount() == words.size());
+    }
+
+    void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL, std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, int component_level=0)
+    {
+        CV_Assert(component_level==OCR_LEVEL_WORD); //Components not applicable for word spotting
+        double confidence;
+        output_text = classify(image, confidence);
+        if(component_rects!=NULL){
+            component_rects->resize(1);
+            (*component_rects)[0]=Rect(0,0,image.size().width,image.size().height);
+        }
+        if(component_texts!=NULL){
+            component_texts->resize(1);
+            (*component_texts)[0] = output_text;
+        }
+        if(component_confidences!=NULL){
+            component_confidences->resize(1);
+            (*component_confidences)[0] = float(confidence);
+        }
+    }
+
+    void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL, std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, int component_level=0)
+    {
+        //Mask is ignored because the CNN operates on a full image
+        CV_Assert(mask.cols == image.cols && mask.rows == image.rows);
+        this->run(image, output_text, component_rects, component_texts, component_confidences, component_level);
+    }
+
+protected:
+    Size getPerceptiveField() const
+    {
+        return Size(100, 32);
+    }
+
+    size_t getClassCount()
+    {
+        int id = net.getLayerId("prob");
+        dnn::MatShape inputShape;
+        inputShape.push_back(1);
+        inputShape.push_back(1);
+        inputShape.push_back(getPerceptiveField().height);
+        inputShape.push_back(getPerceptiveField().width);
+        vector<dnn::MatShape> inShapes, outShapes;
+        net.getLayerShapes(inputShape, id, inShapes, outShapes);
+        CV_Assert(outShapes.size() == 1 && outShapes[0].size() == 4);
+        CV_Assert(outShapes[0][0] == 1 && outShapes[0][2] == 1 && outShapes[0][3] == 1);
+        return outShapes[0][1];
+    }
+
+    string classify(InputArray image, double & conf)
+    {
+        CV_Assert(image.channels() == 1 && image.depth() == CV_8U);
+        Mat resized;
+        resize(image, resized, getPerceptiveField());
+        Mat blob = dnn::blobFromImage(resized);
+        net.setInput(blob, "data");
+        Mat prob = net.forward("prob");
+        CV_Assert(prob.dims == 4 && !prob.empty() && prob.size[1] == (int)getClassCount());
+        int idx[4] = {0};
+        minMaxIdx(prob, 0, &conf, 0, idx);
+        CV_Assert(0 <= idx[1] && idx[1] < (int)words.size());
+        return words[idx[1]];
+    }
+};
+
+Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(const string &archFilename, const string &weightsFilename, const string &wordsFilename)
+{
+    return makePtr<OCRHolisticWordRecognizerImpl>(archFilename, weightsFilename, wordsFilename);
+}

+}} // cv::text::
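Stripped of the class scaffolding, the new implementation is a plain dnn forward pass. The sketch below restates the classify() step using only calls that appear in the code above; the "data" and "prob" layer names come from the DictNet deploy prototxt:

    #include <opencv2/dnn.hpp>
    #include <opencv2/imgproc.hpp>
    #include <string>
    #include <vector>

    // Resize to the 100x32 perceptive field, wrap as a 1x1x32x100 blob,
    // run the net, and take the arg-max over the class axis of "prob".
    static std::string classifyWord(cv::dnn::Net& net, const cv::Mat& gray8u,
                                    const std::vector<std::string>& words, double& conf)
    {
        cv::Mat resized;
        cv::resize(gray8u, resized, cv::Size(100, 32));  // getPerceptiveField()
        net.setInput(cv::dnn::blobFromImage(resized), "data");
        cv::Mat prob = net.forward("prob");              // 1xNx1x1 class probabilities
        int idx[4] = {0};
        cv::minMaxIdx(prob, 0, &conf, 0, idx);           // max location across all 4 dims
        return words[idx[1]];                            // idx[1] indexes the class axis
    }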
@@ -1,13 +1,7 @@
 #ifndef __OPENCV_TEXT_CONFIG_HPP__
 #define __OPENCV_TEXT_CONFIG_HPP__
 
-// HAVE CAFFE
-#cmakedefine HAVE_CAFFE
-
 // HAVE OCR Tesseract
 #cmakedefine HAVE_TESSERACT
 
-
-
-
 #endif
|
Loading…
Reference in new issue