Merge pull request #1384 from mshabunin:pr723
commit
68736a2ce5
5 changed files with 225 additions and 4 deletions
@ -0,0 +1,61 @@ |
||||
/*
|
||||
* dictnet_demo.cpp |
||||
* |
||||
* Demonstrates simple use of the holistic word classifier in C++ |
||||
* |
||||
* Created on: June 26, 2016 |
||||
* Author: Anguelos Nicolaou <anguelos.nicolaou AT gmail.com> |
||||
*/ |
||||
|
||||
#include "opencv2/text.hpp" |
||||
#include "opencv2/highgui.hpp" |
||||
#include "opencv2/imgproc.hpp" |
||||
|
||||
#include <sstream> |
||||
#include <iostream> |
||||
|
||||
using namespace std; |
||||
using namespace cv; |
||||
using namespace cv::text; |
||||
|
||||
// Prints the demo description, the command-line usage, and shell commands for
// downloading the Caffe model files to standard output.
inline void printHelp()
{
    // One entry per emitted line; an empty entry yields a blank separator line.
    static const char* const helpLines[] = {
        " Demo of wordspotting CNN for text recognition.",
        " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015",
        "",
        " Usage: program <input_image>",
        " Caffe Model files (dictnet_vgg.caffemodel, dictnet_vgg_deploy.prototxt, dictnet_vgg_labels.txt)",
        " must be in the current directory.",
        "",
        " Obtaining Caffe Model files in linux shell:",
        " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel",
        " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt",
        " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt",
        ""
    };
    const unsigned lineCount = sizeof(helpLines) / sizeof(helpLines[0]);

    // Each line is terminated (and flushed) with endl.
    for (unsigned i = 0; i < lineCount; ++i)
        std::cout << helpLines[i] << std::endl;
}
||||
|
||||
int main(int argc, const char * argv[]) |
||||
{ |
||||
if (argc != 2) |
||||
{ |
||||
printHelp(); |
||||
exit(1); |
||||
} |
||||
|
||||
Mat image = imread(argv[1], IMREAD_GRAYSCALE); |
||||
|
||||
cout << "Read image (" << argv[1] << "): " << image.size << ", channels: " << image.channels() << ", depth: " << image.depth() << endl; |
||||
|
||||
if (image.empty()) |
||||
{ |
||||
printHelp(); |
||||
exit(1); |
||||
} |
||||
|
||||
Ptr<OCRHolisticWordRecognizer> wordSpotter = OCRHolisticWordRecognizer::create("dictnet_vgg_deploy.prototxt", "dictnet_vgg.caffemodel", "dictnet_vgg_labels.txt"); |
||||
|
||||
std::string word; |
||||
vector<float> confs; |
||||
wordSpotter->run(image, word, 0, 0, &confs); |
||||
|
||||
cout << "Detected word: '" << word << "', confidence: " << confs[0] << endl; |
||||
} |
@ -0,0 +1,102 @@ |
||||
#include "precomp.hpp" |
||||
#include "opencv2/imgproc.hpp" |
||||
#include "opencv2/core.hpp" |
||||
#include "opencv2/dnn.hpp" |
||||
|
||||
#include <fstream> |
||||
|
||||
using namespace std; |
||||
|
||||
namespace cv { namespace text { |
||||
|
||||
class OCRHolisticWordRecognizerImpl : public OCRHolisticWordRecognizer |
||||
{ |
||||
private: |
||||
dnn::Net net; |
||||
vector<string> words; |
||||
|
||||
public: |
||||
OCRHolisticWordRecognizerImpl(const string &archFilename, const string &weightsFilename, const string &wordsFilename) |
||||
{ |
||||
net = dnn::readNetFromCaffe(archFilename, weightsFilename); |
||||
std::ifstream in(wordsFilename.c_str()); |
||||
if (!in) |
||||
{ |
||||
CV_Error(Error::StsError, "Could not read Labels from file"); |
||||
} |
||||
std::string line; |
||||
while (std::getline(in, line)) |
||||
words.push_back(line); |
||||
CV_Assert(getClassCount() == words.size()); |
||||
} |
||||
|
||||
void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL, std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, int component_level=0) |
||||
{ |
||||
CV_Assert(component_level==OCR_LEVEL_WORD); //Componnents not applicable for word spotting
|
||||
double confidence; |
||||
output_text = classify(image, confidence); |
||||
if(component_rects!=NULL){ |
||||
component_rects->resize(1); |
||||
(*component_rects)[0]=Rect(0,0,image.size().width,image.size().height); |
||||
} |
||||
if(component_texts!=NULL){ |
||||
component_texts->resize(1); |
||||
(*component_texts)[0] = output_text; |
||||
} |
||||
if(component_confidences!=NULL){ |
||||
component_confidences->resize(1); |
||||
(*component_confidences)[0] = float(confidence); |
||||
} |
||||
} |
||||
|
||||
void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL, std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, int component_level=0) |
||||
{ |
||||
//Mask is ignored because the CNN operates on a full image
|
||||
CV_Assert(mask.cols == image.cols && mask.rows == image.rows); |
||||
this->run(image, output_text, component_rects, component_texts, component_confidences, component_level); |
||||
} |
||||
|
||||
protected: |
||||
Size getPerceptiveField() const |
||||
{ |
||||
return Size(100, 32); |
||||
} |
||||
|
||||
size_t getClassCount() |
||||
{ |
||||
int id = net.getLayerId("prob"); |
||||
dnn::MatShape inputShape; |
||||
inputShape.push_back(1); |
||||
inputShape.push_back(1); |
||||
inputShape.push_back(getPerceptiveField().height); |
||||
inputShape.push_back(getPerceptiveField().width); |
||||
vector<dnn::MatShape> inShapes, outShapes; |
||||
net.getLayerShapes(inputShape, id, inShapes, outShapes); |
||||
CV_Assert(outShapes.size() == 1 && outShapes[0].size() == 4); |
||||
CV_Assert(outShapes[0][0] == 1 && outShapes[0][2] == 1 && outShapes[0][3] == 1); |
||||
return outShapes[0][1]; |
||||
} |
||||
|
||||
string classify(InputArray image, double & conf) |
||||
{ |
||||
CV_Assert(image.channels() == 1 && image.depth() == CV_8U); |
||||
Mat resized; |
||||
resize(image, resized, getPerceptiveField()); |
||||
Mat blob = dnn::blobFromImage(resized); |
||||
net.setInput(blob, "data"); |
||||
Mat prob = net.forward("prob"); |
||||
CV_Assert(prob.dims == 4 && !prob.empty() && prob.size[1] == (int)getClassCount()); |
||||
int idx[4] = {0}; |
||||
minMaxIdx(prob, 0, &conf, 0, idx); |
||||
CV_Assert(0 <= idx[1] && idx[1] < (int)words.size()); |
||||
return words[idx[1]]; |
||||
} |
||||
|
||||
}; |
||||
|
||||
// Factory: constructs the dnn-based implementation from the network
// architecture file, the weights file and the labels file, and returns it
// through the public interface type.
Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(const string &archFilename, const string &weightsFilename, const string &wordsFilename)
{
    Ptr<OCRHolisticWordRecognizerImpl> recognizer =
        makePtr<OCRHolisticWordRecognizerImpl>(archFilename, weightsFilename, wordsFilename);
    return recognizer;
}
||||
|
||||
}} // cv::text::
|
Loading…
Reference in new issue