Merge pull request #1384 from mshabunin:pr723

pull/1400/head
Vadim Pisarevsky 7 years ago
commit 68736a2ce5
  1. 2
      modules/text/CMakeLists.txt
  2. 62
      modules/text/include/opencv2/text/ocr.hpp
  3. 61
      modules/text/samples/dictnet_demo.cpp
  4. 102
      modules/text/src/ocr_holistic.cpp
  5. 2
      modules/text/text_config.hpp.in

@ -1,5 +1,5 @@
set(the_description "Text Detection and Recognition")
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d OPTIONAL opencv_highgui WRAP python java)
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_dnn OPTIONAL opencv_highgui WRAP python java)
if(NOT CMAKE_CROSSCOMPILING OR OPENCV_FIND_TESSERACT)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)

@ -536,8 +536,66 @@ at each window location.
CV_EXPORTS_W Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename);
/** @brief OCRHolisticWordRecognizer class provides the functionality of segmented wordspotting.
* Given a predefined vocabulary, a DictNet is employed to select the most probable
* word given an input image.
*
* DictNet is described in detail in:
* Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015
* http://arxiv.org/abs/1412.1842
*/
class CV_EXPORTS OCRHolisticWordRecognizer : public BaseOCR
{
public:
/** @brief Recognize the word shown in an image; overload that takes no mask.
The parameters have the same meaning as in the overload that takes a mask.
@param image Input image CV_8UC1 or CV_8UC3
@param output_text Output text of the word spotting.
@param component_rects Optional output, can be NULL.
@param component_texts Optional output, can be NULL.
@param component_confidences Optional output, can be NULL.
@param component_level must be OCR_LEVEL_WORD.
*/
virtual void run(Mat& image,
std::string& output_text,
std::vector<Rect>* component_rects = NULL,
std::vector<std::string>* component_texts = NULL,
std::vector<float>* component_confidences = NULL,
int component_level = OCR_LEVEL_WORD) = 0;
/** @brief Recognize text using a segmentation based word-spotting/classifier cnn.
Takes image on input and returns recognized text in the output_text parameter. Optionally
provides also the Rects for individual text elements found (e.g. words), and the list of those
text elements with their confidence values.
@param image Input image CV_8UC1 or CV_8UC3
@param mask is not used for recognition and is only available for compatibility reasons; the
implementation only checks that it has the same number of rows and columns as image.
@param output_text Output text of the word spotting, always one that exists in the dictionary.
@param component_rects Not applicable for word spotting; can be NULL. If it is not NULL, a single
element (a rectangle covering the whole image) will be put in the vector.
@param component_texts Not applicable for word spotting; can be NULL. If it is not NULL, a single
element (the same string as output_text) will be put in the vector.
@param component_confidences Not applicable for word spotting; can be NULL. If it is not NULL, a
single element (the confidence of the selected word) will be put in the vector.
@param component_level must be OCR_LEVEL_WORD.
*/
virtual void run(Mat& image,
Mat& mask,
std::string& output_text,
std::vector<Rect>* component_rects = NULL,
std::vector<std::string>* component_texts = NULL,
std::vector<float>* component_confidences = NULL,
int component_level = OCR_LEVEL_WORD) = 0;
/** @brief Creates an instance of the OCRHolisticWordRecognizer class.
@param archFilename Caffe network description file (e.g. a deploy .prototxt).
@param weightsFilename Caffe trained weights file (e.g. a .caffemodel).
@param wordsFilename Text file holding the vocabulary, one word per line; the number of lines
must match the number of classes produced by the network.
*/
static Ptr<OCRHolisticWordRecognizer> create(const std::string &archFilename,
const std::string &weightsFilename,
const std::string &wordsFilename);
};
//! @}
}
}
}} // cv::text::
#endif // _OPENCV_TEXT_OCR_HPP_

@ -0,0 +1,61 @@
/*
* dictnet_demo.cpp
*
* Demonstrates simple use of the holistic word classifier in C++
*
* Created on: June 26, 2016
* Author: Anguelos Nicolaou <anguelos.nicolaou AT gmail.com>
*/
#include "opencv2/text.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include <sstream>
#include <iostream>
using namespace std;
using namespace cv;
using namespace cv::text;
/**
 * Writes the demo's usage text to standard output: a short description, the
 * expected command line, the model files that must be present in the current
 * directory, and shell commands for downloading them.
 */
inline void printHelp()
{
    // One chained expression; every line break is an endl so the stream is
    // flushed exactly as often as when each line is written separately.
    cout << " Demo of wordspotting CNN for text recognition." << endl
         << " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015" << endl
         << endl
         << " Usage: program <input_image>" << endl
         << " Caffe Model files (dictnet_vgg.caffemodel, dictnet_vgg_deploy.prototxt, dictnet_vgg_labels.txt)" << endl
         << " must be in the current directory." << endl
         << endl
         << " Obtaining Caffe Model files in linux shell:" << endl
         << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel" << endl
         << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt" << endl
         << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt" << endl
         << endl;
}
/**
 * Entry point of the DictNet word-spotting demo.
 *
 * Expects exactly one argument, the path of the image to recognise. The image
 * is loaded as grayscale, handed to an OCRHolisticWordRecognizer built from the
 * model files in the current directory, and the recognised word is printed
 * together with its confidence.
 *
 * @return 0 on success, 1 when the command line is wrong or the image cannot
 *         be read.
 */
int main(int argc, const char * argv[])
{
    if (argc != 2)
    {
        printHelp();
        return 1;
    }

    Mat image = imread(argv[1], IMREAD_GRAYSCALE);
    // Validate the image before reporting anything about it, and say which
    // file failed so the user is not left with only the generic help text.
    if (image.empty())
    {
        std::cerr << "Could not read image: " << argv[1] << std::endl << std::endl;
        printHelp();
        return 1;
    }
    cout << "Read image (" << argv[1] << "): " << image.size << ", channels: " << image.channels() << ", depth: " << image.depth() << endl;

    Ptr<OCRHolisticWordRecognizer> wordSpotter = OCRHolisticWordRecognizer::create("dictnet_vgg_deploy.prototxt", "dictnet_vgg.caffemodel", "dictnet_vgg_labels.txt");

    std::string word;
    vector<float> confs;
    // Rectangles and per-component texts are not requested; only the confidence is.
    wordSpotter->run(image, word, 0, 0, &confs);
    cout << "Detected word: '" << word << "', confidence: " << confs[0] << endl;
    return 0;
}

@ -0,0 +1,102 @@
#include "precomp.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/core.hpp"
#include "opencv2/dnn.hpp"
#include <fstream>
using namespace std;
namespace cv { namespace text {
/**
 * Implementation of OCRHolisticWordRecognizer on top of a Caffe network loaded
 * through the dnn module.
 *
 * The network is fed a single-channel image resized to the size returned by
 * getPerceptiveField() through its "data" input; its "prob" output holds one
 * score per vocabulary word. The word with the highest score is reported.
 */
class OCRHolisticWordRecognizerImpl : public OCRHolisticWordRecognizer
{
private:
    dnn::Net net;          // network loaded from the Caffe description and weights
    vector<string> words;  // vocabulary; entry i is the word for output class i

public:
    /**
     * Loads the network and the vocabulary.
     *
     * @param archFilename    Caffe network description, passed to dnn::readNetFromCaffe.
     * @param weightsFilename Caffe weights, passed to dnn::readNetFromCaffe.
     * @param wordsFilename   Text file with one vocabulary word per line.
     *
     * Raises an OpenCV error when the vocabulary file cannot be opened, and
     * asserts that the number of words equals the number of network classes.
     */
    OCRHolisticWordRecognizerImpl(const string &archFilename, const string &weightsFilename, const string &wordsFilename)
    {
        net = dnn::readNetFromCaffe(archFilename, weightsFilename);
        std::ifstream in(wordsFilename.c_str());
        if (!in)
        {
            CV_Error(Error::StsError, "Could not read Labels from file");
        }
        std::string line;
        while (std::getline(in, line))
        {
            // A vocabulary file with CRLF line endings would otherwise leave a
            // trailing '\r' on every recognised word.
            if (!line.empty() && line[line.size() - 1] == '\r')
                line.erase(line.size() - 1);
            words.push_back(line);
        }
        CV_Assert(getClassCount() == words.size());
    }

    /**
     * Recognises the single word shown in image.
     *
     * @param image                 Input image, CV_8UC1 or CV_8UC3.
     * @param output_text           Receives the selected vocabulary word.
     * @param component_rects       If not NULL, receives one rectangle covering the whole image.
     * @param component_texts       If not NULL, receives one element equal to output_text.
     * @param component_confidences If not NULL, receives one element with the word's score.
     * @param component_level       Must be OCR_LEVEL_WORD.
     */
    void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL, std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, int component_level=OCR_LEVEL_WORD)
    {
        CV_Assert(component_level==OCR_LEVEL_WORD); // Components are not applicable for word spotting
        double confidence = 0;
        output_text = classify(image, confidence);
        if (component_rects != NULL)
        {
            component_rects->resize(1);
            (*component_rects)[0] = Rect(0, 0, image.size().width, image.size().height);
        }
        if (component_texts != NULL)
        {
            component_texts->resize(1);
            (*component_texts)[0] = output_text;
        }
        if (component_confidences != NULL)
        {
            component_confidences->resize(1);
            (*component_confidences)[0] = float(confidence);
        }
    }

    /**
     * Same as the overload without a mask. The mask takes no part in the
     * recognition; it is only checked to have the same size as image.
     */
    void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL, std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, int component_level=OCR_LEVEL_WORD)
    {
        // Mask is ignored because the CNN operates on a full image
        CV_Assert(mask.cols == image.cols && mask.rows == image.rows);
        this->run(image, output_text, component_rects, component_texts, component_confidences, component_level);
    }

protected:
    /** Size (width 100, height 32) every input image is resized to before being fed to the network. */
    Size getPerceptiveField() const
    {
        return Size(100, 32);
    }

    /**
     * Number of classes of the network, obtained by asking the network for the
     * output shape of its "prob" layer given a 1x1xHxW input. The shape must
     * be 1 x N x 1 x 1; N is returned.
     */
    size_t getClassCount()
    {
        int id = net.getLayerId("prob");
        dnn::MatShape inputShape;
        inputShape.push_back(1);
        inputShape.push_back(1);
        inputShape.push_back(getPerceptiveField().height);
        inputShape.push_back(getPerceptiveField().width);
        vector<dnn::MatShape> inShapes, outShapes;
        net.getLayerShapes(inputShape, id, inShapes, outShapes);
        CV_Assert(outShapes.size() == 1 && outShapes[0].size() == 4);
        CV_Assert(outShapes[0][0] == 1 && outShapes[0][2] == 1 && outShapes[0][3] == 1);
        return outShapes[0][1];
    }

    /**
     * Runs the network on image and returns the best-scoring vocabulary word.
     *
     * @param image 8-bit image with one or three channels. Three-channel input
     *              is converted to grayscale first, so that the documented
     *              CV_8UC3 input is accepted instead of failing an assertion.
     * @param conf  Receives the score of the returned word (the maximum of the
     *              "prob" output).
     * @return The vocabulary word whose class has the highest score.
     */
    string classify(InputArray image, double & conf)
    {
        CV_Assert(image.depth() == CV_8U && (image.channels() == 1 || image.channels() == 3));
        Mat gray;
        if (image.channels() == 3)
            cvtColor(image, gray, COLOR_BGR2GRAY); // assumes OpenCV's usual BGR channel order
        else
            gray = image.getMat();
        Mat resized;
        resize(gray, resized, getPerceptiveField());
        Mat blob = dnn::blobFromImage(resized);
        net.setInput(blob, "data");
        Mat prob = net.forward("prob");
        // The constructor guarantees words.size() equals the class count, so the
        // shape inference does not have to be repeated for every image.
        CV_Assert(prob.dims == 4 && !prob.empty() && prob.size[1] == (int)words.size());
        int idx[4] = {0};
        minMaxIdx(prob, 0, &conf, 0, idx);
        CV_Assert(0 <= idx[1] && idx[1] < (int)words.size());
        return words[idx[1]];
    }
};
/**
 * Factory for the holistic word recognizer: builds the implementation object
 * from the network description, the weights and the vocabulary file, and
 * returns it through the public interface type.
 */
Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(const string &archFilename, const string &weightsFilename, const string &wordsFilename)
{
    Ptr<OCRHolisticWordRecognizerImpl> recognizer =
        makePtr<OCRHolisticWordRecognizerImpl>(archFilename, weightsFilename, wordsFilename);
    return recognizer;
}
}} // cv::text::

@ -4,4 +4,4 @@
// HAVE OCR Tesseract
#cmakedefine HAVE_TESSERACT
#endif
#endif

Loading…
Cancel
Save