parent
c33629e053
commit
951e18272d
19 changed files with 308 additions and 2898 deletions
@ -1,84 +1,24 @@ |
||||
set(the_description "Text Detection and Recognition") |
||||
|
||||
if(POLICY CMP0023) |
||||
message(STATUS "Explicitly setting policy CMP0023 to OLD") |
||||
cmake_policy(SET CMP0023 OLD) |
||||
endif(POLICY CMP0023) |
||||
|
||||
# Using cmake scripts and modules |
||||
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}) |
||||
|
||||
set(TEXT_DEPS opencv_ml opencv_highgui opencv_imgproc opencv_core opencv_features2d opencv_calib3d) |
||||
|
||||
find_package(Caffe) |
||||
if(Caffe_FOUND) |
||||
message(STATUS "Caffe: YES") |
||||
set(HAVE_CAFFE 1) |
||||
else() |
||||
message(STATUS "Caffe: NO") |
||||
# list(APPEND TEXT_DEPS opencv_dnn) |
||||
endif() |
||||
|
||||
#internal dependencies |
||||
find_package(Protobuf) |
||||
if(Protobuf_FOUND) |
||||
message(STATUS "Protobuf: YES") |
||||
set(HAVE_PROTOBUF 1) |
||||
else() |
||||
message(STATUS "Protobuf: NO") |
||||
endif() |
||||
|
||||
find_package(Glog) |
||||
if(Glog_FOUND) |
||||
message(STATUS "Glog: YES") |
||||
set(HAVE_GLOG 1) |
||||
else() |
||||
message(STATUS "Glog: NO") |
||||
endif() |
||||
|
||||
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d OPTIONAL opencv_dnn WRAP python) |
||||
#ocv_define_module(text ${TEXT_DEPS} WRAP python) |
||||
|
||||
#set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}) |
||||
|
||||
find_package(Tesseract) |
||||
if(${Tesseract_FOUND}) |
||||
message(STATUS "Tesseract: YES") |
||||
include_directories(${Tesseract_INCLUDE_DIR}) |
||||
target_link_libraries(opencv_text ${Tesseract_LIBS}) |
||||
add_definitions(-DHAVE_TESSERACT) |
||||
else() |
||||
message(STATUS "Tesseract: NO") |
||||
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_dnn OPTIONAL opencv_highgui WRAP python java) |
||||
|
||||
if(NOT CMAKE_CROSSCOMPILING OR OPENCV_FIND_TESSERACT) |
||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake) |
||||
find_package(Tesseract QUIET) |
||||
if(Tesseract_FOUND) |
||||
message(STATUS "Tesseract: YES") |
||||
set(HAVE_TESSERACT 1) |
||||
ocv_include_directories(${Tesseract_INCLUDE_DIR}) |
||||
ocv_target_link_libraries(${the_module} ${Tesseract_LIBRARIES}) |
||||
else() |
||||
message(STATUS "Tesseract: NO") |
||||
endif() |
||||
endif() |
||||
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/text_config.hpp.in |
||||
${CMAKE_BINARY_DIR}/text_config.hpp @ONLY) |
||||
|
||||
if(HAVE_CAFFE AND HAVE_GLOG AND HAVE_PROTOBUF) |
||||
include_directories(${Caffe_INCLUDE_DIR}) |
||||
find_package(HDF5 COMPONENTS HL REQUIRED) |
||||
include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR}) |
||||
list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES}) |
||||
find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem) |
||||
include_directories(SYSTEM ${Boost_INCLUDE_DIR}) |
||||
include_directories(SYSTEM ${CUDA_INCLUDE_DIR}) |
||||
link_directories(SYSTEM ${CUDA_LIBS}) |
||||
# include_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/include/ usr/local/cuda-8.0/include/ /usr/local/cuda-7.5/targets/x86_64-linux/include/ ) |
||||
#link_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/lib/ usr/local/cuda-8.0/lib/ /usr/local/cuda-7.5/targets/x86_64-linux/lib/ /usr/lib/openblas-base/lib /usr/local/cuda-8.0/lib64) |
||||
list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES}) |
||||
target_link_libraries(opencv_text atlas blas ${Caffe_LIBS} ${Glog_LIBS} ${Protobuf_LIBS} ${HDF5_LIBRARIES} ${Boost_LIBRARIES}) |
||||
add_definitions(-DHAVE_CAFFE) |
||||
endif() #HAVE_CAFFE |
||||
|
||||
message(STATUS "TEXT CAFFE SEARCH") |
||||
if() |
||||
message(STATUS "TEXT NO CAFFE CONFLICT") |
||||
else() |
||||
message(STATUS "TEXT CAFFE CONFLICT") |
||||
endif() |
||||
ocv_include_directories(${CMAKE_CURRENT_BINARY_DIR}) |
||||
|
||||
if(HAVE_opencv_dnn) |
||||
message(STATUS "dnn module found") |
||||
add_definitions(-DHAVE_DNN) |
||||
set(HAVE_DNN 1) |
||||
else() |
||||
message(STATUS "dnn module not found") |
||||
endif() |
||||
ocv_add_testdata(samples/ contrib/text |
||||
FILES_MATCHING PATTERN "*.xml" PATTERN "*.xml.gz" REGEX "scenetext[0-9]+.jpg" |
||||
) |
||||
|
@ -1,14 +0,0 @@ |
||||
# Caffe package for CNN Triplet training
#
# Outputs:
#   Caffe_INCLUDE_DIR - directory in which one of the listed Caffe headers was found
#   Caffe_LIBS        - path of the caffe library
#   Caffe_FOUND       - set to 1 when both of the above were located
unset(Caffe_FOUND)

find_library(Caffe_LIBS
  NAMES caffe
  HINTS /usr/local/lib)

find_path(Caffe_INCLUDE_DIR
  NAMES
    caffe/caffe.hpp
    caffe/common.hpp
    caffe/net.hpp
    caffe/proto/caffe.pb.h
    caffe/util/io.hpp
    caffe/vision_layers.hpp
  HINTS /usr/local/include)

# Both the header directory and the library are needed.
if(Caffe_INCLUDE_DIR AND Caffe_LIBS)
  set(Caffe_FOUND 1)
endif()
@ -1,10 +0,0 @@ |
||||
# Glog - required for Caffe
#
# Outputs:
#   Glog_LIBS  - path of the glog library
#   Glog_FOUND - set to 1 when the library was located
unset(Glog_FOUND)

find_library(Glog_LIBS
  NAMES glog
  HINTS /usr/local/lib)

if(Glog_LIBS)
  set(Glog_FOUND 1)
endif()
@ -1,10 +0,0 @@ |
||||
# Protobuf package - required for Caffe
#
# Outputs:
#   Protobuf_LIBS  - path of the protobuf library
#   Protobuf_FOUND - set to 1 when the library was located
unset(Protobuf_FOUND)

find_library(Protobuf_LIBS
  NAMES protobuf
  HINTS /usr/local/lib)

if(Protobuf_LIBS)
  set(Protobuf_FOUND 1)
endif()
@ -1,22 +0,0 @@ |
||||
# Tesseract OCR
#
# Locates the Tesseract headers together with the tesseract and lept libraries.
#
# Outputs:
#   Tesseract_INCLUDE_DIR - directory that contains tesseract/baseapi.h
#   Tesseract_LIBRARY     - path of the tesseract library
#   Lept_LIBRARY          - path of the lept library
#   Tesseract_LIBS        - both libraries, in link order
#   Tesseract_FOUND       - set to 1 only when the header directory and BOTH
#                           libraries were located
unset(Tesseract_FOUND)

find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h
  HINTS
    /usr/include
    /usr/local/include)

find_library(Tesseract_LIBRARY NAMES tesseract
  HINTS
    /usr/lib
    /usr/local/lib)

find_library(Lept_LIBRARY NAMES lept
  HINTS
    /usr/lib
    /usr/local/lib)

set(Tesseract_LIBS ${Tesseract_LIBRARY} ${Lept_LIBRARY})

# Check every component on its own. Testing the combined Tesseract_LIBS list is
# not reliable: if() only treats a value as false when the whole string ends in
# -NOTFOUND, so "Tesseract_LIBRARY-NOTFOUND;/usr/lib/liblept.so" evaluated as
# true and a missing tesseract library was reported as found.
if(Tesseract_INCLUDE_DIR AND Tesseract_LIBRARY AND Lept_LIBRARY)
  set(Tesseract_FOUND 1)
endif()
@ -1,57 +1,37 @@ |
||||
# -*- coding: utf-8 -*- |
||||
""" |
||||
Created on Wed Jul 19 17:54:00 2017 |
||||
|
||||
@author: sgnosh |
||||
""" |
||||
|
||||
#!/usr/bin/python |
||||
|
||||
import sys |
||||
import os |
||||
|
||||
import cv2 |
||||
import numpy as np |
||||
|
||||
print('\nDeeptextdetection.py') |
||||
print(' A demo script of text box alogorithm of the paper:') |
||||
print(' * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network https://arxiv.org/abs/1611.06779\n') |
||||
|
||||
|
||||
if (len(sys.argv) < 2): |
||||
print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n') |
||||
quit() |
||||
#if not cv2.text.cnn_config.caffe_backend.getCaffeAvailable(): |
||||
# print"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n" |
||||
# |
||||
# quit() |
||||
# check that the model and architecture files exist |
||||
if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'): |
||||
print " Model files not found in current directory. Aborting" |
||||
print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models" |
||||
quit() |
||||
cv2.text.cnn_config.caffe_backend.setCaffeGpuMode(True); |
||||
pathname = os.path.dirname(sys.argv[0]) |
||||
def main(): |
||||
print('\nDeeptextdetection.py') |
||||
print(' A demo script of text box alogorithm of the paper:') |
||||
print(' * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network https://arxiv.org/abs/1611.06779\n') |
||||
|
||||
if (len(sys.argv) < 2): |
||||
print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n') |
||||
quit() |
||||
|
||||
img = cv2.imread(str(sys.argv[1])) |
||||
textSpotter=cv2.text.textDetector_create( |
||||
"textbox_deploy.prototxt","textbox.caffemodel") |
||||
rects,outProbs = textSpotter.textDetectInImage(img); |
||||
# for visualization |
||||
vis = img.copy() |
||||
# Threshold to select rectangles : All rectangles for which outProbs is more than this threshold will be shown |
||||
thres = 0.6 |
||||
if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'): |
||||
print " Model files not found in current directory. Aborting" |
||||
print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models" |
||||
quit() |
||||
|
||||
img = cv2.imread(str(sys.argv[1])) |
||||
textSpotter = cv2.text.TextDetectorCNN_create("textbox_deploy.prototxt","textbox.caffemodel") |
||||
rects, outProbs = textSpotter.textDetectInImage(img); |
||||
vis = img.copy() |
||||
thres = 0.6 |
||||
|
||||
#Visualization |
||||
for r in range(0,np.shape(rects)[0]): |
||||
if outProbs[r] >thres: |
||||
rect = rects[r] |
||||
cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 0, 0), 2) |
||||
# cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 255, 255), 1) |
||||
for r in range(np.shape(rects)[0]): |
||||
if outProbs[r] > thres: |
||||
rect = rects[r] |
||||
cv2.rectangle(vis, (rect[0],rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (255, 0, 0), 2) |
||||
|
||||
cv2.imshow("Text detection result", vis) |
||||
cv2.waitKey() |
||||
|
||||
#Visualization |
||||
cv2.imshow("Text detection result", vis) |
||||
cv2.waitKey(0) |
||||
if __name__ == "__main__": |
||||
main() |
||||
|
@ -1,151 +1,86 @@ |
||||
/*
|
||||
* dictnet_demo.cpp |
||||
* |
||||
* Demonstrates simple use of the holistic word classifier in C++ |
||||
* |
||||
* Created on: June 26, 2016 |
||||
* Author: Anguelos Nicolaou <anguelos.nicolaou AT gmail.com> |
||||
*/ |
||||
|
||||
#include "opencv2/text.hpp" |
||||
#include "opencv2/highgui.hpp" |
||||
#include "opencv2/imgproc.hpp" |
||||
#include <opencv2/text.hpp> |
||||
#include <opencv2/highgui.hpp> |
||||
#include <opencv2/imgproc.hpp> |
||||
|
||||
#include <sstream> |
||||
#include <vector> |
||||
#include <iostream> |
||||
#include <iomanip> |
||||
#include <fstream> |
||||
|
||||
void textbox_draw(cv::Mat &src, std::vector<cv::Rect> &groups,std::vector<float> &probs,std::vector<cv::String> wordList,float thres); |
||||
inline std::string getHelpStr(std::string progFname){ |
||||
std::stringstream out; |
||||
out << " Demo of text detection CNN for text detection." << std::endl; |
||||
out << " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"<<std::endl<<std::endl; |
||||
|
||||
out << " Usage: " << progFname << " <output_file> <input_image>" << std::endl; |
||||
out << " Caffe Model files (textbox.caffemodel, textbox_deploy.prototxt)"<<std::endl; |
||||
out << " must be in the current directory." << std::endl << std::endl; |
||||
using namespace cv; |
||||
|
||||
out << " Obtaining Caffe Model files in linux shell:"<<std::endl; |
||||
out << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel"<<std::endl; |
||||
out << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt"<<std::endl; |
||||
out << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt"<<std::endl<<std::endl; |
||||
namespace |
||||
{ |
||||
std::string getHelpStr(std::string progFname) |
||||
{ |
||||
std::stringstream out; |
||||
out << " Demo of text detection CNN for text detection." << std::endl |
||||
<< " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"<<std::endl<<std::endl |
||||
<< " Usage: " << progFname << " <output_file> <input_image>" << std::endl |
||||
<< " Caffe Model files (textbox.caffemodel, textbox_deploy.prototxt)"<<std::endl |
||||
<< " must be in the current directory." << std::endl |
||||
<< " These files can be downloaded from https://github.com/sghoshcvc/TextBox-Models.git" << std::endl; |
||||
return out.str(); |
||||
} |
||||
|
||||
inline bool fileExists (std::string filename) { |
||||
bool fileExists (std::string filename) |
||||
{ |
||||
std::ifstream f(filename.c_str()); |
||||
return f.good(); |
||||
} |
||||
void textbox_draw(cv::Mat &src, std::vector<cv::Rect> &groups,std::vector<float> &probs,std::vector<cv::String> wordList,float thres=0.6) |
||||
|
||||
void textbox_draw(Mat src, std::vector<Rect>& groups, std::vector<float>& probs, float thres) |
||||
{ |
||||
for (int i=0;i<(int)groups.size(); i++) |
||||
for (size_t i = 0; i < groups.size(); i++) |
||||
{ |
||||
if(probs[i]>thres) |
||||
if(probs[i] > thres) |
||||
{ |
||||
if (src.type() == CV_8UC3) |
||||
{ |
||||
cv::rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 0, 255, 255 ), 3, 8 ); |
||||
cv::putText(src, wordList[i],groups.at(i).tl() , cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar( 0,0,255 )); |
||||
rectangle(src, groups[i], Scalar( 0, 255, 255 ), 2, LINE_AA); |
||||
String label = format("%.2f", probs[i]); |
||||
std::cout << "text box: " << groups[i] << " confidence: " << probs[i] << "\n"; |
||||
putText(src, label, groups.at(i).tl(), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,255 ), 1, LINE_AA); |
||||
} |
||||
else |
||||
rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 255 ), 3, 8 ); |
||||
rectangle(src, groups[i], Scalar( 255 ), 3, 8 ); |
||||
} |
||||
} |
||||
} |
||||
|
||||
} |
||||
|
||||
int main(int argc, const char * argv[]){ |
||||
if(!cv::text::cnn_config::caffe_backend::getCaffeAvailable()){ |
||||
std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n"; |
||||
//exit(1);
|
||||
} |
||||
std::vector<std::string> backends=cv::text::cnn_config::getAvailableBackends(); |
||||
std::cout << "The Following backends are available" << "\n"; |
||||
for (int i=0;i<backends.size();i++) |
||||
std::cout << backends[i] << "\n"; |
||||
|
||||
// printf("%s",x);
|
||||
//set to true if you have a GPU with more than 3GB
|
||||
if(cv::text::cnn_config::caffe_backend::getCaffeAvailable()) |
||||
cv::text::cnn_config::caffe_backend::setCaffeGpuMode(true); |
||||
|
||||
if (argc < 3){ |
||||
std::cout<<getHelpStr(argv[0]); |
||||
std::cout<<"Insufiecient parameters. Aborting!"<<std::endl; |
||||
int main(int argc, const char * argv[]) |
||||
{ |
||||
if (argc < 2) |
||||
{ |
||||
std::cout << getHelpStr(argv[0]); |
||||
std::cout << "Insufiecient parameters. Aborting!" << std::endl; |
||||
exit(1); |
||||
} |
||||
|
||||
if (!fileExists("textbox.caffemodel") || |
||||
!fileExists("textbox_deploy.prototxt")){ |
||||
// !fileExists("dictnet_vgg_labels.txt"))
|
||||
|
||||
std::cout<<getHelpStr(argv[0]); |
||||
std::cout<<"Model files not found in the current directory. Aborting!"<<std::endl; |
||||
exit(1); |
||||
} |
||||
|
||||
if (fileExists(argv[1])){ |
||||
!fileExists("textbox_deploy.prototxt")) |
||||
{ |
||||
std::cout<<getHelpStr(argv[0]); |
||||
std::cout<<"Output file must not exist. Aborting!"<<std::endl; |
||||
std::cout << "Model files not found in the current directory. Aborting!" << std::endl; |
||||
exit(1); |
||||
} |
||||
|
||||
cv::Mat image; |
||||
image = cv::imread(cv::String(argv[2])); |
||||
|
||||
Mat image = imread(String(argv[1]), IMREAD_COLOR); |
||||
|
||||
std::cout<<"Starting Text Box Demo"<<std::endl; |
||||
cv::Ptr<cv::text::textDetector> textSpotter=cv::text::textDetector::create( |
||||
"textbox_deploy.prototxt","textbox.caffemodel"); |
||||
std::cout << "Starting Text Box Demo" << std::endl; |
||||
Ptr<text::TextDetectorCNN> textSpotter = |
||||
text::TextDetectorCNN::create("textbox_deploy.prototxt","textbox.caffemodel", false); |
||||
|
||||
//cv::Ptr<cv::text::textDetector> wordSpotter=
|
||||
// cv::text::textDetector::create(cnn);
|
||||
std::cout<<"Created Text Spotter with text Boxes"; |
||||
|
||||
std::vector<cv::Rect> bbox; |
||||
std::vector<Rect> bbox; |
||||
std::vector<float> outProbabillities; |
||||
textSpotter->textDetectInImage(image,bbox,outProbabillities); |
||||
// textbox_draw(image, bbox,outProbabillities);
|
||||
float thres =0.6f; |
||||
std::vector<cv::Mat> imageList; |
||||
for(int imageIdx=0;imageIdx<(int)bbox.size();imageIdx++){ |
||||
if(outProbabillities[imageIdx]>thres){ |
||||
imageList.push_back(image(bbox.at(imageIdx))); |
||||
} |
||||
|
||||
} |
||||
// call dict net here for all detected parts
|
||||
cv::Ptr<cv::text::DeepCNN> cnn=cv::text::DeepCNN::createDictNet( |
||||
"dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel",cv::text::OCR_HOLISTIC_BACKEND_DNN); |
||||
|
||||
cv::Ptr<cv::text::OCRHolisticWordRecognizer> wordSpotter= |
||||
cv::text::OCRHolisticWordRecognizer::create(cnn,"dictnet_vgg_labels.txt"); |
||||
|
||||
std::vector<cv::String> wordList; |
||||
std::vector<double> wordProbabillities; |
||||
wordSpotter->recogniseImageBatch(imageList,wordList,wordProbabillities); |
||||
// write the output in file
|
||||
std::ofstream out; |
||||
out.open(argv[1]); |
||||
|
||||
|
||||
for (int i=0;i<(int)wordList.size(); i++) |
||||
{ |
||||
cv::Point tl_ = bbox.at(i).tl(); |
||||
cv::Point br_ = bbox.at(i).br(); |
||||
|
||||
out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl; |
||||
|
||||
} |
||||
out.close(); |
||||
textbox_draw(image, bbox,outProbabillities,wordList); |
||||
textSpotter->textDetectInImage(image, bbox, outProbabillities); |
||||
|
||||
textbox_draw(image, bbox, outProbabillities, 0.5f); |
||||
|
||||
cv::imshow("TextBox Demo",image); |
||||
imshow("TextBox Demo",image); |
||||
std::cout << "Done!" << std::endl << std::endl; |
||||
std::cout << "Press any key to exit." << std::endl << std::endl; |
||||
if ((cv::waitKey()&0xff) == ' ') |
||||
return 0; |
||||
waitKey(); |
||||
return 0; |
||||
} |
||||
|
@ -1,387 +0,0 @@ |
||||
#include "precomp.hpp" |
||||
#include "opencv2/imgproc.hpp" |
||||
#include "opencv2/highgui.hpp" |
||||
#include "opencv2/core.hpp" |
||||
|
||||
|
||||
|
||||
#include <iostream> |
||||
#include <fstream> |
||||
#include <sstream> |
||||
#include <queue> |
||||
#include <algorithm> |
||||
#include <iosfwd> |
||||
#include <memory> |
||||
#include <string> |
||||
#include <utility> |
||||
#include <vector> |
||||
|
||||
namespace cv { namespace text { |
||||
//************************************************************************************
|
||||
//****************** ImagePreprocessor *******************************************
|
||||
//************************************************************************************
|
||||
|
||||
// Public entry point: unwraps the input array, delegates the actual work to
// preprocess_() and copies the processed image into the caller's output array.
void ImagePreprocessor::preprocess(InputArray input, OutputArray output, Size sz, int outputChannels)
{
    Mat source = input.getMat();
    Mat processed;
    preprocess_(source, processed, sz, outputChannels);
    processed.copyTo(output);
}
||||
// Public wrapper that forwards the mean image to set_mean_().
void ImagePreprocessor::set_mean(Mat mean)
{
    set_mean_(mean);
}
||||
|
||||
|
||||
|
||||
class ResizerPreprocessor: public ImagePreprocessor{ |
||||
protected: |
||||
void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ |
||||
//TODO put all the logic of channel and depth conversions in ImageProcessor class
|
||||
CV_Assert(outputChannels==1 || outputChannels==3); |
||||
CV_Assert(input.channels()==1 || input.channels()==3); |
||||
if(input.channels()!=outputChannels) |
||||
{ |
||||
Mat tmpInput; |
||||
if(outputChannels==1){ |
||||
cvtColor(input,tmpInput,COLOR_BGR2GRAY); |
||||
if(input.depth()==CV_8U) |
||||
{ |
||||
tmpInput.convertTo(output,CV_32FC1,1/255.0); |
||||
}else |
||||
{//Assuming values are at the desired [0,1] range
|
||||
tmpInput.convertTo(output, CV_32FC1); |
||||
} |
||||
}else |
||||
{ |
||||
cvtColor(input,tmpInput,COLOR_GRAY2BGR); |
||||
if(input.depth()==CV_8U) |
||||
{ |
||||
tmpInput.convertTo(output,CV_32FC3,1/255.0); |
||||
}else |
||||
{//Assuming values are at the desired [0,1] range
|
||||
tmpInput.convertTo(output, CV_32FC3); |
||||
} |
||||
} |
||||
}else |
||||
{ |
||||
if(input.channels()==1) |
||||
{ |
||||
if(input.depth()==CV_8U) |
||||
{ |
||||
input.convertTo(output, CV_32FC1,1/255.0); |
||||
}else |
||||
{//Assuming values are at the desired [0,1] range
|
||||
input.convertTo(output, CV_32FC1); |
||||
} |
||||
}else |
||||
{ |
||||
if(input.depth()==CV_8U){ |
||||
input.convertTo(output, CV_32FC3,1/255.0); |
||||
}else |
||||
{//Assuming values are at the desired [0,1] range
|
||||
input.convertTo(output, CV_32FC3); |
||||
} |
||||
} |
||||
} |
||||
if(outputSize.width!=0 && outputSize.height!=0) |
||||
{ |
||||
resize(output,output,outputSize); |
||||
} |
||||
} |
||||
//void set_mean_(Mat m){}
|
||||
public: |
||||
ResizerPreprocessor(){} |
||||
~ResizerPreprocessor(){} |
||||
}; |
||||
|
||||
class StandarizerPreprocessor: public ImagePreprocessor{ |
||||
protected: |
||||
double sigma_; |
||||
//void set_mean_(Mat M){}
|
||||
|
||||
void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ |
||||
|
||||
//TODO put all the logic of channel and depth conversions in ImageProcessor class
|
||||
CV_Assert(outputChannels==1 || outputChannels==3); |
||||
CV_Assert(input.channels()==1 || input.channels()==3); |
||||
if(input.channels()!=outputChannels) |
||||
{ |
||||
Mat tmpInput; |
||||
if(outputChannels==1) |
||||
{ |
||||
cvtColor(input,tmpInput,COLOR_BGR2GRAY); |
||||
if(input.depth()==CV_8U) |
||||
{ |
||||
tmpInput.convertTo(output,CV_32FC1,1/255.0); |
||||
}else |
||||
{//Assuming values are at the desired [0,1] range
|
||||
tmpInput.convertTo(output, CV_32FC1); |
||||
} |
||||
}else |
||||
{ |
||||
cvtColor(input,tmpInput,COLOR_GRAY2BGR); |
||||
if(input.depth()==CV_8U) |
||||
{ |
||||
tmpInput.convertTo(output,CV_32FC3,1/255.0); |
||||
}else |
||||
{//Assuming values are at the desired [0,1] range
|
||||
tmpInput.convertTo(output, CV_32FC3); |
||||
} |
||||
} |
||||
}else |
||||
{ |
||||
if(input.channels()==1) |
||||
{ |
||||
if(input.depth()==CV_8U) |
||||
{ |
||||
input.convertTo(output, CV_32FC1,1/255.0); |
||||
}else |
||||
{//Assuming values are at the desired [0,1] range
|
||||
input.convertTo(output, CV_32FC1); |
||||
} |
||||
}else |
||||
{ |
||||
if(input.depth()==CV_8U) |
||||
{ |
||||
input.convertTo(output, CV_32FC3,1/255.0); |
||||
}else |
||||
{//Assuming values are at the desired [0,1] range
|
||||
input.convertTo(output, CV_32FC3); |
||||
} |
||||
} |
||||
} |
||||
if(outputSize.width!=0 && outputSize.height!=0) |
||||
{ |
||||
resize(output,output,outputSize); |
||||
} |
||||
|
||||
Scalar mean,dev; |
||||
meanStdDev(output,mean,dev); |
||||
subtract(output,mean[0],output); |
||||
divide(output,(dev[0]/sigma_),output); |
||||
} |
||||
public: |
||||
StandarizerPreprocessor(double sigma):sigma_(sigma){} |
||||
~StandarizerPreprocessor(){} |
||||
|
||||
}; |
||||
|
||||
class customPreprocessor:public ImagePreprocessor{ |
||||
protected: |
||||
|
||||
double rawval_; |
||||
Mat mean_; |
||||
String channel_order_; |
||||
|
||||
void set_mean_(Mat imMean_){ |
||||
|
||||
imMean_.copyTo(this->mean_); |
||||
|
||||
|
||||
} |
||||
|
||||
void set_raw_scale(int rawval){ |
||||
rawval_ = rawval; |
||||
|
||||
} |
||||
void set_channels(String channel_order){ |
||||
channel_order_=channel_order; |
||||
} |
||||
|
||||
|
||||
void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ |
||||
//TODO put all the logic of channel and depth conversions in ImageProcessor class
|
||||
|
||||
CV_Assert(outputChannels==1 || outputChannels==3); |
||||
CV_Assert(input.channels()==1 || input.channels()==3); |
||||
if(input.channels()!=outputChannels) |
||||
{ |
||||
Mat tmpInput; |
||||
if(outputChannels==1) |
||||
{ |
||||
cvtColor(input,tmpInput,COLOR_BGR2GRAY); |
||||
if(input.depth()==CV_8U) |
||||
{ |
||||
if (rawval_ == 1) |
||||
tmpInput.convertTo(output,CV_32FC3,1/255.0); |
||||
else |
||||
tmpInput.convertTo(output,CV_32FC1); |
||||
}else |
||||
{//Assuming values are at the desired [0,1] range
|
||||
if (rawval_ ==1) |
||||
tmpInput.convertTo(output, CV_32FC1); |
||||
else |
||||
tmpInput.convertTo(output, CV_32FC1,rawval_); |
||||
} |
||||
}else |
||||
{ |
||||
cvtColor(input,tmpInput,COLOR_GRAY2BGR); |
||||
if(input.depth()==CV_8U) |
||||
{ |
||||
if (rawval_ == 1) |
||||
tmpInput.convertTo(output,CV_32FC3,1/255.0); |
||||
else |
||||
tmpInput.convertTo(output,CV_32FC1); |
||||
}else |
||||
{//Assuming values are at the desired [0,1] range
|
||||
if (rawval_ ==1) |
||||
tmpInput.convertTo(output, CV_32FC1); |
||||
else |
||||
tmpInput.convertTo(output, CV_32FC1,rawval_); |
||||
} |
||||
} |
||||
}else |
||||
{ |
||||
if(input.channels()==1) |
||||
{ |
||||
if(input.depth()==CV_8U) |
||||
{ |
||||
if (rawval_ == 1) |
||||
input.convertTo(output,CV_32FC1,1/255.0); |
||||
else |
||||
input.convertTo(output,CV_32FC1); |
||||
}else |
||||
{//Assuming values are at the desired [0,1] range
|
||||
if (rawval_ ==1) |
||||
input.convertTo(output, CV_32FC1); |
||||
else |
||||
input.convertTo(output, CV_32FC1,rawval_); |
||||
} |
||||
}else |
||||
{ |
||||
if(input.depth()==CV_8U) |
||||
{ |
||||
if (rawval_ == 1) |
||||
input.convertTo(output,CV_32FC3,1/255.0); |
||||
else |
||||
input.convertTo(output,CV_32FC3); |
||||
}else |
||||
{//Assuming values are at the desired [0,1] range
|
||||
if (rawval_ ==1) |
||||
input.convertTo(output, CV_32FC3); |
||||
else |
||||
input.convertTo(output, CV_32FC3,rawval_); |
||||
} |
||||
} |
||||
} |
||||
if(outputSize.width!=0 && outputSize.height!=0) |
||||
{ |
||||
resize(output,output,outputSize); |
||||
} |
||||
|
||||
if (!this->mean_.empty()){ |
||||
|
||||
Scalar mean_s(this->mean_.at<uchar>(0,0),this->mean_.at<uchar>(0,1),this->mean_.at<uchar>(0,2)); |
||||
subtract(output,mean_s,output); |
||||
} |
||||
else{ |
||||
Scalar mean_s; |
||||
mean_s = mean(output); |
||||
subtract(output,mean_s,output); |
||||
} |
||||
|
||||
} |
||||
|
||||
public: |
||||
customPreprocessor( double rawval,String channel_order):rawval_(rawval),channel_order_(channel_order){} |
||||
~customPreprocessor(){} |
||||
|
||||
}; |
||||
|
||||
class MeanSubtractorPreprocessor: public ImagePreprocessor{ |
||||
protected: |
||||
Mat mean_; |
||||
//void set_mean_(Mat m){}
|
||||
void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ |
||||
//TODO put all the logic of channel and depth conversions in ImageProcessor class
|
||||
CV_Assert(this->mean_.cols==outputSize.width && this->mean_.rows ==outputSize.height); |
||||
CV_Assert(outputChannels==1 || outputChannels==3); |
||||
CV_Assert(input.channels()==1 || input.channels()==3); |
||||
if(input.channels()!=outputChannels) |
||||
{ |
||||
Mat tmpInput; |
||||
if(outputChannels==1) |
||||
{ |
||||
cvtColor(input,tmpInput,COLOR_BGR2GRAY); |
||||
if(input.depth()==CV_8U) |
||||
{ |
||||
tmpInput.convertTo(output,CV_32FC1,1/255.0); |
||||
}else |
||||
{//Assuming values are at the desired [0,1] range
|
||||
tmpInput.convertTo(output, CV_32FC1); |
||||
} |
||||
}else |
||||
{ |
||||
cvtColor(input,tmpInput,COLOR_GRAY2BGR); |
||||
if(input.depth()==CV_8U) |
||||
{ |
||||
tmpInput.convertTo(output,CV_32FC3,1/255.0); |
||||
}else |
||||
{//Assuming values are at the desired [0,1] range
|
||||
tmpInput.convertTo(output, CV_32FC3); |
||||
} |
||||
} |
||||
}else |
||||
{ |
||||
if(input.channels()==1) |
||||
{ |
||||
if(input.depth()==CV_8U) |
||||
{ |
||||
input.convertTo(output, CV_32FC1,1/255.0); |
||||
}else |
||||
{//Assuming values are at the desired [0,1] range
|
||||
input.convertTo(output, CV_32FC1); |
||||
} |
||||
}else |
||||
{ |
||||
if(input.depth()==CV_8U) |
||||
{ |
||||
input.convertTo(output, CV_32FC3,1/255.0); |
||||
}else |
||||
{//Assuming values are at the desired [0,1] range
|
||||
input.convertTo(output, CV_32FC3); |
||||
} |
||||
} |
||||
} |
||||
if(outputSize.width!=0 && outputSize.height!=0) |
||||
{ |
||||
resize(output,output,outputSize); |
||||
} |
||||
subtract(output,this->mean_,output); |
||||
} |
||||
public: |
||||
MeanSubtractorPreprocessor(Mat mean) |
||||
{ |
||||
mean.copyTo(this->mean_); |
||||
} |
||||
|
||||
~MeanSubtractorPreprocessor(){} |
||||
}; |
||||
|
||||
|
||||
|
||||
// Factory: returns a new ResizerPreprocessor behind the base-class pointer.
Ptr<ImagePreprocessor> ImagePreprocessor::createResizer()
{
    Ptr<ImagePreprocessor> resizer(new ResizerPreprocessor);
    return resizer;
}
||||
|
||||
// Factory: returns a new StandarizerPreprocessor constructed with sigma.
Ptr<ImagePreprocessor> ImagePreprocessor::createImageStandarizer(double sigma)
{
    Ptr<ImagePreprocessor> standarizer(new StandarizerPreprocessor(sigma));
    return standarizer;
}
||||
// Factory: returns a new customPreprocessor constructed with the given raw
// scale and channel order string.
Ptr<ImagePreprocessor> ImagePreprocessor::createImageCustomPreprocessor(double rawval, String channel_order)
{
    Ptr<ImagePreprocessor> custom(new customPreprocessor(rawval, channel_order));
    return custom;
}
||||
|
||||
// Factory: returns a new MeanSubtractorPreprocessor; the preprocessor keeps
// its own copy of the mean image.
Ptr<ImagePreprocessor> ImagePreprocessor::createImageMeanSubtractor(InputArray meanImg)
{
    Mat meanMat = meanImg.getMat();
    Ptr<ImagePreprocessor> subtractor(new MeanSubtractorPreprocessor(meanMat));
    return subtractor;
}
||||
} |
||||
} |
@ -1,697 +0,0 @@ |
||||
#include "precomp.hpp" |
||||
#include "opencv2/imgproc.hpp" |
||||
#include "opencv2/highgui.hpp" |
||||
#include "opencv2/core.hpp" |
||||
|
||||
|
||||
|
||||
#include <iostream> |
||||
#include <fstream> |
||||
#include <sstream> |
||||
#include <queue> |
||||
#include <algorithm> |
||||
#include <iosfwd> |
||||
#include <memory> |
||||
#include <string> |
||||
#include <utility> |
||||
#include <vector> |
||||
|
||||
|
||||
#ifdef HAVE_CAFFE |
||||
#include "caffe/caffe.hpp" |
||||
#endif |
||||
|
||||
#ifdef HAVE_DNN |
||||
#include "opencv2/dnn.hpp" |
||||
#endif |
||||
|
||||
using namespace cv; |
||||
using namespace cv::dnn; |
||||
using namespace std; |
||||
namespace cv { namespace text { |
||||
|
||||
//Maybe OpenCV has a routine better suited
|
||||
// Returns true when the named file can be opened for reading.
inline bool fileExists(String filename)
{
    std::ifstream probe(filename.c_str());
    const bool readable = probe.good();
    return readable;
}
||||
|
||||
|
||||
|
||||
//************************************************************************************
|
||||
//****************** TextImageClassifier *****************************************
|
||||
//************************************************************************************
|
||||
|
||||
void TextImageClassifier::preprocess(const Mat& input,Mat& output) |
||||
{ |
||||
this->preprocessor_->preprocess_(input,output,this->inputGeometry_,this->channelCount_); |
||||
} |
||||
|
||||
// Replaces the preprocessor used by this classifier; an empty pointer is
// rejected by the assertion.
void TextImageClassifier::setPreprocessor(Ptr<ImagePreprocessor> ptr)
{
    CV_Assert(!ptr.empty());
    this->preprocessor_ = ptr;
}
||||
|
||||
// Returns the preprocessor currently attached to this classifier.
Ptr<ImagePreprocessor> TextImageClassifier::getPreprocessor()
{
    return this->preprocessor_;
}
||||
|
||||
|
||||
class DeepCNNCaffeImpl: public DeepCNN{ |
||||
protected: |
||||
void classifyMiniBatch(std::vector<Mat> inputImageList, Mat outputMat) |
||||
{ |
||||
//Classifies a list of images containing at most minibatchSz_ images
|
||||
CV_Assert(int(inputImageList.size())<=this->minibatchSz_); |
||||
CV_Assert(outputMat.isContinuous()); |
||||
|
||||
|
||||
#ifdef HAVE_CAFFE |
||||
net_->input_blobs()[0]->Reshape(inputImageList.size(), this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width); |
||||
net_->Reshape(); |
||||
float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data(); |
||||
float* inputData=inputBuffer; |
||||
|
||||
for(size_t imgNum=0;imgNum<inputImageList.size();imgNum++) |
||||
{ |
||||
std::vector<Mat> input_channels; |
||||
Mat preprocessed; |
||||
// if the image have multiple color channels the input layer should be populated accordingly
|
||||
for (int channel=0;channel < this->channelCount_;channel++){ |
||||
|
||||
cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData); |
||||
input_channels.push_back(netInputWraped); |
||||
//input_data += width * height;
|
||||
inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); |
||||
|
||||
} |
||||
this->preprocess(inputImageList[imgNum],preprocessed); |
||||
split(preprocessed, input_channels); |
||||
|
||||
|
||||
} |
||||
this->net_->ForwardPrefilled(); |
||||
const float* outputNetData=net_->output_blobs()[0]->cpu_data(); |
||||
this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); |
||||
int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width; |
||||
|
||||
|
||||
//outputMat.resize(this->outputGeometry_.height * this->outputGeometry_.width);
|
||||
float*outputMatData=(float*)(outputMat.data); |
||||
memcpy(outputMatData,outputNetData,sizeof(float)*outputSz*inputImageList.size()); |
||||
|
||||
#endif |
||||
} |
||||
|
||||
#ifdef HAVE_CAFFE |
||||
Ptr<caffe::Net<float> > net_; |
||||
#endif |
||||
//Size inputGeometry_;//=Size(100,32);
|
||||
int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
|
||||
int outputSize_; |
||||
//Size outputGeometry_;
|
||||
public: |
||||
DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn): |
||||
minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){ |
||||
channelCount_=dn.channelCount_; |
||||
inputGeometry_=dn.inputGeometry_; |
||||
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
|
||||
#ifdef HAVE_CAFFE |
||||
this->net_=dn.net_; |
||||
#endif |
||||
} |
||||
DeepCNNCaffeImpl& operator=(const DeepCNNCaffeImpl &dn) |
||||
{ |
||||
#ifdef HAVE_CAFFE |
||||
this->net_=dn.net_; |
||||
#endif |
||||
this->setPreprocessor(dn.preprocessor_); |
||||
this->inputGeometry_=dn.inputGeometry_; |
||||
this->channelCount_=dn.channelCount_; |
||||
this->minibatchSz_=dn.minibatchSz_; |
||||
this->outputSize_=dn.outputSize_; |
||||
this->preprocessor_=dn.preprocessor_; |
||||
this->outputGeometry_=dn.outputGeometry_; |
||||
return *this; |
||||
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
|
||||
} |
||||
|
||||
DeepCNNCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz) |
||||
:minibatchSz_(maxMinibatchSz) |
||||
{ |
||||
|
||||
CV_Assert(this->minibatchSz_>0); |
||||
CV_Assert(fileExists(modelArchFilename)); |
||||
CV_Assert(fileExists(modelWeightsFilename)); |
||||
CV_Assert(!preprocessor.empty()); |
||||
this->setPreprocessor(preprocessor); |
||||
#ifdef HAVE_CAFFE |
||||
this->net_.reset(new caffe::Net<float>(modelArchFilename, caffe::TEST)); |
||||
CV_Assert(net_->num_inputs()==1); |
||||
CV_Assert(net_->num_outputs()==1); |
||||
CV_Assert(this->net_->input_blobs()[0]->channels()==1 |
||||
||this->net_->input_blobs()[0]->channels()==3); |
||||
this->channelCount_=this->net_->input_blobs()[0]->channels(); |
||||
|
||||
|
||||
|
||||
this->net_->CopyTrainedLayersFrom(modelWeightsFilename); |
||||
|
||||
caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0]; |
||||
|
||||
this->inputGeometry_=Size(inputLayer->width(), inputLayer->height()); |
||||
this->channelCount_ = inputLayer->channels(); |
||||
|
||||
inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width); |
||||
net_->Reshape(); |
||||
this->outputSize_=net_->output_blobs()[0]->channels(); |
||||
this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); |
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#else |
||||
CV_Error(Error::StsError,"Caffe not available during compilation!"); |
||||
#endif |
||||
} |
||||
|
||||
void classify(InputArray image, OutputArray classProbabilities) |
||||
{ |
||||
std::vector<Mat> inputImageList; |
||||
inputImageList.push_back(image.getMat()); |
||||
classifyBatch(inputImageList,classProbabilities); |
||||
} |
||||
|
||||
void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities) |
||||
{ |
||||
std::vector<Mat> allImageVector; |
||||
inputImageList.getMatVector(allImageVector); |
||||
size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmentic
|
||||
|
||||
size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic
|
||||
classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F); |
||||
Mat outputMat = classProbabilities.getMat(); |
||||
for(size_t imgNum=0;imgNum<allImageVector.size();imgNum+=minibatchSize) |
||||
{ |
||||
size_t rangeEnd=imgNum+std::min<size_t>(allImageVector.size()-imgNum,minibatchSize); |
||||
std::vector<Mat>::const_iterator from=std::vector<Mat>::const_iterator(allImageVector.begin()+imgNum); |
||||
std::vector<Mat>::const_iterator to=std::vector<Mat>::const_iterator(allImageVector.begin()+rangeEnd); |
||||
std::vector<Mat> minibatchInput(from,to); |
||||
classifyMiniBatch(minibatchInput,outputMat.rowRange(int(imgNum),int(rangeEnd))); |
||||
|
||||
} |
||||
|
||||
} |
||||
|
||||
int getOutputSize() |
||||
{ |
||||
return this->outputSize_; |
||||
} |
||||
Size getOutputGeometry() |
||||
{ |
||||
return this->outputGeometry_; |
||||
} |
||||
|
||||
int getMinibatchSize() |
||||
{ |
||||
return this->minibatchSz_; |
||||
} |
||||
|
||||
int getBackend() |
||||
{ |
||||
return OCR_HOLISTIC_BACKEND_CAFFE; |
||||
} |
||||
}; |
||||
|
||||
class DeepCNNOpenCvDNNImpl: public DeepCNN{ |
||||
protected: |
||||
|
||||
void classifyMiniBatch(std::vector<Mat> inputImageList, Mat outputMat) |
||||
{ |
||||
//Classifies a list of images containing at most minibatchSz_ images
|
||||
CV_Assert(int(inputImageList.size())<=this->minibatchSz_); |
||||
CV_Assert(outputMat.isContinuous()); |
||||
|
||||
#ifdef HAVE_DNN |
||||
|
||||
std::vector<Mat> preProcessedImList; // to store preprocessed images, should it be handled inside preprocessing class?
|
||||
|
||||
Mat preprocessed; |
||||
// preprocesses each image in the inputImageList and push to preprocessedImList
|
||||
for(size_t imgNum=0;imgNum<inputImageList.size();imgNum++) |
||||
{ |
||||
this->preprocess(inputImageList[imgNum],preprocessed); |
||||
preProcessedImList.push_back(preprocessed); |
||||
} |
||||
// set input data blob in dnn::net
|
||||
net_->setInput(blobFromImages(preProcessedImList,1, this->inputGeometry_), "data"); |
||||
|
||||
float*outputMatData=(float*)(outputMat.data); |
||||
//Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ;
|
||||
Mat outputNet = this->net_->forward(); |
||||
outputNet = outputNet.reshape(1, 1); |
||||
|
||||
float*outputNetData=(float*)(outputNet.data); |
||||
|
||||
memcpy(outputMatData,outputNetData,sizeof(float)*this->outputSize_*inputImageList.size()); |
||||
|
||||
#endif |
||||
} |
||||
|
||||
#ifdef HAVE_DNN |
||||
Ptr<Net> net_; |
||||
#endif |
||||
// hard coding input image size. anything in DNN library to get that from prototxt??
|
||||
// Size inputGeometry_;//=Size(100,32);
|
||||
int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
|
||||
int outputSize_; |
||||
//Size outputGeometry_;//= Size(1,1);
|
||||
//int channelCount_;
|
||||
// int inputChannel_ ;//=1;
|
||||
// int _inputHeight;
|
||||
//int _inputWidth ;
|
||||
//int _inputChannel ;
|
||||
public: |
||||
DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn): |
||||
minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){ |
||||
channelCount_=dn.channelCount_; |
||||
inputGeometry_=dn.inputGeometry_; |
||||
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
|
||||
#ifdef HAVE_DNN |
||||
this->net_=dn.net_; |
||||
#endif |
||||
} |
||||
DeepCNNOpenCvDNNImpl& operator=(const DeepCNNOpenCvDNNImpl &dn) |
||||
{ |
||||
#ifdef HAVE_DNN |
||||
this->net_=dn.net_; |
||||
#endif |
||||
this->setPreprocessor(dn.preprocessor_); |
||||
this->inputGeometry_=dn.inputGeometry_; |
||||
this->channelCount_=dn.channelCount_; |
||||
this->minibatchSz_=dn.minibatchSz_; |
||||
this->outputSize_=dn.outputSize_; |
||||
this->preprocessor_=dn.preprocessor_; |
||||
this->outputGeometry_=dn.outputGeometry_; |
||||
return *this; |
||||
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
|
||||
} |
||||
|
||||
DeepCNNOpenCvDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz,int inputWidth ,int inputHeight ,int inputChannel ) |
||||
:minibatchSz_(maxMinibatchSz) |
||||
{ |
||||
|
||||
CV_Assert(this->minibatchSz_>0); |
||||
CV_Assert(fileExists(modelArchFilename)); |
||||
CV_Assert(fileExists(modelWeightsFilename)); |
||||
CV_Assert(!preprocessor.empty()); |
||||
this->setPreprocessor(preprocessor); |
||||
#ifdef HAVE_DNN |
||||
|
||||
this->net_ = makePtr<Net>(readNetFromCaffe(modelArchFilename,modelWeightsFilename)); |
||||
|
||||
|
||||
|
||||
if (this->net_.empty()) |
||||
{ |
||||
std::cerr << "Can't load network by using the following files: " << std::endl; |
||||
std::cerr << "prototxt: " << modelArchFilename << std::endl; |
||||
std::cerr << "caffemodel: " << modelWeightsFilename << std::endl; |
||||
//std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl;
|
||||
//std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
|
||||
exit(-1); |
||||
} |
||||
|
||||
|
||||
this->inputGeometry_=Size(inputWidth,inputHeight);// Size(inputLayer->width(), inputLayer->height());
|
||||
this->channelCount_ = inputChannel;//inputLayer->channels();
|
||||
|
||||
//inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
|
||||
Ptr< Layer > outLayer= net_->getLayer (net_->getLayerId (net_->getLayerNames()[net_->getLayerNames().size()-2])); |
||||
//std::vector<Mat> blobs = outLayer->blobs;
|
||||
|
||||
this->outputSize_=(outLayer->blobs)[1].size[0] ;//net_->output_blobs()[0]->channels();
|
||||
//this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#else |
||||
CV_Error(Error::StsError,"DNN module not available during compilation!"); |
||||
#endif |
||||
} |
||||
|
||||
void classify(InputArray image, OutputArray classProbabilities) |
||||
{ |
||||
std::vector<Mat> inputImageList; |
||||
inputImageList.push_back(image.getMat()); |
||||
classifyBatch(inputImageList,classProbabilities); |
||||
} |
||||
|
||||
void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities) |
||||
{ |
||||
std::vector<Mat> allImageVector; |
||||
inputImageList.getMatVector(allImageVector); |
||||
size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmentic
|
||||
|
||||
size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic
|
||||
classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F); |
||||
Mat outputMat = classProbabilities.getMat(); |
||||
|
||||
for(size_t imgNum=0;imgNum<allImageVector.size();imgNum+=minibatchSize) |
||||
{ |
||||
size_t rangeEnd=imgNum+std::min<size_t>(allImageVector.size()-imgNum,minibatchSize); |
||||
std::vector<Mat>::const_iterator from=std::vector<Mat>::const_iterator(allImageVector.begin()+imgNum); |
||||
std::vector<Mat>::const_iterator to=std::vector<Mat>::const_iterator(allImageVector.begin()+rangeEnd); |
||||
std::vector<Mat> minibatchInput(from,to); |
||||
classifyMiniBatch(minibatchInput,outputMat.rowRange(int(imgNum),int(rangeEnd))); |
||||
|
||||
} |
||||
|
||||
} |
||||
|
||||
int getOutputSize() |
||||
{ |
||||
return this->outputSize_; |
||||
} |
||||
Size getOutputGeometry() |
||||
{ |
||||
return this->outputGeometry_; |
||||
} |
||||
|
||||
int getMinibatchSize() |
||||
{ |
||||
return this->minibatchSz_; |
||||
} |
||||
|
||||
int getBackend() |
||||
{ |
||||
return OCR_HOLISTIC_BACKEND_DNN; |
||||
} |
||||
}; |
||||
|
||||
// Factory for a DeepCNN classifier on the requested backend.
// An empty preprocessor is replaced by the default resizer. With
// OCR_HOLISTIC_BACKEND_DEFAULT the Caffe implementation is preferred when it
// was compiled in, then opencv_dnn; any unsupported backend raises an error.
Ptr<DeepCNN> DeepCNN::create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz,int backEnd)
{
    if(preprocessor.empty())
        preprocessor=ImagePreprocessor::createResizer();

    if(backEnd==OCR_HOLISTIC_BACKEND_DEFAULT)
    {
#ifdef HAVE_CAFFE
        return Ptr<DeepCNN>(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz));
#elif defined(HAVE_DNN)
        return Ptr<DeepCNN>(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,100,32,1));
#else
        CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
        return Ptr<DeepCNN>();
#endif
    }

    if(backEnd==OCR_HOLISTIC_BACKEND_CAFFE)
        return Ptr<DeepCNN>(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz));

    if(backEnd==OCR_HOLISTIC_BACKEND_DNN)
        return Ptr<DeepCNN>(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,100,32,1));

    // OCR_HOLISTIC_BACKEND_NONE and every unknown value end up here.
    CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
    return Ptr<DeepCNN>();
}
||||
|
||||
|
||||
// Factory for a DictNet classifier: an image standarizer built with 113 and a
// minibatch size of 100, on the requested backend. Backend selection follows
// the same rules as DeepCNN::create.
Ptr<DeepCNN> DeepCNN::createDictNet(String archFilename,String weightsFilename,int backEnd)
{
    Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageStandarizer(113);

    if(backEnd==OCR_HOLISTIC_BACKEND_DEFAULT)
    {
#ifdef HAVE_CAFFE
        return Ptr<DeepCNN>(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, 100));
#elif defined(HAVE_DNN)
        return Ptr<DeepCNN>(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, 100,100,32,1));
#else
        CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
        return Ptr<DeepCNN>();
#endif
    }

    if(backEnd==OCR_HOLISTIC_BACKEND_CAFFE)
        return Ptr<DeepCNN>(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, 100));

    if(backEnd==OCR_HOLISTIC_BACKEND_DNN)
        return Ptr<DeepCNN>(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, 100,100,32,1));

    // OCR_HOLISTIC_BACKEND_NONE and every unknown value end up here.
    CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
    return Ptr<DeepCNN>();
}
||||
|
||||
namespace cnn_config{ |
||||
// Returns one descriptive string for every CNN backend that was compiled into
// this build; the list is empty when neither Caffe nor opencv_dnn is present.
std::vector<std::string> getAvailableBackends()
{
    std::vector<std::string> available;
#ifdef HAVE_CAFFE
    available.push_back("CAFFE, OCR_HOLISTIC_BACKEND_CAFFE"); // native Caffe backend
#endif
#ifdef HAVE_DNN
    available.push_back("DNN, OCR_HOLISTIC_BACKEND_DNN"); // opencv_dnn based backend
#endif
    return available;
}
||||
|
||||
namespace caffe_backend{ |
||||
|
||||
#ifdef HAVE_CAFFE |
||||
|
||||
bool getCaffeGpuMode() |
||||
{ |
||||
return caffe::Caffe::mode()==caffe::Caffe::GPU; |
||||
} |
||||
|
||||
void setCaffeGpuMode(bool useGpu) |
||||
{ |
||||
if(useGpu) |
||||
{ |
||||
caffe::Caffe::set_mode(caffe::Caffe::GPU); |
||||
}else |
||||
{ |
||||
caffe::Caffe::set_mode(caffe::Caffe::CPU); |
||||
} |
||||
} |
||||
|
||||
bool getCaffeAvailable() |
||||
{ |
||||
return true; |
||||
} |
||||
#else |
||||
|
||||
bool getCaffeGpuMode() |
||||
{ |
||||
CV_Error(Error::StsError,"Caffe not available during compilation!"); |
||||
return 0; |
||||
} |
||||
|
||||
void setCaffeGpuMode(bool useGpu) |
||||
{ |
||||
CV_Error(Error::StsError,"Caffe not available during compilation!"); |
||||
CV_Assert(useGpu==1);//Compilation directives force
|
||||
} |
||||
|
||||
bool getCaffeAvailable(){ |
||||
return 0; |
||||
} |
||||
|
||||
#endif |
||||
|
||||
}//namespace caffe
|
||||
// Queries about the opencv_dnn backend.
namespace dnn_backend{

// Reports whether the opencv_dnn backend was compiled in.
bool getDNNAvailable()
{
#ifdef HAVE_DNN
    return true;
#else
    return false;
#endif
}

}//namespace dnn_backend
|
||||
}//namespace cnn_config
|
||||
|
||||
class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{ |
||||
private: |
||||
struct NetOutput{ |
||||
//Auxiliary structure that handles the logic of getting class ids and probabillities from
|
||||
//the raw outputs of caffe
|
||||
int wordIdx; |
||||
float probabillity; |
||||
|
||||
static bool sorter(const NetOutput& o1,const NetOutput& o2) |
||||
{//used with std::sort to provide the most probable class
|
||||
return o1.probabillity>o2.probabillity; |
||||
} |
||||
|
||||
static void getOutputs(const float* buffer,int nbOutputs,std::vector<NetOutput>& res) |
||||
{ |
||||
res.resize(nbOutputs); |
||||
for(int k=0;k<nbOutputs;k++) |
||||
{ |
||||
res[k].wordIdx=k; |
||||
res[k].probabillity=buffer[k]; |
||||
} |
||||
std::sort(res.begin(),res.end(),NetOutput::sorter); |
||||
} |
||||
|
||||
static void getClassification(const float* buffer,int nbOutputs,int &classNum,double& confidence) |
||||
{ |
||||
std::vector<NetOutput> tmp; |
||||
getOutputs(buffer,nbOutputs,tmp); |
||||
classNum=tmp[0].wordIdx; |
||||
confidence=tmp[0].probabillity; |
||||
|
||||
} |
||||
}; |
||||
protected: |
||||
std::vector<String> labels_; |
||||
Ptr<TextImageClassifier> classifier_; |
||||
public: |
||||
OCRHolisticWordRecognizerImpl(Ptr<TextImageClassifier> classifierPtr,String vocabularyFilename):classifier_(classifierPtr) |
||||
{ |
||||
CV_Assert(fileExists(vocabularyFilename));//this fails for some rason
|
||||
std::ifstream labelsFile(vocabularyFilename.c_str()); |
||||
if(!labelsFile) |
||||
{ |
||||
CV_Error(Error::StsError,"Could not read Labels from file"); |
||||
} |
||||
std::string line; |
||||
while (std::getline(labelsFile, line)) |
||||
{ |
||||
labels_.push_back(std::string(line)); |
||||
} |
||||
CV_Assert(this->classifier_->getOutputSize()==int(this->labels_.size())); |
||||
} |
||||
|
||||
OCRHolisticWordRecognizerImpl(Ptr<TextImageClassifier> classifierPtr,const std::vector<String>& vocabulary):classifier_(classifierPtr) |
||||
{ |
||||
this->labels_=vocabulary; |
||||
CV_Assert(this->classifier_->getOutputSize()==int(this->labels_.size())); |
||||
} |
||||
|
||||
void recogniseImage(InputArray inputImage,CV_OUT String& transcription,CV_OUT double& confidence) |
||||
{ |
||||
Mat netOutput; |
||||
this->classifier_->classify(inputImage,netOutput); |
||||
int classNum; |
||||
NetOutput::getClassification((float*)(netOutput.data),this->classifier_->getOutputSize(),classNum,confidence); |
||||
transcription=this->labels_[classNum]; |
||||
} |
||||
|
||||
void recogniseImageBatch(InputArrayOfArrays inputImageList,CV_OUT std::vector<String>& transcriptionVec,CV_OUT std::vector<double>& confidenceVec) |
||||
{ |
||||
Mat netOutput; |
||||
this->classifier_->classifyBatch(inputImageList,netOutput); |
||||
|
||||
for(int k=0;k<netOutput.rows;k++) |
||||
{ |
||||
int classNum; |
||||
double confidence; |
||||
NetOutput::getClassification((float*)(netOutput.row(k).data),this->classifier_->getOutputSize(),classNum,confidence); |
||||
transcriptionVec.push_back(this->labels_[classNum]); |
||||
confidenceVec.push_back(confidence); |
||||
} |
||||
} |
||||
|
||||
|
||||
void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL, |
||||
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, |
||||
int component_level=0) |
||||
{ |
||||
CV_Assert(component_level==OCR_LEVEL_WORD);//Componnents not applicable for word spotting
|
||||
double confidence; |
||||
String transcription; |
||||
recogniseImage(image,transcription,confidence); |
||||
output_text=transcription.c_str(); |
||||
if(component_rects!=NULL) |
||||
{ |
||||
component_rects->resize(1); |
||||
(*component_rects)[0]=Rect(0,0,image.size().width,image.size().height); |
||||
} |
||||
if(component_texts!=NULL) |
||||
{ |
||||
component_texts->resize(1); |
||||
(*component_texts)[0]=transcription.c_str(); |
||||
} |
||||
if(component_confidences!=NULL) |
||||
{ |
||||
component_confidences->resize(1); |
||||
(*component_confidences)[0]=float(confidence); |
||||
} |
||||
} |
||||
|
||||
void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL, |
||||
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, |
||||
int component_level=0) |
||||
{ |
||||
CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image
|
||||
this->run(image,output_text,component_rects,component_texts,component_confidences,component_level); |
||||
} |
||||
|
||||
std::vector<String>& getVocabulary() |
||||
{ |
||||
return this->labels_; |
||||
} |
||||
|
||||
Ptr<TextImageClassifier> getClassifier() |
||||
{ |
||||
return this->classifier_; |
||||
} |
||||
}; |
||||
|
||||
// Creates a recognizer from an existing classifier and a vocabulary file.
Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(Ptr<TextImageClassifier> classifierPtr,String vocabularyFilename )
{
    Ptr<OCRHolisticWordRecognizer> recognizer(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabularyFilename));
    return recognizer;
}
||||
|
||||
// Creates a recognizer from model files and a vocabulary file. The classifier
// is a DeepCNNCaffeImpl with an image standarizer built with 113 and a
// minibatch size of 100.
Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(String modelArchFilename, String modelWeightsFilename, String vocabularyFilename)
{
    Ptr<ImagePreprocessor> standarizer=ImagePreprocessor::createImageStandarizer(113);
    Ptr<TextImageClassifier> cnn(new DeepCNNCaffeImpl(modelArchFilename,modelWeightsFilename,standarizer,100));
    Ptr<OCRHolisticWordRecognizer> recognizer(new OCRHolisticWordRecognizerImpl(cnn,vocabularyFilename));
    return recognizer;
}
||||
|
||||
// Creates a recognizer from an existing classifier and an in-memory vocabulary.
Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(Ptr<TextImageClassifier> classifierPtr,const std::vector<String>& vocabulary)
{
    Ptr<OCRHolisticWordRecognizer> recognizer(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabulary));
    return recognizer;
}
||||
|
||||
// Creates a recognizer from model files and an in-memory vocabulary. The
// classifier is a DeepCNNCaffeImpl with an image standarizer built with 113
// and a minibatch size of 100.
Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(String modelArchFilename, String modelWeightsFilename,const std::vector<String>& vocabulary)
{
    Ptr<ImagePreprocessor> standarizer=ImagePreprocessor::createImageStandarizer(113);
    Ptr<TextImageClassifier> cnn(new DeepCNNCaffeImpl(modelArchFilename,modelWeightsFilename,standarizer,100));
    Ptr<OCRHolisticWordRecognizer> recognizer(new OCRHolisticWordRecognizerImpl(cnn,vocabulary));
    return recognizer;
}
||||
|
||||
|
||||
|
||||
|
||||
|
||||
} } //namespace text namespace cv
|
@ -1,169 +0,0 @@ |
||||
#include "precomp.hpp" |
||||
#include "opencv2/imgproc.hpp" |
||||
#include "opencv2/core.hpp" |
||||
|
||||
|
||||
|
||||
#include <iostream> |
||||
#include <fstream> |
||||
#include <sstream> |
||||
#include <queue> |
||||
#include <algorithm> |
||||
#include <iosfwd> |
||||
#include <memory> |
||||
#include <string> |
||||
#include <utility> |
||||
#include <vector> |
||||
|
||||
|
||||
//#ifdef HAVE_CAFFE
|
||||
//#include "caffe/caffe.hpp"
|
||||
//#endif
|
||||
|
||||
namespace cv { namespace text { |
||||
|
||||
|
||||
|
||||
|
||||
class textDetectImpl: public textDetector{ |
||||
private: |
||||
struct NetOutput{ |
||||
//Auxiliary structure that handles the logic of getting bounding box and confidences of textness from
|
||||
//the raw outputs of caffe
|
||||
Rect bbox; |
||||
float probability; |
||||
|
||||
|
||||
static void getOutputs(const float* buffer,int nbrTextBoxes,int nCol,std::vector<NetOutput>& res,Size inputShape) |
||||
{ |
||||
|
||||
res.resize(nbrTextBoxes); |
||||
for(int k=0;k<nbrTextBoxes;k++) |
||||
{ |
||||
float x_min = buffer[k*nCol+3]*inputShape.width; |
||||
float y_min = buffer[k*nCol+4]*inputShape.height; |
||||
float x_max = buffer[k*nCol+5]*inputShape.width; |
||||
float y_max = buffer[k*nCol +6]*inputShape.height; |
||||
x_min = x_min<0?0:x_min; |
||||
y_min = y_min<0?0:y_min; |
||||
x_max = x_max> inputShape.width?inputShape.width-1:x_max; |
||||
y_max = y_max > inputShape.height?inputShape.height-1:y_max; |
||||
float wd = x_max-x_min+1; |
||||
float ht = y_max-y_min+1; |
||||
|
||||
res[k].bbox=Rect(int(x_min),int(y_min),int(wd),int(ht)); |
||||
|
||||
res[k].probability=buffer[k*nCol+2]; |
||||
} |
||||
|
||||
} |
||||
|
||||
|
||||
}; |
||||
protected: |
||||
|
||||
Ptr<TextRegionDetector> classifier_; |
||||
public: |
||||
textDetectImpl(Ptr<TextRegionDetector> classifierPtr):classifier_(classifierPtr) |
||||
{ |
||||
|
||||
} |
||||
|
||||
|
||||
|
||||
void textDetectInImage(InputArray inputImage,CV_OUT std::vector<Rect>& Bbox,CV_OUT std::vector<float>& confidence) |
||||
{ |
||||
Mat netOutput; |
||||
// call the detect function of deepTextCNN class
|
||||
this->classifier_->detect(inputImage,netOutput); |
||||
// get the output geometry i.e height and width of output blob from caffe
|
||||
Size OutputGeometry_ = this->classifier_->getOutputGeometry(); |
||||
int nbrTextBoxes = OutputGeometry_.height; |
||||
int nCol = OutputGeometry_.width; |
||||
|
||||
std::vector<NetOutput> tmp; |
||||
// the output bounding box needs to be resized by the input height and width
|
||||
Size inputImageShape = Size(inputImage.cols(),inputImage.rows()); |
||||
NetOutput::getOutputs((float*)(netOutput.data),nbrTextBoxes,nCol,tmp,inputImageShape); |
||||
// put the output in CV_OUT
|
||||
|
||||
for (int k=0;k<nbrTextBoxes;k++) |
||||
{ |
||||
Bbox.push_back(tmp[k].bbox); |
||||
confidence.push_back(tmp[k].probability); |
||||
} |
||||
|
||||
} |
||||
|
||||
|
||||
|
||||
void run(Mat& image, std::vector<Rect>* component_rects=NULL, |
||||
std::vector<float>* component_confidences=NULL, |
||||
int component_level=0) |
||||
{ |
||||
CV_Assert(component_level==OCR_LEVEL_WORD);//Componnents not applicable for word spotting
|
||||
|
||||
std::vector<Rect> bbox; |
||||
std::vector<float> score; |
||||
textDetectInImage(image,bbox,score); |
||||
|
||||
if(component_rects!=NULL) |
||||
{ |
||||
component_rects->resize(bbox.size()); // should be a user behavior
|
||||
|
||||
component_rects = &bbox; |
||||
} |
||||
|
||||
if(component_confidences!=NULL) |
||||
{ |
||||
component_confidences->resize(score.size()); // shoub be a user behavior
|
||||
|
||||
component_confidences = &score; |
||||
} |
||||
} |
||||
|
||||
void run(Mat& image, Mat& mask, std::vector<Rect>* component_rects=NULL, |
||||
std::vector<float>* component_confidences=NULL, |
||||
int component_level=0) |
||||
{ |
||||
CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image
|
||||
this->run(image,component_rects,component_confidences,component_level); |
||||
} |
||||
|
||||
|
||||
|
||||
Ptr<TextRegionDetector> getClassifier() |
||||
{ |
||||
return this->classifier_; |
||||
} |
||||
}; |
||||
|
||||
// Creates a text detector on top of an existing region detector.
Ptr<textDetector> textDetector::create(Ptr<TextRegionDetector> classifierPtr)
{
    Ptr<textDetector> detector(new textDetectImpl(classifierPtr));
    return detector;
}
||||
|
||||
// Creates a text detector from model files. The preprocessor is a custom
// preprocessor built with 255 whose mean is set to (104, 117, 123); the
// region detector is created with a minibatch size of 1.
Ptr<textDetector> textDetector::create(String modelArchFilename, String modelWeightsFilename)
{
    // 1x3 CV_8U matrix holding the per-channel mean
    Mat textbox_mean = (Mat_<uchar>(1,3) << 104, 117, 123);

    Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255);
    preprocessor->set_mean(textbox_mean);

    Ptr<TextRegionDetector> regionDetector(DeepCNNTextDetector::create(modelArchFilename,modelWeightsFilename,preprocessor,1));
    Ptr<textDetector> detector(new textDetectImpl(regionDetector));
    return detector;
}
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
} } //namespace text namespace cv
|
@ -1,453 +1,101 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "precomp.hpp" |
||||
#include "opencv2/imgproc.hpp" |
||||
#include "opencv2/core.hpp" |
||||
|
||||
|
||||
|
||||
#include <iostream> |
||||
#include <fstream> |
||||
#include <sstream> |
||||
#include <queue> |
||||
#include <algorithm> |
||||
#include <iosfwd> |
||||
#include <memory> |
||||
#include <string> |
||||
#include <utility> |
||||
#include <vector> |
||||
|
||||
|
||||
#ifdef HAVE_CAFFE |
||||
#include "caffe/caffe.hpp" |
||||
#endif |
||||
|
||||
#ifdef HAVE_DNN |
||||
#include "opencv2/dnn.hpp" |
||||
#endif |
||||
|
||||
using namespace cv::dnn; |
||||
|
||||
#define CV_WARN(message) fprintf(stderr, "warning: %s (%s:%d)\n", message, __FILE__, __LINE__) |
||||
|
||||
namespace cv { namespace text { |
||||
|
||||
// Returns true when a file with the given name can be opened for reading.
inline bool fileExists (String filename) {
    std::ifstream probe(filename.c_str());
    const bool opened = probe.good();
    return opened;
}
||||
|
||||
class DeepCNNTextDetectorCaffeImpl: public DeepCNNTextDetector{ |
||||
protected: |
||||
|
||||
|
||||
// Runs one forward pass on inputImage and stores the raw output blob in
// outputMat, which is (re)created as a height x width CV_32FC1 matrix of the
// output blob. outputMat must be continuous on entry. A channel-count
// mismatch between image and model only produces a warning.
// Without HAVE_CAFFE only the checks are performed.
void process_(Mat inputImage, Mat &outputMat)
{
    CV_Assert(outputMat.isContinuous());
    if (inputImage.channels() != this->inputChannelCount_)
        CV_WARN("Number of input channel(s) in the model is not same as input");

#ifdef HAVE_CAFFE
    net_->input_blobs()[0]->Reshape(1, this->inputChannelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
    net_->Reshape();
    float* inputData=net_->input_blobs()[0]->mutable_cpu_data();

    // Wrap every channel plane of the input blob in a Mat header and hand
    // those headers to split() as destinations.
    // NOTE(review): this only fills the blob if preprocess() yields a CV_32F
    // image of inputGeometry_ size with inputChannelCount_ channels - confirm.
    std::vector<Mat> input_channels;
    for (int channel=0;channel < this->inputChannelCount_;channel++)
    {
        cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
        input_channels.push_back(netInputWraped);
        inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
    }
    Mat preprocessed;
    this->preprocess(inputImage,preprocessed);
    split(preprocessed, input_channels);

    this->net_->Forward();
    const float* outputNetData=net_->output_blobs()[0]->cpu_data();

    this->outputGeometry_.height = net_->output_blobs()[0]->height();
    this->outputGeometry_.width = net_->output_blobs()[0]->width();
    this->outputChannelCount_ = net_->output_blobs()[0]->channels();
    const size_t outputSz = size_t(this->outputChannelCount_) * size_t(this->outputGeometry_.height) * size_t(this->outputGeometry_.width);
    outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);

    // outputMat holds height*width floats only, so a blob with more than one
    // channel would overflow it; fail loudly instead.
    CV_Assert(outputSz<=outputMat.total());
    memcpy(outputMat.data,outputNetData,sizeof(float)*outputSz);
#endif
}
||||
|
||||
|
||||
#ifdef HAVE_CAFFE |
||||
Ptr<caffe::Net<float> > net_; |
||||
#endif |
||||
//Size inputGeometry_;
|
||||
int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
|
||||
//int outputSize_;
|
||||
public: |
||||
DeepCNNTextDetectorCaffeImpl(const DeepCNNTextDetectorCaffeImpl& dn): |
||||
minibatchSz_(dn.minibatchSz_){ |
||||
outputGeometry_=dn.outputGeometry_; |
||||
inputGeometry_=dn.inputGeometry_; |
||||
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
|
||||
#ifdef HAVE_CAFFE |
||||
this->net_=dn.net_; |
||||
#endif |
||||
} |
||||
// Assignment; the underlying Caffe net is shared, not cloned.
// Implemented to supress Visual Studio warning "assignment operator could not be generated"
DeepCNNTextDetectorCaffeImpl& operator=(const DeepCNNTextDetectorCaffeImpl &dn)
{
#ifdef HAVE_CAFFE
    this->net_=dn.net_;
#endif
    this->setPreprocessor(dn.preprocessor_);
    this->inputGeometry_=dn.inputGeometry_;
    this->inputChannelCount_=dn.inputChannelCount_;
    this->outputChannelCount_ = dn.outputChannelCount_;
    // The minibatch size is part of the object state and must follow the
    // assignment like every other member.
    this->minibatchSz_=dn.minibatchSz_;
    this->outputGeometry_=dn.outputGeometry_;
    return *this;
}
||||
|
||||
DeepCNNTextDetectorCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz) |
||||
:minibatchSz_(maxMinibatchSz) |
||||
{ |
||||
|
||||
CV_Assert(this->minibatchSz_>0); |
||||
CV_Assert(fileExists(modelArchFilename)); |
||||
CV_Assert(fileExists(modelWeightsFilename)); |
||||
CV_Assert(!preprocessor.empty()); |
||||
this->setPreprocessor(preprocessor); |
||||
#ifdef HAVE_CAFFE |
||||
this->net_.reset(new caffe::Net<float>(modelArchFilename, caffe::TEST)); |
||||
CV_Assert(net_->num_inputs()==1); |
||||
CV_Assert(net_->num_outputs()==1); |
||||
CV_Assert(this->net_->input_blobs()[0]->channels()==1 |
||||
||this->net_->input_blobs()[0]->channels()==3); |
||||
// this->channelCount_=this->net_->input_blobs()[0]->channels();
|
||||
|
||||
|
||||
|
||||
this->net_->CopyTrainedLayersFrom(modelWeightsFilename); |
||||
|
||||
caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0]; |
||||
|
||||
this->inputGeometry_.height = inputLayer->height(); |
||||
this->inputGeometry_.width = inputLayer->width(); |
||||
this->inputChannelCount_ = inputLayer->channels(); |
||||
//this->inputGeometry_.batchSize =1;
|
||||
|
||||
inputLayer->Reshape(this->minibatchSz_,this->inputChannelCount_,this->inputGeometry_.height, this->inputGeometry_.width); |
||||
net_->Reshape(); |
||||
this->outputChannelCount_ = net_->output_blobs()[0]->channels(); |
||||
//this->outputGeometry_.batchSize =1;
|
||||
this->outputGeometry_.height =net_->output_blobs()[0]->height(); |
||||
this->outputGeometry_.width = net_->output_blobs()[0]->width(); |
||||
|
||||
#else |
||||
CV_Error(Error::StsError,"Caffe not available during compilation!"); |
||||
#endif |
||||
} |
||||
|
||||
|
||||
void detect(InputArray image, OutputArray Bbox_prob) |
||||
{ |
||||
Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width); |
||||
Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed
|
||||
Mat outputMat = Bbox_prob.getMat(); |
||||
process_(image.getMat(),outputMat); |
||||
//copy back to outputArray
|
||||
outputMat.copyTo(Bbox_prob); |
||||
} |
||||
|
||||
Size getOutputGeometry() |
||||
{ |
||||
return this->outputGeometry_; |
||||
} |
||||
Size getinputGeometry() |
||||
{ |
||||
return this->inputGeometry_; |
||||
} |
||||
|
||||
int getMinibatchSize() |
||||
{ |
||||
return this->minibatchSz_; |
||||
} |
||||
|
||||
int getBackend() |
||||
{ |
||||
return OCR_HOLISTIC_BACKEND_CAFFE; |
||||
} |
||||
void setPreprocessor(Ptr<ImagePreprocessor> ptr) |
||||
{ |
||||
CV_Assert(!ptr.empty()); |
||||
preprocessor_=ptr; |
||||
} |
||||
|
||||
Ptr<ImagePreprocessor> getPreprocessor() |
||||
{ |
||||
return preprocessor_; |
||||
} |
||||
}; |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace text |
||||
{ |
||||
|
||||
class DeepCNNTextDetectorDNNImpl: public DeepCNNTextDetector{ |
||||
class TextDetectorCNNImpl : public TextDetectorCNN |
||||
{ |
||||
protected: |
||||
Net net_; |
||||
std::vector<Size> sizes_; |
||||
int inputChannelCount_; |
||||
bool detectMultiscale_; |
||||
|
||||
|
||||
void process_(Mat inputImage, Mat &outputMat) |
||||
void getOutputs(const float* buffer,int nbrTextBoxes,int nCol, |
||||
std::vector<Rect>& Bbox, std::vector<float>& confidence, Size inputShape) |
||||
{ |
||||
// do forward pass and stores the output in outputMat
|
||||
CV_Assert(outputMat.isContinuous()); |
||||
if (inputImage.channels() != this->inputChannelCount_) |
||||
CV_WARN("Number of input channel(s) in the model is not same as input"); |
||||
|
||||
|
||||
#ifdef HAVE_DNN |
||||
|
||||
Mat preprocessed; |
||||
this->preprocess(inputImage,preprocessed); |
||||
|
||||
net_->setInput(blobFromImage(preprocessed,1, this->inputGeometry_), "data"); |
||||
|
||||
Mat outputNet = this->net_->forward( ); |
||||
|
||||
this->outputGeometry_.height = outputNet.size[2]; |
||||
this->outputGeometry_.width = outputNet.size[3]; |
||||
this->outputChannelCount_ = outputNet.size[1]; |
||||
for(int k = 0; k < nbrTextBoxes; k++) |
||||
{ |
||||
float x_min = buffer[k*nCol + 3]*inputShape.width; |
||||
float y_min = buffer[k*nCol + 4]*inputShape.height; |
||||
|
||||
outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1); |
||||
float*outputMatData=(float*)(outputMat.data); |
||||
float*outputNetData=(float*)(outputNet.data); |
||||
int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width; |
||||
float x_max = buffer[k*nCol + 5]*inputShape.width; |
||||
float y_max = buffer[k*nCol + 6]*inputShape.height; |
||||
|
||||
memcpy(outputMatData,outputNetData,sizeof(float)*outputSz); |
||||
CV_Assert(x_min < x_max, y_min < y_max); |
||||
|
||||
x_min = std::max(0.f, x_min); |
||||
y_min = std::max(0.f, y_min); |
||||
|
||||
x_max = std::min(inputShape.width - 1.f, x_max); |
||||
y_max = std::min(inputShape.height - 1.f, y_max); |
||||
|
||||
int wd = cvRound(x_max - x_min); |
||||
int ht = cvRound(y_max - y_min); |
||||
|
||||
#endif |
||||
Bbox.push_back(Rect(cvRound(x_min), cvRound(y_min), wd, ht)); |
||||
confidence.push_back(buffer[k*nCol + 2]); |
||||
} |
||||
} |
||||
|
||||
|
||||
|
||||
#ifdef HAVE_DNN |
||||
Ptr<Net> net_; |
||||
#endif |
||||
//Size inputGeometry_;
|
||||
int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
|
||||
//int outputSize_;
|
||||
//int inputHeight_;
|
||||
//int inputWidth_;
|
||||
//int inputChannel_;
|
||||
public: |
||||
DeepCNNTextDetectorDNNImpl(const DeepCNNTextDetectorDNNImpl& dn): |
||||
minibatchSz_(dn.minibatchSz_){ |
||||
outputGeometry_=dn.outputGeometry_; |
||||
inputGeometry_=dn.inputGeometry_; |
||||
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
|
||||
#ifdef HAVE_DNN |
||||
this->net_=dn.net_; |
||||
#endif |
||||
} |
||||
DeepCNNTextDetectorDNNImpl& operator=(const DeepCNNTextDetectorDNNImpl &dn) |
||||
{ |
||||
#ifdef HAVE_DNN |
||||
this->net_=dn.net_; |
||||
#endif |
||||
this->setPreprocessor(dn.preprocessor_); |
||||
this->inputGeometry_=dn.inputGeometry_; |
||||
this->inputChannelCount_=dn.inputChannelCount_; |
||||
this->outputChannelCount_ = dn.outputChannelCount_; |
||||
// this->minibatchSz_=dn.minibatchSz_;
|
||||
//this->outputGeometry_=dn.outputSize_;
|
||||
this->preprocessor_=dn.preprocessor_; |
||||
this->outputGeometry_=dn.outputGeometry_; |
||||
return *this; |
||||
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
|
||||
} |
||||
|
||||
DeepCNNTextDetectorDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz,int inputHeight=700,int inputWidth =700,int inputChannel =3) |
||||
:minibatchSz_(maxMinibatchSz) |
||||
TextDetectorCNNImpl(const String& modelArchFilename, const String& modelWeightsFilename, bool detectMultiscale) : |
||||
detectMultiscale_(detectMultiscale) |
||||
{ |
||||
net_ = readNetFromCaffe(modelArchFilename, modelWeightsFilename); |
||||
CV_Assert(!net_.empty()); |
||||
inputChannelCount_ = 3; |
||||
sizes_.push_back(Size(700, 700)); |
||||
|
||||
CV_Assert(this->minibatchSz_>0); |
||||
CV_Assert(fileExists(modelArchFilename)); |
||||
CV_Assert(fileExists(modelWeightsFilename)); |
||||
CV_Assert(!preprocessor.empty()); |
||||
this->setPreprocessor(preprocessor); |
||||
#ifdef HAVE_DNN |
||||
this->net_ = makePtr<Net>(readNetFromCaffe(modelArchFilename,modelWeightsFilename)); |
||||
|
||||
if (this->net_.empty()) |
||||
if(detectMultiscale_) |
||||
{ |
||||
std::cerr << "Can't load network by using the following files: " << std::endl; |
||||
std::cerr << "prototxt: " << modelArchFilename << std::endl; |
||||
std::cerr << "caffemodel: " << modelWeightsFilename << std::endl; |
||||
//std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl;
|
||||
//std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
|
||||
exit(-1); |
||||
sizes_.push_back(Size(300, 300)); |
||||
sizes_.push_back(Size(700,500)); |
||||
sizes_.push_back(Size(700,300)); |
||||
sizes_.push_back(Size(1600,1600)); |
||||
} |
||||
|
||||
this->inputGeometry_.height =inputHeight; |
||||
this->inputGeometry_.width = inputWidth ;//inputLayer->width();
|
||||
this->inputChannelCount_ = inputChannel ;//inputLayer->channels();
|
||||
|
||||
#else |
||||
CV_Error(Error::StsError,"DNN module not available during compilation!"); |
||||
#endif |
||||
} |
||||
|
||||
|
||||
void detect(InputArray image, OutputArray Bbox_prob) |
||||
void textDetectInImage(InputArray inputImage_, std::vector<Rect>& Bbox, std::vector<float>& confidence) |
||||
{ |
||||
Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width); |
||||
Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed
|
||||
Mat outputMat = Bbox_prob.getMat(); |
||||
CV_Assert(inputImage_.channels() == inputChannelCount_); |
||||
Mat inputImage = inputImage_.getMat().clone(); |
||||
Bbox.resize(0); |
||||
confidence.resize(0); |
||||
|
||||
process_(image.getMat(),outputMat); |
||||
//copy back to outputArray
|
||||
outputMat.copyTo(Bbox_prob); |
||||
} |
||||
|
||||
Size getOutputGeometry() |
||||
{ |
||||
return this->outputGeometry_; |
||||
} |
||||
Size getinputGeometry() |
||||
{ |
||||
return this->inputGeometry_; |
||||
} |
||||
|
||||
int getMinibatchSize() |
||||
{ |
||||
return this->minibatchSz_; |
||||
} |
||||
|
||||
int getBackend() |
||||
{ |
||||
return OCR_HOLISTIC_BACKEND_DNN; |
||||
} |
||||
void setPreprocessor(Ptr<ImagePreprocessor> ptr) |
||||
{ |
||||
CV_Assert(!ptr.empty()); |
||||
preprocessor_=ptr; |
||||
} |
||||
|
||||
Ptr<ImagePreprocessor> getPreprocessor() |
||||
{ |
||||
return preprocessor_; |
||||
} |
||||
for(size_t i = 0; i < sizes_.size(); i++) |
||||
{ |
||||
Size inputGeometry = sizes_[i]; |
||||
net_.setInput(blobFromImage(inputImage, 1, inputGeometry, Scalar(123, 117, 104)), "data"); |
||||
Mat outputNet = net_.forward(); |
||||
int nbrTextBoxes = outputNet.size[2]; |
||||
int nCol = outputNet.size[3]; |
||||
int outputChannelCount = outputNet.size[1]; |
||||
CV_Assert(outputChannelCount == 1); |
||||
getOutputs((float*)(outputNet.data), nbrTextBoxes, nCol, Bbox, confidence, inputImage.size()); |
||||
} |
||||
} |
||||
}; |
||||
|
||||
Ptr<DeepCNNTextDetector> DeepCNNTextDetector::create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz,int backEnd) |
||||
Ptr<TextDetectorCNN> TextDetectorCNN::create(const String &modelArchFilename, const String &modelWeightsFilename, bool detectMultiscale) |
||||
{ |
||||
if(preprocessor.empty()) |
||||
{ |
||||
// create a custom preprocessor with rawval
|
||||
preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255); |
||||
// set the mean for the preprocessor
|
||||
|
||||
Mat textbox_mean(1,3,CV_8U); |
||||
textbox_mean.at<uchar>(0,0)=104; |
||||
textbox_mean.at<uchar>(0,1)=117; |
||||
textbox_mean.at<uchar>(0,2)=123; |
||||
preprocessor->set_mean(textbox_mean); |
||||
} |
||||
switch(backEnd){ |
||||
case OCR_HOLISTIC_BACKEND_DEFAULT: |
||||
|
||||
#ifdef HAVE_CAFFE |
||||
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); |
||||
|
||||
#elif defined(HAVE_DNN) |
||||
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,700,700,3)); |
||||
#else |
||||
CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); |
||||
return Ptr<DeepCNNTextDetector>(); |
||||
#endif |
||||
case OCR_HOLISTIC_BACKEND_CAFFE: |
||||
|
||||
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); |
||||
break; |
||||
|
||||
case OCR_HOLISTIC_BACKEND_DNN: |
||||
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,700,700,3)); |
||||
break; |
||||
|
||||
case OCR_HOLISTIC_BACKEND_NONE: |
||||
default: |
||||
CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); |
||||
return Ptr<DeepCNNTextDetector>(); |
||||
break; |
||||
} |
||||
//return Ptr<DeepCNNTextDetector>();
|
||||
|
||||
return makePtr<TextDetectorCNNImpl>(modelArchFilename, modelWeightsFilename, detectMultiscale); |
||||
} |
||||
|
||||
|
||||
Ptr<DeepCNNTextDetector> DeepCNNTextDetector::createTextBoxNet(String archFilename,String weightsFilename,int backEnd) |
||||
{ |
||||
|
||||
// create a custom preprocessor with rawval
|
||||
Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255); |
||||
// set the mean for the preprocessor
|
||||
|
||||
Mat textbox_mean(1,3,CV_8U); |
||||
textbox_mean.at<uchar>(0,0)=104; |
||||
textbox_mean.at<uchar>(0,1)=117; |
||||
textbox_mean.at<uchar>(0,2)=123; |
||||
preprocessor->set_mean(textbox_mean); |
||||
switch(backEnd){ |
||||
case OCR_HOLISTIC_BACKEND_DEFAULT: |
||||
|
||||
#ifdef HAVE_CAFFE |
||||
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1)); |
||||
|
||||
#elif defined(HAVE_DNN) |
||||
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1,700,700,3)); |
||||
#else |
||||
CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); |
||||
return Ptr<DeepCNNTextDetector>(); |
||||
#endif |
||||
break; |
||||
case OCR_HOLISTIC_BACKEND_CAFFE: |
||||
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1)); |
||||
break; |
||||
case OCR_HOLISTIC_BACKEND_DNN: |
||||
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1,700,700,3)); |
||||
break; |
||||
case OCR_HOLISTIC_BACKEND_NONE: |
||||
default: |
||||
CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); |
||||
return Ptr<DeepCNNTextDetector>(); |
||||
break; |
||||
} |
||||
//return Ptr<DeepCNNTextDetector>();
|
||||
|
||||
} |
||||
|
||||
void DeepCNNTextDetector::preprocess(const Mat& input,Mat& output) |
||||
{ |
||||
Size inputHtWd = Size(this->inputGeometry_.height,this->inputGeometry_.width); |
||||
this->preprocessor_->preprocess(input,output,inputHtWd,this->inputChannelCount_); |
||||
} |
||||
|
||||
|
||||
|
||||
} } //namespace text namespace cv
|
||||
} //namespace text
|
||||
} //namespace cv
|
||||
|
@ -1,4 +1,7 @@ |
||||
#ifndef __OPENCV_TEXT_CONFIG_HPP__ |
||||
#define __OPENCV_TEXT_CONFIG_HPP__ |
||||
|
||||
// HAVE OCR Tesseract
|
||||
#cmakedefine HAVE_TESSERACT |
||||
|
||||
#endif |
||||
|
Loading…
Reference in new issue