parent
c33629e053
commit
951e18272d
19 changed files with 308 additions and 2898 deletions
@ -1,84 +1,24 @@ |
|||||||
set(the_description "Text Detection and Recognition") |
set(the_description "Text Detection and Recognition") |
||||||
|
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_dnn OPTIONAL opencv_highgui WRAP python java) |
||||||
if(POLICY CMP0023) |
|
||||||
message(STATUS "Explicitly setting policy CMP0023 to OLD") |
if(NOT CMAKE_CROSSCOMPILING OR OPENCV_FIND_TESSERACT) |
||||||
cmake_policy(SET CMP0023 OLD) |
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake) |
||||||
endif(POLICY CMP0023) |
find_package(Tesseract QUIET) |
||||||
|
if(Tesseract_FOUND) |
||||||
# Using cmake scripts and modules |
message(STATUS "Tesseract: YES") |
||||||
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}) |
set(HAVE_TESSERACT 1) |
||||||
|
ocv_include_directories(${Tesseract_INCLUDE_DIR}) |
||||||
set(TEXT_DEPS opencv_ml opencv_highgui opencv_imgproc opencv_core opencv_features2d opencv_calib3d) |
ocv_target_link_libraries(${the_module} ${Tesseract_LIBRARIES}) |
||||||
|
else() |
||||||
find_package(Caffe) |
message(STATUS "Tesseract: NO") |
||||||
if(Caffe_FOUND) |
|
||||||
message(STATUS "Caffe: YES") |
|
||||||
set(HAVE_CAFFE 1) |
|
||||||
else() |
|
||||||
message(STATUS "Caffe: NO") |
|
||||||
# list(APPEND TEXT_DEPS opencv_dnn) |
|
||||||
endif() |
|
||||||
|
|
||||||
#internal dependencies |
|
||||||
find_package(Protobuf) |
|
||||||
if(Protobuf_FOUND) |
|
||||||
message(STATUS "Protobuf: YES") |
|
||||||
set(HAVE_PROTOBUF 1) |
|
||||||
else() |
|
||||||
message(STATUS "Protobuf: NO") |
|
||||||
endif() |
|
||||||
|
|
||||||
find_package(Glog) |
|
||||||
if(Glog_FOUND) |
|
||||||
message(STATUS "Glog: YES") |
|
||||||
set(HAVE_GLOG 1) |
|
||||||
else() |
|
||||||
message(STATUS "Glog: NO") |
|
||||||
endif() |
|
||||||
|
|
||||||
ocv_define_module(text opencv_ml opencv_imgproc opencv_core opencv_features2d opencv_calib3d OPTIONAL opencv_dnn WRAP python) |
|
||||||
#ocv_define_module(text ${TEXT_DEPS} WRAP python) |
|
||||||
|
|
||||||
#set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}) |
|
||||||
|
|
||||||
find_package(Tesseract) |
|
||||||
if(${Tesseract_FOUND}) |
|
||||||
message(STATUS "Tesseract: YES") |
|
||||||
include_directories(${Tesseract_INCLUDE_DIR}) |
|
||||||
target_link_libraries(opencv_text ${Tesseract_LIBS}) |
|
||||||
add_definitions(-DHAVE_TESSERACT) |
|
||||||
else() |
|
||||||
message(STATUS "Tesseract: NO") |
|
||||||
endif() |
endif() |
||||||
|
endif() |
||||||
|
|
||||||
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/text_config.hpp.in |
||||||
|
${CMAKE_BINARY_DIR}/text_config.hpp @ONLY) |
||||||
|
|
||||||
if(HAVE_CAFFE AND HAVE_GLOG AND HAVE_PROTOBUF) |
ocv_include_directories(${CMAKE_CURRENT_BINARY_DIR}) |
||||||
include_directories(${Caffe_INCLUDE_DIR}) |
|
||||||
find_package(HDF5 COMPONENTS HL REQUIRED) |
|
||||||
include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR}) |
|
||||||
list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES}) |
|
||||||
find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem) |
|
||||||
include_directories(SYSTEM ${Boost_INCLUDE_DIR}) |
|
||||||
include_directories(SYSTEM ${CUDA_INCLUDE_DIR}) |
|
||||||
link_directories(SYSTEM ${CUDA_LIBS}) |
|
||||||
# include_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/include/ usr/local/cuda-8.0/include/ /usr/local/cuda-7.5/targets/x86_64-linux/include/ ) |
|
||||||
#link_directories(SYSTEM /usr/local/cuda-8.0/targets/x86_64-linux/lib/ usr/local/cuda-8.0/lib/ /usr/local/cuda-7.5/targets/x86_64-linux/lib/ /usr/lib/openblas-base/lib /usr/local/cuda-8.0/lib64) |
|
||||||
list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES}) |
|
||||||
target_link_libraries(opencv_text atlas blas ${Caffe_LIBS} ${Glog_LIBS} ${Protobuf_LIBS} ${HDF5_LIBRARIES} ${Boost_LIBRARIES}) |
|
||||||
add_definitions(-DHAVE_CAFFE) |
|
||||||
endif() #HAVE_CAFFE |
|
||||||
|
|
||||||
message(STATUS "TEXT CAFFE SEARCH") |
|
||||||
if() |
|
||||||
message(STATUS "TEXT NO CAFFE CONFLICT") |
|
||||||
else() |
|
||||||
message(STATUS "TEXT CAFFE CONFLICT") |
|
||||||
endif() |
|
||||||
|
|
||||||
if(HAVE_opencv_dnn) |
ocv_add_testdata(samples/ contrib/text |
||||||
message(STATUS "dnn module found") |
FILES_MATCHING PATTERN "*.xml" PATTERN "*.xml.gz" REGEX "scenetext[0-9]+.jpg" |
||||||
add_definitions(-DHAVE_DNN) |
) |
||||||
set(HAVE_DNN 1) |
|
||||||
else() |
|
||||||
message(STATUS "dnn module not found") |
|
||||||
endif() |
|
||||||
|
@ -1,14 +0,0 @@ |
|||||||
# Caffe package for CNN Triplet training |
|
||||||
unset(Caffe_FOUND) |
|
||||||
|
|
||||||
find_path(Caffe_INCLUDE_DIR NAMES caffe/caffe.hpp caffe/common.hpp caffe/net.hpp caffe/proto/caffe.pb.h caffe/util/io.hpp caffe/vision_layers.hpp |
|
||||||
HINTS |
|
||||||
/usr/local/include) |
|
||||||
|
|
||||||
find_library(Caffe_LIBS NAMES caffe |
|
||||||
HINTS |
|
||||||
/usr/local/lib) |
|
||||||
|
|
||||||
if(Caffe_LIBS AND Caffe_INCLUDE_DIR) |
|
||||||
set(Caffe_FOUND 1) |
|
||||||
endif() |
|
@ -1,10 +0,0 @@ |
|||||||
#Required for Caffe |
|
||||||
unset(Glog_FOUND) |
|
||||||
|
|
||||||
find_library(Glog_LIBS NAMES glog |
|
||||||
HINTS |
|
||||||
/usr/local/lib) |
|
||||||
|
|
||||||
if(Glog_LIBS) |
|
||||||
set(Glog_FOUND 1) |
|
||||||
endif() |
|
@ -1,10 +0,0 @@ |
|||||||
#Protobuf package required for Caffe |
|
||||||
unset(Protobuf_FOUND) |
|
||||||
|
|
||||||
find_library(Protobuf_LIBS NAMES protobuf |
|
||||||
HINTS |
|
||||||
/usr/local/lib) |
|
||||||
|
|
||||||
if(Protobuf_LIBS) |
|
||||||
set(Protobuf_FOUND 1) |
|
||||||
endif() |
|
@ -1,22 +0,0 @@ |
|||||||
# Tesseract OCR |
|
||||||
unset(Tesseract_FOUND) |
|
||||||
|
|
||||||
find_path(Tesseract_INCLUDE_DIR tesseract/baseapi.h |
|
||||||
HINTS |
|
||||||
/usr/include |
|
||||||
/usr/local/include) |
|
||||||
|
|
||||||
find_library(Tesseract_LIBRARY NAMES tesseract |
|
||||||
HINTS |
|
||||||
/usr/lib |
|
||||||
/usr/local/lib) |
|
||||||
|
|
||||||
find_library(Lept_LIBRARY NAMES lept |
|
||||||
HINTS |
|
||||||
/usr/lib |
|
||||||
/usr/local/lib) |
|
||||||
|
|
||||||
set(Tesseract_LIBS ${Tesseract_LIBRARY} ${Lept_LIBRARY}) |
|
||||||
if(Tesseract_LIBS AND Tesseract_INCLUDE_DIR) |
|
||||||
set(Tesseract_FOUND 1) |
|
||||||
endif() |
|
@ -1,57 +1,37 @@ |
|||||||
# -*- coding: utf-8 -*- |
# -*- coding: utf-8 -*- |
||||||
""" |
|
||||||
Created on Wed Jul 19 17:54:00 2017 |
|
||||||
|
|
||||||
@author: sgnosh |
|
||||||
""" |
|
||||||
|
|
||||||
#!/usr/bin/python |
#!/usr/bin/python |
||||||
|
|
||||||
import sys |
import sys |
||||||
import os |
import os |
||||||
|
|
||||||
import cv2 |
import cv2 |
||||||
import numpy as np |
import numpy as np |
||||||
|
|
||||||
print('\nDeeptextdetection.py') |
def main(): |
||||||
print(' A demo script of text box alogorithm of the paper:') |
print('\nDeeptextdetection.py') |
||||||
print(' * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network https://arxiv.org/abs/1611.06779\n') |
print(' A demo script of text box alogorithm of the paper:') |
||||||
|
print(' * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network https://arxiv.org/abs/1611.06779\n') |
||||||
|
|
||||||
if (len(sys.argv) < 2): |
|
||||||
print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n') |
|
||||||
quit() |
|
||||||
#if not cv2.text.cnn_config.caffe_backend.getCaffeAvailable(): |
|
||||||
# print"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n" |
|
||||||
# |
|
||||||
# quit() |
|
||||||
# check model and architecture file existance |
|
||||||
if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'): |
|
||||||
print " Model files not found in current directory. Aborting" |
|
||||||
print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models" |
|
||||||
quit() |
|
||||||
cv2.text.cnn_config.caffe_backend.setCaffeGpuMode(True); |
|
||||||
pathname = os.path.dirname(sys.argv[0]) |
|
||||||
|
|
||||||
|
if (len(sys.argv) < 2): |
||||||
|
print(' (ERROR) You must call this script with an argument (path_to_image_to_be_processed)\n') |
||||||
|
quit() |
||||||
|
|
||||||
img = cv2.imread(str(sys.argv[1])) |
if not os.path.isfile('textbox.caffemodel') or not os.path.isfile('textbox_deploy.prototxt'): |
||||||
textSpotter=cv2.text.textDetector_create( |
print " Model files not found in current directory. Aborting" |
||||||
"textbox_deploy.prototxt","textbox.caffemodel") |
print " Model files should be downloaded from https://github.com/sghoshcvc/TextBox-Models" |
||||||
rects,outProbs = textSpotter.textDetectInImage(img); |
quit() |
||||||
# for visualization |
|
||||||
vis = img.copy() |
|
||||||
# Threshold to select rectangles : All rectangles for which outProbs is more than this threshold will be shown |
|
||||||
thres = 0.6 |
|
||||||
|
|
||||||
|
img = cv2.imread(str(sys.argv[1])) |
||||||
|
textSpotter = cv2.text.TextDetectorCNN_create("textbox_deploy.prototxt","textbox.caffemodel") |
||||||
|
rects, outProbs = textSpotter.textDetectInImage(img); |
||||||
|
vis = img.copy() |
||||||
|
thres = 0.6 |
||||||
|
|
||||||
#Visualization |
for r in range(np.shape(rects)[0]): |
||||||
for r in range(0,np.shape(rects)[0]): |
if outProbs[r] > thres: |
||||||
if outProbs[r] >thres: |
rect = rects[r] |
||||||
rect = rects[r] |
cv2.rectangle(vis, (rect[0],rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (255, 0, 0), 2) |
||||||
cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 0, 0), 2) |
|
||||||
# cv2.rectangle(vis, (rect[0],rect[1]), (rect[0]+rect[2],rect[1]+rect[3]), (255, 255, 255), 1) |
|
||||||
|
|
||||||
|
cv2.imshow("Text detection result", vis) |
||||||
|
cv2.waitKey() |
||||||
|
|
||||||
#Visualization |
if __name__ == "__main__": |
||||||
cv2.imshow("Text detection result", vis) |
main() |
||||||
cv2.waitKey(0) |
|
||||||
|
@ -1,151 +1,86 @@ |
|||||||
/*
|
#include <opencv2/text.hpp> |
||||||
* dictnet_demo.cpp |
#include <opencv2/highgui.hpp> |
||||||
* |
#include <opencv2/imgproc.hpp> |
||||||
* Demonstrates simple use of the holistic word classifier in C++ |
|
||||||
* |
|
||||||
* Created on: June 26, 2016 |
|
||||||
* Author: Anguelos Nicolaou <anguelos.nicolaou AT gmail.com> |
|
||||||
*/ |
|
||||||
|
|
||||||
#include "opencv2/text.hpp" |
|
||||||
#include "opencv2/highgui.hpp" |
|
||||||
#include "opencv2/imgproc.hpp" |
|
||||||
|
|
||||||
#include <sstream> |
#include <sstream> |
||||||
#include <vector> |
|
||||||
#include <iostream> |
#include <iostream> |
||||||
#include <iomanip> |
|
||||||
#include <fstream> |
#include <fstream> |
||||||
|
|
||||||
void textbox_draw(cv::Mat &src, std::vector<cv::Rect> &groups,std::vector<float> &probs,std::vector<cv::String> wordList,float thres); |
using namespace cv; |
||||||
inline std::string getHelpStr(std::string progFname){ |
|
||||||
std::stringstream out; |
|
||||||
out << " Demo of text detection CNN for text detection." << std::endl; |
|
||||||
out << " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"<<std::endl<<std::endl; |
|
||||||
|
|
||||||
out << " Usage: " << progFname << " <output_file> <input_image>" << std::endl; |
|
||||||
out << " Caffe Model files (textbox.caffemodel, textbox_deploy.prototxt)"<<std::endl; |
|
||||||
out << " must be in the current directory." << std::endl << std::endl; |
|
||||||
|
|
||||||
out << " Obtaining Caffe Model files in linux shell:"<<std::endl; |
namespace |
||||||
out << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg.caffemodel"<<std::endl; |
{ |
||||||
out << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_deploy.prototxt"<<std::endl; |
std::string getHelpStr(std::string progFname) |
||||||
out << " wget http://nicolaou.homouniversalis.org/assets/vgg_text/dictnet_vgg_labels.txt"<<std::endl<<std::endl; |
{ |
||||||
|
std::stringstream out; |
||||||
|
out << " Demo of text detection CNN for text detection." << std::endl |
||||||
|
<< " Max Jaderberg et al.: Reading Text in the Wild with Convolutional Neural Networks, IJCV 2015"<<std::endl<<std::endl |
||||||
|
<< " Usage: " << progFname << " <output_file> <input_image>" << std::endl |
||||||
|
<< " Caffe Model files (textbox.caffemodel, textbox_deploy.prototxt)"<<std::endl |
||||||
|
<< " must be in the current directory." << std::endl |
||||||
|
<< " These files can be downloaded from https://github.com/sghoshcvc/TextBox-Models.git" << std::endl; |
||||||
return out.str(); |
return out.str(); |
||||||
} |
} |
||||||
|
|
||||||
inline bool fileExists (std::string filename) { |
bool fileExists (std::string filename) |
||||||
|
{ |
||||||
std::ifstream f(filename.c_str()); |
std::ifstream f(filename.c_str()); |
||||||
return f.good(); |
return f.good(); |
||||||
} |
} |
||||||
void textbox_draw(cv::Mat &src, std::vector<cv::Rect> &groups,std::vector<float> &probs,std::vector<cv::String> wordList,float thres=0.6) |
|
||||||
|
void textbox_draw(Mat src, std::vector<Rect>& groups, std::vector<float>& probs, float thres) |
||||||
{ |
{ |
||||||
for (int i=0;i<(int)groups.size(); i++) |
for (size_t i = 0; i < groups.size(); i++) |
||||||
{ |
{ |
||||||
if(probs[i]>thres) |
if(probs[i] > thres) |
||||||
{ |
{ |
||||||
if (src.type() == CV_8UC3) |
if (src.type() == CV_8UC3) |
||||||
{ |
{ |
||||||
cv::rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 0, 255, 255 ), 3, 8 ); |
rectangle(src, groups[i], Scalar( 0, 255, 255 ), 2, LINE_AA); |
||||||
cv::putText(src, wordList[i],groups.at(i).tl() , cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar( 0,0,255 )); |
String label = format("%.2f", probs[i]); |
||||||
|
std::cout << "text box: " << groups[i] << " confidence: " << probs[i] << "\n"; |
||||||
|
putText(src, label, groups.at(i).tl(), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,255 ), 1, LINE_AA); |
||||||
} |
} |
||||||
else |
else |
||||||
rectangle(src,groups.at(i).tl(),groups.at(i).br(),cv::Scalar( 255 ), 3, 8 ); |
rectangle(src, groups[i], Scalar( 255 ), 3, 8 ); |
||||||
} |
} |
||||||
} |
} |
||||||
} |
} |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
int main(int argc, const char * argv[]){ |
int main(int argc, const char * argv[]) |
||||||
if(!cv::text::cnn_config::caffe_backend::getCaffeAvailable()){ |
{ |
||||||
std::cout<<"The text module was compiled without Caffe which is the only available DeepCNN backend.\nAborting!\n"; |
if (argc < 2) |
||||||
//exit(1);
|
{ |
||||||
} |
std::cout << getHelpStr(argv[0]); |
||||||
std::vector<std::string> backends=cv::text::cnn_config::getAvailableBackends(); |
std::cout << "Insufiecient parameters. Aborting!" << std::endl; |
||||||
std::cout << "The Following backends are available" << "\n"; |
|
||||||
for (int i=0;i<backends.size();i++) |
|
||||||
std::cout << backends[i] << "\n"; |
|
||||||
|
|
||||||
// printf("%s",x);
|
|
||||||
//set to true if you have a GPU with more than 3GB
|
|
||||||
if(cv::text::cnn_config::caffe_backend::getCaffeAvailable()) |
|
||||||
cv::text::cnn_config::caffe_backend::setCaffeGpuMode(true); |
|
||||||
|
|
||||||
if (argc < 3){ |
|
||||||
std::cout<<getHelpStr(argv[0]); |
|
||||||
std::cout<<"Insufiecient parameters. Aborting!"<<std::endl; |
|
||||||
exit(1); |
exit(1); |
||||||
} |
} |
||||||
|
|
||||||
if (!fileExists("textbox.caffemodel") || |
if (!fileExists("textbox.caffemodel") || |
||||||
!fileExists("textbox_deploy.prototxt")){ |
!fileExists("textbox_deploy.prototxt")) |
||||||
// !fileExists("dictnet_vgg_labels.txt"))
|
{ |
||||||
|
|
||||||
std::cout<<getHelpStr(argv[0]); |
|
||||||
std::cout<<"Model files not found in the current directory. Aborting!"<<std::endl; |
|
||||||
exit(1); |
|
||||||
} |
|
||||||
|
|
||||||
if (fileExists(argv[1])){ |
|
||||||
std::cout<<getHelpStr(argv[0]); |
std::cout<<getHelpStr(argv[0]); |
||||||
std::cout<<"Output file must not exist. Aborting!"<<std::endl; |
std::cout << "Model files not found in the current directory. Aborting!" << std::endl; |
||||||
exit(1); |
exit(1); |
||||||
} |
} |
||||||
|
|
||||||
cv::Mat image; |
Mat image = imread(String(argv[1]), IMREAD_COLOR); |
||||||
image = cv::imread(cv::String(argv[2])); |
|
||||||
|
|
||||||
|
|
||||||
std::cout<<"Starting Text Box Demo"<<std::endl; |
std::cout << "Starting Text Box Demo" << std::endl; |
||||||
cv::Ptr<cv::text::textDetector> textSpotter=cv::text::textDetector::create( |
Ptr<text::TextDetectorCNN> textSpotter = |
||||||
"textbox_deploy.prototxt","textbox.caffemodel"); |
text::TextDetectorCNN::create("textbox_deploy.prototxt","textbox.caffemodel", false); |
||||||
|
|
||||||
//cv::Ptr<cv::text::textDetector> wordSpotter=
|
std::vector<Rect> bbox; |
||||||
// cv::text::textDetector::create(cnn);
|
|
||||||
std::cout<<"Created Text Spotter with text Boxes"; |
|
||||||
|
|
||||||
std::vector<cv::Rect> bbox; |
|
||||||
std::vector<float> outProbabillities; |
std::vector<float> outProbabillities; |
||||||
textSpotter->textDetectInImage(image,bbox,outProbabillities); |
textSpotter->textDetectInImage(image, bbox, outProbabillities); |
||||||
// textbox_draw(image, bbox,outProbabillities);
|
|
||||||
float thres =0.6f; |
|
||||||
std::vector<cv::Mat> imageList; |
|
||||||
for(int imageIdx=0;imageIdx<(int)bbox.size();imageIdx++){ |
|
||||||
if(outProbabillities[imageIdx]>thres){ |
|
||||||
imageList.push_back(image(bbox.at(imageIdx))); |
|
||||||
} |
|
||||||
|
|
||||||
} |
|
||||||
// call dict net here for all detected parts
|
|
||||||
cv::Ptr<cv::text::DeepCNN> cnn=cv::text::DeepCNN::createDictNet( |
|
||||||
"dictnet_vgg_deploy.prototxt","dictnet_vgg.caffemodel",cv::text::OCR_HOLISTIC_BACKEND_DNN); |
|
||||||
|
|
||||||
cv::Ptr<cv::text::OCRHolisticWordRecognizer> wordSpotter= |
|
||||||
cv::text::OCRHolisticWordRecognizer::create(cnn,"dictnet_vgg_labels.txt"); |
|
||||||
|
|
||||||
std::vector<cv::String> wordList; |
|
||||||
std::vector<double> wordProbabillities; |
|
||||||
wordSpotter->recogniseImageBatch(imageList,wordList,wordProbabillities); |
|
||||||
// write the output in file
|
|
||||||
std::ofstream out; |
|
||||||
out.open(argv[1]); |
|
||||||
|
|
||||||
|
|
||||||
for (int i=0;i<(int)wordList.size(); i++) |
|
||||||
{ |
|
||||||
cv::Point tl_ = bbox.at(i).tl(); |
|
||||||
cv::Point br_ = bbox.at(i).br(); |
|
||||||
|
|
||||||
out<<argv[2]<<","<<tl_.x<<","<<tl_.y<<","<<","<<br_.x<<","<<br_.y<<","<<wordList[i]<<std::endl; |
|
||||||
|
|
||||||
} |
|
||||||
out.close(); |
|
||||||
textbox_draw(image, bbox,outProbabillities,wordList); |
|
||||||
|
|
||||||
|
textbox_draw(image, bbox, outProbabillities, 0.5f); |
||||||
|
|
||||||
cv::imshow("TextBox Demo",image); |
imshow("TextBox Demo",image); |
||||||
std::cout << "Done!" << std::endl << std::endl; |
std::cout << "Done!" << std::endl << std::endl; |
||||||
std::cout << "Press any key to exit." << std::endl << std::endl; |
std::cout << "Press any key to exit." << std::endl << std::endl; |
||||||
if ((cv::waitKey()&0xff) == ' ') |
waitKey(); |
||||||
return 0; |
return 0; |
||||||
} |
} |
||||||
|
@ -1,387 +0,0 @@ |
|||||||
#include "precomp.hpp" |
|
||||||
#include "opencv2/imgproc.hpp" |
|
||||||
#include "opencv2/highgui.hpp" |
|
||||||
#include "opencv2/core.hpp" |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include <iostream> |
|
||||||
#include <fstream> |
|
||||||
#include <sstream> |
|
||||||
#include <queue> |
|
||||||
#include <algorithm> |
|
||||||
#include <iosfwd> |
|
||||||
#include <memory> |
|
||||||
#include <string> |
|
||||||
#include <utility> |
|
||||||
#include <vector> |
|
||||||
|
|
||||||
namespace cv { namespace text { |
|
||||||
//************************************************************************************
|
|
||||||
//****************** ImagePreprocessor *******************************************
|
|
||||||
//************************************************************************************
|
|
||||||
|
|
||||||
void ImagePreprocessor::preprocess(InputArray input,OutputArray output,Size sz,int outputChannels){ |
|
||||||
Mat inpImg=input.getMat(); |
|
||||||
Mat outImg; |
|
||||||
this->preprocess_(inpImg,outImg,sz,outputChannels); |
|
||||||
outImg.copyTo(output); |
|
||||||
} |
|
||||||
void ImagePreprocessor::set_mean(Mat mean){ |
|
||||||
|
|
||||||
|
|
||||||
this->set_mean_(mean); |
|
||||||
|
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ResizerPreprocessor: public ImagePreprocessor{ |
|
||||||
protected: |
|
||||||
void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ |
|
||||||
//TODO put all the logic of channel and depth conversions in ImageProcessor class
|
|
||||||
CV_Assert(outputChannels==1 || outputChannels==3); |
|
||||||
CV_Assert(input.channels()==1 || input.channels()==3); |
|
||||||
if(input.channels()!=outputChannels) |
|
||||||
{ |
|
||||||
Mat tmpInput; |
|
||||||
if(outputChannels==1){ |
|
||||||
cvtColor(input,tmpInput,COLOR_BGR2GRAY); |
|
||||||
if(input.depth()==CV_8U) |
|
||||||
{ |
|
||||||
tmpInput.convertTo(output,CV_32FC1,1/255.0); |
|
||||||
}else |
|
||||||
{//Assuming values are at the desired [0,1] range
|
|
||||||
tmpInput.convertTo(output, CV_32FC1); |
|
||||||
} |
|
||||||
}else |
|
||||||
{ |
|
||||||
cvtColor(input,tmpInput,COLOR_GRAY2BGR); |
|
||||||
if(input.depth()==CV_8U) |
|
||||||
{ |
|
||||||
tmpInput.convertTo(output,CV_32FC3,1/255.0); |
|
||||||
}else |
|
||||||
{//Assuming values are at the desired [0,1] range
|
|
||||||
tmpInput.convertTo(output, CV_32FC3); |
|
||||||
} |
|
||||||
} |
|
||||||
}else |
|
||||||
{ |
|
||||||
if(input.channels()==1) |
|
||||||
{ |
|
||||||
if(input.depth()==CV_8U) |
|
||||||
{ |
|
||||||
input.convertTo(output, CV_32FC1,1/255.0); |
|
||||||
}else |
|
||||||
{//Assuming values are at the desired [0,1] range
|
|
||||||
input.convertTo(output, CV_32FC1); |
|
||||||
} |
|
||||||
}else |
|
||||||
{ |
|
||||||
if(input.depth()==CV_8U){ |
|
||||||
input.convertTo(output, CV_32FC3,1/255.0); |
|
||||||
}else |
|
||||||
{//Assuming values are at the desired [0,1] range
|
|
||||||
input.convertTo(output, CV_32FC3); |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
if(outputSize.width!=0 && outputSize.height!=0) |
|
||||||
{ |
|
||||||
resize(output,output,outputSize); |
|
||||||
} |
|
||||||
} |
|
||||||
//void set_mean_(Mat m){}
|
|
||||||
public: |
|
||||||
ResizerPreprocessor(){} |
|
||||||
~ResizerPreprocessor(){} |
|
||||||
}; |
|
||||||
|
|
||||||
class StandarizerPreprocessor: public ImagePreprocessor{ |
|
||||||
protected: |
|
||||||
double sigma_; |
|
||||||
//void set_mean_(Mat M){}
|
|
||||||
|
|
||||||
void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ |
|
||||||
|
|
||||||
//TODO put all the logic of channel and depth conversions in ImageProcessor class
|
|
||||||
CV_Assert(outputChannels==1 || outputChannels==3); |
|
||||||
CV_Assert(input.channels()==1 || input.channels()==3); |
|
||||||
if(input.channels()!=outputChannels) |
|
||||||
{ |
|
||||||
Mat tmpInput; |
|
||||||
if(outputChannels==1) |
|
||||||
{ |
|
||||||
cvtColor(input,tmpInput,COLOR_BGR2GRAY); |
|
||||||
if(input.depth()==CV_8U) |
|
||||||
{ |
|
||||||
tmpInput.convertTo(output,CV_32FC1,1/255.0); |
|
||||||
}else |
|
||||||
{//Assuming values are at the desired [0,1] range
|
|
||||||
tmpInput.convertTo(output, CV_32FC1); |
|
||||||
} |
|
||||||
}else |
|
||||||
{ |
|
||||||
cvtColor(input,tmpInput,COLOR_GRAY2BGR); |
|
||||||
if(input.depth()==CV_8U) |
|
||||||
{ |
|
||||||
tmpInput.convertTo(output,CV_32FC3,1/255.0); |
|
||||||
}else |
|
||||||
{//Assuming values are at the desired [0,1] range
|
|
||||||
tmpInput.convertTo(output, CV_32FC3); |
|
||||||
} |
|
||||||
} |
|
||||||
}else |
|
||||||
{ |
|
||||||
if(input.channels()==1) |
|
||||||
{ |
|
||||||
if(input.depth()==CV_8U) |
|
||||||
{ |
|
||||||
input.convertTo(output, CV_32FC1,1/255.0); |
|
||||||
}else |
|
||||||
{//Assuming values are at the desired [0,1] range
|
|
||||||
input.convertTo(output, CV_32FC1); |
|
||||||
} |
|
||||||
}else |
|
||||||
{ |
|
||||||
if(input.depth()==CV_8U) |
|
||||||
{ |
|
||||||
input.convertTo(output, CV_32FC3,1/255.0); |
|
||||||
}else |
|
||||||
{//Assuming values are at the desired [0,1] range
|
|
||||||
input.convertTo(output, CV_32FC3); |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
if(outputSize.width!=0 && outputSize.height!=0) |
|
||||||
{ |
|
||||||
resize(output,output,outputSize); |
|
||||||
} |
|
||||||
|
|
||||||
Scalar mean,dev; |
|
||||||
meanStdDev(output,mean,dev); |
|
||||||
subtract(output,mean[0],output); |
|
||||||
divide(output,(dev[0]/sigma_),output); |
|
||||||
} |
|
||||||
public: |
|
||||||
StandarizerPreprocessor(double sigma):sigma_(sigma){} |
|
||||||
~StandarizerPreprocessor(){} |
|
||||||
|
|
||||||
}; |
|
||||||
|
|
||||||
class customPreprocessor:public ImagePreprocessor{ |
|
||||||
protected: |
|
||||||
|
|
||||||
double rawval_; |
|
||||||
Mat mean_; |
|
||||||
String channel_order_; |
|
||||||
|
|
||||||
void set_mean_(Mat imMean_){ |
|
||||||
|
|
||||||
imMean_.copyTo(this->mean_); |
|
||||||
|
|
||||||
|
|
||||||
} |
|
||||||
|
|
||||||
void set_raw_scale(int rawval){ |
|
||||||
rawval_ = rawval; |
|
||||||
|
|
||||||
} |
|
||||||
void set_channels(String channel_order){ |
|
||||||
channel_order_=channel_order; |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ |
|
||||||
//TODO put all the logic of channel and depth conversions in ImageProcessor class
|
|
||||||
|
|
||||||
CV_Assert(outputChannels==1 || outputChannels==3); |
|
||||||
CV_Assert(input.channels()==1 || input.channels()==3); |
|
||||||
if(input.channels()!=outputChannels) |
|
||||||
{ |
|
||||||
Mat tmpInput; |
|
||||||
if(outputChannels==1) |
|
||||||
{ |
|
||||||
cvtColor(input,tmpInput,COLOR_BGR2GRAY); |
|
||||||
if(input.depth()==CV_8U) |
|
||||||
{ |
|
||||||
if (rawval_ == 1) |
|
||||||
tmpInput.convertTo(output,CV_32FC3,1/255.0); |
|
||||||
else |
|
||||||
tmpInput.convertTo(output,CV_32FC1); |
|
||||||
}else |
|
||||||
{//Assuming values are at the desired [0,1] range
|
|
||||||
if (rawval_ ==1) |
|
||||||
tmpInput.convertTo(output, CV_32FC1); |
|
||||||
else |
|
||||||
tmpInput.convertTo(output, CV_32FC1,rawval_); |
|
||||||
} |
|
||||||
}else |
|
||||||
{ |
|
||||||
cvtColor(input,tmpInput,COLOR_GRAY2BGR); |
|
||||||
if(input.depth()==CV_8U) |
|
||||||
{ |
|
||||||
if (rawval_ == 1) |
|
||||||
tmpInput.convertTo(output,CV_32FC3,1/255.0); |
|
||||||
else |
|
||||||
tmpInput.convertTo(output,CV_32FC1); |
|
||||||
}else |
|
||||||
{//Assuming values are at the desired [0,1] range
|
|
||||||
if (rawval_ ==1) |
|
||||||
tmpInput.convertTo(output, CV_32FC1); |
|
||||||
else |
|
||||||
tmpInput.convertTo(output, CV_32FC1,rawval_); |
|
||||||
} |
|
||||||
} |
|
||||||
}else |
|
||||||
{ |
|
||||||
if(input.channels()==1) |
|
||||||
{ |
|
||||||
if(input.depth()==CV_8U) |
|
||||||
{ |
|
||||||
if (rawval_ == 1) |
|
||||||
input.convertTo(output,CV_32FC1,1/255.0); |
|
||||||
else |
|
||||||
input.convertTo(output,CV_32FC1); |
|
||||||
}else |
|
||||||
{//Assuming values are at the desired [0,1] range
|
|
||||||
if (rawval_ ==1) |
|
||||||
input.convertTo(output, CV_32FC1); |
|
||||||
else |
|
||||||
input.convertTo(output, CV_32FC1,rawval_); |
|
||||||
} |
|
||||||
}else |
|
||||||
{ |
|
||||||
if(input.depth()==CV_8U) |
|
||||||
{ |
|
||||||
if (rawval_ == 1) |
|
||||||
input.convertTo(output,CV_32FC3,1/255.0); |
|
||||||
else |
|
||||||
input.convertTo(output,CV_32FC3); |
|
||||||
}else |
|
||||||
{//Assuming values are at the desired [0,1] range
|
|
||||||
if (rawval_ ==1) |
|
||||||
input.convertTo(output, CV_32FC3); |
|
||||||
else |
|
||||||
input.convertTo(output, CV_32FC3,rawval_); |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
if(outputSize.width!=0 && outputSize.height!=0) |
|
||||||
{ |
|
||||||
resize(output,output,outputSize); |
|
||||||
} |
|
||||||
|
|
||||||
if (!this->mean_.empty()){ |
|
||||||
|
|
||||||
Scalar mean_s(this->mean_.at<uchar>(0,0),this->mean_.at<uchar>(0,1),this->mean_.at<uchar>(0,2)); |
|
||||||
subtract(output,mean_s,output); |
|
||||||
} |
|
||||||
else{ |
|
||||||
Scalar mean_s; |
|
||||||
mean_s = mean(output); |
|
||||||
subtract(output,mean_s,output); |
|
||||||
} |
|
||||||
|
|
||||||
} |
|
||||||
|
|
||||||
public: |
|
||||||
customPreprocessor( double rawval,String channel_order):rawval_(rawval),channel_order_(channel_order){} |
|
||||||
~customPreprocessor(){} |
|
||||||
|
|
||||||
}; |
|
||||||
|
|
||||||
class MeanSubtractorPreprocessor: public ImagePreprocessor{ |
|
||||||
protected: |
|
||||||
Mat mean_; |
|
||||||
//void set_mean_(Mat m){}
|
|
||||||
void preprocess_(const Mat& input,Mat& output,Size outputSize,int outputChannels){ |
|
||||||
//TODO put all the logic of channel and depth conversions in ImageProcessor class
|
|
||||||
CV_Assert(this->mean_.cols==outputSize.width && this->mean_.rows ==outputSize.height); |
|
||||||
CV_Assert(outputChannels==1 || outputChannels==3); |
|
||||||
CV_Assert(input.channels()==1 || input.channels()==3); |
|
||||||
if(input.channels()!=outputChannels) |
|
||||||
{ |
|
||||||
Mat tmpInput; |
|
||||||
if(outputChannels==1) |
|
||||||
{ |
|
||||||
cvtColor(input,tmpInput,COLOR_BGR2GRAY); |
|
||||||
if(input.depth()==CV_8U) |
|
||||||
{ |
|
||||||
tmpInput.convertTo(output,CV_32FC1,1/255.0); |
|
||||||
}else |
|
||||||
{//Assuming values are at the desired [0,1] range
|
|
||||||
tmpInput.convertTo(output, CV_32FC1); |
|
||||||
} |
|
||||||
}else |
|
||||||
{ |
|
||||||
cvtColor(input,tmpInput,COLOR_GRAY2BGR); |
|
||||||
if(input.depth()==CV_8U) |
|
||||||
{ |
|
||||||
tmpInput.convertTo(output,CV_32FC3,1/255.0); |
|
||||||
}else |
|
||||||
{//Assuming values are at the desired [0,1] range
|
|
||||||
tmpInput.convertTo(output, CV_32FC3); |
|
||||||
} |
|
||||||
} |
|
||||||
}else |
|
||||||
{ |
|
||||||
if(input.channels()==1) |
|
||||||
{ |
|
||||||
if(input.depth()==CV_8U) |
|
||||||
{ |
|
||||||
input.convertTo(output, CV_32FC1,1/255.0); |
|
||||||
}else |
|
||||||
{//Assuming values are at the desired [0,1] range
|
|
||||||
input.convertTo(output, CV_32FC1); |
|
||||||
} |
|
||||||
}else |
|
||||||
{ |
|
||||||
if(input.depth()==CV_8U) |
|
||||||
{ |
|
||||||
input.convertTo(output, CV_32FC3,1/255.0); |
|
||||||
}else |
|
||||||
{//Assuming values are at the desired [0,1] range
|
|
||||||
input.convertTo(output, CV_32FC3); |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
if(outputSize.width!=0 && outputSize.height!=0) |
|
||||||
{ |
|
||||||
resize(output,output,outputSize); |
|
||||||
} |
|
||||||
subtract(output,this->mean_,output); |
|
||||||
} |
|
||||||
public: |
|
||||||
MeanSubtractorPreprocessor(Mat mean) |
|
||||||
{ |
|
||||||
mean.copyTo(this->mean_); |
|
||||||
} |
|
||||||
|
|
||||||
~MeanSubtractorPreprocessor(){} |
|
||||||
}; |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Ptr<ImagePreprocessor> ImagePreprocessor::createResizer() |
|
||||||
{ |
|
||||||
return Ptr<ImagePreprocessor>(new ResizerPreprocessor); |
|
||||||
} |
|
||||||
|
|
||||||
Ptr<ImagePreprocessor> ImagePreprocessor::createImageStandarizer(double sigma) |
|
||||||
{ |
|
||||||
return Ptr<ImagePreprocessor>(new StandarizerPreprocessor(sigma)); |
|
||||||
} |
|
||||||
Ptr<ImagePreprocessor> ImagePreprocessor::createImageCustomPreprocessor(double rawval,String channel_order) |
|
||||||
{ |
|
||||||
|
|
||||||
return Ptr<ImagePreprocessor>(new customPreprocessor(rawval,channel_order)); |
|
||||||
} |
|
||||||
|
|
||||||
Ptr<ImagePreprocessor> ImagePreprocessor::createImageMeanSubtractor(InputArray meanImg) |
|
||||||
{ |
|
||||||
Mat tmp=meanImg.getMat(); |
|
||||||
return Ptr<ImagePreprocessor>(new MeanSubtractorPreprocessor(tmp)); |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
@ -1,697 +0,0 @@ |
|||||||
#include "precomp.hpp" |
|
||||||
#include "opencv2/imgproc.hpp" |
|
||||||
#include "opencv2/highgui.hpp" |
|
||||||
#include "opencv2/core.hpp" |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include <iostream> |
|
||||||
#include <fstream> |
|
||||||
#include <sstream> |
|
||||||
#include <queue> |
|
||||||
#include <algorithm> |
|
||||||
#include <iosfwd> |
|
||||||
#include <memory> |
|
||||||
#include <string> |
|
||||||
#include <utility> |
|
||||||
#include <vector> |
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_CAFFE |
|
||||||
#include "caffe/caffe.hpp" |
|
||||||
#endif |
|
||||||
|
|
||||||
#ifdef HAVE_DNN |
|
||||||
#include "opencv2/dnn.hpp" |
|
||||||
#endif |
|
||||||
|
|
||||||
using namespace cv; |
|
||||||
using namespace cv::dnn; |
|
||||||
using namespace std; |
|
||||||
namespace cv { namespace text { |
|
||||||
|
|
||||||
//Maybe OpenCV has a routine better suited
|
|
||||||
// Reports whether `filename` can be opened for reading: it opens an input
// stream on the path and returns the stream's good() state.
inline bool fileExists (String filename) {
    std::ifstream probe(filename.c_str());
    const bool opened = probe.good();
    return opened;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//************************************************************************************
|
|
||||||
//****************** TextImageClassifier *****************************************
|
|
||||||
//************************************************************************************
|
|
||||||
|
|
||||||
void TextImageClassifier::preprocess(const Mat& input,Mat& output) |
|
||||||
{ |
|
||||||
this->preprocessor_->preprocess_(input,output,this->inputGeometry_,this->channelCount_); |
|
||||||
} |
|
||||||
|
|
||||||
// Installs `ptr` as the preprocessor used by preprocess().
// An empty pointer is rejected with a CV_Assert failure.
void TextImageClassifier::setPreprocessor(Ptr<ImagePreprocessor> ptr)
{
    CV_Assert(!ptr.empty());
    this->preprocessor_ = ptr;
}
|
||||||
|
|
||||||
// Returns the currently installed preprocessor (shared, not copied).
Ptr<ImagePreprocessor> TextImageClassifier::getPreprocessor()
{
    return this->preprocessor_;
}
|
||||||
|
|
||||||
|
|
||||||
class DeepCNNCaffeImpl: public DeepCNN{ |
|
||||||
protected: |
|
||||||
void classifyMiniBatch(std::vector<Mat> inputImageList, Mat outputMat) |
|
||||||
{ |
|
||||||
//Classifies a list of images containing at most minibatchSz_ images
|
|
||||||
CV_Assert(int(inputImageList.size())<=this->minibatchSz_); |
|
||||||
CV_Assert(outputMat.isContinuous()); |
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_CAFFE |
|
||||||
net_->input_blobs()[0]->Reshape(inputImageList.size(), this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width); |
|
||||||
net_->Reshape(); |
|
||||||
float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data(); |
|
||||||
float* inputData=inputBuffer; |
|
||||||
|
|
||||||
for(size_t imgNum=0;imgNum<inputImageList.size();imgNum++) |
|
||||||
{ |
|
||||||
std::vector<Mat> input_channels; |
|
||||||
Mat preprocessed; |
|
||||||
// if the image have multiple color channels the input layer should be populated accordingly
|
|
||||||
for (int channel=0;channel < this->channelCount_;channel++){ |
|
||||||
|
|
||||||
cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData); |
|
||||||
input_channels.push_back(netInputWraped); |
|
||||||
//input_data += width * height;
|
|
||||||
inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); |
|
||||||
|
|
||||||
} |
|
||||||
this->preprocess(inputImageList[imgNum],preprocessed); |
|
||||||
split(preprocessed, input_channels); |
|
||||||
|
|
||||||
|
|
||||||
} |
|
||||||
this->net_->ForwardPrefilled(); |
|
||||||
const float* outputNetData=net_->output_blobs()[0]->cpu_data(); |
|
||||||
this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); |
|
||||||
int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width; |
|
||||||
|
|
||||||
|
|
||||||
//outputMat.resize(this->outputGeometry_.height * this->outputGeometry_.width);
|
|
||||||
float*outputMatData=(float*)(outputMat.data); |
|
||||||
memcpy(outputMatData,outputNetData,sizeof(float)*outputSz*inputImageList.size()); |
|
||||||
|
|
||||||
#endif |
|
||||||
} |
|
||||||
|
|
||||||
#ifdef HAVE_CAFFE |
|
||||||
Ptr<caffe::Net<float> > net_; |
|
||||||
#endif |
|
||||||
//Size inputGeometry_;//=Size(100,32);
|
|
||||||
int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
|
|
||||||
int outputSize_; |
|
||||||
//Size outputGeometry_;
|
|
||||||
public: |
|
||||||
DeepCNNCaffeImpl(const DeepCNNCaffeImpl& dn): |
|
||||||
minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){ |
|
||||||
channelCount_=dn.channelCount_; |
|
||||||
inputGeometry_=dn.inputGeometry_; |
|
||||||
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
|
|
||||||
#ifdef HAVE_CAFFE |
|
||||||
this->net_=dn.net_; |
|
||||||
#endif |
|
||||||
} |
|
||||||
DeepCNNCaffeImpl& operator=(const DeepCNNCaffeImpl &dn) |
|
||||||
{ |
|
||||||
#ifdef HAVE_CAFFE |
|
||||||
this->net_=dn.net_; |
|
||||||
#endif |
|
||||||
this->setPreprocessor(dn.preprocessor_); |
|
||||||
this->inputGeometry_=dn.inputGeometry_; |
|
||||||
this->channelCount_=dn.channelCount_; |
|
||||||
this->minibatchSz_=dn.minibatchSz_; |
|
||||||
this->outputSize_=dn.outputSize_; |
|
||||||
this->preprocessor_=dn.preprocessor_; |
|
||||||
this->outputGeometry_=dn.outputGeometry_; |
|
||||||
return *this; |
|
||||||
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
|
|
||||||
} |
|
||||||
|
|
||||||
DeepCNNCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz) |
|
||||||
:minibatchSz_(maxMinibatchSz) |
|
||||||
{ |
|
||||||
|
|
||||||
CV_Assert(this->minibatchSz_>0); |
|
||||||
CV_Assert(fileExists(modelArchFilename)); |
|
||||||
CV_Assert(fileExists(modelWeightsFilename)); |
|
||||||
CV_Assert(!preprocessor.empty()); |
|
||||||
this->setPreprocessor(preprocessor); |
|
||||||
#ifdef HAVE_CAFFE |
|
||||||
this->net_.reset(new caffe::Net<float>(modelArchFilename, caffe::TEST)); |
|
||||||
CV_Assert(net_->num_inputs()==1); |
|
||||||
CV_Assert(net_->num_outputs()==1); |
|
||||||
CV_Assert(this->net_->input_blobs()[0]->channels()==1 |
|
||||||
||this->net_->input_blobs()[0]->channels()==3); |
|
||||||
this->channelCount_=this->net_->input_blobs()[0]->channels(); |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
this->net_->CopyTrainedLayersFrom(modelWeightsFilename); |
|
||||||
|
|
||||||
caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0]; |
|
||||||
|
|
||||||
this->inputGeometry_=Size(inputLayer->width(), inputLayer->height()); |
|
||||||
this->channelCount_ = inputLayer->channels(); |
|
||||||
|
|
||||||
inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width); |
|
||||||
net_->Reshape(); |
|
||||||
this->outputSize_=net_->output_blobs()[0]->channels(); |
|
||||||
this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height()); |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#else |
|
||||||
CV_Error(Error::StsError,"Caffe not available during compilation!"); |
|
||||||
#endif |
|
||||||
} |
|
||||||
|
|
||||||
void classify(InputArray image, OutputArray classProbabilities) |
|
||||||
{ |
|
||||||
std::vector<Mat> inputImageList; |
|
||||||
inputImageList.push_back(image.getMat()); |
|
||||||
classifyBatch(inputImageList,classProbabilities); |
|
||||||
} |
|
||||||
|
|
||||||
void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities) |
|
||||||
{ |
|
||||||
std::vector<Mat> allImageVector; |
|
||||||
inputImageList.getMatVector(allImageVector); |
|
||||||
size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmentic
|
|
||||||
|
|
||||||
size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic
|
|
||||||
classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F); |
|
||||||
Mat outputMat = classProbabilities.getMat(); |
|
||||||
for(size_t imgNum=0;imgNum<allImageVector.size();imgNum+=minibatchSize) |
|
||||||
{ |
|
||||||
size_t rangeEnd=imgNum+std::min<size_t>(allImageVector.size()-imgNum,minibatchSize); |
|
||||||
std::vector<Mat>::const_iterator from=std::vector<Mat>::const_iterator(allImageVector.begin()+imgNum); |
|
||||||
std::vector<Mat>::const_iterator to=std::vector<Mat>::const_iterator(allImageVector.begin()+rangeEnd); |
|
||||||
std::vector<Mat> minibatchInput(from,to); |
|
||||||
classifyMiniBatch(minibatchInput,outputMat.rowRange(int(imgNum),int(rangeEnd))); |
|
||||||
|
|
||||||
} |
|
||||||
|
|
||||||
} |
|
||||||
|
|
||||||
int getOutputSize() |
|
||||||
{ |
|
||||||
return this->outputSize_; |
|
||||||
} |
|
||||||
Size getOutputGeometry() |
|
||||||
{ |
|
||||||
return this->outputGeometry_; |
|
||||||
} |
|
||||||
|
|
||||||
int getMinibatchSize() |
|
||||||
{ |
|
||||||
return this->minibatchSz_; |
|
||||||
} |
|
||||||
|
|
||||||
int getBackend() |
|
||||||
{ |
|
||||||
return OCR_HOLISTIC_BACKEND_CAFFE; |
|
||||||
} |
|
||||||
}; |
|
||||||
|
|
||||||
class DeepCNNOpenCvDNNImpl: public DeepCNN{ |
|
||||||
protected: |
|
||||||
|
|
||||||
void classifyMiniBatch(std::vector<Mat> inputImageList, Mat outputMat) |
|
||||||
{ |
|
||||||
//Classifies a list of images containing at most minibatchSz_ images
|
|
||||||
CV_Assert(int(inputImageList.size())<=this->minibatchSz_); |
|
||||||
CV_Assert(outputMat.isContinuous()); |
|
||||||
|
|
||||||
#ifdef HAVE_DNN |
|
||||||
|
|
||||||
std::vector<Mat> preProcessedImList; // to store preprocessed images, should it be handled inside preprocessing class?
|
|
||||||
|
|
||||||
Mat preprocessed; |
|
||||||
// preprocesses each image in the inputImageList and push to preprocessedImList
|
|
||||||
for(size_t imgNum=0;imgNum<inputImageList.size();imgNum++) |
|
||||||
{ |
|
||||||
this->preprocess(inputImageList[imgNum],preprocessed); |
|
||||||
preProcessedImList.push_back(preprocessed); |
|
||||||
} |
|
||||||
// set input data blob in dnn::net
|
|
||||||
net_->setInput(blobFromImages(preProcessedImList,1, this->inputGeometry_), "data"); |
|
||||||
|
|
||||||
float*outputMatData=(float*)(outputMat.data); |
|
||||||
//Mat outputNet(inputImageList.size(),this->outputSize_,CV_32FC1,outputMatData) ;
|
|
||||||
Mat outputNet = this->net_->forward(); |
|
||||||
outputNet = outputNet.reshape(1, 1); |
|
||||||
|
|
||||||
float*outputNetData=(float*)(outputNet.data); |
|
||||||
|
|
||||||
memcpy(outputMatData,outputNetData,sizeof(float)*this->outputSize_*inputImageList.size()); |
|
||||||
|
|
||||||
#endif |
|
||||||
} |
|
||||||
|
|
||||||
#ifdef HAVE_DNN |
|
||||||
Ptr<Net> net_; |
|
||||||
#endif |
|
||||||
// hard coding input image size. anything in DNN library to get that from prototxt??
|
|
||||||
// Size inputGeometry_;//=Size(100,32);
|
|
||||||
int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
|
|
||||||
int outputSize_; |
|
||||||
//Size outputGeometry_;//= Size(1,1);
|
|
||||||
//int channelCount_;
|
|
||||||
// int inputChannel_ ;//=1;
|
|
||||||
// int _inputHeight;
|
|
||||||
//int _inputWidth ;
|
|
||||||
//int _inputChannel ;
|
|
||||||
public: |
|
||||||
DeepCNNOpenCvDNNImpl(const DeepCNNOpenCvDNNImpl& dn): |
|
||||||
minibatchSz_(dn.minibatchSz_),outputSize_(dn.outputSize_){ |
|
||||||
channelCount_=dn.channelCount_; |
|
||||||
inputGeometry_=dn.inputGeometry_; |
|
||||||
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
|
|
||||||
#ifdef HAVE_DNN |
|
||||||
this->net_=dn.net_; |
|
||||||
#endif |
|
||||||
} |
|
||||||
DeepCNNOpenCvDNNImpl& operator=(const DeepCNNOpenCvDNNImpl &dn) |
|
||||||
{ |
|
||||||
#ifdef HAVE_DNN |
|
||||||
this->net_=dn.net_; |
|
||||||
#endif |
|
||||||
this->setPreprocessor(dn.preprocessor_); |
|
||||||
this->inputGeometry_=dn.inputGeometry_; |
|
||||||
this->channelCount_=dn.channelCount_; |
|
||||||
this->minibatchSz_=dn.minibatchSz_; |
|
||||||
this->outputSize_=dn.outputSize_; |
|
||||||
this->preprocessor_=dn.preprocessor_; |
|
||||||
this->outputGeometry_=dn.outputGeometry_; |
|
||||||
return *this; |
|
||||||
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
|
|
||||||
} |
|
||||||
|
|
||||||
DeepCNNOpenCvDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz,int inputWidth ,int inputHeight ,int inputChannel ) |
|
||||||
:minibatchSz_(maxMinibatchSz) |
|
||||||
{ |
|
||||||
|
|
||||||
CV_Assert(this->minibatchSz_>0); |
|
||||||
CV_Assert(fileExists(modelArchFilename)); |
|
||||||
CV_Assert(fileExists(modelWeightsFilename)); |
|
||||||
CV_Assert(!preprocessor.empty()); |
|
||||||
this->setPreprocessor(preprocessor); |
|
||||||
#ifdef HAVE_DNN |
|
||||||
|
|
||||||
this->net_ = makePtr<Net>(readNetFromCaffe(modelArchFilename,modelWeightsFilename)); |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if (this->net_.empty()) |
|
||||||
{ |
|
||||||
std::cerr << "Can't load network by using the following files: " << std::endl; |
|
||||||
std::cerr << "prototxt: " << modelArchFilename << std::endl; |
|
||||||
std::cerr << "caffemodel: " << modelWeightsFilename << std::endl; |
|
||||||
//std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl;
|
|
||||||
//std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
|
|
||||||
exit(-1); |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
this->inputGeometry_=Size(inputWidth,inputHeight);// Size(inputLayer->width(), inputLayer->height());
|
|
||||||
this->channelCount_ = inputChannel;//inputLayer->channels();
|
|
||||||
|
|
||||||
//inputLayer->Reshape(this->minibatchSz_,this->channelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
|
|
||||||
Ptr< Layer > outLayer= net_->getLayer (net_->getLayerId (net_->getLayerNames()[net_->getLayerNames().size()-2])); |
|
||||||
//std::vector<Mat> blobs = outLayer->blobs;
|
|
||||||
|
|
||||||
this->outputSize_=(outLayer->blobs)[1].size[0] ;//net_->output_blobs()[0]->channels();
|
|
||||||
//this->outputGeometry_ = Size(1,1);//Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#else |
|
||||||
CV_Error(Error::StsError,"DNN module not available during compilation!"); |
|
||||||
#endif |
|
||||||
} |
|
||||||
|
|
||||||
void classify(InputArray image, OutputArray classProbabilities) |
|
||||||
{ |
|
||||||
std::vector<Mat> inputImageList; |
|
||||||
inputImageList.push_back(image.getMat()); |
|
||||||
classifyBatch(inputImageList,classProbabilities); |
|
||||||
} |
|
||||||
|
|
||||||
void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities) |
|
||||||
{ |
|
||||||
std::vector<Mat> allImageVector; |
|
||||||
inputImageList.getMatVector(allImageVector); |
|
||||||
size_t outputSize=size_t(this->outputSize_);//temporary variable to avoid int to size_t arithmentic
|
|
||||||
|
|
||||||
size_t minibatchSize=size_t(this->minibatchSz_);//temporary variable to avoid int to size_t arithmentic
|
|
||||||
classProbabilities.create(Size(int(outputSize),int(allImageVector.size())),CV_32F); |
|
||||||
Mat outputMat = classProbabilities.getMat(); |
|
||||||
|
|
||||||
for(size_t imgNum=0;imgNum<allImageVector.size();imgNum+=minibatchSize) |
|
||||||
{ |
|
||||||
size_t rangeEnd=imgNum+std::min<size_t>(allImageVector.size()-imgNum,minibatchSize); |
|
||||||
std::vector<Mat>::const_iterator from=std::vector<Mat>::const_iterator(allImageVector.begin()+imgNum); |
|
||||||
std::vector<Mat>::const_iterator to=std::vector<Mat>::const_iterator(allImageVector.begin()+rangeEnd); |
|
||||||
std::vector<Mat> minibatchInput(from,to); |
|
||||||
classifyMiniBatch(minibatchInput,outputMat.rowRange(int(imgNum),int(rangeEnd))); |
|
||||||
|
|
||||||
} |
|
||||||
|
|
||||||
} |
|
||||||
|
|
||||||
int getOutputSize() |
|
||||||
{ |
|
||||||
return this->outputSize_; |
|
||||||
} |
|
||||||
Size getOutputGeometry() |
|
||||||
{ |
|
||||||
return this->outputGeometry_; |
|
||||||
} |
|
||||||
|
|
||||||
int getMinibatchSize() |
|
||||||
{ |
|
||||||
return this->minibatchSz_; |
|
||||||
} |
|
||||||
|
|
||||||
int getBackend() |
|
||||||
{ |
|
||||||
return OCR_HOLISTIC_BACKEND_DNN; |
|
||||||
} |
|
||||||
}; |
|
||||||
|
|
||||||
// Creates a DeepCNN for the requested backend.
// An empty `preprocessor` is replaced by ImagePreprocessor::createResizer().
// OCR_HOLISTIC_BACKEND_DEFAULT resolves at compile time: Caffe when
// HAVE_CAFFE is defined, otherwise OpenCV dnn when HAVE_DNN is defined,
// otherwise no backend. The dnn backend is created for a 100x32 input with
// one channel. Any other value raises Error::StsError.
Ptr<DeepCNN> DeepCNN::create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz,int backEnd)
{
    if(preprocessor.empty())
        preprocessor=ImagePreprocessor::createResizer();

    // Turn the "default" request into a concrete backend first.
    if(backEnd==OCR_HOLISTIC_BACKEND_DEFAULT)
    {
#ifdef HAVE_CAFFE
        backEnd=OCR_HOLISTIC_BACKEND_CAFFE;
#elif defined(HAVE_DNN)
        backEnd=OCR_HOLISTIC_BACKEND_DNN;
#else
        backEnd=OCR_HOLISTIC_BACKEND_NONE;
#endif
    }

    if(backEnd==OCR_HOLISTIC_BACKEND_CAFFE)
        return Ptr<DeepCNN>(new DeepCNNCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz));

    if(backEnd==OCR_HOLISTIC_BACKEND_DNN)
        return Ptr<DeepCNN>(new DeepCNNOpenCvDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,100,32,1));

    // OCR_HOLISTIC_BACKEND_NONE and every unknown value end up here.
    CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
    return Ptr<DeepCNN>();
}
|
||||||
|
|
||||||
|
|
||||||
// Creates a DeepCNN with the DictNet settings: an image standarizer built
// with 113 as preprocessor and a minibatch size of 100. Backend selection and
// the error for an unavailable backend are the ones of DeepCNN::create, which
// performs the identical dispatch.
Ptr<DeepCNN> DeepCNN::createDictNet(String archFilename,String weightsFilename,int backEnd)
{
    Ptr<ImagePreprocessor> standarizer=ImagePreprocessor::createImageStandarizer(113);
    return DeepCNN::create(archFilename, weightsFilename, standarizer, 100, backEnd);
}
|
||||||
|
|
||||||
namespace cnn_config{ |
|
||||||
std::vector<std::string> getAvailableBackends() |
|
||||||
{ |
|
||||||
std::vector<std::string> backends; |
|
||||||
|
|
||||||
#ifdef HAVE_CAFFE |
|
||||||
backends.push_back("CAFFE, OCR_HOLISTIC_BACKEND_CAFFE"); // dnn backend opencv_dnn
|
|
||||||
|
|
||||||
#endif |
|
||||||
#ifdef HAVE_DNN |
|
||||||
backends.push_back("DNN, OCR_HOLISTIC_BACKEND_DNN");// opencv_dnn based backend"
|
|
||||||
#endif |
|
||||||
return backends; |
|
||||||
|
|
||||||
|
|
||||||
} |
|
||||||
|
|
||||||
namespace caffe_backend{ |
|
||||||
|
|
||||||
#ifdef HAVE_CAFFE |
|
||||||
|
|
||||||
bool getCaffeGpuMode() |
|
||||||
{ |
|
||||||
return caffe::Caffe::mode()==caffe::Caffe::GPU; |
|
||||||
} |
|
||||||
|
|
||||||
void setCaffeGpuMode(bool useGpu) |
|
||||||
{ |
|
||||||
if(useGpu) |
|
||||||
{ |
|
||||||
caffe::Caffe::set_mode(caffe::Caffe::GPU); |
|
||||||
}else |
|
||||||
{ |
|
||||||
caffe::Caffe::set_mode(caffe::Caffe::CPU); |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
bool getCaffeAvailable() |
|
||||||
{ |
|
||||||
return true; |
|
||||||
} |
|
||||||
#else |
|
||||||
|
|
||||||
bool getCaffeGpuMode() |
|
||||||
{ |
|
||||||
CV_Error(Error::StsError,"Caffe not available during compilation!"); |
|
||||||
return 0; |
|
||||||
} |
|
||||||
|
|
||||||
void setCaffeGpuMode(bool useGpu) |
|
||||||
{ |
|
||||||
CV_Error(Error::StsError,"Caffe not available during compilation!"); |
|
||||||
CV_Assert(useGpu==1);//Compilation directives force
|
|
||||||
} |
|
||||||
|
|
||||||
bool getCaffeAvailable(){ |
|
||||||
return 0; |
|
||||||
} |
|
||||||
|
|
||||||
#endif |
|
||||||
|
|
||||||
}//namespace caffe
|
|
||||||
namespace dnn_backend{ |
|
||||||
#ifdef HAVE_DNN |
|
||||||
|
|
||||||
|
|
||||||
bool getDNNAvailable(){ |
|
||||||
return true; |
|
||||||
} |
|
||||||
#else |
|
||||||
bool getDNNAvailable(){ |
|
||||||
return 0; |
|
||||||
} |
|
||||||
#endif |
|
||||||
}//namspace dnn_backend
|
|
||||||
}//namespace cnn_config
|
|
||||||
|
|
||||||
class OCRHolisticWordRecognizerImpl: public OCRHolisticWordRecognizer{ |
|
||||||
private: |
|
||||||
struct NetOutput{ |
|
||||||
//Auxiliary structure that handles the logic of getting class ids and probabillities from
|
|
||||||
//the raw outputs of caffe
|
|
||||||
int wordIdx; |
|
||||||
float probabillity; |
|
||||||
|
|
||||||
static bool sorter(const NetOutput& o1,const NetOutput& o2) |
|
||||||
{//used with std::sort to provide the most probable class
|
|
||||||
return o1.probabillity>o2.probabillity; |
|
||||||
} |
|
||||||
|
|
||||||
static void getOutputs(const float* buffer,int nbOutputs,std::vector<NetOutput>& res) |
|
||||||
{ |
|
||||||
res.resize(nbOutputs); |
|
||||||
for(int k=0;k<nbOutputs;k++) |
|
||||||
{ |
|
||||||
res[k].wordIdx=k; |
|
||||||
res[k].probabillity=buffer[k]; |
|
||||||
} |
|
||||||
std::sort(res.begin(),res.end(),NetOutput::sorter); |
|
||||||
} |
|
||||||
|
|
||||||
static void getClassification(const float* buffer,int nbOutputs,int &classNum,double& confidence) |
|
||||||
{ |
|
||||||
std::vector<NetOutput> tmp; |
|
||||||
getOutputs(buffer,nbOutputs,tmp); |
|
||||||
classNum=tmp[0].wordIdx; |
|
||||||
confidence=tmp[0].probabillity; |
|
||||||
|
|
||||||
} |
|
||||||
}; |
|
||||||
protected: |
|
||||||
std::vector<String> labels_; |
|
||||||
Ptr<TextImageClassifier> classifier_; |
|
||||||
public: |
|
||||||
OCRHolisticWordRecognizerImpl(Ptr<TextImageClassifier> classifierPtr,String vocabularyFilename):classifier_(classifierPtr) |
|
||||||
{ |
|
||||||
CV_Assert(fileExists(vocabularyFilename));//this fails for some rason
|
|
||||||
std::ifstream labelsFile(vocabularyFilename.c_str()); |
|
||||||
if(!labelsFile) |
|
||||||
{ |
|
||||||
CV_Error(Error::StsError,"Could not read Labels from file"); |
|
||||||
} |
|
||||||
std::string line; |
|
||||||
while (std::getline(labelsFile, line)) |
|
||||||
{ |
|
||||||
labels_.push_back(std::string(line)); |
|
||||||
} |
|
||||||
CV_Assert(this->classifier_->getOutputSize()==int(this->labels_.size())); |
|
||||||
} |
|
||||||
|
|
||||||
OCRHolisticWordRecognizerImpl(Ptr<TextImageClassifier> classifierPtr,const std::vector<String>& vocabulary):classifier_(classifierPtr) |
|
||||||
{ |
|
||||||
this->labels_=vocabulary; |
|
||||||
CV_Assert(this->classifier_->getOutputSize()==int(this->labels_.size())); |
|
||||||
} |
|
||||||
|
|
||||||
void recogniseImage(InputArray inputImage,CV_OUT String& transcription,CV_OUT double& confidence) |
|
||||||
{ |
|
||||||
Mat netOutput; |
|
||||||
this->classifier_->classify(inputImage,netOutput); |
|
||||||
int classNum; |
|
||||||
NetOutput::getClassification((float*)(netOutput.data),this->classifier_->getOutputSize(),classNum,confidence); |
|
||||||
transcription=this->labels_[classNum]; |
|
||||||
} |
|
||||||
|
|
||||||
void recogniseImageBatch(InputArrayOfArrays inputImageList,CV_OUT std::vector<String>& transcriptionVec,CV_OUT std::vector<double>& confidenceVec) |
|
||||||
{ |
|
||||||
Mat netOutput; |
|
||||||
this->classifier_->classifyBatch(inputImageList,netOutput); |
|
||||||
|
|
||||||
for(int k=0;k<netOutput.rows;k++) |
|
||||||
{ |
|
||||||
int classNum; |
|
||||||
double confidence; |
|
||||||
NetOutput::getClassification((float*)(netOutput.row(k).data),this->classifier_->getOutputSize(),classNum,confidence); |
|
||||||
transcriptionVec.push_back(this->labels_[classNum]); |
|
||||||
confidenceVec.push_back(confidence); |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL, |
|
||||||
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, |
|
||||||
int component_level=0) |
|
||||||
{ |
|
||||||
CV_Assert(component_level==OCR_LEVEL_WORD);//Componnents not applicable for word spotting
|
|
||||||
double confidence; |
|
||||||
String transcription; |
|
||||||
recogniseImage(image,transcription,confidence); |
|
||||||
output_text=transcription.c_str(); |
|
||||||
if(component_rects!=NULL) |
|
||||||
{ |
|
||||||
component_rects->resize(1); |
|
||||||
(*component_rects)[0]=Rect(0,0,image.size().width,image.size().height); |
|
||||||
} |
|
||||||
if(component_texts!=NULL) |
|
||||||
{ |
|
||||||
component_texts->resize(1); |
|
||||||
(*component_texts)[0]=transcription.c_str(); |
|
||||||
} |
|
||||||
if(component_confidences!=NULL) |
|
||||||
{ |
|
||||||
component_confidences->resize(1); |
|
||||||
(*component_confidences)[0]=float(confidence); |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL, |
|
||||||
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL, |
|
||||||
int component_level=0) |
|
||||||
{ |
|
||||||
CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image
|
|
||||||
this->run(image,output_text,component_rects,component_texts,component_confidences,component_level); |
|
||||||
} |
|
||||||
|
|
||||||
std::vector<String>& getVocabulary() |
|
||||||
{ |
|
||||||
return this->labels_; |
|
||||||
} |
|
||||||
|
|
||||||
Ptr<TextImageClassifier> getClassifier() |
|
||||||
{ |
|
||||||
return this->classifier_; |
|
||||||
} |
|
||||||
}; |
|
||||||
|
|
||||||
// Factory: wraps an existing classifier and a vocabulary file (one entry per
// line) in an OCRHolisticWordRecognizerImpl.
Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(Ptr<TextImageClassifier> classifierPtr,String vocabularyFilename )
{
    Ptr<OCRHolisticWordRecognizer> recognizer(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabularyFilename));
    return recognizer;
}
|
||||||
|
|
||||||
// Factory: builds the classifier from model files and reads the vocabulary
// from `vocabularyFilename`.
// The classifier comes from DeepCNN::createDictNet with the default backend,
// which uses the same settings as before (standarizer built with 113,
// minibatch size 100) and still selects Caffe when it is compiled in.
// Previously the Caffe implementation was instantiated directly, so a build
// that only has the dnn backend could never use this factory.
Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(String modelArchFilename, String modelWeightsFilename, String vocabularyFilename)
{
    Ptr<TextImageClassifier> classifierPtr=DeepCNN::createDictNet(modelArchFilename,modelWeightsFilename,OCR_HOLISTIC_BACKEND_DEFAULT);
    return Ptr<OCRHolisticWordRecognizer>(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabularyFilename));
}
|
||||||
|
|
||||||
// Factory: wraps an existing classifier and an in-memory vocabulary in an
// OCRHolisticWordRecognizerImpl.
Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(Ptr<TextImageClassifier> classifierPtr,const std::vector<String>& vocabulary)
{
    Ptr<OCRHolisticWordRecognizer> recognizer(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabulary));
    return recognizer;
}
|
||||||
|
|
||||||
// Factory: builds the classifier from model files and uses the in-memory
// `vocabulary`.
// The classifier comes from DeepCNN::createDictNet with the default backend,
// which uses the same settings as before (standarizer built with 113,
// minibatch size 100) and still selects Caffe when it is compiled in.
// Previously the Caffe implementation was instantiated directly, so a build
// that only has the dnn backend could never use this factory.
Ptr<OCRHolisticWordRecognizer> OCRHolisticWordRecognizer::create(String modelArchFilename, String modelWeightsFilename,const std::vector<String>& vocabulary){
    Ptr<TextImageClassifier> classifierPtr=DeepCNN::createDictNet(modelArchFilename,modelWeightsFilename,OCR_HOLISTIC_BACKEND_DEFAULT);
    return Ptr<OCRHolisticWordRecognizer>(new OCRHolisticWordRecognizerImpl(classifierPtr,vocabulary));
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} } //namespace text namespace cv
|
|
@ -1,169 +0,0 @@ |
|||||||
#include "precomp.hpp" |
|
||||||
#include "opencv2/imgproc.hpp" |
|
||||||
#include "opencv2/core.hpp" |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include <iostream> |
|
||||||
#include <fstream> |
|
||||||
#include <sstream> |
|
||||||
#include <queue> |
|
||||||
#include <algorithm> |
|
||||||
#include <iosfwd> |
|
||||||
#include <memory> |
|
||||||
#include <string> |
|
||||||
#include <utility> |
|
||||||
#include <vector> |
|
||||||
|
|
||||||
|
|
||||||
//#ifdef HAVE_CAFFE
|
|
||||||
//#include "caffe/caffe.hpp"
|
|
||||||
//#endif
|
|
||||||
|
|
||||||
namespace cv { namespace text { |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class textDetectImpl: public textDetector{ |
|
||||||
private: |
|
||||||
struct NetOutput{ |
|
||||||
//Auxiliary structure that handles the logic of getting bounding box and confidences of textness from
|
|
||||||
//the raw outputs of caffe
|
|
||||||
Rect bbox; |
|
||||||
float probability; |
|
||||||
|
|
||||||
|
|
||||||
static void getOutputs(const float* buffer,int nbrTextBoxes,int nCol,std::vector<NetOutput>& res,Size inputShape) |
|
||||||
{ |
|
||||||
|
|
||||||
res.resize(nbrTextBoxes); |
|
||||||
for(int k=0;k<nbrTextBoxes;k++) |
|
||||||
{ |
|
||||||
float x_min = buffer[k*nCol+3]*inputShape.width; |
|
||||||
float y_min = buffer[k*nCol+4]*inputShape.height; |
|
||||||
float x_max = buffer[k*nCol+5]*inputShape.width; |
|
||||||
float y_max = buffer[k*nCol +6]*inputShape.height; |
|
||||||
x_min = x_min<0?0:x_min; |
|
||||||
y_min = y_min<0?0:y_min; |
|
||||||
x_max = x_max> inputShape.width?inputShape.width-1:x_max; |
|
||||||
y_max = y_max > inputShape.height?inputShape.height-1:y_max; |
|
||||||
float wd = x_max-x_min+1; |
|
||||||
float ht = y_max-y_min+1; |
|
||||||
|
|
||||||
res[k].bbox=Rect(int(x_min),int(y_min),int(wd),int(ht)); |
|
||||||
|
|
||||||
res[k].probability=buffer[k*nCol+2]; |
|
||||||
} |
|
||||||
|
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
}; |
|
||||||
protected: |
|
||||||
|
|
||||||
Ptr<TextRegionDetector> classifier_; |
|
||||||
public: |
|
||||||
textDetectImpl(Ptr<TextRegionDetector> classifierPtr):classifier_(classifierPtr) |
|
||||||
{ |
|
||||||
|
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void textDetectInImage(InputArray inputImage,CV_OUT std::vector<Rect>& Bbox,CV_OUT std::vector<float>& confidence) |
|
||||||
{ |
|
||||||
Mat netOutput; |
|
||||||
// call the detect function of deepTextCNN class
|
|
||||||
this->classifier_->detect(inputImage,netOutput); |
|
||||||
// get the output geometry i.e height and width of output blob from caffe
|
|
||||||
Size OutputGeometry_ = this->classifier_->getOutputGeometry(); |
|
||||||
int nbrTextBoxes = OutputGeometry_.height; |
|
||||||
int nCol = OutputGeometry_.width; |
|
||||||
|
|
||||||
std::vector<NetOutput> tmp; |
|
||||||
// the output bounding box needs to be resized by the input height and width
|
|
||||||
Size inputImageShape = Size(inputImage.cols(),inputImage.rows()); |
|
||||||
NetOutput::getOutputs((float*)(netOutput.data),nbrTextBoxes,nCol,tmp,inputImageShape); |
|
||||||
// put the output in CV_OUT
|
|
||||||
|
|
||||||
for (int k=0;k<nbrTextBoxes;k++) |
|
||||||
{ |
|
||||||
Bbox.push_back(tmp[k].bbox); |
|
||||||
confidence.push_back(tmp[k].probability); |
|
||||||
} |
|
||||||
|
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void run(Mat& image, std::vector<Rect>* component_rects=NULL, |
|
||||||
std::vector<float>* component_confidences=NULL, |
|
||||||
int component_level=0) |
|
||||||
{ |
|
||||||
CV_Assert(component_level==OCR_LEVEL_WORD);//Componnents not applicable for word spotting
|
|
||||||
|
|
||||||
std::vector<Rect> bbox; |
|
||||||
std::vector<float> score; |
|
||||||
textDetectInImage(image,bbox,score); |
|
||||||
|
|
||||||
if(component_rects!=NULL) |
|
||||||
{ |
|
||||||
component_rects->resize(bbox.size()); // should be a user behavior
|
|
||||||
|
|
||||||
component_rects = &bbox; |
|
||||||
} |
|
||||||
|
|
||||||
if(component_confidences!=NULL) |
|
||||||
{ |
|
||||||
component_confidences->resize(score.size()); // shoub be a user behavior
|
|
||||||
|
|
||||||
component_confidences = &score; |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
void run(Mat& image, Mat& mask, std::vector<Rect>* component_rects=NULL, |
|
||||||
std::vector<float>* component_confidences=NULL, |
|
||||||
int component_level=0) |
|
||||||
{ |
|
||||||
CV_Assert(mask.cols==image.cols && mask.rows== image.rows);//Mask is ignored because the CNN operates on a full image
|
|
||||||
this->run(image,component_rects,component_confidences,component_level); |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Ptr<TextRegionDetector> getClassifier() |
|
||||||
{ |
|
||||||
return this->classifier_; |
|
||||||
} |
|
||||||
}; |
|
||||||
|
|
||||||
Ptr<textDetector> textDetector::create(Ptr<TextRegionDetector> classifierPtr) |
|
||||||
{ |
|
||||||
return Ptr<textDetector>(new textDetectImpl(classifierPtr)); |
|
||||||
} |
|
||||||
|
|
||||||
Ptr<textDetector> textDetector::create(String modelArchFilename, String modelWeightsFilename) |
|
||||||
{ |
|
||||||
|
|
||||||
// create a custom preprocessor with rawval
|
|
||||||
Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255); |
|
||||||
// set the mean for the preprocessor
|
|
||||||
|
|
||||||
Mat textbox_mean(1,3,CV_8U); |
|
||||||
textbox_mean.at<uchar>(0,0)=104; |
|
||||||
textbox_mean.at<uchar>(0,1)=117; |
|
||||||
textbox_mean.at<uchar>(0,2)=123; |
|
||||||
preprocessor->set_mean(textbox_mean); |
|
||||||
// create a pointer to text box detector(textDetector)
|
|
||||||
Ptr<TextRegionDetector> classifierPtr(DeepCNNTextDetector::create(modelArchFilename,modelWeightsFilename,preprocessor,1)); |
|
||||||
return Ptr<textDetector>(new textDetectImpl(classifierPtr)); |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} } //namespace text namespace cv
|
|
@ -1,453 +1,101 @@ |
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
|
||||||
#include "precomp.hpp" |
#include "precomp.hpp" |
||||||
#include "opencv2/imgproc.hpp" |
#include "opencv2/imgproc.hpp" |
||||||
#include "opencv2/core.hpp" |
#include "opencv2/core.hpp" |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include <iostream> |
|
||||||
#include <fstream> |
#include <fstream> |
||||||
#include <sstream> |
|
||||||
#include <queue> |
|
||||||
#include <algorithm> |
#include <algorithm> |
||||||
#include <iosfwd> |
|
||||||
#include <memory> |
|
||||||
#include <string> |
|
||||||
#include <utility> |
|
||||||
#include <vector> |
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_CAFFE |
|
||||||
#include "caffe/caffe.hpp" |
|
||||||
#endif |
|
||||||
|
|
||||||
#ifdef HAVE_DNN |
|
||||||
#include "opencv2/dnn.hpp" |
#include "opencv2/dnn.hpp" |
||||||
#endif |
|
||||||
|
|
||||||
using namespace cv::dnn; |
using namespace cv::dnn; |
||||||
|
|
||||||
#define CV_WARN(message) fprintf(stderr, "warning: %s (%s:%d)\n", message, __FILE__, __LINE__) |
namespace cv |
||||||
|
{ |
||||||
namespace cv { namespace text { |
namespace text |
||||||
|
{ |
||||||
inline bool fileExists (String filename) { |
|
||||||
std::ifstream f(filename.c_str()); |
|
||||||
return f.good(); |
|
||||||
} |
|
||||||
|
|
||||||
class DeepCNNTextDetectorCaffeImpl: public DeepCNNTextDetector{ |
|
||||||
protected: |
|
||||||
|
|
||||||
|
|
||||||
void process_(Mat inputImage, Mat &outputMat) |
|
||||||
{ |
|
||||||
// do forward pass and stores the output in outputMat
|
|
||||||
CV_Assert(outputMat.isContinuous()); |
|
||||||
if (inputImage.channels() != this->inputChannelCount_) |
|
||||||
CV_WARN("Number of input channel(s) in the model is not same as input"); |
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_CAFFE |
|
||||||
net_->input_blobs()[0]->Reshape(1, this->inputChannelCount_,this->inputGeometry_.height,this->inputGeometry_.width); |
|
||||||
net_->Reshape(); |
|
||||||
float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data(); |
|
||||||
float* inputData=inputBuffer; |
|
||||||
|
|
||||||
std::vector<Mat> input_channels; |
|
||||||
Mat preprocessed; |
|
||||||
// if the image have multiple color channels the input layer should be populated accordingly
|
|
||||||
for (int channel=0;channel < this->inputChannelCount_;channel++){ |
|
||||||
|
|
||||||
cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData); |
|
||||||
input_channels.push_back(netInputWraped); |
|
||||||
//input_data += width * height;
|
|
||||||
inputData+=(this->inputGeometry_.height*this->inputGeometry_.width); |
|
||||||
} |
|
||||||
this->preprocess(inputImage,preprocessed); |
|
||||||
split(preprocessed, input_channels); |
|
||||||
|
|
||||||
//preprocessed.copyTo(netInputWraped);
|
|
||||||
|
|
||||||
|
|
||||||
this->net_->Forward(); |
|
||||||
const float* outputNetData=net_->output_blobs()[0]->cpu_data(); |
|
||||||
// const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
this->outputGeometry_.height = net_->output_blobs()[0]->height(); |
|
||||||
this->outputGeometry_.width = net_->output_blobs()[0]->width(); |
|
||||||
this->outputChannelCount_ = net_->output_blobs()[0]->channels(); |
|
||||||
int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width; |
|
||||||
outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1); |
|
||||||
float*outputMatData=(float*)(outputMat.data); |
|
||||||
|
|
||||||
memcpy(outputMatData,outputNetData,sizeof(float)*outputSz); |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_CAFFE |
|
||||||
Ptr<caffe::Net<float> > net_; |
|
||||||
#endif |
|
||||||
//Size inputGeometry_;
|
|
||||||
int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
|
|
||||||
//int outputSize_;
|
|
||||||
public: |
|
||||||
DeepCNNTextDetectorCaffeImpl(const DeepCNNTextDetectorCaffeImpl& dn): |
|
||||||
minibatchSz_(dn.minibatchSz_){ |
|
||||||
outputGeometry_=dn.outputGeometry_; |
|
||||||
inputGeometry_=dn.inputGeometry_; |
|
||||||
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
|
|
||||||
#ifdef HAVE_CAFFE |
|
||||||
this->net_=dn.net_; |
|
||||||
#endif |
|
||||||
} |
|
||||||
DeepCNNTextDetectorCaffeImpl& operator=(const DeepCNNTextDetectorCaffeImpl &dn) |
|
||||||
{ |
|
||||||
#ifdef HAVE_CAFFE |
|
||||||
this->net_=dn.net_; |
|
||||||
#endif |
|
||||||
this->setPreprocessor(dn.preprocessor_); |
|
||||||
this->inputGeometry_=dn.inputGeometry_; |
|
||||||
this->inputChannelCount_=dn.inputChannelCount_; |
|
||||||
this->outputChannelCount_ = dn.outputChannelCount_; |
|
||||||
// this->minibatchSz_=dn.minibatchSz_;
|
|
||||||
//this->outputGeometry_=dn.outputSize_;
|
|
||||||
this->preprocessor_=dn.preprocessor_; |
|
||||||
this->outputGeometry_=dn.outputGeometry_; |
|
||||||
return *this; |
|
||||||
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
|
|
||||||
} |
|
||||||
|
|
||||||
DeepCNNTextDetectorCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz) |
|
||||||
:minibatchSz_(maxMinibatchSz) |
|
||||||
{ |
|
||||||
|
|
||||||
CV_Assert(this->minibatchSz_>0); |
|
||||||
CV_Assert(fileExists(modelArchFilename)); |
|
||||||
CV_Assert(fileExists(modelWeightsFilename)); |
|
||||||
CV_Assert(!preprocessor.empty()); |
|
||||||
this->setPreprocessor(preprocessor); |
|
||||||
#ifdef HAVE_CAFFE |
|
||||||
this->net_.reset(new caffe::Net<float>(modelArchFilename, caffe::TEST)); |
|
||||||
CV_Assert(net_->num_inputs()==1); |
|
||||||
CV_Assert(net_->num_outputs()==1); |
|
||||||
CV_Assert(this->net_->input_blobs()[0]->channels()==1 |
|
||||||
||this->net_->input_blobs()[0]->channels()==3); |
|
||||||
// this->channelCount_=this->net_->input_blobs()[0]->channels();
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
this->net_->CopyTrainedLayersFrom(modelWeightsFilename); |
|
||||||
|
|
||||||
caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0]; |
|
||||||
|
|
||||||
this->inputGeometry_.height = inputLayer->height(); |
|
||||||
this->inputGeometry_.width = inputLayer->width(); |
|
||||||
this->inputChannelCount_ = inputLayer->channels(); |
|
||||||
//this->inputGeometry_.batchSize =1;
|
|
||||||
|
|
||||||
inputLayer->Reshape(this->minibatchSz_,this->inputChannelCount_,this->inputGeometry_.height, this->inputGeometry_.width); |
|
||||||
net_->Reshape(); |
|
||||||
this->outputChannelCount_ = net_->output_blobs()[0]->channels(); |
|
||||||
//this->outputGeometry_.batchSize =1;
|
|
||||||
this->outputGeometry_.height =net_->output_blobs()[0]->height(); |
|
||||||
this->outputGeometry_.width = net_->output_blobs()[0]->width(); |
|
||||||
|
|
||||||
#else |
|
||||||
CV_Error(Error::StsError,"Caffe not available during compilation!"); |
|
||||||
#endif |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
void detect(InputArray image, OutputArray Bbox_prob) |
|
||||||
{ |
|
||||||
Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width); |
|
||||||
Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed
|
|
||||||
Mat outputMat = Bbox_prob.getMat(); |
|
||||||
process_(image.getMat(),outputMat); |
|
||||||
//copy back to outputArray
|
|
||||||
outputMat.copyTo(Bbox_prob); |
|
||||||
} |
|
||||||
|
|
||||||
Size getOutputGeometry() |
|
||||||
{ |
|
||||||
return this->outputGeometry_; |
|
||||||
} |
|
||||||
Size getinputGeometry() |
|
||||||
{ |
|
||||||
return this->inputGeometry_; |
|
||||||
} |
|
||||||
|
|
||||||
int getMinibatchSize() |
|
||||||
{ |
|
||||||
return this->minibatchSz_; |
|
||||||
} |
|
||||||
|
|
||||||
int getBackend() |
|
||||||
{ |
|
||||||
return OCR_HOLISTIC_BACKEND_CAFFE; |
|
||||||
} |
|
||||||
void setPreprocessor(Ptr<ImagePreprocessor> ptr) |
|
||||||
{ |
|
||||||
CV_Assert(!ptr.empty()); |
|
||||||
preprocessor_=ptr; |
|
||||||
} |
|
||||||
|
|
||||||
Ptr<ImagePreprocessor> getPreprocessor() |
|
||||||
{ |
|
||||||
return preprocessor_; |
|
||||||
} |
|
||||||
}; |
|
||||||
|
|
||||||
|
|
||||||
class DeepCNNTextDetectorDNNImpl: public DeepCNNTextDetector{ |
class TextDetectorCNNImpl : public TextDetectorCNN |
||||||
|
{ |
||||||
protected: |
protected: |
||||||
|
Net net_; |
||||||
|
std::vector<Size> sizes_; |
||||||
|
int inputChannelCount_; |
||||||
|
bool detectMultiscale_; |
||||||
|
|
||||||
|
|
||||||
void process_(Mat inputImage, Mat &outputMat) |
void getOutputs(const float* buffer,int nbrTextBoxes,int nCol, |
||||||
|
std::vector<Rect>& Bbox, std::vector<float>& confidence, Size inputShape) |
||||||
{ |
{ |
||||||
// do forward pass and stores the output in outputMat
|
for(int k = 0; k < nbrTextBoxes; k++) |
||||||
CV_Assert(outputMat.isContinuous()); |
{ |
||||||
if (inputImage.channels() != this->inputChannelCount_) |
float x_min = buffer[k*nCol + 3]*inputShape.width; |
||||||
CV_WARN("Number of input channel(s) in the model is not same as input"); |
float y_min = buffer[k*nCol + 4]*inputShape.height; |
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_DNN |
|
||||||
|
|
||||||
Mat preprocessed; |
|
||||||
this->preprocess(inputImage,preprocessed); |
|
||||||
|
|
||||||
net_->setInput(blobFromImage(preprocessed,1, this->inputGeometry_), "data"); |
|
||||||
|
|
||||||
Mat outputNet = this->net_->forward( ); |
|
||||||
|
|
||||||
this->outputGeometry_.height = outputNet.size[2]; |
|
||||||
this->outputGeometry_.width = outputNet.size[3]; |
|
||||||
this->outputChannelCount_ = outputNet.size[1]; |
|
||||||
|
|
||||||
outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1); |
float x_max = buffer[k*nCol + 5]*inputShape.width; |
||||||
float*outputMatData=(float*)(outputMat.data); |
float y_max = buffer[k*nCol + 6]*inputShape.height; |
||||||
float*outputNetData=(float*)(outputNet.data); |
|
||||||
int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width; |
|
||||||
|
|
||||||
memcpy(outputMatData,outputNetData,sizeof(float)*outputSz); |
CV_Assert(x_min < x_max, y_min < y_max); |
||||||
|
|
||||||
|
x_min = std::max(0.f, x_min); |
||||||
|
y_min = std::max(0.f, y_min); |
||||||
|
|
||||||
|
x_max = std::min(inputShape.width - 1.f, x_max); |
||||||
|
y_max = std::min(inputShape.height - 1.f, y_max); |
||||||
|
|
||||||
|
int wd = cvRound(x_max - x_min); |
||||||
|
int ht = cvRound(y_max - y_min); |
||||||
|
|
||||||
#endif |
Bbox.push_back(Rect(cvRound(x_min), cvRound(y_min), wd, ht)); |
||||||
|
confidence.push_back(buffer[k*nCol + 2]); |
||||||
|
} |
||||||
} |
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_DNN |
|
||||||
Ptr<Net> net_; |
|
||||||
#endif |
|
||||||
//Size inputGeometry_;
|
|
||||||
int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
|
|
||||||
//int outputSize_;
|
|
||||||
//int inputHeight_;
|
|
||||||
//int inputWidth_;
|
|
||||||
//int inputChannel_;
|
|
||||||
public: |
public: |
||||||
DeepCNNTextDetectorDNNImpl(const DeepCNNTextDetectorDNNImpl& dn): |
TextDetectorCNNImpl(const String& modelArchFilename, const String& modelWeightsFilename, bool detectMultiscale) : |
||||||
minibatchSz_(dn.minibatchSz_){ |
detectMultiscale_(detectMultiscale) |
||||||
outputGeometry_=dn.outputGeometry_; |
|
||||||
inputGeometry_=dn.inputGeometry_; |
|
||||||
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
|
|
||||||
#ifdef HAVE_DNN |
|
||||||
this->net_=dn.net_; |
|
||||||
#endif |
|
||||||
} |
|
||||||
DeepCNNTextDetectorDNNImpl& operator=(const DeepCNNTextDetectorDNNImpl &dn) |
|
||||||
{ |
|
||||||
#ifdef HAVE_DNN |
|
||||||
this->net_=dn.net_; |
|
||||||
#endif |
|
||||||
this->setPreprocessor(dn.preprocessor_); |
|
||||||
this->inputGeometry_=dn.inputGeometry_; |
|
||||||
this->inputChannelCount_=dn.inputChannelCount_; |
|
||||||
this->outputChannelCount_ = dn.outputChannelCount_; |
|
||||||
// this->minibatchSz_=dn.minibatchSz_;
|
|
||||||
//this->outputGeometry_=dn.outputSize_;
|
|
||||||
this->preprocessor_=dn.preprocessor_; |
|
||||||
this->outputGeometry_=dn.outputGeometry_; |
|
||||||
return *this; |
|
||||||
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
|
|
||||||
} |
|
||||||
|
|
||||||
DeepCNNTextDetectorDNNImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz,int inputHeight=700,int inputWidth =700,int inputChannel =3) |
|
||||||
:minibatchSz_(maxMinibatchSz) |
|
||||||
{ |
{ |
||||||
|
net_ = readNetFromCaffe(modelArchFilename, modelWeightsFilename); |
||||||
|
CV_Assert(!net_.empty()); |
||||||
|
inputChannelCount_ = 3; |
||||||
|
sizes_.push_back(Size(700, 700)); |
||||||
|
|
||||||
CV_Assert(this->minibatchSz_>0); |
if(detectMultiscale_) |
||||||
CV_Assert(fileExists(modelArchFilename)); |
|
||||||
CV_Assert(fileExists(modelWeightsFilename)); |
|
||||||
CV_Assert(!preprocessor.empty()); |
|
||||||
this->setPreprocessor(preprocessor); |
|
||||||
#ifdef HAVE_DNN |
|
||||||
this->net_ = makePtr<Net>(readNetFromCaffe(modelArchFilename,modelWeightsFilename)); |
|
||||||
|
|
||||||
if (this->net_.empty()) |
|
||||||
{ |
{ |
||||||
std::cerr << "Can't load network by using the following files: " << std::endl; |
sizes_.push_back(Size(300, 300)); |
||||||
std::cerr << "prototxt: " << modelArchFilename << std::endl; |
sizes_.push_back(Size(700,500)); |
||||||
std::cerr << "caffemodel: " << modelWeightsFilename << std::endl; |
sizes_.push_back(Size(700,300)); |
||||||
//std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl;
|
sizes_.push_back(Size(1600,1600)); |
||||||
//std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
|
|
||||||
exit(-1); |
|
||||||
} |
} |
||||||
|
|
||||||
this->inputGeometry_.height =inputHeight; |
|
||||||
this->inputGeometry_.width = inputWidth ;//inputLayer->width();
|
|
||||||
this->inputChannelCount_ = inputChannel ;//inputLayer->channels();
|
|
||||||
|
|
||||||
#else |
|
||||||
CV_Error(Error::StsError,"DNN module not available during compilation!"); |
|
||||||
#endif |
|
||||||
} |
} |
||||||
|
|
||||||
|
void textDetectInImage(InputArray inputImage_, std::vector<Rect>& Bbox, std::vector<float>& confidence) |
||||||
void detect(InputArray image, OutputArray Bbox_prob) |
|
||||||
{ |
{ |
||||||
Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width); |
CV_Assert(inputImage_.channels() == inputChannelCount_); |
||||||
Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed
|
Mat inputImage = inputImage_.getMat().clone(); |
||||||
Mat outputMat = Bbox_prob.getMat(); |
Bbox.resize(0); |
||||||
|
confidence.resize(0); |
||||||
|
|
||||||
process_(image.getMat(),outputMat); |
for(size_t i = 0; i < sizes_.size(); i++) |
||||||
//copy back to outputArray
|
{ |
||||||
outputMat.copyTo(Bbox_prob); |
Size inputGeometry = sizes_[i]; |
||||||
} |
net_.setInput(blobFromImage(inputImage, 1, inputGeometry, Scalar(123, 117, 104)), "data"); |
||||||
|
Mat outputNet = net_.forward(); |
||||||
Size getOutputGeometry() |
int nbrTextBoxes = outputNet.size[2]; |
||||||
{ |
int nCol = outputNet.size[3]; |
||||||
return this->outputGeometry_; |
int outputChannelCount = outputNet.size[1]; |
||||||
} |
CV_Assert(outputChannelCount == 1); |
||||||
Size getinputGeometry() |
getOutputs((float*)(outputNet.data), nbrTextBoxes, nCol, Bbox, confidence, inputImage.size()); |
||||||
{ |
} |
||||||
return this->inputGeometry_; |
} |
||||||
} |
|
||||||
|
|
||||||
int getMinibatchSize() |
|
||||||
{ |
|
||||||
return this->minibatchSz_; |
|
||||||
} |
|
||||||
|
|
||||||
int getBackend() |
|
||||||
{ |
|
||||||
return OCR_HOLISTIC_BACKEND_DNN; |
|
||||||
} |
|
||||||
void setPreprocessor(Ptr<ImagePreprocessor> ptr) |
|
||||||
{ |
|
||||||
CV_Assert(!ptr.empty()); |
|
||||||
preprocessor_=ptr; |
|
||||||
} |
|
||||||
|
|
||||||
Ptr<ImagePreprocessor> getPreprocessor() |
|
||||||
{ |
|
||||||
return preprocessor_; |
|
||||||
} |
|
||||||
}; |
}; |
||||||
|
|
||||||
Ptr<DeepCNNTextDetector> DeepCNNTextDetector::create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz,int backEnd) |
Ptr<TextDetectorCNN> TextDetectorCNN::create(const String &modelArchFilename, const String &modelWeightsFilename, bool detectMultiscale) |
||||||
{ |
{ |
||||||
if(preprocessor.empty()) |
return makePtr<TextDetectorCNNImpl>(modelArchFilename, modelWeightsFilename, detectMultiscale); |
||||||
{ |
|
||||||
// create a custom preprocessor with rawval
|
|
||||||
preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255); |
|
||||||
// set the mean for the preprocessor
|
|
||||||
|
|
||||||
Mat textbox_mean(1,3,CV_8U); |
|
||||||
textbox_mean.at<uchar>(0,0)=104; |
|
||||||
textbox_mean.at<uchar>(0,1)=117; |
|
||||||
textbox_mean.at<uchar>(0,2)=123; |
|
||||||
preprocessor->set_mean(textbox_mean); |
|
||||||
} |
|
||||||
switch(backEnd){ |
|
||||||
case OCR_HOLISTIC_BACKEND_DEFAULT: |
|
||||||
|
|
||||||
#ifdef HAVE_CAFFE |
|
||||||
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); |
|
||||||
|
|
||||||
#elif defined(HAVE_DNN) |
|
||||||
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,700,700,3)); |
|
||||||
#else |
|
||||||
CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); |
|
||||||
return Ptr<DeepCNNTextDetector>(); |
|
||||||
#endif |
|
||||||
case OCR_HOLISTIC_BACKEND_CAFFE: |
|
||||||
|
|
||||||
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz)); |
|
||||||
break; |
|
||||||
|
|
||||||
case OCR_HOLISTIC_BACKEND_DNN: |
|
||||||
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, minibatchSz,700,700,3)); |
|
||||||
break; |
|
||||||
|
|
||||||
case OCR_HOLISTIC_BACKEND_NONE: |
|
||||||
default: |
|
||||||
CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); |
|
||||||
return Ptr<DeepCNNTextDetector>(); |
|
||||||
break; |
|
||||||
} |
|
||||||
//return Ptr<DeepCNNTextDetector>();
|
|
||||||
|
|
||||||
} |
} |
||||||
|
} //namespace text
|
||||||
|
} //namespace cv
|
||||||
Ptr<DeepCNNTextDetector> DeepCNNTextDetector::createTextBoxNet(String archFilename,String weightsFilename,int backEnd) |
|
||||||
{ |
|
||||||
|
|
||||||
// create a custom preprocessor with rawval
|
|
||||||
Ptr<ImagePreprocessor> preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255); |
|
||||||
// set the mean for the preprocessor
|
|
||||||
|
|
||||||
Mat textbox_mean(1,3,CV_8U); |
|
||||||
textbox_mean.at<uchar>(0,0)=104; |
|
||||||
textbox_mean.at<uchar>(0,1)=117; |
|
||||||
textbox_mean.at<uchar>(0,2)=123; |
|
||||||
preprocessor->set_mean(textbox_mean); |
|
||||||
switch(backEnd){ |
|
||||||
case OCR_HOLISTIC_BACKEND_DEFAULT: |
|
||||||
|
|
||||||
#ifdef HAVE_CAFFE |
|
||||||
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1)); |
|
||||||
|
|
||||||
#elif defined(HAVE_DNN) |
|
||||||
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1,700,700,3)); |
|
||||||
#else |
|
||||||
CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); |
|
||||||
return Ptr<DeepCNNTextDetector>(); |
|
||||||
#endif |
|
||||||
break; |
|
||||||
case OCR_HOLISTIC_BACKEND_CAFFE: |
|
||||||
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, 1)); |
|
||||||
break; |
|
||||||
case OCR_HOLISTIC_BACKEND_DNN: |
|
||||||
return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorDNNImpl(archFilename, weightsFilename,preprocessor, 1,700,700,3)); |
|
||||||
break; |
|
||||||
case OCR_HOLISTIC_BACKEND_NONE: |
|
||||||
default: |
|
||||||
CV_Error(Error::StsError,"DeepCNNTextDetector::create backend not implemented"); |
|
||||||
return Ptr<DeepCNNTextDetector>(); |
|
||||||
break; |
|
||||||
} |
|
||||||
//return Ptr<DeepCNNTextDetector>();
|
|
||||||
|
|
||||||
} |
|
||||||
|
|
||||||
void DeepCNNTextDetector::preprocess(const Mat& input,Mat& output) |
|
||||||
{ |
|
||||||
Size inputHtWd = Size(this->inputGeometry_.height,this->inputGeometry_.width); |
|
||||||
this->preprocessor_->preprocess(input,output,inputHtWd,this->inputChannelCount_); |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} } //namespace text namespace cv
|
|
||||||
|
@ -1,4 +1,7 @@ |
|||||||
#ifndef __OPENCV_TEXT_CONFIG_HPP__ |
#ifndef __OPENCV_TEXT_CONFIG_HPP__ |
||||||
#define __OPENCV_TEXT_CONFIG_HPP__ |
#define __OPENCV_TEXT_CONFIG_HPP__ |
||||||
|
|
||||||
|
// HAVE OCR Tesseract
|
||||||
|
#cmakedefine HAVE_TESSERACT |
||||||
|
|
||||||
#endif |
#endif |
||||||
|
Loading…
Reference in new issue