Merge branch 'master' of https://github.com/Itseez/opencv_contrib
After Width: | Height: | Size: 155 KiB |
After Width: | Height: | Size: 1.1 KiB |
After Width: | Height: | Size: 124 KiB |
After Width: | Height: | Size: 2.7 KiB |
After Width: | Height: | Size: 89 KiB |
After Width: | Height: | Size: 1.0 KiB |
After Width: | Height: | Size: 101 KiB |
After Width: | Height: | Size: 528 B |
After Width: | Height: | Size: 57 KiB |
After Width: | Height: | Size: 682 B |
@ -0,0 +1,116 @@ |
|||||||
|
/*
 * segmented_word_recognition.cpp
 *
 * A demo program on segmented word recognition.
 * Shows the use of the OCRHMMDecoder API with the two provided default character classifiers.
 *
 * Created on: Jul 31, 2015
 *     Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es>
 */
|
|
||||||
|
#include "opencv2/text.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"

#include <algorithm>
#include <iostream>
||||||
|
|
||||||
|
using namespace std; |
||||||
|
using namespace cv; |
||||||
|
using namespace text; |
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char* argv[]) { |
||||||
|
|
||||||
|
const String keys = |
||||||
|
"{help h usage ? | | print this message.}" |
||||||
|
"{@image | | source image for recognition.}" |
||||||
|
"{@mask | | binary segmentation mask where each contour is a character.}" |
||||||
|
"{lexicon lex l | | (optional) lexicon provided as a list of comma separated words.}" |
||||||
|
; |
||||||
|
CommandLineParser parser(argc, argv, keys); |
||||||
|
|
||||||
|
parser.about("\nSegmented word recognition.\nA demo program on segmented word recognition. Shows the use of the OCRHMMDecoder API with the two provided default character classifiers.\n"); |
||||||
|
|
||||||
|
String filename1 = parser.get<String>(0); |
||||||
|
String filename2 = parser.get<String>(1); |
||||||
|
|
||||||
|
parser.printMessage(); |
||||||
|
cout << endl << endl; |
||||||
|
if ((parser.has("help")) || (filename1.size()==0)) |
||||||
|
{ |
||||||
|
return 0; |
||||||
|
} |
||||||
|
if (!parser.check()) |
||||||
|
{ |
||||||
|
parser.printErrors(); |
||||||
|
return 0; |
||||||
|
} |
||||||
|
|
||||||
|
Mat image = imread(filename1); |
||||||
|
Mat mask; |
||||||
|
if (filename2.size() > 0) |
||||||
|
mask = imread(filename2); |
||||||
|
else |
||||||
|
image.copyTo(mask); |
||||||
|
|
||||||
|
// be sure the mask is a binry image
|
||||||
|
cvtColor(mask, mask, COLOR_BGR2GRAY); |
||||||
|
threshold(mask, mask, 128., 255, THRESH_BINARY); |
||||||
|
|
||||||
|
// character recognition vocabulary
|
||||||
|
string voc = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; |
||||||
|
// Emission probabilities for the HMM language model (identity matrix by default)
|
||||||
|
Mat emissionProbabilities = Mat::eye((int)voc.size(), (int)voc.size(), CV_64FC1); |
||||||
|
// Bigram transition probabilities for the HMM language model
|
||||||
|
Mat transitionProbabilities; |
||||||
|
|
||||||
|
string lex = parser.get<string>("lex"); |
||||||
|
if (lex.size()>0) |
||||||
|
{ |
||||||
|
// Build tailored language model for the provided lexicon
|
||||||
|
vector<string> lexicon; |
||||||
|
size_t pos = 0; |
||||||
|
string delimiter = ","; |
||||||
|
std::string token; |
||||||
|
while ((pos = lex.find(delimiter)) != std::string::npos) { |
||||||
|
token = lex.substr(0, pos); |
||||||
|
lexicon.push_back(token); |
||||||
|
lex.erase(0, pos + delimiter.length()); |
||||||
|
} |
||||||
|
lexicon.push_back(lex); |
||||||
|
createOCRHMMTransitionsTable(voc,lexicon,transitionProbabilities); |
||||||
|
} else { |
||||||
|
// Or load the generic language model (from Aspell English dictionary)
|
||||||
|
FileStorage fs("./OCRHMM_transitions_table.xml", FileStorage::READ); |
||||||
|
fs["transition_probabilities"] >> transitionProbabilities; |
||||||
|
fs.release(); |
||||||
|
} |
||||||
|
|
||||||
|
Ptr<OCRTesseract> ocrTes = OCRTesseract::create(); |
||||||
|
|
||||||
|
Ptr<OCRHMMDecoder> ocrNM = OCRHMMDecoder::create( |
||||||
|
loadOCRHMMClassifierNM("./OCRHMM_knn_model_data.xml.gz"), |
||||||
|
voc, transitionProbabilities, emissionProbabilities); |
||||||
|
|
||||||
|
Ptr<OCRHMMDecoder> ocrCNN = OCRHMMDecoder::create( |
||||||
|
loadOCRHMMClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"), |
||||||
|
voc, transitionProbabilities, emissionProbabilities); |
||||||
|
|
||||||
|
std::string output; |
||||||
|
double t_r = (double)getTickCount(); |
||||||
|
ocrTes->run(mask, output); |
||||||
|
output.erase(remove(output.begin(), output.end(), '\n'), output.end()); |
||||||
|
cout << " OCR_Tesseract output \"" << output << "\". Done in " |
||||||
|
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl; |
||||||
|
|
||||||
|
t_r = (double)getTickCount(); |
||||||
|
ocrNM->run(mask, output); |
||||||
|
cout << " OCR_NM output \"" << output << "\". Done in " |
||||||
|
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl; |
||||||
|
|
||||||
|
t_r = (double)getTickCount(); |
||||||
|
ocrCNN->run(image, mask, output); |
||||||
|
cout << " OCR_CNN output \"" << output << "\". Done in " |
||||||
|
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl; |
||||||
|
} |
@ -0,0 +1,76 @@ |
|||||||
|
Disparity map post-filtering {#tutorial_ximgproc_disparity_filtering} |
||||||
|
============================ |
||||||
|
|
||||||
|
Introduction |
||||||
|
------------ |
||||||
|
|
||||||
|
Stereo matching algorithms, especially highly-optimized ones that are intended for real-time processing |
||||||
|
on CPU, tend to make quite a few errors on challenging sequences. These errors are usually concentrated |
||||||
|
in uniform texture-less areas, half-occlusions and regions near depth discontinuities. One way of dealing |
||||||
|
with stereo-matching errors is to use various techniques of detecting potentially inaccurate disparity |
||||||
|
values and invalidate them, therefore making the disparity map semi-sparse. Several such techniques are |
||||||
|
already implemented in the StereoBM and StereoSGBM algorithms. Another way would be to use some kind of |
||||||
|
filtering procedure to align the disparity map edges with those of the source image and to propagate |
||||||
|
the disparity values from high- to low-confidence regions like half-occlusions. Recent advances in |
||||||
|
edge-aware filtering have enabled performing such post-filtering under the constraints of real-time |
||||||
|
processing on CPU. |
||||||
|
|
||||||
|
In this tutorial you will learn how to use the disparity map post-filtering to improve the results |
||||||
|
of StereoBM and StereoSGBM algorithms. |
||||||
|
|
||||||
|
Source Stereoscopic Image |
||||||
|
------------------------- |
||||||
|
|
||||||
|
 |
||||||
|
 |
||||||
|
|
||||||
|
Source Code |
||||||
|
----------- |
||||||
|
|
||||||
|
We will be using snippets from the example application, that can be downloaded [here](https://github.com/Itseez/opencv_contrib/blob/master/modules/ximgproc/samples/disparity_filtering.cpp). |
||||||
|
|
||||||
|
Explanation |
||||||
|
----------- |
||||||
|
|
||||||
|
The provided example has several options that yield different trade-offs between the speed and |
||||||
|
the quality of the resulting disparity map. Both the speed and the quality are measured if the user |
||||||
|
has provided the ground-truth disparity map. In this tutorial we will take a detailed look at the |
||||||
|
default pipeline, that was designed to provide the best possible quality under the constraints of |
||||||
|
real-time processing on CPU. |
||||||
|
|
||||||
|
-# **Load left and right views** |
||||||
|
@snippet ximgproc/samples/disparity_filtering.cpp load_views |
||||||
|
We start by loading the source stereopair. For this tutorial we will take a somewhat challenging |
||||||
|
example from the MPI-Sintel dataset with a lot of texture-less regions. |
||||||
|
|
||||||
|
-# **Prepare the views for matching** |
||||||
|
@snippet ximgproc/samples/disparity_filtering.cpp downscale |
||||||
|
We perform downscaling of the views to speed-up the matching stage at the cost of minor |
||||||
|
quality degradation. To get the best possible quality downscaling should be avoided. |
||||||
|
|
||||||
|
-# **Perform matching and create the filter instance** |
||||||
|
@snippet ximgproc/samples/disparity_filtering.cpp matching |
||||||
|
We are using StereoBM for faster processing. If speed is not critical, though, |
||||||
|
StereoSGBM would provide better quality. The filter instance is created by providing |
||||||
|
the StereoMatcher instance that we intend to use. Another matcher instance is |
||||||
|
returned by the createRightMatcher function. These two matcher instances are then |
||||||
|
used to compute disparity maps both for the left and right views, that are required |
||||||
|
by the filter. |
||||||
|
|
||||||
|
-# **Perform filtering** |
||||||
|
@snippet ximgproc/samples/disparity_filtering.cpp filtering |
||||||
|
Disparity maps computed by the respective matcher instances, as well as the source left view |
||||||
|
are passed to the filter. Note that we are using the original non-downscaled view to guide the |
||||||
|
filtering process. The disparity map is automatically upscaled in an edge-aware fashion to match |
||||||
|
the original view resolution. The result is stored in filtered_disp. |
||||||
|
|
||||||
|
-# **Visualize the disparity maps** |
||||||
|
@snippet ximgproc/samples/disparity_filtering.cpp visualization |
||||||
|
We use a convenience function getDisparityVis to visualize the disparity maps. The second parameter |
||||||
|
defines the contrast (all disparity values are scaled by this value in the visualization). |
||||||
|
|
||||||
|
Results |
||||||
|
------- |
||||||
|
|
||||||
|
 |
||||||
|
 |
After Width: | Height: | Size: 58 KiB |
After Width: | Height: | Size: 56 KiB |
After Width: | Height: | Size: 60 KiB |
After Width: | Height: | Size: 58 KiB |