Merge branch 'master' of https://github.com/Itseez/opencv_contrib
After Width: | Height: | Size: 155 KiB |
After Width: | Height: | Size: 1.1 KiB |
After Width: | Height: | Size: 124 KiB |
After Width: | Height: | Size: 2.7 KiB |
After Width: | Height: | Size: 89 KiB |
After Width: | Height: | Size: 1.0 KiB |
After Width: | Height: | Size: 101 KiB |
After Width: | Height: | Size: 528 B |
After Width: | Height: | Size: 57 KiB |
After Width: | Height: | Size: 682 B |
@ -0,0 +1,116 @@ |
||||
/*
 * segmented_word_recognition.cpp
 *
 * A demo program on segmented word recognition.
 * Shows the use of the OCRHMMDecoder API with the two provided default character classifiers.
 *
 * Created on: Jul 31, 2015
 *     Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es>
 */
||||
|
||||
#include "opencv2/text.hpp" |
||||
#include "opencv2/core/utility.hpp" |
||||
#include "opencv2/highgui.hpp" |
||||
#include "opencv2/imgproc.hpp" |
||||
|
||||
#include <iostream> |
||||
|
||||
using namespace std; |
||||
using namespace cv; |
||||
using namespace text; |
||||
|
||||
|
||||
int main(int argc, char* argv[]) { |
||||
|
||||
const String keys = |
||||
"{help h usage ? | | print this message.}" |
||||
"{@image | | source image for recognition.}" |
||||
"{@mask | | binary segmentation mask where each contour is a character.}" |
||||
"{lexicon lex l | | (optional) lexicon provided as a list of comma separated words.}" |
||||
; |
||||
CommandLineParser parser(argc, argv, keys); |
||||
|
||||
parser.about("\nSegmented word recognition.\nA demo program on segmented word recognition. Shows the use of the OCRHMMDecoder API with the two provided default character classifiers.\n"); |
||||
|
||||
String filename1 = parser.get<String>(0); |
||||
String filename2 = parser.get<String>(1); |
||||
|
||||
parser.printMessage(); |
||||
cout << endl << endl; |
||||
if ((parser.has("help")) || (filename1.size()==0)) |
||||
{ |
||||
return 0; |
||||
} |
||||
if (!parser.check()) |
||||
{ |
||||
parser.printErrors(); |
||||
return 0; |
||||
} |
||||
|
||||
Mat image = imread(filename1); |
||||
Mat mask; |
||||
if (filename2.size() > 0) |
||||
mask = imread(filename2); |
||||
else |
||||
image.copyTo(mask); |
||||
|
||||
// be sure the mask is a binry image
|
||||
cvtColor(mask, mask, COLOR_BGR2GRAY); |
||||
threshold(mask, mask, 128., 255, THRESH_BINARY); |
||||
|
||||
// character recognition vocabulary
|
||||
string voc = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; |
||||
// Emission probabilities for the HMM language model (identity matrix by default)
|
||||
Mat emissionProbabilities = Mat::eye((int)voc.size(), (int)voc.size(), CV_64FC1); |
||||
// Bigram transition probabilities for the HMM language model
|
||||
Mat transitionProbabilities; |
||||
|
||||
string lex = parser.get<string>("lex"); |
||||
if (lex.size()>0) |
||||
{ |
||||
// Build tailored language model for the provided lexicon
|
||||
vector<string> lexicon; |
||||
size_t pos = 0; |
||||
string delimiter = ","; |
||||
std::string token; |
||||
while ((pos = lex.find(delimiter)) != std::string::npos) { |
||||
token = lex.substr(0, pos); |
||||
lexicon.push_back(token); |
||||
lex.erase(0, pos + delimiter.length()); |
||||
} |
||||
lexicon.push_back(lex); |
||||
createOCRHMMTransitionsTable(voc,lexicon,transitionProbabilities); |
||||
} else { |
||||
// Or load the generic language model (from Aspell English dictionary)
|
||||
FileStorage fs("./OCRHMM_transitions_table.xml", FileStorage::READ); |
||||
fs["transition_probabilities"] >> transitionProbabilities; |
||||
fs.release(); |
||||
} |
||||
|
||||
Ptr<OCRTesseract> ocrTes = OCRTesseract::create(); |
||||
|
||||
Ptr<OCRHMMDecoder> ocrNM = OCRHMMDecoder::create( |
||||
loadOCRHMMClassifierNM("./OCRHMM_knn_model_data.xml.gz"), |
||||
voc, transitionProbabilities, emissionProbabilities); |
||||
|
||||
Ptr<OCRHMMDecoder> ocrCNN = OCRHMMDecoder::create( |
||||
loadOCRHMMClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"), |
||||
voc, transitionProbabilities, emissionProbabilities); |
||||
|
||||
std::string output; |
||||
double t_r = (double)getTickCount(); |
||||
ocrTes->run(mask, output); |
||||
output.erase(remove(output.begin(), output.end(), '\n'), output.end()); |
||||
cout << " OCR_Tesseract output \"" << output << "\". Done in " |
||||
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl; |
||||
|
||||
t_r = (double)getTickCount(); |
||||
ocrNM->run(mask, output); |
||||
cout << " OCR_NM output \"" << output << "\". Done in " |
||||
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl; |
||||
|
||||
t_r = (double)getTickCount(); |
||||
ocrCNN->run(image, mask, output); |
||||
cout << " OCR_CNN output \"" << output << "\". Done in " |
||||
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl; |
||||
} |
@ -0,0 +1,76 @@ |
||||
Disparity map post-filtering {#tutorial_ximgproc_disparity_filtering}
============================

Introduction
------------

Stereo matching algorithms, especially highly-optimized ones that are intended for real-time processing
on CPU, tend to make quite a few errors on challenging sequences. These errors are usually concentrated
in uniform texture-less areas, half-occlusions and regions near depth discontinuities. One way of dealing
with stereo-matching errors is to use various techniques of detecting potentially inaccurate disparity
values and invalidate them, therefore making the disparity map semi-sparse. Several such techniques are
already implemented in the StereoBM and StereoSGBM algorithms. Another way would be to use some kind of
filtering procedure to align the disparity map edges with those of the source image and to propagate
the disparity values from high- to low-confidence regions like half-occlusions. Recent advances in
edge-aware filtering have enabled performing such post-filtering under the constraints of real-time
processing on CPU.

In this tutorial you will learn how to use the disparity map post-filtering to improve the results
of StereoBM and StereoSGBM algorithms.

Source Stereoscopic Image
-------------------------




Source Code
-----------

We will be using snippets from the example application, that can be downloaded [here](https://github.com/Itseez/opencv_contrib/blob/master/modules/ximgproc/samples/disparity_filtering.cpp).

Explanation
-----------

The provided example has several options that yield different trade-offs between the speed and
the quality of the resulting disparity map. Both the speed and the quality are measured if the user
has provided the ground-truth disparity map. In this tutorial we will take a detailed look at the
default pipeline, that was designed to provide the best possible quality under the constraints of
real-time processing on CPU.

-#  **Load left and right views**
    @snippet ximgproc/samples/disparity_filtering.cpp load_views
    We start by loading the source stereopair. For this tutorial we will take a somewhat challenging
    example from the MPI-Sintel dataset with a lot of texture-less regions.

-#  **Prepare the views for matching**
    @snippet ximgproc/samples/disparity_filtering.cpp downscale
    We perform downscaling of the views to speed-up the matching stage at the cost of minor
    quality degradation. To get the best possible quality downscaling should be avoided.

-#  **Perform matching and create the filter instance**
    @snippet ximgproc/samples/disparity_filtering.cpp matching
    We are using StereoBM for faster processing. If speed is not critical, though,
    StereoSGBM would provide better quality. The filter instance is created by providing
    the StereoMatcher instance that we intend to use. Another matcher instance is
    returned by the createRightMatcher function. These two matcher instances are then
    used to compute disparity maps both for the left and right views, that are required
    by the filter.

-#  **Perform filtering**
    @snippet ximgproc/samples/disparity_filtering.cpp filtering
    Disparity maps computed by the respective matcher instances, as well as the source left view
    are passed to the filter. Note that we are using the original non-downscaled view to guide the
    filtering process. The disparity map is automatically upscaled in an edge-aware fashion to match
    the original view resolution. The result is stored in filtered_disp.

-#  **Visualize the disparity maps**
    @snippet ximgproc/samples/disparity_filtering.cpp visualization
    We use a convenience function getDisparityVis to visualize the disparity maps. The second parameter
    defines the contrast (all disparity values are scaled by this value in the visualization).

Results
-------



After Width: | Height: | Size: 58 KiB |
After Width: | Height: | Size: 56 KiB |
After Width: | Height: | Size: 60 KiB |
After Width: | Height: | Size: 58 KiB |