Merge pull request #1520 from lluisgomez:scene_text_erGrouping_classifier

pull/1488/merge
Roman Donchenko 12 years ago committed by OpenCV Buildbot
commit 4ce0a8a822
  1. 6
      modules/objdetect/doc/erfilter.rst
  2. 7
      modules/objdetect/include/opencv2/objdetect/erfilter.hpp
  3. 348
      modules/objdetect/src/erfilter.cpp
  4. BIN
      samples/cpp/scenetext.jpg
  5. BIN
      samples/cpp/scenetext01.jpg
  6. BIN
      samples/cpp/scenetext02.jpg
  7. BIN
      samples/cpp/scenetext03.jpg
  8. BIN
      samples/cpp/scenetext04.jpg
  9. BIN
      samples/cpp/scenetext05.jpg
  10. BIN
      samples/cpp/scenetext06.jpg
  11. BIN
      samples/cpp/scenetext_GT.png
  12. 62
      samples/cpp/textdetection.cpp
  13. 20046
      samples/cpp/trained_classifier_erGrouping.xml

@ -198,12 +198,14 @@ erGrouping
----------
Find groups of Extremal Regions that are organized as text blocks.
.. ocv:function:: void erGrouping( InputArrayOfArrays src, std::vector<std::vector<ERStat> > &regions, std::vector<Rect> &groups )
.. ocv:function:: void erGrouping( InputArrayOfArrays src, std::vector<std::vector<ERStat> > &regions, const std::string& filename, float minProbability, std::vector<Rect > &groups)
:param src: Vector of single channel images CV_8UC1 from which the regions were extracted
:param regions: Vector of ER's retrieved from the ERFilter algorithm from each channel
:param filename: The XML or YAML file with the classifier model (e.g. trained_classifier_erGrouping.xml)
:param minProbability: The minimum probability for accepting a group
:param groups: The output of the algorithm are stored in this parameter as list of rectangles.
This function implements the grouping algorithm described in [Gomez13]. Notice that this implementation constrains the results to horizontally-aligned text and latin script (since ERFilter classifiers are trained only for latin script detection).
The algorithm combines two different clustering techniques in a single parameter-free procedure to detect groups of regions organized as text. The maximally meaningful groups are first detected in several feature spaces, where each feature space is a combination of proximity information (x,y coordinates) and a similarity measure (intensity, color, size, gradient magnitude, etc.), thus providing a set of hypotheses of text groups. Evidence Accumulation framework is used to combine all these hypotheses to get the final estimate. Each of the resulting groups is finally heuristically validated in order to assess if it forms a valid horizontally-aligned text block.
The algorithm combines two different clustering techniques in a single parameter-free procedure to detect groups of regions organized as text. The maximally meaningful groups are first detected in several feature spaces, where each feature space is a combination of proximity information (x,y coordinates) and a similarity measure (intensity, color, size, gradient magnitude, etc.), thus providing a set of hypotheses of text groups. Evidence Accumulation framework is used to combine all these hypotheses to get the final estimate. Each of the resulting groups is finally validated using a classifier in order to assess if it forms a valid horizontally-aligned text block.

@ -250,14 +250,17 @@ CV_EXPORTS void computeNMChannels(InputArray _src, OutputArrayOfArrays _channels
(x,y coordinates) and a similarity measure (intensity, color, size, gradient magnitude, etc.),
thus providing a set of hypotheses of text groups. Evidence Accumulation framework is used to
combine all these hypotheses to get the final estimate. Each of the resulting groups are finally
heuristically validated in order to assess if they form a valid horizontally-aligned text block.
validated using a classifier in order to assess if they form a valid horizontally-aligned text block.
\param src Vector of single channel images CV_8UC1 from which the regions were extracted.
\param regions Vector of ER's retrieved from the ERFilter algorithm from each channel
\param filename The XML or YAML file with the classifier model (e.g. trained_classifier_erGrouping.xml)
\param minProbability The minimum probability for accepting a group
\param groups The output of the algorithm are stored in this parameter as list of rectangles.
*/
CV_EXPORTS void erGrouping(InputArrayOfArrays src, std::vector<std::vector<ERStat> > &regions,
std::vector<Rect> &groups);
const std::string& filename, float minProbablity,
std::vector<Rect > &groups);
}
#endif // _OPENCV_ERFILTER_HPP_

@ -235,10 +235,7 @@ void ERFilterNM::er_tree_extract( InputArray image )
if (thresholdDelta > 1)
{
Mat tmp;
src.copyTo(tmp);
src.release();
src = (image.getMat() / thresholdDelta) -1;
src = (src / thresholdDelta) -1;
}
const unsigned char * image_data = src.data;
@ -2721,6 +2718,28 @@ float extract_features(InputOutputArray src, vector<ERStat> &regions, vector<ERF
return max_stroke;
}
// Ordering predicate for edges stored as Vec4f (x0, y0, x1, y1): returns true
// when edge `i` is strictly shorter than edge `j`. Endpoints are rounded to
// integer pixel coordinates before the length is measured, so two edges whose
// rounded endpoints coincide compare as equivalent. Used as the comparator
// passed to std::sort over the subdivision edge list.
static bool edge_comp (Vec4f i,Vec4f j)
{
    // Endpoints of the first edge, snapped to the pixel grid.
    const Point i_start(cvRound(i[0]), cvRound(i[1]));
    const Point i_end(cvRound(i[2]), cvRound(i[3]));

    // Endpoints of the second edge, snapped to the pixel grid.
    const Point j_start(cvRound(j[0]), cvRound(j[1]));
    const Point j_end(cvRound(j[2]), cvRound(j[3]));

    const double length_i = cv::norm(i_start - i_end);
    const double length_j = cv::norm(j_start - j_end);

    return length_i < length_j;
}
// Linear membership test: returns true if `p` compares equal to any point
// already stored in `vertex`, false otherwise (including when `vertex` is
// empty). Neither argument is modified.
static bool find_vertex(vector<Point> &vertex, Point &p)
{
    for (vector<Point>::iterator it = vertex.begin(); it != vertex.end(); ++it)
    {
        if (*it == p)
            return true;
    }
    return false;
}
/*!
Find groups of Extremal Regions that are organized as text blocks. This function implements
the grouping algorithm described in:
@ -2734,16 +2753,25 @@ float extract_features(InputOutputArray src, vector<ERStat> &regions, vector<ERF
(x,y coordinates) and a similarity measure (intensity, color, size, gradient magnitude, etc.),
thus providing a set of hypotheses of text groups. Evidence Accumulation framework is used to
combine all these hypotheses to get the final estimate. Each of the resulting groups are finally
heuristically validated in order to assess if they form a valid horizontally-aligned text block.
validated by a classifier in order to assess if they form a valid horizontally-aligned text block.
\param src Vector of single channel images CV_8UC1 from which the regions were extracted.
\param regions Vector of ER's retrieved from the ERFilter algorithm from each channel
\param filename The XML or YAML file with the classifier model (e.g. trained_classifier_erGrouping.xml)
\param minProbability The minimum probability for accepting a group
\param groups The output of the algorithm are stored in this parameter as list of rectangles.
*/
void erGrouping(InputArrayOfArrays _src, vector<vector<ERStat> > &regions, std::vector<Rect > &text_boxes)
void erGrouping(InputArrayOfArrays _src, vector<vector<ERStat> > &regions, const std::string& filename, float minProbability, std::vector<Rect > &text_boxes)
{
// TODO assert correct vector<Mat>
CvBoost group_boost;
if (ifstream(filename.c_str()))
group_boost.load( filename.c_str(), "boost" );
else
CV_Error(CV_StsBadArg, "erGrouping: Default classifier file not found!");
std::vector<Mat> src;
_src.getMatVector(src);
@ -2868,18 +2896,22 @@ void erGrouping(InputArrayOfArrays _src, vector<vector<ERStat> > &regions, std::
free(D);
/* --------------------------------- Groups Validation --------------------------------*/
/* Examine each of the clusters in order to assess if they are valid text lines or not */
/* ------------------------------------------------------------------------------------*/
// remove non-horizontally-aligned groups
vector<Rect> groups_rects;
vector<vector<float> > data_arrays(meaningful_clusters.size());
vector<Rect> groups_rects(meaningful_clusters.size());
// Collect group level features and classify the group
for (int i=(int)meaningful_clusters.size()-1; i>=0; i--)
{
Rect group_rect;
float sumx=0, sumy=0, sumxy=0, sumx2=0;
// linear regression slope helps discriminating horizontal aligned groups
for (int j=0; j<(int)meaningful_clusters.at(i).size();j++)
{
if (j==0)
@ -2907,105 +2939,69 @@ void erGrouping(InputArrayOfArrays _src, vector<vector<ERStat> > &regions, std::
float a1=((int)meaningful_clusters.at(i).size()*sumxy-sumx*sumy) /
((int)meaningful_clusters.at(i).size()*sumx2-sumx*sumx);
if (abs(a1) > 0.13)
meaningful_clusters.erase(meaningful_clusters.begin()+i);
vector<float> data;
if (a1 != a1)
data_arrays.at(i).push_back(1.f);
else
groups_rects.insert(groups_rects.begin(), group_rect);
}
data_arrays.at(i).push_back(a1);
//TODO if group has less than 5 chars we can infer that is a single line so an additional rule can
// be added here in order to reject non-colinear small groups
groups_rects.at(i) = group_rect;
// group probability mean
double group_probability_mean = 0;
// number of non-overlapping regions
vector<Rect> individual_components;
/* TODO this is better code for detecting non-horizontally-aligned groups but only works for
* single lines so we need here a way to split the rejected groups into several line hypothesis
* and recursively apply the text line validation test until no more splits can be done
vector<Rect> groups_rects;
for (int i=meaningful_clusters.size()-1; i>=0; i--)
{
Rect group_rect;
// The variance of several similarity features is also helpful
vector<float> strokes;
vector<float> grad_magnitudes;
vector<float> intensities;
vector<float> bg_intensities;
for (int j=0; j<(int)meaningful_clusters.at(i).size();j++)
{
if (j==0)
{
group_rect = regions.at(c).at(meaningful_clusters.at(i).at(j)).rect;
} else {
group_rect = group_rect | regions.at(c).at(meaningful_clusters.at(i).at(j)).rect;
}
}
float group_y_center = 0;
for (int j=0; j<(int)meaningful_clusters.at(i).size();j++)
{
int region_y_center = regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.y +
regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.height/2;
group_y_center += region_y_center;
}
group_y_center = group_y_center / (int)meaningful_clusters.at(i).size();
// We'll try to remove groups with repetitive patterns using averaged SAD
// SAD = Sum of Absolute Differences
Mat grey = img;
Mat sad = Mat::zeros(regions.at(c).at(meaningful_clusters.at(i).at(0)).rect.size() , CV_8UC1);
Mat region_mask = Mat::zeros(grey.rows+2, grey.cols+2, CV_8UC1);
float sad_value = 0;
Mat ratios = Mat::zeros(1, (int)meaningful_clusters.at(i).size(), CV_32FC1);
//Mat holes = Mat::zeros(1, (int)meaningful_clusters.at(i).size(), CV_32FC1);
float err = 0;
for (int j=0; j<(int)meaningful_clusters.at(i).size();j++)
{
int region_y_center = regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.y +
regions.at(c).at(meaningful_clusters.at(i).at(j)).rect.height/2;
err += pow(group_y_center-region_y_center, 2);
}
err = sqrt(err / (int)meaningful_clusters.at(i).size());
err = err / group_rect.height;
if (err > 0.17)
meaningful_clusters.erase(meaningful_clusters.begin()+i);
else
groups_rects.insert(groups_rects.begin(), group_rect);
ERStat *stat = &regions.at(c).at(meaningful_clusters.at(i).at(j));
} */
// check for colinear groups that can be merged
for (int i=0; i<(int)meaningful_clusters.size(); i++)
{
int ay1 = groups_rects.at(i).y;
int ay2 = groups_rects.at(i).y + groups_rects.at(i).height;
int ax1 = groups_rects.at(i).x;
int ax2 = groups_rects.at(i).x + groups_rects.at(i).width;
for (int j=(int)meaningful_clusters.size()-1; j>i; j--)
{
int by1 = groups_rects.at(j).y;
int by2 = groups_rects.at(j).y + groups_rects.at(j).height;
int bx1 = groups_rects.at(j).x;
int bx2 = groups_rects.at(j).x + groups_rects.at(j).width;
//Fill the region
Mat region = region_mask(Rect(Point(stat->rect.x,stat->rect.y),
Point(stat->rect.br().x+2,stat->rect.br().y+2)));
region = Scalar(0);
int newMaskVal = 255;
int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY;
Rect rect;
int y_intersection = min(ay2,by2) - max(ay1,by1);
floodFill( grey(Rect(Point(stat->rect.x,stat->rect.y),Point(stat->rect.br().x,stat->rect.br().y))),
region, Point(stat->pixel%grey.cols - stat->rect.x, stat->pixel/grey.cols - stat->rect.y),
Scalar(255), &rect, Scalar(stat->level), Scalar(0), flags );
if (y_intersection > 0.75*(max(groups_rects.at(i).height,groups_rects.at(j).height)))
Mat mask = Mat::zeros(regions.at(c).at(meaningful_clusters.at(i).at(0)).rect.size() , CV_8UC1);
resize(region, mask, mask.size());
mask = mask - 254;
if (j!=0)
{
int xdist = min(abs(ax2-bx1),abs(bx2-ax1));
if (xdist < 0.75*(max(groups_rects.at(i).height,groups_rects.at(j).height)))
{
for (int r=0; r<(int)meaningful_clusters.at(j).size(); r++)
meaningful_clusters.at(i).push_back(meaningful_clusters.at(j).at(r));
meaningful_clusters.erase(meaningful_clusters.begin()+j);
groups_rects.erase(groups_rects.begin()+j);
}
// accumulate Sum of Absolute Differences
absdiff(sad, mask, sad);
Scalar s = sum(sad);
sad_value += (float)s[0]/(sad.rows*sad.cols);
}
mask.copyTo(sad);
ratios.at<float>(0,j) = (float)min(stat->rect.width, stat->rect.height) /
max(stat->rect.width, stat->rect.height);
//holes.at<float>(0,j) = (float)stat->hole_area_ratio;
}
}
// remove groups with less than 3 non-overlapping regions
for (int i=(int)meaningful_clusters.size()-1; i>=0; i--)
{
double group_probability_mean = 0;
Rect group_rect;
vector<Rect> individual_components;
for (int j=0; j<(int)meaningful_clusters.at(i).size();j++)
{
strokes.push_back((float)features.at(meaningful_clusters.at(i).at(j)).stroke_mean);
grad_magnitudes.push_back((float)features.at(meaningful_clusters.at(i).at(j)).gradient_mean);
intensities.push_back(features.at(meaningful_clusters.at(i).at(j)).intensity_mean);
bg_intensities.push_back(features.at(meaningful_clusters.at(i).at(j)).boundary_intensity_mean);
group_probability_mean += regions.at(c).at(meaningful_clusters.at(i).at(j)).probability;
if (j==0)
@ -3036,112 +3032,108 @@ void erGrouping(InputArrayOfArrays _src, vector<vector<ERStat> > &regions, std::
}
group_probability_mean = group_probability_mean / meaningful_clusters.at(i).size();
data_arrays.at(i).insert(data_arrays.at(i).begin(),(float)individual_components.size());
if (individual_components.size()<3) // || (group_probability_mean < 0.5)
// variance of widths and heights help to discriminate groups with high height variability
vector<int> widths;
vector<int> heights;
// the MST edge orientations histogram may be dominated by the horizontal axis orientation
Subdiv2D subdiv(Rect(0,0,src.at(0).cols,src.at(0).rows));
for (int r=0; r < (int)individual_components.size(); r++)
{
meaningful_clusters.erase(meaningful_clusters.begin()+i);
groups_rects.erase(groups_rects.begin()+i);
continue;
widths.push_back(individual_components.at(r).width);
heights.push_back(individual_components.at(r).height);
Point2f fp( (float)individual_components.at(r).x + individual_components.at(r).width/2,
(float)individual_components.at(r).y + individual_components.at(r).height/2 );
subdiv.insert(fp);
}
}
Scalar mean, std;
meanStdDev(Mat(widths), mean, std);
data_arrays.at(i).push_back((float)(std[0]/mean[0]));
data_arrays.at(i).push_back((float)mean[0]);
meanStdDev(Mat(heights), mean, std);
data_arrays.at(i).push_back((float)(std[0]/mean[0]));
// TODO remove groups with high height variability
vector<Vec4f> edgeList;
subdiv.getEdgeList(edgeList);
std::sort (edgeList.begin(), edgeList.end(), edge_comp);
vector<Point> mst_vertices;
// Try to remove groups with repetitive patterns
for (int i=(int)meaningful_clusters.size()-1; i>=0; i--)
{
Mat grey = img;
Mat sad = Mat::zeros(regions.at(c).at(meaningful_clusters.at(i).at(0)).rect.size() , CV_8UC1);
Mat region_mask = Mat::zeros(grey.rows+2, grey.cols+2, CV_8UC1);
float sad_value = 0;
Mat ratios = Mat::zeros(1, (int)meaningful_clusters.at(i).size(), CV_32FC1);
Mat holes = Mat::zeros(1, (int)meaningful_clusters.at(i).size(), CV_32FC1);
int horiz_edges = 0, non_horiz_edges = 0;
vector<float> edge_distances;
for (int r=0; r<(int)meaningful_clusters.at(i).size(); r++)
for( size_t k = 0; k < edgeList.size(); k++ )
{
ERStat *stat = &regions.at(c).at(meaningful_clusters.at(i).at(r));
//Fill the region
Mat region = region_mask(Rect(Point(stat->rect.x,stat->rect.y),
Point(stat->rect.br().x+2,stat->rect.br().y+2)));
region = Scalar(0);
int newMaskVal = 255;
int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY;
Rect rect;
floodFill( grey(Rect(Point(stat->rect.x,stat->rect.y),Point(stat->rect.br().x,stat->rect.br().y))),
region, Point(stat->pixel%grey.cols - stat->rect.x, stat->pixel/grey.cols - stat->rect.y),
Scalar(255), &rect, Scalar(stat->level), Scalar(0), flags );
Mat mask = Mat::zeros(regions.at(c).at(meaningful_clusters.at(i).at(0)).rect.size() , CV_8UC1);
resize(region, mask, mask.size());
mask = mask - 254;
if (r!=0)
Vec4f e = edgeList[k];
Point pt0 = Point(cvRound(e[0]), cvRound(e[1]));
Point pt1 = Point(cvRound(e[2]), cvRound(e[3]));
if (((pt0.x>0)&&(pt0.x<src.at(0).cols)&&(pt0.y>0)&&(pt0.y<src.at(0).rows) &&
(pt1.x>0)&&(pt1.x<src.at(0).cols)&&(pt1.y>0)&&(pt1.y<src.at(0).rows)) &&
((!find_vertex(mst_vertices,pt0)) ||
(!find_vertex(mst_vertices,pt1))))
{
// using Sum of Absolute Differences
absdiff(sad, mask, sad);
Scalar s = sum(sad);
sad_value += (float)s[0]/(sad.rows*sad.cols);
double angle = atan2((double)(pt0.y-pt1.y),(double)(pt0.x-pt1.x));
//if ( (abs(angle) < 0.35) || (abs(angle) > 5.93) || ((abs(angle) > 2.79)&&(abs(angle) < 3.49)) )
if ( (abs(angle) < 0.25) || (abs(angle) > 6.03) || ((abs(angle) > 2.88)&&(abs(angle) < 3.4)) )
{
horiz_edges++;
edge_distances.push_back((float)norm(pt0-pt1));
}
else
non_horiz_edges++;
mst_vertices.push_back(pt0);
mst_vertices.push_back(pt1);
}
mask.copyTo(sad);
ratios.at<float>(0,r) = (float)min(stat->rect.width, stat->rect.height) /
max(stat->rect.width, stat->rect.height);
holes.at<float>(0,r) = (float)stat->hole_area_ratio;
}
Scalar mean,std;
meanStdDev( holes, mean, std);
float holes_mean = (float)mean[0];
meanStdDev( ratios, mean, std);
// Set empirically
if (((float)sad_value / ((int)meaningful_clusters.at(i).size()-1) < 0.12) ||
(((float)sad_value / ((int)meaningful_clusters.at(i).size()-1) < 0.175)&&(holes_mean < 0.015))||
//TODO this must be num of non-overlapping regions.at(c) and probably 7 is ok!
((holes_mean < 0.005)&&((int)meaningful_clusters.at(i).size()>10)))
{
meaningful_clusters.erase(meaningful_clusters.begin()+i);
groups_rects.erase(groups_rects.begin()+i);
}
}
// remove small groups inside others
vector<int> groups_to_remove;
for (int i=0; i<(int)meaningful_clusters.size()-1; i++)
{
for (int j=i+1; j<(int)meaningful_clusters.size(); j++)
{
if (horiz_edges == 0)
data_arrays.at(i).push_back(0.f);
else
data_arrays.at(i).push_back((float)horiz_edges/(horiz_edges+non_horiz_edges));
Rect intersection = groups_rects.at(i) & groups_rects.at(j);
// remove groups where objects are not equidistant enough
Scalar dist_mean, dist_std;
meanStdDev(Mat(edge_distances),dist_mean, dist_std);
if (dist_std[0] == 0)
data_arrays.at(i).push_back(0.f);
else
data_arrays.at(i).push_back((float)(dist_std[0]/dist_mean[0]));
if (intersection == groups_rects.at(i))
groups_to_remove.push_back(i);
if (intersection == groups_rects.at(j))
groups_to_remove.push_back(j);
if (dist_mean[0] == 0)
data_arrays.at(i).push_back(0.f);
else
data_arrays.at(i).push_back((float)dist_mean[0]/data_arrays.at(i).at(3));
}
}
//meanStdDev( holes, mean, std);
//float holes_mean = (float)mean[0];
meanStdDev( ratios, mean, std);
if (!groups_to_remove.empty())
{
int last_removed = -1;
std::sort(groups_to_remove.begin(), groups_to_remove.end());
for (int i=(int)groups_to_remove.size()-1; i>=0; i--)
data_arrays.at(i).push_back((float)sad_value / ((int)meaningful_clusters.at(i).size()-1));
meanStdDev( Mat(strokes), mean, std);
data_arrays.at(i).push_back((float)(std[0]/mean[0]));
meanStdDev( Mat(grad_magnitudes), mean, std);
data_arrays.at(i).push_back((float)(std[0]/mean[0]));
meanStdDev( Mat(intensities), mean, std);
data_arrays.at(i).push_back((float)std[0]);
meanStdDev( Mat(bg_intensities), mean, std);
data_arrays.at(i).push_back((float)std[0]);
// Validate only groups with more than 2 non-overlapping regions
if (data_arrays.at(i).at(0) > 2)
{
if (groups_to_remove.at(i) == last_removed)
continue;
else
last_removed = groups_to_remove.at(i);
data_arrays.at(i).insert(data_arrays.at(i).begin(),0.f);
float votes = group_boost.predict( Mat(data_arrays.at(i)), Mat(), Range::all(), false, true );
// Logistic Correction returns a probability value (in the range(0,1))
double probability = (double)1-(double)1/(1+exp(-2*votes));
meaningful_clusters.erase(meaningful_clusters.begin()+groups_to_remove.at(i));
groups_rects.erase(groups_rects.begin()+groups_to_remove.at(i));
if (probability > minProbability)
text_boxes.push_back(groups_rects.at(i));
}
}
groups_to_remove.clear();
for (int i=0; i<(int)groups_rects.size(); i++)
text_boxes.push_back(groups_rects.at(i));
}
// check for colinear groups that can be merged

Binary file not shown.

Before

Width:  |  Height:  |  Size: 83 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 95 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 93 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 59 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 97 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 111 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 69 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.1 KiB

@ -1,8 +1,12 @@
//--------------------------------------------------------------------------------------------------
// A demo program of the Extremal Region Filter algorithm described in
// Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
//--------------------------------------------------------------------------------------------------
/*
* textdetection.cpp
*
* A demo program of the Extremal Region Filter algorithm described in
* Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
*
* Created on: Sep 23, 2013
* Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es>
*/
#include "opencv2/opencv.hpp"
#include "opencv2/objdetect.hpp"
@ -18,10 +22,13 @@ using namespace cv;
void show_help_and_exit(const char *cmd);
void groups_draw(Mat &src, vector<Rect> &groups);
void er_draw(Mat &src, Mat &dst, ERStat& er);
void er_show(vector<Mat> &channels, vector<vector<ERStat> > &regions);
int main(int argc, const char * argv[])
{
cout << endl << argv[0] << endl << endl;
cout << "Demo program of the Extremal Region Filter algorithm described in " << endl;
cout << "Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012" << endl << endl;
if (argc < 2) show_help_and_exit(argv[0]);
@ -37,11 +44,13 @@ int main(int argc, const char * argv[])
channels.push_back(255-channels[c]);
// Create ERFilter objects with the 1st and 2nd stage default classifiers
Ptr<ERFilter> er_filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"),8,0.00025,0.13,0.4,true,0.1);
Ptr<ERFilter> er_filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"),0.3);
Ptr<ERFilter> er_filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"),16,0.00015,0.13,0.2,true,0.1);
Ptr<ERFilter> er_filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"),0.5);
vector<vector<ERStat> > regions(channels.size());
// Apply the default cascade classifier to each independent channel (could be done in parallel)
cout << "Extracting Class Specific Extremal Regions from " << (int)channels.size() << " channels ..." << endl;
cout << " (...) this may take a while (...)" << endl << endl;
for (int c=0; c<(int)channels.size(); c++)
{
er_filter1->run(channels[c], regions[c]);
@ -49,13 +58,18 @@ int main(int argc, const char * argv[])
}
// Detect character groups
cout << "Grouping extracted ERs ... ";
vector<Rect> groups;
erGrouping(channels, regions, groups);
erGrouping(channels, regions, "trained_classifier_erGrouping.xml", 0.5, groups);
// draw groups
groups_draw(src, groups);
imshow("grouping",src);
waitKey(-1);
cout << "Done!" << endl << endl;
cout << "Press 'e' to show the extracted Extremal Regions, any other key to exit." << endl << endl;
if( waitKey (-1) == 101)
er_show(channels,regions);
// memory clean-up
er_filter1.release();
@ -73,9 +87,6 @@ int main(int argc, const char * argv[])
void show_help_and_exit(const char *cmd)
{
cout << endl << cmd << endl << endl;
cout << "Demo program of the Extremal Region Filter algorithm described in " << endl;
cout << "Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012" << endl << endl;
cout << " Usage: " << cmd << " <input_image> " << endl;
cout << " Default classifier files (trained_classifierNM*.xml) must be in current directory" << endl << endl;
exit(-1);
@ -92,14 +103,25 @@ void groups_draw(Mat &src, vector<Rect> &groups)
}
}
void er_draw(Mat &src, Mat &dst, ERStat& er)
void er_show(vector<Mat> &channels, vector<vector<ERStat> > &regions)
{
if (er.parent != NULL) // deprecate the root region
for (int c=0; c<(int)channels.size(); c++)
{
int newMaskVal = 255;
int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY;
floodFill(src,dst,Point(er.pixel%src.cols,er.pixel/src.cols),Scalar(255),0,Scalar(er.level),Scalar(0),flags);
Mat dst = Mat::zeros(channels[0].rows+2,channels[0].cols+2,CV_8UC1);
for (int r=0; r<(int)regions[c].size(); r++)
{
ERStat er = regions[c][r];
if (er.parent != NULL) // deprecate the root region
{
int newMaskVal = 255;
int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY;
floodFill(channels[c],dst,Point(er.pixel%channels[c].cols,er.pixel/channels[c].cols),
Scalar(255),0,Scalar(er.level),Scalar(0),flags);
}
}
char buff[10]; char *buff_ptr = buff;
sprintf(buff, "channel %d", c);
imshow(buff_ptr, dst);
}
waitKey(-1);
}

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save