Merge pull request #3 from Itseez/master

Update

Branch: pull/331/head
Author: Vladimir, 10 years ago
Commit: a80308fd44
75 changed files:

  1. modules/README.md (2 changes)
  2. modules/datasets/include/opencv2/datasets/or_pascal.hpp (102 changes)
  3. modules/datasets/include/opencv2/datasets/pd_inria.hpp (96 changes)
  4. modules/datasets/samples/or_pascal.cpp (112 changes)
  5. modules/datasets/samples/pd_inria.cpp (94 changes)
  6. modules/datasets/src/or_pascal.cpp (214 changes)
  7. modules/datasets/src/pd_inria.cpp (203 changes)
  8. modules/stereo/CMakeLists.txt (2 changes)
  9. modules/stereo/README.md (2 changes)
  10. modules/stereo/include/opencv2/stereo.hpp (262 changes)
  11. modules/stereo/include/opencv2/stereo/stereo.hpp (49 changes)
  12. modules/stereo/src/precomp.hpp (59 changes)
  13. modules/stereo/src/stereo_binary_bm.cpp (738 changes)
  14. modules/stereo/src/stereo_binary_sgbm.cpp (958 changes)
  15. modules/stereo/test/test_main.cpp (45 changes)
  16. modules/stereo/test/test_precomp.hpp (31 changes)
  17. modules/surface_matching/samples/ppf_load_match.cpp (19 changes)
  18. modules/surface_matching/src/ppf_match_3d.cpp (4 changes)
  19. modules/text/include/opencv2/text/ocr.hpp (113 changes)
  20. modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz (binary)
  21. modules/text/samples/cropped_word_recognition.cpp (66 changes)
  22. modules/text/samples/scenetext_word01.jpg (binary)
  23. modules/text/samples/scenetext_word02.jpg (binary)
  24. modules/text/src/erfilter.cpp (6 changes)
  25. modules/text/src/ocr_beamsearch_decoder.cpp (656 changes)
  26. modules/text/src/ocr_tesseract.cpp (2 changes)
  27. modules/tracking/doc/[Tutorial] Adding new Tracker Method for dummies (135 changes)
  28. modules/tracking/doc/tracking.bib (26 changes)
  29. modules/tracking/include/opencv2/tracking/tracker.hpp (54 changes)
  30. modules/tracking/samples/kcf.cpp (194 changes)
  31. modules/tracking/samples/tld_test.cpp (4 changes)
  32. modules/tracking/src/featureColorName.cpp (32816 changes)
  33. modules/tracking/src/opencl/tldDetector.cl (133 changes)
  34. modules/tracking/src/precomp.hpp (6 changes)
  35. modules/tracking/src/tldDetector.cpp (575 changes)
  36. modules/tracking/src/tldDetector.hpp (7 changes)
  37. modules/tracking/src/tldEnsembleClassifier.cpp (4 changes)
  38. modules/tracking/src/tldEnsembleClassifier.hpp (1 change)
  39. modules/tracking/src/tldModel.cpp (114 changes)
  40. modules/tracking/src/tldModel.hpp (7 changes)
  41. modules/tracking/src/tldTracker.cpp (24 changes)
  42. modules/tracking/src/tldTracker.hpp (1 change)
  43. modules/tracking/src/tracker.cpp (1 change)
  44. modules/tracking/src/trackerKCF.cpp (686 changes)
  45. modules/xfeatures2d/include/opencv2/xfeatures2d.hpp (8 changes)
  46. modules/xfeatures2d/src/daisy.cpp (2 changes)
  47. modules/ximgproc/doc/ximgproc.bib (22 changes)
  48. modules/ximgproc/include/opencv2/ximgproc.hpp (1 change)
  49. modules/ximgproc/include/opencv2/ximgproc/disparity_filter.hpp (146 changes)
  50. modules/ximgproc/include/opencv2/ximgproc/edge_filter.hpp (60 changes)
  51. modules/ximgproc/perf/perf_disparity_wls_filter.cpp (169 changes)
  52. modules/ximgproc/perf/perf_fgs_filter.cpp (81 changes)
  53. modules/ximgproc/samples/disparity_filtering.cpp (459 changes)
  54. modules/ximgproc/src/disparity_filters.cpp (466 changes)
  55. modules/ximgproc/src/fgs_filter.cpp (694 changes)
  56. modules/ximgproc/src/seeds.cpp (4 changes)
  57. modules/ximgproc/src/structured_edge_detection.cpp (4 changes)
  58. modules/ximgproc/src/wls_filter.cpp (284 changes)
  59. modules/ximgproc/test/test_disparity_wls_filter.cpp (154 changes)
  60. modules/ximgproc/test/test_fgs_filter.cpp (153 changes)
  61. modules/ximgproc/testdata/disparityfilter/GT.png (binary)
  62. modules/ximgproc/testdata/disparityfilter/ROI.xml (7 changes)
  63. modules/ximgproc/testdata/disparityfilter/disparity_left_raw.png (binary)
  64. modules/ximgproc/testdata/disparityfilter/disparity_right_raw.png (binary)
  65. modules/ximgproc/testdata/disparityfilter/left_view.png (binary)
  66. modules/ximgproc/testdata/disparityfilter/reference_accuracy.xml (7 changes)
  67. modules/ximgproc/testdata/edgefilter/fgs/kodim23_lambda=1000_sigma=10.png (binary)
  68. modules/xphoto/include/opencv2/xphoto.hpp (2 changes)
  69. modules/xphoto/include/opencv2/xphoto/white_balance.hpp (32 changes)
  70. modules/xphoto/perf/perf_grayworld.cpp (29 changes)
  71. modules/xphoto/perf/perf_main.cpp (3 changes)
  72. modules/xphoto/perf/perf_precomp.hpp (19 changes)
  73. modules/xphoto/samples/grayworld_color_balance.cpp (62 changes)
  74. modules/xphoto/src/grayworld_white_balance.cpp (213 changes)
  75. modules/xphoto/test/test_grayworld.cpp (90 changes)

modules/README.md
@@ -51,3 +51,5 @@ $ cmake -D OPENCV_EXTRA_MODULES_PATH=<opencv_contrib>/modules -D BUILD_opencv_re
21. **opencv_xobjdetect**: Integral Channel Features Detector Framework.
22. **opencv_xphoto**: Additional photo processing algorithms: Color balance / Denoising / Inpainting.
23. **opencv_stereo**: Stereo Correspondence done with different descriptors: Census / CS-Census / MCT / BRIEF / MV / RT.

modules/datasets/include/opencv2/datasets/or_pascal.hpp
@@ -0,0 +1,102 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2015, Itseez Inc, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Itseez Inc or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_DATASETS_VOC_PASCAL_HPP
#define OPENCV_DATASETS_VOC_PASCAL_HPP
#include <string>
#include <vector>
#include "opencv2/datasets/dataset.hpp"
#include <opencv2/core.hpp>
namespace cv
{
namespace datasets
{
//! @addtogroup datasets_or
//! @{
struct PascalPart: public Object
{
std::string name;
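// bounding box corners from the VOC <bndbox> element, in pixels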
int xmin;
int ymin;
int xmax;
int ymax;
};
struct PascalObj: public PascalPart
{
std::string pose;
bool truncated;
bool difficult;
bool occluded;
std::vector<PascalPart> parts;
};
struct OR_pascalObj : public Object
{
std::string filename;
int width;
int height;
int depth;
std::vector<PascalObj> objects;
};
class CV_EXPORTS OR_pascal : public Dataset
{
public:
virtual void load(const std::string &path) = 0;
static Ptr<OR_pascal> create();
};
//! @}
}// namespace datasets
}// namespace cv
#endif

modules/datasets/include/opencv2/datasets/pd_inria.hpp
@@ -0,0 +1,96 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2015, Itseez Inc, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Itseez Inc or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_DATASETS_PD_INRIA_HPP
#define OPENCV_DATASETS_PD_INRIA_HPP
#include <string>
#include <vector>
#include "opencv2/datasets/dataset.hpp"
#include <opencv2/core.hpp>
namespace cv
{
namespace datasets
{
//! @addtogroup datasets_pd
//! @{
enum sampleType
{
POS = 0,
NEG = 1
};
struct PD_inriaObj : public Object
{
// image file name
std::string filename;
// positive or negative
sampleType sType;
// image size
int width;
int height;
int depth;
// bounding boxes
std::vector< Rect > bndboxes;
};
class CV_EXPORTS PD_inria : public Dataset
{
public:
virtual void load(const std::string &path) = 0;
static Ptr<PD_inria> create();
};
//! @}
}// namespace datasets
}// namespace cv
#endif

modules/datasets/samples/or_pascal.cpp
@@ -0,0 +1,112 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2015, Itseez Inc, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Itseez Inc or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/datasets/or_pascal.hpp"
#include <opencv2/core.hpp>
#include <cstdio>
#include <cstdlib> // atoi
#include <string>
#include <vector>
#include <set>
using namespace std;
using namespace cv;
using namespace cv::datasets;
int main(int argc, char *argv[])
{
const char *keys =
"{ help h usage ? | | show this message }"
"{ path p |true| path to folder with dataset }";
CommandLineParser parser(argc, argv, keys);
string path(parser.get<string>("path"));
if (parser.has("help") || path=="true")
{
parser.printMessage();
return -1;
}
Ptr<OR_pascal> dataset = OR_pascal::create();
dataset->load(path);
// Print train/test/validation size and first example
OR_pascalObj *example;
vector< Ptr<Object> > &train = dataset->getTrain();
printf("\ntrain:\nsize: %u", (unsigned int)train.size());
example = static_cast<OR_pascalObj *>(train[0].get());
printf("\nfirst image: \n%s", example->filename.c_str());
printf("\nsize:");
printf("\n - width: %d", example->width);
printf("\n - height: %d", example->height);
printf("\n - depth: %d", example->depth);
for (unsigned int i = 0; i < example->objects.size(); i++)
{
printf("\nobject %d", i);
printf("\nname: %s", example->objects[i].name.c_str());
printf("\npose: %s", example->objects[i].pose.c_str());
printf("\ntruncated: %d", example->objects[i].truncated);
printf("\ndifficult: %d", example->objects[i].difficult);
printf("\noccluded: %d", example->objects[i].occluded);
printf("\nbounding box:");
printf("\n - xmin: %d", example->objects[i].xmin);
printf("\n - ymin: %d", example->objects[i].ymin);
printf("\n - xmax: %d", example->objects[i].xmax);
printf("\n - ymax: %d", example->objects[i].ymax);
}
vector< Ptr<Object> > &test = dataset->getTest();
printf("\ntest:\nsize: %u", (unsigned int)test.size());
example = static_cast<OR_pascalObj *>(test[0].get());
printf("\nfirst image: \n%s", example->filename.c_str());
vector< Ptr<Object> > &validation = dataset->getValidation();
printf("\nvalidation:\nsize: %u", (unsigned int)validation.size());
example = static_cast<OR_pascalObj *>(validation[0].get());
printf("\nfirst image: \n%s\n", example->filename.c_str());
return 0;
}

modules/datasets/samples/pd_inria.cpp
@@ -0,0 +1,94 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2015, Itseez Inc, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Itseez Inc or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/datasets/pd_inria.hpp"
#include <opencv2/core.hpp>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
using namespace cv;
using namespace cv::datasets;
int main(int argc, char *argv[])
{
const char *keys =
"{ help h usage ? | | show this message }"
"{ path p |true| path to dataset }";
CommandLineParser parser(argc, argv, keys);
string path(parser.get<string>("path"));
if (parser.has("help") || path=="true")
{
parser.printMessage();
return -1;
}
Ptr<PD_inria> dataset = PD_inria::create();
dataset->load(path);
cout << "train size: " << (unsigned int)dataset->getTrain().size() << endl;
PD_inriaObj *example = static_cast<PD_inriaObj *>(dataset->getTrain()[0].get());
cout << "first train object: " << endl;
cout << "name: " << example->filename << endl;
// image size
cout << "image size: " << endl;
cout << " - width: " << example->width << endl;
cout << " - height: " << example->height << endl;
cout << " - depth: " << example->depth << endl;
// bounding boxes
for (unsigned int i = 0; i < (example->bndboxes).size(); i++)
{
cout << "object " << i << endl;
int x = (example->bndboxes)[i].x;
int y = (example->bndboxes)[i].y;
cout << " - xmin = " << x << endl;
cout << " - ymin = " << y << endl;
cout << " - xmax = " << (example->bndboxes)[i].width + x << endl;
cout << " - ymax = " << (example->bndboxes)[i].height + y<< endl;
}
return 0;
}

modules/datasets/src/or_pascal.cpp
@@ -0,0 +1,214 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2015, Itseez Inc, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Itseez Inc or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/datasets/or_pascal.hpp"
#include "opencv2/datasets/util.hpp"
#include <opencv2/datasets/tinyxml2/tinyxml2.h>
#include <fstream>
namespace cv
{
namespace datasets
{
using namespace std;
using namespace tinyxml2;
class OR_pascalImp : public OR_pascal
{
public:
OR_pascalImp() {}
virtual void load(const string &path);
private:
void loadDataset(const string &path, const string &nameImageSet, vector< Ptr<Object> > &imageSet);
Ptr<Object> parseAnnotation(const string path, const string id);
const char* parseNodeText(XMLElement* node, const string &nodeName, const string &defaultValue);
};
void OR_pascalImp::load(const string &path)
{
train.push_back(vector< Ptr<Object> >());
test.push_back(vector< Ptr<Object> >());
validation.push_back(vector< Ptr<Object> >());
loadDataset(path, "train", train.back());
loadDataset(path, "test", test.back());
loadDataset(path, "val", validation.back());
}
void OR_pascalImp::loadDataset(const string &path, const string &nameImageSet, vector< Ptr<Object> > &imageSet)
{
string pathImageSets(path + "ImageSets/Main/");
string imageList = pathImageSets + nameImageSet + ".txt";
ifstream in(imageList.c_str());
string error_message = format("Image list not exists!\n%s", imageList.c_str());
if (!in.is_open())
CV_Error(Error::StsBadArg, error_message);
string id = "";
while( getline(in, id) )
{
if( strcmp(nameImageSet.c_str(), "test") == 0 ) // test set ground truth is not available
{
Ptr<OR_pascalObj> annotation(new OR_pascalObj);
annotation->filename = path + "JPEGImages/" + id + ".jpg";
imageSet.push_back(annotation);
}
else
{
imageSet.push_back(parseAnnotation(path, id));
}
}
}
const char* OR_pascalImp::parseNodeText(XMLElement* node, const string &nodeName, const string &defaultValue)
{
// Guard against a missing child element (previously GetText() was called on a
// possibly null pointer); taking the strings by const reference also keeps the
// returned defaultValue.c_str() pointer valid at the call site.
XMLElement* child = node->FirstChildElement(nodeName.c_str());
const char* e = child ? child->GetText() : 0;
if( e != 0 )
return e;
else
return defaultValue.c_str();
}
Ptr<Object> OR_pascalImp::parseAnnotation(const string path, const string id)
{
string pathAnnotations(path + "Annotations/");
string pathImages(path + "JPEGImages/");
Ptr<OR_pascalObj> annotation(new OR_pascalObj);
XMLDocument doc;
string xml_file = pathAnnotations + id + ".xml";
XMLError error_code = doc.LoadFile(xml_file.c_str());
string error_message = format("Parsing XML failed. Error code = %d. \nFile = %s", error_code, xml_file.c_str());
switch (error_code)
{
case XML_SUCCESS:
break;
case XML_ERROR_FILE_NOT_FOUND:
error_message = "XML file not found! " + error_message;
CV_Error(Error::StsParseError, error_message);
return annotation;
default:
CV_Error(Error::StsParseError, error_message);
break;
}
// <annotation/>
XMLElement *xml_ann = doc.RootElement();
// <filename/>
string img_name = xml_ann->FirstChildElement("filename")->GetText();
annotation->filename = pathImages + img_name;
// <size/>
XMLElement *sz = xml_ann->FirstChildElement("size");
int width = atoi(sz->FirstChildElement("width")->GetText());
int height = atoi(sz->FirstChildElement("height")->GetText());
int depth = atoi(sz->FirstChildElement("depth")->GetText());
annotation->width = width;
annotation->height = height;
annotation->depth = depth;
// <object/>
vector<PascalObj> objects;
XMLElement *xml_obj = xml_ann->FirstChildElement("object");
while (xml_obj)
{
PascalObj pascal_obj;
pascal_obj.name = xml_obj->FirstChildElement("name")->GetText();
pascal_obj.pose = parseNodeText(xml_obj, "pose", "Unspecified");
pascal_obj.truncated = atoi(parseNodeText(xml_obj, "truncated", "0")) > 0;
pascal_obj.difficult = atoi(parseNodeText(xml_obj, "difficult", "0")) > 0;
pascal_obj.occluded = atoi(parseNodeText(xml_obj, "occluded", "0")) > 0;
// <bndbox/>
XMLElement *xml_bndbox = xml_obj->FirstChildElement("bndbox");
pascal_obj.xmin = atoi(xml_bndbox->FirstChildElement("xmin")->GetText());
pascal_obj.ymin = atoi(xml_bndbox->FirstChildElement("ymin")->GetText());
pascal_obj.xmax = atoi(xml_bndbox->FirstChildElement("xmax")->GetText());
pascal_obj.ymax = atoi(xml_bndbox->FirstChildElement("ymax")->GetText());
// <part/>
vector<PascalPart> parts;
XMLElement *xml_part = xml_obj->FirstChildElement("part");
while (xml_part)
{
PascalPart pascal_part;
pascal_part.name = xml_part->FirstChildElement("name")->GetText();
xml_bndbox = xml_part->FirstChildElement("bndbox");
pascal_part.xmin = atoi(xml_bndbox->FirstChildElement("xmin")->GetText());
pascal_part.ymin = atoi(xml_bndbox->FirstChildElement("ymin")->GetText());
pascal_part.xmax = atoi(xml_bndbox->FirstChildElement("xmax")->GetText());
pascal_part.ymax = atoi(xml_bndbox->FirstChildElement("ymax")->GetText());
parts.push_back(pascal_part);
xml_part = xml_part->NextSiblingElement("part");
}
pascal_obj.parts = parts;
objects.push_back(pascal_obj);
xml_obj = xml_obj->NextSiblingElement("object");
}
annotation->objects = objects;
return annotation;
}
Ptr<OR_pascal> OR_pascal::create()
{
return Ptr<OR_pascalImp>(new OR_pascalImp);
}
}// namespace datasets
}// namespace cv

modules/datasets/src/pd_inria.cpp
@@ -0,0 +1,203 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2015, Itseez Inc, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Itseez Inc or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/datasets/pd_inria.hpp"
#include "opencv2/datasets/util.hpp"
namespace cv
{
namespace datasets
{
using namespace std;
class PD_inriaImp : public PD_inria
{
public:
PD_inriaImp() {}
virtual ~PD_inriaImp() {}
virtual void load(const string &path);
private:
void loadDataset(const string &path, const string nameImageSet, vector< Ptr<Object> > &imageSet);
void readTextLines(const string filename, vector< string > &lines);
void parseAnnotation(const string filename, Ptr< PD_inriaObj > &object);
};
void PD_inriaImp::load(const string &path)
{
// Training set
train.push_back(vector< Ptr<Object> >());
loadDataset(path, "Train", train.back());
// Testing set
test.push_back(vector< Ptr<Object> >());
loadDataset(path, "Test", test.back());
// There is no validation set
validation.push_back(vector< Ptr<Object> >());
}
void PD_inriaImp::readTextLines(const string filename, vector< string > &lines)
{
ifstream in(filename.c_str());
string error_message = "";
if (!in.is_open())
{
error_message = format("Unable to open file: \n%s\n", filename.c_str());
CV_Error(Error::StsBadArg, error_message);
}
string currline = "";
while (getline(in, currline))
lines.push_back(currline);
}
void PD_inriaImp::parseAnnotation(const string filename, Ptr< PD_inriaObj > &object)
{
string error_message = "";
ifstream in(filename.c_str());
if (!in.is_open())
{
error_message = format("Unable to open file: \n%s\n", filename.c_str());
CV_Error(Error::StsBadArg, error_message);
}
string imageSizeHeader = "Image size (X x Y x C) : ";
string imageSizeFmt = imageSizeHeader + "%d x %d x %d";
string objWithGTHeader = "Objects with ground truth : ";
string objWithGTFmt = objWithGTHeader + "%d { \"PASperson\" }";
string boundBoxHeader = "Bounding box for object ";
string boundBoxFmt = boundBoxHeader + "%*d \"PASperson\" (Xmin, Ymin) - (Xmax, Ymax) : (%d, %d) - (%d, %d)";
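// Illustrative examples of annotation lines these formats are meant to match
// (the values here are hypothetical):
//   Image size (X x Y x C) : 640 x 480 x 3
//   Objects with ground truth : 2 { "PASperson" }
//   Bounding box for object 1 "PASperson" (Xmin, Ymin) - (Xmax, Ymax) : (39, 101) - (126, 286)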
string line = "";
int width = 0;
int height = 0;
int depth = 0;
int xmin, ymin, xmax, ymax;
int numObjects = 0;
while (getline(in, line))
{
if (line[0] == '#' || !line[0])
continue;
if (strstr(line.c_str(), imageSizeHeader.c_str()))
{
sscanf(line.c_str(), imageSizeFmt.c_str(), &width, &height, &depth);
object->width = width;
object->height = height;
object->depth = depth;
}
else if (strstr(line.c_str(), objWithGTHeader.c_str()))
{
sscanf(line.c_str(), objWithGTFmt.c_str(), &numObjects);
if (numObjects <= 0)
break;
}
else if (strstr(line.c_str(), boundBoxHeader.c_str()))
{
sscanf(line.c_str(), boundBoxFmt.c_str(), &xmin, &ymin, &xmax, &ymax);
Rect bndbox;
bndbox.x = xmin;
bndbox.y = ymin;
bndbox.width = xmax - xmin;
bndbox.height = ymax - ymin;
(object->bndboxes).push_back(bndbox);
}
}
CV_Assert((object->bndboxes).size() == (unsigned int)numObjects);
}
void PD_inriaImp::loadDataset(const string &path, const string nameImageSet, vector< Ptr<Object> > &imageSet)
{
string listAnn = path + nameImageSet + "/annotations.lst";
string listPos = path + nameImageSet + "/pos.lst";
string listNeg = path + nameImageSet + "/neg.lst";
vector< string > fsAnn;
vector< string > fsPos;
vector< string > fsNeg;
// read file names
readTextLines(listAnn, fsAnn);
readTextLines(listPos, fsPos);
readTextLines(listNeg, fsNeg);
CV_Assert(fsAnn.size() == fsPos.size());
for (unsigned int i = 0; i < fsPos.size(); i++)
{
Ptr<PD_inriaObj> curr(new PD_inriaObj);
parseAnnotation(path + fsAnn[i], curr);
curr->filename = path + fsPos[i];
curr->sType = POS;
imageSet.push_back(curr);
}
for (unsigned int i = 0; i < fsNeg.size(); i++)
{
Ptr<PD_inriaObj> curr(new PD_inriaObj);
curr->filename = path + fsNeg[i];
curr->sType = NEG;
imageSet.push_back(curr);
}
}
Ptr<PD_inria> PD_inria::create()
{
return Ptr<PD_inriaImp>(new PD_inriaImp);
}
}// namespace datasets
}// namespace cv

modules/stereo/CMakeLists.txt
@@ -0,0 +1,2 @@
set(the_description "Stereo Correspondence")
ocv_define_module(stereo opencv_imgproc opencv_features2d opencv_core opencv_highgui opencv_calib3d)

modules/stereo/README.md
@@ -0,0 +1,2 @@
Stereo Correspondence with different descriptors
================================================

modules/stereo/include/opencv2/stereo.hpp
@@ -0,0 +1,262 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_STEREO_HPP__
#define __OPENCV_STEREO_HPP__
#include "opencv2/core.hpp"
#include "opencv2/features2d.hpp"
#include "opencv2/core/affine.hpp"
/**
@defgroup stereo Stereo Correspondence Algorithms
*/
namespace cv
{
namespace stereo
{
//! @addtogroup stereo
//! @{
// void correctMatches( InputArray F, InputArray points1, InputArray points2,
// OutputArray newPoints1, OutputArray newPoints2 );
/** @brief Filters off small noise blobs (speckles) in the disparity map
@param img The input 16-bit signed disparity image
@param newVal The disparity value used to paint-off the speckles
@param maxSpeckleSize The maximum speckle size to consider it a speckle. Larger blobs are not
affected by the algorithm
@param maxDiff Maximum difference between neighbor disparity pixels to put them into the same
blob. Note that since StereoBM, StereoSGBM and maybe other algorithms return a fixed-point
disparity map, where disparity values are multiplied by 16, this scale factor should be taken into
account when specifying this parameter value.
@param buf The optional temporary buffer to avoid memory allocation within the function.
*/
/** @brief The base class for stereo correspondence algorithms.
*/
class StereoMatcher : public Algorithm
{
public:
enum { DISP_SHIFT = 4,
DISP_SCALE = (1 << DISP_SHIFT)
};
/** @brief Computes disparity map for the specified stereo pair
@param left Left 8-bit single-channel image.
@param right Right image of the same size and the same type as the left one.
@param disparity Output disparity map. It has the same size as the input images. Some algorithms,
like StereoBM or StereoSGBM compute 16-bit fixed-point disparity map (where each disparity value
has 4 fractional bits), whereas other algorithms output 32-bit floating-point disparity map.
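For example, a true disparity of 12.25 pixels is stored in the 16-bit fixed-point map as
12.25 * DISP_SCALE = 12.25 * 16 = 196.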
*/
virtual void compute( InputArray left, InputArray right,
OutputArray disparity ) = 0;
virtual int getMinDisparity() const = 0;
virtual void setMinDisparity(int minDisparity) = 0;
virtual int getNumDisparities() const = 0;
virtual void setNumDisparities(int numDisparities) = 0;
virtual int getBlockSize() const = 0;
virtual void setBlockSize(int blockSize) = 0;
virtual int getSpeckleWindowSize() const = 0;
virtual void setSpeckleWindowSize(int speckleWindowSize) = 0;
virtual int getSpeckleRange() const = 0;
virtual void setSpeckleRange(int speckleRange) = 0;
virtual int getDisp12MaxDiff() const = 0;
virtual void setDisp12MaxDiff(int disp12MaxDiff) = 0;
};
/** @brief Class for computing stereo correspondence using the block matching algorithm, introduced and
contributed to OpenCV by K. Konolige.
*/
class StereoBinaryBM : public StereoMatcher
{
public:
enum { PREFILTER_NORMALIZED_RESPONSE = 0,
PREFILTER_XSOBEL = 1
};
virtual int getPreFilterType() const = 0;
virtual void setPreFilterType(int preFilterType) = 0;
virtual int getPreFilterSize() const = 0;
virtual void setPreFilterSize(int preFilterSize) = 0;
virtual int getPreFilterCap() const = 0;
virtual void setPreFilterCap(int preFilterCap) = 0;
virtual int getTextureThreshold() const = 0;
virtual void setTextureThreshold(int textureThreshold) = 0;
virtual int getUniquenessRatio() const = 0;
virtual void setUniquenessRatio(int uniquenessRatio) = 0;
virtual int getSmallerBlockSize() const = 0;
virtual void setSmallerBlockSize(int blockSize) = 0;
virtual Rect getROI1() const = 0;
virtual void setROI1(Rect roi1) = 0;
virtual Rect getROI2() const = 0;
virtual void setROI2(Rect roi2) = 0;
/** @brief Creates a StereoBinaryBM object
@param numDisparities the disparity search range. For each pixel algorithm will find the best
disparity from 0 (default minimum disparity) to numDisparities. The search range can then be
shifted by changing the minimum disparity.
@param blockSize the linear size of the blocks compared by the algorithm. The size should be odd
(as the block is centered at the current pixel). Larger block size implies smoother, though less
accurate disparity map. Smaller block size gives more detailed disparity map, but there is higher
chance for algorithm to find a wrong correspondence.
The function creates a StereoBinaryBM object. You can then call StereoBinaryBM::compute() to
compute the disparity for a specific stereo pair.
*/
CV_EXPORTS static Ptr< cv::stereo::StereoBinaryBM > create(int numDisparities = 0, int blockSize = 21);
};
/** @brief The class implements the modified H. Hirschmuller algorithm @cite HH08 that differs from the original
one as follows:
- By default, the algorithm is single-pass, which means that you consider only 5 directions
instead of 8. Set mode=StereoSGBM::MODE_HH in createStereoSGBM to run the full variant of the
algorithm but beware that it may consume a lot of memory.
- The algorithm matches blocks, not individual pixels. Though, setting blockSize=1 reduces the
blocks to single pixels.
- Mutual information cost function is not implemented. Instead, a simpler Birchfield-Tomasi
sub-pixel metric from @cite BT98 is used. Though, the color images are supported as well.
- Some pre- and post- processing steps from K. Konolige algorithm StereoBM are included, for
example: pre-filtering (StereoBM::PREFILTER_XSOBEL type) and post-filtering (uniqueness
check, quadratic interpolation and speckle filtering).
@note
- (Python) An example illustrating the use of the StereoSGBM matching algorithm can be found
at opencv_source_code/samples/python2/stereo_match.py
*/
class StereoBinarySGBM : public StereoMatcher
{
public:
enum
{
MODE_SGBM = 0,
MODE_HH = 1
};
virtual int getPreFilterCap() const = 0;
virtual void setPreFilterCap(int preFilterCap) = 0;
virtual int getUniquenessRatio() const = 0;
virtual void setUniquenessRatio(int uniquenessRatio) = 0;
virtual int getP1() const = 0;
virtual void setP1(int P1) = 0;
virtual int getP2() const = 0;
virtual void setP2(int P2) = 0;
virtual int getMode() const = 0;
virtual void setMode(int mode) = 0;
/** @brief Creates a StereoBinarySGBM object
@param minDisparity Minimum possible disparity value. Normally, it is zero but sometimes
rectification algorithms can shift images, so this parameter needs to be adjusted accordingly.
@param numDisparities Maximum disparity minus minimum disparity. The value is always greater than
zero. In the current implementation, this parameter must be divisible by 16.
@param blockSize Matched block size. It must be an odd number \>=1 . Normally, it should be
somewhere in the 3..11 range.
@param P1 The first parameter controlling the disparity smoothness. See below.
@param P2 The second parameter controlling the disparity smoothness. The larger the values are,
the smoother the disparity is. P1 is the penalty on the disparity change by plus or minus 1
between neighbor pixels. P2 is the penalty on the disparity change by more than 1 between neighbor
pixels. The algorithm requires P2 \> P1 . See stereo_match.cpp sample where some reasonably good
P1 and P2 values are shown (like 8\*number_of_image_channels\*SADWindowSize\*SADWindowSize and
32\*number_of_image_channels\*SADWindowSize\*SADWindowSize , respectively).
@param disp12MaxDiff Maximum allowed difference (in integer pixel units) in the left-right
disparity check. Set it to a non-positive value to disable the check.
@param preFilterCap Truncation value for the prefiltered image pixels. The algorithm first
computes x-derivative at each pixel and clips its value by [-preFilterCap, preFilterCap] interval.
The result values are passed to the Birchfield-Tomasi pixel cost function.
@param uniquenessRatio Margin in percentage by which the best (minimum) computed cost function
value should "win" the second best value to consider the found match correct. Normally, a value
within the 5-15 range is good enough.
@param speckleWindowSize Maximum size of smooth disparity regions to consider their noise speckles
and invalidate. Set it to 0 to disable speckle filtering. Otherwise, set it somewhere in the
50-200 range.
@param speckleRange Maximum disparity variation within each connected component. If you do speckle
filtering, set the parameter to a positive value, it will be implicitly multiplied by 16.
Normally, 1 or 2 is good enough.
@param mode Set it to StereoBinarySGBM::MODE_HH to run the full-scale two-pass dynamic programming
algorithm. It will consume O(W\*H\*numDisparities) bytes, which is large for 640x480 stereo and
huge for HD-size pictures. By default, it is set to MODE_SGBM.
Calling create() with only the first three arguments initializes StereoBinarySGBM with default
values for all the remaining parameters, so at minimum you only have to choose numDisparities;
each of the other parameters can be given a custom value.
*/
CV_EXPORTS static Ptr<cv::stereo::StereoBinarySGBM> create(int minDisparity, int numDisparities, int blockSize,
int P1 = 0, int P2 = 0, int disp12MaxDiff = 0,
int preFilterCap = 0, int uniquenessRatio = 0,
int speckleWindowSize = 0, int speckleRange = 0,
int mode = StereoBinarySGBM::MODE_SGBM);
};
//! @}
}//stereo
} // cv
#ifndef DISABLE_OPENCV_24_COMPATIBILITY
#include "opencv2/stereo/stereo_c.h"
#endif
#endif
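
For reference, a minimal usage sketch of the API declared above (illustrative only, not part of
this commit): it assumes opencv_imgcodecs is available for loading a rectified 8-bit grayscale
pair, the file names left.png/right.png are placeholders, and the P1/P2 values follow the
heuristic quoted in the create() documentation.

#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp" // cv::imread
#include "opencv2/stereo.hpp"

int main()
{
    // Rectified 8-bit grayscale pair; the file names are placeholders.
    cv::Mat left = cv::imread("left.png", cv::IMREAD_GRAYSCALE);
    cv::Mat right = cv::imread("right.png", cv::IMREAD_GRAYSCALE);
    if (left.empty() || right.empty())
        return -1;

    // Block matching: 64 disparities, 9x9 window, with speckle filtering.
    cv::Ptr<cv::stereo::StereoBinaryBM> bm = cv::stereo::StereoBinaryBM::create(64, 9);
    bm->setSpeckleWindowSize(100);
    bm->setSpeckleRange(2); // implicitly multiplied by 16, per the docs above
    cv::Mat dispBM;
    bm->compute(left, right, dispBM); // CV_16S, 4 fractional bits

    // Semi-global matching with the documented P1/P2 heuristic
    // (8 and 32 times number_of_image_channels * blockSize * blockSize).
    const int blockSize = 9, cn = 1;
    cv::Ptr<cv::stereo::StereoBinarySGBM> sgbm = cv::stereo::StereoBinarySGBM::create(
        0, 64, blockSize, 8 * cn * blockSize * blockSize, 32 * cn * blockSize * blockSize);
    cv::Mat dispSGBM;
    sgbm->compute(left, right, dispSGBM);

    // Recover floating-point pixel disparities by undoing the fixed-point scale.
    cv::Mat dispF;
    dispBM.convertTo(dispF, CV_32F, 1.0 / cv::stereo::StereoMatcher::DISP_SCALE);
    return 0;
}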

modules/stereo/include/opencv2/stereo/stereo.hpp
@@ -0,0 +1,49 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifdef __OPENCV_BUILD
#error this is a compatibility header which should not be used inside the OpenCV library
#endif
#include "opencv2/stereo.hpp"

modules/stereo/src/precomp.hpp
@@ -0,0 +1,59 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_STEREO_PRECOMP_H__
#define __OPENCV_STEREO_PRECOMP_H__
#include "opencv2/stereo.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/features2d.hpp"
#include "opencv2/core.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/cvdef.h"
#include "opencv2/highgui.hpp"
#include "opencv2/calib3d.hpp"
#include <algorithm>
#include <cmath>
#endif

modules/stereo/src/stereo_binary_bm.cpp
@@ -0,0 +1,738 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/****************************************************************************************\
* Very fast SAD-based (Sum-of-Absolute-Differences) stereo correspondence algorithm. *
* Contributed by Kurt Konolige *
\****************************************************************************************/
#include "precomp.hpp"
#include <stdio.h>
#include <limits>
namespace cv
{
namespace stereo
{
struct StereoBinaryBMParams
{
StereoBinaryBMParams(int _numDisparities = 64, int _SADWindowSize = 9)
{
preFilterType = StereoBinaryBM::PREFILTER_XSOBEL;
preFilterSize = 9;
preFilterCap = 31;
SADWindowSize = _SADWindowSize;
minDisparity = 0;
numDisparities = _numDisparities > 0 ? _numDisparities : 64;
textureThreshold = 10;
uniquenessRatio = 15;
speckleRange = speckleWindowSize = 0;
roi1 = roi2 = Rect(0, 0, 0, 0);
disp12MaxDiff = -1;
dispType = CV_16S;
}
int preFilterType;
int preFilterSize;
int preFilterCap;
int SADWindowSize;
int minDisparity;
int numDisparities;
int textureThreshold;
int uniquenessRatio;
int speckleRange;
int speckleWindowSize;
Rect roi1, roi2;
int disp12MaxDiff;
int dispType;
};
static void prefilterNorm(const Mat& src, Mat& dst, int winsize, int ftzero, uchar* buf)
{
int x, y, wsz2 = winsize / 2;
int* vsum = (int*)alignPtr(buf + (wsz2 + 1)*sizeof(vsum[0]), 32);
int scale_g = winsize*winsize / 8, scale_s = (1024 + scale_g) / (scale_g * 2);
const int OFS = 256 * 5, TABSZ = OFS * 2 + 256;
uchar tab[TABSZ];
const uchar* sptr = src.ptr();
int srcstep = (int)src.step;
Size size = src.size();
scale_g *= scale_s;
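// Build the clipping LUT: an input value v, stored at index v + OFS, maps to
// clamp(v, -ftzero, ftzero) + ftzero, i.e. into the range [0, 2*ftzero].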
for (x = 0; x < TABSZ; x++)
tab[x] = (uchar)(x - OFS < -ftzero ? 0 : x - OFS > ftzero ? ftzero * 2 : x - OFS + ftzero);
for (x = 0; x < size.width; x++)
vsum[x] = (ushort)(sptr[x] * (wsz2 + 2));
for (y = 1; y < wsz2; y++)
{
for (x = 0; x < size.width; x++)
vsum[x] = (ushort)(vsum[x] + sptr[srcstep*y + x]);
}
for (y = 0; y < size.height; y++)
{
const uchar* top = sptr + srcstep*MAX(y - wsz2 - 1, 0);
const uchar* bottom = sptr + srcstep*MIN(y + wsz2, size.height - 1);
const uchar* prev = sptr + srcstep*MAX(y - 1, 0);
const uchar* curr = sptr + srcstep*y;
const uchar* next = sptr + srcstep*MIN(y + 1, size.height - 1);
uchar* dptr = dst.ptr<uchar>(y);
for (x = 0; x < size.width; x++)
vsum[x] = (ushort)(vsum[x] + bottom[x] - top[x]);
for (x = 0; x <= wsz2; x++)
{
vsum[-x - 1] = vsum[0];
vsum[size.width + x] = vsum[size.width - 1];
}
int sum = vsum[0] * (wsz2 + 1);
for (x = 1; x <= wsz2; x++)
sum += vsum[x];
int val = ((curr[0] * 5 + curr[1] + prev[0] + next[0])*scale_g - sum*scale_s) >> 10;
dptr[0] = tab[val + OFS];
for (x = 1; x < size.width - 1; x++)
{
sum += vsum[x + wsz2] - vsum[x - wsz2 - 1];
val = ((curr[x] * 4 + curr[x - 1] + curr[x + 1] + prev[x] + next[x])*scale_g - sum*scale_s) >> 10;
dptr[x] = tab[val + OFS];
}
sum += vsum[x + wsz2] - vsum[x - wsz2 - 1];
val = ((curr[x] * 5 + curr[x - 1] + prev[x] + next[x])*scale_g - sum*scale_s) >> 10;
dptr[x] = tab[val + OFS];
}
}
static void
prefilterXSobel(const Mat& src, Mat& dst, int ftzero)
{
int x, y;
const int OFS = 256 * 4, TABSZ = OFS * 2 + 256;
uchar tab[TABSZ];
Size size = src.size();
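// Same response-clipping LUT as in prefilterNorm above (with a smaller OFS).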
for (x = 0; x < TABSZ; x++)
tab[x] = (uchar)(x - OFS < -ftzero ? 0 : x - OFS > ftzero ? ftzero * 2 : x - OFS + ftzero);
uchar val0 = tab[0 + OFS];
#if CV_SSE2
volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
for (y = 0; y < size.height - 1; y += 2)
{
const uchar* srow1 = src.ptr<uchar>(y);
const uchar* srow0 = y > 0 ? srow1 - src.step : size.height > 1 ? srow1 + src.step : srow1;
const uchar* srow2 = y < size.height - 1 ? srow1 + src.step : size.height > 1 ? srow1 - src.step : srow1;
const uchar* srow3 = y < size.height - 2 ? srow1 + src.step * 2 : srow1;
uchar* dptr0 = dst.ptr<uchar>(y);
uchar* dptr1 = dptr0 + dst.step;
dptr0[0] = dptr0[size.width - 1] = dptr1[0] = dptr1[size.width - 1] = val0;
x = 1;
#if CV_SSE2
if (useSIMD)
{
__m128i z = _mm_setzero_si128(), ftz = _mm_set1_epi16((short)ftzero),
ftz2 = _mm_set1_epi8(cv::saturate_cast<uchar>(ftzero * 2));
for (; x <= size.width - 9; x += 8)
{
__m128i c0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow0 + x - 1)), z);
__m128i c1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow1 + x - 1)), z);
__m128i d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow0 + x + 1)), z);
__m128i d1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow1 + x + 1)), z);
d0 = _mm_sub_epi16(d0, c0);
d1 = _mm_sub_epi16(d1, c1);
__m128i c2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow2 + x - 1)), z);
__m128i c3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow3 + x - 1)), z);
__m128i d2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow2 + x + 1)), z);
__m128i d3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow3 + x + 1)), z);
d2 = _mm_sub_epi16(d2, c2);
d3 = _mm_sub_epi16(d3, c3);
__m128i v0 = _mm_add_epi16(d0, _mm_add_epi16(d2, _mm_add_epi16(d1, d1)));
__m128i v1 = _mm_add_epi16(d1, _mm_add_epi16(d3, _mm_add_epi16(d2, d2)));
v0 = _mm_packus_epi16(_mm_add_epi16(v0, ftz), _mm_add_epi16(v1, ftz));
v0 = _mm_min_epu8(v0, ftz2);
_mm_storel_epi64((__m128i*)(dptr0 + x), v0);
_mm_storel_epi64((__m128i*)(dptr1 + x), _mm_unpackhi_epi64(v0, v0));
}
}
#endif
for (; x < size.width - 1; x++)
{
int d0 = srow0[x + 1] - srow0[x - 1], d1 = srow1[x + 1] - srow1[x - 1],
d2 = srow2[x + 1] - srow2[x - 1], d3 = srow3[x + 1] - srow3[x - 1];
int v0 = tab[d0 + d1 * 2 + d2 + OFS];
int v1 = tab[d1 + d2 * 2 + d3 + OFS];
dptr0[x] = (uchar)v0;
dptr1[x] = (uchar)v1;
}
}
for (; y < size.height; y++)
{
uchar* dptr = dst.ptr<uchar>(y);
for (x = 0; x < size.width; x++)
dptr[x] = val0;
}
}
static const int DISPARITY_SHIFT = 4;
static void
findStereoCorrespondenceBM(const Mat& left, const Mat& right,
Mat& disp, Mat& cost, const StereoBinaryBMParams& state,
uchar* buf, int _dy0, int _dy1)
{
const int ALIGN = 16;
int x, y, d;
int wsz = state.SADWindowSize, wsz2 = wsz / 2;
int dy0 = MIN(_dy0, wsz2 + 1), dy1 = MIN(_dy1, wsz2 + 1);
int ndisp = state.numDisparities;
int mindisp = state.minDisparity;
int lofs = MAX(ndisp - 1 + mindisp, 0);
int rofs = -MIN(ndisp - 1 + mindisp, 0);
int width = left.cols, height = left.rows;
int width1 = width - rofs - ndisp + 1;
int ftzero = state.preFilterCap;
int textureThreshold = state.textureThreshold;
int uniquenessRatio = state.uniquenessRatio;
short FILTERED = (short)((mindisp - 1) << DISPARITY_SHIFT);
int *sad, *hsad0, *hsad, *hsad_sub, *htext;
uchar *cbuf0, *cbuf;
const uchar* lptr0 = left.ptr() + lofs;
const uchar* rptr0 = right.ptr() + rofs;
const uchar *lptr, *lptr_sub, *rptr;
short* dptr = disp.ptr<short>();
int sstep = (int)left.step;
int dstep = (int)(disp.step / sizeof(dptr[0]));
int cstep = (height + dy0 + dy1)*ndisp;
int costbuf = 0;
int coststep = cost.data ? (int)(cost.step / sizeof(costbuf)) : 0;
const int TABSZ = 256;
uchar tab[TABSZ];
sad = (int*)alignPtr(buf + sizeof(sad[0]), ALIGN);
hsad0 = (int*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN);
htext = (int*)alignPtr((int*)(hsad0 + (height + dy1)*ndisp) + wsz2 + 2, ALIGN);
cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN);
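// The scratch buffer is carved up into: sad (per-disparity SAD totals for the
// current window), hsad0 (per-row horizontal SAD sums), htext (per-row texture
// sums) and cbuf0 (a ring buffer of per-column |left - right| slices, used to
// subtract the column that slides out of the window).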
for (x = 0; x < TABSZ; x++)
tab[x] = (uchar)std::abs(x - ftzero);
// initialize buffers
memset(hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0]));
memset(htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0]));
for (x = -wsz2 - 1; x < wsz2; x++)
{
hsad = hsad0 - dy0*ndisp; cbuf = cbuf0 + (x + wsz2 + 1)*cstep - dy0*ndisp;
lptr = lptr0 + std::min(std::max(x, -lofs), width - lofs - 1) - dy0*sstep;
rptr = rptr0 + std::min(std::max(x, -rofs), width - rofs - 1) - dy0*sstep;
for (y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep)
{
int lval = lptr[0];
for (d = 0; d < ndisp; d++)
{
int diff = std::abs(lval - rptr[d]);
cbuf[d] = (uchar)diff;
hsad[d] = (int)(hsad[d] + diff);
}
htext[y] += tab[lval];
}
}
// initialize the left and right borders of the disparity map
for (y = 0; y < height; y++)
{
for (x = 0; x < lofs; x++)
dptr[y*dstep + x] = FILTERED;
for (x = lofs + width1; x < width; x++)
dptr[y*dstep + x] = FILTERED;
}
dptr += lofs;
for (x = 0; x < width1; x++, dptr++)
{
int* costptr = cost.data ? cost.ptr<int>() + lofs + x : &costbuf;
int x0 = x - wsz2 - 1, x1 = x + wsz2;
const uchar* cbuf_sub = cbuf0 + ((x0 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
cbuf = cbuf0 + ((x1 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
hsad = hsad0 - dy0*ndisp;
lptr_sub = lptr0 + MIN(MAX(x0, -lofs), width - 1 - lofs) - dy0*sstep;
lptr = lptr0 + MIN(MAX(x1, -lofs), width - 1 - lofs) - dy0*sstep;
rptr = rptr0 + MIN(MAX(x1, -rofs), width - 1 - rofs) - dy0*sstep;
for (y = -dy0; y < height + dy1; y++, cbuf += ndisp, cbuf_sub += ndisp,
hsad += ndisp, lptr += sstep, lptr_sub += sstep, rptr += sstep)
{
int lval = lptr[0];
for (d = 0; d < ndisp; d++)
{
int diff = std::abs(lval - rptr[d]);
cbuf[d] = (uchar)diff;
hsad[d] = hsad[d] + diff - cbuf_sub[d];
}
htext[y] += tab[lval] - tab[lptr_sub[0]];
}
// fill borders
for (y = dy1; y <= wsz2; y++)
htext[height + y] = htext[height + dy1 - 1];
for (y = -wsz2 - 1; y < -dy0; y++)
htext[y] = htext[-dy0];
// initialize sums
int tsum = 0;
{
for (d = 0; d < ndisp; d++)
sad[d] = (int)(hsad0[d - ndisp*dy0] * (wsz2 + 2 - dy0));
hsad = hsad0 + (1 - dy0)*ndisp;
for (y = 1 - dy0; y < wsz2; y++, hsad += ndisp)
for (d = 0; d < ndisp; d++)
sad[d] = (int)(sad[d] + hsad[d]);
for (y = -wsz2 - 1; y < wsz2; y++)
tsum += htext[y];
}
// finally, start the real processing
{
for (y = 0; y < height; y++)
{
int minsad = INT_MAX, mind = -1;
hsad = hsad0 + MIN(y + wsz2, height + dy1 - 1)*ndisp;
hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp;
for (d = 0; d < ndisp; d++)
{
int currsad = sad[d] + hsad[d] - hsad_sub[d];
sad[d] = currsad;
if (currsad < minsad)
{
minsad = currsad;
mind = d;
}
}
tsum += htext[y + wsz2] - htext[y - wsz2 - 1];
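// texture check: tsum is the window sum of prefiltered gradient magnitudes;
// low-texture regions cannot be matched reliably and are marked as filtered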
if (tsum < textureThreshold)
{
dptr[y*dstep] = FILTERED;
continue;
}
if (uniquenessRatio > 0)
{
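// uniqueness check: reject the pixel if any disparity outside the winner's
// immediate neighborhood comes within uniquenessRatio percent of the best cost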
int thresh = minsad + (minsad * uniquenessRatio / 100);
for (d = 0; d < ndisp; d++)
{
if ((d < mind - 1 || d > mind + 1) && sad[d] <= thresh)
break;
}
if (d < ndisp)
{
dptr[y*dstep] = FILTERED;
continue;
}
}
{
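// subpixel refinement: replicate the neighbors at d = -1 and d = ndisp so that
// mind +/- 1 is always valid, then fit a three-point parabola around the winner;
// the result is stored in 1/16-pixel units (DISPARITY_SHIFT == 4)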
sad[-1] = sad[1];
sad[ndisp] = sad[ndisp - 2];
int p = sad[mind + 1], n = sad[mind - 1];
d = p + n - 2 * sad[mind] + std::abs(p - n);
dptr[y*dstep] = (short)(((ndisp - mind - 1 + mindisp) * 256 + (d != 0 ? (p - n) * 256 / d : 0) + 15) >> 4);
costptr[y*coststep] = sad[mind];
}
}
}
}
}
struct PrefilterInvoker : public ParallelLoopBody
{
PrefilterInvoker(const Mat& left0, const Mat& right0, Mat& left, Mat& right,
uchar* buf0, uchar* buf1, StereoBinaryBMParams* _state)
{
imgs0[0] = &left0; imgs0[1] = &right0;
imgs[0] = &left; imgs[1] = &right;
buf[0] = buf0; buf[1] = buf1;
state = _state;
}
void operator()(const Range& range) const
{
for (int i = range.start; i < range.end; i++)
{
if (state->preFilterType == StereoBinaryBM::PREFILTER_NORMALIZED_RESPONSE)
prefilterNorm(*imgs0[i], *imgs[i], state->preFilterSize, state->preFilterCap, buf[i]);
else
prefilterXSobel(*imgs0[i], *imgs[i], state->preFilterCap);
}
}
const Mat* imgs0[2];
Mat* imgs[2];
uchar* buf[2];
StereoBinaryBMParams* state;
};
struct FindStereoCorrespInvoker : public ParallelLoopBody
{
FindStereoCorrespInvoker(const Mat& _left, const Mat& _right,
Mat& _disp, StereoBinaryBMParams* _state,
int _nstripes, size_t _stripeBufSize,
bool _useShorts, Rect _validDisparityRect,
Mat& _slidingSumBuf, Mat& _cost)
{
left = &_left; right = &_right;
disp = &_disp; state = _state;
nstripes = _nstripes; stripeBufSize = _stripeBufSize;
useShorts = _useShorts;
validDisparityRect = _validDisparityRect;
slidingSumBuf = &_slidingSumBuf;
cost = &_cost;
}
void operator()(const Range& range) const
{
int cols = left->cols, rows = left->rows;
int _row0 = std::min(cvRound(range.start * rows / nstripes), rows);
int _row1 = std::min(cvRound(range.end * rows / nstripes), rows);
uchar *ptr = slidingSumBuf->ptr() + range.start * stripeBufSize;
int FILTERED = (state->minDisparity - 1) * 16;
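// note: 16 == (1 << DISPARITY_SHIFT), the fixed-point scale of the disparity map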
Rect roi = validDisparityRect & Rect(0, _row0, cols, _row1 - _row0);
if (roi.height == 0)
return;
int row0 = roi.y;
int row1 = roi.y + roi.height;
Mat part;
if (row0 > _row0)
{
part = disp->rowRange(_row0, row0);
part = Scalar::all(FILTERED);
}
if (_row1 > row1)
{
part = disp->rowRange(row1, _row1);
part = Scalar::all(FILTERED);
}
Mat left_i = left->rowRange(row0, row1);
Mat right_i = right->rowRange(row0, row1);
Mat disp_i = disp->rowRange(row0, row1);
Mat cost_i = state->disp12MaxDiff >= 0 ? cost->rowRange(row0, row1) : Mat();
findStereoCorrespondenceBM(left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1);
if (state->disp12MaxDiff >= 0)
validateDisparity(disp_i, cost_i, state->minDisparity, state->numDisparities, state->disp12MaxDiff);
if (roi.x > 0)
{
part = disp_i.colRange(0, roi.x);
part = Scalar::all(FILTERED);
}
if (roi.x + roi.width < cols)
{
part = disp_i.colRange(roi.x + roi.width, cols);
part = Scalar::all(FILTERED);
}
}
protected:
const Mat *left, *right;
Mat* disp, *slidingSumBuf, *cost;
StereoBinaryBMParams *state;
int nstripes;
size_t stripeBufSize;
bool useShorts;
Rect validDisparityRect;
};
class StereoBinaryBMImpl : public StereoBinaryBM
{
public:
StereoBinaryBMImpl()
{
params = StereoBinaryBMParams();
}
StereoBinaryBMImpl(int _numDisparities, int _SADWindowSize)
{
params = StereoBinaryBMParams(_numDisparities, _SADWindowSize);
}
void compute(InputArray leftarr, InputArray rightarr, OutputArray disparr)
{
int dtype = disparr.fixedType() ? disparr.type() : params.dispType;
Size leftsize = leftarr.size();
if (leftarr.size() != rightarr.size())
CV_Error(Error::StsUnmatchedSizes, "All the images must have the same size");
if (leftarr.type() != CV_8UC1 || rightarr.type() != CV_8UC1)
CV_Error(Error::StsUnsupportedFormat, "Both input images must have CV_8UC1");
if (dtype != CV_16SC1 && dtype != CV_32FC1)
CV_Error(Error::StsUnsupportedFormat, "Disparity image must have CV_16SC1 or CV_32FC1 format");
if (params.preFilterType != PREFILTER_NORMALIZED_RESPONSE &&
params.preFilterType != PREFILTER_XSOBEL)
CV_Error(Error::StsOutOfRange, "preFilterType must be = CV_STEREO_BM_NORMALIZED_RESPONSE");
if (params.preFilterSize < 5 || params.preFilterSize > 255 || params.preFilterSize % 2 == 0)
CV_Error(Error::StsOutOfRange, "preFilterSize must be odd and be within 5..255");
if (params.preFilterCap < 1 || params.preFilterCap > 63)
CV_Error(Error::StsOutOfRange, "preFilterCap must be within 1..63");
if (params.SADWindowSize < 5 || params.SADWindowSize > 255 || params.SADWindowSize % 2 == 0 ||
params.SADWindowSize >= std::min(leftsize.width, leftsize.height))
CV_Error(Error::StsOutOfRange, "SADWindowSize must be odd, be within 5..255 and be not larger than image width or height");
if (params.numDisparities <= 0 || params.numDisparities % 16 != 0)
CV_Error(Error::StsOutOfRange, "numDisparities must be positive and divisble by 16");
if (params.textureThreshold < 0)
CV_Error(Error::StsOutOfRange, "texture threshold must be non-negative");
if (params.uniquenessRatio < 0)
CV_Error(Error::StsOutOfRange, "uniqueness ratio must be non-negative");
int FILTERED = (params.minDisparity - 1) << DISPARITY_SHIFT;
Mat left0 = leftarr.getMat(), right0 = rightarr.getMat();
disparr.create(left0.size(), dtype);
Mat disp0 = disparr.getMat();
preFilteredImg0.create(left0.size(), CV_8U);
preFilteredImg1.create(left0.size(), CV_8U);
cost.create(left0.size(), CV_16S);
Mat left = preFilteredImg0, right = preFilteredImg1;
int mindisp = params.minDisparity;
int ndisp = params.numDisparities;
int width = left0.cols;
int height = left0.rows;
int lofs = std::max(ndisp - 1 + mindisp, 0);
int rofs = -std::min(ndisp - 1 + mindisp, 0);
int width1 = width - rofs - ndisp + 1;
if (lofs >= width || rofs >= width || width1 < 1)
{
disp0 = Scalar::all(FILTERED * (disp0.type() < CV_32F ? 1 : 1. / (1 << DISPARITY_SHIFT)));
return;
}
Mat disp = disp0;
if (dtype == CV_32F)
{
dispbuf.create(disp0.size(), CV_16S);
disp = dispbuf;
}
int wsz = params.SADWindowSize;
int bufSize0 = (int)((ndisp + 2)*sizeof(int));
bufSize0 += (int)((height + wsz + 2)*ndisp*sizeof(int));
bufSize0 += (int)((height + wsz + 2)*sizeof(int));
bufSize0 += (int)((height + wsz + 2)*ndisp*(wsz + 2)*sizeof(uchar) + 256);
int bufSize1 = (int)((width + params.preFilterSize + 2) * sizeof(int) + 256);
int bufSize2 = 0;
if (params.speckleRange >= 0 && params.speckleWindowSize > 0)
bufSize2 = width*height*(sizeof(Point_<short>) + sizeof(int) + sizeof(uchar));
#if CV_SSE2
bool useShorts = params.preFilterCap <= 31 && params.SADWindowSize <= 21 && checkHardwareSupport(CV_CPU_SSE2);
#else
const bool useShorts = false;
#endif
const double SAD_overhead_coeff = 10.0;
double N0 = 8000000 / (useShorts ? 1 : 4); // approx. minimum number of instructions per TBB thread that makes threading worthwhile
double maxStripeSize = std::min(std::max(N0 / (width * ndisp), (wsz - 1) * SAD_overhead_coeff), (double)height);
int nstripes = cvCeil(height / maxStripeSize);
int bufSize = std::max(bufSize0 * nstripes, std::max(bufSize1 * 2, bufSize2));
if (slidingSumBuf.cols < bufSize)
slidingSumBuf.create(1, bufSize, CV_8U);
uchar *_buf = slidingSumBuf.ptr();
parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, _buf, _buf + bufSize1, &params), 1);
Rect validDisparityRect(0, 0, width, height), R1 = params.roi1, R2 = params.roi2;
validDisparityRect = getValidDisparityROI(R1.area() > 0 ? Rect(0, 0, width, height) : validDisparityRect,
R2.area() > 0 ? Rect(0, 0, width, height) : validDisparityRect,
params.minDisparity, params.numDisparities,
params.SADWindowSize);
parallel_for_(Range(0, nstripes),
FindStereoCorrespInvoker(left, right, disp, &params, nstripes,
bufSize0, useShorts, validDisparityRect,
slidingSumBuf, cost));
if (params.speckleRange >= 0 && params.speckleWindowSize > 0)
filterSpeckles(disp, FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf);
if (disp0.data != disp.data)
disp.convertTo(disp0, disp0.type(), 1. / (1 << DISPARITY_SHIFT), 0);
}
int getMinDisparity() const { return params.minDisparity; }
void setMinDisparity(int minDisparity) { params.minDisparity = minDisparity; }
int getNumDisparities() const { return params.numDisparities; }
void setNumDisparities(int numDisparities) { params.numDisparities = numDisparities; }
int getBlockSize() const { return params.SADWindowSize; }
void setBlockSize(int blockSize) { params.SADWindowSize = blockSize; }
int getSpeckleWindowSize() const { return params.speckleWindowSize; }
void setSpeckleWindowSize(int speckleWindowSize) { params.speckleWindowSize = speckleWindowSize; }
int getSpeckleRange() const { return params.speckleRange; }
void setSpeckleRange(int speckleRange) { params.speckleRange = speckleRange; }
int getDisp12MaxDiff() const { return params.disp12MaxDiff; }
void setDisp12MaxDiff(int disp12MaxDiff) { params.disp12MaxDiff = disp12MaxDiff; }
int getPreFilterType() const { return params.preFilterType; }
void setPreFilterType(int preFilterType) { params.preFilterType = preFilterType; }
int getPreFilterSize() const { return params.preFilterSize; }
void setPreFilterSize(int preFilterSize) { params.preFilterSize = preFilterSize; }
int getPreFilterCap() const { return params.preFilterCap; }
void setPreFilterCap(int preFilterCap) { params.preFilterCap = preFilterCap; }
int getTextureThreshold() const { return params.textureThreshold; }
void setTextureThreshold(int textureThreshold) { params.textureThreshold = textureThreshold; }
int getUniquenessRatio() const { return params.uniquenessRatio; }
void setUniquenessRatio(int uniquenessRatio) { params.uniquenessRatio = uniquenessRatio; }
int getSmallerBlockSize() const { return 0; }
void setSmallerBlockSize(int) {}
Rect getROI1() const { return params.roi1; }
void setROI1(Rect roi1) { params.roi1 = roi1; }
Rect getROI2() const { return params.roi2; }
void setROI2(Rect roi2) { params.roi2 = roi2; }
void write(FileStorage& fs) const
{
fs << "name" << name_
<< "minDisparity" << params.minDisparity
<< "numDisparities" << params.numDisparities
<< "blockSize" << params.SADWindowSize
<< "speckleWindowSize" << params.speckleWindowSize
<< "speckleRange" << params.speckleRange
<< "disp12MaxDiff" << params.disp12MaxDiff
<< "preFilterType" << params.preFilterType
<< "preFilterSize" << params.preFilterSize
<< "preFilterCap" << params.preFilterCap
<< "textureThreshold" << params.textureThreshold
<< "uniquenessRatio" << params.uniquenessRatio;
}
void read(const FileNode& fn)
{
FileNode n = fn["name"];
CV_Assert(n.isString() && String(n) == name_);
params.minDisparity = (int)fn["minDisparity"];
params.numDisparities = (int)fn["numDisparities"];
params.SADWindowSize = (int)fn["blockSize"];
params.speckleWindowSize = (int)fn["speckleWindowSize"];
params.speckleRange = (int)fn["speckleRange"];
params.disp12MaxDiff = (int)fn["disp12MaxDiff"];
params.preFilterType = (int)fn["preFilterType"];
params.preFilterSize = (int)fn["preFilterSize"];
params.preFilterCap = (int)fn["preFilterCap"];
params.textureThreshold = (int)fn["textureThreshold"];
params.uniquenessRatio = (int)fn["uniquenessRatio"];
params.roi1 = params.roi2 = Rect();
}
StereoBinaryBMParams params;
Mat preFilteredImg0, preFilteredImg1, cost, dispbuf;
Mat slidingSumBuf;
static const char* name_;
};
const char* StereoBinaryBMImpl::name_ = "StereoMatcher.BM";
Ptr<StereoBinaryBM> StereoBinaryBM::create(int _numDisparities, int _SADWindowSize)
{
return makePtr<StereoBinaryBMImpl>(_numDisparities, _SADWindowSize);
}
}
}
/* End of file. */
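For orientation, here is a minimal usage sketch of the block matcher implemented above. This is an editor's illustration, not part of the patch; the image file names are hypothetical and the output is assumed to be the default CV_16S disparity map.
#include "opencv2/stereo.hpp"
#include "opencv2/imgcodecs.hpp"

int main()
{
    cv::Mat left  = cv::imread("left.png",  cv::IMREAD_GRAYSCALE);   // hypothetical rectified pair
    cv::Mat right = cv::imread("right.png", cv::IMREAD_GRAYSCALE);
    // numDisparities must be a positive multiple of 16; SADWindowSize must be odd, within 5..255
    cv::Ptr<cv::stereo::StereoBinaryBM> bm = cv::stereo::StereoBinaryBM::create(64, 9);
    cv::Mat disp;
    bm->compute(left, right, disp);                    // fixed-point disparities, 4 fractional bits
    cv::Mat disp8;
    disp.convertTo(disp8, CV_8U, 255.0 / (64 * 16));   // rescale for visualization (assumes CV_16S)
    return 0;
}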

@ -0,0 +1,958 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/*
This is a variation of
"Stereo Processing by Semiglobal Matching and Mutual Information"
by Heiko Hirschmuller.
We match blocks rather than individual pixels, thus the algorithm is called
SGBM (Semi-global block matching)
*/
#include "precomp.hpp"
#include <limits.h>
namespace cv
{
namespace stereo
{
typedef uchar PixType;
typedef short CostType;
typedef short DispType;
enum { NR = 16, NR2 = NR/2 };
struct StereoBinarySGBMParams
{
StereoBinarySGBMParams()
{
minDisparity = numDisparities = 0;
SADWindowSize = 0;
P1 = P2 = 0;
disp12MaxDiff = 0;
preFilterCap = 0;
uniquenessRatio = 0;
speckleWindowSize = 0;
speckleRange = 0;
mode = StereoBinarySGBM::MODE_SGBM;
}
StereoBinarySGBMParams( int _minDisparity, int _numDisparities, int _SADWindowSize,
int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap,
int _uniquenessRatio, int _speckleWindowSize, int _speckleRange,
int _mode )
{
minDisparity = _minDisparity;
numDisparities = _numDisparities;
SADWindowSize = _SADWindowSize;
P1 = _P1;
P2 = _P2;
disp12MaxDiff = _disp12MaxDiff;
preFilterCap = _preFilterCap;
uniquenessRatio = _uniquenessRatio;
speckleWindowSize = _speckleWindowSize;
speckleRange = _speckleRange;
mode = _mode;
}
int minDisparity;
int numDisparities;
int SADWindowSize;
int preFilterCap;
int uniquenessRatio;
int P1;
int P2;
int speckleWindowSize;
int speckleRange;
int disp12MaxDiff;
int mode;
};
/*
For each pixel row1[x], max(-maxD, 0) <= minX <= x < maxX <= width - max(0, -minD),
and for each disparity minD<=d<maxD the function
computes the cost (cost[(x-minX)*(maxD - minD) + (d - minD)]), depending on the difference between
row1[x] and row2[x-d]. The subpixel algorithm from
"Depth Discontinuities by Pixel-to-Pixel Stereo" by Stan Birchfield and C. Tomasi
is used, hence the suffix BT.
The temporary buffer should contain width2*2 elements.
*/
static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
int minD, int maxD, CostType* cost,
PixType* buffer, const PixType* tab,
int tabOfs, int )
{
int x, c, width = img1.cols, cn = img1.channels();
int minX1 = std::max(-maxD, 0), maxX1 = width + std::min(minD, 0);
int minX2 = std::max(minX1 - maxD, 0), maxX2 = std::min(maxX1 - minD, width);
int D = maxD - minD, width1 = maxX1 - minX1, width2 = maxX2 - minX2;
const PixType *row1 = img1.ptr<PixType>(y), *row2 = img2.ptr<PixType>(y);
PixType *prow1 = buffer + width2*2, *prow2 = prow1 + width*cn*2;
tab += tabOfs;
for( c = 0; c < cn*2; c++ )
{
prow1[width*c] = prow1[width*c + width-1] =
prow2[width*c] = prow2[width*c + width-1] = tab[0];
}
int n1 = y > 0 ? -(int)img1.step : 0, s1 = y < img1.rows-1 ? (int)img1.step : 0;
int n2 = y > 0 ? -(int)img2.step : 0, s2 = y < img2.rows-1 ? (int)img2.step : 0;
if( cn == 1 )
{
for( x = 1; x < width-1; x++ )
{
prow1[x] = tab[(row1[x+1] - row1[x-1])*2 + row1[x+n1+1] - row1[x+n1-1] + row1[x+s1+1] - row1[x+s1-1]];
prow2[width-1-x] = tab[(row2[x+1] - row2[x-1])*2 + row2[x+n2+1] - row2[x+n2-1] + row2[x+s2+1] - row2[x+s2-1]];
prow1[x+width] = row1[x];
prow2[width-1-x+width] = row2[x];
}
}
else
{
for( x = 1; x < width-1; x++ )
{
prow1[x] = tab[(row1[x*3+3] - row1[x*3-3])*2 + row1[x*3+n1+3] - row1[x*3+n1-3] + row1[x*3+s1+3] - row1[x*3+s1-3]];
prow1[x+width] = tab[(row1[x*3+4] - row1[x*3-2])*2 + row1[x*3+n1+4] - row1[x*3+n1-2] + row1[x*3+s1+4] - row1[x*3+s1-2]];
prow1[x+width*2] = tab[(row1[x*3+5] - row1[x*3-1])*2 + row1[x*3+n1+5] - row1[x*3+n1-1] + row1[x*3+s1+5] - row1[x*3+s1-1]];
prow2[width-1-x] = tab[(row2[x*3+3] - row2[x*3-3])*2 + row2[x*3+n2+3] - row2[x*3+n2-3] + row2[x*3+s2+3] - row2[x*3+s2-3]];
prow2[width-1-x+width] = tab[(row2[x*3+4] - row2[x*3-2])*2 + row2[x*3+n2+4] - row2[x*3+n2-2] + row2[x*3+s2+4] - row2[x*3+s2-2]];
prow2[width-1-x+width*2] = tab[(row2[x*3+5] - row2[x*3-1])*2 + row2[x*3+n2+5] - row2[x*3+n2-1] + row2[x*3+s2+5] - row2[x*3+s2-1]];
prow1[x+width*3] = row1[x*3];
prow1[x+width*4] = row1[x*3+1];
prow1[x+width*5] = row1[x*3+2];
prow2[width-1-x+width*3] = row2[x*3];
prow2[width-1-x+width*4] = row2[x*3+1];
prow2[width-1-x+width*5] = row2[x*3+2];
}
}
memset( cost, 0, width1*D*sizeof(cost[0]) );
buffer -= minX2;
cost -= minX1*D + minD; // simplify the cost indices inside the loop
#if CV_SSE2
volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
#if 1
for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
{
int diff_scale = c < cn ? 0 : 2;
// precompute
// v0 = min(row2[x-1/2], row2[x], row2[x+1/2]) and
// v1 = max(row2[x-1/2], row2[x], row2[x+1/2]) and
for( x = minX2; x < maxX2; x++ )
{
int v = prow2[x];
int vl = x > 0 ? (v + prow2[x-1])/2 : v;
int vr = x < width-1 ? (v + prow2[x+1])/2 : v;
int v0 = std::min(vl, vr); v0 = std::min(v0, v);
int v1 = std::max(vl, vr); v1 = std::max(v1, v);
buffer[x] = (PixType)v0;
buffer[x + width2] = (PixType)v1;
}
for( x = minX1; x < maxX1; x++ )
{
int u = prow1[x];
int ul = x > 0 ? (u + prow1[x-1])/2 : u;
int ur = x < width-1 ? (u + prow1[x+1])/2 : u;
int u0 = std::min(ul, ur); u0 = std::min(u0, u);
int u1 = std::max(ul, ur); u1 = std::max(u1, u);
#if CV_SSE2
if( useSIMD )
{
__m128i _u = _mm_set1_epi8((char)u), _u0 = _mm_set1_epi8((char)u0);
__m128i _u1 = _mm_set1_epi8((char)u1), z = _mm_setzero_si128();
__m128i ds = _mm_cvtsi32_si128(diff_scale);
for( int d = minD; d < maxD; d += 16 )
{
__m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-x-1 + d));
__m128i _v0 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d));
__m128i _v1 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d + width2));
__m128i c0 = _mm_max_epu8(_mm_subs_epu8(_u, _v1), _mm_subs_epu8(_v0, _u));
__m128i c1 = _mm_max_epu8(_mm_subs_epu8(_v, _u1), _mm_subs_epu8(_u0, _v));
__m128i diff = _mm_min_epu8(c0, c1);
c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));
_mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_srl_epi16(_mm_unpacklo_epi8(diff,z), ds)));
_mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_srl_epi16(_mm_unpackhi_epi8(diff,z), ds)));
}
}
else
#endif
{
for( int d = minD; d < maxD; d++ )
{
int v = prow2[width-x-1 + d];
int v0 = buffer[width-x-1 + d];
int v1 = buffer[width-x-1 + d + width2];
int c0 = std::max(0, u - v1); c0 = std::max(c0, v0 - u);
int c1 = std::max(0, v - u1); c1 = std::max(c1, u0 - v);
cost[x*D + d] = (CostType)(cost[x*D+d] + (std::min(c0, c1) >> diff_scale));
}
}
}
}
#else
for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
{
for( x = minX1; x < maxX1; x++ )
{
int u = prow1[x];
#if CV_SSE2
if( useSIMD )
{
__m128i _u = _mm_set1_epi8(u), z = _mm_setzero_si128();
for( int d = minD; d < maxD; d += 16 )
{
__m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-1-x + d));
__m128i diff = _mm_adds_epu8(_mm_subs_epu8(_u,_v), _mm_subs_epu8(_v,_u));
__m128i c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
__m128i c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));
_mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_unpacklo_epi8(diff,z)));
_mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_unpackhi_epi8(diff,z)));
}
}
else
#endif
{
for( int d = minD; d < maxD; d++ )
{
int v = prow2[width-1-x + d];
cost[x*D + d] = (CostType)(cost[x*D + d] + (CostType)std::abs(u - v));
}
}
}
}
#endif
}
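// ---------------------------------------------------------------------------
// Editor's sketch (not part of the patch): the scalar core of the Birchfield-
// Tomasi dissimilarity evaluated by the SIMD and scalar loops above for one
// pixel pair. u0/u1 (resp. v0/v1) are the min/max over the pixel and its
// half-sample interpolated neighbors, as precomputed into `buffer`; the names
// here are illustrative only.
static inline int birchfieldTomasiCost(int u, int u0, int u1,
                                       int v, int v0, int v1)
{
    int c0 = std::max(0, std::max(u - v1, v0 - u)); // distance of u from [v0, v1]
    int c1 = std::max(0, std::max(v - u1, u0 - v)); // distance of v from [u0, u1]
    return std::min(c0, c1); // zero when the two intensity intervals overlap
}
// ---------------------------------------------------------------------------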
/*
computes disparity for "roi" in img1 w.r.t. img2 and writes it to disp1buf.
that is, disp1buf(x, y)=d means that img1(x+roi.x, y+roi.y) ~ img2(x+roi.x-d, y+roi.y).
minD <= d < maxD.
disp2full is the reverse disparity map, that is:
disp2full(x+roi.x,y+roi.y)=d means that img2(x+roi.x, y+roi.y) ~ img1(x+roi.x+d, y+roi.y)
note that disp1buf will have the same size as the roi and
disp2full will have the same size as img1 (or img2).
On exit disp2buf is not the final disparity, it is an intermediate result that becomes
final after all the tiles are processed.
the disparity in disp1buf is written with sub-pixel accuracy
(4 fractional bits, see StereoSGBM::DISP_SCALE),
using quadratic interpolation, while the disparity in disp2buf
is written as is, without interpolation.
disp2cost also has the same size as img1 (or img2).
It contains the current minimum cost, used to select the best (minimal-cost) disparity.
*/
static void computeDisparityBinarySGBM( const Mat& img1, const Mat& img2,
Mat& disp1, const StereoBinarySGBMParams& params,
Mat& buffer )
{
#if CV_SSE2
static const uchar LSBTab[] =
{
0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
const int ALIGN = 16;
const int DISP_SHIFT = StereoMatcher::DISP_SHIFT;
const int DISP_SCALE = (1 << DISP_SHIFT);
const CostType MAX_COST = SHRT_MAX;
int minD = params.minDisparity, maxD = minD + params.numDisparities;
Size SADWindowSize;
SADWindowSize.width = SADWindowSize.height = params.SADWindowSize > 0 ? params.SADWindowSize : 5;
int ftzero = std::max(params.preFilterCap, 15) | 1;
int uniquenessRatio = params.uniquenessRatio >= 0 ? params.uniquenessRatio : 10;
int disp12MaxDiff = params.disp12MaxDiff > 0 ? params.disp12MaxDiff : 1;
int P1 = params.P1 > 0 ? params.P1 : 2, P2 = std::max(params.P2 > 0 ? params.P2 : 5, P1+1);
int k, width = disp1.cols, height = disp1.rows;
int minX1 = std::max(-maxD, 0), maxX1 = width + std::min(minD, 0);
int D = maxD - minD, width1 = maxX1 - minX1;
int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
int SW2 = SADWindowSize.width/2, SH2 = SADWindowSize.height/2;
bool fullDP = params.mode == StereoBinarySGBM::MODE_HH;
int npasses = fullDP ? 2 : 1;
const int TAB_OFS = 256*4, TAB_SIZE = 256 + TAB_OFS*2;
PixType clipTab[TAB_SIZE];
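// clipTab maps a TAB_OFS-offset derivative value to the range [0, 2*ftzero]:
// values are clamped to [-ftzero, ftzero] and shifted by +ftzero, the SGBM
// analogue of the x-Sobel prefilter cap used by the block-matching variant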
for( k = 0; k < TAB_SIZE; k++ )
clipTab[k] = (PixType)(std::min(std::max(k - TAB_OFS, -ftzero), ftzero) + ftzero);
if( minX1 >= maxX1 )
{
disp1 = Scalar::all(INVALID_DISP_SCALED);
return;
}
CV_Assert( D % 16 == 0 );
// NR - the number of directions. The loop on x below that computes Lr assumes that NR == 8.
// If you change NR, please modify the loop as well.
int D2 = D+16, NRD2 = NR2*D2;
// the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer:
// for 8-way dynamic programming we need the current row and
// the previous row, i.e. 2 rows in total
const int NLR = 2;
const int LrBorder = NLR - 1;
// for each possible stereo match (img1(x,y) <=> img2(x-d,y))
// we keep pixel difference cost (C) and the summary cost over NR directions (S).
// we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k)
size_t costBufSize = width1*D;
size_t CSBufSize = costBufSize*(fullDP ? height : 1);
size_t minLrSize = (width1 + LrBorder*2)*NR2, LrSize = minLrSize*D2;
int hsumBufNRows = SH2*2 + 2;
size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) + // minLr[] and Lr[]
costBufSize*(hsumBufNRows + 1)*sizeof(CostType) + // hsumBuf, pixdiff
CSBufSize*2*sizeof(CostType) + // C, S
width*16*img1.channels()*sizeof(PixType) + // temp buffer for computing per-pixel cost
width*(sizeof(CostType) + sizeof(DispType)) + 1024; // disp2cost + disp2
if( buffer.empty() || !buffer.isContinuous() ||
buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize )
buffer.create(1, (int)totalBufSize, CV_8U);
// summary cost over different (nDirs) directions
CostType* Cbuf = (CostType*)alignPtr(buffer.ptr(), ALIGN);
CostType* Sbuf = Cbuf + CSBufSize;
CostType* hsumBuf = Sbuf + CSBufSize;
CostType* pixDiff = hsumBuf + costBufSize*hsumBufNRows;
CostType* disp2cost = pixDiff + costBufSize + (LrSize + minLrSize)*NLR;
DispType* disp2ptr = (DispType*)(disp2cost + width);
PixType* tempBuf = (PixType*)(disp2ptr + width);
// add P2 to every C(x,y). it saves a few operations in the inner loops
for( k = 0; k < width1*D; k++ )
Cbuf[k] = (CostType)P2;
for( int pass = 1; pass <= npasses; pass++ )
{
int x1, y1, x2, y2, dx, dy;
if( pass == 1 )
{
y1 = 0; y2 = height; dy = 1;
x1 = 0; x2 = width1; dx = 1;
}
else
{
y1 = height-1; y2 = -1; dy = -1;
x1 = width1-1; x2 = -1; dx = -1;
}
CostType *Lr[NLR]={0}, *minLr[NLR]={0};
for( k = 0; k < NLR; k++ )
{
// shift Lr[k] and minLr[k] pointers, because we allocated them with the borders,
// and will occasionally use negative indices with the arrays
// we need to shift Lr[k] pointers by 1, to give the space for d=-1.
// however, then the alignment will be imperfect, i.e. bad for SSE,
// thus we shift the pointers by 8 (8*sizeof(short) == 16 - ideal alignment)
Lr[k] = pixDiff + costBufSize + LrSize*k + NRD2*LrBorder + 8;
memset( Lr[k] - LrBorder*NRD2 - 8, 0, LrSize*sizeof(CostType) );
minLr[k] = pixDiff + costBufSize + LrSize*NLR + minLrSize*k + NR2*LrBorder;
memset( minLr[k] - LrBorder*NR2, 0, minLrSize*sizeof(CostType) );
}
for( int y = y1; y != y2; y += dy )
{
int x, d;
DispType* disp1ptr = disp1.ptr<DispType>(y);
CostType* C = Cbuf + (!fullDP ? 0 : y*costBufSize);
CostType* S = Sbuf + (!fullDP ? 0 : y*costBufSize);
if( pass == 1 ) // compute C on the first pass, and reuse it on the second pass, if any.
{
int dy1 = y == 0 ? 0 : y + SH2, dy2 = y == 0 ? SH2 : dy1;
for( k = dy1; k <= dy2; k++ )
{
CostType* hsumAdd = hsumBuf + (std::min(k, height-1) % hsumBufNRows)*costBufSize;
if( k < height )
{
calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS, ftzero );
memset(hsumAdd, 0, D*sizeof(CostType));
for( x = 0; x <= SW2*D; x += D )
{
int scale = x == 0 ? SW2 + 1 : 1;
for( d = 0; d < D; d++ )
hsumAdd[d] = (CostType)(hsumAdd[d] + pixDiff[x + d]*scale);
}
if( y > 0 )
{
const CostType* hsumSub = hsumBuf + (std::max(y - SH2 - 1, 0) % hsumBufNRows)*costBufSize;
const CostType* Cprev = !fullDP || y == 0 ? C : C - costBufSize;
for( x = D; x < width1*D; x += D )
{
const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);
const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
#if CV_SSE2
if( useSIMD )
{
for( d = 0; d < D; d += 8 )
{
__m128i hv = _mm_load_si128((const __m128i*)(hsumAdd + x - D + d));
__m128i Cx = _mm_load_si128((__m128i*)(Cprev + x + d));
hv = _mm_adds_epi16(_mm_subs_epi16(hv,
_mm_load_si128((const __m128i*)(pixSub + d))),
_mm_load_si128((const __m128i*)(pixAdd + d)));
Cx = _mm_adds_epi16(_mm_subs_epi16(Cx,
_mm_load_si128((const __m128i*)(hsumSub + x + d))),
hv);
_mm_store_si128((__m128i*)(hsumAdd + x + d), hv);
_mm_store_si128((__m128i*)(C + x + d), Cx);
}
}
else
#endif
{
for( d = 0; d < D; d++ )
{
int hv = hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
C[x + d] = (CostType)(Cprev[x + d] + hv - hsumSub[x + d]);
}
}
}
}
else
{
for( x = D; x < width1*D; x += D )
{
const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);
const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
for( d = 0; d < D; d++ )
hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
}
}
}
if( y == 0 )
{
int scale = k == 0 ? SH2 + 1 : 1;
for( x = 0; x < width1*D; x++ )
C[x] = (CostType)(C[x] + hsumAdd[x]*scale);
}
}
// also, clear the S buffer
for( k = 0; k < width1*D; k++ )
S[k] = 0;
}
// clear the left and the right borders
memset( Lr[0] - NRD2*LrBorder - 8, 0, NRD2*LrBorder*sizeof(CostType) );
memset( Lr[0] + width1*NRD2 - 8, 0, NRD2*LrBorder*sizeof(CostType) );
memset( minLr[0] - NR2*LrBorder, 0, NR2*LrBorder*sizeof(CostType) );
memset( minLr[0] + width1*NR2, 0, NR2*LrBorder*sizeof(CostType) );
/*
[formula 13 in the paper]
compute L_r(p, d) = C(p, d) +
min(L_r(p-r, d),
L_r(p-r, d-1) + P1,
L_r(p-r, d+1) + P1,
min_k L_r(p-r, k) + P2) - min_k L_r(p-r, k)
where p = (x,y), r is one of the directions.
we process all the directions at once:
0: r=(-dx, 0)
1: r=(-1, -dy)
2: r=(0, -dy)
3: r=(1, -dy)
4: r=(-2, -dy)
5: r=(-1, -dy*2)
6: r=(1, -dy*2)
7: r=(2, -dy)
*/
for( x = x1; x != x2; x += dx )
{
int xm = x*NR2, xd = xm*D2;
int delta0 = minLr[0][xm - dx*NR2] + P2, delta1 = minLr[1][xm - NR2 + 1] + P2;
int delta2 = minLr[1][xm + 2] + P2, delta3 = minLr[1][xm + NR2 + 3] + P2;
CostType* Lr_p0 = Lr[0] + xd - dx*NRD2;
CostType* Lr_p1 = Lr[1] + xd - NRD2 + D2;
CostType* Lr_p2 = Lr[1] + xd + D2*2;
CostType* Lr_p3 = Lr[1] + xd + NRD2 + D2*3;
Lr_p0[-1] = Lr_p0[D] = Lr_p1[-1] = Lr_p1[D] =
Lr_p2[-1] = Lr_p2[D] = Lr_p3[-1] = Lr_p3[D] = MAX_COST;
CostType* Lr_p = Lr[0] + xd;
const CostType* Cp = C + x*D;
CostType* Sp = S + x*D;
#if CV_SSE2
if( useSIMD )
{
__m128i _P1 = _mm_set1_epi16((short)P1);
__m128i _delta0 = _mm_set1_epi16((short)delta0);
__m128i _delta1 = _mm_set1_epi16((short)delta1);
__m128i _delta2 = _mm_set1_epi16((short)delta2);
__m128i _delta3 = _mm_set1_epi16((short)delta3);
__m128i _minL0 = _mm_set1_epi16((short)MAX_COST);
for( d = 0; d < D; d += 8 )
{
__m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d));
__m128i L0, L1, L2, L3;
L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
L1 = _mm_load_si128((const __m128i*)(Lr_p1 + d));
L2 = _mm_load_si128((const __m128i*)(Lr_p2 + d));
L3 = _mm_load_si128((const __m128i*)(Lr_p3 + d));
L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d - 1)), _P1));
L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d + 1)), _P1));
L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d - 1)), _P1));
L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d + 1)), _P1));
L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d - 1)), _P1));
L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d + 1)), _P1));
L0 = _mm_min_epi16(L0, _delta0);
L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);
L1 = _mm_min_epi16(L1, _delta1);
L1 = _mm_adds_epi16(_mm_subs_epi16(L1, _delta1), Cpd);
L2 = _mm_min_epi16(L2, _delta2);
L2 = _mm_adds_epi16(_mm_subs_epi16(L2, _delta2), Cpd);
L3 = _mm_min_epi16(L3, _delta3);
L3 = _mm_adds_epi16(_mm_subs_epi16(L3, _delta3), Cpd);
_mm_store_si128( (__m128i*)(Lr_p + d), L0);
_mm_store_si128( (__m128i*)(Lr_p + d + D2), L1);
_mm_store_si128( (__m128i*)(Lr_p + d + D2*2), L2);
_mm_store_si128( (__m128i*)(Lr_p + d + D2*3), L3);
__m128i t0 = _mm_min_epi16(_mm_unpacklo_epi16(L0, L2), _mm_unpackhi_epi16(L0, L2));
__m128i t1 = _mm_min_epi16(_mm_unpacklo_epi16(L1, L3), _mm_unpackhi_epi16(L1, L3));
t0 = _mm_min_epi16(_mm_unpacklo_epi16(t0, t1), _mm_unpackhi_epi16(t0, t1));
_minL0 = _mm_min_epi16(_minL0, t0);
__m128i Sval = _mm_load_si128((const __m128i*)(Sp + d));
L0 = _mm_adds_epi16(L0, L1);
L2 = _mm_adds_epi16(L2, L3);
Sval = _mm_adds_epi16(Sval, L0);
Sval = _mm_adds_epi16(Sval, L2);
_mm_store_si128((__m128i*)(Sp + d), Sval);
}
_minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
_mm_storel_epi64((__m128i*)&minLr[0][xm], _minL0);
}
else
#endif
{
int minL0 = MAX_COST, minL1 = MAX_COST, minL2 = MAX_COST, minL3 = MAX_COST;
for( d = 0; d < D; d++ )
{
int Cpd = Cp[d], L0, L1, L2, L3;
L0 = Cpd + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0;
L1 = Cpd + std::min((int)Lr_p1[d], std::min(Lr_p1[d-1] + P1, std::min(Lr_p1[d+1] + P1, delta1))) - delta1;
L2 = Cpd + std::min((int)Lr_p2[d], std::min(Lr_p2[d-1] + P1, std::min(Lr_p2[d+1] + P1, delta2))) - delta2;
L3 = Cpd + std::min((int)Lr_p3[d], std::min(Lr_p3[d-1] + P1, std::min(Lr_p3[d+1] + P1, delta3))) - delta3;
Lr_p[d] = (CostType)L0;
minL0 = std::min(minL0, L0);
Lr_p[d + D2] = (CostType)L1;
minL1 = std::min(minL1, L1);
Lr_p[d + D2*2] = (CostType)L2;
minL2 = std::min(minL2, L2);
Lr_p[d + D2*3] = (CostType)L3;
minL3 = std::min(minL3, L3);
Sp[d] = saturate_cast<CostType>(Sp[d] + L0 + L1 + L2 + L3);
}
minLr[0][xm] = (CostType)minL0;
minLr[0][xm+1] = (CostType)minL1;
minLr[0][xm+2] = (CostType)minL2;
minLr[0][xm+3] = (CostType)minL3;
}
}
if( pass == npasses )
{
for( x = 0; x < width; x++ )
{
disp1ptr[x] = disp2ptr[x] = (DispType)INVALID_DISP_SCALED;
disp2cost[x] = MAX_COST;
}
for( x = width1 - 1; x >= 0; x-- )
{
CostType* Sp = S + x*D;
int minS = MAX_COST, bestDisp = -1;
if( npasses == 1 )
{
int xm = x*NR2, xd = xm*D2;
int minL0 = MAX_COST;
int delta0 = minLr[0][xm + NR2] + P2;
CostType* Lr_p0 = Lr[0] + xd + NRD2;
Lr_p0[-1] = Lr_p0[D] = MAX_COST;
CostType* Lr_p = Lr[0] + xd;
const CostType* Cp = C + x*D;
#if CV_SSE2
if( useSIMD )
{
__m128i _P1 = _mm_set1_epi16((short)P1);
__m128i _delta0 = _mm_set1_epi16((short)delta0);
__m128i _minL0 = _mm_set1_epi16((short)minL0);
__m128i _minS = _mm_set1_epi16(MAX_COST), _bestDisp = _mm_set1_epi16(-1);
__m128i _d8 = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7), _8 = _mm_set1_epi16(8);
for( d = 0; d < D; d += 8 )
{
__m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d)), L0;
L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
L0 = _mm_min_epi16(L0, _delta0);
L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);
_mm_store_si128((__m128i*)(Lr_p + d), L0);
_minL0 = _mm_min_epi16(_minL0, L0);
L0 = _mm_adds_epi16(L0, *(__m128i*)(Sp + d));
_mm_store_si128((__m128i*)(Sp + d), L0);
__m128i mask = _mm_cmpgt_epi16(_minS, L0);
_minS = _mm_min_epi16(_minS, L0);
_bestDisp = _mm_xor_si128(_bestDisp, _mm_and_si128(_mm_xor_si128(_bestDisp,_d8), mask));
_d8 = _mm_adds_epi16(_d8, _8);
}
short CV_DECL_ALIGNED(16) bestDispBuf[8];
_mm_store_si128((__m128i*)bestDispBuf, _bestDisp);
_minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
_minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 4));
_minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 2));
__m128i qS = _mm_min_epi16(_minS, _mm_srli_si128(_minS, 8));
qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 4));
qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 2));
minLr[0][xm] = (CostType)_mm_cvtsi128_si32(_minL0);
minS = (CostType)_mm_cvtsi128_si32(qS);
qS = _mm_shuffle_epi32(_mm_unpacklo_epi16(qS, qS), 0);
qS = _mm_cmpeq_epi16(_minS, qS);
int idx = _mm_movemask_epi8(_mm_packs_epi16(qS, qS)) & 255;
bestDisp = bestDispBuf[LSBTab[idx]];
}
else
#endif
{
for( d = 0; d < D; d++ )
{
int L0 = Cp[d] + std::min((int)Lr_p0[d], std::min(Lr_p0[d-1] + P1, std::min(Lr_p0[d+1] + P1, delta0))) - delta0;
Lr_p[d] = (CostType)L0;
minL0 = std::min(minL0, L0);
int Sval = Sp[d] = saturate_cast<CostType>(Sp[d] + L0);
if( Sval < minS )
{
minS = Sval;
bestDisp = d;
}
}
minLr[0][xm] = (CostType)minL0;
}
}
else
{
for( d = 0; d < D; d++ )
{
int Sval = Sp[d];
if( Sval < minS )
{
minS = Sval;
bestDisp = d;
}
}
}
for( d = 0; d < D; d++ )
{
if( Sp[d]*(100 - uniquenessRatio) < minS*100 && std::abs(bestDisp - d) > 1 )
break;
}
if( d < D )
continue;
d = bestDisp;
int _x2 = x + minX1 - d - minD;
if( disp2cost[_x2] > minS )
{
disp2cost[_x2] = (CostType)minS;
disp2ptr[_x2] = (DispType)(d + minD);
}
if( 0 < d && d < D-1 )
{
// do subpixel quadratic interpolation:
// fit parabola into (x1=d-1, y1=Sp[d-1]), (x2=d, y2=Sp[d]), (x3=d+1, y3=Sp[d+1])
// then find minimum of the parabola.
int denom2 = std::max(Sp[d-1] + Sp[d+1] - 2*Sp[d], 1);
d = d*DISP_SCALE + ((Sp[d-1] - Sp[d+1])*DISP_SCALE + denom2)/(denom2*2);
}
else
d *= DISP_SCALE;
disp1ptr[x + minX1] = (DispType)(d + minD*DISP_SCALE);
}
for( x = minX1; x < maxX1; x++ )
{
// we round the computed disparity both towards -inf and +inf and check
// if either of the corresponding disparities in disp2 is consistent.
// This is to give the computed disparity a chance to look valid if it is.
int d1 = disp1ptr[x];
if( d1 == INVALID_DISP_SCALED )
continue;
int _d = d1 >> DISP_SHIFT;
int d_ = (d1 + DISP_SCALE-1) >> DISP_SHIFT;
int _x = x - _d, x_ = x - d_;
if( 0 <= _x && _x < width && disp2ptr[_x] >= minD && std::abs(disp2ptr[_x] - _d) > disp12MaxDiff &&
0 <= x_ && x_ < width && disp2ptr[x_] >= minD && std::abs(disp2ptr[x_] - d_) > disp12MaxDiff )
disp1ptr[x] = (DispType)INVALID_DISP_SCALED;
}
}
// now shift the cyclic buffers
std::swap( Lr[0], Lr[1] );
std::swap( minLr[0], minLr[1] );
}
}
}
class StereoBinarySGBMImpl : public StereoBinarySGBM
{
public:
StereoBinarySGBMImpl()
{
params = StereoBinarySGBMParams();
}
StereoBinarySGBMImpl( int _minDisparity, int _numDisparities, int _SADWindowSize,
int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap,
int _uniquenessRatio, int _speckleWindowSize, int _speckleRange,
int _mode )
{
params = StereoBinarySGBMParams( _minDisparity, _numDisparities, _SADWindowSize,
_P1, _P2, _disp12MaxDiff, _preFilterCap,
_uniquenessRatio, _speckleWindowSize, _speckleRange,
_mode );
}
void compute( InputArray leftarr, InputArray rightarr, OutputArray disparr )
{
Mat left = leftarr.getMat(), right = rightarr.getMat();
CV_Assert( left.size() == right.size() && left.type() == right.type() &&
left.depth() == CV_8U );
disparr.create( left.size(), CV_16S );
Mat disp = disparr.getMat();
computeDisparityBinarySGBM( left, right, disp, params, buffer );
medianBlur(disp, disp, 3);
if( params.speckleWindowSize > 0 )
filterSpeckles(disp, (params.minDisparity - 1)*StereoMatcher::DISP_SCALE, params.speckleWindowSize,
StereoMatcher::DISP_SCALE*params.speckleRange, buffer);
}
int getMinDisparity() const { return params.minDisparity; }
void setMinDisparity(int minDisparity) { params.minDisparity = minDisparity; }
int getNumDisparities() const { return params.numDisparities; }
void setNumDisparities(int numDisparities) { params.numDisparities = numDisparities; }
int getBlockSize() const { return params.SADWindowSize; }
void setBlockSize(int blockSize) { params.SADWindowSize = blockSize; }
int getSpeckleWindowSize() const { return params.speckleWindowSize; }
void setSpeckleWindowSize(int speckleWindowSize) { params.speckleWindowSize = speckleWindowSize; }
int getSpeckleRange() const { return params.speckleRange; }
void setSpeckleRange(int speckleRange) { params.speckleRange = speckleRange; }
int getDisp12MaxDiff() const { return params.disp12MaxDiff; }
void setDisp12MaxDiff(int disp12MaxDiff) { params.disp12MaxDiff = disp12MaxDiff; }
int getPreFilterCap() const { return params.preFilterCap; }
void setPreFilterCap(int preFilterCap) { params.preFilterCap = preFilterCap; }
int getUniquenessRatio() const { return params.uniquenessRatio; }
void setUniquenessRatio(int uniquenessRatio) { params.uniquenessRatio = uniquenessRatio; }
int getP1() const { return params.P1; }
void setP1(int P1) { params.P1 = P1; }
int getP2() const { return params.P2; }
void setP2(int P2) { params.P2 = P2; }
int getMode() const { return params.mode; }
void setMode(int mode) { params.mode = mode; }
void write(FileStorage& fs) const
{
fs << "name" << name_
<< "minDisparity" << params.minDisparity
<< "numDisparities" << params.numDisparities
<< "blockSize" << params.SADWindowSize
<< "speckleWindowSize" << params.speckleWindowSize
<< "speckleRange" << params.speckleRange
<< "disp12MaxDiff" << params.disp12MaxDiff
<< "preFilterCap" << params.preFilterCap
<< "uniquenessRatio" << params.uniquenessRatio
<< "P1" << params.P1
<< "P2" << params.P2
<< "mode" << params.mode;
}
void read(const FileNode& fn)
{
FileNode n = fn["name"];
CV_Assert( n.isString() && String(n) == name_ );
params.minDisparity = (int)fn["minDisparity"];
params.numDisparities = (int)fn["numDisparities"];
params.SADWindowSize = (int)fn["blockSize"];
params.speckleWindowSize = (int)fn["speckleWindowSize"];
params.speckleRange = (int)fn["speckleRange"];
params.disp12MaxDiff = (int)fn["disp12MaxDiff"];
params.preFilterCap = (int)fn["preFilterCap"];
params.uniquenessRatio = (int)fn["uniquenessRatio"];
params.P1 = (int)fn["P1"];
params.P2 = (int)fn["P2"];
params.mode = (int)fn["mode"];
}
StereoBinarySGBMParams params;
Mat buffer;
static const char* name_;
};
const char* StereoBinarySGBMImpl::name_ = "StereoMatcher.SGBM";
Ptr<StereoBinarySGBM> StereoBinarySGBM::create(int minDisparity, int numDisparities, int SADWindowSize,
int P1, int P2, int disp12MaxDiff,
int preFilterCap, int uniquenessRatio,
int speckleWindowSize, int speckleRange,
int mode)
{
return Ptr<StereoBinarySGBM>(
new StereoBinarySGBMImpl(minDisparity, numDisparities, SADWindowSize,
P1, P2, disp12MaxDiff,
preFilterCap, uniquenessRatio,
speckleWindowSize, speckleRange,
mode));
}
typedef cv::Point_<short> Point2s;
}
}
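As with the block matcher, a minimal usage sketch of the binary SGBM matcher above may help. It is an editor's illustration, not part of this patch; the file names are hypothetical, and the P1/P2 rule of thumb (8*win^2 and 32*win^2 per channel) is a common convention rather than anything mandated by this code.
#include "opencv2/stereo.hpp"
#include "opencv2/imgcodecs.hpp"

int main()
{
    cv::Mat left  = cv::imread("left.png",  cv::IMREAD_GRAYSCALE);  // hypothetical rectified pair
    cv::Mat right = cv::imread("right.png", cv::IMREAD_GRAYSCALE);
    const int win = 9, ndisp = 64;                                  // ndisp % 16 == 0 is required
    cv::Ptr<cv::stereo::StereoBinarySGBM> sgbm = cv::stereo::StereoBinarySGBM::create(
        0, ndisp, win,
        8 * win * win,    // P1: penalty for +/-1 disparity changes between neighbors
        32 * win * win,   // P2: penalty for larger jumps (P2 > P1 is enforced internally)
        1, 31, 10, 100, 32, cv::stereo::StereoBinarySGBM::MODE_SGBM);
    cv::Mat disp;
    sgbm->compute(left, right, disp);  // CV_16S, 4 fractional bits (DISP_SCALE == 16)
    return 0;
}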

@ -0,0 +1,45 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
CV_TEST_MAIN("cv")

@ -0,0 +1,31 @@
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-declarations"
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic ignored "-Wmissing-prototypes"
# pragma GCC diagnostic ignored "-Wextra"
# endif
#endif
#ifndef __OPENCV_TEST_PRECOMP_HPP__
#define __OPENCV_TEST_PRECOMP_HPP__
#include <iostream>
#include "opencv2/ts.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/stereo.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/features2d.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/cvdef.h"
#include "opencv2/core.hpp"
#include "opencv2/highgui.hpp"
#include <algorithm>
#include <cmath>
#endif

@ -110,9 +110,22 @@ int main(int argc, char** argv)
tick2 = cv::getTickCount();
cout << endl << "PPF Elapsed Time " <<
(tick2-tick1)/cv::getTickFrequency() << " sec" << endl;
// Get only first N results
int N = 2;
//check results size from match call above
size_t results_size = results.size();
cout << "Number of matching poses: " << results_size;
if (results_size == 0) {
cout << endl << "No matching poses found. Exiting." << endl;
exit(0);
}
// Get only the first N results, but reduce N if fewer matches than requested were found
size_t N = 2;
if (results_size < N) {
cout << endl << "Reducing matching poses to be reported (as specified in code): "
<< N << " to the number of matches found: " << results_size << endl;
N = results_size;
}
vector<Pose3DPtr> resultsSub(results.begin(),results.begin()+N);
// Create an instance of ICP

@ -375,7 +375,7 @@ void PPF3DDetector::clusterPoses(std::vector<Pose3DPtr> poseList, int numPoses,
#pragma omp parallel for
#endif
// uses weighting by the number of votes
for (size_t i=0; i<poseClusters.size(); i++)
for (int i=0; i<static_cast<int>(poseClusters.size()); i++)
{
// Only the quaternions can be averaged meaningfully, so they are used here
double qAvg[4]={0}, tAvg[3]={0};
@ -426,7 +426,7 @@ void PPF3DDetector::clusterPoses(std::vector<Pose3DPtr> poseList, int numPoses,
#if defined _OPENMP
#pragma omp parallel for
#endif
for (size_t i=0; i<poseClusters.size(); i++)
for (int i=0; i<static_cast<int>(poseClusters.size()); i++)
{
// Only the quaternions can be averaged meaningfully, so they are used here
double qAvg[4]={0}, tAvg[3]={0};

@ -240,6 +240,119 @@ types.
*/
CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string& filename);
/* OCR BeamSearch Decoder */
/** @brief OCRBeamSearchDecoder class provides an interface for OCR using the Beam Search algorithm.
@note
- (C++) An example on using OCRBeamSearchDecoder recognition combined with scene text detection can
be found at the demo sample:
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
*/
class CV_EXPORTS OCRBeamSearchDecoder : public BaseOCR
{
public:
/** @brief The character classifier callback is wrapped in a class.
This way it hides the feature extractor and the classifier itself, so developers can write
their own OCR code.
The default character classifier and feature extractor can be loaded using the utility function
loadOCRBeamSearchClassifierCNN with all its parameters provided in
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
*/
class CV_EXPORTS ClassifierCallback
{
public:
virtual ~ClassifierCallback() { }
/** @brief The character classifier must return a (ranked list of) class id(s)
@param image Input image CV_8UC1 or CV_8UC3 with a single letter.
@param recognition_probabilities For each of the N characters found the classifier returns a list with
class probabilities for each class.
@param oversegmentation The classifier returns a list of N+1 character locations' x-coordinates,
including 0 as start-sequence location.
*/
virtual void eval( InputArray image, std::vector< std::vector<double> >& recognition_probabilities, std::vector<int>& oversegmentation );
};
public:
/** @brief Recognize text using Beam Search.
Takes an image as input and returns the recognized text in the output_text parameter. Optionally
also provides the Rects for individual text elements found (e.g. words), and the list of those
text elements with their confidence values.
@param image Input image CV_8UC1 with a single text line (or word).
@param output_text Output text. Most likely character sequence found by the HMM decoder.
@param component_rects If provided the method will output a list of Rects for the individual
text elements found (e.g. words).
@param component_texts If provided the method will output a list of text strings for the
recognition of individual text elements found (e.g. words).
@param component_confidences If provided the method will output a list of confidence values
for the recognition of individual text elements found (e.g. words).
@param component_level Only OCR_LEVEL_WORD is supported.
*/
virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0);
/** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder.
@param classifier The character classifier with built in feature extractor.
@param vocabulary The language vocabulary (chars when ASCII English text). vocabulary.size()
must be equal to the number of classes of the classifier.
@param transition_probabilities_table Table with transition probabilities between character
pairs. cols == rows == vocabulary.size().
@param emission_probabilities_table Table with observation emission probabilities. cols ==
rows == vocabulary.size().
@param mode HMM Decoding algorithm. Only OCR_DECODER_VITERBI is available for the moment
(<http://en.wikipedia.org/wiki/Viterbi_algorithm>).
@param beam_size Size of the beam in Beam Search algorithm.
*/
static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
const std::string& vocabulary, // The language vocabulary (chars when ascii english text)
// size() must be equal to the number of classes
InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
// cols == rows == vocabulary.size()
InputArray emission_probabilities_table, // Table with observation emission probabilities
// cols == rows == vocabulary.size()
decoder_mode mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
int beam_size = 50); // Size of the beam in Beam Search algorithm
protected:
Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier;
std::string vocabulary;
Mat transition_p;
Mat emission_p;
decoder_mode mode;
int beam_size;
};
/** @brief Allows loading the default character classifier implicitly when creating an OCRBeamSearchDecoder object.
@param filename The XML or YAML file with the classifier model (e.g. OCRBeamSearch_CNN_model_data.xml.gz)
The default classifier is based on the scene text recognition method proposed by Adam Coates &
Andrew Ng in [Coates11a]. The character classifier consists of a Single Layer Convolutional Neural Network and
a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
at each window location.
*/
CV_EXPORTS Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const std::string& filename);
//! @}
}

@ -0,0 +1,66 @@
/*
* cropped_word_recognition.cpp
*
* A demo program of Scene Text cropped word Recognition:
* Shows the use of the OCRBeamSearchDecoder class with the Single Layer CNN character classifier described in:
* Coates, Adam, et al.: Text detection and character recognition in scene images with unsupervised feature learning, ICDAR 2011
*
* Created on: Jul 31, 2014
* Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es>
*/
#include "opencv2/text.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include <iostream>
using namespace std;
using namespace cv;
using namespace cv::text;
//Perform text recognition in a given cropped word
int main(int argc, char* argv[])
{
cout << endl << argv[0] << endl << endl;
cout << "A demo program of Scene Text cropped word Recognition: " << endl;
cout << "Shows the use of the OCRBeamSearchDecoder class using the Single Layer CNN character classifier described in:" << endl;
cout << "Coates, Adam, et al. \"Text detection and character recognition in scene images with unsupervised feature learning.\" ICDAR 2011." << endl << endl;
Mat image;
if(argc>1)
image = imread(argv[1]);
else
{
cout << " Usage: " << argv[0] << " <input_image>" << endl << endl;
return(0);
}
Mat transition_p;
string filename = "OCRHMM_transitions_table.xml"; // TODO this table was done with a different vocabulary order?
// TODO add a new function in ocr.cpp to create transition tab
// for a given lexicon
FileStorage fs(filename, FileStorage::READ);
fs["transition_probabilities"] >> transition_p;
fs.release();
Mat emission_p = Mat::eye(62,62,CV_64FC1);
string voc = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyx0123456789";
Ptr<OCRBeamSearchDecoder> ocr = OCRBeamSearchDecoder::create(
loadOCRBeamSearchClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"),
voc, transition_p, emission_p);
double t_r = (double)getTickCount();
string output;
vector<Rect> boxes;
vector<string> words;
vector<float> confidences;
ocr->run(image, output, &boxes, &words, &confidences, OCR_LEVEL_WORD);
cout << "OCR output = \"" << output << "\". Decoded in "
<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl << endl;
return 0;
}

Binary file not shown.

Binary file not shown.

@ -287,7 +287,11 @@ void ERFilterNM::er_tree_extract( InputArray image )
quads[1][3] = (1<<3)|(1<<2)|(1<<1);
quads[2][0] = (1<<2)|(1<<1);
quads[2][1] = (1<<3)|(1);
quads[2][3] = 255;
// quads[2][2] and quads[2][3] are never used so no need to initialize them.
// The four lowest bits in each quads[i][j] correspond to the 2x2 binary patterns
// Q_1, Q_2, Q_3 in the Neumann and Matas CVPR 2012 paper
// (see page 4, at the end of the first column).
// Q_1 and Q_2 have four patterns, while Q_3 has only two.
// masks to know if a pixel is accessible and if it has been already added to some region

@ -0,0 +1,656 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/ml.hpp"
#include <iostream>
#include <fstream>
#include <set>
namespace cv
{
namespace text
{
using namespace std;
using namespace cv::ml;
/* OCR BeamSearch Decoder */
void OCRBeamSearchDecoder::run(Mat& image, string& output_text, vector<Rect>* component_rects,
vector<string>* component_texts, vector<float>* component_confidences,
int component_level)
{
CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC3) );
CV_Assert( (component_level == OCR_LEVEL_TEXTLINE) || (component_level == OCR_LEVEL_WORD) );
output_text.clear();
if (component_rects != NULL)
component_rects->clear();
if (component_texts != NULL)
component_texts->clear();
if (component_confidences != NULL)
component_confidences->clear();
}
void OCRBeamSearchDecoder::ClassifierCallback::eval( InputArray image, vector< vector<double> >& recognition_probabilities, vector<int>& oversegmentation)
{
CV_Assert(( image.getMat().type() == CV_8UC3 ) || ( image.getMat().type() == CV_8UC1 ));
if (!recognition_probabilities.empty())
{
for (size_t i=0; i<recognition_probabilities.size(); i++)
recognition_probabilities[i].clear();
}
recognition_probabilities.clear();
oversegmentation.clear();
}
bool beam_sort_function ( pair< double,vector<int> > i, pair< double,vector<int> > j );
bool beam_sort_function ( pair< double,vector<int> > i, pair< double,vector<int> > j )
{
return (i.first > j.first);
}
class OCRBeamSearchDecoderImpl : public OCRBeamSearchDecoder
{
public:
//Default constructor
OCRBeamSearchDecoderImpl( Ptr<OCRBeamSearchDecoder::ClassifierCallback> _classifier,
const string& _vocabulary,
InputArray transition_probabilities_table,
InputArray emission_probabilities_table,
decoder_mode _mode,
int _beam_size)
{
classifier = _classifier;
transition_p = transition_probabilities_table.getMat();
emission_p = emission_probabilities_table.getMat();
vocabulary = _vocabulary;
mode = _mode;
beam_size = _beam_size;
}
~OCRBeamSearchDecoderImpl()
{
}
void run( Mat& src,
string& out_sequence,
vector<Rect>* component_rects,
vector<string>* component_texts,
vector<float>* component_confidences,
int component_level)
{
CV_Assert( (src.type() == CV_8UC1) || (src.type() == CV_8UC3) );
CV_Assert( (src.cols > 0) && (src.rows > 0) );
CV_Assert( component_level == OCR_LEVEL_WORD );
out_sequence.clear();
if (component_rects != NULL)
component_rects->clear();
if (component_texts != NULL)
component_texts->clear();
if (component_confidences != NULL)
component_confidences->clear();
// TODO split a line into words
if(src.type() == CV_8UC3)
{
cvtColor(src,src,COLOR_BGR2GRAY); // imread loads color images as BGR
}
vector< vector<double> > recognition_probabilities;
vector<int> oversegmentation;
classifier->eval(src, recognition_probabilities, oversegmentation);
/* Now run the beam search algorithm to optimize the recognition score */
//convert probabilities to log probabilities
for (size_t i=0; i<recognition_probabilities.size(); i++)
{
for (size_t j=0; j<recognition_probabilities[i].size(); j++)
{
if (recognition_probabilities[i][j] == 0)
recognition_probabilities[i][j] = -DBL_MAX;
else
recognition_probabilities[i][j] = log(recognition_probabilities[i][j]);
}
}
for (int i=0; i<transition_p.rows; i++)
{
for (int j=0; j<transition_p.cols; j++)
{
if (transition_p.at<double>(i,j) == 0)
transition_p.at<double>(i,j) = -DBL_MAX;
else
transition_p.at<double>(i,j) = log(transition_p.at<double>(i,j));
}
}
//TODO it would be interesting to have a hash table with a vector of booleans,
// but this is not feasible when the number of possible segmentations is large.
//vector<bool> visited_nodes(pow(2,oversegmentation.size()),false); // hash table for visited nodes
// An alternative is to use std::set<unsigned long long int> to store only the keys of
// visited nodes, at some cost in time performance.
set<unsigned long long int> visited_nodes; //TODO make it member of class
// it is also possible to reduce the number of seg. points in some way (e.g. use only seg.points
// for which there is a change on the class prediction)
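// For illustration: with oversegmentation {0,3,5,9} the search starts from {0,9};
// generate_childs() then produces {0,3,9} and {0,5,9}, each child adding exactly one
// unused segmentation point.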
vector<int> start_segmentation;
start_segmentation.push_back(oversegmentation[0]);
start_segmentation.push_back(oversegmentation[oversegmentation.size()-1]);
vector< pair< double,vector<int> > > beam;
beam.push_back( pair< double,vector<int> > (score_segmentation(start_segmentation, recognition_probabilities, out_sequence), start_segmentation) );
vector< vector<int> > childs = generate_childs(start_segmentation,oversegmentation, visited_nodes);
if (!childs.empty())
update_beam( beam, childs, recognition_probabilities);
//cout << "beam size " << beam.size() << " best score " << beam[0].first<< endl;
int generated_childs = (int)childs.size();
while (generated_childs != 0)
{
generated_childs = 0;
vector< pair< double,vector<int> > > old_beam = beam;
for (size_t i=0; i<old_beam.size(); i++)
{
childs = generate_childs(old_beam[i].second,oversegmentation, visited_nodes);
if (!childs.empty())
update_beam( beam, childs, recognition_probabilities);
generated_childs += (int)childs.size();
}
//cout << "beam size " << beam.size() << " best score " << beam[0].first << endl;
}
// FINISHED ! Get the best prediction found into out_sequence
score_segmentation(beam[0].second, recognition_probabilities, out_sequence);
// TODO fill other output parameters
return;
}
private:
////////////////////////////////////////////////////////////
// TODO the way we expand nodes makes the recognition score heuristic non-monotonic;
// the search should start from the leftmost node 0 and always grow to the right.
vector< vector<int> > generate_childs(vector<int> &segmentation, vector<int> &oversegmentation, set<unsigned long long int> &visited_nodes)
{
/*cout << " generate childs for [";
for (size_t i = 0 ; i < segmentation .size(); i++)
cout << segmentation[i] << ",";
cout << "] ";*/
vector< vector<int> > childs;
for (size_t i=0; i<oversegmentation.size(); i++)
{
int seg_point = oversegmentation[i];
if (find(segmentation.begin(), segmentation.end(), seg_point) == segmentation.end())
{
//cout << seg_point << " " ;
vector<int> child = segmentation;
child.push_back(seg_point);
sort(child.begin(), child.end());
unsigned long long int key = 0;
for (size_t j=0; j<child.size(); j++)
{
// set the bit at the index of child[j] within oversegmentation (equivalent to summing powers of two)
size_t idx = (size_t)(find(oversegmentation.begin(), oversegmentation.end(), child[j]) - oversegmentation.begin());
key |= 1ULL << idx;
}
//if (!visited_nodes[key])
if (visited_nodes.find(key) == visited_nodes.end())
{
childs.push_back(child);
//visited_nodes[key] = true;
visited_nodes.insert(key);
}
}
}
//cout << endl;
return childs;
}
////////////////////////////////////////////////////////////
//TODO shall the beam itself be a member of the class?
void update_beam (vector< pair< double,vector<int> > > &beam, vector< vector<int> > &childs, vector< vector<double> > &recognition_probabilities)
{
string out_sequence;
double min_score = -DBL_MAX; //min score value to be part of the beam
if ((int)beam.size() == beam_size)
min_score = beam[beam.size()-1].first; //last element has the lowest score
for (size_t i=0; i<childs.size(); i++)
{
double score = score_segmentation(childs[i], recognition_probabilities, out_sequence);
if (score > min_score)
{
beam.push_back(pair< double,vector<int> >(score,childs[i]));
sort(beam.begin(),beam.end(),beam_sort_function);
if ((int)beam.size() > beam_size)
{
beam.pop_back();
min_score = beam[beam.size()-1].first;
}
}
}
}
////////////////////////////////////////////////////////////
// TODO Add heuristics to the score function (see PhotoOCR paper)
// e.g.: in some cases we discard a segmentation because it includes a very large character
// in other cases we do it because the overlapping between two chars is too large
// etc.
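// The scoring below is a standard Viterbi pass in log space (a sketch matching the code):
//   V(0,i) = log start_p(i) + log P(obs_1 | i)
//   V(t,i) = max_j [ V(t-1,j) + log transition_p(j,i) ] + log P(obs_{t+1} | i)
// where P(obs | i) is the classifier probability for character i in the window between
// consecutive segmentation points; the final score is max_i V(T-1,i) / T, with T the
// number of windows.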
double score_segmentation(vector<int> &segmentation, vector< vector<double> > &observations, string& outstring)
{
//TODO This must be extracted from dictionary
vector<double> start_p(vocabulary.size());
for (int i=0; i<(int)vocabulary.size(); i++)
start_p[i] = log(1.0/vocabulary.size());
Mat V = Mat::ones((int)segmentation.size()-1,(int)vocabulary.size(),CV_64FC1);
V = V * -DBL_MAX;
vector<string> path(vocabulary.size());
// Initialize base cases (t == 0)
for (int i=0; i<(int)vocabulary.size(); i++)
{
V.at<double>(0,i) = start_p[i] + observations[segmentation[1]-1][i];
path[i] = vocabulary.at(i);
}
// Run Viterbi for t > 0
for (int t=1; t<(int)segmentation.size()-1; t++)
{
vector<string> newpath(vocabulary.size());
for (int i=0; i<(int)vocabulary.size(); i++)
{
double max_prob = -DBL_MAX;
int best_idx = 0;
for (int j=0; j<(int)vocabulary.size(); j++)
{
double prob = V.at<double>(t-1,j) + transition_p.at<double>(j,i) + observations[segmentation[t+1]-1][i];
if ( prob > max_prob)
{
max_prob = prob;
best_idx = j;
}
}
V.at<double>(t,i) = max_prob;
newpath[i] = path[best_idx] + vocabulary.at(i);
}
// Don't need to remember the old paths
path.swap(newpath);
}
double max_prob = -DBL_MAX;
int best_idx = 0;
for (int i=0; i<(int)vocabulary.size(); i++)
{
double prob = V.at<double>((int)segmentation.size()-2,i);
if ( prob > max_prob)
{
max_prob = prob;
best_idx = i;
}
}
//cout << " score " << max_prob / (segmentation.size()-1) << " " << path[best_idx] << endl;
outstring = path[best_idx];
return max_prob / (segmentation.size()-1);
}
};
Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create( Ptr<OCRBeamSearchDecoder::ClassifierCallback> _classifier,
const string& _vocabulary,
InputArray transition_p,
InputArray emission_p,
decoder_mode _mode,
int _beam_size)
{
return makePtr<OCRBeamSearchDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, _mode, _beam_size);
}
class CV_EXPORTS OCRBeamSearchClassifierCNN : public OCRBeamSearchDecoder::ClassifierCallback
{
public:
//constructor
OCRBeamSearchClassifierCNN(const std::string& filename);
// Destructor
~OCRBeamSearchClassifierCNN() {}
void eval( InputArray src, vector< vector<double> >& recognition_probabilities, vector<int>& oversegmentation );
protected:
void normalizeAndZCA(Mat& patches);
double eval_feature(Mat& feature, double* prob_estimates);
private:
//TODO implement getters/setters for some of these members (where applicable)
int nr_class; // number of classes
int nr_feature; // number of features
Mat feature_min; // scale range
Mat feature_max;
Mat weights; // Logistic Regression weights
Mat kernels; // CNN kernels
Mat M, P; // ZCA Whitening parameters
int step_size; // sliding window step
int window_size; // window size
int quad_size;
int patch_size;
int num_quads; // extract 25 quads (12x12) from each image
int num_tiles; // extract 25 patches (8x8) from each quad
double alpha; // used in non-linear activation function z = max(0, |D*a| - alpha)
};
OCRBeamSearchClassifierCNN::OCRBeamSearchClassifierCNN (const string& filename)
{
if (ifstream(filename.c_str()))
{
FileStorage fs(filename, FileStorage::READ);
// Load the kernels bank and whitening params
fs["kernels"] >> kernels;
fs["M"] >> M;
fs["P"] >> P;
// Load Logistic Regression weights
fs["weights"] >> weights;
// Load feature scaling ranges
fs["feature_min"] >> feature_min;
fs["feature_max"] >> feature_max;
fs.release();
// TODO check all matrix dimensions match correctly and no one is empty
}
else
CV_Error(Error::StsBadArg, "Default classifier data file not found!");
nr_feature = weights.rows;
nr_class = weights.cols;
// TODO some of this can be inferred from the input file (e.g. patch size must be sqrt(kernels.cols))
step_size = 4;
window_size = 32;
quad_size = 12;
patch_size = 8;
num_quads = 25;
num_tiles = 25;
alpha = 0.5;
}
void OCRBeamSearchClassifierCNN::eval( InputArray _src, vector< vector<double> >& recognition_probabilities, vector<int>& oversegmentation)
{
CV_Assert(( _src.getMat().type() == CV_8UC3 ) || ( _src.getMat().type() == CV_8UC1 ));
if (!recognition_probabilities.empty())
{
for (size_t i=0; i<recognition_probabilities.size(); i++)
recognition_probabilities[i].clear();
}
recognition_probabilities.clear();
oversegmentation.clear();
Mat src = _src.getMat();
if(src.type() == CV_8UC3)
{
cvtColor(src,src,COLOR_BGR2GRAY); // imread loads color images as BGR
}
// TODO shall we resize the input image or make a copy ?
resize(src,src,Size(window_size*src.cols/src.rows,window_size));
int seg_points = 0;
oversegmentation.push_back(seg_points);
Mat quad;
Mat tmp;
Mat img;
// begin sliding window loop foreach detection window
for (int x_c=0; x_c<=src.cols-window_size; x_c=x_c+step_size)
{
img = src(Rect(Point(x_c,0),Size(window_size,window_size)));
int patch_count = 0;
vector< vector<double> > data_pool(9);
int quad_id = 1;
for (int q_x=0; q_x<=window_size-quad_size; q_x=q_x+(quad_size/2-1))
{
for (int q_y=0; q_y<=window_size-quad_size; q_y=q_y+(quad_size/2-1))
{
Rect quad_rect = Rect(q_x,q_y,quad_size,quad_size);
quad = img(quad_rect);
//start sliding window (8x8) in each tile and store the patch as row in data_pool
for (int w_x=0; w_x<=quad_size-patch_size; w_x++)
{
for (int w_y=0; w_y<=quad_size-patch_size; w_y++)
{
quad(Rect(w_x,w_y,patch_size,patch_size)).copyTo(tmp);
tmp = tmp.reshape(0,1);
tmp.convertTo(tmp, CV_64F);
normalizeAndZCA(tmp);
vector<double> patch;
tmp.copyTo(patch);
if ((quad_id == 1)||(quad_id == 2)||(quad_id == 6)||(quad_id == 7))
data_pool[0].insert(data_pool[0].end(),patch.begin(),patch.end());
if ((quad_id == 2)||(quad_id == 7)||(quad_id == 3)||(quad_id == 8)||(quad_id == 4)||(quad_id == 9))
data_pool[1].insert(data_pool[1].end(),patch.begin(),patch.end());
if ((quad_id == 4)||(quad_id == 9)||(quad_id == 5)||(quad_id == 10))
data_pool[2].insert(data_pool[2].end(),patch.begin(),patch.end());
if ((quad_id == 6)||(quad_id == 11)||(quad_id == 16)||(quad_id == 7)||(quad_id == 12)||(quad_id == 17))
data_pool[3].insert(data_pool[3].end(),patch.begin(),patch.end());
if ((quad_id == 7)||(quad_id == 12)||(quad_id == 17)||(quad_id == 8)||(quad_id == 13)||(quad_id == 18)||(quad_id == 9)||(quad_id == 14)||(quad_id == 19))
data_pool[4].insert(data_pool[4].end(),patch.begin(),patch.end());
if ((quad_id == 9)||(quad_id == 14)||(quad_id == 19)||(quad_id == 10)||(quad_id == 15)||(quad_id == 20))
data_pool[5].insert(data_pool[5].end(),patch.begin(),patch.end());
if ((quad_id == 16)||(quad_id == 21)||(quad_id == 17)||(quad_id == 22))
data_pool[6].insert(data_pool[6].end(),patch.begin(),patch.end());
if ((quad_id == 17)||(quad_id == 22)||(quad_id == 18)||(quad_id == 23)||(quad_id == 19)||(quad_id == 24))
data_pool[7].insert(data_pool[7].end(),patch.begin(),patch.end());
if ((quad_id == 19)||(quad_id == 24)||(quad_id == 20)||(quad_id == 25))
data_pool[8].insert(data_pool[8].end(),patch.begin(),patch.end());
patch_count++;
}
}
quad_id++;
}
}
//compute the dot product of each normalized and whitened patch with the kernel bank;
//the responses are accumulated per pool, yielding a 9xD representation
Mat feature = Mat::zeros(9,kernels.rows,CV_64FC1);
for (int i=0; i<9; i++)
{
Mat pool = Mat(data_pool[i]);
pool = pool.reshape(0,(int)data_pool[i].size()/kernels.cols);
for (int p=0; p<pool.rows; p++)
{
for (int f=0; f<kernels.rows; f++)
{
feature.row(i).at<double>(0,f) = feature.row(i).at<double>(0,f) + max(0.0,std::abs(pool.row(p).dot(kernels.row(f)))-alpha);
}
}
}
feature = feature.reshape(0,1);
// data must be normalized within the range obtained during training
double lower = -1.0;
double upper = 1.0;
for (int k=0; k<feature.cols; k++)
{
feature.at<double>(0,k) = lower + (upper-lower) *
(feature.at<double>(0,k)-feature_min.at<double>(0,k))/
(feature_max.at<double>(0,k)-feature_min.at<double>(0,k));
}
double *p = new double[nr_class];
double predict_label = eval_feature(feature,p);
//cout << " Prediction: " << vocabulary[predict_label] << " with probability " << p[0] << endl;
if (predict_label < 0) // TODO use cvError
cout << "OCRBeamSearchClassifierCNN::eval Error: unexpected prediction in eval_feature()" << endl;
seg_points++;
oversegmentation.push_back(seg_points);
vector<double> recognition_p(p, p+nr_class); // copy exactly nr_class scores
recognition_probabilities.push_back(recognition_p);
delete[] p;
}
}
// normalize for contrast and apply ZCA whitening to a set of image patches
void OCRBeamSearchClassifierCNN::normalizeAndZCA(Mat& patches)
{
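// A sketch of the whitening used here: on the first call, P is built from the
// eigendecomposition of the patch covariance as P = V * diag(1/sqrt(eigval + 0.1)) * V^T;
// every patch is then centered with the mean M and multiplied by P.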
//Normalize for contrast
for (int i=0; i<patches.rows; i++)
{
Scalar row_mean, row_std;
meanStdDev(patches.row(i),row_mean,row_std);
row_std[0] = sqrt(pow(row_std[0],2)*patches.cols/(patches.cols-1)+10);
patches.row(i) = (patches.row(i) - row_mean[0]) / row_std[0];
}
//ZCA whitening
if ((M.dims == 0) || (P.dims == 0))
{
Mat CC;
calcCovarMatrix(patches,CC,M,COVAR_NORMAL|COVAR_ROWS|COVAR_SCALE);
CC = CC * patches.rows / (patches.rows-1);
Mat e_val,e_vec;
eigen(CC.t(),e_val,e_vec);
e_vec = e_vec.t();
sqrt(1./(e_val + 0.1), e_val);
Mat V = Mat::zeros(e_vec.rows, e_vec.cols, CV_64FC1);
Mat D = Mat::eye(e_vec.rows, e_vec.cols, CV_64FC1);
for (int i=0; i<e_vec.cols; i++)
{
e_vec.col(e_vec.cols-i-1).copyTo(V.col(i));
D.col(i) = D.col(i) * e_val.at<double>(0,e_val.rows-i-1);
}
P = V * D * V.t();
}
for (int i=0; i<patches.rows; i++)
patches.row(i) = patches.row(i) - M;
patches = patches * P;
}
double OCRBeamSearchClassifierCNN::eval_feature(Mat& feature, double* prob_estimates)
{
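// One-vs-all logistic regression scoring: compute the linear score w_i . x for each
// class, take the argmax as the predicted label, then squash the scores with a sigmoid
// and normalize them so prob_estimates sums to 1.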
for(int i=0;i<nr_class;i++)
prob_estimates[i] = 0;
for(int idx=0; idx<nr_feature; idx++)
for(int i=0;i<nr_class;i++)
prob_estimates[i] += weights.at<float>(idx,i)*feature.at<double>(0,idx); //TODO use vectorized dot product
int dec_max_idx = 0;
for(int i=1;i<nr_class;i++)
{
if(prob_estimates[i] > prob_estimates[dec_max_idx])
dec_max_idx = i;
}
for(int i=0;i<nr_class;i++)
prob_estimates[i]=1/(1+exp(-prob_estimates[i]));
double sum=0;
for(int i=0; i<nr_class; i++)
sum+=prob_estimates[i];
for(int i=0; i<nr_class; i++)
prob_estimates[i]=prob_estimates[i]/sum;
return dec_max_idx;
}
Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const std::string& filename)
{
return makePtr<OCRBeamSearchClassifierCNN>(filename);
}
}
}

@ -127,7 +127,7 @@ public:
int component_level=0)
{
CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC1) );
CV_Assert( (image.type() == CV_8UC1) || (image.type() == CV_8UC3) );
#ifdef HAVE_TESSERACT

@ -0,0 +1,135 @@
/*---------------STEP 1---------------------*/
/* modify this file:
* opencv2/tracking/tracker.hpp
* and add a code snippet similar to
* the following:
*/
/*------------------------------------------*/
class CV_EXPORTS_W TrackerKCF : public Tracker
{
public:
struct CV_EXPORTS Params
{
Params();
void read( const FileNode& /*fn*/ );
void write( FileStorage& /*fs*/ ) const;
};
/** @brief Constructor
@param parameters KCF parameters TrackerKCF::Params
*/
BOILERPLATE_CODE("KCF",TrackerKCF);
};
/*---------------STEP 2---------------------*/
/* modify this file
* src/tracker.cpp
* add one line in function
* Ptr<Tracker> Tracker::create( const String& trackerType )
*/
/*------------------------------------------*/
Ptr<Tracker> Tracker::create( const String& trackerType )
{
BOILERPLATE_CODE("MIL",TrackerMIL);
BOILERPLATE_CODE("BOOSTING",TrackerBoosting);
BOILERPLATE_CODE("MEDIANFLOW",TrackerMedianFlow);
BOILERPLATE_CODE("TLD",TrackerTLD);
BOILERPLATE_CODE("KCF",TrackerKCF); // add this line!
return Ptr<Tracker>();
}
/*---------------STEP 3---------------------*/
/* create a new file and paste the snippet below,
* then modify it according to your needs.
* Also make sure to include the LICENSE header.
* src/trackerKCF.cpp
*/
/*------------------------------------------*/
/*---------------------------
| TrackerKCFModel
|---------------------------*/
namespace cv{
/**
* \brief Implementation of TrackerModel for the KCF algorithm
*/
class TrackerKCFModel : public TrackerModel{
public:
TrackerKCFModel(TrackerKCF::Params /*params*/){}
~TrackerKCFModel(){}
protected:
void modelEstimationImpl( const std::vector<Mat>& responses ){}
void modelUpdateImpl(){}
};
} /* namespace cv */
/*---------------------------
| TrackerKCF
|---------------------------*/
namespace cv{
/*
* Prototype
*/
class TrackerKCFImpl : public TrackerKCF{
public:
TrackerKCFImpl( const TrackerKCF::Params &parameters = TrackerKCF::Params() );
void read( const FileNode& fn );
void write( FileStorage& fs ) const;
protected:
bool initImpl( const Mat& image, const Rect2d& boundingBox );
bool updateImpl( const Mat& image, Rect2d& boundingBox );
TrackerKCF::Params params;
};
/*
* Constructor
*/
Ptr<TrackerKCF> TrackerKCF::createTracker(const TrackerKCF::Params &parameters){
return Ptr<TrackerKCFImpl>(new TrackerKCFImpl(parameters));
}
TrackerKCFImpl::TrackerKCFImpl( const TrackerKCF::Params &parameters ) :
params( parameters )
{
isInit = false;
}
void TrackerKCFImpl::read( const cv::FileNode& fn ){
params.read( fn );
}
void TrackerKCFImpl::write( cv::FileStorage& fs ) const{
params.write( fs );
}
bool TrackerKCFImpl::initImpl( const Mat& image, const Rect2d& boundingBox ){
model=Ptr<TrackerKCFModel>(new TrackerKCFModel(params));
return true;
}
bool TrackerKCFImpl::updateImpl( const Mat& image, Rect2d& boundingBox ){return true;}
/*
* Parameters
*/
TrackerKCF::Params::Params(){
}
void TrackerKCF::Params::read( const cv::FileNode& fn ){
}
void TrackerKCF::Params::write( cv::FileStorage& fs ) const{
}
} /* namespace cv */
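/*---------------USAGE----------------------*/
/* after STEPS 1-3 the new tracker can be
* created and driven like any other tracker.
* A minimal sketch (see also samples/kcf.cpp;
* firstFrame, frame and roi are placeholder
* variables):
*/
/*------------------------------------------*/
Ptr<Tracker> tracker = Tracker::create( "KCF" );
tracker->init( firstFrame, roi ); // roi: initial bounding box of the target
for (;;)
{
// grab the next frame into 'frame', then:
tracker->update( frame, roi ); // roi now holds the estimated target location
}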

@ -67,3 +67,29 @@
year={2013},
organization={IEEE}
}
@article{KCF,
title = {High-Speed Tracking with Kernelized Correlation Filters},
journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on},
author = {Henriques, J. F. and Caseiro, R. and Martins, P. and Batista, J.},
year = {2015},
doi = {10.1109/TPAMI.2014.2345390},
}
@inproceedings{KCF_ECCV,
title = {Exploiting the Circulant Structure of Tracking-by-detection with Kernels},
author = {Henriques, J. F. and Caseiro, R. and Martins, P. and Batista, J.},
booktitle = {Proceedings of the European Conference on Computer Vision},
year = {2012},
}
@INPROCEEDINGS{KCF_CN,
author={Danelljan, M. and Khan, F.S. and Felsberg, M. and van de Weijer, J.},
booktitle={Computer Vision and Pattern Recognition (CVPR), 2014 IEEE Conference on},
title={Adaptive Color Attributes for Real-Time Visual Tracking},
year={2014},
month={June},
pages={1090-1097},
keywords={computer vision;feature extraction;image colour analysis;image representation;image sequences;adaptive color attributes;benchmark color sequences;color features;color representations;computer vision;image description;real-time visual tracking;tracking-by-detection framework;Color;Computational modeling;Covariance matrices;Image color analysis;Kernel;Target tracking;Visualization;Adaptive Dimensionality Reduction;Appearance Model;Color Features;Visual Tracking},
doi={10.1109/CVPR.2014.143},
}

@ -1189,6 +1189,60 @@ class CV_EXPORTS_W TrackerTLD : public Tracker
BOILERPLATE_CODE("TLD",TrackerTLD);
};
/** @brief KCF is a novel tracking framework that utilizes the properties of circulant matrices to enhance processing speed.
* This tracking method is an implementation of @cite KCF_ECCV, extended to KCF with color-names features (@cite KCF_CN).
* The original KCF paper, as well as the Matlab implementation, is available at <http://home.isr.uc.pt/~henriques/circulant/index.html>.
* For more information about KCF with color-names features, please refer to
* <http://www.cvl.isy.liu.se/research/objrec/visualtracking/colvistrack/index.html>.
*/
class CV_EXPORTS_W TrackerKCF : public Tracker
{
public:
/**
* \brief Feature type to be used in tracking: grayscale, color-names, or compressed color-names.
* The modes available now:
- "GRAY" -- Use grayscale values as the feature
- "CN" -- Color-names feature
*/
enum MODE {GRAY, CN, CN2};
struct CV_EXPORTS Params
{
/**
* \brief Constructor
*/
Params();
/**
* \brief Read parameters from file, currently unused
*/
void read( const FileNode& /*fn*/ );
/**
* \brief Write parameters to file, currently unused
*/
void write( FileStorage& /*fs*/ ) const;
double sigma; //!< gaussian kernel bandwidth
double lambda; //!< regularization
double interp_factor; //!< linear interpolation factor for adaptation
double output_sigma_factor; //!< spatial bandwidth (proportional to target)
double pca_learning_rate; //!< compression learning rate
bool resize; //!< activate the resize feature to improve the processing speed
bool split_coeff; //!< split the training coefficients into two matrices
bool wrap_kernel; //!< wrap around the kernel values
bool compress_feature; //!< activate the pca method to compress the features
int max_patch_size; //!< threshold for the ROI size
int compressed_size; //!< feature size after compression
MODE descriptor; //!< descriptor type
};
/** @brief Constructor
@param parameters KCF parameters TrackerKCF::Params
*/
BOILERPLATE_CODE("KCF",TrackerKCF);
};
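// For reference, a sketch of the core identity from @cite KCF: training amounts to kernel
// ridge regression solved in the Fourier domain, alpha_hat = y_hat / (k_hat^xx + lambda),
// where k_hat^xx is the kernel autocorrelation of the training patch; sigma and lambda
// above parametrize the gaussian kernel and the regularizer.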
//! @}
} /* namespace cv */

@ -0,0 +1,194 @@
/*----------------------------------------------
* Usage:
* example_tracking_kcf <video_name>
*
* example:
* example_tracking_kcf Bolt/img/%04d.jpg
* example_tracking_kcf faceocc2.webm
*--------------------------------------------------*/
#include <opencv2/core/utility.hpp>
#include <opencv2/tracking.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>
#include <iostream>
#include <cstring>
using namespace std;
using namespace cv;
class BoxExtractor {
public:
Rect2d extract(Mat img);
Rect2d extract(const std::string& windowName, Mat img, bool showCrosshair = true);
struct handlerT{
bool isDrawing;
Rect2d box;
Mat image;
// initializer list
handlerT(): isDrawing(false) {};
}params;
private:
static void mouseHandler(int event, int x, int y, int flags, void *param);
void opencv_mouse_callback( int event, int x, int y, int , void *param );
};
int main( int argc, char** argv ){
// show help
if(argc<2){
cout<<
" Usage: example_tracking_kcf <video_name>\n"
" examples:\n"
" example_tracking_kcf Bolt/img/%04.jpg\n"
" example_tracking_kcf faceocc2.webm\n"
<< endl;
return 0;
}
// ROI selector
BoxExtractor box;
// create the tracker
Ptr<Tracker> tracker = Tracker::create( "KCF" );
// set input video
std::string video = argv[1];
VideoCapture cap(video);
Mat frame;
// get bounding box
cap >> frame;
Rect2d roi=box.extract("tracker",frame);
//quit if ROI was not selected
if(roi.width==0 || roi.height==0)
return 0;
// initialize the tracker
tracker->init(frame,roi);
// do the tracking
printf("Start the tracking process, press ESC to quit.\n");
for ( ;; ){
// get frame from the video
cap >> frame;
// stop the program if no more images
if(frame.rows==0 || frame.cols==0)
break;
// update the tracking result
tracker->update(frame,roi);
// draw the tracked object
rectangle( frame, roi, Scalar( 255, 0, 0 ), 2, 1 );
// show image with the tracked object
imshow("tracker",frame);
//quit on ESC button
if(waitKey(1)==27)break;
}
}
void BoxExtractor::mouseHandler(int event, int x, int y, int flags, void *param){
BoxExtractor *self = static_cast<BoxExtractor*>(param);
self->opencv_mouse_callback(event,x,y,flags,&self->params);
}
void BoxExtractor::opencv_mouse_callback( int event, int x, int y, int , void *param ){
handlerT * data = (handlerT*)param;
switch( event ){
// update the selected bounding box
case EVENT_MOUSEMOVE:
if( data->isDrawing ){
data->box.width = x-data->box.x;
data->box.height = y-data->box.y;
}
break;
// start to select the bounding box
case EVENT_LBUTTONDOWN:
data->isDrawing = true;
data->box = Rect2d( x, y, 0, 0 );
break;
// cleaning up the selected bounding box
case EVENT_LBUTTONUP:
data->isDrawing = false;
if( data->box.width < 0 ){
data->box.x += data->box.width;
data->box.width *= -1;
}
if( data->box.height < 0 ){
data->box.y += data->box.height;
data->box.height *= -1;
}
break;
}
}
Rect2d BoxExtractor::extract(Mat img){
return extract("Bounding Box Extractor", img);
}
Rect2d BoxExtractor::extract(const std::string& windowName, Mat img, bool showCrosshair){
int key=0;
// show the image and give feedback to user
imshow(windowName,img);
printf("Select an object to track and then press SPACE/BACKSPACE/ENTER button!\n");
// copy the data, rectangle should be drawn in the fresh image
params.image=img.clone();
// select the object
setMouseCallback( windowName, mouseHandler, (void *)this );
// end selection process on SPACE (32), ESC (27) or ENTER (13)
while(!(key==32 || key==27 || key==13)){
// draw the selected object
rectangle(
params.image,
params.box,
Scalar(255,0,0),2,1
);
// draw a crosshair in the middle of the bounding box
if(showCrosshair){
// horizontal line
line(
params.image,
Point((int)params.box.x,(int)(params.box.y+params.box.height/2)),
Point((int)(params.box.x+params.box.width),(int)(params.box.y+params.box.height/2)),
Scalar(255,0,0),2,1
);
// vertical line
line(
params.image,
Point((int)(params.box.x+params.box.width/2),(int)params.box.y),
Point((int)(params.box.x+params.box.width/2),(int)(params.box.y+params.box.height)),
Scalar(255,0,0),2,1
);
}
// show the image with the bounding box
imshow(windowName,params.image);
// reset the image
params.image=img.clone();
//get keyboard event
key=waitKey(1);
}
return params.box;
}

@ -48,8 +48,8 @@
using namespace std;
using namespace cv;
#define NUM_TEST_FRAMES 500
#define TEST_VIDEO_INDEX 1 //TLD Dataset Video Index from 1-10
#define NUM_TEST_FRAMES 100
#define TEST_VIDEO_INDEX 7 //TLD Dataset Video Index from 1-10
//#define RECORD_VIDEO_FLG
static Mat image;

File diff suppressed because it is too large

@ -0,0 +1,133 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
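// Both kernels below compute the normalized cross-correlation between 15x15 (N = 225)
// patches and the stored model samples:
//   NCC(a,b) = (sum(a*b) - sum(a)*sum(b)/N) / (sqrt(sum(a^2)-sum(a)^2/N) * sqrt(sum(b^2)-sum(b)^2/N))
// Work-items [0,500) handle positive samples and [500,1000) negative ones (500 appears
// to mirror MAX_EXAMPLES_IN_MODEL on the host side).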
__kernel void NCC(__global const uchar *patch,
__global const uchar *positiveSamples,
__global const uchar *negativeSamples,
__global float *ncc,
int posNum,
int negNum)
{
int id = get_global_id(0);
if (id >= 1000) return;
bool posFlg;
if (id < 500)
posFlg = true;
if (id >= 500)
{
//Negative index
id = id - 500;
posFlg = false;
}
//Variables
int s1 = 0, s2 = 0, n1 = 0, n2 = 0, prod = 0;
float sq1 = 0, sq2 = 0, ares = 0;
int N = 225;
//NCC with positive sample
if (posFlg && id < posNum)
{
for (int i = 0; i < N; i++)
{
s1 += positiveSamples[id * N + i];
s2 += patch[i];
n1 += positiveSamples[id * N + i] * positiveSamples[id * N + i];
n2 += patch[i] * patch[i];
prod += positiveSamples[id * N + i] * patch[i];
}
sq1 = sqrt(max(0.0, n1 - 1.0 * s1 * s1 / N));
sq2 = sqrt(max(0.0, n2 - 1.0 * s2 * s2 / N));
ares = (sq2 == 0) ? sq1 / fabs(sq1) : (prod - s1 * s2 / N) / sq1 / sq2;
ncc[id] = ares;
}
//NCC with negative sample
if (!posFlg && id < negNum)
{
for (int i = 0; i < N; i++)
{
s1 += negativeSamples[id * N + i];
s2 += patch[i];
n1 += negativeSamples[id * N + i] * negativeSamples[id * N + i];
n2 += patch[i] * patch[i];
prod += negativeSamples[id * N + i] * patch[i];
}
sq1 = sqrt(max(0.0, n1 - 1.0 * s1 * s1 / N));
sq2 = sqrt(max(0.0, n2 - 1.0 * s2 * s2 / N));
ares = (sq2 == 0) ? sq1 / fabs(sq1) : (prod - s1 * s2 / N) / sq1 / sq2;
ncc[id+500] = ares;
}
}
__kernel void batchNCC(__global const uchar *patches,
__global const uchar *positiveSamples,
__global const uchar *negativeSamples,
__global float *posNcc,
__global float *negNcc,
int posNum,
int negNum,
int patchNum)
{
int id = get_global_id(0);
bool posFlg;
if (id < 500*patchNum)
posFlg = true;
if (id >= 500*patchNum)
{
//Negative index
id = id - 500*patchNum;
posFlg = false;
}
int modelSampleID = id % 500;
int patchID = id / 500;
//Variables
int s1 = 0, s2 = 0, n1 = 0, n2 = 0, prod = 0;
float sq1 = 0, sq2 = 0, ares = 0;
int N = 225;
//NCC with positive sample
if (posFlg && modelSampleID < posNum)
{
for (int i = 0; i < N; i++)
{
s1 += positiveSamples[modelSampleID * N + i];
s2 += patches[patchID*N + i];
n1 += positiveSamples[modelSampleID * N + i] * positiveSamples[modelSampleID * N + i];
n2 += patches[patchID*N + i] * patches[patchID*N + i];
prod += positiveSamples[modelSampleID * N + i] * patches[patchID*N + i];
}
sq1 = sqrt(max(0.0, n1 - 1.0 * s1 * s1 / N));
sq2 = sqrt(max(0.0, n2 - 1.0 * s2 * s2 / N));
ares = (sq2 == 0) ? sq1 / fabs(sq1) : (prod - s1 * s2 / N) / sq1 / sq2;
posNcc[id] = ares;
}
//NCC with negative sample
if (!posFlg && modelSampleID < negNum)
{
for (int i = 0; i < N; i++)
{
s1 += negativeSamples[modelSampleID * N + i];
s2 += patches[patchID*N + i];
n1 += negativeSamples[modelSampleID * N + i] * negativeSamples[modelSampleID * N + i];
n2 += patches[patchID*N + i] * patches[patchID*N + i];
prod += negativeSamples[modelSampleID * N + i] * patches[patchID*N + i];
}
sq1 = sqrt(max(0.0, n1 - 1.0 * s1 * s1 / N));
sq2 = sqrt(max(0.0, n2 - 1.0 * s2 * s2 / N));
ares = (sq2 == 0) ? sq1 / fabs(sq1) : (prod - s1 * s2 / N) / sq1 / sq2;
negNcc[id] = ares;
}
}

@ -44,5 +44,11 @@
#include "opencv2/tracking.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/core/ocl.hpp"
namespace cv
{
extern const double ColorNames[][10];
}
#endif

@ -65,19 +65,232 @@ namespace cv
// Calculate Relative similarity of the patch (NN-Model)
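// A sketch of the similarity used below: S+ and S- are the maximum patch similarities
// 0.5*(NCC+1) over the positive and negative examples, and Sr = S+ / (S+ + S-).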
double TLDDetector::Sr(const Mat_<uchar>& patch)
{
/*
int64 e1, e2;
float t;
e1 = getTickCount();
double splus = 0.0, sminus = 0.0;
for (int i = 0; i < (int)(*positiveExamples).size(); i++)
splus = std::max(splus, 0.5 * (NCC((*positiveExamples)[i], patch) + 1.0));
for (int i = 0; i < (int)(*negativeExamples).size(); i++)
sminus = std::max(sminus, 0.5 * (NCC((*negativeExamples)[i], patch) + 1.0));
e2 = getTickCount();
t = (e2 - e1) / getTickFrequency()*1000.0;
printf("Sr: %f\n", t);
if (splus + sminus == 0.0)
return 0.0;
return splus / (sminus + splus);
*/
//int64 e1, e2;
//float t;
//e1 = getTickCount();
double splus = 0.0, sminus = 0.0;
Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
for (int i = 0; i < *posNum; i++)
{
modelSample.data = &(posExp->data[i * 225]);
splus = std::max(splus, 0.5 * (NCC(modelSample, patch) + 1.0));
}
for (int i = 0; i < *negNum; i++)
{
modelSample.data = &(negExp->data[i * 225]);
sminus = std::max(sminus, 0.5 * (NCC(modelSample, patch) + 1.0));
}
//e2 = getTickCount();
//t = (e2 - e1) / getTickFrequency()*1000.0;
//printf("Sr CPU: %f\n", t);
if (splus + sminus == 0.0)
return 0.0;
return splus / (sminus + splus);
}
double TLDDetector::ocl_Sr(const Mat_<uchar>& patch)
{
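// GPU variant of Sr(): upload the patch and the flat example buffers, run the "NCC"
// kernel over 1000 work-items (500 positive + 500 negative slots), read the
// correlations back, and do the max-reduction on the CPU.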
//int64 e1, e2, e3, e4;
//double t;
//e1 = getTickCount();
//e3 = getTickCount();
double splus = 0.0, sminus = 0.0;
UMat devPatch = patch.getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
UMat devPositiveSamples = posExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
UMat devNegativeSamples = negExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
UMat devNCC(1, 2*MAX_EXAMPLES_IN_MODEL, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY);
ocl::Kernel k;
ocl::ProgramSource src = ocl::tracking::tldDetector_oclsrc;
String error;
ocl::Program prog(src, NULL, error);
k.create("NCC", prog);
if (k.empty())
printf("Kernel create failed!!!\n");
k.args(
ocl::KernelArg::PtrReadOnly(devPatch),
ocl::KernelArg::PtrReadOnly(devPositiveSamples),
ocl::KernelArg::PtrReadOnly(devNegativeSamples),
ocl::KernelArg::PtrWriteOnly(devNCC),
posNum,
negNum);
//e4 = getTickCount();
//t = (e4 - e3) / getTickFrequency()*1000.0;
//printf("Mem Cpy GPU: %f\n", t);
size_t globSize = 1000;
//e3 = getTickCount();
if (!k.run(1, &globSize, NULL, false))
printf("Kernel Run Error!!!");
//e4 = getTickCount();
//t = (e4 - e3) / getTickFrequency()*1000.0;
//printf("Kernel Run GPU: %f\n", t);
//e3 = getTickCount();
Mat resNCC = devNCC.getMat(ACCESS_READ);
//e4 = getTickCount();
//t = (e4 - e3) / getTickFrequency()*1000.0;
//printf("Read Mem GPU: %f\n", t);
////Compare
//Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
//for (int i = 0; i < 200; i+=17)
//{
// modelSample.data = &(posExp->data[i * 225]);
// printf("%f\t%f\n\n", resNCC.at<float>(i), NCC(modelSample, patch));
//}
//for (int i = 0; i < 200; i+=23)
//{
// modelSample.data = &(negExp->data[i * 225]);
// printf("%f\t%f\n", resNCC.at<float>(500+i), NCC(modelSample, patch));
//}
for (int i = 0; i < *posNum; i++)
splus = std::max(splus, 0.5 * (resNCC.at<float>(i) + 1.0));
for (int i = 0; i < *negNum; i++)
sminus = std::max(sminus, 0.5 * (resNCC.at<float>(i+500) +1.0));
//e2 = getTickCount();
//t = (e2 - e1) / getTickFrequency()*1000.0;
//printf("Sr GPU: %f\n\n", t);
if (splus + sminus == 0.0)
return 0.0;
return splus / (sminus + splus);
}
void TLDDetector::ocl_batchSrSc(const Mat_<uchar>& patches, double *resultSr, double *resultSc, int numOfPatches)
{
//int64 e1, e2, e3, e4;
//double t;
//e1 = getTickCount();
//e3 = getTickCount();
UMat devPatches = patches.getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
UMat devPositiveSamples = posExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
UMat devNegativeSamples = negExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
UMat devPosNCC(MAX_EXAMPLES_IN_MODEL, numOfPatches, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY);
UMat devNegNCC(MAX_EXAMPLES_IN_MODEL, numOfPatches, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY);
ocl::Kernel k;
ocl::ProgramSource src = ocl::tracking::tldDetector_oclsrc;
String error;
ocl::Program prog(src, NULL, error);
k.create("batchNCC", prog);
if (k.empty())
printf("Kernel create failed!!!\n");
k.args(
ocl::KernelArg::PtrReadOnly(devPatches),
ocl::KernelArg::PtrReadOnly(devPositiveSamples),
ocl::KernelArg::PtrReadOnly(devNegativeSamples),
ocl::KernelArg::PtrWriteOnly(devPosNCC),
ocl::KernelArg::PtrWriteOnly(devNegNCC),
posNum,
negNum,
numOfPatches);
//e4 = getTickCount();
//t = (e4 - e3) / getTickFrequency()*1000.0;
//printf("Mem Cpy GPU: %f\n", t);
// 2 -> Pos&Neg
size_t globSize = 2 * numOfPatches*MAX_EXAMPLES_IN_MODEL;
//e3 = getTickCount();
if (!k.run(1, &globSize, NULL, false))
printf("Kernel Run Error!!!");
//e4 = getTickCount();
//t = (e4 - e3) / getTickFrequency()*1000.0;
//printf("Kernel Run GPU: %f\n", t);
//e3 = getTickCount();
Mat posNCC = devPosNCC.getMat(ACCESS_READ);
Mat negNCC = devNegNCC.getMat(ACCESS_READ);
//e4 = getTickCount();
//t = (e4 - e3) / getTickFrequency()*1000.0;
//printf("Read Mem GPU: %f\n", t);
//Calculate Srs
for (int id = 0; id < numOfPatches; id++)
{
double spr = 0.0, smr = 0.0, spc = 0.0, smc = 0;
int med = getMedian((*timeStampsPositive));
for (int i = 0; i < *posNum; i++)
{
spr = std::max(spr, 0.5 * (posNCC.at<float>(id * 500 + i) + 1.0));
if ((int)(*timeStampsPositive)[i] <= med)
spc = std::max(spc, 0.5 * (posNCC.at<float>(id * 500 + i) + 1.0));
}
for (int i = 0; i < *negNum; i++)
smc = smr = std::max(smr, 0.5 * (negNCC.at<float>(id * 500 + i) + 1.0));
if (spr + smr == 0.0)
resultSr[id] = 0.0;
else
resultSr[id] = spr / (smr + spr);
if (spc + smc == 0.0)
resultSc[id] = 0.0;
else
resultSc[id] = spc / (smc + spc);
}
////Compare positive NCCs
/*Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
Mat_<uchar> patch(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
for (int j = 0; j < numOfPatches; j++)
{
for (int i = 0; i < 1; i++)
{
modelSample.data = &(posExp->data[i * 225]);
patch.data = &(patches.data[j * 225]);
printf("%f\t%f\n", resultSr[j], Sr(patch));
printf("%f\t%f\n", resultSc[j], Sc(patch));
}
}*/
//for (int i = 0; i < 200; i+=23)
//{
// modelSample.data = &(negExp->data[i * 225]);
// printf("%f\t%f\n", resNCC.at<float>(500+i), NCC(modelSample, patch));
//}
//e2 = getTickCount();
//t = (e2 - e1) / getTickFrequency()*1000.0;
//printf("Sr GPU: %f\n\n", t);
}
// Calculate Conservative similarity of the patch (NN-Model)
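// Conservative similarity: the same ratio as Sr, but S+ only considers positive examples
// whose timestamp is at most the median, i.e. the earlier half of the model.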
double TLDDetector::Sc(const Mat_<uchar>& patch)
{
/*
int64 e1, e2;
float t;
e1 = getTickCount();
double splus = 0.0, sminus = 0.0;
int med = getMedian((*timeStampsPositive));
for (int i = 0; i < (int)(*positiveExamples).size(); i++)
@ -87,6 +300,117 @@ namespace cv
}
for (int i = 0; i < (int)(*negativeExamples).size(); i++)
sminus = std::max(sminus, 0.5 * (NCC((*negativeExamples)[i], patch) + 1.0));
e2 = getTickCount();
t = (e2 - e1) / getTickFrequency()*1000.0;
printf("Sc: %f\n", t);
if (splus + sminus == 0.0)
return 0.0;
return splus / (sminus + splus);
*/
//int64 e1, e2;
//double t;
//e1 = getTickCount();
double splus = 0.0, sminus = 0.0;
Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
int med = getMedian((*timeStampsPositive));
for (int i = 0; i < *posNum; i++)
{
if ((int)(*timeStampsPositive)[i] <= med)
{
modelSample.data = &(posExp->data[i * 225]);
splus = std::max(splus, 0.5 * (NCC(modelSample, patch) + 1.0));
}
}
for (int i = 0; i < *negNum; i++)
{
modelSample.data = &(negExp->data[i * 225]);
sminus = std::max(sminus, 0.5 * (NCC(modelSample, patch) + 1.0));
}
//e2 = getTickCount();
//t = (e2 - e1) / getTickFrequency()*1000.0;
//printf("Sc: %f\n", t);
if (splus + sminus == 0.0)
return 0.0;
return splus / (sminus + splus);
}
double TLDDetector::ocl_Sc(const Mat_<uchar>& patch)
{
//int64 e1, e2, e3, e4;
//float t;
//e1 = getTickCount();
double splus = 0.0, sminus = 0.0;
//e3 = getTickCount();
UMat devPatch = patch.getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
UMat devPositiveSamples = posExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
UMat devNegativeSamples = negExp->getUMat(ACCESS_READ, USAGE_ALLOCATE_DEVICE_MEMORY);
UMat devNCC(1, 2 * MAX_EXAMPLES_IN_MODEL, CV_32FC1, ACCESS_RW, USAGE_ALLOCATE_DEVICE_MEMORY);
ocl::Kernel k;
ocl::ProgramSource src = ocl::tracking::tldDetector_oclsrc;
String error;
ocl::Program prog(src, NULL, error);
k.create("NCC", prog);
if (k.empty())
printf("Kernel create failed!!!\n");
k.args(
ocl::KernelArg::PtrReadOnly(devPatch),
ocl::KernelArg::PtrReadOnly(devPositiveSamples),
ocl::KernelArg::PtrReadOnly(devNegativeSamples),
ocl::KernelArg::PtrWriteOnly(devNCC),
posNum,
negNum);
//e4 = getTickCount();
//t = (e4 - e3) / getTickFrequency()*1000.0;
//printf("Mem Cpy GPU: %f\n", t);
size_t globSize = 1000;
//e3 = getTickCount();
if (!k.run(1, &globSize, NULL, false))
printf("Kernel Run Error!!!");
//e4 = getTickCount();
//t = (e4 - e3) / getTickFrequency()*1000.0;
//printf("Kernel Run GPU: %f\n", t);
//e3 = getTickCount();
Mat resNCC = devNCC.getMat(ACCESS_READ);
//e4 = getTickCount();
//t = (e4 - e3) / getTickFrequency()*1000.0;
//printf("Read Mem GPU: %f\n", t);
////Compare
//Mat_<uchar> modelSample(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
//for (int i = 0; i < 200; i+=17)
//{
// modelSample.data = &(posExp->data[i * 225]);
// printf("%f\t%f\n\n", resNCC.at<float>(i), NCC(modelSample, patch));
//}
//for (int i = 0; i < 200; i+=23)
//{
// modelSample.data = &(negExp->data[i * 225]);
// printf("%f\t%f\n", resNCC.at<float>(500+i), NCC(modelSample, patch));
//}
int med = getMedian((*timeStampsPositive));
for (int i = 0; i < *posNum; i++)
if ((int)(*timeStampsPositive)[i] <= med)
splus = std::max(splus, 0.5 * (resNCC.at<float>(i) +1.0));
for (int i = 0; i < *negNum; i++)
sminus = std::max(sminus, 0.5 * (resNCC.at<float>(i + 500) + 1.0));
//e2 = getTickCount();
//t = (e2 - e1) / getTickFrequency()*1000.0;
//printf("Sc GPU: %f\n\n", t);
if (splus + sminus == 0.0)
return 0.0;
return splus / (sminus + splus);
@ -129,76 +453,243 @@ namespace cv
}
//Detection - returns most probable new target location (Max Sc)
bool TLDDetector::detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector<LabeledPatch>& patches, Size initSize)
{
patches.clear();
Mat resized_img, blurred_img;
Mat_<uchar> standardPatch(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
img.copyTo(resized_img);
imgBlurred.copyTo(blurred_img);
Mat tmp;
int dx = initSize.width / 10, dy = initSize.height / 10;
Size2d size = img.size();
double scale = 1.0;
int total = 0, pass = 0;
int npos = 0, nneg = 0;
double tmp = 0, maxSc = -5.0;
double maxSc = -5.0;
Rect2d maxScRect;
int scaleID;
std::vector <Mat> resized_imgs, blurred_imgs;
std::vector <Point> varBuffer, ensBuffer;
std::vector <int> varScaleIDs, ensScaleIDs;
//int64 e1, e2;
//double t;
//e1 = getTickCount();
//Detection part
//Generate windows and filter by variance
scaleID = 0;
resized_imgs.push_back(img);
blurred_imgs.push_back(imgBlurred);
do
{
Mat_<double> intImgP, intImgP2;
computeIntegralImages(resized_img, intImgP, intImgP2);
prepareClassifiers((int)blurred_img.step[0]);
for (int i = 0, imax = cvFloor((0.0 + resized_img.cols - initSize.width) / dx); i < imax; i++)
computeIntegralImages(resized_imgs[scaleID], intImgP, intImgP2);
for (int i = 0, imax = cvFloor((0.0 + resized_imgs[scaleID].cols - initSize.width) / dx); i < imax; i++)
{
for (int j = 0, jmax = cvFloor((0.0 + resized_img.rows - initSize.height) / dy); j < jmax; j++)
for (int j = 0, jmax = cvFloor((0.0 + resized_imgs[scaleID].rows - initSize.height) / dy); j < jmax; j++)
{
LabeledPatch labPatch;
total++;
if (!patchVariance(intImgP, intImgP2, originalVariancePtr, Point(dx * i, dy * j), initSize))
continue;
if (ensembleClassifierNum(&blurred_img.at<uchar>(dy * j, dx * i)) <= ENSEMBLE_THRESHOLD)
continue;
pass++;
varBuffer.push_back(Point(dx * i, dy * j));
varScaleIDs.push_back(scaleID);
}
}
scaleID++;
size.width /= SCALE_STEP;
size.height /= SCALE_STEP;
scale *= SCALE_STEP;
resize(img, tmp, size, 0, 0, DOWNSCALE_MODE);
resized_imgs.push_back(tmp);
GaussianBlur(resized_imgs[scaleID], tmp, GaussBlurKernelSize, 0.0f);
blurred_imgs.push_back(tmp);
} while (size.width >= initSize.width && size.height >= initSize.height);
//e2 = getTickCount();
//t = (e2 - e1) / getTickFrequency()*1000.0;
//printf("Variance: %d\t%f\n", varBuffer.size(), t);
//Ensemble classification
//e1 = getTickCount();
for (int i = 0; i < (int)varBuffer.size(); i++)
{
prepareClassifiers(static_cast<int> (blurred_imgs[varScaleIDs[i]].step[0]));
if (ensembleClassifierNum(&blurred_imgs[varScaleIDs[i]].at<uchar>(varBuffer[i].y, varBuffer[i].x)) <= ENSEMBLE_THRESHOLD)
continue;
ensBuffer.push_back(varBuffer[i]);
ensScaleIDs.push_back(varScaleIDs[i]);
}
//e2 = getTickCount();
//t = (e2 - e1) / getTickFrequency()*1000.0;
//printf("Ensemble: %d\t%f\n", ensBuffer.size(), t);
labPatch.rect = Rect2d(dx * i * scale, dy * j * scale, initSize.width * scale, initSize.height * scale);
resample(resized_img, Rect2d(Point(dx * i, dy * j), initSize), standardPatch);
tmp = Sr(standardPatch);
//NN classification
//e1 = getTickCount();
for (int i = 0; i < (int)ensBuffer.size(); i++)
{
LabeledPatch labPatch;
double curScale = pow(SCALE_STEP, ensScaleIDs[i]);
labPatch.rect = Rect2d(ensBuffer[i].x*curScale, ensBuffer[i].y*curScale, initSize.width * curScale, initSize.height * curScale);
resample(resized_imgs[ensScaleIDs[i]], Rect2d(ensBuffer[i], initSize), standardPatch);
////To fix: Check the paper, probably this cause wrong learning
//
labPatch.isObject = tmp > THETA_NN;
labPatch.shouldBeIntegrated = abs(tmp - THETA_NN) < 0.1;
patches.push_back(labPatch);
//
double srValue, scValue;
srValue = Sr(standardPatch);
if (!labPatch.isObject)
{
nneg++;
////To fix: Check the paper, probably this cause wrong learning
//
labPatch.isObject = srValue > THETA_NN;
labPatch.shouldBeIntegrated = abs(srValue - THETA_NN) < 0.1;
patches.push_back(labPatch);
//
if (!labPatch.isObject)
{
nneg++;
continue;
}
else
{
npos++;
}
scValue = Sc(standardPatch);
if (scValue > maxSc)
{
maxSc = scValue;
maxScRect = labPatch.rect;
}
}
//e2 = getTickCount();
//t = (e2 - e1) / getTickFrequency()*1000.0;
//printf("NN: %d\t%f\n", patches.size(), t);
if (maxSc < 0)
return false;
res = maxScRect;
return true;
}
bool TLDDetector::ocl_detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector<LabeledPatch>& patches, Size initSize)
{
patches.clear();
Mat_<uchar> standardPatch(STANDARD_PATCH_SIZE, STANDARD_PATCH_SIZE);
Mat tmp;
int dx = initSize.width / 10, dy = initSize.height / 10;
Size2d size = img.size();
double scale = 1.0;
int npos = 0, nneg = 0;
double maxSc = -5.0;
Rect2d maxScRect;
int scaleID;
std::vector <Mat> resized_imgs, blurred_imgs;
std::vector <Point> varBuffer, ensBuffer;
std::vector <int> varScaleIDs, ensScaleIDs;
//int64 e1, e2;
//double t;
//e1 = getTickCount();
//Detection part
//Generate windows and filter by variance
scaleID = 0;
resized_imgs.push_back(img);
blurred_imgs.push_back(imgBlurred);
do
{
Mat_<double> intImgP, intImgP2;
computeIntegralImages(resized_imgs[scaleID], intImgP, intImgP2);
for (int i = 0, imax = cvFloor((0.0 + resized_imgs[scaleID].cols - initSize.width) / dx); i < imax; i++)
{
for (int j = 0, jmax = cvFloor((0.0 + resized_imgs[scaleID].rows - initSize.height) / dy); j < jmax; j++)
{
if (!patchVariance(intImgP, intImgP2, originalVariancePtr, Point(dx * i, dy * j), initSize))
continue;
}
else
{
npos++;
}
tmp = Sc(standardPatch);
if (tmp > maxSc)
{
maxSc = tmp;
maxScRect = labPatch.rect;
}
varBuffer.push_back(Point(dx * i, dy * j));
varScaleIDs.push_back(scaleID);
}
}
scaleID++;
size.width /= SCALE_STEP;
size.height /= SCALE_STEP;
scale *= SCALE_STEP;
resize(img, resized_img, size, 0, 0, DOWNSCALE_MODE);
GaussianBlur(resized_img, blurred_img, GaussBlurKernelSize, 0.0f);
resize(img, tmp, size, 0, 0, DOWNSCALE_MODE);
resized_imgs.push_back(tmp);
GaussianBlur(resized_imgs[scaleID], tmp, GaussBlurKernelSize, 0.0f);
blurred_imgs.push_back(tmp);
} while (size.width >= initSize.width && size.height >= initSize.height);
//e2 = getTickCount();
//t = (e2 - e1) / getTickFrequency()*1000.0;
//printf("Variance: %d\t%f\n", varBuffer.size(), t);
//Ensemble classification
//e1 = getTickCount();
for (int i = 0; i < (int)varBuffer.size(); i++)
{
prepareClassifiers((int)blurred_imgs[varScaleIDs[i]].step[0]);
if (ensembleClassifierNum(&blurred_imgs[varScaleIDs[i]].at<uchar>(varBuffer[i].y, varBuffer[i].x)) <= ENSEMBLE_THRESHOLD)
continue;
ensBuffer.push_back(varBuffer[i]);
ensScaleIDs.push_back(varScaleIDs[i]);
}
//e2 = getTickCount();
//t = (e2 - e1) / getTickFrequency()*1000.0;
//printf("Ensemble: %d\t%f\n", ensBuffer.size(), t);
//NN classification
//e1 = getTickCount();
//Prepare batch of patches
int numOfPatches = (int)ensBuffer.size();
Mat_<uchar> stdPatches(numOfPatches, 225);
double *resultSr = new double[numOfPatches];
double *resultSc = new double[numOfPatches];
uchar *patchesData = stdPatches.data;
for (int i = 0; i < (int)ensBuffer.size(); i++)
{
resample(resized_imgs[ensScaleIDs[i]], Rect2d(ensBuffer[i], initSize), standardPatch);
uchar *stdPatchData = standardPatch.data;
for (int j = 0; j < 225; j++)
patchesData[225*i+j] = stdPatchData[j];
}
//Calculate Sr and Sc batches
ocl_batchSrSc(stdPatches, resultSr, resultSc, numOfPatches);
for (int i = 0; i < (int)ensBuffer.size(); i++)
{
LabeledPatch labPatch;
standardPatch.data = &stdPatches.data[225 * i];
double curScale = pow(SCALE_STEP, ensScaleIDs[i]);
labPatch.rect = Rect2d(ensBuffer[i].x*curScale, ensBuffer[i].y*curScale, initSize.width * curScale, initSize.height * curScale);
double srValue, scValue;
srValue = resultSr[i];
//srValue = Sr(standardPatch);
//printf("%f\t%f\t\n", srValue, resultSr[i]);
////To fix: Check the paper, probably this cause wrong learning
//
labPatch.isObject = srValue > THETA_NN;
labPatch.shouldBeIntegrated = abs(srValue - THETA_NN) < 0.1;
patches.push_back(labPatch);
//
if (!labPatch.isObject)
{
nneg++;
continue;
}
else
{
npos++;
}
scValue = resultSc[i];
if (scValue > maxSc)
{
maxSc = scValue;
maxScRect = labPatch.rect;
}
}
//e2 = getTickCount();
//t = (e2 - e1) / getTickFrequency()*1000.0;
//printf("NN: %d\t%f\n", patches.size(), t);
if (maxSc < 0)
return false;

@ -43,6 +43,7 @@
#define OPENCV_TLD_DETECTOR
#include "precomp.hpp"
#include "opencl_kernels_tracking.hpp"
#include "tldEnsembleClassifier.hpp"
#include "tldUtils.hpp"
@ -73,9 +74,14 @@ namespace cv
inline double ensembleClassifierNum(const uchar* data);
inline void prepareClassifiers(int rowstep);
double Sr(const Mat_<uchar>& patch);
double ocl_Sr(const Mat_<uchar>& patch);
double Sc(const Mat_<uchar>& patch);
double ocl_Sc(const Mat_<uchar>& patch);
void ocl_batchSrSc(const Mat_<uchar>& patches, double *resultSr, double *resultSc, int numOfPatches);
std::vector<TLDEnsembleClassifier> classifiers;
Mat *posExp, *negExp;
int *posNum, *negNum;
std::vector<Mat_<uchar> > *positiveExamples, *negativeExamples;
std::vector<int> *timeStampsPositive, *timeStampsNegative;
double *originalVariancePtr;
@ -87,6 +93,7 @@ namespace cv
bool isObject, shouldBeIntegrated;
};
bool detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector<LabeledPatch>& patches, Size initSize);
bool ocl_detect(const Mat& img, const Mat& imgBlurred, Rect2d& res, std::vector<LabeledPatch>& patches, Size initSize);
protected:

@ -58,11 +58,11 @@ namespace cv
// Calculate measure locations from 15x15 grid on minSize patches
void TLDEnsembleClassifier::stepPrefSuff(std::vector<Vec4b>& arr, int pos, int len, int gridSize)
{
#if 0
#if 0
int step = len / (gridSize - 1), pref = (len - step * (gridSize - 1)) / 2;
for (int i = 0; i < (int)(sizeof(x1) / sizeof(x1[0])); i++)
arr[i] = pref + arr[i] * step;
#else
#else
int total = len - gridSize;
int quo = total / (gridSize - 1), rem = total % (gridSize - 1);
int smallStep = quo, bigStep = quo + 1;

@ -64,6 +64,5 @@ namespace cv
std::vector<Point2i> offset;
int lastStep_;
};
}
}

@ -56,6 +56,15 @@ namespace cv
detector = Ptr<TLDDetector>(new TLDDetector());
//Propagate data to Detector
posNum = 0;
negNum = 0;
posExp = Mat(Size(225, 500), CV_8UC1);
negExp = Mat(Size(225, 500), CV_8UC1);
detector->posNum = &posNum;
detector->negNum = &negNum;
detector->posExp = &posExp;
detector->negExp = &negExp;
detector->positiveExamples = &positiveExamples;
detector->negativeExamples = &negativeExamples;
detector->timeStampsPositive = &timeStampsPositive;
@ -69,14 +78,13 @@ namespace cv
scaledImg, blurredImg, GaussBlurKernelSize, SCALE_STEP);
GaussianBlur(image, image_blurred, GaussBlurKernelSize, 0.0);
TLDDetector::generateScanGrid(image.rows, image.cols, minSize_, scanGrid);
getClosestN(scanGrid, Rect2d(boundingBox.x / scale, boundingBox.y / scale, boundingBox.width / scale,
boundingBox.height / scale), 10, closest);
getClosestN(scanGrid, Rect2d(boundingBox.x / scale, boundingBox.y / scale, boundingBox.width / scale, boundingBox.height / scale), 10, closest);
Mat_<uchar> blurredPatch(minSize);
TLDEnsembleClassifier::makeClassifiers(minSize, MEASURES_PER_CLASSIFIER, GRIDSIZE, detector->classifiers);
//Generate initial positive samples and put them to the model
positiveExamples.reserve(200);
for (int i = 0; i < (int)closest.size(); i++)
{
for (int j = 0; j < 20; j++)
@ -188,6 +196,11 @@ namespace cv
void TrackerTLDModel::integrateAdditional(const std::vector<Mat_<uchar> >& eForModel, const std::vector<Mat_<uchar> >& eForEnsemble, bool isPositive)
{
int positiveIntoModel = 0, negativeIntoModel = 0, positiveIntoEnsemble = 0, negativeIntoEnsemble = 0;
if ((int)eForModel.size() == 0) return;
//int64 e1, e2;
//double t;
//e1 = getTickCount();
for (int k = 0; k < (int)eForModel.size(); k++)
{
double sr = detector->Sr(eForModel[k]);
@ -218,6 +231,79 @@ namespace cv
detector->classifiers[i].integrate(eForEnsemble[k], isPositive);
}
}
//e2 = getTickCount();
//t = (e2 - e1) / getTickFrequency() * 1000;
//printf("Integrate Additional: %fms\n", t);
/*
if( negativeIntoModel > 0 )
dfprintf((stdout, "negativeIntoModel = %d ", negativeIntoModel));
if( positiveIntoModel > 0 )
dfprintf((stdout, "positiveIntoModel = %d ", positiveIntoModel));
if( negativeIntoEnsemble > 0 )
dfprintf((stdout, "negativeIntoEnsemble = %d ", negativeIntoEnsemble));
if( positiveIntoEnsemble > 0 )
dfprintf((stdout, "positiveIntoEnsemble = %d ", positiveIntoEnsemble));
dfprintf((stdout, "\n"));*/
}
void TrackerTLDModel::ocl_integrateAdditional(const std::vector<Mat_<uchar> >& eForModel, const std::vector<Mat_<uchar> >& eForEnsemble, bool isPositive)
{
int positiveIntoModel = 0, negativeIntoModel = 0, positiveIntoEnsemble = 0, negativeIntoEnsemble = 0;
if ((int)eForModel.size() == 0) return;
//int64 e1, e2;
//double t;
//e1 = getTickCount();
//Prepare batch of patches
int numOfPatches = (int)eForModel.size();
Mat_<uchar> stdPatches(numOfPatches, 225);
double *resultSr = new double[numOfPatches];
double *resultSc = new double[numOfPatches];
uchar *patchesData = stdPatches.data;
for (int i = 0; i < numOfPatches; i++)
{
uchar *stdPatchData = eForModel[i].data;
for (int j = 0; j < 225; j++)
patchesData[225 * i + j] = stdPatchData[j];
}
//Calculate Sr and Sc batches
detector->ocl_batchSrSc(stdPatches, resultSr, resultSc, numOfPatches);
for (int k = 0; k < (int)eForModel.size(); k++)
{
double sr = resultSr[k];
if ((sr > THETA_NN) != isPositive)
{
if (isPositive)
{
positiveIntoModel++;
pushIntoModel(eForModel[k], true);
}
else
{
negativeIntoModel++;
pushIntoModel(eForModel[k], false);
}
}
double p = 0;
for (int i = 0; i < (int)detector->classifiers.size(); i++)
p += detector->classifiers[i].posteriorProbability(eForEnsemble[k].data, (int)eForEnsemble[k].step[0]);
p /= detector->classifiers.size();
if ((p > ENSEMBLE_THRESHOLD) != isPositive)
{
if (isPositive)
positiveIntoEnsemble++;
else
negativeIntoEnsemble++;
for (int i = 0; i < (int)detector->classifiers.size(); i++)
detector->classifiers[i].integrate(eForEnsemble[k], isPositive);
}
}
//e2 = getTickCount();
//t = (e2 - e1) / getTickFrequency() * 1000;
//printf("Integrate Additional OCL: %fms\n", t);
/*
if( negativeIntoModel > 0 )
dfprintf((stdout, "negativeIntoModel = %d ", negativeIntoModel));
@ -238,12 +324,30 @@ namespace cv
std::vector<int>* proxyT;
if (positive)
{
if (posNum < 500)
{
uchar *patchPtr = example.data;
uchar *modelPtr = posExp.data;
for (int i = 0; i < STANDARD_PATCH_SIZE*STANDARD_PATCH_SIZE; i++)
modelPtr[posNum*STANDARD_PATCH_SIZE*STANDARD_PATCH_SIZE + i] = patchPtr[i];
posNum++;
}
proxyV = &positiveExamples;
proxyN = &timeStampPositiveNext;
proxyT = &timeStampsPositive;
}
else
{
if (negNum < 500)
{
uchar *patchPtr = example.data;
uchar *modelPtr = negExp.data;
for (int i = 0; i < STANDARD_PATCH_SIZE*STANDARD_PATCH_SIZE; i++)
modelPtr[negNum*STANDARD_PATCH_SIZE*STANDARD_PATCH_SIZE + i] = patchPtr[i];
negNum++;
}
proxyV = &negativeExamples;
proxyN = &timeStampNegativeNext;
proxyT = &timeStampsNegative;
@ -268,9 +372,5 @@ namespace cv
dfprintf((port, "\tpositiveExamples.size() = %d\n", (int)positiveExamples.size()));
dfprintf((port, "\tnegativeExamples.size() = %d\n", (int)negativeExamples.size()));
}
}
}

@ -50,9 +50,6 @@ namespace cv
{
namespace tld
{
class TrackerTLDModel : public TrackerModel
{
public:
@ -61,11 +58,14 @@ namespace cv
void setBoudingBox(Rect2d boundingBox){ boundingBox_ = boundingBox; }
void integrateRelabeled(Mat& img, Mat& imgBlurred, const std::vector<TLDDetector::LabeledPatch>& patches);
void integrateAdditional(const std::vector<Mat_<uchar> >& eForModel, const std::vector<Mat_<uchar> >& eForEnsemble, bool isPositive);
void ocl_integrateAdditional(const std::vector<Mat_<uchar> >& eForModel, const std::vector<Mat_<uchar> >& eForEnsemble, bool isPositive);
Size getMinSize(){ return minSize_; }
void printme(FILE* port = stdout);
Ptr<TLDDetector> detector;
std::vector<Mat_<uchar> > positiveExamples, negativeExamples;
Mat posExp, negExp;
int posNum, negNum;
std::vector<int> timeStampsPositive, timeStampsNegative;
int timeStampPositiveNext, timeStampNegativeNext;
double originalVariance_;
@ -80,7 +80,6 @@ namespace cv
void modelUpdateImpl(){}
Rect2d boundingBox_;
RNG rng;
};
}

@ -116,11 +116,20 @@ bool TrackerTLDImpl::updateImpl(const Mat& image, Rect2d& boundingBox)
std::vector<Rect2d> candidates;
std::vector<double> candidatesRes;
bool trackerNeedsReInit = false;
bool DETECT_FLG = false;
for( int i = 0; i < 2; i++ )
{
Rect2d tmpCandid = boundingBox;
if( ( (i == 0) && !data->failedLastTime && trackerProxy->update(image, tmpCandid) ) ||
((i == 1) && (tldModel->detector->detect(imageForDetector, image_blurred, tmpCandid, detectorResults, tldModel->getMinSize()))))
if (i == 1)
{
if (ocl::haveOpenCL())
DETECT_FLG = tldModel->detector->ocl_detect(imageForDetector, image_blurred, tmpCandid, detectorResults, tldModel->getMinSize());
else
DETECT_FLG = tldModel->detector->detect(imageForDetector, image_blurred, tmpCandid, detectorResults, tldModel->getMinSize());
}
if( ( (i == 0) && !data->failedLastTime && trackerProxy->update(image, tmpCandid) ) || ( DETECT_FLG))
{
candidates.push_back(tmpCandid);
if( i == 0 )
@ -202,10 +211,17 @@ bool TrackerTLDImpl::updateImpl(const Mat& image, Rect2d& boundingBox)
tldModel->integrateRelabeled(imageForDetector, image_blurred, detectorResults);
//dprintf(("%d relabeled by nExpert\n", negRelabeled));
pExpert.additionalExamples(examplesForModel, examplesForEnsemble);
tldModel->integrateAdditional(examplesForModel, examplesForEnsemble, true);
if (ocl::haveOpenCL())
tldModel->ocl_integrateAdditional(examplesForModel, examplesForEnsemble, true);
else
tldModel->integrateAdditional(examplesForModel, examplesForEnsemble, true);
examplesForModel.clear(); examplesForEnsemble.clear();
nExpert.additionalExamples(examplesForModel, examplesForEnsemble);
tldModel->integrateAdditional(examplesForModel, examplesForEnsemble, false);
if (ocl::haveOpenCL())
tldModel->ocl_integrateAdditional(examplesForModel, examplesForEnsemble, false);
else
tldModel->integrateAdditional(examplesForModel, examplesForEnsemble, false);
}
else
{

@ -60,7 +60,6 @@ void TrackerTLD::Params::write(cv::FileStorage& /*fs*/) const {}
namespace tld
{
class TrackerProxy
{
public:

@ -110,6 +110,7 @@ Ptr<Tracker> Tracker::create( const String& trackerType )
BOILERPLATE_CODE("BOOSTING",TrackerBoosting);
BOILERPLATE_CODE("MEDIANFLOW",TrackerMedianFlow);
BOILERPLATE_CODE("TLD",TrackerTLD);
BOILERPLATE_CODE("KCF",TrackerKCF);
return Ptr<Tracker>();
}

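With KCF now registered in the factory above, it can be created by name like the other trackers. A minimal usage sketch (the video source and initial bounding box are placeholders, not part of this commit):

#include "opencv2/tracking.hpp"
#include "opencv2/videoio.hpp"
#include "opencv2/highgui.hpp"
using namespace cv;

int main()
{
    Ptr<Tracker> tracker = Tracker::create("KCF"); // resolved by the BOILERPLATE_CODE entry above
    VideoCapture cap("video.avi");                 // hypothetical input
    Mat frame;
    cap >> frame;
    Rect2d roi(100, 100, 80, 80);                  // hypothetical initial bounding box
    tracker->init(frame, roi);
    while (cap.read(frame))
    {
        if (tracker->update(frame, roi))           // roi is updated in place
            rectangle(frame, roi, Scalar(0, 255, 0), 2);
        imshow("tracking", frame);
        if (waitKey(1) == 27) break;               // ESC to quit
    }
    return 0;
}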
@ -0,0 +1,686 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <complex>
/*---------------------------
| TrackerKCFModel
|---------------------------*/
namespace cv{
/**
* \brief Implementation of TrackerModel for the KCF algorithm
*/
class TrackerKCFModel : public TrackerModel{
public:
TrackerKCFModel(TrackerKCF::Params /*params*/){}
~TrackerKCFModel(){}
protected:
void modelEstimationImpl( const std::vector<Mat>& /*responses*/ ){}
void modelUpdateImpl(){}
};
} /* namespace cv */
/*---------------------------
| TrackerKCF
|---------------------------*/
namespace cv{
/*
* Prototype
*/
class TrackerKCFImpl : public TrackerKCF {
public:
TrackerKCFImpl( const TrackerKCF::Params &parameters = TrackerKCF::Params() );
void read( const FileNode& /*fn*/ );
void write( FileStorage& /*fs*/ ) const;
protected:
/*
* basic functions and vars
*/
bool initImpl( const Mat& /*image*/, const Rect2d& boundingBox );
bool updateImpl( const Mat& image, Rect2d& boundingBox );
TrackerKCF::Params params;
/*
* KCF functions and vars
*/
void createHanningWindow(OutputArray _dst, const cv::Size winSize, const int type) const;
void inline fft2(const Mat src, std::vector<Mat> & dest) const;
void inline fft2(const Mat src, Mat & dest) const;
void inline ifft2(const Mat src, Mat & dest) const;
void inline pixelWiseMult(const std::vector<Mat> src1, const std::vector<Mat> src2, std::vector<Mat> & dest, const int flags, const bool conjB=false) const;
void inline sumChannels(std::vector<Mat> src, Mat & dest) const;
void inline updateProjectionMatrix(const Mat src, Mat & old_cov,Mat & _proj_mtx,double pca_rate, int compressed_sz) const;
void inline compress(const Mat _proj_mtx, const Mat src, Mat & dest) const;
bool getSubWindow(const Mat img, const Rect roi, Mat& patch) const;
void extractCN(Mat _patch, Mat & cnFeatures) const;
void denseGaussKernel(const double sigma, const Mat _x, const Mat _y, Mat & _k) const;
void calcResponse(const Mat _alphaf, const Mat _k, Mat & _response) const;
void calcResponse(const Mat _alphaf, const Mat _alphaf_den, const Mat _k, Mat & _response) const;
void shiftRows(Mat& mat) const;
void shiftRows(Mat& mat, int n) const;
void shiftCols(Mat& mat, int n) const;
private:
double output_sigma;
Rect2d roi;
Mat hann; //hann window filter
Mat y,yf; // training response and its FFT
Mat x,xf; // observation and its FFT
Mat k,kf; // dense gaussian kernel and its FFT
Mat kf_lambda; // kf+lambda
Mat new_alphaf, alphaf; // training coefficients
Mat new_alphaf_den, alphaf_den; // for split training coefficients
Mat z, new_z; // model
Mat response; // detection result
Mat old_cov_mtx, proj_mtx; // for feature compression
bool resizeImage; // resize the image whenever the patch size is too large
int frame;
};
/*
* Constructor
*/
Ptr<TrackerKCF> TrackerKCF::createTracker(const TrackerKCF::Params &parameters){
return Ptr<TrackerKCFImpl>(new TrackerKCFImpl(parameters));
}
TrackerKCFImpl::TrackerKCFImpl( const TrackerKCF::Params &parameters ) :
params( parameters )
{
isInit = false;
resizeImage = false;
CV_Assert(params.descriptor == GRAY || params.descriptor == CN /*|| params.descriptor == CN2*/);
}
void TrackerKCFImpl::read( const cv::FileNode& fn ){
params.read( fn );
}
void TrackerKCFImpl::write( cv::FileStorage& fs ) const {
params.write( fs );
}
/*
* Initialization:
* - creating hann window filter
* - ROI padding
* - creating a gaussian response for the training ground-truth
* - perform FFT on the gaussian response
*/
bool TrackerKCFImpl::initImpl( const Mat& /*image*/, const Rect2d& boundingBox ){
frame=0;
roi = boundingBox;
//calculate output sigma
output_sigma=sqrt(roi.width*roi.height)*params.output_sigma_factor;
output_sigma=-0.5/(output_sigma*output_sigma);
//resize the ROI whenever needed
if(params.resize && roi.width*roi.height>params.max_patch_size){
resizeImage=true;
roi.x/=2.0;
roi.y/=2.0;
roi.width/=2.0;
roi.height/=2.0;
}
// add padding to the roi
roi.x-=roi.width/2;
roi.y-=roi.height/2;
roi.width*=2;
roi.height*=2;
// initialize the hann window filter
createHanningWindow(hann, roi.size(), CV_64F);
if(params.descriptor==CN){
Mat layers[] = {hann, hann, hann, hann, hann, hann, hann, hann, hann, hann};
merge(layers, 10, hann);
}
// create gaussian response
y=Mat::zeros((int)roi.height,(int)roi.width,CV_64F);
for(unsigned i=0;i<roi.height;i++){
for(unsigned j=0;j<roi.width;j++){
y.at<double>(i,j)=(i-roi.height/2+1)*(i-roi.height/2+1)+(j-roi.width/2+1)*(j-roi.width/2+1);
}
}
y*=(double)output_sigma;
cv::exp(y,y);
// perform fourier transform on the gaussian response
fft2(y,yf);
model=Ptr<TrackerKCFModel>(new TrackerKCFModel(params));
// TODO: return true only if roi is inside the image
return true;
}
/*
* Main part of the KCF algorithm
*/
bool TrackerKCFImpl::updateImpl( const Mat& image, Rect2d& boundingBox ){
double minVal, maxVal; // min-max response
Point minLoc,maxLoc; // min-max location
Mat zc;
Mat img=image.clone();
// check the channels of the input image, grayscale is preferred
CV_Assert(image.channels() == 1 || image.channels() == 3);
// resize the image whenever needed
if(resizeImage)resize(img,img,Size(img.cols/2,img.rows/2));
// extract and pre-process the patch
if(!getSubWindow(img,roi, x))return false;
// detection part
if(frame>0){
//compute the gaussian kernel
if(params.compress_feature){
compress(proj_mtx,x,x);
compress(proj_mtx,z,zc);
denseGaussKernel(params.sigma,x,zc,k);
}else
denseGaussKernel(params.sigma,x,z,k);
// calculate filter response
if(params.split_coeff)
calcResponse(alphaf,alphaf_den,k,response);
else
calcResponse(alphaf,k,response);
// extract the maximum response
minMaxLoc( response, &minVal, &maxVal, &minLoc, &maxLoc );
roi.x+=(maxLoc.x-roi.width/2+1);
roi.y+=(maxLoc.y-roi.height/2+1);
// update the bounding box
boundingBox.x=(resizeImage?roi.x*2:roi.x)+boundingBox.width/2;
boundingBox.y=(resizeImage?roi.y*2:roi.y)+boundingBox.height/2;
}
// extract the patch for learning purpose
if(!getSubWindow(img,roi, x))return false;
//update the training data
new_z=x.clone();
if(frame==0)
z=x.clone();
else
z=(1.0-params.interp_factor)*z+params.interp_factor*new_z;
if(params.compress_feature){
// feature compression
updateProjectionMatrix(z,old_cov_mtx,proj_mtx,params.pca_learning_rate,params.compressed_size);
compress(proj_mtx,x,x);
}
// Kernel Regularized Least-Squares, calculate alphas
denseGaussKernel(params.sigma,x,x,k);
fft2(k,kf);
kf_lambda=kf+params.lambda;
/* TODO: optimize this element-wise division
* new_alphaf = yf ./ kf_lambda
* (a+bi)/(c+di) = [(ac+bd)+i(bc-ad)]/(c^2+d^2)
*/
new_alphaf=Mat_<Vec2d >(yf.rows, yf.cols);
std::complex<double> temp;
if(params.split_coeff){
mulSpectrums(yf,kf,new_alphaf,0);
mulSpectrums(kf,kf_lambda,new_alphaf_den,0);
}else{
for(int i=0;i<yf.rows;i++){
for(int j=0;j<yf.cols;j++){
temp=std::complex<double>(yf.at<Vec2d>(i,j)[0],yf.at<Vec2d>(i,j)[1])/(std::complex<double>(kf_lambda.at<Vec2d>(i,j)[0],kf_lambda.at<Vec2d>(i,j)[1])/*+std::complex<double>(0.0000000001,0.0000000001)*/);
new_alphaf.at<Vec2d >(i,j)[0]=temp.real();
new_alphaf.at<Vec2d >(i,j)[1]=temp.imag();
}
}
}
// update the RLS model
if(frame==0){
alphaf=new_alphaf.clone();
if(params.split_coeff)alphaf_den=new_alphaf_den.clone();
}else{
alphaf=(1.0-params.interp_factor)*alphaf+params.interp_factor*new_alphaf;
if(params.split_coeff)alphaf_den=(1.0-params.interp_factor)*alphaf_den+params.interp_factor*new_alphaf_den;
}
frame++;
return true;
}
/*-------------------------------------
| implementation of the KCF functions
|-------------------------------------*/
/*
* hann window filter
*/
void TrackerKCFImpl::createHanningWindow(OutputArray _dst, const cv::Size winSize, const int type) const {
CV_Assert( type == CV_32FC1 || type == CV_64FC1 );
_dst.create(winSize, type);
Mat dst = _dst.getMat();
int rows = dst.rows, cols = dst.cols;
AutoBuffer<double> _wc(cols);
double * const wc = (double *)_wc;
double coeff0 = 2.0 * CV_PI / (double)(cols - 1), coeff1 = 2.0 * CV_PI / (double)(rows - 1);
for(int j = 0; j < cols; j++)
wc[j] = 0.5 * (1.0 - cos(coeff0 * j));
if(dst.depth() == CV_32F){
for(int i = 0; i < rows; i++){
float* dstData = dst.ptr<float>(i);
double wr = 0.5 * (1.0 - cos(coeff1 * i));
for(int j = 0; j < cols; j++)
dstData[j] = (float)(wr * wc[j]);
}
}else{
for(int i = 0; i < rows; i++){
double* dstData = dst.ptr<double>(i);
double wr = 0.5 * (1.0 - cos(coeff1 * i));
for(int j = 0; j < cols; j++)
dstData[j] = wr * wc[j];
}
}
// perform batch sqrt for SSE performance gains
//cv::sqrt(dst, dst); // matlab does not use the square-rooted version
}
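// For reference: the separable window computed above is the outer product of
// two 1-D Hann windows,
//   w(i,j) = 0.25 * (1 - cos(2*pi*i/(rows-1))) * (1 - cos(2*pi*j/(cols-1)))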
/*
* simplification of the Fourier transform function in OpenCV
*/
void inline TrackerKCFImpl::fft2(const Mat src, Mat & dest) const {
std::vector<Mat> layers(src.channels());
std::vector<Mat> outputs(src.channels());
split(src, layers);
for(int i=0;i<src.channels();i++){
dft(layers[i],outputs[i],DFT_COMPLEX_OUTPUT);
}
merge(outputs,dest);
}
void inline TrackerKCFImpl::fft2(const Mat src, std::vector<Mat> & dest) const {
std::vector<Mat> layers(src.channels());
dest.clear();
dest.resize(src.channels());
split(src, layers);
for(int i=0;i<src.channels();i++){
dft(layers[i],dest[i],DFT_COMPLEX_OUTPUT);
}
}
/*
* simplification of the inverse Fourier transform function in OpenCV
*/
void inline TrackerKCFImpl::ifft2(const Mat src, Mat & dest) const {
idft(src,dest,DFT_SCALE+DFT_REAL_OUTPUT);
}
/*
* Point-wise multiplication of two multichannel Mat data
*/
void inline TrackerKCFImpl::pixelWiseMult(const std::vector<Mat> src1, const std::vector<Mat> src2, std::vector<Mat> & dest, const int flags, const bool conjB) const {
dest.clear();
dest.resize(src1.size());
for(unsigned i=0;i<src1.size();i++){
mulSpectrums(src1[i], src2[i], dest[i],flags,conjB);
}
}
/*
* Combines all channels of a multi-channel Mat into a single channel
*/
void inline TrackerKCFImpl::sumChannels(std::vector<Mat> src, Mat & dest) const {
dest=src[0].clone();
for(unsigned i=1;i<src.size();i++){
dest+=src[i];
}
}
/*
* obtains the projection matrix using PCA
*/
void inline TrackerKCFImpl::updateProjectionMatrix(const Mat src, Mat & old_cov,Mat & _proj_mtx, double pca_rate, int compressed_sz) const {
CV_Assert(compressed_sz<=src.channels());
// compute average
std::vector<Mat> layers(src.channels());
std::vector<Scalar> average(src.channels());
split(src,layers);
for (int i=0;i<src.channels();i++){
average[i]=mean(layers[i]);
layers[i]-=average[i];
}
// calc covariance matrix
Mat data,new_cov;
merge(layers,data);
data=data.reshape(1,src.rows*src.cols);
new_cov=1.0/(double)(src.rows*src.cols-1)*(data.t()*data);
if(old_cov.rows==0)old_cov=new_cov.clone();
// calc PCA
Mat w, u, vt;
SVD::compute((1.0-pca_rate)*old_cov+pca_rate*new_cov, w, u, vt);
// extract the projection matrix
_proj_mtx=u(Rect(0,0,compressed_sz,src.channels())).clone();
Mat proj_vars=Mat::eye(compressed_sz,compressed_sz,_proj_mtx.type());
for(int i=0;i<compressed_sz;i++){
proj_vars.at<double>(i,i)=w.at<double>(i);
}
// update the covariance matrix
old_cov=(1.0-pca_rate)*old_cov+pca_rate*_proj_mtx*proj_vars*_proj_mtx.t();
}
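// Written out, with alpha = pca_rate and m = compressed_sz, the update above is:
//   U*W*V^T = SVD((1-alpha)*old_cov + alpha*new_cov),   proj = U(:,1..m)
//   old_cov <- (1-alpha)*old_cov + alpha * proj * diag(w_1..w_m) * proj^T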
/*
* compress the features
*/
void inline TrackerKCFImpl::compress(const Mat _proj_mtx, const Mat src, Mat & dest) const {
Mat data=src.reshape(1,src.rows*src.cols);
Mat compressed=data*_proj_mtx;
dest=compressed.reshape(_proj_mtx.cols,src.rows).clone();
}
/*
* obtain the patch and apply hann window filter to it
*/
bool TrackerKCFImpl::getSubWindow(const Mat img, const Rect _roi, Mat& patch) const {
Rect region=_roi;
// return false if roi is outside the image
if((_roi.x+_roi.width<0)
||(_roi.y+_roi.height<0)
||(_roi.x>=img.cols)
||(_roi.y>=img.rows)
)return false;
// extract patch inside the image
if(_roi.x<0){region.x=0;region.width+=_roi.x;}
if(_roi.y<0){region.y=0;region.height+=_roi.y;}
if(_roi.x+_roi.width>img.cols)region.width=img.cols-_roi.x;
if(_roi.y+_roi.height>img.rows)region.height=img.rows-_roi.y;
if(region.width>img.cols)region.width=img.cols;
if(region.height>img.rows)region.height=img.rows;
patch=img(region).clone();
// add some padding to compensate when the patch is outside image border
int addTop,addBottom, addLeft, addRight;
addTop=region.y-_roi.y;
addBottom=(_roi.height+_roi.y>img.rows?_roi.height+_roi.y-img.rows:0);
addLeft=region.x-_roi.x;
addRight=(_roi.width+_roi.x>img.cols?_roi.width+_roi.x-img.cols:0);
copyMakeBorder(patch,patch,addTop,addBottom,addLeft,addRight,BORDER_REPLICATE);
if(patch.rows==0 || patch.cols==0)return false;
// extract the desired descriptors
switch(params.descriptor){
case GRAY:
if(img.channels()>1)cvtColor(patch,patch, CV_BGR2GRAY);
patch.convertTo(patch,CV_64F);
patch=patch/255.0-0.5; // normalize to range -0.5 .. 0.5
break;
case CN:
CV_Assert(img.channels() == 3);
extractCN(patch,patch);
break;
case CN2:
if(patch.channels()>1)cvtColor(patch,patch, CV_BGR2GRAY);
break;
}
patch=patch.mul(hann); // hann window filter
return true;
}
/* Convert BGR to ColorNames
*/
void TrackerKCFImpl::extractCN(Mat _patch, Mat & cnFeatures) const {
Vec3b & pixel = _patch.at<Vec3b>(0,0);
unsigned index;
Mat temp = Mat::zeros(_patch.rows,_patch.cols,CV_64FC(10));
for(int i=0;i<_patch.rows;i++){
for(int j=0;j<_patch.cols;j++){
pixel=_patch.at<Vec3b>(i,j);
index=(unsigned)(floor(pixel[2]/8)+32*floor(pixel[1]/8)+32*32*floor(pixel[0]/8));
//copy the values
for(int _k=0;_k<10;_k++){
temp.at<Vec<double,10> >(i,j)[_k]=ColorNames[index][_k];
}
}
}
cnFeatures=temp.clone();
}
/*
* dense gauss kernel function
*/
void TrackerKCFImpl::denseGaussKernel(const double sigma, const Mat _x, const Mat _y, Mat & _k) const {
std::vector<Mat> _xf,_yf,xyf_v;
Mat xy,xyf;
double normX, normY;
fft2(_x,_xf);
fft2(_y,_yf);
normX=norm(_x);
normX*=normX;
normY=norm(_y);
normY*=normY;
pixelWiseMult(_xf,_yf,xyf_v,0,true);
sumChannels(xyf_v,xyf);
ifft2(xyf,xyf);
if(params.wrap_kernel){
shiftRows(xyf, _x.rows/2);
shiftCols(xyf, _x.cols/2);
}
//(xx + yy - 2 * xy) / numel(x)
xy=(normX+normY-2*xyf)/(_x.rows*_x.cols*_x.channels());
// TODO: check whether we really need thresholding or not
//threshold(xy,xy,0.0,0.0,THRESH_TOZERO);//max(0, (xx + yy - 2 * xy) / numel(x))
for(int i=0;i<xy.rows;i++){
for(int j=0;j<xy.cols;j++){
if(xy.at<double>(i,j)<0.0)xy.at<double>(i,j)=0.0;
}
}
double sig=-1.0/(sigma*sigma);
xy=sig*xy;
exp(xy,_k);
}
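// In formula form, with N = rows*cols*channels and ^ denoting the per-channel DFT,
// the code above evaluates
//   k = exp( -1/sigma^2 * max(0, (||x||^2 + ||y||^2 - 2*ifft(sum_c x^_c .* conj(y^_c))) / N) )
// (the optional wrap_kernel circular shift is omitted from the formula)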
/* CIRCULAR SHIFT Function
* http://stackoverflow.com/questions/10420454/shift-like-matlab-function-rows-or-columns-of-a-matrix-in-opencv
*/
// circular shift one row from up to down
void TrackerKCFImpl::shiftRows(Mat& mat) const {
Mat temp;
Mat m;
int _k = (mat.rows-1);
mat.row(_k).copyTo(temp);
for(; _k > 0 ; _k-- ) {
m = mat.row(_k);
mat.row(_k-1).copyTo(m);
}
m = mat.row(0);
temp.copyTo(m);
}
// circular shift n rows from up to down if n > 0, -n rows from down to up if n < 0
void TrackerKCFImpl::shiftRows(Mat& mat, int n) const {
if( n < 0 ) {
n = -n;
flip(mat,mat,0);
for(int _k=0; _k < n;_k++) {
shiftRows(mat);
}
flip(mat,mat,0);
}else{
for(int _k=0; _k < n;_k++) {
shiftRows(mat);
}
}
}
//circular shift n columns from left to right if n > 0, -n columns from right to left if n < 0
void TrackerKCFImpl::shiftCols(Mat& mat, int n) const {
if(n < 0){
n = -n;
flip(mat,mat,1);
transpose(mat,mat);
shiftRows(mat,n);
transpose(mat,mat);
flip(mat,mat,1);
}else{
transpose(mat,mat);
shiftRows(mat,n);
transpose(mat,mat);
}
}
/*
* calculate the detection response
*/
void TrackerKCFImpl::calcResponse(const Mat _alphaf, const Mat _k, Mat & _response) const {
//alpha f--> 2channels ; k --> 1 channel;
Mat _kf;
fft2(_k,_kf);
Mat spec;
mulSpectrums(_alphaf,_kf,spec,0,false);
ifft2(spec,_response);
}
/*
* calculate the detection response for the split form
*/
void TrackerKCFImpl::calcResponse(const Mat _alphaf, const Mat _alphaf_den, const Mat _k, Mat & _response) const {
Mat _kf;
fft2(_k,_kf);
Mat spec;
Mat spec2=Mat_<Vec2d >(_k.rows, _k.cols);
std::complex<double> temp;
mulSpectrums(_alphaf,_kf,spec,0,false);
for(int i=0;i<_k.rows;i++){
for(int j=0;j<_k.cols;j++){
temp=std::complex<double>(spec.at<Vec2d>(i,j)[0],spec.at<Vec2d>(i,j)[1])/(std::complex<double>(_alphaf_den.at<Vec2d>(i,j)[0],_alphaf_den.at<Vec2d>(i,j)[1])/*+std::complex<double>(0.0000000001,0.0000000001)*/);
spec2.at<Vec2d >(i,j)[0]=temp.real();
spec2.at<Vec2d >(i,j)[1]=temp.imag();
}
}
ifft2(spec2,_response);
}
/*----------------------------------------------------------------------*/
/*
* Parameters
*/
TrackerKCF::Params::Params(){
sigma=0.2;
lambda=0.01;
interp_factor=0.075;
output_sigma_factor=1.0/16.0;
resize=true;
max_patch_size=80*80;
descriptor=CN;
split_coeff=true;
wrap_kernel=false;
//feature compression
compress_feature=true;
compressed_size=2;
pca_learning_rate=0.15;
}
void TrackerKCF::Params::read( const cv::FileNode& /*fn*/ ){}
void TrackerKCF::Params::write( cv::FileStorage& /*fs*/ ) const{}
} /* namespace cv */

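For completeness, a short sketch of overriding the defaults above before instantiation; it assumes the GRAY/CN mode enum declared next to Params in tracker.hpp (the same values the constructor asserts on):

#include "opencv2/tracking.hpp"
using namespace cv;

Ptr<TrackerKCF> makeGrayKCF()
{
    TrackerKCF::Params p;                // starts from the defaults listed above
    p.descriptor = TrackerKCF::GRAY;     // grayscale features instead of ColorNames
    p.compress_feature = false;          // skip the PCA-based feature compression
    p.interp_factor = 0.05;              // slower model adaptation (illustrative value)
    return TrackerKCF::createTracker(p);
}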
@ -165,10 +165,10 @@ Note: the descriptor can be coupled with any keypoint extractor. The only demand
Note: a complete example can be found under /samples/cpp/tutorial_code/xfeatures2D/latch_match.cpp
*/
class CV_EXPORTS LATCH : public DescriptorExtractor
class CV_EXPORTS_W LATCH : public Feature2D
{
public:
static Ptr<LATCH> create(int bytes = 32, bool rotationInvariance = true, int half_ssd_size=3);
CV_WRAP static Ptr<LATCH> create(int bytes = 32, bool rotationInvariance = true, int half_ssd_size=3);
};
/** @brief Class implementing DAISY descriptor, described in @cite Tola10
@ -187,14 +187,14 @@ DAISY::NRM_SIFT mean that descriptors are normalized for L2 norm equal to 1.0 bu
@param use_orientation sample patterns using keypoints orientation, disabled by default.
*/
class CV_EXPORTS DAISY : public DescriptorExtractor
class CV_EXPORTS_W DAISY : public Feature2D
{
public:
enum
{
NRM_NONE = 100, NRM_PARTIAL = 101, NRM_FULL = 102, NRM_SIFT = 103,
};
static Ptr<DAISY> create( float radius = 15, int q_radius = 3, int q_theta = 8,
CV_WRAP static Ptr<DAISY> create( float radius = 15, int q_radius = 3, int q_theta = 8,
int q_hist = 8, int norm = DAISY::NRM_NONE, InputArray H = noArray(),
bool interpolation = true, bool use_orientation = false );

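Since LATCH and DAISY now derive from Feature2D, they plug into the standard detect/compute pipeline. A minimal sketch (the detector choice and input image are illustrative, not part of this commit):

#include "opencv2/features2d.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/xfeatures2d.hpp"
using namespace cv;

int main()
{
    Mat img = imread("scene.png", IMREAD_GRAYSCALE);          // hypothetical input
    std::vector<KeyPoint> kpts;
    ORB::create()->detect(img, kpts);                         // any keypoint detector works
    Mat descs;
    xfeatures2d::DAISY::create()->compute(img, kpts, descs);  // via the Feature2D interface
    return 0;
}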
@ -1267,7 +1267,7 @@ struct RoundingInvoker : ParallelLoopBody
{
for (int c = range.start; c < range.end; ++c)
{
scale_map->at<float>(r,c) = (float) round( scale_map->at<float>(r,c) );
scale_map->at<float>(r,c) = (float) cvRound( scale_map->at<float>(r,c) );
}
}
int r;

@ -55,3 +55,25 @@
year={2013},
organization={IEEE}
}
@article{Min2014,
title={Fast global image smoothing based on weighted least squares},
author={Min, Dongbo and Choi, Sunghwan and Lu, Jiangbo and Ham, Bumsub and Sohn, Kwanghoon and Do, Minh N},
journal={Image Processing, IEEE Transactions on},
volume={23},
number={12},
pages={5638--5653},
year={2014},
publisher={IEEE}
}
@inproceedings{Farbman2008,
title={Edge-preserving decompositions for multi-scale tone and detail manipulation},
author={Farbman, Zeev and Fattal, Raanan and Lischinski, Dani and Szeliski, Richard},
booktitle={ACM Transactions on Graphics (TOG)},
volume={27},
number={3},
pages={67},
year={2008},
organization={ACM}
}

@ -38,6 +38,7 @@
#define __OPENCV_XIMGPROC_HPP__
#include "ximgproc/edge_filter.hpp"
#include "ximgproc/disparity_filter.hpp"
#include "ximgproc/structured_edge_detection.hpp"
#include "ximgproc/seeds.hpp"

@ -43,49 +43,147 @@
namespace cv {
namespace ximgproc {
//! @addtogroup ximgproc_filters
//! @{
/** @brief Main interface for all disparity map filters.
*/
class CV_EXPORTS_W DisparityFilter : public Algorithm
{
public:
CV_WRAP virtual void filter(InputArray disparity_map, InputArray left_view, OutputArray filtered_disparity_map,Rect ROI) = 0;
/** @brief Apply filtering to the disparity map.
@param disparity_map_left disparity map of the left view, 1 channel, CV_16S type. Implicitly assumes that disparity
values are scaled by 16 (one-pixel disparity corresponds to the value of 16 in the disparity map). Disparity map
can have any resolution, it will be automatically resized to fit left_view resolution.
@param left_view left view of the original stereo-pair to guide the filtering process, 8-bit single-channel
or three-channel image.
@param filtered_disparity_map output disparity map.
@param ROI region of the disparity map to filter.
@param disparity_map_right optional argument, some implementations might also use the disparity map
of the right view to compute confidence maps, for instance.
@param right_view optional argument, some implementations might also use the right view of the original
stereo-pair.
*/
CV_WRAP virtual void filter(InputArray disparity_map_left, InputArray left_view, OutputArray filtered_disparity_map, Rect ROI, InputArray disparity_map_right = Mat(), InputArray right_view = Mat()) = 0;
};
/////////////////////////////////////////////////////////////////////////////////////////////////
class CV_EXPORTS_W DisparityDTFilter : public DisparityFilter
{
public:
CV_WRAP virtual double getSigmaSpatial() = 0;
CV_WRAP virtual void setSigmaSpatial(double _sigmaSpatial) = 0;
CV_WRAP virtual double getSigmaColor() = 0;
CV_WRAP virtual void setSigmaColor(double _sigmaColor) = 0;
};
CV_EXPORTS_W
Ptr<DisparityDTFilter> createDisparityDTFilter();
class CV_EXPORTS_W DisparityGuidedFilter : public DisparityFilter
{
public:
CV_WRAP virtual double getEps() = 0;
CV_WRAP virtual void setEps(double _eps) = 0;
CV_WRAP virtual int getRadius() = 0;
CV_WRAP virtual void setRadius(int _radius) = 0;
};
CV_EXPORTS_W
Ptr<DisparityGuidedFilter> createDisparityGuidedFilter();
/** @brief Disparity map filter based on Weighted Least Squares filter (in form of Fast Global Smoother that
is a lot faster than traditional Weighted Least Squares filter implementations) and optional use of
left-right-consistency-based confidence to refine the results in half-occlusions and uniform areas.
*/
class CV_EXPORTS_W DisparityWLSFilter : public DisparityFilter
{
public:
/** filter parameters */
/** @brief Lambda is a parameter defining the amount of regularization during filtering. Larger values force
filtered disparity map edges to adhere more to source image edges. Typical value is 8000.
*/
CV_WRAP virtual double getLambda() = 0;
/** @see getLambda */
CV_WRAP virtual void setLambda(double _lambda) = 0;
/** @brief SigmaColor is a parameter defining how sensitive the filtering process is to source image edges.
Large values can lead to disparity leakage through low-contrast edges. Small values can make the filter too
sensitive to noise and textures in the source image. Typical values range from 0.8 to 2.0.
*/
CV_WRAP virtual double getSigmaColor() = 0;
/** @see getSigmaColor */
CV_WRAP virtual void setSigmaColor(double _sigma_color) = 0;
/** confidence-related parameters */
/** @brief LRCthresh is a threshold of disparity difference used in left-right-consistency check during
confidence map computation. The default value of 24 (1.5 pixels) is virtually always good enough.
*/
CV_WRAP virtual int getLRCthresh() = 0;
/** @see getLRCthresh */
CV_WRAP virtual void setLRCthresh(int _LRC_thresh) = 0;
/** @brief DepthDiscontinuityRadius is a parameter used in confidence computation. It defines the size of
low-confidence regions around depth discontinuities. For typical window sizes used in stereo matching the
optimal value is around 5.
*/
CV_WRAP virtual int getDepthDiscontinuityRadius() = 0;
/** @see getDepthDiscontinuityRadius */
CV_WRAP virtual void setDepthDiscontinuityRadius(int _disc_radius) = 0;
/** @brief Get the confidence map that was used in the last filter call. It is a CV_32F one-channel image
with values ranging from 0.0 (totally untrusted regions of the raw disparity map) to 255.0 (regions containing
correct disparity values with a high degree of confidence).
*/
CV_WRAP virtual Mat getConfidenceMap() = 0;
};
/** @brief Factory method, create instance of DisparityWLSFilter and execute the initialization routines.
@param use_confidence filtering with confidence requires two disparity maps (for the left and right views) and is
approximately two times slower. However, quality is typically significantly better.
*/
CV_EXPORTS_W
Ptr<DisparityWLSFilter> createDisparityWLSFilter();
Ptr<DisparityWLSFilter> createDisparityWLSFilter(bool use_confidence);
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
/** @brief Function for reading ground truth disparity maps. Supports basic Middlebury
and MPI-Sintel formats. Note that the resulting disparity map is scaled by 16.
@param src_path path to the image, containing ground-truth disparity map
@param dst output disparity map, CV_16S depth
@result returns zero if successfully read the ground truth
*/
CV_EXPORTS
int readGT(String src_path,OutputArray dst);
/** @brief Function for computing mean square error for disparity maps
@param GT ground truth disparity map
@param src disparity map to evaluate
@param ROI region of interest
@result returns mean square error between GT and src
*/
CV_EXPORTS
double computeMSE(InputArray GT, InputArray src, Rect ROI);
/** @brief Function for computing the percent of "bad" pixels in the disparity map
(pixels where error is higher than a specified threshold)
@param GT ground truth disparity map
@param src disparity map to evaluate
@param ROI region of interest
@param thresh threshold used to determine "bad" pixels
@result returns percent of "bad" pixels in src relative to GT
*/
CV_EXPORTS
double computeBadPixelPercent(InputArray GT, InputArray src, Rect ROI, int thresh=24/*1.5 pixels*/);
/** @brief Function for creating a disparity map visualization (clamped CV_8U image)
@param src input disparity map (CV_16S depth)
@param dst output visualization
@param scale disparity map will be multiplied by this value for visualization
*/
CV_EXPORTS
void getDisparityVis(InputArray src,OutputArray dst,double scale=1.0);
//! @}
}
}
#endif

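A minimal sketch of the new interface (the sample later in this commit shows the full confidence-based setup; here the ROI is simply the whole image and the rectified inputs are assumed to be loaded already):

#include "opencv2/calib3d.hpp"
#include "opencv2/ximgproc/disparity_filter.hpp"
using namespace cv;
using namespace cv::ximgproc;

void filterDisparity(const Mat& left, const Mat& left_gray, const Mat& right_gray, Mat& filtered_disp)
{
    Ptr<StereoBM> matcher = StereoBM::create(160, 19);             // max_disparity, window_size
    Mat raw_disp;
    matcher->compute(left_gray, right_gray, raw_disp);
    Ptr<DisparityWLSFilter> wls = createDisparityWLSFilter(false); // no confidence
    wls->setLambda(8000.0);                                        // typical value, see getLambda above
    wls->setSigmaColor(1.5);
    wls->filter(raw_disp, left, filtered_disp, Rect(0, 0, left.cols, left.rows));
}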
@ -316,19 +316,69 @@ proportional to sigmaSpace .
CV_EXPORTS_W
void jointBilateralFilter(InputArray joint, InputArray src, OutputArray dst, int d, double sigmaColor, double sigmaSpace, int borderType = BORDER_DEFAULT);
//! @}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
class CV_EXPORTS_W WeightedLeastSquaresFilter : public Algorithm
{
public:
CV_WRAP virtual void filter(InputArray src, OutputArray dst) = 0;
virtual ~WeightedLeastSquaresFilter();
};
CV_EXPORTS_W Ptr<WeightedLeastSquaresFilter> createWeightedLeastSquaresFilter(InputArray guide, double lambda, double sigma_color, int num_iter=3);
CV_EXPORTS_W void weightedLeastSquaresFilter(InputArray guide, InputArray src, OutputArray dst, double lambda, double sigma_color, int num_iter=3);
/** @brief Interface for implementations of Fast Global Smoother filter.
For more details about this filter see @cite Min2014 and @cite Farbman2008 .
*/
class CV_EXPORTS_W FastGlobalSmootherFilter : public Algorithm
{
public:
/** @brief Apply smoothing operation to the source image.
@param src source image for filtering with unsigned 8-bit or signed 16-bit or floating-point 32-bit depth and up to 4 channels.
@param dst destination image.
*/
CV_WRAP virtual void filter(InputArray src, OutputArray dst) = 0;
};
/** @brief Factory method, create instance of FastGlobalSmootherFilter and execute the initialization routines.
@param guide image serving as guide for filtering. It should have 8-bit depth and either 1 or 3 channels.
@param lambda parameter defining the amount of regularization
@param sigma_color parameter, that is similar to color space sigma in bilateralFilter.
@param lambda_attenuation internal parameter, defining how much lambda decreases after each iteration. Normally,
it should be 0.25. Setting it to 1.0 may lead to streaking artifacts.
@param num_iter number of iterations used for filtering, 3 is usually enough.
For more details about Fast Global Smoother parameters, see the original paper @cite Min2014. However, please note that
there are several differences. Lambda attenuation described in the paper is implemented a bit differently so do not
expect the results to be identical to those from the paper; sigma_color values from the paper should be multiplied by 255.0 to
achieve the same effect. Also, in case of image filtering where source and guide image are the same, authors
propose to dynamically update the guide image after each iteration. To maximize the performance this feature
was not implemented here.
*/
CV_EXPORTS_W Ptr<FastGlobalSmootherFilter> createFastGlobalSmootherFilter(InputArray guide, double lambda, double sigma_color, double lambda_attenuation=0.25, int num_iter=3);
/** @brief Simple one-line Fast Global Smoother filter call. If you have multiple images to filter with the same
guide then use FastGlobalSmootherFilter interface to avoid extra computations.
@param guide image serving as guide for filtering. It should have 8-bit depth and either 1 or 3 channels.
@param src source image for filtering with unsigned 8-bit or signed 16-bit or floating-point 32-bit depth and up to 4 channels.
@param dst destination image.
@param lambda parameter defining the amount of regularization
@param sigma_color parameter, that is similar to color space sigma in bilateralFilter.
@param lambda_attenuation internal parameter, defining how much lambda decreases after each iteration. Normally,
it should be 0.25. Setting it to 1.0 may lead to streaking artifacts.
@param num_iter number of iterations used for filtering, 3 is usually enough.
*/
CV_EXPORTS_W void fastGlobalSmootherFilter(InputArray guide, InputArray src, OutputArray dst, double lambda, double sigma_color, double lambda_attenuation=0.25, int num_iter=3);
//! @}
}
}
#endif

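A short sketch of the two calling styles declared above; file names and parameter values are illustrative only:

#include "opencv2/imgcodecs.hpp"
#include "opencv2/ximgproc/edge_filter.hpp"
using namespace cv;
using namespace cv::ximgproc;

int main()
{
    Mat guide = imread("guide.png");   // 8-bit, 1 or 3 channels
    Mat src   = imread("src.png");     // image to be smoothed
    Mat dst;
    fastGlobalSmootherFilter(guide, src, dst, 8000.0, 1.5);  // one-off call
    Ptr<FastGlobalSmootherFilter> fgs = createFastGlobalSmootherFilter(guide, 8000.0, 1.5);
    fgs->filter(src, dst);             // reuses the guide preprocessing across images
    return 0;
}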
@ -0,0 +1,169 @@
/*
* By downloading, copying, installing or using the software you agree to this license.
* If you do not agree to this license, do not download, install,
* copy or use the software.
*
*
* License Agreement
* For Open Source Computer Vision Library
* (3 - clause BSD License)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met :
*
* *Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and / or other materials provided with the distribution.
*
* * Neither the names of the copyright holders nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided by the copyright holders and contributors "as is" and
* any express or implied warranties, including, but not limited to, the implied
* warranties of merchantability and fitness for a particular purpose are disclaimed.
* In no event shall copyright holders or contributors be liable for any direct,
* indirect, incidental, special, exemplary, or consequential damages
* (including, but not limited to, procurement of substitute goods or services;
* loss of use, data, or profits; or business interruption) however caused
* and on any theory of liability, whether in contract, strict liability,
* or tort(including negligence or otherwise) arising in any way out of
* the use of this software, even if advised of the possibility of such damage.
*/
#include "perf_precomp.hpp"
#include "opencv2/ximgproc/disparity_filter.hpp"
namespace cvtest
{
using std::tr1::tuple;
using std::tr1::get;
using namespace perf;
using namespace testing;
using namespace cv;
using namespace cv::ximgproc;
void MakeArtificialExample(RNG rng, Mat& dst_left_view, Mat& dst_left_disparity_map, Mat& dst_right_disparity_map, Rect& dst_ROI);
CV_ENUM(GuideTypes, CV_8UC3);
CV_ENUM(SrcTypes, CV_16S);
typedef tuple<GuideTypes, SrcTypes, Size, bool, bool> DisparityWLSParams;
typedef TestBaseWithParam<DisparityWLSParams> DisparityWLSFilterPerfTest;
PERF_TEST_P( DisparityWLSFilterPerfTest, perf, Combine(GuideTypes::all(), SrcTypes::all(), Values(sz720p), Values(true,false), Values(true,false)) )
{
RNG rng(0);
DisparityWLSParams params = GetParam();
int guideType = get<0>(params);
int srcType = get<1>(params);
Size sz = get<2>(params);
bool use_conf = get<3>(params);
bool use_downscale = get<4>(params);
Mat guide(sz, guideType);
Mat disp_left(sz, srcType);
Mat disp_right(sz, srcType);
Mat dst(sz, srcType);
Rect ROI;
MakeArtificialExample(rng,guide,disp_left,disp_right,ROI);
if(use_downscale)
{
resize(disp_left,disp_left,Size(),0.5,0.5);
disp_left/=2;
resize(disp_right,disp_right,Size(),0.5,0.5);
disp_right/=2;
ROI = Rect(ROI.x/2,ROI.y/2,ROI.width/2,ROI.height/2);
}
cv::setNumThreads(cv::getNumberOfCPUs());
TEST_CYCLE_N(10)
{
Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilter(use_conf);
wls_filter->filter(disp_left,guide,dst,ROI,disp_right);
}
SANITY_CHECK(dst);
}
void MakeArtificialExample(RNG rng, Mat& dst_left_view, Mat& dst_left_disparity_map, Mat& dst_right_disparity_map, Rect& dst_ROI)
{
int w = dst_left_view.cols;
int h = dst_left_view.rows;
//params:
unsigned char bg_level = (unsigned char)rng.uniform(0.0,255.0);
unsigned char fg_level = (unsigned char)rng.uniform(0.0,255.0);
int rect_width = (int)rng.uniform(w/16,w/2);
int rect_height = (int)rng.uniform(h/16,h/2);
int rect_disparity = (int)(0.15*w); //typical maximum disparity value
double sigma = 6.0;
int rect_x_offset = (w-rect_width) /2;
int rect_y_offset = (h-rect_height)/2;
if(dst_left_view.channels()==3)
dst_left_view = Scalar(Vec3b(bg_level,bg_level,bg_level));
else
dst_left_view = Scalar(bg_level);
dst_left_disparity_map = Scalar(0);
dst_right_disparity_map = Scalar(0);
Mat dst_left_view_rect = Mat(dst_left_view, Rect(rect_x_offset,rect_y_offset,rect_width,rect_height));
Mat dst_left_disparity_map_rect = Mat(dst_left_disparity_map,Rect(rect_x_offset,rect_y_offset,rect_width,rect_height));
if(dst_left_view.channels()==3)
dst_left_view_rect = Scalar(Vec3b(fg_level,fg_level,fg_level));
else
dst_left_view_rect = Scalar(fg_level);
dst_left_disparity_map_rect = Scalar(16*rect_disparity);
rect_x_offset-=rect_disparity;
Mat dst_right_disparity_map_rect = Mat(dst_right_disparity_map,Rect(rect_x_offset,rect_y_offset,rect_width,rect_height));
dst_right_disparity_map_rect = Scalar(-16*rect_disparity);
//add some gaussian noise:
unsigned char *l;
short *ldisp, *rdisp;
for(int i=0;i<h;i++)
{
l = dst_left_view.ptr(i);
ldisp = (short*)dst_left_disparity_map.ptr(i);
rdisp = (short*)dst_right_disparity_map.ptr(i);
if(dst_left_view.channels()==3)
{
for(int j=0;j<w;j++)
{
l[0] = saturate_cast<unsigned char>(l[0] + rng.gaussian(sigma));
l[1] = saturate_cast<unsigned char>(l[1] + rng.gaussian(sigma));
l[2] = saturate_cast<unsigned char>(l[2] + rng.gaussian(sigma));
l+=3;
ldisp[0] = saturate_cast<short>(ldisp[0] + rng.gaussian(sigma));
ldisp++;
rdisp[0] = saturate_cast<short>(rdisp[0] + rng.gaussian(sigma));
rdisp++;
}
}
else
{
for(int j=0;j<w;j++)
{
l[0] = saturate_cast<unsigned char>(l[0] + rng.gaussian(sigma));
l++;
ldisp[0] = saturate_cast<short>(ldisp[0] + rng.gaussian(sigma));
ldisp++;
rdisp[0] = saturate_cast<short>(rdisp[0] + rng.gaussian(sigma));
rdisp++;
}
}
}
dst_ROI = Rect(rect_disparity,0,w-rect_disparity,h);
}
}

@ -0,0 +1,81 @@
/*
* By downloading, copying, installing or using the software you agree to this license.
* If you do not agree to this license, do not download, install,
* copy or use the software.
*
*
* License Agreement
* For Open Source Computer Vision Library
* (3 - clause BSD License)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met :
*
* *Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and / or other materials provided with the distribution.
*
* * Neither the names of the copyright holders nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided by the copyright holders and contributors "as is" and
* any express or implied warranties, including, but not limited to, the implied
* warranties of merchantability and fitness for a particular purpose are disclaimed.
* In no event shall copyright holders or contributors be liable for any direct,
* indirect, incidental, special, exemplary, or consequential damages
* (including, but not limited to, procurement of substitute goods or services;
* loss of use, data, or profits; or business interruption) however caused
* and on any theory of liability, whether in contract, strict liability,
* or tort(including negligence or otherwise) arising in any way out of
* the use of this software, even if advised of the possibility of such damage.
*/
#include "perf_precomp.hpp"
namespace cvtest
{
using std::tr1::tuple;
using std::tr1::get;
using namespace perf;
using namespace testing;
using namespace cv;
using namespace cv::ximgproc;
CV_ENUM(GuideTypes, CV_8UC1, CV_8UC3);
CV_ENUM(SrcTypes, CV_8UC1, CV_8UC3, CV_16SC1, CV_16SC3, CV_32FC1, CV_32FC3);
typedef tuple<GuideTypes, SrcTypes, Size> FGSParams;
typedef TestBaseWithParam<FGSParams> FGSFilterPerfTest;
PERF_TEST_P( FGSFilterPerfTest, perf, Combine(GuideTypes::all(), SrcTypes::all(), Values(sz720p)) )
{
RNG rng(0);
FGSParams params = GetParam();
int guideType = get<0>(params);
int srcType = get<1>(params);
Size sz = get<2>(params);
Mat guide(sz, guideType);
Mat src(sz, srcType);
Mat dst(sz, srcType);
declare.in(guide, src, WARMUP_RNG).out(dst).tbb_threads(cv::getNumberOfCPUs());
cv::setNumThreads(cv::getNumberOfCPUs());
TEST_CYCLE_N(10)
{
double lambda = rng.uniform(500.0, 10000.0);
double sigma = rng.uniform(1.0, 100.0);
fastGlobalSmootherFilter(guide,src,dst,lambda,sigma);
}
SANITY_CHECK(dst);
}
}

@ -4,215 +4,306 @@
#include "opencv2/highgui.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/ximgproc/disparity_filter.hpp"
#include <stdio.h>
#include <iostream>
#include <string>
#include <vector>
#include <map>
#if defined(_WIN32)
#include <direct.h>
#else
#include <sys/stat.h>
#endif
using namespace cv;
using namespace cv::ximgproc;
using namespace std;
#define UNKNOWN_DISPARITY 16320
static void print_help()
{
printf("\nDemo for disparity filtering, evaluating speed and performance of different filters\n");
printf("\nUsage: disparity_filtering.exe <path_to_dataset_folder> <path_to_results_folder>\n");
}
Rect computeROI(Size2i src_sz, Ptr<StereoMatcher> matcher_instance);
const String keys =
"{help h usage ? | | print this message }"
"{@left |../data/aloeL.jpg | left view of the stereopair }"
"{@right |../data/aloeR.jpg | right view of the stereopair }"
"{GT |../data/aloeGT.png| optional ground-truth disparity (MPI-Sintel or Middlebury format) }"
"{dst_path |None | optional path to save the resulting filtered disparity map }"
"{dst_raw_path |None | optional path to save raw disparity map before filtering }"
"{algorithm |bm | stereo matching method (bm or sgbm) }"
"{filter |wls_conf | used post-filtering (wls_conf or wls_no_conf) }"
"{no-display | | don't display results }"
"{no-downscale | | prevent stereo matching on downscaled views }"
"{dst_conf_path |None | optional path to save the confidence map used in filtering }"
"{vis_mult |1.0 | coefficient used to scale disparity map visualizations }"
"{max_disparity |160 | parameter of stereo matching }"
"{window_size |19 | parameter of stereo matching }"
"{wls_lambda |8000.0 | parameter of post-filtering }"
"{wls_sigma |1.0 | parameter of post-filtering }"
;
struct dataset_entry
int main(int argc, char** argv)
{
string name;
string dataset_folder;
string left_file,right_file,GT_file;
dataset_entry(string _dataset_folder): dataset_folder(_dataset_folder){}
void readEntry(Mat& dst_left,Mat& dst_right,Mat& dst_GT)
CommandLineParser parser(argc,argv,keys);
parser.about("Disparity Filtering Demo");
if (parser.has("help"))
{
dst_left = imread(dataset_folder+"/"+left_file, IMREAD_COLOR);
dst_right = imread(dataset_folder+"/"+right_file, IMREAD_COLOR);
Mat raw_disp = imread(dataset_folder+"/"+GT_file, IMREAD_COLOR);
dst_GT = Mat(raw_disp.rows,raw_disp.cols,CV_16S);
for(int i=0;i<raw_disp.rows;i++)
for(int j=0;j<raw_disp.cols;j++)
{
Vec3b bgrPixel = raw_disp.at<Vec3b>(i, j);
dst_GT.at<short>(i,j) = 64*bgrPixel.val[2]+bgrPixel.val[1]/4; //16-multiplied disparity
}
parser.printMessage();
return 0;
}
};
struct config
{
Ptr<StereoMatcher> matcher_instance;
Ptr<DisparityFilter> filter_instance;
config(Ptr<StereoMatcher> _matcher_instance,Ptr<DisparityFilter> _filter_instance)
String left_im = parser.get<String>(0);
String right_im = parser.get<String>(1);
String GT_path = parser.get<String>("GT");
String dst_path = parser.get<String>("dst_path");
String dst_raw_path = parser.get<String>("dst_raw_path");
String dst_conf_path = parser.get<String>("dst_conf_path");
String algo = parser.get<String>("algorithm");
String filter = parser.get<String>("filter");
bool no_display = parser.has("no-display");
bool no_downscale = parser.has("no-downscale");
int max_disp = parser.get<int>("max_disparity");
int wsize = parser.get<int>("window_size");
double lambda = parser.get<double>("wls_lambda");
double sigma = parser.get<double>("wls_sigma");
double vis_mult = parser.get<double>("vis_mult");
if (!parser.check())
{
matcher_instance = _matcher_instance;
filter_instance = _filter_instance;
parser.printErrors();
return -1;
}
config() {}
};
void operator>>(const FileNode& node,dataset_entry& entry);
double computeMSE(Mat& GT, Mat& src, Rect ROI);
double computeBadPixelPercent(Mat& GT, Mat& src, Rect ROI, int thresh=24/*1.5 pixels*/);
void getDisparityVis(Mat& disparity_map,Mat& dst);
Rect computeROI(Size2i src_sz, Ptr<StereoMatcher> matcher_instance);
void setConfigsForTesting(map<string,config>& cfgs);
void CreateDir(string path);
int main(int argc, char** argv)
{
if(argc < 3)
Mat left = imread(left_im ,IMREAD_COLOR);
if ( left.empty() )
{
print_help();
return 0;
cout<<"Cannot read image file: "<<left_im;
return -1;
}
string dataset_folder(argv[1]);
string res_folder(argv[2]);
map<string,config> configs_for_testing;
setConfigsForTesting(configs_for_testing);
CreateDir(res_folder);
Mat right = imread(right_im,IMREAD_COLOR);
if ( right.empty() )
{
cout<<"Cannot read image file: "<<right_im;
return -1;
}
for (map<string,config>::iterator cfg = configs_for_testing.begin(); cfg != configs_for_testing.end(); cfg++)
bool noGT;
Mat GT_disp;
if (GT_path=="../data/aloeGT.png" && left_im!="../data/aloeL.jpg")
noGT=true;
else
{
string vis_folder = res_folder+"/vis_"+cfg->first;
CreateDir(vis_folder);
string cfg_file_name = res_folder+"/"+cfg->first+"_res.csv";
FILE* cur_cfg_res_file = fopen(cfg_file_name.c_str(),"w");
fprintf(cur_cfg_res_file,"Name,MSE,MSE after postfiltering,Percent bad,Percent bad after postfiltering,Matcher Execution Time(s),Filter Execution Time(s)\n");
printf("Processing configuration: %s\n",cfg->first.c_str());
FileStorage fs(dataset_folder + "/_dataset.xml", FileStorage::READ);
FileNode n = fs["data_set"];
double MSE_pre,percent_pre,MSE_post,percent_post,matching_time,filtering_time;
double average_MSE_pre=0,average_percent_pre=0,average_MSE_post=0,
average_percent_post=0,average_matching_time=0,average_filtering_time=0;
int cnt = 0;
for (FileNodeIterator it = n.begin(); it != n.end(); it++)
noGT=false;
if(readGT(GT_path,GT_disp)!=0)
{
dataset_entry entry(dataset_folder);
(*it)>>entry;
printf("%s ",entry.name.c_str());
Mat left,right,GT;
entry.readEntry(left,right,GT);
Mat raw_disp;
Mat left_gray; cvtColor(left, left_gray, COLOR_BGR2GRAY );
Mat right_gray; cvtColor(right, right_gray, COLOR_BGR2GRAY );
matching_time = (double)getTickCount();
cfg->second.matcher_instance->compute(left_gray,right_gray,raw_disp);
matching_time = ((double)getTickCount() - matching_time)/getTickFrequency();
Rect ROI = computeROI(left.size(),cfg->second.matcher_instance);
Mat filtered_disp;
filtering_time = (double)getTickCount();
cfg->second.filter_instance->filter(raw_disp,left,filtered_disp,ROI);
filtering_time = ((double)getTickCount() - filtering_time)/getTickFrequency();
cout<<"Cannot read ground truth image file: "<<GT_path<<endl;
return -1;
}
}
Mat left_for_matcher, right_for_matcher;
Mat left_disp,right_disp;
Mat filtered_disp;
Mat conf_map = Mat(left.rows,left.cols,CV_8U);
conf_map = Scalar(255);
Rect ROI;
double matching_time, filtering_time;
if(max_disp<=0 || max_disp%16!=0)
{
cout<<"Incorrect max_disparity value: it should be positive and divisible by 16";
return -1;
}
if(wsize<=0 || wsize%2!=1)
{
cout<<"Incorrect window_size value: it should be positive and odd";
return -1;
}
if(filter=="wls_conf")
{
if(!no_downscale)
{
wsize = wsize/2;
if(wsize%2==0) wsize++;
max_disp/=2;
if(max_disp%16!=0)
max_disp += 16-(max_disp%16);
resize(left ,left_for_matcher ,Size(),0.5,0.5);
resize(right,right_for_matcher,Size(),0.5,0.5);
}
else
{
left_for_matcher = left.clone();
right_for_matcher = right.clone();
}
MSE_pre = computeMSE(GT,raw_disp,ROI);
percent_pre = computeBadPixelPercent(GT,raw_disp,ROI);
if(algo=="bm")
{
    Ptr<StereoBM> left_matcher = StereoBM::create(max_disp,wsize);
    left_matcher->setMinDisparity(0);
    Ptr<StereoBM> right_matcher = StereoBM::create(max_disp,wsize);
    right_matcher->setMinDisparity(-max_disp+1);
    left_matcher->setTextureThreshold(0);
    left_matcher->setUniquenessRatio(0);
    right_matcher->setTextureThreshold(0);
    right_matcher->setUniquenessRatio(0);
    cvtColor(left_for_matcher, left_for_matcher, COLOR_BGR2GRAY);
    cvtColor(right_for_matcher, right_for_matcher, COLOR_BGR2GRAY);
    ROI = computeROI(left_for_matcher.size(),left_matcher);

    matching_time = (double)getTickCount();
    left_matcher-> compute(left_for_matcher, right_for_matcher,left_disp);
    right_matcher->compute(right_for_matcher,left_for_matcher, right_disp);
    matching_time = ((double)getTickCount() - matching_time)/getTickFrequency();
}
else if(algo=="sgbm")
{
    Ptr<StereoSGBM> left_matcher = StereoSGBM::create(0,max_disp,wsize);
    left_matcher->setMinDisparity(0);
    Ptr<StereoSGBM> right_matcher = StereoSGBM::create(-max_disp+1,max_disp,wsize);
    left_matcher->setUniquenessRatio(0);
    left_matcher->setDisp12MaxDiff(1000000);
    left_matcher->setSpeckleWindowSize(0);
    right_matcher->setUniquenessRatio(0);
    right_matcher->setDisp12MaxDiff(1000000);
    right_matcher->setSpeckleWindowSize(0);
    ROI = computeROI(left_for_matcher.size(),left_matcher);

    matching_time = (double)getTickCount();
    left_matcher-> compute(left_for_matcher, right_for_matcher,left_disp);
    right_matcher->compute(right_for_matcher,left_for_matcher, right_disp);
    matching_time = ((double)getTickCount() - matching_time)/getTickFrequency();
}
else
{
    cout<<"Unsupported algorithm";
    return -1;
}

Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilter(true);
wls_filter->setLambda(lambda);
wls_filter->setSigmaColor(sigma);
filtering_time = (double)getTickCount();
wls_filter->filter(left_disp,left,filtered_disp,ROI,right_disp);
filtering_time = ((double)getTickCount() - filtering_time)/getTickFrequency();
conf_map = wls_filter->getConfidenceMap();

if(!no_downscale)
{
    // upscale raw disparity and ROI back for a proper comparison:
    resize(left_disp,left_disp,Size(),2.0,2.0);
    left_disp = left_disp*2.0;
    ROI = Rect(ROI.x*2,ROI.y*2,ROI.width*2,ROI.height*2);
}
}
else if(filter=="wls_no_conf")
{
    left_for_matcher  = left.clone();
    right_for_matcher = right.clone();
    if(algo=="bm")
    {
        Ptr<StereoBM> matcher = StereoBM::create(max_disp,wsize);
        matcher->setTextureThreshold(0);
        matcher->setUniquenessRatio(0);
        cvtColor(left_for_matcher, left_for_matcher, COLOR_BGR2GRAY);
        cvtColor(right_for_matcher, right_for_matcher, COLOR_BGR2GRAY);
        ROI = computeROI(left_for_matcher.size(),matcher);

        matching_time = (double)getTickCount();
        matcher->compute(left_for_matcher,right_for_matcher,left_disp);
        matching_time = ((double)getTickCount() - matching_time)/getTickFrequency();
    }
    else if(algo=="sgbm")
    {
        Ptr<StereoSGBM> matcher = StereoSGBM::create(0,max_disp,wsize);
        matcher->setUniquenessRatio(0);
        matcher->setDisp12MaxDiff(1000000);
        matcher->setSpeckleWindowSize(0);
        ROI = computeROI(left_for_matcher.size(),matcher);

        matching_time = (double)getTickCount();
        matcher->compute(left_for_matcher,right_for_matcher,left_disp);
        matching_time = ((double)getTickCount() - matching_time)/getTickFrequency();
    }
    else
    {
        cout<<"Unsupported algorithm";
        return -1;
    }

    Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilter(false);
    wls_filter->setLambda(lambda);
    wls_filter->setSigmaColor(sigma);
    filtering_time = (double)getTickCount();
    wls_filter->filter(left_disp,left,filtered_disp,ROI);
    filtering_time = ((double)getTickCount() - filtering_time)/getTickFrequency();
}
else
{
    cout<<"Unsupported filter";
    return -1;
}

//collect and print all the stats:
cout.precision(2);
cout<<"Matching time: "<<matching_time<<"s"<<endl;
cout<<"Filtering time: "<<filtering_time<<"s"<<endl;
cout<<endl;
double MSE_before,percent_bad_before,MSE_after,percent_bad_after;
if(!noGT)
{
    MSE_before = computeMSE(GT_disp,left_disp,ROI);
    percent_bad_before = computeBadPixelPercent(GT_disp,left_disp,ROI);
    MSE_after = computeMSE(GT_disp,filtered_disp,ROI);
    percent_bad_after = computeBadPixelPercent(GT_disp,filtered_disp,ROI);
    cout.precision(5);
    cout<<"MSE before filtering: "<<MSE_before<<endl;
    cout<<"MSE after filtering: "<<MSE_after<<endl;
    cout<<endl;
    cout.precision(3);
    cout<<"Percent of bad pixels before filtering: "<<percent_bad_before<<endl;
    cout<<"Percent of bad pixels after filtering: "<<percent_bad_after<<endl;
}
if(dst_path!="None")
{
    Mat filtered_disp_vis;
    getDisparityVis(filtered_disp,filtered_disp_vis,vis_mult);
    imwrite(dst_path,filtered_disp_vis);
}
if(dst_raw_path!="None")
{
    Mat raw_disp_vis;
    getDisparityVis(left_disp,raw_disp_vis,vis_mult);
    imwrite(dst_raw_path,raw_disp_vis);
}
if(dst_conf_path!="None")
{
    imwrite(dst_conf_path,conf_map);
}
if(!no_display)
{
    namedWindow("left", WINDOW_AUTOSIZE);
    imshow("left", left);
    namedWindow("right", WINDOW_AUTOSIZE);
    imshow("right", right);
    if(!noGT)
    {
        Mat GT_disp_vis;
        getDisparityVis(GT_disp,GT_disp_vis,vis_mult);
        namedWindow("ground-truth disparity", WINDOW_AUTOSIZE);
        imshow("ground-truth disparity", GT_disp_vis);
    }
    Mat raw_disp_vis;
    getDisparityVis(left_disp,raw_disp_vis,vis_mult);
    namedWindow("raw disparity", WINDOW_AUTOSIZE);
    imshow("raw disparity", raw_disp_vis);
    Mat filtered_disp_vis;
    getDisparityVis(filtered_disp,filtered_disp_vis,vis_mult);
    namedWindow("filtered disparity", WINDOW_AUTOSIZE);
    imshow("filtered disparity", filtered_disp_vis);
    waitKey();
}
return 0;
}

// ---- removed by this patch (old evaluation-loop version of the sample) ----
printf("- Done\n");
MSE_post = computeMSE(GT,filtered_disp,ROI);
percent_post = computeBadPixelPercent(GT,filtered_disp,ROI);
fprintf(cur_cfg_res_file,"%s,%.1f,%.1f,%.1f,%.1f,%.3f,%.3f\n",entry.name.c_str(),MSE_pre,MSE_post,
        percent_pre,percent_post,matching_time,filtering_time);
average_matching_time+=matching_time; average_filtering_time+=filtering_time;
average_MSE_pre+=MSE_pre; average_percent_pre+=percent_pre;
average_MSE_post+=MSE_post; average_percent_post+=percent_post;
cnt++;

// dump visualizations:
imwrite(vis_folder + "/" + entry.name + "_left.png",left);
Mat GT_vis,raw_disp_vis,filtered_disp_vis;
getDisparityVis(GT,GT_vis);
getDisparityVis(raw_disp,raw_disp_vis);
getDisparityVis(filtered_disp,filtered_disp_vis);
imwrite(vis_folder + "/" + entry.name + "_disparity_GT.png",GT_vis);
imwrite(vis_folder + "/" + entry.name + "_disparity_raw.png",raw_disp_vis);
imwrite(vis_folder + "/" + entry.name + "_disparity_filtered.png",filtered_disp_vis);
}
fprintf(cur_cfg_res_file,"%s,%.1f,%.1f,%.1f,%.1f,%.3f,%.3f\n","average",average_MSE_pre/cnt,
        average_MSE_post/cnt,average_percent_pre/cnt,average_percent_post/cnt,
        average_matching_time/cnt,average_filtering_time/cnt);
fclose(cur_cfg_res_file);
}
return 0;
}
void operator>>(const FileNode& node,dataset_entry& entry)
{
    node["name"] >> entry.name;
    node["left_file"] >> entry.left_file;
    node["right_file"] >> entry.right_file;
    node["GT_file"] >> entry.GT_file;
}
double computeMSE(Mat& GT, Mat& src, Rect ROI)
{
    double res = 0;
    Mat GT_ROI(GT,ROI);
    Mat src_ROI(src,ROI);
    int cnt=0;
    for(int i=0;i<src_ROI.rows;i++)
        for(int j=0;j<src_ROI.cols;j++)
        {
            if(GT_ROI.at<short>(i,j)!=UNKNOWN_DISPARITY)
            {
                res += (GT_ROI.at<short>(i,j) - src_ROI.at<short>(i,j))*(GT_ROI.at<short>(i,j) - src_ROI.at<short>(i,j));
                cnt++;
            }
        }
    res /= cnt*256;
    return res;
}
double computeBadPixelPercent(Mat& GT, Mat& src, Rect ROI, int thresh)
{
    int bad_pixel_num = 0;
    Mat GT_ROI(GT,ROI);
    Mat src_ROI(src,ROI);
    int cnt=0;
    for(int i=0;i<src_ROI.rows;i++)
        for(int j=0;j<src_ROI.cols;j++)
        {
            if(GT_ROI.at<short>(i,j)!=UNKNOWN_DISPARITY)
            {
                if( abs(GT_ROI.at<short>(i,j) - src_ROI.at<short>(i,j))>=thresh )
                    bad_pixel_num++;
                cnt++;
            }
        }
    return (100.0*bad_pixel_num)/cnt;
}
void getDisparityVis(Mat& disparity_map,Mat& dst)
{
    dst = Mat(disparity_map.rows,disparity_map.cols,CV_8UC3);
    for(int i=0;i<dst.rows;i++)
        for(int j=0;j<dst.cols;j++)
        {
            if(disparity_map.at<short>(i,j)==UNKNOWN_DISPARITY)
                dst.at<Vec3b>(i,j) = Vec3b(0,0,0);
            else
                dst.at<Vec3b>(i,j) = Vec3b(saturate_cast<unsigned char>(disparity_map.at<short>(i,j)/8),
                                           saturate_cast<unsigned char>(disparity_map.at<short>(i,j)/8),
                                           saturate_cast<unsigned char>(disparity_map.at<short>(i,j)/8));
        }
}
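For reference, the dataset_entry reader above expects records shaped roughly like the following in OpenCV FileStorage YAML (the top-level node name and file paths here are illustrative, not taken from this patch):

%YAML:1.0
datasets:
   - { name:"cones", left_file:"cones/im2.png", right_file:"cones/im6.png", GT_file:"cones/disp2.png" }
   - { name:"teddy", left_file:"teddy/im2.png", right_file:"teddy/im6.png", GT_file:"teddy/disp2.png" }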
@ -225,34 +316,10 @@ Rect computeROI(Size2i src_sz, Ptr<StereoMatcher> matcher_instance)
int minD = min_disparity, maxD = min_disparity + num_disparities - 1;
int xmin = maxD + bs2;
int xmax = src_sz.width + minD - bs2; // was: src_sz.width - minD - bs2 (wrong when minDisparity is negative)
int ymin = bs2;
int ymax = src_sz.height - bs2;
Rect r(xmin, ymin, xmax - xmin, ymax - ymin);
return r;
}
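A quick numeric check of this ROI logic (values are illustrative, not from the patch): for a 640x480 view with max_disp = 128, wsize = 21 and the left matcher (minDisparity = 0), we get bs2 = 10, minD = 0, maxD = 127, hence xmin = 137 and xmax = 640 + 0 - 10 = 630, so the valid region is Rect(137, 10, 493, 460). The wide left margin excludes pixels whose match would fall outside the right image.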
void setConfigsForTesting(map<string,config>& cfgs)
{
Ptr<StereoBM> stereobm_matcher = StereoBM::create(128,21);
stereobm_matcher->setTextureThreshold(0);
stereobm_matcher->setUniquenessRatio(0);
Ptr<DisparityFilter> wls_filter = createDisparityWLSFilter();
Ptr<DisparityFilter> dt_filter = createDisparityDTFilter();
Ptr<DisparityFilter> guided_filter = createDisparityGuidedFilter();
cfgs["stereobm_wls"] = config(stereobm_matcher,wls_filter);
cfgs["stereobm_dtf"] = config(stereobm_matcher,dt_filter);
cfgs["stereobm_gf"] = config(stereobm_matcher,guided_filter);
}
void CreateDir(string path)
{
#if defined(_WIN32)
_mkdir(path.c_str());
#else
mkdir(path.c_str(), 0777);
#endif
}

@ -36,149 +36,429 @@
#include "precomp.hpp"
#include "opencv2/ximgproc/disparity_filter.hpp"
#include "opencv2/highgui.hpp"
#include <math.h>
#include <vector>
namespace cv {
namespace ximgproc {
using std::vector;
#define EPS 1e-43f

// ---- removed (old implementation) ----
void setROI(InputArray disparity_map, InputArray left_view, OutputArray filtered_disparity_map,
            Mat& disp,Mat& src,Mat& dst,Rect ROI);

class DisparityDTFilterImpl : public DisparityDTFilter
{
protected:
    double sigmaSpatial,sigmaColor;
    void init(double _sigmaSpatial, double _sigmaColor)
    {
        sigmaColor = _sigmaColor;
        sigmaSpatial = _sigmaSpatial;
    }
public:
    double getSigmaSpatial() {return sigmaSpatial;}
    void setSigmaSpatial(double _sigmaSpatial) {sigmaSpatial = _sigmaSpatial;}
    double getSigmaColor() {return sigmaColor;}
    void setSigmaColor(double _sigmaColor) {sigmaColor = _sigmaColor;}
    static Ptr<DisparityDTFilterImpl> create()
    {
        DisparityDTFilterImpl *dtf = new DisparityDTFilterImpl();
        dtf->init(25.0,60.0);
        return Ptr<DisparityDTFilterImpl>(dtf);
    }
    void filter(InputArray disparity_map, InputArray left_view, OutputArray filtered_disparity_map,Rect ROI)
    {
        Mat disp,src,dst;
        setROI(disparity_map,left_view,filtered_disparity_map,disp,src,dst,ROI);
        Mat disp_32F,dst_32F;
        disp.convertTo(disp_32F,CV_32F);
        dtFilter(src,disp_32F,dst_32F,sigmaSpatial,sigmaColor);
        dst_32F.convertTo(dst,CV_16S);
    }
};
CV_EXPORTS_W
Ptr<DisparityDTFilter> createDisparityDTFilter()
{
    return Ptr<DisparityDTFilter>(DisparityDTFilterImpl::create());
}

// ---- added (confidence-aware DisparityWLSFilter) ----
class DisparityWLSFilterImpl : public DisparityWLSFilter
{
protected:
    double lambda,sigma_color;
    bool use_confidence;
    Mat confidence_map;
    int LRC_thresh,depth_discontinuity_radius;
    float depth_discontinuity_roll_off_factor;
    float resize_factor;
    int num_stripes;
    void init(double _lambda, double _sigma_color, bool _use_confidence);
    void computeDepthDiscontinuityMaps(Mat& left_disp, Mat& right_disp, Mat& left_dst, Mat& right_dst, Rect ROI);
    void computeConfidenceMap(InputArray left_disp, InputArray right_disp, Rect ROI);
protected:
    struct ComputeDiscontinuityAwareLRC_ParBody : public ParallelLoopBody
    {
        DisparityWLSFilterImpl* wls;
        Mat *left_disp, *right_disp;
        Mat *left_disc, *right_disc, *dst;
        Rect left_ROI, right_ROI;
        int nstripes, stripe_sz;
        ComputeDiscontinuityAwareLRC_ParBody(DisparityWLSFilterImpl& _wls, Mat& _left_disp, Mat& _right_disp, Mat& _left_disc, Mat& _right_disc, Mat& _dst, Rect _left_ROI, Rect _right_ROI, int _nstripes);
        void operator () (const Range& range) const;
    };
    struct ComputeDepthDisc_ParBody : public ParallelLoopBody
    {
        DisparityWLSFilterImpl* wls;
        Mat *disp,*disp_squares,*dst;
        int nstripes, stripe_sz;
        ComputeDepthDisc_ParBody(DisparityWLSFilterImpl& _wls, Mat& _disp, Mat& _disp_squares, Mat& _dst, int _nstripes);
        void operator () (const Range& range) const;
    };
    typedef void (DisparityWLSFilterImpl::*MatOp)(Mat& src, Mat& dst);
    struct ParallelMatOp_ParBody : public ParallelLoopBody
    {
        DisparityWLSFilterImpl* wls;
        vector<MatOp> ops;
        vector<Mat*> src;
        vector<Mat*> dst;
        ParallelMatOp_ParBody(DisparityWLSFilterImpl& _wls, vector<MatOp> _ops, vector<Mat*>& _src, vector<Mat*>& _dst);
        void operator () (const Range& range) const;
    };
    void boxFilterOp(Mat& src,Mat& dst)
    {
        int rad = (int)ceil(resize_factor*depth_discontinuity_radius);
        boxFilter(src,dst,CV_32F,Size(2*rad+1,2*rad+1),Point(-1,-1));
    }
    void sqrBoxFilterOp(Mat& src,Mat& dst)
    {
        int rad = (int)ceil(resize_factor*depth_discontinuity_radius);
        sqrBoxFilter(src,dst,CV_32F,Size(2*rad+1,2*rad+1),Point(-1,-1));
    }
    void copyToOp(Mat& src,Mat& dst)
    {
        src.copyTo(dst);
    }
public:
    static Ptr<DisparityWLSFilterImpl> create(bool _use_confidence);
    void filter(InputArray disparity_map_left, InputArray left_view, OutputArray filtered_disparity_map, Rect ROI, InputArray disparity_map_right, InputArray);
    double getLambda() {return lambda;}
    void setLambda(double _lambda) {lambda = _lambda;}
    double getSigmaColor() {return sigma_color;}
    void setSigmaColor(double _sigma_color) {sigma_color = _sigma_color;}
    Mat getConfidenceMap() {return confidence_map;}
    int getLRCthresh() {return LRC_thresh;}
    void setLRCthresh(int _LRC_thresh) {LRC_thresh = _LRC_thresh;}
    int getDepthDiscontinuityRadius() {return depth_discontinuity_radius;}
    void setDepthDiscontinuityRadius(int _disc_radius) {depth_discontinuity_radius = _disc_radius;}
};
void DisparityWLSFilterImpl::init(double _lambda, double _sigma_color, bool _use_confidence)
{
    lambda = _lambda;
    sigma_color = _sigma_color;
    use_confidence = _use_confidence;
    confidence_map = Mat();
    LRC_thresh = 24;
    depth_discontinuity_radius = 5;
    depth_discontinuity_roll_off_factor = 0.001f;
    resize_factor = 1.0;
    num_stripes = getNumThreads();
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
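In terms of the members declared above, the confidence map that this implementation builds combines a left-right-consistency (LRC) check with a local-variance penalty around depth discontinuities (this is a sketch of the logic read off the code below, not a formula quoted from the patch):

\[
\mathrm{disc}(p) = \max\bigl(1 - \gamma\,\operatorname{Var}_{W(p)}(d),\; 0\bigr), \qquad
\mathrm{conf}(p) =
\begin{cases}
\min\bigl(\mathrm{disc}_L(p), \mathrm{disc}_R(p')\bigr) & \text{if } |d_L(p) + d_R(p')| < \tau_{LRC},\\
0 & \text{otherwise,}
\end{cases}
\]

where \(p' = p - d_L(p)\) is the matching pixel in the right view, \(\operatorname{Var}_{W(p)}(d)\) is the disparity variance over a box window of radius depth_discontinuity_radius (computed via the boxFilter/sqrBoxFilter ops), \(\gamma\) is depth_discontinuity_roll_off_factor and \(\tau_{LRC}\) is LRC_thresh; the result is finally scaled by 255.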
// ---- removed (old implementation) ----
class DisparityGuidedFilterImpl : public DisparityGuidedFilter
{
protected:
    int radius;
    double eps;
    void init(int _radius, double _eps)
    {
        radius = _radius;
        eps = _eps;
    }
public:
    double getEps() {return eps;}
    void setEps(double _eps) {eps = _eps;}
    int getRadius() {return radius;}
    void setRadius(int _radius) {radius = _radius;}
    static Ptr<DisparityGuidedFilterImpl> create()
    {
        DisparityGuidedFilterImpl *gf = new DisparityGuidedFilterImpl();
        gf->init(20,100.0);
        return Ptr<DisparityGuidedFilterImpl>(gf);
    }
    void filter(InputArray disparity_map, InputArray left_view, OutputArray filtered_disparity_map,Rect ROI)
    {
        Mat disp,src,dst;
        setROI(disparity_map,left_view,filtered_disparity_map,disp,src,dst,ROI);
        Mat disp_32F,dst_32F;
        disp.convertTo(disp_32F,CV_32F);
        guidedFilter(src,disp_32F,dst_32F,radius,eps);
        dst_32F.convertTo(dst,CV_16S);
    }
};
CV_EXPORTS_W
Ptr<DisparityGuidedFilter> createDisparityGuidedFilter()
{
    return Ptr<DisparityGuidedFilter>(DisparityGuidedFilterImpl::create());
}

// ---- added ----
void DisparityWLSFilterImpl::computeDepthDiscontinuityMaps(Mat& left_disp, Mat& right_disp, Mat& left_dst, Mat& right_dst, Rect ROI)
{
    Rect right_ROI(left_disp.cols-(ROI.x+ROI.width),ROI.y,ROI.width,ROI.height);
    Mat left_disp_ROI (left_disp, ROI);
    Mat right_disp_ROI(right_disp,right_ROI);
    Mat ldisp,rdisp,ldisp_squared,rdisp_squared;
    {
        vector<Mat*> _src; _src.push_back(&left_disp_ROI);_src.push_back(&right_disp_ROI);
                           _src.push_back(&left_disp_ROI);_src.push_back(&right_disp_ROI);
        vector<Mat*> _dst; _dst.push_back(&ldisp);        _dst.push_back(&rdisp);
                           _dst.push_back(&ldisp_squared);_dst.push_back(&rdisp_squared);
        vector<MatOp> _ops; _ops.push_back(&DisparityWLSFilterImpl::copyToOp); _ops.push_back(&DisparityWLSFilterImpl::copyToOp);
                            _ops.push_back(&DisparityWLSFilterImpl::copyToOp); _ops.push_back(&DisparityWLSFilterImpl::copyToOp);
        parallel_for_(Range(0,4),ParallelMatOp_ParBody(*this,_ops,_src,_dst));
    }
    {
        vector<Mat*> _src; _src.push_back(&ldisp);        _src.push_back(&rdisp);
                           _src.push_back(&ldisp_squared);_src.push_back(&rdisp_squared);
        vector<Mat*> _dst; _dst.push_back(&ldisp);        _dst.push_back(&rdisp);
                           _dst.push_back(&ldisp_squared);_dst.push_back(&rdisp_squared);
        vector<MatOp> _ops; _ops.push_back(&DisparityWLSFilterImpl::boxFilterOp);   _ops.push_back(&DisparityWLSFilterImpl::boxFilterOp);
                            _ops.push_back(&DisparityWLSFilterImpl::sqrBoxFilterOp);_ops.push_back(&DisparityWLSFilterImpl::sqrBoxFilterOp);
        parallel_for_(Range(0,4),ParallelMatOp_ParBody(*this,_ops,_src,_dst));
    }
    left_dst = Mat::zeros(left_disp.rows,left_disp.cols,CV_32F);
    right_dst = Mat::zeros(right_disp.rows,right_disp.cols,CV_32F);
    Mat left_dst_ROI (left_dst,ROI);
    Mat right_dst_ROI(right_dst,right_ROI);
    parallel_for_(Range(0,num_stripes),ComputeDepthDisc_ParBody(*this,ldisp,ldisp_squared,left_dst_ROI ,num_stripes));
    parallel_for_(Range(0,num_stripes),ComputeDepthDisc_ParBody(*this,rdisp,rdisp_squared,right_dst_ROI,num_stripes));
}
void DisparityWLSFilterImpl::computeConfidenceMap(InputArray left_disp, InputArray right_disp, Rect ROI)
{
    Mat ldisp = left_disp.getMat();
    Mat rdisp = right_disp.getMat();
    Mat depth_discontinuity_map_left,depth_discontinuity_map_right;
    computeDepthDiscontinuityMaps(ldisp,rdisp,depth_discontinuity_map_left,depth_discontinuity_map_right,ROI);
    Rect right_ROI(ldisp.cols-(ROI.x+ROI.width),ROI.y,ROI.width,ROI.height);
    confidence_map = depth_discontinuity_map_left;
    parallel_for_(Range(0,num_stripes),ComputeDiscontinuityAwareLRC_ParBody(*this,ldisp,rdisp, depth_discontinuity_map_left,depth_discontinuity_map_right,confidence_map,ROI,right_ROI,num_stripes));
    confidence_map = 255.0f*confidence_map;
}
Ptr<DisparityWLSFilterImpl> DisparityWLSFilterImpl::create(bool _use_confidence)
{
    DisparityWLSFilterImpl *wls = new DisparityWLSFilterImpl();
    wls->init(8000.0,1.0,_use_confidence);
    return Ptr<DisparityWLSFilterImpl>(wls);
}
void DisparityWLSFilterImpl::filter(InputArray disparity_map_left, InputArray left_view, OutputArray filtered_disparity_map, Rect ROI, InputArray disparity_map_right, InputArray)
{
    CV_Assert( !disparity_map_left.empty() && (disparity_map_left.depth() == CV_16S) && (disparity_map_left.channels() == 1) );
    CV_Assert( !left_view.empty() && (left_view.depth() == CV_8U) && (left_view.channels() == 3 || left_view.channels() == 1) );
    Mat disp,src,dst;
    if(disparity_map_left.size()!=left_view.size())
        resize_factor = disparity_map_left.cols()/(float)left_view.cols();
    else
        resize_factor = 1.0;
    if(!use_confidence)
    {
        Mat disp_full_size = disparity_map_left.getMat();
        Mat src_full_size = left_view.getMat();
        if(disp_full_size.size!=src_full_size.size)
        {
            float x_ratio = src_full_size.cols/(float)disp_full_size.cols;
            float y_ratio = src_full_size.rows/(float)disp_full_size.rows;
            resize(disp_full_size,disp_full_size,src_full_size.size());
            disp_full_size = disp_full_size*x_ratio;
            ROI = Rect((int)(ROI.x*x_ratio),(int)(ROI.y*y_ratio),(int)(ROI.width*x_ratio),(int)(ROI.height*y_ratio));
        }
        disp = Mat(disp_full_size,ROI);
        src = Mat(src_full_size ,ROI);
        filtered_disparity_map.create(disp_full_size.size(), disp_full_size.type());
        Mat& dst_full_size = filtered_disparity_map.getMatRef();
        dst_full_size = Scalar(-16);
        dst = Mat(dst_full_size,ROI);
        Mat filtered_disp;
        fastGlobalSmootherFilter(src,disp,filtered_disp,lambda,sigma_color);
        filtered_disp.copyTo(dst);
    }
    else
    {
        CV_Assert( !disparity_map_right.empty() && (disparity_map_right.depth() == CV_16S) && (disparity_map_right.channels() == 1) );
        CV_Assert( (disparity_map_left.cols() == disparity_map_right.cols()) );
        CV_Assert( (disparity_map_left.rows() == disparity_map_right.rows()) );
        computeConfidenceMap(disparity_map_left,disparity_map_right,ROI);
        Mat disp_full_size = disparity_map_left.getMat();
        Mat src_full_size = left_view.getMat();
        if(disp_full_size.size!=src_full_size.size)
        {
            float x_ratio = src_full_size.cols/(float)disp_full_size.cols;
            float y_ratio = src_full_size.rows/(float)disp_full_size.rows;
            resize(disp_full_size,disp_full_size,src_full_size.size());
            disp_full_size = disp_full_size*x_ratio;
            resize(confidence_map,confidence_map,src_full_size.size());
            ROI = Rect((int)(ROI.x*x_ratio),(int)(ROI.y*y_ratio),(int)(ROI.width*x_ratio),(int)(ROI.height*y_ratio));
        }
        disp = Mat(disp_full_size,ROI);
        src = Mat(src_full_size ,ROI);
        filtered_disparity_map.create(disp_full_size.size(), disp_full_size.type());
        Mat& dst_full_size = filtered_disparity_map.getMatRef();
        dst_full_size = Scalar(-16);
        dst = Mat(dst_full_size,ROI);
        Mat conf(confidence_map,ROI);
        Mat disp_mul_conf;
        disp.convertTo(disp_mul_conf,CV_32F);
        disp_mul_conf = conf.mul(disp_mul_conf);
        Mat conf_filtered;
        Ptr<FastGlobalSmootherFilter> wls = createFastGlobalSmootherFilter(src,lambda,sigma_color);
        wls->filter(disp_mul_conf,disp_mul_conf);
        wls->filter(conf,conf_filtered);
        disp_mul_conf = disp_mul_conf.mul(1/(conf_filtered+EPS));
        disp_mul_conf.convertTo(dst,CV_16S);
    }
}
DisparityWLSFilterImpl::ComputeDiscontinuityAwareLRC_ParBody::ComputeDiscontinuityAwareLRC_ParBody(DisparityWLSFilterImpl& _wls, Mat& _left_disp, Mat& _right_disp, Mat& _left_disc, Mat& _right_disc, Mat& _dst, Rect _left_ROI, Rect _right_ROI, int _nstripes):
wls(&_wls),left_disp(&_left_disp),right_disp(&_right_disp),left_disc(&_left_disc),right_disc(&_right_disc),dst(&_dst),left_ROI(_left_ROI),right_ROI(_right_ROI),nstripes(_nstripes)
{
    stripe_sz = (int)ceil(left_disp->rows/(double)nstripes);
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
// ---- removed (old implementation) ----
class DisparityWLSFilterImpl : public DisparityWLSFilter
{
protected:
    double lambda,sigma_color;
    void init(double _lambda, double _sigma_color)
    {
        lambda = _lambda;
        sigma_color = _sigma_color;
    }
public:
    double getLambda() {return lambda;}
    void setLambda(double _lambda) {lambda = _lambda;}
    double getSigmaColor() {return sigma_color;}
    void setSigmaColor(double _sigma_color) {sigma_color = _sigma_color;}
    static Ptr<DisparityWLSFilterImpl> create()
    {
        DisparityWLSFilterImpl *wls = new DisparityWLSFilterImpl();
        wls->init(500.0,2.0);
        return Ptr<DisparityWLSFilterImpl>(wls);
    }
    void filter(InputArray disparity_map, InputArray left_view, OutputArray filtered_disparity_map,Rect ROI)
    {
        Mat disp,src,dst;
        setROI(disparity_map,left_view,filtered_disparity_map,disp,src,dst,ROI);
        weightedLeastSquaresFilter(src,disp,dst,lambda,sigma_color);
    }
};
CV_EXPORTS_W
Ptr<DisparityWLSFilter> createDisparityWLSFilter()
{
    return Ptr<DisparityWLSFilter>(DisparityWLSFilterImpl::create());
}
void setROI(InputArray disparity_map, InputArray left_view, OutputArray filtered_disparity_map,
            Mat& disp,Mat& src,Mat& dst,Rect ROI)
{
    Mat disp_full_size = disparity_map.getMat();
    Mat src_full_size = left_view.getMat();
    CV_Assert( (disp_full_size.depth() == CV_16S) && (disp_full_size.channels() == 1) );
    CV_Assert( (src_full_size.depth() == CV_8U) && (src_full_size.channels() <= 4) );
    disp = Mat(disp_full_size,ROI);
    src = Mat(src_full_size ,ROI);
    filtered_disparity_map.create(disp_full_size.size(), disp_full_size.type());
    Mat& dst_full_size = filtered_disparity_map.getMatRef();
    dst = Mat(dst_full_size,ROI);
}

// ---- added ----
void DisparityWLSFilterImpl::ComputeDiscontinuityAwareLRC_ParBody::operator() (const Range& range) const
{
    short* row_left;
    float* row_left_conf;
    short* row_right;
    float* row_right_conf;
    float* row_dst;
    int right_idx;
    int h = left_disp->rows;
    int start = std::min(range.start * stripe_sz, h);
    int end   = std::min(range.end   * stripe_sz, h);
    int thresh = (int)(wls->resize_factor*wls->LRC_thresh);
    for(int i=start;i<end;i++)
    {
        row_left = (short*)left_disp->ptr(i);
        row_left_conf = (float*)left_disc->ptr(i);
        row_right = (short*)right_disp->ptr(i);
        row_right_conf = (float*)right_disc->ptr(i);
        row_dst = (float*)dst->ptr(i);
        int j_start = left_ROI.x;
        int j_end = left_ROI.x+left_ROI.width;
        int right_end = right_ROI.x+right_ROI.width;
        for(int j=j_start;j<j_end;j++)
        {
            right_idx = j-(row_left[j]>>4);
            if( right_idx>=right_ROI.x && right_idx<right_end)
            {
                if(abs(row_left[j] + row_right[right_idx])< thresh)
                    row_dst[j] = min(row_left_conf[j],row_right_conf[right_idx]);
                else
                    row_dst[j] = 0.0f;
            }
        }
    }
}
DisparityWLSFilterImpl::ComputeDepthDisc_ParBody::ComputeDepthDisc_ParBody(DisparityWLSFilterImpl& _wls, Mat& _disp, Mat& _disp_squares, Mat& _dst, int _nstripes):
wls(&_wls),disp(&_disp),disp_squares(&_disp_squares),dst(&_dst),nstripes(_nstripes)
{
    stripe_sz = (int)ceil(disp->rows/(double)nstripes);
}
void DisparityWLSFilterImpl::ComputeDepthDisc_ParBody::operator() (const Range& range) const
{
    float* row_disp;
    float* row_disp_squares;
    float* row_dst;
    float variance;
    int h = disp->rows;
    int w = disp->cols;
    int start = std::min(range.start * stripe_sz, h);
    int end   = std::min(range.end   * stripe_sz, h);
    float roll_off = wls->depth_discontinuity_roll_off_factor/(wls->resize_factor*wls->resize_factor);
    for(int i=start;i<end;i++)
    {
        row_disp = (float*)disp->ptr(i);
        row_disp_squares = (float*)disp_squares->ptr(i);
        row_dst = (float*)dst->ptr(i);
        for(int j=0;j<w;j++)
        {
            variance = row_disp_squares[j] - (row_disp[j])*(row_disp[j]);
            row_dst[j] = max(1.0f - roll_off*variance,0.0f);
        }
    }
}
DisparityWLSFilterImpl::ParallelMatOp_ParBody::ParallelMatOp_ParBody(DisparityWLSFilterImpl& _wls, vector<MatOp> _ops, vector<Mat*>& _src, vector<Mat*>& _dst):
wls(&_wls),ops(_ops),src(_src),dst(_dst)
{}
void DisparityWLSFilterImpl::ParallelMatOp_ParBody::operator() (const Range& range) const
{
    for(int i=range.start;i<range.end;i++)
        (wls->*ops[i])(*src[i],*dst[i]);
}
CV_EXPORTS_W
Ptr<DisparityWLSFilter> createDisparityWLSFilter(bool use_confidence)
{
    return Ptr<DisparityWLSFilter>(DisparityWLSFilterImpl::create(use_confidence));
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
#define UNKNOWN_DISPARITY 16320
int readGT(String src_path,OutputArray dst)
{
    Mat src = imread(src_path,IMREAD_UNCHANGED);
    dst.create(src.rows,src.cols,CV_16S);
    Mat& dstMat = dst.getMatRef();
    if(!src.empty() && src.channels()==3 && src.depth()==CV_8U)
    {
        //MPI-Sintel format:
        for(int i=0;i<src.rows;i++)
            for(int j=0;j<src.cols;j++)
            {
                Vec3b bgrPixel = src.at<Vec3b>(i, j);
                dstMat.at<short>(i,j) = 64*bgrPixel.val[2]+bgrPixel.val[1]/4; //16-multiplied disparity
            }
        return 0;
    }
    else if(!src.empty() && src.channels()==1 && src.depth()==CV_8U)
    {
        //Middlebury format:
        for(int i=0;i<src.rows;i++)
            for(int j=0;j<src.cols;j++)
            {
                short src_val = src.at<unsigned char>(i, j);
                if(src_val==0)
                    dstMat.at<short>(i,j) = UNKNOWN_DISPARITY;
                else
                    dstMat.at<short>(i,j) = 16*src_val; //16-multiplied disparity
            }
        return 0;
    }
    else
        return 1;
}
double computeMSE(InputArray GT, InputArray src, Rect ROI)
{
    CV_Assert( !GT.empty() && (GT.depth() == CV_16S) && (GT.channels() == 1) );
    CV_Assert( !src.empty() && (src.depth() == CV_16S) && (src.channels() == 1) );
    CV_Assert( src.rows() == GT.rows() && src.cols() == GT.cols() );
    double res = 0;
    Mat GT_ROI (GT.getMat(), ROI);
    Mat src_ROI(src.getMat(),ROI);
    int cnt=0;
    for(int i=0;i<src_ROI.rows;i++)
        for(int j=0;j<src_ROI.cols;j++)
        {
            if(GT_ROI.at<short>(i,j)!=UNKNOWN_DISPARITY)
            {
                res += (GT_ROI.at<short>(i,j) - src_ROI.at<short>(i,j))*(GT_ROI.at<short>(i,j) - src_ROI.at<short>(i,j));
                cnt++;
            }
        }
    res /= cnt*256;
    return res;
}
double computeBadPixelPercent(InputArray GT, InputArray src, Rect ROI, int thresh)
{
    CV_Assert( !GT.empty() && (GT.depth() == CV_16S) && (GT.channels() == 1) );
    CV_Assert( !src.empty() && (src.depth() == CV_16S) && (src.channels() == 1) );
    CV_Assert( src.rows() == GT.rows() && src.cols() == GT.cols() );
    int bad_pixel_num = 0;
    Mat GT_ROI (GT.getMat(), ROI);
    Mat src_ROI(src.getMat(),ROI);
    int cnt=0;
    for(int i=0;i<src_ROI.rows;i++)
        for(int j=0;j<src_ROI.cols;j++)
        {
            if(GT_ROI.at<short>(i,j)!=UNKNOWN_DISPARITY)
            {
                if( abs(GT_ROI.at<short>(i,j) - src_ROI.at<short>(i,j))>=thresh )
                    bad_pixel_num++;
                cnt++;
            }
        }
    return (100.0*bad_pixel_num)/cnt;
}
void getDisparityVis(InputArray src,OutputArray dst,double scale)
{
    CV_Assert( !src.empty() && (src.depth() == CV_16S) && (src.channels() == 1) );
    Mat srcMat = src.getMat();
    dst.create(srcMat.rows,srcMat.cols,CV_8UC1);
    Mat& dstMat = dst.getMatRef();
    for(int i=0;i<dstMat.rows;i++)
        for(int j=0;j<dstMat.cols;j++)
        {
            if(srcMat.at<short>(i,j)==UNKNOWN_DISPARITY)
                dstMat.at<unsigned char>(i,j) = 0;
            else
                dstMat.at<unsigned char>(i,j) = saturate_cast<unsigned char>(scale*srcMat.at<short>(i,j)/16.0);
        }
}
}
}
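A minimal usage sketch for the confidence-aware path added above (a rectified input pair is assumed; file names, parameter values and the hand-built ROI are illustrative, while the ximgproc calls are the ones introduced in this file and its header):

#include "opencv2/ximgproc/disparity_filter.hpp"
#include "opencv2/calib3d.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/imgcodecs.hpp"
using namespace cv;
using namespace cv::ximgproc;

int main()
{
    Mat left = imread("left.png"), right = imread("right.png"); // rectified pair (illustrative paths)
    Mat left_gray, right_gray;
    cvtColor(left, left_gray, COLOR_BGR2GRAY);
    cvtColor(right, right_gray, COLOR_BGR2GRAY);

    int max_disp = 160, wsize = 15;
    Ptr<StereoBM> left_matcher  = StereoBM::create(max_disp, wsize);
    Ptr<StereoBM> right_matcher = StereoBM::create(max_disp, wsize);
    right_matcher->setMinDisparity(-max_disp + 1); // mirror the disparity range for the right view

    Mat left_disp, right_disp, filtered_disp;
    left_matcher ->compute(left_gray,  right_gray, left_disp);
    right_matcher->compute(right_gray, left_gray,  right_disp);

    // the right disparity map drives the LRC-based confidence:
    Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilter(true);
    wls_filter->setLambda(8000.0);
    wls_filter->setSigmaColor(1.5);
    Rect ROI(max_disp, wsize/2, left.cols - max_disp - wsize/2, left.rows - wsize); // crude valid region
    wls_filter->filter(left_disp, left, filtered_disp, ROI, right_disp);
    Mat conf = wls_filter->getConfidenceMap(); // CV_32F, values in [0,255]
    return 0;
}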

@ -0,0 +1,694 @@
/*
* By downloading, copying, installing or using the software you agree to this license.
* If you do not agree to this license, do not download, install,
* copy or use the software.
*
*
* License Agreement
* For Open Source Computer Vision Library
* (3 - clause BSD License)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met :
*
* *Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and / or other materials provided with the distribution.
*
* * Neither the names of the copyright holders nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided by the copyright holders and contributors "as is" and
* any express or implied warranties, including, but not limited to, the implied
* warranties of merchantability and fitness for a particular purpose are disclaimed.
* In no event shall copyright holders or contributors be liable for any direct,
* indirect, incidental, special, exemplary, or consequential damages
* (including, but not limited to, procurement of substitute goods or services;
* loss of use, data, or profits; or business interruption) however caused
* and on any theory of liability, whether in contract, strict liability,
* or tort(including negligence or otherwise) arising in any way out of
* the use of this software, even if advised of the possibility of such damage.
*/
#include "precomp.hpp"
#include "opencv2/hal/intrin.hpp"
#include <vector>
namespace cv {
namespace ximgproc {
using std::vector;
typedef float WorkType;
typedef Vec<WorkType, 1> WorkVec;
typedef WorkType (*get_weight_op)(WorkType*, unsigned char*,unsigned char*);
inline WorkType get_weight_1channel(WorkType* LUT, unsigned char* p1,unsigned char* p2)
{
return LUT[ (p1[0]-p2[0])*(p1[0]-p2[0]) ];
}
inline WorkType get_weight_3channel(WorkType* LUT, unsigned char* p1,unsigned char* p2)
{
return LUT[ (p1[0]-p2[0])*(p1[0]-p2[0])+
(p1[1]-p2[1])*(p1[1]-p2[1])+
(p1[2]-p2[2])*(p1[2]-p2[2]) ];
}
class FastGlobalSmootherFilterImpl : public FastGlobalSmootherFilter
{
public:
static Ptr<FastGlobalSmootherFilterImpl> create(InputArray guide, double lambda, double sigma_color, int num_iter,double lambda_attenuation);
void filter(InputArray src, OutputArray dst);
protected:
int w,h;
int num_stripes;
float sigmaColor,lambda;
float lambda_attenuation;
int num_iter;
Mat weights_LUT;
Mat Chor, Cvert;
Mat interD;
void init(InputArray guide,double _lambda,double _sigmaColor,int _num_iter,double _lambda_attenuation);
void horizontalPass(Mat& cur);
void verticalPass(Mat& cur);
protected:
struct HorizontalPass_ParBody : public ParallelLoopBody
{
FastGlobalSmootherFilterImpl* fgs;
Mat* cur;
int nstripes, stripe_sz;
int h;
HorizontalPass_ParBody(FastGlobalSmootherFilterImpl &_fgs, Mat& _cur, int _nstripes, int _h);
void operator () (const Range& range) const;
};
inline void process_4row_block(Mat* cur,int i);
inline void process_row(Mat* cur,int i);
struct VerticalPass_ParBody : public ParallelLoopBody
{
FastGlobalSmootherFilterImpl* fgs;
Mat* cur;
int nstripes, stripe_sz;
int w;
VerticalPass_ParBody(FastGlobalSmootherFilterImpl &_fgs, Mat& _cur, int _nstripes, int _w);
void operator () (const Range& range) const;
};
template<get_weight_op get_weight, const int num_ch>
struct ComputeHorizontalWeights_ParBody : public ParallelLoopBody
{
FastGlobalSmootherFilterImpl* fgs;
Mat* guide;
int nstripes, stripe_sz;
int h;
ComputeHorizontalWeights_ParBody(FastGlobalSmootherFilterImpl &_fgs, Mat& _guide, int _nstripes, int _h);
void operator () (const Range& range) const;
};
template<get_weight_op get_weight, const int num_ch>
struct ComputeVerticalWeights_ParBody : public ParallelLoopBody
{
FastGlobalSmootherFilterImpl* fgs;
Mat* guide;
int nstripes, stripe_sz;
int w;
ComputeVerticalWeights_ParBody(FastGlobalSmootherFilterImpl &_fgs, Mat& _guide, int _nstripes, int _w);
void operator () (const Range& range) const;
};
struct ComputeLUT_ParBody : public ParallelLoopBody
{
FastGlobalSmootherFilterImpl* fgs;
WorkType* LUT;
int nstripes, stripe_sz;
int sz;
ComputeLUT_ParBody(FastGlobalSmootherFilterImpl &_fgs, WorkType* _LUT, int _nstripes, int _sz);
void operator () (const Range& range) const;
};
};
void FastGlobalSmootherFilterImpl::init(InputArray guide,double _lambda,double _sigmaColor,int _num_iter,double _lambda_attenuation)
{
CV_Assert( !guide.empty() && _lambda >= 0 && _sigmaColor >= 0 && _num_iter >=1 );
CV_Assert( guide.depth() == CV_8U && (guide.channels() == 1 || guide.channels() == 3) );
sigmaColor = (float)_sigmaColor;
lambda = (float)_lambda;
lambda_attenuation = (float)_lambda_attenuation;
num_iter = _num_iter;
num_stripes = getNumThreads();
int num_levels = 3*256*256;
weights_LUT.create(1,num_levels,WorkVec::type);
WorkType* LUT = (WorkType*)weights_LUT.ptr(0);
parallel_for_(Range(0,num_stripes),ComputeLUT_ParBody(*this,LUT,num_stripes,num_levels));
w = guide.cols();
h = guide.rows();
Chor. create(h,w,WorkVec::type);
Cvert. create(h,w,WorkVec::type);
interD.create(h,w,WorkVec::type);
Mat guideMat = guide.getMat();
if(guide.channels() == 1)
{
parallel_for_(Range(0,num_stripes),ComputeHorizontalWeights_ParBody<get_weight_1channel,1>(*this,guideMat,num_stripes,h));
parallel_for_(Range(0,num_stripes),ComputeVerticalWeights_ParBody <get_weight_1channel,1>(*this,guideMat,num_stripes,w));
}
if(guide.channels() == 3)
{
parallel_for_(Range(0,num_stripes),ComputeHorizontalWeights_ParBody<get_weight_3channel,3>(*this,guideMat,num_stripes,h));
parallel_for_(Range(0,num_stripes),ComputeVerticalWeights_ParBody <get_weight_3channel,3>(*this,guideMat,num_stripes,w));
}
}
Ptr<FastGlobalSmootherFilterImpl> FastGlobalSmootherFilterImpl::create(InputArray guide, double lambda, double sigma_color, int num_iter, double lambda_attenuation)
{
FastGlobalSmootherFilterImpl *fgs = new FastGlobalSmootherFilterImpl();
fgs->init(guide,lambda,sigma_color,num_iter,lambda_attenuation);
return Ptr<FastGlobalSmootherFilterImpl>(fgs);
}
void FastGlobalSmootherFilterImpl::filter(InputArray src, OutputArray dst)
{
CV_Assert(!src.empty() && (src.depth() == CV_8U || src.depth() == CV_16S || src.depth() == CV_32F) && src.channels()<=4);
if (src.rows() != h || src.cols() != w)
{
CV_Error(Error::StsBadSize, "Size of the filtered image must be equal to the size of the guide image");
return;
}
vector<Mat> src_channels;
vector<Mat> dst_channels;
if(src.channels()==1)
src_channels.push_back(src.getMat());
else
split(src,src_channels);
float lambda_ref = lambda;
for(int i=0;i<src.channels();i++)
{
lambda = lambda_ref;
Mat cur_res = src_channels[i].clone();
if(src.depth()!=WorkVec::type)
cur_res.convertTo(cur_res,WorkVec::type);
for(int n=0;n<num_iter;n++)
{
horizontalPass(cur_res);
verticalPass(cur_res);
lambda*=lambda_attenuation;
}
Mat dstMat;
if(src.depth()!=WorkVec::type)
cur_res.convertTo(dstMat,src.depth());
else
dstMat = cur_res;
dst_channels.push_back(dstMat);
}
lambda = lambda_ref;
dst.create(src.size(),src.type());
if(src.channels()==1)
{
Mat& dstMat = dst.getMatRef();
dstMat = dst_channels[0];
}
else
merge(dst_channels,dst);
}
void FastGlobalSmootherFilterImpl::horizontalPass(Mat& cur)
{
parallel_for_(Range(0,num_stripes),HorizontalPass_ParBody(*this,cur,num_stripes,h));
}
void FastGlobalSmootherFilterImpl::verticalPass(Mat& cur)
{
parallel_for_(Range(0,num_stripes),VerticalPass_ParBody(*this,cur,num_stripes,w));
}
FastGlobalSmootherFilterImpl::HorizontalPass_ParBody::HorizontalPass_ParBody(FastGlobalSmootherFilterImpl &_fgs, Mat& _cur, int _nstripes, int _h):
fgs(&_fgs),cur(&_cur), nstripes(_nstripes), h(_h)
{
stripe_sz = (int)ceil(h/(double)nstripes);
}
void FastGlobalSmootherFilterImpl::process_4row_block(Mat* cur,int i)
{
WorkType denom,denom_next,denom_next2,denom_next3;
WorkType *Chor_row = (WorkType*)Chor.ptr (i);
WorkType *interD_row = (WorkType*)interD.ptr(i);
WorkType *cur_row = (WorkType*)cur->ptr (i);
WorkType *Chor_row_next = (WorkType*)Chor.ptr (i+1);
WorkType *interD_row_next = (WorkType*)interD.ptr(i+1);
WorkType *cur_row_next = (WorkType*)cur->ptr (i+1);
WorkType *Chor_row_next2 = (WorkType*)Chor.ptr (i+2);
WorkType *interD_row_next2 = (WorkType*)interD.ptr(i+2);
WorkType *cur_row_next2 = (WorkType*)cur->ptr (i+2);
WorkType *Chor_row_next3 = (WorkType*)Chor.ptr (i+3);
WorkType *interD_row_next3 = (WorkType*)interD.ptr(i+3);
WorkType *cur_row_next3 = (WorkType*)cur->ptr (i+3);
float coef_cur, coef_prev;
float coef_cur_row_next, coef_prev_row_next;
float coef_cur_row_next2,coef_prev_row_next2;
float coef_cur_row_next3,coef_prev_row_next3;
//forward pass:
coef_prev = lambda*Chor_row[0];
coef_prev_row_next = lambda*Chor_row_next[0];
coef_prev_row_next2 = lambda*Chor_row_next2[0];
coef_prev_row_next3 = lambda*Chor_row_next3[0];
interD_row[0] = coef_prev /(1-coef_prev);
interD_row_next[0] = coef_prev_row_next /(1-coef_prev_row_next);
interD_row_next2[0] = coef_prev_row_next2/(1-coef_prev_row_next2);
interD_row_next3[0] = coef_prev_row_next3/(1-coef_prev_row_next3);
cur_row[0] = cur_row[0] /(1-coef_prev);
cur_row_next[0] = cur_row_next[0] /(1-coef_prev_row_next);
cur_row_next2[0] = cur_row_next2[0]/(1-coef_prev_row_next2);
cur_row_next3[0] = cur_row_next3[0]/(1-coef_prev_row_next3);
int j=1;
#if CV_SIMD128
{
v_float32x4 coef_prev_reg(coef_prev,coef_prev_row_next,coef_prev_row_next2,coef_prev_row_next3);
v_float32x4 interD_prev_reg(interD_row[0],interD_row_next[0],interD_row_next2[0],interD_row_next3[0]);
v_float32x4 cur_prev_reg(cur_row[0],cur_row_next[0],cur_row_next2[0],cur_row_next3[0]);
v_float32x4 lambda_reg(lambda,lambda,lambda,lambda);
v_float32x4 one_reg(1.0f,1.0f,1.0f,1.0f);
v_float32x4 a0,a1,a2,a3;
v_float32x4 b0,b1,b2,b3;
v_float32x4 aux0,aux1,aux2,aux3;
#define PROC4(Chor_in,cur_in,coef_prev_in,interD_prev_in,cur_prev_in,interD_out,cur_out,coef_cur_out)\
coef_cur_out = lambda_reg*Chor_in;\
aux0 = interD_prev_in*coef_prev_in;\
aux1 = coef_cur_out+coef_prev_in;\
aux1 = one_reg-aux1;\
aux0 = aux1-aux0;\
interD_out = coef_cur_out/aux0;\
aux1 = cur_prev_in*coef_prev_in;\
aux1 = cur_in - aux1;\
cur_out = aux1/aux0;
for(;j<w-3;j+=4)
{
// processing a 4x4 block:
aux0 = v_load(Chor_row+j);
aux1 = v_load(Chor_row_next+j);
aux2 = v_load(Chor_row_next2+j);
aux3 = v_load(Chor_row_next3+j);
v_transpose4x4(aux0,aux1,aux2,aux3,a0,a1,a2,a3);
aux0 = v_load(cur_row+j);
aux1 = v_load(cur_row_next+j);
aux2 = v_load(cur_row_next2+j);
aux3 = v_load(cur_row_next3+j);
v_transpose4x4(aux0,aux1,aux2,aux3,b0,b1,b2,b3);
PROC4(a0,b0,coef_prev_reg,interD_prev_reg,cur_prev_reg,a0,b0,aux2);
PROC4(a1,b1,aux2,a0,b0,a1,b1,aux3);
PROC4(a2,b2,aux3,a1,b1,a2,b2,aux2);
PROC4(a3,b3,aux2,a2,b2,a3,b3,aux3);
interD_prev_reg = a3;
cur_prev_reg = b3;
coef_prev_reg = aux3;
v_transpose4x4(a0,a1,a2,a3,aux0,aux1,aux2,aux3);
v_store(interD_row+j,aux0);
v_store(interD_row_next+j,aux1);
v_store(interD_row_next2+j,aux2);
v_store(interD_row_next3+j,aux3);
v_transpose4x4(b0,b1,b2,b3,aux0,aux1,aux2,aux3);
v_store(cur_row+j,aux0);
v_store(cur_row_next+j,aux1);
v_store(cur_row_next2+j,aux2);
v_store(cur_row_next3+j,aux3);
}
#undef PROC4
}
#endif
for(;j<w;j++)
{
coef_prev = lambda*Chor_row[j-1];
coef_prev_row_next = lambda*Chor_row_next[j-1];
coef_prev_row_next2 = lambda*Chor_row_next2[j-1];
coef_prev_row_next3 = lambda*Chor_row_next3[j-1];
coef_cur = lambda*Chor_row[j];
coef_cur_row_next = lambda*Chor_row_next[j];
coef_cur_row_next2 = lambda*Chor_row_next2[j];
coef_cur_row_next3 = lambda*Chor_row_next3[j];
denom = (1-coef_prev -coef_cur) -interD_row[j-1] *coef_prev;
denom_next = (1-coef_prev_row_next -coef_cur_row_next) -interD_row_next[j-1] *coef_prev_row_next;
denom_next2 = (1-coef_prev_row_next2-coef_cur_row_next2)-interD_row_next2[j-1]*coef_prev_row_next2;
denom_next3 = (1-coef_prev_row_next3-coef_cur_row_next3)-interD_row_next3[j-1]*coef_prev_row_next3;
interD_row[j] = coef_cur /denom;
interD_row_next[j] = coef_cur_row_next /denom_next;
interD_row_next2[j] = coef_cur_row_next2/denom_next2;
interD_row_next3[j] = coef_cur_row_next3/denom_next3;
cur_row[j] = (cur_row[j] -cur_row[j-1] *coef_prev) /denom;
cur_row_next[j] = (cur_row_next[j] -cur_row_next[j-1] *coef_prev_row_next) /denom_next;
cur_row_next2[j] = (cur_row_next2[j]-cur_row_next2[j-1]*coef_prev_row_next2)/denom_next2;
cur_row_next3[j] = (cur_row_next3[j]-cur_row_next3[j-1]*coef_prev_row_next3)/denom_next3;
}
//backward pass:
j = w-2;
#if CV_SIMD128
{
v_float32x4 cur_next_reg(cur_row[w-1],cur_row_next[w-1],cur_row_next2[w-1],cur_row_next3[w-1]);
v_float32x4 a0,a1,a2,a3;
v_float32x4 b0,b1,b2,b3;
v_float32x4 aux0,aux1,aux2,aux3;
for(j-=3;j>=0;j-=4)
{
//process 4x4 block:
aux0 = v_load(interD_row+j);
aux1 = v_load(interD_row_next+j);
aux2 = v_load(interD_row_next2+j);
aux3 = v_load(interD_row_next3+j);
v_transpose4x4(aux0,aux1,aux2,aux3,a0,a1,a2,a3);
aux0 = v_load(cur_row+j);
aux1 = v_load(cur_row_next+j);
aux2 = v_load(cur_row_next2+j);
aux3 = v_load(cur_row_next3+j);
v_transpose4x4(aux0,aux1,aux2,aux3,b0,b1,b2,b3);
aux0 = a3*cur_next_reg;
b3 = b3-aux0;
aux0 = a2*b3;
b2 = b2-aux0;
aux0 = a1*b2;
b1 = b1-aux0;
aux0 = a0*b1;
b0 = b0-aux0;
cur_next_reg = b0;
v_transpose4x4(b0,b1,b2,b3,aux0,aux1,aux2,aux3);
v_store(cur_row+j,aux0);
v_store(cur_row_next+j,aux1);
v_store(cur_row_next2+j,aux2);
v_store(cur_row_next3+j,aux3);
}
j+=3;
}
#endif
for(;j>=0;j--)
{
cur_row[j] = cur_row[j] -interD_row[j] *cur_row[j+1];
cur_row_next[j] = cur_row_next[j] -interD_row_next[j] *cur_row_next[j+1];
cur_row_next2[j] = cur_row_next2[j]-interD_row_next2[j]*cur_row_next2[j+1];
cur_row_next3[j] = cur_row_next3[j]-interD_row_next3[j]*cur_row_next3[j+1];
}
}
void FastGlobalSmootherFilterImpl::process_row(Mat* cur,int i)
{
WorkType denom;
WorkType *Chor_row = (WorkType*)Chor.ptr(i);
WorkType *interD_row = (WorkType*)interD.ptr(i);
WorkType *cur_row = (WorkType*)cur->ptr(i);
float coef_cur,coef_prev;
//forward pass:
coef_prev = lambda*Chor_row[0];
interD_row[0] = coef_prev/(1-coef_prev);
cur_row[0] = cur_row[0]/(1-coef_prev);
for(int j=1;j<w;j++)
{
coef_cur = lambda*Chor_row[j];
denom = (1-coef_prev-coef_cur)-interD_row[j-1]*coef_prev;
interD_row[j] = coef_cur/denom;
cur_row[j] = (cur_row[j]-cur_row[j-1]*coef_prev)/denom;
coef_prev = coef_cur;
}
//backward pass:
for(int j=w-2;j>=0;j--)
cur_row[j] = cur_row[j]-interD_row[j]*cur_row[j+1];
}
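// Both passes implement the Thomas algorithm for the per-row tridiagonal system
// (I + lambda*A)u = f produced by the 1D smoothing step: the forward loop computes
// the modified upper diagonal (interD) and right-hand side (cur) in place, and the
// backward loop back-substitutes. The off-diagonal terms are the negated guide
// affinities in Chor, so each pass is O(w) per row; process_4row_block above runs
// the same recurrence interleaved over four rows so the v_float32x4 SIMD lanes stay full.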
void FastGlobalSmootherFilterImpl::HorizontalPass_ParBody::operator()(const Range& range) const
{
int start = std::min(range.start * stripe_sz, h);
int end = std::min(range.end * stripe_sz, h);
int i=start;
for(;i<end-3;i+=4)
fgs->process_4row_block(cur,i);
for(;i<end;i++)
fgs->process_row(cur,i);
}
FastGlobalSmootherFilterImpl::VerticalPass_ParBody::VerticalPass_ParBody(FastGlobalSmootherFilterImpl &_fgs, Mat& _cur, int _nstripes, int _w):
fgs(&_fgs),cur(&_cur), nstripes(_nstripes), w(_w)
{
stripe_sz = (int)ceil(w/(double)nstripes);
}
void FastGlobalSmootherFilterImpl::VerticalPass_ParBody::operator()(const Range& range) const
{
int start = std::min(range.start * stripe_sz, w);
int end = std::min(range.end * stripe_sz, w);
//float lambda = fgs->lambda;
WorkType denom;
WorkType *Cvert_row, *Cvert_row_prev;
WorkType *interD_row, *interD_row_prev, *cur_row, *cur_row_prev, *cur_row_next;
float coef_cur,coef_prev;
Cvert_row = (WorkType*)fgs->Cvert.ptr(0);
interD_row = (WorkType*)fgs->interD.ptr(0);
cur_row = (WorkType*)cur->ptr(0);
//forward pass:
for(int j=start;j<end;j++)
{
coef_cur = fgs->lambda*Cvert_row[j];
interD_row[j] = coef_cur/(1-coef_cur);
cur_row[j] = cur_row[j]/(1-coef_cur);
}
for(int i=1;i<fgs->h;i++)
{
Cvert_row = (WorkType*)fgs->Cvert.ptr(i);
Cvert_row_prev = (WorkType*)fgs->Cvert.ptr(i-1);
interD_row = (WorkType*)fgs->interD.ptr(i);
interD_row_prev = (WorkType*)fgs->interD.ptr(i-1);
cur_row = (WorkType*)cur->ptr(i);
cur_row_prev = (WorkType*)cur->ptr(i-1);
int j = start;
#if CV_SIMD128
v_float32x4 a,b,c,d,coef_cur_reg,coef_prev_reg;
v_float32x4 one_reg(1.0f,1.0f,1.0f,1.0f);
v_float32x4 lambda_reg(fgs->lambda,fgs->lambda,fgs->lambda,fgs->lambda);
int sz4 = 4*((end-start)/4);
int end4 = start+sz4;
for(;j<end4;j+=4)
{
a = v_load(Cvert_row_prev+j);
b = v_load(Cvert_row+j);
coef_prev_reg = lambda_reg*a;
coef_cur_reg = lambda_reg*b;
a = v_load(interD_row_prev+j);
a = a*coef_prev_reg;
b = coef_prev_reg+coef_cur_reg;
b = b+a;
a = one_reg-b; //computed denom
b = coef_cur_reg/a; //computed interD_row
c = v_load(cur_row_prev+j);
c = c*coef_prev_reg;
d = v_load(cur_row+j);
d = d-c;
d = d/a; //computed cur_row
v_store(interD_row+j,b);
v_store(cur_row+j,d);
}
#endif
for(;j<end;j++)
{
coef_prev = fgs->lambda*Cvert_row_prev[j];
coef_cur = fgs->lambda*Cvert_row[j];
denom = (1-coef_prev-coef_cur)-interD_row_prev[j]*coef_prev;
interD_row[j] = coef_cur/denom;
cur_row[j] = (cur_row[j]-cur_row_prev[j]*coef_prev)/denom;
}
}
//backward pass:
for(int i=fgs->h-2;i>=0;i--)
{
interD_row = (WorkType*)fgs->interD.ptr(i);
cur_row = (WorkType*)cur->ptr(i);
cur_row_next = (WorkType*)cur->ptr(i+1);
int j = start;
#if CV_SIMD128
v_float32x4 a,b;
int sz4 = 4*((end-start)/4);
int end4 = start+sz4;
for(;j<end4;j+=4)
{
a = v_load(interD_row+j);
b = v_load(cur_row_next+j);
b = a*b;
a = v_load(cur_row+j);
b = a-b;
v_store(cur_row+j,b);
}
#endif
for(;j<end;j++)
cur_row[j] = cur_row[j]-interD_row[j]*cur_row_next[j];
}
}
template<get_weight_op get_weight, const int num_ch>
FastGlobalSmootherFilterImpl::ComputeHorizontalWeights_ParBody<get_weight,num_ch>::ComputeHorizontalWeights_ParBody(FastGlobalSmootherFilterImpl &_fgs, Mat& _guide, int _nstripes, int _h):
fgs(&_fgs),guide(&_guide), nstripes(_nstripes), h(_h)
{
stripe_sz = (int)ceil(h/(double)nstripes);
}
template<get_weight_op get_weight, const int num_ch>
void FastGlobalSmootherFilterImpl::ComputeHorizontalWeights_ParBody<get_weight,num_ch>::operator()(const Range& range) const
{
int start = std::min(range.start * stripe_sz, h);
int end = std::min(range.end * stripe_sz, h);
WorkType* LUT = (WorkType*)fgs->weights_LUT.ptr(0);
unsigned char *row;
WorkType *Chor_row;
for(int i=start;i<end;i++)
{
row = guide->ptr(i);
Chor_row = (WorkType*)fgs->Chor.ptr(i);
Chor_row[0] = get_weight(LUT,row,row+num_ch);
row+=num_ch;
for(int j=1;j<fgs->w-1;j++)
{
Chor_row[j] = get_weight(LUT,row,row+num_ch);
row+=num_ch;
}
Chor_row[fgs->w-1]=0;
}
}
template<get_weight_op get_weight, const int num_ch>
FastGlobalSmootherFilterImpl::ComputeVerticalWeights_ParBody<get_weight,num_ch>::ComputeVerticalWeights_ParBody(FastGlobalSmootherFilterImpl &_fgs, Mat& _guide, int _nstripes, int _w):
fgs(&_fgs),guide(&_guide), nstripes(_nstripes), w(_w)
{
stripe_sz = (int)ceil(w/(double)nstripes);
}
template<get_weight_op get_weight, const int num_ch>
void FastGlobalSmootherFilterImpl::ComputeVerticalWeights_ParBody<get_weight,num_ch>::operator()(const Range& range) const
{
int start = std::min(range.start * stripe_sz, w);
int end = std::min(range.end * stripe_sz, w);
WorkType* LUT = (WorkType*)fgs->weights_LUT.ptr(0);
unsigned char *row,*row_next;
WorkType *Cvert_row;
Cvert_row = (WorkType*)fgs->Cvert.ptr(0);
row = guide->ptr(0)+start*num_ch;
row_next = guide->ptr(1)+start*num_ch;
for(int j=start;j<end;j++)
{
Cvert_row[j] = get_weight(LUT,row,row_next);
row+=num_ch;
row_next+=num_ch;
}
for(int i=1;i<fgs->h-1;i++)
{
row = guide->ptr(i)+start*num_ch;
row_next = guide->ptr(i+1)+start*num_ch;
Cvert_row = (WorkType*)fgs->Cvert.ptr(i);
for(int j=start;j<end;j++)
{
Cvert_row[j] = get_weight(LUT,row,row_next);
row+=num_ch;
row_next+=num_ch;
}
}
Cvert_row = (WorkType*)fgs->Cvert.ptr(fgs->h-1);
for(int j=start;j<end;j++)
Cvert_row[j] = 0;
}
FastGlobalSmootherFilterImpl::ComputeLUT_ParBody::ComputeLUT_ParBody(FastGlobalSmootherFilterImpl &_fgs, WorkType *_LUT, int _nstripes, int _sz):
fgs(&_fgs), LUT(_LUT), nstripes(_nstripes), sz(_sz)
{
stripe_sz = (int)ceil(sz/(double)nstripes);
}
void FastGlobalSmootherFilterImpl::ComputeLUT_ParBody::operator()(const Range& range) const
{
int start = std::min(range.start * stripe_sz, sz);
int end = std::min(range.end * stripe_sz, sz);
for(int i=start;i<end;i++)
LUT[i] = (WorkType)(-cv::exp(-sqrt((float)i)/fgs->sigmaColor));
}
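// The LUT maps the squared color distance between two neighboring guide pixels
// (the index computed by get_weight_1channel/get_weight_3channel) to the affinity
// -exp(-sqrt(dist2)/sigmaColor); the negation bakes the sign of the off-diagonal
// matrix entries directly into the stored weights.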
////////////////////////////////////////////////////////////////////////////////////////////////
CV_EXPORTS_W
Ptr<FastGlobalSmootherFilter> createFastGlobalSmootherFilter(InputArray guide, double lambda, double sigma_color, double lambda_attenuation, int num_iter)
{
return Ptr<FastGlobalSmootherFilter>(FastGlobalSmootherFilterImpl::create(guide, lambda, sigma_color, num_iter, lambda_attenuation));
}
CV_EXPORTS_W
void fastGlobalSmootherFilter(InputArray guide, InputArray src, OutputArray dst, double lambda, double sigma_color, double lambda_attenuation, int num_iter)
{
Ptr<FastGlobalSmootherFilter> fgs = createFastGlobalSmootherFilter(guide, lambda, sigma_color, lambda_attenuation, num_iter);
fgs->filter(src, dst);
}
}
}
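The exported wrappers above can also be used standalone, outside the disparity pipeline. A short sketch (file names illustrative; the five-argument call relies on default values for lambda_attenuation and num_iter, just as the disparity filter's own call to fastGlobalSmootherFilter does, and assumes the declarations live in edge_filter.hpp, which this PR also touches):

#include "opencv2/ximgproc/edge_filter.hpp"
#include "opencv2/imgcodecs.hpp"
using namespace cv;
using namespace cv::ximgproc;

int main()
{
    Mat guide = imread("guide.png");   // 8-bit, 1 or 3 channels
    Mat src   = imread("noisy.png");   // image to be smoothed, same size as the guide
    Mat dst;
    // lambda controls smoothing strength, sigma_color the edge sensitivity
    fastGlobalSmootherFilter(guide, src, dst, 8000.0, 1.5);
    imwrite("smoothed.png", dst);
    return 0;
}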

@ -273,8 +273,8 @@ void SuperpixelSEEDSImpl::initialize(int num_superpixels, int num_levels)
parent_pre_init.resize(seeds_nr_levels);
nr_wh.resize(2 * seeds_nr_levels);
int level = 0;
int nr_seeds_w = (width / seeds_w);   // was: (int)floor(width / seeds_w) -- floor is redundant for integer division
int nr_seeds_h = (height / seeds_h);  // was: (int)floor(height / seeds_h)
nr_wh[2 * level] = nr_seeds_w;
nr_wh[2 * level + 1] = nr_seeds_h;
parent_mat.push_back(Mat(nr_seeds_h, nr_seeds_w, CV_32SC1));

@ -188,8 +188,8 @@ static void gradientHist(const cv::Mat &src, cv::Mat &magnitude, cv::Mat &histog
magnitude.create( src.size(), cv::DataType<float>::type );
phase.create( src.size(), cv::DataType<float>::type );
histogram.create( cv::Size( cvCeil(src.size().width/float(pSize)),
                            cvCeil(src.size().height/float(pSize)) ), // was: cvRound -- ceil guarantees every patch is covered
CV_MAKETYPE(cv::DataType<float>::type, nBins) );
histogram.setTo(0);

@ -1,284 +0,0 @@
/*
* By downloading, copying, installing or using the software you agree to this license.
* If you do not agree to this license, do not download, install,
* copy or use the software.
*
*
* License Agreement
* For Open Source Computer Vision Library
* (3 - clause BSD License)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met :
*
* *Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and / or other materials provided with the distribution.
*
* * Neither the names of the copyright holders nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided by the copyright holders and contributors "as is" and
* any express or implied warranties, including, but not limited to, the implied
* warranties of merchantability and fitness for a particular purpose are disclaimed.
* In no event shall copyright holders or contributors be liable for any direct,
* indirect, incidental, special, exemplary, or consequential damages
* (including, but not limited to, procurement of substitute goods or services;
* loss of use, data, or profits; or business interruption) however caused
* and on any theory of liability, whether in contract, strict liability,
* or tort(including negligence or otherwise) arising in any way out of
* the use of this software, even if advised of the possibility of such damage.
*/
#include "precomp.hpp"
namespace cv {
namespace ximgproc {
class WeightedLeastSquaresFilterImpl : public WeightedLeastSquaresFilter
{
public:
static Ptr<WeightedLeastSquaresFilterImpl> create(InputArray guide, double lambda, double sigma_color, int num_iter);
void filter(InputArray src, OutputArray dst);
~WeightedLeastSquaresFilterImpl();
protected:
int w,h;
double sigmaColor,lambda;
int num_iter;
double *weights_LUT;
double *Ahor, *Bhor, *Chor,
*Avert, *Bvert, *Cvert;
double *interD,*interE,*cur_res;
void init(InputArray guide,double _lambda,double _sigmaColor,int _num_iter);
void buildCoefMatrices(Mat& guide);
void horizontalPass(double* cur);
void verticalPass(double* cur);
};
void WeightedLeastSquaresFilterImpl::init(InputArray guide,double _lambda,double _sigmaColor,int _num_iter)
{
//currently support only 3 channel 8bit images as guides
CV_Assert( !guide.empty() && _lambda >= 0 && _sigmaColor >= 0 && _num_iter >=1 );
CV_Assert( guide.depth() == CV_8U && guide.channels() == 3 );
sigmaColor = _sigmaColor;
lambda = _lambda;
num_iter = _num_iter;
int num_levels = 3*256*256;
weights_LUT = new double[num_levels];
for(int i=0;i<num_levels;i++)
weights_LUT[i] = exp(-sqrt((double)i)/sigmaColor);
w = guide.cols();
h = guide.rows();
int sz = w*h;
Ahor = new double[sz];Bhor = new double[sz];Chor = new double[sz];
Avert = new double[sz];Bvert = new double[sz];Cvert = new double[sz];
interD = new double[sz];interE = new double[sz];cur_res = new double[sz];
Mat guideMat = guide.getMat();
buildCoefMatrices(guideMat);
}
Ptr<WeightedLeastSquaresFilterImpl> WeightedLeastSquaresFilterImpl::create(InputArray guide, double lambda, double sigma_color, int num_iter)
{
WeightedLeastSquaresFilterImpl *wls = new WeightedLeastSquaresFilterImpl();
wls->init(guide,lambda,sigma_color,num_iter);
return Ptr<WeightedLeastSquaresFilterImpl>(wls);
}
WeightedLeastSquaresFilter::~WeightedLeastSquaresFilter(){}
WeightedLeastSquaresFilterImpl::~WeightedLeastSquaresFilterImpl()
{
delete[] weights_LUT;
delete[] Ahor; delete[] Bhor; delete[] Chor;
delete[] Avert; delete[] Bvert; delete[] Cvert;
delete[] interD;delete[] interE;delete[] cur_res;
}
void WeightedLeastSquaresFilterImpl::buildCoefMatrices(Mat& guide)
{
double hor_weight;
const unsigned char *row,*row_prev,*row_next;
for(int i=0;i<h;i++)
{
//compute horizontal coefs:
row = guide.ptr(i);
Ahor[i*w] = 0;
hor_weight = weights_LUT[ (row[0]-row[3])*(row[0]-row[3])+
(row[1]-row[4])*(row[1]-row[4])+
(row[2]-row[5])*(row[2]-row[5]) ];
Chor[i*w] = -lambda*hor_weight;
Bhor[i*w] = 1 - Ahor[i*w] - Chor[i*w];
row+=3;
for(int j=1;j<w-1;j++)
{
Ahor[i*w+j] = -lambda*hor_weight;
hor_weight = weights_LUT[ (row[0]-row[3])*(row[0]-row[3])+
(row[1]-row[4])*(row[1]-row[4])+
(row[2]-row[5])*(row[2]-row[5]) ];
Chor[i*w+j] = -lambda*hor_weight;
Bhor[i*w+j] = 1 - Ahor[i*w+j] - Chor[i*w+j];
row+=3;
}
Ahor[i*w+w-1] = -lambda*hor_weight;
Chor[i*w+w-1] = 0;
Bhor[i*w+w-1] = 1 - Ahor[i*w+w-1] - Chor[i*w+w-1];
//compute vertical coefs:
row = guide.ptr(i);
if(i==0)
{
row_next = guide.ptr(i+1);
for(int j=0;j<w;j++)
{
Avert[i*w+j] = 0;
Cvert[i*w+j] = -lambda*weights_LUT[ (row[0]-row_next[0])*(row[0]-row_next[0])+
(row[1]-row_next[1])*(row[1]-row_next[1])+
(row[2]-row_next[2])*(row[2]-row_next[2]) ];
Bvert[i*w+j] = 1 - Avert[i*w+j] - Cvert[i*w+j];
row+=3;
row_next+=3;
}
}
else if(i==h-1)
{
row_prev = guide.ptr(i-1);
for(int j=0;j<w;j++)
{
Avert[i*w+j] = -lambda*weights_LUT[ (row[0]-row_prev[0])*(row[0]-row_prev[0])+
(row[1]-row_prev[1])*(row[1]-row_prev[1])+
(row[2]-row_prev[2])*(row[2]-row_prev[2]) ];
Cvert[i*w+j] = 0;
Bvert[i*w+j] = 1 - Avert[i*w+j] - Cvert[i*w+j];
row+=3;
row_prev+=3;
}
}
else
{
row_prev = guide.ptr(i-1);
row_next = guide.ptr(i+1);
for(int j=0;j<w;j++)
{
Avert[i*w+j] = -lambda*weights_LUT[ (row[0]-row_prev[0])*(row[0]-row_prev[0])+
(row[1]-row_prev[1])*(row[1]-row_prev[1])+
(row[2]-row_prev[2])*(row[2]-row_prev[2]) ];
Cvert[i*w+j] = -lambda*weights_LUT[ (row[0]-row_next[0])*(row[0]-row_next[0])+
(row[1]-row_next[1])*(row[1]-row_next[1])+
(row[2]-row_next[2])*(row[2]-row_next[2]) ];
Bvert[i*w+j] = 1 - Avert[i*w+j] - Cvert[i*w+j];
row+=3;
row_prev+=3;
row_next+=3;
}
}
}
}
void WeightedLeastSquaresFilterImpl::filter(InputArray src, OutputArray dst)
{
//temporarily support only one-channel CV_16S src type (for disparity map filtering)
CV_Assert(!src.empty() && (src.depth() == CV_16S) && src.channels()==1);
if (src.rows() != h || src.cols() != w)
{
CV_Error(Error::StsBadSize, "Size of filtering image must be equal to size of guide image");
return;
}
Mat srcMat = src.getMat();
Mat& dstMat = dst.getMatRef();
short* row;
for(int i=0;i<h;i++)
{
row = (short*)srcMat.ptr(i);
for(int j=0;j<w;j++)
{
cur_res[i*w+j] = (double)(*row);
row++;
}
}
for(int n=0;n<num_iter;n++)
{
horizontalPass(cur_res);
verticalPass(cur_res);
}
for(int i=0;i<h;i++)
{
row = (short*)dstMat.ptr(i);
for(int j=0;j<w;j++)
{
*row = saturate_cast<short>(cur_res[i*w+j]);
row++;
}
}
}
void WeightedLeastSquaresFilterImpl::horizontalPass(double* cur)
{
double denom;
for(int i=0;i<h;i++)
{
//forward pass:
interD[i*w] = Chor[i*w]/Bhor[i*w];
interE[i*w] = cur[i*w] /Bhor[i*w];
for(int j=1;j<w;j++)
{
denom = Bhor[i*w+j]-interD[i*w+j-1]*Ahor[i*w+j];
interD[i*w+j] = Chor[i*w+j]/denom;
interE[i*w+j] = (cur[i*w+j]-interE[i*w+j-1]*Ahor[i*w+j])/denom;
}
//backward pass:
cur[i*w+w-1] = interE[i*w+w-1];
for(int j=w-2;j>=0;j--)
cur[i*w+j] = interE[i*w+j]-interD[i*w+j]*cur[i*w+j+1];
}
}
void WeightedLeastSquaresFilterImpl::verticalPass(double* cur)
{
double denom;
//forward pass:
for(int j=0;j<w;j++)
{
interD[j] = Cvert[j]/Bvert[j];
interE[j] = cur[j]/Bvert[j];
}
for(int i=1;i<h;i++)
{
for(int j=0;j<w;j++)
{
denom = Bvert[i*w+j]-interD[(i-1)*w+j]*Avert[i*w+j];
interD[i*w+j] = Cvert[i*w+j]/denom;
interE[i*w+j] = (cur[i*w+j]-interE[(i-1)*w+j]*Avert[i*w+j])/denom;
}
}
//backward pass:
for(int j=0;j<w;j++)
cur[(h-1)*w+j] = interE[(h-1)*w+j];
for(int i=h-2;i>=0;i--)
for(int j=w-1;j>=0;j--)
cur[i*w+j] = interE[i*w+j]-interD[i*w+j]*cur[(i+1)*w+j];
}
////////////////////////////////////////////////////////////////////////////////////////////////
CV_EXPORTS_W
Ptr<WeightedLeastSquaresFilter> createWeightedLeastSquaresFilter(InputArray guide, double lambda, double sigma_color, int num_iter)
{
return Ptr<WeightedLeastSquaresFilter>(WeightedLeastSquaresFilterImpl::create(guide, lambda, sigma_color, num_iter));
}
CV_EXPORTS_W
void weightedLeastSquaresFilter(InputArray guide, InputArray src, OutputArray dst, double lambda, double sigma_color, int num_iter)
{
Ptr<WeightedLeastSquaresFilter> wls = createWeightedLeastSquaresFilter(guide, lambda, sigma_color, num_iter);
wls->filter(src, dst);
}
}
}

@ -0,0 +1,154 @@
/*
* By downloading, copying, installing or using the software you agree to this license.
* If you do not agree to this license, do not download, install,
* copy or use the software.
*
*
* License Agreement
* For Open Source Computer Vision Library
* (3 - clause BSD License)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met :
*
* *Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and / or other materials provided with the distribution.
*
* * Neither the names of the copyright holders nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided by the copyright holders and contributors "as is" and
* any express or implied warranties, including, but not limited to, the implied
* warranties of merchantability and fitness for a particular purpose are disclaimed.
* In no event shall copyright holders or contributors be liable for any direct,
* indirect, incidental, special, exemplary, or consequential damages
* (including, but not limited to, procurement of substitute goods or services;
* loss of use, data, or profits; or business interruption) however caused
* and on any theory of liability, whether in contract, strict liability,
* or tort(including negligence or otherwise) arising in any way out of
* the use of this software, even if advised of the possibility of such damage.
*/
#include "test_precomp.hpp"
#include "opencv2/ximgproc/disparity_filter.hpp"
namespace cvtest
{
using namespace std;
using namespace std::tr1;
using namespace testing;
using namespace perf;
using namespace cv;
using namespace cv::ximgproc;
static string getDataDir()
{
return cvtest::TS::ptr()->get_data_path();
}
CV_ENUM(SrcTypes, CV_16S);
CV_ENUM(GuideTypes, CV_8UC1, CV_8UC3)
typedef tuple<Size, SrcTypes, GuideTypes, bool, bool> DisparityWLSParams;
typedef TestWithParam<DisparityWLSParams> DisparityWLSFilterTest;
TEST(DisparityWLSFilterTest, ReferenceAccuracy)
{
string dir = getDataDir() + "cv/disparityfilter";
Mat left = imread(dir + "/left_view.png",IMREAD_COLOR);
ASSERT_FALSE(left.empty());
Mat left_disp = imread(dir + "/disparity_left_raw.png",IMREAD_GRAYSCALE);
ASSERT_FALSE(left_disp.empty());
left_disp.convertTo(left_disp,CV_16S,16);
Mat right_disp = imread(dir + "/disparity_right_raw.png",IMREAD_GRAYSCALE);
ASSERT_FALSE(right_disp.empty());
right_disp.convertTo(right_disp,CV_16S,-16);
Mat GT;
ASSERT_FALSE(readGT(dir + "/GT.png",GT));
FileStorage ROI_storage( dir + "/ROI.xml", FileStorage::READ );
Rect ROI((int)ROI_storage["x"],(int)ROI_storage["y"],(int)ROI_storage["width"],(int)ROI_storage["height"]);
FileStorage reference_res( dir + "/reference_accuracy.xml", FileStorage::READ );
double ref_MSE = (double)reference_res["MSE_after"];
double ref_BadPercent = (double)reference_res["BadPercent_after"];
cv::setNumThreads(cv::getNumberOfCPUs());
Mat res;
Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilter(true);
wls_filter->setLambda(8000.0);
wls_filter->setSigmaColor(0.5);
wls_filter->filter(left_disp,left,res,ROI,right_disp);
double MSE = computeMSE(GT,res,ROI);
double BadPercent = computeBadPixelPercent(GT,res,ROI);
double eps = 0.01;
EXPECT_LE(MSE,ref_MSE+eps*ref_MSE);
EXPECT_LE(BadPercent,ref_BadPercent+eps*ref_BadPercent);
}
TEST_P(DisparityWLSFilterTest, MultiThreadReproducibility)
{
if (cv::getNumberOfCPUs() == 1)
return;
double MAX_DIF = 1.0;
double MAX_MEAN_DIF = 1.0 / 256.0;
int loopsCount = 2;
RNG rng(0);
DisparityWLSParams params = GetParam();
Size size = get<0>(params);
int srcType = get<1>(params);
int guideType = get<2>(params);
bool use_conf = get<3>(params);
bool use_downscale = get<4>(params);
Mat left(size, guideType);
randu(left, 0, 255);
Mat left_disp(size,srcType);
int max_disp = (int)(size.width*0.1);
randu(left_disp, 0, max_disp-1);
Mat right_disp(size,srcType);
randu(right_disp, -max_disp+1, 0); // right-view disparities are negative by convention
Rect ROI(max_disp,0,size.width-max_disp,size.height);
if(use_downscale)
{
resize(left_disp,left_disp,Size(),0.5,0.5);
resize(right_disp,right_disp,Size(),0.5,0.5);
ROI = Rect(ROI.x/2,ROI.y/2,ROI.width/2,ROI.height/2);
}
for (int iter = 0; iter <= loopsCount; iter++)
{
double lambda = rng.uniform(100.0, 10000.0);
double sigma = rng.uniform(1.0, 100.0);
Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilter(use_conf);
wls_filter->setLambda(lambda);
wls_filter->setSigmaColor(sigma);
cv::setNumThreads(cv::getNumberOfCPUs());
Mat resMultiThread;
wls_filter->filter(left_disp,left,resMultiThread,ROI,right_disp);
cv::setNumThreads(1);
Mat resSingleThread;
wls_filter->filter(left_disp,left,resSingleThread,ROI,right_disp);
EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_INF), MAX_DIF);
EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_L1), MAX_MEAN_DIF*left.total());
}
}
INSTANTIATE_TEST_CASE_P(FullSet,DisparityWLSFilterTest,Combine(Values(szODD, szQVGA), SrcTypes::all(), GuideTypes::all(),Values(true,false),Values(true,false)));
}
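For context, a minimal usage sketch of the filter these tests exercise, mirroring the calls in ReferenceAccuracy above; the input Mats (left view and raw left/right disparity maps) and the ROI are assumed to be prepared beforehand:

Ptr<DisparityWLSFilter> wls_filter = createDisparityWLSFilter(true /*use confidence*/);
wls_filter->setLambda(8000.0);  // regularization strength
wls_filter->setSigmaColor(0.5); // sensitivity to edges in the guide image
Mat filtered_disp;
wls_filter->filter(left_disp, left_view, filtered_disp, ROI, right_disp);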

@ -0,0 +1,153 @@
/*
* By downloading, copying, installing or using the software you agree to this license.
* If you do not agree to this license, do not download, install,
* copy or use the software.
*
*
* License Agreement
* For Open Source Computer Vision Library
* (3-clause BSD License)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the names of the copyright holders nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided by the copyright holders and contributors "as is" and
* any express or implied warranties, including, but not limited to, the implied
* warranties of merchantability and fitness for a particular purpose are disclaimed.
* In no event shall copyright holders or contributors be liable for any direct,
* indirect, incidental, special, exemplary, or consequential damages
* (including, but not limited to, procurement of substitute goods or services;
* loss of use, data, or profits; or business interruption) however caused
* and on any theory of liability, whether in contract, strict liability,
* or tort (including negligence or otherwise) arising in any way out of
* the use of this software, even if advised of the possibility of such damage.
*/
#include "test_precomp.hpp"
namespace cvtest
{
using namespace std;
using namespace std::tr1;
using namespace testing;
using namespace perf;
using namespace cv;
using namespace cv::ximgproc;
static string getDataDir()
{
return cvtest::TS::ptr()->get_data_path();
}
CV_ENUM(SrcTypes, CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4, CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4, CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4);
CV_ENUM(GuideTypes, CV_8UC1, CV_8UC3)
typedef tuple<Size, SrcTypes, GuideTypes> FGSParams;
typedef TestWithParam<FGSParams> FastGlobalSmootherTest;
TEST(FastGlobalSmootherTest, SplatSurfaceAccuracy)
{
RNG rnd(0);
for (int i = 0; i < 10; i++)
{
Size sz(rnd.uniform(512, 1024), rnd.uniform(512, 1024));
int guideCn = rnd.uniform(1, 3); // RNG::uniform(a,b) samples the half-open range [a,b), so this gives 1 or 2
if(guideCn==2) guideCn++; //1 or 3 channels
Mat guide(sz, CV_MAKE_TYPE(CV_8U, guideCn));
randu(guide, 0, 255);
Scalar surfaceValue;
int srcCn = rnd.uniform(1, 5); //1 to 4 channels (half-open range)
rnd.fill(surfaceValue, RNG::UNIFORM, 0, 255);
Mat src(sz, CV_MAKE_TYPE(CV_16S, srcCn), surfaceValue);
double lambda = rnd.uniform(100.0, 10000.0);
double sigma = rnd.uniform(1.0, 100.0);
Mat res;
fastGlobalSmootherFilter(guide, src, res, lambda, sigma);
// When filtering a constant image we should get the same image:
double normL1 = cvtest::norm(src, res, NORM_L1)/src.total()/src.channels();
EXPECT_LE(normL1, 1.0/64);
}
}
TEST(FastGlobalSmootherTest, ReferenceAccuracy)
{
string dir = getDataDir() + "cv/edgefilter";
Mat src = imread(dir + "/kodim23.png");
Mat ref = imread(dir + "/fgs/kodim23_lambda=1000_sigma=10.png");
ASSERT_FALSE(src.empty());
ASSERT_FALSE(ref.empty());
cv::setNumThreads(cv::getNumberOfCPUs());
Mat res;
fastGlobalSmootherFilter(src,src,res,1000.0,10.0);
double totalMaxError = 1.0/64.0*src.total()*src.channels();
EXPECT_LE(cvtest::norm(res, ref, NORM_L2), totalMaxError);
EXPECT_LE(cvtest::norm(res, ref, NORM_INF), 1);
}
TEST_P(FastGlobalSmootherTest, MultiThreadReproducibility)
{
if (cv::getNumberOfCPUs() == 1)
return;
double MAX_DIF = 1.0;
double MAX_MEAN_DIF = 1.0 / 64.0;
int loopsCount = 2;
RNG rng(0);
FGSParams params = GetParam();
Size size = get<0>(params);
int srcType = get<1>(params);
int guideType = get<2>(params);
Mat guide(size, guideType);
randu(guide, 0, 255);
Mat src(size,srcType);
if(src.depth()==CV_8U)
randu(src, 0, 255);
else if(src.depth()==CV_16S)
randu(src, -32767, 32767);
else
randu(src, -100000.0f, 100000.0f);
for (int iter = 0; iter <= loopsCount; iter++)
{
double lambda = rng.uniform(100.0, 10000.0);
double sigma = rng.uniform(1.0, 100.0);
cv::setNumThreads(cv::getNumberOfCPUs());
Mat resMultiThread;
fastGlobalSmootherFilter(guide, src, resMultiThread, lambda, sigma);
cv::setNumThreads(1);
Mat resSingleThread;
fastGlobalSmootherFilter(guide, src, resSingleThread, lambda, sigma);
EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_INF), MAX_DIF);
EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_L1), MAX_MEAN_DIF*src.total()*src.channels());
}
}
INSTANTIATE_TEST_CASE_P(FullSet, FastGlobalSmootherTest,Combine(Values(szODD, szQVGA), SrcTypes::all(), GuideTypes::all()));
}

Binary file not shown (new image file, 182 KiB).

@ -0,0 +1,7 @@
<?xml version="1.0"?>
<opencv_storage>
<x>166</x>
<y>7</y>
<width>851</width>
<height>422</height>
</opencv_storage>

Binary file not shown (new image file, 57 KiB).

Binary file not shown (new image file, 47 KiB).

Binary file not shown (new image file, 283 KiB).

@ -0,0 +1,7 @@
<?xml version="1.0"?>
<opencv_storage>
<MSE_before>5043.6495</MSE_before>
<MSE_after>128.3773</MSE_after>
<BadPercent_before>48.5417</BadPercent_before>
<BadPercent_after>45.8749</BadPercent_after>
</opencv_storage>

Binary file not shown (new image file, 287 KiB).

@ -47,6 +47,6 @@
*/
#include "xphoto/inpainting.hpp"
#include "xphoto/simple_color_balance.hpp"
#include "xphoto/white_balance.hpp"
#include "xphoto/dct_image_denoising.hpp"
#endif

@ -85,6 +85,38 @@ namespace xphoto
const float inputMin = 0.0f, const float inputMax = 255.0f,
const float outputMin = 0.0f, const float outputMax = 255.0f);
/** @brief Implements a simple grayworld white balance algorithm.
The function autowbGrayworld scales the values of pixels based on a
gray-world assumption which states that the average of all channels
should result in a gray image.
This function adds a modification which thresholds pixels based on their
saturation value and only uses pixels below the provided threshold in
finding average pixel values.
For a 3-channel RGB image, saturation is computed per pixel I as follows, and lies in the range [0, 1]:
\f[ \texttt{Saturation} [I] = \frac{\textrm{max}(R,G,B) - \textrm{min}(R,G,B)}{\textrm{max}(R,G,B)} \f]
A threshold of 1 means that all pixels are used to white-balance, while a
threshold of 0 means no pixels are used. Lower thresholds are useful in
white-balancing saturated images.
Currently only works on images of type @ref CV_8UC3.
@param src Input array.
@param dst Output array of the same size and type as src.
@param thresh Maximum saturation for a pixel to be included in the
gray-world assumption.
@sa balanceWhite
*/
CV_EXPORTS_W void autowbGrayworld(InputArray src, OutputArray dst,
float thresh = 0.5f);
//! @}
}
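As a worked example of the saturation formula above: a pixel with \f$(R,G,B) = (200, 180, 100)\f$ has saturation \f$(200 - 100)/200 = 0.5\f$, so with the default thresh of 0.5 it is still included in the gray-world average, while any more saturated pixel is excluded.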

@ -0,0 +1,29 @@
#include "perf_precomp.hpp"
using namespace std;
using namespace cv;
using namespace perf;
typedef std::tr1::tuple<Size, float> Size_WBThresh_t;
typedef perf::TestBaseWithParam<Size_WBThresh_t> Size_WBThresh;
PERF_TEST_P( Size_WBThresh, autowbGrayworld,
testing::Combine(
SZ_ALL_HD,
testing::Values( 0.1, 0.5, 1.0 )
)
)
{
Size size = std::tr1::get<0>(GetParam());
float wb_thresh = std::tr1::get<1>(GetParam());
Mat src(size, CV_8UC3);
Mat dst(size, CV_8UC3);
declare.in(src, WARMUP_RNG).out(dst);
TEST_CYCLE() xphoto::autowbGrayworld(src, dst, wb_thresh);
SANITY_CHECK(dst);
}

@ -0,0 +1,3 @@
#include "perf_precomp.hpp"
CV_PERF_TEST_MAIN(xphoto)

@ -0,0 +1,19 @@
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-declarations"
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic ignored "-Wmissing-prototypes"
# pragma GCC diagnostic ignored "-Wextra"
# endif
#endif
#ifndef __OPENCV_PERF_PRECOMP_HPP__
#define __OPENCV_PERF_PRECOMP_HPP__
#include "opencv2/ts.hpp"
#include "opencv2/xphoto.hpp"
#ifdef GTEST_CREATE_SHARED_LIBRARY
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
#endif
#endif

@ -0,0 +1,62 @@
#include "opencv2/xphoto.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/core/utility.hpp"
using namespace cv;
using namespace std;
const char* keys =
{
"{i || input image name}"
"{o || output image name}"
};
int main( int argc, const char** argv )
{
bool printHelp = ( argc == 1 );
printHelp = printHelp || ( argc == 2 && string(argv[1]) == "--help" );
printHelp = printHelp || ( argc == 2 && string(argv[1]) == "-h" );
if ( printHelp )
{
printf("\nThis sample demonstrates the grayworld balance algorithm\n"
"Call:\n"
" simple_color_blance -i=in_image_name [-o=out_image_name]\n\n");
return 0;
}
CommandLineParser parser(argc, argv, keys);
if ( !parser.check() )
{
parser.printErrors();
return -1;
}
string inFilename = parser.get<string>("i");
string outFilename = parser.get<string>("o");
Mat src = imread(inFilename, IMREAD_COLOR);
if ( src.empty() )
{
printf("Cannot read image file: %s\n", inFilename.c_str());
return -1;
}
Mat res(src.size(), src.type());
xphoto::autowbGrayworld(src, res);
if ( outFilename == "" )
{
namedWindow("after white balance", WINDOW_AUTOSIZE);
imshow("after white balance", res);
waitKey(0);
}
else
imwrite(outFilename, res);
return 0;
}

@ -0,0 +1,213 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/xphoto.hpp"
#include "opencv2/core.hpp"
#include "opencv2/hal/intrin.hpp"
namespace cv { namespace xphoto {
void autowbGrayworld(InputArray _src, OutputArray _dst, float thresh)
{
Mat src = _src.getMat();
CV_Assert(!src.empty());
CV_Assert(src.isContinuous());
// TODO: Handle CV_8UC1
// TODO: Handle types other than CV_8U
CV_Assert(src.type() == CV_8UC3);
_dst.create(src.size(), src.type());
Mat dst = _dst.getMat();
CV_Assert(dst.isContinuous());
int width = src.cols,
height = src.rows,
N = width*height,
N3 = N*3;
// Calculate sum of pixel values of each channel
const uchar* src_data = src.ptr<uchar>(0);
unsigned long sum1 = 0, sum2 = 0, sum3 = 0;
unsigned int thresh255 = cvRound(thresh * 255);
int i = 0;
#if CV_SIMD128
v_uint8x16 v_inB, v_inG, v_inR;
v_uint16x8 v_s1, v_s2;
v_uint32x4 v_iB1, v_iB2, v_iB3, v_iB4,
v_iG1, v_iG2, v_iG3, v_iG4,
v_iR1, v_iR2, v_iR3, v_iR4,
v_255 = v_setall_u32(255),
v_thresh = v_setall_u32(thresh255),
v_min1, v_min2, v_min3, v_min4,
v_max1, v_max2, v_max3, v_max4,
v_m1, v_m2, v_m3, v_m4,
v_SB = v_setzero_u32(),
v_SG = v_setzero_u32(),
v_SR = v_setzero_u32();
for ( ; i < N3 - 47; i += 48 )
{
// NOTE: This block assumes BGR channels in naming variables
// Load 3x uint8x16 and deinterleave into vectors of each channel
v_load_deinterleave(&src_data[i], v_inB, v_inG, v_inR);
// Split into four int vectors per channel
v_expand(v_inB, v_s1, v_s2);
v_expand(v_s1, v_iB1, v_iB2);
v_expand(v_s2, v_iB3, v_iB4);
v_expand(v_inG, v_s1, v_s2);
v_expand(v_s1, v_iG1, v_iG2);
v_expand(v_s2, v_iG3, v_iG4);
v_expand(v_inR, v_s1, v_s2);
v_expand(v_s1, v_iR1, v_iR2);
v_expand(v_s2, v_iR3, v_iR4);
// Get mins and maxs
v_min1 = v_min(v_iB1, v_min(v_iG1, v_iR1));
v_min2 = v_min(v_iB2, v_min(v_iG2, v_iR2));
v_min3 = v_min(v_iB3, v_min(v_iG3, v_iR3));
v_min4 = v_min(v_iB4, v_min(v_iG4, v_iR4));
v_max1 = v_max(v_iB1, v_max(v_iG1, v_iR1));
v_max2 = v_max(v_iB2, v_max(v_iG2, v_iR2));
v_max3 = v_max(v_iB3, v_max(v_iG3, v_iR3));
v_max4 = v_max(v_iB4, v_max(v_iG4, v_iR4));
// Calculate masks
v_m1 = ~((v_max1 - v_min1) * v_255 > v_thresh * v_max1);
v_m2 = ~((v_max2 - v_min2) * v_255 > v_thresh * v_max2);
v_m3 = ~((v_max3 - v_min3) * v_255 > v_thresh * v_max3);
v_m4 = ~((v_max4 - v_min4) * v_255 > v_thresh * v_max4);
// Apply mask
v_SB += (v_iB1 & v_m1) + (v_iB2 & v_m2) + (v_iB3 & v_m3) + (v_iB4 & v_m4);
v_SG += (v_iG1 & v_m1) + (v_iG2 & v_m2) + (v_iG3 & v_m3) + (v_iG4 & v_m4);
v_SR += (v_iR1 & v_m1) + (v_iR2 & v_m2) + (v_iR3 & v_m3) + (v_iR4 & v_m4);
}
// Perform final reduction
sum1 = v_reduce_sum(v_SB);
sum2 = v_reduce_sum(v_SG);
sum3 = v_reduce_sum(v_SR);
#endif
unsigned int minRGB, maxRGB;
for ( ; i < N3; i += 3 )
{
minRGB = min(src_data[i], min(src_data[i + 1], src_data[i + 2]));
maxRGB = max(src_data[i], max(src_data[i + 1], src_data[i + 2]));
if ( (maxRGB - minRGB) * 255 > thresh255 * maxRGB ) continue;
sum1 += src_data[i];
sum2 += src_data[i + 1];
sum3 += src_data[i + 2];
}
// Find inverse of averages
double dinv1 = sum1 == 0 ? 0.0 : (double)N / (double)sum1,
dinv2 = sum2 == 0 ? 0.0 : (double)N / (double)sum2,
dinv3 = sum3 == 0 ? 0.0 : (double)N / (double)sum3;
// Find maximum
double inv_max = max(dinv1, max(dinv2, dinv3));
// Convert to floats
float inv1 = (float) dinv1,
inv2 = (float) dinv2,
inv3 = (float) dinv3;
// Scale by maximum
if ( inv_max > 0 )
{
inv1 = (float)((double)inv1 / inv_max);
inv2 = (float)((double)inv2 / inv_max);
inv3 = (float)((double)inv3 / inv_max);
}
// Fixed point arithmetic, mul by 2^8 then shift back 8 bits
int i_inv1 = cvRound(inv1 * (1 << 8)),
i_inv2 = cvRound(inv2 * (1 << 8)),
i_inv3 = cvRound(inv3 * (1 << 8));
// Scale input pixel values
uchar* dst_data = dst.ptr<uchar>(0);
i = 0;
#if CV_SIMD128
v_uint8x16 v_outB, v_outG, v_outR;
v_uint16x8 v_sB1, v_sB2, v_sG1, v_sG2, v_sR1, v_sR2,
v_invB = v_setall_u16((unsigned short) i_inv1),
v_invG = v_setall_u16((unsigned short) i_inv2),
v_invR = v_setall_u16((unsigned short) i_inv3);
for ( ; i < N3 - 47; i += 48 )
{
// Load 48 interleaved bytes and deinterleave into one uint8x16 per channel
v_load_deinterleave(&src_data[i], v_inB, v_inG, v_inR);
// Widen each channel into two uint16x8 vectors
v_expand(v_inB, v_sB1, v_sB2);
v_expand(v_inG, v_sG1, v_sG2);
v_expand(v_inR, v_sR1, v_sR2);
// Multiply by scaling factors
v_sB1 = (v_sB1 * v_invB) >> 8;
v_sB2 = (v_sB2 * v_invB) >> 8;
v_sG1 = (v_sG1 * v_invG) >> 8;
v_sG2 = (v_sG2 * v_invG) >> 8;
v_sR1 = (v_sR1 * v_invR) >> 8;
v_sR2 = (v_sR2 * v_invR) >> 8;
// Pack back to uint8 and store the three channels interleaved
v_store_interleave(&dst_data[i], v_pack(v_sB1, v_sB2),
v_pack(v_sG1, v_sG2), v_pack(v_sR1, v_sR2));
}
#endif
for ( ; i < N3; i += 3 )
{
dst_data[i] = (uchar)((src_data[i] * i_inv1) >> 8);
dst_data[i + 1] = (uchar)((src_data[i + 1] * i_inv2) >> 8);
dst_data[i + 2] = (uchar)((src_data[i + 2] * i_inv3) >> 8);
}
}
}}
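A quick sanity check of the fixed-point scaling used above, with illustrative numbers: a channel gain of 0.75 becomes i_inv = cvRound(0.75 * 256) = 192, and a pixel value of 200 maps to (200 * 192) >> 8 = 150, which matches 200 * 0.75 exactly; in general the error is bounded by the rounding of the gain to multiples of 1/256.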

@ -0,0 +1,90 @@
#include "test_precomp.hpp"
namespace cvtest {
using namespace cv;
void ref_autowbGrayworld(InputArray _src, OutputArray _dst, float thresh)
{
Mat src = _src.getMat();
_dst.create(src.size(), src.type());
Mat dst = _dst.getMat();
int width = src.cols,
height = src.rows,
N = width*height,
N3 = N*3;
// Calculate sum of pixel values of each channel
const uchar* src_data = src.ptr<uchar>(0);
unsigned long sum1 = 0, sum2 = 0, sum3 = 0;
int i = 0;
unsigned int minRGB, maxRGB, thresh255 = cvRound(thresh * 255);
for ( ; i < N3; i += 3 )
{
minRGB = std::min(src_data[i], std::min(src_data[i + 1], src_data[i + 2]));
maxRGB = std::max(src_data[i], std::max(src_data[i + 1], src_data[i + 2]));
if ( (maxRGB - minRGB) * 255 > thresh255 * maxRGB ) continue;
sum1 += src_data[i];
sum2 += src_data[i + 1];
sum3 += src_data[i + 2];
}
// Find inverse of averages
double inv1 = sum1 == 0 ? 0.0 : (double)N / (double)sum1,
inv2 = sum2 == 0 ? 0.0 : (double)N / (double)sum2,
inv3 = sum3 == 0 ? 0.0 : (double)N / (double)sum3;
// Find maximum
double inv_max = std::max(std::max(inv1, inv2), inv3);
// Scale by maximum
if ( inv_max > 0 )
{
inv1 = (double) inv1 / inv_max;
inv2 = (double) inv2 / inv_max;
inv3 = (double) inv3 / inv_max;
}
// Fixed point arithmetic, mul by 2^8 then shift back 8 bits
int i_inv1 = cvRound(inv1 * (1 << 8)),
i_inv2 = cvRound(inv2 * (1 << 8)),
i_inv3 = cvRound(inv3 * (1 << 8));
// Scale input pixel values
uchar* dst_data = dst.ptr<uchar>(0);
i = 0;
for ( ; i < N3; i += 3 )
{
dst_data[i] = (uchar)((src_data[i] * i_inv1) >> 8);
dst_data[i + 1] = (uchar)((src_data[i + 1] * i_inv2) >> 8);
dst_data[i + 2] = (uchar)((src_data[i + 2] * i_inv3) >> 8);
}
}
TEST(xphoto_grayworld_white_balance, regression)
{
String subfolder = "/xphoto/";
String dir = cvtest::TS::ptr()->get_data_path() + subfolder + "simple_white_balance/";
const int nTests = 14;
const float wb_thresh = 0.5f;
const float acc_thresh = 2.f;
for ( int i = 0; i < nTests; ++i )
{
String srcName = dir + format("sources/%02d.png", i + 1);
Mat src = imread(srcName, IMREAD_COLOR);
ASSERT_TRUE(!src.empty());
Mat referenceResult;
ref_autowbGrayworld(src, referenceResult, wb_thresh);
Mat currentResult;
xphoto::autowbGrayworld(src, currentResult, wb_thresh);
ASSERT_LE(cv::norm(currentResult, referenceResult, NORM_INF), acc_thresh);
}
}
}