From ac62d70f9765facb5cd114b77909f57f24dc54c3 Mon Sep 17 00:00:00 2001
From: Vladislav Samsonov <vvladxx@gmail.com>
Date: Mon, 17 Oct 2016 18:15:22 +0300
Subject: [PATCH] [GSoC] Implementation of the Global Patch Collider and demo
 for PCAFlow (#752)

* Minor fixes

* Start adding correspondence finding

* Added finding of correspondences using GPC

* New evaluation tool for GPC

* Changed default parameters

* Display ground truth in the evaluation tool

* Added training tool for MPI Sintel dataset

* Added the training tool for Middlebury dataset

* Added some OpenCL optimization

* Added explanatory notes

* Minor improvements: time measurements + little ocl optimization

* Added demos

* Fixed warnings

* Make parameter struct assignable

* Fix warning

* Proper command line argument usage

* Prettified training tool, added parameters

* Fixed VS warning

* Fixed VS warning

* Use compressed forest.yml.gz files by default to save space

* Added OpenCL flag to the evaluation tool

* Updated documentation

* Major speed and memory improvements:

1) Added new (optional) type of patch descriptors which are much faster.
Retraining with option --descriptor-type=1 is required.
2) Got rid of hash table for descriptors, less memory usage.

* Fixed various floating point errors related to precision.
SIMD for dot product, forest traversing is a little bit faster now.

* Tolerant floating point comparison

* Triplets

* Added comment

* Choosing negative sample among nearest neighbors

* Fix warning

* Usage of parallel_for_() in critical places. Performance improvements.

* Simulated annealing heuristic

* Moved OpenCL kernel to separate file

* Moved implementation to source file

* Added basic accuracy tests for GPC and PCAFlow

* Fixing warnings

* Test accuracy constraints were too strict

* Test accuracy constraints were too strict

* Make tests more lightweight
---
 modules/optflow/doc/optflow.bib               |  16 +
 modules/optflow/include/opencv2/optflow.hpp   |   6 +-
 .../include/opencv2/optflow/pcaflow.hpp       |  51 +-
 .../opencv2/optflow/sparse_matching_gpc.hpp   | 250 +++++++-
 modules/optflow/samples/gpc_evaluate.cpp      | 164 +++++
 modules/optflow/samples/gpc_train.cpp         |  65 +-
 .../optflow/samples/gpc_train_middlebury.py   |  58 ++
 modules/optflow/samples/gpc_train_sintel.py   |  60 ++
 modules/optflow/samples/pcaflow_demo.cpp      | 172 +++++
 .../optflow/src/opencl/sparse_matching_gpc.cl |  69 ++
 modules/optflow/src/sparse_matching_gpc.cpp   | 603 +++++++++++++++---
 modules/optflow/test/test_OF_accuracy.cpp     | 101 ++-
 12 files changed, 1468 insertions(+), 147 deletions(-)
 create mode 100644 modules/optflow/samples/gpc_evaluate.cpp
 create mode 100644 modules/optflow/samples/gpc_train_middlebury.py
 create mode 100644 modules/optflow/samples/gpc_train_sintel.py
 create mode 100644 modules/optflow/samples/pcaflow_demo.cpp
 create mode 100644 modules/optflow/src/opencl/sparse_matching_gpc.cl

diff --git a/modules/optflow/doc/optflow.bib b/modules/optflow/doc/optflow.bib
index a7b218698..227c7bc61 100644
--- a/modules/optflow/doc/optflow.bib
+++ b/modules/optflow/doc/optflow.bib
@@ -52,3 +52,19 @@
   pages={25--36},
   year={2004}
 }
+
+@inproceedings{Wulff:CVPR:2015,
+  title = {Efficient Sparse-to-Dense Optical Flow Estimation using a Learned Basis and Layers},
+  author = {Wulff, Jonas and Black, Michael J.},
+  booktitle = { IEEE Conf. 
on Computer Vision and Pattern Recognition (CVPR) 2015}, + month = {June}, + year = {2015} +} + +@inproceedings{Wang_2016_CVPR, + author = {Wang, Shenlong and Ryan Fanello, Sean and Rhemann, Christoph and Izadi, Shahram and Kohli, Pushmeet}, + title = {The Global Patch Collider}, + booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2016} +} diff --git a/modules/optflow/include/opencv2/optflow.hpp b/modules/optflow/include/opencv2/optflow.hpp index ccd7f8865..e68d5b78d 100644 --- a/modules/optflow/include/opencv2/optflow.hpp +++ b/modules/optflow/include/opencv2/optflow.hpp @@ -43,9 +43,6 @@ the use of this software, even if advised of the possibility of such damage. #include "opencv2/core.hpp" #include "opencv2/video.hpp" -#include "opencv2/optflow/pcaflow.hpp" -#include "opencv2/optflow/sparse_matching_gpc.hpp" - /** @defgroup optflow Optical Flow Algorithms @@ -69,6 +66,9 @@ Functions reading and writing .flo files in "Middlebury" format, see: + * @brief Implementation of the PCAFlow algorithm from the following paper: + * http://files.is.tue.mpg.de/black/papers/cvpr2015_pcaflow.pdf + * + * @cite Wulff:CVPR:2015 + * + * There are some key differences which distinguish this algorithm from the original PCAFlow (see paper): + * - Discrete Cosine Transform basis is used instead of basis extracted with PCA. + * Reasoning: DCT basis has comparable performance and it doesn't require additional storage space. + * Also, this decision helps to avoid overloading the algorithm with a lot of external input. + * - Usage of built-in OpenCV feature tracking instead of libviso. */ #ifndef __OPENCV_OPTFLOW_PCAFLOW_HPP__ @@ -67,7 +63,10 @@ namespace cv namespace optflow { -/* +//! @addtogroup optflow +//! @{ + +/** @brief * This class can be used for imposing a learned prior on the resulting optical flow. * Solution will be regularized according to this prior. * You need to generate appropriate prior file with "learn_prior.py" script beforehand. @@ -90,6 +89,8 @@ public: void fillConstraints( float *A1, float *A2, float *b1, float *b2 ) const; }; +/** @brief PCAFlow algorithm. + */ class CV_EXPORTS_W OpticalFlowPCAFlow : public DenseOpticalFlow { protected: @@ -103,6 +104,15 @@ protected: bool useOpenCL; public: + /** @brief Creates an instance of PCAFlow algorithm. + * @param _prior Learned prior or no prior (default). @see cv::optflow::PCAPrior + * @param _basisSize Number of basis vectors. + * @param _sparseRate Controls density of sparse matches. + * @param _retainedCornersFraction Retained corners fraction. + * @param _occlusionsThreshold Occlusion threshold. + * @param _dampingFactor Regularization term for solving least-squares. It is not related to the prior regularization. + * @param _claheClip Clip parameter for CLAHE. + */ OpticalFlowPCAFlow( Ptr _prior = Ptr(), const Size _basisSize = Size( 18, 14 ), float _sparseRate = 0.024, float _retainedCornersFraction = 0.2, float _occlusionsThreshold = 0.0003, float _dampingFactor = 0.00002, float _claheClip = 14 ); @@ -127,7 +137,12 @@ private: OpticalFlowPCAFlow& operator=( const OpticalFlowPCAFlow& ); // make it non-assignable }; +/** @brief Creates an instance of PCAFlow +*/ CV_EXPORTS_W Ptr createOptFlow_PCAFlow(); + +//! 
@} + } } diff --git a/modules/optflow/include/opencv2/optflow/sparse_matching_gpc.hpp b/modules/optflow/include/opencv2/optflow/sparse_matching_gpc.hpp index 2c0aee46b..312771030 100644 --- a/modules/optflow/include/opencv2/optflow/sparse_matching_gpc.hpp +++ b/modules/optflow/include/opencv2/optflow/sparse_matching_gpc.hpp @@ -37,68 +37,135 @@ or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage. */ -/* -Implementation of the Global Patch Collider algorithm from the following paper: -http://research.microsoft.com/en-us/um/people/pkohli/papers/wfrik_cvpr2016.pdf - -@InProceedings{Wang_2016_CVPR, - author = {Wang, Shenlong and Ryan Fanello, Sean and Rhemann, Christoph and Izadi, Shahram and Kohli, Pushmeet}, - title = {The Global Patch Collider}, - booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, - month = {June}, - year = {2016} -} -*/ +/** + * @file sparse_matching_gpc.hpp + * @author Vladislav Samsonov + * @brief Implementation of the Global Patch Collider. + * + * Implementation of the Global Patch Collider algorithm from the following paper: + * http://research.microsoft.com/en-us/um/people/pkohli/papers/wfrik_cvpr2016.pdf + * + * @cite Wang_2016_CVPR + */ #ifndef __OPENCV_OPTFLOW_SPARSE_MATCHING_GPC_HPP__ #define __OPENCV_OPTFLOW_SPARSE_MATCHING_GPC_HPP__ #include "opencv2/core.hpp" +#include "opencv2/core/hal/intrin.hpp" +#include "opencv2/imgproc.hpp" namespace cv { namespace optflow { +//! @addtogroup optflow +//! @{ + struct CV_EXPORTS_W GPCPatchDescriptor { - static const unsigned nFeatures = 18; // number of features in a patch descriptor + static const unsigned nFeatures = 18; //!< number of features in a patch descriptor Vec< double, nFeatures > feature; - GPCPatchDescriptor( const Mat *imgCh, int i, int j ); + double dot( const Vec< double, nFeatures > &coef ) const; + + void markAsSeparated() { feature[0] = std::numeric_limits< double >::quiet_NaN(); } + + bool isSeparated() const { return cvIsNaN( feature[0] ) != 0; } +}; + +struct CV_EXPORTS_W GPCPatchSample +{ + GPCPatchDescriptor ref; + GPCPatchDescriptor pos; + GPCPatchDescriptor neg; + + void getDirections( bool &refdir, bool &posdir, bool &negdir, const Vec< double, GPCPatchDescriptor::nFeatures > &coef, double rhs ) const; }; -typedef std::pair< GPCPatchDescriptor, GPCPatchDescriptor > GPCPatchSample; typedef std::vector< GPCPatchSample > GPCSamplesVector; +/** @brief Descriptor types for the Global Patch Collider. + */ +enum GPCDescType +{ + GPC_DESCRIPTOR_DCT = 0, //!< Better quality but slow + GPC_DESCRIPTOR_WHT //!< Worse quality but much faster +}; + /** @brief Class encapsulating training samples. */ class CV_EXPORTS_W GPCTrainingSamples { private: GPCSamplesVector samples; + int descriptorType; public: /** @brief This function can be used to extract samples from a pair of images and a ground truth flow. * Sizes of all the provided vectors must be equal. 
*/
  static Ptr< GPCTrainingSamples > create( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo,
-                                           const std::vector< String > &gt );
+                                           const std::vector< String > &gt, int descriptorType );
+
+  static Ptr< GPCTrainingSamples > create( InputArrayOfArrays imagesFrom, InputArrayOfArrays imagesTo, InputArrayOfArrays gt,
+                                           int descriptorType );

   size_t size() const { return samples.size(); }

-  operator GPCSamplesVector() const { return samples; }
+  int type() const { return descriptorType; }

   operator GPCSamplesVector &() { return samples; }
 };

+/** @brief Class encapsulating training parameters.
+ */
+struct GPCTrainingParams
+{
+  unsigned maxTreeDepth;  //!< Maximum tree depth to stop partitioning.
+  int minNumberOfSamples; //!< Minimum number of samples in the node to stop partitioning.
+  int descriptorType;     //!< Type of descriptors to use.
+  bool printProgress;     //!< Print progress to stdout.
+
+  GPCTrainingParams( unsigned _maxTreeDepth = 20, int _minNumberOfSamples = 3, GPCDescType _descriptorType = GPC_DESCRIPTOR_DCT,
+                     bool _printProgress = true )
+      : maxTreeDepth( _maxTreeDepth ), minNumberOfSamples( _minNumberOfSamples ), descriptorType( _descriptorType ),
+        printProgress( _printProgress )
+  {
+    CV_Assert( check() );
+  }
+
+  GPCTrainingParams( const GPCTrainingParams &params )
+      : maxTreeDepth( params.maxTreeDepth ), minNumberOfSamples( params.minNumberOfSamples ), descriptorType( params.descriptorType ),
+        printProgress( params.printProgress )
+  {
+    CV_Assert( check() );
+  }
+
+  bool check() const { return maxTreeDepth > 1 && minNumberOfSamples > 1; }
+};
+
+/** @brief Class encapsulating matching parameters.
+ */
+struct GPCMatchingParams
+{
+  bool useOpenCL; //!< Whether to use OpenCL to speed up the matching.
+
+  GPCMatchingParams( bool _useOpenCL = false ) : useOpenCL( _useOpenCL ) {}
+
+  GPCMatchingParams( const GPCMatchingParams &params ) : useOpenCL( params.useOpenCL ) {}
+};
+
+/** @brief Class for individual tree.
+ */
 class CV_EXPORTS_W GPCTree : public Algorithm
 {
 public:
   struct Node
   {
-    Vec< double, GPCPatchDescriptor::nFeatures > coef; // hyperplane coefficients
-    double rhs;
+    Vec< double, GPCPatchDescriptor::nFeatures > coef; //!< Hyperplane coefficients
+    double rhs;                                        //!< Bias term of the hyperplane

     unsigned left;
     unsigned right;

@@ -109,45 +176,100 @@ private:
   typedef GPCSamplesVector::iterator SIter;

   std::vector< Node > nodes;
+  GPCTrainingParams params;

   bool trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth );

 public:
-  void train( GPCSamplesVector &samples );
+  void train( GPCTrainingSamples &samples, const GPCTrainingParams params = GPCTrainingParams() );

   void write( FileStorage &fs ) const;

   void read( const FileNode &fn );

+  unsigned findLeafForPatch( const GPCPatchDescriptor &descr ) const;
+
   static Ptr< GPCTree > create() { return makePtr< GPCTree >(); }

   bool operator==( const GPCTree &t ) const { return nodes == t.nodes; }
+
+  int getDescriptorType() const { return params.descriptorType; }
 };

 template < int T > class CV_EXPORTS_W GPCForest : public Algorithm
 {
 private:
+  struct Trail
+  {
+    unsigned leaf[T]; //!< Index of the leaf (in each of the T trees) into which the patch fell.
+    Point2i coord;    //!< Patch coordinates.
+
+    bool operator==( const Trail &trail ) const { return memcmp( leaf, trail.leaf, sizeof( leaf ) ) == 0; }
+
+    bool operator<( const Trail &trail ) const
+    {
+      for ( int i = 0; i < T - 1; ++i )
+        if ( leaf[i] != trail.leaf[i] )
+          return leaf[i] < trail.leaf[i];
+      return leaf[T - 1] < trail.leaf[T - 1];
+    }
+  };
+
+  class ParallelTrailsFilling : public ParallelLoopBody
+  {
+  private:
+    const GPCForest *forest;
+    const std::vector< GPCPatchDescriptor > *descr;
+    std::vector< Trail > *trails;
+
+    ParallelTrailsFilling &operator=( const ParallelTrailsFilling & );
+
+  public:
+    ParallelTrailsFilling( const GPCForest *_forest, const std::vector< GPCPatchDescriptor > *_descr, std::vector< Trail > *_trails )
+        : forest( _forest ), descr( _descr ), trails( _trails ){};
+
+    void operator()( const Range &range ) const
+    {
+      for ( int t = range.start; t < range.end; ++t )
+        for ( size_t i = 0; i < descr->size(); ++i )
+          trails->at( i ).leaf[t] = forest->tree[t].findLeafForPatch( descr->at( i ) );
+    }
+  };
+
   GPCTree tree[T];

 public:
   /** @brief Train the forest using one sample set for every tree.
    * Please, consider using the next method instead of this one for better quality.
    */
-  void train( GPCSamplesVector &samples )
+  void train( GPCTrainingSamples &samples, const GPCTrainingParams params = GPCTrainingParams() )
   {
     for ( int i = 0; i < T; ++i )
-      tree[i].train( samples );
+      tree[i].train( samples, params );
   }

   /** @brief Train the forest using individual samples for each tree.
    * It is generally better to use this instead of the first method.
    */
-  void train( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo, const std::vector< String > &gt )
+  void train( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo, const std::vector< String > &gt,
+              const GPCTrainingParams params = GPCTrainingParams() )
   {
     for ( int i = 0; i < T; ++i )
     {
-      Ptr< GPCTrainingSamples > samples = GPCTrainingSamples::create( imagesFrom, imagesTo, gt ); // Create training set for the tree
-      tree[i].train( *samples );
+      Ptr< GPCTrainingSamples > samples =
+        GPCTrainingSamples::create( imagesFrom, imagesTo, gt, params.descriptorType ); // Create training set for the tree
+      tree[i].train( *samples, params );
+    }
+  }
+
+  void train( InputArrayOfArrays imagesFrom, InputArrayOfArrays imagesTo, InputArrayOfArrays gt,
+              const GPCTrainingParams params = GPCTrainingParams() )
+  {
+    for ( int i = 0; i < T; ++i )
+    {
+      Ptr< GPCTrainingSamples > samples =
+        GPCTrainingSamples::create( imagesFrom, imagesTo, gt, params.descriptorType ); // Create training set for the tree
+      tree[i].train( *samples, params );
     }
   }
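The train() overloads above are the usual entry point for this header. A minimal usage sketch, assuming a dataset laid out as (from-image, to-image, ground-truth .flo) triples; the file names here are placeholders, not part of the patch:

    #include "opencv2/optflow.hpp"
    #include <vector>

    int main()
    {
      std::vector< cv::String > from( 1, "from.png" ), to( 1, "to.png" ), gt( 1, "flow.flo" );

      cv::optflow::GPCTrainingParams params;
      params.descriptorType = cv::optflow::GPC_DESCRIPTOR_WHT; // trade some quality for speed
      params.printProgress = false;

      cv::Ptr< cv::optflow::GPCForest< 5 > > forest = cv::optflow::GPCForest< 5 >::create();
      forest->train( from, to, gt, params ); // resamples the training set for each of the 5 trees
      forest->save( "forest.yml.gz" );       // save() is inherited from cv::Algorithm
      return 0;
    }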
@@ -166,19 +288,93 @@ public:

   void read( const FileNode &fn )
   {
-    CV_Assert( T == (int)fn["ntrees"] );
+    CV_Assert( T <= (int)fn["ntrees"] );
     FileNodeIterator it = fn["trees"].begin();
     for ( int i = 0; i < T; ++i, ++it )
       tree[i].read( *it );
   }

+  /** @brief Find correspondences between two images.
+   * @param[in] imgFrom First image in a sequence.
+   * @param[in] imgTo Second image in a sequence.
+   * @param[out] corr Output vector with pairs of corresponding points.
+   * @param[in] params Additional matching parameters for fine-tuning. 
+ */ + void findCorrespondences( InputArray imgFrom, InputArray imgTo, std::vector< std::pair< Point2i, Point2i > > &corr, + const GPCMatchingParams params = GPCMatchingParams() ) const; + static Ptr< GPCForest > create() { return makePtr< GPCForest >(); } }; + +class CV_EXPORTS_W GPCDetails +{ +public: + static void dropOutliers( std::vector< std::pair< Point2i, Point2i > > &corr ); + + static void getAllDescriptorsForImage( const Mat *imgCh, std::vector< GPCPatchDescriptor > &descr, const GPCMatchingParams &mp, + int type ); + + static void getCoordinatesFromIndex( size_t index, Size sz, int &x, int &y ); +}; + +template < int T > +void GPCForest< T >::findCorrespondences( InputArray imgFrom, InputArray imgTo, std::vector< std::pair< Point2i, Point2i > > &corr, + const GPCMatchingParams params ) const +{ + CV_Assert( imgFrom.channels() == 3 ); + CV_Assert( imgTo.channels() == 3 ); + + Mat from, to; + imgFrom.getMat().convertTo( from, CV_32FC3 ); + imgTo.getMat().convertTo( to, CV_32FC3 ); + cvtColor( from, from, COLOR_BGR2YCrCb ); + cvtColor( to, to, COLOR_BGR2YCrCb ); + + Mat fromCh[3], toCh[3]; + split( from, fromCh ); + split( to, toCh ); + + std::vector< GPCPatchDescriptor > descr; + GPCDetails::getAllDescriptorsForImage( fromCh, descr, params, tree[0].getDescriptorType() ); + std::vector< Trail > trailsFrom( descr.size() ), trailsTo( descr.size() ); + + for ( size_t i = 0; i < descr.size(); ++i ) + GPCDetails::getCoordinatesFromIndex( i, from.size(), trailsFrom[i].coord.x, trailsFrom[i].coord.y ); + parallel_for_( Range( 0, T ), ParallelTrailsFilling( this, &descr, &trailsFrom ) ); + + descr.clear(); + GPCDetails::getAllDescriptorsForImage( toCh, descr, params, tree[0].getDescriptorType() ); + + for ( size_t i = 0; i < descr.size(); ++i ) + GPCDetails::getCoordinatesFromIndex( i, to.size(), trailsTo[i].coord.x, trailsTo[i].coord.y ); + parallel_for_( Range( 0, T ), ParallelTrailsFilling( this, &descr, &trailsTo ) ); + + std::sort( trailsFrom.begin(), trailsFrom.end() ); + std::sort( trailsTo.begin(), trailsTo.end() ); + + for ( size_t i = 0; i < trailsFrom.size(); ++i ) + { + bool uniq = true; + while ( i + 1 < trailsFrom.size() && trailsFrom[i] == trailsFrom[i + 1] ) + ++i, uniq = false; + if ( uniq ) + { + typename std::vector< Trail >::const_iterator lb = std::lower_bound( trailsTo.begin(), trailsTo.end(), trailsFrom[i] ); + if ( lb != trailsTo.end() && *lb == trailsFrom[i] && ( ( lb + 1 ) == trailsTo.end() || !( *lb == *( lb + 1 ) ) ) ) + corr.push_back( std::make_pair( trailsFrom[i].coord, lb->coord ) ); + } + } + + GPCDetails::dropOutliers( corr ); } +//! @} + +} // namespace optflow + CV_EXPORTS void write( FileStorage &fs, const String &name, const optflow::GPCTree::Node &node ); CV_EXPORTS void read( const FileNode &fn, optflow::GPCTree::Node &node, optflow::GPCTree::Node ); -} +} // namespace cv #endif diff --git a/modules/optflow/samples/gpc_evaluate.cpp b/modules/optflow/samples/gpc_evaluate.cpp new file mode 100644 index 000000000..7a53e8f32 --- /dev/null +++ b/modules/optflow/samples/gpc_evaluate.cpp @@ -0,0 +1,164 @@ +#include "opencv2/core/ocl.hpp" +#include "opencv2/highgui.hpp" +#include "opencv2/imgcodecs.hpp" +#include "opencv2/optflow.hpp" +#include +#include +#include + +/* This tool finds correspondences between two images using Global Patch Collider + * and calculates error using provided ground truth flow. + * + * It will look for the file named "forest.yml.gz" with a learned forest. 
+ * You can obtain the "forest.yml.gz" either by manually training it using another tool with *_train suffix + * or by downloading one of the files trained on some publicly available dataset from here: + * + * https://drive.google.com/open?id=0B7Hb8cfuzrIIZDFscXVYd0NBNFU + */ + +using namespace cv; + +const String keys = "{help h ? | | print this message}" + "{@image1 | | image1}" + "{@image2 | | image2}" + "{@groundtruth | | path to the .flo file}" + "{@output | | output to a file instead of displaying, output image path}" + "{g gpu | | use OpenCL}" + "{f forest |forest.yml.gz| path to the forest.yml.gz}"; + +const int nTrees = 5; + +static double normL2( const Point2f &v ) { return sqrt( v.x * v.x + v.y * v.y ); } + +static Vec3d getFlowColor( const Point2f &f, const bool logScale = true, const double scaleDown = 5 ) +{ + if ( f.x == 0 && f.y == 0 ) + return Vec3d( 0, 0, 1 ); + + double radius = normL2( f ); + if ( logScale ) + radius = log( radius + 1 ); + radius /= scaleDown; + radius = std::min( 1.0, radius ); + + double angle = ( atan2( -f.y, -f.x ) + CV_PI ) * 180 / CV_PI; + return Vec3d( angle, radius, 1 ); +} + +static void displayFlow( InputArray _flow, OutputArray _img ) +{ + const Size sz = _flow.size(); + Mat flow = _flow.getMat(); + _img.create( sz, CV_32FC3 ); + Mat img = _img.getMat(); + + for ( int i = 0; i < sz.height; ++i ) + for ( int j = 0; j < sz.width; ++j ) + img.at< Vec3f >( i, j ) = getFlowColor( flow.at< Point2f >( i, j ) ); + + cvtColor( img, img, COLOR_HSV2BGR ); +} + +static bool fileProbe( const char *name ) { return std::ifstream( name ).good(); } + +int main( int argc, const char **argv ) +{ + CommandLineParser parser( argc, argv, keys ); + parser.about( "Global Patch Collider evaluation tool" ); + + if ( parser.has( "help" ) ) + { + parser.printMessage(); + return 0; + } + + String fromPath = parser.get< String >( 0 ); + String toPath = parser.get< String >( 1 ); + String gtPath = parser.get< String >( 2 ); + String outPath = parser.get< String >( 3 ); + const bool useOpenCL = parser.has( "gpu" ); + String forestDumpPath = parser.get< String >( "forest" ); + + if ( !parser.check() ) + { + parser.printErrors(); + return 1; + } + + if ( !fileProbe( forestDumpPath.c_str() ) ) + { + std::cerr << "Can't open the file with a trained model: `" << forestDumpPath + << "`.\nYou can obtain this file either by manually training the model using another tool with *_train suffix or by " + "downloading one of the files trained on some publicly available dataset from " + "here:\nhttps://drive.google.com/open?id=0B7Hb8cfuzrIIZDFscXVYd0NBNFU" + << std::endl; + return 1; + } + + ocl::setUseOpenCL( useOpenCL ); + + Ptr< optflow::GPCForest< nTrees > > forest = Algorithm::load< optflow::GPCForest< nTrees > >( forestDumpPath ); + + Mat from = imread( fromPath ); + Mat to = imread( toPath ); + Mat gt = optflow::readOpticalFlow( gtPath ); + std::vector< std::pair< Point2i, Point2i > > corr; + + TickMeter meter; + meter.start(); + + forest->findCorrespondences( from, to, corr, optflow::GPCMatchingParams( useOpenCL ) ); + + meter.stop(); + + std::cout << "Found " << corr.size() << " matches." << std::endl; + std::cout << "Time: " << meter.getTimeSec() << " sec." 
<< std::endl;
+
+  double error = 0;
+  Mat dispErr = Mat::zeros( from.size(), CV_32FC3 );
+  dispErr = Scalar( 0, 0, 1 );
+  Mat disp = Mat::zeros( from.size(), CV_32FC3 );
+  disp = Scalar( 0, 0, 1 );
+
+  for ( size_t i = 0; i < corr.size(); ++i )
+  {
+    const Point2f a = corr[i].first;
+    const Point2f b = corr[i].second;
+    const Point2f c = a + gt.at< Point2f >( corr[i].first.y, corr[i].first.x );
+    error += normL2( b - c );
+    circle( disp, a, 3, getFlowColor( b - a ), -1 );
+    circle( dispErr, a, 3, getFlowColor( b - c, false, 32 ), -1 );
+  }
+
+  error /= corr.size();
+
+  std::cout << "Average endpoint error: " << error << " px." << std::endl;
+
+  cvtColor( disp, disp, COLOR_HSV2BGR );
+  cvtColor( dispErr, dispErr, COLOR_HSV2BGR );
+
+  Mat dispGroundTruth;
+  displayFlow( gt, dispGroundTruth );
+
+  if ( outPath.length() )
+  {
+    putText( disp, "Sparse matching: Global Patch Collider", Point2i( 24, 40 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
+    char buf[256];
+    sprintf( buf, "Average EPE: %.2f", error );
+    putText( disp, buf, Point2i( 24, 80 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
+    sprintf( buf, "Number of matches: %u", (unsigned)corr.size() );
+    putText( disp, buf, Point2i( 24, 120 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
+    disp *= 255;
+    imwrite( outPath, disp );
+    return 0;
+  }
+
+  namedWindow( "Correspondences", WINDOW_AUTOSIZE );
+  imshow( "Correspondences", disp );
+  namedWindow( "Error", WINDOW_AUTOSIZE );
+  imshow( "Error", dispErr );
+  namedWindow( "Ground truth", WINDOW_AUTOSIZE );
+  imshow( "Ground truth", dispGroundTruth );
+  waitKey( 0 );
+
+  return 0;
+}
diff --git a/modules/optflow/samples/gpc_train.cpp b/modules/optflow/samples/gpc_train.cpp
index d4583f925..6557eb573 100644
--- a/modules/optflow/samples/gpc_train.cpp
+++ b/modules/optflow/samples/gpc_train.cpp
@@ -1,31 +1,66 @@
 #include "opencv2/optflow.hpp"
 #include <iostream>

+/* This tool trains the forest for the Global Patch Collider and stores output to the "forest.yml.gz".
+ */
+
+using namespace cv;
+
+const String keys = "{help h ?       |             | print this message}"
+                    "{max-tree-depth |             | Maximum tree depth to stop partitioning}"
+                    "{min-samples    |             | Minimum number of samples in the node to stop partitioning}"
+                    "{descriptor-type|0            | Descriptor type. Set to 0 for quality, 1 for speed.}"
+                    "{print-progress |             | Set to 0 to enable quiet mode, set to 1 to print progress}"
+                    "{f forest       |forest.yml.gz| Path where to store resulting forest. It is recommended to use .yml.gz extension.}";
+
 const int nTrees = 5;

-int main( int argc, const char **argv )
+static void fillInputImagesFromCommandLine( std::vector< String > &img1, std::vector< String > &img2, std::vector< String > &gt, int argc,
+                                            const char **argv )
 {
-  int nSequences = argc - 1;
-
-  if ( nSequences <= 0 || nSequences % 3 != 0 )
+  for ( int i = 1, j = 0; i < argc; ++i )
   {
-    std::cerr << "Usage: " << argv[0] << " ImageFrom1 ImageTo1 GroundTruth1 ... 
ImageFromN ImageToN GroundTruthN" << std::endl; - return 1; + if ( argv[i][0] == '-' ) + continue; + if ( j % 3 == 0 ) + img1.push_back( argv[i] ); + if ( j % 3 == 1 ) + img2.push_back( argv[i] ); + if ( j % 3 == 2 ) + gt.push_back( argv[i] ); + ++j; } +} - nSequences /= 3; - std::vector< cv::String > img1, img2, gt; +int main( int argc, const char **argv ) +{ + CommandLineParser parser( argc, argv, keys ); + parser.about( "Global Patch Collider training tool" ); + + std::vector< String > img1, img2, gt; + optflow::GPCTrainingParams params; - for ( int i = 0; i < nSequences; ++i ) + if ( parser.has( "max-tree-depth" ) ) + params.maxTreeDepth = parser.get< unsigned >( "max-tree-depth" ); + if ( parser.has( "min-samples" ) ) + params.minNumberOfSamples = parser.get< unsigned >( "min-samples" ); + if ( parser.has( "descriptor-type" ) ) + params.descriptorType = parser.get< int >( "descriptor-type" ); + if ( parser.has( "print-progress" ) ) + params.printProgress = parser.get< unsigned >( "print-progress" ) != 0; + + fillInputImagesFromCommandLine( img1, img2, gt, argc, argv ); + + if ( parser.has( "help" ) || img1.size() != img2.size() || img1.size() != gt.size() || img1.size() == 0 ) { - img1.push_back( argv[1 + i * 3] ); - img2.push_back( argv[1 + i * 3 + 1] ); - gt.push_back( argv[1 + i * 3 + 2] ); + std::cerr << "\nUsage: " << argv[0] << " [params] ImageFrom1 ImageTo1 GroundTruth1 ... ImageFromN ImageToN GroundTruthN\n" << std::endl; + parser.printMessage(); + return 1; } - cv::Ptr< cv::optflow::GPCForest< nTrees > > forest = cv::optflow::GPCForest< nTrees >::create(); - forest->train( img1, img2, gt ); - forest->save( "forest.dump" ); + Ptr< optflow::GPCForest< nTrees > > forest = optflow::GPCForest< nTrees >::create(); + forest->train( img1, img2, gt, params ); + forest->save( parser.get< String >( "forest" ) ); return 0; } diff --git a/modules/optflow/samples/gpc_train_middlebury.py b/modules/optflow/samples/gpc_train_middlebury.py new file mode 100644 index 000000000..e923258ce --- /dev/null +++ b/modules/optflow/samples/gpc_train_middlebury.py @@ -0,0 +1,58 @@ +import argparse +import glob +import os +import subprocess + + +def execute(cmd): + popen = subprocess.Popen(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + for stdout_line in iter(popen.stdout.readline, ''): + print(stdout_line.rstrip()) + for stderr_line in iter(popen.stderr.readline, ''): + print(stderr_line.rstrip()) + popen.stdout.close() + popen.stderr.close() + return_code = popen.wait() + if return_code != 0: + raise subprocess.CalledProcessError(return_code, cmd) + + +def main(): + parser = argparse.ArgumentParser( + description='Train Global Patch Collider using Middlebury dataset') + parser.add_argument( + '--bin_path', + help='Path to the training executable (example_optflow_gpc_train)', + required=True) + parser.add_argument('--dataset_path', + help='Path to the directory with frames', + required=True) + parser.add_argument('--gt_path', + help='Path to the directory with ground truth flow', + required=True) + parser.add_argument('--descriptor_type', + help='Descriptor type', + type=int, + default=0) + args = parser.parse_args() + seq = glob.glob(os.path.join(args.dataset_path, '*')) + seq.sort() + input_files = [] + for s in seq: + if os.path.isdir(s): + seq_name = os.path.basename(s) + frames = glob.glob(os.path.join(s, 'frame*.png')) + frames.sort() + assert (len(frames) == 2) + assert (os.path.basename(frames[0]) == 'frame10.png') + assert (os.path.basename(frames[1]) == 'frame11.png') + gt_flow = 
os.path.join(args.gt_path, seq_name, 'flow10.flo') + if os.path.isfile(gt_flow): + input_files += [frames[0], frames[1], gt_flow] + execute([args.bin_path, '--descriptor-type=%d' % args.descriptor_type] + input_files) + + +if __name__ == '__main__': + main() diff --git a/modules/optflow/samples/gpc_train_sintel.py b/modules/optflow/samples/gpc_train_sintel.py new file mode 100644 index 000000000..1f1c5d8a3 --- /dev/null +++ b/modules/optflow/samples/gpc_train_sintel.py @@ -0,0 +1,60 @@ +import argparse +import glob +import os +import subprocess + +FRAME_DIST = 2 + +assert (FRAME_DIST >= 1) + + +def execute(cmd): + popen = subprocess.Popen(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + for stdout_line in iter(popen.stdout.readline, ''): + print(stdout_line.rstrip()) + for stderr_line in iter(popen.stderr.readline, ''): + print(stderr_line.rstrip()) + popen.stdout.close() + popen.stderr.close() + return_code = popen.wait() + if return_code != 0: + raise subprocess.CalledProcessError(return_code, cmd) + + +def main(): + parser = argparse.ArgumentParser( + description='Train Global Patch Collider using MPI Sintel dataset') + parser.add_argument( + '--bin_path', + help='Path to the training executable (example_optflow_gpc_train)', + required=True) + parser.add_argument('--dataset_path', + help='Path to the directory with frames', + required=True) + parser.add_argument('--gt_path', + help='Path to the directory with ground truth flow', + required=True) + parser.add_argument('--descriptor_type', + help='Descriptor type', + type=int, + default=0) + args = parser.parse_args() + seq = glob.glob(os.path.join(args.dataset_path, '*')) + seq.sort() + input_files = [] + for s in seq: + seq_name = os.path.basename(s) + frames = glob.glob(os.path.join(s, 'frame*.png')) + frames.sort() + for i in range(0, len(frames) - 1, FRAME_DIST): + gt_flow = os.path.join(args.gt_path, seq_name, + os.path.basename(frames[i])[0:-4] + '.flo') + assert (os.path.isfile(gt_flow)) + input_files += [frames[i], frames[i + 1], gt_flow] + execute([args.bin_path, '--descriptor-type=%d' % args.descriptor_type] + input_files) + + +if __name__ == '__main__': + main() diff --git a/modules/optflow/samples/pcaflow_demo.cpp b/modules/optflow/samples/pcaflow_demo.cpp new file mode 100644 index 000000000..ea9a60712 --- /dev/null +++ b/modules/optflow/samples/pcaflow_demo.cpp @@ -0,0 +1,172 @@ +#include "opencv2/core/ocl.hpp" +#include "opencv2/highgui.hpp" +#include "opencv2/imgcodecs.hpp" +#include "opencv2/optflow.hpp" +#include +#include +#include + +using namespace cv; +using optflow::OpticalFlowPCAFlow; +using optflow::PCAPrior; + +const String keys = "{help h ? 
| | print this message}"
+                    "{@image1      || image1}"
+                    "{@image2      || image2}"
+                    "{@groundtruth || path to the .flo file}"
+                    "{@prior       || path to a prior file for PCAFlow}"
+                    "{@output      || output image path}"
+                    "{g gpu        | | use OpenCL}";
+
+static double normL2( const Point2f &v ) { return sqrt( v.x * v.x + v.y * v.y ); }
+
+static bool fileProbe( const char *name ) { return std::ifstream( name ).good(); }
+
+static Vec3d getFlowColor( const Point2f &f, const bool logScale = true, const double scaleDown = 5 )
+{
+  if ( f.x == 0 && f.y == 0 )
+    return Vec3d( 0, 0, 1 );
+
+  double radius = normL2( f );
+  if ( logScale )
+    radius = log( radius + 1 );
+  radius /= scaleDown;
+  radius = std::min( 1.0, radius );
+
+  double angle = ( atan2( -f.y, -f.x ) + CV_PI ) * 180 / CV_PI;
+  return Vec3d( angle, radius, 1 );
+}
+
+static void displayFlow( InputArray _flow, OutputArray _img )
+{
+  const Size sz = _flow.size();
+  Mat flow = _flow.getMat();
+  _img.create( sz, CV_32FC3 );
+  Mat img = _img.getMat();
+
+  for ( int i = 0; i < sz.height; ++i )
+    for ( int j = 0; j < sz.width; ++j )
+      img.at< Vec3f >( i, j ) = getFlowColor( flow.at< Point2f >( i, j ) );
+
+  cvtColor( img, img, COLOR_HSV2BGR );
+}
+
+static bool isFlowCorrect( const Point2f &u )
+{
+  return !cvIsNaN( u.x ) && !cvIsNaN( u.y ) && ( fabs( u.x ) < 1e9 ) && ( fabs( u.y ) < 1e9 );
+}
+
+static double calcEPE( const Mat &f1, const Mat &f2 )
+{
+  double sum = 0;
+  Size sz = f1.size();
+  size_t cnt = 0;
+  for ( int i = 0; i < sz.height; ++i )
+    for ( int j = 0; j < sz.width; ++j )
+      if ( isFlowCorrect( f1.at< Point2f >( i, j ) ) && isFlowCorrect( f2.at< Point2f >( i, j ) ) )
+      {
+        sum += normL2( f1.at< Point2f >( i, j ) - f2.at< Point2f >( i, j ) );
+        ++cnt;
+      }
+  return sum / cnt;
+}
+
+static void displayResult( Mat &i1, Mat &i2, Mat &gt, Ptr< DenseOpticalFlow > &algo, OutputArray _img, const char *descr,
+                           const bool useGpu = false )
+{
+  Mat flow( i1.size[0], i1.size[1], CV_32FC2 );
+  TickMeter meter;
+  meter.start();
+
+  if ( useGpu )
+    algo->calc( i1, i2, flow.getUMat( ACCESS_RW ) );
+  else
+    algo->calc( i1, i2, flow );
+
+  meter.stop();
+  displayFlow( flow, _img );
+  Mat img = _img.getMat();
+  putText( img, descr, Point2i( 24, 40 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
+  char buf[256];
+  sprintf( buf, "Average EPE: %.2f", calcEPE( flow, gt ) );
+  putText( img, buf, Point2i( 24, 80 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
+  sprintf( buf, "Time: %.2fs", meter.getTimeSec() );
+  putText( img, buf, Point2i( 24, 120 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
+}
+
+static void displayGT( InputArray _flow, OutputArray _img, const char *descr )
+{
+  displayFlow( _flow, _img );
+  Mat img = _img.getMat();
+  putText( img, descr, Point2i( 24, 40 ), FONT_HERSHEY_DUPLEX, 1, Vec3b( 1, 0, 0 ), 2, LINE_AA );
+}
+
+int main( int argc, const char **argv )
+{
+  CommandLineParser parser( argc, argv, keys );
+  parser.about( "PCAFlow demonstration" );
+
+  if ( parser.has( "help" ) )
+  {
+    parser.printMessage();
+    return 0;
+  }
+
+  String img1 = parser.get< String >( 0 );
+  String img2 = parser.get< String >( 1 );
+  String groundtruth = parser.get< String >( 2 );
+  String prior = parser.get< String >( 3 );
+  String outimg = parser.get< String >( 4 );
+  const bool useGpu = parser.has( "gpu" );
+
+  if ( !parser.check() )
+  {
+    parser.printErrors();
+    return 1;
+  }
+
+  if ( !fileProbe( prior.c_str() ) )
+  {
+    std::cerr << "Can't open the file with prior! 
Check the provided path: " << prior << std::endl; + return 1; + } + + cv::ocl::setUseOpenCL( useGpu ); + + Mat i1 = imread( img1 ); + Mat i2 = imread( img2 ); + Mat gt = optflow::readOpticalFlow( groundtruth ); + + Mat i1g, i2g; + cvtColor( i1, i1g, COLOR_BGR2GRAY ); + cvtColor( i2, i2g, COLOR_BGR2GRAY ); + + Mat pcaflowDisp, pcaflowpriDisp, farnebackDisp, gtDisp; + + { + Ptr< DenseOpticalFlow > pcaflow = makePtr< OpticalFlowPCAFlow >( makePtr< PCAPrior >( prior.c_str() ) ); + displayResult( i1, i2, gt, pcaflow, pcaflowpriDisp, "PCAFlow with prior", useGpu ); + } + + { + Ptr< DenseOpticalFlow > pcaflow = makePtr< OpticalFlowPCAFlow >(); + displayResult( i1, i2, gt, pcaflow, pcaflowDisp, "PCAFlow without prior", useGpu ); + } + + { + Ptr< DenseOpticalFlow > farneback = optflow::createOptFlow_Farneback(); + displayResult( i1g, i2g, gt, farneback, farnebackDisp, "Farneback", useGpu ); + } + + displayGT( gt, gtDisp, "Ground truth" ); + + Mat disp1, disp2; + vconcat( pcaflowpriDisp, farnebackDisp, disp1 ); + vconcat( pcaflowDisp, gtDisp, disp2 ); + hconcat( disp1, disp2, disp1 ); + disp1 *= 255; + + imwrite( outimg, disp1 ); + + return 0; +} diff --git a/modules/optflow/src/opencl/sparse_matching_gpc.cl b/modules/optflow/src/opencl/sparse_matching_gpc.cl new file mode 100644 index 000000000..ba19d7b34 --- /dev/null +++ b/modules/optflow/src/opencl/sparse_matching_gpc.cl @@ -0,0 +1,69 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2016, Itseez, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. + +// @Authors +// Vladislav Samsonov, vvladxx@gmail.com + +__kernel void getPatchDescriptor( + __global const uchar* imgCh0, int ic0step, int ic0off, + __global const uchar* imgCh1, int ic1step, int ic1off, + __global const uchar* imgCh2, int ic2step, int ic2off, + __global uchar* out, int outstep, int outoff, + const int gh, const int gw, const int PR ) +{ + const int i = get_global_id(0); + const int j = get_global_id(1); + + if (i >= gh || j >= gw) + return; + + __global double* desc = (__global double*)(out + (outstep * (i * gw + j) + outoff)); + const int patchRadius = PR * 2; + float patch[PATCH_RADIUS_DOUBLED][PATCH_RADIUS_DOUBLED]; + + for (int i0 = 0; i0 < patchRadius; ++i0) { + __global const float* ch0Row = (__global const float*)(imgCh0 + (ic0step * (i + i0) + ic0off + j * sizeof(float))); + for (int j0 = 0; j0 < patchRadius; ++j0) + patch[i0][j0] = ch0Row[j0]; + } + + #pragma unroll + for (int n0 = 0; n0 < 4; ++n0) { + #pragma unroll + for (int n1 = 0; n1 < 4; ++n1) { + double sum = 0; + for (int i0 = 0; i0 < patchRadius; ++i0) + for (int j0 = 0; j0 < patchRadius; ++j0) + sum += patch[i0][j0] * cos(CV_PI * (i0 + 0.5) * n0 / patchRadius) * cos(CV_PI * (j0 + 0.5) * n1 / patchRadius); + desc[n0 * 4 + n1] = sum / PR; + } + } + + for (int k = 0; k < 4; ++k) { + desc[k] *= SQRT2_INV; + desc[k * 4] *= SQRT2_INV; + } + + double sum = 0; + + for (int i0 = 0; i0 < patchRadius; ++i0) { + __global const float* ch1Row = (__global const float*)(imgCh1 + (ic1step * (i + i0) + ic1off + j * sizeof(float))); + for (int j0 = 0; j0 < patchRadius; ++j0) + sum += ch1Row[j0]; + } + + desc[16] = sum / patchRadius; + sum = 0; + + for (int i0 = 0; i0 < patchRadius; ++i0) { + __global const float* ch2Row = (__global const float*)(imgCh2 + (ic2step * (i + i0) + ic2off + j * sizeof(float))); + for (int j0 = 
0; j0 < patchRadius; ++j0) + sum += ch2Row[j0]; + } + + desc[17] = sum / patchRadius; +} diff --git a/modules/optflow/src/sparse_matching_gpc.cpp b/modules/optflow/src/sparse_matching_gpc.cpp index 11e2abaf3..99ecc1a12 100644 --- a/modules/optflow/src/sparse_matching_gpc.cpp +++ b/modules/optflow/src/sparse_matching_gpc.cpp @@ -41,8 +41,22 @@ //M*/ #include "opencv2/core/core_c.h" +#include "opencv2/core/private.hpp" +#include "opencv2/flann/miniflann.hpp" #include "opencv2/highgui.hpp" #include "precomp.hpp" +#include "opencl_kernels_optflow.hpp" + +/* Disable "from double to float" and "from size_t to int" warnings. + * Fixing these would make the code look ugly by introducing explicit cast all around. + * Here these warning are pointless anyway. + */ +#ifdef _MSC_VER +#pragma warning( disable : 4244 4267 4838 ) +#endif +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wshorten-64-to-32" +#endif namespace cv { @@ -51,12 +65,23 @@ namespace optflow namespace { -const int patchRadius = 10; -const double thresholdMagnitudeFrac = 0.6666666666; +#define PATCH_RADIUS 10 +#define PATCH_RADIUS_DOUBLED 20 +#define SQRT2_INV 0.7071067811865475 + +const int patchRadius = PATCH_RADIUS; const int globalIters = 3; const int localIters = 500; -const int minNumberOfSamples = 2; -//const bool debugOutput = true; +const double thresholdOutliers = 0.98; +const double thresholdMagnitudeFrac = 0.8; +const double epsTolerance = 1e-12; +const unsigned scoreGainPos = 5; +const unsigned scoreGainNeg = 1; +const unsigned negSearchKNN = 5; +const double simulatedAnnealingTemperatureCoef = 200.0; +const double sigmaGrowthRate = 0.2; + +RNG rng; struct Magnitude { @@ -79,9 +104,9 @@ struct PartitionPredicate1 bool operator()( const GPCPatchSample &sample ) const { - const bool direction1 = ( coef.dot( sample.first.feature ) < rhs ); - const bool direction2 = ( coef.dot( sample.second.feature ) < rhs ); - return direction1 == false && direction1 == direction2; + bool refdir, posdir, negdir; + sample.getDirections( refdir, posdir, negdir, coef, rhs ); + return refdir == false && ( posdir == false || negdir == true ); } }; @@ -94,20 +119,276 @@ struct PartitionPredicate2 bool operator()( const GPCPatchSample &sample ) const { - const bool direction1 = ( coef.dot( sample.first.feature ) < rhs ); - const bool direction2 = ( coef.dot( sample.second.feature ) < rhs ); - return direction1 != direction2; + bool refdir, posdir, negdir; + sample.getDirections( refdir, posdir, negdir, coef, rhs ); + return refdir != posdir && refdir == negdir; + } +}; + +struct CompareWithTolerance +{ + double val; + + CompareWithTolerance( double _val ) : val( _val ) {}; + + bool operator()( const double &elem ) const + { + const double diff = ( val + elem == 0 ) ? 
std::abs( val - elem ) : std::abs( ( val - elem ) / ( val + elem ) ); + return diff <= epsTolerance; } }; float normL2Sqr( const Vec2f &v ) { return v[0] * v[0] + v[1] * v[1]; } +int normL2Sqr( const Point2i &v ) { return v.x * v.x + v.y * v.y; } + bool checkBounds( int i, int j, Size sz ) { return i >= patchRadius && j >= patchRadius && i + patchRadius < sz.height && j + patchRadius < sz.width; } -void getTrainingSamples( const Mat &from, const Mat &to, const Mat >, GPCSamplesVector &samples ) +void getDCTPatchDescriptor( GPCPatchDescriptor &patchDescr, const Mat *imgCh, int i, int j ) +{ + Rect roi( j - patchRadius, i - patchRadius, 2 * patchRadius, 2 * patchRadius ); + Mat freqDomain; + dct( imgCh[0]( roi ), freqDomain ); + + double *feature = patchDescr.feature.val; + feature[0] = freqDomain.at< float >( 0, 0 ); + feature[1] = freqDomain.at< float >( 0, 1 ); + feature[2] = freqDomain.at< float >( 0, 2 ); + feature[3] = freqDomain.at< float >( 0, 3 ); + + feature[4] = freqDomain.at< float >( 1, 0 ); + feature[5] = freqDomain.at< float >( 1, 1 ); + feature[6] = freqDomain.at< float >( 1, 2 ); + feature[7] = freqDomain.at< float >( 1, 3 ); + + feature[8] = freqDomain.at< float >( 2, 0 ); + feature[9] = freqDomain.at< float >( 2, 1 ); + feature[10] = freqDomain.at< float >( 2, 2 ); + feature[11] = freqDomain.at< float >( 2, 3 ); + + feature[12] = freqDomain.at< float >( 3, 0 ); + feature[13] = freqDomain.at< float >( 3, 1 ); + feature[14] = freqDomain.at< float >( 3, 2 ); + feature[15] = freqDomain.at< float >( 3, 3 ); + + feature[16] = cv::sum( imgCh[1]( roi ) )[0] / ( 2 * patchRadius ); + feature[17] = cv::sum( imgCh[2]( roi ) )[0] / ( 2 * patchRadius ); +} + +double sumInt( const Mat &integ, int i, int j, int h, int w ) +{ + return integ.at< double >( i + h, j + w ) - integ.at< double >( i + h, j ) - integ.at< double >( i, j + w ) + integ.at< double >( i, j ); +} + +void getWHTPatchDescriptor( GPCPatchDescriptor &patchDescr, const Mat *imgCh, int i, int j ) +{ + i -= patchRadius; + j -= patchRadius; + const int k = 2 * patchRadius; + const double s = sumInt( imgCh[0], i, j, k, k ); + double *feature = patchDescr.feature.val; + + feature[0] = s; + feature[1] = s - 2 * sumInt( imgCh[0], i, j + k / 2, k, k / 2 ); + feature[2] = s - 2 * sumInt( imgCh[0], i, j + k / 4, k, k / 2 ); + feature[3] = s - 2 * sumInt( imgCh[0], i, j + k / 4, k, k / 4 ) - 2 * sumInt( imgCh[0], i, j + 3 * k / 4, k, k / 4 ); + + feature[4] = s - 2 * sumInt( imgCh[0], i + k / 2, j, k / 2, k ); + feature[5] = s - 2 * sumInt( imgCh[0], i, j + k / 2, k / 2, k / 2 ) - 2 * sumInt( imgCh[0], i + k / 2, j, k / 2, k / 2 ); + feature[6] = s - 2 * sumInt( imgCh[0], i, j + k / 4, k / 2, k / 2 ) - 2 * sumInt( imgCh[0], i + k / 2, j, k / 2, k / 4 ) - + 2 * sumInt( imgCh[0], i + k / 2, j + 3 * k / 4, k / 2, k / 4 ); + feature[7] = s - 2 * sumInt( imgCh[0], i, j + k / 4, k / 2, k / 4 ) - 2 * sumInt( imgCh[0], i, j + 3 * k / 4, k / 2, k / 4 ) - + 2 * sumInt( imgCh[0], i + k / 2, j, k / 2, k / 4 ) - 2 * sumInt( imgCh[0], i + k / 2, j + k / 2, k / 2, k / 4 ); + + feature[8] = s - 2 * sumInt( imgCh[0], i + k / 4, j, k / 2, k ); + feature[9] = s - 2 * sumInt( imgCh[0], i + k / 4, j, k / 2, k / 2 ) - 2 * sumInt( imgCh[0], i, j + k / 2, k / 4, k / 2 ) - + 2 * sumInt( imgCh[0], i + 3 * k / 4, j + k / 2, k / 4, k / 2 ); + feature[10] = s - 2 * sumInt( imgCh[0], i + k / 4, j, k / 2, k / 4 ) - 2 * sumInt( imgCh[0], i + k / 4, j + 3 * k / 4, k / 2, k / 4 ) - + 2 * sumInt( imgCh[0], i, j + k / 4, k / 4, k / 2 ) - 2 * sumInt( imgCh[0], i + 3 * k / 
4, j + k / 4, k / 4, k / 2 ); + feature[11] = s - 2 * sumInt( imgCh[0], i, j + k / 4, k / 4, k / 4 ) - 2 * sumInt( imgCh[0], i, j + 3 * k / 4, k / 4, k / 4 ) - + 2 * sumInt( imgCh[0], i + k / 4, j, k / 2, k / 4 ) - 2 * sumInt( imgCh[0], i + k / 4, j + k / 2, k / 2, k / 4 ) - + 2 * sumInt( imgCh[0], i + 3 * k / 4, j + k / 4, k / 4, k / 4 ) - + 2 * sumInt( imgCh[0], i + 3 * k / 4, j + 3 * k / 4, k / 4, k / 4 ); + + feature[12] = s - 2 * sumInt( imgCh[0], i + k / 4, j, k / 4, k ) - 2 * sumInt( imgCh[0], i + 3 * k / 4, j, k / 4, k ); + feature[13] = s - 2 * sumInt( imgCh[0], i + k / 4, j, k / 4, k / 2 ) - 2 * sumInt( imgCh[0], i + 3 * k / 4, j, k / 4, k / 2 ) - + 2 * sumInt( imgCh[0], i, j + k / 2, k / 4, k / 2 ) - 2 * sumInt( imgCh[0], i + k / 2, j + k / 2, k / 4, k / 2 ); + feature[14] = s - 2 * sumInt( imgCh[0], i + k / 4, j, k / 4, k / 4 ) - 2 * sumInt( imgCh[0], i + 3 * k / 4, j, k / 4, k / 4 ) - + 2 * sumInt( imgCh[0], i, j + k / 4, k / 4, k / 2 ) - 2 * sumInt( imgCh[0], i + k / 2, j + k / 4, k / 4, k / 2 ) - + 2 * sumInt( imgCh[0], i + k / 4, j + 3 * k / 4, k / 4, k / 4 ) - + 2 * sumInt( imgCh[0], i + 3 * k / 4, j + 3 * k / 4, k / 4, k / 4 ); + feature[15] = s - 2 * sumInt( imgCh[0], i, j + k / 4, k / 4, k / 4 ) - 2 * sumInt( imgCh[0], i, j + 3 * k / 4, k / 4, k / 4 ) - + 2 * sumInt( imgCh[0], i + k / 4, j, k / 4, k / 4 ) - 2 * sumInt( imgCh[0], i + k / 4, j + k / 2, k / 4, k / 4 ) - + 2 * sumInt( imgCh[0], i + k / 2, j + k / 4, k / 4, k / 4 ) - + 2 * sumInt( imgCh[0], i + k / 2, j + 3 * k / 4, k / 4, k / 4 ) - 2 * sumInt( imgCh[0], i + 3 * k / 4, j, k / 4, k / 4 ) - + 2 * sumInt( imgCh[0], i + 3 * k / 4, j + k / 2, k / 4, k / 4 ); + + feature[16] = sumInt( imgCh[1], i, j, k, k ); + feature[17] = sumInt( imgCh[2], i, j, k, k ); + + patchDescr.feature /= patchRadius; +} + +class ParallelDCTFiller : public ParallelLoopBody +{ +private: + const Size sz; + const Mat *imgCh; + std::vector< GPCPatchDescriptor > *descr; + + ParallelDCTFiller &operator=( const ParallelDCTFiller & ); + +public: + ParallelDCTFiller( const Size &_sz, const Mat *_imgCh, std::vector< GPCPatchDescriptor > *_descr ) + : sz( _sz ), imgCh( _imgCh ), descr( _descr ){}; + + void operator()( const Range &range ) const + { + for ( int i = range.start; i < range.end; ++i ) + { + int x, y; + GPCDetails::getCoordinatesFromIndex( i, sz, x, y ); + getDCTPatchDescriptor( descr->at( i ), imgCh, y, x ); + } + } +}; + +#ifdef HAVE_OPENCL + +bool ocl_getAllDCTDescriptorsForImage( const Mat *imgCh, std::vector< GPCPatchDescriptor > &descr ) +{ + const Size sz = imgCh[0].size(); + ocl::Kernel kernel( "getPatchDescriptor", ocl::optflow::sparse_matching_gpc_oclsrc, + format( "-DPATCH_RADIUS_DOUBLED=%d -DCV_PI=%f -DSQRT2_INV=%f", PATCH_RADIUS_DOUBLED, CV_PI, SQRT2_INV ) ); + size_t globSize[] = {sz.height - 2 * patchRadius, sz.width - 2 * patchRadius}; + UMat out( globSize[0] * globSize[1], GPCPatchDescriptor::nFeatures, CV_64F ); + if ( + kernel + .args( cv::ocl::KernelArg::ReadOnlyNoSize( imgCh[0].getUMat( ACCESS_READ ) ), + cv::ocl::KernelArg::ReadOnlyNoSize( imgCh[1].getUMat( ACCESS_READ ) ), + cv::ocl::KernelArg::ReadOnlyNoSize( imgCh[2].getUMat( ACCESS_READ ) ), + cv::ocl::KernelArg::WriteOnlyNoSize( out ), + (int)globSize[0], (int)globSize[1], (int)patchRadius ) + .run( 2, globSize, 0, true ) == false ) + return false; + Mat cpuOut = out.getMat( 0 ); + for ( int i = 0; i + 2 * patchRadius < sz.height; ++i ) + for ( int j = 0; j + 2 * patchRadius < sz.width; ++j ) + descr.push_back( *cpuOut.ptr< GPCPatchDescriptor >( i * globSize[1] 
+ j ) );
+  return true;
+}
+
+#endif
+
+void getAllDCTDescriptorsForImage( const Mat *imgCh, std::vector< GPCPatchDescriptor > &descr, const GPCMatchingParams &mp )
+{
+  const Size sz = imgCh[0].size();
+  descr.reserve( ( sz.height - 2 * patchRadius ) * ( sz.width - 2 * patchRadius ) );
+
+  (void)mp; // Fix unused parameter warning in case OpenCL is not available
+  CV_OCL_RUN( mp.useOpenCL, ocl_getAllDCTDescriptorsForImage( imgCh, descr ) )
+
+  descr.resize( ( sz.height - 2 * patchRadius ) * ( sz.width - 2 * patchRadius ) );
+  parallel_for_( Range( 0, descr.size() ), ParallelDCTFiller( sz, imgCh, &descr ) );
+}
+
+class ParallelWHTFiller : public ParallelLoopBody
+{
+private:
+  const Size sz;
+  const Mat *imgChInt;
+  std::vector< GPCPatchDescriptor > *descr;
+
+  ParallelWHTFiller &operator=( const ParallelWHTFiller & );
+
+public:
+  ParallelWHTFiller( const Size &_sz, const Mat *_imgChInt, std::vector< GPCPatchDescriptor > *_descr )
+      : sz( _sz ), imgChInt( _imgChInt ), descr( _descr ){};
+
+  void operator()( const Range &range ) const
+  {
+    for ( int i = range.start; i < range.end; ++i )
+    {
+      int x, y;
+      GPCDetails::getCoordinatesFromIndex( i, sz, x, y );
+      getWHTPatchDescriptor( descr->at( i ), imgChInt, y, x );
+    }
+  }
+};
+
+void getAllWHTDescriptorsForImage( const Mat *imgCh, std::vector< GPCPatchDescriptor > &descr, const GPCMatchingParams & )
+{
+  const Size sz = imgCh[0].size();
+  descr.resize( ( sz.height - 2 * patchRadius ) * ( sz.width - 2 * patchRadius ) );
+
+  Mat imgChInt[3];
+  integral( imgCh[0], imgChInt[0], CV_64F );
+  integral( imgCh[1], imgChInt[1], CV_64F );
+  integral( imgCh[2], imgChInt[2], CV_64F );
+
+  parallel_for_( Range( 0, descr.size() ), ParallelWHTFiller( sz, imgChInt, &descr ) );
+}
+
+void buildIndex( OutputArray featuresOut, flann::Index &index, const Mat *imgCh,
+                 void ( *getAllDescrFn )( const Mat *, std::vector< GPCPatchDescriptor > &, const GPCMatchingParams & ) )
+{
+  std::vector< GPCPatchDescriptor > descriptors;
+  getAllDescrFn( imgCh, descriptors, GPCMatchingParams() );
+
+  featuresOut.create( descriptors.size(), GPCPatchDescriptor::nFeatures, CV_32F );
+  Mat features = featuresOut.getMat();
+
+  for ( size_t i = 0; i < descriptors.size(); ++i )
+    *features.ptr< Vec< float, GPCPatchDescriptor::nFeatures > >( i ) = descriptors[i].feature;
+
+  cv::flann::KDTreeIndexParams indexParams;
+  index.build( features, indexParams, cvflann::FLANN_DIST_L2 );
+}
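buildIndex flattens all patch descriptors of the target image into a CV_32F matrix, one row per descriptor, and builds a KD-tree over the rows. The same cv::flann pattern on made-up data (a standalone sketch, independent of GPC):

    #include "opencv2/core.hpp"
    #include "opencv2/flann/miniflann.hpp"

    int main()
    {
      cv::Mat features( 100, 18, CV_32F ), query( 1, 18, CV_32F ), indices, dists;
      cv::randu( features, 0.0, 1.0 ); // 100 random 18-dimensional "descriptors"
      cv::randu( query, 0.0, 1.0 );

      cv::flann::Index index;
      index.build( features, cv::flann::KDTreeIndexParams(), cvflann::FLANN_DIST_L2 );
      index.knnSearch( query, indices, dists, 5 ); // row indices of the 5 nearest descriptors
      return 0;
    }

getTriplet below runs this kind of query with the reference patch as the query vector, and among the returned neighbors keeps the one that lies spatially farthest from the true match as the hard negative sample.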
+void getTriplet( const Magnitude &mag, const Mat &gt, const Mat *fromCh, const Mat *toCh, GPCSamplesVector &samples, flann::Index &index,
+                 void ( *getDescFn )( GPCPatchDescriptor &, const Mat *, int, int ) )
+{
+  const Size sz = gt.size();
+  const int i0 = mag.i;
+  const int j0 = mag.j;
+  const int i1 = i0 + cvRound( gt.at< Vec2f >( i0, j0 )[1] );
+  const int j1 = j0 + cvRound( gt.at< Vec2f >( i0, j0 )[0] );
+  if ( checkBounds( i1, j1, sz ) )
+  {
+    GPCPatchSample ps;
+    getDescFn( ps.ref, fromCh, i0, j0 );
+    getDescFn( ps.pos, toCh, i1, j1 );
+    ps.neg.markAsSeparated();
+
+    Matx< float, 1, GPCPatchDescriptor::nFeatures > ref32;
+    Matx< int, 1, negSearchKNN > indices;
+    int maxDist = 0;
+
+    for ( unsigned i = 0; i < GPCPatchDescriptor::nFeatures; ++i )
+      ref32( 0, i ) = ps.ref.feature[i];
+
+    index.knnSearch( ref32, indices, noArray(), negSearchKNN );
+
+    for ( unsigned i = 0; i < negSearchKNN; ++i )
+    {
+      int i2, j2;
+      GPCDetails::getCoordinatesFromIndex( indices( 0, i ), sz, j2, i2 );
+      const int dist = ( i2 - i1 ) * ( i2 - i1 ) + ( j2 - j1 ) * ( j2 - j1 );
+      if ( maxDist < dist )
+      {
+        maxDist = dist;
+        getDescFn( ps.neg, toCh, i2, j2 );
+      }
+    }
+
+    samples.push_back( ps );
+  }
+}
+
+void getTrainingSamples( const Mat &from, const Mat &to, const Mat &gt, GPCSamplesVector &samples, const int type )
 {
   const Size sz = gt.size();
   std::vector< Magnitude > mag;
@@ -116,33 +397,53 @@ void getTrainingSamples( const Mat &from, const Mat &to, const Mat &gt, GPCSampl
     for ( int j = patchRadius; j + patchRadius < sz.width; ++j )
       mag.push_back( Magnitude( normL2Sqr( gt.at< Vec2f >( i, j ) ), i, j ) );

-  size_t n = size_t(mag.size() * thresholdMagnitudeFrac); // As suggested in the paper, we discard part of the training samples
-                                                          // with a small displacement and train to better distinguish hard pairs.
+  size_t n = size_t( mag.size() * thresholdMagnitudeFrac ); // As suggested in the paper, we discard part of the training samples
+                                                            // with a small displacement and train to better distinguish hard pairs.
   std::nth_element( mag.begin(), mag.begin() + n, mag.end() );
   mag.resize( n );
   std::random_shuffle( mag.begin(), mag.end() );
   n /= patchRadius;
   mag.resize( n );

-  Mat fromCh[3], toCh[3];
-  split( from, fromCh );
-  split( to, toCh );
+  if ( type == GPC_DESCRIPTOR_DCT )
+  {
+    Mat fromCh[3], toCh[3];
+    split( from, fromCh );
+    split( to, toCh );
+
+    Mat allDescriptors;
+    flann::Index index;
+    buildIndex( allDescriptors, index, toCh, getAllDCTDescriptorsForImage );

-  for ( size_t k = 0; k < n; ++k )
+    for ( size_t k = 0; k < n; ++k )
+      getTriplet( mag[k], gt, fromCh, toCh, samples, index, getDCTPatchDescriptor );
+  }
+  else if ( type == GPC_DESCRIPTOR_WHT )
   {
-    int i0 = mag[k].i;
-    int j0 = mag[k].j;
-    int i1 = i0 + cvRound( gt.at< Vec2f >( i0, j0 )[1] );
-    int j1 = j0 + cvRound( gt.at< Vec2f >( i0, j0 )[0] );
-    if ( checkBounds( i1, j1, sz ) )
-      samples.push_back( std::make_pair( GPCPatchDescriptor( fromCh, i0, j0 ), GPCPatchDescriptor( toCh, i1, j1 ) ) );
+    Mat fromCh[3], toCh[3], fromChInt[3], toChInt[3];
+    split( from, fromCh );
+    split( to, toCh );
+    integral( fromCh[0], fromChInt[0], CV_64F );
+    integral( fromCh[1], fromChInt[1], CV_64F );
+    integral( fromCh[2], fromChInt[2], CV_64F );
+    integral( toCh[0], toChInt[0], CV_64F );
+    integral( toCh[1], toChInt[1], CV_64F );
+    integral( toCh[2], toChInt[2], CV_64F );
+
+    Mat allDescriptors;
+    flann::Index index;
+    buildIndex( allDescriptors, index, toCh, getAllWHTDescriptorsForImage );
+
+    for ( size_t k = 0; k < n; ++k )
+      getTriplet( mag[k], gt, fromChInt, toChInt, samples, index, getWHTPatchDescriptor );
   }
+  else
+    CV_Error( CV_StsBadArg, "Unknown descriptor type" );
 }

 /* Sample random number from Cauchy distribution. */
 double getRandomCauchyScalar()
 {
-  static RNG rng;
   return tan( rng.uniform( -1.54, 1.54 ) ); // I intentionally used the value slightly less than PI/2 to enforce strictly
                                             // zero probability for large numbers. Resulting PDF for Cauchy has
                                             // truncated "tails".
@@ -155,41 +456,65 @@ void getRandomCauchyVector( Vec< double, GPCPatchDescriptor::nFeatures > &v )
   for ( unsigned i = 0; i < GPCPatchDescriptor::nFeatures; ++i )
     v[i] = getRandomCauchyScalar();
 }
+
+double getRobustMedian( double m ) { return m < 0 ? 
m * ( 1.0 + epsTolerance ) : m * ( 1.0 - epsTolerance ); } } -GPCPatchDescriptor::GPCPatchDescriptor( const Mat *imgCh, int i, int j ) +double GPCPatchDescriptor::dot( const Vec< double, nFeatures > &coef ) const { - Rect roi( j - patchRadius, i - patchRadius, 2 * patchRadius, 2 * patchRadius ); - Mat freqDomain; - dct( imgCh[0]( roi ), freqDomain ); - - feature[0] = freqDomain.at< float >( 0, 0 ); - feature[1] = freqDomain.at< float >( 0, 1 ); - feature[2] = freqDomain.at< float >( 0, 2 ); - feature[3] = freqDomain.at< float >( 0, 3 ); - - feature[4] = freqDomain.at< float >( 1, 0 ); - feature[5] = freqDomain.at< float >( 1, 1 ); - feature[6] = freqDomain.at< float >( 1, 2 ); - feature[7] = freqDomain.at< float >( 1, 3 ); +#if CV_SIMD128_64F + v_float64x2 sum = v_setzero_f64(); + for ( unsigned i = 0; i < nFeatures; i += 2 ) + { + v_float64x2 x = v_load_aligned( &feature.val[i] ); + v_float64x2 y = v_load_aligned( &coef.val[i] ); + sum = v_muladd( x, y, sum ); + } +#if CV_SSE2 + __m128d sumrev = _mm_shuffle_pd( sum.val, sum.val, _MM_SHUFFLE2( 0, 1 ) ); + return _mm_cvtsd_f64( _mm_add_pd( sum.val, sumrev ) ); +#else + double CV_DECL_ALIGNED( 16 ) buf[2]; + v_store_aligned( buf, sum ); + return OPENCV_HAL_ADD( buf[0], buf[1] ); +#endif + +#else + return feature.dot( coef ); +#endif +} - feature[8] = freqDomain.at< float >( 2, 0 ); - feature[9] = freqDomain.at< float >( 2, 1 ); - feature[10] = freqDomain.at< float >( 2, 2 ); - feature[11] = freqDomain.at< float >( 2, 3 ); +void GPCPatchSample::getDirections( bool &refdir, bool &posdir, bool &negdir, const Vec< double, GPCPatchDescriptor::nFeatures > &coef, double rhs ) const +{ + refdir = ( ref.dot( coef ) < rhs ); + posdir = pos.isSeparated() ? ( !refdir ) : ( pos.dot( coef ) < rhs ); + negdir = neg.isSeparated() ? 
 
- feature[12] = freqDomain.at< float >( 3, 0 );
- feature[13] = freqDomain.at< float >( 3, 1 );
- feature[14] = freqDomain.at< float >( 3, 2 );
- feature[15] = freqDomain.at< float >( 3, 3 );
+void GPCDetails::getAllDescriptorsForImage( const Mat *imgCh, std::vector< GPCPatchDescriptor > &descr, const GPCMatchingParams &mp,
+ int type )
+{
+ if ( type == GPC_DESCRIPTOR_DCT )
+ getAllDCTDescriptorsForImage( imgCh, descr, mp );
+ else if ( type == GPC_DESCRIPTOR_WHT )
+ getAllWHTDescriptorsForImage( imgCh, descr, mp );
+ else
+ CV_Error( CV_StsBadArg, "Unknown descriptor type" );
+}
 
- feature[16] = cv::sum( imgCh[1]( roi ) )[0] / ( 2 * patchRadius );
- feature[17] = cv::sum( imgCh[2]( roi ) )[0] / ( 2 * patchRadius );
+void GPCDetails::getCoordinatesFromIndex( size_t index, Size sz, int &x, int &y )
+{
+ const size_t stride = sz.width - patchRadius * 2;
+ y = int( index / stride );
+ x = int( index - y * stride + patchRadius );
+ y += patchRadius;
 }
 
 bool GPCTree::trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth )
 {
- if ( std::distance( begin, end ) < minNumberOfSamples )
+ const int nSamples = (int)std::distance( begin, end );
+
+ if ( nSamples < params.minNumberOfSamples || depth >= params.maxTreeDepth )
 return false;
 
 if ( nodeId >= nodes.size() )
@@ -200,6 +525,7 @@ bool GPCTree::trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth )
 // Select the best hyperplane
 unsigned globalBestScore = 0;
 std::vector< double > values;
+ values.reserve( nSamples * 2 );
 
 for ( int j = 0; j < globalIters; ++j )
 { // Global search step
@@ -209,50 +535,81 @@ bool GPCTree::trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth )
 for ( int i = 0; i < localIters; ++i )
 { // Local search step
- double randomModification = getRandomCauchyScalar();
+ double randomModification = getRandomCauchyScalar() * ( 1.0 + sigmaGrowthRate * int( i / GPCPatchDescriptor::nFeatures ) );
 const int pos = i % GPCPatchDescriptor::nFeatures;
 std::swap( coef[pos], randomModification );
 
 values.clear();
 for ( SIter iter = begin; iter != end; ++iter )
- {
- values.push_back( coef.dot( iter->first.feature ) );
- values.push_back( coef.dot( iter->second.feature ) );
- }
+ values.push_back( iter->ref.dot( coef ) );
+
+ std::nth_element( values.begin(), values.begin() + nSamples / 2, values.end() );
+ double median = values[nSamples / 2];
+
+ // Skip an obviously degenerate split. This can happen when a large number of samples have
+ // equal projections; it is unlikely for samples collected from a good dataset, but it does
+ // occur when the dataset contains plain (or close to plain) images.
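+ // (If more than roughly a quarter of the projections fall within epsTolerance of the
+ // median, no hyperplane through the median separates them meaningfully, so the candidate
+ // is rejected.)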
+ if ( std::count_if( values.begin(), values.end(), CompareWithTolerance( median ) ) > std::max( 1, nSamples / 4 ) ) + continue; - std::nth_element( values.begin(), values.begin() + values.size() / 2, values.end() ); - const double median = values[values.size() / 2]; - unsigned correct = 0; + median = getRobustMedian( median ); + unsigned score = 0; for ( SIter iter = begin; iter != end; ++iter ) { - const bool direction = ( coef.dot( iter->first.feature ) < median ); - if ( direction == ( coef.dot( iter->second.feature ) < median ) ) - ++correct; + bool refdir, posdir, negdir; + iter->getDirections( refdir, posdir, negdir, coef, median ); + if ( refdir == posdir ) + score += scoreGainPos; + if ( refdir != negdir ) + score += scoreGainNeg; } - if ( correct > localBestScore ) - localBestScore = correct; + if ( score > localBestScore ) + localBestScore = score; else - coef[pos] = randomModification; + { + const double beta = simulatedAnnealingTemperatureCoef * std::sqrt( i ) / ( nSamples * ( scoreGainPos + scoreGainNeg ) ); + if ( rng.uniform( 0.0, 1.0 ) > std::exp( -beta * ( localBestScore - score) ) ) + coef[pos] = randomModification; + } - if ( correct > globalBestScore ) + if ( score > globalBestScore ) { - globalBestScore = correct; + globalBestScore = score; node.coef = coef; node.rhs = median; - - /*if ( debugOutput ) - { - printf( "[%u] Updating weights: correct %.2f (%u/%ld)\n", depth, double( correct ) / std::distance( begin, end ), correct, - std::distance( begin, end ) ); - for ( unsigned k = 0; k < GPCPatchDescriptor::nFeatures; ++k ) - printf( "%.3f ", coef[k] ); - printf( "\n" ); - }*/ } } } + + if ( globalBestScore == 0 ) + return false; + + if ( params.printProgress ) + { + const int maxScore = nSamples * ( scoreGainPos + scoreGainNeg ); + const double correctRatio = double( globalBestScore ) / maxScore; + printf( "[%u] Correct %.2f (%u/%d)\nWeights:", depth, correctRatio, globalBestScore, maxScore ); + for ( unsigned k = 0; k < GPCPatchDescriptor::nFeatures; ++k ) + printf( " %.3f", node.coef[k] ); + printf( "\n" ); + } + + for ( SIter iter = begin; iter != end; ++iter ) + { + bool refdir, posdir, negdir; + iter->getDirections( refdir, posdir, negdir, node.coef, node.rhs ); + // We shouldn't account for positive sample in the scoring in case it was separated before. So mark it as separated. + // After all, we can't bring back samples which were separated from reference on early levels. + if ( refdir != posdir ) + iter->pos.markAsSeparated(); + // The same for negative sample. + if ( refdir != negdir ) + iter->neg.markAsSeparated(); + // If both positive and negative were separated before then such triplet doesn't make sense on deeper levels. We discard it. + } + // Partition vector with samples according to the hyperplane in QuickSort-like manner. // Unlike QuickSort, we need to partition it into 3 parts (left subtree samples; undefined samples; right subtree // samples), so we call it two times. @@ -260,16 +617,21 @@ bool GPCTree::trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth ) SIter rightBegin = std::partition( leftEnd, end, PartitionPredicate2( node.coef, node.rhs ) ); // Separate undefined samples from right subtree samples. - node.left = ( trainNode( nodeId * 2 + 1, begin, leftEnd, depth + 1 ) ) ? unsigned(nodeId * 2 + 1) : 0; - node.right = ( trainNode( nodeId * 2 + 2, rightBegin, end, depth + 1 ) ) ? unsigned(nodeId * 2 + 2) : 0; + node.left = ( trainNode( nodeId * 2 + 1, begin, leftEnd, depth + 1 ) ) ? 
unsigned( nodeId * 2 + 1 ) : 0;
+ node.right = ( trainNode( nodeId * 2 + 2, rightBegin, end, depth + 1 ) ) ? unsigned( nodeId * 2 + 2 ) : 0;
 
 return true;
 }
 
-void GPCTree::train( GPCSamplesVector &samples )
+void GPCTree::train( GPCTrainingSamples &samples, const GPCTrainingParams _params )
 {
+ if ( _params.descriptorType != samples.type() )
+ CV_Error( CV_StsBadArg, "Descriptor type mismatch! Check that samples are collected with the same descriptor type." );
+
 nodes.clear();
 nodes.reserve( samples.size() * 2 - 1 ); // set upper bound for the possible number of nodes so all subsequent resize() will be no-op
- trainNode( 0, samples.begin(), samples.end(), 0 );
+ params = _params;
+ GPCSamplesVector &sv = samples;
+ trainNode( 0, sv.begin(), sv.end(), 0 );
 }
 
 void GPCTree::write( FileStorage &fs ) const
@@ -277,17 +639,39 @@ void GPCTree::write( FileStorage &fs ) const
 if ( nodes.empty() )
 CV_Error( CV_StsBadArg, "Tree have not been trained" );
 fs << "nodes" << nodes;
+ fs << "dtype" << (int)params.descriptorType;
+}
+
+void GPCTree::read( const FileNode &fn )
+{
+ fn["nodes"] >> nodes;
+ fn["dtype"] >> (int &)params.descriptorType;
 }
 
-void GPCTree::read( const FileNode &fn ) { fn["nodes"] >> nodes; }
+unsigned GPCTree::findLeafForPatch( const GPCPatchDescriptor &descr ) const
+{
+ unsigned id = 0, prevId;
+ do
+ {
+ prevId = id;
+ if ( descr.dot( nodes[id].coef ) < nodes[id].rhs )
+ id = nodes[id].right;
+ else
+ id = nodes[id].left;
+ } while ( id );
+ return prevId;
+}
 
 Ptr< GPCTrainingSamples > GPCTrainingSamples::create( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo,
- const std::vector< String > &gt )
+ const std::vector< String > &gt, int _descriptorType )
 {
 CV_Assert( imagesFrom.size() == imagesTo.size() );
 CV_Assert( imagesFrom.size() == gt.size() );
 
 Ptr< GPCTrainingSamples > ts = makePtr< GPCTrainingSamples >();
+
+ ts->descriptorType = _descriptorType;
+
 for ( size_t i = 0; i < imagesFrom.size(); ++i )
 {
 Mat from = imread( imagesFrom[i] );
@@ -304,12 +688,69 @@ Ptr< GPCTrainingSamples > GPCTrainingSamples::create( const std::vector< String
 cvtColor( from, from, COLOR_BGR2YCrCb );
 cvtColor( to, to, COLOR_BGR2YCrCb );
 
- getTrainingSamples( from, to, gtFlow, ts->samples );
+ getTrainingSamples( from, to, gtFlow, ts->samples, ts->descriptorType );
 }
 
 return ts;
 }
 
+Ptr< GPCTrainingSamples > GPCTrainingSamples::create( InputArrayOfArrays imagesFrom, InputArrayOfArrays imagesTo,
+ InputArrayOfArrays gt, int _descriptorType )
+{
+ CV_Assert( imagesFrom.total() == imagesTo.total() );
+ CV_Assert( imagesFrom.total() == gt.total() );
+
+ Ptr< GPCTrainingSamples > ts = makePtr< GPCTrainingSamples >();
+
+ ts->descriptorType = _descriptorType;
+
+ for ( size_t i = 0; i < imagesFrom.total(); ++i )
+ {
+ Mat from = imagesFrom.getMat( static_cast<int>( i ) );
+ Mat to = imagesTo.getMat( static_cast<int>( i ) );
+ Mat gtFlow = gt.getMat( static_cast<int>( i ) );
+
+ CV_Assert( from.size == to.size );
+ CV_Assert( from.size == gtFlow.size );
+ CV_Assert( from.channels() == 3 );
+ CV_Assert( to.channels() == 3 );
+
+ from.convertTo( from, CV_32FC3 );
+ to.convertTo( to, CV_32FC3 );
+ cvtColor( from, from, COLOR_BGR2YCrCb );
+ cvtColor( to, to, COLOR_BGR2YCrCb );
+
+ getTrainingSamples( from, to, gtFlow, ts->samples, ts->descriptorType );
+ }
+
+ return ts;
+}
+
+void GPCDetails::dropOutliers( std::vector< std::pair< Point2i, Point2i > > &corr )
+{
+ std::vector< float > mag( corr.size() );
+
+ for ( size_t i = 0; i < corr.size(); ++i )
+ mag[i] = normL2Sqr( corr[i].first - corr[i].second );
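+ // Assuming thresholdOutliers is a fraction in (0, 1): nth_element below puts the matching
+ // displacement percentile at mag[threshold], and every correspondence whose squared
+ // displacement exceeds that percentile is discarded.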
+
+ const size_t threshold = size_t( mag.size() * thresholdOutliers );
+ std::nth_element( mag.begin(), mag.begin() + threshold, mag.end() );
+ const float percentile = mag[threshold];
+ size_t i = 0, j = 0;
+
+ while ( i < corr.size() )
+ {
+ if ( normL2Sqr( corr[i].first - corr[i].second ) <= percentile )
+ {
+ corr[j] = corr[i];
+ ++j;
+ }
+ ++i;
+ }
+
+ corr.resize( j );
+}
+
 } // namespace optflow
 
 void write( FileStorage &fs, const String &name, const optflow::GPCTree::Node &node )
diff --git a/modules/optflow/test/test_OF_accuracy.cpp b/modules/optflow/test/test_OF_accuracy.cpp
index 2125f3fd1..c47ff90fc 100644
--- a/modules/optflow/test/test_OF_accuracy.cpp
+++ b/modules/optflow/test/test_OF_accuracy.cpp
@@ -49,8 +49,16 @@ using namespace optflow;
 
 static string getDataDir() { return TS::ptr()->get_data_path(); }
 
+static string getRubberWhaleFrame1() { return getDataDir() + "optflow/RubberWhale1.png"; }
+
+static string getRubberWhaleFrame2() { return getDataDir() + "optflow/RubberWhale2.png"; }
+
+static string getRubberWhaleGroundTruth() { return getDataDir() + "optflow/RubberWhale.flo"; }
+
 static bool isFlowCorrect(float u) { return !cvIsNaN(u) && (fabs(u) < 1e9); }
 
+static bool isFlowCorrect(double u) { return !cvIsNaN(u) && (fabs(u) < 1e9); }
+
 static float calcRMSE(Mat flow1, Mat flow2)
 {
 float sum = 0;
@@ -80,11 +88,36 @@ static float calcRMSE(Mat flow1, Mat flow2)
 return (float)sqrt(sum / (1e-9 + counter));
 }
 
+static float calcAvgEPE(vector< pair<Point2i, Point2i> > corr, Mat flow)
+{
+ double sum = 0;
+ int counter = 0;
+
+ for (size_t i = 0; i < corr.size(); ++i)
+ {
+ Vec2f flow1_at_point = Point2f(corr[i].second - corr[i].first);
+ Vec2f flow2_at_point = flow.at<Vec2f>(corr[i].first.y, corr[i].first.x);
+
+ double u1 = (double)flow1_at_point[0];
+ double v1 = (double)flow1_at_point[1];
+ double u2 = (double)flow2_at_point[0];
+ double v2 = (double)flow2_at_point[1];
+
+ if (isFlowCorrect(u1) && isFlowCorrect(u2) && isFlowCorrect(v1) && isFlowCorrect(v2))
+ {
+ sum += sqrt((u1 - u2) * (u1 - u2) + (v1 - v2) * (v1 - v2));
+ counter++;
+ }
+ }
+
+ return (float)(sum / counter);
+}
+
 bool readRubberWhale(Mat &dst_frame_1, Mat &dst_frame_2, Mat &dst_GT)
 {
- const string frame1_path = getDataDir() + "optflow/RubberWhale1.png";
- const string frame2_path = getDataDir() + "optflow/RubberWhale2.png";
- const string gt_flow_path = getDataDir() + "optflow/RubberWhale.flo";
+ const string frame1_path = getRubberWhaleFrame1();
+ const string frame2_path = getRubberWhaleFrame2();
+ const string gt_flow_path = getRubberWhaleGroundTruth();
 
 dst_frame_1 = imread(frame1_path);
 dst_frame_2 = imread(frame2_path);
@@ -188,3 +221,65 @@ TEST(DenseOpticalFlow_VariationalRefinement, ReferenceAccuracy)
 ASSERT_EQ(GT.cols, flow.cols);
 EXPECT_LE(calcRMSE(GT, flow), target_RMSE);
 }
+
+TEST(DenseOpticalFlow_PCAFlow, ReferenceAccuracy)
+{
+ Mat frame1, frame2, GT;
+ ASSERT_TRUE(readRubberWhale(frame1, frame2, GT));
+ const float target_RMSE = 0.55f;
+
+ Mat flow;
+ Ptr<DenseOpticalFlow> algo = createOptFlow_PCAFlow();
+ algo->calc(frame1, frame2, flow);
+ ASSERT_EQ(GT.rows, flow.rows);
+ ASSERT_EQ(GT.cols, flow.cols);
+ EXPECT_LE(calcRMSE(GT, flow), target_RMSE);
+}
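+// GPCTrainingParams below is assumed to follow the constructor order declared in
+// sparse_matching_gpc.hpp: ( maxTreeDepth, minNumberOfSamples, descriptorType, printProgress ).
+// E.g. GPCTrainingParams(8, 3, GPC_DESCRIPTOR_DCT, false) grows trees of depth at most 8 and
+// stops splitting nodes that hold fewer than 3 triplets.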
+
+TEST(DenseOpticalFlow_GlobalPatchColliderDCT, ReferenceAccuracy)
+{
+ Mat frame1, frame2, GT;
+ ASSERT_TRUE(readRubberWhale(frame1, frame2, GT));
+
+ const Size sz = frame1.size() / 2;
+ frame1 = frame1(Rect(0, 0, sz.width, sz.height));
+ frame2 = frame2(Rect(0, 0, sz.width, sz.height));
+ GT = GT(Rect(0, 0, sz.width, sz.height));
+
+ vector<Mat> img1, img2, gt;
+ vector< pair<Point2i, Point2i> > corr;
+ img1.push_back(frame1);
+ img2.push_back(frame2);
+ gt.push_back(GT);
+
+ Ptr< GPCForest<5> > forest = GPCForest<5>::create();
+ forest->train(img1, img2, gt, GPCTrainingParams(8, 3, GPC_DESCRIPTOR_DCT, false));
+ forest->findCorrespondences(frame1, frame2, corr);
+
+ ASSERT_LE(7500U, corr.size());
+ ASSERT_LE(calcAvgEPE(corr, GT), 0.5f);
+}
+
+TEST(DenseOpticalFlow_GlobalPatchColliderWHT, ReferenceAccuracy)
+{
+ Mat frame1, frame2, GT;
+ ASSERT_TRUE(readRubberWhale(frame1, frame2, GT));
+
+ const Size sz = frame1.size() / 2;
+ frame1 = frame1(Rect(0, 0, sz.width, sz.height));
+ frame2 = frame2(Rect(0, 0, sz.width, sz.height));
+ GT = GT(Rect(0, 0, sz.width, sz.height));
+
+ vector<Mat> img1, img2, gt;
+ vector< pair<Point2i, Point2i> > corr;
+ img1.push_back(frame1);
+ img2.push_back(frame2);
+ gt.push_back(GT);
+
+ Ptr< GPCForest<5> > forest = GPCForest<5>::create();
+ forest->train(img1, img2, gt, GPCTrainingParams(8, 3, GPC_DESCRIPTOR_WHT, false));
+ forest->findCorrespondences(frame1, frame2, corr);
+
+ ASSERT_LE(7000U, corr.size());
+ ASSERT_LE(calcAvgEPE(corr, GT), 0.5f);
+}
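
For reference, a minimal end-to-end sketch of the GPC matching API added above. This is a
usage sketch only: it assumes the declarations in sparse_matching_gpc.hpp
(GPCForest<T>::create(), train(), findCorrespondences()) and cv::Algorithm::load() for
deserialization; the image paths and the forest file name are placeholders.

    #include "opencv2/imgcodecs.hpp"
    #include "opencv2/optflow/sparse_matching_gpc.hpp"

    using namespace cv;
    using namespace cv::optflow;

    int main()
    {
      Mat from = imread( "frame_0001.png" ); // placeholder frame pair
      Mat to = imread( "frame_0002.png" );

      // Load a forest trained beforehand (e.g. with the gpc_train sample), or train one
      // here via GPCForest<5>::create() + train() given ground truth flow.
      Ptr< GPCForest<5> > forest = Algorithm::load< GPCForest<5> >( "forest.yml.gz" );

      // Collect sparse matches between the two frames.
      std::vector< std::pair< Point2i, Point2i > > corr;
      forest->findCorrespondences( from, to, corr );

      // corr now holds sparse matches suitable as input for sparse-to-dense
      // interpolation (see the pcaflow_demo sample added by this patch).
      return 0;
    }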