From 54e746bebda7cdf3cb78f3e29aa111f0d54f1b41 Mon Sep 17 00:00:00 2001 From: Vladislav Samsonov Date: Sun, 12 Jun 2016 23:04:38 +0300 Subject: [PATCH] Added OpenCL support --- .../include/opencv2/optflow/pcaflow.hpp | 4 +- .../samples/optical_flow_evaluation.cpp | 8 +- modules/optflow/src/pcaflow.cpp | 132 +++++++++++++----- 3 files changed, 109 insertions(+), 35 deletions(-) diff --git a/modules/optflow/include/opencv2/optflow/pcaflow.hpp b/modules/optflow/include/opencv2/optflow/pcaflow.hpp index 2d9495489..80e166521 100644 --- a/modules/optflow/include/opencv2/optflow/pcaflow.hpp +++ b/modules/optflow/include/opencv2/optflow/pcaflow.hpp @@ -88,10 +88,10 @@ public: void collectGarbage(); private: - void findSparseFeatures( Mat &from, Mat &to, std::vector &features, + void findSparseFeatures( UMat &from, UMat &to, std::vector &features, std::vector &predictedFeatures ) const; - void removeOcclusions( Mat &from, Mat &to, std::vector &features, + void removeOcclusions( UMat &from, UMat &to, std::vector &features, std::vector &predictedFeatures ) const; void getSystem( OutputArray AOut, OutputArray b1Out, OutputArray b2Out, const std::vector &features, diff --git a/modules/optflow/samples/optical_flow_evaluation.cpp b/modules/optflow/samples/optical_flow_evaluation.cpp index 675510434..8ec384368 100644 --- a/modules/optflow/samples/optical_flow_evaluation.cpp +++ b/modules/optflow/samples/optical_flow_evaluation.cpp @@ -1,6 +1,7 @@ #include "opencv2/highgui.hpp" #include "opencv2/video.hpp" #include "opencv2/optflow.hpp" +#include "opencv2/core/ocl.hpp" #include #include @@ -15,7 +16,8 @@ const String keys = "{help h usage ? | | print this message }" "{@groundtruth | | path to the .flo file (optional), Middlebury format }" "{m measure |endpoint| error measure - [endpoint or angular] }" "{r region |all | region to compute stats about [all, discontinuities, untextured] }" - "{d display | | display additional info images (pauses program execution) }"; + "{d display | | display additional info images (pauses program execution) }" + "{g gpu | | use OpenCL}"; inline bool isFlowCorrect( const Point2f u ) { @@ -200,6 +202,7 @@ int main( int argc, char** argv ) String error_measure = parser.get("measure"); String region = parser.get("region"); bool display_images = parser.has("display"); + const bool useGpu = parser.has("gpu"); if ( !parser.check() ) { @@ -207,6 +210,9 @@ int main( int argc, char** argv ) return 0; } + cv::ocl::setUseOpenCL(useGpu); + printf("OpenCL Enabled: %u\n", useGpu && cv::ocl::haveOpenCL()); + Mat i1, i2; Mat_ flow, ground_truth; Mat computed_errors; diff --git a/modules/optflow/src/pcaflow.cpp b/modules/optflow/src/pcaflow.cpp index 64aa39c6f..db7639cd2 100644 --- a/modules/optflow/src/pcaflow.cpp +++ b/modules/optflow/src/pcaflow.cpp @@ -140,13 +140,15 @@ static void solveLSQR( const Mat &A, const Mat &b, OutputArray xOut, const doubl for ( unsigned itn = 0; itn < iter_lim; ++itn ) { - u = A * v - alfa * u; + u *= -alfa; + u += A * v; beta = cv::norm( u, NORM_L2 ); if ( beta > 0 ) { u *= 1 / beta; - v = AT * u - beta * v; + v *= -beta; + v += AT * u; alfa = cv::norm( v, NORM_L2 ); if ( alfa > 0 ) v = ( 1 / alfa ) * v; @@ -173,7 +175,7 @@ static void solveLSQR( const Mat &A, const Mat &b, OutputArray xOut, const doubl } } -void OpticalFlowPCAFlow::findSparseFeatures( Mat &from, Mat &to, std::vector &features, +void OpticalFlowPCAFlow::findSparseFeatures( UMat &from, UMat &to, std::vector &features, std::vector &predictedFeatures ) const { Size size = from.size(); @@ -207,7 +209,7 @@ void OpticalFlowPCAFlow::findSparseFeatures( Mat &from, Mat &to, std::vector &features, +void OpticalFlowPCAFlow::removeOcclusions( UMat &from, UMat &to, std::vector &features, std::vector &predictedFeatures ) const { std::vector predictedStatus; @@ -234,6 +236,27 @@ void OpticalFlowPCAFlow::removeOcclusions( Mat &from, Mat &to, std::vector= fs || n1 >= bsw || n2 >= bsh) return;" + "__global const float2* f = features + (fstep * i + foff);" + "__global float* a = A + (Astep * i + Aoff + (n1 * bsh + n2) * 4);" + "const float2 p = f[0];" + "a[0] = cos((n1 * M_PI / sw) * (p.x + 0.5)) * cos((n2 * M_PI / sh) * (p.y + 0.5));" + "}" ); + void OpticalFlowPCAFlow::getSystem( OutputArray AOut, OutputArray b1Out, OutputArray b2Out, const std::vector &features, const std::vector &predictedFeatures, const Size size ) @@ -241,20 +264,40 @@ void OpticalFlowPCAFlow::getSystem( OutputArray AOut, OutputArray b1Out, OutputA AOut.create( features.size(), basisSize.area(), CV_32F ); b1Out.create( features.size(), 1, CV_32F ); b2Out.create( features.size(), 1, CV_32F ); - Mat A = AOut.getMat(); - Mat b1 = b1Out.getMat(); - Mat b2 = b2Out.getMat(); - for ( size_t i = 0; i < features.size(); ++i ) + if ( ocl::useOpenCL() ) { - const Point2f &p = features[i]; - float *row = A.ptr( i ); - for ( int n1 = 0; n1 < basisSize.width; ++n1 ) - for ( int n2 = 0; n2 < basisSize.height; ++n2 ) - row[n1 * basisSize.height + n2] = - cosf( ( n1 * M_PI / size.width ) * ( p.x + 0.5 ) ) * cosf( ( n2 * M_PI / size.height ) * ( p.y + 0.5 ) ); - const Point2f flow = predictedFeatures[i] - features[i]; - b1.at( i ) = flow.x; - b2.at( i ) = flow.y; + UMat A = AOut.getUMat(); + Mat b1 = b1Out.getMat(); + Mat b2 = b2Out.getMat(); + + ocl::Kernel kernel( "fillDCTSampledPoints", _ocl_fillDCTSampledPointsSource ); + size_t globSize[] = {features.size(), basisSize.width, basisSize.height}; + kernel + .args( cv::ocl::KernelArg::ReadOnlyNoSize( Mat( features ).getUMat( ACCESS_READ ) ), + cv::ocl::KernelArg::WriteOnlyNoSize( A ), (int)features.size(), (int)basisSize.width, + (int)basisSize.height, (int)size.width, (int)size.height ) + .run( 3, globSize, 0, true ); + + for ( size_t i = 0; i < features.size(); ++i ) + { + const Point2f flow = predictedFeatures[i] - features[i]; + b1.at( i ) = flow.x; + b2.at( i ) = flow.y; + } + } + else + { + Mat A = AOut.getMat(); + Mat b1 = b1Out.getMat(); + Mat b2 = b2Out.getMat(); + + for ( size_t i = 0; i < features.size(); ++i ) + { + _cpu_fillDCTSampledPoints( A.ptr( i ), features[i], basisSize, size ); + const Point2f flow = predictedFeatures[i] - features[i]; + b1.at( i ) = flow.x; + b2.at( i ) = flow.y; + } } } @@ -268,29 +311,54 @@ void OpticalFlowPCAFlow::getSystem( OutputArray A1Out, OutputArray A2Out, Output A2Out.create( features.size() + prior->getPadding(), basisSize.area(), CV_32F ); b1Out.create( features.size() + prior->getPadding(), 1, CV_32F ); b2Out.create( features.size() + prior->getPadding(), 1, CV_32F ); + + if ( ocl::useOpenCL() ) + { + UMat A = A1Out.getUMat(); + Mat b1 = b1Out.getMat(); + Mat b2 = b2Out.getMat(); + + ocl::Kernel kernel( "fillDCTSampledPoints", _ocl_fillDCTSampledPointsSource ); + size_t globSize[] = {features.size(), basisSize.width, basisSize.height}; + kernel + .args( cv::ocl::KernelArg::ReadOnlyNoSize( Mat( features ).getUMat( ACCESS_READ ) ), + cv::ocl::KernelArg::WriteOnlyNoSize( A ), (int)features.size(), (int)basisSize.width, + (int)basisSize.height, (int)size.width, (int)size.height ) + .run( 3, globSize, 0, true ); + + for ( size_t i = 0; i < features.size(); ++i ) + { + const Point2f flow = predictedFeatures[i] - features[i]; + b1.at( i ) = flow.x; + b2.at( i ) = flow.y; + } + } + else + { + Mat A1 = A1Out.getMat(); + Mat b1 = b1Out.getMat(); + Mat b2 = b2Out.getMat(); + + for ( size_t i = 0; i < features.size(); ++i ) + { + _cpu_fillDCTSampledPoints( A1.ptr( i ), features[i], basisSize, size ); + const Point2f flow = predictedFeatures[i] - features[i]; + b1.at( i ) = flow.x; + b2.at( i ) = flow.y; + } + } + Mat A1 = A1Out.getMat(); Mat A2 = A2Out.getMat(); Mat b1 = b1Out.getMat(); Mat b2 = b2Out.getMat(); - for ( size_t i = 0; i < features.size(); ++i ) - { - const Point2f &p = features[i]; - float *row = A1.ptr( i ); - for ( int n1 = 0; n1 < basisSize.width; ++n1 ) - for ( int n2 = 0; n2 < basisSize.height; ++n2 ) - row[n1 * basisSize.height + n2] = - cosf( ( n1 * M_PI / size.width ) * ( p.x + 0.5 ) ) * cosf( ( n2 * M_PI / size.height ) * ( p.y + 0.5 ) ); - const Point2f flow = predictedFeatures[i] - features[i]; - b1.at( i ) = flow.x; - b2.at( i ) = flow.y; - } memcpy( A2.ptr(), A1.ptr(), features.size() * basisSize.area() * sizeof( float ) ); prior->fillConstraints( A1.ptr( features.size(), 0 ), A2.ptr( features.size(), 0 ), b1.ptr( features.size(), 0 ), b2.ptr( features.size(), 0 ) ); } -static void applyCLAHE( Mat &img ) +static void applyCLAHE( UMat &img ) { Ptr clahe = createCLAHE(); clahe->setClipLimit( 14 ); @@ -334,7 +402,7 @@ void OpticalFlowPCAFlow::calc( InputArray I0, InputArray I1, InputOutputArray fl const Size size = I0.size(); CV_Assert( size == I1.size() ); - Mat from, to; + UMat from, to; if ( I0.channels() == 3 ) { cvtColor( I0, from, COLOR_BGR2GRAY ); @@ -357,7 +425,7 @@ void OpticalFlowPCAFlow::calc( InputArray I0, InputArray I1, InputOutputArray fl CV_Assert( from.channels() == 1 ); CV_Assert( to.channels() == 1 ); - const Mat fromOrig = from.clone(); + const Mat fromOrig = from.getMat( ACCESS_READ ).clone(); applyCLAHE( from ); applyCLAHE( to );