From 1498d2f42792104050d7705b75a14d21afb28c2c Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 13 Feb 2013 15:42:58 +0400 Subject: [PATCH 01/12] added dual tvl1 optical flow implementation --- .../motion_analysis_and_object_tracking.rst | 70 ++ .../video/include/opencv2/video/tracking.hpp | 99 ++ modules/video/perf/perf_tvl1optflow.cpp | 33 + modules/video/src/tvl1flow.cpp | 865 ++++++++++++++++++ modules/video/test/test_tvl1optflow.cpp | 171 ++++ samples/cpp/tvl1_optical_flow.cpp | 193 ++++ 6 files changed, 1431 insertions(+) create mode 100644 modules/video/perf/perf_tvl1optflow.cpp create mode 100644 modules/video/src/tvl1flow.cpp create mode 100644 modules/video/test/test_tvl1optflow.cpp create mode 100644 samples/cpp/tvl1_optical_flow.cpp diff --git a/modules/video/doc/motion_analysis_and_object_tracking.rst b/modules/video/doc/motion_analysis_and_object_tracking.rst index 3a5d2bdd41..2372373947 100644 --- a/modules/video/doc/motion_analysis_and_object_tracking.rst +++ b/modules/video/doc/motion_analysis_and_object_tracking.rst @@ -641,6 +641,72 @@ Calculate an optical flow using "SimpleFlow" algorithm. See [Tao2012]_. And site of project - http://graphics.berkeley.edu/papers/Tao-SAN-2012-05/. + + +OpticalFlowDual_TVL1 +-------------------- +"Dual TV L1" Optical Flow Algorithm. + +.. ocv:class:: OpticalFlowDual_TVL1 + + +The class implements the "Dual TV L1" optical flow algorithm described in [Zach2007]_ and [Javier2012]_ . + +Here are important members of the class that control the algorithm, which you can set after constructing the class instance: + + .. ocv:member:: double tau + + Time step of the numerical scheme. + + .. ocv:member:: double lambda + + Weight parameter for the data term, attachment parameter. This is the most relevant parameter, which determines the smoothness of the output. The smaller this parameter is, the smoother the solutions we obtain. It depends on the range of motions of the images, so its value should be adapted to each image sequence. + + .. ocv:member:: double theta + + Weight parameter for (u - v)^2, tightness parameter. It serves as a link between the attachment and the regularization terms. In theory, it should have a small value in order to maintain both parts in correspondence. The method is stable for a large range of values of this parameter. + + .. ocv:member:: int nscales + + Number of scales used to create the pyramid of images. + + .. ocv:member:: int warps + + Number of warpings per scale. Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale. This is a parameter that assures the stability of the method. It also affects the running time, so it is a compromise between speed and accuracy. + + .. ocv:member:: double epsilon + + Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time. A small value will yield more accurate solutions at the expense of a slower convergence. + + .. ocv:member:: int iterations + + Stopping criterion iterations number used in the numerical scheme. + + + + +OpticalFlowDual_TVL1::operator() +-------------------------------- +Calculates an optical flow. + +.. ocv:function:: void OpticalFlowDual_TVL1::operator ()(InputArray I0, InputArray I1, InputOutputArray flow) + + :param I0: first 8-bit single-channel input image. + + :param I1: second input image of the same size and the same type as ``I0`` . + + :param flow: computed flow image that has the same size as ``I0`` and type ``CV_32FC2`` .
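+
+For example, a minimal program mirroring ``samples/cpp/tvl1_optical_flow.cpp`` from this patch might look as follows (the input file names and the ``lambda`` value are illustrative only): ::
+
+    #include "opencv2/video/tracking.hpp"
+    #include "opencv2/highgui/highgui.hpp"
+
+    int main()
+    {
+        // illustrative file names; any two 8-bit single-channel frames will do
+        cv::Mat I0 = cv::imread("frame0.png", cv::IMREAD_GRAYSCALE);
+        cv::Mat I1 = cv::imread("frame1.png", cv::IMREAD_GRAYSCALE);
+
+        cv::Mat_<cv::Point2f> flow;
+        cv::OpticalFlowDual_TVL1 tvl1;
+        tvl1.lambda = 0.1;    // illustrative: smaller values give smoother flow (default is 0.15)
+        tvl1(I0, I1, flow);   // flow has type CV_32FC2 and the same size as I0
+        return 0;
+    }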
+ + + +OpticalFlowDual_TVL1::collectGarbage +------------------------------------ +Releases all inner buffers. + +.. ocv:function:: void OpticalFlowDual_TVL1::collectGarbage() + + + .. [Bouguet00] Jean-Yves Bouguet. Pyramidal Implementation of the Lucas Kanade Feature Tracker. .. [Bradski98] Bradski, G.R. "Computer Vision Face Tracking for Use in a Perceptual User Interface", Intel, 1998 @@ -658,3 +724,7 @@ See [Tao2012]_. And site of project - http://graphics.berkeley.edu/papers/Tao-SA .. [Welch95] Greg Welch and Gary Bishop “An Introduction to the Kalman Filter”, 1995 .. [Tao2012] Michael Tao, Jiamin Bai, Pushmeet Kohli and Sylvain Paris. SimpleFlow: A Non-iterative, Sublinear Optical Flow Algorithm. Computer Graphics Forum (Eurographics 2012) + +.. [Zach2007] C. Zach, T. Pock and H. Bischof. "A Duality Based Approach for Realtime TV-L1 Optical Flow", In Proceedings of Pattern Recognition (DAGM), Heidelberg, Germany, pp. 214-223, 2007 + +.. [Javier2012] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation". diff --git a/modules/video/include/opencv2/video/tracking.hpp b/modules/video/include/opencv2/video/tracking.hpp index b936e12e2f..deef20b42a 100644 --- a/modules/video/include/opencv2/video/tracking.hpp +++ b/modules/video/include/opencv2/video/tracking.hpp @@ -352,6 +352,105 @@ CV_EXPORTS_W void calcOpticalFlowSF(Mat& from, double upscale_sigma_color, double speed_up_thr); +// Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method +// +// see reference: +// [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow". +// [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation". +class CV_EXPORTS OpticalFlowDual_TVL1 +{ +public: + OpticalFlowDual_TVL1(); + + void operator ()(InputArray I0, InputArray I1, InputOutputArray flow); + + void collectGarbage(); + + /** + * Time step of the numerical scheme. + */ + double tau; + + /** + * Weight parameter for the data term, attachment parameter. + * This is the most relevant parameter, which determines the smoothness of the output. + * The smaller this parameter is, the smoother the solutions we obtain. + * It depends on the range of motions of the images, so its value should be adapted to each image sequence. + */ + double lambda; + + /** + * Weight parameter for (u - v)^2, tightness parameter. + * It serves as a link between the attachment and the regularization terms. + * In theory, it should have a small value in order to maintain both parts in correspondence. + * The method is stable for a large range of values of this parameter. + */ + double theta; + + /** + * Number of scales used to create the pyramid of images. + */ + int nscales; + + /** + * Number of warpings per scale. + * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale. + * This is a parameter that assures the stability of the method. + * It also affects the running time, so it is a compromise between speed and accuracy. + */ + int warps; + + /** + * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time. + * A small value will yield more accurate solutions at the expense of a slower convergence. + */ + double epsilon; + + /** + * Stopping criterion iterations number used in the numerical scheme. 
+ */ + int iterations; + + bool useInitialFlow; + +private: + void procOneScale(const Mat_& I0, const Mat_& I1, Mat_& u1, Mat_& u2); + + std::vector > I0s; + std::vector > I1s; + std::vector > u1s; + std::vector > u2s; + + Mat_ I1x_buf; + Mat_ I1y_buf; + + Mat_ flowMap1_buf; + Mat_ flowMap2_buf; + + Mat_ I1w_buf; + Mat_ I1wx_buf; + Mat_ I1wy_buf; + + Mat_ grad_buf; + Mat_ rho_c_buf; + + Mat_ v1_buf; + Mat_ v2_buf; + + Mat_ p11_buf; + Mat_ p12_buf; + Mat_ p21_buf; + Mat_ p22_buf; + + Mat_ div_p1_buf; + Mat_ div_p2_buf; + + Mat_ u1x_buf; + Mat_ u1y_buf; + Mat_ u2x_buf; + Mat_ u2y_buf; +}; + } #endif diff --git a/modules/video/perf/perf_tvl1optflow.cpp b/modules/video/perf/perf_tvl1optflow.cpp new file mode 100644 index 0000000000..2014130a85 --- /dev/null +++ b/modules/video/perf/perf_tvl1optflow.cpp @@ -0,0 +1,33 @@ +#include "perf_precomp.hpp" + +using namespace std; +using namespace cv; +using namespace perf; + +typedef TestBaseWithParam< pair > ImagePair; + +pair impair(const char* im1, const char* im2) +{ + return make_pair(string(im1), string(im2)); +} + +PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1, testing::Values(impair("cv/optflow/RubberWhale1.png", "cv/optflow/RubberWhale2.png"))) +{ + declare.time(40); + + Mat frame1 = imread(getDataPath(GetParam().first), IMREAD_GRAYSCALE); + Mat frame2 = imread(getDataPath(GetParam().second), IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()); + ASSERT_FALSE(frame2.empty()); + + Mat flow; + + OpticalFlowDual_TVL1 tvl1; + + TEST_CYCLE() + { + tvl1(frame1, frame2, flow); + } + + SANITY_CHECK(flow, 0.5); +} diff --git a/modules/video/src/tvl1flow.cpp b/modules/video/src/tvl1flow.cpp new file mode 100644 index 0000000000..2f8034adaa --- /dev/null +++ b/modules/video/src/tvl1flow.cpp @@ -0,0 +1,865 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +/* +// +// This implementation is based on Javier Sánchez Pérez implementation. +// Original BSD license: +// +// Copyright (c) 2011, Javier Sánchez Pérez, Enric Meinhardt Llopis +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, this +// list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +*/ + +#include "precomp.hpp" + +using namespace std; +using namespace cv; + +cv::OpticalFlowDual_TVL1::OpticalFlowDual_TVL1() +{ + tau = 0.25; + lambda = 0.15; + theta = 0.3; + nscales = 5; + warps = 5; + epsilon = 0.01; + iterations = 300; + useInitialFlow = false; +} + +void cv::OpticalFlowDual_TVL1::operator ()(InputArray _I0, InputArray _I1, InputOutputArray _flow) +{ + Mat I0 = _I0.getMat(); + Mat I1 = _I1.getMat(); + + CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 ); + CV_Assert( I0.size() == I1.size() ); + CV_Assert( I0.type() == I1.type() ); + CV_Assert( !useInitialFlow || (_flow.size() == I0.size() && _flow.type() == CV_32FC2) ); + CV_Assert( nscales > 0 ); + + // allocate memory for the pyramid structure + I0s.resize(nscales); + I1s.resize(nscales); + u1s.resize(nscales); + u2s.resize(nscales); + + I0.convertTo(I0s[0], I0s[0].depth(), I0.depth() == CV_8U ? 1.0 : 255.0); + I1.convertTo(I1s[0], I1s[0].depth(), I1.depth() == CV_8U ? 
1.0 : 255.0); + + if (useInitialFlow) + { + u1s[0].create(I0.size()); + u2s[0].create(I0.size()); + + Mat_ mv[] = {u1s[0], u2s[0]}; + + split(_flow.getMat(), mv); + } + + I1x_buf.create(I0.size()); + I1y_buf.create(I0.size()); + + flowMap1_buf.create(I0.size()); + flowMap2_buf.create(I0.size()); + + I1w_buf.create(I0.size()); + I1wx_buf.create(I0.size()); + I1wy_buf.create(I0.size()); + + grad_buf.create(I0.size()); + rho_c_buf.create(I0.size()); + + v1_buf.create(I0.size()); + v2_buf.create(I0.size()); + + p11_buf.create(I0.size()); + p12_buf.create(I0.size()); + p21_buf.create(I0.size()); + p22_buf.create(I0.size()); + + div_p1_buf.create(I0.size()); + div_p2_buf.create(I0.size()); + + u1x_buf.create(I0.size()); + u1y_buf.create(I0.size()); + u2x_buf.create(I0.size()); + u2y_buf.create(I0.size()); + + // create the scales + for (int s = 1; s < nscales; ++s) + { + pyrDown(I0s[s - 1], I0s[s]); + pyrDown(I1s[s - 1], I1s[s]); + + if (I0s[s].cols < 16 || I0s[s].rows < 16) + { + nscales = s; + break; + } + + if (useInitialFlow) + { + pyrDown(u1s[s - 1], u1s[s]); + pyrDown(u2s[s - 1], u2s[s]); + + multiply(u1s[s], Scalar::all(0.5), u1s[s]); + multiply(u2s[s], Scalar::all(0.5), u2s[s]); + } + } + + // pyramidal structure for computing the optical flow + for (int s = nscales - 1; s >= 0; --s) + { + // compute the optical flow at the current scale + procOneScale(I0s[s], I1s[s], u1s[s], u2s[s]); + + // if this was the last scale, finish now + if (s == 0) + break; + + // otherwise, upsample the optical flow + + // zoom the optical flow for the next finer scale + resize(u1s[s], u1s[s - 1], I0s[s - 1].size()); + resize(u2s[s], u2s[s - 1], I0s[s - 1].size()); + + // scale the optical flow with the appropriate zoom factor + multiply(u1s[s - 1], Scalar::all(2), u1s[s - 1]); + multiply(u2s[s - 1], Scalar::all(2), u2s[s - 1]); + } + + Mat uxy[] = {u1s[0], u2s[0]}; + merge(uxy, 2, _flow); +} + +namespace +{ + //////////////////////////////////////////////////////////// + // buildFlowMap + + struct BuildFlowMapBody : ParallelLoopBody + { + void operator() (const Range& range) const; + + Mat_ u1; + Mat_ u2; + mutable Mat_ map1; + mutable Mat_ map2; + }; + + void BuildFlowMapBody::operator() (const Range& range) const + { + for (int y = range.start; y < range.end; ++y) + { + const float* u1Row = u1[y]; + const float* u2Row = u2[y]; + + float* map1Row = map1[y]; + float* map2Row = map2[y]; + + for (int x = 0; x < u1.cols; ++x) + { + map1Row[x] = x + u1Row[x]; + map2Row[x] = y + u2Row[x]; + } + } + } + + void buildFlowMap(const Mat_& u1, const Mat_& u2, Mat_& map1, Mat_& map2) + { + CV_DbgAssert( u2.size() == u1.size() ); + CV_DbgAssert( map1.size() == u1.size() ); + CV_DbgAssert( map2.size() == u1.size() ); + + BuildFlowMapBody body; + + body.u1 = u1; + body.u2 = u2; + body.map1 = map1; + body.map2 = map2; + + parallel_for_(Range(0, u1.rows), body); + } + + //////////////////////////////////////////////////////////// + // centeredGradient + + struct CenteredGradientBody : ParallelLoopBody + { + void operator() (const Range& range) const; + + Mat_ src; + mutable Mat_ dx; + mutable Mat_ dy; + }; + + void CenteredGradientBody::operator() (const Range& range) const + { + const int last_col = src.cols - 1; + + for (int y = range.start; y < range.end; ++y) + { + const float* srcPrevRow = src[y - 1]; + const float* srcCurRow = src[y]; + const float* srcNextRow = src[y + 1]; + + float* dxRow = dx[y]; + float* dyRow = dy[y]; + + for (int x = 1; x < last_col; ++x) + { + dxRow[x] = 0.5f * (srcCurRow[x + 1] - srcCurRow[x - 
1]); + dyRow[x] = 0.5f * (srcNextRow[x] - srcPrevRow[x]); + } + } + } + + void centeredGradient(const Mat_& src, Mat_& dx, Mat_& dy) + { + CV_DbgAssert( src.rows > 2 && src.cols > 2 ); + CV_DbgAssert( dx.size() == src.size() ); + CV_DbgAssert( dy.size() == src.size() ); + + const int last_row = src.rows - 1; + const int last_col = src.cols - 1; + + // compute the gradient on the center body of the image + { + CenteredGradientBody body; + + body.src = src; + body.dx = dx; + body.dy = dy; + + parallel_for_(Range(1, last_row), body); + } + + // compute the gradient on the first and last rows + for (int x = 1; x < last_col; ++x) + { + dx(0, x) = 0.5f * (src(0, x + 1) - src(0, x - 1)); + dy(0, x) = 0.5f * (src(1, x) - src(0, x)); + + dx(last_row, x) = 0.5f * (src(last_row, x + 1) - src(last_row, x - 1)); + dy(last_row, x) = 0.5f * (src(last_row, x) - src(last_row - 1, x)); + } + + // compute the gradient on the first and last columns + for (int y = 1; y < last_row; ++y) + { + dx(y, 0) = 0.5f * (src(y, 1) - src(y, 0)); + dy(y, 0) = 0.5f * (src(y + 1, 0) - src(y - 1, 0)); + + dx(y, last_col) = 0.5f * (src(y, last_col) - src(y, last_col - 1)); + dy(y, last_col) = 0.5f * (src(y + 1, last_col) - src(y - 1, last_col)); + } + + // compute the gradient at the four corners + dx(0, 0) = 0.5f * (src(0, 1) - src(0, 0)); + dy(0, 0) = 0.5f * (src(1, 0) - src(0, 0)); + + dx(0, last_col) = 0.5f * (src(0, last_col) - src(0, last_col - 1)); + dy(0, last_col) = 0.5f * (src(1, last_col) - src(0, last_col)); + + dx(last_row, 0) = 0.5f * (src(last_row, 1) - src(last_row, 0)); + dy(last_row, 0) = 0.5f * (src(last_row, 0) - src(last_row - 1, 0)); + + dx(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row, last_col - 1)); + dy(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row - 1, last_col)); + } + + //////////////////////////////////////////////////////////// + // forwardGradient + + struct ForwardGradientBody : ParallelLoopBody + { + void operator() (const Range& range) const; + + Mat_ src; + mutable Mat_ dx; + mutable Mat_ dy; + }; + + void ForwardGradientBody::operator() (const Range& range) const + { + const int last_col = src.cols - 1; + + for (int y = range.start; y < range.end; ++y) + { + const float* srcCurRow = src[y]; + const float* srcNextRow = src[y + 1]; + + float* dxRow = dx[y]; + float* dyRow = dy[y]; + + for (int x = 0; x < last_col; ++x) + { + dxRow[x] = srcCurRow[x + 1] - srcCurRow[x]; + dyRow[x] = srcNextRow[x] - srcCurRow[x]; + } + } + } + + void forwardGradient(const Mat_& src, Mat_& dx, Mat_& dy) + { + CV_DbgAssert( src.rows > 2 && src.cols > 2 ); + CV_DbgAssert( dx.size() == src.size() ); + CV_DbgAssert( dy.size() == src.size() ); + + const int last_row = src.rows - 1; + const int last_col = src.cols - 1; + + // compute the gradient on the central body of the image + { + ForwardGradientBody body; + + body.src = src; + body.dx = dx; + body.dy = dy; + + parallel_for_(Range(0, last_row), body); + } + + // compute the gradient on the last row + for (int x = 0; x < last_col; ++x) + { + dx(last_row, x) = src(last_row, x + 1) - src(last_row, x); + dy(last_row, x) = 0.0f; + } + + // compute the gradient on the last column + for (int y = 0; y < last_row; ++y) + { + dx(y, last_col) = 0.0f; + dy(y, last_col) = src(y + 1, last_col) - src(y, last_col); + } + + dx(last_row, last_col) = 0.0f; + dy(last_row, last_col) = 0.0f; + } + + //////////////////////////////////////////////////////////// + // divergence + + struct DivergenceBody : ParallelLoopBody + { + void operator() 
(const Range& range) const; + + Mat_ v1; + Mat_ v2; + mutable Mat_ div; + }; + + void DivergenceBody::operator() (const Range& range) const + { + for (int y = range.start; y < range.end; ++y) + { + const float* v1Row = v1[y]; + const float* v2PrevRow = v2[y - 1]; + const float* v2CurRow = v2[y]; + + float* divRow = div[y]; + + for(int x = 1; x < v1.cols; ++x) + { + const float v1x = v1Row[x] - v1Row[x - 1]; + const float v2y = v2CurRow[x] - v2PrevRow[x]; + + divRow[x] = v1x + v2y; + } + } + } + + void divergence(const Mat_& v1, const Mat_& v2, Mat_& div) + { + CV_DbgAssert( v1.rows > 2 && v1.cols > 2 ); + CV_DbgAssert( v2.size() == v1.size() ); + CV_DbgAssert( div.size() == v1.size() ); + + { + DivergenceBody body; + + body.v1 = v1; + body.v2 = v2; + body.div = div; + + parallel_for_(Range(1, v1.rows), body); + } + + // compute the divergence on the first row + for(int x = 1; x < v1.cols; ++x) + div(0, x) = v1(0, x) - v1(0, x - 1) + v2(0, x); + + // compute the divergence on the first column + for (int y = 1; y < v1.rows; ++y) + div(y, 0) = v1(y, 0) + v2(y, 0) - v2(y - 1, 0); + + div(0, 0) = v1(0, 0) + v2(0, 0); + } + + //////////////////////////////////////////////////////////// + // calcGradRho + + struct CalcGradRhoBody : ParallelLoopBody + { + void operator() (const Range& range) const; + + Mat_ I0; + Mat_ I1w; + Mat_ I1wx; + Mat_ I1wy; + Mat_ u1; + Mat_ u2; + mutable Mat_ grad; + mutable Mat_ rho_c; + }; + + void CalcGradRhoBody::operator() (const Range& range) const + { + for (int y = range.start; y < range.end; ++y) + { + const float* I0Row = I0[y]; + const float* I1wRow = I1w[y]; + const float* I1wxRow = I1wx[y]; + const float* I1wyRow = I1wy[y]; + const float* u1Row = u1[y]; + const float* u2Row = u2[y]; + + float* gradRow = grad[y]; + float* rhoRow = rho_c[y]; + + for (int x = 0; x < I0.cols; ++x) + { + const float Ix2 = I1wxRow[x] * I1wxRow[x]; + const float Iy2 = I1wyRow[x] * I1wyRow[x]; + + // store the |Grad(I1)|^2 + gradRow[x] = Ix2 + Iy2; + + // compute the constant part of the rho function + rhoRow[x] = (I1wRow[x] - I1wxRow[x] * u1Row[x] - I1wyRow[x] * u2Row[x] - I0Row[x]); + } + } + } + + void calcGradRho(const Mat_& I0, const Mat_& I1w, const Mat_& I1wx, const Mat_& I1wy, const Mat_& u1, const Mat_& u2, + Mat_& grad, Mat_& rho_c) + { + CV_DbgAssert( I1w.size() == I0.size() ); + CV_DbgAssert( I1wx.size() == I0.size() ); + CV_DbgAssert( I1wy.size() == I0.size() ); + CV_DbgAssert( u1.size() == I0.size() ); + CV_DbgAssert( u2.size() == I0.size() ); + CV_DbgAssert( grad.size() == I0.size() ); + CV_DbgAssert( rho_c.size() == I0.size() ); + + CalcGradRhoBody body; + + body.I0 = I0; + body.I1w = I1w; + body.I1wx = I1wx; + body.I1wy = I1wy; + body.u1 = u1; + body.u2 = u2; + body.grad = grad; + body.rho_c = rho_c; + + parallel_for_(Range(0, I0.rows), body); + } + + //////////////////////////////////////////////////////////// + // estimateV + + struct EstimateVBody : ParallelLoopBody + { + void operator() (const Range& range) const; + + Mat_ I1wx; + Mat_ I1wy; + Mat_ u1; + Mat_ u2; + Mat_ grad; + Mat_ rho_c; + mutable Mat_ v1; + mutable Mat_ v2; + float l_t; + }; + + void EstimateVBody::operator() (const Range& range) const + { + for (int y = range.start; y < range.end; ++y) + { + const float* I1wxRow = I1wx[y]; + const float* I1wyRow = I1wy[y]; + const float* u1Row = u1[y]; + const float* u2Row = u2[y]; + const float* gradRow = grad[y]; + const float* rhoRow = rho_c[y]; + + float* v1Row = v1[y]; + float* v2Row = v2[y]; + + for (int x = 0; x < I1wx.cols; ++x) + { + const float rho 
= rhoRow[x] + (I1wxRow[x] * u1Row[x] + I1wyRow[x] * u2Row[x]); + + float d1 = 0.0f; + float d2 = 0.0f; + + if (rho < -l_t * gradRow[x]) + { + d1 = l_t * I1wxRow[x]; + d2 = l_t * I1wyRow[x]; + } + else if (rho > l_t * gradRow[x]) + { + d1 = -l_t * I1wxRow[x]; + d2 = -l_t * I1wyRow[x]; + } + else if (gradRow[x] > numeric_limits::epsilon()) + { + float fi = -rho / gradRow[x]; + d1 = fi * I1wxRow[x]; + d2 = fi * I1wyRow[x]; + } + + v1Row[x] = u1Row[x] + d1; + v2Row[x] = u2Row[x] + d2; + } + } + } + + void estimateV(const Mat_& I1wx, const Mat_& I1wy, const Mat_& u1, const Mat_& u2, const Mat_& grad, const Mat_& rho_c, + Mat_& v1, Mat_& v2, float l_t) + { + CV_DbgAssert( I1wy.size() == I1wx.size() ); + CV_DbgAssert( u1.size() == I1wx.size() ); + CV_DbgAssert( u2.size() == I1wx.size() ); + CV_DbgAssert( grad.size() == I1wx.size() ); + CV_DbgAssert( rho_c.size() == I1wx.size() ); + CV_DbgAssert( v1.size() == I1wx.size() ); + CV_DbgAssert( v2.size() == I1wx.size() ); + + EstimateVBody body; + + body.I1wx = I1wx; + body.I1wy = I1wy; + body.u1 = u1; + body.u2 = u2; + body.grad = grad; + body.rho_c = rho_c; + body.v1 = v1; + body.v2 = v2; + body.l_t = l_t; + + parallel_for_(Range(0, I1wx.rows), body); + } + + //////////////////////////////////////////////////////////// + // estimateU + + float estimateU(const Mat_& v1, const Mat_& v2, const Mat_& div_p1, const Mat_& div_p2, Mat_& u1, Mat_& u2, float theta) + { + CV_DbgAssert( v2.size() == v1.size() ); + CV_DbgAssert( div_p1.size() == v1.size() ); + CV_DbgAssert( div_p2.size() == v1.size() ); + CV_DbgAssert( u1.size() == v1.size() ); + CV_DbgAssert( u2.size() == v1.size() ); + + float error = 0.0f; + for (int y = 0; y < v1.rows; ++y) + { + const float* v1Row = v1[y]; + const float* v2Row = v2[y]; + const float* divP1Row = div_p1[y]; + const float* divP2Row = div_p2[y]; + + float* u1Row = u1[y]; + float* u2Row = u2[y]; + + for (int x = 0; x < v1.cols; ++x) + { + const float u1k = u1Row[x]; + const float u2k = u2Row[x]; + + u1Row[x] = v1Row[x] + theta * divP1Row[x]; + u2Row[x] = v2Row[x] + theta * divP2Row[x]; + + error += (u1Row[x] - u1k) * (u1Row[x] - u1k) + (u2Row[x] - u2k) * (u2Row[x] - u2k); + } + } + + return error; + } + + //////////////////////////////////////////////////////////// + // estimateDualVariables + + struct EstimateDualVariablesBody : ParallelLoopBody + { + void operator() (const Range& range) const; + + Mat_ u1x; + Mat_ u1y; + Mat_ u2x; + Mat_ u2y; + mutable Mat_ p11; + mutable Mat_ p12; + mutable Mat_ p21; + mutable Mat_ p22; + float taut; + }; + + void EstimateDualVariablesBody::operator() (const Range& range) const + { + for (int y = range.start; y < range.end; ++y) + { + const float* u1xRow = u1x[y]; + const float* u1yRow = u1y[y]; + const float* u2xRow = u2x[y]; + const float* u2yRow = u2y[y]; + + float* p11Row = p11[y]; + float* p12Row = p12[y]; + float* p21Row = p21[y]; + float* p22Row = p22[y]; + + for (int x = 0; x < u1x.cols; ++x) + { + const float g1 = static_cast(hypot(u1xRow[x], u1yRow[x])); + const float g2 = static_cast(hypot(u2xRow[x], u2yRow[x])); + + const float ng1 = 1.0f + taut * g1; + const float ng2 = 1.0f + taut * g2; + + p11Row[x] = (p11Row[x] + taut * u1xRow[x]) / ng1; + p12Row[x] = (p12Row[x] + taut * u1yRow[x]) / ng1; + p21Row[x] = (p21Row[x] + taut * u2xRow[x]) / ng2; + p22Row[x] = (p22Row[x] + taut * u2yRow[x]) / ng2; + } + } + } + + void estimateDualVariables(const Mat_& u1x, const Mat_& u1y, const Mat_& u2x, const Mat_& u2y, + Mat_& p11, Mat_& p12, Mat_& p21, Mat_& p22, float taut) + { + CV_DbgAssert( 
u1y.size() == u1x.size() ); + CV_DbgAssert( u2x.size() == u1x.size() ); + CV_DbgAssert( u2y.size() == u1x.size() ); + CV_DbgAssert( p11.size() == u1x.size() ); + CV_DbgAssert( p12.size() == u1x.size() ); + CV_DbgAssert( p21.size() == u1x.size() ); + CV_DbgAssert( p22.size() == u1x.size() ); + + EstimateDualVariablesBody body; + + body.u1x = u1x; + body.u1y = u1y; + body.u2x = u2x; + body.u2y = u2y; + body.p11 = p11; + body.p12 = p12; + body.p21 = p21; + body.p22 = p22; + body.taut = taut; + + parallel_for_(Range(0, u1x.rows), body); + } +} + +void cv::OpticalFlowDual_TVL1::procOneScale(const Mat_& I0, const Mat_& I1, Mat_& u1, Mat_& u2) +{ + const float scaledEpsilon = static_cast(epsilon * epsilon * I0.size().area()); + + CV_DbgAssert( I1.size() == I0.size() ); + CV_DbgAssert( I1.type() == I0.type() ); + CV_DbgAssert( u1.empty() || u1.size() == I0.size() ); + CV_DbgAssert( u2.size() == u1.size() ); + + if (u1.empty()) + { + u1.create(I0.size()); + u1.setTo(Scalar::all(0)); + + u2.create(I0.size()); + u2.setTo(Scalar::all(0)); + } + + Mat_ I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows)); + centeredGradient(I1, I1x, I1y); + + Mat_ flowMap1 = flowMap1_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ flowMap2 = flowMap2_buf(Rect(0, 0, I0.cols, I0.rows)); + + Mat_ I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows)); + + Mat_ grad = grad_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows)); + + Mat_ v1 = v1_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ v2 = v2_buf(Rect(0, 0, I0.cols, I0.rows)); + + Mat_ p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows)); + p11.setTo(Scalar::all(0)); + p12.setTo(Scalar::all(0)); + p21.setTo(Scalar::all(0)); + p22.setTo(Scalar::all(0)); + + Mat_ div_p1 = div_p1_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ div_p2 = div_p2_buf(Rect(0, 0, I0.cols, I0.rows)); + + Mat_ u1x = u1x_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ u1y = u1y_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ u2x = u2x_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ u2y = u2y_buf(Rect(0, 0, I0.cols, I0.rows)); + + const float l_t = static_cast(lambda * theta); + const float taut = static_cast(tau / theta); + + for (int warpings = 0; warpings < warps; ++warpings) + { + // compute the warping of the target image and its derivatives + buildFlowMap(u1, u2, flowMap1, flowMap2); + remap(I1, I1w, flowMap1, flowMap2, INTER_CUBIC); + remap(I1x, I1wx, flowMap1, flowMap2, INTER_CUBIC); + remap(I1y, I1wy, flowMap1, flowMap2, INTER_CUBIC); + + calcGradRho(I0, I1w, I1wx, I1wy, u1, u2, grad, rho_c); + + float error = numeric_limits::max(); + for (int n = 0; error > scaledEpsilon && n < iterations; ++n) + { + // estimate the values of the variable (v1, v2) (thresholding operator TH) + estimateV(I1wx, I1wy, u1, u2, grad, rho_c, v1, v2, l_t); + + // compute the divergence of the dual variable (p1, p2) + divergence(p11, p12, div_p1); + divergence(p21, p22, div_p2); + + // estimate the values of the optical flow (u1, u2) + error = estimateU(v1, v2, div_p1, div_p2, u1, u2, static_cast(theta)); + + // compute the gradient of the optical flow (Du1, Du2) + forwardGradient(u1, u1x, u1y); + forwardGradient(u2, u2x, u2y); + + // estimate the values of the dual variable (p1, p2) + estimateDualVariables(u1x, u1y, u2x, u2y, 
p11, p12, p21, p22, taut); + } + } +} + +namespace +{ + template void releaseVector(vector& v) + { + vector empty; + empty.swap(v); + } +} + +void cv::OpticalFlowDual_TVL1::collectGarbage() +{ + releaseVector(I0s); + releaseVector(I1s); + releaseVector(u1s); + releaseVector(u2s); + + I1x_buf.release(); + I1y_buf.release(); + + flowMap1_buf.release(); + flowMap2_buf.release(); + + I1w_buf.release(); + I1wx_buf.release(); + I1wy_buf.release(); + + grad_buf.release(); + rho_c_buf.release(); + + v1_buf.release(); + v2_buf.release(); + + p11_buf.release(); + p12_buf.release(); + p21_buf.release(); + p22_buf.release(); + + div_p1_buf.release(); + div_p2_buf.release(); + + u1x_buf.release(); + u1y_buf.release(); + u2x_buf.release(); + u2y_buf.release(); +} diff --git a/modules/video/test/test_tvl1optflow.cpp b/modules/video/test/test_tvl1optflow.cpp new file mode 100644 index 0000000000..b5688d35ee --- /dev/null +++ b/modules/video/test/test_tvl1optflow.cpp @@ -0,0 +1,171 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "test_precomp.hpp" +#include + +using namespace std; +using namespace cv; +using namespace cvtest; + +//#define DUMP + +namespace +{ + // first four bytes, should be the same in little endian + const float FLO_TAG_FLOAT = 202021.25f; // check for this when READING the file + const char FLO_TAG_STRING[] = "PIEH"; // use this when WRITING the file + + // binary file format for flow data specified here: + // http://vision.middlebury.edu/flow/data/ + void writeOpticalFlowToFile(const Mat_& flow, const string& fileName) + { + ofstream file(fileName.c_str(), ios_base::binary); + + file << FLO_TAG_STRING; + + file.write((const char*) &flow.cols, sizeof(int)); + file.write((const char*) &flow.rows, sizeof(int)); + + for (int i = 0; i < flow.rows; ++i) + { + for (int j = 0; j < flow.cols; ++j) + { + const Point2f u = flow(i, j); + + file.write((const char*) &u.x, sizeof(float)); + file.write((const char*) &u.y, sizeof(float)); + } + } + } + + // binary file format for flow data specified here: + // http://vision.middlebury.edu/flow/data/ + void readOpticalFlowFromFile(Mat_& flow, const string& fileName) + { + ifstream file(fileName.c_str(), ios_base::binary); + + float tag; + file.read((char*) &tag, sizeof(float)); + CV_Assert( tag == FLO_TAG_FLOAT ); + + Size size; + + file.read((char*) &size.width, sizeof(int)); + file.read((char*) &size.height, sizeof(int)); + + flow.create(size); + + for (int i = 0; i < flow.rows; ++i) + { + for (int j = 0; j < flow.cols; ++j) + { + Point2f u; + + file.read((char*) &u.x, sizeof(float)); + file.read((char*) &u.y, sizeof(float)); + + flow(i, j) = u; + } + } + } + + bool isFlowCorrect(Point2f u) + { + return !cvIsNaN(u.x) && !cvIsNaN(u.y) && (fabs(u.x) < 1e9) && (fabs(u.y) < 1e9); + } + + double calcRMSE(const Mat_& flow1, const Mat_& flow2) + { + double sum = 0.0; + int counter = 0; + + for (int i = 0; i < flow1.rows; ++i) + { + for (int j = 0; j < flow1.cols; ++j) + { + const Point2f u1 = flow1(i, j); + const Point2f u2 = flow2(i, j); + + if (isFlowCorrect(u1) && isFlowCorrect(u2)) + { + const Point2f diff = u1 - u2; + sum += diff.ddot(diff); + ++counter; + } + } + } + + return sqrt(sum / (1e-9 + counter)); + } +} + +TEST(Video_calcOpticalFlowDual_TVL1, Regression) +{ + const double MAX_RMSE = 0.02; + + const string frame1_path = TS::ptr()->get_data_path() + "optflow/RubberWhale1.png"; + const string frame2_path = TS::ptr()->get_data_path() + "optflow/RubberWhale2.png"; + const string gold_flow_path = TS::ptr()->get_data_path() + "optflow/tvl1_flow.flo"; + + Mat frame1 = imread(frame1_path, IMREAD_GRAYSCALE); + Mat frame2 = imread(frame2_path, IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()); + ASSERT_FALSE(frame2.empty()); + + Mat_ flow; + OpticalFlowDual_TVL1 tvl1; + + tvl1(frame1, frame2, flow); + +#ifdef DUMP + writeOpticalFlowToFile(flow, gold_flow_path); +#else + Mat_ gold; + readOpticalFlowFromFile(gold, gold_flow_path); + + ASSERT_EQ(gold.rows, flow.rows); + ASSERT_EQ(gold.cols, flow.cols); + + const double err = calcRMSE(gold, flow); + EXPECT_LE(err, MAX_RMSE); +#endif +} diff --git a/samples/cpp/tvl1_optical_flow.cpp b/samples/cpp/tvl1_optical_flow.cpp new file mode 100644 index 0000000000..d9a57a2163 --- /dev/null +++ b/samples/cpp/tvl1_optical_flow.cpp @@ -0,0 +1,193 @@ +#include +#include + +#include "opencv2/video/tracking.hpp" +#include "opencv2/highgui/highgui.hpp" + +using namespace cv; +using namespace std; + +inline bool isFlowCorrect(Point2f u) +{ + return !cvIsNaN(u.x) && !cvIsNaN(u.y) && fabs(u.x) < 1e9 && fabs(u.y) 
< 1e9; +} + +static Vec3b computeColor(float fx, float fy) +{ + static bool first = true; + + // relative lengths of color transitions: + // these are chosen based on perceptual similarity + // (e.g. one can distinguish more shades between red and yellow + // than between yellow and green) + const int RY = 15; + const int YG = 6; + const int GC = 4; + const int CB = 11; + const int BM = 13; + const int MR = 6; + const int NCOLS = RY + YG + GC + CB + BM + MR; + static Vec3i colorWheel[NCOLS]; + + if (first) + { + int k = 0; + + for (int i = 0; i < RY; ++i, ++k) + colorWheel[k] = Vec3i(255, 255 * i / RY, 0); + + for (int i = 0; i < YG; ++i, ++k) + colorWheel[k] = Vec3i(255 - 255 * i / YG, 255, 0); + + for (int i = 0; i < GC; ++i, ++k) + colorWheel[k] = Vec3i(0, 255, 255 * i / GC); + + for (int i = 0; i < CB; ++i, ++k) + colorWheel[k] = Vec3i(0, 255 - 255 * i / CB, 255); + + for (int i = 0; i < BM; ++i, ++k) + colorWheel[k] = Vec3i(255 * i / BM, 0, 255); + + for (int i = 0; i < MR; ++i, ++k) + colorWheel[k] = Vec3i(255, 0, 255 - 255 * i / MR); + + first = false; + } + + const float rad = sqrt(fx * fx + fy * fy); + const float a = atan2(-fy, -fx) / (float)CV_PI; + + const float fk = (a + 1.0f) / 2.0f * (NCOLS - 1); + const int k0 = static_cast(fk); + const int k1 = (k0 + 1) % NCOLS; + const float f = fk - k0; + + Vec3b pix; + + for (int b = 0; b < 3; b++) + { + const float col0 = colorWheel[k0][b] / 255.f; + const float col1 = colorWheel[k1][b] / 255.f; + + float col = (1 - f) * col0 + f * col1; + + if (rad <= 1) + col = 1 - rad * (1 - col); // increase saturation with radius + else + col *= .75; // out of range + + pix[2 - b] = static_cast(255.f * col); + } + + return pix; +} + +static void drawOpticalFlow(const Mat_& flow, Mat& dst, float maxmotion = -1) +{ + dst.create(flow.size(), CV_8UC3); + dst.setTo(Scalar::all(0)); + + // determine motion range: + float maxrad = maxmotion; + + if (maxmotion <= 0) + { + maxrad = 1; + for (int y = 0; y < flow.rows; ++y) + { + for (int x = 0; x < flow.cols; ++x) + { + Point2f u = flow(y, x); + + if (!isFlowCorrect(u)) + continue; + + maxrad = max(maxrad, sqrt(u.x * u.x + u.y * u.y)); + } + } + } + + for (int y = 0; y < flow.rows; ++y) + { + for (int x = 0; x < flow.cols; ++x) + { + Point2f u = flow(y, x); + + if (isFlowCorrect(u)) + dst.at(y, x) = computeColor(u.x / maxrad, u.y / maxrad); + } + } +} + +// binary file format for flow data specified here: +// http://vision.middlebury.edu/flow/data/ +static void writeOpticalFlowToFile(const Mat_& flow, const string& fileName) +{ + static const char FLO_TAG_STRING[] = "PIEH"; + + ofstream file(fileName.c_str(), ios_base::binary); + + file << FLO_TAG_STRING; + + file.write((const char*) &flow.cols, sizeof(int)); + file.write((const char*) &flow.rows, sizeof(int)); + + for (int i = 0; i < flow.rows; ++i) + { + for (int j = 0; j < flow.cols; ++j) + { + const Point2f u = flow(i, j); + + file.write((const char*) &u.x, sizeof(float)); + file.write((const char*) &u.y, sizeof(float)); + } + } +} + +int main(int argc, const char* argv[]) +{ + if (argc < 3) + { + cerr << "Usage : " << argv[0] << " []" << endl; + return -1; + } + + Mat frame0 = imread(argv[1], IMREAD_GRAYSCALE); + Mat frame1 = imread(argv[2], IMREAD_GRAYSCALE); + + if (frame0.empty()) + { + cerr << "Can't open image [" << argv[1] << "]" << endl; + return -1; + } + if (frame1.empty()) + { + cerr << "Can't open image [" << argv[2] << "]" << endl; + return -1; + } + + if (frame1.size() != frame0.size()) + { + cerr << "Images should be of equal sizes" << 
endl; + return -1; + } + + Mat_ flow; + OpticalFlowDual_TVL1 tvl1; + + const double start = (double)getTickCount(); + tvl1(frame0, frame1, flow); + const double timeSec = (getTickCount() - start) / getTickFrequency(); + cout << "calcOpticalFlowDual_TVL1 : " << timeSec << " sec" << endl; + + Mat out; + drawOpticalFlow(flow, out); + + if (argc == 4) + writeOpticalFlowToFile(flow, argv[3]); + + imshow("Flow", out); + waitKey(); + + return 0; +} From ce2fd7fec90769b3907aa5e73dbe71e5be130e54 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 13 Feb 2013 15:50:05 +0400 Subject: [PATCH 02/12] added dual tvl1 optical flow gpu implementation --- modules/gpu/include/opencv2/gpu/gpu.hpp | 89 +++++++ modules/gpu/perf/perf_video.cpp | 50 ++++ modules/gpu/src/cuda/tvl1flow.cu | 332 ++++++++++++++++++++++++ modules/gpu/src/tvl1flow.cpp | 256 ++++++++++++++++++ modules/gpu/test/test_optflow.cpp | 44 ++++ 5 files changed, 771 insertions(+) create mode 100644 modules/gpu/src/cuda/tvl1flow.cu create mode 100644 modules/gpu/src/tvl1flow.cpp diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 60cff99f6c..a574d71b58 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -1982,6 +1982,95 @@ private: }; +// Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method +// +// see reference: +// [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow". +// [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation". +class CV_EXPORTS OpticalFlowDual_TVL1_GPU +{ +public: + OpticalFlowDual_TVL1_GPU(); + + void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy); + + void collectGarbage(); + + /** + * Time step of the numerical scheme. + */ + double tau; + + /** + * Weight parameter for the data term, attachment parameter. + * This is the most relevant parameter, which determines the smoothness of the output. + * The smaller this parameter is, the smoother the solutions we obtain. + * It depends on the range of motions of the images, so its value should be adapted to each image sequence. + */ + double lambda; + + /** + * Weight parameter for (u - v)^2, tightness parameter. + * It serves as a link between the attachment and the regularization terms. + * In theory, it should have a small value in order to maintain both parts in correspondence. + * The method is stable for a large range of values of this parameter. + */ + double theta; + + /** + * Number of scales used to create the pyramid of images. + */ + int nscales; + + /** + * Number of warpings per scale. + * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale. + * This is a parameter that assures the stability of the method. + * It also affects the running time, so it is a compromise between speed and accuracy. + */ + int warps; + + /** + * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time. + * A small value will yield more accurate solutions at the expense of a slower convergence. + */ + double epsilon; + + /** + * Stopping criterion iterations number used in the numerical scheme. 
+ */ + int iterations; + + bool useInitialFlow; + +private: + void procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2); + + std::vector I0s; + std::vector I1s; + std::vector u1s; + std::vector u2s; + + GpuMat I1x_buf; + GpuMat I1y_buf; + + GpuMat I1w_buf; + GpuMat I1wx_buf; + GpuMat I1wy_buf; + + GpuMat grad_buf; + GpuMat rho_c_buf; + + GpuMat p11_buf; + GpuMat p12_buf; + GpuMat p21_buf; + GpuMat p22_buf; + + GpuMat diff_buf; + GpuMat norm_buf; +}; + + //! Interpolate frames (images) using provided optical flow (displacement field). //! frame0 - frame 0 (32-bit floating point images, single channel) //! frame1 - frame 1 (the same type and size) diff --git a/modules/gpu/perf/perf_video.cpp b/modules/gpu/perf/perf_video.cpp index b18cb17dfb..b228580fde 100644 --- a/modules/gpu/perf/perf_video.cpp +++ b/modules/gpu/perf/perf_video.cpp @@ -394,6 +394,56 @@ PERF_TEST_P(ImagePair, Video_FarnebackOpticalFlow, } } +////////////////////////////////////////////////////// +// OpticalFlowDual_TVL1 + +PERF_TEST_P(ImagePair, Video_OpticalFlowDual_TVL1, + Values(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png"))) +{ + declare.time(20); + + cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame0.empty()); + + cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()); + + if (PERF_RUN_GPU()) + { + cv::gpu::GpuMat d_frame0(frame0); + cv::gpu::GpuMat d_frame1(frame1); + cv::gpu::GpuMat d_flowx; + cv::gpu::GpuMat d_flowy; + + cv::gpu::OpticalFlowDual_TVL1_GPU d_alg; + + d_alg(d_frame0, d_frame1, d_flowx, d_flowy); + + TEST_CYCLE() + { + d_alg(d_frame0, d_frame1, d_flowx, d_flowy); + } + + GPU_SANITY_CHECK(d_flowx); + GPU_SANITY_CHECK(d_flowy); + } + else + { + cv::Mat flow; + + cv::OpticalFlowDual_TVL1 alg; + + alg(frame0, frame1, flow); + + TEST_CYCLE() + { + alg(frame0, frame1, flow); + } + + CPU_SANITY_CHECK(flow); + } +} + ////////////////////////////////////////////////////// // FGDStatModel diff --git a/modules/gpu/src/cuda/tvl1flow.cu b/modules/gpu/src/cuda/tvl1flow.cu new file mode 100644 index 0000000000..27694ad269 --- /dev/null +++ b/modules/gpu/src/cuda/tvl1flow.cu @@ -0,0 +1,332 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or bpied warranties, including, but not limited to, the bpied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#if !defined CUDA_DISABLER + +#include "opencv2/gpu/device/common.hpp" +#include "opencv2/gpu/device/border_interpolate.hpp" +#include "opencv2/gpu/device/limits.hpp" + +using namespace cv::gpu; +using namespace cv::gpu::device; + +//////////////////////////////////////////////////////////// +// centeredGradient + +namespace tvl1flow +{ + __global__ void centeredGradientKernel(const PtrStepSzf src, PtrStepf dx, PtrStepf dy) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= src.cols || y >= src.rows) + return; + + dx(y, x) = 0.5f * (src(y, ::min(x + 1, src.cols - 1)) - src(y, ::max(x - 1, 0))); + dy(y, x) = 0.5f * (src(::min(y + 1, src.rows - 1), x) - src(::max(y - 1, 0), x)); + } + + void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy) + { + const dim3 block(32, 8); + const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y)); + + centeredGradientKernel<<>>(src, dx, dy); + cudaSafeCall( cudaGetLastError() ); + + cudaSafeCall( cudaDeviceSynchronize() ); + } +} + +//////////////////////////////////////////////////////////// +// warpBackward + +namespace tvl1flow +{ + static __device__ __forceinline__ float bicubicCoeff(float x_) + { + float x = fabsf(x_); + if (x <= 1.0f) + { + return x * x * (1.5f * x - 2.5f) + 1.0f; + } + else if (x < 2.0f) + { + return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f; + } + else + { + return 0.0f; + } + } + + texture tex_I1 (false, cudaFilterModePoint, cudaAddressModeClamp); + texture tex_I1x(false, cudaFilterModePoint, cudaAddressModeClamp); + texture tex_I1y(false, cudaFilterModePoint, cudaAddressModeClamp); + + __global__ void warpBackwardKernel(const PtrStepSzf I0, const PtrStepf u1, const PtrStepf u2, PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= I0.cols || y >= I0.rows) + return; + + const float u1Val = u1(y, x); + const float u2Val = u2(y, x); + + const float wx = x + u1Val; + const float wy = y + u2Val; + + const int xmin = ::ceilf(wx - 2.0f); + const int xmax = ::floorf(wx + 2.0f); + + const int ymin = ::ceilf(wy - 2.0f); + const int ymax = ::floorf(wy + 2.0f); + + float sum = 0.0f; + float sumx = 0.0f; + float sumy = 0.0f; + float wsum = 0.0f; + + for (int cy = ymin; cy <= ymax; ++cy) + { + for (int cx = xmin; cx <= xmax; ++cx) + { + const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); + + sum += w * tex2D(tex_I1 , cx, cy); + sumx += w * tex2D(tex_I1x, cx, cy); + sumy += w * tex2D(tex_I1y, cx, cy); + + wsum += w; + } + } + + const float coeff = 1.0f / wsum; + + const float I1wVal = sum * coeff; + 
const float I1wxVal = sumx * coeff; + const float I1wyVal = sumy * coeff; + + I1w(y, x) = I1wVal; + I1wx(y, x) = I1wxVal; + I1wy(y, x) = I1wyVal; + + const float Ix2 = I1wxVal * I1wxVal; + const float Iy2 = I1wyVal * I1wyVal; + + // store the |Grad(I1)|^2 + grad(y, x) = Ix2 + Iy2; + + // compute the constant part of the rho function + const float I0Val = I0(y, x); + rho(y, x) = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val; + } + + void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho) + { + const dim3 block(32, 8); + const dim3 grid(divUp(I0.cols, block.x), divUp(I0.rows, block.y)); + + bindTexture(&tex_I1 , I1); + bindTexture(&tex_I1x, I1x); + bindTexture(&tex_I1y, I1y); + + warpBackwardKernel<<>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho); + cudaSafeCall( cudaGetLastError() ); + + cudaSafeCall( cudaDeviceSynchronize() ); + } +} + +//////////////////////////////////////////////////////////// +// estimateU + +namespace tvl1flow +{ + __device__ float divergence(const PtrStepf& v1, const PtrStepf& v2, int y, int x) + { + if (x > 0 && y > 0) + { + const float v1x = v1(y, x) - v1(y, x - 1); + const float v2y = v2(y, x) - v2(y - 1, x); + return v1x + v2y; + } + else + { + if (y > 0) + return v1(y, 0) + v2(y, 0) - v2(y - 1, 0); + else + { + if (x > 0) + return v1(0, x) - v1(0, x - 1) + v2(0, x); + else + return v1(0, 0) + v2(0, 0); + } + } + } + + __global__ void estimateUKernel(const PtrStepSzf I1wx, const PtrStepf I1wy, + const PtrStepf grad, const PtrStepf rho_c, + const PtrStepf p11, const PtrStepf p12, const PtrStepf p21, const PtrStepf p22, + PtrStepf u1, PtrStepf u2, PtrStepf error, + const float l_t, const float theta) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= I1wx.cols || y >= I1wx.rows) + return; + + const float I1wxVal = I1wx(y, x); + const float I1wyVal = I1wy(y, x); + const float gradVal = grad(y, x); + const float u1OldVal = u1(y, x); + const float u2OldVal = u2(y, x); + + const float rho = rho_c(y, x) + (I1wxVal * u1OldVal + I1wyVal * u2OldVal); + + // estimate the values of the variable (v1, v2) (thresholding operator TH) + + float d1 = 0.0f; + float d2 = 0.0f; + + if (rho < -l_t * gradVal) + { + d1 = l_t * I1wxVal; + d2 = l_t * I1wyVal; + } + else if (rho > l_t * gradVal) + { + d1 = -l_t * I1wxVal; + d2 = -l_t * I1wyVal; + } + else if (gradVal > numeric_limits::epsilon()) + { + const float fi = -rho / gradVal; + d1 = fi * I1wxVal; + d2 = fi * I1wyVal; + } + + const float v1 = u1OldVal + d1; + const float v2 = u2OldVal + d2; + + // compute the divergence of the dual variable (p1, p2) + + const float div_p1 = divergence(p11, p12, y, x); + const float div_p2 = divergence(p21, p22, y, x); + + // estimate the values of the optical flow (u1, u2) + + const float u1NewVal = v1 + theta * div_p1; + const float u2NewVal = v2 + theta * div_p2; + + u1(y, x) = u1NewVal; + u2(y, x) = u2NewVal; + + const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); + const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); + error(y, x) = n1 + n2; + } + + void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy, + PtrStepSzf grad, PtrStepSzf rho_c, + PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, + PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error, + float l_t, float theta) + { + const dim3 block(32, 8); + const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, 
block.y)); + + estimateUKernel<<>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta); + cudaSafeCall( cudaGetLastError() ); + + cudaSafeCall( cudaDeviceSynchronize() ); + } +} + +//////////////////////////////////////////////////////////// +// estimateDualVariables + +namespace tvl1flow +{ + __global__ void estimateDualVariablesKernel(const PtrStepSzf u1, const PtrStepf u2, PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, const float taut) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= u1.cols || y >= u1.rows) + return; + + const float u1x = u1(y, ::min(x + 1, u1.cols - 1)) - u1(y, x); + const float u1y = u1(::min(y + 1, u1.rows - 1), x) - u1(y, x); + + const float u2x = u2(y, ::min(x + 1, u1.cols - 1)) - u2(y, x); + const float u2y = u2(::min(y + 1, u1.rows - 1), x) - u2(y, x); + + const float g1 = ::hypotf(u1x, u1y); + const float g2 = ::hypotf(u2x, u2y); + + const float ng1 = 1.0f + taut * g1; + const float ng2 = 1.0f + taut * g2; + + p11(y, x) = (p11(y, x) + taut * u1x) / ng1; + p12(y, x) = (p12(y, x) + taut * u1y) / ng1; + p21(y, x) = (p21(y, x) + taut * u2x) / ng2; + p22(y, x) = (p22(y, x) + taut * u2y) / ng2; + } + + void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut) + { + const dim3 block(32, 8); + const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y)); + + estimateDualVariablesKernel<<>>(u1, u2, p11, p12, p21, p22, taut); + cudaSafeCall( cudaGetLastError() ); + + cudaSafeCall( cudaDeviceSynchronize() ); + } +} + +#endif // !defined CUDA_DISABLER diff --git a/modules/gpu/src/tvl1flow.cpp b/modules/gpu/src/tvl1flow.cpp new file mode 100644 index 0000000000..a598a9ecf0 --- /dev/null +++ b/modules/gpu/src/tvl1flow.cpp @@ -0,0 +1,256 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#if !defined HAVE_CUDA || defined(CUDA_DISABLER) + +cv::gpu::OpticalFlowDual_TVL1_GPU::OpticalFlowDual_TVL1_GPU() { throw_nogpu(); } +void cv::gpu::OpticalFlowDual_TVL1_GPU::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); } +void cv::gpu::OpticalFlowDual_TVL1_GPU::collectGarbage() {} +void cv::gpu::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); } + +#else + +using namespace std; +using namespace cv; +using namespace cv::gpu; + +cv::gpu::OpticalFlowDual_TVL1_GPU::OpticalFlowDual_TVL1_GPU() +{ + tau = 0.25; + lambda = 0.15; + theta = 0.3; + nscales = 5; + warps = 5; + epsilon = 0.01; + iterations = 300; + useInitialFlow = false; +} + +void cv::gpu::OpticalFlowDual_TVL1_GPU::operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy) +{ + CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 ); + CV_Assert( I0.size() == I1.size() ); + CV_Assert( I0.type() == I1.type() ); + CV_Assert( !useInitialFlow || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) ); + CV_Assert( nscales > 0 ); + + // allocate memory for the pyramid structure + I0s.resize(nscales); + I1s.resize(nscales); + u1s.resize(nscales); + u2s.resize(nscales); + + I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0); + I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 
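+    /* Both frames are converted to CV_32F on a 0..255 scale: 8-bit input keeps its
+       values (scale factor 1.0), while floating-point input, assumed to lie in [0, 1],
+       is scaled by 255 so the data term sees the same intensity range either way. */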
1.0 : 255.0); + + if (!useInitialFlow) + { + flowx.create(I0.size(), CV_32FC1); + flowy.create(I0.size(), CV_32FC1); + } + + u1s[0] = flowx; + u2s[0] = flowy; + + I1x_buf.create(I0.size(), CV_32FC1); + I1y_buf.create(I0.size(), CV_32FC1); + + I1w_buf.create(I0.size(), CV_32FC1); + I1wx_buf.create(I0.size(), CV_32FC1); + I1wy_buf.create(I0.size(), CV_32FC1); + + grad_buf.create(I0.size(), CV_32FC1); + rho_c_buf.create(I0.size(), CV_32FC1); + + p11_buf.create(I0.size(), CV_32FC1); + p12_buf.create(I0.size(), CV_32FC1); + p21_buf.create(I0.size(), CV_32FC1); + p22_buf.create(I0.size(), CV_32FC1); + + diff_buf.create(I0.size(), CV_32FC1); + + // create the scales + for (int s = 1; s < nscales; ++s) + { + gpu::pyrDown(I0s[s - 1], I0s[s]); + gpu::pyrDown(I1s[s - 1], I1s[s]); + + if (I0s[s].cols < 16 || I0s[s].rows < 16) + { + nscales = s; + break; + } + + if (useInitialFlow) + { + gpu::pyrDown(u1s[s - 1], u1s[s]); + gpu::pyrDown(u2s[s - 1], u2s[s]); + + gpu::multiply(u1s[s], Scalar::all(0.5), u1s[s]); + gpu::multiply(u2s[s], Scalar::all(0.5), u2s[s]); + } + } + + // pyramidal structure for computing the optical flow + for (int s = nscales - 1; s >= 0; --s) + { + // compute the optical flow at the current scale + procOneScale(I0s[s], I1s[s], u1s[s], u2s[s]); + + // if this was the last scale, finish now + if (s == 0) + break; + + // otherwise, upsample the optical flow + + // zoom the optical flow for the next finer scale + gpu::resize(u1s[s], u1s[s - 1], I0s[s - 1].size()); + gpu::resize(u2s[s], u2s[s - 1], I0s[s - 1].size()); + + // scale the optical flow with the appropriate zoom factor + gpu::multiply(u1s[s - 1], Scalar::all(2), u1s[s - 1]); + gpu::multiply(u2s[s - 1], Scalar::all(2), u2s[s - 1]); + } +} + +namespace tvl1flow +{ + void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy); + void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho); + void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy, + PtrStepSzf grad, PtrStepSzf rho_c, + PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, + PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error, + float l_t, float theta); + void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut); +} + +void cv::gpu::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2) +{ + using namespace tvl1flow; + + const double scaledEpsilon = epsilon * epsilon * I0.size().area(); + + CV_DbgAssert( I1.size() == I0.size() ); + CV_DbgAssert( I1.type() == I0.type() ); + CV_DbgAssert( u1.empty() || u1.size() == I0.size() ); + CV_DbgAssert( u2.size() == u1.size() ); + + if (u1.empty()) + { + u1.create(I0.size(), CV_32FC1); + u1.setTo(Scalar::all(0)); + + u2.create(I0.size(), CV_32FC1); + u2.setTo(Scalar::all(0)); + } + + GpuMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows)); + centeredGradient(I1, I1x, I1y); + + GpuMat I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows)); + + GpuMat grad = grad_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows)); + + GpuMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat p22 
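+    /* Stopping criterion used in the solver loop below: diff accumulates
+       (u_new - u_old)^2 for every pixel, and the inner iterations stop once
+       sum(diff) <= scaledEpsilon = epsilon^2 * width * height, i.e. once the
+       root-mean-square update of the flow falls below epsilon. */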
= p22_buf(Rect(0, 0, I0.cols, I0.rows)); + p11.setTo(Scalar::all(0)); + p12.setTo(Scalar::all(0)); + p21.setTo(Scalar::all(0)); + p22.setTo(Scalar::all(0)); + + GpuMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows)); + + const float l_t = static_cast(lambda * theta); + const float taut = static_cast(tau / theta); + + for (int warpings = 0; warpings < warps; ++warpings) + { + warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c); + + double error = numeric_limits::max(); + for (int n = 0; error > scaledEpsilon && n < iterations; ++n) + { + estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, diff, l_t, static_cast(theta)); + + error = gpu::sum(diff, norm_buf)[0]; + + estimateDualVariables(u1, u2, p11, p12, p21, p22, taut); + } + } +} + +void cv::gpu::OpticalFlowDual_TVL1_GPU::collectGarbage() +{ + I0s.clear(); + I1s.clear(); + u1s.clear(); + u2s.clear(); + + I1x_buf.release(); + I1y_buf.release(); + + I1w_buf.release(); + I1wx_buf.release(); + I1wy_buf.release(); + + grad_buf.release(); + rho_c_buf.release(); + + p11_buf.release(); + p12_buf.release(); + p21_buf.release(); + p22_buf.release(); + + diff_buf.release(); + norm_buf.release(); +} + +#endif // !defined HAVE_CUDA || defined(CUDA_DISABLER) diff --git a/modules/gpu/test/test_optflow.cpp b/modules/gpu/test/test_optflow.cpp index 6bc471ecef..46b71b5153 100644 --- a/modules/gpu/test/test_optflow.cpp +++ b/modules/gpu/test/test_optflow.cpp @@ -401,4 +401,48 @@ INSTANTIATE_TEST_CASE_P(GPU_Video, FarnebackOpticalFlow, testing::Combine( testing::Values(FarnebackOptFlowFlags(0), FarnebackOptFlowFlags(cv::OPTFLOW_FARNEBACK_GAUSSIAN)), testing::Values(UseInitFlow(false), UseInitFlow(true)))); +////////////////////////////////////////////////////// +// OpticalFlowDual_TVL1 + +PARAM_TEST_CASE(OpticalFlowDual_TVL1, cv::gpu::DeviceInfo, UseRoi) +{ + cv::gpu::DeviceInfo devInfo; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + useRoi = GET_PARAM(1); + + cv::gpu::setDevice(devInfo.deviceID()); + } +}; + +GPU_TEST_P(OpticalFlowDual_TVL1, Accuracy) +{ + cv::Mat frame0 = readImage("opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame0.empty()); + + cv::Mat frame1 = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()); + + cv::gpu::OpticalFlowDual_TVL1_GPU d_alg; + cv::gpu::GpuMat d_flowx = createMat(frame0.size(), CV_32FC1, useRoi); + cv::gpu::GpuMat d_flowy = createMat(frame0.size(), CV_32FC1, useRoi); + d_alg(loadMat(frame0, useRoi), loadMat(frame1, useRoi), d_flowx, d_flowy); + + cv::OpticalFlowDual_TVL1 alg; + cv::Mat flow; + alg(frame0, frame1, flow); + cv::Mat gold[2]; + cv::split(flow, gold); + + EXPECT_MAT_SIMILAR(gold[0], d_flowx, 3e-3); + EXPECT_MAT_SIMILAR(gold[1], d_flowy, 3e-3); +} + +INSTANTIATE_TEST_CASE_P(GPU_Video, OpticalFlowDual_TVL1, testing::Combine( + ALL_DEVICES, + WHOLE_SUBMAT)); + #endif // HAVE_CUDA From a828b607653daebe2ba176364ee32b19053532f5 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 13 Feb 2013 15:51:27 +0400 Subject: [PATCH 03/12] added enqueueHostCallback method to gpu::Stream --- modules/gpu/doc/data_structures.rst | 104 ++++++-- modules/gpu/include/opencv2/gpu/gpu.hpp | 22 +- modules/gpu/src/cudastream.cpp | 311 ++++++++++++++++-------- modules/gpu/test/test_stream.cpp | 130 ++++++++++ 4 files changed, 428 insertions(+), 139 deletions(-) create mode 100644 modules/gpu/test/test_stream.cpp diff --git a/modules/gpu/doc/data_structures.rst b/modules/gpu/doc/data_structures.rst index 
68e702a793..1291cf9bb6 100644 --- a/modules/gpu/doc/data_structures.rst +++ b/modules/gpu/doc/data_structures.rst @@ -271,41 +271,37 @@ This class encapsulates a queue of asynchronous calls. Some functions have overl class CV_EXPORTS Stream { public: - Stream(); - ~Stream(); + Stream(); + ~Stream(); - Stream(const Stream&); - Stream& operator=(const Stream&); + Stream(const Stream&); + Stream& operator=(const Stream&); - bool queryIfComplete(); - void waitForCompletion(); + bool queryIfComplete(); + void waitForCompletion(); - //! downloads asynchronously. - // Warning! cv::Mat must point to page locked memory - (i.e. to CudaMem data or to its subMat) - void enqueueDownload(const GpuMat& src, CudaMem& dst); - void enqueueDownload(const GpuMat& src, Mat& dst); + void enqueueDownload(const GpuMat& src, CudaMem& dst); + void enqueueDownload(const GpuMat& src, Mat& dst); - //! uploads asynchronously. - // Warning! cv::Mat must point to page locked memory - (i.e. to CudaMem data or to its ROI) - void enqueueUpload(const CudaMem& src, GpuMat& dst); - void enqueueUpload(const Mat& src, GpuMat& dst); + void enqueueUpload(const CudaMem& src, GpuMat& dst); + void enqueueUpload(const Mat& src, GpuMat& dst); - void enqueueCopy(const GpuMat& src, GpuMat& dst); + void enqueueCopy(const GpuMat& src, GpuMat& dst); - void enqueueMemSet(const GpuMat& src, Scalar val); - void enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask); + void enqueueMemSet(const GpuMat& src, Scalar val); + void enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask); - // converts matrix type, ex from float to uchar depending on type - void enqueueConvert(const GpuMat& src, GpuMat& dst, int type, - double a = 1, double b = 0); + void enqueueConvert(const GpuMat& src, GpuMat& dst, int type, + double a = 1, double b = 0); + + typedef void (*StreamCallback)(Stream& stream, int status, void* userData); + void enqueueHostCallback(StreamCallback callback, void* userData); }; gpu::Stream::queryIfComplete --------------------------------- +---------------------------- Returns ``true`` if the current stream queue is finished. Otherwise, it returns false. .. ocv:function:: bool gpu::Stream::queryIfComplete() @@ -313,13 +309,73 @@ Returns ``true`` if the current stream queue is finished. Otherwise, it returns gpu::Stream::waitForCompletion ----------------------------------- +------------------------------ Blocks the current CPU thread until all operations in the stream are complete. .. ocv:function:: void gpu::Stream::waitForCompletion() +gpu::Stream::enqueueDownload +---------------------------- +Copies data from device to host. + +.. ocv:function:: void gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst) + +.. ocv:function:: void gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst) + +.. note:: ``cv::Mat`` must point to page locked memory (i.e. to ``CudaMem`` data or to its subMat) or must be registered with :ocv:func:`gpu::registerPageLocked` . + + + +gpu::Stream::enqueueUpload +-------------------------- +Copies data from host to device. + +.. ocv:function:: void gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst) + +.. ocv:function:: void gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst) + +.. note:: ``cv::Mat`` must point to page locked memory (i.e. to ``CudaMem`` data or to its subMat) or must be registered with :ocv:func:`gpu::registerPageLocked` . + + + +gpu::Stream::enqueueCopy +------------------------ +Copies data from device to device. + +.. 
ocv:function:: void gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) + + + +gpu::Stream::enqueueMemSet +-------------------------- +Initializes or sets device memory to a value. + +.. ocv:function:: void gpu::Stream::enqueueMemSet(const GpuMat& src, Scalar val) + +.. ocv:function:: void gpu::Stream::enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask) + + + +gpu::Stream::enqueueConvert +--------------------------- +Converts matrix type, ex from float to uchar depending on type. + +.. ocv:function:: void gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a = 1, double b = 0) + + + +gpu::Stream::enqueueHostCallback +-------------------------------- +Adds a callback to be called on the host after all currently enqueued items in the stream have completed. + +.. ocv:function:: void gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userData) + +.. note:: Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization that may depend on outstanding device work or other callbacks that are not mandated to run earlier. Callbacks without a mandated order (in independent streams) execute in undefined order and may be serialized. + + + gpu::StreamAccessor ------------------- .. ocv:struct:: gpu::StreamAccessor diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index a574d71b58..7493d83b58 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -145,43 +145,49 @@ public: ~Stream(); Stream(const Stream&); - Stream& operator=(const Stream&); + Stream& operator =(const Stream&); bool queryIfComplete(); void waitForCompletion(); - //! downloads asynchronously. + //! downloads asynchronously // Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its subMat) void enqueueDownload(const GpuMat& src, CudaMem& dst); void enqueueDownload(const GpuMat& src, Mat& dst); - //! uploads asynchronously. + //! uploads asynchronously // Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its ROI) void enqueueUpload(const CudaMem& src, GpuMat& dst); void enqueueUpload(const Mat& src, GpuMat& dst); + //! copy asynchronously void enqueueCopy(const GpuMat& src, GpuMat& dst); + //! memory set asynchronously void enqueueMemSet(GpuMat& src, Scalar val); void enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask); - // converts matrix type, ex from float to uchar depending on type - void enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a = 1, double b = 0); + //! converts matrix type, ex from float to uchar depending on type + void enqueueConvert(const GpuMat& src, GpuMat& dst, int dtype, double a = 1, double b = 0); + + //! 
adds a callback to be called on the host after all currently enqueued items in the stream have completed + typedef void (*StreamCallback)(Stream& stream, int status, void* userData); + void enqueueHostCallback(StreamCallback callback, void* userData); static Stream& Null(); operator bool() const; private: + struct Impl; + + explicit Stream(Impl* impl); void create(); void release(); - struct Impl; Impl *impl; friend struct StreamAccessor; - - explicit Stream(Impl* impl); }; diff --git a/modules/gpu/src/cudastream.cpp b/modules/gpu/src/cudastream.cpp index 5e6e4c3ea0..f9fbe820eb 100644 --- a/modules/gpu/src/cudastream.cpp +++ b/modules/gpu/src/cudastream.cpp @@ -42,51 +42,37 @@ #include "precomp.hpp" +using namespace std; using namespace cv; using namespace cv::gpu; -#if defined HAVE_CUDA - -struct Stream::Impl -{ - static cudaStream_t getStream(const Impl* impl) { return impl ? impl->stream : 0; } - cudaStream_t stream; - int ref_counter; -}; - -#include "opencv2/gpu/stream_accessor.hpp" - -CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream) -{ - return Stream::Impl::getStream(stream.impl); -}; - -#endif /* !defined (HAVE_CUDA) */ - - #if !defined (HAVE_CUDA) -void cv::gpu::Stream::create() { throw_nogpu(); } -void cv::gpu::Stream::release() { throw_nogpu(); } -cv::gpu::Stream::Stream() : impl(0) { throw_nogpu(); } -cv::gpu::Stream::~Stream() { throw_nogpu(); } -cv::gpu::Stream::Stream(const Stream& /*stream*/) { throw_nogpu(); } -Stream& cv::gpu::Stream::operator=(const Stream& /*stream*/) { throw_nogpu(); return *this; } -bool cv::gpu::Stream::queryIfComplete() { throw_nogpu(); return true; } +cv::gpu::Stream::Stream() { throw_nogpu(); } +cv::gpu::Stream::~Stream() {} +cv::gpu::Stream::Stream(const Stream&) { throw_nogpu(); } +Stream& cv::gpu::Stream::operator=(const Stream&) { throw_nogpu(); return *this; } +bool cv::gpu::Stream::queryIfComplete() { throw_nogpu(); return false; } void cv::gpu::Stream::waitForCompletion() { throw_nogpu(); } -void cv::gpu::Stream::enqueueDownload(const GpuMat& /*src*/, Mat& /*dst*/) { throw_nogpu(); } -void cv::gpu::Stream::enqueueDownload(const GpuMat& /*src*/, CudaMem& /*dst*/) { throw_nogpu(); } -void cv::gpu::Stream::enqueueUpload(const CudaMem& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); } -void cv::gpu::Stream::enqueueUpload(const Mat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); } -void cv::gpu::Stream::enqueueCopy(const GpuMat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); } -void cv::gpu::Stream::enqueueMemSet(GpuMat& /*src*/, Scalar /*val*/) { throw_nogpu(); } -void cv::gpu::Stream::enqueueMemSet(GpuMat& /*src*/, Scalar /*val*/, const GpuMat& /*mask*/) { throw_nogpu(); } -void cv::gpu::Stream::enqueueConvert(const GpuMat& /*src*/, GpuMat& /*dst*/, int /*type*/, double /*a*/, double /*b*/) { throw_nogpu(); } +void cv::gpu::Stream::enqueueDownload(const GpuMat&, Mat&) { throw_nogpu(); } +void cv::gpu::Stream::enqueueDownload(const GpuMat&, CudaMem&) { throw_nogpu(); } +void cv::gpu::Stream::enqueueUpload(const CudaMem&, GpuMat&) { throw_nogpu(); } +void cv::gpu::Stream::enqueueUpload(const Mat&, GpuMat&) { throw_nogpu(); } +void cv::gpu::Stream::enqueueCopy(const GpuMat&, GpuMat&) { throw_nogpu(); } +void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar) { throw_nogpu(); } +void cv::gpu::Stream::enqueueMemSet(GpuMat&, Scalar, const GpuMat&) { throw_nogpu(); } +void cv::gpu::Stream::enqueueConvert(const GpuMat&, GpuMat&, int, double, double) { throw_nogpu(); } +void cv::gpu::Stream::enqueueHostCallback(StreamCallback, void*) { 
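+// A minimal usage sketch for the new callback API (illustrative only; the function and
+// variable names below are not part of the library):
+//
+//     void onStreamDone(cv::gpu::Stream&, int /*status*/, void* userData)
+//     {
+//         // runs on a driver thread once all work queued before the callback has finished;
+//         // CUDA API calls are not allowed from inside the callback
+//         *static_cast<volatile bool*>(userData) = true;
+//     }
+//
+//     cv::gpu::Stream stream;
+//     cv::gpu::GpuMat d_src(src), d_dst;   // src: a cv::Mat prepared elsewhere
+//     bool done = false;
+//
+//     stream.enqueueConvert(d_src, d_dst, CV_32F);
+//     stream.enqueueHostCallback(onStreamDone, &done);
+//     stream.waitForCompletion();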
throw_nogpu(); } Stream& cv::gpu::Stream::Null() { throw_nogpu(); static Stream s; return s; } cv::gpu::Stream::operator bool() const { throw_nogpu(); return false; } +cv::gpu::Stream::Stream(Impl*) { throw_nogpu(); } +void cv::gpu::Stream::create() { throw_nogpu(); } +void cv::gpu::Stream::release() { throw_nogpu(); } #else /* !defined (HAVE_CUDA) */ +#include "opencv2/gpu/stream_accessor.hpp" + namespace cv { namespace gpu { void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream); @@ -95,63 +81,55 @@ namespace cv { namespace gpu void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream); }} -namespace +struct Stream::Impl { - template void devcopy(const S& src, D& dst, cudaStream_t s, cudaMemcpyKind k) + static cudaStream_t getStream(const Impl* impl) { - dst.create(src.size(), src.type()); - size_t bwidth = src.cols * src.elemSize(); - cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, k, s) ); - }; -} - -void cv::gpu::Stream::create() -{ - if (impl) - release(); + return impl ? impl->stream : 0; + } cudaStream_t stream; - cudaSafeCall( cudaStreamCreate( &stream ) ); - - impl = (Stream::Impl*)fastMalloc(sizeof(Stream::Impl)); + int ref_counter; +}; - impl->stream = stream; - impl->ref_counter = 1; +cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream) +{ + return Stream::Impl::getStream(stream.impl); } -void cv::gpu::Stream::release() +cv::gpu::Stream::Stream() : impl(0) { - if( impl && CV_XADD(&impl->ref_counter, -1) == 1 ) - { - cudaSafeCall( cudaStreamDestroy( impl->stream ) ); - cv::fastFree( impl ); - } + create(); } -cv::gpu::Stream::Stream() : impl(0) { create(); } -cv::gpu::Stream::~Stream() { release(); } +cv::gpu::Stream::~Stream() +{ + release(); +} cv::gpu::Stream::Stream(const Stream& stream) : impl(stream.impl) { - if( impl ) + if (impl) CV_XADD(&impl->ref_counter, 1); } -Stream& cv::gpu::Stream::operator=(const Stream& stream) + +Stream& cv::gpu::Stream::operator =(const Stream& stream) { - if( this != &stream ) + if (this != &stream) { - if( stream.impl ) - CV_XADD(&stream.impl->ref_counter, 1); - release(); impl = stream.impl; + if (impl) + CV_XADD(&impl->ref_counter, 1); } + return *this; } bool cv::gpu::Stream::queryIfComplete() { - cudaError_t err = cudaStreamQuery( Impl::getStream(impl) ); + cudaStream_t stream = Impl::getStream(impl); + cudaError_t err = cudaStreamQuery(stream); if (err == cudaErrorNotReady || err == cudaSuccess) return err == cudaSuccess; @@ -160,94 +138,213 @@ bool cv::gpu::Stream::queryIfComplete() return false; } -void cv::gpu::Stream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( Impl::getStream(impl) ) ); } +void cv::gpu::Stream::waitForCompletion() +{ + cudaStream_t stream = Impl::getStream(impl); + cudaSafeCall( cudaStreamSynchronize(stream) ); +} void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst) { // if not -> allocation will be done, but after that dst will not point to page locked memory - CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type() ); - devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToHost); + CV_Assert( src.size() == dst.size() && src.type() == dst.type() ); + + cudaStream_t stream = Impl::getStream(impl); + size_t bwidth = src.cols * src.elemSize(); + cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) ); +} + +void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst) +{ 
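+    // Asynchronous copies only overlap with device work when the host buffer is
+    // page-locked, which is why this overload targets CudaMem and allocates it with
+    // ALLOC_PAGE_LOCKED below. A typical pipeline might look like this (sketch only,
+    // names are illustrative):
+    //
+    //     cv::gpu::CudaMem h_frame(480, 640, CV_8UC1, cv::gpu::CudaMem::ALLOC_PAGE_LOCKED);
+    //     cv::gpu::CudaMem h_result;
+    //     cv::gpu::GpuMat d_frame, d_result;
+    //     cv::gpu::Stream stream;
+    //
+    //     stream.enqueueUpload(h_frame, d_frame);
+    //     stream.enqueueConvert(d_frame, d_result, CV_32F, 1.0 / 255.0);
+    //     stream.enqueueDownload(d_result, h_result);
+    //     stream.waitForCompletion();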
+ dst.create(src.size(), src.type(), CudaMem::ALLOC_PAGE_LOCKED); + + cudaStream_t stream = Impl::getStream(impl); + size_t bwidth = src.cols * src.elemSize(); + cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToHost, stream) ); +} + +void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst) +{ + dst.create(src.size(), src.type()); + + cudaStream_t stream = Impl::getStream(impl); + size_t bwidth = src.cols * src.elemSize(); + cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) ); } -void cv::gpu::Stream::enqueueDownload(const GpuMat& src, CudaMem& dst) { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToHost); } -void cv::gpu::Stream::enqueueUpload(const CudaMem& src, GpuMat& dst){ devcopy(src, dst, Impl::getStream(impl), cudaMemcpyHostToDevice); } -void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst) { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyHostToDevice); } -void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, Impl::getStream(impl), cudaMemcpyDeviceToDevice); } +void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst) +{ + dst.create(src.size(), src.type()); + + cudaStream_t stream = Impl::getStream(impl); + size_t bwidth = src.cols * src.elemSize(); + cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyHostToDevice, stream) ); +} + +void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) +{ + dst.create(src.size(), src.type()); + + cudaStream_t stream = Impl::getStream(impl); + size_t bwidth = src.cols * src.elemSize(); + cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, cudaMemcpyDeviceToDevice, stream) ); +} -void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s) +void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val) { - CV_Assert((src.depth() != CV_64F) || - (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE))); + const int sdepth = src.depth(); - if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0) + if (sdepth == CV_64F) { - cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) ); + if (!deviceSupports(NATIVE_DOUBLE)) + CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); + } + + cudaStream_t stream = Impl::getStream(impl); + + if (val[0] == 0.0 && val[1] == 0.0 && val[2] == 0.0 && val[3] == 0.0) + { + cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, stream) ); return; } - if (src.depth() == CV_8U) + + if (sdepth == CV_8U) { int cn = src.channels(); - if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3])) + if (cn == 1 || (cn == 2 && val[0] == val[1]) || (cn == 3 && val[0] == val[1] && val[0] == val[2]) || (cn == 4 && val[0] == val[1] && val[0] == val[2] && val[0] == val[3])) { - int val = saturate_cast(s[0]); - cudaSafeCall( cudaMemset2DAsync(src.data, src.step, val, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) ); + int ival = saturate_cast(val[0]); + cudaSafeCall( cudaMemset2DAsync(src.data, src.step, ival, src.cols * src.elemSize(), src.rows, stream) ); return; } } - setTo(src, s, Impl::getStream(impl)); + setTo(src, val, stream); } void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const 
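+// The unmasked enqueueMemSet above falls back to plain cudaMemset2DAsync whenever it
+// can: an all-zero scalar is memset directly for any type, and a uniform scalar on
+// CV_8U data is memset with the saturated byte value; only the remaining cases go
+// through the setTo kernel.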
GpuMat& mask) { - CV_Assert((src.depth() != CV_64F) || - (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE))); + const int sdepth = src.depth(); + + if (sdepth == CV_64F) + { + if (!deviceSupports(NATIVE_DOUBLE)) + CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); + } CV_Assert(mask.type() == CV_8UC1); - setTo(src, val, mask, Impl::getStream(impl)); + cudaStream_t stream = Impl::getStream(impl); + + setTo(src, val, mask, stream); } -void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta) +void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int dtype, double alpha, double beta) { - CV_Assert((src.depth() != CV_64F && CV_MAT_DEPTH(rtype) != CV_64F) || - (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE))); + if (dtype < 0) + dtype = src.type(); + else + dtype = CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()); - bool noScale = fabs(alpha-1) < std::numeric_limits::epsilon() && fabs(beta) < std::numeric_limits::epsilon(); + const int sdepth = src.depth(); + const int ddepth = CV_MAT_DEPTH(dtype); - if( rtype < 0 ) - rtype = src.type(); - else - rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.channels()); + if (sdepth == CV_64F || ddepth == CV_64F) + { + if (!deviceSupports(NATIVE_DOUBLE)) + CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); + } + + bool noScale = fabs(alpha - 1) < numeric_limits::epsilon() && fabs(beta) < numeric_limits::epsilon(); - int sdepth = src.depth(), ddepth = CV_MAT_DEPTH(rtype); - if( sdepth == ddepth && noScale ) + if (sdepth == ddepth && noScale) { - src.copyTo(dst); + enqueueCopy(src, dst); return; } - GpuMat temp; - const GpuMat* psrc = &src; - if( sdepth != ddepth && psrc == &dst ) - psrc = &(temp = src); + dst.create(src.size(), dtype); - dst.create( src.size(), rtype ); - convertTo(src, dst, alpha, beta, Impl::getStream(impl)); + cudaStream_t stream = Impl::getStream(impl); + convertTo(src, dst, alpha, beta, stream); } -cv::gpu::Stream::operator bool() const +#if CUDA_VERSION >= 5000 + +namespace { - return impl && impl->stream; + struct CallbackData + { + cv::gpu::Stream::StreamCallback callback; + void* userData; + Stream stream; + }; + + void CUDART_CB cudaStreamCallback(cudaStream_t, cudaError_t status, void* userData) + { + CallbackData* data = reinterpret_cast(userData); + data->callback(data->stream, static_cast(status), data->userData); + delete data; + } } -cv::gpu::Stream::Stream(Impl* impl_) : impl(impl_) {} +#endif + +void cv::gpu::Stream::enqueueHostCallback(StreamCallback callback, void* userData) +{ +#if CUDA_VERSION >= 5000 + CallbackData* data = new CallbackData; + data->callback = callback; + data->userData = userData; + data->stream = *this; + + cudaStream_t stream = Impl::getStream(impl); + + cudaSafeCall( cudaStreamAddCallback(stream, cudaStreamCallback, data, 0) ); +#else + (void) callback; + (void) userData; + CV_Error(CV_StsNotImplemented, "This function requires CUDA 5.0"); +#endif +} cv::gpu::Stream& cv::gpu::Stream::Null() { - static Stream s((Impl*)0); + static Stream s((Impl*) 0); return s; } +cv::gpu::Stream::operator bool() const +{ + return impl && impl->stream; +} + +cv::gpu::Stream::Stream(Impl* impl_) : impl(impl_) +{ +} + +void cv::gpu::Stream::create() +{ + if (impl) + release(); + + cudaStream_t stream; + cudaSafeCall( cudaStreamCreate( &stream ) ); + + impl = (Stream::Impl*) fastMalloc(sizeof(Stream::Impl)); + + impl->stream = stream; + impl->ref_counter = 1; 
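+    // Stream handles share ownership of the cudaStream_t created here: the copy
+    // constructor and assignment only bump ref_counter (via CV_XADD), and release()
+    // destroys the stream once the last copy goes away. This is also what keeps the
+    // Stream copy stored in CallbackData alive until a host callback has run.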
+} + +void cv::gpu::Stream::release() +{ + if (impl && CV_XADD(&impl->ref_counter, -1) == 1) + { + cudaSafeCall( cudaStreamDestroy(impl->stream) ); + cv::fastFree(impl); + } +} + #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpu/test/test_stream.cpp b/modules/gpu/test/test_stream.cpp new file mode 100644 index 0000000000..4adac41292 --- /dev/null +++ b/modules/gpu/test/test_stream.cpp @@ -0,0 +1,130 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or bpied warranties, including, but not limited to, the bpied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "test_precomp.hpp" + +#ifdef HAVE_CUDA + +#if CUDA_VERSION >= 5000 + +struct Async : testing::TestWithParam +{ + cv::gpu::CudaMem src; + cv::gpu::GpuMat d_src; + + cv::gpu::CudaMem dst; + cv::gpu::GpuMat d_dst; + + virtual void SetUp() + { + cv::gpu::DeviceInfo devInfo = GetParam(); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Mat m = randomMat(cv::Size(128, 128), CV_8UC1); + src.create(m.size(), m.type(), cv::gpu::CudaMem::ALLOC_PAGE_LOCKED); + m.copyTo(src.createMatHeader()); + } +}; + +void checkMemSet(cv::gpu::Stream&, int status, void* userData) +{ + ASSERT_EQ(cudaSuccess, status); + + Async* test = reinterpret_cast(userData); + + cv::Mat src = test->src; + cv::Mat dst = test->dst; + + cv::Mat dst_gold = cv::Mat::zeros(src.size(), src.type()); + + ASSERT_MAT_NEAR(dst_gold, dst, 0); +} + +GPU_TEST_P(Async, MemSet) +{ + cv::gpu::Stream stream; + + d_dst.upload(src); + + stream.enqueueMemSet(d_dst, cv::Scalar::all(0)); + stream.enqueueDownload(d_dst, dst); + + Async* test = this; + stream.enqueueHostCallback(checkMemSet, test); + + stream.waitForCompletion(); +} + +void checkConvert(cv::gpu::Stream&, int status, void* userData) +{ + ASSERT_EQ(cudaSuccess, status); + + Async* test = reinterpret_cast(userData); + + cv::Mat src = test->src; + cv::Mat dst = test->dst; + + cv::Mat dst_gold; + src.convertTo(dst_gold, CV_32S); + + ASSERT_MAT_NEAR(dst_gold, dst, 0); +} + +GPU_TEST_P(Async, Convert) +{ + cv::gpu::Stream stream; + + stream.enqueueUpload(src, d_src); + stream.enqueueConvert(d_src, d_dst, CV_32S); + stream.enqueueDownload(d_dst, dst); + + Async* test = this; + stream.enqueueHostCallback(checkConvert, test); + + stream.waitForCompletion(); +} + +INSTANTIATE_TEST_CASE_P(GPU_Stream, Async, ALL_DEVICES); + +#endif + +#endif // HAVE_CUDA From 08914aa7089e75a59e820b8cc4d56e048e54384a Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 13 Feb 2013 15:53:03 +0400 Subject: [PATCH 04/12] added gpu compare with scalar --- modules/gpu/doc/per_element_operations.rst | 2 + modules/gpu/include/opencv2/gpu/gpu.hpp | 1 + modules/gpu/perf/perf_core.cpp | 33 ++++ modules/gpu/src/cuda/element_operations.cu | 220 +++++++++++++++++++++ modules/gpu/src/element_operations.cpp | 64 ++++++ modules/gpu/test/test_core.cpp | 111 +++++++++++ 6 files changed, 431 insertions(+) diff --git a/modules/gpu/doc/per_element_operations.rst b/modules/gpu/doc/per_element_operations.rst index a59875e646..2670ba3233 100644 --- a/modules/gpu/doc/per_element_operations.rst +++ b/modules/gpu/doc/per_element_operations.rst @@ -276,6 +276,8 @@ Compares elements of two matrices. .. ocv:function:: void gpu::compare( const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop, Stream& stream=Stream::Null() ) +.. ocv:function:: void gpu::compare(const GpuMat& a, Scalar sc, GpuMat& c, int cmpop, Stream& stream = Stream::Null()) + :param a: First source matrix. :param b: Second source matrix with the same size and type as ``a`` . diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 7493d83b58..4eb339e7ea 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -533,6 +533,7 @@ CV_EXPORTS void pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream //! 
compares elements of two arrays (c = a b) CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop, Stream& stream = Stream::Null()); +CV_EXPORTS void compare(const GpuMat& a, Scalar sc, GpuMat& c, int cmpop, Stream& stream = Stream::Null()); //! performs per-elements bit-wise inversion CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); diff --git a/modules/gpu/perf/perf_core.cpp b/modules/gpu/perf/perf_core.cpp index cfd572dc16..c78bfd6e20 100644 --- a/modules/gpu/perf/perf_core.cpp +++ b/modules/gpu/perf/perf_core.cpp @@ -647,6 +647,39 @@ PERF_TEST_P(Sz_Depth_Code, Core_CompareMat, Combine(GPU_TYPICAL_MAT_SIZES, ARITH } } +////////////////////////////////////////////////////////////////////// +// CompareScalar + +PERF_TEST_P(Sz_Depth_Code, Core_CompareScalar, Combine(GPU_TYPICAL_MAT_SIZES, ARITHM_MAT_DEPTH, ALL_CMP_CODES)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int cmp_code = GET_PARAM(2); + + cv::Mat src(size, depth); + fillRandom(src); + + cv::Scalar s = cv::Scalar::all(100); + + if (PERF_RUN_GPU()) + { + cv::gpu::GpuMat d_src(src); + cv::gpu::GpuMat d_dst; + + TEST_CYCLE() cv::gpu::compare(d_src, s, d_dst, cmp_code); + + GPU_SANITY_CHECK(d_dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::compare(src, s, dst, cmp_code); + + CPU_SANITY_CHECK(dst); + } +} + ////////////////////////////////////////////////////////////////////// // BitwiseNot diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index 4b52cc7dd3..27fb61ff70 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -1954,6 +1954,226 @@ namespace arithm template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); } +////////////////////////////////////////////////////////////////////////////////////// +// cmpScalar + +namespace arithm +{ +#define TYPE_VEC(type, cn) typename TypeVec::vec_type + + template struct CmpScalar; + template + struct CmpScalar : unary_function + { + const T val; + + __host__ explicit CmpScalar(T val_) : val(val_) {} + + __device__ __forceinline__ uchar operator()(T src) const + { + Cmp op; + return op(src, val); + } + }; + template + struct CmpScalar : unary_function + { + const TYPE_VEC(T, 2) val; + + __host__ explicit CmpScalar(TYPE_VEC(T, 2) val_) : val(val_) {} + + __device__ __forceinline__ TYPE_VEC(uchar, 2) operator()(const TYPE_VEC(T, 2) & src) const + { + Cmp op; + return VecTraits::make(op(src.x, val.x), op(src.y, val.y)); + } + }; + template + struct CmpScalar : unary_function + { + const TYPE_VEC(T, 3) val; + + __host__ explicit CmpScalar(TYPE_VEC(T, 3) val_) : val(val_) {} + + __device__ __forceinline__ TYPE_VEC(uchar, 3) operator()(const TYPE_VEC(T, 3) & src) const + { + Cmp op; + return VecTraits::make(op(src.x, val.x), op(src.y, val.y), op(src.z, val.z)); + } + }; + template + struct CmpScalar : unary_function + { + const TYPE_VEC(T, 4) val; + + __host__ explicit CmpScalar(TYPE_VEC(T, 4) val_) : val(val_) {} + + __device__ __forceinline__ TYPE_VEC(uchar, 4) operator()(const TYPE_VEC(T, 4) & src) const + { + Cmp op; + return VecTraits::make(op(src.x, val.x), op(src.y, val.y), op(src.z, val.z), op(src.w, val.w)); + } + }; + +#undef TYPE_VEC +} + +namespace cv { namespace gpu { namespace device +{ + template struct TransformFunctorTraits< arithm::CmpScalar > : arithm::ArithmFuncTraits + { + }; +}}} + +namespace 
arithm +{ + template
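+
+// Once the functors above are plugged into gpu::compare, the Scalar overload mirrors
+// cv::compare on the GPU; a usage sketch (variable names are illustrative):
+//
+//     cv::gpu::GpuMat d_src(src), d_mask;                                // src: CV_8UC1 cv::Mat
+//     cv::gpu::compare(d_src, cv::Scalar::all(100), d_mask, cv::CMP_GT);
+//     // d_mask is CV_8UC1, 255 where src > 100 and 0 elsewhere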