From 71c391cd8dbac3b2203af757ddb1fbdeed41d87c Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 31 Dec 2014 11:27:31 +0300 Subject: [PATCH 01/10] remove unused memory transfer from TVL1 CUDA implementation it caused runtime failures --- modules/cudaoptflow/src/tvl1flow.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/cudaoptflow/src/tvl1flow.cpp b/modules/cudaoptflow/src/tvl1flow.cpp index d0efffa57f..b8dfea56f1 100644 --- a/modules/cudaoptflow/src/tvl1flow.cpp +++ b/modules/cudaoptflow/src/tvl1flow.cpp @@ -248,7 +248,6 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::procOneScale(const GpuMat& I0, const G { // some tweaks to make sum operation less frequently bool calcError = (epsilon > 0) && (n & 0x1) && (prevError < scaledEpsilon); - cv::Mat m1(u3); estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, diff, l_t, static_cast(theta), gamma, calcError); if (calcError) { From c4b2058d233347b83b53b3afd293b9eed80ed806 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 31 Dec 2014 11:27:51 +0300 Subject: [PATCH 02/10] simplify TVL1 accuracy test to reduce run time --- modules/cudaoptflow/test/test_optflow.cpp | 35 +++++++++++------------ 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/modules/cudaoptflow/test/test_optflow.cpp b/modules/cudaoptflow/test/test_optflow.cpp index 2b976563b0..dce9cc59bc 100644 --- a/modules/cudaoptflow/test/test_optflow.cpp +++ b/modules/cudaoptflow/test/test_optflow.cpp @@ -325,15 +325,20 @@ INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, FarnebackOpticalFlow, testing::Combine( ////////////////////////////////////////////////////// // OpticalFlowDual_TVL1 -PARAM_TEST_CASE(OpticalFlowDual_TVL1, cv::cuda::DeviceInfo, UseRoi) +namespace +{ + IMPLEMENT_PARAM_CLASS(Gamma, double) +} + +PARAM_TEST_CASE(OpticalFlowDual_TVL1, cv::cuda::DeviceInfo, Gamma) { cv::cuda::DeviceInfo devInfo; - bool useRoi; + double gamma; virtual void SetUp() { devInfo = GET_PARAM(0); - useRoi = GET_PARAM(1); + gamma = GET_PARAM(1); cv::cuda::setDevice(devInfo.deviceID()); } @@ -348,30 +353,22 @@ CUDA_TEST_P(OpticalFlowDual_TVL1, Accuracy) ASSERT_FALSE(frame1.empty()); cv::cuda::OpticalFlowDual_TVL1_CUDA d_alg; - cv::cuda::GpuMat d_flowx = createMat(frame0.size(), CV_32FC1, useRoi); - cv::cuda::GpuMat d_flowy = createMat(frame0.size(), CV_32FC1, useRoi); - d_alg(loadMat(frame0, useRoi), loadMat(frame1, useRoi), d_flowx, d_flowy); + d_alg.iterations = 10; + d_alg.gamma = gamma; + + cv::cuda::GpuMat d_flowx, d_flowy; + d_alg(loadMat(frame0), loadMat(frame1), d_flowx, d_flowy); cv::Ptr alg = cv::createOptFlow_DualTVL1(); alg->set("medianFiltering", 1); alg->set("innerIterations", 1); alg->set("outerIterations", d_alg.iterations); + alg->set("gamma", gamma); + cv::Mat flow; alg->calc(frame0, frame1, flow); cv::Mat gold[2]; cv::split(flow, gold); - cv::Mat mx(d_flowx); - cv::Mat my(d_flowx); - - EXPECT_MAT_SIMILAR(gold[0], d_flowx, 4e-3); - EXPECT_MAT_SIMILAR(gold[1], d_flowy, 4e-3); - d_alg.gamma = 1; - alg->set("gamma", 1); - d_alg(loadMat(frame0, useRoi), loadMat(frame1, useRoi), d_flowx, d_flowy); - alg->calc(frame0, frame1, flow); - cv::split(flow, gold); - mx = cv::Mat(d_flowx); - my = cv::Mat(d_flowx); EXPECT_MAT_SIMILAR(gold[0], d_flowx, 4e-3); EXPECT_MAT_SIMILAR(gold[1], d_flowy, 4e-3); @@ -379,7 +376,7 @@ CUDA_TEST_P(OpticalFlowDual_TVL1, Accuracy) INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, OpticalFlowDual_TVL1, testing::Combine( ALL_DEVICES, - WHOLE_SUBMAT)); + testing::Values(Gamma(0.0), Gamma(1.0)))); ////////////////////////////////////////////////////// // FastOpticalFlowBM From 19c6bbe7d91efc74039636b6ce00a26810f40ef9 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 31 Dec 2014 15:35:23 +0300 Subject: [PATCH 03/10] move obsolete algorithms from cudaoptflow to cudalegacy --- .../cudalegacy/include/opencv2/cudalegacy.hpp | 51 +++++- .../{cudaoptflow => cudalegacy}/src/bm.cpp | 0 .../src/bm_fast.cpp | 0 .../src/cuda/bm.cu | 0 .../src/cuda/bm_fast.cu | 0 .../src/cuda/needle_map.cu | 0 .../src/interpolate_frames.cpp | 0 .../src/needle_map.cpp | 0 .../include/opencv2/cudaoptflow.hpp | 41 ----- modules/cudaoptflow/perf/perf_optflow.cpp | 150 ------------------ modules/cudaoptflow/test/test_optflow.cpp | 118 -------------- 11 files changed, 49 insertions(+), 311 deletions(-) rename modules/{cudaoptflow => cudalegacy}/src/bm.cpp (100%) rename modules/{cudaoptflow => cudalegacy}/src/bm_fast.cpp (100%) rename modules/{cudaoptflow => cudalegacy}/src/cuda/bm.cu (100%) rename modules/{cudaoptflow => cudalegacy}/src/cuda/bm_fast.cu (100%) rename modules/{cudaoptflow => cudalegacy}/src/cuda/needle_map.cu (100%) rename modules/{cudaoptflow => cudalegacy}/src/interpolate_frames.cpp (100%) rename modules/{cudaoptflow => cudalegacy}/src/needle_map.cpp (100%) diff --git a/modules/cudalegacy/include/opencv2/cudalegacy.hpp b/modules/cudalegacy/include/opencv2/cudalegacy.hpp index 328836c530..f0107499d3 100644 --- a/modules/cudalegacy/include/opencv2/cudalegacy.hpp +++ b/modules/cudalegacy/include/opencv2/cudalegacy.hpp @@ -71,8 +71,9 @@ public: CV_EXPORTS Ptr createImagePyramid(InputArray img, int nLayers = -1, Stream& stream = Stream::Null()); -//////////////////////////////////////////////////// +// // GMG +// /** @brief Background/Foreground Segmentation Algorithm. @@ -125,8 +126,9 @@ public: CV_EXPORTS Ptr createBackgroundSubtractorGMG(int initializationFrames = 120, double decisionThreshold = 0.8); -//////////////////////////////////////////////////// +// // FGD +// /** @brief The class discriminates between foreground and background pixels by building and maintaining a model of the background. @@ -180,6 +182,51 @@ struct CV_EXPORTS FGDParams CV_EXPORTS Ptr createBackgroundSubtractorFGD(const FGDParams& params = FGDParams()); +// +// Optical flow +// + +//! Calculates optical flow for 2 images using block matching algorithm */ +CV_EXPORTS void calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr, + Size block_size, Size shift_size, Size max_range, bool use_previous, + GpuMat& velx, GpuMat& vely, GpuMat& buf, + Stream& stream = Stream::Null()); + +class CV_EXPORTS FastOpticalFlowBM +{ +public: + void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, int search_window = 21, int block_window = 7, Stream& s = Stream::Null()); + +private: + GpuMat buffer; + GpuMat extended_I0; + GpuMat extended_I1; +}; + +/** @brief Interpolates frames (images) using provided optical flow (displacement field). + +@param frame0 First frame (32-bit floating point images, single channel). +@param frame1 Second frame. Must have the same type and size as frame0 . +@param fu Forward horizontal displacement. +@param fv Forward vertical displacement. +@param bu Backward horizontal displacement. +@param bv Backward vertical displacement. +@param pos New frame position. +@param newFrame Output image. +@param buf Temporary buffer, will have width x 6\*height size, CV_32FC1 type and contain 6 +GpuMat: occlusion masks for first frame, occlusion masks for second, interpolated forward +horizontal flow, interpolated forward vertical flow, interpolated backward horizontal flow, +interpolated backward vertical flow. +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS void interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, + const GpuMat& fu, const GpuMat& fv, + const GpuMat& bu, const GpuMat& bv, + float pos, GpuMat& newFrame, GpuMat& buf, + Stream& stream = Stream::Null()); + +CV_EXPORTS void createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMat& vertex, GpuMat& colors); + //! @} }} diff --git a/modules/cudaoptflow/src/bm.cpp b/modules/cudalegacy/src/bm.cpp similarity index 100% rename from modules/cudaoptflow/src/bm.cpp rename to modules/cudalegacy/src/bm.cpp diff --git a/modules/cudaoptflow/src/bm_fast.cpp b/modules/cudalegacy/src/bm_fast.cpp similarity index 100% rename from modules/cudaoptflow/src/bm_fast.cpp rename to modules/cudalegacy/src/bm_fast.cpp diff --git a/modules/cudaoptflow/src/cuda/bm.cu b/modules/cudalegacy/src/cuda/bm.cu similarity index 100% rename from modules/cudaoptflow/src/cuda/bm.cu rename to modules/cudalegacy/src/cuda/bm.cu diff --git a/modules/cudaoptflow/src/cuda/bm_fast.cu b/modules/cudalegacy/src/cuda/bm_fast.cu similarity index 100% rename from modules/cudaoptflow/src/cuda/bm_fast.cu rename to modules/cudalegacy/src/cuda/bm_fast.cu diff --git a/modules/cudaoptflow/src/cuda/needle_map.cu b/modules/cudalegacy/src/cuda/needle_map.cu similarity index 100% rename from modules/cudaoptflow/src/cuda/needle_map.cu rename to modules/cudalegacy/src/cuda/needle_map.cu diff --git a/modules/cudaoptflow/src/interpolate_frames.cpp b/modules/cudalegacy/src/interpolate_frames.cpp similarity index 100% rename from modules/cudaoptflow/src/interpolate_frames.cpp rename to modules/cudalegacy/src/interpolate_frames.cpp diff --git a/modules/cudaoptflow/src/needle_map.cpp b/modules/cudalegacy/src/needle_map.cpp similarity index 100% rename from modules/cudaoptflow/src/needle_map.cpp rename to modules/cudalegacy/src/needle_map.cpp diff --git a/modules/cudaoptflow/include/opencv2/cudaoptflow.hpp b/modules/cudaoptflow/include/opencv2/cudaoptflow.hpp index f65b1447b2..7882a8e622 100644 --- a/modules/cudaoptflow/include/opencv2/cudaoptflow.hpp +++ b/modules/cudaoptflow/include/opencv2/cudaoptflow.hpp @@ -347,47 +347,6 @@ private: GpuMat norm_buf; }; -//! Calculates optical flow for 2 images using block matching algorithm */ -CV_EXPORTS void calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr, - Size block_size, Size shift_size, Size max_range, bool use_previous, - GpuMat& velx, GpuMat& vely, GpuMat& buf, - Stream& stream = Stream::Null()); - -class CV_EXPORTS FastOpticalFlowBM -{ -public: - void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, int search_window = 21, int block_window = 7, Stream& s = Stream::Null()); - -private: - GpuMat buffer; - GpuMat extended_I0; - GpuMat extended_I1; -}; - -/** @brief Interpolates frames (images) using provided optical flow (displacement field). - -@param frame0 First frame (32-bit floating point images, single channel). -@param frame1 Second frame. Must have the same type and size as frame0 . -@param fu Forward horizontal displacement. -@param fv Forward vertical displacement. -@param bu Backward horizontal displacement. -@param bv Backward vertical displacement. -@param pos New frame position. -@param newFrame Output image. -@param buf Temporary buffer, will have width x 6\*height size, CV_32FC1 type and contain 6 -GpuMat: occlusion masks for first frame, occlusion masks for second, interpolated forward -horizontal flow, interpolated forward vertical flow, interpolated backward horizontal flow, -interpolated backward vertical flow. -@param stream Stream for the asynchronous version. - */ -CV_EXPORTS void interpolateFrames(const GpuMat& frame0, const GpuMat& frame1, - const GpuMat& fu, const GpuMat& fv, - const GpuMat& bu, const GpuMat& bv, - float pos, GpuMat& newFrame, GpuMat& buf, - Stream& stream = Stream::Null()); - -CV_EXPORTS void createOpticalFlowNeedleMap(const GpuMat& u, const GpuMat& v, GpuMat& vertex, GpuMat& colors); - //! @} }} // namespace cv { namespace cuda { diff --git a/modules/cudaoptflow/perf/perf_optflow.cpp b/modules/cudaoptflow/perf/perf_optflow.cpp index d22eb7e60d..12612b0622 100644 --- a/modules/cudaoptflow/perf/perf_optflow.cpp +++ b/modules/cudaoptflow/perf/perf_optflow.cpp @@ -46,91 +46,10 @@ using namespace std; using namespace testing; using namespace perf; -////////////////////////////////////////////////////// -// InterpolateFrames - typedef pair pair_string; DEF_PARAM_TEST_1(ImagePair, pair_string); -PERF_TEST_P(ImagePair, InterpolateFrames, - Values(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png"))) -{ - cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(frame0.empty()); - - cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(frame1.empty()); - - frame0.convertTo(frame0, CV_32FC1, 1.0 / 255.0); - frame1.convertTo(frame1, CV_32FC1, 1.0 / 255.0); - - if (PERF_RUN_CUDA()) - { - const cv::cuda::GpuMat d_frame0(frame0); - const cv::cuda::GpuMat d_frame1(frame1); - cv::cuda::GpuMat d_fu, d_fv; - cv::cuda::GpuMat d_bu, d_bv; - - cv::cuda::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/, - 10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/); - - d_flow(d_frame0, d_frame1, d_fu, d_fv); - d_flow(d_frame1, d_frame0, d_bu, d_bv); - - cv::cuda::GpuMat newFrame; - cv::cuda::GpuMat d_buf; - - TEST_CYCLE() cv::cuda::interpolateFrames(d_frame0, d_frame1, d_fu, d_fv, d_bu, d_bv, 0.5f, newFrame, d_buf); - - CUDA_SANITY_CHECK(newFrame, 1e-4); - } - else - { - FAIL_NO_CPU(); - } -} - -////////////////////////////////////////////////////// -// CreateOpticalFlowNeedleMap - -PERF_TEST_P(ImagePair, CreateOpticalFlowNeedleMap, - Values(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png"))) -{ - cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(frame0.empty()); - - cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(frame1.empty()); - - frame0.convertTo(frame0, CV_32FC1, 1.0 / 255.0); - frame1.convertTo(frame1, CV_32FC1, 1.0 / 255.0); - - if (PERF_RUN_CUDA()) - { - const cv::cuda::GpuMat d_frame0(frame0); - const cv::cuda::GpuMat d_frame1(frame1); - cv::cuda::GpuMat u; - cv::cuda::GpuMat v; - - cv::cuda::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/, - 10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/); - - d_flow(d_frame0, d_frame1, u, v); - - cv::cuda::GpuMat vertex, colors; - - TEST_CYCLE() cv::cuda::createOpticalFlowNeedleMap(u, v, vertex, colors); - - CUDA_SANITY_CHECK(vertex, 1e-6); - CUDA_SANITY_CHECK(colors); - } - else - { - FAIL_NO_CPU(); - } -} - ////////////////////////////////////////////////////// // BroxOpticalFlow @@ -383,72 +302,3 @@ PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1, CPU_SANITY_CHECK(flow); } } - -////////////////////////////////////////////////////// -// OpticalFlowBM - -PERF_TEST_P(ImagePair, OpticalFlowBM, - Values(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png"))) -{ - declare.time(400); - - const cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(frame0.empty()); - - const cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(frame1.empty()); - - const cv::Size block_size(16, 16); - const cv::Size shift_size(1, 1); - const cv::Size max_range(16, 16); - - if (PERF_RUN_CUDA()) - { - const cv::cuda::GpuMat d_frame0(frame0); - const cv::cuda::GpuMat d_frame1(frame1); - cv::cuda::GpuMat u, v, buf; - - TEST_CYCLE() cv::cuda::calcOpticalFlowBM(d_frame0, d_frame1, block_size, shift_size, max_range, false, u, v, buf); - - CUDA_SANITY_CHECK(u); - CUDA_SANITY_CHECK(v); - } - else - { - FAIL_NO_CPU(); - } -} - -PERF_TEST_P(ImagePair, DISABLED_FastOpticalFlowBM, - Values(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png"))) -{ - declare.time(400); - - const cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(frame0.empty()); - - const cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(frame1.empty()); - - const cv::Size block_size(16, 16); - const cv::Size shift_size(1, 1); - const cv::Size max_range(16, 16); - - if (PERF_RUN_CUDA()) - { - const cv::cuda::GpuMat d_frame0(frame0); - const cv::cuda::GpuMat d_frame1(frame1); - cv::cuda::GpuMat u, v; - - cv::cuda::FastOpticalFlowBM fastBM; - - TEST_CYCLE() fastBM(d_frame0, d_frame1, u, v, max_range.width, block_size.width); - - CUDA_SANITY_CHECK(u, 2); - CUDA_SANITY_CHECK(v, 2); - } - else - { - FAIL_NO_CPU(); - } -} diff --git a/modules/cudaoptflow/test/test_optflow.cpp b/modules/cudaoptflow/test/test_optflow.cpp index dce9cc59bc..7a6e68310f 100644 --- a/modules/cudaoptflow/test/test_optflow.cpp +++ b/modules/cudaoptflow/test/test_optflow.cpp @@ -378,122 +378,4 @@ INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, OpticalFlowDual_TVL1, testing::Combine( ALL_DEVICES, testing::Values(Gamma(0.0), Gamma(1.0)))); -////////////////////////////////////////////////////// -// FastOpticalFlowBM - -namespace -{ - void FastOpticalFlowBM_gold(const cv::Mat_& I0, const cv::Mat_& I1, cv::Mat_& velx, cv::Mat_& vely, int search_window, int block_window) - { - velx.create(I0.size()); - vely.create(I0.size()); - - int search_radius = search_window / 2; - int block_radius = block_window / 2; - - for (int y = 0; y < I0.rows; ++y) - { - for (int x = 0; x < I0.cols; ++x) - { - int bestDist = std::numeric_limits::max(); - int bestDx = 0; - int bestDy = 0; - - for (int dy = -search_radius; dy <= search_radius; ++dy) - { - for (int dx = -search_radius; dx <= search_radius; ++dx) - { - int dist = 0; - - for (int by = -block_radius; by <= block_radius; ++by) - { - for (int bx = -block_radius; bx <= block_radius; ++bx) - { - int I0_val = I0(cv::borderInterpolate(y + by, I0.rows, cv::BORDER_DEFAULT), cv::borderInterpolate(x + bx, I0.cols, cv::BORDER_DEFAULT)); - int I1_val = I1(cv::borderInterpolate(y + dy + by, I0.rows, cv::BORDER_DEFAULT), cv::borderInterpolate(x + dx + bx, I0.cols, cv::BORDER_DEFAULT)); - - dist += std::abs(I0_val - I1_val); - } - } - - if (dist < bestDist) - { - bestDist = dist; - bestDx = dx; - bestDy = dy; - } - } - } - - velx(y, x) = (float) bestDx; - vely(y, x) = (float) bestDy; - } - } - } - - double calc_rmse(const cv::Mat_& flow1, const cv::Mat_& flow2) - { - double sum = 0.0; - - for (int y = 0; y < flow1.rows; ++y) - { - for (int x = 0; x < flow1.cols; ++x) - { - double diff = flow1(y, x) - flow2(y, x); - sum += diff * diff; - } - } - - return std::sqrt(sum / flow1.size().area()); - } -} - -struct FastOpticalFlowBM : testing::TestWithParam -{ -}; - -CUDA_TEST_P(FastOpticalFlowBM, Accuracy) -{ - const double MAX_RMSE = 0.6; - - int search_window = 15; - int block_window = 5; - - cv::cuda::DeviceInfo devInfo = GetParam(); - cv::cuda::setDevice(devInfo.deviceID()); - - cv::Mat frame0 = readImage("opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(frame0.empty()); - - cv::Mat frame1 = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(frame1.empty()); - - cv::Size smallSize(320, 240); - cv::Mat frame0_small; - cv::Mat frame1_small; - - cv::resize(frame0, frame0_small, smallSize); - cv::resize(frame1, frame1_small, smallSize); - - cv::cuda::GpuMat d_flowx; - cv::cuda::GpuMat d_flowy; - cv::cuda::FastOpticalFlowBM fastBM; - - fastBM(loadMat(frame0_small), loadMat(frame1_small), d_flowx, d_flowy, search_window, block_window); - - cv::Mat_ flowx; - cv::Mat_ flowy; - FastOpticalFlowBM_gold(frame0_small, frame1_small, flowx, flowy, search_window, block_window); - - double err; - - err = calc_rmse(flowx, cv::Mat(d_flowx)); - EXPECT_LE(err, MAX_RMSE); - - err = calc_rmse(flowy, cv::Mat(d_flowy)); - EXPECT_LE(err, MAX_RMSE); -} - -INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, FastOpticalFlowBM, ALL_DEVICES); - #endif // HAVE_CUDA From 381216aa5411fd20faf897cc8900c0358e7dc695 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 31 Dec 2014 15:36:15 +0300 Subject: [PATCH 04/10] refactor cudaoptflow public API: * use opaque algorithm interfaces * add stream support --- .../include/opencv2/cudaoptflow.hpp | 397 ++++++------- modules/cudaoptflow/src/brox.cpp | 168 ++++-- modules/cudaoptflow/src/cuda/pyrlk.cu | 12 +- modules/cudaoptflow/src/cuda/tvl1flow.cu | 35 +- modules/cudaoptflow/src/farneback.cpp | 559 ++++++++++-------- modules/cudaoptflow/src/pyrlk.cpp | 350 +++++++---- modules/cudaoptflow/src/tvl1flow.cpp | 474 +++++++++------ 7 files changed, 1164 insertions(+), 831 deletions(-) diff --git a/modules/cudaoptflow/include/opencv2/cudaoptflow.hpp b/modules/cudaoptflow/include/opencv2/cudaoptflow.hpp index 7882a8e622..6ea75594d2 100644 --- a/modules/cudaoptflow/include/opencv2/cudaoptflow.hpp +++ b/modules/cudaoptflow/include/opencv2/cudaoptflow.hpp @@ -61,49 +61,94 @@ namespace cv { namespace cuda { //! @addtogroup cudaoptflow //! @{ -/** @brief Class computing the optical flow for two images using Brox et al Optical Flow algorithm -(@cite Brox2004). : +// +// Interface +// + +/** @brief Base interface for dense optical flow algorithms. */ -class CV_EXPORTS BroxOpticalFlow +class CV_EXPORTS DenseOpticalFlow : public Algorithm { public: - BroxOpticalFlow(float alpha_, float gamma_, float scale_factor_, int inner_iterations_, int outer_iterations_, int solver_iterations_) : - alpha(alpha_), gamma(gamma_), scale_factor(scale_factor_), - inner_iterations(inner_iterations_), outer_iterations(outer_iterations_), solver_iterations(solver_iterations_) - { - } + /** @brief Calculates a dense optical flow. - //! Compute optical flow - //! frame0 - source frame (supports only CV_32FC1 type) - //! frame1 - frame to track (with the same size and type as frame0) - //! u - flow horizontal component (along x axis) - //! v - flow vertical component (along y axis) - void operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& stream = Stream::Null()); + @param I0 first input image. + @param I1 second input image of the same size and the same type as I0. + @param flow computed flow image that has the same size as I0 and type CV_32FC2. + @param stream Stream for the asynchronous version. + */ + virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream = Stream::Null()) = 0; +}; - //! flow smoothness - float alpha; +/** @brief Base interface for sparse optical flow algorithms. + */ +class CV_EXPORTS SparseOpticalFlow : public Algorithm +{ +public: + /** @brief Calculates a sparse optical flow. + + @param prevImg First input image. + @param nextImg Second input image of the same size and the same type as prevImg. + @param prevPts Vector of 2D points for which the flow needs to be found. + @param nextPts Output vector of 2D points containing the calculated new positions of input features in the second image. + @param status Output status vector. Each element of the vector is set to 1 if the + flow for the corresponding features has been found. Otherwise, it is set to 0. + @param err Optional output vector that contains error response for each point (inverse confidence). + @param stream Stream for the asynchronous version. + */ + virtual void calc(InputArray prevImg, InputArray nextImg, + InputArray prevPts, InputOutputArray nextPts, + OutputArray status, + OutputArray err = cv::noArray(), + Stream& stream = Stream::Null()) = 0; +}; - //! gradient constancy importance - float gamma; +// +// BroxOpticalFlow +// - //! pyramid scale factor - float scale_factor; +/** @brief Class computing the optical flow for two images using Brox et al Optical Flow algorithm (@cite Brox2004). + */ +class CV_EXPORTS BroxOpticalFlow : public DenseOpticalFlow +{ +public: + virtual double getFlowSmoothness() const = 0; + virtual void setFlowSmoothness(double alpha) = 0; + + virtual double getGradientConstancyImportance() const = 0; + virtual void setGradientConstancyImportance(double gamma) = 0; + + virtual double getPyramidScaleFactor() const = 0; + virtual void setPyramidScaleFactor(double scale_factor) = 0; //! number of lagged non-linearity iterations (inner loop) - int inner_iterations; + virtual int getInnerIterations() const = 0; + virtual void setInnerIterations(int inner_iterations) = 0; //! number of warping iterations (number of pyramid levels) - int outer_iterations; + virtual int getOuterIterations() const = 0; + virtual void setOuterIterations(int outer_iterations) = 0; //! number of linear system solver iterations - int solver_iterations; - - GpuMat buf; + virtual int getSolverIterations() const = 0; + virtual void setSolverIterations(int solver_iterations) = 0; + + static Ptr create( + double alpha = 0.197, + double gamma = 50.0, + double scale_factor = 0.8, + int inner_iterations = 5, + int outer_iterations = 150, + int solver_iterations = 10); }; -/** @brief Class used for calculating an optical flow. +// +// PyrLKOpticalFlow +// + +/** @brief Class used for calculating a sparse optical flow. -The class can calculate an optical flow for a sparse feature set or dense optical flow using the +The class can calculate an optical flow for a sparse feature set using the iterative Lucas-Kanade method with pyramids. @sa calcOpticalFlowPyrLK @@ -112,158 +157,116 @@ iterative Lucas-Kanade method with pyramids. - An example of the Lucas Kanade optical flow algorithm can be found at opencv_source_code/samples/gpu/pyrlk_optical_flow.cpp */ -class CV_EXPORTS PyrLKOpticalFlow +class CV_EXPORTS SparsePyrLKOpticalFlow : public SparseOpticalFlow { public: - PyrLKOpticalFlow(); - - /** @brief Calculate an optical flow for a sparse feature set. - - @param prevImg First 8-bit input image (supports both grayscale and color images). - @param nextImg Second input image of the same size and the same type as prevImg . - @param prevPts Vector of 2D points for which the flow needs to be found. It must be one row matrix - with CV_32FC2 type. - @param nextPts Output vector of 2D points (with single-precision floating-point coordinates) - containing the calculated new positions of input features in the second image. When useInitialFlow - is true, the vector must have the same size as in the input. - @param status Output status vector (CV_8UC1 type). Each element of the vector is set to 1 if the - flow for the corresponding features has been found. Otherwise, it is set to 0. - @param err Output vector (CV_32FC1 type) that contains the difference between patches around the - original and moved points or min eigen value if getMinEigenVals is checked. It can be NULL, if not - needed. - - @sa calcOpticalFlowPyrLK - */ - void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, - GpuMat& status, GpuMat* err = 0); - - /** @brief Calculate dense optical flow. - - @param prevImg First 8-bit grayscale input image. - @param nextImg Second input image of the same size and the same type as prevImg . - @param u Horizontal component of the optical flow of the same size as input images, 32-bit - floating-point, single-channel - @param v Vertical component of the optical flow of the same size as input images, 32-bit - floating-point, single-channel - @param err Output vector (CV_32FC1 type) that contains the difference between patches around the - original and moved points or min eigen value if getMinEigenVals is checked. It can be NULL, if not - needed. - */ - void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0); - - /** @brief Releases inner buffers memory. - */ - void releaseMemory(); + virtual Size getWinSize() const = 0; + virtual void setWinSize(Size winSize) = 0; - Size winSize; - int maxLevel; - int iters; - bool useInitialFlow; + virtual int getMaxLevel() const = 0; + virtual void setMaxLevel(int maxLevel) = 0; -private: - std::vector prevPyr_; - std::vector nextPyr_; + virtual int getNumIters() const = 0; + virtual void setNumIters(int iters) = 0; - GpuMat buf_; + virtual bool getUseInitialFlow() const = 0; + virtual void setUseInitialFlow(bool useInitialFlow) = 0; - GpuMat uPyr_[2]; - GpuMat vPyr_[2]; + static Ptr create( + Size winSize = Size(21, 21), + int maxLevel = 3, + int iters = 30, + bool useInitialFlow = false); }; -/** @brief Class computing a dense optical flow using the Gunnar Farneback’s algorithm. : +/** @brief Class used for calculating a dense optical flow. + +The class can calculate an optical flow for a dense optical flow using the +iterative Lucas-Kanade method with pyramids. */ -class CV_EXPORTS FarnebackOpticalFlow +class CV_EXPORTS DensePyrLKOpticalFlow : public DenseOpticalFlow { public: - FarnebackOpticalFlow() - { - numLevels = 5; - pyrScale = 0.5; - fastPyramids = false; - winSize = 13; - numIters = 10; - polyN = 5; - polySigma = 1.1; - flags = 0; - } - - int numLevels; - double pyrScale; - bool fastPyramids; - int winSize; - int numIters; - int polyN; - double polySigma; - int flags; - - /** @brief Computes a dense optical flow using the Gunnar Farneback’s algorithm. - - @param frame0 First 8-bit gray-scale input image - @param frame1 Second 8-bit gray-scale input image - @param flowx Flow horizontal component - @param flowy Flow vertical component - @param s Stream - - @sa calcOpticalFlowFarneback - */ - void operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null()); + virtual Size getWinSize() const = 0; + virtual void setWinSize(Size winSize) = 0; - /** @brief Releases unused auxiliary memory buffers. - */ - void releaseMemory() - { - frames_[0].release(); - frames_[1].release(); - pyrLevel_[0].release(); - pyrLevel_[1].release(); - M_.release(); - bufM_.release(); - R_[0].release(); - R_[1].release(); - blurredFrame_[0].release(); - blurredFrame_[1].release(); - pyramid0_.clear(); - pyramid1_.clear(); - } - -private: - void prepareGaussian( - int n, double sigma, float *g, float *xg, float *xxg, - double &ig11, double &ig03, double &ig33, double &ig55); - - void setPolynomialExpansionConsts(int n, double sigma); - - void updateFlow_boxFilter( - const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy, - GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]); - - void updateFlow_gaussianBlur( - const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy, - GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]); - - GpuMat frames_[2]; - GpuMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2]; - std::vector pyramid0_, pyramid1_; + virtual int getMaxLevel() const = 0; + virtual void setMaxLevel(int maxLevel) = 0; + + virtual int getNumIters() const = 0; + virtual void setNumIters(int iters) = 0; + + virtual bool getUseInitialFlow() const = 0; + virtual void setUseInitialFlow(bool useInitialFlow) = 0; + + static Ptr create( + Size winSize = Size(13, 13), + int maxLevel = 3, + int iters = 30, + bool useInitialFlow = false); }; -// Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method // -// see reference: -// [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow". -// [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation". -class CV_EXPORTS OpticalFlowDual_TVL1_CUDA +// FarnebackOpticalFlow +// + +/** @brief Class computing a dense optical flow using the Gunnar Farneback’s algorithm. + */ +class CV_EXPORTS FarnebackOpticalFlow : public DenseOpticalFlow { public: - OpticalFlowDual_TVL1_CUDA(); + virtual int getNumLevels() const = 0; + virtual void setNumLevels(int numLevels) = 0; + + virtual double getPyrScale() const = 0; + virtual void setPyrScale(double pyrScale) = 0; + + virtual bool getFastPyramids() const = 0; + virtual void setFastPyramids(bool fastPyramids) = 0; - void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy); + virtual int getWinSize() const = 0; + virtual void setWinSize(int winSize) = 0; - void collectGarbage(); + virtual int getNumIters() const = 0; + virtual void setNumIters(int numIters) = 0; + + virtual int getPolyN() const = 0; + virtual void setPolyN(int polyN) = 0; + + virtual double getPolySigma() const = 0; + virtual void setPolySigma(double polySigma) = 0; + + virtual int getFlags() const = 0; + virtual void setFlags(int flags) = 0; + + static Ptr create( + int numLevels = 5, + double pyrScale = 0.5, + bool fastPyramids = false, + int winSize = 13, + int numIters = 10, + int polyN = 5, + double polySigma = 1.1, + int flags = 0); +}; + +// +// OpticalFlowDual_TVL1 +// +/** @brief Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method. + * + * @sa C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow". + * @sa Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation". + */ +class CV_EXPORTS OpticalFlowDual_TVL1 : public DenseOpticalFlow +{ +public: /** * Time step of the numerical scheme. */ - double tau; + virtual double getTau() const = 0; + virtual void setTau(double tau) = 0; /** * Weight parameter for the data term, attachment parameter. @@ -271,7 +274,8 @@ public: * The smaller this parameter is, the smoother the solutions we obtain. * It depends on the range of motions of the images, so its value should be adapted to each image sequence. */ - double lambda; + virtual double getLambda() const = 0; + virtual void setLambda(double lambda) = 0; /** * Weight parameter for (u - v)^2, tightness parameter. @@ -279,20 +283,23 @@ public: * In theory, it should have a small value in order to maintain both parts in correspondence. * The method is stable for a large range of values of this parameter. */ + virtual double getGamma() const = 0; + virtual void setGamma(double gamma) = 0; - double gamma; /** - * parameter used for motion estimation. It adds a variable allowing for illumination variations - * Set this parameter to 1. if you have varying illumination. - * See: Chambolle et al, A First-Order Primal-Dual Algorithm for Convex Problems with Applications to Imaging - * Journal of Mathematical imaging and vision, may 2011 Vol 40 issue 1, pp 120-145 - */ - double theta; + * parameter used for motion estimation. It adds a variable allowing for illumination variations + * Set this parameter to 1. if you have varying illumination. + * See: Chambolle et al, A First-Order Primal-Dual Algorithm for Convex Problems with Applications to Imaging + * Journal of Mathematical imaging and vision, may 2011 Vol 40 issue 1, pp 120-145 + */ + virtual double getTheta() const = 0; + virtual void setTheta(double theta) = 0; /** * Number of scales used to create the pyramid of images. */ - int nscales; + virtual int getNumScales() const = 0; + virtual void setNumScales(int nscales) = 0; /** * Number of warpings per scale. @@ -300,51 +307,39 @@ public: * This is a parameter that assures the stability of the method. * It also affects the running time, so it is a compromise between speed and accuracy. */ - int warps; + virtual int getNumWarps() const = 0; + virtual void setNumWarps(int warps) = 0; /** * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time. * A small value will yield more accurate solutions at the expense of a slower convergence. */ - double epsilon; + virtual double getEpsilon() const = 0; + virtual void setEpsilon(double epsilon) = 0; /** * Stopping criterion iterations number used in the numerical scheme. */ - int iterations; - - double scaleStep; - - bool useInitialFlow; - -private: - void procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2, GpuMat& u3); - - std::vector I0s; - std::vector I1s; - std::vector u1s; - std::vector u2s; - std::vector u3s; - - GpuMat I1x_buf; - GpuMat I1y_buf; - - GpuMat I1w_buf; - GpuMat I1wx_buf; - GpuMat I1wy_buf; - - GpuMat grad_buf; - GpuMat rho_c_buf; - - GpuMat p11_buf; - GpuMat p12_buf; - GpuMat p21_buf; - GpuMat p22_buf; - GpuMat p31_buf; - GpuMat p32_buf; - - GpuMat diff_buf; - GpuMat norm_buf; + virtual int getNumIterations() const = 0; + virtual void setNumIterations(int iterations) = 0; + + virtual double getScaleStep() const = 0; + virtual void setScaleStep(double scaleStep) = 0; + + virtual bool getUseInitialFlow() const = 0; + virtual void setUseInitialFlow(bool useInitialFlow) = 0; + + static Ptr create( + double tau = 0.25, + double lambda = 0.15, + double theta = 0.3, + int nscales = 5, + int warps = 5, + double epsilon = 0.01, + int iterations = 300, + double scaleStep = 0.8, + double gamma = 0.0, + bool useInitialFlow = false); }; //! @} diff --git a/modules/cudaoptflow/src/brox.cpp b/modules/cudaoptflow/src/brox.cpp index 39eae9a8ab..11c541906b 100644 --- a/modules/cudaoptflow/src/brox.cpp +++ b/modules/cudaoptflow/src/brox.cpp @@ -47,84 +47,148 @@ using namespace cv::cuda; #if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_CUDALEGACY) || defined (CUDA_DISABLER) -void cv::cuda::BroxOpticalFlow::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } +Ptr cv::cuda::BroxOpticalFlow::create(double, double, double, int, int, int) { throw_no_cuda(); return Ptr(); } #else -namespace -{ - size_t getBufSize(const NCVBroxOpticalFlowDescriptor& desc, const NCVMatrix& frame0, const NCVMatrix& frame1, - NCVMatrix& u, NCVMatrix& v, const cudaDeviceProp& devProp) +namespace { + + class BroxOpticalFlowImpl : public BroxOpticalFlow { - NCVMemStackAllocator gpuCounter(static_cast(devProp.textureAlignment)); + public: + BroxOpticalFlowImpl(double alpha, double gamma, double scale_factor, + int inner_iterations, int outer_iterations, int solver_iterations) : + alpha_(alpha), gamma_(gamma), scale_factor_(scale_factor), + inner_iterations_(inner_iterations), outer_iterations_(outer_iterations), + solver_iterations_(solver_iterations) + { + } + + virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream); + + virtual double getFlowSmoothness() const { return alpha_; } + virtual void setFlowSmoothness(double alpha) { alpha_ = static_cast(alpha); } + + virtual double getGradientConstancyImportance() const { return gamma_; } + virtual void setGradientConstancyImportance(double gamma) { gamma_ = static_cast(gamma); } + + virtual double getPyramidScaleFactor() const { return scale_factor_; } + virtual void setPyramidScaleFactor(double scale_factor) { scale_factor_ = static_cast(scale_factor); } + + //! number of lagged non-linearity iterations (inner loop) + virtual int getInnerIterations() const { return inner_iterations_; } + virtual void setInnerIterations(int inner_iterations) { inner_iterations_ = inner_iterations; } + + //! number of warping iterations (number of pyramid levels) + virtual int getOuterIterations() const { return outer_iterations_; } + virtual void setOuterIterations(int outer_iterations) { outer_iterations_ = outer_iterations; } + + //! number of linear system solver iterations + virtual int getSolverIterations() const { return solver_iterations_; } + virtual void setSolverIterations(int solver_iterations) { solver_iterations_ = solver_iterations; } + + private: + //! flow smoothness + float alpha_; + + //! gradient constancy importance + float gamma_; + + //! pyramid scale factor + float scale_factor_; + + //! number of lagged non-linearity iterations (inner loop) + int inner_iterations_; + + //! number of warping iterations (number of pyramid levels) + int outer_iterations_; + + //! number of linear system solver iterations + int solver_iterations_; + }; + + static size_t getBufSize(const NCVBroxOpticalFlowDescriptor& desc, + const NCVMatrix& frame0, const NCVMatrix& frame1, + NCVMatrix& u, NCVMatrix& v, + size_t textureAlignment) + { + NCVMemStackAllocator gpuCounter(static_cast(textureAlignment)); ncvSafeCall( NCVBroxOpticalFlow(desc, gpuCounter, frame0, frame1, u, v, 0) ); return gpuCounter.maxSize(); } -} -namespace -{ - static void outputHandler(const String &msg) { CV_Error(cv::Error::GpuApiCallError, msg.c_str()); } -} + static void outputHandler(const String &msg) + { + CV_Error(cv::Error::GpuApiCallError, msg.c_str()); + } -void cv::cuda::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& s) -{ - ncvSetDebugOutputHandler(outputHandler); + void BroxOpticalFlowImpl::calc(InputArray _I0, InputArray _I1, InputOutputArray _flow, Stream& stream) + { + const GpuMat frame0 = _I0.getGpuMat(); + const GpuMat frame1 = _I1.getGpuMat(); - CV_Assert(frame0.type() == CV_32FC1); - CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type()); + CV_Assert( frame0.type() == CV_32FC1 ); + CV_Assert( frame1.size() == frame0.size() && frame1.type() == frame0.type() ); - u.create(frame0.size(), CV_32FC1); - v.create(frame0.size(), CV_32FC1); + ncvSetDebugOutputHandler(outputHandler); - cudaDeviceProp devProp; - cudaSafeCall( cudaGetDeviceProperties(&devProp, getDevice()) ); + BufferPool pool(stream); + GpuMat u = pool.getBuffer(frame0.size(), CV_32FC1); + GpuMat v = pool.getBuffer(frame0.size(), CV_32FC1); - NCVBroxOpticalFlowDescriptor desc; + NCVBroxOpticalFlowDescriptor desc; + desc.alpha = alpha_; + desc.gamma = gamma_; + desc.scale_factor = scale_factor_; + desc.number_of_inner_iterations = inner_iterations_; + desc.number_of_outer_iterations = outer_iterations_; + desc.number_of_solver_iterations = solver_iterations_; - desc.alpha = alpha; - desc.gamma = gamma; - desc.scale_factor = scale_factor; - desc.number_of_inner_iterations = inner_iterations; - desc.number_of_outer_iterations = outer_iterations; - desc.number_of_solver_iterations = solver_iterations; + NCVMemSegment frame0MemSeg; + frame0MemSeg.begin.memtype = NCVMemoryTypeDevice; + frame0MemSeg.begin.ptr = const_cast(frame0.data); + frame0MemSeg.size = frame0.step * frame0.rows; - NCVMemSegment frame0MemSeg; - frame0MemSeg.begin.memtype = NCVMemoryTypeDevice; - frame0MemSeg.begin.ptr = const_cast(frame0.data); - frame0MemSeg.size = frame0.step * frame0.rows; + NCVMemSegment frame1MemSeg; + frame1MemSeg.begin.memtype = NCVMemoryTypeDevice; + frame1MemSeg.begin.ptr = const_cast(frame1.data); + frame1MemSeg.size = frame1.step * frame1.rows; - NCVMemSegment frame1MemSeg; - frame1MemSeg.begin.memtype = NCVMemoryTypeDevice; - frame1MemSeg.begin.ptr = const_cast(frame1.data); - frame1MemSeg.size = frame1.step * frame1.rows; + NCVMemSegment uMemSeg; + uMemSeg.begin.memtype = NCVMemoryTypeDevice; + uMemSeg.begin.ptr = u.ptr(); + uMemSeg.size = u.step * u.rows; - NCVMemSegment uMemSeg; - uMemSeg.begin.memtype = NCVMemoryTypeDevice; - uMemSeg.begin.ptr = u.ptr(); - uMemSeg.size = u.step * u.rows; + NCVMemSegment vMemSeg; + vMemSeg.begin.memtype = NCVMemoryTypeDevice; + vMemSeg.begin.ptr = v.ptr(); + vMemSeg.size = v.step * v.rows; - NCVMemSegment vMemSeg; - vMemSeg.begin.memtype = NCVMemoryTypeDevice; - vMemSeg.begin.ptr = v.ptr(); - vMemSeg.size = v.step * v.rows; + DeviceInfo devInfo; + size_t textureAlignment = devInfo.textureAlignment(); - NCVMatrixReuse frame0Mat(frame0MemSeg, static_cast(devProp.textureAlignment), frame0.cols, frame0.rows, static_cast(frame0.step)); - NCVMatrixReuse frame1Mat(frame1MemSeg, static_cast(devProp.textureAlignment), frame1.cols, frame1.rows, static_cast(frame1.step)); - NCVMatrixReuse uMat(uMemSeg, static_cast(devProp.textureAlignment), u.cols, u.rows, static_cast(u.step)); - NCVMatrixReuse vMat(vMemSeg, static_cast(devProp.textureAlignment), v.cols, v.rows, static_cast(v.step)); + NCVMatrixReuse frame0Mat(frame0MemSeg, static_cast(textureAlignment), frame0.cols, frame0.rows, static_cast(frame0.step)); + NCVMatrixReuse frame1Mat(frame1MemSeg, static_cast(textureAlignment), frame1.cols, frame1.rows, static_cast(frame1.step)); + NCVMatrixReuse uMat(uMemSeg, static_cast(textureAlignment), u.cols, u.rows, static_cast(u.step)); + NCVMatrixReuse vMat(vMemSeg, static_cast(textureAlignment), v.cols, v.rows, static_cast(v.step)); - cudaStream_t stream = StreamAccessor::getStream(s); + size_t bufSize = getBufSize(desc, frame0Mat, frame1Mat, uMat, vMat, textureAlignment); + GpuMat buf = pool.getBuffer(1, static_cast(bufSize), CV_8UC1); - size_t bufSize = getBufSize(desc, frame0Mat, frame1Mat, uMat, vMat, devProp); + NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast(textureAlignment), buf.ptr()); - ensureSizeIsEnough(1, static_cast(bufSize), CV_8UC1, buf); + ncvSafeCall( NCVBroxOpticalFlow(desc, gpuAllocator, frame0Mat, frame1Mat, uMat, vMat, StreamAccessor::getStream(stream)) ); - NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast(devProp.textureAlignment), buf.ptr()); + GpuMat flows[] = {u, v}; + cuda::merge(flows, 2, _flow, stream); + } +} - ncvSafeCall( NCVBroxOpticalFlow(desc, gpuAllocator, frame0Mat, frame1Mat, uMat, vMat, stream) ); +Ptr cv::cuda::BroxOpticalFlow::create(double alpha, double gamma, double scale_factor, int inner_iterations, int outer_iterations, int solver_iterations) +{ + return makePtr(alpha, gamma, scale_factor, inner_iterations, outer_iterations, solver_iterations); } #endif /* HAVE_CUDA */ diff --git a/modules/cudaoptflow/src/cuda/pyrlk.cu b/modules/cudaoptflow/src/cuda/pyrlk.cu index d4606f2281..7693551fca 100644 --- a/modules/cudaoptflow/src/cuda/pyrlk.cu +++ b/modules/cudaoptflow/src/cuda/pyrlk.cu @@ -472,16 +472,16 @@ namespace pyrlk } } - void loadConstants(int2 winSize, int iters) + void loadConstants(int2 winSize, int iters, cudaStream_t stream) { - cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_winSize_y, &winSize.y, sizeof(int)) ); + cudaSafeCall( cudaMemcpyToSymbolAsync(c_winSize_x, &winSize.x, sizeof(int), 0, cudaMemcpyHostToDevice, stream) ); + cudaSafeCall( cudaMemcpyToSymbolAsync(c_winSize_y, &winSize.y, sizeof(int), 0, cudaMemcpyHostToDevice, stream) ); int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2); - cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_x, &halfWin.x, sizeof(int)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_y, &halfWin.y, sizeof(int)) ); + cudaSafeCall( cudaMemcpyToSymbolAsync(c_halfWin_x, &halfWin.x, sizeof(int), 0, cudaMemcpyHostToDevice, stream) ); + cudaSafeCall( cudaMemcpyToSymbolAsync(c_halfWin_y, &halfWin.y, sizeof(int), 0, cudaMemcpyHostToDevice, stream) ); - cudaSafeCall( cudaMemcpyToSymbol(c_iters, &iters, sizeof(int)) ); + cudaSafeCall( cudaMemcpyToSymbolAsync(c_iters, &iters, sizeof(int), 0, cudaMemcpyHostToDevice, stream) ); } void sparse1(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount, diff --git a/modules/cudaoptflow/src/cuda/tvl1flow.cu b/modules/cudaoptflow/src/cuda/tvl1flow.cu index 2b66c972bc..66f0d664a0 100644 --- a/modules/cudaoptflow/src/cuda/tvl1flow.cu +++ b/modules/cudaoptflow/src/cuda/tvl1flow.cu @@ -66,15 +66,16 @@ namespace tvl1flow dy(y, x) = 0.5f * (src(::min(y + 1, src.rows - 1), x) - src(::max(y - 1, 0), x)); } - void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy) + void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy, cudaStream_t stream) { const dim3 block(32, 8); const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y)); - centeredGradientKernel<<>>(src, dx, dy); + centeredGradientKernel<<>>(src, dx, dy); cudaSafeCall( cudaGetLastError() ); - cudaSafeCall( cudaDeviceSynchronize() ); + if (!stream) + cudaSafeCall( cudaDeviceSynchronize() ); } } @@ -164,7 +165,10 @@ namespace tvl1flow rho(y, x) = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val; } - void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho) + void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, + PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, + PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho, + cudaStream_t stream) { const dim3 block(32, 8); const dim3 grid(divUp(I0.cols, block.x), divUp(I0.rows, block.y)); @@ -173,10 +177,11 @@ namespace tvl1flow bindTexture(&tex_I1x, I1x); bindTexture(&tex_I1y, I1y); - warpBackwardKernel<<>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho); + warpBackwardKernel<<>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho); cudaSafeCall( cudaGetLastError() ); - cudaSafeCall( cudaDeviceSynchronize() ); + if (!stream) + cudaSafeCall( cudaDeviceSynchronize() ); } } @@ -292,15 +297,17 @@ namespace tvl1flow PtrStepSzf grad, PtrStepSzf rho_c, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf error, - float l_t, float theta, float gamma, bool calcError) + float l_t, float theta, float gamma, bool calcError, + cudaStream_t stream) { const dim3 block(32, 8); const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y)); - estimateUKernel<<>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, error, l_t, theta, gamma, calcError); + estimateUKernel<<>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, error, l_t, theta, gamma, calcError); cudaSafeCall( cudaGetLastError() ); - cudaSafeCall( cudaDeviceSynchronize() ); + if (!stream) + cudaSafeCall( cudaDeviceSynchronize() ); } } @@ -346,15 +353,19 @@ namespace tvl1flow } } - void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, float taut, float gamma) + void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, + PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, + float taut, float gamma, + cudaStream_t stream) { const dim3 block(32, 8); const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y)); - estimateDualVariablesKernel<<>>(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma); + estimateDualVariablesKernel<<>>(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma); cudaSafeCall( cudaGetLastError() ); - cudaSafeCall( cudaDeviceSynchronize() ); + if (!stream) + cudaSafeCall( cudaDeviceSynchronize() ); } } diff --git a/modules/cudaoptflow/src/farneback.cpp b/modules/cudaoptflow/src/farneback.cpp index 6b74432632..b7fefeb191 100644 --- a/modules/cudaoptflow/src/farneback.cpp +++ b/modules/cudaoptflow/src/farneback.cpp @@ -42,23 +42,21 @@ #include "precomp.hpp" -#define MIN_SIZE 32 - -#define S(x) StreamAccessor::getStream(x) - -// CUDA resize() is fast, but it differs from the CPU analog. Disabling this flag -// leads to an inefficient code. It's for debug purposes only. -#define ENABLE_CUDA_RESIZE 1 - using namespace cv; using namespace cv::cuda; #if !defined HAVE_CUDA || defined(CUDA_DISABLER) -void cv::cuda::FarnebackOpticalFlow::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } +Ptr cv::cuda::FarnebackOpticalFlow::create(int, double, bool, int, int, int, double, int) { throw_no_cuda(); return Ptr(); } #else +#define MIN_SIZE 32 + +// CUDA resize() is fast, but it differs from the CPU analog. Disabling this flag +// leads to an inefficient code. It's for debug purposes only. +#define ENABLE_CUDA_RESIZE 1 + namespace cv { namespace cuda { namespace device { namespace optflow_farneback { void setPolynomialExpansionConsts( @@ -76,8 +74,6 @@ namespace cv { namespace cuda { namespace device { namespace optflow_farneback void updateFlowGpu( const PtrStepSzf M, PtrStepSzf flowx, PtrStepSzf flowy, cudaStream_t stream); - /*void boxFilterGpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);*/ - void boxFilter5Gpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream); void boxFilter5Gpu_CC11(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream); @@ -93,296 +89,381 @@ namespace cv { namespace cuda { namespace device { namespace optflow_farneback void gaussianBlur5Gpu_CC11( const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream); -}}}} // namespace cv { namespace cuda { namespace cudev { namespace optflow_farneback +}}}} namespace { - GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat) + class FarnebackOpticalFlowImpl : public FarnebackOpticalFlow { - if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols) - return mat(Rect(0, 0, cols, rows)); + public: + FarnebackOpticalFlowImpl(int numLevels, double pyrScale, bool fastPyramids, int winSize, + int numIters, int polyN, double polySigma, int flags) : + numLevels_(numLevels), pyrScale_(pyrScale), fastPyramids_(fastPyramids), winSize_(winSize), + numIters_(numIters), polyN_(polyN), polySigma_(polySigma), flags_(flags) + { + } - return mat = GpuMat(rows, cols, type); - } -} + virtual int getNumLevels() const { return numLevels_; } + virtual void setNumLevels(int numLevels) { numLevels_ = numLevels; } -void cv::cuda::FarnebackOpticalFlow::prepareGaussian( - int n, double sigma, float *g, float *xg, float *xxg, - double &ig11, double &ig03, double &ig33, double &ig55) -{ - double s = 0.; - for (int x = -n; x <= n; x++) - { - g[x] = (float)std::exp(-x*x/(2*sigma*sigma)); - s += g[x]; - } + virtual double getPyrScale() const { return pyrScale_; } + virtual void setPyrScale(double pyrScale) { pyrScale_ = pyrScale; } - s = 1./s; - for (int x = -n; x <= n; x++) - { - g[x] = (float)(g[x]*s); - xg[x] = (float)(x*g[x]); - xxg[x] = (float)(x*x*g[x]); - } + virtual bool getFastPyramids() const { return fastPyramids_; } + virtual void setFastPyramids(bool fastPyramids) { fastPyramids_ = fastPyramids; } - Mat_ G(6, 6); - G.setTo(0); + virtual int getWinSize() const { return winSize_; } + virtual void setWinSize(int winSize) { winSize_ = winSize; } - for (int y = -n; y <= n; y++) - { - for (int x = -n; x <= n; x++) - { - G(0,0) += g[y]*g[x]; - G(1,1) += g[y]*g[x]*x*x; - G(3,3) += g[y]*g[x]*x*x*x*x; - G(5,5) += g[y]*g[x]*x*x*y*y; - } - } + virtual int getNumIters() const { return numIters_; } + virtual void setNumIters(int numIters) { numIters_ = numIters; } - //G[0][0] = 1.; - G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1); - G(4,4) = G(3,3); - G(3,4) = G(4,3) = G(5,5); - - // invG: - // [ x e e ] - // [ y ] - // [ y ] - // [ e z ] - // [ e z ] - // [ u ] - Mat_ invG = G.inv(DECOMP_CHOLESKY); - - ig11 = invG(1,1); - ig03 = invG(0,3); - ig33 = invG(3,3); - ig55 = invG(5,5); -} + virtual int getPolyN() const { return polyN_; } + virtual void setPolyN(int polyN) { polyN_ = polyN; } + virtual double getPolySigma() const { return polySigma_; } + virtual void setPolySigma(double polySigma) { polySigma_ = polySigma; } -void cv::cuda::FarnebackOpticalFlow::setPolynomialExpansionConsts(int n, double sigma) -{ - std::vector buf(n*6 + 3); - float* g = &buf[0] + n; - float* xg = g + n*2 + 1; - float* xxg = xg + n*2 + 1; + virtual int getFlags() const { return flags_; } + virtual void setFlags(int flags) { flags_ = flags; } - if (sigma < FLT_EPSILON) - sigma = n*0.3; + virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream); - double ig11, ig03, ig33, ig55; - prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55); + private: + int numLevels_; + double pyrScale_; + bool fastPyramids_; + int winSize_; + int numIters_; + int polyN_; + double polySigma_; + int flags_; - device::optflow_farneback::setPolynomialExpansionConsts(n, g, xg, xxg, static_cast(ig11), static_cast(ig03), static_cast(ig33), static_cast(ig55)); -} + private: + void prepareGaussian( + int n, double sigma, float *g, float *xg, float *xxg, + double &ig11, double &ig03, double &ig33, double &ig55); + void setPolynomialExpansionConsts(int n, double sigma); -void cv::cuda::FarnebackOpticalFlow::updateFlow_boxFilter( - const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy, - GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]) -{ - if (deviceSupports(FEATURE_SET_COMPUTE_12)) - device::optflow_farneback::boxFilter5Gpu(M, blockSize/2, bufM, S(streams[0])); - else - device::optflow_farneback::boxFilter5Gpu_CC11(M, blockSize/2, bufM, S(streams[0])); - swap(M, bufM); - - for (int i = 1; i < 5; ++i) - streams[i].waitForCompletion(); - device::optflow_farneback::updateFlowGpu(M, flowx, flowy, S(streams[0])); - - if (updateMatrices) - device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, S(streams[0])); -} + void updateFlow_boxFilter( + const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy, + GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]); + void updateFlow_gaussianBlur( + const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy, + GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]); -void cv::cuda::FarnebackOpticalFlow::updateFlow_gaussianBlur( - const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy, - GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]) -{ - if (deviceSupports(FEATURE_SET_COMPUTE_12)) - device::optflow_farneback::gaussianBlur5Gpu( - M, blockSize/2, bufM, BORDER_REPLICATE, S(streams[0])); - else - device::optflow_farneback::gaussianBlur5Gpu_CC11( - M, blockSize/2, bufM, BORDER_REPLICATE, S(streams[0])); - swap(M, bufM); - - device::optflow_farneback::updateFlowGpu(M, flowx, flowy, S(streams[0])); - - if (updateMatrices) - device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, S(streams[0])); -} + void calcImpl(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &stream); + GpuMat frames_[2]; + GpuMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2]; + std::vector pyramid0_, pyramid1_; + }; -void cv::cuda::FarnebackOpticalFlow::operator ()( - const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s) -{ - CV_Assert(frame0.channels() == 1 && frame1.channels() == 1); - CV_Assert(frame0.size() == frame1.size()); - CV_Assert(polyN == 5 || polyN == 7); - CV_Assert(!fastPyramids || std::abs(pyrScale - 0.5) < 1e-6); - - Stream streams[5]; - if (S(s)) - streams[0] = s; - - Size size = frame0.size(); - GpuMat prevFlowX, prevFlowY, curFlowX, curFlowY; - - flowx.create(size, CV_32F); - flowy.create(size, CV_32F); - GpuMat flowx0 = flowx; - GpuMat flowy0 = flowy; - - // Crop unnecessary levels - double scale = 1; - int numLevelsCropped = 0; - for (; numLevelsCropped < numLevels; numLevelsCropped++) + void FarnebackOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOutputArray _flow, Stream& stream) { - scale *= pyrScale; - if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE) - break; + const GpuMat frame0 = _frame0.getGpuMat(); + const GpuMat frame1 = _frame1.getGpuMat(); + + BufferPool pool(stream); + GpuMat flowx = pool.getBuffer(frame0.size(), CV_32FC1); + GpuMat flowy = pool.getBuffer(frame0.size(), CV_32FC1); + + calcImpl(frame0, frame1, flowx, flowy, stream); + + GpuMat flows[] = {flowx, flowy}; + cuda::merge(flows, 2, _flow, stream); } - frame0.convertTo(frames_[0], CV_32F, streams[0]); - frame1.convertTo(frames_[1], CV_32F, streams[1]); + GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat) + { + if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols) + return mat(Rect(0, 0, cols, rows)); + + return mat = GpuMat(rows, cols, type); + } - if (fastPyramids) + void FarnebackOpticalFlowImpl::prepareGaussian( + int n, double sigma, float *g, float *xg, float *xxg, + double &ig11, double &ig03, double &ig33, double &ig55) { - // Build Gaussian pyramids using pyrDown() - pyramid0_.resize(numLevelsCropped + 1); - pyramid1_.resize(numLevelsCropped + 1); - pyramid0_[0] = frames_[0]; - pyramid1_[0] = frames_[1]; - for (int i = 1; i <= numLevelsCropped; ++i) + double s = 0.; + for (int x = -n; x <= n; x++) { - cuda::pyrDown(pyramid0_[i - 1], pyramid0_[i], streams[0]); - cuda::pyrDown(pyramid1_[i - 1], pyramid1_[i], streams[1]); + g[x] = (float)std::exp(-x*x/(2*sigma*sigma)); + s += g[x]; + } + + s = 1./s; + for (int x = -n; x <= n; x++) + { + g[x] = (float)(g[x]*s); + xg[x] = (float)(x*g[x]); + xxg[x] = (float)(x*x*g[x]); + } + + Mat_ G(6, 6); + G.setTo(0); + + for (int y = -n; y <= n; y++) + { + for (int x = -n; x <= n; x++) + { + G(0,0) += g[y]*g[x]; + G(1,1) += g[y]*g[x]*x*x; + G(3,3) += g[y]*g[x]*x*x*x*x; + G(5,5) += g[y]*g[x]*x*x*y*y; + } } + + //G[0][0] = 1.; + G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1); + G(4,4) = G(3,3); + G(3,4) = G(4,3) = G(5,5); + + // invG: + // [ x e e ] + // [ y ] + // [ y ] + // [ e z ] + // [ e z ] + // [ u ] + Mat_ invG = G.inv(DECOMP_CHOLESKY); + + ig11 = invG(1,1); + ig03 = invG(0,3); + ig33 = invG(3,3); + ig55 = invG(5,5); } - setPolynomialExpansionConsts(polyN, polySigma); - device::optflow_farneback::setUpdateMatricesConsts(); + void FarnebackOpticalFlowImpl::setPolynomialExpansionConsts(int n, double sigma) + { + std::vector buf(n*6 + 3); + float* g = &buf[0] + n; + float* xg = g + n*2 + 1; + float* xxg = xg + n*2 + 1; + + if (sigma < FLT_EPSILON) + sigma = n*0.3; + + double ig11, ig03, ig33, ig55; + prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55); - for (int k = numLevelsCropped; k >= 0; k--) + device::optflow_farneback::setPolynomialExpansionConsts(n, g, xg, xxg, static_cast(ig11), static_cast(ig03), static_cast(ig33), static_cast(ig55)); + } + + void FarnebackOpticalFlowImpl::updateFlow_boxFilter( + const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy, + GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]) { - streams[0].waitForCompletion(); + if (deviceSupports(FEATURE_SET_COMPUTE_12)) + device::optflow_farneback::boxFilter5Gpu(M, blockSize/2, bufM, StreamAccessor::getStream(streams[0])); + else + device::optflow_farneback::boxFilter5Gpu_CC11(M, blockSize/2, bufM, StreamAccessor::getStream(streams[0])); + swap(M, bufM); - scale = 1; - for (int i = 0; i < k; i++) - scale *= pyrScale; + for (int i = 1; i < 5; ++i) + streams[i].waitForCompletion(); + device::optflow_farneback::updateFlowGpu(M, flowx, flowy, StreamAccessor::getStream(streams[0])); - double sigma = (1./scale - 1) * 0.5; - int smoothSize = cvRound(sigma*5) | 1; - smoothSize = std::max(smoothSize, 3); + if (updateMatrices) + device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, StreamAccessor::getStream(streams[0])); + } - int width = cvRound(size.width*scale); - int height = cvRound(size.height*scale); + void FarnebackOpticalFlowImpl::updateFlow_gaussianBlur( + const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy, + GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]) + { + if (deviceSupports(FEATURE_SET_COMPUTE_12)) + device::optflow_farneback::gaussianBlur5Gpu( + M, blockSize/2, bufM, BORDER_REPLICATE, StreamAccessor::getStream(streams[0])); + else + device::optflow_farneback::gaussianBlur5Gpu_CC11( + M, blockSize/2, bufM, BORDER_REPLICATE, StreamAccessor::getStream(streams[0])); + swap(M, bufM); - if (fastPyramids) - { - width = pyramid0_[k].cols; - height = pyramid0_[k].rows; - } + device::optflow_farneback::updateFlowGpu(M, flowx, flowy, StreamAccessor::getStream(streams[0])); - if (k > 0) + if (updateMatrices) + device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, StreamAccessor::getStream(streams[0])); + } + + void FarnebackOpticalFlowImpl::calcImpl(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &stream) + { + CV_Assert(frame0.channels() == 1 && frame1.channels() == 1); + CV_Assert(frame0.size() == frame1.size()); + CV_Assert(polyN_ == 5 || polyN_ == 7); + CV_Assert(!fastPyramids_ || std::abs(pyrScale_ - 0.5) < 1e-6); + + Stream streams[5]; + if (stream) + streams[0] = stream; + + Size size = frame0.size(); + GpuMat prevFlowX, prevFlowY, curFlowX, curFlowY; + + flowx.create(size, CV_32F); + flowy.create(size, CV_32F); + GpuMat flowx0 = flowx; + GpuMat flowy0 = flowy; + + // Crop unnecessary levels + double scale = 1; + int numLevelsCropped = 0; + for (; numLevelsCropped < numLevels_; numLevelsCropped++) { - curFlowX.create(height, width, CV_32F); - curFlowY.create(height, width, CV_32F); + scale *= pyrScale_; + if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE) + break; } - else + + frame0.convertTo(frames_[0], CV_32F, streams[0]); + frame1.convertTo(frames_[1], CV_32F, streams[1]); + + if (fastPyramids_) { - curFlowX = flowx0; - curFlowY = flowy0; + // Build Gaussian pyramids using pyrDown() + pyramid0_.resize(numLevelsCropped + 1); + pyramid1_.resize(numLevelsCropped + 1); + pyramid0_[0] = frames_[0]; + pyramid1_[0] = frames_[1]; + for (int i = 1; i <= numLevelsCropped; ++i) + { + cuda::pyrDown(pyramid0_[i - 1], pyramid0_[i], streams[0]); + cuda::pyrDown(pyramid1_[i - 1], pyramid1_[i], streams[1]); + } } - if (!prevFlowX.data) + setPolynomialExpansionConsts(polyN_, polySigma_); + device::optflow_farneback::setUpdateMatricesConsts(); + + for (int k = numLevelsCropped; k >= 0; k--) { - if (flags & OPTFLOW_USE_INITIAL_FLOW) + streams[0].waitForCompletion(); + + scale = 1; + for (int i = 0; i < k; i++) + scale *= pyrScale_; + + double sigma = (1./scale - 1) * 0.5; + int smoothSize = cvRound(sigma*5) | 1; + smoothSize = std::max(smoothSize, 3); + + int width = cvRound(size.width*scale); + int height = cvRound(size.height*scale); + + if (fastPyramids_) { - cuda::resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]); - cuda::resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]); - curFlowX.convertTo(curFlowX, curFlowX.depth(), scale, streams[0]); - curFlowY.convertTo(curFlowY, curFlowY.depth(), scale, streams[1]); + width = pyramid0_[k].cols; + height = pyramid0_[k].rows; + } + + if (k > 0) + { + curFlowX.create(height, width, CV_32F); + curFlowY.create(height, width, CV_32F); } else { - curFlowX.setTo(0, streams[0]); - curFlowY.setTo(0, streams[1]); + curFlowX = flowx0; + curFlowY = flowy0; } - } - else - { - cuda::resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]); - cuda::resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]); - curFlowX.convertTo(curFlowX, curFlowX.depth(), 1./pyrScale, streams[0]); - curFlowY.convertTo(curFlowY, curFlowY.depth(), 1./pyrScale, streams[1]); - } - GpuMat M = allocMatFromBuf(5*height, width, CV_32F, M_); - GpuMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_); - GpuMat R[2] = - { - allocMatFromBuf(5*height, width, CV_32F, R_[0]), - allocMatFromBuf(5*height, width, CV_32F, R_[1]) - }; + if (!prevFlowX.data) + { + if (flags_ & OPTFLOW_USE_INITIAL_FLOW) + { + cuda::resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]); + cuda::resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]); + curFlowX.convertTo(curFlowX, curFlowX.depth(), scale, streams[0]); + curFlowY.convertTo(curFlowY, curFlowY.depth(), scale, streams[1]); + } + else + { + curFlowX.setTo(0, streams[0]); + curFlowY.setTo(0, streams[1]); + } + } + else + { + cuda::resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]); + cuda::resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]); + curFlowX.convertTo(curFlowX, curFlowX.depth(), 1./pyrScale_, streams[0]); + curFlowY.convertTo(curFlowY, curFlowY.depth(), 1./pyrScale_, streams[1]); + } - if (fastPyramids) - { - device::optflow_farneback::polynomialExpansionGpu(pyramid0_[k], polyN, R[0], S(streams[0])); - device::optflow_farneback::polynomialExpansionGpu(pyramid1_[k], polyN, R[1], S(streams[1])); - } - else - { - GpuMat blurredFrame[2] = + GpuMat M = allocMatFromBuf(5*height, width, CV_32F, M_); + GpuMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_); + GpuMat R[2] = { - allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]), - allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1]) + allocMatFromBuf(5*height, width, CV_32F, R_[0]), + allocMatFromBuf(5*height, width, CV_32F, R_[1]) }; - GpuMat pyrLevel[2] = + + if (fastPyramids_) { - allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]), - allocMatFromBuf(height, width, CV_32F, pyrLevel_[1]) - }; + device::optflow_farneback::polynomialExpansionGpu(pyramid0_[k], polyN_, R[0], StreamAccessor::getStream(streams[0])); + device::optflow_farneback::polynomialExpansionGpu(pyramid1_[k], polyN_, R[1], StreamAccessor::getStream(streams[1])); + } + else + { + GpuMat blurredFrame[2] = + { + allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]), + allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1]) + }; + GpuMat pyrLevel[2] = + { + allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]), + allocMatFromBuf(height, width, CV_32F, pyrLevel_[1]) + }; + + Mat g = getGaussianKernel(smoothSize, sigma, CV_32F); + device::optflow_farneback::setGaussianBlurKernel(g.ptr(smoothSize/2), smoothSize/2); + + for (int i = 0; i < 2; i++) + { + device::optflow_farneback::gaussianBlurGpu( + frames_[i], smoothSize/2, blurredFrame[i], BORDER_REFLECT101, StreamAccessor::getStream(streams[i])); + cuda::resize(blurredFrame[i], pyrLevel[i], Size(width, height), 0.0, 0.0, INTER_LINEAR, streams[i]); + device::optflow_farneback::polynomialExpansionGpu(pyrLevel[i], polyN_, R[i], StreamAccessor::getStream(streams[i])); + } + } - Mat g = getGaussianKernel(smoothSize, sigma, CV_32F); - device::optflow_farneback::setGaussianBlurKernel(g.ptr(smoothSize/2), smoothSize/2); + streams[1].waitForCompletion(); + device::optflow_farneback::updateMatricesGpu(curFlowX, curFlowY, R[0], R[1], M, StreamAccessor::getStream(streams[0])); - for (int i = 0; i < 2; i++) + if (flags_ & OPTFLOW_FARNEBACK_GAUSSIAN) { - device::optflow_farneback::gaussianBlurGpu( - frames_[i], smoothSize/2, blurredFrame[i], BORDER_REFLECT101, S(streams[i])); - cuda::resize(blurredFrame[i], pyrLevel[i], Size(width, height), 0.0, 0.0, INTER_LINEAR, streams[i]); - device::optflow_farneback::polynomialExpansionGpu(pyrLevel[i], polyN, R[i], S(streams[i])); + Mat g = getGaussianKernel(winSize_, winSize_/2*0.3f, CV_32F); + device::optflow_farneback::setGaussianBlurKernel(g.ptr(winSize_/2), winSize_/2); + } + for (int i = 0; i < numIters_; i++) + { + if (flags_ & OPTFLOW_FARNEBACK_GAUSSIAN) + updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize_, i < numIters_-1, streams); + else + updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize_, i < numIters_-1, streams); } - } - - streams[1].waitForCompletion(); - device::optflow_farneback::updateMatricesGpu(curFlowX, curFlowY, R[0], R[1], M, S(streams[0])); - if (flags & OPTFLOW_FARNEBACK_GAUSSIAN) - { - Mat g = getGaussianKernel(winSize, winSize/2*0.3f, CV_32F); - device::optflow_farneback::setGaussianBlurKernel(g.ptr(winSize/2), winSize/2); - } - for (int i = 0; i < numIters; i++) - { - if (flags & OPTFLOW_FARNEBACK_GAUSSIAN) - updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1, streams); - else - updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize, i < numIters-1, streams); + prevFlowX = curFlowX; + prevFlowY = curFlowY; } - prevFlowX = curFlowX; - prevFlowY = curFlowY; - } + flowx = curFlowX; + flowy = curFlowY; - flowx = curFlowX; - flowy = curFlowY; + if (!stream) + streams[0].waitForCompletion(); + } +} - if (!S(s)) - streams[0].waitForCompletion(); +Ptr cv::cuda::FarnebackOpticalFlow::create(int numLevels, double pyrScale, bool fastPyramids, int winSize, + int numIters, int polyN, double polySigma, int flags) +{ + return makePtr(numLevels, pyrScale, fastPyramids, winSize, + numIters, polyN, polySigma, flags); } #endif diff --git a/modules/cudaoptflow/src/pyrlk.cpp b/modules/cudaoptflow/src/pyrlk.cpp index 52ee91f2fe..f4182743c0 100644 --- a/modules/cudaoptflow/src/pyrlk.cpp +++ b/modules/cudaoptflow/src/pyrlk.cpp @@ -47,37 +47,54 @@ using namespace cv::cuda; #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) -cv::cuda::PyrLKOpticalFlow::PyrLKOpticalFlow() { throw_no_cuda(); } -void cv::cuda::PyrLKOpticalFlow::sparse(const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat*) { throw_no_cuda(); } -void cv::cuda::PyrLKOpticalFlow::dense(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat*) { throw_no_cuda(); } -void cv::cuda::PyrLKOpticalFlow::releaseMemory() {} +Ptr cv::cuda::SparsePyrLKOpticalFlow::create(Size, int, int, bool) { throw_no_cuda(); return Ptr(); } + +Ptr cv::cuda::DensePyrLKOpticalFlow::create(Size, int, int, bool) { throw_no_cuda(); return Ptr(); } #else /* !defined (HAVE_CUDA) */ namespace pyrlk { - void loadConstants(int2 winSize, int iters); + void loadConstants(int2 winSize, int iters, cudaStream_t stream); void sparse1(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount, - int level, dim3 block, dim3 patch, cudaStream_t stream = 0); + int level, dim3 block, dim3 patch, cudaStream_t stream); void sparse4(PtrStepSz I, PtrStepSz J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount, - int level, dim3 block, dim3 patch, cudaStream_t stream = 0); + int level, dim3 block, dim3 patch, cudaStream_t stream); void dense(PtrStepSzb I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV, - PtrStepSzf err, int2 winSize, cudaStream_t stream = 0); -} - -cv::cuda::PyrLKOpticalFlow::PyrLKOpticalFlow() -{ - winSize = Size(21, 21); - maxLevel = 3; - iters = 30; - useInitialFlow = false; + PtrStepSzf err, int2 winSize, cudaStream_t stream); } namespace { - void calcPatchSize(cv::Size winSize, dim3& block, dim3& patch) + class PyrLKOpticalFlowBase + { + public: + PyrLKOpticalFlowBase(Size winSize, int maxLevel, int iters, bool useInitialFlow); + + void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, + GpuMat& status, GpuMat* err, Stream& stream); + + void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, Stream& stream); + + protected: + Size winSize_; + int maxLevel_; + int iters_; + bool useInitialFlow_; + + private: + std::vector prevPyr_; + std::vector nextPyr_; + }; + + PyrLKOpticalFlowBase::PyrLKOpticalFlowBase(Size winSize, int maxLevel, int iters, bool useInitialFlow) : + winSize_(winSize), maxLevel_(maxLevel), iters_(iters), useInitialFlow_(useInitialFlow) + { + } + + void calcPatchSize(Size winSize, dim3& block, dim3& patch) { if (winSize.width > 32 && winSize.width > 2 * winSize.height) { @@ -95,156 +112,239 @@ namespace block.z = patch.z = 1; } -} -void cv::cuda::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err) -{ - if (prevPts.empty()) + void PyrLKOpticalFlowBase::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err, Stream& stream) { - nextPts.release(); - status.release(); - if (err) err->release(); - return; - } + if (prevPts.empty()) + { + nextPts.release(); + status.release(); + if (err) err->release(); + return; + } - dim3 block, patch; - calcPatchSize(winSize, block, patch); + dim3 block, patch; + calcPatchSize(winSize_, block, patch); - CV_Assert(prevImg.channels() == 1 || prevImg.channels() == 3 || prevImg.channels() == 4); - CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type()); - CV_Assert(maxLevel >= 0); - CV_Assert(winSize.width > 2 && winSize.height > 2); - CV_Assert(patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6); - CV_Assert(prevPts.rows == 1 && prevPts.type() == CV_32FC2); + CV_Assert( prevImg.channels() == 1 || prevImg.channels() == 3 || prevImg.channels() == 4 ); + CV_Assert( prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type() ); + CV_Assert( maxLevel_ >= 0 ); + CV_Assert( winSize_.width > 2 && winSize_.height > 2 ); + CV_Assert( patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6 ); + CV_Assert( prevPts.rows == 1 && prevPts.type() == CV_32FC2 ); - if (useInitialFlow) - CV_Assert(nextPts.size() == prevPts.size() && nextPts.type() == CV_32FC2); - else - ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts); + if (useInitialFlow_) + CV_Assert( nextPts.size() == prevPts.size() && nextPts.type() == prevPts.type() ); + else + ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts); - GpuMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1); - GpuMat temp2 = nextPts.reshape(1); - cuda::multiply(temp1, Scalar::all(1.0 / (1 << maxLevel) / 2.0), temp2); + GpuMat temp1 = (useInitialFlow_ ? nextPts : prevPts).reshape(1); + GpuMat temp2 = nextPts.reshape(1); + cuda::multiply(temp1, Scalar::all(1.0 / (1 << maxLevel_) / 2.0), temp2, 1, -1, stream); - ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status); - status.setTo(Scalar::all(1)); + ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status); + status.setTo(Scalar::all(1), stream); - if (err) - ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err); + if (err) + ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err); - // build the image pyramids. + // build the image pyramids. - prevPyr_.resize(maxLevel + 1); - nextPyr_.resize(maxLevel + 1); + BufferPool pool(stream); - int cn = prevImg.channels(); + prevPyr_.resize(maxLevel_ + 1); + nextPyr_.resize(maxLevel_ + 1); - if (cn == 1 || cn == 4) - { - prevImg.convertTo(prevPyr_[0], CV_32F); - nextImg.convertTo(nextPyr_[0], CV_32F); - } - else - { - cuda::cvtColor(prevImg, buf_, COLOR_BGR2BGRA); - buf_.convertTo(prevPyr_[0], CV_32F); + int cn = prevImg.channels(); + + if (cn == 1 || cn == 4) + { + prevImg.convertTo(prevPyr_[0], CV_32F, stream); + nextImg.convertTo(nextPyr_[0], CV_32F, stream); + } + else + { + GpuMat buf = pool.getBuffer(prevImg.size(), CV_MAKE_TYPE(prevImg.depth(), 4)); - cuda::cvtColor(nextImg, buf_, COLOR_BGR2BGRA); - buf_.convertTo(nextPyr_[0], CV_32F); + cuda::cvtColor(prevImg, buf, COLOR_BGR2BGRA, 0, stream); + buf.convertTo(prevPyr_[0], CV_32F, stream); + + cuda::cvtColor(nextImg, buf, COLOR_BGR2BGRA, 0, stream); + buf.convertTo(nextPyr_[0], CV_32F, stream); + } + + for (int level = 1; level <= maxLevel_; ++level) + { + cuda::pyrDown(prevPyr_[level - 1], prevPyr_[level], stream); + cuda::pyrDown(nextPyr_[level - 1], nextPyr_[level], stream); + } + + pyrlk::loadConstants(make_int2(winSize_.width, winSize_.height), iters_, StreamAccessor::getStream(stream)); + + for (int level = maxLevel_; level >= 0; level--) + { + if (cn == 1) + { + pyrlk::sparse1(prevPyr_[level], nextPyr_[level], + prevPts.ptr(), nextPts.ptr(), + status.ptr(), + level == 0 && err ? err->ptr() : 0, prevPts.cols, + level, block, patch, + StreamAccessor::getStream(stream)); + } + else + { + pyrlk::sparse4(prevPyr_[level], nextPyr_[level], + prevPts.ptr(), nextPts.ptr(), + status.ptr(), + level == 0 && err ? err->ptr() : 0, prevPts.cols, + level, block, patch, + StreamAccessor::getStream(stream)); + } + } } - for (int level = 1; level <= maxLevel; ++level) + void PyrLKOpticalFlowBase::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, Stream& stream) { - cuda::pyrDown(prevPyr_[level - 1], prevPyr_[level]); - cuda::pyrDown(nextPyr_[level - 1], nextPyr_[level]); - } + CV_Assert( prevImg.type() == CV_8UC1 ); + CV_Assert( prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type() ); + CV_Assert( maxLevel_ >= 0 ); + CV_Assert( winSize_.width > 2 && winSize_.height > 2 ); - pyrlk::loadConstants(make_int2(winSize.width, winSize.height), iters); + // build the image pyramids. - for (int level = maxLevel; level >= 0; level--) - { - if (cn == 1) + prevPyr_.resize(maxLevel_ + 1); + nextPyr_.resize(maxLevel_ + 1); + + prevPyr_[0] = prevImg; + nextImg.convertTo(nextPyr_[0], CV_32F, stream); + + for (int level = 1; level <= maxLevel_; ++level) { - pyrlk::sparse1(prevPyr_[level], nextPyr_[level], - prevPts.ptr(), nextPts.ptr(), status.ptr(), level == 0 && err ? err->ptr() : 0, prevPts.cols, - level, block, patch); + cuda::pyrDown(prevPyr_[level - 1], prevPyr_[level], stream); + cuda::pyrDown(nextPyr_[level - 1], nextPyr_[level], stream); } - else + + BufferPool pool(stream); + + GpuMat uPyr[] = { + pool.getBuffer(prevImg.size(), CV_32FC1), + pool.getBuffer(prevImg.size(), CV_32FC1), + }; + GpuMat vPyr[] = { + pool.getBuffer(prevImg.size(), CV_32FC1), + pool.getBuffer(prevImg.size(), CV_32FC1), + }; + + uPyr[0].setTo(Scalar::all(0), stream); + vPyr[0].setTo(Scalar::all(0), stream); + uPyr[1].setTo(Scalar::all(0), stream); + vPyr[1].setTo(Scalar::all(0), stream); + + int2 winSize2i = make_int2(winSize_.width, winSize_.height); + pyrlk::loadConstants(winSize2i, iters_, StreamAccessor::getStream(stream)); + + int idx = 0; + + for (int level = maxLevel_; level >= 0; level--) { - pyrlk::sparse4(prevPyr_[level], nextPyr_[level], - prevPts.ptr(), nextPts.ptr(), status.ptr(), level == 0 && err ? err->ptr() : 0, prevPts.cols, - level, block, patch); + int idx2 = (idx + 1) & 1; + + pyrlk::dense(prevPyr_[level], nextPyr_[level], + uPyr[idx], vPyr[idx], uPyr[idx2], vPyr[idx2], + PtrStepSzf(), winSize2i, + StreamAccessor::getStream(stream)); + + if (level > 0) + idx = idx2; } + + uPyr[idx].copyTo(u, stream); + vPyr[idx].copyTo(v, stream); } -} -void cv::cuda::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err) -{ - CV_Assert(prevImg.type() == CV_8UC1); - CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type()); - CV_Assert(maxLevel >= 0); - CV_Assert(winSize.width > 2 && winSize.height > 2); + class SparsePyrLKOpticalFlowImpl : public SparsePyrLKOpticalFlow, private PyrLKOpticalFlowBase + { + public: + SparsePyrLKOpticalFlowImpl(Size winSize, int maxLevel, int iters, bool useInitialFlow) : + PyrLKOpticalFlowBase(winSize, maxLevel, iters, useInitialFlow) + { + } - if (err) - err->create(prevImg.size(), CV_32FC1); + virtual Size getWinSize() const { return winSize_; } + virtual void setWinSize(Size winSize) { winSize_ = winSize; } - // build the image pyramids. + virtual int getMaxLevel() const { return maxLevel_; } + virtual void setMaxLevel(int maxLevel) { maxLevel_ = maxLevel; } - prevPyr_.resize(maxLevel + 1); - nextPyr_.resize(maxLevel + 1); + virtual int getNumIters() const { return iters_; } + virtual void setNumIters(int iters) { iters_ = iters; } - prevPyr_[0] = prevImg; - nextImg.convertTo(nextPyr_[0], CV_32F); + virtual bool getUseInitialFlow() const { return useInitialFlow_; } + virtual void setUseInitialFlow(bool useInitialFlow) { useInitialFlow_ = useInitialFlow; } - for (int level = 1; level <= maxLevel; ++level) + virtual void calc(InputArray _prevImg, InputArray _nextImg, + InputArray _prevPts, InputOutputArray _nextPts, + OutputArray _status, + OutputArray _err, + Stream& stream) + { + const GpuMat prevImg = _prevImg.getGpuMat(); + const GpuMat nextImg = _nextImg.getGpuMat(); + const GpuMat prevPts = _prevPts.getGpuMat(); + GpuMat& nextPts = _nextPts.getGpuMatRef(); + GpuMat& status = _status.getGpuMatRef(); + GpuMat* err = _err.needed() ? &(_err.getGpuMatRef()) : NULL; + + sparse(prevImg, nextImg, prevPts, nextPts, status, err, stream); + } + }; + + class DensePyrLKOpticalFlowImpl : public DensePyrLKOpticalFlow, private PyrLKOpticalFlowBase { - cuda::pyrDown(prevPyr_[level - 1], prevPyr_[level]); - cuda::pyrDown(nextPyr_[level - 1], nextPyr_[level]); - } + public: + DensePyrLKOpticalFlowImpl(Size winSize, int maxLevel, int iters, bool useInitialFlow) : + PyrLKOpticalFlowBase(winSize, maxLevel, iters, useInitialFlow) + { + } - ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[0]); - ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[0]); - ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[1]); - ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[1]); - uPyr_[0].setTo(Scalar::all(0)); - vPyr_[0].setTo(Scalar::all(0)); - uPyr_[1].setTo(Scalar::all(0)); - vPyr_[1].setTo(Scalar::all(0)); + virtual Size getWinSize() const { return winSize_; } + virtual void setWinSize(Size winSize) { winSize_ = winSize; } - int2 winSize2i = make_int2(winSize.width, winSize.height); - pyrlk::loadConstants(winSize2i, iters); + virtual int getMaxLevel() const { return maxLevel_; } + virtual void setMaxLevel(int maxLevel) { maxLevel_ = maxLevel; } - PtrStepSzf derr = err ? *err : PtrStepSzf(); + virtual int getNumIters() const { return iters_; } + virtual void setNumIters(int iters) { iters_ = iters; } - int idx = 0; + virtual bool getUseInitialFlow() const { return useInitialFlow_; } + virtual void setUseInitialFlow(bool useInitialFlow) { useInitialFlow_ = useInitialFlow; } - for (int level = maxLevel; level >= 0; level--) - { - int idx2 = (idx + 1) & 1; + virtual void calc(InputArray _prevImg, InputArray _nextImg, InputOutputArray _flow, Stream& stream) + { + const GpuMat prevImg = _prevImg.getGpuMat(); + const GpuMat nextImg = _nextImg.getGpuMat(); - pyrlk::dense(prevPyr_[level], nextPyr_[level], uPyr_[idx], vPyr_[idx], uPyr_[idx2], vPyr_[idx2], - level == 0 ? derr : PtrStepSzf(), winSize2i); + BufferPool pool(stream); + GpuMat u = pool.getBuffer(prevImg.size(), CV_32FC1); + GpuMat v = pool.getBuffer(prevImg.size(), CV_32FC1); - if (level > 0) - idx = idx2; - } + dense(prevImg, nextImg, u, v, stream); - uPyr_[idx].copyTo(u); - vPyr_[idx].copyTo(v); + GpuMat flows[] = {u, v}; + cuda::merge(flows, 2, _flow, stream); + } + }; } -void cv::cuda::PyrLKOpticalFlow::releaseMemory() +Ptr cv::cuda::SparsePyrLKOpticalFlow::create(Size winSize, int maxLevel, int iters, bool useInitialFlow) { - prevPyr_.clear(); - nextPyr_.clear(); - - buf_.release(); - - uPyr_[0].release(); - vPyr_[0].release(); + return makePtr(winSize, maxLevel, iters, useInitialFlow); +} - uPyr_[1].release(); - vPyr_[1].release(); +Ptr cv::cuda::DensePyrLKOpticalFlow::create(Size winSize, int maxLevel, int iters, bool useInitialFlow) +{ + return makePtr(winSize, maxLevel, iters, useInitialFlow); } #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/cudaoptflow/src/tvl1flow.cpp b/modules/cudaoptflow/src/tvl1flow.cpp index b8dfea56f1..e2ef07b0d1 100644 --- a/modules/cudaoptflow/src/tvl1flow.cpp +++ b/modules/cudaoptflow/src/tvl1flow.cpp @@ -44,256 +44,338 @@ #if !defined HAVE_CUDA || defined(CUDA_DISABLER) -cv::cuda::OpticalFlowDual_TVL1_CUDA::OpticalFlowDual_TVL1_CUDA() { throw_no_cuda(); } -void cv::cuda::OpticalFlowDual_TVL1_CUDA::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); } -void cv::cuda::OpticalFlowDual_TVL1_CUDA::collectGarbage() {} -void cv::cuda::OpticalFlowDual_TVL1_CUDA::procOneScale(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); } +Ptr cv::cuda::OpticalFlowDual_TVL1::create(double, double, double, int, int, double, int, double, double, bool) { throw_no_cuda(); return Ptr(); } #else using namespace cv; using namespace cv::cuda; -cv::cuda::OpticalFlowDual_TVL1_CUDA::OpticalFlowDual_TVL1_CUDA() +namespace tvl1flow { - tau = 0.25; - lambda = 0.15; - theta = 0.3; - nscales = 5; - warps = 5; - epsilon = 0.01; - iterations = 300; - scaleStep = 0.8; - gamma = 0.0; - useInitialFlow = false; + void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy, cudaStream_t stream); + void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, + PtrStepSzf u1, PtrStepSzf u2, + PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, + PtrStepSzf grad, PtrStepSzf rho, + cudaStream_t stream); + void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy, + PtrStepSzf grad, PtrStepSzf rho_c, + PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, + PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf error, + float l_t, float theta, float gamma, bool calcError, + cudaStream_t stream); + void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, + PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, + float taut, float gamma, + cudaStream_t stream); } -void cv::cuda::OpticalFlowDual_TVL1_CUDA::operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy) +namespace { - CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 ); - CV_Assert( I0.size() == I1.size() ); - CV_Assert( I0.type() == I1.type() ); - CV_Assert( !useInitialFlow || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) ); - CV_Assert( nscales > 0 ); - - // allocate memory for the pyramid structure - I0s.resize(nscales); - I1s.resize(nscales); - u1s.resize(nscales); - u2s.resize(nscales); - u3s.resize(nscales); - - I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0); - I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0); - - if (!useInitialFlow) + class OpticalFlowDual_TVL1_Impl : public OpticalFlowDual_TVL1 { - flowx.create(I0.size(), CV_32FC1); - flowy.create(I0.size(), CV_32FC1); - } + public: + OpticalFlowDual_TVL1_Impl(double tau, double lambda, double theta, int nscales, int warps, double epsilon, + int iterations, double scaleStep, double gamma, bool useInitialFlow) : + tau_(tau), lambda_(lambda), gamma_(gamma), theta_(theta), nscales_(nscales), warps_(warps), + epsilon_(epsilon), iterations_(iterations), scaleStep_(scaleStep), useInitialFlow_(useInitialFlow) + { + } + + virtual double getTau() const { return tau_; } + virtual void setTau(double tau) { tau_ = tau; } + + virtual double getLambda() const { return lambda_; } + virtual void setLambda(double lambda) { lambda_ = lambda; } + + virtual double getGamma() const { return gamma_; } + virtual void setGamma(double gamma) { gamma_ = gamma; } + + virtual double getTheta() const { return theta_; } + virtual void setTheta(double theta) { theta_ = theta; } + + virtual int getNumScales() const { return nscales_; } + virtual void setNumScales(int nscales) { nscales_ = nscales; } + + virtual int getNumWarps() const { return warps_; } + virtual void setNumWarps(int warps) { warps_ = warps; } + + virtual double getEpsilon() const { return epsilon_; } + virtual void setEpsilon(double epsilon) { epsilon_ = epsilon; } + + virtual int getNumIterations() const { return iterations_; } + virtual void setNumIterations(int iterations) { iterations_ = iterations; } + + virtual double getScaleStep() const { return scaleStep_; } + virtual void setScaleStep(double scaleStep) { scaleStep_ = scaleStep; } + + virtual bool getUseInitialFlow() const { return useInitialFlow_; } + virtual void setUseInitialFlow(bool useInitialFlow) { useInitialFlow_ = useInitialFlow; } + + virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream); + + private: + double tau_; + double lambda_; + double gamma_; + double theta_; + int nscales_; + int warps_; + double epsilon_; + int iterations_; + double scaleStep_; + bool useInitialFlow_; + + private: + void calcImpl(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, Stream& stream); + void procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2, GpuMat& u3, Stream& stream); - u1s[0] = flowx; - u2s[0] = flowy; - if (gamma) - u3s[0].create(I0.size(), CV_32FC1); + std::vector I0s; + std::vector I1s; + std::vector u1s; + std::vector u2s; + std::vector u3s; - I1x_buf.create(I0.size(), CV_32FC1); - I1y_buf.create(I0.size(), CV_32FC1); + GpuMat I1x_buf; + GpuMat I1y_buf; - I1w_buf.create(I0.size(), CV_32FC1); - I1wx_buf.create(I0.size(), CV_32FC1); - I1wy_buf.create(I0.size(), CV_32FC1); + GpuMat I1w_buf; + GpuMat I1wx_buf; + GpuMat I1wy_buf; - grad_buf.create(I0.size(), CV_32FC1); - rho_c_buf.create(I0.size(), CV_32FC1); + GpuMat grad_buf; + GpuMat rho_c_buf; - p11_buf.create(I0.size(), CV_32FC1); - p12_buf.create(I0.size(), CV_32FC1); - p21_buf.create(I0.size(), CV_32FC1); - p22_buf.create(I0.size(), CV_32FC1); - if (gamma) + GpuMat p11_buf; + GpuMat p12_buf; + GpuMat p21_buf; + GpuMat p22_buf; + GpuMat p31_buf; + GpuMat p32_buf; + + GpuMat diff_buf; + GpuMat norm_buf; + }; + + void OpticalFlowDual_TVL1_Impl::calc(InputArray _frame0, InputArray _frame1, InputOutputArray _flow, Stream& stream) { - p31_buf.create(I0.size(), CV_32FC1); - p32_buf.create(I0.size(), CV_32FC1); + const GpuMat frame0 = _frame0.getGpuMat(); + const GpuMat frame1 = _frame1.getGpuMat(); + + BufferPool pool(stream); + GpuMat flowx = pool.getBuffer(frame0.size(), CV_32FC1); + GpuMat flowy = pool.getBuffer(frame0.size(), CV_32FC1); + + calcImpl(frame0, frame1, flowx, flowy, stream); + + GpuMat flows[] = {flowx, flowy}; + cuda::merge(flows, 2, _flow, stream); } - diff_buf.create(I0.size(), CV_32FC1); - // create the scales - for (int s = 1; s < nscales; ++s) + void OpticalFlowDual_TVL1_Impl::calcImpl(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, Stream& stream) { - cuda::resize(I0s[s-1], I0s[s], Size(), scaleStep, scaleStep); - cuda::resize(I1s[s-1], I1s[s], Size(), scaleStep, scaleStep); - - if (I0s[s].cols < 16 || I0s[s].rows < 16) + CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 ); + CV_Assert( I0.size() == I1.size() ); + CV_Assert( I0.type() == I1.type() ); + CV_Assert( !useInitialFlow_ || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) ); + CV_Assert( nscales_ > 0 ); + + // allocate memory for the pyramid structure + I0s.resize(nscales_); + I1s.resize(nscales_); + u1s.resize(nscales_); + u2s.resize(nscales_); + u3s.resize(nscales_); + + I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0, stream); + I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0, stream); + + if (!useInitialFlow_) { - nscales = s; - break; + flowx.create(I0.size(), CV_32FC1); + flowy.create(I0.size(), CV_32FC1); } - if (useInitialFlow) + u1s[0] = flowx; + u2s[0] = flowy; + if (gamma_) { - cuda::resize(u1s[s-1], u1s[s], Size(), scaleStep, scaleStep); - cuda::resize(u2s[s-1], u2s[s], Size(), scaleStep, scaleStep); + u3s[0].create(I0.size(), CV_32FC1); + } + + I1x_buf.create(I0.size(), CV_32FC1); + I1y_buf.create(I0.size(), CV_32FC1); + + I1w_buf.create(I0.size(), CV_32FC1); + I1wx_buf.create(I0.size(), CV_32FC1); + I1wy_buf.create(I0.size(), CV_32FC1); - cuda::multiply(u1s[s], Scalar::all(scaleStep), u1s[s]); - cuda::multiply(u2s[s], Scalar::all(scaleStep), u2s[s]); + grad_buf.create(I0.size(), CV_32FC1); + rho_c_buf.create(I0.size(), CV_32FC1); + + p11_buf.create(I0.size(), CV_32FC1); + p12_buf.create(I0.size(), CV_32FC1); + p21_buf.create(I0.size(), CV_32FC1); + p22_buf.create(I0.size(), CV_32FC1); + if (gamma_) + { + p31_buf.create(I0.size(), CV_32FC1); + p32_buf.create(I0.size(), CV_32FC1); } - else + diff_buf.create(I0.size(), CV_32FC1); + + // create the scales + for (int s = 1; s < nscales_; ++s) { - u1s[s].create(I0s[s].size(), CV_32FC1); - u2s[s].create(I0s[s].size(), CV_32FC1); + cuda::resize(I0s[s-1], I0s[s], Size(), scaleStep_, scaleStep_, INTER_LINEAR, stream); + cuda::resize(I1s[s-1], I1s[s], Size(), scaleStep_, scaleStep_, INTER_LINEAR, stream); + + if (I0s[s].cols < 16 || I0s[s].rows < 16) + { + nscales_ = s; + break; + } + + if (useInitialFlow_) + { + cuda::resize(u1s[s-1], u1s[s], Size(), scaleStep_, scaleStep_, INTER_LINEAR, stream); + cuda::resize(u2s[s-1], u2s[s], Size(), scaleStep_, scaleStep_, INTER_LINEAR, stream); + + cuda::multiply(u1s[s], Scalar::all(scaleStep_), u1s[s], 1, -1, stream); + cuda::multiply(u2s[s], Scalar::all(scaleStep_), u2s[s], 1, -1, stream); + } + else + { + u1s[s].create(I0s[s].size(), CV_32FC1); + u2s[s].create(I0s[s].size(), CV_32FC1); + } + if (gamma_) + { + u3s[s].create(I0s[s].size(), CV_32FC1); + } } - if (gamma) - u3s[s].create(I0s[s].size(), CV_32FC1); - } - if (!useInitialFlow) - { - u1s[nscales-1].setTo(Scalar::all(0)); - u2s[nscales-1].setTo(Scalar::all(0)); - } - if (gamma) - u3s[nscales - 1].setTo(Scalar::all(0)); + if (!useInitialFlow_) + { + u1s[nscales_-1].setTo(Scalar::all(0), stream); + u2s[nscales_-1].setTo(Scalar::all(0), stream); + } + if (gamma_) + { + u3s[nscales_ - 1].setTo(Scalar::all(0), stream); + } - // pyramidal structure for computing the optical flow - for (int s = nscales - 1; s >= 0; --s) - { - // compute the optical flow at the current scale - procOneScale(I0s[s], I1s[s], u1s[s], u2s[s], u3s[s]); + // pyramidal structure for computing the optical flow + for (int s = nscales_ - 1; s >= 0; --s) + { + // compute the optical flow at the current scale + procOneScale(I0s[s], I1s[s], u1s[s], u2s[s], u3s[s], stream); - // if this was the last scale, finish now - if (s == 0) - break; + // if this was the last scale, finish now + if (s == 0) + break; - // otherwise, upsample the optical flow + // otherwise, upsample the optical flow - // zoom the optical flow for the next finer scale - cuda::resize(u1s[s], u1s[s - 1], I0s[s - 1].size()); - cuda::resize(u2s[s], u2s[s - 1], I0s[s - 1].size()); - if (gamma) - cuda::resize(u3s[s], u3s[s - 1], I0s[s - 1].size()); + // zoom the optical flow for the next finer scale + cuda::resize(u1s[s], u1s[s - 1], I0s[s - 1].size(), 0, 0, INTER_LINEAR, stream); + cuda::resize(u2s[s], u2s[s - 1], I0s[s - 1].size(), 0, 0, INTER_LINEAR, stream); + if (gamma_) + { + cuda::resize(u3s[s], u3s[s - 1], I0s[s - 1].size(), 0, 0, INTER_LINEAR, stream); + } - // scale the optical flow with the appropriate zoom factor - cuda::multiply(u1s[s - 1], Scalar::all(1/scaleStep), u1s[s - 1]); - cuda::multiply(u2s[s - 1], Scalar::all(1/scaleStep), u2s[s - 1]); + // scale the optical flow with the appropriate zoom factor + cuda::multiply(u1s[s - 1], Scalar::all(1/scaleStep_), u1s[s - 1], 1, -1, stream); + cuda::multiply(u2s[s - 1], Scalar::all(1/scaleStep_), u2s[s - 1], 1, -1, stream); + } } -} - -namespace tvl1flow -{ - void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy); - void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho); - void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy, - PtrStepSzf grad, PtrStepSzf rho_c, - PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, - PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf error, - float l_t, float theta, float gamma, bool calcError); - void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, float taut, const float gamma); -} -void cv::cuda::OpticalFlowDual_TVL1_CUDA::procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2, GpuMat& u3) -{ - using namespace tvl1flow; + void OpticalFlowDual_TVL1_Impl::procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2, GpuMat& u3, Stream& _stream) + { + using namespace tvl1flow; - const double scaledEpsilon = epsilon * epsilon * I0.size().area(); + cudaStream_t stream = StreamAccessor::getStream(_stream); - CV_DbgAssert( I1.size() == I0.size() ); - CV_DbgAssert( I1.type() == I0.type() ); - CV_DbgAssert( u1.size() == I0.size() ); - CV_DbgAssert( u2.size() == u1.size() ); + const double scaledEpsilon = epsilon_ * epsilon_ * I0.size().area(); - GpuMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows)); - GpuMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows)); - centeredGradient(I1, I1x, I1y); + CV_DbgAssert( I1.size() == I0.size() ); + CV_DbgAssert( I1.type() == I0.type() ); + CV_DbgAssert( u1.size() == I0.size() ); + CV_DbgAssert( u2.size() == u1.size() ); - GpuMat I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows)); - GpuMat I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows)); - GpuMat I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows)); + centeredGradient(I1, I1x, I1y, stream); - GpuMat grad = grad_buf(Rect(0, 0, I0.cols, I0.rows)); - GpuMat rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows)); - GpuMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows)); - GpuMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows)); - GpuMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows)); - GpuMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows)); - GpuMat p31, p32; - if (gamma) - { - p31 = p31_buf(Rect(0, 0, I0.cols, I0.rows)); - p32 = p32_buf(Rect(0, 0, I0.cols, I0.rows)); - } - p11.setTo(Scalar::all(0)); - p12.setTo(Scalar::all(0)); - p21.setTo(Scalar::all(0)); - p22.setTo(Scalar::all(0)); - if (gamma) - { - p31.setTo(Scalar::all(0)); - p32.setTo(Scalar::all(0)); - } + GpuMat grad = grad_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows)); - GpuMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows)); + GpuMat p31, p32; + if (gamma_) + { + p31 = p31_buf(Rect(0, 0, I0.cols, I0.rows)); + p32 = p32_buf(Rect(0, 0, I0.cols, I0.rows)); + } + p11.setTo(Scalar::all(0), _stream); + p12.setTo(Scalar::all(0), _stream); + p21.setTo(Scalar::all(0), _stream); + p22.setTo(Scalar::all(0), _stream); + if (gamma_) + { + p31.setTo(Scalar::all(0), _stream); + p32.setTo(Scalar::all(0), _stream); + } - const float l_t = static_cast(lambda * theta); - const float taut = static_cast(tau / theta); + GpuMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows)); - for (int warpings = 0; warpings < warps; ++warpings) - { - warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c); + const float l_t = static_cast(lambda_ * theta_); + const float taut = static_cast(tau_ / theta_); - double error = std::numeric_limits::max(); - double prevError = 0.0; - for (int n = 0; error > scaledEpsilon && n < iterations; ++n) + for (int warpings = 0; warpings < warps_; ++warpings) { - // some tweaks to make sum operation less frequently - bool calcError = (epsilon > 0) && (n & 0x1) && (prevError < scaledEpsilon); - estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, diff, l_t, static_cast(theta), gamma, calcError); - if (calcError) - { - error = cuda::sum(diff, norm_buf)[0]; - prevError = error; - } - else + warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c, stream); + + double error = std::numeric_limits::max(); + double prevError = 0.0; + for (int n = 0; error > scaledEpsilon && n < iterations_; ++n) { - error = std::numeric_limits::max(); - prevError -= scaledEpsilon; + // some tweaks to make sum operation less frequently + bool calcError = (epsilon_ > 0) && (n & 0x1) && (prevError < scaledEpsilon); + estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, diff, l_t, static_cast(theta_), gamma_, calcError, stream); + if (calcError) + { + _stream.waitForCompletion(); + error = cuda::sum(diff, norm_buf)[0]; + prevError = error; + } + else + { + error = std::numeric_limits::max(); + prevError -= scaledEpsilon; + } + + estimateDualVariables(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma_, stream); } - - estimateDualVariables(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma); } } } -void cv::cuda::OpticalFlowDual_TVL1_CUDA::collectGarbage() +Ptr cv::cuda::OpticalFlowDual_TVL1::create( + double tau, double lambda, double theta, int nscales, int warps, + double epsilon, int iterations, double scaleStep, double gamma, bool useInitialFlow) { - I0s.clear(); - I1s.clear(); - u1s.clear(); - u2s.clear(); - u3s.clear(); - - I1x_buf.release(); - I1y_buf.release(); - - I1w_buf.release(); - I1wx_buf.release(); - I1wy_buf.release(); - - grad_buf.release(); - rho_c_buf.release(); - - p11_buf.release(); - p12_buf.release(); - p21_buf.release(); - p22_buf.release(); - if (gamma) - { - p31_buf.release(); - p32_buf.release(); - } - diff_buf.release(); - norm_buf.release(); + return makePtr(tau, lambda, theta, nscales, warps, + epsilon, iterations, scaleStep, gamma, useInitialFlow); } #endif // !defined HAVE_CUDA || defined(CUDA_DISABLER) From 2dc3b0f7f95135648d6a05f61e7e47787b3d63f2 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 31 Dec 2014 15:36:31 +0300 Subject: [PATCH 05/10] fix cudaoptflow tests build --- modules/cudaoptflow/perf/perf_optflow.cpp | 81 ++++++++++++--------- modules/cudaoptflow/perf/perf_precomp.hpp | 1 + modules/cudaoptflow/test/test_optflow.cpp | 87 ++++++++++++----------- modules/cudaoptflow/test/test_precomp.hpp | 1 + 4 files changed, 98 insertions(+), 72 deletions(-) diff --git a/modules/cudaoptflow/perf/perf_optflow.cpp b/modules/cudaoptflow/perf/perf_optflow.cpp index 12612b0622..32040f282c 100644 --- a/modules/cudaoptflow/perf/perf_optflow.cpp +++ b/modules/cudaoptflow/perf/perf_optflow.cpp @@ -71,13 +71,19 @@ PERF_TEST_P(ImagePair, BroxOpticalFlow, { const cv::cuda::GpuMat d_frame0(frame0); const cv::cuda::GpuMat d_frame1(frame1); - cv::cuda::GpuMat u; - cv::cuda::GpuMat v; + cv::cuda::GpuMat flow; - cv::cuda::BroxOpticalFlow d_flow(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/, - 10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/); + cv::Ptr d_alg = + cv::cuda::BroxOpticalFlow::create(0.197 /*alpha*/, 50.0 /*gamma*/, 0.8 /*scale_factor*/, + 10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/); - TEST_CYCLE() d_flow(d_frame0, d_frame1, u, v); + TEST_CYCLE() d_alg->calc(d_frame0, d_frame1, flow); + + cv::cuda::GpuMat flows[2]; + cv::cuda::split(flow, flows); + + cv::cuda::GpuMat u = flows[0]; + cv::cuda::GpuMat v = flows[1]; CUDA_SANITY_CHECK(u, 1e-1); CUDA_SANITY_CHECK(v, 1e-1); @@ -129,17 +135,17 @@ PERF_TEST_P(ImagePair_Gray_NPts_WinSz_Levels_Iters, PyrLKOpticalFlowSparse, { const cv::cuda::GpuMat d_pts(pts.reshape(2, 1)); - cv::cuda::PyrLKOpticalFlow d_pyrLK; - d_pyrLK.winSize = cv::Size(winSize, winSize); - d_pyrLK.maxLevel = levels - 1; - d_pyrLK.iters = iters; + cv::Ptr d_pyrLK = + cv::cuda::SparsePyrLKOpticalFlow::create(cv::Size(winSize, winSize), + levels - 1, + iters); const cv::cuda::GpuMat d_frame0(frame0); const cv::cuda::GpuMat d_frame1(frame1); cv::cuda::GpuMat nextPts; cv::cuda::GpuMat status; - TEST_CYCLE() d_pyrLK.sparse(d_frame0, d_frame1, d_pts, nextPts, status); + TEST_CYCLE() d_pyrLK->calc(d_frame0, d_frame1, d_pts, nextPts, status); CUDA_SANITY_CHECK(nextPts); CUDA_SANITY_CHECK(status); @@ -189,15 +195,20 @@ PERF_TEST_P(ImagePair_WinSz_Levels_Iters, PyrLKOpticalFlowDense, { const cv::cuda::GpuMat d_frame0(frame0); const cv::cuda::GpuMat d_frame1(frame1); - cv::cuda::GpuMat u; - cv::cuda::GpuMat v; + cv::cuda::GpuMat flow; + + cv::Ptr d_pyrLK = + cv::cuda::DensePyrLKOpticalFlow::create(cv::Size(winSize, winSize), + levels - 1, + iters); + + TEST_CYCLE() d_pyrLK->calc(d_frame0, d_frame1, flow); - cv::cuda::PyrLKOpticalFlow d_pyrLK; - d_pyrLK.winSize = cv::Size(winSize, winSize); - d_pyrLK.maxLevel = levels - 1; - d_pyrLK.iters = iters; + cv::cuda::GpuMat flows[2]; + cv::cuda::split(flow, flows); - TEST_CYCLE() d_pyrLK.dense(d_frame0, d_frame1, u, v); + cv::cuda::GpuMat u = flows[0]; + cv::cuda::GpuMat v = flows[1]; CUDA_SANITY_CHECK(u); CUDA_SANITY_CHECK(v); @@ -234,19 +245,19 @@ PERF_TEST_P(ImagePair, FarnebackOpticalFlow, { const cv::cuda::GpuMat d_frame0(frame0); const cv::cuda::GpuMat d_frame1(frame1); - cv::cuda::GpuMat u; - cv::cuda::GpuMat v; + cv::cuda::GpuMat flow; - cv::cuda::FarnebackOpticalFlow d_farneback; - d_farneback.numLevels = numLevels; - d_farneback.pyrScale = pyrScale; - d_farneback.winSize = winSize; - d_farneback.numIters = numIters; - d_farneback.polyN = polyN; - d_farneback.polySigma = polySigma; - d_farneback.flags = flags; + cv::Ptr d_farneback = + cv::cuda::FarnebackOpticalFlow::create(numLevels, pyrScale, false, winSize, + numIters, polyN, polySigma, flags); - TEST_CYCLE() d_farneback(d_frame0, d_frame1, u, v); + TEST_CYCLE() d_farneback->calc(d_frame0, d_frame1, flow); + + cv::cuda::GpuMat flows[2]; + cv::cuda::split(flow, flows); + + cv::cuda::GpuMat u = flows[0]; + cv::cuda::GpuMat v = flows[1]; CUDA_SANITY_CHECK(u, 1e-4); CUDA_SANITY_CHECK(v, 1e-4); @@ -279,12 +290,18 @@ PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1, { const cv::cuda::GpuMat d_frame0(frame0); const cv::cuda::GpuMat d_frame1(frame1); - cv::cuda::GpuMat u; - cv::cuda::GpuMat v; + cv::cuda::GpuMat flow; + + cv::Ptr d_alg = + cv::cuda::OpticalFlowDual_TVL1::create(); + + TEST_CYCLE() d_alg->calc(d_frame0, d_frame1, flow); - cv::cuda::OpticalFlowDual_TVL1_CUDA d_alg; + cv::cuda::GpuMat flows[2]; + cv::cuda::split(flow, flows); - TEST_CYCLE() d_alg(d_frame0, d_frame1, u, v); + cv::cuda::GpuMat u = flows[0]; + cv::cuda::GpuMat v = flows[1]; CUDA_SANITY_CHECK(u, 1e-1); CUDA_SANITY_CHECK(v, 1e-1); diff --git a/modules/cudaoptflow/perf/perf_precomp.hpp b/modules/cudaoptflow/perf/perf_precomp.hpp index 1dc00ae4b7..d7761a587a 100644 --- a/modules/cudaoptflow/perf/perf_precomp.hpp +++ b/modules/cudaoptflow/perf/perf_precomp.hpp @@ -55,6 +55,7 @@ #include "opencv2/ts/cuda_perf.hpp" #include "opencv2/cudaoptflow.hpp" +#include "opencv2/cudaarithm.hpp" #include "opencv2/video.hpp" #ifdef GTEST_CREATE_SHARED_LIBRARY diff --git a/modules/cudaoptflow/test/test_optflow.cpp b/modules/cudaoptflow/test/test_optflow.cpp index 7a6e68310f..c5b2ad8478 100644 --- a/modules/cudaoptflow/test/test_optflow.cpp +++ b/modules/cudaoptflow/test/test_optflow.cpp @@ -71,12 +71,18 @@ CUDA_TEST_P(BroxOpticalFlow, Regression) cv::Mat frame1 = readImageType("opticalflow/frame1.png", CV_32FC1); ASSERT_FALSE(frame1.empty()); - cv::cuda::BroxOpticalFlow brox(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/, - 10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/); + cv::Ptr brox = + cv::cuda::BroxOpticalFlow::create(0.197 /*alpha*/, 50.0 /*gamma*/, 0.8 /*scale_factor*/, + 10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/); - cv::cuda::GpuMat u; - cv::cuda::GpuMat v; - brox(loadMat(frame0), loadMat(frame1), u, v); + cv::cuda::GpuMat flow; + brox->calc(loadMat(frame0), loadMat(frame1), flow); + + cv::cuda::GpuMat flows[2]; + cv::cuda::split(flow, flows); + + cv::cuda::GpuMat u = flows[0]; + cv::cuda::GpuMat v = flows[1]; std::string fname(cvtest::TS::ptr()->get_data_path()); if (devInfo.majorVersion() >= 2) @@ -133,12 +139,18 @@ CUDA_TEST_P(BroxOpticalFlow, OpticalFlowNan) cv::resize(frame0, r_frame0, cv::Size(1380,1000)); cv::resize(frame1, r_frame1, cv::Size(1380,1000)); - cv::cuda::BroxOpticalFlow brox(0.197f /*alpha*/, 50.0f /*gamma*/, 0.8f /*scale_factor*/, - 5 /*inner_iterations*/, 150 /*outer_iterations*/, 10 /*solver_iterations*/); + cv::Ptr brox = + cv::cuda::BroxOpticalFlow::create(0.197 /*alpha*/, 50.0 /*gamma*/, 0.8 /*scale_factor*/, + 10 /*inner_iterations*/, 77 /*outer_iterations*/, 10 /*solver_iterations*/); + + cv::cuda::GpuMat flow; + brox->calc(loadMat(frame0), loadMat(frame1), flow); - cv::cuda::GpuMat u; - cv::cuda::GpuMat v; - brox(loadMat(r_frame0), loadMat(r_frame1), u, v); + cv::cuda::GpuMat flows[2]; + cv::cuda::split(flow, flows); + + cv::cuda::GpuMat u = flows[0]; + cv::cuda::GpuMat v = flows[1]; cv::Mat h_u, h_v; u.download(h_u); @@ -193,11 +205,12 @@ CUDA_TEST_P(PyrLKOpticalFlow, Sparse) cv::Mat pts_mat(1, (int) pts.size(), CV_32FC2, (void*) &pts[0]); d_pts.upload(pts_mat); - cv::cuda::PyrLKOpticalFlow pyrLK; + cv::Ptr pyrLK = + cv::cuda::SparsePyrLKOpticalFlow::create(); cv::cuda::GpuMat d_nextPts; cv::cuda::GpuMat d_status; - pyrLK.sparse(loadMat(frame0), loadMat(frame1), d_pts, d_nextPts, d_status); + pyrLK->calc(loadMat(frame0), loadMat(frame1), d_pts, d_nextPts, d_status); std::vector nextPts(d_nextPts.cols); cv::Mat nextPts_mat(1, d_nextPts.cols, CV_32FC2, (void*) &nextPts[0]); @@ -285,34 +298,30 @@ CUDA_TEST_P(FarnebackOpticalFlow, Accuracy) double polySigma = polyN <= 5 ? 1.1 : 1.5; - cv::cuda::FarnebackOpticalFlow farn; - farn.pyrScale = pyrScale; - farn.polyN = polyN; - farn.polySigma = polySigma; - farn.flags = flags; + cv::Ptr farn = + cv::cuda::FarnebackOpticalFlow::create(); + farn->setPyrScale(pyrScale); + farn->setPolyN(polyN); + farn->setPolySigma(polySigma); + farn->setFlags(flags); - cv::cuda::GpuMat d_flowx, d_flowy; - farn(loadMat(frame0), loadMat(frame1), d_flowx, d_flowy); + cv::cuda::GpuMat d_flow; + farn->calc(loadMat(frame0), loadMat(frame1), d_flow); cv::Mat flow; if (useInitFlow) { - cv::Mat flowxy[] = {cv::Mat(d_flowx), cv::Mat(d_flowy)}; - cv::merge(flowxy, 2, flow); + d_flow.download(flow); - farn.flags |= cv::OPTFLOW_USE_INITIAL_FLOW; - farn(loadMat(frame0), loadMat(frame1), d_flowx, d_flowy); + farn->setFlags(farn->getFlags() | cv::OPTFLOW_USE_INITIAL_FLOW); + farn->calc(loadMat(frame0), loadMat(frame1), d_flow); } cv::calcOpticalFlowFarneback( - frame0, frame1, flow, farn.pyrScale, farn.numLevels, farn.winSize, - farn.numIters, farn.polyN, farn.polySigma, farn.flags); - - std::vector flowxy; - cv::split(flow, flowxy); + frame0, frame1, flow, farn->getPyrScale(), farn->getNumLevels(), farn->getWinSize(), + farn->getNumIters(), farn->getPolyN(), farn->getPolySigma(), farn->getFlags()); - EXPECT_MAT_SIMILAR(flowxy[0], d_flowx, 0.1); - EXPECT_MAT_SIMILAR(flowxy[1], d_flowy, 0.1); + EXPECT_MAT_SIMILAR(flow, d_flow, 0.1); } INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, FarnebackOpticalFlow, testing::Combine( @@ -352,26 +361,24 @@ CUDA_TEST_P(OpticalFlowDual_TVL1, Accuracy) cv::Mat frame1 = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(frame1.empty()); - cv::cuda::OpticalFlowDual_TVL1_CUDA d_alg; - d_alg.iterations = 10; - d_alg.gamma = gamma; + cv::Ptr d_alg = + cv::cuda::OpticalFlowDual_TVL1::create(); + d_alg->setNumIterations(10); + d_alg->setGamma(gamma); - cv::cuda::GpuMat d_flowx, d_flowy; - d_alg(loadMat(frame0), loadMat(frame1), d_flowx, d_flowy); + cv::cuda::GpuMat d_flow; + d_alg->calc(loadMat(frame0), loadMat(frame1), d_flow); cv::Ptr alg = cv::createOptFlow_DualTVL1(); alg->set("medianFiltering", 1); alg->set("innerIterations", 1); - alg->set("outerIterations", d_alg.iterations); + alg->set("outerIterations", d_alg->getNumIterations()); alg->set("gamma", gamma); cv::Mat flow; alg->calc(frame0, frame1, flow); - cv::Mat gold[2]; - cv::split(flow, gold); - EXPECT_MAT_SIMILAR(gold[0], d_flowx, 4e-3); - EXPECT_MAT_SIMILAR(gold[1], d_flowy, 4e-3); + EXPECT_MAT_SIMILAR(flow, d_flow, 4e-3); } INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, OpticalFlowDual_TVL1, testing::Combine( diff --git a/modules/cudaoptflow/test/test_precomp.hpp b/modules/cudaoptflow/test/test_precomp.hpp index 54812022a6..2dc36abf5c 100644 --- a/modules/cudaoptflow/test/test_precomp.hpp +++ b/modules/cudaoptflow/test/test_precomp.hpp @@ -57,6 +57,7 @@ #include "opencv2/ts/cuda_test.hpp" #include "opencv2/cudaoptflow.hpp" +#include "opencv2/cudaarithm.hpp" #include "opencv2/video.hpp" #include "cvconfig.h" From 03ae1e5aae21bc12bfbefaf78b353a3b83c96ff4 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 31 Dec 2014 15:36:44 +0300 Subject: [PATCH 06/10] fix superres module compilation --- modules/superres/src/optical_flow.cpp | 161 ++++++++++++++++---------- 1 file changed, 98 insertions(+), 63 deletions(-) diff --git a/modules/superres/src/optical_flow.cpp b/modules/superres/src/optical_flow.cpp index fcc9bef347..52fc2648e2 100644 --- a/modules/superres/src/optical_flow.cpp +++ b/modules/superres/src/optical_flow.cpp @@ -341,7 +341,7 @@ namespace int iterations_; bool useInitialFlow_; - Ptr alg_; + Ptr alg_; }; CV_INIT_ALGORITHM(DualTVL1, "DenseOpticalFlowExt.DualTVL1", @@ -514,7 +514,7 @@ namespace int outerIterations_; int solverIterations_; - BroxOpticalFlow alg_; + Ptr alg_; }; CV_INIT_ALGORITHM(Brox_CUDA, "DenseOpticalFlowExt.Brox_CUDA", @@ -525,31 +525,40 @@ namespace obj.info()->addParam(obj, "outerIterations", obj.outerIterations_, false, 0, 0, "Number of warping iterations (number of pyramid levels)"); obj.info()->addParam(obj, "solverIterations", obj.solverIterations_, false, 0, 0, "Number of linear system solver iterations")) - Brox_CUDA::Brox_CUDA() : GpuOpticalFlow(CV_32FC1), alg_(0.197f, 50.0f, 0.8f, 10, 77, 10) + Brox_CUDA::Brox_CUDA() : GpuOpticalFlow(CV_32FC1) { - alpha_ = alg_.alpha; - gamma_ = alg_.gamma; - scaleFactor_ = alg_.scale_factor; - innerIterations_ = alg_.inner_iterations; - outerIterations_ = alg_.outer_iterations; - solverIterations_ = alg_.solver_iterations; + alg_ = cuda::BroxOpticalFlow::create(0.197f, 50.0f, 0.8f, 10, 77, 10); + + alpha_ = alg_->getFlowSmoothness(); + gamma_ = alg_->getGradientConstancyImportance(); + scaleFactor_ = alg_->getPyramidScaleFactor(); + innerIterations_ = alg_->getInnerIterations(); + outerIterations_ = alg_->getOuterIterations(); + solverIterations_ = alg_->getSolverIterations(); } void Brox_CUDA::impl(const GpuMat& input0, const GpuMat& input1, GpuMat& dst1, GpuMat& dst2) { - alg_.alpha = static_cast(alpha_); - alg_.gamma = static_cast(gamma_); - alg_.scale_factor = static_cast(scaleFactor_); - alg_.inner_iterations = innerIterations_; - alg_.outer_iterations = outerIterations_; - alg_.solver_iterations = solverIterations_; + alg_->setFlowSmoothness(alpha_); + alg_->setGradientConstancyImportance(gamma_); + alg_->setPyramidScaleFactor(scaleFactor_); + alg_->setInnerIterations(innerIterations_); + alg_->setOuterIterations(outerIterations_); + alg_->setSolverIterations(solverIterations_); + + GpuMat flow; + alg_->calc(input0, input1, flow); + + GpuMat flows[2]; + cuda::split(flow, flows); - alg_(input0, input1, dst1, dst2); + dst1 = flows[0]; + dst2 = flows[1]; } void Brox_CUDA::collectGarbage() { - alg_.buf.release(); + alg_ = cuda::BroxOpticalFlow::create(alpha_, gamma_, scaleFactor_, innerIterations_, outerIterations_, solverIterations_); GpuOpticalFlow::collectGarbage(); } } @@ -581,7 +590,7 @@ namespace int maxLevel_; int iterations_; - PyrLKOpticalFlow alg_; + Ptr alg_; }; CV_INIT_ALGORITHM(PyrLK_CUDA, "DenseOpticalFlowExt.PyrLK_CUDA", @@ -591,24 +600,32 @@ namespace PyrLK_CUDA::PyrLK_CUDA() : GpuOpticalFlow(CV_8UC1) { - winSize_ = alg_.winSize.width; - maxLevel_ = alg_.maxLevel; - iterations_ = alg_.iters; + alg_ = cuda::DensePyrLKOpticalFlow::create(); + + winSize_ = alg_->getWinSize().width; + maxLevel_ = alg_->getMaxLevel(); + iterations_ = alg_->getNumIters(); } void PyrLK_CUDA::impl(const GpuMat& input0, const GpuMat& input1, GpuMat& dst1, GpuMat& dst2) { - alg_.winSize.width = winSize_; - alg_.winSize.height = winSize_; - alg_.maxLevel = maxLevel_; - alg_.iters = iterations_; + alg_->setWinSize(Size(winSize_, winSize_)); + alg_->setMaxLevel(maxLevel_); + alg_->setNumIters(iterations_); + + GpuMat flow; + alg_->calc(input0, input1, flow); + + GpuMat flows[2]; + cuda::split(flow, flows); - alg_.dense(input0, input1, dst1, dst2); + dst1 = flows[0]; + dst2 = flows[1]; } void PyrLK_CUDA::collectGarbage() { - alg_.releaseMemory(); + alg_ = cuda::DensePyrLKOpticalFlow::create(); GpuOpticalFlow::collectGarbage(); } } @@ -644,7 +661,7 @@ namespace double polySigma_; int flags_; - FarnebackOpticalFlow alg_; + Ptr alg_; }; CV_INIT_ALGORITHM(Farneback_CUDA, "DenseOpticalFlowExt.Farneback_CUDA", @@ -658,31 +675,40 @@ namespace Farneback_CUDA::Farneback_CUDA() : GpuOpticalFlow(CV_8UC1) { - pyrScale_ = alg_.pyrScale; - numLevels_ = alg_.numLevels; - winSize_ = alg_.winSize; - numIters_ = alg_.numIters; - polyN_ = alg_.polyN; - polySigma_ = alg_.polySigma; - flags_ = alg_.flags; + alg_ = cuda::FarnebackOpticalFlow::create(); + + pyrScale_ = alg_->getPyrScale(); + numLevels_ = alg_->getNumLevels(); + winSize_ = alg_->getWinSize(); + numIters_ = alg_->getNumIters(); + polyN_ = alg_->getPolyN(); + polySigma_ = alg_->getPolySigma(); + flags_ = alg_->getFlags(); } void Farneback_CUDA::impl(const GpuMat& input0, const GpuMat& input1, GpuMat& dst1, GpuMat& dst2) { - alg_.pyrScale = pyrScale_; - alg_.numLevels = numLevels_; - alg_.winSize = winSize_; - alg_.numIters = numIters_; - alg_.polyN = polyN_; - alg_.polySigma = polySigma_; - alg_.flags = flags_; + alg_->setPyrScale(pyrScale_); + alg_->setNumLevels(numLevels_); + alg_->setWinSize(winSize_); + alg_->setNumIters(numIters_); + alg_->setPolyN(polyN_); + alg_->setPolySigma(polySigma_); + alg_->setFlags(flags_); + + GpuMat flow; + alg_->calc(input0, input1, flow); + + GpuMat flows[2]; + cuda::split(flow, flows); - alg_(input0, input1, dst1, dst2); + dst1 = flows[0]; + dst2 = flows[1]; } void Farneback_CUDA::collectGarbage() { - alg_.releaseMemory(); + alg_ = cuda::FarnebackOpticalFlow::create(); GpuOpticalFlow::collectGarbage(); } } @@ -719,7 +745,7 @@ namespace int iterations_; bool useInitialFlow_; - OpticalFlowDual_TVL1_CUDA alg_; + Ptr alg_; }; CV_INIT_ALGORITHM(DualTVL1_CUDA, "DenseOpticalFlowExt.DualTVL1_CUDA", @@ -734,33 +760,42 @@ namespace DualTVL1_CUDA::DualTVL1_CUDA() : GpuOpticalFlow(CV_8UC1) { - tau_ = alg_.tau; - lambda_ = alg_.lambda; - theta_ = alg_.theta; - nscales_ = alg_.nscales; - warps_ = alg_.warps; - epsilon_ = alg_.epsilon; - iterations_ = alg_.iterations; - useInitialFlow_ = alg_.useInitialFlow; + alg_ = cuda::OpticalFlowDual_TVL1::create(); + + tau_ = alg_->getTau(); + lambda_ = alg_->getLambda(); + theta_ = alg_->getTheta(); + nscales_ = alg_->getNumScales(); + warps_ = alg_->getNumWarps(); + epsilon_ = alg_->getEpsilon(); + iterations_ = alg_->getNumIterations(); + useInitialFlow_ = alg_->getUseInitialFlow(); } void DualTVL1_CUDA::impl(const GpuMat& input0, const GpuMat& input1, GpuMat& dst1, GpuMat& dst2) { - alg_.tau = tau_; - alg_.lambda = lambda_; - alg_.theta = theta_; - alg_.nscales = nscales_; - alg_.warps = warps_; - alg_.epsilon = epsilon_; - alg_.iterations = iterations_; - alg_.useInitialFlow = useInitialFlow_; + alg_->setTau(tau_); + alg_->setLambda(lambda_); + alg_->setTheta(theta_); + alg_->setNumScales(nscales_); + alg_->setNumWarps(warps_); + alg_->setEpsilon(epsilon_); + alg_->setNumIterations(iterations_); + alg_->setUseInitialFlow(useInitialFlow_); + + GpuMat flow; + alg_->calc(input0, input1, flow); + + GpuMat flows[2]; + cuda::split(flow, flows); - alg_(input0, input1, dst1, dst2); + dst1 = flows[0]; + dst2 = flows[1]; } void DualTVL1_CUDA::collectGarbage() { - alg_.collectGarbage(); + alg_ = cuda::OpticalFlowDual_TVL1::create(); GpuOpticalFlow::collectGarbage(); } } From 62f8240b004fe2d1386be8003082b42673fa7c55 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 31 Dec 2014 15:36:51 +0300 Subject: [PATCH 07/10] fix videostab module compilation --- .../opencv2/videostab/optical_flow.hpp | 4 +-- modules/videostab/src/optical_flow.cpp | 36 +++++++++++++------ 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/modules/videostab/include/opencv2/videostab/optical_flow.hpp b/modules/videostab/include/opencv2/videostab/optical_flow.hpp index a34a82e3f8..41d1953549 100644 --- a/modules/videostab/include/opencv2/videostab/optical_flow.hpp +++ b/modules/videostab/include/opencv2/videostab/optical_flow.hpp @@ -121,7 +121,7 @@ public: cuda::GpuMat &status); private: - cuda::PyrLKOpticalFlow optFlowEstimator_; + Ptr optFlowEstimator_; cuda::GpuMat frame0_, frame1_, points0_, points1_, status_, errors_; }; @@ -136,7 +136,7 @@ public: OutputArray errors); private: - cuda::PyrLKOpticalFlow optFlowEstimator_; + Ptr optFlowEstimator_; cuda::GpuMat frame0_, frame1_, flowX_, flowY_, errors_; }; diff --git a/modules/videostab/src/optical_flow.cpp b/modules/videostab/src/optical_flow.cpp index d8a059c1fd..32c8133a7d 100644 --- a/modules/videostab/src/optical_flow.cpp +++ b/modules/videostab/src/optical_flow.cpp @@ -45,6 +45,10 @@ #include "opencv2/videostab/optical_flow.hpp" #include "opencv2/videostab/ring_buffer.hpp" +#ifdef HAVE_OPENCV_CUDAARITHM + #include "opencv2/cudaarithm.hpp" +#endif + namespace cv { namespace videostab @@ -63,6 +67,7 @@ void SparsePyrLkOptFlowEstimator::run( SparsePyrLkOptFlowEstimatorGpu::SparsePyrLkOptFlowEstimatorGpu() { CV_Assert(cuda::getCudaEnabledDeviceCount() > 0); + optFlowEstimator_ = cuda::SparsePyrLKOpticalFlow::create(); } @@ -91,9 +96,9 @@ void SparsePyrLkOptFlowEstimatorGpu::run( const cuda::GpuMat &frame0, const cuda::GpuMat &frame1, const cuda::GpuMat &points0, cuda::GpuMat &points1, cuda::GpuMat &status, cuda::GpuMat &errors) { - optFlowEstimator_.winSize = winSize_; - optFlowEstimator_.maxLevel = maxLevel_; - optFlowEstimator_.sparse(frame0, frame1, points0, points1, status, &errors); + optFlowEstimator_->setWinSize(winSize_); + optFlowEstimator_->setMaxLevel(maxLevel_); + optFlowEstimator_->calc(frame0, frame1, points0, points1, status, errors); } @@ -101,15 +106,16 @@ void SparsePyrLkOptFlowEstimatorGpu::run( const cuda::GpuMat &frame0, const cuda::GpuMat &frame1, const cuda::GpuMat &points0, cuda::GpuMat &points1, cuda::GpuMat &status) { - optFlowEstimator_.winSize = winSize_; - optFlowEstimator_.maxLevel = maxLevel_; - optFlowEstimator_.sparse(frame0, frame1, points0, points1, status); + optFlowEstimator_->setWinSize(winSize_); + optFlowEstimator_->setMaxLevel(maxLevel_); + optFlowEstimator_->calc(frame0, frame1, points0, points1, status); } DensePyrLkOptFlowEstimatorGpu::DensePyrLkOptFlowEstimatorGpu() { CV_Assert(cuda::getCudaEnabledDeviceCount() > 0); + optFlowEstimator_ = cuda::DensePyrLKOpticalFlow::create(); } @@ -120,16 +126,24 @@ void DensePyrLkOptFlowEstimatorGpu::run( frame0_.upload(frame0.getMat()); frame1_.upload(frame1.getMat()); - optFlowEstimator_.winSize = winSize_; - optFlowEstimator_.maxLevel = maxLevel_; + optFlowEstimator_->setWinSize(winSize_); + optFlowEstimator_->setMaxLevel(maxLevel_); if (errors.needed()) { - optFlowEstimator_.dense(frame0_, frame1_, flowX_, flowY_, &errors_); - errors_.download(errors.getMatRef()); + CV_Error(Error::StsNotImplemented, "DensePyrLkOptFlowEstimatorGpu doesn't support errors calculation"); } else - optFlowEstimator_.dense(frame0_, frame1_, flowX_, flowY_); + { + cuda::GpuMat flow; + optFlowEstimator_->calc(frame0_, frame1_, flow); + + cuda::GpuMat flows[2]; + cuda::split(flow, flows); + + flowX_ = flows[0]; + flowY_ = flows[1]; + } flowX_.download(flowX.getMatRef()); flowY_.download(flowY.getMatRef()); From b3a743f09a94bddcde0344ae37f1754c887a1685 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 31 Dec 2014 15:37:09 +0300 Subject: [PATCH 08/10] fix gpu samples compilation --- samples/gpu/farneback_optical_flow.cpp | 19 +++++--- samples/gpu/optical_flow.cpp | 61 +++++++++----------------- samples/gpu/pyrlk_optical_flow.cpp | 24 ++-------- 3 files changed, 35 insertions(+), 69 deletions(-) diff --git a/samples/gpu/farneback_optical_flow.cpp b/samples/gpu/farneback_optical_flow.cpp index b8ed55ea6c..798b108a62 100644 --- a/samples/gpu/farneback_optical_flow.cpp +++ b/samples/gpu/farneback_optical_flow.cpp @@ -7,6 +7,7 @@ #include "opencv2/highgui.hpp" #include "opencv2/video.hpp" #include "opencv2/cudaoptflow.hpp" +#include "opencv2/cudaarithm.hpp" using namespace std; using namespace cv; @@ -70,8 +71,8 @@ int main(int argc, char **argv) if (frameL.empty() || frameR.empty()) return -1; GpuMat d_frameL(frameL), d_frameR(frameR); - GpuMat d_flowx, d_flowy; - FarnebackOpticalFlow d_calc; + GpuMat d_flow; + Ptr d_calc = cuda::FarnebackOpticalFlow::create(); Mat flowxy, flowx, flowy, image; bool running = true, gpuMode = true; @@ -86,17 +87,21 @@ int main(int argc, char **argv) if (gpuMode) { tc0 = getTickCount(); - d_calc(d_frameL, d_frameR, d_flowx, d_flowy); + d_calc->calc(d_frameL, d_frameR, d_flow); tc1 = getTickCount(); - d_flowx.download(flowx); - d_flowy.download(flowy); + + GpuMat planes[2]; + cuda::split(d_flow, planes); + + planes[0].download(flowx); + planes[1].download(flowy); } else { tc0 = getTickCount(); calcOpticalFlowFarneback( - frameL, frameR, flowxy, d_calc.pyrScale, d_calc.numLevels, d_calc.winSize, - d_calc.numIters, d_calc.polyN, d_calc.polySigma, d_calc.flags); + frameL, frameR, flowxy, d_calc->getPyrScale(), d_calc->getNumLevels(), d_calc->getWinSize(), + d_calc->getNumIters(), d_calc->getPolyN(), d_calc->getPolySigma(), d_calc->getFlags()); tc1 = getTickCount(); Mat planes[] = {flowx, flowy}; diff --git a/samples/gpu/optical_flow.cpp b/samples/gpu/optical_flow.cpp index 7d625de85b..b1b3c8de1e 100644 --- a/samples/gpu/optical_flow.cpp +++ b/samples/gpu/optical_flow.cpp @@ -5,6 +5,7 @@ #include #include "opencv2/highgui.hpp" #include "opencv2/cudaoptflow.hpp" +#include "opencv2/cudaarithm.hpp" using namespace std; using namespace cv; @@ -122,10 +123,13 @@ static void drawOpticalFlow(const Mat_& flowx, const Mat_& flowy, } } -static void showFlow(const char* name, const GpuMat& d_flowx, const GpuMat& d_flowy) +static void showFlow(const char* name, const GpuMat& d_flow) { - Mat flowx(d_flowx); - Mat flowy(d_flowy); + GpuMat planes[2]; + cuda::split(d_flow, planes); + + Mat flowx(planes[0]); + Mat flowy(planes[1]); Mat out; drawOpticalFlow(flowx, flowy, out, 10); @@ -171,14 +175,12 @@ int main(int argc, const char* argv[]) GpuMat d_frame0(frame0); GpuMat d_frame1(frame1); - GpuMat d_flowx(frame0.size(), CV_32FC1); - GpuMat d_flowy(frame0.size(), CV_32FC1); + GpuMat d_flow(frame0.size(), CV_32FC2); - BroxOpticalFlow brox(0.197f, 50.0f, 0.8f, 10, 77, 10); - PyrLKOpticalFlow lk; lk.winSize = Size(7, 7); - FarnebackOpticalFlow farn; - OpticalFlowDual_TVL1_CUDA tvl1; - FastOpticalFlowBM fastBM; + Ptr brox = cuda::BroxOpticalFlow::create(0.197f, 50.0f, 0.8f, 10, 77, 10); + Ptr lk = cuda::DensePyrLKOpticalFlow::create(Size(7, 7)); + Ptr farn = cuda::FarnebackOpticalFlow::create(); + Ptr tvl1 = cuda::OpticalFlowDual_TVL1::create(); { GpuMat d_frame0f; @@ -189,68 +191,45 @@ int main(int argc, const char* argv[]) const int64 start = getTickCount(); - brox(d_frame0f, d_frame1f, d_flowx, d_flowy); + brox->calc(d_frame0f, d_frame1f, d_flow); const double timeSec = (getTickCount() - start) / getTickFrequency(); cout << "Brox : " << timeSec << " sec" << endl; - showFlow("Brox", d_flowx, d_flowy); + showFlow("Brox", d_flow); } { const int64 start = getTickCount(); - lk.dense(d_frame0, d_frame1, d_flowx, d_flowy); + lk->calc(d_frame0, d_frame1, d_flow); const double timeSec = (getTickCount() - start) / getTickFrequency(); cout << "LK : " << timeSec << " sec" << endl; - showFlow("LK", d_flowx, d_flowy); + showFlow("LK", d_flow); } { const int64 start = getTickCount(); - farn(d_frame0, d_frame1, d_flowx, d_flowy); + farn->calc(d_frame0, d_frame1, d_flow); const double timeSec = (getTickCount() - start) / getTickFrequency(); cout << "Farn : " << timeSec << " sec" << endl; - showFlow("Farn", d_flowx, d_flowy); + showFlow("Farn", d_flow); } { const int64 start = getTickCount(); - tvl1(d_frame0, d_frame1, d_flowx, d_flowy); + tvl1->calc(d_frame0, d_frame1, d_flow); const double timeSec = (getTickCount() - start) / getTickFrequency(); cout << "TVL1 : " << timeSec << " sec" << endl; - showFlow("TVL1", d_flowx, d_flowy); - } - - { - const int64 start = getTickCount(); - - GpuMat buf; - calcOpticalFlowBM(d_frame0, d_frame1, Size(7, 7), Size(1, 1), Size(21, 21), false, d_flowx, d_flowy, buf); - - const double timeSec = (getTickCount() - start) / getTickFrequency(); - cout << "BM : " << timeSec << " sec" << endl; - - showFlow("BM", d_flowx, d_flowy); - } - - { - const int64 start = getTickCount(); - - fastBM(d_frame0, d_frame1, d_flowx, d_flowy); - - const double timeSec = (getTickCount() - start) / getTickFrequency(); - cout << "Fast BM : " << timeSec << " sec" << endl; - - showFlow("Fast BM", d_flowx, d_flowy); + showFlow("TVL1", d_flow); } imshow("Frame 0", frame0); diff --git a/samples/gpu/pyrlk_optical_flow.cpp b/samples/gpu/pyrlk_optical_flow.cpp index febc28f28d..9074c47b68 100644 --- a/samples/gpu/pyrlk_optical_flow.cpp +++ b/samples/gpu/pyrlk_optical_flow.cpp @@ -186,12 +186,8 @@ int main(int argc, const char* argv[]) // Sparse - PyrLKOpticalFlow d_pyrLK; - - d_pyrLK.winSize.width = winSize; - d_pyrLK.winSize.height = winSize; - d_pyrLK.maxLevel = maxLevel; - d_pyrLK.iters = iters; + Ptr d_pyrLK = cuda::SparsePyrLKOpticalFlow::create( + Size(winSize, winSize), maxLevel, iters); GpuMat d_frame0(frame0); GpuMat d_frame1(frame1); @@ -199,7 +195,7 @@ int main(int argc, const char* argv[]) GpuMat d_nextPts; GpuMat d_status; - d_pyrLK.sparse(useGray ? d_frame0Gray : d_frame0, useGray ? d_frame1Gray : d_frame1, d_prevPts, d_nextPts, d_status); + d_pyrLK->calc(useGray ? d_frame0Gray : d_frame0, useGray ? d_frame1Gray : d_frame1, d_prevPts, d_nextPts, d_status); // Draw arrows @@ -216,20 +212,6 @@ int main(int argc, const char* argv[]) imshow("PyrLK [Sparse]", frame0); - // Dense - - GpuMat d_u; - GpuMat d_v; - - d_pyrLK.dense(d_frame0Gray, d_frame1Gray, d_u, d_v); - - // Draw flow field - - Mat flowField; - getFlowField(Mat(d_u), Mat(d_v), flowField); - - imshow("PyrLK [Dense] Flow Field", flowField); - waitKey(); return 0; From 63ff39f9f3e1e486002266b75410ad988be9e9ec Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 31 Dec 2014 15:37:25 +0300 Subject: [PATCH 09/10] remove obsolete gpu optical flow samples --- samples/gpu/brox_optical_flow.cpp | 270 ------------------------------ samples/gpu/performance/tests.cpp | 81 --------- 2 files changed, 351 deletions(-) delete mode 100644 samples/gpu/brox_optical_flow.cpp diff --git a/samples/gpu/brox_optical_flow.cpp b/samples/gpu/brox_optical_flow.cpp deleted file mode 100644 index 638aade45a..0000000000 --- a/samples/gpu/brox_optical_flow.cpp +++ /dev/null @@ -1,270 +0,0 @@ -#include -#include -#include -#include - -#include "opencv2/core.hpp" -#include "opencv2/core/utility.hpp" -#include "opencv2/highgui.hpp" -#include "opencv2/imgproc.hpp" -#include "opencv2/cudaoptflow.hpp" -#include "opencv2/cudaarithm.hpp" - -using namespace std; -using namespace cv; -using namespace cv::cuda; - -void getFlowField(const Mat& u, const Mat& v, Mat& flowField); - -int main(int argc, const char* argv[]) -{ - try - { - const char* keys = - "{ h help | | print help message }" - "{ l left | | specify left image }" - "{ r right | | specify right image }" - "{ s scale | 0.8 | set pyramid scale factor }" - "{ a alpha | 0.197 | set alpha }" - "{ g gamma | 50.0 | set gamma }" - "{ i inner | 10 | set number of inner iterations }" - "{ o outer | 77 | set number of outer iterations }" - "{ si solver | 10 | set number of basic solver iterations }" - "{ t time_step | 0.1 | set frame interpolation time step }"; - - CommandLineParser cmd(argc, argv, keys); - - if (cmd.has("help") || !cmd.check()) - { - cmd.printMessage(); - cmd.printErrors(); - return 0; - } - - string frame0Name = cmd.get("left"); - string frame1Name = cmd.get("right"); - float scale = cmd.get("scale"); - float alpha = cmd.get("alpha"); - float gamma = cmd.get("gamma"); - int inner_iterations = cmd.get("inner"); - int outer_iterations = cmd.get("outer"); - int solver_iterations = cmd.get("solver"); - float timeStep = cmd.get("time_step"); - - if (frame0Name.empty() || frame1Name.empty()) - { - cerr << "Missing input file names" << endl; - return -1; - } - - Mat frame0Color = imread(frame0Name); - Mat frame1Color = imread(frame1Name); - - if (frame0Color.empty() || frame1Color.empty()) - { - cout << "Can't load input images" << endl; - return -1; - } - - cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice()); - - cout << "OpenCV / NVIDIA Computer Vision" << endl; - cout << "Optical Flow Demo: Frame Interpolation" << endl; - cout << "=========================================" << endl; - - namedWindow("Forward flow"); - namedWindow("Backward flow"); - - namedWindow("Interpolated frame"); - - cout << "Press:" << endl; - cout << "\tESC to quit" << endl; - cout << "\t'a' to move to the previous frame" << endl; - cout << "\t's' to move to the next frame\n" << endl; - - frame0Color.convertTo(frame0Color, CV_32F, 1.0 / 255.0); - frame1Color.convertTo(frame1Color, CV_32F, 1.0 / 255.0); - - Mat frame0Gray, frame1Gray; - - cv::cvtColor(frame0Color, frame0Gray, COLOR_BGR2GRAY); - cv::cvtColor(frame1Color, frame1Gray, COLOR_BGR2GRAY); - - GpuMat d_frame0(frame0Gray); - GpuMat d_frame1(frame1Gray); - - cout << "Estimating optical flow" << endl; - - BroxOpticalFlow d_flow(alpha, gamma, scale, inner_iterations, outer_iterations, solver_iterations); - - cout << "\tForward..." << endl; - - GpuMat d_fu, d_fv; - - d_flow(d_frame0, d_frame1, d_fu, d_fv); - - Mat flowFieldForward; - getFlowField(Mat(d_fu), Mat(d_fv), flowFieldForward); - - cout << "\tBackward..." << endl; - - GpuMat d_bu, d_bv; - - d_flow(d_frame1, d_frame0, d_bu, d_bv); - - Mat flowFieldBackward; - getFlowField(Mat(d_bu), Mat(d_bv), flowFieldBackward); - - cout << "Interpolating..." << endl; - - // first frame color components - GpuMat d_b, d_g, d_r; - - // second frame color components - GpuMat d_bt, d_gt, d_rt; - - // prepare color components on host and copy them to device memory - Mat channels[3]; - cv::split(frame0Color, channels); - - d_b.upload(channels[0]); - d_g.upload(channels[1]); - d_r.upload(channels[2]); - - cv::split(frame1Color, channels); - - d_bt.upload(channels[0]); - d_gt.upload(channels[1]); - d_rt.upload(channels[2]); - - // temporary buffer - GpuMat d_buf; - - // intermediate frame color components (GPU memory) - GpuMat d_rNew, d_gNew, d_bNew; - - GpuMat d_newFrame; - - vector frames; - frames.reserve(static_cast(1.0f / timeStep) + 2); - - frames.push_back(frame0Color); - - // compute interpolated frames - for (float timePos = timeStep; timePos < 1.0f; timePos += timeStep) - { - // interpolate blue channel - interpolateFrames(d_b, d_bt, d_fu, d_fv, d_bu, d_bv, timePos, d_bNew, d_buf); - - // interpolate green channel - interpolateFrames(d_g, d_gt, d_fu, d_fv, d_bu, d_bv, timePos, d_gNew, d_buf); - - // interpolate red channel - interpolateFrames(d_r, d_rt, d_fu, d_fv, d_bu, d_bv, timePos, d_rNew, d_buf); - - GpuMat channels3[] = {d_bNew, d_gNew, d_rNew}; - cuda::merge(channels3, 3, d_newFrame); - - frames.push_back(Mat(d_newFrame)); - - cout << setprecision(4) << timePos * 100.0f << "%\r"; - } - - frames.push_back(frame1Color); - - cout << setw(5) << "100%" << endl; - - cout << "Done" << endl; - - imshow("Forward flow", flowFieldForward); - imshow("Backward flow", flowFieldBackward); - - int currentFrame = 0; - - imshow("Interpolated frame", frames[currentFrame]); - - for(;;) - { - int key = toupper(waitKey(10) & 0xff); - - switch (key) - { - case 27: - return 0; - - case 'A': - if (currentFrame > 0) - --currentFrame; - - imshow("Interpolated frame", frames[currentFrame]); - break; - - case 'S': - if (currentFrame < static_cast(frames.size()) - 1) - ++currentFrame; - - imshow("Interpolated frame", frames[currentFrame]); - break; - } - } - } - catch (const exception& ex) - { - cerr << ex.what() << endl; - return -1; - } - catch (...) - { - cerr << "Unknow error" << endl; - return -1; - } -} - -template inline T clamp (T x, T a, T b) -{ - return ((x) > (a) ? ((x) < (b) ? (x) : (b)) : (a)); -} - -template inline T mapValue(T x, T a, T b, T c, T d) -{ - x = clamp(x, a, b); - return c + (d - c) * (x - a) / (b - a); -} - -void getFlowField(const Mat& u, const Mat& v, Mat& flowField) -{ - float maxDisplacement = 1.0f; - - for (int i = 0; i < u.rows; ++i) - { - const float* ptr_u = u.ptr(i); - const float* ptr_v = v.ptr(i); - - for (int j = 0; j < u.cols; ++j) - { - float d = max(fabsf(ptr_u[j]), fabsf(ptr_v[j])); - - if (d > maxDisplacement) - maxDisplacement = d; - } - } - - flowField.create(u.size(), CV_8UC4); - - for (int i = 0; i < flowField.rows; ++i) - { - const float* ptr_u = u.ptr(i); - const float* ptr_v = v.ptr(i); - - - Vec4b* row = flowField.ptr(i); - - for (int j = 0; j < flowField.cols; ++j) - { - row[j][0] = 0; - row[j][1] = static_cast (mapValue (-ptr_v[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f)); - row[j][2] = static_cast (mapValue ( ptr_u[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f)); - row[j][3] = 255; - } - } -} diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp index 14910f9a38..9e8bfd28cd 100644 --- a/samples/gpu/performance/tests.cpp +++ b/samples/gpu/performance/tests.cpp @@ -1187,87 +1187,6 @@ TEST(GoodFeaturesToTrack) CUDA_OFF; } -TEST(PyrLKOpticalFlow) -{ - Mat frame0 = imread(abspath("../data/rubberwhale1.png")); - if (frame0.empty()) throw runtime_error("can't open ../data/rubberwhale1.png"); - - Mat frame1 = imread(abspath("../data/rubberwhale2.png")); - if (frame1.empty()) throw runtime_error("can't open ../data/rubberwhale2.png"); - - Mat gray_frame; - cvtColor(frame0, gray_frame, COLOR_BGR2GRAY); - - for (int points = 1000; points <= 8000; points *= 2) - { - SUBTEST << points; - - vector pts; - goodFeaturesToTrack(gray_frame, pts, points, 0.01, 0.0); - - vector nextPts; - vector status; - - vector err; - - calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err); - - CPU_ON; - calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err); - CPU_OFF; - - cuda::PyrLKOpticalFlow d_pyrLK; - - cuda::GpuMat d_frame0(frame0); - cuda::GpuMat d_frame1(frame1); - - cuda::GpuMat d_pts; - Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void*)&pts[0]); - d_pts.upload(pts_mat); - - cuda::GpuMat d_nextPts; - cuda::GpuMat d_status; - cuda::GpuMat d_err; - - d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); - - CUDA_ON; - d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); - CUDA_OFF; - } -} - - -TEST(FarnebackOpticalFlow) -{ - const string datasets[] = {"../data/rubberwhale", "../data/basketball"}; - for (size_t i = 0; i < sizeof(datasets)/sizeof(*datasets); ++i) { - for (int fastPyramids = 0; fastPyramids < 2; ++fastPyramids) { - for (int useGaussianBlur = 0; useGaussianBlur < 2; ++useGaussianBlur) { - - SUBTEST << "dataset=" << datasets[i] << ", fastPyramids=" << fastPyramids << ", useGaussianBlur=" << useGaussianBlur; - Mat frame0 = imread(abspath(datasets[i] + "1.png"), IMREAD_GRAYSCALE); - Mat frame1 = imread(abspath(datasets[i] + "2.png"), IMREAD_GRAYSCALE); - if (frame0.empty()) throw runtime_error("can't open " + datasets[i] + "1.png"); - if (frame1.empty()) throw runtime_error("can't open " + datasets[i] + "2.png"); - - cuda::FarnebackOpticalFlow calc; - calc.fastPyramids = fastPyramids != 0; - calc.flags |= useGaussianBlur ? OPTFLOW_FARNEBACK_GAUSSIAN : 0; - - cuda::GpuMat d_frame0(frame0), d_frame1(frame1), d_flowx, d_flowy; - CUDA_ON; - calc(d_frame0, d_frame1, d_flowx, d_flowy); - CUDA_OFF; - - Mat flow; - CPU_ON; - calcOpticalFlowFarneback(frame0, frame1, flow, calc.pyrScale, calc.numLevels, calc.winSize, calc.numIters, calc.polyN, calc.polySigma, calc.flags); - CPU_OFF; - - }}} -} - #ifdef HAVE_OPENCV_BGSEGM TEST(MOG) From 710617034ba4840d3d007e3444a03d84b6f5e423 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 12 Jan 2015 11:26:07 +0300 Subject: [PATCH 10/10] remove unused function from pyrlk_optical_flow sample --- samples/gpu/pyrlk_optical_flow.cpp | 38 ------------------------------ 1 file changed, 38 deletions(-) diff --git a/samples/gpu/pyrlk_optical_flow.cpp b/samples/gpu/pyrlk_optical_flow.cpp index 9074c47b68..f13487b622 100644 --- a/samples/gpu/pyrlk_optical_flow.cpp +++ b/samples/gpu/pyrlk_optical_flow.cpp @@ -77,44 +77,6 @@ template inline T mapValue(T x, T a, T b, T c, T d) return c + (d - c) * (x - a) / (b - a); } -static void getFlowField(const Mat& u, const Mat& v, Mat& flowField) -{ - float maxDisplacement = 1.0f; - - for (int i = 0; i < u.rows; ++i) - { - const float* ptr_u = u.ptr(i); - const float* ptr_v = v.ptr(i); - - for (int j = 0; j < u.cols; ++j) - { - float d = max(fabsf(ptr_u[j]), fabsf(ptr_v[j])); - - if (d > maxDisplacement) - maxDisplacement = d; - } - } - - flowField.create(u.size(), CV_8UC4); - - for (int i = 0; i < flowField.rows; ++i) - { - const float* ptr_u = u.ptr(i); - const float* ptr_v = v.ptr(i); - - - Vec4b* row = flowField.ptr(i); - - for (int j = 0; j < flowField.cols; ++j) - { - row[j][0] = 0; - row[j][1] = static_cast (mapValue (-ptr_v[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f)); - row[j][2] = static_cast (mapValue ( ptr_u[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f)); - row[j][3] = 255; - } - } -} - int main(int argc, const char* argv[]) { const char* keys =