From 693c4e57414e4729a636cd5b715bcca8a1c9c70c Mon Sep 17 00:00:00 2001 From: Ernest Galbrun Date: Fri, 4 Jul 2014 14:23:09 +0200 Subject: [PATCH] debug of cuda_tvl1 => pass tests successfully --- modules/cudaoptflow/src/cuda/tvl1flow.cu | 4 +++- modules/cudaoptflow/src/tvl1flow.cpp | 12 ++++++---- modules/cudaoptflow/test/test_optflow.cpp | 12 ++++++++++ modules/video/src/tvl1flow.cpp | 29 ++++++++++------------- 4 files changed, 34 insertions(+), 23 deletions(-) diff --git a/modules/cudaoptflow/src/cuda/tvl1flow.cu b/modules/cudaoptflow/src/cuda/tvl1flow.cu index 48add3b63f..d78af19626 100644 --- a/modules/cudaoptflow/src/cuda/tvl1flow.cu +++ b/modules/cudaoptflow/src/cuda/tvl1flow.cu @@ -209,7 +209,9 @@ namespace tvl1flow __global__ void estimateUKernel(const PtrStepSzf I1wx, const PtrStepf I1wy, const PtrStepf grad, const PtrStepf rho_c, - const PtrStepf p11, const PtrStepf p12, const PtrStepf p21, const PtrStepf p22, const PtrStepf p31, const PtrStepf p32, + const PtrStepf p11, const PtrStepf p12, + const PtrStepf p21, const PtrStepf p22, + const PtrStepf p31, const PtrStepf p32, PtrStepf u1, PtrStepf u2, PtrStepf u3, PtrStepf error, const float l_t, const float theta, const float gamma, const bool calcError) { diff --git a/modules/cudaoptflow/src/tvl1flow.cpp b/modules/cudaoptflow/src/tvl1flow.cpp index 6b7af3ca3f..bf1c026a21 100644 --- a/modules/cudaoptflow/src/tvl1flow.cpp +++ b/modules/cudaoptflow/src/tvl1flow.cpp @@ -235,9 +235,8 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::procOneScale(const GpuMat& I0, const G { // some tweaks to make sum operation less frequently bool calcError = (epsilon > 0) && (n & 0x1) && (prevError < scaledEpsilon); - - estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, diff, l_t, gamma, static_cast(theta), calcError); - + cv::Mat m1(u3); + estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, diff, l_t, static_cast(theta), gamma, calcError); if (calcError) { error = cuda::sum(diff, 
norm_buf)[0]; @@ -259,7 +258,8 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::collectGarbage() I0s.clear(); I1s.clear(); u1s.clear(); - u2s.clear(); + u2s.clear(); + u3s.clear(); I1x_buf.release(); I1y_buf.release(); @@ -274,7 +274,9 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::collectGarbage() p11_buf.release(); p12_buf.release(); p21_buf.release(); - p22_buf.release(); + p22_buf.release(); + p31_buf.release(); + p32_buf.release(); diff_buf.release(); norm_buf.release(); diff --git a/modules/cudaoptflow/test/test_optflow.cpp b/modules/cudaoptflow/test/test_optflow.cpp index dc7615dbac..5299836fba 100644 --- a/modules/cudaoptflow/test/test_optflow.cpp +++ b/modules/cudaoptflow/test/test_optflow.cpp @@ -361,9 +361,21 @@ CUDA_TEST_P(OpticalFlowDual_TVL1, Accuracy) alg->calc(frame0, frame1, flow); cv::Mat gold[2]; cv::split(flow, gold); + cv::Mat mx(d_flowx); + cv::Mat my(d_flowx); EXPECT_MAT_SIMILAR(gold[0], d_flowx, 4e-3); EXPECT_MAT_SIMILAR(gold[1], d_flowy, 4e-3); + d_alg.gamma = 1; + alg->set("gamma", 1); + d_alg(loadMat(frame0, useRoi), loadMat(frame1, useRoi), d_flowx, d_flowy); + alg->calc(frame0, frame1, flow); + cv::split(flow, gold); + mx = cv::Mat(d_flowx); + my = cv::Mat(d_flowx); + + EXPECT_MAT_SIMILAR(gold[0], d_flowx, 4e-3); + EXPECT_MAT_SIMILAR(gold[1], d_flowy, 4e-3); } INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, OpticalFlowDual_TVL1, testing::Combine( diff --git a/modules/video/src/tvl1flow.cpp b/modules/video/src/tvl1flow.cpp index 781f621483..ebecc9bde8 100644 --- a/modules/video/src/tvl1flow.cpp +++ b/modules/video/src/tvl1flow.cpp @@ -121,8 +121,8 @@ private: std::vector > I0s; std::vector > I1s; std::vector > u1s; - std::vector > u2s; - std::vector > u3s; + std::vector > u2s; + std::vector > u3s; Mat_ I1x_buf; Mat_ I1y_buf; @@ -138,26 +138,26 @@ private: Mat_ rho_c_buf; Mat_ v1_buf; - Mat_ v2_buf; - Mat_ v3_buf; + Mat_ v2_buf; + Mat_ v3_buf; Mat_ p11_buf; Mat_ p12_buf; Mat_ p21_buf; - Mat_ p22_buf; - Mat_ p31_buf; - Mat_ p32_buf; + Mat_ p22_buf; + Mat_ 
p31_buf; + Mat_ p32_buf; Mat_ div_p1_buf; - Mat_ div_p2_buf; - Mat_ div_p3_buf; + Mat_ div_p2_buf; + Mat_ div_p3_buf; Mat_ u1x_buf; Mat_ u1y_buf; Mat_ u2x_buf; - Mat_ u2y_buf; - Mat_ u3x_buf; - Mat_ u3y_buf; + Mat_ u2y_buf; + Mat_ u3x_buf; + Mat_ u3y_buf; } dm; struct dataUMat { @@ -892,10 +892,6 @@ void CalcGradRhoBody::operator() (const Range& range) const // compute the constant part of the rho function rhoRow[x] = (I1wRow[x] - I1wxRow[x] * u1Row[x] - I1wyRow[x] * u2Row[x] - I0Row[x]); - //It = I1wRow[x] - I0Row[x] - //(u - u0)*i_X = I1wxRow[x] * u1Row[x] - //(v - v0)*i_Y = I1wyRow[x] * u2Row[x] - // gamma * w = gamma * u3 } } } @@ -970,7 +966,6 @@ void EstimateVBody::operator() (const Range& range) const float d1 = 0.0f; float d2 = 0.0f; float d3 = 0.0f; -// add d3 for 3 cases if (rho < -l_t * gradRow[x]) { d1 = l_t * I1wxRow[x];