From 10f2a038c2f30e78e14e530a6c9f9c17d618a1c5 Mon Sep 17 00:00:00 2001 From: mlyashko Date: Tue, 18 Feb 2014 14:24:30 +0400 Subject: [PATCH 1/6] Optical flow dualt tvl1 ocl-based implementation and tests --- .../perf/opencl/perf_optflow_dualTVL1.cpp | 113 +++ modules/video/src/opencl/optical_flow_tvl1.cl | 378 ++++++++++ modules/video/src/tvl1flow.cpp | 672 +++++++++++++++--- .../video/test/ocl/test_optflow_tvl1flow.cpp | 117 +++ 4 files changed, 1162 insertions(+), 118 deletions(-) create mode 100644 modules/video/perf/opencl/perf_optflow_dualTVL1.cpp create mode 100644 modules/video/src/opencl/optical_flow_tvl1.cl create mode 100644 modules/video/test/ocl/test_optflow_tvl1flow.cpp diff --git a/modules/video/perf/opencl/perf_optflow_dualTVL1.cpp b/modules/video/perf/opencl/perf_optflow_dualTVL1.cpp new file mode 100644 index 0000000000..8b71b9ad8f --- /dev/null +++ b/modules/video/perf/opencl/perf_optflow_dualTVL1.cpp @@ -0,0 +1,113 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "perf_precomp.hpp" +#include "opencv2/ts/ocl_perf.hpp" + +using std::tr1::make_tuple; + +#ifdef HAVE_OPENCL + +namespace cvtest { +namespace ocl { + +///////////// OpticalFlow Dual TVL1 //////////////////////// +typedef tuple< tuple, bool> OpticalFlowDualTVL1Params; +typedef TestBaseWithParam OpticalFlowDualTVL1Fixture; + +OCL_PERF_TEST_P(OpticalFlowDualTVL1Fixture, OpticalFlowDualTVL1, + ::testing::Combine( + ::testing::Values(make_tuple(-1, 0.3), + make_tuple(3, 0.5)), + ::testing::Bool() + ) + ) + { + Mat frame0 = imread(getDataPath("gpu/opticalflow/rubberwhale1.png"), cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame0.empty()) << "can't load rubberwhale1.png"; + + Mat frame1 = imread(getDataPath("gpu/opticalflow/rubberwhale2.png"), cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()) << "can't load rubberwhale2.png"; + + const Size srcSize = frame0.size(); + + const OpticalFlowDualTVL1Params params = GetParam(); + const tuple filteringScale = get<0>(params); + const int medianFiltering = get<0>(filteringScale); + const double scaleStep = get<1>(filteringScale); + const bool useInitFlow = get<1>(params); + const double eps = 0.001; + + UMat uFrame0; frame0.copyTo(uFrame0); + UMat uFrame1; frame1.copyTo(uFrame1); + UMat uFlow(srcSize, CV_32FC2); + declare.in(uFrame0, uFrame1, WARMUP_READ).out(uFlow, WARMUP_READ); + + //create algorithm + cv::Ptr alg = cv::createOptFlow_DualTVL1(); + + //set parameters + alg->set("scaleStep", scaleStep); + alg->setInt("medianFiltering", medianFiltering); + + if (useInitFlow) + { + //calculate initial flow as result of optical flow + OCL_ON(alg->calc(uFrame0, uFrame1, uFlow)); + } + + //set flag to use initial flow + alg->setBool("useInitialFlow", useInitFlow); + OCL_TEST_CYCLE() + alg->calc(uFrame0, uFrame1, uFlow); + + SANITY_CHECK(uFlow, eps, ERROR_RELATIVE); + } + + } +} // namespace cvtest::ocl + +#endif // HAVE_OPENCL \ No newline at end of file diff --git a/modules/video/src/opencl/optical_flow_tvl1.cl b/modules/video/src/opencl/optical_flow_tvl1.cl new file mode 100644 index 0000000000..472c4fa1a4 --- /dev/null +++ b/modules/video/src/opencl/optical_flow_tvl1.cl @@ -0,0 +1,378 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jin Ma jin@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +__kernel void centeredGradientKernel(__global const float* src_ptr, int src_col, int src_row, int src_step, + __global float* dx, __global float* dy, int d_step) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if((x < src_col)&&(y < src_row)) + { + int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1); + int src_x2 = (x - 1) > 0 ? (x -1) : 0; + dx[y * d_step+ x] = 0.5f * (src_ptr[y * src_step + src_x1] - src_ptr[y * src_step+ src_x2]); + + int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1); + int src_y2 = (y - 1) > 0 ? (y - 1) : 0; + dy[y * d_step+ x] = 0.5f * (src_ptr[src_y1 * src_step + x] - src_ptr[src_y2 * src_step+ x]); + } + +} + +inline float bicubicCoeff(float x_) +{ + + float x = fabs(x_); + if (x <= 1.0f) + return x * x * (1.5f * x - 2.5f) + 1.0f; + else if (x < 2.0f) + return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f; + else + return 0.0f; +} + +__kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_col, int I0_row, + image2d_t tex_I1, image2d_t tex_I1x, image2d_t tex_I1y, + __global const float* u1, int u1_step, + __global const float* u2, + __global float* I1w, + __global float* I1wx, /*int I1wx_step,*/ + __global float* I1wy, /*int I1wy_step,*/ + __global float* grad, /*int grad_step,*/ + __global float* rho, + int I1w_step, + int u2_step, + int u1_offset_x, + int u1_offset_y, + int u2_offset_x, + int u2_offset_y) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < I0_col&&y < I0_row) + { + //float u1Val = u1(y, x); + float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + //float u2Val = u2(y, x); + float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + + float wx = x + u1Val; + float wy = y + u2Val; + + int xmin = ceil(wx - 2.0f); + int xmax = floor(wx + 2.0f); + + int ymin = ceil(wy - 2.0f); + int ymax = floor(wy + 2.0f); + + float sum = 0.0f; + float sumx = 0.0f; + float sumy = 0.0f; + float wsum = 0.0f; + sampler_t sampleri = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; + for (int cy = ymin; cy <= ymax; ++cy) + { + for (int cx = xmin; cx <= xmax; ++cx) + { + float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); + //sum += w * tex2D(tex_I1 , cx, cy); + int2 cood = (int2)(cx, cy); + sum += w * read_imagef(tex_I1, sampleri, cood).x; + //sumx += w * tex2D(tex_I1x, cx, cy); + sumx += w * read_imagef(tex_I1x, sampleri, cood).x; + //sumy += w * tex2D(tex_I1y, cx, cy); + sumy += w * read_imagef(tex_I1y, sampleri, cood).x; + wsum += w; + } + } + float coeff = 1.0f / wsum; + float I1wVal = sum * coeff; + float I1wxVal = sumx * coeff; + float I1wyVal = sumy * coeff; + I1w[y * I1w_step + x] = I1wVal; + I1wx[y * I1w_step + x] = I1wxVal; + I1wy[y * I1w_step + x] = I1wyVal; + float Ix2 = I1wxVal * I1wxVal; + float Iy2 = I1wyVal * I1wyVal; + + // store the |Grad(I1)|^2 + grad[y * I1w_step + x] = Ix2 + Iy2; + + // compute the constant part of the rho function + float I0Val = I0[y * I0_step + x]; + rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val; + } +} + +inline float readImage(__global float *image, int x, int y, int rows, int cols, int elemCntPerRow) +{ + int i0 = clamp(x, 0, cols - 1); + int j0 = clamp(y, 0, rows - 1); + + return image[j0 * elemCntPerRow + i0]; +} + +__kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step, int I0_col, int I0_row, + __global const float* tex_I1, __global const float* tex_I1x, __global const float* tex_I1y, + __global const float* u1, int u1_step, + __global const float* u2, + __global float* I1w, + __global float* I1wx, /*int I1wx_step,*/ + __global float* I1wy, /*int I1wy_step,*/ + __global float* grad, /*int grad_step,*/ + __global float* rho, + int I1w_step, + int u2_step, + int I1_step, + int I1x_step) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < I0_col&&y < I0_row) + { + //float u1Val = u1(y, x); + float u1Val = u1[y * u1_step + x]; + //float u2Val = u2(y, x); + float u2Val = u2[y * u2_step + x]; + + float wx = x + u1Val; + float wy = y + u2Val; + + int xmin = ceil(wx - 2.0f); + int xmax = floor(wx + 2.0f); + + int ymin = ceil(wy - 2.0f); + int ymax = floor(wy + 2.0f); + + float sum = 0.0f; + float sumx = 0.0f; + float sumy = 0.0f; + float wsum = 0.0f; + + for (int cy = ymin; cy <= ymax; ++cy) + { + for (int cx = xmin; cx <= xmax; ++cx) + { + float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); + + int2 cood = (int2)(cx, cy); + sum += w * readImage(tex_I1, cood.x, cood.y, I0_col, I0_row, I1_step); + sumx += w * readImage(tex_I1x, cood.x, cood.y, I0_col, I0_row, I1x_step); + sumy += w * readImage(tex_I1y, cood.x, cood.y, I0_col, I0_row, I1x_step); + wsum += w; + } + } + + float coeff = 1.0f / wsum; + + float I1wVal = sum * coeff; + float I1wxVal = sumx * coeff; + float I1wyVal = sumy * coeff; + + I1w[y * I1w_step + x] = I1wVal; + I1wx[y * I1w_step + x] = I1wxVal; + I1wy[y * I1w_step + x] = I1wyVal; + + float Ix2 = I1wxVal * I1wxVal; + float Iy2 = I1wyVal * I1wyVal; + + // store the |Grad(I1)|^2 + grad[y * I1w_step + x] = Ix2 + Iy2; + + // compute the constant part of the rho function + float I0Val = I0[y * I0_step + x]; + rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val; + } + +} + + +__kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, int u1_row, int u1_step, + __global const float* u2, + __global float* p11, int p11_step, + __global float* p12, + __global float* p21, + __global float* p22, + float taut, + int u2_step, + int u1_offset_x, + int u1_offset_y, + int u2_offset_x, + int u2_offset_y) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < u1_col && y < u1_row) + { + int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1); + float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + + int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1); + float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + + int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1); + float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + + int src_y2 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1); + float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + + float g1 = hypot(u1x, u1y); + float g2 = hypot(u2x, u2y); + + float ng1 = 1.0f + taut * g1; + float ng2 = 1.0f + taut * g2; + + p11[y * p11_step + x] = (p11[y * p11_step + x] + taut * u1x) / ng1; + p12[y * p11_step + x] = (p12[y * p11_step + x] + taut * u1y) / ng1; + p21[y * p11_step + x] = (p21[y * p11_step + x] + taut * u2x) / ng2; + p22[y * p11_step + x] = (p22[y * p11_step + x] + taut * u2y) / ng2; + } + +} + +inline float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step) +{ + + if (x > 0 && y > 0) + { + float v1x = v1[y * v1_step + x] - v1[y * v1_step + x - 1]; + float v2y = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x]; + return v1x + v2y; + } + else + { + if (y > 0) + return v1[y * v1_step + 0] + v2[y * v2_step + 0] - v2[(y - 1) * v2_step + 0]; + else + { + if (x > 0) + return v1[0 * v1_step + x] - v1[0 * v1_step + x - 1] + v2[0 * v2_step + x]; + else + return v1[0 * v1_step + 0] + v2[0 * v2_step + 0]; + } + } + +} + +__kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx_row, int I1wx_step, + __global const float* I1wy, /*int I1wy_step,*/ + __global const float* grad, /*int grad_step,*/ + __global const float* rho_c, /*int rho_c_step,*/ + __global const float* p11, /*int p11_step,*/ + __global const float* p12, /*int p12_step,*/ + __global const float* p21, /*int p21_step,*/ + __global const float* p22, /*int p22_step,*/ + __global float* u1, int u1_step, + __global float* u2, + __global float* error, float l_t, float theta, int u2_step, + int u1_offset_x, + int u1_offset_y, + int u2_offset_x, + int u2_offset_y, + char calc_error) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < I1wx_col && y < I1wx_row) + { + float I1wxVal = I1wx[y * I1wx_step + x]; + float I1wyVal = I1wy[y * I1wx_step + x]; + float gradVal = grad[y * I1wx_step + x]; + float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + + float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal); + + // estimate the values of the variable (v1, v2) (thresholding operator TH) + + float d1 = 0.0f; + float d2 = 0.0f; + + if (rho < -l_t * gradVal) + { + d1 = l_t * I1wxVal; + d2 = l_t * I1wyVal; + } + else if (rho > l_t * gradVal) + { + d1 = -l_t * I1wxVal; + d2 = -l_t * I1wyVal; + } + else if (gradVal > 1.192092896e-07f) + { + float fi = -rho / gradVal; + d1 = fi * I1wxVal; + d2 = fi * I1wyVal; + } + + float v1 = u1OldVal + d1; + float v2 = u2OldVal + d2; + + // compute the divergence of the dual variable (p1, p2) + + float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step); + float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step); + + // estimate the values of the optical flow (u1, u2) + + float u1NewVal = v1 + theta * div_p1; + float u2NewVal = v2 + theta * div_p2; + + u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal; + u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal; + + if(calc_error) + { + float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); + float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); + error[y * I1wx_step + x] = n1 + n2; + } + } +} diff --git a/modules/video/src/tvl1flow.cpp b/modules/video/src/tvl1flow.cpp index ddcc32b405..f57aa8082d 100644 --- a/modules/video/src/tvl1flow.cpp +++ b/modules/video/src/tvl1flow.cpp @@ -73,7 +73,14 @@ */ #include "precomp.hpp" +#include "opencl_kernels.hpp" + #include +#include +#include +#include "opencv2/core/opencl/ocl_defs.hpp" + + using namespace cv; @@ -105,41 +112,254 @@ protected: private: void procOneScale(const Mat_& I0, const Mat_& I1, Mat_& u1, Mat_& u2); - std::vector > I0s; - std::vector > I1s; - std::vector > u1s; - std::vector > u2s; + bool procOneScale_ocl(const UMat& I0, const UMat& I1, UMat& u1, UMat& u2); + + bool calc_ocl(InputArray I0, InputArray I1, InputOutputArray flow); + struct dataMat + { + std::vector > I0s; + std::vector > I1s; + std::vector > u1s; + std::vector > u2s; + + Mat_ I1x_buf; + Mat_ I1y_buf; + + Mat_ flowMap1_buf; + Mat_ flowMap2_buf; + + Mat_ I1w_buf; + Mat_ I1wx_buf; + Mat_ I1wy_buf; - Mat_ I1x_buf; - Mat_ I1y_buf; + Mat_ grad_buf; + Mat_ rho_c_buf; - Mat_ flowMap1_buf; - Mat_ flowMap2_buf; + Mat_ v1_buf; + Mat_ v2_buf; - Mat_ I1w_buf; - Mat_ I1wx_buf; - Mat_ I1wy_buf; + Mat_ p11_buf; + Mat_ p12_buf; + Mat_ p21_buf; + Mat_ p22_buf; - Mat_ grad_buf; - Mat_ rho_c_buf; + Mat_ div_p1_buf; + Mat_ div_p2_buf; - Mat_ v1_buf; - Mat_ v2_buf; + Mat_ u1x_buf; + Mat_ u1y_buf; + Mat_ u2x_buf; + Mat_ u2y_buf; + } dm; + struct dataUMat + { + std::vector I0s; + std::vector I1s; + std::vector u1s; + std::vector u2s; + + UMat I1x_buf; + UMat I1y_buf; - Mat_ p11_buf; - Mat_ p12_buf; - Mat_ p21_buf; - Mat_ p22_buf; + UMat I1w_buf; + UMat I1wx_buf; + UMat I1wy_buf; - Mat_ div_p1_buf; - Mat_ div_p2_buf; + UMat grad_buf; + UMat rho_c_buf; - Mat_ u1x_buf; - Mat_ u1y_buf; - Mat_ u2x_buf; - Mat_ u2y_buf; + UMat p11_buf; + UMat p12_buf; + UMat p21_buf; + UMat p22_buf; + + UMat diff_buf; + UMat norm_buf; + } dum; }; +namespace cv_ocl_tvl1flow +{ + bool centeredGradient(const UMat &src, UMat &dx, UMat &dy); + + bool warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UMat &I1y, + UMat &u1, UMat &u2, UMat &I1w, UMat &I1wx, UMat &I1wy, + UMat &grad, UMat &rho); + + bool estimateU(UMat &I1wx, UMat &I1wy, UMat &grad, + UMat &rho_c, UMat &p11, UMat &p12, + UMat &p21, UMat &p22, UMat &u1, + UMat &u2, UMat &error, float l_t, float theta, char calc_error); + + bool estimateDualVariables(UMat &u1, UMat &u2, + UMat &p11, UMat &p12, UMat &p21, UMat &p22, float taut); +} + +bool cv_ocl_tvl1flow::centeredGradient(const UMat &src, UMat &dx, UMat &dy) +{ +#ifdef ANDROID + size_t localsize[2] = { 32, 4 }; +#else + size_t localsize[2] = { 32, 8 }; +#endif + size_t globalsize[2] = { src.cols, src.rows }; + + ocl::Kernel kernel; + if (!kernel.create("centeredGradientKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, "")) + return false; + + int idxArg = 0; + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(src));//src mat + idxArg = kernel.set(idxArg, (int)(src.cols));//src mat col + idxArg = kernel.set(idxArg, (int)(src.rows));//src mat rows + idxArg = kernel.set(idxArg, (int)(src.step / src.elemSize()));//src mat step + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dx));//res mat dx + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dy));//res mat dy + idxArg = kernel.set(idxArg, (int)(dx.step/dx.elemSize()));//res mat step + return kernel.run(2, globalsize, localsize, false); + +} + +bool cv_ocl_tvl1flow::warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UMat &I1y, + UMat &u1, UMat &u2, UMat &I1w, UMat &I1wx, UMat &I1wy, + UMat &grad, UMat &rho) +{ +#ifdef ANDROID + size_t localsize[2] = { 32, 4 }; +#else + size_t localsize[2] = { 32, 8 }; +#endif + size_t globalsize[2] = { I0.cols, I0.rows }; + + ocl::Kernel kernel; + if (!kernel.create("warpBackwardKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, "")) + return false; + + int idxArg = 0; + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I0));//I0 mat + int I0_step = (int)(I0.step / I0.elemSize()); + idxArg = kernel.set(idxArg, I0_step);//I0_step + idxArg = kernel.set(idxArg, (int)(I0.cols));//I0_col + idxArg = kernel.set(idxArg, (int)(I0.rows));//I0_row + ocl::Image2D imageI1(I1); + ocl::Image2D imageI1x(I1x); + ocl::Image2D imageI1y(I1y); + idxArg = kernel.set(idxArg, imageI1);//image2d_t tex_I1 + idxArg = kernel.set(idxArg, imageI1x);//image2d_t tex_I1x + idxArg = kernel.set(idxArg, imageI1y);//image2d_t tex_I1y + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u1));//const float* u1 + idxArg = kernel.set(idxArg, (int)(u1.step / u1.elemSize()));//int u1_step + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u2));//const float* u2 + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1w));///float* I1w + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1wx));//float* I1wx + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1wy));//float* I1wy + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(grad));//float* grad + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(rho));//float* rho + idxArg = kernel.set(idxArg, (int)(I1w.step / I1w.elemSize()));//I1w_step + idxArg = kernel.set(idxArg, (int)(u2.step / u2.elemSize()));//u2_step + int u1_offset_x = (int)((u1.offset) % (u1.step)); + u1_offset_x = (int)(u1_offset_x / u1.elemSize()); + idxArg = kernel.set(idxArg, (int)u1_offset_x );//u1_offset_x + idxArg = kernel.set(idxArg, (int)(u1.offset/u1.step));//u1_offset_y + int u2_offset_x = (int)((u2.offset) % (u2.step)); + u2_offset_x = (int) (u2_offset_x / u2.elemSize()); + idxArg = kernel.set(idxArg, (int)u2_offset_x);//u2_offset_x + idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step));//u2_offset_y + + return kernel.run(2, globalsize, localsize, false); + +} + +bool cv_ocl_tvl1flow::estimateU(UMat &I1wx, UMat &I1wy, UMat &grad, + UMat &rho_c, UMat &p11, UMat &p12, + UMat &p21, UMat &p22, UMat &u1, + UMat &u2, UMat &error, float l_t, float theta, char calc_error) +{ +#ifdef ANDROID + size_t localsize[2] = { 32, 4 }; +#else + size_t localsize[2] = { 32, 8 }; +#endif + size_t globalsize[2] = { I1wx.cols, I1wx.rows }; + + ocl::Kernel kernel; + if (!kernel.create("estimateUKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, "")) + return false; + + int idxArg = 0; + + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wx)); //const float* I1wx + idxArg = kernel.set(idxArg, (int)(I1wx.cols)); //int I1wx_col + idxArg = kernel.set(idxArg, (int)(I1wx.rows)); //int I1wx_row + idxArg = kernel.set(idxArg, (int)(I1wx.step/I1wx.elemSize())); //int I1wx_step + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wy)); //const float* I1wy + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(grad)); //const float* grad + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(rho_c)); //const float* rho_c + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p11)); //const float* p11 + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p12)); //const float* p12 + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p21)); //const float* p21 + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p22)); //const float* p22 + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(u1)); //float* u1 + idxArg = kernel.set(idxArg, (int)(u1.step / u1.elemSize())); //int u1_step + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(u2)); //float* u2 + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(error)); //float* error + idxArg = kernel.set(idxArg, (float)l_t); //float l_t + idxArg = kernel.set(idxArg, (float)theta); //float theta + idxArg = kernel.set(idxArg, (int)(u2.step / u2.elemSize()));//int u2_step + int u1_offset_x = (int)(u1.offset % u1.step); + u1_offset_x = (int) (u1_offset_x / u1.elemSize()); + idxArg = kernel.set(idxArg, (int)u1_offset_x); //int u1_offset_x + idxArg = kernel.set(idxArg, (int)(u1.offset/u1.step)); //int u1_offset_y + int u2_offset_x = (int)(u2.offset % u2.step); + u2_offset_x = (int)(u2_offset_x / u2.elemSize()); + idxArg = kernel.set(idxArg, (int)u2_offset_x ); //int u2_offset_x + idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y + idxArg = kernel.set(idxArg, (char)calc_error); //char calc_error + + return kernel.run(2, globalsize, localsize, false); +} + +bool cv_ocl_tvl1flow::estimateDualVariables(UMat &u1, UMat &u2, + UMat &p11, UMat &p12, UMat &p21, UMat &p22, float taut) +{ +#ifdef ANDROID + size_t localsize[2] = { 32, 4 }; +#else + size_t localsize[2] = { 32, 8 }; +#endif + size_t globalsize[2] = { u1.cols, u1.rows }; + + ocl::Kernel kernel; + if (!kernel.create("estimateDualVariablesKernel", cv::ocl::video::optical_flow_tvl1_oclsrc, "")) + return false; + + int idxArg = 0; + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u1));// const float* u1 + idxArg = kernel.set(idxArg, (int)(u1.cols)); //int u1_col + idxArg = kernel.set(idxArg, (int)(u1.rows)); //int u1_row + idxArg = kernel.set(idxArg, (int)(u1.step/u1.elemSize())); //int u1_step + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u2)); // const float* u2 + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p11)); // float* p11 + idxArg = kernel.set(idxArg, (int)(p11.step/p11.elemSize())); //int p11_step + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p12)); // float* p12 + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p21)); // float* p21 + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p22)); // float* p22 + idxArg = kernel.set(idxArg, (float)(taut)); //float taut + idxArg = kernel.set(idxArg, (int)(u2.step/u2.elemSize())); //int u2_step + int u1_offset_x = (int)(u1.offset % u1.step); + u1_offset_x = (int)(u1_offset_x / u1.elemSize()); + idxArg = kernel.set(idxArg, u1_offset_x); //int u1_offset_x + idxArg = kernel.set(idxArg, (int)(u1.offset / u1.step)); //int u1_offset_y + int u2_offset_x = (int)(u2.offset % u2.step); + u2_offset_x = (int)(u2_offset_x / u2.elemSize()); + idxArg = kernel.set(idxArg, u2_offset_x); //int u2_offset_x + idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y + + return kernel.run(2, globalsize, localsize, false); + +} + + OpticalFlowDual_TVL1::OpticalFlowDual_TVL1() { tau = 0.25; @@ -157,6 +377,8 @@ OpticalFlowDual_TVL1::OpticalFlowDual_TVL1() void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray _flow) { + CV_OCL_RUN(_flow.isUMat(), calc_ocl(_I0, _I1, _flow)) + Mat I0 = _I0.getMat(); Mat I1 = _I1.getMat(); @@ -167,59 +389,59 @@ void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray CV_Assert( nscales > 0 ); // allocate memory for the pyramid structure - I0s.resize(nscales); - I1s.resize(nscales); - u1s.resize(nscales); - u2s.resize(nscales); + dm.I0s.resize(nscales); + dm.I1s.resize(nscales); + dm.u1s.resize(nscales); + dm.u2s.resize(nscales); - I0.convertTo(I0s[0], I0s[0].depth(), I0.depth() == CV_8U ? 1.0 : 255.0); - I1.convertTo(I1s[0], I1s[0].depth(), I1.depth() == CV_8U ? 1.0 : 255.0); + I0.convertTo(dm.I0s[0], dm.I0s[0].depth(), I0.depth() == CV_8U ? 1.0 : 255.0); + I1.convertTo(dm.I1s[0], dm.I1s[0].depth(), I1.depth() == CV_8U ? 1.0 : 255.0); - u1s[0].create(I0.size()); - u2s[0].create(I0.size()); + dm.u1s[0].create(I0.size()); + dm.u2s[0].create(I0.size()); if (useInitialFlow) { - Mat_ mv[] = {u1s[0], u2s[0]}; + Mat_ mv[] = { dm.u1s[0], dm.u2s[0] }; split(_flow.getMat(), mv); } - I1x_buf.create(I0.size()); - I1y_buf.create(I0.size()); + dm.I1x_buf.create(I0.size()); + dm.I1y_buf.create(I0.size()); - flowMap1_buf.create(I0.size()); - flowMap2_buf.create(I0.size()); + dm.flowMap1_buf.create(I0.size()); + dm.flowMap2_buf.create(I0.size()); - I1w_buf.create(I0.size()); - I1wx_buf.create(I0.size()); - I1wy_buf.create(I0.size()); + dm.I1w_buf.create(I0.size()); + dm.I1wx_buf.create(I0.size()); + dm.I1wy_buf.create(I0.size()); - grad_buf.create(I0.size()); - rho_c_buf.create(I0.size()); + dm.grad_buf.create(I0.size()); + dm.rho_c_buf.create(I0.size()); - v1_buf.create(I0.size()); - v2_buf.create(I0.size()); + dm.v1_buf.create(I0.size()); + dm.v2_buf.create(I0.size()); - p11_buf.create(I0.size()); - p12_buf.create(I0.size()); - p21_buf.create(I0.size()); - p22_buf.create(I0.size()); + dm.p11_buf.create(I0.size()); + dm.p12_buf.create(I0.size()); + dm.p21_buf.create(I0.size()); + dm.p22_buf.create(I0.size()); - div_p1_buf.create(I0.size()); - div_p2_buf.create(I0.size()); + dm.div_p1_buf.create(I0.size()); + dm.div_p2_buf.create(I0.size()); - u1x_buf.create(I0.size()); - u1y_buf.create(I0.size()); - u2x_buf.create(I0.size()); - u2y_buf.create(I0.size()); + dm.u1x_buf.create(I0.size()); + dm.u1y_buf.create(I0.size()); + dm.u2x_buf.create(I0.size()); + dm.u2y_buf.create(I0.size()); // create the scales for (int s = 1; s < nscales; ++s) { - resize(I0s[s-1], I0s[s], Size(), scaleStep, scaleStep); - resize(I1s[s-1], I1s[s], Size(), scaleStep, scaleStep); + resize(dm.I0s[s - 1], dm.I0s[s], Size(), scaleStep, scaleStep); + resize(dm.I1s[s - 1], dm.I1s[s], Size(), scaleStep, scaleStep); - if (I0s[s].cols < 16 || I0s[s].rows < 16) + if (dm.I0s[s].cols < 16 || dm.I0s[s].rows < 16) { nscales = s; break; @@ -227,30 +449,30 @@ void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray if (useInitialFlow) { - resize(u1s[s-1], u1s[s], Size(), scaleStep, scaleStep); - resize(u2s[s-1], u2s[s], Size(), scaleStep, scaleStep); + resize(dm.u1s[s - 1], dm.u1s[s], Size(), scaleStep, scaleStep); + resize(dm.u2s[s - 1], dm.u2s[s], Size(), scaleStep, scaleStep); - multiply(u1s[s], Scalar::all(scaleStep), u1s[s]); - multiply(u2s[s], Scalar::all(scaleStep), u2s[s]); + multiply(dm.u1s[s], Scalar::all(scaleStep), dm.u1s[s]); + multiply(dm.u2s[s], Scalar::all(scaleStep), dm.u2s[s]); } else { - u1s[s].create(I0s[s].size()); - u2s[s].create(I0s[s].size()); + dm.u1s[s].create(dm.I0s[s].size()); + dm.u2s[s].create(dm.I0s[s].size()); } } if (!useInitialFlow) { - u1s[nscales-1].setTo(Scalar::all(0)); - u2s[nscales-1].setTo(Scalar::all(0)); + dm.u1s[nscales - 1].setTo(Scalar::all(0)); + dm.u2s[nscales - 1].setTo(Scalar::all(0)); } // pyramidal structure for computing the optical flow for (int s = nscales - 1; s >= 0; --s) { // compute the optical flow at the current scale - procOneScale(I0s[s], I1s[s], u1s[s], u2s[s]); + procOneScale(dm.I0s[s], dm.I1s[s], dm.u1s[s], dm.u2s[s]); // if this was the last scale, finish now if (s == 0) @@ -259,18 +481,118 @@ void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray // otherwise, upsample the optical flow // zoom the optical flow for the next finer scale - resize(u1s[s], u1s[s - 1], I0s[s - 1].size()); - resize(u2s[s], u2s[s - 1], I0s[s - 1].size()); + resize(dm.u1s[s], dm.u1s[s - 1], dm.I0s[s - 1].size()); + resize(dm.u2s[s], dm.u2s[s - 1], dm.I0s[s - 1].size()); // scale the optical flow with the appropriate zoom factor - multiply(u1s[s - 1], Scalar::all(1/scaleStep), u1s[s - 1]); - multiply(u2s[s - 1], Scalar::all(1/scaleStep), u2s[s - 1]); + multiply(dm.u1s[s - 1], Scalar::all(1 / scaleStep), dm.u1s[s - 1]); + multiply(dm.u2s[s - 1], Scalar::all(1 / scaleStep), dm.u2s[s - 1]); } - Mat uxy[] = {u1s[0], u2s[0]}; + Mat uxy[] = { dm.u1s[0], dm.u2s[0] }; merge(uxy, 2, _flow); } +bool OpticalFlowDual_TVL1::calc_ocl(InputArray _I0, InputArray _I1, InputOutputArray _flow) +{ + UMat I0 = _I0.getUMat(); + UMat I1 = _I1.getUMat(); + + CV_Assert(I0.type() == CV_8UC1 || I0.type() == CV_32FC1); + CV_Assert(I0.size() == I1.size()); + CV_Assert(I0.type() == I1.type()); + CV_Assert(!useInitialFlow || (_flow.size() == I0.size() && _flow.type() == CV_32FC2)); + CV_Assert(nscales > 0); + + // allocate memory for the pyramid structure + dum.I0s.resize(nscales); + dum.I1s.resize(nscales); + dum.u1s.resize(nscales); + dum.u2s.resize(nscales); + //I0s_step == I1s_step + double alpha = I0.depth() == CV_8U ? 1.0 : 255.0; + + I0.convertTo(dum.I0s[0], CV_32F, alpha); + I1.convertTo(dum.I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0); + + dum.u1s[0].create(I0.size(), CV_32FC1); + dum.u2s[0].create(I0.size(), CV_32FC1); + + if (useInitialFlow) + { + std::vector umv; + umv.push_back(dum.u1s[0]); + umv.push_back(dum.u2s[0]); + cv::split(_flow,umv); + } + + dum.I1x_buf.create(I0.size(), CV_32FC1); + dum.I1y_buf.create(I0.size(), CV_32FC1); + + dum.I1w_buf.create(I0.size(), CV_32FC1); + dum.I1wx_buf.create(I0.size(), CV_32FC1); + dum.I1wy_buf.create(I0.size(), CV_32FC1); + + dum.grad_buf.create(I0.size(), CV_32FC1); + dum.rho_c_buf.create(I0.size(), CV_32FC1); + + dum.p11_buf.create(I0.size(), CV_32FC1); + dum.p12_buf.create(I0.size(), CV_32FC1); + dum.p21_buf.create(I0.size(), CV_32FC1); + dum.p22_buf.create(I0.size(), CV_32FC1); + + dum.diff_buf.create(I0.size(), CV_32FC1); + + // create the scales + for (int s = 1; s < nscales; ++s) + { + resize(dum.I0s[s - 1], dum.I0s[s], Size(), scaleStep, scaleStep); + resize(dum.I1s[s - 1], dum.I1s[s], Size(), scaleStep, scaleStep); + + if (dum.I0s[s].cols < 16 || dum.I0s[s].rows < 16) + { + nscales = s; + break; + } + + if (useInitialFlow) + { + resize(dum.u1s[s - 1], dum.u1s[s], Size(), scaleStep, scaleStep); + resize(dum.u2s[s - 1], dum.u2s[s], Size(), scaleStep, scaleStep); + + //scale by scale factor + multiply(dum.u1s[s], Scalar::all(scaleStep), dum.u1s[s]); + multiply(dum.u2s[s], Scalar::all(scaleStep), dum.u2s[s]); + } + } + + // pyramidal structure for computing the optical flow + for (int s = nscales - 1; s >= 0; --s) + { + // compute the optical flow at the current scale + if (!OpticalFlowDual_TVL1::procOneScale_ocl(dum.I0s[s], dum.I1s[s], dum.u1s[s], dum.u2s[s])) + return false; + + // if this was the last scale, finish now + if (s == 0) + break; + + // zoom the optical flow for the next finer scale + resize(dum.u1s[s], dum.u1s[s - 1], dum.I0s[s - 1].size()); + resize(dum.u2s[s], dum.u2s[s - 1], dum.I0s[s - 1].size()); + + // scale the optical flow with the appropriate zoom factor + multiply(dum.u1s[s - 1], Scalar::all(1 / scaleStep), dum.u1s[s - 1]); + multiply(dum.u2s[s - 1], Scalar::all(1 / scaleStep), dum.u2s[s - 1]); + } + + std::vector uxy; + uxy.push_back(dum.u1s[0]); + uxy.push_back(dum.u2s[0]); + merge(uxy, _flow); + return true; +} + //////////////////////////////////////////////////////////// // buildFlowMap @@ -803,6 +1125,94 @@ void estimateDualVariables(const Mat_& u1x, const Mat_& u1y, const parallel_for_(Range(0, u1x.rows), body); } +bool OpticalFlowDual_TVL1::procOneScale_ocl(const UMat& I0, const UMat& I1, UMat& u1, UMat& u2) +{ + using namespace cv_ocl_tvl1flow; + + const double scaledEpsilon = epsilon * epsilon * I0.size().area(); + + CV_DbgAssert(I1.size() == I0.size()); + CV_DbgAssert(I1.type() == I0.type()); + CV_DbgAssert(u1.empty() || u1.size() == I0.size()); + CV_DbgAssert(u2.size() == u1.size()); + + if (u1.empty()) + { + u1.create(I0.size(), CV_32FC1); + u1.setTo(Scalar::all(0)); + + u2.create(I0.size(), CV_32FC1); + u2.setTo(Scalar::all(0)); + } + + UMat I1x = dum.I1x_buf(Rect(0, 0, I0.cols, I0.rows)); + UMat I1y = dum.I1y_buf(Rect(0, 0, I0.cols, I0.rows)); + + if (!centeredGradient(I1, I1x, I1y)) + return false; + + UMat I1w = dum.I1w_buf(Rect(0, 0, I0.cols, I0.rows)); + UMat I1wx = dum.I1wx_buf(Rect(0, 0, I0.cols, I0.rows)); + UMat I1wy = dum.I1wy_buf(Rect(0, 0, I0.cols, I0.rows)); + + UMat grad = dum.grad_buf(Rect(0, 0, I0.cols, I0.rows)); + UMat rho_c = dum.rho_c_buf(Rect(0, 0, I0.cols, I0.rows)); + + UMat p11 = dum.p11_buf(Rect(0, 0, I0.cols, I0.rows)); + UMat p12 = dum.p12_buf(Rect(0, 0, I0.cols, I0.rows)); + UMat p21 = dum.p21_buf(Rect(0, 0, I0.cols, I0.rows)); + UMat p22 = dum.p22_buf(Rect(0, 0, I0.cols, I0.rows)); + p11.setTo(Scalar::all(0)); + p12.setTo(Scalar::all(0)); + p21.setTo(Scalar::all(0)); + p22.setTo(Scalar::all(0)); + + UMat diff = dum.diff_buf(Rect(0, 0, I0.cols, I0.rows)); + + const float l_t = static_cast(lambda * theta); + const float taut = static_cast(tau / theta); + int n; + + for (int warpings = 0; warpings < warps; ++warpings) + { + if (!warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c)) + return false; + + double error = std::numeric_limits::max(); + double prev_error = 0; + + for (int n_outer = 0; error > scaledEpsilon && n_outer < outerIterations; ++n_outer) + { + if (medianFiltering > 1) { + cv::medianBlur(u1, u1, medianFiltering); + cv::medianBlur(u2, u2, medianFiltering); + } + for (int n_inner = 0; error > scaledEpsilon && n_inner < innerIterations; ++n_inner) + { + // some tweaks to make sum operation less frequently + n = n_inner + n_outer*innerIterations; + char calc_error = (n & 0x1) && (prev_error < scaledEpsilon); + if (!estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, + u1, u2, diff, l_t, static_cast(theta), calc_error)) + return false; + if (calc_error) + { + error = cv::sum(diff)[0]; + prev_error = error; + } + else + { + error = std::numeric_limits::max(); + prev_error -= scaledEpsilon; + } + if (!estimateDualVariables(u1, u2, p11, p12, p21, p22, taut)) + return false; + } + } + } + return true; +} + void OpticalFlowDual_TVL1::procOneScale(const Mat_& I0, const Mat_& I1, Mat_& u1, Mat_& u2) { const float scaledEpsilon = static_cast(epsilon * epsilon * I0.size().area()); @@ -812,39 +1222,39 @@ void OpticalFlowDual_TVL1::procOneScale(const Mat_& I0, const Mat_ CV_DbgAssert( u1.size() == I0.size() ); CV_DbgAssert( u2.size() == u1.size() ); - Mat_ I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ I1x = dm.I1x_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ I1y = dm.I1y_buf(Rect(0, 0, I0.cols, I0.rows)); centeredGradient(I1, I1x, I1y); - Mat_ flowMap1 = flowMap1_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ flowMap2 = flowMap2_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ flowMap1 = dm.flowMap1_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ flowMap2 = dm.flowMap2_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ I1w = dm.I1w_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ I1wx = dm.I1wx_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ I1wy = dm.I1wy_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ grad = grad_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ grad = dm.grad_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ rho_c = dm.rho_c_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ v1 = v1_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ v2 = v2_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ v1 = dm.v1_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ v2 = dm.v2_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ p11 = dm.p11_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ p12 = dm.p12_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ p21 = dm.p21_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ p22 = dm.p22_buf(Rect(0, 0, I0.cols, I0.rows)); p11.setTo(Scalar::all(0)); p12.setTo(Scalar::all(0)); p21.setTo(Scalar::all(0)); p22.setTo(Scalar::all(0)); - Mat_ div_p1 = div_p1_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ div_p2 = div_p2_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ div_p1 = dm.div_p1_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ div_p2 = dm.div_p2_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ u1x = u1x_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ u1y = u1y_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ u2x = u2x_buf(Rect(0, 0, I0.cols, I0.rows)); - Mat_ u2y = u2y_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ u1x = dm.u1x_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ u1y = dm.u1y_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ u2x = dm.u2x_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ u2y = dm.u2y_buf(Rect(0, 0, I0.cols, I0.rows)); const float l_t = static_cast(lambda * theta); const float taut = static_cast(tau / theta); @@ -891,41 +1301,67 @@ void OpticalFlowDual_TVL1::procOneScale(const Mat_& I0, const Mat_ void OpticalFlowDual_TVL1::collectGarbage() { - I0s.clear(); - I1s.clear(); - u1s.clear(); - u2s.clear(); + //dataMat structure dm + dm.I0s.clear(); + dm.I1s.clear(); + dm.u1s.clear(); + dm.u2s.clear(); - I1x_buf.release(); - I1y_buf.release(); + dm.I1x_buf.release(); + dm.I1y_buf.release(); - flowMap1_buf.release(); - flowMap2_buf.release(); + dm.flowMap1_buf.release(); + dm.flowMap2_buf.release(); - I1w_buf.release(); - I1wx_buf.release(); - I1wy_buf.release(); + dm.I1w_buf.release(); + dm.I1wx_buf.release(); + dm.I1wy_buf.release(); - grad_buf.release(); - rho_c_buf.release(); + dm.grad_buf.release(); + dm.rho_c_buf.release(); - v1_buf.release(); - v2_buf.release(); + dm.v1_buf.release(); + dm.v2_buf.release(); - p11_buf.release(); - p12_buf.release(); - p21_buf.release(); - p22_buf.release(); + dm.p11_buf.release(); + dm.p12_buf.release(); + dm.p21_buf.release(); + dm.p22_buf.release(); - div_p1_buf.release(); - div_p2_buf.release(); + dm.div_p1_buf.release(); + dm.div_p2_buf.release(); - u1x_buf.release(); - u1y_buf.release(); - u2x_buf.release(); - u2y_buf.release(); + dm.u1x_buf.release(); + dm.u1y_buf.release(); + dm.u2x_buf.release(); + dm.u2y_buf.release(); + + //dataUMat structure dum + dum.I0s.clear(); + dum.I1s.clear(); + dum.u1s.clear(); + dum.u2s.clear(); + + dum.I1x_buf.release(); + dum.I1y_buf.release(); + + dum.I1w_buf.release(); + dum.I1wx_buf.release(); + dum.I1wy_buf.release(); + + dum.grad_buf.release(); + dum.rho_c_buf.release(); + + dum.p11_buf.release(); + dum.p12_buf.release(); + dum.p21_buf.release(); + dum.p22_buf.release(); + + dum.diff_buf.release(); + dum.norm_buf.release(); } + CV_INIT_ALGORITHM(OpticalFlowDual_TVL1, "DenseOpticalFlow.DualTVL1", obj.info()->addParam(obj, "tau", obj.tau, false, 0, 0, "Time step of the numerical scheme"); diff --git a/modules/video/test/ocl/test_optflow_tvl1flow.cpp b/modules/video/test/ocl/test_optflow_tvl1flow.cpp new file mode 100644 index 0000000000..6e7150718b --- /dev/null +++ b/modules/video/test/ocl/test_optflow_tvl1flow.cpp @@ -0,0 +1,117 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" +#include "opencv2/ts/ocl_test.hpp" + +#ifdef HAVE_OPENCL + +namespace cvtest { +namespace ocl { + +///////////////////////////////////////////////////////////////////////////////////////////////// +// Optical_flow_tvl1 +namespace +{ + IMPLEMENT_PARAM_CLASS(UseInitFlow, bool) + IMPLEMENT_PARAM_CLASS(MedianFiltering, int) + IMPLEMENT_PARAM_CLASS(ScaleStep, double) +} + +PARAM_TEST_CASE(OpticalFlowTVL1, UseInitFlow, MedianFiltering, ScaleStep) +{ + bool useInitFlow; + int medianFiltering; + double scaleStep; + virtual void SetUp() + { + useInitFlow = GET_PARAM(0); + medianFiltering = GET_PARAM(1); + scaleStep = GET_PARAM(2); + } +}; + +OCL_TEST_P(OpticalFlowTVL1, Mat) +{ + cv::Mat frame0 = readImage("optflow/RubberWhale1.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame0.empty()); + + cv::Mat frame1 = readImage("optflow/RubberWhale2.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()); + + cv::Mat flow; cv::UMat uflow; + + //create algorithm + cv::Ptr alg = cv::createOptFlow_DualTVL1(); + + //set parameters + alg->set("scaleStep", scaleStep); + alg->setInt("medianFiltering", medianFiltering); + + //create initial flow as result of algorithm calculation + if (useInitFlow) + { + OCL_ON(alg->calc(frame0, frame1, uflow)); + uflow.copyTo(flow); + } + + //set flag to use initial flow as it is ready to use + alg->setBool("useInitialFlow", useInitFlow); + + OCL_OFF(alg->calc(frame0, frame1, flow)); + OCL_ON(alg->calc(frame0, frame1, uflow)); + + EXPECT_MAT_SIMILAR(flow, uflow, 1e-2); +} + +OCL_INSTANTIATE_TEST_CASE_P(Video, OpticalFlowTVL1, + Combine( + Values(UseInitFlow(false), UseInitFlow(true)), + Values(MedianFiltering(3), MedianFiltering(-1)), + Values(ScaleStep(0.3),ScaleStep(0.5)) + ) + ); + +} } // namespace cvtest::ocl + +#endif // HAVE_OPENCL \ No newline at end of file From e75a257f929079c9e21a0821092c7a00d137d2c2 Mon Sep 17 00:00:00 2001 From: mlyashko Date: Mon, 24 Feb 2014 17:47:56 +0400 Subject: [PATCH 2/6] changed epsilon for test passing --- modules/video/perf/opencl/perf_optflow_dualTVL1.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/modules/video/perf/opencl/perf_optflow_dualTVL1.cpp b/modules/video/perf/opencl/perf_optflow_dualTVL1.cpp index 8b71b9ad8f..adf5e35832 100644 --- a/modules/video/perf/opencl/perf_optflow_dualTVL1.cpp +++ b/modules/video/perf/opencl/perf_optflow_dualTVL1.cpp @@ -66,10 +66,10 @@ OCL_PERF_TEST_P(OpticalFlowDualTVL1Fixture, OpticalFlowDualTVL1, ) ) { - Mat frame0 = imread(getDataPath("gpu/opticalflow/rubberwhale1.png"), cv::IMREAD_GRAYSCALE); + Mat frame0 = imread(getDataPath("cv/optflow/rubberwhale1.png"), cv::IMREAD_GRAYSCALE); ASSERT_FALSE(frame0.empty()) << "can't load rubberwhale1.png"; - Mat frame1 = imread(getDataPath("gpu/opticalflow/rubberwhale2.png"), cv::IMREAD_GRAYSCALE); + Mat frame1 = imread(getDataPath("cv/optflow/rubberwhale2.png"), cv::IMREAD_GRAYSCALE); ASSERT_FALSE(frame1.empty()) << "can't load rubberwhale2.png"; const Size srcSize = frame0.size(); @@ -79,7 +79,7 @@ OCL_PERF_TEST_P(OpticalFlowDualTVL1Fixture, OpticalFlowDualTVL1, const int medianFiltering = get<0>(filteringScale); const double scaleStep = get<1>(filteringScale); const bool useInitFlow = get<1>(params); - const double eps = 0.001; + double eps = 0.9; UMat uFrame0; frame0.copyTo(uFrame0); UMat uFrame1; frame1.copyTo(uFrame1); @@ -96,7 +96,7 @@ OCL_PERF_TEST_P(OpticalFlowDualTVL1Fixture, OpticalFlowDualTVL1, if (useInitFlow) { //calculate initial flow as result of optical flow - OCL_ON(alg->calc(uFrame0, uFrame1, uFlow)); + alg->calc(uFrame0, uFrame1, uFlow); } //set flag to use initial flow @@ -106,8 +106,7 @@ OCL_PERF_TEST_P(OpticalFlowDualTVL1Fixture, OpticalFlowDualTVL1, SANITY_CHECK(uFlow, eps, ERROR_RELATIVE); } - - } +} } // namespace cvtest::ocl #endif // HAVE_OPENCL \ No newline at end of file From ba5ebaa7588e1442f76f89e0214f0dbecb2f9f49 Mon Sep 17 00:00:00 2001 From: mlyashko Date: Wed, 26 Feb 2014 10:55:18 +0400 Subject: [PATCH 3/6] changed file name to case-sensitve to support linux --- modules/video/perf/opencl/perf_optflow_dualTVL1.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/video/perf/opencl/perf_optflow_dualTVL1.cpp b/modules/video/perf/opencl/perf_optflow_dualTVL1.cpp index adf5e35832..72b1b0cbb2 100644 --- a/modules/video/perf/opencl/perf_optflow_dualTVL1.cpp +++ b/modules/video/perf/opencl/perf_optflow_dualTVL1.cpp @@ -66,11 +66,11 @@ OCL_PERF_TEST_P(OpticalFlowDualTVL1Fixture, OpticalFlowDualTVL1, ) ) { - Mat frame0 = imread(getDataPath("cv/optflow/rubberwhale1.png"), cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(frame0.empty()) << "can't load rubberwhale1.png"; + Mat frame0 = imread(getDataPath("cv/optflow/RubberWhale1.png"), cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame0.empty()) << "can't load RubberWhale1.png"; - Mat frame1 = imread(getDataPath("cv/optflow/rubberwhale2.png"), cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(frame1.empty()) << "can't load rubberwhale2.png"; + Mat frame1 = imread(getDataPath("cv/optflow/RubberWhale2.png"), cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()) << "can't load RubberWhale2.png"; const Size srcSize = frame0.size(); From 5c867463d2ff7b2b8b6cdb876d674cecbcc484ef Mon Sep 17 00:00:00 2001 From: mlyashko Date: Tue, 4 Mar 2014 11:36:47 +0400 Subject: [PATCH 4/6] changed localsize to NULL (reviewers comment) --- modules/video/src/tvl1flow.cpp | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/modules/video/src/tvl1flow.cpp b/modules/video/src/tvl1flow.cpp index f57aa8082d..097014577e 100644 --- a/modules/video/src/tvl1flow.cpp +++ b/modules/video/src/tvl1flow.cpp @@ -197,11 +197,6 @@ namespace cv_ocl_tvl1flow bool cv_ocl_tvl1flow::centeredGradient(const UMat &src, UMat &dx, UMat &dy) { -#ifdef ANDROID - size_t localsize[2] = { 32, 4 }; -#else - size_t localsize[2] = { 32, 8 }; -#endif size_t globalsize[2] = { src.cols, src.rows }; ocl::Kernel kernel; @@ -216,19 +211,13 @@ bool cv_ocl_tvl1flow::centeredGradient(const UMat &src, UMat &dx, UMat &dy) idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dx));//res mat dx idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dy));//res mat dy idxArg = kernel.set(idxArg, (int)(dx.step/dx.elemSize()));//res mat step - return kernel.run(2, globalsize, localsize, false); - + return kernel.run(2, globalsize, NULL, false); } bool cv_ocl_tvl1flow::warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UMat &I1y, UMat &u1, UMat &u2, UMat &I1w, UMat &I1wx, UMat &I1wy, UMat &grad, UMat &rho) { -#ifdef ANDROID - size_t localsize[2] = { 32, 4 }; -#else - size_t localsize[2] = { 32, 8 }; -#endif size_t globalsize[2] = { I0.cols, I0.rows }; ocl::Kernel kernel; @@ -266,8 +255,7 @@ bool cv_ocl_tvl1flow::warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UM idxArg = kernel.set(idxArg, (int)u2_offset_x);//u2_offset_x idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step));//u2_offset_y - return kernel.run(2, globalsize, localsize, false); - + return kernel.run(2, globalsize, NULL, false); } bool cv_ocl_tvl1flow::estimateU(UMat &I1wx, UMat &I1wy, UMat &grad, @@ -275,11 +263,6 @@ bool cv_ocl_tvl1flow::estimateU(UMat &I1wx, UMat &I1wy, UMat &grad, UMat &p21, UMat &p22, UMat &u1, UMat &u2, UMat &error, float l_t, float theta, char calc_error) { -#ifdef ANDROID - size_t localsize[2] = { 32, 4 }; -#else - size_t localsize[2] = { 32, 8 }; -#endif size_t globalsize[2] = { I1wx.cols, I1wx.rows }; ocl::Kernel kernel; @@ -316,17 +299,12 @@ bool cv_ocl_tvl1flow::estimateU(UMat &I1wx, UMat &I1wy, UMat &grad, idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y idxArg = kernel.set(idxArg, (char)calc_error); //char calc_error - return kernel.run(2, globalsize, localsize, false); + return kernel.run(2, globalsize, NULL, false); } bool cv_ocl_tvl1flow::estimateDualVariables(UMat &u1, UMat &u2, UMat &p11, UMat &p12, UMat &p21, UMat &p22, float taut) { -#ifdef ANDROID - size_t localsize[2] = { 32, 4 }; -#else - size_t localsize[2] = { 32, 8 }; -#endif size_t globalsize[2] = { u1.cols, u1.rows }; ocl::Kernel kernel; @@ -355,7 +333,7 @@ bool cv_ocl_tvl1flow::estimateDualVariables(UMat &u1, UMat &u2, idxArg = kernel.set(idxArg, u2_offset_x); //int u2_offset_x idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y - return kernel.run(2, globalsize, localsize, false); + return kernel.run(2, globalsize, NULL, false); } From 66ed6a8a50e15c628e56a748b6c9defa5d660152 Mon Sep 17 00:00:00 2001 From: mlyashko Date: Thu, 6 Mar 2014 11:43:01 +0400 Subject: [PATCH 5/6] macro for normal return from thriveless kernel.set --- modules/core/include/opencv2/core/ocl.hpp | 11 +++ modules/video/src/lkpyramid.cpp | 10 --- modules/video/src/tvl1flow.cpp | 99 ++++++++++++++++++++++- 3 files changed, 106 insertions(+), 14 deletions(-) diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index c6b0cf2d0f..abdc9c5d5b 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -599,6 +599,17 @@ protected: CV_EXPORTS MatAllocator* getOpenCLAllocator(); +#define SAFE_KERNEL_SET_ARG(idx, arg) \ +{\ + int idxNew = kernel.set(idx, arg); \ +if (-1 == idxNew)\ +{\ + printf("algorithm can't setup argument index = %d to kernel\n", idx); \ + return false; \ +}\ + idx = idxNew; \ +} + }} #endif diff --git a/modules/video/src/lkpyramid.cpp b/modules/video/src/lkpyramid.cpp index 598e69c888..71fc9590b3 100644 --- a/modules/video/src/lkpyramid.cpp +++ b/modules/video/src/lkpyramid.cpp @@ -706,16 +706,6 @@ namespace cv block.z = patch.z = 1; } - #define SAFE_KERNEL_SET_ARG(idx, arg) \ - {\ - int idxNew = kernel.set(idx, arg);\ - if (-1 == idxNew)\ - {\ - printf("lkSparse_run can't setup argument index = %d to kernel\n", idx);\ - return false;\ - }\ - idx = idxNew;\ - } bool lkSparse_run(UMat &I, UMat &J, const UMat &prevPts, UMat &nextPts, UMat &status, UMat& err, int ptcount, int level) { diff --git a/modules/video/src/tvl1flow.cpp b/modules/video/src/tvl1flow.cpp index 097014577e..e1a5b7856e 100644 --- a/modules/video/src/tvl1flow.cpp +++ b/modules/video/src/tvl1flow.cpp @@ -204,6 +204,7 @@ bool cv_ocl_tvl1flow::centeredGradient(const UMat &src, UMat &dx, UMat &dy) return false; int idxArg = 0; +#if 0 idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(src));//src mat idxArg = kernel.set(idxArg, (int)(src.cols));//src mat col idxArg = kernel.set(idxArg, (int)(src.rows));//src mat rows @@ -211,6 +212,15 @@ bool cv_ocl_tvl1flow::centeredGradient(const UMat &src, UMat &dx, UMat &dy) idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dx));//res mat dx idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dy));//res mat dy idxArg = kernel.set(idxArg, (int)(dx.step/dx.elemSize()));//res mat step +#else + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(src)); + SAFE_KERNEL_SET_ARG(idxArg, (int)(src.cols)); + SAFE_KERNEL_SET_ARG(idxArg, (int)(src.rows)); + SAFE_KERNEL_SET_ARG(idxArg, (int)(src.step / src.elemSize())); + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrWriteOnly(dx)); + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrWriteOnly(dy)); + SAFE_KERNEL_SET_ARG(idxArg, (int)(dx.step / dx.elemSize())); +#endif return kernel.run(2, globalsize, NULL, false); } @@ -225,6 +235,7 @@ bool cv_ocl_tvl1flow::warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UM return false; int idxArg = 0; +#if 0 idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I0));//I0 mat int I0_step = (int)(I0.step / I0.elemSize()); idxArg = kernel.set(idxArg, I0_step);//I0_step @@ -254,7 +265,37 @@ bool cv_ocl_tvl1flow::warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UM u2_offset_x = (int) (u2_offset_x / u2.elemSize()); idxArg = kernel.set(idxArg, (int)u2_offset_x);//u2_offset_x idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step));//u2_offset_y - +#else + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(I0));//I0 mat + int I0_step = (int)(I0.step / I0.elemSize()); + SAFE_KERNEL_SET_ARG(idxArg, I0_step);//I0_step + SAFE_KERNEL_SET_ARG(idxArg, (int)(I0.cols));//I0_col + SAFE_KERNEL_SET_ARG(idxArg, (int)(I0.rows));//I0_row + ocl::Image2D imageI1(I1); + ocl::Image2D imageI1x(I1x); + ocl::Image2D imageI1y(I1y); + SAFE_KERNEL_SET_ARG(idxArg, imageI1);//image2d_t tex_I1 + SAFE_KERNEL_SET_ARG(idxArg, imageI1x);//image2d_t tex_I1x + SAFE_KERNEL_SET_ARG(idxArg, imageI1y);//image2d_t tex_I1y + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(u1));//const float* u1 + SAFE_KERNEL_SET_ARG(idxArg, (int)(u1.step / u1.elemSize()));//int u1_step + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(u2));//const float* u2 + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrWriteOnly(I1w));///float* I1w + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrWriteOnly(I1wx));//float* I1wx + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrWriteOnly(I1wy));//float* I1wy + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrWriteOnly(grad));//float* grad + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrWriteOnly(rho));//float* rho + SAFE_KERNEL_SET_ARG(idxArg, (int)(I1w.step / I1w.elemSize()));//I1w_step + SAFE_KERNEL_SET_ARG(idxArg, (int)(u2.step / u2.elemSize()));//u2_step + int u1_offset_x = (int)((u1.offset) % (u1.step)); + u1_offset_x = (int)(u1_offset_x / u1.elemSize()); + SAFE_KERNEL_SET_ARG(idxArg, (int)u1_offset_x);//u1_offset_x + SAFE_KERNEL_SET_ARG(idxArg, (int)(u1.offset / u1.step));//u1_offset_y + int u2_offset_x = (int)((u2.offset) % (u2.step)); + u2_offset_x = (int)(u2_offset_x / u2.elemSize()); + SAFE_KERNEL_SET_ARG(idxArg, (int)u2_offset_x);//u2_offset_x + SAFE_KERNEL_SET_ARG(idxArg, (int)(u2.offset / u2.step));//u2_offset_y +#endif return kernel.run(2, globalsize, NULL, false); } @@ -270,7 +311,7 @@ bool cv_ocl_tvl1flow::estimateU(UMat &I1wx, UMat &I1wy, UMat &grad, return false; int idxArg = 0; - +#if 0 idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wx)); //const float* I1wx idxArg = kernel.set(idxArg, (int)(I1wx.cols)); //int I1wx_col idxArg = kernel.set(idxArg, (int)(I1wx.rows)); //int I1wx_row @@ -298,7 +339,35 @@ bool cv_ocl_tvl1flow::estimateU(UMat &I1wx, UMat &I1wy, UMat &grad, idxArg = kernel.set(idxArg, (int)u2_offset_x ); //int u2_offset_x idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y idxArg = kernel.set(idxArg, (char)calc_error); //char calc_error - +#else + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(I1wx)); //const float* I1wx + SAFE_KERNEL_SET_ARG(idxArg, (int)(I1wx.cols)); //int I1wx_col + SAFE_KERNEL_SET_ARG(idxArg, (int)(I1wx.rows)); //int I1wx_row + SAFE_KERNEL_SET_ARG(idxArg, (int)(I1wx.step / I1wx.elemSize())); //int I1wx_step + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(I1wy)); //const float* I1wy + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(grad)); //const float* grad + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(rho_c)); //const float* rho_c + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(p11)); //const float* p11 + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(p12)); //const float* p12 + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(p21)); //const float* p21 + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(p22)); //const float* p22 + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(u1)); //float* u1 + SAFE_KERNEL_SET_ARG(idxArg, (int)(u1.step / u1.elemSize())); //int u1_step + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(u2)); //float* u2 + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrWriteOnly(error)); //float* error + SAFE_KERNEL_SET_ARG(idxArg, (float)l_t); //float l_t + SAFE_KERNEL_SET_ARG(idxArg, (float)theta); //float theta + SAFE_KERNEL_SET_ARG(idxArg, (int)(u2.step / u2.elemSize()));//int u2_step + int u1_offset_x = (int)(u1.offset % u1.step); + u1_offset_x = (int)(u1_offset_x / u1.elemSize()); + SAFE_KERNEL_SET_ARG(idxArg, (int)u1_offset_x); //int u1_offset_x + SAFE_KERNEL_SET_ARG(idxArg, (int)(u1.offset / u1.step)); //int u1_offset_y + int u2_offset_x = (int)(u2.offset % u2.step); + u2_offset_x = (int)(u2_offset_x / u2.elemSize()); + SAFE_KERNEL_SET_ARG(idxArg, (int)u2_offset_x); //int u2_offset_x + SAFE_KERNEL_SET_ARG(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y + SAFE_KERNEL_SET_ARG(idxArg, (char)calc_error); //char calc_error +#endif return kernel.run(2, globalsize, NULL, false); } @@ -312,6 +381,7 @@ bool cv_ocl_tvl1flow::estimateDualVariables(UMat &u1, UMat &u2, return false; int idxArg = 0; +#if 0 idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u1));// const float* u1 idxArg = kernel.set(idxArg, (int)(u1.cols)); //int u1_col idxArg = kernel.set(idxArg, (int)(u1.rows)); //int u1_row @@ -332,7 +402,28 @@ bool cv_ocl_tvl1flow::estimateDualVariables(UMat &u1, UMat &u2, u2_offset_x = (int)(u2_offset_x / u2.elemSize()); idxArg = kernel.set(idxArg, u2_offset_x); //int u2_offset_x idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y - +#else + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(u1));// const float* u1 + SAFE_KERNEL_SET_ARG(idxArg, (int)(u1.cols)); //int u1_col + SAFE_KERNEL_SET_ARG(idxArg, (int)(u1.rows)); //int u1_row + SAFE_KERNEL_SET_ARG(idxArg, (int)(u1.step / u1.elemSize())); //int u1_step + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(u2)); // const float* u2 + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(p11)); // float* p11 + SAFE_KERNEL_SET_ARG(idxArg, (int)(p11.step / p11.elemSize())); //int p11_step + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(p12)); // float* p12 + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(p21)); // float* p21 + SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(p22)); // float* p22 + SAFE_KERNEL_SET_ARG(idxArg, (float)(taut)); //float taut + SAFE_KERNEL_SET_ARG(idxArg, (int)(u2.step / u2.elemSize())); //int u2_step + int u1_offset_x = (int)(u1.offset % u1.step); + u1_offset_x = (int)(u1_offset_x / u1.elemSize()); + SAFE_KERNEL_SET_ARG(idxArg, u1_offset_x); //int u1_offset_x + SAFE_KERNEL_SET_ARG(idxArg, (int)(u1.offset / u1.step)); //int u1_offset_y + int u2_offset_x = (int)(u2.offset % u2.step); + u2_offset_x = (int)(u2_offset_x / u2.elemSize()); + SAFE_KERNEL_SET_ARG(idxArg, u2_offset_x); //int u2_offset_x + idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y +#endif return kernel.run(2, globalsize, NULL, false); } From eb2e8a11faa496871b3c8d3737a5497db926ddb1 Mon Sep 17 00:00:00 2001 From: mlyashko Date: Fri, 7 Mar 2014 11:15:54 +0400 Subject: [PATCH 6/6] changed kernel.set to support unsuccssful set --- modules/core/include/opencv2/core/ocl.hpp | 11 --- modules/core/src/ocl.cpp | 6 +- modules/video/src/lkpyramid.cpp | 21 ----- modules/video/src/tvl1flow.cpp | 98 +---------------------- 4 files changed, 6 insertions(+), 130 deletions(-) diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index abdc9c5d5b..c6b0cf2d0f 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -599,17 +599,6 @@ protected: CV_EXPORTS MatAllocator* getOpenCLAllocator(); -#define SAFE_KERNEL_SET_ARG(idx, arg) \ -{\ - int idxNew = kernel.set(idx, arg); \ -if (-1 == idxNew)\ -{\ - printf("algorithm can't setup argument index = %d to kernel\n", idx); \ - return false; \ -}\ - idx = idxNew; \ -} - }} #endif diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index c7f18dcb17..d34ea94fc0 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -2796,7 +2796,8 @@ int Kernel::set(int i, const void* value, size_t sz) { if (!p || !p->handle) return -1; - CV_Assert(i >= 0); + if (i < 0) + return i; if( i == 0 ) p->cleanupUMats(); @@ -2822,7 +2823,8 @@ int Kernel::set(int i, const KernelArg& arg) { if( !p || !p->handle ) return -1; - CV_Assert( i >= 0 ); + if (i < 0) + return i; if( i == 0 ) p->cleanupUMats(); if( arg.m ) diff --git a/modules/video/src/lkpyramid.cpp b/modules/video/src/lkpyramid.cpp index 71fc9590b3..158ec279bc 100644 --- a/modules/video/src/lkpyramid.cpp +++ b/modules/video/src/lkpyramid.cpp @@ -726,7 +726,6 @@ namespace cv ocl::Image2D imageI(I); ocl::Image2D imageJ(J); int idxArg = 0; -#if 0 idxArg = kernel.set(idxArg, imageI); //image2d_t I idxArg = kernel.set(idxArg, imageJ); //image2d_t J idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(prevPts)); // __global const float2* prevPts @@ -744,26 +743,6 @@ namespace cv idxArg = kernel.set(idxArg, (int)winSize.height); // int c_winSize_y idxArg = kernel.set(idxArg, (int)iters); // int c_iters idxArg = kernel.set(idxArg, (char)calcErr); //char calcErr -#else - SAFE_KERNEL_SET_ARG(idxArg, imageI); //image2d_t I - SAFE_KERNEL_SET_ARG(idxArg, imageJ); //image2d_t J - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(prevPts)); // __global const float2* prevPts - SAFE_KERNEL_SET_ARG(idxArg, (int)prevPts.step); // int prevPtsStep - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(nextPts)); // __global const float2* nextPts - SAFE_KERNEL_SET_ARG(idxArg, (int)nextPts.step); // int nextPtsStep - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(status)); // __global uchar* status - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(err)); // __global float* err - SAFE_KERNEL_SET_ARG(idxArg, (int)level); // const int level - SAFE_KERNEL_SET_ARG(idxArg, (int)I.rows); // const int rows - SAFE_KERNEL_SET_ARG(idxArg, (int)I.cols); // const int cols - SAFE_KERNEL_SET_ARG(idxArg, (int)patch.x); // int PATCH_X - SAFE_KERNEL_SET_ARG(idxArg, (int)patch.y); // int PATCH_Y - SAFE_KERNEL_SET_ARG(idxArg, (int)winSize.width); // int c_winSize_x - SAFE_KERNEL_SET_ARG(idxArg, (int)winSize.height); // int c_winSize_y - SAFE_KERNEL_SET_ARG(idxArg, (int)iters); // int c_iters - SAFE_KERNEL_SET_ARG(idxArg, (char)calcErr); //char calcErr -#endif - return kernel.run(2, globalThreads, localThreads, true); } private: diff --git a/modules/video/src/tvl1flow.cpp b/modules/video/src/tvl1flow.cpp index e1a5b7856e..fad73ef65d 100644 --- a/modules/video/src/tvl1flow.cpp +++ b/modules/video/src/tvl1flow.cpp @@ -204,7 +204,6 @@ bool cv_ocl_tvl1flow::centeredGradient(const UMat &src, UMat &dx, UMat &dy) return false; int idxArg = 0; -#if 0 idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(src));//src mat idxArg = kernel.set(idxArg, (int)(src.cols));//src mat col idxArg = kernel.set(idxArg, (int)(src.rows));//src mat rows @@ -212,15 +211,6 @@ bool cv_ocl_tvl1flow::centeredGradient(const UMat &src, UMat &dx, UMat &dy) idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dx));//res mat dx idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dy));//res mat dy idxArg = kernel.set(idxArg, (int)(dx.step/dx.elemSize()));//res mat step -#else - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(src)); - SAFE_KERNEL_SET_ARG(idxArg, (int)(src.cols)); - SAFE_KERNEL_SET_ARG(idxArg, (int)(src.rows)); - SAFE_KERNEL_SET_ARG(idxArg, (int)(src.step / src.elemSize())); - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrWriteOnly(dx)); - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrWriteOnly(dy)); - SAFE_KERNEL_SET_ARG(idxArg, (int)(dx.step / dx.elemSize())); -#endif return kernel.run(2, globalsize, NULL, false); } @@ -235,7 +225,6 @@ bool cv_ocl_tvl1flow::warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UM return false; int idxArg = 0; -#if 0 idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I0));//I0 mat int I0_step = (int)(I0.step / I0.elemSize()); idxArg = kernel.set(idxArg, I0_step);//I0_step @@ -265,37 +254,6 @@ bool cv_ocl_tvl1flow::warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UM u2_offset_x = (int) (u2_offset_x / u2.elemSize()); idxArg = kernel.set(idxArg, (int)u2_offset_x);//u2_offset_x idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step));//u2_offset_y -#else - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(I0));//I0 mat - int I0_step = (int)(I0.step / I0.elemSize()); - SAFE_KERNEL_SET_ARG(idxArg, I0_step);//I0_step - SAFE_KERNEL_SET_ARG(idxArg, (int)(I0.cols));//I0_col - SAFE_KERNEL_SET_ARG(idxArg, (int)(I0.rows));//I0_row - ocl::Image2D imageI1(I1); - ocl::Image2D imageI1x(I1x); - ocl::Image2D imageI1y(I1y); - SAFE_KERNEL_SET_ARG(idxArg, imageI1);//image2d_t tex_I1 - SAFE_KERNEL_SET_ARG(idxArg, imageI1x);//image2d_t tex_I1x - SAFE_KERNEL_SET_ARG(idxArg, imageI1y);//image2d_t tex_I1y - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(u1));//const float* u1 - SAFE_KERNEL_SET_ARG(idxArg, (int)(u1.step / u1.elemSize()));//int u1_step - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(u2));//const float* u2 - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrWriteOnly(I1w));///float* I1w - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrWriteOnly(I1wx));//float* I1wx - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrWriteOnly(I1wy));//float* I1wy - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrWriteOnly(grad));//float* grad - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrWriteOnly(rho));//float* rho - SAFE_KERNEL_SET_ARG(idxArg, (int)(I1w.step / I1w.elemSize()));//I1w_step - SAFE_KERNEL_SET_ARG(idxArg, (int)(u2.step / u2.elemSize()));//u2_step - int u1_offset_x = (int)((u1.offset) % (u1.step)); - u1_offset_x = (int)(u1_offset_x / u1.elemSize()); - SAFE_KERNEL_SET_ARG(idxArg, (int)u1_offset_x);//u1_offset_x - SAFE_KERNEL_SET_ARG(idxArg, (int)(u1.offset / u1.step));//u1_offset_y - int u2_offset_x = (int)((u2.offset) % (u2.step)); - u2_offset_x = (int)(u2_offset_x / u2.elemSize()); - SAFE_KERNEL_SET_ARG(idxArg, (int)u2_offset_x);//u2_offset_x - SAFE_KERNEL_SET_ARG(idxArg, (int)(u2.offset / u2.step));//u2_offset_y -#endif return kernel.run(2, globalsize, NULL, false); } @@ -311,7 +269,6 @@ bool cv_ocl_tvl1flow::estimateU(UMat &I1wx, UMat &I1wy, UMat &grad, return false; int idxArg = 0; -#if 0 idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wx)); //const float* I1wx idxArg = kernel.set(idxArg, (int)(I1wx.cols)); //int I1wx_col idxArg = kernel.set(idxArg, (int)(I1wx.rows)); //int I1wx_row @@ -339,35 +296,7 @@ bool cv_ocl_tvl1flow::estimateU(UMat &I1wx, UMat &I1wy, UMat &grad, idxArg = kernel.set(idxArg, (int)u2_offset_x ); //int u2_offset_x idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y idxArg = kernel.set(idxArg, (char)calc_error); //char calc_error -#else - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(I1wx)); //const float* I1wx - SAFE_KERNEL_SET_ARG(idxArg, (int)(I1wx.cols)); //int I1wx_col - SAFE_KERNEL_SET_ARG(idxArg, (int)(I1wx.rows)); //int I1wx_row - SAFE_KERNEL_SET_ARG(idxArg, (int)(I1wx.step / I1wx.elemSize())); //int I1wx_step - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(I1wy)); //const float* I1wy - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(grad)); //const float* grad - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(rho_c)); //const float* rho_c - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(p11)); //const float* p11 - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(p12)); //const float* p12 - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(p21)); //const float* p21 - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(p22)); //const float* p22 - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(u1)); //float* u1 - SAFE_KERNEL_SET_ARG(idxArg, (int)(u1.step / u1.elemSize())); //int u1_step - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(u2)); //float* u2 - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrWriteOnly(error)); //float* error - SAFE_KERNEL_SET_ARG(idxArg, (float)l_t); //float l_t - SAFE_KERNEL_SET_ARG(idxArg, (float)theta); //float theta - SAFE_KERNEL_SET_ARG(idxArg, (int)(u2.step / u2.elemSize()));//int u2_step - int u1_offset_x = (int)(u1.offset % u1.step); - u1_offset_x = (int)(u1_offset_x / u1.elemSize()); - SAFE_KERNEL_SET_ARG(idxArg, (int)u1_offset_x); //int u1_offset_x - SAFE_KERNEL_SET_ARG(idxArg, (int)(u1.offset / u1.step)); //int u1_offset_y - int u2_offset_x = (int)(u2.offset % u2.step); - u2_offset_x = (int)(u2_offset_x / u2.elemSize()); - SAFE_KERNEL_SET_ARG(idxArg, (int)u2_offset_x); //int u2_offset_x - SAFE_KERNEL_SET_ARG(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y - SAFE_KERNEL_SET_ARG(idxArg, (char)calc_error); //char calc_error -#endif + return kernel.run(2, globalsize, NULL, false); } @@ -381,7 +310,6 @@ bool cv_ocl_tvl1flow::estimateDualVariables(UMat &u1, UMat &u2, return false; int idxArg = 0; -#if 0 idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u1));// const float* u1 idxArg = kernel.set(idxArg, (int)(u1.cols)); //int u1_col idxArg = kernel.set(idxArg, (int)(u1.rows)); //int u1_row @@ -402,33 +330,11 @@ bool cv_ocl_tvl1flow::estimateDualVariables(UMat &u1, UMat &u2, u2_offset_x = (int)(u2_offset_x / u2.elemSize()); idxArg = kernel.set(idxArg, u2_offset_x); //int u2_offset_x idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y -#else - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(u1));// const float* u1 - SAFE_KERNEL_SET_ARG(idxArg, (int)(u1.cols)); //int u1_col - SAFE_KERNEL_SET_ARG(idxArg, (int)(u1.rows)); //int u1_row - SAFE_KERNEL_SET_ARG(idxArg, (int)(u1.step / u1.elemSize())); //int u1_step - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadOnly(u2)); // const float* u2 - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(p11)); // float* p11 - SAFE_KERNEL_SET_ARG(idxArg, (int)(p11.step / p11.elemSize())); //int p11_step - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(p12)); // float* p12 - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(p21)); // float* p21 - SAFE_KERNEL_SET_ARG(idxArg, ocl::KernelArg::PtrReadWrite(p22)); // float* p22 - SAFE_KERNEL_SET_ARG(idxArg, (float)(taut)); //float taut - SAFE_KERNEL_SET_ARG(idxArg, (int)(u2.step / u2.elemSize())); //int u2_step - int u1_offset_x = (int)(u1.offset % u1.step); - u1_offset_x = (int)(u1_offset_x / u1.elemSize()); - SAFE_KERNEL_SET_ARG(idxArg, u1_offset_x); //int u1_offset_x - SAFE_KERNEL_SET_ARG(idxArg, (int)(u1.offset / u1.step)); //int u1_offset_y - int u2_offset_x = (int)(u2.offset % u2.step); - u2_offset_x = (int)(u2_offset_x / u2.elemSize()); - SAFE_KERNEL_SET_ARG(idxArg, u2_offset_x); //int u2_offset_x - idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y -#endif + return kernel.run(2, globalsize, NULL, false); } - OpticalFlowDual_TVL1::OpticalFlowDual_TVL1() { tau = 0.25;