diff --git a/modules/optflow/perf/opencl/perf_dis_optflow.cpp b/modules/optflow/perf/opencl/perf_dis_optflow.cpp new file mode 100644 index 000000000..cdbc61a53 --- /dev/null +++ b/modules/optflow/perf/opencl/perf_dis_optflow.cpp @@ -0,0 +1,113 @@ +/* + * By downloading, copying, installing or using the software you agree to this license. + * If you do not agree to this license, do not download, install, + * copy or use the software. + * + * + * License Agreement + * For Open Source Computer Vision Library + * (3 - clause BSD License) + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met : + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and / or other materials provided with the distribution. + * + * * Neither the names of the copyright holders nor the names of the contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * This software is provided by the copyright holders and contributors "as is" and + * any express or implied warranties, including, but not limited to, the implied + * warranties of merchantability and fitness for a particular purpose are disclaimed. + * In no event shall copyright holders or contributors be liable for any direct, + * indirect, incidental, special, exemplary, or consequential damages + * (including, but not limited to, procurement of substitute goods or services; + * loss of use, data, or profits; or business interruption) however caused + * and on any theory of liability, whether in contract, strict liability, + * or tort(including negligence or otherwise) arising in any way out of + * the use of this software, even if advised of the possibility of such damage. + */ + +#include "../perf_precomp.hpp" +#include "opencv2/ts/ocl_perf.hpp" + +using std::tr1::tuple; +using std::tr1::get; +using namespace perf; +using namespace testing; +using namespace cv; +using namespace cv::optflow; + +#ifdef HAVE_OPENCL + +namespace cvtest { +namespace ocl { + +void MakeArtificialExample(UMat &dst_frame1, UMat &dst_frame2); + +typedef tuple DISParams; +typedef TestBaseWithParam DenseOpticalFlow_DIS; + +OCL_PERF_TEST_P(DenseOpticalFlow_DIS, perf, + Combine(Values("PRESET_ULTRAFAST", "PRESET_FAST", "PRESET_MEDIUM"), Values(szVGA, sz720p, sz1080p))) +{ + DISParams params = GetParam(); + + // use strings to print preset names in the perf test results: + String preset_string = get<0>(params); + int preset = DISOpticalFlow::PRESET_FAST; + if (preset_string == "PRESET_ULTRAFAST") + preset = DISOpticalFlow::PRESET_ULTRAFAST; + else if (preset_string == "PRESET_FAST") + preset = DISOpticalFlow::PRESET_FAST; + else if (preset_string == "PRESET_MEDIUM") + preset = DISOpticalFlow::PRESET_MEDIUM; + Size sz = get<1>(params); + + UMat frame1(sz, CV_8U); + UMat frame2(sz, CV_8U); + UMat flow; + + MakeArtificialExample(frame1, frame2); + + Ptr algo = createOptFlow_DIS(preset); + + OCL_TEST_CYCLE_N(10) + { + algo->calc(frame1, frame2, flow); + } + + SANITY_CHECK_NOTHING(); +} + +void MakeArtificialExample(UMat &dst_frame1, UMat &dst_frame2) +{ + int src_scale = 2; + int OF_scale = 6; + double sigma = dst_frame1.cols / 300; + + UMat tmp(Size(dst_frame1.cols / (int)pow(2, src_scale), dst_frame1.rows / (int)pow(2, src_scale)), CV_8U); + randu(tmp, 0, 255); + resize(tmp, dst_frame1, dst_frame1.size(), 0.0, 0.0, INTER_LINEAR); + resize(tmp, dst_frame2, dst_frame2.size(), 0.0, 0.0, INTER_LINEAR); + + Mat displacement_field(Size(dst_frame1.cols / (int)pow(2, OF_scale), dst_frame1.rows / (int)pow(2, OF_scale)), + CV_32FC2); + randn(displacement_field, 0.0, sigma); + resize(displacement_field, displacement_field, dst_frame2.size(), 0.0, 0.0, INTER_CUBIC); + for (int i = 0; i < displacement_field.rows; i++) + for (int j = 0; j < displacement_field.cols; j++) + displacement_field.at(i, j) += Vec2f((float)j, (float)i); + + remap(dst_frame2, dst_frame2, displacement_field, Mat(), INTER_LINEAR, BORDER_REPLICATE); +} + +} } // namespace cvtest::ocl + +#endif // HAVE_OPENCL diff --git a/modules/optflow/src/dis_flow.cpp b/modules/optflow/src/dis_flow.cpp index 3980a467e..81e53e768 100644 --- a/modules/optflow/src/dis_flow.cpp +++ b/modules/optflow/src/dis_flow.cpp @@ -42,6 +42,8 @@ #include "opencv2/core/hal/intrin.hpp" #include "precomp.hpp" +#include "opencl_kernels_optflow.hpp" + using namespace std; #define EPS 0.001F #define INF 1E+10F @@ -166,6 +168,50 @@ class DISOpticalFlowImpl : public DISOpticalFlow Mat &src_Sy, Mat &_I0, Mat &_I1); void operator()(const Range &range) const; }; + +#ifdef HAVE_OPENCL + vector u_I0s; //!< Gaussian pyramid for the current frame + vector u_I1s; //!< Gaussian pyramid for the next frame + vector u_I1s_ext; //!< I1s with borders + + vector u_I0xs; //!< Gaussian pyramid for the x gradient of the current frame + vector u_I0ys; //!< Gaussian pyramid for the y gradient of the current frame + + vector u_Ux; //!< x component of the flow vectors + vector u_Uy; //!< y component of the flow vectors + + vector u_initial_Ux; //!< x component of the initial flow field, if one was passed as an input + vector u_initial_Uy; //!< y component of the initial flow field, if one was passed as an input + + UMat u_U; //!< a buffer for the merged flow + + UMat u_Sx; //!< intermediate sparse flow representation (x component) + UMat u_Sy; //!< intermediate sparse flow representation (y component) + + /* Structure tensor components: */ + UMat u_I0xx_buf; //!< sum of squares of x gradient values + UMat u_I0yy_buf; //!< sum of squares of y gradient values + UMat u_I0xy_buf; //!< sum of x and y gradient products + + /* Extra buffers that are useful if patch mean-normalization is used: */ + UMat u_I0x_buf; //!< sum of x gradient values + UMat u_I0y_buf; //!< sum of y gradient values + + /* Auxiliary buffers used in structure tensor computation: */ + UMat u_I0xx_buf_aux; + UMat u_I0yy_buf_aux; + UMat u_I0xy_buf_aux; + UMat u_I0x_buf_aux; + UMat u_I0y_buf_aux; + + bool ocl_precomputeStructureTensor(UMat &dst_I0xx, UMat &dst_I0yy, UMat &dst_I0xy, + UMat &dst_I0x, UMat &dst_I0y, UMat &I0x, UMat &I0y); + void ocl_prepareBuffers(UMat &I0, UMat &I1, UMat &flow, bool use_flow); + bool ocl_calc(InputArray I0, InputArray I1, InputOutputArray flow); + bool ocl_Densification(UMat &dst_Ux, UMat &dst_Uy, UMat &src_Sx, UMat &src_Sy, UMat &_I0, UMat &_I1); + bool ocl_PatchInverseSearch(UMat &src_Ux, UMat &src_Uy, + UMat &I0, UMat &I1, UMat &I0x, UMat &I0y, int num_iter, int pyr_level); +#endif }; DISOpticalFlowImpl::DISOpticalFlowImpl() @@ -1004,6 +1050,319 @@ void DISOpticalFlowImpl::Densification_ParBody::operator()(const Range &range) c #undef UPDATE_SPARSE_J_COORDINATES } +#ifdef HAVE_OPENCL +bool DISOpticalFlowImpl::ocl_PatchInverseSearch(UMat &src_Ux, UMat &src_Uy, + UMat &I0, UMat &I1, UMat &I0x, UMat &I0y, int num_iter, int pyr_level) +{ + size_t globalSize[] = {(size_t)ws, (size_t)hs}; + size_t localSize[] = {16, 16}; + int idx; + int num_inner_iter = (int)floor(grad_descent_iter / (float)num_iter); + + for (int iter = 0; iter < num_iter; iter++) + { + if (iter == 0) + { + ocl::Kernel k1("dis_patch_inverse_search_fwd_1", ocl::optflow::dis_flow_oclsrc); + size_t global_sz[] = {(size_t)hs * 8}; + size_t local_sz[] = {8}; + idx = 0; + + idx = k1.set(idx, ocl::KernelArg::PtrReadOnly(src_Ux)); + idx = k1.set(idx, ocl::KernelArg::PtrReadOnly(src_Uy)); + idx = k1.set(idx, ocl::KernelArg::PtrReadOnly(I0)); + idx = k1.set(idx, ocl::KernelArg::PtrReadOnly(I1)); + idx = k1.set(idx, (int)border_size); + idx = k1.set(idx, (int)patch_size); + idx = k1.set(idx, (int)patch_stride); + idx = k1.set(idx, (int)w); + idx = k1.set(idx, (int)h); + idx = k1.set(idx, (int)ws); + idx = k1.set(idx, (int)hs); + idx = k1.set(idx, (int)pyr_level); + idx = k1.set(idx, ocl::KernelArg::PtrWriteOnly(u_Sx)); + idx = k1.set(idx, ocl::KernelArg::PtrWriteOnly(u_Sy)); + if (!k1.run(1, global_sz, local_sz, false)) + return false; + + ocl::Kernel k2("dis_patch_inverse_search_fwd_2", ocl::optflow::dis_flow_oclsrc); + idx = 0; + + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(src_Ux)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(src_Uy)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(I0)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(I1)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(I0x)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(I0y)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0xx_buf)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0yy_buf)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0xy_buf)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0x_buf)); + idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0y_buf)); + idx = k2.set(idx, (int)border_size); + idx = k2.set(idx, (int)patch_size); + idx = k2.set(idx, (int)patch_stride); + idx = k2.set(idx, (int)w); + idx = k2.set(idx, (int)h); + idx = k2.set(idx, (int)ws); + idx = k2.set(idx, (int)hs); + idx = k2.set(idx, (int)num_inner_iter); + idx = k2.set(idx, (int)pyr_level); + idx = k2.set(idx, ocl::KernelArg::PtrReadWrite(u_Sx)); + idx = k2.set(idx, ocl::KernelArg::PtrReadWrite(u_Sy)); + if (!k2.run(2, globalSize, localSize, false)) + return false; + } + else + { + ocl::Kernel k3("dis_patch_inverse_search_bwd_1", ocl::optflow::dis_flow_oclsrc); + size_t global_sz[] = {(size_t)hs * 8}; + size_t local_sz[] = {8}; + idx = 0; + + idx = k3.set(idx, ocl::KernelArg::PtrReadOnly(I0)); + idx = k3.set(idx, ocl::KernelArg::PtrReadOnly(I1)); + idx = k3.set(idx, (int)border_size); + idx = k3.set(idx, (int)patch_size); + idx = k3.set(idx, (int)patch_stride); + idx = k3.set(idx, (int)w); + idx = k3.set(idx, (int)h); + idx = k3.set(idx, (int)ws); + idx = k3.set(idx, (int)hs); + idx = k3.set(idx, (int)pyr_level); + idx = k3.set(idx, ocl::KernelArg::PtrReadWrite(u_Sx)); + idx = k3.set(idx, ocl::KernelArg::PtrReadWrite(u_Sy)); + if (!k3.run(1, global_sz, local_sz, false)) + return false; + + ocl::Kernel k4("dis_patch_inverse_search_bwd_2", ocl::optflow::dis_flow_oclsrc); + idx = 0; + + idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(I0)); + idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(I1)); + idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(I0x)); + idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(I0y)); + idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0xx_buf)); + idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0yy_buf)); + idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0xy_buf)); + idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0x_buf)); + idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0y_buf)); + idx = k4.set(idx, (int)border_size); + idx = k4.set(idx, (int)patch_size); + idx = k4.set(idx, (int)patch_stride); + idx = k4.set(idx, (int)w); + idx = k4.set(idx, (int)h); + idx = k4.set(idx, (int)ws); + idx = k4.set(idx, (int)hs); + idx = k4.set(idx, (int)num_inner_iter); + idx = k4.set(idx, ocl::KernelArg::PtrReadWrite(u_Sx)); + idx = k4.set(idx, ocl::KernelArg::PtrReadWrite(u_Sy)); + if (!k4.run(2, globalSize, localSize, false)) + return false; + } + } + return true; +} + +bool DISOpticalFlowImpl::ocl_Densification(UMat &dst_Ux, UMat &dst_Uy, UMat &src_Sx, UMat &src_Sy, UMat &_I0, UMat &_I1) +{ + size_t globalSize[] = {(size_t)w, (size_t)h}; + size_t localSize[] = {16, 16}; + + ocl::Kernel kernel("dis_densification", ocl::optflow::dis_flow_oclsrc); + kernel.args(ocl::KernelArg::PtrReadOnly(src_Sx), + ocl::KernelArg::PtrReadOnly(src_Sy), + ocl::KernelArg::PtrReadOnly(_I0), + ocl::KernelArg::PtrReadOnly(_I1), + (int)patch_size, (int)patch_stride, + (int)w, (int)h, (int)ws, + ocl::KernelArg::PtrWriteOnly(dst_Ux), + ocl::KernelArg::PtrWriteOnly(dst_Uy)); + return kernel.run(2, globalSize, localSize, false); +} + +void DISOpticalFlowImpl::ocl_prepareBuffers(UMat &I0, UMat &I1, UMat &flow, bool use_flow) +{ + u_I0s.resize(coarsest_scale + 1); + u_I1s.resize(coarsest_scale + 1); + u_I1s_ext.resize(coarsest_scale + 1); + u_I0xs.resize(coarsest_scale + 1); + u_I0ys.resize(coarsest_scale + 1); + u_Ux.resize(coarsest_scale + 1); + u_Uy.resize(coarsest_scale + 1); + + vector flow_uv(2); + if (use_flow) + { + split(flow, flow_uv); + u_initial_Ux.resize(coarsest_scale + 1); + u_initial_Uy.resize(coarsest_scale + 1); + } + + int fraction = 1; + int cur_rows = 0, cur_cols = 0; + + for (int i = 0; i <= coarsest_scale; i++) + { + /* Avoid initializing the pyramid levels above the finest scale, as they won't be used anyway */ + if (i == finest_scale) + { + cur_rows = I0.rows / fraction; + cur_cols = I0.cols / fraction; + u_I0s[i].create(cur_rows, cur_cols, CV_8UC1); + resize(I0, u_I0s[i], u_I0s[i].size(), 0.0, 0.0, INTER_AREA); + u_I1s[i].create(cur_rows, cur_cols, CV_8UC1); + resize(I1, u_I1s[i], u_I1s[i].size(), 0.0, 0.0, INTER_AREA); + + /* These buffers are reused in each scale so we initialize them once on the finest scale: */ + u_Sx.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); + u_Sy.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); + u_I0xx_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); + u_I0yy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); + u_I0xy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); + u_I0x_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); + u_I0y_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); + + u_I0xx_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1); + u_I0yy_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1); + u_I0xy_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1); + u_I0x_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1); + u_I0y_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1); + + u_U.create(cur_rows, cur_cols, CV_32FC2); + } + else if (i > finest_scale) + { + cur_rows = u_I0s[i - 1].rows / 2; + cur_cols = u_I0s[i - 1].cols / 2; + u_I0s[i].create(cur_rows, cur_cols, CV_8UC1); + resize(u_I0s[i - 1], u_I0s[i], u_I0s[i].size(), 0.0, 0.0, INTER_AREA); + u_I1s[i].create(cur_rows, cur_cols, CV_8UC1); + resize(u_I1s[i - 1], u_I1s[i], u_I1s[i].size(), 0.0, 0.0, INTER_AREA); + } + + if (i >= finest_scale) + { + u_I1s_ext[i].create(cur_rows + 2 * border_size, cur_cols + 2 * border_size, CV_8UC1); + copyMakeBorder(u_I1s[i], u_I1s_ext[i], border_size, border_size, border_size, border_size, BORDER_REPLICATE); + u_I0xs[i].create(cur_rows, cur_cols, CV_16SC1); + u_I0ys[i].create(cur_rows, cur_cols, CV_16SC1); + spatialGradient(u_I0s[i], u_I0xs[i], u_I0ys[i]); + u_Ux[i].create(cur_rows, cur_cols, CV_32FC1); + u_Uy[i].create(cur_rows, cur_cols, CV_32FC1); + variational_refinement_processors[i]->setAlpha(variational_refinement_alpha); + variational_refinement_processors[i]->setDelta(variational_refinement_delta); + variational_refinement_processors[i]->setGamma(variational_refinement_gamma); + variational_refinement_processors[i]->setSorIterations(5); + variational_refinement_processors[i]->setFixedPointIterations(variational_refinement_iter); + + if (use_flow) + { + resize(flow_uv[0], u_initial_Ux[i], Size(cur_cols, cur_rows)); + divide(u_initial_Ux[i], static_cast(fraction), u_initial_Ux[i]); + resize(flow_uv[1], u_initial_Uy[i], Size(cur_cols, cur_rows)); + divide(u_initial_Uy[i], static_cast(fraction), u_initial_Uy[i]); + } + } + + fraction *= 2; + } +} + +bool DISOpticalFlowImpl::ocl_precomputeStructureTensor(UMat &dst_I0xx, UMat &dst_I0yy, UMat &dst_I0xy, + UMat &dst_I0x, UMat &dst_I0y, UMat &I0x, UMat &I0y) +{ + size_t globalSizeX[] = {(size_t)h}; + size_t localSizeX[] = {16}; + + ocl::Kernel kernelX("dis_precomputeStructureTensor_hor", ocl::optflow::dis_flow_oclsrc); + kernelX.args(ocl::KernelArg::PtrReadOnly(I0x), + ocl::KernelArg::PtrReadOnly(I0y), + (int)patch_size, (int)patch_stride, + (int)w, (int)h, (int)ws, + ocl::KernelArg::PtrWriteOnly(u_I0xx_buf_aux), + ocl::KernelArg::PtrWriteOnly(u_I0yy_buf_aux), + ocl::KernelArg::PtrWriteOnly(u_I0xy_buf_aux), + ocl::KernelArg::PtrWriteOnly(u_I0x_buf_aux), + ocl::KernelArg::PtrWriteOnly(u_I0y_buf_aux)); + if (!kernelX.run(1, globalSizeX, localSizeX, false)) + return false; + + size_t globalSizeY[] = {(size_t)ws}; + size_t localSizeY[] = {16}; + + ocl::Kernel kernelY("dis_precomputeStructureTensor_ver", ocl::optflow::dis_flow_oclsrc); + kernelY.args(ocl::KernelArg::PtrReadOnly(u_I0xx_buf_aux), + ocl::KernelArg::PtrReadOnly(u_I0yy_buf_aux), + ocl::KernelArg::PtrReadOnly(u_I0xy_buf_aux), + ocl::KernelArg::PtrReadOnly(u_I0x_buf_aux), + ocl::KernelArg::PtrReadOnly(u_I0y_buf_aux), + (int)patch_size, (int)patch_stride, + (int)w, (int)h, (int)ws, + ocl::KernelArg::PtrWriteOnly(dst_I0xx), + ocl::KernelArg::PtrWriteOnly(dst_I0yy), + ocl::KernelArg::PtrWriteOnly(dst_I0xy), + ocl::KernelArg::PtrWriteOnly(dst_I0x), + ocl::KernelArg::PtrWriteOnly(dst_I0y)); + return kernelY.run(1, globalSizeY, localSizeY, false); +} + +bool DISOpticalFlowImpl::ocl_calc(InputArray I0, InputArray I1, InputOutputArray flow) +{ + UMat I0Mat = I0.getUMat(); + UMat I1Mat = I1.getUMat(); + bool use_input_flow = false; + if (flow.sameSize(I0) && flow.depth() == CV_32F && flow.channels() == 2) + use_input_flow = true; + else + flow.create(I1Mat.size(), CV_32FC2); + UMat &u_flowMat = flow.getUMatRef(); + coarsest_scale = (int)(log((2 * I0Mat.cols) / (4.0 * patch_size)) / log(2.0) + 0.5) - 1; + + ocl_prepareBuffers(I0Mat, I1Mat, u_flowMat, use_input_flow); + u_Ux[coarsest_scale].setTo(0.0f); + u_Uy[coarsest_scale].setTo(0.0f); + + for (int i = coarsest_scale; i >= finest_scale; i--) + { + w = u_I0s[i].cols; + h = u_I0s[i].rows; + ws = 1 + (w - patch_size) / patch_stride; + hs = 1 + (h - patch_size) / patch_stride; + + if (!ocl_precomputeStructureTensor(u_I0xx_buf, u_I0yy_buf, u_I0xy_buf, + u_I0x_buf, u_I0y_buf, u_I0xs[i], u_I0ys[i])) + return false; + + if (!ocl_PatchInverseSearch(u_Ux[i], u_Uy[i], u_I0s[i], u_I1s_ext[i], u_I0xs[i], u_I0ys[i], 2, i)) + return false; + + if (!ocl_Densification(u_Ux[i], u_Uy[i], u_Sx, u_Sy, u_I0s[i], u_I1s[i])) + return false; + + if (variational_refinement_iter > 0) + variational_refinement_processors[i]->calcUV(u_I0s[i], u_I1s[i], + u_Ux[i].getMat(ACCESS_WRITE), u_Uy[i].getMat(ACCESS_WRITE)); + + if (i > finest_scale) + { + resize(u_Ux[i], u_Ux[i - 1], u_Ux[i - 1].size()); + resize(u_Uy[i], u_Uy[i - 1], u_Uy[i - 1].size()); + multiply(u_Ux[i - 1], 2, u_Ux[i - 1]); + multiply(u_Uy[i - 1], 2, u_Uy[i - 1]); + } + } + vector uxy(2); + uxy[0] = u_Ux[finest_scale]; + uxy[1] = u_Uy[finest_scale]; + merge(uxy, u_U); + resize(u_U, u_flowMat, u_flowMat.size()); + multiply(u_flowMat, 1 << finest_scale, u_flowMat); + + return true; +} +#endif + void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flow) { CV_Assert(!I0.empty() && I0.depth() == CV_8U && I0.channels() == 1); @@ -1012,6 +1371,10 @@ void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flo CV_Assert(I0.isContinuous()); CV_Assert(I1.isContinuous()); + CV_OCL_RUN(ocl::Device::getDefault().isIntel() && flow.isUMat() && + (patch_size == 8) && (use_spatial_propagation == true), + ocl_calc(I0, I1, flow)); + Mat I0Mat = I0.getMat(); Mat I1Mat = I1.getMat(); bool use_input_flow = false; @@ -1019,7 +1382,7 @@ void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flo use_input_flow = true; else flow.create(I1Mat.size(), CV_32FC2); - Mat &flowMat = flow.getMatRef(); + Mat flowMat = flow.getMat(); coarsest_scale = (int)(log((2 * I0Mat.cols) / (4.0 * patch_size)) / log(2.0) + 0.5) - 1; int num_stripes = getNumThreads(); @@ -1088,6 +1451,25 @@ void DISOpticalFlowImpl::collectGarbage() I0yy_buf_aux.release(); I0xy_buf_aux.release(); +#ifdef HAVE_OPENCL + u_I0s.clear(); + u_I1s.clear(); + u_I1s_ext.clear(); + u_I0xs.clear(); + u_I0ys.clear(); + u_Ux.clear(); + u_Uy.clear(); + u_U.release(); + u_Sx.release(); + u_Sy.release(); + u_I0xx_buf.release(); + u_I0yy_buf.release(); + u_I0xy_buf.release(); + u_I0xx_buf_aux.release(); + u_I0yy_buf_aux.release(); + u_I0xy_buf_aux.release(); +#endif + for (int i = finest_scale; i <= coarsest_scale; i++) variational_refinement_processors[i]->collectGarbage(); variational_refinement_processors.clear(); diff --git a/modules/optflow/src/opencl/dis_flow.cl b/modules/optflow/src/opencl/dis_flow.cl new file mode 100644 index 000000000..54c642d9e --- /dev/null +++ b/modules/optflow/src/opencl/dis_flow.cl @@ -0,0 +1,522 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#define EPS 0.001f +#define INF 1E+10F + +__kernel void dis_precomputeStructureTensor_hor(__global const short *I0x, + __global const short *I0y, + int patch_size, int patch_stride, + int w, int h, int ws, + __global float *I0xx_aux_ptr, + __global float *I0yy_aux_ptr, + __global float *I0xy_aux_ptr, + __global float *I0x_aux_ptr, + __global float *I0y_aux_ptr) +{ + + int i = get_global_id(0); + + if (i >= h) return; + + const __global short *x_row = I0x + i * w; + const __global short *y_row = I0y + i * w; + + float sum_xx = 0.0f, sum_yy = 0.0f, sum_xy = 0.0f, sum_x = 0.0f, sum_y = 0.0f; + float8 x_vec = convert_float8(vload8(0, x_row)); + float8 y_vec = convert_float8(vload8(0, y_row)); + sum_xx = dot(x_vec.lo, x_vec.lo) + dot(x_vec.hi, x_vec.hi); + sum_yy = dot(y_vec.lo, y_vec.lo) + dot(y_vec.hi, y_vec.hi); + sum_xy = dot(x_vec.lo, y_vec.lo) + dot(x_vec.hi, y_vec.hi); + sum_x = dot(x_vec.lo, 1.0f) + dot(x_vec.hi, 1.0f); + sum_y = dot(y_vec.lo, 1.0f) + dot(y_vec.hi, 1.0f); + + I0xx_aux_ptr[i * ws] = sum_xx; + I0yy_aux_ptr[i * ws] = sum_yy; + I0xy_aux_ptr[i * ws] = sum_xy; + I0x_aux_ptr[i * ws] = sum_x; + I0y_aux_ptr[i * ws] = sum_y; + + int js = 1; + for (int j = patch_size; j < w; j++) + { + short x_val1 = x_row[j]; + short x_val2 = x_row[j - patch_size]; + short y_val1 = y_row[j]; + short y_val2 = y_row[j - patch_size]; + sum_xx += (x_val1 * x_val1 - x_val2 * x_val2); + sum_yy += (y_val1 * y_val1 - y_val2 * y_val2); + sum_xy += (x_val1 * y_val1 - x_val2 * y_val2); + sum_x += (x_val1 - x_val2); + sum_y += (y_val1 - y_val2); + if ((j - patch_size + 1) % patch_stride == 0) + { + int index = i * ws + js; + I0xx_aux_ptr[index] = sum_xx; + I0yy_aux_ptr[index] = sum_yy; + I0xy_aux_ptr[index] = sum_xy; + I0x_aux_ptr[index] = sum_x; + I0y_aux_ptr[index] = sum_y; + js++; + } + } +} + +__kernel void dis_precomputeStructureTensor_ver(__global const float *I0xx_aux_ptr, + __global const float *I0yy_aux_ptr, + __global const float *I0xy_aux_ptr, + __global const float *I0x_aux_ptr, + __global const float *I0y_aux_ptr, + int patch_size, int patch_stride, + int w, int h, int ws, + __global float *I0xx_ptr, + __global float *I0yy_ptr, + __global float *I0xy_ptr, + __global float *I0x_ptr, + __global float *I0y_ptr) +{ + int j = get_global_id(0); + + if (j >= ws) return; + + float sum_xx, sum_yy, sum_xy, sum_x, sum_y; + sum_xx = sum_yy = sum_xy = sum_x = sum_y = 0.0f; + + for (int i = 0; i < patch_size; i++) + { + sum_xx += I0xx_aux_ptr[i * ws + j]; + sum_yy += I0yy_aux_ptr[i * ws + j]; + sum_xy += I0xy_aux_ptr[i * ws + j]; + sum_x += I0x_aux_ptr[i * ws + j]; + sum_y += I0y_aux_ptr[i * ws + j]; + } + I0xx_ptr[j] = sum_xx; + I0yy_ptr[j] = sum_yy; + I0xy_ptr[j] = sum_xy; + I0x_ptr[j] = sum_x; + I0y_ptr[j] = sum_y; + + int is = 1; + for (int i = patch_size; i < h; i++) + { + sum_xx += (I0xx_aux_ptr[i * ws + j] - I0xx_aux_ptr[(i - patch_size) * ws + j]); + sum_yy += (I0yy_aux_ptr[i * ws + j] - I0yy_aux_ptr[(i - patch_size) * ws + j]); + sum_xy += (I0xy_aux_ptr[i * ws + j] - I0xy_aux_ptr[(i - patch_size) * ws + j]); + sum_x += (I0x_aux_ptr[i * ws + j] - I0x_aux_ptr[(i - patch_size) * ws + j]); + sum_y += (I0y_aux_ptr[i * ws + j] - I0y_aux_ptr[(i - patch_size) * ws + j]); + + if ((i - patch_size + 1) % patch_stride == 0) + { + I0xx_ptr[is * ws + j] = sum_xx; + I0yy_ptr[is * ws + j] = sum_yy; + I0xy_ptr[is * ws + j] = sum_xy; + I0x_ptr[is * ws + j] = sum_x; + I0y_ptr[is * ws + j] = sum_y; + is++; + } + } +} + +__kernel void dis_densification(__global const float *sx, __global const float *sy, + __global const uchar *i0, __global const uchar *i1, + int psz, int pstr, + int w, int h, int ws, + __global float *ux, __global float *uy) +{ + int x = get_global_id(0); + int y = get_global_id(1); + int i, j; + + if (x >= w || y >= h) return; + + int start_is, end_is; + int start_js, end_js; + + end_is = min(y / pstr, (h - psz) / pstr); + start_is = max(0, y - psz + pstr) / pstr; + start_is = min(start_is, end_is); + + end_js = min(x / pstr, (w - psz) / pstr); + start_js = max(0, x - psz + pstr) / pstr; + start_js = min(start_js, end_js); + + float coef, sum_coef = 0.0f; + float sum_Ux = 0.0f; + float sum_Uy = 0.0f; + + int i_l, i_u; + int j_l, j_u; + float i_m, j_m, diff; + + i = y; + j = x; + + /* Iterate through all the patches that overlap the current location (i,j) */ + for (int is = start_is; is <= end_is; is++) + for (int js = start_js; js <= end_js; js++) + { + float sx_val = sx[is * ws + js]; + float sy_val = sy[is * ws + js]; + uchar2 i1_vec1, i1_vec2; + + j_m = min(max(j + sx_val, 0.0f), w - 1.0f - EPS); + i_m = min(max(i + sy_val, 0.0f), h - 1.0f - EPS); + j_l = (int)j_m; + j_u = j_l + 1; + i_l = (int)i_m; + i_u = i_l + 1; + i1_vec1 = vload2(0, i1 + i_u * w + j_l); + i1_vec2 = vload2(0, i1 + i_l * w + j_l); + diff = (j_m - j_l) * (i_m - i_l) * i1_vec1.y + + (j_u - j_m) * (i_m - i_l) * i1_vec1.x + + (j_m - j_l) * (i_u - i_m) * i1_vec2.y + + (j_u - j_m) * (i_u - i_m) * i1_vec2.x - i0[i * w + j]; + coef = 1 / max(1.0f, fabs(diff)); + sum_Ux += coef * sx_val; + sum_Uy += coef * sy_val; + sum_coef += coef; + } + + ux[i * w + j] = sum_Ux / sum_coef; + uy[i * w + j] = sum_Uy / sum_coef; +} + +#define INIT_BILINEAR_WEIGHTS(Ux, Uy) \ + i_I1 = min(max(i + Uy + bsz, i_lower_limit), i_upper_limit); \ + j_I1 = min(max(j + Ux + bsz, j_lower_limit), j_upper_limit); \ + \ + w11 = (i_I1 - floor(i_I1)) * (j_I1 - floor(j_I1)); \ + w10 = (i_I1 - floor(i_I1)) * (floor(j_I1) + 1 - j_I1); \ + w01 = (floor(i_I1) + 1 - i_I1) * (j_I1 - floor(j_I1)); \ + w00 = (floor(i_I1) + 1 - i_I1) * (floor(j_I1) + 1 - j_I1); + +float computeSSDMeanNorm(const __global uchar *I0_ptr, const __global uchar *I1_ptr, + int I0_stride, int I1_stride, + float w00, float w01, float w10, float w11, int patch_sz, int i) +{ + float sum_diff = 0.0f, sum_diff_sq = 0.0f; + int n = patch_sz * patch_sz; + + uchar8 I1_vec1, I1_vec2, I0_vec; + uchar I1_val1, I1_val2; + + I0_vec = vload8(0, I0_ptr + i * I0_stride); + I1_vec1 = vload8(0, I1_ptr + i * I1_stride); + I1_vec2 = vload8(0, I1_ptr + (i + 1) * I1_stride); + I1_val1 = I1_ptr[i * I1_stride + 8]; + I1_val2 = I1_ptr[(i + 1) * I1_stride + 8]; + + float8 vec = w00 * convert_float8(I1_vec1) + w01 * convert_float8((uchar8)(I1_vec1.s123, I1_vec1.s4567, I1_val1)) + + w10 * convert_float8(I1_vec2) + w11 * convert_float8((uchar8)(I1_vec2.s123, I1_vec2.s4567, I1_val2)) - + convert_float8(I0_vec); + + sum_diff = (dot(vec.lo, 1.0) + dot(vec.hi, 1.0)); + sum_diff_sq = (dot(vec.lo, vec.lo) + dot(vec.hi, vec.hi)); + + sum_diff = sub_group_reduce_add(sum_diff); + sum_diff_sq = sub_group_reduce_add(sum_diff_sq); + + return sum_diff_sq - sum_diff * sum_diff / n; +} + +__kernel void dis_patch_inverse_search_fwd_1(__global const float *Ux_ptr, __global const float *Uy_ptr, + __global const uchar *I0_ptr, __global const uchar *I1_ptr, + int border_size, int patch_size, int patch_stride, + int w, int h, int ws, int hs, int pyr_level, + __global float *Sx_ptr, __global float *Sy_ptr) +{ + int id = get_global_id(0); + int is = id / 8; + if (id >= (hs * 8)) return; + + int i = is * patch_stride; + int j = 0; + int psz = patch_size; + int psz2 = psz / 2; + int w_ext = w + 2 * border_size; + int bsz = border_size; + + float i_lower_limit = bsz - psz + 1.0f; + float i_upper_limit = bsz + h - 1.0f; + float j_lower_limit = bsz - psz + 1.0f; + float j_upper_limit = bsz + w - 1.0f; + float i_I1, j_I1, w00, w01, w10, w11; + + float prev_Ux = Ux_ptr[(i + psz2) * w + j + psz2]; + float prev_Uy = Uy_ptr[(i + psz2) * w + j + psz2]; + Sx_ptr[is * ws] = prev_Ux; + Sy_ptr[is * ws] = prev_Uy; + j += patch_stride; + + int sid = get_sub_group_local_id(); + for (int js = 1; js < ws; js++, j += patch_stride) + { + float min_SSD, cur_SSD; + float Ux = Ux_ptr[(i + psz2) * w + j + psz2]; + float Uy = Uy_ptr[(i + psz2) * w + j + psz2]; + + INIT_BILINEAR_WEIGHTS(Ux, Uy); + min_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, + w, w_ext, w00, w01, w10, w11, psz, sid); + + INIT_BILINEAR_WEIGHTS(prev_Ux, prev_Uy); + cur_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, + w, w_ext, w00, w01, w10, w11, psz, sid); + if (cur_SSD < min_SSD) + { + Ux = prev_Ux; + Uy = prev_Uy; + } + + prev_Ux = Ux; + prev_Uy = Uy; + Sx_ptr[is * ws + js] = Ux; + Sy_ptr[is * ws + js] = Uy; + } +} + +float3 processPatchMeanNorm(const __global uchar *I0_ptr, const __global uchar *I1_ptr, + const __global short *I0x_ptr, const __global short *I0y_ptr, + int I0_stride, int I1_stride, float w00, float w01, float w10, + float w11, int patch_sz, float x_grad_sum, float y_grad_sum) +{ + float sum_diff = 0.0, sum_diff_sq = 0.0; + float sum_I0x_mul = 0.0, sum_I0y_mul = 0.0; + int n = patch_sz * patch_sz; + uchar8 I1_vec1, I1_vec2; + uchar I1_val1, I1_val2; + + for (int i = 0; i < 8; i++) + { + uchar8 I0_vec = vload8(0, I0_ptr + i * I0_stride); + + I1_vec1 = (i == 0) ? vload8(0, I1_ptr + i * I1_stride) : I1_vec2; + I1_vec2 = vload8(0, I1_ptr + (i + 1) * I1_stride); + I1_val1 = (i == 0) ? I1_ptr[i * I1_stride + patch_sz] : I1_val2; + I1_val2 = I1_ptr[(i + 1) * I1_stride + patch_sz]; + + float8 vec = w00 * convert_float8(I1_vec1) + w01 * convert_float8((uchar8)(I1_vec1.s123, I1_vec1.s4567, I1_val1)) + + w10 * convert_float8(I1_vec2) + w11 * convert_float8((uchar8)(I1_vec2.s123, I1_vec2.s4567, I1_val2)) - + convert_float8(I0_vec); + + sum_diff += (dot(vec.lo, 1.0) + dot(vec.hi, 1.0)); + sum_diff_sq += (dot(vec.lo, vec.lo) + dot(vec.hi, vec.hi)); + + short8 I0x_vec = vload8(0, I0x_ptr + i * I0_stride); + short8 I0y_vec = vload8(0, I0y_ptr + i * I0_stride); + + sum_I0x_mul += dot(vec.lo, convert_float4(I0x_vec.lo)); + sum_I0x_mul += dot(vec.hi, convert_float4(I0x_vec.hi)); + sum_I0y_mul += dot(vec.lo, convert_float4(I0y_vec.lo)); + sum_I0y_mul += dot(vec.hi, convert_float4(I0y_vec.hi)); + } + + float dst_dUx = sum_I0x_mul - sum_diff * x_grad_sum / n; + float dst_dUy = sum_I0y_mul - sum_diff * y_grad_sum / n; + float SSD = sum_diff_sq - sum_diff * sum_diff / n; + + return (float3)(SSD, dst_dUx, dst_dUy); +} + +__kernel void dis_patch_inverse_search_fwd_2(__global const float *Ux_ptr, __global const float *Uy_ptr, + __global const uchar *I0_ptr, __global const uchar *I1_ptr, + __global const short *I0x_ptr, __global const short *I0y_ptr, + __global const float *xx_ptr, __global const float *yy_ptr, + __global const float *xy_ptr, + __global const float *x_ptr, __global const float *y_ptr, + int border_size, int patch_size, int patch_stride, + int w, int h, int ws, int hs, int num_inner_iter, int pyr_level, + __global float *Sx_ptr, __global float *Sy_ptr) +{ + int js = get_global_id(0); + int is = get_global_id(1); + int i = is * patch_stride; + int j = js * patch_stride; + int psz = patch_size; + int psz2 = psz / 2; + int w_ext = w + 2 * border_size; + int bsz = border_size; + int index = is * ws + js; + + if (js >= ws || is >= hs) return; + + float Ux = Sx_ptr[index]; + float Uy = Sy_ptr[index]; + float cur_Ux = Ux; + float cur_Uy = Uy; + float cur_xx = xx_ptr[index]; + float cur_yy = yy_ptr[index]; + float cur_xy = xy_ptr[index]; + float detH = cur_xx * cur_yy - cur_xy * cur_xy; + + if (fabs(detH) < EPS) detH = EPS; + + float invH11 = cur_yy / detH; + float invH12 = -cur_xy / detH; + float invH22 = cur_xx / detH; + float prev_SSD = INF, SSD; + float x_grad_sum = x_ptr[index]; + float y_grad_sum = y_ptr[index]; + + float i_lower_limit = bsz - psz + 1.0f; + float i_upper_limit = bsz + h - 1.0f; + float j_lower_limit = bsz - psz + 1.0f; + float j_upper_limit = bsz + w - 1.0f; + float dUx, dUy, i_I1, j_I1, w00, w01, w10, w11, dx, dy; + float3 res; + + for (int t = 0; t < num_inner_iter; t++) + { + INIT_BILINEAR_WEIGHTS(cur_Ux, cur_Uy); + res = processPatchMeanNorm(I0_ptr + i * w + j, + I1_ptr + (int)i_I1 * w_ext + (int)j_I1, I0x_ptr + i * w + j, + I0y_ptr + i * w + j, w, w_ext, w00, w01, w10, w11, psz, + x_grad_sum, y_grad_sum); + + SSD = res.x; + dUx = res.y; + dUy = res.z; + dx = invH11 * dUx + invH12 * dUy; + dy = invH12 * dUx + invH22 * dUy; + + cur_Ux -= dx; + cur_Uy -= dy; + + if (SSD >= prev_SSD) + break; + prev_SSD = SSD; + } + + float2 vec = (float2)(cur_Ux - Ux, cur_Uy - Uy); + if (dot(vec, vec) <= (float)(psz * psz)) + { + Sx_ptr[index] = cur_Ux; + Sy_ptr[index] = cur_Uy; + } +} + +__kernel void dis_patch_inverse_search_bwd_1(__global const uchar *I0_ptr, __global const uchar *I1_ptr, + int border_size, int patch_size, int patch_stride, + int w, int h, int ws, int hs, int pyr_level, + __global float *Sx_ptr, __global float *Sy_ptr) +{ + int id = get_global_id(0); + int is = id / 8; + if (id >= (hs * 8)) return; + + is = (hs - 1 - is); + int i = is * patch_stride; + int j = (ws - 2) * patch_stride; + int psz = patch_size; + int psz2 = psz / 2; + int w_ext = w + 2 * border_size; + int bsz = border_size; + + float i_lower_limit = bsz - psz + 1.0f; + float i_upper_limit = bsz + h - 1.0f; + float j_lower_limit = bsz - psz + 1.0f; + float j_upper_limit = bsz + w - 1.0f; + float i_I1, j_I1, w00, w01, w10, w11; + + int sid = get_sub_group_local_id(); + for (int js = (ws - 2); js > -1; js--, j -= patch_stride) + { + float min_SSD, cur_SSD; + float2 Ux = vload2(0, Sx_ptr + is * ws + js); + float2 Uy = vload2(0, Sy_ptr + is * ws + js); + + INIT_BILINEAR_WEIGHTS(Ux.x, Uy.x); + min_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, + w, w_ext, w00, w01, w10, w11, psz, sid); + + INIT_BILINEAR_WEIGHTS(Ux.y, Uy.y); + cur_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, + w, w_ext, w00, w01, w10, w11, psz, sid); + if (cur_SSD < min_SSD) + { + Sx_ptr[is * ws + js] = Ux.y; + Sy_ptr[is * ws + js] = Uy.y; + } + } +} + +__kernel void dis_patch_inverse_search_bwd_2(__global const uchar *I0_ptr, __global const uchar *I1_ptr, + __global const short *I0x_ptr, __global const short *I0y_ptr, + __global const float *xx_ptr, __global const float *yy_ptr, + __global const float *xy_ptr, + __global const float *x_ptr, __global const float *y_ptr, + int border_size, int patch_size, int patch_stride, + int w, int h, int ws, int hs, int num_inner_iter, + __global float *Sx_ptr, __global float *Sy_ptr) +{ + int js = get_global_id(0); + int is = get_global_id(1); + if (js >= ws || is >= hs) return; + + js = (ws - 1 - js); + is = (hs - 1 - is); + + int j = js * patch_stride; + int i = is * patch_stride; + int psz = patch_size; + int psz2 = psz / 2; + int w_ext = w + 2 * border_size; + int bsz = border_size; + int index = is * ws + js; + + float Ux = Sx_ptr[index]; + float Uy = Sy_ptr[index]; + float cur_Ux = Ux; + float cur_Uy = Uy; + float cur_xx = xx_ptr[index]; + float cur_yy = yy_ptr[index]; + float cur_xy = xy_ptr[index]; + float detH = cur_xx * cur_yy - cur_xy * cur_xy; + + if (fabs(detH) < EPS) detH = EPS; + + float invH11 = cur_yy / detH; + float invH12 = -cur_xy / detH; + float invH22 = cur_xx / detH; + float prev_SSD = INF, SSD; + float x_grad_sum = x_ptr[index]; + float y_grad_sum = y_ptr[index]; + + float i_lower_limit = bsz - psz + 1.0f; + float i_upper_limit = bsz + h - 1.0f; + float j_lower_limit = bsz - psz + 1.0f; + float j_upper_limit = bsz + w - 1.0f; + float dUx, dUy, i_I1, j_I1, w00, w01, w10, w11, dx, dy; + float3 res; + + for (int t = 0; t < num_inner_iter; t++) + { + INIT_BILINEAR_WEIGHTS(cur_Ux, cur_Uy); + res = processPatchMeanNorm(I0_ptr + i * w + j, + I1_ptr + (int)i_I1 * w_ext + (int)j_I1, I0x_ptr + i * w + j, + I0y_ptr + i * w + j, w, w_ext, w00, w01, w10, w11, psz, + x_grad_sum, y_grad_sum); + + SSD = res.x; + dUx = res.y; + dUy = res.z; + dx = invH11 * dUx + invH12 * dUy; + dy = invH12 * dUx + invH22 * dUy; + + cur_Ux -= dx; + cur_Uy -= dy; + + if (SSD >= prev_SSD) + break; + prev_SSD = SSD; + } + + float2 vec = (float2)(cur_Ux - Ux, cur_Uy - Uy); + if ((dot(vec, vec)) <= (float)(psz * psz)) + { + Sx_ptr[index] = cur_Ux; + Sy_ptr[index] = cur_Uy; + } +} + diff --git a/modules/optflow/src/precomp.hpp b/modules/optflow/src/precomp.hpp index 577006999..c3461b3d9 100644 --- a/modules/optflow/src/precomp.hpp +++ b/modules/optflow/src/precomp.hpp @@ -45,6 +45,8 @@ the use of this software, even if advised of the possibility of such damage. #include #include #include +#include "opencv2/core/utility.hpp" +#include "opencv2/core/private.hpp" #include "opencv2/core/ocl.hpp" #include diff --git a/modules/optflow/test/ocl/test_dis.cpp b/modules/optflow/test/ocl/test_dis.cpp new file mode 100644 index 000000000..5c847cd11 --- /dev/null +++ b/modules/optflow/test/ocl/test_dis.cpp @@ -0,0 +1,100 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "../test_precomp.hpp" +#include "opencv2/ts/ocl_test.hpp" + +#ifdef HAVE_OPENCL + +using namespace cv; +using namespace optflow; + +namespace cvtest { +namespace ocl { + +PARAM_TEST_CASE(OCL_DenseOpticalFlow_DIS, int) +{ + int preset; + + virtual void SetUp() + { + preset = GET_PARAM(0); + } +}; + +OCL_TEST_P(OCL_DenseOpticalFlow_DIS, Mat) +{ + Mat frame1, frame2, GT; + + frame1 = imread(TS::ptr()->get_data_path() + "optflow/RubberWhale1.png"); + frame2 = imread(TS::ptr()->get_data_path() + "optflow/RubberWhale2.png"); + + CV_Assert(!frame1.empty() && !frame2.empty()); + + cvtColor(frame1, frame1, COLOR_BGR2GRAY); + cvtColor(frame2, frame2, COLOR_BGR2GRAY); + + Ptr algo; + + // iterate over presets: + for (int i = 0; i < test_loop_times; i++) + { + Mat flow; + UMat ocl_flow; + + algo = createOptFlow_DIS(preset); + OCL_OFF(algo->calc(frame1, frame2, flow)); + OCL_ON(algo->calc(frame1, frame2, ocl_flow)); + ASSERT_EQ(flow.rows, ocl_flow.rows); + ASSERT_EQ(flow.cols, ocl_flow.cols); + + EXPECT_MAT_SIMILAR(flow, ocl_flow, 6e-3); + } +} + +OCL_INSTANTIATE_TEST_CASE_P(Contrib, OCL_DenseOpticalFlow_DIS, + Values(DISOpticalFlow::PRESET_ULTRAFAST, + DISOpticalFlow::PRESET_FAST, + DISOpticalFlow::PRESET_MEDIUM)); + +} } // namespace cvtest::ocl + +#endif // HAVE_OPENCL