diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index da7ca27aeb..fa97b7df3f 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -1701,6 +1701,73 @@ namespace cv private: oclMat minSSD, leBuf, riBuf; }; + class CV_EXPORTS StereoBeliefPropagation + { + public: + enum { DEFAULT_NDISP = 64 }; + enum { DEFAULT_ITERS = 5 }; + enum { DEFAULT_LEVELS = 5 }; + static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels); + explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP, + int iters = DEFAULT_ITERS, + int levels = DEFAULT_LEVELS, + int msg_type = CV_16S); + StereoBeliefPropagation(int ndisp, int iters, int levels, + float max_data_term, float data_weight, + float max_disc_term, float disc_single_jump, + int msg_type = CV_32F); + void operator()(const oclMat &left, const oclMat &right, oclMat &disparity); + void operator()(const oclMat &data, oclMat &disparity); + int ndisp; + int iters; + int levels; + float max_data_term; + float data_weight; + float max_disc_term; + float disc_single_jump; + int msg_type; + private: + oclMat u, d, l, r, u2, d2, l2, r2; + std::vector datas; + oclMat out; + }; + class CV_EXPORTS StereoConstantSpaceBP + { + public: + enum { DEFAULT_NDISP = 128 }; + enum { DEFAULT_ITERS = 8 }; + enum { DEFAULT_LEVELS = 4 }; + enum { DEFAULT_NR_PLANE = 4 }; + static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane); + explicit StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP, + int iters = DEFAULT_ITERS, + int levels = DEFAULT_LEVELS, + int nr_plane = DEFAULT_NR_PLANE, + int msg_type = CV_32F); + StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, + float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, + int min_disp_th = 0, + int msg_type = CV_32F); + void operator()(const oclMat &left, const oclMat &right, oclMat &disparity); + int ndisp; + int iters; + int levels; + int nr_plane; + float max_data_term; + float data_weight; + float max_disc_term; + float disc_single_jump; + int min_disp_th; + int msg_type; + bool use_local_init_data_cost; + private: + oclMat u[2], d[2], l[2], r[2]; + oclMat disp_selected_pyr[2]; + oclMat data_cost; + oclMat data_cost_selected; + oclMat temp; + oclMat out; + }; } } #if defined _MSC_VER && _MSC_VER >= 1200 diff --git a/modules/ocl/src/opencl/stereobp.cl b/modules/ocl/src/opencl/stereobp.cl new file mode 100644 index 0000000000..3196e581ad --- /dev/null +++ b/modules/ocl/src/opencl/stereobp.cl @@ -0,0 +1,380 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jia Haipeng, jiahaipeng95@gmail.com +// Peng Xiao, pengxiao@outlook.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#if defined (DOUBLE_SUPPORT) + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif + +#endif + +#ifdef T_FLOAT +#define T float +#else +#define T short +#endif + +/////////////////////////////////////////////////////////////// +/////////////////common/////////////////////////////////////// +///////////////////////////////////////////////////////////// +T saturate_cast(float v){ +#ifdef T_SHORT + return convert_short_sat_rte(v); +#else + return v; +#endif +} + +#define FLOAT_MAX 3.402823466e+38f +typedef struct +{ + int cndisp; + float cmax_data_term; + float cdata_weight; + float cmax_disc_term; + float cdisc_single_jump; +}con_srtuct_t; +/////////////////////////////////////////////////////////////// +////////////////////////// comp data ////////////////////////// +/////////////////////////////////////////////////////////////// + +float pix_diff_1(__global const uchar *ls, __global const uchar *rs) +{ + return abs((int)(*ls) - *rs); +} + +float pix_diff_3(__global const uchar *ls, __global const uchar *rs) +{ + const float tr = 0.299f; + const float tg = 0.587f; + const float tb = 0.114f; + + float val; + + val = tb * abs((int)ls[0] - rs[0]); + val += tg * abs((int)ls[1] - rs[1]); + val += tr * abs((int)ls[2] - rs[2]); + + return val; +} +float pix_diff_4(__global const uchar *ls, __global const uchar *rs) +{ + uchar4 l, r; + l = *((__global uchar4 *)ls); + r = *((__global uchar4 *)rs); + + const float tr = 0.299f; + const float tg = 0.587f; + const float tb = 0.114f; + + float val; + + val = tb * abs((int)l.x - r.x); + val += tg * abs((int)l.y - r.y); + val += tr * abs((int)l.z - r.z); + + return val; +} + + +#ifndef CN +#define CN 4 +#endif + +#define CAT(X,Y) X##Y +#define CAT2(X,Y) CAT(X,Y) + +#define PIX_DIFF CAT2(pix_diff_, CN) + +__kernel void comp_data(__global uchar *left, int left_rows, int left_cols, int left_step, + __global uchar *right, int right_step, + __global T *data, int data_step, + __constant con_srtuct_t *con_st) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (y > 0 && y < (left_rows - 1) && x > 0 && x < (left_cols - 1)) + { + data_step /= sizeof(T); + const __global uchar* ls = left + y * left_step + x * CN; + const __global uchar* rs = right + y * right_step + x * CN; + + __global T *ds = data + y * data_step + x; + + const unsigned int disp_step = data_step * left_rows; + + for (int disp = 0; disp < con_st -> cndisp; disp++) + { + if (x - disp >= 1) + { + float val = 0; + val = PIX_DIFF(ls, rs - disp * CN); + ds[disp * disp_step] = saturate_cast(fmin(con_st -> cdata_weight * val, + con_st -> cdata_weight * con_st -> cmax_data_term)); + } + else + { + ds[disp * disp_step] = saturate_cast(con_st -> cdata_weight * con_st -> cmax_data_term); + } + } + } +} + +/////////////////////////////////////////////////////////////// +//////////////////////// data step down /////////////////////// +/////////////////////////////////////////////////////////////// +__kernel void data_step_down(__global T *src, int src_rows, + __global T *dst, int dst_rows, int dst_cols, + int src_step, int dst_step, + int cndisp) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + if (x < dst_cols && y < dst_rows) + { + src_step /= sizeof(T); + dst_step /= sizeof(T); + for (int d = 0; d < cndisp; ++d) + { + float dst_reg; + dst_reg = src[(d * src_rows + (2*y+0)) * src_step + 2*x+0]; + dst_reg += src[(d * src_rows + (2*y+1)) * src_step + 2*x+0]; + dst_reg += src[(d * src_rows + (2*y+0)) * src_step + 2*x+1]; + dst_reg += src[(d * src_rows + (2*y+1)) * src_step + 2*x+1]; + + dst[(d * dst_rows + y) * dst_step + x] = saturate_cast(dst_reg); + } + } +} + +/////////////////////////////////////////////////////////////// +/////////////////// level up messages //////////////////////// +/////////////////////////////////////////////////////////////// +__kernel void level_up_message(__global T *src, int src_rows, int src_step, + __global T *dst, int dst_rows, int dst_cols, int dst_step, + int cndisp) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + if (x < dst_cols && y < dst_rows) + { + src_step /= sizeof(T); + dst_step /= sizeof(T); + + const int dst_disp_step = dst_step * dst_rows; + const int src_disp_step = src_step * src_rows; + + __global T *dstr = dst + y * dst_step + x; + __global const T *srcr = src + (y / 2 * src_step) + (x / 2); + + for (int d = 0; d < cndisp; ++d) + dstr[d * dst_disp_step] = srcr[d * src_disp_step]; + } +} + +/////////////////////////////////////////////////////////////// +//////////////////// calc all iterations ///////////////////// +/////////////////////////////////////////////////////////////// +void calc_min_linear_penalty(__global T * dst, int disp_step, + int cndisp, float cdisc_single_jump) +{ + float prev = dst[0]; + float cur; + + for (int disp = 1; disp < cndisp; ++disp) + { + prev += cdisc_single_jump; + cur = dst[disp_step * disp]; + + if (prev < cur) + { + cur = prev; + dst[disp_step * disp] = saturate_cast(prev); + } + + prev = cur; + } + + prev = dst[(cndisp - 1) * disp_step]; + for (int disp = cndisp - 2; disp >= 0; disp--) + { + prev += cdisc_single_jump; + cur = dst[disp_step * disp]; + + if (prev < cur) + { + cur = prev; + dst[disp_step * disp] = saturate_cast(prev); + } + prev = cur; + } +} +void message(const __global T *msg1, const __global T *msg2, + const __global T *msg3, const __global T *data, __global T *dst, + int msg_disp_step, int data_disp_step, int cndisp, float cmax_disc_term, float cdisc_single_jump) +{ + float minimum = FLOAT_MAX; + + for(int i = 0; i < cndisp; ++i) + { + float dst_reg; + dst_reg = msg1[msg_disp_step * i]; + dst_reg += msg2[msg_disp_step * i]; + dst_reg += msg3[msg_disp_step * i]; + dst_reg += data[data_disp_step * i]; + + if (dst_reg < minimum) + minimum = dst_reg; + + dst[msg_disp_step * i] = saturate_cast(dst_reg); + } + + calc_min_linear_penalty(dst, msg_disp_step, cndisp, cdisc_single_jump); + + minimum += cmax_disc_term; + + float sum = 0; + for(int i = 0; i < cndisp; ++i) + { + float dst_reg = dst[msg_disp_step * i]; + if (dst_reg > minimum) + { + dst_reg = minimum; + dst[msg_disp_step * i] = saturate_cast(minimum); + } + sum += dst_reg; + } + sum /= cndisp; + + for(int i = 0; i < cndisp; ++i) + dst[msg_disp_step * i] -= sum; +} +__kernel void one_iteration(__global T *u, int u_step, + __global T *data, int data_step, + __global T *d, __global T *l, __global T *r, + int t, int cols, int rows, + int cndisp, float cmax_disc_term, float cdisc_single_jump) +{ + const int y = get_global_id(1); + const int x = ((get_global_id(0)) << 1) + ((y + t) & 1); + + if ((y > 0) && (y < rows - 1) && (x > 0) && (x < cols - 1)) + { + u_step /= sizeof(T); + data_step /= sizeof(T); + + __global T *us = u + y * u_step + x; + __global T *ds = d + y * u_step + x; + __global T *ls = l + y * u_step + x; + __global T *rs = r + y * u_step + x; + const __global T *dt = data + y * data_step + x; + + int msg_disp_step = u_step * rows; + int data_disp_step = data_step * rows; + + message(us + u_step, ls + 1, rs - 1, dt, us, msg_disp_step, data_disp_step, cndisp, + cmax_disc_term, cdisc_single_jump); + message(ds - u_step, ls + 1, rs - 1, dt, ds, msg_disp_step, data_disp_step, cndisp, + cmax_disc_term, cdisc_single_jump); + + message(us + u_step, ds - u_step, rs - 1, dt, rs, msg_disp_step, data_disp_step, cndisp, + cmax_disc_term, cdisc_single_jump); + message(us + u_step, ds - u_step, ls + 1, dt, ls, msg_disp_step, data_disp_step, cndisp, + cmax_disc_term, cdisc_single_jump); + } +} + +/////////////////////////////////////////////////////////////// +/////////////////////////// output //////////////////////////// +/////////////////////////////////////////////////////////////// +__kernel void output(const __global T *u, int u_step, + const __global T *d, const __global T *l, + const __global T *r, const __global T *data, + __global T *disp, int disp_rows, int disp_cols, int disp_step, + int cndisp) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + if (y > 0 && y < disp_rows - 1 && x > 0 && x < disp_cols - 1) + { + u_step /= sizeof(T); + disp_step /= sizeof(T); + const __global T *us = u + (y + 1) * u_step + x; + const __global T *ds = d + (y - 1) * u_step + x; + const __global T *ls = l + y * u_step + (x + 1); + const __global T *rs = r + y * u_step + (x - 1); + const __global T *dt = data + y * u_step + x; + + int disp_steps = disp_rows * u_step; + + int best = 0; + float best_val = FLOAT_MAX; + for (int d = 0; d < cndisp; ++d) + { + float val; + val = us[d * disp_steps]; + val += ds[d * disp_steps]; + val += ls[d * disp_steps]; + val += rs[d * disp_steps]; + val += dt[d * disp_steps]; + + if (val < best_val) + { + best_val = val; + best = d; + } + } + + (disp + y * disp_step)[x] = convert_short_sat(best); + } +} diff --git a/modules/ocl/src/stereobp.cpp b/modules/ocl/src/stereobp.cpp new file mode 100644 index 0000000000..acc31c9e56 --- /dev/null +++ b/modules/ocl/src/stereobp.cpp @@ -0,0 +1,519 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jia Haipeng, jiahaipeng95@gmail.com +// Peng Xiao, pengxiao@outlook.com +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include +#include + +using namespace cv; +using namespace cv::ocl; +using namespace std; + +//////////////////////////////////////////////////////////////////////// +///////////////// stereoBP ///////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// + +namespace cv +{ + namespace ocl + { + + ///////////////////////////OpenCL kernel strings/////////////////////////// + extern const char *stereobp; + } + +} +namespace cv +{ + namespace ocl + { + namespace stereoBP + { + ////////////////////////////////////////////////////////////////////////// + //////////////////////////////common//////////////////////////////////// + //////////////////////////////////////////////////////////////////////// + typedef struct + { + int cndisp; + float cmax_data_term; + float cdata_weight; + float cmax_disc_term; + float cdisc_single_jump; + } con_struct_t; + + cl_mem cl_con_struct = NULL; + static void load_constants(Context *clCxt, int ndisp, float max_data_term, float data_weight, + float max_disc_term, float disc_single_jump) + { + con_struct_t *con_struct = new con_struct_t; + con_struct -> cndisp = ndisp; + con_struct -> cmax_data_term = max_data_term; + con_struct -> cdata_weight = data_weight; + con_struct -> cmax_disc_term = max_disc_term; + con_struct -> cdisc_single_jump = disc_single_jump; + + cl_con_struct = load_constant(clCxt->impl->clContext, clCxt->impl->clCmdQueue, (void *)con_struct, + sizeof(con_struct_t)); + + delete con_struct; + } + static void release_constants() + { + openCLFree(cl_con_struct); + } + static inline int divUp(int total, int grain) + { + return (total + grain - 1) / grain; + } + ///////////////////////////////////////////////////////////////////////////// + ///////////////////////////comp data//////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////// + static void comp_data_call(const oclMat &left, const oclMat &right, oclMat &data, int /*disp*/, + float /*cmax_data_term*/, float /*cdata_weight*/) + { + Context *clCxt = left.clCxt; + int channels = left.oclchannels(); + int data_type = data.type(); + + string kernelName = "comp_data"; + + vector > args; + + args.push_back( make_pair( sizeof(cl_mem) , (void *)&left.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&left.rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&left.cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&left.step)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&right.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&right.step)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&data.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&data.step)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&cl_con_struct)); + + size_t gt[3] = {left.cols, left.rows, 1}, lt[3] = {16, 16, 1}; + + const int OPT_SIZE = 50; + char cn_opt [OPT_SIZE] = ""; + sprintf( cn_opt, "%s -D CN=%d", + (data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT"), + channels + ); + openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, cn_opt); + } + /////////////////////////////////////////////////////////////////////////////////// + /////////////////////////data set down//////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////// + static void data_step_down_call(int dst_cols, int dst_rows, int src_rows, + const oclMat &src, oclMat &dst, int disp) + { + Context *clCxt = src.clCxt; + int data_type = src.type(); + + string kernelName = "data_step_down"; + + vector > args; + + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src_rows)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&disp)); + + size_t gt[3] = {dst_cols, dst_rows, 1}, lt[3] = {16, 16, 1}; + char* t_opt = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT"; + openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, t_opt); + } + ///////////////////////////////////////////////////////////////////////////////// + ///////////////////////////live up message//////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////// + static void level_up_message_call(int dst_cols, int dst_rows, int src_rows, + oclMat &src, oclMat &dst, int ndisp) + { + Context *clCxt = src.clCxt; + int data_type = src.type(); + + string kernelName = "level_up_message"; + vector > args; + + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src_rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&ndisp)); + + size_t gt[3] = {dst_cols, dst_rows, 1}, lt[3] = {16, 16, 1}; + char* t_opt = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT"; + openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, t_opt); + } + static void level_up_messages_calls(int dst_idx, int dst_cols, int dst_rows, int src_rows, + oclMat *mus, oclMat *mds, oclMat *mls, oclMat *mrs, + int ndisp) + { + int src_idx = (dst_idx + 1) & 1; + + level_up_message_call(dst_cols, dst_rows, src_rows, + mus[src_idx], mus[dst_idx], ndisp); + + level_up_message_call(dst_cols, dst_rows, src_rows, + mds[src_idx], mds[dst_idx], ndisp); + + level_up_message_call(dst_cols, dst_rows, src_rows, + mls[src_idx], mls[dst_idx], ndisp); + + level_up_message_call(dst_cols, dst_rows, src_rows, + mrs[src_idx], mrs[dst_idx], ndisp); + } + ////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////cals_all_iterations_call/////////////////////////// + ///////////////////////////////////////////////////////////////////////////////// + static void calc_all_iterations_call(int cols, int rows, oclMat &u, oclMat &d, + oclMat &l, oclMat &r, oclMat &data, + int t, int cndisp, float cmax_disc_term, + float cdisc_single_jump) + { + Context *clCxt = l.clCxt; + int data_type = u.type(); + + string kernelName = "one_iteration"; + + vector > args; + + args.push_back( make_pair( sizeof(cl_mem) , (void *)&u.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&u.step)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&data.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&data.step)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&d.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&l.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&r.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&t)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&cndisp)); + args.push_back( make_pair( sizeof(cl_float) , (void *)&cmax_disc_term)); + args.push_back( make_pair( sizeof(cl_float) , (void *)&cdisc_single_jump)); + + size_t gt[3] = {cols, rows, 1}, lt[3] = {16, 16, 1}; + char* t_opt = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT"; + openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, t_opt); + } + + static void calc_all_iterations_calls(int cols, int rows, int iters, oclMat &u, + oclMat &d, oclMat &l, oclMat &r, + oclMat &data, int cndisp, float cmax_disc_term, + float cdisc_single_jump) + { + for(int t = 0; t < iters; ++t) + calc_all_iterations_call(cols, rows, u, d, l, r, data, t, cndisp, + cmax_disc_term, cdisc_single_jump); + } + /////////////////////////////////////////////////////////////////////////////// + ///////////////////////output/////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// + static void output_call(const oclMat &u, const oclMat &d, const oclMat l, const oclMat &r, + const oclMat &data, oclMat &disp, int ndisp) + { + Context *clCxt = u.clCxt; + int data_type = u.type(); + + string kernelName = "output"; + + vector > args; + + args.push_back( make_pair( sizeof(cl_mem) , (void *)&u.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&u.step)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&d.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&l.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&r.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&data.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&disp.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&disp.rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&disp.cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&disp.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&ndisp)); + + size_t gt[3] = {disp.cols, disp.rows, 1}, lt[3] = {16, 16, 1}; + char* t_opt = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT"; + openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, t_opt); + } + } + } +} +namespace +{ + const float DEFAULT_MAX_DATA_TERM = 10.0f; + const float DEFAULT_DATA_WEIGHT = 0.07f; + const float DEFAULT_MAX_DISC_TERM = 1.7f; + const float DEFAULT_DISC_SINGLE_JUMP = 1.0f; +} + +void cv::ocl::StereoBeliefPropagation::estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels) +{ + ndisp = width / 4; + if ((ndisp & 1) != 0) + ndisp++; + + int mm = ::max(width, height); + iters = mm / 100 + 2; + + levels = (int)(::log(static_cast(mm)) + 1) * 4 / 5; + if (levels == 0) levels++; +} + +cv::ocl::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp_, int iters_, int levels_, int msg_type_) + : ndisp(ndisp_), iters(iters_), levels(levels_), + max_data_term(DEFAULT_MAX_DATA_TERM), data_weight(DEFAULT_DATA_WEIGHT), + max_disc_term(DEFAULT_MAX_DISC_TERM), disc_single_jump(DEFAULT_DISC_SINGLE_JUMP), + msg_type(msg_type_), datas(levels_) +{ +} + +cv::ocl::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp_, int iters_, int levels_, float max_data_term_, float data_weight_, float max_disc_term_, float disc_single_jump_, int msg_type_) + : ndisp(ndisp_), iters(iters_), levels(levels_), + max_data_term(max_data_term_), data_weight(data_weight_), + max_disc_term(max_disc_term_), disc_single_jump(disc_single_jump_), + msg_type(msg_type_), datas(levels_) +{ +} + +namespace +{ + class StereoBeliefPropagationImpl + { + public: + StereoBeliefPropagationImpl(StereoBeliefPropagation &rthis_, + oclMat &u_, oclMat &d_, oclMat &l_, oclMat &r_, + oclMat &u2_, oclMat &d2_, oclMat &l2_, oclMat &r2_, + vector &datas_, oclMat &out_) + : rthis(rthis_), u(u_), d(d_), l(l_), r(r_), u2(u2_), d2(d2_), l2(l2_), r2(r2_), datas(datas_), out(out_), + zero(Scalar::all(0)), scale(rthis_.msg_type == CV_32F ? 1.0f : 10.0f) + { + CV_Assert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels); + CV_Assert(rthis.msg_type == CV_32F || rthis.msg_type == CV_16S); + CV_Assert(rthis.msg_type == CV_32F || (1 << (rthis.levels - 1)) * scale * rthis.max_data_term < numeric_limits::max()); + } + + void operator()(const oclMat &left, const oclMat &right, oclMat &disp) + { + CV_Assert(left.size() == right.size() && left.type() == right.type()); + CV_Assert(left.type() == CV_8UC1 || left.type() == CV_8UC3 || left.type() == CV_8UC4); + + rows = left.rows; + cols = left.cols; + + int divisor = (int)pow(2.f, rthis.levels - 1.0f); + int lowest_cols = cols / divisor; + int lowest_rows = rows / divisor; + const int min_image_dim_size = 2; + CV_Assert(min(lowest_cols, lowest_rows) > min_image_dim_size); + + init(); + + datas[0].create(rows * rthis.ndisp, cols, rthis.msg_type); + datas[0].setTo(Scalar_::all(0)); + + cv::ocl::stereoBP::comp_data_call(left, right, datas[0], rthis.ndisp, rthis.max_data_term, scale * rthis.data_weight); + calcBP(disp); + } + + void operator()(const oclMat &data, oclMat &disp) + { + CV_Assert((data.type() == rthis.msg_type) && (data.rows % rthis.ndisp == 0)); + + rows = data.rows / rthis.ndisp; + cols = data.cols; + + int divisor = (int)pow(2.f, rthis.levels - 1.0f); + int lowest_cols = cols / divisor; + int lowest_rows = rows / divisor; + const int min_image_dim_size = 2; + CV_Assert(min(lowest_cols, lowest_rows) > min_image_dim_size); + + init(); + + datas[0] = data; + + calcBP(disp); + } + private: + void init() + { + u.create(rows * rthis.ndisp, cols, rthis.msg_type); + d.create(rows * rthis.ndisp, cols, rthis.msg_type); + l.create(rows * rthis.ndisp, cols, rthis.msg_type); + r.create(rows * rthis.ndisp, cols, rthis.msg_type); + + if (rthis.levels & 1) + { + //can clear less area + u = zero; + d = zero; + l = zero; + r = zero; + } + + if (rthis.levels > 1) + { + int less_rows = (rows + 1) / 2; + int less_cols = (cols + 1) / 2; + + u2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); + d2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); + l2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); + r2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); + + if ((rthis.levels & 1) == 0) + { + u2 = zero; + d2 = zero; + l2 = zero; + r2 = zero; + } + } + + cv::ocl::stereoBP::load_constants(u.clCxt, rthis.ndisp, rthis.max_data_term, scale * rthis.data_weight, + scale * rthis.max_disc_term, scale * rthis.disc_single_jump); + + datas.resize(rthis.levels); + cols_all.resize(rthis.levels); + rows_all.resize(rthis.levels); + + cols_all[0] = cols; + rows_all[0] = rows; + } + + void calcBP(oclMat &disp) + { + using namespace cv::ocl::stereoBP; + + for (int i = 1; i < rthis.levels; ++i) + { + cols_all[i] = (cols_all[i - 1] + 1) / 2; + rows_all[i] = (rows_all[i - 1] + 1) / 2; + + datas[i].create(rows_all[i] * rthis.ndisp, cols_all[i], rthis.msg_type); + datas[i].setTo(Scalar_::all(0)); + + data_step_down_call(cols_all[i], rows_all[i], rows_all[i - 1], + datas[i - 1], datas[i], rthis.ndisp); + } + + oclMat mus[] = {u, u2}; + oclMat mds[] = {d, d2}; + oclMat mrs[] = {r, r2}; + oclMat mls[] = {l, l2}; + + int mem_idx = (rthis.levels & 1) ? 0 : 1; + + for (int i = rthis.levels - 1; i >= 0; --i) + { + // for lower level we have already computed messages by setting to zero + if (i != rthis.levels - 1) + level_up_messages_calls(mem_idx, cols_all[i], rows_all[i], rows_all[i + 1], + mus, mds, mls, mrs, rthis.ndisp); + + calc_all_iterations_calls(cols_all[i], rows_all[i], rthis.iters, mus[mem_idx], + mds[mem_idx], mls[mem_idx], mrs[mem_idx], datas[i], + rthis.ndisp, scale * rthis.max_disc_term, + scale * rthis.disc_single_jump); + + mem_idx = (mem_idx + 1) & 1; + } + if (disp.empty()) + disp.create(rows, cols, CV_16S); + + out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out)); + out = zero; + + output_call(u, d, l, r, datas.front(), out, rthis.ndisp); + + if (disp.type() != CV_16S) + out.convertTo(disp, disp.type()); + + release_constants(); + } + StereoBeliefPropagationImpl& operator=(const StereoBeliefPropagationImpl&); + + StereoBeliefPropagation &rthis; + + oclMat &u; + oclMat &d; + oclMat &l; + oclMat &r; + + oclMat &u2; + oclMat &d2; + oclMat &l2; + oclMat &r2; + + vector &datas; + oclMat &out; + + const Scalar zero; + const float scale; + + int rows, cols; + + vector cols_all, rows_all; + }; +} + +void cv::ocl::StereoBeliefPropagation::operator()(const oclMat &left, const oclMat &right, oclMat &disp) +{ + ::StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out); + impl(left, right, disp); +} + +void cv::ocl::StereoBeliefPropagation::operator()(const oclMat &data, oclMat &disp) +{ + ::StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out); + impl(data, disp); +} + diff --git a/modules/ocl/test/test_calib3d.cpp b/modules/ocl/test/test_calib3d.cpp index 58dbcc2e3d..179829e0e6 100644 --- a/modules/ocl/test/test_calib3d.cpp +++ b/modules/ocl/test/test_calib3d.cpp @@ -15,7 +15,7 @@ // Third party copyrights are property of their respective owners. // // @Authors - +// Peng Xiao, pengxiao@outlook.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -63,12 +63,12 @@ PARAM_TEST_CASE(StereoMatchBM, int, int) } }; -TEST_P(StereoMatchBM, Accuracy) +TEST_P(StereoMatchBM, Regression) { - Mat left_image = readImage(workdir + "../ocl/aloe-L.png", IMREAD_GRAYSCALE); - Mat right_image = readImage(workdir + "../ocl/aloe-R.png", IMREAD_GRAYSCALE); - Mat disp_gold = readImage(workdir + "../ocl/aloe-disp.png", IMREAD_GRAYSCALE); + Mat left_image = readImage("stereobm/aloe-L.png", IMREAD_GRAYSCALE); + Mat right_image = readImage("stereobm/aloe-R.png", IMREAD_GRAYSCALE); + Mat disp_gold = readImage("stereobm/aloe-disp.png", IMREAD_GRAYSCALE); ocl::oclMat d_left, d_right; ocl::oclMat d_disp(left_image.size(), CV_8U); Mat disp; @@ -88,7 +88,50 @@ TEST_P(StereoMatchBM, Accuracy) EXPECT_MAT_SIMILAR(disp_gold, disp, 1e-3); } -INSTANTIATE_TEST_CASE_P(GPU_Calib3D, StereoMatchBM, testing::Combine(testing::Values(128), +INSTANTIATE_TEST_CASE_P(OCL_Calib3D, StereoMatchBM, testing::Combine(testing::Values(128), testing::Values(19))); +PARAM_TEST_CASE(StereoMatchBP, int, int, int, float, float, float, float) +{ + int ndisp_; + int iters_; + int levels_; + float max_data_term_; + float data_weight_; + float max_disc_term_; + float disc_single_jump_; + virtual void SetUp() + { + ndisp_ = GET_PARAM(0); + iters_ = GET_PARAM(1); + levels_ = GET_PARAM(2); + max_data_term_ = GET_PARAM(3); + data_weight_ = GET_PARAM(4); + max_disc_term_ = GET_PARAM(5); + disc_single_jump_ = GET_PARAM(6); + } +}; +TEST_P(StereoMatchBP, Regression) +{ + Mat left_image = readImage("stereobp/aloe-L.png"); + Mat right_image = readImage("stereobp/aloe-R.png"); + Mat disp_gold = readImage("stereobp/aloe-disp.png", IMREAD_GRAYSCALE); + ocl::oclMat d_left, d_right; + ocl::oclMat d_disp; + Mat disp; + ASSERT_FALSE(left_image.empty()); + ASSERT_FALSE(right_image.empty()); + ASSERT_FALSE(disp_gold.empty()); + d_left.upload(left_image); + d_right.upload(right_image); + ocl::StereoBeliefPropagation bp(ndisp_, iters_, levels_, max_data_term_, data_weight_, + max_disc_term_, disc_single_jump_, CV_16S); + bp(d_left, d_right, d_disp); + d_disp.download(disp); + disp.convertTo(disp, disp_gold.depth()); + EXPECT_MAT_NEAR(disp_gold, disp, 0.0, ""); +} +INSTANTIATE_TEST_CASE_P(OCL_Calib3D, StereoMatchBP, testing::Combine(testing::Values(64), + testing::Values(8),testing::Values(2),testing::Values(25.0f), + testing::Values(0.1f),testing::Values(15.0f),testing::Values(1.0f))); #endif // HAVE_OPENCL