From 0e2ea45c93bde7e92a61736654aaff9cc7318e5b Mon Sep 17 00:00:00 2001 From: Aaron Denney Date: Mon, 30 Jun 2014 08:46:14 -0700 Subject: [PATCH] ndisp no longer constant --- modules/cudastereo/src/cuda/stereocsbp.cu | 42 ++++++++++------------ modules/cudastereo/src/cuda/stereocsbp.hpp | 2 +- modules/cudastereo/src/stereocsbp.cpp | 2 +- 3 files changed, 21 insertions(+), 25 deletions(-) diff --git a/modules/cudastereo/src/cuda/stereocsbp.cu b/modules/cudastereo/src/cuda/stereocsbp.cu index d0097f3bee..4c3bde337f 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.cu +++ b/modules/cudastereo/src/cuda/stereocsbp.cu @@ -58,8 +58,6 @@ namespace cv { namespace cuda { namespace device /////////////////////// load constants //////////////////////// /////////////////////////////////////////////////////////////// - __constant__ int cndisp; - __constant__ float cmax_data_term; __constant__ float cdata_weight; __constant__ float cmax_disc_term; @@ -72,10 +70,8 @@ namespace cv { namespace cuda { namespace device __constant__ size_t cdisp_step2; - void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th) + void load_constants(float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th) { - cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) ); - cudaSafeCall( cudaMemcpyToSymbol(cmax_data_term, &max_data_term, sizeof(float)) ); cudaSafeCall( cudaMemcpyToSymbol(cdata_weight, &data_weight, sizeof(float)) ); cudaSafeCall( cudaMemcpyToSymbol(cmax_disc_term, &max_disc_term, sizeof(float)) ); @@ -123,7 +119,7 @@ namespace cv { namespace cuda { namespace device }; template - __global__ void get_first_k_initial_global(uchar *ctemp, T* data_cost_selected_, T *selected_disp_pyr, int h, int w, int nr_plane) + __global__ void get_first_k_initial_global(uchar *ctemp, T* data_cost_selected_, T *selected_disp_pyr, int h, int w, int nr_plane, int ndisp) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -138,7 +134,7 @@ namespace cv { namespace cuda { namespace device { T minimum = device::numeric_limits::max(); int id = 0; - for(int d = 0; d < cndisp; d++) + for(int d = 0; d < ndisp; d++) { T cur = data_cost[d * cdisp_step1]; if(cur < minimum) @@ -157,7 +153,7 @@ namespace cv { namespace cuda { namespace device template - __global__ void get_first_k_initial_local(uchar *ctemp, T* data_cost_selected_, T* selected_disp_pyr, int h, int w, int nr_plane) + __global__ void get_first_k_initial_local(uchar *ctemp, T* data_cost_selected_, T* selected_disp_pyr, int h, int w, int nr_plane, int ndisp) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -174,7 +170,7 @@ namespace cv { namespace cuda { namespace device T cur = data_cost[1 * cdisp_step1]; T next = data_cost[2 * cdisp_step1]; - for (int d = 1; d < cndisp - 1 && nr_local_minimum < nr_plane; d++) + for (int d = 1; d < ndisp - 1 && nr_local_minimum < nr_plane; d++) { if (cur < prev && cur < next) { @@ -195,7 +191,7 @@ namespace cv { namespace cuda { namespace device T minimum = numeric_limits::max(); int id = 0; - for (int d = 0; d < cndisp; d++) + for (int d = 0; d < ndisp; d++) { cur = data_cost[d * cdisp_step1]; if (cur < minimum) @@ -213,7 +209,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int h, int w, int level) + __global__ void init_data_cost(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int h, int w, int level, int ndisp) { int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -228,7 +224,7 @@ namespace cv { namespace cuda { namespace device T* data_cost = (T*)ctemp + y * cmsg_step + x; - for(int d = 0; d < cndisp; ++d) + for(int d = 0; d < ndisp; ++d) { float val = 0.0f; for(int yi = y0; yi < yt; yi++) @@ -253,7 +249,7 @@ namespace cv { namespace cuda { namespace device } template - __global__ void init_data_cost_reduce(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int level, int rows, int cols, int h) + __global__ void init_data_cost_reduce(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int level, int rows, int cols, int h, int ndisp) { int x_out = blockIdx.x; int y_out = blockIdx.y % h; @@ -261,7 +257,7 @@ namespace cv { namespace cuda { namespace device int tid = threadIdx.x; - if (d < cndisp) + if (d < ndisp) { int x0 = x_out << level; int y0 = y_out << level; @@ -301,7 +297,7 @@ namespace cv { namespace cuda { namespace device template - void init_data_cost_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int /*rows*/, int /*cols*/, int h, int w, int level, int /*ndisp*/, int channels, cudaStream_t stream) + void init_data_cost_caller_(const uchar *cleft, const uchar *cright, uchar *ctemp, size_t cimg_step, int /*rows*/, int /*cols*/, int h, int w, int level, int ndisp, int channels, cudaStream_t stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -311,9 +307,9 @@ namespace cv { namespace cuda { namespace device switch (channels) { - case 1: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level); break; - case 3: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level); break; - case 4: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level); break; + case 1: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp); break; + case 3: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp); break; + case 4: init_data_cost<<>>(cleft, cright, ctemp, cimg_step, h, w, level, ndisp); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } @@ -330,9 +326,9 @@ namespace cv { namespace cuda { namespace device switch (channels) { - case 1: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h); break; - case 3: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h); break; - case 4: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h); break; + case 1: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp); break; + case 3: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp); break; + case 4: init_data_cost_reduce<<>>(cleft, cright, ctemp, cimg_step, level, rows, cols, h, ndisp); break; default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); } } @@ -368,9 +364,9 @@ namespace cv { namespace cuda { namespace device grid.y = divUp(h, threads.y); if (use_local_init_data_cost == true) - get_first_k_initial_local<<>> (ctemp, data_cost_selected, disp_selected_pyr, h, w, nr_plane); + get_first_k_initial_local<<>> (ctemp, data_cost_selected, disp_selected_pyr, h, w, nr_plane, ndisp); else - get_first_k_initial_global<<>>(ctemp, data_cost_selected, disp_selected_pyr, h, w, nr_plane); + get_first_k_initial_global<<>>(ctemp, data_cost_selected, disp_selected_pyr, h, w, nr_plane, ndisp); cudaSafeCall( cudaGetLastError() ); diff --git a/modules/cudastereo/src/cuda/stereocsbp.hpp b/modules/cudastereo/src/cuda/stereocsbp.hpp index 0854a92a88..c9f3983256 100644 --- a/modules/cudastereo/src/cuda/stereocsbp.hpp +++ b/modules/cudastereo/src/cuda/stereocsbp.hpp @@ -2,7 +2,7 @@ namespace cv { namespace cuda { namespace device { namespace stereocsbp { - void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th); + void load_constants(float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th); template void init_data_cost(const uchar *left, const uchar *right, uchar *ctemp, size_t cimg_step, int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step, diff --git a/modules/cudastereo/src/stereocsbp.cpp b/modules/cudastereo/src/stereocsbp.cpp index be4d8f1ee0..946a14fb2b 100644 --- a/modules/cudastereo/src/stereocsbp.cpp +++ b/modules/cudastereo/src/stereocsbp.cpp @@ -222,7 +222,7 @@ namespace //////////////////////////////////////////////////////////////////////////// // Compute - load_constants(ndisp_, max_data_term_, data_weight_, max_disc_term_, disc_single_jump_, min_disp_th_); + load_constants(max_data_term_, data_weight_, max_disc_term_, disc_single_jump_, min_disp_th_); l[0].setTo(0, _stream); d[0].setTo(0, _stream);