diff --git a/modules/gpu/src/cuda/stereobp.cu b/modules/gpu/src/cuda/stereobp.cu index 05a19b4199..d011c7f4fe 100644 --- a/modules/gpu/src/cuda/stereobp.cu +++ b/modules/gpu/src/cuda/stereobp.cu @@ -255,7 +255,7 @@ namespace cv { namespace gpu { namespace device /////////////////////////////////////////////////////////////// template - __global__ void data_step_down(int dst_cols, int dst_rows, int src_rows, const PtrStep src, PtrStep dst) + __global__ void data_step_down(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStep src, PtrStep dst) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -264,10 +264,10 @@ namespace cv { namespace gpu { namespace device { for (int d = 0; d < cndisp; ++d) { - float dst_reg = src.ptr(d * src_rows + (2*y+0))[(2*x+0)]; - dst_reg += src.ptr(d * src_rows + (2*y+1))[(2*x+0)]; - dst_reg += src.ptr(d * src_rows + (2*y+0))[(2*x+1)]; - dst_reg += src.ptr(d * src_rows + (2*y+1))[(2*x+1)]; + float dst_reg = src.ptr(d * src_rows + ::min(2*y+0, src_rows-1))[::min(2*x+0, src_cols-1)]; + dst_reg += src.ptr(d * src_rows + ::min(2*y+1, src_rows-1))[::min(2*x+0, src_cols-1)]; + dst_reg += src.ptr(d * src_rows + ::min(2*y+0, src_rows-1))[::min(2*x+1, src_cols-1)]; + dst_reg += src.ptr(d * src_rows + ::min(2*y+1, src_rows-1))[::min(2*x+1, src_cols-1)]; dst.ptr(d * dst_rows + y)[x] = saturate_cast(dst_reg); } @@ -275,7 +275,7 @@ namespace cv { namespace gpu { namespace device } template - void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream) + void data_step_down_gpu(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -283,15 +283,15 @@ namespace cv { namespace gpu { namespace device grid.x = divUp(dst_cols, threads.x); grid.y = divUp(dst_rows, threads.y); - data_step_down<<>>(dst_cols, dst_rows, src_rows, (PtrStepSz)src, (PtrStepSz)dst); + data_step_down<<>>(dst_cols, dst_rows, src_cols, src_rows, (PtrStepSz)src, (PtrStepSz)dst); cudaSafeCall( cudaGetLastError() ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); } - template void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); - template void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); + template void data_step_down_gpu(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); + template void data_step_down_gpu(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); /////////////////////////////////////////////////////////////// /////////////////// level up messages //////////////////////// diff --git a/modules/gpu/src/stereobp.cpp b/modules/gpu/src/stereobp.cpp index 3b827a3129..2bcefe3770 100644 --- a/modules/gpu/src/stereobp.cpp +++ b/modules/gpu/src/stereobp.cpp @@ -67,7 +67,7 @@ namespace cv { namespace gpu { namespace device template void comp_data_gpu(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream); template - void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); + void data_step_down_gpu(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); template void level_up_messages_gpu(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream); template @@ -158,7 +158,7 @@ namespace init(stream); - datas[0].create(rows * rthis.ndisp, cols, rthis.msg_type); + createContinuous(rows * rthis.ndisp, cols, rthis.msg_type, datas[0]); comp_data_callers[rthis.msg_type == CV_32F][left.channels()](left, right, datas[0], StreamAccessor::getStream(stream)); @@ -187,10 +187,10 @@ namespace private: void init(Stream& stream) { - u.create(rows * rthis.ndisp, cols, rthis.msg_type); - d.create(rows * rthis.ndisp, cols, rthis.msg_type); - l.create(rows * rthis.ndisp, cols, rthis.msg_type); - r.create(rows * rthis.ndisp, cols, rthis.msg_type); + createContinuous(rows * rthis.ndisp, cols, rthis.msg_type, u); + createContinuous(rows * rthis.ndisp, cols, rthis.msg_type, d); + createContinuous(rows * rthis.ndisp, cols, rthis.msg_type, l); + createContinuous(rows * rthis.ndisp, cols, rthis.msg_type, r); if (rthis.levels & 1) { @@ -213,13 +213,13 @@ namespace if (rthis.levels > 1) { - int less_rows = rows / 2; - int less_cols = cols / 2; + int less_rows = (rows + 1) / 2; + int less_cols = (cols + 1) / 2; - u2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); - d2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); - l2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); - r2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); + createContinuous(less_rows * rthis.ndisp, less_cols, rthis.msg_type, u2); + createContinuous(less_rows * rthis.ndisp, less_cols, rthis.msg_type, d2); + createContinuous(less_rows * rthis.ndisp, less_cols, rthis.msg_type, l2); + createContinuous(less_rows * rthis.ndisp, less_cols, rthis.msg_type, r2); if ((rthis.levels & 1) == 0) { @@ -253,7 +253,7 @@ namespace void calcBP(GpuMat& disp, Stream& stream) { - typedef void (*data_step_down_t)(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); + typedef void (*data_step_down_t)(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); static const data_step_down_t data_step_down_callers[2] = { data_step_down_gpu, data_step_down_gpu @@ -283,12 +283,12 @@ namespace for (int i = 1; i < rthis.levels; ++i) { - cols_all[i] = cols_all[i-1] / 2; - rows_all[i] = rows_all[i-1] / 2; + cols_all[i] = (cols_all[i-1] + 1) / 2; + rows_all[i] = (rows_all[i-1] + 1) / 2; - datas[i].create(rows_all[i] * rthis.ndisp, cols_all[i], rthis.msg_type); + createContinuous(rows_all[i] * rthis.ndisp, cols_all[i], rthis.msg_type, datas[i]); - data_step_down_callers[funcIdx](cols_all[i], rows_all[i], rows_all[i-1], datas[i-1], datas[i], cudaStream); + data_step_down_callers[funcIdx](cols_all[i], rows_all[i], cols_all[i-1], rows_all[i-1], datas[i-1], datas[i], cudaStream); } PtrStepSzb mus[] = {u, u2};