diff --git a/modules/cudastereo/src/cuda/stereobp.cu b/modules/cudastereo/src/cuda/stereobp.cu index f8ecdaf508..a4e7d2daba 100644 --- a/modules/cudastereo/src/cuda/stereobp.cu +++ b/modules/cudastereo/src/cuda/stereobp.cu @@ -255,7 +255,7 @@ namespace cv { namespace cuda { namespace device /////////////////////////////////////////////////////////////// template - __global__ void data_step_down(int dst_cols, int dst_rows, int src_rows, const PtrStep src, PtrStep dst) + __global__ void data_step_down(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStep src, PtrStep dst) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -264,10 +264,15 @@ namespace cv { namespace cuda { namespace device { for (int d = 0; d < cndisp; ++d) { - float dst_reg = src.ptr(d * src_rows + (2*y+0))[(2*x+0)]; - dst_reg += src.ptr(d * src_rows + (2*y+1))[(2*x+0)]; - dst_reg += src.ptr(d * src_rows + (2*y+0))[(2*x+1)]; - dst_reg += src.ptr(d * src_rows + (2*y+1))[(2*x+1)]; + // check the index of src + const int x0 = 2 * x; + const int x1 = ::min(x0 + 1, src_cols - 1); + const int y0 = 2 * y; + const int y1 = ::min(y0 + 1, src_rows - 1); + float dst_reg = src.ptr(d * src_rows + y0)[x0]; + dst_reg += src.ptr(d * src_rows + y1)[x0]; + dst_reg += src.ptr(d * src_rows + y0)[x1]; + dst_reg += src.ptr(d * src_rows + y1)[x1]; dst.ptr(d * dst_rows + y)[x] = saturate_cast(dst_reg); } @@ -275,7 +280,7 @@ namespace cv { namespace cuda { namespace device } template - void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream) + void data_step_down_gpu(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); @@ -283,15 +288,15 @@ namespace cv { namespace cuda { namespace device grid.x = divUp(dst_cols, threads.x); grid.y = divUp(dst_rows, threads.y); - data_step_down<<>>(dst_cols, dst_rows, src_rows, (PtrStepSz)src, (PtrStepSz)dst); + data_step_down<<>>(dst_cols, dst_rows, src_cols, src_rows, (PtrStepSz)src, (PtrStepSz)dst); cudaSafeCall( cudaGetLastError() ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); } - template void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); - template void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); + template void data_step_down_gpu(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); + template void data_step_down_gpu(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); /////////////////////////////////////////////////////////////// /////////////////// level up messages //////////////////////// diff --git a/modules/cudastereo/src/stereobp.cpp b/modules/cudastereo/src/stereobp.cpp index 953674b904..2c39d3e240 100644 --- a/modules/cudastereo/src/stereobp.cpp +++ b/modules/cudastereo/src/stereobp.cpp @@ -61,7 +61,7 @@ namespace cv { namespace cuda { namespace device template void comp_data_gpu(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream); template - void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); + void data_step_down_gpu(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); template void level_up_messages_gpu(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream); template @@ -283,7 +283,7 @@ namespace { using namespace cv::cuda::device::stereobp; - typedef void (*data_step_down_t)(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); + typedef void (*data_step_down_t)(int dst_cols, int dst_rows, int src_cols, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); static const data_step_down_t data_step_down_callers[2] = { data_step_down_gpu, data_step_down_gpu @@ -318,7 +318,7 @@ namespace datas_[i].create(rows_all_[i] * ndisp_, cols_all_[i], msg_type_); - data_step_down_callers[funcIdx](cols_all_[i], rows_all_[i], rows_all_[i-1], datas_[i-1], datas_[i], stream); + data_step_down_callers[funcIdx](cols_all_[i], rows_all_[i], cols_all_[i-1], rows_all_[i-1], datas_[i-1], datas_[i], stream); } PtrStepSzb mus[] = {u_, u2_};