diff --git a/modules/cudaarithm/test/test_reductions.cpp b/modules/cudaarithm/test/test_reductions.cpp index 5fd7e2dec9..e3c54055df 100644 --- a/modules/cudaarithm/test/test_reductions.cpp +++ b/modules/cudaarithm/test/test_reductions.cpp @@ -850,7 +850,7 @@ CUDA_TEST_P(Integral, Accuracy) INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Integral, testing::Combine( ALL_DEVICES, - DIFFERENT_SIZES, + testing::Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(768, 1066)), WHOLE_SUBMAT)); /////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp b/modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp index 5c90e99893..7af52650c0 100644 --- a/modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp +++ b/modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp @@ -439,8 +439,6 @@ namespace integral_detail T sum = (tidx < cols) && (y < rows) ? *p : 0; - y += blockDim.y; - sums[threadIdx.x][threadIdx.y] = sum; __syncthreads(); @@ -467,14 +465,17 @@ namespace integral_detail if (threadIdx.y > 0) sum += sums[threadIdx.x][threadIdx.y - 1]; - if (tidx < cols) + sum += stepSum; + stepSum += sums[threadIdx.x][blockDim.y - 1]; + + __syncthreads(); + + if ((tidx < cols) && (y < rows)) { - sum += stepSum; - stepSum += sums[threadIdx.x][blockDim.y - 1]; *p = sum; } - __syncthreads(); + y += blockDim.y; } #else __shared__ T smem[32][32];