From 0b08d2559eaf2a6d3d7370a9aea26df0536c3b2a Mon Sep 17 00:00:00 2001 From: Zhigang Gong Date: Tue, 15 Mar 2016 19:11:15 +0800 Subject: [PATCH] fix potential race condition in canny.cl. See the code snippet below: while(l_counter != 0) { int mod = l_counter % LOCAL_TOTAL; int pix_per_thr = l_counter / LOCAL_TOTAL + ((lid < mod) ? 1 : 0); for (int i = 0; i < pix_per_thr; ++i) { int index = atomic_dec(&l_counter) - 1; .... } .... barrier(CLK_LOCAL_MEM_FENCE); } If we don't put a barrier before the for loop, then there is a possibility that some work items enter this loop while the others have not; then l_counter will be reduced in the for loop and may be changed to zero, and the other work items may be unable to enter the while loop. If this happens, it breaks the barrier rule, which requires all the work items to reach the same barrier, and it may hang the GPU, depending on the implementation of the OpenCL platform. This issue was raised at: https://github.com/Itseez/opencv/issues/5175 Signed-off-by: Zhigang Gong --- modules/imgproc/src/opencl/canny.cl | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/imgproc/src/opencl/canny.cl b/modules/imgproc/src/opencl/canny.cl index 2cc0796ecc..584cf9e90b 100644 --- a/modules/imgproc/src/opencl/canny.cl +++ b/modules/imgproc/src/opencl/canny.cl @@ -428,6 +428,7 @@ __kernel void stage2_hysteresis(__global uchar *map_ptr, int map_step, int map_o int mod = l_counter % LOCAL_TOTAL; int pix_per_thr = l_counter / LOCAL_TOTAL + ((lid < mod) ? 1 : 0); + barrier(CLK_LOCAL_MEM_FENCE); for (int i = 0; i < pix_per_thr; ++i) { int index = atomic_dec(&l_counter) - 1;