From 5a72be08fd75e221bc6c8b5f197fc1f9531508f3 Mon Sep 17 00:00:00 2001 From: GabrieleDalmazzone Date: Tue, 1 Dec 2015 09:19:31 +0100 Subject: [PATCH] Race condition bug-fix in hog.cu See https://github.com/Itseez/opencv/issues/5721 COMMENTS: * The second __syncthreads() is necessary; I am sure of that. * The code also works without the first __syncthreads(), but I have added it anyway for symmetry. It does not affect run-time performance; I checked this by profiling with nvvp. --- modules/cudaobjdetect/src/cuda/hog.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/cudaobjdetect/src/cuda/hog.cu b/modules/cudaobjdetect/src/cuda/hog.cu index e5c7e8e9ed..c8609e7b03 100644 --- a/modules/cudaobjdetect/src/cuda/hog.cu +++ b/modules/cudaobjdetect/src/cuda/hog.cu @@ -331,11 +331,13 @@ namespace cv { namespace cuda { namespace device if (threadIdx.x < block_hist_size) elem = hist[0]; + __syncthreads(); // prevent race condition (redundant?) float sum = reduce_smem(squares, elem * elem); float scale = 1.0f / (::sqrtf(sum) + 0.1f * block_hist_size); elem = ::min(elem * scale, threshold); + __syncthreads(); // prevent race condition sum = reduce_smem(squares, elem * elem); scale = 1.0f / (::sqrtf(sum) + 1e-3f);