From c66e27d49e8c40a98149e6c3a53075a18dfa6878 Mon Sep 17 00:00:00 2001 From: yao Date: Fri, 28 Jun 2013 17:45:39 +0800 Subject: [PATCH] stereoBM: fix an error on Linux when running the full performance test --- modules/ocl/src/opencl/stereobm.cl | 37 +++++++++++------------------- 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/modules/ocl/src/opencl/stereobm.cl b/modules/ocl/src/opencl/stereobm.cl index 552874d427..f1b958812f 100644 --- a/modules/ocl/src/opencl/stereobm.cl +++ b/modules/ocl/src/opencl/stereobm.cl @@ -258,27 +258,13 @@ float sobel(__global unsigned char *input, int x, int y, int rows, int cols) float CalcSums(__local float *cols, __local float *cols_cache, int winsz) { - float cache = 0; - float cache2 = 0; - int winsz2 = winsz/2; - - int x = get_local_id(0); - int group_size_x = get_local_size(0); + unsigned int cache = cols[0]; - for(int i = 1; i <= winsz2; i++) +#pragma unroll + for(int i = 1; i <= winsz; i++) cache += cols[i]; - cols_cache[0] = cache; - - barrier(CLK_LOCAL_MEM_FENCE); - - if (x < group_size_x - winsz2) - cache2 = cols_cache[winsz2]; - else - for(int i = winsz2 + 1; i < winsz; i++) - cache2 += cols[i]; - - return cols[0] + cache + cache2; + return cache; } #define RpT (2 * ROWSperTHREAD) // got experimentally @@ -301,8 +287,7 @@ __kernel void textureness_kernel(__global unsigned char *disp, int disp_rows, in int beg_row = group_id_y * RpT; int end_row = min(beg_row + RpT, disp_rows); -// if (x < disp_cols) -// { + int y = beg_row; float sum = 0; @@ -340,11 +325,15 @@ __kernel void textureness_kernel(__global unsigned char *disp, int disp_rows, in } barrier(CLK_LOCAL_MEM_FENCE); - float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255; - if (sum_win < threshold) - disp[y * disp_step + x] = 0; + + if (x < disp_cols) + { + float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255; + if (sum_win < threshold) + disp[y * disp_step + x] = 0; + } barrier(CLK_LOCAL_MEM_FENCE); } - // } + }