|
|
@ -147,6 +147,7 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri |
|
|
|
__local int best_disp[2]; |
|
|
|
__local int best_disp[2]; |
|
|
|
__local int best_cost[2]; |
|
|
|
__local int best_cost[2]; |
|
|
|
best_cost[nthread] = MAX_VAL; |
|
|
|
best_cost[nthread] = MAX_VAL; |
|
|
|
|
|
|
|
best_disp[nthread] = MAX_VAL; |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
|
|
short costbuf[wsz]; |
|
|
|
short costbuf[wsz]; |
|
|
@ -187,16 +188,12 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri |
|
|
|
if(nthread==1) |
|
|
|
if(nthread==1) |
|
|
|
{ |
|
|
|
{ |
|
|
|
cost[0] = tempcost; |
|
|
|
cost[0] = tempcost; |
|
|
|
#ifndef CPU |
|
|
|
|
|
|
|
atomic_min(best_cost+nthread, tempcost); |
|
|
|
atomic_min(best_cost+nthread, tempcost); |
|
|
|
#else |
|
|
|
|
|
|
|
*(best_cost+nthread) = min(*(best_cost+nthread), tempcost); |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
|
|
if(best_cost[1] == tempcost) |
|
|
|
if(best_cost[1] == tempcost) |
|
|
|
best_disp[1] = ndisp - d - 1; |
|
|
|
atomic_min(best_disp + 1, ndisp - d - 1); |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
|
|
int dispIdx = mad24(gy, disp_step, disp_offset + gx*(int)sizeof(short)); |
|
|
|
int dispIdx = mad24(gy, disp_step, disp_offset + gx*(int)sizeof(short)); |
|
|
@ -214,6 +211,7 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri |
|
|
|
y = (ly < sizeY) ? gy + shiftY + ly : rows; |
|
|
|
y = (ly < sizeY) ? gy + shiftY + ly : rows; |
|
|
|
|
|
|
|
|
|
|
|
best_cost[nthread] = MAX_VAL; |
|
|
|
best_cost[nthread] = MAX_VAL; |
|
|
|
|
|
|
|
best_disp[nthread] = MAX_VAL; |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
|
|
costIdx = calcLocalIdx(lx, ly, d, sizeY); |
|
|
|
costIdx = calcLocalIdx(lx, ly, d, sizeY); |
|
|
@ -228,20 +226,15 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri |
|
|
|
cost[0], cost[1], cost[-1], winsize); |
|
|
|
cost[0], cost[1], cost[-1], winsize); |
|
|
|
} |
|
|
|
} |
|
|
|
cost[0] = tempcost; |
|
|
|
cost[0] = tempcost; |
|
|
|
#ifndef CPU |
|
|
|
|
|
|
|
atomic_min(best_cost + nthread, tempcost); |
|
|
|
atomic_min(best_cost + nthread, tempcost); |
|
|
|
#else |
|
|
|
|
|
|
|
*(best_cost + nthread) = min(*(best_cost + nthread), tempcost); |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
|
|
if(best_cost[nthread] == tempcost) |
|
|
|
if(best_cost[nthread] == tempcost) |
|
|
|
best_disp[nthread] = ndisp - d - 1; |
|
|
|
atomic_min(best_disp + nthread, ndisp - d - 1); |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
|
|
|
|
|
|
|
|
int dispIdx = mad24(gy+ly, disp_step, disp_offset + (gx+lx)*(int)sizeof(short)); |
|
|
|
int dispIdx = mad24(gy+ly, disp_step, disp_offset + (gx+lx)*(int)sizeof(short)); |
|
|
|
disp = (__global short *)(dispptr + dispIdx); |
|
|
|
disp = (__global short *)(dispptr + dispIdx); |
|
|
|
|
|
|
|
|
|
|
|
calcDisp(cost, disp, uniquenessRatio, mindisp, ndisp, 2*sizeY, |
|
|
|
calcDisp(cost, disp, uniquenessRatio, mindisp, ndisp, 2*sizeY, |
|
|
|
best_disp + nthread, best_cost + nthread, d, x, y, cols, rows, wsz2); |
|
|
|
best_disp + nthread, best_cost + nthread, d, x, y, cols, rows, wsz2); |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|