Fixed incorrect calculation of best_disp

pull/2535/head
Alexander Karsakov 11 years ago
parent 8c39b4e8b6
commit 4ceaf44fa0
  1. modules/calib3d/src/opencl/stereobm.cl — 15 changes (added + removed lines)
  2. modules/calib3d/src/stereobm.cpp — 3 changes (added + removed lines)

@ -147,6 +147,7 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri
__local int best_disp[2]; __local int best_disp[2];
__local int best_cost[2]; __local int best_cost[2];
best_cost[nthread] = MAX_VAL; best_cost[nthread] = MAX_VAL;
best_disp[nthread] = MAX_VAL;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
short costbuf[wsz]; short costbuf[wsz];
@ -187,16 +188,12 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri
if(nthread==1) if(nthread==1)
{ {
cost[0] = tempcost; cost[0] = tempcost;
#ifndef CPU
atomic_min(best_cost+nthread, tempcost); atomic_min(best_cost+nthread, tempcost);
#else
*(best_cost+nthread) = min(*(best_cost+nthread), tempcost);
#endif
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if(best_cost[1] == tempcost) if(best_cost[1] == tempcost)
best_disp[1] = ndisp - d - 1; atomic_min(best_disp + 1, ndisp - d - 1);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
int dispIdx = mad24(gy, disp_step, disp_offset + gx*(int)sizeof(short)); int dispIdx = mad24(gy, disp_step, disp_offset + gx*(int)sizeof(short));
@ -214,6 +211,7 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri
y = (ly < sizeY) ? gy + shiftY + ly : rows; y = (ly < sizeY) ? gy + shiftY + ly : rows;
best_cost[nthread] = MAX_VAL; best_cost[nthread] = MAX_VAL;
best_disp[nthread] = MAX_VAL;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
costIdx = calcLocalIdx(lx, ly, d, sizeY); costIdx = calcLocalIdx(lx, ly, d, sizeY);
@ -228,20 +226,15 @@ __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * ri
cost[0], cost[1], cost[-1], winsize); cost[0], cost[1], cost[-1], winsize);
} }
cost[0] = tempcost; cost[0] = tempcost;
#ifndef CPU
atomic_min(best_cost + nthread, tempcost); atomic_min(best_cost + nthread, tempcost);
#else
*(best_cost + nthread) = min(*(best_cost + nthread), tempcost);
#endif
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if(best_cost[nthread] == tempcost) if(best_cost[nthread] == tempcost)
best_disp[nthread] = ndisp - d - 1; atomic_min(best_disp + nthread, ndisp - d - 1);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
int dispIdx = mad24(gy+ly, disp_step, disp_offset + (gx+lx)*(int)sizeof(short)); int dispIdx = mad24(gy+ly, disp_step, disp_offset + (gx+lx)*(int)sizeof(short));
disp = (__global short *)(dispptr + dispIdx); disp = (__global short *)(dispptr + dispIdx);
calcDisp(cost, disp, uniquenessRatio, mindisp, ndisp, 2*sizeY, calcDisp(cost, disp, uniquenessRatio, mindisp, ndisp, 2*sizeY,
best_disp + nthread, best_cost + nthread, d, x, y, cols, rows, wsz2); best_disp + nthread, best_cost + nthread, d, x, y, cols, rows, wsz2);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);

@ -744,9 +744,8 @@ static bool ocl_stereobm( InputArray _left, InputArray _right,
int wsz2 = wsz/2; int wsz2 = wsz/2;
int sizeX = std::max(11, 27 - ocl::Device::getDefault().maxComputeUnits() ), sizeY = sizeX-1, N = ndisp*2; int sizeX = std::max(11, 27 - ocl::Device::getDefault().maxComputeUnits() ), sizeY = sizeX-1, N = ndisp*2;
bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
ocl::Kernel k("stereoBM", ocl::calib3d::stereobm_oclsrc, cv::format("-D csize=%d -D wsz=%d%s", (2*sizeY)*ndisp, wsz, is_cpu ? " -D CPU" : "")); ocl::Kernel k("stereoBM", ocl::calib3d::stereobm_oclsrc, cv::format("-D csize=%d -D wsz=%d", (2*sizeY)*ndisp, wsz) );
if(k.empty()) if(k.empty())
return false; return false;

Loading…
Cancel
Save