From 799d7e7a5093eef28fd37d8e0f86fa55e8814aa3 Mon Sep 17 00:00:00 2001 From: Konstantin Matskevich Date: Fri, 28 Feb 2014 09:28:07 +0400 Subject: [PATCH] bad experiment =( --- modules/calib3d/src/opencl/stereobm.cl | 145 +++++++++++------- modules/calib3d/src/stereobm.cpp | 12 +- modules/calib3d/test/opencl/test_stereobm.cpp | 6 +- 3 files changed, 95 insertions(+), 68 deletions(-) diff --git a/modules/calib3d/src/opencl/stereobm.cl b/modules/calib3d/src/opencl/stereobm.cl index 2e74f591cf..63765357fe 100644 --- a/modules/calib3d/src/opencl/stereobm.cl +++ b/modules/calib3d/src/opencl/stereobm.cl @@ -49,38 +49,85 @@ #define MAX_VAL 32767 +void calcDisp(__local short * costFunc, __global short * disp, int uniquenessRatio, int textureTreshold, short textsum, int mindisp, int ndisp) +{ + short FILTERED = (mindisp - 1)<<4; + short best_disp = FILTERED, best_cost = MAX_VAL-1; + __local short * cost; + cost = &costFunc[0]; + #pragma unroll + for(int i = 0; i < tsize/2; i++) + { + short c = cost[0]; + best_cost = (c < best_cost) ? c : best_cost; + best_disp = (best_cost == c) ? ndisp - i - 1 : best_disp; + cost++; + } + + cost = &costFunc[0]; + int thresh = best_cost + (best_cost * uniquenessRatio/100); + #pragma unroll + for(int i = 0; (i < tsize/2) && (uniquenessRatio > 0); i++) + { + best_disp = ( (cost[0] <= thresh) && (i < (ndisp - best_disp - 2) || i > (ndisp - best_disp) ) ) ? + FILTERED : best_disp; + cost++; + } + + best_disp = (textsum < textureTreshold) ? FILTERED : best_disp; + + if( best_disp != FILTERED ) + { + cost = &costFunc[0] + (ndisp - best_disp - 1); + int y3 = ((ndisp - best_disp - 1) > 0) ? cost[-1] : cost[1], + y2 = cost[0], + y1 = ((ndisp - best_disp - 1) < ndisp-1) ? cost[1] : cost[-1]; + int d = y3+y1-2*y2 + abs(y3-y1); + disp[0] = (short)best_disp;//(((ndisp - best_disp - 1 + mindisp)*256 + (d != 0 ? (y3-y1)*256/d : 0) + 15) >> 4); + } +} + __kernel void stereoBM_opt(__global const uchar * leftptr, __global const uchar * rightptr, __global uchar * dispptr, int disp_step, int disp_offset, int rows, int cols, int mindisp, int ndisp, - int preFilterCap, int nthreads, int textureTreshold, int uniquenessRatio) + int preFilterCap, int textureTreshold, int uniquenessRatio) { int x = get_global_id(0); - int total_y = get_global_id(1); - int z = get_local_id(2); - int d = get_local_id(1); - int gy = get_group_id(1), y = gy*ndisp + z*ndisp/nthreads; + int ly = get_local_id(1); + int y = get_global_id(1)*32; + int d = get_local_id(2); int wsz2 = wsz/2; short FILTERED = (mindisp - 1)<<4; - __local short costFunc[csize]; - short textsum[tsize]; - __local short * cost = &costFunc[0] + d + ndisp*ndisp/nthreads*z; + __local short costFunc[tsize]; + __local short bestdisp[tsize]; + short textsum; + __local short * cost = &costFunc[0] + d +ly*ndisp; __global uchar * left, * right; - int dispIdx = mad24(total_y, disp_step, disp_offset + x*(int)sizeof(short) ); + int dispIdx = mad24(y, disp_step, disp_offset + x*(int)sizeof(short) ); __global short * disp = (__global short*)(dispptr + dispIdx); - if( x < cols && total_y < rows) - { - disp[0] = FILTERED; - } + short best_cost = MAX_VAL-1, best_disp = FILTERED; short costbuf[wsz]; short textbuf[wsz]; int head = 0; + int endy = y+32; + + cost[0] = 0; + bestdisp[d + ly*ndisp] = d; + textsum = 0; + + for(; y < wsz2; y++) + { + disp[0] = FILTERED; + disp += cols; + } + if( x < cols && y < rows) + { + disp[0] = FILTERED; + } + if( (x > ndisp+mindisp+wsz2-2) && (x < cols - wsz2 - mindisp) ) { - cost += (y < wsz2) ? ndisp*wsz2 : 0; - y = (y= wsz2) ) + { + calcDisp(&costFunc[ly*ndisp], &disp[0], uniquenessRatio, textureTreshold, textsum, mindisp, ndisp); + } + barrier(CLK_LOCAL_MEM_FENCE); + } + + y++; + cost = &costFunc[0] + d+ly*ndisp; + + for(; (y < endy) && (y wsz2 && (x > ndisp+mindisp+wsz2-2) && (x < cols - wsz2 - mindisp) ) { head = head%wsz; - cost += ndisp; - cost[0] = cost[-ndisp]; - textsum[y-(gy*ndisp)] = textsum[(y-1)-(gy*ndisp)]; left = leftptr + mad24(y-wsz2-1, cols, x - wsz2); right = rightptr + mad24(y-wsz2-1, cols, x - wsz2 - d - mindisp); @@ -120,47 +179,17 @@ __kernel void stereoBM_opt(__global const uchar * leftptr, __global const uchar left++; right++; } cost[0] += costdiff - costbuf[head]; - textsum[y-(gy*ndisp)] += textdiff - textbuf[head]; + textsum += textdiff - textbuf[head]; costbuf[head] = costdiff; textbuf[head] = textdiff; head++; - } - barrier(CLK_LOCAL_MEM_FENCE); - - cost = &costFunc[0] + d*ndisp; - short best_disp = FILTERED, best_cost = MAX_VAL-1; - #pragma unroll - for(int i = 0; i < tsize; i++) - { - short c = cost[0]; - best_cost = (c < best_cost) ? c : best_cost; - best_disp = (best_cost == c) ? ndisp - i - 1 : best_disp; - cost++; - } + barrier(CLK_LOCAL_MEM_FENCE); - cost = &costFunc[0] + d*ndisp; - int thresh = best_cost + (best_cost * uniquenessRatio/100); - #pragma unroll - for(int i = 0; (i < tsize) && (uniquenessRatio > 0); i++) - { - best_disp = ( (cost[0] <= thresh) && (i < (ndisp - best_disp - 2) || i > (ndisp - best_disp) ) ) ? - FILTERED : best_disp; - cost++; - } - - best_disp = (total_y >= rows-wsz2) || (total_y < wsz2) || (textsum[d] < textureTreshold) ? FILTERED : best_disp; - - if( best_disp != FILTERED ) - { - cost = &costFunc[0] + (ndisp - best_disp - 1) + ndisp*d; - int y3 = ((ndisp - best_disp - 1) > 0) ? cost[-1] : cost[1], - y2 = cost[0], - y1 = ((ndisp - best_disp - 1) < ndisp-1) ? cost[1] : cost[-1]; - d = y3+y1-2*y2 + abs(y3-y1); - if( x < cols && total_y < rows) + if(d == 0) { - disp[0] = (short)(((ndisp - best_disp - 1 + mindisp)*256 + (d != 0 ? (y3-y1)*256/d : 0) + 15) >> 4); + calcDisp(&costFunc[ly*ndisp], &disp[0], uniquenessRatio, textureTreshold, textsum, mindisp, ndisp); } + barrier(CLK_LOCAL_MEM_FENCE); } } } diff --git a/modules/calib3d/src/stereobm.cpp b/modules/calib3d/src/stereobm.cpp index 876215b53d..67864b07dd 100644 --- a/modules/calib3d/src/stereobm.cpp +++ b/modules/calib3d/src/stereobm.cpp @@ -681,7 +681,7 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right, sad[ndisp] = sad[ndisp-2]; int p = sad[mind+1], n = sad[mind-1]; d = p + n - 2*sad[mind] + std::abs(p - n); - dptr[y*dstep] = (short)(((ndisp - mind - 1 + mindisp)*256 + (d != 0 ? (p-n)*256/d : 0) + 15) >> 4); + dptr[y*dstep] = (short)mind;//(((ndisp - mind - 1 + mindisp)*256 + (d != 0 ? (p-n)*256/d : 0) + 15) >> 4); costptr[y*coststep] = sad[mind]; } } @@ -739,7 +739,7 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right, OutputArray _disp, StereoBMParams* state) {//printf("opt\n"); int ndisp = state->numDisparities; - ocl::Kernel k("stereoBM_opt", ocl::calib3d::stereobm_oclsrc, cv::format("-D csize=%d -D tsize=%d -D wsz=%d", ndisp*ndisp, ndisp, state->SADWindowSize) ); + ocl::Kernel k("stereoBM_opt", ocl::calib3d::stereobm_oclsrc, cv::format("-D csize=%d -D tsize=%d -D wsz=%d", ndisp*ndisp, 2*ndisp, state->SADWindowSize) ); if(k.empty()) return false; @@ -747,9 +747,8 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right, _disp.create(_left.size(), CV_16S); UMat disp = _disp.getUMat(); - int nthreads = (ndisp <= 64) ? 2 : 4; - size_t globalThreads[3] = { left.cols, (left.rows - left.rows%ndisp + ndisp), nthreads}; - size_t localThreads[3] = {1, ndisp, nthreads}; + size_t globalThreads[3] = { left.cols, (left.rows-left.rows%32 + 32)/32, ndisp}; + size_t localThreads[3] = {1, 2, ndisp}; int idx = 0; idx = k.set(idx, ocl::KernelArg::PtrReadOnly(left)); @@ -758,7 +757,6 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right, idx = k.set(idx, state->minDisparity); idx = k.set(idx, ndisp); idx = k.set(idx, state->preFilterCap); - idx = k.set(idx, nthreads); idx = k.set(idx, state->textureThreshold); idx = k.set(idx, state->uniquenessRatio); @@ -993,7 +991,7 @@ public: bufSize2 = width*height*(sizeof(Point_) + sizeof(int) + sizeof(uchar)); #if CV_SSE2 - bool useShorts = params.preFilterCap <= 31 && params.SADWindowSize <= 21 && checkHardwareSupport(CV_CPU_SSE2); + bool useShorts = false;//params.preFilterCap <= 31 && params.SADWindowSize <= 21 && checkHardwareSupport(CV_CPU_SSE2); #else const bool useShorts = false; #endif diff --git a/modules/calib3d/test/opencl/test_stereobm.cpp b/modules/calib3d/test/opencl/test_stereobm.cpp index a683e6938b..1852e0dfde 100644 --- a/modules/calib3d/test/opencl/test_stereobm.cpp +++ b/modules/calib3d/test/opencl/test_stereobm.cpp @@ -90,12 +90,12 @@ OCL_TEST_P(StereoBMFixture, StereoBM) cv::ocl::finish(); long t3 = clock(); std::cout << (double)(t2-t1)/CLOCKS_PER_SEC << " " << (double)(t3-t2)/CLOCKS_PER_SEC << std::endl; -/* + Mat t; absdiff(disp, udisp, t); /* for(int i = 0; i(i,j) > 0) - if(i>=5 && i <=16 && j == 36+15) + if(t.at(i,j) > 0) + // if(i == 125 && j == 174) printf("%d %d cv: %d ocl: %d\n", i, j, disp.at(i,j), udisp.getMat(ACCESS_READ).at(i,j) );*/ /* imshow("diff.png", t*100); imshow("cv.png", disp*100);