diff --git a/modules/imgproc/src/featureselect.cpp b/modules/imgproc/src/featureselect.cpp index 53743c6a85..0d665aac4e 100644 --- a/modules/imgproc/src/featureselect.cpp +++ b/modules/imgproc/src/featureselect.cpp @@ -57,6 +57,8 @@ struct greaterThanPtr : { return *a > *b; } }; +#ifdef HAVE_OPENCL + struct Corner { float val; @@ -67,67 +69,108 @@ struct Corner { return val > c.val; } }; -#ifdef HAVE_OPENCL - static bool ocl_goodFeaturesToTrack( InputArray _image, OutputArray _corners, int maxCorners, double qualityLevel, double minDistance, InputArray _mask, int blockSize, bool useHarrisDetector, double harrisK ) { - UMat eig, tmp; + UMat eig, maxEigenValue; if( useHarrisDetector ) cornerHarris( _image, eig, blockSize, 3, harrisK ); else cornerMinEigenVal( _image, eig, blockSize, 3 ); - double maxVal = 0; - minMaxLoc( eig, NULL, &maxVal, NULL, NULL, _mask ); - threshold( eig, eig, maxVal*qualityLevel, 0, THRESH_TOZERO ); - dilate( eig, tmp, Mat()); - Size imgsize = _image.size(); std::vector tmpCorners; size_t total, i, j, ncorners = 0, possibleCornersCount = std::max(1024, static_cast(imgsize.area() * 0.1)); bool haveMask = !_mask.empty(); + // find threshold + { + CV_Assert(eig.type() == CV_32FC1); + int dbsize = ocl::Device::getDefault().maxComputeUnits(); + size_t wgs = ocl::Device::getDefault().maxWorkGroupSize(); + + int wgs2_aligned = 1; + while (wgs2_aligned < (int)wgs) + wgs2_aligned <<= 1; + wgs2_aligned >>= 1; + + ocl::Kernel k("maxEigenVal", ocl::imgproc::gftt_oclsrc, + format("-D OP_MAX_EIGEN_VAL -D WGS=%d -D groupnum=%d -D WGS2_ALIGNED=%d%s", + (int)wgs, dbsize, wgs2_aligned, haveMask ? " -D HAVE_MASK" : "")); + if (k.empty()) + return false; + + UMat mask = _mask.getUMat(); + maxEigenValue.create(1, dbsize, CV_32FC1); + + ocl::KernelArg eigarg = ocl::KernelArg::ReadOnlyNoSize(eig), + dbarg = ocl::KernelArg::PtrWriteOnly(maxEigenValue), + maskarg = ocl::KernelArg::ReadOnlyNoSize(mask); + + if (haveMask) + k.args(eigarg, eig.cols, (int)eig.total(), dbarg, maskarg); + else + k.args(eigarg, eig.cols, (int)eig.total(), dbarg); + + size_t globalsize = dbsize * wgs; + if (!k.run(1, &globalsize, &wgs, false)) + return false; + + ocl::Kernel k2("maxEigenValTask", ocl::imgproc::gftt_oclsrc, + format("-D OP_MAX_EIGEN_VAL -D WGS=%d -D WGS2_ALIGNED=%d -D groupnum=%d", + wgs, wgs2_aligned, dbsize)); + if (k2.empty()) + return false; + + k2.args(dbarg, (float)qualityLevel); + + if (!k2.runTask(false)) + return false; + } + // collect list of pointers to features - put them into temporary image { ocl::Kernel k("findCorners", ocl::imgproc::gftt_oclsrc, - format(haveMask ? "-D HAVE_MASK" : "")); + format("-D OP_FIND_CORNERS%s", haveMask ? " -D HAVE_MASK" : "")); if (k.empty()) return false; UMat counter(1, 1, CV_32SC1, Scalar::all(0)), - corners(1, (int)(possibleCornersCount * sizeof(Corner)), CV_8UC1); + corners(1, (int)possibleCornersCount, CV_32FC2, Scalar::all(-1)); + CV_Assert(sizeof(Corner) == corners.elemSize()); + ocl::KernelArg eigarg = ocl::KernelArg::ReadOnlyNoSize(eig), - tmparg = ocl::KernelArg::ReadOnlyNoSize(tmp), cornersarg = ocl::KernelArg::PtrWriteOnly(corners), - counterarg = ocl::KernelArg::PtrReadWrite(counter); + counterarg = ocl::KernelArg::PtrReadWrite(counter), + thresholdarg = ocl::KernelArg::PtrReadOnly(maxEigenValue); if (!haveMask) - k.args(eigarg, tmparg, cornersarg, counterarg, - imgsize.height - 2, imgsize.width - 2); + k.args(eigarg, cornersarg, counterarg, + eig.rows - 2, eig.cols - 2, thresholdarg, + (int)possibleCornersCount); else { UMat mask = _mask.getUMat(); - k.args(eigarg, ocl::KernelArg::ReadOnlyNoSize(mask), tmparg, - cornersarg, counterarg, imgsize.height - 2, imgsize.width - 2); + k.args(eigarg, ocl::KernelArg::ReadOnlyNoSize(mask), + cornersarg, counterarg, eig.rows - 2, eig.cols - 2, + thresholdarg, (int)possibleCornersCount); } - size_t globalsize[2] = { imgsize.width - 2, imgsize.height - 2 }; + size_t globalsize[2] = { eig.cols - 2, eig.rows - 2 }; if (!k.run(2, globalsize, NULL, false)) return false; - total = counter.getMat(ACCESS_READ).at(0, 0); - int totalb = (int)(sizeof(Corner) * total); - + total = std::min(counter.getMat(ACCESS_READ).at(0, 0), possibleCornersCount); tmpCorners.resize(total); - Mat mcorners(1, totalb, CV_8UC1, &tmpCorners[0]); - corners.colRange(0, totalb).copyTo(mcorners); + + Mat mcorners(1, (int)total, CV_32FC2, &tmpCorners[0]); + corners.colRange(0, (int)total).copyTo(mcorners); } + std::sort(tmpCorners.begin(), tmpCorners.end()); - std::sort( tmpCorners.begin(), tmpCorners.end() ); std::vector corners; corners.reserve(total); @@ -159,13 +202,13 @@ static bool ocl_goodFeaturesToTrack( InputArray _image, OutputArray _corners, // boundary check x1 = std::max(0, x1); y1 = std::max(0, y1); - x2 = std::min(grid_width-1, x2); - y2 = std::min(grid_height-1, y2); + x2 = std::min(grid_width - 1, x2); + y2 = std::min(grid_height - 1, y2); for( int yy = y1; yy <= y2; yy++ ) for( int xx = x1; xx <= x2; xx++ ) { - std::vector &m = grid[yy*grid_width + xx]; + std::vector &m = grid[yy * grid_width + xx]; if( m.size() ) { @@ -259,8 +302,8 @@ void cv::goodFeaturesToTrack( InputArray _image, OutputArray _corners, tmpCorners.push_back(eig_data + x); } } - std::sort( tmpCorners.begin(), tmpCorners.end(), greaterThanPtr() ); + std::vector corners; size_t i, j, total = tmpCorners.size(), ncorners = 0; diff --git a/modules/imgproc/src/opencl/gftt.cl b/modules/imgproc/src/opencl/gftt.cl index 46e37990de..a63ad044b1 100644 --- a/modules/imgproc/src/opencl/gftt.cl +++ b/modules/imgproc/src/opencl/gftt.cl @@ -43,39 +43,116 @@ // //M*/ +#ifdef OP_MAX_EIGEN_VAL + +__kernel void maxEigenVal(__global const uchar * srcptr, int src_step, int src_offset, int cols, + int total, __global uchar * dstptr +#ifdef HAVE_MASK + , __global const uchar * maskptr, int mask_step, int mask_offset +#endif + ) +{ + int lid = get_local_id(0); + int gid = get_group_id(0); + int id = get_global_id(0); + + __local float localmem_max[WGS2_ALIGNED]; + float maxval = -FLT_MAX; + + for (int grain = groupnum * WGS; id < total; id += grain) + { + int src_index = mad24(id / cols, src_step, mad24((id % cols), (int)sizeof(float), src_offset)); +#ifdef HAVE_MASK + int mask_index = mad24(id / cols, mask_step, id % cols + mask_offset); + if (mask[mask_index]) +#endif + maxval = max(maxval, *(__global const float *)(srcptr + src_index)); + } + + if (lid < WGS2_ALIGNED) + localmem_max[lid] = maxval; + barrier(CLK_LOCAL_MEM_FENCE); + + if (lid >= WGS2_ALIGNED && total >= WGS2_ALIGNED) + localmem_max[lid - WGS2_ALIGNED] = max(maxval, localmem_max[lid - WGS2_ALIGNED]); + barrier(CLK_LOCAL_MEM_FENCE); + + for (int lsize = WGS2_ALIGNED >> 1; lsize > 0; lsize >>= 1) + { + if (lid < lsize) + { + int lid2 = lsize + lid; + localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]); + } + barrier(CLK_LOCAL_MEM_FENCE); + } + + if (lid == 0) + *(__global float *)(dstptr + (int)sizeof(float) * gid) = localmem_max[0]; +} + +__kernel void maxEigenValTask(__global float * dst, float qualityLevel) +{ + float maxval = -FLT_MAX; + + #pragma unroll + for (int x = 0; x < groupnum; ++x) + maxval = max(maxval, dst[x]); + + dst[0] = maxval * qualityLevel; +} + +#elif OP_FIND_CORNERS + +#define GET_SRC_32F(_y, _x) *(__global const float *)(eigptr + (_y) * eig_step + (_x) * (int)sizeof(float) ) + __kernel void findCorners(__global const uchar * eigptr, int eig_step, int eig_offset, #ifdef HAVE_MASK __global const uchar * mask, int mask_step, int mask_offset, #endif - __global const uchar * tmpptr, int tmp_step, int tmp_offset, __global uchar * cornersptr, __global int * counter, - int rows, int cols) + int rows, int cols, __constant float * threshold, int max_corners) { int x = get_global_id(0); int y = get_global_id(1); - if (x < cols && y < rows) + if (y < rows && x < cols +#ifdef HAVE_MASK + && mask[mad24(y, mask_step, x + mask_offset)] +#endif + ) { ++x, ++y; + float val = GET_SRC_32F(y, x); - int eig_index = mad24(y, eig_step, eig_offset + x * (int)sizeof(float)); - int tmp_index = mad24(y, tmp_step, tmp_offset + x * (int)sizeof(float)); -#ifdef HAVE_MASK - int mask_index = mad24(y, mask_step, mask_offset + x); - mask += mask_index; -#endif + if (val > threshold[0]) + { + float maxVal = val; + maxVal = max(GET_SRC_32F(y - 1, x - 1), maxVal); + maxVal = max(GET_SRC_32F(y - 1, x ), maxVal); + maxVal = max(GET_SRC_32F(y - 1, x + 1), maxVal); - float val = *(__global const float *)(eigptr + eig_index); - float tmp = *(__global const float *)(tmpptr + tmp_index); + maxVal = max(GET_SRC_32F(y , x - 1), maxVal); + maxVal = max(GET_SRC_32F(y , x + 1), maxVal); - if (val != 0 && val == tmp -#ifdef HAVE_MASK - && mask[0] != 0 -#endif - ) - { - __global float2 * corners = (__global float2 *)(cornersptr + (int)sizeof(float2) * atomic_inc(counter)); - corners[0] = (float2)(val, as_float( (x<<16) | y )); + maxVal = max(GET_SRC_32F(y + 1, x - 1), maxVal); + maxVal = max(GET_SRC_32F(y + 1, x ), maxVal); + maxVal = max(GET_SRC_32F(y + 1, x + 1), maxVal); + + if (val == maxVal) + { + int ind = atomic_inc(counter); + if (ind < max_corners) + { + __global float2 * corners = (__global float2 *)(cornersptr + ind * (int)sizeof(float2)); + + // pack and store eigenvalue and its coordinates + corners[0].x = val; + corners[0].y = as_float(y | (x << 16)); + } + } } } } + +#endif diff --git a/modules/imgproc/test/ocl/test_gftt.cpp b/modules/imgproc/test/ocl/test_gftt.cpp index df6fa731dd..e47997623f 100644 --- a/modules/imgproc/test/ocl/test_gftt.cpp +++ b/modules/imgproc/test/ocl/test_gftt.cpp @@ -114,6 +114,7 @@ OCL_TEST_P(GoodFeaturesToTrack, Accuracy) for (size_t i = 0; i < pts.size(); ++i) { Point2i a = upts[i], b = pts[i]; + bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1; if (!eq)