From d84738769422aad33038d90681c47486e47a0380 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 21 Jan 2014 14:35:34 +0400 Subject: [PATCH 1/2] split CUDA Hough sources --- modules/gpu/src/cuda/build_point_list.cu | 138 ++++ .../cuda/{hough.cu => generalized_hough.cu} | 638 +----------------- modules/gpu/src/cuda/hough_circles.cu | 254 +++++++ modules/gpu/src/cuda/hough_lines.cu | 212 ++++++ modules/gpu/src/cuda/hough_segments.cu | 249 +++++++ .../src/{hough.cpp => generalized_hough.cpp} | 303 --------- modules/gpu/src/hough_circles.cpp | 223 ++++++ modules/gpu/src/hough_lines.cpp | 142 ++++ modules/gpu/src/hough_segments.cpp | 110 +++ 9 files changed, 1330 insertions(+), 939 deletions(-) create mode 100644 modules/gpu/src/cuda/build_point_list.cu rename modules/gpu/src/cuda/{hough.cu => generalized_hough.cu} (65%) create mode 100644 modules/gpu/src/cuda/hough_circles.cu create mode 100644 modules/gpu/src/cuda/hough_lines.cu create mode 100644 modules/gpu/src/cuda/hough_segments.cu rename modules/gpu/src/{hough.cpp => generalized_hough.cpp} (79%) create mode 100644 modules/gpu/src/hough_circles.cpp create mode 100644 modules/gpu/src/hough_lines.cpp create mode 100644 modules/gpu/src/hough_segments.cpp diff --git a/modules/gpu/src/cuda/build_point_list.cu b/modules/gpu/src/cuda/build_point_list.cu new file mode 100644 index 0000000000..8a9c73b3f1 --- /dev/null +++ b/modules/gpu/src/cuda/build_point_list.cu @@ -0,0 +1,138 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/gpu/device/common.hpp"
+#include "opencv2/gpu/device/emulation.hpp"
+
+namespace cv { namespace gpu { namespace device
+{
+    namespace hough
+    {
+        __device__ static int g_counter;
+
+        template <int PIXELS_PER_THREAD>
+        __global__ void buildPointList(const PtrStepSzb src, unsigned int* list)
+        {
+            __shared__ unsigned int s_queues[4][32 * PIXELS_PER_THREAD];
+            __shared__ int s_qsize[4];
+            __shared__ int s_globStart[4];
+
+            const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (threadIdx.x == 0)
+                s_qsize[threadIdx.y] = 0;
+            __syncthreads();
+
+            if (y < src.rows)
+            {
+                // fill the queue
+                const uchar* srcRow = src.ptr(y);
+                for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < src.cols; ++i, xx += blockDim.x)
+                {
+                    if (srcRow[xx])
+                    {
+                        const unsigned int val = (y << 16) | xx;
+                        const int qidx = Emulation::smem::atomicAdd(&s_qsize[threadIdx.y], 1);
+                        s_queues[threadIdx.y][qidx] = val;
+                    }
+                }
+            }
+
+            __syncthreads();
+
+            // let one thread reserve the space required in the global list
+            if (threadIdx.x == 0 && threadIdx.y == 0)
+            {
+                // find how many items are stored in each list
+                int totalSize = 0;
+                for (int i = 0; i < blockDim.y; ++i)
+                {
+                    s_globStart[i] = totalSize;
+                    totalSize += s_qsize[i];
+                }
+
+                // calculate the offset in the global list
+                const int globalOffset = atomicAdd(&g_counter, totalSize);
+                for (int i = 0; i < blockDim.y; ++i)
+                    s_globStart[i] += globalOffset;
+            }
+
+            __syncthreads();
+
+            // copy local queues to global queue
+            const int qsize = s_qsize[threadIdx.y];
+            int gidx = s_globStart[threadIdx.y] + threadIdx.x;
+            for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x)
+                list[gidx] = s_queues[threadIdx.y][i];
+        }
+
+        int buildPointList_gpu(PtrStepSzb src, unsigned int* list)
+        {
+            const int PIXELS_PER_THREAD = 16;
+
+            void* counterPtr;
+            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+
+            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+
+            const dim3 block(32, 4);
+            const dim3 grid(divUp(src.cols, block.x * PIXELS_PER_THREAD), divUp(src.rows, block.y));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(buildPointList<PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
+
+            buildPointList<PIXELS_PER_THREAD><<<grid, block>>>(src, list);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            int totalCount;
+            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+
+            return totalCount;
+        }
+    }
+}}}
+
+#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/src/cuda/hough.cu b/modules/gpu/src/cuda/generalized_hough.cu
similarity index 65%
rename from modules/gpu/src/cuda/hough.cu
rename to modules/gpu/src/cuda/generalized_hough.cu
index 59eba26081..5e2041eae4 100644
--- a/modules/gpu/src/cuda/hough.cu
+++ b/modules/gpu/src/cuda/generalized_hough.cu
@@ -43,651 +43,18 @@
 
 #if !defined CUDA_DISABLER
 
 #include <thrust/device_ptr.h>
-#include <thrust/sort.h>
+#include <thrust/transform.h>
 
 #include "opencv2/gpu/device/common.hpp"
 #include
"opencv2/gpu/device/emulation.hpp" #include "opencv2/gpu/device/vec_math.hpp" #include "opencv2/gpu/device/functional.hpp" -#include "opencv2/gpu/device/limits.hpp" -#include "opencv2/gpu/device/dynamic_smem.hpp" namespace cv { namespace gpu { namespace device { namespace hough { - __device__ int g_counter; - - //////////////////////////////////////////////////////////////////////// - // buildPointList - - template - __global__ void buildPointList(const PtrStepSzb src, unsigned int* list) - { - __shared__ unsigned int s_queues[4][32 * PIXELS_PER_THREAD]; - __shared__ int s_qsize[4]; - __shared__ int s_globStart[4]; - - const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (threadIdx.x == 0) - s_qsize[threadIdx.y] = 0; - __syncthreads(); - - if (y < src.rows) - { - // fill the queue - const uchar* srcRow = src.ptr(y); - for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < src.cols; ++i, xx += blockDim.x) - { - if (srcRow[xx]) - { - const unsigned int val = (y << 16) | xx; - const int qidx = Emulation::smem::atomicAdd(&s_qsize[threadIdx.y], 1); - s_queues[threadIdx.y][qidx] = val; - } - } - } - - __syncthreads(); - - // let one thread reserve the space required in the global list - if (threadIdx.x == 0 && threadIdx.y == 0) - { - // find how many items are stored in each list - int totalSize = 0; - for (int i = 0; i < blockDim.y; ++i) - { - s_globStart[i] = totalSize; - totalSize += s_qsize[i]; - } - - // calculate the offset in the global list - const int globalOffset = atomicAdd(&g_counter, totalSize); - for (int i = 0; i < blockDim.y; ++i) - s_globStart[i] += globalOffset; - } - - __syncthreads(); - - // copy local queues to global queue - const int qsize = s_qsize[threadIdx.y]; - int gidx = s_globStart[threadIdx.y] + threadIdx.x; - for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x) - list[gidx] = s_queues[threadIdx.y][i]; - } - - int buildPointList_gpu(PtrStepSzb src, unsigned int* list) - { - const int PIXELS_PER_THREAD = 16; - - void* counterPtr; - cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); - - cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); - - const dim3 block(32, 4); - const dim3 grid(divUp(src.cols, block.x * PIXELS_PER_THREAD), divUp(src.rows, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(buildPointList, cudaFuncCachePreferShared) ); - - buildPointList<<>>(src, list); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int totalCount; - cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); - - return totalCount; - } - - //////////////////////////////////////////////////////////////////////// - // linesAccum - - __global__ void linesAccumGlobal(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho) - { - const int n = blockIdx.x; - const float ang = n * theta; - - float sinVal; - float cosVal; - sincosf(ang, &sinVal, &cosVal); - sinVal *= irho; - cosVal *= irho; - - const int shift = (numrho - 1) / 2; - - int* accumRow = accum.ptr(n + 1); - for (int i = threadIdx.x; i < count; i += blockDim.x) - { - const unsigned int val = list[i]; - - const int x = (val & 0xFFFF); - const int y = (val >> 16) & 0xFFFF; - - int r = __float2int_rn(x * cosVal + y * sinVal); - r += shift; - - ::atomicAdd(accumRow + r + 1, 1); - } - } - - __global__ void linesAccumShared(const unsigned int* list, const int count, PtrStepi accum, const float 
irho, const float theta, const int numrho) - { - int* smem = DynamicSharedMem(); - - for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x) - smem[i] = 0; - - __syncthreads(); - - const int n = blockIdx.x; - const float ang = n * theta; - - float sinVal; - float cosVal; - sincosf(ang, &sinVal, &cosVal); - sinVal *= irho; - cosVal *= irho; - - const int shift = (numrho - 1) / 2; - - for (int i = threadIdx.x; i < count; i += blockDim.x) - { - const unsigned int val = list[i]; - - const int x = (val & 0xFFFF); - const int y = (val >> 16) & 0xFFFF; - - int r = __float2int_rn(x * cosVal + y * sinVal); - r += shift; - - Emulation::smem::atomicAdd(&smem[r + 1], 1); - } - - __syncthreads(); - - int* accumRow = accum.ptr(n + 1); - for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x) - accumRow[i] = smem[i]; - } - - void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20) - { - const dim3 block(has20 ? 1024 : 512); - const dim3 grid(accum.rows - 2); - - size_t smemSize = (accum.cols - 1) * sizeof(int); - - if (smemSize < sharedMemPerBlock - 1000) - linesAccumShared<<>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2); - else - linesAccumGlobal<<>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2); - - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - - //////////////////////////////////////////////////////////////////////// - // linesGetResult - - __global__ void linesGetResult(const PtrStepSzi accum, float2* out, int* votes, const int maxSize, const float rho, const float theta, const int threshold, const int numrho) - { - const int r = blockIdx.x * blockDim.x + threadIdx.x; - const int n = blockIdx.y * blockDim.y + threadIdx.y; - - if (r >= accum.cols - 2 || n >= accum.rows - 2) - return; - - const int curVotes = accum(n + 1, r + 1); - - if (curVotes > threshold && - curVotes > accum(n + 1, r) && - curVotes >= accum(n + 1, r + 2) && - curVotes > accum(n, r + 1) && - curVotes >= accum(n + 2, r + 1)) - { - const float radius = (r - (numrho - 1) * 0.5f) * rho; - const float angle = n * theta; - - const int ind = ::atomicAdd(&g_counter, 1); - if (ind < maxSize) - { - out[ind] = make_float2(radius, angle); - votes[ind] = curVotes; - } - } - } - - int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort) - { - void* counterPtr; - cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); - - cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); - - const dim3 block(32, 8); - const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(linesGetResult, cudaFuncCachePreferL1) ); - - linesGetResult<<>>(accum, out, votes, maxSize, rho, theta, threshold, accum.cols - 2); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int totalCount; - cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); - - totalCount = ::min(totalCount, maxSize); - - if (doSort && totalCount > 0) - { - thrust::device_ptr outPtr(out); - thrust::device_ptr votesPtr(votes); - thrust::sort_by_key(votesPtr, votesPtr + totalCount, outPtr, thrust::greater()); - } - - return totalCount; - } - - //////////////////////////////////////////////////////////////////////// - // houghLinesProbabilistic - - texture tex_mask(false, cudaFilterModePoint, cudaAddressModeClamp); - - __global__ void 
houghLinesProbabilistic(const PtrStepSzi accum, - int4* out, const int maxSize, - const float rho, const float theta, - const int lineGap, const int lineLength, - const int rows, const int cols) - { - const int r = blockIdx.x * blockDim.x + threadIdx.x; - const int n = blockIdx.y * blockDim.y + threadIdx.y; - - if (r >= accum.cols - 2 || n >= accum.rows - 2) - return; - - const int curVotes = accum(n + 1, r + 1); - - if (curVotes >= lineLength && - curVotes > accum(n, r) && - curVotes > accum(n, r + 1) && - curVotes > accum(n, r + 2) && - curVotes > accum(n + 1, r) && - curVotes > accum(n + 1, r + 2) && - curVotes > accum(n + 2, r) && - curVotes > accum(n + 2, r + 1) && - curVotes > accum(n + 2, r + 2)) - { - const float radius = (r - (accum.cols - 2 - 1) * 0.5f) * rho; - const float angle = n * theta; - - float cosa; - float sina; - sincosf(angle, &sina, &cosa); - - float2 p0 = make_float2(cosa * radius, sina * radius); - float2 dir = make_float2(-sina, cosa); - - float2 pb[4] = {make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1)}; - float a; - - if (dir.x != 0) - { - a = -p0.x / dir.x; - pb[0].x = 0; - pb[0].y = p0.y + a * dir.y; - - a = (cols - 1 - p0.x) / dir.x; - pb[1].x = cols - 1; - pb[1].y = p0.y + a * dir.y; - } - if (dir.y != 0) - { - a = -p0.y / dir.y; - pb[2].x = p0.x + a * dir.x; - pb[2].y = 0; - - a = (rows - 1 - p0.y) / dir.y; - pb[3].x = p0.x + a * dir.x; - pb[3].y = rows - 1; - } - - if (pb[0].x == 0 && (pb[0].y >= 0 && pb[0].y < rows)) - { - p0 = pb[0]; - if (dir.x < 0) - dir = -dir; - } - else if (pb[1].x == cols - 1 && (pb[0].y >= 0 && pb[0].y < rows)) - { - p0 = pb[1]; - if (dir.x > 0) - dir = -dir; - } - else if (pb[2].y == 0 && (pb[2].x >= 0 && pb[2].x < cols)) - { - p0 = pb[2]; - if (dir.y < 0) - dir = -dir; - } - else if (pb[3].y == rows - 1 && (pb[3].x >= 0 && pb[3].x < cols)) - { - p0 = pb[3]; - if (dir.y > 0) - dir = -dir; - } - - float2 d; - if (::fabsf(dir.x) > ::fabsf(dir.y)) - { - d.x = dir.x > 0 ? 1 : -1; - d.y = dir.y / ::fabsf(dir.x); - } - else - { - d.x = dir.x / ::fabsf(dir.y); - d.y = dir.y > 0 ? 
1 : -1; - } - - float2 line_end[2]; - int gap; - bool inLine = false; - - float2 p1 = p0; - if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows) - return; - - for (;;) - { - if (tex2D(tex_mask, p1.x, p1.y)) - { - gap = 0; - - if (!inLine) - { - line_end[0] = p1; - line_end[1] = p1; - inLine = true; - } - else - { - line_end[1] = p1; - } - } - else if (inLine) - { - if (++gap > lineGap) - { - bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength || - ::abs(line_end[1].y - line_end[0].y) >= lineLength; - - if (good_line) - { - const int ind = ::atomicAdd(&g_counter, 1); - if (ind < maxSize) - out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y); - } - - gap = 0; - inLine = false; - } - } - - p1 = p1 + d; - if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows) - { - if (inLine) - { - bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength || - ::abs(line_end[1].y - line_end[0].y) >= lineLength; - - if (good_line) - { - const int ind = ::atomicAdd(&g_counter, 1); - if (ind < maxSize) - out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y); - } - - } - break; - } - } - } - } - - int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength) - { - void* counterPtr; - cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); - - cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); - - const dim3 block(32, 8); - const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y)); - - bindTexture(&tex_mask, mask); - - houghLinesProbabilistic<<>>(accum, - out, maxSize, - rho, theta, - lineGap, lineLength, - mask.rows, mask.cols); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int totalCount; - cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); - - totalCount = ::min(totalCount, maxSize); - - return totalCount; - } - - //////////////////////////////////////////////////////////////////////// - // circlesAccumCenters - - __global__ void circlesAccumCenters(const unsigned int* list, const int count, const PtrStepi dx, const PtrStepi dy, - PtrStepi accum, const int width, const int height, const int minRadius, const int maxRadius, const float idp) - { - const int SHIFT = 10; - const int ONE = 1 << SHIFT; - - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - - if (tid >= count) - return; - - const unsigned int val = list[tid]; - - const int x = (val & 0xFFFF); - const int y = (val >> 16) & 0xFFFF; - - const int vx = dx(y, x); - const int vy = dy(y, x); - - if (vx == 0 && vy == 0) - return; - - const float mag = ::sqrtf(vx * vx + vy * vy); - - const int x0 = __float2int_rn((x * idp) * ONE); - const int y0 = __float2int_rn((y * idp) * ONE); - - int sx = __float2int_rn((vx * idp) * ONE / mag); - int sy = __float2int_rn((vy * idp) * ONE / mag); - - // Step from minRadius to maxRadius in both directions of the gradient - for (int k1 = 0; k1 < 2; ++k1) - { - int x1 = x0 + minRadius * sx; - int y1 = y0 + minRadius * sy; - - for (int r = minRadius; r <= maxRadius; x1 += sx, y1 += sy, ++r) - { - const int x2 = x1 >> SHIFT; - const int y2 = y1 >> SHIFT; - - if (x2 < 0 || x2 >= width || y2 < 0 || y2 >= height) - break; - - ::atomicAdd(accum.ptr(y2 + 1) + x2 + 1, 1); - } - - sx = -sx; - sy = -sy; - } - } - - void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, 
float idp) - { - const dim3 block(256); - const dim3 grid(divUp(count, block.x)); - - cudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) ); - - circlesAccumCenters<<>>(list, count, dx, dy, accum, accum.cols - 2, accum.rows - 2, minRadius, maxRadius, idp); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - - //////////////////////////////////////////////////////////////////////// - // buildCentersList - - __global__ void buildCentersList(const PtrStepSzi accum, unsigned int* centers, const int threshold) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x < accum.cols - 2 && y < accum.rows - 2) - { - const int top = accum(y, x + 1); - - const int left = accum(y + 1, x); - const int cur = accum(y + 1, x + 1); - const int right = accum(y + 1, x + 2); - - const int bottom = accum(y + 2, x + 1); - - if (cur > threshold && cur > top && cur >= bottom && cur > left && cur >= right) - { - const unsigned int val = (y << 16) | x; - const int idx = ::atomicAdd(&g_counter, 1); - centers[idx] = val; - } - } - } - - int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold) - { - void* counterPtr; - cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); - - cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); - - const dim3 block(32, 8); - const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) ); - - buildCentersList<<>>(accum, centers, threshold); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int totalCount; - cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); - - return totalCount; - } - - //////////////////////////////////////////////////////////////////////// - // circlesAccumRadius - - __global__ void circlesAccumRadius(const unsigned int* centers, const unsigned int* list, const int count, - float3* circles, const int maxCircles, const float dp, - const int minRadius, const int maxRadius, const int histSize, const int threshold) - { - int* smem = DynamicSharedMem(); - - for (int i = threadIdx.x; i < histSize + 2; i += blockDim.x) - smem[i] = 0; - __syncthreads(); - - unsigned int val = centers[blockIdx.x]; - - float cx = (val & 0xFFFF); - float cy = (val >> 16) & 0xFFFF; - - cx = (cx + 0.5f) * dp; - cy = (cy + 0.5f) * dp; - - for (int i = threadIdx.x; i < count; i += blockDim.x) - { - val = list[i]; - - const int x = (val & 0xFFFF); - const int y = (val >> 16) & 0xFFFF; - - const float rad = ::sqrtf((cx - x) * (cx - x) + (cy - y) * (cy - y)); - if (rad >= minRadius && rad <= maxRadius) - { - const int r = __float2int_rn(rad - minRadius); - - Emulation::smem::atomicAdd(&smem[r + 1], 1); - } - } - - __syncthreads(); - - for (int i = threadIdx.x; i < histSize; i += blockDim.x) - { - const int curVotes = smem[i + 1]; - - if (curVotes >= threshold && curVotes > smem[i] && curVotes >= smem[i + 2]) - { - const int ind = ::atomicAdd(&g_counter, 1); - if (ind < maxCircles) - circles[ind] = make_float3(cx, cy, i + minRadius); - } - } - } - - int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count, - float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20) - { - void* counterPtr; - cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); - - cudaSafeCall( 
cudaMemset(counterPtr, 0, sizeof(int)) ); - - const dim3 block(has20 ? 1024 : 512); - const dim3 grid(centersCount); - - const int histSize = maxRadius - minRadius + 1; - size_t smemSize = (histSize + 2) * sizeof(int); - - circlesAccumRadius<<>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold); - cudaSafeCall( cudaGetLastError() ); - - cudaSafeCall( cudaDeviceSynchronize() ); - - int totalCount; - cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); - - totalCount = ::min(totalCount, maxCircles); - - return totalCount; - } - - //////////////////////////////////////////////////////////////////////// - // Generalized Hough + __device__ static int g_counter; template __global__ void buildEdgePointList(const PtrStepSzb edges, const PtrStep dx, const PtrStep dy, unsigned int* coordList, float* thetaList) @@ -1706,5 +1073,4 @@ namespace cv { namespace gpu { namespace device } }}} - #endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/cuda/hough_circles.cu b/modules/gpu/src/cuda/hough_circles.cu new file mode 100644 index 0000000000..5df7887b0b --- /dev/null +++ b/modules/gpu/src/cuda/hough_circles.cu @@ -0,0 +1,254 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include "opencv2/gpu/device/common.hpp"
+#include "opencv2/gpu/device/emulation.hpp"
+#include "opencv2/gpu/device/dynamic_smem.hpp"
+
+namespace cv { namespace gpu { namespace device
+{
+    namespace hough
+    {
+        __device__ static int g_counter;
+
+        ////////////////////////////////////////////////////////////////////////
+        // circlesAccumCenters
+
+        __global__ void circlesAccumCenters(const unsigned int* list, const int count, const PtrStepi dx, const PtrStepi dy,
+                                            PtrStepi accum, const int width, const int height, const int minRadius, const int maxRadius, const float idp)
+        {
+            const int SHIFT = 10;
+            const int ONE = 1 << SHIFT;
+
+            const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+            if (tid >= count)
+                return;
+
+            const unsigned int val = list[tid];
+
+            const int x = (val & 0xFFFF);
+            const int y = (val >> 16) & 0xFFFF;
+
+            const int vx = dx(y, x);
+            const int vy = dy(y, x);
+
+            if (vx == 0 && vy == 0)
+                return;
+
+            const float mag = ::sqrtf(vx * vx + vy * vy);
+
+            const int x0 = __float2int_rn((x * idp) * ONE);
+            const int y0 = __float2int_rn((y * idp) * ONE);
+
+            int sx = __float2int_rn((vx * idp) * ONE / mag);
+            int sy = __float2int_rn((vy * idp) * ONE / mag);
+
+            // Step from minRadius to maxRadius in both directions of the gradient
+            for (int k1 = 0; k1 < 2; ++k1)
+            {
+                int x1 = x0 + minRadius * sx;
+                int y1 = y0 + minRadius * sy;
+
+                for (int r = minRadius; r <= maxRadius; x1 += sx, y1 += sy, ++r)
+                {
+                    const int x2 = x1 >> SHIFT;
+                    const int y2 = y1 >> SHIFT;
+
+                    if (x2 < 0 || x2 >= width || y2 < 0 || y2 >= height)
+                        break;
+
+                    ::atomicAdd(accum.ptr(y2 + 1) + x2 + 1, 1);
+                }
+
+                sx = -sx;
+                sy = -sy;
+            }
+        }
+
+        void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp)
+        {
+            const dim3 block(256);
+            const dim3 grid(divUp(count, block.x));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) );
+
+            circlesAccumCenters<<<grid, block>>>(list, count, dx, dy, accum, accum.cols - 2, accum.rows - 2, minRadius, maxRadius, idp);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+        }
+
+        ////////////////////////////////////////////////////////////////////////
+        // buildCentersList
+
+        __global__ void buildCentersList(const PtrStepSzi accum, unsigned int* centers, const int threshold)
+        {
+            const int x = blockIdx.x * blockDim.x + threadIdx.x;
+            const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (x < accum.cols - 2 && y < accum.rows - 2)
+            {
+                const int top = accum(y, x + 1);
+
+                const int left = accum(y + 1, x);
+                const int cur = accum(y + 1, x + 1);
+                const int right = accum(y + 1, x + 2);
+
+                const int bottom = accum(y + 2, x + 1);
+
+                if (cur > threshold && cur > top && cur >= bottom && cur > left && cur >= right)
+                {
+                    const unsigned int val = (y << 16) | x;
+                    const int idx = ::atomicAdd(&g_counter, 1);
+                    centers[idx] = val;
+                }
+            }
+        }
+
+        int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold)
+        {
+            void* counterPtr;
+            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+
+            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+
+            const dim3 block(32, 8);
+            const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) );
+
+            buildCentersList<<<grid, block>>>(accum, centers, threshold);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            int totalCount;
+            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+
+            return totalCount;
+        }
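Both queues in this file use the same packed-coordinate convention as buildPointList: a pixel (or accumulator cell) is stored in a single 32-bit word with the row in the high 16 bits and the column in the low 16 bits, which is also why the host wrappers assert that image dimensions fit in an unsigned short. A minimal host-side decoder, hypothetical and not part of this patch, just to make the convention explicit:

    // Decode the (y << 16) | x packing used by the point and center lists.
    static inline void unpackCoord(unsigned int val, int& x, int& y)
    {
        x = static_cast<int>(val & 0xFFFF);          // low 16 bits: column
        y = static_cast<int>((val >> 16) & 0xFFFF);  // high 16 bits: row
    }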
+
+        ////////////////////////////////////////////////////////////////////////
+        // circlesAccumRadius
+
+        __global__ void circlesAccumRadius(const unsigned int* centers, const unsigned int* list, const int count,
+                                           float3* circles, const int maxCircles, const float dp,
+                                           const int minRadius, const int maxRadius, const int histSize, const int threshold)
+        {
+            int* smem = DynamicSharedMem<int>();
+
+            for (int i = threadIdx.x; i < histSize + 2; i += blockDim.x)
+                smem[i] = 0;
+            __syncthreads();
+
+            unsigned int val = centers[blockIdx.x];
+
+            float cx = (val & 0xFFFF);
+            float cy = (val >> 16) & 0xFFFF;
+
+            cx = (cx + 0.5f) * dp;
+            cy = (cy + 0.5f) * dp;
+
+            for (int i = threadIdx.x; i < count; i += blockDim.x)
+            {
+                val = list[i];
+
+                const int x = (val & 0xFFFF);
+                const int y = (val >> 16) & 0xFFFF;
+
+                const float rad = ::sqrtf((cx - x) * (cx - x) + (cy - y) * (cy - y));
+                if (rad >= minRadius && rad <= maxRadius)
+                {
+                    const int r = __float2int_rn(rad - minRadius);
+
+                    Emulation::smem::atomicAdd(&smem[r + 1], 1);
+                }
+            }
+
+            __syncthreads();
+
+            for (int i = threadIdx.x; i < histSize; i += blockDim.x)
+            {
+                const int curVotes = smem[i + 1];
+
+                if (curVotes >= threshold && curVotes > smem[i] && curVotes >= smem[i + 2])
+                {
+                    const int ind = ::atomicAdd(&g_counter, 1);
+                    if (ind < maxCircles)
+                        circles[ind] = make_float3(cx, cy, i + minRadius);
+                }
+            }
+        }
+
+        int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
+                                   float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20)
+        {
+            void* counterPtr;
+            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+
+            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+
+            const dim3 block(has20 ? 1024 : 512);
+            const dim3 grid(centersCount);
+
+            const int histSize = maxRadius - minRadius + 1;
+            size_t smemSize = (histSize + 2) * sizeof(int);
+
+            circlesAccumRadius<<<grid, block, smemSize>>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            int totalCount;
+            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+
+            totalCount = ::min(totalCount, maxCircles);
+
+            return totalCount;
+        }
+    }
+}}}
+
+#endif /* CUDA_DISABLER */
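The split leaves the public API unchanged; the circle detector is still driven through cv::gpu::HoughCircles, whose host side now lives in modules/gpu/src/hough_circles.cpp (shown later in this patch). A minimal usage sketch, with illustrative parameter values that are not taken from this patch:

    #include <opencv2/gpu/gpu.hpp>
    #include <opencv2/imgproc/imgproc.hpp>   // CV_HOUGH_GRADIENT
    #include <vector>

    void detectCirclesGpu(const cv::Mat& gray)   // gray: CV_8UC1 input image
    {
        cv::gpu::GpuMat d_src(gray);
        cv::gpu::GpuMat d_circles;
        cv::gpu::HoughCirclesBuf buf;            // reusable intermediate buffers

        // method must be CV_HOUGH_GRADIENT (asserted by the implementation)
        cv::gpu::HoughCircles(d_src, d_circles, buf, CV_HOUGH_GRADIENT,
                              1.0f /*dp*/, 20.0f /*minDist*/,
                              100 /*cannyThreshold*/, 50 /*votesThreshold*/,
                              10 /*minRadius*/, 50 /*maxRadius*/, 4096 /*maxCircles*/);

        std::vector<cv::Vec3f> circles;          // (cx, cy, radius) triplets
        if (!d_circles.empty())
            cv::gpu::HoughCirclesDownload(d_circles, circles);
    }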
diff --git a/modules/gpu/src/cuda/hough_lines.cu b/modules/gpu/src/cuda/hough_lines.cu
new file mode 100644
index 0000000000..2f2fe9a7e2
--- /dev/null
+++ b/modules/gpu/src/cuda/hough_lines.cu
@@ -0,0 +1,212 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#if !defined CUDA_DISABLER
+
+#include <thrust/device_ptr.h>
+#include <thrust/sort.h>
+
+#include "opencv2/gpu/device/common.hpp"
+#include "opencv2/gpu/device/emulation.hpp"
+#include "opencv2/gpu/device/dynamic_smem.hpp"
+
+namespace cv { namespace gpu { namespace device
+{
+    namespace hough
+    {
+        __device__ static int g_counter;
+
+        ////////////////////////////////////////////////////////////////////////
+        // linesAccum
+
+        __global__ void linesAccumGlobal(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho)
+        {
+            const int n = blockIdx.x;
+            const float ang = n * theta;
+
+            float sinVal;
+            float cosVal;
+            sincosf(ang, &sinVal, &cosVal);
+            sinVal *= irho;
+            cosVal *= irho;
+
+            const int shift = (numrho - 1) / 2;
+
+            int* accumRow = accum.ptr(n + 1);
+            for (int i = threadIdx.x; i < count; i += blockDim.x)
+            {
+                const unsigned int val = list[i];
+
+                const int x = (val & 0xFFFF);
+                const int y = (val >> 16) & 0xFFFF;
+
+                int r = __float2int_rn(x * cosVal + y * sinVal);
+                r += shift;
+
+                ::atomicAdd(accumRow + r + 1, 1);
+            }
+        }
+
+        __global__ void linesAccumShared(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho)
+        {
+            int* smem = DynamicSharedMem<int>();
+
+            for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x)
+                smem[i] = 0;
+
+            __syncthreads();
+
+            const int n = blockIdx.x;
+            const float ang = n * theta;
+
+            float sinVal;
+            float cosVal;
+            sincosf(ang, &sinVal, &cosVal);
+            sinVal *= irho;
+            cosVal *= irho;
+
+            const int shift = (numrho - 1) / 2;
+
+            for (int i = threadIdx.x; i < count; i += blockDim.x)
+            {
+                const unsigned int val = list[i];
+
+                const int x = (val & 0xFFFF);
+                const int y = (val >> 16) & 0xFFFF;
+
+                int r = __float2int_rn(x * cosVal + y * sinVal);
+                r += shift;
+
+                Emulation::smem::atomicAdd(&smem[r + 1], 1);
+            }
+
+            __syncthreads();
+
+            int* accumRow = accum.ptr(n + 1);
+            for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x)
+                accumRow[i] = smem[i];
+        }
+
+        void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20)
+        {
+            const dim3 block(has20 ? 1024 : 512);
+            const dim3 grid(accum.rows - 2);
+
+            size_t smemSize = (accum.cols - 1) * sizeof(int);
+
+            if (smemSize < sharedMemPerBlock - 1000)
+                linesAccumShared<<<grid, block, smemSize>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2);
+            else
+                linesAccumGlobal<<<grid, block>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2);
+
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+        }
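linesAccum_gpu picks between the two kernels above: each block owns one angle row, and when a private copy of that row fits in shared memory (with a 1000-byte safety margin) the votes are accumulated locally and flushed once, avoiding global atomics. A rough sizing check that mirrors the host-side accumulator layout in hough_lines.cpp, a sketch assuming a Full HD input and the 48 KB of per-block shared memory available on compute capability 2.x and later parts:

    #include <cstdio>

    int main()
    {
        // Host wrappers size the accumulator as (numangle + 2) x (numrho + 2),
        // with numrho = cvRound(((cols + rows) * 2 + 1) / rho).
        const int cols = 1920, rows = 1080;
        const float rho = 1.0f;
        const int numrho = static_cast<int>(((cols + rows) * 2 + 1) / rho + 0.5f); // 6001 bins

        // linesAccum_gpu requests (accum.cols - 1) ints of dynamic shared memory.
        const size_t smemSize = (numrho + 1) * sizeof(int); // 24008 bytes, ~23.4 KB
        const size_t sharedMemPerBlock = 48 * 1024;         // assumed Fermi-class limit

        std::printf("shared-memory path: %s\n",
                    smemSize < sharedMemPerBlock - 1000 ? "yes" : "no"); // -> yes
        return 0;
    }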
+
+        ////////////////////////////////////////////////////////////////////////
+        // linesGetResult
+
+        __global__ void linesGetResult(const PtrStepSzi accum, float2* out, int* votes, const int maxSize, const float rho, const float theta, const int threshold, const int numrho)
+        {
+            const int r = blockIdx.x * blockDim.x + threadIdx.x;
+            const int n = blockIdx.y * blockDim.y + threadIdx.y;
+
+            if (r >= accum.cols - 2 || n >= accum.rows - 2)
+                return;
+
+            const int curVotes = accum(n + 1, r + 1);
+
+            if (curVotes > threshold &&
+                curVotes > accum(n + 1, r) &&
+                curVotes >= accum(n + 1, r + 2) &&
+                curVotes > accum(n, r + 1) &&
+                curVotes >= accum(n + 2, r + 1))
+            {
+                const float radius = (r - (numrho - 1) * 0.5f) * rho;
+                const float angle = n * theta;
+
+                const int ind = ::atomicAdd(&g_counter, 1);
+                if (ind < maxSize)
+                {
+                    out[ind] = make_float2(radius, angle);
+                    votes[ind] = curVotes;
+                }
+            }
+        }
+
+        int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort)
+        {
+            void* counterPtr;
+            cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
+
+            cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
+
+            const dim3 block(32, 8);
+            const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
+
+            cudaSafeCall( cudaFuncSetCacheConfig(linesGetResult, cudaFuncCachePreferL1) );
+
+            linesGetResult<<<grid, block>>>(accum, out, votes, maxSize, rho, theta, threshold, accum.cols - 2);
+            cudaSafeCall( cudaGetLastError() );
+
+            cudaSafeCall( cudaDeviceSynchronize() );
+
+            int totalCount;
+            cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
+
+            totalCount = ::min(totalCount, maxSize);
+
+            if (doSort && totalCount > 0)
+            {
+                thrust::device_ptr<float2> outPtr(out);
+                thrust::device_ptr<int> votesPtr(votes);
+                thrust::sort_by_key(votesPtr, votesPtr + totalCount, outPtr, thrust::greater<int>());
+            }
+
+            return totalCount;
+        }
+    }
+}}}
+
+
+#endif /* CUDA_DISABLER */
diff --git a/modules/gpu/src/cuda/hough_segments.cu b/modules/gpu/src/cuda/hough_segments.cu
new file mode 100644
index 0000000000..f55bb4de9f
--- /dev/null
+++ b/modules/gpu/src/cuda/hough_segments.cu
@@ -0,0 +1,249 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#if !defined CUDA_DISABLER + +#include "opencv2/gpu/device/common.hpp" +#include "opencv2/gpu/device/vec_math.hpp" + +namespace cv { namespace gpu { namespace device +{ + namespace hough + { + __device__ int g_counter; + + texture tex_mask(false, cudaFilterModePoint, cudaAddressModeClamp); + + __global__ void houghLinesProbabilistic(const PtrStepSzi accum, + int4* out, const int maxSize, + const float rho, const float theta, + const int lineGap, const int lineLength, + const int rows, const int cols) + { + const int r = blockIdx.x * blockDim.x + threadIdx.x; + const int n = blockIdx.y * blockDim.y + threadIdx.y; + + if (r >= accum.cols - 2 || n >= accum.rows - 2) + return; + + const int curVotes = accum(n + 1, r + 1); + + if (curVotes >= lineLength && + curVotes > accum(n, r) && + curVotes > accum(n, r + 1) && + curVotes > accum(n, r + 2) && + curVotes > accum(n + 1, r) && + curVotes > accum(n + 1, r + 2) && + curVotes > accum(n + 2, r) && + curVotes > accum(n + 2, r + 1) && + curVotes > accum(n + 2, r + 2)) + { + const float radius = (r - (accum.cols - 2 - 1) * 0.5f) * rho; + const float angle = n * theta; + + float cosa; + float sina; + sincosf(angle, &sina, &cosa); + + float2 p0 = make_float2(cosa * radius, sina * radius); + float2 dir = make_float2(-sina, cosa); + + float2 pb[4] = {make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1)}; + float a; + + if (dir.x != 0) + { + a = -p0.x / dir.x; + pb[0].x = 0; + pb[0].y = p0.y + a * dir.y; + + a = (cols - 1 - p0.x) / dir.x; + pb[1].x = cols - 1; + pb[1].y = p0.y + a * dir.y; + } + if (dir.y != 0) + { + a = -p0.y / dir.y; + pb[2].x = p0.x + a * dir.x; + pb[2].y = 0; + + a = (rows - 1 - p0.y) / dir.y; + pb[3].x = p0.x + a * dir.x; + pb[3].y = rows - 1; + } + + if (pb[0].x == 0 && (pb[0].y >= 0 && pb[0].y < rows)) + { + p0 = pb[0]; + if (dir.x < 0) + dir = -dir; + } + else if (pb[1].x == cols - 1 && (pb[0].y >= 0 && pb[0].y < rows)) + { + p0 = pb[1]; + if (dir.x > 0) + dir = -dir; + } + else if (pb[2].y == 0 && (pb[2].x >= 0 && pb[2].x < cols)) + 
{ + p0 = pb[2]; + if (dir.y < 0) + dir = -dir; + } + else if (pb[3].y == rows - 1 && (pb[3].x >= 0 && pb[3].x < cols)) + { + p0 = pb[3]; + if (dir.y > 0) + dir = -dir; + } + + float2 d; + if (::fabsf(dir.x) > ::fabsf(dir.y)) + { + d.x = dir.x > 0 ? 1 : -1; + d.y = dir.y / ::fabsf(dir.x); + } + else + { + d.x = dir.x / ::fabsf(dir.y); + d.y = dir.y > 0 ? 1 : -1; + } + + float2 line_end[2]; + int gap; + bool inLine = false; + + float2 p1 = p0; + if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows) + return; + + for (;;) + { + if (tex2D(tex_mask, p1.x, p1.y)) + { + gap = 0; + + if (!inLine) + { + line_end[0] = p1; + line_end[1] = p1; + inLine = true; + } + else + { + line_end[1] = p1; + } + } + else if (inLine) + { + if (++gap > lineGap) + { + bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength || + ::abs(line_end[1].y - line_end[0].y) >= lineLength; + + if (good_line) + { + const int ind = ::atomicAdd(&g_counter, 1); + if (ind < maxSize) + out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y); + } + + gap = 0; + inLine = false; + } + } + + p1 = p1 + d; + if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows) + { + if (inLine) + { + bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength || + ::abs(line_end[1].y - line_end[0].y) >= lineLength; + + if (good_line) + { + const int ind = ::atomicAdd(&g_counter, 1); + if (ind < maxSize) + out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y); + } + + } + break; + } + } + } + } + + int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength) + { + void* counterPtr; + cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); + + cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); + + const dim3 block(32, 8); + const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y)); + + bindTexture(&tex_mask, mask); + + houghLinesProbabilistic<<>>(accum, + out, maxSize, + rho, theta, + lineGap, lineLength, + mask.rows, mask.cols); + cudaSafeCall( cudaGetLastError() ); + + cudaSafeCall( cudaDeviceSynchronize() ); + + int totalCount; + cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); + + totalCount = ::min(totalCount, maxSize); + + return totalCount; + } + } +}}} + + +#endif /* CUDA_DISABLER */ diff --git a/modules/gpu/src/hough.cpp b/modules/gpu/src/generalized_hough.cpp similarity index 79% rename from modules/gpu/src/hough.cpp rename to modules/gpu/src/generalized_hough.cpp index 09cf01850e..a92c37d1a5 100644 --- a/modules/gpu/src/hough.cpp +++ b/modules/gpu/src/generalized_hough.cpp @@ -48,16 +48,6 @@ using namespace cv::gpu; #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) -void cv::gpu::HoughLines(const GpuMat&, GpuMat&, float, float, int, bool, int) { throw_nogpu(); } -void cv::gpu::HoughLines(const GpuMat&, GpuMat&, HoughLinesBuf&, float, float, int, bool, int) { throw_nogpu(); } -void cv::gpu::HoughLinesDownload(const GpuMat&, OutputArray, OutputArray) { throw_nogpu(); } - -void cv::gpu::HoughLinesP(const GpuMat&, GpuMat&, HoughLinesBuf&, float, float, int, int, int) { throw_nogpu(); } - -void cv::gpu::HoughCircles(const GpuMat&, GpuMat&, int, float, float, int, int, int, int, int) { throw_nogpu(); } -void cv::gpu::HoughCircles(const GpuMat&, GpuMat&, HoughCirclesBuf&, int, float, float, int, int, int, int, int) { throw_nogpu(); } -void cv::gpu::HoughCirclesDownload(const GpuMat&, OutputArray) { throw_nogpu(); } - 
Ptr cv::gpu::GeneralizedHough_GPU::create(int) { throw_nogpu(); return Ptr(); } cv::gpu::GeneralizedHough_GPU::~GeneralizedHough_GPU() {} void cv::gpu::GeneralizedHough_GPU::setTemplate(const GpuMat&, int, Point) { throw_nogpu(); } @@ -77,299 +67,6 @@ namespace cv { namespace gpu { namespace device } }}} -////////////////////////////////////////////////////////// -// HoughLines - -namespace cv { namespace gpu { namespace device -{ - namespace hough - { - void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20); - int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort); - } -}}} - -void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort, int maxLines) -{ - HoughLinesBuf buf; - HoughLines(src, lines, buf, rho, theta, threshold, doSort, maxLines); -} - -void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int threshold, bool doSort, int maxLines) -{ - using namespace cv::gpu::device::hough; - - CV_Assert(src.type() == CV_8UC1); - CV_Assert(src.cols < std::numeric_limits::max()); - CV_Assert(src.rows < std::numeric_limits::max()); - - ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf.list); - unsigned int* srcPoints = buf.list.ptr(); - - const int pointsCount = buildPointList_gpu(src, srcPoints); - if (pointsCount == 0) - { - lines.release(); - return; - } - - const int numangle = cvRound(CV_PI / theta); - const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho); - CV_Assert(numangle > 0 && numrho > 0); - - ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, buf.accum); - buf.accum.setTo(Scalar::all(0)); - - DeviceInfo devInfo; - linesAccum_gpu(srcPoints, pointsCount, buf.accum, rho, theta, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20)); - - ensureSizeIsEnough(2, maxLines, CV_32FC2, lines); - - int linesCount = linesGetResult_gpu(buf.accum, lines.ptr(0), lines.ptr(1), maxLines, rho, theta, threshold, doSort); - if (linesCount > 0) - lines.cols = linesCount; - else - lines.release(); -} - -void cv::gpu::HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines_, OutputArray h_votes_) -{ - if (d_lines.empty()) - { - h_lines_.release(); - if (h_votes_.needed()) - h_votes_.release(); - return; - } - - CV_Assert(d_lines.rows == 2 && d_lines.type() == CV_32FC2); - - h_lines_.create(1, d_lines.cols, CV_32FC2); - Mat h_lines = h_lines_.getMat(); - d_lines.row(0).download(h_lines); - - if (h_votes_.needed()) - { - h_votes_.create(1, d_lines.cols, CV_32SC1); - Mat h_votes = h_votes_.getMat(); - GpuMat d_votes(1, d_lines.cols, CV_32SC1, const_cast(d_lines.ptr(1))); - d_votes.download(h_votes); - } -} - -////////////////////////////////////////////////////////// -// HoughLinesP - -namespace cv { namespace gpu { namespace device -{ - namespace hough - { - int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength); - } -}}} - -void cv::gpu::HoughLinesP(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int minLineLength, int maxLineGap, int maxLines) -{ - using namespace cv::gpu::device::hough; - - CV_Assert( src.type() == CV_8UC1 ); - CV_Assert( src.cols < std::numeric_limits::max() ); - CV_Assert( src.rows < std::numeric_limits::max() ); - - ensureSizeIsEnough(1, src.size().area(), CV_32SC1, 
buf.list); - unsigned int* srcPoints = buf.list.ptr(); - - const int pointsCount = buildPointList_gpu(src, srcPoints); - if (pointsCount == 0) - { - lines.release(); - return; - } - - const int numangle = cvRound(CV_PI / theta); - const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho); - CV_Assert( numangle > 0 && numrho > 0 ); - - ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, buf.accum); - buf.accum.setTo(Scalar::all(0)); - - DeviceInfo devInfo; - linesAccum_gpu(srcPoints, pointsCount, buf.accum, rho, theta, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20)); - - ensureSizeIsEnough(1, maxLines, CV_32SC4, lines); - - int linesCount = houghLinesProbabilistic_gpu(src, buf.accum, lines.ptr(), maxLines, rho, theta, maxLineGap, minLineLength); - - if (linesCount > 0) - lines.cols = linesCount; - else - lines.release(); -} - -////////////////////////////////////////////////////////// -// HoughCircles - -namespace cv { namespace gpu { namespace device -{ - namespace hough - { - void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp); - int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold); - int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count, - float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20); - } -}}} - -void cv::gpu::HoughCircles(const GpuMat& src, GpuMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles) -{ - HoughCirclesBuf buf; - HoughCircles(src, circles, buf, method, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius, maxCircles); -} - -void cv::gpu::HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf& buf, int method, - float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles) -{ - using namespace cv::gpu::device::hough; - - CV_Assert(src.type() == CV_8UC1); - CV_Assert(src.cols < std::numeric_limits::max()); - CV_Assert(src.rows < std::numeric_limits::max()); - CV_Assert(method == CV_HOUGH_GRADIENT); - CV_Assert(dp > 0); - CV_Assert(minRadius > 0 && maxRadius > minRadius); - CV_Assert(cannyThreshold > 0); - CV_Assert(votesThreshold > 0); - CV_Assert(maxCircles > 0); - - const float idp = 1.0f / dp; - - cv::gpu::Canny(src, buf.cannyBuf, buf.edges, std::max(cannyThreshold / 2, 1), cannyThreshold); - - ensureSizeIsEnough(2, src.size().area(), CV_32SC1, buf.list); - unsigned int* srcPoints = buf.list.ptr(0); - unsigned int* centers = buf.list.ptr(1); - - const int pointsCount = buildPointList_gpu(buf.edges, srcPoints); - if (pointsCount == 0) - { - circles.release(); - return; - } - - ensureSizeIsEnough(cvCeil(src.rows * idp) + 2, cvCeil(src.cols * idp) + 2, CV_32SC1, buf.accum); - buf.accum.setTo(Scalar::all(0)); - - circlesAccumCenters_gpu(srcPoints, pointsCount, buf.cannyBuf.dx, buf.cannyBuf.dy, buf.accum, minRadius, maxRadius, idp); - - int centersCount = buildCentersList_gpu(buf.accum, centers, votesThreshold); - if (centersCount == 0) - { - circles.release(); - return; - } - - if (minDist > 1) - { - cv::AutoBuffer oldBuf_(centersCount); - cv::AutoBuffer newBuf_(centersCount); - int newCount = 0; - - ushort2* oldBuf = oldBuf_; - ushort2* newBuf = newBuf_; - - cudaSafeCall( cudaMemcpy(oldBuf, centers, centersCount * sizeof(ushort2), cudaMemcpyDeviceToHost) ); - 
- const int cellSize = cvRound(minDist); - const int gridWidth = (src.cols + cellSize - 1) / cellSize; - const int gridHeight = (src.rows + cellSize - 1) / cellSize; - - std::vector< std::vector > grid(gridWidth * gridHeight); - - const float minDist2 = minDist * minDist; - - for (int i = 0; i < centersCount; ++i) - { - ushort2 p = oldBuf[i]; - - bool good = true; - - int xCell = static_cast(p.x / cellSize); - int yCell = static_cast(p.y / cellSize); - - int x1 = xCell - 1; - int y1 = yCell - 1; - int x2 = xCell + 1; - int y2 = yCell + 1; - - // boundary check - x1 = std::max(0, x1); - y1 = std::max(0, y1); - x2 = std::min(gridWidth - 1, x2); - y2 = std::min(gridHeight - 1, y2); - - for (int yy = y1; yy <= y2; ++yy) - { - for (int xx = x1; xx <= x2; ++xx) - { - vector& m = grid[yy * gridWidth + xx]; - - for(size_t j = 0; j < m.size(); ++j) - { - float dx = (float)(p.x - m[j].x); - float dy = (float)(p.y - m[j].y); - - if (dx * dx + dy * dy < minDist2) - { - good = false; - goto break_out; - } - } - } - } - - break_out: - - if(good) - { - grid[yCell * gridWidth + xCell].push_back(p); - - newBuf[newCount++] = p; - } - } - - cudaSafeCall( cudaMemcpy(centers, newBuf, newCount * sizeof(unsigned int), cudaMemcpyHostToDevice) ); - centersCount = newCount; - } - - ensureSizeIsEnough(1, maxCircles, CV_32FC3, circles); - - const int circlesCount = circlesAccumRadius_gpu(centers, centersCount, srcPoints, pointsCount, circles.ptr(), maxCircles, - dp, minRadius, maxRadius, votesThreshold, deviceSupports(FEATURE_SET_COMPUTE_20)); - - if (circlesCount > 0) - circles.cols = circlesCount; - else - circles.release(); -} - -void cv::gpu::HoughCirclesDownload(const GpuMat& d_circles, cv::OutputArray h_circles_) -{ - if (d_circles.empty()) - { - h_circles_.release(); - return; - } - - CV_Assert(d_circles.rows == 1 && d_circles.type() == CV_32FC3); - - h_circles_.create(1, d_circles.cols, CV_32FC3); - Mat h_circles = h_circles_.getMat(); - d_circles.download(h_circles); -} - -////////////////////////////////////////////////////////// -// GeneralizedHough - namespace cv { namespace gpu { namespace device { namespace hough diff --git a/modules/gpu/src/hough_circles.cpp b/modules/gpu/src/hough_circles.cpp new file mode 100644 index 0000000000..74aa7394f2 --- /dev/null +++ b/modules/gpu/src/hough_circles.cpp @@ -0,0 +1,223 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
diff --git a/modules/gpu/src/hough_circles.cpp b/modules/gpu/src/hough_circles.cpp
new file mode 100644
index 0000000000..74aa7394f2
--- /dev/null
+++ b/modules/gpu/src/hough_circles.cpp
@@ -0,0 +1,223 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+
+void cv::gpu::HoughCircles(const GpuMat&, GpuMat&, int, float, float, int, int, int, int, int) { throw_nogpu(); }
+void cv::gpu::HoughCircles(const GpuMat&, GpuMat&, HoughCirclesBuf&, int, float, float, int, int, int, int, int) { throw_nogpu(); }
+void cv::gpu::HoughCirclesDownload(const GpuMat&, OutputArray) { throw_nogpu(); }
+
+#else /* !defined (HAVE_CUDA) */
+
+namespace cv { namespace gpu { namespace device
+{
+    namespace hough
+    {
+        int buildPointList_gpu(PtrStepSzb src, unsigned int* list);
+    }
+}}}
+
+namespace cv { namespace gpu { namespace device
+{
+    namespace hough
+    {
+        void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp);
+        int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold);
+        int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
+                                   float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20);
+    }
+}}}
+
+void cv::gpu::HoughCircles(const GpuMat& src, GpuMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles)
+{
+    HoughCirclesBuf buf;
+    HoughCircles(src, circles, buf, method, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius, maxCircles);
+}
+
+void cv::gpu::HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf& buf, int method,
+                           float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles)
+{
+    using namespace cv::gpu::device::hough;
+
+    CV_Assert(src.type() == CV_8UC1);
+    CV_Assert(src.cols < std::numeric_limits<unsigned short>::max());
+    CV_Assert(src.rows < std::numeric_limits<unsigned short>::max());
+    CV_Assert(method == CV_HOUGH_GRADIENT);
+    CV_Assert(dp > 0);
+    CV_Assert(minRadius > 0 && maxRadius > minRadius);
+    CV_Assert(cannyThreshold > 0);
+    CV_Assert(votesThreshold > 0);
+    CV_Assert(maxCircles > 0);
+
+    const float idp = 1.0f / dp;
+
+    cv::gpu::Canny(src, buf.cannyBuf, buf.edges, std::max(cannyThreshold / 2, 1), cannyThreshold);
+
+    ensureSizeIsEnough(2, src.size().area(), CV_32SC1, buf.list);
+    unsigned int* srcPoints = buf.list.ptr<unsigned int>(0);
+    unsigned int* centers = buf.list.ptr<unsigned int>(1);
+
+    const int pointsCount = buildPointList_gpu(buf.edges, srcPoints);
+    if (pointsCount == 0)
+    {
+        circles.release();
+        return;
+    }
+
+    ensureSizeIsEnough(cvCeil(src.rows * idp) + 2, cvCeil(src.cols * idp) + 2, CV_32SC1, buf.accum);
+    buf.accum.setTo(Scalar::all(0));
+
+    circlesAccumCenters_gpu(srcPoints, pointsCount, buf.cannyBuf.dx, buf.cannyBuf.dy, buf.accum, minRadius, maxRadius, idp);
+
+    int centersCount = buildCentersList_gpu(buf.accum, centers, votesThreshold);
+    if (centersCount == 0)
+    {
+        circles.release();
+        return;
+    }
+
+    if (minDist > 1)
+    {
+        cv::AutoBuffer<ushort2> oldBuf_(centersCount);
+        cv::AutoBuffer<ushort2> newBuf_(centersCount);
+        int newCount = 0;
+
+        ushort2* oldBuf = oldBuf_;
+        ushort2* newBuf = newBuf_;
+
+        cudaSafeCall( cudaMemcpy(oldBuf, centers, centersCount * sizeof(ushort2), cudaMemcpyDeviceToHost) );
+
+        const int cellSize = cvRound(minDist);
+        const int gridWidth = (src.cols + cellSize - 1) / cellSize;
+        const int gridHeight = (src.rows + cellSize - 1) / cellSize;
+
+        std::vector< std::vector<ushort2> > grid(gridWidth * gridHeight);
+
+        const float minDist2 = minDist * minDist;
+
+        for (int i = 0; i < centersCount; ++i)
+        {
+            ushort2 p = oldBuf[i];
+
+            bool good = true;
+
+            int xCell = static_cast<int>(p.x / cellSize);
+            int yCell = static_cast<int>(p.y / cellSize);
+
+            int x1 = xCell - 1;
+            int y1 = yCell - 1;
+            int x2 = xCell + 1;
+            int y2 = yCell + 1;
+
+            // boundary check
+            x1 = std::max(0, x1);
+            y1 = std::max(0, y1);
+            x2 = std::min(gridWidth - 1, x2);
+            y2 = std::min(gridHeight - 1, y2);
+
+            for (int yy = y1; yy <= y2; ++yy)
+            {
+                for (int xx = x1; xx <= x2; ++xx)
+                {
+                    vector<ushort2>& m = grid[yy * gridWidth + xx];
+
+                    for(size_t j = 0; j < m.size(); ++j)
+                    {
+                        float dx = (float)(p.x - m[j].x);
+                        float dy = (float)(p.y - m[j].y);
+
+                        if (dx * dx + dy * dy < minDist2)
+                        {
+                            good = false;
+                            goto break_out;
+                        }
+                    }
+                }
+            }
+
+            break_out:
+
+            if(good)
+            {
+                grid[yCell * gridWidth + xCell].push_back(p);
+
+                newBuf[newCount++] = p;
+            }
+        }
+
+        cudaSafeCall( cudaMemcpy(centers, newBuf, newCount * sizeof(unsigned int), cudaMemcpyHostToDevice) );
+        centersCount = newCount;
+    }
+
+    ensureSizeIsEnough(1, maxCircles, CV_32FC3, circles);
+
+    const int circlesCount = circlesAccumRadius_gpu(centers, centersCount, srcPoints, pointsCount, circles.ptr<float3>(), maxCircles,
+                                                    dp, minRadius, maxRadius, votesThreshold, deviceSupports(FEATURE_SET_COMPUTE_20));
+
+    if (circlesCount > 0)
+        circles.cols = circlesCount;
+    else
+        circles.release();
+}
+
+void cv::gpu::HoughCirclesDownload(const GpuMat& d_circles, cv::OutputArray h_circles_)
+{
+    if (d_circles.empty())
+    {
+        h_circles_.release();
+        return;
+    }
+
+    CV_Assert(d_circles.rows == 1 && d_circles.type() == CV_32FC3);
+
+    h_circles_.create(1, d_circles.cols, CV_32FC3);
+    Mat h_circles = h_circles_.getMat();
+    d_circles.download(h_circles);
+}
+
+#endif /* !defined (HAVE_CUDA) */
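For reference, a minimal host-side sketch of how the relocated HoughCircles API is driven; the parameter values are illustrative assumptions, not part of the patch:

    // Usage sketch for cv::gpu::HoughCircles (assumed parameter values).
    #include "opencv2/gpu/gpu.hpp"
    #include "opencv2/imgproc/imgproc.hpp"   // CV_HOUGH_GRADIENT

    void detectCircles(const cv::gpu::GpuMat& d_src)   // CV_8UC1 input image
    {
        cv::gpu::GpuMat d_circles;
        cv::gpu::HoughCirclesBuf buf;   // reuse across calls to avoid reallocation

        cv::gpu::HoughCircles(d_src, d_circles, buf, CV_HOUGH_GRADIENT,
                              1.0f /* dp */, 20.0f /* minDist */,
                              100 /* cannyThreshold */, 50 /* votesThreshold */,
                              10 /* minRadius */, 50 /* maxRadius */,
                              300 /* maxCircles */);

        if (!d_circles.empty())
        {
            cv::Mat h_circles;
            cv::gpu::HoughCirclesDownload(d_circles, h_circles);
            // h_circles is 1 x N CV_32FC3; each element packs (x, y, radius)
        }
    }
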
diff --git a/modules/gpu/src/hough_lines.cpp b/modules/gpu/src/hough_lines.cpp
new file mode 100644
index 0000000000..4cc4067b20
--- /dev/null
+++ b/modules/gpu/src/hough_lines.cpp
@@ -0,0 +1,142 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+
+void cv::gpu::HoughLines(const GpuMat&, GpuMat&, float, float, int, bool, int) { throw_nogpu(); }
+void cv::gpu::HoughLines(const GpuMat&, GpuMat&, HoughLinesBuf&, float, float, int, bool, int) { throw_nogpu(); }
+void cv::gpu::HoughLinesDownload(const GpuMat&, OutputArray, OutputArray) { throw_nogpu(); }
+
+#else /* !defined (HAVE_CUDA) */
+
+namespace cv { namespace gpu { namespace device
+{
+    namespace hough
+    {
+        int buildPointList_gpu(PtrStepSzb src, unsigned int* list);
+    }
+}}}
+
+namespace cv { namespace gpu { namespace device
+{
+    namespace hough
+    {
+        void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20);
+        int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort);
+    }
+}}}
+
+void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort, int maxLines)
+{
+    HoughLinesBuf buf;
+    HoughLines(src, lines, buf, rho, theta, threshold, doSort, maxLines);
+}
+
+void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int threshold, bool doSort, int maxLines)
+{
+    using namespace cv::gpu::device::hough;
+
+    CV_Assert(src.type() == CV_8UC1);
+    CV_Assert(src.cols < std::numeric_limits<unsigned short>::max());
+    CV_Assert(src.rows < std::numeric_limits<unsigned short>::max());
+
+    ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf.list);
+    unsigned int* srcPoints = buf.list.ptr<unsigned int>();
+
+    const int pointsCount = buildPointList_gpu(src, srcPoints);
+    if (pointsCount == 0)
+    {
+        lines.release();
+        return;
+    }
+
+    const int numangle = cvRound(CV_PI / theta);
+    const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho);
+    CV_Assert(numangle > 0 && numrho > 0);
+
+    ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, buf.accum);
+    buf.accum.setTo(Scalar::all(0));
+
+    DeviceInfo devInfo;
+    linesAccum_gpu(srcPoints, pointsCount, buf.accum, rho, theta, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
+
+    ensureSizeIsEnough(2, maxLines, CV_32FC2, lines);
+
+    int linesCount = linesGetResult_gpu(buf.accum, lines.ptr<float2>(0), lines.ptr<int>(1), maxLines, rho, theta, threshold, doSort);
+    if (linesCount > 0)
+        lines.cols = linesCount;
+    else
+        lines.release();
+}
+
+void cv::gpu::HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines_, OutputArray h_votes_)
+{
+    if (d_lines.empty())
+    {
+        h_lines_.release();
+        if (h_votes_.needed())
+            h_votes_.release();
+        return;
+    }
+
+    CV_Assert(d_lines.rows == 2 && d_lines.type() == CV_32FC2);
+
+    h_lines_.create(1, d_lines.cols, CV_32FC2);
+    Mat h_lines = h_lines_.getMat();
+    d_lines.row(0).download(h_lines);
+
+    if (h_votes_.needed())
+    {
+        h_votes_.create(1, d_lines.cols, CV_32SC1);
+        Mat h_votes = h_votes_.getMat();
+        GpuMat d_votes(1, d_lines.cols, CV_32SC1, const_cast<int*>(d_lines.ptr<int>(1)));
+        d_votes.download(h_votes);
+    }
+}
+
+#endif /* !defined (HAVE_CUDA) */
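A minimal host-side sketch of the relocated HoughLines API, including the optional vote download; the parameter values are illustrative assumptions, not part of the patch:

    // Usage sketch for cv::gpu::HoughLines (assumed parameter values).
    #include "opencv2/gpu/gpu.hpp"

    void detectLines(const cv::gpu::GpuMat& d_src)   // CV_8UC1 binary edge map
    {
        cv::gpu::GpuMat d_lines;
        cv::gpu::HoughLinesBuf buf;

        cv::gpu::HoughLines(d_src, d_lines, buf,
                            1.0f /* rho */, (float)(CV_PI / 180.0) /* theta */,
                            100 /* threshold */, true /* doSort */, 4096 /* maxLines */);

        cv::Mat h_lines, h_votes;
        cv::gpu::HoughLinesDownload(d_lines, h_lines, h_votes);
        // h_lines: 1 x N CV_32FC2 holding (rho, theta) pairs;
        // h_votes: 1 x N CV_32SC1 vote counts, sorted descending when doSort is true
    }
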
diff --git a/modules/gpu/src/hough_segments.cpp b/modules/gpu/src/hough_segments.cpp
new file mode 100644
index 0000000000..c34f33a626
--- /dev/null
+++ b/modules/gpu/src/hough_segments.cpp
@@ -0,0 +1,110 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+using namespace std;
+using namespace cv;
+using namespace cv::gpu;
+
+#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+
+void cv::gpu::HoughLinesP(const GpuMat&, GpuMat&, HoughLinesBuf&, float, float, int, int, int) { throw_nogpu(); }
+
+#else /* !defined (HAVE_CUDA) */
+
+namespace cv { namespace gpu { namespace device
+{
+    namespace hough
+    {
+        int buildPointList_gpu(PtrStepSzb src, unsigned int* list);
+    }
+}}}
+
+namespace cv { namespace gpu { namespace device
+{
+    namespace hough
+    {
+        void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20);
+        int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength);
+    }
+}}}
+
+void cv::gpu::HoughLinesP(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int minLineLength, int maxLineGap, int maxLines)
+{
+    using namespace cv::gpu::device::hough;
+
+    CV_Assert( src.type() == CV_8UC1 );
+    CV_Assert( src.cols < std::numeric_limits<unsigned short>::max() );
+    CV_Assert( src.rows < std::numeric_limits<unsigned short>::max() );
+
+    ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf.list);
+    unsigned int* srcPoints = buf.list.ptr<unsigned int>();
+
+    const int pointsCount = buildPointList_gpu(src, srcPoints);
+    if (pointsCount == 0)
+    {
+        lines.release();
+        return;
+    }
+
+    const int numangle = cvRound(CV_PI / theta);
+    const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho);
+    CV_Assert( numangle > 0 && numrho > 0 );
+
+    ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, buf.accum);
+    buf.accum.setTo(Scalar::all(0));
+
+    DeviceInfo devInfo;
+    linesAccum_gpu(srcPoints, pointsCount, buf.accum, rho, theta, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
+
+    ensureSizeIsEnough(1, maxLines, CV_32SC4, lines);
+
+    int linesCount = houghLinesProbabilistic_gpu(src, buf.accum, lines.ptr<int4>(), maxLines, rho, theta, maxLineGap, minLineLength);
+
+    if (linesCount > 0)
+        lines.cols = linesCount;
+    else
+        lines.release();
+}
+
+#endif /* !defined (HAVE_CUDA) */
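A minimal host-side sketch of the new HoughLinesP entry point; parameter values and the endpoint ordering comment are illustrative assumptions, not part of the patch:

    // Usage sketch for cv::gpu::HoughLinesP (assumed parameter values).
    #include "opencv2/gpu/gpu.hpp"

    void detectSegments(const cv::gpu::GpuMat& d_mask)   // CV_8UC1 binary edge map
    {
        cv::gpu::GpuMat d_segments;
        cv::gpu::HoughLinesBuf buf;   // HoughLinesP reuses the HoughLinesBuf type

        cv::gpu::HoughLinesP(d_mask, d_segments, buf,
                             1.0f /* rho */, (float)(CV_PI / 180.0) /* theta */,
                             50 /* minLineLength */, 5 /* maxLineGap */,
                             4096 /* maxLines */);

        if (!d_segments.empty())
        {
            // d_segments is 1 x N CV_32SC4; each int4 is assumed to hold the
            // segment endpoints as (x1, y1, x2, y2)
            cv::Mat h_segments;
            d_segments.download(h_segments);
        }
    }
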
From 33d42b740c6fe938b63a0b25c9ad51741aba48c3 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov
Date: Tue, 21 Jan 2014 14:35:51 +0400
Subject: [PATCH 2/2] disable CUDA generalized Hough Transform

---
 modules/gpu/src/cuda/generalized_hough.cu | 2 ++
 modules/gpu/src/generalized_hough.cpp     | 2 ++
 modules/gpu/test/test_hough.cpp           | 2 +-
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/modules/gpu/src/cuda/generalized_hough.cu b/modules/gpu/src/cuda/generalized_hough.cu
index 5e2041eae4..35451e7e23 100644
--- a/modules/gpu/src/cuda/generalized_hough.cu
+++ b/modules/gpu/src/cuda/generalized_hough.cu
@@ -40,6 +40,8 @@
 //
 //M*/
 
+#define CUDA_DISABLER
+
 #if !defined CUDA_DISABLER
 
 #include <thrust/device_ptr.h>
diff --git a/modules/gpu/src/generalized_hough.cpp b/modules/gpu/src/generalized_hough.cpp
index a92c37d1a5..867ba7ee71 100644
--- a/modules/gpu/src/generalized_hough.cpp
+++ b/modules/gpu/src/generalized_hough.cpp
@@ -40,6 +40,8 @@
 //
 //M*/
 
+#define CUDA_DISABLER
+
 #include "precomp.hpp"
 
 using namespace std;
diff --git a/modules/gpu/test/test_hough.cpp b/modules/gpu/test/test_hough.cpp
index f876a7a2b0..6441fc69ab 100644
--- a/modules/gpu/test/test_hough.cpp
+++ b/modules/gpu/test/test_hough.cpp
@@ -189,7 +189,7 @@ PARAM_TEST_CASE(GeneralizedHough, cv::gpu::DeviceInfo, UseRoi)
 {
 };
 
-GPU_TEST_P(GeneralizedHough, POSITION)
+GPU_TEST_P(GeneralizedHough, DISABLED_POSITION)
 {
     const cv::gpu::DeviceInfo devInfo = GET_PARAM(0);
     cv::gpu::setDevice(devInfo.deviceID());
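For readers unfamiliar with the convention used in the test rename above: prefixing a Google Test name with DISABLED_ keeps the test compiled but skipped in a default run, which matches the intent of the #define CUDA_DISABLER guards added to the sources. A minimal illustration, with a hypothetical test name:

    // Google Test skips tests whose names start with DISABLED_, but still
    // compiles them; they can be forced with --gtest_also_run_disabled_tests.
    TEST(GeneralizedHoughSketch, DISABLED_Smoke)
    {
        // not executed in a default test run
    }
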