@ -307,268 +307,6 @@ namespace cv { namespace gpu { namespace cudev
return totalCount;
return totalCount;
}
}
////////////////////////////////////////////////////////////////////////
// Ballard_PosScale
__global__ void Ballard_PosScale_calcHist(const unsigned int* coordList, const float* thetaList,
PtrStep<short2> r_table, const int* r_sizes,
PtrStepi hist, const int rows, const int cols,
const float minScale, const float scaleStep, const int scaleRange,
const float idp, const float thetaScale)
{
const unsigned int coord = coordList[blockIdx.x];
float2 p;
p.x = (coord & 0xFFFF);
p.y = (coord >> 16) & 0xFFFF;
const float theta = thetaList[blockIdx.x];
const int n = __float2int_rn(theta * thetaScale);
const short2* r_row = r_table.ptr(n);
const int r_row_size = r_sizes[n];
for (int j = 0; j < r_row_size; ++j)
{
const float2 d = saturate_cast<float2>(r_row[j]);
for (int s = threadIdx.x; s < scaleRange; s += blockDim.x)
{
const float scale = minScale + s * scaleStep;
float2 c = p - scale * d;
c.x *= idp;
c.y *= idp;
if (c.x >= 0 && c.x < cols && c.y >= 0 && c.y < rows)
::atomicAdd(hist.ptr((s + 1) * (rows + 2) + __float2int_rn(c.y + 1)) + __float2int_rn(c.x + 1), 1);
}
}
}
void Ballard_PosScale_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
PtrStepSz<short2> r_table, const int* r_sizes,
PtrStepi hist, int rows, int cols,
float minScale, float scaleStep, int scaleRange,
float dp, int levels)
{
const dim3 block(256);
const dim3 grid(pointsCount);
const float idp = 1.0f / dp;
const float thetaScale = levels / (2.0f * CV_PI_F);
Ballard_PosScale_calcHist<<<grid, block>>>(coordList, thetaList,
r_table, r_sizes,
hist, rows, cols,
minScale, scaleStep, scaleRange,
idp, thetaScale);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
__global__ void Ballard_PosScale_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int scaleRange,
float4* out, int3* votes, const int maxSize,
const float minScale, const float scaleStep, const float dp, const int threshold)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= cols || y >= rows)
return;
for (int s = 0; s < scaleRange; ++s)
{
const float scale = minScale + s * scaleStep;
const int prevScaleIdx = (s) * (rows + 2);
const int curScaleIdx = (s + 1) * (rows + 2);
const int nextScaleIdx = (s + 2) * (rows + 2);
const int curVotes = hist(curScaleIdx + y + 1, x + 1);
if (curVotes > threshold &&
curVotes > hist(curScaleIdx + y + 1, x) &&
curVotes >= hist(curScaleIdx + y + 1, x + 2) &&
curVotes > hist(curScaleIdx + y, x + 1) &&
curVotes >= hist(curScaleIdx + y + 2, x + 1) &&
curVotes > hist(prevScaleIdx + y + 1, x + 1) &&
curVotes >= hist(nextScaleIdx + y + 1, x + 1))
{
const int ind = ::atomicAdd(&g_counter, 1);
if (ind < maxSize)
{
out[ind] = make_float4(x * dp, y * dp, scale, 0.0f);
votes[ind] = make_int3(curVotes, curVotes, 0);
}
}
}
}
int Ballard_PosScale_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int scaleRange, float4* out, int3* votes, int maxSize,
float minScale, float scaleStep, float dp, int threshold)
{
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(Ballard_PosScale_findPosInHist, cudaFuncCachePreferL1) );
Ballard_PosScale_findPosInHist<<<grid, block>>>(hist, rows, cols, scaleRange, out, votes,
maxSize, minScale, scaleStep, dp, threshold);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxSize);
return totalCount;
}
////////////////////////////////////////////////////////////////////////
// Ballard_PosRotation
__global__ void Ballard_PosRotation_calcHist(const unsigned int* coordList, const float* thetaList,
PtrStep<short2> r_table, const int* r_sizes,
PtrStepi hist, const int rows, const int cols,
const float minAngle, const float angleStep, const int angleRange,
const float idp, const float thetaScale)
{
const unsigned int coord = coordList[blockIdx.x];
float2 p;
p.x = (coord & 0xFFFF);
p.y = (coord >> 16) & 0xFFFF;
const float thetaVal = thetaList[blockIdx.x];
for (int a = threadIdx.x; a < angleRange; a += blockDim.x)
{
const float angle = (minAngle + a * angleStep) * (CV_PI_F / 180.0f);
float sinA, cosA;
sincosf(angle, &sinA, &cosA);
float theta = thetaVal - angle;
if (theta < 0)
theta += 2.0f * CV_PI_F;
const int n = __float2int_rn(theta * thetaScale);
const short2* r_row = r_table.ptr(n);
const int r_row_size = r_sizes[n];
for (int j = 0; j < r_row_size; ++j)
{
const float2 d = saturate_cast<float2>(r_row[j]);
const float2 dr = make_float2(d.x * cosA - d.y * sinA, d.x * sinA + d.y * cosA);
float2 c = make_float2(p.x - dr.x, p.y - dr.y);
c.x *= idp;
c.y *= idp;
if (c.x >= 0 && c.x < cols && c.y >= 0 && c.y < rows)
::atomicAdd(hist.ptr((a + 1) * (rows + 2) + __float2int_rn(c.y + 1)) + __float2int_rn(c.x + 1), 1);
}
}
}
void Ballard_PosRotation_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
PtrStepSz<short2> r_table, const int* r_sizes,
PtrStepi hist, int rows, int cols,
float minAngle, float angleStep, int angleRange,
float dp, int levels)
{
const dim3 block(256);
const dim3 grid(pointsCount);
const float idp = 1.0f / dp;
const float thetaScale = levels / (2.0f * CV_PI_F);
Ballard_PosRotation_calcHist<<<grid, block>>>(coordList, thetaList,
r_table, r_sizes,
hist, rows, cols,
minAngle, angleStep, angleRange,
idp, thetaScale);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
__global__ void Ballard_PosRotation_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int angleRange,
float4* out, int3* votes, const int maxSize,
const float minAngle, const float angleStep, const float dp, const int threshold)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= cols || y >= rows)
return;
for (int a = 0; a < angleRange; ++a)
{
const float angle = minAngle + a * angleStep;
const int prevAngleIdx = (a) * (rows + 2);
const int curAngleIdx = (a + 1) * (rows + 2);
const int nextAngleIdx = (a + 2) * (rows + 2);
const int curVotes = hist(curAngleIdx + y + 1, x + 1);
if (curVotes > threshold &&
curVotes > hist(curAngleIdx + y + 1, x) &&
curVotes >= hist(curAngleIdx + y + 1, x + 2) &&
curVotes > hist(curAngleIdx + y, x + 1) &&
curVotes >= hist(curAngleIdx + y + 2, x + 1) &&
curVotes > hist(prevAngleIdx + y + 1, x + 1) &&
curVotes >= hist(nextAngleIdx + y + 1, x + 1))
{
const int ind = ::atomicAdd(&g_counter, 1);
if (ind < maxSize)
{
out[ind] = make_float4(x * dp, y * dp, 1.0f, angle);
votes[ind] = make_int3(curVotes, 0, curVotes);
}
}
}
}
int Ballard_PosRotation_findPosInHist_gpu(PtrStepi hist, int rows, int cols, int angleRange, float4* out, int3* votes, int maxSize,
float minAngle, float angleStep, float dp, int threshold)
{
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(Ballard_PosRotation_findPosInHist, cudaFuncCachePreferL1) );
Ballard_PosRotation_findPosInHist<<<grid, block>>>(hist, rows, cols, angleRange, out, votes,
maxSize, minAngle, angleStep, dp, threshold);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxSize);
return totalCount;
}
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
// Guil_Full
// Guil_Full