@ -122,22 +122,22 @@ namespace cv { namespace gpu { namespace cudev
const int PIXELS_PER_THREAD = 16;
void* counterPtr;
cvC udaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cvC udaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 4);
const dim3 grid(divUp(src.cols, block.x * PIXELS_PER_THREAD), divUp(src.rows, block.y));
cvC udaSafeCall( cudaFuncSetCacheConfig(buildPointList<PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
cudaSafeCall( cudaFuncSetCacheConfig(buildPointList<PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
buildPointList<PIXELS_PER_THREAD><<<grid, block>>>(src, list);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cvC udaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
return totalCount;
}
@ -225,9 +225,9 @@ namespace cv { namespace gpu { namespace cudev
else
linesAccumGlobal<<<grid, block>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
////////////////////////////////////////////////////////////////////////
@ -264,22 +264,22 @@ namespace cv { namespace gpu { namespace cudev
int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort)
{
void* counterPtr;
cvC udaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cvC udaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
cvC udaSafeCall( cudaFuncSetCacheConfig(linesGetResult, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(linesGetResult, cudaFuncCachePreferL1) );
linesGetResult<<<grid, block>>>(accum, out, votes, maxSize, rho, theta, threshold, accum.cols - 2);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cvC udaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxSize);
@ -462,9 +462,9 @@ namespace cv { namespace gpu { namespace cudev
int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength)
{
void* counterPtr;
cvC udaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cvC udaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
@ -476,12 +476,12 @@ namespace cv { namespace gpu { namespace cudev
rho, theta,
lineGap, lineLength,
mask.rows, mask.cols);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cvC udaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxSize);
@ -548,12 +548,12 @@ namespace cv { namespace gpu { namespace cudev
const dim3 block(256);
const dim3 grid(divUp(count, block.x));
cvC udaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) );
circlesAccumCenters<<<grid, block>>>(list, count, dx, dy, accum, accum.cols - 2, accum.rows - 2, minRadius, maxRadius, idp);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
////////////////////////////////////////////////////////////////////////
@ -586,22 +586,22 @@ namespace cv { namespace gpu { namespace cudev
int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold)
{
void* counterPtr;
cvC udaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cvC udaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
cvC udaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) );
buildCentersList<<<grid, block>>>(accum, centers, threshold);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cvC udaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
return totalCount;
}
@ -662,9 +662,9 @@ namespace cv { namespace gpu { namespace cudev
float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20)
{
void* counterPtr;
cvC udaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cvC udaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(has20 ? 1024 : 512);
const dim3 grid(centersCount);
@ -673,12 +673,12 @@ namespace cv { namespace gpu { namespace cudev
size_t smemSize = (histSize + 2) * sizeof(int);
circlesAccumRadius<<<grid, block, smemSize>>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cvC udaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxCircles);
@ -768,22 +768,22 @@ namespace cv { namespace gpu { namespace cudev
const int PIXELS_PER_THREAD = 8;
void* counterPtr;
cvC udaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cvC udaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 4);
const dim3 grid(divUp(edges.cols, block.x * PIXELS_PER_THREAD), divUp(edges.rows, block.y));
cvC udaSafeCall( cudaFuncSetCacheConfig(buildEdgePointList<T, PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
cudaSafeCall( cudaFuncSetCacheConfig(buildEdgePointList<T, PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
buildEdgePointList<T, PIXELS_PER_THREAD><<<grid, block>>>(edges, (PtrStepSz<T>) dx, (PtrStepSz<T>) dy, coordList, thetaList);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cvC udaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
return totalCount;
}
@ -824,9 +824,9 @@ namespace cv { namespace gpu { namespace cudev
const float thetaScale = levels / (2.0f * CV_PI_F);
buildRTable<<<grid, block>>>(coordList, thetaList, pointsCount, r_table, r_sizes, r_table.cols, templCenter, thetaScale);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
////////////////////////////////////////////////////////////////////////
@ -877,9 +877,9 @@ namespace cv { namespace gpu { namespace cudev
const float thetaScale = levels / (2.0f * CV_PI_F);
GHT_Ballard_Pos_calcHist<<<grid, block>>>(coordList, thetaList, pointsCount, r_table, r_sizes, hist, idp, thetaScale);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
__global__ void GHT_Ballard_Pos_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes, const int maxSize, const float dp, const int threshold)
@ -911,22 +911,22 @@ namespace cv { namespace gpu { namespace cudev
int GHT_Ballard_Pos_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int maxSize, float dp, int threshold)
{
void* counterPtr;
cvC udaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cvC udaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y));
cvC udaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_Pos_findPosInHist, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_Pos_findPosInHist, cudaFuncCachePreferL1) );
GHT_Ballard_Pos_findPosInHist<<<grid, block>>>(hist, out, votes, maxSize, dp, threshold);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cvC udaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxSize);
@ -989,9 +989,9 @@ namespace cv { namespace gpu { namespace cudev
hist, rows, cols,
minScale, scaleStep, scaleRange,
idp, thetaScale);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
__global__ void GHT_Ballard_PosScale_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int scaleRange,
@ -1037,22 +1037,22 @@ namespace cv { namespace gpu { namespace cudev
float minScale, float scaleStep, float dp, int threshold)
{
void* counterPtr;
cvC udaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cvC udaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
cvC udaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosScale_findPosInHist, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosScale_findPosInHist, cudaFuncCachePreferL1) );
GHT_Ballard_PosScale_findPosInHist<<<grid, block>>>(hist, rows, cols, scaleRange, out, votes, maxSize, minScale, scaleStep, dp, threshold);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cvC udaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxSize);
@ -1123,9 +1123,9 @@ namespace cv { namespace gpu { namespace cudev
hist, rows, cols,
minAngle, angleStep, angleRange,
idp, thetaScale);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
__global__ void GHT_Ballard_PosRotation_findPosInHist(const PtrStepi hist, const int rows, const int cols, const int angleRange,
@ -1171,22 +1171,22 @@ namespace cv { namespace gpu { namespace cudev
float minAngle, float angleStep, float dp, int threshold)
{
void* counterPtr;
cvC udaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cvC udaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
cvC udaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosRotation_findPosInHist, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(GHT_Ballard_PosRotation_findPosInHist, cudaFuncCachePreferL1) );
GHT_Ballard_PosRotation_findPosInHist<<<grid, block>>>(hist, rows, cols, angleRange, out, votes, maxSize, minAngle, angleStep, dp, threshold);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cvC udaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxSize);
@ -1242,7 +1242,7 @@ namespace cv { namespace gpu { namespace cudev
tbl.r2_data = r2.data;
tbl.r2_step = r2.step;
cvC udaSafeCall( cudaMemcpyToSymbol(c_templFeatures, &tbl, sizeof(FeatureTable)) );
cudaSafeCall( cudaMemcpyToSymbol(c_templFeatures, &tbl, sizeof(FeatureTable)) );
}
void GHT_Guil_Full_setImageFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2)
{
@ -1266,7 +1266,7 @@ namespace cv { namespace gpu { namespace cudev
tbl.r2_data = r2.data;
tbl.r2_step = r2.step;
cvC udaSafeCall( cudaMemcpyToSymbol(c_imageFeatures, &tbl, sizeof(FeatureTable)) );
cudaSafeCall( cudaMemcpyToSymbol(c_imageFeatures, &tbl, sizeof(FeatureTable)) );
}
struct TemplFeatureTable
@ -1419,9 +1419,9 @@ namespace cv { namespace gpu { namespace cudev
sizes, maxSize,
xi * (CV_PI_F / 180.0f), angleEpsilon * (CV_PI_F / 180.0f), alphaScale,
center, maxDist);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
thrust::device_ptr<int> sizesPtr(sizes);
thrust::transform(sizesPtr, sizesPtr + levels + 1, sizesPtr, cudev::bind2nd(cudev::minimum<int>(), maxSize));
@ -1501,9 +1501,9 @@ namespace cv { namespace gpu { namespace cudev
GHT_Guil_Full_calcOHist<<<grid, block, smemSize>>>(templSizes, imageSizes, OHist,
minAngle, maxAngle, 1.0f / angleStep, angleRange);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
__global__ void GHT_Guil_Full_calcSHist(const int* templSizes, const int* imageSizes, int* SHist,
@ -1566,9 +1566,9 @@ namespace cv { namespace gpu { namespace cudev
GHT_Guil_Full_calcSHist<<<grid, block, smemSize>>>(templSizes, imageSizes, SHist,
angle, angleEpsilon,
minScale, maxScale, iScaleStep, scaleRange);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
__global__ void GHT_Guil_Full_calcPHist(const int* templSizes, const int* imageSizes, PtrStepSzi PHist,
@ -1636,14 +1636,14 @@ namespace cv { namespace gpu { namespace cudev
const float sinVal = ::sinf(angle);
const float cosVal = ::cosf(angle);
cvC udaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_calcPHist, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_calcPHist, cudaFuncCachePreferL1) );
GHT_Guil_Full_calcPHist<<<grid, block>>>(templSizes, imageSizes, PHist,
angle, sinVal, cosVal, angleEpsilon, scale,
1.0f / dp);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
}
__global__ void GHT_Guil_Full_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes, const int maxSize,
@ -1679,24 +1679,24 @@ namespace cv { namespace gpu { namespace cudev
float dp, int threshold)
{
void* counterPtr;
cvC udaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cvC udaSafeCall( cudaMemcpy(counterPtr, &curSize, sizeof(int), cudaMemcpyHostToDevice) );
cudaSafeCall( cudaMemcpy(counterPtr, &curSize, sizeof(int), cudaMemcpyHostToDevice) );
const dim3 block(32, 8);
const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y));
cvC udaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_findPosInHist, cudaFuncCachePreferL1) );
cudaSafeCall( cudaFuncSetCacheConfig(GHT_Guil_Full_findPosInHist, cudaFuncCachePreferL1) );
GHT_Guil_Full_findPosInHist<<<grid, block>>>(hist, out, votes, maxSize,
angle, angleVotes, scale, scaleVotes,
dp, threshold);
cvC udaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cvC udaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cvC udaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxSize);