Refactoring and optimization

pull/3279/head
Alexander Karsakov 10 years ago
parent d59a6fa518
commit 39b27a19be
  1. 114
      modules/imgproc/src/hough.cpp
  2. 5
      modules/imgproc/src/opencl/hough_lines.cl

@ -652,80 +652,102 @@ HoughLinesProbabilistic( Mat& image,
}
}
}
static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, double theta, int threshold,
double min_theta, double max_theta)
static bool ocl_makePointsList(InputArray _src, OutputArray _pointsList, InputOutputArray _counters)
{
CV_Assert(_src.type() == CV_8UC1);
if (max_theta < 0 || max_theta > CV_PI ) {
CV_Error( CV_StsBadArg, "max_theta must fall between 0 and pi" );
}
if (min_theta < 0 || min_theta > max_theta ) {
CV_Error( CV_StsBadArg, "min_theta must fall between 0 and max_theta" );
}
UMat src = _src.getUMat();
float irho = (float) (1 / rho);
int numangle = cvRound((max_theta - min_theta) / theta);
int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho);
_pointsList.create(1, (int) src.total(), CV_32SC1);
UMat pointsList = _pointsList.getUMat();
UMat counters = _counters.getUMat();
ocl::Device dev = ocl::Device::getDefault();
// make list of nonzero points
const int pixelsPerWI = 8;
const int pixelsPerWI = 16;
int workgroup_size = min((int) dev.maxWorkGroupSize(), (src.cols + pixelsPerWI - 1)/pixelsPerWI);
ocl::Kernel pointListKernel("make_point_list", ocl::imgproc::hough_lines_oclsrc,
format("-D MAKE_POINTS_LIST -D GROUP_SIZE=%d -D LOCAL_SIZE=%d", workgroup_size, src.cols));
if (pointListKernel.empty())
return false;
UMat pointsList(1, (int) src.total(), CV_32SC1);
UMat counters(1, 2, CV_32SC1, Scalar::all(0));
pointListKernel.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnlyNoSize(pointsList),
ocl::KernelArg::PtrWriteOnly(counters));
size_t localThreads[2] = { workgroup_size, 1 };
size_t globalThreads[2] = { workgroup_size, src.rows };
if (!pointListKernel.run(2, globalThreads, localThreads, false))
return false;
return pointListKernel.run(2, globalThreads, localThreads, false);
}
int total_points = counters.getMat(ACCESS_READ).at<int>(0, 0);
if (total_points <= 0)
{
_lines.assign(UMat(0,0,CV_32FC2));
return true;
}
static bool ocl_fillAccum(InputArray _pointsList, OutputArray _accum, int total_points, double rho, double theta, int numrho, int numangle)
{
UMat pointsList = _pointsList.getUMat();
_accum.create(numangle + 2, numrho + 2, CV_32SC1);
UMat accum = _accum.getUMat();
ocl::Device dev = ocl::Device::getDefault();
// convert src image to hough space
UMat accum(numangle + 2, numrho + 2, CV_32SC1);
workgroup_size = min((int) dev.maxWorkGroupSize(), total_points);
float irho = (float) (1 / rho);
int workgroup_size = min((int) dev.maxWorkGroupSize(), total_points);
ocl::Kernel fillAccumKernel;
size_t* fillAccumLT = NULL;
size_t localThreads[2];
size_t globalThreads[2];
int local_memory_needed = (numrho + 2)*sizeof(int);
if (local_memory_needed > dev.localMemSize())
{
accum.setTo(Scalar::all(0));
fillAccumKernel.create("fill_accum_global", ocl::imgproc::hough_lines_oclsrc,
format("-D FILL_ACCUM_GLOBAL"));
if (fillAccumKernel.empty())
return false;
globalThreads[0] = workgroup_size; globalThreads[1] = numangle;
fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum),
total_points, irho, (float) theta, numrho, numangle);
return fillAccumKernel.run(2, globalThreads, NULL, false);
}
else
{
fillAccumKernel.create("fill_accum_local", ocl::imgproc::hough_lines_oclsrc,
format("-D FILL_ACCUM_LOCAL -D LOCAL_SIZE=%d -D BUFFER_SIZE=%d", workgroup_size, numrho + 2));
if (fillAccumKernel.empty())
return false;
localThreads[0] = workgroup_size; localThreads[1] = 1;
globalThreads[0] = workgroup_size; globalThreads[1] = numangle+2;
fillAccumLT = localThreads;
fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum),
total_points, irho, (float) theta, numrho, numangle);
return fillAccumKernel.run(2, globalThreads, localThreads, false);
}
}
static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, double theta, int threshold,
double min_theta, double max_theta)
{
CV_Assert(_src.type() == CV_8UC1);
if (max_theta < 0 || max_theta > CV_PI ) {
CV_Error( CV_StsBadArg, "max_theta must fall between 0 and pi" );
}
if (fillAccumKernel.empty())
if (min_theta < 0 || min_theta > max_theta ) {
CV_Error( CV_StsBadArg, "min_theta must fall between 0 and max_theta" );
}
UMat src = _src.getUMat();
int numangle = cvRound((max_theta - min_theta) / theta);
int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho);
UMat pointsList;
UMat counters(1, 2, CV_32SC1, Scalar::all(0));
if (!ocl_makePointsList(src, pointsList, counters))
return false;
fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum),
total_points, irho, (float) theta, numrho, numangle);
int total_points = counters.getMat(ACCESS_READ).at<int>(0, 0);
if (total_points <= 0)
{
_lines.assign(UMat(0,0,CV_32FC2));
return true;
}
if (!fillAccumKernel.run(2, globalThreads, fillAccumLT, false))
UMat accum;
if (!ocl_fillAccum(pointsList, accum, total_points, rho, theta, numrho, numangle))
return false;
const int pixPerWI = 8;
@ -741,7 +763,7 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub
getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::WriteOnlyNoSize(lines),
ocl::KernelArg::PtrWriteOnly(counters), linesMax, threshold, (float) rho, (float) theta);
globalThreads[0] = (numrho + pixPerWI - 1)/pixPerWI; globalThreads[1] = numangle;
size_t globalThreads[2] = { (numrho + pixPerWI - 1)/pixPerWI, numangle };
if (!getLinesKernel.run(2, globalThreads, NULL, false))
return false;
@ -753,13 +775,23 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub
return true;
}
static bool ocl_HoughLinesP(InputArray _src, OutputArray _lines, double rho, double theta, int threshold,
double minLineLength, double maxGap)
{
CV_Assert(_src.type() == CV_8UC1);
UMat src = _src.getUMat();
return false;
}
}
void cv::HoughLines( InputArray _image, OutputArray _lines,
double rho, double theta, int threshold,
double srn, double stn, double min_theta, double max_theta )
{
CV_OCL_RUN(srn == 0 && stn == 0 && _lines.isUMat(),
CV_OCL_RUN(srn == 0 && stn == 0 && _image.isUMat() && _lines.isUMat(),
ocl_HoughLines(_image, _lines, rho, theta, threshold, min_theta, max_theta));
Mat image = _image.getMat();
@ -778,6 +810,8 @@ void cv::HoughLinesP(InputArray _image, OutputArray _lines,
double rho, double theta, int threshold,
double minLineLength, double maxGap )
{
CV_OCL_RUN(_image.isUMat() && _lines.isUMat(), ocl_HoughLinesP(_image, _lines, rho, theta, threshold, minLineLength, maxGap));
Mat image = _image.getMat();
std::vector<Vec4i> lines;
HoughLinesProbabilistic(image, (float)rho, (float)theta, threshold, cvRound(minLineLength), cvRound(maxGap), lines, INT_MAX);

@ -107,14 +107,13 @@ __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, i
__global const int * list = (__global const int*)(list_ptr + list_offset);
const int shift = (numrho - 1) / 2;
for (int i = count_idx; i < total_points; i += LOCAL_SIZE)
{
const int point = list[i];
const int x = (point & 0xFFFF);
const int y = (point >> 16) & 0xFFFF;
const int y = point >> 16;
int r = convert_int_rte(x * cosVal + y * sinVal) + shift;
int r = convert_int_rte(mad(x, cosVal, y * sinVal)) + shift;
atomic_inc(l_accum + r + 1);
}

Loading…
Cancel
Save