added ROI support to ocl::columnSum

pull/1633/head
Ilya Lavrenov 11 years ago
parent 60f9ba0c64
commit 110a92c126
  1. 15
      modules/ocl/src/columnsum.cpp
  2. 28
      modules/ocl/src/opencl/imgproc_columnsum.cl

@ -52,25 +52,24 @@ using namespace cv::ocl;
void cv::ocl::columnSum(const oclMat &src, oclMat &dst)
{
CV_Assert(src.type() == CV_32FC1);
dst.create(src.size(), src.type());
Context *clCxt = src.clCxt;
const std::string kernelName = "columnSum";
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
std::vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
args.push_back( make_pair( sizeof(cl_int), (void *)&src_step));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step));
args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset));
size_t globalThreads[3] = {dst.cols, 1, 1};
size_t localThreads[3] = {256, 1, 1};
openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
openCLExecuteKernel(src.clCxt, &imgproc_columnsum, "columnSum", globalThreads, localThreads, args, src.oclchannels(), src.depth());
}

@ -43,38 +43,28 @@
//
//M*/
#pragma OPENCL EXTENSION cl_amd_printf : enable
#if defined (__ATI__)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (__NVIDIA__)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
////////////////////////////////////////////////////////////////////
///////////////////////// columnSum ////////////////////////////////
////////////////////////////////////////////////////////////////////
/// CV_32FC1
__kernel void columnSum_C1_D5(__global float* src,__global float* dst,int srcCols,int srcRows,int srcStep,int dstStep)
__kernel void columnSum_C1_D5(__global float * src, __global float * dst,
int cols, int rows, int src_step, int dst_step, int src_offset, int dst_offset)
{
const int x = get_global_id(0);
srcStep >>= 2;
dstStep >>= 2;
if (x < srcCols)
if (x < cols)
{
int srcIdx = x ;
int dstIdx = x ;
int srcIdx = x + src_offset;
int dstIdx = x + dst_offset;
float sum = 0;
for (int y = 0; y < srcRows; ++y)
for (int y = 0; y < rows; ++y)
{
sum += src[srcIdx];
dst[dstIdx] = sum;
srcIdx += srcStep;
dstIdx += dstStep;
srcIdx += src_step;
dstIdx += dst_step;
}
}
}

Loading…
Cancel
Save