fixed minMaxLoc kernel (removed compilation errors)

pull/1503/head
Ilya Lavrenov 12 years ago
parent 544c02407e
commit 0faac595a8
  1. 28
      modules/ocl/src/arithm.cpp
  2. 158
      modules/ocl/src/opencl/arithm_minMaxLoc.cl

@ -380,7 +380,7 @@ Scalar cv::ocl::sum(const oclMat &src)
{
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
}
static sumFunc functab[2] =
{
@ -397,7 +397,7 @@ Scalar cv::ocl::absSum(const oclMat &src)
{
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
}
static sumFunc functab[2] =
{
@ -414,7 +414,7 @@ Scalar cv::ocl::sqrSum(const oclMat &src)
{
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
}
static sumFunc functab[2] =
{
@ -519,8 +519,6 @@ static void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl
args.push_back( make_pair( sizeof(cl_int) , (void *)&moffset ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst ));
// printf("elemnum:%d,cols:%d,invalid_cols:%d,offset:%d,minvalid_cols:%d,moffset:%d,repeat_e:%d\r\n",
// elemnum,cols,invalid_cols,offset,minvalid_cols,moffset,repeat_me);
openCLExecuteKernel(src.clCxt, &arithm_minMax_mask, kernelName, gt, lt, args, -1, -1, build_options);
}
}
@ -579,7 +577,7 @@ void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, cons
CV_Assert(src.oclchannels() == 1);
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
}
static minMaxFunc functab[8] =
{
@ -665,7 +663,7 @@ static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kern
{
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
}
@ -712,7 +710,7 @@ static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kern
{
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
}
@ -879,7 +877,7 @@ static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src
{
if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
}
@ -923,7 +921,7 @@ static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat
{
if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
}
@ -979,7 +977,7 @@ static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, o
{
if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
}
@ -1032,7 +1030,7 @@ static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &d
{
if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
return;
}
@ -1143,6 +1141,7 @@ static void arithmetic_minMaxLoc_mask_run(const oclMat &src, const oclMat &mask,
openCLExecuteKernel(src.clCxt, &arithm_minMaxLoc_mask, "arithm_op_minMaxLoc_mask", gt, lt, args, -1, -1, build_options);
}
}
template <typename T>
void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
Point *minLoc, Point *maxLoc, const oclMat &mask)
@ -1211,7 +1210,7 @@ void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
{
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
return;
}
@ -1261,10 +1260,9 @@ int cv::ocl::countNonZero(const oclMat &src)
size_t groupnum = src.clCxt->computeUnits();
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "select device don't support double");
CV_Error(CV_GpuNotSupported, "selected device doesn't support double");
}
CV_Assert(groupnum != 0);
// groupnum = groupnum * 2;
int vlen = 8 , dbsize = groupnum * vlen;
Context *clCxt = src.clCxt;
string kernelName = "arithm_op_nonzero";

@ -142,6 +142,7 @@
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics:enable
/**************************************Array minMax**************************************/
__kernel void arithm_op_minMaxLoc(int cols, int invalid_cols, int offset, int elemnum, int groupnum,
__global VEC_TYPE *src, __global RES_TYPE *dst)
{
@ -149,16 +150,21 @@ __kernel void arithm_op_minMaxLoc (int cols,int invalid_cols,int offset,int elem
unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0);
unsigned int idx = offset + id + (id / cols) * invalid_cols;
__local VEC_TYPE localmem_max[128], localmem_min[128];
VEC_TYPE minval, maxval, temp;
__local VEC_TYPE_LOC localmem_maxloc[128], localmem_minloc[128];
VEC_TYPE_LOC minloc, maxloc, temploc, negative = -1;
int idx_c;
if (id < elemnum)
{
temp = src[idx];
idx_c = idx << 2;
temploc = (VEC_TYPE_LOC)(idx_c, idx_c + 1, idx_c + 2, idx_c + 3);
if (id % cols == 0 )
{
repeat_s(temp);
@ -181,13 +187,15 @@ __kernel void arithm_op_minMaxLoc (int cols,int invalid_cols,int offset,int elem
minloc = negative;
maxloc = negative;
}
float4 aaa;
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
int grainSize = (groupnum << 8);
for (id = id + grainSize; id < elemnum; id = id + grainSize)
{
idx = offset + id + (id / cols) * invalid_cols;
temp = src[idx];
idx_c = idx << 2;
temploc = (VEC_TYPE_LOC)(idx_c, idx_c+1, idx_c+2, idx_c+3);
if (id % cols == 0 )
{
repeat_s(temp);
@ -198,146 +206,13 @@ __kernel void arithm_op_minMaxLoc (int cols,int invalid_cols,int offset,int elem
repeat_e(temp);
repeat_e(temploc);
}
minval = min(minval, temp);
maxval = max(maxval, temp);
minloc = CONDITION_FUNC(minval == temp, temploc, minloc);
maxloc = CONDITION_FUNC(maxval == temp, temploc, maxloc);
aaa= convert_float4(maxval == temp);
maxloc = convert_int4(aaa) ? temploc : maxloc;
}
if(lid > 127)
{
localmem_min[lid - 128] = minval;
localmem_max[lid - 128] = maxval;
localmem_minloc[lid - 128] = minloc;
localmem_maxloc[lid - 128] = maxloc;
}
barrier(CLK_LOCAL_MEM_FENCE);
if(lid < 128)
{
localmem_min[lid] = min(minval,localmem_min[lid]);
localmem_max[lid] = max(maxval,localmem_max[lid]);
localmem_minloc[lid] = CONDITION_FUNC(localmem_min[lid] == minval, minloc , localmem_minloc[lid]);
localmem_maxloc[lid] = CONDITION_FUNC(localmem_max[lid] == maxval, maxloc , localmem_maxloc[lid]);
}
barrier(CLK_LOCAL_MEM_FENCE);
for(int lsize = 64; lsize > 0; lsize >>= 1)
{
if(lid < lsize)
{
int lid2 = lsize + lid;
localmem_min[lid] = min(localmem_min[lid] , localmem_min[lid2]);
localmem_max[lid] = max(localmem_max[lid] , localmem_max[lid2]);
localmem_minloc[lid] =
CONDITION_FUNC(localmem_min[lid] == localmem_min[lid2], localmem_minloc[lid2] , localmem_minloc[lid]);
localmem_maxloc[lid] =
CONDITION_FUNC(localmem_max[lid] == localmem_max[lid2], localmem_maxloc[lid2] , localmem_maxloc[lid]);
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if( lid == 0)
{
dst[gid] = CONVERT_RES_TYPE(localmem_min[0]);
dst[gid + groupnum] = CONVERT_RES_TYPE(localmem_max[0]);
dst[gid + 2 * groupnum] = CONVERT_RES_TYPE(localmem_minloc[0]);
dst[gid + 3 * groupnum] = CONVERT_RES_TYPE(localmem_maxloc[0]);
}
}
#if defined (REPEAT_S0)
#define repeat_ms(a) a = a;
#endif
#if defined (REPEAT_S1)
#define repeat_ms(a) a.s0 = 0;
#endif
#if defined (REPEAT_S2)
#define repeat_ms(a) a.s0 = 0;a.s1 = 0;
#endif
#if defined (REPEAT_S3)
#define repeat_ms(a) a.s0 = 0;a.s1 = 0;a.s2 = 0;
#endif
#if defined (REPEAT_E0)
#define repeat_me(a) a = a;
#endif
#if defined (REPEAT_E1)
#define repeat_me(a) a.s3 = 0;
#endif
#if defined (REPEAT_E2)
#define repeat_me(a) a.s3 = 0;a.s2 = 0;
#endif
#if defined (REPEAT_E3)
#define repeat_me(a) a.s3 = 0;a.s2 = 0;a.s1 = 0;
#endif
/**************************************Array minMaxLoc mask**************************************/
/*
__kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int elemnum,int groupnum,__global VEC_TYPE *src,
int minvalid_cols,int moffset,__global uchar4 *mask,__global RES_TYPE *dst)
{
unsigned int lid = get_local_id(0);
unsigned int gid = get_group_id(0);
unsigned int id = get_global_id(0);
unsigned int idx = offset + id + (id / cols) * invalid_cols;
unsigned int midx = moffset + id + (id / cols) * minvalid_cols;
__local VEC_TYPE localmem_max[128],localmem_min[128];
VEC_TYPE minval,maxval,temp,max_val = MAX_VAL,min_val = MIN_VAL,zero = 0,m_temp;
__local VEC_TYPE_LOC localmem_maxloc[128],localmem_minloc[128];
VEC_TYPE_LOC minloc,maxloc,temploc,negative = -1;
if(id < elemnum)
{
temp = src[idx];
m_temp = CONVERT_TYPE(mask[midx]);
int idx_c = idx << 2;
temploc = (VEC_TYPE_LOC)(idx_c,idx_c+1,idx_c+2,idx_c+3);
if(id % cols == 0 )
{
repeat_ms(m_temp);
repeat_s(temploc);
}
if(id % cols == cols - 1)
{
repeat_me(m_temp);
repeat_e(temploc);
}
minval = m_temp > zero ? temp : max_val;
maxval = m_temp > zero ? temp : min_val;
minloc = CONDITION_FUNC(m_temp > zero, temploc , negative);
maxloc = minloc;
}
else
{
minval = MAX_VAL;
maxval = MIN_VAL;
minloc = negative;
maxloc = negative;
}
for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
{
idx = offset + id + (id / cols) * invalid_cols;
midx = moffset + id + (id / cols) * minvalid_cols;
temp = src[idx];
m_temp = CONVERT_TYPE(mask[midx]);
int idx_c = idx << 2;
temploc = (VEC_TYPE_LOC)(idx_c,idx_c+1,idx_c+2,idx_c+3);
if(id % cols == 0 )
{
repeat_ms(m_temp);
repeat_s(temploc);
}
if(id % cols == cols - 1)
{
repeat_me(m_temp);
repeat_e(temploc);
}
minval = min(minval,m_temp > zero ? temp : max_val);
maxval = max(maxval,m_temp > zero ? temp : min_val);
temploc = CONDITION_FUNC(m_temp > zero, temploc , negative);
minloc = CONDITION_FUNC(minval == temp, temploc , minloc);
maxloc = CONDITION_FUNC(maxval == temp, temploc , maxloc);
}
if (lid > 127)
{
localmem_min[lid - 128] = minval;
@ -346,6 +221,7 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
localmem_maxloc[lid - 128] = maxloc;
}
barrier(CLK_LOCAL_MEM_FENCE);
if (lid < 128)
{
localmem_min[lid] = min(minval,localmem_min[lid]);
@ -354,6 +230,7 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
localmem_maxloc[lid] = CONDITION_FUNC(localmem_max[lid] == maxval, maxloc, localmem_maxloc[lid]);
}
barrier(CLK_LOCAL_MEM_FENCE);
for (int lsize = 64; lsize > 0; lsize >>= 1)
{
if (lid < lsize)
@ -361,13 +238,12 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
int lid2 = lsize + lid;
localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
localmem_minloc[lid] =
CONDITION_FUNC(localmem_min[lid] == localmem_min[lid2], localmem_minloc[lid2] , localmem_minloc[lid]);
localmem_maxloc[lid] =
CONDITION_FUNC(localmem_max[lid] == localmem_max[lid2], localmem_maxloc[lid2] , localmem_maxloc[lid]);
localmem_minloc[lid] = CONDITION_FUNC(localmem_min[lid] == localmem_min[lid2], localmem_minloc[lid2], localmem_minloc[lid]);
localmem_maxloc[lid] = CONDITION_FUNC(localmem_max[lid] == localmem_max[lid2], localmem_maxloc[lid2], localmem_maxloc[lid]);
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if ( lid == 0)
{
dst[gid] = CONVERT_RES_TYPE(localmem_min[0]);
@ -376,5 +252,3 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
dst[gid + 3 * groupnum] = CONVERT_RES_TYPE(localmem_maxloc[0]);
}
}
*/

Loading…
Cancel
Save