|
|
|
@ -975,10 +975,12 @@ namespace cv |
|
|
|
|
void integral(const oclMat &src, oclMat &sum, oclMat &sqsum) |
|
|
|
|
{ |
|
|
|
|
CV_Assert(src.type() == CV_8UC1); |
|
|
|
|
if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) |
|
|
|
|
if(!src.clCxt->supportsFeature(ocl::FEATURE_CL_DOUBLE) && src.depth() == CV_64F) |
|
|
|
|
{ |
|
|
|
|
CV_Error(CV_GpuNotSupported, "select device don't support double"); |
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
int vlen = 4; |
|
|
|
|
int offset = src.offset / vlen; |
|
|
|
|
int pre_invalid = src.offset % vlen; |
|
|
|
@ -986,50 +988,45 @@ namespace cv |
|
|
|
|
|
|
|
|
|
oclMat t_sum , t_sqsum; |
|
|
|
|
int w = src.cols + 1, h = src.rows + 1; |
|
|
|
|
int depth; |
|
|
|
|
if( src.cols * src.rows <= 2901 * 2901 ) //2901 is the maximum size for int when all values are 255
|
|
|
|
|
{ |
|
|
|
|
t_sum.create(src.cols, src.rows, CV_32SC1); |
|
|
|
|
sum.create(h, w, CV_32SC1); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
//Use float to prevent overflow
|
|
|
|
|
t_sum.create(src.cols, src.rows, CV_32FC1); |
|
|
|
|
sum.create(h, w, CV_32FC1); |
|
|
|
|
} |
|
|
|
|
t_sqsum.create(src.cols, src.rows, CV_32FC1); |
|
|
|
|
sqsum.create(h, w, CV_32FC1); |
|
|
|
|
depth = sum.depth(); |
|
|
|
|
int sum_offset = sum.offset / vlen; |
|
|
|
|
int sqsum_offset = sqsum.offset / vlen; |
|
|
|
|
|
|
|
|
|
vector<pair<size_t , const void *> > args; |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step)); |
|
|
|
|
size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1}; |
|
|
|
|
openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, depth); |
|
|
|
|
args.clear(); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&sqsum.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum.step)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum_offset)); |
|
|
|
|
size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1}; |
|
|
|
|
openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, depth); |
|
|
|
|
int depth = src.depth() == CV_8U ? CV_32S : CV_64F; |
|
|
|
|
int type = CV_MAKE_TYPE(depth, 1); |
|
|
|
|
|
|
|
|
|
t_sum.create(src.cols, src.rows, type); |
|
|
|
|
sum.create(h, w, type); |
|
|
|
|
|
|
|
|
|
t_sqsum.create(src.cols, src.rows, CV_32FC1); |
|
|
|
|
sqsum.create(h, w, CV_32FC1); |
|
|
|
|
|
|
|
|
|
int sum_offset = sum.offset / vlen; |
|
|
|
|
int sqsum_offset = sqsum.offset / vlen; |
|
|
|
|
|
|
|
|
|
vector<pair<size_t , const void *> > args; |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step)); |
|
|
|
|
size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1}; |
|
|
|
|
openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, depth); |
|
|
|
|
|
|
|
|
|
args.clear(); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sqsum.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&sqsum.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum.step)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum_offset)); |
|
|
|
|
size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1}; |
|
|
|
|
openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, depth); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void integral(const oclMat &src, oclMat &sum) |
|
|
|
@ -1042,39 +1039,35 @@ namespace cv |
|
|
|
|
|
|
|
|
|
oclMat t_sum; |
|
|
|
|
int w = src.cols + 1, h = src.rows + 1; |
|
|
|
|
int depth; |
|
|
|
|
if(src.cols * src.rows <= 2901 * 2901) |
|
|
|
|
{ |
|
|
|
|
t_sum.create(src.cols, src.rows, CV_32SC1); |
|
|
|
|
sum.create(h, w, CV_32SC1); |
|
|
|
|
}else |
|
|
|
|
{ |
|
|
|
|
t_sum.create(src.cols, src.rows, CV_32FC1); |
|
|
|
|
sum.create(h, w, CV_32FC1); |
|
|
|
|
} |
|
|
|
|
depth = sum.depth(); |
|
|
|
|
int sum_offset = sum.offset / vlen; |
|
|
|
|
vector<pair<size_t , const void *> > args; |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step)); |
|
|
|
|
size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1}; |
|
|
|
|
openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, depth); |
|
|
|
|
args.clear(); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset)); |
|
|
|
|
size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1}; |
|
|
|
|
openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, depth); |
|
|
|
|
int depth = src.depth() == CV_8U ? CV_32S : CV_32F; |
|
|
|
|
int type = CV_MAKE_TYPE(depth, 1); |
|
|
|
|
|
|
|
|
|
t_sum.create(src.cols, src.rows, type); |
|
|
|
|
sum.create(h, w, type); |
|
|
|
|
|
|
|
|
|
int sum_offset = sum.offset / vlen; |
|
|
|
|
vector<pair<size_t , const void *> > args; |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&pre_invalid )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step)); |
|
|
|
|
size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1}; |
|
|
|
|
openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, depth); |
|
|
|
|
|
|
|
|
|
args.clear(); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset)); |
|
|
|
|
size_t gt2[3] = {t_sum.cols * 32, 1, 1}, lt2[3] = {256, 1, 1}; |
|
|
|
|
openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, depth); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/////////////////////// corner //////////////////////////////
|
|
|
|
|