|
|
|
@ -1585,30 +1585,31 @@ static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, st |
|
|
|
|
{ |
|
|
|
|
dst.create(src.size(), src.type()); |
|
|
|
|
|
|
|
|
|
int channels = dst.oclchannels(), depth = dst.depth(); |
|
|
|
|
|
|
|
|
|
size_t vector_length = 1; |
|
|
|
|
int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1); |
|
|
|
|
int cols = divUp(dst.cols * channels + offset_cols, vector_length); |
|
|
|
|
int rows = dst.rows; |
|
|
|
|
|
|
|
|
|
size_t localThreads[3] = { 16, 16, 1 }; |
|
|
|
|
size_t globalThreads[3] = { cols, rows, 1 }; |
|
|
|
|
size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; |
|
|
|
|
|
|
|
|
|
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize(); |
|
|
|
|
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize(); |
|
|
|
|
int temp1_step = temp1.step / temp1.elemSize(), temp1_offset = temp1.offset / temp1.elemSize(); |
|
|
|
|
|
|
|
|
|
vector<pair<size_t , const void *> > args; |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&temp1.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.step )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src_step )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1_step )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.rows )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.cols )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&temp1_offset )); |
|
|
|
|
|
|
|
|
|
openCLExecuteKernel(src.clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth); |
|
|
|
|
openCLExecuteKernel(src.clCxt, source, kernelName, globalThreads, localThreads, args, -1, dst.depth()); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void cv::ocl::convolve(const oclMat &x, const oclMat &t, oclMat &y) |
|
|
|
|
{ |
|
|
|
|
CV_Assert(x.depth() == CV_32F && t.depth() == CV_32F); |
|
|
|
|