|
|
|
@ -120,6 +120,7 @@ namespace cv |
|
|
|
|
extern const char *operator_convertTo; |
|
|
|
|
extern const char *operator_setTo; |
|
|
|
|
extern const char *operator_setToM; |
|
|
|
|
extern const char *convertC3C4; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -127,43 +128,98 @@ namespace cv |
|
|
|
|
// convert_C3C4
|
|
|
|
|
void convert_C3C4(const cl_mem &src, oclMat &dst, int srcStep) |
|
|
|
|
{ |
|
|
|
|
int dstStep = dst.step1() / dst.channels(); |
|
|
|
|
int dstStep_in_pixel = dst.step1() / dst.channels(); |
|
|
|
|
int pixel_end = dst.wholecols * dst.wholerows -1; |
|
|
|
|
Context *clCxt = dst.clCxt; |
|
|
|
|
string kernelName = "convertC3C4"; |
|
|
|
|
|
|
|
|
|
char compile_option[32]; |
|
|
|
|
switch(dst.depth()) |
|
|
|
|
{ |
|
|
|
|
case 0: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=uchar4"); |
|
|
|
|
break; |
|
|
|
|
case 1: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=char4"); |
|
|
|
|
break; |
|
|
|
|
case 2: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=ushort4"); |
|
|
|
|
break; |
|
|
|
|
case 3: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=short4"); |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=int4"); |
|
|
|
|
break; |
|
|
|
|
case 5: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=float4"); |
|
|
|
|
break; |
|
|
|
|
case 6: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=double4"); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
CV_Error(-217,"unknown depth"); |
|
|
|
|
} |
|
|
|
|
vector< pair<size_t, const void *> > args; |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&src)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.wholecols)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.wholerows)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep_in_pixel)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&pixel_end)); |
|
|
|
|
|
|
|
|
|
size_t globalThreads[3] = {(dst.wholecols *dst.wholerows + 255) / 256 * 256, 1, 1}; |
|
|
|
|
size_t globalThreads[3] = {((dst.wholecols *dst.wholerows+3)/4 + 255) / 256 * 256, 1, 1}; |
|
|
|
|
size_t localThreads[3] = {256, 1, 1}; |
|
|
|
|
|
|
|
|
|
openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, dst.elemSize1() >> 1); |
|
|
|
|
openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, -1,compile_option); |
|
|
|
|
} |
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
|
|
|
// convert_C4C3
|
|
|
|
|
void convert_C4C3(const oclMat &src, cl_mem &dst, int dstStep) |
|
|
|
|
{ |
|
|
|
|
int srcStep = src.step1() / src.channels(); |
|
|
|
|
int srcStep_in_pixel = src.step1() / src.channels(); |
|
|
|
|
int pixel_end = src.wholecols*src.wholerows -1; |
|
|
|
|
Context *clCxt = src.clCxt; |
|
|
|
|
string kernelName = "convertC4C3"; |
|
|
|
|
char compile_option[32]; |
|
|
|
|
switch(src.depth()) |
|
|
|
|
{ |
|
|
|
|
case 0: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=uchar4"); |
|
|
|
|
break; |
|
|
|
|
case 1: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=char4"); |
|
|
|
|
break; |
|
|
|
|
case 2: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=ushort4"); |
|
|
|
|
break; |
|
|
|
|
case 3: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=short4"); |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=int4"); |
|
|
|
|
break; |
|
|
|
|
case 5: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=float4"); |
|
|
|
|
break; |
|
|
|
|
case 6: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=double4"); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
CV_Error(-217,"unknown depth"); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
vector< pair<size_t, const void *> > args; |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep_in_pixel)); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&pixel_end)); |
|
|
|
|
|
|
|
|
|
size_t globalThreads[3] = {(src.wholecols *src.wholerows + 255) / 256 * 256, 1, 1}; |
|
|
|
|
size_t globalThreads[3] = {((src.wholecols *src.wholerows+3)/4 + 255) / 256 * 256, 1, 1}; |
|
|
|
|
size_t localThreads[3] = {256, 1, 1}; |
|
|
|
|
|
|
|
|
|
openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, src.elemSize1() >> 1); |
|
|
|
|
openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, -1,compile_option); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void cv::ocl::oclMat::upload(const Mat &m) |
|
|
|
@ -173,23 +229,47 @@ void cv::ocl::oclMat::upload(const Mat &m) |
|
|
|
|
Point ofs; |
|
|
|
|
m.locateROI(wholeSize, ofs); |
|
|
|
|
int type = m.type(); |
|
|
|
|
//if(m.channels() == 3)
|
|
|
|
|
//type = CV_MAKETYPE(m.depth(), 4);
|
|
|
|
|
if(m.channels() == 3) |
|
|
|
|
{ |
|
|
|
|
type = CV_MAKETYPE(m.depth(), 4); |
|
|
|
|
} |
|
|
|
|
create(wholeSize, type); |
|
|
|
|
|
|
|
|
|
//if(m.channels() == 3)
|
|
|
|
|
//{
|
|
|
|
|
//int pitch = GPU_MATRIX_MALLOC_STEP(wholeSize.width * 3 * m.elemSize1());
|
|
|
|
|
//int err;
|
|
|
|
|
//cl_mem temp = clCreateBuffer(clCxt->clContext,CL_MEM_READ_WRITE,
|
|
|
|
|
//pitch*wholeSize.height,0,&err);
|
|
|
|
|
//CV_DbgAssert(err==0);
|
|
|
|
|
|
|
|
|
|
//openCLMemcpy2D(clCxt,temp,pitch,m.datastart,m.step,wholeSize.width*m.elemSize(),wholeSize.height,clMemcpyHostToDevice);
|
|
|
|
|
//convert_C3C4(temp, *this, pitch);
|
|
|
|
|
//}
|
|
|
|
|
//else
|
|
|
|
|
openCLMemcpy2D(clCxt, data, step, m.datastart, m.step, wholeSize.width * elemSize(), wholeSize.height, clMemcpyHostToDevice); |
|
|
|
|
if(m.channels() == 3) |
|
|
|
|
{ |
|
|
|
|
int pitch = wholeSize.width * 3 * m.elemSize1(); |
|
|
|
|
int tail_padding = m.elemSize1()*3072; |
|
|
|
|
int err; |
|
|
|
|
cl_mem temp = clCreateBuffer(clCxt->impl->clContext,CL_MEM_READ_WRITE, |
|
|
|
|
(pitch*wholeSize.height+tail_padding-1)/tail_padding*tail_padding,0,&err); |
|
|
|
|
openCLVerifyCall(err); |
|
|
|
|
|
|
|
|
|
openCLMemcpy2D(clCxt,temp,pitch,m.datastart,m.step,wholeSize.width*m.elemSize(),wholeSize.height,clMemcpyHostToDevice,3); |
|
|
|
|
convert_C3C4(temp, *this, pitch); |
|
|
|
|
//int* cputemp=new int[wholeSize.height*wholeSize.width * 3];
|
|
|
|
|
//int* cpudata=new int[this->step*this->wholerows/sizeof(int)];
|
|
|
|
|
//openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE,
|
|
|
|
|
// 0, wholeSize.height*wholeSize.width * 3* sizeof(int), cputemp, 0, NULL, NULL));
|
|
|
|
|
//openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)data, CL_TRUE,
|
|
|
|
|
// 0, this->step*this->wholerows, cpudata, 0, NULL, NULL));
|
|
|
|
|
//for(int i=0;i<wholeSize.height;i++)
|
|
|
|
|
//{
|
|
|
|
|
// int *a = cputemp+i*wholeSize.width * 3,*b = cpudata + i*this->step/sizeof(int);
|
|
|
|
|
// for(int j=0;j<wholeSize.width;j++)
|
|
|
|
|
// {
|
|
|
|
|
// if((a[3*j] != b[4*j])||(a[3*j+1] != b[4*j+1])||(a[3*j+2] != b[4*j+2]))
|
|
|
|
|
// printf("rows=%d,cols=%d,cputtemp=%d,%d,%d;cpudata=%d,%d,%d\n",
|
|
|
|
|
// i,j,a[3*j],a[3*j+1],a[3*j+2],b[4*j],b[4*j+1],b[4*j+2]);
|
|
|
|
|
// }
|
|
|
|
|
//}
|
|
|
|
|
//delete []cputemp;
|
|
|
|
|
//delete []cpudata;
|
|
|
|
|
openCLSafeCall(clReleaseMemObject(temp)); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
openCLMemcpy2D(clCxt, data, step, m.datastart, m.step, wholeSize.width * elemSize(), wholeSize.height, clMemcpyHostToDevice); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
rows = m.rows; |
|
|
|
|
cols = m.cols; |
|
|
|
@ -201,23 +281,47 @@ void cv::ocl::oclMat::download(cv::Mat &m) const |
|
|
|
|
{ |
|
|
|
|
CV_DbgAssert(!this->empty()); |
|
|
|
|
int t = type(); |
|
|
|
|
//if(download_channels == 3)
|
|
|
|
|
//t = CV_MAKETYPE(depth(), 3);
|
|
|
|
|
if(download_channels == 3) |
|
|
|
|
{ |
|
|
|
|
t = CV_MAKETYPE(depth(), 3); |
|
|
|
|
} |
|
|
|
|
m.create(wholerows, wholecols, t); |
|
|
|
|
|
|
|
|
|
//if(download_channels == 3)
|
|
|
|
|
//{
|
|
|
|
|
//int pitch = GPU_MATRIX_MALLOC_STEP(wholecols * 3 * m.elemSize1());
|
|
|
|
|
//int err;
|
|
|
|
|
//cl_mem temp = clCreateBuffer(clCxt->clContext,CL_MEM_READ_WRITE,
|
|
|
|
|
//pitch*wholerows,0,&err);
|
|
|
|
|
//CV_DbgAssert(err==0);
|
|
|
|
|
|
|
|
|
|
//convert_C4C3(*this, temp, pitch/m.elemSize1());
|
|
|
|
|
//openCLMemcpy2D(clCxt,m.data,m.step,temp,pitch,wholecols*m.elemSize(),wholerows,clMemcpyDeviceToHost);
|
|
|
|
|
//}
|
|
|
|
|
//else
|
|
|
|
|
openCLMemcpy2D(clCxt, m.data, m.step, data, step, wholecols * elemSize(), wholerows, clMemcpyDeviceToHost); |
|
|
|
|
if(download_channels == 3) |
|
|
|
|
{ |
|
|
|
|
int pitch = wholecols * 3 * m.elemSize1(); |
|
|
|
|
int tail_padding = m.elemSize1()*3072; |
|
|
|
|
int err; |
|
|
|
|
cl_mem temp = clCreateBuffer(clCxt->impl->clContext,CL_MEM_READ_WRITE, |
|
|
|
|
(pitch*wholerows+tail_padding-1)/tail_padding*tail_padding,0,&err); |
|
|
|
|
openCLVerifyCall(err); |
|
|
|
|
|
|
|
|
|
convert_C4C3(*this, temp, pitch/m.elemSize1()); |
|
|
|
|
openCLMemcpy2D(clCxt,m.data,m.step,temp,pitch,wholecols*m.elemSize(),wholerows,clMemcpyDeviceToHost,3); |
|
|
|
|
//int* cputemp=new int[wholecols*wholerows * 3];
|
|
|
|
|
//int* cpudata=new int[this->step*this->wholerows/sizeof(int)];
|
|
|
|
|
//openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE,
|
|
|
|
|
// 0, wholecols*wholerows * 3* sizeof(int), cputemp, 0, NULL, NULL));
|
|
|
|
|
//openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)data, CL_TRUE,
|
|
|
|
|
// 0, this->step*this->wholerows, cpudata, 0, NULL, NULL));
|
|
|
|
|
//for(int i=0;i<wholerows;i++)
|
|
|
|
|
//{
|
|
|
|
|
// int *a = cputemp+i*wholecols * 3,*b = cpudata + i*this->step/sizeof(int);
|
|
|
|
|
// for(int j=0;j<wholecols;j++)
|
|
|
|
|
// {
|
|
|
|
|
// if((a[3*j] != b[4*j])||(a[3*j+1] != b[4*j+1])||(a[3*j+2] != b[4*j+2]))
|
|
|
|
|
// printf("rows=%d,cols=%d,cputtemp=%d,%d,%d;cpudata=%d,%d,%d\n",
|
|
|
|
|
// i,j,a[3*j],a[3*j+1],a[3*j+2],b[4*j],b[4*j+1],b[4*j+2]);
|
|
|
|
|
// }
|
|
|
|
|
//}
|
|
|
|
|
//delete []cputemp;
|
|
|
|
|
//delete []cpudata;
|
|
|
|
|
openCLSafeCall(clReleaseMemObject(temp)); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
openCLMemcpy2D(clCxt, m.data, m.step, data, step, wholecols * elemSize(), wholerows, clMemcpyDeviceToHost); |
|
|
|
|
} |
|
|
|
|
Size wholesize; |
|
|
|
|
Point ofs; |
|
|
|
|
locateROI(wholesize, ofs); |
|
|
|
@ -373,11 +477,7 @@ oclMat &cv::ocl::oclMat::operator = (const Scalar &s) |
|
|
|
|
void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kernelName) |
|
|
|
|
{ |
|
|
|
|
vector<pair<size_t , const void *> > args; |
|
|
|
|
cl_float4 val; |
|
|
|
|
val.s[0] = scalar.val[0]; |
|
|
|
|
val.s[1] = scalar.val[1]; |
|
|
|
|
val.s[2] = scalar.val[2]; |
|
|
|
|
val.s[3] = scalar.val[3]; |
|
|
|
|
|
|
|
|
|
size_t localThreads[3] = {16, 16, 1}; |
|
|
|
|
size_t globalThreads[3]; |
|
|
|
|
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0]; |
|
|
|
@ -388,25 +488,168 @@ void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kern |
|
|
|
|
{ |
|
|
|
|
globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0]; |
|
|
|
|
} |
|
|
|
|
args.push_back( make_pair( sizeof(cl_float4) , (void *)&val )); |
|
|
|
|
char compile_option[32]; |
|
|
|
|
union sc |
|
|
|
|
{ |
|
|
|
|
cl_uchar4 uval; |
|
|
|
|
cl_char4 cval; |
|
|
|
|
cl_ushort4 usval; |
|
|
|
|
cl_short4 shval; |
|
|
|
|
cl_int4 ival; |
|
|
|
|
cl_float4 fval; |
|
|
|
|
cl_double4 dval; |
|
|
|
|
}val; |
|
|
|
|
switch(dst.depth()) |
|
|
|
|
{ |
|
|
|
|
case 0: |
|
|
|
|
val.uval.s[0] = saturate_cast<uchar>(scalar.val[0]); |
|
|
|
|
val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]); |
|
|
|
|
val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]); |
|
|
|
|
val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]); |
|
|
|
|
switch(dst.channels()) |
|
|
|
|
{ |
|
|
|
|
case 1: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=uchar"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] )); |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=uchar4"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval )); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
CV_Error(-217,"unsupported channels"); |
|
|
|
|
} |
|
|
|
|
break; |
|
|
|
|
case 1: |
|
|
|
|
val.cval.s[0] = saturate_cast<char>(scalar.val[0]); |
|
|
|
|
val.cval.s[1] = saturate_cast<char>(scalar.val[1]); |
|
|
|
|
val.cval.s[2] = saturate_cast<char>(scalar.val[2]); |
|
|
|
|
val.cval.s[3] = saturate_cast<char>(scalar.val[3]); |
|
|
|
|
switch(dst.channels()) |
|
|
|
|
{ |
|
|
|
|
case 1: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=char"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] )); |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=char4"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval )); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
CV_Error(-217,"unsupported channels"); |
|
|
|
|
} |
|
|
|
|
break; |
|
|
|
|
case 2: |
|
|
|
|
val.usval.s[0] = saturate_cast<ushort>(scalar.val[0]); |
|
|
|
|
val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]); |
|
|
|
|
val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]); |
|
|
|
|
val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]); |
|
|
|
|
switch(dst.channels()) |
|
|
|
|
{ |
|
|
|
|
case 1: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=ushort"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] )); |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=ushort4"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval )); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
CV_Error(-217,"unsupported channels"); |
|
|
|
|
} |
|
|
|
|
break; |
|
|
|
|
case 3: |
|
|
|
|
val.shval.s[0] = saturate_cast<short>(scalar.val[0]); |
|
|
|
|
val.shval.s[1] = saturate_cast<short>(scalar.val[1]); |
|
|
|
|
val.shval.s[2] = saturate_cast<short>(scalar.val[2]); |
|
|
|
|
val.shval.s[3] = saturate_cast<short>(scalar.val[3]); |
|
|
|
|
switch(dst.channels()) |
|
|
|
|
{ |
|
|
|
|
case 1: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=short"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] )); |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=short4"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval )); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
CV_Error(-217,"unsupported channels"); |
|
|
|
|
} |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
val.ival.s[0] = saturate_cast<int>(scalar.val[0]); |
|
|
|
|
val.ival.s[1] = saturate_cast<int>(scalar.val[1]); |
|
|
|
|
val.ival.s[2] = saturate_cast<int>(scalar.val[2]); |
|
|
|
|
val.ival.s[3] = saturate_cast<int>(scalar.val[3]); |
|
|
|
|
switch(dst.channels()) |
|
|
|
|
{ |
|
|
|
|
case 1: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=int"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] )); |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=int4"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival )); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
CV_Error(-217,"unsupported channels"); |
|
|
|
|
} |
|
|
|
|
break; |
|
|
|
|
case 5: |
|
|
|
|
val.fval.s[0] = scalar.val[0]; |
|
|
|
|
val.fval.s[1] = scalar.val[1]; |
|
|
|
|
val.fval.s[2] = scalar.val[2]; |
|
|
|
|
val.fval.s[3] = scalar.val[3];
|
|
|
|
|
switch(dst.channels()) |
|
|
|
|
{ |
|
|
|
|
case 1: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=float"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] )); |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=float4"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval )); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
CV_Error(-217,"unsupported channels"); |
|
|
|
|
} |
|
|
|
|
break; |
|
|
|
|
case 6: |
|
|
|
|
val.dval.s[0] = scalar.val[0]; |
|
|
|
|
val.dval.s[1] = scalar.val[1]; |
|
|
|
|
val.dval.s[2] = scalar.val[2]; |
|
|
|
|
val.dval.s[3] = scalar.val[3]; |
|
|
|
|
switch(dst.channels()) |
|
|
|
|
{ |
|
|
|
|
case 1: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=double"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] )); |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=double4"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval )); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
CV_Error(-217,"unsupported channels"); |
|
|
|
|
} |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
CV_Error(-217,"unknown depth"); |
|
|
|
|
} |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel)); |
|
|
|
|
openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads, |
|
|
|
|
localThreads, args, dst.channels(), dst.depth()); |
|
|
|
|
localThreads, args, -1, -1,compile_option); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, string kernelName) |
|
|
|
|
{ |
|
|
|
|
CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols); |
|
|
|
|
vector<pair<size_t , const void *> > args; |
|
|
|
|
cl_float4 val; |
|
|
|
|
val.s[0] = scalar.val[0]; |
|
|
|
|
val.s[1] = scalar.val[1]; |
|
|
|
|
val.s[2] = scalar.val[2]; |
|
|
|
|
val.s[3] = scalar.val[3]; |
|
|
|
|
size_t localThreads[3] = {16, 16, 1}; |
|
|
|
|
size_t globalThreads[3]; |
|
|
|
|
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0]; |
|
|
|
@ -417,7 +660,155 @@ void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat & |
|
|
|
|
globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0]; |
|
|
|
|
} |
|
|
|
|
int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize(); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_float4) , (void *)&val )); |
|
|
|
|
char compile_option[32]; |
|
|
|
|
union sc |
|
|
|
|
{ |
|
|
|
|
cl_uchar4 uval; |
|
|
|
|
cl_char4 cval; |
|
|
|
|
cl_ushort4 usval; |
|
|
|
|
cl_short4 shval; |
|
|
|
|
cl_int4 ival; |
|
|
|
|
cl_float4 fval; |
|
|
|
|
cl_double4 dval; |
|
|
|
|
}val; |
|
|
|
|
switch(dst.depth()) |
|
|
|
|
{ |
|
|
|
|
case 0: |
|
|
|
|
val.uval.s[0] = saturate_cast<uchar>(scalar.val[0]); |
|
|
|
|
val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]); |
|
|
|
|
val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]); |
|
|
|
|
val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]); |
|
|
|
|
switch(dst.channels()) |
|
|
|
|
{ |
|
|
|
|
case 1: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=uchar"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] )); |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=uchar4"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval )); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
CV_Error(-217,"unsupported channels"); |
|
|
|
|
} |
|
|
|
|
break; |
|
|
|
|
case 1: |
|
|
|
|
val.cval.s[0] = saturate_cast<char>(scalar.val[0]); |
|
|
|
|
val.cval.s[1] = saturate_cast<char>(scalar.val[1]); |
|
|
|
|
val.cval.s[2] = saturate_cast<char>(scalar.val[2]); |
|
|
|
|
val.cval.s[3] = saturate_cast<char>(scalar.val[3]); |
|
|
|
|
switch(dst.channels()) |
|
|
|
|
{ |
|
|
|
|
case 1: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=char"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] )); |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=char4"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval )); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
CV_Error(-217,"unsupported channels"); |
|
|
|
|
} |
|
|
|
|
break; |
|
|
|
|
case 2: |
|
|
|
|
val.usval.s[0] = saturate_cast<ushort>(scalar.val[0]); |
|
|
|
|
val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]); |
|
|
|
|
val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]); |
|
|
|
|
val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]); |
|
|
|
|
switch(dst.channels()) |
|
|
|
|
{ |
|
|
|
|
case 1: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=ushort"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] )); |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=ushort4"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval )); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
CV_Error(-217,"unsupported channels"); |
|
|
|
|
} |
|
|
|
|
break; |
|
|
|
|
case 3: |
|
|
|
|
val.shval.s[0] = saturate_cast<short>(scalar.val[0]); |
|
|
|
|
val.shval.s[1] = saturate_cast<short>(scalar.val[1]); |
|
|
|
|
val.shval.s[2] = saturate_cast<short>(scalar.val[2]); |
|
|
|
|
val.shval.s[3] = saturate_cast<short>(scalar.val[3]); |
|
|
|
|
switch(dst.channels()) |
|
|
|
|
{ |
|
|
|
|
case 1: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=short"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] )); |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=short4"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval )); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
CV_Error(-217,"unsupported channels"); |
|
|
|
|
} |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
val.ival.s[0] = saturate_cast<int>(scalar.val[0]); |
|
|
|
|
val.ival.s[1] = saturate_cast<int>(scalar.val[1]); |
|
|
|
|
val.ival.s[2] = saturate_cast<int>(scalar.val[2]); |
|
|
|
|
val.ival.s[3] = saturate_cast<int>(scalar.val[3]); |
|
|
|
|
switch(dst.channels()) |
|
|
|
|
{ |
|
|
|
|
case 1: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=int"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] )); |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=int4"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival )); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
CV_Error(-217,"unsupported channels"); |
|
|
|
|
} |
|
|
|
|
break; |
|
|
|
|
case 5: |
|
|
|
|
val.fval.s[0] = scalar.val[0]; |
|
|
|
|
val.fval.s[1] = scalar.val[1]; |
|
|
|
|
val.fval.s[2] = scalar.val[2]; |
|
|
|
|
val.fval.s[3] = scalar.val[3];
|
|
|
|
|
switch(dst.channels()) |
|
|
|
|
{ |
|
|
|
|
case 1: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=float"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] )); |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=float4"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval )); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
CV_Error(-217,"unsupported channels"); |
|
|
|
|
} |
|
|
|
|
break; |
|
|
|
|
case 6: |
|
|
|
|
val.dval.s[0] = scalar.val[0]; |
|
|
|
|
val.dval.s[1] = scalar.val[1]; |
|
|
|
|
val.dval.s[2] = scalar.val[2]; |
|
|
|
|
val.dval.s[3] = scalar.val[3]; |
|
|
|
|
switch(dst.channels()) |
|
|
|
|
{ |
|
|
|
|
case 1: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=double"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] )); |
|
|
|
|
break; |
|
|
|
|
case 4: |
|
|
|
|
sprintf(compile_option, "-D GENTYPE=double4"); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval )); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
CV_Error(-217,"unsupported channels"); |
|
|
|
|
} |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
CV_Error(-217,"unknown depth"); |
|
|
|
|
} |
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows )); |
|
|
|
@ -427,7 +818,7 @@ void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat & |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step )); |
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset )); |
|
|
|
|
openCLExecuteKernel(dst.clCxt , &operator_setToM, kernelName, globalThreads, |
|
|
|
|
localThreads, args, dst.channels(), dst.depth()); |
|
|
|
|
localThreads, args, -1, -1,compile_option); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
oclMat &cv::ocl::oclMat::setTo(const Scalar &scalar, const oclMat &mask) |
|
|
|
@ -446,11 +837,25 @@ oclMat &cv::ocl::oclMat::setTo(const Scalar &scalar, const oclMat &mask) |
|
|
|
|
// (cl_mem)mem,1,0,sizeof(double)*4,s,0,0,0));
|
|
|
|
|
if (mask.empty()) |
|
|
|
|
{ |
|
|
|
|
set_to_withoutmask_run(*this, scalar, "set_to_without_mask"); |
|
|
|
|
if(type()==CV_8UC1) |
|
|
|
|
{ |
|
|
|
|
set_to_withoutmask_run(*this, scalar, "set_to_without_mask_C1_D0"); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
set_to_withoutmask_run(*this, scalar, "set_to_without_mask"); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
set_to_withmask_run(*this, scalar, mask, "set_to_with_mask"); |
|
|
|
|
if(type()==CV_8UC1) |
|
|
|
|
{ |
|
|
|
|
set_to_withmask_run(*this, scalar, mask,"set_to_with_mask_C1_D0"); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
set_to_withmask_run(*this, scalar, mask, "set_to_with_mask"); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return *this; |
|
|
|
|