Merge pull request #2795 from ilya-lavrenov:tapi_setto

pull/2822/head
Alexander Alekhin 11 years ago committed by OpenCV Buildbot
commit 93af92c878
  1. modules/core/src/opencl/copyset.cl (23 lines changed)
  2. modules/core/src/umatrix.cpp (16 lines changed)

@@ -104,15 +104,20 @@ __kernel void setMask(__global const uchar* mask, int maskstep, int maskoffset,
int rows, int cols, dstST value_)
{
int x = get_global_id(0);
int y = get_global_id(1);
int y0 = get_global_id(1) * rowsPerWI;
if (x < cols && y < rows)
if (x < cols)
{
int mask_index = mad24(y, maskstep, x + maskoffset);
if( mask[mask_index] )
int mask_index = mad24(y0, maskstep, x + maskoffset);
int dst_index = mad24(y0, dststep, mad24(x, (int)sizeof(dstT1) * cn, dstoffset));
for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y)
{
int dst_index = mad24(y, dststep, mad24(x, (int)sizeof(dstT1) * cn, dstoffset));
if( mask[mask_index] )
storedst(value);
mask_index += maskstep;
dst_index += dststep;
}
}
}
@@ -121,11 +126,13 @@ __kernel void set(__global uchar* dstptr, int dststep, int dstoffset,
int rows, int cols, dstST value_)
{
int x = get_global_id(0);
int y = get_global_id(1);
int y0 = get_global_id(1) * rowsPerWI;
if (x < cols && y < rows)
if (x < cols)
{
int dst_index = mad24(y, dststep, mad24(x, (int)sizeof(dstT1) * cn, dstoffset));
int dst_index = mad24(y0, dststep, mad24(x, (int)sizeof(dstT1) * cn, dstoffset));
for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y, dst_index += dststep)
storedst(value);
}
}

@@ -768,9 +768,9 @@ UMat& UMat::setTo(InputArray _value, InputArray _mask)
double buf[4] = { 0, 0, 0, 0 };
convertAndUnrollScalar(value, tp, (uchar *)buf, 1);
int scalarcn = cn == 3 ? 4 : cn;
char opts[1024];
sprintf(opts, "-D dstT=%s -D dstST=%s -D dstT1=%s -D cn=%d", ocl::memopTypeToStr(tp),
int scalarcn = cn == 3 ? 4 : cn, rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
String opts = format("-D dstT=%s -D rowsPerWI=%d -D dstST=%s -D dstT1=%s -D cn=%d",
ocl::memopTypeToStr(tp), rowsPerWI,
ocl::memopTypeToStr(CV_MAKETYPE(tp, scalarcn)),
ocl::memopTypeToStr(CV_MAT_DEPTH(tp)), cn);
@@ -783,9 +783,9 @@ UMat& UMat::setTo(InputArray _value, InputArray _mask)
if( haveMask )
{
mask = _mask.getUMat();
CV_Assert( mask.size() == size() && mask.type() == CV_8U );
ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);
ocl::KernelArg dstarg = ocl::KernelArg::ReadWrite(*this);
CV_Assert( mask.size() == size() && mask.type() == CV_8UC1 );
ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
dstarg = ocl::KernelArg::ReadWrite(*this);
setK.args(maskarg, dstarg, scalararg);
}
else
@@ -794,8 +794,8 @@ UMat& UMat::setTo(InputArray _value, InputArray _mask)
setK.args(dstarg, scalararg);
}
size_t globalsize[] = { cols, rows };
if( setK.run(2, globalsize, 0, false) )
size_t globalsize[] = { cols, (rows + rowsPerWI - 1) / rowsPerWI };
if( setK.run(2, globalsize, NULL, false) )
return *this;
}
}

Loading…
Cancel
Save