|
|
|
@ -369,11 +369,17 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray |
|
|
|
|
CV_Assert(op_type == ACCUMULATE || op_type == ACCUMULATE_SQUARE || |
|
|
|
|
op_type == ACCUMULATE_PRODUCT || op_type == ACCUMULATE_WEIGHTED); |
|
|
|
|
|
|
|
|
|
int stype = _src.type(), cn = CV_MAT_CN(stype); |
|
|
|
|
int sdepth = CV_MAT_DEPTH(stype), ddepth = _dst.depth(); |
|
|
|
|
|
|
|
|
|
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0, |
|
|
|
|
haveMask = !_mask.empty(); |
|
|
|
|
const ocl::Device & dev = ocl::Device::getDefault(); |
|
|
|
|
int vectorWidths[] = { 4, 4, 2, 2, 1, 1, 1, -1 }; |
|
|
|
|
int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), ddepth = _dst.depth(); |
|
|
|
|
int pcn = std::max(vectorWidths[sdepth], vectorWidths[ddepth]), sesz = CV_ELEM_SIZE(sdepth) * pcn, |
|
|
|
|
desz = CV_ELEM_SIZE(ddepth) * pcn, rowsPerWI = dev.isIntel() ? 4 : 1; |
|
|
|
|
|
|
|
|
|
bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty(), |
|
|
|
|
usepcn = _src.offset() % sesz == 0 && _src.step() % sesz == 0 && (_src.cols() * cn) % pcn == 0 && |
|
|
|
|
_src2.offset() % desz == 0 && _src2.step() % desz == 0 && |
|
|
|
|
_dst.offset() % pcn == 0 && _dst.step() % desz == 0 && !haveMask; |
|
|
|
|
int kercn = usepcn ? pcn : haveMask ? cn : 1; |
|
|
|
|
|
|
|
|
|
if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) |
|
|
|
|
return false; |
|
|
|
@ -381,11 +387,13 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray |
|
|
|
|
const char * const opMap[4] = { "ACCUMULATE", "ACCUMULATE_SQUARE", "ACCUMULATE_PRODUCT", |
|
|
|
|
"ACCUMULATE_WEIGHTED" }; |
|
|
|
|
|
|
|
|
|
char cvt[40]; |
|
|
|
|
ocl::Kernel k("accumulate", ocl::imgproc::accumulate_oclsrc, |
|
|
|
|
format("-D %s%s -D srcT=%s -D cn=%d -D dstT=%s%s", |
|
|
|
|
format("-D %s%s -D srcT1=%s -D cn=%d -D dstT1=%s%s -D rowsPerWI=%d -D convertToDT=%s", |
|
|
|
|
opMap[op_type], haveMask ? " -D HAVE_MASK" : "", |
|
|
|
|
ocl::typeToStr(sdepth), cn, ocl::typeToStr(ddepth), |
|
|
|
|
doubleSupport ? " -D DOUBLE_SUPPORT" : "")); |
|
|
|
|
ocl::typeToStr(sdepth), kercn, ocl::typeToStr(ddepth), |
|
|
|
|
doubleSupport ? " -D DOUBLE_SUPPORT" : "", rowsPerWI, |
|
|
|
|
ocl::convertTypeStr(sdepth, ddepth, 1, cvt))); |
|
|
|
|
if (k.empty()) |
|
|
|
|
return false; |
|
|
|
|
|
|
|
|
@ -393,7 +401,7 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray |
|
|
|
|
|
|
|
|
|
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), |
|
|
|
|
src2arg = ocl::KernelArg::ReadOnlyNoSize(src2), |
|
|
|
|
dstarg = ocl::KernelArg::ReadWrite(dst), |
|
|
|
|
dstarg = ocl::KernelArg::ReadWrite(dst, cn, kercn), |
|
|
|
|
maskarg = ocl::KernelArg::ReadOnlyNoSize(mask); |
|
|
|
|
|
|
|
|
|
int argidx = k.set(0, srcarg); |
|
|
|
@ -410,7 +418,7 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray |
|
|
|
|
if (haveMask) |
|
|
|
|
k.set(argidx, maskarg); |
|
|
|
|
|
|
|
|
|
size_t globalsize[2] = { src.cols, src.rows }; |
|
|
|
|
size_t globalsize[2] = { src.cols * cn / kercn, (src.rows + rowsPerWI - 1) / rowsPerWI }; |
|
|
|
|
return k.run(2, globalsize, NULL, false); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|