diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index a84b98d151..0ee492e097 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -618,7 +618,7 @@ CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noAr InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(), OclVectorStrategy strat = OCL_VECTOR_DEFAULT); -CV_EXPORTS int checkOptimalVectorWidth(int *vectorWidths, +CV_EXPORTS int checkOptimalVectorWidth(const int *vectorWidths, InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(), diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp index d6da1a2253..829b984c9f 100644 --- a/modules/core/src/convert.cpp +++ b/modules/core/src/convert.cpp @@ -3275,13 +3275,26 @@ static BinaryFunc getConvertScaleFunc(int sdepth, int ddepth) static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, double beta ) { const ocl::Device & d = ocl::Device::getDefault(); - int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), - kercn = ocl::predictOptimalVectorWidth(_src, _dst), rowsPerWI = d.isIntel() ? 4 : 1; - bool doubleSupport = d.doubleFPConfig() > 0; + int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + bool doubleSupport = d.doubleFPConfig() > 0; if (!doubleSupport && depth == CV_64F) return false; + _dst.create(_src.size(), CV_8UC(cn)); + int kercn = 1; + if (d.isIntel()) + { + static const int vectorWidths[] = {4, 4, 4, 4, 4, 4, 4, -1}; + kercn = ocl::checkOptimalVectorWidth( vectorWidths, _src, _dst, + noArray(), noArray(), noArray(), + noArray(), noArray(), noArray(), + noArray(), ocl::OCL_VECTOR_MAX); + } + else + kercn = ocl::predictOptimalVectorWidthMax(_src, _dst); + + int rowsPerWI = d.isIntel() ? 4 : 1; char cvt[2][50]; int wdepth = std::max(depth, CV_32F); String build_opt = format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D srcT1=%s" @@ -3299,7 +3312,6 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha return false; UMat src = _src.getUMat(); - _dst.create(src.size(), CV_8UC(cn)); UMat dst = _dst.getUMat(); ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index c333b08c40..faf45d3c87 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -4531,12 +4531,14 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3, return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat); } -int checkOptimalVectorWidth(int *vectorWidths, +int checkOptimalVectorWidth(const int *vectorWidths, InputArray src1, InputArray src2, InputArray src3, InputArray src4, InputArray src5, InputArray src6, InputArray src7, InputArray src8, InputArray src9, OclVectorStrategy strat) { + CV_Assert(vectorWidths); + int ref_type = src1.type(); std::vector offsets, steps, cols;