mirror of https://github.com/opencv/opencv.git
Open Source Computer Vision Library
https://opencv.org/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
526 lines
17 KiB
526 lines
17 KiB
// This file is part of OpenCV project. |
|
// It is subject to the license terms in the LICENSE file found in the top-level directory |
|
// of this distribution and at http://opencv.org/license.html |
|
|
|
|
|
#include "precomp.hpp" |
|
#include "opencl_kernels_core.hpp" |
|
#include "stat.hpp" |
|
#include "opencv2/core/detail/dispatch_helper.impl.hpp" |
|
#include <algorithm> |
|
|
|
#include "minmax.simd.hpp" |
|
#include "minmax.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content |
|
|
|
namespace cv { |
|
|
|
static MinMaxIdxFunc getMinMaxIdxFunc(int depth) |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
CV_CPU_DISPATCH(getMinMaxIdxFunc, (depth), |
|
CV_CPU_DISPATCH_MODES_ALL); |
|
} |
|
|
|
static void ofs2idx(const Mat& a, size_t ofs, int* idx) |
|
{ |
|
int i, d = a.dims; |
|
if( ofs > 0 ) |
|
{ |
|
ofs--; |
|
for( i = d-1; i >= 0; i-- ) |
|
{ |
|
int sz = a.size[i]; |
|
idx[i] = (int)(ofs % sz); |
|
ofs /= sz; |
|
} |
|
} |
|
else |
|
{ |
|
for( i = d-1; i >= 0; i-- ) |
|
idx[i] = -1; |
|
} |
|
} |
|
|
|
#ifdef HAVE_OPENCL |
|
|
|
#define MINMAX_STRUCT_ALIGNMENT 8 // sizeof double |
|
|
|
template <typename T> |
|
void getMinMaxRes(const Mat & db, double * minVal, double * maxVal, |
|
int* minLoc, int* maxLoc, |
|
int groupnum, int cols, double * maxVal2) |
|
{ |
|
uint index_max = std::numeric_limits<uint>::max(); |
|
T minval = std::numeric_limits<T>::max(); |
|
T maxval = std::numeric_limits<T>::min() > 0 ? -std::numeric_limits<T>::max() : std::numeric_limits<T>::min(), maxval2 = maxval; |
|
uint minloc = index_max, maxloc = index_max; |
|
|
|
size_t index = 0; |
|
const T * minptr = NULL, * maxptr = NULL, * maxptr2 = NULL; |
|
const uint * minlocptr = NULL, * maxlocptr = NULL; |
|
if (minVal || minLoc) |
|
{ |
|
minptr = db.ptr<T>(); |
|
index += sizeof(T) * groupnum; |
|
index = alignSize(index, MINMAX_STRUCT_ALIGNMENT); |
|
} |
|
if (maxVal || maxLoc) |
|
{ |
|
maxptr = (const T *)(db.ptr() + index); |
|
index += sizeof(T) * groupnum; |
|
index = alignSize(index, MINMAX_STRUCT_ALIGNMENT); |
|
} |
|
if (minLoc) |
|
{ |
|
minlocptr = (const uint *)(db.ptr() + index); |
|
index += sizeof(uint) * groupnum; |
|
index = alignSize(index, MINMAX_STRUCT_ALIGNMENT); |
|
} |
|
if (maxLoc) |
|
{ |
|
maxlocptr = (const uint *)(db.ptr() + index); |
|
index += sizeof(uint) * groupnum; |
|
index = alignSize(index, MINMAX_STRUCT_ALIGNMENT); |
|
} |
|
if (maxVal2) |
|
maxptr2 = (const T *)(db.ptr() + index); |
|
|
|
for (int i = 0; i < groupnum; i++) |
|
{ |
|
if (minptr && minptr[i] <= minval) |
|
{ |
|
if (minptr[i] == minval) |
|
{ |
|
if (minlocptr) |
|
minloc = std::min(minlocptr[i], minloc); |
|
} |
|
else |
|
{ |
|
if (minlocptr) |
|
minloc = minlocptr[i]; |
|
minval = minptr[i]; |
|
} |
|
} |
|
if (maxptr && maxptr[i] >= maxval) |
|
{ |
|
if (maxptr[i] == maxval) |
|
{ |
|
if (maxlocptr) |
|
maxloc = std::min(maxlocptr[i], maxloc); |
|
} |
|
else |
|
{ |
|
if (maxlocptr) |
|
maxloc = maxlocptr[i]; |
|
maxval = maxptr[i]; |
|
} |
|
} |
|
if (maxptr2 && maxptr2[i] > maxval2) |
|
maxval2 = maxptr2[i]; |
|
} |
|
bool zero_mask = (minLoc && minloc == index_max) || |
|
(maxLoc && maxloc == index_max); |
|
|
|
if (minVal) |
|
*minVal = zero_mask ? 0 : (double)minval; |
|
if (maxVal) |
|
*maxVal = zero_mask ? 0 : (double)maxval; |
|
if (maxVal2) |
|
*maxVal2 = zero_mask ? 0 : (double)maxval2; |
|
|
|
if (minLoc) |
|
{ |
|
minLoc[0] = zero_mask ? -1 : minloc / cols; |
|
minLoc[1] = zero_mask ? -1 : minloc % cols; |
|
} |
|
if (maxLoc) |
|
{ |
|
maxLoc[0] = zero_mask ? -1 : maxloc / cols; |
|
maxLoc[1] = zero_mask ? -1 : maxloc % cols; |
|
} |
|
} |
|
|
|
typedef void (*getMinMaxResFunc)(const Mat & db, double * minVal, double * maxVal, |
|
int * minLoc, int *maxLoc, int gropunum, int cols, double * maxVal2); |
|
|
|
bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc, int* maxLoc, InputArray _mask, |
|
int ddepth, bool absValues, InputArray _src2, double * maxVal2) |
|
{ |
|
const ocl::Device & dev = ocl::Device::getDefault(); |
|
|
|
#ifdef __ANDROID__ |
|
if (dev.isNVidia()) |
|
return false; |
|
#endif |
|
|
|
if (dev.deviceVersionMajor() == 1 && dev.deviceVersionMinor() < 2) |
|
{ |
|
// 'static' storage class specifier used by "minmaxloc" is available from OpenCL 1.2+ only |
|
return false; |
|
} |
|
|
|
bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty(), |
|
haveSrc2 = _src2.kind() != _InputArray::NONE; |
|
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), |
|
kercn = haveMask ? cn : std::min(4, ocl::predictOptimalVectorWidth(_src, _src2)); |
|
|
|
if (depth >= CV_16F) |
|
return false; |
|
|
|
// disabled following modes since it occasionally fails on AMD devices (e.g. A10-6800K, sep. 2014) |
|
if ((haveMask || type == CV_32FC1) && dev.isAMD()) |
|
return false; |
|
|
|
CV_Assert( (cn == 1 && (!haveMask || _mask.type() == CV_8U)) || |
|
(cn >= 1 && !minLoc && !maxLoc) ); |
|
|
|
if (ddepth < 0) |
|
ddepth = depth; |
|
|
|
CV_Assert(!haveSrc2 || _src2.type() == type); |
|
|
|
if (depth == CV_32S || depth == CV_8S || depth == CV_32U || depth == CV_64U || |
|
depth == CV_64S || depth == CV_16F || depth == CV_16BF) |
|
return false; |
|
|
|
if ((depth == CV_64F || ddepth == CV_64F) && !doubleSupport) |
|
return false; |
|
|
|
int groupnum = dev.maxComputeUnits(); |
|
size_t wgs = dev.maxWorkGroupSize(); |
|
|
|
int wgs2_aligned = 1; |
|
while (wgs2_aligned < (int)wgs) |
|
wgs2_aligned <<= 1; |
|
wgs2_aligned >>= 1; |
|
|
|
bool needMinVal = minVal || minLoc, needMinLoc = minLoc != NULL, |
|
needMaxVal = maxVal || maxLoc, needMaxLoc = maxLoc != NULL; |
|
|
|
// in case of mask we must know whether mask is filled with zeros or not |
|
// so let's calculate min or max location, if it's undefined, so mask is zeros |
|
if (!(needMaxLoc || needMinLoc) && haveMask) |
|
{ |
|
if (needMinVal) |
|
needMinLoc = true; |
|
else |
|
needMaxLoc = true; |
|
} |
|
|
|
char cvt[2][50]; |
|
String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s" |
|
" -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s" |
|
" -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s -D wdepth=%d -D convertFromU=%s" |
|
" -D MINMAX_STRUCT_ALIGNMENT=%d", |
|
depth, ocl::typeToStr(depth), haveMask ? " -D HAVE_MASK" : "", (int)wgs, |
|
ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned, |
|
doubleSupport ? " -D DOUBLE_SUPPORT" : "", |
|
_src.isContinuous() ? " -D HAVE_SRC_CONT" : "", |
|
_mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn, |
|
needMinVal ? " -D NEED_MINVAL" : "", needMaxVal ? " -D NEED_MAXVAL" : "", |
|
needMinLoc ? " -D NEED_MINLOC" : "", needMaxLoc ? " -D NEED_MAXLOC" : "", |
|
ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)), |
|
ocl::convertTypeStr(depth, ddepth, kercn, cvt[0], sizeof(cvt[0])), |
|
absValues ? " -D OP_ABS" : "", |
|
haveSrc2 ? " -D HAVE_SRC2" : "", maxVal2 ? " -D OP_CALC2" : "", |
|
haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", ddepth, |
|
depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, kercn, cvt[1], sizeof(cvt[1])) : "noconvert", |
|
MINMAX_STRUCT_ALIGNMENT); |
|
|
|
ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts); |
|
if (k.empty()) |
|
return false; |
|
|
|
int esz = CV_ELEM_SIZE(ddepth), esz32s = CV_ELEM_SIZE1(CV_32S), |
|
dbsize = groupnum * ((needMinVal ? esz : 0) + (needMaxVal ? esz : 0) + |
|
(needMinLoc ? esz32s : 0) + (needMaxLoc ? esz32s : 0) + |
|
(maxVal2 ? esz : 0)) |
|
+ 5 * MINMAX_STRUCT_ALIGNMENT; |
|
UMat src = _src.getUMat(), src2 = _src2.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat(); |
|
|
|
if (cn > 1 && !haveMask) |
|
{ |
|
src = src.reshape(1); |
|
src2 = src2.reshape(1); |
|
} |
|
|
|
if (haveSrc2) |
|
{ |
|
if (!haveMask) |
|
k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), |
|
groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(src2)); |
|
else |
|
k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), |
|
groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask), |
|
ocl::KernelArg::ReadOnlyNoSize(src2)); |
|
} |
|
else |
|
{ |
|
if (!haveMask) |
|
k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), |
|
groupnum, ocl::KernelArg::PtrWriteOnly(db)); |
|
else |
|
k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), |
|
groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask)); |
|
} |
|
|
|
size_t globalsize = groupnum * wgs; |
|
if (!k.run(1, &globalsize, &wgs, true)) |
|
return false; |
|
|
|
static const getMinMaxResFunc functab[7] = |
|
{ |
|
getMinMaxRes<uchar>, |
|
getMinMaxRes<char>, |
|
getMinMaxRes<ushort>, |
|
getMinMaxRes<short>, |
|
getMinMaxRes<int>, |
|
getMinMaxRes<float>, |
|
getMinMaxRes<double> |
|
}; |
|
|
|
CV_Assert(ddepth <= CV_64F); |
|
getMinMaxResFunc func = functab[ddepth]; |
|
|
|
int locTemp[2]; |
|
func(db.getMat(ACCESS_READ), minVal, maxVal, |
|
needMinLoc ? minLoc ? minLoc : locTemp : minLoc, |
|
needMaxLoc ? maxLoc ? maxLoc : locTemp : maxLoc, |
|
groupnum, src.cols, maxVal2); |
|
|
|
return true; |
|
} |
|
|
|
#endif |
|
|
|
} |
|
|
|
void cv::minMaxIdx(InputArray _src, double* minVal, |
|
double* maxVal, int* minIdx, int* maxIdx, |
|
InputArray _mask) |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); |
|
CV_Assert( (cn == 1 && (_mask.empty() || _mask.type() == CV_8U)) || |
|
(cn > 1 && _mask.empty() && !minIdx && !maxIdx) ); |
|
|
|
CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2 && (_mask.empty() || _src.size() == _mask.size()), |
|
ocl_minMaxIdx(_src, minVal, maxVal, minIdx, maxIdx, _mask)) |
|
|
|
Mat src = _src.getMat(), mask = _mask.getMat(); |
|
|
|
if (src.dims <= 2) |
|
{ |
|
CALL_HAL(minMaxIdx, cv_hal_minMaxIdx, src.data, src.step, src.cols*cn, src.rows, |
|
src.depth(), minVal, maxVal, minIdx, maxIdx, mask.data); |
|
} |
|
else if (src.isContinuous()) |
|
{ |
|
int res = cv_hal_minMaxIdx(src.data, 0, (int)src.total()*cn, 1, src.depth(), |
|
minVal, maxVal, minIdx, maxIdx, mask.data); |
|
|
|
if (res == CV_HAL_ERROR_OK) |
|
{ |
|
// minIdx[0] and minIdx[0] are always 0 for "flatten" version |
|
if (minIdx) |
|
ofs2idx(src, minIdx[1], minIdx); |
|
if (maxIdx) |
|
ofs2idx(src, maxIdx[1], maxIdx); |
|
return; |
|
} |
|
else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) |
|
{ |
|
CV_Error_(cv::Error::StsInternal, |
|
("HAL implementation minMaxIdx ==> " CVAUX_STR(cv_hal_minMaxIdx) " returned %d (0x%08x)", res, res)); |
|
} |
|
} |
|
|
|
MinMaxIdxFunc func = getMinMaxIdxFunc(depth); |
|
CV_Assert( func != 0 ); |
|
|
|
const Mat* arrays[] = {&src, &mask, 0}; |
|
uchar* ptrs[2] = {}; |
|
NAryMatIterator it(arrays, ptrs); |
|
|
|
size_t minidx = 0, maxidx = 0; |
|
size_t startidx = 1; |
|
union { |
|
int i; |
|
float f; |
|
double d; |
|
int64 L; |
|
uint64 UL; |
|
} minval, maxval; |
|
int planeSize = (int)it.size*cn; |
|
minval.L = maxval.L = 0; |
|
|
|
for( size_t i = 0; i < it.nplanes; i++, ++it, startidx += planeSize ) |
|
func( ptrs[0], ptrs[1], &minval.L, &maxval.L, &minidx, &maxidx, planeSize, startidx ); |
|
|
|
double dminval, dmaxval; |
|
if( depth <= CV_32S || depth == CV_Bool ) |
|
dminval = minval.i, dmaxval = maxval.i; |
|
else if( depth == CV_32F || depth == CV_16F || depth == CV_16BF ) |
|
dminval = minval.f, dmaxval = maxval.f; |
|
else if( depth == CV_64F ) |
|
dminval = minval.d, dmaxval = maxval.d; |
|
else if( depth == CV_64S || depth == CV_32U ) |
|
dminval = (double)minval.L, dmaxval = (double)maxval.L; |
|
else { |
|
CV_Assert(depth == CV_64U); |
|
dminval = (double)minval.UL, dmaxval = (double)maxval.UL; |
|
} |
|
|
|
if( minVal ) |
|
*minVal = dminval; |
|
if( maxVal ) |
|
*maxVal = dmaxval; |
|
|
|
if( minIdx ) |
|
ofs2idx(src, minidx, minIdx); |
|
if( maxIdx ) |
|
ofs2idx(src, maxidx, maxIdx); |
|
} |
|
|
|
void cv::minMaxLoc( InputArray _img, double* minVal, double* maxVal, |
|
Point* minLoc, Point* maxLoc, InputArray mask ) |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
int dims = _img.dims(); |
|
CV_CheckLE(dims, 2, ""); |
|
|
|
minMaxIdx(_img, minVal, maxVal, (int*)minLoc, (int*)maxLoc, mask); |
|
if( minLoc) { |
|
if (dims == 2) |
|
std::swap(minLoc->x, minLoc->y); |
|
else { |
|
minLoc->y = 0; |
|
} |
|
} |
|
if( maxLoc) { |
|
if (dims == 2) |
|
std::swap(maxLoc->x, maxLoc->y); |
|
else { |
|
maxLoc->y = 0; |
|
} |
|
} |
|
} |
|
|
|
enum class ReduceMode |
|
{ |
|
FIRST_MIN = 0, //!< get index of first min occurrence |
|
LAST_MIN = 1, //!< get index of last min occurrence |
|
FIRST_MAX = 2, //!< get index of first max occurrence |
|
LAST_MAX = 3, //!< get index of last max occurrence |
|
}; |
|
|
|
template <typename T> |
|
struct reduceMinMaxImpl |
|
{ |
|
void operator()(const cv::Mat& src, cv::Mat& dst, ReduceMode mode, const int axis) const |
|
{ |
|
switch(mode) |
|
{ |
|
case ReduceMode::FIRST_MIN: |
|
reduceMinMaxApply<std::less>(src, dst, axis); |
|
break; |
|
case ReduceMode::LAST_MIN: |
|
reduceMinMaxApply<std::less_equal>(src, dst, axis); |
|
break; |
|
case ReduceMode::FIRST_MAX: |
|
reduceMinMaxApply<std::greater>(src, dst, axis); |
|
break; |
|
case ReduceMode::LAST_MAX: |
|
reduceMinMaxApply<std::greater_equal>(src, dst, axis); |
|
break; |
|
} |
|
} |
|
|
|
template <template<class> class Cmp> |
|
static void reduceMinMaxApply(const cv::Mat& src, cv::Mat& dst, const int axis) |
|
{ |
|
Cmp<T> cmp; |
|
|
|
const auto *src_ptr = src.ptr<T>(); |
|
auto *dst_ptr = dst.ptr<int32_t>(); |
|
|
|
const size_t outer_size = src.total(0, axis); |
|
const auto mid_size = static_cast<size_t>(src.size[axis]); |
|
|
|
const size_t outer_step = src.total(axis); |
|
const size_t dst_step = dst.total(axis); |
|
|
|
const size_t mid_step = src.total(axis + 1); |
|
|
|
for (size_t outer = 0; outer < outer_size; ++outer) |
|
{ |
|
const size_t outer_offset = outer * outer_step; |
|
const size_t dst_offset = outer * dst_step; |
|
for (size_t mid = 0; mid != mid_size; ++mid) |
|
{ |
|
const size_t src_offset = outer_offset + mid * mid_step; |
|
for (size_t inner = 0; inner < mid_step; inner++) |
|
{ |
|
int32_t& index = dst_ptr[dst_offset + inner]; |
|
|
|
const size_t prev = outer_offset + index * mid_step + inner; |
|
const size_t curr = src_offset + inner; |
|
|
|
if (cmp(src_ptr[curr], src_ptr[prev])) |
|
{ |
|
index = static_cast<int32_t>(mid); |
|
} |
|
} |
|
} |
|
} |
|
} |
|
}; |
|
|
|
static void reduceMinMax(cv::InputArray src, cv::OutputArray dst, ReduceMode mode, int axis) |
|
{ |
|
CV_INSTRUMENT_REGION(); |
|
|
|
cv::Mat srcMat = src.getMat(); |
|
int dims = std::max(1, srcMat.dims); |
|
axis = (axis + dims) % dims; |
|
CV_Assert(srcMat.channels() == 1 && axis >= 0 && axis <= srcMat.dims); |
|
|
|
std::vector<int> sizes(srcMat.dims); |
|
std::copy(srcMat.size.p, srcMat.size.p + srcMat.dims, sizes.begin()); |
|
if(!sizes.empty()) |
|
sizes[axis] = 1; |
|
|
|
dst.create(srcMat.dims, sizes.data(), CV_32SC1); // indices |
|
cv::Mat dstMat = dst.getMat(); |
|
dstMat.setTo(cv::Scalar::all(0)); |
|
|
|
if (!srcMat.isContinuous()) |
|
{ |
|
srcMat = srcMat.clone(); |
|
} |
|
|
|
bool needs_copy = !dstMat.isContinuous(); |
|
if (needs_copy) |
|
{ |
|
dstMat = dstMat.clone(); |
|
} |
|
|
|
cv::detail::depthDispatch<reduceMinMaxImpl>(srcMat.depth(), srcMat, dstMat, mode, axis); |
|
|
|
if (needs_copy) |
|
{ |
|
dstMat.copyTo(dst); |
|
} |
|
} |
|
|
|
void cv::reduceArgMin(InputArray src, OutputArray dst, int axis, bool lastIndex) |
|
{ |
|
reduceMinMax(src, dst, lastIndex ? ReduceMode::LAST_MIN : ReduceMode::FIRST_MIN, axis); |
|
} |
|
|
|
void cv::reduceArgMax(InputArray src, OutputArray dst, int axis, bool lastIndex) |
|
{ |
|
reduceMinMax(src, dst, lastIndex ? ReduceMode::LAST_MAX : ReduceMode::FIRST_MAX, axis); |
|
}
|
|
|