Merge pull request #16754 from alalek:issue_16752

* core(test): FP16 norm test

* core: norm()-FP16 disable OpenCL

* core(norm): fix 16f32f local buffer size
pull/16759/head
Alexander Alekhin 5 years ago committed by GitHub
parent 619180dffd
commit 198b5096aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 3
      modules/core/src/minmax.cpp
  2. 7
      modules/core/src/norm.cpp
  3. 3
      modules/core/src/sum.dispatch.cpp
  4. 2
      modules/core/test/ocl/test_arithm.cpp
  5. 8
      modules/ts/src/ts_func.cpp

@ -978,6 +978,9 @@ bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
kercn = haveMask ? cn : std::min(4, ocl::predictOptimalVectorWidth(_src, _src2));
+if (depth >= CV_16F)
+return false;
// disabled following modes since it occasionally fails on AMD devices (e.g. A10-6800K, sep. 2014)
if ((haveMask || type == CV_32FC1) && dev.isAMD())
return false;

@ -433,6 +433,9 @@ static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double &
bool doubleSupport = d.doubleFPConfig() > 0,
haveMask = _mask.kind() != _InputArray::NONE;
+if (depth >= CV_16F)
+return false; // TODO: support FP16
if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) ||
(!doubleSupport && depth == CV_64F))
return false;
@ -747,7 +750,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
const size_t esz = src.elemSize();
const int total = (int)it.size;
const int blockSize = std::min(total, divUp(1024, cn));
-AutoBuffer<float, 1024> fltbuf(blockSize);
+AutoBuffer<float, 1026/*divUp(1024,3)*3*/> fltbuf(blockSize * cn);
float* data0 = fltbuf.data();
for (size_t i = 0; i < it.nplanes; i++, ++it)
{
@ -1235,7 +1238,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
const size_t esz = src1.elemSize();
const int total = (int)it.size;
const int blockSize = std::min(total, divUp(512, cn));
-AutoBuffer<float, 1024> fltbuf(blockSize * 2);
+AutoBuffer<float, 1026/*divUp(512,3)*3*2*/> fltbuf(blockSize * cn * 2);
float* data0 = fltbuf.data();
float* data1 = fltbuf.data() + blockSize * cn;
for (size_t i = 0; i < it.nplanes; i++, ++it)

@ -46,6 +46,9 @@ bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask,
if ( (!doubleSupport && depth == CV_64F) || cn > 4 )
return false;
+if (depth >= CV_16F)
+return false;
int ngroups = dev.maxComputeUnits(), dbsize = ngroups * (calc2 ? 2 : 1);
size_t wgs = dev.maxWorkGroupSize();

@ -1905,7 +1905,7 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(::testing::Values(CV_32F,
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, MinMaxIdx, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, MinMaxIdx_Mask, Combine(OCL_ALL_DEPTHS, ::testing::Values(Channels(1)), Bool()));
-OCL_INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(OCL_ALL_DEPTHS_16F, OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Normalize, Combine(OCL_ALL_DEPTHS, Values(Channels(1)), Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, InRange, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));

@ -87,7 +87,9 @@ double getMinVal(int depth)
depth = CV_MAT_DEPTH(depth);
double val = depth == CV_8U ? 0 : depth == CV_8S ? SCHAR_MIN : depth == CV_16U ? 0 :
depth == CV_16S ? SHRT_MIN : depth == CV_32S ? INT_MIN :
-depth == CV_32F ? -FLT_MAX : depth == CV_64F ? -DBL_MAX : -1;
+depth == CV_32F ? -FLT_MAX : depth == CV_64F ? -DBL_MAX :
+depth == CV_16F ? -65504
+: -1;
CV_Assert(val != -1);
return val;
}
@ -97,7 +99,9 @@ double getMaxVal(int depth)
depth = CV_MAT_DEPTH(depth);
double val = depth == CV_8U ? UCHAR_MAX : depth == CV_8S ? SCHAR_MAX : depth == CV_16U ? USHRT_MAX :
depth == CV_16S ? SHRT_MAX : depth == CV_32S ? INT_MAX :
-depth == CV_32F ? FLT_MAX : depth == CV_64F ? DBL_MAX : -1;
+depth == CV_32F ? FLT_MAX : depth == CV_64F ? DBL_MAX :
+depth == CV_16F ? 65504
+: -1;
CV_Assert(val != -1);
return val;
}

Loading…
Cancel
Save