Merge pull request #23098 from savuor:nanMask

finiteMask() and doubles for patchNaNs() #23098

Related to #22826
Connected PR in extra: [#1037@extra](https://github.com/opencv/opencv_extra/pull/1037)

### TODOs:
- [ ] Vectorize `finiteMask()` for 64FC3 and 64FC4

### Changes

This PR:
* adds a new function `finiteMask()`
* extends `patchNaNs()` with CV_64F support
* moves `patchNaNs()` and `finiteMask()` to a separate file

**NOTE:** the function is now called `finiteMask()`, as discussed with the OpenCV core team
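
For reference, a minimal usage sketch of the two functions touched by this PR (illustrative values, not part of the patch):

```cpp
#include <opencv2/core.hpp>
#include <limits>

int main()
{
    const double dnan = std::numeric_limits<double>::quiet_NaN();

    // patchNaNs() now accepts CV_64F input as well as CV_32F
    cv::Mat d = (cv::Mat_<double>(1, 3) << 1.0, dnan, 3.0);
    cv::patchNaNs(d, -1.0);   // in place: d becomes [1.0, -1.0, 3.0]

    // finiteMask() produces a CV_8UC1 mask: 255 where the element is finite
    cv::Mat f = (cv::Mat_<float>(1, 3) << 1.f,
                 std::numeric_limits<float>::infinity(),
                 std::numeric_limits<float>::quiet_NaN());
    cv::Mat mask;
    cv::finiteMask(f, mask);  // mask becomes [255, 0, 0]
    return 0;
}
```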

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There are accuracy tests, performance tests and test data in the opencv_extra repository, if applicable.
      The patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
Commit `53aad98a1a` (parent `34f34f6227`) by Rostislav Vasilikhin.
### Files changed

- `modules/3d/perf/perf_tsdf.cpp` (17 changed lines)
- `modules/3d/src/rgbd/odometry_functions.cpp` (14 changed lines)
- `modules/3d/test/test_odometry.cpp` (10 changed lines)
- `modules/3d/test/test_tsdf.cpp` (49 changed lines)
- `modules/core/CMakeLists.txt` (1 changed line)
- `modules/core/include/opencv2/core.hpp` (13 changed lines)
- `modules/core/perf/opencl/perf_arithm.cpp` (97 changed lines)
- `modules/core/perf/perf_arithm.cpp` (132 changed lines)
- `modules/core/src/mathfuncs.cpp` (70 changed lines)
- `modules/core/src/nan_mask.dispatch.cpp` (152 lines, new file)
- `modules/core/src/nan_mask.simd.hpp` (440 lines, new file)
- `modules/core/src/opencl/finitemask.cl` (44 lines, new file)
- `modules/core/test/ocl/test_arithm.cpp` (132 changed lines)
- `modules/core/test/test_arithm.cpp` (156 changed lines)
- `modules/core/test/test_precomp.hpp` (1 changed line)

#### `modules/3d/perf/perf_tsdf.cpp`

```diff
@@ -302,6 +302,9 @@ void renderPointsNormals(InputArray _points, InputArray _normals, OutputArray im
     Mat_<Vec4b> img = image.getMat();
 
+    Mat goods;
+    finiteMask(points, goods);
+
     Range range(0, sz.height);
     const int nstripes = -1;
     parallel_for_(range, [&](const Range&)
@@ -311,6 +314,7 @@ void renderPointsNormals(InputArray _points, InputArray _normals, OutputArray im
         Vec4b* imgRow = img[y];
         const ptype* ptsRow = points[y];
         const ptype* nrmRow = normals[y];
+        const uchar* goodRow = goods.ptr<uchar>(y);
 
         for (int x = 0; x < sz.width; x++)
         {
@@ -319,7 +323,7 @@ void renderPointsNormals(InputArray _points, InputArray _normals, OutputArray im
             Vec4b color;
-            if (cvIsNaN(p.x) || cvIsNaN(p.y) || cvIsNaN(p.z))
+            if (!goodRow[x])
             {
                 color = Vec4b(0, 32, 0, 0);
             }
@@ -357,6 +361,11 @@ void renderPointsNormalsColors(InputArray _points, InputArray, InputArray _color
     Points points = _points.getMat();
     Colors colors = _colors.getMat();
 
+    Mat goods, goodp, goodc;
+    finiteMask(points, goodp);
+    finiteMask(colors, goodc);
+    goods = goodp & goodc;
+
     Mat_<Vec4b> img = image.getMat();
     Range range(0, sz.height);
@@ -366,18 +375,16 @@ void renderPointsNormalsColors(InputArray _points, InputArray, InputArray _color
     for (int y = range.start; y < range.end; y++)
     {
         Vec4b* imgRow = img[y];
-        const ptype* ptsRow = points[y];
         const ptype* clrRow = colors[y];
+        const uchar* goodRow = goods.ptr<uchar>(y);
 
         for (int x = 0; x < sz.width; x++)
         {
-            Point3f p = fromPtype(ptsRow[x]);
             Point3f c = fromPtype(clrRow[x]);
             Vec4b color;
-            if (cvIsNaN(p.x) || cvIsNaN(p.y) || cvIsNaN(p.z)
-                || cvIsNaN(c.x) || cvIsNaN(c.y) || cvIsNaN(c.z))
+            if (!goodRow[x])
             {
                 color = Vec4b(0, 32, 0, 0);
             }
```

#### `modules/3d/src/rgbd/odometry_functions.cpp`

```diff
@@ -102,16 +102,8 @@ static void extendPyrMaskByPyrNormals(const std::vector<UMat>& pyramidNormals,
             UMat maski = pyramidMask[i];
             UMat normali = pyramidNormals[i];
             UMat validNormalMask;
-            // NaN check
-            cv::compare(normali, normali, validNormalMask, CMP_EQ);
-            CV_Assert(validNormalMask.type() == CV_8UC4);
-
-            std::vector<UMat> channelMasks;
-            split(validNormalMask, channelMasks);
-            UMat tmpChMask;
-            cv::bitwise_and(channelMasks[0], channelMasks[1], tmpChMask);
-            cv::bitwise_and(channelMasks[2], tmpChMask, tmpChMask);
-            cv::bitwise_and(maski, tmpChMask, maski);
+            finiteMask(normali, validNormalMask);
+            cv::bitwise_and(maski, validNormalMask, maski);
         }
     }
 }
@@ -727,7 +719,7 @@ void computeCorresps(const Matx33f& _K, const Mat& rt,
                 {
                     float ddst = depthDst_row[udst];
 
-                    if (maskDst_row[udst] && !cvIsNaN(ddst))
+                    if (maskDst_row[udst])
                     {
                         float transformed_ddst = static_cast<float>(ddst * (KRK_inv6_u1[udst] + KRK_inv7_v1_plus_KRK_inv8[vdst]) + ktinv.z);
```

#### `modules/3d/test/test_odometry.cpp`

```diff
@@ -16,11 +16,8 @@ void dilateFrame(Mat& image, Mat& depth)
     CV_Assert(depth.type() == CV_32FC1);
     CV_Assert(depth.size() == image.size());
 
-    Mat mask(image.size(), CV_8UC1, Scalar(255));
-    for(int y = 0; y < depth.rows; y++)
-        for(int x = 0; x < depth.cols; x++)
-            if(cvIsNaN(depth.at<float>(y,x)) || depth.at<float>(y,x) > 10 || depth.at<float>(y,x) <= FLT_EPSILON)
-                mask.at<uchar>(y,x) = 0;
+    Mat mask;
+    inRange(depth, FLT_EPSILON, 10, mask);
 
     image.setTo(255, ~mask);
     Mat minImage;
@@ -726,7 +723,8 @@ TEST(RGBD_Odometry_WarpFrame, nansAreMasked)
     ASSERT_EQ(0, rgbDiff);
 
-    Mat goodVals = (w.warpedDepth == w.warpedDepth);
+    Mat goodVals;
+    finiteMask(w.warpedDepth, goodVals);
 
     double l2diff = cv::norm(w.dstDepth, w.warpedDepth, NORM_L2, goodVals);
     double lidiff = cv::norm(w.dstDepth, w.warpedDepth, NORM_INF, goodVals);
```

#### `modules/3d/test/test_tsdf.cpp`

```diff
@@ -295,6 +295,9 @@ void renderPointsNormals(InputArray _points, InputArray _normals, OutputArray im
     Points points = _points.getMat();
     Normals normals = _normals.getMat();
 
+    Mat goods;
+    finiteMask(points, goods);
+
     Mat_<Vec4b> img = image.getMat();
     Range range(0, sz.height);
@@ -306,6 +309,7 @@ void renderPointsNormals(InputArray _points, InputArray _normals, OutputArray im
         Vec4b* imgRow = img[y];
         const ptype* ptsRow = points[y];
         const ptype* nrmRow = normals[y];
+        const uchar* goodRow = goods.ptr<uchar>(y);
 
         for (int x = 0; x < sz.width; x++)
         {
@@ -314,7 +318,7 @@ void renderPointsNormals(InputArray _points, InputArray _normals, OutputArray im
             Vec4b color;
-            if (cvIsNaN(p.x) || cvIsNaN(p.y) || cvIsNaN(p.z))
+            if (!goodRow[x])
             {
                 color = Vec4b(0, 32, 0, 0);
             }
@@ -352,6 +356,11 @@ void renderPointsNormalsColors(InputArray _points, InputArray, InputArray _color
     Points points = _points.getMat();
     Colors colors = _colors.getMat();
 
+    Mat goods, goodc, goodp;
+    finiteMask(points, goodp);
+    finiteMask(colors, goodc);
+    goods = goodp & goodc;
+
     Mat_<Vec4b> img = image.getMat();
     Range range(0, sz.height);
@@ -361,18 +370,16 @@ void renderPointsNormalsColors(InputArray _points, InputArray, InputArray _color
     for (int y = range.start; y < range.end; y++)
     {
         Vec4b* imgRow = img[y];
-        const ptype* ptsRow = points[y];
         const ptype* clrRow = colors[y];
+        const uchar* goodRow = goods.ptr<uchar>(y);
 
         for (int x = 0; x < sz.width; x++)
         {
-            Point3f p = fromPtype(ptsRow[x]);
             Point3f c = fromPtype(clrRow[x]);
             Vec4b color;
-            if (cvIsNaN(p.x) || cvIsNaN(p.y) || cvIsNaN(p.z)
-                || cvIsNaN(c.x) || cvIsNaN(c.y) || cvIsNaN(c.z))
+            if (!goodRow[x])
             {
                 color = Vec4b(0, 32, 0, 0);
             }
@@ -587,33 +594,6 @@ void boundingBoxGrowthTest(bool enableGrowth)
 }
 
-static Mat nanMask(Mat img)
-{
-    int depth = img.depth();
-    Mat mask(img.size(), CV_8U);
-    for (int y = 0; y < img.rows; y++)
-    {
-        uchar *maskRow = mask.ptr<uchar>(y);
-        if (depth == CV_32F)
-        {
-            Vec4f *imgrow = img.ptr<Vec4f>(y);
-            for (int x = 0; x < img.cols; x++)
-            {
-                maskRow[x] = (imgrow[x] == imgrow[x]) * 255;
-            }
-        }
-        else if (depth == CV_64F)
-        {
-            Vec4d *imgrow = img.ptr<Vec4d>(y);
-            for (int x = 0; x < img.cols; x++)
-            {
-                maskRow[x] = (imgrow[x] == imgrow[x]) * 255;
-            }
-        }
-    }
-    return mask;
-}
-
 template <typename VT>
 static Mat_<typename VT::value_type> normalsErrorT(Mat_<VT> srcNormals, Mat_<VT> dstNormals)
 {
@@ -725,8 +705,9 @@ void regressionVolPoseRot()
     split(uptsRot, ptsRotCh);
     Mat maskPts0 = ptsCh[2] > 0;
     Mat maskPtsRot = ptsRotCh[2] > 0;
-    Mat maskNrm0 = nanMask(mnrm);
-    Mat maskNrmRot = nanMask(mnrmRot);
+    Mat maskNrm0, maskNrmRot;
+    finiteMask(mnrm, maskNrm0);
+    finiteMask(mnrmRot, maskNrmRot);
     Mat maskPtsDiff, maskNrmDiff;
     cv::bitwise_xor(maskPts0, maskPtsRot, maskPtsDiff);
     cv::bitwise_xor(maskNrm0, maskNrmRot, maskNrmDiff);
```

#### `modules/core/CMakeLists.txt`

```diff
@@ -10,6 +10,7 @@ ocv_add_dispatched_file(has_non_zero SSE2 AVX2)
 ocv_add_dispatched_file(matmul SSE2 SSE4_1 AVX2 AVX512_SKX NEON_DOTPROD)
 ocv_add_dispatched_file(mean SSE2 AVX2)
 ocv_add_dispatched_file(merge SSE2 AVX2)
+ocv_add_dispatched_file(nan_mask SSE2 AVX2)
 ocv_add_dispatched_file(split SSE2 AVX2)
 ocv_add_dispatched_file(sum SSE2 AVX2)
```

#### `modules/core/include/opencv2/core.hpp`

```diff
@@ -1697,12 +1697,21 @@ elements.
 CV_EXPORTS_W bool checkRange(InputArray a, bool quiet = true, CV_OUT Point* pos = 0,
                              double minVal = -DBL_MAX, double maxVal = DBL_MAX);
 
-/** @brief converts NaNs to the given number
-@param a input/output matrix (CV_32F type).
+/** @brief Replaces NaNs by the given number
+@param a input/output matrix (CV_32F or CV_64F type)
 @param val value to convert the NaNs
 */
 CV_EXPORTS_W void patchNaNs(InputOutputArray a, double val = 0);
 
+/** @brief Generates a mask of finite float values, i.e. values that are neither NaN nor Inf.
+
+An element of the mask is set to 255 (all 1-bits) if all channels of the corresponding element are finite.
+@param src input matrix; must contain float or double elements with 1 to 4 channels
+@param mask output matrix of the same size as src, of type CV_8UC1
+*/
+CV_EXPORTS_W void finiteMask(InputArray src, OutputArray mask);
+
 /** @brief Performs generalized matrix multiplication.
 
 The function cv::gemm performs generalized matrix multiplication similar to the
```
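
The documented multi-channel behavior is worth spelling out: the mask element is 255 only when *every* channel is finite. A small illustrative sketch (not part of the patch):

```cpp
#include <opencv2/core.hpp>
#include <limits>

int main()
{
    const float nan = std::numeric_limits<float>::quiet_NaN();
    const float inf = std::numeric_limits<float>::infinity();

    // One row of three 2-channel pixels
    cv::Mat src = (cv::Mat_<cv::Vec2f>(1, 3) << cv::Vec2f(1.f, 2.f),   // both finite
                                                cv::Vec2f(1.f, nan),   // one NaN
                                                cv::Vec2f(inf, 2.f));  // one Inf
    cv::Mat mask;
    cv::finiteMask(src, mask);  // mask (CV_8UC1): [255, 0, 0]
    return 0;
}
```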

#### `modules/core/perf/opencl/perf_arithm.cpp`

```diff
@@ -41,6 +41,7 @@
 #include "../perf_precomp.hpp"
 #include "opencv2/ts/ocl_perf.hpp"
+#include "opencv2/core/softfloat.hpp"
 
 #ifdef HAVE_OPENCL
@@ -1038,14 +1039,40 @@ OCL_PERF_TEST_P(ConvertScaleAbsFixture, ConvertScaleAbs,
 ///////////// PatchNaNs ////////////////////////
 
+template<typename _Tp>
+_Tp randomNan(RNG& rng);
+
+template<>
+float randomNan(RNG& rng)
+{
+    uint32_t r = rng.next();
+    Cv32suf v;
+    v.u = r;
+    // set all exponent bits and one mantissa bit so the value is a NaN
+    // rather than an Inf (which has a zero mantissa)
+    v.u = v.u | 0x7f800001;
+    return v.f;
+}
+
+template<>
+double randomNan(RNG& rng)
+{
+    uint32_t r0 = rng.next();
+    uint32_t r1 = rng.next();
+    Cv64suf v;
+    v.u = (uint64_t(r0) << 32) | uint64_t(r1);
+    // set all exponent bits and one mantissa bit so the value is a NaN
+    // rather than an Inf (which has a zero mantissa)
+    v.u = v.u | 0x7ff0000000000001;
+    return v.f;
+}
+
 typedef Size_MatType PatchNaNsFixture;
 
 OCL_PERF_TEST_P(PatchNaNsFixture, PatchNaNs,
-                ::testing::Combine(OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
+                ::testing::Combine(OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC3, CV_32FC4, CV_64FC1, CV_64FC3, CV_64FC4)))
 {
     const Size_MatType_t params = GetParam();
     Size srcSize = get<0>(params);
-    const int type = get<1>(params), cn = CV_MAT_CN(type);
+    const int type = get<1>(params), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
 
     checkDeviceMaxMemoryAllocSize(srcSize, type);
@@ -1056,11 +1083,22 @@ OCL_PERF_TEST_P(PatchNaNsFixture, PatchNaNs,
     {
         Mat src_ = src.getMat(ACCESS_RW);
         srcSize.width *= cn;
+        RNG& rng = theRNG();
         for (int y = 0; y < srcSize.height; ++y)
         {
-            float * const ptr = src_.ptr<float>(y);
+            float  *const ptrf = src_.ptr<float>(y);
+            double *const ptrd = src_.ptr<double>(y);
             for (int x = 0; x < srcSize.width; ++x)
-                ptr[x] = (x + y) % 2 == 0 ? std::numeric_limits<float>::quiet_NaN() : ptr[x];
+            {
+                if (depth == CV_32F)
+                {
+                    ptrf[x] = (x + y) % 2 == 0 ? randomNan<float>(rng) : ptrf[x];
+                }
+                else if (depth == CV_64F)
+                {
+                    ptrd[x] = (x + y) % 2 == 0 ? randomNan<double>(rng) : ptrd[x];
+                }
+            }
         }
     }
@@ -1069,6 +1107,57 @@ OCL_PERF_TEST_P(PatchNaNsFixture, PatchNaNs,
     SANITY_CHECK(src);
 }
 
+////////////// finiteMask ////////////////////////
+
+typedef Size_MatType FiniteMaskFixture;
+
+OCL_PERF_TEST_P(FiniteMaskFixture, FiniteMask,
+                ::testing::Combine(OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC3, CV_32FC4, CV_64FC1, CV_64FC3, CV_64FC4)))
+{
+    const Size_MatType_t params = GetParam();
+    Size srcSize = get<0>(params);
+    const int type = get<1>(params), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
+
+    checkDeviceMaxMemoryAllocSize(srcSize, type);
+
+    UMat src(srcSize, type);
+    UMat mask(srcSize, CV_8UC1);
+    declare.in(src, WARMUP_RNG).out(mask);
+
+    // generating NaNs and Infs
+    {
+        Mat src_ = src.getMat(ACCESS_RW);
+        srcSize.width *= cn;
+        const softfloat  fpinf = softfloat::inf();
+        const softfloat  fninf = softfloat::inf().setSign(true);
+        const softdouble dpinf = softdouble::inf();
+        const softdouble dninf = softdouble::inf().setSign(true);
+        RNG& rng = theRNG();
+        for (int y = 0; y < srcSize.height; ++y)
+        {
+            float  *const ptrf = src_.ptr<float>(y);
+            double *const ptrd = src_.ptr<double>(y);
+            for (int x = 0; x < srcSize.width; ++x)
+            {
+                int rem = (x + y) % 10;
+                if (depth == CV_32F)
+                {
+                    ptrf[x] = rem < 4  ? randomNan<float>(rng) :
+                              rem == 5 ? (float )((x + y) % 2 ? fpinf : fninf) : ptrf[x];
+                }
+                else if (depth == CV_64F)
+                {
+                    ptrd[x] = rem < 4  ? randomNan<double>(rng) :
+                              rem == 5 ? (double)((x + y) % 2 ? dpinf : dninf) : ptrd[x];
+                }
+            }
+        }
+    }
+
+    OCL_TEST_CYCLE() cv::finiteMask(src, mask);
+
+    SANITY_CHECK(mask);
+}
+
 ///////////// ScaleAdd ////////////////////////
```

#### `modules/core/perf/perf_arithm.cpp`

```diff
@@ -1,4 +1,5 @@
 #include "perf_precomp.hpp"
+#include "opencv2/core/softfloat.hpp"
 #include <numeric>
 
 namespace opencv_test
@@ -451,4 +452,135 @@ INSTANTIATE_TEST_CASE_P(/*nothing*/ , BinaryOpTest,
     )
 );
 
+///////////// PatchNaNs ////////////////////////
+
+template<typename _Tp>
+_Tp randomNan(RNG& rng);
+
+template<>
+float randomNan(RNG& rng)
+{
+    uint32_t r = rng.next();
+    Cv32suf v;
+    v.u = r;
+    // set all exponent bits and one mantissa bit so the value is a NaN
+    // rather than an Inf (which has a zero mantissa)
+    v.u = v.u | 0x7f800001;
+    return v.f;
+}
+
+template<>
+double randomNan(RNG& rng)
+{
+    uint32_t r0 = rng.next();
+    uint32_t r1 = rng.next();
+    Cv64suf v;
+    v.u = (uint64_t(r0) << 32) | uint64_t(r1);
+    // set all exponent bits and one mantissa bit so the value is a NaN
+    // rather than an Inf (which has a zero mantissa)
+    v.u = v.u | 0x7ff0000000000001;
+    return v.f;
+}
+
+typedef Size_MatType PatchNaNsFixture;
+
+PERF_TEST_P_(PatchNaNsFixture, PatchNaNs)
+{
+    const Size_MatType_t params = GetParam();
+    Size srcSize = get<0>(params);
+    const int type = get<1>(params), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
+
+    Mat src(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(src);
+
+    // generating NaNs
+    {
+        srcSize.width *= cn;
+        RNG& rng = theRNG();
+        for (int y = 0; y < srcSize.height; ++y)
+        {
+            float  *const ptrf = src.ptr<float>(y);
+            double *const ptrd = src.ptr<double>(y);
+            for (int x = 0; x < srcSize.width; ++x)
+            {
+                if (depth == CV_32F)
+                {
+                    ptrf[x] = (x + y) % 2 == 0 ? randomNan<float>(rng) : ptrf[x];
+                }
+                else if (depth == CV_64F)
+                {
+                    ptrd[x] = (x + y) % 2 == 0 ? randomNan<double>(rng) : ptrd[x];
+                }
+            }
+        }
+    }
+
+    TEST_CYCLE() cv::patchNaNs(src, 17.7);
+
+    SANITY_CHECK(src);
+}
+
+INSTANTIATE_TEST_CASE_P(/*nothing*/ , PatchNaNsFixture,
+    testing::Combine(
+        testing::Values(szVGA, sz720p, sz1080p, sz2160p),
+        testing::Values(CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4, CV_64FC1, CV_64FC2, CV_64FC3, CV_64FC4)
+    )
+);
+
+////////////// finiteMask ////////////////////////
+
+typedef Size_MatType FiniteMaskFixture;
+
+PERF_TEST_P_(FiniteMaskFixture, FiniteMask)
+{
+    const Size_MatType_t params = GetParam();
+    Size srcSize = get<0>(params);
+    const int type = get<1>(params), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
+
+    Mat src(srcSize, type);
+    Mat mask(srcSize, CV_8UC1);
+    declare.in(src, WARMUP_RNG).out(mask);
+
+    // generating NaNs and Infs
+    {
+        srcSize.width *= cn;
+        const softfloat  fpinf = softfloat::inf();
+        const softfloat  fninf = softfloat::inf().setSign(true);
+        const softdouble dpinf = softdouble::inf();
+        const softdouble dninf = softdouble::inf().setSign(true);
+        RNG& rng = theRNG();
+        for (int y = 0; y < srcSize.height; ++y)
+        {
+            float  *const ptrf = src.ptr<float>(y);
+            double *const ptrd = src.ptr<double>(y);
+            for (int x = 0; x < srcSize.width; ++x)
+            {
+                int rem = (x + y) % 10;
+                if (depth == CV_32F)
+                {
+                    ptrf[x] = rem < 4  ? randomNan<float>(rng) :
+                              rem == 5 ? (float )((x + y) % 2 ? fpinf : fninf) : ptrf[x];
+                }
+                else if (depth == CV_64F)
+                {
+                    ptrd[x] = rem < 4  ? randomNan<double>(rng) :
+                              rem == 5 ? (double)((x + y) % 2 ? dpinf : dninf) : ptrd[x];
+                }
+            }
+        }
+    }
+
+    TEST_CYCLE() cv::finiteMask(src, mask);
+
+    SANITY_CHECK(mask);
+}
+
+INSTANTIATE_TEST_CASE_P(/*nothing*/ , FiniteMaskFixture,
+    testing::Combine(
+        testing::Values(szVGA, sz720p, sz1080p, sz2160p),
+        testing::Values(CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4, CV_64FC1, CV_64FC2, CV_64FC3, CV_64FC4)
+    )
+);
+
 } // namespace
```

#### `modules/core/src/mathfuncs.cpp`

```diff
@@ -1574,75 +1574,7 @@ bool checkRange(InputArray _src, bool quiet, Point* pt, double minVal, double ma
     return true;
 }
 
-#ifdef HAVE_OPENCL
-
-static bool ocl_patchNaNs( InputOutputArray _a, float value )
-{
-    int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
-    ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
-                  format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=float -D DEPTH_dst=%d -D rowsPerWI=%d",
-                         CV_32F, rowsPerWI));
-    if (k.empty())
-        return false;
-
-    UMat a = _a.getUMat();
-    int cn = a.channels();
-
-    k.args(ocl::KernelArg::ReadOnlyNoSize(a),
-           ocl::KernelArg::WriteOnly(a, cn), (float)value);
-
-    size_t globalsize[2] = { (size_t)a.cols * cn, ((size_t)a.rows + rowsPerWI - 1) / rowsPerWI };
-    return k.run(2, globalsize, NULL, false);
-}
-
-#endif
-
-void patchNaNs( InputOutputArray _a, double _val )
-{
-    CV_INSTRUMENT_REGION();
-
-    CV_Assert( _a.depth() == CV_32F );
-
-    CV_OCL_RUN(_a.isUMat() && _a.dims() <= 2,
-               ocl_patchNaNs(_a, (float)_val))
-
-    Mat a = _a.getMat();
-    const Mat* arrays[] = {&a, 0};
-    int* ptrs[1] = {};
-    NAryMatIterator it(arrays, (uchar**)ptrs);
-    size_t len = it.size*a.channels();
-    Cv32suf val;
-    val.f = (float)_val;
-
-#if (CV_SIMD || CV_SIMD_SCALABLE)
-    v_int32 v_mask1 = vx_setall_s32(0x7fffffff), v_mask2 = vx_setall_s32(0x7f800000);
-    v_int32 v_val = vx_setall_s32(val.i);
-#endif
-
-    for( size_t i = 0; i < it.nplanes; i++, ++it )
-    {
-        int* tptr = ptrs[0];
-        size_t j = 0;
-
-#if (CV_SIMD || CV_SIMD_SCALABLE)
-        size_t cWidth = (size_t)VTraits<v_int32>::vlanes();
-        for ( ; j + cWidth <= len; j += cWidth)
-        {
-            v_int32 v_src = vx_load(tptr + j);
-            v_int32 v_cmp_mask = v_lt(v_mask2, v_and(v_src, v_mask1));
-            v_int32 v_dst = v_select(v_cmp_mask, v_val, v_src);
-            v_store(tptr + j, v_dst);
-        }
-        vx_cleanup();
-#endif
-
-        for( ; j < len; j++ )
-            if( (tptr[j] & 0x7fffffff) > 0x7f800000 )
-                tptr[j] = val.i;
-    }
-}
-
-}
+} // namespace cv
 
 #ifndef OPENCV_EXCLUDE_C_API
```

#### `modules/core/src/nan_mask.dispatch.cpp` (new file)

```cpp
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#include "nan_mask.simd.hpp"
#include "nan_mask.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
namespace cv {
#ifdef HAVE_OPENCL
static bool ocl_patchNaNs( InputOutputArray _a, double value )
{
int ftype = _a.depth();
const ocl::Device d = ocl::Device::getDefault();
bool doubleSupport = d.doubleFPConfig() > 0;
if (!doubleSupport && ftype == CV_64F)
{
return false;
}
int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d %s",
ftype == CV_64F ? "double" : "float", ftype, rowsPerWI,
doubleSupport ? "-D DOUBLE_SUPPORT" : ""));
if (k.empty())
return false;
UMat a = _a.getUMat();
int cn = a.channels();
// to pass float or double to args
if (ftype == CV_32F)
{
k.args(ocl::KernelArg::ReadOnlyNoSize(a), ocl::KernelArg::WriteOnly(a, cn), (float)value);
}
else // CV_64F
{
k.args(ocl::KernelArg::ReadOnlyNoSize(a), ocl::KernelArg::WriteOnly(a, cn), value);
}
size_t globalsize[2] = { (size_t)a.cols * cn, ((size_t)a.rows + rowsPerWI - 1) / rowsPerWI };
return k.run(2, globalsize, NULL, false);
}
#endif
static PatchNanFunc getPatchNanFunc(bool isDouble)
{
CV_INSTRUMENT_REGION();
CV_CPU_DISPATCH(getPatchNanFunc, (isDouble), CV_CPU_DISPATCH_MODES_ALL);
}
void patchNaNs( InputOutputArray _a, double _val )
{
CV_INSTRUMENT_REGION();
CV_Assert( _a.depth() == CV_32F || _a.depth() == CV_64F);
CV_OCL_RUN(_a.isUMat() && _a.dims() <= 2,
ocl_patchNaNs(_a, _val))
Mat a = _a.getMat();
const Mat* arrays[] = {&a, 0};
uchar* ptrs[1] = {};
NAryMatIterator it(arrays, ptrs);
size_t len = it.size*a.channels();
PatchNanFunc func = getPatchNanFunc(_a.depth() == CV_64F);
for (size_t i = 0; i < it.nplanes; i++, ++it)
{
func(ptrs[0], len, _val);
}
}
#ifdef HAVE_OPENCL
static bool ocl_finiteMask(const UMat img, UMat mask)
{
int channels = img.channels();
int depth = img.depth();
const ocl::Device d = ocl::Device::getDefault();
bool doubleSupport = d.doubleFPConfig() > 0;
if (!doubleSupport && depth == CV_64F)
{
return false;
}
int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
ocl::Kernel k("finiteMask", ocl::core::finitemask_oclsrc,
format("-D srcT=%s -D cn=%d -D rowsPerWI=%d %s",
depth == CV_32F ? "float" : "double", channels, rowsPerWI,
doubleSupport ? "-D DOUBLE_SUPPORT" : ""));
if (k.empty())
return false;
k.args(ocl::KernelArg::ReadOnlyNoSize(img), ocl::KernelArg::WriteOnly(mask));
size_t globalsize[2] = { (size_t)img.cols, ((size_t)img.rows + rowsPerWI - 1) / rowsPerWI };
return k.run(2, globalsize, NULL, false);
}
#endif
static FiniteMaskFunc getFiniteMaskFunc(bool isDouble, int cn)
{
CV_INSTRUMENT_REGION();
CV_CPU_DISPATCH(getFiniteMaskFunc, (isDouble, cn), CV_CPU_DISPATCH_MODES_ALL);
}
void finiteMask(InputArray _src, OutputArray _mask)
{
CV_INSTRUMENT_REGION();
int channels = _src.channels();
int depth = _src.depth();
CV_Assert( channels > 0 && channels <= 4);
CV_Assert( depth == CV_32F || depth == CV_64F );
std::vector<int> vsz(_src.dims());
_src.sizend(vsz.data());
_mask.create(_src.dims(), vsz.data(), CV_8UC1);
CV_OCL_RUN(_src.isUMat() && _mask.isUMat() && _src.dims() <= 2,
ocl_finiteMask(_src.getUMat(), _mask.getUMat()));
Mat src = _src.getMat();
Mat mask = _mask.getMat();
const Mat *arrays[]={&src, &mask, 0};
Mat planes[2];
NAryMatIterator it(arrays, planes);
size_t total = planes[0].total();
size_t i, nplanes = it.nplanes;
FiniteMaskFunc func = getFiniteMaskFunc((depth == CV_64F), channels);
for( i = 0; i < nplanes; i++, ++it )
{
const uchar* sptr = planes[0].ptr();
uchar* dptr = planes[1].ptr();
func(sptr, dptr, total);
}
}
} // namespace cv
```

#### `modules/core/src/nan_mask.simd.hpp` (new file)

```cpp
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#include "precomp.hpp"
namespace cv {
typedef void (*PatchNanFunc)(uchar* tptr, size_t len, double newVal);
typedef void (*FiniteMaskFunc)(const uchar *src, uchar *dst, size_t total);
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
PatchNanFunc getPatchNanFunc(bool isDouble);
FiniteMaskFunc getFiniteMaskFunc(bool isDouble, int cn);
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
static void patchNaNs_32f(uchar* ptr, size_t ulen, double newVal)
{
CV_INSTRUMENT_REGION();
int32_t* tptr = (int32_t*)ptr;
Cv32suf val;
val.f = (float)newVal;
int j = 0;
int len = (int)ulen;
#if (CV_SIMD || CV_SIMD_SCALABLE)
v_int32 v_pos_mask = vx_setall_s32(0x7fffffff), v_exp_mask = vx_setall_s32(0x7f800000);
v_int32 v_val = vx_setall_s32(val.i);
int cWidth = VTraits<v_int32>::vlanes();
for (; j < len - cWidth*2 + 1; j += cWidth*2)
{
v_int32 v_src0 = vx_load(tptr + j);
v_int32 v_src1 = vx_load(tptr + j + cWidth);
v_int32 v_cmp_mask0 = v_lt(v_exp_mask, v_and(v_src0, v_pos_mask));
v_int32 v_cmp_mask1 = v_lt(v_exp_mask, v_and(v_src1, v_pos_mask));
if (v_check_any(v_or(v_cmp_mask0, v_cmp_mask1)))
{
v_int32 v_dst0 = v_select(v_cmp_mask0, v_val, v_src0);
v_int32 v_dst1 = v_select(v_cmp_mask1, v_val, v_src1);
v_store(tptr + j, v_dst0);
v_store(tptr + j + cWidth, v_dst1);
}
}
#endif
for (; j < len; j++)
{
if ((tptr[j] & 0x7fffffff) > 0x7f800000)
{
tptr[j] = val.i;
}
}
}
static void patchNaNs_64f(uchar* ptr, size_t ulen, double newVal)
{
CV_INSTRUMENT_REGION();
int64_t* tptr = (int64_t*)ptr;
Cv64suf val;
val.f = newVal;
int j = 0;
int len = (int)ulen;
#if (CV_SIMD || CV_SIMD_SCALABLE)
v_int64 v_exp_mask = vx_setall_s64(0x7FF0000000000000);
v_int64 v_pos_mask = vx_setall_s64(0x7FFFFFFFFFFFFFFF);
v_int64 v_val = vx_setall_s64(val.i);
int cWidth = VTraits<v_int64>::vlanes();
for (; j < len - cWidth * 2 + 1; j += cWidth*2)
{
v_int64 v_src0 = vx_load(tptr + j);
v_int64 v_src1 = vx_load(tptr + j + cWidth);
v_int64 v_cmp_mask0 = v_lt(v_exp_mask, v_and(v_src0, v_pos_mask));
v_int64 v_cmp_mask1 = v_lt(v_exp_mask, v_and(v_src1, v_pos_mask));
if (v_check_any(v_cmp_mask0) || v_check_any(v_cmp_mask1))
{
// v_select is not available for v_int64, emulating it
v_int32 val32 = v_reinterpret_as_s32(v_val);
v_int64 v_dst0 = v_reinterpret_as_s64(v_select(v_reinterpret_as_s32(v_cmp_mask0), val32, v_reinterpret_as_s32(v_src0)));
v_int64 v_dst1 = v_reinterpret_as_s64(v_select(v_reinterpret_as_s32(v_cmp_mask1), val32, v_reinterpret_as_s32(v_src1)));
v_store(tptr + j, v_dst0);
v_store(tptr + j + cWidth, v_dst1);
}
}
#endif
for (; j < len; j++)
if ((tptr[j] & 0x7FFFFFFFFFFFFFFF) > 0x7FF0000000000000)
tptr[j] = val.i;
}
PatchNanFunc getPatchNanFunc(bool isDouble)
{
return isDouble ? (PatchNanFunc)GET_OPTIMIZED(patchNaNs_64f)
: (PatchNanFunc)GET_OPTIMIZED(patchNaNs_32f);
}
////// finiteMask //////
#if (CV_SIMD || CV_SIMD_SCALABLE)
template <typename _Tp, int cn>
int finiteMaskSIMD_(const _Tp *src, uchar *dst, size_t total);
template <>
int finiteMaskSIMD_<float, 1>(const float *fsrc, uchar *dst, size_t utotal)
{
const uint32_t* src = (const uint32_t*)fsrc;
const int osize = VTraits<v_uint8>::vlanes();
v_uint32 vmaskExp = vx_setall_u32(0x7f800000);
int i = 0;
int total = (int)utotal;
for(; i < total - osize + 1; i += osize )
{
v_uint32 vv0, vv1, vv2, vv3;
vv0 = v_ne(v_and(vx_load(src + i ), vmaskExp), vmaskExp);
vv1 = v_ne(v_and(vx_load(src + i + (osize/4)), vmaskExp), vmaskExp);
vv2 = v_ne(v_and(vx_load(src + i + 2*(osize/4)), vmaskExp), vmaskExp);
vv3 = v_ne(v_and(vx_load(src + i + 3*(osize/4)), vmaskExp), vmaskExp);
v_store(dst + i, v_pack_b(vv0, vv1, vv2, vv3));
}
return i;
}
template <>
int finiteMaskSIMD_<float, 2>(const float *fsrc, uchar *dst, size_t utotal)
{
const uint32_t* src = (const uint32_t*)fsrc;
const int size8 = VTraits<v_uint8>::vlanes();
v_uint32 vmaskExp = vx_setall_u32(0x7f800000);
v_uint16 vmaskBoth = vx_setall_u16(0xffff);
int i = 0;
int total = (int)utotal;
for(; i < total - (size8 / 2) + 1; i += (size8 / 2) )
{
v_uint32 vv0, vv1, vv2, vv3;
vv0 = v_ne(v_and(vx_load(src + i*2 ), vmaskExp), vmaskExp);
vv1 = v_ne(v_and(vx_load(src + i*2 + (size8 / 4)), vmaskExp), vmaskExp);
vv2 = v_ne(v_and(vx_load(src + i*2 + 2*(size8 / 4)), vmaskExp), vmaskExp);
vv3 = v_ne(v_and(vx_load(src + i*2 + 3*(size8 / 4)), vmaskExp), vmaskExp);
v_uint8 velems = v_pack_b(vv0, vv1, vv2, vv3);
v_uint16 vfinite = v_eq(v_reinterpret_as_u16(velems), vmaskBoth);
// the 2nd argument of v_pack is a don't-care; vfinite is passed twice
v_store_low(dst + i, v_pack(vfinite, vfinite));
}
return i;
}
template <>
int finiteMaskSIMD_<float, 3>(const float *fsrc, uchar *dst, size_t utotal)
{
const uint32_t* src = (const uint32_t*)fsrc;
const int npixels = VTraits<v_float32>::vlanes();
v_uint32 vmaskExp = vx_setall_u32(0x7f800000);
v_uint32 z = vx_setzero_u32();
int i = 0;
int total = (int)utotal;
for (; i < total - npixels + 1; i += npixels)
{
v_uint32 vv0, vv1, vv2;
vv0 = v_ne(v_and(vx_load(src + i*3 ), vmaskExp), vmaskExp);
vv1 = v_ne(v_and(vx_load(src + i*3 + npixels), vmaskExp), vmaskExp);
vv2 = v_ne(v_and(vx_load(src + i*3 + 2*npixels), vmaskExp), vmaskExp);
v_uint8 velems = v_pack_b(vv0, vv1, vv2, z);
// 2nd arg is useless
v_uint8 vsh1 = v_extract<1>(velems, velems);
v_uint8 vsh2 = v_extract<2>(velems, velems);
v_uint8 vres3 = v_and(v_and(velems, vsh1), vsh2);
for (int j = 0; j < npixels; j++)
{
dst[i + j] = v_get0(vres3);
// 2nd arg is useless
vres3 = v_extract<3>(vres3, vres3);
}
}
return i;
}
template <>
int finiteMaskSIMD_<float, 4>(const float *fsrc, uchar *dst, size_t utotal)
{
const uint32_t* src = (const uint32_t*)fsrc;
const int npixels = VTraits<v_uint8>::vlanes() / 2;
const int nfloats = VTraits<v_uint32>::vlanes();
const v_uint32 vMaskExp = vx_setall_u32(0x7f800000);
v_uint32 vmaskAll4 = vx_setall_u32(0xFFFFFFFF);
int i = 0;
int total = (int)utotal;
for(; i < total - npixels + 1; i += npixels )
{
v_uint32 v0 = vx_load(src + i * 4 + 0*nfloats);
v_uint32 v1 = vx_load(src + i * 4 + 1*nfloats);
v_uint32 v2 = vx_load(src + i * 4 + 2*nfloats);
v_uint32 v3 = vx_load(src + i * 4 + 3*nfloats);
v_uint32 v4 = vx_load(src + i * 4 + 4*nfloats);
v_uint32 v5 = vx_load(src + i * 4 + 5*nfloats);
v_uint32 v6 = vx_load(src + i * 4 + 6*nfloats);
v_uint32 v7 = vx_load(src + i * 4 + 7*nfloats);
v_uint32 vmask0 = v_ne(v_and(v0, vMaskExp), vMaskExp);
v_uint32 vmask1 = v_ne(v_and(v1, vMaskExp), vMaskExp);
v_uint32 vmask2 = v_ne(v_and(v2, vMaskExp), vMaskExp);
v_uint32 vmask3 = v_ne(v_and(v3, vMaskExp), vMaskExp);
v_uint32 vmask4 = v_ne(v_and(v4, vMaskExp), vMaskExp);
v_uint32 vmask5 = v_ne(v_and(v5, vMaskExp), vMaskExp);
v_uint32 vmask6 = v_ne(v_and(v6, vMaskExp), vMaskExp);
v_uint32 vmask7 = v_ne(v_and(v7, vMaskExp), vMaskExp);
v_uint8 velems0 = v_pack_b(vmask0, vmask1, vmask2, vmask3);
v_uint8 velems1 = v_pack_b(vmask4, vmask5, vmask6, vmask7);
v_uint32 vresWide0 = v_eq(v_reinterpret_as_u32(velems0), vmaskAll4);
v_uint32 vresWide1 = v_eq(v_reinterpret_as_u32(velems1), vmaskAll4);
// last 2 args are useless
v_uint8 vres = v_pack_b(vresWide0, vresWide1, vresWide0, vresWide1);
v_store_low(dst + i, vres);
}
return i;
}
template <>
int finiteMaskSIMD_<double, 1>(const double *dsrc, uchar *dst, size_t utotal)
{
const uint64_t* src = (const uint64_t*)dsrc;
const int size8 = VTraits<v_uint8>::vlanes();
int i = 0;
int total = (int)utotal;
v_uint64 vmaskExp = vx_setall_u64(0x7ff0000000000000);
v_uint64 z = vx_setzero_u64();
for(; i < total - (size8 / 2) + 1; i += (size8 / 2) )
{
v_uint64 vv0, vv1, vv2, vv3;
vv0 = v_ne(v_and(vx_load(src + i ), vmaskExp), vmaskExp);
vv1 = v_ne(v_and(vx_load(src + i + (size8 / 8)), vmaskExp), vmaskExp);
vv2 = v_ne(v_and(vx_load(src + i + 2*(size8 / 8)), vmaskExp), vmaskExp);
vv3 = v_ne(v_and(vx_load(src + i + 3*(size8 / 8)), vmaskExp), vmaskExp);
v_uint8 v = v_pack_b(vv0, vv1, vv2, vv3, z, z, z, z);
v_store_low(dst + i, v);
}
return i;
}
template <>
int finiteMaskSIMD_<double, 2>(const double *dsrc, uchar *dst, size_t utotal)
{
const uint64_t* src = (const uint64_t*)dsrc;
const int npixels = VTraits<v_uint8>::vlanes() / 2;
const int ndoubles = VTraits<v_uint64>::vlanes();
v_uint64 vmaskExp = vx_setall_u64(0x7ff0000000000000);
v_uint16 vmaskBoth = vx_setall_u16(0xffff);
int i = 0;
int total = (int)utotal;
for(; i < total - npixels + 1; i += npixels )
{
v_uint64 vv0 = v_ne(v_and(vx_load(src + i*2 + 0*ndoubles), vmaskExp), vmaskExp);
v_uint64 vv1 = v_ne(v_and(vx_load(src + i*2 + 1*ndoubles), vmaskExp), vmaskExp);
v_uint64 vv2 = v_ne(v_and(vx_load(src + i*2 + 2*ndoubles), vmaskExp), vmaskExp);
v_uint64 vv3 = v_ne(v_and(vx_load(src + i*2 + 3*ndoubles), vmaskExp), vmaskExp);
v_uint64 vv4 = v_ne(v_and(vx_load(src + i*2 + 4*ndoubles), vmaskExp), vmaskExp);
v_uint64 vv5 = v_ne(v_and(vx_load(src + i*2 + 5*ndoubles), vmaskExp), vmaskExp);
v_uint64 vv6 = v_ne(v_and(vx_load(src + i*2 + 6*ndoubles), vmaskExp), vmaskExp);
v_uint64 vv7 = v_ne(v_and(vx_load(src + i*2 + 7*ndoubles), vmaskExp), vmaskExp);
v_uint8 velems0 = v_pack_b(vv0, vv1, vv2, vv3, vv4, vv5, vv6, vv7);
v_uint16 vfinite0 = v_eq(v_reinterpret_as_u16(velems0), vmaskBoth);
// 2nd arg is useless
v_uint8 vres = v_pack(vfinite0, vfinite0);
v_store_low(dst + i, vres);
}
return i;
}
template <>
int finiteMaskSIMD_<double, 3>(const double *dsrc, uchar *dst, size_t utotal)
{
//TODO: vectorize it properly
const uint64_t* src = (const uint64_t*)dsrc;
const int npixels = VTraits<v_uint8>::vlanes() / 2;
uint64_t maskExp = 0x7ff0000000000000;
int i = 0;
int total = (int)utotal;
for(; i < total - npixels + 1; i += npixels )
{
for (int j = 0; j < npixels; j++)
{
uint64_t val0 = src[i * 3 + j * 3 + 0];
uint64_t val1 = src[i * 3 + j * 3 + 1];
uint64_t val2 = src[i * 3 + j * 3 + 2];
bool finite = ((val0 & maskExp) != maskExp) &&
((val1 & maskExp) != maskExp) &&
((val2 & maskExp) != maskExp);
dst[i + j] = finite ? 255 : 0;
}
}
return i;
}
template <>
int finiteMaskSIMD_<double, 4>(const double *dsrc, uchar *dst, size_t utotal)
{
//TODO: vectorize it properly
uint64_t* src = (uint64_t*)dsrc;
const int npixels = VTraits<v_uint8>::vlanes() / 2;
const int ndoubles = VTraits<v_uint64>::vlanes();
v_uint16 vmaskExp16 = vx_setall_u16(0x7ff0);
v_uint32 z = vx_setzero_u32();
int i = 0;
int total = (int)utotal;
for(; i < total - npixels + 1; i += npixels )
{
v_uint16 vexpb0, vexpb1, vexpb2, vexpb3, vexpb4, vexpb5, vexpb6, vexpb7;
v_uint16 dummy;
v_load_deinterleave((uint16_t*)(src + 0*4*ndoubles), dummy, dummy, dummy, vexpb0);
v_load_deinterleave((uint16_t*)(src + 1*4*ndoubles), dummy, dummy, dummy, vexpb1);
v_load_deinterleave((uint16_t*)(src + 2*4*ndoubles), dummy, dummy, dummy, vexpb2);
v_load_deinterleave((uint16_t*)(src + 3*4*ndoubles), dummy, dummy, dummy, vexpb3);
v_uint16 vcmp0 = v_eq(v_and(vexpb0, vmaskExp16), vmaskExp16);
v_uint16 vcmp1 = v_eq(v_and(vexpb1, vmaskExp16), vmaskExp16);
v_uint16 vcmp2 = v_eq(v_and(vexpb2, vmaskExp16), vmaskExp16);
v_uint16 vcmp3 = v_eq(v_and(vexpb3, vmaskExp16), vmaskExp16);
v_uint8 velems0 = v_pack(vcmp0, vcmp1);
v_uint8 velems1 = v_pack(vcmp2, vcmp3);
v_uint32 vResWide0 = v_eq(v_reinterpret_as_u32(velems0), z);
v_uint32 vResWide1 = v_eq(v_reinterpret_as_u32(velems1), z);
v_uint16 vp16 = v_pack(vResWide0, vResWide1);
// 2nd arg is useless
v_uint8 vres = v_pack(vp16, vp16);
v_store_low(dst, vres);
src += npixels * 4;
dst += npixels;
}
return i;
}
#endif
template <typename _Tp, int cn>
void finiteMask_(const uchar *src, uchar *dst, size_t total)
{
CV_INSTRUMENT_REGION();
size_t i = 0;
const _Tp* tsrc = (const _Tp*) src;
#if (CV_SIMD || CV_SIMD_SCALABLE)
i = finiteMaskSIMD_<_Tp, cn>(tsrc, dst, total);
#endif
for(; i < total; i++ )
{
bool finite = true;
for (int c = 0; c < cn; c++)
{
_Tp val = tsrc[i * cn + c];
finite = finite && !cvIsNaN(val) && !cvIsInf(val);
}
dst[i] = finite ? 255 : 0;
}
}
FiniteMaskFunc getFiniteMaskFunc(bool isDouble, int cn)
{
static FiniteMaskFunc tab[] =
{
(FiniteMaskFunc)GET_OPTIMIZED((finiteMask_<float, 1>)),
(FiniteMaskFunc)GET_OPTIMIZED((finiteMask_<float, 2>)),
(FiniteMaskFunc)GET_OPTIMIZED((finiteMask_<float, 3>)),
(FiniteMaskFunc)GET_OPTIMIZED((finiteMask_<float, 4>)),
(FiniteMaskFunc)GET_OPTIMIZED((finiteMask_<double, 1>)),
(FiniteMaskFunc)GET_OPTIMIZED((finiteMask_<double, 2>)),
(FiniteMaskFunc)GET_OPTIMIZED((finiteMask_<double, 3>)),
(FiniteMaskFunc)GET_OPTIMIZED((finiteMask_<double, 4>)),
};
int idx = (isDouble ? 4 : 0) + cn - 1;
return tab[idx];
}
#endif
CV_CPU_OPTIMIZATION_NAMESPACE_END
} // namespace cv
```
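
Both the SIMD kernels and the scalar tails above rest on the IEEE-754 layout: a value has all exponent bits set exactly when it is NaN or Inf, and a NaN additionally has a non-zero mantissa. A standalone sketch checking the two bit predicates against the standard library (illustration only, not part of the patch):

```cpp
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// NaN test used by patchNaNs_32f: the magnitude bits compare strictly
// greater than the +Inf pattern (exponent all ones, mantissa non-zero).
static bool isNaNBits(float f)
{
    uint32_t u; std::memcpy(&u, &f, sizeof(u));
    return (u & 0x7fffffff) > 0x7f800000;
}

// Finiteness test used by finiteMask: the exponent field is not all ones.
static bool isFiniteBits(float f)
{
    uint32_t u; std::memcpy(&u, &f, sizeof(u));
    return (u & 0x7f800000) != 0x7f800000;
}

int main()
{
    const float vals[] = { 0.f, -1.5f, NAN, INFINITY, -INFINITY };
    for (float v : vals)
    {
        assert(isNaNBits(v)    == std::isnan(v));
        assert(isFiniteBits(v) == std::isfinite(v));
    }
    return 0;
}
```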

#### `modules/core/src/opencl/finitemask.cl` (new file)

```c
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
// This kernel is compiled with the following possible defines:
// - srcT, cn: source type and number of channels per pixel
// - rowsPerWI: Intel GPU optimization
// - DOUBLE_SUPPORT: enable double support if available
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
__kernel void finiteMask(__global const uchar * srcptr, int srcstep, int srcoffset,
__global uchar * dstptr, int dststep, int dstoffset,
int rows, int cols )
{
int x = get_global_id(0);
int y0 = get_global_id(1) * rowsPerWI;
if (x < cols)
{
int src_index = mad24(y0, srcstep, mad24(x, (int)sizeof(srcT) * cn, srcoffset));
int dst_index = mad24(y0, dststep, x + dstoffset);
for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y, src_index += srcstep, dst_index += dststep)
{
bool vfinite = true;
for (int c = 0; c < cn; c++)
{
srcT val = *(__global srcT *)(srcptr + src_index + c * (int)sizeof(srcT));
vfinite = vfinite && !isnan(val) && !isinf(val);
}
*(dstptr + dst_index) = vfinite ? 255 : 0;
}
}
}
```

#### `modules/core/test/ocl/test_arithm.cpp`

```diff
@@ -1699,8 +1699,36 @@ OCL_TEST_P(ScaleAdd, Mat)
 //////////////////////////////// PatchNans ////////////////////////////////////////////////
 
-PARAM_TEST_CASE(PatchNaNs, Channels, bool)
+template<typename _Tp>
+_Tp randomNan(RNG& rng);
+
+template<>
+float randomNan(RNG& rng)
+{
+    uint32_t r = rng.next();
+    Cv32suf v;
+    v.u = r;
+    // set all exponent bits and one mantissa bit so the value is a NaN
+    // rather than an Inf (which has a zero mantissa)
+    v.u = v.u | 0x7f800001;
+    return v.f;
+}
+
+template<>
+double randomNan(RNG& rng)
+{
+    uint32_t r0 = rng.next();
+    uint32_t r1 = rng.next();
+    Cv64suf v;
+    v.u = (uint64_t(r0) << 32) | uint64_t(r1);
+    // set all exponent bits and one mantissa bit so the value is a NaN
+    // rather than an Inf (which has a zero mantissa)
+    v.u = v.u | 0x7ff0000000000001;
+    return v.f;
+}
+
+PARAM_TEST_CASE(PatchNaNs, MatDepth, Channels, bool)
 {
+    int ftype;
     int cn;
     bool use_roi;
     double value;
@@ -1709,13 +1737,14 @@ PARAM_TEST_CASE(PatchNaNs, Channels, bool)
     virtual void SetUp()
     {
-        cn = GET_PARAM(0);
-        use_roi = GET_PARAM(1);
+        ftype = GET_PARAM(0);
+        cn = GET_PARAM(1);
+        use_roi = GET_PARAM(2);
     }
 
     void generateTestData()
     {
-        const int type = CV_MAKE_TYPE(CV_32F, cn);
+        const int type = CV_MAKE_TYPE(ftype, cn);
 
         Size roiSize = randomSize(1, 10);
         Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
@@ -1725,9 +1754,19 @@ PARAM_TEST_CASE(PatchNaNs, Channels, bool)
         roiSize.width *= cn;
         for (int y = 0; y < roiSize.height; ++y)
         {
-            float * const ptr = src_roi.ptr<float>(y);
+            float  *const ptrf = src_roi.ptr<float>(y);
+            double *const ptrd = src_roi.ptr<double>(y);
             for (int x = 0; x < roiSize.width; ++x)
-                ptr[x] = randomInt(-1, 1) == 0 ? std::numeric_limits<float>::quiet_NaN() : ptr[x];
+            {
+                if (ftype == CV_32F)
+                {
+                    ptrf[x] = randomInt(-1, 1) == 0 ? randomNan<float>(rng) : ptrf[x];
+                }
+                else if (ftype == CV_64F)
+                {
+                    ptrd[x] = randomInt(-1, 1) == 0 ? randomNan<double>(rng) : ptrd[x];
+                }
+            }
         }
 
         value = randomDouble(-100, 100);
@@ -1754,6 +1793,84 @@ OCL_TEST_P(PatchNaNs, Mat)
     }
 }
 
+//////////////////////////////// finiteMask /////////////////////////////////////////////
+
+PARAM_TEST_CASE(FiniteMask, MatDepth, Channels, bool)
+{
+    int ftype;
+    int cn;
+    bool use_roi;
+
+    TEST_DECLARE_INPUT_PARAMETER(src);
+    TEST_DECLARE_OUTPUT_PARAMETER(mask);
+
+    virtual void SetUp()
+    {
+        ftype = GET_PARAM(0);
+        cn = GET_PARAM(1);
+        use_roi = GET_PARAM(2);
+    }
+
+    void generateTestData()
+    {
+        const int type = CV_MAKE_TYPE(ftype, cn);
+
+        Size roiSize = randomSize(1, MAX_VALUE);
+        Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
+        randomSubMat(src, src_roi, roiSize, srcBorder, type, -40, 40);
+        randomSubMat(mask, mask_roi, roiSize, srcBorder, CV_8UC1, 5, 16);
+
+        // generating NaNs and Infs
+        const softfloat  fpinf = softfloat::inf();
+        const softfloat  fninf = softfloat::inf().setSign(true);
+        const softdouble dpinf = softdouble::inf();
+        const softdouble dninf = softdouble::inf().setSign(true);
+        for (int y = 0; y < roiSize.height; ++y)
+        {
+            float  *const ptrf = src_roi.ptr<float>(y);
+            double *const ptrd = src_roi.ptr<double>(y);
+            for (int x = 0; x < roiSize.width * cn; ++x)
+            {
+                int rem = randomInt(0, 10);
+                if (ftype == CV_32F)
+                {
+                    ptrf[x] = rem < 4  ? randomNan<float>(rng) :
+                              rem == 5 ? (float )((x + y) % 2 ? fpinf : fninf) : ptrf[x];
+                }
+                else if (ftype == CV_64F)
+                {
+                    ptrd[x] = rem < 4  ? randomNan<double>(rng) :
+                              rem == 5 ? (double)((x + y) % 2 ? dpinf : dninf) : ptrd[x];
+                }
+            }
+        }
+
+        UMAT_UPLOAD_INPUT_PARAMETER(src);
+        UMAT_UPLOAD_OUTPUT_PARAMETER(mask);
+    }
+
+    void Near()
+    {
+        OCL_EXPECT_MATS_NEAR(mask, 0);
+    }
+};
+
+OCL_TEST_P(FiniteMask, Mat)
+{
+    for (int j = 0; j < test_loop_times; j++)
+    {
+        generateTestData();
+
+        OCL_OFF(cv::finiteMask(src_roi, mask_roi));
+        OCL_ON(cv::finiteMask(usrc_roi, umask_roi));
+
+        Near();
+    }
+}
+
 //////////////////////////////// Psnr ////////////////////////////////////////////////
 
 typedef ArithmTestBase Psnr;
@@ -1928,7 +2045,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, InRange, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHA
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertScaleAbs, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertFp16, Combine(OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, ScaleAdd, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
-OCL_INSTANTIATE_TEST_CASE_P(Arithm, PatchNaNs, Combine(OCL_ALL_CHANNELS, Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, PatchNaNs, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, FiniteMask, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Psnr, Combine(::testing::Values((MatDepth)CV_8U), OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, UMatDot, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
```

#### `modules/core/test/test_arithm.cpp`

```diff
@@ -729,6 +729,88 @@ struct InRangeOp : public BaseArithmOp
     }
 };
 
+namespace reference {
+
+template<typename _Tp>
+struct SoftType;
+template<>
+struct SoftType<float>
+{
+    typedef softfloat type;
+};
+template<>
+struct SoftType<double>
+{
+    typedef softdouble type;
+};
+
+template <typename _Tp>
+static void finiteMask_(const _Tp *src, uchar *dst, size_t total, int cn)
+{
+    for (size_t i = 0; i < total; i++)
+    {
+        bool good = true;
+        for (int c = 0; c < cn; c++)
+        {
+            _Tp val = src[i * cn + c];
+            typename SoftType<_Tp>::type sval(val);
+            good = good && !sval.isNaN() && !sval.isInf();
+        }
+        dst[i] = good ? 255 : 0;
+    }
+}
+
+static void finiteMask(const Mat& src, Mat& dst)
+{
+    dst.create(src.dims, &src.size[0], CV_8UC1);
+
+    const Mat *arrays[] = {&src, &dst, 0};
+    Mat planes[2];
+    NAryMatIterator it(arrays, planes);
+    size_t total = planes[0].total();
+    size_t i, nplanes = it.nplanes;
+    int depth = src.depth(), cn = src.channels();
+
+    for (i = 0; i < nplanes; i++, ++it)
+    {
+        const uchar* sptr = planes[0].ptr();
+        uchar* dptr = planes[1].ptr();
+
+        switch (depth)
+        {
+        case CV_32F: finiteMask_<float >((const float*) sptr, dptr, total, cn); break;
+        case CV_64F: finiteMask_<double>((const double*)sptr, dptr, total, cn); break;
+        }
+    }
+}
+
+} // namespace reference
+
+struct FiniteMaskOp : public BaseElemWiseOp
+{
+    FiniteMaskOp() : BaseElemWiseOp(1, 0, 1, 1, Scalar::all(0)) {}
+    void op(const vector<Mat>& src, Mat& dst, const Mat&)
+    {
+        cv::finiteMask(src[0], dst);
+    }
+    void refop(const vector<Mat>& src, Mat& dst, const Mat&)
+    {
+        reference::finiteMask(src[0], dst);
+    }
+    int getRandomType(RNG& rng)
+    {
+        return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_FLT, 1, 4);
+    }
+    double getMaxErr(int)
+    {
+        return 0;
+    }
+};
+
 struct ConvertScaleOp : public BaseElemWiseOp
 {
@@ -1573,6 +1655,8 @@ INSTANTIATE_TEST_CASE_P(Core_CmpS, ElemWiseTest, ::testing::Values(ElemWiseOpPtr
 INSTANTIATE_TEST_CASE_P(Core_InRangeS, ElemWiseTest, ::testing::Values(ElemWiseOpPtr(new InRangeSOp)));
 INSTANTIATE_TEST_CASE_P(Core_InRange, ElemWiseTest, ::testing::Values(ElemWiseOpPtr(new InRangeOp)));
+INSTANTIATE_TEST_CASE_P(Core_FiniteMask, ElemWiseTest, ::testing::Values(ElemWiseOpPtr(new FiniteMaskOp)));
 INSTANTIATE_TEST_CASE_P(Core_Flip, ElemWiseTest, ::testing::Values(ElemWiseOpPtr(new FlipOp)));
 INSTANTIATE_TEST_CASE_P(Core_Transpose, ElemWiseTest, ::testing::Values(ElemWiseOpPtr(new TransposeOp)));
 INSTANTIATE_TEST_CASE_P(Core_SetIdentity, ElemWiseTest, ::testing::Values(ElemWiseOpPtr(new SetIdentityOp)));
@@ -2876,4 +2960,76 @@ TEST(Core_CartPolar, inplace)
     EXPECT_THROW(cv::cartToPolar(uA[0], uA[1], uA[0], uA[1]), cv::Exception);
 }
 
+// Check different values for finiteMask()
+
+template<typename _Tp>
+_Tp randomNan(RNG& rng);
+
+template<>
+float randomNan(RNG& rng)
+{
+    uint32_t r = rng.next();
+    Cv32suf v;
+    v.u = r;
+    // set all exponent bits and one mantissa bit so the value is a NaN
+    // rather than an Inf (which has a zero mantissa)
+    v.u = v.u | 0x7f800001;
+    return v.f;
+}
+
+template<>
+double randomNan(RNG& rng)
+{
+    uint32_t r0 = rng.next();
+    uint32_t r1 = rng.next();
+    Cv64suf v;
+    v.u = (uint64_t(r0) << 32) | uint64_t(r1);
+    // set all exponent bits and one mantissa bit so the value is a NaN
+    // rather than an Inf (which has a zero mantissa)
+    v.u = v.u | 0x7ff0000000000001;
+    return v.f;
+}
+
+template<typename T>
+Mat generateFiniteMaskData(int cn, RNG& rng)
+{
+    typedef typename reference::SoftType<T>::type SFT;
+    SFT pinf = SFT::inf();
+    SFT ninf = SFT::inf().setSign(true);
+
+    const int len = 100;
+    Mat_<T> plainData(1, cn*len);
+    for (int i = 0; i < cn*len; i++)
+    {
+        int r = rng.uniform(0, 3);
+        plainData(i) = r == 0 ? T(rng.uniform(0, 2) ? pinf : ninf) :
+                       r == 1 ? randomNan<T>(rng) : T(0);
+    }
+
+    return Mat(plainData).reshape(cn);
+}
+
+typedef std::tuple<int, int> FiniteMaskFixtureParams;
+class FiniteMaskFixture : public ::testing::TestWithParam<FiniteMaskFixtureParams> {};
+
+TEST_P(FiniteMaskFixture, flags)
+{
+    auto p = GetParam();
+    int depth = get<0>(p);
+    int channels = get<1>(p);
+
+    RNG rng((uint64)ARITHM_RNG_SEED);
+    Mat data = (depth == CV_32F) ? generateFiniteMaskData<float >(channels, rng)
+               /* CV_64F */      : generateFiniteMaskData<double>(channels, rng);
+
+    Mat nans, gtNans;
+    cv::finiteMask(data, nans);
+    reference::finiteMask(data, gtNans);
+
+    EXPECT_MAT_NEAR(nans, gtNans, 0);
+}
+
+// Params are: depth, channels 1 to 4
+INSTANTIATE_TEST_CASE_P(Core_FiniteMask, FiniteMaskFixture,
+    ::testing::Combine(::testing::Values(CV_32F, CV_64F), ::testing::Range(1, 5)));
+
 }} // namespace
```

#### `modules/core/test/test_precomp.hpp`

```diff
@@ -8,5 +8,6 @@
 #include "opencv2/ts/ocl_test.hpp"
 #include "opencv2/core/private.hpp"
 #include "opencv2/core/hal/hal.hpp"
+#include "opencv2/core/softfloat.hpp"
 
 #endif
```
