Merge pull request #23098 from savuor:nanMask

finiteMask() and doubles for patchNaNs() #23098 Related to #22826 Connected PR in extra: [#1037@extra](https://github.com/opencv/opencv_extra/pull/1037) ### TODOs: - [ ] Vectorize `finiteMask()` for 64FC3 and 64FC4 ### Changes This PR: * adds a new function `finiteMask()` * extends `patchNaNs()` by CV_64F support * moves `patchNaNs()` and `finiteMask()` to a separate file **NOTE:** now the function is called `finiteMask()` as discussed with the OpenCV core team ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
1 year ago · 53aad98a1a
parent 34f34f6227
commit 53aad98a1a
15 changed files with 1190 additions and 138 deletions
--- a/modules/3d/perf/perf_tsdf.cpp
+++ b/modules/3d/perf/perf_tsdf.cpp
@ -302,6 +302,9 @@ void renderPointsNormals(InputArray _points, InputArray _normals, OutputArray im
    Mat_<Vec4b> img = image.getMat();
    Mat goods;
    finiteMask(points, goods);
    Range range(0, sz.height);
    const int nstripes = -1;
    parallel_for_(range, [&](const Range&)
@ -311,6 +314,7 @@ void renderPointsNormals(InputArray _points, InputArray _normals, OutputArray im
                Vec4b* imgRow = img[y];
                const ptype* ptsRow = points[y];
                const ptype* nrmRow = normals[y];
                const uchar* goodRow = goods.ptr<uchar>(y);
                for (int x = 0; x < sz.width; x++)
                {
@ -319,7 +323,7 @@ void renderPointsNormals(InputArray _points, InputArray _normals, OutputArray im
                    Vec4b color;
-                    if (cvIsNaN(p.x) || cvIsNaN(p.y) || cvIsNaN(p.z) )
+                    if ( !goodRow[x] )
                    {
                        color = Vec4b(0, 32, 0, 0);
                    }
@ -357,6 +361,11 @@ void renderPointsNormalsColors(InputArray _points, InputArray, InputArray _color
    Points  points = _points.getMat();
    Colors  colors = _colors.getMat();
    Mat goods, goodp, goodc;
    finiteMask(points, goodp);
    finiteMask(colors, goodc);
    goods = goodp & goodc;
    Mat_<Vec4b> img = image.getMat();
    Range range(0, sz.height);
@ -366,18 +375,16 @@ void renderPointsNormalsColors(InputArray _points, InputArray, InputArray _color
            for (int y = range.start; y < range.end; y++)
            {
                Vec4b* imgRow = img[y];
                const ptype* ptsRow = points[y];
                const ptype* clrRow = colors[y];
                const uchar* goodRow = goods.ptr<uchar>(y);
                for (int x = 0; x < sz.width; x++)
                {
                    Point3f p = fromPtype(ptsRow[x]);
                    Point3f c = fromPtype(clrRow[x]);
                    Vec4b color;
-                    if (cvIsNaN(p.x) || cvIsNaN(p.y) || cvIsNaN(p.z)
+                    if ( !goodRow[x] )
                        || cvIsNaN(c.x) || cvIsNaN(c.y) || cvIsNaN(c.z))
                    {
                        color = Vec4b(0, 32, 0, 0);
                    }
--- a/modules/3d/src/rgbd/odometry_functions.cpp
+++ b/modules/3d/src/rgbd/odometry_functions.cpp
@ -102,16 +102,8 @@ static void extendPyrMaskByPyrNormals(const std::vector<UMat>& pyramidNormals,
            UMat maski = pyramidMask[i];
            UMat normali = pyramidNormals[i];
            UMat validNormalMask;
-            // NaN check
+            finiteMask(normali, validNormalMask);
-            cv::compare(normali, normali, validNormalMask, CMP_EQ);
+            cv::bitwise_and(maski, validNormalMask, maski);
            CV_Assert(validNormalMask.type() == CV_8UC4);
            std::vector<UMat> channelMasks;
            split(validNormalMask, channelMasks);
            UMat tmpChMask;
            cv::bitwise_and(channelMasks[0], channelMasks[1], tmpChMask);
            cv::bitwise_and(channelMasks[2], tmpChMask, tmpChMask);
            cv::bitwise_and(maski, tmpChMask, maski);
        }
    }
 }
@ -727,7 +719,7 @@ void computeCorresps(const Matx33f& _K, const Mat& rt,
        {
            float ddst = depthDst_row[udst];
-            if (maskDst_row[udst] && !cvIsNaN(ddst))
+            if (maskDst_row[udst])
            {
                float transformed_ddst = static_cast<float>(ddst * (KRK_inv6_u1[udst] + KRK_inv7_v1_plus_KRK_inv8[vdst]) + ktinv.z);
--- a/modules/3d/test/test_odometry.cpp
+++ b/modules/3d/test/test_odometry.cpp
@ -16,11 +16,8 @@ void dilateFrame(Mat& image, Mat& depth)
    CV_Assert(depth.type() == CV_32FC1);
    CV_Assert(depth.size() == image.size());
-    Mat mask(image.size(), CV_8UC1, Scalar(255));
+    Mat mask;
-    for(int y = 0; y < depth.rows; y++)
+    inRange(depth, FLT_EPSILON, 10, mask);
        for(int x = 0; x < depth.cols; x++)
            if(cvIsNaN(depth.at<float>(y,x)) || depth.at<float>(y,x) > 10 || depth.at<float>(y,x) <= FLT_EPSILON)
                mask.at<uchar>(y,x) = 0;
    image.setTo(255, ~mask);
    Mat minImage;
@ -726,7 +723,8 @@ TEST(RGBD_Odometry_WarpFrame, nansAreMasked)
    ASSERT_EQ(0, rgbDiff);
-    Mat goodVals = (w.warpedDepth == w.warpedDepth);
+    Mat goodVals;
    finiteMask(w.warpedDepth, goodVals);
    double l2diff = cv::norm(w.dstDepth, w.warpedDepth, NORM_L2, goodVals);
    double lidiff = cv::norm(w.dstDepth, w.warpedDepth, NORM_INF, goodVals);
--- a/modules/3d/test/test_tsdf.cpp
+++ b/modules/3d/test/test_tsdf.cpp
@ -295,6 +295,9 @@ void renderPointsNormals(InputArray _points, InputArray _normals, OutputArray im
    Points  points = _points.getMat();
    Normals normals = _normals.getMat();
    Mat goods;
    finiteMask(points, goods);
    Mat_<Vec4b> img = image.getMat();
    Range range(0, sz.height);
@ -306,6 +309,7 @@ void renderPointsNormals(InputArray _points, InputArray _normals, OutputArray im
                Vec4b* imgRow = img[y];
                const ptype* ptsRow = points[y];
                const ptype* nrmRow = normals[y];
                const uchar* goodRow = goods.ptr<uchar>(y);
                for (int x = 0; x < sz.width; x++)
                {
@ -314,7 +318,7 @@ void renderPointsNormals(InputArray _points, InputArray _normals, OutputArray im
                    Vec4b color;
-                    if (cvIsNaN(p.x) || cvIsNaN(p.y) || cvIsNaN(p.z))
+                    if (!goodRow[x])
                    {
                        color = Vec4b(0, 32, 0, 0);
                    }
@ -352,6 +356,11 @@ void renderPointsNormalsColors(InputArray _points, InputArray, InputArray _color
    Points  points  = _points.getMat();
    Colors  colors  = _colors.getMat();
    Mat goods, goodc, goodp;
    finiteMask(points, goodp);
    finiteMask(colors, goodc);
    goods = goodp & goodc;
    Mat_<Vec4b> img = image.getMat();
    Range range(0, sz.height);
@ -361,18 +370,16 @@ void renderPointsNormalsColors(InputArray _points, InputArray, InputArray _color
            for (int y = range.start; y < range.end; y++)
            {
                Vec4b* imgRow = img[y];
                const ptype* ptsRow = points[y];
                const ptype* clrRow = colors[y];
                const uchar* goodRow = goods.ptr<uchar>(y);
                for (int x = 0; x < sz.width; x++)
                {
                    Point3f p = fromPtype(ptsRow[x]);
                    Point3f c = fromPtype(clrRow[x]);
                    Vec4b color;
-                    if (cvIsNaN(p.x) || cvIsNaN(p.y) || cvIsNaN(p.z)
+                    if (!goodRow[x])
                        || cvIsNaN(c.x) || cvIsNaN(c.y) || cvIsNaN(c.z))
                    {
                        color = Vec4b(0, 32, 0, 0);
                    }
@ -587,33 +594,6 @@ void boundingBoxGrowthTest(bool enableGrowth)
 }
 static Mat nanMask(Mat img)
 {
    int depth = img.depth();
    Mat mask(img.size(), CV_8U);
    for (int y = 0; y < img.rows; y++)
    {
        uchar *maskRow = mask.ptr<uchar>(y);
        if (depth == CV_32F)
        {
            Vec4f *imgrow = img.ptr<Vec4f>(y);
            for (int x = 0; x < img.cols; x++)
            {
                maskRow[x] = (imgrow[x] == imgrow[x]) * 255;
            }
        }
        else if (depth == CV_64F)
        {
            Vec4d *imgrow = img.ptr<Vec4d>(y);
            for (int x = 0; x < img.cols; x++)
            {
                maskRow[x] = (imgrow[x] == imgrow[x]) * 255;
            }
        }
    }
    return mask;
 }
 template <typename VT>
 static Mat_<typename VT::value_type> normalsErrorT(Mat_<VT> srcNormals, Mat_<VT> dstNormals)
 {
@ -725,8 +705,9 @@ void regressionVolPoseRot()
    split(uptsRot, ptsRotCh);
    Mat maskPts0 = ptsCh[2] > 0;
    Mat maskPtsRot = ptsRotCh[2] > 0;
-    Mat maskNrm0 = nanMask(mnrm);
+    Mat maskNrm0, maskNrmRot;
-    Mat maskNrmRot = nanMask(mnrmRot);
+    finiteMask(mnrm, maskNrm0);
    finiteMask(mnrmRot, maskNrmRot);
    Mat maskPtsDiff, maskNrmDiff;
    cv::bitwise_xor(maskPts0, maskPtsRot, maskPtsDiff);
    cv::bitwise_xor(maskNrm0, maskNrmRot, maskNrmDiff);
--- a/modules/core/CMakeLists.txt
+++ b/modules/core/CMakeLists.txt
@ -10,6 +10,7 @@ ocv_add_dispatched_file(has_non_zero SSE2 AVX2)
 ocv_add_dispatched_file(matmul SSE2 SSE4_1 AVX2 AVX512_SKX NEON_DOTPROD)
 ocv_add_dispatched_file(mean SSE2 AVX2)
 ocv_add_dispatched_file(merge SSE2 AVX2)
 ocv_add_dispatched_file(nan_mask SSE2 AVX2)
 ocv_add_dispatched_file(split SSE2 AVX2)
 ocv_add_dispatched_file(sum SSE2 AVX2)
--- a/modules/core/include/opencv2/core.hpp
+++ b/modules/core/include/opencv2/core.hpp
@ -1697,12 +1697,21 @@ elements.
 CV_EXPORTS_W bool checkRange(InputArray a, bool quiet = true, CV_OUT Point* pos = 0,
                            double minVal = -DBL_MAX, double maxVal = DBL_MAX);
-/** @brief converts NaNs to the given number
+/** @brief Replaces NaNs by given number
-@param a input/output matrix (CV_32F type).
+@param a input/output matrix (CV_32F or CV_64F type)
@param val value to convert the NaNs
 */
 CV_EXPORTS_W void patchNaNs(InputOutputArray a, double val = 0);
 /** @brief Generates a mask of finite float values, i.e. not NaNs nor Infs.
 An element is set to to 255 (all 1-bits) if all channels are finite.
@param src Input matrix, should contain float or double elements of 1 to 4 channels
@param mask Output matrix of the same size as input of type CV_8UC1
 */
 CV_EXPORTS_W void finiteMask(InputArray src, OutputArray mask);
 /** @brief Performs generalized matrix multiplication.
 The function cv::gemm performs generalized matrix multiplication similar to the
--- a/modules/core/perf/opencl/perf_arithm.cpp
+++ b/modules/core/perf/opencl/perf_arithm.cpp
@ -41,6 +41,7 @@
 #include "../perf_precomp.hpp"
 #include "opencv2/ts/ocl_perf.hpp"
 #include "opencv2/core/softfloat.hpp"
 #ifdef HAVE_OPENCL
@ -1038,14 +1039,40 @@ OCL_PERF_TEST_P(ConvertScaleAbsFixture, ConvertScaleAbs,
 ///////////// PatchNaNs ////////////////////////
 template<typename _Tp>
 _Tp randomNan(RNG& rng);
 template<>
 float randomNan(RNG& rng)
 {
    uint32_t r = rng.next();
    Cv32suf v;
    v.u = r;
    // exp & set a bit to avoid zero mantissa
    v.u = v.u | 0x7f800001;
    return v.f;
 }
 template<>
 double randomNan(RNG& rng)
 {
    uint32_t r0 = rng.next();
    uint32_t r1 = rng.next();
    Cv64suf v;
    v.u = (uint64_t(r0) << 32) | uint64_t(r1);
    // exp &set a bit to avoid zero mantissa
    v.u = v.u | 0x7ff0000000000001;
    return v.f;
 }
 typedef Size_MatType PatchNaNsFixture;
 OCL_PERF_TEST_P(PatchNaNsFixture, PatchNaNs,
-                ::testing::Combine(OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
+                ::testing::Combine(OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC3, CV_32FC4, CV_64FC1, CV_64FC3, CV_64FC4)))
 {
    const Size_MatType_t params = GetParam();
    Size srcSize = get<0>(params);
-    const int type = get<1>(params), cn = CV_MAT_CN(type);
+    const int type = get<1>(params), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
    checkDeviceMaxMemoryAllocSize(srcSize, type);
@ -1056,11 +1083,22 @@ OCL_PERF_TEST_P(PatchNaNsFixture, PatchNaNs,
    {
        Mat src_ = src.getMat(ACCESS_RW);
        srcSize.width *= cn;
        RNG& rng = theRNG();
        for (int y = 0; y < srcSize.height; ++y)
        {
-            float * const ptr = src_.ptr<float>(y);
+            float  *const ptrf = src_.ptr<float>(y);
            double *const ptrd = src_.ptr<double>(y);
            for (int x = 0; x < srcSize.width; ++x)
-                ptr[x] = (x + y) % 2 == 0 ? std::numeric_limits<float>::quiet_NaN() : ptr[x];
+            {
                if (depth == CV_32F)
                {
                    ptrf[x] = (x + y) % 2 == 0 ? randomNan<float >(rng) : ptrf[x];
                }
                else if (depth == CV_64F)
                {
                    ptrd[x] = (x + y) % 2 == 0 ? randomNan<double>(rng) : ptrd[x];
                }
            }
        }
    }
@ -1069,6 +1107,57 @@ OCL_PERF_TEST_P(PatchNaNsFixture, PatchNaNs,
    SANITY_CHECK(src);
 }
 ////////////// finiteMask ////////////////////////
 typedef Size_MatType FiniteMaskFixture;
 OCL_PERF_TEST_P(FiniteMaskFixture, FiniteMask,
                ::testing::Combine(OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC3, CV_32FC4, CV_64FC1, CV_64FC3, CV_64FC4)))
 {
    const Size_MatType_t params = GetParam();
    Size srcSize = get<0>(params);
    const int type = get<1>(params), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
    checkDeviceMaxMemoryAllocSize(srcSize, type);
    UMat src(srcSize, type);
    UMat mask(srcSize, CV_8UC1);
    declare.in(src, WARMUP_RNG).out(mask);
    // generating NaNs
    {
        Mat src_ = src.getMat(ACCESS_RW);
        srcSize.width *= cn;
        const softfloat  fpinf = softfloat ::inf();
        const softfloat  fninf = softfloat ::inf().setSign(true);
        const softdouble dpinf = softdouble::inf();
        const softdouble dninf = softdouble::inf().setSign(true);
        RNG& rng = theRNG();
        for (int y = 0; y < srcSize.height; ++y)
        {
            float  *const ptrf = src_.ptr<float>(y);
            double *const ptrd = src_.ptr<double>(y);
            for (int x = 0; x < srcSize.width; ++x)
            {
                int rem = (x + y) % 10;
                if (depth == CV_32F)
                {
                    ptrf[x] = rem <  4 ? randomNan<float >(rng) :
                              rem == 5 ? (float )((x + y)%2 ? fpinf : fninf) : ptrf[x];
                }
                else if (depth == CV_64F)
                {
                    ptrd[x] = rem <  4 ? randomNan<double>(rng) :
                              rem == 5 ? (double)((x + y)%2 ? dpinf : dninf) : ptrd[x];
                }
            }
        }
    }
    OCL_TEST_CYCLE() cv::finiteMask(src, mask);
    SANITY_CHECK(mask);
 }
 ///////////// ScaleAdd ////////////////////////
--- a/modules/core/perf/perf_arithm.cpp
+++ b/modules/core/perf/perf_arithm.cpp
@ -1,4 +1,5 @@
 #include "perf_precomp.hpp"
 #include "opencv2/core/softfloat.hpp"
 #include <numeric>
 namespace opencv_test
@ -451,4 +452,135 @@ INSTANTIATE_TEST_CASE_P(/*nothing*/ , BinaryOpTest,
    )
 );
 ///////////// PatchNaNs ////////////////////////
 template<typename _Tp>
 _Tp randomNan(RNG& rng);
 template<>
 float randomNan(RNG& rng)
 {
    uint32_t r = rng.next();
    Cv32suf v;
    v.u = r;
    // exp & set a bit to avoid zero mantissa
    v.u = v.u | 0x7f800001;
    return v.f;
 }
 template<>
 double randomNan(RNG& rng)
 {
    uint32_t r0 = rng.next();
    uint32_t r1 = rng.next();
    Cv64suf v;
    v.u = (uint64_t(r0) << 32) | uint64_t(r1);
    // exp &set a bit to avoid zero mantissa
    v.u = v.u | 0x7ff0000000000001;
    return v.f;
 }
 typedef Size_MatType PatchNaNsFixture;
 PERF_TEST_P_(PatchNaNsFixture, PatchNaNs)
 {
    const Size_MatType_t params = GetParam();
    Size srcSize = get<0>(params);
    const int type = get<1>(params), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
    Mat src(srcSize, type);
    declare.in(src, WARMUP_RNG).out(src);
    // generating NaNs
    {
        srcSize.width *= cn;
        RNG& rng = theRNG();
        for (int y = 0; y < srcSize.height; ++y)
        {
            float  *const ptrf = src.ptr<float>(y);
            double *const ptrd = src.ptr<double>(y);
            for (int x = 0; x < srcSize.width; ++x)
            {
                if (depth == CV_32F)
                {
                    ptrf[x] = (x + y) % 2 == 0 ? randomNan<float >(rng) : ptrf[x];
                }
                else if (depth == CV_64F)
                {
                    ptrd[x] = (x + y) % 2 == 0 ? randomNan<double>(rng) : ptrd[x];
                }
            }
        }
    }
    TEST_CYCLE() cv::patchNaNs(src, 17.7);
    SANITY_CHECK(src);
 }
 INSTANTIATE_TEST_CASE_P(/*nothing*/ , PatchNaNsFixture,
    testing::Combine(
        testing::Values(szVGA, sz720p, sz1080p, sz2160p),
        testing::Values(CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4, CV_64FC1, CV_64FC2, CV_64FC3, CV_64FC4)
    )
 );
 ////////////// finiteMask ////////////////////////
 typedef Size_MatType FiniteMaskFixture;
 PERF_TEST_P_(FiniteMaskFixture, FiniteMask)
 {
    const Size_MatType_t params = GetParam();
    Size srcSize = get<0>(params);
    const int type = get<1>(params), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
    Mat src(srcSize, type);
    Mat mask(srcSize, CV_8UC1);
    declare.in(src, WARMUP_RNG).out(mask);
    // generating NaNs
    {
        srcSize.width *= cn;
        const softfloat  fpinf = softfloat ::inf();
        const softfloat  fninf = softfloat ::inf().setSign(true);
        const softdouble dpinf = softdouble::inf();
        const softdouble dninf = softdouble::inf().setSign(true);
        RNG& rng = theRNG();
        for (int y = 0; y < srcSize.height; ++y)
        {
            float  *const ptrf = src.ptr<float>(y);
            double *const ptrd = src.ptr<double>(y);
            for (int x = 0; x < srcSize.width; ++x)
            {
                int rem = (x + y) % 10;
                if (depth == CV_32F)
                {
                    ptrf[x] = rem <  4 ? randomNan<float >(rng) :
                              rem == 5 ? (float )((x + y)%2 ? fpinf : fninf) : ptrf[x];
                }
                else if (depth == CV_64F)
                {
                    ptrd[x] = rem <  4 ? randomNan<double>(rng) :
                              rem == 5 ? (double)((x + y)%2 ? dpinf : dninf) : ptrd[x];
                }
            }
        }
    }
    TEST_CYCLE() cv::finiteMask(src, mask);
    SANITY_CHECK(mask);
 }
 INSTANTIATE_TEST_CASE_P(/*nothing*/ , FiniteMaskFixture,
    testing::Combine(
        testing::Values(szVGA, sz720p, sz1080p, sz2160p),
        testing::Values(CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4, CV_64FC1, CV_64FC2, CV_64FC3, CV_64FC4)
    )
 );
 } // namespace
--- a/modules/core/src/mathfuncs.cpp
+++ b/modules/core/src/mathfuncs.cpp
@ -1574,75 +1574,7 @@ bool checkRange(InputArray _src, bool quiet, Point* pt, double minVal, double ma
    return true;
 }
-#ifdef HAVE_OPENCL
+} // namespace cv
 static bool ocl_patchNaNs( InputOutputArray _a, float value )
 {
    int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
    ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
                     format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=float -D DEPTH_dst=%d -D rowsPerWI=%d",
                            CV_32F, rowsPerWI));
    if (k.empty())
        return false;
    UMat a = _a.getUMat();
    int cn = a.channels();
    k.args(ocl::KernelArg::ReadOnlyNoSize(a),
           ocl::KernelArg::WriteOnly(a, cn), (float)value);
    size_t globalsize[2] = { (size_t)a.cols * cn, ((size_t)a.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
 }
 #endif
 void patchNaNs( InputOutputArray _a, double _val )
 {
    CV_INSTRUMENT_REGION();
    CV_Assert( _a.depth() == CV_32F );
    CV_OCL_RUN(_a.isUMat() && _a.dims() <= 2,
               ocl_patchNaNs(_a, (float)_val))
    Mat a = _a.getMat();
    const Mat* arrays[] = {&a, 0};
    int* ptrs[1] = {};
    NAryMatIterator it(arrays, (uchar**)ptrs);
    size_t len = it.size*a.channels();
    Cv32suf val;
    val.f = (float)_val;
 #if (CV_SIMD || CV_SIMD_SCALABLE)
    v_int32 v_mask1 = vx_setall_s32(0x7fffffff), v_mask2 = vx_setall_s32(0x7f800000);
    v_int32 v_val = vx_setall_s32(val.i);
 #endif
    for( size_t i = 0; i < it.nplanes; i++, ++it )
    {
        int* tptr = ptrs[0];
        size_t j = 0;
 #if (CV_SIMD || CV_SIMD_SCALABLE)
        size_t cWidth = (size_t)VTraits<v_int32>::vlanes();
        for ( ; j + cWidth <= len; j += cWidth)
        {
            v_int32 v_src = vx_load(tptr + j);
            v_int32 v_cmp_mask = v_lt(v_mask2, v_and(v_src, v_mask1));
            v_int32 v_dst = v_select(v_cmp_mask, v_val, v_src);
            v_store(tptr + j, v_dst);
        }
        vx_cleanup();
 #endif
        for( ; j < len; j++ )
            if( (tptr[j] & 0x7fffffff) > 0x7f800000 )
                tptr[j] = val.i;
    }
 }
 }
 #ifndef OPENCV_EXCLUDE_C_API
--- a/modules/core/src/nan_mask.dispatch.cpp
+++ b/modules/core/src/nan_mask.dispatch.cpp
@ -0,0 +1,152 @@
 // This file is part of OpenCV project.
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 #include "precomp.hpp"
 #include "opencl_kernels_core.hpp"
 #include "nan_mask.simd.hpp"
 #include "nan_mask.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
 namespace cv {
 #ifdef HAVE_OPENCL
 static bool ocl_patchNaNs( InputOutputArray _a, double value )
 {
    int ftype = _a.depth();
    const ocl::Device d = ocl::Device::getDefault();
    bool doubleSupport = d.doubleFPConfig() > 0;
    if (!doubleSupport && ftype == CV_64F)
    {
        return false;
    }
    int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
    ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
                     format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d %s",
                            ftype == CV_64F ? "double" : "float", ftype, rowsPerWI,
                            doubleSupport ? "-D DOUBLE_SUPPORT" : ""));
    if (k.empty())
        return false;
    UMat a = _a.getUMat();
    int cn = a.channels();
    // to pass float or double to args
    if (ftype == CV_32F)
    {
        k.args(ocl::KernelArg::ReadOnlyNoSize(a), ocl::KernelArg::WriteOnly(a, cn), (float)value);
    }
    else // CV_64F
    {
        k.args(ocl::KernelArg::ReadOnlyNoSize(a), ocl::KernelArg::WriteOnly(a, cn), value);
    }
    size_t globalsize[2] = { (size_t)a.cols * cn, ((size_t)a.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
 }
 #endif
 static PatchNanFunc getPatchNanFunc(bool isDouble)
 {
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(getPatchNanFunc, (isDouble), CV_CPU_DISPATCH_MODES_ALL);
 }
 void patchNaNs( InputOutputArray _a, double _val )
 {
    CV_INSTRUMENT_REGION();
    CV_Assert( _a.depth() == CV_32F || _a.depth() == CV_64F);
    CV_OCL_RUN(_a.isUMat() && _a.dims() <= 2,
               ocl_patchNaNs(_a, _val))
    Mat a = _a.getMat();
    const Mat* arrays[] = {&a, 0};
    uchar* ptrs[1] = {};
    NAryMatIterator it(arrays, ptrs);
    size_t len = it.size*a.channels();
    PatchNanFunc func = getPatchNanFunc(_a.depth() == CV_64F);
    for (size_t i = 0; i < it.nplanes; i++, ++it)
    {
        func(ptrs[0], len, _val);
    }
 }
 #ifdef HAVE_OPENCL
 static bool ocl_finiteMask(const UMat img, UMat mask)
 {
    int channels = img.channels();
    int depth = img.depth();
    const ocl::Device d = ocl::Device::getDefault();
    bool doubleSupport = d.doubleFPConfig() > 0;
    if (!doubleSupport && depth == CV_64F)
    {
        return false;
    }
    int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
    ocl::Kernel k("finiteMask", ocl::core::finitemask_oclsrc,
                  format("-D srcT=%s -D cn=%d -D rowsPerWI=%d %s",
                         depth == CV_32F ? "float" : "double", channels, rowsPerWI,
                         doubleSupport ? "-D DOUBLE_SUPPORT" : ""));
    if (k.empty())
        return false;
    k.args(ocl::KernelArg::ReadOnlyNoSize(img), ocl::KernelArg::WriteOnly(mask));
    size_t globalsize[2] = { (size_t)img.cols, ((size_t)img.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
 }
 #endif
 static FiniteMaskFunc getFiniteMaskFunc(bool isDouble, int cn)
 {
    CV_INSTRUMENT_REGION();
    CV_CPU_DISPATCH(getFiniteMaskFunc, (isDouble, cn), CV_CPU_DISPATCH_MODES_ALL);
 }
 void finiteMask(InputArray _src, OutputArray _mask)
 {
    CV_INSTRUMENT_REGION();
    int channels = _src.channels();
    int depth = _src.depth();
    CV_Assert( channels > 0 && channels <= 4);
    CV_Assert( depth == CV_32F || depth == CV_64F );
    std::vector<int> vsz(_src.dims());
    _src.sizend(vsz.data());
    _mask.create(_src.dims(), vsz.data(), CV_8UC1);
    CV_OCL_RUN(_src.isUMat() && _mask.isUMat() && _src.dims() <= 2,
               ocl_finiteMask(_src.getUMat(), _mask.getUMat()));
    Mat src = _src.getMat();
    Mat mask = _mask.getMat();
    const Mat *arrays[]={&src, &mask, 0};
    Mat planes[2];
    NAryMatIterator it(arrays, planes);
    size_t total = planes[0].total();
    size_t i, nplanes = it.nplanes;
    FiniteMaskFunc func = getFiniteMaskFunc((depth == CV_64F), channels);
    for( i = 0; i < nplanes; i++, ++it )
    {
        const uchar* sptr = planes[0].ptr();
        uchar* dptr = planes[1].ptr();
        func(sptr, dptr, total);
    }
 }
 } //namespace cv
--- a/modules/core/src/nan_mask.simd.hpp
+++ b/modules/core/src/nan_mask.simd.hpp
@ -0,0 +1,440 @@
 // This file is part of OpenCV project.
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html
 #include "precomp.hpp"
 namespace cv {
 typedef void (*PatchNanFunc)(uchar* tptr, size_t len, double newVal);
 typedef void (*FiniteMaskFunc)(const uchar *src, uchar *dst, size_t total);
 CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
 PatchNanFunc getPatchNanFunc(bool isDouble);
 FiniteMaskFunc getFiniteMaskFunc(bool isDouble, int cn);
 #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
 static void patchNaNs_32f(uchar* ptr, size_t ulen, double newVal)
 {
    CV_INSTRUMENT_REGION();
    int32_t* tptr = (int32_t*)ptr;
    Cv32suf val;
    val.f = (float)newVal;
    int j = 0;
    int len = (int)ulen;
 #if (CV_SIMD || CV_SIMD_SCALABLE)
    v_int32 v_pos_mask = vx_setall_s32(0x7fffffff), v_exp_mask = vx_setall_s32(0x7f800000);
    v_int32 v_val = vx_setall_s32(val.i);
    int cWidth = VTraits<v_int32>::vlanes();
    for (; j < len - cWidth*2 + 1; j += cWidth*2)
    {
        v_int32 v_src0 = vx_load(tptr + j);
        v_int32 v_src1 = vx_load(tptr + j + cWidth);
        v_int32 v_cmp_mask0 = v_lt(v_exp_mask, v_and(v_src0, v_pos_mask));
        v_int32 v_cmp_mask1 = v_lt(v_exp_mask, v_and(v_src1, v_pos_mask));
        if (v_check_any(v_or(v_cmp_mask0, v_cmp_mask1)))
        {
            v_int32 v_dst0 = v_select(v_cmp_mask0, v_val, v_src0);
            v_int32 v_dst1 = v_select(v_cmp_mask1, v_val, v_src1);
            v_store(tptr + j, v_dst0);
            v_store(tptr + j + cWidth, v_dst1);
        }
    }
 #endif
    for (; j < len; j++)
    {
        if ((tptr[j] & 0x7fffffff) > 0x7f800000)
        {
            tptr[j] = val.i;
        }
    }
 }
 static void patchNaNs_64f(uchar* ptr, size_t ulen, double newVal)
 {
    CV_INSTRUMENT_REGION();
    int64_t* tptr = (int64_t*)ptr;
    Cv64suf val;
    val.f = newVal;
    int j = 0;
    int len = (int)ulen;
 #if (CV_SIMD || CV_SIMD_SCALABLE)
    v_int64 v_exp_mask = vx_setall_s64(0x7FF0000000000000);
    v_int64 v_pos_mask = vx_setall_s64(0x7FFFFFFFFFFFFFFF);
    v_int64 v_val = vx_setall_s64(val.i);
    int cWidth = VTraits<v_int64>::vlanes();
    for (; j < len - cWidth * 2 + 1; j += cWidth*2)
    {
        v_int64 v_src0 = vx_load(tptr + j);
        v_int64 v_src1 = vx_load(tptr + j + cWidth);
        v_int64 v_cmp_mask0 = v_lt(v_exp_mask, v_and(v_src0, v_pos_mask));
        v_int64 v_cmp_mask1 = v_lt(v_exp_mask, v_and(v_src1, v_pos_mask));
        if (v_check_any(v_cmp_mask0) || v_check_any(v_cmp_mask1))
        {
            // v_select is not available for v_int64, emulating it
            v_int32 val32 = v_reinterpret_as_s32(v_val);
            v_int64 v_dst0 = v_reinterpret_as_s64(v_select(v_reinterpret_as_s32(v_cmp_mask0), val32, v_reinterpret_as_s32(v_src0)));
            v_int64 v_dst1 = v_reinterpret_as_s64(v_select(v_reinterpret_as_s32(v_cmp_mask1), val32, v_reinterpret_as_s32(v_src1)));
            v_store(tptr + j, v_dst0);
            v_store(tptr + j + cWidth, v_dst1);
        }
    }
 #endif
    for (; j < len; j++)
        if ((tptr[j] & 0x7FFFFFFFFFFFFFFF) > 0x7FF0000000000000)
            tptr[j] = val.i;
 }
 PatchNanFunc getPatchNanFunc(bool isDouble)
 {
    return isDouble ? (PatchNanFunc)GET_OPTIMIZED(patchNaNs_64f)
                    : (PatchNanFunc)GET_OPTIMIZED(patchNaNs_32f);
 }
 ////// finiteMask //////
 #if (CV_SIMD || CV_SIMD_SCALABLE)
 template <typename _Tp, int cn>
 int finiteMaskSIMD_(const _Tp *src, uchar *dst, size_t total);
 template <>
 int finiteMaskSIMD_<float, 1>(const float *fsrc, uchar *dst, size_t utotal)
 {
    const uint32_t* src = (const uint32_t*)fsrc;
    const int osize = VTraits<v_uint8>::vlanes();
    v_uint32 vmaskExp = vx_setall_u32(0x7f800000);
    int i = 0;
    int total = (int)utotal;
    for(; i < total - osize + 1; i += osize )
    {
        v_uint32 vv0, vv1, vv2, vv3;
        vv0 = v_ne(v_and(vx_load(src + i              ), vmaskExp), vmaskExp);
        vv1 = v_ne(v_and(vx_load(src + i +   (osize/4)), vmaskExp), vmaskExp);
        vv2 = v_ne(v_and(vx_load(src + i + 2*(osize/4)), vmaskExp), vmaskExp);
        vv3 = v_ne(v_and(vx_load(src + i + 3*(osize/4)), vmaskExp), vmaskExp);
        v_store(dst + i, v_pack_b(vv0, vv1, vv2, vv3));
    }
    return i;
 }
 template <>
 int finiteMaskSIMD_<float, 2>(const float *fsrc, uchar *dst, size_t utotal)
 {
    const uint32_t* src = (const uint32_t*)fsrc;
    const int size8 = VTraits<v_uint8>::vlanes();
    v_uint32 vmaskExp = vx_setall_u32(0x7f800000);
    v_uint16 vmaskBoth = vx_setall_u16(0xffff);
    int i = 0;
    int total = (int)utotal;
    for(; i < total - (size8 / 2) + 1; i += (size8 / 2) )
    {
        v_uint32 vv0, vv1, vv2, vv3;
        vv0 = v_ne(v_and(vx_load(src + i*2                ), vmaskExp), vmaskExp);
        vv1 = v_ne(v_and(vx_load(src + i*2 +   (size8 / 4)), vmaskExp), vmaskExp);
        vv2 = v_ne(v_and(vx_load(src + i*2 + 2*(size8 / 4)), vmaskExp), vmaskExp);
        vv3 = v_ne(v_and(vx_load(src + i*2 + 3*(size8 / 4)), vmaskExp), vmaskExp);
        v_uint8 velems = v_pack_b(vv0, vv1, vv2, vv3);
        v_uint16 vfinite = v_eq(v_reinterpret_as_u16(velems), vmaskBoth);
        // 2nd argument in vfinite is useless
        v_store_low(dst + i, v_pack(vfinite, vfinite));
    }
    return i;
 }
 template <>
 int finiteMaskSIMD_<float, 3>(const float *fsrc, uchar *dst, size_t utotal)
 {
    const uint32_t* src = (const uint32_t*)fsrc;
    const int npixels = VTraits<v_float32>::vlanes();
    v_uint32 vmaskExp = vx_setall_u32(0x7f800000);
    v_uint32 z = vx_setzero_u32();
    int i = 0;
    int total = (int)utotal;
    for (; i < total - npixels + 1; i += npixels)
    {
        v_uint32 vv0, vv1, vv2;
        vv0 = v_ne(v_and(vx_load(src + i*3            ), vmaskExp), vmaskExp);
        vv1 = v_ne(v_and(vx_load(src + i*3 +   npixels), vmaskExp), vmaskExp);
        vv2 = v_ne(v_and(vx_load(src + i*3 + 2*npixels), vmaskExp), vmaskExp);
        v_uint8 velems = v_pack_b(vv0, vv1, vv2, z);
        // 2nd arg is useless
        v_uint8 vsh1 = v_extract<1>(velems, velems);
        v_uint8 vsh2 = v_extract<2>(velems, velems);
        v_uint8 vres3 = v_and(v_and(velems, vsh1), vsh2);
        for (int j = 0; j < npixels; j++)
        {
            dst[i + j] = v_get0(vres3);
            // 2nd arg is useless
            vres3 = v_extract<3>(vres3, vres3);
        }
    }
    return i;
 }
 template <>
 int finiteMaskSIMD_<float, 4>(const float *fsrc, uchar *dst, size_t utotal)
 {
    const uint32_t* src = (const uint32_t*)fsrc;
    const int npixels = VTraits<v_uint8>::vlanes() / 2;
    const int nfloats = VTraits<v_uint32>::vlanes();
    const v_uint32 vMaskExp = vx_setall_u32(0x7f800000);
    v_uint32 vmaskAll4 = vx_setall_u32(0xFFFFFFFF);
    int i = 0;
    int total = (int)utotal;
    for(; i < total - npixels + 1; i += npixels )
    {
        v_uint32 v0 = vx_load(src + i * 4 + 0*nfloats);
        v_uint32 v1 = vx_load(src + i * 4 + 1*nfloats);
        v_uint32 v2 = vx_load(src + i * 4 + 2*nfloats);
        v_uint32 v3 = vx_load(src + i * 4 + 3*nfloats);
        v_uint32 v4 = vx_load(src + i * 4 + 4*nfloats);
        v_uint32 v5 = vx_load(src + i * 4 + 5*nfloats);
        v_uint32 v6 = vx_load(src + i * 4 + 6*nfloats);
        v_uint32 v7 = vx_load(src + i * 4 + 7*nfloats);
        v_uint32 vmask0 = v_ne(v_and(v0, vMaskExp), vMaskExp);
        v_uint32 vmask1 = v_ne(v_and(v1, vMaskExp), vMaskExp);
        v_uint32 vmask2 = v_ne(v_and(v2, vMaskExp), vMaskExp);
        v_uint32 vmask3 = v_ne(v_and(v3, vMaskExp), vMaskExp);
        v_uint32 vmask4 = v_ne(v_and(v4, vMaskExp), vMaskExp);
        v_uint32 vmask5 = v_ne(v_and(v5, vMaskExp), vMaskExp);
        v_uint32 vmask6 = v_ne(v_and(v6, vMaskExp), vMaskExp);
        v_uint32 vmask7 = v_ne(v_and(v7, vMaskExp), vMaskExp);
        v_uint8 velems0 = v_pack_b(vmask0, vmask1, vmask2, vmask3);
        v_uint8 velems1 = v_pack_b(vmask4, vmask5, vmask6, vmask7);
        v_uint32 vresWide0 = v_eq(v_reinterpret_as_u32(velems0), vmaskAll4);
        v_uint32 vresWide1 = v_eq(v_reinterpret_as_u32(velems1), vmaskAll4);
        // last 2 args are useless
        v_uint8 vres = v_pack_b(vresWide0, vresWide1, vresWide0, vresWide1);
        v_store_low(dst + i, vres);
    }
    return i;
 }
 template <>
 int finiteMaskSIMD_<double, 1>(const double *dsrc, uchar *dst, size_t utotal)
 {
    const uint64_t* src = (const uint64_t*)dsrc;
    const int size8 = VTraits<v_uint8>::vlanes();
    int i = 0;
    int total = (int)utotal;
    v_uint64 vmaskExp = vx_setall_u64(0x7ff0000000000000);
    v_uint64 z = vx_setzero_u64();
    for(; i < total - (size8 / 2) + 1; i += (size8 / 2) )
    {
        v_uint64 vv0, vv1, vv2, vv3;
        vv0 = v_ne(v_and(vx_load(src + i                ), vmaskExp), vmaskExp);
        vv1 = v_ne(v_and(vx_load(src + i +   (size8 / 8)), vmaskExp), vmaskExp);
        vv2 = v_ne(v_and(vx_load(src + i + 2*(size8 / 8)), vmaskExp), vmaskExp);
        vv3 = v_ne(v_and(vx_load(src + i + 3*(size8 / 8)), vmaskExp), vmaskExp);
        v_uint8 v = v_pack_b(vv0, vv1, vv2, vv3, z, z, z, z);
        v_store_low(dst + i, v);
    }
    return i;
 }
 template <>
 int finiteMaskSIMD_<double, 2>(const double *dsrc, uchar *dst, size_t utotal)
 {
    const uint64_t* src = (const uint64_t*)dsrc;
    const int npixels = VTraits<v_uint8>::vlanes() / 2;
    const int ndoubles = VTraits<v_uint64>::vlanes();
    v_uint64 vmaskExp = vx_setall_u64(0x7ff0000000000000);
    v_uint16 vmaskBoth = vx_setall_u16(0xffff);
    int i = 0;
    int total = (int)utotal;
    for(; i < total - npixels + 1; i += npixels )
    {
        v_uint64 vv0 = v_ne(v_and(vx_load(src + i*2 + 0*ndoubles), vmaskExp), vmaskExp);
        v_uint64 vv1 = v_ne(v_and(vx_load(src + i*2 + 1*ndoubles), vmaskExp), vmaskExp);
        v_uint64 vv2 = v_ne(v_and(vx_load(src + i*2 + 2*ndoubles), vmaskExp), vmaskExp);
        v_uint64 vv3 = v_ne(v_and(vx_load(src + i*2 + 3*ndoubles), vmaskExp), vmaskExp);
        v_uint64 vv4 = v_ne(v_and(vx_load(src + i*2 + 4*ndoubles), vmaskExp), vmaskExp);
        v_uint64 vv5 = v_ne(v_and(vx_load(src + i*2 + 5*ndoubles), vmaskExp), vmaskExp);
        v_uint64 vv6 = v_ne(v_and(vx_load(src + i*2 + 6*ndoubles), vmaskExp), vmaskExp);
        v_uint64 vv7 = v_ne(v_and(vx_load(src + i*2 + 7*ndoubles), vmaskExp), vmaskExp);
        v_uint8 velems0 = v_pack_b(vv0, vv1, vv2, vv3, vv4, vv5, vv6, vv7);
        v_uint16 vfinite0 = v_eq(v_reinterpret_as_u16(velems0), vmaskBoth);
        // 2nd arg is useless
        v_uint8 vres = v_pack(vfinite0, vfinite0);
        v_store_low(dst + i, vres);
    }
    return i;
 }
 template <>
 int finiteMaskSIMD_<double, 3>(const double *dsrc, uchar *dst, size_t utotal)
 {
    //TODO: vectorize it properly
    const uint64_t* src = (const uint64_t*)dsrc;
    const int npixels = VTraits<v_uint8>::vlanes() / 2;
    uint64_t maskExp = 0x7ff0000000000000;
    int i = 0;
    int total = (int)utotal;
    for(; i < total - npixels + 1; i += npixels )
    {
        for (int j = 0; j < npixels; j++)
        {
            uint64_t val0 = src[i * 3 + j * 3 + 0];
            uint64_t val1 = src[i * 3 + j * 3 + 1];
            uint64_t val2 = src[i * 3 + j * 3 + 2];
            bool finite = ((val0 & maskExp) != maskExp) &&
                          ((val1 & maskExp) != maskExp) &&
                          ((val2 & maskExp) != maskExp);
            dst[i + j] = finite ? 255 : 0;
        }
    }
    return i;
 }
 template <>
 int finiteMaskSIMD_<double, 4>(const double *dsrc, uchar *dst, size_t utotal)
 {
    //TODO: vectorize it properly
    uint64_t* src = (uint64_t*)dsrc;
    const int npixels = VTraits<v_uint8>::vlanes() / 2;
    const int ndoubles = VTraits<v_uint64>::vlanes();
    v_uint16 vmaskExp16 = vx_setall_u16(0x7ff0);
    v_uint32 z = vx_setzero_u32();
    int i = 0;
    int total = (int)utotal;
    for(; i < total - npixels + 1; i += npixels )
    {
        v_uint16 vexpb0, vexpb1, vexpb2, vexpb3, vexpb4, vexpb5, vexpb6, vexpb7;
        v_uint16 dummy;
        v_load_deinterleave((uint16_t*)(src + 0*4*ndoubles), dummy, dummy, dummy, vexpb0);
        v_load_deinterleave((uint16_t*)(src + 1*4*ndoubles), dummy, dummy, dummy, vexpb1);
        v_load_deinterleave((uint16_t*)(src + 2*4*ndoubles), dummy, dummy, dummy, vexpb2);
        v_load_deinterleave((uint16_t*)(src + 3*4*ndoubles), dummy, dummy, dummy, vexpb3);
        v_uint16 vcmp0 = v_eq(v_and(vexpb0, vmaskExp16), vmaskExp16);
        v_uint16 vcmp1 = v_eq(v_and(vexpb1, vmaskExp16), vmaskExp16);
        v_uint16 vcmp2 = v_eq(v_and(vexpb2, vmaskExp16), vmaskExp16);
        v_uint16 vcmp3 = v_eq(v_and(vexpb3, vmaskExp16), vmaskExp16);
        v_uint8 velems0 = v_pack(vcmp0, vcmp1);
        v_uint8 velems1 = v_pack(vcmp2, vcmp3);
        v_uint32 vResWide0 = v_eq(v_reinterpret_as_u32(velems0), z);
        v_uint32 vResWide1 = v_eq(v_reinterpret_as_u32(velems1), z);
        v_uint16 vp16 = v_pack(vResWide0, vResWide1);
        // 2nd arg is useless
        v_uint8 vres = v_pack(vp16, vp16);
        v_store_low(dst, vres);
        src += npixels * 4;
        dst += npixels;
    }
    return i;
 }
 #endif
 template <typename _Tp, int cn>
 void finiteMask_(const uchar *src, uchar *dst, size_t total)
 {
    CV_INSTRUMENT_REGION();
    size_t i = 0;
    const _Tp* tsrc = (const _Tp*) src;
 #if (CV_SIMD || CV_SIMD_SCALABLE)
    i = finiteMaskSIMD_<_Tp, cn>(tsrc, dst, total);
 #endif
    for(; i < total; i++ )
    {
        bool finite = true;
        for (int c = 0; c < cn; c++)
        {
            _Tp val = tsrc[i * cn + c];
            finite = finite && !cvIsNaN(val) && !cvIsInf(val);
        }
        dst[i] = finite ? 255 : 0;
    }
 }
 FiniteMaskFunc getFiniteMaskFunc(bool isDouble, int cn)
 {
    static FiniteMaskFunc tab[] =
    {
        (FiniteMaskFunc)GET_OPTIMIZED((finiteMask_<float,  1>)),
        (FiniteMaskFunc)GET_OPTIMIZED((finiteMask_<float,  2>)),
        (FiniteMaskFunc)GET_OPTIMIZED((finiteMask_<float,  3>)),
        (FiniteMaskFunc)GET_OPTIMIZED((finiteMask_<float,  4>)),
        (FiniteMaskFunc)GET_OPTIMIZED((finiteMask_<double, 1>)),
        (FiniteMaskFunc)GET_OPTIMIZED((finiteMask_<double, 2>)),
        (FiniteMaskFunc)GET_OPTIMIZED((finiteMask_<double, 3>)),
        (FiniteMaskFunc)GET_OPTIMIZED((finiteMask_<double, 4>)),
    };
    int idx = (isDouble ? 4 : 0) + cn - 1;
    return tab[idx];
 }
 #endif
 CV_CPU_OPTIMIZATION_NAMESPACE_END
 } // namespace cv
--- a/modules/core/src/opencl/finitemask.cl
+++ b/modules/core/src/opencl/finitemask.cl
@ -0,0 +1,44 @@
 // This file is part of OpenCV project.
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html
 // This kernel is compiled with the following possible defines:
 //  - srcT, cn: source type and number of channels per pixel
 //  - rowsPerWI: Intel GPU optimization
 //  - DOUBLE_SUPPORT: enable double support if available
 #ifdef DOUBLE_SUPPORT
 #ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
 #elif defined cl_khr_fp64
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #endif
 __kernel void finiteMask(__global const uchar * srcptr, int srcstep, int srcoffset,
                         __global uchar * dstptr, int dststep, int dstoffset,
                         int rows, int cols )
 {
    int x = get_global_id(0);
    int y0 = get_global_id(1) * rowsPerWI;
    if (x < cols)
    {
        int src_index = mad24(y0, srcstep, mad24(x, (int)sizeof(srcT) * cn, srcoffset));
        int dst_index = mad24(y0, dststep, x + dstoffset);
        for (int y = y0, y1 = min(rows, y0 + rowsPerWI); y < y1; ++y, src_index += srcstep, dst_index += dststep)
        {
            bool vfinite = true;
            for (int c = 0; c < cn; c++)
            {
                srcT val = *(__global srcT *)(srcptr + src_index + c * (int)sizeof(srcT));
                vfinite = vfinite && !isnan(val) & !isinf(val);
            }
            *(dstptr + dst_index) = vfinite ? 255 : 0;
        }
    }
 }
--- a/modules/core/test/ocl/test_arithm.cpp
+++ b/modules/core/test/ocl/test_arithm.cpp
@ -1699,8 +1699,36 @@ OCL_TEST_P(ScaleAdd, Mat)
 //////////////////////////////// PatchNans ////////////////////////////////////////////////
-PARAM_TEST_CASE(PatchNaNs, Channels, bool)
+template<typename _Tp>
 _Tp randomNan(RNG& rng);
 template<>
 float randomNan(RNG& rng)
 {
    uint32_t r = rng.next();
    Cv32suf v;
    v.u = r;
    // exp & set a bit to avoid zero mantissa
    v.u = v.u | 0x7f800001;
    return v.f;
 }
 template<>
 double randomNan(RNG& rng)
 {
    uint32_t r0 = rng.next();
    uint32_t r1 = rng.next();
    Cv64suf v;
    v.u = (uint64_t(r0) << 32) | uint64_t(r1);
    // exp &set a bit to avoid zero mantissa
    v.u = v.u | 0x7ff0000000000001;
    return v.f;
 }
 PARAM_TEST_CASE(PatchNaNs, MatDepth, Channels, bool)
 {
    int ftype;
    int cn;
    bool use_roi;
    double value;
@ -1709,13 +1737,14 @@ PARAM_TEST_CASE(PatchNaNs, Channels, bool)
    virtual void SetUp()
    {
-        cn = GET_PARAM(0);
+        ftype = GET_PARAM(0);
-        use_roi = GET_PARAM(1);
+        cn = GET_PARAM(1);
        use_roi = GET_PARAM(2);
    }
    void generateTestData()
    {
-        const int type = CV_MAKE_TYPE(CV_32F, cn);
+        const int type = CV_MAKE_TYPE(ftype, cn);
        Size roiSize = randomSize(1, 10);
        Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
@ -1725,9 +1754,19 @@ PARAM_TEST_CASE(PatchNaNs, Channels, bool)
        roiSize.width *= cn;
        for (int y = 0; y < roiSize.height; ++y)
        {
-            float * const ptr = src_roi.ptr<float>(y);
+            float  *const ptrf = src_roi.ptr<float >(y);
            double *const ptrd = src_roi.ptr<double>(y);
            for (int x = 0; x < roiSize.width; ++x)
-                ptr[x] = randomInt(-1, 1) == 0 ? std::numeric_limits<float>::quiet_NaN() : ptr[x];
+            {
                if (ftype == CV_32F)
                {
                    ptrf[x] = randomInt(-1, 1) == 0 ? randomNan<float >(rng) : ptrf[x];
                }
                else if (ftype == CV_64F)
                {
                    ptrd[x] = randomInt(-1, 1) == 0 ? randomNan<double>(rng) : ptrd[x];
                }
            }
        }
        value = randomDouble(-100, 100);
@ -1754,6 +1793,84 @@ OCL_TEST_P(PatchNaNs, Mat)
    }
 }
 //////////////////////////////// finiteMask /////////////////////////////////////////////
 PARAM_TEST_CASE(FiniteMask, MatDepth, Channels, bool)
 {
    int ftype;
    int cn;
    bool use_roi;
    TEST_DECLARE_INPUT_PARAMETER(src);
    TEST_DECLARE_OUTPUT_PARAMETER(mask);
    virtual void SetUp()
    {
        ftype = GET_PARAM(0);
        cn = GET_PARAM(1);
        use_roi = GET_PARAM(2);
    }
    void generateTestData()
    {
        const int type = CV_MAKE_TYPE(ftype, cn);
        Size roiSize = randomSize(1, MAX_VALUE);
        Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
        randomSubMat(src, src_roi, roiSize, srcBorder, type, -40, 40);
        randomSubMat(mask, mask_roi, roiSize, srcBorder, CV_8UC1, 5, 16);
        // generating NaNs
        const softfloat  fpinf = softfloat ::inf();
        const softfloat  fninf = softfloat ::inf().setSign(true);
        const softdouble dpinf = softdouble::inf();
        const softdouble dninf = softdouble::inf().setSign(true);
        for (int y = 0; y < roiSize.height; ++y)
        {
            float  *const ptrf = src_roi.ptr<float >(y);
            double *const ptrd = src_roi.ptr<double>(y);
            for (int x = 0; x < roiSize.width * cn; ++x)
            {
                int rem = randomInt(0, 10);
                if (ftype == CV_32F)
                {
                    ptrf[x] = rem <  4 ? randomNan<float >(rng) :
                              rem == 5 ? (float )((x + y)%2 ? fpinf : fninf) : ptrf[x];
                }
                else if (ftype == CV_64F)
                {
                    ptrd[x] = rem <  4 ? randomNan<double>(rng) :
                              rem == 5 ? (double)((x + y)%2 ? dpinf : dninf) : ptrd[x];
                }
            }
        }
        UMAT_UPLOAD_INPUT_PARAMETER(src);
        UMAT_UPLOAD_OUTPUT_PARAMETER(mask);
    }
    void Near()
    {
        OCL_EXPECT_MATS_NEAR(mask, 0);
    }
 };
 OCL_TEST_P(FiniteMask, Mat)
 {
    for (int j = 0; j < test_loop_times; j++)
    {
        generateTestData();
        OCL_OFF(cv::finiteMask(src_roi, mask_roi));
        OCL_ON(cv::finiteMask(usrc_roi, umask_roi));
        Near();
    }
 }
 //////////////////////////////// Psnr ////////////////////////////////////////////////
 typedef ArithmTestBase Psnr;
@ -1928,7 +2045,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, InRange, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHA
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertScaleAbs, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertFp16, Combine(OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, ScaleAdd, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
-OCL_INSTANTIATE_TEST_CASE_P(Arithm, PatchNaNs, Combine(OCL_ALL_CHANNELS, Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, PatchNaNs, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, FiniteMask, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Psnr, Combine(::testing::Values((MatDepth)CV_8U), OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, UMatDot, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
--- a/modules/core/test/test_arithm.cpp
+++ b/modules/core/test/test_arithm.cpp
@ -729,6 +729,88 @@ struct InRangeOp : public BaseArithmOp
    }
 };
 namespace reference {
 template<typename _Tp>
 struct SoftType;
 template<>
 struct SoftType<float>
 {
    typedef softfloat type;
 };
 template<>
 struct SoftType<double>
 {
    typedef softdouble type;
 };
 template <typename _Tp>
 static void finiteMask_(const _Tp *src, uchar *dst, size_t total, int cn)
 {
    for(size_t i = 0; i < total; i++ )
    {
        bool good = true;
        for (int c = 0; c < cn; c++)
        {
            _Tp val = src[i * cn + c];
            typename SoftType<_Tp>::type sval(val);
            good = good && !sval.isNaN() && !sval.isInf();
        }
        dst[i] = good ? 255 : 0;
    }
 }
 static void finiteMask(const Mat& src, Mat& dst)
 {
    dst.create(src.dims, &src.size[0], CV_8UC1);
    const Mat *arrays[]={&src, &dst, 0};
    Mat planes[2];
    NAryMatIterator it(arrays, planes);
    size_t total = planes[0].total();
    size_t i, nplanes = it.nplanes;
    int depth = src.depth(), cn = src.channels();
    for( i = 0; i < nplanes; i++, ++it )
    {
        const uchar* sptr = planes[0].ptr();
        uchar* dptr = planes[1].ptr();
        switch( depth )
        {
        case CV_32F: finiteMask_<float >((const  float*)sptr, dptr, total, cn); break;
        case CV_64F: finiteMask_<double>((const double*)sptr, dptr, total, cn); break;
        }
    }
 }
 }
 struct FiniteMaskOp : public BaseElemWiseOp
 {
    FiniteMaskOp() : BaseElemWiseOp(1, 0, 1, 1, Scalar::all(0)) {}
    void op(const vector<Mat>& src, Mat& dst, const Mat&)
    {
        cv::finiteMask(src[0], dst);
    }
    void refop(const vector<Mat>& src, Mat& dst, const Mat&)
    {
        reference::finiteMask(src[0], dst);
    }
    int getRandomType(RNG& rng)
    {
        return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_FLT, 1, 4);
    }
    double getMaxErr(int)
    {
        return 0;
    }
 };
 struct ConvertScaleOp : public BaseElemWiseOp
 {
@ -1573,6 +1655,8 @@ INSTANTIATE_TEST_CASE_P(Core_CmpS, ElemWiseTest, ::testing::Values(ElemWiseOpPtr
 INSTANTIATE_TEST_CASE_P(Core_InRangeS, ElemWiseTest, ::testing::Values(ElemWiseOpPtr(new InRangeSOp)));
 INSTANTIATE_TEST_CASE_P(Core_InRange, ElemWiseTest, ::testing::Values(ElemWiseOpPtr(new InRangeOp)));
 INSTANTIATE_TEST_CASE_P(Core_FiniteMask, ElemWiseTest, ::testing::Values(ElemWiseOpPtr(new FiniteMaskOp)));
 INSTANTIATE_TEST_CASE_P(Core_Flip, ElemWiseTest, ::testing::Values(ElemWiseOpPtr(new FlipOp)));
 INSTANTIATE_TEST_CASE_P(Core_Transpose, ElemWiseTest, ::testing::Values(ElemWiseOpPtr(new TransposeOp)));
 INSTANTIATE_TEST_CASE_P(Core_SetIdentity, ElemWiseTest, ::testing::Values(ElemWiseOpPtr(new SetIdentityOp)));
@ -2876,4 +2960,76 @@ TEST(Core_CartPolar, inplace)
    EXPECT_THROW(cv::cartToPolar(uA[0], uA[1], uA[0], uA[1]), cv::Exception);
 }
 // Check different values for finiteMask()
 template<typename _Tp>
 _Tp randomNan(RNG& rng);
 template<>
 float randomNan(RNG& rng)
 {
    uint32_t r = rng.next();
    Cv32suf v;
    v.u = r;
    // exp & set a bit to avoid zero mantissa
    v.u = v.u | 0x7f800001;
    return v.f;
 }
 template<>
 double randomNan(RNG& rng)
 {
    uint32_t r0 = rng.next();
    uint32_t r1 = rng.next();
    Cv64suf v;
    v.u = (uint64_t(r0) << 32) | uint64_t(r1);
    // exp &set a bit to avoid zero mantissa
    v.u = v.u | 0x7ff0000000000001;
    return v.f;
 }
 template<typename T>
 Mat generateFiniteMaskData(int cn, RNG& rng)
 {
    typedef typename reference::SoftType<T>::type SFT;
    SFT pinf = SFT::inf();
    SFT ninf = SFT::inf().setSign(true);
    const int len = 100;
    Mat_<T> plainData(1, cn*len);
    for(int i = 0; i < cn*len; i++)
    {
        int r = rng.uniform(0, 3);
        plainData(i) = r == 0 ? T(rng.uniform(0, 2) ? pinf : ninf) :
                       r == 1 ? randomNan<T>(rng) : T(0);
    }
    return Mat(plainData).reshape(cn);
 }
 typedef std::tuple<int, int> FiniteMaskFixtureParams;
 class FiniteMaskFixture : public ::testing::TestWithParam<FiniteMaskFixtureParams> {};
 TEST_P(FiniteMaskFixture, flags)
 {
    auto p = GetParam();
    int depth = get<0>(p);
    int channels = get<1>(p);
    RNG rng((uint64)ARITHM_RNG_SEED);
    Mat data = (depth == CV_32F) ? generateFiniteMaskData<float >(channels, rng)
                  /* CV_64F */   : generateFiniteMaskData<double>(channels, rng);
    Mat nans, gtNans;
    cv::finiteMask(data, nans);
    reference::finiteMask(data, gtNans);
    EXPECT_MAT_NEAR(nans, gtNans, 0);
 }
 // Params are: depth, channels 1 to 4
 INSTANTIATE_TEST_CASE_P(Core_FiniteMask, FiniteMaskFixture, ::testing::Combine(::testing::Values(CV_32F, CV_64F), ::testing::Range(1, 5)));
 }} // namespace
--- a/modules/core/test/test_precomp.hpp
+++ b/modules/core/test/test_precomp.hpp
@ -8,5 +8,6 @@
 #include "opencv2/ts/ocl_test.hpp"
 #include "opencv2/core/private.hpp"
 #include "opencv2/core/hal/hal.hpp"
 #include "opencv2/core/softfloat.hpp"
 #endif