diff --git a/3rdparty/carotene/src/div.cpp b/3rdparty/carotene/src/div.cpp index cb5f1e7e94..38892acab3 100644 --- a/3rdparty/carotene/src/div.cpp +++ b/3rdparty/carotene/src/div.cpp @@ -151,6 +151,10 @@ void div(const Size2D &size, typedef typename internal::VecTraits::vec128 vec128; typedef typename internal::VecTraits::vec64 vec64; +#if defined(__GNUC__) && (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) + static_assert(std::numeric_limits::is_integer, "template implementation is for integer types only"); +#endif + if (scale == 0.0f || (std::numeric_limits::is_integer && (scale * std::numeric_limits::max()) < 1.0f && @@ -311,6 +315,10 @@ void recip(const Size2D &size, typedef typename internal::VecTraits::vec128 vec128; typedef typename internal::VecTraits::vec64 vec64; +#if defined(__GNUC__) && (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) + static_assert(std::numeric_limits::is_integer, "template implementation is for integer types only"); +#endif + if (scale == 0.0f || (std::numeric_limits::is_integer && scale < 1.0f && @@ -463,8 +471,6 @@ void div(const Size2D &size, return; } - float32x4_t v_zero = vdupq_n_f32(0.0f); - size_t roiw128 = size.width >= 3 ? size.width - 3 : 0; size_t roiw64 = size.width >= 1 ? 
size.width - 1 : 0; @@ -485,9 +491,7 @@ void div(const Size2D &size, float32x4_t v_src0 = vld1q_f32(src0 + j); float32x4_t v_src1 = vld1q_f32(src1 + j); - uint32x4_t v_mask = vceqq_f32(v_src1,v_zero); - vst1q_f32(dst + j, vreinterpretq_f32_u32(vbicq_u32( - vreinterpretq_u32_f32(vmulq_f32(v_src0, internal::vrecpq_f32(v_src1))), v_mask))); + vst1q_f32(dst + j, vmulq_f32(v_src0, internal::vrecpq_f32(v_src1))); } for (; j < roiw64; j += 2) @@ -495,14 +499,12 @@ void div(const Size2D &size, float32x2_t v_src0 = vld1_f32(src0 + j); float32x2_t v_src1 = vld1_f32(src1 + j); - uint32x2_t v_mask = vceq_f32(v_src1,vget_low_f32(v_zero)); - vst1_f32(dst + j, vreinterpret_f32_u32(vbic_u32( - vreinterpret_u32_f32(vmul_f32(v_src0, internal::vrecp_f32(v_src1))), v_mask))); + vst1_f32(dst + j, vmul_f32(v_src0, internal::vrecp_f32(v_src1))); } for (; j < size.width; j++) { - dst[j] = src1[j] ? src0[j] / src1[j] : 0.0f; + dst[j] = src0[j] / src1[j]; } } } @@ -523,10 +525,8 @@ void div(const Size2D &size, float32x4_t v_src0 = vld1q_f32(src0 + j); float32x4_t v_src1 = vld1q_f32(src1 + j); - uint32x4_t v_mask = vceqq_f32(v_src1,v_zero); - vst1q_f32(dst + j, vreinterpretq_f32_u32(vbicq_u32( - vreinterpretq_u32_f32(vmulq_f32(vmulq_n_f32(v_src0, scale), - internal::vrecpq_f32(v_src1))), v_mask))); + vst1q_f32(dst + j, vmulq_f32(vmulq_n_f32(v_src0, scale), + internal::vrecpq_f32(v_src1))); } for (; j < roiw64; j += 2) @@ -534,15 +534,13 @@ void div(const Size2D &size, float32x2_t v_src0 = vld1_f32(src0 + j); float32x2_t v_src1 = vld1_f32(src1 + j); - uint32x2_t v_mask = vceq_f32(v_src1,vget_low_f32(v_zero)); - vst1_f32(dst + j, vreinterpret_f32_u32(vbic_u32( - vreinterpret_u32_f32(vmul_f32(vmul_n_f32(v_src0, scale), - internal::vrecp_f32(v_src1))), v_mask))); + vst1_f32(dst + j, vmul_f32(vmul_n_f32(v_src0, scale), + internal::vrecp_f32(v_src1))); } for (; j < size.width; j++) { - dst[j] = src1[j] ? 
src0[j] * scale / src1[j] : 0.0f; + dst[j] = src0[j] * scale / src1[j]; } } } @@ -620,8 +618,6 @@ void reciprocal(const Size2D &size, return; } - float32x4_t v_zero = vdupq_n_f32(0.0f); - size_t roiw128 = size.width >= 3 ? size.width - 3 : 0; size_t roiw64 = size.width >= 1 ? size.width - 1 : 0; @@ -639,23 +635,19 @@ void reciprocal(const Size2D &size, float32x4_t v_src1 = vld1q_f32(src1 + j); - uint32x4_t v_mask = vceqq_f32(v_src1,v_zero); - vst1q_f32(dst + j, vreinterpretq_f32_u32(vbicq_u32( - vreinterpretq_u32_f32(internal::vrecpq_f32(v_src1)), v_mask))); + vst1q_f32(dst + j, internal::vrecpq_f32(v_src1)); } for (; j < roiw64; j += 2) { float32x2_t v_src1 = vld1_f32(src1 + j); - uint32x2_t v_mask = vceq_f32(v_src1,vget_low_f32(v_zero)); - vst1_f32(dst + j, vreinterpret_f32_u32(vbic_u32( - vreinterpret_u32_f32(internal::vrecp_f32(v_src1)), v_mask))); + vst1_f32(dst + j, internal::vrecp_f32(v_src1)); } for (; j < size.width; j++) { - dst[j] = src1[j] ? 1.0f / src1[j] : 0; + dst[j] = 1.0f / src1[j]; } } } @@ -673,25 +665,19 @@ void reciprocal(const Size2D &size, float32x4_t v_src1 = vld1q_f32(src1 + j); - uint32x4_t v_mask = vceqq_f32(v_src1,v_zero); - vst1q_f32(dst + j, vreinterpretq_f32_u32(vbicq_u32( - vreinterpretq_u32_f32(vmulq_n_f32(internal::vrecpq_f32(v_src1), - scale)),v_mask))); + vst1q_f32(dst + j, vmulq_n_f32(internal::vrecpq_f32(v_src1), scale)); } for (; j < roiw64; j += 2) { float32x2_t v_src1 = vld1_f32(src1 + j); - uint32x2_t v_mask = vceq_f32(v_src1,vget_low_f32(v_zero)); - vst1_f32(dst + j, vreinterpret_f32_u32(vbic_u32( - vreinterpret_u32_f32(vmul_n_f32(internal::vrecp_f32(v_src1), - scale)), v_mask))); + vst1_f32(dst + j, vmul_n_f32(internal::vrecp_f32(v_src1), scale)); } for (; j < size.width; j++) { - dst[j] = src1[j] ? 
scale / src1[j] : 0; + dst[j] = scale / src1[j]; } } } diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index c8216b28f3..73456b1fb2 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -415,8 +415,13 @@ The function cv::divide divides one array by another: or a scalar by an array when there is no src1 : \f[\texttt{dst(I) = saturate(scale/src2(I))}\f] -When src2(I) is zero, dst(I) will also be zero. Different channels of -multi-channel arrays are processed independently. +Different channels of multi-channel arrays are processed independently. + +For integer types, when src2(I) is zero, dst(I) will also be zero. + +@note For floating-point data, there is no specially defined behavior for zero src2(I) values. +Regular floating-point division is used. +Expect correct IEEE-754 behavior for floating-point data (with NaN and Inf result values). @note Saturation is not applied when the output array has the depth CV_32S. You may even get result of an incorrect sign in the case of overflow. diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index ce943e0ffb..7c504afb06 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -105,14 +105,18 @@ static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst, int scalarcn = kercn == 3 ? 4 : kercn; int rowsPerWI = d.isIntel() ? 4 : 1; - sprintf(opts, "-D %s%s -D %s -D dstT=%s%s -D dstT_C1=%s -D workST=%s -D cn=%d -D rowsPerWI=%d", + const int dstDepth = srcdepth; + const int dstType = CV_MAKETYPE(dstDepth, kercn); + const int dstType1 = CV_MAKETYPE(dstDepth, 1); + const int scalarType = CV_MAKETYPE(srcdepth, scalarcn); + + sprintf(opts, "-D %s%s -D %s%s -D dstT=%s -D DEPTH_dst=%d -D dstT_C1=%s -D workST=%s -D cn=%d -D rowsPerWI=%d", haveMask ? "MASK_" : "", haveScalar ? "UNARY_OP" : "BINARY_OP", oclop2str[oclop], - bitwise ? 
ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, kercn)) : - ocl::typeToStr(CV_MAKETYPE(srcdepth, kercn)), doubleSupport ? " -D DOUBLE_SUPPORT" : "", - bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, 1)) : - ocl::typeToStr(CV_MAKETYPE(srcdepth, 1)), - bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, scalarcn)) : - ocl::typeToStr(CV_MAKETYPE(srcdepth, scalarcn)), + doubleSupport ? " -D DOUBLE_SUPPORT" : "", + bitwise ? ocl::memopTypeToStr(dstType) : ocl::typeToStr(dstType), + dstDepth, + bitwise ? ocl::memopTypeToStr(dstType1) : ocl::typeToStr(dstType1), + bitwise ? ocl::memopTypeToStr(scalarType) : ocl::typeToStr(scalarType), kercn, rowsPerWI); ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts); @@ -501,12 +505,12 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, char cvtstr[4][32], opts[1024]; sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT1_C1=%s -D srcT2=%s -D srcT2_C1=%s " - "-D dstT=%s -D dstT_C1=%s -D workT=%s -D workST=%s -D scaleT=%s -D wdepth=%d -D convertToWT1=%s " + "-D dstT=%s -D DEPTH_dst=%d -D dstT_C1=%s -D workT=%s -D workST=%s -D scaleT=%s -D wdepth=%d -D convertToWT1=%s " "-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d -D rowsPerWI=%d -D convertFromU=%s", (haveMask ? "MASK_" : ""), (haveScalar ? 
"UNARY_OP" : "BINARY_OP"), oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)), ocl::typeToStr(depth1), ocl::typeToStr(CV_MAKETYPE(depth2, kercn)), - ocl::typeToStr(depth2), ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)), + ocl::typeToStr(depth2), ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)), ddepth, ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)), ocl::typeToStr(CV_MAKETYPE(wdepth, scalarcn)), ocl::typeToStr(wdepth), wdepth, @@ -1099,12 +1103,12 @@ static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, in const char * const operationMap[] = { "==", ">", ">=", "<", "<=", "!=" }; char cvt[40]; - String opts = format("-D %s -D srcT1=%s -D dstT=%s -D workT=srcT1 -D cn=%d" + String opts = format("-D %s -D srcT1=%s -D dstT=%s -D DEPTH_dst=%d -D workT=srcT1 -D cn=%d" " -D convertToDT=%s -D OP_CMP -D CMP_OPERATOR=%s -D srcT1_C1=%s" " -D srcT2_C1=%s -D dstT_C1=%s -D workST=%s -D rowsPerWI=%d%s", haveScalar ? "UNARY_OP" : "BINARY_OP", ocl::typeToStr(CV_MAKE_TYPE(depth1, kercn)), - ocl::typeToStr(CV_8UC(kercn)), kercn, + ocl::typeToStr(CV_8UC(kercn)), CV_8U, kercn, ocl::convertTypeStr(depth1, CV_8U, kercn, cvt), operationMap[op], ocl::typeToStr(depth1), ocl::typeToStr(depth1), ocl::typeToStr(CV_8U), diff --git a/modules/core/src/arithm_core.hpp b/modules/core/src/arithm_core.hpp index 99b564cf74..7b7d6f7d85 100644 --- a/modules/core/src/arithm_core.hpp +++ b/modules/core/src/arithm_core.hpp @@ -516,7 +516,10 @@ div_i( const T* src1, size_t step1, const T* src2, size_t step2, for( ; i < width; i++ ) { T num = src1[i], denom = src2[i]; - dst[i] = denom != 0 ? saturate_cast(num*scale_f/denom) : (T)0; + T v = 0; + if (denom != 0) + v = saturate_cast(num*scale_f/denom); + dst[i] = v; } } } @@ -538,7 +541,7 @@ div_f( const T* src1, size_t step1, const T* src2, size_t step2, for( ; i < width; i++ ) { T num = src1[i], denom = src2[i]; - dst[i] = denom != 0 ? 
saturate_cast(num*scale_f/denom) : (T)0; + dst[i] = saturate_cast(num*scale_f/denom); } } } @@ -559,7 +562,10 @@ recip_i( const T* src2, size_t step2, for( ; i < width; i++ ) { T denom = src2[i]; - dst[i] = denom != 0 ? saturate_cast(scale_f/denom) : (T)0; + T v = 0; + if (denom != 0) + v = saturate_cast(scale_f/denom); + dst[i] = v; } } } @@ -580,7 +586,7 @@ recip_f( const T* src2, size_t step2, for( ; i < width; i++ ) { T denom = src2[i]; - dst[i] = denom != 0 ? saturate_cast(scale_f/denom) : (T)0; + dst[i] = saturate_cast(scale_f/denom); } } } diff --git a/modules/core/src/arithm_simd.hpp b/modules/core/src/arithm_simd.hpp index 5a37b4c200..98a0126d20 100644 --- a/modules/core/src/arithm_simd.hpp +++ b/modules/core/src/arithm_simd.hpp @@ -1433,7 +1433,6 @@ struct Div_SIMD return x; v_float32x4 v_scale = v_setall_f32((float)scale); - v_float32x4 v_zero = v_setzero_f32(); for ( ; x <= width - 8; x += 8) { @@ -1445,9 +1444,6 @@ struct Div_SIMD v_float32x4 res0 = f0 * v_scale / f2; v_float32x4 res1 = f1 * v_scale / f3; - res0 = v_select(f2 == v_zero, v_zero, res0); - res1 = v_select(f3 == v_zero, v_zero, res1); - v_store(dst + x, res0); v_store(dst + x + 4, res1); } @@ -1675,7 +1671,6 @@ struct Recip_SIMD return x; v_float32x4 v_scale = v_setall_f32((float)scale); - v_float32x4 v_zero = v_setzero_f32(); for ( ; x <= width - 8; x += 8) { @@ -1685,9 +1680,6 @@ struct Recip_SIMD v_float32x4 res0 = v_scale / f0; v_float32x4 res1 = v_scale / f1; - res0 = v_select(f0 == v_zero, v_zero, res0); - res1 = v_select(f1 == v_zero, v_zero, res1); - v_store(dst + x, res0); v_store(dst + x + 4, res1); } @@ -1712,7 +1704,6 @@ struct Div_SIMD return x; v_float64x2 v_scale = v_setall_f64(scale); - v_float64x2 v_zero = v_setzero_f64(); for ( ; x <= width - 4; x += 4) { @@ -1724,9 +1715,6 @@ struct Div_SIMD v_float64x2 res0 = f0 * v_scale / f2; v_float64x2 res1 = f1 * v_scale / f3; - res0 = v_select(f2 == v_zero, v_zero, res0); - res1 = v_select(f3 == v_zero, v_zero, res1); - 
v_store(dst + x, res0); v_store(dst + x + 2, res1); } @@ -1749,7 +1737,6 @@ struct Recip_SIMD return x; v_float64x2 v_scale = v_setall_f64(scale); - v_float64x2 v_zero = v_setzero_f64(); for ( ; x <= width - 4; x += 4) { @@ -1759,9 +1746,6 @@ struct Recip_SIMD v_float64x2 res0 = v_scale / f0; v_float64x2 res1 = v_scale / f1; - res0 = v_select(f0 == v_zero, v_zero, res0); - res1 = v_select(f1 == v_zero, v_zero, res1); - v_store(dst + x, res0); v_store(dst + x + 2, res1); } diff --git a/modules/core/src/convert_scale.cpp b/modules/core/src/convert_scale.cpp index 2758685f4a..ba9c023211 100644 --- a/modules/core/src/convert_scale.cpp +++ b/modules/core/src/convert_scale.cpp @@ -375,10 +375,10 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha int rowsPerWI = d.isIntel() ? 4 : 1; char cvt[2][50]; int wdepth = std::max(depth, CV_32F); - String build_opt = format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D srcT1=%s" + String build_opt = format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D DEPTH_dst=%d -D srcT1=%s" " -D workT=%s -D wdepth=%d -D convertToWT1=%s -D convertToDT=%s" " -D workT1=%s -D rowsPerWI=%d%s", - ocl::typeToStr(CV_8UC(kercn)), + ocl::typeToStr(CV_8UC(kercn)), CV_8U, ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)), wdepth, ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]), diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index e8067b5128..b1fb96ad2d 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -71,8 +71,8 @@ static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, in int rowsPerWI = d.isIntel() ? 4 : 1; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, - format("-D %s -D %s -D dstT=%s -D rowsPerWI=%d%s", _src2.empty() ? 
"UNARY_OP" : "BINARY_OP", - oclop2str[oclop], ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), rowsPerWI, + format("-D %s -D %s -D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d%s", _src2.empty() ? "UNARY_OP" : "BINARY_OP", + oclop2str[oclop], ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), depth, rowsPerWI, double_support ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; @@ -238,9 +238,9 @@ static bool ocl_cartToPolar( InputArray _src1, InputArray _src2, return false; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, - format("-D BINARY_OP -D dstT=%s -D depth=%d -D rowsPerWI=%d -D OP_CTP_%s%s", - ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), - depth, rowsPerWI, angleInDegrees ? "AD" : "AR", + format("-D BINARY_OP -D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d -D OP_CTP_%s%s", + ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), depth, + rowsPerWI, angleInDegrees ? "AD" : "AR", doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; @@ -474,9 +474,10 @@ static bool ocl_polarToCart( InputArray _mag, InputArray _angle, return false; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, - format("-D dstT=%s -D rowsPerWI=%d -D depth=%d -D BINARY_OP -D OP_PTC_%s%s", - ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), rowsPerWI, - depth, angleInDegrees ? "AD" : "AR", + format("-D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d -D BINARY_OP -D OP_PTC_%s%s", + ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), depth, + rowsPerWI, + angleInDegrees ? "AD" : "AR", doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; @@ -1169,8 +1170,8 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst, const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW"; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, - format("-D dstT=%s -D depth=%d -D rowsPerWI=%d -D %s -D UNARY_OP%s", - ocl::typeToStr(depth), depth, rowsPerWI, op, + format("-D dstT=%s -D DEPTH_dst=%d -D rowsPerWI=%d -D %s -D UNARY_OP%s", + ocl::typeToStr(depth), depth, rowsPerWI, op, doubleSupport ? 
" -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; @@ -1560,8 +1561,8 @@ static bool ocl_patchNaNs( InputOutputArray _a, float value ) { int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, - format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=float -D rowsPerWI=%d", - rowsPerWI)); + format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=float -D DEPTH_dst=%d -D rowsPerWI=%d", + CV_32F, rowsPerWI)); if (k.empty()) return false; diff --git a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp index 7cd89c6222..e289716dea 100644 --- a/modules/core/src/matmul.cpp +++ b/modules/core/src/matmul.cpp @@ -2375,10 +2375,10 @@ static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, Outp char cvt[2][50]; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, - format("-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D workT=%s -D convertToWT1=%s" + format("-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D DEPTH_dst=%d -D workT=%s -D convertToWT1=%s" " -D srcT1=dstT -D srcT2=dstT -D convertToDT=%s -D workT1=%s" " -D wdepth=%d%s -D rowsPerWI=%d", - ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), + ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), depth, ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)), ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]), ocl::convertTypeStr(wdepth, depth, kercn, cvt[1]), diff --git a/modules/core/src/opencl/arithm.cl b/modules/core/src/opencl/arithm.cl index 0b81d76183..d4165faae3 100644 --- a/modules/core/src/opencl/arithm.cl +++ b/modules/core/src/opencl/arithm.cl @@ -71,7 +71,30 @@ #pragma OPENCL FP_FAST_FMA ON #endif -#if depth <= 5 +#if !defined(DEPTH_dst) +#error "Kernel configuration error: DEPTH_dst value is required" +#elif !(DEPTH_dst >= 0 && DEPTH_dst <= 7) +#error "Kernel configuration error: invalid DEPTH_dst value" +#endif +#if defined(depth) +#error "Kernel configuration error: ambiguous 'depth' value is defined, use 'DEPTH_dst' instead" +#endif + + +#if DEPTH_dst < 5 /* CV_32F */ +#define 
CV_DST_TYPE_IS_INTEGER +#else +#define CV_DST_TYPE_IS_FP +#endif + +#if DEPTH_dst != 6 /* CV_64F */ +#define CV_DST_TYPE_FIT_32F 1 +#else +#define CV_DST_TYPE_FIT_32F 0 +#endif + + +#if CV_DST_TYPE_FIT_32F #define CV_PI M_PI_F #else #define CV_PI M_PI @@ -204,9 +227,15 @@ #define PROCESS_ELEM storedst(convertToDT(srcelem1 * scale * srcelem2)) #elif defined OP_DIV +#ifdef CV_DST_TYPE_IS_INTEGER #define PROCESS_ELEM \ workT e2 = srcelem2, zero = (workT)(0); \ storedst(convertToDT(e2 != zero ? srcelem1 / e2 : zero)) +#else +#define PROCESS_ELEM \ + workT e2 = srcelem2; \ + storedst(convertToDT(srcelem1 / e2)) +#endif #elif defined OP_DIV_SCALE #undef EXTRA_PARAMS @@ -217,9 +246,15 @@ #else #define EXTRA_PARAMS , scaleT scale #endif +#ifdef CV_DST_TYPE_IS_INTEGER #define PROCESS_ELEM \ workT e2 = srcelem2, zero = (workT)(0); \ storedst(convertToDT(e2 == zero ? zero : (srcelem1 * (workT)(scale) / e2))) +#else +#define PROCESS_ELEM \ + workT e2 = srcelem2; \ + storedst(convertToDT(srcelem1 * (workT)(scale) / e2)) +#endif #elif defined OP_RDIV_SCALE #undef EXTRA_PARAMS @@ -230,16 +265,28 @@ #else #define EXTRA_PARAMS , scaleT scale #endif +#ifdef CV_DST_TYPE_IS_INTEGER #define PROCESS_ELEM \ workT e1 = srcelem1, zero = (workT)(0); \ storedst(convertToDT(e1 == zero ? zero : (srcelem2 * (workT)(scale) / e1))) +#else +#define PROCESS_ELEM \ + workT e1 = srcelem1; \ + storedst(convertToDT(srcelem2 * (workT)(scale) / e1)) +#endif #elif defined OP_RECIP_SCALE #undef EXTRA_PARAMS #define EXTRA_PARAMS , scaleT scale +#ifdef CV_DST_TYPE_IS_INTEGER #define PROCESS_ELEM \ workT e1 = srcelem1, zero = (workT)(0); \ storedst(convertToDT(e1 != zero ? 
scale / e1 : zero)) +#else +#define PROCESS_ELEM \ + workT e1 = srcelem1; \ + storedst(convertToDT(scale / e1)) +#endif #elif defined OP_ADDW #undef EXTRA_PARAMS @@ -283,7 +330,7 @@ #define PROCESS_ELEM storedst(pown(srcelem1, srcelem2)) #elif defined OP_SQRT -#if depth <= 5 +#if CV_DST_TYPE_FIT_32F #define PROCESS_ELEM storedst(native_sqrt(srcelem1)) #else #define PROCESS_ELEM storedst(sqrt(srcelem1)) @@ -324,7 +371,7 @@ #endif #elif defined OP_CTP_AD || defined OP_CTP_AR -#if depth <= 5 +#if CV_DST_TYPE_FIT_32F #define CV_EPSILON FLT_EPSILON #else #define CV_EPSILON DBL_EPSILON diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index ba351e7977..b9a7d78d21 100644 --- a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -2242,4 +2242,131 @@ TEST(Core_MeanStdDev, regression_multichannel) } } +template static inline +void testDivideInitData(Mat& src1, Mat& src2) +{ + CV_StaticAssert(std::numeric_limits::is_integer, ""); + const static T src1_[] = { + 0, 0, 0, 0, + 8, 8, 8, 8, + -8, -8, -8, -8 + }; + Mat(3, 4, traits::Type::value, (void*)src1_).copyTo(src1); + const static T src2_[] = { + 1, 2, 0, std::numeric_limits::max(), + 1, 2, 0, std::numeric_limits::max(), + 1, 2, 0, std::numeric_limits::max(), + }; + Mat(3, 4, traits::Type::value, (void*)src2_).copyTo(src2); +} + +template static inline +void testDivideInitDataFloat(Mat& src1, Mat& src2) +{ + CV_StaticAssert(!std::numeric_limits::is_integer, ""); + const static T src1_[] = { + 0, 0, 0, 0, + 8, 8, 8, 8, + -8, -8, -8, -8 + }; + Mat(3, 4, traits::Type::value, (void*)src1_).copyTo(src1); + const static T src2_[] = { + 1, 2, 0, std::numeric_limits::infinity(), + 1, 2, 0, std::numeric_limits::infinity(), + 1, 2, 0, std::numeric_limits::infinity(), + }; + Mat(3, 4, traits::Type::value, (void*)src2_).copyTo(src2); +} + +template <> inline void testDivideInitData(Mat& src1, Mat& src2) { testDivideInitDataFloat(src1, src2); } +template <> inline void 
testDivideInitData(Mat& src1, Mat& src2) { testDivideInitDataFloat(src1, src2); } + + +template static inline +void testDivideChecks(const Mat& dst) +{ + ASSERT_FALSE(dst.empty()); + CV_StaticAssert(std::numeric_limits::is_integer, ""); + for (int y = 0; y < dst.rows; y++) + { + for (int x = 0; x < dst.cols; x++) + { + if (x == 2) + { + EXPECT_EQ(0, dst.at(y, x)) << "dst(" << y << ", " << x << ") = " << dst.at(y, x); + } + } + } +} + +template static inline +void testDivideChecksFP(const Mat& dst) +{ + ASSERT_FALSE(dst.empty()); + CV_StaticAssert(!std::numeric_limits::is_integer, ""); + for (int y = 0; y < dst.rows; y++) + { + for (int x = 0; x < dst.cols; x++) + { + if (y == 0 && x == 2) + { + EXPECT_TRUE(cvIsNaN(dst.at(y, x))) << "dst(" << y << ", " << x << ") = " << dst.at(y, x); + } + else if (x == 2) + { + EXPECT_TRUE(cvIsInf(dst.at(y, x))) << "dst(" << y << ", " << x << ") = " << dst.at(y, x); + } + else + { + EXPECT_FALSE(cvIsNaN(dst.at(y, x))) << "dst(" << y << ", " << x << ") = " << dst.at(y, x); + EXPECT_FALSE(cvIsInf(dst.at(y, x))) << "dst(" << y << ", " << x << ") = " << dst.at(y, x); + } + } + } +} + +template <> inline void testDivideChecks(const Mat& dst) { testDivideChecksFP(dst); } +template <> inline void testDivideChecks(const Mat& dst) { testDivideChecksFP(dst); } + + +template static inline +void testDivide() +{ + Mat src1, src2; + testDivideInitData(src1, src2); + ASSERT_FALSE(src1.empty()); ASSERT_FALSE(src2.empty()); + + Mat dst; + if (!isUMat) + { + cv::divide(src1, src2, dst); + } + else + { + UMat usrc1, usrc2, udst; + src1.copyTo(usrc1); + src2.copyTo(usrc2); + cv::divide(usrc1, usrc2, udst); + udst.copyTo(dst); + } + + testDivideChecks(dst); + + if (::testing::Test::HasFailure()) + { + std::cout << "src1 = " << std::endl << src1 << std::endl; + std::cout << "src2 = " << std::endl << src2 << std::endl; + std::cout << "dst = " << std::endl << dst << std::endl; + } +} + +TEST(Core_DivideRules, type_32s) { testDivide(); } 
+TEST(UMat_Core_DivideRules, type_32s) { testDivide(); } +TEST(Core_DivideRules, type_16s) { testDivide(); } +TEST(UMat_Core_DivideRules, type_16s) { testDivide(); } +TEST(Core_DivideRules, type_32f) { testDivide(); } +TEST(UMat_Core_DivideRules, type_32f) { testDivide(); } +TEST(Core_DivideRules, type_64f) { testDivide(); } +TEST(UMat_Core_DivideRules, type_64f) { testDivide(); } + }} // namespace diff --git a/modules/gapi/test/common/gapi_core_tests_inl.hpp b/modules/gapi/test/common/gapi_core_tests_inl.hpp index fb9b336c6d..c1a8190196 100644 --- a/modules/gapi/test/common/gapi_core_tests_inl.hpp +++ b/modules/gapi/test/common/gapi_core_tests_inl.hpp @@ -56,12 +56,14 @@ TEST_P(MathOpTest, MatricesAccuracyTest ) { if( doReverseOp ) { + in_mat1.setTo(1, in_mat1 == 0); // avoid zeros in divide input data out = cv::gapi::divRC(sc1, in1, scale, dtype); cv::divide(sc, in_mat1, out_mat_ocv, scale, dtype); break; } else { + sc += Scalar(1, 1, 1, 1); // avoid zeros in divide input data out = cv::gapi::divC(in1, sc1, scale, dtype); cv::divide(in_mat1, sc, out_mat_ocv, scale, dtype); break; @@ -100,6 +102,7 @@ TEST_P(MathOpTest, MatricesAccuracyTest ) } case (DIV): { + in_mat2.setTo(1, in_mat2 == 0); // avoid zeros in divide input data out = cv::gapi::div(in1, in2, scale, dtype); cv::divide(in_mat1, in_mat2, out_mat_ocv, scale, dtype); break; @@ -187,7 +190,7 @@ TEST_P(MulDoubleTest, AccuracyTest) EXPECT_EQ(out_mat_gapi.size(), sz_in); } -TEST_P(DivTest, DivByZeroTest) +TEST_P(DivTest, DISABLED_DivByZeroTest) // https://github.com/opencv/opencv/pull/12826 { int type = 0, dtype = 0; cv::Size sz_in; @@ -217,7 +220,7 @@ TEST_P(DivTest, DivByZeroTest) } } -TEST_P(DivCTest, DivByZeroTest) +TEST_P(DivCTest, DISABLED_DivByZeroTest) // https://github.com/opencv/opencv/pull/12826 { int type = 0, dtype = 0; cv::Size sz_in;