Added new data types to cv::Mat & UMat (#23865)

* started working on adding 32u, 64u, 64s, bool and 16bf types to OpenCV

* core & imgproc tests seem to pass

* fixed a few compile errors and test failures on macOS x86

* hopefully fixed some compile problems and test failures

* fixed some more warnings and test failures

* trying to fix small deviations in perf_core & perf_imgproc by reverting randf_64f to the exact version used before

* trying to fix the behavior of the new OpenCV with old plugins; there is a (quite strong) assumption that video capture gives us frames with depth == CV_8U (0) or CV_16U (2). If depth is > 7, it means that the plugin was built with the old OpenCV. It needs to be recompiled, of course, and then this hack can be removed.

* try to repair the case when target arch does not have FP64 SIMD

* 1. fixed bug in itoa() found by alalek
2. restored ==, !=, > and < univ. intrinsics on ARM32/ARM64.
pull/24118/head
Vadim Pisarevsky 2 years ago committed by GitHub
parent fa91c1445e
commit 518486ed3d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      modules/3d/misc/java/test/Cv3dTest.java
  2. 2
      modules/3d/test/test_odometry.cpp
  3. 4
      modules/calib/test/test_cameracalibration.cpp
  4. 4
      modules/calib/test/test_fisheye.cpp
  5. 10
      modules/calib/test/test_multiview_calib.cpp
  6. 45
      modules/core/include/opencv2/core/cvdef.h
  7. 6
      modules/core/include/opencv2/core/hal/hal.hpp
  8. 43
      modules/core/include/opencv2/core/hal/interface.h
  9. 20
      modules/core/include/opencv2/core/hal/intrin.hpp
  10. 21
      modules/core/include/opencv2/core/hal/intrin_avx.hpp
  11. 2
      modules/core/include/opencv2/core/hal/intrin_cpp.hpp
  12. 59
      modules/core/include/opencv2/core/hal/intrin_neon.hpp
  13. 8
      modules/core/include/opencv2/core/hal/intrin_sse.hpp
  14. 4
      modules/core/include/opencv2/core/mat.hpp
  15. 4
      modules/core/include/opencv2/core/mat.inl.hpp
  16. 6
      modules/core/include/opencv2/core/matx.hpp
  17. 38
      modules/core/include/opencv2/core/saturate.hpp
  18. 94
      modules/core/include/opencv2/core/traits.hpp
  19. 2
      modules/core/misc/java/src/java/core+CvType.java
  20. 2
      modules/core/misc/java/test/CvTypeTest.java
  21. 27
      modules/core/src/arithm.cpp
  22. 4
      modules/core/src/arithm.simd.hpp
  23. 2
      modules/core/src/channels.cpp
  24. 109
      modules/core/src/convert.dispatch.cpp
  25. 353
      modules/core/src/convert.hpp
  26. 598
      modules/core/src/convert.simd.hpp
  27. 457
      modules/core/src/convert_scale.simd.hpp
  28. 35
      modules/core/src/copy.cpp
  29. 4
      modules/core/src/matmul.dispatch.cpp
  30. 4
      modules/core/src/matmul.simd.hpp
  31. 2
      modules/core/src/matrix.cpp
  32. 2
      modules/core/src/mean.simd.hpp
  33. 7
      modules/core/src/merge.dispatch.cpp
  34. 3
      modules/core/src/minmax.cpp
  35. 4
      modules/core/src/norm.cpp
  36. 15
      modules/core/src/out.cpp
  37. 91
      modules/core/src/persistence.cpp
  38. 1
      modules/core/src/persistence.hpp
  39. 520
      modules/core/src/rand.cpp
  40. 7
      modules/core/src/split.dispatch.cpp
  41. 2
      modules/core/src/sum.simd.hpp
  42. 341
      modules/core/test/test_arithm.cpp
  43. 6
      modules/core/test/test_dxt.cpp
  44. 45
      modules/core/test/test_io.cpp
  45. 4
      modules/core/test/test_misc.cpp
  46. 6
      modules/imgproc/misc/java/test/ImgprocTest.java
  47. 8
      modules/imgproc/test/test_pc.cpp
  48. 2
      modules/stitching/src/exposure_compensate.cpp
  49. 142
      modules/ts/src/ts_func.cpp
  50. 9
      modules/ts/src/ts_perf.cpp
  51. 6
      modules/videoio/src/backend_plugin.cpp
  52. 18
      modules/videoio/test/test_precomp.hpp

@ -315,8 +315,8 @@ public class Cv3dTest extends OpenCVTestCase {
Mat truth_tvec = new Mat(3, 1, CvType.CV_64F);
truth_tvec.put(0, 0, -320, -240, 400);
assertMatEqual(truth_rvec, rvec, EPS);
assertMatEqual(truth_tvec, tvec, EPS);
assertMatEqual(truth_rvec, rvec, EPS*2);
assertMatEqual(truth_tvec, tvec, EPS*2);
}
public void testSolvePnPListOfPoint3ListOfPointMatMatMatMatBoolean() {

@ -227,7 +227,7 @@ void OdometryTest::run()
}
// compare rotation
double possibleError = algtype == OdometryAlgoType::COMMON ? 0.015f : 0.01f;
double possibleError = algtype == OdometryAlgoType::COMMON ? 0.02f : 0.02f;
Affine3f src = Affine3f(Vec3f(rvec), Vec3f(tvec));
Affine3f res = Affine3f(Vec3f(calcRvec), Vec3f(calcTvec));

@ -2010,8 +2010,8 @@ double CV_MultiviewCalibrationTest_CPP::calibrateStereoCamera( const vector<vect
img_pts2.copyTo(image_points_all[1][i]);
}
std::vector<Size> image_sizes (2, imageSize);
Mat visibility_mat = Mat_<bool>::ones(2, numImgs);
std::vector<bool> is_fisheye(2, false);
Mat visibility_mat = Mat_<uchar>::ones(2, numImgs);
std::vector<uchar> is_fisheye(2, false);
std::vector<int> all_flags(2, flags);
double rms = calibrateMultiview(objectPoints, image_points_all, image_sizes, visibility_mat,
Rs, Ts, Ks, distortions, rvecs, tvecs, is_fisheye, errors_mat, noArray(), false, all_flags);

@ -610,9 +610,9 @@ TEST_F(fisheyeTest, multiview_calibration)
right_pts.copyTo(image_points_all[1][i]);
}
std::vector<cv::Size> image_sizes(2, imageSize);
cv::Mat visibility_mat = cv::Mat_<bool>::ones(2, (int)leftPoints.size()), errors_mat, output_pairs;
cv::Mat visibility_mat = cv::Mat_<uchar>::ones(2, (int)leftPoints.size()), errors_mat, output_pairs;
std::vector<cv::Mat> Rs, Ts, Ks, distortions, rvecs0, tvecs0;
std::vector<bool> is_fisheye(2, true);
std::vector<uchar> is_fisheye(2, true);
int flag = 0;
flag |= cv::CALIB_RECOMPUTE_EXTRINSIC;
flag |= cv::CALIB_CHECK_COND;

@ -65,7 +65,7 @@ TEST(multiview_calibration, accuracy) {
std::vector<std::vector<cv::Vec3f>> objPoints;
std::vector<std::vector<cv::Mat>> image_points_all(num_cameras);
cv::Mat ones = cv::Mat_<float>::ones(1, num_pts);
std::vector<std::vector<bool>> visibility;
std::vector<std::vector<uchar>> visibility;
cv::Mat centroid = cv::Mat(cv::Matx31f(
(float)cv::mean(pattern.row(0)).val[0],
(float)cv::mean(pattern.row(1)).val[0],
@ -83,7 +83,7 @@ TEST(multiview_calibration, accuracy) {
cv::Mat pattern_new = (R * (pattern - centroid * ones) + centroid * ones + t * ones).t();
std::vector<cv::Mat> img_pts_cams(num_cameras);
std::vector<bool> visible(num_cameras, false);
std::vector<uchar> visible(num_cameras, (uchar)0);
int num_visible_patterns = 0;
for (int c = 0; c < num_cameras; c++) {
cv::Mat img_pts;
@ -108,7 +108,7 @@ TEST(multiview_calibration, accuracy) {
}
}
if (are_all_pts_in_image) {
visible[c] = true;
visible[c] = 1;
num_visible_patterns += 1;
img_pts.copyTo(img_pts_cams[c]);
}
@ -124,10 +124,10 @@ TEST(multiview_calibration, accuracy) {
break;
}
}
cv::Mat visibility_mat = cv::Mat_<bool>(num_cameras, (int)objPoints.size());
cv::Mat visibility_mat = cv::Mat_<uchar>(num_cameras, (int)objPoints.size());
for (int c = 0; c < num_cameras; c++) {
for (int f = 0; f < (int)objPoints.size(); f++) {
visibility_mat.at<bool>(c, f) = visibility[f][c];
visibility_mat.at<uchar>(c, f) = visibility[f][c];
}
}

@ -487,9 +487,13 @@ Cv64suf;
#define CV_SUBMAT_FLAG (1 << CV_SUBMAT_FLAG_SHIFT)
#define CV_IS_SUBMAT(flags) ((flags) & CV_MAT_SUBMAT_FLAG)
/** Size of each channel item,
0x28442211 = 0010 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */
#define CV_ELEM_SIZE1(type) ((0x28442211 >> CV_MAT_DEPTH(type)*4) & 15)
/** Size in bytes of a single-channel element of the given type.

The sizes are packed into one 64-bit constant, 4 bits per depth code, with the
entry for depth 0 in the lowest nibble. Reading 0x4881228442211 from right to left:

depth  0 (CV_8U)   - 1 byte
depth  1 (CV_8S)   - 1 byte
depth  2 (CV_16U)  - 2 bytes
depth  3 (CV_16S)  - 2 bytes
depth  4 (CV_32S)  - 4 bytes
depth  5 (CV_32F)  - 4 bytes
depth  6 (CV_64F)  - 8 bytes
depth  7 (CV_16F)  - 2 bytes
depth  8 (CV_16BF) - 2 bytes
depth  9 (CV_Bool) - 1 byte
depth 10 (CV_64U)  - 8 bytes
depth 11 (CV_64S)  - 8 bytes
depth 12 (CV_32U)  - 4 bytes

Depth codes 13..15 have no entry and evaluate to 0.
NOTE(review): CV_MAT_DEPTH() can yield values up to CV_DEPTH_MAX-1; for depth >= 16
the shift count reaches 64 or more, which is undefined for a 64-bit operand - confirm
that callers never pass such a type.
*/
#define CV_ELEM_SIZE1(type) ((int)(0x4881228442211ULL >> (CV_MAT_DEPTH(type) * 4)) & 15)
#define CV_ELEM_SIZE(type) (CV_MAT_CN(type)*CV_ELEM_SIZE1(type))
@ -963,6 +967,41 @@ protected:
#endif
};
/** @brief 16-bit "brain float" storage type.

Holds the upper 16 bits of a 32-bit float bit pattern (as exposed by Cv32suf).
Both conversions are pure bit manipulation: float -> bfloat16_t drops the lower
16 bits (truncation, no rounding), bfloat16_t -> float pads them with zeros.
*/
class bfloat16_t
{
public:
    /** Creates a value whose stored bits are all zero. */
    bfloat16_t() : w(0) {}

    /** Converts from float by keeping only the upper 16 bits of its bit pattern. */
    explicit bfloat16_t(float x)
    {
        Cv32suf in;
        in.f = x;
        w = (ushort)(in.u >> 16);
    }

    /** Converts back to float; the lower 16 bits of the resulting pattern are zero. */
    operator float() const
    {
        Cv32suf out;
        // Widen to unsigned before shifting: a plain `w << 16` promotes w to
        // (signed) int, and for w >= 0x8000 the shifted value does not fit in int.
        out.u = (unsigned)w << 16;
        return out.f;
    }

    /** Builds a value directly from its raw 16-bit representation. */
    static bfloat16_t fromBits(ushort b)
    {
        bfloat16_t result;
        result.w = b;
        return result;
    }

    /** Returns the value whose raw representation is 0. */
    static bfloat16_t zero()
    {
        bfloat16_t result;
        result.w = (ushort)0;
        return result;
    }

    /** Returns the raw 16-bit representation. */
    ushort bits() const { return w; }

protected:
    ushort w; //!< raw storage: upper half of the float bit pattern
};
}
#endif

@ -197,9 +197,11 @@ CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double*
CV_EXPORTS void cvt16f32f( const float16_t* src, float* dst, int len );
CV_EXPORTS void cvt32f16f( const float* src, float16_t* dst, int len );
CV_EXPORTS void cvt16bf32f( const bfloat16_t* src, float* dst, int len );
CV_EXPORTS void cvt32f16bf( const float* src, bfloat16_t* dst, int len );
CV_EXPORTS void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len );
CV_EXPORTS void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len );
CV_EXPORTS void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len, int cn );
CV_EXPORTS void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len, int cn );
struct CV_EXPORTS DFT1D
{

@ -66,8 +66,8 @@ typedef signed char schar;
#define CV_USRTYPE1 (void)"CV_USRTYPE1 support has been dropped in OpenCV 4.0"
#define CV_CN_MAX 512
#define CV_CN_SHIFT 3
#define CV_CN_MAX 128
#define CV_CN_SHIFT 5
#define CV_DEPTH_MAX (1 << CV_CN_SHIFT)
#define CV_8U 0
@ -78,9 +78,17 @@ typedef signed char schar;
#define CV_32F 5
#define CV_64F 6
#define CV_16F 7
#define CV_16BF 8
#define CV_Bool 9
#define CV_64U 10
#define CV_64S 11
#define CV_32U 12
#define CV_DEPTH_CURR_MAX 13
#define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1)
#define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK)
#define CV_IS_INT_TYPE(flags) (((1 << CV_MAT_DEPTH(flags)) & 0x1e1f) != 0)
#define CV_IS_FLOAT_TYPE(flags) (((1 << CV_MAT_DEPTH(flags)) & 0x1e0) != 0)
#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT))
#define CV_MAKE_TYPE CV_MAKETYPE
@ -132,6 +140,37 @@ typedef signed char schar;
#define CV_16FC3 CV_MAKETYPE(CV_16F,3)
#define CV_16FC4 CV_MAKETYPE(CV_16F,4)
#define CV_16FC(n) CV_MAKETYPE(CV_16F,(n))
#define CV_64SC1 CV_MAKETYPE(CV_64S,1)
#define CV_64SC2 CV_MAKETYPE(CV_64S,2)
#define CV_64SC3 CV_MAKETYPE(CV_64S,3)
#define CV_64SC4 CV_MAKETYPE(CV_64S,4)
#define CV_64SC(n) CV_MAKETYPE(CV_64S,(n))
#define CV_64UC1 CV_MAKETYPE(CV_64U,1)
#define CV_64UC2 CV_MAKETYPE(CV_64U,2)
#define CV_64UC3 CV_MAKETYPE(CV_64U,3)
#define CV_64UC4 CV_MAKETYPE(CV_64U,4)
#define CV_64UC(n) CV_MAKETYPE(CV_64U,(n))
#define CV_BoolC1 CV_MAKETYPE(CV_Bool,1)
#define CV_BoolC2 CV_MAKETYPE(CV_Bool,2)
#define CV_BoolC3 CV_MAKETYPE(CV_Bool,3)
#define CV_BoolC4 CV_MAKETYPE(CV_Bool,4)
#define CV_BoolC(n) CV_MAKETYPE(CV_Bool,(n))
#define CV_32UC1 CV_MAKETYPE(CV_32U,1)
#define CV_32UC2 CV_MAKETYPE(CV_32U,2)
#define CV_32UC3 CV_MAKETYPE(CV_32U,3)
#define CV_32UC4 CV_MAKETYPE(CV_32U,4)
#define CV_32UC(n) CV_MAKETYPE(CV_32U,(n))
#define CV_16BFC1 CV_MAKETYPE(CV_16BF,1)
#define CV_16BFC2 CV_MAKETYPE(CV_16BF,2)
#define CV_16BFC3 CV_MAKETYPE(CV_16BF,3)
#define CV_16BFC4 CV_MAKETYPE(CV_16BF,4)
#define CV_16BFC(n) CV_MAKETYPE(CV_16BF,(n))
//! @}
//! @name Comparison operation

@ -720,6 +720,22 @@ namespace CV__SIMD_NAMESPACE {
inline v_int32 vx_load_expand_q(const schar * ptr) { return VXPREFIX(_load_expand_q)(ptr); }
//! @}
#ifndef OPENCV_HAL_HAVE_LOAD_STORE_BFLOAT16
/** Generic fallback: loads bfloat16 values and widens them to a float32 vector.

The raw 16-bit values are widened to 32-bit lanes by the ushort overload of
vx_load_expand(), then shifted left by 16 so that they become the upper half of
each float bit pattern.

NOTE(review): the enclosing guard tests OPENCV_HAL_HAVE_LOAD_STORE_BFLOAT16, while
the C++ fallback backend defines OPENCV_HAL_HAVE_PACK_STORE_BFLOAT16 - confirm
which macro name is intended.
*/
inline v_float32 vx_load_expand(const bfloat16_t* ptr)
{
    const ushort* raw_bits = (const ushort*)ptr;
    v_uint32 widened = vx_load_expand(raw_bits);
    v_uint32 as_high_half = v_shl<16>(widened);
    return v_reinterpret_as_f32(as_high_half);
}
inline void v_pack_store(const bfloat16_t* ptr, v_float32 v)
{
v_int32 iv = v_shr<16>(v_reinterpret_as_s32(v));
v_pack_store((short*)ptr, iv);
}
#endif
/** @brief SIMD processing state cleanup call */
inline void vx_cleanup() { VXPREFIX(_cleanup)(); }
@ -1095,6 +1111,10 @@ namespace CV__SIMD_NAMESPACE {
#define CV_SIMD 0
#endif
#if (!defined CV_SIMD_64F) || (!CV_SIMD_64F)
typedef struct v_float64 { int dummy; } v_float64;
#endif
#include "simd_utils.impl.hpp"
#ifndef CV_DOXYGEN

@ -937,6 +937,11 @@ OPENCV_HAL_IMPL_AVX_CMP_OP_INT(v_uint32x8, v_int32x8, epi32, (int)0x80000000)
inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
{ return ~(a == b); }
/** Lane-wise signed 64-bit "a > b"; the result is whatever _mm256_cmpgt_epi64 produces for each lane. */
inline v_int64x4 operator > (const v_int64x4& a, const v_int64x4& b)
{
    const __m256i gt_mask = _mm256_cmpgt_epi64(a.val, b.val);
    return v_int64x4(gt_mask);
}
/** Lane-wise signed 64-bit "a < b", computed as "b > a" with the operands of _mm256_cmpgt_epi64 swapped. */
inline v_int64x4 operator < (const v_int64x4& a, const v_int64x4& b)
{
    const __m256i lt_mask = _mm256_cmpgt_epi64(b.val, a.val);
    return v_int64x4(lt_mask);
}
OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(v_uint64x4)
OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(v_int64x4)
@ -3162,6 +3167,22 @@ inline void v_pack_store(float16_t* ptr, const v_float32x8& a)
#endif
}
/*#define OPENCV_HAL_HAVE_PACK_STORE_BFLOAT16 1
inline v_float32x8 v256_load_expand(const bfloat16_t* ptr)
{
__m128i bf = _mm_loadu_si128((const __m128i*)ptr);
__m256i f = _mm256_unpacklo_epi16(_mm256_setzero_si256(), _mm256_castsi128_si256(bf));
return v_float32x8(_mm256_castsi256_ps(f));
}
inline void v_pack_store(bfloat16_t* ptr, const v_float32x8& a)
{
__m256i f = _mm256_castps_si256(a.val);
f = _mm256_packs_epi32(_mm256_srai_epi32(f, 16), f);
_mm_storeu_si128((__m128i*)ptr, _v256_extract_low(f));
}*/
//
// end of FP16
//

@ -3250,6 +3250,8 @@ template<int n> inline v_reg<double, n/2> v_dotprod_expand_fast(const v_reg<int,
////// FP16 support ///////
#define OPENCV_HAL_HAVE_PACK_STORE_BFLOAT16 1
inline v_reg<float, simd128_width / sizeof(float)>
v_load_expand(const float16_t* ptr)
{

@ -1057,44 +1057,61 @@ OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int16x8, vreinterpretq_s16_u16, s16, u16)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint32x4, OPENCV_HAL_NOP, u32, u32)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int32x4, vreinterpretq_s32_u32, s32, u32)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float32x4, vreinterpretq_f32_u32, f32, u32)
#if defined(__aarch64__) || defined(_M_ARM64)
static inline uint64x2_t vmvnq_u64(uint64x2_t a)
{
uint64x2_t vx = vreinterpretq_u64_u32(vdupq_n_u32(0xFFFFFFFF));
return veorq_u64(a, vx);
}
//OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint64x2, OPENCV_HAL_NOP, u64, u64)
//OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int64x2, vreinterpretq_s64_u64, s64, u64)
static inline v_uint64x2 operator == (const v_uint64x2& a, const v_uint64x2& b)
{ return v_uint64x2(vceqq_u64(a.val, b.val)); }
static inline v_uint64x2 operator != (const v_uint64x2& a, const v_uint64x2& b)
{ return v_uint64x2(vmvnq_u64(vceqq_u64(a.val, b.val))); }
static inline v_int64x2 operator == (const v_int64x2& a, const v_int64x2& b)
{ return v_int64x2(vreinterpretq_s64_u64(vceqq_s64(a.val, b.val))); }
static inline v_int64x2 operator != (const v_int64x2& a, const v_int64x2& b)
{ return v_int64x2(vreinterpretq_s64_u64(vmvnq_u64(vceqq_s64(a.val, b.val)))); }
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint64x2, OPENCV_HAL_NOP, u64, u64)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int64x2, vreinterpretq_s64_u64, s64, u64)
#else
static inline v_uint64x2 operator == (const v_uint64x2& a, const v_uint64x2& b)
{
uint32x4_t cmp = vceqq_u32(vreinterpretq_u32_u64(a.val), vreinterpretq_u32_u64(b.val));
uint32x4_t swapped = vrev64q_u32(cmp);
return v_uint64x2(vreinterpretq_u64_u32(vandq_u32(cmp, swapped)));
uint32x4_t cmp = vceqq_u32(vreinterpretq_u32_u64(a.val),
vreinterpretq_u32_u64(b.val));
uint32x4_t v_eq = vandq_u32(cmp, vrev64q_u32(cmp));
return v_uint64x2(vreinterpretq_u64_u32(v_eq));
}
static inline v_uint64x2 operator != (const v_uint64x2& a, const v_uint64x2& b)
{
uint32x4_t cmp = vceqq_u32(vreinterpretq_u32_u64(a.val), vreinterpretq_u32_u64(b.val));
uint32x4_t swapped = vrev64q_u32(cmp);
uint64x2_t v_eq = vreinterpretq_u64_u32(vandq_u32(cmp, swapped));
uint64x2_t vx = vreinterpretq_u64_u32(vdupq_n_u32(0xFFFFFFFF));
return v_uint64x2(veorq_u64(v_eq, vx));
uint64x2_t v_mask = vorrq_u64(vsubq_u64(a.val, b.val), vsubq_u64(b.val, a.val));
int64x2_t v_smask = vshrq_n_s64(vreinterpretq_s64_u64(v_mask), 63);
return v_uint64x2(vreinterpretq_u64_s64(v_smask));
}
static inline v_int64x2 operator == (const v_int64x2& a, const v_int64x2& b)
{
return v_reinterpret_as_s64(v_reinterpret_as_u64(a) == v_reinterpret_as_u64(b));
uint32x4_t cmp = vceqq_u32(vreinterpretq_u32_s64(a.val),
vreinterpretq_u32_s64(b.val));
uint32x4_t v_eq = vandq_u32(cmp, vrev64q_u32(cmp));
return v_int64x2(vreinterpretq_s64_u32(v_eq));
}
static inline v_int64x2 operator != (const v_int64x2& a, const v_int64x2& b)
{
return v_reinterpret_as_s64(v_reinterpret_as_u64(a) != v_reinterpret_as_u64(b));
int64x2_t v_mask = vorrq_s64(vsubq_s64(a.val, b.val), vsubq_s64(b.val, a.val));
int64x2_t v_smask = vshrq_n_s64(v_mask, 63);
return v_int64x2(v_smask);
}
/** Lane-wise unsigned 64-bit "a > b"; each lane is all ones when true, zero otherwise.

A plain wrapping subtraction cannot be used here: the sign bit of (b - a) is also
set when b exceeds a by 2^63 or more, which would report "a > b" incorrectly.
*/
static inline v_uint64x2 operator > (const v_uint64x2& a, const v_uint64x2& b)
{
    // Saturating a - b is non-zero exactly when a > b (it clamps to 0 otherwise).
    uint64x2_t v_diff = vqsubq_u64(a.val, b.val);
    // d | (0 - d) has its top bit set exactly when d != 0.
    uint64x2_t v_nonzero = vorrq_u64(v_diff, vsubq_u64(vdupq_n_u64(0), v_diff));
    // Arithmetic shift replicates the top bit across the whole lane.
    int64x2_t v_mask = vshrq_n_s64(vreinterpretq_s64_u64(v_nonzero), 63);
    return v_uint64x2(vreinterpretq_u64_s64(v_mask));
}
/** Lane-wise unsigned 64-bit "a < b"; each lane is all ones when true, zero otherwise.

A plain wrapping subtraction cannot be used here: the sign bit of (a - b) is also
set when a exceeds b by 2^63 or more, which would report "a < b" incorrectly.
*/
static inline v_uint64x2 operator < (const v_uint64x2& a, const v_uint64x2& b)
{
    // Saturating b - a is non-zero exactly when b > a (it clamps to 0 otherwise).
    uint64x2_t v_diff = vqsubq_u64(b.val, a.val);
    // d | (0 - d) has its top bit set exactly when d != 0.
    uint64x2_t v_nonzero = vorrq_u64(v_diff, vsubq_u64(vdupq_n_u64(0), v_diff));
    // Arithmetic shift replicates the top bit across the whole lane.
    int64x2_t v_mask = vshrq_n_s64(vreinterpretq_s64_u64(v_nonzero), 63);
    return v_uint64x2(vreinterpretq_u64_s64(v_mask));
}
/** Lane-wise signed 64-bit "a > b"; each lane is all ones when true, zero otherwise.

The subtraction must saturate: with a wrapping b - a the sign flips when the true
difference does not fit in 64 bits (e.g. a = INT64_MAX, b = INT64_MIN).
*/
static inline v_int64x2 operator > (const v_int64x2& a, const v_int64x2& b)
{
    // Saturated b - a keeps the sign of the exact difference: negative iff a > b.
    int64x2_t v_diff = vqsubq_s64(b.val, a.val);
    // Arithmetic shift replicates the sign bit across the whole lane.
    return v_int64x2(vshrq_n_s64(v_diff, 63));
}
/** Lane-wise signed 64-bit "a < b"; each lane is all ones when true, zero otherwise.

The subtraction must saturate: with a wrapping a - b the sign flips when the true
difference does not fit in 64 bits (e.g. a = INT64_MIN, b = INT64_MAX).
*/
static inline v_int64x2 operator < (const v_int64x2& a, const v_int64x2& b)
{
    // Saturated a - b keeps the sign of the exact difference: negative iff a < b.
    int64x2_t v_diff = vqsubq_s64(a.val, b.val);
    // Arithmetic shift replicates the sign bit across the whole lane.
    return v_int64x2(vshrq_n_s64(v_diff, 63));
}
#endif
#if CV_SIMD128_64F
@ -1622,7 +1639,7 @@ inline int v_signmask(const v_uint64x2& a)
const int64x2_t signPosition = {0,1};
uint64x2_t v0 = vshlq_u64(vshrq_n_u64(a.val, 63), signPosition);
uint64_t t0 = vaddvq_u64(v0);
return t0;
return (int)t0;
#else // #if CV_NEON_AARCH64
int64x1_t m0 = vdup_n_s64(0);
uint64x2_t v0 = vshlq_u64(vshrq_n_u64(a.val, 63), vcombine_s64(m0, m0));

@ -1275,6 +1275,14 @@ inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
{ return ~(a == b); }
#endif
/** Lane-wise signed 64-bit "a > b" using SSE2 only; each lane is all ones when true, zero otherwise.

Taking the sign of a wrapping (b - a) is not sufficient: it gives the wrong answer
whenever the exact difference overflows 64 bits (e.g. a = INT64_MAX, b = INT64_MIN).
Instead the comparison is decided by the high 32-bit halves, falling back to the
borrow out of the low halves when the high halves are equal.
*/
inline v_int64x2 operator > (const v_int64x2& a, const v_int64x2& b)
{
    // Signed compare / equality of every 32-bit dword; only the high dword of
    // each 64-bit lane is used below.
    __m128i gt32 = _mm_cmpgt_epi32(a.val, b.val);
    __m128i eq32 = _mm_cmpeq_epi32(a.val, b.val);
    // When the high dwords are equal, the high dword of (b - a) is all ones
    // exactly if the low dword of a is (unsigned) greater than that of b.
    __m128i borrow = _mm_sub_epi64(b.val, a.val);
    __m128i r = _mm_or_si128(gt32, _mm_and_si128(eq32, borrow));
    // Broadcast the high dword of each lane to the whole 64-bit lane.
    return v_int64x2(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)));
}
inline v_int64x2 operator < (const v_int64x2& a, const v_int64x2& b)
{ return b > a; }
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2)
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2)

@ -298,9 +298,9 @@ public:
DEPTH_MASK_32F = 1 << CV_32F,
DEPTH_MASK_64F = 1 << CV_64F,
DEPTH_MASK_16F = 1 << CV_16F,
DEPTH_MASK_ALL = (DEPTH_MASK_64F<<1)-1,
DEPTH_MASK_ALL = (1 << CV_DEPTH_CURR_MAX)-1,
DEPTH_MASK_ALL_BUT_8S = DEPTH_MASK_ALL & ~DEPTH_MASK_8S,
DEPTH_MASK_ALL_16F = (DEPTH_MASK_16F<<1)-1,
DEPTH_MASK_ALL_16F = DEPTH_MASK_ALL,
DEPTH_MASK_FLT = DEPTH_MASK_32F + DEPTH_MASK_64F
};

@ -666,9 +666,7 @@ bool Mat::isSubmatrix() const
inline
size_t Mat::elemSize() const
{
size_t res = dims > 0 ? step.p[dims - 1] : 0;
CV_DbgAssert(res != 0);
return res;
return CV_ELEM_SIZE(flags);
}
inline

@ -442,6 +442,12 @@ typedef Vec<int, 4> Vec4i;
typedef Vec<int, 6> Vec6i;
typedef Vec<int, 8> Vec8i;
typedef Vec<int64_t, 2> Vec2l;
typedef Vec<int64_t, 3> Vec3l;
typedef Vec<int64_t, 4> Vec4l;
typedef Vec<int64_t, 6> Vec6l;
typedef Vec<int64_t, 8> Vec8l;
typedef Vec<float, 2> Vec2f;
typedef Vec<float, 3> Vec3f;
typedef Vec<float, 4> Vec4f;

@ -146,9 +146,8 @@ template<> inline unsigned saturate_cast<unsigned>(short v) { return (unsigned)
template<> inline unsigned saturate_cast<unsigned>(int v) { return (unsigned)std::max(v, (int)0); }
template<> inline unsigned saturate_cast<unsigned>(int64 v) { return (unsigned)((uint64)v <= (uint64)UINT_MAX ? v : v > 0 ? UINT_MAX : 0); }
template<> inline unsigned saturate_cast<unsigned>(uint64 v) { return (unsigned)std::min(v, (uint64)UINT_MAX); }
// we intentionally do not clip negative numbers, to make -1 become 0xffffffff etc.
template<> inline unsigned saturate_cast<unsigned>(float v) { return static_cast<unsigned>(cvRound(v)); }
template<> inline unsigned saturate_cast<unsigned>(double v) { return static_cast<unsigned>(cvRound(v)); }
template<> inline unsigned saturate_cast<unsigned>(float v) { return (unsigned)round(std::max(v, 0.f)); }
template<> inline unsigned saturate_cast<unsigned>(double v) { return (unsigned)round(std::max(v, 0.)); }
template<> inline uint64 saturate_cast<uint64>(schar v) { return (uint64)std::max(v, (schar)0); }
template<> inline uint64 saturate_cast<uint64>(short v) { return (uint64)std::max(v, (short)0); }
@ -156,9 +155,16 @@ template<> inline uint64 saturate_cast<uint64>(int v) { return (uint64)st
template<> inline uint64 saturate_cast<uint64>(int64 v) { return (uint64)std::max(v, (int64)0); }
template<> inline int64 saturate_cast<int64>(uint64 v) { return (int64)std::min(v, (uint64)LLONG_MAX); }
template<> inline int64 saturate_cast<int64>(float v) { return (int64)round((double)v); }
template<> inline int64 saturate_cast<int64>(double v) { return (int64)round(v); }
// Negative inputs are clamped to 0, then rounded. The result is converted straight to
// uint64: going through int64 first would overflow for inputs in [2^63, 2^64).
template<> inline uint64 saturate_cast<uint64>(float v) { return (uint64)round((double)std::max(v, 0.f)); }
template<> inline uint64 saturate_cast<uint64>(double v) { return (uint64)round(std::max(v, 0.)); }
/** @overload */
template<typename _Tp> static inline _Tp saturate_cast(float16_t v) { return saturate_cast<_Tp>((float)v); }
template<typename _Tp> static inline _Tp saturate_cast(bfloat16_t v) { return saturate_cast<_Tp>((float)v); }
template<typename _Tp> static inline _Tp saturate_cast(bool v) { return saturate_cast<_Tp>(v ? 1 : 0); }
// in theory, we could use a LUT for 8u/8s->16f conversion,
// but with hardware support for FP32->FP16 conversion the current approach is preferable
@ -172,6 +178,32 @@ template<> inline float16_t saturate_cast<float16_t>(uint64 v) { return float16
template<> inline float16_t saturate_cast<float16_t>(int64 v) { return float16_t((float)v); }
template<> inline float16_t saturate_cast<float16_t>(float v) { return float16_t(v); }
template<> inline float16_t saturate_cast<float16_t>(double v) { return float16_t((float)v); }
template<> inline float16_t saturate_cast<float16_t>(bfloat16_t v) { return float16_t((float)v); }
template<> inline bfloat16_t saturate_cast<bfloat16_t>(uchar v) { return bfloat16_t((float)v); }
template<> inline bfloat16_t saturate_cast<bfloat16_t>(schar v) { return bfloat16_t((float)v); }
template<> inline bfloat16_t saturate_cast<bfloat16_t>(ushort v) { return bfloat16_t((float)v); }
template<> inline bfloat16_t saturate_cast<bfloat16_t>(short v) { return bfloat16_t((float)v); }
template<> inline bfloat16_t saturate_cast<bfloat16_t>(unsigned v){ return bfloat16_t((float)v); }
template<> inline bfloat16_t saturate_cast<bfloat16_t>(int v) { return bfloat16_t((float)v); }
template<> inline bfloat16_t saturate_cast<bfloat16_t>(uint64 v) { return bfloat16_t((float)v); }
template<> inline bfloat16_t saturate_cast<bfloat16_t>(int64 v) { return bfloat16_t((float)v); }
template<> inline bfloat16_t saturate_cast<bfloat16_t>(float v) { return bfloat16_t(v); }
template<> inline bfloat16_t saturate_cast<bfloat16_t>(double v) { return bfloat16_t((float)v); }
template<> inline bfloat16_t saturate_cast<bfloat16_t>(float16_t v) { return bfloat16_t((float)v); }
template<> inline bool saturate_cast<bool>(uchar v) { return v != 0; }
template<> inline bool saturate_cast<bool>(schar v) { return v != 0; }
template<> inline bool saturate_cast<bool>(ushort v) { return v != 0; }
template<> inline bool saturate_cast<bool>(short v) { return v != 0; }
template<> inline bool saturate_cast<bool>(unsigned v){ return v != 0; }
template<> inline bool saturate_cast<bool>(int v){ return v != 0; }
template<> inline bool saturate_cast<bool>(float v){ return v != 0; }
template<> inline bool saturate_cast<bool>(double v){ return v != 0; }
template<> inline bool saturate_cast<bool>(uint64_t v){ return v != 0; }
template<> inline bool saturate_cast<bool>(int64_t v){ return v != 0; }
template<> inline bool saturate_cast<bool>(float16_t v){ return (float)v != 0; }
template<> inline bool saturate_cast<bool>(bfloat16_t v){ return (float)v != 0; }
//! @}

@ -134,9 +134,9 @@ public:
typedef value_type channel_type;
typedef value_type vec_type;
enum { generic_type = 0,
depth = CV_8U,
depth = CV_Bool,
channels = 1,
fmt = (int)'u',
fmt = (int)'b',
type = CV_MAKETYPE(depth, channels)
};
};
@ -231,6 +231,51 @@ public:
};
};
/** DataType traits for `unsigned`: a single-channel element whose depth is CV_32U.
All associated types (work, channel and vector type) are `unsigned` itself. */
template<> class DataType<unsigned>
{
public:
typedef unsigned value_type;
typedef value_type work_type;
typedef value_type channel_type;
typedef value_type vec_type;
enum { generic_type = 0,
depth = CV_32U,
channels = 1,
// single-character format code associated with this type
fmt = (int)'n',
type = CV_MAKETYPE(depth, channels)
};
};
/** DataType traits for `int64_t`: a single-channel element whose depth is CV_64S.
All associated types (work, channel and vector type) are `int64_t` itself. */
template<> class DataType<int64_t>
{
public:
    // Must be the specialized type itself (it was `unsigned`, a copy-paste slip
    // that made every dependent typedef 32-bit unsigned).
    typedef int64_t     value_type;
    typedef value_type  work_type;
    typedef value_type  channel_type;
    typedef value_type  vec_type;
    enum { generic_type = 0,
           depth        = CV_64S,
           channels     = 1,
           // single-character format code associated with this type
           fmt          = (int)'L',
           type         = CV_MAKETYPE(depth, channels)
         };
};
/** DataType traits for `uint64_t`: a single-channel element whose depth is CV_64U.
All associated types (work, channel and vector type) are `uint64_t` itself. */
template<> class DataType<uint64_t>
{
public:
    // Must be the specialized type itself (it was `unsigned`, a copy-paste slip
    // that made every dependent typedef 32-bit).
    typedef uint64_t    value_type;
    typedef value_type  work_type;
    typedef value_type  channel_type;
    typedef value_type  vec_type;
    enum { generic_type = 0,
           depth        = CV_64U,
           channels     = 1,
           // single-character format code associated with this type
           fmt          = (int)'U',
           type         = CV_MAKETYPE(depth, channels)
         };
};
template<> class DataType<float>
{
public:
@ -276,6 +321,21 @@ public:
};
};
/** DataType traits for `bfloat16_t`: a single-channel element whose depth is CV_16BF.
Unlike the integer specializations, the work type is `float` rather than the
stored type itself. */
template<> class DataType<bfloat16_t>
{
public:
typedef bfloat16_t value_type;
typedef float work_type;
typedef value_type channel_type;
typedef value_type vec_type;
enum { generic_type = 0,
depth = CV_16BF,
channels = 1,
// single-character format code associated with this type
fmt = (int)'H',
type = CV_MAKETYPE(depth, channels)
};
};
/** @brief A helper class for cv::DataType
The class is specialized for each fundamental numerical data type supported by OpenCV. It provides
@ -332,6 +392,12 @@ template<> class TypeDepth<CV_32S>
typedef int value_type;
};
template<> class TypeDepth<CV_32U>
{
enum { depth = CV_32U };
typedef unsigned value_type;
};
template<> class TypeDepth<CV_32F>
{
enum { depth = CV_32F };
@ -344,12 +410,36 @@ template<> class TypeDepth<CV_64F>
typedef double value_type;
};
template<> class TypeDepth<CV_64U>
{
enum { depth = CV_64U };
typedef uint64_t value_type;
};
template<> class TypeDepth<CV_64S>
{
enum { depth = CV_64S };
typedef int64_t value_type;
};
template<> class TypeDepth<CV_16F>
{
enum { depth = CV_16F };
typedef float16_t value_type;
};
template<> class TypeDepth<CV_16BF>
{
enum { depth = CV_16BF };
typedef bfloat16_t value_type;
};
template<> class TypeDepth<CV_Bool>
{
enum { depth = CV_Bool };
typedef bool value_type;
};
#endif
//! @}

@ -30,7 +30,7 @@ public final class CvType {
CV_64FC1 = CV_64FC(1), CV_64FC2 = CV_64FC(2), CV_64FC3 = CV_64FC(3), CV_64FC4 = CV_64FC(4),
CV_16FC1 = CV_16FC(1), CV_16FC2 = CV_16FC(2), CV_16FC3 = CV_16FC(3), CV_16FC4 = CV_16FC(4);
private static final int CV_CN_MAX = 512, CV_CN_SHIFT = 3, CV_DEPTH_MAX = (1 << CV_CN_SHIFT);
private static final int CV_CN_MAX = 128, CV_CN_SHIFT = 5, CV_DEPTH_MAX = (1 << CV_CN_SHIFT);
public static final int makeType(int depth, int channels) {
if (channels <= 0 || channels >= CV_CN_MAX) {

@ -65,7 +65,7 @@ public class CvTypeTest extends OpenCVTestCase {
public void testTypeToString() {
assertEquals("CV_32FC1", CvType.typeToString(CvType.CV_32F));
assertEquals("CV_32FC3", CvType.typeToString(CvType.CV_32FC3));
assertEquals("CV_32FC(128)", CvType.typeToString(CvType.CV_32FC(128)));
assertEquals("CV_32FC(127)", CvType.typeToString(CvType.CV_32FC(127)));
}
}

@ -329,7 +329,7 @@ static void binary_op( InputArray _src1, InputArray _src2, OutputArray _dst,
static BinaryFuncC* getMaxTab()
{
static BinaryFuncC maxTab[] =
static BinaryFuncC maxTab[CV_DEPTH_MAX] =
{
(BinaryFuncC)GET_OPTIMIZED(cv::hal::max8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::max8s),
(BinaryFuncC)GET_OPTIMIZED(cv::hal::max16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::max16s),
@ -343,7 +343,7 @@ static BinaryFuncC* getMaxTab()
static BinaryFuncC* getMinTab()
{
static BinaryFuncC minTab[] =
static BinaryFuncC minTab[CV_DEPTH_MAX] =
{
(BinaryFuncC)GET_OPTIMIZED(cv::hal::min8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::min8s),
(BinaryFuncC)GET_OPTIMIZED(cv::hal::min16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::min16s),
@ -617,7 +617,10 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat();
Size sz = getContinuousSize2D(src1, src2, dst, src1.channels());
tab[depth1](src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, usrdata);
BinaryFuncC func = tab[depth1];
CV_Assert(func != 0);
func(src1.ptr(), src1.step, src2.ptr(), src2.step,
dst.ptr(), dst.step, sz.width, sz.height, usrdata);
return;
}
@ -868,7 +871,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
static BinaryFuncC* getAddTab()
{
static BinaryFuncC addTab[] =
static BinaryFuncC addTab[CV_DEPTH_MAX] =
{
(BinaryFuncC)GET_OPTIMIZED(cv::hal::add8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::add8s),
(BinaryFuncC)GET_OPTIMIZED(cv::hal::add16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::add16s),
@ -882,7 +885,7 @@ static BinaryFuncC* getAddTab()
static BinaryFuncC* getSubTab()
{
static BinaryFuncC subTab[] =
static BinaryFuncC subTab[CV_DEPTH_MAX] =
{
(BinaryFuncC)GET_OPTIMIZED(cv::hal::sub8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub8s),
(BinaryFuncC)GET_OPTIMIZED(cv::hal::sub16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub16s),
@ -896,7 +899,7 @@ static BinaryFuncC* getSubTab()
static BinaryFuncC* getAbsDiffTab()
{
static BinaryFuncC absDiffTab[] =
static BinaryFuncC absDiffTab[CV_DEPTH_MAX] =
{
(BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff8s),
(BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff16s),
@ -949,7 +952,7 @@ namespace cv
static BinaryFuncC* getMulTab()
{
static BinaryFuncC mulTab[] =
static BinaryFuncC mulTab[CV_DEPTH_MAX] =
{
(BinaryFuncC)cv::hal::mul8u, (BinaryFuncC)cv::hal::mul8s, (BinaryFuncC)cv::hal::mul16u,
(BinaryFuncC)cv::hal::mul16s, (BinaryFuncC)cv::hal::mul32s, (BinaryFuncC)cv::hal::mul32f,
@ -961,7 +964,7 @@ static BinaryFuncC* getMulTab()
static BinaryFuncC* getDivTab()
{
static BinaryFuncC divTab[] =
static BinaryFuncC divTab[CV_DEPTH_MAX] =
{
(BinaryFuncC)cv::hal::div8u, (BinaryFuncC)cv::hal::div8s, (BinaryFuncC)cv::hal::div16u,
(BinaryFuncC)cv::hal::div16s, (BinaryFuncC)cv::hal::div32s, (BinaryFuncC)cv::hal::div32f,
@ -973,7 +976,7 @@ static BinaryFuncC* getDivTab()
static BinaryFuncC* getRecipTab()
{
static BinaryFuncC recipTab[] =
static BinaryFuncC recipTab[CV_DEPTH_MAX] =
{
(BinaryFuncC)cv::hal::recip8u, (BinaryFuncC)cv::hal::recip8s, (BinaryFuncC)cv::hal::recip16u,
(BinaryFuncC)cv::hal::recip16s, (BinaryFuncC)cv::hal::recip32s, (BinaryFuncC)cv::hal::recip32f,
@ -1021,7 +1024,7 @@ UMat UMat::mul(InputArray m, double scale) const
static BinaryFuncC* getAddWeightedTab()
{
static BinaryFuncC addWeightedTab[] =
static BinaryFuncC addWeightedTab[CV_DEPTH_MAX] =
{
(BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted8s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted16u),
(BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted16s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted32s), (BinaryFuncC)cv::hal::addWeighted32f,
@ -1052,7 +1055,7 @@ namespace cv
static BinaryFuncC getCmpFunc(int depth)
{
static BinaryFuncC cmpTab[] =
static BinaryFuncC cmpTab[CV_DEPTH_MAX] =
{
(BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp8s),
(BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp16s),
@ -1588,7 +1591,7 @@ typedef void (*InRangeFunc)( const uchar* src1, size_t step1, const uchar* src2,
static InRangeFunc getInRangeFunc(int depth)
{
static InRangeFunc inRangeTab[] =
static InRangeFunc inRangeTab[CV_DEPTH_MAX] =
{
(InRangeFunc)GET_OPTIMIZED(inRange8u), (InRangeFunc)GET_OPTIMIZED(inRange8s), (InRangeFunc)GET_OPTIMIZED(inRange16u),
(InRangeFunc)GET_OPTIMIZED(inRange16s), (InRangeFunc)GET_OPTIMIZED(inRange32s), (InRangeFunc)GET_OPTIMIZED(inRange32f),

@ -104,10 +104,6 @@ namespace cv { namespace hal {
#ifdef ARITHM_DEFINITIONS_ONLY
#if !CV_SIMD_64F
typedef int v_float64; // dummy
#endif
//=======================================
// Utility
//=======================================

@ -79,7 +79,7 @@ typedef void (*MixChannelsFunc)( const void** src, const int* sdelta,
static MixChannelsFunc getMixchFunc(int depth)
{
static MixChannelsFunc mixchTab[] =
static MixChannelsFunc mixchTab[CV_DEPTH_MAX] =
{
mixChannels8u, mixChannels8u, mixChannels16u,
mixChannels16u, mixChannels32s, mixChannels32s,

@ -23,116 +23,27 @@ void cvt32f16f(const float* src, float16_t* dst, int len)
CV_CPU_DISPATCH(cvt32f16f, (src, dst, len),
CV_CPU_DISPATCH_MODES_ALL);
}
void addRNGBias32f(float* arr, const float* scaleBiasPairs, int len)
void cvt32f16bf(const float* src, bfloat16_t* dst, int len)
{
CV_INSTRUMENT_REGION();
CV_CPU_DISPATCH(addRNGBias32f, (arr, scaleBiasPairs, len),
CV_CPU_DISPATCH(cvt32f16bf, (src, dst, len),
CV_CPU_DISPATCH_MODES_ALL);
}
void addRNGBias64f(double* arr, const double* scaleBiasPairs, int len)
void addRNGBias32f(float* arr, const float* scaleBiasPairs, int len, int cn)
{
CV_INSTRUMENT_REGION();
CV_CPU_DISPATCH(addRNGBias64f, (arr, scaleBiasPairs, len),
CV_CPU_DISPATCH(addRNGBias32f, (arr, scaleBiasPairs, len, cn),
CV_CPU_DISPATCH_MODES_ALL);
}
} // namespace
/* [TODO] Recover IPP calls
#if defined(HAVE_IPP)
#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
CV_IPP_RUN(src && dst, CV_INSTRUMENT_FUN_IPP(ippiConvert_##ippFavor, src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height)) >= 0) \
cvt_(src, sstep, dst, dstep, size); \
}
#define DEF_CVT_FUNC_F2(suffix, stype, dtype, ippFavor) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
CV_IPP_RUN(src && dst, CV_INSTRUMENT_FUN_IPP(ippiConvert_##ippFavor, src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height), ippRndFinancial, 0) >= 0) \
cvt_(src, sstep, dst, dstep, size); \
}
#else
#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
cvt_(src, sstep, dst, dstep, size); \
}
#define DEF_CVT_FUNC_F2 DEF_CVT_FUNC_F
#endif
#define DEF_CVT_FUNC(suffix, stype, dtype) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
cvt_(src, sstep, dst, dstep, size); \
void addRNGBias64f(double* arr, const double* scaleBiasPairs, int len, int cn)
{
CV_INSTRUMENT_REGION();
CV_CPU_DISPATCH(addRNGBias64f, (arr, scaleBiasPairs, len, cn),
CV_CPU_DISPATCH_MODES_ALL);
}
#define DEF_CPY_FUNC(suffix, stype) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
stype* dst, size_t dstep, Size size, double*) \
{ \
cpy_(src, sstep, dst, dstep, size); \
}
} // namespace
DEF_CPY_FUNC(8u, uchar)
DEF_CVT_FUNC_F(8s8u, schar, uchar, 8s8u_C1Rs)
DEF_CVT_FUNC_F(16u8u, ushort, uchar, 16u8u_C1R)
DEF_CVT_FUNC_F(16s8u, short, uchar, 16s8u_C1R)
DEF_CVT_FUNC_F(32s8u, int, uchar, 32s8u_C1R)
DEF_CVT_FUNC_F2(32f8u, float, uchar, 32f8u_C1RSfs)
DEF_CVT_FUNC(64f8u, double, uchar)
DEF_CVT_FUNC_F2(8u8s, uchar, schar, 8u8s_C1RSfs)
DEF_CVT_FUNC_F2(16u8s, ushort, schar, 16u8s_C1RSfs)
DEF_CVT_FUNC_F2(16s8s, short, schar, 16s8s_C1RSfs)
DEF_CVT_FUNC_F(32s8s, int, schar, 32s8s_C1R)
DEF_CVT_FUNC_F2(32f8s, float, schar, 32f8s_C1RSfs)
DEF_CVT_FUNC(64f8s, double, schar)
DEF_CVT_FUNC_F(8u16u, uchar, ushort, 8u16u_C1R)
DEF_CVT_FUNC_F(8s16u, schar, ushort, 8s16u_C1Rs)
DEF_CPY_FUNC(16u, ushort)
DEF_CVT_FUNC_F(16s16u, short, ushort, 16s16u_C1Rs)
DEF_CVT_FUNC_F2(32s16u, int, ushort, 32s16u_C1RSfs)
DEF_CVT_FUNC_F2(32f16u, float, ushort, 32f16u_C1RSfs)
DEF_CVT_FUNC(64f16u, double, ushort)
DEF_CVT_FUNC_F(8u16s, uchar, short, 8u16s_C1R)
DEF_CVT_FUNC_F(8s16s, schar, short, 8s16s_C1R)
DEF_CVT_FUNC_F2(16u16s, ushort, short, 16u16s_C1RSfs)
DEF_CVT_FUNC_F2(32s16s, int, short, 32s16s_C1RSfs)
DEF_CVT_FUNC(32f16s, float, short)
DEF_CVT_FUNC(64f16s, double, short)
DEF_CVT_FUNC_F(8u32s, uchar, int, 8u32s_C1R)
DEF_CVT_FUNC_F(8s32s, schar, int, 8s32s_C1R)
DEF_CVT_FUNC_F(16u32s, ushort, int, 16u32s_C1R)
DEF_CVT_FUNC_F(16s32s, short, int, 16s32s_C1R)
DEF_CPY_FUNC(32s, int)
DEF_CVT_FUNC_F2(32f32s, float, int, 32f32s_C1RSfs)
DEF_CVT_FUNC(64f32s, double, int)
DEF_CVT_FUNC_F(8u32f, uchar, float, 8u32f_C1R)
DEF_CVT_FUNC_F(8s32f, schar, float, 8s32f_C1R)
DEF_CVT_FUNC_F(16u32f, ushort, float, 16u32f_C1R)
DEF_CVT_FUNC_F(16s32f, short, float, 16s32f_C1R)
DEF_CVT_FUNC_F(32s32f, int, float, 32s32f_C1R)
DEF_CVT_FUNC(64f32f, double, float)
DEF_CVT_FUNC(8u64f, uchar, double)
DEF_CVT_FUNC(8s64f, schar, double)
DEF_CVT_FUNC(16u64f, ushort, double)
DEF_CVT_FUNC(16s64f, short, double)
DEF_CVT_FUNC(32s64f, int, double)
DEF_CVT_FUNC(32f64f, float, double)
DEF_CPY_FUNC(64s, int64)
*/
BinaryFunc getConvertFunc(int sdepth, int ddepth)
{

@ -28,12 +28,26 @@ static inline void vx_load_as(const short* ptr, v_float32& a)
// Loads one vector of int values and converts the lanes to float.
static inline void vx_load_as(const int* ptr, v_float32& a)
{
    a = v_cvt_f32(vx_load(ptr));
}
static inline void vx_load_as(const unsigned* ptr, v_float32& a)
{
v_uint32 delta = vx_setall_u32(0x80000000U);
v_uint32 ua = vx_load(ptr);
v_uint32 mask_a = (ua >= delta) & delta;
v_float32 fmask_a = v_cvt_f32(v_reinterpret_as_s32(mask_a)); // 0.f or (float)(-(1 << 31))
a = v_cvt_f32(v_reinterpret_as_s32(ua - mask_a));
// restore the original values
a -= fmask_a; // subtract 0 or a large negative number
}
// Loads one vector of floats; no conversion is involved.
static inline void vx_load_as(const float* ptr, v_float32& a)
{
    a = vx_load(ptr);
}
// Loads float16_t values and widens them into a v_float32 via vx_load_expand.
static inline void vx_load_as(const float16_t* ptr, v_float32& a)
{
    a = vx_load_expand(ptr);
}
// Loads bfloat16_t values and widens them into a v_float32 via vx_load_expand.
static inline void vx_load_as(const bfloat16_t* ptr, v_float32& a)
{
    a = vx_load_expand(ptr);
}
// Rounds the float lanes to integers and stores them packed as ushort.
static inline void v_store_as(ushort* ptr, const v_float32& a)
{
    v_pack_u_store(ptr, v_round(a));
}
@ -43,12 +57,40 @@ static inline void v_store_as(short* ptr, const v_float32& a)
// Rounds the float lanes to integers and stores them as int.
static inline void v_store_as(int* ptr, const v_float32& a)
{
    v_store(ptr, v_round(a));
}
static inline void v_store_as(unsigned* ptr, const v_float32& a)
{
v_float32 z = vx_setzero_f32();
v_store(ptr, v_reinterpret_as_u32(v_round(v_max(a, z))));
}
// Stores the float lanes unchanged.
static inline void v_store_as(float* ptr, const v_float32& a)
{
    v_store(ptr, a);
}
// Narrows the float lanes to float16_t and stores them.
static inline void v_store_as(float16_t* ptr, const v_float32& a)
{
    v_pack_store(ptr, a);
}
// Narrows the float lanes to bfloat16_t and stores them.
static inline void v_store_as(bfloat16_t* ptr, const v_float32& a)
{
    v_pack_store(ptr, a);
}
// Rounds the float lanes to 32-bit integers, widens them to two v_int64
// vectors and stores both halves back to back.
// NOTE(review): rounding goes through v_int32 first, so floats outside the
// 32-bit range presumably do not reach int64_t intact -- confirm.
static inline void v_store_as(int64_t* ptr, const v_float32& a)
{
    v_int64 lo, hi;
    const v_int32 rounded = v_round(a);
    v_expand(rounded, lo, hi);
    v_store(ptr, lo);
    v_store(ptr + v_int64::nlanes, hi);
}
// Rounds the float lanes to 32-bit integers, clamps negatives to zero, widens
// the result to two v_uint64 vectors and stores both halves back to back.
static inline void v_store_as(uint64_t* ptr, const v_float32& a)
{
    const v_int32 nonneg = v_max(v_round(a), vx_setzero_s32());
    v_uint64 lo, hi;
    v_expand(v_reinterpret_as_u32(nonneg), lo, hi);
    v_store(ptr, lo);
    // the second half follows the first one; the offset is the 64-bit lane count
    v_store(ptr + v_int64::nlanes, hi);
}
// Loads one vector of uchar and widens it into two v_uint16 vectors.
static inline void vx_load_pair_as(const uchar* ptr, v_uint16& a, v_uint16& b)
{
    v_expand(vx_load(ptr), a, b);
}
@ -147,6 +189,115 @@ static inline void vx_load_pair_as(const int* ptr, v_float32& a, v_float32& b)
b = v_cvt_f32(ib);
}
static inline void vx_load_pair_as(const int64_t* ptr, v_int32& a, v_int32& b)
{
const int int64_nlanes = v_int64::nlanes;
a = v_pack(vx_load(ptr), vx_load(ptr + int64_nlanes));
b = v_pack(vx_load(ptr + int64_nlanes*2), vx_load(ptr + int64_nlanes*3));
}
static inline void vx_load_pair_as(const int64_t* ptr, v_uint64& a, v_uint64& b)
{
v_int64 z = vx_setzero_s64();
v_int64 ia = vx_load(ptr), ib = vx_load(ptr + v_int64::nlanes);
ia &= (ia > z);
ib &= (ib > z);
a = v_reinterpret_as_u64(ia);
b = v_reinterpret_as_u64(ib);
}
static inline void vx_load_pair_as(const int64_t* ptr, v_uint32& a, v_uint32& b)
{
const int nlanes = v_int64::nlanes;
v_int64 z = vx_setzero_s64();
v_int64 ia0 = vx_load(ptr), ia1 = vx_load(ptr + nlanes);
v_int64 ib0 = vx_load(ptr + nlanes*2), ib1 = vx_load(ptr + nlanes*3);
ia0 &= (ia0 > z);
ia1 &= (ia1 > z);
ib0 &= (ib0 > z);
ib1 &= (ib1 > z);
a = v_pack(v_reinterpret_as_u64(ia0), v_reinterpret_as_u64(ia1));
b = v_pack(v_reinterpret_as_u64(ib0), v_reinterpret_as_u64(ib1));
}
static inline void vx_load_pair_as(const uint64_t* ptr, v_float32& a, v_float32& b)
{
const int nlanes = v_uint64::nlanes;
float buf[v_uint64::nlanes*4];
for (int i = 0; i < nlanes*4; i++) {
buf[i] = (float)ptr[i];
}
a = vx_load(buf);
b = vx_load(buf + nlanes*2);
}
static inline void vx_load_pair_as(const int64_t* ptr, v_float32& a, v_float32& b)
{
const int nlanes = v_int64::nlanes;
float buf[v_int64::nlanes*4];
for (int i = 0; i < nlanes*4; i++) {
buf[i] = (float)ptr[i];
}
a = vx_load(buf);
b = vx_load(buf + nlanes*2);
}
static inline void vx_load_pair_as(const bool* ptr, v_float32& a, v_float32& b)
{
v_uint16 z = vx_setzero_u16();
v_uint16 uab = vx_load_expand((const uchar*)ptr);
uab = v_shr<15>(uab > z);
v_int32 ia, ib;
v_expand(v_reinterpret_as_s16(uab), ia, ib);
a = v_cvt_f32(ia);
b = v_cvt_f32(ib);
}
static inline void vx_load_as(const bool* ptr, v_float32& a)
{
v_uint32 z = vx_setzero_u32();
v_uint32 ua = vx_load_expand_q((const uchar*)ptr);
ua = v_shr<31>(ua > z);
a = v_cvt_f32(v_reinterpret_as_s32(ua));
}
static inline void vx_load_pair_as(const schar* ptr, v_uint32& a, v_uint32& b)
{
v_int16 ab = v_max(vx_load_expand(ptr), vx_setzero_s16());
v_expand(v_reinterpret_as_u16(ab), a, b);
}
static inline void vx_load_pair_as(const short* ptr, v_uint32& a, v_uint32& b)
{
v_int16 ab = v_max(vx_load(ptr), vx_setzero_s16());
v_expand(v_reinterpret_as_u16(ab), a, b);
}
static inline void vx_load_pair_as(const int* ptr, v_uint32& a, v_uint32& b)
{
v_int32 z = vx_setzero_s32();
v_int32 ia = v_max(vx_load(ptr), z);
v_int32 ib = v_max(vx_load(ptr + v_int32::nlanes), z);
a = v_reinterpret_as_u32(ia);
b = v_reinterpret_as_u32(ib);
}
static inline void vx_load_pair_as(const uint64_t* ptr, v_uint32& a, v_uint32& b)
{
const int int64_nlanes = v_int64::nlanes;
a = v_pack(vx_load(ptr), vx_load(ptr + int64_nlanes));
b = v_pack(vx_load(ptr + int64_nlanes*2), vx_load(ptr + int64_nlanes*3));
}
static inline void vx_load_pair_as(const uint64_t* ptr, v_int32& a, v_int32& b)
{
const int int64_nlanes = v_int64::nlanes;
v_uint32 ua = v_pack(vx_load(ptr), vx_load(ptr + int64_nlanes));
v_uint32 ub = v_pack(vx_load(ptr + int64_nlanes*2), vx_load(ptr + int64_nlanes*3));
a = v_reinterpret_as_s32(ua);
b = v_reinterpret_as_s32(ub);
}
// Loads two consecutive vectors of floats; no conversion is involved.
static inline void vx_load_pair_as(const float* ptr, v_float32& a, v_float32& b)
{
    a = vx_load(ptr);
    b = vx_load(ptr + v_float32::nlanes);
}
@ -156,6 +307,39 @@ static inline void vx_load_pair_as(const float16_t* ptr, v_float32& a, v_float32
b = vx_load_expand(ptr + v_float32::nlanes);
}
static inline void vx_load_pair_as(const bfloat16_t* ptr, v_float32& a, v_float32& b)
{
a = vx_load_expand(ptr);
b = vx_load_expand(ptr + v_float32::nlanes);
}
// Loads two consecutive vectors of unsigned values; no conversion is involved.
static inline void vx_load_pair_as(const unsigned* ptr, v_uint32& a, v_uint32& b)
{
    const unsigned* const second = ptr + v_uint32::nlanes;
    a = vx_load(ptr);
    b = vx_load(second);
}
static inline void vx_load_pair_as(const unsigned* ptr, v_int32& a, v_int32& b)
{
a = v_reinterpret_as_s32(vx_load(ptr));
b = v_reinterpret_as_s32(vx_load(ptr + v_uint32::nlanes));
}
static inline void vx_load_pair_as(const unsigned* ptr, v_float32& a, v_float32& b)
{
v_uint32 delta = vx_setall_u32(0x80000000U);
v_uint32 ua = vx_load(ptr);
v_uint32 ub = vx_load(ptr + v_uint32::nlanes);
v_uint32 mask_a = (ua >= delta) & delta, mask_b = (ub >= delta) & delta;
v_float32 fmask_a = v_cvt_f32(v_reinterpret_as_s32(mask_a)); // 0.f or (float)(-(1 << 31))
v_float32 fmask_b = v_cvt_f32(v_reinterpret_as_s32(mask_b)); // 0.f or (float)(-(1 << 31))
a = v_cvt_f32(v_reinterpret_as_s32(ua - mask_a));
b = v_cvt_f32(v_reinterpret_as_s32(ub - mask_b));
// restore the original values
a -= fmask_a; // subtract 0 or a large negative number
b -= fmask_b; // subtract 0 or a large negative number
}
static inline void v_store_pair_as(uchar* ptr, const v_uint16& a, const v_uint16& b)
{
v_store(ptr, v_pack(a, b));
@ -198,12 +382,33 @@ static inline void v_store_pair_as(int* ptr, const v_int32& a, const v_int32& b)
v_store(ptr + v_int32::nlanes, b);
}
// Widens two v_int32 vectors into four v_int64 vectors and stores them
// back to back.
static inline void v_store_pair_as(int64_t* ptr, const v_int32& a, const v_int32& b)
{
    const int step = v_int64::nlanes;
    v_int64 a_lo, a_hi, b_lo, b_hi;
    v_expand(a, a_lo, a_hi);
    v_expand(b, b_lo, b_hi);
    v_store(ptr, a_lo);
    v_store(ptr + step, a_hi);
    v_store(ptr + step*2, b_lo);
    v_store(ptr + step*3, b_hi);
}
// Rounds two float vectors, packs them into 16-bit lanes and stores the
// result as uchar via v_pack_u_store.
static inline void v_store_pair_as(uchar* ptr, const v_float32& a, const v_float32& b)
{
    v_pack_u_store(ptr,
                   v_pack(v_round(a), v_round(b)));
}
// Rounds two float vectors, packs them into 16-bit lanes and stores the
// result as schar via v_pack_store.
static inline void v_store_pair_as(schar* ptr, const v_float32& a, const v_float32& b)
{
    v_pack_store(ptr,
                 v_pack(v_round(a), v_round(b)));
}
static inline void v_store_pair_as(bool* ptr, const v_float32& a, const v_float32& b)
{
v_float32 z = vx_setzero_f32();
v_uint32 ma = v_shr<31>(v_reinterpret_as_u32(a != z));
v_uint32 mb = v_shr<31>(v_reinterpret_as_u32(b != z));
v_uint16 mab = v_pack(ma, mb);
v_pack_store((uchar*)ptr, mab);
}
// Rounds two float vectors and stores them packed as ushort via v_pack_u.
static inline void v_store_pair_as(ushort* ptr, const v_float32& a, const v_float32& b)
{
    v_store(ptr,
            v_pack_u(v_round(a), v_round(b)));
}
@ -220,14 +425,95 @@ static inline void v_store_pair_as(int* ptr, const v_float32& a, const v_float32
// Stores two float vectors back to back, unchanged.
static inline void v_store_pair_as(float* ptr, const v_float32& a, const v_float32& b)
{
    v_store(ptr, a);
    v_store(ptr + v_float32::nlanes, b);
}
static inline void v_store_pair_as(unsigned* ptr, const v_float32& a, const v_float32& b)
{
v_int32 z = vx_setzero_s32();
v_int32 ia = v_max(v_round(a), z);
v_int32 ib = v_max(v_round(b), z);
v_store(ptr, v_reinterpret_as_u32(ia));
v_store(ptr + v_int32::nlanes, v_reinterpret_as_u32(ib));
}
// Packs two v_uint32 vectors into 16-bit lanes and stores them narrowed once
// more to uchar.
static inline void v_store_pair_as(uchar* ptr, const v_uint32& a, const v_uint32& b)
{
    v_pack_store(ptr,
                 v_pack(a, b));
}
// Packs two v_uint32 vectors into one vector of 16-bit lanes and stores it.
static inline void v_store_pair_as(ushort* ptr, const v_uint32& a, const v_uint32& b)
{
    v_store(ptr,
            v_pack(a, b));
}
// Stores two v_uint32 vectors back to back, unchanged.
static inline void v_store_pair_as(unsigned* ptr, const v_uint32& a, const v_uint32& b)
{
    unsigned* const second = ptr + v_uint32::nlanes;
    v_store(ptr, a);
    v_store(second, b);
}
// Widens two v_uint32 vectors into four v_uint64 vectors and stores them
// back to back.
static inline void v_store_pair_as(uint64_t* ptr, const v_uint32& a, const v_uint32& b)
{
    const int step = v_uint64::nlanes;
    v_uint64 a_lo, a_hi, b_lo, b_hi;
    v_expand(a, a_lo, a_hi);
    v_expand(b, b_lo, b_hi);
    v_store(ptr, a_lo);
    v_store(ptr + step, a_hi);
    v_store(ptr + step*2, b_lo);
    v_store(ptr + step*3, b_hi);
}
// Stores two v_uint64 vectors back to back, unchanged.
static inline void v_store_pair_as(uint64_t* ptr, const v_uint64& a, const v_uint64& b)
{
    uint64_t* const second = ptr + v_uint64::nlanes;
    v_store(ptr, a);
    v_store(second, b);
}
#if CV_SIMD_64F
static inline void vx_load_as(const uint64_t* ptr, v_float32& a)
{
v_float64 a_0 = v_cvt_f64(v_reinterpret_as_s64(vx_load(ptr)));
v_float64 a_1 = v_cvt_f64(v_reinterpret_as_s64(vx_load(ptr + v_uint64::nlanes)));
a = v_cvt_f32(a_0, a_1);
}
static inline void vx_load_as(const int64_t* ptr, v_float32& a)
{
v_float64 a_0 = v_cvt_f64(vx_load(ptr));
v_float64 a_1 = v_cvt_f64(vx_load(ptr + v_uint64::nlanes));
a = v_cvt_f32(a_0, a_1);
}
static inline void vx_load_as(const double* ptr, v_float32& a)
{
v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + v_float64::nlanes);
a = v_cvt_f32(v0, v1);
}
static inline void vx_load_pair_as(const bool* ptr, v_float64& a, v_float64& b)
{
v_uint32 z = vx_setzero_u32();
v_uint32 uab = vx_load_expand_q((const uchar*)ptr);
uab = v_shr<31>(uab > z);
v_float32 fab = v_cvt_f32(v_reinterpret_as_s32(uab));
a = v_cvt_f64(fab);
b = v_cvt_f64_high(fab);
}
static inline void vx_load_pair_as(const float16_t* ptr, v_float64& a, v_float64& b)
{
v_float32 fab = vx_load_expand(ptr);
a = v_cvt_f64(fab);
b = v_cvt_f64_high(fab);
}
static inline void vx_load_pair_as(const bfloat16_t* ptr, v_float64& a, v_float64& b)
{
v_float32 fab = vx_load_expand(ptr);
a = v_cvt_f64(fab);
b = v_cvt_f64_high(fab);
}
static inline void vx_load_pair_as(const double* ptr, v_int32& a, v_int32& b)
{
v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + v_float64::nlanes);
@ -238,6 +524,13 @@ static inline void vx_load_pair_as(const double* ptr, v_int32& a, v_int32& b)
b = v_combine_low(iv2, iv3);
}
// Loads two v_uint64 vectors and converts them to double.
// NOTE(review): the lanes are reinterpreted as signed before the conversion,
// so inputs with the top bit set presumably come out negative -- confirm.
static inline void vx_load_pair_as(const uint64_t* ptr, v_float64& a, v_float64& b)
{
    const int step = v_int64::nlanes;
    a = v_cvt_f64(v_reinterpret_as_s64(vx_load(ptr)));
    b = v_cvt_f64(v_reinterpret_as_s64(vx_load(ptr + step)));
}
static inline void vx_load_pair_as(const double* ptr, v_float32& a, v_float32& b)
{
v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + v_float64::nlanes);
@ -294,11 +587,20 @@ static inline void vx_load_pair_as(const double* ptr, v_float64& a, v_float64& b
b = vx_load(ptr + v_float64::nlanes);
}
static inline void vx_load_pair_as(const float16_t* ptr, v_float64& a, v_float64& b)
static inline void vx_load_pair_as(const int64_t* ptr, v_float64& a, v_float64& b)
{
v_float32 v0 = vx_load_expand(ptr);
a = v_cvt_f64(v0);
b = v_cvt_f64_high(v0);
a = v_cvt_f64(vx_load(ptr));
b = v_cvt_f64(vx_load(ptr + v_float64::nlanes));
}
static inline void vx_load_pair_as(const unsigned* ptr, v_float64& a, v_float64& b)
{
const int nlanes = v_uint64::nlanes;
double buf[v_uint64::nlanes*2];
for (int i = 0; i < nlanes*2; i++)
buf[i] = (double)ptr[i];
a = vx_load(buf);
b = vx_load(buf + nlanes);
}
static inline void v_store_as(double* ptr, const v_float32& a)
@ -354,6 +656,29 @@ static inline void v_store_pair_as(float16_t* ptr, const v_float64& a, const v_f
v_pack_store(ptr, v);
}
// Clamps two double vectors at zero from below, rounds them and stores the
// result widened to uint64_t.
// NOTE(review): v_round(a, b) yields a single vector that is then widened by
// v_expand into 64-bit lanes, so values beyond the narrower lane range
// presumably do not survive -- confirm against the scalar saturate_cast path.
static inline void v_store_pair_as(uint64_t* ptr, const v_float64& a, const v_float64& b)
{
    const v_float64 zero = vx_setzero_f64();
    const v_float64 a_pos = v_max(a, zero);
    const v_float64 b_pos = v_max(b, zero);
    v_int64 lo, hi;
    v_expand(v_round(a_pos, b_pos), lo, hi);
    v_store(ptr, v_reinterpret_as_u64(lo));
    v_store(ptr + v_int64::nlanes, v_reinterpret_as_u64(hi));
}
// Rounds two double vectors and stores the result widened to int64_t.
// NOTE(review): v_round(a, b) yields a single vector that is then widened by
// v_expand into 64-bit lanes, so values beyond the narrower lane range
// presumably do not survive -- confirm against the scalar saturate_cast path.
static inline void v_store_pair_as(int64_t* ptr, const v_float64& a, const v_float64& b)
{
    v_int64 lo, hi;
    v_expand(v_round(a, b), lo, hi);
    int64_t* const second = ptr + v_int64::nlanes;
    v_store(ptr, lo);
    v_store(second, hi);
}
static inline void v_store_pair_as(unsigned* ptr, const v_float64& a, const v_float64& b)
{
v_int32 iab = v_max(v_round(a, b), vx_setzero_s32());
v_store(ptr, v_reinterpret_as_u32(iab));
}
#else
static inline void vx_load_as(const double* ptr, v_float32& a)
@ -366,6 +691,26 @@ static inline void vx_load_as(const double* ptr, v_float32& a)
a = vx_load(buf);
}
static inline void vx_load_as(const uint64_t* ptr, v_float32& a)
{
const int VECSZ = v_float32::nlanes;
float buf[VECSZ*2];
for( int i = 0; i < VECSZ; i++ )
buf[i] = saturate_cast<float>(ptr[i]);
a = vx_load(buf);
}
static inline void vx_load_as(const int64_t* ptr, v_float32& a)
{
const int VECSZ = v_float32::nlanes;
float buf[VECSZ*2];
for( int i = 0; i < VECSZ; i++ )
buf[i] = saturate_cast<float>(ptr[i]);
a = vx_load(buf);
}
template<typename _Tdvec>
static inline void vx_load_pair_as(const double* ptr, _Tdvec& a, _Tdvec& b)
{

@ -16,8 +16,10 @@ CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
void cvt16f32f(const float16_t* src, float* dst, int len);
void cvt32f16f(const float* src, float16_t* dst, int len);
void addRNGBias32f(float* arr, const float* scaleBiasPairs, int len);
void addRNGBias64f(double* arr, const double* scaleBiasPairs, int len);
void cvt16bf32f(const bfloat16_t* src, float* dst, int len);
void cvt32f16bf(const float* src, bfloat16_t* dst, int len);
void addRNGBias32f(float* arr, const float* scaleBiasPairs, int len, int cn);
void addRNGBias64f(double* arr, const double* scaleBiasPairs, int len, int cn);
CV_CPU_OPTIMIZATION_NAMESPACE_END
} // namespace cv::hal
@ -77,20 +79,63 @@ void cvt32f16f( const float* src, float16_t* dst, int len )
dst[j] = float16_t(src[j]);
}
void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len )
void cvt32f16bf( const float* src, bfloat16_t* dst, int len )
{
CV_INSTRUMENT_REGION();
// the loop is simple enough, so we let the compiler to vectorize it
for( int i = 0; i < len; i++ )
arr[i] += scaleBiasPairs[i*2 + 1];
int j = 0;
#if CV_SIMD
const int VECSZ = v_float32::nlanes;
for( ; j < len; j += VECSZ )
{
if( j > len - VECSZ )
{
if( j == 0 )
break;
j = len - VECSZ;
}
v_pack_store(dst + j, vx_load(src + j));
}
#endif
for( ; j < len; j++ )
dst[j] = bfloat16_t(src[j]);
}
void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len )
void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len, int cn )
{
CV_INSTRUMENT_REGION();
// the loop is simple enough, so we let the compiler to vectorize it
for( int i = 0; i < len; i++ )
arr[i] += scaleBiasPairs[i*2 + 1];
if (cn == 1) {
float bias = scaleBiasPairs[1];
for( int i = 0; i < len; i++ ) {
arr[i] += bias;
}
} else {
int k = 0;
len *= cn;
cn--;
for( int i = 0; i < len; i++ ) {
arr[i] += scaleBiasPairs[k*2 + 1];
k = (k + 1) & ((k >= cn) - 1);
}
}
}
// Adds a per-channel bias to an interleaved array in place.
// scaleBiasPairs holds (scale, bias) pairs, one pair per channel; only the
// bias (odd index) of each pair is read here. arr holds len*cn values.
void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len, int cn )
{
    CV_INSTRUMENT_REGION();

    if (cn != 1) {
        const int total = len*cn;
        const int lastCh = cn - 1;
        int ch = 0;
        for( int i = 0; i < total; i++ ) {
            arr[i] += scaleBiasPairs[ch*2 + 1];
            // advance to the next channel, wrapping after the last one
            ch = (ch >= lastCh) ? 0 : ch + 1;
        }
        return;
    }

    // single channel: the same bias applies to every element
    const double bias = scaleBiasPairs[1];
    for( int i = 0; i < len; i++ )
        arr[i] += bias;
}
CV_CPU_OPTIMIZATION_NAMESPACE_END
@ -128,6 +173,35 @@ cvt_( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size )
}
}
// Row-by-row conversion of a 2D array from _Ts to _Td that uses v_float64 as
// the intermediate vector type when CV_SIMD_64F is enabled. The third template
// parameter is not used by the body. sstep/dstep are given in bytes.
template<typename _Ts, typename _Td, typename dummy> static inline void
cvt_64f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size )
{
    sstep /= sizeof(src[0]);
    dstep /= sizeof(dst[0]);

    for( int y = 0; y < size.height; y++, src += sstep, dst += dstep )
    {
        int x = 0;
#if CV_SIMD_64F
        const int VECSZ = v_float64::nlanes*2;
        while( x < size.width )
        {
            if( x > size.width - VECSZ )
            {
                // A partial last vector is handled by stepping back so that it
                // ends exactly at the row end; this is skipped when the row is
                // shorter than one vector or when source and destination are
                // the same buffer.
                if( x == 0 || src == (_Ts*)dst )
                    break;
                x = size.width - VECSZ;
            }
            v_float64 lo, hi;
            vx_load_pair_as(src + x, lo, hi);
            v_store_pair_as(dst + x, lo, hi);
            x += VECSZ;
        }
#endif
        // scalar tail (or the whole row when the vector loop did not run)
        for( ; x < size.width; x++ )
            dst[x] = saturate_cast<_Td>(src[x]);
    }
}
// in order to reduce the code size, for (16f <-> ...) conversions
// we add a conversion function without loop unrolling
template<typename _Ts, typename _Td, typename _Twvec> static inline void
@ -180,25 +254,102 @@ static void cvt##suffix(const uchar* src_, size_t sstep, const uchar*, size_t, \
cvtfunc<_Ts, _Td, _Twvec>(src, sstep, dst, dstep, size); \
}
// Defines cvt<suffix>: converts a 2D array of _Ts into bytes holding 0 or 1.
// An element maps to 1 when it is non-zero after its `shift` topmost bits have
// been discarded. Floating-point sources are instantiated with the same-width
// integer type and shift=1, which drops the sign bit so that -0.0 maps to 0.
// The shift is performed on an unsigned 64-bit copy and the result is narrowed
// back to _Ts before the test:
//  * a negative signed value is never left-shifted;
//  * for 16-bit _Ts the shifted-out bit is really discarded; without the
//    narrowing, integer promotion keeps it and 0x8000 << 1 tests as non-zero.
#define DEF_CVT2BOOL_FUNC(suffix, _Ts, shift) \
static void cvt##suffix(const uchar* src_, size_t sstep, const uchar*, size_t, \
                        uchar* dst, size_t dstep, Size size, void*) \
{ \
    CV_INSTRUMENT_REGION(); \
    const _Ts* src = (const _Ts*)src_; \
    sstep /= sizeof(src[0]); \
    \
    for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { \
        for ( int j = 0; j < size.width; j++ ) \
            dst[j] = (_Ts)((uint64_t)src[j] << (shift)) != 0; \
    } \
}
// Defines cvt<suffix>: converts a 2D array of bool-as-uchar into _Td.
// Every non-zero source byte produces `scale` (cast to _Td), every zero byte
// produces 0. dstep is given in bytes.
#define DEF_CVTBOOL2_FUNC(suffix, _Td, scale) \
static void cvt##suffix(const uchar* src, size_t sstep, const uchar*, size_t, \
                        uchar* dst_, size_t dstep, Size size, void*) \
{ \
    CV_INSTRUMENT_REGION(); \
    _Td* dst = (_Td*)dst_; \
    dstep /= sizeof(dst[0]); \
    \
    for( int y = 0; y < size.height; y++, src += sstep, dst += dstep ) { \
        for ( int x = 0; x < size.width; x++ ) { \
            const bool nonzero = src[x] != 0; \
            dst[x] = (_Td)(nonzero*scale); \
        } \
    } \
}
// Defines cvt<suffix>: element-by-element conversion of a 2D array from _Ts to
// _Td through saturate_cast, without any vectorized path. Steps are in bytes.
#define DEF_CVT_SCALAR_FUNC(suffix, _Ts, _Td) \
static void cvt##suffix(const uchar* src_, size_t sstep, const uchar*, size_t, \
                        uchar* dst_, size_t dstep, Size size, void*) \
{ \
    CV_INSTRUMENT_REGION(); \
    const _Ts* src = (const _Ts*)src_; \
    _Td* dst = (_Td*)dst_; \
    sstep /= sizeof(src[0]); \
    dstep /= sizeof(dst[0]); \
    \
    for( int y = 0; y < size.height; y++, src += sstep, dst += dstep ) { \
        for ( int x = 0; x < size.width; x++ ) { \
            dst[x] = saturate_cast<_Td>(src[x]); \
        } \
    } \
}
// Defines cvt<suffix>: element-by-element conversion of a 2D array from _Ts to
// _Td. Each source value is first cast to _Tw and clamped at zero from below,
// then passed through saturate_cast<_Td>. Steps are in bytes.
#define DEF_CVT_SCALAR_FUNC_S2U(suffix, _Ts, _Td, _Tw) \
static void cvt##suffix(const uchar* src_, size_t sstep, const uchar*, size_t, \
                        uchar* dst_, size_t dstep, Size size, void*) \
{ \
    CV_INSTRUMENT_REGION(); \
    const _Ts* src = (const _Ts*)src_; \
    _Td* dst = (_Td*)dst_; \
    sstep /= sizeof(src[0]); \
    dstep /= sizeof(dst[0]); \
    \
    for( int y = 0; y < size.height; y++, src += sstep, dst += dstep ) { \
        for ( int x = 0; x < size.width; x++ ) { \
            const _Tw nonneg = std::max((_Tw)src[x], (_Tw)0); \
            dst[x] = saturate_cast<_Td>(nonneg); \
        } \
    } \
}
////////////////////// 8u -> ... ////////////////////////
// Converters from uchar. DEF_CVT_FUNC arguments are: suffix, conversion
// template, source type, destination type, intermediate vector type.
// 8u -> 64s has no vector path (DEF_CVT_SCALAR_FUNC); 8u -> bool tests the
// value as is (shift 0).
DEF_CVT_FUNC(8u8s, cvt_, uchar, schar, v_int16)
DEF_CVT_FUNC(8u16u, cvt_, uchar, ushort, v_uint16)
DEF_CVT_FUNC(8u16s, cvt_, uchar, short, v_int16)
DEF_CVT_FUNC(8u32s, cvt_, uchar, int, v_int32)
DEF_CVT_FUNC(8u32f, cvt_, uchar, float, v_float32)
DEF_CVT_FUNC(8u64f, cvt_, uchar, double, v_int32)
DEF_CVT_SCALAR_FUNC(8u64s, uchar, int64_t)
DEF_CVT_FUNC(8u16f, cvt1_, uchar, float16_t, v_float32)
DEF_CVT_FUNC(8u16bf, cvt1_, uchar, bfloat16_t, v_float32)
DEF_CVT2BOOL_FUNC(8u8b, uchar, 0)
////////////////////// 8s -> ... ////////////////////////
// Converters from schar. DEF_CVT_FUNC arguments are: suffix, conversion
// template, source type, destination type, intermediate vector type.
// Conversions to unsigned destinations use an unsigned intermediate vector.
DEF_CVT_FUNC(8s8u, cvt_, schar, uchar, v_int16)
DEF_CVT_FUNC(8s16u, cvt_, schar, ushort, v_uint16)
DEF_CVT_FUNC(8s16s, cvt_, schar, short, v_int16)
DEF_CVT_FUNC(8s32u, cvt_, schar, unsigned, v_uint32)
DEF_CVT_FUNC(8s32s, cvt_, schar, int, v_int32)
DEF_CVT_FUNC(8s32f, cvt_, schar, float, v_float32)
DEF_CVT_FUNC(8s64f, cvt_, schar, double, v_int32)
DEF_CVT_FUNC(8s64u, cvt_, schar, uint64_t, v_uint32)
DEF_CVT_FUNC(8s64s, cvt_, schar, int64_t, v_int32)
DEF_CVT_FUNC(8s16f, cvt1_, schar, float16_t, v_float32)
DEF_CVT_FUNC(8s16bf, cvt1_, schar, bfloat16_t, v_float32)
////////////////////// 8b -> ... ////////////////////////
// Converters from bool. DEF_CVTBOOL2_FUNC arguments are: suffix, destination
// element type, value written for a non-zero source byte.
// The two 16-bit float destinations are written as raw uint16_t bit patterns.
DEF_CVTBOOL2_FUNC(8b8u, uchar, 1)
DEF_CVTBOOL2_FUNC(8b16s, short, 1)
DEF_CVTBOOL2_FUNC(8b32s, int, 1)
DEF_CVTBOOL2_FUNC(8b32f, float, 1)
DEF_CVTBOOL2_FUNC(8b64f, double, 1)
DEF_CVTBOOL2_FUNC(8b64s, int64_t, 1)
DEF_CVTBOOL2_FUNC(8b16f, uint16_t, 0x3c00) // float16_t(1.0f)
DEF_CVTBOOL2_FUNC(8b16bf, uint16_t, 0x3f80) // bfloat16_t(1.0f)
////////////////////// 16u -> ... ////////////////////////
@ -208,17 +359,37 @@ DEF_CVT_FUNC(16u16s, cvt_, ushort, short, v_int32)
DEF_CVT_FUNC(16u32s, cvt_, ushort, int, v_int32)
DEF_CVT_FUNC(16u32f, cvt_, ushort, float, v_float32)
DEF_CVT_FUNC(16u64f, cvt_, ushort, double, v_int32)
DEF_CVT_SCALAR_FUNC(16u64s, ushort, int64_t)
DEF_CVT_FUNC(16u16f, cvt1_,ushort, float16_t, v_float32)
DEF_CVT_FUNC(16u16bf, cvt1_, ushort, bfloat16_t, v_float32)
////////////////////// 16s -> ... ////////////////////////
// Converters from short. DEF_CVT_FUNC arguments are: suffix, conversion
// template, source type, destination type, intermediate vector type.
// 16s -> bool tests the value as is (shift 0).
DEF_CVT_FUNC(16s8u, cvt_, short, uchar, v_int16)
DEF_CVT_FUNC(16s8s, cvt_, short, schar, v_int16)
DEF_CVT_FUNC(16s16u, cvt_, short, ushort, v_int32)
DEF_CVT_FUNC(16s32u, cvt_, short, unsigned, v_uint32)
DEF_CVT_FUNC(16s32s, cvt_, short, int, v_int32)
DEF_CVT_FUNC(16s32f, cvt_, short, float, v_float32)
DEF_CVT_FUNC(16s64f, cvt_, short, double, v_int32)
DEF_CVT_FUNC(16s64u, cvt_, short, uint64_t, v_uint32)
DEF_CVT_FUNC(16s64s, cvt_, short, int64_t, v_int32)
DEF_CVT_FUNC(16s16f, cvt1_,short, float16_t, v_float32)
DEF_CVT_FUNC(16s16bf, cvt1_, short, bfloat16_t, v_float32)
DEF_CVT2BOOL_FUNC(16s8b, short, 0)
////////////////////// 32u -> ... ////////////////////////
// Converters from unsigned (32u). DEF_CVT_FUNC arguments are: suffix,
// conversion template, source type, destination type, intermediate vector
// type. Every entry must name `unsigned` as the source type, otherwise the
// input is read as signed and values >= 2^31 are converted as negatives.
DEF_CVT_FUNC(32u8u, cvt_, unsigned, uchar, v_uint32)
DEF_CVT_FUNC(32u8s, cvt_, unsigned, schar, v_int32)
DEF_CVT_FUNC(32u16u, cvt_, unsigned, ushort, v_uint32)
DEF_CVT_FUNC(32u16s, cvt_, unsigned, short, v_int32)
DEF_CVT_SCALAR_FUNC(32u32s, unsigned, int)
DEF_CVT_FUNC(32u32f, cvt_, unsigned, float, v_float32)
// 32u -> 64f goes through v_float64: a v_float32 intermediate cannot hold all
// 32-bit values exactly, which made the vector body disagree with the scalar
// tail for large inputs.
DEF_CVT_FUNC(32u64f, cvt_64f, unsigned, double, v_float64)
DEF_CVT_SCALAR_FUNC(32u64s, unsigned, int64_t)
DEF_CVT_FUNC(32u16f, cvt1_, unsigned, float16_t, v_float32)
DEF_CVT_FUNC(32u16bf, cvt1_, unsigned, bfloat16_t, v_float32)
////////////////////// 32s -> ... ////////////////////////
@ -226,9 +397,14 @@ DEF_CVT_FUNC(32s8u, cvt_, int, uchar, v_int32)
DEF_CVT_FUNC(32s8s, cvt_, int, schar, v_int32)
DEF_CVT_FUNC(32s16u, cvt_, int, ushort, v_int32)
DEF_CVT_FUNC(32s16s, cvt_, int, short, v_int32)
DEF_CVT_FUNC(32s32u, cvt_, int, unsigned, v_uint32)
DEF_CVT_FUNC(32s32f, cvt_, int, float, v_float32)
DEF_CVT_FUNC(32s64f, cvt_, int, double, v_int32)
DEF_CVT_FUNC(32s64u, cvt_, int, uint64_t, v_uint32)
DEF_CVT_FUNC(32s64s, cvt_, int, int64_t, v_int32)
DEF_CVT_FUNC(32s16f, cvt1_,int, float16_t, v_float32)
DEF_CVT_FUNC(32s16bf, cvt1_, int, bfloat16_t, v_float32)
DEF_CVT2BOOL_FUNC(32s8b, int, 0)
////////////////////// 32f -> ... ////////////////////////
@ -236,9 +412,14 @@ DEF_CVT_FUNC(32f8u, cvt_, float, uchar, v_float32)
DEF_CVT_FUNC(32f8s, cvt_, float, schar, v_float32)
DEF_CVT_FUNC(32f16u, cvt_, float, ushort, v_float32)
DEF_CVT_FUNC(32f16s, cvt_, float, short, v_float32)
DEF_CVT_FUNC(32f32u, cvt_, float, unsigned, v_float32)
DEF_CVT_FUNC(32f32s, cvt_, float, int, v_float32)
DEF_CVT_FUNC(32f64f, cvt_, float, double, v_float32)
DEF_CVT_FUNC(32f64u, cvt_64f, float, uint64_t, v_float64)
DEF_CVT_FUNC(32f64s, cvt_64f, float, int64_t, v_float64)
DEF_CVT_FUNC(32f16f, cvt1_,float, float16_t, v_float32)
DEF_CVT_FUNC(32f16bf, cvt1_,float, bfloat16_t, v_float32)
DEF_CVT2BOOL_FUNC(32f8b, int, 1)
////////////////////// 64f -> ... ////////////////////////
@ -246,9 +427,14 @@ DEF_CVT_FUNC(64f8u, cvt_, double, uchar, v_int32)
DEF_CVT_FUNC(64f8s, cvt_, double, schar, v_int32)
DEF_CVT_FUNC(64f16u, cvt_, double, ushort, v_int32)
DEF_CVT_FUNC(64f16s, cvt_, double, short, v_int32)
DEF_CVT_FUNC(64f32u, cvt_64f, double, unsigned, v_float32)
DEF_CVT_FUNC(64f32s, cvt_, double, int, v_int32)
DEF_CVT_FUNC(64f32f, cvt_, double, float, v_float32)
DEF_CVT_FUNC(64f64u, cvt_64f, double, uint64_t, v_float64)
DEF_CVT_FUNC(64f64s, cvt_64f, double, int64_t, v_float32)
DEF_CVT_FUNC(64f16f, cvt1_,double, float16_t, v_float32)
DEF_CVT_FUNC(64f16bf, cvt1_,double, bfloat16_t, v_float32)
DEF_CVT2BOOL_FUNC(64f8b, int64_t, 1)
////////////////////// 16f -> ... ////////////////////////
@ -256,9 +442,56 @@ DEF_CVT_FUNC(16f8u, cvt_, float16_t, uchar, v_float32)
DEF_CVT_FUNC(16f8s, cvt_, float16_t, schar, v_float32)
DEF_CVT_FUNC(16f16u, cvt1_, float16_t, ushort, v_float32)
DEF_CVT_FUNC(16f16s, cvt1_, float16_t, short, v_float32)
DEF_CVT_FUNC(16f32u, cvt1_, float16_t, unsigned, v_float32)
DEF_CVT_FUNC(16f32s, cvt1_, float16_t, int, v_float32)
DEF_CVT_FUNC(16f32f, cvt1_, float16_t, float, v_float32)
DEF_CVT_FUNC(16f64f, cvt1_, float16_t, double, v_float32)
DEF_CVT_FUNC(16f64u, cvt1_, float16_t, uint64_t, v_float32)
DEF_CVT_FUNC(16f64s, cvt1_, float16_t, int64_t, v_float32)
DEF_CVT_FUNC(16f16bf, cvt1_, float16_t, bfloat16_t, v_float32)
DEF_CVT2BOOL_FUNC(16f8b, short, 1)
////////////////////// 16bf -> ... ////////////////////////
// Converters from bfloat16_t. DEF_CVT_FUNC arguments are: suffix, conversion
// template, source type, destination type, intermediate vector type.
// All of them go through v_float32.
DEF_CVT_FUNC(16bf8u, cvt_, bfloat16_t, uchar, v_float32)
DEF_CVT_FUNC(16bf8s, cvt_, bfloat16_t, schar, v_float32)
DEF_CVT_FUNC(16bf16u, cvt1_, bfloat16_t, ushort, v_float32)
DEF_CVT_FUNC(16bf16s, cvt1_, bfloat16_t, short, v_float32)
DEF_CVT_FUNC(16bf32u, cvt1_, bfloat16_t, unsigned, v_float32)
DEF_CVT_FUNC(16bf32s, cvt1_, bfloat16_t, int, v_float32)
DEF_CVT_FUNC(16bf32f, cvt1_, bfloat16_t, float, v_float32)
DEF_CVT_FUNC(16bf64f, cvt1_, bfloat16_t, double, v_float32)
DEF_CVT_FUNC(16bf64u, cvt1_, bfloat16_t, uint64_t, v_float32)
DEF_CVT_FUNC(16bf64s, cvt1_, bfloat16_t, int64_t, v_float32)
DEF_CVT_FUNC(16bf16f, cvt1_, bfloat16_t, float16_t, v_float32)
////////////////////// 64s -> ... ////////////////////////
// Converters from int64_t. DEF_CVT_FUNC arguments are: suffix, conversion
// template, source type, destination type, intermediate vector type.
// NOTE(review): the narrowing entries use 32-bit intermediate vectors; whether
// 64-bit inputs outside the 32-bit range saturate on that first step depends
// on the vx_load_pair_as overloads -- confirm.
// 64s -> bool tests the value as is (shift 0).
DEF_CVT_FUNC(64s8u, cvt_, int64_t, uchar, v_int32)
DEF_CVT_FUNC(64s8s, cvt_, int64_t, schar, v_int32)
DEF_CVT_FUNC(64s16u, cvt_, int64_t, ushort, v_int32)
DEF_CVT_FUNC(64s16s, cvt_, int64_t, short, v_int32)
DEF_CVT_FUNC(64s32u, cvt_, int64_t, unsigned, v_uint32)
DEF_CVT_FUNC(64s32s, cvt_, int64_t, int, v_int32)
DEF_CVT_FUNC(64s32f, cvt_64f, int64_t, float, v_float32)
DEF_CVT_FUNC(64s64f, cvt_64f, int64_t, double, v_float64)
DEF_CVT_FUNC(64s64u, cvt_, int64_t, uint64_t, v_uint64)
DEF_CVT_FUNC(64s16f, cvt1_,int64_t, float16_t, v_float32)
DEF_CVT_FUNC(64s16bf, cvt1_, int64_t, bfloat16_t, v_float32)
DEF_CVT2BOOL_FUNC(64s8b, int64_t, 0)
////////////////////// 64u -> ... ////////////////////////
// Converters from uint64_t. DEF_CVT_FUNC arguments are: suffix, conversion
// template, source type, destination type, intermediate vector type.
// NOTE(review): the narrowing entries use 32-bit intermediate vectors (signed
// for most of them); whether large 64-bit inputs saturate on that first step
// depends on the vx_load_pair_as overloads -- confirm.
DEF_CVT_FUNC(64u8u, cvt_, uint64_t, uchar, v_int32)
DEF_CVT_FUNC(64u8s, cvt_, uint64_t, schar, v_int32)
DEF_CVT_FUNC(64u16u, cvt_, uint64_t, ushort, v_int32)
DEF_CVT_FUNC(64u16s, cvt_, uint64_t, short, v_int32)
DEF_CVT_FUNC(64u32u, cvt_, uint64_t, unsigned, v_uint32)
DEF_CVT_FUNC(64u32s, cvt_, uint64_t, int, v_int32)
DEF_CVT_FUNC(64u32f, cvt_64f, uint64_t, float, v_float64)
DEF_CVT_FUNC(64u64f, cvt_64f, uint64_t, double, v_float64)
DEF_CVT_FUNC(64u16f, cvt1_,uint64_t, float16_t, v_float32)
DEF_CVT_FUNC(64u16bf, cvt1_, uint64_t, bfloat16_t, v_float32)
///////////// "conversion" w/o conversion ///////////////
@ -274,147 +507,210 @@ static void cvt32s(const uchar* src, size_t sstep, const uchar*, size_t, uchar*
static void cvt64s(const uchar* src, size_t sstep, const uchar*, size_t, uchar* dst, size_t dstep, Size size, void*)
{ CV_INSTRUMENT_REGION(); cvtCopy((const uchar*)src, sstep, (uchar*)dst, dstep, size, 8); }
/* [TODO] Recover IPP calls
#if defined(HAVE_IPP)
#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
CV_IPP_RUN(src && dst, CV_INSTRUMENT_FUN_IPP(ippiConvert_##ippFavor, src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height)) >= 0) \
cvt_(src, sstep, dst, dstep, size); \
}
#define DEF_CVT_FUNC_F2(suffix, stype, dtype, ippFavor) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
CV_IPP_RUN(src && dst, CV_INSTRUMENT_FUN_IPP(ippiConvert_##ippFavor, src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height), ippRndFinancial, 0) >= 0) \
cvt_(src, sstep, dst, dstep, size); \
}
#else
#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
cvt_(src, sstep, dst, dstep, size); \
}
#define DEF_CVT_FUNC_F2 DEF_CVT_FUNC_F
#endif
#define DEF_CVT_FUNC(suffix, stype, dtype) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
cvt_(src, sstep, dst, dstep, size); \
}
#define DEF_CPY_FUNC(suffix, stype) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
stype* dst, size_t dstep, Size size, double*) \
{ \
cpy_(src, sstep, dst, dstep, size); \
}
DEF_CPY_FUNC(8u, uchar)
DEF_CVT_FUNC_F(8s8u, schar, uchar, 8s8u_C1Rs)
DEF_CVT_FUNC_F(16u8u, ushort, uchar, 16u8u_C1R)
DEF_CVT_FUNC_F(16s8u, short, uchar, 16s8u_C1R)
DEF_CVT_FUNC_F(32s8u, int, uchar, 32s8u_C1R)
DEF_CVT_FUNC_F2(32f8u, float, uchar, 32f8u_C1RSfs)
DEF_CVT_FUNC(64f8u, double, uchar)
DEF_CVT_FUNC_F2(8u8s, uchar, schar, 8u8s_C1RSfs)
DEF_CVT_FUNC_F2(16u8s, ushort, schar, 16u8s_C1RSfs)
DEF_CVT_FUNC_F2(16s8s, short, schar, 16s8s_C1RSfs)
DEF_CVT_FUNC_F(32s8s, int, schar, 32s8s_C1R)
DEF_CVT_FUNC_F2(32f8s, float, schar, 32f8s_C1RSfs)
DEF_CVT_FUNC(64f8s, double, schar)
DEF_CVT_FUNC_F(8u16u, uchar, ushort, 8u16u_C1R)
DEF_CVT_FUNC_F(8s16u, schar, ushort, 8s16u_C1Rs)
DEF_CPY_FUNC(16u, ushort)
DEF_CVT_FUNC_F(16s16u, short, ushort, 16s16u_C1Rs)
DEF_CVT_FUNC_F2(32s16u, int, ushort, 32s16u_C1RSfs)
DEF_CVT_FUNC_F2(32f16u, float, ushort, 32f16u_C1RSfs)
DEF_CVT_FUNC(64f16u, double, ushort)
DEF_CVT_FUNC_F(8u16s, uchar, short, 8u16s_C1R)
DEF_CVT_FUNC_F(8s16s, schar, short, 8s16s_C1R)
DEF_CVT_FUNC_F2(16u16s, ushort, short, 16u16s_C1RSfs)
DEF_CVT_FUNC_F2(32s16s, int, short, 32s16s_C1RSfs)
DEF_CVT_FUNC(32f16s, float, short)
DEF_CVT_FUNC(64f16s, double, short)
DEF_CVT_FUNC_F(8u32s, uchar, int, 8u32s_C1R)
DEF_CVT_FUNC_F(8s32s, schar, int, 8s32s_C1R)
DEF_CVT_FUNC_F(16u32s, ushort, int, 16u32s_C1R)
DEF_CVT_FUNC_F(16s32s, short, int, 16s32s_C1R)
DEF_CPY_FUNC(32s, int)
DEF_CVT_FUNC_F2(32f32s, float, int, 32f32s_C1RSfs)
DEF_CVT_FUNC(64f32s, double, int)
DEF_CVT_FUNC_F(8u32f, uchar, float, 8u32f_C1R)
DEF_CVT_FUNC_F(8s32f, schar, float, 8s32f_C1R)
DEF_CVT_FUNC_F(16u32f, ushort, float, 16u32f_C1R)
DEF_CVT_FUNC_F(16s32f, short, float, 16s32f_C1R)
DEF_CVT_FUNC_F(32s32f, int, float, 32s32f_C1R)
DEF_CVT_FUNC(64f32f, double, float)
DEF_CVT_FUNC(8u64f, uchar, double)
DEF_CVT_FUNC(8s64f, schar, double)
DEF_CVT_FUNC(16u64f, ushort, double)
DEF_CVT_FUNC(16s64f, short, double)
DEF_CVT_FUNC(32s64f, int, double)
DEF_CVT_FUNC(32f64f, float, double)
DEF_CPY_FUNC(64s, int64)
*/
// Returns the element-wise conversion kernel for a (source depth, destination depth) pair.
// NOTE(review): this listing carries two forms of the function back to back — a signature taking
// (sdepth, ddepth) with an 8x8 cvtTab lookup, and a signature taking (sdepth_, ddepth_) with a
// chain of conditionals over 13 depths. That looks like a flattened diff; confirm which form is live.
BinaryFunc getConvertFunc(int sdepth, int ddepth)
BinaryFunc getConvertFunc(int sdepth_, int ddepth_)
{
// Table form: rows are indexed by destination depth, columns by source depth.
static BinaryFunc cvtTab[][8] =
{
{
(cvt8u), (cvt8s8u), (cvt16u8u),
(cvt16s8u), (cvt32s8u), (cvt32f8u),
(cvt64f8u), (cvt16f8u)
},
{
(cvt8u8s), cvt8u, (cvt16u8s),
(cvt16s8s), (cvt32s8s), (cvt32f8s),
(cvt64f8s), (cvt16f8s)
},
{
(cvt8u16u), (cvt8s16u), cvt16u,
(cvt16s16u), (cvt32s16u), (cvt32f16u),
(cvt64f16u), (cvt16f16u)
},
{
(cvt8u16s), (cvt8s16s), (cvt16u16s),
cvt16u, (cvt32s16s), (cvt32f16s),
(cvt64f16s), (cvt16f16s)
},
{
(cvt8u32s), (cvt8s32s), (cvt16u32s),
(cvt16s32s), cvt32s, (cvt32f32s),
(cvt64f32s), (cvt16f32s)
},
{
(cvt8u32f), (cvt8s32f), (cvt16u32f),
(cvt16s32f), (cvt32s32f), cvt32s,
(cvt64f32f), (cvt16f32f)
},
{
(cvt8u64f), (cvt8s64f), (cvt16u64f),
(cvt16s64f), (cvt32s64f), (cvt32f64f),
(cvt64s), (cvt16f64f)
},
{
(cvt8u16f), (cvt8s16f), (cvt16u16f), (cvt16s16f),
(cvt32s16f), (cvt32f16f), (cvt64f16f), (cvt16u)
}
};
return cvtTab[CV_MAT_DEPTH(ddepth)][CV_MAT_DEPTH(sdepth)];
// Conditional-chain form: both arguments are first passed through CV_MAT_DEPTH.
int sdepth = CV_MAT_DEPTH(sdepth_);
int ddepth = CV_MAT_DEPTH(ddepth_);
// The outer chain selects on destination depth, each inner chain on source depth.
// Pairs with the same depth reuse cvt8u / cvt16u / cvt32s / cvt64s; cvt64s forwards to cvtCopy with
// element size 8, and the others are presumably the matching copies for 1, 2 and 4 bytes — confirm.
BinaryFunc func =
ddepth == CV_8U ? (
sdepth == CV_8U ? cvt8u :
sdepth == CV_8S ? cvt8s8u :
sdepth == CV_16U ? cvt16u8u :
sdepth == CV_16S ? cvt16s8u :
sdepth == CV_32U ? cvt32u8u :
sdepth == CV_32S ? cvt32s8u :
sdepth == CV_32F ? cvt32f8u :
sdepth == CV_64F ? cvt64f8u :
sdepth == CV_16F ? cvt16f8u :
sdepth == CV_16BF ? cvt16bf8u :
sdepth == CV_Bool ? cvt8b8u :
sdepth == CV_64U ? cvt64u8u :
sdepth == CV_64S ? cvt64s8u :
0) :
ddepth == CV_8S ? (
sdepth == CV_8U ? cvt8u8s :
sdepth == CV_8S ? cvt8u :
sdepth == CV_16U ? cvt16u8s :
sdepth == CV_16S ? cvt16s8s :
sdepth == CV_32U ? cvt32u8s :
sdepth == CV_32S ? cvt32s8s :
sdepth == CV_32F ? cvt32f8s :
sdepth == CV_64F ? cvt64f8s :
sdepth == CV_16F ? cvt16f8s :
sdepth == CV_16BF ? cvt16bf8s :
sdepth == CV_Bool ? cvt8b8u :
sdepth == CV_64U ? cvt64u8s :
sdepth == CV_64S ? cvt64s8s :
0) :
ddepth == CV_16U ? (
sdepth == CV_8U ? cvt8u16s : // same as cvt8u16u
sdepth == CV_8S ? cvt8s16u :
sdepth == CV_16U ? cvt16u :
sdepth == CV_16S ? cvt16s16u :
sdepth == CV_32U ? cvt32u16u :
sdepth == CV_32S ? cvt32s16u :
sdepth == CV_32F ? cvt32f16u :
sdepth == CV_64F ? cvt64f16u :
sdepth == CV_16F ? cvt16f16u :
sdepth == CV_16BF ? cvt16bf16u :
sdepth == CV_Bool ? cvt8b16s :
sdepth == CV_64U ? cvt64u16u :
sdepth == CV_64S ? cvt64s16u :
0) :
ddepth == CV_16S ? (
sdepth == CV_8U ? cvt8u16s :
sdepth == CV_8S ? cvt8s16s :
sdepth == CV_16U ? cvt16u16s :
sdepth == CV_16S ? cvt16u :
sdepth == CV_32U ? cvt32u16s :
sdepth == CV_32S ? cvt32s16s :
sdepth == CV_32F ? cvt32f16s :
sdepth == CV_64F ? cvt64f16s :
sdepth == CV_16F ? cvt16f16s :
sdepth == CV_16BF ? cvt16bf16s :
sdepth == CV_Bool ? cvt8b16s :
sdepth == CV_64U ? cvt64u16s :
sdepth == CV_64S ? cvt64s16s :
0) :
ddepth == CV_32U ? (
sdepth == CV_8U ? cvt8u32s : // same as cvt8u32u
sdepth == CV_8S ? cvt8s32u :
sdepth == CV_16U ? cvt16u32s : // same as cvt16u32u
sdepth == CV_16S ? cvt16s32u :
sdepth == CV_32U ? cvt32s :
sdepth == CV_32S ? cvt32s32u :
sdepth == CV_32F ? cvt32f32u :
sdepth == CV_64F ? cvt64f32u :
sdepth == CV_16F ? cvt16f32u :
sdepth == CV_16BF ? cvt16bf32u :
sdepth == CV_Bool ? cvt8b32s :
sdepth == CV_64U ? cvt64u32u :
sdepth == CV_64S ? cvt64s32u :
0) :
ddepth == CV_32S ? (
sdepth == CV_8U ? cvt8u32s :
sdepth == CV_8S ? cvt8s32s :
sdepth == CV_16U ? cvt16u32s :
sdepth == CV_16S ? cvt16s32s :
sdepth == CV_32U ? cvt32u32s :
sdepth == CV_32S ? cvt32s :
sdepth == CV_32F ? cvt32f32s :
sdepth == CV_64F ? cvt64f32s :
sdepth == CV_16F ? cvt16f32s :
sdepth == CV_16BF ? cvt16bf32s :
sdepth == CV_Bool ? cvt8b32s :
sdepth == CV_64U ? cvt64u32s :
sdepth == CV_64S ? cvt64s32s :
0) :
ddepth == CV_32F ? (
sdepth == CV_8U ? cvt8u32f :
sdepth == CV_8S ? cvt8s32f :
sdepth == CV_16U ? cvt16u32f :
sdepth == CV_16S ? cvt16s32f :
sdepth == CV_32U ? cvt32u32f :
sdepth == CV_32S ? cvt32s32f :
sdepth == CV_32F ? cvt32s :
sdepth == CV_64F ? cvt64f32f :
sdepth == CV_16F ? cvt16f32f :
sdepth == CV_16BF ? cvt16bf32f :
sdepth == CV_Bool ? cvt8b32f :
sdepth == CV_64U ? cvt64u32f :
sdepth == CV_64S ? cvt64s32f :
0) :
ddepth == CV_64F ? (
sdepth == CV_8U ? cvt8u64f :
sdepth == CV_8S ? cvt8s64f :
sdepth == CV_16U ? cvt16u64f :
sdepth == CV_16S ? cvt16s64f :
sdepth == CV_32U ? cvt32u64f :
sdepth == CV_32S ? cvt32s64f :
sdepth == CV_32F ? cvt32f64f :
sdepth == CV_64F ? cvt64s :
sdepth == CV_16F ? cvt16f64f :
sdepth == CV_16BF ? cvt16bf64f :
sdepth == CV_Bool ? cvt8b64f :
sdepth == CV_64U ? cvt64u64f :
sdepth == CV_64S ? cvt64s64f :
0) :
ddepth == CV_16F ? (
sdepth == CV_8U ? cvt8u16f :
sdepth == CV_8S ? cvt8s16f :
sdepth == CV_16U ? cvt16u16f :
sdepth == CV_16S ? cvt16s16f :
sdepth == CV_32U ? cvt32u16f :
sdepth == CV_32S ? cvt32s16f :
sdepth == CV_32F ? cvt32f16f :
sdepth == CV_64F ? cvt64f16f :
sdepth == CV_16F ? cvt16u :
sdepth == CV_16BF ? cvt16bf16f :
sdepth == CV_Bool ? cvt8b16f :
sdepth == CV_64U ? cvt64u16f :
sdepth == CV_64S ? cvt64s16f :
0) :
ddepth == CV_16BF ? (
sdepth == CV_8U ? cvt8u16bf :
sdepth == CV_8S ? cvt8s16bf :
sdepth == CV_16U ? cvt16u16bf :
sdepth == CV_16S ? cvt16s16bf :
sdepth == CV_32U ? cvt32u16bf :
sdepth == CV_32S ? cvt32s16bf :
sdepth == CV_32F ? cvt32f16bf :
sdepth == CV_64F ? cvt64f16bf :
sdepth == CV_16F ? cvt16f16bf :
sdepth == CV_16BF ? cvt16u :
sdepth == CV_Bool ? cvt8b16bf :
sdepth == CV_64U ? cvt64u16bf :
sdepth == CV_64S ? cvt64s16bf :
0) :
// To-bool conversions share one kernel per element width: the signed and unsigned sources of
// a given width map to the same cvt*8b function.
ddepth == CV_Bool ? (
sdepth == CV_8U ? cvt8u8b :
sdepth == CV_8S ? cvt8u8b :
sdepth == CV_16U ? cvt16s8b :
sdepth == CV_16S ? cvt16s8b :
sdepth == CV_32U ? cvt32s8b :
sdepth == CV_32S ? cvt32s8b :
sdepth == CV_32F ? cvt32f8b :
sdepth == CV_64F ? cvt64f8b :
sdepth == CV_16F ? cvt16f8b :
sdepth == CV_16BF ? cvt16f8b : // same as cvt16f8b
sdepth == CV_Bool ? cvt8u :
sdepth == CV_64U ? cvt64s8b :
sdepth == CV_64S ? cvt64s8b :
0) :
ddepth == CV_64U ? (
sdepth == CV_8U ? cvt8u64s : // same as cvt8u64u
sdepth == CV_8S ? cvt8s64u :
sdepth == CV_16U ? cvt16u64s : // same as cvt16u64u
sdepth == CV_16S ? cvt16s64u :
sdepth == CV_32U ? cvt32u64s : // same as cvt32u64u
sdepth == CV_32S ? cvt32s64u :
sdepth == CV_32F ? cvt32f64u :
sdepth == CV_64F ? cvt64f64u :
sdepth == CV_16F ? cvt16f64u :
sdepth == CV_16BF ? cvt16bf64u :
sdepth == CV_Bool ? cvt8b64s :
sdepth == CV_64U ? cvt64s :
sdepth == CV_64S ? cvt64s64u :
0) :
ddepth == CV_64S ? (
sdepth == CV_8U ? cvt8u64s :
sdepth == CV_8S ? cvt8s64s :
sdepth == CV_16U ? cvt16u64s :
sdepth == CV_16S ? cvt16s64s :
sdepth == CV_32U ? cvt32u64s :
sdepth == CV_32S ? cvt32s64s :
sdepth == CV_32F ? cvt32f64s :
sdepth == CV_64F ? cvt64f64s :
sdepth == CV_16F ? cvt16f64s :
sdepth == CV_16BF ? cvt16bf64s :
sdepth == CV_Bool ? cvt8b64s :
// NOTE(review): 64U->64S goes through cvt64s, a plain 8-byte copy with no range handling,
// whereas 64S->64U has a dedicated cvt64s64u and the 16- and 32-bit unsigned->signed pairs use
// cvt16u16s / cvt32u32s. Confirm this asymmetry is intended.
sdepth == CV_64U ? cvt64s :
sdepth == CV_64S ? cvt64s :
0) :
0;
// Any depth pair not matched above leaves func == 0 and trips this assertion.
CV_Assert(func != 0);
return func;
}
CV_CPU_OPTIMIZATION_NAMESPACE_END

@ -53,38 +53,18 @@ cvtabs_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep,
}
}
// variant for conversions 16f <-> ... w/o unrolling
// NOTE(review): two function headers follow one another here — the templated cvtabs1_32f and a
// non-template cvtabs_32f overload taking `const bool*` — and the body below mixes a vectorised
// loop with a two-value lookup loop. This looks like a flattened diff; confirm which lines are live.
template<typename _Ts, typename _Td> inline void
cvtabs1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep,
Size size, float a, float b )
static void
cvtabs_32f( const bool* src_, size_t sstep,
uchar* dst, size_t dstep,
Size size, float a, float b )
{
#if CV_SIMD
v_float32 va = vx_setall_f32(a), vb = vx_setall_f32(b);
const int VECSZ = v_float32::nlanes*2;
#endif
// Steps arrive in bytes; convert them to element counts.
sstep /= sizeof(src[0]);
dstep /= sizeof(dst[0]);
// Bool input is read as raw bytes. Only two results are possible, so both are computed once:
// v0 = |b| for a zero input, v1 = |a + b| for a non-zero input, each saturated to uchar.
const uchar* src = (const uchar*)src_;
uchar v0 = saturate_cast<uchar>(std::abs(b));
uchar v1 = saturate_cast<uchar>(std::abs(a + b));
for( int i = 0; i < size.height; i++, src += sstep, dst += dstep )
{
int j = 0;
#if CV_SIMD
for( ; j < size.width; j += VECSZ )
{
// Fewer than VECSZ elements remain: step back so the last vector ends exactly at size.width,
// unless the row is shorter than one vector or src aliases dst (then use the scalar loop).
if( j > size.width - VECSZ )
{
if( j == 0 || src == (_Ts*)dst )
break;
j = size.width - VECSZ;
}
v_float32 v0;
vx_load_as(src + j, v0);
v0 = v_fma(v0, va, vb);
v_store_as(dst + j, v_abs(v0));
}
#endif
// NOTE(review): this scalar tail stores src*a + b without the abs that the vector path applies
// through v_abs — confirm whether that is intended.
for( ; j < size.width; j++ )
dst[j] = saturate_cast<_Td>(src[j]*a + b);
for (int j = 0; j < size.width; j++)
dst[j] = src[j] != 0 ? v1 : v0;
}
}
@ -217,145 +197,454 @@ static void cvtScale##suffix( const uchar* src_, size_t sstep, const uchar*, siz
cvt(src, sstep, dst, dstep, size, (wtype)scale[0], (wtype)scale[1]); \
}
/* Generates cvtScale<suffix>(): scale-and-convert from `stype` to a boolean byte image.
   `scale_` points at two doubles {a, b}; both are narrowed to `wtype`. Every output byte is 1
   when (wtype)src*a + b is non-zero and 0 otherwise. `sstep` arrives in bytes and is converted
   to elements of `stype`; `dstep` is applied directly to the uchar destination.
   The second source pointer/step pair is unused. */
#define DEF_CVT_SCALE2BOOL_FUNC(suffix, stype, wtype) \
static void cvtScale##suffix( const uchar* src_, size_t sstep, const uchar*, size_t, \
                              uchar* dst, size_t dstep, Size size, void* scale_) \
{ \
    const double* ab = (const double*)scale_; \
    const wtype alpha = (wtype)ab[0]; \
    const wtype beta = (wtype)ab[1]; \
    const stype* row = (const stype*)src_; \
    const size_t rowStride = sstep / sizeof(row[0]); \
    for( int y = 0; y < size.height; y++ ) \
    { \
        for( int x = 0; x < size.width; x++ ) \
            dst[x] = (bool)((wtype)row[x]*alpha + beta != 0); \
        row += rowStride; \
        dst += dstep; \
    } \
}
/* Generates cvtScale<suffix>(): scale-and-convert from a boolean byte image to `dtype`.
   `scale_` points at two doubles {a, b}; both are narrowed to `wtype`. Since the input has only
   two states, the two possible results are computed once up front: saturate_cast<dtype>(b) for
   a zero byte and saturate_cast<dtype>(a + b) for a non-zero byte. `dstep` arrives in bytes and
   is converted to elements of `dtype`; `sstep` is applied directly to the uchar source.
   The second source pointer/step pair is unused. */
#define DEF_CVT_SCALEBOOL2_FUNC(suffix, dtype, wtype) \
static void cvtScale##suffix( const uchar* src, size_t sstep, const uchar*, size_t, \
                              uchar* dst_, size_t dstep, Size size, void* scale_) \
{ \
    const double* ab = (const double*)scale_; \
    const wtype alpha = (wtype)ab[0]; \
    const wtype beta = (wtype)ab[1]; \
    dtype whenFalse = saturate_cast<dtype>(beta); \
    dtype whenTrue = saturate_cast<dtype>(alpha + beta); \
    dtype* out = (dtype*)dst_; \
    const size_t outStride = dstep / sizeof(out[0]); \
    for( int y = 0; y < size.height; y++, src += sstep, out += outStride ) \
    { \
        for( int x = 0; x < size.width; x++ ) \
            out[x] = src[x] != 0 ? whenTrue : whenFalse; \
    } \
}
// Scale-kernel instantiations, grouped by destination type. Arguments of DEF_CVT_SCALE_ABS_FUNC /
// DEF_CVT_SCALE_FUNC: suffix, helper (cvtabs_32f, cvt_32f, cvt1_32f or cvt_64f), source type,
// destination type, working type. DEF_CVT_SCALE2BOOL_FUNC takes (suffix, source type, working type)
// and DEF_CVT_SCALEBOOL2_FUNC takes (suffix, destination type, working type).

// |src*a + b| -> 8u
DEF_CVT_SCALE_ABS_FUNC(8u, cvtabs_32f, uchar, uchar, float)
DEF_CVT_SCALE_ABS_FUNC(8s8u, cvtabs_32f, schar, uchar, float)
DEF_CVT_SCALE_ABS_FUNC(8b8u, cvtabs_32f, bool, uchar, float)
DEF_CVT_SCALE_ABS_FUNC(16u8u, cvtabs_32f, ushort, uchar, float)
DEF_CVT_SCALE_ABS_FUNC(16s8u, cvtabs_32f, short, uchar, float)
DEF_CVT_SCALE_ABS_FUNC(32u8u, cvtabs_32f, unsigned, uchar, float)
DEF_CVT_SCALE_ABS_FUNC(32s8u, cvtabs_32f, int, uchar, float)
DEF_CVT_SCALE_ABS_FUNC(32f8u, cvtabs_32f, float, uchar, float)
DEF_CVT_SCALE_ABS_FUNC(64u8u, cvtabs_32f, uint64_t, uchar, float)
DEF_CVT_SCALE_ABS_FUNC(64s8u, cvtabs_32f, int64_t, uchar, float)
DEF_CVT_SCALE_ABS_FUNC(64f8u, cvtabs_32f, double, uchar, float)
DEF_CVT_SCALE_ABS_FUNC(16f8u, cvtabs_32f, float16_t, uchar, float)
DEF_CVT_SCALE_ABS_FUNC(16bf8u, cvtabs_32f, bfloat16_t, uchar, float)
// ... -> 8u
DEF_CVT_SCALE_FUNC(8u, cvt_32f, uchar, uchar, float)
DEF_CVT_SCALE_FUNC(8s8u, cvt_32f, schar, uchar, float)
DEF_CVT_SCALE_FUNC(16u8u, cvt_32f, ushort, uchar, float)
DEF_CVT_SCALE_FUNC(16s8u, cvt_32f, short, uchar, float)
DEF_CVT_SCALE_FUNC(32u8u, cvt_32f, unsigned, uchar, float)
DEF_CVT_SCALE_FUNC(32s8u, cvt_32f, int, uchar, float)
DEF_CVT_SCALE_FUNC(32f8u, cvt_32f, float, uchar, float)
DEF_CVT_SCALE_FUNC(64f8u, cvt_32f, double, uchar, float)
DEF_CVT_SCALE_FUNC(64u8u, cvt_32f, uint64_t, uchar, float)
DEF_CVT_SCALE_FUNC(64s8u, cvt_32f, int64_t, uchar, float)
DEF_CVT_SCALE_FUNC(16f8u, cvt_32f, float16_t, uchar, float)
DEF_CVT_SCALE_FUNC(16bf8u, cvt_32f, bfloat16_t, uchar, float)
// ... -> 8s
DEF_CVT_SCALE_FUNC(8u8s, cvt_32f, uchar, schar, float)
DEF_CVT_SCALE_FUNC(8s, cvt_32f, schar, schar, float)
DEF_CVT_SCALE_FUNC(16u8s, cvt_32f, ushort, schar, float)
DEF_CVT_SCALE_FUNC(16s8s, cvt_32f, short, schar, float)
DEF_CVT_SCALE_FUNC(32u8s, cvt_32f, unsigned, schar, float)
DEF_CVT_SCALE_FUNC(32s8s, cvt_32f, int, schar, float)
DEF_CVT_SCALE_FUNC(32f8s, cvt_32f, float, schar, float)
DEF_CVT_SCALE_FUNC(64f8s, cvt_32f, double, schar, float)
DEF_CVT_SCALE_FUNC(64u8s, cvt_32f, uint64_t, schar, float)
DEF_CVT_SCALE_FUNC(64s8s, cvt_32f, int64_t, schar, float)
DEF_CVT_SCALE_FUNC(16f8s, cvt_32f, float16_t, schar, float)
DEF_CVT_SCALE_FUNC(16bf8s, cvt_32f, bfloat16_t, schar, float)
// ... -> bool (every source type scales in float here)
DEF_CVT_SCALE2BOOL_FUNC(8u8b, uchar, float)
DEF_CVT_SCALE2BOOL_FUNC(8s8b, schar, float)
DEF_CVT_SCALE2BOOL_FUNC(16u8b, ushort, float)
DEF_CVT_SCALE2BOOL_FUNC(16s8b, short, float)
DEF_CVT_SCALE2BOOL_FUNC(32u8b, unsigned, float)
DEF_CVT_SCALE2BOOL_FUNC(32s8b, int, float)
DEF_CVT_SCALE2BOOL_FUNC(32f8b, float, float)
DEF_CVT_SCALE2BOOL_FUNC(64f8b, double, float)
DEF_CVT_SCALE2BOOL_FUNC(64u8b, uint64_t, float)
DEF_CVT_SCALE2BOOL_FUNC(64s8b, int64_t, float)
DEF_CVT_SCALE2BOOL_FUNC(16f8b, float16_t, float)
DEF_CVT_SCALE2BOOL_FUNC(16bf8b, bfloat16_t, float)
// ... -> 16u
DEF_CVT_SCALE_FUNC(8u16u, cvt_32f, uchar, ushort, float)
DEF_CVT_SCALE_FUNC(8s16u, cvt_32f, schar, ushort, float)
DEF_CVT_SCALE_FUNC(16u, cvt_32f, ushort, ushort, float)
DEF_CVT_SCALE_FUNC(16s16u, cvt_32f, short, ushort, float)
DEF_CVT_SCALE_FUNC(32u16u, cvt_32f, unsigned, ushort, float)
DEF_CVT_SCALE_FUNC(32s16u, cvt_32f, int, ushort, float)
DEF_CVT_SCALE_FUNC(32f16u, cvt_32f, float, ushort, float)
DEF_CVT_SCALE_FUNC(64f16u, cvt_32f, double, ushort, float)
DEF_CVT_SCALE_FUNC(64u16u, cvt_32f, uint64_t, ushort, float)
DEF_CVT_SCALE_FUNC(64s16u, cvt_32f, int64_t, ushort, float)
DEF_CVT_SCALE_FUNC(16f16u, cvt1_32f, float16_t, ushort, float)
DEF_CVT_SCALE_FUNC(16bf16u, cvt1_32f, bfloat16_t, ushort, float)
// ... -> 16s
DEF_CVT_SCALE_FUNC(8u16s, cvt_32f, uchar, short, float)
DEF_CVT_SCALE_FUNC(8s16s, cvt_32f, schar, short, float)
DEF_CVT_SCALE_FUNC(16u16s, cvt_32f, ushort, short, float)
DEF_CVT_SCALE_FUNC(16s, cvt_32f, short, short, float)
DEF_CVT_SCALE_FUNC(32u16s, cvt_32f, unsigned, short, float)
DEF_CVT_SCALE_FUNC(32s16s, cvt_32f, int, short, float)
DEF_CVT_SCALE_FUNC(32f16s, cvt_32f, float, short, float)
DEF_CVT_SCALE_FUNC(64f16s, cvt_32f, double, short, float)
DEF_CVT_SCALE_FUNC(64u16s, cvt_32f, uint64_t, short, float)
DEF_CVT_SCALE_FUNC(64s16s, cvt_32f, int64_t, short, float)
DEF_CVT_SCALE_FUNC(16f16s, cvt1_32f, float16_t, short, float)
DEF_CVT_SCALE_FUNC(16bf16s, cvt1_32f, bfloat16_t, short, float)
// ... -> 32u
// NOTE(review): 32u->32u scales in float, while 32s->32u here and 32s->32s below scale in double —
// confirm the precision choice for 32-bit unsigned sources.
DEF_CVT_SCALE_FUNC(8u32u, cvt_32f, uchar, unsigned, float)
DEF_CVT_SCALE_FUNC(8s32u, cvt_32f, schar, unsigned, float)
DEF_CVT_SCALE_FUNC(16u32u, cvt_32f, ushort, unsigned, float)
DEF_CVT_SCALE_FUNC(16s32u, cvt_32f, short, unsigned, float)
DEF_CVT_SCALE_FUNC(32u, cvt_32f, unsigned, unsigned, float)
DEF_CVT_SCALE_FUNC(32s32u, cvt_64f, int, unsigned, double)
DEF_CVT_SCALE_FUNC(32f32u, cvt_32f, float, unsigned, float)
DEF_CVT_SCALE_FUNC(64f32u, cvt_64f, double, unsigned, double)
DEF_CVT_SCALE_FUNC(64u32u, cvt_32f, uint64_t, unsigned, float)
DEF_CVT_SCALE_FUNC(64s32u, cvt_32f, int64_t, unsigned, float)
DEF_CVT_SCALE_FUNC(16f32u, cvt1_32f, float16_t, unsigned, float)
DEF_CVT_SCALE_FUNC(16bf32u, cvt1_32f, bfloat16_t, unsigned, float)
// ... -> 32s
DEF_CVT_SCALE_FUNC(8u32s, cvt_32f, uchar, int, float)
DEF_CVT_SCALE_FUNC(8s32s, cvt_32f, schar, int, float)
DEF_CVT_SCALE_FUNC(16u32s, cvt_32f, ushort, int, float)
DEF_CVT_SCALE_FUNC(16s32s, cvt_32f, short, int, float)
DEF_CVT_SCALE_FUNC(32u32s, cvt_32f, unsigned, int, float)
DEF_CVT_SCALE_FUNC(32s, cvt_64f, int, int, double)
DEF_CVT_SCALE_FUNC(32f32s, cvt_32f, float, int, float)
DEF_CVT_SCALE_FUNC(64f32s, cvt_64f, double, int, double)
DEF_CVT_SCALE_FUNC(64u32s, cvt_32f, uint64_t, int, float)
DEF_CVT_SCALE_FUNC(64s32s, cvt_32f, int64_t, int, float)
DEF_CVT_SCALE_FUNC(16f32s, cvt1_32f, float16_t, int, float)
DEF_CVT_SCALE_FUNC(16bf32s, cvt1_32f, bfloat16_t, int, float)
// ... -> 32f
DEF_CVT_SCALE_FUNC(8u32f, cvt_32f, uchar, float, float)
DEF_CVT_SCALE_FUNC(8s32f, cvt_32f, schar, float, float)
DEF_CVT_SCALE_FUNC(16u32f, cvt_32f, ushort, float, float)
DEF_CVT_SCALE_FUNC(16s32f, cvt_32f, short, float, float)
DEF_CVT_SCALE_FUNC(32u32f, cvt_32f, unsigned, float, float)
DEF_CVT_SCALE_FUNC(32s32f, cvt_32f, int, float, float)
DEF_CVT_SCALE_FUNC(32f, cvt_32f, float, float, float)
DEF_CVT_SCALE_FUNC(64f32f, cvt_64f, double, float, double)
DEF_CVT_SCALE_FUNC(64u32f, cvt_32f, uint64_t, float, float)
DEF_CVT_SCALE_FUNC(64s32f, cvt_32f, int64_t, float, float)
DEF_CVT_SCALE_FUNC(16f32f, cvt1_32f, float16_t, float, float)
DEF_CVT_SCALE_FUNC(16bf32f, cvt1_32f, bfloat16_t, float, float)
// ... -> 64f (all in double)
DEF_CVT_SCALE_FUNC(8u64f, cvt_64f, uchar, double, double)
DEF_CVT_SCALE_FUNC(8s64f, cvt_64f, schar, double, double)
DEF_CVT_SCALE_FUNC(16u64f, cvt_64f, ushort, double, double)
DEF_CVT_SCALE_FUNC(16s64f, cvt_64f, short, double, double)
DEF_CVT_SCALE_FUNC(32u64f, cvt_64f, unsigned, double, double)
DEF_CVT_SCALE_FUNC(32s64f, cvt_64f, int, double, double)
DEF_CVT_SCALE_FUNC(32f64f, cvt_64f, float, double, double)
DEF_CVT_SCALE_FUNC(64f, cvt_64f, double, double, double)
DEF_CVT_SCALE_FUNC(64u64f, cvt_64f, uint64_t, double, double)
DEF_CVT_SCALE_FUNC(64s64f, cvt_64f, int64_t, double, double)
DEF_CVT_SCALE_FUNC(16f64f, cvt_64f, float16_t, double, double)
DEF_CVT_SCALE_FUNC(16bf64f, cvt_64f, bfloat16_t, double, double)
// ... -> 64u (all in double)
DEF_CVT_SCALE_FUNC(8u64u, cvt_64f, uchar, uint64_t, double)
DEF_CVT_SCALE_FUNC(8s64u, cvt_64f, schar, uint64_t, double)
DEF_CVT_SCALE_FUNC(16u64u, cvt_64f, ushort, uint64_t, double)
DEF_CVT_SCALE_FUNC(16s64u, cvt_64f, short, uint64_t, double)
DEF_CVT_SCALE_FUNC(32u64u, cvt_64f, unsigned, uint64_t, double)
DEF_CVT_SCALE_FUNC(32s64u, cvt_64f, int, uint64_t, double)
DEF_CVT_SCALE_FUNC(32f64u, cvt_64f, float, uint64_t, double)
DEF_CVT_SCALE_FUNC(64f64u, cvt_64f, double, uint64_t, double)
DEF_CVT_SCALE_FUNC(64u, cvt_64f, uint64_t, uint64_t, double)
DEF_CVT_SCALE_FUNC(64s64u, cvt_64f, int64_t, uint64_t, double)
DEF_CVT_SCALE_FUNC(16f64u, cvt_64f, float16_t, uint64_t, double)
DEF_CVT_SCALE_FUNC(16bf64u, cvt_64f, bfloat16_t, uint64_t, double)
// ... -> 64s (all in double)
DEF_CVT_SCALE_FUNC(8u64s, cvt_64f, uchar, int64_t, double)
DEF_CVT_SCALE_FUNC(8s64s, cvt_64f, schar, int64_t, double)
DEF_CVT_SCALE_FUNC(16u64s, cvt_64f, ushort, int64_t, double)
DEF_CVT_SCALE_FUNC(16s64s, cvt_64f, short, int64_t, double)
DEF_CVT_SCALE_FUNC(32u64s, cvt_64f, unsigned, int64_t, double)
DEF_CVT_SCALE_FUNC(32s64s, cvt_64f, int, int64_t, double)
DEF_CVT_SCALE_FUNC(32f64s, cvt_64f, float, int64_t, double)
DEF_CVT_SCALE_FUNC(64f64s, cvt_64f, double, int64_t, double)
DEF_CVT_SCALE_FUNC(64u64s, cvt_64f, uint64_t, int64_t, double)
DEF_CVT_SCALE_FUNC(64s, cvt_64f, int64_t, int64_t, double)
DEF_CVT_SCALE_FUNC(16f64s, cvt_64f, float16_t, int64_t, double)
DEF_CVT_SCALE_FUNC(16bf64s, cvt_64f, bfloat16_t, int64_t, double)
// ... -> 16f
DEF_CVT_SCALE_FUNC(8u16f, cvt1_32f, uchar, float16_t, float)
DEF_CVT_SCALE_FUNC(8s16f, cvt1_32f, schar, float16_t, float)
DEF_CVT_SCALE_FUNC(16u16f, cvt1_32f, ushort, float16_t, float)
DEF_CVT_SCALE_FUNC(16s16f, cvt1_32f, short, float16_t, float)
DEF_CVT_SCALE_FUNC(32u16f, cvt1_32f, unsigned, float16_t, float)
DEF_CVT_SCALE_FUNC(32s16f, cvt1_32f, int, float16_t, float)
DEF_CVT_SCALE_FUNC(32f16f, cvt1_32f, float, float16_t, float)
// NOTE(review): the suffix 64f16f is instantiated twice on the next two lines (once via
// cvt_64f/double, once via cvt1_32f/float). Two definitions of cvtScale64f16f cannot coexist, so
// one of them is presumably a leftover of a flattened diff — confirm which one is live.
DEF_CVT_SCALE_FUNC(64f16f, cvt_64f, double, float16_t, double)
DEF_CVT_SCALE_FUNC(64f16f, cvt1_32f, double, float16_t, float)
DEF_CVT_SCALE_FUNC(64u16f, cvt1_32f, uint64_t, float16_t, float)
DEF_CVT_SCALE_FUNC(64s16f, cvt1_32f, int64_t, float16_t, float)
DEF_CVT_SCALE_FUNC(16f, cvt1_32f, float16_t, float16_t, float)
DEF_CVT_SCALE_FUNC(16bf16f, cvt1_32f, bfloat16_t, float16_t, float)
// ... -> 16bf
DEF_CVT_SCALE_FUNC(8u16bf, cvt1_32f, uchar, bfloat16_t, float)
DEF_CVT_SCALE_FUNC(8s16bf, cvt1_32f, schar, bfloat16_t, float)
DEF_CVT_SCALE_FUNC(16u16bf, cvt1_32f, ushort, bfloat16_t, float)
DEF_CVT_SCALE_FUNC(16s16bf, cvt1_32f, short, bfloat16_t, float)
DEF_CVT_SCALE_FUNC(32u16bf, cvt1_32f, unsigned, bfloat16_t, float)
DEF_CVT_SCALE_FUNC(32s16bf, cvt1_32f, int, bfloat16_t, float)
DEF_CVT_SCALE_FUNC(32f16bf, cvt1_32f, float, bfloat16_t, float)
DEF_CVT_SCALE_FUNC(64f16bf, cvt1_32f, double, bfloat16_t, float)
DEF_CVT_SCALE_FUNC(64u16bf, cvt1_32f, uint64_t, bfloat16_t, float)
DEF_CVT_SCALE_FUNC(64s16bf, cvt1_32f, int64_t, bfloat16_t, float)
DEF_CVT_SCALE_FUNC(16f16bf, cvt1_32f, float16_t, bfloat16_t, float)
DEF_CVT_SCALE_FUNC(16bf, cvt1_32f, bfloat16_t, bfloat16_t, float)
// bool -> ... (64-bit destinations scale in double, the rest in float)
DEF_CVT_SCALEBOOL2_FUNC(8b8u, uchar, float)
DEF_CVT_SCALEBOOL2_FUNC(8b8s, schar, float)
DEF_CVT_SCALEBOOL2_FUNC(8b, bool, float)
DEF_CVT_SCALEBOOL2_FUNC(8b16u, ushort, float)
DEF_CVT_SCALEBOOL2_FUNC(8b16s, short, float)
DEF_CVT_SCALEBOOL2_FUNC(8b32u, unsigned, float)
DEF_CVT_SCALEBOOL2_FUNC(8b32s, int, float)
DEF_CVT_SCALEBOOL2_FUNC(8b32f, float, float)
DEF_CVT_SCALEBOOL2_FUNC(8b64u, uint64_t, double)
DEF_CVT_SCALEBOOL2_FUNC(8b64s, int64_t, double)
DEF_CVT_SCALEBOOL2_FUNC(8b64f, double, double)
DEF_CVT_SCALEBOOL2_FUNC(8b16f, float16_t, float)
DEF_CVT_SCALEBOOL2_FUNC(8b16bf, bfloat16_t, float)
// Returns the cvtScaleAbs*8u kernel (scale, shift, absolute value, 8-bit unsigned output) for the
// given source depth. `depth` is compared as passed in; no CV_MAT_DEPTH masking is applied here.
// NOTE(review): the body contains both an 8-entry lookup table with its own return statement and
// a conditional chain over 13 depths — this looks like a flattened diff; confirm which is live.
BinaryFunc getCvtScaleAbsFunc(int depth)
{
// Table form, indexed directly by depth; the last slot is empty.
static BinaryFunc cvtScaleAbsTab[] =
{
(BinaryFunc)cvtScaleAbs8u, (BinaryFunc)cvtScaleAbs8s8u, (BinaryFunc)cvtScaleAbs16u8u,
(BinaryFunc)cvtScaleAbs16s8u, (BinaryFunc)cvtScaleAbs32s8u, (BinaryFunc)cvtScaleAbs32f8u,
(BinaryFunc)cvtScaleAbs64f8u, 0
};
return cvtScaleAbsTab[depth];
// Conditional-chain form: one kernel per source depth.
BinaryFunc func =
depth == CV_8U ? (BinaryFunc)cvtScaleAbs8u :
depth == CV_8S ? (BinaryFunc)cvtScaleAbs8s8u :
depth == CV_Bool ? (BinaryFunc)cvtScaleAbs8b8u :
depth == CV_16U ? (BinaryFunc)cvtScaleAbs16u8u :
depth == CV_16S ? (BinaryFunc)cvtScaleAbs16s8u :
depth == CV_16F ? (BinaryFunc)cvtScaleAbs16f8u :
depth == CV_16BF ? (BinaryFunc)cvtScaleAbs16bf8u :
depth == CV_32U ? (BinaryFunc)cvtScaleAbs32u8u :
depth == CV_32S ? (BinaryFunc)cvtScaleAbs32s8u :
depth == CV_32F ? (BinaryFunc)cvtScaleAbs32f8u :
depth == CV_64U ? (BinaryFunc)cvtScaleAbs64u8u :
depth == CV_64S ? (BinaryFunc)cvtScaleAbs64s8u :
depth == CV_64F ? (BinaryFunc)cvtScaleAbs64f8u : 0;
// An unrecognised depth leaves func == 0 and trips this assertion.
CV_Assert(func != 0);
return func;
}
// Returns the scale-and-convert kernel (cvtScale<src><dst>) for a (source depth, destination depth) pair.
// NOTE(review): this listing carries two forms of the function back to back — a signature taking
// (sdepth, ddepth) with an 8x8 cvtScaleTab lookup, and a signature taking (sdepth_, ddepth_) with a
// chain of conditionals over 13 depths. That looks like a flattened diff; confirm which form is live.
BinaryFunc getConvertScaleFunc(int sdepth, int ddepth)
BinaryFunc getConvertScaleFunc(int sdepth_, int ddepth_)
{
// Table form: rows are indexed by destination depth, columns by source depth.
static BinaryFunc cvtScaleTab[][8] =
{
{
(BinaryFunc)GET_OPTIMIZED(cvtScale8u), (BinaryFunc)GET_OPTIMIZED(cvtScale8s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale16u8u),
(BinaryFunc)GET_OPTIMIZED(cvtScale16s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale32s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale32f8u),
(BinaryFunc)cvtScale64f8u, (BinaryFunc)cvtScale16f8u
},
{
(BinaryFunc)GET_OPTIMIZED(cvtScale8u8s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u8s),
(BinaryFunc)GET_OPTIMIZED(cvtScale16s8s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s8s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f8s),
(BinaryFunc)cvtScale64f8s, (BinaryFunc)cvtScale16f8s
},
{
(BinaryFunc)GET_OPTIMIZED(cvtScale8u16u), (BinaryFunc)GET_OPTIMIZED(cvtScale8s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale16u),
(BinaryFunc)GET_OPTIMIZED(cvtScale16s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale32s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale32f16u),
(BinaryFunc)cvtScale64f16u, (BinaryFunc)cvtScale16f16u
},
{
(BinaryFunc)GET_OPTIMIZED(cvtScale8u16s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s16s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u16s),
(BinaryFunc)GET_OPTIMIZED(cvtScale16s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s16s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f16s),
(BinaryFunc)cvtScale64f16s, (BinaryFunc)cvtScale16f16s
},
{
(BinaryFunc)GET_OPTIMIZED(cvtScale8u32s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s32s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u32s),
(BinaryFunc)GET_OPTIMIZED(cvtScale16s32s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f32s),
(BinaryFunc)cvtScale64f32s, (BinaryFunc)cvtScale16f32s
},
{
(BinaryFunc)GET_OPTIMIZED(cvtScale8u32f), (BinaryFunc)GET_OPTIMIZED(cvtScale8s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale16u32f),
(BinaryFunc)GET_OPTIMIZED(cvtScale16s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale32s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale32f),
(BinaryFunc)cvtScale64f32f, (BinaryFunc)cvtScale16f32f
},
{
(BinaryFunc)cvtScale8u64f, (BinaryFunc)cvtScale8s64f, (BinaryFunc)cvtScale16u64f,
(BinaryFunc)cvtScale16s64f, (BinaryFunc)cvtScale32s64f, (BinaryFunc)cvtScale32f64f,
(BinaryFunc)cvtScale64f, (BinaryFunc)cvtScale16f64f
},
{
(BinaryFunc)cvtScale8u16f, (BinaryFunc)cvtScale8s16f, (BinaryFunc)cvtScale16u16f,
(BinaryFunc)cvtScale16s16f, (BinaryFunc)cvtScale32s16f, (BinaryFunc)cvtScale32f16f,
(BinaryFunc)cvtScale64f16f, (BinaryFunc)cvtScale16f
},
};
return cvtScaleTab[CV_MAT_DEPTH(ddepth)][CV_MAT_DEPTH(sdepth)];
// Conditional-chain form: both arguments are first passed through CV_MAT_DEPTH.
int sdepth = CV_MAT_DEPTH(sdepth_);
int ddepth = CV_MAT_DEPTH(ddepth_);
// The outer chain selects on destination depth, each inner chain on source depth. Unlike
// getConvertFunc, every pair (including same-depth pairs) has its own cvtScale* kernel.
BinaryFunc func =
ddepth == CV_8U ? (
sdepth == CV_8U ? cvtScale8u :
sdepth == CV_8S ? cvtScale8s8u :
sdepth == CV_Bool ? cvtScale8b8u :
sdepth == CV_16U ? cvtScale16u8u :
sdepth == CV_16S ? cvtScale16s8u :
sdepth == CV_32U ? cvtScale32u8u :
sdepth == CV_32S ? cvtScale32s8u :
sdepth == CV_32F ? cvtScale32f8u :
sdepth == CV_64F ? cvtScale64f8u :
sdepth == CV_16F ? cvtScale16f8u :
sdepth == CV_16BF ? cvtScale16bf8u :
sdepth == CV_64U ? cvtScale64u8u :
sdepth == CV_64S ? cvtScale64s8u :
0) :
ddepth == CV_8S ? (
sdepth == CV_8U ? cvtScale8u8s :
sdepth == CV_8S ? cvtScale8s :
sdepth == CV_Bool ? cvtScale8b8s :
sdepth == CV_16U ? cvtScale16u8s :
sdepth == CV_16S ? cvtScale16s8s :
sdepth == CV_32U ? cvtScale32u8s :
sdepth == CV_32S ? cvtScale32s8s :
sdepth == CV_32F ? cvtScale32f8s :
sdepth == CV_64F ? cvtScale64f8s :
sdepth == CV_16F ? cvtScale16f8s :
sdepth == CV_16BF ? cvtScale16bf8s :
sdepth == CV_64U ? cvtScale64u8s :
sdepth == CV_64S ? cvtScale64s8s :
0) :
ddepth == CV_16U ? (
sdepth == CV_8U ? cvtScale8u16u :
sdepth == CV_8S ? cvtScale8s16u :
sdepth == CV_Bool ? cvtScale8b16u :
sdepth == CV_16U ? cvtScale16u :
sdepth == CV_16S ? cvtScale16s16u :
sdepth == CV_32U ? cvtScale32u16u :
sdepth == CV_32S ? cvtScale32s16u :
sdepth == CV_32F ? cvtScale32f16u :
sdepth == CV_64F ? cvtScale64f16u :
sdepth == CV_16F ? cvtScale16f16u :
sdepth == CV_16BF ? cvtScale16bf16u :
sdepth == CV_64U ? cvtScale64u16u :
sdepth == CV_64S ? cvtScale64s16u :
0) :
ddepth == CV_16S ? (
sdepth == CV_8U ? cvtScale8u16s :
sdepth == CV_8S ? cvtScale8s16s :
sdepth == CV_Bool ? cvtScale8b16s :
sdepth == CV_16U ? cvtScale16u16s :
sdepth == CV_16S ? cvtScale16s :
sdepth == CV_32U ? cvtScale32u16s :
sdepth == CV_32S ? cvtScale32s16s :
sdepth == CV_32F ? cvtScale32f16s :
sdepth == CV_64F ? cvtScale64f16s :
sdepth == CV_16F ? cvtScale16f16s :
sdepth == CV_16BF ? cvtScale16bf16s :
sdepth == CV_64U ? cvtScale64u16s :
sdepth == CV_64S ? cvtScale64s16s :
0) :
ddepth == CV_32U ? (
sdepth == CV_8U ? cvtScale8u32u :
sdepth == CV_8S ? cvtScale8s32u :
sdepth == CV_Bool ? cvtScale8b32u :
sdepth == CV_16U ? cvtScale16u32u :
sdepth == CV_16S ? cvtScale16s32u :
sdepth == CV_32U ? cvtScale32u :
sdepth == CV_32S ? cvtScale32s32u :
sdepth == CV_32F ? cvtScale32f32u :
sdepth == CV_64F ? cvtScale64f32u :
sdepth == CV_16F ? cvtScale16f32u :
sdepth == CV_16BF ? cvtScale16bf32u :
sdepth == CV_64U ? cvtScale64u32u :
sdepth == CV_64S ? cvtScale64s32u :
0) :
ddepth == CV_32S ? (
sdepth == CV_8U ? cvtScale8u32s :
sdepth == CV_8S ? cvtScale8s32s :
sdepth == CV_Bool ? cvtScale8b32s :
sdepth == CV_16U ? cvtScale16u32s :
sdepth == CV_16S ? cvtScale16s32s :
sdepth == CV_32U ? cvtScale32u32s :
sdepth == CV_32S ? cvtScale32s :
sdepth == CV_32F ? cvtScale32f32s :
sdepth == CV_64F ? cvtScale64f32s :
sdepth == CV_16F ? cvtScale16f32s :
sdepth == CV_16BF ? cvtScale16bf32s :
sdepth == CV_64U ? cvtScale64u32s :
sdepth == CV_64S ? cvtScale64s32s :
0) :
ddepth == CV_32F ? (
sdepth == CV_8U ? cvtScale8u32f :
sdepth == CV_8S ? cvtScale8s32f :
sdepth == CV_Bool ? cvtScale8b32f :
sdepth == CV_16U ? cvtScale16u32f :
sdepth == CV_16S ? cvtScale16s32f :
sdepth == CV_32U ? cvtScale32u32f :
sdepth == CV_32S ? cvtScale32s32f :
sdepth == CV_32F ? cvtScale32f :
sdepth == CV_64F ? cvtScale64f32f :
sdepth == CV_16F ? cvtScale16f32f :
sdepth == CV_16BF ? cvtScale16bf32f :
sdepth == CV_64U ? cvtScale64u32f :
sdepth == CV_64S ? cvtScale64s32f :
0) :
ddepth == CV_64F ? (
sdepth == CV_8U ? cvtScale8u64f :
sdepth == CV_8S ? cvtScale8s64f :
sdepth == CV_Bool ? cvtScale8b64f :
sdepth == CV_16U ? cvtScale16u64f :
sdepth == CV_16S ? cvtScale16s64f :
sdepth == CV_32U ? cvtScale32u64f :
sdepth == CV_32S ? cvtScale32s64f :
sdepth == CV_32F ? cvtScale32f64f :
sdepth == CV_64F ? cvtScale64f :
sdepth == CV_16F ? cvtScale16f64f :
sdepth == CV_16BF ? cvtScale16bf64f :
sdepth == CV_64U ? cvtScale64u64f :
sdepth == CV_64S ? cvtScale64s64f :
0) :
ddepth == CV_16F ? (
sdepth == CV_8U ? cvtScale8u16f :
sdepth == CV_8S ? cvtScale8s16f :
sdepth == CV_Bool ? cvtScale8b16f :
sdepth == CV_16U ? cvtScale16u16f :
sdepth == CV_16S ? cvtScale16s16f :
sdepth == CV_32U ? cvtScale32u16f :
sdepth == CV_32S ? cvtScale32s16f :
sdepth == CV_32F ? cvtScale32f16f :
sdepth == CV_64F ? cvtScale64f16f :
sdepth == CV_16F ? cvtScale16f :
sdepth == CV_16BF ? cvtScale16bf16f :
sdepth == CV_64U ? cvtScale64u16f :
sdepth == CV_64S ? cvtScale64s16f :
0) :
ddepth == CV_16BF ? (
sdepth == CV_8U ? cvtScale8u16bf :
sdepth == CV_8S ? cvtScale8s16bf :
sdepth == CV_Bool ? cvtScale8b16bf :
sdepth == CV_16U ? cvtScale16u16bf :
sdepth == CV_16S ? cvtScale16s16bf :
sdepth == CV_32U ? cvtScale32u16bf :
sdepth == CV_32S ? cvtScale32s16bf :
sdepth == CV_32F ? cvtScale32f16bf :
sdepth == CV_64F ? cvtScale64f16bf :
sdepth == CV_16F ? cvtScale16f16bf :
sdepth == CV_16BF ? cvtScale16bf :
sdepth == CV_64U ? cvtScale64u16bf :
sdepth == CV_64S ? cvtScale64s16bf :
0) :
ddepth == CV_Bool ? (
sdepth == CV_8U ? cvtScale8u8b :
sdepth == CV_8S ? cvtScale8s8b :
sdepth == CV_Bool ? cvtScale8b :
sdepth == CV_16U ? cvtScale16u8b :
sdepth == CV_16S ? cvtScale16s8b :
sdepth == CV_32U ? cvtScale32u8b :
sdepth == CV_32S ? cvtScale32s8b :
sdepth == CV_32F ? cvtScale32f8b :
sdepth == CV_64F ? cvtScale64f8b :
sdepth == CV_16F ? cvtScale16f8b :
sdepth == CV_16BF ? cvtScale16bf8b :
sdepth == CV_64U ? cvtScale64u8b :
sdepth == CV_64S ? cvtScale64s8b :
0) :
ddepth == CV_64U ? (
sdepth == CV_8U ? cvtScale8u64u :
sdepth == CV_8S ? cvtScale8s64u :
sdepth == CV_Bool ? cvtScale8b64u :
sdepth == CV_16U ? cvtScale16u64u :
sdepth == CV_16S ? cvtScale16s64u :
sdepth == CV_32U ? cvtScale32u64u :
sdepth == CV_32S ? cvtScale32s64u :
sdepth == CV_32F ? cvtScale32f64u :
sdepth == CV_64F ? cvtScale64f64u :
sdepth == CV_16F ? cvtScale16f64u :
sdepth == CV_16BF ? cvtScale16bf64u :
sdepth == CV_64U ? cvtScale64u :
sdepth == CV_64S ? cvtScale64s64u :
0) :
ddepth == CV_64S ? (
sdepth == CV_8U ? cvtScale8u64s :
sdepth == CV_8S ? cvtScale8s64s :
sdepth == CV_Bool ? cvtScale8b64s :
sdepth == CV_16U ? cvtScale16u64s :
sdepth == CV_16S ? cvtScale16s64s :
sdepth == CV_32U ? cvtScale32u64s :
sdepth == CV_32S ? cvtScale32s64s :
sdepth == CV_32F ? cvtScale32f64s :
sdepth == CV_64F ? cvtScale64f64s :
sdepth == CV_16F ? cvtScale16f64s :
sdepth == CV_16BF ? cvtScale16bf64s :
sdepth == CV_64U ? cvtScale64u64s :
sdepth == CV_64S ? cvtScale64s :
0) :
0;
// Any depth pair not matched above leaves func == 0 and trips this assertion.
CV_Assert(func != 0);
return func;
}
#endif

@ -72,28 +72,43 @@ void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to)
switch(depth)
{
case CV_8U:
scalarToRawData_<uchar>(s, (uchar*)_buf, cn, unroll_to);
scalarToRawData_(s, (uchar*)_buf, cn, unroll_to);
break;
case CV_8S:
scalarToRawData_<schar>(s, (schar*)_buf, cn, unroll_to);
scalarToRawData_(s, (schar*)_buf, cn, unroll_to);
break;
case CV_Bool:
scalarToRawData_(s, (bool*)_buf, cn, unroll_to);
break;
case CV_16U:
scalarToRawData_<ushort>(s, (ushort*)_buf, cn, unroll_to);
scalarToRawData_(s, (ushort*)_buf, cn, unroll_to);
break;
case CV_16S:
scalarToRawData_<short>(s, (short*)_buf, cn, unroll_to);
scalarToRawData_(s, (short*)_buf, cn, unroll_to);
break;
case CV_16F:
scalarToRawData_(s, (float16_t*)_buf, cn, unroll_to);
break;
case CV_16BF:
scalarToRawData_(s, (bfloat16_t*)_buf, cn, unroll_to);
break;
case CV_32U:
scalarToRawData_(s, (unsigned*)_buf, cn, unroll_to);
break;
case CV_32S:
scalarToRawData_<int>(s, (int*)_buf, cn, unroll_to);
scalarToRawData_(s, (int*)_buf, cn, unroll_to);
break;
case CV_32F:
scalarToRawData_<float>(s, (float*)_buf, cn, unroll_to);
scalarToRawData_(s, (float*)_buf, cn, unroll_to);
break;
case CV_64F:
scalarToRawData_<double>(s, (double*)_buf, cn, unroll_to);
case CV_64U:
scalarToRawData_(s, (uint64_t*)_buf, cn, unroll_to);
break;
case CV_16F:
scalarToRawData_<float16_t>(s, (float16_t*)_buf, cn, unroll_to);
case CV_64S:
scalarToRawData_(s, (int64_t*)_buf, cn, unroll_to);
break;
case CV_64F:
scalarToRawData_(s, (double*)_buf, cn, unroll_to);
break;
default:
CV_Error(CV_StsUnsupportedFormat,"");

@ -647,7 +647,7 @@ void scaleAdd(InputArray _src1, double alpha, InputArray _src2, OutputArray _dst
CV_OCL_RUN(_src1.dims() <= 2 && _src2.dims() <= 2 && _dst.isUMat(),
ocl_scaleAdd(_src1, alpha, _src2, _dst, type))
if( depth < CV_32F )
if( depth != CV_32F && depth != CV_64F )
{
addWeighted(_src1, alpha, _src2, 1, 0, _dst, depth);
return;
@ -979,7 +979,7 @@ typedef double (*DotProdFunc)(const uchar* src1, const uchar* src2, int len);
static DotProdFunc getDotProdFunc(int depth)
{
static DotProdFunc dotProdTab[] =
static DotProdFunc dotProdTab[CV_DEPTH_MAX] =
{
(DotProdFunc)GET_OPTIMIZED(dotProd_8u), (DotProdFunc)GET_OPTIMIZED(dotProd_8s),
(DotProdFunc)dotProd_16u, (DotProdFunc)dotProd_16s,

@ -1791,7 +1791,7 @@ diagtransform_64f(const double* src, double* dst, const double* m, int len, int
TransformFunc getTransformFunc(int depth)
{
static TransformFunc transformTab[] =
static TransformFunc transformTab[CV_DEPTH_MAX] =
{
(TransformFunc)transform_8u, (TransformFunc)transform_8s, (TransformFunc)transform_16u,
(TransformFunc)transform_16s, (TransformFunc)transform_32s, (TransformFunc)transform_32f,
@ -1803,7 +1803,7 @@ TransformFunc getTransformFunc(int depth)
TransformFunc getDiagTransformFunc(int depth)
{
static TransformFunc diagTransformTab[] =
static TransformFunc diagTransformTab[CV_DEPTH_MAX] =
{
(TransformFunc)diagtransform_8u, (TransformFunc)diagtransform_8s, (TransformFunc)diagtransform_16u,
(TransformFunc)diagtransform_16s, (TransformFunc)diagtransform_32s, (TransformFunc)diagtransform_32f,

@ -1151,7 +1151,7 @@ Mat Mat::reshape(int new_cn, int new_rows) const
}
if( new_rows > 0 )
{
int sz[] = { new_rows, (int)(total()/new_rows) };
int sz[] = { new_rows, (int)(total()*cn/new_rows) };
return reshape(new_cn, 2, sz);
}
}

@ -311,7 +311,7 @@ static int sqsum64f( const double* src, const uchar* mask, double* sum, double*
SumSqrFunc getSumSqrFunc(int depth)
{
CV_INSTRUMENT_REGION();
static SumSqrFunc sumSqrTab[] =
static SumSqrFunc sumSqrTab[CV_DEPTH_MAX] =
{
(SumSqrFunc)GET_OPTIMIZED(sqsum8u), (SumSqrFunc)sqsum8s, (SumSqrFunc)sqsum16u, (SumSqrFunc)sqsum16s,
(SumSqrFunc)sqsum32s, (SumSqrFunc)GET_OPTIMIZED(sqsum32f), (SumSqrFunc)sqsum64f, 0

@ -50,12 +50,15 @@ typedef void (*MergeFunc)(const uchar** src, uchar* dst, int len, int cn);
static MergeFunc getMergeFunc(int depth)
{
static MergeFunc mergeTab[] =
static MergeFunc mergeTab[CV_DEPTH_MAX] =
{
(MergeFunc)GET_OPTIMIZED(cv::hal::merge8u), (MergeFunc)GET_OPTIMIZED(cv::hal::merge8u),
(MergeFunc)GET_OPTIMIZED(cv::hal::merge16u), (MergeFunc)GET_OPTIMIZED(cv::hal::merge16u),
(MergeFunc)GET_OPTIMIZED(cv::hal::merge32s), (MergeFunc)GET_OPTIMIZED(cv::hal::merge32s),
(MergeFunc)GET_OPTIMIZED(cv::hal::merge64s), (MergeFunc)GET_OPTIMIZED(cv::hal::merge16u)
(MergeFunc)GET_OPTIMIZED(cv::hal::merge64s), (MergeFunc)GET_OPTIMIZED(cv::hal::merge16u),
(MergeFunc)GET_OPTIMIZED(cv::hal::merge16u), (MergeFunc)GET_OPTIMIZED(cv::hal::merge8u),
(MergeFunc)GET_OPTIMIZED(cv::hal::merge64s), (MergeFunc)GET_OPTIMIZED(cv::hal::merge64s),
(MergeFunc)GET_OPTIMIZED(cv::hal::merge32s), 0, 0, 0,
};
return mergeTab[depth];

@ -1002,7 +1002,8 @@ bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc
CV_Assert(!haveSrc2 || _src2.type() == type);
if (depth == CV_32S)
if (depth == CV_32S || depth == CV_8S || depth == CV_32U || depth == CV_64U ||
depth == CV_64S || depth == CV_16F || depth == CV_16BF)
return false;
if ((depth == CV_64F || ddepth == CV_64F) && !doubleSupport)

@ -367,7 +367,7 @@ typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, in
static NormFunc getNormFunc(int normType, int depth)
{
static NormFunc normTab[3][8] =
static NormFunc normTab[3][CV_DEPTH_MAX] =
{
{
(NormFunc)GET_OPTIMIZED(normInf_8u), (NormFunc)GET_OPTIMIZED(normInf_8s), (NormFunc)GET_OPTIMIZED(normInf_16u), (NormFunc)GET_OPTIMIZED(normInf_16s),
@ -388,7 +388,7 @@ static NormFunc getNormFunc(int normType, int depth)
static NormDiffFunc getNormDiffFunc(int normType, int depth)
{
static NormDiffFunc normDiffTab[3][8] =
static NormDiffFunc normDiffTab[3][CV_DEPTH_MAX] =
{
{
(NormDiffFunc)GET_OPTIMIZED(normDiffInf_8u), (NormDiffFunc)normDiffInf_8s,

@ -70,14 +70,19 @@ namespace cv
char braces[5];
void (FormattedImpl::*valueToStr)();
// Per-depth element formatters. Each one prints the value of channel `cn`
// of the element at (row, col) of `mtx` into `buf` with snprintf; the one to
// use is selected through the `valueToStr` member-function pointer.

// Boolean elements: the comparison result is printed, so the text is 0 or 1.
void valueToStrBool() { snprintf(buf, sizeof(buf), "%d", (int)mtx.ptr<uchar>(row, col)[cn] != 0); }
// 8-bit elements are padded to a width of 3 characters.
void valueToStr8u() { snprintf(buf, sizeof(buf), "%3d", (int)mtx.ptr<uchar>(row, col)[cn]); }
void valueToStr8s() { snprintf(buf, sizeof(buf), "%3d", (int)mtx.ptr<schar>(row, col)[cn]); }
// 16- and 32-bit integer elements are printed without padding.
void valueToStr16u() { snprintf(buf, sizeof(buf), "%d", (int)mtx.ptr<ushort>(row, col)[cn]); }
void valueToStr16s() { snprintf(buf, sizeof(buf), "%d", (int)mtx.ptr<short>(row, col)[cn]); }
void valueToStr32u() { snprintf(buf, sizeof(buf), "%u", mtx.ptr<unsigned>(row, col)[cn]); }
void valueToStr32s() { snprintf(buf, sizeof(buf), "%d", mtx.ptr<int>(row, col)[cn]); }
// Floating-point elements use the `floatFormat` format string.
void valueToStr32f() { snprintf(buf, sizeof(buf), floatFormat, mtx.ptr<float>(row, col)[cn]); }
void valueToStr64f() { snprintf(buf, sizeof(buf), floatFormat, mtx.ptr<double>(row, col)[cn]); }
// 64-bit integers are cast to (unsigned) long long to match %llu / %lld.
void valueToStr64u() { snprintf(buf, sizeof(buf), "%llu", (unsigned long long)mtx.ptr<uint64_t>(row, col)[cn]); }
void valueToStr64s() { snprintf(buf, sizeof(buf), "%lld", (long long)mtx.ptr<int64_t>(row, col)[cn]); }
// Half-precision types are converted to float before formatting.
void valueToStr16f() { snprintf(buf, sizeof(buf), floatFormat, (float)mtx.ptr<float16_t>(row, col)[cn]); }
void valueToStr16bf() { snprintf(buf, sizeof(buf), floatFormat, (float)mtx.ptr<bfloat16_t>(row, col)[cn]); }
// Fallback: produces an empty string.
void valueToStrOther() { buf[0] = 0; }
public:
@ -111,13 +116,19 @@ namespace cv
{
case CV_8U: valueToStr = &FormattedImpl::valueToStr8u; break;
case CV_8S: valueToStr = &FormattedImpl::valueToStr8s; break;
case CV_Bool: valueToStr = &FormattedImpl::valueToStrBool; break;
case CV_16U: valueToStr = &FormattedImpl::valueToStr16u; break;
case CV_16S: valueToStr = &FormattedImpl::valueToStr16s; break;
case CV_32U: valueToStr = &FormattedImpl::valueToStr32u; break;
case CV_32S: valueToStr = &FormattedImpl::valueToStr32s; break;
case CV_32F: valueToStr = &FormattedImpl::valueToStr32f; break;
case CV_64F: valueToStr = &FormattedImpl::valueToStr64f; break;
default: CV_Assert(mtx.depth() == CV_16F);
valueToStr = &FormattedImpl::valueToStr16f;
case CV_64U: valueToStr = &FormattedImpl::valueToStr64u; break;
case CV_64S: valueToStr = &FormattedImpl::valueToStr64s; break;
case CV_16F: valueToStr = &FormattedImpl::valueToStr16f; break;
case CV_16BF: valueToStr = &FormattedImpl::valueToStr16bf; break;
default:
CV_Error_(Error::StsError, ("unsupported matrix type %d\n", mtx.depth()));
}
}

@ -56,6 +56,28 @@ char* itoa( int _val, char* buffer, int /*radix*/ )
return ptr;
}
// Converts a 64-bit integer to decimal text.
//
// _val     value to convert; when _signed is false its bit pattern is
//          interpreted as an unsigned 64-bit integer.
// buffer   caller-provided scratch space; the digits are written backwards
//          from buffer + 23, so it must hold at least 24 characters.
// radix    ignored, the output is always base 10.
// _signed  whether _val is to be treated as signed.
//
// Returns a pointer into `buffer` at the first character of the
// NUL-terminated result (not necessarily `buffer` itself).
char* itoa( int64_t _val, char* buffer, int /*radix*/, bool _signed)
{
    const int radix = 10;
    char* ptr=buffer + 23 /* enough even for 64-bit integers */;
    const bool negative = _signed && _val < 0;
    // Take the magnitude in unsigned arithmetic. An unqualified abs() may
    // resolve to the int overload (truncating the value), and negating
    // INT64_MIN as a signed number is undefined; 0 - (uint64_t)x is well
    // defined for every x and yields |x|.
    uint64_t val = negative ? (uint64_t)0 - (uint64_t)_val : (uint64_t)_val;
    *ptr = '\0';

    do
    {
        uint64_t r = val / radix;
        *--ptr = (char)(val - (r*radix) + '0');
        val = r;
    }
    while( val != 0 );

    if( negative )
        *--ptr = '-';

    return ptr;
}
char* doubleToString( char* buf, size_t bufSize, double value, bool explicitZero )
{
Cv64suf val;
@ -142,12 +164,12 @@ char* floatToString( char* buf, size_t bufSize, float value, bool halfprecision,
return buf;
}
static const char symbols[9] = "ucwsifdh";
static const char symbols[] = "ucwsifdhHbLUn";
static char typeSymbol(int depth)
{
CV_StaticAssert(CV_64F == 6, "");
CV_CheckDepth(depth, depth >=0 && depth <= CV_16F, "");
CV_CheckDepth(depth, depth >= 0 && depth <= CV_32U, "");
return symbols[depth];
}
@ -264,13 +286,18 @@ int calcStructSize( const char* dt, int initial_size )
switch (v)
{
case 'u': { elem_max_size = std::max( elem_max_size, sizeof(uchar ) ); break; }
case 'b': { elem_max_size = std::max( elem_max_size, sizeof(bool ) ); break; }
case 'c': { elem_max_size = std::max( elem_max_size, sizeof(schar ) ); break; }
case 'w': { elem_max_size = std::max( elem_max_size, sizeof(ushort) ); break; }
case 's': { elem_max_size = std::max( elem_max_size, sizeof(short ) ); break; }
case 'i': { elem_max_size = std::max( elem_max_size, sizeof(int ) ); break; }
case 'n': { elem_max_size = std::max( elem_max_size, sizeof(unsigned) ); break; }
case 'f': { elem_max_size = std::max( elem_max_size, sizeof(float ) ); break; }
case 'd': { elem_max_size = std::max( elem_max_size, sizeof(double) ); break; }
case 'h': { elem_max_size = std::max(elem_max_size, sizeof(float16_t)); break; }
case 'h': { elem_max_size = std::max( elem_max_size, sizeof(float16_t)); break; }
case 'H': { elem_max_size = std::max( elem_max_size, sizeof(bfloat16_t)); break; }
case 'I': { elem_max_size = std::max( elem_max_size, sizeof(int64_t)); break; }
case 'U': { elem_max_size = std::max( elem_max_size, sizeof(uint64_t)); break; }
default:
CV_Error_(Error::StsNotImplemented, ("Unknown type identifier: '%c' in '%s'", (char)(*type), dt));
}
@ -1097,6 +1124,10 @@ void FileStorage::Impl::writeRawData(const std::string &dt, const void *_data, s
ptr = fs::itoa(*(uchar *) data, buf, 10);
data++;
break;
case CV_Bool:
ptr = fs::itoa(*(uchar *) data != 0, buf, 10);
data++;
break;
case CV_8S:
ptr = fs::itoa(*(char *) data, buf, 10);
data++;
@ -1109,10 +1140,22 @@ void FileStorage::Impl::writeRawData(const std::string &dt, const void *_data, s
ptr = fs::itoa(*(short *) data, buf, 10);
data += sizeof(short);
break;
case CV_32U:
ptr = fs::itoa((int64_t)*(unsigned*) data, buf, 10, false);
data += sizeof(unsigned);
break;
case CV_32S:
ptr = fs::itoa(*(int *) data, buf, 10);
data += sizeof(int);
break;
case CV_64U:
ptr = fs::itoa(*(uint64_t*) data, buf, 10, false);
data += sizeof(uint64_t);
break;
case CV_64S:
ptr = fs::itoa(*(int64_t*) data, buf, 10, true);
data += sizeof(int64_t);
break;
case CV_32F:
ptr = fs::floatToString(buf, sizeof(buf), *(float *) data, false, explicitZero);
data += sizeof(float);
@ -1121,10 +1164,14 @@ void FileStorage::Impl::writeRawData(const std::string &dt, const void *_data, s
ptr = fs::doubleToString(buf, sizeof(buf), *(double *) data, explicitZero);
data += sizeof(double);
break;
case CV_16F: /* reference */
case CV_16F:
ptr = fs::floatToString(buf, sizeof(buf), (float) *(float16_t *) data, true, explicitZero);
data += sizeof(float16_t);
break;
case CV_16BF:
ptr = fs::floatToString(buf, sizeof(buf), (float) *(bfloat16_t *) data, true, explicitZero);
data += sizeof(bfloat16_t);
break;
default:
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported type");
return;
@ -2572,6 +2619,10 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, void* _data0, si
*(char*)data = saturate_cast<schar>(ival);
data++;
break;
case CV_Bool:
*(bool*)data = ival != 0;
data++;
break;
case CV_16U:
*(ushort*)data = saturate_cast<ushort>(ival);
data += sizeof(ushort);
@ -2580,6 +2631,10 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, void* _data0, si
*(short*)data = saturate_cast<short>(ival);
data += sizeof(short);
break;
case CV_32U:
*(unsigned*)data = (unsigned)std::max(ival, 0);
data += sizeof(unsigned);
break;
case CV_32S:
*(int*)data = ival;
data += sizeof(int);
@ -2588,6 +2643,14 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, void* _data0, si
*(float*)data = (float)ival;
data += sizeof(float);
break;
case CV_64U:
*(uint64_t*)data = (uint64_t)ival;
data += sizeof(uint64_t);
break;
case CV_64S:
*(int64_t*)data = (int64_t)ival;
data += sizeof(int64_t);
break;
case CV_64F:
*(double*)data = (double)ival;
data += sizeof(double);
@ -2596,6 +2659,10 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, void* _data0, si
*(float16_t*)data = float16_t((float)ival);
data += sizeof(float16_t);
break;
case CV_16BF:
*(bfloat16_t*)data = bfloat16_t((float)ival);
data += sizeof(bfloat16_t);
break;
default:
CV_Error( Error::StsUnsupportedFormat, "Unsupported type" );
}
@ -2622,6 +2689,10 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, void* _data0, si
*(short*)data = saturate_cast<short>(fval);
data += sizeof(short);
break;
case CV_32U:
    // Store through an unsigned pointer (as the integer-valued branch does)
    // so the saturated value is written without an unsigned -> int conversion.
    *(unsigned*)data = saturate_cast<unsigned>(fval);
    data += sizeof(unsigned);
    break;
case CV_32S:
*(int*)data = saturate_cast<int>(fval);
data += sizeof(int);
@ -2630,6 +2701,14 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, void* _data0, si
*(float*)data = (float)fval;
data += sizeof(float);
break;
case CV_64U:
*(uint64_t*)data = (uint64_t)round(std::max(fval, 0.));
data += sizeof(uint64_t);
break;
case CV_64S:
    // Signed target: round to nearest and keep the sign. Clamping at zero
    // belongs to the unsigned CV_64U case only; applying it here turned
    // every negative value into 0.
    *(int64_t*)data = (int64_t)round(fval);
    data += sizeof(int64_t);
    break;
case CV_64F:
*(double*)data = fval;
data += sizeof(double);
@ -2638,6 +2717,10 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, void* _data0, si
*(float16_t*)data = float16_t((float)fval);
data += sizeof(float16_t);
break;
case CV_16BF:
*(bfloat16_t*)data = bfloat16_t((float)fval);
data += sizeof(bfloat16_t);
break;
default:
CV_Error( Error::StsUnsupportedFormat, "Unsupported type" );
}

@ -86,6 +86,7 @@ namespace fs
{
int strcasecmp(const char* str1, const char* str2);
char* itoa( int _val, char* buffer, int /*radix*/ );
char* itoa( int64_t _val, char* buffer, int /*radix*/, bool _signed );
char* floatToString( char* buf, size_t bufSize, float value, bool halfprecision, bool explicitZero );
char* doubleToString( char* buf, size_t bufSize, double value, bool explicitZero );

@ -51,38 +51,53 @@ namespace cv
Multiply-with-carry generator is used here:
temp = ( A*X(n) + carry )
X(n+1) = temp mod (2^32)
carry = temp / (2^32)
carry = floor (temp / (2^32))
*/
#define RNG_NEXT(x) ((uint64)(unsigned)(x)*CV_RNG_COEFF + ((x) >> 32))
// make it jump-less
#define CN_NEXT(k) (((k) + 1) & (((k) >= cn) - 1))
enum
{
RNG_FLAG_SMALL = 0x40000000,
RNG_FLAG_STDMTX = 0x80000000
};
/***************************************************************************************\
* Pseudo-Random Number Generators (PRNGs) *
\***************************************************************************************/
template<typename T> static void
randBits_( T* arr, int len, uint64* state, const Vec2i* p, bool small_flag )
randBits_( T* arr, int len, int cn, uint64* state, const Vec2l* p, int flags )
{
bool small_flag = (flags & RNG_FLAG_SMALL) != 0;
uint64 temp = *state;
int i;
int i, k = 0;
len *= cn;
--cn;
if( !small_flag )
{
for( i = 0; i <= len - 4; i += 4 )
{
int t0, t1;
int64_t t0, t1;
temp = RNG_NEXT(temp);
t0 = ((int)temp & p[i][0]) + p[i][1];
t0 = ((int64_t)temp & p[k][0]) + p[k][1];
k = CN_NEXT(k);
temp = RNG_NEXT(temp);
t1 = ((int)temp & p[i+1][0]) + p[i+1][1];
t1 = ((int64_t)temp & p[k][0]) + p[k][1];
k = CN_NEXT(k);
arr[i] = saturate_cast<T>(t0);
arr[i+1] = saturate_cast<T>(t1);
temp = RNG_NEXT(temp);
t0 = ((int)temp & p[i+2][0]) + p[i+2][1];
t0 = ((int64_t)temp & p[k][0]) + p[k][1];
k = CN_NEXT(k);
temp = RNG_NEXT(temp);
t1 = ((int)temp & p[i+3][0]) + p[i+3][1];
t1 = ((int64_t)temp & p[k][0]) + p[k][1];
k = CN_NEXT(k);
arr[i+2] = saturate_cast<T>(t0);
arr[i+3] = saturate_cast<T>(t1);
}
@ -91,16 +106,23 @@ randBits_( T* arr, int len, uint64* state, const Vec2i* p, bool small_flag )
{
for( i = 0; i <= len - 4; i += 4 )
{
int t0, t1, t;
int64_t t0, t1, t;
temp = RNG_NEXT(temp);
t = (int)temp;
t0 = (t & p[i][0]) + p[i][1];
t1 = ((t >> 8) & p[i+1][0]) + p[i+1][1];
t = temp;
// p[k][0] is within 0..255 in this branch (small_flag==true),
// so we don't need to do (t>>...)&255,
// the upper bits will be cleaned with ... & p[k][0].
t0 = (t & p[k][0]) + p[k][1];
k = CN_NEXT(k);
t1 = ((t >> 8) & p[k][0]) + p[k][1];
k = CN_NEXT(k);
arr[i] = saturate_cast<T>(t0);
arr[i+1] = saturate_cast<T>(t1);
t0 = ((t >> 16) & p[i+2][0]) + p[i+2][1];
t1 = ((t >> 24) & p[i+3][0]) + p[i+3][1];
t0 = ((t >> 16) & p[k][0]) + p[k][1];
k = CN_NEXT(k);
t1 = ((t >> 24) & p[k][0]) + p[k][1];
k = CN_NEXT(k);
arr[i+2] = saturate_cast<T>(t0);
arr[i+3] = saturate_cast<T>(t1);
}
@ -108,10 +130,11 @@ randBits_( T* arr, int len, uint64* state, const Vec2i* p, bool small_flag )
for( ; i < len; i++ )
{
int t0;
int64_t t0;
temp = RNG_NEXT(temp);
t0 = ((int)temp & p[i][0]) + p[i][1];
t0 = ((int64_t)temp & p[k][0]) + p[k][1];
k = CN_NEXT(k);
arr[i] = saturate_cast<T>(t0);
}
@ -123,101 +146,145 @@ struct DivStruct
unsigned d;
unsigned M;
int sh1, sh2;
int delta;
int64_t delta;
uint64_t diff;
};
template<typename T> static void
randi_( T* arr, int len, uint64* state, const DivStruct* p )
randi_( T* arr, int len, int cn, uint64* state, const DivStruct* p )
{
uint64 temp = *state;
int k = 0;
len *= cn;
cn--;
for( int i = 0; i < len; i++ )
{
temp = RNG_NEXT(temp);
unsigned t = (unsigned)temp;
unsigned v = (unsigned)(((uint64)t * p[i].M) >> 32);
v = (v + ((t - v) >> p[i].sh1)) >> p[i].sh2;
v = t - v*p[i].d + p[i].delta;
arr[i] = saturate_cast<T>((int)v);
unsigned v = (unsigned)(((uint64)t * p[k].M) >> 32);
v = (v + ((t - v) >> p[k].sh1)) >> p[k].sh2;
int64_t res = (int64_t)(t - v*p[k].d) + p[k].delta;
k = CN_NEXT(k);
arr[i] = saturate_cast<T>(res);
}
*state = temp;
}
static void
randi_( int64_t* arr, int len, int cn, uint64* state, const DivStruct* p )
{
uint64 temp = *state;
int k = 0;
len *= cn;
cn--;
for( int i = 0; i < len; i++ )
{
temp = RNG_NEXT(temp);
unsigned t0 = (unsigned)temp;
temp = RNG_NEXT(temp);
unsigned t1 = (unsigned)temp;
int64_t t = (int64_t)((((uint64_t)t0 << 32) | t1) % p[k].diff) + p[k].delta;
k = CN_NEXT(k);
arr[i] = t;
}
*state = temp;
}
static void
randi_( uint64_t* arr, int len, int cn, uint64* state, const DivStruct* p )
{
uint64 temp = *state;
int k = 0;
len *= cn;
cn--;
for( int i = 0; i < len; i++ )
{
temp = RNG_NEXT(temp);
unsigned t0 = (unsigned)temp;
temp = RNG_NEXT(temp);
unsigned t1 = (unsigned)temp;
uint64_t t = (((uint64_t)t0 << 32) | t1) % p[k].diff;
int64_t delta = p[k].delta;
k = CN_NEXT(k);
arr[i] = delta >= 0 || t >= (uint64_t)-delta ? t + (uint64_t)delta : 0;
}
*state = temp;
}
#define DEF_RANDI_FUNC(suffix, type) \
static void randBits_##suffix(type* arr, int len, uint64* state, \
const Vec2i* p, void*, bool small_flag) \
{ randBits_(arr, len, state, p, small_flag); } \
static void randBits_##suffix(type* arr, int len, int cn, uint64* state, \
const Vec2l* p, void*, int flags) \
{ randBits_(arr, len, cn, state, p, flags); } \
\
static void randi_##suffix(type* arr, int len, uint64* state, \
const DivStruct* p, void*, bool ) \
{ randi_(arr, len, state, p); }
static void randi_##suffix(type* arr, int len, int cn, uint64* state, \
const DivStruct* p, void*, int) \
{ randi_(arr, len, cn, state, p); }
DEF_RANDI_FUNC(8u, uchar)
DEF_RANDI_FUNC(8b, bool)
DEF_RANDI_FUNC(8s, schar)
DEF_RANDI_FUNC(16u, ushort)
DEF_RANDI_FUNC(16s, short)
DEF_RANDI_FUNC(32u, unsigned)
DEF_RANDI_FUNC(32s, int)
DEF_RANDI_FUNC(64u, uint64_t)
DEF_RANDI_FUNC(64s, int64_t)
static void randf_32f( float* arr, int len, uint64* state, const Vec2f* p, void*, bool )
static void randf_16_or_32f( void* dst, int len_, int cn, uint64* state, const Vec2f* p, float* fbuf, int flags )
{
int depth = CV_MAT_DEPTH(flags);
uint64 temp = *state;
int k = 0, len = len_*cn;
float* arr = depth == CV_16F || depth == CV_16BF ? fbuf : (float*)dst;
cn--;
for( int i = 0; i < len; i++ )
{
int t = (int)(temp = RNG_NEXT(temp));
arr[i] = (float)(t*p[i][0]);
arr[i] = (float)(t*p[k][0]);
k = CN_NEXT(k);
}
*state = temp;
// add bias separately to make the generated random numbers
// more deterministic, independent of
// architecture details (FMA instruction use etc.)
hal::addRNGBias32f(arr, &p[0][0], len);
hal::addRNGBias32f(arr, &p[0][0], len_, cn+1);
if (depth == CV_16F)
hal::cvt32f16f(fbuf, (float16_t*)dst, len);
else if (depth == CV_16BF)
hal::cvt32f16bf(fbuf, (bfloat16_t*)dst, len);
}
static void
randf_64f( double* arr, int len, uint64* state, const Vec2d* p, void*, bool )
randf_64f( double* arr, int len_, int cn, uint64* state, const Vec2d* p, void*, int )
{
uint64 temp = *state;
int k = 0, len = len_*cn;
cn--;
for( int i = 0; i < len; i++ )
{
temp = RNG_NEXT(temp);
int64 v = (temp >> 32)|(temp << 32);
arr[i] = v*p[i][0];
}
*state = temp;
hal::addRNGBias64f(arr, &p[0][0], len);
}
static void randf_16f( float16_t* arr, int len, uint64* state, const Vec2f* p, float* fbuf, bool )
{
uint64 temp = *state;
for( int i = 0; i < len; i++ )
{
float f = (float)(int)(temp = RNG_NEXT(temp));
fbuf[i] = f*p[i][0];
int64_t v = (int64_t)((temp >> 32) | (temp << 32));
arr[i] = v*p[k][0];
k = CN_NEXT(k);
}
*state = temp;
// add bias separately to make the generated random numbers
// more deterministic, independent of
// architecture details (FMA instruction use etc.)
hal::addRNGBias32f(fbuf, &p[0][0], len);
hal::cvt32f16f(fbuf, arr, len);
hal::addRNGBias64f(arr, &p[0][0], len_, cn+1);
}
typedef void (*RandFunc)(uchar* arr, int len, uint64* state, const void* p, void* tempbuf, bool small_flag);
typedef void (*RandFunc)(uchar* arr, int len, int cn, uint64* state,
const void* p, void* tempbuf, int flags);
static RandFunc randTab[][8] =
static RandFunc randTab[][16] =
{
{
(RandFunc)randi_8u, (RandFunc)randi_8s, (RandFunc)randi_16u, (RandFunc)randi_16s,
(RandFunc)randi_32s, (RandFunc)randf_32f, (RandFunc)randf_64f, (RandFunc)randf_16f
(RandFunc)randi_8u, (RandFunc)randi_8s, (RandFunc)randi_16u,
(RandFunc)randi_16s, (RandFunc)randi_32s, (RandFunc)randf_16_or_32f,
(RandFunc)randf_64f, (RandFunc)randf_16_or_32f, (RandFunc)randf_16_or_32f,
(RandFunc)randi_8b, (RandFunc)randi_64u, (RandFunc)randi_64s,
(RandFunc)randi_32u, 0, 0, 0
},
{
(RandFunc)randBits_8u, (RandFunc)randBits_8s, (RandFunc)randBits_16u, (RandFunc)randBits_16s,
(RandFunc)randBits_32s, 0, 0, 0
(RandFunc)randBits_8u, (RandFunc)randBits_8s, (RandFunc)randBits_16u,
(RandFunc)randBits_16s, (RandFunc)randBits_32s, 0, 0, 0, 0,
(RandFunc)randBits_8b, (RandFunc)randBits_64u, (RandFunc)randBits_64s,
(RandFunc)randBits_32u, 0, 0, 0
}
};
@ -309,90 +376,153 @@ double RNG::gaussian(double sigma)
return temp*sigma;
}
template<typename T, typename PT> static void
randnScale_( const float* src, T* dst, int len, int cn, const PT* mean, const PT* stddev, bool stdmtx )
randnScale_(float* src, T* dst, int len, int cn,
const PT* mean, const PT* stddev, int flags )
{
bool stdmtx = (flags & RNG_FLAG_STDMTX) != 0;
int i, j, k;
if( !stdmtx )
if( !stdmtx || cn == 1 )
{
if( cn == 1 )
{
PT b = mean[0], a = stddev[0];
PT a = stddev[0], b = mean[0];
for( i = 0; i < len; i++ )
dst[i] = saturate_cast<T>(src[i]*a + b);
}
else
{
for( i = 0; i < len; i++, src += cn, dst += cn )
for( k = 0; k < cn; k++ )
dst[k] = saturate_cast<T>(src[k]*stddev[k] + mean[k]);
len *= cn;
cn--;
for( i = k = 0; i < len; i++ ) {
dst[i] = saturate_cast<T>(src[i]*stddev[k] + mean[k]);
k = CN_NEXT(k);
}
}
}
else
{
for( i = 0; i < len; i++, src += cn, dst += cn )
len *= cn;
cn--;
for( i = j = 0; i < len; i++ )
{
for( j = 0; j < cn; j++ )
{
PT s = mean[j];
for( k = 0; k < cn; k++ )
s += src[k]*stddev[j*cn + k];
dst[j] = saturate_cast<T>(s);
}
PT s = mean[j];
int i0 = i - j;
for( k = 0; k <= cn; k++ )
s += src[i0 + k]*stddev[j*(cn+1) + k];
dst[i] = saturate_cast<T>(s);
j = CN_NEXT(j);
}
}
}
static void randnScale_8u( const float* src, uchar* dst, int len, int cn,
const float* mean, const float* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
static void randnScale_8s( const float* src, schar* dst, int len, int cn,
const float* mean, const float* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
static void randnScale_16u( const float* src, ushort* dst, int len, int cn,
const float* mean, const float* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
static void randnScale_16s( const float* src, short* dst, int len, int cn,
const float* mean, const float* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
static void randnScale_32s( const float* src, int* dst, int len, int cn,
const float* mean, const float* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
static void randnScale_32f( const float* src, float* dst, int len, int cn,
const float* mean, const float* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
// special version for 16f, 16bf and 32f
static void
randnScale_16_or_32f(float* fbuf, float* dst, int len, int cn,
const float* mean, const float* stddev, int flags)
{
bool stdmtx = (flags & RNG_FLAG_STDMTX) != 0;
int depth = CV_MAT_DEPTH(flags);
float* arr = depth == CV_16F || depth == CV_16BF ? fbuf : dst;
int i, j, k;
static void randnScale_64f( const float* src, double* dst, int len, int cn,
const double* mean, const double* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
if( !stdmtx || cn == 1 )
{
if( cn == 1 )
{
float a = stddev[0], b = mean[0];
for( i = 0; i < len; i++ )
arr[i] = fbuf[i]*a + b;
}
else
{
len *= cn;
cn--;
for( i = k = 0; i < len; i++ ) {
arr[i] = fbuf[i]*stddev[k] + mean[k];
k = CN_NEXT(k);
}
}
}
else if( depth == CV_32F )
{
len *= cn;
cn--;
for( i = j = 0; i < len; i++ )
{
float s = mean[j];
int i0 = i - j;
for( k = 0; k <= cn; k++ )
s += fbuf[i0 + k]*stddev[j*(cn+1) + k];
dst[i] = s;
j = CN_NEXT(j);
}
}
else
{
float elembuf[CV_CN_MAX];
len *= cn;
for( i = 0; i < len; i += cn )
{
// since we process fbuf in-place,
// we need to copy each cn-channel element
// prior to matrix multiplication
for (j = 0; j < cn; j++)
elembuf[j] = fbuf[i + j];
for (j = 0; j < cn; j++) {
float s = mean[j];
for( k = 0; k < cn; k++ )
s += elembuf[k]*stddev[j*cn + k];
fbuf[i + j] = s;
}
}
}
if (depth == CV_16F)
hal::cvt32f16f(fbuf, (float16_t*)dst, len);
else if (depth == CV_16BF)
hal::cvt32f16bf(fbuf, (bfloat16_t*)dst, len);
}
typedef void (*RandnScaleFunc)(const float* src, uchar* dst, int len, int cn,
const uchar*, const uchar*, bool);
// Defines a non-template wrapper randnScale_<suffix> for destination element
// type T and parameter type PT; it simply forwards all arguments to the
// randnScale_ template.
#define DEF_RANDNSCALE_FUNC(suffix, T, PT) \
static void randnScale_##suffix( float* src, T* dst, int len, int cn, \
const PT* mean, const PT* stddev, int flags ) \
{ randnScale_(src, dst, len, cn, mean, stddev, flags); }
// Wrappers for destination types up to 32 bits take float mean/stddev;
// the 64-bit destination types take double mean/stddev.
DEF_RANDNSCALE_FUNC(8u, uchar, float)
DEF_RANDNSCALE_FUNC(8b, bool, float)
DEF_RANDNSCALE_FUNC(8s, schar, float)
DEF_RANDNSCALE_FUNC(16u, ushort, float)
DEF_RANDNSCALE_FUNC(16s, short, float)
DEF_RANDNSCALE_FUNC(32u, unsigned, float)
DEF_RANDNSCALE_FUNC(32s, int, float)
DEF_RANDNSCALE_FUNC(64u, uint64_t, double)
DEF_RANDNSCALE_FUNC(64s, int64_t, double)
DEF_RANDNSCALE_FUNC(64f, double, double)
typedef void (*RandnScaleFunc)(float* src, void* dst, int len, int cn,
const void* mean, const void* stddev, int flags);
static RandnScaleFunc randnScaleTab[] =
{
(RandnScaleFunc)randnScale_8u, (RandnScaleFunc)randnScale_8s, (RandnScaleFunc)randnScale_16u,
(RandnScaleFunc)randnScale_16s, (RandnScaleFunc)randnScale_32s, (RandnScaleFunc)randnScale_32f,
(RandnScaleFunc)randnScale_64f, 0
(RandnScaleFunc)randnScale_16s, (RandnScaleFunc)randnScale_32s, (RandnScaleFunc)randnScale_16_or_32f,
(RandnScaleFunc)randnScale_64f, (RandnScaleFunc)randnScale_16_or_32f, (RandnScaleFunc)randnScale_16_or_32f,
(RandnScaleFunc)randnScale_8b, (RandnScaleFunc)randnScale_64u, (RandnScaleFunc)randnScale_64s,
(RandnScaleFunc)randnScale_32u, 0, 0, 0
};
void RNG::fill( InputOutputArray _mat, int disttype,
InputArray _param1arg, InputArray _param2arg, bool saturateRange )
InputArray _param1arg, InputArray _param2arg,
bool saturateRange )
{
CV_Assert(!_mat.empty());
Mat mat = _mat.getMat(), _param1 = _param1arg.getMat(), _param2 = _param2arg.getMat();
int depth = mat.depth(), cn = mat.channels();
int j, depth = mat.depth(), cn = mat.channels();
int esz1 = CV_ELEM_SIZE(depth);
AutoBuffer<double> _parambuf;
int j, k;
bool fast_int_mode = false;
bool smallFlag = true;
bool small_flag = false;
RandFunc func = 0;
RandnScaleFunc scaleFunc = 0;
@ -405,10 +535,7 @@ void RNG::fill( InputOutputArray _mat, int disttype,
(_param1.size() == Size(1, 4) && _param1.type() == CV_64F && cn <= 4))) ||
(_param2.rows == cn && _param2.cols == cn && disttype == NORMAL)));
Vec2i* ip = 0;
Vec2d* dp = 0;
Vec2f* fp = 0;
DivStruct* ds = 0;
const void* uni_param = 0;
uchar* mean = 0;
uchar* stddev = 0;
bool stdmtx = false;
@ -417,47 +544,48 @@ void RNG::fill( InputOutputArray _mat, int disttype,
if( disttype == UNIFORM )
{
_parambuf.allocate(cn*8 + n1 + n2);
_parambuf.allocate((sizeof(DivStruct)+sizeof(double)-1)/sizeof(double) + cn*2 + n1 + n2);
double* parambuf = _parambuf.data();
double* p1 = _param1.ptr<double>();
double* p2 = _param2.ptr<double>();
if( !_param1.isContinuous() || _param1.type() != CV_64F || n1 != cn )
{
Mat tmp(_param1.size(), CV_64F, parambuf);
_param1.convertTo(tmp, CV_64F);
p1 = parambuf;
if( n1 < cn )
for( j = n1; j < cn; j++ )
p1[j] = p1[j-n1];
Mat tmp(_param1.size(), CV_64F, p1);
_param1.convertTo(tmp, CV_64F);
for( j = n1; j < cn; j++ )
p1[j] = p1[j-n1];
}
if( !_param2.isContinuous() || _param2.type() != CV_64F || n2 != cn )
{
Mat tmp(_param2.size(), CV_64F, parambuf + cn);
_param2.convertTo(tmp, CV_64F);
p2 = parambuf + cn;
if( n2 < cn )
for( j = n2; j < cn; j++ )
p2[j] = p2[j-n2];
Mat tmp(_param2.size(), CV_64F, p2);
_param2.convertTo(tmp, CV_64F);
for( j = n2; j < cn; j++ )
p2[j] = p2[j-n2];
}
if( depth <= CV_32S )
if( CV_IS_INT_TYPE(depth) )
{
ip = (Vec2i*)(parambuf + cn*2);
Vec2l* ip = (Vec2l*)(parambuf + cn*2);
for( j = 0, fast_int_mode = true; j < cn; j++ )
{
double a = std::min(p1[j], p2[j]);
double b = std::max(p1[j], p2[j]);
if( saturateRange )
{
a = std::max(a, depth == CV_8U || depth == CV_16U ? 0. :
depth == CV_8S ? -128. : depth == CV_16S ? -32768. : (double)INT_MIN);
b = std::min(b, depth == CV_8U ? 256. : depth == CV_16U ? 65536. :
depth == CV_8S ? 128. : depth == CV_16S ? 32768. : (double)INT_MAX);
a = std::max(a, depth == CV_8U || depth == CV_16U || depth == CV_32U ||
depth == CV_64U || depth == CV_Bool ? 0. :
depth == CV_8S ? -128. : depth == CV_16S ? -32768. :
depth == CV_32S ? (double)INT_MIN : (double)INT64_MIN);
b = std::min(b, depth == CV_8U ? 256. : depth == CV_Bool ? 2. : depth == CV_16U ? 65536. :
depth == CV_8S ? 128. : depth == CV_16S ? 32768. : depth == CV_32U ? (double)UINT_MAX :
depth == CV_32S ? (double)INT_MAX : (double)INT64_MAX);
}
ip[j][1] = cvCeil(a);
int idiff = ip[j][0] = cvFloor(b) - ip[j][1] - 1;
ip[j][1] = (int64_t)ceil(a);
int64_t idiff = ip[j][0] = (int64_t)floor(b) - ip[j][1] - 1;
if (idiff < 0)
{
idiff = 0;
@ -467,30 +595,41 @@ void RNG::fill( InputOutputArray _mat, int disttype,
fast_int_mode = fast_int_mode && diff <= 4294967296. && (idiff & (idiff+1)) == 0;
if( fast_int_mode )
smallFlag = smallFlag && (idiff <= 255);
small_flag = idiff <= 255;
else
{
if( diff > INT_MAX )
ip[j][0] = INT_MAX;
if( a < INT_MIN/2 )
ip[j][1] = INT_MIN/2;
int64_t minval = INT32_MIN/2, maxval = INT32_MAX;
if (depth == CV_64S || depth == CV_64U)
{
minval = INT64_MIN/2;
maxval = INT64_MAX;
}
if( diff > (double)maxval )
ip[j][0] = maxval;
if( a < (double)minval )
ip[j][1] = minval;
}
}
uni_param = ip;
if( !fast_int_mode )
{
ds = (DivStruct*)(ip + cn);
DivStruct* ds = (DivStruct*)(ip + cn);
for( j = 0; j < cn; j++ )
{
ds[j].delta = ip[j][1];
unsigned d = ds[j].d = (unsigned)(ip[j][0]+1);
int l = 0;
while(((uint64)1 << l) < d)
l++;
ds[j].M = (unsigned)(((uint64)1 << 32)*(((uint64)1 << l) - d)/d) + 1;
ds[j].sh1 = std::min(l, 1);
ds[j].sh2 = std::max(l - 1, 0);
ds[j].diff = ip[j][0];
if (depth != CV_64U && depth != CV_64S) {
unsigned d = ds[j].d = (unsigned)(ip[j][0]+1);
int l = 0;
while(((uint64)1 << l) < d)
l++;
ds[j].M = (unsigned)(((uint64)1 << 32)*(((uint64)1 << l) - d)/d) + 1;
ds[j].sh1 = std::min(l, 1);
ds[j].sh2 = std::max(l - 1, 0);
}
}
uni_param = ds;
}
func = randTab[fast_int_mode ? 1 : 0][depth];
@ -508,21 +647,23 @@ void RNG::fill( InputOutputArray _mat, int disttype,
// dparam[0][i]*X + dparam[1][i]
if( depth != CV_64F )
{
fp = (Vec2f*)(parambuf + cn*2);
Vec2f* fp = (Vec2f*)(parambuf + cn*2);
for( j = 0; j < cn; j++ )
{
fp[j][0] = (float)(std::min(maxdiff, p2[j] - p1[j])*scale);
fp[j][1] = (float)((p2[j] + p1[j])*0.5);
}
uni_param = fp;
}
else
{
dp = (Vec2d*)(parambuf + cn*2);
Vec2d* dp = (Vec2d*)(parambuf + cn*2);
for( j = 0; j < cn; j++ )
{
dp[j][0] = std::min(DBL_MAX, p2[j] - p1[j])*scale;
dp[j][1] = ((p2[j] + p1[j])*0.5);
}
uni_param = dp;
}
func = randTab[0][depth];
@ -534,8 +675,7 @@ void RNG::fill( InputOutputArray _mat, int disttype,
_parambuf.allocate(MAX(n1, cn) + MAX(n2, cn));
double* parambuf = _parambuf.data();
int ptype = depth == CV_64F ? CV_64F : CV_32F;
int esz = (int)CV_ELEM_SIZE(ptype);
int ptype = esz1 == 8 ? CV_64F : CV_32F;
if( _param1.isContinuous() && _param1.type() == ptype && n1 >= cn)
mean = _param1.ptr();
@ -547,8 +687,8 @@ void RNG::fill( InputOutputArray _mat, int disttype,
}
if( n1 < cn )
for( j = n1*esz; j < cn*esz; j++ )
mean[j] = mean[j - n1*esz];
for( j = n1*esz1; j < cn*esz1; j++ )
mean[j] = mean[j - n1*esz1];
if( _param2.isContinuous() && _param2.type() == ptype && n2 >= cn)
stddev = _param2.ptr();
@ -560,8 +700,8 @@ void RNG::fill( InputOutputArray _mat, int disttype,
}
if( n2 < cn )
for( j = n2*esz; j < cn*esz; j++ )
stddev[j] = stddev[j - n2*esz];
for( j = n2*esz1; j < cn*esz1; j++ )
stddev[j] = stddev[j - n2*esz1];
stdmtx = _param2.rows == cn && _param2.cols == cn;
scaleFunc = randnScaleTab[depth];
@ -571,59 +711,18 @@ void RNG::fill( InputOutputArray _mat, int disttype,
CV_Error( CV_StsBadArg, "Unknown distribution type" );
const Mat* arrays[] = {&mat, 0};
uchar* ptr;
uchar* ptr = 0;
NAryMatIterator it(arrays, &ptr, 1);
int total = (int)it.size, blockSize = std::min((BLOCK_SIZE + cn - 1)/cn, total);
size_t esz = mat.elemSize();
AutoBuffer<double> buf;
uchar* param = 0;
float* nbuf = 0;
float* tmpbuf = 0;
float fbuf[BLOCK_SIZE + CV_CN_MAX];
int total = (int)it.size;
int blockSize = std::min((BLOCK_SIZE + cn - 1)/cn, total);
size_t esz = (size_t)esz1*cn;
int flags = mat.type();
if( disttype == UNIFORM )
{
buf.allocate(blockSize*cn*4);
param = (uchar*)(double*)buf.data();
if( depth <= CV_32S )
{
if( !fast_int_mode )
{
DivStruct* p = (DivStruct*)param;
for( j = 0; j < blockSize*cn; j += cn )
for( k = 0; k < cn; k++ )
p[j + k] = ds[k];
}
else
{
Vec2i* p = (Vec2i*)param;
for( j = 0; j < blockSize*cn; j += cn )
for( k = 0; k < cn; k++ )
p[j + k] = ip[k];
}
}
else if( depth != CV_64F )
{
Vec2f* p = (Vec2f*)param;
for( j = 0; j < blockSize*cn; j += cn )
for( k = 0; k < cn; k++ )
p[j + k] = fp[k];
if( depth == CV_16F )
tmpbuf = (float*)p + blockSize*cn*2;
}
else
{
Vec2d* p = (Vec2d*)param;
for( j = 0; j < blockSize*cn; j += cn )
for( k = 0; k < cn; k++ )
p[j + k] = dp[k];
}
}
flags |= (small_flag ? (int)RNG_FLAG_SMALL : 0);
else
{
buf.allocate((blockSize*cn+1)/2);
nbuf = (float*)(double*)buf.data();
}
flags |= (stdmtx ? (int)RNG_FLAG_STDMTX : 0);
for( size_t i = 0; i < it.nplanes; i++, ++it )
{
@ -631,14 +730,13 @@ void RNG::fill( InputOutputArray _mat, int disttype,
{
int len = std::min(total - j, blockSize);
if( disttype == CV_RAND_UNI )
func( ptr, len*cn, &state, param, tmpbuf, smallFlag );
if( disttype == UNIFORM )
func(ptr + j*esz, len, cn, &state, uni_param, fbuf, flags);
else
{
randn_0_1_32f(nbuf, len*cn, &state);
scaleFunc(nbuf, ptr, len, cn, mean, stddev, stdmtx);
randn_0_1_32f(fbuf, len*cn, &state);
scaleFunc(fbuf, ptr + j*esz, len, cn, mean, stddev, flags);
}
ptr += len*esz;
}
}
}

@ -53,12 +53,15 @@ typedef void (*SplitFunc)(const uchar* src, uchar** dst, int len, int cn);
static SplitFunc getSplitFunc(int depth)
{
static SplitFunc splitTab[] =
static SplitFunc splitTab[CV_DEPTH_MAX] =
{
(SplitFunc)GET_OPTIMIZED(cv::hal::split8u), (SplitFunc)GET_OPTIMIZED(cv::hal::split8u),
(SplitFunc)GET_OPTIMIZED(cv::hal::split16u), (SplitFunc)GET_OPTIMIZED(cv::hal::split16u),
(SplitFunc)GET_OPTIMIZED(cv::hal::split32s), (SplitFunc)GET_OPTIMIZED(cv::hal::split32s),
(SplitFunc)GET_OPTIMIZED(cv::hal::split64s), (SplitFunc)GET_OPTIMIZED(cv::hal::split16u)
(SplitFunc)GET_OPTIMIZED(cv::hal::split64s), (SplitFunc)GET_OPTIMIZED(cv::hal::split16u),
(SplitFunc)GET_OPTIMIZED(cv::hal::split16u), (SplitFunc)GET_OPTIMIZED(cv::hal::split8u),
(SplitFunc)GET_OPTIMIZED(cv::hal::split64s), (SplitFunc)GET_OPTIMIZED(cv::hal::split64s),
(SplitFunc)GET_OPTIMIZED(cv::hal::split32s), 0, 0, 0
};
return splitTab[depth];

@ -434,7 +434,7 @@ static int sum64f( const double* src, const uchar* mask, double* dst, int len, i
SumFunc getSumFunc(int depth)
{
static SumFunc sumTab[] =
static SumFunc sumTab[CV_DEPTH_MAX] =
{
(SumFunc)GET_OPTIMIZED(sum8u), (SumFunc)sum8s,
(SumFunc)sum16u, (SumFunc)sum16s,

@ -40,7 +40,11 @@ struct BaseElemWiseOp
ninputs > 1 ? ARITHM_MAX_CHANNELS : 4);
}
virtual double getMaxErr(int depth) { return depth < CV_32F ? 1 : depth == CV_32F ? 1e-5 : 1e-12; }
virtual double getMaxErr(int depth)
{
return depth < CV_32F || depth == CV_32U || depth == CV_64U || depth == CV_64S ? 1 :
depth == CV_16F || depth == CV_16BF ? 1e-2 : depth == CV_32F ? 1e-5 : 1e-12;
}
virtual void generateScalars(int depth, RNG& rng)
{
const double m = 3.;
@ -93,12 +97,32 @@ struct BaseElemWiseOp
int context;
};
// Depth mask passed to cvtest::randomType() by the arithmetic test operations
// in this file when they pick a random matrix type. Only the 8U, 16U, 16S,
// 32S, 32F and 64F depth bits are set; no other depth bit is included.
static const _OutputArray::DepthMask baseArithmTypeMask =
_OutputArray::DepthMask(
_OutputArray::DEPTH_MASK_8U |
_OutputArray::DEPTH_MASK_16U |
_OutputArray::DEPTH_MASK_16S |
_OutputArray::DEPTH_MASK_32S |
_OutputArray::DEPTH_MASK_32F |
_OutputArray::DEPTH_MASK_64F);
struct BaseAddOp : public BaseElemWiseOp
struct BaseArithmOp : public BaseElemWiseOp
{
BaseAddOp(int _ninputs, int _flags, double _alpha, double _beta, Scalar _gamma=Scalar::all(0))
BaseArithmOp(int _ninputs, int _flags, double _alpha, double _beta, Scalar _gamma=Scalar::all(0))
: BaseElemWiseOp(_ninputs, _flags, _alpha, _beta, _gamma) {}
int getRandomType(RNG& rng)
{
return cvtest::randomType(rng, baseArithmTypeMask, 1,
ninputs > 1 ? ARITHM_MAX_CHANNELS : 4);
}
};
struct BaseAddOp : public BaseArithmOp
{
BaseAddOp(int _ninputs, int _flags, double _alpha, double _beta, Scalar _gamma=Scalar::all(0))
: BaseArithmOp(_ninputs, _flags, _alpha, _beta, _gamma) {}
void refop(const vector<Mat>& src, Mat& dst, const Mat& mask)
{
Mat temp;
@ -192,9 +216,9 @@ struct AddWeightedOp : public BaseAddOp
}
};
struct MulOp : public BaseElemWiseOp
struct MulOp : public BaseArithmOp
{
MulOp() : BaseElemWiseOp(2, FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {}
MulOp() : BaseArithmOp(2, FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {}
void getValueRange(int depth, double& minval, double& maxval)
{
minval = depth < CV_32S ? cvtest::getMinVal(depth) : depth == CV_32S ? -1000000 : -1000.;
@ -216,9 +240,9 @@ struct MulOp : public BaseElemWiseOp
}
};
struct DivOp : public BaseElemWiseOp
struct DivOp : public BaseArithmOp
{
DivOp() : BaseElemWiseOp(2, FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {}
DivOp() : BaseArithmOp(2, FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {}
void op(const vector<Mat>& src, Mat& dst, const Mat&)
{
cv::divide(src[0], src[1], dst, alpha);
@ -233,9 +257,9 @@ struct DivOp : public BaseElemWiseOp
}
};
struct RecipOp : public BaseElemWiseOp
struct RecipOp : public BaseArithmOp
{
RecipOp() : BaseElemWiseOp(1, FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {}
RecipOp() : BaseArithmOp(1, FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {}
void op(const vector<Mat>& src, Mat& dst, const Mat&)
{
cv::divide(alpha, src[0], dst);
@ -339,9 +363,9 @@ struct LogicSOp : public BaseElemWiseOp
char opcode;
};
struct MinOp : public BaseElemWiseOp
struct MinOp : public BaseArithmOp
{
MinOp() : BaseElemWiseOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {}
MinOp() : BaseArithmOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {}
void op(const vector<Mat>& src, Mat& dst, const Mat&)
{
cv::min(src[0], src[1], dst);
@ -356,9 +380,9 @@ struct MinOp : public BaseElemWiseOp
}
};
struct MaxOp : public BaseElemWiseOp
struct MaxOp : public BaseArithmOp
{
MaxOp() : BaseElemWiseOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {}
MaxOp() : BaseArithmOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {}
void op(const vector<Mat>& src, Mat& dst, const Mat&)
{
cv::max(src[0], src[1], dst);
@ -373,9 +397,9 @@ struct MaxOp : public BaseElemWiseOp
}
};
struct MinSOp : public BaseElemWiseOp
struct MinSOp : public BaseArithmOp
{
MinSOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+REAL_GAMMA, 1, 1, Scalar::all(0)) {}
MinSOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+REAL_GAMMA, 1, 1, Scalar::all(0)) {}
void op(const vector<Mat>& src, Mat& dst, const Mat&)
{
cv::min(src[0], gamma[0], dst);
@ -390,9 +414,9 @@ struct MinSOp : public BaseElemWiseOp
}
};
struct MaxSOp : public BaseElemWiseOp
struct MaxSOp : public BaseArithmOp
{
MaxSOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+REAL_GAMMA, 1, 1, Scalar::all(0)) {}
MaxSOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+REAL_GAMMA, 1, 1, Scalar::all(0)) {}
void op(const vector<Mat>& src, Mat& dst, const Mat&)
{
cv::max(src[0], gamma[0], dst);
@ -407,9 +431,9 @@ struct MaxSOp : public BaseElemWiseOp
}
};
struct CmpOp : public BaseElemWiseOp
struct CmpOp : public BaseArithmOp
{
CmpOp() : BaseElemWiseOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) { cmpop = 0; }
CmpOp() : BaseArithmOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) { cmpop = 0; }
void generateScalars(int depth, RNG& rng)
{
BaseElemWiseOp::generateScalars(depth, rng);
@ -425,7 +449,7 @@ struct CmpOp : public BaseElemWiseOp
}
int getRandomType(RNG& rng)
{
return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL_BUT_8S, 1, 1);
return cvtest::randomType(rng, baseArithmTypeMask, 1, 1);
}
double getMaxErr(int)
@ -435,9 +459,9 @@ struct CmpOp : public BaseElemWiseOp
int cmpop;
};
struct CmpSOp : public BaseElemWiseOp
struct CmpSOp : public BaseArithmOp
{
CmpSOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+REAL_GAMMA, 1, 1, Scalar::all(0)) { cmpop = 0; }
CmpSOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+REAL_GAMMA, 1, 1, Scalar::all(0)) { cmpop = 0; }
void generateScalars(int depth, RNG& rng)
{
BaseElemWiseOp::generateScalars(depth, rng);
@ -455,7 +479,7 @@ struct CmpSOp : public BaseElemWiseOp
}
int getRandomType(RNG& rng)
{
return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL_BUT_8S, 1, 1);
return cvtest::randomType(rng, baseArithmTypeMask, 1, 1);
}
double getMaxErr(int)
{
@ -478,7 +502,7 @@ struct CopyOp : public BaseElemWiseOp
}
int getRandomType(RNG& rng)
{
return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL_16F, 1, ARITHM_MAX_CHANNELS);
return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL, 1, ARITHM_MAX_CHANNELS);
}
double getMaxErr(int)
{
@ -500,7 +524,7 @@ struct SetOp : public BaseElemWiseOp
}
int getRandomType(RNG& rng)
{
return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL_16F, 1, ARITHM_MAX_CHANNELS);
return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL, 1, ARITHM_MAX_CHANNELS);
}
double getMaxErr(int)
{
@ -650,9 +674,9 @@ static void inRangeS(const Mat& src, const Scalar& lb, const Scalar& rb, Mat& ds
} // namespace
CVTEST_GUARD_SYMBOL(inRange);
struct InRangeSOp : public BaseElemWiseOp
struct InRangeSOp : public BaseArithmOp
{
InRangeSOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA, 1, 1, Scalar::all(0)) {}
InRangeSOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA, 1, 1, Scalar::all(0)) {}
void op(const vector<Mat>& src, Mat& dst, const Mat&)
{
cv::inRange(src[0], gamma, gamma1, dst);
@ -680,9 +704,9 @@ struct InRangeSOp : public BaseElemWiseOp
};
struct InRangeOp : public BaseElemWiseOp
struct InRangeOp : public BaseArithmOp
{
InRangeOp() : BaseElemWiseOp(3, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {}
InRangeOp() : BaseArithmOp(3, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {}
void op(const vector<Mat>& src, Mat& dst, const Mat&)
{
Mat lb, rb;
@ -725,7 +749,7 @@ struct ConvertScaleOp : public BaseElemWiseOp
}
double getMaxErr(int)
{
return ddepth <= CV_32S ? 2 : ddepth < CV_64F ? 1e-3 : 1e-12;
return ddepth <= CV_32S || ddepth == CV_32U || ddepth == CV_64U || ddepth == CV_64S ? 2 : ddepth == CV_64F ? 1e-12 : ddepth == CV_Bool ? 0 : ddepth == CV_16BF ? 1e-2 : 2e-3;
}
void generateScalars(int depth, RNG& rng)
{
@ -1018,9 +1042,9 @@ static void log(const Mat& src, Mat& dst)
} // namespace
struct ExpOp : public BaseElemWiseOp
struct ExpOp : public BaseArithmOp
{
ExpOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {}
ExpOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {}
int getRandomType(RNG& rng)
{
return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_FLT, 1, ARITHM_MAX_CHANNELS);
@ -1045,9 +1069,9 @@ struct ExpOp : public BaseElemWiseOp
};
struct LogOp : public BaseElemWiseOp
struct LogOp : public BaseArithmOp
{
LogOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {}
LogOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {}
int getRandomType(RNG& rng)
{
return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_FLT, 1, ARITHM_MAX_CHANNELS);
@ -1129,9 +1153,9 @@ static void cartToPolar(const Mat& mx, const Mat& my, Mat& mmag, Mat& mangle, bo
} // namespace
struct CartToPolarToCartOp : public BaseElemWiseOp
struct CartToPolarToCartOp : public BaseArithmOp
{
CartToPolarToCartOp() : BaseElemWiseOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0))
CartToPolarToCartOp() : BaseArithmOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0))
{
context = 3;
angleInDegrees = true;
@ -1173,9 +1197,9 @@ struct CartToPolarToCartOp : public BaseElemWiseOp
};
struct MeanOp : public BaseElemWiseOp
struct MeanOp : public BaseArithmOp
{
MeanOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SUPPORT_MASK+SCALAR_OUTPUT, 1, 1, Scalar::all(0))
MeanOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SUPPORT_MASK+SCALAR_OUTPUT, 1, 1, Scalar::all(0))
{
context = 3;
};
@ -1196,9 +1220,9 @@ struct MeanOp : public BaseElemWiseOp
};
struct SumOp : public BaseElemWiseOp
struct SumOp : public BaseArithmOp
{
SumOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SCALAR_OUTPUT, 1, 1, Scalar::all(0))
SumOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SCALAR_OUTPUT, 1, 1, Scalar::all(0))
{
context = 3;
};
@ -1219,13 +1243,13 @@ struct SumOp : public BaseElemWiseOp
};
struct CountNonZeroOp : public BaseElemWiseOp
struct CountNonZeroOp : public BaseArithmOp
{
CountNonZeroOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SCALAR_OUTPUT+SUPPORT_MASK, 1, 1, Scalar::all(0))
CountNonZeroOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SCALAR_OUTPUT+SUPPORT_MASK, 1, 1, Scalar::all(0))
{}
int getRandomType(RNG& rng)
{
return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL, 1, 1);
return cvtest::randomType(rng, baseArithmTypeMask, 1, 1);
}
void op(const vector<Mat>& src, Mat& dst, const Mat& mask)
{
@ -1252,12 +1276,12 @@ struct CountNonZeroOp : public BaseElemWiseOp
};
struct MeanStdDevOp : public BaseElemWiseOp
struct MeanStdDevOp : public BaseArithmOp
{
Scalar sqmeanRef;
int cn;
MeanStdDevOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SUPPORT_MASK+SCALAR_OUTPUT, 1, 1, Scalar::all(0))
MeanStdDevOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SUPPORT_MASK+SCALAR_OUTPUT, 1, 1, Scalar::all(0))
{
cn = 0;
context = 7;
@ -1296,16 +1320,16 @@ struct MeanStdDevOp : public BaseElemWiseOp
};
struct NormOp : public BaseElemWiseOp
struct NormOp : public BaseArithmOp
{
NormOp() : BaseElemWiseOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SUPPORT_MASK+SCALAR_OUTPUT, 1, 1, Scalar::all(0))
NormOp() : BaseArithmOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SUPPORT_MASK+SCALAR_OUTPUT, 1, 1, Scalar::all(0))
{
context = 1;
normType = 0;
};
int getRandomType(RNG& rng)
{
int type = cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL_BUT_8S, 1, 4);
int type = cvtest::randomType(rng, baseArithmTypeMask, 1, 4);
for(;;)
{
normType = rng.uniform(1, 8);
@ -1343,15 +1367,15 @@ struct NormOp : public BaseElemWiseOp
};
struct MinMaxLocOp : public BaseElemWiseOp
struct MinMaxLocOp : public BaseArithmOp
{
MinMaxLocOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SUPPORT_MASK+SCALAR_OUTPUT, 1, 1, Scalar::all(0))
MinMaxLocOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SUPPORT_MASK+SCALAR_OUTPUT, 1, 1, Scalar::all(0))
{
context = ARITHM_MAX_NDIMS*2 + 2;
};
int getRandomType(RNG& rng)
{
return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL_BUT_8S, 1, 1);
return cvtest::randomType(rng, baseArithmTypeMask, 1, 1);
}
void saveOutput(const vector<int>& minidx, const vector<int>& maxidx,
double minval, double maxval, Mat& dst)
@ -1389,16 +1413,16 @@ struct MinMaxLocOp : public BaseElemWiseOp
}
};
struct reduceArgMinMaxOp : public BaseElemWiseOp
struct reduceArgMinMaxOp : public BaseArithmOp
{
reduceArgMinMaxOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)),
reduceArgMinMaxOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)),
isLast(false), isMax(false), axis(0)
{
context = ARITHM_MAX_NDIMS*2 + 2;
};
int getRandomType(RNG& rng) override
{
return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL_BUT_8S, 1, 1);
return cvtest::randomType(rng, baseArithmTypeMask, 1, 1);
}
void getRandomSize(RNG& rng, vector<int>& size) override
{
@ -1568,82 +1592,82 @@ INSTANTIATE_TEST_CASE_P(Core_CartToPolarToCart, ElemWiseTest, ::testing::Values(
TEST(Core_ArithmMask, uninitialized)
{
RNG& rng = theRNG();
const int MAX_DIM=3;
int sizes[MAX_DIM];
for( int iter = 0; iter < 100; iter++ )
{
int dims = rng.uniform(1, MAX_DIM+1);
int depth = rng.uniform(CV_8U, CV_64F+1);
int cn = rng.uniform(1, 6);
int type = CV_MAKETYPE(depth, cn);
int op = rng.uniform(0, depth < CV_32F ? 5 : 2); // don't run binary operations between floating-point values
int depth1 = op <= 1 ? CV_64F : depth;
for (int k = 0; k < MAX_DIM; k++)
{
sizes[k] = k < dims ? rng.uniform(1, 30) : 0;
}
SCOPED_TRACE(cv::format("iter=%d dims=%d depth=%d cn=%d type=%d op=%d depth1=%d dims=[%d; %d; %d]",
iter, dims, depth, cn, type, op, depth1, sizes[0], sizes[1], sizes[2]));
Mat a(dims, sizes, type), a1;
Mat b(dims, sizes, type), b1;
Mat mask(dims, sizes, CV_8U);
Mat mask1;
Mat c, d;
rng.fill(a, RNG::UNIFORM, 0, 100);
rng.fill(b, RNG::UNIFORM, 0, 100);
// [-2,2) range means that the each generated random number
// will be one of -2, -1, 0, 1. Saturated to [0,255], it will become
// 0, 0, 0, 1 => the mask will be filled by ~25%.
rng.fill(mask, RNG::UNIFORM, -2, 2);
a.convertTo(a1, depth1);
b.convertTo(b1, depth1);
// invert the mask
cv::compare(mask, 0, mask1, CMP_EQ);
a1.setTo(0, mask1);
b1.setTo(0, mask1);
if( op == 0 )
{
cv::add(a, b, c, mask);
cv::add(a1, b1, d);
}
else if( op == 1 )
{
cv::subtract(a, b, c, mask);
cv::subtract(a1, b1, d);
}
else if( op == 2 )
{
cv::bitwise_and(a, b, c, mask);
cv::bitwise_and(a1, b1, d);
}
else if( op == 3 )
{
cv::bitwise_or(a, b, c, mask);
cv::bitwise_or(a1, b1, d);
}
else if( op == 4 )
{
cv::bitwise_xor(a, b, c, mask);
cv::bitwise_xor(a1, b1, d);
}
Mat d1;
d.convertTo(d1, depth);
EXPECT_LE(cvtest::norm(c, d1, CV_C), DBL_EPSILON);
}
Mat_<uchar> tmpSrc(100,100);
tmpSrc = 124;
Mat_<uchar> tmpMask(100,100);
tmpMask = 255;
Mat_<uchar> tmpDst(100,100);
tmpDst = 2;
tmpSrc.copyTo(tmpDst,tmpMask);
RNG& rng = theRNG();
const int MAX_DIM=3;
int sizes[MAX_DIM];
for( int iter = 0; iter < 100; iter++ )
{
int dims = rng.uniform(1, MAX_DIM+1);
int depth = rng.uniform(CV_8U, CV_64F+1);
int cn = rng.uniform(1, 6);
int type = CV_MAKETYPE(depth, cn);
int op = rng.uniform(0, depth < CV_32F ? 5 : 2); // don't run binary operations between floating-point values
int depth1 = op <= 1 ? CV_64F : depth;
for (int k = 0; k < MAX_DIM; k++)
{
sizes[k] = k < dims ? rng.uniform(1, 30) : 0;
}
SCOPED_TRACE(cv::format("iter=%d dims=%d depth=%d cn=%d type=%d op=%d depth1=%d dims=[%d; %d; %d]",
iter, dims, depth, cn, type, op, depth1, sizes[0], sizes[1], sizes[2]));
Mat a(dims, sizes, type), a1;
Mat b(dims, sizes, type), b1;
Mat mask(dims, sizes, CV_8U);
Mat mask1;
Mat c, d;
rng.fill(a, RNG::UNIFORM, 0, 100);
rng.fill(b, RNG::UNIFORM, 0, 100);
// [-2,2) range means that each generated random number
// will be one of -2, -1, 0, 1. Saturated to [0,255], it will become
// 0, 0, 0, 1 => the mask will be filled by ~25%.
rng.fill(mask, RNG::UNIFORM, -2, 2);
a.convertTo(a1, depth1);
b.convertTo(b1, depth1);
// invert the mask
cv::compare(mask, 0, mask1, CMP_EQ);
a1.setTo(0, mask1);
b1.setTo(0, mask1);
if( op == 0 )
{
cv::add(a, b, c, mask);
cv::add(a1, b1, d);
}
else if( op == 1 )
{
cv::subtract(a, b, c, mask);
cv::subtract(a1, b1, d);
}
else if( op == 2 )
{
cv::bitwise_and(a, b, c, mask);
cv::bitwise_and(a1, b1, d);
}
else if( op == 3 )
{
cv::bitwise_or(a, b, c, mask);
cv::bitwise_or(a1, b1, d);
}
else if( op == 4 )
{
cv::bitwise_xor(a, b, c, mask);
cv::bitwise_xor(a1, b1, d);
}
Mat d1;
d.convertTo(d1, depth);
EXPECT_LE(cvtest::norm(c, d1, CV_C), DBL_EPSILON);
}
Mat_<uchar> tmpSrc(100,100);
tmpSrc = 124;
Mat_<uchar> tmpMask(100,100);
tmpMask = 255;
Mat_<uchar> tmpDst(100,100);
tmpDst = 2;
tmpSrc.copyTo(tmpDst,tmpMask);
}
TEST(Multiply, FloatingPointRounding)
@ -2273,35 +2297,35 @@ TEST(Core_minMaxIdx, regression_9207_2)
const int rows = 13;
const int cols = 15;
uchar mask_[rows*cols] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255,
0, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255,
255, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 255,
255, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 255, 255,
255, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 255, 255, 255, 0,
255, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 0, 255, 0,
255, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 0, 255, 255, 0,
255, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 255, 0,
255, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 255, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255,
0, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255,
255, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 255,
255, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 255, 255,
255, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 255, 255, 255, 0,
255, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 0, 255, 0,
255, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 0, 255, 255, 0,
255, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 255, 0,
255, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 255, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
uchar src_[15*13] = {
5, 5, 5, 5, 5, 6, 5, 2, 0, 4, 6, 6, 4, 1, 0,
6, 5, 4, 4, 5, 6, 6, 5, 2, 0, 4, 6, 5, 2, 0,
3, 2, 1, 1, 2, 4, 6, 6, 4, 2, 3, 4, 4, 2, 0,
1, 0, 0, 0, 0, 1, 4, 5, 4, 4, 4, 4, 3, 2, 0,
0, 0, 0, 0, 0, 0, 2, 3, 4, 4, 4, 3, 2, 1, 0,
0, 0, 0, 0, 0, 0, 0, 2, 3, 4, 3, 2, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 1, 2, 4, 3, 3, 1, 0, 1,
0, 0, 0, 0, 0, 0, 1, 4, 5, 6, 5, 4, 3, 2, 0,
1, 0, 0, 0, 0, 0, 3, 5, 5, 4, 3, 4, 4, 3, 0,
2, 0, 0, 0, 0, 2, 5, 6, 5, 2, 2, 5, 4, 3, 0
};
5, 5, 5, 5, 5, 6, 5, 2, 0, 4, 6, 6, 4, 1, 0,
6, 5, 4, 4, 5, 6, 6, 5, 2, 0, 4, 6, 5, 2, 0,
3, 2, 1, 1, 2, 4, 6, 6, 4, 2, 3, 4, 4, 2, 0,
1, 0, 0, 0, 0, 1, 4, 5, 4, 4, 4, 4, 3, 2, 0,
0, 0, 0, 0, 0, 0, 2, 3, 4, 4, 4, 3, 2, 1, 0,
0, 0, 0, 0, 0, 0, 0, 2, 3, 4, 3, 2, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 1, 2, 4, 3, 3, 1, 0, 1,
0, 0, 0, 0, 0, 0, 1, 4, 5, 6, 5, 4, 3, 2, 0,
1, 0, 0, 0, 0, 0, 3, 5, 5, 4, 3, 4, 4, 3, 0,
2, 0, 0, 0, 0, 2, 5, 6, 5, 2, 2, 5, 4, 3, 0
};
Mat mask(Size(cols, rows), CV_8UC1, mask_);
Mat src(Size(cols, rows), CV_8UC1, src_);
double minVal = -0.0, maxVal = -0.0;
@ -2715,7 +2739,6 @@ TEST(Core_CartPolar, inplace)
EXPECT_THROW(cv::polarToCart(uA[0], uA[1], uA[1], uA[0]), cv::Exception);
EXPECT_THROW(cv::cartToPolar(uA[0], uA[1], uA[0], uA[1]), cv::Exception);
EXPECT_THROW(cv::cartToPolar(uA[0], uA[1], uA[0], uA[1]), cv::Exception);
}
}} // namespace

@ -589,7 +589,7 @@ void CxCore_DXTBaseTest::get_test_array_types_and_sizes( int test_case_idx,
{
if( cn == 1 )
{
types[OUTPUT][0] = depth + 8;
types[OUTPUT][0] = CV_MAKETYPE(depth, 2);
sizes[TEMP][0] = size;
}
sizes[INPUT][0] = sizes[INPUT][1] = size;
@ -597,7 +597,7 @@ void CxCore_DXTBaseTest::get_test_array_types_and_sizes( int test_case_idx,
}
else if( /*(cn == 2 && (bits&32)) ||*/ (cn == 1 && allow_complex) )
{
types[TEMP][0] = depth + 8; // CV_??FC2
types[TEMP][0] = CV_MAKETYPE(depth, 2); // CV_??FC2
sizes[TEMP][0] = size;
size = cvSize(size.width/2+1, size.height);
@ -614,7 +614,7 @@ void CxCore_DXTBaseTest::get_test_array_types_and_sizes( int test_case_idx,
else
{
if( allow_complex )
types[OUTPUT][0] = depth + 8;
types[OUTPUT][0] = CV_MAKETYPE(depth, 2);
if( cn == 2 )
{

@ -680,7 +680,9 @@ static void test_filestorage_basic(int write_flags, const char* suffix_name, boo
reference.read(&reference_data[0], ref_sz);
reference.close();
EXPECT_EQ(reference_data, test_data);
if (useMemory) {
EXPECT_EQ(reference_data, test_data);
}
}
std::cout << "Storage size: " << sz << std::endl;
EXPECT_LE(sz, (size_t)6000);
@ -736,16 +738,14 @@ static void test_filestorage_basic(int write_flags, const char* suffix_name, boo
{
for (int j = 0; j < _2d_out.cols; ++j)
{
EXPECT_EQ(_2d_in.at<cv::Vec3b>(i, j), _2d_out.at<cv::Vec3b>(i, j));
if (::testing::Test::HasNonfatalFailure())
{
if (_2d_in.at<cv::Vec3b>(i, j) != _2d_out.at<cv::Vec3b>(i, j)) {
EXPECT_EQ(_2d_in.at<cv::Vec3b>(i, j), _2d_out.at<cv::Vec3b>(i, j));
printf("i = %d, j = %d\n", i, j);
errors++;
}
if (errors >= 3)
{
i = _2d_out.rows;
break;
if (++errors >= 3)
{
i = _2d_out.rows;
break;
}
}
}
}
@ -760,7 +760,10 @@ static void test_filestorage_basic(int write_flags, const char* suffix_name, boo
ASSERT_EQ(_rd_in.cols , _rd_out.cols);
ASSERT_EQ(_rd_in.dims , _rd_out.dims);
ASSERT_EQ(_rd_in.depth(), _rd_out.depth());
EXPECT_EQ(0, cv::norm(_rd_in, _rd_out, NORM_INF));
if (useMemory) {
EXPECT_EQ(0, cv::norm(_rd_in, _rd_out, NORM_INF));
}
}
}
@ -1901,15 +1904,25 @@ static void test_20279(FileStorage& fs)
EXPECT_EQ(CV_16FC3, m16fc3.type()) << typeToString(m16fc3.type());
//std::cout << m16fc3 << std::endl;
Mat m16bfc1, m16bfc3;
m16fc1.convertTo(m16bfc1, CV_16BF);
m16fc3.convertTo(m16bfc3, CV_16BF);
fs << "m16fc1" << m16fc1;
fs << "m16fc3" << m16fc3;
fs << "m16bfc1" << m16bfc1;
fs << "m16bfc3" << m16bfc3;
string content = fs.releaseAndGetString();
if (cvtest::debugLevel > 0) std::cout << content << std::endl;
FileStorage fs_read(content, FileStorage::READ + FileStorage::MEMORY);
Mat m16fc1_result;
Mat m16fc3_result;
Mat m16bfc1_result;
Mat m16bfc3_result;
fs_read["m16fc1"] >> m16fc1_result;
ASSERT_FALSE(m16fc1_result.empty());
EXPECT_EQ(CV_16FC1, m16fc1_result.type()) << typeToString(m16fc1_result.type());
@ -1919,6 +1932,16 @@ static void test_20279(FileStorage& fs)
ASSERT_FALSE(m16fc3_result.empty());
EXPECT_EQ(CV_16FC3, m16fc3_result.type()) << typeToString(m16fc3_result.type());
EXPECT_LE(cvtest::norm(m16fc3_result, m16fc3, NORM_INF), 1e-2);
fs_read["m16bfc1"] >> m16bfc1_result;
ASSERT_FALSE(m16bfc1_result.empty());
EXPECT_EQ(CV_16BFC1, m16bfc1_result.type()) << typeToString(m16bfc1_result.type());
EXPECT_LE(cvtest::norm(m16bfc1_result, m16bfc1, NORM_INF), 2e-2);
fs_read["m16bfc3"] >> m16bfc3_result;
ASSERT_FALSE(m16bfc3_result.empty());
EXPECT_EQ(CV_16BFC3, m16bfc3_result.type()) << typeToString(m16bfc3_result.type());
EXPECT_LE(cvtest::norm(m16bfc3_result, m16bfc3, NORM_INF), 2e-2);
}
TEST(Core_InputOutput, FileStorage_16F_xml)

@ -31,12 +31,12 @@ TEST(Core_OutputArrayCreate, _1997)
ASSERT_NO_THROW(local::create( mat(Rect(Point(), submatSize)), submatSize, mat.type() ));
}
TEST(Core_SaturateCast, NegativeNotClipped)
TEST(Core_SaturateCast, NegativesAreClipped)
{
double d = -1.0;
unsigned int val = cv::saturate_cast<unsigned int>(d);
ASSERT_EQ(0xffffffff, val);
ASSERT_EQ(0u, val);
}
template<typename T, typename U>

@ -216,19 +216,19 @@ public class ImgprocTest extends OpenCVTestCase {
public void testBoxFilterMatMatIntSize() {
Size size = new Size(3, 3);
Imgproc.boxFilter(gray0, dst, 8, size);
Imgproc.boxFilter(gray0, dst, 0, size);
assertMatEqual(gray0, dst);
// TODO_: write better test
}
public void testBoxFilterMatMatIntSizePointBoolean() {
Imgproc.boxFilter(gray255, dst, 8, size, anchorPoint, false);
Imgproc.boxFilter(gray255, dst, 0, size, anchorPoint, false);
assertMatEqual(gray255, dst);
// TODO_: write better test
}
public void testBoxFilterMatMatIntSizePointBooleanInt() {
Imgproc.boxFilter(gray255, dst, 8, size, anchorPoint, false, Core.BORDER_REFLECT);
Imgproc.boxFilter(gray255, dst, 0, size, anchorPoint, false, Core.BORDER_REFLECT);
assertMatEqual(gray255, dst);
// TODO_: write better test
}

@ -186,10 +186,10 @@ void CV_DivSpectrumsTest::get_test_array_types_and_sizes( int test_case_idx, vec
// Inputs are CCS-packed arrays. Prepare outputs and temporary inputs as complex matrices.
if( type == CV_32FC1 || type == CV_64FC1 )
{
types[OUTPUT][0] += 8;
types[REF_OUTPUT][0] += 8;
types[TEMP][0] += 8;
types[TEMP][1] += 8;
types[OUTPUT][0] += CV_DEPTH_MAX;
types[REF_OUTPUT][0] += CV_DEPTH_MAX;
types[TEMP][0] += CV_DEPTH_MAX;
types[TEMP][1] += CV_DEPTH_MAX;
}
}

@ -129,7 +129,7 @@ void GainCompensator::singleFeed(const std::vector<Point> &corners, const std::v
const int num_images = static_cast<int>(images.size());
Mat_<int> N(num_images, num_images); N.setTo(0);
Mat_<double> I(num_images, num_images); I.setTo(0);
Mat_<bool> skip(num_images, 1); skip.setTo(true);
Mat_<uchar> skip(num_images, 1); skip.setTo(1);
Mat subimg1, subimg2;
Mat_<uchar> submask1, submask2, intersect;

@ -72,10 +72,10 @@ int randomType(RNG& rng, _OutputArray::DepthMask typeMask, int minChannels, int
{
int channels = rng.uniform(minChannels, maxChannels+1);
int depth = 0;
CV_Assert((typeMask & _OutputArray::DEPTH_MASK_ALL_16F) != 0);
CV_Assert((typeMask & _OutputArray::DEPTH_MASK_ALL) != 0);
for(;;)
{
depth = rng.uniform(CV_8U, CV_16F+1);
depth = rng.uniform(CV_8U, CV_DEPTH_CURR_MAX);
if( ((1 << depth) & typeMask) != 0 )
break;
}
@ -246,8 +246,43 @@ convert_(const _Tp1* src, _Tp2* dst, size_t total, double alpha, double beta)
dst[i] = saturate_cast<_Tp2>(src[i]*alpha + beta);
}
// Reference conversion of `total` source elements to bool.
// Each output is true when the scaled value src[k]*alpha + beta is non-zero.
// The scale and/or the shift are skipped when alpha == 1 and beta == 0 (or
// just beta == 0), so that those cases compare the unmodified source value.
//   src   - input elements (not modified)
//   dst   - output flags, must hold at least `total` elements
//   total - number of elements to convert
//   alpha - multiplicative scale
//   beta  - additive shift applied after scaling
template<typename _Tp1> inline void
convert_to_bool(const _Tp1* src, bool* dst,
                size_t total, double alpha, double beta)
{
    const bool unitScale = alpha == 1;
    const bool zeroShift = beta == 0;

    for( size_t k = 0; k < total; ++k )
    {
        if( !zeroShift )
            dst[k] = src[k]*alpha + beta != 0;   // general affine case
        else if( unitScale )
            dst[k] = src[k] != 0;                // plain non-zero test
        else
            dst[k] = src[k]*alpha != 0;          // scale only
    }
}
// Overload of convert_ for boolean input: writes `total` elements to `dst`,
// each one computed from the 0/1 truth value of the source element as
// saturate_cast<_Tp2>(flag), saturate_cast<_Tp2>(flag*alpha) or
// saturate_cast<_Tp2>(flag*alpha + beta), depending on whether alpha == 1
// and/or beta == 0.
template<typename _Tp2>
inline void
convert_(const bool* src_, _Tp2* dst,
         size_t total, double alpha, double beta)
{
    // The input is read byte-wise and compared against zero, so every
    // non-zero stored byte is treated as "true".
    const uint8_t* bytes = (const uint8_t*)src_;
    const bool noOffset = (beta == 0);
    const bool identity = (alpha == 1) && noOffset;

    for( size_t k = 0; k < total; k++ )
    {
        if( identity )
            dst[k] = saturate_cast<_Tp2>(bytes[k] != 0);
        else if( noOffset )
            dst[k] = saturate_cast<_Tp2>((bytes[k] != 0)*alpha);
        else
            dst[k] = saturate_cast<_Tp2>((bytes[k] != 0)*alpha + beta);
    }
}
template<typename _Tp> inline void
convertTo(const _Tp* src, void* dst, int dtype, size_t total, double alpha, double beta)
convertTo(const _Tp* src, void* dst, int dtype,
size_t total, double alpha, double beta)
{
switch( CV_MAT_DEPTH(dtype) )
{
@ -263,6 +298,9 @@ convertTo(const _Tp* src, void* dst, int dtype, size_t total, double alpha, doub
case CV_16S:
convert_(src, (short*)dst, total, alpha, beta);
break;
case CV_32U:
convert_(src, (unsigned*)dst, total, alpha, beta);
break;
case CV_32S:
convert_(src, (int*)dst, total, alpha, beta);
break;
@ -272,16 +310,35 @@ convertTo(const _Tp* src, void* dst, int dtype, size_t total, double alpha, doub
case CV_64F:
convert_(src, (double*)dst, total, alpha, beta);
break;
case CV_64U:
convert_(src, (uint64_t*)dst, total, alpha, beta);
break;
case CV_64S:
convert_(src, (int64_t*)dst, total, alpha, beta);
break;
case CV_16F:
convert_(src, (cv::float16_t*)dst, total, alpha, beta);
break;
case CV_16BF:
convert_(src, (cv::bfloat16_t*)dst, total, alpha, beta);
break;
case CV_Bool:
convert_to_bool(src, (bool*)dst, total, alpha, beta);
break;
default:
CV_Assert(0);
}
}
void convert(const Mat& src, cv::OutputArray _dst, int dtype, double alpha, double beta)
void convert(const Mat& src, cv::OutputArray _dst,
int dtype, double alpha, double beta)
{
if (dtype < 0) dtype = _dst.depth();
dtype = CV_MAKETYPE(CV_MAT_DEPTH(dtype), src.channels());
int sdepth = src.depth();
int ddepth = CV_MAT_DEPTH(dtype);
dtype = CV_MAKETYPE(ddepth, src.channels());
_dst.create(src.dims, &src.size[0], dtype);
Mat dst = _dst.getMat();
if( alpha == 0 )
@ -307,7 +364,7 @@ void convert(const Mat& src, cv::OutputArray _dst, int dtype, double alpha, doub
const uchar* sptr = planes[0].ptr();
uchar* dptr = planes[1].ptr();
switch( src.depth() )
switch( sdepth )
{
case CV_8U:
convertTo((const uchar*)sptr, dptr, dtype, total, alpha, beta);
@ -315,12 +372,18 @@ void convert(const Mat& src, cv::OutputArray _dst, int dtype, double alpha, doub
case CV_8S:
convertTo((const schar*)sptr, dptr, dtype, total, alpha, beta);
break;
case CV_Bool:
convertTo((const bool*)sptr, dptr, dtype, total, alpha, beta);
break;
case CV_16U:
convertTo((const ushort*)sptr, dptr, dtype, total, alpha, beta);
break;
case CV_16S:
convertTo((const short*)sptr, dptr, dtype, total, alpha, beta);
break;
case CV_32U:
convertTo((const unsigned*)sptr, dptr, dtype, total, alpha, beta);
break;
case CV_32S:
convertTo((const int*)sptr, dptr, dtype, total, alpha, beta);
break;
@ -330,6 +393,20 @@ void convert(const Mat& src, cv::OutputArray _dst, int dtype, double alpha, doub
case CV_64F:
convertTo((const double*)sptr, dptr, dtype, total, alpha, beta);
break;
case CV_64U:
convertTo((const uint64_t*)sptr, dptr, dtype, total, alpha, beta);
break;
case CV_64S:
convertTo((const int64_t*)sptr, dptr, dtype, total, alpha, beta);
break;
case CV_16F:
convertTo((const cv::float16_t*)sptr, dptr, dtype, total, alpha, beta);
break;
case CV_16BF:
convertTo((const cv::bfloat16_t*)sptr, dptr, dtype, total, alpha, beta);
break;
default:
CV_Error(CV_StsNotImplemented, "unknown/unsupported depth");
}
}
}
@ -1351,7 +1428,7 @@ double norm(InputArray _src, int normType, InputArray _mask)
double norm(InputArray _src1, InputArray _src2, int normType, InputArray _mask)
{
Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
if( src1.depth() == CV_16F )
if( src1.depth() == CV_16F || src1.depth() == CV_16BF )
{
Mat src1_32f, src2_32f;
src1.convertTo(src1_32f, CV_32F);
@ -1769,10 +1846,10 @@ cmpUlpsInt_(const _Tp* src1, const _Tp* src2, size_t total, int imaxdiff,
size_t startidx, size_t& idx)
{
size_t i;
int realmaxdiff = 0;
int64_t realmaxdiff = 0;
for( i = 0; i < total; i++ )
{
int diff = std::abs(src1[i] - src2[i]);
int64_t diff = (int64_t)std::abs((int64_t)src1[i] - (int64_t)src2[i]);
if( realmaxdiff < diff )
{
realmaxdiff = diff;
@ -1780,7 +1857,7 @@ cmpUlpsInt_(const _Tp* src1, const _Tp* src2, size_t total, int imaxdiff,
idx = i + startidx;
}
}
return realmaxdiff;
return (double)realmaxdiff;
}
@ -2008,7 +2085,7 @@ int cmpEps( const Mat& arr_, const Mat& refarr_, double* _realmaxdiff,
{
Mat arr = arr_, refarr = refarr_;
CV_Assert( arr.type() == refarr.type() && arr.size == refarr.size );
if( arr.depth() == CV_16F )
if( arr.depth() == CV_16F || arr.depth() == CV_16BF )
{
Mat arr32f, refarr32f;
arr.convertTo(arr32f, CV_32F);
@ -2017,7 +2094,8 @@ int cmpEps( const Mat& arr_, const Mat& refarr_, double* _realmaxdiff,
refarr = refarr32f;
}
int ilevel = refarr.depth() <= CV_32S ? cvFloor(success_err_level) : 0;
int depth = refarr.depth();
int ilevel = depth <= CV_32S || depth == CV_32U || depth == CV_64U || depth == CV_64S ? cvFloor(success_err_level) : 0;
int result = CMP_EPS_OK;
const Mat *arrays[]={&arr, &refarr, 0};
@ -2025,14 +2103,13 @@ int cmpEps( const Mat& arr_, const Mat& refarr_, double* _realmaxdiff,
NAryMatIterator it(arrays, planes);
size_t total = planes[0].total()*planes[0].channels(), j = total;
size_t i, nplanes = it.nplanes;
int depth = arr.depth();
size_t startidx = 1, idx = 0;
double realmaxdiff = 0, maxval = 0;
if(_realmaxdiff)
*_realmaxdiff = 0;
if( refarr.depth() >= CV_32F && !element_wise_relative_error )
if( !CV_IS_INT_TYPE(depth) && !element_wise_relative_error )
{
maxval = cvtest::norm( refarr, NORM_INF );
maxval = MAX(maxval, 1.);
@ -2048,6 +2125,9 @@ int cmpEps( const Mat& arr_, const Mat& refarr_, double* _realmaxdiff,
case CV_8U:
realmaxdiff = cmpUlpsInt_((const uchar*)sptr1, (const uchar*)sptr2, total, ilevel, startidx, idx);
break;
case CV_Bool:
realmaxdiff = cmpUlpsInt_((const uchar*)sptr1, (const uchar*)sptr2, total, ilevel, startidx, idx);
break;
case CV_8S:
realmaxdiff = cmpUlpsInt_((const schar*)sptr1, (const schar*)sptr2, total, ilevel, startidx, idx);
break;
@ -2060,6 +2140,15 @@ int cmpEps( const Mat& arr_, const Mat& refarr_, double* _realmaxdiff,
case CV_32S:
realmaxdiff = cmpUlpsInt_((const int*)sptr1, (const int*)sptr2, total, ilevel, startidx, idx);
break;
case CV_32U:
realmaxdiff = cmpUlpsInt_((const unsigned*)sptr1, (const unsigned*)sptr2, total, ilevel, startidx, idx);
break;
case CV_64S:
realmaxdiff = cmpUlpsInt_((const int64_t*)sptr1, (const int64_t*)sptr2, total, ilevel, startidx, idx);
break;
case CV_64U:
realmaxdiff = cmpUlpsInt_((const uint64_t*)sptr1, (const uint64_t*)sptr2, total, ilevel, startidx, idx);
break;
case CV_32F:
for( j = 0; j < total; j++ )
{
@ -2887,7 +2976,7 @@ std::ostream& operator << (std::ostream& out, const MatInfo& m)
out << "<Empty>";
else
{
static const char* depthstr[] = {"8u", "8s", "16u", "16s", "32s", "32f", "64f", "?"};
static const char* depthstr[] = {"8u", "8s", "16u", "16s", "32s", "32f", "64f", "16f", "16bf", "Bool", "64u", "64s", "32u", "?", "?", "?"};
out << depthstr[m.m->depth()] << "C" << m.m->channels() << " " << m.m->dims << "-dim (";
for( int i = 0; i < m.m->dims; i++ )
out << m.m->size[i] << (i < m.m->dims-1 ? " x " : ")");
@ -2930,7 +3019,6 @@ writeElems(std::ostream& out, const void* data, int nelems, int starpos)
}
}
static void writeElems(std::ostream& out, const void* data, int nelems, int depth, int starpos)
{
if(depth == CV_8U)
@ -2943,6 +3031,28 @@ static void writeElems(std::ostream& out, const void* data, int nelems, int dept
writeElems<short, int>(out, data, nelems, starpos);
else if(depth == CV_32S)
writeElems<int, int>(out, data, nelems, starpos);
else if(depth == CV_32U)
writeElems<unsigned, unsigned>(out, data, nelems, starpos);
else if(depth == CV_64U)
writeElems<uint64_t, uint64_t>(out, data, nelems, starpos);
else if(depth == CV_64S)
writeElems<int64_t, int64_t>(out, data, nelems, starpos);
else if(depth == CV_Bool)
writeElems<bool, int>(out, data, nelems, starpos);
else if(depth == CV_16F)
{
std::streamsize pp = out.precision();
out.precision(4);
writeElems<cv::float16_t, float>(out, data, nelems, starpos);
out.precision(pp);
}
else if(depth == CV_16BF)
{
std::streamsize pp = out.precision();
out.precision(4);
writeElems<cv::bfloat16_t, float>(out, data, nelems, starpos);
out.precision(pp);
}
else if(depth == CV_32F)
{
std::streamsize pp = out.precision();

@ -465,6 +465,15 @@ void Regression::verify(cv::FileNode node, cv::InputArray array, double eps, ERR
{
int expected_kind = (int)node["kind"];
int expected_type = (int)node["type"];
int array_type = array.type();
if (array_type != expected_type) {
// temporary hack; we optimistically assume that type in the computed and expected array should be the same.
// if they are different, it must be because of the change in type representation between OpenCV 5.x and OpenCV 2.x,3.x,4.x.
// need to add "type5" or something like that and use it in the newer files. Then type will always mean 'earlier than 5.x type'.
int depth = expected_type & 7;
int channels = ((expected_type >> 3) & 127) + 1;
expected_type = CV_MAKETYPE(depth, channels);
}
ASSERT_EQ(expected_kind, array.kind()) << " Argument \"" << node.name() << "\" has unexpected kind";
ASSERT_EQ(expected_type, array.type()) << " Argument \"" << node.name() << "\" has unexpected type";

@ -535,6 +535,12 @@ public:
cv::_OutputArray* dst = static_cast<cv::_OutputArray*>(userdata);
if (!dst)
return CV_ERROR_FAIL;
int depth = CV_MAT_DEPTH(type);
// [TODO] Remove this condition after rebuilding plugins or add a new
// version of plugins. Convert type from the old one to the new one (5 bits)
if (depth > 7) {
type = CV_MAKETYPE((type & 7), (type >> 3) + 1);
}
cv::Mat(cv::Size(width, height), type, (void*)data, step).copyTo(*dst);
return CV_ERROR_OK;
}

@ -54,7 +54,11 @@ static inline void PrintTo(const cv::VideoCaptureAPIs& api, std::ostream* os)
// Renders a FOURCC code as a 4-character string.
// The four bytes of `fourcc` are emitted from least significant to most
// significant, one "%c" conversion per byte.
inline std::string fourccToString(int fourcc)
{
    // Single return statement: each byte is masked to 0..255 and passed as an
    // explicit char so the argument type matches the "%c" conversion.
    return cv::format("%c%c%c%c",
                      (char)(fourcc & 255),
                      (char)((fourcc >> 8) & 255),
                      (char)((fourcc >> 16) & 255),
                      (char)((fourcc >> 24) & 255));
}
inline std::string fourccToStringSafe(int fourcc)
@ -71,19 +75,19 @@ inline int fourccFromString(const std::string &fourcc)
return cv::VideoWriter::fourcc(fourcc[0], fourcc[1], fourcc[2], fourcc[3]);
}
inline void generateFrame(int i, int FRAME_COUNT, cv::Mat & frame)
inline void generateFrame(int i, int frame_count, cv::Mat & frame)
{
using namespace cv;
using namespace std;
int offset = (((i * 5) % FRAME_COUNT) - FRAME_COUNT / 2) * (frame.cols / 2) / FRAME_COUNT;
int offset = (((i * 5) % frame_count) - frame_count / 2) * (frame.cols / 2) / frame_count;
frame(cv::Rect(0, 0, frame.cols / 2 + offset, frame.rows)) = Scalar(255, 255, 255);
frame(cv::Rect(frame.cols / 2 + offset, 0, frame.cols - frame.cols / 2 - offset, frame.rows)) = Scalar(0, 0, 0);
ostringstream buf; buf << "Frame " << setw(2) << setfill('0') << i + 1;
std::string str = cv::format("%02d", i+1);
int baseLine = 0;
Size box = getTextSize(buf.str(), FONT_HERSHEY_COMPLEX, 2, 5, &baseLine);
putText(frame, buf.str(), Point((frame.cols - box.width) / 2, (frame.rows - box.height) / 2 + baseLine),
Size box = getTextSize(str, FONT_HERSHEY_COMPLEX, 2, 5, &baseLine);
putText(frame, str, Point((frame.cols - box.width) / 2, (frame.rows - box.height) / 2 + baseLine),
FONT_HERSHEY_COMPLEX, 2, Scalar(0, 0, 255), 5, LINE_AA);
Point p(i * frame.cols / (FRAME_COUNT - 1), i * frame.rows / (FRAME_COUNT - 1));
Point p(i * frame.cols / (frame_count - 1), i * frame.rows / (frame_count - 1));
circle(frame, p, 50, Scalar(200, 25, 55), 8, LINE_AA);
#if 0
imshow("frame", frame);

Loading…
Cancel
Save