diff --git a/CMakeLists.txt b/CMakeLists.txt index f8a3be499e..9b4ac4c351 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1047,7 +1047,9 @@ endif() if(CMAKE_GENERATOR MATCHES Xcode) status(" Xcode:" ${XCODE_VERSION}) endif() -if(NOT CMAKE_GENERATOR MATCHES "Xcode|Visual Studio") +if(CMAKE_GENERATOR MATCHES "Xcode|Visual Studio|Multi-Config") + status(" Configuration:" ${CMAKE_CONFIGURATION_TYPES}) +else() status(" Configuration:" ${CMAKE_BUILD_TYPE}) endif() diff --git a/README.md b/README.md index 0653a9e73e..b9897205ba 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,8 @@ * Homepage: * Courses: * Docs: -* Q&A forum: +* Q&A forum: + * previous forum (read only): * Issue tracking: * Additional OpenCV functionality: diff --git a/doc/js_tutorials/js_assets/js_template_matching_matchTemplate.html b/doc/js_tutorials/js_assets/js_template_matching_matchTemplate.html index ad2bb54c48..b9f6871ec0 100644 --- a/doc/js_tutorials/js_assets/js_template_matching_matchTemplate.html +++ b/doc/js_tutorials/js_assets/js_template_matching_matchTemplate.html @@ -74,7 +74,8 @@ let utils = new Utils('errorMessage'); utils.loadCode('codeSnippet', 'codeEditor'); utils.loadImageToCanvas('lena.jpg', 'imageCanvasInput'); utils.loadImageToCanvas('lenaFace.png', 'templateCanvasInput'); -utils.addFileInputHandler('fileInput', 'canvasInput'); +utils.addFileInputHandler('fileInput', 'imageCanvasInput'); +utils.addFileInputHandler('templateFileInput', 'templateCanvasInput'); let tryIt = document.getElementById('tryIt'); tryIt.addEventListener('click', () => { diff --git a/doc/js_tutorials/js_setup/js_usage/js_usage.markdown b/doc/js_tutorials/js_setup/js_usage/js_usage.markdown index e2191e6d41..5a8c3b87fa 100644 --- a/doc/js_tutorials/js_setup/js_usage/js_usage.markdown +++ b/doc/js_tutorials/js_setup/js_usage/js_usage.markdown @@ -82,7 +82,7 @@ In this tutorial, we just show a cv.Mat on screen. To show a cv.Mat, you need a You can use cv.imshow to show cv.Mat on the canvas. 
@code{.js} -cv.imshow(mat, "outputCanvas"); +cv.imshow("outputCanvas", mat); @endcode Putting all of the steps together, the final index.html is shown below. diff --git a/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown b/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown index 656f5423c5..dee4df774a 100644 --- a/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown +++ b/doc/py_tutorials/py_feature2d/py_sift_intro/py_sift_intro.markdown @@ -20,10 +20,10 @@ scale invariant. ![image](images/sift_scale_invariant.jpg) -So, in 2004, **D.Lowe**, University of British Columbia, came up with a new algorithm, Scale +In 2004, **D.Lowe**, University of British Columbia, came up with a new algorithm, Scale Invariant Feature Transform (SIFT) in his paper, **Distinctive Image Features from Scale-Invariant Keypoints**, which extract keypoints and compute its descriptors. *(This paper is easy to understand -and considered to be best material available on SIFT. So this explanation is just a short summary of +and considered to be best material available on SIFT. This explanation is just a short summary of this paper)*. There are mainly four steps involved in SIFT algorithm. We will see them one-by-one. @@ -102,16 +102,17 @@ reasons. In that case, ratio of closest-distance to second-closest distance is t greater than 0.8, they are rejected. It eliminates around 90% of false matches while discards only 5% correct matches, as per the paper. -So this is a summary of SIFT algorithm. For more details and understanding, reading the original -paper is highly recommended. Remember one thing, this algorithm is patented. So this algorithm is -included in [the opencv contrib repo](https://github.com/opencv/opencv_contrib) +This is a summary of SIFT algorithm. For more details and understanding, reading the original +paper is highly recommended. SIFT in OpenCV -------------- -So now let's see SIFT functionalities available in OpenCV. 
Let's start with keypoint detection and -draw them. First we have to construct a SIFT object. We can pass different parameters to it which -are optional and they are well explained in docs. +Now let's see SIFT functionalities available in OpenCV. Note that these were previously only +available in [the opencv contrib repo](https://github.com/opencv/opencv_contrib), but the patent +expired in the year 2020. So they are now included in the main repo. Let's start with keypoint +detection and draw them. First we have to construct a SIFT object. We can pass different +parameters to it which are optional and they are well explained in docs. @code{.py} import numpy as np import cv2 as cv diff --git a/doc/tutorials/imgproc/imgtrans/hough_lines/hough_lines.markdown b/doc/tutorials/imgproc/imgtrans/hough_lines/hough_lines.markdown index 496b956aed..5edff16879 100644 --- a/doc/tutorials/imgproc/imgtrans/hough_lines/hough_lines.markdown +++ b/doc/tutorials/imgproc/imgtrans/hough_lines/hough_lines.markdown @@ -224,7 +224,7 @@ First you apply the transform: - *theta*: The resolution of the parameter \f$\theta\f$ in radians. We use **1 degree** (CV_PI/180) - *threshold*: The minimum number of intersections to "*detect*" a line - - *minLinLength*: The minimum number of points that can form a line. Lines with less than + - *minLineLength*: The minimum number of points that can form a line. Lines with less than this number of points are disregarded. - *maxLineGap*: The maximum gap between two points to be considered in the same line. 
diff --git a/modules/core/include/opencv2/core/hal/intrin_wasm.hpp b/modules/core/include/opencv2/core/hal/intrin_wasm.hpp index 22c4a34e52..2f835bb9f8 100644 --- a/modules/core/include/opencv2/core/hal/intrin_wasm.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_wasm.hpp @@ -257,221 +257,20 @@ struct v_float64x2 v128_t val; }; -namespace fallback -{ - -template struct v_reg -{ - typedef _Tp lane_type; - enum { nlanes = n }; - - explicit v_reg(const _Tp* ptr) { for( int i = 0; i < n; i++ ) s[i] = ptr[i]; } - - v_reg(_Tp s0, _Tp s1) { s[0] = s0; s[1] = s1; } - - v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) { s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; } - - v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, - _Tp s4, _Tp s5, _Tp s6, _Tp s7) - { - s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; - s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7; - } - - v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, - _Tp s4, _Tp s5, _Tp s6, _Tp s7, - _Tp s8, _Tp s9, _Tp s10, _Tp s11, - _Tp s12, _Tp s13, _Tp s14, _Tp s15) - { - s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; - s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7; - s[8] = s8; s[9] = s9; s[10] = s10; s[11] = s11; - s[12] = s12; s[13] = s13; s[14] = s14; s[15] = s15; - } - - v_reg() {} - - v_reg(const v_reg<_Tp, n> & r) - { - for( int i = 0; i < n; i++ ) - s[i] = r.s[i]; - } - - _Tp get0() const { return s[0]; } - - _Tp get(const int i) const { return s[i]; } - v_reg<_Tp, n> high() const - { - v_reg<_Tp, n> c; - int i; - for( i = 0; i < n/2; i++ ) - { - c.s[i] = s[i+(n/2)]; - c.s[i+(n/2)] = 0; - } - return c; - } - - static v_reg<_Tp, n> zero() - { - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = (_Tp)0; - return c; - } - - static v_reg<_Tp, n> all(_Tp s) - { - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = s; - return c; - } - - template v_reg<_Tp2, n2> reinterpret_as() const - { - size_t bytes = std::min(sizeof(_Tp2)*n2, sizeof(_Tp)*n); - v_reg<_Tp2, n2> c; - std::memcpy(&c.s[0], &s[0], bytes); - return c; - } - - v_reg(const 
cv::v_uint8x16& v) { wasm_v128_store(&s, v.val); } - v_reg(const cv::v_int8x16& v) { wasm_v128_store(&s, v.val); } - v_reg(const cv::v_uint16x8& v) { wasm_v128_store(&s, v.val); } - v_reg(const cv::v_int16x8& v) { wasm_v128_store(&s, v.val); } - v_reg(const cv::v_uint32x4& v) { wasm_v128_store(&s, v.val); } - v_reg(const cv::v_int32x4& v) { wasm_v128_store(&s, v.val); } - v_reg(const cv::v_float32x4& v) { wasm_v128_store(&s, v.val); } - v_reg(const cv::v_float64x2& v) { wasm_v128_store(&s, v.val); } - v_reg(const cv::v_uint64x2& v) { wasm_v128_store(&s, v.val); } - v_reg(const cv::v_int64x2& v) { wasm_v128_store(&s, v.val); } - - operator cv::v_uint8x16() const { return cv::v_uint8x16(wasm_v128_load(&s)); } - operator cv::v_int8x16() const { return cv::v_int8x16(wasm_v128_load(&s)); } - operator cv::v_uint16x8() const { return cv::v_uint16x8(wasm_v128_load(&s)); } - operator cv::v_int16x8() const { return cv::v_int16x8(wasm_v128_load(&s)); } - operator cv::v_uint32x4() const { return cv::v_uint32x4(wasm_v128_load(&s)); } - operator cv::v_int32x4() const { return cv::v_int32x4(wasm_v128_load(&s)); } - operator cv::v_float32x4() const { return cv::v_float32x4(wasm_v128_load(&s)); } - operator cv::v_float64x2() const { return cv::v_float64x2(wasm_v128_load(&s)); } - operator cv::v_uint64x2() const { return cv::v_uint64x2(wasm_v128_load(&s)); } - operator cv::v_int64x2() const { return cv::v_int64x2(wasm_v128_load(&s)); } - - _Tp s[n]; -}; - -typedef v_reg v_uint8x16; -typedef v_reg v_int8x16; -typedef v_reg v_uint16x8; -typedef v_reg v_int16x8; -typedef v_reg v_uint32x4; -typedef v_reg v_int32x4; -typedef v_reg v_float32x4; -typedef v_reg v_float64x2; -typedef v_reg v_uint64x2; -typedef v_reg v_int64x2; - -#define OPENCV_HAL_IMPL_BIN_OP(bin_op) \ -template inline v_reg<_Tp, n> \ - operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - v_reg<_Tp, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ - return 
c; \ -} \ -template inline v_reg<_Tp, n>& \ - operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - for( int i = 0; i < n; i++ ) \ - a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ - return a; \ -} - -OPENCV_HAL_IMPL_BIN_OP(+) -OPENCV_HAL_IMPL_BIN_OP(-) -OPENCV_HAL_IMPL_BIN_OP(*) -OPENCV_HAL_IMPL_BIN_OP(/) - -#define OPENCV_HAL_IMPL_BIT_OP(bit_op) \ -template inline v_reg<_Tp, n> operator bit_op \ - (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - v_reg<_Tp, n> c; \ - typedef typename V_TypeTraits<_Tp>::int_type itype; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \ - V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \ - return c; \ -} \ -template inline v_reg<_Tp, n>& operator \ - bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - typedef typename V_TypeTraits<_Tp>::int_type itype; \ - for( int i = 0; i < n; i++ ) \ - a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \ - V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \ - return a; \ -} - -OPENCV_HAL_IMPL_BIT_OP(&) -OPENCV_HAL_IMPL_BIT_OP(|) -OPENCV_HAL_IMPL_BIT_OP(^) - -template inline v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); - } - return c; -} - -#define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \ -template inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \ -{ \ - v_reg<_Tp2, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = cfunc(a.s[i]); \ - return c; \ -} - -OPENCV_HAL_IMPL_MATH_FUNC(v_sqrt, std::sqrt, _Tp) -OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp) -OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp) -OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp) -OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp) -OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename 
V_TypeTraits<_Tp>::abs_type)std::abs, - typename V_TypeTraits<_Tp>::abs_type) -OPENCV_HAL_IMPL_MATH_FUNC(v_round, cvRound, int) -OPENCV_HAL_IMPL_MATH_FUNC(v_floor, cvFloor, int) -OPENCV_HAL_IMPL_MATH_FUNC(v_ceil, cvCeil, int) -OPENCV_HAL_IMPL_MATH_FUNC(v_trunc, int, int) - -#define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \ -template inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - v_reg<_Tp, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = cfunc(a.s[i], b.s[i]); \ - return c; \ -} - -#define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \ -template inline _Tp func(const v_reg<_Tp, n>& a) \ -{ \ - _Tp c = a.s[0]; \ - for( int i = 1; i < n; i++ ) \ - c = cfunc(c, a.s[i]); \ - return c; \ -} - -OPENCV_HAL_IMPL_MINMAX_FUNC(v_min, std::min) -OPENCV_HAL_IMPL_MINMAX_FUNC(v_max, std::max) -OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min) -OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max) +namespace +{ +#define OPENCV_HAL_IMPL_REINTERPRET_INT(ft, tt) \ +inline tt reinterpret_int(ft x) { union { ft l; tt i; } v; v.l = x; return v.i; } +OPENCV_HAL_IMPL_REINTERPRET_INT(uchar, schar) +OPENCV_HAL_IMPL_REINTERPRET_INT(schar, schar) +OPENCV_HAL_IMPL_REINTERPRET_INT(ushort, short) +OPENCV_HAL_IMPL_REINTERPRET_INT(short, short) +OPENCV_HAL_IMPL_REINTERPRET_INT(unsigned, int) +OPENCV_HAL_IMPL_REINTERPRET_INT(int, int) +OPENCV_HAL_IMPL_REINTERPRET_INT(float, int) +OPENCV_HAL_IMPL_REINTERPRET_INT(uint64, int64) +OPENCV_HAL_IMPL_REINTERPRET_INT(int64, int64) +OPENCV_HAL_IMPL_REINTERPRET_INT(double, int64) static const unsigned char popCountTable[] = { @@ -492,1184 +291,7 @@ static const unsigned char popCountTable[] = 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, }; - -template -inline v_reg::abs_type, n> v_popcount(const v_reg<_Tp, n>& a) -{ - v_reg::abs_type, n> b = v_reg::abs_type, n>::zero(); - for (int i = 0; i < (int)(n*sizeof(_Tp)); i++) - b.s[i/sizeof(_Tp)] += 
popCountTable[v_reinterpret_as_u8(a).s[i]]; - return b; -} - -template -inline void v_minmax( const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval ) -{ - for( int i = 0; i < n; i++ ) - { - minval.s[i] = std::min(a.s[i], b.s[i]); - maxval.s[i] = std::max(a.s[i], b.s[i]); - } -} - -#define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \ -template \ -inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - typedef typename V_TypeTraits<_Tp>::int_type itype; \ - v_reg<_Tp, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)-(int)(a.s[i] cmp_op b.s[i])); \ - return c; \ -} - -OPENCV_HAL_IMPL_CMP_OP(<) -OPENCV_HAL_IMPL_CMP_OP(>) -OPENCV_HAL_IMPL_CMP_OP(<=) -OPENCV_HAL_IMPL_CMP_OP(>=) -OPENCV_HAL_IMPL_CMP_OP(==) -OPENCV_HAL_IMPL_CMP_OP(!=) - -template -inline v_reg v_not_nan(const v_reg& a) -{ - typedef typename V_TypeTraits::int_type itype; - v_reg c; - for (int i = 0; i < n; i++) - c.s[i] = V_TypeTraits::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i])); - return c; -} -template -inline v_reg v_not_nan(const v_reg& a) -{ - typedef typename V_TypeTraits::int_type itype; - v_reg c; - for (int i = 0; i < n; i++) - c.s[i] = V_TypeTraits::reinterpret_from_int((itype)-(int)(a.s[i] == a.s[i])); - return c; -} - -#define OPENCV_HAL_IMPL_ARITHM_OP(func, bin_op, cast_op, _Tp2) \ -template \ -inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - typedef _Tp2 rtype; \ - v_reg c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = cast_op(a.s[i] bin_op b.s[i]); \ - return c; \ -} - -OPENCV_HAL_IMPL_ARITHM_OP(v_add_wrap, +, (_Tp), _Tp) -OPENCV_HAL_IMPL_ARITHM_OP(v_sub_wrap, -, (_Tp), _Tp) -OPENCV_HAL_IMPL_ARITHM_OP(v_mul_wrap, *, (_Tp), _Tp) - -template inline T _absdiff(T a, T b) -{ - return a > b ? 
a - b : b - a; -} - -template -inline v_reg::abs_type, n> v_absdiff(const v_reg<_Tp, n>& a, const v_reg<_Tp, n> & b) -{ - typedef typename V_TypeTraits<_Tp>::abs_type rtype; - v_reg c; - const rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0); - for( int i = 0; i < n; i++ ) - { - rtype ua = a.s[i] ^ mask; - rtype ub = b.s[i] ^ mask; - c.s[i] = _absdiff(ua, ub); - } - return c; -} - -inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b) -{ - v_float32x4 c; - for( int i = 0; i < c.nlanes; i++ ) - c.s[i] = _absdiff(a.s[i], b.s[i]); - return c; -} - -inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b) -{ - v_float64x2 c; - for( int i = 0; i < c.nlanes; i++ ) - c.s[i] = _absdiff(a.s[i], b.s[i]); - return c; -} - -template -inline v_reg<_Tp, n> v_absdiffs(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++) - c.s[i] = saturate_cast<_Tp>(std::abs(a.s[i] - b.s[i])); - return c; -} - -template -inline v_reg<_Tp, n> v_invsqrt(const v_reg<_Tp, n>& a) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = 1.f/std::sqrt(a.s[i]); - return c; -} - -template -inline v_reg<_Tp, n> v_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = std::sqrt(a.s[i]*a.s[i] + b.s[i]*b.s[i]); - return c; -} - -template -inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = a.s[i]*a.s[i] + b.s[i]*b.s[i]; - return c; -} - -template -inline v_reg<_Tp, n> v_fma(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - const v_reg<_Tp, n>& c) -{ - v_reg<_Tp, n> d; - for( int i = 0; i < n; i++ ) - d.s[i] = a.s[i]*b.s[i] + c.s[i]; - return d; -} - -template -inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - const v_reg<_Tp, n>& c) -{ - return v_fma(a, b, c); -} - -template inline 
v_reg::w_type, n/2> - v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - typedef typename V_TypeTraits<_Tp>::w_type w_type; - v_reg c; - for( int i = 0; i < (n/2); i++ ) - c.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1]; - return c; -} - -template inline v_reg::w_type, n/2> - v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, const v_reg::w_type, n / 2>& c) -{ - typedef typename V_TypeTraits<_Tp>::w_type w_type; - v_reg s; - for( int i = 0; i < (n/2); i++ ) - s.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1] + c.s[i]; - return s; -} - -template inline v_reg::q_type, n/4> - v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - typedef typename V_TypeTraits<_Tp>::q_type q_type; - v_reg s; - for( int i = 0; i < (n/4); i++ ) - s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] + - (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3]; - return s; -} - -template inline v_reg::q_type, n/4> - v_dotprod_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - const v_reg::q_type, n / 4>& c) -{ - typedef typename V_TypeTraits<_Tp>::q_type q_type; - v_reg s; - for( int i = 0; i < (n/4); i++ ) - s.s[i] = (q_type)a.s[i*4 ]*b.s[i*4 ] + (q_type)a.s[i*4 + 1]*b.s[i*4 + 1] + - (q_type)a.s[i*4 + 2]*b.s[i*4 + 2] + (q_type)a.s[i*4 + 3]*b.s[i*4 + 3] + c.s[i]; - return s; -} - -template inline void v_mul_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - v_reg::w_type, n/2>& c, - v_reg::w_type, n/2>& d) -{ - typedef typename V_TypeTraits<_Tp>::w_type w_type; - for( int i = 0; i < (n/2); i++ ) - { - c.s[i] = (w_type)a.s[i]*b.s[i]; - d.s[i] = (w_type)a.s[i+(n/2)]*b.s[i+(n/2)]; - } -} - -template inline v_reg<_Tp, n> v_mul_hi(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - typedef typename V_TypeTraits<_Tp>::w_type w_type; - v_reg<_Tp, n> c; - for (int i = 0; i < n; i++) - c.s[i] = (_Tp)(((w_type)a.s[i] * b.s[i]) >> sizeof(_Tp)*8); - return c; -} - -template inline void 
v_hsum(const v_reg<_Tp, n>& a, - v_reg::w_type, n/2>& c) -{ - typedef typename V_TypeTraits<_Tp>::w_type w_type; - for( int i = 0; i < (n/2); i++ ) - { - c.s[i] = (w_type)a.s[i*2] + a.s[i*2+1]; - } -} - -#define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \ -template inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \ -{ \ - v_reg<_Tp, n> c; \ - for( int i = 0; i < n; i++ ) \ - c.s[i] = (_Tp)(a.s[i] shift_op imm); \ - return c; \ -} - -OPENCV_HAL_IMPL_SHIFT_OP(<< ) -OPENCV_HAL_IMPL_SHIFT_OP(>> ) - -#define OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(suffix,opA,opB) \ -template inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a) \ -{ \ - v_reg<_Tp, n> b; \ - for (int i = 0; i < n; i++) \ - { \ - int sIndex = i opA imm; \ - if (0 <= sIndex && sIndex < n) \ - { \ - b.s[i] = a.s[sIndex]; \ - } \ - else \ - { \ - b.s[i] = 0; \ - } \ - } \ - return b; \ -} \ -template inline v_reg<_Tp, n> v_rotate_##suffix(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ -{ \ - v_reg<_Tp, n> c; \ - for (int i = 0; i < n; i++) \ - { \ - int aIndex = i opA imm; \ - int bIndex = i opA imm opB n; \ - if (0 <= bIndex && bIndex < n) \ - { \ - c.s[i] = b.s[bIndex]; \ - } \ - else if (0 <= aIndex && aIndex < n) \ - { \ - c.s[i] = a.s[aIndex]; \ - } \ - else \ - { \ - c.s[i] = 0; \ - } \ - } \ - return c; \ -} - -OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(left, -, +) -OPENCV_HAL_IMPL_ROTATE_SHIFT_OP(right, +, -) - -template inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a) -{ - typename V_TypeTraits<_Tp>::sum_type c = a.s[0]; - for( int i = 1; i < n; i++ ) - c += a.s[i]; - return c; -} - -inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, - const v_float32x4& c, const v_float32x4& d) -{ - v_float32x4 r; - r.s[0] = a.s[0] + a.s[1] + a.s[2] + a.s[3]; - r.s[1] = b.s[0] + b.s[1] + b.s[2] + b.s[3]; - r.s[2] = c.s[0] + c.s[1] + c.s[2] + c.s[3]; - r.s[3] = d.s[0] + d.s[1] + d.s[2] + d.s[3]; - return r; -} - -template inline typename 
V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type v_reduce_sad(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - typename V_TypeTraits< typename V_TypeTraits<_Tp>::abs_type >::sum_type c = _absdiff(a.s[0], b.s[0]); - for (int i = 1; i < n; i++) - c += _absdiff(a.s[i], b.s[i]); - return c; -} - -template inline int v_signmask(const v_reg<_Tp, n>& a) -{ - int mask = 0; - for( int i = 0; i < n; i++ ) - mask |= (V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0) << i; - return mask; -} - -template inline bool v_check_all(const v_reg<_Tp, n>& a) -{ - for( int i = 0; i < n; i++ ) - if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) >= 0 ) - return false; - return true; -} - -template inline bool v_check_any(const v_reg<_Tp, n>& a) -{ - for( int i = 0; i < n; i++ ) - if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0 ) - return true; - return false; -} - -template inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask, - const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - typedef V_TypeTraits<_Tp> Traits; - typedef typename Traits::int_type int_type; - v_reg<_Tp, n> c; - for( int i = 0; i < n; i++ ) - { - int_type m = Traits::reinterpret_int(mask.s[i]); - CV_DbgAssert(m == 0 || m == (~(int_type)0)); // restrict mask values: 0 or 0xff/0xffff/etc - c.s[i] = m ? 
a.s[i] : b.s[i]; - } - return c; -} - -template inline void v_expand(const v_reg<_Tp, n>& a, - v_reg::w_type, n/2>& b0, - v_reg::w_type, n/2>& b1) -{ - for( int i = 0; i < (n/2); i++ ) - { - b0.s[i] = a.s[i]; - b1.s[i] = a.s[i+(n/2)]; - } -} - -template -inline v_reg::w_type, n/2> -v_expand_low(const v_reg<_Tp, n>& a) -{ - v_reg::w_type, n/2> b; - for( int i = 0; i < (n/2); i++ ) - b.s[i] = a.s[i]; - return b; -} - -template -inline v_reg::w_type, n/2> -v_expand_high(const v_reg<_Tp, n>& a) -{ - v_reg::w_type, n/2> b; - for( int i = 0; i < (n/2); i++ ) - b.s[i] = a.s[i+(n/2)]; - return b; -} - -template inline v_reg::int_type, n> - v_reinterpret_as_int(const v_reg<_Tp, n>& a) -{ - v_reg::int_type, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]); - return c; -} - -template inline v_reg::uint_type, n> - v_reinterpret_as_uint(const v_reg<_Tp, n>& a) -{ - v_reg::uint_type, n> c; - for( int i = 0; i < n; i++ ) - c.s[i] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[i]); - return c; -} - -template inline void v_zip( const v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1, - v_reg<_Tp, n>& b0, v_reg<_Tp, n>& b1 ) -{ - int i; - for( i = 0; i < n/2; i++ ) - { - b0.s[i*2] = a0.s[i]; - b0.s[i*2+1] = a1.s[i]; - } - for( ; i < n; i++ ) - { - b1.s[i*2-n] = a0.s[i]; - b1.s[i*2-n+1] = a1.s[i]; - } -} - -template -inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load(const _Tp* ptr) -{ - return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr); -} - -template -inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_aligned(const _Tp* ptr) -{ - return v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128>(ptr); -} - -template -inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_low(const _Tp* ptr) -{ - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for( int i = 0; i < c.nlanes/2; i++ ) - { - c.s[i] = ptr[i]; - } - return c; -} - -template -inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_load_halves(const _Tp* loptr, const _Tp* hiptr) -{ - v_reg<_Tp, 
V_TypeTraits<_Tp>::nlanes128> c; - for( int i = 0; i < c.nlanes/2; i++ ) - { - c.s[i] = loptr[i]; - c.s[i+c.nlanes/2] = hiptr[i]; - } - return c; -} - -template -inline v_reg::w_type, V_TypeTraits<_Tp>::nlanes128 / 2> -v_load_expand(const _Tp* ptr) -{ - typedef typename V_TypeTraits<_Tp>::w_type w_type; - v_reg::nlanes128> c; - for( int i = 0; i < c.nlanes; i++ ) - { - c.s[i] = ptr[i]; - } - return c; -} - -template -inline v_reg::q_type, V_TypeTraits<_Tp>::nlanes128 / 4> -v_load_expand_q(const _Tp* ptr) -{ - typedef typename V_TypeTraits<_Tp>::q_type q_type; - v_reg::nlanes128> c; - for( int i = 0; i < c.nlanes; i++ ) - { - c.s[i] = ptr[i]; - } - return c; -} - -template inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, - v_reg<_Tp, n>& b) -{ - int i, i2; - for( i = i2 = 0; i < n; i++, i2 += 2 ) - { - a.s[i] = ptr[i2]; - b.s[i] = ptr[i2+1]; - } -} - -template inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, - v_reg<_Tp, n>& b, v_reg<_Tp, n>& c) -{ - int i, i3; - for( i = i3 = 0; i < n; i++, i3 += 3 ) - { - a.s[i] = ptr[i3]; - b.s[i] = ptr[i3+1]; - c.s[i] = ptr[i3+2]; - } -} - -template -inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, - v_reg<_Tp, n>& b, v_reg<_Tp, n>& c, - v_reg<_Tp, n>& d) -{ - int i, i4; - for( i = i4 = 0; i < n; i++, i4 += 4 ) - { - a.s[i] = ptr[i4]; - b.s[i] = ptr[i4+1]; - c.s[i] = ptr[i4+2]; - d.s[i] = ptr[i4+3]; - } -} - -template -inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, - const v_reg<_Tp, n>& b, - hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) -{ - int i, i2; - for( i = i2 = 0; i < n; i++, i2 += 2 ) - { - ptr[i2] = a.s[i]; - ptr[i2+1] = b.s[i]; - } -} - -template -inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, - const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, - hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) -{ - int i, i3; - for( i = i3 = 0; i < n; i++, i3 += 3 ) - { - ptr[i3] = a.s[i]; - ptr[i3+1] = b.s[i]; - ptr[i3+2] = c.s[i]; - } -} - -template 
inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, - const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, - const v_reg<_Tp, n>& d, - hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) -{ - int i, i4; - for( i = i4 = 0; i < n; i++, i4 += 4 ) - { - ptr[i4] = a.s[i]; - ptr[i4+1] = b.s[i]; - ptr[i4+2] = c.s[i]; - ptr[i4+3] = d.s[i]; - } -} - -template -inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) -{ - for( int i = 0; i < n; i++ ) - ptr[i] = a.s[i]; -} - -template -inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a) -{ - for( int i = 0; i < (n/2); i++ ) - ptr[i] = a.s[i]; -} - -template -inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a) -{ - for( int i = 0; i < (n/2); i++ ) - ptr[i] = a.s[i+(n/2)]; -} - -template -inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a) -{ - for( int i = 0; i < n; i++ ) - ptr[i] = a.s[i]; -} - -template -inline void v_store_aligned_nocache(_Tp* ptr, const v_reg<_Tp, n>& a) -{ - for( int i = 0; i < n; i++ ) - ptr[i] = a.s[i]; -} - -template -inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/) -{ - for( int i = 0; i < n; i++ ) - ptr[i] = a.s[i]; -} - -template -inline v_reg<_Tp, n> v_combine_low(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < (n/2); i++ ) - { - c.s[i] = a.s[i]; - c.s[i+(n/2)] = b.s[i]; - } - return c; -} - -template -inline v_reg<_Tp, n> v_combine_high(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> c; - for( int i = 0; i < (n/2); i++ ) - { - c.s[i] = a.s[i+(n/2)]; - c.s[i+(n/2)] = b.s[i+(n/2)]; - } - return c; -} - -template -inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, - v_reg<_Tp, n>& low, v_reg<_Tp, n>& high) -{ - for( int i = 0; i < (n/2); i++ ) - { - low.s[i] = a.s[i]; - low.s[i+(n/2)] = b.s[i]; - high.s[i] = a.s[i+(n/2)]; - high.s[i+(n/2)] = b.s[i+(n/2)]; - } -} - -template -inline v_reg<_Tp, n> v_extract(const 
v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - v_reg<_Tp, n> r; - const int shift = n - s; - int i = 0; - for (; i < shift; ++i) - r.s[i] = a.s[i+s]; - for (; i < n; ++i) - r.s[i] = b.s[i-shift]; - return r; -} - -template inline v_reg v_round(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = cvRound(a.s[i]); - return c; -} - -template inline v_reg v_round(const v_reg& a, const v_reg& b) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = cvRound(a.s[i]); - c.s[i+n] = cvRound(b.s[i]); - } - return c; -} - -template inline v_reg v_floor(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = cvFloor(a.s[i]); - return c; -} - -template inline v_reg v_ceil(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = cvCeil(a.s[i]); - return c; -} - -template inline v_reg v_trunc(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (int)(a.s[i]); - return c; -} - -template inline v_reg v_round(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = cvRound(a.s[i]); - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_floor(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = cvFloor(a.s[i]); - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_ceil(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = cvCeil(a.s[i]); - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_trunc(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = (int)(a.s[i]); - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_cvt_f32(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = (float)a.s[i]; - return c; -} - -template inline v_reg v_cvt_f32(const v_reg& a) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { - c.s[i] = (float)a.s[i]; - c.s[i+n] = 0; - } - return c; -} - -template inline v_reg v_cvt_f32(const v_reg& a, const v_reg& b) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - { 
- c.s[i] = (float)a.s[i]; - c.s[i+n] = (float)b.s[i]; - } - return c; -} - -inline v_float64x2 v_cvt_f64(const v_int32x4& a) -{ - v_float64x2 c; - for( int i = 0; i < 2; i++ ) - c.s[i] = (double)a.s[i]; - return c; -} - -inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) -{ - v_float64x2 c; - for( int i = 0; i < 2; i++ ) - c.s[i] = (double)a.s[i+2]; - return c; -} - -inline v_float64x2 v_cvt_f64(const v_float32x4& a) -{ - v_float64x2 c; - for( int i = 0; i < 2; i++ ) - c.s[i] = (double)a.s[i]; - return c; -} - -inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) -{ - v_float64x2 c; - for( int i = 0; i < 2; i++ ) - c.s[i] = (double)a.s[i+2]; - return c; -} - -inline v_float64x2 v_cvt_f64(const v_int64x2& a) -{ - v_float64x2 c; - for( int i = 0; i < 2; i++ ) - c.s[i] = (double)a.s[i]; - return c; -} - -template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut(const _Tp* tab, const int* idx) -{ - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) - c.s[i] = tab[idx[i]]; - return c; -} -template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_pairs(const _Tp* tab, const int* idx) -{ - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) - c.s[i] = tab[idx[i / 2] + i % 2]; - return c; -} -template inline v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> v_lut_quads(const _Tp* tab, const int* idx) -{ - v_reg<_Tp, V_TypeTraits<_Tp>::nlanes128> c; - for (int i = 0; i < V_TypeTraits<_Tp>::nlanes128; i++) - c.s[i] = tab[idx[i / 4] + i % 4]; - return c; -} - -template inline v_reg v_lut(const int* tab, const v_reg& idx) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = tab[idx.s[i]]; - return c; -} - -template inline v_reg v_lut(const unsigned* tab, const v_reg& idx) -{ - v_reg c; - for (int i = 0; i < n; i++) - c.s[i] = tab[idx.s[i]]; - return c; -} - -template inline v_reg v_lut(const float* tab, const v_reg& idx) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - 
c.s[i] = tab[idx.s[i]]; - return c; -} - -template inline v_reg v_lut(const double* tab, const v_reg& idx) -{ - v_reg c; - for( int i = 0; i < n; i++ ) - c.s[i] = tab[idx.s[i]]; - return c; -} - -template inline void v_lut_deinterleave(const float* tab, const v_reg& idx, - v_reg& x, v_reg& y) -{ - for( int i = 0; i < n; i++ ) - { - int j = idx.s[i]; - x.s[i] = tab[j]; - y.s[i] = tab[j+1]; - } -} - -template inline void v_lut_deinterleave(const double* tab, const v_reg& idx, - v_reg& x, v_reg& y) -{ - for( int i = 0; i < n; i++ ) - { - int j = idx.s[i]; - x.s[i] = tab[j]; - y.s[i] = tab[j+1]; - } -} - -template inline v_reg<_Tp, n> v_interleave_pairs(const v_reg<_Tp, n>& vec) -{ - v_reg<_Tp, n> c; - for (int i = 0; i < n/4; i++) - { - c.s[4*i ] = vec.s[4*i ]; - c.s[4*i+1] = vec.s[4*i+2]; - c.s[4*i+2] = vec.s[4*i+1]; - c.s[4*i+3] = vec.s[4*i+3]; - } - return c; -} - -template inline v_reg<_Tp, n> v_interleave_quads(const v_reg<_Tp, n>& vec) -{ - v_reg<_Tp, n> c; - for (int i = 0; i < n/8; i++) - { - c.s[8*i ] = vec.s[8*i ]; - c.s[8*i+1] = vec.s[8*i+4]; - c.s[8*i+2] = vec.s[8*i+1]; - c.s[8*i+3] = vec.s[8*i+5]; - c.s[8*i+4] = vec.s[8*i+2]; - c.s[8*i+5] = vec.s[8*i+6]; - c.s[8*i+6] = vec.s[8*i+3]; - c.s[8*i+7] = vec.s[8*i+7]; - } - return c; -} - -template inline v_reg<_Tp, n> v_pack_triplets(const v_reg<_Tp, n>& vec) -{ - v_reg<_Tp, n> c; - for (int i = 0; i < n/4; i++) - { - c.s[3*i ] = vec.s[4*i ]; - c.s[3*i+1] = vec.s[4*i+1]; - c.s[3*i+2] = vec.s[4*i+2]; - } - return c; -} - -template -inline void v_transpose4x4( v_reg<_Tp, 4>& a0, const v_reg<_Tp, 4>& a1, - const v_reg<_Tp, 4>& a2, const v_reg<_Tp, 4>& a3, - v_reg<_Tp, 4>& b0, v_reg<_Tp, 4>& b1, - v_reg<_Tp, 4>& b2, v_reg<_Tp, 4>& b3 ) -{ - b0 = v_reg<_Tp, 4>(a0.s[0], a1.s[0], a2.s[0], a3.s[0]); - b1 = v_reg<_Tp, 4>(a0.s[1], a1.s[1], a2.s[1], a3.s[1]); - b2 = v_reg<_Tp, 4>(a0.s[2], a1.s[2], a2.s[2], a3.s[2]); - b3 = v_reg<_Tp, 4>(a0.s[3], a1.s[3], a2.s[3], a3.s[3]); -} - -#define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, 
_Tp, suffix) \ -inline _Tpvec v_setzero_##suffix() { return _Tpvec::zero(); } - -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x16, uchar, u8) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16, schar, s8) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x8, ushort, u16) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8, short, s16) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4, int, s32) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x4, float, f32) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2, double, f64) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x2, uint64, u64) -OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2, int64, s64) - -#define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, suffix) \ -inline _Tpvec v_setall_##suffix(_Tp val) { return _Tpvec::all(val); } - -OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16, schar, s8) -OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x8, ushort, u16) -OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8, short, s16) -OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4, int, s32) -OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x4, float, f32) -OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2, double, f64) -OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x2, uint64, u64) -OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, s64) - -#define OPENCV_HAL_IMPL_C_REINTERPRET(_Tpvec, _Tp, suffix) \ -template inline _Tpvec \ - v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \ -{ return a.template reinterpret_as<_Tp, _Tpvec::nlanes>(); } - -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint8x16, uchar, u8) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int8x16, schar, s8) -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint16x8, ushort, u16) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int16x8, short, s16) -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int32x4, int, s32) -OPENCV_HAL_IMPL_C_REINTERPRET(v_float32x4, float, f32) -OPENCV_HAL_IMPL_C_REINTERPRET(v_float64x2, double, f64) -OPENCV_HAL_IMPL_C_REINTERPRET(v_uint64x2, uint64, u64) -OPENCV_HAL_IMPL_C_REINTERPRET(v_int64x2, int64, s64) - -#define 
OPENCV_HAL_IMPL_C_SHIFTL(_Tpvec, _Tp) \ -template inline _Tpvec v_shl(const _Tpvec& a) \ -{ return a << n; } - -OPENCV_HAL_IMPL_C_SHIFTL(v_uint16x8, ushort) -OPENCV_HAL_IMPL_C_SHIFTL(v_int16x8, short) -OPENCV_HAL_IMPL_C_SHIFTL(v_uint32x4, unsigned) -OPENCV_HAL_IMPL_C_SHIFTL(v_int32x4, int) -OPENCV_HAL_IMPL_C_SHIFTL(v_uint64x2, uint64) -OPENCV_HAL_IMPL_C_SHIFTL(v_int64x2, int64) - -#define OPENCV_HAL_IMPL_C_SHIFTR(_Tpvec, _Tp) \ -template inline _Tpvec v_shr(const _Tpvec& a) \ -{ return a >> n; } - -OPENCV_HAL_IMPL_C_SHIFTR(v_uint16x8, ushort) -OPENCV_HAL_IMPL_C_SHIFTR(v_int16x8, short) -OPENCV_HAL_IMPL_C_SHIFTR(v_uint32x4, unsigned) -OPENCV_HAL_IMPL_C_SHIFTR(v_int32x4, int) -OPENCV_HAL_IMPL_C_SHIFTR(v_uint64x2, uint64) -OPENCV_HAL_IMPL_C_SHIFTR(v_int64x2, int64) - -#define OPENCV_HAL_IMPL_C_RSHIFTR(_Tpvec, _Tp) \ -template inline _Tpvec v_rshr(const _Tpvec& a) \ -{ \ - _Tpvec c; \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ - return c; \ -} - -OPENCV_HAL_IMPL_C_RSHIFTR(v_uint16x8, ushort) -OPENCV_HAL_IMPL_C_RSHIFTR(v_int16x8, short) -OPENCV_HAL_IMPL_C_RSHIFTR(v_uint32x4, unsigned) -OPENCV_HAL_IMPL_C_RSHIFTR(v_int32x4, int) -OPENCV_HAL_IMPL_C_RSHIFTR(v_uint64x2, uint64) -OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64) - -#define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix, cast) \ -inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ -{ \ - _Tpnvec c; \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - { \ - c.s[i] = cast<_Tpn>(a.s[i]); \ - c.s[i+_Tpvec::nlanes] = cast<_Tpn>(b.s[i]); \ - } \ - return c; \ -} - -OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack, static_cast) 
-OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u, saturate_cast) - -#define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ -template inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \ -{ \ - _Tpnvec c; \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - { \ - c.s[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ - c.s[i+_Tpvec::nlanes] = cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \ - } \ - return c; \ -} - -OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) - -#define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ -inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ -{ \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - ptr[i] = cast<_Tpn>(a.s[i]); \ -} - -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, 
unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) - -#define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix, cast) \ -template inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \ -{ \ - for( int i = 0; i < _Tpvec::nlanes; i++ ) \ - ptr[i] = cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \ -} - -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack, static_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u, saturate_cast) -OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u, saturate_cast) - -template -inline void _pack_b(_Tpm* mptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) -{ - for (int i = 0; i < n; ++i) - { - mptr[i] = (_Tpm)a.s[i]; - mptr[i + n] = (_Tpm)b.s[i]; - } -} - -inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) -{ - v_uint8x16 mask; - _pack_b(mask.s, a, b); - return mask; -} - -inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, - const v_uint32x4& c, const v_uint32x4& d) -{ - v_uint8x16 mask; - _pack_b(mask.s, a, b); - _pack_b(mask.s + 8, c, d); - return mask; -} - -inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, - const v_uint64x2& d, const 
v_uint64x2& e, const v_uint64x2& f, - const v_uint64x2& g, const v_uint64x2& h) -{ - v_uint8x16 mask; - _pack_b(mask.s, a, b); - _pack_b(mask.s + 4, c, d); - _pack_b(mask.s + 8, e, f); - _pack_b(mask.s + 12, g, h); - return mask; -} - -inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, - const v_float32x4& m1, const v_float32x4& m2, - const v_float32x4& m3) -{ - return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + v.s[3]*m3.s[0], - v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + v.s[3]*m3.s[1], - v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + v.s[3]*m3.s[2], - v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + v.s[3]*m3.s[3]); -} - -inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, - const v_float32x4& m1, const v_float32x4& m2, - const v_float32x4& m3) -{ - return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + m3.s[0], - v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + m3.s[1], - v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + m3.s[2], - v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + m3.s[3]); -} - -inline v_reg::nlanes128> -v_load_expand(const float16_t* ptr) -{ - v_reg::nlanes128> v; - for( int i = 0; i < v.nlanes; i++ ) - { - v.s[i] = ptr[i]; - } - return v; -} - -inline void -v_pack_store(float16_t* ptr, const v_reg::nlanes128>& v) -{ - for( int i = 0; i < v.nlanes; i++ ) - { - ptr[i] = float16_t(v.s[i]); - } -} - -inline void v_cleanup() {} -} // namespace fallback +} // namespace static v128_t wasm_unpacklo_i8x16(v128_t a, v128_t b) { return wasm_v8x16_shuffle(a, b, 0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23); @@ -2554,14 +1176,6 @@ inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ return _Tpvec(intrin(a.val, b.val)); \ } -#define OPENCV_HAL_IMPL_WASM_BIN_FUNC_FALLBACK(_Tpvec, func, intrin) \ -inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ -{ \ - fallback::_Tpvec a_(a); \ - fallback::_Tpvec b_(b); \ - return _Tpvec(fallback::func(a_, b_)); \ -} - 
OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float32x4, v_min, wasm_f32x4_min) OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float32x4, v_max, wasm_f32x4_max) OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_float64x2, v_min, wasm_f64x2_min) @@ -2654,8 +1268,24 @@ OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint16x8, v_sub_wrap, wasm_i16x8_sub) OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int16x8, v_sub_wrap, wasm_i16x8_sub) #if (__EMSCRIPTEN_major__ * 1000000 + __EMSCRIPTEN_minor__ * 1000 + __EMSCRIPTEN_tiny__) >= (2000000) // details: https://github.com/opencv/opencv/issues/18097 ( https://github.com/emscripten-core/emscripten/issues/12018 ) -OPENCV_HAL_IMPL_WASM_BIN_FUNC_FALLBACK(v_uint8x16, v_mul_wrap, wasm_i8x16_mul) -OPENCV_HAL_IMPL_WASM_BIN_FUNC_FALLBACK(v_int8x16, v_mul_wrap, wasm_i8x16_mul) +inline v_uint8x16 v_mul_wrap(const v_uint8x16& a, const v_uint8x16& b) +{ + uchar a_[16], b_[16]; + wasm_v128_store(a_, a.val); + wasm_v128_store(b_, b.val); + for (int i = 0; i < 16; i++) + a_[i] = (uchar)(a_[i] * b_[i]); /* per-lane product, truncated back to 8 bits */ + return v_uint8x16(wasm_v128_load(a_)); /* wrap the raw v128_t explicitly, like the intrinsic-based overloads */ +} +inline v_int8x16 v_mul_wrap(const v_int8x16& a, const v_int8x16& b) +{ + schar a_[16], b_[16]; + wasm_v128_store(a_, a.val); + wasm_v128_store(b_, b.val); + for (int i = 0; i < 16; i++) + a_[i] = (schar)(a_[i] * b_[i]); /* per-lane product, truncated back to 8 bits */ + return v_int8x16(wasm_v128_load(a_)); /* wrap the raw v128_t explicitly, like the intrinsic-based overloads */ +} #else OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_uint8x16, v_mul_wrap, wasm_i8x16_mul) OPENCV_HAL_IMPL_WASM_BIN_FUNC(v_int8x16, v_mul_wrap, wasm_i8x16_mul) @@ -2919,13 +1549,17 @@ inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \ } \ inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ { \ - fallback::_Tpvec a_(a); \ - fallback::v_store_low(ptr, a_); \ + _Tpvec::lane_type a_[_Tpvec::nlanes]; \ + wasm_v128_store(a_, a.val); \ + for (int i = 0; i < (_Tpvec::nlanes / 2); i++) \ + ptr[i] = a_[i]; \ } \ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ { \ - fallback::_Tpvec a_(a); \ - fallback::v_store_high(ptr, a_); \ + _Tpvec::lane_type a_[_Tpvec::nlanes]; \ + wasm_v128_store(a_, a.val); \ + for (int i = 0; i <
(_Tpvec::nlanes / 2); i++) \ + ptr[i] = a_[i + (_Tpvec::nlanes / 2)]; \ } OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_uint8x16, uchar) @@ -2991,8 +1625,12 @@ OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(v_float32x4, float, v128_t, f32x4, f32x4) #define OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(_Tpvec, scalartype) \ inline scalartype v_reduce_sum(const _Tpvec& a) \ { \ - fallback::_Tpvec a_(a); \ - return fallback::v_reduce_sum(a_); \ + _Tpvec::lane_type a_[_Tpvec::nlanes]; \ + wasm_v128_store(a_, a.val); \ + scalartype c = a_[0]; \ + for (int i = 1; i < _Tpvec::nlanes; i++) \ + c += a_[i]; \ + return c; \ } OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_uint8x16, unsigned) @@ -3116,8 +1754,11 @@ inline v_uint32x4 v_popcount(const v_uint32x4& a) } inline v_uint64x2 v_popcount(const v_uint64x2& a) { - fallback::v_uint64x2 a_(a); - return fallback::v_popcount(a_); + uint64 a_[2], b_[2] = { 0 }; + wasm_v128_store(a_, a.val); + for (int i = 0; i < 16; i++) + b_[i / 8] += popCountTable[((const uchar*)a_)[i]]; /* walk the 16 bytes; bytes 0-7 feed lane 0, bytes 8-15 feed lane 1 */ + return v_uint64x2(wasm_v128_load(b_)); /* wrap the raw v128_t explicitly, like the intrinsic-based overloads */ } inline v_uint8x16 v_popcount(const v_int8x16& a) { return v_popcount(v_reinterpret_as_u8(a)); } @@ -3131,8 +1772,12 @@ inline v_uint64x2 v_popcount(const v_int64x2& a) #define OPENCV_HAL_IMPL_WASM_CHECK_SIGNS(_Tpvec, suffix, scalarType) \ inline int v_signmask(const _Tpvec& a) \ { \ - fallback::_Tpvec a_(a); \ - return fallback::v_signmask(a_); \ + _Tpvec::lane_type a_[_Tpvec::nlanes]; \ + wasm_v128_store(a_, a.val); \ + int mask = 0; \ + for (int i = 0; i < _Tpvec::nlanes; i++) \ + mask |= (reinterpret_int(a_[i]) < 0) << i; \ + return mask; \ } \ inline bool v_check_all(const _Tpvec& a) \ { return wasm_i8x16_all_true(wasm_##suffix##_lt(a.val, wasm_##suffix##_splat(0))); } \ @@ -3287,22 +1932,35 @@ inline v_int32x4 v_ceil(const v_float32x4& a) inline v_int32x4 v_trunc(const v_float32x4& a) { return v_int32x4(wasm_i32x4_trunc_saturate_f32x4(a.val)); } -#define OPENCV_HAL_IMPL_WASM_MATH_FUNC(func, cfunc, _Tpvec, _Tpnvec, _Tp, _Tpn) \ -inline _Tpnvec func(const
_Tpvec& a) \ +#define OPENCV_HAL_IMPL_WASM_MATH_FUNC(func, cfunc) /* scalar f64x2 -> s32x4: cfunc on both lanes, upper two result lanes zeroed */ \ +inline v_int32x4 func(const v_float64x2& a) \ { \ - fallback::_Tpvec a_(a); \ - return fallback::func(a_); \ + double a_[2]; \ + wasm_v128_store(a_, a.val); \ + int c_[4]; \ + c_[0] = cfunc(a_[0]); /* lane 0 */ \ + c_[1] = cfunc(a_[1]); /* lane 1 */ \ + c_[2] = 0; \ + c_[3] = 0; \ + return v_int32x4(wasm_v128_load(c_)); \ } -OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_round, cvRound, v_float64x2, v_int32x4, double, int) -OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_floor, cvFloor, v_float64x2, v_int32x4, double, int) -OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_ceil, cvCeil, v_float64x2, v_int32x4, double, int) -OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_trunc, int, v_float64x2, v_int32x4, double, int) +OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_round, cvRound) +OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_floor, cvFloor) +OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_ceil, cvCeil) +OPENCV_HAL_IMPL_WASM_MATH_FUNC(v_trunc, int) inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) { - fallback::v_float64x2 a_(a), b_(b); - return fallback::v_round(a_, b_); + double a_[2], b_[2]; + wasm_v128_store(a_, a.val); + wasm_v128_store(b_, b.val); + int c_[4]; + c_[0] = cvRound(a_[0]); + c_[1] = cvRound(a_[1]); + c_[2] = cvRound(b_[0]); + c_[3] = cvRound(b_[1]); + return v_int32x4(wasm_v128_load(c_)); /* a fills lanes 0-1, b fills lanes 2-3 */ } #define OPENCV_HAL_IMPL_WASM_TRANSPOSE4x4(_Tpvec, suffix) \ @@ -3796,14 +2454,27 @@ inline v_float32x4 v_cvt_f32(const v_int32x4& a) inline v_float32x4 v_cvt_f32(const v_float64x2& a) { - fallback::v_float64x2 a_(a); - return fallback::v_cvt_f32(a_); + double a_[2]; + wasm_v128_store(a_, a.val); + float c_[4]; + c_[0] = (float)(a_[0]); + c_[1] = (float)(a_[1]); + c_[2] = 0; + c_[3] = 0; + return v_float32x4(wasm_v128_load(c_)); /* upper two lanes are zero */ } inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) { - fallback::v_float64x2 a_(a), b_(b); - return fallback::v_cvt_f32(a_, b_); + double a_[2], b_[2]; + wasm_v128_store(a_, a.val); + wasm_v128_store(b_, b.val); + float c_[4]; + c_[0] = (float)(a_[0]); + c_[1] = (float)(a_[1]); + c_[2]
= (float)(b_[0]); + c_[3] = (float)(b_[1]); + return v_float32x4(wasm_v128_load(c_)); /* a fills lanes 0-1, b fills lanes 2-3 */ } inline v_float64x2 v_cvt_f64(const v_int32x4& a) @@ -3812,8 +2483,12 @@ inline v_float64x2 v_cvt_f64(const v_int32x4& a) v128_t p = v128_cvti32x4_i64x2(a.val); return v_float64x2(wasm_f64x2_convert_i64x2(p)); #else - fallback::v_int32x4 a_(a); - return fallback::v_cvt_f64(a_); + int a_[4]; + wasm_v128_store(a_, a.val); + double c_[2]; + c_[0] = (double)(a_[0]); + c_[1] = (double)(a_[1]); + return v_float64x2(wasm_v128_load(c_)); /* low half (lanes 0-1); wrapped like the #ifdef branch */ #endif } @@ -3823,21 +2498,33 @@ inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) v128_t p = v128_cvti32x4_i64x2_high(a.val); return v_float64x2(wasm_f64x2_convert_i64x2(p)); #else - fallback::v_int32x4 a_(a); - return fallback::v_cvt_f64_high(a_); + int a_[4]; + wasm_v128_store(a_, a.val); + double c_[2]; + c_[0] = (double)(a_[2]); + c_[1] = (double)(a_[3]); + return v_float64x2(wasm_v128_load(c_)); /* high half (lanes 2-3); wrapped like the #ifdef branch */ #endif } inline v_float64x2 v_cvt_f64(const v_float32x4& a) { - fallback::v_float32x4 a_(a); - return fallback::v_cvt_f64(a_); + float a_[4]; + wasm_v128_store(a_, a.val); + double c_[2]; + c_[0] = (double)(a_[0]); + c_[1] = (double)(a_[1]); + return v_float64x2(wasm_v128_load(c_)); /* low half (lanes 0-1) */ } inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) { - fallback::v_float32x4 a_(a); - return fallback::v_cvt_f64_high(a_); + float a_[4]; + wasm_v128_store(a_, a.val); + double c_[2]; + c_[0] = (double)(a_[2]); + c_[1] = (double)(a_[3]); + return v_float64x2(wasm_v128_load(c_)); /* high half (lanes 2-3) */ } inline v_float64x2 v_cvt_f64(const v_int64x2& a) @@ -3845,8 +2532,12 @@ inline v_float64x2 v_cvt_f64(const v_int64x2& a) #ifdef __wasm_unimplemented_simd128__ return v_float64x2(wasm_f64x2_convert_i64x2(a.val)); #else - fallback::v_int64x2 a_(a); - return fallback::v_cvt_f64(a_); + int64 a_[2]; + wasm_v128_store(a_, a.val); + double c_[2]; + c_[0] = (double)(a_[0]); + c_[1] = (double)(a_[1]); + return v_float64x2(wasm_v128_load(c_)); /* wrapped like the #ifdef branch */ #endif } @@ -4063,13 +2754,20 @@ inline v_float32x4 v_broadcast_element(const v_float32x4& a) inline v_float32x4 v_load_expand(const
float16_t* ptr) { - return fallback::v_load_expand(ptr); + float a[4]; + for (int i = 0; i < 4; i++) + a[i] = ptr[i]; + return v_float32x4(wasm_v128_load(a)); /* wrap the raw v128_t explicitly, like the intrinsic-based overloads */ } inline void v_pack_store(float16_t* ptr, const v_float32x4& v) { - fallback::v_float32x4 v_(v); - fallback::v_pack_store(ptr, v_); + float v_[4]; /* four 32-bit lanes: must match the 16 bytes of v.val written by wasm_v128_store */ + wasm_v128_store(v_, v.val); + ptr[0] = float16_t(v_[0]); + ptr[1] = float16_t(v_[1]); + ptr[2] = float16_t(v_[2]); + ptr[3] = float16_t(v_[3]); } inline void v_cleanup() {} diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 679055de4a..e704911c12 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -11,6 +11,11 @@ Implementation of Tensorflow models parser #include "../precomp.hpp" +#include +#undef CV_LOG_STRIP_LEVEL +#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1 +#include + #ifdef HAVE_PROTOBUF #include "tf_io.hpp" @@ -93,7 +98,7 @@ void blobShapeFromTensor(const tensorflow::TensorProto &tensor, MatShape& shape) shape[i] = (int)_shape.dim(i).size(); } else - shape.resize(1, 1); // Scalar. + shape.resize(1, 1); // Scalar.
// FIXIT: should be empty } else { @@ -258,7 +263,7 @@ const tensorflow::AttrValue& getLayerAttr(const tensorflow::NodeDef &layer, cons return layer.attr().at(name); } -static int getDataLayout(const tensorflow::NodeDef& layer) +static DataLayout getDataLayout(const tensorflow::NodeDef& layer) { if (hasLayerAttr(layer, "data_format")) { @@ -280,10 +285,13 @@ static inline std::string getNodeName(const std::string& tensorName) return tensorName.substr(0, tensorName.rfind(':')); } -static inline int getDataLayout(const std::string& layerName, - const std::map& data_layouts) +static inline +DataLayout getDataLayout( + const std::string& layerName, + const std::map& data_layouts +) { - std::map::const_iterator it = data_layouts.find(getNodeName(layerName)); + std::map::const_iterator it = data_layouts.find(getNodeName(layerName)); return it != data_layouts.end() ? it->second : DATA_LAYOUT_UNKNOWN; } @@ -439,15 +447,20 @@ void ExcludeLayer(tensorflow::GraphDef& net, const int layer_index, const int in net.mutable_node()->DeleteSubrange(layer_index, 1); } -class TFImporter { +class TFImporter +{ public: - TFImporter(const char *model, const char *config = NULL); - TFImporter(const char *dataModel, size_t lenModel, + TFImporter(Net& net, const char *model, const char *config = NULL); + TFImporter(Net& net, const char *dataModel, size_t lenModel, const char *dataConfig = NULL, size_t lenConfig = 0); +protected: + Net& dstNet; + void populateNet(); - void populateNet(Net dstNet); + void parseNode(const tensorflow::NodeDef& layer); + + DataLayout predictOutputDataLayout(const tensorflow::NodeDef& layer); -private: void kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob); void connect(const std::map& layers_name_id_map, Net& network, const Pin& outPin, @@ -467,23 +480,53 @@ private: std::vector netInputsNames; std::vector netInputShapes; + + std::set layers_to_ignore; + std::map data_layouts; + + // find all Const layers for params + std::map value_id; + // 
A map with constant blobs which are shared between multiple layers. + std::map sharedWeights; + + std::map layer_id; }; -TFImporter::TFImporter(const char *model, const char *config) +TFImporter::TFImporter(Net& net, const char *model, const char *config) + : dstNet(net) { if (model && model[0]) + { + CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow model from file: " << model); ReadTFNetParamsFromBinaryFileOrDie(model, &netBin); + } if (config && config[0]) + { + CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow config from file: " << config); ReadTFNetParamsFromTextFileOrDie(config, &netTxt); + } + + populateNet(); } -TFImporter::TFImporter(const char *dataModel, size_t lenModel, - const char *dataConfig, size_t lenConfig) +TFImporter::TFImporter( + Net& net, + const char *dataModel, size_t lenModel, + const char *dataConfig, size_t lenConfig +) + : dstNet(net) { if (dataModel != NULL && lenModel > 0) + { + CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow model from memory (" << lenModel << " bytes)"); ReadTFNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBin); + } if (dataConfig != NULL && lenConfig > 0) + { + CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow config from memory (" << lenConfig << " bytes)"); ReadTFNetParamsFromTextBufferOrDie(dataConfig, lenConfig, &netTxt); + } + populateNet(); } void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob) @@ -612,84 +655,98 @@ const tensorflow::TensorProto& TFImporter::getConstBlob(const tensorflow::NodeDe static void addConstNodes(tensorflow::GraphDef& net, std::map& const_layers, std::set& layers_to_ignore) { + CV_LOG_DEBUG(NULL, "DNN/TF: addConstNodes(): handling " << net.node_size() << " nodes..."); for (int li = 0; li < net.node_size(); li++) { const tensorflow::NodeDef &layer = net.node(li); String name = layer.name(); String type = layer.op(); - if (type == "Dequantize") + //CV_LOG_DEBUG(NULL, "DNN/TF: layer_id=" << li << " - '" << name << "' @ " << type); + + try { - 
// Example of Dequantize node: - // name: "conv2d_1/bias" - // op: "Dequantize" - // input: "conv2d_1/bias_quantized_const" (tensor of dtype DT_QUINT8) - // input: "conv2d_1/bias_quantized_min" - // input: "conv2d_1/bias_quantized_max" - // attr { key: "T" value { type: DT_QUINT8 } } (quantized type) - // attr { key: "mode" value { s: "MIN_FIRST" } } (quantization technique) - CV_Assert(layer.input_size() == 3); - for (int i = 0; i < 3; ++i) - CV_Assert(const_layers.find(layer.input(i)) != const_layers.end()); - CV_Assert(hasLayerAttr(layer, "mode") && - getLayerAttr(layer, "mode").s() == "MIN_FIRST"); - - int tensorId = const_layers[layer.input(0)]; - int minId = const_layers[layer.input(1)]; - int maxId = const_layers[layer.input(2)]; - - tensorflow::TensorProto* tensor = net.mutable_node(tensorId) - ->mutable_attr()->at("value") - .mutable_tensor(); - CV_Assert(tensor->dtype() == tensorflow::DT_QUINT8); - - Mat qMin = getTensorContent(net.node(minId).attr().at("value").tensor()); - Mat qMax = getTensorContent(net.node(maxId).attr().at("value").tensor()); - CV_Assert_N(qMin.total() == 1, qMin.type() == CV_32FC1, - qMax.total() == 1, qMax.type() == CV_32FC1); - - Mat content = getTensorContent(*tensor); - - float minVal = qMin.at(0); - float rangeScale = (qMax.at(0) - minVal) / 255; - CV_Assert(rangeScale >= 0); - content.convertTo(content, CV_32FC1, rangeScale, - rangeScale * cvRound(minVal / rangeScale)); - - tensor->set_dtype(tensorflow::DT_FLOAT); - tensor->set_tensor_content(content.data, content.total() * content.elemSize1()); - - net.mutable_node(tensorId)->set_name(name); - CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second); + if (type == "Dequantize") + { + // Example of Dequantize node: + // name: "conv2d_1/bias" + // op: "Dequantize" + // input: "conv2d_1/bias_quantized_const" (tensor of dtype DT_QUINT8) + // input: "conv2d_1/bias_quantized_min" + // input: "conv2d_1/bias_quantized_max" + // attr { key: "T" value { type: DT_QUINT8 } } 
(quantized type) + // attr { key: "mode" value { s: "MIN_FIRST" } } (quantization technique) + CV_CheckEQ(layer.input_size(), 3, "Dequantize: 3 inputs is supported only"); + for (int i = 0; i < 3; ++i) + CV_Assert(const_layers.find(layer.input(i)) != const_layers.end()); + CV_Assert(hasLayerAttr(layer, "mode") && + getLayerAttr(layer, "mode").s() == "MIN_FIRST"); + + int tensorId = const_layers[layer.input(0)]; + int minId = const_layers[layer.input(1)]; + int maxId = const_layers[layer.input(2)]; + + tensorflow::TensorProto* tensor = net.mutable_node(tensorId) + ->mutable_attr()->at("value") + .mutable_tensor(); + CV_CheckEQ((int)tensor->dtype(), (int)tensorflow::DT_QUINT8, ""); + + Mat qMin = getTensorContent(net.node(minId).attr().at("value").tensor()); + Mat qMax = getTensorContent(net.node(maxId).attr().at("value").tensor()); + CV_CheckEQ(qMin.total(), (size_t)1, ""); + CV_CheckTypeEQ(qMin.type(), CV_32FC1, ""); + CV_CheckEQ(qMax.total(), (size_t)1, ""); + CV_CheckTypeEQ(qMax.type(), CV_32FC1, ""); + + Mat content = getTensorContent(*tensor); + + float minVal = qMin.at(0); + float rangeScale = (qMax.at(0) - minVal) / 255; + CV_Assert(rangeScale >= 0); + content.convertTo(content, CV_32FC1, rangeScale, + rangeScale * cvRound(minVal / rangeScale)); + + tensor->set_dtype(tensorflow::DT_FLOAT); + tensor->set_tensor_content(content.data, content.total() * content.elemSize1()); + + net.mutable_node(tensorId)->set_name(name); + CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second); + layers_to_ignore.insert(name); + continue; + } + else if (type != "Const") + continue; // only Const parameters are supported + + if (layer.attr().find("value") != layer.attr().end()) + { + CV_Assert(const_layers.insert(std::make_pair(name, li)).second); + } layers_to_ignore.insert(name); - continue; } - else if (type != "Const") - continue; // only Const parameters are supported - - if (layer.attr().find("value") != layer.attr().end()) + catch (const std::exception& e) { 
- CV_Assert(const_layers.insert(std::make_pair(name, li)).second); + CV_LOG_ERROR(NULL, "DNN/TF: Can't handle node='" << name << "'. Exception: " << e.what()); + throw; } - layers_to_ignore.insert(name); } + CV_LOG_DEBUG(NULL, "DNN/TF: layers_to_ignore.size() = " << layers_to_ignore.size()); } // If all inputs of specific layer have the same data layout we can say that // this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise. -static int predictOutputDataLayout(const tensorflow::GraphDef& net, - const tensorflow::NodeDef& layer, - const std::map& data_layouts) +DataLayout TFImporter::predictOutputDataLayout(const tensorflow::NodeDef& layer) { - int layout = getDataLayout(layer); + DataLayout layout = getDataLayout(layer); if (layout != DATA_LAYOUT_UNKNOWN) + { + CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from attrs)"); return layout; + } // Determine layout by layer's inputs - std::map::const_iterator it; for (int i = 0, n = layer.input_size(); i < n; ++i) { - it = data_layouts.find(getNodeName(layer.input(i))); + std::map::const_iterator it = data_layouts.find(getNodeName(layer.input(i))); if (it != data_layouts.end()) { if (layout != DATA_LAYOUT_UNKNOWN) @@ -703,71 +760,72 @@ static int predictOutputDataLayout(const tensorflow::GraphDef& net, } if (layout != DATA_LAYOUT_UNKNOWN) + { + CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from inputs)"); return layout; + } // Determine layout by layer's consumers recursively. 
- it = data_layouts.find(layer.name()); + std::map::const_iterator it = data_layouts.find(layer.name()); CV_Assert(it != data_layouts.end()); return it->second; } -void TFImporter::populateNet(Net dstNet) +void TFImporter::populateNet() { - if (!netTxt.ByteSize()) - removePhaseSwitches(netBin); + CV_Assert(netBin.ByteSize() || netTxt.ByteSize()); - RemoveIdentityOps(netBin); - RemoveIdentityOps(netTxt); + CV_LOG_INFO(NULL, "DNN/TF: parsing model" + << (netBin.has_versions() ? cv::format(" produced by TF v%d (min_consumer=%d)", (int)netBin.versions().producer(), (int)netBin.versions().min_consumer()) : cv::String(" (N/A version info)")) + << ". Number of nodes = " << netBin.node_size() + ); - if (!netTxt.ByteSize()) + if (netTxt.ByteSize()) { - simplifySubgraphs(netBin); - sortByExecutionOrder(netBin); + CV_LOG_INFO(NULL, "DNN/TF: parsing config" + << (netTxt.has_versions() ? cv::format(" produced by TF v%d (min_consumer=%d)", (int)netTxt.versions().producer(), (int)netTxt.versions().min_consumer()) : cv::String(" (N/A version info)")) + << ". 
Number of nodes = " << netTxt.node_size() + ); + + RemoveIdentityOps(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(model) => " << netBin.node_size() << " nodes"); + RemoveIdentityOps(netTxt); + CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(config) => " << netTxt.node_size() << " nodes"); + + sortByExecutionOrder(netTxt); + CV_LOG_DEBUG(NULL, "DNN/TF: sortByExecutionOrder(config) => " << netTxt.node_size() << " nodes"); } else { - sortByExecutionOrder(netTxt); - } + removePhaseSwitches(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: removePhaseSwitches(model) => " << netBin.node_size() << " nodes"); - std::set layers_to_ignore; + RemoveIdentityOps(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(model) => " << netBin.node_size() << " nodes"); + + simplifySubgraphs(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: simplifySubgraphs(model) => " << netBin.node_size() << " nodes"); + sortByExecutionOrder(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: sortByExecutionOrder(model) => " << netBin.node_size() << " nodes"); + } tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin; int layersSize = net.node_size(); - std::map data_layouts; // Pre-fill data layouts where they are set explicitly. // Assuming that nodes are in topological order - for (int i = net.node_size() - 1; i >= 0; --i) + for (int i = layersSize - 1; i >= 0; --i) { const tensorflow::NodeDef& layer = net.node(i); std::string name = layer.name(); - int layout = getDataLayout(layer); - std::map::iterator it = data_layouts.find(name); - if (it != data_layouts.end()) - { - if (layout != DATA_LAYOUT_UNKNOWN) - { - if (it->second == DATA_LAYOUT_UNKNOWN) - it->second = layout; - else if (it->second != layout) - { - it->second = DATA_LAYOUT_UNKNOWN; - layout = DATA_LAYOUT_UNKNOWN; - } - } - else - layout = it->second; - } - else - data_layouts[name] = layout; + CV_LOG_DEBUG(NULL, "DNN/TF: node(" << i << " - '" << name << "') propagating layout..."); - // Specify input layers to have the same data layout. 
- for (int j = 0; j < layer.input_size(); ++j) + try { - name = getNodeName(layer.input(j)); - it = data_layouts.find(name); + DataLayout layout = getDataLayout(layer); + std::map::iterator it = data_layouts.find(name); if (it != data_layouts.end()) { if (layout != DATA_LAYOUT_UNKNOWN) @@ -775,38 +833,94 @@ void TFImporter::populateNet(Net dstNet) if (it->second == DATA_LAYOUT_UNKNOWN) it->second = layout; else if (it->second != layout) + { it->second = DATA_LAYOUT_UNKNOWN; + layout = DATA_LAYOUT_UNKNOWN; + } } + else + layout = it->second; } else data_layouts[name] = layout; + + // Specify input layers to have the same data layout. + for (int j = 0; j < layer.input_size(); ++j) + { + name = getNodeName(layer.input(j)); + it = data_layouts.find(name); + if (it != data_layouts.end()) + { + if (layout != DATA_LAYOUT_UNKNOWN) + { + if (it->second == DATA_LAYOUT_UNKNOWN) + it->second = layout; + else if (it->second != layout) + it->second = DATA_LAYOUT_UNKNOWN; + } + } + else + data_layouts[name] = layout; + } + } + catch (const std::exception& e) + { + CV_LOG_ERROR(NULL, "DNN/TF: Can't propagate layout for node='" << name << "'. Exception: " << e.what()); + throw; } } - // find all Const layers for params - std::map value_id; - // A map with constant blobs which are shared between multiple layers. 
- std::map sharedWeights; addConstNodes(netBin, value_id, layers_to_ignore); addConstNodes(netTxt, value_id, layers_to_ignore); - std::map layer_id; for (int li = 0; li < layersSize; li++) { - tensorflow::NodeDef layer = net.node(li); - String name = layer.name(); - String type = layer.op(); + const tensorflow::NodeDef& layer = net.node(li); + + const std::string name = layer.name(); + const std::string type = layer.op(); + const int ninputs = layer.input_size(); + CV_LOG_DEBUG(NULL, "DNN/TF: (" << li << "/" << layersSize << ") Parse layer " << name << " @ " << type << " with " << ninputs << " inputs"); + + parseNode(layer); + } + + for (size_t i = 0; i < netInputsNames.size(); i++) + { + CV_LOG_DEBUG(NULL, "DNN/TF: Model input: " << i << " - '" << netInputsNames[i] << "'"); + CV_Assert(!netInputsNames[i].empty()); + } + dstNet.setInputsNames(netInputsNames); + CV_LOG_DEBUG(NULL, "DNN/TF: ===================== Import completed ====================="); +} + +void TFImporter::parseNode(const tensorflow::NodeDef& layer_) +{ + tensorflow::NodeDef layer = layer_; + + tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin; + + /*const*/ std::string name = layer.name(); + /*const*/ std::string type = layer.op(); + /*const*/ int num_inputs = layer.input_size(); + + try + { LayerParams layerParams; - if(layers_to_ignore.find(name) != layers_to_ignore.end()) - continue; + if (layers_to_ignore.find(name) != layers_to_ignore.end()) + { + CV_LOG_DEBUG(NULL, "DNN/TF: ignored"); + return; + } - int predictedLayout = predictOutputDataLayout(net, layer, data_layouts); + DataLayout predictedLayout = predictOutputDataLayout(layer); data_layouts[name] = predictedLayout; if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative" || type == "Pad" || type == "MirrorPad" || type == "Conv3D") { + CV_CheckGT(num_inputs, 0, ""); // The first node of dilated convolution subgraph. // Extract input node, dilation rate and paddings. 
std::string input = layer.input(0); @@ -824,7 +938,7 @@ void TFImporter::populateNet(Net dstNet) // input: "input" // input: "SpaceToBatchND/block_shape" // input: "SpaceToBatchND/paddings" - CV_Assert(layer.input_size() == 3); + CV_CheckEQ(num_inputs, 3, ""); DictValue dilation = parseDims(getConstBlob(layer, value_id, 1)); CV_Assert(dilation.size() == 2); @@ -839,10 +953,14 @@ void TFImporter::populateNet(Net dstNet) layerParams.set("pad_w", paddings.at(2)); CV_Assert(next_layers.size() == 1); - layer = net.node(next_layers[0].second); layers_to_ignore.insert(next_layers[0].first); + + // FIXIT don't override, rewrite this code + layer = net.node(next_layers[0].second); name = layer.name(); type = layer.op(); + num_inputs = layer.input_size(); + CV_LOG_DEBUG(NULL, "DNN/TF: switched to layer " << name << " @ " << type << ") with " << num_inputs << " inputs"); } else if (type == "Pad" || type == "MirrorPad") { @@ -876,7 +994,7 @@ void TFImporter::populateNet(Net dstNet) layer_id[name] = id; connect(layer_id, dstNet, parsePin(input), id, 0); - continue; + return; } else { @@ -886,10 +1004,14 @@ void TFImporter::populateNet(Net dstNet) layerParams.set("pad_h", paddings.at(4)); layerParams.set("pad_w", paddings.at(6)); - layer = net.node(next_layers[0].second); layers_to_ignore.insert(next_layers[0].first); + + // FIXIT don't override, rewrite this code + layer = net.node(next_layers[0].second); name = layer.name(); type = layer.op(); + num_inputs = layer.input_size(); + CV_LOG_DEBUG(NULL, "DNN/TF: switched to layer " << name << " @ " << type << ") with " << num_inputs << " inputs"); } } @@ -1011,13 +1133,14 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "BiasAdd" || type == "Add" || type == "AddV2" || type == "Sub" || type=="AddN") { + CV_CheckGT(num_inputs, 0, ""); bool haveConst = false; - for(int ii = 0; !haveConst && ii < layer.input_size(); ++ii) + for(int ii = 0; !haveConst && ii < num_inputs; ++ii) { Pin input = parsePin(layer.input(ii)); 
haveConst = value_id.find(input.name) != value_id.end(); } - CV_Assert(!haveConst || layer.input_size() == 2); + CV_Assert(!haveConst || num_inputs == 2); if (haveConst) { @@ -1054,7 +1177,7 @@ void TFImporter::populateNet(Net dstNet) int id = dstNet.addLayer(name, "Eltwise", layerParams); layer_id[name] = id; - for (int ii = 0; ii < layer.input_size(); ii++) + for (int ii = 0; ii < num_inputs; ii++) { Pin inp = parsePin(layer.input(ii)); if (layer_id.find(inp.name) == layer_id.end()) @@ -1065,7 +1188,7 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "MatMul") { - CV_Assert(layer.input_size() == 2); + CV_CheckEQ(num_inputs, 2, ""); // For the object detection networks, TensorFlow Object Detection API // predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax) @@ -1077,7 +1200,7 @@ void TFImporter::populateNet(Net dstNet) layerParams.set("bias_term", false); layerParams.blobs.resize(1); - StrIntVector next_layers = getNextLayers(net, name, "BiasAdd"); + StrIntVector next_layers = getNextLayers(net, name, "BiasAdd"); // FIXIT Use layers fusion instead if (next_layers.empty()) { next_layers = getNextLayers(net, name, "Add"); @@ -1135,8 +1258,9 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "Reshape") { + CV_CheckGT(num_inputs, 0, ""); Pin inpId = parsePin(layer.input(0)); - int inpLayout = getDataLayout(layer.input(0), data_layouts); + DataLayout inpLayout = getDataLayout(layer.input(0), data_layouts); // There are two possible implementations: reshape an input using // predefined sizes or use a second input blob as a source of new shape. 
if (value_id.find(layer.input(1)) != value_id.end()) @@ -1185,6 +1309,7 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "Flatten" || type == "Squeeze") { + CV_CheckGT(num_inputs, 0, ""); Pin inpId = parsePin(layer.input(0)); int inpLayout = getDataLayout(layer.input(0), data_layouts); if (type == "Squeeze") @@ -1231,6 +1356,7 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "Transpose") { + CV_CheckGT(num_inputs, 0, ""); Mat perm = getTensorContent(getConstBlob(layer, value_id, 1)); CV_Assert(perm.type() == CV_32SC1); int* permData = (int*)perm.data; @@ -1304,6 +1430,7 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "LRN") { + CV_CheckGT(num_inputs, 0, ""); if(hasLayerAttr(layer, "alpha")) { layerParams.set("alpha", getLayerAttr(layer, "alpha").f()); } @@ -1322,11 +1449,12 @@ void TFImporter::populateNet(Net dstNet) int id = dstNet.addLayer(name, "LRN", layerParams); layer_id[name] = id; - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size()); + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); } else if (type == "Concat" || type == "ConcatV2") { - int axisId = (type == "Concat" ? 0 : layer.input_size() - 1); + CV_CheckGT(num_inputs, 0, ""); + int axisId = (type == "Concat" ? 0 : num_inputs - 1); int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0); if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) @@ -1337,7 +1465,7 @@ void TFImporter::populateNet(Net dstNet) // input(0) or input(n-1) is concat_dim int from = (type == "Concat" ? 1 : 0); - int to = (type == "Concat" ? layer.input_size() : layer.input_size() - 1); + int to = (type == "Concat" ? 
num_inputs : num_inputs - 1); for (int ii = from; ii < to; ii++) { @@ -1370,6 +1498,7 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "MaxPool" || type == "MaxPool3D") { + CV_CheckGT(num_inputs, 0, ""); layerParams.set("pool", "max"); setKSize(layerParams, layer); @@ -1381,10 +1510,11 @@ void TFImporter::populateNet(Net dstNet) int id = dstNet.addLayer(name, "Pooling", layerParams); layer_id[name] = id; - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size()); + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); } else if (type == "AvgPool" || type == "AvgPool3D") { + CV_CheckGT(num_inputs, 0, ""); layerParams.set("pool", "ave"); layerParams.set("ave_pool_padded_area", false); setKSize(layerParams, layer); @@ -1394,11 +1524,11 @@ void TFImporter::populateNet(Net dstNet) int id = dstNet.addLayer(name, "Pooling", layerParams); layer_id[name] = id; - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size()); + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); } else if (type == "MaxPoolGrad") { - CV_Assert(layer.input_size() == 3); + CV_CheckEQ(num_inputs, 3, ""); layerParams.set("pool_k_h", 0); layerParams.set("pool_k_w", 0); @@ -1457,7 +1587,7 @@ void TFImporter::populateNet(Net dstNet) // TODO: slicing input may be Const op // TODO: slicing kernels for convolutions - in current implementation it is impossible // TODO: add parsing num of slices parameter - CV_Assert(layer.input_size() == 2); + CV_CheckEQ(num_inputs, 2, ""); // num_split // 1st blob is dims tensor int axis = getConstBlob(layer, value_id, 0).int_val().Get(0); @@ -1480,7 +1610,7 @@ void TFImporter::populateNet(Net dstNet) // input: "input_node" // input: "Slice/begin" // input: "Slice/size" - CV_Assert(layer.input_size() == 3); + CV_CheckEQ(num_inputs, 3, ""); Mat begins = getTensorContent(getConstBlob(layer, value_id, 1)); Mat sizes = getTensorContent(getConstBlob(layer, 
value_id, 2)); CV_Assert_N(!begins.empty(), !sizes.empty()); @@ -1505,7 +1635,7 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "StridedSlice") { - CV_Assert(layer.input_size() == 4); + CV_CheckEQ(num_inputs, 4, ""); Mat begins = getTensorContent(getConstBlob(layer, value_id, 1)); Mat ends = getTensorContent(getConstBlob(layer, value_id, 2)); Mat strides = getTensorContent(getConstBlob(layer, value_id, 3)); @@ -1544,8 +1674,9 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "Mul" || type == "RealDiv") { + CV_CheckGT(num_inputs, 0, ""); int constId = -1; - for(int ii = 0; ii < layer.input_size(); ++ii) + for(int ii = 0; ii < num_inputs; ++ii) { Pin input = parsePin(layer.input(ii)); if (value_id.find(input.name) != value_id.end()) @@ -1554,12 +1685,12 @@ void TFImporter::populateNet(Net dstNet) break; } } - CV_Assert((constId != -1) || (layer.input_size() == 2)); + CV_Assert((constId != -1) || (num_inputs == 2)); if (constId != -1) { // Multiplication by constant. - CV_Assert(layer.input_size() == 2); + CV_CheckEQ(num_inputs, 2, ""); Mat scaleMat = getTensorContent(getConstBlob(layer, value_id)); CV_Assert(scaleMat.type() == CV_32FC1); if (type == "RealDiv") @@ -1643,7 +1774,7 @@ void TFImporter::populateNet(Net dstNet) // Check if all the inputs have the same shape. 
bool equalInpShapes = true; MatShape outShape0; - for (int ii = 0; ii < layer.input_size() && !netInputShapes.empty(); ii++) + for (int ii = 0; ii < num_inputs && !netInputShapes.empty(); ii++) { Pin pin = parsePin(layer.input(ii)); int inpId = layer_id.find(pin.name)->second; @@ -1681,7 +1812,7 @@ void TFImporter::populateNet(Net dstNet) layer_id[name] = id; - for (int ii = 0; ii < layer.input_size(); ii++) + for (int ii = 0; ii < num_inputs; ii++) { Pin inp = parsePin(layer.input(ii)); if (layer_id.find(inp.name) == layer_id.end()) @@ -1698,9 +1829,7 @@ void TFImporter::populateNet(Net dstNet) // input: "BatchNorm/beta" // input: "BatchNorm/moving_mean" // input: "BatchNorm/moving_variance" - if (layer.input_size() != 5) - CV_Error(Error::StsNotImplemented, - "Expected gamma, beta, mean and std"); + CV_CheckEQ(num_inputs, 5, "Expected gamma, beta, mean and std"); Pin inpId = parsePin(layer.input(0)); bool isTraining = hasLayerAttr(layer, "is_training") && getLayerAttr(layer, "is_training").b(); @@ -1768,9 +1897,7 @@ void TFImporter::populateNet(Net dstNet) // input: "conv2d_transpose/output_shape" // input: "weights" // input: "input" - if (layer.input_size() != 3) - CV_Error(Error::StsNotImplemented, - "Expected output shape, weights and input nodes"); + CV_CheckEQ(num_inputs, 3, "Expected output shape, weights and input nodes"); layerParams.set("bias_term", false); layerParams.blobs.resize(1); @@ -1845,8 +1972,7 @@ void TFImporter::populateNet(Net dstNet) // input: "lstm_block_wrapper/w_f_diag" // input: "lstm_block_wrapper/w_o_diag" // input: "lstm_block_wrapper/bias" - if (layer.input_size() != 9) - CV_Error(Error::StsNotImplemented, "Unexpected number of input nodes"); + CV_CheckEQ(num_inputs, 9, "Unexpected number of input nodes"); if (hasLayerAttr(layer, "forget_bias")) layerParams.set("forget_bias", getLayerAttr(layer, "forget_bias").f()); @@ -1912,6 +2038,7 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "ResizeNearestNeighbor" || type == 
"ResizeBilinear" || type == "FusedResizeAndPadConv2D") { + CV_CheckGT(num_inputs, 0, ""); std::string convWeights = ""; if (type == "FusedResizeAndPadConv2D") { @@ -1919,30 +2046,32 @@ void TFImporter::populateNet(Net dstNet) // input: "decoder/ResizeBilinear/size" // input: "decoder/decoder_conv0/Conv2D_dummy_paddings" // input: "decoder/decoder_conv0/weights" - CV_CheckEQ(layer.input_size(), 4, "Number of input for FusedResizeAndPadConv2D"); + CV_CheckEQ(num_inputs, 4, "Number of input for FusedResizeAndPadConv2D"); Mat paddings = getTensorContent(getConstBlob(layer, value_id, 2)); CV_CheckEQ(countNonZero(paddings), 0, "Unsupported mode"); convWeights = layer.input(3); - layer.mutable_input()->DeleteSubrange(2, 2); + layer.mutable_input()->DeleteSubrange(2, 2); // FIXIT do NOT modify input model + num_inputs = layer.input_size(); name = name + "/resize"; if (hasLayerAttr(layer, "resize_align_corners")) { + // FIXIT do NOT modify input model layer.mutable_attr()->insert( ::google::protobuf::MapPair("align_corners", getLayerAttr(layer, "resize_align_corners"))); } } - if (layer.input_size() == 2) + if (num_inputs == 2) { Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1)); CV_CheckTypeEQ(outSize.type(), CV_32SC1, ""); CV_CheckEQ(outSize.total(), (size_t)2, ""); layerParams.set("height", outSize.at(0, 0)); layerParams.set("width", outSize.at(0, 1)); } - else if (layer.input_size() == 3) + else if (num_inputs == 3) { Mat factorHeight = getTensorContent(getConstBlob(layer, value_id, 1)); Mat factorWidth = getTensorContent(getConstBlob(layer, value_id, 2)); @@ -1952,7 +2081,7 @@ void TFImporter::populateNet(Net dstNet) layerParams.set("zoom_factor_y", factorHeight.at(0)); } else - CV_Assert(layer.input_size() == 2 || layer.input_size() == 3); + CV_Check(num_inputs, num_inputs == 2 || num_inputs == 3, ""); if (type == "ResizeNearestNeighbor") layerParams.set("interpolation", "nearest"); @@ -1973,12 +2102,12 @@ void TFImporter::populateNet(Net dstNet) // 
Step back to add convolution if (type == "FusedResizeAndPadConv2D") { - tensorflow::NodeDef* conv = net.mutable_node(li); - conv->clear_input(); - conv->add_input(name); - conv->add_input(convWeights); - conv->set_op("Conv2D"); - li -= 1; + tensorflow::NodeDef conv = layer_; + conv.clear_input(); + conv.add_input(name); + conv.add_input(convWeights); + conv.set_op("Conv2D"); + parseNode(conv); } } else if (type == "L2Normalize") @@ -1986,7 +2115,7 @@ void TFImporter::populateNet(Net dstNet) // op: "L2Normalize" // input: "input" // input: "reduction_indices" (axis) - CV_Assert(layer.input_size() == 2); + CV_CheckEQ(num_inputs, 2, ""); Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1)); CV_Assert(reductionIndices.type() == CV_32SC1); @@ -2011,6 +2140,7 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "PriorBox") { + CV_CheckEQ(num_inputs, 2, ""); if (hasLayerAttr(layer, "min_size")) layerParams.set("min_size", getLayerAttr(layer, "min_size").i()); if (hasLayerAttr(layer, "max_size")) @@ -2043,12 +2173,13 @@ void TFImporter::populateNet(Net dstNet) } else if (type == "Softmax") { + CV_CheckGT(num_inputs, 0, ""); if (hasLayerAttr(layer, "axis")) layerParams.set("axis", getLayerAttr(layer, "axis").i()); int id = dstNet.addLayer(name, "Softmax", layerParams); layer_id[name] = id; - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size()); + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); } else if (type == "CropAndResize") { @@ -2056,7 +2187,7 @@ void TFImporter::populateNet(Net dstNet) // input: "input" // input: "boxes" // input: "sizes" - CV_Assert(layer.input_size() == 3); + CV_CheckEQ(num_inputs, 3, ""); Mat cropSize = getTensorContent(getConstBlob(layer, value_id, 2)); CV_CheckTypeEQ(cropSize.type(), CV_32SC1, ""); CV_CheckEQ(cropSize.total(), (size_t)2, ""); @@ -2084,6 +2215,7 @@ void TFImporter::populateNet(Net dstNet) // determine out shape: NxCxHxW --Slice--> 
1xCxHxW // out_shape = 1xCxHxW if keepDims else (1xCxHxW --Flatten--> CxHxW) // global pool: NxCxHxW --Flatten--> Nx(C*H*W) --Reshape--> 1x1xNx(C*H*W) --Pooling--> 1x1x1x(C*H*W) --Reshape--> out_shape + CV_CheckGT(num_inputs, 0, ""); Mat indices = getTensorContent(getConstBlob(layer, value_id, 1)); CV_Assert(indices.type() == CV_32SC1); @@ -2218,6 +2350,7 @@ void TFImporter::populateNet(Net dstNet) // Example: given a list with "N" tensors of shape (C, H, W): // if axis == 0 then the output tensor will have the shape (N, C, H, W), // if axis == 1 then the output tensor will have the shape (C, N, H, W). + CV_CheckGT(num_inputs, 0, ""); CV_Assert(hasLayerAttr(layer, "axis")); int dim = (int)getLayerAttr(layer, "axis").i(); if (dim != 0) @@ -2225,7 +2358,7 @@ void TFImporter::populateNet(Net dstNet) CV_Assert(hasLayerAttr(layer, "N")); int num = (int)getLayerAttr(layer, "N").i(); - CV_Assert(layer.input_size() == num); + CV_CheckEQ(num_inputs, num, ""); std::string base_name = name + "/reshape_"; std::vector reshape_ids; for (int i = 0; i < num; i++) { @@ -2256,7 +2389,7 @@ void TFImporter::populateNet(Net dstNet) // input: "input" // input: "mix" // input: "max" - CV_Assert(layer.input_size() == 3); + CV_CheckEQ(num_inputs, 3, ""); Mat minValue = getTensorContent(getConstBlob(layer, value_id, 1)); Mat maxValue = getTensorContent(getConstBlob(layer, value_id, 2)); @@ -2275,6 +2408,7 @@ void TFImporter::populateNet(Net dstNet) type == "Relu" || type == "Elu" || type == "Identity" || type == "Relu6") { + CV_CheckGT(num_inputs, 0, ""); std::string dnnType = type; if (type == "Abs") dnnType = "AbsVal"; else if (type == "Tanh") dnnType = "TanH"; @@ -2284,7 +2418,7 @@ void TFImporter::populateNet(Net dstNet) int id = dstNet.addLayer(name, dnnType, layerParams); layer_id[name] = id; - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size()); + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); } else { @@ -2308,7 
+2442,7 @@ void TFImporter::populateNet(Net dstNet) // All the Const input nodes are added to layer's blobs. std::vector inputsNames; - for (int i = 0; i < layer.input_size(); ++i) + for (int i = 0; i < num_inputs; ++i) { // Check if input is a Const node. if (value_id.find(layer.input(i)) != value_id.end()) @@ -2328,7 +2462,11 @@ void TFImporter::populateNet(Net dstNet) } } } - dstNet.setInputsNames(netInputsNames); + catch (const std::exception& e) + { + CV_LOG_ERROR(NULL, "DNN/TF: Can't parse layer for node='" << name << "'. Exception: " << e.what()); + throw; + } } } // namespace @@ -2337,18 +2475,16 @@ void TFImporter::populateNet(Net dstNet) Net readNetFromTensorflow(const String &model, const String &config) { - TFImporter importer(model.c_str(), config.c_str()); Net net; - importer.populateNet(net); + TFImporter importer(net, model.c_str(), config.c_str()); return net; } Net readNetFromTensorflow(const char* bufferModel, size_t lenModel, const char* bufferConfig, size_t lenConfig) { - TFImporter importer(bufferModel, lenModel, bufferConfig, lenConfig); Net net; - importer.populateNet(net); + TFImporter importer(net, bufferModel, lenModel, bufferConfig, lenConfig); return net; } diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index 533c2234c5..337c12826d 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -587,7 +587,7 @@ enum ColorConversionCodes { COLOR_YCrCb2BGR = 38, COLOR_YCrCb2RGB = 39, - COLOR_BGR2HSV = 40, //!< convert RGB/BGR to HSV (hue saturation value), @ref color_convert_rgb_hsv "color conversions" + COLOR_BGR2HSV = 40, //!< convert RGB/BGR to HSV (hue saturation value) with H range 0..180 if 8 bit image, @ref color_convert_rgb_hsv "color conversions" COLOR_RGB2HSV = 41, COLOR_BGR2Lab = 44, //!< convert RGB/BGR to CIE Lab, @ref color_convert_rgb_lab "color conversions" @@ -595,27 +595,27 @@ enum ColorConversionCodes { COLOR_BGR2Luv = 
50, //!< convert RGB/BGR to CIE Luv, @ref color_convert_rgb_luv "color conversions" COLOR_RGB2Luv = 51, - COLOR_BGR2HLS = 52, //!< convert RGB/BGR to HLS (hue lightness saturation), @ref color_convert_rgb_hls "color conversions" + COLOR_BGR2HLS = 52, //!< convert RGB/BGR to HLS (hue lightness saturation) with H range 0..180 if 8 bit image, @ref color_convert_rgb_hls "color conversions" COLOR_RGB2HLS = 53, - COLOR_HSV2BGR = 54, //!< backward conversions to RGB/BGR + COLOR_HSV2BGR = 54, //!< backward conversions HSV to RGB/BGR with H range 0..180 if 8 bit image COLOR_HSV2RGB = 55, COLOR_Lab2BGR = 56, COLOR_Lab2RGB = 57, COLOR_Luv2BGR = 58, COLOR_Luv2RGB = 59, - COLOR_HLS2BGR = 60, + COLOR_HLS2BGR = 60, //!< backward conversions HLS to RGB/BGR with H range 0..180 if 8 bit image COLOR_HLS2RGB = 61, - COLOR_BGR2HSV_FULL = 66, + COLOR_BGR2HSV_FULL = 66, //!< convert RGB/BGR to HSV (hue saturation value) with H range 0..255 if 8 bit image, @ref color_convert_rgb_hsv "color conversions" COLOR_RGB2HSV_FULL = 67, - COLOR_BGR2HLS_FULL = 68, + COLOR_BGR2HLS_FULL = 68, //!< convert RGB/BGR to HLS (hue lightness saturation) with H range 0..255 if 8 bit image, @ref color_convert_rgb_hls "color conversions" COLOR_RGB2HLS_FULL = 69, - COLOR_HSV2BGR_FULL = 70, + COLOR_HSV2BGR_FULL = 70, //!< backward conversions HSV to RGB/BGR with H range 0..255 if 8 bit image COLOR_HSV2RGB_FULL = 71, - COLOR_HLS2BGR_FULL = 72, + COLOR_HLS2BGR_FULL = 72, //!< backward conversions HLS to RGB/BGR with H range 0..255 if 8 bit image COLOR_HLS2RGB_FULL = 73, COLOR_LBGR2Lab = 74, diff --git a/samples/python/tutorial_code/video/background_subtraction/bg_sub.py b/samples/python/tutorial_code/video/background_subtraction/bg_sub.py index 15330fc8b0..1bf3d2fdd8 100644 --- a/samples/python/tutorial_code/video/background_subtraction/bg_sub.py +++ b/samples/python/tutorial_code/video/background_subtraction/bg_sub.py @@ -18,7 +18,7 @@ else: ## [capture] capture = 
cv.VideoCapture(cv.samples.findFileOrKeep(args.input)) -if not capture.isOpened: +if not capture.isOpened(): print('Unable to open: ' + args.input) exit(0) ## [capture]