@@ -104,7 +104,7 @@ template<typename _Tp> struct V_TypeTraits
 {
 };
 
-#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_, nlanes128_) \
+#define CV_INTRIN_DEF_TYPE_TRAITS(type, int_type_, uint_type_, abs_type_, w_type_, q_type_, sum_type_) \
 template<> struct V_TypeTraits<type> \
 { \
     typedef type value_type; \
@@ -114,7 +114,6 @@ template<typename _Tp> struct V_TypeTraits
     typedef w_type_ w_type; \
     typedef q_type_ q_type; \
     typedef sum_type_ sum_type; \
-    enum { nlanes128 = nlanes128_ }; \
 \
     static inline int_type reinterpret_int(type x) \
     { \
@@ -131,7 +130,7 @@ template<typename _Tp> struct V_TypeTraits
     } \
 }
 
-#define CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_, nlanes128_) \
+#define CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(type, int_type_, uint_type_, abs_type_, w_type_, sum_type_) \
 template<> struct V_TypeTraits<type> \
 { \
     typedef type value_type; \
@@ -140,7 +139,6 @@ template<typename _Tp> struct V_TypeTraits
     typedef uint_type_ uint_type; \
     typedef w_type_ w_type; \
     typedef sum_type_ sum_type; \
-    enum { nlanes128 = nlanes128_ }; \
 \
     static inline int_type reinterpret_int(type x) \
     { \
@@ -157,16 +155,16 @@ template<typename _Tp> struct V_TypeTraits
     } \
 }
 
-CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned, 16);
-CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int, 16);
-CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned, 8);
-CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int, 8);
-CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(unsigned, int, unsigned, unsigned, uint64, unsigned, 4);
-CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int, int, unsigned, unsigned, int64, int, 4);
-CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(float, int, unsigned, float, double, float, 4);
-CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(uint64, int64, uint64, uint64, void, uint64, 2);
-CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int64, int64, uint64, uint64, void, int64, 2);
-CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double, 2);
+CV_INTRIN_DEF_TYPE_TRAITS(uchar, schar, uchar, uchar, ushort, unsigned, unsigned);
+CV_INTRIN_DEF_TYPE_TRAITS(schar, schar, uchar, uchar, short, int, int);
+CV_INTRIN_DEF_TYPE_TRAITS(ushort, short, ushort, ushort, unsigned, uint64, unsigned);
+CV_INTRIN_DEF_TYPE_TRAITS(short, short, ushort, ushort, int, int64, int);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(unsigned, int, unsigned, unsigned, uint64, unsigned);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int, int, unsigned, unsigned, int64, int);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(float, int, unsigned, float, double, float);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(uint64, int64, uint64, uint64, void, uint64);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(int64, int64, uint64, uint64, void, int64);
+CV_INTRIN_DEF_TYPE_TRAITS_NO_Q_TYPE(double, int64, uint64, double, void, double);
 
 #ifndef CV_DOXYGEN
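
A note on what these traits are for (illustrative commentary, not part of the patch): V_TypeTraits maps an element type to its signed/unsigned/widened companions, so generic kernels can pick a safe accumulator type. A minimal sketch, using only the typedefs defined above:

    // V_TypeTraits<short>::w_type is int and ::sum_type is int (see the table above).
    static inline V_TypeTraits<short>::sum_type sum_shorts(const short* p, int n)
    {
        V_TypeTraits<short>::sum_type s = 0;            // int accumulator
        for (int i = 0; i < n; ++i)
            s += (V_TypeTraits<short>::w_type)p[i];     // widen before adding
        return s;
    }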

@@ -314,54 +312,6 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
 //==================================================================================================
 
-#define CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \
-    inline vtyp vx_setall_##short_typ(typ v) { return prefix##_setall_##short_typ(v); } \
-    inline vtyp vx_setzero_##short_typ() { return prefix##_setzero_##short_typ(); } \
-    inline vtyp vx_##loadsfx(const typ* ptr) { return prefix##_##loadsfx(ptr); } \
-    inline vtyp vx_##loadsfx##_aligned(const typ* ptr) { return prefix##_##loadsfx##_aligned(ptr); } \
-    inline vtyp vx_##loadsfx##_low(const typ* ptr) { return prefix##_##loadsfx##_low(ptr); } \
-    inline vtyp vx_##loadsfx##_halves(const typ* ptr0, const typ* ptr1) { return prefix##_##loadsfx##_halves(ptr0, ptr1); } \
-    inline void vx_store(typ* ptr, const vtyp& v) { return v_store(ptr, v); } \
-    inline void vx_store_aligned(typ* ptr, const vtyp& v) { return v_store_aligned(ptr, v); } \
-    inline vtyp vx_lut(const typ* ptr, const int* idx) { return prefix##_lut(ptr, idx); } \
-    inline vtyp vx_lut_pairs(const typ* ptr, const int* idx) { return prefix##_lut_pairs(ptr, idx); }
-
-#define CV_INTRIN_DEFINE_WIDE_LUT_QUAD(typ, vtyp, prefix) \
-    inline vtyp vx_lut_quads(const typ* ptr, const int* idx) { return prefix##_lut_quads(ptr, idx); }
-
-#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \
-    inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); }
-
-#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) \
-    inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); }
-
-#define CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(typ, vtyp, short_typ, wtyp, qtyp, prefix, loadsfx) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \
-    CV_INTRIN_DEFINE_WIDE_LUT_QUAD(typ, vtyp, prefix) \
-    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \
-    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix)
-
-#define CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(prefix) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(uchar, v_uint8, u8, v_uint16, v_uint32, prefix, load) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(schar, v_int8, s8, v_int16, v_int32, prefix, load) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN(ushort, v_uint16, u16, prefix, load) \
-    CV_INTRIN_DEFINE_WIDE_LUT_QUAD(ushort, v_uint16, prefix) \
-    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(ushort, v_uint32, prefix) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN(short, v_int16, s16, prefix, load) \
-    CV_INTRIN_DEFINE_WIDE_LUT_QUAD(short, v_int16, prefix) \
-    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(short, v_int32, prefix) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN(int, v_int32, s32, prefix, load) \
-    CV_INTRIN_DEFINE_WIDE_LUT_QUAD(int, v_int32, prefix) \
-    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(int, v_int64, prefix) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN(unsigned, v_uint32, u32, prefix, load) \
-    CV_INTRIN_DEFINE_WIDE_LUT_QUAD(unsigned, v_uint32, prefix) \
-    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(unsigned, v_uint64, prefix) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN(float, v_float32, f32, prefix, load) \
-    CV_INTRIN_DEFINE_WIDE_LUT_QUAD(float, v_float32, prefix) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN(int64, v_int64, s64, prefix, load) \
-    CV_INTRIN_DEFINE_WIDE_INTRIN(uint64, v_uint64, u64, prefix, load) \
-    CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(float16_t, v_float32, prefix)
-
 template<typename _Tp> struct V_RegTraits
 {
 };
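
For context on the deletion above: each of these macros only stamped out thin forwarding wrappers. For instance, CV_INTRIN_DEFINE_WIDE_INTRIN(float, v_float32, f32, v256, load) expanded in part to:

    inline v_float32 vx_setall_f32(float v) { return v256_setall_f32(v); }
    inline v_float32 vx_setzero_f32() { return v256_setzero_f32(); }
    inline v_float32 vx_load(const float* ptr) { return v256_load(ptr); }

The same wrappers are re-introduced further below as explicit, documented inline functions built on the VXPREFIX macro.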

@@ -421,6 +371,7 @@ template<typename _Tp> struct V_RegTraits
 CV_DEF_REG_TRAITS(v512, v_int64x8, int64, s64, v_uint64x8, void, void, v_int64x8, void);
 CV_DEF_REG_TRAITS(v512, v_float64x8, double, f64, v_float64x8, void, void, v_int64x8, v_int32x16);
 #endif
+//! @endcond
 
 #if CV_SIMD512 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 512)
 #define CV__SIMD_NAMESPACE simd512
@@ -429,21 +380,33 @@ namespace CV__SIMD_NAMESPACE {
 #define CV_SIMD_64F CV_SIMD512_64F
 #define CV_SIMD_FP16 CV_SIMD512_FP16
 #define CV_SIMD_WIDTH 64
+//! @addtogroup core_hal_intrin
+//! @{
+    //! @brief Maximum available vector register capacity of 8-bit unsigned integer values
     typedef v_uint8x64 v_uint8;
+    //! @brief Maximum available vector register capacity of 8-bit signed integer values
     typedef v_int8x64 v_int8;
+    //! @brief Maximum available vector register capacity of 16-bit unsigned integer values
     typedef v_uint16x32 v_uint16;
+    //! @brief Maximum available vector register capacity of 16-bit signed integer values
     typedef v_int16x32 v_int16;
+    //! @brief Maximum available vector register capacity of 32-bit unsigned integer values
     typedef v_uint32x16 v_uint32;
+    //! @brief Maximum available vector register capacity of 32-bit signed integer values
     typedef v_int32x16 v_int32;
+    //! @brief Maximum available vector register capacity of 64-bit unsigned integer values
     typedef v_uint64x8 v_uint64;
+    //! @brief Maximum available vector register capacity of 64-bit signed integer values
     typedef v_int64x8 v_int64;
+    //! @brief Maximum available vector register capacity of 32-bit floating point values (single precision)
     typedef v_float32x16 v_float32;
-    CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v512)
 #if CV_SIMD512_64F
+    //! @brief Maximum available vector register capacity of 64-bit floating point values (double precision)
     typedef v_float64x8 v_float64;
-    CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v512, load)
 #endif
-    inline void vx_cleanup() { v512_cleanup(); }
+//! @}
+    #define VXPREFIX(func) v512##func
 } // namespace
 using namespace CV__SIMD_NAMESPACE;
 
 #elif CV_SIMD256 && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 256)
@@ -453,21 +416,33 @@ namespace CV__SIMD_NAMESPACE {
 #define CV_SIMD_64F CV_SIMD256_64F
 #define CV_SIMD_FP16 CV_SIMD256_FP16
 #define CV_SIMD_WIDTH 32
+//! @addtogroup core_hal_intrin
+//! @{
+    //! @brief Maximum available vector register capacity of 8-bit unsigned integer values
     typedef v_uint8x32 v_uint8;
+    //! @brief Maximum available vector register capacity of 8-bit signed integer values
     typedef v_int8x32 v_int8;
+    //! @brief Maximum available vector register capacity of 16-bit unsigned integer values
     typedef v_uint16x16 v_uint16;
+    //! @brief Maximum available vector register capacity of 16-bit signed integer values
     typedef v_int16x16 v_int16;
+    //! @brief Maximum available vector register capacity of 32-bit unsigned integer values
     typedef v_uint32x8 v_uint32;
+    //! @brief Maximum available vector register capacity of 32-bit signed integer values
     typedef v_int32x8 v_int32;
+    //! @brief Maximum available vector register capacity of 64-bit unsigned integer values
     typedef v_uint64x4 v_uint64;
+    //! @brief Maximum available vector register capacity of 64-bit signed integer values
     typedef v_int64x4 v_int64;
+    //! @brief Maximum available vector register capacity of 32-bit floating point values (single precision)
     typedef v_float32x8 v_float32;
-    CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v256)
 #if CV_SIMD256_64F
+    //! @brief Maximum available vector register capacity of 64-bit floating point values (double precision)
     typedef v_float64x4 v_float64;
-    CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v256, load)
 #endif
-    inline void vx_cleanup() { v256_cleanup(); }
+//! @}
+    #define VXPREFIX(func) v256##func
 } // namespace
 using namespace CV__SIMD_NAMESPACE;
 
 #elif (CV_SIMD128 || CV_SIMD128_CPP) && (!defined(CV__SIMD_FORCE_WIDTH) || CV__SIMD_FORCE_WIDTH == 128)
@@ -480,25 +455,228 @@ namespace CV__SIMD_NAMESPACE {
 #define CV_SIMD CV_SIMD128
 #define CV_SIMD_64F CV_SIMD128_64F
 #define CV_SIMD_WIDTH 16
+//! @addtogroup core_hal_intrin
+//! @{
+    //! @brief Maximum available vector register capacity of 8-bit unsigned integer values
     typedef v_uint8x16 v_uint8;
+    //! @brief Maximum available vector register capacity of 8-bit signed integer values
     typedef v_int8x16 v_int8;
+    //! @brief Maximum available vector register capacity of 16-bit unsigned integer values
     typedef v_uint16x8 v_uint16;
+    //! @brief Maximum available vector register capacity of 16-bit signed integer values
     typedef v_int16x8 v_int16;
+    //! @brief Maximum available vector register capacity of 32-bit unsigned integer values
     typedef v_uint32x4 v_uint32;
+    //! @brief Maximum available vector register capacity of 32-bit signed integer values
     typedef v_int32x4 v_int32;
+    //! @brief Maximum available vector register capacity of 64-bit unsigned integer values
     typedef v_uint64x2 v_uint64;
+    //! @brief Maximum available vector register capacity of 64-bit signed integer values
     typedef v_int64x2 v_int64;
+    //! @brief Maximum available vector register capacity of 32-bit floating point values (single precision)
     typedef v_float32x4 v_float32;
-    CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v)
 #if CV_SIMD128_64F
+    //! @brief Maximum available vector register capacity of 64-bit floating point values (double precision)
     typedef v_float64x2 v_float64;
-    CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v, load)
 #endif
-    inline void vx_cleanup() { v_cleanup(); }
+//! @}
+    #define VXPREFIX(func) v##func
 } // namespace
 using namespace CV__SIMD_NAMESPACE;
 
 #endif
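
The net effect of the three branches above is that code written against the width-agnostic names (v_uint8, v_float32, vx_load, ...) compiles to 512-, 256- or 128-bit registers depending on what the build enables. An illustrative sketch, assuming the rest of the universal-intrinsics API from this header (v_float32::nlanes, v_fma, v_reduce_sum):

    inline float dot(const float* a, const float* b, int n)
    {
        v_float32 s = vx_setzero_f32();
        int i = 0;
        for (; i <= n - v_float32::nlanes; i += v_float32::nlanes)
            s = v_fma(vx_load(a + i), vx_load(b + i), s);   // per-lane a*b + s
        float r = v_reduce_sum(s);                          // horizontal sum
        for (; i < n; ++i)
            r += a[i] * b[i];                               // scalar tail
        return r;
    }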
+
+namespace CV__SIMD_NAMESPACE {
+//! @addtogroup core_hal_intrin
+//! @{
+    //! @name Wide init with value
+    //! @{
+    //! @brief Create a maximum available capacity vector with all elements set to a specific value
+    inline v_uint8 vx_setall_u8(uchar v) { return VXPREFIX(_setall_u8)(v); }
+    inline v_int8 vx_setall_s8(schar v) { return VXPREFIX(_setall_s8)(v); }
+    inline v_uint16 vx_setall_u16(ushort v) { return VXPREFIX(_setall_u16)(v); }
+    inline v_int16 vx_setall_s16(short v) { return VXPREFIX(_setall_s16)(v); }
+    inline v_int32 vx_setall_s32(int v) { return VXPREFIX(_setall_s32)(v); }
+    inline v_uint32 vx_setall_u32(unsigned v) { return VXPREFIX(_setall_u32)(v); }
+    inline v_float32 vx_setall_f32(float v) { return VXPREFIX(_setall_f32)(v); }
+    inline v_int64 vx_setall_s64(int64 v) { return VXPREFIX(_setall_s64)(v); }
+    inline v_uint64 vx_setall_u64(uint64 v) { return VXPREFIX(_setall_u64)(v); }
+#if CV_SIMD_64F
+    inline v_float64 vx_setall_f64(double v) { return VXPREFIX(_setall_f64)(v); }
+#endif
+    //! @}
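
For example (illustrative, assuming a 128-bit build, so v_float32 has 4 lanes):

    v_float32 half = vx_setall_f32(0.5f);   // {0.5, 0.5, 0.5, 0.5}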
+
+    //! @name Wide init with zero
+    //! @{
+    //! @brief Create a maximum available capacity vector with all elements set to zero
+    inline v_uint8 vx_setzero_u8() { return VXPREFIX(_setzero_u8)(); }
+    inline v_int8 vx_setzero_s8() { return VXPREFIX(_setzero_s8)(); }
+    inline v_uint16 vx_setzero_u16() { return VXPREFIX(_setzero_u16)(); }
+    inline v_int16 vx_setzero_s16() { return VXPREFIX(_setzero_s16)(); }
+    inline v_int32 vx_setzero_s32() { return VXPREFIX(_setzero_s32)(); }
+    inline v_uint32 vx_setzero_u32() { return VXPREFIX(_setzero_u32)(); }
+    inline v_float32 vx_setzero_f32() { return VXPREFIX(_setzero_f32)(); }
+    inline v_int64 vx_setzero_s64() { return VXPREFIX(_setzero_s64)(); }
+    inline v_uint64 vx_setzero_u64() { return VXPREFIX(_setzero_u64)(); }
+#if CV_SIMD_64F
+    inline v_float64 vx_setzero_f64() { return VXPREFIX(_setzero_f64)(); }
+#endif
+    //! @}
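
Typically used to initialize accumulators before a reduction loop, e.g.:

    v_uint32 acc = vx_setzero_u32();   // all lanes 0, whatever the register width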
+
+    //! @name Wide load from memory
+    //! @{
+    //! @brief Load maximum available capacity register contents from memory
+    inline v_uint8 vx_load(const uchar* ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_int8 vx_load(const schar* ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_uint16 vx_load(const ushort* ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_int16 vx_load(const short* ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_int32 vx_load(const int* ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_uint32 vx_load(const unsigned* ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_float32 vx_load(const float* ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_int64 vx_load(const int64* ptr) { return VXPREFIX(_load)(ptr); }
+    inline v_uint64 vx_load(const uint64* ptr) { return VXPREFIX(_load)(ptr); }
+#if CV_SIMD_64F
+    inline v_float64 vx_load(const double* ptr) { return VXPREFIX(_load)(ptr); }
+#endif
+    //! @}
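
These are unaligned loads of one full register, so the pointer must have at least CV_SIMD_WIDTH valid bytes behind it. A typical copy-loop sketch (vx_store is the backward-compatibility wrapper defined near the end of this section):

    for (int i = 0; i + v_float32::nlanes <= n; i += v_float32::nlanes)
        vx_store(dst + i, vx_load(src + i));   // full vectors; remainder handled separately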
+
+    //! @name Wide load from memory (aligned)
+    //! @{
+    //! @brief Load maximum available capacity register contents from memory (aligned)
+    inline v_uint8 vx_load_aligned(const uchar* ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_int8 vx_load_aligned(const schar* ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_uint16 vx_load_aligned(const ushort* ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_int16 vx_load_aligned(const short* ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_int32 vx_load_aligned(const int* ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_uint32 vx_load_aligned(const unsigned* ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_float32 vx_load_aligned(const float* ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_int64 vx_load_aligned(const int64* ptr) { return VXPREFIX(_load_aligned)(ptr); }
+    inline v_uint64 vx_load_aligned(const uint64* ptr) { return VXPREFIX(_load_aligned)(ptr); }
+#if CV_SIMD_64F
+    inline v_float64 vx_load_aligned(const double* ptr) { return VXPREFIX(_load_aligned)(ptr); }
+#endif
+    //! @}
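
Same as vx_load, but the pointer must be aligned to CV_SIMD_WIDTH bytes. For instance (assuming the CV_DECL_ALIGNED macro from cvdef.h):

    CV_DECL_ALIGNED(CV_SIMD_WIDTH) float buf[64];
    v_float32 v = vx_load_aligned(buf);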
+
+    //! @name Wide load lower half from memory
+    //! @{
+    //! @brief Load lower half of maximum available capacity register from memory
+    inline v_uint8 vx_load_low(const uchar* ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_int8 vx_load_low(const schar* ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_uint16 vx_load_low(const ushort* ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_int16 vx_load_low(const short* ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_int32 vx_load_low(const int* ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_uint32 vx_load_low(const unsigned* ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_float32 vx_load_low(const float* ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_int64 vx_load_low(const int64* ptr) { return VXPREFIX(_load_low)(ptr); }
+    inline v_uint64 vx_load_low(const uint64* ptr) { return VXPREFIX(_load_low)(ptr); }
+#if CV_SIMD_64F
+    inline v_float64 vx_load_low(const double* ptr) { return VXPREFIX(_load_low)(ptr); }
+#endif
+    //! @}
+
+    //! @name Wide load halves from memory
+    //! @{
+    //! @brief Load maximum available capacity register contents from two memory blocks, one per half
+    inline v_uint8 vx_load_halves(const uchar* ptr0, const uchar* ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_int8 vx_load_halves(const schar* ptr0, const schar* ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_uint16 vx_load_halves(const ushort* ptr0, const ushort* ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_int16 vx_load_halves(const short* ptr0, const short* ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_int32 vx_load_halves(const int* ptr0, const int* ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_uint32 vx_load_halves(const unsigned* ptr0, const unsigned* ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_float32 vx_load_halves(const float* ptr0, const float* ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_int64 vx_load_halves(const int64* ptr0, const int64* ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+    inline v_uint64 vx_load_halves(const uint64* ptr0, const uint64* ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+#if CV_SIMD_64F
+    inline v_float64 vx_load_halves(const double* ptr0, const double* ptr1) { return VXPREFIX(_load_halves)(ptr0, ptr1); }
+#endif
+    //! @}
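
vx_load_low fills only the lower half of a register (useful for short tails), while vx_load_halves gathers the two halves from unrelated addresses, e.g. two image rows (row0 and row1 are hypothetical const float* pointers):

    v_float32 v = vx_load_halves(row0, row1);   // low half from row0, high half from row1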
+
+    //! @name Wide LUT of elements
+    //! @{
+    //! @brief Load maximum available capacity register contents with array elements picked by the provided indexes
+    inline v_uint8 vx_lut(const uchar* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_int8 vx_lut(const schar* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_uint16 vx_lut(const ushort* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_int16 vx_lut(const short* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_int32 vx_lut(const int* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_uint32 vx_lut(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_float32 vx_lut(const float* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_int64 vx_lut(const int64* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+    inline v_uint64 vx_lut(const uint64* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+#if CV_SIMD_64F
+    inline v_float64 vx_lut(const double* ptr, const int* idx) { return VXPREFIX(_lut)(ptr, idx); }
+#endif
+    //! @}
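
A LUT load is a gather: lane i receives ptr[idx[i]]. Sketch (assuming 4 float lanes, i.e. a 128-bit build, and a hypothetical float table[]):

    int idx[4] = { 7, 0, 3, 1 };
    v_float32 g = vx_lut(table, idx);   // {table[7], table[0], table[3], table[1]}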
+
+    //! @name Wide LUT of element pairs
+    //! @{
+    //! @brief Load maximum available capacity register contents with array element pairs picked by the provided indexes
+    inline v_uint8 vx_lut_pairs(const uchar* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_int8 vx_lut_pairs(const schar* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_uint16 vx_lut_pairs(const ushort* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_int16 vx_lut_pairs(const short* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_int32 vx_lut_pairs(const int* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_uint32 vx_lut_pairs(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_float32 vx_lut_pairs(const float* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_int64 vx_lut_pairs(const int64* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+    inline v_uint64 vx_lut_pairs(const uint64* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+#if CV_SIMD_64F
+    inline v_float64 vx_lut_pairs(const double* ptr, const int* idx) { return VXPREFIX(_lut_pairs)(ptr, idx); }
+#endif
+    //! @}
+
+    //! @name Wide LUT of element quads
+    //! @{
+    //! @brief Load maximum available capacity register contents with array element quads picked by the provided indexes
+    inline v_uint8 vx_lut_quads(const uchar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    inline v_int8 vx_lut_quads(const schar* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    inline v_uint16 vx_lut_quads(const ushort* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    inline v_int16 vx_lut_quads(const short* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    inline v_int32 vx_lut_quads(const int* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    inline v_uint32 vx_lut_quads(const unsigned* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    inline v_float32 vx_lut_quads(const float* ptr, const int* idx) { return VXPREFIX(_lut_quads)(ptr, idx); }
+    //! @}
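
The pair/quad variants gather 2 or 4 consecutive elements per index, so they need only nlanes/2 or nlanes/4 indexes. Sketch (again assuming 4 float lanes and a hypothetical float table[]):

    int idx2[2] = { 4, 0 };
    v_float32 p = vx_lut_pairs(table, idx2);   // {table[4], table[5], table[0], table[1]}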
+
+    //! @name Wide load with double expansion
+    //! @{
+    //! @brief Load maximum available capacity register contents from memory with double expansion
+    inline v_uint16 vx_load_expand(const uchar* ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_int16 vx_load_expand(const schar* ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_uint32 vx_load_expand(const ushort* ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_int32 vx_load_expand(const short* ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_int64 vx_load_expand(const int* ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_uint64 vx_load_expand(const unsigned* ptr) { return VXPREFIX(_load_expand)(ptr); }
+    inline v_float32 vx_load_expand(const float16_t* ptr) { return VXPREFIX(_load_expand)(ptr); }
+    //! @}
+
+    //! @name Wide load with quad expansion
+    //! @{
+    //! @brief Load maximum available capacity register contents from memory with quad expansion
+    inline v_uint32 vx_load_expand_q(const uchar* ptr) { return VXPREFIX(_load_expand_q)(ptr); }
+    inline v_int32 vx_load_expand_q(const schar* ptr) { return VXPREFIX(_load_expand_q)(ptr); }
+    //! @}
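
The expanding loads read a half (or quarter) register of narrow elements and widen each one, e.g. (src8 being a hypothetical const uchar* with enough valid data):

    v_uint16 w = vx_load_expand(src8);     // uchar -> ushort, double expansion
    v_uint32 q = vx_load_expand_q(src8);   // uchar -> unsigned, quad expansion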
+
+    /** @brief SIMD processing state cleanup call */
+    inline void vx_cleanup() { VXPREFIX(_cleanup)(); }
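
On some backends this is not a no-op; for example, the AVX implementations reset upper register state to avoid SSE/AVX transition penalties. Hot paths conventionally end with:

    // ... vectorized loop ...
    vx_cleanup();   // call once after the SIMD section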
+
+//! @cond IGNORED
+
+    // backward compatibility
+    template<typename _Tp, typename _Tvec> static inline
+    void vx_store(_Tp* dst, const _Tvec& v) { return v_store(dst, v); }
+    // backward compatibility
+    template<typename _Tp, typename _Tvec> static inline
+    void vx_store_aligned(_Tp* dst, const _Tvec& v) { return v_store_aligned(dst, v); }
+
+//! @endcond
+
+//! @}
+    #undef VXPREFIX
+} // namespace
 
 //! @cond IGNORED
 
 #ifndef CV_SIMD_64F
 #define CV_SIMD_64F 0
 #endif