Merge branch 4.x

1 year ago · c739117a7c
parent 788c7252dc dad8af6b17
commit c739117a7c
531 changed files with 20768 additions and 12802 deletions
--- a/3rdparty/carotene/CMakeLists.txt
+++ b/3rdparty/carotene/CMakeLists.txt
@ -42,6 +42,14 @@ endif()

 if(WITH_NEON)
    target_compile_definitions(carotene_objs PRIVATE "-DWITH_NEON")
+    if(NOT DEFINED CAROTENE_NEON_ARCH )  # variable not set: add no definition here, so the #ifndef default in src/common.hpp applies
+    elseif(CAROTENE_NEON_ARCH EQUAL 8)
+	    target_compile_definitions(carotene_objs PRIVATE "-DCAROTENE_NEON_ARCH=8")
+    elseif(CAROTENE_NEON_ARCH EQUAL 7)
+	    target_compile_definitions(carotene_objs PRIVATE "-DCAROTENE_NEON_ARCH=7")
+    else()  # any value other than 7 or 8 is forwarded to the sources as 0
+	    target_compile_definitions(carotene_objs PRIVATE "-DCAROTENE_NEON_ARCH=0")
+    endif()
 endif()

 # we add dummy file to fix XCode build
--- a/3rdparty/carotene/src/add_weighted.cpp
+++ b/3rdparty/carotene/src/add_weighted.cpp
@ -39,6 +39,7 @@

 #include "common.hpp"
 #include "vtransform.hpp"
+#include "vround_helper.hpp"

 namespace CAROTENE_NS {

@ -106,7 +107,7 @@ template <> struct wAdd<s32>
    {
        valpha = vdupq_n_f32(_alpha);
        vbeta = vdupq_n_f32(_beta);
-        vgamma = vdupq_n_f32(_gamma + 0.5);
+        vgamma = vdupq_n_f32(_gamma);
    }

    void operator() (const VecTraits<s32>::vec128 & v_src0,
@ -118,7 +119,7 @@ template <> struct wAdd<s32>

        vs1 = vmlaq_f32(vgamma, vs1, valpha);
        vs1 = vmlaq_f32(vs1, vs2, vbeta);
-        v_dst = vcvtq_s32_f32(vs1);
+        v_dst = vroundq_s32_f32(vs1);
    }

    void operator() (const VecTraits<s32>::vec64 & v_src0,
@ -130,7 +131,7 @@ template <> struct wAdd<s32>

        vs1 = vmla_f32(vget_low(vgamma), vs1, vget_low(valpha));
        vs1 = vmla_f32(vs1, vs2, vget_low(vbeta));
-        v_dst = vcvt_s32_f32(vs1);
+        v_dst = vround_s32_f32(vs1);
    }

    void operator() (const s32 * src0, const s32 * src1, s32 * dst) const
@ -150,7 +151,7 @@ template <> struct wAdd<u32>
    {
        valpha = vdupq_n_f32(_alpha);
        vbeta = vdupq_n_f32(_beta);
-        vgamma = vdupq_n_f32(_gamma + 0.5);
+        vgamma = vdupq_n_f32(_gamma);
    }

    void operator() (const VecTraits<u32>::vec128 & v_src0,
@ -162,7 +163,7 @@ template <> struct wAdd<u32>

        vs1 = vmlaq_f32(vgamma, vs1, valpha);
        vs1 = vmlaq_f32(vs1, vs2, vbeta);
-        v_dst = vcvtq_u32_f32(vs1);
+        v_dst = vroundq_u32_f32(vs1);
    }

    void operator() (const VecTraits<u32>::vec64 & v_src0,
@ -174,7 +175,7 @@ template <> struct wAdd<u32>

        vs1 = vmla_f32(vget_low(vgamma), vs1, vget_low(valpha));
        vs1 = vmla_f32(vs1, vs2, vget_low(vbeta));
-        v_dst = vcvt_u32_f32(vs1);
+        v_dst = vround_u32_f32(vs1);
    }

    void operator() (const u32 * src0, const u32 * src1, u32 * dst) const
--- a/3rdparty/carotene/src/blur.cpp
+++ b/3rdparty/carotene/src/blur.cpp
@ -41,6 +41,7 @@

 #include "common.hpp"
 #include "saturate_cast.hpp"
+#include "vround_helper.hpp"

 namespace CAROTENE_NS {

@ -198,7 +199,6 @@ void blur3x3(const Size2D &size, s32 cn,
 //#define FLOAT_VARIANT_1_9
 #ifdef FLOAT_VARIANT_1_9
    float32x4_t v1_9 = vdupq_n_f32 (1.0/9.0);
-    float32x4_t v0_5 = vdupq_n_f32 (.5);
 #else
    const int16x8_t vScale = vmovq_n_s16(3640);
 #endif
@ -283,8 +283,8 @@ void blur3x3(const Size2D &size, s32 cn,
                uint32x4_t tres2 = vmovl_u16(vget_high_u16(t0));
                float32x4_t vf1 = vmulq_f32(v1_9, vcvtq_f32_u32(tres1));
                float32x4_t vf2 = vmulq_f32(v1_9, vcvtq_f32_u32(tres2));
-                tres1 = vcvtq_u32_f32(vaddq_f32(vf1, v0_5));
-                tres2 = vcvtq_u32_f32(vaddq_f32(vf2, v0_5));
+                tres1 = internal::vroundq_u32_f32(vf1);
+                tres2 = internal::vroundq_u32_f32(vf2);
                t0 = vcombine_u16(vmovn_u32(tres1),vmovn_u32(tres2));
                vst1_u8(drow + x - 8, vmovn_u16(t0));
 #else
@ -445,8 +445,8 @@ void blur3x3(const Size2D &size, s32 cn,
                uint32x4_t tres2 = vmovl_u16(vget_high_u16(t0));
                float32x4_t vf1 = vmulq_f32(v1_9, vcvtq_f32_u32(tres1));
                float32x4_t vf2 = vmulq_f32(v1_9, vcvtq_f32_u32(tres2));
-                tres1 = vcvtq_u32_f32(vaddq_f32(vf1, v0_5));
-                tres2 = vcvtq_u32_f32(vaddq_f32(vf2, v0_5));
+                tres1 = internal::vroundq_u32_f32(vf1);
+                tres2 = internal::vroundq_u32_f32(vf2);
                t0 = vcombine_u16(vmovn_u32(tres1),vmovn_u32(tres2));
                vst1_u8(drow + x - 8, vmovn_u16(t0));
 #else
@ -508,7 +508,6 @@ void blur5x5(const Size2D &size, s32 cn,
 #define FLOAT_VARIANT_1_25
 #ifdef FLOAT_VARIANT_1_25
    float32x4_t v1_25 = vdupq_n_f32 (1.0f/25.0f);
-    float32x4_t v0_5 = vdupq_n_f32 (.5f);
 #else
    const int16x8_t vScale = vmovq_n_s16(1310);
 #endif
@ -752,8 +751,8 @@ void blur5x5(const Size2D &size, s32 cn,
            uint32x4_t tres2 = vmovl_u16(vget_high_u16(t0));
            float32x4_t vf1 = vmulq_f32(v1_25, vcvtq_f32_u32(tres1));
            float32x4_t vf2 = vmulq_f32(v1_25, vcvtq_f32_u32(tres2));
-            tres1 = vcvtq_u32_f32(vaddq_f32(vf1, v0_5));
-            tres2 = vcvtq_u32_f32(vaddq_f32(vf2, v0_5));
+            tres1 = internal::vroundq_u32_f32(vf1);
+            tres2 = internal::vroundq_u32_f32(vf2);
            t0 = vcombine_u16(vmovn_u32(tres1),vmovn_u32(tres2));
            vst1_u8(drow + x - 8, vmovn_u16(t0));
 #else
--- a/3rdparty/carotene/src/colorconvert.cpp
+++ b/3rdparty/carotene/src/colorconvert.cpp
@ -40,6 +40,7 @@
 #include "common.hpp"

 #include "saturate_cast.hpp"
+#include "vround_helper.hpp"

 namespace CAROTENE_NS {

@ -1166,17 +1167,10 @@ inline uint8x8x3_t convertToHSV(const uint8x8_t vR, const uint8x8_t vG, const ui
    vSt3 = vmulq_f32(vHF1, vDivTab);
    vSt4 = vmulq_f32(vHF2, vDivTab);

-    float32x4_t bias = vdupq_n_f32(0.5f);
-
-    vSt1 = vaddq_f32(vSt1, bias);
-    vSt2 = vaddq_f32(vSt2, bias);
-    vSt3 = vaddq_f32(vSt3, bias);
-    vSt4 = vaddq_f32(vSt4, bias);
-
-    uint32x4_t vRes1 = vcvtq_u32_f32(vSt1);
-    uint32x4_t vRes2 = vcvtq_u32_f32(vSt2);
-    uint32x4_t vRes3 = vcvtq_u32_f32(vSt3);
-    uint32x4_t vRes4 = vcvtq_u32_f32(vSt4);
+    uint32x4_t vRes1 = internal::vroundq_u32_f32(vSt1);
+    uint32x4_t vRes2 = internal::vroundq_u32_f32(vSt2);
+    uint32x4_t vRes3 = internal::vroundq_u32_f32(vSt3);
+    uint32x4_t vRes4 = internal::vroundq_u32_f32(vSt4);

    int32x4_t vH_L = vmovl_s16(vget_low_s16(vDiff4));
    int32x4_t vH_H = vmovl_s16(vget_high_s16(vDiff4));
--- a/3rdparty/carotene/src/common.hpp
+++ b/3rdparty/carotene/src/common.hpp
@ -58,6 +58,17 @@

 namespace CAROTENE_NS { namespace internal {

+#ifndef CAROTENE_NEON_ARCH  // not supplied by the build system: derive a default from the target macros
+#    if defined(__aarch64__) || defined(__aarch32__)
+#        define CAROTENE_NEON_ARCH 8
+#    else
+#        define CAROTENE_NEON_ARCH 7
+#    endif
+#endif
+#if ( !defined(__aarch64__) && !defined(__aarch32__) ) && (CAROTENE_NEON_ARCH == 8 )  // reject an explicit 8 when neither target macro is defined
+#    error("ARMv7 doesn't support A32/A64 Neon instructions")
+#endif
+
 inline void prefetch(const void *ptr, size_t offset = 32*10)
 {
 #if defined __GNUC__
--- a/3rdparty/carotene/src/convert_scale.cpp
+++ b/3rdparty/carotene/src/convert_scale.cpp
@ -38,6 +38,7 @@
 */

 #include "common.hpp"
+#include "vround_helper.hpp"

 namespace CAROTENE_NS {

@ -185,7 +186,7 @@ CVTS_FUNC1(u8, 16,
 #else
 CVTS_FUNC1(u8, 16,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 16)
    {
@ -209,10 +210,10 @@ CVTS_FUNC1(u8, 16,
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
        vline3_f32 = vaddq_f32(vline3_f32, vshift);
        vline4_f32 = vaddq_f32(vline4_f32, vshift);
-        int32x4_t vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        int32x4_t vline2_s32 = vcvtq_s32_f32(vline2_f32);
-        int32x4_t vline3_s32 = vcvtq_s32_f32(vline3_f32);
-        int32x4_t vline4_s32 = vcvtq_s32_f32(vline4_f32);
+        int32x4_t vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        int32x4_t vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
+        int32x4_t vline3_s32 = internal::vroundq_s32_f32(vline3_f32);
+        int32x4_t vline4_s32 = internal::vroundq_s32_f32(vline4_f32);
        uint16x8_t vRes1_u16 = vcombine_u16(vqmovun_s32(vline1_s32), vqmovun_s32(vline2_s32));
        uint16x8_t vRes2_u16 = vcombine_u16(vqmovun_s32(vline3_s32), vqmovun_s32(vline4_s32));
        vst1q_u8(_dst + i, vcombine_u8(vqmovn_u16(vRes1_u16), vqmovn_u16(vRes2_u16)));
@ -270,7 +271,7 @@ CVTS_FUNC(u8, s8, 16,
 #else
 CVTS_FUNC(u8, s8, 16,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 16)
    {
@ -294,10 +295,10 @@ CVTS_FUNC(u8, s8, 16,
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
        vline3_f32 = vaddq_f32(vline3_f32, vshift);
        vline4_f32 = vaddq_f32(vline4_f32, vshift);
-        int32x4_t vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        int32x4_t vline2_s32 = vcvtq_s32_f32(vline2_f32);
-        int32x4_t vline3_s32 = vcvtq_s32_f32(vline3_f32);
-        int32x4_t vline4_s32 = vcvtq_s32_f32(vline4_f32);
+        int32x4_t vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        int32x4_t vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
+        int32x4_t vline3_s32 = internal::vroundq_s32_f32(vline3_f32);
+        int32x4_t vline4_s32 = internal::vroundq_s32_f32(vline4_f32);
        int16x8_t vRes1_u16 = vcombine_s16(vqmovn_s32(vline1_s32), vqmovn_s32(vline2_s32));
        int16x8_t vRes2_u16 = vcombine_s16(vqmovn_s32(vline3_s32), vqmovn_s32(vline4_s32));
        vst1q_s8(_dst + i, vcombine_s8(vqmovn_s16(vRes1_u16), vqmovn_s16(vRes2_u16)));
@ -355,7 +356,7 @@ CVTS_FUNC(u8, u16, 16,
 #else
 CVTS_FUNC(u8, u16, 16,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 16)
    {
@ -379,10 +380,10 @@ CVTS_FUNC(u8, u16, 16,
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
        vline3_f32 = vaddq_f32(vline3_f32, vshift);
        vline4_f32 = vaddq_f32(vline4_f32, vshift);
-        int32x4_t vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        int32x4_t vline2_s32 = vcvtq_s32_f32(vline2_f32);
-        int32x4_t vline3_s32 = vcvtq_s32_f32(vline3_f32);
-        int32x4_t vline4_s32 = vcvtq_s32_f32(vline4_f32);
+        int32x4_t vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        int32x4_t vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
+        int32x4_t vline3_s32 = internal::vroundq_s32_f32(vline3_f32);
+        int32x4_t vline4_s32 = internal::vroundq_s32_f32(vline4_f32);
        vst1q_u16(_dst + i + 0, vcombine_u16(vqmovun_s32(vline1_s32), vqmovun_s32(vline2_s32)));
        vst1q_u16(_dst + i + 8, vcombine_u16(vqmovun_s32(vline3_s32), vqmovun_s32(vline4_s32)));
    }
@ -439,7 +440,7 @@ CVTS_FUNC(u8, s16, 16,
 #else
 CVTS_FUNC(u8, s16, 16,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 16)
    {
@ -463,10 +464,10 @@ CVTS_FUNC(u8, s16, 16,
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
        vline3_f32 = vaddq_f32(vline3_f32, vshift);
        vline4_f32 = vaddq_f32(vline4_f32, vshift);
-        int32x4_t vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        int32x4_t vline2_s32 = vcvtq_s32_f32(vline2_f32);
-        int32x4_t vline3_s32 = vcvtq_s32_f32(vline3_f32);
-        int32x4_t vline4_s32 = vcvtq_s32_f32(vline4_f32);
+        int32x4_t vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        int32x4_t vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
+        int32x4_t vline3_s32 = internal::vroundq_s32_f32(vline3_f32);
+        int32x4_t vline4_s32 = internal::vroundq_s32_f32(vline4_f32);
        vst1q_s16(_dst + i + 0, vcombine_s16(vqmovn_s32(vline1_s32), vqmovn_s32(vline2_s32)));
        vst1q_s16(_dst + i + 8, vcombine_s16(vqmovn_s32(vline3_s32), vqmovn_s32(vline4_s32)));
    }
@ -526,7 +527,7 @@ CVTS_FUNC(u8, s32, 16,
 #else
 CVTS_FUNC(u8, s32, 16,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 16)
    {
@ -550,10 +551,10 @@ CVTS_FUNC(u8, s32, 16,
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
        vline3_f32 = vaddq_f32(vline3_f32, vshift);
        vline4_f32 = vaddq_f32(vline4_f32, vshift);
-        int32x4_t vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        int32x4_t vline2_s32 = vcvtq_s32_f32(vline2_f32);
-        int32x4_t vline3_s32 = vcvtq_s32_f32(vline3_f32);
-        int32x4_t vline4_s32 = vcvtq_s32_f32(vline4_f32);
+        int32x4_t vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        int32x4_t vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
+        int32x4_t vline3_s32 = internal::vroundq_s32_f32(vline3_f32);
+        int32x4_t vline4_s32 = internal::vroundq_s32_f32(vline4_f32);
        vst1q_s32(_dst + i + 0,  vline1_s32);
        vst1q_s32(_dst + i + 4,  vline2_s32);
        vst1q_s32(_dst + i + 8,  vline3_s32);
@ -693,7 +694,7 @@ CVTS_FUNC(s8, u8, 16,
 #else
 CVTS_FUNC(s8, u8, 16,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 16)
    {
@ -717,10 +718,10 @@ CVTS_FUNC(s8, u8, 16,
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
        vline3_f32 = vaddq_f32(vline3_f32, vshift);
        vline4_f32 = vaddq_f32(vline4_f32, vshift);
-        vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        vline2_s32 = vcvtq_s32_f32(vline2_f32);
-        vline3_s32 = vcvtq_s32_f32(vline3_f32);
-        vline4_s32 = vcvtq_s32_f32(vline4_f32);
+        vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
+        vline3_s32 = internal::vroundq_s32_f32(vline3_f32);
+        vline4_s32 = internal::vroundq_s32_f32(vline4_f32);
        uint16x8_t vRes1_u16 = vcombine_u16(vqmovun_s32(vline1_s32), vqmovun_s32(vline2_s32));
        uint16x8_t vRes2_u16 = vcombine_u16(vqmovun_s32(vline3_s32), vqmovun_s32(vline4_s32));
        vst1q_u8(_dst + i, vcombine_u8(vqmovn_u16(vRes1_u16), vqmovn_u16(vRes2_u16)));
@ -778,7 +779,7 @@ CVTS_FUNC1(s8, 16,
 #else
 CVTS_FUNC1(s8, 16,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 16)
    {
@ -802,10 +803,10 @@ CVTS_FUNC1(s8, 16,
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
        vline3_f32 = vaddq_f32(vline3_f32, vshift);
        vline4_f32 = vaddq_f32(vline4_f32, vshift);
-        vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        vline2_s32 = vcvtq_s32_f32(vline2_f32);
-        vline3_s32 = vcvtq_s32_f32(vline3_f32);
-        vline4_s32 = vcvtq_s32_f32(vline4_f32);
+        vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
+        vline3_s32 = internal::vroundq_s32_f32(vline3_f32);
+        vline4_s32 = internal::vroundq_s32_f32(vline4_f32);
        int16x8_t vRes1_s16 = vcombine_s16(vqmovn_s32(vline1_s32), vqmovn_s32(vline2_s32));
        int16x8_t vRes2_s16 = vcombine_s16(vqmovn_s32(vline3_s32), vqmovn_s32(vline4_s32));
        vst1q_s8(_dst + i, vcombine_s8(vqmovn_s16(vRes1_s16), vqmovn_s16(vRes2_s16)));
@ -863,7 +864,7 @@ CVTS_FUNC(s8, u16, 16,
 #else
 CVTS_FUNC(s8, u16, 16,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 16)
    {
@ -887,10 +888,10 @@ CVTS_FUNC(s8, u16, 16,
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
        vline3_f32 = vaddq_f32(vline3_f32, vshift);
        vline4_f32 = vaddq_f32(vline4_f32, vshift);
-        vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        vline2_s32 = vcvtq_s32_f32(vline2_f32);
-        vline3_s32 = vcvtq_s32_f32(vline3_f32);
-        vline4_s32 = vcvtq_s32_f32(vline4_f32);
+        vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
+        vline3_s32 = internal::vroundq_s32_f32(vline3_f32);
+        vline4_s32 = internal::vroundq_s32_f32(vline4_f32);
        uint16x8_t vRes1_u16 = vcombine_u16(vqmovun_s32(vline1_s32), vqmovun_s32(vline2_s32));
        uint16x8_t vRes2_u16 = vcombine_u16(vqmovun_s32(vline3_s32), vqmovun_s32(vline4_s32));
        vst1q_u16(_dst + i + 0, vRes1_u16);
@ -949,7 +950,7 @@ CVTS_FUNC(s8, s16, 16,
 #else
 CVTS_FUNC(s8, s16, 16,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 16)
    {
@ -973,10 +974,10 @@ CVTS_FUNC(s8, s16, 16,
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
        vline3_f32 = vaddq_f32(vline3_f32, vshift);
        vline4_f32 = vaddq_f32(vline4_f32, vshift);
-        vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        vline2_s32 = vcvtq_s32_f32(vline2_f32);
-        vline3_s32 = vcvtq_s32_f32(vline3_f32);
-        vline4_s32 = vcvtq_s32_f32(vline4_f32);
+        vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
+        vline3_s32 = internal::vroundq_s32_f32(vline3_f32);
+        vline4_s32 = internal::vroundq_s32_f32(vline4_f32);
        int16x8_t vRes1_s16 = vcombine_s16(vqmovn_s32(vline1_s32), vqmovn_s32(vline2_s32));
        int16x8_t vRes2_s16 = vcombine_s16(vqmovn_s32(vline3_s32), vqmovn_s32(vline4_s32));
        vst1q_s16(_dst + i + 0, vRes1_s16);
@ -1038,7 +1039,7 @@ CVTS_FUNC(s8, s32, 16,
 #else
 CVTS_FUNC(s8, s32, 16,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 16)
    {
@ -1062,10 +1063,10 @@ CVTS_FUNC(s8, s32, 16,
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
        vline3_f32 = vaddq_f32(vline3_f32, vshift);
        vline4_f32 = vaddq_f32(vline4_f32, vshift);
-        vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        vline2_s32 = vcvtq_s32_f32(vline2_f32);
-        vline3_s32 = vcvtq_s32_f32(vline3_f32);
-        vline4_s32 = vcvtq_s32_f32(vline4_f32);
+        vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
+        vline3_s32 = internal::vroundq_s32_f32(vline3_f32);
+        vline4_s32 = internal::vroundq_s32_f32(vline4_f32);
        vst1q_s32(_dst + i + 0,  vline1_s32);
        vst1q_s32(_dst + i + 4,  vline2_s32);
        vst1q_s32(_dst + i + 8,  vline3_s32);
@ -1190,7 +1191,7 @@ CVTS_FUNC(u16, u8, 16,
 #else
 CVTS_FUNC(u16, u8, 16,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 8)
    {
@ -1204,8 +1205,8 @@ CVTS_FUNC(u16, u8, 16,
        vline2_f32 = vmulq_f32(vline2_f32, vscale);
        vline1_f32 = vaddq_f32(vline1_f32, vshift);
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
-        int32x4_t vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        int32x4_t vline2_s32 = vcvtq_s32_f32(vline2_f32);
+        int32x4_t vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        int32x4_t vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
        int16x4_t vRes1 = vqmovn_s32(vline1_s32);
        int16x4_t vRes2 = vqmovn_s32(vline2_s32);
        uint8x8_t vRes = vqmovun_s16(vcombine_s16(vRes1, vRes2));
@ -1249,7 +1250,7 @@ CVTS_FUNC(u16, s8, 16,
 #else
 CVTS_FUNC(u16, s8, 16,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 8)
    {
@ -1263,8 +1264,8 @@ CVTS_FUNC(u16, s8, 16,
        vline2_f32 = vmulq_f32(vline2_f32, vscale);
        vline1_f32 = vaddq_f32(vline1_f32, vshift);
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
-        int32x4_t vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        int32x4_t vline2_s32 = vcvtq_s32_f32(vline2_f32);
+        int32x4_t vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        int32x4_t vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
        int16x4_t vRes1 = vqmovn_s32(vline1_s32);
        int16x4_t vRes2 = vqmovn_s32(vline2_s32);
        int8x8_t vRes = vqmovn_s16(vcombine_s16(vRes1, vRes2));
@ -1307,7 +1308,7 @@ CVTS_FUNC1(u16, 16,
 #else
 CVTS_FUNC1(u16, 16,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 8)
    {
@ -1321,8 +1322,8 @@ CVTS_FUNC1(u16, 16,
        vline2_f32 = vmulq_f32(vline2_f32, vscale);
        vline1_f32 = vaddq_f32(vline1_f32, vshift);
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
-        int32x4_t vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        int32x4_t vline2_s32 = vcvtq_s32_f32(vline2_f32);
+        int32x4_t vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        int32x4_t vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
        uint16x4_t vRes1 = vqmovun_s32(vline1_s32);
        uint16x4_t vRes2 = vqmovun_s32(vline2_s32);
        vst1q_u16(_dst + i, vcombine_u16(vRes1, vRes2));
@ -1364,7 +1365,7 @@ CVTS_FUNC(u16, s16, 8,
 #else
 CVTS_FUNC(u16, s16, 8,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 8)
    {
@ -1378,8 +1379,8 @@ CVTS_FUNC(u16, s16, 8,
        vline2_f32 = vmulq_f32(vline2_f32, vscale);
        vline1_f32 = vaddq_f32(vline1_f32, vshift);
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
-        int32x4_t vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        int32x4_t vline2_s32 = vcvtq_s32_f32(vline2_f32);
+        int32x4_t vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        int32x4_t vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
        int16x4_t vRes1 = vqmovn_s32(vline1_s32);
        int16x4_t vRes2 = vqmovn_s32(vline2_s32);
        vst1q_s16(_dst + i, vcombine_s16(vRes1, vRes2));
@ -1421,7 +1422,7 @@ CVTS_FUNC(u16, s32, 8,
 #else
 CVTS_FUNC(u16, s32, 8,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 8)
    {
@ -1435,8 +1436,8 @@ CVTS_FUNC(u16, s32, 8,
        vline2_f32 = vmulq_f32(vline2_f32, vscale);
        vline1_f32 = vaddq_f32(vline1_f32, vshift);
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
-        int32x4_t vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        int32x4_t vline2_s32 = vcvtq_s32_f32(vline2_f32);
+        int32x4_t vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        int32x4_t vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
        vst1q_s32(_dst + i + 0, vline1_s32);
        vst1q_s32(_dst + i + 4, vline2_s32);
    }
@ -1530,7 +1531,7 @@ CVTS_FUNC(s16, u8, 16,
 #else
 CVTS_FUNC(s16, u8, 16,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 8)
    {
@ -1544,8 +1545,8 @@ CVTS_FUNC(s16, u8, 16,
        vline2_f32 = vmulq_f32(vline2_f32, vscale);
        vline1_f32 = vaddq_f32(vline1_f32, vshift);
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
-        vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        vline2_s32 = vcvtq_s32_f32(vline2_f32);
+        vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
        int16x4_t vRes1 = vqmovn_s32(vline1_s32);
        int16x4_t vRes2 = vqmovn_s32(vline2_s32);
        uint8x8_t vRes = vqmovun_s16(vcombine_s16(vRes1, vRes2));
@ -1589,7 +1590,7 @@ CVTS_FUNC(s16, s8, 16,
 #else
 CVTS_FUNC(s16, s8, 16,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 8)
    {
@ -1603,8 +1604,8 @@ CVTS_FUNC(s16, s8, 16,
        vline2_f32 = vmulq_f32(vline2_f32, vscale);
        vline1_f32 = vaddq_f32(vline1_f32, vshift);
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
-        vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        vline2_s32 = vcvtq_s32_f32(vline2_f32);
+        vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
        int16x4_t vRes1 = vqmovn_s32(vline1_s32);
        int16x4_t vRes2 = vqmovn_s32(vline2_s32);
        int8x8_t vRes = vqmovn_s16(vcombine_s16(vRes1, vRes2));
@ -1647,7 +1648,7 @@ CVTS_FUNC(s16, u16, 8,
 #else
 CVTS_FUNC(s16, u16, 8,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 8)
    {
@ -1661,8 +1662,8 @@ CVTS_FUNC(s16, u16, 8,
        vline2_f32 = vmulq_f32(vline2_f32, vscale);
        vline1_f32 = vaddq_f32(vline1_f32, vshift);
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
-        vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        vline2_s32 = vcvtq_s32_f32(vline2_f32);
+        vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
        uint16x4_t vRes1 = vqmovun_s32(vline1_s32);
        uint16x4_t vRes2 = vqmovun_s32(vline2_s32);
        vst1q_u16(_dst + i, vcombine_u16(vRes1, vRes2));
@ -1704,7 +1705,7 @@ CVTS_FUNC1(s16, 16,
 #else
 CVTS_FUNC1(s16, 16,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 8)
    {
@ -1718,8 +1719,8 @@ CVTS_FUNC1(s16, 16,
        vline2_f32 = vmulq_f32(vline2_f32, vscale);
        vline1_f32 = vaddq_f32(vline1_f32, vshift);
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
-        vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        vline2_s32 = vcvtq_s32_f32(vline2_f32);
+        vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
        int16x4_t vRes1 = vqmovn_s32(vline1_s32);
        int16x4_t vRes2 = vqmovn_s32(vline2_s32);
        vst1q_s16(_dst + i, vcombine_s16(vRes1, vRes2));
@ -1761,7 +1762,7 @@ CVTS_FUNC(s16, s32, 8,
 #else
 CVTS_FUNC(s16, s32, 8,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 8)
    {
@ -1775,8 +1776,8 @@ CVTS_FUNC(s16, s32, 8,
        vline2_f32 = vmulq_f32(vline2_f32, vscale);
        vline1_f32 = vaddq_f32(vline1_f32, vshift);
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
-        vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        vline2_s32 = vcvtq_s32_f32(vline2_f32);
+        vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
        vst1q_s32(_dst + i + 0, vline1_s32);
        vst1q_s32(_dst + i + 4, vline2_s32);
    }
@ -1870,7 +1871,7 @@ CVTS_FUNC(s32, u8, 8,
 #else
 CVTS_FUNC(s32, u8, 8,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 8)
    {
@ -1883,8 +1884,8 @@ CVTS_FUNC(s32, u8, 8,
        vline2_f32 = vmulq_f32(vline2_f32, vscale);
        vline1_f32 = vaddq_f32(vline1_f32, vshift);
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
-        vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        vline2_s32 = vcvtq_s32_f32(vline2_f32);
+        vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
        uint16x4_t vRes1 = vqmovun_s32(vline1_s32);
        uint16x4_t vRes2 = vqmovun_s32(vline2_s32);
        uint8x8_t vRes = vqmovn_u16(vcombine_u16(vRes1, vRes2));
@ -1928,7 +1929,7 @@ CVTS_FUNC(s32, s8, 8,
 #else
 CVTS_FUNC(s32, s8, 8,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 8)
    {
@ -1941,8 +1942,8 @@ CVTS_FUNC(s32, s8, 8,
        vline2_f32 = vmulq_f32(vline2_f32, vscale);
        vline1_f32 = vaddq_f32(vline1_f32, vshift);
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
-        vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        vline2_s32 = vcvtq_s32_f32(vline2_f32);
+        vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
        int16x4_t vRes1 = vqmovn_s32(vline1_s32);
        int16x4_t vRes2 = vqmovn_s32(vline2_s32);
        int8x8_t vRes = vqmovn_s16(vcombine_s16(vRes1, vRes2));
@ -1985,7 +1986,7 @@ CVTS_FUNC(s32, u16, 8,
 #else
 CVTS_FUNC(s32, u16, 8,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 8)
    {
@ -1998,8 +1999,8 @@ CVTS_FUNC(s32, u16, 8,
        vline2_f32 = vmulq_f32(vline2_f32, vscale);
        vline1_f32 = vaddq_f32(vline1_f32, vshift);
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
-        vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        vline2_s32 = vcvtq_s32_f32(vline2_f32);
+        vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
        uint16x4_t vRes1 = vqmovun_s32(vline1_s32);
        uint16x4_t vRes2 = vqmovun_s32(vline2_s32);
        vst1q_u16(_dst + i, vcombine_u16(vRes1, vRes2));
@ -2041,7 +2042,7 @@ CVTS_FUNC(s32, s16, 8,
 #else
 CVTS_FUNC(s32, s16, 8,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 8)
    {
@ -2054,8 +2055,8 @@ CVTS_FUNC(s32, s16, 8,
        vline2_f32 = vmulq_f32(vline2_f32, vscale);
        vline1_f32 = vaddq_f32(vline1_f32, vshift);
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
-        vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        vline2_s32 = vcvtq_s32_f32(vline2_f32);
+        vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
        int16x4_t vRes1 = vqmovn_s32(vline1_s32);
        int16x4_t vRes2 = vqmovn_s32(vline2_s32);
        vst1q_s16(_dst + i, vcombine_s16(vRes1, vRes2));
@ -2097,7 +2098,7 @@ CVTS_FUNC1(s32, 8,
 #else
 CVTS_FUNC1(s32, 8,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 8)
    {
@ -2110,8 +2111,8 @@ CVTS_FUNC1(s32, 8,
        vline2_f32 = vmulq_f32(vline2_f32, vscale);
        vline1_f32 = vaddq_f32(vline1_f32, vshift);
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
-        vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        vline2_s32 = vcvtq_s32_f32(vline2_f32);
+        vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
        vst1q_s32(_dst + i + 0, vline1_s32);
        vst1q_s32(_dst + i + 4, vline2_s32);
    }
@ -2272,7 +2273,7 @@ CVTS_FUNC(f32, s8, 8,
 #else
 CVTS_FUNC(f32, s8, 8,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 8)
    {
@ -2283,8 +2284,8 @@ CVTS_FUNC(f32, s8, 8,
        vline2_f32 = vmulq_f32(vline2_f32, vscale);
        vline1_f32 = vaddq_f32(vline1_f32, vshift);
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
-        int32x4_t vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        int32x4_t vline2_s32 = vcvtq_s32_f32(vline2_f32);
+        int32x4_t vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        int32x4_t vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
        int16x4_t vRes1 = vqmovn_s32(vline1_s32);
        int16x4_t vRes2 = vqmovn_s32(vline2_s32);
        int8x8_t vRes = vqmovn_s16(vcombine_s16(vRes1, vRes2));
@ -2325,7 +2326,7 @@ CVTS_FUNC(f32, u16, 8,
 #else
 CVTS_FUNC(f32, u16, 8,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 8)
    {
@ -2336,8 +2337,8 @@ CVTS_FUNC(f32, u16, 8,
        vline2_f32 = vmulq_f32(vline2_f32, vscale);
        vline1_f32 = vaddq_f32(vline1_f32, vshift);
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
-        uint32x4_t vline1_u32 = vcvtq_u32_f32(vline1_f32);
-        uint32x4_t vline2_u32 = vcvtq_u32_f32(vline2_f32);
+        uint32x4_t vline1_u32 = internal::vroundq_u32_f32(vline1_f32);
+        uint32x4_t vline2_u32 = internal::vroundq_u32_f32(vline2_f32);
        uint16x4_t vRes1 = vqmovn_u32(vline1_u32);
        uint16x4_t vRes2 = vqmovn_u32(vline2_u32);
        vst1q_u16(_dst + i, vcombine_u16(vRes1, vRes2));
@ -2377,7 +2378,7 @@ CVTS_FUNC(f32, s16, 8,
 #else
 CVTS_FUNC(f32, s16, 8,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 8)
    {
@ -2388,8 +2389,8 @@ CVTS_FUNC(f32, s16, 8,
        vline2_f32 = vmulq_f32(vline2_f32, vscale);
        vline1_f32 = vaddq_f32(vline1_f32, vshift);
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
-        int32x4_t vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        int32x4_t vline2_s32 = vcvtq_s32_f32(vline2_f32);
+        int32x4_t vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        int32x4_t vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
        int16x4_t vRes1 = vqmovn_s32(vline1_s32);
        int16x4_t vRes2 = vqmovn_s32(vline2_s32);
        vst1q_s16(_dst + i, vcombine_s16(vRes1, vRes2));
@ -2429,7 +2430,7 @@ CVTS_FUNC(f32, s32, 8,
 #else
 CVTS_FUNC(f32, s32, 8,
    float32x4_t vscale = vdupq_n_f32((f32)alpha);
-    float32x4_t vshift = vdupq_n_f32((f32)beta + 0.5f);,
+    float32x4_t vshift = vdupq_n_f32((f32)beta);,
 {
    for (size_t i = 0; i < w; i += 8)
    {
@ -2440,8 +2441,8 @@ CVTS_FUNC(f32, s32, 8,
        vline2_f32 = vmulq_f32(vline2_f32, vscale);
        vline1_f32 = vaddq_f32(vline1_f32, vshift);
        vline2_f32 = vaddq_f32(vline2_f32, vshift);
-        int32x4_t vline1_s32 = vcvtq_s32_f32(vline1_f32);
-        int32x4_t vline2_s32 = vcvtq_s32_f32(vline2_f32);
+        int32x4_t vline1_s32 = internal::vroundq_s32_f32(vline1_f32);
+        int32x4_t vline2_s32 = internal::vroundq_s32_f32(vline2_f32);
        vst1q_s32(_dst + i + 0, vline1_s32);
        vst1q_s32(_dst + i + 4, vline2_s32);
    }
--- a/3rdparty/carotene/src/div.cpp
+++ b/3rdparty/carotene/src/div.cpp
@ -39,6 +39,7 @@

 #include "common.hpp"
 #include "vtransform.hpp"
+#include "vround_helper.hpp"

 #include <cstring>
 #include <cfloat>
@ -51,13 +52,6 @@ namespace {

 #ifdef CAROTENE_NEON

-inline float32x4_t vroundq(const float32x4_t& v)
-{
-    const int32x4_t signMask = vdupq_n_s32(1 << 31), half = vreinterpretq_s32_f32(vdupq_n_f32(0.5f));
-    float32x4_t v_addition = vreinterpretq_f32_s32(vorrq_s32(half, vandq_s32(signMask, vreinterpretq_s32_f32(v))));
-    return vaddq_f32(v, v_addition);
-}
-
 template <typename T>
 inline T divSaturateQ(const T &v1, const T &v2, const float scale)
 {
@ -69,17 +63,10 @@ inline T divSaturateQ(const T &v1, const T &v2, const float scale)
 }
 template <>
 inline int32x4_t divSaturateQ<int32x4_t>(const int32x4_t &v1, const int32x4_t &v2, const float scale) // s32x4 specialization: converts both operands to f32, forms (v1 * scale) * internal::vrecpq_f32(v2), then converts back to s32
-{ return vcvtq_s32_f32(vroundq(vmulq_f32(vmulq_n_f32(vcvtq_f32_s32(v1), scale), internal::vrecpq_f32(vcvtq_f32_s32(v2))))); }
+{ return internal::vroundq_s32_f32(vmulq_f32(vmulq_n_f32(vcvtq_f32_s32(v1), scale), internal::vrecpq_f32(vcvtq_f32_s32(v2)))); } // rounding is delegated to the shared helper from vround_helper.hpp, replacing the file-local vroundq() + vcvtq_s32_f32 pair
 template <>
 inline uint32x4_t divSaturateQ<uint32x4_t>(const uint32x4_t &v1, const uint32x4_t &v2, const float scale) // u32x4 specialization: converts both operands to f32, forms (v1 * scale) * internal::vrecpq_f32(v2), then converts back to u32
-{ return vcvtq_u32_f32(vroundq(vmulq_f32(vmulq_n_f32(vcvtq_f32_u32(v1), scale), internal::vrecpq_f32(vcvtq_f32_u32(v2))))); }
-
-inline float32x2_t vround(const float32x2_t& v)
-{
-    const int32x2_t signMask = vdup_n_s32(1 << 31), half = vreinterpret_s32_f32(vdup_n_f32(0.5f));
-    float32x2_t v_addition = vreinterpret_f32_s32(vorr_s32(half, vand_s32(signMask, vreinterpret_s32_f32(v))));
-    return vadd_f32(v, v_addition);
-}
+{ return internal::vroundq_u32_f32(vmulq_f32(vmulq_n_f32(vcvtq_f32_u32(v1), scale), internal::vrecpq_f32(vcvtq_f32_u32(v2)))); } // rounding is delegated to the shared helper from vround_helper.hpp, replacing the file-local vroundq() + vcvtq_u32_f32 pair

 template <typename T>
 inline T divSaturate(const T &v1, const T &v2, const float scale)
@ -88,10 +75,10 @@ inline T divSaturate(const T &v1, const T &v2, const float scale)
 }
 template <>
 inline int32x2_t divSaturate<int32x2_t>(const int32x2_t &v1, const int32x2_t &v2, const float scale) // s32x2 (64-bit vector) specialization: converts both operands to f32, forms (v1 * scale) * internal::vrecp_f32(v2), then converts back to s32
-{ return vcvt_s32_f32(vround(vmul_f32(vmul_n_f32(vcvt_f32_s32(v1), scale), internal::vrecp_f32(vcvt_f32_s32(v2))))); }
+{ return internal::vround_s32_f32(vmul_f32(vmul_n_f32(vcvt_f32_s32(v1), scale), internal::vrecp_f32(vcvt_f32_s32(v2)))); } // rounding is delegated to the shared helper from vround_helper.hpp, replacing the file-local vround() + vcvt_s32_f32 pair
 template <>
 inline uint32x2_t divSaturate<uint32x2_t>(const uint32x2_t &v1, const uint32x2_t &v2, const float scale) // u32x2 (64-bit vector) specialization: converts both operands to f32, forms (v1 * scale) * internal::vrecp_f32(v2), then converts back to u32
-{ return vcvt_u32_f32(vround(vmul_f32(vmul_n_f32(vcvt_f32_u32(v1), scale), internal::vrecp_f32(vcvt_f32_u32(v2))))); }
+{ return internal::vround_u32_f32(vmul_f32(vmul_n_f32(vcvt_f32_u32(v1), scale), internal::vrecp_f32(vcvt_f32_u32(v2)))); } // rounding is delegated to the shared helper from vround_helper.hpp, replacing the file-local vround() + vcvt_u32_f32 pair


 template <typename T>
@ -157,8 +144,8 @@ void div(const Size2D &size,

    if (scale == 0.0f ||
        (std::numeric_limits<T>::is_integer &&
-         (scale * std::numeric_limits<T>::max()) <  1.0f &&
-         (scale * std::numeric_limits<T>::max()) > -1.0f))
+         (scale * static_cast<float>(std::numeric_limits<T>::max())) <  1.0f &&
+         (scale * static_cast<float>(std::numeric_limits<T>::max())) > -1.0f))
    {
        for (size_t y = 0; y < size.height; ++y)
        {
--- a/3rdparty/carotene/src/phase.cpp
+++ b/3rdparty/carotene/src/phase.cpp
@ -41,6 +41,7 @@
 #include <cmath>

 #include "common.hpp"
+#include "vround_helper.hpp"

 namespace CAROTENE_NS {

@ -121,8 +122,6 @@ void phase(const Size2D &size,
    size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;

-    float32x4_t v_05 = vdupq_n_f32(0.5f);
-
    for (size_t i = 0; i < size.height; ++i)
    {
        const s16 * src0 = internal::getRowPtr(src0Base, src0Stride, i);
@ -149,8 +148,8 @@ void phase(const Size2D &size,
            float32x4_t v_dst32f1;
            FASTATAN2VECTOR(v_src1_p, v_src0_p, v_dst32f1)

-            uint16x8_t v_dst16s0 = vcombine_u16(vmovn_u32(vcvtq_u32_f32(vaddq_f32(v_dst32f0, v_05))),
-                                                vmovn_u32(vcvtq_u32_f32(vaddq_f32(v_dst32f1, v_05))));
+            uint16x8_t v_dst16s0 = vcombine_u16(vmovn_u32(internal::vroundq_u32_f32(v_dst32f0)),
+                                                vmovn_u32(internal::vroundq_u32_f32(v_dst32f1)));

            // 1
            v_src0_p = vcvtq_f32_s32(vmovl_s16(vget_low_s16(v_src01)));
@ -161,8 +160,8 @@ void phase(const Size2D &size,
            v_src1_p = vcvtq_f32_s32(vmovl_s16(vget_high_s16(v_src11)));
            FASTATAN2VECTOR(v_src1_p, v_src0_p, v_dst32f1)

-            uint16x8_t v_dst16s1 = vcombine_u16(vmovn_u32(vcvtq_u32_f32(vaddq_f32(v_dst32f0, v_05))),
-                                                vmovn_u32(vcvtq_u32_f32(vaddq_f32(v_dst32f1, v_05))));
+            uint16x8_t v_dst16s1 = vcombine_u16(vmovn_u32(internal::vroundq_u32_f32(v_dst32f0)),
+                                                vmovn_u32(internal::vroundq_u32_f32(v_dst32f1)));

            vst1q_u8(dst + j, vcombine_u8(vmovn_u16(v_dst16s0),
                                          vmovn_u16(v_dst16s1)));
@ -182,8 +181,8 @@ void phase(const Size2D &size,
            float32x4_t v_dst32f1;
            FASTATAN2VECTOR(v_src1_p, v_src0_p, v_dst32f1)

-            uint16x8_t v_dst = vcombine_u16(vmovn_u32(vcvtq_u32_f32(vaddq_f32(v_dst32f0, v_05))),
-                                            vmovn_u32(vcvtq_u32_f32(vaddq_f32(v_dst32f1, v_05))));
+            uint16x8_t v_dst = vcombine_u16(vmovn_u32(internal::vroundq_u32_f32(v_dst32f0)),
+                                            vmovn_u32(internal::vroundq_u32_f32(v_dst32f1)));

            vst1_u8(dst + j, vmovn_u16(v_dst));
        }
--- a/3rdparty/carotene/src/vround_helper.hpp
+++ b/3rdparty/carotene/src/vround_helper.hpp
@ -0,0 +1,102 @@
+/*
+ * By downloading, copying, installing or using the software you agree to this license.
+ * If you do not agree to this license, do not download, install,
+ * copy or use the software.
+ *
+ *
+ *                           License Agreement
+ *                For Open Source Computer Vision Library
+ *                        (3-clause BSD License)
+ *
+ * Copyright (C) 2014-2015, NVIDIA Corporation, all rights reserved.
+ * Third party copyrights are property of their respective owners.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *   * Redistributions in binary form must reproduce the above copyright notice,
+ *     this list of conditions and the following disclaimer in the documentation
+ *     and/or other materials provided with the distribution.
+ *
+ *   * Neither the names of the copyright holders nor the names of the contributors
+ *     may be used to endorse or promote products derived from this software
+ *     without specific prior written permission.
+ *
+ * This software is provided by the copyright holders and contributors "as is" and
+ * any express or implied warranties, including, but not limited to, the implied
+ * warranties of merchantability and fitness for a particular purpose are disclaimed.
+ * In no event shall copyright holders or contributors be liable for any direct,
+ * indirect, incidental, special, exemplary, or consequential damages
+ * (including, but not limited to, procurement of substitute goods or services;
+ * loss of use, data, or profits; or business interruption) however caused
+ * and on any theory of liability, whether in contract, strict liability,
+ * or tort (including negligence or otherwise) arising in any way out of
+ * the use of this software, even if advised of the possibility of such damage.
+ */
+
+#ifndef CAROTENE_SRC_VROUND_HELPER_HPP
+#define CAROTENE_SRC_VROUND_HELPER_HPP
+
+#include "common.hpp"
+#include "vtransform.hpp"
+
+#ifdef CAROTENE_NEON
+
+/**
+ * This helper header is for rounding from float32xN to uint32xN or int32xN to nearest, ties to even.
+ * See https://en.wikipedia.org/wiki/Rounding#Rounding_half_to_even
+ */
+
+// See https://github.com/opencv/opencv/pull/24271#issuecomment-1867318007
+#define CAROTENE_ROUND_DELTA (12582912.0f) /* 12582912 == 1.5 * 2^23; added to and then subtracted from the input by the non-ARMv8 fallbacks below */
+
+namespace CAROTENE_NS { namespace internal {
+
+inline uint32x4_t vroundq_u32_f32(const float32x4_t val) // four f32 lanes -> four u32 lanes, rounding as described in this header's top comment
+{ // NOTE: an undefined CAROTENE_NEON_ARCH evaluates to 0 inside #if, which selects the fallback branch
+#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
+    return vcvtnq_u32_f32(val); // direct convert-with-rounding intrinsic
+#else
+    const float32x4_t delta = vdupq_n_f32(CAROTENE_ROUND_DELTA); // same constant in every lane
+    return vcvtq_u32_f32(vsubq_f32(vaddq_f32(val, delta), delta)); // (val + delta) - delta in f32, then vcvtq_u32_f32; NOTE(review): presumably relies on val being small relative to delta -- confirm against callers
+#endif
+}
+
+inline uint32x2_t vround_u32_f32(const float32x2_t val) // two f32 lanes -> two u32 lanes, rounding as described in this header's top comment
+{ // NOTE: an undefined CAROTENE_NEON_ARCH evaluates to 0 inside #if, which selects the fallback branch
+#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
+    return vcvtn_u32_f32(val); // direct convert-with-rounding intrinsic
+#else
+    const float32x2_t delta = vdup_n_f32(CAROTENE_ROUND_DELTA); // same constant in both lanes
+    return vcvt_u32_f32(vsub_f32(vadd_f32(val, delta), delta)); // (val + delta) - delta in f32, then vcvt_u32_f32; NOTE(review): presumably relies on val being small relative to delta -- confirm against callers
+#endif
+}
+
+inline int32x4_t vroundq_s32_f32(const float32x4_t val) // four f32 lanes -> four s32 lanes, rounding as described in this header's top comment
+{ // NOTE: an undefined CAROTENE_NEON_ARCH evaluates to 0 inside #if, which selects the fallback branch
+#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
+    return vcvtnq_s32_f32(val); // direct convert-with-rounding intrinsic
+#else
+    const float32x4_t delta = vdupq_n_f32(CAROTENE_ROUND_DELTA); // same constant in every lane
+    return vcvtq_s32_f32(vsubq_f32(vaddq_f32(val, delta), delta)); // (val + delta) - delta in f32, then vcvtq_s32_f32; NOTE(review): presumably relies on |val| being small relative to delta -- confirm against callers
+#endif
+}
+
+inline int32x2_t vround_s32_f32(const float32x2_t val) // two f32 lanes -> two s32 lanes, rounding as described in this header's top comment
+{ // NOTE: an undefined CAROTENE_NEON_ARCH evaluates to 0 inside #if, which selects the fallback branch
+#if CAROTENE_NEON_ARCH >= 8 /* get ready for ARMv9 */
+    return vcvtn_s32_f32(val); // direct convert-with-rounding intrinsic
+#else
+    const float32x2_t delta = vdup_n_f32(CAROTENE_ROUND_DELTA); // same constant in both lanes
+    return vcvt_s32_f32(vsub_f32(vadd_f32(val, delta), delta)); // (val + delta) - delta in f32, then vcvt_s32_f32; NOTE(review): presumably relies on |val| being small relative to delta -- confirm against callers
+#endif
+}
+
+} }
+
+#endif // CAROTENE_NEON
+
+#endif
--- a/3rdparty/ffmpeg/ffmpeg.cmake
+++ b/3rdparty/ffmpeg/ffmpeg.cmake
@ -1,8 +1,8 @@
-# Binaries branch name: ffmpeg/4.x_20230622
-# Binaries were created for OpenCV: 61d48dd0f8d1cc1a115d26998705a61478f64a3c
-ocv_update(FFMPEG_BINARIES_COMMIT "7da61f0695eabf8972a2c302bf1632a3d99fb0d5")
-ocv_update(FFMPEG_FILE_HASH_BIN32 "4aaef1456e282e5ef665d65555f47f56")
-ocv_update(FFMPEG_FILE_HASH_BIN64 "38a638851e064c591ce812e27ed43f1f")
+# Binaries branch name: ffmpeg/4.x_20231225
+# Binaries were created for OpenCV: 62f1a7410d5e5e03d6cee5c95549bf61d5ee98db
+ocv_update(FFMPEG_BINARIES_COMMIT "fbac408a47977ee4265f39e7659d33f1dfef5216")
+ocv_update(FFMPEG_FILE_HASH_BIN32 "9b755ecbbade0a5b78332e9b4ef2dd1b")
+ocv_update(FFMPEG_FILE_HASH_BIN64 "cb4db51ee9a423e6168b9d08bee61efc")
 ocv_update(FFMPEG_FILE_HASH_CMAKE "8862c87496e2e8c375965e1277dee1c7")

 function(download_win_ffmpeg script_var)
--- a/3rdparty/libspng/CMakeLists.txt
+++ b/3rdparty/libspng/CMakeLists.txt
@ -23,7 +23,6 @@ if(MSVC)
 endif(MSVC)

 add_library(${SPNG_LIBRARY} STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${spng_headers} ${spng_sources})
-ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-variable)
 target_link_libraries(${SPNG_LIBRARY} ${ZLIB_LIBRARIES})

 set_target_properties(${SPNG_LIBRARY}
--- a/3rdparty/libspng/LICENSE
+++ b/3rdparty/libspng/LICENSE
@ -1,6 +1,6 @@
 BSD 2-Clause License

-Copyright (c) 2018-2022, Randy <randy408@protonmail.com>
+Copyright (c) 2018-2023, Randy <randy408@protonmail.com>
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
--- a/3rdparty/libspng/spng.c
+++ b/3rdparty/libspng/spng.c
@ -2691,6 +2691,7 @@ static int read_non_idat_chunks(spng_ctx *ctx)
            if(!memcmp(chunk.type, type_exif, 4))
            {
                if(ctx->file.exif) return SPNG_EDUP_EXIF;
+                if(!chunk.length) return SPNG_EEXIF;

                ctx->file.exif = 1;

@ -4999,11 +5000,11 @@ void spng_ctx_free(spng_ctx *ctx)
    spng__free(ctx, ctx->prev_scanline_buf);
    spng__free(ctx, ctx->filtered_scanline_buf);

-    spng_free_fn *free_func = ctx->alloc.free_fn;
+    spng_free_fn *free_fn = ctx->alloc.free_fn;

    memset(ctx, 0, sizeof(spng_ctx));

-    free_func(ctx);
+    free_fn(ctx);
 }

 static int buffer_read_fn(spng_ctx *ctx, void *user, void *data, size_t n)
@ -5743,7 +5744,8 @@ int spng_set_iccp(spng_ctx *ctx, struct spng_iccp *iccp)
    SPNG_SET_CHUNK_BOILERPLATE(iccp);

    if(check_png_keyword(iccp->profile_name)) return SPNG_EICCP_NAME;
-    if(!iccp->profile_len || iccp->profile_len > UINT_MAX) return 1;
+    if(!iccp->profile_len) return SPNG_ECHUNK_SIZE;
+    if(iccp->profile_len > spng_u32max) return SPNG_ECHUNK_STDLEN;

    if(ctx->iccp.profile && !ctx->user.iccp) spng__free(ctx, ctx->iccp.profile);

--- a/3rdparty/libspng/spng.h
+++ b/3rdparty/libspng/spng.h
@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: (BSD-2-Clause AND libpng-2.0) */
+/* SPDX-License-Identifier: BSD-2-Clause */
 #ifndef SPNG_H
 #define SPNG_H

@ -28,7 +28,7 @@ extern "C" {

 #define SPNG_VERSION_MAJOR 0
 #define SPNG_VERSION_MINOR 7
-#define SPNG_VERSION_PATCH 3
+#define SPNG_VERSION_PATCH 4

 enum spng_errno
 {
--- a/3rdparty/openexr/IlmImf/ImfConvert.cpp
+++ b/3rdparty/openexr/IlmImf/ImfConvert.cpp
@ -107,7 +107,7 @@ floatToUint (float f)
    if (isNegative (f) || isNan (f))
 	return 0;

-    if (isInfinity (f) || f > UINT_MAX)
+    if (isInfinity (f) || f > (float)UINT_MAX)
 	return UINT_MAX;

    return (unsigned int) f;
--- a/3rdparty/openjpeg/openjp2/ht_dec.c
+++ b/3rdparty/openjpeg/openjp2/ht_dec.c
@ -55,6 +55,10 @@
 #define OPJ_COMPILER_GNUC
 #endif

+#if defined(OPJ_COMPILER_MSVC) && defined(_M_ARM64)
+#include <arm64_neon.h>
+#endif
+
 //************************************************************************/
 /** @brief Displays the error message for disabling the decoding of SPP and
  * MRP passes
@ -71,6 +75,9 @@ OPJ_UINT32 population_count(OPJ_UINT32 val)
 {
 #if defined(OPJ_COMPILER_MSVC) && (defined(_M_IX86) || defined(_M_AMD64))
    return (OPJ_UINT32)__popcnt(val);
+#elif defined(OPJ_COMPILER_MSVC) && defined(_M_ARM64)
+    const __n64 temp = neon_cnt(__uint64ToN64_v(val));
+    return neon_addv8(temp).n8_i8[0];
 #elif (defined OPJ_COMPILER_GNUC)
    return (OPJ_UINT32)__builtin_popcount(val);
 #else
--- a/3rdparty/openjpeg/openjp2/j2k.c
+++ b/3rdparty/openjpeg/openjp2/j2k.c
@ -7796,7 +7796,7 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k,
                                       image->comps[0].h * image->comps[0].prec) /
                                      ((double)parameters->tcp_rates[parameters->tcp_numlayers - 1] * 8 *
                                       image->comps[0].dx * image->comps[0].dy));
-            if (temp_size > INT_MAX) {
+            if (temp_size > (OPJ_FLOAT32)INT_MAX) {
                parameters->max_cs_size = INT_MAX;
            } else {
                parameters->max_cs_size = (int) floor(temp_size);
--- a/3rdparty/openjpeg/openjp2/tcd.c
+++ b/3rdparty/openjpeg/openjp2/tcd.c
@ -2262,7 +2262,7 @@ static OPJ_BOOL opj_tcd_dc_level_shift_decode(opj_tcd_t *p_tcd)
            for (j = 0; j < l_height; ++j) {
                for (i = 0; i < l_width; ++i) {
                    OPJ_FLOAT32 l_value = *((OPJ_FLOAT32 *) l_current_ptr);
-                    if (l_value > INT_MAX) {
+                    if (l_value > (OPJ_FLOAT32)INT_MAX) {
                        *l_current_ptr = l_max;
                    } else if (l_value < INT_MIN) {
                        *l_current_ptr = l_min;
--- a/3rdparty/readme.txt
+++ b/3rdparty/readme.txt
@ -31,7 +31,7 @@ libpng                Portable Network Graphics library.
                      
 libspng               Portable Network Graphics library.
                      The license and copyright notes can be found in libspng/LICENSE.
-                      See libspng home page https://www.libspng.org
+                      See libspng home page https://libspng.org
                      for details and links to the source code
                      
                      WITH_SPNG CMake option must be ON to add libspng support to imgcodecs
@ -41,7 +41,6 @@ libtiff               Tag Image File Format (TIFF) Software
                      Copyright (c) 1991-1997 Silicon Graphics, Inc.
                      See libtiff home page #1 http://www.simplesystems.org/libtiff/
                                            #2 https://libtiff.gitlab.io/libtiff/
-                                            #3 http://libtiff.maptools.org/
                      for details and links to the source code

                      WITH_TIFF CMake option must be ON to add libtiff & zlib support to imgcodecs.
--- a/3rdparty/zlib/ChangeLog
+++ b/3rdparty/zlib/ChangeLog
@ -1,6 +1,24 @@

                ChangeLog file for zlib

+Changes in 1.3 (18 Aug 2023)
+- Remove K&R function definitions and zlib2ansi
+- Fix bug in deflateBound() for level 0 and memLevel 9
+- Fix bug when gzungetc() is used immediately after gzopen()
+- Fix bug when using gzflush() with a very small buffer
+- Fix crash when gzsetparams() attempted for transparent write
+- Fix test/example.c to work with FORCE_STORED
+- Rewrite of zran in examples (see zran.c version history)
+- Fix minizip to allow it to open an empty zip file
+- Fix reading disk number start on zip64 files in minizip
+- Fix logic error in minizip argument processing
+- Add minizip testing to Makefile
+- Read multiple bytes instead of byte-by-byte in minizip unzip.c
+- Add memory sanitizer to configure (--memory)
+- Various portability improvements
+- Various documentation improvements
+- Various spelling and typo corrections
+
 Changes in 1.2.13 (13 Oct 2022)
 - Fix configure issue that discarded provided CC definition
 - Correct incorrect inputs provided to the CRC functions
@ -1445,7 +1463,7 @@ Changes in 0.99 (27 Jan 96)
 - fix typo in Make_vms.com (f$trnlnm -> f$getsyi)
 - in fcalloc, normalize pointer if size > 65520 bytes
 - don't use special fcalloc for 32 bit Borland C++
- use STDC instead of __GO32__ to avoid redeclaring exit, calloc, etc...
+- use STDC instead of __GO32__ to avoid redeclaring exit, calloc, etc.
 - use Z_BINARY instead of BINARY
 - document that gzclose after gzdopen will close the file
 - allow "a" as mode in gzopen
--- a/3rdparty/zlib/README
+++ b/3rdparty/zlib/README
@ -1,6 +1,6 @@
 ZLIB DATA COMPRESSION LIBRARY

-zlib 1.2.13 is a general purpose data compression library.  All the code is
+zlib 1.3 is a general purpose data compression library.  All the code is
 thread safe.  The data format used by the zlib library is described by RFCs
 (Request for Comments) 1950 to 1952 in the files
 http://tools.ietf.org/html/rfc1950 (zlib format), rfc1951 (deflate format) and
@ -29,18 +29,17 @@ PLEASE read the zlib FAQ http://zlib.net/zlib_faq.html before asking for help.

 Mark Nelson <markn@ieee.org> wrote an article about zlib for the Jan.  1997
 issue of Dr.  Dobb's Journal; a copy of the article is available at
-http://marknelson.us/1997/01/01/zlib-engine/ .
+https://marknelson.us/posts/1997/01/01/zlib-engine.html .

-The changes made in version 1.2.13 are documented in the file ChangeLog.
+The changes made in version 1.3 are documented in the file ChangeLog.

 Unsupported third party contributions are provided in directory contrib/ .

-zlib is available in Java using the java.util.zip package, documented at
-http://java.sun.com/developer/technicalArticles/Programming/compression/ .
+zlib is available in Java using the java.util.zip package. Follow the API
+Documentation link at: https://docs.oracle.com/search/?q=java.util.zip .

-A Perl interface to zlib written by Paul Marquess <pmqs@cpan.org> is available
-at CPAN (Comprehensive Perl Archive Network) sites, including
-http://search.cpan.org/~pmqs/IO-Compress-Zlib/ .
+A Perl interface to zlib and bzip2 written by Paul Marquess <pmqs@cpan.org>
+can be found at https://github.com/pmqs/IO-Compress .

 A Python interface to zlib written by A.M. Kuchling <amk@amk.ca> is
 available in Python 1.5 and later versions, see
@ -64,7 +63,7 @@ Notes for some targets:
 - zlib doesn't work with gcc 2.6.3 on a DEC 3000/300LX under OSF/1 2.1 it works
  when compiled with cc.

- On Digital Unix 4.0D (formely OSF/1) on AlphaServer, the cc option -std1 is
+- On Digital Unix 4.0D (formerly OSF/1) on AlphaServer, the cc option -std1 is
  necessary to get gzprintf working correctly. This is done by configure.

 - zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works with
@ -84,7 +83,7 @@ Acknowledgments:

 Copyright notice:

- (C) 1995-2022 Jean-loup Gailly and Mark Adler
+ (C) 1995-2023 Jean-loup Gailly and Mark Adler

  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the authors be held liable for any damages
--- a/3rdparty/zlib/adler32.c
+++ b/3rdparty/zlib/adler32.c
@ -7,8 +7,6 @@

 #include "zutil.h"

-local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2));
-
 #define BASE 65521U     /* largest prime smaller than 65536 */
 #define NMAX 5552
 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
@ -60,11 +58,7 @@ local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2));
 #endif

 /* ========================================================================= */
-uLong ZEXPORT adler32_z(adler, buf, len)
-    uLong adler;
-    const Bytef *buf;
-    z_size_t len;
-{
+uLong ZEXPORT adler32_z(uLong adler, const Bytef *buf, z_size_t len) {
    unsigned long sum2;
    unsigned n;

@ -131,20 +125,12 @@ uLong ZEXPORT adler32_z(adler, buf, len)
 }

 /* ========================================================================= */
-uLong ZEXPORT adler32(adler, buf, len)
-    uLong adler;
-    const Bytef *buf;
-    uInt len;
-{
+uLong ZEXPORT adler32(uLong adler, const Bytef *buf, uInt len) { /* uInt-length entry point */
     return adler32_z(adler, buf, len); /* forwards all arguments unchanged to adler32_z(), whose len parameter is z_size_t */
 }

 /* ========================================================================= */
-local uLong adler32_combine_(adler1, adler2, len2)
-    uLong adler1;
-    uLong adler2;
-    z_off64_t len2;
-{
+local uLong adler32_combine_(uLong adler1, uLong adler2, z_off64_t len2) {
    unsigned long sum1;
    unsigned long sum2;
    unsigned rem;
@ -169,18 +155,10 @@ local uLong adler32_combine_(adler1, adler2, len2)
 }

 /* ========================================================================= */
-uLong ZEXPORT adler32_combine(adler1, adler2, len2)
-    uLong adler1;
-    uLong adler2;
-    z_off_t len2;
-{
+uLong ZEXPORT adler32_combine(uLong adler1, uLong adler2, z_off_t len2) { /* z_off_t-length entry point */
     return adler32_combine_(adler1, adler2, len2); /* forwards to the local adler32_combine_(), which takes len2 as z_off64_t */
 }

-uLong ZEXPORT adler32_combine64(adler1, adler2, len2)
-    uLong adler1;
-    uLong adler2;
-    z_off64_t len2;
-{
+uLong ZEXPORT adler32_combine64(uLong adler1, uLong adler2, z_off64_t len2) { /* z_off64_t-length entry point */
     return adler32_combine_(adler1, adler2, len2); /* forwards all arguments unchanged to the local adler32_combine_() */
 }
--- a/3rdparty/zlib/compress.c
+++ b/3rdparty/zlib/compress.c
@ -19,13 +19,8 @@
   memory, Z_BUF_ERROR if there was not enough room in the output buffer,
   Z_STREAM_ERROR if the level parameter is invalid.
 */
-int ZEXPORT compress2(dest, destLen, source, sourceLen, level)
-    Bytef *dest;
-    uLongf *destLen;
-    const Bytef *source;
-    uLong sourceLen;
-    int level;
-{
+int ZEXPORT compress2(Bytef *dest, uLongf *destLen, const Bytef *source,
+                      uLong sourceLen, int level) {
    z_stream stream;
    int err;
    const uInt max = (uInt)-1;
@ -65,12 +60,8 @@ int ZEXPORT compress2(dest, destLen, source, sourceLen, level)

 /* ===========================================================================
 */
-int ZEXPORT compress(dest, destLen, source, sourceLen)
-    Bytef *dest;
-    uLongf *destLen;
-    const Bytef *source;
-    uLong sourceLen;
-{
+int ZEXPORT compress(Bytef *dest, uLongf *destLen, const Bytef *source,
+                     uLong sourceLen) {
     return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION); /* same as compress2() with the level fixed to Z_DEFAULT_COMPRESSION */
 }

@ -78,9 +69,7 @@ int ZEXPORT compress(dest, destLen, source, sourceLen)
     If the default memLevel or windowBits for deflateInit() is changed, then
   this function needs to be updated.
 */
-uLong ZEXPORT compressBound(sourceLen)
-    uLong sourceLen;
-{
+uLong ZEXPORT compressBound(uLong sourceLen) {
     return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + /* sourceLen plus sourceLen/4096 and sourceLen/16384 (integer shifts) ... */
            (sourceLen >> 25) + 13; /* ... plus sourceLen/33554432 and a constant 13 */
 }
--- a/3rdparty/zlib/crc32.c
+++ b/3rdparty/zlib/crc32.c
@ -103,19 +103,6 @@
 #  define ARMCRC32
 #endif

-/* Local functions. */
-local z_crc_t multmodp OF((z_crc_t a, z_crc_t b));
-local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
-
-#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
-    local z_word_t byte_swap OF((z_word_t word));
-#endif
-
-#if defined(W) && !defined(ARMCRC32)
-    local z_crc_t crc_word OF((z_word_t data));
-    local z_word_t crc_word_big OF((z_word_t data));
-#endif
-
 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
 /*
  Swap the bytes in a z_word_t to convert between little and big endian. Any
@ -123,9 +110,7 @@ local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
  instruction, if one is available. This assumes that word_t is either 32 bits
  or 64 bits.
 */
-local z_word_t byte_swap(word)
-    z_word_t word;
-{
+local z_word_t byte_swap(z_word_t word) {
 #  if W == 8
    return
        (word & 0xff00000000000000) >> 56 |
@ -146,24 +131,77 @@ local z_word_t byte_swap(word)
 }
 #endif

+#ifdef DYNAMIC_CRC_TABLE
+/* =========================================================================
+ * Table of powers of x for combining CRC-32s, filled in by make_crc_table()
+ * below.
+ */
+   local z_crc_t FAR x2n_table[32];
+#else
+/* =========================================================================
+ * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
+ * of x for combining CRC-32s, all made by make_crc_table().
+ */
+#  include "crc32.h"
+#endif
+
 /* CRC polynomial. */
 #define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */

-#ifdef DYNAMIC_CRC_TABLE
+/*
+  Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
+  reflected. For speed, this requires that a not be zero.
+ */
+local z_crc_t multmodp(z_crc_t a, z_crc_t b) {
+    z_crc_t m, p;
+
+    m = (z_crc_t)1 << 31; /* mask over a, starting at bit 31 and moving down one bit per iteration */
+    p = 0; /* accumulated product */
+    for (;;) {
+        if (a & m) {
+            p ^= b; /* current bit of a is set: fold the current b into the product */
+            if ((a & (m - 1)) == 0) /* no set bits remain below m, so the product is complete */
+                break; /* this is the only exit, and it needs a set bit in a -- hence the "a not be zero" requirement */
+        }
+        m >>= 1;
+        b = b & 1 ? (b >> 1) ^ POLY : b >> 1; /* step b for the next bit: shift right, xor in POLY when the bit shifted out was 1 */
+    }
+    return p;
+}

+/*
+  Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
+  initialized.
+ */
+local z_crc_t x2nmodp(z_off64_t n, unsigned k) {
+    z_crc_t p;
+
+    p = (z_crc_t)1 << 31;           /* x^0 == 1 */
+    while (n) { /* consume n one bit at a time, least significant bit first */
+        if (n & 1)
+            p = multmodp(x2n_table[k & 31], p); /* set bit: multiply p by table entry k; the index wraps modulo 32 */
+        n >>= 1;
+        k++; /* the next bit of n pairs with the next table entry */
+    }
+    return p;
+}
+
+#ifdef DYNAMIC_CRC_TABLE
+/* =========================================================================
+ * Build the tables for byte-wise and braided CRC-32 calculations, and a table
+ * of powers of x for combining CRC-32s.
+ */
 local z_crc_t FAR crc_table[256];
-local z_crc_t FAR x2n_table[32];
-local void make_crc_table OF((void));
 #ifdef W
   local z_word_t FAR crc_big_table[256];
   local z_crc_t FAR crc_braid_table[W][256];
   local z_word_t FAR crc_braid_big_table[W][256];
-   local void braid OF((z_crc_t [][256], z_word_t [][256], int, int));
+   local void braid(z_crc_t [][256], z_word_t [][256], int, int);
 #endif
 #ifdef MAKECRCH
-   local void write_table OF((FILE *, const z_crc_t FAR *, int));
-   local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
-   local void write_table64 OF((FILE *, const z_word_t FAR *, int));
+   local void write_table(FILE *, const z_crc_t FAR *, int);
+   local void write_table32hi(FILE *, const z_word_t FAR *, int);
+   local void write_table64(FILE *, const z_word_t FAR *, int);
 #endif /* MAKECRCH */

 /*
@ -176,7 +214,6 @@ local void make_crc_table OF((void));

 /* Definition of once functionality. */
 typedef struct once_s once_t;
-local void once OF((once_t *, void (*)(void)));

 /* Check for the availability of atomics. */
 #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
@ -196,10 +233,7 @@ struct once_s {
  invoke once() at the same time. The state must be a once_t initialized with
  ONCE_INIT.
 */
-local void once(state, init)
-    once_t *state;
-    void (*init)(void);
-{
+local void once(once_t *state, void (*init)(void)) {
    if (!atomic_load(&state->done)) {
        if (atomic_flag_test_and_set(&state->begun))
            while (!atomic_load(&state->done))
@ -222,10 +256,7 @@ struct once_s {

 /* Test and set. Alas, not atomic, but tries to minimize the period of
   vulnerability. */
-local int test_and_set OF((int volatile *));
-local int test_and_set(flag)
-    int volatile *flag;
-{
+local int test_and_set(int volatile *flag) {
    int was;

    was = *flag;
@ -234,10 +265,7 @@ local int test_and_set(flag)
 }

 /* Run the provided init() function once. This is not thread-safe. */
-local void once(state, init)
-    once_t *state;
-    void (*init)(void);
-{
+local void once(once_t *state, void (*init)(void)) {
    if (!state->done) {
        if (test_and_set(&state->begun))
            while (!state->done)
@ -279,8 +307,7 @@ local once_t made = ONCE_INIT;
  combinations of CRC register values and incoming bytes.
 */

-local void make_crc_table()
-{
+local void make_crc_table(void) {
    unsigned i, j, n;
    z_crc_t p;

@ -447,11 +474,7 @@ local void make_crc_table()
   Write the 32-bit values in table[0..k-1] to out, five per line in
   hexadecimal separated by commas.
 */
-local void write_table(out, table, k)
-    FILE *out;
-    const z_crc_t FAR *table;
-    int k;
-{
+local void write_table(FILE *out, const z_crc_t FAR *table, int k) {
    int n;

    for (n = 0; n < k; n++)
@ -464,11 +487,7 @@ local void write_table(out, table, k)
   Write the high 32-bits of each value in table[0..k-1] to out, five per line
   in hexadecimal separated by commas.
 */
-local void write_table32hi(out, table, k)
-FILE *out;
-const z_word_t FAR *table;
-int k;
-{
+local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) {
    int n;

    for (n = 0; n < k; n++)
@ -484,11 +503,7 @@ int k;
  bits. If not, then the type cast and format string can be adjusted
  accordingly.
 */
-local void write_table64(out, table, k)
-    FILE *out;
-    const z_word_t FAR *table;
-    int k;
-{
+local void write_table64(FILE *out, const z_word_t FAR *table, int k) {
    int n;

    for (n = 0; n < k; n++)
@ -498,8 +513,7 @@ local void write_table64(out, table, k)
 }

 /* Actually do the deed. */
-int main()
-{
+int main(void) {
    make_crc_table();
    return 0;
 }
@ -511,12 +525,7 @@ int main()
  Generate the little and big-endian braid tables for the given n and z_word_t
  size w. Each array must have room for w blocks of 256 elements.
 */
-local void braid(ltl, big, n, w)
-    z_crc_t ltl[][256];
-    z_word_t big[][256];
-    int n;
-    int w;
-{
+local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) {
    int k;
    z_crc_t i, p, q;
    for (k = 0; k < w; k++) {
@ -531,69 +540,13 @@ local void braid(ltl, big, n, w)
 }
 #endif

-#else /* !DYNAMIC_CRC_TABLE */
-/* ========================================================================
- * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
- * of x for combining CRC-32s, all made by make_crc_table().
- */
-#include "crc32.h"
 #endif /* DYNAMIC_CRC_TABLE */

-/* ========================================================================
- * Routines used for CRC calculation. Some are also required for the table
- * generation above.
- */
-
-/*
-  Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
-  reflected. For speed, this requires that a not be zero.
- */
-local z_crc_t multmodp(a, b)
-    z_crc_t a;
-    z_crc_t b;
-{
-    z_crc_t m, p;
-
-    m = (z_crc_t)1 << 31;
-    p = 0;
-    for (;;) {
-        if (a & m) {
-            p ^= b;
-            if ((a & (m - 1)) == 0)
-                break;
-        }
-        m >>= 1;
-        b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
-    }
-    return p;
-}
-
-/*
-  Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
-  initialized.
- */
-local z_crc_t x2nmodp(n, k)
-    z_off64_t n;
-    unsigned k;
-{
-    z_crc_t p;
-
-    p = (z_crc_t)1 << 31;           /* x^0 == 1 */
-    while (n) {
-        if (n & 1)
-            p = multmodp(x2n_table[k & 31], p);
-        n >>= 1;
-        k++;
-    }
-    return p;
-}
-
 /* =========================================================================
 * This function can be used by asm versions of crc32(), and to force the
 * generation of the CRC tables in a threaded application.
 */
-const z_crc_t FAR * ZEXPORT get_crc_table()
-{
+const z_crc_t FAR * ZEXPORT get_crc_table(void) {
 #ifdef DYNAMIC_CRC_TABLE
    once(&made, make_crc_table);
 #endif /* DYNAMIC_CRC_TABLE */
@ -619,11 +572,8 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
 #define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
 #define Z_BATCH_MIN 800             /* fewest words in a final batch */

-unsigned long ZEXPORT crc32_z(crc, buf, len)
-    unsigned long crc;
-    const unsigned char FAR *buf;
-    z_size_t len;
-{
+unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
+                              z_size_t len) {
    z_crc_t val;
    z_word_t crc1, crc2;
    const z_word_t *word;
@ -723,18 +673,14 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
  least-significant byte of the word as the first byte of data, without any pre
  or post conditioning. This is used to combine the CRCs of each braid.
 */
-local z_crc_t crc_word(data)
-    z_word_t data;
-{
+local z_crc_t crc_word(z_word_t data) {
    int k;
    for (k = 0; k < W; k++)
        data = (data >> 8) ^ crc_table[data & 0xff];
    return (z_crc_t)data;
 }

-local z_word_t crc_word_big(data)
-    z_word_t data;
-{
+local z_word_t crc_word_big(z_word_t data) {
    int k;
    for (k = 0; k < W; k++)
        data = (data << 8) ^
@ -745,11 +691,8 @@ local z_word_t crc_word_big(data)
 #endif

 /* ========================================================================= */
-unsigned long ZEXPORT crc32_z(crc, buf, len)
-    unsigned long crc;
-    const unsigned char FAR *buf;
-    z_size_t len;
-{
+unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
+                              z_size_t len) {
    /* Return initial CRC, if requested. */
    if (buf == Z_NULL) return 0;

@ -781,8 +724,8 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
        words = (z_word_t const *)buf;

        /* Do endian check at execution time instead of compile time, since ARM
-           processors can change the endianess at execution time. If the
-           compiler knows what the endianess will be, it can optimize out the
+           processors can change the endianness at execution time. If the
+           compiler knows what the endianness will be, it can optimize out the
           check and the unused branch. */
        endian = 1;
        if (*(unsigned char *)&endian) {
@ -1069,20 +1012,13 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
 #endif

 /* ========================================================================= */
-unsigned long ZEXPORT crc32(crc, buf, len)
-    unsigned long crc;
-    const unsigned char FAR *buf;
-    uInt len;
-{
+unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf,
+                            uInt len) {
    return crc32_z(crc, buf, len);
 }

 /* ========================================================================= */
-uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
-    uLong crc1;
-    uLong crc2;
-    z_off64_t len2;
-{
+uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) {
 #ifdef DYNAMIC_CRC_TABLE
    once(&made, make_crc_table);
 #endif /* DYNAMIC_CRC_TABLE */
@ -1090,18 +1026,12 @@ uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
 }

 /* ========================================================================= */
-uLong ZEXPORT crc32_combine(crc1, crc2, len2)
-    uLong crc1;
-    uLong crc2;
-    z_off_t len2;
-{
+uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) {
    return crc32_combine64(crc1, crc2, (z_off64_t)len2);
 }

 /* ========================================================================= */
-uLong ZEXPORT crc32_combine_gen64(len2)
-    z_off64_t len2;
-{
+uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) {
 #ifdef DYNAMIC_CRC_TABLE
    once(&made, make_crc_table);
 #endif /* DYNAMIC_CRC_TABLE */
@ -1109,17 +1039,11 @@ uLong ZEXPORT crc32_combine_gen64(len2)
 }

 /* ========================================================================= */
-uLong ZEXPORT crc32_combine_gen(len2)
-    z_off_t len2;
-{
+uLong ZEXPORT crc32_combine_gen(z_off_t len2) {
    return crc32_combine_gen64((z_off64_t)len2);
 }

 /* ========================================================================= */
-uLong ZEXPORT crc32_combine_op(crc1, crc2, op)
-    uLong crc1;
-    uLong crc2;
-    uLong op;
-{
+uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) {
    return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
 }
--- a/3rdparty/zlib/deflate.c
+++ b/3rdparty/zlib/deflate.c
@ -1,5 +1,5 @@
 /* deflate.c -- compress data using the deflation algorithm
- * Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler
+ * Copyright (C) 1995-2023 Jean-loup Gailly and Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

@ -52,7 +52,7 @@
 #include "deflate.h"

 const char deflate_copyright[] =
-   " deflate 1.2.13 Copyright 1995-2022 Jean-loup Gailly and Mark Adler ";
+   " deflate 1.3 Copyright 1995-2023 Jean-loup Gailly and Mark Adler ";
 /*
  If you use the zlib library in a product, an acknowledgment is welcome
  in the documentation of your product. If for some reason you cannot
@ -60,9 +60,6 @@ const char deflate_copyright[] =
  copyright string in the executable of your product.
 */

-/* ===========================================================================
- *  Function prototypes.
- */
 typedef enum {
    need_more,      /* block not completed, need more input or more output */
    block_done,     /* block flush performed */
@ -70,29 +67,16 @@ typedef enum {
    finish_done     /* finish done, accept no more input or output */
 } block_state;

-typedef block_state (*compress_func) OF((deflate_state *s, int flush));
+typedef block_state (*compress_func)(deflate_state *s, int flush);
 /* Compression function. Returns the block state after the call. */

-local int deflateStateCheck      OF((z_streamp strm));
-local void slide_hash     OF((deflate_state *s));
-local void fill_window    OF((deflate_state *s));
-local block_state deflate_stored OF((deflate_state *s, int flush));
-local block_state deflate_fast   OF((deflate_state *s, int flush));
+local block_state deflate_stored(deflate_state *s, int flush);
+local block_state deflate_fast(deflate_state *s, int flush);
 #ifndef FASTEST
-local block_state deflate_slow   OF((deflate_state *s, int flush));
-#endif
-local block_state deflate_rle    OF((deflate_state *s, int flush));
-local block_state deflate_huff   OF((deflate_state *s, int flush));
-local void lm_init        OF((deflate_state *s));
-local void putShortMSB    OF((deflate_state *s, uInt b));
-local void flush_pending  OF((z_streamp strm));
-local unsigned read_buf   OF((z_streamp strm, Bytef *buf, unsigned size));
-local uInt longest_match  OF((deflate_state *s, IPos cur_match));
-
-#ifdef ZLIB_DEBUG
-local  void check_match OF((deflate_state *s, IPos start, IPos match,
-                            int length));
+local block_state deflate_slow(deflate_state *s, int flush);
 #endif
+local block_state deflate_rle(deflate_state *s, int flush);
+local block_state deflate_huff(deflate_state *s, int flush);

 /* ===========================================================================
 * Local data
@ -195,9 +179,12 @@ local const config configuration_table[10] = {
 * bit values at the expense of memory usage). We slide even when level == 0 to
 * keep the hash table consistent if we switch back to level > 0 later.
 */
-local void slide_hash(s)
-    deflate_state *s;
-{
+#if defined(__has_feature)
+#  if __has_feature(memory_sanitizer)
+     __attribute__((no_sanitize("memory")))
+#  endif
+#endif
+local void slide_hash(deflate_state *s) {
    unsigned n, m;
    Posf *p;
    uInt wsize = s->w_size;
@ -221,30 +208,177 @@ local void slide_hash(s)
 #endif
 }

+/* ===========================================================================
+ * Read a new buffer from the current input stream, update the adler32
+ * and total number of bytes read.  All deflate() input goes through
+ * this function so some applications may wish to modify it to avoid
+ * allocating a large strm->next_in buffer and copying from it.
+ * (See also flush_pending()).
+ *
+ * Copies min(strm->avail_in, size) bytes from strm->next_in into buf and
+ * returns that count; returns 0 without touching the stream when that
+ * count is zero.  The check value in strm->adler is updated with adler32()
+ * when state->wrap == 1 and, only if GZIP is defined, with crc32() when
+ * state->wrap == 2; for any other wrap value it is left as is.
+ */
+local unsigned read_buf(z_streamp strm, Bytef *buf, unsigned size) {
+    unsigned len = strm->avail_in;
+
+    if (len > size) len = size;     /* never copy more than the caller has room for */
+    if (len == 0) return 0;         /* nothing to copy; stream fields stay unchanged */
+
+    strm->avail_in  -= len;
+
+    zmemcpy(buf, strm->next_in, len);
+    if (strm->state->wrap == 1) {
+        strm->adler = adler32(strm->adler, buf, len);
+    }
+#ifdef GZIP
+    else if (strm->state->wrap == 2) {
+        strm->adler = crc32(strm->adler, buf, len);
+    }
+#endif
+    strm->next_in  += len;          /* advance past the bytes just consumed */
+    strm->total_in += len;
+
+    return len;
+}
+
+/* ===========================================================================
+ * Fill the window when the lookahead becomes insufficient.
+ * Updates strstart and lookahead.
+ *
+ * IN assertion: lookahead < MIN_LOOKAHEAD
+ * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
+ *    At least one byte has been read, or avail_in == 0; reads are
+ *    performed for at least two bytes (required for the zip translate_eol
+ *    option -- not supported here).
+ *
+ * Outline of the body:
+ *   1. Loop: compute the free space at the end of the window; if strstart
+ *      has reached wsize + MAX_DIST(s), copy the upper half of the window
+ *      down, reduce match_start, strstart and block_start by wsize, clamp
+ *      insert to strstart and call slide_hash(); then read more input with
+ *      read_buf() and insert any pending strings into the hash table.
+ *      The loop stops once lookahead >= MIN_LOOKAHEAD or avail_in == 0.
+ *   2. After the loop: zero the not-yet-written bytes following the current
+ *      data (at most WIN_INIT of them) and advance high_water.
+ */
+local void fill_window(deflate_state *s) {
+    unsigned n;
+    unsigned more;    /* Amount of free space at the end of the window. */
+    uInt wsize = s->w_size;
+
+    Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
+
+    do {
+        more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);
+
+        /* Deal with !@#$% 64K limit: */
+        if (sizeof(int) <= 2) {
+            if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
+                more = wsize;
+
+            } else if (more == (unsigned)(-1)) {
+                /* Very unlikely, but possible on 16 bit machine if
+                 * strstart == 0 && lookahead == 1 (input done a byte at a time)
+                 */
+                more--;
+            }
+        }
+
+        /* If the window is almost full and there is insufficient lookahead,
+         * move the upper half to the lower one to make room in the upper half.
+         */
+        if (s->strstart >= wsize + MAX_DIST(s)) {
+
+            zmemcpy(s->window, s->window + wsize, (unsigned)wsize - more);
+            s->match_start -= wsize;
+            s->strstart    -= wsize; /* we now have strstart >= MAX_DIST */
+            s->block_start -= (long) wsize;
+            if (s->insert > s->strstart)
+                s->insert = s->strstart;
+            slide_hash(s);
+            more += wsize;           /* the half just vacated is free space too */
+        }
+        if (s->strm->avail_in == 0) break;
+
+        /* If there was no sliding:
+         *    strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
+         *    more == window_size - lookahead - strstart
+         * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
+         * => more >= window_size - 2*WSIZE + 2
+         * In the BIG_MEM or MMAP case (not yet supported),
+         *   window_size == input_size + MIN_LOOKAHEAD  &&
+         *   strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
+         * Otherwise, window_size == 2*WSIZE so more >= 2.
+         * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
+         */
+        Assert(more >= 2, "more < 2");
+
+        n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
+        s->lookahead += n;
+
+        /* Initialize the hash value now that we have some input: */
+        if (s->lookahead + s->insert >= MIN_MATCH) {
+            uInt str = s->strstart - s->insert;
+            s->ins_h = s->window[str];
+            UPDATE_HASH(s, s->ins_h, s->window[str + 1]);
+#if MIN_MATCH != 3
+            Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif /* NOTE(review): the non-code line above looks intended to break the build if MIN_MATCH != 3 -- confirm */
+            while (s->insert) {
+                UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]);
+#ifndef FASTEST
+                s->prev[str & s->w_mask] = s->head[s->ins_h];
+#endif
+                s->head[s->ins_h] = (Pos)str;
+                str++;
+                s->insert--;
+                if (s->lookahead + s->insert < MIN_MATCH)
+                    break;
+            }
+        }
+        /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
+         * but this is not important since only literal bytes will be emitted.
+         */
+
+    } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
+
+    /* If the WIN_INIT bytes after the end of the current data have never been
+     * written, then zero those bytes in order to avoid memory check reports of
+     * the use of uninitialized (or uninitialised as Julian writes) bytes by
+     * the longest match routines.  Update the high water mark for the next
+     * time through here.  WIN_INIT is set to MAX_MATCH since the longest match
+     * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
+     */
+    if (s->high_water < s->window_size) {
+        ulg curr = s->strstart + (ulg)(s->lookahead);   /* offset just past the current data */
+        ulg init;                                       /* number of bytes to zero */
+
+        if (s->high_water < curr) {
+            /* Previous high water mark below current data -- zero WIN_INIT
+             * bytes or up to end of window, whichever is less.
+             */
+            init = s->window_size - curr;
+            if (init > WIN_INIT)
+                init = WIN_INIT;
+            zmemzero(s->window + curr, (unsigned)init);
+            s->high_water = curr + init;
+        }
+        else if (s->high_water < (ulg)curr + WIN_INIT) {
+            /* High water mark at or above current data, but below current data
+             * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
+             * to end of window, whichever is less.
+             */
+            init = (ulg)curr + WIN_INIT - s->high_water;
+            if (init > s->window_size - s->high_water)
+                init = s->window_size - s->high_water;
+            zmemzero(s->window + s->high_water, (unsigned)init);
+            s->high_water += init;
+        }
+    }
+
+    Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD,
+           "not enough room for search");
+}
+
 /* ========================================================================= */
-int ZEXPORT deflateInit_(strm, level, version, stream_size)
-    z_streamp strm;
-    int level;
-    const char *version;
-    int stream_size;
-{
+int ZEXPORT deflateInit_(z_streamp strm, int level, const char *version,
+                         int stream_size) {
    return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL,
                         Z_DEFAULT_STRATEGY, version, stream_size);
    /* To do: ignore strm->next_in if we use it as window */
 }

 /* ========================================================================= */
-int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
-                  version, stream_size)
-    z_streamp strm;
-    int  level;
-    int  method;
-    int  windowBits;
-    int  memLevel;
-    int  strategy;
-    const char *version;
-    int stream_size;
-{
+int ZEXPORT deflateInit2_(z_streamp strm, int level, int method,
+                          int windowBits, int memLevel, int strategy,
+                          const char *version, int stream_size) {
    deflate_state *s;
    int wrap = 1;
    static const char my_version[] = ZLIB_VERSION;
@ -386,9 +520,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
 /* =========================================================================
 * Check for a valid deflate stream state. Return 0 if ok, 1 if not.
 */
-local int deflateStateCheck(strm)
-    z_streamp strm;
-{
+local int deflateStateCheck(z_streamp strm) {
    deflate_state *s;
    if (strm == Z_NULL ||
        strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0)
@ -409,11 +541,8 @@ local int deflateStateCheck(strm)
 }

 /* ========================================================================= */
-int ZEXPORT deflateSetDictionary(strm, dictionary, dictLength)
-    z_streamp strm;
-    const Bytef *dictionary;
-    uInt  dictLength;
-{
+int ZEXPORT deflateSetDictionary(z_streamp strm, const Bytef *dictionary,
+                                 uInt  dictLength) {
    deflate_state *s;
    uInt str, n;
    int wrap;
@ -478,11 +607,8 @@ int ZEXPORT deflateSetDictionary(strm, dictionary, dictLength)
 }

 /* ========================================================================= */
-int ZEXPORT deflateGetDictionary(strm, dictionary, dictLength)
-    z_streamp strm;
-    Bytef *dictionary;
-    uInt  *dictLength;
-{
+int ZEXPORT deflateGetDictionary(z_streamp strm, Bytef *dictionary,
+                                 uInt *dictLength) {
    deflate_state *s;
    uInt len;

@ -500,9 +626,7 @@ int ZEXPORT deflateGetDictionary(strm, dictionary, dictLength)
 }

 /* ========================================================================= */
-int ZEXPORT deflateResetKeep(strm)
-    z_streamp strm;
-{
+int ZEXPORT deflateResetKeep(z_streamp strm) {
    deflate_state *s;

    if (deflateStateCheck(strm)) {
@ -537,10 +661,32 @@ int ZEXPORT deflateResetKeep(strm)
    return Z_OK;
 }

+/* ===========================================================================
+ * Initialize the "longest match" routines for a new zlib stream
+ *
+ * Sets window_size to twice w_size, clears the hash table with CLEAR_HASH,
+ * loads max_lazy_match, good_match, nice_match and max_chain_length from
+ * the configuration_table entry for s->level, and resets the string
+ * position and match bookkeeping fields to their starting values.
+ */
+local void lm_init(deflate_state *s) {
+    s->window_size = (ulg)2L*s->w_size;
+
+    CLEAR_HASH(s);
+
+    /* Set the default configuration parameters:
+     */
+    s->max_lazy_match   = configuration_table[s->level].max_lazy;   /* NOTE(review): s->level is used as a direct index; presumably validated by the caller -- confirm */
+    s->good_match       = configuration_table[s->level].good_length;
+    s->nice_match       = configuration_table[s->level].nice_length;
+    s->max_chain_length = configuration_table[s->level].max_chain;
+
+    s->strstart = 0;
+    s->block_start = 0L;
+    s->lookahead = 0;
+    s->insert = 0;
+    s->match_length = s->prev_length = MIN_MATCH-1;
+    s->match_available = 0;
+    s->ins_h = 0;
+}
+
 /* ========================================================================= */
-int ZEXPORT deflateReset(strm)
-    z_streamp strm;
-{
+int ZEXPORT deflateReset(z_streamp strm) {
    int ret;

    ret = deflateResetKeep(strm);
@ -550,10 +696,7 @@ int ZEXPORT deflateReset(strm)
 }

 /* ========================================================================= */
-int ZEXPORT deflateSetHeader(strm, head)
-    z_streamp strm;
-    gz_headerp head;
-{
+int ZEXPORT deflateSetHeader(z_streamp strm, gz_headerp head) {
    if (deflateStateCheck(strm) || strm->state->wrap != 2)
        return Z_STREAM_ERROR;
    strm->state->gzhead = head;
@ -561,11 +704,7 @@ int ZEXPORT deflateSetHeader(strm, head)
 }

 /* ========================================================================= */
-int ZEXPORT deflatePending(strm, pending, bits)
-    unsigned *pending;
-    int *bits;
-    z_streamp strm;
-{
+int ZEXPORT deflatePending(z_streamp strm, unsigned *pending, int *bits) {
    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
    if (pending != Z_NULL)
        *pending = strm->state->pending;
@ -575,11 +714,7 @@ int ZEXPORT deflatePending(strm, pending, bits)
 }

 /* ========================================================================= */
-int ZEXPORT deflatePrime(strm, bits, value)
-    z_streamp strm;
-    int bits;
-    int value;
-{
+int ZEXPORT deflatePrime(z_streamp strm, int bits, int value) {
    deflate_state *s;
    int put;

@ -602,11 +737,7 @@ int ZEXPORT deflatePrime(strm, bits, value)
 }

 /* ========================================================================= */
-int ZEXPORT deflateParams(strm, level, strategy)
-    z_streamp strm;
-    int level;
-    int strategy;
-{
+int ZEXPORT deflateParams(z_streamp strm, int level, int strategy) {
    deflate_state *s;
    compress_func func;

@ -651,13 +782,8 @@ int ZEXPORT deflateParams(strm, level, strategy)
 }

 /* ========================================================================= */
-int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain)
-    z_streamp strm;
-    int good_length;
-    int max_lazy;
-    int nice_length;
-    int max_chain;
-{
+int ZEXPORT deflateTune(z_streamp strm, int good_length, int max_lazy,
+                        int nice_length, int max_chain) {
    deflate_state *s;

    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
@ -693,10 +819,7 @@ int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain)
 *
 * Shifts are used to approximate divisions, for speed.
 */
-uLong ZEXPORT deflateBound(strm, sourceLen)
-    z_streamp strm;
-    uLong sourceLen;
-{
+uLong ZEXPORT deflateBound(z_streamp strm, uLong sourceLen) {
    deflate_state *s;
    uLong fixedlen, storelen, wraplen;

@ -752,7 +875,8 @@ uLong ZEXPORT deflateBound(strm, sourceLen)

    /* if not default parameters, return one of the conservative bounds */
    if (s->w_bits != 15 || s->hash_bits != 8 + 7)
-        return (s->w_bits <= s->hash_bits ? fixedlen : storelen) + wraplen;
+        return (s->w_bits <= s->hash_bits && s->level ? fixedlen : storelen) +
+               wraplen;

    /* default settings: return tight bound for that case -- ~0.03% overhead
       plus a small constant */
@ -765,10 +889,7 @@ uLong ZEXPORT deflateBound(strm, sourceLen)
 * IN assertion: the stream state is correct and there is enough room in
 * pending_buf.
 */
-local void putShortMSB(s, b)
-    deflate_state *s;
-    uInt b;
-{
+local void putShortMSB(deflate_state *s, uInt b) {
    put_byte(s, (Byte)(b >> 8));
    put_byte(s, (Byte)(b & 0xff));
 }
@ -779,9 +900,7 @@ local void putShortMSB(s, b)
 * applications may wish to modify it to avoid allocating a large
 * strm->next_out buffer and copying into it. (See also read_buf()).
 */
-local void flush_pending(strm)
-    z_streamp strm;
-{
+local void flush_pending(z_streamp strm) {
    unsigned len;
    deflate_state *s = strm->state;

@ -812,10 +931,7 @@ local void flush_pending(strm)
    } while (0)

 /* ========================================================================= */
-int ZEXPORT deflate(strm, flush)
-    z_streamp strm;
-    int flush;
-{
+int ZEXPORT deflate(z_streamp strm, int flush) {
    int old_flush; /* value of flush param for previous deflate call */
    deflate_state *s;

@ -1127,9 +1243,7 @@ int ZEXPORT deflate(strm, flush)
 }

 /* ========================================================================= */
-int ZEXPORT deflateEnd(strm)
-    z_streamp strm;
-{
+int ZEXPORT deflateEnd(z_streamp strm) {
    int status;

    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
@ -1153,11 +1267,10 @@ int ZEXPORT deflateEnd(strm)
 * To simplify the source, this is not supported for 16-bit MSDOS (which
 * doesn't have enough memory anyway to duplicate compression states).
 */
-int ZEXPORT deflateCopy(dest, source)
-    z_streamp dest;
-    z_streamp source;
-{
+int ZEXPORT deflateCopy(z_streamp dest, z_streamp source) {
 #ifdef MAXSEG_64K
+    (void)dest;
+    (void)source;
    return Z_STREAM_ERROR;
 #else
    deflate_state *ds;
@ -1205,66 +1318,6 @@ int ZEXPORT deflateCopy(dest, source)
 #endif /* MAXSEG_64K */
 }

-/* ===========================================================================
- * Read a new buffer from the current input stream, update the adler32
- * and total number of bytes read.  All deflate() input goes through
- * this function so some applications may wish to modify it to avoid
- * allocating a large strm->next_in buffer and copying from it.
- * (See also flush_pending()).
- */
-local unsigned read_buf(strm, buf, size)
-    z_streamp strm;
-    Bytef *buf;
-    unsigned size;
-{
-    unsigned len = strm->avail_in;
-
-    if (len > size) len = size;
-    if (len == 0) return 0;
-
-    strm->avail_in  -= len;
-
-    zmemcpy(buf, strm->next_in, len);
-    if (strm->state->wrap == 1) {
-        strm->adler = adler32(strm->adler, buf, len);
-    }
-#ifdef GZIP
-    else if (strm->state->wrap == 2) {
-        strm->adler = crc32(strm->adler, buf, len);
-    }
-#endif
-    strm->next_in  += len;
-    strm->total_in += len;
-
-    return len;
-}
-
-/* ===========================================================================
- * Initialize the "longest match" routines for a new zlib stream
- */
-local void lm_init(s)
-    deflate_state *s;
-{
-    s->window_size = (ulg)2L*s->w_size;
-
-    CLEAR_HASH(s);
-
-    /* Set the default configuration parameters:
-     */
-    s->max_lazy_match   = configuration_table[s->level].max_lazy;
-    s->good_match       = configuration_table[s->level].good_length;
-    s->nice_match       = configuration_table[s->level].nice_length;
-    s->max_chain_length = configuration_table[s->level].max_chain;
-
-    s->strstart = 0;
-    s->block_start = 0L;
-    s->lookahead = 0;
-    s->insert = 0;
-    s->match_length = s->prev_length = MIN_MATCH-1;
-    s->match_available = 0;
-    s->ins_h = 0;
-}
-
 #ifndef FASTEST
 /* ===========================================================================
 * Set match_start to the longest match starting at the given string and
@ -1275,10 +1328,7 @@ local void lm_init(s)
 *   string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
 * OUT assertion: the match length is not greater than s->lookahead.
 */
-local uInt longest_match(s, cur_match)
-    deflate_state *s;
-    IPos cur_match;                             /* current match */
-{
+local uInt longest_match(deflate_state *s, IPos cur_match) {
    unsigned chain_length = s->max_chain_length;/* max hash chain length */
    register Bytef *scan = s->window + s->strstart; /* current string */
    register Bytef *match;                      /* matched string */
@ -1426,10 +1476,7 @@ local uInt longest_match(s, cur_match)
 /* ---------------------------------------------------------------------------
 * Optimized version for FASTEST only
 */
-local uInt longest_match(s, cur_match)
-    deflate_state *s;
-    IPos cur_match;                             /* current match */
-{
+local uInt longest_match(deflate_state *s, IPos cur_match) {
    register Bytef *scan = s->window + s->strstart; /* current string */
    register Bytef *match;                       /* matched string */
    register int len;                           /* length of current match */
@ -1490,11 +1537,7 @@ local uInt longest_match(s, cur_match)
 /* ===========================================================================
 * Check that the match at match_start is indeed a match.
 */
-local void check_match(s, start, match, length)
-    deflate_state *s;
-    IPos start, match;
-    int length;
-{
+local void check_match(deflate_state *s, IPos start, IPos match, int length) {
    /* check that the match is indeed a match */
    if (zmemcmp(s->window + match,
                s->window + start, length) != EQUAL) {
@ -1514,137 +1557,6 @@ local void check_match(s, start, match, length)
 #  define check_match(s, start, match, length)
 #endif /* ZLIB_DEBUG */

-/* ===========================================================================
- * Fill the window when the lookahead becomes insufficient.
- * Updates strstart and lookahead.
- *
- * IN assertion: lookahead < MIN_LOOKAHEAD
- * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
- *    At least one byte has been read, or avail_in == 0; reads are
- *    performed for at least two bytes (required for the zip translate_eol
- *    option -- not supported here).
- */
-local void fill_window(s)
-    deflate_state *s;
-{
-    unsigned n;
-    unsigned more;    /* Amount of free space at the end of the window. */
-    uInt wsize = s->w_size;
-
-    Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
-
-    do {
-        more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);
-
-        /* Deal with !@#$% 64K limit: */
-        if (sizeof(int) <= 2) {
-            if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
-                more = wsize;
-
-            } else if (more == (unsigned)(-1)) {
-                /* Very unlikely, but possible on 16 bit machine if
-                 * strstart == 0 && lookahead == 1 (input done a byte at time)
-                 */
-                more--;
-            }
-        }
-
-        /* If the window is almost full and there is insufficient lookahead,
-         * move the upper half to the lower one to make room in the upper half.
-         */
-        if (s->strstart >= wsize + MAX_DIST(s)) {
-
-            zmemcpy(s->window, s->window + wsize, (unsigned)wsize - more);
-            s->match_start -= wsize;
-            s->strstart    -= wsize; /* we now have strstart >= MAX_DIST */
-            s->block_start -= (long) wsize;
-            if (s->insert > s->strstart)
-                s->insert = s->strstart;
-            slide_hash(s);
-            more += wsize;
-        }
-        if (s->strm->avail_in == 0) break;
-
-        /* If there was no sliding:
-         *    strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
-         *    more == window_size - lookahead - strstart
-         * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
-         * => more >= window_size - 2*WSIZE + 2
-         * In the BIG_MEM or MMAP case (not yet supported),
-         *   window_size == input_size + MIN_LOOKAHEAD  &&
-         *   strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
-         * Otherwise, window_size == 2*WSIZE so more >= 2.
-         * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
-         */
-        Assert(more >= 2, "more < 2");
-
-        n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
-        s->lookahead += n;
-
-        /* Initialize the hash value now that we have some input: */
-        if (s->lookahead + s->insert >= MIN_MATCH) {
-            uInt str = s->strstart - s->insert;
-            s->ins_h = s->window[str];
-            UPDATE_HASH(s, s->ins_h, s->window[str + 1]);
-#if MIN_MATCH != 3
-            Call UPDATE_HASH() MIN_MATCH-3 more times
-#endif
-            while (s->insert) {
-                UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]);
-#ifndef FASTEST
-                s->prev[str & s->w_mask] = s->head[s->ins_h];
-#endif
-                s->head[s->ins_h] = (Pos)str;
-                str++;
-                s->insert--;
-                if (s->lookahead + s->insert < MIN_MATCH)
-                    break;
-            }
-        }
-        /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
-         * but this is not important since only literal bytes will be emitted.
-         */
-
-    } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
-
-    /* If the WIN_INIT bytes after the end of the current data have never been
-     * written, then zero those bytes in order to avoid memory check reports of
-     * the use of uninitialized (or uninitialised as Julian writes) bytes by
-     * the longest match routines.  Update the high water mark for the next
-     * time through here.  WIN_INIT is set to MAX_MATCH since the longest match
-     * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
-     */
-    if (s->high_water < s->window_size) {
-        ulg curr = s->strstart + (ulg)(s->lookahead);
-        ulg init;
-
-        if (s->high_water < curr) {
-            /* Previous high water mark below current data -- zero WIN_INIT
-             * bytes or up to end of window, whichever is less.
-             */
-            init = s->window_size - curr;
-            if (init > WIN_INIT)
-                init = WIN_INIT;
-            zmemzero(s->window + curr, (unsigned)init);
-            s->high_water = curr + init;
-        }
-        else if (s->high_water < (ulg)curr + WIN_INIT) {
-            /* High water mark at or above current data, but below current data
-             * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
-             * to end of window, whichever is less.
-             */
-            init = (ulg)curr + WIN_INIT - s->high_water;
-            if (init > s->window_size - s->high_water)
-                init = s->window_size - s->high_water;
-            zmemzero(s->window + s->high_water, (unsigned)init);
-            s->high_water += init;
-        }
-    }
-
-    Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD,
-           "not enough room for search");
-}
-
 /* ===========================================================================
 * Flush the current block, with given end-of-file flag.
 * IN assertion: strstart is set to the end of the current match.
@ -1687,10 +1599,7 @@ local void fill_window(s)
 * copied. It is most efficient with large input and output buffers, which
 * maximizes the opportunities to have a single copy from next_in to next_out.
 */
-local block_state deflate_stored(s, flush)
-    deflate_state *s;
-    int flush;
-{
+local block_state deflate_stored(deflate_state *s, int flush) {
    /* Smallest worthy block size when not flushing or finishing. By default
     * this is 32K. This can be as small as 507 bytes for memLevel == 1. For
     * large input and output buffers, the stored block size will be larger.
@ -1874,10 +1783,7 @@ local block_state deflate_stored(s, flush)
 * new strings in the dictionary only for unmatched strings or for short
 * matches. It is used only for the fast compression options.
 */
-local block_state deflate_fast(s, flush)
-    deflate_state *s;
-    int flush;
-{
+local block_state deflate_fast(deflate_state *s, int flush) {
    IPos hash_head;       /* head of the hash chain */
    int bflush;           /* set if current block must be flushed */

@ -1976,10 +1882,7 @@ local block_state deflate_fast(s, flush)
 * evaluation for matches: a match is finally adopted only if there is
 * no better match at the next window position.
 */
-local block_state deflate_slow(s, flush)
-    deflate_state *s;
-    int flush;
-{
+local block_state deflate_slow(deflate_state *s, int flush) {
    IPos hash_head;          /* head of hash chain */
    int bflush;              /* set if current block must be flushed */

@ -2107,10 +2010,7 @@ local block_state deflate_slow(s, flush)
 * one.  Do not maintain a hash table.  (It will be regenerated if this run of
 * deflate switches away from Z_RLE.)
 */
-local block_state deflate_rle(s, flush)
-    deflate_state *s;
-    int flush;
-{
+local block_state deflate_rle(deflate_state *s, int flush) {
    int bflush;             /* set if current block must be flushed */
    uInt prev;              /* byte at distance one to match */
    Bytef *scan, *strend;   /* scan goes up to strend for length of run */
@ -2181,10 +2081,7 @@ local block_state deflate_rle(s, flush)
 * For Z_HUFFMAN_ONLY, do not look for matches.  Do not maintain a hash table.
 * (It will be regenerated if this run of deflate switches away from Huffman.)
 */
-local block_state deflate_huff(s, flush)
-    deflate_state *s;
-    int flush;
-{
+local block_state deflate_huff(deflate_state *s, int flush) {
    int bflush;             /* set if current block must be flushed */

    for (;;) {
--- a/3rdparty/zlib/deflate.h
+++ b/3rdparty/zlib/deflate.h
@ -291,14 +291,14 @@ typedef struct internal_state {
   memory checker errors from longest match routines */

        /* in trees.c */
-void ZLIB_INTERNAL _tr_init OF((deflate_state *s));
-int ZLIB_INTERNAL _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc));
-void ZLIB_INTERNAL _tr_flush_block OF((deflate_state *s, charf *buf,
-                        ulg stored_len, int last));
-void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s));
-void ZLIB_INTERNAL _tr_align OF((deflate_state *s));
-void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
-                        ulg stored_len, int last));
+void ZLIB_INTERNAL _tr_init(deflate_state *s);
+int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc);
+void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf,
+                                   ulg stored_len, int last);
+void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s);
+void ZLIB_INTERNAL _tr_align(deflate_state *s);
+void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf,
+                                    ulg stored_len, int last);

 #define d_code(dist) \
   ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
--- a/3rdparty/zlib/gzclose.c
+++ b/3rdparty/zlib/gzclose.c
@ -8,9 +8,7 @@
 /* gzclose() is in a separate file so that it is linked in only if it is used.
   That way the other gzclose functions can be used instead to avoid linking in
   unneeded compression or decompression routines. */
-int ZEXPORT gzclose(file)
-    gzFile file;
-{
+int ZEXPORT gzclose(gzFile file) {
 #ifndef NO_GZCOMPRESS
    gz_statep state;

--- a/3rdparty/zlib/gzguts.h
+++ b/3rdparty/zlib/gzguts.h
@ -7,9 +7,8 @@
 #  ifndef _LARGEFILE_SOURCE
 #    define _LARGEFILE_SOURCE 1
 #  endif
-#  ifdef _FILE_OFFSET_BITS
-#    undef _FILE_OFFSET_BITS
-#  endif
+#  undef _FILE_OFFSET_BITS
+#  undef _TIME_BITS
 #endif

 #ifdef HAVE_HIDDEN
@ -119,8 +118,8 @@

 /* gz* functions always use library allocation functions */
 #ifndef STDC
-  extern voidp  malloc OF((uInt size));
-  extern void   free   OF((voidpf ptr));
+  extern voidp  malloc(uInt size);
+  extern void   free(voidpf ptr);
 #endif

 /* get errno and strerror definition */
@ -138,10 +137,10 @@

 /* provide prototypes for these when building zlib without LFS */
 #if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0
-    ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
-    ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int));
-    ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile));
-    ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile));
+    ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *);
+    ZEXTERN z_off64_t ZEXPORT gzseek64(gzFile, z_off64_t, int);
+    ZEXTERN z_off64_t ZEXPORT gztell64(gzFile);
+    ZEXTERN z_off64_t ZEXPORT gzoffset64(gzFile);
 #endif

 /* default memLevel */
@ -203,9 +202,9 @@ typedef struct {
 typedef gz_state FAR *gz_statep;

 /* shared functions */
-void ZLIB_INTERNAL gz_error OF((gz_statep, int, const char *));
+void ZLIB_INTERNAL gz_error(gz_statep, int, const char *);
 #if defined UNDER_CE
-char ZLIB_INTERNAL *gz_strwinerror OF((DWORD error));
+char ZLIB_INTERNAL *gz_strwinerror(DWORD error);
 #endif

 /* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t
@ -214,6 +213,6 @@ char ZLIB_INTERNAL *gz_strwinerror OF((DWORD error));
 #ifdef INT_MAX
 #  define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX)
 #else
-unsigned ZLIB_INTERNAL gz_intmax OF((void));
+unsigned ZLIB_INTERNAL gz_intmax(void);
 #  define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax())
 #endif
--- a/3rdparty/zlib/gzlib.c
+++ b/3rdparty/zlib/gzlib.c
@ -15,10 +15,6 @@
 #endif
 #endif

-/* Local functions */
-local void gz_reset OF((gz_statep));
-local gzFile gz_open OF((const void *, int, const char *));
-
 #if defined UNDER_CE

 /* Map the Windows error number in ERROR to a locale-dependent error message
@ -30,9 +26,7 @@ local gzFile gz_open OF((const void *, int, const char *));

   The gz_strwinerror function does not change the current setting of
   GetLastError. */
-char ZLIB_INTERNAL *gz_strwinerror(error)
-     DWORD error;
-{
+char ZLIB_INTERNAL *gz_strwinerror(DWORD error) {
    static char buf[1024];

    wchar_t *msgbuf;
@ -72,9 +66,7 @@ char ZLIB_INTERNAL *gz_strwinerror(error)
 #endif /* UNDER_CE */

 /* Reset gzip file state */
-local void gz_reset(state)
-    gz_statep state;
-{
+local void gz_reset(gz_statep state) {
    state->x.have = 0;              /* no output data available */
    if (state->mode == GZ_READ) {   /* for reading ... */
        state->eof = 0;             /* not at end of file */
@ -90,11 +82,7 @@ local void gz_reset(state)
 }

 /* Open a gzip file either by name or file descriptor. */
-local gzFile gz_open(path, fd, mode)
-    const void *path;
-    int fd;
-    const char *mode;
-{
+local gzFile gz_open(const void *path, int fd, const char *mode) {
    gz_statep state;
    z_size_t len;
    int oflag;
@ -269,26 +257,17 @@ local gzFile gz_open(path, fd, mode)
 }

 /* -- see zlib.h -- */
-gzFile ZEXPORT gzopen(path, mode)
-    const char *path;
-    const char *mode;
-{
+gzFile ZEXPORT gzopen(const char *path, const char *mode) {
    return gz_open(path, -1, mode);
 }

 /* -- see zlib.h -- */
-gzFile ZEXPORT gzopen64(path, mode)
-    const char *path;
-    const char *mode;
-{
+gzFile ZEXPORT gzopen64(const char *path, const char *mode) {
    return gz_open(path, -1, mode);
 }

 /* -- see zlib.h -- */
-gzFile ZEXPORT gzdopen(fd, mode)
-    int fd;
-    const char *mode;
-{
+gzFile ZEXPORT gzdopen(int fd, const char *mode) {
    char *path;         /* identifier for error messages */
    gzFile gz;

@ -306,19 +285,13 @@ gzFile ZEXPORT gzdopen(fd, mode)

 /* -- see zlib.h -- */
 #ifdef WIDECHAR
-gzFile ZEXPORT gzopen_w(path, mode)
-    const wchar_t *path;
-    const char *mode;
-{
+gzFile ZEXPORT gzopen_w(const wchar_t *path, const char *mode) {
    return gz_open(path, -2, mode);
 }
 #endif

 /* -- see zlib.h -- */
-int ZEXPORT gzbuffer(file, size)
-    gzFile file;
-    unsigned size;
-{
+int ZEXPORT gzbuffer(gzFile file, unsigned size) {
    gz_statep state;

    /* get internal structure and check integrity */
@ -335,16 +308,14 @@ int ZEXPORT gzbuffer(file, size)
    /* check and set requested size */
    if ((size << 1) < size)
        return -1;              /* need to be able to double it */
-    if (size < 2)
-        size = 2;               /* need two bytes to check magic header */
+    if (size < 8)
+        size = 8;               /* needed to behave well with flushing */
    state->want = size;
    return 0;
 }

 /* -- see zlib.h -- */
-int ZEXPORT gzrewind(file)
-    gzFile file;
-{
+int ZEXPORT gzrewind(gzFile file) {
    gz_statep state;

    /* get internal structure */
@ -365,11 +336,7 @@ int ZEXPORT gzrewind(file)
 }

 /* -- see zlib.h -- */
-z_off64_t ZEXPORT gzseek64(file, offset, whence)
-    gzFile file;
-    z_off64_t offset;
-    int whence;
-{
+z_off64_t ZEXPORT gzseek64(gzFile file, z_off64_t offset, int whence) {
    unsigned n;
    z_off64_t ret;
    gz_statep state;
@ -442,11 +409,7 @@ z_off64_t ZEXPORT gzseek64(file, offset, whence)
 }

 /* -- see zlib.h -- */
-z_off_t ZEXPORT gzseek(file, offset, whence)
-    gzFile file;
-    z_off_t offset;
-    int whence;
-{
+z_off_t ZEXPORT gzseek(gzFile file, z_off_t offset, int whence) {
    z_off64_t ret;

    ret = gzseek64(file, (z_off64_t)offset, whence);
@ -454,9 +417,7 @@ z_off_t ZEXPORT gzseek(file, offset, whence)
 }

 /* -- see zlib.h -- */
-z_off64_t ZEXPORT gztell64(file)
-    gzFile file;
-{
+z_off64_t ZEXPORT gztell64(gzFile file) {
    gz_statep state;

    /* get internal structure and check integrity */
@ -471,9 +432,7 @@ z_off64_t ZEXPORT gztell64(file)
 }

 /* -- see zlib.h -- */
-z_off_t ZEXPORT gztell(file)
-    gzFile file;
-{
+z_off_t ZEXPORT gztell(gzFile file) {
    z_off64_t ret;

    ret = gztell64(file);
@ -481,9 +440,7 @@ z_off_t ZEXPORT gztell(file)
 }

 /* -- see zlib.h -- */
-z_off64_t ZEXPORT gzoffset64(file)
-    gzFile file;
-{
+z_off64_t ZEXPORT gzoffset64(gzFile file) {
    z_off64_t offset;
    gz_statep state;

@ -504,9 +461,7 @@ z_off64_t ZEXPORT gzoffset64(file)
 }

 /* -- see zlib.h -- */
-z_off_t ZEXPORT gzoffset(file)
-    gzFile file;
-{
+z_off_t ZEXPORT gzoffset(gzFile file) {
    z_off64_t ret;

    ret = gzoffset64(file);
@ -514,9 +469,7 @@ z_off_t ZEXPORT gzoffset(file)
 }

 /* -- see zlib.h -- */
-int ZEXPORT gzeof(file)
-    gzFile file;
-{
+int ZEXPORT gzeof(gzFile file) {
    gz_statep state;

    /* get internal structure and check integrity */
@ -531,10 +484,7 @@ int ZEXPORT gzeof(file)
 }

 /* -- see zlib.h -- */
-const char * ZEXPORT gzerror(file, errnum)
-    gzFile file;
-    int *errnum;
-{
+const char * ZEXPORT gzerror(gzFile file, int *errnum) {
    gz_statep state;

    /* get internal structure and check integrity */
@ -552,9 +502,7 @@ const char * ZEXPORT gzerror(file, errnum)
 }

 /* -- see zlib.h -- */
-void ZEXPORT gzclearerr(file)
-    gzFile file;
-{
+void ZEXPORT gzclearerr(gzFile file) {
    gz_statep state;

    /* get internal structure and check integrity */
@ -578,11 +526,7 @@ void ZEXPORT gzclearerr(file)
   memory).  Simply save the error message as a static string.  If there is an
   allocation failure constructing the error message, then convert the error to
   out of memory. */
-void ZLIB_INTERNAL gz_error(state, err, msg)
-    gz_statep state;
-    int err;
-    const char *msg;
-{
+void ZLIB_INTERNAL gz_error(gz_statep state, int err, const char *msg) {
    /* free previously allocated message and clear */
    if (state->msg != NULL) {
        if (state->err != Z_MEM_ERROR)
@ -624,8 +568,7 @@ void ZLIB_INTERNAL gz_error(state, err, msg)
   available) -- we need to do this to cover cases where 2's complement not
   used, since C standard permits 1's complement and sign-bit representations,
   otherwise we could just use ((unsigned)-1) >> 1 */
-unsigned ZLIB_INTERNAL gz_intmax()
-{
+unsigned ZLIB_INTERNAL gz_intmax(void) {
    unsigned p, q;

    p = 1;
--- a/3rdparty/zlib/gzread.c
+++ b/3rdparty/zlib/gzread.c
@ -5,25 +5,12 @@

 #include "gzguts.h"

-/* Local functions */
-local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
-local int gz_avail OF((gz_statep));
-local int gz_look OF((gz_statep));
-local int gz_decomp OF((gz_statep));
-local int gz_fetch OF((gz_statep));
-local int gz_skip OF((gz_statep, z_off64_t));
-local z_size_t gz_read OF((gz_statep, voidp, z_size_t));
-
 /* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
   state->fd, and update state->eof, state->err, and state->msg as appropriate.
   This function needs to loop on read(), since read() is not guaranteed to
   read the number of bytes requested, depending on the type of descriptor. */
-local int gz_load(state, buf, len, have)
-    gz_statep state;
-    unsigned char *buf;
-    unsigned len;
-    unsigned *have;
-{
+local int gz_load(gz_statep state, unsigned char *buf, unsigned len,
+                  unsigned *have) {
    int ret;
    unsigned get, max = ((unsigned)-1 >> 2) + 1;

@ -53,9 +40,7 @@ local int gz_load(state, buf, len, have)
   If strm->avail_in != 0, then the current data is moved to the beginning of
   the input buffer, and then the remainder of the buffer is loaded with the
   available data from the input file. */
-local int gz_avail(state)
-    gz_statep state;
-{
+local int gz_avail(gz_statep state) {
    unsigned got;
    z_streamp strm = &(state->strm);

@ -88,9 +73,7 @@ local int gz_avail(state)
   case, all further file reads will be directly to either the output buffer or
   a user buffer.  If decompressing, the inflate state will be initialized.
   gz_look() will return 0 on success or -1 on failure. */
-local int gz_look(state)
-    gz_statep state;
-{
+local int gz_look(gz_statep state) {
    z_streamp strm = &(state->strm);

    /* allocate read buffers and inflate memory */
@ -170,9 +153,7 @@ local int gz_look(state)
   data.  If the gzip stream completes, state->how is reset to LOOK to look for
   the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
   on success, -1 on failure. */
-local int gz_decomp(state)
-    gz_statep state;
-{
+local int gz_decomp(gz_statep state) {
    int ret = Z_OK;
    unsigned had;
    z_streamp strm = &(state->strm);
@ -224,9 +205,7 @@ local int gz_decomp(state)
   looked for to determine whether to copy or decompress.  Returns -1 on error,
   otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
   end of the input file has been reached and all data has been processed.  */
-local int gz_fetch(state)
-    gz_statep state;
-{
+local int gz_fetch(gz_statep state) {
    z_streamp strm = &(state->strm);

    do {
@ -254,10 +233,7 @@ local int gz_fetch(state)
 }

 /* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
-local int gz_skip(state, len)
-    gz_statep state;
-    z_off64_t len;
-{
+local int gz_skip(gz_statep state, z_off64_t len) {
    unsigned n;

    /* skip over len bytes or reach end-of-file, whichever comes first */
@ -289,11 +265,7 @@ local int gz_skip(state, len)
   input.  Return the number of bytes read.  If zero is returned, either the
   end of file was reached, or there was an error.  state->err must be
   consulted in that case to determine which. */
-local z_size_t gz_read(state, buf, len)
-    gz_statep state;
-    voidp buf;
-    z_size_t len;
-{
+local z_size_t gz_read(gz_statep state, voidp buf, z_size_t len) {
    z_size_t got;
    unsigned n;

@ -370,11 +342,7 @@ local z_size_t gz_read(state, buf, len)
 }

 /* -- see zlib.h -- */
-int ZEXPORT gzread(file, buf, len)
-    gzFile file;
-    voidp buf;
-    unsigned len;
-{
+int ZEXPORT gzread(gzFile file, voidp buf, unsigned len) {
    gz_statep state;

    /* get internal structure */
@ -406,12 +374,7 @@ int ZEXPORT gzread(file, buf, len)
 }

 /* -- see zlib.h -- */
-z_size_t ZEXPORT gzfread(buf, size, nitems, file)
-    voidp buf;
-    z_size_t size;
-    z_size_t nitems;
-    gzFile file;
-{
+z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems, gzFile file) {
    z_size_t len;
    gz_statep state;

@ -442,9 +405,7 @@ z_size_t ZEXPORT gzfread(buf, size, nitems, file)
 #else
 #  undef gzgetc
 #endif
-int ZEXPORT gzgetc(file)
-    gzFile file;
-{
+int ZEXPORT gzgetc(gzFile file) {
    unsigned char buf[1];
    gz_statep state;

@ -469,17 +430,12 @@ int ZEXPORT gzgetc(file)
    return gz_read(state, buf, 1) < 1 ? -1 : buf[0];
 }

-int ZEXPORT gzgetc_(file)
-gzFile file;
-{
+int ZEXPORT gzgetc_(gzFile file) {
    return gzgetc(file);
 }

 /* -- see zlib.h -- */
-int ZEXPORT gzungetc(c, file)
-    int c;
-    gzFile file;
-{
+int ZEXPORT gzungetc(int c, gzFile file) {
    gz_statep state;

    /* get internal structure */
@ -487,6 +443,10 @@ int ZEXPORT gzungetc(c, file)
        return -1;
    state = (gz_statep)file;

+    /* in case this was just opened, set up the input buffer */
+    if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
+        (void)gz_look(state);
+
    /* check that we're reading and that there's no (serious) error */
    if (state->mode != GZ_READ ||
        (state->err != Z_OK && state->err != Z_BUF_ERROR))
@ -536,11 +496,7 @@ int ZEXPORT gzungetc(c, file)
 }

 /* -- see zlib.h -- */
-char * ZEXPORT gzgets(file, buf, len)
-    gzFile file;
-    char *buf;
-    int len;
-{
+char * ZEXPORT gzgets(gzFile file, char *buf, int len) {
    unsigned left, n;
    char *str;
    unsigned char *eol;
@ -600,9 +556,7 @@ char * ZEXPORT gzgets(file, buf, len)
 }

 /* -- see zlib.h -- */
-int ZEXPORT gzdirect(file)
-    gzFile file;
-{
+int ZEXPORT gzdirect(gzFile file) {
    gz_statep state;

    /* get internal structure */
@ -620,9 +574,7 @@ int ZEXPORT gzdirect(file)
 }

 /* -- see zlib.h -- */
-int ZEXPORT gzclose_r(file)
-    gzFile file;
-{
+int ZEXPORT gzclose_r(gzFile file) {
    int ret, err;
    gz_statep state;

--- a/3rdparty/zlib/gzwrite.c
+++ b/3rdparty/zlib/gzwrite.c
@ -5,18 +5,10 @@

 #include "gzguts.h"

-/* Local functions */
-local int gz_init OF((gz_statep));
-local int gz_comp OF((gz_statep, int));
-local int gz_zero OF((gz_statep, z_off64_t));
-local z_size_t gz_write OF((gz_statep, voidpc, z_size_t));
-
 /* Initialize state for writing a gzip file.  Mark initialization by setting
   state->size to non-zero.  Return -1 on a memory allocation failure, or 0 on
   success. */
-local int gz_init(state)
-    gz_statep state;
-{
+local int gz_init(gz_statep state) {
    int ret;
    z_streamp strm = &(state->strm);

@ -70,10 +62,7 @@ local int gz_init(state)
   deflate() flush value.  If flush is Z_FINISH, then the deflate() state is
   reset to start a new gzip stream.  If gz->direct is true, then simply write
   to the output file without compressing, and ignore flush. */
-local int gz_comp(state, flush)
-    gz_statep state;
-    int flush;
-{
+local int gz_comp(gz_statep state, int flush) {
    int ret, writ;
    unsigned have, put, max = ((unsigned)-1 >> 2) + 1;
    z_streamp strm = &(state->strm);
@ -151,10 +140,7 @@ local int gz_comp(state, flush)

 /* Compress len zeros to output.  Return -1 on a write error or memory
   allocation failure by gz_comp(), or 0 on success. */
-local int gz_zero(state, len)
-    gz_statep state;
-    z_off64_t len;
-{
+local int gz_zero(gz_statep state, z_off64_t len) {
    int first;
    unsigned n;
    z_streamp strm = &(state->strm);
@ -184,11 +170,7 @@ local int gz_zero(state, len)

 /* Write len bytes from buf to file.  Return the number of bytes written.  If
   the returned value is less than len, then there was an error. */
-local z_size_t gz_write(state, buf, len)
-    gz_statep state;
-    voidpc buf;
-    z_size_t len;
-{
+local z_size_t gz_write(gz_statep state, voidpc buf, z_size_t len) {
    z_size_t put = len;

    /* if len is zero, avoid unnecessary operations */
@ -252,11 +234,7 @@ local z_size_t gz_write(state, buf, len)
 }

 /* -- see zlib.h -- */
-int ZEXPORT gzwrite(file, buf, len)
-    gzFile file;
-    voidpc buf;
-    unsigned len;
-{
+int ZEXPORT gzwrite(gzFile file, voidpc buf, unsigned len) {
    gz_statep state;

    /* get internal structure */
@ -280,12 +258,8 @@ int ZEXPORT gzwrite(file, buf, len)
 }

 /* -- see zlib.h -- */
-z_size_t ZEXPORT gzfwrite(buf, size, nitems, file)
-    voidpc buf;
-    z_size_t size;
-    z_size_t nitems;
-    gzFile file;
-{
+z_size_t ZEXPORT gzfwrite(voidpc buf, z_size_t size, z_size_t nitems,
+                          gzFile file) {
    z_size_t len;
    gz_statep state;

@ -310,10 +284,7 @@ z_size_t ZEXPORT gzfwrite(buf, size, nitems, file)
 }

 /* -- see zlib.h -- */
-int ZEXPORT gzputc(file, c)
-    gzFile file;
-    int c;
-{
+int ZEXPORT gzputc(gzFile file, int c) {
    unsigned have;
    unsigned char buf[1];
    gz_statep state;
@ -358,10 +329,7 @@ int ZEXPORT gzputc(file, c)
 }

 /* -- see zlib.h -- */
-int ZEXPORT gzputs(file, s)
-    gzFile file;
-    const char *s;
-{
+int ZEXPORT gzputs(gzFile file, const char *s) {
    z_size_t len, put;
    gz_statep state;

@ -388,8 +356,7 @@ int ZEXPORT gzputs(file, s)
 #include <stdarg.h>

 /* -- see zlib.h -- */
-int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va)
-{
+int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va) {
    int len;
    unsigned left;
    char *next;
@ -460,8 +427,7 @@ int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va)
    return len;
 }

-int ZEXPORTVA gzprintf(gzFile file, const char *format, ...)
-{
+int ZEXPORTVA gzprintf(gzFile file, const char *format, ...) {
    va_list va;
    int ret;

@ -474,13 +440,10 @@ int ZEXPORTVA gzprintf(gzFile file, const char *format, ...)
 #else /* !STDC && !Z_HAVE_STDARG_H */

 /* -- see zlib.h -- */
-int ZEXPORTVA gzprintf(file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
-                       a11, a12, a13, a14, a15, a16, a17, a18, a19, a20)
-    gzFile file;
-    const char *format;
-    int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
-        a11, a12, a13, a14, a15, a16, a17, a18, a19, a20;
-{
+int ZEXPORTVA gzprintf(gzFile file, const char *format, int a1, int a2, int a3,
+                       int a4, int a5, int a6, int a7, int a8, int a9, int a10,
+                       int a11, int a12, int a13, int a14, int a15, int a16,
+                       int a17, int a18, int a19, int a20) {
    unsigned len, left;
    char *next;
    gz_statep state;
@ -562,10 +525,7 @@ int ZEXPORTVA gzprintf(file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
 #endif

 /* -- see zlib.h -- */
-int ZEXPORT gzflush(file, flush)
-    gzFile file;
-    int flush;
-{
+int ZEXPORT gzflush(gzFile file, int flush) {
    gz_statep state;

    /* get internal structure */
@ -594,11 +554,7 @@ int ZEXPORT gzflush(file, flush)
 }

 /* -- see zlib.h -- */
-int ZEXPORT gzsetparams(file, level, strategy)
-    gzFile file;
-    int level;
-    int strategy;
-{
+int ZEXPORT gzsetparams(gzFile file, int level, int strategy) {
    gz_statep state;
    z_streamp strm;

@ -609,7 +565,7 @@ int ZEXPORT gzsetparams(file, level, strategy)
    strm = &(state->strm);

    /* check that we're writing and that there's no error */
-    if (state->mode != GZ_WRITE || state->err != Z_OK)
+    if (state->mode != GZ_WRITE || state->err != Z_OK || state->direct)
        return Z_STREAM_ERROR;

    /* if no change is requested, then do nothing */
@ -636,9 +592,7 @@ int ZEXPORT gzsetparams(file, level, strategy)
 }

 /* -- see zlib.h -- */
-int ZEXPORT gzclose_w(file)
-    gzFile file;
-{
+int ZEXPORT gzclose_w(gzFile file) {
    int ret = Z_OK;
    gz_statep state;

--- a/3rdparty/zlib/infback.c
+++ b/3rdparty/zlib/infback.c
@ -15,9 +15,6 @@
 #include "inflate.h"
 #include "inffast.h"

-/* function prototypes */
-local void fixedtables OF((struct inflate_state FAR *state));
-
 /*
   strm provides memory allocation functions in zalloc and zfree, or
   Z_NULL to use the library memory allocation functions.
@ -25,13 +22,9 @@ local void fixedtables OF((struct inflate_state FAR *state));
   windowBits is in the range 8..15, and window is a user-supplied
   window and output buffer that is 2**windowBits bytes.
 */
-int ZEXPORT inflateBackInit_(strm, windowBits, window, version, stream_size)
-z_streamp strm;
-int windowBits;
-unsigned char FAR *window;
-const char *version;
-int stream_size;
-{
+int ZEXPORT inflateBackInit_(z_streamp strm, int windowBits,
+                             unsigned char FAR *window, const char *version,
+                             int stream_size) {
    struct inflate_state FAR *state;

    if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
@ -80,9 +73,7 @@ int stream_size;
   used for threaded applications, since the rewriting of the tables and virgin
   may not be thread-safe.
 */
-local void fixedtables(state)
-struct inflate_state FAR *state;
-{
+local void fixedtables(struct inflate_state FAR *state) {
 #ifdef BUILDFIXED
    static int virgin = 1;
    static code *lenfix, *distfix;
@ -248,13 +239,8 @@ struct inflate_state FAR *state;
   inflateBack() can also return Z_STREAM_ERROR if the input parameters
   are not correct, i.e. strm is Z_NULL or the state was not initialized.
 */
-int ZEXPORT inflateBack(strm, in, in_desc, out, out_desc)
-z_streamp strm;
-in_func in;
-void FAR *in_desc;
-out_func out;
-void FAR *out_desc;
-{
+int ZEXPORT inflateBack(z_streamp strm, in_func in, void FAR *in_desc,
+                        out_func out, void FAR *out_desc) {
    struct inflate_state FAR *state;
    z_const unsigned char FAR *next;    /* next input */
    unsigned char FAR *put;     /* next output */
@ -632,9 +618,7 @@ void FAR *out_desc;
    return ret;
 }

-int ZEXPORT inflateBackEnd(strm)
-z_streamp strm;
-{
+int ZEXPORT inflateBackEnd(z_streamp strm) {
    if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0)
        return Z_STREAM_ERROR;
    ZFREE(strm, strm->state);
--- a/3rdparty/zlib/inffast.c
+++ b/3rdparty/zlib/inffast.c
@ -47,10 +47,7 @@
      requires strm->avail_out >= 258 for each loop to avoid checking for
      output space.
 */
-void ZLIB_INTERNAL inflate_fast(strm, start)
-z_streamp strm;
-unsigned start;         /* inflate()'s starting value for strm->avail_out */
-{
+void ZLIB_INTERNAL inflate_fast(z_streamp strm, unsigned start) {
    struct inflate_state FAR *state;
    z_const unsigned char FAR *in;      /* local strm->next_in */
    z_const unsigned char FAR *last;    /* have enough input while in < last */
--- a/3rdparty/zlib/inffast.h
+++ b/3rdparty/zlib/inffast.h
@ -8,4 +8,4 @@
   subject to change. Applications should only use zlib.h.
 */

-void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start));
+void ZLIB_INTERNAL inflate_fast(z_streamp strm, unsigned start);
--- a/3rdparty/zlib/inflate.c
+++ b/3rdparty/zlib/inflate.c
@ -91,20 +91,7 @@
 #  endif
 #endif

-/* function prototypes */
-local int inflateStateCheck OF((z_streamp strm));
-local void fixedtables OF((struct inflate_state FAR *state));
-local int updatewindow OF((z_streamp strm, const unsigned char FAR *end,
-                           unsigned copy));
-#ifdef BUILDFIXED
-   void makefixed OF((void));
-#endif
-local unsigned syncsearch OF((unsigned FAR *have, const unsigned char FAR *buf,
-                              unsigned len));
-
-local int inflateStateCheck(strm)
-z_streamp strm;
-{
+local int inflateStateCheck(z_streamp strm) {
    struct inflate_state FAR *state;
    if (strm == Z_NULL ||
        strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0)
@ -116,9 +103,7 @@ z_streamp strm;
    return 0;
 }

-int ZEXPORT inflateResetKeep(strm)
-z_streamp strm;
-{
+int ZEXPORT inflateResetKeep(z_streamp strm) {
    struct inflate_state FAR *state;

    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
@ -142,9 +127,7 @@ z_streamp strm;
    return Z_OK;
 }

-int ZEXPORT inflateReset(strm)
-z_streamp strm;
-{
+int ZEXPORT inflateReset(z_streamp strm) {
    struct inflate_state FAR *state;

    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
@ -155,10 +138,7 @@ z_streamp strm;
    return inflateResetKeep(strm);
 }

-int ZEXPORT inflateReset2(strm, windowBits)
-z_streamp strm;
-int windowBits;
-{
+int ZEXPORT inflateReset2(z_streamp strm, int windowBits) {
    int wrap;
    struct inflate_state FAR *state;

@ -195,12 +175,8 @@ int windowBits;
    return inflateReset(strm);
 }

-int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size)
-z_streamp strm;
-int windowBits;
-const char *version;
-int stream_size;
-{
+int ZEXPORT inflateInit2_(z_streamp strm, int windowBits,
+                          const char *version, int stream_size) {
    int ret;
    struct inflate_state FAR *state;

@ -240,22 +216,17 @@ int stream_size;
    return ret;
 }

-int ZEXPORT inflateInit_(strm, version, stream_size)
-z_streamp strm;
-const char *version;
-int stream_size;
-{
+int ZEXPORT inflateInit_(z_streamp strm, const char *version,
+                         int stream_size) {
    return inflateInit2_(strm, DEF_WBITS, version, stream_size);
 }

-int ZEXPORT inflatePrime(strm, bits, value)
-z_streamp strm;
-int bits;
-int value;
-{
+int ZEXPORT inflatePrime(z_streamp strm, int bits, int value) {
    struct inflate_state FAR *state;

    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    if (bits == 0)
+        return Z_OK;
    state = (struct inflate_state FAR *)strm->state;
    if (bits < 0) {
        state->hold = 0;
@ -279,9 +250,7 @@ int value;
   used for threaded applications, since the rewriting of the tables and virgin
   may not be thread-safe.
 */
-local void fixedtables(state)
-struct inflate_state FAR *state;
-{
+local void fixedtables(struct inflate_state FAR *state) {
 #ifdef BUILDFIXED
    static int virgin = 1;
    static code *lenfix, *distfix;
@ -343,7 +312,7 @@ struct inflate_state FAR *state;

    a.out > inffixed.h
 */
-void makefixed()
+void makefixed(void)
 {
    unsigned low, size;
    struct inflate_state state;
@ -397,11 +366,7 @@ void makefixed()
   output will fall in the output data, making match copies simpler and faster.
   The advantage may be dependent on the size of the processor's data caches.
 */
-local int updatewindow(strm, end, copy)
-z_streamp strm;
-const Bytef *end;
-unsigned copy;
-{
+local int updatewindow(z_streamp strm, const Bytef *end, unsigned copy) {
    struct inflate_state FAR *state;
    unsigned dist;

@ -623,10 +588,7 @@ unsigned copy;
   will return Z_BUF_ERROR if it has not reached the end of the stream.
 */

-int ZEXPORT inflate(strm, flush)
-z_streamp strm;
-int flush;
-{
+int ZEXPORT inflate(z_streamp strm, int flush) {
    struct inflate_state FAR *state;
    z_const unsigned char FAR *next;    /* next input */
    unsigned char FAR *put;     /* next output */
@ -1302,9 +1264,7 @@ int flush;
    return ret;
 }

-int ZEXPORT inflateEnd(strm)
-z_streamp strm;
-{
+int ZEXPORT inflateEnd(z_streamp strm) {
    struct inflate_state FAR *state;
    if (inflateStateCheck(strm))
        return Z_STREAM_ERROR;
@ -1316,11 +1276,8 @@ z_streamp strm;
    return Z_OK;
 }

-int ZEXPORT inflateGetDictionary(strm, dictionary, dictLength)
-z_streamp strm;
-Bytef *dictionary;
-uInt *dictLength;
-{
+int ZEXPORT inflateGetDictionary(z_streamp strm, Bytef *dictionary,
+                                 uInt *dictLength) {
    struct inflate_state FAR *state;

    /* check state */
@ -1339,11 +1296,8 @@ uInt *dictLength;
    return Z_OK;
 }

-int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength)
-z_streamp strm;
-const Bytef *dictionary;
-uInt dictLength;
-{
+int ZEXPORT inflateSetDictionary(z_streamp strm, const Bytef *dictionary,
+                                 uInt dictLength) {
    struct inflate_state FAR *state;
    unsigned long dictid;
    int ret;
@ -1374,10 +1328,7 @@ uInt dictLength;
    return Z_OK;
 }

-int ZEXPORT inflateGetHeader(strm, head)
-z_streamp strm;
-gz_headerp head;
-{
+int ZEXPORT inflateGetHeader(z_streamp strm, gz_headerp head) {
    struct inflate_state FAR *state;

    /* check state */
@ -1402,11 +1353,8 @@ gz_headerp head;
   called again with more data and the *have state.  *have is initialized to
   zero for the first call.
 */
-local unsigned syncsearch(have, buf, len)
-unsigned FAR *have;
-const unsigned char FAR *buf;
-unsigned len;
-{
+local unsigned syncsearch(unsigned FAR *have, const unsigned char FAR *buf,
+                          unsigned len) {
    unsigned got;
    unsigned next;

@ -1425,9 +1373,7 @@ unsigned len;
    return next;
 }

-int ZEXPORT inflateSync(strm)
-z_streamp strm;
-{
+int ZEXPORT inflateSync(z_streamp strm) {
    unsigned len;               /* number of bytes to look at or looked at */
    int flags;                  /* temporary to save header status */
    unsigned long in, out;      /* temporary to save total_in and total_out */
@ -1483,9 +1429,7 @@ z_streamp strm;
   block. When decompressing, PPP checks that at the end of input packet,
   inflate is waiting for these length bytes.
 */
-int ZEXPORT inflateSyncPoint(strm)
-z_streamp strm;
-{
+int ZEXPORT inflateSyncPoint(z_streamp strm) {
    struct inflate_state FAR *state;

    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
@ -1493,10 +1437,7 @@ z_streamp strm;
    return state->mode == STORED && state->bits == 0;
 }

-int ZEXPORT inflateCopy(dest, source)
-z_streamp dest;
-z_streamp source;
-{
+int ZEXPORT inflateCopy(z_streamp dest, z_streamp source) {
    struct inflate_state FAR *state;
    struct inflate_state FAR *copy;
    unsigned char FAR *window;
@ -1540,10 +1481,7 @@ z_streamp source;
    return Z_OK;
 }

-int ZEXPORT inflateUndermine(strm, subvert)
-z_streamp strm;
-int subvert;
-{
+int ZEXPORT inflateUndermine(z_streamp strm, int subvert) {
    struct inflate_state FAR *state;

    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
@ -1558,10 +1496,7 @@ int subvert;
 #endif
 }

-int ZEXPORT inflateValidate(strm, check)
-z_streamp strm;
-int check;
-{
+int ZEXPORT inflateValidate(z_streamp strm, int check) {
    struct inflate_state FAR *state;

    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
@ -1573,9 +1508,7 @@ int check;
    return Z_OK;
 }

-long ZEXPORT inflateMark(strm)
-z_streamp strm;
-{
+long ZEXPORT inflateMark(z_streamp strm) {
    struct inflate_state FAR *state;

    if (inflateStateCheck(strm))
@ -1586,9 +1519,7 @@ z_streamp strm;
            (state->mode == MATCH ? state->was - state->length : 0));
 }

-unsigned long ZEXPORT inflateCodesUsed(strm)
-z_streamp strm;
-{
+unsigned long ZEXPORT inflateCodesUsed(z_streamp strm) {
    struct inflate_state FAR *state;
    if (inflateStateCheck(strm)) return (unsigned long)-1;
    state = (struct inflate_state FAR *)strm->state;
--- a/3rdparty/zlib/inftrees.c
+++ b/3rdparty/zlib/inftrees.c
@ -1,5 +1,5 @@
 /* inftrees.c -- generate Huffman trees for efficient decoding
- * Copyright (C) 1995-2022 Mark Adler
+ * Copyright (C) 1995-2023 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

@ -9,7 +9,7 @@
 #define MAXBITS 15

 const char inflate_copyright[] =
-   " inflate 1.2.13 Copyright 1995-2022 Mark Adler ";
+   " inflate 1.3 Copyright 1995-2023 Mark Adler ";
 /*
  If you use the zlib library in a product, an acknowledgment is welcome
  in the documentation of your product. If for some reason you cannot
@ -29,14 +29,9 @@ const char inflate_copyright[] =
   table index bits.  It will differ if the request is greater than the
   longest code or if it is less than the shortest code.
 */
-int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work)
-codetype type;
-unsigned short FAR *lens;
-unsigned codes;
-code FAR * FAR *table;
-unsigned FAR *bits;
-unsigned short FAR *work;
-{
+int ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens,
+                                unsigned codes, code FAR * FAR *table,
+                                unsigned FAR *bits, unsigned short FAR *work) {
    unsigned len;               /* a code's length in bits */
    unsigned sym;               /* index of code symbols */
    unsigned min, max;          /* minimum and maximum code lengths */
@ -62,7 +57,7 @@ unsigned short FAR *work;
        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
    static const unsigned short lext[31] = { /* Length codes 257..285 extra */
        16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
-        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 194, 65};
+        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 198, 203};
    static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
--- a/3rdparty/zlib/inftrees.h
+++ b/3rdparty/zlib/inftrees.h
@ -57,6 +57,6 @@ typedef enum {
    DISTS
 } codetype;

-int ZLIB_INTERNAL inflate_table OF((codetype type, unsigned short FAR *lens,
-                             unsigned codes, code FAR * FAR *table,
-                             unsigned FAR *bits, unsigned short FAR *work));
+int ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens,
+                                unsigned codes, code FAR * FAR *table,
+                                unsigned FAR *bits, unsigned short FAR *work);
--- a/3rdparty/zlib/trees.c
+++ b/3rdparty/zlib/trees.c
@ -122,39 +122,116 @@ struct static_tree_desc_s {
    int     max_length;          /* max bit length for the codes */
 };

-local const static_tree_desc  static_l_desc =
+#ifdef NO_INIT_GLOBAL_POINTERS
+#  define TCONST
+#else
+#  define TCONST const
+#endif
+
+local TCONST static_tree_desc static_l_desc =
 {static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};

-local const static_tree_desc  static_d_desc =
+local TCONST static_tree_desc static_d_desc =
 {static_dtree, extra_dbits, 0,          D_CODES, MAX_BITS};

-local const static_tree_desc  static_bl_desc =
+local TCONST static_tree_desc static_bl_desc =
 {(const ct_data *)0, extra_blbits, 0,   BL_CODES, MAX_BL_BITS};

 /* ===========================================================================
- * Local (static) routines in this file.
+ * Output a short LSB first on the stream.
+ * IN assertion: there is enough room in pendingBuf.
+ */
+#define put_short(s, w) { \
+    put_byte(s, (uch)((w) & 0xff)); \
+    put_byte(s, (uch)((ush)(w) >> 8)); \
+}
+
+/* ===========================================================================
+ * Reverse the first len bits of a code, using straightforward code (a faster
+ * method would use a table)
+ * IN assertion: 1 <= len <= 15
 */
+local unsigned bi_reverse(unsigned code, int len) {
+    register unsigned res = 0;
+    do {
+        res |= code & 1;
+        code >>= 1, res <<= 1;
+    } while (--len > 0);
+    return res >> 1;
+}

-local void tr_static_init OF((void));
-local void init_block     OF((deflate_state *s));
-local void pqdownheap     OF((deflate_state *s, ct_data *tree, int k));
-local void gen_bitlen     OF((deflate_state *s, tree_desc *desc));
-local void gen_codes      OF((ct_data *tree, int max_code, ushf *bl_count));
-local void build_tree     OF((deflate_state *s, tree_desc *desc));
-local void scan_tree      OF((deflate_state *s, ct_data *tree, int max_code));
-local void send_tree      OF((deflate_state *s, ct_data *tree, int max_code));
-local int  build_bl_tree  OF((deflate_state *s));
-local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
-                              int blcodes));
-local void compress_block OF((deflate_state *s, const ct_data *ltree,
-                              const ct_data *dtree));
-local int  detect_data_type OF((deflate_state *s));
-local unsigned bi_reverse OF((unsigned code, int len));
-local void bi_windup      OF((deflate_state *s));
-local void bi_flush       OF((deflate_state *s));
+/* ===========================================================================
+ * Flush the bit buffer, keeping at most 7 bits in it.
+ */
+local void bi_flush(deflate_state *s) {
+    if (s->bi_valid == 16) {
+        put_short(s, s->bi_buf);
+        s->bi_buf = 0;
+        s->bi_valid = 0;
+    } else if (s->bi_valid >= 8) {
+        put_byte(s, (Byte)s->bi_buf);
+        s->bi_buf >>= 8;
+        s->bi_valid -= 8;
+    }
+}
+
+/* ===========================================================================
+ * Flush the bit buffer and align the output on a byte boundary
+ */
+local void bi_windup(deflate_state *s) {
+    if (s->bi_valid > 8) {
+        put_short(s, s->bi_buf);
+    } else if (s->bi_valid > 0) {
+        put_byte(s, (Byte)s->bi_buf);
+    }
+    s->bi_buf = 0;
+    s->bi_valid = 0;
+#ifdef ZLIB_DEBUG
+    s->bits_sent = (s->bits_sent + 7) & ~7;
+#endif
+}
+
+/* ===========================================================================
+ * Generate the codes for a given tree and bit counts (which need not be
+ * optimal).
+ * IN assertion: the array bl_count contains the bit length statistics for
+ * the given tree and the field len is set for all tree elements.
+ * OUT assertion: the field code is set for all tree elements of non
+ *     zero code length.
+ */
+local void gen_codes(ct_data *tree, int max_code, ushf *bl_count) {
+    ush next_code[MAX_BITS+1]; /* next code value for each bit length */
+    unsigned code = 0;         /* running code value */
+    int bits;                  /* bit index */
+    int n;                     /* code index */
+
+    /* The distribution counts are first used to generate the code values
+     * without bit reversal.
+     */
+    for (bits = 1; bits <= MAX_BITS; bits++) {
+        code = (code + bl_count[bits - 1]) << 1;
+        next_code[bits] = (ush)code;
+    }
+    /* Check that the bit counts in bl_count are consistent. The last code
+     * must be all ones.
+     */
+    Assert (code + bl_count[MAX_BITS] - 1 == (1 << MAX_BITS) - 1,
+            "inconsistent bit counts");
+    Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
+
+    for (n = 0;  n <= max_code; n++) {
+        int len = tree[n].Len;
+        if (len == 0) continue;
+        /* Now reverse the bits */
+        tree[n].Code = (ush)bi_reverse(next_code[len]++, len);
+
+        Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
+            n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len] - 1));
+    }
+}

 #ifdef GEN_TREES_H
-local void gen_trees_header OF((void));
+local void gen_trees_header(void);
 #endif

 #ifndef ZLIB_DEBUG
@ -167,27 +244,12 @@ local void gen_trees_header OF((void));
       send_bits(s, tree[c].Code, tree[c].Len); }
 #endif

-/* ===========================================================================
- * Output a short LSB first on the stream.
- * IN assertion: there is enough room in pendingBuf.
- */
-#define put_short(s, w) { \
-    put_byte(s, (uch)((w) & 0xff)); \
-    put_byte(s, (uch)((ush)(w) >> 8)); \
-}
-
 /* ===========================================================================
 * Send a value on a given number of bits.
 * IN assertion: length <= 16 and value fits in length bits.
 */
 #ifdef ZLIB_DEBUG
-local void send_bits      OF((deflate_state *s, int value, int length));
-
-local void send_bits(s, value, length)
-    deflate_state *s;
-    int value;  /* value to send */
-    int length; /* number of bits */
-{
+local void send_bits(deflate_state *s, int value, int length) {
    Tracevv((stderr," l %2d v %4x ", length, value));
    Assert(length > 0 && length <= 15, "invalid length");
    s->bits_sent += (ulg)length;
@ -229,8 +291,7 @@ local void send_bits(s, value, length)
 /* ===========================================================================
 * Initialize the various 'constant' tables.
 */
-local void tr_static_init()
-{
+local void tr_static_init(void) {
 #if defined(GEN_TREES_H) || !defined(STDC)
    static int static_init_done = 0;
    int n;        /* iterates over tree elements */
@ -323,8 +384,7 @@ local void tr_static_init()
      ((i) == (last)? "\n};\n\n" :    \
       ((i) % (width) == (width) - 1 ? ",\n" : ", "))

-void gen_trees_header()
-{
+void gen_trees_header(void) {
    FILE *header = fopen("trees.h", "w");
    int i;

@ -373,12 +433,26 @@ void gen_trees_header()
 }
 #endif /* GEN_TREES_H */

+/* ===========================================================================
+ * Initialize a new block.
+ */
+local void init_block(deflate_state *s) {
+    int n; /* iterates over tree elements */
+
+    /* Initialize the trees. */
+    for (n = 0; n < L_CODES;  n++) s->dyn_ltree[n].Freq = 0;
+    for (n = 0; n < D_CODES;  n++) s->dyn_dtree[n].Freq = 0;
+    for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
+
+    s->dyn_ltree[END_BLOCK].Freq = 1;
+    s->opt_len = s->static_len = 0L;
+    s->sym_next = s->matches = 0;
+}
+
 /* ===========================================================================
 * Initialize the tree data structures for a new zlib stream.
 */
-void ZLIB_INTERNAL _tr_init(s)
-    deflate_state *s;
-{
+void ZLIB_INTERNAL _tr_init(deflate_state *s) {
    tr_static_init();

    s->l_desc.dyn_tree = s->dyn_ltree;
@ -401,24 +475,6 @@ void ZLIB_INTERNAL _tr_init(s)
    init_block(s);
 }

-/* ===========================================================================
- * Initialize a new block.
- */
-local void init_block(s)
-    deflate_state *s;
-{
-    int n; /* iterates over tree elements */
-
-    /* Initialize the trees. */
-    for (n = 0; n < L_CODES;  n++) s->dyn_ltree[n].Freq = 0;
-    for (n = 0; n < D_CODES;  n++) s->dyn_dtree[n].Freq = 0;
-    for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
-
-    s->dyn_ltree[END_BLOCK].Freq = 1;
-    s->opt_len = s->static_len = 0L;
-    s->sym_next = s->matches = 0;
-}
-
 #define SMALLEST 1
 /* Index within the heap array of least frequent node in the Huffman tree */

@ -448,11 +504,7 @@ local void init_block(s)
 * when the heap property is re-established (each father smaller than its
 * two sons).
 */
-local void pqdownheap(s, tree, k)
-    deflate_state *s;
-    ct_data *tree;  /* the tree to restore */
-    int k;               /* node to move down */
-{
+local void pqdownheap(deflate_state *s, ct_data *tree, int k) {
    int v = s->heap[k];
    int j = k << 1;  /* left son of k */
    while (j <= s->heap_len) {
@ -483,10 +535,7 @@ local void pqdownheap(s, tree, k)
 *     The length opt_len is updated; static_len is also updated if stree is
 *     not null.
 */
-local void gen_bitlen(s, desc)
-    deflate_state *s;
-    tree_desc *desc;    /* the tree descriptor */
-{
+local void gen_bitlen(deflate_state *s, tree_desc *desc) {
    ct_data *tree        = desc->dyn_tree;
    int max_code         = desc->max_code;
    const ct_data *stree = desc->stat_desc->static_tree;
@ -561,48 +610,9 @@ local void gen_bitlen(s, desc)
    }
 }

-/* ===========================================================================
- * Generate the codes for a given tree and bit counts (which need not be
- * optimal).
- * IN assertion: the array bl_count contains the bit length statistics for
- * the given tree and the field len is set for all tree elements.
- * OUT assertion: the field code is set for all tree elements of non
- *     zero code length.
- */
-local void gen_codes(tree, max_code, bl_count)
-    ct_data *tree;             /* the tree to decorate */
-    int max_code;              /* largest code with non zero frequency */
-    ushf *bl_count;            /* number of codes at each bit length */
-{
-    ush next_code[MAX_BITS+1]; /* next code value for each bit length */
-    unsigned code = 0;         /* running code value */
-    int bits;                  /* bit index */
-    int n;                     /* code index */
-
-    /* The distribution counts are first used to generate the code values
-     * without bit reversal.
-     */
-    for (bits = 1; bits <= MAX_BITS; bits++) {
-        code = (code + bl_count[bits - 1]) << 1;
-        next_code[bits] = (ush)code;
-    }
-    /* Check that the bit counts in bl_count are consistent. The last code
-     * must be all ones.
-     */
-    Assert (code + bl_count[MAX_BITS] - 1 == (1 << MAX_BITS) - 1,
-            "inconsistent bit counts");
-    Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
-
-    for (n = 0;  n <= max_code; n++) {
-        int len = tree[n].Len;
-        if (len == 0) continue;
-        /* Now reverse the bits */
-        tree[n].Code = (ush)bi_reverse(next_code[len]++, len);
-
-        Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
-            n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len] - 1));
-    }
-}
+#ifdef DUMP_BL_TREE
+#  include <stdio.h>
+#endif

 /* ===========================================================================
 * Construct one Huffman tree and assigns the code bit strings and lengths.
@ -612,10 +622,7 @@ local void gen_codes(tree, max_code, bl_count)
 *     and corresponding code. The length opt_len is updated; static_len is
 *     also updated if stree is not null. The field max_code is set.
 */
-local void build_tree(s, desc)
-    deflate_state *s;
-    tree_desc *desc; /* the tree descriptor */
-{
+local void build_tree(deflate_state *s, tree_desc *desc) {
    ct_data *tree         = desc->dyn_tree;
    const ct_data *stree  = desc->stat_desc->static_tree;
    int elems             = desc->stat_desc->elems;
@ -700,11 +707,7 @@ local void build_tree(s, desc)
 * Scan a literal or distance tree to determine the frequencies of the codes
 * in the bit length tree.
 */
-local void scan_tree(s, tree, max_code)
-    deflate_state *s;
-    ct_data *tree;   /* the tree to be scanned */
-    int max_code;    /* and its largest code of non zero frequency */
-{
+local void scan_tree(deflate_state *s, ct_data *tree, int max_code) {
    int n;                     /* iterates over all tree elements */
    int prevlen = -1;          /* last emitted length */
    int curlen;                /* length of current code */
@ -745,11 +748,7 @@ local void scan_tree(s, tree, max_code)
 * Send a literal or distance tree in compressed form, using the codes in
 * bl_tree.
 */
-local void send_tree(s, tree, max_code)
-    deflate_state *s;
-    ct_data *tree; /* the tree to be scanned */
-    int max_code;       /* and its largest code of non zero frequency */
-{
+local void send_tree(deflate_state *s, ct_data *tree, int max_code) {
    int n;                     /* iterates over all tree elements */
    int prevlen = -1;          /* last emitted length */
    int curlen;                /* length of current code */
@ -796,9 +795,7 @@ local void send_tree(s, tree, max_code)
 * Construct the Huffman tree for the bit lengths and return the index in
 * bl_order of the last bit length code to send.
 */
-local int build_bl_tree(s)
-    deflate_state *s;
-{
+local int build_bl_tree(deflate_state *s) {
    int max_blindex;  /* index of last bit length code of non zero freq */

    /* Determine the bit length frequencies for literal and distance trees */
@ -831,10 +828,8 @@ local int build_bl_tree(s)
 * lengths of the bit length codes, the literal tree and the distance tree.
 * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
 */
-local void send_all_trees(s, lcodes, dcodes, blcodes)
-    deflate_state *s;
-    int lcodes, dcodes, blcodes; /* number of codes for each tree */
-{
+local void send_all_trees(deflate_state *s, int lcodes, int dcodes,
+                          int blcodes) {
    int rank;                    /* index in bl_order */

    Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
@ -860,12 +855,8 @@ local void send_all_trees(s, lcodes, dcodes, blcodes)
 /* ===========================================================================
 * Send a stored block
 */
-void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last)
-    deflate_state *s;
-    charf *buf;       /* input block */
-    ulg stored_len;   /* length of input block */
-    int last;         /* one if this is the last block for a file */
-{
+void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf,
+                                    ulg stored_len, int last) {
    send_bits(s, (STORED_BLOCK<<1) + last, 3);  /* send block type */
    bi_windup(s);        /* align on byte boundary */
    put_short(s, (ush)stored_len);
@ -884,9 +875,7 @@ void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last)
 /* ===========================================================================
 * Flush the bits in the bit buffer to pending output (leaves at most 7 bits)
 */
-void ZLIB_INTERNAL _tr_flush_bits(s)
-    deflate_state *s;
-{
+void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s) {
    bi_flush(s);
 }

@ -894,9 +883,7 @@ void ZLIB_INTERNAL _tr_flush_bits(s)
 * Send one empty static block to give enough lookahead for inflate.
 * This takes 10 bits, of which 7 may remain in the bit buffer.
 */
-void ZLIB_INTERNAL _tr_align(s)
-    deflate_state *s;
-{
+void ZLIB_INTERNAL _tr_align(deflate_state *s) {
    send_bits(s, STATIC_TREES<<1, 3);
    send_code(s, END_BLOCK, static_ltree);
 #ifdef ZLIB_DEBUG
@ -905,16 +892,99 @@ void ZLIB_INTERNAL _tr_align(s)
    bi_flush(s);
 }

+/* ===========================================================================
+ * Send the block data compressed using the given Huffman trees
+ */
+local void compress_block(deflate_state *s, const ct_data *ltree,
+                          const ct_data *dtree) {
+    unsigned dist;      /* distance of matched string */
+    int lc;             /* match length or unmatched char (if dist == 0) */
+    unsigned sx = 0;    /* running index in sym_buf */
+    unsigned code;      /* the code to send */
+    int extra;          /* number of extra bits to send */
+
+    if (s->sym_next != 0) do {
+        dist = s->sym_buf[sx++] & 0xff;
+        dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;
+        lc = s->sym_buf[sx++];
+        if (dist == 0) {
+            send_code(s, lc, ltree); /* send a literal byte */
+            Tracecv(isgraph(lc), (stderr," '%c' ", lc));
+        } else {
+            /* Here, lc is the match length - MIN_MATCH */
+            code = _length_code[lc];
+            send_code(s, code + LITERALS + 1, ltree);   /* send length code */
+            extra = extra_lbits[code];
+            if (extra != 0) {
+                lc -= base_length[code];
+                send_bits(s, lc, extra);       /* send the extra length bits */
+            }
+            dist--; /* dist is now the match distance - 1 */
+            code = d_code(dist);
+            Assert (code < D_CODES, "bad d_code");
+
+            send_code(s, code, dtree);       /* send the distance code */
+            extra = extra_dbits[code];
+            if (extra != 0) {
+                dist -= (unsigned)base_dist[code];
+                send_bits(s, dist, extra);   /* send the extra distance bits */
+            }
+        } /* literal or match pair ? */
+
+        /* Check that the overlay between pending_buf and sym_buf is ok: */
+        Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow");
+
+    } while (sx < s->sym_next);
+
+    send_code(s, END_BLOCK, ltree);
+}
+
+/* ===========================================================================
+ * Check if the data type is TEXT or BINARY, using the following algorithm:
+ * - TEXT if the two conditions below are satisfied:
+ *    a) There are no non-portable control characters belonging to the
+ *       "block list" (0..6, 14..25, 28..31).
+ *    b) There is at least one printable character belonging to the
+ *       "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
+ * - BINARY otherwise.
+ * - The following partially-portable control characters form a
+ *   "gray list" that is ignored in this detection algorithm:
+ *   (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
+ * IN assertion: the fields Freq of dyn_ltree are set.
+ */
+local int detect_data_type(deflate_state *s) {
+    /* block_mask is the bit mask of block-listed bytes
+     * set bits 0..6, 14..25, and 28..31
+     * 0xf3ffc07f = binary 11110011111111111100000001111111
+     */
+    unsigned long block_mask = 0xf3ffc07fUL;
+    int n;
+
+    /* Check for non-textual ("block-listed") bytes. */
+    for (n = 0; n <= 31; n++, block_mask >>= 1)
+        if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0))
+            return Z_BINARY;
+
+    /* Check for textual ("allow-listed") bytes. */
+    if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0
+            || s->dyn_ltree[13].Freq != 0)
+        return Z_TEXT;
+    for (n = 32; n < LITERALS; n++)
+        if (s->dyn_ltree[n].Freq != 0)
+            return Z_TEXT;
+
+    /* There are no "block-listed" or "allow-listed" bytes:
+     * this stream either is empty or has tolerated ("gray-listed") bytes only.
+     */
+    return Z_BINARY;
+}
+
 /* ===========================================================================
 * Determine the best encoding for the current block: dynamic trees, static
 * trees or store, and write out the encoded block.
 */
-void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
-    deflate_state *s;
-    charf *buf;       /* input block, or NULL if too old */
-    ulg stored_len;   /* length of input block */
-    int last;         /* one if this is the last block for a file */
-{
+void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf,
+                                   ulg stored_len, int last) {
    ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
    int max_blindex = 0;  /* index of last bit length code of non zero freq */

@ -1011,11 +1081,7 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
 * Save the match info and tally the frequency counts. Return true if
 * the current block must be flushed.
 */
-int ZLIB_INTERNAL _tr_tally(s, dist, lc)
-    deflate_state *s;
-    unsigned dist;  /* distance of matched string */
-    unsigned lc;    /* match length - MIN_MATCH or unmatched char (dist==0) */
-{
+int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc) {
    s->sym_buf[s->sym_next++] = (uch)dist;
    s->sym_buf[s->sym_next++] = (uch)(dist >> 8);
    s->sym_buf[s->sym_next++] = (uch)lc;
@ -1035,147 +1101,3 @@ int ZLIB_INTERNAL _tr_tally(s, dist, lc)
    }
    return (s->sym_next == s->sym_end);
 }
-
-/* ===========================================================================
- * Send the block data compressed using the given Huffman trees
- */
-local void compress_block(s, ltree, dtree)
-    deflate_state *s;
-    const ct_data *ltree; /* literal tree */
-    const ct_data *dtree; /* distance tree */
-{
-    unsigned dist;      /* distance of matched string */
-    int lc;             /* match length or unmatched char (if dist == 0) */
-    unsigned sx = 0;    /* running index in sym_buf */
-    unsigned code;      /* the code to send */
-    int extra;          /* number of extra bits to send */
-
-    if (s->sym_next != 0) do {
-        dist = s->sym_buf[sx++] & 0xff;
-        dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;
-        lc = s->sym_buf[sx++];
-        if (dist == 0) {
-            send_code(s, lc, ltree); /* send a literal byte */
-            Tracecv(isgraph(lc), (stderr," '%c' ", lc));
-        } else {
-            /* Here, lc is the match length - MIN_MATCH */
-            code = _length_code[lc];
-            send_code(s, code + LITERALS + 1, ltree);   /* send length code */
-            extra = extra_lbits[code];
-            if (extra != 0) {
-                lc -= base_length[code];
-                send_bits(s, lc, extra);       /* send the extra length bits */
-            }
-            dist--; /* dist is now the match distance - 1 */
-            code = d_code(dist);
-            Assert (code < D_CODES, "bad d_code");
-
-            send_code(s, code, dtree);       /* send the distance code */
-            extra = extra_dbits[code];
-            if (extra != 0) {
-                dist -= (unsigned)base_dist[code];
-                send_bits(s, dist, extra);   /* send the extra distance bits */
-            }
-        } /* literal or match pair ? */
-
-        /* Check that the overlay between pending_buf and sym_buf is ok: */
-        Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow");
-
-    } while (sx < s->sym_next);
-
-    send_code(s, END_BLOCK, ltree);
-}
-
-/* ===========================================================================
- * Check if the data type is TEXT or BINARY, using the following algorithm:
- * - TEXT if the two conditions below are satisfied:
- *    a) There are no non-portable control characters belonging to the
- *       "block list" (0..6, 14..25, 28..31).
- *    b) There is at least one printable character belonging to the
- *       "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
- * - BINARY otherwise.
- * - The following partially-portable control characters form a
- *   "gray list" that is ignored in this detection algorithm:
- *   (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
- * IN assertion: the fields Freq of dyn_ltree are set.
- */
-local int detect_data_type(s)
-    deflate_state *s;
-{
-    /* block_mask is the bit mask of block-listed bytes
-     * set bits 0..6, 14..25, and 28..31
-     * 0xf3ffc07f = binary 11110011111111111100000001111111
-     */
-    unsigned long block_mask = 0xf3ffc07fUL;
-    int n;
-
-    /* Check for non-textual ("block-listed") bytes. */
-    for (n = 0; n <= 31; n++, block_mask >>= 1)
-        if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0))
-            return Z_BINARY;
-
-    /* Check for textual ("allow-listed") bytes. */
-    if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0
-            || s->dyn_ltree[13].Freq != 0)
-        return Z_TEXT;
-    for (n = 32; n < LITERALS; n++)
-        if (s->dyn_ltree[n].Freq != 0)
-            return Z_TEXT;
-
-    /* There are no "block-listed" or "allow-listed" bytes:
-     * this stream either is empty or has tolerated ("gray-listed") bytes only.
-     */
-    return Z_BINARY;
-}
-
-/* ===========================================================================
- * Reverse the first len bits of a code, using straightforward code (a faster
- * method would use a table)
- * IN assertion: 1 <= len <= 15
- */
-local unsigned bi_reverse(code, len)
-    unsigned code; /* the value to invert */
-    int len;       /* its bit length */
-{
-    register unsigned res = 0;
-    do {
-        res |= code & 1;
-        code >>= 1, res <<= 1;
-    } while (--len > 0);
-    return res >> 1;
-}
-
-/* ===========================================================================
- * Flush the bit buffer, keeping at most 7 bits in it.
- */
-local void bi_flush(s)
-    deflate_state *s;
-{
-    if (s->bi_valid == 16) {
-        put_short(s, s->bi_buf);
-        s->bi_buf = 0;
-        s->bi_valid = 0;
-    } else if (s->bi_valid >= 8) {
-        put_byte(s, (Byte)s->bi_buf);
-        s->bi_buf >>= 8;
-        s->bi_valid -= 8;
-    }
-}
-
-/* ===========================================================================
- * Flush the bit buffer and align the output on a byte boundary
- */
-local void bi_windup(s)
-    deflate_state *s;
-{
-    if (s->bi_valid > 8) {
-        put_short(s, s->bi_buf);
-    } else if (s->bi_valid > 0) {
-        put_byte(s, (Byte)s->bi_buf);
-    }
-    s->bi_buf = 0;
-    s->bi_valid = 0;
-#ifdef ZLIB_DEBUG
-    s->bits_sent = (s->bits_sent + 7) & ~7;
-#endif
-}
--- a/3rdparty/zlib/uncompr.c
+++ b/3rdparty/zlib/uncompr.c
@ -24,12 +24,8 @@
   Z_DATA_ERROR if the input data was corrupted, including if the input data is
   an incomplete zlib stream.
 */
-int ZEXPORT uncompress2(dest, destLen, source, sourceLen)
-    Bytef *dest;
-    uLongf *destLen;
-    const Bytef *source;
-    uLong *sourceLen;
-{
+int ZEXPORT uncompress2(Bytef *dest, uLongf *destLen, const Bytef *source,
+                        uLong *sourceLen) {
    z_stream stream;
    int err;
    const uInt max = (uInt)-1;
@ -83,11 +79,7 @@ int ZEXPORT uncompress2(dest, destLen, source, sourceLen)
           err;
 }

-int ZEXPORT uncompress(dest, destLen, source, sourceLen)
-    Bytef *dest;
-    uLongf *destLen;
-    const Bytef *source;
-    uLong sourceLen;
-{
+int ZEXPORT uncompress(Bytef *dest, uLongf *destLen, const Bytef *source,
+                       uLong sourceLen) {
    return uncompress2(dest, destLen, source, &sourceLen);
 }
--- a/3rdparty/zlib/zconf.h
+++ b/3rdparty/zlib/zconf.h
@ -241,7 +241,11 @@
 #endif

 #ifdef Z_SOLO
-   typedef unsigned long z_size_t;
+#  ifdef _WIN64
+     typedef unsigned long long z_size_t;
+#  else
+     typedef unsigned long z_size_t;
+#  endif
 #else
 #  define z_longlong long long
 #  if defined(NO_SIZE_T)
@ -520,7 +524,7 @@ typedef uLong FAR uLongf;
 #if !defined(_WIN32) && defined(Z_LARGE64)
 #  define z_off64_t off64_t
 #else
-#  if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO)
+#  if defined(_WIN32) && !defined(__GNUC__)
 #    define z_off64_t __int64
 #  else
 #    define z_off64_t z_off_t
--- a/3rdparty/zlib/zlib.h
+++ b/3rdparty/zlib/zlib.h
@ -1,7 +1,7 @@
 /* zlib.h -- interface of the 'zlib' general purpose compression library
-  version 1.2.13, October 13th, 2022
+  version 1.3, August 18th, 2023

-  Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler
+  Copyright (C) 1995-2023 Jean-loup Gailly and Mark Adler

  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the authors be held liable for any damages
@ -37,11 +37,11 @@
 extern "C" {
 #endif

-#define ZLIB_VERSION "1.2.13"
-#define ZLIB_VERNUM 0x12d0
+#define ZLIB_VERSION "1.3"
+#define ZLIB_VERNUM 0x1300
 #define ZLIB_VER_MAJOR 1
-#define ZLIB_VER_MINOR 2
-#define ZLIB_VER_REVISION 13
+#define ZLIB_VER_MINOR 3
+#define ZLIB_VER_REVISION 0
 #define ZLIB_VER_SUBREVISION 0

 /*
@ -78,8 +78,8 @@ extern "C" {
  even in the case of corrupted input.
 */

-typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size));
-typedef void   (*free_func)  OF((voidpf opaque, voidpf address));
+typedef voidpf (*alloc_func)(voidpf opaque, uInt items, uInt size);
+typedef void   (*free_func)(voidpf opaque, voidpf address);

 struct internal_state;

@ -217,7 +217,7 @@ typedef gz_header FAR *gz_headerp;

                        /* basic functions */

-ZEXTERN const char * ZEXPORT zlibVersion OF((void));
+ZEXTERN const char * ZEXPORT zlibVersion(void);
 /* The application can compare zlibVersion and ZLIB_VERSION for consistency.
   If the first character differs, the library code actually used is not
   compatible with the zlib.h header file used by the application.  This check
@ -225,12 +225,12 @@ ZEXTERN const char * ZEXPORT zlibVersion OF((void));
 */

 /*
-ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level));
+ZEXTERN int ZEXPORT deflateInit(z_streamp strm, int level);

     Initializes the internal stream state for compression.  The fields
   zalloc, zfree and opaque must be initialized before by the caller.  If
   zalloc and zfree are set to Z_NULL, deflateInit updates them to use default
-   allocation functions.
+   allocation functions.  total_in, total_out, adler, and msg are initialized.

     The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9:
   1 gives best speed, 9 gives best compression, 0 gives no compression at all
@ -247,7 +247,7 @@ ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level));
 */


-ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
+ZEXTERN int ZEXPORT deflate(z_streamp strm, int flush);
 /*
    deflate compresses as much data as possible, and stops when the input
  buffer becomes empty or the output buffer becomes full.  It may introduce
@ -320,8 +320,8 @@ ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
  with the same value of the flush parameter and more output space (updated
  avail_out), until the flush is complete (deflate returns with non-zero
  avail_out).  In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that
-  avail_out is greater than six to avoid repeated flush markers due to
-  avail_out == 0 on return.
+  avail_out is greater than six when the flush marker begins, in order to avoid
+  repeated flush markers upon calling deflate() again when avail_out == 0.

    If the parameter flush is set to Z_FINISH, pending input is processed,
  pending output is flushed and deflate returns with Z_STREAM_END if there was
@ -360,7 +360,7 @@ ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
 */


-ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm));
+ZEXTERN int ZEXPORT deflateEnd(z_streamp strm);
 /*
     All dynamically allocated data structures for this stream are freed.
   This function discards any unprocessed input and does not flush any pending
@ -375,7 +375,7 @@ ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm));


 /*
-ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
+ZEXTERN int ZEXPORT inflateInit(z_streamp strm);

     Initializes the internal stream state for decompression.  The fields
   next_in, avail_in, zalloc, zfree and opaque must be initialized before by
@ -383,7 +383,8 @@ ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
   read or consumed.  The allocation of a sliding window will be deferred to
   the first call of inflate (if the decompression does not complete on the
   first call).  If zalloc and zfree are set to Z_NULL, inflateInit updates
-   them to use default allocation functions.
+   them to use default allocation functions.  total_in, total_out, adler, and
+   msg are initialized.

     inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
   memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
@ -397,7 +398,7 @@ ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
 */


-ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
+ZEXTERN int ZEXPORT inflate(z_streamp strm, int flush);
 /*
    inflate decompresses as much data as possible, and stops when the input
  buffer becomes empty or the output buffer becomes full.  It may introduce
@ -517,7 +518,7 @@ ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
 */


-ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm));
+ZEXTERN int ZEXPORT inflateEnd(z_streamp strm);
 /*
     All dynamically allocated data structures for this stream are freed.
   This function discards any unprocessed input and does not flush any pending
@ -535,12 +536,12 @@ ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm));
 */

 /*
-ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm,
-                                     int  level,
-                                     int  method,
-                                     int  windowBits,
-                                     int  memLevel,
-                                     int  strategy));
+ZEXTERN int ZEXPORT deflateInit2(z_streamp strm,
+                                 int level,
+                                 int method,
+                                 int windowBits,
+                                 int memLevel,
+                                 int strategy);

     This is another version of deflateInit with more compression options.  The
   fields zalloc, zfree and opaque must be initialized before by the caller.
@ -607,9 +608,9 @@ ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm,
   compression: this will be done by deflate().
 */

-ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm,
-                                             const Bytef *dictionary,
-                                             uInt  dictLength));
+ZEXTERN int ZEXPORT deflateSetDictionary(z_streamp strm,
+                                         const Bytef *dictionary,
+                                         uInt  dictLength);
 /*
     Initializes the compression dictionary from the given byte sequence
   without producing any compressed output.  When using the zlib format, this
@ -651,9 +652,9 @@ ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm,
   not perform any compression: this will be done by deflate().
 */

-ZEXTERN int ZEXPORT deflateGetDictionary OF((z_streamp strm,
-                                             Bytef *dictionary,
-                                             uInt  *dictLength));
+ZEXTERN int ZEXPORT deflateGetDictionary(z_streamp strm,
+                                         Bytef *dictionary,
+                                         uInt  *dictLength);
 /*
     Returns the sliding dictionary being maintained by deflate.  dictLength is
   set to the number of bytes in the dictionary, and that many bytes are copied
@ -673,8 +674,8 @@ ZEXTERN int ZEXPORT deflateGetDictionary OF((z_streamp strm,
   stream state is inconsistent.
 */

-ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
-                                    z_streamp source));
+ZEXTERN int ZEXPORT deflateCopy(z_streamp dest,
+                                z_streamp source);
 /*
     Sets the destination stream as a complete copy of the source stream.

@ -691,20 +692,20 @@ ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
   destination.
 */

-ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm));
+ZEXTERN int ZEXPORT deflateReset(z_streamp strm);
 /*
     This function is equivalent to deflateEnd followed by deflateInit, but
   does not free and reallocate the internal compression state.  The stream
   will leave the compression level and any other attributes that may have been
-   set unchanged.
+   set unchanged.  total_in, total_out, adler, and msg are initialized.

     deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
   stream state was inconsistent (such as zalloc or state being Z_NULL).
 */

-ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
-                                      int level,
-                                      int strategy));
+ZEXTERN int ZEXPORT deflateParams(z_streamp strm,
+                                  int level,
+                                  int strategy);
 /*
     Dynamically update the compression level and compression strategy.  The
   interpretation of level and strategy is as in deflateInit2().  This can be
@ -729,7 +730,7 @@ ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
   Then no more input data should be provided before the deflateParams() call.
   If this is done, the old level and strategy will be applied to the data
   compressed before deflateParams(), and the new level and strategy will be
-   applied to the the data compressed after deflateParams().
+   applied to the data compressed after deflateParams().

     deflateParams returns Z_OK on success, Z_STREAM_ERROR if the source stream
   state was inconsistent or if a parameter was invalid, or Z_BUF_ERROR if
@ -740,11 +741,11 @@ ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
   retried with more output space.
 */

-ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm,
-                                    int good_length,
-                                    int max_lazy,
-                                    int nice_length,
-                                    int max_chain));
+ZEXTERN int ZEXPORT deflateTune(z_streamp strm,
+                                int good_length,
+                                int max_lazy,
+                                int nice_length,
+                                int max_chain);
 /*
     Fine tune deflate's internal compression parameters.  This should only be
   used by someone who understands the algorithm used by zlib's deflate for
@ -757,8 +758,8 @@ ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm,
   returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream.
 */

-ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm,
-                                       uLong sourceLen));
+ZEXTERN uLong ZEXPORT deflateBound(z_streamp strm,
+                                   uLong sourceLen);
 /*
     deflateBound() returns an upper bound on the compressed size after
   deflation of sourceLen bytes.  It must be called after deflateInit() or
@ -772,9 +773,9 @@ ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm,
   than Z_FINISH or Z_NO_FLUSH are used.
 */

-ZEXTERN int ZEXPORT deflatePending OF((z_streamp strm,
-                                       unsigned *pending,
-                                       int *bits));
+ZEXTERN int ZEXPORT deflatePending(z_streamp strm,
+                                   unsigned *pending,
+                                   int *bits);
 /*
     deflatePending() returns the number of bytes and bits of output that have
   been generated, but not yet provided in the available output.  The bytes not
@ -787,9 +788,9 @@ ZEXTERN int ZEXPORT deflatePending OF((z_streamp strm,
   stream state was inconsistent.
 */

-ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm,
-                                     int bits,
-                                     int value));
+ZEXTERN int ZEXPORT deflatePrime(z_streamp strm,
+                                 int bits,
+                                 int value);
 /*
     deflatePrime() inserts bits in the deflate output stream.  The intent
   is that this function is used to start off the deflate output with the bits
@ -804,8 +805,8 @@ ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm,
   source stream state was inconsistent.
 */

-ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm,
-                                         gz_headerp head));
+ZEXTERN int ZEXPORT deflateSetHeader(z_streamp strm,
+                                     gz_headerp head);
 /*
     deflateSetHeader() provides gzip header information for when a gzip
   stream is requested by deflateInit2().  deflateSetHeader() may be called
@ -821,16 +822,17 @@ ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm,
   gzip file" and give up.

     If deflateSetHeader is not used, the default gzip header has text false,
-   the time set to zero, and os set to 255, with no extra, name, or comment
-   fields.  The gzip header is returned to the default state by deflateReset().
+   the time set to zero, and os set to the current operating system, with no
+   extra, name, or comment fields.  The gzip header is returned to the default
+   state by deflateReset().

     deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
   stream state was inconsistent.
 */

 /*
-ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm,
-                                     int  windowBits));
+ZEXTERN int ZEXPORT inflateInit2(z_streamp strm,
+                                 int windowBits);

     This is another version of inflateInit with an extra parameter.  The
   fields next_in, avail_in, zalloc, zfree and opaque must be initialized
@ -883,9 +885,9 @@ ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm,
   deferred until inflate() is called.
 */

-ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
-                                             const Bytef *dictionary,
-                                             uInt  dictLength));
+ZEXTERN int ZEXPORT inflateSetDictionary(z_streamp strm,
+                                         const Bytef *dictionary,
+                                         uInt  dictLength);
 /*
     Initializes the decompression dictionary from the given uncompressed byte
   sequence.  This function must be called immediately after a call of inflate,
@ -906,9 +908,9 @@ ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
   inflate().
 */

-ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm,
-                                             Bytef *dictionary,
-                                             uInt  *dictLength));
+ZEXTERN int ZEXPORT inflateGetDictionary(z_streamp strm,
+                                         Bytef *dictionary,
+                                         uInt  *dictLength);
 /*
     Returns the sliding dictionary being maintained by inflate.  dictLength is
   set to the number of bytes in the dictionary, and that many bytes are copied
@ -921,7 +923,7 @@ ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm,
   stream state is inconsistent.
 */

-ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm));
+ZEXTERN int ZEXPORT inflateSync(z_streamp strm);
 /*
     Skips invalid compressed data until a possible full flush point (see above
   for the description of deflate with Z_FULL_FLUSH) can be found, or until all
@ -940,8 +942,8 @@ ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm));
   input each time, until success or end of the input data.
 */

-ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest,
-                                    z_streamp source));
+ZEXTERN int ZEXPORT inflateCopy(z_streamp dest,
+                                z_streamp source);
 /*
     Sets the destination stream as a complete copy of the source stream.

@ -956,18 +958,19 @@ ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest,
   destination.
 */

-ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm));
+ZEXTERN int ZEXPORT inflateReset(z_streamp strm);
 /*
     This function is equivalent to inflateEnd followed by inflateInit,
   but does not free and reallocate the internal decompression state.  The
   stream will keep attributes that may have been set by inflateInit2.
+   total_in, total_out, adler, and msg are initialized.

     inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
   stream state was inconsistent (such as zalloc or state being Z_NULL).
 */

-ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm,
-                                      int windowBits));
+ZEXTERN int ZEXPORT inflateReset2(z_streamp strm,
+                                  int windowBits);
 /*
     This function is the same as inflateReset, but it also permits changing
   the wrap and window size requests.  The windowBits parameter is interpreted
@ -980,9 +983,9 @@ ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm,
   the windowBits parameter is invalid.
 */

-ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm,
-                                     int bits,
-                                     int value));
+ZEXTERN int ZEXPORT inflatePrime(z_streamp strm,
+                                 int bits,
+                                 int value);
 /*
     This function inserts bits in the inflate input stream.  The intent is
   that this function is used to start inflating at a bit position in the
@ -1001,7 +1004,7 @@ ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm,
   stream state was inconsistent.
 */

-ZEXTERN long ZEXPORT inflateMark OF((z_streamp strm));
+ZEXTERN long ZEXPORT inflateMark(z_streamp strm);
 /*
     This function returns two values, one in the lower 16 bits of the return
   value, and the other in the remaining upper bits, obtained by shifting the
@ -1029,8 +1032,8 @@ ZEXTERN long ZEXPORT inflateMark OF((z_streamp strm));
   source stream state was inconsistent.
 */

-ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm,
-                                         gz_headerp head));
+ZEXTERN int ZEXPORT inflateGetHeader(z_streamp strm,
+                                     gz_headerp head);
 /*
     inflateGetHeader() requests that gzip header information be stored in the
   provided gz_header structure.  inflateGetHeader() may be called after
@ -1070,8 +1073,8 @@ ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm,
 */

 /*
-ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits,
-                                        unsigned char FAR *window));
+ZEXTERN int ZEXPORT inflateBackInit(z_streamp strm, int windowBits,
+                                    unsigned char FAR *window);

     Initialize the internal stream state for decompression using inflateBack()
   calls.  The fields zalloc, zfree and opaque in strm must be initialized
@ -1091,13 +1094,13 @@ ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits,
   the version of the header file.
 */

-typedef unsigned (*in_func) OF((void FAR *,
-                                z_const unsigned char FAR * FAR *));
-typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned));
+typedef unsigned (*in_func)(void FAR *,
+                            z_const unsigned char FAR * FAR *);
+typedef int (*out_func)(void FAR *, unsigned char FAR *, unsigned);

-ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm,
-                                    in_func in, void FAR *in_desc,
-                                    out_func out, void FAR *out_desc));
+ZEXTERN int ZEXPORT inflateBack(z_streamp strm,
+                                in_func in, void FAR *in_desc,
+                                out_func out, void FAR *out_desc);
 /*
     inflateBack() does a raw inflate with a single call using a call-back
   interface for input and output.  This is potentially more efficient than
@ -1165,7 +1168,7 @@ ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm,
   cannot return Z_OK.
 */

-ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm));
+ZEXTERN int ZEXPORT inflateBackEnd(z_streamp strm);
 /*
     All memory allocated by inflateBackInit() is freed.

@ -1173,7 +1176,7 @@ ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm));
   state was inconsistent.
 */

-ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void));
+ZEXTERN uLong ZEXPORT zlibCompileFlags(void);
 /* Return flags indicating compile-time options.

    Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other:
@ -1226,8 +1229,8 @@ ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void));
   you need special options.
 */

-ZEXTERN int ZEXPORT compress OF((Bytef *dest,   uLongf *destLen,
-                                 const Bytef *source, uLong sourceLen));
+ZEXTERN int ZEXPORT compress(Bytef *dest,   uLongf *destLen,
+                             const Bytef *source, uLong sourceLen);
 /*
     Compresses the source buffer into the destination buffer.  sourceLen is
   the byte length of the source buffer.  Upon entry, destLen is the total size
@ -1241,9 +1244,9 @@ ZEXTERN int ZEXPORT compress OF((Bytef *dest,   uLongf *destLen,
   buffer.
 */

-ZEXTERN int ZEXPORT compress2 OF((Bytef *dest,   uLongf *destLen,
-                                  const Bytef *source, uLong sourceLen,
-                                  int level));
+ZEXTERN int ZEXPORT compress2(Bytef *dest,   uLongf *destLen,
+                              const Bytef *source, uLong sourceLen,
+                              int level);
 /*
     Compresses the source buffer into the destination buffer.  The level
   parameter has the same meaning as in deflateInit.  sourceLen is the byte
@ -1257,15 +1260,15 @@ ZEXTERN int ZEXPORT compress2 OF((Bytef *dest,   uLongf *destLen,
   Z_STREAM_ERROR if the level parameter is invalid.
 */

-ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen));
+ZEXTERN uLong ZEXPORT compressBound(uLong sourceLen);
 /*
     compressBound() returns an upper bound on the compressed size after
   compress() or compress2() on sourceLen bytes.  It would be used before a
   compress() or compress2() call to allocate the destination buffer.
 */

-ZEXTERN int ZEXPORT uncompress OF((Bytef *dest,   uLongf *destLen,
-                                   const Bytef *source, uLong sourceLen));
+ZEXTERN int ZEXPORT uncompress(Bytef *dest,   uLongf *destLen,
+                               const Bytef *source, uLong sourceLen);
 /*
     Decompresses the source buffer into the destination buffer.  sourceLen is
   the byte length of the source buffer.  Upon entry, destLen is the total size
@ -1282,8 +1285,8 @@ ZEXTERN int ZEXPORT uncompress OF((Bytef *dest,   uLongf *destLen,
   buffer with the uncompressed data up to that point.
 */

-ZEXTERN int ZEXPORT uncompress2 OF((Bytef *dest,   uLongf *destLen,
-                                    const Bytef *source, uLong *sourceLen));
+ZEXTERN int ZEXPORT uncompress2(Bytef *dest,   uLongf *destLen,
+                                const Bytef *source, uLong *sourceLen);
 /*
     Same as uncompress, except that sourceLen is a pointer, where the
   length of the source is *sourceLen.  On return, *sourceLen is the number of
@ -1302,7 +1305,7 @@ ZEXTERN int ZEXPORT uncompress2 OF((Bytef *dest,   uLongf *destLen,
 typedef struct gzFile_s *gzFile;    /* semi-opaque gzip file descriptor */

 /*
-ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode));
+ZEXTERN gzFile ZEXPORT gzopen(const char *path, const char *mode);

     Open the gzip (.gz) file at path for reading and decompressing, or
   compressing and writing.  The mode parameter is as in fopen ("rb" or "wb")
@ -1339,7 +1342,7 @@ ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode));
   file could not be opened.
 */

-ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode));
+ZEXTERN gzFile ZEXPORT gzdopen(int fd, const char *mode);
 /*
     Associate a gzFile with the file descriptor fd.  File descriptors are
   obtained from calls like open, dup, creat, pipe or fileno (if the file has
@ -1362,7 +1365,7 @@ ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode));
   will not detect if fd is invalid (unless fd is -1).
 */

-ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size));
+ZEXTERN int ZEXPORT gzbuffer(gzFile file, unsigned size);
 /*
     Set the internal buffer size used by this library's functions for file to
   size.  The default buffer size is 8192 bytes.  This function must be called
@ -1378,7 +1381,7 @@ ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size));
   too late.
 */

-ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
+ZEXTERN int ZEXPORT gzsetparams(gzFile file, int level, int strategy);
 /*
     Dynamically update the compression level and strategy for file.  See the
   description of deflateInit2 for the meaning of these parameters. Previously
@ -1389,7 +1392,7 @@ ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
   or Z_MEM_ERROR if there is a memory allocation error.
 */

-ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len));
+ZEXTERN int ZEXPORT gzread(gzFile file, voidp buf, unsigned len);
 /*
     Read and decompress up to len uncompressed bytes from file into buf.  If
   the input file is not in gzip format, gzread copies the given number of
@ -1419,8 +1422,8 @@ ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len));
   Z_STREAM_ERROR.
 */

-ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems,
-                                     gzFile file));
+ZEXTERN z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems,
+                                 gzFile file);
 /*
     Read and decompress up to nitems items of size size from file into buf,
   otherwise operating as gzread() does.  This duplicates the interface of
@ -1445,14 +1448,14 @@ ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems,
   file, resetting and retrying on end-of-file, when size is not 1.
 */

-ZEXTERN int ZEXPORT gzwrite OF((gzFile file, voidpc buf, unsigned len));
+ZEXTERN int ZEXPORT gzwrite(gzFile file, voidpc buf, unsigned len);
 /*
     Compress and write the len uncompressed bytes at buf to file. gzwrite
   returns the number of uncompressed bytes written or 0 in case of error.
 */

-ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size,
-                                      z_size_t nitems, gzFile file));
+ZEXTERN z_size_t ZEXPORT gzfwrite(voidpc buf, z_size_t size,
+                                  z_size_t nitems, gzFile file);
 /*
     Compress and write nitems items of size size from buf to file, duplicating
   the interface of stdio's fwrite(), with size_t request and return types.  If
@ -1465,7 +1468,7 @@ ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size,
   is returned, and the error state is set to Z_STREAM_ERROR.
 */

-ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...));
+ZEXTERN int ZEXPORTVA gzprintf(gzFile file, const char *format, ...);
 /*
     Convert, format, compress, and write the arguments (...) to file under
   control of the string format, as in fprintf.  gzprintf returns the number of
@ -1480,7 +1483,7 @@ ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...));
   This can be determined using zlibCompileFlags().
 */

-ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
+ZEXTERN int ZEXPORT gzputs(gzFile file, const char *s);
 /*
     Compress and write the given null-terminated string s to file, excluding
   the terminating null character.
@ -1488,7 +1491,7 @@ ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
     gzputs returns the number of characters written, or -1 in case of error.
 */

-ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len));
+ZEXTERN char * ZEXPORT gzgets(gzFile file, char *buf, int len);
 /*
     Read and decompress bytes from file into buf, until len-1 characters are
   read, or until a newline character is read and transferred to buf, or an
@ -1502,13 +1505,13 @@ ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len));
   buf are indeterminate.
 */

-ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c));
+ZEXTERN int ZEXPORT gzputc(gzFile file, int c);
 /*
     Compress and write c, converted to an unsigned char, into file.  gzputc
   returns the value that was written, or -1 in case of error.
 */

-ZEXTERN int ZEXPORT gzgetc OF((gzFile file));
+ZEXTERN int ZEXPORT gzgetc(gzFile file);
 /*
     Read and decompress one byte from file.  gzgetc returns this byte or -1
   in case of end of file or error.  This is implemented as a macro for speed.
@ -1517,7 +1520,7 @@ ZEXTERN int ZEXPORT gzgetc OF((gzFile file));
   points to has been clobbered or not.
 */

-ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file));
+ZEXTERN int ZEXPORT gzungetc(int c, gzFile file);
 /*
     Push c back onto the stream for file to be read as the first character on
   the next read.  At least one character of push-back is always allowed.
@ -1529,7 +1532,7 @@ ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file));
   gzseek() or gzrewind().
 */

-ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush));
+ZEXTERN int ZEXPORT gzflush(gzFile file, int flush);
 /*
     Flush all pending output to file.  The parameter flush is as in the
   deflate() function.  The return value is the zlib error number (see function
@ -1545,8 +1548,8 @@ ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush));
 */

 /*
-ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file,
-                                   z_off_t offset, int whence));
+ZEXTERN z_off_t ZEXPORT gzseek(gzFile file,
+                               z_off_t offset, int whence);

     Set the starting position to offset relative to whence for the next gzread
   or gzwrite on file.  The offset represents a number of bytes in the
@ -1564,7 +1567,7 @@ ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file,
   would be before the current position.
 */

-ZEXTERN int ZEXPORT    gzrewind OF((gzFile file));
+ZEXTERN int ZEXPORT    gzrewind(gzFile file);
 /*
     Rewind file. This function is supported only for reading.

@ -1572,7 +1575,7 @@ ZEXTERN int ZEXPORT    gzrewind OF((gzFile file));
 */

 /*
-ZEXTERN z_off_t ZEXPORT    gztell OF((gzFile file));
+ZEXTERN z_off_t ZEXPORT    gztell(gzFile file);

     Return the starting position for the next gzread or gzwrite on file.
   This position represents a number of bytes in the uncompressed data stream,
@ -1583,7 +1586,7 @@ ZEXTERN z_off_t ZEXPORT    gztell OF((gzFile file));
 */

 /*
-ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file));
+ZEXTERN z_off_t ZEXPORT gzoffset(gzFile file);

     Return the current compressed (actual) read or write offset of file.  This
   offset includes the count of bytes that precede the gzip stream, for example
@ -1592,7 +1595,7 @@ ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file));
   be used for a progress indicator.  On error, gzoffset() returns -1.
 */

-ZEXTERN int ZEXPORT gzeof OF((gzFile file));
+ZEXTERN int ZEXPORT gzeof(gzFile file);
 /*
     Return true (1) if the end-of-file indicator for file has been set while
   reading, false (0) otherwise.  Note that the end-of-file indicator is set
@ -1607,7 +1610,7 @@ ZEXTERN int ZEXPORT gzeof OF((gzFile file));
   has grown since the previous end of file was detected.
 */

-ZEXTERN int ZEXPORT gzdirect OF((gzFile file));
+ZEXTERN int ZEXPORT gzdirect(gzFile file);
 /*
     Return true (1) if file is being copied directly while reading, or false
   (0) if file is a gzip stream being decompressed.
@ -1628,7 +1631,7 @@ ZEXTERN int ZEXPORT gzdirect OF((gzFile file));
   gzip file reading and decompression, which may not be desired.)
 */

-ZEXTERN int ZEXPORT    gzclose OF((gzFile file));
+ZEXTERN int ZEXPORT    gzclose(gzFile file);
 /*
     Flush all pending output for file, if necessary, close file and
   deallocate the (de)compression state.  Note that once file is closed, you
@ -1641,8 +1644,8 @@ ZEXTERN int ZEXPORT    gzclose OF((gzFile file));
   last read ended in the middle of a gzip stream, or Z_OK on success.
 */

-ZEXTERN int ZEXPORT gzclose_r OF((gzFile file));
-ZEXTERN int ZEXPORT gzclose_w OF((gzFile file));
+ZEXTERN int ZEXPORT gzclose_r(gzFile file);
+ZEXTERN int ZEXPORT gzclose_w(gzFile file);
 /*
     Same as gzclose(), but gzclose_r() is only for use when reading, and
   gzclose_w() is only for use when writing or appending.  The advantage to
@ -1653,7 +1656,7 @@ ZEXTERN int ZEXPORT gzclose_w OF((gzFile file));
   zlib library.
 */

-ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
+ZEXTERN const char * ZEXPORT gzerror(gzFile file, int *errnum);
 /*
     Return the error message for the last error which occurred on file.
   errnum is set to zlib error number.  If an error occurred in the file system
@ -1669,7 +1672,7 @@ ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
   functions above that do not distinguish those cases in their return values.
 */

-ZEXTERN void ZEXPORT gzclearerr OF((gzFile file));
+ZEXTERN void ZEXPORT gzclearerr(gzFile file);
 /*
     Clear the error and end-of-file flags for file.  This is analogous to the
   clearerr() function in stdio.  This is useful for continuing to read a gzip
@ -1686,7 +1689,7 @@ ZEXTERN void ZEXPORT gzclearerr OF((gzFile file));
   library.
 */

-ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
+ZEXTERN uLong ZEXPORT adler32(uLong adler, const Bytef *buf, uInt len);
 /*
     Update a running Adler-32 checksum with the bytes buf[0..len-1] and
   return the updated checksum. An Adler-32 value is in the range of a 32-bit
@ -1706,15 +1709,15 @@ ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
     if (adler != original_adler) error();
 */

-ZEXTERN uLong ZEXPORT adler32_z OF((uLong adler, const Bytef *buf,
-                                    z_size_t len));
+ZEXTERN uLong ZEXPORT adler32_z(uLong adler, const Bytef *buf,
+                                z_size_t len);
 /*
     Same as adler32(), but with a size_t length.
 */

 /*
-ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2,
-                                          z_off_t len2));
+ZEXTERN uLong ZEXPORT adler32_combine(uLong adler1, uLong adler2,
+                                      z_off_t len2);

     Combine two Adler-32 checksums into one.  For two sequences of bytes, seq1
   and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for
@ -1724,7 +1727,7 @@ ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2,
   negative, the result has no meaning or utility.
 */

-ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len));
+ZEXTERN uLong ZEXPORT crc32(uLong crc, const Bytef *buf, uInt len);
 /*
     Update a running CRC-32 with the bytes buf[0..len-1] and return the
   updated CRC-32. A CRC-32 value is in the range of a 32-bit unsigned integer.
@ -1742,14 +1745,14 @@ ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len));
     if (crc != original_crc) error();
 */

-ZEXTERN uLong ZEXPORT crc32_z OF((uLong crc, const Bytef *buf,
-                                  z_size_t len));
+ZEXTERN uLong ZEXPORT crc32_z(uLong crc, const Bytef *buf,
+                              z_size_t len);
 /*
     Same as crc32(), but with a size_t length.
 */

 /*
-ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2));
+ZEXTERN uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2);

     Combine two CRC-32 check values into one.  For two sequences of bytes,
   seq1 and seq2 with lengths len1 and len2, CRC-32 check values were
@ -1759,13 +1762,13 @@ ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2));
 */

 /*
-ZEXTERN uLong ZEXPORT crc32_combine_gen OF((z_off_t len2));
+ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t len2);

     Return the operator corresponding to length len2, to be used with
   crc32_combine_op().
 */

-ZEXTERN uLong ZEXPORT crc32_combine_op OF((uLong crc1, uLong crc2, uLong op));
+ZEXTERN uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op);
 /*
     Give the same result as crc32_combine(), using op in place of len2. op is
   generated from len2 by crc32_combine_gen(). This will be faster than
@ -1778,20 +1781,20 @@ ZEXTERN uLong ZEXPORT crc32_combine_op OF((uLong crc1, uLong crc2, uLong op));
 /* deflateInit and inflateInit are macros to allow checking the zlib version
 * and the compiler's view of z_stream:
 */
-ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level,
-                                     const char *version, int stream_size));
-ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm,
-                                     const char *version, int stream_size));
-ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int  level, int  method,
-                                      int windowBits, int memLevel,
-                                      int strategy, const char *version,
-                                      int stream_size));
-ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int  windowBits,
-                                      const char *version, int stream_size));
-ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits,
-                                         unsigned char FAR *window,
-                                         const char *version,
-                                         int stream_size));
+ZEXTERN int ZEXPORT deflateInit_(z_streamp strm, int level,
+                                 const char *version, int stream_size);
+ZEXTERN int ZEXPORT inflateInit_(z_streamp strm,
+                                 const char *version, int stream_size);
+ZEXTERN int ZEXPORT deflateInit2_(z_streamp strm, int  level, int  method,
+                                  int windowBits, int memLevel,
+                                  int strategy, const char *version,
+                                  int stream_size);
+ZEXTERN int ZEXPORT inflateInit2_(z_streamp strm, int  windowBits,
+                                  const char *version, int stream_size);
+ZEXTERN int ZEXPORT inflateBackInit_(z_streamp strm, int windowBits,
+                                     unsigned char FAR *window,
+                                     const char *version,
+                                     int stream_size);
 #ifdef Z_PREFIX_SET
 #  define z_deflateInit(strm, level) \
          deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
@ -1836,7 +1839,7 @@ struct gzFile_s {
    unsigned char *next;
    z_off64_t pos;
 };
-ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file));  /* backward compatibility */
+ZEXTERN int ZEXPORT gzgetc_(gzFile file);       /* backward compatibility */
 #ifdef Z_PREFIX_SET
 #  undef z_gzgetc
 #  define z_gzgetc(g) \
@ -1853,13 +1856,13 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file));  /* backward compatibility */
 * without large file support, _LFS64_LARGEFILE must also be true
 */
 #ifdef Z_LARGE64
-   ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
-   ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int));
-   ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile));
-   ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile));
-   ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t));
-   ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t));
-   ZEXTERN uLong ZEXPORT crc32_combine_gen64 OF((z_off64_t));
+   ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *);
+   ZEXTERN z_off64_t ZEXPORT gzseek64(gzFile, z_off64_t, int);
+   ZEXTERN z_off64_t ZEXPORT gztell64(gzFile);
+   ZEXTERN z_off64_t ZEXPORT gzoffset64(gzFile);
+   ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off64_t);
+   ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off64_t);
+   ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off64_t);
 #endif

 #if !defined(ZLIB_INTERNAL) && defined(Z_WANT64)
@ -1881,50 +1884,50 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file));  /* backward compatibility */
 #    define crc32_combine_gen crc32_combine_gen64
 #  endif
 #  ifndef Z_LARGE64
-     ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
-     ZEXTERN z_off_t ZEXPORT gzseek64 OF((gzFile, z_off_t, int));
-     ZEXTERN z_off_t ZEXPORT gztell64 OF((gzFile));
-     ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile));
-     ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t));
-     ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t));
-     ZEXTERN uLong ZEXPORT crc32_combine_gen64 OF((z_off_t));
+     ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *);
+     ZEXTERN z_off_t ZEXPORT gzseek64(gzFile, z_off_t, int);
+     ZEXTERN z_off_t ZEXPORT gztell64(gzFile);
+     ZEXTERN z_off_t ZEXPORT gzoffset64(gzFile);
+     ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off_t);
+     ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off_t);
+     ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off_t);
 #  endif
 #else
-   ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *));
-   ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile, z_off_t, int));
-   ZEXTERN z_off_t ZEXPORT gztell OF((gzFile));
-   ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile));
-   ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t));
-   ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t));
-   ZEXTERN uLong ZEXPORT crc32_combine_gen OF((z_off_t));
+   ZEXTERN gzFile ZEXPORT gzopen(const char *, const char *);
+   ZEXTERN z_off_t ZEXPORT gzseek(gzFile, z_off_t, int);
+   ZEXTERN z_off_t ZEXPORT gztell(gzFile);
+   ZEXTERN z_off_t ZEXPORT gzoffset(gzFile);
+   ZEXTERN uLong ZEXPORT adler32_combine(uLong, uLong, z_off_t);
+   ZEXTERN uLong ZEXPORT crc32_combine(uLong, uLong, z_off_t);
+   ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t);
 #endif

 #else /* Z_SOLO */

-   ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t));
-   ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t));
-   ZEXTERN uLong ZEXPORT crc32_combine_gen OF((z_off_t));
+   ZEXTERN uLong ZEXPORT adler32_combine(uLong, uLong, z_off_t);
+   ZEXTERN uLong ZEXPORT crc32_combine(uLong, uLong, z_off_t);
+   ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t);

 #endif /* !Z_SOLO */

 /* undocumented functions */
-ZEXTERN const char   * ZEXPORT zError           OF((int));
-ZEXTERN int            ZEXPORT inflateSyncPoint OF((z_streamp));
-ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table    OF((void));
-ZEXTERN int            ZEXPORT inflateUndermine OF((z_streamp, int));
-ZEXTERN int            ZEXPORT inflateValidate OF((z_streamp, int));
-ZEXTERN unsigned long  ZEXPORT inflateCodesUsed OF((z_streamp));
-ZEXTERN int            ZEXPORT inflateResetKeep OF((z_streamp));
-ZEXTERN int            ZEXPORT deflateResetKeep OF((z_streamp));
+ZEXTERN const char   * ZEXPORT zError(int);
+ZEXTERN int            ZEXPORT inflateSyncPoint(z_streamp);
+ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table(void);
+ZEXTERN int            ZEXPORT inflateUndermine(z_streamp, int);
+ZEXTERN int            ZEXPORT inflateValidate(z_streamp, int);
+ZEXTERN unsigned long  ZEXPORT inflateCodesUsed(z_streamp);
+ZEXTERN int            ZEXPORT inflateResetKeep(z_streamp);
+ZEXTERN int            ZEXPORT deflateResetKeep(z_streamp);
 #if defined(_WIN32) && !defined(Z_SOLO)
-ZEXTERN gzFile         ZEXPORT gzopen_w OF((const wchar_t *path,
-                                            const char *mode));
+ZEXTERN gzFile         ZEXPORT gzopen_w(const wchar_t *path,
+                                        const char *mode);
 #endif
 #if defined(STDC) || defined(Z_HAVE_STDARG_H)
 #  ifndef Z_SOLO
-ZEXTERN int            ZEXPORTVA gzvprintf Z_ARG((gzFile file,
-                                                  const char *format,
-                                                  va_list va));
+ZEXTERN int            ZEXPORTVA gzvprintf(gzFile file,
+                                           const char *format,
+                                           va_list va);
 #  endif
 #endif

--- a/3rdparty/zlib/zutil.c
+++ b/3rdparty/zlib/zutil.c
@ -24,13 +24,11 @@ z_const char * const z_errmsg[10] = {
 };


-const char * ZEXPORT zlibVersion()
-{
+const char * ZEXPORT zlibVersion(void) {
    return ZLIB_VERSION;
 }

-uLong ZEXPORT zlibCompileFlags()
-{
+uLong ZEXPORT zlibCompileFlags(void) {
    uLong flags;

    flags = 0;
@ -121,9 +119,7 @@ uLong ZEXPORT zlibCompileFlags()
 #  endif
 int ZLIB_INTERNAL z_verbose = verbose;

-void ZLIB_INTERNAL z_error(m)
-    char *m;
-{
+void ZLIB_INTERNAL z_error(char *m) {
    fprintf(stderr, "%s\n", m);
    exit(1);
 }
@ -132,9 +128,7 @@ void ZLIB_INTERNAL z_error(m)
 /* exported to allow conversion of error code to string for compress() and
 * uncompress()
 */
-const char * ZEXPORT zError(err)
-    int err;
-{
+const char * ZEXPORT zError(int err) {
    return ERR_MSG(err);
 }

@ -148,22 +142,14 @@ const char * ZEXPORT zError(err)

 #ifndef HAVE_MEMCPY

-void ZLIB_INTERNAL zmemcpy(dest, source, len)
-    Bytef* dest;
-    const Bytef* source;
-    uInt  len;
-{
+void ZLIB_INTERNAL zmemcpy(Bytef* dest, const Bytef* source, uInt len) {
    if (len == 0) return;
    do {
        *dest++ = *source++; /* ??? to be unrolled */
    } while (--len != 0);
 }

-int ZLIB_INTERNAL zmemcmp(s1, s2, len)
-    const Bytef* s1;
-    const Bytef* s2;
-    uInt  len;
-{
+int ZLIB_INTERNAL zmemcmp(const Bytef* s1, const Bytef* s2, uInt len) {
    uInt j;

    for (j = 0; j < len; j++) {
@ -172,10 +158,7 @@ int ZLIB_INTERNAL zmemcmp(s1, s2, len)
    return 0;
 }

-void ZLIB_INTERNAL zmemzero(dest, len)
-    Bytef* dest;
-    uInt  len;
-{
+void ZLIB_INTERNAL zmemzero(Bytef* dest, uInt len) {
    if (len == 0) return;
    do {
        *dest++ = 0;  /* ??? to be unrolled */
@ -216,8 +199,7 @@ local ptr_table table[MAX_PTR];
 * a protected system like OS/2. Use Microsoft C instead.
 */

-voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, unsigned items, unsigned size)
-{
+voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, unsigned items, unsigned size) {
    voidpf buf;
    ulg bsize = (ulg)items*size;

@ -242,8 +224,7 @@ voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, unsigned items, unsigned size)
    return buf;
 }

-void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr)
-{
+void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr) {
    int n;

    (void)opaque;
@ -279,14 +260,12 @@ void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr)
 #  define _hfree   hfree
 #endif

-voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, uInt items, uInt size)
-{
+voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, uInt items, uInt size) {
    (void)opaque;
    return _halloc((long)items, size);
 }

-void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr)
-{
+void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr) {
    (void)opaque;
    _hfree(ptr);
 }
@ -299,25 +278,18 @@ void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr)
 #ifndef MY_ZCALLOC /* Any system without a special alloc function */

 #ifndef STDC
-extern voidp  malloc OF((uInt size));
-extern voidp  calloc OF((uInt items, uInt size));
-extern void   free   OF((voidpf ptr));
+extern voidp malloc(uInt size);
+extern voidp calloc(uInt items, uInt size);
+extern void free(voidpf ptr);
 #endif

-voidpf ZLIB_INTERNAL zcalloc(opaque, items, size)
-    voidpf opaque;
-    unsigned items;
-    unsigned size;
-{
+voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, unsigned items, unsigned size) {
    (void)opaque;
    return sizeof(uInt) > 2 ? (voidpf)malloc(items * size) :
                              (voidpf)calloc(items, size);
 }

-void ZLIB_INTERNAL zcfree(opaque, ptr)
-    voidpf opaque;
-    voidpf ptr;
-{
+void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr) {
    (void)opaque;
    free(ptr);
 }
--- a/3rdparty/zlib/zutil.h
+++ b/3rdparty/zlib/zutil.h
@ -191,9 +191,9 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
 /* provide prototypes for these when building zlib without LFS */
 #if !defined(_WIN32) && \
    (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0)
-    ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t));
-    ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t));
-    ZEXTERN uLong ZEXPORT crc32_combine_gen64 OF((z_off_t));
+    ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off_t);
+    ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off_t);
+    ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off_t);
 #endif

        /* common defaults */
@ -232,16 +232,16 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
 #    define zmemzero(dest, len) memset(dest, 0, len)
 #  endif
 #else
-   void ZLIB_INTERNAL zmemcpy OF((Bytef* dest, const Bytef* source, uInt len));
-   int ZLIB_INTERNAL zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len));
-   void ZLIB_INTERNAL zmemzero OF((Bytef* dest, uInt len));
+   void ZLIB_INTERNAL zmemcpy(Bytef* dest, const Bytef* source, uInt len);
+   int ZLIB_INTERNAL zmemcmp(const Bytef* s1, const Bytef* s2, uInt len);
+   void ZLIB_INTERNAL zmemzero(Bytef* dest, uInt len);
 #endif

 /* Diagnostic functions */
 #ifdef ZLIB_DEBUG
 #  include <stdio.h>
   extern int ZLIB_INTERNAL z_verbose;
-   extern void ZLIB_INTERNAL z_error OF((char *m));
+   extern void ZLIB_INTERNAL z_error(char *m);
 #  define Assert(cond,msg) {if(!(cond)) z_error(msg);}
 #  define Trace(x) {if (z_verbose>=0) fprintf x ;}
 #  define Tracev(x) {if (z_verbose>0) fprintf x ;}
@ -258,9 +258,9 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
 #endif

 #ifndef Z_SOLO
-   voidpf ZLIB_INTERNAL zcalloc OF((voidpf opaque, unsigned items,
-                                    unsigned size));
-   void ZLIB_INTERNAL zcfree  OF((voidpf opaque, voidpf ptr));
+   voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, unsigned items,
+                                unsigned size);
+   void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr);
 #endif

 #define ZALLOC(strm, items, size) \
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -5,7 +5,6 @@
 #      $ cmake <PATH_TO_OPENCV_ROOT>
 #
 # ----------------------------------------------------------------------------
-
 # Disable in-source builds to prevent source tree corruption.
 if(" ${CMAKE_SOURCE_DIR}" STREQUAL " ${CMAKE_BINARY_DIR}")
  message(FATAL_ERROR "
@ -74,6 +73,10 @@ if(POLICY CMP0077)
  cmake_policy(SET CMP0077 NEW)  # CMake 3.13+: option() honors normal variables.
 endif()

+if(POLICY CMP0091)
+  cmake_policy(SET CMP0091 NEW) # CMake 3.15+: leave MSVC runtime selection out of default CMAKE_<LANG>_FLAGS_<CONFIG> flags
+endif()
+
 if(POLICY CMP0146)
  cmake_policy(SET CMP0146 OLD)  # CMake 3.27+: use CMake FindCUDA if available.
 endif()
@ -240,9 +243,9 @@ OCV_OPTION(BUILD_CLAPACK            "Build CLapack from source"          (((WIN3
 # Optional 3rd party components
 # ===================================================
 OCV_OPTION(WITH_1394 "Include IEEE1394 support" ON
-  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT WINRT
+  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT XROS AND NOT WINRT
  VERIFY HAVE_DC1394_2)
-OCV_OPTION(WITH_AVFOUNDATION "Use AVFoundation for Video I/O (iOS/Mac)" ON
+OCV_OPTION(WITH_AVFOUNDATION "Use AVFoundation for Video I/O (iOS/visionOS/Mac)" ON
  VISIBLE_IF APPLE
  VERIFY HAVE_AVFOUNDATION)
 OCV_OPTION(WITH_AVIF "Enable AVIF support" OFF
@ -251,15 +254,15 @@ OCV_OPTION(WITH_CAP_IOS "Enable iOS video capture" ON
  VISIBLE_IF IOS
  VERIFY HAVE_CAP_IOS)
 OCV_OPTION(WITH_CAROTENE "Use NVidia carotene acceleration library for ARM platform" (NOT CV_DISABLE_OPTIMIZATION)
-  VISIBLE_IF (ARM OR AARCH64) AND NOT IOS)
+  VISIBLE_IF (ARM OR AARCH64) AND NOT IOS AND NOT XROS)
 OCV_OPTION(WITH_CPUFEATURES "Use cpufeatures Android library" ON
  VISIBLE_IF ANDROID
  VERIFY HAVE_CPUFEATURES)
 OCV_OPTION(WITH_VTK "Include VTK library support (and build opencv_viz module eiher)" ON
-  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT WINRT AND NOT CMAKE_CROSSCOMPILING
+  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT XROS AND NOT WINRT AND NOT CMAKE_CROSSCOMPILING
  VERIFY HAVE_VTK)
 OCV_OPTION(WITH_CUDA "Include NVidia Cuda Runtime support" OFF
-  VISIBLE_IF NOT IOS AND NOT WINRT
+  VISIBLE_IF NOT IOS AND NOT XROS AND NOT WINRT
  VERIFY HAVE_CUDA)
 OCV_OPTION(WITH_CUFFT "Include NVidia Cuda Fast Fourier Transform (FFT) library support" WITH_CUDA
  VISIBLE_IF WITH_CUDA
@ -280,10 +283,10 @@ OCV_OPTION(WITH_EIGEN "Include Eigen2/Eigen3 support" (NOT CV_DISABLE_OPTIMIZATI
  VISIBLE_IF NOT WINRT
  VERIFY HAVE_EIGEN)
 OCV_OPTION(WITH_FFMPEG "Include FFMPEG support" (NOT ANDROID)
-  VISIBLE_IF NOT IOS AND NOT WINRT
+  VISIBLE_IF NOT IOS AND NOT XROS AND NOT WINRT
  VERIFY HAVE_FFMPEG)
 OCV_OPTION(WITH_GSTREAMER "Include Gstreamer support" ON
-  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT WINRT
+  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT XROS AND NOT WINRT
  VERIFY HAVE_GSTREAMER AND GSTREAMER_VERSION VERSION_GREATER "0.99")
 OCV_OPTION(WITH_GTK "Include GTK support" ON
  VISIBLE_IF UNIX AND NOT APPLE AND NOT ANDROID
@ -295,7 +298,7 @@ OCV_OPTION(WITH_WAYLAND "Include Wayland support" OFF
        VISIBLE_IF UNIX AND NOT APPLE AND NOT ANDROID
        VERIFY HAVE_WAYLAND)
 OCV_OPTION(WITH_IPP "Include Intel IPP support" (NOT MINGW AND NOT CV_DISABLE_OPTIMIZATION)
-  VISIBLE_IF (X86_64 OR X86) AND NOT WINRT AND NOT IOS
+  VISIBLE_IF (X86_64 OR X86) AND NOT WINRT AND NOT IOS AND NOT XROS
  VERIFY HAVE_IPP)
 OCV_OPTION(WITH_VULKAN "Include Vulkan support" OFF
  VISIBLE_IF TRUE
@ -308,10 +311,10 @@ OCV_OPTION(WITH_WEBNN "Include WebNN support" OFF
  VISIBLE_IF TRUE
  VERIFY HAVE_WEBNN)
 OCV_OPTION(WITH_JASPER "Include JPEG2K support (Jasper)" ON
-  VISIBLE_IF NOT IOS
+  VISIBLE_IF NOT IOS AND NOT XROS
  VERIFY HAVE_JASPER)
 OCV_OPTION(WITH_OPENJPEG "Include JPEG2K support (OpenJPEG)" ON
-  VISIBLE_IF NOT IOS
+  VISIBLE_IF NOT IOS AND NOT XROS
  VERIFY HAVE_OPENJPEG)
 OCV_OPTION(WITH_JPEG "Include JPEG support" ON
  VISIBLE_IF TRUE
@ -329,10 +332,10 @@ OCV_OPTION(WITH_OPENVX "Include OpenVX support" OFF
  VISIBLE_IF TRUE
  VERIFY HAVE_OPENVX)
 OCV_OPTION(WITH_OPENNI "Include OpenNI support" OFF
-  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT WINRT
+  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT XROS AND NOT WINRT
  VERIFY HAVE_OPENNI)
 OCV_OPTION(WITH_OPENNI2 "Include OpenNI2 support" OFF
-  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT WINRT
+  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT XROS AND NOT WINRT
  VERIFY HAVE_OPENNI2)
 OCV_OPTION(WITH_PNG "Include PNG support" ON
  VISIBLE_IF TRUE
@ -344,19 +347,19 @@ OCV_OPTION(WITH_GDCM "Include DICOM support" OFF
  VISIBLE_IF TRUE
  VERIFY HAVE_GDCM)
 OCV_OPTION(WITH_PVAPI "Include Prosilica GigE support" OFF
-  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT WINRT
+  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT XROS AND NOT WINRT
  VERIFY HAVE_PVAPI)
 OCV_OPTION(WITH_ARAVIS "Include Aravis GigE support" OFF
-  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT WINRT AND NOT WIN32
+  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT XROS AND NOT WINRT AND NOT WIN32
  VERIFY HAVE_ARAVIS_API)
 OCV_OPTION(WITH_QT "Build with Qt Backend support" OFF
-  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT WINRT
+  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT XROS AND NOT WINRT
  VERIFY HAVE_QT)
 OCV_OPTION(WITH_WIN32UI "Build with Win32 UI Backend support" ON
  VISIBLE_IF WIN32 AND NOT WINRT
  VERIFY HAVE_WIN32UI)
 OCV_OPTION(WITH_TBB "Include Intel TBB support" OFF
-  VISIBLE_IF NOT IOS AND NOT WINRT
+  VISIBLE_IF NOT IOS AND NOT XROS AND NOT WINRT
  VERIFY HAVE_TBB)
 OCV_OPTION(WITH_HPX "Include Ste||ar Group HPX support" OFF
  VISIBLE_IF TRUE
@ -368,7 +371,7 @@ OCV_OPTION(WITH_PTHREADS_PF "Use pthreads-based parallel_for" ON
  VISIBLE_IF NOT WIN32 OR MINGW
  VERIFY HAVE_PTHREADS_PF)
 OCV_OPTION(WITH_TIFF "Include TIFF support" ON
-  VISIBLE_IF NOT IOS
+  VISIBLE_IF NOT IOS AND NOT XROS
  VERIFY HAVE_TIFF)
 OCV_OPTION(WITH_V4L "Include Video 4 Linux support" ON
  VISIBLE_IF UNIX AND NOT ANDROID AND NOT APPLE
@ -395,20 +398,23 @@ OCV_OPTION(WITH_CLP "Include Clp support (EPL)" OFF
  VISIBLE_IF TRUE
  VERIFY HAVE_CLP)
 OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" (NOT ANDROID AND NOT CV_DISABLE_OPTIMIZATION)
-  VISIBLE_IF NOT IOS AND NOT WINRT
+  VISIBLE_IF NOT IOS AND NOT XROS AND NOT WINRT
  VERIFY HAVE_OPENCL)
 OCV_OPTION(WITH_OPENCL_SVM "Include OpenCL Shared Virtual Memory support" OFF
  VISIBLE_IF TRUE
  VERIFY HAVE_OPENCL_SVM) # experimental
 OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" ON
-  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT WINRT
+  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT XROS AND NOT WINRT
  VERIFY HAVE_CLAMDFFT)
 OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON
-  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT WINRT
+  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT XROS AND NOT WINRT
  VERIFY HAVE_CLAMDBLAS)
 OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON
  VISIBLE_IF WIN32 AND NOT WINRT
  VERIFY HAVE_DIRECTX)
+OCV_OPTION(WITH_DIRECTML "Include DirectML support" ON
+  VISIBLE_IF WIN32 AND NOT WINRT
+  VERIFY HAVE_DIRECTML)
 OCV_OPTION(WITH_OPENCL_D3D11_NV "Include NVIDIA OpenCL D3D11 support" WITH_DIRECTX
  VISIBLE_IF WIN32 AND NOT WINRT
  VERIFY HAVE_OPENCL_D3D11_NV)
@ -425,13 +431,13 @@ OCV_OPTION(WITH_MFX "Include Intel Media SDK support" OFF
  VISIBLE_IF (UNIX AND NOT ANDROID) OR (WIN32 AND NOT WINRT AND NOT MINGW)
  VERIFY HAVE_MFX)
 OCV_OPTION(WITH_GDAL "Include GDAL Support" OFF
-  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT WINRT
+  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT XROS AND NOT WINRT
  VERIFY HAVE_GDAL)
 OCV_OPTION(WITH_GPHOTO2 "Include gPhoto2 library support" OFF
-  VISIBLE_IF UNIX AND NOT ANDROID AND NOT IOS
+  VISIBLE_IF UNIX AND NOT ANDROID AND NOT IOS AND NOT XROS
  VERIFY HAVE_GPHOTO2)
 OCV_OPTION(WITH_LAPACK "Include Lapack library support" (NOT CV_DISABLE_OPTIMIZATION)
-  VISIBLE_IF NOT ANDROID AND NOT IOS
+  VISIBLE_IF NOT ANDROID AND NOT IOS AND NOT XROS
  VERIFY HAVE_LAPACK)
 OCV_OPTION(WITH_ITT "Include Intel ITT support" ON
  VISIBLE_IF NOT APPLE_FRAMEWORK
@ -451,7 +457,7 @@ OCV_OPTION(WITH_IMGCODEC_PXM "Include PNM (PBM,PGM,PPM) and PAM formats support"
 OCV_OPTION(WITH_IMGCODEC_PFM "Include PFM formats support" ON
  VISIBLE_IF TRUE
  VERIFY HAVE_IMGCODEC_PFM)
-OCV_OPTION(WITH_QUIRC "Include library QR-code decoding" ON
+OCV_OPTION(WITH_QUIRC "Include library QR-code decoding" OFF
  VISIBLE_IF TRUE
  VERIFY HAVE_QUIRC)
 OCV_OPTION(WITH_ANDROID_MEDIANDK "Use Android Media NDK for Video I/O (Android)" (ANDROID_NATIVE_API_LEVEL GREATER 20)
@ -466,6 +472,7 @@ OCV_OPTION(WITH_ONNX "Include Microsoft ONNX Runtime support" OFF
 OCV_OPTION(WITH_TIMVX "Include Tim-VX support" OFF
  VISIBLE_IF TRUE
  VERIFY HAVE_TIMVX)
+# Attention: Astra2, Gemini2, and Gemini2L cameras are currently supported only on Windows and on Linux kernels no newer than 4.15; newer Linux kernel versions may not work correctly.
 OCV_OPTION(WITH_OBSENSOR "Include obsensor support (Orbbec RGB-D modules: Astra+/Femto)" ON
  VISIBLE_IF (WIN32 AND NOT ARM AND NOT WINRT AND NOT MINGW) OR ( UNIX AND NOT APPLE AND NOT ANDROID)
  VERIFY HAVE_OBSENSOR)
@ -511,7 +518,7 @@ OCV_OPTION(INSTALL_TESTS            "Install accuracy and performance test binar
 # OpenCV build options
 # ===================================================
 OCV_OPTION(ENABLE_CCACHE              "Use ccache"                                               (UNIX AND (CMAKE_GENERATOR MATCHES "Makefile" OR CMAKE_GENERATOR MATCHES "Ninja" OR CMAKE_GENERATOR MATCHES "Xcode")) )
-OCV_OPTION(ENABLE_PRECOMPILED_HEADERS "Use precompiled headers"                                  MSVC IF (MSVC OR (NOT IOS AND NOT CMAKE_CROSSCOMPILING) ) )
+OCV_OPTION(ENABLE_PRECOMPILED_HEADERS "Use precompiled headers"                                  MSVC IF (MSVC OR (NOT IOS AND NOT XROS AND NOT CMAKE_CROSSCOMPILING) ) )
 OCV_OPTION(ENABLE_DELAYLOAD           "Enable delayed loading of OpenCV DLLs"                    OFF VISIBLE_IF MSVC AND BUILD_SHARED_LIBS)
 OCV_OPTION(ENABLE_SOLUTION_FOLDERS    "Solution folder in Visual Studio or in other IDEs"        (MSVC_IDE OR CMAKE_GENERATOR MATCHES Xcode) )
 OCV_OPTION(ENABLE_PROFILING           "Enable profiling in the GCC compiler (Add flags: -g -pg)" OFF  IF CV_GCC )
@ -521,8 +528,8 @@ OCV_OPTION(ENABLE_OMIT_FRAME_POINTER  "Enable -fomit-frame-pointer for GCC"
 OCV_OPTION(ENABLE_POWERPC             "Enable PowerPC for GCC"                                   ON   IF (CV_GCC AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
 OCV_OPTION(ENABLE_FAST_MATH           "Enable compiler options for fast math optimizations on FP computations (not recommended)" OFF)
 if(NOT IOS AND (NOT ANDROID OR OPENCV_ANDROID_USE_LEGACY_FLAGS) AND CMAKE_CROSSCOMPILING)  # Use CPU_BASELINE instead
-OCV_OPTION(ENABLE_NEON                "Enable NEON instructions"                                 (NEON OR ANDROID_ARM_NEON OR AARCH64) IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS) )
-OCV_OPTION(ENABLE_VFPV3               "Enable VFPv3-D32 instructions"                            OFF  IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS) )
+OCV_OPTION(ENABLE_NEON                "Enable NEON instructions"                                 (NEON OR ANDROID_ARM_NEON OR AARCH64) IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS OR XROS) )
+OCV_OPTION(ENABLE_VFPV3               "Enable VFPv3-D32 instructions"                            OFF  IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS OR XROS) )
 endif()
 OCV_OPTION(ENABLE_NOISY_WARNINGS      "Show all warnings even if they are too noisy"             OFF )
 OCV_OPTION(OPENCV_WARNINGS_ARE_ERRORS "Treat warnings as errors"                                 OFF )
@ -544,6 +551,9 @@ OCV_OPTION(OPENCV_ENABLE_MEMALIGN     "Enable posix_memalign or memalign usage"
 OCV_OPTION(OPENCV_DISABLE_FILESYSTEM_SUPPORT "Disable filesystem support" OFF)
 OCV_OPTION(OPENCV_DISABLE_THREAD_SUPPORT "Build the library without multi-threaded code." OFF)
 OCV_OPTION(OPENCV_SEMIHOSTING         "Build the library for semihosting target (Arm). See https://developer.arm.com/documentation/100863/latest." OFF)
+OCV_OPTION(ENABLE_CUDA_FIRST_CLASS_LANGUAGE "Enable CUDA as a first class language, if enabled dependant projects will need to use CMake >= 3.18" OFF
+  VISIBLE_IF (WITH_CUDA AND NOT CMAKE_VERSION VERSION_LESS 3.18)
+  VERIFY HAVE_CUDA)

 OCV_OPTION(ENABLE_PYLINT              "Add target with Pylint checks"                            (BUILD_DOCS OR BUILD_EXAMPLES) IF (NOT CMAKE_CROSSCOMPILING AND NOT APPLE_FRAMEWORK) )
 OCV_OPTION(ENABLE_FLAKE8              "Add target with Python flake8 checker"                    (BUILD_DOCS OR BUILD_EXAMPLES) IF (NOT CMAKE_CROSSCOMPILING AND NOT APPLE_FRAMEWORK) )
@ -647,7 +657,7 @@ endif()
 ocv_cmake_hook(POST_CMAKE_BUILD_OPTIONS)

 # --- Python Support ---
-if(NOT IOS)
+if(NOT IOS AND NOT XROS)
  include(cmake/OpenCVDetectPython.cmake)
 endif()

@ -655,6 +665,51 @@ include(cmake/OpenCVCompilerOptions.cmake)

 ocv_cmake_hook(POST_COMPILER_OPTIONS)

+# --- CUDA Support ---
+# When ENABLE_CUDA_FIRST_CLASS_LANGUAGE is ON, look for a CUDA compiler with
+# check_language(CUDA) and, if one is found, enable CUDA via enable_language().
+# On CMake < 3.18 the option is forced OFF instead (see the warning below).
+# CUDA_PATH (CMake variable) may be supplied to point at a stand alone toolkit.
+if(ENABLE_CUDA_FIRST_CLASS_LANGUAGE)
+  if(CMAKE_VERSION VERSION_LESS 3.18)
+    message(WARNING "CUDA: First class language only supported for CMake versions >= 3.18, falling back to FindCUDA!")
+    set(ENABLE_CUDA_FIRST_CLASS_LANGUAGE OFF CACHE BOOL "Enable CUDA as a first class language, if enabled dependant projects will need to use CMake >= 3.18" FORCE)
+  else()
+
+    # Check CUDA_PATH if supplied
+    # NOTE: if(ENV{VAR}) never inspects the environment (it is always false), so the
+    # environment variable has to be tested with DEFINED to avoid overwriting a value
+    # the user already exported.
+    if(UNIX AND CUDA_PATH AND NOT DEFINED ENV{CUDA_PATH})
+      set(ENV{CUDA_PATH} "${CUDA_PATH}")
+    elseif(WIN32 AND CUDA_PATH)
+      # Prepend the toolkit's bin directory to PATH for the compiler lookup below
+      set(ENV{PATH} "${CUDA_PATH}\\bin\;$ENV{PATH}")
+    endif()
+    include(CheckLanguage)
+    check_language(CUDA)
+
+    # Fallback to checking default locations
+    if(NOT CMAKE_CUDA_COMPILER)
+      # Checking windows default search location isn't possible because the CUDA Toolkit is installed to C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/vXX.X
+      if(WIN32)
+        if(CMAKE_GENERATOR MATCHES "Visual Studio")
+          message(STATUS "CUDA: Not detected, when using stand alone installations with the Visual Studio generator the path to the CUDA toolkit should be manually specified with -Tcuda=. e.g. -Tcuda=\"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/vXX.X\"")
+        else()
+          message(STATUS "CUDA: Not detected, for stand alone installations the path to the CUDA toolkit should be manually specified with -DCUDA_PATH=. e.g. -DCUDA_PATH=\"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/vXX.X\"")
+        endif()
+      elseif(UNIX)
+        message(STATUS "CUDA: Not detected, make sure you have performed the mandatory Post-installation actions described in the CUDA installation guide.\n   For stand alone installations you can set the CUDA_PATH environmental or CMake variable. e.g. export CUDA_PATH=/usr/local/cuda-XX.X or -DCUDA_PATH=/usr/local/cuda-XX.X.")
+        message(STATUS "CUDA: Falling back to searching for the CUDA compiler in its default location (/usr/local/cuda)")
+        set(CUDA_PATH "/usr/local/cuda" CACHE INTERNAL "")
+        set(ENV{CUDA_PATH} "${CUDA_PATH}")
+        # Drop the result of the previous (failed) check so that the language is probed again
+        unset(CMAKE_CUDA_COMPILER CACHE)
+        unset(CMAKE_CUDA_COMPILER)
+        check_language(CUDA)
+      endif()
+    endif()
+
+    cmake_policy(SET CMP0092 NEW) # CMake 3.15+: leave warning flags out of default CMAKE_<LANG>_FLAGS flags.
+    if(CMAKE_CUDA_COMPILER)
+      enable_language(CUDA)
+    elseif(UNIX)
+      message(WARNING "CUDA: Not detected!  If you are not using the default host compiler (g++) then you need to specify both CMAKE_CUDA_HOST_COMPILER and CMAKE_CUDA_COMPILER. e.g. -DCMAKE_CUDA_HOST_COMPILER=/usr/bin/clang++ -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc.")
+    endif()
+  endif()
+endif()
+
 # ----------------------------------------------------------------------------
 #       CHECK FOR SYSTEM LIBRARIES, OPTIONS, ETC..
 # ----------------------------------------------------------------------------
@ -732,7 +787,7 @@ include(cmake/OpenCVModule.cmake)
 #  Detect endianness of build platform
 # ----------------------------------------------------------------------------

-if(IOS)
+if(IOS OR XROS)
  # test_big_endian needs try_compile, which doesn't work for iOS
  # http://public.kitware.com/Bug/view.php?id=12288
  set(WORDS_BIGENDIAN 0)
@ -847,6 +902,10 @@ endif()
 if(WITH_DIRECTX)
  include(cmake/OpenCVDetectDirectX.cmake)
 endif()
+# --- DirectML ---
+if(WITH_DIRECTML)
+  include(cmake/OpenCVDetectDirectML.cmake)
+endif()

 if(WITH_VTK)
  include(cmake/OpenCVDetectVTK.cmake)
@ -907,7 +966,15 @@ foreach(hal ${OpenCV_HAL})
    if(";${CPU_BASELINE_FINAL};" MATCHES ";NEON;")
      add_subdirectory(3rdparty/carotene/hal)
      ocv_hal_register(CAROTENE_HAL_LIBRARIES CAROTENE_HAL_HEADERS CAROTENE_HAL_INCLUDE_DIRS)
-      list(APPEND OpenCV_USED_HAL "carotene (ver ${CAROTENE_HAL_VERSION})")
+
+      if( NOT DEFINED CAROTENE_NEON_ARCH)
+          set(CAROTENE_NEON_MSG "Auto detected")
+      elseif( CAROTENE_NEON_ARCH GREATER 7)
+          set(CAROTENE_NEON_MSG "Force ARMv8+")
+      else()
+          set(CAROTENE_NEON_MSG "Force ARMv7")
+      endif()
+      list(APPEND OpenCV_USED_HAL "carotene (ver ${CAROTENE_HAL_VERSION}, ${CAROTENE_NEON_MSG})")
    else()
      message(STATUS "Carotene: NEON is not available, disabling carotene...")
    endif()
@ -979,10 +1046,6 @@ if(BUILD_EXAMPLES OR BUILD_ANDROID_EXAMPLES OR INSTALL_ANDROID_EXAMPLES OR INSTA
  add_subdirectory(samples)
 endif()

-if(ANDROID)
-  add_subdirectory(platforms/android/service)
-endif()
-
 # ----------------------------------------------------------------------------
 # Finalization: generate configuration-based files
 # ----------------------------------------------------------------------------
@ -1003,7 +1066,7 @@ include(cmake/OpenCVGenAndroidMK.cmake)
 # Generate OpenCVConfig.cmake and OpenCVConfig-version.cmake for cmake projects
 include(cmake/OpenCVGenConfig.cmake)

-# Generate Info.plist for the IOS framework
+# Generate Info.plist for the iOS/visionOS framework
 if(APPLE_FRAMEWORK)
  include(cmake/OpenCVGenInfoPlist.cmake)
 endif()
@ -1399,12 +1462,14 @@ if(WITH_AVIF OR HAVE_AVIF)
  endif()
 endif()

-if(WITH_PNG OR HAVE_PNG OR WITH_SPNG)
-  if(WITH_SPNG)
+if(WITH_SPNG)
+  if(BUILD_SPNG)
    status("    PNG:" "build-${SPNG_LIBRARY} (ver ${SPNG_VERSION})")
-  else()
-    status("    PNG:"  PNG_FOUND  THEN "${PNG_LIBRARY} (ver ${PNG_VERSION})" ELSE "build (ver ${PNG_VERSION})")
+  elseif(HAVE_SPNG)
+    status("    PNG:" "${SPNG_LIBRARY} (ver ${SPNG_VERSION})")
  endif()
+elseif(WITH_PNG OR HAVE_PNG)
+  status("    PNG:"  PNG_FOUND  THEN "${PNG_LIBRARY} (ver ${PNG_VERSION_STRING})" ELSE "build (ver ${PNG_VERSION_STRING})")
 endif()

 if(WITH_TIFF OR HAVE_TIFF)
@ -1413,7 +1478,7 @@ endif()

 if(HAVE_OPENJPEG)
  status("    JPEG 2000:" OpenJPEG_FOUND
-      THEN "OpenJPEG (ver ${OPENJPEG_MAJOR_VERSION}.${OPENJPEG_MINOR_VERSION}.${OPENJPEG_BUILD_VERSION})"
+      THEN "OpenJPEG (ver ${OPENJPEG_VERSION})"
      ELSE "build (ver ${OPENJPEG_VERSION})"
  )
 elseif(HAVE_JASPER)
@ -1554,6 +1619,11 @@ if(WITH_GPHOTO2 OR HAVE_GPHOTO2)
  status("    gPhoto2:"        HAVE_GPHOTO2        THEN "YES"                                 ELSE NO)
 endif()

+if(ANDROID)
+  status("   MEDIANDK:"         HAVE_ANDROID_MEDIANDK THEN "YES"                              ELSE NO)
+  status("   NDK Camera:"       HAVE_ANDROID_NATIVE_CAMERA THEN "YES"                         ELSE NO)
+endif()
+
 # Order is similar to CV_PARALLEL_FRAMEWORK in core/src/parallel.cpp
 ocv_build_features_string(parallel_status EXCLUSIVE
  IF HAVE_TBB THEN "TBB (ver ${TBB_VERSION_MAJOR}.${TBB_VERSION_MINOR} interface ${TBB_INTERFACE_VERSION})"
--- a/README.md
+++ b/README.md
@ -1,5 +1,9 @@
 ## OpenCV: Open Source Computer Vision Library

+### Keep OpenCV Free
+
+OpenCV is raising funds to keep the library free for everyone, and we need the support of the entire community to do it. [Donate to OpenCV on IndieGoGo](http://igg.me/at/opencv5) before the campaign ends on December 16 to show your support.
+
 ### Resources

 * Homepage: <https://opencv.org>
@ -22,3 +26,13 @@ Please read the [contribution guidelines](https://github.com/opencv/opencv/wiki/
 * Include tests and documentation;
 * Clean up "oops" commits before submitting;
 * Follow the [coding style guide](https://github.com/opencv/opencv/wiki/Coding_Style_Guide).
+
+### Additional Resources
+
+* [Submit your OpenCV-based project](https://form.jotform.com/233105358823151) for inclusion in Community Friday on opencv.org
+* [Subscribe to the OpenCV YouTube Channel](http://youtube.com/@opencvofficial) featuring OpenCV Live, an hour-long streaming show
+* [Follow OpenCV on LinkedIn](http://linkedin.com/company/opencv/) for daily posts showing the state-of-the-art in computer vision & AI
+* [Apply to be an OpenCV Volunteer](https://form.jotform.com/232745316792159) to help organize events and online campaigns as well as amplify them
+* [Follow OpenCV on Mastodon](http://mastodon.social/@opencv) in the Fediverse
+* [Follow OpenCV on Twitter](https://twitter.com/opencvlive)
+* [OpenCV.ai](https://opencv.ai): Computer Vision and AI development services from the OpenCV team.
--- a/cmake/FindONNX.cmake
+++ b/cmake/FindONNX.cmake
@ -11,7 +11,9 @@ if(ONNXRT_ROOT_DIR)
  find_library(ORT_LIB onnxruntime
    ${ONNXRT_ROOT_DIR}/lib
    CMAKE_FIND_ROOT_PATH_BOTH)
+  # The location of headers varies across different versions of ONNX Runtime
  find_path(ORT_INCLUDE onnxruntime_cxx_api.h
+    ${ONNXRT_ROOT_DIR}/include/onnxruntime/
    ${ONNXRT_ROOT_DIR}/include/onnxruntime/core/session
    CMAKE_FIND_ROOT_PATH_BOTH)
 endif()
@ -32,6 +34,14 @@ if(ORT_LIB AND ORT_INCLUDE)
      HAVE_ONNX_DML
  )

+  # Check CoreML Execution Provider availability
+  get_filename_component(coreml_dir ${ONNXRT_ROOT_DIR}/include/onnxruntime/core/providers/coreml ABSOLUTE)
+  detect_onxxrt_ep(
+      coreml_provider_factory.h
+      ${coreml_dir}
+      HAVE_ONNX_COREML
+  )
+
  set(HAVE_ONNX TRUE)
  # For CMake output only
  set(ONNX_LIBRARIES "${ORT_LIB}" CACHE STRING "ONNX Runtime libraries")
--- a/cmake/OpenCVCRTLinkage.cmake
+++ b/cmake/OpenCVCRTLinkage.cmake
@ -33,34 +33,44 @@ endif()
 # Ignore warning: This object file does not define any previously undefined public symbols, ...
 set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /IGNORE:4221")

+if(POLICY CMP0091)
+  cmake_policy(GET CMP0091 MSVC_RUNTIME_SET_BY_ABSTRACTION)
+endif()
+
 if(NOT BUILD_SHARED_LIBS AND BUILD_WITH_STATIC_CRT)
-  foreach(flag_var
-          CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
-          CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
-          CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
-          CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
-    if(${flag_var} MATCHES "/MD")
-      string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
-    endif()
-    if(${flag_var} MATCHES "/MDd")
-      string(REGEX REPLACE "/MDd" "/MTd" ${flag_var} "${${flag_var}}")
-    endif()
-  endforeach(flag_var)
+  if(MSVC_RUNTIME_SET_BY_ABSTRACTION STREQUAL "NEW")
+    set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
+  else()
+    foreach(flag_var
+            CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
+            CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
+            CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
+            CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
+      if(${flag_var} MATCHES "/MD")
+        string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
+      endif()
+      if(${flag_var} MATCHES "/MDd")
+        string(REGEX REPLACE "/MDd" "/MTd" ${flag_var} "${${flag_var}}")
+      endif()
+    endforeach(flag_var)
+  endif()

  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:atlthunk.lib")
  set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} /NODEFAULTLIB:libcmt.lib /NODEFAULTLIB:libcpmt.lib /NODEFAULTLIB:msvcrt.lib")
  set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /NODEFAULTLIB:libcmtd.lib /NODEFAULTLIB:libcpmtd.lib /NODEFAULTLIB:msvcrtd.lib")
 else()
-  foreach(flag_var
-          CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
-          CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
-          CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
-          CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
-    if(${flag_var} MATCHES "/MT")
-      string(REGEX REPLACE "/MT" "/MD" ${flag_var} "${${flag_var}}")
-    endif()
-    if(${flag_var} MATCHES "/MTd")
-      string(REGEX REPLACE "/MTd" "/MDd" ${flag_var} "${${flag_var}}")
-    endif()
-  endforeach(flag_var)
+  if(NOT MSVC_RUNTIME_SET_BY_ABSTRACTION STREQUAL "NEW")
+    foreach(flag_var
+            CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
+            CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
+            CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
+            CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
+      if(${flag_var} MATCHES "/MT")
+        string(REGEX REPLACE "/MT" "/MD" ${flag_var} "${${flag_var}}")
+      endif()
+      if(${flag_var} MATCHES "/MTd")
+        string(REGEX REPLACE "/MTd" "/MDd" ${flag_var} "${${flag_var}}")
+      endif()
+    endforeach(flag_var)
+  endif()
 endif()
--- a/cmake/OpenCVCompilerOptimizations.cmake
+++ b/cmake/OpenCVCompilerOptimizations.cmake
@ -49,7 +49,7 @@

 set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX_512F")
 list(APPEND CPU_ALL_OPTIMIZATIONS "AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CLX;AVX512_ICL")
-list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16 NEON_DOTPROD)
+list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16 NEON_DOTPROD NEON_FP16 NEON_BF16)
 list(APPEND CPU_ALL_OPTIMIZATIONS MSA)
 list(APPEND CPU_ALL_OPTIMIZATIONS VSX VSX3)
 list(APPEND CPU_ALL_OPTIMIZATIONS RVV)
@ -349,12 +349,18 @@ elseif(ARM OR AARCH64)
    ocv_update(CPU_KNOWN_OPTIMIZATIONS "NEON;FP16;NEON_DOTPROD;NEON_FP16;NEON_BF16")
    ocv_update(CPU_NEON_FLAGS_ON "")
    ocv_update(CPU_FP16_IMPLIES "NEON")
-    ocv_update(CPU_NEON_DOTPROD_FLAGS_ON "-march=armv8.2-a+dotprod")
    ocv_update(CPU_NEON_DOTPROD_IMPLIES "NEON")
-    ocv_update(CPU_NEON_FP16_FLAGS_ON "-march=armv8.2-a+fp16")
    ocv_update(CPU_NEON_FP16_IMPLIES "NEON")
-    ocv_update(CPU_NEON_BF16_FLAGS_ON "-march=armv8.2-a+fp16+bf16")
    ocv_update(CPU_NEON_BF16_IMPLIES "NEON")
+    if(MSVC)
+      ocv_update(CPU_NEON_DOTPROD_FLAGS_ON "")
+      ocv_update(CPU_NEON_FP16_FLAGS_ON "")
+      ocv_update(CPU_NEON_BF16_FLAGS_ON "")
+    else()
+      ocv_update(CPU_NEON_DOTPROD_FLAGS_ON "-march=armv8.2-a+dotprod")
+      ocv_update(CPU_NEON_FP16_FLAGS_ON "-march=armv8.2-a+fp16")
+      ocv_update(CPU_NEON_BF16_FLAGS_ON "-march=armv8.2-a+bf16")
+    endif()
    set(CPU_BASELINE "NEON;FP16" CACHE STRING "${HELP_CPU_BASELINE}")
    set(CPU_DISPATCH "NEON_FP16;NEON_BF16;NEON_DOTPROD" CACHE STRING "${HELP_CPU_DISPATCH}")
  endif()
@ -403,11 +409,8 @@ elseif(LOONGARCH64)
  ocv_update(CPU_KNOWN_OPTIMIZATIONS "LSX;LASX")
  ocv_update(CPU_LSX_FLAGS_ON "-mlsx")
  ocv_update(CPU_LASX_FLAGS_ON "-mlasx")
-  if("${CPU_BASELINE_DISABLE}" STREQUAL "LASX")
-    set(CPU_BASELINE "LSX" CACHE  STRING "${HELP_CPU_BASELINE}")
-  else()
-    set(CPU_BASELINE "LASX" CACHE STRING "${HELP_CPU_BASELINE}")
-  endif()
+  set(CPU_BASELINE "LSX" CACHE STRING "${HELP_CPU_BASELINE}")
+  set(CPU_DISPATCH "LASX" CACHE STRING "${HELP_CPU_DISPATCH}")

 endif()

@ -480,6 +483,25 @@ macro(ocv_check_compiler_optimization OPT)
  endif()
 endmacro()

+# Merge the per-feature options found in a flag string into a single option.
+#   FEATURE_NAME_LIST - name of a list variable with optimization names; CPU_<name>_FLAGS_ON is read for each entry
+#   FLAG_STRING       - name of the variable holding the flag string; it is updated in place
+#   COMMON_OPTION     - prefix shared by the feature options (e.g. "-march=armv8.2-a"); the part of each
+#                       found option that follows this prefix is appended to one merged option
+# Nothing is done for MSVC.
+macro(ocv_cpu_aarch64_baseline_merge_feature_options FEATURE_NAME_LIST FLAG_STRING COMMON_OPTION)
+  if(NOT MSVC)
+    unset(_POSTFIX)
+    # Check each feature option
+    foreach(OPT IN LISTS ${FEATURE_NAME_LIST})
+      # Skip features that have no flags: string(FIND) reports an empty needle as found,
+      # and the removal below would then strip every space from the flag string.
+      if(NOT "x${CPU_${OPT}_FLAGS_ON}" STREQUAL "x")
+        string(FIND "${${FLAG_STRING}}" "${CPU_${OPT}_FLAGS_ON}" OPT_FOUND)
+        if(NOT ${OPT_FOUND} EQUAL -1)
+          # Keep only the feature-specific tail (e.g. "+fp16") of the option
+          string(REPLACE "${COMMON_OPTION}" "" TRAILING_PART "${CPU_${OPT}_FLAGS_ON}")
+          string(APPEND _POSTFIX "${TRAILING_PART}")
+          # Remove the standalone option; the input is quoted so it is passed as a single string
+          string(REPLACE " ${CPU_${OPT}_FLAGS_ON}" "" ${FLAG_STRING} "${${FLAG_STRING}}")
+        endif()
+      endif()
+    endforeach()
+    # If at least one option was found, append the merged option
+    if(NOT "x${_POSTFIX}" STREQUAL "x")
+      set(${FLAG_STRING} "${${FLAG_STRING}} ${COMMON_OPTION}${_POSTFIX}")
+    endif()
+  endif()
+endmacro()
+
 foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})
  set(CPU_${OPT}_USAGE_COUNT 0 CACHE INTERNAL "")
  if("${CPU_${OPT}_FLAGS_ON}" STREQUAL "disabled")
@ -573,6 +595,13 @@ foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})
  endif()
 endforeach()

+if(AARCH64)
+    # NEON feature options whose "-march=armv8.2-a+<ext>" entries in CPU_BASELINE_FLAGS are merged into a single -march option
+    set(NEON_OPTIONS_LIST NEON_DOTPROD NEON_FP16 NEON_BF16)
+    set(BASE_ARCHITECTURE "-march=armv8.2-a")
+    ocv_cpu_aarch64_baseline_merge_feature_options(NEON_OPTIONS_LIST CPU_BASELINE_FLAGS ${BASE_ARCHITECTURE})
+endif()
+
 foreach(OPT ${CPU_BASELINE_REQUIRE})
  if(NOT ";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};")
    message(SEND_ERROR "Required baseline optimization is not supported: ${OPT} (CPU_BASELINE_REQUIRE=${CPU_BASELINE_REQUIRE})")
--- a/cmake/OpenCVDetectCUDA.cmake
+++ b/cmake/OpenCVDetectCUDA.cmake
@ -1,10 +1,10 @@
 if((WIN32 AND NOT MSVC) OR OPENCV_CMAKE_FORCE_CUDA)
-  message(STATUS "CUDA compilation is disabled (due to only Visual Studio compiler supported on your platform).")
+  message(STATUS "CUDA: Compilation is disabled (due to only Visual Studio compiler supported on your platform).")
  return()
 endif()

 if((NOT UNIX AND CV_CLANG) OR OPENCV_CMAKE_FORCE_CUDA)
-  message(STATUS "CUDA compilation is disabled (due to Clang unsupported on your platform).")
+  message(STATUS "CUDA: Compilation is disabled (due to Clang unsupported on your platform).")
  return()
 endif()

@ -31,443 +31,143 @@ else()
  list(REMOVE_AT CMAKE_MODULE_PATH 0)
 endif()

-if(CUDA_FOUND)
-  unset(CUDA_nvcuvenc_LIBRARY CACHE)
-  set(HAVE_CUDA 1)
-  if(NOT CUDA_VERSION VERSION_LESS 11.0)
-    # CUDA 11.0 removes nppicom
-    ocv_list_filterout(CUDA_nppi_LIBRARY "nppicom")
-    ocv_list_filterout(CUDA_npp_LIBRARY "nppicom")
-  endif()
-
-  if(WITH_CUFFT)
-    set(HAVE_CUFFT 1)
-  endif()
-
-  if(WITH_CUBLAS)
-    set(HAVE_CUBLAS 1)
-  endif()
-
-  if(WITH_CUDNN)
-      set(CMAKE_MODULE_PATH "${OpenCV_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
-      find_host_package(CUDNN "${MIN_VER_CUDNN}")
-      list(REMOVE_AT CMAKE_MODULE_PATH 0)
-
-      if(CUDNN_FOUND)
-        set(HAVE_CUDNN 1)
-      endif()
-  endif()
+if(NOT CUDA_FOUND)
+  unset(CUDA_ARCH_BIN CACHE)
+  unset(CUDA_ARCH_PTX CACHE)
+  return()
+endif()

-  if(WITH_NVCUVID OR WITH_NVCUVENC)
-    macro(ocv_cuda_SEARCH_NVCUVID_HEADER _filename _result)
-      # place header file under CUDA_TOOLKIT_TARGET_DIR or CUDA_TOOLKIT_ROOT_DIR
-      find_path(_header_result
-        ${_filename}
-        PATHS "${CUDA_TOOLKIT_TARGET_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}"
-        ENV CUDA_PATH
-        ENV CUDA_INC_PATH
-        PATH_SUFFIXES include
-        NO_DEFAULT_PATH
-        )
-      if("x${_header_result}" STREQUAL "x_header_result-NOTFOUND")
-        set(${_result} 0)
-      else()
-        set(${_result} 1)
-      endif()
-      unset(_header_result CACHE)
-    endmacro()
-    if(WITH_NVCUVID)
-      ocv_cuda_SEARCH_NVCUVID_HEADER("nvcuvid.h" HAVE_NVCUVID_HEADER)
-      ocv_cuda_SEARCH_NVCUVID_HEADER("dynlink_nvcuvid.h" HAVE_DYNLINK_NVCUVID_HEADER)
-      find_cuda_helper_libs(nvcuvid)
-      if(CUDA_nvcuvid_LIBRARY AND (${HAVE_NVCUVID_HEADER} OR ${HAVE_DYNLINK_NVCUVID_HEADER}))
-        # make sure to have both header and library before enabling
-        set(HAVE_NVCUVID 1)
-      endif()
-    endif()
-    if(WITH_NVCUVENC)
-      ocv_cuda_SEARCH_NVCUVID_HEADER("nvEncodeAPI.h" HAVE_NVCUVENC_HEADER)
-      if(WIN32)
-        find_cuda_helper_libs(nvencodeapi)
-      else()
-        find_cuda_helper_libs(nvidia-encode)
-      endif()
-      if((CUDA_nvencodeapi_LIBRARY OR CUDA_nvidia-encode_LIBRARY) AND ${HAVE_NVCUVENC_HEADER})
-        set(HAVE_NVCUVENC 1)
-      endif()
-    endif()
-  endif()
+unset(CUDA_nvcuvenc_LIBRARY CACHE)
+set(HAVE_CUDA 1)
+if(NOT CUDA_VERSION VERSION_LESS 11.0)
+  # CUDA 11.0 removes nppicom
+  ocv_list_filterout(CUDA_nppi_LIBRARY "nppicom")
+  ocv_list_filterout(CUDA_npp_LIBRARY "nppicom")
+endif()

-  message(STATUS "CUDA detected: " ${CUDA_VERSION})
+if(WITH_CUFFT)
+  set(HAVE_CUFFT 1)
+endif()

-  OCV_OPTION(CUDA_ENABLE_DEPRECATED_GENERATION "Enable deprecated generations in the list" OFF)
-  set(_generations "Maxwell" "Pascal" "Volta" "Turing" "Ampere" "Lovelace" "Hopper")
-  if(CUDA_ENABLE_DEPRECATED_GENERATION)
-    set(_generations "Fermi" "${_generations}")
-    set(_generations "Kepler" "${_generations}")
-  endif()
-  set(_arch_fermi    "2.0")
-  set(_arch_kepler   "3.0;3.5;3.7")
-  set(_arch_maxwell  "5.0;5.2")
-  set(_arch_pascal   "6.0;6.1")
-  set(_arch_volta    "7.0")
-  set(_arch_turing   "7.5")
-  set(_arch_ampere   "8.0;8.6")
-  set(_arch_lovelace "8.9")
-  set(_arch_hopper   "9.0")
-  if(NOT CMAKE_CROSSCOMPILING)
-    list(APPEND _generations "Auto")
-  endif()
-  set(CUDA_GENERATION "" CACHE STRING "Build CUDA device code only for specific GPU architecture. Leave empty to build for all architectures.")
-  if( CMAKE_VERSION VERSION_GREATER "2.8" )
-    set_property( CACHE CUDA_GENERATION PROPERTY STRINGS "" ${_generations} )
-  endif()
+if(WITH_CUBLAS)
+  set(HAVE_CUBLAS 1)
+endif()

-  if(CUDA_GENERATION)
-    if(NOT ";${_generations};" MATCHES ";${CUDA_GENERATION};")
-      string(REPLACE ";" ", " _generations "${_generations}")
-      message(FATAL_ERROR "ERROR: ${_generations} Generations are supported.")
-    endif()
-    unset(CUDA_ARCH_BIN CACHE)
-    unset(CUDA_ARCH_PTX CACHE)
-  endif()
+if(WITH_CUDNN)
+    set(CMAKE_MODULE_PATH "${OpenCV_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
+    find_host_package(CUDNN "${MIN_VER_CUDNN}")
+    list(REMOVE_AT CMAKE_MODULE_PATH 0)

-  if(OPENCV_CUDA_DETECTION_NVCC_FLAGS MATCHES "-ccbin")
-    # already specified by user
-  elseif(CUDA_HOST_COMPILER AND EXISTS "${CUDA_HOST_COMPILER}")
-    get_filename_component(c_compiler_realpath "${CMAKE_C_COMPILER}" REALPATH)
-    # C compiler doesn't work with --run option, forcing C++ compiler instead
-    if(CUDA_HOST_COMPILER STREQUAL c_compiler_realpath OR CUDA_HOST_COMPILER STREQUAL CMAKE_C_COMPILER)
-      if(DEFINED CMAKE_CXX_COMPILER)
-        get_filename_component(cxx_compiler_realpath "${CMAKE_CXX_COMPILER}" REALPATH)
-        LIST(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS -ccbin "${cxx_compiler_realpath}")
-      else()
-        message(STATUS "CUDA: CMAKE_CXX_COMPILER is not available. You may need to specify CUDA_HOST_COMPILER.")
-      endif()
-    else()
-      LIST(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS -ccbin "${CUDA_HOST_COMPILER}")
-    endif()
-  elseif(WIN32 AND CMAKE_LINKER) # Workaround for VS cl.exe not being in the env. path
-    get_filename_component(host_compiler_bindir ${CMAKE_LINKER} DIRECTORY)
-    LIST(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS -ccbin "${host_compiler_bindir}")
-  else()
-    if(CUDA_HOST_COMPILER)
-      message(STATUS "CUDA: CUDA_HOST_COMPILER='${CUDA_HOST_COMPILER}' is not valid, autodetection may not work. Specify OPENCV_CUDA_DETECTION_NVCC_FLAGS with -ccbin option for fix that")
+    if(CUDNN_FOUND)
+      set(HAVE_CUDNN 1)
    endif()
-  endif()
+endif()

-  macro(ocv_filter_available_architecture result_list)
-    set(__cache_key_check "${ARGN} : ${CUDA_NVCC_EXECUTABLE} ${OPENCV_CUDA_DETECTION_NVCC_FLAGS}")
-    if(DEFINED OPENCV_CACHE_CUDA_SUPPORTED_CC AND OPENCV_CACHE_CUDA_SUPPORTED_CC_check STREQUAL __cache_key_check)
-      set(${result_list} "${OPENCV_CACHE_CUDA_SUPPORTED_CC}")
-    else()
-      set(CC_LIST ${ARGN})
-      foreach(target_arch ${CC_LIST})
-        string(REPLACE "." "" target_arch_short "${target_arch}")
-        set(NVCC_OPTION "-gencode;arch=compute_${target_arch_short},code=sm_${target_arch_short}")
-        set(_cmd "${CUDA_NVCC_EXECUTABLE}" ${OPENCV_CUDA_DETECTION_NVCC_FLAGS} ${NVCC_OPTION} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" --compile)
-        execute_process(
-            COMMAND ${_cmd}
-            WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
-            RESULT_VARIABLE _nvcc_res
-            OUTPUT_VARIABLE _nvcc_out
-            ERROR_VARIABLE _nvcc_err
-            #ERROR_QUIET
-            OUTPUT_STRIP_TRAILING_WHITESPACE
-        )
-        if(OPENCV_CMAKE_CUDA_DEBUG)
-          message(WARNING "COMMAND: ${_cmd}")
-          message(STATUS "Result: ${_nvcc_res}")
-          message(STATUS "Out: ${_nvcc_out}")
-          message(STATUS "Err: ${_nvcc_err}")
-        endif()
-        if(_nvcc_res EQUAL 0)
-          LIST(APPEND ${result_list} "${target_arch}")
-        endif()
-      endforeach()
-      string(STRIP "${${result_list}}" ${result_list})
-      if(" ${${result_list}}" STREQUAL " ")
-        message(WARNING "CUDA: Autodetection arch list is empty. Please enable OPENCV_CMAKE_CUDA_DEBUG=1 and check/specify OPENCV_CUDA_DETECTION_NVCC_FLAGS variable")
-      endif()
+include(cmake/OpenCVDetectCUDAUtils.cmake)

-      # cache detected values
-      set(OPENCV_CACHE_CUDA_SUPPORTED_CC ${${result_list}} CACHE INTERNAL "")
-      set(OPENCV_CACHE_CUDA_SUPPORTED_CC_check "${__cache_key_check}" CACHE INTERNAL "")
-    endif()
-  endmacro()
-
-  macro(ocv_detect_native_cuda_arch status output)
-    set(OPENCV_CUDA_DETECT_ARCHS_COMMAND "${CUDA_NVCC_EXECUTABLE}" ${OPENCV_CUDA_DETECTION_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run")
-    set(__cache_key_check "${OPENCV_CUDA_DETECT_ARCHS_COMMAND}")
-    if(DEFINED OPENCV_CACHE_CUDA_ACTIVE_CC AND OPENCV_CACHE_CUDA_ACTIVE_CC_check STREQUAL __cache_key_check)
-      set(${output} "${OPENCV_CACHE_CUDA_ACTIVE_CC}")
-      set(${status} 0)
-    else()
-      execute_process(
-          COMMAND ${OPENCV_CUDA_DETECT_ARCHS_COMMAND}
-          WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
-          RESULT_VARIABLE ${status}
-          OUTPUT_VARIABLE _nvcc_out
-          ERROR_VARIABLE _nvcc_err
-          ERROR_QUIET
-          OUTPUT_STRIP_TRAILING_WHITESPACE
-      )
-      if(OPENCV_CMAKE_CUDA_DEBUG)
-        message(WARNING "COMMAND: ${OPENCV_CUDA_DETECT_ARCHS_COMMAND}")
-        message(STATUS "Result: ${${status}}")
-        message(STATUS "Out: ${_nvcc_out}")
-        message(STATUS "Err: ${_nvcc_err}")
-      endif()
-      string(REGEX REPLACE ".*\n" "" ${output} "${_nvcc_out}") #Strip leading warning messages, if any
+if(WITH_NVCUVID OR WITH_NVCUVENC)
+  set(cuda_toolkit_dirs "${CUDA_TOOLKIT_TARGET_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}")
+  ocv_check_for_nvidia_video_codec_sdk("${cuda_toolkit_dirs}")
+endif()

-      if(${status} EQUAL 0)
-        # cache detected values
-        set(OPENCV_CACHE_CUDA_ACTIVE_CC ${${output}} CACHE INTERNAL "")
-        set(OPENCV_CACHE_CUDA_ACTIVE_CC_check "${__cache_key_check}" CACHE INTERNAL "")
-      endif()
-    endif()
-  endmacro()
-
-  set(__cuda_arch_ptx ${CUDA_ARCH_PTX})
-  if(CUDA_GENERATION STREQUAL "Fermi")
-    set(__cuda_arch_bin ${_arch_fermi})
-  elseif(CUDA_GENERATION STREQUAL "Kepler")
-    set(__cuda_arch_bin ${_arch_kepler})
-  elseif(CUDA_GENERATION STREQUAL "Maxwell")
-    set(__cuda_arch_bin ${_arch_maxwell})
-  elseif(CUDA_GENERATION STREQUAL "Pascal")
-    set(__cuda_arch_bin ${_arch_pascal})
-  elseif(CUDA_GENERATION STREQUAL "Volta")
-    set(__cuda_arch_bin ${_arch_volta})
-  elseif(CUDA_GENERATION STREQUAL "Turing")
-    set(__cuda_arch_bin ${_arch_turing})
-  elseif(CUDA_GENERATION STREQUAL "Ampere")
-    set(__cuda_arch_bin ${_arch_ampere})
-  elseif(CUDA_GENERATION STREQUAL "Lovelace")
-    set(__cuda_arch_bin ${_arch_lovelace})
-  elseif(CUDA_GENERATION STREQUAL "Hopper")
-    set(__cuda_arch_bin ${_arch_hopper})
-  elseif(CUDA_GENERATION STREQUAL "Auto")
-    ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
-    if(NOT _nvcc_res EQUAL 0)
-      message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
-    else()
-      string(REGEX MATCHALL "[0-9]+\\.[0-9]" __cuda_arch_bin "${_nvcc_out}")
-    endif()
-  elseif(CUDA_ARCH_BIN)
-    message(STATUS "CUDA: Using CUDA_ARCH_BIN=${CUDA_ARCH_BIN}")
-    set(__cuda_arch_bin ${CUDA_ARCH_BIN})
-  endif()
+message(STATUS "CUDA detected: " ${CUDA_VERSION})

-  if(NOT DEFINED __cuda_arch_bin AND NOT DEFINED __cuda_arch_ptx)
-    if(ARM)
-      set(__cuda_arch_bin "3.2")
-      set(__cuda_arch_ptx "")
-    elseif(AARCH64)
-      if(NOT CMAKE_CROSSCOMPILING)
-        ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
-      else()
-        set(_nvcc_res -1)  # emulate error, see below
-      endif()
-      if(NOT _nvcc_res EQUAL 0)
-        message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
-        # TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0) Orin (8.7)
-        ocv_filter_available_architecture(__cuda_arch_bin
-            5.3
-            6.2
-            7.2
-            7.0
-            8.7
-        )
-      else()
-        set(__cuda_arch_bin "${_nvcc_out}")
-      endif()
-      set(__cuda_arch_ptx "")
-    else()
-      ocv_filter_available_architecture(__cuda_arch_bin
-          ${_arch_fermi}
-          ${_arch_kepler}
-          ${_arch_maxwell}
-          ${_arch_pascal}
-          ${_arch_volta}
-          ${_arch_turing}
-          ${_arch_ampere}
-          ${_arch_lovelace}
-          ${_arch_hopper}
-      )
-      list(GET __cuda_arch_bin -1 __cuda_arch_ptx)
-    endif()
-  endif()
+ocv_set_cuda_detection_nvcc_flags(CUDA_HOST_COMPILER)
+ocv_set_cuda_arch_bin_and_ptx(${CUDA_NVCC_EXECUTABLE})

-  set(CUDA_ARCH_BIN ${__cuda_arch_bin} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
-  set(CUDA_ARCH_PTX ${__cuda_arch_ptx} CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
+# NVCC flags to be set
+set(NVCC_FLAGS_EXTRA "")

-  string(REGEX REPLACE "\\." "" ARCH_BIN_NO_POINTS "${CUDA_ARCH_BIN}")
-  string(REGEX REPLACE "\\." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}")
+# These vars will be passed into the templates
+set(OPENCV_CUDA_ARCH_BIN "")
+set(OPENCV_CUDA_ARCH_PTX "")
+set(OPENCV_CUDA_ARCH_FEATURES "")

-  # Check if user specified 1.0/2.1 compute capability: we don't support it
-  macro(ocv_wipeout_deprecated_cc target_cc)
-    if(" ${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}" MATCHES " ${target_cc}")
-      message(SEND_ERROR "CUDA: ${target_cc} compute capability is not supported - exclude it from ARCH/PTX list and re-run CMake")
-    endif()
-  endmacro()
-  ocv_wipeout_deprecated_cc("1.0")
-  ocv_wipeout_deprecated_cc("2.1")
-
-  # NVCC flags to be set
-  set(NVCC_FLAGS_EXTRA "")
-
-  # These vars will be passed into the templates
-  set(OPENCV_CUDA_ARCH_BIN "")
-  set(OPENCV_CUDA_ARCH_PTX "")
-  set(OPENCV_CUDA_ARCH_FEATURES "")
-
-  # Tell NVCC to add binaries for the specified GPUs
-  string(REGEX MATCHALL "[0-9()]+" ARCH_LIST "${ARCH_BIN_NO_POINTS}")
-  foreach(ARCH IN LISTS ARCH_LIST)
-    if(ARCH MATCHES "([0-9]+)\\(([0-9]+)\\)")
-      # User explicitly specified PTX for the concrete BIN
-      set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
-      set(OPENCV_CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN} ${CMAKE_MATCH_1}")
-      set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${CMAKE_MATCH_2}")
-    else()
-      # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
-      set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=sm_${ARCH})
-      set(OPENCV_CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN} ${ARCH}")
-      set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${ARCH}")
-    endif()
-  endforeach()
-  set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -D_FORCE_INLINES)
-
-  # Tell NVCC to add PTX intermediate code for the specified architectures
-  string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_NO_POINTS}")
-  foreach(ARCH IN LISTS ARCH_LIST)
-    set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=compute_${ARCH})
-    set(OPENCV_CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX} ${ARCH}")
+# Tell NVCC to add binaries for the specified GPUs
+string(REGEX MATCHALL "[0-9()]+" ARCH_LIST "${ARCH_BIN_NO_POINTS}")
+foreach(ARCH IN LISTS ARCH_LIST)
+  if(ARCH MATCHES "([0-9]+)\\(([0-9]+)\\)")
+    # User explicitly specified PTX for the concrete BIN
+    set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
+    set(OPENCV_CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN} ${CMAKE_MATCH_1}")
+    set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${CMAKE_MATCH_2}")
+  else()
+    # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
+    set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=sm_${ARCH})
+    set(OPENCV_CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN} ${ARCH}")
    set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${ARCH}")
-  endforeach()
-
-  # These vars will be processed in other scripts
-  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA})
-  set(OpenCV_CUDA_CC "${NVCC_FLAGS_EXTRA}")
-
-  if(ANDROID)
-    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xptxas;-dlcm=ca")
-  endif()
-
-  # Tell NVCC the maximum number of threads to be used to execute the compilation steps in parallel
-  # (option --threads was introduced in version 11.2)
-  if(NOT CUDA_VERSION VERSION_LESS "11.2")
-    if(CMAKE_GENERATOR MATCHES "Visual Studio" AND NOT $ENV{CMAKE_BUILD_PARALLEL_LEVEL} STREQUAL "")
-      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "--threads=$ENV{CMAKE_BUILD_PARALLEL_LEVEL}")
-    endif()
-  endif()
-
-  message(STATUS "CUDA NVCC target flags: ${CUDA_NVCC_FLAGS}")
-
-  OCV_OPTION(CUDA_FAST_MATH "Enable --use_fast_math for CUDA compiler " OFF)
-
-  if(CUDA_FAST_MATH)
-    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --use_fast_math)
  endif()
+endforeach()
+set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -D_FORCE_INLINES)

-  OCV_OPTION(CUDA_ENABLE_DELAYLOAD "Enable delayed loading of CUDA DLLs" OFF VISIBLE_IF MSVC)
-
-  mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD CUDA_SDK_ROOT_DIR)
-
-  macro(ocv_cuda_filter_options)
-    foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
-      set(${var}_backup_in_cuda_compile_ "${${var}}")
-
-      if (CV_CLANG)
-        # we remove -Winconsistent-missing-override and -Qunused-arguments
-        # just in case we are compiling CUDA with gcc but OpenCV with clang
-        string(REPLACE "-Winconsistent-missing-override" "" ${var} "${${var}}")
-        string(REPLACE "-Qunused-arguments" "" ${var} "${${var}}")
-      endif()
-
-      # we remove /EHa as it generates warnings under windows
-      string(REPLACE "/EHa" "" ${var} "${${var}}")
+# Tell NVCC to add PTX intermediate code for the specified architectures
+string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_NO_POINTS}")
+foreach(ARCH IN LISTS ARCH_LIST)
+  set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=compute_${ARCH})
+  set(OPENCV_CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX} ${ARCH}")
+  set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${ARCH}")
+endforeach()

-      # we remove -ggdb3 flag as it leads to preprocessor errors when compiling CUDA files (CUDA 4.1)
-      string(REPLACE "-ggdb3" "" ${var} "${${var}}")
+# These vars will be processed in other scripts
+set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA})
+set(OpenCV_CUDA_CC "${NVCC_FLAGS_EXTRA}")

-      # we remove -Wsign-promo as it generates warnings under linux
-      string(REPLACE "-Wsign-promo" "" ${var} "${${var}}")
+if(ANDROID)
+  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xptxas;-dlcm=ca")
+endif()

-      # we remove -Wno-sign-promo as it generates warnings under linux
-      string(REPLACE "-Wno-sign-promo" "" ${var} "${${var}}")
+ocv_set_nvcc_threads_for_vs()

-      # we remove -Wno-delete-non-virtual-dtor because it's used for C++ compiler
-      # but NVCC uses C compiler by default
-      string(REPLACE "-Wno-delete-non-virtual-dtor" "" ${var} "${${var}}")
+message(STATUS "CUDA: NVCC target flags ${CUDA_NVCC_FLAGS}")

-      # we remove -frtti because it's used for C++ compiler
-      # but NVCC uses C compiler by default
-      string(REPLACE "-frtti" "" ${var} "${${var}}")
+OCV_OPTION(CUDA_FAST_MATH "Enable --use_fast_math for CUDA compiler " OFF)

-      string(REPLACE "-fvisibility-inlines-hidden" "" ${var} "${${var}}")
+if(CUDA_FAST_MATH)
+  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --use_fast_math)
+endif()

-      # cc1: warning: command line option '-Wsuggest-override' is valid for C++/ObjC++ but not for C
-      string(REPLACE "-Wsuggest-override" "" ${var} "${${var}}")
+OCV_OPTION(CUDA_ENABLE_DELAYLOAD "Enable delayed loading of CUDA DLLs" OFF VISIBLE_IF MSVC)

-      # issue: #11552 (from OpenCVCompilerOptions.cmake)
-      string(REGEX REPLACE "-Wimplicit-fallthrough(=[0-9]+)? " "" ${var} "${${var}}")
+mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD CUDA_SDK_ROOT_DIR)

-      # removal of custom specified options
-      if(OPENCV_CUDA_NVCC_FILTEROUT_OPTIONS)
-        foreach(__flag ${OPENCV_CUDA_NVCC_FILTEROUT_OPTIONS})
-          string(REPLACE "${__flag}" "" ${var} "${${var}}")
-        endforeach()
-      endif()
-    endforeach()
-  endmacro()
-
-  macro(ocv_cuda_compile VAR)
-    ocv_cuda_filter_options()
-
-    if(BUILD_SHARED_LIBS)
-      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -DCVAPI_EXPORTS)
-    endif()
-
-    if(UNIX OR APPLE)
-      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fPIC)
-      if(NOT " ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} ${CMAKE_CXX_FLAGS_DEBUG} ${CUDA_NVCC_FLAGS}" MATCHES "-std=")
-        if(CUDA_VERSION VERSION_LESS "11.0")
-          list(APPEND CUDA_NVCC_FLAGS "--std=c++11")
-        else()
-          list(APPEND CUDA_NVCC_FLAGS "--std=c++14")
-        endif()
+macro(ocv_check_windows_crt_linkage)
+  # The new MSVC runtime abstraction is only useable if CUDA is a first class language
+  if(WIN32 AND POLICY CMP0091)
+    cmake_policy(GET CMP0091 MSVC_RUNTIME_SET_BY_ABSTRACTION)
+    if(MSVC_RUNTIME_SET_BY_ABSTRACTION STREQUAL "NEW")
+      if(NOT BUILD_SHARED_LIBS AND BUILD_WITH_STATIC_CRT)
+        set(CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} " /MT")
+        set(CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG} " /MTd")
+      else()
+        set(CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} " /MD")
+        set(CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG} " /MDd")
      endif()
    endif()
-    if(APPLE)
-      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fno-finite-math-only)
-    endif()
-
-    if(WIN32 AND NOT (CUDA_VERSION VERSION_LESS "11.2"))
-      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcudafe --display_error_number --diag-suppress 1394,1388)
-    endif()
+  endif()
+endmacro()

-    if(CMAKE_CROSSCOMPILING AND (ARM OR AARCH64))
-      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xlinker --unresolved-symbols=ignore-in-shared-libs)
-    endif()
+macro(ocv_cuda_compile VAR)
+  ocv_cuda_filter_options()
+  ocv_check_windows_crt_linkage()
+  ocv_nvcc_flags()

-    # disabled because of multiple warnings during building nvcc auto generated files
-    if(CV_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "4.6.0")
-      ocv_warnings_disable(CMAKE_CXX_FLAGS -Wunused-but-set-variable)
+  if(UNIX OR APPLE)
+    if(NOT " ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} ${CMAKE_CXX_FLAGS_DEBUG} ${CUDA_NVCC_FLAGS}" MATCHES "-std=")
+      if(CUDA_VERSION VERSION_LESS "11.0")
+        list(APPEND CUDA_NVCC_FLAGS "--std=c++11")
+      else()
+        list(APPEND CUDA_NVCC_FLAGS "--std=c++14")
+      endif()
    endif()
+  endif()

-    CUDA_COMPILE(${VAR} ${ARGN})
+  CUDA_COMPILE(${VAR} ${ARGN})

-    foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
-      set(${var} "${${var}_backup_in_cuda_compile_}")
-      unset(${var}_backup_in_cuda_compile_)
-    endforeach()
-  endmacro()
-else()
-  unset(CUDA_ARCH_BIN CACHE)
-  unset(CUDA_ARCH_PTX CACHE)
-endif()
+  foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
+    set(${var} "${${var}_backup_in_cuda_compile_}")
+    unset(${var}_backup_in_cuda_compile_)
+  endforeach()
+endmacro()

 if(HAVE_CUDA)
  set(CUDA_LIBS_PATH "")
@ -525,36 +225,13 @@ if(HAVE_CUDA)
  endif()
 endif()

-
-# ----------------------------------------------------------------------------
-# Add CUDA libraries (needed for apps/tools, samples)
-# ----------------------------------------------------------------------------
 if(HAVE_CUDA)
-  # details: https://github.com/NVIDIA/nvidia-docker/issues/775
-  if(" ${CUDA_CUDA_LIBRARY}" MATCHES "/stubs/libcuda.so" AND NOT OPENCV_SKIP_CUDA_STUB_WORKAROUND)
-    set(CUDA_STUB_ENABLED_LINK_WORKAROUND 1)
-    if(EXISTS "${CUDA_CUDA_LIBRARY}" AND NOT OPENCV_SKIP_CUDA_STUB_WORKAROUND_RPATH_LINK)
-      set(CUDA_STUB_TARGET_PATH "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/")
-      execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink "${CUDA_CUDA_LIBRARY}" "${CUDA_STUB_TARGET_PATH}/libcuda.so.1"
-          RESULT_VARIABLE CUDA_STUB_SYMLINK_RESULT)
-      if(NOT CUDA_STUB_SYMLINK_RESULT EQUAL 0)
-        execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CUDA_CUDA_LIBRARY}" "${CUDA_STUB_TARGET_PATH}/libcuda.so.1"
-          RESULT_VARIABLE CUDA_STUB_COPY_RESULT)
-        if(NOT CUDA_STUB_COPY_RESULT EQUAL 0)
-          set(CUDA_STUB_ENABLED_LINK_WORKAROUND 0)
-        endif()
-      endif()
-      if(CUDA_STUB_ENABLED_LINK_WORKAROUND)
-        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-rpath-link,\"${CUDA_STUB_TARGET_PATH}\"")
-      endif()
-    else()
-      set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--allow-shlib-undefined")
-    endif()
-    if(NOT CUDA_STUB_ENABLED_LINK_WORKAROUND)
-      message(WARNING "CUDA: workaround for stubs/libcuda.so.1 is not applied")
-    endif()
-  endif()
+  ocv_apply_cuda_stub_workaround("${CUDA_CUDA_LIBRARY}")
+  ocv_check_cuda_delayed_load("${CUDA_TOOLKIT_ROOT_DIR}")

+  # ----------------------------------------------------------------------------
+  # Add CUDA libraries (needed for apps/tools, samples)
+  # ----------------------------------------------------------------------------
  set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
  if(HAVE_CUBLAS)
    set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${CUDA_cublas_LIBRARY})
@ -572,19 +249,4 @@ if(HAVE_CUDA)
      set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${CMAKE_LIBRARY_PATH_FLAG}${p})
    endif()
  endforeach()
-
-  if(MSVC AND CUDA_ENABLE_DELAYLOAD)
-    set(DELAYFLAGS "delayimp.lib")
-    file(GLOB CUDA_DLLS "${CUDA_TOOLKIT_ROOT_DIR}/bin/*.dll")
-    foreach(d ${CUDA_DLLS})
-      cmake_path(GET "d" FILENAME DLL_NAME)
-      if(NOT ${DLL_NAME} MATCHES "cudart")
-        set(DELAYFLAGS "${DELAYFLAGS} /DELAYLOAD:${DLL_NAME}")
-      endif()
-    endforeach()
-    set(DELAYFLAGS "${DELAYFLAGS} /DELAYLOAD:nvcuda.dll /DELAYLOAD:nvml.dll /IGNORE:4199")
-    set(CMAKE_EXE_LINKER_FLAGS       "${CMAKE_EXE_LINKER_FLAGS} ${DELAYFLAGS}")
-    set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${DELAYFLAGS}")
-    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${DELAYFLAGS}")
-  endif()
 endif()
--- a/cmake/OpenCVDetectCUDALanguage.cmake
+++ b/cmake/OpenCVDetectCUDALanguage.cmake
@ -0,0 +1,154 @@
+#######################
+# Previously in FindCUDA and still required for FindCUDNN
+macro(FIND_CUDA_HELPER_LIBS _name)  # Search for library <_name> and store the result in CUDA_<_name>_LIBRARY
+  if(CMAKE_CROSSCOMPILING AND (ARM OR AARCH64))
+    set(_cuda_cross_arm_lib_dir "lib/stubs")  # extra search suffix, only set when cross-compiling for ARM/AArch64
+  endif()
+  find_library(CUDA_${_name}_LIBRARY ${_name}
+    NAMES ${_name}
+    PATHS "${CUDAToolkit_LIBRARY_ROOT}"
+    PATH_SUFFIXES "lib/x64" "lib64" ${_cuda_cross_arm_lib_dir} "lib/Win32" "lib"
+    DOC "\"${_name}\" library"
+    )
+  mark_as_advanced(CUDA_${_name}_LIBRARY)  # flag the result variable as an advanced cache entry
+endmacro()
+#######################
+include(cmake/OpenCVDetectCUDAUtils.cmake)
+
+if((WIN32 AND NOT MSVC) OR OPENCV_CMAKE_FORCE_CUDA)  # NOTE(review): a true OPENCV_CMAKE_FORCE_CUDA also takes this early return - confirm that is intended
+  message(STATUS "CUDA: Compilation is disabled (due to only Visual Studio compiler supported on your platform).")
+  return()  # stop processing this file; nothing below runs
+endif()
+
+if((NOT UNIX AND CV_CLANG) OR OPENCV_CMAKE_FORCE_CUDA)  # same shape as the guard above, for Clang on non-UNIX platforms
+  message(STATUS "CUDA: Compilation is disabled (due to Clang unsupported on your platform).")
+  return()
+endif()
+
+#set(OPENCV_CMAKE_CUDA_DEBUG 1)
+
+find_package(CUDAToolkit)  # not REQUIRED: a missing toolkit is handled by the CUDA_FOUND check that follows this block
+if(CMAKE_CUDA_COMPILER AND CUDAToolkit_FOUND)  # need both a CUDA compiler and the toolkit
+  set(CUDA_FOUND TRUE)
+  set(CUDA_TOOLKIT_INCLUDE ${CUDAToolkit_INCLUDE_DIRS})  # republish CUDAToolkit_* results under CUDA_* names
+  set(CUDA_VERSION_STRING ${CUDAToolkit_VERSION})
+  set(CUDA_VERSION ${CUDAToolkit_VERSION})
+  if(NOT CUDA_VERSION VERSION_LESS 11.0)  # C++14 for CUDA >= 11.0, C++11 for older toolkits
+      set(CMAKE_CUDA_STANDARD 14)
+  else()
+      set(CMAKE_CUDA_STANDARD 11)
+  endif()
+  if(UNIX AND NOT BUILD_SHARED_LIBS)
+      set(CUDA_LIB_EXT "_static")  # library-name suffix for static builds on UNIX; consumed outside this block
+  endif()
+endif()
+
+if(NOT CUDA_FOUND)
+  unset(CUDA_ARCH_BIN CACHE)
+  unset(CUDA_ARCH_PTX CACHE)
+  return()
+endif()
+
+set(HAVE_CUDA 1)
+
+if(WITH_CUFFT)
+  set(HAVE_CUFFT 1)
+endif()
+
+if(WITH_CUBLAS)
+  set(HAVE_CUBLAS 1)
+endif()
+
+if(WITH_CUDNN)
+    set(CMAKE_MODULE_PATH "${OpenCV_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
+    find_host_package(CUDNN "${MIN_VER_CUDNN}")
+    list(REMOVE_AT CMAKE_MODULE_PATH 0)
+
+    if(CUDNN_FOUND)
+      set(HAVE_CUDNN 1)
+    endif()
+endif()
+
+if(WITH_NVCUVID OR WITH_NVCUVENC)
+  ocv_check_for_nvidia_video_codec_sdk("${CUDAToolkit_LIBRARY_ROOT}")
+endif()
+
+ocv_check_for_cmake_cuda_architectures()
+ocv_set_cuda_detection_nvcc_flags(CMAKE_CUDA_HOST_COMPILER)
+ocv_set_cuda_arch_bin_and_ptx(${CUDAToolkit_NVCC_EXECUTABLE})
+
+# NVCC flags to be set
+set(NVCC_FLAGS_EXTRA "")
+
+# These vars will be passed into the templates
+set(OPENCV_CUDA_ARCH_BIN "")
+set(OPENCV_CUDA_ARCH_PTX "")
+set(OPENCV_CUDA_ARCH_FEATURES "")
+
+# Tell NVCC to add binaries for the specified GPUs
+string(REGEX MATCHALL "[0-9()]+" ARCH_LIST "${ARCH_BIN_NO_POINTS}")  # split into tokens such as "75" or "75(70)"
+foreach(ARCH IN LISTS ARCH_LIST)
+  if(ARCH MATCHES "([0-9]+)\\(([0-9]+)\\)")  # BIN(PTX) form: CMAKE_MATCH_1 = BIN, CMAKE_MATCH_2 = PTX
+    # User explicitly specified PTX for the concrete BIN
+    set(CMAKE_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES} ${CMAKE_MATCH_2}-virtual;${CMAKE_MATCH_1}-real;)
+    set(OPENCV_CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN} ${CMAKE_MATCH_1}")
+    set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${CMAKE_MATCH_2}")
+  else()
+    # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
+    set(CMAKE_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES} ${ARCH}-real;)  # only a "-real" entry is appended here
+    set(OPENCV_CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN} ${ARCH}")
+    set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${ARCH}")
+  endif()
+endforeach()
+set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -D_FORCE_INLINES)
+
+# Tell NVCC to add PTX intermediate code for the specified architectures
+string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_NO_POINTS}")  # ARCH_LIST is reused for the PTX tokens
+foreach(ARCH IN LISTS ARCH_LIST)
+  set(CMAKE_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES} ${ARCH}-virtual;)
+  set(OPENCV_CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX} ${ARCH}")
+  set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${ARCH}")
+endforeach()
+
+ocv_set_nvcc_threads_for_vs()
+
+# These vars will be processed in other scripts
+set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA})
+set(OpenCV_CUDA_CC "${CMAKE_CUDA_ARCHITECTURES}")
+
+if(ANDROID)
+  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xptxas;-dlcm=ca")
+endif()
+
+message(STATUS "CUDA: NVCC target flags ${CUDA_NVCC_FLAGS}")
+
+OCV_OPTION(CUDA_FAST_MATH "Enable --use_fast_math for CUDA compiler " OFF)
+
+if(CUDA_FAST_MATH)
+  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --use_fast_math)
+endif()
+
+OCV_OPTION(CUDA_ENABLE_DELAYLOAD "Enable delayed loading of CUDA DLLs" OFF VISIBLE_IF MSVC)
+
+mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD CUDA_SDK_ROOT_DIR)
+
+macro(ocv_cuda_unfilter_options)  # Restore the three C++ flag variables from their *_backup_in_cuda_compile_ copies
+  foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
+    set(${var} "${${var}_backup_in_cuda_compile_}")  # put the saved value back
+    unset(${var}_backup_in_cuda_compile_)  # then drop the backup variable
+  endforeach()
+endmacro()
+
+macro(ocv_cuda_compile_flags)  # Snapshot the C++ flag variables into *_CUDA variables, then restore the originals
+  ocv_cuda_filter_options()  # defined elsewhere; presumably creates the backups that ocv_cuda_unfilter_options() restores - confirm
+  ocv_nvcc_flags()  # defined elsewhere
+  set(CMAKE_CXX_FLAGS_CUDA ${CMAKE_CXX_FLAGS})  # copies taken after the two calls above have run
+  set(CMAKE_CXX_FLAGS_RELEASE_CUDA ${CMAKE_CXX_FLAGS_RELEASE})
+  set(CMAKE_CXX_FLAGS_DEBUG_CUDA ${CMAKE_CXX_FLAGS_DEBUG})
+  ocv_cuda_unfilter_options()  # restore CMAKE_CXX_FLAGS* from the backup variables
+endmacro()
+
+# Apply the linker workarounds that depend on where the CUDA toolkit lives.
+if(HAVE_CUDA)
+  ocv_apply_cuda_stub_workaround("${CUDA_cuda_driver_LIBRARY}")
+  # Pass the toolkit root reported by find_package(CUDAToolkit). The previous
+  # argument, ${cuda_toolkit_root_dir}, is not set anywhere in this file, so the
+  # helper received an empty string instead of a toolkit directory.
+  ocv_check_cuda_delayed_load("${CUDAToolkit_LIBRARY_ROOT}")
+endif()
--- a/cmake/OpenCVDetectCUDAUtils.cmake
+++ b/cmake/OpenCVDetectCUDAUtils.cmake
@ -0,0 +1,442 @@
+macro(ocv_check_for_nvidia_video_codec_sdk cuda_toolkit_dirs)  # Set HAVE_NVCUVID / HAVE_NVCUVENC when both header and library are found
+  macro(ocv_cuda_SEARCH_NVCUVID_HEADER _filename _result)  # helper: <_result> becomes 1 if <_filename> is found under <dir>/include, else 0
+    # place header file under CUDAToolkit_LIBRARY_ROOT
+    find_path(_header_result
+      ${_filename}
+      PATHS ${cuda_toolkit_dirs}
+      PATH_SUFFIXES include
+      NO_DEFAULT_PATH
+      )
+    if("x${_header_result}" STREQUAL "x_header_result-NOTFOUND")  # find_path() stores <var>-NOTFOUND on failure
+      set(${_result} 0)
+    else()
+      set(${_result} 1)
+    endif()
+    unset(_header_result CACHE)  # drop the cached result so the next header lookup searches again
+  endmacro()
+  if(WITH_NVCUVID)
+    ocv_cuda_SEARCH_NVCUVID_HEADER("nvcuvid.h" HAVE_NVCUVID_HEADER)
+    # make sure to have both header and library before enabling
+    if(${HAVE_NVCUVID_HEADER})
+      find_cuda_helper_libs(nvcuvid)  # expected to set CUDA_nvcuvid_LIBRARY (CMake command names are case-insensitive)
+      if(CUDA_nvcuvid_LIBRARY)
+        set(HAVE_NVCUVID 1)
+        message(STATUS "Found NVCUVID: ${CUDA_nvcuvid_LIBRARY}")
+      else()
+        if(WIN32)
+          message(STATUS "NVCUVID: Library not found, WITH_NVCUVID requires Nvidia decoding library nvcuvid.lib to either be inside ${cuda_toolkit_dirs}/lib or its location manually set with CUDA_nvcuvid_LIBRARY, i.e. CUDA_nvcuvid_LIBRARY=${cuda_toolkit_dirs}/lib/nvcuvid.lib")
+        else()
+          message(STATUS "NVCUVID: Library not found, WITH_NVCUVID requires the Nvidia decoding shared library nvcuvid.so from the driver installation or the location of the stub library to be manually set with CUDA_nvcuvid_LIBRARY i.e. CUDA_nvcuvid_LIBRARY=/home/user/Video_Codec_SDK_X.X.X/Lib/linux/stubs/x86_64/nvcuvid.so")
+        endif()
+      endif()
+    else()
+      message(STATUS "NVCUVID: Header not found, WITH_NVCUVID requires Nvidia decoding library header ${cuda_toolkit_dirs}/include/nvcuvid.h")
+    endif()
+  endif()
+
+  if(WITH_NVCUVENC)
+    ocv_cuda_SEARCH_NVCUVID_HEADER("nvEncodeAPI.h" HAVE_NVCUVENC_HEADER)
+    if(${HAVE_NVCUVENC_HEADER})
+      if(WIN32)  # the encoder library is searched under a different name per platform
+        find_cuda_helper_libs(nvencodeapi)
+      else()
+        find_cuda_helper_libs(nvidia-encode)
+      endif()
+      if(CUDA_nvencodeapi_LIBRARY OR CUDA_nvidia-encode_LIBRARY)  # whichever of the two lookups ran
+        set(HAVE_NVCUVENC 1)
+        message(STATUS "Found NVCUVENC: ${CUDA_nvencodeapi_LIBRARY} ${CUDA_nvidia-encode_LIBRARY}")
+      else()
+        if(WIN32)
+          message(STATUS "NVCUVENC: Library not found, WITH_NVCUVENC requires Nvidia encoding library nvencodeapi.lib to either be inside ${cuda_toolkit_dirs}/lib or its location manually set with CUDA_nvencodeapi_LIBRARY, i.e. CUDA_nvencodeapi_LIBRARY=${cuda_toolkit_dirs}/lib/nvencodeapi.lib")
+        else()
+          message(STATUS "NVCUVENC: Library not found, WITH_NVCUVENC requires the Nvidia encoding shared library libnvidia-encode.so from the driver installation or the location of the stub library to be manually set with CUDA_nvidia-encode_LIBRARY i.e. CUDA_nvidia-encode_LIBRARY=/home/user/Video_Codec_SDK_X.X.X/Lib/linux/stubs/x86_64/libnvidia-encode.so")
+        endif()
+      endif()
+    else()
+      message(STATUS "NVCUVENC: Header not found, WITH_NVCUVENC requires Nvidia encoding library header ${cuda_toolkit_dirs}/include/nvEncodeAPI.h")
+    endif()
+  endif()
+endmacro()
+
+# Use CMAKE_CUDA_ARCHITECTURES if provided: order of preference CMAKE_CUDA_ARCHITECTURES > CUDA_GENERATION > CUDA_ARCH_BIN and/or CUDA_ARCH_PTX
+#
+# Translates the entries of CMAKE_CUDA_ARCHITECTURES into the CUDA_ARCH_BIN and
+# CUDA_ARCH_PTX cache variables:
+#   "<N>-real"    -> CUDA_ARCH_BIN
+#   "<N>-virtual" -> CUDA_ARCH_PTX
+#   "<N>"         -> both
+# Does nothing when CMAKE_CUDA_ARCHITECTURES is empty; warns and returns for the
+# special values all, all-major and native. On success the caller's
+# CMAKE_CUDA_ARCHITECTURES is cleared and the CUDA_GENERATION cache entry removed.
+function(ocv_check_for_cmake_cuda_architectures)
+  if(NOT CMAKE_CUDA_ARCHITECTURES)
+    return()
+  endif()
+  if(CMAKE_CUDA_ARCHITECTURES STREQUAL "all" OR CMAKE_CUDA_ARCHITECTURES STREQUAL "all-major" OR CMAKE_CUDA_ARCHITECTURES STREQUAL "native")
+    message(WARNING "CUDA: CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}, special values all, all-major and native are not supported by OpenCV, specify only CUDA real and/or virtual architectures or use combinations of CUDA_ARCH_BIN and CUDA_ARCH_PTX or specify the CUDA_GENERATION where -DCUDA_GENERATION=Auto is equivalent to native!")
+    return()
+  endif()
+  set(internal_ptx "")
+  set(internal_bin "")
+  foreach(ARCH IN LISTS CMAKE_CUDA_ARCHITECTURES)
+    if(ARCH MATCHES "([0-9]+)\-real")
+      set(internal_bin ${internal_bin} ${CMAKE_MATCH_1};)
+    elseif(ARCH MATCHES "([0-9]+)\-virtual")
+      set(internal_ptx ${internal_ptx} ${CMAKE_MATCH_1};)
+    elseif(ARCH MATCHES "([0-9]+)")
+      # A bare number requests both the binary and the PTX for that architecture
+      set(internal_bin ${internal_bin} ${CMAKE_MATCH_1};)
+      set(internal_ptx ${internal_ptx} ${CMAKE_MATCH_1};)
+    endif()
+  endforeach()
+  if(internal_bin OR internal_ptx)
+    # Drop stale cache entries so the set(... CACHE ...) calls below take effect
+    unset(CUDA_ARCH_BIN CACHE)
+    unset(CUDA_ARCH_PTX CACHE)
+  endif()
+  if(internal_ptx)
+    set(CUDA_ARCH_PTX ${internal_ptx} CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for (see https://docs.opencv.org/5.x/d2/dbc/cuda_intro.html)")
+  endif()
+  if(internal_bin)
+    set(CUDA_ARCH_BIN ${internal_bin} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported (see https://docs.opencv.org/5.x/d2/dbc/cuda_intro.html)")
+  endif()
+  # PARENT_SCOPE is required to clear the caller's variable from inside a function;
+  # the former "PARENT" keyword only created a function-local list and left the
+  # caller's CMAKE_CUDA_ARCHITECTURES untouched.
+  set(CMAKE_CUDA_ARCHITECTURES "" PARENT_SCOPE)
+  unset(CUDA_GENERATION CACHE)
+endfunction()
+
+macro(ocv_initialize_nvidia_device_generations)  # Define the generation names, the _arch_* lists and the CUDA_GENERATION cache option
+  OCV_OPTION(CUDA_ENABLE_DEPRECATED_GENERATION "Enable deprecated generations in the list" OFF)
+  set(_generations "Maxwell" "Pascal" "Volta" "Turing" "Ampere" "Lovelace" "Hopper")
+  if(CUDA_ENABLE_DEPRECATED_GENERATION)  # Fermi and Kepler are only selectable when this option is on
+    set(_generations "Fermi" "${_generations}")
+    set(_generations "Kepler" "${_generations}")
+  endif()
+  set(_arch_fermi    "2.0")  # _arch_<generation>: ';'-separated list associated with each generation name
+  set(_arch_kepler   "3.0;3.5;3.7")
+  set(_arch_maxwell  "5.0;5.2")
+  set(_arch_pascal   "6.0;6.1")
+  set(_arch_volta    "7.0")
+  set(_arch_turing   "7.5")
+  set(_arch_ampere   "8.0;8.6")
+  set(_arch_lovelace "8.9")
+  set(_arch_hopper   "9.0")
+  if(NOT CMAKE_CROSSCOMPILING)
+    list(APPEND _generations "Auto")  # "Auto" is only offered for native builds
+  endif()
+  set(CUDA_GENERATION "" CACHE STRING "Build CUDA device code only for specific GPU architecture. Leave empty to build for all architectures (see https://docs.opencv.org/5.x/d2/dbc/cuda_intro.html).")
+  if( CMAKE_VERSION VERSION_GREATER "2.8" )
+    set_property( CACHE CUDA_GENERATION PROPERTY STRINGS "" ${_generations} )  # list of values offered for this cache entry
+  endif()
+
+  if(CUDA_GENERATION)
+    if(NOT ";${_generations};" MATCHES ";${CUDA_GENERATION};")  # membership test against the ';'-delimited list
+      string(REPLACE ";" ", " _generations "${_generations}")
+      message(FATAL_ERROR "ERROR: ${_generations} Generations are supported.")
+    endif()
+    unset(CUDA_ARCH_BIN CACHE)  # an explicit generation discards the cached BIN/PTX selections
+    unset(CUDA_ARCH_PTX CACHE)
+  endif()
+endmacro()
+
+# Append a "-ccbin <host compiler>" option to OPENCV_CUDA_DETECTION_NVCC_FLAGS
+# unless that variable already contains one.
+#   cuda_host_compiler_var - NAME of the variable holding the CUDA host compiler
+#                            path (callers pass CUDA_HOST_COMPILER or
+#                            CMAKE_CUDA_HOST_COMPILER).
+macro(ocv_set_cuda_detection_nvcc_flags cuda_host_compiler_var)
+  if(OPENCV_CUDA_DETECTION_NVCC_FLAGS MATCHES "-ccbin")
+  # already specified by user
+  elseif(${cuda_host_compiler_var} AND EXISTS "${${cuda_host_compiler_var}}")
+    get_filename_component(c_compiler_realpath "${CMAKE_C_COMPILER}" REALPATH)
+    # C compiler doesn't work with --run option, forcing C++ compiler instead
+    if(${cuda_host_compiler_var} STREQUAL c_compiler_realpath OR ${cuda_host_compiler_var} STREQUAL CMAKE_C_COMPILER)
+      if(DEFINED CMAKE_CXX_COMPILER)
+        get_filename_component(cxx_compiler_realpath "${CMAKE_CXX_COMPILER}" REALPATH)
+        LIST(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS -ccbin "${cxx_compiler_realpath}")
+      else()
+        message(STATUS "CUDA: CMAKE_CXX_COMPILER is not available. You may need to specify ${cuda_host_compiler_var}.")
+      endif()
+    else()
+      LIST(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS -ccbin "${${cuda_host_compiler_var}}")
+    endif()
+  elseif(WIN32 AND CMAKE_LINKER) # Workaround for VS cl.exe not being in the env. path
+    get_filename_component(host_compiler_bindir ${CMAKE_LINKER} DIRECTORY)
+    LIST(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS -ccbin "${host_compiler_bindir}")
+  else()
+    if(${cuda_host_compiler_var})
+      # Print the value of the variable named by the macro argument; the former
+      # ${cuda_host_compiler} is not a defined variable and always printed ''.
+      message(STATUS "CUDA: ${cuda_host_compiler_var}='${${cuda_host_compiler_var}}' is not valid, autodetection may not work. Specify OPENCV_CUDA_DETECTION_NVCC_FLAGS with -ccbin option for fix that")
+    endif()
+  endif()
+endmacro()
+
+macro(ocv_filter_available_architecture nvcc_executable result_list)  # Append to <result_list> every architecture in ARGN that nvcc can compile the probe file for
+  set(__cache_key_check "${ARGN} : ${nvcc_executable} ${OPENCV_CUDA_DETECTION_NVCC_FLAGS}")  # key: requested archs + compiler + detection flags
+  if(DEFINED OPENCV_CACHE_CUDA_SUPPORTED_CC AND OPENCV_CACHE_CUDA_SUPPORTED_CC_check STREQUAL __cache_key_check)
+    set(${result_list} "${OPENCV_CACHE_CUDA_SUPPORTED_CC}")  # same key as the cached run: reuse its result
+  else()
+    set(CC_LIST ${ARGN})
+    foreach(target_arch ${CC_LIST})
+      string(REPLACE "." "" target_arch_short "${target_arch}")  # e.g. "7.5" -> "75"
+      set(NVCC_OPTION "-gencode;arch=compute_${target_arch_short},code=sm_${target_arch_short}")
+      set(_cmd "${nvcc_executable}" ${OPENCV_CUDA_DETECTION_NVCC_FLAGS} ${NVCC_OPTION} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" --compile)
+      execute_process(
+          COMMAND ${_cmd}
+          WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
+          RESULT_VARIABLE _nvcc_res
+          OUTPUT_VARIABLE _nvcc_out
+          ERROR_VARIABLE _nvcc_err
+          #ERROR_QUIET
+          OUTPUT_STRIP_TRAILING_WHITESPACE
+      )
+      if(OPENCV_CMAKE_CUDA_DEBUG)
+        message(WARNING "COMMAND: ${_cmd}")
+        message(STATUS "Result: ${_nvcc_res}")
+        message(STATUS "Out: ${_nvcc_out}")
+        message(STATUS "Err: ${_nvcc_err}")
+      endif()
+      if(_nvcc_res EQUAL 0)  # exit code 0: keep this architecture
+        LIST(APPEND ${result_list} "${target_arch}")
+      endif()
+    endforeach()
+    string(STRIP "${${result_list}}" ${result_list})
+    if(" ${${result_list}}" STREQUAL " ")  # nothing compiled successfully
+      message(WARNING "CUDA: Autodetection arch list is empty. Please enable OPENCV_CMAKE_CUDA_DEBUG=1 and check/specify OPENCV_CUDA_DETECTION_NVCC_FLAGS variable")
+    endif()
+
+    # cache detected values
+    set(OPENCV_CACHE_CUDA_SUPPORTED_CC ${${result_list}} CACHE INTERNAL "")
+    set(OPENCV_CACHE_CUDA_SUPPORTED_CC_check "${__cache_key_check}" CACHE INTERNAL "")
+  endif()
+endmacro()
+
+macro(ocv_detect_native_cuda_arch nvcc_executable status output)  # Run the probe file with "nvcc --run": exit code goes to <status>, last stdout line to <output>
+  set(OPENCV_CUDA_DETECT_ARCHS_COMMAND "${nvcc_executable}" ${OPENCV_CUDA_DETECTION_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run")
+  set(__cache_key_check "${OPENCV_CUDA_DETECT_ARCHS_COMMAND}")  # the full command line is the cache key
+  if(DEFINED OPENCV_CACHE_CUDA_ACTIVE_CC AND OPENCV_CACHE_CUDA_ACTIVE_CC_check STREQUAL __cache_key_check)
+    set(${output} "${OPENCV_CACHE_CUDA_ACTIVE_CC}")  # same command as the cached run: reuse its output
+    set(${status} 0)
+  else()
+    execute_process(
+        COMMAND ${OPENCV_CUDA_DETECT_ARCHS_COMMAND}
+        WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
+        RESULT_VARIABLE ${status}
+        OUTPUT_VARIABLE _nvcc_out
+        ERROR_VARIABLE _nvcc_err
+        ERROR_QUIET
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+    if(OPENCV_CMAKE_CUDA_DEBUG)
+      message(WARNING "COMMAND: ${OPENCV_CUDA_DETECT_ARCHS_COMMAND}")
+      message(STATUS "Result: ${${status}}")
+      message(STATUS "Out: ${_nvcc_out}")
+      message(STATUS "Err: ${_nvcc_err}")
+    endif()
+    string(REGEX REPLACE ".*\n" "" ${output} "${_nvcc_out}") #Strip leading warning messages, if any
+
+    if(${status} EQUAL 0)  # only successful runs are cached
+      # cache detected values
+      set(OPENCV_CACHE_CUDA_ACTIVE_CC ${${output}} CACHE INTERNAL "")
+      set(OPENCV_CACHE_CUDA_ACTIVE_CC_check "${__cache_key_check}" CACHE INTERNAL "")
+    endif()
+  endif()
+endmacro()
+
+# Choose the CUDA architectures to build for and publish them as cache variables.
+#
+# Arguments:
+#   nvcc_executable - path to nvcc, forwarded to the detection/filter helpers
+#
+# Selection order:
+#   1. CUDA_GENERATION (a named generation, or "Auto" for native detection)
+#   2. a user-provided CUDA_ARCH_BIN
+#   3. platform defaults (ARM / AARCH64 / everything nvcc can compile)
+#
+# Results:
+#   CUDA_ARCH_BIN, CUDA_ARCH_PTX             - cache entries (set without FORCE)
+#   ARCH_BIN_NO_POINTS, ARCH_PTX_NO_POINTS   - the same values with the dots removed
+# A configure error is raised when compute capability 1.0 or 2.1 is requested.
+macro(ocv_set_cuda_arch_bin_and_ptx nvcc_executable)
+  # NOTE(review): the _arch_<generation> variables used below are presumably defined by this call - confirm
+  ocv_initialize_nvidia_device_generations()
+  # With an empty CUDA_ARCH_PTX this set() receives no value, which leaves __cuda_arch_ptx undefined
+  set(__cuda_arch_ptx ${CUDA_ARCH_PTX})
+  if(CUDA_GENERATION STREQUAL "Fermi")
+    set(__cuda_arch_bin ${_arch_fermi})
+  elseif(CUDA_GENERATION STREQUAL "Kepler")
+    set(__cuda_arch_bin ${_arch_kepler})
+  elseif(CUDA_GENERATION STREQUAL "Maxwell")
+    set(__cuda_arch_bin ${_arch_maxwell})
+  elseif(CUDA_GENERATION STREQUAL "Pascal")
+    set(__cuda_arch_bin ${_arch_pascal})
+  elseif(CUDA_GENERATION STREQUAL "Volta")
+    set(__cuda_arch_bin ${_arch_volta})
+  elseif(CUDA_GENERATION STREQUAL "Turing")
+    set(__cuda_arch_bin ${_arch_turing})
+  elseif(CUDA_GENERATION STREQUAL "Ampere")
+    set(__cuda_arch_bin ${_arch_ampere})
+  elseif(CUDA_GENERATION STREQUAL "Lovelace")
+    set(__cuda_arch_bin ${_arch_lovelace})
+  elseif(CUDA_GENERATION STREQUAL "Hopper")
+    set(__cuda_arch_bin ${_arch_hopper})
+  elseif(CUDA_GENERATION STREQUAL "Auto")
+    ocv_detect_native_cuda_arch(${nvcc_executable} _nvcc_res _nvcc_out)
+    if(NOT _nvcc_res EQUAL 0)
+      # __cuda_arch_bin stays undefined here, so the default selection below takes over
+      message(STATUS "CUDA: Automatic detection of CUDA generation failed. Going to build for all known architectures")
+    else()
+      # Collect every "<major>.<minor>" token from the detection output into a list
+      string(REGEX MATCHALL "[0-9]+\\.[0-9]" __cuda_arch_bin "${_nvcc_out}")
+    endif()
+  elseif(CUDA_ARCH_BIN)
+    message(STATUS "CUDA: Using CUDA_ARCH_BIN=${CUDA_ARCH_BIN}")
+    set(__cuda_arch_bin ${CUDA_ARCH_BIN})
+  endif()
+
+  # Nothing was selected above: fall back to platform-specific defaults
+  if(NOT DEFINED __cuda_arch_bin AND NOT DEFINED __cuda_arch_ptx)
+    if(ARM)
+      set(__cuda_arch_bin "3.2")
+      set(__cuda_arch_ptx "")
+    elseif(AARCH64)
+      # Native detection needs to run a binary, which is not possible when cross-compiling
+      if(NOT CMAKE_CROSSCOMPILING)
+        ocv_detect_native_cuda_arch(${nvcc_executable} _nvcc_res _nvcc_out)
+      else()
+        set(_nvcc_res -1)  # emulate error, see below
+      endif()
+      if(NOT _nvcc_res EQUAL 0)
+        message(STATUS "CUDA: Automatic detection of CUDA generation failed. Going to build for all known architectures")
+        # TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0) Orin (8.7)
+        ocv_filter_available_architecture(${nvcc_executable} __cuda_arch_bin
+            5.3
+            6.2
+            7.2
+            7.0
+            8.7
+        )
+      else()
+        # Unlike the "Auto" branch above, the detection output is used verbatim here
+        set(__cuda_arch_bin "${_nvcc_out}")
+      endif()
+      set(__cuda_arch_ptx "")
+    else()
+      # Keep only the architectures that the given nvcc is able to compile for
+      ocv_filter_available_architecture(${nvcc_executable} __cuda_arch_bin
+          ${_arch_fermi}
+          ${_arch_kepler}
+          ${_arch_maxwell}
+          ${_arch_pascal}
+          ${_arch_volta}
+          ${_arch_turing}
+          ${_arch_ampere}
+          ${_arch_lovelace}
+          ${_arch_hopper}
+      )
+      # PTX is generated for the last entry of the filtered list
+      list(GET __cuda_arch_bin -1 __cuda_arch_ptx)
+    endif()
+  endif()
+
+  # No FORCE: values already present in the cache are kept as they are
+  set(CUDA_ARCH_BIN ${__cuda_arch_bin} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported (see https://docs.opencv.org/5.x/d2/dbc/cuda_intro.html)")
+  set(CUDA_ARCH_PTX ${__cuda_arch_ptx} CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for (see https://docs.opencv.org/5.x/d2/dbc/cuda_intro.html)")
+  string(REGEX REPLACE "\\." "" ARCH_BIN_NO_POINTS "${CUDA_ARCH_BIN}")
+  string(REGEX REPLACE "\\." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}")
+
+  # Check if user specified 1.0/2.1 compute capability: we don't support it
+  # The check is a regex search for " <target_cc>" in the space-joined BIN and PTX values
+  macro(ocv_wipeout_deprecated_cc target_cc)
+    if(" ${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}" MATCHES " ${target_cc}")
+      message(SEND_ERROR "CUDA: ${target_cc} compute capability is not supported - exclude it from ARCH/PTX list and re-run CMake")
+    endif()
+  endmacro()
+  ocv_wipeout_deprecated_cc("1.0")
+  ocv_wipeout_deprecated_cc("2.1")
+endmacro()
+
+# Append "--threads=<N>" to CUDA_NVCC_FLAGS for Visual Studio generators, where <N> is taken
+# from the CMAKE_BUILD_PARALLEL_LEVEL environment variable. Nothing is added when the
+# variable is unset/empty or when the CUDA toolkit is older than 11.2.
+macro(ocv_set_nvcc_threads_for_vs)
+  # Tell NVCC the maximum number of threads to be used to execute the compilation steps in parallel
+  # (option --threads was introduced in version 11.2)
+  if(NOT CUDA_VERSION VERSION_LESS "11.2")
+    # The environment value is quoted on purpose: unquoted, an unset variable expands to no
+    # argument at all and the condition degenerates into `NOT STREQUAL ""`.
+    if(CMAKE_GENERATOR MATCHES "Visual Studio" AND NOT "$ENV{CMAKE_BUILD_PARALLEL_LEVEL}" STREQUAL "")
+      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "--threads=$ENV{CMAKE_BUILD_PARALLEL_LEVEL}")
+    endif()
+  endif()
+endmacro()
+
+# Strip host-compiler options that are unsuitable for CUDA compilation from
+# CMAKE_CXX_FLAGS, CMAKE_CXX_FLAGS_RELEASE and CMAKE_CXX_FLAGS_DEBUG.
+#
+# The variables are modified in place (this is a macro, so the caller's variables change).
+# The original value of each one is saved first in <var>_backup_in_cuda_compile_.
+# NOTE(review): the code restoring the backups is not part of this macro - presumably the caller does it.
+# Extra options to remove can be listed in OPENCV_CUDA_NVCC_FILTEROUT_OPTIONS.
+macro(ocv_cuda_filter_options)
+  foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
+    # Remember the unfiltered value
+    set(${var}_backup_in_cuda_compile_ "${${var}}")
+
+    if (CV_CLANG)
+      # we remove -Winconsistent-missing-override and -Qunused-arguments
+      # just in case we are compiling CUDA with gcc but OpenCV with clang
+      string(REPLACE "-Winconsistent-missing-override" "" ${var} "${${var}}")
+      string(REPLACE "-Qunused-arguments" "" ${var} "${${var}}")
+    endif()
+
+    # we remove /EHa as it generates warnings under windows
+    string(REPLACE "/EHa" "" ${var} "${${var}}")
+
+    # we remove -ggdb3 flag as it leads to preprocessor errors when compiling CUDA files (CUDA 4.1)
+    string(REPLACE "-ggdb3" "" ${var} "${${var}}")
+
+    # we remove -Wsign-promo as it generates warnings under linux
+    string(REPLACE "-Wsign-promo" "" ${var} "${${var}}")
+
+    # we remove -Wno-sign-promo as it generates warnings under linux
+    string(REPLACE "-Wno-sign-promo" "" ${var} "${${var}}")
+
+    # we remove -Wno-delete-non-virtual-dtor because it's used for C++ compiler
+    # but NVCC uses C compiler by default
+    string(REPLACE "-Wno-delete-non-virtual-dtor" "" ${var} "${${var}}")
+
+    # we remove -frtti because it's used for C++ compiler
+    # but NVCC uses C compiler by default
+    string(REPLACE "-frtti" "" ${var} "${${var}}")
+
+    # -fvisibility-inlines-hidden is dropped as well (no rationale is recorded for this one)
+    string(REPLACE "-fvisibility-inlines-hidden" "" ${var} "${${var}}")
+
+    # cc1: warning: command line option '-Wsuggest-override' is valid for C++/ObjC++ but not for C
+    string(REPLACE "-Wsuggest-override" "" ${var} "${${var}}")
+
+    # issue: #11552 (from OpenCVCompilerOptions.cmake)
+    # The pattern requires a trailing space, so the option is only removed when followed by one
+    string(REGEX REPLACE "-Wimplicit-fallthrough(=[0-9]+)? " "" ${var} "${${var}}")
+
+    # removal of custom specified options
+    if(OPENCV_CUDA_NVCC_FILTEROUT_OPTIONS)
+      foreach(__flag ${OPENCV_CUDA_NVCC_FILTEROUT_OPTIONS})
+        string(REPLACE "${__flag}" "" ${var} "${${var}}")
+      endforeach()
+    endif()
+  endforeach()
+endmacro()
+
+# Append platform- and configuration-dependent options to CUDA_NVCC_FLAGS.
+# Takes no arguments; reads BUILD_SHARED_LIBS, UNIX/APPLE/WIN32, CUDA_VERSION,
+# CMAKE_CROSSCOMPILING, ARM/AARCH64 and CV_GCC from the caller's scope.
+macro(ocv_nvcc_flags)
+  # Shared builds: pass the CVAPI_EXPORTS definition to the host compiler
+  if(BUILD_SHARED_LIBS)
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler=-DCVAPI_EXPORTS)
+  endif()
+
+  # Position-independent code for the host compiler on UNIX-like platforms
+  if(UNIX OR APPLE)
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler=-fPIC)
+  endif()
+  if(APPLE)
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler=-fno-finite-math-only)
+  endif()
+
+  # Windows with CUDA >= 11.2: show diagnostic numbers and suppress diagnostics 1394 and 1388
+  if(WIN32 AND NOT (CUDA_VERSION VERSION_LESS "11.2"))
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcudafe --display_error_number --diag-suppress 1394,1388)
+  endif()
+
+  # Cross-compiling for ARM/AArch64: let the linker ignore unresolved symbols coming from shared libraries
+  if(CMAKE_CROSSCOMPILING AND (ARM OR AARCH64))
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xlinker --unresolved-symbols=ignore-in-shared-libs)
+  endif()
+
+  # disabled because of multiple warnings during building nvcc auto generated files
+  # (this one changes CMAKE_CXX_FLAGS rather than CUDA_NVCC_FLAGS)
+  if(CV_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "4.6.0")
+    ocv_warnings_disable(CMAKE_CXX_FLAGS -Wunused-but-set-variable)
+  endif()
+endmacro()
+
+# Adjust executable linker flags when the CUDA driver library is the toolkit stub
+# (a path containing "/stubs/libcuda.so").
+#
+# Arguments:
+#   cuda_driver_library_path - path of the CUDA driver library chosen for linking
+#
+# Opt-outs: OPENCV_SKIP_CUDA_STUB_WORKAROUND disables everything,
+# OPENCV_SKIP_CUDA_STUB_WORKAROUND_RPATH_LINK disables the rpath-link variant only.
+# Sets CUDA_STUB_ENABLED_LINK_WORKAROUND and may extend CMAKE_EXE_LINKER_FLAGS.
+macro(ocv_apply_cuda_stub_workaround cuda_driver_library_path)
+  # details: https://github.com/NVIDIA/nvidia-docker/issues/775
+  if(" ${cuda_driver_library_path}" MATCHES "/stubs/libcuda.so" AND NOT OPENCV_SKIP_CUDA_STUB_WORKAROUND)
+    set(CUDA_STUB_ENABLED_LINK_WORKAROUND 1)
+    if(EXISTS "${cuda_driver_library_path}" AND NOT OPENCV_SKIP_CUDA_STUB_WORKAROUND_RPATH_LINK)
+      # Expose the stub under the name libcuda.so.1 inside the build tree:
+      # try a symlink first, fall back to copying the file
+      set(CUDA_STUB_TARGET_PATH "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/")
+      execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink "${cuda_driver_library_path}" "${CUDA_STUB_TARGET_PATH}/libcuda.so.1"
+          RESULT_VARIABLE CUDA_STUB_SYMLINK_RESULT)
+      if(NOT CUDA_STUB_SYMLINK_RESULT EQUAL 0)
+        execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different "${cuda_driver_library_path}" "${CUDA_STUB_TARGET_PATH}/libcuda.so.1"
+          RESULT_VARIABLE CUDA_STUB_COPY_RESULT)
+        if(NOT CUDA_STUB_COPY_RESULT EQUAL 0)
+          # Neither symlink nor copy worked: the workaround cannot be applied
+          set(CUDA_STUB_ENABLED_LINK_WORKAROUND 0)
+        endif()
+      endif()
+      if(CUDA_STUB_ENABLED_LINK_WORKAROUND)
+        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-rpath-link,\"${CUDA_STUB_TARGET_PATH}\"")
+      endif()
+    else()
+      # Stub file is missing or the rpath-link variant is disabled: tolerate undefined symbols in shared libs instead
+      set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--allow-shlib-undefined")
+    endif()
+    if(NOT CUDA_STUB_ENABLED_LINK_WORKAROUND)
+      message(WARNING "CUDA: Workaround for stubs/libcuda.so.1 is not applied")
+    endif()
+  endif()
+endmacro()
+
+# Enable delay-loading of the CUDA DLLs for MSVC builds when CUDA_ENABLE_DELAYLOAD is set.
+#
+# Arguments:
+#   cuda_toolkit_root_dir - CUDA toolkit root; every DLL found in its bin/ directory
+#                           (except those whose name matches "cudart") is delay-loaded
+#
+# The resulting options (delayimp.lib, one /DELAYLOAD per DLL, nvcuda.dll, nvml.dll and
+# /IGNORE:4199) are appended to the EXE, MODULE and SHARED linker flags.
+macro(ocv_check_cuda_delayed_load cuda_toolkit_root_dir)
+  if(MSVC AND CUDA_ENABLE_DELAYLOAD)
+    set(DELAYFLAGS "delayimp.lib")
+    file(GLOB CUDA_DLLS "${cuda_toolkit_root_dir}/bin/*.dll")
+    foreach(d ${CUDA_DLLS})
+      # get_filename_component() is used instead of cmake_path(), which requires CMake 3.20
+      get_filename_component(DLL_NAME "${d}" NAME)
+      # Quoted so the file name is always compared as a string, never looked up as a variable
+      if(NOT "${DLL_NAME}" MATCHES "cudart")
+        set(DELAYFLAGS "${DELAYFLAGS} /DELAYLOAD:${DLL_NAME}")
+      endif()
+    endforeach()
+    set(DELAYFLAGS "${DELAYFLAGS} /DELAYLOAD:nvcuda.dll /DELAYLOAD:nvml.dll /IGNORE:4199")
+    set(CMAKE_EXE_LINKER_FLAGS       "${CMAKE_EXE_LINKER_FLAGS} ${DELAYFLAGS}")
+    set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${DELAYFLAGS}")
+    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${DELAYFLAGS}")
+  endif()
+endmacro()
--- a/cmake/OpenCVDetectCXXCompiler.cmake
+++ b/cmake/OpenCVDetectCXXCompiler.cmake
@ -28,22 +28,8 @@ if(NOT DEFINED CV_GCC AND CMAKE_CXX_COMPILER_ID MATCHES "GNU")
 endif()
 if(NOT DEFINED CV_CLANG AND CMAKE_CXX_COMPILER_ID MATCHES "Clang")  # Clang or AppleClang (see CMP0025)
  set(CV_CLANG 1)
-  set(CMAKE_COMPILER_IS_CLANGCXX 1)  # TODO next release: remove this
-  set(CMAKE_COMPILER_IS_CLANGCC 1)   # TODO next release: remove this
 endif()

-function(access_CMAKE_COMPILER_IS_CLANGCXX)
-  if(NOT OPENCV_SUPPRESS_DEPRECATIONS)
-    message(WARNING "DEPRECATED: CMAKE_COMPILER_IS_CLANGCXX support is deprecated in OpenCV.
-    Consider using:
-    - CV_GCC    # GCC
-    - CV_CLANG  # Clang or AppleClang (see CMP0025)
-")
-  endif()
-endfunction()
-variable_watch(CMAKE_COMPILER_IS_CLANGCXX access_CMAKE_COMPILER_IS_CLANGCXX)
-variable_watch(CMAKE_COMPILER_IS_CLANGCC access_CMAKE_COMPILER_IS_CLANGCXX)
-

 # ----------------------------------------------------------------------------
 # Detect Intel ICC compiler
--- a/cmake/OpenCVDetectDirectML.cmake
+++ b/cmake/OpenCVDetectDirectML.cmake
@ -0,0 +1,13 @@
+# DirectML detection (Windows only): HAVE_DIRECTML is switched ON when the probe
+# cmake/checks/directml.cpp compiles and links against d3d12, dxcore and directml.
+if(WIN32)
+  try_compile(__VALID_DIRECTML
+    "${OpenCV_BINARY_DIR}"
+    "${OpenCV_SOURCE_DIR}/cmake/checks/directml.cpp"
+    LINK_LIBRARIES d3d12 dxcore directml
+    OUTPUT_VARIABLE TRY_OUT
+  )
+  if(__VALID_DIRECTML)
+    set(HAVE_DIRECTML ON)
+  else()
+    # Probe failed: report it and leave this file without defining HAVE_DIRECTML
+    message(STATUS "No support for DirectML (d3d12, dxcore, directml libs are required)")
+    return()
+  endif()
+endif()
--- a/cmake/OpenCVDetectPython.cmake
+++ b/cmake/OpenCVDetectPython.cmake
@ -171,7 +171,7 @@ if(NOT ${found})
      endif()
    endif()

-    if(NOT ANDROID AND NOT IOS)
+    if(NOT ANDROID AND NOT IOS AND NOT XROS)
      if(CMAKE_HOST_UNIX)
        execute_process(COMMAND ${_executable} -c "from sysconfig import *; print(get_path('purelib'))"
                        RESULT_VARIABLE _cvpy_process
@ -233,7 +233,7 @@ if(NOT ${found})
                          OUTPUT_STRIP_TRAILING_WHITESPACE)
        endif()
      endif()
-    endif(NOT ANDROID AND NOT IOS)
+    endif(NOT ANDROID AND NOT IOS AND NOT XROS)
  endif()

  # Export return values
--- a/cmake/OpenCVFindCANN.cmake
+++ b/cmake/OpenCVFindCANN.cmake
@ -46,6 +46,17 @@ if(CANN_INSTALL_DIR)
        set(HAVE_CANN OFF)
        return()
    endif()
+    #  * libacl_op_compiler.so
+    set(lib_acl_op_compiler "${CANN_INSTALL_DIR}/lib64")
+    find_library(found_lib_acl_op_compiler NAMES acl_op_compiler PATHS ${lib_acl_op_compiler} NO_DEFAULT_PATH)
+    if(found_lib_acl_op_compiler)
+        set(lib_acl_op_compiler ${found_lib_acl_op_compiler})
+        message(STATUS "CANN: libacl_op_compiler.so is found at ${lib_acl_op_compiler}")
+    else()
+        message(STATUS "CANN: Missing libacl_op_compiler.so. Turning off HAVE_CANN")
+        set(HAVE_CANN OFF)
+        return()
+    endif()
    #  * libgraph.so
    set(lib_graph "${CANN_INSTALL_DIR}/compiler/lib64")
    find_library(found_lib_graph NAMES graph PATHS ${lib_graph} NO_DEFAULT_PATH)
@ -90,6 +101,7 @@ if(CANN_INSTALL_DIR)

    set(libs_cann "")
    list(APPEND libs_cann ${lib_ascendcl})
+    list(APPEND libs_cann ${lib_acl_op_compiler})
    list(APPEND libs_cann ${lib_opsproto})
    list(APPEND libs_cann ${lib_graph})
    list(APPEND libs_cann ${lib_ge_compiler})
--- a/cmake/OpenCVFindLibsGUI.cmake
+++ b/cmake/OpenCVFindLibsGUI.cmake
@ -77,7 +77,7 @@ endif(WITH_OPENGL)

 # --- Cocoa ---
 if(APPLE)
-  if(NOT IOS AND CV_CLANG)
+  if(NOT IOS AND NOT XROS AND CV_CLANG)
    set(HAVE_COCOA YES)
  endif()
 endif()
--- a/cmake/OpenCVFindLibsGrfmt.cmake
+++ b/cmake/OpenCVFindLibsGrfmt.cmake
@ -34,7 +34,7 @@ if(NOT ZLIB_FOUND)
  set(ZLIB_INCLUDE_DIRS ${ZLIB_INCLUDE_DIR})
  set(ZLIB_LIBRARIES ${ZLIB_LIBRARY})

-  ocv_parse_header2(ZLIB "${${ZLIB_LIBRARY}_SOURCE_DIR}/zlib.h" ZLIB_VERSION)
+  ocv_parse_header_version(ZLIB "${${ZLIB_LIBRARY}_SOURCE_DIR}/zlib.h" ZLIB_VERSION)
 endif()

 # --- libavif (optional) ---
@ -202,6 +202,7 @@ if(WITH_OPENJPEG)
    endif()
  else()
    set(HAVE_OPENJPEG YES)
+    set(OPENJPEG_VERSION "${OPENJPEG_MAJOR_VERSION}.${OPENJPEG_MINOR_VERSION}.${OPENJPEG_BUILD_VERSION}")
    message(STATUS "Found system OpenJPEG: ${OPENJPEG_LIBRARIES} "
            "(found version \"${OPENJPEG_VERSION}\")")
  endif()
@ -232,16 +233,38 @@ if(WITH_JASPER AND NOT HAVE_OPENJPEG)
 endif()

 if(WITH_SPNG)
-  set(SPNG_LIBRARY libspng CACHE INTERNAL "")
-  set(SPNG_LIBRARIES ${SPNG_LIBRARY})
-  add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/libspng")
-  set(SPNG_INCLUDE_DIR "${${SPNG_LIBRARY}_SOURCE_DIR}" CACHE INTERNAL "")
-  set(SPNG_DEFINITIONS "")
-  ocv_parse_header("${SPNG_INCLUDE_DIR}/spng.h" SPNG_VERSION_LINES SPNG_VERSION_MAJOR SPNG_VERSION_MINOR SPNG_VERSION_PATCH)
-
-  set(HAVE_SPNG YES)
-  set(SPNG_VERSION "${SPNG_VERSION_MAJOR}.${SPNG_VERSION_MINOR}.${SPNG_VERSION_PATCH}")
-  message(STATUS "imgcodecs: PNG codec will use SPNG, version: ${SPNG_VERSION} ")
+  # SPNG lookup order: a system package (CMake config first, then pkg-config) unless
+  # BUILD_SPNG is set; the bundled 3rdparty/libspng is used when no system SPNG is found.
+  if(BUILD_SPNG)
+    # SPNG_FOUND must be cleared as well: the bundled build below is guarded by
+    # `if(NOT SPNG_FOUND)`, so a stale value would skip it and leave HAVE_SPNG unset.
+    ocv_clear_vars(PNG_FOUND SPNG_FOUND)
+  else()
+    # CMakeConfig bug in SPNG, include is missing there in version 0.7.4 and older
+    # See https://github.com/randy408/libspng/pull/264
+    include(CMakeFindDependencyMacro)
+    find_package(SPNG QUIET)
+    if(SPNG_FOUND)
+      set(SPNG_LIBRARY "spng::spng" CACHE INTERNAL "")
+      set(SPNG_LIBRARIES ${SPNG_LIBRARY})
+    else()
+      # Fall back to pkg-config when the CMake package config is not available
+      if(PkgConfig_FOUND)
+        pkg_check_modules(SPNG QUIET spng)
+      endif()
+    endif()
+    if(SPNG_FOUND)
+      set(HAVE_SPNG YES)
+      message(STATUS "imgcodecs: PNG codec will use SPNG, version: ${SPNG_VERSION}")
+    endif()
+  endif()
+  # No usable system SPNG (or the bundled one was requested): build 3rdparty/libspng
+  if(NOT SPNG_FOUND)
+    set(SPNG_LIBRARY libspng CACHE INTERNAL "")
+    set(SPNG_LIBRARIES ${SPNG_LIBRARY})
+    add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/libspng")
+    set(SPNG_INCLUDE_DIR "${${SPNG_LIBRARY}_SOURCE_DIR}" CACHE INTERNAL "")
+    set(SPNG_DEFINITIONS "")
+    ocv_parse_header("${SPNG_INCLUDE_DIR}/spng.h" SPNG_VERSION_LINES SPNG_VERSION_MAJOR SPNG_VERSION_MINOR SPNG_VERSION_PATCH)
+
+    set(HAVE_SPNG YES)
+    set(SPNG_VERSION "${SPNG_VERSION_MAJOR}.${SPNG_VERSION_MINOR}.${SPNG_VERSION_PATCH}")
+    message(STATUS "imgcodecs: PNG codec will use SPNG, version: ${SPNG_VERSION} ")
+  endif()
 endif()

 # --- libpng (optional, should be searched after zlib) ---
@ -250,31 +273,21 @@ if(NOT HAVE_SPNG AND WITH_PNG)
    ocv_clear_vars(PNG_FOUND)
  else()
    ocv_clear_internal_cache_vars(PNG_LIBRARY PNG_INCLUDE_DIR)
-    include(FindPNG)
-    if(PNG_FOUND)
-      include(CheckIncludeFile)
-      check_include_file("${PNG_PNG_INCLUDE_DIR}/libpng/png.h" HAVE_LIBPNG_PNG_H)
-      if(HAVE_LIBPNG_PNG_H)
-        ocv_parse_header("${PNG_PNG_INCLUDE_DIR}/libpng/png.h" PNG_VERSION_LINES PNG_LIBPNG_VER_MAJOR PNG_LIBPNG_VER_MINOR PNG_LIBPNG_VER_RELEASE)
-      else()
-        ocv_parse_header("${PNG_PNG_INCLUDE_DIR}/png.h" PNG_VERSION_LINES PNG_LIBPNG_VER_MAJOR PNG_LIBPNG_VER_MINOR PNG_LIBPNG_VER_RELEASE)
-      endif()
-    endif()
+    find_package(PNG QUIET)
  endif()

  if(NOT PNG_FOUND)
-    ocv_clear_vars(PNG_LIBRARY PNG_LIBRARIES PNG_INCLUDE_DIR PNG_PNG_INCLUDE_DIR HAVE_LIBPNG_PNG_H PNG_DEFINITIONS)
+    ocv_clear_vars(PNG_LIBRARY PNG_LIBRARIES PNG_INCLUDE_DIR PNG_DEFINITIONS)

    set(PNG_LIBRARY libpng CACHE INTERNAL "")
    set(PNG_LIBRARIES ${PNG_LIBRARY})
    add_subdirectory("${OpenCV_SOURCE_DIR}/3rdparty/libpng")
    set(PNG_INCLUDE_DIR "${${PNG_LIBRARY}_SOURCE_DIR}" CACHE INTERNAL "")
    set(PNG_DEFINITIONS "")
-    ocv_parse_header("${PNG_INCLUDE_DIR}/png.h" PNG_VERSION_LINES PNG_LIBPNG_VER_MAJOR PNG_LIBPNG_VER_MINOR PNG_LIBPNG_VER_RELEASE)
+    ocv_parse_header_version(PNG "${PNG_INCLUDE_DIR}/png.h" PNG_LIBPNG_VER_STRING)
  endif()

  set(HAVE_PNG YES)
-  set(PNG_VERSION "${PNG_LIBPNG_VER_MAJOR}.${PNG_LIBPNG_VER_MINOR}.${PNG_LIBPNG_VER_RELEASE}")
 endif()


--- a/cmake/OpenCVFindLibsPerf.cmake
+++ b/cmake/OpenCVFindLibsPerf.cmake
@ -40,7 +40,11 @@ endif()

 # --- CUDA ---
 if(WITH_CUDA)
-  include("${OpenCV_SOURCE_DIR}/cmake/OpenCVDetectCUDA.cmake")
+  if(ENABLE_CUDA_FIRST_CLASS_LANGUAGE)
+    include("${OpenCV_SOURCE_DIR}/cmake/OpenCVDetectCUDALanguage.cmake")
+  else()
+    include("${OpenCV_SOURCE_DIR}/cmake/OpenCVDetectCUDA.cmake")
+  endif()
  if(NOT HAVE_CUDA)
    message(WARNING "OpenCV is not able to find/configure CUDA SDK (required by WITH_CUDA).
 CUDA support will be disabled in OpenCV build.
--- a/cmake/OpenCVGenAndroidMK.cmake
+++ b/cmake/OpenCVGenAndroidMK.cmake
@ -48,7 +48,7 @@ if(ANDROID)
  string(REPLACE "opencv_" "" OPENCV_MODULES_CONFIGMAKE "${OPENCV_MODULES_CONFIGMAKE}")

  if(BUILD_FAT_JAVA_LIB)
-    set(OPENCV_LIBS_CONFIGMAKE java4)
+    set(OPENCV_LIBS_CONFIGMAKE java5)
  else()
    set(OPENCV_LIBS_CONFIGMAKE "${OPENCV_MODULES_CONFIGMAKE}")
  endif()
--- a/cmake/OpenCVGenConfig.cmake
+++ b/cmake/OpenCVGenConfig.cmake
@ -12,7 +12,11 @@ else()
 endif()

 if(HAVE_CUDA)
-  ocv_cmake_configure("${CMAKE_CURRENT_LIST_DIR}/templates/OpenCVConfig-CUDA.cmake.in" CUDA_CONFIGCMAKE @ONLY)
+  if(ENABLE_CUDA_FIRST_CLASS_LANGUAGE)
+    ocv_cmake_configure("${CMAKE_CURRENT_LIST_DIR}/templates/OpenCVConfig-CUDALanguage.cmake.in" CUDA_CONFIGCMAKE @ONLY)
+  else()
+    ocv_cmake_configure("${CMAKE_CURRENT_LIST_DIR}/templates/OpenCVConfig-CUDA.cmake.in" CUDA_CONFIGCMAKE @ONLY)
+  endif()
 endif()

 if(ANDROID)
@ -120,7 +124,6 @@ endif()

 if(ANDROID)
  ocv_gen_config("${CMAKE_BINARY_DIR}/unix-install" "abi-${ANDROID_NDK_ABI_NAME}" "OpenCVConfig.root-ANDROID.cmake.in")
-  install(FILES "${OpenCV_SOURCE_DIR}/platforms/android/android.toolchain.cmake" DESTINATION "${OPENCV_CONFIG_INSTALL_PATH}" COMPONENT dev)
 endif()

 # --------------------------------------------------------------------------------------------
--- a/cmake/OpenCVGenInfoPlist.cmake
+++ b/cmake/OpenCVGenInfoPlist.cmake
@ -13,6 +13,14 @@ if(IOS)
    configure_file("${OpenCV_SOURCE_DIR}/platforms/ios/Info.plist.in"
                   "${CMAKE_BINARY_DIR}/ios/Info.plist")
  endif()
+elseif(XROS)
+  if(APPLE_FRAMEWORK AND DYNAMIC_PLIST)
+    configure_file("${OpenCV_SOURCE_DIR}/platforms/ios/Info.Dynamic.plist.in"
+                   "${CMAKE_BINARY_DIR}/visionos/Info.plist")
+  else()
+    configure_file("${OpenCV_SOURCE_DIR}/platforms/ios/Info.plist.in"
+                   "${CMAKE_BINARY_DIR}/visionos/Info.plist")
+  endif()
 elseif(APPLE)
  configure_file("${OpenCV_SOURCE_DIR}/platforms/osx/Info.plist.in"
                 "${CMAKE_BINARY_DIR}/osx/Info.plist")
--- a/cmake/OpenCVGenPkgconfig.cmake
+++ b/cmake/OpenCVGenPkgconfig.cmake
@ -1,4 +1,4 @@
-if(MSVC OR IOS)
+if(MSVC OR IOS OR XROS)
  return()
 endif()

--- a/cmake/OpenCVModule.cmake
+++ b/cmake/OpenCVModule.cmake
@ -1002,7 +1002,7 @@ macro(_ocv_create_module)
                                          INTERFACE ${OPENCV_MODULE_${the_module}_DEPS_EXT}
  )
  ocv_target_link_libraries(${the_module} PRIVATE ${OPENCV_LINKER_LIBS} ${OPENCV_HAL_LINKER_LIBS} ${IPP_LIBS} ${ARGN})
-  if (HAVE_CUDA)
+  if (NOT ENABLE_CUDA_FIRST_CLASS_LANGUAGE AND HAVE_CUDA)
    ocv_target_link_libraries(${the_module} PRIVATE ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
  endif()

--- a/cmake/OpenCVUtils.cmake
+++ b/cmake/OpenCVUtils.cmake
@ -1430,6 +1430,18 @@ macro(ocv_parse_header2 LIBNAME HDR_PATH VARNAME)
  endif()
 endmacro()

+# Read a quoted version string from a C/C++ header.
+#
+# Arguments:
+#   LIBNAME  - prefix of the result variable
+#   HDR_PATH - header file to scan
+#   LIBVER   - name of the macro defined as a string literal, e.g. `#define <LIBVER> "1.2.3"`
+#
+# Sets ${LIBNAME}_VERSION_STRING to the value of ${LIBVER} without quotes. The variable is
+# left cleared when the header does not exist or contains no such definition.
+# ${LIBNAME}_H is used as scratch storage for the matched line.
+macro(ocv_parse_header_version LIBNAME HDR_PATH LIBVER)
+  ocv_clear_vars(${LIBNAME}_VERSION_STRING)
+  set(${LIBNAME}_H "")
+  if(EXISTS "${HDR_PATH}")
+    file(STRINGS "${HDR_PATH}" ${LIBNAME}_H REGEX "^#define[ \t]+${LIBVER}[ \t]+\"[^\"]*\".*$" LIMIT_COUNT 1)
+  endif()
+  if(${LIBNAME}_H)
+    # The extraction pattern accepts exactly what the filter above accepts (any quoted text,
+    # optional trailing text such as a comment). REGEX REPLACE returns its input unchanged
+    # when nothing matches, so a stricter pattern here would yield the whole #define line.
+    string(REGEX REPLACE "^.*[ \t]${LIBVER}[ \t]+\"([^\"]*)\".*$" "\\1" ${LIBNAME}_VERSION_STRING "${${LIBNAME}_H}")
+  endif()
+endmacro()
+
 ################################################################################################
 # short command to setup source group
 function(ocv_source_group group)
@ -1545,13 +1557,23 @@ function(_ocv_append_target_includes target)
  endif()
 endfunction()

+# Attach CUDA-specific compile options to the target named by the caller's `target` variable.
+# The macro has no parameters of its own: `${target}` is resolved in the scope of the
+# invoking function (ocv_add_executable / ocv_add_library both have a `target` argument).
+# NOTE(review): CMAKE_CXX_FLAGS*_CUDA are presumably prepared by ocv_cuda_compile_flags() - confirm.
+macro(ocv_add_cuda_compile_flags)
+  ocv_cuda_compile_flags()
+  # The options apply to CUDA sources only; host flags are forwarded through -Xcompiler,
+  # with the Debug/Release specific parts selected per configuration.
+  target_compile_options(${target} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: ${CUDA_NVCC_FLAGS}
+  "-Xcompiler=${CMAKE_CXX_FLAGS_CUDA} $<$<CONFIG:Debug>:${CMAKE_CXX_FLAGS_DEBUG_CUDA}> \
+  $<$<CONFIG:Release>:${CMAKE_CXX_FLAGS_RELEASE_CUDA}>" >)
+endmacro()
+
 function(ocv_add_executable target)
+  # Wrapper around add_executable(): creates `target` from the remaining arguments, adds the
+  # CUDA compile flags when CUDA is built as a first-class language, then appends the
+  # include directories via _ocv_append_target_includes().
   add_executable(${target} ${ARGN})
+  if(ENABLE_CUDA_FIRST_CLASS_LANGUAGE AND HAVE_CUDA)
+    # This macro reads `target` from this function's scope
+    ocv_add_cuda_compile_flags()
+  endif()
   _ocv_append_target_includes(${target})
 endfunction()

 function(ocv_add_library target)
-  if(HAVE_CUDA AND ARGN MATCHES "\\.cu")
+  if(NOT ENABLE_CUDA_FIRST_CLASS_LANGUAGE AND HAVE_CUDA AND ARGN MATCHES "\\.cu")
    ocv_include_directories(${CUDA_INCLUDE_DIRS})
    ocv_cuda_compile(cuda_objs ${ARGN})
    set(OPENCV_MODULE_${target}_CUDA_OBJECTS ${cuda_objs} CACHE INTERNAL "Compiled CUDA object files")
@ -1559,12 +1581,16 @@ function(ocv_add_library target)

  add_library(${target} ${ARGN} ${cuda_objs})

+  if(ENABLE_CUDA_FIRST_CLASS_LANGUAGE AND HAVE_CUDA)
+    ocv_add_cuda_compile_flags()
+  endif()
+
  if(APPLE_FRAMEWORK AND BUILD_SHARED_LIBS)
    message(STATUS "Setting Apple target properties for ${target}")

    set(CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG 1)

-    if(IOS AND NOT MAC_CATALYST)
+    if((IOS OR XROS) AND NOT MAC_CATALYST)
      set(OPENCV_APPLE_INFO_PLIST "${CMAKE_BINARY_DIR}/ios/Info.plist")
    else()
      set(OPENCV_APPLE_INFO_PLIST "${CMAKE_BINARY_DIR}/osx/Info.plist")
--- a/cmake/android/android_gradle_projects.cmake
+++ b/cmake/android/android_gradle_projects.cmake
@ -1,8 +1,8 @@
 # https://developer.android.com/studio/releases/gradle-plugin
-set(ANDROID_GRADLE_PLUGIN_VERSION "3.2.1" CACHE STRING "Android Gradle Plugin version")
+set(ANDROID_GRADLE_PLUGIN_VERSION "7.3.1" CACHE STRING "Android Gradle Plugin version")
 message(STATUS "Android Gradle Plugin version: ${ANDROID_GRADLE_PLUGIN_VERSION}")

-set(KOTLIN_PLUGIN_VERSION "1.4.10" CACHE STRING "Kotlin Plugin version")
+set(KOTLIN_PLUGIN_VERSION "1.5.20" CACHE STRING "Kotlin Plugin version")
 message(STATUS "Kotlin Plugin version: ${KOTLIN_PLUGIN_VERSION}")

 if(BUILD_KOTLIN_EXTENSIONS)
@ -13,16 +13,16 @@ else()
  set(KOTLIN_STD_LIB "" CACHE STRING "Kotlin Standard Library dependency")
 endif()

-set(GRADLE_VERSION "5.6.4" CACHE STRING "Gradle version")
+set(GRADLE_VERSION "7.6.3" CACHE STRING "Gradle version")
 message(STATUS "Gradle version: ${GRADLE_VERSION}")

-set(ANDROID_COMPILE_SDK_VERSION "26" CACHE STRING "Android compileSdkVersion")
+set(ANDROID_COMPILE_SDK_VERSION "31" CACHE STRING "Android compileSdkVersion")
 if(ANDROID_NATIVE_API_LEVEL GREATER 21)
  set(ANDROID_MIN_SDK_VERSION "${ANDROID_NATIVE_API_LEVEL}" CACHE STRING "Android minSdkVersion")
 else()
  set(ANDROID_MIN_SDK_VERSION "21" CACHE STRING "Android minSdkVersion")
 endif()
-set(ANDROID_TARGET_SDK_VERSION "26" CACHE STRING "Android minSdkVersion")
+# Cache description names the option this entry actually controls (targetSdkVersion)
+set(ANDROID_TARGET_SDK_VERSION "31" CACHE STRING "Android targetSdkVersion")

 set(ANDROID_BUILD_BASE_DIR "${OpenCV_BINARY_DIR}/opencv_android" CACHE INTERNAL "")
 set(ANDROID_TMP_INSTALL_BASE_DIR "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/install/opencv_android")
@ -109,21 +109,39 @@ if(NOT OPENCV_SKIP_ANDROID_FORCE_CMAKE)
    get_filename_component(_CMAKE_INSTALL_DIR "${CMAKE_ROOT}" PATH)
    get_filename_component(_CMAKE_INSTALL_DIR "${_CMAKE_INSTALL_DIR}" PATH)
  endif()
-  ocv_update_file("${ANDROID_BUILD_BASE_DIR}/local.properties" "cmake.dir=${_CMAKE_INSTALL_DIR}")
+  ocv_update_file("${ANDROID_BUILD_BASE_DIR}/local.properties" "cmake.dir=${_CMAKE_INSTALL_DIR}\nndk.dir=${ANDROID_NDK}")
 endif()

 file(WRITE "${ANDROID_BUILD_BASE_DIR}/settings.gradle" "
+gradle.ext {
+    // possible options: 'maven_central', 'maven_local', 'sdk_path'
+    opencv_source = 'sdk_path'
+}
+
 include ':opencv'
 ")

 file(WRITE "${ANDROID_TMP_INSTALL_BASE_DIR}/settings.gradle" "
 rootProject.name = 'opencv_samples'

-def opencvsdk='../'
-//def opencvsdk='/<path to OpenCV-android-sdk>'
-//println opencvsdk
-include ':opencv'
-project(':opencv').projectDir = new File(opencvsdk + '/sdk')
+gradle.ext {
+    // possible options: 'maven_central', 'maven_local', 'sdk_path'
+    opencv_source = 'sdk_path'
+}
+
+if (gradle.opencv_source == 'maven_local') {
+    gradle.ext {
+        opencv_maven_path = '<path_to_maven_repo>'
+    }
+}
+
+if (gradle.opencv_source == 'sdk_path') {
+    def opencvsdk = '../'
+    //def opencvsdk='/<path to OpenCV-android-sdk>'
+    //println opencvsdk
+    include ':opencv'
+    project(':opencv').projectDir = new File(opencvsdk + '/sdk')
+}
 ")

 ocv_check_environment_variables(OPENCV_GRADLE_VERBOSE_OPTIONS)
@ -207,9 +225,20 @@ include ':${__dir}'
  configure_file("${path}/build.gradle.in" "${ANDROID_TMP_INSTALL_BASE_DIR}/${__dir}/build.gradle" @ONLY)
  install(FILES "${ANDROID_TMP_INSTALL_BASE_DIR}/${__dir}/build.gradle" DESTINATION "${ANDROID_INSTALL_SAMPLES_DIR}/${__dir}" COMPONENT samples)

-  file(APPEND "${ANDROID_TMP_INSTALL_BASE_DIR}/settings.gradle" "
+  # HACK: AAR packages generated from the current OpenCV project have an incomplete prefab part
+  # and cannot be used for native linkage against OpenCV.
+  # Alternative way to build AAR: https://github.com/opencv/opencv/blob/5.x/platforms/android/build_java_shared_aar.py
+  if("${__dir}" STREQUAL "tutorial-2-mixedprocessing" OR "${__dir}" STREQUAL "tutorial-4-opencl")
+    file(APPEND "${ANDROID_TMP_INSTALL_BASE_DIR}/settings.gradle" "
+if (gradle.opencv_source == 'sdk_path') {
+    include ':${__dir}'
+}
+")
+  else()
+    file(APPEND "${ANDROID_TMP_INSTALL_BASE_DIR}/settings.gradle" "
 include ':${__dir}'
 ")
+  endif()

 endmacro()

--- a/cmake/checks/cpu_fp16.cpp
+++ b/cmake/checks/cpu_fp16.cpp
@ -11,7 +11,7 @@ int test()
    _mm_storel_epi64((__m128i*)dst, v_dst);
    return (int)dst[0];
 }
-#elif defined __GNUC__ && (defined __arm__ || defined __aarch64__)
+#elif (defined __GNUC__ && (defined __arm__ || defined __aarch64__)) || (defined _MSC_VER && defined _M_ARM64)
 #include "arm_neon.h"
 int test()
 {
--- a/cmake/checks/cpu_neon_bf16.cpp
+++ b/cmake/checks/cpu_neon_bf16.cpp
@ -1,4 +1,4 @@
-#if defined __GNUC__ && (defined __arm__ || defined __aarch64__)
+#if (defined __GNUC__ && (defined __arm__ || defined __aarch64__)) || (defined _MSC_VER && defined _M_ARM64)
 #include <stdio.h>
 #include "arm_neon.h"

--- a/cmake/checks/cpu_neon_dotprod.cpp
+++ b/cmake/checks/cpu_neon_dotprod.cpp
@ -1,6 +1,6 @@
 #include <stdio.h>

-#if defined __GNUC__ && (defined __arm__ || defined __aarch64__)
+#if (defined __GNUC__ && (defined __arm__ || defined __aarch64__)) || (defined _MSC_VER && defined _M_ARM64)
 #include "arm_neon.h"
 int test()
 {
--- a/cmake/checks/cpu_neon_fp16.cpp
+++ b/cmake/checks/cpu_neon_fp16.cpp
@ -1,6 +1,6 @@
 #include <stdio.h>

-#if defined __GNUC__ && (defined __arm__ || defined __aarch64__)
+#if (defined __GNUC__ && (defined __arm__ || defined __aarch64__)) || (defined _MSC_VER && defined _M_ARM64)
 #include "arm_neon.h"

 float16x8_t vld1q_as_f16(const float* src)
--- a/cmake/checks/directml.cpp
+++ b/cmake/checks/directml.cpp
@ -0,0 +1,38 @@
+#include <initguid.h>
+
+#include <d3d11.h>
+#include <dxgi1_2.h>
+#include <dxgi1_4.h>
+#include <dxgi.h>
+#include <dxcore.h>
+#include <dxcore_interface.h>
+#include <d3d12.h>
+#include <directml.h>
+
+// Build-time probe for DirectML support: it is compiled and linked by try_compile()
+// against d3d12, dxcore and directml. The calls reference the DXCore, Direct3D 12 and
+// DirectML entry points so that missing headers or libraries show up as build errors;
+// return values are deliberately not checked.
+int main(int /*argc*/, char** /*argv*/)
+{
+    IDXCoreAdapterFactory* factory;
+    DXCoreCreateAdapterFactory(__uuidof(IDXCoreAdapterFactory), (void**)&factory);
+
+    // Enumerate adapters that support D3D12 core compute
+    IDXCoreAdapterList* adapterList;
+    const GUID dxGUIDs[] = { DXCORE_ADAPTER_ATTRIBUTE_D3D12_CORE_COMPUTE };
+    factory->CreateAdapterList(ARRAYSIZE(dxGUIDs), dxGUIDs, __uuidof(IDXCoreAdapterList), (void**)&adapterList);
+
+    IDXCoreAdapter* adapter;
+    adapterList->GetAdapter(0u, __uuidof(IDXCoreAdapter), (void**)&adapter);
+
+    D3D_FEATURE_LEVEL d3dFeatureLevel = D3D_FEATURE_LEVEL_1_0_CORE;
+    ID3D12Device* d3d12Device = NULL;
+    // The requested interface id must match the type of the output pointer (ID3D12Device)
+    D3D12CreateDevice((IUnknown*)adapter, d3dFeatureLevel, __uuidof(ID3D12Device), (void**)&d3d12Device);
+
+    D3D12_COMMAND_LIST_TYPE commandQueueType = D3D12_COMMAND_LIST_TYPE_COMPUTE;
+    ID3D12CommandQueue* cmdQueue;
+    D3D12_COMMAND_QUEUE_DESC commandQueueDesc = {};
+    commandQueueDesc.Type = commandQueueType;
+
+    d3d12Device->CreateCommandQueue(&commandQueueDesc, __uuidof(ID3D12CommandQueue), (void**)&cmdQueue);
+    IDMLDevice* dmlDevice;
+    DMLCreateDevice(d3d12Device, DML_CREATE_DEVICE_FLAG_NONE, IID_PPV_ARGS(&dmlDevice));
+
+    return 0;
+}
--- a/cmake/templates/OpenCVConfig-CUDALanguage.cmake.in
+++ b/cmake/templates/OpenCVConfig-CUDALanguage.cmake.in
@ -0,0 +1,31 @@
+# Compute capabilities that this OpenCV build was compiled for
+set(OpenCV_COMPUTE_CAPABILITIES "@OpenCV_CUDA_CC@")
+
+# CUDA toolkit version and optional CUDA components recorded from the OpenCV build
+set(OpenCV_CUDA_VERSION "@CUDA_VERSION_STRING@")
+set(OpenCV_USE_CUBLAS   "@HAVE_CUBLAS@")
+set(OpenCV_USE_CUFFT    "@HAVE_CUFFT@")
+set(OpenCV_USE_NVCUVID  "@HAVE_NVCUVID@")
+set(OpenCV_USE_NVCUVENC "@HAVE_NVCUVENC@")
+set(OpenCV_CUDNN_VERSION    "@CUDNN_VERSION@")
+set(OpenCV_USE_CUDNN        "@HAVE_CUDNN@")
+set(ENABLE_CUDA_FIRST_CLASS_LANGUAGE  ON)
+
+if(CUDAToolkit_FOUND)
+  # The including project already located a toolkit: it has to be the version OpenCV was built with
+  set(CUDA_VERSION_STRING ${CUDAToolkit_VERSION})
+  if(NOT CUDA_VERSION_STRING VERSION_EQUAL OpenCV_CUDA_VERSION)
+      message(FATAL_ERROR "OpenCV library was compiled with CUDA ${OpenCV_CUDA_VERSION} support. Please, use the same version or rebuild OpenCV with CUDA ${CUDA_VERSION_STRING}")
+  endif()
+elseif(CMAKE_VERSION VERSION_LESS 3.18)
+  message(FATAL_ERROR "Using OpenCV compiled with CUDA as first class language requires CMake \>= 3.18.")
+else()
+  # No toolkit located yet: find exactly the version OpenCV was built with.
+  # On UNIX, point CUDA_PATH at /usr/local/cuda when neither a CUDA compiler nor CUDAToolkit_ROOT is given.
+  if(UNIX AND NOT CMAKE_CUDA_COMPILER AND NOT CUDAToolkit_ROOT)
+    message(STATUS "Checking for CUDAToolkit in default location (/usr/local/cuda)")
+    set(CUDA_PATH "/usr/local/cuda" CACHE INTERNAL "")
+    set(ENV{CUDA_PATH} ${CUDA_PATH})
+  endif()
+  find_package(CUDAToolkit ${OpenCV_CUDA_VERSION} EXACT REQUIRED)
+endif()
--- a/cmake/templates/cvconfig.h.in
+++ b/cmake/templates/cvconfig.h.in
@ -75,9 +75,6 @@
 /* IJG JPEG codec */
 #cmakedefine HAVE_JPEG

-/* libpng/png.h needs to be included */
-#cmakedefine HAVE_LIBPNG_PNG_H
-
 /* GDCM DICOM codec */
 #cmakedefine HAVE_GDCM

--- a/doc/LICENSE_CHANGE_NOTICE.txt
+++ b/doc/LICENSE_CHANGE_NOTICE.txt
@ -1,4 +1,4 @@
-Starting from OpenCV 4.5-pre (2020 August) OpenCV has changed the license from BSD to Apache 2. See https://opencv.org/opencv-is-to-change-the-license-to-apache-2/ and https://github.com/opencv/opencv/wiki/OE-32.--Change-OpenCV-License-to-Apache-2 for details.
+Starting from OpenCV 4.5-pre (2020 August) OpenCV has changed the license from BSD to Apache 2. See https://opencv.org/blog/opencv-is-to-change-the-license-to-apache-2/ and https://github.com/opencv/opencv/wiki/OE-32.--Change-OpenCV-License-to-Apache-2 for details.

 Here is the original OpenCV license:
 ------------------------------------------------------------------------------------
--- a/doc/js_tutorials/js_setup/js_setup/js_setup.markdown
+++ b/doc/js_tutorials/js_setup/js_setup/js_setup.markdown
@ -54,7 +54,7 @@ repository](https://github.com/opencv/opencv.git).

 ### Obtaining the Latest Stable OpenCV Version

-   Go to our [releases page](http://opencv.org/releases.html).
+-   Go to our [releases page](https://opencv.org/releases).
 -   Download the source archive and unpack it.

 ### Obtaining the Cutting-edge OpenCV from the Git Repository
--- a/doc/opencv.bib
+++ b/doc/opencv.bib
@ -1391,3 +1391,85 @@
    YEAR       = {2016},
    MONTH      = {October},
 }
+@inproceedings{BarathGCRANSAC,
+  author = {Barath, Daniel and Matas, Jiri},
+  title = {Graph-Cut RANSAC},
+  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+  month = {June},
+  year = {2018}
+}
+@misc{barath2019progressive,
+  title={Progressive NAPSAC: sampling from gradually growing neighborhoods},
+  author={Barath, Daniel and Ivashechkin, Maksym and Matas, Jiri},
+  year={2019},
+  eprint={1906.02295},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
+}
+@inproceedings{BarathMAGSAC,
+  author = {Barath, Daniel and Noskova, Jana and Ivashechkin, Maksym and Matas, Jiri},
+  title = {MAGSAC++, a Fast, Reliable and Accurate Robust Estimator},
+  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+  month = {June},
+  year = {2020}
+}
+@inproceedings{ChumPROSAC,
+  title = {Matching with {PROSAC} - Progressive Sampling Consensus},
+  author = {Chum, Ondrej and Matas, Jiri},
+  booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR)},
+  year = {2005}
+}
+@inproceedings{ChumLORANSAC,
+  title = {Locally Optimized {RANSAC}},
+  author = {Chum, Ondrej and Matas, Jiri and Kittler, Josef},
+  booktitle = {DAGM},
+  year = {2003}
+}
+@inproceedings{ChumEpipolar,
+  author={Chum, Ondrej and Werner, Tomas and Matas, Jiri},
+  booktitle={Proceedings of the 17th International Conference on Pattern Recognition. ICPR 2004},
+  title={Epipolar geometry estimation via RANSAC benefits from the oriented epipolar constraint},
+  year={2004},
+  volume={1},
+  pages={112-115 Vol.1}
+}
+@inproceedings{ChumDominant,
+  title = {Two-View Geometry Estimation Unaffected by a Dominant Plane},
+  author = {Chum, Ondrej and Werner, Tomas and Matas, Jiri},
+  booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR)},
+  year = {2005}
+}
+@article{FischlerRANSAC,
+  author = {Fischler, Martin A. and Bolles, Robert C.},
+  title = {Random Sample Consensus: A Paradigm for Model Fitting with Applications to Image Analysis and Automated Cartography},
+  year = {1981},
+  publisher = {Association for Computing Machinery},
+  volume = {24},
+  number = {6},
+  month = {jun},
+  pages = {381--395},
+  numpages = {15}
+}
+@article{Matas2005RandomizedRW,
+  title={Randomized RANSAC with sequential probability ratio test},
+  author={Matas, Jiri and Chum, Ondrej},
+  journal={Tenth IEEE International Conference on Computer Vision (ICCV) Volume 1},
+  year={2005},
+  volume={2},
+  pages={1727-1732 Vol. 2}
+}
+@inproceedings{MyattNAPSAC,
+  author = {Myatt, D. and Torr, Philip and Nasuto, Slawomir and Bishop, John and Craddock, R.},
+  year = {2002},
+  booktitle = {Proceedings of the British Machine Vision Conference (BMVC)},
+  title = {NAPSAC: High Noise, High Dimensional Robust Estimation - it's in the Bag}
+}
+@article{SteweniusRecent,
+  author = {Stewenius, Henrik and Engels, Christopher and Nister, David},
+  year = {2006},
+  month = {06},
+  pages = {284-294},
+  title = {Recent developments on direct relative orientation},
+  volume = {60},
+  journal = {ISPRS Journal of Photogrammetry and Remote Sensing}
+}
--- a/doc/pattern_tools/test_charuco_board.py
+++ b/doc/pattern_tools/test_charuco_board.py
@ -32,7 +32,7 @@ class aruco_objdetect_test(NewOpenCVTests):
                aruco_dict = cv.aruco.getPredefinedDictionary(aruco_type[aruco_type_i])
                board = cv.aruco.CharucoBoard((cols, rows), square_size, marker_size, aruco_dict)
                charuco_detector = cv.aruco.CharucoDetector(board)
-                from_cv_img = board.generateImage((cols*square_size*10, rows*square_size*10))
+                from_cv_img = board.generateImage((cols*square_size, rows*square_size))

                #draw desk using svg
                fd1, filesvg = tempfile.mkstemp(prefix="out", suffix=".svg")
@ -48,15 +48,20 @@ class aruco_objdetect_test(NewOpenCVTests):
                    pm.make_charuco_board()
                    pm.save()
                    drawing = svg2rlg(filesvg)
-                    renderPM.drawToFile(drawing, filepng, fmt='PNG', dpi=720)
+                    renderPM.drawToFile(drawing, filepng, fmt='PNG', dpi=72)
                    from_svg_img = cv.imread(filepng)
+                    _charucoCorners, _charuco_ids_svg, marker_corners_svg, marker_ids_svg = charuco_detector.detectBoard(from_svg_img)
+                    _charucoCorners, _charuco_ids_cv, marker_corners_cv, marker_ids_cv = charuco_detector.detectBoard(from_cv_img)
+                    marker_corners_svg_map, marker_corners_cv_map = {}, {}
+                    for i in range(len(marker_ids_svg)):
+                        marker_corners_svg_map[int(marker_ids_svg[i])] = marker_corners_svg[i]
+                    for i in range(len(marker_ids_cv)):
+                        marker_corners_cv_map[int(marker_ids_cv[i])] = marker_corners_cv[i]

-                    #test
-                    _charucoCorners, _charucoIds, markerCorners_svg, markerIds_svg = charuco_detector.detectBoard(from_svg_img)
-                    _charucoCorners, _charucoIds, markerCorners_cv, markerIds_cv = charuco_detector.detectBoard(from_cv_img)
-
-                    np.testing.assert_allclose(markerCorners_svg, markerCorners_cv, 0.1, 0.1)
-                    np.testing.assert_allclose(markerIds_svg, markerIds_cv, 0.1, 0.1)
+                    for key_svg in marker_corners_svg_map.keys():
+                        marker_svg = marker_corners_svg_map[key_svg]
+                        marker_cv = marker_corners_cv_map[key_svg]
+                        np.testing.assert_allclose(marker_svg, marker_cv, 0.1, 0.1)
                finally:
                    if os.path.exists(filesvg):
                        os.remove(filesvg)
@ -85,7 +90,7 @@ class aruco_objdetect_test(NewOpenCVTests):
                aruco_dict = cv.aruco.getPredefinedDictionary(aruco_type)
                board = cv.aruco.CharucoBoard((cols, rows), square_size, marker_size, aruco_dict)
                charuco_detector = cv.aruco.CharucoDetector(board)
-                from_cv_img = board.generateImage((cols*square_size*10, rows*square_size*10))
+                from_cv_img = board.generateImage((cols*square_size, rows*square_size))

                #draw desk using svg
                fd1, filesvg = tempfile.mkstemp(prefix="out", suffix=".svg")
@ -100,15 +105,22 @@ class aruco_objdetect_test(NewOpenCVTests):
                    pm.make_charuco_board()
                    pm.save()
                    drawing = svg2rlg(filesvg)
-                    renderPM.drawToFile(drawing, filepng, fmt='PNG', dpi=720)
+                    renderPM.drawToFile(drawing, filepng, fmt='PNG', dpi=72)
                    from_svg_img = cv.imread(filepng)

                    #test
-                    _charucoCorners, _charucoIds, markerCorners_svg, markerIds_svg = charuco_detector.detectBoard(from_svg_img)
-                    _charucoCorners, _charucoIds, markerCorners_cv, markerIds_cv = charuco_detector.detectBoard(from_cv_img)
+                    _charucoCorners, _charuco_ids_svg, marker_corners_svg, marker_ids_svg = charuco_detector.detectBoard(from_svg_img)
+                    _charucoCorners, _charuco_ids_cv, marker_corners_cv, marker_ids_cv = charuco_detector.detectBoard(from_cv_img)
+                    marker_corners_svg_map, marker_corners_cv_map = {}, {}
+                    for i in range(len(marker_ids_svg)):
+                        marker_corners_svg_map[int(marker_ids_svg[i])] = marker_corners_svg[i]
+                    for i in range(len(marker_ids_cv)):
+                        marker_corners_cv_map[int(marker_ids_cv[i])] = marker_corners_cv[i]

-                    np.testing.assert_allclose(markerCorners_svg, markerCorners_cv, 0.1, 0.1)
-                    np.testing.assert_allclose(markerIds_svg, markerIds_cv, 0.1, 0.1)
+                    for key_svg in marker_corners_svg_map.keys():
+                        marker_svg = marker_corners_svg_map[key_svg]
+                        marker_cv = marker_corners_cv_map[key_svg]
+                        np.testing.assert_allclose(marker_svg, marker_cv, 0.1, 0.1)
                finally:
                    if os.path.exists(filesvg):
                        os.remove(filesvg)
--- a/doc/py_tutorials/py_bindings/py_bindings_basics/py_bindings_basics.markdown
+++ b/doc/py_tutorials/py_bindings/py_bindings_basics/py_bindings_basics.markdown
@ -79,9 +79,12 @@ Functions are extended using `CV_EXPORTS_W` macro. An example is shown below.
@code{.cpp}
 CV_EXPORTS_W void equalizeHist( InputArray src, OutputArray dst );
@endcode
-Header parser can understand the input and output arguments from keywords like
-InputArray, OutputArray etc. But sometimes, we may need to hardcode inputs and outputs. For that,
-macros like `CV_OUT`, `CV_IN_OUT` etc. are used.
+Header parser can understand the input and output arguments from keywords like InputArray,
+OutputArray etc. The argument semantics are preserved in Python: anything that is modified in C++
+will be modified in Python. Conversely, read-only Python objects cannot be modified by OpenCV
+if they are used as output; such a situation causes a Python exception. Sometimes, parameters
+that are passed by reference in C++ may be used as input, output or both.
+The macros `CV_OUT` and `CV_IN_OUT` resolve this ambiguity and generate correct bindings.
@code{.cpp}
 CV_EXPORTS_W void minEnclosingCircle( InputArray points,
                                     CV_OUT Point2f& center, CV_OUT float& radius );
--- a/doc/py_tutorials/py_gui/py_video_display/py_video_display.markdown
+++ b/doc/py_tutorials/py_gui/py_video_display/py_video_display.markdown
@ -111,7 +111,7 @@ frames per second (fps) and frame size should be passed. And the last one is the
 `True`, the encoder expect color frame, otherwise it works with grayscale frame.

 [FourCC](http://en.wikipedia.org/wiki/FourCC) is a 4-byte code used to specify the video codec. The
-list of available codes can be found in [fourcc.org](http://www.fourcc.org/codecs.php). It is
+list of available codes can be found in [fourcc.org](https://fourcc.org/codecs.php). It is
 platform dependent. The following codecs work fine for me.

 -   In Fedora: DIVX, XVID, MJPG, X264, WMV1, WMV2. (XVID is more preferable. MJPG results in high
--- a/doc/py_tutorials/py_ml/py_knn/py_knn_understanding/py_knn_understanding.markdown
+++ b/doc/py_tutorials/py_ml/py_knn/py_knn_understanding/py_knn_understanding.markdown
@ -141,7 +141,7 @@ Additional Resources
 --------------------

 1.  [NPTEL notes on Pattern Recognition, Chapter
-    11](https://nptel.ac.in/courses/106/108/106108057/)
+    11](https://nptel.ac.in/courses/106108057)
 2.  [Wikipedia article on Nearest neighbor search](https://en.wikipedia.org/wiki/Nearest_neighbor_search)
 3.  [Wikipedia article on k-d tree](https://en.wikipedia.org/wiki/K-d_tree)

--- a/doc/py_tutorials/py_ml/py_svm/py_svm_basics/py_svm_basics.markdown
+++ b/doc/py_tutorials/py_ml/py_svm/py_svm_basics/py_svm_basics.markdown
@ -129,7 +129,6 @@ Additional Resources
 --------------------

 -#  [NPTEL notes on Statistical Pattern Recognition, Chapters
-    25-29](http://www.nptel.ac.in/courses/106108057/26).
-
+    25-29](https://nptel.ac.in/courses/117108048)
 Exercises
 ---------
--- a/doc/tutorials/calib3d/camera_multiview_calibration/multiview_calibration.markdown
+++ b/doc/tutorials/calib3d/camera_multiview_calibration/multiview_calibration.markdown
@ -4,6 +4,7 @@ Multi-view Camera Calibration Tutorial {#tutorial_multiview_camera_calibration}
@tableofcontents

@prev_tutorial{tutorial_interactive_calibration}
+@next_tutorial{tutorial_usac}

 |    |    |
 | -: | :- |
--- a/doc/tutorials/calib3d/table_of_content_calib3d.markdown
+++ b/doc/tutorials/calib3d/table_of_content_calib3d.markdown
@ -7,3 +7,4 @@ Camera calibration and 3D reconstruction (calib3d module) {#tutorial_table_of_co
 -   @subpage tutorial_real_time_pose
 -   @subpage tutorial_interactive_calibration
 -   @subpage tutorial_multiview_camera_calibration
+-   @subpage tutorial_usac
--- a/doc/tutorials/calib3d/usac.markdown
+++ b/doc/tutorials/calib3d/usac.markdown
@ -1,14 +1,19 @@
---
-author:
- Maksym Ivashechkin
-bibliography: 'bibs.bib'
-csl: 'acm-sigchi-proceedings.csl'
-date: August 2020
-title: 'Google Summer of Code: Improvement of Random Sample Consensus in OpenCV'
-...
+USAC: Improvement of Random Sample Consensus in OpenCV {#tutorial_usac}
+==============================
+
+@tableofcontents
+
+@prev_tutorial{tutorial_multiview_camera_calibration}
+
+|    |    |
+| -: | :- |
+| Original author | Maksym Ivashechkin |
+| Compatibility | OpenCV >= 4.0 |
+
+This work was integrated as part of the Google Summer of Code (August 2020).

 Contribution
-============
+------

 The integrated part to OpenCV `3d` module is RANSAC-based universal
 framework USAC (`namespace usac`) written in C++. The framework includes
@ -20,25 +25,25 @@ components:

 1.  Sampling method:

-    1.  Uniform – standard RANSAC sampling proposed in \[8\] which draw
+    1.  Uniform – standard RANSAC sampling proposed in @cite FischlerRANSAC which draws a
        minimal subset independently uniformly at random. *The default
        option in proposed framework*.

-    2.  PROSAC – method \[4\] that assumes input data points sorted by
+    2.  PROSAC – method @cite ChumPROSAC that assumes input data points sorted by
        quality so sampling can start from the most promising points.
        Correspondences for this method can be sorted e.g., by ratio of
        descriptor distances of the best to second match obtained from
        SIFT detector. *This is method is recommended to use because it
        can find good model and terminate much earlier*.

-    3.  NAPSAC – sampling method \[10\] which takes initial point
+    3.  NAPSAC – sampling method @cite MyattNAPSAC which takes initial point
        uniformly at random and the rest of points for minimal sample in
        the neighborhood of initial point. This is method can be
        potentially useful when models are localized. For example, for
        plane fitting. However, in practise struggles from degenerate
        issues and defining optimal neighborhood size.

-    4.  Progressive-NAPSAC – sampler \[2\] which is similar to NAPSAC,
+    4.  Progressive-NAPSAC – sampler @cite barath2019progressive which is similar to NAPSAC,
        although it starts from local and gradually converges to
        global sampling. This method can be quite useful if local models
        are expected but distribution of data can be arbitrary. The
@ -56,7 +61,7 @@ components:
        default option in framework*. The model might not have as many
        inliers as using RANSAC score, however will be more accurate.

-    3.  MAGSAC – threshold-free method \[3\] to compute score. Using,
+    3.  MAGSAC – threshold-free method @cite BarathMAGSAC to compute score. Using,
        although, maximum sigma (standard deviation of noise) level to
        marginalize residual of point over sigma. Score of the point
        represents likelihood of point being inlier. *Recommended option
@ -86,7 +91,7 @@ components:

 4.  Degeneracy:

-    1.  DEGENSAC – method \[7\] which for Fundamental matrix estimation
+    1.  DEGENSAC – method @cite ChumDominant which for Fundamental matrix estimation
        efficiently verifies and recovers model which has at least 5
        points in minimal sample lying on the dominant plane.

@ -96,11 +101,11 @@ components:
        in minimal sample lie on the same side w.r.t. to any line
        crossing any two points in sample (does not assume reflection).

-    3.  Oriented epipolar constraint – method \[6\] for epipolar
+    3.  Oriented epipolar constraint – method @cite ChumEpipolar for epipolar
        geometry which verifies model (fundamental and essential matrix)
        to have points visible in the front of the camera.

-5.  SPRT verification – method \[9\] which verifies model by its
+5.  SPRT verification – method @cite Matas2005RandomizedRW which verifies model by its
    evaluation on randomly shuffled points using statistical properties
    given by probability of inlier, relative time for estimation,
    average number of output models etc. Significantly speeding up
@ -109,17 +114,17 @@ components:

 6.  Local Optimization:

-    1.  Locally Optimized RANSAC – method \[5\] that iteratively
+    1.  Locally Optimized RANSAC – method @cite ChumLORANSAC that iteratively
        improves so-far-the-best model by non-minimal estimation. *The
        default option in framework. This procedure is the fastest and
        not worse than others local optimization methods.*

-    2.  Graph-Cut RANSAC – method \[1\] that refine so-far-the-best
+    2.  Graph-Cut RANSAC – method @cite BarathGCRANSAC that refines the so-far-the-best
        model, however, it exploits spatial coherence of the
        data points. *This procedure is quite precise however
        computationally slower.*

-    3.  Sigma Consensus – method \[3\] which improves model by applying
+    3.  Sigma Consensus – method @cite BarathMAGSAC which improves model by applying
        non-minimal weighted estimation, where weights are computed with
        the same logic as in MAGSAC score. This method is better to use
        together with MAGSAC score.
@ -152,7 +157,7 @@ components:

    4.  Essential matrix – 4 null vectors are found using
        Gaussian elimination. Then the solver based on Gröbner basis
-        described in \[11\] is used. Essential matrix can be computed
+        described in @cite SteweniusRecent is used. Essential matrix can be computed
        only if <span style="font-variant:small-caps;">LAPACK</span> or
        <span style="font-variant:small-caps;">Eigen</span> are
        installed as it requires eigen decomposition with complex
@ -180,12 +185,12 @@ sequentially. However, using default options of framework parallel
 RANSAC is not deterministic since it depends on how often each thread is
 running. The easiest way to make it deterministic is using PROSAC
 sampler without SPRT and Local Optimization and not for Fundamental
-matrix, because they internally use random generators.\
-\
+matrix, because they internally use random generators.
+
 For NAPSAC, Progressive NAPSAC or Graph-Cut methods is required to build
 a neighborhood graph. In framework there are 3 options to do it:

-1.  `NEIGH_FLANN_KNN` – estimate neighborhood graph using OpenCV FLANN
+1.  `NEIGH_FLANN_KNN` – estimate neighborhood graph using OpenCV FLANN
    K nearest-neighbors. The default value for KNN is 7. KNN method may
    work good for sampling but not good for GC-RANSAC.

@ -193,14 +198,14 @@ a neighborhood graph. In framework there are 3 options to do it:
    points which distance is less than 20 pixels.

 3.  `NEIGH_GRID` – for finding points’ neighborhood tiles points in
-    cells using hash-table. The method is described in \[2\]. Less
+    cells using hash-table. The method is described in @cite barath2019progressive. Less
    accurate than `NEIGH_FLANN_RADIUS`, although significantly faster.

 Note, `NEIGH_FLANN_RADIUS` and `NEIGH_FLANN_RADIUS` are not able to PnP
-solver, since there are 3D object points.\
-\
-New flags:
+solver, since there are 3D object points.

+New flags:
+------
 1.  `USAC_DEFAULT` – has standard LO-RANSAC.

 2.  `USAC_PARALLEL` – has LO-RANSAC and RANSACs run in parallel.
@ -220,9 +225,10 @@ New flags:

 Every flag uses SPRT verification. And in the end the final
 so-far-the-best model is polished by non minimal estimation of all found
-inliers.\
-\
+inliers.
+
 A few other important parameters:
+------

 1.  `randomGeneratorState` – since every USAC solver is deterministic in
    OpenCV (i.e., for the same points and parameters returns the
@ -240,6 +246,7 @@ A few other important parameters:
    estimation on low number of points is faster and more robust.

 Samples:
+------

 There are three new sample files in opencv/samples directory.

@ -260,48 +267,3 @@ There are three new sample files in opencv/samples directory.
 3.  `essential_mat_reconstr.py` – the same functionality as in .cpp
    file, however instead of clustering points to plane the 3D map of
    object points is plot.
-
-References:
-
-1\. Daniel Barath and Jiří Matas. 2018. Graph-Cut RANSAC. In *Proceedings
-of the iEEE conference on computer vision and pattern recognition*,
-6733–6741.
-
-2\. Daniel Barath, Maksym Ivashechkin, and Jiri Matas. 2019. Progressive
-NAPSAC: Sampling from gradually growing neighborhoods. *arXiv preprint
-arXiv:1906.02295*.
-
-3\. Daniel Barath, Jana Noskova, Maksym Ivashechkin, and Jiri Matas.
-2020. MAGSAC++, a fast, reliable and accurate robust estimator. In
-*Proceedings of the iEEE/CVF conference on computer vision and pattern
-recognition (cVPR)*.
-
-4\. O. Chum and J. Matas. 2005. Matching with PROSAC-progressive sample
-consensus. In *Computer vision and pattern recognition*.
-
-5\. O. Chum, J. Matas, and J. Kittler. 2003. Locally optimized RANSAC. In
-*Joint pattern recognition symposium*.
-
-6\. O. Chum, T. Werner, and J. Matas. 2004. Epipolar geometry estimation
-via RANSAC benefits from the oriented epipolar constraint. In
-*International conference on pattern recognition*.
-
-7\. Ondrej Chum, Tomas Werner, and Jiri Matas. 2005. Two-view geometry
-estimation unaffected by a dominant plane. In *2005 iEEE computer
-society conference on computer vision and pattern recognition
-(cVPR’05)*, 772–779.
-
-8\. M. A. Fischler and R. C. Bolles. 1981. Random sample consensus: A
-paradigm for model fitting with applications to image analysis and
-automated cartography. *Communications of the ACM*.
-
-9\. Jiri Matas and Ondrej Chum. 2005. Randomized RANSAC with sequential
-probability ratio test. In *Tenth iEEE international conference on
-computer vision (iCCV’05) volume 1*, 1727–1732.
-
-10\. D. R. Myatt, P. H. S. Torr, S. J. Nasuto, J. M. Bishop, and R.
-Craddock. 2002. NAPSAC: High noise, high dimensional robust estimation.
-In *In bMVC02*, 458–467.
-
-11\. Henrik Stewénius, Christopher Engels, and David Nistér. 2006. Recent
-developments on direct relative orientation.
--- a/doc/tutorials/core/univ_intrin/univ_intrin.markdown
+++ b/doc/tutorials/core/univ_intrin/univ_intrin.markdown
@ -245,7 +245,7 @@ In the following section, we will vectorize a simple convolution function for si

 You may learn more about convolution from the previous tutorial. We use the same naive implementation from the previous tutorial and compare it to the vectorized version.

-The full tutorial code is [here](https://github.com/opencv/opencv/tree/5.x/samples/cpp/tutorial_code/univ_intrin/univ_intrin.cpp).
+The full tutorial code is [here](https://github.com/opencv/opencv/tree/5.x/samples/cpp/tutorial_code/core/univ_intrin/univ_intrin.cpp).

 ### Vectorizing Convolution

--- a/doc/tutorials/dnn/dnn_android/10_opencv_dependency.png
+++ b/doc/tutorials/dnn/dnn_android/10_opencv_dependency.png
--- a/doc/tutorials/dnn/dnn_android/1_start_new_project.png
+++ b/doc/tutorials/dnn/dnn_android/1_start_new_project.png
--- a/doc/tutorials/dnn/dnn_android/2_start_new_project.png
+++ b/doc/tutorials/dnn/dnn_android/2_start_new_project.png
--- a/doc/tutorials/dnn/dnn_android/3_start_new_project.png
+++ b/doc/tutorials/dnn/dnn_android/3_start_new_project.png
--- a/doc/tutorials/dnn/dnn_android/4_start_new_project.png
+++ b/doc/tutorials/dnn/dnn_android/4_start_new_project.png
--- a/doc/tutorials/dnn/dnn_android/5_setup.png
+++ b/doc/tutorials/dnn/dnn_android/5_setup.png
--- a/doc/tutorials/dnn/dnn_android/6_run_empty_project.png
+++ b/doc/tutorials/dnn/dnn_android/6_run_empty_project.png
--- a/doc/tutorials/dnn/dnn_android/7_import_module.png
+++ b/doc/tutorials/dnn/dnn_android/7_import_module.png
--- a/doc/tutorials/dnn/dnn_android/8_import_module.png
+++ b/doc/tutorials/dnn/dnn_android/8_import_module.png
--- a/doc/tutorials/dnn/dnn_android/9_opencv_dependency.png
+++ b/doc/tutorials/dnn/dnn_android/9_opencv_dependency.png
--- a/doc/tutorials/dnn/dnn_android/dnn_android.markdown
+++ b/doc/tutorials/dnn/dnn_android/dnn_android.markdown
@ -1,107 +1 @@
-# How to run deep networks on Android device {#tutorial_dnn_android}
-
-@tableofcontents
-
-@prev_tutorial{tutorial_dnn_openvino}
-@next_tutorial{tutorial_dnn_yolo}
-
-|    |    |
-| -: | :- |
-| Original author | Dmitry Kurtaev |
-| Compatibility | OpenCV >= 3.3 |
-
-## Introduction
-In this tutorial you'll know how to run deep learning networks on Android device
-using OpenCV deep learning module.
-
-Tutorial was written for the following versions of corresponding software:
- Android Studio 2.3.3
- OpenCV 3.3.0+
-
-## Requirements
-
- Download and install Android Studio from https://developer.android.com/studio.
-
- Get the latest pre-built OpenCV for Android release from https://github.com/opencv/opencv/releases and unpack it (for example, `opencv-5.X.Y-android-sdk.zip`).
-
- Download MobileNet object detection model from https://github.com/chuanqi305/MobileNet-SSD. We need a configuration file `MobileNetSSD_deploy.prototxt` and weights `MobileNetSSD_deploy.caffemodel`.
-
-## Create an empty Android Studio project
- Open Android Studio. Start a new project. Let's call it `opencv_mobilenet`.
-![](1_start_new_project.png)
-
- Keep default target settings.
-![](2_start_new_project.png)
-
- Use "Empty Activity" template. Name activity as `MainActivity` with a
-corresponding layout `activity_main`.
-![](3_start_new_project.png)
-
-  ![](4_start_new_project.png)
-
- Wait until a project was created. Go to `Run->Edit Configurations`.
-Choose `USB Device` as target device for runs.
-![](5_setup.png)
-Plug in your device and run the project. It should be installed and launched
-successfully before we'll go next.
-@note Read @ref tutorial_android_dev_intro in case of problems.
-
-![](6_run_empty_project.png)
-
-## Add OpenCV dependency
-
- Go to `File->New->Import module` and provide a path to `unpacked_OpenCV_package/sdk/java`. The name of module detects automatically.
-Disable all features that Android Studio will suggest you on the next window.
-![](7_import_module.png)
-
-  ![](8_import_module.png)
-
- Open two files:
-
-  1. `AndroidStudioProjects/opencv_mobilenet/app/build.gradle`
-
-  2. `AndroidStudioProjects/opencv_mobilenet/openCVLibrary330/build.gradle`
-
-  Copy both `compileSdkVersion` and `buildToolsVersion` from the first file to
-  the second one.
-
-  `compileSdkVersion 14` -> `compileSdkVersion 26`
-
-  `buildToolsVersion "25.0.0"` -> `buildToolsVersion "26.0.1"`
-
- Make the project. There is no errors should be at this point.
-
- Go to `File->Project Structure`. Add OpenCV module dependency.
-![](9_opencv_dependency.png)
-
-  ![](10_opencv_dependency.png)
-
- Install once an appropriate OpenCV manager from `unpacked_OpenCV_package/apk`
-to target device.
-@code
-adb install OpenCV_3.3.0_Manager_3.30_armeabi-v7a.apk
-@endcode
-
- Congratulations! We're ready now to make a sample using OpenCV.
-
-## Make a sample
-Our sample will takes pictures from a camera, forwards it into a deep network and
-receives a set of rectangles, class identifiers and confidence values in `[0, 1]`
-range.
-
- First of all, we need to add a necessary widget which displays processed
-frames. Modify `app/src/main/res/layout/activity_main.xml`:
-@include android/mobilenet-objdetect/res/layout/activity_main.xml
-
- Put downloaded `MobileNetSSD_deploy.prototxt` and `MobileNetSSD_deploy.caffemodel`
-into `app/build/intermediates/assets/debug` folder.
-
- Modify `/app/src/main/AndroidManifest.xml` to enable full-screen mode, set up
-a correct screen orientation and allow to use a camera.
-@include android/mobilenet-objdetect/gradle/AndroidManifest.xml
-
- Replace content of `app/src/main/java/org/opencv/samples/opencv_mobilenet/MainActivity.java`:
-@include android/mobilenet-objdetect/src/org/opencv/samples/opencv_mobilenet/MainActivity.java
-
- Launch an application and make a fun!
-![](11_demo.jpg)
+The page was moved to @ref tutorial_android_dnn_intro
--- a/doc/tutorials/dnn/dnn_openvino/dnn_openvino.markdown
+++ b/doc/tutorials/dnn/dnn_openvino/dnn_openvino.markdown
@ -2,7 +2,7 @@ OpenCV usage with OpenVINO {#tutorial_dnn_openvino}
 =====================

@prev_tutorial{tutorial_dnn_googlenet}
-@next_tutorial{tutorial_dnn_android}
+@next_tutorial{tutorial_dnn_yolo}

 |    |    |
 | -: | :- |
--- a/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown
+++ b/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown
@ -3,7 +3,7 @@ YOLO DNNs  {#tutorial_dnn_yolo}

@tableofcontents

-@prev_tutorial{tutorial_dnn_android}
+@prev_tutorial{tutorial_dnn_openvino}
@next_tutorial{tutorial_dnn_javascript}

 |    |    |
--- a/doc/tutorials/dnn/table_of_content_dnn.markdown
+++ b/doc/tutorials/dnn/table_of_content_dnn.markdown
@ -3,7 +3,6 @@ Deep Neural Networks (dnn module) {#tutorial_table_of_content_dnn}

 -   @subpage tutorial_dnn_googlenet
 -   @subpage tutorial_dnn_openvino
-   @subpage tutorial_dnn_android
 -   @subpage tutorial_dnn_yolo
 -   @subpage tutorial_dnn_javascript
 -   @subpage tutorial_dnn_custom_layers
--- a/doc/tutorials/imgproc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.markdown
+++ b/doc/tutorials/imgproc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.markdown
@ -117,6 +117,6 @@ References
 - [SmartDeblur] - SmartDeblur site

 <!-- invisible references list -->
-[Digital Image Processing]: http://web.ipac.caltech.edu/staff/fmasci/home/astro_refs/Digital_Image_Processing_2ndEd.pdf
+[Digital Image Processing]: http://web.ipac.caltech.edu/staff/fmasci/home/RefMaterial/ImageProc/Book_DigitalImageProcessing.pdf
 [Image Deblurring in Matlab]: https://www.mathworks.com/help/images/image-deblurring.html
 [SmartDeblur]: http://yuzhikov.com/articles/BlurredImagesRestoration1.htm
--- a/doc/tutorials/introduction/android_binary_package/O4A_SDK.markdown
+++ b/doc/tutorials/introduction/android_binary_package/O4A_SDK.markdown
@ -1,255 +0,0 @@
-OpenCV4Android SDK {#tutorial_O4A_SDK}
-==================
-
-@prev_tutorial{tutorial_android_dev_intro}
-@next_tutorial{tutorial_dev_with_OCV_on_Android}
-
-|    |    |
-| -: | :- |
-| Original author | Vsevolod Glumov |
-| Compatibility | OpenCV >= 3.0 |
-
-@warning
-This tutorial is deprecated.
-
-This tutorial was designed to help you with installation and configuration of OpenCV4Android SDK.
-
-This guide was written with MS Windows 7 in mind, though it should work with GNU Linux and Apple Mac
-OS as well.
-
-This tutorial assumes you have the following software installed and configured:
-
-   JDK
-   Android SDK and NDK
-   Eclipse IDE
-   ADT and CDT plugins for Eclipse
-
-If you need help with any of the above, you may refer to our @ref tutorial_android_dev_intro guide.
-
-If you encounter any error after thoroughly following these steps, feel free to contact us via
-[OpenCV4Android](https://groups.google.com/group/android-opencv/) discussion group or OpenCV [Q&A
-forum](https://forum.opencv.org). We'll do our best to help you out.
-
-General info
------------
-
-OpenCV4Android SDK package enables development of Android applications with use of OpenCV library.
-
-The structure of package contents looks as follows:
-
-    OpenCV-2.4.9-android-sdk
-    |_ apk
-    |   |_ OpenCV_2.4.9_binary_pack_armv7a.apk
-    |   |_ OpenCV_2.4.9_Manager_2.18_XXX.apk
-    |
-    |_ doc
-    |_ samples
-    |_ sdk
-    |    |_ etc
-    |    |_ java
-    |    |_ native
-    |          |_ 3rdparty
-    |          |_ jni
-    |          |_ libs
-    |               |_ armeabi
-    |               |_ armeabi-v7a
-    |               |_ x86
-    |
-    |_ LICENSE
-    |_ README.android
-
-   `sdk` folder contains OpenCV API and libraries for Android:
-   `sdk/java` folder contains an Android library Eclipse project providing OpenCV Java API that can
-    be imported into developer's workspace;
-   `sdk/native` folder contains OpenCV C++ headers (for JNI code) and native Android libraries
-    (\*.so and \*.a) for ARM-v5, ARM-v7a and x86 architectures;
-   `sdk/etc` folder contains Haar and LBP cascades distributed with OpenCV.
-   `apk` folder contains Android packages that should be installed on the target Android device to
-    enable OpenCV library access via OpenCV Manager API (see details below).
-
-    On production devices that have access to Google Play Market (and Internet) these packages will
-    be installed from Market on the first start of an application using OpenCV Manager API. But
-    devkits without Market or Internet connection require these packages to be installed manually.
-    Install the Manager.apk and optional binary_pack.apk if it is needed. See `Manager Selection`
-    for details.
-
-    @note Installation from the Internet is the preferable way since OpenCV team may publish updated
-    versions of these packages on the Market.
-
-   `samples` folder contains sample applications projects
-    and their prebuilt packages (APK). Import them into Eclipse workspace (like described below) and
-    browse the code to learn possible ways of OpenCV use on Android.
-
-   `doc` folder contains various OpenCV documentation in PDF format. It's also available online at
-    <http://docs.opencv.org>.
-    @note The most recent docs (nightly build) are at <http://docs.opencv.org/5.x>. Generally, it's more
-    up-to-date, but can refer to not-yet-released functionality.
-    @todo I'm not sure that this is the best place to talk about OpenCV Manager
-
-Starting from version 2.4.3 OpenCV4Android SDK uses OpenCV Manager API for library initialization.
-OpenCV Manager is an Android service based solution providing the following benefits for OpenCV
-applications developers:
-
-   Compact apk-size, since all applications use the same binaries from Manager and do not store
-    native libs within themselves;
-   Hardware specific optimizations are automatically enabled on all supported platforms;
-   Automatic updates and bug fixes;
-   Trusted OpenCV library source. All packages with OpenCV are published on Google Play;
-
-
-Manual OpenCV4Android SDK setup
-------------------------------
-
-### Get the OpenCV4Android SDK
-
-#  Go to the [OpenCV download page on
-    SourceForge](http://sourceforge.net/projects/opencvlibrary/files/) and download
-    the latest available version. This tutorial is based on this package: [OpenCV-2.4.9-android-sdk.zip](http://sourceforge.net/projects/opencvlibrary/files/opencv-android/2.4.9/OpenCV-2.4.9-android-sdk.zip/download).
-#  Create a new folder for Android with OpenCV development. For this tutorial we have unpacked
-    OpenCV SDK to the `C:\Work\OpenCV4Android\` directory.
-
-    @note Better to use a path without spaces in it. Otherwise you may have problems with ndk-build.
-
-#  Unpack the SDK archive into the chosen directory.
-
-    You can unpack it using any popular archiver (e.g. with 7-Zip):
-
-    ![](images/android_package_7zip.png)
-
-    On Unix you can use the following command:
-    @code{.bash}
-    unzip ~/Downloads/OpenCV-2.4.9-android-sdk.zip
-    @endcode
-
-### Import OpenCV library and samples to the Eclipse
-
-#  Start Eclipse and choose your workspace location.
-
-    We recommend to start working with OpenCV for Android from a new clean workspace. A new Eclipse
-    workspace can for example be created in the folder where you have unpacked OpenCV4Android SDK
-    package:
-
-    ![](images/eclipse_1_choose_workspace.png)
-
-#  Import OpenCV library and samples into workspace.
-
-    OpenCV library is packed as a ready-for-use [Android Library
-    Project](http://developer.android.com/guide/developing/projects/index.html#LibraryProjects). You
-    can simply reference it in your projects.
-
-    Each sample included into the `OpenCV-2.4.9-android-sdk.zip` is a regular Android project that
-    already references OpenCV library. Follow the steps below to import OpenCV and samples into the
-    workspace:
-
-    -   Right click on the Package Explorer window and choose Import... option from the context
-        menu:
-
-        ![](images/eclipse_5_import_command.png)
-
-    -   In the main panel select General --\> Existing Projects into Workspace and press Next
-        button:
-
-        ![](images/eclipse_6_import_existing_projects.png)
-
-    -   In the Select root directory field locate your OpenCV package folder. Eclipse should
-        automatically locate OpenCV library and samples:
-
-        ![](images/eclipse_7_select_projects.png)
-
-    -   Click Finish button to complete the import operation.
-
-    @note OpenCV samples are indeed **dependent** on OpenCV library project so don't forget to import it to your workspace as well.
-
-    After clicking Finish button Eclipse will load all selected projects into workspace, and you
-    have to wait some time while it is building OpenCV samples. Just give a minute to Eclipse to
-    complete initialization.
-
-    ![](images/eclipse_cdt_cfg4.png)
-
-    Once Eclipse completes build you will have the clean workspace without any build errors:
-
-    ![](images/eclipse_10_crystal_clean.png)
-
-@anchor tutorial_O4A_SDK_samples
-### Running OpenCV Samples
-
-At this point you should be able to build and run the samples. Keep in mind that face-detection and
-Tutorial 2 - Mixed Processing include some native code and require Android NDK and NDK/CDT plugin
-for Eclipse to build working applications. If you haven't installed these tools, see the
-corresponding section of @ref tutorial_android_dev_intro.
-
-**warning**
-
-Please consider that some samples use Android Java Camera API, which is accessible with an AVD.
-
-@note Recent *Android SDK tools, revision 19+* can run ARM v7a OS images, but they are not available
-for all Android versions.
-
-Well, running samples from Eclipse is very simple:
-
-   Connect your device with adb tool from Android SDK or create an emulator with camera support.
-    -   See [Managing Virtual Devices](http://developer.android.com/guide/developing/devices/index.html) document for help
-        with Android Emulator.
-    -   See [Using Hardware Devices](http://developer.android.com/guide/developing/device.html) for
-        help with real devices (not emulators).
-
-   Select project you want to start in Package Explorer and just press Ctrl + F11 or select option
-    Run --\> Run from the main menu, or click Run button on the toolbar.
-
-    @note Android Emulator can take several minutes to start. So, please, be patient.
-
-    On the first run Eclipse will ask you about the running mode for your application:
-
-    ![](images/eclipse_11_run_as.png)
-
-   Select the Android Application option and click OK button. Eclipse will install and run the
-    sample.
-
-    Chances are that on the first launch you will not have the [OpenCV
-    Manager](https://docs.google.com/a/itseez.com/presentation/d/1EO_1kijgBg_BsjNp2ymk-aarg-0K279_1VZRcPplSuk/present#slide=id.p)
-    package installed. In this case you will see the following message:
-
-    ![](images/android_emulator_opencv_manager_fail.png)
-
-    To get rid of the message you will need to install OpenCV Manager and the appropriate
-    OpenCV binary pack. Simply tap Yes if you have *Google Play Market* installed on your
-    device/emulator. It will redirect you to the corresponding page on *Google Play Market*.
-
-    If you have no access to the *Market*, which is often the case with emulators - you will need to
-    install the packages from OpenCV4Android SDK folder manually. See `Manager Selection` for
-    details.
-    @code{.sh}
-    <Android SDK path>/platform-tools/adb install <OpenCV4Android SDK path>/apk/OpenCV_2.4.9_Manager_2.18_armv7a-neon.apk
-    @endcode
-
-    @note armeabi, armv7a-neon, arm7a-neon-android8, mips and x86 stand for platform targets:
-        -   armeabi is for ARM v5 and ARM v6 architectures with Android API 8+,
-        -   armv7a-neon is for NEON-optimized ARM v7 with Android API 9+,
-        -   arm7a-neon-android8 is for NEON-optimized ARM v7 with Android API 8,
-        -   mips is for MIPS architecture with Android API 9+,
-        -   x86 is for Intel x86 CPUs with Android API 9+.
-
-    @note
-    If using hardware device for testing/debugging, run the following command to learn its CPU
-    architecture:
-    @code{.sh}
-    adb shell getprop ro.product.cpu.abi
-    @endcode
-    If you're using an AVD emulator, go to Window \> AVD Manager to see the list of available devices.
-    Click Edit in the context menu of the selected device. In the window that then pops up, find
-    the CPU field.
-
-    @note
-    You may also see section `Manager Selection` for details.
-
-    When done, you will be able to run OpenCV samples on your device/emulator seamlessly.
-
-   Here is Sample - image-manipulations sample, running on top of stock camera-preview of the
-    emulator.
-
-    ![](images/emulator_canny.png)
-
-What's next
-----------
-
-Now that you have your instance of OpenCV4Android SDK set up and configured, you may want to proceed
-to using OpenCV in your own application. You can learn how to do that in a separate @ref tutorial_dev_with_OCV_on_Android tutorial.
--- a/Show More
+++ b/Show More