Merge branch 4.x

pull/21903/head
OpenCV Developers 3 years ago
commit 0fbd58bef9
  1. .github/workflows/PR-4.x-U20.yaml (163)
  2. .github/workflows/timvx_backend_tests.yml (104)
  3. 3rdparty/libtim-vx/tim-vx.cmake (73)
  4. 3rdparty/readme.txt (2)
  5. 3rdparty/zlib/CMakeLists.txt (1)
  6. 3rdparty/zlib/ChangeLog (161)
  7. 3rdparty/zlib/README (11)
  8. 3rdparty/zlib/crc32.c (1258)
  9. 3rdparty/zlib/crc32.h (9877)
  10. 3rdparty/zlib/deflate.c (108)
  11. 3rdparty/zlib/deflate.h (27)
  12. 3rdparty/zlib/gzguts.h (5)
  13. 3rdparty/zlib/gzlib.c (8)
  14. 3rdparty/zlib/gzread.c (12)
  15. 3rdparty/zlib/gzwrite.c (38)
  16. 3rdparty/zlib/infback.c (3)
  17. 3rdparty/zlib/inffast.c (28)
  18. 3rdparty/zlib/inflate.c (47)
  19. 3rdparty/zlib/inflate.h (5)
  20. 3rdparty/zlib/inftrees.c (6)
  21. 3rdparty/zlib/trees.c (75)
  22. 3rdparty/zlib/zlib.h (223)
  23. 3rdparty/zlib/zutil.c (2)
  24. 3rdparty/zlib/zutil.h (25)
  25. CMakeLists.txt (16)
  26. cmake/OpenCVCompilerOptions.cmake (4)
  27. cmake/OpenCVDownload.cmake (53)
  28. cmake/OpenCVFindTIMVX.cmake (69)
  29. cmake/checks/cpu_neon.cpp (1)
  30. cmake/mirrors/custom.cmake (91)
  31. cmake/mirrors/gitcode.cmake (86)
  32. doc/js_tutorials/js_assets/webnn-electron/package.json (2)
  33. doc/js_tutorials/js_setup/js_setup/js_setup.markdown (8)
  34. doc/tutorials/calib3d/usac.markdown (4)
  35. doc/tutorials/core/how_to_use_OpenCV_parallel_for_new/how_to_use_OpenCV_parallel_for_new.markdown (2)
  36. doc/tutorials/dnn/dnn_halide_scheduling/dnn_halide_scheduling.markdown (2)
  37. doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown (4)
  38. doc/tutorials/gapi/anisotropic_segmentation/porting_anisotropic_segmentation.markdown (2)
  39. doc/tutorials/gapi/interactive_face_detection/interactive_face_detection.markdown (2)
  40. doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.markdown (2)
  41. doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.markdown (2)
  42. doc/tutorials/introduction/linux_gdb_pretty_printer/linux_gdb_pretty_printer.markdown (6)
  43. doc/tutorials/ios/image_manipulation/image_manipulation.markdown (2)
  44. modules/3d/test/test_undistort_points.cpp (80)
  45. modules/calib/include/opencv2/calib.hpp (5)
  46. modules/calib/perf/perf_undistort.cpp (44)
  47. modules/calib/src/fisheye.cpp (25)
  48. modules/calib/test/test_fisheye.cpp (49)
  49. modules/core/include/opencv2/core.hpp (10)
  50. modules/core/include/opencv2/core/bindings_utils.hpp (47)
  51. modules/core/include/opencv2/core/hal/intrin_msa.hpp (12)
  52. modules/core/include/opencv2/core/hal/intrin_neon.hpp (26)
  53. modules/core/include/opencv2/core/hal/intrin_rvv.hpp (433)
  54. modules/core/include/opencv2/core/hal/msa_macros.h (6)
  55. modules/core/include/opencv2/core/matx.hpp (20)
  56. modules/core/include/opencv2/core/operations.hpp (16)
  57. modules/core/include/opencv2/core/vsx_utils.hpp (4)
  58. modules/core/misc/objc/gen_dict.json (20)
  59. modules/core/perf/perf_arithm.cpp (24)
  60. modules/core/src/directx.cpp (1)
  61. modules/core/src/mathfuncs.cpp (6)
  62. modules/core/src/matrix.cpp (2)
  63. modules/core/src/matrix_transform.cpp (67)
  64. modules/core/src/persistence.cpp (2)
  65. modules/core/src/system.cpp (3)
  66. modules/core/src/va_wrapper.impl.hpp (20)
  67. modules/core/test/test_arithm.cpp (105)
  68. modules/core/test/test_io.cpp (21)
  69. modules/core/test/test_mat.cpp (24)
  70. modules/dnn/CMakeLists.txt (9)
  71. modules/dnn/include/opencv2/dnn/all_layers.hpp (40)
  72. modules/dnn/include/opencv2/dnn/dnn.hpp (42)
  73. modules/dnn/src/cuda/activations.cu (21)
  74. modules/dnn/src/cuda/concat.cu (2)
  75. modules/dnn/src/cuda/functors.hpp (46)
  76. modules/dnn/src/cuda/kernel_dispatcher.hpp (2)
  77. modules/dnn/src/cuda/permute.cu (4)
  78. modules/dnn/src/cuda/slice.cu (2)
  79. modules/dnn/src/cuda4dnn/csl/cudnn/cudnn.hpp (45)
  80. modules/dnn/src/cuda4dnn/csl/cudnn/recurrent.hpp (195)
  81. modules/dnn/src/cuda4dnn/csl/memory.hpp (2)
  82. modules/dnn/src/cuda4dnn/csl/pointer.hpp (4)
  83. modules/dnn/src/cuda4dnn/csl/tensor_ops.hpp (85)
  84. modules/dnn/src/cuda4dnn/kernels/activations.hpp (8)
  85. modules/dnn/src/cuda4dnn/primitives/activation.hpp (46)
  86. modules/dnn/src/cuda4dnn/primitives/recurrent_cells.hpp (97)
  87. modules/dnn/src/darknet/darknet_io.cpp (4)
  88. modules/dnn/src/dnn.cpp (5853)
  89. modules/dnn/src/dnn_common.hpp (41)
  90. modules/dnn/src/dnn_params.cpp (67)
  91. modules/dnn/src/dnn_read.cpp (93)
  92. modules/dnn/src/dnn_utils.cpp (158)
  93. modules/dnn/src/ie_ngraph.cpp (2)
  94. modules/dnn/src/init.cpp (5)
  95. modules/dnn/src/int8layers/batch_norm_layer.cpp (122)
  96. modules/dnn/src/int8layers/convolution_layer.cpp (260)
  97. modules/dnn/src/int8layers/elementwise_layers.cpp (138)
  98. modules/dnn/src/int8layers/eltwise_layer.cpp (150)
  99. modules/dnn/src/int8layers/fully_connected_layer.cpp (133)
  100. modules/dnn/src/int8layers/pooling_layer.cpp (157)
Some files were not shown because too many files have changed in this diff.

.github/workflows/PR-4.x-U20.yaml
@ -0,0 +1,163 @@
name: PR:4.x U20
on:
pull_request:
branches:
- 4.x
env:
EXTRA_CMAKE_OPTIONS: '-DBUILD_DOCS=ON -DPYTHON_DEFAULT_EXECUTABLE=/usr/bin/python3 -DBUILD_EXAMPLES=ON -DOPENCV_ENABLE_NONFREE=ON -DENABLE_CCACHE=OFF'
OPENCV_TEST_DATA_PATH: '/opencv_extra/testdata'
OPENCV_DOCKER_WORKDIR: '/__w/opencv/opencv'
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
SOURCE_BRANCH_NAME: ${{ github.head_ref }}
TARGET_BRANCH_NAME: ${{ github.base_ref }}
ANT_HOME: '/usr/share/ant'
PYTHONPATH: /opencv-build/python_loader:$PYTHONPATH
jobs:
BuildAndTest:
runs-on: ubuntu-20.04
defaults:
run:
shell: bash
container:
image: quay.io/asenyaev/opencv-ubuntu:20.04
steps:
- name: PR info
run: |
echo "PR Author: ${{ env.PR_AUTHOR }}"
echo "Source branch name: ${{ env.SOURCE_BRANCH_NAME }}"
echo "Target branch name: ${{ env.TARGET_BRANCH_NAME }}"
- name: Clean
run: find . -mindepth 1 -delete
- name: Fetch opencv
uses: actions/checkout@v3
with:
repository: opencv/opencv
ref: ${{ env.TARGET_BRANCH_NAME }}
fetch-depth: 0
- name: Merge opencv with ${{ env.SOURCE_BRANCH_NAME }} branch
run: |
cd ${{ env.OPENCV_DOCKER_WORKDIR }}
git config --global --add safe.directory ${{ env.OPENCV_DOCKER_WORKDIR }}
git config user.email "opencv.ci"
git config user.name "opencv.ci"
git pull -v "https://github.com/${{ env.PR_AUTHOR }}/opencv" "${{ env.SOURCE_BRANCH_NAME }}"
- name: Clone opencv_extra
run: git clone --single-branch --branch ${{ env.TARGET_BRANCH_NAME }} --depth 1 https://github.com/opencv/opencv_extra.git /opencv_extra
- name: Configure OpenCV
run: |
cd /opencv-build
cmake -G Ninja ${{ env.EXTRA_CMAKE_OPTIONS }} ${{ env.OPENCV_DOCKER_WORKDIR }}
- name: Build OpenCV
run: |
cd /opencv-build
ninja
- name: Accuracy:calib3d
run: cd /opencv-build && xvfb-run -a bin/opencv_test_calib3d
- name: Accuracy:core
run: cd /opencv-build && xvfb-run -a bin/opencv_test_core
- name: Accuracy:dnn
run: cd /opencv-build && xvfb-run -a bin/opencv_test_dnn
- name: Accuracy:features2d
run: cd /opencv-build && xvfb-run -a bin/opencv_test_features2d
- name: Accuracy:flann
run: cd /opencv-build && xvfb-run -a bin/opencv_test_flann
- name: Accuracy:gapi
run: cd /opencv-build && xvfb-run -a bin/opencv_test_gapi
- name: Accuracy:highgui
run: cd /opencv-build && xvfb-run -a bin/opencv_test_highgui
- name: Accuracy:imgcodecs
run: cd /opencv-build && xvfb-run -a bin/opencv_test_imgcodecs
- name: Accuracy:imgproc
run: cd /opencv-build && xvfb-run -a bin/opencv_test_imgproc
- name: Accuracy:ml
run: cd /opencv-build && xvfb-run -a bin/opencv_test_ml
- name: Accuracy:objdetect
run: cd /opencv-build && xvfb-run -a bin/opencv_test_objdetect
- name: Accuracy:photo
run: cd /opencv-build && xvfb-run -a bin/opencv_test_photo
- name: Accuracy:stitching
run: cd /opencv-build && xvfb-run -a bin/opencv_test_stitching
- name: Accuracy:video
run: cd /opencv-build && xvfb-run -a bin/opencv_test_video
- name: Accuracy:videoio
run: cd /opencv-build && xvfb-run -a bin/opencv_test_videoio
- name: Performance:calib3d
run: cd /opencv-build && xvfb-run -a bin/opencv_perf_calib3d --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1
- name: Performance:core
run: cd /opencv-build && xvfb-run -a bin/opencv_perf_core --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1
- name: Performance:dnn
run: cd /opencv-build && xvfb-run -a bin/opencv_perf_dnn --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1
- name: Performance:features2d
run: cd /opencv-build && xvfb-run -a bin/opencv_perf_features2d --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1
- name: Performance:gapi
run: cd /opencv-build && xvfb-run -a bin/opencv_perf_gapi --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1
- name: Performance:imgcodecs
run: cd /opencv-build && xvfb-run -a bin/opencv_perf_imgcodecs --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1
- name: Performance:imgproc
run: cd /opencv-build && xvfb-run -a bin/opencv_perf_imgproc --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1
- name: Performance:objdetect
run: cd /opencv-build && xvfb-run -a bin/opencv_perf_objdetect --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1
- name: Performance:photo
run: cd /opencv-build && xvfb-run -a bin/opencv_perf_photo --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1
- name: Performance:stitching
run: cd /opencv-build && xvfb-run -a bin/opencv_perf_stitching --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1
- name: Performance:video
run: cd /opencv-build && xvfb-run -a bin/opencv_perf_video --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1
- name: Performance:videoio
run: cd /opencv-build && xvfb-run -a bin/opencv_perf_videoio --perf_impl=plain --perf_min_samples=1 --perf_force_samples=1 --perf_verify_sanity --skip_unstable=1
- name: Python3
run: |
cd ${{ env.OPENCV_DOCKER_WORKDIR }}/modules/python/test
python3 ./test.py --repo ../../../ -v
- name: Java
run: cd /opencv-build && xvfb-run -a python3 ${{ env.OPENCV_DOCKER_WORKDIR }}/modules/ts/misc/run.py . -a -t java
- name: Save Unit Test Results
uses: actions/upload-artifact@v3
if: always()
with:
name: junit-html
path: /opencv-build/java_test/testResults/junit-noframes.html
- name: Pylint
run: cd /opencv-build && cmake --build . --config release --target check_pylint -- -j4
BuildContrib:
runs-on: ubuntu-20.04
defaults:
run:
shell: bash
container:
image: quay.io/asenyaev/opencv-ubuntu:20.04
steps:
- name: PR info
run: |
echo "PR Author: ${{ env.PR_AUTHOR }}"
echo "Source branch name: ${{ env.SOURCE_BRANCH_NAME }}"
echo "Target branch name: ${{ env.TARGET_BRANCH_NAME }}"
- name: Clean
run: find . -mindepth 1 -delete
- name: Fetch opencv
uses: actions/checkout@v3
with:
repository: opencv/opencv
ref: ${{ env.TARGET_BRANCH_NAME }}
fetch-depth: 0
- name: Merge opencv with a test branch
run: |
cd ${{ env.OPENCV_DOCKER_WORKDIR }}
git config --global --add safe.directory ${{ env.OPENCV_DOCKER_WORKDIR }}
git config user.email "opencv.ci"
git config user.name "opencv.ci"
git pull -v "https://github.com/${{ env.PR_AUTHOR }}/opencv" "${{ env.SOURCE_BRANCH_NAME }}"
- name: Clone opencv_contrib
run: git clone --single-branch --branch ${{ env.TARGET_BRANCH_NAME }} --depth 1 https://github.com/opencv/opencv_contrib.git /opencv_contrib
- name: Configure OpenCV Contrib
run: |
cd /opencv-contrib-build
cmake -G Ninja ${{ env.EXTRA_CMAKE_OPTIONS }} -DOPENCV_EXTRA_MODULES_PATH=/opencv_contrib/modules ${{ env.OPENCV_DOCKER_WORKDIR }}
- name: Build OpenCV Contrib
run: |
cd /opencv-contrib-build
ninja

.github/workflows/timvx_backend_tests.yml
@ -0,0 +1,104 @@
name: TIM-VX Backend
on:
pull_request:
branches: [ 4.x ]
types: [ labeled, opened, synchronize, reopened ]
jobs:
x86-simulator-build-test:
runs-on: ubuntu-20.04
# Docker image from https://hub.docker.com/r/yuentau/ocv_ubuntu
container: docker.io/yuentau/ocv_ubuntu:20.04
env:
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
SOURCE_BRANCH_NAME: ${{ github.head_ref }}
TARGET_BRANCH_NAME: ${{ github.base_ref }}
steps:
- name: info
run: |
echo "PR Author: ${{ env.PR_AUTHOR }}"
echo "Source branch name: ${{ env.SOURCE_BRANCH_NAME }}"
echo "Target branch name: ${{ env.TARGET_BRANCH_NAME }}"
- name: clean
shell: bash
run: find . -mindepth 1 -delete
- name: fetch opencv
uses: actions/checkout@v3
with:
repository: opencv/opencv
ref: ${{ env.TARGET_BRANCH_NAME }}
fetch-depth: 0
path: opencv
- name: merge opencv with test branch
shell: bash
run: |
cd opencv
git config user.email "opencv.ci"
git config user.name "opencv.ci"
git pull -v "https://github.com/${{ env.PR_AUTHOR }}/opencv" "${{ env.SOURCE_BRANCH_NAME }}" --allow-unrelated-histories
- name: configure
run: |
cmake -B build -DWITH_TIMVX=ON -DCMAKE_INSTALL_PREFIX=./install -DBUILD_SHARED_LIBS=ON -DBUILD_PERF_TESTS=ON -DBUILD_TESTS=ON -DBUILD_EXAMPLES=OFF -DBUILD_DOCS=OFF -DWITH_OPENCL=OFF opencv
- name: build
run: cmake --build build --target install -j $(nproc)
khadas-vim3-tests:
if: contains(github.event.pull_request.labels.*.name, 'category:dnn_timvx')
concurrency:
group: khadas-vim3
cancel-in-progress: false
runs-on: [self-hosted, Linux, ARM64, khadas-vim3]
env:
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
SOURCE_BRANCH_NAME: ${{ github.head_ref }}
TARGET_BRANCH_NAME: ${{ github.base_ref }}
steps:
- name: info
run: |
echo "PR Author: ${{ env.PR_AUTHOR }}"
echo "Source branch name: ${{ env.SOURCE_BRANCH_NAME }}"
echo "Target branch name: ${{ env.TARGET_BRANCH_NAME }}"
- name: clean
shell: bash
run: find . -mindepth 1 -delete
- name: fetch opencv
uses: actions/checkout@v3
with:
repository: opencv/opencv
ref: ${{ env.TARGET_BRANCH_NAME }}
fetch-depth: 0
path: opencv
- name: merge opencv with test branch
shell: bash
run: |
cd opencv
git config user.email "opencv.ci"
git config user.name "opencv.ci"
git pull -v "https://github.com/${{ env.PR_AUTHOR }}/opencv" "${{ env.SOURCE_BRANCH_NAME }}" --allow-unrelated-histories
- name: fetch opencv_extra
uses: actions/checkout@v3
with:
repository: opencv/opencv_extra
path: opencv_extra
- name: merge opencv_extra with test branch
shell: bash
run: |
RET=$(git ls-remote --heads "https://github.com/${{ env.PR_AUTHOR }}/opencv_extra" "${{ env.SOURCE_BRANCH_NAME }}")
if [[ ! -z "$RET" ]]; then
cd opencv_extra
git config user.email "opencv.ci"
git config user.name "opencv.ci"
git pull -v "https://github.com/${{ env.PR_AUTHOR }}/opencv_extra" "${{ env.SOURCE_BRANCH_NAME }}" --allow-unrelated-histories
else
echo "no merge since ${{ env.PR_AUTHOR }}/opencv_extra does not have branch ${{ env.SOURCE_BRANCH_NAME }}"
fi
- name: configure
run: |
cmake -B build -D CMAKE_BUILD_TYPE=RELEASE -DCMAKE_INSTALL_PREFIX=./install -DWITH_TIMVX=ON -DWITH_OPENCL=OFF -DWITH_EIGEN=OFF opencv
- name: build
run: cmake --build build --target opencv_test_dnn -j 4
- name: unit tests for int8 layers
run: |
OPENCV_TEST_DATA_PATH=./opencv_extra/testdata ./build/bin/opencv_test_dnn --gtest_filter="Test_Int8_layers.*/1"

3rdparty/libtim-vx/tim-vx.cmake
@ -0,0 +1,73 @@
set(TIMVX_COMMIT_HASH "1d9c7ab941b3d8d9c4d28d80058402725731e3d6")
set(OCV_TIMVX_DIR "${OpenCV_BINARY_DIR}/3rdparty/libtim-vx")
set(OCV_TIMVX_SOURCE_PATH "${OCV_TIMVX_DIR}/TIM-VX-${TIMVX_COMMIT_HASH}")
# Download TIM-VX source code
if(EXISTS "${OCV_TIMVX_SOURCE_PATH}")
message(STATUS "TIM-VX: Use cache of TIM-VX source code at ${OCV_TIMVX_SOURCE_PATH}")
set(TIMVX_FOUND ON)
else()
set(OCV_TIMVX_FILENAME "${TIMVX_COMMIT_HASH}.zip")
set(OCV_TIMVX_URL "https://github.com/VeriSilicon/TIM-VX/archive/")
set(timvx_zip_md5sum 92619cc4498014ac7a09834d5e33ebd5)
ocv_download(FILENAME ${OCV_TIMVX_FILENAME}
HASH ${timvx_zip_md5sum}
URL "${OCV_TIMVX_URL}"
DESTINATION_DIR "${OCV_TIMVX_DIR}"
ID "TIM-VX"
STATUS res
UNPACK RELATIVE_URL)
if(res)
set(TIMVX_FOUND ON)
message(STATUS "TIM-VX: Source code downloaded at ${OCV_TIMVX_SOURCE_PATH}.")
else()
set(TIMVX_FOUND OFF)
message(STATUS "TIM-VX: Failed to download source code from github. Turning off TIMVX_FOUND")
return()
endif()
endif()
# set VIVANTE SDK especially for x86_64 which comes along with TIM-VX source code
if(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64)
set(VIVANTE_SDK_DIR "${OCV_TIMVX_SOURCE_PATH}/prebuilt-sdk/x86_64_linux")
message(STATUS "TIM-VX: Build from source using prebuilt x86_64 VIVANTE SDK.")
endif()
# Verify if requested VIVANTE SDK libraries are all found
find_vivante_sdk_libs(missing ${VIVANTE_SDK_DIR})
if(missing)
message(STATUS "TIM-VX: Failed to find ${missing} in ${VIVANTE_SDK_DIR}/lib. Turning off TIMVX_VIV_FOUND")
set(TIMVX_VIV_FOUND OFF)
else()
message(STATUS "TIM-VX: dependent VIVANTE SDK libraries are found at ${VIVANTE_SDK_DIR}/lib.")
set(TIMVX_VIV_FOUND ON)
endif()
if(TIMVX_VIV_FOUND)
# vars used by TIM-VX CMake scripts
set(EXTERNAL_VIV_SDK "${VIVANTE_SDK_DIR}" CACHE INTERNAL "" FORCE)
set(VIV_SDK_DRIVER_PREFIX "lib" CACHE INTERNAL "" FORCE)
endif()
if(TIMVX_FOUND AND TIMVX_VIV_FOUND)
set(BUILD_TIMVX ON)
else()
return()
endif()
if(BUILD_TIMVX)
set(HAVE_TIMVX 1)
ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter -Wstrict-prototypes -Wundef -Wsign-compare -Wmissing-prototypes -Wmissing-declarations -Wstrict-aliasing -Wunused-but-set-variable -Wmaybe-uninitialized -Wshadow -Wsuggest-override -Wswitch)
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wunused-parameter -Wstrict-prototypes -Wundef -Wsign-compare -Wunused-but-set-variable -Wshadow -Wsuggest-override -Wmissing-declarations -Wswitch)
set(TIMVX_INC_DIR "${OCV_TIMVX_SOURCE_PATH}/include" CACHE INTERNAL "TIM-VX include directory")
if(EXISTS "${OCV_TIMVX_SOURCE_PATH}/CMakeLists.txt")
add_subdirectory("${OCV_TIMVX_SOURCE_PATH}" "${OCV_TIMVX_DIR}/build")
else()
message(WARNING "TIM-VX: Missing 'CMakeLists.txt' in the source code: ${OCV_TIMVX_SOURCE_PATH}")
endif()
ocv_install_target(tim-vx EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev)
set(TIMVX_LIB "tim-vx")
endif()

3rdparty/readme.txt
@ -37,7 +37,7 @@ libtiff Tag Image File Format (TIFF) Software
WITH_TIFF CMake option must be ON to add libtiff & zlib support to imgcodecs.
------------------------------------------------------------------------------------
zlib General purpose LZ77 compression library
Copyright (C) 1995-2012 Jean-loup Gailly and Mark Adler.
Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler.
See zlib home page http://www.zlib.net
for details and links to the source code
------------------------------------------------------------------------------------

3rdparty/zlib/CMakeLists.txt
@ -83,6 +83,7 @@ ocv_warnings_disable(CMAKE_C_FLAGS -Wshorten-64-to-32 -Wattributes -Wstrict-prot
-Wundef # _LFS64_LARGEFILE is not defined
/wd4267 # MSVS 2015 (x64) + zlib 1.2.11
-Wimplicit-fallthrough
/wd4244 # MSVS + zlib 1.2.12: warning C4244: '=': conversion from 'ush' to 'uchf', possible loss of data
)
set_target_properties(${ZLIB_LIBRARY} PROPERTIES

3rdparty/zlib/ChangeLog
@ -1,6 +1,69 @@
ChangeLog file for zlib
Changes in 1.2.12 (27 Mar 2022)
- Cygwin does not have _wopen(), so do not create gzopen_w() there
- Permit a deflateParams() parameter change as soon as possible
- Limit hash table inserts after switch from stored deflate
- Fix bug when window full in deflate_stored()
- Fix CLEAR_HASH macro to be usable as a single statement
- Avoid a conversion error in gzseek when off_t type too small
- Have Makefile return non-zero error code on test failure
- Avoid some conversion warnings in gzread.c and gzwrite.c
- Update use of errno for newer Windows CE versions
- Small speedup to inflate [psumbera]
- Return an error if the gzputs string length can't fit in an int
- Add address checking in clang to -w option of configure
- Don't compute check value for raw inflate if asked to validate
- Handle case where inflateSync used when header never processed
- Avoid the use of ptrdiff_t
- Avoid an undefined behavior of memcpy() in gzappend()
- Avoid undefined behaviors of memcpy() in gz*printf()
- Avoid an undefined behavior of memcpy() in _tr_stored_block()
- Make the names in functions declarations identical to definitions
- Remove old assembler code in which bugs have manifested
- Fix deflateEnd() to not report an error at start of raw deflate
- Add legal disclaimer to README
- Emphasize the need to continue decompressing gzip members
- Correct the initialization requirements for deflateInit2()
- Fix a bug that can crash deflate on some input when using Z_FIXED
- Assure that the number of bits for deflatePrime() is valid
- Use a structure to make globals in enough.c evident
- Use a macro for the printf format of big_t in enough.c
- Clean up code style in enough.c, update version
- Use inline function instead of macro for index in enough.c
- Clarify that prefix codes are counted in enough.c
- Show all the codes for the maximum tables size in enough.c
- Add gznorm.c example, which normalizes gzip files
- Fix the zran.c example to work on a multiple-member gzip file
- Add tables for crc32_combine(), to speed it up by a factor of 200
- Add crc32_combine_gen() and crc32_combine_op() for fast combines
- Speed up software CRC-32 computation by a factor of 1.5 to 3
- Use atomic test and set, if available, for dynamic CRC tables
- Don't bother computing check value after successful inflateSync()
- Correct comment in crc32.c
- Add use of the ARMv8 crc32 instructions when requested
- Use ARM crc32 instructions if the ARM architecture has them
- Explicitly note that the 32-bit check values are 32 bits
- Avoid adding empty gzip member after gzflush with Z_FINISH
- Fix memory leak on error in gzlog.c
- Fix error in comment on the polynomial representation of a byte
- Clarify gz* function interfaces, referring to parameter names
- Change macro name in inflate.c to avoid collision in VxWorks
- Correct typo in blast.c
- Improve portability of contrib/minizip
- Fix indentation in minizip's zip.c
- Replace black/white with allow/block. (theresa-m)
- minizip warning fix if MAXU32 already defined. (gvollant)
- Fix unztell64() in minizip to work past 4GB. (Daniël Hörchner)
- Clean up minizip to reduce warnings for testing
- Add fallthrough comments for gcc
- Eliminate use of ULL constants
- Separate out address sanitizing from warnings in configure
- Remove destructive aspects of make distclean
- Check for cc masquerading as gcc or clang in configure
- Fix crc32.c to compile local functions only if used
Changes in 1.2.11 (15 Jan 2017)
- Fix deflate stored bug when pulling last block from window
- Permit immediate deflateParams changes before any deflate input
@ -511,7 +574,7 @@ Changes in 1.2.3.5 (8 Jan 2010)
- Don't use _vsnprintf on later versions of MSVC [Lowman]
- Add CMake build script and input file [Lowman]
- Update contrib/minizip to 1.1 [Svensson, Vollant]
- Moved nintendods directory from contrib to .
- Moved nintendods directory from contrib to root
- Replace gzio.c with a new set of routines with the same functionality
- Add gzbuffer(), gzoffset(), gzclose_r(), gzclose_w() as part of above
- Update contrib/minizip to 1.1b
@ -685,7 +748,7 @@ Changes in 1.2.2.4 (11 July 2005)
- Be more strict on incomplete code sets in inflate_table() and increase
ENOUGH and MAXD -- this repairs a possible security vulnerability for
invalid inflate input. Thanks to Tavis Ormandy and Markus Oberhumer for
discovering the vulnerability and providing test cases.
discovering the vulnerability and providing test cases
- Add ia64 support to configure for HP-UX [Smith]
- Add error return to gzread() for format or i/o error [Levin]
- Use malloc.h for OS/2 [Necasek]
@ -721,7 +784,7 @@ Changes in 1.2.2.2 (30 December 2004)
- Add Z_FIXED strategy option to deflateInit2() to force fixed trees
- Add updated make_vms.com [Coghlan], update README
- Create a new "examples" directory, move gzappend.c there, add zpipe.c,
fitblk.c, gzlog.[ch], gzjoin.c, and zlib_how.html.
fitblk.c, gzlog.[ch], gzjoin.c, and zlib_how.html
- Add FAQ entry and comments in deflate.c on uninitialized memory access
- Add Solaris 9 make options in configure [Gilbert]
- Allow strerror() usage in gzio.c for STDC
@ -792,7 +855,7 @@ Changes in 1.2.1.1 (9 January 2004)
- Fix a big fat bug in inftrees.c that prevented decoding valid
dynamic blocks with only literals and no distance codes --
Thanks to "Hot Emu" for the bug report and sample file
- Add a note to puff.c on no distance codes case.
- Add a note to puff.c on no distance codes case
Changes in 1.2.1 (17 November 2003)
- Remove a tab in contrib/gzappend/gzappend.c
@ -1036,14 +1099,14 @@ Changes in 1.2.0 (9 March 2003)
- Add contrib/puff/ simple inflate for deflate format description
Changes in 1.1.4 (11 March 2002)
- ZFREE was repeated on same allocation on some error conditions.
- ZFREE was repeated on same allocation on some error conditions
This creates a security problem described in
http://www.zlib.org/advisory-2002-03-11.txt
- Returned incorrect error (Z_MEM_ERROR) on some invalid data
- Avoid accesses before window for invalid distances with inflate window
less than 32K.
less than 32K
- force windowBits > 8 to avoid a bug in the encoder for a window size
of 256 bytes. (A complete fix will be available in 1.1.5).
of 256 bytes. (A complete fix will be available in 1.1.5)
Changes in 1.1.3 (9 July 1998)
- fix "an inflate input buffer bug that shows up on rare but persistent
@ -1117,7 +1180,7 @@ Changes in 1.1.1 (27 Feb 98)
- remove block truncation heuristic which had very marginal effect for zlib
(smaller lit_bufsize than in gzip 1.2.4) and degraded a little the
compression ratio on some files. This also allows inlining _tr_tally for
matches in deflate_slow.
matches in deflate_slow
- added msdos/Makefile.w32 for WIN32 Microsoft Visual C++ (Bob Frazier)
Changes in 1.1.0 (24 Feb 98)
@ -1162,7 +1225,7 @@ Changes in 1.0.8 (27 Jan 1998)
- include sys/types.h to get off_t on some systems (Marc Lehmann & QingLong)
- use constant arrays for the static trees in trees.c instead of computing
them at run time (thanks to Ken Raeburn for this suggestion). To create
trees.h, compile with GEN_TREES_H and run "make test".
trees.h, compile with GEN_TREES_H and run "make test"
- check return code of example in "make test" and display result
- pass minigzip command line options to file_compress
- simplifying code of inflateSync to avoid gcc 2.8 bug
@ -1201,12 +1264,12 @@ Changes in 1.0.6 (19 Jan 1998)
- add functions gzprintf, gzputc, gzgetc, gztell, gzeof, gzseek, gzrewind and
gzsetparams (thanks to Roland Giersig and Kevin Ruland for some of this code)
- Fix a deflate bug occurring only with compression level 0 (thanks to
Andy Buckler for finding this one).
- In minigzip, pass transparently also the first byte for .Z files.
Andy Buckler for finding this one)
- In minigzip, pass transparently also the first byte for .Z files
- return Z_BUF_ERROR instead of Z_OK if output buffer full in uncompress()
- check Z_FINISH in inflate (thanks to Marc Schluper)
- Implement deflateCopy (thanks to Adam Costello)
- make static libraries by default in configure, add --shared option.
- make static libraries by default in configure, add --shared option
- move MSDOS or Windows specific files to directory msdos
- suppress the notion of partial flush to simplify the interface
(but the symbol Z_PARTIAL_FLUSH is kept for compatibility with 1.0.4)
@ -1218,7 +1281,7 @@ Changes in 1.0.6 (19 Jan 1998)
- added Makefile.nt (thanks to Stephen Williams)
- added the unsupported "contrib" directory:
contrib/asm386/ by Gilles Vollant <info@winimage.com>
386 asm code replacing longest_match().
386 asm code replacing longest_match()
contrib/iostream/ by Kevin Ruland <kevin@rodin.wustl.edu>
A C++ I/O streams interface to the zlib gz* functions
contrib/iostream2/ by Tyge Løvset <Tyge.Lovset@cmr.no>
@ -1226,7 +1289,7 @@ Changes in 1.0.6 (19 Jan 1998)
contrib/untgz/ by "Pedro A. Aranda Guti\irrez" <paag@tid.es>
A very simple tar.gz file extractor using zlib
contrib/visual-basic.txt by Carlos Rios <c_rios@sonda.cl>
How to use compress(), uncompress() and the gz* functions from VB.
How to use compress(), uncompress() and the gz* functions from VB
- pass params -f (filtered data), -h (huffman only), -1 to -9 (compression
level) in minigzip (thanks to Tom Lane)
@ -1235,8 +1298,8 @@ Changes in 1.0.6 (19 Jan 1998)
- add undocumented function inflateSyncPoint() (hack for Paul Mackerras)
- add undocumented function zError to convert error code to string
(for Tim Smithers)
- Allow compilation of gzio with -DNO_DEFLATE to avoid the compression code.
- Use default memcpy for Symantec MSDOS compiler.
- Allow compilation of gzio with -DNO_DEFLATE to avoid the compression code
- Use default memcpy for Symantec MSDOS compiler
- Add EXPORT keyword for check_func (needed for Windows DLL)
- add current directory to LD_LIBRARY_PATH for "make test"
- create also a link for libz.so.1
@ -1249,7 +1312,7 @@ Changes in 1.0.6 (19 Jan 1998)
- allow compilation with ANSI keywords only enabled for TurboC in large model
- avoid "versionString"[0] (Borland bug)
- add NEED_DUMMY_RETURN for Borland
- use variable z_verbose for tracing in debug mode (L. Peter Deutsch).
- use variable z_verbose for tracing in debug mode (L. Peter Deutsch)
- allow compilation with CC
- defined STDC for OS/2 (David Charlap)
- limit external names to 8 chars for MVS (Thomas Lund)
@ -1259,7 +1322,7 @@ Changes in 1.0.6 (19 Jan 1998)
- use _fdopen instead of fdopen for MSC >= 6.0 (Thomas Fanslau)
- added makelcc.bat for lcc-win32 (Tom St Denis)
- in Makefile.dj2, use copy and del instead of install and rm (Frank Donahoe)
- Avoid expanded $Id$. Use "rcs -kb" or "cvs admin -kb" to avoid Id expansion.
- Avoid expanded $Id$. Use "rcs -kb" or "cvs admin -kb" to avoid Id expansion
- check for unistd.h in configure (for off_t)
- remove useless check parameter in inflate_blocks_free
- avoid useless assignment of s->check to itself in inflate_blocks_new
@ -1280,7 +1343,7 @@ Changes in 1.0.5 (3 Jan 98)
Changes in 1.0.4 (24 Jul 96)
- In very rare conditions, deflate(s, Z_FINISH) could fail to produce an EOF
bit, so the decompressor could decompress all the correct data but went
on to attempt decompressing extra garbage data. This affected minigzip too.
on to attempt decompressing extra garbage data. This affected minigzip too
- zlibVersion and gzerror return const char* (needed for DLL)
- port to RISCOS (no fdopen, no multiple dots, no unlink, no fileno)
- use z_error only for DEBUG (avoid problem with DLLs)
@ -1310,7 +1373,7 @@ Changes in 1.0.1 (20 May 96) [1.0 skipped to avoid confusion]
- fix array overlay in deflate.c which sometimes caused bad compressed data
- fix inflate bug with empty stored block
- fix MSDOS medium model which was broken in 0.99
- fix deflateParams() which could generate bad compressed data.
- fix deflateParams() which could generate bad compressed data
- Bytef is define'd instead of typedef'ed (work around Borland bug)
- added an INDEX file
- new makefiles for DJGPP (Makefile.dj2), 32-bit Borland (Makefile.b32),
@ -1331,7 +1394,7 @@ Changes in 0.99 (27 Jan 96)
- allow preset dictionary shared between compressor and decompressor
- allow compression level 0 (no compression)
- add deflateParams in zlib.h: allow dynamic change of compression level
and compression strategy.
and compression strategy
- test large buffers and deflateParams in example.c
- add optional "configure" to build zlib as a shared library
- suppress Makefile.qnx, use configure instead
@ -1373,30 +1436,30 @@ Changes in 0.99 (27 Jan 96)
- use STDC instead of __GO32__ to avoid redeclaring exit, calloc, etc...
- use Z_BINARY instead of BINARY
- document that gzclose after gzdopen will close the file
- allow "a" as mode in gzopen.
- allow "a" as mode in gzopen
- fix error checking in gzread
- allow skipping .gz extra-field on pipes
- added reference to Perl interface in README
- put the crc table in FAR data (I dislike more and more the medium model :)
- added get_crc_table
- added a dimension to all arrays (Borland C can't count).
- added a dimension to all arrays (Borland C can't count)
- workaround Borland C bug in declaration of inflate_codes_new & inflate_fast
- guard against multiple inclusion of *.h (for precompiled header on Mac)
- Watcom C pretends to be Microsoft C small model even in 32 bit mode.
- Watcom C pretends to be Microsoft C small model even in 32 bit mode
- don't use unsized arrays to avoid silly warnings by Visual C++:
warning C4746: 'inflate_mask' : unsized array treated as '__far'
(what's wrong with far data in far model?).
(what's wrong with far data in far model?)
- define enum out of inflate_blocks_state to allow compilation with C++
Changes in 0.95 (16 Aug 95)
- fix MSDOS small and medium model (now easier to adapt to any compiler)
- inlined send_bits
- fix the final (:-) bug for deflate with flush (output was correct but
not completely flushed in rare occasions).
not completely flushed in rare occasions)
- default window size is same for compression and decompression
(it's now sufficient to set MAX_WBITS in zconf.h).
(it's now sufficient to set MAX_WBITS in zconf.h)
- voidp -> voidpf and voidnp -> voidp (for consistency with other
typedefs and because voidnp was not near in large model).
typedefs and because voidnp was not near in large model)
Changes in 0.94 (13 Aug 95)
- support MSDOS medium model
@ -1405,12 +1468,12 @@ Changes in 0.94 (13 Aug 95)
- added support for VMS
- allow a compression level in gzopen()
- gzflush now calls fflush
- For deflate with flush, flush even if no more input is provided.
- For deflate with flush, flush even if no more input is provided
- rename libgz.a as libz.a
- avoid complex expression in infcodes.c triggering Turbo C bug
- work around a problem with gcc on Alpha (in INSERT_STRING)
- don't use inline functions (problem with some gcc versions)
- allow renaming of Byte, uInt, etc... with #define.
- allow renaming of Byte, uInt, etc... with #define
- avoid warning about (unused) pointer before start of array in deflate.c
- avoid various warnings in gzio.c, example.c, infblock.c, adler32.c, zutil.c
- avoid reserved word 'new' in trees.c
@ -1429,7 +1492,7 @@ Changes in 0.92 (3 May 95)
- no memcpy on Pyramid
- suppressed inftest.c
- optimized fill_window, put longest_match inline for gcc
- optimized inflate on stored blocks.
- optimized inflate on stored blocks
- untabify all sources to simplify patches
Changes in 0.91 (2 May 95)
@ -1447,7 +1510,7 @@ Changes in 0.9 (1 May 95)
- let again gzread copy uncompressed data unchanged (was working in 0.71)
- deflate(Z_FULL_FLUSH), inflateReset and inflateSync are now fully implemented
- added a test of inflateSync in example.c
- moved MAX_WBITS to zconf.h because users might want to change that.
- moved MAX_WBITS to zconf.h because users might want to change that
- document explicitly that zalloc(64K) on MSDOS must return a normalized
pointer (zero offset)
- added Makefiles for Microsoft C, Turbo C, Borland C++
@ -1456,7 +1519,7 @@ Changes in 0.9 (1 May 95)
Changes in 0.8 (29 April 95)
- added fast inflate (inffast.c)
- deflate(Z_FINISH) now returns Z_STREAM_END when done. Warning: this
is incompatible with previous versions of zlib which returned Z_OK.
is incompatible with previous versions of zlib which returned Z_OK
- work around a TurboC compiler bug (bad code for b << 0, see infutil.h)
(actually that was not a compiler bug, see 0.81 above)
- gzread no longer reads one extra byte in certain cases
@ -1466,50 +1529,50 @@ Changes in 0.8 (29 April 95)
Changes in 0.71 (14 April 95)
- Fixed more MSDOS compilation problems :( There is still a bug with
TurboC large model.
TurboC large model
Changes in 0.7 (14 April 95)
- Added full inflate support.
- Added full inflate support
- Simplified the crc32() interface. The pre- and post-conditioning
(one's complement) is now done inside crc32(). WARNING: this is
incompatible with previous versions; see zlib.h for the new usage.
incompatible with previous versions; see zlib.h for the new usage
Changes in 0.61 (12 April 95)
- workaround for a bug in TurboC. example and minigzip now work on MSDOS.
- workaround for a bug in TurboC. example and minigzip now work on MSDOS
Changes in 0.6 (11 April 95)
- added minigzip.c
- added gzdopen to reopen a file descriptor as gzFile
- added transparent reading of non-gziped files in gzread.
- added transparent reading of non-gziped files in gzread
- fixed bug in gzread (don't read crc as data)
- fixed bug in destroy (gzio.c) (don't return Z_STREAM_END for gzclose).
- fixed bug in destroy (gzio.c) (don't return Z_STREAM_END for gzclose)
- don't allocate big arrays in the stack (for MSDOS)
- fix some MSDOS compilation problems
Changes in 0.5:
- do real compression in deflate.c. Z_PARTIAL_FLUSH is supported but
not yet Z_FULL_FLUSH.
not yet Z_FULL_FLUSH
- support decompression but only in a single step (forced Z_FINISH)
- added opaque object for zalloc and zfree.
- added opaque object for zalloc and zfree
- added deflateReset and inflateReset
- added a variable zlib_version for consistency checking.
- renamed the 'filter' parameter of deflateInit2 as 'strategy'.
Added Z_FILTERED and Z_HUFFMAN_ONLY constants.
- added a variable zlib_version for consistency checking
- renamed the 'filter' parameter of deflateInit2 as 'strategy'
Added Z_FILTERED and Z_HUFFMAN_ONLY constants
Changes in 0.4:
- avoid "zip" everywhere, use zlib instead of ziplib.
- avoid "zip" everywhere, use zlib instead of ziplib
- suppress Z_BLOCK_FLUSH, interpret Z_PARTIAL_FLUSH as block flush
if compression method == 8.
if compression method == 8
- added adler32 and crc32
- renamed deflateOptions as deflateInit2, call one or the other but not both
- added the method parameter for deflateInit2.
- added the method parameter for deflateInit2
- added inflateInit2
- simplied considerably deflateInit and inflateInit by not supporting
user-provided history buffer. This is supported only in deflateInit2
and inflateInit2.
and inflateInit2
Changes in 0.3:
- prefix all macro names with Z_
- use Z_FINISH instead of deflateEnd to finish compression.
- use Z_FINISH instead of deflateEnd to finish compression
- added Z_HUFFMAN_ONLY
- added gzerror()

3rdparty/zlib/README
@ -1,6 +1,6 @@
ZLIB DATA COMPRESSION LIBRARY
zlib 1.2.11 is a general purpose data compression library. All the code is
zlib 1.2.12 is a general purpose data compression library. All the code is
thread safe. The data format used by the zlib library is described by RFCs
(Request for Comments) 1950 to 1952 in the files
http://tools.ietf.org/html/rfc1950 (zlib format), rfc1951 (deflate format) and
@ -31,7 +31,7 @@ Mark Nelson <markn@ieee.org> wrote an article about zlib for the Jan. 1997
issue of Dr. Dobb's Journal; a copy of the article is available at
http://marknelson.us/1997/01/01/zlib-engine/ .
The changes made in version 1.2.11 are documented in the file ChangeLog.
The changes made in version 1.2.12 are documented in the file ChangeLog.
Unsupported third party contributions are provided in directory contrib/ .
@ -84,7 +84,7 @@ Acknowledgments:
Copyright notice:
(C) 1995-2017 Jean-loup Gailly and Mark Adler
(C) 1995-2022 Jean-loup Gailly and Mark Adler
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
@ -108,7 +108,10 @@ Copyright notice:
If you use the zlib library in a product, we would appreciate *not* receiving
lengthy legal documents to sign. The sources are provided for free but without
warranty of any kind. The library has been entirely written by Jean-loup
Gailly and Mark Adler; it does not include third-party code.
Gailly and Mark Adler; it does not include third-party code. We make all
contributions to and distributions of this project solely in our personal
capacity, and are not conveying any rights to any intellectual property of
any third parties.
If you redistribute modified sources, we would appreciate that you include in
the file ChangeLog history information documenting your changes. Please read

3rdparty/zlib/crc32.c vendored (1258)
File diff suppressed because it is too large.

3rdparty/zlib/crc32.h vendored (9877)
File diff suppressed because it is too large.

3rdparty/zlib/deflate.c
@ -1,5 +1,5 @@
/* deflate.c -- compress data using the deflation algorithm
* Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler
* Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -52,7 +52,7 @@
#include "deflate.h"
const char deflate_copyright[] =
" deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler ";
" deflate 1.2.12 Copyright 1995-2022 Jean-loup Gailly and Mark Adler ";
/*
If you use the zlib library in a product, an acknowledgment is welcome
in the documentation of your product. If for some reason you cannot
@ -190,8 +190,11 @@ local const config configuration_table[10] = {
* prev[] will be initialized on the fly.
*/
#define CLEAR_HASH(s) \
s->head[s->hash_size-1] = NIL; \
zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head));
do { \
s->head[s->hash_size-1] = NIL; \
zmemzero((Bytef *)s->head, \
(unsigned)(s->hash_size-1)*sizeof(*s->head)); \
} while (0)
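
The do { ... } while (0) wrapper above is what makes CLEAR_HASH usable as a single statement. A standalone sketch of the hazard it removes (toy types and names, not zlib code):

    #include <string.h>

    /* Old style: two statements plus a trailing semicolon. */
    #define CLEAR_HASH_OLD(h, n)  \
        (h)[(n)-1] = 0;           \
        memset((h), 0, ((n)-1) * sizeof((h)[0]));

    /* New style: behaves as exactly one statement. */
    #define CLEAR_HASH_NEW(h, n) do {              \
        (h)[(n)-1] = 0;                            \
        memset((h), 0, ((n)-1) * sizeof((h)[0]));  \
    } while (0)

    static void reset_hash(int *hash, int n, int full)
    {
        if (full)
            CLEAR_HASH_NEW(hash, n);   /* fine under if/else */
        else
            hash[0] = 0;
        /* Swapping in CLEAR_HASH_OLD above would not compile: the memset()
         * escapes the if branch and the stray ';' orphans the else. */
    }
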
/* ===========================================================================
* Slide the hash table when sliding the window down (could be avoided with 32
@ -252,11 +255,6 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
int wrap = 1;
static const char my_version[] = ZLIB_VERSION;
ushf *overlay;
/* We overlay pending_buf and d_buf+l_buf. This works since the average
* output size for (length,distance) codes is <= 24 bits.
*/
if (version == Z_NULL || version[0] != my_version[0] ||
stream_size != sizeof(z_stream)) {
return Z_VERSION_ERROR;
@ -326,9 +324,47 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2);
s->pending_buf = (uchf *) overlay;
s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);
/* We overlay pending_buf and sym_buf. This works since the average size
* for length/distance pairs over any compressed block is assured to be 31
* bits or less.
*
* Analysis: The longest fixed codes are a length code of 8 bits plus 5
* extra bits, for lengths 131 to 257. The longest fixed distance codes are
* 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest
* possible fixed-codes length/distance pair is then 31 bits total.
*
* sym_buf starts one-fourth of the way into pending_buf. So there are
* three bytes in sym_buf for every four bytes in pending_buf. Each symbol
* in sym_buf is three bytes -- two for the distance and one for the
* literal/length. As each symbol is consumed, the pointer to the next
* sym_buf value to read moves forward three bytes. From that symbol, up to
* 31 bits are written to pending_buf. The closest the written pending_buf
* bits gets to the next sym_buf symbol to read is just before the last
* code is written. At that time, 31*(n-2) bits have been written, just
* after 24*(n-2) bits have been consumed from sym_buf. sym_buf starts at
* 8*n bits into pending_buf. (Note that the symbol buffer fills when n-1
* symbols are written.) The closest the writing gets to what is unread is
* then n+14 bits. Here n is lit_bufsize, which is 16384 by default, and
* can range from 128 to 32768.
*
* Therefore, at a minimum, there are 142 bits of space between what is
* written and what is read in the overlain buffers, so the symbols cannot
* be overwritten by the compressed data. That space is actually 139 bits,
* due to the three-bit fixed-code block header.
*
* That covers the case where either Z_FIXED is specified, forcing fixed
* codes, or when the use of fixed codes is chosen, because that choice
* results in a smaller compressed block than dynamic codes. That latter
* condition then assures that the above analysis also covers all dynamic
* blocks. A dynamic-code block will only be chosen to be emitted if it has
* fewer bits than a fixed-code block would for the same set of symbols.
* Therefore its average symbol length is assured to be less than 31. So
* the compressed data for a dynamic block also cannot overwrite the
* symbols from which it is being constructed.
*/
s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4);
s->pending_buf_size = (ulg)s->lit_bufsize * 4;
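
For reference, the n + 14 margin cited in the comment above is plain bookkeeping (a restatement, not part of the patch): the longest fixed-code length/distance pair costs

    (8 + 5) + (5 + 13) = 31 bits.

Just before the last symbol of a block is coded, 31*(n-2) bits have been written, while the read position sits 8*n + 24*(n-2) bits into pending_buf (sym_buf starts 8*n bits in, and n-2 three-byte symbols have been consumed), so the gap between writer and reader is

    8*n + 24*(n-2) - 31*(n-2) = n + 14 bits,

at least 128 + 14 = 142 bits for the smallest lit_bufsize, or 139 bits once the three-bit fixed-code block header is charged.
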
if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
s->pending_buf == Z_NULL) {
@ -337,8 +373,12 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
deflateEnd (strm);
return Z_MEM_ERROR;
}
s->d_buf = overlay + s->lit_bufsize/sizeof(ush);
s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;
s->sym_buf = s->pending_buf + s->lit_bufsize;
s->sym_end = (s->lit_bufsize - 1) * 3;
/* We avoid equality with lit_bufsize*3 because of wraparound at 64K
* on 16 bit machines and because stored blocks are restricted to
* 64K-1 bytes.
*/
s->level = level;
s->strategy = strategy;
@ -488,13 +528,13 @@ int ZEXPORT deflateResetKeep (strm)
#ifdef GZIP
s->wrap == 2 ? GZIP_STATE :
#endif
s->wrap ? INIT_STATE : BUSY_STATE;
INIT_STATE;
strm->adler =
#ifdef GZIP
s->wrap == 2 ? crc32(0L, Z_NULL, 0) :
#endif
adler32(0L, Z_NULL, 0);
s->last_flush = Z_NO_FLUSH;
s->last_flush = -2;
_tr_init(s);
@ -549,7 +589,8 @@ int ZEXPORT deflatePrime (strm, bits, value)
if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
s = strm->state;
if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3))
if (bits < 0 || bits > 16 ||
s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3))
return Z_BUF_ERROR;
do {
put = Buf_size - s->bi_valid;
@ -587,12 +628,12 @@ int ZEXPORT deflateParams(strm, level, strategy)
func = configuration_table[s->level].func;
if ((strategy != s->strategy || func != configuration_table[level].func) &&
s->high_water) {
s->last_flush != -2) {
/* Flush the last buffer: */
int err = deflate(strm, Z_BLOCK);
if (err == Z_STREAM_ERROR)
return err;
if (strm->avail_out == 0)
if (strm->avail_in || (s->strstart - s->block_start) + s->lookahead)
return Z_BUF_ERROR;
}
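
With this change deflateParams() returns Z_BUF_ERROR, leaving the parameters untouched, whenever input already handed to deflate() could not be fully compressed into the available output space. A minimal usage sketch (function name and buffer size are illustrative, not from this patch): supply output space, keep whatever deflateParams() writes, and retry until it succeeds.

    #include <stdio.h>
    #include <zlib.h>

    /* Illustrative helper: change the level of an active deflate stream,
     * writing any compressed bytes deflateParams() emits to fp, retrying
     * while buffered input still needs more output space. */
    static int set_deflate_level(z_stream *strm, FILE *fp, int level)
    {
        unsigned char out[16384];
        int ret;
        do {
            strm->next_out = out;
            strm->avail_out = (uInt)sizeof(out);
            ret = deflateParams(strm, level, Z_DEFAULT_STRATEGY);
            if (fwrite(out, 1, sizeof(out) - strm->avail_out, fp)
                    != sizeof(out) - strm->avail_out)
                return Z_ERRNO;
        } while (ret == Z_BUF_ERROR);
        return ret;   /* Z_OK, or Z_STREAM_ERROR for a bad state/arguments */
    }
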
if (s->level != level) {
@ -811,6 +852,8 @@ int ZEXPORT deflate (strm, flush)
}
/* Write the header */
if (s->status == INIT_STATE && s->wrap == 0)
s->status = BUSY_STATE;
if (s->status == INIT_STATE) {
/* zlib header */
uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
@ -1108,7 +1151,6 @@ int ZEXPORT deflateCopy (dest, source)
#else
deflate_state *ds;
deflate_state *ss;
ushf *overlay;
if (deflateStateCheck(source) || dest == Z_NULL) {
@ -1128,8 +1170,7 @@ int ZEXPORT deflateCopy (dest, source)
ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos));
ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos));
overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2);
ds->pending_buf = (uchf *) overlay;
ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4);
if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL ||
ds->pending_buf == Z_NULL) {
@ -1143,8 +1184,7 @@ int ZEXPORT deflateCopy (dest, source)
zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
ds->sym_buf = ds->pending_buf + ds->lit_bufsize;
ds->l_desc.dyn_tree = ds->dyn_ltree;
ds->d_desc.dyn_tree = ds->dyn_dtree;
@ -1513,6 +1553,8 @@ local void fill_window(s)
s->match_start -= wsize;
s->strstart -= wsize; /* we now have strstart >= MAX_DIST */
s->block_start -= (long) wsize;
if (s->insert > s->strstart)
s->insert = s->strstart;
slide_hash(s);
more += wsize;
}
@ -1742,6 +1784,7 @@ local block_state deflate_stored(s, flush)
s->matches = 2; /* clear hash */
zmemcpy(s->window, s->strm->next_in - s->w_size, s->w_size);
s->strstart = s->w_size;
s->insert = s->strstart;
}
else {
if (s->window_size - s->strstart <= used) {
@ -1750,12 +1793,14 @@ local block_state deflate_stored(s, flush)
zmemcpy(s->window, s->window + s->w_size, s->strstart);
if (s->matches < 2)
s->matches++; /* add a pending slide_hash() */
if (s->insert > s->strstart)
s->insert = s->strstart;
}
zmemcpy(s->window + s->strstart, s->strm->next_in - used, used);
s->strstart += used;
s->insert += MIN(used, s->w_size - s->insert);
}
s->block_start = s->strstart;
s->insert += MIN(used, s->w_size - s->insert);
}
if (s->high_water < s->strstart)
s->high_water = s->strstart;
@ -1770,7 +1815,7 @@ local block_state deflate_stored(s, flush)
return block_done;
/* Fill the window with any remaining input. */
have = s->window_size - s->strstart - 1;
have = s->window_size - s->strstart;
if (s->strm->avail_in > have && s->block_start >= (long)s->w_size) {
/* Slide the window down. */
s->block_start -= s->w_size;
@ -1779,12 +1824,15 @@ local block_state deflate_stored(s, flush)
if (s->matches < 2)
s->matches++; /* add a pending slide_hash() */
have += s->w_size; /* more space now */
if (s->insert > s->strstart)
s->insert = s->strstart;
}
if (have > s->strm->avail_in)
have = s->strm->avail_in;
if (have) {
read_buf(s->strm, s->window + s->strstart, have);
s->strstart += have;
s->insert += MIN(have, s->w_size - s->insert);
}
if (s->high_water < s->strstart)
s->high_water = s->strstart;
@ -1912,7 +1960,7 @@ local block_state deflate_fast(s, flush)
FLUSH_BLOCK(s, 1);
return finish_done;
}
if (s->last_lit)
if (s->sym_next)
FLUSH_BLOCK(s, 0);
return block_done;
}
@ -2043,7 +2091,7 @@ local block_state deflate_slow(s, flush)
FLUSH_BLOCK(s, 1);
return finish_done;
}
if (s->last_lit)
if (s->sym_next)
FLUSH_BLOCK(s, 0);
return block_done;
}
@ -2118,7 +2166,7 @@ local block_state deflate_rle(s, flush)
FLUSH_BLOCK(s, 1);
return finish_done;
}
if (s->last_lit)
if (s->sym_next)
FLUSH_BLOCK(s, 0);
return block_done;
}
@ -2157,7 +2205,7 @@ local block_state deflate_huff(s, flush)
FLUSH_BLOCK(s, 1);
return finish_done;
}
if (s->last_lit)
if (s->sym_next)
FLUSH_BLOCK(s, 0);
return block_done;
}

3rdparty/zlib/deflate.h
@ -1,5 +1,5 @@
/* deflate.h -- internal compression state
* Copyright (C) 1995-2016 Jean-loup Gailly
* Copyright (C) 1995-2018 Jean-loup Gailly
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -217,7 +217,7 @@ typedef struct internal_state {
/* Depth of each subtree used as tie breaker for trees of equal frequency
*/
uchf *l_buf; /* buffer for literals or lengths */
uchf *sym_buf; /* buffer for distances and literals/lengths */
uInt lit_bufsize;
/* Size of match buffer for literals/lengths. There are 4 reasons for
@ -239,13 +239,8 @@ typedef struct internal_state {
* - I can't count above 4
*/
uInt last_lit; /* running index in l_buf */
ushf *d_buf;
/* Buffer for distances. To simplify the code, d_buf and l_buf have
* the same number of elements. To use different lengths, an extra flag
* array would be necessary.
*/
uInt sym_next; /* running index in sym_buf */
uInt sym_end; /* symbol table full when sym_next reaches this */
ulg opt_len; /* bit length of current block with optimal trees */
ulg static_len; /* bit length of current block with static trees */
@ -325,20 +320,22 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
# define _tr_tally_lit(s, c, flush) \
{ uch cc = (c); \
s->d_buf[s->last_lit] = 0; \
s->l_buf[s->last_lit++] = cc; \
s->sym_buf[s->sym_next++] = 0; \
s->sym_buf[s->sym_next++] = 0; \
s->sym_buf[s->sym_next++] = cc; \
s->dyn_ltree[cc].Freq++; \
flush = (s->last_lit == s->lit_bufsize-1); \
flush = (s->sym_next == s->sym_end); \
}
# define _tr_tally_dist(s, distance, length, flush) \
{ uch len = (uch)(length); \
ush dist = (ush)(distance); \
s->d_buf[s->last_lit] = dist; \
s->l_buf[s->last_lit++] = len; \
s->sym_buf[s->sym_next++] = dist; \
s->sym_buf[s->sym_next++] = dist >> 8; \
s->sym_buf[s->sym_next++] = len; \
dist--; \
s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
s->dyn_dtree[d_code(dist)].Freq++; \
flush = (s->last_lit == s->lit_bufsize-1); \
flush = (s->sym_next == s->sym_end); \
}
#else
# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
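
The rewritten tally macros above pack each symbol into three consecutive sym_buf bytes: distance low byte, distance high byte, then the literal/length byte, with a zero distance marking a plain literal. A standalone reader sketch assuming exactly that layout (illustrative; the real consumer is compress_block() in trees.c):

    #include <stdio.h>

    /* Walk a sym_buf filled by the macros above: three bytes per symbol,
     * dist == 0 means a literal, otherwise a (length, distance) pair. */
    static void dump_sym_buf(const unsigned char *sym_buf, unsigned sym_next)
    {
        unsigned sx;
        for (sx = 0; sx < sym_next; sx += 3) {
            unsigned dist = sym_buf[sx] | ((unsigned)sym_buf[sx + 1] << 8);
            unsigned lc = sym_buf[sx + 2];
            if (dist == 0)
                printf("literal 0x%02x\n", lc);
            else
                printf("match: length byte %u, distance %u\n", lc, dist);
        }
    }
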

3rdparty/zlib/gzguts.h
@ -1,5 +1,5 @@
/* gzguts.h -- zlib internal header definitions for gz* operations
* Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
* Copyright (C) 2004-2019 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -39,7 +39,7 @@
# include <io.h>
#endif
#if defined(_WIN32) || defined(__CYGWIN__)
#if defined(_WIN32)
# define WIDECHAR
#endif
@ -190,6 +190,7 @@ typedef struct {
/* just for writing */
int level; /* compression level */
int strategy; /* compression strategy */
int reset; /* true if a reset is pending after a Z_FINISH */
/* seek request */
z_off64_t skip; /* amount to skip (already rewound if backwards) */
int seek; /* true if seek request pending */

3rdparty/zlib/gzlib.c
@ -1,11 +1,11 @@
/* gzlib.c -- zlib functions common to reading and writing gzip files
* Copyright (C) 2004-2017 Mark Adler
* Copyright (C) 2004-2019 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "gzguts.h"
#if defined(_WIN32) && !defined(__BORLANDC__) && !defined(__MINGW32__)
#if defined(_WIN32) && !defined(__BORLANDC__)
# define LSEEK _lseeki64
#else
#if defined(_LARGEFILE64_SOURCE) && _LFS64_LARGEFILE-0
@ -81,6 +81,8 @@ local void gz_reset(state)
state->past = 0; /* have not read past end yet */
state->how = LOOK; /* look for gzip header */
}
else /* for writing ... */
state->reset = 0; /* no deflateReset pending */
state->seek = 0; /* no seek request pending */
gz_error(state, Z_OK, NULL); /* clear error */
state->x.pos = 0; /* no uncompressed data yet */
@ -397,7 +399,7 @@ z_off64_t ZEXPORT gzseek64(file, offset, whence)
/* if within raw area while reading, just go there */
if (state->mode == GZ_READ && state->how == COPY &&
state->x.pos + offset >= 0) {
ret = LSEEK(state->fd, offset - state->x.have, SEEK_CUR);
ret = LSEEK(state->fd, offset - (z_off64_t)state->x.have, SEEK_CUR);
if (ret == -1)
return -1;
state->x.have = 0;

3rdparty/zlib/gzread.c
@ -1,5 +1,5 @@
/* gzread.c -- zlib functions for reading gzip files
* Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
* Copyright (C) 2004-2017 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -314,9 +314,9 @@ local z_size_t gz_read(state, buf, len)
got = 0;
do {
/* set n to the maximum amount of len that fits in an unsigned int */
n = -1;
n = (unsigned)-1;
if (n > len)
n = (int)len;
n = (unsigned)len;
/* first just try copying data from the output buffer */
if (state->x.have) {
@ -397,7 +397,7 @@ int ZEXPORT gzread(file, buf, len)
}
/* read len or fewer bytes to buf */
len = (int)gz_read(state, buf, len);
len = (unsigned)gz_read(state, buf, len);
/* check for an error */
if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR)
@ -447,7 +447,6 @@ z_size_t ZEXPORT gzfread(buf, size, nitems, file)
int ZEXPORT gzgetc(file)
gzFile file;
{
int ret;
unsigned char buf[1];
gz_statep state;
@ -469,8 +468,7 @@ int ZEXPORT gzgetc(file)
}
/* nothing there -- try gz_read() */
ret = (int)gz_read(state, buf, 1);
return ret < 1 ? -1 : buf[0];
return gz_read(state, buf, 1) < 1 ? -1 : buf[0];
}
int ZEXPORT gzgetc_(file)

3rdparty/zlib/gzwrite.c
@ -1,5 +1,5 @@
/* gzwrite.c -- zlib functions for writing gzip files
* Copyright (C) 2004-2017 Mark Adler
* Copyright (C) 2004-2019 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -97,6 +97,15 @@ local int gz_comp(state, flush)
return 0;
}
/* check for a pending reset */
if (state->reset) {
/* don't start a new gzip member unless there is data to write */
if (strm->avail_in == 0)
return 0;
deflateReset(strm);
state->reset = 0;
}
/* run deflate() on provided input until it produces no more output */
ret = Z_OK;
do {
@ -134,7 +143,7 @@ local int gz_comp(state, flush)
/* if that completed a deflate stream, allow another to start */
if (flush == Z_FINISH)
deflateReset(strm);
state->reset = 1;
/* all done, no errors */
return 0;
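
Taken together, the two hunks above defer the deflateReset() that used to follow every Z_FINISH flush until the next write actually supplies data, so gzflush(file, Z_FINISH) no longer appends an empty gzip member by itself. A small usage sketch of that pattern (path and strings are illustrative):

    #include <zlib.h>

    /* Illustrative only: finish the current gzip member, then keep writing.
     * With the pending-reset logic above, the second member is started only
     * when gzwrite() supplies new data after the Z_FINISH flush. */
    static int append_two_members(const char *path)
    {
        gzFile gz = gzopen(path, "wb");
        if (gz == NULL)
            return -1;
        gzwrite(gz, "first member\n", 13);
        gzflush(gz, Z_FINISH);               /* ends member 1; no empty member added */
        gzwrite(gz, "second member\n", 14);  /* deflateReset() happens here */
        return gzclose(gz);
    }
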
@ -209,7 +218,7 @@ local z_size_t gz_write(state, buf, len)
state->in);
copy = state->size - have;
if (copy > len)
copy = (int)len;
copy = (unsigned)len;
memcpy(state->in + have, buf, copy);
state->strm.avail_in += copy;
state->x.pos += copy;
@ -229,7 +238,7 @@ local z_size_t gz_write(state, buf, len)
do {
unsigned n = (unsigned)-1;
if (n > len)
n = (int)len;
n = (unsigned)len;
state->strm.avail_in = n;
state->x.pos += n;
if (gz_comp(state, Z_NO_FLUSH) == -1)
@ -349,12 +358,11 @@ int ZEXPORT gzputc(file, c)
}
/* -- see zlib.h -- */
int ZEXPORT gzputs(file, str)
int ZEXPORT gzputs(file, s)
gzFile file;
const char *str;
const char *s;
{
int ret;
z_size_t len;
z_size_t len, put;
gz_statep state;
/* get internal structure */
@ -367,9 +375,13 @@ int ZEXPORT gzputs(file, str)
return -1;
/* write string */
len = strlen(str);
ret = (int)gz_write(state, str, len);
return ret == 0 && len != 0 ? -1 : ret;
len = strlen(s);
if ((int)len < 0 || (unsigned)len != len) {
gz_error(state, Z_STREAM_ERROR, "string length does not fit in int");
return -1;
}
put = gz_write(state, s, len);
return put < len ? -1 : (int)len;
}
#if defined(STDC) || defined(Z_HAVE_STDARG_H)
@ -441,7 +453,7 @@ int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va)
strm->avail_in = state->size;
if (gz_comp(state, Z_NO_FLUSH) == -1)
return state->err;
memcpy(state->in, state->in + state->size, left);
memmove(state->in, state->in + state->size, left);
strm->next_in = state->in;
strm->avail_in = left;
}
@ -540,7 +552,7 @@ int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
strm->avail_in = state->size;
if (gz_comp(state, Z_NO_FLUSH) == -1)
return state->err;
memcpy(state->in, state->in + state->size, left);
memmove(state->in, state->in + state->size, left);
strm->next_in = state->in;
strm->avail_in = left;
}

@ -1,5 +1,5 @@
/* infback.c -- inflate using a call-back interface
* Copyright (C) 1995-2016 Mark Adler
* Copyright (C) 1995-2022 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -477,6 +477,7 @@ void FAR *out_desc;
}
Tracev((stderr, "inflate: codes ok\n"));
state->mode = LEN;
/* fallthrough */
case LEN:
/* use inflate_fast() if we have enough input and output */

@ -70,7 +70,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
code const FAR *dcode; /* local strm->distcode */
unsigned lmask; /* mask for first level of length codes */
unsigned dmask; /* mask for first level of distance codes */
code here; /* retrieved table entry */
code const *here; /* retrieved table entry */
unsigned op; /* code bits, operation, extra bits, or */
/* window position, window bytes to copy */
unsigned len; /* match length, unused bytes */
@ -107,20 +107,20 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
hold += (unsigned long)(*in++) << bits;
bits += 8;
}
here = lcode[hold & lmask];
here = lcode + (hold & lmask);
dolen:
op = (unsigned)(here.bits);
op = (unsigned)(here->bits);
hold >>= op;
bits -= op;
op = (unsigned)(here.op);
op = (unsigned)(here->op);
if (op == 0) { /* literal */
Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ?
"inflate: literal '%c'\n" :
"inflate: literal 0x%02x\n", here.val));
*out++ = (unsigned char)(here.val);
"inflate: literal 0x%02x\n", here->val));
*out++ = (unsigned char)(here->val);
}
else if (op & 16) { /* length base */
len = (unsigned)(here.val);
len = (unsigned)(here->val);
op &= 15; /* number of extra bits */
if (op) {
if (bits < op) {
@ -138,14 +138,14 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
hold += (unsigned long)(*in++) << bits;
bits += 8;
}
here = dcode[hold & dmask];
here = dcode + (hold & dmask);
dodist:
op = (unsigned)(here.bits);
op = (unsigned)(here->bits);
hold >>= op;
bits -= op;
op = (unsigned)(here.op);
op = (unsigned)(here->op);
if (op & 16) { /* distance base */
dist = (unsigned)(here.val);
dist = (unsigned)(here->val);
op &= 15; /* number of extra bits */
if (bits < op) {
hold += (unsigned long)(*in++) << bits;
@ -264,7 +264,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
}
}
else if ((op & 64) == 0) { /* 2nd level distance code */
here = dcode[here.val + (hold & ((1U << op) - 1))];
here = dcode + here->val + (hold & ((1U << op) - 1));
goto dodist;
}
else {
@ -274,7 +274,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
}
}
else if ((op & 64) == 0) { /* 2nd level length code */
here = lcode[here.val + (hold & ((1U << op) - 1))];
here = lcode + here->val + (hold & ((1U << op) - 1));
goto dolen;
}
else if (op & 32) { /* end-of-block */

@ -1,5 +1,5 @@
/* inflate.c -- zlib decompression
* Copyright (C) 1995-2016 Mark Adler
* Copyright (C) 1995-2022 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -130,6 +130,7 @@ z_streamp strm;
state->mode = HEAD;
state->last = 0;
state->havedict = 0;
state->flags = -1;
state->dmax = 32768U;
state->head = Z_NULL;
state->hold = 0;
@ -448,10 +449,10 @@ unsigned copy;
/* check function to use adler32() for zlib or crc32() for gzip */
#ifdef GUNZIP
# define UPDATE(check, buf, len) \
# define UPDATE_CHECK(check, buf, len) \
(state->flags ? crc32(check, buf, len) : adler32(check, buf, len))
#else
# define UPDATE(check, buf, len) adler32(check, buf, len)
# define UPDATE_CHECK(check, buf, len) adler32(check, buf, len)
#endif
/* check macros for header crc */
@ -671,7 +672,6 @@ int flush;
state->mode = FLAGS;
break;
}
state->flags = 0; /* expect zlib header */
if (state->head != Z_NULL)
state->head->done = -1;
if (!(state->wrap & 1) || /* check if zlib header allowed */
@ -698,6 +698,7 @@ int flush;
break;
}
state->dmax = 1U << len;
state->flags = 0; /* indicate zlib header */
Tracev((stderr, "inflate: zlib header ok\n"));
strm->adler = state->check = adler32(0L, Z_NULL, 0);
state->mode = hold & 0x200 ? DICTID : TYPE;
@ -723,6 +724,7 @@ int flush;
CRC2(state->check, hold);
INITBITS();
state->mode = TIME;
/* fallthrough */
case TIME:
NEEDBITS(32);
if (state->head != Z_NULL)
@ -731,6 +733,7 @@ int flush;
CRC4(state->check, hold);
INITBITS();
state->mode = OS;
/* fallthrough */
case OS:
NEEDBITS(16);
if (state->head != Z_NULL) {
@ -741,6 +744,7 @@ int flush;
CRC2(state->check, hold);
INITBITS();
state->mode = EXLEN;
/* fallthrough */
case EXLEN:
if (state->flags & 0x0400) {
NEEDBITS(16);
@ -754,6 +758,7 @@ int flush;
else if (state->head != Z_NULL)
state->head->extra = Z_NULL;
state->mode = EXTRA;
/* fallthrough */
case EXTRA:
if (state->flags & 0x0400) {
copy = state->length;
@ -776,6 +781,7 @@ int flush;
}
state->length = 0;
state->mode = NAME;
/* fallthrough */
case NAME:
if (state->flags & 0x0800) {
if (have == 0) goto inf_leave;
@ -797,6 +803,7 @@ int flush;
state->head->name = Z_NULL;
state->length = 0;
state->mode = COMMENT;
/* fallthrough */
case COMMENT:
if (state->flags & 0x1000) {
if (have == 0) goto inf_leave;
@ -817,6 +824,7 @@ int flush;
else if (state->head != Z_NULL)
state->head->comment = Z_NULL;
state->mode = HCRC;
/* fallthrough */
case HCRC:
if (state->flags & 0x0200) {
NEEDBITS(16);
@ -840,6 +848,7 @@ int flush;
strm->adler = state->check = ZSWAP32(hold);
INITBITS();
state->mode = DICT;
/* fallthrough */
case DICT:
if (state->havedict == 0) {
RESTORE();
@ -847,8 +856,10 @@ int flush;
}
strm->adler = state->check = adler32(0L, Z_NULL, 0);
state->mode = TYPE;
/* fallthrough */
case TYPE:
if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave;
/* fallthrough */
case TYPEDO:
if (state->last) {
BYTEBITS();
@ -899,8 +910,10 @@ int flush;
INITBITS();
state->mode = COPY_;
if (flush == Z_TREES) goto inf_leave;
/* fallthrough */
case COPY_:
state->mode = COPY;
/* fallthrough */
case COPY:
copy = state->length;
if (copy) {
@ -936,6 +949,7 @@ int flush;
Tracev((stderr, "inflate: table sizes ok\n"));
state->have = 0;
state->mode = LENLENS;
/* fallthrough */
case LENLENS:
while (state->have < state->ncode) {
NEEDBITS(3);
@ -957,6 +971,7 @@ int flush;
Tracev((stderr, "inflate: code lengths ok\n"));
state->have = 0;
state->mode = CODELENS;
/* fallthrough */
case CODELENS:
while (state->have < state->nlen + state->ndist) {
for (;;) {
@ -1040,8 +1055,10 @@ int flush;
Tracev((stderr, "inflate: codes ok\n"));
state->mode = LEN_;
if (flush == Z_TREES) goto inf_leave;
/* fallthrough */
case LEN_:
state->mode = LEN;
/* fallthrough */
case LEN:
if (have >= 6 && left >= 258) {
RESTORE();
@ -1091,6 +1108,7 @@ int flush;
}
state->extra = (unsigned)(here.op) & 15;
state->mode = LENEXT;
/* fallthrough */
case LENEXT:
if (state->extra) {
NEEDBITS(state->extra);
@ -1101,6 +1119,7 @@ int flush;
Tracevv((stderr, "inflate: length %u\n", state->length));
state->was = state->length;
state->mode = DIST;
/* fallthrough */
case DIST:
for (;;) {
here = state->distcode[BITS(state->distbits)];
@ -1128,6 +1147,7 @@ int flush;
state->offset = (unsigned)here.val;
state->extra = (unsigned)(here.op) & 15;
state->mode = DISTEXT;
/* fallthrough */
case DISTEXT:
if (state->extra) {
NEEDBITS(state->extra);
@ -1144,6 +1164,7 @@ int flush;
#endif
Tracevv((stderr, "inflate: distance %u\n", state->offset));
state->mode = MATCH;
/* fallthrough */
case MATCH:
if (left == 0) goto inf_leave;
copy = out - left;
@ -1203,7 +1224,7 @@ int flush;
state->total += out;
if ((state->wrap & 4) && out)
strm->adler = state->check =
UPDATE(state->check, put - out, out);
UPDATE_CHECK(state->check, put - out, out);
out = left;
if ((state->wrap & 4) && (
#ifdef GUNZIP
@ -1219,10 +1240,11 @@ int flush;
}
#ifdef GUNZIP
state->mode = LENGTH;
/* fallthrough */
case LENGTH:
if (state->wrap && state->flags) {
NEEDBITS(32);
if (hold != (state->total & 0xffffffffUL)) {
if ((state->wrap & 4) && hold != (state->total & 0xffffffff)) {
strm->msg = (char *)"incorrect length check";
state->mode = BAD;
break;
@ -1232,6 +1254,7 @@ int flush;
}
#endif
state->mode = DONE;
/* fallthrough */
case DONE:
ret = Z_STREAM_END;
goto inf_leave;
@ -1241,6 +1264,7 @@ int flush;
case MEM:
return Z_MEM_ERROR;
case SYNC:
/* fallthrough */
default:
return Z_STREAM_ERROR;
}
@ -1266,7 +1290,7 @@ int flush;
state->total += out;
if ((state->wrap & 4) && out)
strm->adler = state->check =
UPDATE(state->check, strm->next_out - out, out);
UPDATE_CHECK(state->check, strm->next_out - out, out);
strm->data_type = (int)state->bits + (state->last ? 64 : 0) +
(state->mode == TYPE ? 128 : 0) +
(state->mode == LEN_ || state->mode == COPY_ ? 256 : 0);
@ -1402,6 +1426,7 @@ int ZEXPORT inflateSync(strm)
z_streamp strm;
{
unsigned len; /* number of bytes to look at or looked at */
int flags; /* temporary to save header status */
unsigned long in, out; /* temporary to save total_in and total_out */
unsigned char buf[4]; /* to restore bit buffer to byte string */
struct inflate_state FAR *state;
@ -1434,9 +1459,15 @@ z_streamp strm;
/* return no joy or set up to restart inflate() on a new block */
if (state->have != 4) return Z_DATA_ERROR;
if (state->flags == -1)
state->wrap = 0; /* if no header yet, treat as raw */
else
state->wrap &= ~4; /* no point in computing a check value now */
flags = state->flags;
in = strm->total_in; out = strm->total_out;
inflateReset(strm);
strm->total_in = in; strm->total_out = out;
state->flags = flags;
state->mode = TYPE;
return Z_OK;
}
@ -1532,7 +1563,7 @@ int check;
if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
state = (struct inflate_state FAR *)strm->state;
if (check)
if (check && state->wrap)
state->wrap |= 4;
else
state->wrap &= ~4;

@ -1,5 +1,5 @@
/* inflate.h -- internal inflate state definition
* Copyright (C) 1995-2016 Mark Adler
* Copyright (C) 1995-2019 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -86,7 +86,8 @@ struct inflate_state {
int wrap; /* bit 0 true for zlib, bit 1 true for gzip,
bit 2 true to validate check value */
int havedict; /* true if dictionary provided */
int flags; /* gzip header method and flags (0 if zlib) */
int flags; /* gzip header method and flags, 0 if zlib, or
-1 if raw or no header yet */
unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */
unsigned long check; /* protected copy of check value */
unsigned long total; /* protected copy of output count */

@ -1,5 +1,5 @@
/* inftrees.c -- generate Huffman trees for efficient decoding
* Copyright (C) 1995-2017 Mark Adler
* Copyright (C) 1995-2022 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -9,7 +9,7 @@
#define MAXBITS 15
const char inflate_copyright[] =
" inflate 1.2.11 Copyright 1995-2017 Mark Adler ";
" inflate 1.2.12 Copyright 1995-2022 Mark Adler ";
/*
If you use the zlib library in a product, an acknowledgment is welcome
in the documentation of your product. If for some reason you cannot
@ -62,7 +62,7 @@ unsigned short FAR *work;
35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
static const unsigned short lext[31] = { /* Length codes 257..285 extra */
16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202};
19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 199, 202};
static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,

@ -1,5 +1,5 @@
/* trees.c -- output deflated data using Huffman coding
* Copyright (C) 1995-2017 Jean-loup Gailly
* Copyright (C) 1995-2021 Jean-loup Gailly
* detect_data_type() function provided freely by Cosmin Truta, 2006
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -149,7 +149,7 @@ local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
local void compress_block OF((deflate_state *s, const ct_data *ltree,
const ct_data *dtree));
local int detect_data_type OF((deflate_state *s));
local unsigned bi_reverse OF((unsigned value, int length));
local unsigned bi_reverse OF((unsigned code, int len));
local void bi_windup OF((deflate_state *s));
local void bi_flush OF((deflate_state *s));
@ -416,7 +416,7 @@ local void init_block(s)
s->dyn_ltree[END_BLOCK].Freq = 1;
s->opt_len = s->static_len = 0L;
s->last_lit = s->matches = 0;
s->sym_next = s->matches = 0;
}
#define SMALLEST 1
@ -870,7 +870,8 @@ void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last)
bi_windup(s); /* align on byte boundary */
put_short(s, (ush)stored_len);
put_short(s, (ush)~stored_len);
zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len);
if (stored_len)
zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len);
s->pending += stored_len;
#ifdef ZLIB_DEBUG
s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
@ -947,7 +948,7 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
s->last_lit));
s->sym_next / 3));
if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
@ -1016,8 +1017,9 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc)
unsigned dist; /* distance of matched string */
unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */
{
s->d_buf[s->last_lit] = (ush)dist;
s->l_buf[s->last_lit++] = (uch)lc;
s->sym_buf[s->sym_next++] = dist;
s->sym_buf[s->sym_next++] = dist >> 8;
s->sym_buf[s->sym_next++] = lc;
if (dist == 0) {
/* lc is the unmatched char */
s->dyn_ltree[lc].Freq++;
@ -1032,30 +1034,7 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc)
s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++;
s->dyn_dtree[d_code(dist)].Freq++;
}
#ifdef TRUNCATE_BLOCK
/* Try to guess if it is profitable to stop the current block here */
if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
/* Compute an upper bound for the compressed length */
ulg out_length = (ulg)s->last_lit*8L;
ulg in_length = (ulg)((long)s->strstart - s->block_start);
int dcode;
for (dcode = 0; dcode < D_CODES; dcode++) {
out_length += (ulg)s->dyn_dtree[dcode].Freq *
(5L+extra_dbits[dcode]);
}
out_length >>= 3;
Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
s->last_lit, in_length, out_length,
100L - out_length*100L/in_length));
if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
}
#endif
return (s->last_lit == s->lit_bufsize-1);
/* We avoid equality with lit_bufsize because of wraparound at 64K
* on 16 bit machines and because stored blocks are restricted to
* 64K-1 bytes.
*/
return (s->sym_next == s->sym_end);
}
/* ===========================================================================
@ -1068,13 +1047,14 @@ local void compress_block(s, ltree, dtree)
{
unsigned dist; /* distance of matched string */
int lc; /* match length or unmatched char (if dist == 0) */
unsigned lx = 0; /* running index in l_buf */
unsigned sx = 0; /* running index in sym_buf */
unsigned code; /* the code to send */
int extra; /* number of extra bits to send */
if (s->last_lit != 0) do {
dist = s->d_buf[lx];
lc = s->l_buf[lx++];
if (s->sym_next != 0) do {
dist = s->sym_buf[sx++] & 0xff;
dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;
lc = s->sym_buf[sx++];
if (dist == 0) {
send_code(s, lc, ltree); /* send a literal byte */
Tracecv(isgraph(lc), (stderr," '%c' ", lc));
@ -1099,11 +1079,10 @@ local void compress_block(s, ltree, dtree)
}
} /* literal or match pair ? */
/* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx,
"pendingBuf overflow");
/* Check that the overlay between pending_buf and sym_buf is ok: */
Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow");
} while (lx < s->last_lit);
} while (sx < s->sym_next);
send_code(s, END_BLOCK, ltree);
}
@ -1112,9 +1091,9 @@ local void compress_block(s, ltree, dtree)
* Check if the data type is TEXT or BINARY, using the following algorithm:
* - TEXT if the two conditions below are satisfied:
* a) There are no non-portable control characters belonging to the
* "black list" (0..6, 14..25, 28..31).
* "block list" (0..6, 14..25, 28..31).
* b) There is at least one printable character belonging to the
* "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
* "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
* - BINARY otherwise.
* - The following partially-portable control characters form a
* "gray list" that is ignored in this detection algorithm:
@ -1124,19 +1103,19 @@ local void compress_block(s, ltree, dtree)
local int detect_data_type(s)
deflate_state *s;
{
/* black_mask is the bit mask of black-listed bytes
/* block_mask is the bit mask of block-listed bytes
* set bits 0..6, 14..25, and 28..31
* 0xf3ffc07f = binary 11110011111111111100000001111111
*/
unsigned long black_mask = 0xf3ffc07fUL;
unsigned long block_mask = 0xf3ffc07fUL;
int n;
/* Check for non-textual ("black-listed") bytes. */
for (n = 0; n <= 31; n++, black_mask >>= 1)
if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0))
/* Check for non-textual ("block-listed") bytes. */
for (n = 0; n <= 31; n++, block_mask >>= 1)
if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0))
return Z_BINARY;
/* Check for textual ("white-listed") bytes. */
/* Check for textual ("allow-listed") bytes. */
if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0
|| s->dyn_ltree[13].Freq != 0)
return Z_TEXT;
@ -1144,7 +1123,7 @@ local int detect_data_type(s)
if (s->dyn_ltree[n].Freq != 0)
return Z_TEXT;
/* There are no "black-listed" or "white-listed" bytes:
/* There are no "block-listed" or "allow-listed" bytes:
* this stream either is empty or has tolerated ("gray-listed") bytes only.
*/
return Z_BINARY;

3rdparty/zlib/zlib.h
@ -1,7 +1,7 @@
/* zlib.h -- interface of the 'zlib' general purpose compression library
version 1.2.11, January 15th, 2017
version 1.2.12, March 11th, 2022
Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler
Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
@ -37,11 +37,11 @@
extern "C" {
#endif
#define ZLIB_VERSION "1.2.11"
#define ZLIB_VERNUM 0x12b0
#define ZLIB_VERSION "1.2.12"
#define ZLIB_VERNUM 0x12c0
#define ZLIB_VER_MAJOR 1
#define ZLIB_VER_MINOR 2
#define ZLIB_VER_REVISION 11
#define ZLIB_VER_REVISION 12
#define ZLIB_VER_SUBREVISION 0
/*
@ -543,8 +543,7 @@ ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm,
int strategy));
This is another version of deflateInit with more compression options. The
fields next_in, zalloc, zfree and opaque must be initialized before by the
caller.
fields zalloc, zfree and opaque must be initialized before by the caller.
The method parameter is the compression method. It must be Z_DEFLATED in
this version of the library.
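A minimal sketch of the initialization described above, assuming ordinary raw-deflate defaults; the level, windowBits and memLevel values are illustrative and not taken from this diff:

    #include <string.h>
    #include "zlib.h"

    static int start_deflate(z_stream *strm)
    {
        memset(strm, 0, sizeof(*strm));
        strm->zalloc = Z_NULL;   /* zalloc/zfree/opaque must be set by the caller */
        strm->zfree  = Z_NULL;
        strm->opaque = Z_NULL;
        /* Z_DEFLATED is the only method accepted by this version of the library */
        return deflateInit2(strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
                            15 /* windowBits */, 8 /* memLevel */,
                            Z_DEFAULT_STRATEGY);
    }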
@ -712,11 +711,12 @@ ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
used to switch between compression and straight copy of the input data, or
to switch to a different kind of input data requiring a different strategy.
If the compression approach (which is a function of the level) or the
strategy is changed, and if any input has been consumed in a previous
deflate() call, then the input available so far is compressed with the old
level and strategy using deflate(strm, Z_BLOCK). There are three approaches
for the compression levels 0, 1..3, and 4..9 respectively. The new level
and strategy will take effect at the next call of deflate().
strategy is changed, and if there have been any deflate() calls since the
state was initialized or reset, then the input available so far is
compressed with the old level and strategy using deflate(strm, Z_BLOCK).
There are three approaches for the compression levels 0, 1..3, and 4..9
respectively. The new level and strategy will take effect at the next call
of deflate().
If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does
not have enough output space to complete, then the parameter change will not
@ -865,9 +865,11 @@ ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm,
detection, or add 16 to decode only the gzip format (the zlib format will
return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a
CRC-32 instead of an Adler-32. Unlike the gunzip utility and gzread() (see
below), inflate() will not automatically decode concatenated gzip streams.
inflate() will return Z_STREAM_END at the end of the gzip stream. The state
would need to be reset to continue decoding a subsequent gzip stream.
below), inflate() will *not* automatically decode concatenated gzip members.
inflate() will return Z_STREAM_END at the end of the gzip member. The state
would need to be reset to continue decoding a subsequent gzip member. This
*must* be done if there is more data after a gzip member, in order for the
decompression to be compliant with the gzip standard (RFC 1952).
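A sketch of the reset-and-continue pattern the note above requires, assuming the stream was opened with inflateInit2(&strm, 15 + 16) (gzip-only decoding) and the caller keeps refilling next_in/avail_in:

    #include "zlib.h"

    /* Returns the inflate() result; after Z_STREAM_END with input left over,
     * resets the state so the next gzip member can be decoded. */
    static int inflate_step(z_stream *strm)
    {
        int ret = inflate(strm, Z_NO_FLUSH);
        if (ret == Z_STREAM_END && strm->avail_in > 0)
            ret = inflateReset(strm);
        return ret;
    }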
inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
@ -1302,14 +1304,14 @@ typedef struct gzFile_s *gzFile; /* semi-opaque gzip file descriptor */
/*
ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode));
Opens a gzip (.gz) file for reading or writing. The mode parameter is as
in fopen ("rb" or "wb") but can also include a compression level ("wb9") or
a strategy: 'f' for filtered data as in "wb6f", 'h' for Huffman-only
compression as in "wb1h", 'R' for run-length encoding as in "wb1R", or 'F'
for fixed code compression as in "wb9F". (See the description of
deflateInit2 for more information about the strategy parameter.) 'T' will
request transparent writing or appending with no compression and not using
the gzip format.
Open the gzip (.gz) file at path for reading and decompressing, or
compressing and writing. The mode parameter is as in fopen ("rb" or "wb")
but can also include a compression level ("wb9") or a strategy: 'f' for
filtered data as in "wb6f", 'h' for Huffman-only compression as in "wb1h",
'R' for run-length encoding as in "wb1R", or 'F' for fixed code compression
as in "wb9F". (See the description of deflateInit2 for more information
about the strategy parameter.) 'T' will request transparent writing or
appending with no compression and not using the gzip format.
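For illustration only (the path is made up), a mode string combining write access, compression level 9 and the Huffman-only strategy described above:

    #include "zlib.h"

    static gzFile open_example(void)
    {
        /* "wb9h": write binary, level 9, 'h' = Huffman-only compression */
        return gzopen("example.gz", "wb9h");
    }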
"a" can be used instead of "w" to request that the gzip stream that will
be written be appended to the file. "+" will result in an error, since
@ -1339,9 +1341,9 @@ ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode));
ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode));
/*
gzdopen associates a gzFile with the file descriptor fd. File descriptors
are obtained from calls like open, dup, creat, pipe or fileno (if the file
has been previously opened with fopen). The mode parameter is as in gzopen.
Associate a gzFile with the file descriptor fd. File descriptors are
obtained from calls like open, dup, creat, pipe or fileno (if the file has
been previously opened with fopen). The mode parameter is as in gzopen.
The next call of gzclose on the returned gzFile will also close the file
descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor
@ -1362,13 +1364,13 @@ ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode));
ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size));
/*
Set the internal buffer size used by this library's functions. The
default buffer size is 8192 bytes. This function must be called after
gzopen() or gzdopen(), and before any other calls that read or write the
file. The buffer memory allocation is always deferred to the first read or
write. Three times that size in buffer space is allocated. A larger buffer
size of, for example, 64K or 128K bytes will noticeably increase the speed
of decompression (reading).
Set the internal buffer size used by this library's functions for file to
size. The default buffer size is 8192 bytes. This function must be called
after gzopen() or gzdopen(), and before any other calls that read or write
the file. The buffer memory allocation is always deferred to the first read
or write. Three times that size in buffer space is allocated. A larger
buffer size of, for example, 64K or 128K bytes will noticeably increase the
speed of decompression (reading).
The new buffer size also affects the maximum length for gzprintf().
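A small sketch of the call order required above: gzbuffer() right after gzopen(), before any read; the 128K size is only an example.

    #include "zlib.h"

    static gzFile open_with_big_buffer(const char *path)
    {
        gzFile f = gzopen(path, "rb");
        if (f != NULL)
            gzbuffer(f, 128 * 1024);   /* must precede the first read or write */
        return f;
    }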
@ -1378,9 +1380,9 @@ ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size));
ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
/*
Dynamically update the compression level or strategy. See the description
of deflateInit2 for the meaning of these parameters. Previously provided
data is flushed before the parameter change.
Dynamically update the compression level and strategy for file. See the
description of deflateInit2 for the meaning of these parameters. Previously
provided data is flushed before applying the parameter changes.
gzsetparams returns Z_OK if success, Z_STREAM_ERROR if the file was not
opened for writing, Z_ERRNO if there is an error writing the flushed data,
@ -1389,7 +1391,7 @@ ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len));
/*
Reads the given number of uncompressed bytes from the compressed file. If
Read and decompress up to len uncompressed bytes from file into buf. If
the input file is not in gzip format, gzread copies the given number of
bytes into the buffer directly from the file.
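A plain gzread() loop as a sketch; the buffer size is arbitrary:

    #include "zlib.h"

    static long gz_drain(gzFile f)
    {
        char buf[4096];
        long total = 0;
        int n;
        while ((n = gzread(f, buf, (unsigned)sizeof(buf))) > 0)
            total += n;                 /* 0 means end of file, -1 means error */
        return n < 0 ? -1 : total;
    }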
@ -1420,11 +1422,11 @@ ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len));
ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems,
gzFile file));
/*
Read up to nitems items of size size from file to buf, otherwise operating
as gzread() does. This duplicates the interface of stdio's fread(), with
size_t request and return types. If the library defines size_t, then
z_size_t is identical to size_t. If not, then z_size_t is an unsigned
integer type that can contain a pointer.
Read and decompress up to nitems items of size size from file into buf,
otherwise operating as gzread() does. This duplicates the interface of
stdio's fread(), with size_t request and return types. If the library
defines size_t, then z_size_t is identical to size_t. If not, then z_size_t
is an unsigned integer type that can contain a pointer.
gzfread() returns the number of full items read of size size, or zero if
the end of the file was reached and a full item could not be read, or if
@ -1443,18 +1445,16 @@ ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems,
file, resetting and retrying on end-of-file, when size is not 1.
*/
ZEXTERN int ZEXPORT gzwrite OF((gzFile file,
voidpc buf, unsigned len));
ZEXTERN int ZEXPORT gzwrite OF((gzFile file, voidpc buf, unsigned len));
/*
Writes the given number of uncompressed bytes into the compressed file.
gzwrite returns the number of uncompressed bytes written or 0 in case of
error.
Compress and write the len uncompressed bytes at buf to file. gzwrite
returns the number of uncompressed bytes written or 0 in case of error.
*/
ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size,
z_size_t nitems, gzFile file));
/*
gzfwrite() writes nitems items of size size from buf to file, duplicating
Compress and write nitems items of size size from buf to file, duplicating
the interface of stdio's fwrite(), with size_t request and return types. If
the library defines size_t, then z_size_t is identical to size_t. If not,
then z_size_t is an unsigned integer type that can contain a pointer.
@ -1467,22 +1467,22 @@ ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size,
ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...));
/*
Converts, formats, and writes the arguments to the compressed file under
control of the format string, as in fprintf. gzprintf returns the number of
Convert, format, compress, and write the arguments (...) to file under
control of the string format, as in fprintf. gzprintf returns the number of
uncompressed bytes actually written, or a negative zlib error code in case
of error. The number of uncompressed bytes written is limited to 8191, or
one less than the buffer size given to gzbuffer(). The caller should assure
that this limit is not exceeded. If it is exceeded, then gzprintf() will
return an error (0) with nothing written. In this case, there may also be a
buffer overflow with unpredictable consequences, which is possible only if
zlib was compiled with the insecure functions sprintf() or vsprintf()
zlib was compiled with the insecure functions sprintf() or vsprintf(),
because the secure snprintf() or vsnprintf() functions were not available.
This can be determined using zlibCompileFlags().
*/
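A sketch of formatted output that stays well under the 8191-byte limit noted above; the field names are invented:

    #include "zlib.h"

    static int log_sample(gzFile f, int frame, double score)
    {
        return gzprintf(f, "frame=%d score=%.3f\n", frame, score);
    }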
ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
/*
Writes the given null-terminated string to the compressed file, excluding
Compress and write the given null-terminated string s to file, excluding
the terminating null character.
gzputs returns the number of characters written, or -1 in case of error.
@ -1490,11 +1490,12 @@ ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len));
/*
Reads bytes from the compressed file until len-1 characters are read, or a
newline character is read and transferred to buf, or an end-of-file
condition is encountered. If any characters are read or if len == 1, the
string is terminated with a null character. If no characters are read due
to an end-of-file or len < 1, then the buffer is left untouched.
Read and decompress bytes from file into buf, until len-1 characters are
read, or until a newline character is read and transferred to buf, or an
end-of-file condition is encountered. If any characters are read or if len
is one, the string is terminated with a null character. If no characters
are read due to an end-of-file or len is less than one, then the buffer is
left untouched.
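A line-by-line reading sketch built on the behaviour described above; see also the return-value note that follows:

    #include <stdio.h>
    #include "zlib.h"

    static void print_lines(gzFile f)
    {
        char line[256];
        while (gzgets(f, line, (int)sizeof(line)) != NULL)
            fputs(line, stdout);        /* each line is null-terminated by gzgets */
    }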
gzgets returns buf which is a null-terminated string, or it returns NULL
for end-of-file or in case of error. If there was an error, the contents at
@ -1503,13 +1504,13 @@ ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len));
ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c));
/*
Writes c, converted to an unsigned char, into the compressed file. gzputc
Compress and write c, converted to an unsigned char, into file. gzputc
returns the value that was written, or -1 in case of error.
*/
ZEXTERN int ZEXPORT gzgetc OF((gzFile file));
/*
Reads one byte from the compressed file. gzgetc returns this byte or -1
Read and decompress one byte from file. gzgetc returns this byte or -1
in case of end of file or error. This is implemented as a macro for speed.
As such, it does not do all of the checking the other functions do. I.e.
it does not check to see if file is NULL, nor whether the structure file
@ -1518,8 +1519,8 @@ ZEXTERN int ZEXPORT gzgetc OF((gzFile file));
ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file));
/*
Push one character back onto the stream to be read as the first character
on the next read. At least one character of push-back is allowed.
Push c back onto the stream for file to be read as the first character on
the next read. At least one character of push-back is always allowed.
gzungetc() returns the character pushed, or -1 on failure. gzungetc() will
fail if c is -1, and may fail if a character has been pushed but not read
yet. If gzungetc is used immediately after gzopen or gzdopen, at least the
@ -1530,9 +1531,9 @@ ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file));
ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush));
/*
Flushes all pending output into the compressed file. The parameter flush
is as in the deflate() function. The return value is the zlib error number
(see function gzerror below). gzflush is only permitted when writing.
Flush all pending output to file. The parameter flush is as in the
deflate() function. The return value is the zlib error number (see function
gzerror below). gzflush is only permitted when writing.
If the flush parameter is Z_FINISH, the remaining data is written and the
gzip stream is completed in the output. If gzwrite() is called again, a new
@ -1547,8 +1548,8 @@ ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush));
ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file,
z_off_t offset, int whence));
Sets the starting position for the next gzread or gzwrite on the given
compressed file. The offset represents a number of bytes in the
Set the starting position to offset relative to whence for the next gzread
or gzwrite on file. The offset represents a number of bytes in the
uncompressed data stream. The whence parameter is defined as in lseek(2);
the value SEEK_END is not supported.
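A sketch of an uncompressed-offset seek; the 100-byte offset is arbitrary, and SEEK_END is not supported, as stated above:

    #include <stdio.h>   /* SEEK_SET */
    #include "zlib.h"

    static int skip_prefix(gzFile f)
    {
        return gzseek(f, 100L, SEEK_SET) < 0 ? -1 : 0;
    }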
@ -1565,18 +1566,18 @@ ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file,
ZEXTERN int ZEXPORT gzrewind OF((gzFile file));
/*
Rewinds the given file. This function is supported only for reading.
Rewind file. This function is supported only for reading.
gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET)
gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET).
*/
/*
ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file));
Returns the starting position for the next gzread or gzwrite on the given
compressed file. This position represents a number of bytes in the
uncompressed data stream, and is zero when starting, even if appending or
reading a gzip stream from the middle of a file using gzdopen().
Return the starting position for the next gzread or gzwrite on file.
This position represents a number of bytes in the uncompressed data stream,
and is zero when starting, even if appending or reading a gzip stream from
the middle of a file using gzdopen().
gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR)
*/
@ -1584,22 +1585,22 @@ ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file));
/*
ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file));
Returns the current offset in the file being read or written. This offset
includes the count of bytes that precede the gzip stream, for example when
appending or when using gzdopen() for reading. When reading, the offset
does not include as yet unused buffered input. This information can be used
for a progress indicator. On error, gzoffset() returns -1.
Return the current compressed (actual) read or write offset of file. This
offset includes the count of bytes that precede the gzip stream, for example
when appending or when using gzdopen() for reading. When reading, the
offset does not include as yet unused buffered input. This information can
be used for a progress indicator. On error, gzoffset() returns -1.
*/
ZEXTERN int ZEXPORT gzeof OF((gzFile file));
/*
Returns true (1) if the end-of-file indicator has been set while reading,
false (0) otherwise. Note that the end-of-file indicator is set only if the
read tried to go past the end of the input, but came up short. Therefore,
just like feof(), gzeof() may return false even if there is no more data to
read, in the event that the last read request was for the exact number of
bytes remaining in the input file. This will happen if the input file size
is an exact multiple of the buffer size.
Return true (1) if the end-of-file indicator for file has been set while
reading, false (0) otherwise. Note that the end-of-file indicator is set
only if the read tried to go past the end of the input, but came up short.
Therefore, just like feof(), gzeof() may return false even if there is no
more data to read, in the event that the last read request was for the exact
number of bytes remaining in the input file. This will happen if the input
file size is an exact multiple of the buffer size.
If gzeof() returns true, then the read functions will return no more data,
unless the end-of-file indicator is reset by gzclearerr() and the input file
@ -1608,7 +1609,7 @@ ZEXTERN int ZEXPORT gzeof OF((gzFile file));
ZEXTERN int ZEXPORT gzdirect OF((gzFile file));
/*
Returns true (1) if file is being copied directly while reading, or false
Return true (1) if file is being copied directly while reading, or false
(0) if file is a gzip stream being decompressed.
If the input file is empty, gzdirect() will return true, since the input
@ -1629,8 +1630,8 @@ ZEXTERN int ZEXPORT gzdirect OF((gzFile file));
ZEXTERN int ZEXPORT gzclose OF((gzFile file));
/*
Flushes all pending output if necessary, closes the compressed file and
deallocates the (de)compression state. Note that once file is closed, you
Flush all pending output for file, if necessary, close file and
deallocate the (de)compression state. Note that once file is closed, you
cannot call gzerror with file, since its structures have been deallocated.
gzclose must not be called more than once on the same file, just as free
must not be called more than once on the same allocation.
@ -1654,10 +1655,10 @@ ZEXTERN int ZEXPORT gzclose_w OF((gzFile file));
ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
/*
Returns the error message for the last error which occurred on the given
compressed file. errnum is set to zlib error number. If an error occurred
in the file system and not in the compression library, errnum is set to
Z_ERRNO and the application may consult errno to get the exact error code.
Return the error message for the last error which occurred on file.
errnum is set to zlib error number. If an error occurred in the file system
and not in the compression library, errnum is set to Z_ERRNO and the
application may consult errno to get the exact error code.
The application must not modify the returned string. Future calls to
this function may invalidate the previously returned string. If file is
@ -1670,7 +1671,7 @@ ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
ZEXTERN void ZEXPORT gzclearerr OF((gzFile file));
/*
Clears the error and end-of-file flags for file. This is analogous to the
Clear the error and end-of-file flags for file. This is analogous to the
clearerr() function in stdio. This is useful for continuing to read a gzip
file that is being written concurrently.
*/
@ -1688,8 +1689,9 @@ ZEXTERN void ZEXPORT gzclearerr OF((gzFile file));
ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
/*
Update a running Adler-32 checksum with the bytes buf[0..len-1] and
return the updated checksum. If buf is Z_NULL, this function returns the
required initial value for the checksum.
return the updated checksum. An Adler-32 value is in the range of a 32-bit
unsigned integer. If buf is Z_NULL, this function returns the required
initial value for the checksum.
An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed
much faster.
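The same running-checksum pattern as the crc32() usage example further below, sketched for adler32(); read_chunk() is a hypothetical input helper, not a zlib function:

    #include "zlib.h"

    extern int read_chunk(unsigned char *buf, unsigned len);   /* hypothetical */

    static uLong checksum_input(void)
    {
        unsigned char buf[4096];
        int n;
        uLong adler = adler32(0L, Z_NULL, 0);      /* required initial value */
        while ((n = read_chunk(buf, (unsigned)sizeof(buf))) > 0)
            adler = adler32(adler, buf, (uInt)n);
        return adler;
    }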
@ -1722,12 +1724,13 @@ ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2,
negative, the result has no meaning or utility.
*/
ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len));
ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len));
/*
Update a running CRC-32 with the bytes buf[0..len-1] and return the
updated CRC-32. If buf is Z_NULL, this function returns the required
initial value for the crc. Pre- and post-conditioning (one's complement) is
performed within this function so it shouldn't be done by the application.
updated CRC-32. A CRC-32 value is in the range of a 32-bit unsigned integer.
If buf is Z_NULL, this function returns the required initial value for the
crc. Pre- and post-conditioning (one's complement) is performed within this
function so it shouldn't be done by the application.
Usage example:
@ -1739,7 +1742,7 @@ ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len));
if (crc != original_crc) error();
*/
ZEXTERN uLong ZEXPORT crc32_z OF((uLong adler, const Bytef *buf,
ZEXTERN uLong ZEXPORT crc32_z OF((uLong crc, const Bytef *buf,
z_size_t len));
/*
Same as crc32(), but with a size_t length.
@ -1755,6 +1758,20 @@ ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2));
len2.
*/
/*
ZEXTERN uLong ZEXPORT crc32_combine_gen OF((z_off_t len2));
Return the operator corresponding to length len2, to be used with
crc32_combine_op().
*/
ZEXTERN uLong ZEXPORT crc32_combine_op OF((uLong crc1, uLong crc2, uLong op));
/*
Give the same result as crc32_combine(), using op in place of len2. op is
generated from len2 by crc32_combine_gen(). This will be faster than
crc32_combine() if the generated op is used more than once.
*/
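A sketch of the crc32_combine_gen()/crc32_combine_op() pair added in 1.2.12, combining the CRCs of equally sized parts so the operator is generated once and reused; the part layout is illustrative and assumes parts >= 1:

    #include "zlib.h"

    static uLong combine_parts(const uLong *part_crc, int parts, z_off_t part_len)
    {
        uLong op = crc32_combine_gen(part_len);    /* depends only on part_len */
        uLong crc = part_crc[0];
        int i;
        for (i = 1; i < parts; i++)
            crc = crc32_combine_op(crc, part_crc[i], op);
        return crc;
    }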
/* various hacks, don't look :) */
@ -1842,6 +1859,7 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */
ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile));
ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t));
ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t));
ZEXTERN uLong ZEXPORT crc32_combine_gen64 OF((z_off64_t));
#endif
#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64)
@ -1852,6 +1870,7 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */
# define z_gzoffset z_gzoffset64
# define z_adler32_combine z_adler32_combine64
# define z_crc32_combine z_crc32_combine64
# define z_crc32_combine_gen z_crc32_combine_gen64
# else
# define gzopen gzopen64
# define gzseek gzseek64
@ -1859,6 +1878,7 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */
# define gzoffset gzoffset64
# define adler32_combine adler32_combine64
# define crc32_combine crc32_combine64
# define crc32_combine_gen crc32_combine_gen64
# endif
# ifndef Z_LARGE64
ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
@ -1867,6 +1887,7 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */
ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile));
ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t));
ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t));
ZEXTERN uLong ZEXPORT crc32_combine_gen64 OF((z_off_t));
# endif
#else
ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *));
@ -1875,12 +1896,14 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */
ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile));
ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t));
ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t));
ZEXTERN uLong ZEXPORT crc32_combine_gen OF((z_off_t));
#endif
#else /* Z_SOLO */
ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t));
ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t));
ZEXTERN uLong ZEXPORT crc32_combine_gen OF((z_off_t));
#endif /* !Z_SOLO */
@ -1893,7 +1916,7 @@ ZEXTERN int ZEXPORT inflateValidate OF((z_streamp, int));
ZEXTERN unsigned long ZEXPORT inflateCodesUsed OF ((z_streamp));
ZEXTERN int ZEXPORT inflateResetKeep OF((z_streamp));
ZEXTERN int ZEXPORT deflateResetKeep OF((z_streamp));
#if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(Z_SOLO)
#if defined(_WIN32) && !defined(Z_SOLO)
ZEXTERN gzFile ZEXPORT gzopen_w OF((const wchar_t *path,
const char *mode));
#endif

@ -137,7 +137,7 @@ const char * ZEXPORT zError(err)
}
#if defined(_WIN32_WCE) && _WIN32_WCE < 0x800
/* The Microsoft C Run-Time Library for Windows CE doesn't have
/* The older Microsoft C Run-Time Library for Windows CE doesn't have
* errno. We define it as a global variable to simplify porting.
* Its value is always 0 and should not be used.
*/

@ -1,5 +1,5 @@
/* zutil.h -- internal interface and configuration of the compression library
* Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler
* Copyright (C) 1995-2022 Jean-loup Gailly, Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@ -29,10 +29,6 @@
# include <stdlib.h>
#endif
#ifdef Z_SOLO
typedef long ptrdiff_t; /* guess -- will be caught if guess is wrong */
#endif
#ifndef local
# define local static
#endif
@ -46,6 +42,17 @@ typedef unsigned short ush;
typedef ush FAR ushf;
typedef unsigned long ulg;
#if !defined(Z_U8) && !defined(Z_SOLO) && defined(STDC)
# include <limits.h>
# if (ULONG_MAX == 0xffffffffffffffff)
# define Z_U8 unsigned long
# elif (ULLONG_MAX == 0xffffffffffffffff)
# define Z_U8 unsigned long long
# elif (UINT_MAX == 0xffffffffffffffff)
# define Z_U8 unsigned
# endif
#endif
extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
/* (size given to avoid silly warnings with Visual C++) */
@ -169,13 +176,7 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
#if (defined(_MSC_VER) && (_MSC_VER > 600)) && !defined __INTERIX
# if defined(_WIN32_WCE)
# if _WIN32_WCE < 0x800
# define fdopen(fd,mode) NULL /* No fdopen() */
# ifndef _PTRDIFF_T_DEFINED
typedef int ptrdiff_t;
# define _PTRDIFF_T_DEFINED
# endif
# endif
# define fdopen(fd,mode) NULL /* No fdopen() */
# else
# define fdopen(fd,type) _fdopen(fd,type)
# endif

@ -454,6 +454,9 @@ OCV_OPTION(WITH_TENGINE "Include Arm Inference Tengine support" OFF
OCV_OPTION(WITH_ONNX "Include Microsoft ONNX Runtime support" OFF
VISIBLE_IF TRUE
VERIFY HAVE_ONNX)
OCV_OPTION(WITH_TIMVX "Include Tim-VX support" OFF
VISIBLE_IF TRUE
VERIFY HAVE_TIMVX)
# OpenCV build components
# ===================================================
@ -740,6 +743,9 @@ include(cmake/OpenCVFindProtobuf.cmake)
if(WITH_TENGINE)
include(cmake/OpenCVFindTengine.cmake)
endif()
if(WITH_TIMVX)
include(cmake/OpenCVFindTIMVX.cmake)
endif()
# ----------------------------------------------------------------------------
# Detect other 3rd-party libraries/tools
@ -1656,6 +1662,16 @@ if(WITH_WEBNN OR HAVE_WEBNN)
endif()
endif()
if(WITH_TIMVX)
status("")
status(" Tim-VX:" HAVE_TIMVX THEN "YES" ELSE "NO")
if(HAVE_TIMVX)
status(" Include path" TIMVX_INCLUDE_DIR THEN "${TIMVX_INCLUDE_DIR}" ELSE "NO")
status(" Link libraries:" TIMVX_LIBRARY THEN "${TIMVX_LIBRARY}" ELSE "NO")
status(" VIVANTE SDK path" VIVANTE_SDK_DIR THEN "${VIVANTE_SDK_DIR}" ELSE "NO")
endif()
endif()
if(WITH_OPENCL OR HAVE_OPENCL)
ocv_build_features_string(opencl_features
IF HAVE_OPENCL_SVM THEN "SVM"

@ -314,6 +314,10 @@ if(MSVC)
set(OPENCV_EXTRA_C_FLAGS "${OPENCV_EXTRA_C_FLAGS} /FS")
set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} /FS")
endif()
if(AARCH64 AND NOT MSVC_VERSION LESS 1930)
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /D _ARM64_DISTINCT_NEON_TYPES")
endif()
endif()
if(PROJECT_NAME STREQUAL "OpenCV")

@ -37,6 +37,50 @@ file(WRITE "${OPENCV_DOWNLOAD_LOG}" "#use_cache \"${OPENCV_DOWNLOAD_PATH}\"\n")
file(REMOVE "${OPENCV_DOWNLOAD_WITH_CURL}")
file(REMOVE "${OPENCV_DOWNLOAD_WITH_WGET}")
ocv_check_environment_variables(OPENCV_DOWNLOAD_MIRROR_ID)
function(ocv_init_download_mirror)
if(NOT DEFINED OPENCV_DOWNLOAD_MIRROR_ID)
# Run `git remote get-url origin` to get remote source
execute_process(
COMMAND
git remote get-url origin
WORKING_DIRECTORY
${CMAKE_SOURCE_DIR}
RESULT_VARIABLE
RESULT_STATUS
OUTPUT_VARIABLE
OCV_GIT_ORIGIN_URL_OUTPUT
ERROR_QUIET
)
# if non-git, OCV_GIT_ORIGIN_URL_OUTPUT is empty
if(NOT OCV_GIT_ORIGIN_URL_OUTPUT)
message(STATUS "ocv_init_download: OpenCV source tree is not fetched as git repository. 3rdparty resources will be downloaded from github.com by default.")
return()
else()
# Check if git origin is github.com
string(FIND "${OCV_GIT_ORIGIN_URL_OUTPUT}" "github.com" _found_github)
if(NOT ${_found_github} EQUAL -1)
set(OPENCV_DOWNLOAD_MIRROR_ID "github" CACHE STRING "")
endif()
# Check if git origin is gitcode.net
string(FIND "${OCV_GIT_ORIGIN_URL_OUTPUT}" "gitcode.net" _found_gitcode)
if(NOT ${_found_gitcode} EQUAL -1)
set(OPENCV_DOWNLOAD_MIRROR_ID "gitcode" CACHE STRING "")
endif()
endif()
endif()
if(OPENCV_DOWNLOAD_MIRROR_ID STREQUAL "gitcode" OR OPENCV_DOWNLOAD_MIRROR_ID STREQUAL "custom")
message(STATUS "ocv_init_download: Using ${OPENCV_DOWNLOAD_MIRROR_ID}-hosted mirror to download 3rdparty components.")
ocv_cmake_hook_append(OPENCV_DOWNLOAD_PRE "${CMAKE_CURRENT_SOURCE_DIR}/cmake/mirrors/${OPENCV_DOWNLOAD_MIRROR_ID}.cmake")
elseif(OPENCV_DOWNLOAD_MIRROR_ID STREQUAL "github")
return()
else()
message(STATUS "ocv_init_download: Unable to recognize git server of OpenCV source code. Using github.com to download 3rdparty components.")
endif()
endfunction()
function(ocv_download)
cmake_parse_arguments(DL "UNPACK;RELATIVE_URL" "FILENAME;HASH;DESTINATION_DIR;ID;STATUS" "URL" ${ARGN})
@ -67,6 +111,8 @@ function(ocv_download)
set(${DL_STATUS} TRUE PARENT_SCOPE)
endif()
ocv_cmake_hook(OPENCV_DOWNLOAD_PRE)
# Check CMake cache for already processed tasks
string(FIND "${DL_DESTINATION_DIR}" "${CMAKE_BINARY_DIR}" DL_BINARY_PATH_POS)
if(DL_BINARY_PATH_POS EQUAL 0)
@ -115,7 +161,7 @@ function(ocv_download)
if(DL_ID)
set(__msg_prefix "${DL_ID}: ")
endif()
message(STATUS "${__msg_prefix}Download: ${DL_FILENAME}")
message(STATUS "${__msg_prefix}Downloading ${DL_FILENAME} from ${DL_URL}")
# Copy mode: check if copy destination exists and is correct
if(NOT DL_UNPACK)
@ -252,3 +298,8 @@ ${OPENCV_DOWNLOAD_LOG}
set(${OCV_DOWNLOAD_HASH_NAME} "${DL_HASH}" CACHE INTERNAL "")
endif()
endfunction()
# ----------------------------------------------------------------------------
# Initialize download in case mirror is used
# ----------------------------------------------------------------------------
ocv_init_download_mirror()

@ -0,0 +1,69 @@
set(TIMVX_INSTALL_DIR "" CACHE PATH "Path to libtim-vx installation")
set(VIVANTE_SDK_DIR "" CACHE PATH "Path to VIVANTE SDK needed by TIM-VX.")
set(VIVANTE_SDK_LIB_CANDIDATES "OpenVX;VSC;GAL;ArchModelSw;NNArchPerf" CACHE STRING "VIVANTE SDK library candidates")
# Ensure VIVANTE SDK library candidates are present in given search path
function(find_vivante_sdk_libs _viv_notfound _viv_search_path)
foreach(one ${VIVANTE_SDK_LIB_CANDIDATES})
# NO_DEFAULT_PATH is used to ensure VIVANTE SDK libs come from a single source
find_library(VIV_${one}_LIB ${one} PATHS "${_viv_search_path}/lib" NO_DEFAULT_PATH)
if(NOT VIV_${one}_LIB)
list(APPEND _viv_notfound_list ${one})
endif()
endforeach()
set(${_viv_notfound} ${_viv_notfound_list} PARENT_SCOPE)
endfunction()
# Default value for VIVANTE_SDK_DIR: /usr
if(NOT VIVANTE_SDK_DIR)
set(VIVANTE_SDK_DIR "/usr")
endif()
# Environment variable VIVANTE_SDK_DIR overrides the one in this script
if(DEFINED ENV{VIVANTE_SDK_DIR})
set(VIVANTE_SDK_DIR $ENV{VIVANTE_SDK_DIR})
message(STATUS "TIM-VX: Load VIVANTE_SDK_DIR from system environment: ${VIVANTE_SDK_DIR}")
endif()
# Compile with pre-installed TIM-VX, or build TIM-VX from source together with OpenCV
if(TIMVX_INSTALL_DIR AND NOT BUILD_TIMVX)
message(STATUS "TIM-VX: Use binaries at ${TIMVX_INSTALL_DIR}")
set(BUILD_TIMVX OFF)
set(TIMVX_INC_DIR "${TIMVX_INSTALL_DIR}/include" CACHE INTERNAL "TIM-VX include directory")
find_library(TIMVX_LIB "tim-vx" PATHS "${TIMVX_INSTALL_DIR}/lib")
if(TIMVX_LIB)
set(TIMVX_FOUND ON)
else()
set(TIMVX_FOUND OFF)
endif()
# Verify if requested VIVANTE SDK libraries are all found
find_vivante_sdk_libs(missing ${VIVANTE_SDK_DIR})
if(missing)
message(STATUS "TIM-VX: Failed to find ${missing} in ${VIVANTE_SDK_DIR}/lib. Turning off TIMVX_VIV_FOUND")
set(TIMVX_VIV_FOUND OFF)
else()
message(STATUS "TIM-VX: dependent VIVANTE SDK libraries are found at ${VIVANTE_SDK_DIR}/lib.")
set(TIMVX_VIV_FOUND ON)
endif()
else()
message(STATUS "TIM-VX: Build from source")
include("${OpenCV_SOURCE_DIR}/3rdparty/libtim-vx/tim-vx.cmake")
endif()
if(TIMVX_FOUND AND TIMVX_VIV_FOUND)
set(HAVE_TIMVX 1)
message(STATUS "TIM-VX: Found TIM-VX includes: ${TIMVX_INC_DIR}")
message(STATUS "TIM-VX: Found TIM-VX library: ${TIMVX_LIB}")
set(TIMVX_LIBRARY ${TIMVX_LIB})
set(TIMVX_INCLUDE_DIR ${TIMVX_INC_DIR})
message(STATUS "TIM-VX: Found VIVANTE SDK libraries: ${VIVANTE_SDK_DIR}/lib")
link_directories(${VIVANTE_SDK_DIR}/lib)
endif()
MARK_AS_ADVANCED(
TIMVX_INC_DIR
TIMVX_LIB
)

@ -1,6 +1,7 @@
#include <stdio.h>
#if defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64))
# define _ARM64_DISTINCT_NEON_TYPES
# include <Intrin.h>
# include <arm_neon.h>
# define CV_NEON 1

@ -0,0 +1,91 @@
# Gitlab-style mirror
# CMake scripts look for opencv/opencv_3rdparty,
# OAID/Tengine, 01org/tbb(oneAPI/oneTBB), opencv/ade
# from OPENCV_DOWNLOAD_MIRROR
ocv_update(OPENCV_DOWNLOAD_MIRROR_URL "")
######
# Download via commit id
######
# Tengine
ocv_update(TENGINE_PKG_MD5_CUSTOM "")
ocv_update(TENGINE_PKG_MD5_ORIGINAL 23f61ebb1dd419f1207d8876496289c5) # same as tengine_md5sum for TENGINE commit of e89cf8870de2ff0a80cfe626c0b52b2a16fb302e
# NVIDIA_OPTICAL_FLOW
ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_GITCODE "")
ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_ORIGINAL a73cd48b18dcc0cc8933b30796074191)
# TIM-VX
ocv_update(TIM-VX_PKG_MD5_GITCODE "")
ocv_update(TIM-VX_PKG_MD5_ORIGINAL 92619cc4498014ac7a09834d5e33ebd5)
######
# Download from release page
#####
# TBB
ocv_update(TBB_RELEASE_CUSTOM "")
ocv_update(TBB_PKG_NAME_CUSTOM "")
ocv_update(TBB_PKG_MD5_CUSTOM "")
ocv_update(TBB_PKG_MD5_ORIGINAL 5af6f6c2a24c2043e62e47205e273b1f) # same as OPENCV_TBB_RELEASE_MD5 for TBB release of v2020.2
# ADE
ocv_update(ADE_RELEASE_CUSTOM "")
ocv_update(ADE_PKG_NAME_CUSTOM "")
ocv_update(ADE_PKG_MD5_CUSTOM "")
ocv_update(ADE_PKG_MD5_ORIGINAL b624b995ec9c439cbc2e9e6ee940d3a2) # same as ade_md5 for ADE release of v0.1.1f
macro(ocv_download_url_custom_usercontent OWNER)
string(REPLACE "/" ";" DL_URL_split ${DL_URL})
list(GET DL_URL_split 5 __COMMIT_ID)
list(GET DL_URL_split 6 __PKG_NAME)
set(DL_URL "https://${OPENCV_DOWNLOAD_MIRROR_URL}/${OWNER}/opencv_3rdparty/-/raw/${__COMMIT_ID}/${__PKG_NAME}/")
endmacro()
macro(ocv_download_url_custom_archive_commit_id)
if("m${${DL_ID}_PKG_MD5_CUSTOM}" STREQUAL "m")
message(WARNING "ocv_download: specify ${DL_ID}_PKG_MD5_CUSTOM to download ${DL_ID} from custom source.")
elseif(${DL_ID}_PKG_MD5_ORIGINAL STREQUAL "${DL_HASH}")
string(REPLACE "/" ";" DL_URL_split ${DL_URL})
list(GET DL_URL_split 3 __OWNER)
list(GET DL_URL_split 4 __REPO_NAME)
set(DL_URL "https://${OPENCV_DOWNLOAD_MIRROR_URL}/${__OWNER}/${__REPO_NAME}/-/archive/")
set(DL_HASH "${${DL_ID}_PKG_MD5_CUSTOM}")
else()
message(WARNING "No information about mirrors for downloading ${DL_FILENAME} from URL='${DL_URL}' and MD5=${DL_HASH}.")
endif()
endmacro()
macro(ocv_download_url_custom_archive_release)
if("m${${DL_ID}_RELEASE_CUSTOM}" STREQUAL "m")
message(WARNING "ocv_download: specify ${DL_ID}_RELEASE_CUSTOM to download ${DL_ID} from custom source.")
return()
endif()
if("m${${DL_ID}_PKG_NAME_CUSTOM}" STREQUAL "m")
message(WARNING "ocv_download: specify ${DL_ID}_PKG_NAME_CUSTOM to download ${DL_ID} from custom source.")
return()
endif()
if("m${${DL_ID}_PKG_MD5_CUSTOM}" STREQUAL "m")
message(WARNING "ocv_download: specify ${DL_ID}_PKG_MD5_CUSTOM to download ${DL_ID} from custom source.")
return()
endif()
string(REPLACE "/" ";" DL_URL_split ${DL_URL})
list(GET DL_URL_split 3 __OWNER)
list(GET DL_URL_split 4 __REPO_NAME)
set(DL_URL "https://${OPENCV_DOWNLOAD_MIRROR_URL}/${__OWNER}/${__REPO_NAME}/-/archive/${${DL_ID}_RELEASE_CUSTOM}/${__REPO_NAME}-")
set(DL_HASH "${${DL_ID}_PKG_MD5_CUSTOM}")
endmacro()
if("m${OPENCV_DOWNLOAD_MIRROR_URL}" STREQUAL "m")
message(WARNING "ocv_download: specify OPENCV_DOWNLOAD_MIRROR_URL to use custom mirror.")
else()
if((DL_ID STREQUAL "FFMPEG") OR (DL_ID STREQUAL "IPPICV") OR (DL_ID STREQUAL "data") OR (DL_ID STREQUAL "xfeatures2d/boostdesc") OR (DL_ID STREQUAL "xfeatures2d/vgg"))
ocv_download_url_custom_usercontent(opencv)
elseif(DL_ID STREQUAL "wechat_qrcode")
ocv_download_url_custom_usercontent(WeChatCV)
elseif((DL_ID STREQUAL "TENGINE") OR (DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX"))
ocv_download_url_custom_archive_commit_id()
elseif(DL_ID STREQUAL "TBB")
ocv_download_url_custom_archive_release()
set(OPENCV_TBB_SUBDIR "${TBB_PKG_NAME_CUSTOM}" PARENT_SCOPE)
elseif(DL_ID STREQUAL "ADE")
ocv_download_url_custom_archive_release()
set(ade_subdir "${ADE_PKG_NAME_CUSTOM}" PARENT_SCOPE)
else()
message(STATUS "ocv_download: Unknown download ID ${DL_ID} for using mirror ${OPENCV_DOWNLOAD_MIRROR_URL}. Use original source instead.")
endif()
endif()

@ -0,0 +1,86 @@
######
# Download via commit id
######
# Tengine
ocv_update(TENGINE_PKG_MD5_GITCODE 1b5908632b557275cd6e85b0c03f9690)
ocv_update(TENGINE_PKG_MD5_ORIGINAL 23f61ebb1dd419f1207d8876496289c5) # same as tengine_md5sum for TENGINE commit of e89cf8870de2ff0a80cfe626c0b52b2a16fb302e
# NVIDIA_OPTICAL_FLOW
ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_GITCODE 8d5b7eeb24d6ca9c6bcfdff4196d5b47)
ocv_update(NVIDIA_OPTICAL_FLOW_PKG_MD5_ORIGINAL a73cd48b18dcc0cc8933b30796074191)
# TIM-VX
ocv_update(TIM-VX_PKG_MD5_GITCODE 3f2a548b40b170668aaa60d4f60ba40b)
ocv_update(TIM-VX_PKG_MD5_ORIGINAL 92619cc4498014ac7a09834d5e33ebd5)
######
# Download from release page
######
# TBB
ocv_update(TBB_RELEASE_GITCODE "v2020.2")
ocv_update(TBB_PKG_NAME_GITCODE "tbb-${TBB_RELEASE_GITCODE}")
ocv_update(TBB_PKG_MD5_GITCODE 4eeafdf16a90cb66e39a31c8d6c6804e)
ocv_update(TBB_PKG_MD5_ORIGINAL 5af6f6c2a24c2043e62e47205e273b1f) # same as OPENCV_TBB_RELEASE_MD5 for TBB release of v2020.2
# ADE
ocv_update(ADE_RELEASE_GITCODE "v0.1.1f")
ocv_update(ADE_PKG_NAME_GITCODE "ade-${ADE_RELEASE_GITCODE}")
ocv_update(ADE_PKG_MD5_GITCODE c12909e0ccfa93138c820ba91ff37b3c)
ocv_update(ADE_PKG_MD5_ORIGINAL b624b995ec9c439cbc2e9e6ee940d3a2) # same as ade_md5 for ADE release of v0.1.1f
#
# Replace download links for packages in opencv/opencv_3rdparty:
# 1. Extract the commit id and package name from DL_URL.
# 2. Put them, together with the given repo owner, into the new gitcode.net DL_URL.
#
macro(ocv_download_url_gitcode_usercontent OWNER)
string(REPLACE "/" ";" DL_URL_split ${DL_URL})
list(GET DL_URL_split 5 __COMMIT_ID)
list(GET DL_URL_split 6 __PKG_NAME)
set(DL_URL "https://gitcode.net/${OWNER}/opencv_3rdparty/-/raw/${__COMMIT_ID}/")
if(__PKG_NAME)
set(DL_URL "${DL_URL}${__PKG_NAME}/")
endif()
endmacro()
#
# Replace download links and checksums for archives/releases in other repositories:
# 1. Check whether the versions match. If they do not, download from GitHub instead.
# 2. Extract the repo owner and repo name from DL_URL.
# 3. Put the repo owner and repo name into the placeholders of the new DL_URL.
# 4. Replace DL_HASH with the checksum of the archive hosted on gitcode.net.
#
macro(ocv_download_url_gitcode_archive_commit_id)
if(DL_HASH STREQUAL "${${DL_ID}_PKG_MD5_ORIGINAL}")
string(REPLACE "/" ";" DL_URL_split ${DL_URL})
list(GET DL_URL_split 3 __OWNER)
list(GET DL_URL_split 4 __REPO_NAME)
set(DL_URL "https://gitcode.net/mirrors/${__OWNER}/${__REPO_NAME}/-/archive/")
set(DL_HASH "${${DL_ID}_PKG_MD5_GITCODE}")
else()
message(WARNING "Package ${DL_ID} from mirror gitcode.net is outdated and will be downloaded from github.com instead.")
endif()
endmacro()
macro(ocv_download_url_gitcode_archive_release)
if(DL_HASH STREQUAL "${${DL_ID}_PKG_MD5_ORIGINAL}")
string(REPLACE "/" ";" DL_URL_split ${DL_URL})
list(GET DL_URL_split 3 __OWNER)
list(GET DL_URL_split 4 __REPO_NAME)
set(DL_URL "https://gitcode.net/${__OWNER}/${__REPO_NAME}/-/archive/${${DL_ID}_RELEASE_GITCODE}/${__REPO_NAME}-")
set(DL_HASH "${${DL_ID}_PKG_MD5_GITCODE}")
else()
message(WARNING "Package ${DL_ID} from mirror gitcode.net is outdated and will be downloaded from github.com instead.")
endif()
endmacro()
if((DL_ID STREQUAL "FFMPEG") OR (DL_ID STREQUAL "IPPICV") OR (DL_ID STREQUAL "data") OR (DL_ID STREQUAL "xfeatures2d/boostdesc") OR (DL_ID STREQUAL "xfeatures2d/vgg"))
ocv_download_url_gitcode_usercontent(opencv)
elseif(DL_ID STREQUAL "wechat_qrcode")
ocv_download_url_gitcode_usercontent(mirrors/WeChatCV)
elseif((DL_ID STREQUAL "TENGINE") OR (DL_ID STREQUAL "NVIDIA_OPTICAL_FLOW") OR (DL_ID STREQUAL "TIM-VX"))
ocv_download_url_gitcode_archive_commit_id()
elseif(DL_ID STREQUAL "TBB")
ocv_download_url_gitcode_archive_release()
set(OPENCV_TBB_SUBDIR "${TBB_PKG_NAME_GITCODE}" PARENT_SCOPE)
elseif(DL_ID STREQUAL "ADE")
ocv_download_url_gitcode_archive_release()
set(ade_subdir "${ADE_PKG_NAME_GITCODE}" PARENT_SCOPE)
else()
message(STATUS "ocv_download: Unknown download ID ${DL_ID} for using mirror gitcode.net. Use original source instead.")
endif()

@ -1,7 +1,7 @@
{
"name": "image_classification",
"version": "0.0.1",
"description": "An Electon.js example of image_classification using webnn-native",
"description": "An Electron.js example of image_classification using webnn-native",
"main": "main.js",
"author": "WebNN-native Authors",
"license": "Apache-2.0",

@ -97,10 +97,10 @@ Building OpenCV.js from Source
@endcode
@note
The loader is implemented as a js file in the path `<opencv_js_dir>/bin/loader.js`. The loader utilizes the [WebAssembly Feature Detection](https://github.com/GoogleChromeLabs/wasm-feature-detect) to detect the features of the broswer and load corresponding OpenCV.js automatically. To use it, you need to use the UMD version of [WebAssembly Feature Detection](https://github.com/GoogleChromeLabs/wasm-feature-detect) and introduce the `loader.js` in your Web application.
The loader is implemented as a js file in the path `<opencv_js_dir>/bin/loader.js`. The loader utilizes the [WebAssembly Feature Detection](https://github.com/GoogleChromeLabs/wasm-feature-detect) to detect the features of the browser and load corresponding OpenCV.js automatically. To use it, you need to use the UMD version of [WebAssembly Feature Detection](https://github.com/GoogleChromeLabs/wasm-feature-detect) and introduce the `loader.js` in your Web application.
Example Code:
@code{.javascipt}
@code{.javascript}
// Set paths configuration
let pathsConfig = {
wasm: "../../build_wasm/opencv.js",
@ -173,7 +173,7 @@ This snippet and the following require [Node.js](https://nodejs.org) to be insta
### Headless with Puppeteer
Alternatively tests can run with [GoogleChrome/puppeteer](https://github.com/GoogleChrome/puppeteer#readme) which is a version of Google Chrome that runs in the terminal (useful for Continuos integration like travis CI, etc)
Alternatively, tests can run with [GoogleChrome/puppeteer](https://github.com/GoogleChrome/puppeteer#readme), which drives a headless Google Chrome from the terminal (useful for Continuous Integration services like Travis CI, etc.)
@code{.sh}
cd build_js/bin
@ -229,7 +229,7 @@ node tests.js
The simd optimization is experimental as wasm simd is still in development.
@note
Now only emscripten LLVM upstream backend supports wasm simd, refering to https://emscripten.org/docs/porting/simd.html. So you need to setup upstream backend environment with the following command first:
Currently only the emscripten LLVM upstream backend supports wasm SIMD (see https://emscripten.org/docs/porting/simd.html), so you need to set up the upstream backend environment with the following commands first:
@code{.bash}
./emsdk update
./emsdk install latest-upstream

@ -244,9 +244,9 @@ Samples:
There are three new sample files in opencv/samples directory.
1. `epipolar_lines.cpp` – input arguments of `main` function are two
pathes to images. Then correspondences are found using
paths to images. Then correspondences are found using
SIFT detector. Fundamental matrix is found using RANSAC from
tentaive correspondences and epipolar lines are plot.
tentative correspondences and epipolar lines are plotted.
2. `essential_mat_reconstr.cpp` – input arguments are path to data file
containing image names and single intrinsic matrix and directory

@ -92,7 +92,7 @@ We then fill value to the corresponding pixel in the dst image.
### Parallel implementation
When looking at the sequential implementation, we can notice that each pixel depends on multiple neighbouring pixels but only one pixel is edited at a time. Thus, to optimize the computation, we can split the image into stripes and parallely perform convolution on each, by exploiting the multi-core architecture of modern processor. The OpenCV @ref cv::parallel_for_ framework automatically decides how to split the computation efficiently and does most of the work for us.
When looking at the sequential implementation, we can notice that each pixel depends on multiple neighbouring pixels but only one pixel is edited at a time. Thus, to optimize the computation, we can split the image into stripes and perform the convolution on each of them in parallel, exploiting the multi-core architecture of modern processors. The OpenCV @ref cv::parallel_for_ framework automatically decides how to split the computation efficiently and does most of the work for us.
@note Although the value of a pixel in a particular stripe may depend on pixel values outside the stripe, these are read-only accesses and hence will not cause undefined behaviour.
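As a rough, hedged sketch (not the tutorial's own sample code), a stripe-based `cv::parallel_for_` call may look as follows; the `invertRows` helper and the trivial inversion kernel are made up for illustration:
@code{.cpp}
#include <opencv2/core.hpp>

// Process disjoint stripes of rows in parallel; each worker writes only to its
// own rows, so no synchronization is needed (assumes an 8-bit image).
void invertRows(const cv::Mat& src, cv::Mat& dst)
{
    dst.create(src.size(), src.type());
    cv::parallel_for_(cv::Range(0, src.rows), [&](const cv::Range& range)
    {
        for (int r = range.start; r < range.end; r++)
        {
            const uchar* srcRow = src.ptr<uchar>(r);
            uchar* dstRow = dst.ptr<uchar>(r);
            for (int c = 0; c < src.cols * src.channels(); c++)
                dstRow[c] = 255 - srcRow[c];
        }
    });
}
@endcode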

@ -70,7 +70,7 @@ Sometimes networks are built with a blocked structure, which means some layers are
identical or quite similar. If you want to apply the same scheduling to
different layers, up to tiling or vectorization factors, define scheduling
patterns in the `patterns` section at the beginning of the scheduling file.
Also, your patters may use some parametric variables.
Also, your patterns may use some parametric variables.
@code
# At the beginning of the file
patterns:

@ -29,8 +29,8 @@ Before recognition, you should `setVocabulary` and `setDecodeType`.
- "CTC-prefix-beam-search", the output of the text recognition model should be a probability matrix same with "CTC-greedy".
- The algorithm is proposed at Hannun's [paper](https://arxiv.org/abs/1408.2873).
- `setDecodeOptsCTCPrefixBeamSearch` could be used to control the beam size in the search step.
- To futher optimize for big vocabulary, a new option `vocPruneSize` is introduced to avoid iterate the whole vocbulary
but only the number of `vocPruneSize` tokens with top probabilty.
- To further optimize for a big vocabulary, a new option `vocPruneSize` is introduced to avoid iterating over the whole vocabulary;
only the `vocPruneSize` tokens with the top probability are considered (see the sketch after this list).
@ref cv::dnn::TextRecognitionModel::recognize() is the main function for text recognition.
- The input image should be a cropped text image or an image with `roiRects`
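A hedged usage sketch of the calls described above (the model, vocabulary and image file names are placeholders, not files shipped with OpenCV):
@code{.cpp}
#include <fstream>
#include <string>
#include <vector>
#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    // Load the vocabulary, one token per line (hypothetical file name).
    std::vector<std::string> vocabulary;
    std::ifstream vocFile("alphabet_36.txt");
    for (std::string line; std::getline(vocFile, line); )
        vocabulary.push_back(line);

    cv::dnn::TextRecognitionModel model("crnn_cs.onnx");  // hypothetical model file
    model.setVocabulary(vocabulary);
    model.setDecodeType("CTC-prefix-beam-search");
    model.setDecodeOptsCTCPrefixBeamSearch(10, 50);  // beam size 10, keep the 50 most probable tokens

    cv::Mat cropped = cv::imread("word.png");  // a cropped text image
    std::string text = model.recognize(cropped);
    return 0;
}
@endcode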

@ -142,7 +142,7 @@ being a Graph API, doesn't force its users to do that.
However, a graph is still built implicitly when a cv::GComputation
object is defined. It may be useful to inspect what the resulting graph
looks like, to check whether it is generated correctly and whether it really
represents our alrogithm. It is also useful to learn the structure of
represents our algorithm. It is also useful to learn the structure of
the graph to see if it has any redundancies.
G-API allows dumping generated graphs to `.dot` files which then

@ -241,7 +241,7 @@ pipeline is compiled for streaming:
cv::GComputation::compileStreaming() triggers a special video-oriented
form of graph compilation where G-API is trying to optimize
throughput. The result of this compilation is an object of the special type
cv::GStreamingCompiled -- in constract to a traditional callable
cv::GStreamingCompiled -- in contrast to a traditional callable
cv::GCompiled, these objects are closer to media players in their
semantics.
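For illustration only (the file name and the trivial blur graph are assumptions, not part of this tutorial), a streaming pipeline is typically compiled and driven like this:
@code{.cpp}
#include <opencv2/gapi.hpp>
#include <opencv2/gapi/imgproc.hpp>
#include <opencv2/gapi/streaming/cap.hpp>

int main()
{
    cv::GMat in;
    cv::GMat out = cv::gapi::blur(in, cv::Size(3, 3));
    cv::GComputation graph(in, out);

    // Compile for streaming, attach a video source and run the "player".
    cv::GStreamingCompiled pipeline = graph.compileStreaming();
    pipeline.setSource(cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>("video.mp4"));
    pipeline.start();

    cv::Mat result;
    while (pipeline.pull(cv::gout(result)))
    {
        // consume result here
    }
    return 0;
}
@endcode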

@ -79,7 +79,7 @@ The main function is rather simple, as follows from the comments we do the follo
In general callback functions are used to react to some kind of signal, in our
case it is the trackbar's state change.
Explicit one-time call of `thresh_callback` is necessary to display
the "Contours" window simultaniously with the "Source" window.
the "Contours" window simultaneously with the "Source" window.
@add_toggle_cpp
@snippet samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp trackbar

@ -240,7 +240,7 @@ taken:
Hello OpenCV Sample
-------------------
Here are basic steps to guide you trough the process of creating a simple OpenCV-centric
Here are basic steps to guide you through the process of creating a simple OpenCV-centric
application. It will be capable of accessing camera output, processing it and displaying the result.
-# Open Eclipse IDE, create a new clean workspace, create a new Android project

@ -20,7 +20,7 @@ This pretty-printer can show element type, `is_continuous`, `is_submatrix` flags
# Installation {#tutorial_linux_gdb_pretty_printer_installation}
Move into `opencv/samples/gdb/`. Place `mat_pretty_printer.py` in a convinient place, rename `gdbinit` to `.gdbinit` and move it into your home folder. Change 'source' line of `.gdbinit` to point to your `mat_pretty_printer.py` path.
Move into `opencv/samples/gdb/`. Place `mat_pretty_printer.py` in a convenient place, rename `gdbinit` to `.gdbinit` and move it into your home folder. Change 'source' line of `.gdbinit` to point to your `mat_pretty_printer.py` path.
In order to check the version of Python bundled with your GDB, use the following commands from the GDB shell:
@ -34,5 +34,5 @@ If the version of python 3 installed in your system doesn't match the version in
# Usage {#tutorial_linux_gdb_pretty_printer_usage}
The fields in a debugger prefixed with `view_` are pseudo-fields added for convinience, the rest are left as is.
If you feel that the number of elements in truncated view is too low, you can edit `mat_pretty_printer.py` - `np.set_printoptions` controlls everything matrix display-related.
The fields in a debugger prefixed with `view_` are pseudo-fields added for convenience, the rest are left as is.
If you feel that the number of elements in truncated view is too low, you can edit `mat_pretty_printer.py` - `np.set_printoptions` controls everything matrix display-related.

@ -22,7 +22,7 @@ Introduction
In *OpenCV* all the image processing operations are usually carried out on the *Mat* structure. In
iOS, however, to render an image on screen it has to be an instance of the *UIImage* class. To
convert an *OpenCV Mat* to an *UIImage* we use the *Core Graphics* framework available in iOS. Below
is the code needed to covert back and forth between Mat's and UIImage's.
is the code needed to convert back and forth between Mat and UIImage objects.
@code{.m}
- (cv::Mat)cvMatFromUIImage:(UIImage *)image
{

@ -1,34 +1,24 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include <opencv2/ts/cuda_test.hpp> // EXPECT_MAT_NEAR
#include "test_precomp.hpp"
namespace opencv_test { namespace {
class CV_UndistortTest : public cvtest::BaseTest
class UndistortPointsTest : public ::testing::Test
{
public:
CV_UndistortTest();
~CV_UndistortTest();
protected:
void run(int);
private:
void generate3DPointCloud(vector<Point3f>& points, Point3f pmin = Point3f(-1,
-1, 5), Point3f pmax = Point3f(1, 1, 10));
void generateCameraMatrix(Mat& cameraMatrix);
void generateDistCoeffs(Mat& distCoeffs, int count);
double thresh;
RNG rng;
double thresh = 1.0e-2;
};
CV_UndistortTest::CV_UndistortTest()
{
thresh = 1.0e-2;
}
CV_UndistortTest::~CV_UndistortTest() {}
void CV_UndistortTest::generate3DPointCloud(vector<Point3f>& points, Point3f pmin, Point3f pmax)
void UndistortPointsTest::generate3DPointCloud(vector<Point3f>& points, Point3f pmin, Point3f pmax)
{
RNG rng_Point = cv::theRNG(); // fix the seed to use "fixed" input 3D points
for (size_t i = 0; i < points.size(); i++)
@ -39,31 +29,35 @@ void CV_UndistortTest::generate3DPointCloud(vector<Point3f>& points, Point3f pmi
points[i] = Point3f(_x, _y, _z);
}
}
void CV_UndistortTest::generateCameraMatrix(Mat& cameraMatrix)
void UndistortPointsTest::generateCameraMatrix(Mat& cameraMatrix)
{
const double fcMinVal = 1e-3;
const double fcMaxVal = 100;
cameraMatrix.create(3, 3, CV_64FC1);
cameraMatrix.setTo(Scalar(0));
cameraMatrix.at<double>(0,0) = rng.uniform(fcMinVal, fcMaxVal);
cameraMatrix.at<double>(1,1) = rng.uniform(fcMinVal, fcMaxVal);
cameraMatrix.at<double>(0,2) = rng.uniform(fcMinVal, fcMaxVal);
cameraMatrix.at<double>(1,2) = rng.uniform(fcMinVal, fcMaxVal);
cameraMatrix.at<double>(0,0) = theRNG().uniform(fcMinVal, fcMaxVal);
cameraMatrix.at<double>(1,1) = theRNG().uniform(fcMinVal, fcMaxVal);
cameraMatrix.at<double>(0,2) = theRNG().uniform(fcMinVal, fcMaxVal);
cameraMatrix.at<double>(1,2) = theRNG().uniform(fcMinVal, fcMaxVal);
cameraMatrix.at<double>(2,2) = 1;
}
void CV_UndistortTest::generateDistCoeffs(Mat& distCoeffs, int count)
void UndistortPointsTest::generateDistCoeffs(Mat& distCoeffs, int count)
{
distCoeffs = Mat::zeros(count, 1, CV_64FC1);
for (int i = 0; i < count; i++)
distCoeffs.at<double>(i,0) = rng.uniform(0.0, 1.0e-3);
distCoeffs.at<double>(i,0) = theRNG().uniform(-0.1, 0.1);
}
void CV_UndistortTest::run(int /* start_from */)
TEST_F(UndistortPointsTest, accuracy)
{
Mat intrinsics, distCoeffs;
generateCameraMatrix(intrinsics);
vector<Point3f> points(500);
generate3DPointCloud(points);
vector<Point2f> projectedPoints;
projectedPoints.resize(points.size());
@ -71,10 +65,15 @@ void CV_UndistortTest::run(int /* start_from */)
for (int idx = 0; idx < 3; idx++)
{
generateDistCoeffs(distCoeffs, modelMembersCount[idx]);
projectPoints(Mat(points), Mat::zeros(3,1,CV_64FC1), Mat::zeros(3,1,CV_64FC1), intrinsics, distCoeffs, projectedPoints);
projectPoints(Mat(points), Mat::zeros(3,1,CV_64FC1),
Mat::zeros(3,1,CV_64FC1), intrinsics,
distCoeffs, projectedPoints);
vector<Point2f> realUndistortedPoints;
projectPoints(Mat(points), Mat::zeros(3,1,CV_64FC1), Mat::zeros(3,1,CV_64FC1), intrinsics, Mat::zeros(4,1,CV_64FC1), realUndistortedPoints);
projectPoints(Mat(points), Mat::zeros(3,1,CV_64FC1),
Mat::zeros(3,1,CV_64FC1), intrinsics,
Mat::zeros(4,1,CV_64FC1), realUndistortedPoints);
Mat undistortedPoints;
undistortPoints(Mat(projectedPoints), undistortedPoints, intrinsics, distCoeffs);
@ -82,44 +81,43 @@ void CV_UndistortTest::run(int /* start_from */)
Mat p;
perspectiveTransform(undistortedPoints, p, intrinsics);
undistortedPoints = p;
double diff = cvtest::norm(Mat(realUndistortedPoints), undistortedPoints, NORM_L2);
if (diff > thresh)
{
ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY);
return;
}
ts->set_failed_test_info(cvtest::TS::OK);
EXPECT_MAT_NEAR(realUndistortedPoints, undistortedPoints.t(), thresh);
}
}
TEST(Calib3d_Undistort, accuracy) { CV_UndistortTest test; test.safe_run(); }
TEST(Calib3d_Undistort, stop_criteria)
TEST_F(UndistortPointsTest, stop_criteria)
{
Mat cameraMatrix = (Mat_<double>(3,3,CV_64F) << 857.48296979, 0, 968.06224829,
0, 876.71824265, 556.37145899,
0, 0, 1);
Mat distCoeffs = (Mat_<double>(5,1,CV_64F) <<
-2.57614020e-01, 8.77086999e-02, -2.56970803e-04, -5.93390389e-04, -1.52194091e-02);
RNG rng(2);
Point2d pt_distorted(rng.uniform(0.0, 1920.0), rng.uniform(0.0, 1080.0));
Point2d pt_distorted(theRNG().uniform(0.0, 1920.0), theRNG().uniform(0.0, 1080.0));
std::vector<Point2d> pt_distorted_vec;
pt_distorted_vec.push_back(pt_distorted);
const double maxError = 1e-6;
TermCriteria criteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 100, maxError);
std::vector<Point2d> pt_undist_vec;
undistortPoints(pt_distorted_vec, pt_undist_vec, cameraMatrix, distCoeffs, noArray(), noArray(), criteria);
std::vector<Point2d> pt_redistorted_vec;
std::vector<Point3d> pt_undist_vec_homogeneous;
pt_undist_vec_homogeneous.push_back( Point3d(pt_undist_vec[0].x, pt_undist_vec[0].y, 1.0) );
projectPoints(pt_undist_vec_homogeneous, Mat::zeros(3,1,CV_64F), Mat::zeros(3,1,CV_64F), cameraMatrix, distCoeffs, pt_redistorted_vec);
pt_undist_vec_homogeneous.emplace_back(pt_undist_vec[0].x, pt_undist_vec[0].y, 1.0 );
std::vector<Point2d> pt_redistorted_vec;
projectPoints(pt_undist_vec_homogeneous, Mat::zeros(3,1,CV_64F),
Mat::zeros(3,1,CV_64F), cameraMatrix, distCoeffs, pt_redistorted_vec);
const double obtainedError = sqrt( pow(pt_distorted.x - pt_redistorted_vec[0].x, 2) + pow(pt_distorted.y - pt_redistorted_vec[0].y, 2) );
ASSERT_LE(obtainedError, maxError);
}
TEST(undistortPoints, regression_14583)
TEST_F(UndistortPointsTest, regression_14583)
{
const int col = 720;
// const int row = 540;

@ -1469,10 +1469,13 @@ number of points in the view.
@param R Rectification transformation in the object space: 3x3 1-channel, or vector: 3x1/1x3
1-channel or 1x1 3-channel
@param P New camera intrinsic matrix (3x3) or new projection matrix (3x4)
@param criteria Termination criteria
@param undistorted Output array of image points, 1xN/Nx1 2-channel, or vector\<Point2f\> .
*/
CV_EXPORTS_W void undistortPoints(InputArray distorted, OutputArray undistorted,
InputArray K, InputArray D, InputArray R = noArray(), InputArray P = noArray());
InputArray K, InputArray D, InputArray R = noArray(), InputArray P = noArray(),
TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 10, 1e-8));
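// A hedged usage sketch (illustration only, not part of the API): undistort a
// couple of fisheye image points with an explicit termination criteria.
static inline void undistortPointsCriteriaExample()
{
    cv::Matx33d camMat(560.,   0., 640.,
                         0., 560., 400.,
                         0.,   0.,   1.);
    cv::Vec4d distCoeffs(0.01, -0.001, 0.0, 0.0);
    std::vector<cv::Point2d> distorted = { {100.5, 200.25}, {640.0, 400.0} }, undistorted;
    // Stop after at most 20 iterations or once the update drops below 1e-10.
    cv::TermCriteria criteria(cv::TermCriteria::MAX_ITER + cv::TermCriteria::EPS, 20, 1e-10);
    cv::fisheye::undistortPoints(distorted, undistorted, camMat, distCoeffs,
                                 cv::noArray(), cv::noArray(), criteria);
}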
/** @brief Computes undistortion and rectification maps for image transform by cv::remap(). If D is empty zero
distortion is used, if R or P is empty identity matrices are used.

@ -0,0 +1,44 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#include "perf_precomp.hpp"
namespace opencv_test { namespace {
using PerfIntType = perf::TestBaseWithParam<std::tuple<int>>;
PERF_TEST_P(PerfIntType, fisheye_undistortPoints,
(testing::Values(1e2, 1e3, 1e4)))
{
const cv::Size imageSize(1280, 800);
/* Set camera matrix */
const cv::Matx33d K(558.478087865323, 0, 620.458515360843,
0, 560.506767351568, 381.939424848348,
0, 0, 1);
/* Set distortion coefficients */
Mat D(1, 4, CV_64F);
theRNG().fill(D, RNG::UNIFORM, -1.e-5, 1.e-5);
int pointsNumber = std::get<0>(GetParam());
/* Create two-channel points matrix */
cv::Mat xy[2] = {};
xy[0].create(pointsNumber, 1, CV_64F);
theRNG().fill(xy[0], cv::RNG::UNIFORM, 0, imageSize.width); // x
xy[1].create(pointsNumber, 1, CV_64F);
theRNG().fill(xy[1], cv::RNG::UNIFORM, 0, imageSize.height); // y
cv::Mat points;
merge(xy, 2, points);
/* Use a fixed iteration count so the benchmark measures only the C++ code, not algorithm convergence */
TermCriteria termCriteria(TermCriteria::MAX_ITER, 10, 0);
Mat undistortedPoints;
TEST_CYCLE() fisheye::undistortPoints(points, undistortedPoints, K, D, noArray(), noArray(), termCriteria);
SANITY_CHECK_NOTHING();
}
}} // namespace

@ -320,7 +320,8 @@ void cv::fisheye::distortPoints(InputArray undistorted, OutputArray distorted, I
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// cv::fisheye::undistortPoints
void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted, InputArray K, InputArray D, InputArray R, InputArray P)
void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted, InputArray K, InputArray D,
InputArray R, InputArray P, TermCriteria criteria)
{
CV_INSTRUMENT_REGION();
@ -332,6 +333,8 @@ void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted
CV_Assert(R.empty() || R.size() == Size(3, 3) || R.total() * R.channels() == 3);
CV_Assert(D.total() == 4 && K.size() == Size(3, 3) && (K.depth() == CV_32F || K.depth() == CV_64F));
CV_Assert(criteria.isValid());
Vec2d f, c;
if (K.depth() == CV_32F)
{
@ -374,6 +377,15 @@ void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted
size_t n = distorted.total();
int sdepth = distorted.depth();
const bool isEps = criteria.type & TermCriteria::EPS;
/* Define max count for solver iterations */
int maxCount = std::numeric_limits<int>::max();
if (criteria.type & TermCriteria::MAX_ITER) {
maxCount = criteria.maxCount;
}
for(size_t i = 0; i < n; i++ )
{
Vec2d pi = sdepth == CV_32F ? (Vec2d)srcf[i] : srcd[i]; // image point
@ -391,13 +403,11 @@ void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted
double scale = 0.0;
if (fabs(theta_d) > 1e-8)
if (!isEps || fabs(theta_d) > criteria.epsilon)
{
// compensate distortion iteratively
const double EPS = 1e-8; // or std::numeric_limits<double>::epsilon();
for (int j = 0; j < 10; j++)
for (int j = 0; j < maxCount; j++)
{
double theta2 = theta*theta, theta4 = theta2*theta2, theta6 = theta4*theta2, theta8 = theta6*theta2;
double k0_theta2 = k[0] * theta2, k1_theta4 = k[1] * theta4, k2_theta6 = k[2] * theta6, k3_theta8 = k[3] * theta8;
@ -405,7 +415,8 @@ void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted
double theta_fix = (theta * (1 + k0_theta2 + k1_theta4 + k2_theta6 + k3_theta8) - theta_d) /
(1 + 3*k0_theta2 + 5*k1_theta4 + 7*k2_theta6 + 9*k3_theta8);
theta = theta - theta_fix;
if (fabs(theta_fix) < EPS)
if (isEps && (fabs(theta_fix) < criteria.epsilon))
{
converged = true;
break;
@ -424,7 +435,7 @@ void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted
// so we can check whether theta has changed the sign during the optimization
bool theta_flipped = ((theta_d < 0 && theta > 0) || (theta_d > 0 && theta < 0));
if (converged && !theta_flipped)
if ((converged || !isEps) && !theta_flipped)
{
Vec2d pu = pw * scale; //undistorted point

@ -101,6 +101,55 @@ TEST_F(fisheyeTest, projectPoints)
EXPECT_MAT_NEAR(distorted0, distorted2, 1e-10);
}
TEST_F(fisheyeTest, distortUndistortPoints)
{
int width = imageSize.width;
int height = imageSize.height;
/* Create test points */
std::vector<cv::Point2d> points0Vector;
cv::Mat principalPoints = (cv::Mat_<double>(5, 2) << K(0, 2), K(1, 2), // (cx, cy)
/* Image corners */
0, 0,
0, height,
width, 0,
width, height
);
/* Random points inside image */
cv::Mat xy[2] = {};
xy[0].create(100, 1, CV_64F);
theRNG().fill(xy[0], cv::RNG::UNIFORM, 0, width); // x
xy[1].create(100, 1, CV_64F);
theRNG().fill(xy[1], cv::RNG::UNIFORM, 0, height); // y
cv::Mat randomPoints;
merge(xy, 2, randomPoints);
cv::Mat points0;
cv::vconcat(principalPoints.reshape(2), randomPoints, points0);
/* Test with random D set */
for (size_t i = 0; i < 10; ++i) {
cv::Mat D(1, 4, CV_64F);
theRNG().fill(D, cv::RNG::UNIFORM, -0.00001, 0.00001);
/* Distort -> Undistort */
cv::Mat distortedPoints;
cv::fisheye::distortPoints(points0, distortedPoints, K, D);
cv::Mat undistortedPoints;
cv::fisheye::undistortPoints(distortedPoints, undistortedPoints, K, D);
EXPECT_MAT_NEAR(points0, undistortedPoints, 1e-8);
/* Undistort -> Distort */
cv::fisheye::undistortPoints(points0, undistortedPoints, K, D);
cv::fisheye::distortPoints(undistortedPoints, distortedPoints, K, D);
EXPECT_MAT_NEAR(points0, distortedPoints, 1e-8);
}
}
TEST_F(fisheyeTest, undistortImage)
{
cv::Matx33d theK = this->K;

@ -1739,6 +1739,16 @@ should be done separately if needed.
*/
CV_EXPORTS_W void transpose(InputArray src, OutputArray dst);
/** @brief Transpose for n-dimensional matrices.
*
* @note Input should be a continuous single-channel matrix.
* @param src input array.
* @param order a permutation of [0,1,..,N-1] where N is the number of axes of src.
* The ith axis of dst will correspond to the axis numbered order[i] of the input.
* @param dst output array of the same type as src.
*/
CV_EXPORTS_W void transposeND(InputArray src, const std::vector<int>& order, OutputArray dst);
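// A hedged usage sketch (illustration only): permute a continuous 2x3x4
// single-channel array into a 4x3x2 one with the new transposeND.
static inline void transposeNDExample()
{
    int sizes[] = {2, 3, 4};
    cv::Mat src(3, sizes, CV_32F), dst;
    cv::randu(src, 0, 1);
    cv::transposeND(src, {2, 1, 0}, dst);  // dst.size becomes [4, 3, 2]
}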
/** @brief Performs the matrix transformation of every array element.
The function cv::transform performs the matrix transformation of every

@ -223,6 +223,53 @@ namespace nested {
CV_WRAP static inline bool testEchoBooleanFunction(bool flag) {
return flag;
}
class CV_EXPORTS_W CV_WRAP_AS(ExportClassName) OriginalClassName
{
public:
struct CV_EXPORTS_W_SIMPLE Params
{
CV_PROP_RW int int_value;
CV_PROP_RW float float_value;
CV_WRAP explicit Params(int int_param = 123, float float_param = 3.5f)
{
int_value = int_param;
float_value = float_param;
}
};
explicit OriginalClassName(const OriginalClassName::Params& params = OriginalClassName::Params())
{
params_ = params;
}
CV_WRAP int getIntParam() const
{
return params_.int_value;
}
CV_WRAP float getFloatParam() const
{
return params_.float_value;
}
CV_WRAP static std::string originalName()
{
return "OriginalClassName";
}
CV_WRAP static Ptr<OriginalClassName>
create(const OriginalClassName::Params& params = OriginalClassName::Params())
{
return makePtr<OriginalClassName>(params);
}
private:
OriginalClassName::Params params_;
};
typedef OriginalClassName::Params OriginalClassName_Params;
} // namespace nested
namespace fs {

@ -1037,12 +1037,12 @@ inline scalartype v_reduce_sum(const _Tpvec& a) \
return (scalartype)msa_sum_##suffix(a.val); \
}
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint8x16, unsigned char, u8)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int8x16, char, s8)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint16x8, unsigned short, u16)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int16x8, short, s16)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint32x4, unsigned, u32)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int32x4, int, s32)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint8x16, unsigned short, u8)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int8x16, short, s8)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint16x8, unsigned, u16)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int16x8, int, s16)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint32x4, uint64_t, u32)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int32x4, int64_t, s32)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_float32x4, float, f32)
inline uint64 v_reduce_sum(const v_uint64x2& a)

@ -591,28 +591,26 @@ inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
{
return v_int16x8(vcombine_s16(
vshrn_n_s32(vmull_s16( vget_low_s16(a.val), vget_low_s16(b.val)), 16),
vshrn_n_s32(
#if CV_NEON_AARCH64
vmull_high_s16(a.val, b.val)
int32x4_t c = vmull_high_s16(a.val, b.val);
#else // #if CV_NEON_AARCH64
vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val))
int32x4_t c = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));
#endif // #if CV_NEON_AARCH64
, 16)
return v_int16x8(vcombine_s16(
vshrn_n_s32(vmull_s16( vget_low_s16(a.val), vget_low_s16(b.val)), 16),
vshrn_n_s32(c, 16)
));
}
inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
{
return v_uint16x8(vcombine_u16(
vshrn_n_u32(vmull_u16( vget_low_u16(a.val), vget_low_u16(b.val)), 16),
vshrn_n_u32(
#if CV_NEON_AARCH64
vmull_high_u16(a.val, b.val)
uint32x4_t c = vmull_high_u16(a.val, b.val);
#else // #if CV_NEON_AARCH64
vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val))
uint32x4_t c = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val));
#endif // #if CV_NEON_AARCH64
, 16)
return v_uint16x8(vcombine_u16(
vshrn_n_u32(vmull_u16( vget_low_u16(a.val), vget_low_u16(b.val)), 16),
vshrn_n_u32(c, 16)
));
}
@ -1937,10 +1935,14 @@ inline v_int32x4 v_round(const v_float32x4& a)
{
float32x4_t a_ = a.val;
int32x4_t result;
#if defined _MSC_VER
result = vcvtnq_s32_f32(a_);
#else
__asm__ ("fcvtns %0.4s, %1.4s"
: "=w"(result)
: "w"(a_)
: /* No clobbers */);
#endif
return v_int32x4(result);
}
#else

@ -230,6 +230,7 @@ inline vint16mf2_t vwcvt_x_x_v_i16mf2 (vint8mf4_t src, size_t vl)
//////////// Types ////////////
#ifndef __clang__
struct v_uint8x16
{
typedef uchar lane_type;
@ -531,7 +532,358 @@ struct v_float64x2
double val[2];
};
#endif
#else
struct v_uint8x16
{
typedef uchar lane_type;
enum { nlanes = 16 };
v_uint8x16() {}
explicit v_uint8x16(vuint8m1_t v)
{
*pval = v;
}
v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
{
uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15};
*pval = vle8_v_u8m1(v, nlanes);
}
operator vuint8m1_t() const
{
return *pval;
}
uchar get0() const
{
return vmv_x(*pval);
}
inline v_uint8x16& operator=(const v_uint8x16& vec) {
*pval = *(vec.pval);
return *this;
}
inline v_uint8x16(const v_uint8x16& vec) {
*pval = *(vec.pval);
}
uchar val[16];
vuint8m1_t* pval = (vuint8m1_t*)val;
};
struct v_int8x16
{
typedef schar lane_type;
enum { nlanes = 16 };
v_int8x16() {}
explicit v_int8x16(vint8m1_t v)
{
*pval = v;
}
v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
{
schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15};
*pval = vle8_v_i8m1(v, nlanes);
}
operator vint8m1_t() const
{
return *pval;
}
schar get0() const
{
return vmv_x(*pval);
}
inline v_int8x16& operator=(const v_int8x16& vec) {
*pval = *(vec.pval);
return *this;
}
inline v_int8x16(const v_int8x16& vec) {
*pval = *(vec.pval);
}
schar val[16];
vint8m1_t* pval = (vint8m1_t*)val;
};
struct v_uint16x8
{
typedef ushort lane_type;
enum { nlanes = 8 };
v_uint16x8() {}
explicit v_uint16x8(vuint16m1_t v)
{
*pval = v;
}
v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
{
ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7};
*pval = vle16_v_u16m1(v, nlanes);
}
operator vuint16m1_t() const
{
return *pval;
}
ushort get0() const
{
return vmv_x(*pval);
}
inline v_uint16x8& operator=(const v_uint16x8& vec) {
*pval = *(vec.pval);
return *this;
}
inline v_uint16x8(const v_uint16x8& vec) {
*pval = *(vec.pval);
}
ushort val[8];
vuint16m1_t* pval = (vuint16m1_t*)val;
};
struct v_int16x8
{
typedef short lane_type;
enum { nlanes = 8 };
v_int16x8() {}
explicit v_int16x8(vint16m1_t v)
{
*pval = v;
}
v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
{
short v[] = {v0, v1, v2, v3, v4, v5, v6, v7};
*pval = vle16_v_i16m1(v, nlanes);
}
operator vint16m1_t() const
{
return *pval;
}
short get0() const
{
return vmv_x(*pval);
}
inline v_int16x8& operator=(const v_int16x8& vec) {
*pval = *(vec.pval);
return *this;
}
inline v_int16x8(const v_int16x8& vec) {
*pval = *(vec.pval);
}
short val[8];
vint16m1_t* pval = (vint16m1_t*)val;
};
struct v_uint32x4
{
typedef unsigned lane_type;
enum { nlanes = 4 };
v_uint32x4() {}
explicit v_uint32x4(vuint32m1_t v)
{
*pval = v;
}
v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
{
unsigned v[] = {v0, v1, v2, v3};
*pval = vle32_v_u32m1(v, nlanes);
}
operator vuint32m1_t() const
{
return *pval;
}
unsigned get0() const
{
return vmv_x(*pval);
}
inline v_uint32x4& operator=(const v_uint32x4& vec) {
*pval = *(vec.pval);
return *this;
}
inline v_uint32x4(const v_uint32x4& vec) {
*pval = *(vec.pval);
}
unsigned val[4];
vuint32m1_t* pval = (vuint32m1_t*)val;
};
struct v_int32x4
{
typedef int lane_type;
enum { nlanes = 4 };
v_int32x4() {}
explicit v_int32x4(vint32m1_t v)
{
*pval = v;
}
v_int32x4(int v0, int v1, int v2, int v3)
{
int v[] = {v0, v1, v2, v3};
*pval = vle32_v_i32m1(v, nlanes);
}
operator vint32m1_t() const
{
return *pval;
}
int get0() const
{
return vmv_x(*pval);
}
inline v_int32x4& operator=(const v_int32x4& vec) {
*pval = *(vec.pval);
return *this;
}
inline v_int32x4(const v_int32x4& vec) {
*pval = *(vec.pval);
}
int val[4];
vint32m1_t* pval = (vint32m1_t*)val;
};
struct v_float32x4
{
typedef float lane_type;
enum { nlanes = 4 };
v_float32x4() {}
explicit v_float32x4(vfloat32m1_t v)
{
*pval = v;
}
v_float32x4(float v0, float v1, float v2, float v3)
{
float v[] = {v0, v1, v2, v3};
*pval = vle32_v_f32m1(v, nlanes);
}
operator vfloat32m1_t() const
{
return *pval;
}
float get0() const
{
return vfmv_f(*pval);
}
inline v_float32x4& operator=(const v_float32x4& vec) {
*pval = *(vec.pval);
return *this;
}
inline v_float32x4(const v_float32x4& vec) {
*pval = *(vec.pval);
}
float val[4];
vfloat32m1_t* pval = (vfloat32m1_t*)val;
};
struct v_uint64x2
{
typedef uint64 lane_type;
enum { nlanes = 2 };
v_uint64x2() {}
explicit v_uint64x2(vuint64m1_t v)
{
*pval = v;
}
v_uint64x2(uint64 v0, uint64 v1)
{
uint64 v[] = {v0, v1};
*pval = vle64_v_u64m1(v, nlanes);
}
operator vuint64m1_t() const
{
return *pval;
}
uint64 get0() const
{
return vmv_x(*pval);
}
inline v_uint64x2& operator=(const v_uint64x2& vec) {
*pval = *(vec.pval);
return *this;
}
inline v_uint64x2(const v_uint64x2& vec) {
*pval = *(vec.pval);
}
uint64 val[2];
vuint64m1_t* pval = (vuint64m1_t*)val;
};
struct v_int64x2
{
typedef int64 lane_type;
enum { nlanes = 2 };
v_int64x2() {}
explicit v_int64x2(vint64m1_t v)
{
*pval = v;
}
v_int64x2(int64 v0, int64 v1)
{
int64 v[] = {v0, v1};
*pval = vle64_v_i64m1(v, nlanes);
}
operator vint64m1_t() const
{
return *pval;
}
int64 get0() const
{
return vmv_x(*pval);
}
inline v_int64x2& operator=(const v_int64x2& vec) {
*pval = *(vec.pval);
return *this;
}
inline v_int64x2(const v_int64x2& vec) {
*pval = *(vec.pval);
}
int64 val[2];
vint64m1_t* pval = (vint64m1_t*)val;
};
#if CV_SIMD128_64F
struct v_float64x2
{
typedef double lane_type;
enum { nlanes = 2 };
v_float64x2() {}
explicit v_float64x2(vfloat64m1_t v)
{
*pval = v;
}
v_float64x2(double v0, double v1)
{
double v[] = {v0, v1};
*pval = vle64_v_f64m1(v, nlanes);
}
operator vfloat64m1_t() const
{
return *pval;
}
double get0() const
{
return vfmv_f(*pval);
}
inline v_float64x2& operator=(const v_float64x2& vec) {
*pval = *(vec.pval);
return *this;
}
inline v_float64x2(const v_float64x2& vec) {
*pval = *(vec.pval);
}
double val[2];
vfloat64m1_t* pval = (vfloat64m1_t*)val;
};
#endif // CV_SIMD128_64F
#endif // __clang__
//////////// Initial ////////////
@ -1819,6 +2171,7 @@ inline v_float32x4 v_cvt_f32(const v_int32x4& a)
}
#if CV_SIMD128_64F
#ifndef __clang__
inline v_float32x4 v_cvt_f32(const v_float64x2& a)
{
double arr[4] = {a.val[0], a.val[1], 0, 0};
@ -1832,6 +2185,18 @@ inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b)
vfloat64m2_t tmp = vle64_v_f64m2(arr, 4);
return v_float32x4(vfncvt_f_f_w_f32m1(tmp, 4));
}
#else
inline v_float32x4 v_cvt_f32(const v_float64x2& a)
{
vfloat64m2_t zero = vfmv_v_f_f64m2(0, 4);
return v_float32x4(vfncvt_f_f_w_f32m1(vset_v_f64m1_f64m2(zero, 0, a), 4));
}
inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b)
{
vfloat64m2_t dst = vlmul_ext_v_f64m1_f64m2(a);
return v_float32x4(vfncvt_f_f_w_f32m1(vset_v_f64m1_f64m2(dst, 1, b), 4));
}
#endif
inline v_float64x2 v_cvt_f64(const v_int32x4& a)
{
@ -2351,6 +2716,7 @@ OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint64x2, v_int64x2, uint64, int64, u64)
//////////// SignMask ////////////
#ifndef __clang__
#define OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(_Tpvec, _Tp, suffix, vl, shift) \
inline int v_signmask(const _Tpvec& a) \
{ \
@ -2381,6 +2747,36 @@ inline int v_signmask(const v_float64x2& a)
{ return v_signmask(v_reinterpret_as_u64(a)); }
#endif
#else
#define OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(_Tpvec, width, vl) \
inline int v_signmask(const _Tpvec& a) \
{ \
uint8_t ans[16] = {0};\
vsm(ans, vmslt(a, 0, vl), vl);\
return reinterpret_cast<int*>(ans)[0];\
}
OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int8x16, 8, 16)
OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int16x8, 16, 8)
OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int32x4, 32, 4)
OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_int64x2, 64, 2)
inline int v_signmask(const v_uint8x16& a)
{ return v_signmask(v_reinterpret_as_s8(a)); }
inline int v_signmask(const v_uint16x8& a)
{ return v_signmask(v_reinterpret_as_s16(a)); }
inline int v_signmask(const v_uint32x4& a)
{ return v_signmask(v_reinterpret_as_s32(a)); }
inline int v_signmask(const v_float32x4& a)
{ return v_signmask(v_reinterpret_as_s32(a)); }
inline int v_signmask(const v_uint64x2& a)
{ return v_signmask(v_reinterpret_as_s64(a)); }
#if CV_SIMD128_64F
inline int v_signmask(const v_float64x2& a)
{ return v_signmask(v_reinterpret_as_s64(a)); }
#endif
#endif
//////////// Scan forward ////////////
@ -2520,6 +2916,7 @@ inline v_int32x4 v_trunc(const v_float32x4& a)
return v_int32x4(vfcvt_rtz_x_f_v_i32m1(a, 4));
}
#if CV_SIMD128_64F
#ifndef __clang__
inline v_int32x4 v_round(const v_float64x2& a)
{
double arr[4] = {a.val[0], a.val[1], 0, 0};
@ -2554,6 +2951,42 @@ inline v_int32x4 v_trunc(const v_float64x2& a)
vfloat64m2_t tmp = vle64_v_f64m2(arr, 4);
return v_int32x4(vfncvt_rtz_x_f_w_i32m1(tmp, 4));
}
#else
inline v_int32x4 v_round(const v_float64x2& a)
{
vfloat64m2_t zero = vfmv_v_f_f64m2(0, 4);
return v_int32x4(vfncvt_x_f_w_i32m1(vset_v_f64m1_f64m2(zero, 0, a), 4));
}
inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b)
{
vfloat64m2_t dst = vlmul_ext_v_f64m1_f64m2(a);
return v_int32x4(vfncvt_x_f_w_i32m1(vset_v_f64m1_f64m2(dst, 1, b), 4));
}
inline v_int32x4 v_floor(const v_float64x2& a)
{
vfloat64m2_t dst = vfmv_v_f_f64m2(0, 4);
dst = vset_v_f64m1_f64m2(dst, 0, a);
dst = vfsub_vf_f64m2(dst, 0.5, 2);
return v_int32x4(vfncvt_x_f_w_i32m1(dst, 4));
}
inline v_int32x4 v_ceil(const v_float64x2& a)
{
vfloat64m2_t dst = vfmv_v_f_f64m2(0, 4);
dst = vset_v_f64m1_f64m2(dst, 0, a);
dst = vfadd_vf_f64m2(dst, 0.5, 2);
return v_int32x4(vfncvt_x_f_w_i32m1(dst, 4));
}
inline v_int32x4 v_trunc(const v_float64x2& a)
{
vfloat64m2_t zero = vfmv_v_f_f64m2(0, 4);
return v_int32x4(vfncvt_rtz_x_f_w_i32m1(vset_v_f64m1_f64m2(zero, 0, a), 4));
}
#endif
#endif

@ -719,7 +719,7 @@ typedef double v1f64 __attribute__ ((vector_size(8), aligned(8)));
v2i64 _c; \
_b = __builtin_msa_hadd_s_w(__a, __a); \
_c = __builtin_msa_hadd_s_d(_b, _b); \
(int16_t)(_c[0] + _c[1]); \
(int32_t)(_c[0] + _c[1]); \
})
@ -736,7 +736,7 @@ typedef double v1f64 __attribute__ ((vector_size(8), aligned(8)));
({ \
v2i64 _b; \
_b = __builtin_msa_hadd_s_d(__a, __a); \
(int32_t)(_b[0] + _b[1]); \
(int64_t)(_b[0] + _b[1]); \
})
/* uint8_t msa_sum_u8(v16u8 __a)*/
@ -756,7 +756,7 @@ typedef double v1f64 __attribute__ ((vector_size(8), aligned(8)));
v4i32 _c32; \
_b16 = __builtin_msa_hadd_s_h(__a, __a); \
_c32 = __builtin_msa_hadd_s_w(_b16, _b16); \
(int8_t)msa_sum_s32(_c32); \
(int16_t)msa_sum_s32(_c32); \
})
/* float msa_sum_f32(v4f32 __a)*/

@ -382,6 +382,14 @@ public:
Vec(const Vec<_Tp, cn>& v);
static Vec all(_Tp alpha);
static Vec ones();
static Vec randn(_Tp a, _Tp b);
static Vec randu(_Tp a, _Tp b);
static Vec zeros();
#ifdef CV_CXX11
static Vec diag(_Tp alpha) = delete;
static Vec eye() = delete;
#endif
//! per-element multiplication
Vec mul(const Vec<_Tp, cn>& v) const;
@ -1061,6 +1069,18 @@ Vec<_Tp, cn> Vec<_Tp, cn>::all(_Tp alpha)
return v;
}
template<typename _Tp, int cn> inline
Vec<_Tp, cn> Vec<_Tp, cn>::ones()
{
return Vec::all(1);
}
template<typename _Tp, int cn> inline
Vec<_Tp, cn> Vec<_Tp, cn>::zeros()
{
return Vec::all(0);
}
template<typename _Tp, int cn> inline
Vec<_Tp, cn> Vec<_Tp, cn>::mul(const Vec<_Tp, cn>& v) const
{

@ -230,6 +230,22 @@ Matx<_Tp,m,n> Matx<_Tp,m,n>::randn(_Tp a, _Tp b)
return M;
}
template<typename _Tp, int cn> inline
Vec<_Tp, cn> Vec<_Tp, cn>::randu(_Tp a, _Tp b)
{
Vec<_Tp,cn> V;
cv::randu(V, Scalar(a), Scalar(b));
return V;
}
template<typename _Tp, int cn> inline
Vec<_Tp, cn> Vec<_Tp, cn>::randn(_Tp a, _Tp b)
{
Vec<_Tp,cn> V;
cv::randn(V, Scalar(a), Scalar(b));
return V;
}
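// A hedged usage sketch of the new static helpers (illustration only).
static inline void vecStaticHelpersExample()
{
    cv::Vec3f ones  = cv::Vec3f::ones();           // (1, 1, 1)
    cv::Vec3f zeros = cv::Vec3f::zeros();          // (0, 0, 0)
    cv::Vec3f uni   = cv::Vec3f::randu(0.f, 1.f);  // uniformly distributed in [0, 1)
    cv::Vec3f gauss = cv::Vec3f::randn(0.f, 1.f);  // normally distributed, mean 0, stddev 1
    CV_UNUSED(ones); CV_UNUSED(zeros); CV_UNUSED(uni); CV_UNUSED(gauss);
}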
template<typename _Tp, int m, int n> inline
Matx<_Tp, n, m> Matx<_Tp, m, n>::inv(int method, bool *p_is_ok /*= NULL*/) const
{

@ -324,6 +324,7 @@ VSX_IMPL_1RG(vec_udword2, vec_float4, xvcvspuxds, vec_ctulo)
#define VSX_IMPL_CONVERT(rt, rg, fnm) \
VSX_FINLINE(rt) fnm(const rg& a) { return __builtin_convertvector(a, rt); }
#ifndef vec_permi
#if __clang_major__ < 5
// implement vec_permi in a dirty way
# define VSX_IMPL_CLANG_4_PERMI(Tvec) \
@ -351,12 +352,14 @@ VSX_FINLINE(rt) fnm(const rg& a) { return __builtin_convertvector(a, rt); }
// vec_xxpermdi is missing little-endian supports in clang 4 just like gcc4
# define vec_permi(a, b, c) vec_xxpermdi(b, a, (3 ^ (((c) & 1) << 1 | (c) >> 1)))
#endif // __clang_major__ < 5
#endif
// shift left double by word immediate
#ifndef vec_sldw
# define vec_sldw vec_xxsldwi
#endif
#if __clang_major__ < 13
// Implement vec_rsqrt since clang only supports vec_rsqrte
#ifndef vec_rsqrt
VSX_FINLINE(vec_float4) vec_rsqrt(const vec_float4& a)
@ -380,6 +383,7 @@ VSX_FINLINE(vec_udword2) vec_promote(unsigned long long a, int b)
ret[b & 1] = a;
return ret;
}
#endif
// vec_popcnt should return unsigned but clang has different thought just like gcc in vec_vpopcnt
#define VSX_IMPL_POPCNTU(Tvec, Tvec2, ucast) \

@ -78,6 +78,26 @@
"(void)divide:(double)scale src2:(Mat*)src2 dst:(Mat*)dst dtype:(int)dtype" : { "src2" : {"name" : "src"} }
}
},
"header_fix" : {
"Core": {
"pow" : {
"prolog" : "#pragma push_macro(\"pow\")\n#undef pow",
"epilog" : "#pragma pop_macro(\"pow\")"
},
"sqrt" : {
"prolog" : "#pragma push_macro(\"sqrt\")\n#undef sqrt",
"epilog" : "#pragma pop_macro(\"sqrt\")"
},
"exp" : {
"prolog" : "#pragma push_macro(\"exp\")\n#undef exp",
"epilog" : "#pragma pop_macro(\"exp\")"
},
"log" : {
"prolog" : "#pragma push_macro(\"log\")\n#undef log",
"epilog" : "#pragma pop_macro(\"log\")"
}
}
},
"type_dict" : {
"Algorithm": {
"objc_type": "Algorithm*"

@ -1,4 +1,5 @@
#include "perf_precomp.hpp"
#include <numeric>
namespace opencv_test
{
@ -393,6 +394,29 @@ PERF_TEST_P_(BinaryOpTest, reciprocal)
SANITY_CHECK_NOTHING();
}
PERF_TEST_P_(BinaryOpTest, transposeND)
{
Size sz = get<0>(GetParam());
int type = get<1>(GetParam());
cv::Mat a = Mat(sz, type).reshape(1);
std::vector<int> order(a.dims);
std::iota(order.begin(), order.end(), 0);
std::reverse(order.begin(), order.end());
std::vector<int> new_sz(a.dims);
std::copy(a.size.p, a.size.p + a.dims, new_sz.begin());
std::reverse(new_sz.begin(), new_sz.end());
cv::Mat b = Mat(new_sz, type);
declare.in(a,WARMUP_RNG).out(b);
TEST_CYCLE() cv::transposeND(a, order, b);
SANITY_CHECK_NOTHING();
}
INSTANTIATE_TEST_CASE_P(/*nothing*/ , BinaryOpTest,
testing::Combine(
testing::Values(szVGA, sz720p, sz1080p),

@ -1385,6 +1385,7 @@ void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst
OpenCL_D3D11_NV* impl_nv = ctx.getUserContext<OpenCL_D3D11_NV>().get();
if (impl_nv) {
__convertFromD3D11Texture2DNV(pD3D11Texture2D,dst);
return;
}
#endif
OpenCL_D3D11* impl = ctx.getUserContext<OpenCL_D3D11>().get();

@ -270,6 +270,9 @@ void cartToPolar( InputArray src1, InputArray src2,
{
CV_INSTRUMENT_REGION();
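// In-place operation (an output aliasing an input) is not supported; reject it explicitly.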
CV_Assert(src1.getObj() != dst1.getObj() && src1.getObj() != dst2.getObj() &&
src2.getObj() != dst1.getObj() && src2.getObj() != dst2.getObj());
CV_OCL_RUN(dst1.isUMat() && dst2.isUMat(),
ocl_cartToPolar(src1, src2, dst1, dst2, angleInDegrees))
@ -564,6 +567,9 @@ void polarToCart( InputArray src1, InputArray src2,
{
CV_INSTRUMENT_REGION();
CV_Assert(src1.getObj() != dst1.getObj() && src1.getObj() != dst2.getObj() &&
src2.getObj() != dst1.getObj() && src2.getObj() != dst2.getObj());
int type = src2.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
CV_Assert((depth == CV_32F || depth == CV_64F) && (src1.empty() || src1.type() == type));

@ -664,6 +664,8 @@ void Mat::create(int d, const int* _sizes, int _type)
if( data && (d == dims || (d == 1 && dims <= 2)) && _type == type() )
{
if ( dims == 1 && (d == 1 && _sizes[0] == size[0]) )
return;
if( d == 2 && rows == _sizes[0] && cols == _sizes[1] )
return;
for( i = 0; i < d; i++ )

@ -4,6 +4,7 @@
#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#include "opencv2/core/detail/dispatch_helper.impl.hpp"
namespace cv {
@ -282,6 +283,72 @@ void transpose( InputArray _src, OutputArray _dst )
}
void transposeND(InputArray src_, const std::vector<int>& order, OutputArray dst_)
{
Mat inp = src_.getMat();
CV_Assert(inp.isContinuous());
CV_CheckEQ(inp.channels(), 1, "Input array should be single-channel");
CV_CheckEQ(order.size(), static_cast<size_t>(inp.dims), "Number of dimensions shouldn't change");
auto order_ = order;
std::sort(order_.begin(), order_.end());
for (size_t i = 0; i < order_.size(); ++i)
{
CV_CheckEQ(static_cast<size_t>(order_[i]), i, "New order should be a valid permutation of the old one");
}
std::vector<int> newShape(order.size());
for (size_t i = 0; i < order.size(); ++i)
{
newShape[i] = inp.size[order[i]];
}
dst_.create(static_cast<int>(newShape.size()), newShape.data(), inp.type());
Mat out = dst_.getMat();
CV_Assert(out.isContinuous());
CV_Assert(inp.data != out.data);
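// Trailing axes that keep their original positions form one contiguous block,
// which can be copied with a single memcpy per outer step.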
int continuous_idx = 0;
for (int i = static_cast<int>(order.size()) - 1; i >= 0; --i)
{
if (order[i] != i)
{
continuous_idx = i + 1;
break;
}
}
size_t continuous_size = continuous_idx == 0 ? out.total() : out.step1(continuous_idx - 1);
size_t outer_size = out.total() / continuous_size;
std::vector<size_t> steps(order.size());
for (int i = 0; i < static_cast<int>(steps.size()); ++i)
{
steps[i] = inp.step1(order[i]);
}
auto* src = inp.ptr<const unsigned char>();
auto* dst = out.ptr<unsigned char>();
size_t src_offset = 0;
size_t es = out.elemSize();
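// Copy one contiguous block per outer index; src_offset walks the input in the
// permuted order, incremented odometer-style over the leading output axes.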
for (size_t i = 0; i < outer_size; ++i)
{
std::memcpy(dst, src + es * src_offset, es * continuous_size);
dst += es * continuous_size;
for (int j = continuous_idx - 1; j >= 0; --j)
{
src_offset += steps[j];
if ((src_offset / steps[j]) % out.size[j] != 0)
{
break;
}
src_offset -= steps[j] * out.size[j];
}
}
}
#if CV_SIMD128
template<typename V> CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
{

@ -817,7 +817,7 @@ char *FileStorage::Impl::gets(size_t maxCount) {
int delta = (int) strlen(ptr);
ofs += delta;
maxCount -= delta;
if (ptr[delta - 1] == '\n' || maxCount == 0)
if (delta == 0 || ptr[delta - 1] == '\n' || maxCount == 0)
break;
if (delta == count)
buffer.resize((size_t) (buffer.size() * 1.5));

@ -615,6 +615,9 @@ struct HWFeatures
#if defined _ARM_ && (defined(_WIN32_WCE) && _WIN32_WCE >= 0x800)
have[CV_CPU_NEON] = true;
#endif
#if defined _M_ARM64
have[CV_CPU_NEON] = true;
#endif
#ifdef __riscv_vector
have[CV_CPU_RISCVV] = true;
#endif

@ -15,18 +15,33 @@ typedef VAStatus (*FN_vaDestroyImage)(VADisplay dpy, VAImageID image);
typedef VAStatus (*FN_vaMapBuffer)(VADisplay dpy, VABufferID buf_id, void **pbuf);
typedef VAStatus (*FN_vaSyncSurface)(VADisplay dpy, VASurfaceID render_target);
typedef VAStatus (*FN_vaUnmapBuffer)(VADisplay dpy, VABufferID buf_id);
typedef int (*FN_vaMaxNumImageFormats)(VADisplay dpy);
typedef VAStatus (*FN_vaQueryImageFormats)(VADisplay dpy, VAImageFormat *format_list, int *num_formats);
typedef VAStatus (*FN_vaCreateImage)(VADisplay dpy, VAImageFormat *format, int width, int height, VAImage *image);
typedef VAStatus (*FN_vaPutImage)(VADisplay dpy, VASurfaceID surface, VAImageID image, int src_x, int src_y, unsigned int src_width, unsigned int src_height, int dest_x, int dest_y, unsigned int dest_width, unsigned int dest_height);
typedef VAStatus (*FN_vaGetImage)(VADisplay dpy, VASurfaceID surface, int x, int y, unsigned int width, unsigned int height, VAImageID image);
static FN_vaDeriveImage fn_vaDeriveImage = NULL;
static FN_vaDestroyImage fn_vaDestroyImage = NULL;
static FN_vaMapBuffer fn_vaMapBuffer = NULL;
static FN_vaSyncSurface fn_vaSyncSurface = NULL;
static FN_vaUnmapBuffer fn_vaUnmapBuffer = NULL;
static FN_vaMaxNumImageFormats fn_vaMaxNumImageFormats = NULL;
static FN_vaQueryImageFormats fn_vaQueryImageFormats = NULL;
static FN_vaCreateImage fn_vaCreateImage = NULL;
static FN_vaPutImage fn_vaPutImage = NULL;
static FN_vaGetImage fn_vaGetImage = NULL;
#define vaDeriveImage fn_vaDeriveImage
#define vaDestroyImage fn_vaDestroyImage
#define vaMapBuffer fn_vaMapBuffer
#define vaSyncSurface fn_vaSyncSurface
#define vaUnmapBuffer fn_vaUnmapBuffer
#define vaMaxNumImageFormats fn_vaMaxNumImageFormats
#define vaQueryImageFormats fn_vaQueryImageFormats
#define vaCreateImage fn_vaCreateImage
#define vaPutImage fn_vaPutImage
#define vaGetImage fn_vaGetImage
static std::shared_ptr<cv::plugin::impl::DynamicLib> loadLibVA()
@ -76,6 +91,11 @@ static void init_libva()
VA_LOAD_SYMBOL(vaMapBuffer);
VA_LOAD_SYMBOL(vaSyncSurface);
VA_LOAD_SYMBOL(vaUnmapBuffer);
VA_LOAD_SYMBOL(vaMaxNumImageFormats);
VA_LOAD_SYMBOL(vaQueryImageFormats);
VA_LOAD_SYMBOL(vaCreateImage);
VA_LOAD_SYMBOL(vaPutImage);
VA_LOAD_SYMBOL(vaGetImage);
initialized = true;
}
if (!library)

@ -3,6 +3,7 @@
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
#include "ref_reduce_arg.impl.hpp"
#include <algorithm>
namespace opencv_test { namespace {
@ -2128,6 +2129,79 @@ TEST(Core_minMaxIdx, regression_9207_1)
}
class TransposeND : public testing::TestWithParam< tuple<std::vector<int>, perf::MatType> >
{
public:
std::vector<int> m_shape;
int m_type;
void SetUp()
{
std::tie(m_shape, m_type) = GetParam();
}
};
TEST_P(TransposeND, basic)
{
Mat inp(m_shape, m_type);
randu(inp, 0, 255);
std::vector<int> order(m_shape.size());
std::iota(order.begin(), order.end(), 0);
auto transposer = [&order] (const std::vector<int>& id)
{
std::vector<int> ret(id.size());
for (size_t i = 0; i < id.size(); ++i)
{
ret[i] = id[order[i]];
}
return ret;
};
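// Odometer-style increment of the multi-dimensional index over inp.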
auto advancer = [&inp] (std::vector<int>& id)
{
for (int j = static_cast<int>(id.size() - 1); j >= 0; --j)
{
++id[j];
if (id[j] != inp.size[j])
{
break;
}
id[j] = 0;
}
};
do
{
Mat out;
cv::transposeND(inp, order, out);
std::vector<int> id(order.size());
for (size_t i = 0; i < inp.total(); ++i)
{
auto new_id = transposer(id);
switch (inp.type())
{
case CV_8UC1:
ASSERT_EQ(inp.at<uint8_t>(id.data()), out.at<uint8_t>(new_id.data()));
break;
case CV_32FC1:
ASSERT_EQ(inp.at<float>(id.data()), out.at<float>(new_id.data()));
break;
default:
FAIL() << "Unsupported type: " << inp.type();
}
advancer(id);
}
} while (std::next_permutation(order.begin(), order.end()));
}
INSTANTIATE_TEST_CASE_P(Arithm, TransposeND, testing::Combine(
testing::Values(std::vector<int>{2, 3, 4}, std::vector<int>{5, 10}),
testing::Values(perf::MatType(CV_8UC1), CV_32FC1)
));
TEST(Core_minMaxIdx, regression_9207_2)
{
const int rows = 13;
@ -2546,5 +2620,36 @@ TEST(Core_Magnitude, regression_19506)
}
}
TEST(Core_CartPolar, inplace)
{
RNG& rng = TS::ptr()->get_rng();
cv::Mat1d A[2] = {cv::Mat1d(10, 10), cv::Mat1d(10, 10)};
cv::Mat1d B[2], C[2];
cv::UMat uA[2];
for(int i = 0; i < 2; ++i)
{
cvtest::randUni(rng, A[i], Scalar::all(-1000), Scalar::all(1000));
A[i].copyTo(uA[i]);
}
// Reverse
cv::cartToPolar(A[0], A[1], B[0], B[1], false);
cv::polarToCart(B[0], B[1], C[0], C[1], false);
EXPECT_MAT_NEAR(A[0], C[0], 2);
EXPECT_MAT_NEAR(A[1], C[1], 2);
// Inplace
EXPECT_THROW(cv::polarToCart(B[0], B[1], B[0], B[1], false), cv::Exception);
EXPECT_THROW(cv::polarToCart(B[0], B[1], B[1], B[0], false), cv::Exception);
EXPECT_THROW(cv::cartToPolar(A[0], A[1], A[0], A[1], false), cv::Exception);
EXPECT_THROW(cv::cartToPolar(A[0], A[1], A[1], A[0], false), cv::Exception);
// Inplace OCL
EXPECT_THROW(cv::polarToCart(uA[0], uA[1], uA[0], uA[1]), cv::Exception);
EXPECT_THROW(cv::polarToCart(uA[0], uA[1], uA[1], uA[0]), cv::Exception);
EXPECT_THROW(cv::cartToPolar(uA[0], uA[1], uA[0], uA[1]), cv::Exception);
EXPECT_THROW(cv::cartToPolar(uA[0], uA[1], uA[1], uA[0]), cv::Exception);
}
}} // namespace

@ -3,6 +3,8 @@
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
#include <fstream>
namespace opencv_test { namespace {
static SparseMat cvTsGetRandomSparseMat(int dims, const int* sz, int type,
@ -799,6 +801,25 @@ TEST(Core_InputOutput, filestorage_base64_basic_memory_JSON)
test_filestorage_basic(cv::FileStorage::WRITE_BASE64, ".json", true, true);
}
// issue #21851
TEST(Core_InputOutput, filestorage_heap_overflow)
{
const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info();
CV_Assert(test_info);
std::string name = std::string(test_info->test_case_name()) + "--" + test_info->name();
const char data[] = {0x00, 0x2f, 0x4a, 0x4a, 0x50, 0x4a, 0x4a };
std::ofstream file;
file.open(name, std::ios_base::binary);
assert(file.is_open());
file.write(data, sizeof(data));
file.close();
// This just shouldn't segfault, otherwise it's fine
EXPECT_ANY_THROW(FileStorage(name, FileStorage::READ));
}
TEST(Core_InputOutput, filestorage_base64_valid_call)
{

@ -2370,6 +2370,18 @@ TEST(Mat, ptrVecni_20044)
EXPECT_EQ(int(6), *(ci));
}
TEST(Mat, VecMatx_4650)
{
// Makes sure the following compiles.
cv::Vec3b a;
a = cv::Vec3b::ones();
a = cv::Vec3b::zeros();
a = cv::Vec3b::randn(0, 10);
a = cv::Vec3b::randu(0, 10);
}
TEST(Mat, reverse_iterator_19967)
{
// empty iterator (#16855)
@ -2448,4 +2460,16 @@ TEST(Mat, reverse_iterator_19967)
}
TEST(Mat, Recreate1DMatWithSameMeta)
{
std::vector<int> dims = {100};
auto depth = CV_8U;
cv::Mat m(dims, depth);
// By default m has dims: [1, 100]
m.dims = 1;
EXPECT_NO_THROW(m.create(dims, depth));
}
}} // namespace

@ -23,6 +23,10 @@ if(WITH_WEBNN AND HAVE_WEBNN)
add_definitions(-DHAVE_WEBNN=1)
endif()
if(HAVE_TIMVX)
add_definitions(-DHAVE_TIMVX=1)
endif()
ocv_option(OPENCV_DNN_CUDA "Build with CUDA support"
HAVE_CUDA
AND HAVE_CUBLAS
@ -146,6 +150,11 @@ if(HAVE_TENGINE)
list(APPEND libs -Wl,--whole-archive ${TENGINE_LIBRARIES} -Wl,--no-whole-archive)
endif()
if(HAVE_TIMVX)
list(APPEND include_dirs ${TIMVX_INCLUDE_DIR})
list(APPEND libs -Wl,--whole-archive ${TIMVX_LIBRARY} -Wl,--no-whole-archive)
endif()
set(webnn_srcs "")
if(NOT EMSCRIPTEN)
if(HAVE_WEBNN)

@ -262,7 +262,7 @@ CV__DNN_INLINE_NS_BEGIN
{
public:
int input_zp, output_zp;
float output_sc;
float input_sc, output_sc;
static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
};
@ -322,9 +322,24 @@ CV__DNN_INLINE_NS_BEGIN
{
public:
int input_zp, output_zp;
float input_sc, output_sc;
static Ptr<PoolingLayerInt8> create(const LayerParams& params);
};
class CV_EXPORTS ReduceLayer : public Layer
{
public:
int reduceType;
std::vector<size_t> reduceDims;
static Ptr<ReduceLayer> create(const LayerParams& params);
};
class CV_EXPORTS ReduceLayerInt8 : public ReduceLayer
{
public:
static Ptr<ReduceLayerInt8> create(const LayerParams& params);
};
class CV_EXPORTS SoftmaxLayer : public Layer
{
public:
@ -351,7 +366,8 @@ CV__DNN_INLINE_NS_BEGIN
class CV_EXPORTS InnerProductLayerInt8 : public InnerProductLayer
{
public:
int output_zp;
int input_zp, output_zp;
float input_sc, output_sc;
static Ptr<InnerProductLayerInt8> create(const LayerParams& params);
};
@ -778,6 +794,26 @@ CV__DNN_INLINE_NS_BEGIN
static Ptr<ActivationLayerInt8> create(const LayerParams &params);
};
class CV_EXPORTS SignLayer : public ActivationLayer
{
public:
static Ptr<SignLayer> create(const LayerParams &params);
};
class CV_EXPORTS ShrinkLayer : public ActivationLayer
{
public:
float bias;
float lambd;
static Ptr<ShrinkLayer> create(const LayerParams &params);
};
class CV_EXPORTS ReciprocalLayer : public ActivationLayer
{
public:
static Ptr<ReciprocalLayer> create(const LayerParams &params);
};
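For quick reference, a hedged scalar sketch of the element-wise formulas these three new activation layers correspond to; it mirrors the CUDA functors added later in this diff, and the helper names are illustrative only, not part of the API.

// Hedged scalar reference for the new activations; the GPU kernels added later in
// this diff implement the same formulas. Helper names are illustrative only.
static inline float signRef(float x)       { return x > 0.f ? 1.f : (x < 0.f ? -1.f : 0.f); }
static inline float shrinkRef(float x, float bias, float lambd)
{
    return x > lambd ? x - bias : (x < -lambd ? x + bias : 0.f);
}
static inline float reciprocalRef(float x) { return 1.f / x; }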
/* Layers used in semantic segmentation */
class CV_EXPORTS CropLayer : public Layer

@ -75,6 +75,7 @@ CV__DNN_INLINE_NS_BEGIN
DNN_BACKEND_VKCOM,
DNN_BACKEND_CUDA,
DNN_BACKEND_WEBNN,
DNN_BACKEND_TIMVX,
#ifdef __OPENCV_BUILD
DNN_BACKEND_INFERENCE_ENGINE_NGRAPH = 1000000, // internal - use DNN_BACKEND_INFERENCE_ENGINE + setInferenceEngineBackendType()
DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019, // internal - use DNN_BACKEND_INFERENCE_ENGINE + setInferenceEngineBackendType()
@ -95,7 +96,8 @@ CV__DNN_INLINE_NS_BEGIN
DNN_TARGET_FPGA, //!< FPGA device with CPU fallbacks using Inference Engine's Heterogeneous plugin.
DNN_TARGET_CUDA,
DNN_TARGET_CUDA_FP16,
DNN_TARGET_HDDL
DNN_TARGET_HDDL,
DNN_TARGET_NPU,
};
CV_EXPORTS std::vector< std::pair<Backend, Target> > getAvailableBackends();
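A minimal usage sketch of the new backend and target values, assuming a TimVX-enabled build and a quantized model; the file name below is a placeholder.

#include <opencv2/dnn.hpp>

int main()
{
    // "model_int8.onnx" is a placeholder for a quantized model file.
    cv::dnn::Net net = cv::dnn::readNet("model_int8.onnx");
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_TIMVX);  // backend enum added above
    net.setPreferableTarget(cv::dnn::DNN_TARGET_NPU);      // target enum added above
    cv::Mat input = cv::Mat::zeros(224, 224, CV_8UC3);     // dummy image, for illustration only
    net.setInput(cv::dnn::blobFromImage(input));
    cv::Mat out = net.forward();
    return 0;
}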
@ -321,6 +323,19 @@ CV__DNN_INLINE_NS_BEGIN
const std::vector<Ptr<BackendWrapper>>& outputs
);
/**
* @brief Returns a TimVX backend node
*
* @param timVxInfo void pointer to TimVXInfo object
* @param inputsWrapper layer inputs
* @param outputsWrapper layer outputs
* @param isLast whether this node is the last one in the TimVX graph.
*/
virtual Ptr<BackendNode> initTimVX(void* timVxInfo,
const std::vector<Ptr<BackendWrapper> > &inputsWrapper,
const std::vector<Ptr<BackendWrapper> > &outputsWrapper,
bool isLast);
/**
* @brief Automatic Halide scheduling based on layer hyper-parameters.
* @param[in] node Backend node with Halide functions.
@ -389,7 +404,7 @@ CV__DNN_INLINE_NS_BEGIN
/**
* @brief "Deattaches" all the layers, attached to particular layer.
* @brief "Detaches" all the layers, attached to particular layer.
*/
virtual void unsetAttached();
@ -1310,6 +1325,9 @@ CV__DNN_INLINE_NS_BEGIN
class CV_EXPORTS_W_SIMPLE ClassificationModel : public Model
{
public:
CV_DEPRECATED_EXTERNAL // avoid using in C++ code, will be moved to "protected" (need to fix bindings first)
ClassificationModel();
/**
* @brief Create classification model from network represented in one of the supported formats.
* An order of @p model and @p config arguments does not matter.
@ -1324,6 +1342,24 @@ CV__DNN_INLINE_NS_BEGIN
*/
CV_WRAP ClassificationModel(const Net& network);
/**
* @brief Enable or disable the softmax post-processing option.
*
* If this option is true, softmax is applied after forward inference within the classify() function
* to normalize the confidences to the [0.0, 1.0] range.
* Enable it when the model does not already contain a softmax layer.
* @param[in] enable Whether to apply softmax post-processing within the classify() function.
*/
CV_WRAP ClassificationModel& setEnableSoftmaxPostProcessing(bool enable);
/**
* @brief Get the softmax post-processing option.
*
* This option defaults to false, i.e. softmax post-processing is not applied within the classify() function.
*/
CV_WRAP bool getEnableSoftmaxPostProcessing() const;
/** @brief Given the @p input frame, create input blob, run net and return top-1 prediction.
* @param[in] frame The input image.
*/
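A hedged usage sketch of the new option, assuming a model whose last layer emits raw logits; the file names are placeholders.

#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    // "logits_model.onnx" and "image.jpg" are placeholders; the model is assumed to lack a softmax layer.
    cv::dnn::ClassificationModel model("logits_model.onnx");
    model.setEnableSoftmaxPostProcessing(true);  // classify() will normalize confidences to [0, 1]
    int classId = -1;
    float confidence = 0.f;
    model.classify(cv::imread("image.jpg"), classId, confidence);
    return 0;
}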
@ -1558,7 +1594,7 @@ public:
* - top-right
* - bottom-right
*
* Use cv::getPerspectiveTransform function to retrive image region without perspective transformations.
* Use cv::getPerspectiveTransform function to retrieve image region without perspective transformations.
*
* @note If DL model doesn't support that kind of output then result may be derived from detectTextRectangles() output.
*
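A minimal sketch of the cropping step mentioned above, assuming the four vertices arrive in the documented order (bottom-left, top-left, top-right, bottom-right); the helper name is illustrative only.

#include <opencv2/imgproc.hpp>

static cv::Mat cropTextRegion(const cv::Mat& image, const std::vector<cv::Point2f>& quad, cv::Size size)
{
    CV_Assert(quad.size() == 4u);
    const std::vector<cv::Point2f> dst = {
        {0.f, (float)size.height - 1.f},                     // bottom-left
        {0.f, 0.f},                                          // top-left
        {(float)size.width - 1.f, 0.f},                      // top-right
        {(float)size.width - 1.f, (float)size.height - 1.f}  // bottom-right
    };
    cv::Mat M = cv::getPerspectiveTransform(quad, dst);
    cv::Mat cropped;
    cv::warpPerspective(image, cropped, M, size);
    return cropped;
}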

@ -248,6 +248,21 @@ void selu(const Stream& stream, Span<T> output, View<T> input, T alpha, T gamma)
generic_op<T, SeluFunctor<T>>(stream, output, input, {alpha, gamma});
}
template <class T>
void sign(const Stream& stream, Span<T> output, View<T> input) {
generic_op<T, SignFunctor<T>>(stream, output, input);
}
template <class T>
void shrink(const Stream& stream, Span<T> output, View<T> input, T bias, T lambd) {
generic_op<T, ShrinkFunctor<T>>(stream, output, input, {bias, lambd});
}
template <class T>
void reciprocal(const Stream& stream, Span<T> output, View<T> input) {
generic_op<T, ReciprocalFunctor<T>>(stream, output, input);
}
template <class T>
void thresholdedrelu(const Stream& stream, Span<T> output, View<T> input, T alpha) {
generic_op<T, ThresholdedReluFunctor<T>>(stream, output, input, {alpha});
@ -312,6 +327,9 @@ template void selu<__half>(const Stream&, Span<__half>, View<__half>, __half, __
template void thresholdedrelu<__half>(const Stream&, Span<__half>, View<__half>, __half);
template void power<__half>(const Stream&, Span<__half>, View<__half>, __half, __half, __half);
template void exp<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
template void sign<__half>(const Stream&, Span<__half>, View<__half>);
template void shrink<__half>(const Stream&, Span<__half>, View<__half>, __half, __half);
template void reciprocal<__half>(const Stream&, Span<__half>, View<__half>);
#endif
@ -351,6 +369,9 @@ template void selu<float>(const Stream&, Span<float>, View<float>, float, float)
template void thresholdedrelu<float>(const Stream&, Span<float>, View<float>, float);
template void power<float>(const Stream&, Span<float>, View<float>, float, float, float);
template void exp<float>(const Stream&, Span<float>, View<float>, float, float);
template void sign<float>(const Stream&, Span<float>, View<float>);
template void shrink<float>(const Stream&, Span<float>, View<float>, float, float);
template void reciprocal<float>(const Stream&, Span<float>, View<float>);
template <class T, std::size_t N> static
void launch_vectorized_axiswise_relu(const Stream& stream, Span<T> output, View<T> input, std::size_t inner_size, View<T> slope) {

@ -100,7 +100,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
CV_Assert(output.rank() == input.rank());
CV_Assert(output_axis_offset < output.get_axis_size(axis));
/* if axes preceeding the concat axis are all singleton, the concat blocks are contiguous
/* if axes preceding the concat axis are all singleton, the concat blocks are contiguous
* in the output and we can copy each block directly
*/
if (output.size_range(0, axis) == 1)

@ -726,6 +726,52 @@ struct DivFunctor {
CUDA4DNN_DEVICE T operator()(T x, T y) { return x / y; }
};
template <class T>
struct SignFunctor {
struct Params {
CUDA4DNN_HOST_DEVICE Params() {}
};
CUDA4DNN_DEVICE SignFunctor() { }
CUDA4DNN_DEVICE SignFunctor(const Params& params) { }
CUDA4DNN_DEVICE T operator()(T value) {
return value > T(0) ? T(1) : (value < T(0) ? T(-1) : T(0));
}
};
template <class T>
struct ShrinkFunctor {
struct Params {
CUDA4DNN_HOST_DEVICE Params() : bias(0), lambd(0.5) { }
CUDA4DNN_HOST_DEVICE Params(T bias_, T lambd_) : bias(bias_), lambd(lambd_) { }
T bias, lambd;
};
CUDA4DNN_DEVICE ShrinkFunctor() : ShrinkFunctor(Params{}) { }
CUDA4DNN_DEVICE ShrinkFunctor(const Params& params) : bias{params.bias}, lambd{params.lambd} { }
CUDA4DNN_DEVICE T operator()(T value) {
return value > lambd ? value - bias : (value < -lambd ? value + bias : T(0));
}
T bias, lambd;
};
template <class T>
struct ReciprocalFunctor {
struct Params {
CUDA4DNN_HOST_DEVICE Params() {}
};
CUDA4DNN_DEVICE ReciprocalFunctor() { }
CUDA4DNN_DEVICE ReciprocalFunctor(const Params& params) { }
CUDA4DNN_DEVICE T operator()(T value) {
return T(1.f)/value;
}
};
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
#endif /* OPENCV_DNN_SRC_CUDA_FUNCTORS_HPP */

@ -33,7 +33,7 @@
* template <class T, std::size_t Rank>
* void launch_some_kernel(...);
*
* // creates the dispatcher named "some_dispatcher" which invokves the correct instantiation of "launch_some_kernel"
* // creates the dispatcher named "some_dispatcher" which invokes the correct instantiation of "launch_some_kernel"
* GENERATE_KERNEL_DISPATCHER(some_dispatcher, launch_some_kernel);
*
* // internal API function

@ -72,7 +72,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
__syncthreads();
/* We interchange `threadIdx.x` and `threadIdx.y` so that consecutive output indices map to
* consecutive threads. This would allow writes across threds in a warp to be coalesced.
* consecutive threads. This would allow writes across threads in a warp to be coalesced.
*/
const index_type out_x = blockIdx.y * TILE_SIZE + threadIdx.x;
const index_type out_y_begin = blockIdx.x * TILE_SIZE + threadIdx.y;
@ -156,7 +156,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
* tensor indices be [o1, o2, ...]. The permutation operation essentially copies items
* from the input tensor to new locations in the output tensor as dictated by the indices.
*
* If the size of the nth axis (say i2) of the input is one the input and output indicies for
* If the size of the nth axis (say i2) of the input is one the input and output indices for
* all the elements will be of the form be [i1, 0, ...] and [..., 0, ...] respectively.
* The index does not contribute to the element's address calculation and hence would give
* identical result if it weren't there.

@ -159,7 +159,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
/* We can do a copy if the reduced rank is two and only the first axis is sliced.
* The general requirement is that only one axis is sliced and all the axes that
* preceed the sliced axis are singleton. However, the reductions above will remove
* precede the sliced axis are singleton. However, the reductions above will remove
* all the leading singleton axes and merge the trailing unsliced axes into one, or
* zero if there are no trailing unsliced axes. The latter is handled separately.
*/

@ -287,6 +287,51 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cu
cudnnTensorDescriptor_t descriptor;
};
/** An array of fully packed tensor descriptors
*
* @tparam T type of elements in the tensor
*/
template<class T>
class TensorDescriptorsArray
{
public:
TensorDescriptorsArray() noexcept = default;
TensorDescriptorsArray(const TensorDescriptorsArray&) = delete;
TensorDescriptorsArray(TensorDescriptorsArray&& other) noexcept
: descriptors{std::move(other.descriptors)} {}
TensorDescriptorsArray(int seqLength, std::array<int, 3> dims)
{
for (int i = 0; i < seqLength; ++i)
{
descriptors.emplace_back(dims);
}
}
~TensorDescriptorsArray() noexcept = default;
TensorDescriptorsArray& operator=(const TensorDescriptorsArray&) = delete;
TensorDescriptorsArray& operator=(TensorDescriptorsArray&& other) noexcept
{
descriptors = std::move(other.descriptors);
return *this;
};
std::vector<cudnnTensorDescriptor_t> get() const noexcept
{
std::vector<cudnnTensorDescriptor_t> descPtrs;
descPtrs.reserve(descriptors.size());
for (auto& desc : descriptors)
{
descPtrs.push_back(desc.get());
}
return descPtrs;
}
private:
std::vector<TensorDescriptor<T>> descriptors;
};
}}}}} /* namespace cv::dnn::cuda4dnn::csl::cudnn */
#endif /* OPENCV_DNN_CUDA4DNN_CSL_CUDNN_HPP */

@ -0,0 +1,195 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_DNN_CUDA4DNN_CSL_CUDNN_RECURRENT_HPP
#define OPENCV_DNN_CUDA4DNN_CSL_CUDNN_RECURRENT_HPP
#include "cudnn.hpp"
#include <cudnn.h>
namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cudnn {
/** RAII wrapper around a cuDNN dropout descriptor and the device state memory it owns
*/
class DropoutDescriptor
{
public:
DropoutDescriptor() noexcept = default;
DropoutDescriptor(const DropoutDescriptor &) = delete;
DropoutDescriptor(DropoutDescriptor &&other) noexcept : descriptor{other.descriptor}
{
states = std::move(other.states);
other.descriptor = nullptr;
}
/** creates a dropout descriptor for the given cuDNN handle and dropout probability
*/
DropoutDescriptor(const Handle &handle, float dropout)
{
CUDA4DNN_CHECK_CUDNN(cudnnCreateDropoutDescriptor(&descriptor));
// we need additional memory for dropout descriptor
size_t stateSize;
CUDA4DNN_CHECK_CUDNN(cudnnDropoutGetStatesSize(handle.get(), &stateSize));
states.reset(stateSize);
try
{
auto seed = 1234ull; // Pick a seed.
CUDA4DNN_CHECK_CUDNN(cudnnSetDropoutDescriptor(descriptor, handle.get(), dropout,
states.get().get(), stateSize, seed));
}
catch (...)
{
CUDA4DNN_CHECK_CUDNN(cudnnDestroyDropoutDescriptor(descriptor));
throw;
}
}
~DropoutDescriptor() noexcept
{
if (descriptor)
{
CUDA4DNN_CHECK_CUDNN(cudnnDestroyDropoutDescriptor(descriptor));
}
}
DropoutDescriptor &operator=(const DropoutDescriptor &) = delete;
DropoutDescriptor &operator=(DropoutDescriptor &&other) noexcept
{
descriptor = other.descriptor;
states = std::move(other.states);
other.descriptor = nullptr;
return *this;
};
cudnnDropoutDescriptor_t get() const noexcept { return descriptor; }
private:
cudnnDropoutDescriptor_t descriptor{nullptr};
using value_type = typename ManagedPtr<char>::element_type;
ManagedPtr<value_type> states;
};
/** RAII wrapper around a cuDNN RNN descriptor
*/
template<class T>
class RNNDescriptor
{
public:
enum class RNNMode
{
RNN_RELU,
RNN_TANH,
LSTM,
GRU
};
RNNDescriptor() noexcept = default;
RNNDescriptor(const RNNDescriptor &) = delete;
RNNDescriptor(RNNDescriptor &&other) noexcept : descriptor{other.descriptor}
{
other.descriptor = nullptr;
}
/** creates an RNN descriptor for the given mode, hidden size, number of layers, directionality and dropout
*/
RNNDescriptor(const Handle &handle, RNNMode mode, int hidden_size, int num_layers,
bool bidirectional, const DropoutDescriptor &dropoutDesc)
{
CUDA4DNN_CHECK_CUDNN(cudnnCreateRNNDescriptor(&descriptor));
const auto rnn_mode = [mode] {
switch (mode)
{
case RNNMode::RNN_RELU:
return CUDNN_RNN_RELU;
case RNNMode::RNN_TANH:
return CUDNN_RNN_TANH;
case RNNMode::LSTM:
return CUDNN_LSTM;
case RNNMode::GRU:
return CUDNN_GRU;
default:
return CUDNN_LSTM;
}
}();
try
{
CUDA4DNN_CHECK_CUDNN(cudnnSetRNNDescriptor_v6(
handle.get(), descriptor, hidden_size, num_layers, dropoutDesc.get(),
CUDNN_LINEAR_INPUT, bidirectional ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL,
rnn_mode,
algo, //CUDNN_RNN_ALGO_STANDARD,
detail::get_data_type<T>()));
}
catch (...)
{
CUDA4DNN_CHECK_CUDNN(cudnnDestroyRNNDescriptor(descriptor));
throw;
}
}
~RNNDescriptor() noexcept
{
if (descriptor)
{
CUDA4DNN_CHECK_CUDNN(cudnnDestroyRNNDescriptor(descriptor));
}
}
RNNDescriptor &operator=(const RNNDescriptor &) = delete;
RNNDescriptor &operator=(RNNDescriptor &&other) noexcept
{
descriptor = other.descriptor;
other.descriptor = nullptr;
return *this;
};
cudnnRNNDescriptor_t get() const noexcept { return descriptor; }
private:
cudnnRNNDescriptor_t descriptor{nullptr};
cudnnRNNMode_t mode{CUDNN_LSTM};
// only one algorithm is supported for now
cudnnRNNAlgo_t algo{CUDNN_RNN_ALGO_STANDARD};
};
template<class T>
size_t getRNNWorkspaceSize(const Handle &handle, const RNNDescriptor<T> &rnnDesc,
const int seqLength, const TensorDescriptorsArray<T> &inputDesc)
{
size_t workSize;
CUDA4DNN_CHECK_CUDNN(cudnnGetRNNWorkspaceSize(handle.get(), rnnDesc.get(), seqLength,
inputDesc.get().data(), &workSize));
return workSize;
}
template<class T>
void LSTMForward(const Handle &handle, const RNNDescriptor<T> &rnnDesc,
const FilterDescriptor<T> &filterDesc, DevicePtr<const T> filterPtr,
const TensorDescriptorsArray<T> &inputDesc, DevicePtr<const T> inputPtr,
const TensorDescriptor<T> &initialHDesc, DevicePtr<const T> initialH,
const TensorDescriptor<T> &initialCDesc, DevicePtr<const T> initialC,
const int seqLength, const TensorDescriptorsArray<T> &outputDesc,
DevicePtr<T> yOutputPtr, DevicePtr<T> ycOutputPtr, WorkspaceInstance workspace)
{
CV_Assert(handle);
CUDA4DNN_CHECK_CUDNN(cudnnRNNForwardInference(handle.get(), rnnDesc.get(), seqLength,
inputDesc.get().data(), inputPtr.get(), // input sequence
initialHDesc.get(), initialH.get(),
initialCDesc.get(), initialC.get(), // hidden
filterDesc.get(), filterPtr.get(), // weights
outputDesc.get().data(), yOutputPtr.get(), // output
nullptr, nullptr,
initialCDesc.get(), ycOutputPtr.get(),
static_cast<void*>(workspace.get()), workspace.size_in_bytes()));
}
}}}}} /* namespace cv::dnn::cuda4dnn::csl::cudnn */
#endif //OPENCV_DNN_CUDA4DNN_CSL_CUDNN_RECURRENT_HPP

@ -68,7 +68,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl {
}
}
});
/* std::shared_ptr<T>::reset invokves the deleter if an exception occurs; hence, we don't
/* std::shared_ptr<T>::reset invokes the deleter if an exception occurs; hence, we don't
* need to have a try-catch block to free the allocated device memory
*/

@ -147,7 +147,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl {
/* host const void pointer to const void device pointer */
CUDA4DNN_HOST_DEVICE explicit DevicePtr(pointer ptr_) noexcept : ptr{ ptr_ } { }
/* allow any device pointer to be implicitly convereted to void device pointer */
/* allow any device pointer to be implicitly converted to void device pointer */
template <class T>
CUDA4DNN_HOST_DEVICE DevicePtr(DevicePtr<T> ptr_) noexcept : ptr{ ptr_.get() } { }
@ -199,7 +199,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl {
/* host pointer to device pointer */
CUDA4DNN_HOST_DEVICE explicit DevicePtr(pointer ptr_) noexcept : ptr{ ptr_ } { }
/* allow any device pointer to mutable memory to be implicitly convereted to void device pointer */
/* allow any device pointer to mutable memory to be implicitly converted to void device pointer */
template <class T, typename std::enable_if<!std::is_const<T>::value, bool>::type = false>
CUDA4DNN_HOST_DEVICE DevicePtr(DevicePtr<T> ptr_) noexcept : ptr { ptr_.get() } { }

@ -18,6 +18,7 @@
#include "cudnn/softmax.hpp"
#include "cudnn/transform.hpp"
#include "cudnn/transpose_convolution.hpp"
#include "cudnn/recurrent.hpp"
#include <opencv2/core.hpp>
@ -472,6 +473,90 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl {
TensorTransformDescriptor transDesc;
};
template<class T>
class LSTM
{
using TensorDescriptor = cudnn::TensorDescriptor<T>;
using DropoutDescriptor = cudnn::DropoutDescriptor;
using RNNDescriptor = cudnn::RNNDescriptor<T>;
using FilterDescriptor = cudnn::FilterDescriptor<T>;
using TensorDescriptorsArray = cudnn::TensorDescriptorsArray<T>;
public:
using RNNMode = typename RNNDescriptor::RNNMode;
struct params_type
{
std::vector<std::size_t> weights_shape;
int seqLength;
int numLayers;
int hiddenSize;
int inputSize;
int miniBatch;
bool bidirectional;
float dropout;
RNNMode type;
};
LSTM() = default;
LSTM(const LSTM&) = delete;
LSTM(LSTM&&) = default;
LSTM(cudnn::Handle handle, const params_type& params)
: cudnnHandle(std::move(handle)), seqLength{params.seqLength},
inputDesc(seqLength, {params.miniBatch, params.inputSize, 1}),
outputDesc(seqLength,
{params.miniBatch,
params.bidirectional ? params.hiddenSize * 2 : params.hiddenSize,
1})
{
dropoutDesc = DropoutDescriptor(cudnnHandle, params.dropout);
filterDesc = FilterDescriptor(params.weights_shape);
rnnDesc = RNNDescriptor(cudnnHandle, params.type, params.hiddenSize,
params.numLayers, params.bidirectional, dropoutDesc);
int num_direction = params.bidirectional ? 2 : 1;
h0TensorDesc = TensorDescriptor(
{num_direction, params.miniBatch, params.hiddenSize});
c0TensorDesc = TensorDescriptor(
{num_direction, params.miniBatch, params.hiddenSize});
// Get amount of work space required to execute the RNN described by rnnDesc
// with input dimensions defined by inputDesc
csl::WorkspaceBuilder builder;
builder.require(cudnn::getRNNWorkspaceSize<T>(cudnnHandle, rnnDesc, seqLength, inputDesc));
scratch_mem_in_bytes = builder.required_workspace_size();
}
LSTM& operator=(const LSTM&) = delete;
LSTM& operator=(LSTM&&) = default;
void inference(TensorView<T> input, TensorSpan<T> y_output, TensorSpan<T> yc_output, TensorView<T> filters,
TensorView<T> h0, TensorView<T> c0, WorkspaceInstance workspace)
{
cudnn::LSTMForward<T>(cudnnHandle, rnnDesc, filterDesc, filters.get(), inputDesc,
input.get(), h0TensorDesc, h0.get(), c0TensorDesc, c0.get(),
seqLength, outputDesc, y_output.get(), yc_output.get(), workspace);
}
std::size_t get_workspace_memory_in_bytes() const noexcept { return scratch_mem_in_bytes; }
private:
cudnn::Handle cudnnHandle;
std::size_t scratch_mem_in_bytes{0};
int seqLength;
RNNDescriptor rnnDesc;
DropoutDescriptor dropoutDesc;
FilterDescriptor filterDesc;
TensorDescriptor h0TensorDesc, c0TensorDesc;
TensorDescriptorsArray inputDesc;
TensorDescriptorsArray outputDesc;
};
}}}} /* namespace cv::dnn::cuda4dnn::csl */
#endif /* OPENCV_DNN_SRC_CUDA4DNN_CSL_TENSOR_OPS_HPP */

@ -123,6 +123,14 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
template <class T>
void exp(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T normScale, T normShift);
template <class T>
void sign(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
template <class T>
void shrink(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input, T bias, T lambd);
template <class T>
void reciprocal(const csl::Stream& stream, csl::Span<T> output, csl::View<T> input);
}}}} /* namespace cv::dnn::cuda4dnn::kernels */
#endif /* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_ACTIVATIONS_HPP */

@ -584,6 +584,52 @@ namespace cv { namespace dnn { namespace cuda4dnn {
const T normScale, normShift;
};
template <class T>
class ShrinkOp final : public BaseOp<ShrinkOp, T> {
public:
ShrinkOp(csl::Stream stream_, T bias_, T lambd_)
: stream(std::move(stream_)), bias{ bias_ }, lambd{ lambd_ } { }
void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
{
kernels::shrink<T>(stream, output, input, bias, lambd);
}
private:
csl::Stream stream;
const T bias, lambd;
};
template <class T>
class SignOp final : public BaseOp<SignOp, T> {
public:
SignOp(csl::Stream stream_)
: stream(std::move(stream_)) { }
void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
{
kernels::sign<T>(stream, output, input);
}
private:
csl::Stream stream;
};
template <class T>
class ReciprocalOp final : public BaseOp<ReciprocalOp, T> {
public:
ReciprocalOp(csl::Stream stream_)
: stream(std::move(stream_)) { }
void calculate(csl::TensorSpan<T> output, csl::TensorView<T> input) const
{
kernels::reciprocal<T>(stream, output, input);
}
private:
csl::Stream stream;
};
}}} /* namespace cv::dnn::cuda4dnn */
#endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_ACTIVATION_HPP */

@ -0,0 +1,97 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_CELLS_HPP
#define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_CELLS_HPP
#include "../../op_cuda.hpp"
#include "../csl/cudnn.hpp"
#include "../csl/tensor_ops.hpp"
#include "../csl/cudnn/recurrent.hpp"
namespace cv { namespace dnn { namespace cuda4dnn {
struct RNNConfiguration
{
int seqLength;
int numLayers;
int hiddenSize;
int inputSize;
int miniBatch;
bool bidirectional;
};
template<class T>
class LSTMOp final : public CUDABackendNode
{
public:
using wrapper_type = GetCUDABackendWrapperType<T>;
LSTMOp(csl::Stream stream_, csl::cudnn::Handle handle, const Mat& filters, const Mat& h0,
const Mat& c0, const RNNConfiguration& config)
: stream(std::move(stream_))
{
typename csl::LSTM<T>::params_type params{
{filters.total(), 1, 1}, // reshape
config.seqLength,
config.numLayers,
config.hiddenSize,
config.inputSize,
config.miniBatch,
config.bidirectional,
0.0, /* dropout */
csl::cudnn::RNNDescriptor<T>::RNNMode::LSTM
};
lstm = csl::LSTM<T>(handle, params);
auto correct_shape_filters = filters.reshape(1, {static_cast<int>(filters.total()), 1, 1});
filtersTensor = csl::makeTensorHeader<T>(correct_shape_filters);
csl::copyMatToTensor<T>(correct_shape_filters, filtersTensor, stream);
h0Tensor = csl::makeTensorHeader<T>(h0);
csl::copyMatToTensor<T>(h0, h0Tensor, stream);
c0Tensor = csl::makeTensorHeader<T>(c0);
csl::copyMatToTensor<T>(c0, c0Tensor, stream);
csl::WorkspaceBuilder builder;
builder.require<T>(lstm.get_workspace_memory_in_bytes());
}
void forward(const std::vector<cv::Ptr<BackendWrapper>>& inputs,
const std::vector<cv::Ptr<BackendWrapper>>& outputs,
csl::Workspace& workspace) override
{
CV_Assert(inputs.size() == 1 && !outputs.empty());
auto input_wrapper = inputs[0].dynamicCast<wrapper_type>();
auto input = input_wrapper->getView();
auto y_output_wrapper = outputs[0].dynamicCast<wrapper_type>();
auto y_output = y_output_wrapper->getSpan();
Ptr<wrapper_type> yc_output_wrapper = outputs.size() == 2 ? outputs[1].dynamicCast<wrapper_type>() : Ptr<wrapper_type>();
csl::TensorSpan<T> yc_output = yc_output_wrapper.empty() ? csl::TensorSpan<T>() : yc_output_wrapper->getSpan();
csl::WorkspaceAllocator allocator(workspace);
lstm.inference(input, y_output, yc_output, filtersTensor, h0Tensor, c0Tensor, allocator.get_instance());
}
std::size_t get_workspace_memory_in_bytes() const noexcept override
{
return lstm.get_workspace_memory_in_bytes();
}
private:
csl::LSTM<T> lstm;
csl::Stream stream;
csl::Tensor<T> filtersTensor;
csl::Tensor<T> h0Tensor;
csl::Tensor<T> c0Tensor;
};
}}} /* namespace cv::dnn::cuda4dnn */
#endif //OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_RECURRENT_CELLS_HPP

@ -376,7 +376,7 @@ namespace cv {
int begin[] = {0, split_size * group_id, 0, 0};
cv::dnn::DictValue paramBegin = cv::dnn::DictValue::arrayInt(begin, 4);
int end[] = {-1, begin[1] + split_size, -1, -1};
int end[] = {INT_MAX, begin[1] + split_size, INT_MAX, INT_MAX};
cv::dnn::DictValue paramEnd = cv::dnn::DictValue::arrayInt(end, 4);
darknet::LayerParameter lp;
@ -791,7 +791,7 @@ namespace cv {
if (layers_vec.size() > 1)
{
// layer ids in layers_vec - inputs of Slice layers
// after adding offset to layers_vec: layer ids - ouputs of Slice layers
// after adding offset to layers_vec: layer ids - outputs of Slice layers
for (size_t k = 0; k < layers_vec.size(); ++k)
layers_vec[k] += layers_vec.size();

File diff suppressed because it is too large

@ -19,7 +19,44 @@ void initializeLayerFactory();
extern bool DNN_DIAGNOSTICS_RUN;
extern bool DNN_SKIP_REAL_IMPORT;
namespace detail {
//
// dnn_params.cpp
//
/// Network dump level
size_t getParam_DNN_NETWORK_DUMP();
/// This parameter is useful when running with Valgrind memory error detection
bool getParam_DNN_DISABLE_MEMORY_OPTIMIZATIONS();
#ifdef HAVE_OPENCL
bool getParam_DNN_OPENCL_ALLOW_ALL_DEVICES();
#endif
int getParam_DNN_BACKEND_DEFAULT();
// Additional checks (slows down execution!)
bool getParam_DNN_CHECK_NAN_INF();
bool getParam_DNN_CHECK_NAN_INF_DUMP();
bool getParam_DNN_CHECK_NAN_INF_RAISE_ERROR();
inline namespace detail {
typedef std::vector<MatShape> ShapesVec;
struct LayerShapes
{
ShapesVec in, out, internal;
// There is no guarantee that a layer which supports in-place computation
// will actually be computed in-place (input.data_ptr == output.data_ptr).
// If a layer reports that it can work in-place and the layers after it
// no longer use the input blob, we set output = input.
bool supportInPlace;
LayerShapes() {supportInPlace = false;}
};
#define CALL_MEMBER_FN(object, ptrToMemFn) ((object).*(ptrToMemFn))
class NotImplemented : public Layer
@ -82,8 +119,6 @@ struct NetImplBase
} // namespace detail
typedef std::vector<MatShape> ShapesVec;
static inline std::string toString(const ShapesVec& shapes, const std::string& name = std::string())
{
std::ostringstream ss;

@ -0,0 +1,67 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
#include "dnn_common.hpp"
#include <opencv2/core/utils/configuration.private.hpp>
namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN
size_t getParam_DNN_NETWORK_DUMP()
{
static size_t DNN_NETWORK_DUMP = utils::getConfigurationParameterSizeT("OPENCV_DNN_NETWORK_DUMP", 0);
return DNN_NETWORK_DUMP;
}
// this option is useful when running with Valgrind memory error detection
bool getParam_DNN_DISABLE_MEMORY_OPTIMIZATIONS()
{
static bool DNN_DISABLE_MEMORY_OPTIMIZATIONS = utils::getConfigurationParameterBool("OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS", false);
return DNN_DISABLE_MEMORY_OPTIMIZATIONS;
}
#ifdef HAVE_OPENCL
bool getParam_DNN_OPENCL_ALLOW_ALL_DEVICES()
{
static bool DNN_OPENCL_ALLOW_ALL_DEVICES = utils::getConfigurationParameterBool("OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES", false);
return DNN_OPENCL_ALLOW_ALL_DEVICES;
}
#endif
int getParam_DNN_BACKEND_DEFAULT()
{
static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSizeT("OPENCV_DNN_BACKEND_DEFAULT",
#ifdef HAVE_INF_ENGINE
(size_t)DNN_BACKEND_INFERENCE_ENGINE
#else
(size_t)DNN_BACKEND_OPENCV
#endif
);
return PARAM_DNN_BACKEND_DEFAULT;
}
// Additional checks (slows down execution!)
bool getParam_DNN_CHECK_NAN_INF()
{
static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false);
return DNN_CHECK_NAN_INF;
}
bool getParam_DNN_CHECK_NAN_INF_DUMP()
{
static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false);
return DNN_CHECK_NAN_INF_DUMP;
}
bool getParam_DNN_CHECK_NAN_INF_RAISE_ERROR()
{
static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false);
return DNN_CHECK_NAN_INF_RAISE_ERROR;
}
CV__DNN_INLINE_NS_END
}} // namespace cv::dnn
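A hedged, POSIX-only sketch of how these switches are typically flipped; the variable names are taken from the calls above, the values are illustrative.

#include <cstdlib>

int main()
{
    // Names match the getConfigurationParameter* calls above; set them before the first cv::dnn call,
    // because the values are cached in static variables on first use.
    setenv("OPENCV_DNN_CHECK_NAN_INF", "1", /*overwrite=*/1);
    setenv("OPENCV_DNN_DISABLE_MEMORY_OPTIMIZATIONS", "1", /*overwrite=*/1);
    // ... build and run a cv::dnn::Net as usual ...
    return 0;
}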

@ -0,0 +1,93 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN
Net readNet(const String& _model, const String& _config, const String& _framework)
{
String framework = toLowerCase(_framework);
String model = _model;
String config = _config;
const std::string modelExt = model.substr(model.rfind('.') + 1);
const std::string configExt = config.substr(config.rfind('.') + 1);
if (framework == "caffe" || modelExt == "caffemodel" || configExt == "caffemodel" || modelExt == "prototxt" || configExt == "prototxt")
{
if (modelExt == "prototxt" || configExt == "caffemodel")
std::swap(model, config);
return readNetFromCaffe(config, model);
}
if (framework == "tensorflow" || modelExt == "pb" || configExt == "pb" || modelExt == "pbtxt" || configExt == "pbtxt")
{
if (modelExt == "pbtxt" || configExt == "pb")
std::swap(model, config);
return readNetFromTensorflow(model, config);
}
if (framework == "torch" || modelExt == "t7" || modelExt == "net" || configExt == "t7" || configExt == "net")
{
return readNetFromTorch(model.empty() ? config : model);
}
if (framework == "darknet" || modelExt == "weights" || configExt == "weights" || modelExt == "cfg" || configExt == "cfg")
{
if (modelExt == "cfg" || configExt == "weights")
std::swap(model, config);
return readNetFromDarknet(config, model);
}
if (framework == "dldt" || modelExt == "bin" || configExt == "bin" || modelExt == "xml" || configExt == "xml")
{
if (modelExt == "xml" || configExt == "bin")
std::swap(model, config);
return readNetFromModelOptimizer(config, model);
}
if (framework == "onnx" || modelExt == "onnx")
{
return readNetFromONNX(model);
}
CV_Error(Error::StsError, "Cannot determine an origin framework of files: " + model + (config.empty() ? "" : ", " + config));
}
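Hedged examples of the extension-based dispatch implemented above; every file name is a placeholder.

#include <opencv2/dnn.hpp>

static void readNetExamples()
{
    // All file names are placeholders.
    cv::dnn::Net caffeNet   = cv::dnn::readNet("deploy.prototxt", "weights.caffemodel"); // swapped order is also accepted
    cv::dnn::Net darknetNet = cv::dnn::readNet("yolo.cfg", "yolo.weights");
    cv::dnn::Net onnxNet    = cv::dnn::readNet("model.onnx");
    (void)caffeNet; (void)darknetNet; (void)onnxNet;
}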
Net readNet(const String& _framework, const std::vector<uchar>& bufferModel,
const std::vector<uchar>& bufferConfig)
{
String framework = toLowerCase(_framework);
if (framework == "caffe")
return readNetFromCaffe(bufferConfig, bufferModel);
else if (framework == "tensorflow")
return readNetFromTensorflow(bufferModel, bufferConfig);
else if (framework == "darknet")
return readNetFromDarknet(bufferConfig, bufferModel);
else if (framework == "torch")
CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers");
else if (framework == "dldt")
return readNetFromModelOptimizer(bufferConfig, bufferModel);
CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework);
}
Net readNetFromModelOptimizer(const String& xml, const String& bin)
{
return Net::readFromModelOptimizer(xml, bin);
}
Net readNetFromModelOptimizer(const std::vector<uchar>& bufferCfg, const std::vector<uchar>& bufferModel)
{
return Net::readFromModelOptimizer(bufferCfg, bufferModel);
}
Net readNetFromModelOptimizer(
const uchar* bufferModelConfigPtr, size_t bufferModelConfigSize,
const uchar* bufferWeightsPtr, size_t bufferWeightsSize)
{
return Net::readFromModelOptimizer(
bufferModelConfigPtr, bufferModelConfigSize,
bufferWeightsPtr, bufferWeightsSize);
}
CV__DNN_INLINE_NS_END
}} // namespace cv::dnn

@ -0,0 +1,158 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
#include <opencv2/imgproc.hpp>
namespace cv {
namespace dnn {
CV__DNN_INLINE_NS_BEGIN
Mat blobFromImage(InputArray image, double scalefactor, const Size& size,
const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
CV_TRACE_FUNCTION();
Mat blob;
blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop, ddepth);
return blob;
}
void blobFromImage(InputArray image, OutputArray blob, double scalefactor,
const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
CV_TRACE_FUNCTION();
std::vector<Mat> images(1, image.getMat());
blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
}
Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size,
const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
CV_TRACE_FUNCTION();
Mat blob;
blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
return blob;
}
void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor,
Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth)
{
CV_TRACE_FUNCTION();
CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U");
if (ddepth == CV_8U)
{
CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth");
CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth");
}
std::vector<Mat> images;
images_.getMatVector(images);
CV_Assert(!images.empty());
for (size_t i = 0; i < images.size(); i++)
{
Size imgSize = images[i].size();
if (size == Size())
size = imgSize;
if (size != imgSize)
{
if (crop)
{
float resizeFactor = std::max(size.width / (float)imgSize.width,
size.height / (float)imgSize.height);
resize(images[i], images[i], Size(), resizeFactor, resizeFactor, INTER_LINEAR);
Rect crop(Point(0.5 * (images[i].cols - size.width),
0.5 * (images[i].rows - size.height)),
size);
images[i] = images[i](crop);
}
else
resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
}
if (images[i].depth() == CV_8U && ddepth == CV_32F)
images[i].convertTo(images[i], CV_32F);
Scalar mean = mean_;
if (swapRB)
std::swap(mean[0], mean[2]);
images[i] -= mean;
images[i] *= scalefactor;
}
size_t nimages = images.size();
Mat image0 = images[0];
int nch = image0.channels();
CV_Assert(image0.dims == 2);
if (nch == 3 || nch == 4)
{
int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
blob_.create(4, sz, ddepth);
Mat blob = blob_.getMat();
Mat ch[4];
for (size_t i = 0; i < nimages; i++)
{
const Mat& image = images[i];
CV_Assert(image.depth() == blob_.depth());
nch = image.channels();
CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
CV_Assert(image.size() == image0.size());
for (int j = 0; j < nch; j++)
ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j));
if (swapRB)
std::swap(ch[0], ch[2]);
split(image, ch);
}
}
else
{
CV_Assert(nch == 1);
int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
blob_.create(4, sz, ddepth);
Mat blob = blob_.getMat();
for (size_t i = 0; i < nimages; i++)
{
const Mat& image = images[i];
CV_Assert(image.depth() == blob_.depth());
nch = image.channels();
CV_Assert(image.dims == 2 && (nch == 1));
CV_Assert(image.size() == image0.size());
image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0)));
}
}
}
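A hedged usage sketch of this preprocessing path; the values are typical ImageNet-style settings chosen for illustration, not taken from this file.

#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    // "input.jpg" is a placeholder path.
    cv::Mat img = cv::imread("input.jpg");
    cv::Mat blob = cv::dnn::blobFromImage(
        img,
        1.0 / 255.0,              // scalefactor, applied after mean subtraction
        cv::Size(224, 224),       // target size (resize or center-crop, as implemented above)
        cv::Scalar(0, 0, 0),      // per-channel mean to subtract
        true,                     // swapRB: BGR -> RGB
        false,                    // crop: false => plain resize
        CV_32F);                  // ddepth
    // Resulting layout, as documented in imagesFromBlob() below: NCHW = [1, 3, 224, 224].
    return 0;
}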
void imagesFromBlob(const cv::Mat& blob_, OutputArrayOfArrays images_)
{
CV_TRACE_FUNCTION();
// A blob is a 4 dimensional matrix in floating point precision
// blob_[0] = batchSize = nbOfImages
// blob_[1] = nbOfChannels
// blob_[2] = height
// blob_[3] = width
CV_Assert(blob_.depth() == CV_32F);
CV_Assert(blob_.dims == 4);
images_.create(cv::Size(1, blob_.size[0]), blob_.depth());
std::vector<Mat> vectorOfChannels(blob_.size[1]);
for (int n = 0; n < blob_.size[0]; ++n)
{
for (int c = 0; c < blob_.size[1]; ++c)
{
vectorOfChannels[c] = getPlane(blob_, n, c);
}
cv::merge(vectorOfChannels, images_.getMatRef(n));
}
}
CV__DNN_INLINE_NS_END
}} // namespace cv::dnn

@ -80,7 +80,7 @@ class NgraphCustomOp: public ngraph::op::Op {
public:
const ngraph::NodeTypeInfo& get_type_info() const override
{
static constexpr ngraph::NodeTypeInfo type_info{kOpenCVLayersType, 0};
static constexpr ngraph::NodeTypeInfo type_info{kOpenCVLayersType, static_cast<uint64_t>(0)};
return type_info;
}

@ -92,6 +92,7 @@ void initializeLayerFactory()
CV_DNN_REGISTER_LAYER_CLASS(Pooling, PoolingLayer);
CV_DNN_REGISTER_LAYER_CLASS(ROIPooling, PoolingLayer);
CV_DNN_REGISTER_LAYER_CLASS(PSROIPooling, PoolingLayer);
CV_DNN_REGISTER_LAYER_CLASS(Reduce, ReduceLayer);
CV_DNN_REGISTER_LAYER_CLASS(LRN, LRNLayer);
CV_DNN_REGISTER_LAYER_CLASS(InnerProduct, InnerProductLayer);
CV_DNN_REGISTER_LAYER_CLASS(Softmax, SoftmaxLayer);
@ -129,6 +130,8 @@ void initializeLayerFactory()
CV_DNN_REGISTER_LAYER_CLASS(HardSwish, HardSwishLayer);
CV_DNN_REGISTER_LAYER_CLASS(Sin, SinLayer);
CV_DNN_REGISTER_LAYER_CLASS(Sinh, SinhLayer);
CV_DNN_REGISTER_LAYER_CLASS(Sign, SignLayer);
CV_DNN_REGISTER_LAYER_CLASS(Shrink, ShrinkLayer);
CV_DNN_REGISTER_LAYER_CLASS(Softplus, SoftplusLayer);
CV_DNN_REGISTER_LAYER_CLASS(Softsign, SoftsignLayer);
CV_DNN_REGISTER_LAYER_CLASS(Tan, TanLayer);
@ -143,6 +146,7 @@ void initializeLayerFactory()
CV_DNN_REGISTER_LAYER_CLASS(Silence, BlankLayer);
CV_DNN_REGISTER_LAYER_CLASS(Const, ConstLayer);
CV_DNN_REGISTER_LAYER_CLASS(Arg, ArgLayer);
CV_DNN_REGISTER_LAYER_CLASS(Reciprocal, ReciprocalLayer);
CV_DNN_REGISTER_LAYER_CLASS(Crop, CropLayer);
CV_DNN_REGISTER_LAYER_CLASS(Eltwise, EltwiseLayer);
@ -175,6 +179,7 @@ void initializeLayerFactory()
CV_DNN_REGISTER_LAYER_CLASS(ConvolutionInt8, ConvolutionLayerInt8);
CV_DNN_REGISTER_LAYER_CLASS(InnerProductInt8, InnerProductLayerInt8);
CV_DNN_REGISTER_LAYER_CLASS(PoolingInt8, PoolingLayerInt8);
CV_DNN_REGISTER_LAYER_CLASS(ReduceInt8, ReduceLayerInt8);
CV_DNN_REGISTER_LAYER_CLASS(EltwiseInt8, EltwiseLayerInt8);
CV_DNN_REGISTER_LAYER_CLASS(BatchNormInt8, BatchNormLayerInt8);
CV_DNN_REGISTER_LAYER_CLASS(ScaleInt8, ScaleLayerInt8);

@ -4,6 +4,8 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_timvx.hpp"
#include <opencv2/dnn/shape_utils.hpp>
namespace cv
@ -103,6 +105,11 @@ public:
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
if (backendId == DNN_BACKEND_TIMVX && haveTimVX())
{
return true;
}
return backendId == DNN_BACKEND_OPENCV;
}
@ -116,6 +123,121 @@ public:
return false;
}
virtual Ptr<BackendNode> initTimVX(void* timVXInfo_,
const std::vector<Ptr<BackendWrapper> > &inputsWrapper,
const std::vector<Ptr<BackendWrapper> > &outputsWrapper,
bool isLast) CV_OVERRIDE
{
#ifdef HAVE_TIMVX
// tvGraph Initialization.
auto timVxInfo = reinterpret_cast<TimVXInfo *>(timVXInfo_);
CV_Assert(timVxInfo);
Ptr<TimVXGraph> tvGraph = timVxInfo->getGraph();
CV_Assert(tvGraph);
Ptr<tim::vx::Graph> graph = tvGraph->graph;
const int numChannels = (int)origin_bias.total();
Mat tvGamma = origin_weights.reshape(1, numChannels);
Mat tvBeta = origin_bias.reshape(1, numChannels);
std::vector<int> inputsIndex;
std::vector<int> outputsIndex;
Mat tvMean = Mat::zeros(1, numChannels, CV_32F);
tvMean = tvMean.reshape(1, numChannels);
Mat tvVar = Mat::ones(1, numChannels, CV_32F);
tvVar = tvVar.reshape(1, numChannels);
CV_Assert(inputsWrapper.size() == 1);
if (outputsWrapper.size() > 1)
return Ptr<BackendNode>();
Ptr<tim::vx::Quantization> tvInputQuant = Ptr<tim::vx::Quantization>(
new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, input_sc, input_zp));
// input Tensor
auto inputWrapper = inputsWrapper[0].dynamicCast<TimVXBackendWrapper>();
Mat tmpInput = inputWrapper->getMat();
if (tmpInput.dims != 4) // Only support 4 dim input.
return Ptr<BackendNode>();
int input_index = -1, mean_index = -1, var_index = -1, gamma_index = -1, beta_index = -1, output_index = -1;
if (inputWrapper->isTensor())
{
input_index = tvGraph->getTensorIndex(inputWrapper->getTensor());
if (input_index == -1)
{
// Copy To New inputWrapper
Mat tmp = inputWrapper->getMat();
inputWrapper = Ptr<TimVXBackendWrapper>(new TimVXBackendWrapper(tmp));
}
}
if (!inputWrapper->isTensor())
{
inputWrapper->createTensor(graph,tim::vx::TensorAttribute::INPUT, tvInputQuant);
input_index = tvGraph->addWrapper(inputWrapper);
}
inputsIndex.push_back(input_index);
// Mean tensor
Ptr<TimVXBackendWrapper> meanWrapper = Ptr<TimVXBackendWrapper>(new TimVXBackendWrapper(tvMean));
Ptr<tim::vx::Quantization> meanQuant;
meanWrapper->createTensor(graph, tim::vx::TensorAttribute::CONSTANT);
mean_index = tvGraph->addWrapper(meanWrapper);
inputsIndex.push_back(mean_index);
// Var tensor
Ptr<TimVXBackendWrapper> varWrapper = Ptr<TimVXBackendWrapper>(new TimVXBackendWrapper(tvVar));
varWrapper->createTensor(graph,tim::vx::TensorAttribute::CONSTANT);
var_index = tvGraph->addWrapper(varWrapper);
inputsIndex.push_back(var_index);
// Gamma tensor
Ptr<TimVXBackendWrapper> gammaWrapper = Ptr<TimVXBackendWrapper>(new TimVXBackendWrapper(tvGamma));
gammaWrapper->createTensor(graph,tim::vx::TensorAttribute::CONSTANT);
gamma_index = tvGraph->addWrapper(gammaWrapper);
inputsIndex.push_back(gamma_index);
// Beta tensor
Ptr<TimVXBackendWrapper> betaWrapper = Ptr<TimVXBackendWrapper>(new TimVXBackendWrapper(tvBeta));
betaWrapper->createTensor(graph,tim::vx::TensorAttribute::CONSTANT);
beta_index = tvGraph->addWrapper(betaWrapper);
inputsIndex.push_back(beta_index);
// Output tensor
CV_Assert(outputsWrapper.size() == 1);
Ptr<TimVXBackendWrapper> outputWrapper = outputsWrapper[0].dynamicCast<TimVXBackendWrapper>();
Ptr<tim::vx::Quantization> outputQuant = Ptr<tim::vx::Quantization>(
new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp));
if (isLast)
{
auto shapeType = getShapeTypeFromMat(outputWrapper->getMat());
// For Graph Output tensor, we need to set tensor shape before createTensor().
outputWrapper->setTensorShape(shapeType);
outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant);
}
else
{
outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant);
}
output_index = tvGraph->addWrapper(outputWrapper);
outputsIndex.push_back(output_index);
std::shared_ptr<tim::vx::Operation> tvBatchNorm = graph->CreateOperation<tim::vx::ops::BatchNorm>(0.f);
Ptr<TimVXBackendNode> tvBackendNode = new TimVXBackendNode(tvGraph, tvBatchNorm, inputsIndex, outputsIndex);
return tvBackendNode;
#endif // HAVE_TIMVX
return Ptr<BackendNode>();
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();

@ -9,6 +9,7 @@
#include "opencv2/core/hal/hal.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include "../op_timvx.hpp"
#include <iostream>
#include <numeric>
@ -46,6 +47,7 @@ public:
int ngroups = params.get<int>("group", 1);
CV_Assert(numOutput % ngroups == 0);
input_sc = params.get<float>("input_scale");
input_zp = params.get<int>("input_zeropoint");
output_zp = params.get<int>("zeropoints");
output_sc = params.get<float>("scales");
@ -181,6 +183,16 @@ public:
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
size_t ksize = kernel_size.size();
#ifdef HAVE_TIMVX
if (backendId == DNN_BACKEND_TIMVX)
{
/* only Conv1d and Conv2d supported. */
if (ksize == 2 || ksize == 1)
return true;
return false;
}
#endif
// Only default backend and Conv1D/Conv2D/Conv3D are supported
return backendId == DNN_BACKEND_OPENCV && ksize >= 1 && ksize <= 3;
}
@ -261,6 +273,11 @@ public:
bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
{
// TODO! add activation in convolution.
#ifdef HAVE_TIMVX
if (preferableTarget == DNN_TARGET_NPU)
return false;
#endif
Ptr<ActivationLayerInt8> activ_int8 = layer.dynamicCast<ActivationLayerInt8>();
if (!activ_int8.empty())
{
@ -300,6 +317,249 @@ public:
outputMultiplier[outCn] = outputMultiplier[outCn+1] = outputMultiplier[outCn-1];
}
virtual Ptr<BackendNode> initTimVX(void* timVXInfo_,
const std::vector<Ptr<BackendWrapper> > &inputsWrapper,
const std::vector<Ptr<BackendWrapper> > &outputsWrapper,
bool isLast) CV_OVERRIDE
{
#ifdef HAVE_TIMVX
/* TODO :support GroupConv;
Ref:
https://github.com/VeriSilicon/TIM-VX/blob/main/docs/Operators.md#conv2d
Link Reference: https://github.com/VeriSilicon/TIM-VX/blob/main/src/tim/vx/ops/conv1d_test.cc
*/
// tvGraph Initialization.
auto timVxInfo = reinterpret_cast<TimVXInfo *>(timVXInfo_);
CV_Assert(timVxInfo);
Ptr<TimVXGraph> tvGraph = timVxInfo->getGraph();
CV_Assert(tvGraph);
Ptr<tim::vx::Graph> graph = tvGraph->graph;
Mat tvWeightMat = blobs[0];
std::vector<int> tvBiasVec;
tvBiasVec.assign(biasvec.begin(), biasvec.end() - 2);
Mat tvBiasMat(tvBiasVec);
for (int i = 0; i < numOutput; i++)
{
tvBiasVec[i] += input_zp * (cv::sum(blobs[0].row(i))[0]);
}
// Padding Type
tim::vx::PadType tvPadType;
if (padMode.empty())
{
tvPadType = tim::vx::PadType::AUTO; // TODO! check the padding type.
}
else if(padMode == "VALID")
{
tvPadType = tim::vx::PadType::VALID;
}
else if (padMode == "SAME")
{
tvPadType = tim::vx::PadType::SAME;
}
else
{
CV_Error(Error::StsError, "Unsupported padding mode in TimVXBackend!");
}
size_t ksize = kernel_size.size();
std::vector<int> inputsIndex;
std::vector<int> outputsIndex;
CV_Assert(inputsWrapper.size() == 1);
CV_Assert(ksize == 2 || ksize == 1);
std::vector<float> weight_scs, bias_scs;
std::vector<int32_t> weight_zps, bias_zps;
weight_scs.resize(numOutput);
bias_scs.resize(numOutput);
for (int i = 0; i < numOutput; i++)
{
bias_scs[i] = outputMultiplier[i] * output_sc;
weight_scs[i] = bias_scs[i] / input_sc;
}
weight_zps.assign(numOutput, 0);
bias_zps.assign(numOutput, 0);
bool tvSymmetric;
tvSymmetric = getQuantType(weight_scs, numOutput);
// input Tensor
auto inputWrapper = inputsWrapper[0].dynamicCast<TimVXBackendWrapper>();
int input_index = -1, weight_index = -1, bias_index = -1, output_index = -1;
if (inputWrapper->isTensor())
{
input_index = tvGraph->getTensorIndex(inputWrapper->getTensor());
if (input_index == -1)
{
// Copy To New inputWrapper
Mat tmp = inputWrapper->getMat();
inputWrapper = Ptr<TimVXBackendWrapper>(new TimVXBackendWrapper(tmp));
}
}
if (!inputWrapper->isTensor())
{
Ptr<tim::vx::Quantization> tvInputQuant = Ptr<tim::vx::Quantization>(
new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, input_sc, input_zp));
inputWrapper->createTensor(graph, tim::vx::TensorAttribute::INPUT, tvInputQuant);
input_index = tvGraph->addWrapper(inputWrapper);
}
inputsIndex.push_back(input_index);
// weight Tensor
auto tvConvWeightShape = shape(tvWeightMat);
Mat tvInputMat = inputWrapper->getMat();
// calculate group value.
int group = tvInputMat.size[1] / tvWeightMat.size[1];
// TODO! It will be supported in the future.
if (tvSymmetric && tvWeightMat.total() == tvConvWeightShape[0])
return Ptr<TimVXBackendNode>();
// Reverse weight shape From OpenCV NCHW to TimVX WHCN.
std::reverse(tvConvWeightShape.begin(), tvConvWeightShape.end());
Ptr<TimVXBackendWrapper> weightWrapper = Ptr<TimVXBackendWrapper>(new TimVXBackendWrapper(tvWeightMat));
Ptr<tim::vx::Quantization> weightQuant;
if (tvSymmetric)
{
int wtChanneldim = tvWeightMat.dims - 1;
weightQuant = Ptr<tim::vx::Quantization>(
new tim::vx::Quantization(tim::vx::QuantType::SYMMETRIC_PER_CHANNEL, wtChanneldim,
weight_scs, weight_zps));
}
else
{
weightQuant = Ptr<tim::vx::Quantization>(
new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, weight_scs[0], 0));
}
weightWrapper->createTensor(graph,tim::vx::TensorAttribute::CONSTANT, weightQuant);
weight_index = tvGraph->addWrapper(weightWrapper);
inputsIndex.push_back(weight_index);
// Bias Tensor
Ptr<TimVXBackendWrapper> biasWrapper = Ptr<TimVXBackendWrapper>(new TimVXBackendWrapper(tvBiasMat));
Ptr<tim::vx::Quantization> biasQuant;
if (tvSymmetric)
{
biasQuant = Ptr<tim::vx::Quantization>(
new tim::vx::Quantization(tim::vx::QuantType::SYMMETRIC_PER_CHANNEL, 0,
bias_scs, bias_zps));
}
else
{
biasQuant = Ptr<tim::vx::Quantization>(
new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, weight_scs[0] * input_sc, 0));
}
biasWrapper->createTensor(graph, tim::vx::TensorAttribute::CONSTANT, biasQuant);
bias_index = tvGraph->addWrapper(biasWrapper);
inputsIndex.push_back(bias_index);
// Output tensor
CV_Assert(outputsWrapper.size() == 1);
auto outputWrapper = outputsWrapper[0].dynamicCast<TimVXBackendWrapper>();
Ptr<tim::vx::Quantization> outputQuant = Ptr<tim::vx::Quantization>(
new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp));
if (isLast)
{
// From OpenCV NCHW, to TimVX WHCN
auto shapeType = getShapeTypeFromMat(outputWrapper->getMat());
// For Graph Output tensor, we need to set tensor shape before createTensor().
outputWrapper->setTensorShape(shapeType);
outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant);
}
else
{
outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant);
}
output_index = tvGraph->addWrapper(outputWrapper);
outputsIndex.push_back(output_index);
std::shared_ptr<tim::vx::Operation> tvConv;
if (ksize == 2) // for conv2d
{
int multiplier = 0;
if(group == tvConvWeightShape[3] && group != 1)
multiplier = 1;
if (group == 1 || (group == tvConvWeightShape[3] && group != 1)) // Conv2D || DeConv2D
{
if (tvPadType == tim::vx::PadType::AUTO) {
tvConv = graph->CreateOperation<tim::vx::ops::Conv2d>(
tvConvWeightShape[3], tvPadType,
std::array<uint32_t, 2>({(uint32_t) kernel_size[1], (uint32_t) kernel_size[0]}),
std::array<uint32_t, 2>({(uint32_t) strides[1], (uint32_t) strides[0]}),
std::array<uint32_t, 2>({(uint32_t) dilations[1], (uint32_t) dilations[0]}),
std::array<uint32_t, 4>({(uint32_t) pads_begin[1], (uint32_t) pads_end[1],
(uint32_t) pads_begin[0], (uint32_t) pads_end[0]}),
multiplier);
}
else
{
tvConv = graph->CreateOperation<tim::vx::ops::Conv2d>(
tvPadType,
std::array<uint32_t, 2>({(uint32_t) strides[1], (uint32_t) strides[0]}),
std::array<uint32_t, 2>({(uint32_t) dilations[1], (uint32_t) dilations[0]}),
multiplier);
}
}
else
{
// GroupedConv2d
if (tvPadType == tim::vx::PadType::AUTO)
{
tvConv = graph->CreateOperation<tim::vx::ops::GroupedConv2d>(
std::array<uint32_t, 4>({(uint32_t) pads_begin[1], (uint32_t) pads_end[1],
(uint32_t) pads_begin[0], (uint32_t) pads_end[0]}),
std::array<uint32_t, 2>({(uint32_t)strides[1], (uint32_t)strides[0]}),
std::array<uint32_t, 2>({(uint32_t)dilations[1], (uint32_t)dilations[0]}),
group);
}
else
{
tvConv = graph->CreateOperation<tim::vx::ops::GroupedConv2d>(
tvPadType,
std::array<uint32_t, 2>({(uint32_t)strides[1], (uint32_t)strides[0]}),
std::array<uint32_t, 2>({(uint32_t)dilations[1], (uint32_t)dilations[0]}),
group);
}
}
}
else
{
// for Conv1d
if (group != 1)
CV_Error( CV_StsNotImplemented, " Grouped Conv1d or Depth-Wise Conv1d are not supported by "
"TimVX Backend. Please try OpenCV Backend.");
tvConv = graph->CreateOperation<tim::vx::ops::Conv1d>(
tvConvWeightShape[2], tvPadType, (uint32_t)kernel_size[0],
(uint32_t)strides[0],(uint32_t)dilations[0],
std::array<uint32_t, 2>({(uint32_t)pads_begin[0], (uint32_t)pads_end[0]}));
}
// Create TimVXBackendNode
Ptr<TimVXBackendNode> tvBackendNode = new TimVXBackendNode(tvGraph, tvConv, inputsIndex, outputsIndex);
return tvBackendNode;
#endif // HAVE_TIMVX
return Ptr<BackendNode>();
}
class ParallelConv : public cv::ParallelLoopBody
{
public:

@ -4,6 +4,7 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_timvx.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <iostream>
@ -16,14 +17,45 @@ namespace dnn
class ActivationLayerInt8Impl CV_FINAL : public ActivationLayerInt8
{
public:
int input_zp, output_zp;
float input_sc, output_sc;
float slope = 0.0f;
#ifdef HAVE_TIMVX
tvActivationType tvActType;
#endif
ActivationLayerInt8Impl(const LayerParams &params)
{
setParamsFrom(params);
activationLUT = !blobs.empty() ? blobs[0] : Mat();
input_zp = params.get<int>("input_zeropoint");
input_sc = params.get<float>("input_scale");
output_zp = params.get<int>("zeropoints");
output_sc = params.get<float>("scales");
if (params.has("slope"))
{
slope = params.get<float>("slope");
}
#ifdef HAVE_TIMVX
tvActType = getTimVXActType(type);
#endif
}
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
#ifdef HAVE_TIMVX
if (backendId == DNN_BACKEND_TIMVX)
{
// TODO!: Leaky ReLU will be supported in the future.
if (tvActType == tvActReLU && slope != 0.f)
return false;
return tvActType != tvActNotSupported;
}
#endif
return backendId == DNN_BACKEND_OPENCV;
}
@ -106,6 +138,112 @@ public:
}
};
virtual Ptr<BackendNode> initTimVX(void* timVXInfo_,
const std::vector<Ptr<BackendWrapper> > &inputsWrapper,
const std::vector<Ptr<BackendWrapper> > &outputsWrapper,
bool isLast) CV_OVERRIDE
{
#ifdef HAVE_TIMVX
// tvGraph Initialization.
auto timVxInfo = reinterpret_cast<TimVXInfo *>(timVXInfo_);
CV_Assert(timVxInfo);
Ptr<TimVXGraph> tvGraph = timVxInfo->getGraph();
CV_Assert(tvGraph);
Ptr<tim::vx::Graph> graph = tvGraph->graph;
std::vector<int> inputsIndex, outputsIndex;
int input_index, output_index;
CV_Assert(inputsWrapper.size() == 1);
// input Tensor
Ptr<TimVXBackendWrapper> inputWrapper = inputsWrapper[0].dynamicCast<TimVXBackendWrapper>();
if (inputWrapper->isTensor())
{
input_index = tvGraph->getTensorIndex(inputWrapper->getTensor());
if(input_index == -1)
{
// Copy To New inputWrapper
Mat tmp = inputWrapper->getMat();
inputWrapper = Ptr<TimVXBackendWrapper>(new TimVXBackendWrapper(tmp));
}
}
if (!inputWrapper->isTensor())
{
Ptr<tim::vx::Quantization> tvInputQuant = Ptr<tim::vx::Quantization>(
new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, input_sc, input_zp));
inputWrapper->createTensor(graph, tim::vx::TensorAttribute::INPUT, tvInputQuant);
input_index = tvGraph->addWrapper(inputWrapper);
}
inputsIndex.push_back(input_index);
// output tensor
CV_Assert(outputsWrapper.size() == 1);
Ptr<TimVXBackendWrapper> outputWrapper = outputsWrapper[0].dynamicCast<TimVXBackendWrapper>();
Ptr<tim::vx::Quantization> outputQuant = Ptr<tim::vx::Quantization>(
new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp));
Ptr<tim::vx::Tensor> outputTensor;
if (isLast)
{
auto shapeType = getShapeTypeFromMat(outputWrapper->getMat());
// For Graph Output tensor, we need to set tensor shape before createTensor().
outputWrapper->setTensorShape(shapeType);
outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant);
}
else
{
outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant);
}
output_index = tvGraph->addWrapper(outputWrapper);
outputsIndex.push_back(output_index);
std::shared_ptr<tim::vx::Operation> tvAct;
switch(tvActType) {
case tvActReLU:
{
if (slope != 0.f)
tvAct = graph->CreateOperation<tim::vx::ops::LeakyRelu>(slope);
else
tvAct = graph->CreateOperation<tim::vx::ops::Relu>();
break;
}
case tvActReLU6:
tvAct = graph->CreateOperation<tim::vx::ops::Relu6>();
break;
case tvActTanH:
tvAct = graph->CreateOperation<tim::vx::ops::Tanh>();
break;
case tvActSwish:
tvAct = graph->CreateOperation<tim::vx::ops::Swish>();
break;
case tvActMish:
tvAct = graph->CreateOperation<tim::vx::ops::Mish>();
break;
case tvActSigmoid:
tvAct = graph->CreateOperation<tim::vx::ops::Sigmoid>();
break;
case tvActELU:
tvAct = graph->CreateOperation<tim::vx::ops::Elu>();
break;
default:
// TODO: verify that ReLU is the correct default operation.
tvAct = graph->CreateOperation<tim::vx::ops::Relu>();
break;
}
Ptr<TimVXBackendNode> tvBackendNode = new TimVXBackendNode(tvGraph, tvAct, inputsIndex, outputsIndex);
return tvBackendNode;
#endif // HAVE_TIMVX
return Ptr<BackendNode>();
}
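// A minimal, standalone sketch (not part of the layer) of the asymmetric int8
// convention implied by the (scale, zero_point) pairs passed to
// tim::vx::Quantization above: real_value = scale * (q - zero_point).
// The helper names below are hypothetical and not part of OpenCV or TIM-VX.
#include <algorithm>
#include <cmath>
#include <cstdint>
static int8_t quantizeAsymm(float real, float scale, int zeroPoint)
{
    // Round to nearest, shift by the zero point, then clamp to the int8 range.
    int q = static_cast<int>(std::lround(real / scale)) + zeroPoint;
    return static_cast<int8_t>(std::min(127, std::max(-128, q)));
}
static float dequantizeAsymm(int8_t q, float scale, int zeroPoint)
{
    // Inverse mapping back to real values.
    return scale * (static_cast<int>(q) - zeroPoint);
}
// Example: with input_sc = 0.05f and input_zp = 10, a real value of 1.0f
// quantizes to 30 and dequantizes back to 1.0f.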
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();

@ -4,6 +4,7 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_timvx.hpp"
#include <opencv2/dnn/shape_utils.hpp>
namespace cv
@ -22,6 +23,10 @@ public:
} op;
std::vector<float> coeffs;
std::vector<int> zeropoints;
std::vector<float> scales;
int output_zp;
float output_sc;
enum OutputChannelsMode
{
@ -84,6 +89,20 @@ public:
}
}
if (params.has("input_scales"))
{
DictValue sc = params.get("input_scales");
int i, n = sc.size();
scales.resize(n);
for (i = 0; i < n; i++)
{
scales[i] = sc.get<float>(i);
}
}
output_zp = params.get<int>("zeropoints");
output_sc = params.get<float>("scales");
channelsModeInput = ELTWISE_CHANNNELS_SAME;
if (params.has("output_channels_mode"))
{
@ -116,6 +135,9 @@ public:
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
// For the TimVX backend, only ELTWISE_CHANNNELS_SAME is supported.
if (backendId == DNN_BACKEND_TIMVX && haveTimVX())
return channelsModeInput == ELTWISE_CHANNNELS_SAME;
return backendId == DNN_BACKEND_OPENCV;
}
@ -219,6 +241,134 @@ public:
}
}
virtual Ptr<BackendNode> initTimVX(void* timVXInfo_,
const std::vector<Ptr<BackendWrapper> > &inputsWrapper,
const std::vector<Ptr<BackendWrapper> > &outputsWrapper,
bool isLast) CV_OVERRIDE
{
#ifdef HAVE_TIMVX
// tvGraph Initialization.
if (inputsWrapper.size() != 2)
return Ptr<BackendNode>();
auto timVxInfo = reinterpret_cast<TimVXInfo *>(timVXInfo_);
CV_Assert(timVxInfo);
Ptr<TimVXGraph> tvGraph = timVxInfo->getGraph();
CV_Assert(tvGraph);
Ptr<tim::vx::Graph> graph = tvGraph->graph;
bool isSub = false;
// TODO: support variable coeffs.
if (op == SUM)
{
CV_Assert(coeffs.size() == scales.size());
std::vector<float> originalCoeffs;
for (int i = 0; i < coeffs.size(); i++)
{
originalCoeffs.push_back(coeffs[i] * output_sc / scales[i]);
}
float eps = std::numeric_limits<float>::epsilon();
if (std::fabs(originalCoeffs[0] - 1.0f) <= eps * std::fabs(originalCoeffs[0] + 1.0f) &&
std::fabs(originalCoeffs[1] + 1.0f) <= eps * std::fabs(originalCoeffs[1] - 1.0f))
{
// Sub, if coeffs = {1., -1.}, isSub = true.
isSub = true;
}
else if (std::fabs(originalCoeffs[0] - 1.0f) <= eps * std::fabs(originalCoeffs[0] + 1.0f) &&
std::fabs(originalCoeffs[1] - 1.0f) <= eps * std::fabs(originalCoeffs[1] + 1.0f))
{
// Sum, if coeffs = {1., 1.}, isSub = false.
isSub = false;
}
else
{
return Ptr<BackendNode>();
}
}
std::vector<int> inputsIndex, outputsIndex;
int input_index = -1, output_index = -1;
CV_Assert(channelsModeInput == ELTWISE_CHANNNELS_SAME);
// Input
Ptr<TimVXBackendWrapper> inputWrapper;
CV_Assert(!scales.empty() && !zeropoints.empty());
for (int i = 0; i < inputsWrapper.size(); i++)
{
inputWrapper = inputsWrapper[i].dynamicCast<TimVXBackendWrapper>();
if (inputWrapper->isTensor())
{
input_index = tvGraph->getTensorIndex(inputWrapper->getTensor());
if (input_index == -1)
{
// Copy To New inputWrapper
Mat tmp = inputWrapper->getMat();
inputWrapper = Ptr<TimVXBackendWrapper>(new TimVXBackendWrapper(tmp));
}
}
if (!inputWrapper->isTensor())
{
Ptr<tim::vx::Quantization> tvInputQuant = Ptr<tim::vx::Quantization>(
new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, scales[i], zeropoints[i]));
inputWrapper->createTensor(graph, tim::vx::TensorAttribute::INPUT, tvInputQuant);
input_index = tvGraph->addWrapper(inputWrapper);
}
inputsIndex.push_back(input_index);
}
// Output
CV_Assert(outputsWrapper.size() == 1);
Ptr<TimVXBackendWrapper> outputWrapper = outputsWrapper[0].dynamicCast<TimVXBackendWrapper>();
Ptr<tim::vx::Quantization> outputQuant = Ptr<tim::vx::Quantization>(
new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp));
if (isLast)
{
auto shapeType = getShapeTypeFromMat(outputWrapper->getMat());
// For Graph Output tensor, we need to set tensor shape before createTensor().
outputWrapper->setTensorShape(shapeType);
outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant);
}
else
{
outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant);
}
output_index = tvGraph->addWrapper(outputWrapper);
outputsIndex.push_back(output_index);
std::shared_ptr<tim::vx::Operation> tvEltwise;
switch (op) {
case SUM:
if (isSub)
tvEltwise = graph->CreateOperation<tim::vx::ops::Sub>();
else
tvEltwise = graph->CreateOperation<tim::vx::ops::Add>();
break;
case PROD:
tvEltwise = graph->CreateOperation<tim::vx::ops::Multiply>();
break;
case MAX:
tvEltwise = graph->CreateOperation<tim::vx::ops::Maximum>();
break;
default:
CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation");
}
Ptr<TimVXBackendNode> tvBackendNode = new TimVXBackendNode(tvGraph, tvEltwise, inputsIndex, outputsIndex);
return tvBackendNode;
#endif // HAVE_TIMVX
return Ptr<BackendNode>();
}
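// A minimal, standalone sketch (not part of the layer) of the coefficient
// check performed above: the quantized eltwise coefficients are rescaled to
// real-valued coefficients via coeff * output_sc / input_sc, and only the
// combinations {1, 1} (Add) and {1, -1} (Sub) map onto TimVX operations.
// The names below are illustrative only, and a fixed tolerance replaces the
// relative-epsilon comparison used in the real code.
#include <cmath>
#include <vector>
enum class TvEltwiseKind { Add, Sub, Unsupported };
static TvEltwiseKind classifyEltwiseCoeffs(const std::vector<float>& coeffs,
                                           const std::vector<float>& inputScales,
                                           float outputScale)
{
    if (coeffs.size() != 2 || inputScales.size() != 2)
        return TvEltwiseKind::Unsupported;
    const float c0 = coeffs[0] * outputScale / inputScales[0];
    const float c1 = coeffs[1] * outputScale / inputScales[1];
    const float tol = 1e-6f;
    if (std::fabs(c0 - 1.f) < tol && std::fabs(c1 - 1.f) < tol)
        return TvEltwiseKind::Add;   // plain addition
    if (std::fabs(c0 - 1.f) < tol && std::fabs(c1 + 1.f) < tol)
        return TvEltwiseKind::Sub;   // subtraction
    return TvEltwiseKind::Unsupported;
}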
class EltwiseInvoker : public ParallelLoopBody
{
EltwiseLayerInt8Impl& self;

@ -4,6 +4,7 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_timvx.hpp"
#include <opencv2/dnn/shape_utils.hpp>
@ -19,7 +20,11 @@ public:
FullyConnectedLayerInt8Impl(const LayerParams& params)
{
setParamsFrom(params);
input_sc = params.get<float>("input_scale");
input_zp = params.get<int>("input_zeropoint");
output_zp = params.get<int>("zeropoints");
output_sc = params.get<float>("scales");
axis = params.get<int>("axis", 1);
if (blobs.size() == 3)
{
@ -71,11 +76,25 @@ public:
virtual bool supportBackend(int backendId) CV_OVERRIDE
{
if (backendId == DNN_BACKEND_TIMVX && haveTimVX())
{
return biasMat.empty();
}
return backendId == DNN_BACKEND_OPENCV;
}
virtual bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
{
// TODO: add activation fusion to the fully connected layer.
#ifdef HAVE_TIMVX
if (preferableTarget == DNN_TARGET_NPU)
return false;
#endif
Ptr<ActivationLayerInt8> activ_int8 = layer.dynamicCast<ActivationLayerInt8>();
if (!activ_int8.empty())
{
@ -87,6 +106,120 @@ public:
return false;
}
virtual Ptr<BackendNode> initTimVX(void* timVXInfo_,
const std::vector<Ptr<BackendWrapper> > &inputsWrapper,
const std::vector<Ptr<BackendWrapper> > &outputsWrapper,
bool isLast) CV_OVERRIDE
{
#ifdef HAVE_TIMVX
// tvGraph Initialization.
auto timVxInfo = reinterpret_cast<TimVXInfo *>(timVXInfo_);
CV_Assert(timVxInfo);
Ptr<TimVXGraph> tvGraph = timVxInfo->getGraph();
CV_Assert(tvGraph);
Ptr<tim::vx::Graph> graph = tvGraph->graph;
int numOutput = blobs[0].size[0];
Mat weightMat = blobs[0];
std::vector<int> inputsIndex;
std::vector<int> outputsIndex;
std::vector<float> weight_scs, bias_scs;
std::vector<int32_t> weight_zps;
bias_scs.resize(numOutput);
weight_scs.resize(numOutput);
for (int i = 0; i < numOutput; i++)
{
bias_scs[i] = outputMultiplier.at<float>(i) * output_sc;
weight_scs[i] = bias_scs[i] / input_sc;
}
weight_zps.assign(numOutput, 0);
// input Tensor
auto inputWrapper = inputsWrapper[0].dynamicCast<TimVXBackendWrapper>();
int input_index = -1, weight_index = -1, output_index = -1;
if (inputWrapper->isTensor())
{
input_index = tvGraph->getTensorIndex(inputWrapper->getTensor());
if (input_index == -1)
{
// Copy To New inputWrapper
Mat tmp = inputWrapper->getMat();
inputWrapper = Ptr<TimVXBackendWrapper>(new TimVXBackendWrapper(tmp));
}
}
if (!inputWrapper->isTensor() || input_index == -1)
{
Ptr<tim::vx::Quantization> tvInputQuant = Ptr<tim::vx::Quantization>(
new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, input_sc, input_zp));
inputWrapper->createTensor(graph, tim::vx::TensorAttribute::INPUT, tvInputQuant);
input_index = tvGraph->addWrapper(inputWrapper);
}
inputsIndex.push_back(input_index);
// weight tensor
Ptr<TimVXBackendWrapper> weightWrapper = Ptr<TimVXBackendWrapper>(new TimVXBackendWrapper(weightMat));
Ptr<tim::vx::Quantization> weightQuant;
bool tvSymmetric = getQuantType(weight_scs, numOutput);
if (tvSymmetric)
{
// TODO: fix the following issue.
// TimVX does not support symmetric per-channel quantization for MatMul.
return Ptr<BackendNode>();
}
else
{
weightQuant = Ptr<tim::vx::Quantization>(
new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, weight_scs[0], 0));
}
weightWrapper->createTensor(graph, tim::vx::TensorAttribute::CONSTANT, weightQuant);
weight_index = tvGraph->addWrapper(weightWrapper);
inputsIndex.push_back(weight_index);
// Output tensor
CV_Assert(outputsWrapper.size() == 1);
Ptr<TimVXBackendWrapper> outputWrapper = outputsWrapper[0].dynamicCast<TimVXBackendWrapper>();
Ptr<tim::vx::Quantization> outputQuant = Ptr<tim::vx::Quantization>(
new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp));
if (isLast)
{
auto shapeType = getShapeTypeFromMat(outputWrapper->getMat());
// For Graph Output tensor, we need to set tensor shape before createTensor().
outputWrapper->setTensorShape(shapeType);
outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant);
}
else
{
outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant);
}
output_index = tvGraph->addWrapper(outputWrapper);
outputsIndex.push_back(output_index);
std::shared_ptr<tim::vx::Operation> tvMatmul = graph->CreateOperation<tim::vx::ops::Matmul>(false, true);
Ptr<TimVXBackendNode> tvBackendNode = new TimVXBackendNode(tvGraph, tvMatmul, inputsIndex, outputsIndex);
return tvBackendNode;
#endif // HAVE_TIMVX
return Ptr<BackendNode>();
}
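// A minimal, standalone sketch (not part of the layer) of the per-output-channel
// scale bookkeeping above: for each output channel,
// bias_scale = outputMultiplier * output_scale and
// weight_scale = bias_scale / input_scale, so input_scale * weight_scale
// recovers the bias scale per channel. Names are illustrative only.
#include <vector>
struct FcChannelScales
{
    std::vector<float> weight;
    std::vector<float> bias;
};
static FcChannelScales computeFcChannelScales(const std::vector<float>& outputMultiplier,
                                              float inputScale, float outputScale)
{
    FcChannelScales s;
    s.weight.reserve(outputMultiplier.size());
    s.bias.reserve(outputMultiplier.size());
    for (float m : outputMultiplier)
    {
        const float biasSc = m * outputScale;    // per-channel bias scale
        s.bias.push_back(biasSc);
        s.weight.push_back(biasSc / inputScale); // per-channel weight scale
    }
    return s;
}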
class FullyConnected : public ParallelLoopBody
{
public:

@ -4,6 +4,7 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "../op_timvx.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include <float.h>
@ -26,9 +27,12 @@ public:
globalPooling = false;
isGlobalPooling = std::vector<bool>(3, false);
output_zp = params.get<int>("zeropoints");
input_zp = params.get<int>("input_zeropoint", 0);
input_zp = params.get<int>("input_zeropoint", output_zp);
multiplier = params.get<float>("multiplier", 1.f);
output_sc = params.get<float>("scales");
input_sc = multiplier * output_sc;
hasDynamicShapes = params.get<bool>("has_dynamic_shapes", false);
shapesInitialized = !hasDynamicShapes;
@ -103,6 +107,24 @@ public:
else
return false;
}
else if (backendId == DNN_BACKEND_TIMVX && haveTimVX())
{
// Only 2D and 1D pooling are supported.
if (kernel_size.size() == 3)
{
// Fall back to the CPU implementation.
preferableTarget = DNN_TARGET_CPU;
return false;
}
if (!avePoolPaddedArea) // TimVX does not support excluding the padded area in average pooling.
return false;
if (globalPooling) // TODO: support globalPooling in the TimVX backend.
return false;
if (kernel_size.size() == 2)
return type == MAX || type == AVE;
return false;
}
return false;
}
@ -116,6 +138,139 @@ public:
return false;
}
virtual Ptr<BackendNode> initTimVX(void* timVXInfo_,
const std::vector<Ptr<BackendWrapper> > &inputsWrapper,
const std::vector<Ptr<BackendWrapper> > &outputsWrapper,
bool isLast) CV_OVERRIDE
{
#ifdef HAVE_TIMVX
// tvGraph Initialization.
auto timVxInfo = reinterpret_cast<TimVXInfo *>(timVXInfo_);
CV_Assert(timVxInfo);
Ptr<TimVXGraph> tvGraph = timVxInfo->getGraph();
CV_Assert(tvGraph);
Ptr<tim::vx::Graph> graph = tvGraph->graph;
tim::vx::PoolType tvPoolType;
tim::vx::RoundType tvRoundType;
size_t ksize = kernel_size.size();
if (ksize != 2)
return Ptr<BackendNode>();
// Convert the pooling type from OpenCV to TimVX; only MAX and AVG are supported.
switch (type) {
case MAX: {
tvPoolType = tim::vx::PoolType::MAX;
break;
}
case AVE:{
tvPoolType = tim::vx::PoolType::AVG;
break;
}
default:
CV_Error(Error::StsNotImplemented, "Not implemented Pooling type in TimVX Backend.");
}
// Padding Type
tim::vx::PadType tvPadType;
if (padMode.empty())
{
tvPadType = tim::vx::PadType::AUTO; // TODO: check the padding type.
}
else if (padMode == "VALID")
{
tvPadType = tim::vx::PadType::VALID;
}
else if (padMode == "SAME")
{
tvPadType = tim::vx::PadType::SAME;
}
else
{
CV_Error(Error::StsError, "Unsupported padding mode in TimVXBackend!");
}
if (ceilMode)
tvRoundType = tim::vx::RoundType::CEILING;
else
tvRoundType = tim::vx::RoundType::FLOOR;
auto input = inputsWrapper[0];
std::vector<int> inputsIndex;
std::vector<int> outputsIndex;
// input Tensor
auto inputWrapper = inputsWrapper[0].dynamicCast<TimVXBackendWrapper>();
int input_index, output_index;
if (inputWrapper->isTensor())
{
input_index = tvGraph->getTensorIndex(inputWrapper->getTensor());
if (input_index == -1)
{
// Copy To New inputWrapper
Mat tmp = inputWrapper->getMat();
inputWrapper = Ptr<TimVXBackendWrapper>(new TimVXBackendWrapper(tmp));
}
}
if (!inputWrapper->isTensor())
{
Ptr<tim::vx::Quantization> tvInputQuant = Ptr<tim::vx::Quantization>(
new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, input_sc, input_zp));
inputWrapper->createTensor(graph, tim::vx::TensorAttribute::INPUT, tvInputQuant);
input_index = tvGraph->addWrapper(inputWrapper);
}
inputsIndex.push_back(input_index);
// Output tensor
CV_Assert(outputsWrapper.size() == 1);
auto outputWrapper = outputsWrapper[0].dynamicCast<TimVXBackendWrapper>();
Ptr<tim::vx::Quantization> outputQuant = Ptr<tim::vx::Quantization>(
new tim::vx::Quantization(tim::vx::QuantType::ASYMMETRIC, output_sc, output_zp));
if (isLast)
{
auto shapeType = getShapeTypeFromMat(outputWrapper->getMat());
// For Graph Output tensor, we need to set tensor shape before createTensor().
outputWrapper->setTensorShape(shapeType);
outputWrapper->createTensor(graph, tim::vx::TensorAttribute::OUTPUT, outputQuant);
}
else
{
outputWrapper->createTensor(graph, tim::vx::TensorAttribute::TRANSIENT, outputQuant);
}
output_index = tvGraph->addWrapper(outputWrapper);
outputsIndex.push_back(output_index);
std::shared_ptr<tim::vx::Operation> tvPool;
if (tvPadType == tim::vx::PadType::AUTO)
{
tvPool = graph->CreateOperation<tim::vx::ops::Pool2d>(tvPoolType,
std::array<uint32_t, 4>({(uint32_t) pads_begin[1], (uint32_t) pads_end[1],
(uint32_t) pads_begin[0], (uint32_t) pads_end[0]}),
std::array<uint32_t, 2>({(uint32_t)kernel_size[1], (uint32_t)kernel_size[0]}),
std::array<uint32_t, 2>({(uint32_t)strides[1], (uint32_t)strides[0]}),
tvRoundType);
}
else
{
tvPool = graph->CreateOperation<tim::vx::ops::Pool2d>(
tvPoolType, tvPadType,
std::array<uint32_t, 2>({(uint32_t)kernel_size[1], (uint32_t)kernel_size[0]}),
std::array<uint32_t, 2>({(uint32_t)strides[1], (uint32_t)strides[0]}),
tvRoundType);
}
Ptr<TimVXBackendNode> tvBackendNode = new TimVXBackendNode(tvGraph, tvPool, inputsIndex, outputsIndex);
return tvBackendNode;
#endif // HAVE_TIMVX
return Ptr<BackendNode>();
}
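// A minimal, standalone sketch (not part of the layer) of the argument
// reordering used above, assuming OpenCV's usual (height, width) order for
// kernel_size/strides and (top, left)/(bottom, right) for pads_begin/pads_end:
// Pool2d receives width-first kernel/stride arrays and a
// {left, right, top, bottom} padding array. Names are illustrative only.
#include <array>
#include <cstdint>
#include <vector>
struct TvPool2dArgs
{
    std::array<uint32_t, 4> pads;    // {left, right, top, bottom}
    std::array<uint32_t, 2> kernel;  // {width, height}
    std::array<uint32_t, 2> stride;  // {width, height}
};
static TvPool2dArgs toTimVXPoolArgs(const std::vector<size_t>& kernelHW,
                                    const std::vector<size_t>& strideHW,
                                    const std::vector<size_t>& padsBegin,
                                    const std::vector<size_t>& padsEnd)
{
    TvPool2dArgs a;
    a.pads   = {(uint32_t)padsBegin[1], (uint32_t)padsEnd[1],
                (uint32_t)padsBegin[0], (uint32_t)padsEnd[0]};
    a.kernel = {(uint32_t)kernelHW[1], (uint32_t)kernelHW[0]};
    a.stride = {(uint32_t)strideHW[1], (uint32_t)strideHW[0]};
    return a;
}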
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
