Merge branch 4.x

pull/25953/head
Alexander Smorkalov 6 months ago
commit 672a662dff
  1. 18
      3rdparty/ippicv/ippicv.cmake
  2. 14
      CMakeLists.txt
  3. 5
      cmake/OpenCVCompilerOptimizations.cmake
  4. 26
      cmake/checks/cpu_rvv.cpp
  5. BIN
      doc/tutorials/app/images/orbbec_uvc_cpp.jpg
  6. BIN
      doc/tutorials/app/images/orbbec_uvc_python.jpg
  7. 2
      doc/tutorials/app/intelperc.markdown
  8. 2
      doc/tutorials/app/orbbec_astra_openni.markdown
  9. 126
      doc/tutorials/app/orbbec_uvc.markdown
  10. 1
      doc/tutorials/app/table_of_content_app.markdown
  11. 46
      modules/calib/src/calibinit.cpp
  12. 4
      modules/core/include/opencv2/core.hpp
  13. 25
      modules/core/include/opencv2/core/cv_cpu_dispatch.h
  14. 4
      modules/core/include/opencv2/core/hal/intrin.hpp
  15. 3345
      modules/core/include/opencv2/core/hal/intrin_rvv.hpp
  16. 24395
      modules/core/include/opencv2/core/hal/intrin_rvv_010_compat_non-policy.hpp
  17. 768
      modules/core/include/opencv2/core/hal/intrin_rvv_010_compat_overloaded-non-policy.hpp
  18. 33
      modules/core/include/opencv2/core/hal/intrin_rvv_011_compat.hpp
  19. 213
      modules/core/include/opencv2/core/hal/intrin_rvv_compat_overloaded.hpp
  20. 1219
      modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp
  21. 21
      modules/core/src/hal_replacement.hpp
  22. 11
      modules/core/src/matmul.dispatch.cpp
  23. 2
      modules/core/src/matrix_wrap.cpp
  24. 4
      modules/core/src/system.cpp
  25. 2
      modules/dnn/CMakeLists.txt
  26. 93
      modules/dnn/perf/perf_layer.cpp
  27. 4
      modules/dnn/src/int8layers/fully_connected_layer.cpp
  28. 2
      modules/dnn/src/int8layers/layers_common.simd.hpp
  29. 2
      modules/dnn/src/layers/convolution_layer.cpp
  30. 2
      modules/dnn/src/layers/cpu_kernels/conv_depthwise.cpp
  31. 54
      modules/dnn/src/layers/cpu_kernels/conv_depthwise.simd.hpp
  32. 243
      modules/dnn/src/layers/elementwise_layers.cpp
  33. 2
      modules/dnn/src/layers/fully_connected_layer.cpp
  34. 208
      modules/dnn/src/layers/layers_common.simd.hpp
  35. 8
      modules/dnn/test/test_onnx_conformance.cpp
  36. 16
      modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp
  37. 6
      modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp
  38. 12
      modules/highgui/CMakeLists.txt
  39. 8
      modules/highgui/cmake/detect_gtk.cmake
  40. 6
      modules/highgui/misc/plugins/plugin_gtk/CMakeLists.txt
  41. 17
      modules/highgui/src/window_gtk.cpp
  42. 4
      modules/imgcodecs/include/opencv2/imgcodecs.hpp
  43. 1
      modules/imgcodecs/src/grfmt_avif.cpp
  44. 1
      modules/imgcodecs/src/grfmt_base.cpp
  45. 2
      modules/imgcodecs/src/grfmt_base.hpp
  46. 1
      modules/imgcodecs/src/grfmt_tiff.cpp
  47. 20
      modules/imgcodecs/src/loadsave.cpp
  48. 115
      modules/imgcodecs/src/rgbe.cpp
  49. 1
      modules/imgcodecs/test/test_avif.cpp
  50. 6
      modules/imgproc/src/imgwarp.cpp
  51. 20
      modules/imgproc/test/ocl/test_warp.cpp
  52. 1
      modules/objc/generator/gen_objc.py
  53. 24
      modules/objdetect/src/aruco/charuco_detector.cpp
  54. 28
      modules/objdetect/test/test_charucodetection.cpp
  55. 1
      modules/ts/src/ts.cpp
  56. 8
      modules/videoio/include/opencv2/videoio.hpp
  57. 42
      modules/videoio/src/cap_ffmpeg_impl.hpp
  58. 2
      modules/videoio/src/cap_gphoto2.cpp
  59. 70
      modules/videoio/test/test_ffmpeg.cpp
  60. 2
      platforms/linux/riscv64-clang.toolchain.cmake
  61. 83
      samples/cpp/videocapture_obsensor.cpp

@ -2,7 +2,7 @@ function(download_ippicv root_var)
set(${root_var} "" PARENT_SCOPE)
# Commit SHA in the opencv_3rdparty repo
set(IPPICV_COMMIT "fd27188235d85e552de31425e7ea0f53ba73ba53")
set(IPPICV_COMMIT "7f55c0c26be418d494615afca15218566775c725")
# Define actual ICV versions
if(APPLE)
set(IPPICV_COMMIT "0cc4aa06bf2bef4b05d237c69a5a96b9cd0cb85a")
@ -14,21 +14,21 @@ function(download_ippicv root_var)
set(OPENCV_ICV_PLATFORM "linux")
set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_lnx")
if(X86_64)
set(OPENCV_ICV_NAME "ippicv_2021.11.0_lnx_intel64_20240201_general.tgz")
set(OPENCV_ICV_HASH "0f2745ff705ecae31176dad437608f6f")
set(OPENCV_ICV_NAME "ippicv_2021.12.0_lnx_intel64_20240425_general.tgz")
set(OPENCV_ICV_HASH "d06e6d44ece88f7f17a6cd9216761186")
else()
set(OPENCV_ICV_NAME "ippicv_2021.11.0_lnx_ia32_20240201_general.tgz")
set(OPENCV_ICV_HASH "63e381bf08076ca34fd5264203043a45")
set(OPENCV_ICV_NAME "ippicv_2021.12.0_lnx_ia32_20240425_general.tgz")
set(OPENCV_ICV_HASH "85ffa2b9ed7802b93c23fa27b0097d36")
endif()
elseif(WIN32 AND NOT ARM)
set(OPENCV_ICV_PLATFORM "windows")
set(OPENCV_ICV_PACKAGE_SUBDIR "ippicv_win")
if(X86_64)
set(OPENCV_ICV_NAME "ippicv_2021.11.0_win_intel64_20240201_general.zip")
set(OPENCV_ICV_HASH "59d154bf54a1e3eea20d7248f81a2a8e")
set(OPENCV_ICV_NAME "ippicv_2021.12.0_win_intel64_20240425_general.zip")
set(OPENCV_ICV_HASH "402ff8c6b4986738fed71c44e1ce665d")
else()
set(OPENCV_ICV_NAME "ippicv_2021.11.0_win_ia32_20240201_general.zip")
set(OPENCV_ICV_HASH "7a6d8ac5825c02fea6cbfc1201b521b5")
set(OPENCV_ICV_NAME "ippicv_2021.12.0_win_ia32_20240425_general.zip")
set(OPENCV_ICV_HASH "8b1d2a23957d57624d0de8f2a5cae5f1")
endif()
else()
return()

@ -724,7 +724,13 @@ if(ENABLE_CUDA_FIRST_CLASS_LANGUAGE)
cmake_policy(SET CMP0092 NEW) # CMake 3.15+: leave warning flags out of default CMAKE_<LANG>_FLAGS flags.
if(CMAKE_CUDA_COMPILER)
if(CMAKE_CUDA_ARCHITECTURES)
set(USER_DEFINED_CMAKE_CUDA_ARCHITECTURES TRUE)
endif()
enable_language(CUDA)
if(NOT USER_DEFINED_CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES "")
endif()
elseif(UNIX)
message(WARNING "CUDA: Not detected! If you are not using the default host compiler (g++) then you need to specify both CMAKE_CUDA_HOST_COMPILER and CMAKE_CUDA_COMPILER. e.g. -DCMAKE_CUDA_HOST_COMPILER=/usr/bin/clang++ -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc.")
endif()
@ -1447,16 +1453,10 @@ if(WITH_GTK OR HAVE_GTK)
status(" GTK+:" "YES (ver ${GTK3_VERSION})")
elseif(HAVE_GTK)
status(" GTK+:" "YES (ver ${GTK2_VERSION})")
status(" GtkGlExt:" HAVE_GTKGLEXT THEN "YES (ver ${GTKGLEXT_VERSION})" ELSE NO)
else()
status(" GTK+:" "NO")
endif()
if(HAVE_GTK)
status( " GThread :" HAVE_GTHREAD THEN "YES (ver ${GTHREAD_VERSION})" ELSE NO)
if(NOT HAVE_GTK3)
status( " GtkGlExt:" HAVE_GTKGLEXT THEN "YES (ver ${GTKGLEXT_VERSION})" ELSE NO)
endif()
endif()
endif()
if(WITH_FRAMEBUFFER OR HAVE_FRAMEBUFFER)

@ -390,14 +390,9 @@ elseif(PPC64LE)
set(CPU_BASELINE "VSX" CACHE STRING "${HELP_CPU_BASELINE}")
elseif(RISCV)
option(RISCV_RVV_SCALABLE "Use scalable RVV API on RISC-V" ON)
ocv_update(CPU_RVV_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_rvv.cpp")
ocv_update(CPU_KNOWN_OPTIMIZATIONS "RVV")
ocv_update(CPU_RVV_FLAGS_ON "-march=rv64gcv")
if(RISCV_RVV_SCALABLE)
set(CPU_RVV_FLAGS_ON "${CPU_RVV_FLAGS_ON} -DCV_RVV_SCALABLE")
endif()
ocv_update(CPU_RVV_FLAGS_CONFLICT "-march=[^ ]*")
set(CPU_DISPATCH "" CACHE STRING "${HELP_CPU_DISPATCH}")

@ -1,17 +1,16 @@
#include <stdio.h>
#if defined(__riscv)
# include <riscv_vector.h>
# define CV_RVV 1
#if !defined(__riscv) || !defined(__riscv_v)
#error "RISC-V or vector extension(RVV) is not supported by the compiler"
#endif
#if defined CV_RVV
#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic>10999
#define vreinterpret_v_u64m1_u8m1 __riscv_vreinterpret_v_u64m1_u8m1
#define vle64_v_u64m1 __riscv_vle64_v_u64m1
#define vle32_v_f32m1 __riscv_vle32_v_f32m1
#define vfmv_f_s_f32m1_f32 __riscv_vfmv_f_s_f32m1_f32
#if !defined(__THEAD_VERSION__) && defined(__riscv_v_intrinsic) && __riscv_v_intrinsic < 12000
#error "Wrong intrinsics version, v0.12 or higher is required for gcc or clang"
#endif
#include <riscv_vector.h>
#ifdef __THEAD_VERSION__
int test()
{
const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
@ -21,7 +20,14 @@ int test()
return (int)vfmv_f_s_f32m1_f32(val);
}
#else
#error "RISC-V vector extension(RVV) is not supported"
int test()
{
const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
uint64_t ptr[2] = {0x0908060504020100, 0xFFFFFFFF0E0D0C0A};
vuint8m1_t a = __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(ptr, 2));
vfloat32m1_t val = __riscv_vle32_v_f32m1((const float*)(src), 4);
return (int)__riscv_vfmv_f_s_f32m1_f32(val);
}
#endif
int main()

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

@ -3,7 +3,7 @@ Using Creative Senz3D and other Intel RealSense SDK compatible depth sensors {#t
@tableofcontents
@prev_tutorial{tutorial_orbbec_astra_openni}
@prev_tutorial{tutorial_orbbec_uvc}
@next_tutorial{tutorial_wayland_ubuntu}
![hardwares](images/realsense.jpg)

@ -4,7 +4,7 @@ Using Orbbec Astra 3D cameras {#tutorial_orbbec_astra_openni}
@tableofcontents
@prev_tutorial{tutorial_kinect_openni}
@next_tutorial{tutorial_intelperc}
@next_tutorial{tutorial_orbbec_uvc}
### Introduction

@ -0,0 +1,126 @@
Using Orbbec 3D cameras (UVC) {#tutorial_orbbec_uvc}
====================================================
@tableofcontents
@prev_tutorial{tutorial_orbbec_astra_openni}
@next_tutorial{tutorial_intelperc}
| | |
| -: | :- |
| Original author | Jinyue Chen |
| Compatibility | OpenCV >= 4.10 |
### Introduction
This tutorial is devoted to the Orbbec 3D cameras based on the UVC protocol. For the use of the older
Orbbec 3D cameras which depend on OpenNI, please refer to the
[previous tutorial](https://github.com/opencv/opencv/blob/5.x/doc/tutorials/app/orbbec_astra_openni.markdown).
Unlike working with the OpenNI-based Astra 3D cameras, which require OpenCV to be built with the OpenNI2 SDK,
the Orbbec SDK does not need to be installed to access Orbbec UVC 3D cameras via OpenCV. By using the
`cv::VideoCapture` class, users get the stream data from 3D cameras, similar to working with USB
cameras. The calibration and alignment of the depth map and color image are done internally.
### Instructions
In order to use the 3D cameras with OpenCV, you can refer to [Get Started](https://opencv.org/get-started/)
to install OpenCV.
Note that since OpenCV 4.11, macOS users need to compile OpenCV from source with the flag
`-DOBSENSOR_USE_ORBBEC_SDK=ON` in order to use the cameras:
```bash
cmake -DOBSENSOR_USE_ORBBEC_SDK=ON ..
make
sudo make install
```
Code
----
@add_toggle_python
This tutorial's code is shown in the lines below. You can also download it from
[here](https://github.com/opencv/opencv/blob/5.x/samples/python/videocapture_obsensor.py)
@include samples/python/videocapture_obsensor.py
@end_toggle
@add_toggle_cpp
This tutorial's code is shown in the lines below. You can also download it from
[here](https://github.com/opencv/opencv/blob/5.x/samples/cpp/videocapture_obsensor.cpp)
@include samples/cpp/videocapture_obsensor.cpp
@end_toggle
### Code Explanation
#### Python
- **Open Orbbec Depth Sensor**:
Using `cv.VideoCapture(0, cv.CAP_OBSENSOR)` to attempt to open the first Orbbec depth sensor device.
If the camera fails to open, the program will exit and display an error message.
- **Loop to Grab and Process Data**:
In an infinite loop, the code continuously grabs data from the camera. The `orbbec_cap.grab()`
method attempts to grab a frame.
- **Process BGR Image**:
Using `orbbec_cap.retrieve(None, cv.CAP_OBSENSOR_BGR_IMAGE)` to retrieve the BGR image data.
If successfully retrieved, the BGR image is displayed in a window using `cv.imshow("BGR", bgr_image)`.
- **Process Depth Image**:
Using `orbbec_cap.retrieve(None, cv.CAP_OBSENSOR_DEPTH_MAP)` to retrieve the depth image data.
If successfully retrieved, the depth image is first normalized to a range of 0 to 255, then a
false color image is applied, and the result is displayed in a window using `cv.imshow("DEPTH", color_depth_map)`.
- **Keyboard Interrupt**:
Using `cv.pollKey()` to detect keyboard events. If a key is pressed, the loop breaks and
the program ends.
- **Release Resources**:
After exiting the loop, the camera resources are released using `orbbec_cap.release()`.
#### C++
- **Open Orbbec Depth Sensor**:
Using `VideoCapture obsensorCapture(0, CAP_OBSENSOR)` to attempt to open the first Orbbec depth
sensor device. If the camera fails to open, an error message is displayed, and the program exits.
- **Retrieve Camera Intrinsic Parameters**:
Using `obsensorCapture.get()` to retrieve the intrinsic parameters of the camera, including focal
lengths (`fx`, `fy`) and principal points (`cx`, `cy`).
- **Loop to Grab and Process Data**:
In an infinite loop, the code continuously grabs data from the camera. The `obsensorCapture.grab()`
method attempts to grab a frame.
- **Process BGR Image**:
Using `obsensorCapture.retrieve(image, CAP_OBSENSOR_BGR_IMAGE)` to retrieve the BGR image data.
If successfully retrieved, the BGR image is displayed in a window using `imshow("BGR", image)`.
- **Process Depth Image**:
Using `obsensorCapture.retrieve(depthMap, CAP_OBSENSOR_DEPTH_MAP)` to retrieve the depth image data.
If successfully retrieved, the depth image is normalized and a false color image is applied, then
the result is displayed in a window using `imshow("DEPTH", adjDepthMap)`. The retrieved depth
values are in millimeters and are truncated to a range between 300 and 5000 (millimeters).
This fixed range can be interpreted as a truncation based on the depth camera's depth range,
removing invalid pixels on the depth map.
- **Overlay Depth Map on BGR Image**:
Convert the depth map to an 8-bit image, resize it to match the BGR image size, and overlay it
on the BGR image with a specified transparency (`alpha`). The overlaid image is displayed in
a window using `imshow("DepthToColor", image)`.
- **Keyboard Interrupt**:
Using `pollKey()` to detect keyboard events. If a key is pressed, the loop breaks and the program ends.
- **Release Resources**:
After exiting the loop, the camera resources are released.
### Results
#### Python
![BGR And DEPTH frame](images/orbbec_uvc_python.jpg)
#### C++
![BGR And DEPTH And DepthToColor frame](images/orbbec_uvc_cpp.jpg)
### Note
Mac users need sudo privileges to execute the code.

@ -7,5 +7,6 @@ Application utils (highgui, imgcodecs, videoio modules) {#tutorial_table_of_cont
- @subpage tutorial_video_write
- @subpage tutorial_kinect_openni
- @subpage tutorial_orbbec_astra_openni
- @subpage tutorial_orbbec_uvc
- @subpage tutorial_intelperc
- @subpage tutorial_wayland_ubuntu

@ -1151,22 +1151,40 @@ int ChessBoardDetector::addOuterQuad(ChessBoardQuad& quad, std::vector<ChessBoar
// have to set exact corner
q.corners[j] = quad.corners[i];
// set row and col for next step check
switch (i)
{
case 0:
q.col = quad.col - 1; q.row = quad.row - 1; break;
case 1:
q.col = quad.col + 1; q.row = quad.row - 1; break;
case 2:
q.col = quad.col + 1; q.row = quad.row - 1; break;
case 3:
q.col = quad.col - 1; q.row = quad.row + 1; break;
}
// now find other neighbor and add it, if possible
int next_i = (i + 1) & 3;
int prev_i = (i + 3) & 3; // equal to (j + 1) & 3
ChessBoardQuad* quad_prev = quad.neighbors[prev_i];
if (quad_prev &&
quad_prev->ordered &&
quad_prev->neighbors[i] &&
quad_prev->neighbors[i]->ordered )
for (int k = 1; k <= 3; k += 2)
{
ChessBoardQuad* qn = quad_prev->neighbors[i];
q.count = 2;
q.neighbors[prev_i] = qn;
qn->neighbors[next_i] = &q;
qn->count += 1;
// have to set exact corner
q.corners[prev_i] = qn->corners[next_i];
int next_i = (i + k) % 4;
int prev_i = (i + k + 2) % 4;
ChessBoardQuad* quad_prev = quad.neighbors[prev_i];
if (quad_prev &&
quad_prev->ordered &&
quad_prev->neighbors[i] &&
quad_prev->neighbors[i]->ordered &&
std::abs(quad_prev->neighbors[i]->col - q.col) == 1 &&
std::abs(quad_prev->neighbors[i]->row - q.row) == 1)
{
ChessBoardQuad* qn = quad_prev->neighbors[i];
q.count = 2;
q.neighbors[prev_i] = qn;
qn->neighbors[next_i] = &q;
qn->count += 1;
// have to set exact corner
q.corners[prev_i] = qn->corners[next_i];
}
}
}
}

@ -123,12 +123,12 @@ public:
Instead, the macros CV_Error(), CV_Error_() and CV_Assert() are used.
*/
Exception(int _code, const String& _err, const String& _func, const String& _file, int _line);
virtual ~Exception() throw();
virtual ~Exception() CV_NOEXCEPT;
/*!
\return the error description and the context as a text string.
*/
virtual const char *what() const throw() CV_OVERRIDE;
virtual const char *what() const CV_NOEXCEPT CV_OVERRIDE;
void formatMessage();
String msg; ///< the formatted error message

@ -146,9 +146,23 @@
# define CV_NEON 1
#endif
#if defined(__riscv) && defined(__riscv_vector) && defined(__riscv_vector_071)
# include<riscv_vector.h>
# define CV_RVV071 1
/* RVV-related macro states with different compiler
// +--------------------+----------+----------+
// | Macro | Upstream | XuanTie |
// +--------------------+----------+----------+
// | CV_CPU_COMPILE_RVV | defined | defined |
// | CV_RVV | 1 | 0 |
// | CV_RVV071 | 0 | 1 |
// | CV_TRY_RVV | 1 | 1 |
// +--------------------+----------+----------+
*/
#ifdef CV_CPU_COMPILE_RVV
# ifdef __riscv_vector_071
# define CV_RVV071 1
# else
# define CV_RVV 1
# endif
#include <riscv_vector.h>
#endif
#ifdef CV_CPU_COMPILE_VSX
@ -183,11 +197,6 @@
# include <wasm_simd128.h>
#endif
#if defined CV_CPU_COMPILE_RVV
# define CV_RVV 1
# include <riscv_vector.h>
#endif
#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX

@ -239,11 +239,7 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
#include "opencv2/core/hal/intrin_wasm.hpp"
#elif CV_RVV && !defined(CV_FORCE_SIMD128_CPP)
#if defined(CV_RVV_SCALABLE)
#include "opencv2/core/hal/intrin_rvv_scalable.hpp"
#else
#include "opencv2/core/hal/intrin_rvv.hpp"
#endif
#elif CV_LSX && !defined(CV_FORCE_SIMD128_CPP)

File diff suppressed because it is too large Load Diff

@ -1,768 +0,0 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copied from
// https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/master/auto-generated/rvv-v0p10-compatible-headers
#ifndef __RVV_0P10_COMPATIBLE_HEADERS_OVERLOADED_NON_POLICY_H
#define __RVV_0P10_COMPATIBLE_HEADERS_OVERLOADED_NON_POLICY_H
// The maximum number of parameters is 20, this is held by segment load
// instructions with a NFIELD (NF) of 8. 20 is contributed by 8 vector register
// pointers passed, 1 vector mask register, 8 passthrough register for
// undisturbed policy, and 3 for address base, byte index, vl.
#define _GET_OVERRIDE(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13,\
_14, _15, _16, _17, _18, _19, _20, NAME, ...) NAME
#if __has_include ("riscv_vector.h")
#include <riscv_vector.h>
#endif
#ifndef __RISCV_VECTOR_H
#include_next <riscv_vector.h>
#endif
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
#define vmerge(mask, op1, op2, vl) __riscv_vmerge((op1), (op2), (mask), (vl))
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
#define vfmerge(mask, op1, op2, vl) __riscv_vfmerge((op1), (op2), (mask), (vl))
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
// masked functions
#define vcompress(mask, dest, src, vl) __riscv_vcompress_tu((dest), (src), (mask), (vl))
// Reinterpret between different type under the same SEW/LMUL
// Reinterpret between different SEW under the same LMUL
#define vse16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vse16, __riscv_vse16, 2, 1)(__VA_ARGS__)
#define vse32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vse32, __riscv_vse32, 2, 1)(__VA_ARGS__)
#define vse64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vse64, __riscv_vse64, 2, 1)(__VA_ARGS__)
#define vse8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vse8, __riscv_vse8, 2, 1)(__VA_ARGS__)
#define vsse16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsse16, __riscv_vsse16, 3, 2, 1)(__VA_ARGS__)
#define vsse32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsse32, __riscv_vsse32, 3, 2, 1)(__VA_ARGS__)
#define vsse64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsse64, __riscv_vsse64, 3, 2, 1)(__VA_ARGS__)
#define vsse8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsse8, __riscv_vsse8, 3, 2, 1)(__VA_ARGS__)
#define vloxei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vloxei8_tumu, 4, __riscv_vloxei8, 2, 1)(__VA_ARGS__)
#define vloxei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vloxei16_tumu, 4, __riscv_vloxei16, 2, 1)(__VA_ARGS__)
#define vloxei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vloxei32_tumu, 4, __riscv_vloxei32, 2, 1)(__VA_ARGS__)
#define vloxei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vloxei64_tumu, 4, __riscv_vloxei64, 2, 1)(__VA_ARGS__)
#define vluxei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vluxei8_tumu, 4, __riscv_vluxei8, 2, 1)(__VA_ARGS__)
#define vluxei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vluxei16_tumu, 4, __riscv_vluxei16, 2, 1)(__VA_ARGS__)
#define vluxei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vluxei32_tumu, 4, __riscv_vluxei32, 2, 1)(__VA_ARGS__)
#define vluxei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vluxei64_tumu, 4, __riscv_vluxei64, 2, 1)(__VA_ARGS__)
#define vsoxei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsoxei8, __riscv_vsoxei8, 3, 2, 1)(__VA_ARGS__)
#define vsoxei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsoxei16, __riscv_vsoxei16, 3, 2, 1)(__VA_ARGS__)
#define vsoxei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsoxei32, __riscv_vsoxei32, 3, 2, 1)(__VA_ARGS__)
#define vsoxei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsoxei64, __riscv_vsoxei64, 3, 2, 1)(__VA_ARGS__)
#define vsuxei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsuxei8, __riscv_vsuxei8, 3, 2, 1)(__VA_ARGS__)
#define vsuxei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsuxei16, __riscv_vsuxei16, 3, 2, 1)(__VA_ARGS__)
#define vsuxei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsuxei32, __riscv_vsuxei32, 3, 2, 1)(__VA_ARGS__)
#define vsuxei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsuxei64, __riscv_vsuxei64, 3, 2, 1)(__VA_ARGS__)
#define vsseg2e16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsseg2e16, __riscv_vsseg2e16, 3, 2, 1)(__VA_ARGS__)
#define vsseg3e16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, __riscv_vsseg3e16, __riscv_vsseg3e16, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg4e16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, __riscv_vsseg4e16, __riscv_vsseg4e16, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg5e16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vsseg5e16, __riscv_vsseg5e16, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg6e16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, __riscv_vsseg6e16, __riscv_vsseg6e16, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg7e16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vsseg7e16, __riscv_vsseg7e16, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg8e16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vsseg8e16, __riscv_vsseg8e16, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg2e32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsseg2e32, __riscv_vsseg2e32, 3, 2, 1)(__VA_ARGS__)
#define vsseg3e32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, __riscv_vsseg3e32, __riscv_vsseg3e32, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg4e32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, __riscv_vsseg4e32, __riscv_vsseg4e32, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg5e32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vsseg5e32, __riscv_vsseg5e32, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg6e32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, __riscv_vsseg6e32, __riscv_vsseg6e32, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg7e32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vsseg7e32, __riscv_vsseg7e32, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg8e32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vsseg8e32, __riscv_vsseg8e32, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg2e64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsseg2e64, __riscv_vsseg2e64, 3, 2, 1)(__VA_ARGS__)
#define vsseg3e64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, __riscv_vsseg3e64, __riscv_vsseg3e64, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg4e64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, __riscv_vsseg4e64, __riscv_vsseg4e64, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg5e64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vsseg5e64, __riscv_vsseg5e64, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg6e64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, __riscv_vsseg6e64, __riscv_vsseg6e64, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg7e64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vsseg7e64, __riscv_vsseg7e64, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg8e64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vsseg8e64, __riscv_vsseg8e64, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg2e8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsseg2e8, __riscv_vsseg2e8, 3, 2, 1)(__VA_ARGS__)
#define vsseg3e8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, __riscv_vsseg3e8, __riscv_vsseg3e8, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg4e8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, __riscv_vsseg4e8, __riscv_vsseg4e8, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg5e8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vsseg5e8, __riscv_vsseg5e8, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg6e8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, __riscv_vsseg6e8, __riscv_vsseg6e8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg7e8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vsseg7e8, __riscv_vsseg7e8, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsseg8e8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vsseg8e8, __riscv_vsseg8e8, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg2e16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, __riscv_vssseg2e16, __riscv_vssseg2e16, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg3e16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, __riscv_vssseg3e16, __riscv_vssseg3e16, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg4e16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vssseg4e16, __riscv_vssseg4e16, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg5e16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, __riscv_vssseg5e16, __riscv_vssseg5e16, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg6e16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vssseg6e16, __riscv_vssseg6e16, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg7e16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vssseg7e16, __riscv_vssseg7e16, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg8e16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vssseg8e16, __riscv_vssseg8e16, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
// Legacy-name shims for strided segment stores (vssseg<N>e<W>): forward the old
// un-prefixed intrinsic names to the overloaded __riscv_-prefixed intrinsics.
// _GET_OVERRIDE (defined earlier in this header) selects a token by the number of
// arguments the caller passed: each position in the list corresponds to one arity,
// counted down from 20. For stores, the masked and unmasked calls both resolve to
// the same overloaded __riscv_ name (the intrinsic itself is overloaded on the
// mask argument), so the name appears twice — once at the unmasked arg count
// (N values + base + stride + vl = N+3) and once at the masked count (N+4).
// NOTE(review): presumably this is the v0.10/v0.11 -> v0.12 RVV intrinsic
// compatibility layer — confirm against the enclosing file's header comment.
#define vssseg2e32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, __riscv_vssseg2e32, __riscv_vssseg2e32, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg3e32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, __riscv_vssseg3e32, __riscv_vssseg3e32, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg4e32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vssseg4e32, __riscv_vssseg4e32, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg5e32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, __riscv_vssseg5e32, __riscv_vssseg5e32, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg6e32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vssseg6e32, __riscv_vssseg6e32, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg7e32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vssseg7e32, __riscv_vssseg7e32, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg8e32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vssseg8e32, __riscv_vssseg8e32, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
// Same pattern for 64-bit element width.
#define vssseg2e64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, __riscv_vssseg2e64, __riscv_vssseg2e64, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg3e64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, __riscv_vssseg3e64, __riscv_vssseg3e64, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg4e64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vssseg4e64, __riscv_vssseg4e64, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg5e64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, __riscv_vssseg5e64, __riscv_vssseg5e64, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg6e64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vssseg6e64, __riscv_vssseg6e64, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg7e64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vssseg7e64, __riscv_vssseg7e64, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg8e64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vssseg8e64, __riscv_vssseg8e64, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
// Same pattern for 8-bit element width.
#define vssseg2e8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, __riscv_vssseg2e8, __riscv_vssseg2e8, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg3e8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, __riscv_vssseg3e8, __riscv_vssseg3e8, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg4e8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vssseg4e8, __riscv_vssseg4e8, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg5e8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, __riscv_vssseg5e8, __riscv_vssseg5e8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg6e8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vssseg6e8, __riscv_vssseg6e8, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg7e8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vssseg7e8, __riscv_vssseg7e8, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vssseg8e8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vssseg8e8, __riscv_vssseg8e8, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
// Indexed segment LOADS (vloxseg = ordered, vluxseg = unordered), dispatched on
// argument count. Unlike the stores above, the masked arity cannot map to the
// plain overloaded name: the legacy masked load took a maskedoff operand, which
// in the v0.12 intrinsics corresponds to the explicit tail-undisturbed /
// mask-undisturbed policy variant — hence __riscv_v*seg<N>ei<W>_tumu at the
// masked argument count and the plain __riscv_ name at the unmasked count.
#define vloxseg2ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vloxseg2ei8_tumu, 7, 6, __riscv_vloxseg2ei8, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg3ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vloxseg3ei8_tumu, 9, 8, 7, __riscv_vloxseg3ei8, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg4ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vloxseg4ei8_tumu, 11, 10, 9, 8, __riscv_vloxseg4ei8, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg5ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, __riscv_vloxseg5ei8_tumu, 13, 12, 11, 10, 9, __riscv_vloxseg5ei8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg6ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, __riscv_vloxseg6ei8_tumu, 15, 14, 13, 12, 11, 10, __riscv_vloxseg6ei8, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg7ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, __riscv_vloxseg7ei8_tumu, 17, 16, 15, 14, 13, 12, 11, __riscv_vloxseg7ei8, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg8ei8(...) _GET_OVERRIDE(__VA_ARGS__, __riscv_vloxseg8ei8_tumu, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vloxseg8ei8, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
// 16-bit index width.
#define vloxseg2ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vloxseg2ei16_tumu, 7, 6, __riscv_vloxseg2ei16, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg3ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vloxseg3ei16_tumu, 9, 8, 7, __riscv_vloxseg3ei16, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg4ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vloxseg4ei16_tumu, 11, 10, 9, 8, __riscv_vloxseg4ei16, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg5ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, __riscv_vloxseg5ei16_tumu, 13, 12, 11, 10, 9, __riscv_vloxseg5ei16, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg6ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, __riscv_vloxseg6ei16_tumu, 15, 14, 13, 12, 11, 10, __riscv_vloxseg6ei16, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg7ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, __riscv_vloxseg7ei16_tumu, 17, 16, 15, 14, 13, 12, 11, __riscv_vloxseg7ei16, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg8ei16(...) _GET_OVERRIDE(__VA_ARGS__, __riscv_vloxseg8ei16_tumu, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vloxseg8ei16, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
// 32-bit index width.
#define vloxseg2ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vloxseg2ei32_tumu, 7, 6, __riscv_vloxseg2ei32, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg3ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vloxseg3ei32_tumu, 9, 8, 7, __riscv_vloxseg3ei32, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg4ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vloxseg4ei32_tumu, 11, 10, 9, 8, __riscv_vloxseg4ei32, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg5ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, __riscv_vloxseg5ei32_tumu, 13, 12, 11, 10, 9, __riscv_vloxseg5ei32, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg6ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, __riscv_vloxseg6ei32_tumu, 15, 14, 13, 12, 11, 10, __riscv_vloxseg6ei32, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg7ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, __riscv_vloxseg7ei32_tumu, 17, 16, 15, 14, 13, 12, 11, __riscv_vloxseg7ei32, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg8ei32(...) _GET_OVERRIDE(__VA_ARGS__, __riscv_vloxseg8ei32_tumu, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vloxseg8ei32, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
// 64-bit index width.
#define vloxseg2ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vloxseg2ei64_tumu, 7, 6, __riscv_vloxseg2ei64, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg3ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vloxseg3ei64_tumu, 9, 8, 7, __riscv_vloxseg3ei64, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg4ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vloxseg4ei64_tumu, 11, 10, 9, 8, __riscv_vloxseg4ei64, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg5ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, __riscv_vloxseg5ei64_tumu, 13, 12, 11, 10, 9, __riscv_vloxseg5ei64, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg6ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, __riscv_vloxseg6ei64_tumu, 15, 14, 13, 12, 11, 10, __riscv_vloxseg6ei64, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg7ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, __riscv_vloxseg7ei64_tumu, 17, 16, 15, 14, 13, 12, 11, __riscv_vloxseg7ei64, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vloxseg8ei64(...) _GET_OVERRIDE(__VA_ARGS__, __riscv_vloxseg8ei64_tumu, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vloxseg8ei64, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
// Unordered indexed segment loads — identical dispatch layout to vloxseg above.
#define vluxseg2ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vluxseg2ei8_tumu, 7, 6, __riscv_vluxseg2ei8, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg3ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vluxseg3ei8_tumu, 9, 8, 7, __riscv_vluxseg3ei8, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg4ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vluxseg4ei8_tumu, 11, 10, 9, 8, __riscv_vluxseg4ei8, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg5ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, __riscv_vluxseg5ei8_tumu, 13, 12, 11, 10, 9, __riscv_vluxseg5ei8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg6ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, __riscv_vluxseg6ei8_tumu, 15, 14, 13, 12, 11, 10, __riscv_vluxseg6ei8, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg7ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, __riscv_vluxseg7ei8_tumu, 17, 16, 15, 14, 13, 12, 11, __riscv_vluxseg7ei8, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg8ei8(...) _GET_OVERRIDE(__VA_ARGS__, __riscv_vluxseg8ei8_tumu, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vluxseg8ei8, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg2ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vluxseg2ei16_tumu, 7, 6, __riscv_vluxseg2ei16, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg3ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vluxseg3ei16_tumu, 9, 8, 7, __riscv_vluxseg3ei16, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg4ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vluxseg4ei16_tumu, 11, 10, 9, 8, __riscv_vluxseg4ei16, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg5ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, __riscv_vluxseg5ei16_tumu, 13, 12, 11, 10, 9, __riscv_vluxseg5ei16, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg6ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, __riscv_vluxseg6ei16_tumu, 15, 14, 13, 12, 11, 10, __riscv_vluxseg6ei16, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg7ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, __riscv_vluxseg7ei16_tumu, 17, 16, 15, 14, 13, 12, 11, __riscv_vluxseg7ei16, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg8ei16(...) _GET_OVERRIDE(__VA_ARGS__, __riscv_vluxseg8ei16_tumu, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vluxseg8ei16, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg2ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vluxseg2ei32_tumu, 7, 6, __riscv_vluxseg2ei32, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg3ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vluxseg3ei32_tumu, 9, 8, 7, __riscv_vluxseg3ei32, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg4ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vluxseg4ei32_tumu, 11, 10, 9, 8, __riscv_vluxseg4ei32, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg5ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, __riscv_vluxseg5ei32_tumu, 13, 12, 11, 10, 9, __riscv_vluxseg5ei32, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg6ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, __riscv_vluxseg6ei32_tumu, 15, 14, 13, 12, 11, 10, __riscv_vluxseg6ei32, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg7ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, __riscv_vluxseg7ei32_tumu, 17, 16, 15, 14, 13, 12, 11, __riscv_vluxseg7ei32, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg8ei32(...) _GET_OVERRIDE(__VA_ARGS__, __riscv_vluxseg8ei32_tumu, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vluxseg8ei32, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg2ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vluxseg2ei64_tumu, 7, 6, __riscv_vluxseg2ei64, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg3ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vluxseg3ei64_tumu, 9, 8, 7, __riscv_vluxseg3ei64, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg4ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vluxseg4ei64_tumu, 11, 10, 9, 8, __riscv_vluxseg4ei64, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg5ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, __riscv_vluxseg5ei64_tumu, 13, 12, 11, 10, 9, __riscv_vluxseg5ei64, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg6ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, __riscv_vluxseg6ei64_tumu, 15, 14, 13, 12, 11, 10, __riscv_vluxseg6ei64, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg7ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, __riscv_vluxseg7ei64_tumu, 17, 16, 15, 14, 13, 12, 11, __riscv_vluxseg7ei64, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vluxseg8ei64(...) _GET_OVERRIDE(__VA_ARGS__, __riscv_vluxseg8ei64_tumu, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vluxseg8ei64, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
// Indexed segment STORES (vsoxseg = ordered, vsuxseg = unordered). As with the
// strided stores, masked and unmasked calls both resolve to the same overloaded
// __riscv_ intrinsic (stores have no maskedoff operand, so no policy suffix is
// needed); the name therefore appears twice, at the masked and unmasked arities.
#define vsoxseg2ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, __riscv_vsoxseg2ei8, __riscv_vsoxseg2ei8, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg3ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, __riscv_vsoxseg3ei8, __riscv_vsoxseg3ei8, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg4ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vsoxseg4ei8, __riscv_vsoxseg4ei8, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg5ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, __riscv_vsoxseg5ei8, __riscv_vsoxseg5ei8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg6ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vsoxseg6ei8, __riscv_vsoxseg6ei8, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg7ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vsoxseg7ei8, __riscv_vsoxseg7ei8, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg8ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vsoxseg8ei8, __riscv_vsoxseg8ei8, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg2ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, __riscv_vsoxseg2ei16, __riscv_vsoxseg2ei16, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg3ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, __riscv_vsoxseg3ei16, __riscv_vsoxseg3ei16, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg4ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vsoxseg4ei16, __riscv_vsoxseg4ei16, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg5ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, __riscv_vsoxseg5ei16, __riscv_vsoxseg5ei16, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg6ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vsoxseg6ei16, __riscv_vsoxseg6ei16, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg7ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vsoxseg7ei16, __riscv_vsoxseg7ei16, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg8ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vsoxseg8ei16, __riscv_vsoxseg8ei16, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg2ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, __riscv_vsoxseg2ei32, __riscv_vsoxseg2ei32, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg3ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, __riscv_vsoxseg3ei32, __riscv_vsoxseg3ei32, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg4ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vsoxseg4ei32, __riscv_vsoxseg4ei32, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg5ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, __riscv_vsoxseg5ei32, __riscv_vsoxseg5ei32, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg6ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vsoxseg6ei32, __riscv_vsoxseg6ei32, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg7ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vsoxseg7ei32, __riscv_vsoxseg7ei32, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg8ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vsoxseg8ei32, __riscv_vsoxseg8ei32, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg2ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, __riscv_vsoxseg2ei64, __riscv_vsoxseg2ei64, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg3ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, __riscv_vsoxseg3ei64, __riscv_vsoxseg3ei64, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg4ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vsoxseg4ei64, __riscv_vsoxseg4ei64, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg5ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, __riscv_vsoxseg5ei64, __riscv_vsoxseg5ei64, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg6ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vsoxseg6ei64, __riscv_vsoxseg6ei64, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg7ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vsoxseg7ei64, __riscv_vsoxseg7ei64, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsoxseg8ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vsoxseg8ei64, __riscv_vsoxseg8ei64, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
// Unordered indexed segment stores — identical layout to vsoxseg above.
#define vsuxseg2ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, __riscv_vsuxseg2ei8, __riscv_vsuxseg2ei8, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg3ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, __riscv_vsuxseg3ei8, __riscv_vsuxseg3ei8, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg4ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vsuxseg4ei8, __riscv_vsuxseg4ei8, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg5ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, __riscv_vsuxseg5ei8, __riscv_vsuxseg5ei8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg6ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vsuxseg6ei8, __riscv_vsuxseg6ei8, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg7ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vsuxseg7ei8, __riscv_vsuxseg7ei8, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg8ei8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vsuxseg8ei8, __riscv_vsuxseg8ei8, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg2ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, __riscv_vsuxseg2ei16, __riscv_vsuxseg2ei16, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg3ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, __riscv_vsuxseg3ei16, __riscv_vsuxseg3ei16, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg4ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vsuxseg4ei16, __riscv_vsuxseg4ei16, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg5ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, __riscv_vsuxseg5ei16, __riscv_vsuxseg5ei16, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg6ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vsuxseg6ei16, __riscv_vsuxseg6ei16, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg7ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vsuxseg7ei16, __riscv_vsuxseg7ei16, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg8ei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vsuxseg8ei16, __riscv_vsuxseg8ei16, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg2ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, __riscv_vsuxseg2ei32, __riscv_vsuxseg2ei32, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg3ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, __riscv_vsuxseg3ei32, __riscv_vsuxseg3ei32, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg4ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vsuxseg4ei32, __riscv_vsuxseg4ei32, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg5ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, __riscv_vsuxseg5ei32, __riscv_vsuxseg5ei32, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg6ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vsuxseg6ei32, __riscv_vsuxseg6ei32, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg7ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vsuxseg7ei32, __riscv_vsuxseg7ei32, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg8ei32(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vsuxseg8ei32, __riscv_vsuxseg8ei32, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg2ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, __riscv_vsuxseg2ei64, __riscv_vsuxseg2ei64, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg3ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, __riscv_vsuxseg3ei64, __riscv_vsuxseg3ei64, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg4ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, __riscv_vsuxseg4ei64, __riscv_vsuxseg4ei64, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg5ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, __riscv_vsuxseg5ei64, __riscv_vsuxseg5ei64, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg6ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, __riscv_vsuxseg6ei64, __riscv_vsuxseg6ei64, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg7ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, __riscv_vsuxseg7ei64, __riscv_vsuxseg7ei64, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
#define vsuxseg8ei64(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, __riscv_vsuxseg8ei64, __riscv_vsuxseg8ei64, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)(__VA_ARGS__)
// Integer arithmetic shims. Layout for binary ops (vadd, vsub, ...): the plain
// overloaded __riscv_ name sits at the unmasked 3-arg arity (2 operands + vl);
// the _tumu policy variant sits at the masked 5-arg arity (mask, maskedoff,
// 2 operands, vl). Unary ops (vneg, v{s,z}ext_vf*) shift both slots down by one
// since they take a single source operand.
#define vadd(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vadd_tumu, 4, __riscv_vadd, 2, 1)(__VA_ARGS__)
#define vsub(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsub_tumu, 4, __riscv_vsub, 2, 1)(__VA_ARGS__)
#define vrsub(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vrsub_tumu, 4, __riscv_vrsub, 2, 1)(__VA_ARGS__)
#define vneg(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vneg_tumu, 3, __riscv_vneg, 1)(__VA_ARGS__)
// Widening add/subtract: _vv = vector+vector, _vx = vector+scalar,
// _wv / _wx = wide first operand; signed below, unsigned (u-suffixed) after.
#define vwadd_vv(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwadd_vv_tumu, 4, __riscv_vwadd_vv, 2, 1)(__VA_ARGS__)
#define vwadd_vx(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwadd_vx_tumu, 4, __riscv_vwadd_vx, 2, 1)(__VA_ARGS__)
#define vwadd_wv(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwadd_wv_tumu, 4, __riscv_vwadd_wv, 2, 1)(__VA_ARGS__)
#define vwadd_wx(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwadd_wx_tumu, 4, __riscv_vwadd_wx, 2, 1)(__VA_ARGS__)
#define vwsub_vv(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwsub_vv_tumu, 4, __riscv_vwsub_vv, 2, 1)(__VA_ARGS__)
#define vwsub_vx(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwsub_vx_tumu, 4, __riscv_vwsub_vx, 2, 1)(__VA_ARGS__)
#define vwsub_wv(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwsub_wv_tumu, 4, __riscv_vwsub_wv, 2, 1)(__VA_ARGS__)
#define vwsub_wx(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwsub_wx_tumu, 4, __riscv_vwsub_wx, 2, 1)(__VA_ARGS__)
#define vwaddu_vv(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwaddu_vv_tumu, 4, __riscv_vwaddu_vv, 2, 1)(__VA_ARGS__)
#define vwaddu_vx(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwaddu_vx_tumu, 4, __riscv_vwaddu_vx, 2, 1)(__VA_ARGS__)
#define vwaddu_wv(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwaddu_wv_tumu, 4, __riscv_vwaddu_wv, 2, 1)(__VA_ARGS__)
#define vwaddu_wx(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwaddu_wx_tumu, 4, __riscv_vwaddu_wx, 2, 1)(__VA_ARGS__)
#define vwsubu_vv(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwsubu_vv_tumu, 4, __riscv_vwsubu_vv, 2, 1)(__VA_ARGS__)
#define vwsubu_vx(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwsubu_vx_tumu, 4, __riscv_vwsubu_vx, 2, 1)(__VA_ARGS__)
#define vwsubu_wv(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwsubu_wv_tumu, 4, __riscv_vwsubu_wv, 2, 1)(__VA_ARGS__)
#define vwsubu_wx(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwsubu_wx_tumu, 4, __riscv_vwsubu_wx, 2, 1)(__VA_ARGS__)
// Sign / zero extension by factor 2/4/8 (unary source, so masked arity is 4).
#define vsext_vf2(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vsext_vf2_tumu, 3, __riscv_vsext_vf2, 1)(__VA_ARGS__)
#define vsext_vf4(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vsext_vf4_tumu, 3, __riscv_vsext_vf4, 1)(__VA_ARGS__)
#define vsext_vf8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vsext_vf8_tumu, 3, __riscv_vsext_vf8, 1)(__VA_ARGS__)
#define vzext_vf2(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vzext_vf2_tumu, 3, __riscv_vzext_vf2, 1)(__VA_ARGS__)
#define vzext_vf4(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vzext_vf4_tumu, 3, __riscv_vzext_vf4, 1)(__VA_ARGS__)
#define vzext_vf8(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vzext_vf8_tumu, 3, __riscv_vzext_vf8, 1)(__VA_ARGS__)
// Add/subtract-with-carry and carry-out ops have no masked policy variants in
// this mapping, so a straight passthrough to the overloaded intrinsic suffices.
#define vadc(...) __riscv_vadc(__VA_ARGS__)
#define vsbc(...) __riscv_vsbc(__VA_ARGS__)
#define vmadc(...) __riscv_vmadc(__VA_ARGS__)
#define vmsbc(...) __riscv_vmsbc(__VA_ARGS__)
// Bitwise logic and shifts — same binary-op layout as vadd: _tumu at the masked
// arity, plain overloaded name at the unmasked arity; vnot is unary (see vneg).
#define vand(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vand_tumu, 4, __riscv_vand, 2, 1)(__VA_ARGS__)
#define vor(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vor_tumu, 4, __riscv_vor, 2, 1)(__VA_ARGS__)
#define vxor(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vxor_tumu, 4, __riscv_vxor, 2, 1)(__VA_ARGS__)
#define vnot(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vnot_tumu, 3, __riscv_vnot, 1)(__VA_ARGS__)
#define vsll(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsll_tumu, 4, __riscv_vsll, 2, 1)(__VA_ARGS__)
#define vsra(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsra_tumu, 4, __riscv_vsra, 2, 1)(__VA_ARGS__)
#define vsrl(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsrl_tumu, 4, __riscv_vsrl, 2, 1)(__VA_ARGS__)
#define vnsra(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vnsra_tumu, 4, __riscv_vnsra, 2, 1)(__VA_ARGS__)
#define vnsrl(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vnsrl_tumu, 4, __riscv_vnsrl, 2, 1)(__VA_ARGS__)
// Integer comparisons produce a mask register, so the masked arity maps to the
// _mu (mask-undisturbed) variant rather than _tumu — masks carry no tail policy.
#define vmseq(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmseq_mu, 4, __riscv_vmseq, 2, 1)(__VA_ARGS__)
#define vmsne(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmsne_mu, 4, __riscv_vmsne, 2, 1)(__VA_ARGS__)
#define vmslt(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmslt_mu, 4, __riscv_vmslt, 2, 1)(__VA_ARGS__)
#define vmsle(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmsle_mu, 4, __riscv_vmsle, 2, 1)(__VA_ARGS__)
#define vmsgt(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmsgt_mu, 4, __riscv_vmsgt, 2, 1)(__VA_ARGS__)
#define vmsge(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmsge_mu, 4, __riscv_vmsge, 2, 1)(__VA_ARGS__)
#define vmsltu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmsltu_mu, 4, __riscv_vmsltu, 2, 1)(__VA_ARGS__)
#define vmsleu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmsleu_mu, 4, __riscv_vmsleu, 2, 1)(__VA_ARGS__)
#define vmsgtu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmsgtu_mu, 4, __riscv_vmsgtu, 2, 1)(__VA_ARGS__)
#define vmsgeu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmsgeu_mu, 4, __riscv_vmsgeu, 2, 1)(__VA_ARGS__)
// Min/max, multiply, divide, and widening multiply — standard binary-op layout
// (_tumu masked / plain unmasked, see vadd above).
#define vmin(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmin_tumu, 4, __riscv_vmin, 2, 1)(__VA_ARGS__)
#define vmax(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmax_tumu, 4, __riscv_vmax, 2, 1)(__VA_ARGS__)
#define vminu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vminu_tumu, 4, __riscv_vminu, 2, 1)(__VA_ARGS__)
#define vmaxu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmaxu_tumu, 4, __riscv_vmaxu, 2, 1)(__VA_ARGS__)
#define vmul(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmul_tumu, 4, __riscv_vmul, 2, 1)(__VA_ARGS__)
#define vmulh(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmulh_tumu, 4, __riscv_vmulh, 2, 1)(__VA_ARGS__)
#define vmulhsu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmulhsu_tumu, 4, __riscv_vmulhsu, 2, 1)(__VA_ARGS__)
#define vmulhu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmulhu_tumu, 4, __riscv_vmulhu, 2, 1)(__VA_ARGS__)
#define vdiv(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vdiv_tumu, 4, __riscv_vdiv, 2, 1)(__VA_ARGS__)
#define vrem(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vrem_tumu, 4, __riscv_vrem, 2, 1)(__VA_ARGS__)
#define vdivu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vdivu_tumu, 4, __riscv_vdivu, 2, 1)(__VA_ARGS__)
#define vremu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vremu_tumu, 4, __riscv_vremu, 2, 1)(__VA_ARGS__)
#define vwmul(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwmul_tumu, 4, __riscv_vwmul, 2, 1)(__VA_ARGS__)
#define vwmulsu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwmulsu_tumu, 4, __riscv_vwmulsu, 2, 1)(__VA_ARGS__)
#define vwmulu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwmulu_tumu, 4, __riscv_vwmulu, 2, 1)(__VA_ARGS__)
// Ternary fused multiply-add family: the destination is also a source, so the
// UNMASKED 4-arg form needs the _tu (tail-undisturbed) variant to keep the
// accumulator's tail elements, and the masked 5-arg form needs _tumu — the two
// policy names sit in adjacent selector slots (arities 5 and 4).
#define vmacc(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmacc_tumu, __riscv_vmacc_tu, 3, 2, 1)(__VA_ARGS__)
#define vnmsac(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vnmsac_tumu, __riscv_vnmsac_tu, 3, 2, 1)(__VA_ARGS__)
#define vmadd(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmadd_tumu, __riscv_vmadd_tu, 3, 2, 1)(__VA_ARGS__)
#define vnmsub(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vnmsub_tumu, __riscv_vnmsub_tu, 3, 2, 1)(__VA_ARGS__)
#define vwmacc(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwmacc_tumu, __riscv_vwmacc_tu, 3, 2, 1)(__VA_ARGS__)
#define vwmaccsu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwmaccsu_tumu, __riscv_vwmaccsu_tu, 3, 2, 1)(__VA_ARGS__)
#define vwmaccus(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwmaccus_tumu, __riscv_vwmaccus_tu, 3, 2, 1)(__VA_ARGS__)
#define vwmaccu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwmaccu_tumu, __riscv_vwmaccu_tu, 3, 2, 1)(__VA_ARGS__)
// Vector move: plain one-to-one rename, no argument-count dispatch.
#define vmv_v(...) __riscv_vmv_v(__VA_ARGS__)
// Fixed-point ops: saturating add/sub, averaging add/sub, fractional multiply,
// scaling shifts and narrowing clips.  Masked calls resolve to the _tumu
// policy variant, unmasked calls to the plain intrinsic.
#define vsadd(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsadd_tumu, 4, __riscv_vsadd, 2, 1)(__VA_ARGS__)
#define vssub(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vssub_tumu, 4, __riscv_vssub, 2, 1)(__VA_ARGS__)
#define vsaddu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsaddu_tumu, 4, __riscv_vsaddu, 2, 1)(__VA_ARGS__)
#define vssubu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vssubu_tumu, 4, __riscv_vssubu, 2, 1)(__VA_ARGS__)
#define vaadd(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vaadd_tumu, 4, __riscv_vaadd, 2, 1)(__VA_ARGS__)
#define vasub(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vasub_tumu, 4, __riscv_vasub, 2, 1)(__VA_ARGS__)
#define vaaddu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vaaddu_tumu, 4, __riscv_vaaddu, 2, 1)(__VA_ARGS__)
#define vasubu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vasubu_tumu, 4, __riscv_vasubu, 2, 1)(__VA_ARGS__)
// NOTE(review): vsmul's masked slot uses _mu, unlike the _tumu used by every
// neighboring vector-producing op in this group — confirm this is intentional
// (it matches how the mask-compare macros below are mapped).
#define vsmul(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vsmul_mu, 4, __riscv_vsmul, 2, 1)(__VA_ARGS__)
#define vssra(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vssra_tumu, 4, __riscv_vssra, 2, 1)(__VA_ARGS__)
#define vssrl(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vssrl_tumu, 4, __riscv_vssrl, 2, 1)(__VA_ARGS__)
#define vnclip(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vnclip_tumu, 4, __riscv_vnclip, 2, 1)(__VA_ARGS__)
#define vnclipu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vnclipu_tumu, 4, __riscv_vnclipu, 2, 1)(__VA_ARGS__)
// Floating-point arithmetic.  Binary ops dispatch masked -> _tumu, unmasked ->
// plain; unary ops (vfneg, vfsqrt, vfrsqrt7, vfrec7, vfabs) use the shorter
// slot pattern (one fewer operand); the fused multiply-add family maps its
// unmasked form to _tu because it carries a destination accumulator.
#define vfadd(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfadd_tumu, 4, __riscv_vfadd, 2, 1)(__VA_ARGS__)
#define vfsub(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfsub_tumu, 4, __riscv_vfsub, 2, 1)(__VA_ARGS__)
#define vfrsub(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfrsub_tumu, 4, __riscv_vfrsub, 2, 1)(__VA_ARGS__)
#define vfneg(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfneg_tumu, 3, __riscv_vfneg, 1)(__VA_ARGS__)
// Widening add/sub: _vv/_vf take two narrow operands (vector/scalar second
// operand), _wv/_wf take a wide first operand.
#define vfwadd_vv(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfwadd_vv_tumu, 4, __riscv_vfwadd_vv, 2, 1)(__VA_ARGS__)
#define vfwadd_vf(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfwadd_vf_tumu, 4, __riscv_vfwadd_vf, 2, 1)(__VA_ARGS__)
#define vfwadd_wv(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfwadd_wv_tumu, 4, __riscv_vfwadd_wv, 2, 1)(__VA_ARGS__)
#define vfwadd_wf(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfwadd_wf_tumu, 4, __riscv_vfwadd_wf, 2, 1)(__VA_ARGS__)
#define vfwsub_vv(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfwsub_vv_tumu, 4, __riscv_vfwsub_vv, 2, 1)(__VA_ARGS__)
#define vfwsub_vf(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfwsub_vf_tumu, 4, __riscv_vfwsub_vf, 2, 1)(__VA_ARGS__)
#define vfwsub_wv(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfwsub_wv_tumu, 4, __riscv_vfwsub_wv, 2, 1)(__VA_ARGS__)
#define vfwsub_wf(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfwsub_wf_tumu, 4, __riscv_vfwsub_wf, 2, 1)(__VA_ARGS__)
#define vfmul(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfmul_tumu, 4, __riscv_vfmul, 2, 1)(__VA_ARGS__)
#define vfdiv(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfdiv_tumu, 4, __riscv_vfdiv, 2, 1)(__VA_ARGS__)
#define vfrdiv(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfrdiv_tumu, 4, __riscv_vfrdiv, 2, 1)(__VA_ARGS__)
#define vfwmul(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfwmul_tumu, 4, __riscv_vfwmul, 2, 1)(__VA_ARGS__)
// Fused multiply-add family (plus widening variants).
#define vfmacc(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfmacc_tumu, __riscv_vfmacc_tu, 3, 2, 1)(__VA_ARGS__)
#define vfnmacc(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfnmacc_tumu, __riscv_vfnmacc_tu, 3, 2, 1)(__VA_ARGS__)
#define vfmsac(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfmsac_tumu, __riscv_vfmsac_tu, 3, 2, 1)(__VA_ARGS__)
#define vfnmsac(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfnmsac_tumu, __riscv_vfnmsac_tu, 3, 2, 1)(__VA_ARGS__)
#define vfmadd(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfmadd_tumu, __riscv_vfmadd_tu, 3, 2, 1)(__VA_ARGS__)
#define vfnmadd(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfnmadd_tumu, __riscv_vfnmadd_tu, 3, 2, 1)(__VA_ARGS__)
#define vfmsub(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfmsub_tumu, __riscv_vfmsub_tu, 3, 2, 1)(__VA_ARGS__)
#define vfnmsub(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfnmsub_tumu, __riscv_vfnmsub_tu, 3, 2, 1)(__VA_ARGS__)
#define vfwmacc(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfwmacc_tumu, __riscv_vfwmacc_tu, 3, 2, 1)(__VA_ARGS__)
#define vfwnmacc(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfwnmacc_tumu, __riscv_vfwnmacc_tu, 3, 2, 1)(__VA_ARGS__)
#define vfwmsac(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfwmsac_tumu, __riscv_vfwmsac_tu, 3, 2, 1)(__VA_ARGS__)
#define vfwnmsac(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfwnmsac_tumu, __riscv_vfwnmsac_tu, 3, 2, 1)(__VA_ARGS__)
// Square root, reciprocal-sqrt / reciprocal estimates (unary).
#define vfsqrt(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfsqrt_tumu, 3, __riscv_vfsqrt, 1)(__VA_ARGS__)
#define vfrsqrt7(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfrsqrt7_tumu, 3, __riscv_vfrsqrt7, 1)(__VA_ARGS__)
#define vfrec7(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfrec7_tumu, 3, __riscv_vfrec7, 1)(__VA_ARGS__)
// Min/max and sign-injection.
#define vfmin(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfmin_tumu, 4, __riscv_vfmin, 2, 1)(__VA_ARGS__)
#define vfmax(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfmax_tumu, 4, __riscv_vfmax, 2, 1)(__VA_ARGS__)
#define vfsgnj(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfsgnj_tumu, 4, __riscv_vfsgnj, 2, 1)(__VA_ARGS__)
#define vfsgnjn(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfsgnjn_tumu, 4, __riscv_vfsgnjn, 2, 1)(__VA_ARGS__)
#define vfsgnjx(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfsgnjx_tumu, 4, __riscv_vfsgnjx, 2, 1)(__VA_ARGS__)
#define vfabs(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfabs_tumu, 3, __riscv_vfabs, 1)(__VA_ARGS__)
// Floating-point compares.  These produce mask registers, so the masked slot
// maps to the _mu (mask-undisturbed) policy variant rather than _tumu.
#define vmfeq(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmfeq_mu, 4, __riscv_vmfeq, 2, 1)(__VA_ARGS__)
#define vmfne(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmfne_mu, 4, __riscv_vmfne, 2, 1)(__VA_ARGS__)
#define vmflt(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmflt_mu, 4, __riscv_vmflt, 2, 1)(__VA_ARGS__)
#define vmfle(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmfle_mu, 4, __riscv_vmfle, 2, 1)(__VA_ARGS__)
#define vmfgt(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmfgt_mu, 4, __riscv_vmfgt, 2, 1)(__VA_ARGS__)
#define vmfge(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vmfge_mu, 4, __riscv_vmfge, 2, 1)(__VA_ARGS__)
// FP classify plus the conversion families (all unary slot pattern):
//   vfcvt_*   same-width float<->int,  _rtz_* = round-towards-zero variants
//   vwcvt*/vfwcvt_*  widening int/float conversions
//   vncvt_x/vfncvt_* narrowing conversions, _rod_ = round-towards-odd
#define vfclass(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfclass_tumu, 3, __riscv_vfclass, 1)(__VA_ARGS__)
#define vfcvt_x(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfcvt_x_tumu, 3, __riscv_vfcvt_x, 1)(__VA_ARGS__)
#define vfcvt_rtz_x(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfcvt_rtz_x_tumu, 3, __riscv_vfcvt_rtz_x, 1)(__VA_ARGS__)
#define vfcvt_xu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfcvt_xu_tumu, 3, __riscv_vfcvt_xu, 1)(__VA_ARGS__)
#define vfcvt_rtz_xu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfcvt_rtz_xu_tumu, 3, __riscv_vfcvt_rtz_xu, 1)(__VA_ARGS__)
#define vfcvt_f(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfcvt_f_tumu, 3, __riscv_vfcvt_f, 1)(__VA_ARGS__)
#define vwcvt_x(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vwcvt_x_tumu, 3, __riscv_vwcvt_x, 1)(__VA_ARGS__)
#define vwcvtu_x(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vwcvtu_x_tumu, 3, __riscv_vwcvtu_x, 1)(__VA_ARGS__)
#define vfwcvt_f(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfwcvt_f_tumu, 3, __riscv_vfwcvt_f, 1)(__VA_ARGS__)
#define vfwcvt_x(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfwcvt_x_tumu, 3, __riscv_vfwcvt_x, 1)(__VA_ARGS__)
#define vfwcvt_rtz_x(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfwcvt_rtz_x_tumu, 3, __riscv_vfwcvt_rtz_x, 1)(__VA_ARGS__)
#define vfwcvt_xu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfwcvt_xu_tumu, 3, __riscv_vfwcvt_xu, 1)(__VA_ARGS__)
#define vfwcvt_rtz_xu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfwcvt_rtz_xu_tumu, 3, __riscv_vfwcvt_rtz_xu, 1)(__VA_ARGS__)
#define vfncvt_x(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfncvt_x_tumu, 3, __riscv_vfncvt_x, 1)(__VA_ARGS__)
#define vfncvt_rtz_x(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfncvt_rtz_x_tumu, 3, __riscv_vfncvt_rtz_x, 1)(__VA_ARGS__)
#define vncvt_x(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vncvt_x_tumu, 3, __riscv_vncvt_x, 1)(__VA_ARGS__)
#define vfncvt_xu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfncvt_xu_tumu, 3, __riscv_vfncvt_xu, 1)(__VA_ARGS__)
#define vfncvt_rtz_xu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfncvt_rtz_xu_tumu, 3, __riscv_vfncvt_rtz_xu, 1)(__VA_ARGS__)
#define vfncvt_f(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfncvt_f_tumu, 3, __riscv_vfncvt_f, 1)(__VA_ARGS__)
#define vfncvt_rod_f(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vfncvt_rod_f_tumu, 3, __riscv_vfncvt_rod_f, 1)(__VA_ARGS__)
// Reductions (integer, widening, FP ordered/unordered, FP min/max).
// Masked calls map to _tum and unmasked to _tu — reductions always carry a
// scalar-destination operand, so both forms use a tail policy variant.
#define vredsum(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vredsum_tum, __riscv_vredsum_tu, 3, 2, 1)(__VA_ARGS__)
#define vredmax(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vredmax_tum, __riscv_vredmax_tu, 3, 2, 1)(__VA_ARGS__)
#define vredmin(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vredmin_tum, __riscv_vredmin_tu, 3, 2, 1)(__VA_ARGS__)
#define vredand(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vredand_tum, __riscv_vredand_tu, 3, 2, 1)(__VA_ARGS__)
#define vredor(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vredor_tum, __riscv_vredor_tu, 3, 2, 1)(__VA_ARGS__)
#define vredxor(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vredxor_tum, __riscv_vredxor_tu, 3, 2, 1)(__VA_ARGS__)
#define vredmaxu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vredmaxu_tum, __riscv_vredmaxu_tu, 3, 2, 1)(__VA_ARGS__)
#define vredminu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vredminu_tum, __riscv_vredminu_tu, 3, 2, 1)(__VA_ARGS__)
#define vwredsum(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwredsum_tum, __riscv_vwredsum_tu, 3, 2, 1)(__VA_ARGS__)
#define vwredsumu(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vwredsumu_tum, __riscv_vwredsumu_tu, 3, 2, 1)(__VA_ARGS__)
#define vfredosum(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfredosum_tum, __riscv_vfredosum_tu, 3, 2, 1)(__VA_ARGS__)
#define vfredusum(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfredusum_tum, __riscv_vfredusum_tu, 3, 2, 1)(__VA_ARGS__)
#define vfredmax(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfredmax_tum, __riscv_vfredmax_tu, 3, 2, 1)(__VA_ARGS__)
#define vfredmin(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfredmin_tum, __riscv_vfredmin_tu, 3, 2, 1)(__VA_ARGS__)
#define vfwredosum(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfwredosum_tum, __riscv_vfwredosum_tu, 3, 2, 1)(__VA_ARGS__)
#define vfwredusum(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfwredusum_tum, __riscv_vfwredusum_tu, 3, 2, 1)(__VA_ARGS__)
// Mask store, mask logical ops and mask copy/complement: plain renames with
// no policy dispatch (mask ops have no tail policy variants here).
#define vsm(...) __riscv_vsm(__VA_ARGS__)
#define vmand(...) __riscv_vmand(__VA_ARGS__)
#define vmnand(...) __riscv_vmnand(__VA_ARGS__)
#define vmandn(...) __riscv_vmandn(__VA_ARGS__)
#define vmxor(...) __riscv_vmxor(__VA_ARGS__)
#define vmor(...) __riscv_vmor(__VA_ARGS__)
#define vmnor(...) __riscv_vmnor(__VA_ARGS__)
#define vmorn(...) __riscv_vmorn(__VA_ARGS__)
#define vmxnor(...) __riscv_vmxnor(__VA_ARGS__)
#define vmmv(...) __riscv_vmmv(__VA_ARGS__)
#define vmnot(...) __riscv_vmnot(__VA_ARGS__)
// Mask population count / find-first-set: both the 2- and 3-argument slots
// resolve to the same unmasked-name intrinsic (masked form is an overload).
#define vcpop(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, __riscv_vcpop, __riscv_vcpop, 1)(__VA_ARGS__)
#define vfirst(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, __riscv_vfirst, __riscv_vfirst, 1)(__VA_ARGS__)
// Set-before-first / set-including-first / set-only-first (mask-producing,
// so the masked slot uses _mu).
#define vmsbf(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vmsbf_mu, 3, __riscv_vmsbf, 1)(__VA_ARGS__)
#define vmsif(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vmsif_mu, 3, __riscv_vmsif, 1)(__VA_ARGS__)
#define vmsof(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, __riscv_vmsof_mu, 3, __riscv_vmsof, 1)(__VA_ARGS__)
// Scalar <-> vector element moves.  Note that the scalar-insert forms
// (vfmv_s / vmv_s) map unconditionally to the tail-undisturbed _tu variant,
// matching the legacy intrinsics' pass-through-destination semantics.
#define vfmv_f(...) __riscv_vfmv_f(__VA_ARGS__)
#define vfmv_s(...) __riscv_vfmv_s_tu(__VA_ARGS__)
#define vmv_x(...) __riscv_vmv_x(__VA_ARGS__)
#define vmv_s(...) __riscv_vmv_s_tu(__VA_ARGS__)
// Permutations: slide up/down (destination-carrying, so unmasked -> _tu),
// slide-by-one with scalar insert, and register gathers.
#define vslideup(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vslideup_tumu, __riscv_vslideup_tu, 3, 2, 1)(__VA_ARGS__)
#define vslidedown(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vslidedown_tumu, __riscv_vslidedown_tu, 3, 2, 1)(__VA_ARGS__)
#define vfslide1up(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfslide1up_tumu, 4, __riscv_vfslide1up, 2, 1)(__VA_ARGS__)
#define vfslide1down(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vfslide1down_tumu, 4, __riscv_vfslide1down, 2, 1)(__VA_ARGS__)
#define vslide1up(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vslide1up_tumu, 4, __riscv_vslide1up, 2, 1)(__VA_ARGS__)
#define vslide1down(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vslide1down_tumu, 4, __riscv_vslide1down, 2, 1)(__VA_ARGS__)
#define vrgather(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vrgather_tumu, 4, __riscv_vrgather, 2, 1)(__VA_ARGS__)
#define vrgatherei16(...) _GET_OVERRIDE(__VA_ARGS__, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, __riscv_vrgatherei16_tumu, 4, __riscv_vrgatherei16, 2, 1)(__VA_ARGS__)
// Bit reinterpretation between element types, one macro per target
// type x LMUL combination.  All plain renames (no policy dispatch).
#define vreinterpret_u8mf8(...) __riscv_vreinterpret_u8mf8(__VA_ARGS__)
#define vreinterpret_u8mf4(...) __riscv_vreinterpret_u8mf4(__VA_ARGS__)
#define vreinterpret_u8mf2(...) __riscv_vreinterpret_u8mf2(__VA_ARGS__)
#define vreinterpret_u8m1(...) __riscv_vreinterpret_u8m1(__VA_ARGS__)
#define vreinterpret_u8m2(...) __riscv_vreinterpret_u8m2(__VA_ARGS__)
#define vreinterpret_u8m4(...) __riscv_vreinterpret_u8m4(__VA_ARGS__)
#define vreinterpret_u8m8(...) __riscv_vreinterpret_u8m8(__VA_ARGS__)
#define vreinterpret_i8mf8(...) __riscv_vreinterpret_i8mf8(__VA_ARGS__)
#define vreinterpret_i8mf4(...) __riscv_vreinterpret_i8mf4(__VA_ARGS__)
#define vreinterpret_i8mf2(...) __riscv_vreinterpret_i8mf2(__VA_ARGS__)
#define vreinterpret_i8m1(...) __riscv_vreinterpret_i8m1(__VA_ARGS__)
#define vreinterpret_i8m2(...) __riscv_vreinterpret_i8m2(__VA_ARGS__)
#define vreinterpret_i8m4(...) __riscv_vreinterpret_i8m4(__VA_ARGS__)
#define vreinterpret_i8m8(...) __riscv_vreinterpret_i8m8(__VA_ARGS__)
#define vreinterpret_f16mf4(...) __riscv_vreinterpret_f16mf4(__VA_ARGS__)
#define vreinterpret_f16mf2(...) __riscv_vreinterpret_f16mf2(__VA_ARGS__)
#define vreinterpret_f16m1(...) __riscv_vreinterpret_f16m1(__VA_ARGS__)
#define vreinterpret_f16m2(...) __riscv_vreinterpret_f16m2(__VA_ARGS__)
#define vreinterpret_f16m4(...) __riscv_vreinterpret_f16m4(__VA_ARGS__)
#define vreinterpret_f16m8(...) __riscv_vreinterpret_f16m8(__VA_ARGS__)
#define vreinterpret_u16mf4(...) __riscv_vreinterpret_u16mf4(__VA_ARGS__)
#define vreinterpret_u16mf2(...) __riscv_vreinterpret_u16mf2(__VA_ARGS__)
#define vreinterpret_u16m1(...) __riscv_vreinterpret_u16m1(__VA_ARGS__)
#define vreinterpret_u16m2(...) __riscv_vreinterpret_u16m2(__VA_ARGS__)
#define vreinterpret_u16m4(...) __riscv_vreinterpret_u16m4(__VA_ARGS__)
#define vreinterpret_u16m8(...) __riscv_vreinterpret_u16m8(__VA_ARGS__)
#define vreinterpret_i16mf4(...) __riscv_vreinterpret_i16mf4(__VA_ARGS__)
#define vreinterpret_i16mf2(...) __riscv_vreinterpret_i16mf2(__VA_ARGS__)
#define vreinterpret_i16m1(...) __riscv_vreinterpret_i16m1(__VA_ARGS__)
#define vreinterpret_i16m2(...) __riscv_vreinterpret_i16m2(__VA_ARGS__)
#define vreinterpret_i16m4(...) __riscv_vreinterpret_i16m4(__VA_ARGS__)
#define vreinterpret_i16m8(...) __riscv_vreinterpret_i16m8(__VA_ARGS__)
#define vreinterpret_f32mf2(...) __riscv_vreinterpret_f32mf2(__VA_ARGS__)
#define vreinterpret_f32m1(...) __riscv_vreinterpret_f32m1(__VA_ARGS__)
#define vreinterpret_f32m2(...) __riscv_vreinterpret_f32m2(__VA_ARGS__)
#define vreinterpret_f32m4(...) __riscv_vreinterpret_f32m4(__VA_ARGS__)
#define vreinterpret_f32m8(...) __riscv_vreinterpret_f32m8(__VA_ARGS__)
#define vreinterpret_u32mf2(...) __riscv_vreinterpret_u32mf2(__VA_ARGS__)
#define vreinterpret_u32m1(...) __riscv_vreinterpret_u32m1(__VA_ARGS__)
#define vreinterpret_u32m2(...) __riscv_vreinterpret_u32m2(__VA_ARGS__)
#define vreinterpret_u32m4(...) __riscv_vreinterpret_u32m4(__VA_ARGS__)
#define vreinterpret_u32m8(...) __riscv_vreinterpret_u32m8(__VA_ARGS__)
#define vreinterpret_i32mf2(...) __riscv_vreinterpret_i32mf2(__VA_ARGS__)
#define vreinterpret_i32m1(...) __riscv_vreinterpret_i32m1(__VA_ARGS__)
#define vreinterpret_i32m2(...) __riscv_vreinterpret_i32m2(__VA_ARGS__)
#define vreinterpret_i32m4(...) __riscv_vreinterpret_i32m4(__VA_ARGS__)
#define vreinterpret_i32m8(...) __riscv_vreinterpret_i32m8(__VA_ARGS__)
#define vreinterpret_f64m1(...) __riscv_vreinterpret_f64m1(__VA_ARGS__)
#define vreinterpret_f64m2(...) __riscv_vreinterpret_f64m2(__VA_ARGS__)
#define vreinterpret_f64m4(...) __riscv_vreinterpret_f64m4(__VA_ARGS__)
#define vreinterpret_f64m8(...) __riscv_vreinterpret_f64m8(__VA_ARGS__)
#define vreinterpret_u64m1(...) __riscv_vreinterpret_u64m1(__VA_ARGS__)
#define vreinterpret_u64m2(...) __riscv_vreinterpret_u64m2(__VA_ARGS__)
#define vreinterpret_u64m4(...) __riscv_vreinterpret_u64m4(__VA_ARGS__)
#define vreinterpret_u64m8(...) __riscv_vreinterpret_u64m8(__VA_ARGS__)
#define vreinterpret_i64m1(...) __riscv_vreinterpret_i64m1(__VA_ARGS__)
#define vreinterpret_i64m2(...) __riscv_vreinterpret_i64m2(__VA_ARGS__)
#define vreinterpret_i64m4(...) __riscv_vreinterpret_i64m4(__VA_ARGS__)
#define vreinterpret_i64m8(...) __riscv_vreinterpret_i64m8(__VA_ARGS__)
// LMUL extension (widen register-group size without changing element type),
// one macro per target type x LMUL.  Plain renames.
#define vlmul_ext_f16mf2(...) __riscv_vlmul_ext_f16mf2(__VA_ARGS__)
#define vlmul_ext_f16m1(...) __riscv_vlmul_ext_f16m1(__VA_ARGS__)
#define vlmul_ext_f16m2(...) __riscv_vlmul_ext_f16m2(__VA_ARGS__)
#define vlmul_ext_f16m4(...) __riscv_vlmul_ext_f16m4(__VA_ARGS__)
#define vlmul_ext_f16m8(...) __riscv_vlmul_ext_f16m8(__VA_ARGS__)
#define vlmul_ext_f32m1(...) __riscv_vlmul_ext_f32m1(__VA_ARGS__)
#define vlmul_ext_f32m2(...) __riscv_vlmul_ext_f32m2(__VA_ARGS__)
#define vlmul_ext_f32m4(...) __riscv_vlmul_ext_f32m4(__VA_ARGS__)
#define vlmul_ext_f32m8(...) __riscv_vlmul_ext_f32m8(__VA_ARGS__)
#define vlmul_ext_f64m2(...) __riscv_vlmul_ext_f64m2(__VA_ARGS__)
#define vlmul_ext_f64m4(...) __riscv_vlmul_ext_f64m4(__VA_ARGS__)
#define vlmul_ext_f64m8(...) __riscv_vlmul_ext_f64m8(__VA_ARGS__)
#define vlmul_ext_i8mf4(...) __riscv_vlmul_ext_i8mf4(__VA_ARGS__)
#define vlmul_ext_i8mf2(...) __riscv_vlmul_ext_i8mf2(__VA_ARGS__)
#define vlmul_ext_i8m1(...) __riscv_vlmul_ext_i8m1(__VA_ARGS__)
#define vlmul_ext_i8m2(...) __riscv_vlmul_ext_i8m2(__VA_ARGS__)
#define vlmul_ext_i8m4(...) __riscv_vlmul_ext_i8m4(__VA_ARGS__)
#define vlmul_ext_i8m8(...) __riscv_vlmul_ext_i8m8(__VA_ARGS__)
#define vlmul_ext_i16mf2(...) __riscv_vlmul_ext_i16mf2(__VA_ARGS__)
#define vlmul_ext_i16m1(...) __riscv_vlmul_ext_i16m1(__VA_ARGS__)
#define vlmul_ext_i16m2(...) __riscv_vlmul_ext_i16m2(__VA_ARGS__)
#define vlmul_ext_i16m4(...) __riscv_vlmul_ext_i16m4(__VA_ARGS__)
#define vlmul_ext_i16m8(...) __riscv_vlmul_ext_i16m8(__VA_ARGS__)
#define vlmul_ext_i32m1(...) __riscv_vlmul_ext_i32m1(__VA_ARGS__)
#define vlmul_ext_i32m2(...) __riscv_vlmul_ext_i32m2(__VA_ARGS__)
#define vlmul_ext_i32m4(...) __riscv_vlmul_ext_i32m4(__VA_ARGS__)
#define vlmul_ext_i32m8(...) __riscv_vlmul_ext_i32m8(__VA_ARGS__)
#define vlmul_ext_i64m2(...) __riscv_vlmul_ext_i64m2(__VA_ARGS__)
#define vlmul_ext_i64m4(...) __riscv_vlmul_ext_i64m4(__VA_ARGS__)
#define vlmul_ext_i64m8(...) __riscv_vlmul_ext_i64m8(__VA_ARGS__)
#define vlmul_ext_u8mf4(...) __riscv_vlmul_ext_u8mf4(__VA_ARGS__)
#define vlmul_ext_u8mf2(...) __riscv_vlmul_ext_u8mf2(__VA_ARGS__)
#define vlmul_ext_u8m1(...) __riscv_vlmul_ext_u8m1(__VA_ARGS__)
#define vlmul_ext_u8m2(...) __riscv_vlmul_ext_u8m2(__VA_ARGS__)
#define vlmul_ext_u8m4(...) __riscv_vlmul_ext_u8m4(__VA_ARGS__)
#define vlmul_ext_u8m8(...) __riscv_vlmul_ext_u8m8(__VA_ARGS__)
#define vlmul_ext_u16mf2(...) __riscv_vlmul_ext_u16mf2(__VA_ARGS__)
#define vlmul_ext_u16m1(...) __riscv_vlmul_ext_u16m1(__VA_ARGS__)
#define vlmul_ext_u16m2(...) __riscv_vlmul_ext_u16m2(__VA_ARGS__)
#define vlmul_ext_u16m4(...) __riscv_vlmul_ext_u16m4(__VA_ARGS__)
#define vlmul_ext_u16m8(...) __riscv_vlmul_ext_u16m8(__VA_ARGS__)
#define vlmul_ext_u32m1(...) __riscv_vlmul_ext_u32m1(__VA_ARGS__)
#define vlmul_ext_u32m2(...) __riscv_vlmul_ext_u32m2(__VA_ARGS__)
#define vlmul_ext_u32m4(...) __riscv_vlmul_ext_u32m4(__VA_ARGS__)
#define vlmul_ext_u32m8(...) __riscv_vlmul_ext_u32m8(__VA_ARGS__)
#define vlmul_ext_u64m2(...) __riscv_vlmul_ext_u64m2(__VA_ARGS__)
#define vlmul_ext_u64m4(...) __riscv_vlmul_ext_u64m4(__VA_ARGS__)
#define vlmul_ext_u64m8(...) __riscv_vlmul_ext_u64m8(__VA_ARGS__)
// LMUL truncation (narrow register-group size without changing element type),
// one macro per target type x LMUL.  Plain renames.
#define vlmul_trunc_f16mf4(...) __riscv_vlmul_trunc_f16mf4(__VA_ARGS__)
#define vlmul_trunc_f16mf2(...) __riscv_vlmul_trunc_f16mf2(__VA_ARGS__)
#define vlmul_trunc_f16m1(...) __riscv_vlmul_trunc_f16m1(__VA_ARGS__)
#define vlmul_trunc_f16m2(...) __riscv_vlmul_trunc_f16m2(__VA_ARGS__)
#define vlmul_trunc_f16m4(...) __riscv_vlmul_trunc_f16m4(__VA_ARGS__)
#define vlmul_trunc_f32mf2(...) __riscv_vlmul_trunc_f32mf2(__VA_ARGS__)
#define vlmul_trunc_f32m1(...) __riscv_vlmul_trunc_f32m1(__VA_ARGS__)
#define vlmul_trunc_f32m2(...) __riscv_vlmul_trunc_f32m2(__VA_ARGS__)
#define vlmul_trunc_f32m4(...) __riscv_vlmul_trunc_f32m4(__VA_ARGS__)
#define vlmul_trunc_f64m1(...) __riscv_vlmul_trunc_f64m1(__VA_ARGS__)
#define vlmul_trunc_f64m2(...) __riscv_vlmul_trunc_f64m2(__VA_ARGS__)
#define vlmul_trunc_f64m4(...) __riscv_vlmul_trunc_f64m4(__VA_ARGS__)
#define vlmul_trunc_i8mf8(...) __riscv_vlmul_trunc_i8mf8(__VA_ARGS__)
#define vlmul_trunc_i8mf4(...) __riscv_vlmul_trunc_i8mf4(__VA_ARGS__)
#define vlmul_trunc_i8mf2(...) __riscv_vlmul_trunc_i8mf2(__VA_ARGS__)
#define vlmul_trunc_i8m1(...) __riscv_vlmul_trunc_i8m1(__VA_ARGS__)
#define vlmul_trunc_i8m2(...) __riscv_vlmul_trunc_i8m2(__VA_ARGS__)
#define vlmul_trunc_i8m4(...) __riscv_vlmul_trunc_i8m4(__VA_ARGS__)
#define vlmul_trunc_i16mf4(...) __riscv_vlmul_trunc_i16mf4(__VA_ARGS__)
#define vlmul_trunc_i16mf2(...) __riscv_vlmul_trunc_i16mf2(__VA_ARGS__)
#define vlmul_trunc_i16m1(...) __riscv_vlmul_trunc_i16m1(__VA_ARGS__)
#define vlmul_trunc_i16m2(...) __riscv_vlmul_trunc_i16m2(__VA_ARGS__)
#define vlmul_trunc_i16m4(...) __riscv_vlmul_trunc_i16m4(__VA_ARGS__)
#define vlmul_trunc_i32mf2(...) __riscv_vlmul_trunc_i32mf2(__VA_ARGS__)
#define vlmul_trunc_i32m1(...) __riscv_vlmul_trunc_i32m1(__VA_ARGS__)
#define vlmul_trunc_i32m2(...) __riscv_vlmul_trunc_i32m2(__VA_ARGS__)
#define vlmul_trunc_i32m4(...) __riscv_vlmul_trunc_i32m4(__VA_ARGS__)
#define vlmul_trunc_i64m1(...) __riscv_vlmul_trunc_i64m1(__VA_ARGS__)
#define vlmul_trunc_i64m2(...) __riscv_vlmul_trunc_i64m2(__VA_ARGS__)
#define vlmul_trunc_i64m4(...) __riscv_vlmul_trunc_i64m4(__VA_ARGS__)
#define vlmul_trunc_u8mf8(...) __riscv_vlmul_trunc_u8mf8(__VA_ARGS__)
#define vlmul_trunc_u8mf4(...) __riscv_vlmul_trunc_u8mf4(__VA_ARGS__)
#define vlmul_trunc_u8mf2(...) __riscv_vlmul_trunc_u8mf2(__VA_ARGS__)
#define vlmul_trunc_u8m1(...) __riscv_vlmul_trunc_u8m1(__VA_ARGS__)
#define vlmul_trunc_u8m2(...) __riscv_vlmul_trunc_u8m2(__VA_ARGS__)
#define vlmul_trunc_u8m4(...) __riscv_vlmul_trunc_u8m4(__VA_ARGS__)
#define vlmul_trunc_u16mf4(...) __riscv_vlmul_trunc_u16mf4(__VA_ARGS__)
#define vlmul_trunc_u16mf2(...) __riscv_vlmul_trunc_u16mf2(__VA_ARGS__)
#define vlmul_trunc_u16m1(...) __riscv_vlmul_trunc_u16m1(__VA_ARGS__)
#define vlmul_trunc_u16m2(...) __riscv_vlmul_trunc_u16m2(__VA_ARGS__)
#define vlmul_trunc_u16m4(...) __riscv_vlmul_trunc_u16m4(__VA_ARGS__)
#define vlmul_trunc_u32mf2(...) __riscv_vlmul_trunc_u32mf2(__VA_ARGS__)
#define vlmul_trunc_u32m1(...) __riscv_vlmul_trunc_u32m1(__VA_ARGS__)
#define vlmul_trunc_u32m2(...) __riscv_vlmul_trunc_u32m2(__VA_ARGS__)
#define vlmul_trunc_u32m4(...) __riscv_vlmul_trunc_u32m4(__VA_ARGS__)
#define vlmul_trunc_u64m1(...) __riscv_vlmul_trunc_u64m1(__VA_ARGS__)
#define vlmul_trunc_u64m2(...) __riscv_vlmul_trunc_u64m2(__VA_ARGS__)
#define vlmul_trunc_u64m4(...) __riscv_vlmul_trunc_u64m4(__VA_ARGS__)
// Insert (vset) / extract (vget) of a sub-register-group within a larger
// register group, one vget macro per result type x LMUL.  Plain renames.
#define vset(...) __riscv_vset(__VA_ARGS__)
#define vget_f16m1(...) __riscv_vget_f16m1(__VA_ARGS__)
#define vget_f16m2(...) __riscv_vget_f16m2(__VA_ARGS__)
#define vget_f16m4(...) __riscv_vget_f16m4(__VA_ARGS__)
#define vget_f32m1(...) __riscv_vget_f32m1(__VA_ARGS__)
#define vget_f32m2(...) __riscv_vget_f32m2(__VA_ARGS__)
#define vget_f32m4(...) __riscv_vget_f32m4(__VA_ARGS__)
#define vget_f64m1(...) __riscv_vget_f64m1(__VA_ARGS__)
#define vget_f64m2(...) __riscv_vget_f64m2(__VA_ARGS__)
#define vget_f64m4(...) __riscv_vget_f64m4(__VA_ARGS__)
#define vget_i8m1(...) __riscv_vget_i8m1(__VA_ARGS__)
#define vget_i8m2(...) __riscv_vget_i8m2(__VA_ARGS__)
#define vget_i8m4(...) __riscv_vget_i8m4(__VA_ARGS__)
#define vget_i16m1(...) __riscv_vget_i16m1(__VA_ARGS__)
#define vget_i16m2(...) __riscv_vget_i16m2(__VA_ARGS__)
#define vget_i16m4(...) __riscv_vget_i16m4(__VA_ARGS__)
#define vget_i32m1(...) __riscv_vget_i32m1(__VA_ARGS__)
#define vget_i32m2(...) __riscv_vget_i32m2(__VA_ARGS__)
#define vget_i32m4(...) __riscv_vget_i32m4(__VA_ARGS__)
#define vget_i64m1(...) __riscv_vget_i64m1(__VA_ARGS__)
#define vget_i64m2(...) __riscv_vget_i64m2(__VA_ARGS__)
#define vget_i64m4(...) __riscv_vget_i64m4(__VA_ARGS__)
#define vget_u8m1(...) __riscv_vget_u8m1(__VA_ARGS__)
#define vget_u8m2(...) __riscv_vget_u8m2(__VA_ARGS__)
#define vget_u8m4(...) __riscv_vget_u8m4(__VA_ARGS__)
#define vget_u16m1(...) __riscv_vget_u16m1(__VA_ARGS__)
#define vget_u16m2(...) __riscv_vget_u16m2(__VA_ARGS__)
#define vget_u16m4(...) __riscv_vget_u16m4(__VA_ARGS__)
#define vget_u32m1(...) __riscv_vget_u32m1(__VA_ARGS__)
#define vget_u32m2(...) __riscv_vget_u32m2(__VA_ARGS__)
#define vget_u32m4(...) __riscv_vget_u32m4(__VA_ARGS__)
#define vget_u64m1(...) __riscv_vget_u64m1(__VA_ARGS__)
#define vget_u64m2(...) __riscv_vget_u64m2(__VA_ARGS__)
#define vget_u64m4(...) __riscv_vget_u64m4(__VA_ARGS__)
// Loads: unit-stride (vleN), strided (vlseN), fault-only-first (vleNff) and
// segment loads (vlsegKeN / vlsegKeNff).  Unlike the arithmetic macros above,
// these map unconditionally to the _tumu policy variant (no argument-count
// dispatch) — presumably because the legacy masked-load intrinsics always
// took a maskedoff operand.  TODO(review): confirm that unmasked legacy call
// sites in this file always pass the extra operands _tumu expects.
#define vle16(...) __riscv_vle16_tumu(__VA_ARGS__)
#define vle32(...) __riscv_vle32_tumu(__VA_ARGS__)
#define vle64(...) __riscv_vle64_tumu(__VA_ARGS__)
#define vle8(...) __riscv_vle8_tumu(__VA_ARGS__)
#define vlse16(...) __riscv_vlse16_tumu(__VA_ARGS__)
#define vlse32(...) __riscv_vlse32_tumu(__VA_ARGS__)
#define vlse64(...) __riscv_vlse64_tumu(__VA_ARGS__)
#define vlse8(...) __riscv_vlse8_tumu(__VA_ARGS__)
#define vle16ff(...) __riscv_vle16ff_tumu(__VA_ARGS__)
#define vle32ff(...) __riscv_vle32ff_tumu(__VA_ARGS__)
#define vle64ff(...) __riscv_vle64ff_tumu(__VA_ARGS__)
#define vle8ff(...) __riscv_vle8ff_tumu(__VA_ARGS__)
#define vlseg2e16(...) __riscv_vlseg2e16_tumu(__VA_ARGS__)
#define vlseg3e16(...) __riscv_vlseg3e16_tumu(__VA_ARGS__)
#define vlseg4e16(...) __riscv_vlseg4e16_tumu(__VA_ARGS__)
#define vlseg5e16(...) __riscv_vlseg5e16_tumu(__VA_ARGS__)
#define vlseg6e16(...) __riscv_vlseg6e16_tumu(__VA_ARGS__)
#define vlseg7e16(...) __riscv_vlseg7e16_tumu(__VA_ARGS__)
#define vlseg8e16(...) __riscv_vlseg8e16_tumu(__VA_ARGS__)
#define vlseg2e32(...) __riscv_vlseg2e32_tumu(__VA_ARGS__)
#define vlseg3e32(...) __riscv_vlseg3e32_tumu(__VA_ARGS__)
#define vlseg4e32(...) __riscv_vlseg4e32_tumu(__VA_ARGS__)
#define vlseg5e32(...) __riscv_vlseg5e32_tumu(__VA_ARGS__)
#define vlseg6e32(...) __riscv_vlseg6e32_tumu(__VA_ARGS__)
#define vlseg7e32(...) __riscv_vlseg7e32_tumu(__VA_ARGS__)
#define vlseg8e32(...) __riscv_vlseg8e32_tumu(__VA_ARGS__)
#define vlseg2e64(...) __riscv_vlseg2e64_tumu(__VA_ARGS__)
#define vlseg3e64(...) __riscv_vlseg3e64_tumu(__VA_ARGS__)
#define vlseg4e64(...) __riscv_vlseg4e64_tumu(__VA_ARGS__)
#define vlseg5e64(...) __riscv_vlseg5e64_tumu(__VA_ARGS__)
#define vlseg6e64(...) __riscv_vlseg6e64_tumu(__VA_ARGS__)
#define vlseg7e64(...) __riscv_vlseg7e64_tumu(__VA_ARGS__)
#define vlseg8e64(...) __riscv_vlseg8e64_tumu(__VA_ARGS__)
#define vlseg2e16ff(...) __riscv_vlseg2e16ff_tumu(__VA_ARGS__)
#define vlseg3e16ff(...) __riscv_vlseg3e16ff_tumu(__VA_ARGS__)
#define vlseg4e16ff(...) __riscv_vlseg4e16ff_tumu(__VA_ARGS__)
#define vlseg5e16ff(...) __riscv_vlseg5e16ff_tumu(__VA_ARGS__)
#define vlseg6e16ff(...) __riscv_vlseg6e16ff_tumu(__VA_ARGS__)
#define vlseg7e16ff(...) __riscv_vlseg7e16ff_tumu(__VA_ARGS__)
#define vlseg8e16ff(...) __riscv_vlseg8e16ff_tumu(__VA_ARGS__)
#define vlseg2e32ff(...) __riscv_vlseg2e32ff_tumu(__VA_ARGS__)
#define vlseg3e32ff(...) __riscv_vlseg3e32ff_tumu(__VA_ARGS__)
#define vlseg4e32ff(...) __riscv_vlseg4e32ff_tumu(__VA_ARGS__)
#define vlseg5e32ff(...) __riscv_vlseg5e32ff_tumu(__VA_ARGS__)
#define vlseg6e32ff(...) __riscv_vlseg6e32ff_tumu(__VA_ARGS__)
#define vlseg7e32ff(...) __riscv_vlseg7e32ff_tumu(__VA_ARGS__)
#define vlseg8e32ff(...) __riscv_vlseg8e32ff_tumu(__VA_ARGS__)
#define vlseg2e64ff(...) __riscv_vlseg2e64ff_tumu(__VA_ARGS__)
#define vlseg3e64ff(...) __riscv_vlseg3e64ff_tumu(__VA_ARGS__)
#define vlseg4e64ff(...) __riscv_vlseg4e64ff_tumu(__VA_ARGS__)
#define vlseg5e64ff(...) __riscv_vlseg5e64ff_tumu(__VA_ARGS__)
#define vlseg6e64ff(...) __riscv_vlseg6e64ff_tumu(__VA_ARGS__)
#define vlseg7e64ff(...) __riscv_vlseg7e64ff_tumu(__VA_ARGS__)
#define vlseg8e64ff(...) __riscv_vlseg8e64ff_tumu(__VA_ARGS__)
#define vlseg2e8(...) __riscv_vlseg2e8_tumu(__VA_ARGS__)
#define vlseg3e8(...) __riscv_vlseg3e8_tumu(__VA_ARGS__)
#define vlseg4e8(...) __riscv_vlseg4e8_tumu(__VA_ARGS__)
#define vlseg5e8(...) __riscv_vlseg5e8_tumu(__VA_ARGS__)
#define vlseg6e8(...) __riscv_vlseg6e8_tumu(__VA_ARGS__)
#define vlseg7e8(...) __riscv_vlseg7e8_tumu(__VA_ARGS__)
#define vlseg8e8(...) __riscv_vlseg8e8_tumu(__VA_ARGS__)
#define vlseg2e8ff(...) __riscv_vlseg2e8ff_tumu(__VA_ARGS__)
#define vlseg3e8ff(...) __riscv_vlseg3e8ff_tumu(__VA_ARGS__)
#define vlseg4e8ff(...) __riscv_vlseg4e8ff_tumu(__VA_ARGS__)
#define vlseg5e8ff(...) __riscv_vlseg5e8ff_tumu(__VA_ARGS__)
#define vlseg6e8ff(...) __riscv_vlseg6e8ff_tumu(__VA_ARGS__)
#define vlseg7e8ff(...) __riscv_vlseg7e8ff_tumu(__VA_ARGS__)
#define vlseg8e8ff(...) __riscv_vlseg8e8ff_tumu(__VA_ARGS__)
#define vlsseg2e16(...) __riscv_vlsseg2e16_tumu(__VA_ARGS__)
#define vlsseg3e16(...) __riscv_vlsseg3e16_tumu(__VA_ARGS__)
#define vlsseg4e16(...) __riscv_vlsseg4e16_tumu(__VA_ARGS__)
#define vlsseg5e16(...) __riscv_vlsseg5e16_tumu(__VA_ARGS__)
#define vlsseg6e16(...) __riscv_vlsseg6e16_tumu(__VA_ARGS__)
#define vlsseg7e16(...) __riscv_vlsseg7e16_tumu(__VA_ARGS__)
#define vlsseg8e16(...) __riscv_vlsseg8e16_tumu(__VA_ARGS__)
#define vlsseg2e32(...) __riscv_vlsseg2e32_tumu(__VA_ARGS__)
#define vlsseg3e32(...) __riscv_vlsseg3e32_tumu(__VA_ARGS__)
#define vlsseg4e32(...) __riscv_vlsseg4e32_tumu(__VA_ARGS__)
#define vlsseg5e32(...) __riscv_vlsseg5e32_tumu(__VA_ARGS__)
#define vlsseg6e32(...) __riscv_vlsseg6e32_tumu(__VA_ARGS__)
#define vlsseg7e32(...) __riscv_vlsseg7e32_tumu(__VA_ARGS__)
#define vlsseg8e32(...) __riscv_vlsseg8e32_tumu(__VA_ARGS__)
#define vlsseg2e64(...) __riscv_vlsseg2e64_tumu(__VA_ARGS__)
#define vlsseg3e64(...) __riscv_vlsseg3e64_tumu(__VA_ARGS__)
#define vlsseg4e64(...) __riscv_vlsseg4e64_tumu(__VA_ARGS__)
#define vlsseg5e64(...) __riscv_vlsseg5e64_tumu(__VA_ARGS__)
#define vlsseg6e64(...) __riscv_vlsseg6e64_tumu(__VA_ARGS__)
#define vlsseg7e64(...) __riscv_vlsseg7e64_tumu(__VA_ARGS__)
#define vlsseg8e64(...) __riscv_vlsseg8e64_tumu(__VA_ARGS__)
#define vlsseg2e8(...) __riscv_vlsseg2e8_tumu(__VA_ARGS__)
#define vlsseg3e8(...) __riscv_vlsseg3e8_tumu(__VA_ARGS__)
#define vlsseg4e8(...) __riscv_vlsseg4e8_tumu(__VA_ARGS__)
#define vlsseg5e8(...) __riscv_vlsseg5e8_tumu(__VA_ARGS__)
#define vlsseg6e8(...) __riscv_vlsseg6e8_tumu(__VA_ARGS__)
#define vlsseg7e8(...) __riscv_vlsseg7e8_tumu(__VA_ARGS__)
#define vlsseg8e8(...) __riscv_vlsseg8e8_tumu(__VA_ARGS__)
#define viota(...) __riscv_viota_tumu(__VA_ARGS__)
#define vid(...) __riscv_vid_tumu(__VA_ARGS__)
#endif

@ -1,33 +0,0 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// 0.11 -> 0.12 compatibility
//
// In RVV intrinsics v0.12 the fixed-point ops (vaadd, vsmul, vssra, vnclip, ...)
// take an explicit `vxrm` rounding-mode argument; in v0.11 the rounding mode was
// read implicitly from the vxrm CSR. Default to round-to-nearest-up unless the
// includer defines its own mode first.
#ifndef _RVV_IMPLICIT_VXRM
#define _RVV_IMPLICIT_VXRM __RISCV_VXRM_RNU
#endif
// NOTE: masked should go first to avoid extra substitution (3 arg -> 4 arg -> 5 arg)
//
// Each macro expands to a call of its own name with the rounding-mode argument
// spliced in; the preprocessor's rule against recursive self-expansion keeps the
// produced call from being rewritten again.
// NOTE(review): every name is #define'd twice (masked 4-arg form, then unmasked
// 3-arg form) — under standard C++ the later definition replaces the earlier
// one, so presumably only one arity is exercised per build; confirm against the
// includers of this header.
// masked
#define __riscv_vaadd(_1, _2, _3, _4) __riscv_vaadd(_1, _2, _3, _RVV_IMPLICIT_VXRM, _4)
#define __riscv_vasub(_1, _2, _3, _4) __riscv_vasub(_1, _2, _3, _RVV_IMPLICIT_VXRM, _4)
#define __riscv_vaaddu(_1, _2, _3, _4) __riscv_vaaddu(_1, _2, _3, _RVV_IMPLICIT_VXRM, _4)
#define __riscv_vasubu(_1, _2, _3, _4) __riscv_vasubu(_1, _2, _3, _RVV_IMPLICIT_VXRM, _4)
#define __riscv_vsmul(_1, _2, _3, _4) __riscv_vsmul(_1, _2, _3, _RVV_IMPLICIT_VXRM, _4)
#define __riscv_vssra(_1, _2, _3, _4) __riscv_vssra(_1, _2, _3, _RVV_IMPLICIT_VXRM, _4)
#define __riscv_vssrl(_1, _2, _3, _4) __riscv_vssrl(_1, _2, _3, _RVV_IMPLICIT_VXRM, _4)
#define __riscv_vnclip(_1, _2, _3, _4) __riscv_vnclip(_1, _2, _3, _RVV_IMPLICIT_VXRM, _4)
#define __riscv_vnclipu(_1, _2, _3, _4) __riscv_vnclipu(_1, _2, _3, _RVV_IMPLICIT_VXRM, _4)
// unmasked
#define __riscv_vaadd(_1, _2, _3) __riscv_vaadd(_1, _2, _RVV_IMPLICIT_VXRM, _3)
#define __riscv_vasub(_1, _2, _3) __riscv_vasub(_1, _2, _RVV_IMPLICIT_VXRM, _3)
#define __riscv_vaaddu(_1, _2, _3) __riscv_vaaddu(_1, _2, _RVV_IMPLICIT_VXRM, _3)
#define __riscv_vasubu(_1, _2, _3) __riscv_vasubu(_1, _2, _RVV_IMPLICIT_VXRM, _3)
#define __riscv_vsmul(_1, _2, _3) __riscv_vsmul(_1, _2, _RVV_IMPLICIT_VXRM, _3)
#define __riscv_vssra(_1, _2, _3) __riscv_vssra(_1, _2, _RVV_IMPLICIT_VXRM, _3)
#define __riscv_vssrl(_1, _2, _3) __riscv_vssrl(_1, _2, _RVV_IMPLICIT_VXRM, _3)
#define __riscv_vnclip(_1, _2, _3) __riscv_vnclip(_1, _2, _RVV_IMPLICIT_VXRM, _3)
#define __riscv_vnclipu(_1, _2, _3) __riscv_vnclipu(_1, _2, _RVV_IMPLICIT_VXRM, _3)

@ -1,213 +0,0 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_HAL_INTRIN_RVV_COMPAT_OVERLOAD_HPP
#define OPENCV_HAL_INTRIN_RVV_COMPAT_OVERLOAD_HPP
// This file requires VTraits to be defined for vector types
// Emits an overloaded vand(op1, op2, vl) that forwards to the type-suffixed
// intrinsic vand_vv_<SUF>, giving toolchains without overloaded intrinsics the
// overload-based API that OpenCV's scalable RVV code uses.
#define OPENCV_HAL_IMPL_RVV_FUN_AND(REG, SUF) \
inline static REG vand(const REG & op1, const REG & op2, size_t vl) \
{ \
    return vand_vv_##SUF(op1, op2, vl); \
}
// Instantiations for every m1 integer vector type (8..64-bit, signed/unsigned).
OPENCV_HAL_IMPL_RVV_FUN_AND(vint8m1_t, i8m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vuint8m1_t, u8m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vint16m1_t, i16m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vuint16m1_t, u16m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vint32m1_t, i32m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vuint32m1_t, u32m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vint64m1_t, i64m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vuint64m1_t, u64m1)
// Emits overloaded vloxei<ISUF>(base, bindex, vl): indexed (ordered) load of a
// REG vector from base + per-lane byte offsets in bindex, forwarding to
// vloxe<ISUF>_v_<SUF>. ISUF is the index element width (i8/i32); INDX is the
// matching unsigned index vector type for the load's LMUL ratio.
#define OPENCV_HAL_IMPL_RVV_FUN_LOXEI(REG, SUF, INDX, ISUF) \
inline static REG vloxe##ISUF(const VTraits<REG>::lane_type *base, INDX bindex, size_t vl) \
{ \
    return vloxe##ISUF##_v_##SUF(base, bindex, vl); \
}
// 8-bit indexed loads of 8-bit data at LMUL 1..8.
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m1_t, i8m1, vuint8m1_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m2_t, i8m2, vuint8m2_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m4_t, i8m4, vuint8m4_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m8_t, i8m8, vuint8m8_t, i8)
// 32-bit indexed loads; note the index LMUL scales with the data/index width ratio.
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m1_t, i8m1, vuint32m4_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m2_t, i8m2, vuint32m8_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint16m1_t, i16m1, vuint32m2_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint32m1_t, i32m1, vuint32m1_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint32m2_t, i32m2, vuint32m2_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint32m4_t, i32m4, vuint32m4_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint32m8_t, i32m8, vuint32m8_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint64m1_t, i64m1, vuint32mf2_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vuint8m1_t, u8m1, vuint8m1_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vuint8m2_t, u8m2, vuint8m2_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vuint8m4_t, u8m4, vuint8m4_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vuint8m8_t, u8m8, vuint8m8_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vfloat32m1_t, f32m1, vuint32m1_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vuint32m1_t, u32m1, vuint32m1_t, i32)
// double support only when the build enables scalable 64-bit float SIMD
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vfloat64m1_t, f64m1, vuint32mf2_t, i32)
#endif
// Emits overloaded vmul() for one element type at every LMUL (m1..m8), in both
// vector-vector (vmul_vv_*) and vector-scalar (vmul_vx_*) forms. REG is the
// type-name stem (e.g. vint8 -> vint8m1_t ... vint8m8_t).
#define OPENCV_HAL_IMPL_RVV_FUN_MUL(REG, SUF) \
inline static REG##m1_t vmul(const REG##m1_t & op1, const REG##m1_t & op2, size_t vl) \
{ \
    return vmul_vv_##SUF##m1(op1, op2, vl); \
} \
inline static REG##m1_t vmul(const REG##m1_t & op1, VTraits<REG##m1_t>::lane_type op2, size_t vl) \
{ \
    return vmul_vx_##SUF##m1(op1, op2, vl); \
} \
inline static REG##m2_t vmul(const REG##m2_t & op1, const REG##m2_t & op2, size_t vl) \
{ \
    return vmul_vv_##SUF##m2(op1, op2, vl); \
} \
inline static REG##m2_t vmul(const REG##m2_t & op1, VTraits<REG##m2_t>::lane_type op2, size_t vl) \
{ \
    return vmul_vx_##SUF##m2(op1, op2, vl); \
} \
inline static REG##m4_t vmul(const REG##m4_t & op1, const REG##m4_t & op2, size_t vl) \
{ \
    return vmul_vv_##SUF##m4(op1, op2, vl); \
} \
inline static REG##m4_t vmul(const REG##m4_t & op1, VTraits<REG##m4_t>::lane_type op2, size_t vl) \
{ \
    return vmul_vx_##SUF##m4(op1, op2, vl); \
} \
inline static REG##m8_t vmul(const REG##m8_t & op1, const REG##m8_t & op2, size_t vl) \
{ \
    return vmul_vv_##SUF##m8(op1, op2, vl); \
} \
inline static REG##m8_t vmul(const REG##m8_t & op1, VTraits<REG##m8_t>::lane_type op2, size_t vl) \
{ \
    return vmul_vx_##SUF##m8(op1, op2, vl); \
}
// Instantiations for the 8/16/32-bit integer element types (64-bit and float
// multiplies are not needed by the callers of this compat layer).
OPENCV_HAL_IMPL_RVV_FUN_MUL(vint8, i8)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vuint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vint16, i16)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vuint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vint32, i32)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vuint32, u32)
// Emits vreinterpret_<SUF1>m{1,2,4,8}(src): bit-pattern reinterpretation from a
// REG2 vector to a REG1 vector of the same LMUL, forwarding to the suffixed
// vreinterpret_v_* intrinsics. No data conversion takes place.
#define OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(REG1, SUF1, REG2, SUF2) \
inline static REG1##m1_t vreinterpret_##SUF1##m1(const REG2##m1_t & src) \
{\
    return vreinterpret_v_##SUF2##m1_##SUF1##m1(src); \
} \
inline static REG1##m2_t vreinterpret_##SUF1##m2(const REG2##m2_t & src) \
{\
    return vreinterpret_v_##SUF2##m2_##SUF1##m2(src); \
} \
inline static REG1##m4_t vreinterpret_##SUF1##m4(const REG2##m4_t & src) \
{\
    return vreinterpret_v_##SUF2##m4_##SUF1##m4(src); \
} \
inline static REG1##m8_t vreinterpret_##SUF1##m8(const REG2##m8_t & src) \
{\
    return vreinterpret_v_##SUF2##m8_##SUF1##m8(src); \
}
// signed <-> unsigned and float <-> integer views of the same element width
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vint8, i8, vuint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vint16, i16, vuint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vint32, i32, vuint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vfloat32, f32, vuint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vfloat32, f32, vint32, i32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vfloat32, f32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vint32, i32, vfloat32, f32)
// unsigned views across element widths (u8/u16/u32/u64 cross-reinterprets)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint8, u8, vint8, i8)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint8, u8, vuint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint8, u8, vuint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint8, u8, vuint64, u64)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint16, u16, vint16, i16)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint16, u16, vuint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint16, u16, vuint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint16, u16, vuint64, u64)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vint32, i32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vuint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vuint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vuint64, u64)
// Emits overloaded vse<SZ>(base, value, vl): unit-stride store of `value` to
// `base`, forwarding to vse<SZ>_v_<SUF>m1. REG here is the OpenCV universal-
// intrinsic typedef (v_uint8 etc.) — presumably an alias of the corresponding
// m1 RVV type in this configuration; confirm against the including header.
#define OPENCV_HAL_IMPL_RVV_FUN_STORE(REG, SUF, SZ) \
inline static void vse##SZ(VTraits<REG>::lane_type *base, REG value, size_t vl) \
{ \
    return vse##SZ##_v_##SUF##m1(base, value, vl); \
}
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_uint8, u8, 8)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_int8, i8, 8)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_uint16, u16, 16)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_int16, i16, 16)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_uint32, u32, 32)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_int32, i32, 32)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_uint64, u64, 64)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_int64, i64, 64)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_float32, f32, 32)
// double support only when the build enables scalable 64-bit float SIMD
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_float64, f64, 64)
#endif
// Emits vmv_x(reg): extract lane 0 of an integer vector as a scalar
// (vmv_x_s_* intrinsic).
#define OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(REG, SUF) \
inline static VTraits<REG>::lane_type vmv_x(const REG & reg) \
{\
    return vmv_x_s_##SUF##m1_##SUF(reg); \
}
// Emits vfmv_f(reg): same as above but for floating-point vectors
// (vfmv_f_s_* intrinsic).
#define OPENCV_HAL_IMPL_RVV_FUN_EXTRACT_F(REG, SUF) \
inline static VTraits<REG>::lane_type vfmv_f(const REG & reg) \
{\
    return vfmv_f_s_##SUF##m1_##SUF(reg); \
}
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_uint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_int8, i8)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_uint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_int16, i16)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_uint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_int32, i32)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_uint64, u64)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_int64, i64)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT_F(v_float32, f32)
// double support only when the build enables scalable 64-bit float SIMD
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT_F(v_float64, f64)
#endif
// Emits overloaded vslidedown/vslideup(dst, src, offset, vl): shift the lanes
// of `src` down/up by `offset` positions (vacated lanes come from `dst`),
// forwarding to the vslide*_vx_* intrinsics.
#define OPENCV_HAL_IMPL_RVV_FUN_SLIDE(REG, SUF) \
inline static REG vslidedown(const REG & dst, const REG & src, size_t offset, size_t vl) \
{ \
    return vslidedown_vx_##SUF##m1(dst, src, offset, vl); \
} \
inline static REG vslideup(const REG & dst, const REG & src, size_t offset, size_t vl) \
{ \
    return vslideup_vx_##SUF##m1(dst, src, offset, vl); \
}
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_uint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_int8, i8)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_uint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_int16, i16)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_uint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_int32, i32)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_float32, f32)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_uint64, u64)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_int64, i64)
// double support only when the build enables scalable 64-bit float SIMD
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_float64, f64)
#endif
// One-off overloads for the fractional-LMUL u32mf2 type, which the generic
// macros above do not cover (they only instantiate m1..m8).
// vector-scalar multiply for u32mf2
inline static vuint32mf2_t vmul(const vuint32mf2_t & op1, uint32_t op2, size_t vl)
{
    return vmul_vx_u32mf2(op1, op2, vl);
}
// bit-pattern view of an i32mf2 vector as u32mf2 (no conversion)
inline static vuint32mf2_t vreinterpret_u32mf2(const vint32mf2_t& val)
{
    return vreinterpret_v_i32mf2_u32mf2(val);
}
// bit-pattern view of a u16mf2 vector as u32mf2 (no conversion)
inline static vuint32mf2_t vreinterpret_u32mf2(const vuint16mf2_t& val)
{
    return vreinterpret_v_u16mf2_u32mf2(val);
}
#endif //OPENCV_HAL_INTRIN_RVV_COMPAT_OVERLOAD_HPP

@ -1001,6 +1001,27 @@ inline int hal_ni_meanStdDev(const uchar* src_data, size_t src_step, int width,
#define cv_hal_meanStdDev hal_ni_meanStdDev
//! @endcond
/**
 * @brief Calculates the dot product of two vectors (each represented as a 2D image)
 *
 * @param a_data Pointer to the 1st image data
 * @param a_step Stride (row step in bytes) of the 1st image
 * @param b_data Pointer to the 2nd image data
 * @param b_step Stride (row step in bytes) of the 2nd image
 * @param width Width of both images
 * @param height Height of both images
 * @param type Data type of both images, for example CV_8U or CV_32F
 * @param dot_val Pointer to resulting dot product value
 * @return CV_HAL_ERROR_OK on success; this default stub returns
 *         CV_HAL_ERROR_NOT_IMPLEMENTED so OpenCV falls back to its built-in path
 *
 * @note NOTE(review): the core caller (Mat::dot) passes Mat::depth() as `type`
 *       and cols*channels() as `width` — i.e. a single-channel depth and a
 *       channel-premultiplied width; confirm implementations expect that.
 */
inline int hal_ni_dotProduct(const uchar* a_data, size_t a_step, const uchar* b_data, size_t b_step, int width, int height,
                             int type, double *dot_val)
{ return CV_HAL_ERROR_NOT_IMPLEMENTED; }
//! @cond IGNORED
#define cv_hal_dotProduct hal_ni_dotProduct
//! @endcond
/**
@brief hal_flip
@param src_type source and destination image type

@ -995,9 +995,18 @@ double Mat::dot(InputArray _mat) const
CV_INSTRUMENT_REGION();
Mat mat = _mat.getMat();
CV_Assert_N( mat.type() == type(), mat.size == size);
int cn = channels();
if (this->dims <= 2)
{
double product = 0;
CALL_HAL_RET(dotProduct, cv_hal_dotProduct, product, this->data, this->step, mat.data, mat.step,
this->cols * cn, this->rows, this->depth());
}
DotProdFunc func = getDotProdFunc(depth());
CV_Assert_N( mat.type() == type(), mat.size == size, func != 0 );
CV_Assert(func != 0 );
if( isContinuous() && mat.isContinuous() )
{

@ -1303,7 +1303,7 @@ void _OutputArray::create(int d, const int* sizes, int mtype, int i,
if(fixedType())
{
if(CV_MAT_CN(mtype) == m.channels() && ((1 << CV_MAT_TYPE(flags)) & fixedDepthMask) != 0 )
if(CV_MAT_CN(mtype) == m.channels() && ((1 << CV_MAT_DEPTH(flags)) & fixedDepthMask) != 0 )
mtype = m.type();
else
CV_CheckTypeEQ(m.type(), CV_MAT_TYPE(mtype), "Can't reallocate Mat with locked type (probably due to misused 'const' modifier)");

@ -318,12 +318,12 @@ Exception::Exception(int _code, const String& _err, const String& _func, const S
formatMessage();
}
Exception::~Exception() throw() {}
Exception::~Exception() CV_NOEXCEPT {}
/*!
\return the error description and the context as a text string.
*/
const char* Exception::what() const throw() { return msg.c_str(); }
const char* Exception::what() const CV_NOEXCEPT { return msg.c_str(); }
void Exception::formatMessage()
{

@ -213,7 +213,7 @@ file(GLOB_RECURSE dnn_int_hdrs
)
set(dnn_plugin_srcs ${dnn_srcs} ${dnn_int_hdrs})
ocv_list_filterout_ex(dnn_plugin_srcs
"/src/dnn.cpp$|/src/dnn_utils.cpp$|/src/dnn_utils.cpp$|/src/dnn_read.cpp$|/src/registry.cpp$|/src/backend.cpp$"
"/src/dnn.cpp$|/src/dnn_utils.cpp$|/src/dnn_read.cpp$|/src/registry.cpp$|/src/backend.cpp$"
# importers
"/src/(caffe|darknet|onnx|tensorflow)/"
# executors

@ -973,49 +973,72 @@ INSTANTIATE_TEST_CASE_P(/**/, Layer_Softmax, Combine(
/* withCann= */ false) // only test on CPU
));
using Layer_Elementwise = TestBaseWithParam<tuple<std::vector<int>, std::string, tuple<Backend, Target>>>;
PERF_TEST_P_(Layer_Elementwise, elementwise) {
std::vector<int> input_shape = get<0>(GetParam());
std::string op = get<1>(GetParam());
int backend_id = get<0>(get<2>(GetParam()));
int target_id = get<1>(get<2>(GetParam()));
struct Layer_Elementwise : public TestBaseWithParam<tuple<Backend, Target>> {
void test_layer(const std::string &op_type, const std::vector<int> &input_shape) {
int backend_id = get<0>(GetParam());
int target_id = get<1>(GetParam());
Mat input(input_shape, CV_32F);
randn(input, 0.f, 1.f);
Mat input(input_shape, CV_32F);
randu(input, -10.0f, 10.f);
LayerParams lp;
lp.type = op;
lp.name = "TestLayer";
LayerParams lp;
lp.type = op_type;
lp.name = cv::format("PerfLayer/%s", op_type.c_str());
Net net;
net.addLayerToPrev(lp.name, lp.type, lp);
Net net;
net.addLayerToPrev(lp.name, lp.type, lp);
// Warmup
{
net.setInput(input);
net.setPreferableBackend(backend_id);
net.setPreferableTarget(target_id);
Mat out = net.forward();
}
// Warmup
{
net.setInput(input);
net.setPreferableBackend(backend_id);
net.setPreferableTarget(target_id);
net.forward();
}
TEST_CYCLE() {
net.forward();
TEST_CYCLE() {
net.forward();
}
SANITY_CHECK_NOTHING();
}
SANITY_CHECK_NOTHING();
int N = 2;
int C = 32;
int H = 416;
int W = 416;
};
PERF_TEST_P_(Layer_Elementwise, Gelu) {
test_layer("Gelu", std::vector<int>{1, 50, 3072});
}
PERF_TEST_P_(Layer_Elementwise, Swish) {
test_layer("Swish", std::vector<int>{N, C, H, W});
}
PERF_TEST_P_(Layer_Elementwise, Mish) {
test_layer("Mish", std::vector<int>{N, C, H, W});
}
PERF_TEST_P_(Layer_Elementwise, Elu) {
test_layer("ELU", std::vector<int>{N, C, H, W});
}
PERF_TEST_P_(Layer_Elementwise, Celu) {
test_layer("Celu", std::vector<int>{N, C, H, W});
}
PERF_TEST_P_(Layer_Elementwise, Selu) {
test_layer("Selu", std::vector<int>{N, C, H, W});
}
PERF_TEST_P_(Layer_Elementwise, HardSwish) {
test_layer("HardSwish", std::vector<int>{N, C, H, W});
}
INSTANTIATE_TEST_CASE_P(/**/, Layer_Elementwise, testing::Combine(
testing::Values(std::vector<int>{1, 50, 3072}),
testing::Values(std::string{"Gelu"}),
dnnBackendsAndTargets(/* withInferenceEngine= */ true,
/* withHalide= */ false,
/* withCpuOCV= */ true,
/* withVkCom= */ false,
/* withCUDA= */ true,
/* withNgraph= */ true,
/* withWebnn= */ false,
/* withCann= */ false) // only test on CPU
));
INSTANTIATE_TEST_CASE_P(/**/, Layer_Elementwise,
dnnBackendsAndTargets(/* withInferenceEngine= */ true,
/* withHalide= */ false,
/* withCpuOCV= */ true,
/* withVkCom= */ false,
/* withCUDA= */ true,
/* withNgraph= */ true,
/* withWebnn= */ false,
/* withCann= */ false));
} // namespace

@ -304,8 +304,8 @@ public:
opt_LASX::fastGEMM1T( sptr, wptr, wstep, biasptr, multptr, dptr, nw, vecsize, outZp );
else
#endif
#if CV_TRY_RVV && defined(__riscv_v_intrinsic) && __riscv_v_intrinsic>=11000
if( useRVV)
#if CV_TRY_RVV && CV_RVV
if( useRVV )
opt_RVV::fastGEMM1T( sptr, wptr, wstep, biasptr, multptr, dptr, nw, vecsize, outZp );
else
#endif

@ -1257,7 +1257,7 @@ void fastGEMM1T( const int8_t* vec, const int8_t* weights,
}
#endif // CV_LASX
#if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_RVV && defined(__riscv_v_intrinsic) && __riscv_v_intrinsic>=11000
#if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_RVV
static const size_t __cv_rvv_e8m1_max = __riscv_vsetvlmax_e8m1();
static const size_t __cv_rvv_e16m1_max = __riscv_vsetvlmax_e16m1();

@ -1519,7 +1519,7 @@ public:
opt_AVX::fastGEMM( aptr, astep, bptr, bstep, cptr, cstep, mmax, kmax, nmax );
else
#endif
#if CV_TRY_RVV
#if CV_TRY_RVV && CV_RVV
if( useRVV ) {
opt_RVV::fastGEMM( aptr, astep, bptr, bstep, cptr, cstep, mmax, kmax, nmax );
}

@ -119,7 +119,7 @@ void runDepthwise(InputArray _input, OutputArray _output, const Ptr<FastConv>& c
pad_top, pad_left, bias, relu, inptr0, Hi, Wi, outptr0, c, H0, W0);
else
#endif
#if CV_TRY_RVV
#if CV_TRY_RVV && CV_RVV
if(canRunOpt && conv->useRVV)
opt_RVV::fastDepthwiseConv(weights, Hk, Wk, stride_h, stride_w, dilation_h, dilation_w,
pad_top, pad_left, bias, relu, inptr0, Hi, Wi, outptr0, c, H0, W0);

@ -264,48 +264,48 @@ void fastDepthwiseConv( const float* wptr,
if( stride_w == 1 )
for( ; out_j < outW1; out_j += vl, avl -= vl)
{
vl = vsetvl_e32m8(avl);
vl = __riscv_vsetvl_e32m8(avl);
int in_j = out_j * stride_w - pad_l;
vfloat32m8_t vout0 = vfmacc_vf_f32m8(vfmv_v_f_f32m8(bias, vl), w00, vle32_v_f32m8(imgptr0 + in_j, vl), vl);
vout0 = vfmacc_vf_f32m8(vout0, w01, vle32_v_f32m8(imgptr0 + in_j + dilation_w, vl), vl);
vout0 = vfmacc_vf_f32m8(vout0, w02, vle32_v_f32m8(imgptr0 + in_j + dilation_w*2, vl), vl);
vout0 = vfmacc_vf_f32m8(vout0, w10, vle32_v_f32m8(imgptr1 + in_j, vl),vl);
vout0 = vfmacc_vf_f32m8(vout0, w11, vle32_v_f32m8(imgptr1 + in_j + dilation_w, vl),vl);
vout0 = vfmacc_vf_f32m8(vout0, w12, vle32_v_f32m8(imgptr1 + in_j + dilation_w*2, vl),vl);
vout0 = vfmacc_vf_f32m8(vout0, w20, vle32_v_f32m8(imgptr2 + in_j, vl), vl);
vout0 = vfmacc_vf_f32m8(vout0, w21, vle32_v_f32m8(imgptr2 + in_j + dilation_w, vl), vl);
vout0 = vfmacc_vf_f32m8(vout0, w22, vle32_v_f32m8(imgptr2 + in_j + dilation_w*2, vl), vl);
vfloat32m8_t vout0 = __riscv_vfmacc_vf_f32m8(__riscv_vfmv_v_f_f32m8(bias, vl), w00, __riscv_vle32_v_f32m8(imgptr0 + in_j, vl), vl);
vout0 = __riscv_vfmacc_vf_f32m8(vout0, w01, __riscv_vle32_v_f32m8(imgptr0 + in_j + dilation_w, vl), vl);
vout0 = __riscv_vfmacc_vf_f32m8(vout0, w02, __riscv_vle32_v_f32m8(imgptr0 + in_j + dilation_w*2, vl), vl);
vout0 = __riscv_vfmacc_vf_f32m8(vout0, w10, __riscv_vle32_v_f32m8(imgptr1 + in_j, vl),vl);
vout0 = __riscv_vfmacc_vf_f32m8(vout0, w11, __riscv_vle32_v_f32m8(imgptr1 + in_j + dilation_w, vl),vl);
vout0 = __riscv_vfmacc_vf_f32m8(vout0, w12, __riscv_vle32_v_f32m8(imgptr1 + in_j + dilation_w*2, vl),vl);
vout0 = __riscv_vfmacc_vf_f32m8(vout0, w20, __riscv_vle32_v_f32m8(imgptr2 + in_j, vl), vl);
vout0 = __riscv_vfmacc_vf_f32m8(vout0, w21, __riscv_vle32_v_f32m8(imgptr2 + in_j + dilation_w, vl), vl);
vout0 = __riscv_vfmacc_vf_f32m8(vout0, w22, __riscv_vle32_v_f32m8(imgptr2 + in_j + dilation_w*2, vl), vl);
if (relu)
{
vbool4_t m = vmfgt_vf_f32m8_b4(vout0, 0, vl);
vout0 = vmerge_vvm_f32m8(m, vfmul_vf_f32m8(vout0, relu_coeff, vl), vout0, vl);
vbool4_t m = __riscv_vmfgt_vf_f32m8_b4(vout0, 0, vl);
vout0 = __riscv_vmerge_vvm_f32m8(__riscv_vfmul_vf_f32m8(vout0, relu_coeff, vl), vout0, m, vl);
}
vse32_v_f32m8(outptr + out_j, vout0, vl);
__riscv_vse32_v_f32m8(outptr + out_j, vout0, vl);
}
else //stride_w == 2 && dilation_w == 1
for( ; out_j < outW1; out_j += vl, avl -= vl)
{
vl = vsetvl_e32m2(avl);
vl = __riscv_vsetvl_e32m2(avl);
int in_j = out_j * stride_w - pad_l;
vfloat32m2_t vout0 = vfmacc_vf_f32m2(vfmv_v_f_f32m2(bias, vl), w00, vlse32_v_f32m2(imgptr0+in_j , 8, vl), vl);
vfloat32m2_t vout1 = vfmul_vf_f32m2(vlse32_v_f32m2(imgptr0+in_j+1, 8, vl), w01, vl);
vfloat32m2_t vout2 = vfmul_vf_f32m2(vlse32_v_f32m2(imgptr0+in_j+2, 8, vl), w02, vl);
vfloat32m2_t vout0 = __riscv_vfmacc_vf_f32m2(__riscv_vfmv_v_f_f32m2(bias, vl), w00, __riscv_vlse32_v_f32m2(imgptr0+in_j , 8, vl), vl);
vfloat32m2_t vout1 = __riscv_vfmul_vf_f32m2(__riscv_vlse32_v_f32m2(imgptr0+in_j+1, 8, vl), w01, vl);
vfloat32m2_t vout2 = __riscv_vfmul_vf_f32m2(__riscv_vlse32_v_f32m2(imgptr0+in_j+2, 8, vl), w02, vl);
vout0 = vfmacc_vf_f32m2(vout0, w10, vlse32_v_f32m2(imgptr1+in_j , 8, vl), vl);
vout1 = vfmacc_vf_f32m2(vout1, w11, vlse32_v_f32m2(imgptr1+in_j+1, 8, vl), vl);
vout2 = vfmacc_vf_f32m2(vout2, w12, vlse32_v_f32m2(imgptr1+in_j+2, 8, vl), vl);
vout0 = __riscv_vfmacc_vf_f32m2(vout0, w10, __riscv_vlse32_v_f32m2(imgptr1+in_j , 8, vl), vl);
vout1 = __riscv_vfmacc_vf_f32m2(vout1, w11, __riscv_vlse32_v_f32m2(imgptr1+in_j+1, 8, vl), vl);
vout2 = __riscv_vfmacc_vf_f32m2(vout2, w12, __riscv_vlse32_v_f32m2(imgptr1+in_j+2, 8, vl), vl);
vout0 = vfmacc_vf_f32m2(vout0, w20, vlse32_v_f32m2(imgptr2+in_j , 8, vl), vl);
vout1 = vfmacc_vf_f32m2(vout1, w21, vlse32_v_f32m2(imgptr2+in_j+1, 8, vl), vl);
vout2 = vfmacc_vf_f32m2(vout2, w22, vlse32_v_f32m2(imgptr2+in_j+2, 8, vl), vl);
vout0 = __riscv_vfmacc_vf_f32m2(vout0, w20, __riscv_vlse32_v_f32m2(imgptr2+in_j , 8, vl), vl);
vout1 = __riscv_vfmacc_vf_f32m2(vout1, w21, __riscv_vlse32_v_f32m2(imgptr2+in_j+1, 8, vl), vl);
vout2 = __riscv_vfmacc_vf_f32m2(vout2, w22, __riscv_vlse32_v_f32m2(imgptr2+in_j+2, 8, vl), vl);
vout0 = vfadd_vv_f32m2(vfadd_vv_f32m2(vout0, vout1, vl), vout2, vl);
vout0 = __riscv_vfadd_vv_f32m2(__riscv_vfadd_vv_f32m2(vout0, vout1, vl), vout2, vl);
if (relu)
{
vbool16_t m = vmfgt_vf_f32m2_b16(vout0, 0, vl);
vout0 = vmerge_vvm_f32m2(m, vfmul_vf_f32m2(vout0, relu_coeff, vl), vout0, vl);
vbool16_t m = __riscv_vmfgt_vf_f32m2_b16(vout0, 0, vl);
vout0 = __riscv_vmerge_vvm_f32m2(__riscv_vfmul_vf_f32m2(vout0, relu_coeff, vl), vout0, m, vl);
}
vse32_v_f32m2(outptr + out_j, vout0, vl);
__riscv_vse32_v_f32m2(outptr + out_j, vout0, vl);
}
}

@ -730,12 +730,6 @@ struct GeluFunctor : public BaseFunctor {
one = vx_setall_f32(1.0f),
reciprocal_sqrt2 = vx_setall_f32(M_SQRT1_2);
for (; i <= len - vlanes; i += vlanes) {
if (i + vlanes > len) {
if (i == 0 || i == len) {
break;
}
i = len - vlanes;
}
v_float32 x0 = vx_load(srcptr + i);
// t = x * M_SQRT1_2
@ -910,7 +904,17 @@ const char* const TanHFunctor::BaseDefaultFunctor<TanHFunctor>::ocl_kernel_name
struct SwishFunctor : public BaseDefaultFunctor<SwishFunctor>
{
typedef SwishLayer Layer;
using Layer = SwishLayer;
int vlanes;
explicit SwishFunctor() {
#if (CV_SIMD || CV_SIMD_SCALABLE)
vlanes = VTraits<v_float32>::vlanes();
#else
vlanes = 1;
#endif
}
bool supportBackend(int backendId, int)
{
@ -925,6 +929,32 @@ struct SwishFunctor : public BaseDefaultFunctor<SwishFunctor>
return x / (1.f + exp(-x));
}
void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const {
CV_UNUSED(stripeStart);
for (int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize) {
int i = 0;
#if (CV_SIMD || CV_SIMD_SCALABLE)
// x / (1.f + exp(-x));
v_float32 one = vx_setall_f32(1.0f),
zero = vx_setzero_f32();
for (; i <= len - vlanes; i += vlanes) {
v_float32 x = vx_load(srcptr + i);
v_float32 t = v_sub(zero, x);
t = v_exp(t);
t = v_add(one, t);
t = v_div(x, t);
vx_store(dstptr + i, t);
}
#endif
// In case SIMD is not available or len < vlanes
for (; i < len; i++) {
dstptr[i] = calculate(srcptr[i]);
}
}
}
#ifdef HAVE_CUDA
Ptr<BackendNode> initCUDA(int target, csl::Stream stream)
{
@ -969,9 +999,27 @@ struct SwishFunctor : public BaseDefaultFunctor<SwishFunctor>
template<>
const char* const SwishFunctor::BaseDefaultFunctor<SwishFunctor>::ocl_kernel_name = "SwishForward";
namespace {
constexpr float MISH_THRESHOLD = -36.73f;
}
/*
This implementation is derived from
https://github.com/vpisarev/ficus/blob/3c9a8b78f49e17489c5e1fd6dd5dd487348c99c2/lib/NN/OpElemwise.fx#L110
*/
struct MishFunctor : public BaseDefaultFunctor<MishFunctor>
{
typedef MishLayer Layer;
using Layer = MishLayer;
int vlanes;
explicit MishFunctor() {
#if (CV_SIMD || CV_SIMD_SCALABLE)
vlanes = VTraits<v_float32>::vlanes();
#else
vlanes = 1;
#endif
}
bool supportBackend(int backendId, int)
{
@ -983,15 +1031,34 @@ struct MishFunctor : public BaseDefaultFunctor<MishFunctor>
inline float calculate(float x) const
{
// Use fast approximation introduced in https://github.com/opencv/opencv/pull/17200
if (x >= 8.f)
{
return x;
}
float y = x > MISH_THRESHOLD ? std::exp(-x) : 1.f;
x *= x > MISH_THRESHOLD ? 1.f : 0.f;
return x * (1 + 2 * y) / (1 + 2 * y + 2 * y * y);
}
float eX = exp(x);
float n = (eX + 2.f) * eX;
return (x * n) / (n + 2.f);
void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const {
CV_UNUSED(stripeStart);
for (int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize) {
int i = 0;
#if (CV_SIMD || CV_SIMD_SCALABLE)
v_float32 v_threshold = vx_setall_f32(MISH_THRESHOLD), one = vx_setall_f32(1.f), z = vx_setzero_f32();
for (; i <= len - vlanes; i += vlanes) {
v_float32 x = vx_load(srcptr + i);
x = v_select(v_le(x, v_threshold), z, x);
v_float32 y = v_exp(v_sub(z, x));
v_float32 _2y = v_add(y, y),
_2ya1 = v_add(_2y, one);
x = v_div(v_mul(x, _2ya1), v_add(_2ya1, v_mul(_2y, y)));
vx_store(dstptr + i, x);
}
#endif
// In case SIMD is not available or len < vlanes
for (; i < len; i++) {
dstptr[i] = calculate(srcptr[i]);
}
}
}
#ifdef HAVE_CUDA
@ -1105,10 +1172,18 @@ const char* const SigmoidFunctor::BaseDefaultFunctor<SigmoidFunctor>::ocl_kernel
struct ELUFunctor : public BaseDefaultFunctor<ELUFunctor>
{
typedef ELULayer Layer;
using Layer = ELULayer;
float alpha;
int vlanes;
explicit ELUFunctor(float alpha_ = 1.f) : alpha(alpha_) {}
explicit ELUFunctor(float alpha_ = 1.f) : alpha(alpha_) {
#if (CV_SIMD || CV_SIMD_SCALABLE)
vlanes = VTraits<v_float32>::vlanes();
#else
vlanes = 1;
#endif
}
bool supportBackend(int backendId, int)
{
@ -1126,6 +1201,28 @@ struct ELUFunctor : public BaseDefaultFunctor<ELUFunctor>
return x >= 0.f ? x : alpha * (exp(x) - 1.f);
}
void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const {
CV_UNUSED(stripeStart);
for (int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize) {
int i = 0;
#if (CV_SIMD || CV_SIMD_SCALABLE)
v_float32 z = vx_setzero_f32(), v_alpha = vx_setall_f32(alpha), one = vx_setall_f32(1.0f);
for (; i <= len - vlanes; i += vlanes) {
v_float32 x = vx_load(srcptr + i);
v_float32 t = v_mul(v_alpha, v_sub(v_exp(x), one));
x = v_select(v_ge(x, z), x, t);
vx_store(dstptr + i, x);
}
#endif
// In case SIMD is not available or len < vlanes
for (; i < len; i++) {
dstptr[i] = calculate(srcptr[i]);
}
}
}
inline void setKernelParams(ocl::Kernel& kernel) const
{
kernel.set(3, alpha);
@ -1722,7 +1819,16 @@ const char* const BaseDefaultFunctor<ErfFunctor>::ocl_kernel_name = "ErfForward"
struct HardSwishFunctor : public BaseDefaultFunctor<HardSwishFunctor>
{
typedef HardSwishLayer Layer;
using Layer = HardSwishLayer;
int vlanes;
explicit HardSwishFunctor() {
#if (CV_SIMD || CV_SIMD_SCALABLE)
vlanes = VTraits<v_float32>::vlanes();
#else
vlanes = 1;
#endif
}
bool supportBackend(int backendId, int)
{
@ -1733,7 +1839,32 @@ struct HardSwishFunctor : public BaseDefaultFunctor<HardSwishFunctor>
inline float calculate(float x) const
{
return x * max(0.f, min(1.f, x / 6.f + 0.5f));
return x * std::max(0.f, std::min(1.f, x / 6.f + 0.5f));
}
// Applies HardSwish element-wise: x * max(0, min(1, x/6 + 0.5)).
// Processes channels [cn0, cn1); srcptr/dstptr advance by planeSize per channel.
// stripeStart is unused here (kept for the common functor interface).
void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const {
CV_UNUSED(stripeStart);
for (int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize) {
int i = 0;
#if (CV_SIMD || CV_SIMD_SCALABLE)
v_float32 zero = vx_setzero_f32(), one = vx_setall_f32(1.0f),
half = vx_setall_f32(0.5f), sixth = vx_setall_f32(1 / 6.0f);
for (; i <= len - vlanes; i += vlanes) {
v_float32 x = vx_load(srcptr + i);
// t = x/6 + 0.5, then clamp t to [0, 1], then multiply by x.
v_float32 t = v_add(v_mul(x, sixth), half);
t = v_min(one, t);
t = v_max(zero, t);
t = v_mul(x, t);
vx_store(dstptr + i, t);
}
#endif
// In case SIMD is not available or len < vlanes; also handles the tail.
for (; i < len; i++) {
dstptr[i] = calculate(srcptr[i]);
}
}
}
#ifdef HAVE_CUDA
@ -1907,11 +2038,18 @@ const char* const BaseDefaultFunctor<TanFunctor>::ocl_kernel_name = "TanForward"
struct CeluFunctor : public BaseDefaultFunctor<CeluFunctor>
{
typedef CeluLayer Layer;
using Layer = CeluLayer;
float alpha;
int vlanes;
explicit CeluFunctor(float alpha_ = 1.f) : alpha(alpha_) {}
explicit CeluFunctor(float alpha_ = 1.f) : alpha(alpha_) {
#if (CV_SIMD || CV_SIMD_SCALABLE)
vlanes = VTraits<v_float32>::vlanes();
#else
vlanes = 1;
#endif
}
bool supportBackend(int backendId, int)
{
@ -1920,7 +2058,30 @@ struct CeluFunctor : public BaseDefaultFunctor<CeluFunctor>
inline float calculate(float x) const
{
return max(0.f, x) + min(0.f, alpha * expm1(x / alpha));
return std::max(0.f, x) + std::min(0.f, alpha * expm1(x / alpha));
}
// Applies CELU element-wise: max(0, x) + min(0, alpha * (exp(x / alpha) - 1)).
// Processes channels [cn0, cn1); srcptr/dstptr advance by planeSize per channel.
// stripeStart is unused here (kept for the common functor interface).
void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const {
CV_UNUSED(stripeStart);
for (int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize) {
int i = 0;
#if (CV_SIMD || CV_SIMD_SCALABLE)
// v_ralpha precomputes 1/alpha so the loop uses a multiply instead of a divide.
v_float32 zero = vx_setzero_f32(), v_alpha = vx_setall_f32(alpha),
one = vx_setall_f32(1.0f), v_ralpha = vx_setall_f32(1.0f / alpha);
for (; i <= len - vlanes; i += vlanes) {
v_float32 x = vx_load(srcptr + i);
// NOTE(review): the SIMD path computes exp(x/alpha) - 1, while the scalar
// calculate() uses expm1(x/alpha); results may differ slightly for tiny |x|
// — confirm this tolerance is acceptable.
v_float32 t = v_min(zero, v_mul(v_alpha, v_sub(v_exp(v_mul(x, v_ralpha)), one)));
t = v_add(v_max(zero, x), t);
vx_store(dstptr + i, t);
}
#endif
// In case SIMD is not available or len < vlanes; also handles the tail.
for (; i < len; i++) {
dstptr[i] = calculate(srcptr[i]);
}
}
}
inline void setKernelParams(ocl::Kernel& kernel) const
@ -1981,13 +2142,21 @@ const char* const BaseDefaultFunctor<HardSigmoidFunctor>::ocl_kernel_name = "Har
struct SeluFunctor : public BaseDefaultFunctor<SeluFunctor>
{
typedef SeluLayer Layer;
using Layer = SeluLayer;
float alpha;
float gamma;
int vlanes;
explicit SeluFunctor(float alpha_ = 1.67326319217681884765625f,
float gamma_ = 1.05070102214813232421875f) : alpha(alpha_), gamma(gamma_) {}
float gamma_ = 1.05070102214813232421875f)
: alpha(alpha_), gamma(gamma_) {
#if (CV_SIMD || CV_SIMD_SCALABLE)
vlanes = VTraits<v_float32>::vlanes();
#else
vlanes = 1;
#endif
}
bool supportBackend(int backendId, int)
{
@ -1999,6 +2168,30 @@ struct SeluFunctor : public BaseDefaultFunctor<SeluFunctor>
return gamma * (x > 0.f ? x : alpha * expm1(x));
}
// Applies SELU element-wise: gamma * (x > 0 ? x : alpha * (exp(x) - 1)).
// Processes channels [cn0, cn1); srcptr/dstptr advance by planeSize per channel.
// stripeStart is unused here (kept for the common functor interface).
void apply(const float* srcptr, float* dstptr, int stripeStart, int len, size_t planeSize, int cn0, int cn1) const {
CV_UNUSED(stripeStart);
for (int cn = cn0; cn < cn1; cn++, srcptr += planeSize, dstptr += planeSize) {
int i = 0;
#if (CV_SIMD || CV_SIMD_SCALABLE)
v_float32 z = vx_setzero_f32(), one = vx_setall_f32(1.0f),
v_alpha = vx_setall_f32(alpha), v_gamma = vx_setall_f32(gamma);
for (; i <= len - vlanes; i += vlanes) {
v_float32 x = vx_load(srcptr + i);
// NOTE(review): the SIMD path computes exp(x) - 1, while the scalar
// calculate() uses expm1(x); results may differ slightly for tiny |x|
// — confirm this tolerance is acceptable.
v_float32 t = v_mul(v_alpha, v_sub(v_exp(x), one));
// Take the negative branch t where x <= 0, otherwise keep x; then scale by gamma.
x = v_select(v_le(x, z), t, x);
x = v_mul(v_gamma, x);
vx_store(dstptr + i, x);
}
#endif
// In case SIMD is not available or len < vlanes; also handles the tail.
for (; i < len; i++) {
dstptr[i] = calculate(srcptr[i]);
}
}
}
inline void setKernelParams(ocl::Kernel& kernel) const
{
kernel.set(3, alpha);

@ -276,7 +276,7 @@ public:
opt_AVX::fastGEMM1T( sptr, wptr, wstep, biasptr, dptr, nw, vecsize_aligned);
else
#endif
#if CV_TRY_RVV
#if CV_TRY_RVV && CV_RVV
if( useRVV )
opt_RVV::fastGEMM1T( sptr, wptr, wstep, biasptr, dptr, nw, vecsize);
else

@ -295,7 +295,7 @@ void fastGEMM( const float* aptr, size_t astep, const float* bptr,
int avl = nb, vl;
for(int n = 0; n < nb; n += vl, avl -= vl)
{
vl = vsetvl_e32m4(avl);
vl = __riscv_vsetvl_e32m4(avl);
for( int m = 0; m < ma; m += 7 )
{
const float* aptr0 = aptr + astep*m;
@ -314,13 +314,13 @@ void fastGEMM( const float* aptr, size_t astep, const float* bptr,
float* cptr5 = cptr + cstep*std::min(m+5, ma-1);
float* cptr6 = cptr + cstep*std::min(m+6, ma-1);
vfloat32m4_t d0 = vfmv_v_f_f32m4(0, vl);
vfloat32m4_t d1 = vfmv_v_f_f32m4(0, vl);
vfloat32m4_t d2 = vfmv_v_f_f32m4(0, vl);
vfloat32m4_t d3 = vfmv_v_f_f32m4(0, vl);
vfloat32m4_t d4 = vfmv_v_f_f32m4(0, vl);
vfloat32m4_t d5 = vfmv_v_f_f32m4(0, vl);
vfloat32m4_t d6 = vfmv_v_f_f32m4(0, vl);
vfloat32m4_t d0 = __riscv_vfmv_v_f_f32m4(0, vl);
vfloat32m4_t d1 = __riscv_vfmv_v_f_f32m4(0, vl);
vfloat32m4_t d2 = __riscv_vfmv_v_f_f32m4(0, vl);
vfloat32m4_t d3 = __riscv_vfmv_v_f_f32m4(0, vl);
vfloat32m4_t d4 = __riscv_vfmv_v_f_f32m4(0, vl);
vfloat32m4_t d5 = __riscv_vfmv_v_f_f32m4(0, vl);
vfloat32m4_t d6 = __riscv_vfmv_v_f_f32m4(0, vl);
for( int k = 0; k < na; k++ )
{
@ -332,22 +332,22 @@ void fastGEMM( const float* aptr, size_t astep, const float* bptr,
float a5 = aptr5[k];
float a6 = aptr6[k];
vfloat32m4_t b = vle32_v_f32m4(bptr + k*bstep + n, vl);
d0 = vfmacc_vf_f32m4(d0, a0, b, vl);
d1 = vfmacc_vf_f32m4(d1, a1, b, vl);
d2 = vfmacc_vf_f32m4(d2, a2, b, vl);
d3 = vfmacc_vf_f32m4(d3, a3, b, vl);
d4 = vfmacc_vf_f32m4(d4, a4, b, vl);
d5 = vfmacc_vf_f32m4(d5, a5, b, vl);
d6 = vfmacc_vf_f32m4(d6, a6, b, vl);
vfloat32m4_t b = __riscv_vle32_v_f32m4(bptr + k*bstep + n, vl);
d0 = __riscv_vfmacc_vf_f32m4(d0, a0, b, vl);
d1 = __riscv_vfmacc_vf_f32m4(d1, a1, b, vl);
d2 = __riscv_vfmacc_vf_f32m4(d2, a2, b, vl);
d3 = __riscv_vfmacc_vf_f32m4(d3, a3, b, vl);
d4 = __riscv_vfmacc_vf_f32m4(d4, a4, b, vl);
d5 = __riscv_vfmacc_vf_f32m4(d5, a5, b, vl);
d6 = __riscv_vfmacc_vf_f32m4(d6, a6, b, vl);
}
vse32_v_f32m4(cptr0 + n, d0, vl);
vse32_v_f32m4(cptr1 + n, d1, vl);
vse32_v_f32m4(cptr2 + n, d2, vl);
vse32_v_f32m4(cptr3 + n, d3, vl);
vse32_v_f32m4(cptr4 + n, d4, vl);
vse32_v_f32m4(cptr5 + n, d5, vl);
vse32_v_f32m4(cptr6 + n, d6, vl);
__riscv_vse32_v_f32m4(cptr0 + n, d0, vl);
__riscv_vse32_v_f32m4(cptr1 + n, d1, vl);
__riscv_vse32_v_f32m4(cptr2 + n, d2, vl);
__riscv_vse32_v_f32m4(cptr3 + n, d3, vl);
__riscv_vse32_v_f32m4(cptr4 + n, d4, vl);
__riscv_vse32_v_f32m4(cptr5 + n, d5, vl);
__riscv_vse32_v_f32m4(cptr6 + n, d6, vl);
}
}
}
@ -356,112 +356,112 @@ void fastGEMM1T( const float* vec, const float* weights,
size_t wstep, const float* bias,
float* dst, int nvecs, int vecsize )
{
const int vlm2 = vsetvlmax_e32m2();
const int vlm2 = __riscv_vsetvlmax_e32m2();
int i = 0;
for( ; i <= nvecs - 15; i += 15 )
{
const float* wptr = weights + i*wstep;
vfloat32m2_t
vs0 = vfmv_v_f_f32m2(0, vlm2), vs1 = vfmv_v_f_f32m2(0, vlm2), vs2 = vfmv_v_f_f32m2(0, vlm2),
vs3 = vfmv_v_f_f32m2(0, vlm2), vs4 = vfmv_v_f_f32m2(0, vlm2), vs5 = vfmv_v_f_f32m2(0, vlm2),
vs6 = vfmv_v_f_f32m2(0, vlm2), vs7 = vfmv_v_f_f32m2(0, vlm2), vs8 = vfmv_v_f_f32m2(0, vlm2),
vs9 = vfmv_v_f_f32m2(0, vlm2), vs10 = vfmv_v_f_f32m2(0, vlm2), vs11 = vfmv_v_f_f32m2(0, vlm2),
vs12 = vfmv_v_f_f32m2(0, vlm2), vs13 = vfmv_v_f_f32m2(0, vlm2), vs14 = vfmv_v_f_f32m2(0, vlm2);
vs0 = __riscv_vfmv_v_f_f32m2(0, vlm2), vs1 = __riscv_vfmv_v_f_f32m2(0, vlm2), vs2 = __riscv_vfmv_v_f_f32m2(0, vlm2),
vs3 = __riscv_vfmv_v_f_f32m2(0, vlm2), vs4 = __riscv_vfmv_v_f_f32m2(0, vlm2), vs5 = __riscv_vfmv_v_f_f32m2(0, vlm2),
vs6 = __riscv_vfmv_v_f_f32m2(0, vlm2), vs7 = __riscv_vfmv_v_f_f32m2(0, vlm2), vs8 = __riscv_vfmv_v_f_f32m2(0, vlm2),
vs9 = __riscv_vfmv_v_f_f32m2(0, vlm2), vs10 = __riscv_vfmv_v_f_f32m2(0, vlm2), vs11 = __riscv_vfmv_v_f_f32m2(0, vlm2),
vs12 = __riscv_vfmv_v_f_f32m2(0, vlm2), vs13 = __riscv_vfmv_v_f_f32m2(0, vlm2), vs14 = __riscv_vfmv_v_f_f32m2(0, vlm2);
int avl = vecsize, vl;
for(int k = 0 ; k < vecsize; k += vl, wptr += vl, avl -= vl)
{
vl = vsetvl_e32m2(avl);
vfloat32m2_t v = vle32_v_f32m2(vec + k, vl);
vs0 = vfmacc_vv_f32m2(vs0, vle32_v_f32m2(wptr, vl), v, vl);
vs1 = vfmacc_vv_f32m2(vs1, vle32_v_f32m2(wptr + wstep, vl), v, vl);
vs2 = vfmacc_vv_f32m2(vs2, vle32_v_f32m2(wptr + wstep*2, vl), v, vl);
vs3 = vfmacc_vv_f32m2(vs3, vle32_v_f32m2(wptr + wstep*3, vl), v, vl);
vs4 = vfmacc_vv_f32m2(vs4, vle32_v_f32m2(wptr + wstep*4, vl), v, vl);
vs5 = vfmacc_vv_f32m2(vs5, vle32_v_f32m2(wptr + wstep*5, vl), v, vl);
vs6 = vfmacc_vv_f32m2(vs6, vle32_v_f32m2(wptr + wstep*6, vl), v, vl);
vs7 = vfmacc_vv_f32m2(vs7, vle32_v_f32m2(wptr + wstep*7, vl), v, vl);
vs8 = vfmacc_vv_f32m2(vs8, vle32_v_f32m2(wptr + wstep*8, vl), v, vl);
vs9 = vfmacc_vv_f32m2(vs9, vle32_v_f32m2(wptr + wstep*9, vl), v, vl);
vs10 = vfmacc_vv_f32m2(vs10, vle32_v_f32m2(wptr + wstep*10, vl), v, vl);
vs11 = vfmacc_vv_f32m2(vs11, vle32_v_f32m2(wptr + wstep*11, vl), v, vl);
vs12 = vfmacc_vv_f32m2(vs12, vle32_v_f32m2(wptr + wstep*12, vl), v, vl);
vs13 = vfmacc_vv_f32m2(vs13, vle32_v_f32m2(wptr + wstep*13, vl), v, vl);
vs14 = vfmacc_vv_f32m2(vs14, vle32_v_f32m2(wptr + wstep*14, vl), v, vl);
vl = __riscv_vsetvl_e32m2(avl);
vfloat32m2_t v = __riscv_vle32_v_f32m2(vec + k, vl);
vs0 = __riscv_vfmacc_vv_f32m2(vs0, __riscv_vle32_v_f32m2(wptr, vl), v, vl);
vs1 = __riscv_vfmacc_vv_f32m2(vs1, __riscv_vle32_v_f32m2(wptr + wstep, vl), v, vl);
vs2 = __riscv_vfmacc_vv_f32m2(vs2, __riscv_vle32_v_f32m2(wptr + wstep*2, vl), v, vl);
vs3 = __riscv_vfmacc_vv_f32m2(vs3, __riscv_vle32_v_f32m2(wptr + wstep*3, vl), v, vl);
vs4 = __riscv_vfmacc_vv_f32m2(vs4, __riscv_vle32_v_f32m2(wptr + wstep*4, vl), v, vl);
vs5 = __riscv_vfmacc_vv_f32m2(vs5, __riscv_vle32_v_f32m2(wptr + wstep*5, vl), v, vl);
vs6 = __riscv_vfmacc_vv_f32m2(vs6, __riscv_vle32_v_f32m2(wptr + wstep*6, vl), v, vl);
vs7 = __riscv_vfmacc_vv_f32m2(vs7, __riscv_vle32_v_f32m2(wptr + wstep*7, vl), v, vl);
vs8 = __riscv_vfmacc_vv_f32m2(vs8, __riscv_vle32_v_f32m2(wptr + wstep*8, vl), v, vl);
vs9 = __riscv_vfmacc_vv_f32m2(vs9, __riscv_vle32_v_f32m2(wptr + wstep*9, vl), v, vl);
vs10 = __riscv_vfmacc_vv_f32m2(vs10, __riscv_vle32_v_f32m2(wptr + wstep*10, vl), v, vl);
vs11 = __riscv_vfmacc_vv_f32m2(vs11, __riscv_vle32_v_f32m2(wptr + wstep*11, vl), v, vl);
vs12 = __riscv_vfmacc_vv_f32m2(vs12, __riscv_vle32_v_f32m2(wptr + wstep*12, vl), v, vl);
vs13 = __riscv_vfmacc_vv_f32m2(vs13, __riscv_vle32_v_f32m2(wptr + wstep*13, vl), v, vl);
vs14 = __riscv_vfmacc_vv_f32m2(vs14, __riscv_vle32_v_f32m2(wptr + wstep*14, vl), v, vl);
}
// Calculate the sum of each vector
float sum[15];
vfloat32m1_t zero = vfmv_v_f_f32m1(0, vlm2);
sum[0] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs0, zero, vlm2));
sum[1] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs1, zero, vlm2));
sum[2] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs2, zero, vlm2));
sum[3] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs3, zero, vlm2));
sum[4] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs4, zero, vlm2));
sum[5] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs5, zero, vlm2));
sum[6] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs6, zero, vlm2));
sum[7] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs7, zero, vlm2));
sum[8] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs8, zero, vlm2));
sum[9] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs9, zero, vlm2));
sum[10] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs10, zero, vlm2));
sum[11] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs11, zero, vlm2));
sum[12] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs12, zero, vlm2));
sum[13] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs13, zero, vlm2));
sum[14] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs14, zero, vlm2));
vfloat32m4_t s0 = vfadd_vv_f32m4(vle32_v_f32m4(sum, 15), vle32_v_f32m4(bias + i, 15), 15);
vse32_v_f32m4(dst + i, s0, 15);
vfloat32m1_t zero = __riscv_vfmv_v_f_f32m1(0, vlm2);
sum[0] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs0, zero, vlm2));
sum[1] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs1, zero, vlm2));
sum[2] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs2, zero, vlm2));
sum[3] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs3, zero, vlm2));
sum[4] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs4, zero, vlm2));
sum[5] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs5, zero, vlm2));
sum[6] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs6, zero, vlm2));
sum[7] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs7, zero, vlm2));
sum[8] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs8, zero, vlm2));
sum[9] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs9, zero, vlm2));
sum[10] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs10, zero, vlm2));
sum[11] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs11, zero, vlm2));
sum[12] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs12, zero, vlm2));
sum[13] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs13, zero, vlm2));
sum[14] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs14, zero, vlm2));
vfloat32m4_t s0 = __riscv_vfadd_vv_f32m4(__riscv_vle32_v_f32m4(sum, 15), __riscv_vle32_v_f32m4(bias + i, 15), 15);
__riscv_vse32_v_f32m4(dst + i, s0, 15);
}
int unroll_tail = nvecs - i;
if (unroll_tail > 0)
{
const float* wptr = weights + i*wstep;
vfloat32m2_t
vs0 = vfmv_v_f_f32m2(0, vlm2), vs1 = vfmv_v_f_f32m2(0, vlm2), vs2 = vfmv_v_f_f32m2(0, vlm2),
vs3 = vfmv_v_f_f32m2(0, vlm2), vs4 = vfmv_v_f_f32m2(0, vlm2), vs5 = vfmv_v_f_f32m2(0, vlm2),
vs6 = vfmv_v_f_f32m2(0, vlm2), vs7 = vfmv_v_f_f32m2(0, vlm2), vs8 = vfmv_v_f_f32m2(0, vlm2),
vs9 = vfmv_v_f_f32m2(0, vlm2), vs10 = vfmv_v_f_f32m2(0, vlm2), vs11 = vfmv_v_f_f32m2(0, vlm2),
vs12 = vfmv_v_f_f32m2(0, vlm2), vs13 = vfmv_v_f_f32m2(0, vlm2);
vs0 = __riscv_vfmv_v_f_f32m2(0, vlm2), vs1 = __riscv_vfmv_v_f_f32m2(0, vlm2), vs2 = __riscv_vfmv_v_f_f32m2(0, vlm2),
vs3 = __riscv_vfmv_v_f_f32m2(0, vlm2), vs4 = __riscv_vfmv_v_f_f32m2(0, vlm2), vs5 = __riscv_vfmv_v_f_f32m2(0, vlm2),
vs6 = __riscv_vfmv_v_f_f32m2(0, vlm2), vs7 = __riscv_vfmv_v_f_f32m2(0, vlm2), vs8 = __riscv_vfmv_v_f_f32m2(0, vlm2),
vs9 = __riscv_vfmv_v_f_f32m2(0, vlm2), vs10 = __riscv_vfmv_v_f_f32m2(0, vlm2), vs11 = __riscv_vfmv_v_f_f32m2(0, vlm2),
vs12 = __riscv_vfmv_v_f_f32m2(0, vlm2), vs13 = __riscv_vfmv_v_f_f32m2(0, vlm2);
int avl = vecsize, vl;
for(int k = 0; k < vecsize; k += vl, wptr += vl, avl -= vl)
{
vl = vsetvl_e32m2(avl);
vfloat32m2_t v = vle32_v_f32m2(vec + k, vl);
vs0 = vfmacc_vv_f32m2(vs0, vle32_v_f32m2(wptr, vl), v, vl);
vs1 = vfmacc_vv_f32m2(vs1, vle32_v_f32m2(wptr + wstep*std::min(1, unroll_tail-1), vl), v, vl);
vs2 = vfmacc_vv_f32m2(vs2, vle32_v_f32m2(wptr + wstep*std::min(2, unroll_tail-1), vl), v, vl);
vs3 = vfmacc_vv_f32m2(vs3, vle32_v_f32m2(wptr + wstep*std::min(3, unroll_tail-1), vl), v, vl);
vs4 = vfmacc_vv_f32m2(vs4, vle32_v_f32m2(wptr + wstep*std::min(4, unroll_tail-1), vl), v, vl);
vs5 = vfmacc_vv_f32m2(vs5, vle32_v_f32m2(wptr + wstep*std::min(5, unroll_tail-1), vl), v, vl);
vs6 = vfmacc_vv_f32m2(vs6, vle32_v_f32m2(wptr + wstep*std::min(6, unroll_tail-1), vl), v, vl);
vs7 = vfmacc_vv_f32m2(vs7, vle32_v_f32m2(wptr + wstep*std::min(7, unroll_tail-1), vl), v, vl);
vs8 = vfmacc_vv_f32m2(vs8, vle32_v_f32m2(wptr + wstep*std::min(8, unroll_tail-1), vl), v, vl);
vs9 = vfmacc_vv_f32m2(vs9, vle32_v_f32m2(wptr + wstep*std::min(9, unroll_tail-1), vl), v, vl);
vs10 = vfmacc_vv_f32m2(vs10, vle32_v_f32m2(wptr + wstep*std::min(10, unroll_tail-1), vl), v, vl);
vs11 = vfmacc_vv_f32m2(vs11, vle32_v_f32m2(wptr + wstep*std::min(11, unroll_tail-1), vl), v, vl);
vs12 = vfmacc_vv_f32m2(vs12, vle32_v_f32m2(wptr + wstep*std::min(12, unroll_tail-1), vl), v, vl);
vs13 = vfmacc_vv_f32m2(vs13, vle32_v_f32m2(wptr + wstep*std::min(13, unroll_tail-1), vl), v, vl);
vl = __riscv_vsetvl_e32m2(avl);
vfloat32m2_t v = __riscv_vle32_v_f32m2(vec + k, vl);
vs0 = __riscv_vfmacc_vv_f32m2(vs0, __riscv_vle32_v_f32m2(wptr, vl), v, vl);
vs1 = __riscv_vfmacc_vv_f32m2(vs1, __riscv_vle32_v_f32m2(wptr + wstep*std::min(1, unroll_tail-1), vl), v, vl);
vs2 = __riscv_vfmacc_vv_f32m2(vs2, __riscv_vle32_v_f32m2(wptr + wstep*std::min(2, unroll_tail-1), vl), v, vl);
vs3 = __riscv_vfmacc_vv_f32m2(vs3, __riscv_vle32_v_f32m2(wptr + wstep*std::min(3, unroll_tail-1), vl), v, vl);
vs4 = __riscv_vfmacc_vv_f32m2(vs4, __riscv_vle32_v_f32m2(wptr + wstep*std::min(4, unroll_tail-1), vl), v, vl);
vs5 = __riscv_vfmacc_vv_f32m2(vs5, __riscv_vle32_v_f32m2(wptr + wstep*std::min(5, unroll_tail-1), vl), v, vl);
vs6 = __riscv_vfmacc_vv_f32m2(vs6, __riscv_vle32_v_f32m2(wptr + wstep*std::min(6, unroll_tail-1), vl), v, vl);
vs7 = __riscv_vfmacc_vv_f32m2(vs7, __riscv_vle32_v_f32m2(wptr + wstep*std::min(7, unroll_tail-1), vl), v, vl);
vs8 = __riscv_vfmacc_vv_f32m2(vs8, __riscv_vle32_v_f32m2(wptr + wstep*std::min(8, unroll_tail-1), vl), v, vl);
vs9 = __riscv_vfmacc_vv_f32m2(vs9, __riscv_vle32_v_f32m2(wptr + wstep*std::min(9, unroll_tail-1), vl), v, vl);
vs10 = __riscv_vfmacc_vv_f32m2(vs10, __riscv_vle32_v_f32m2(wptr + wstep*std::min(10, unroll_tail-1), vl), v, vl);
vs11 = __riscv_vfmacc_vv_f32m2(vs11, __riscv_vle32_v_f32m2(wptr + wstep*std::min(11, unroll_tail-1), vl), v, vl);
vs12 = __riscv_vfmacc_vv_f32m2(vs12, __riscv_vle32_v_f32m2(wptr + wstep*std::min(12, unroll_tail-1), vl), v, vl);
vs13 = __riscv_vfmacc_vv_f32m2(vs13, __riscv_vle32_v_f32m2(wptr + wstep*std::min(13, unroll_tail-1), vl), v, vl);
}
// Calculate the sum of each vector
float sum[14];
vfloat32m1_t zero = vfmv_v_f_f32m1(0, vlm2);
sum[0] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs0, zero, vlm2));
sum[1] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs1, zero, vlm2));
sum[2] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs2, zero, vlm2));
sum[3] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs3, zero, vlm2));
sum[4] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs4, zero, vlm2));
sum[5] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs5, zero, vlm2));
sum[6] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs6, zero, vlm2));
sum[7] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs7, zero, vlm2));
sum[8] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs8, zero, vlm2));
sum[9] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs9, zero, vlm2));
sum[10] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs10, zero, vlm2));
sum[11] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs11, zero, vlm2));
sum[12] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs12, zero, vlm2));
sum[13] = vfmv_f_s_f32m1_f32(vfredosum_vs_f32m2_f32m1(zero, vs13, zero, vlm2));
vfloat32m4_t s0 = vfadd_vv_f32m4(vle32_v_f32m4(sum, unroll_tail), vle32_v_f32m4(bias + i, unroll_tail), unroll_tail);
vse32_v_f32m4(dst + i, s0, unroll_tail);
vfloat32m1_t zero = __riscv_vfmv_v_f_f32m1(0, vlm2);
sum[0] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs0, zero, vlm2));
sum[1] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs1, zero, vlm2));
sum[2] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs2, zero, vlm2));
sum[3] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs3, zero, vlm2));
sum[4] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs4, zero, vlm2));
sum[5] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs5, zero, vlm2));
sum[6] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs6, zero, vlm2));
sum[7] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs7, zero, vlm2));
sum[8] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs8, zero, vlm2));
sum[9] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs9, zero, vlm2));
sum[10] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs10, zero, vlm2));
sum[11] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs11, zero, vlm2));
sum[12] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs12, zero, vlm2));
sum[13] = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m2_f32m1(vs13, zero, vlm2));
vfloat32m4_t s0 = __riscv_vfadd_vv_f32m4(__riscv_vle32_v_f32m4(sum, unroll_tail), __riscv_vle32_v_f32m4(bias + i, unroll_tail), unroll_tail);
__riscv_vse32_v_f32m4(dst + i, s0, unroll_tail);
}
}

@ -250,7 +250,10 @@ static const TestCase testConformanceConfig[] = {
{"test_einsum_transpose", 1, 1},
{"test_elu", 1, 1},
{"test_elu_default", 1, 1},
{"test_elu_default_expanded_ver18", 1, 1},
{"test_elu_example", 1, 1},
{"test_elu_example_expanded_ver18", 1, 1},
{"test_elu_expanded_ver18", 1, 1},
{"test_equal", 2, 1},
{"test_equal_bcast", 2, 1},
{"test_erf", 1, 1},
@ -454,6 +457,8 @@ static const TestCase testConformanceConfig[] = {
{"test_min_uint32", 2, 1},
{"test_min_uint64", 2, 1},
{"test_min_uint8", 2, 1},
{"test_mish", 1, 1},
{"test_mish_expanded", 1, 1},
{"test_mod_broadcast", 2, 1},
{"test_mod_int64_fmod", 2, 1},
{"test_mod_mixed_sign_float16", 2, 1},
@ -775,7 +780,10 @@ static const TestCase testConformanceConfig[] = {
{"test_sce_sum_log_prob_expanded", 2, 2},
{"test_selu", 1, 1},
{"test_selu_default", 1, 1},
{"test_selu_default_expanded_ver18", 1, 1},
{"test_selu_example", 1, 1},
{"test_selu_example_expanded_ver18", 1, 1},
{"test_selu_expanded_ver18", 1, 1},
{"test_sequence_insert_at_back", 2, 1},
{"test_sequence_insert_at_front", 3, 1},
{"test_shape", 1, 1},

@ -560,8 +560,14 @@ CASE(test_elu)
// no filter
CASE(test_elu_default)
// no filter
CASE(test_elu_default_expanded_ver18)
// no filter
CASE(test_elu_example)
// no filter
CASE(test_elu_example_expanded_ver18)
// no filter
CASE(test_elu_expanded_ver18)
// no filter
CASE(test_equal)
// no filter
CASE(test_equal_bcast)
@ -1032,6 +1038,10 @@ CASE(test_min_uint64)
// no filter
CASE(test_min_uint8)
// no filter
CASE(test_mish)
// no filter
CASE(test_mish_expanded)
// no filter
CASE(test_mod_broadcast)
// no filter
CASE(test_mod_int64_fmod)
@ -1783,8 +1793,14 @@ CASE(test_selu)
// no filter
CASE(test_selu_default)
// no filter
CASE(test_selu_default_expanded_ver18)
// no filter
CASE(test_selu_example)
// no filter
CASE(test_selu_example_expanded_ver18)
// no filter
CASE(test_selu_expanded_ver18)
// no filter
CASE(test_sequence_insert_at_back)
// no filter
CASE(test_sequence_insert_at_front)

@ -94,6 +94,9 @@
"test_dynamicquantizelinear_min_adjusted_expanded", // ---- same as above ---
"test_edge_pad", // Issue::Parser::Weights are required as inputs
"test_einsum_inner_prod", // Issue::Output shape does not match with reference
"test_elu_default_expanded_ver18",
"test_elu_example_expanded_ver18",
"test_elu_expanded_ver18",
"test_expand_dim_changed", // Issue:: Unknown error
"test_expand_dim_unchanged", // Issue:: Unknown error
"test_eyelike_populate_off_main_diagonal", // Issues::Layer::Can't create layer::Can't create layer "onnx_node_output_0!y" of type "EyeLike" in function 'getLayerInstance'
@ -357,6 +360,9 @@
"test_sce_sum_expanded", // ---- same as above ---
"test_sce_sum_log_prob", // ---- same as above ---
"test_sce_sum_log_prob_expanded", // ---- same as above ---
"test_selu_default_expanded_ver18",
"test_selu_example_expanded_ver18",
"test_selu_expanded_ver18",
"test_sequence_insert_at_back", // Issue:: Parser: typeProto.has_tensor_type() in function 'populateNet'
"test_sequence_insert_at_front", // ---- same as above ---
"test_shape", // Issue:: Parser: DNN/ONNX: can't find layer for output name: 'y'. Does model imported properly?

@ -228,9 +228,6 @@ if(TARGET ocv.3rdparty.gtk3 OR TARGET ocv.3rdparty.gtk2)
endif()
list(APPEND highgui_srcs ${CMAKE_CURRENT_LIST_DIR}/src/window_gtk.cpp)
list(APPEND tgts ${__gtk_dependency})
if(TARGET ocv.3rdparty.gthread)
list(APPEND tgts ocv.3rdparty.gthread)
endif()
if(TARGET ocv.3rdparty.gtkglext
AND __gtk_dependency STREQUAL "ocv.3rdparty.gtk2"
AND NOT OPENCV_GTK_DISABLE_GTKGLEXT
@ -245,27 +242,18 @@ if(TARGET ocv.3rdparty.gtk3 OR TARGET ocv.3rdparty.gtk2)
endif()
elseif("gtk" IN_LIST HIGHGUI_PLUGIN_LIST)
ocv_create_builtin_highgui_plugin(opencv_highgui_gtk ${__gtk_dependency} "window_gtk.cpp")
if(TARGET ocv.3rdparty.gthread)
ocv_target_link_libraries(opencv_highgui_gtk ocv.3rdparty.gthread)
endif()
if(TARGET ocv.3rdparty.gtkglext)
ocv_target_link_libraries(opencv_highgui_gtk ocv.3rdparty.gtkglext)
endif()
else()
if(TARGET ocv.3rdparty.gtk3 AND ("gtk3" IN_LIST HIGHGUI_PLUGIN_LIST OR HIGHGUI_PLUGIN_LIST STREQUAL "all"))
ocv_create_builtin_highgui_plugin(opencv_highgui_gtk3 ocv.3rdparty.gtk3 "window_gtk.cpp")
if(TARGET ocv.3rdparty.gthread)
ocv_target_link_libraries(opencv_highgui_gtk3 ocv.3rdparty.gthread)
endif()
if(TARGET ocv.3rdparty.gtkglext)
ocv_target_link_libraries(opencv_highgui_gtk3 ocv.3rdparty.gtkglext)
endif()
endif()
if(TARGET ocv.3rdparty.gtk2 AND ("gtk2" IN_LIST HIGHGUI_PLUGIN_LIST OR HIGHGUI_PLUGIN_LIST STREQUAL "all"))
ocv_create_builtin_highgui_plugin(opencv_highgui_gtk2 ocv.3rdparty.gtk2 "window_gtk.cpp")
if(TARGET ocv.3rdparty.gthread)
ocv_target_link_libraries(opencv_highgui_gtk2 ocv.3rdparty.gthread)
endif()
if(TARGET ocv.3rdparty.gtkglext)
ocv_target_link_libraries(opencv_highgui_gtk2 ocv.3rdparty.gtkglext)
endif()

@ -1,5 +1,5 @@
# --- GTK ---
ocv_clear_vars(HAVE_GTK HAVE_GTK2 HAVE_GTK3 HAVE_GTHREAD HAVE_GTKGLEXT)
ocv_clear_vars(HAVE_GTK HAVE_GTK2 HAVE_GTK3 HAVE_GTKGLEXT)
if(WITH_GTK)
if(NOT WITH_GTK_2_X)
ocv_check_modules(GTK3 gtk+-3.0)
@ -20,12 +20,6 @@ if(WITH_GTK)
endif()
endif()
endif()
ocv_check_modules(GTHREAD gthread-2.0>=2.32)
if(HAVE_GTK AND NOT HAVE_GTHREAD)
message(FATAL_ERROR "gthread not found. This library is required when building with GTK support")
else()
ocv_add_external_target(gthread "${GTHREAD_INCLUDE_DIRS}" "${GTHREAD_LIBRARIES}" "HAVE_GTHREAD")
endif()
if((WITH_OPENGL OR HAVE_OPENGL) AND (HAVE_GTK2 OR HAVE_GTK3))
if(HAVE_GTK2)
ocv_check_modules(GTKGLEXT gtkglext-1.0)

@ -50,9 +50,3 @@ elseif(DEFINED GTK_VERSION)
else()
message(STATUS "GTK+: YES")
endif()
if(HAVE_GTHREAD)
message(STATUS "GThread : YES (ver ${GTHREAD_VERSION})")
else()
message(STATUS "GThread : NO")
endif()

@ -613,13 +613,11 @@ static gboolean icvOnKeyPress( GtkWidget* widget, GdkEventKey* event, gpointer u
static void icvOnTrackbar( GtkWidget* widget, gpointer user_data );
static gboolean icvOnMouse( GtkWidget *widget, GdkEvent *event, gpointer user_data );
#ifdef HAVE_GTHREAD
int thread_started=0;
static gpointer icvWindowThreadLoop(gpointer data);
GMutex* last_key_mutex = NULL;
GCond* cond_have_key = NULL;
GThread* window_thread = NULL;
#endif
static int last_key = -1;
@ -666,7 +664,6 @@ static int gtk_InitSystem( int argc, char** argv )
int cv::startWindowThread(){
CV_TRACE_FUNCTION();
#ifdef HAVE_GTHREAD
gtk_InitSystem(0,NULL);
if (!thread_started)
{
@ -682,12 +679,8 @@ int cv::startWindowThread(){
}
thread_started = window_thread!=NULL;
return thread_started;
#else
return 0;
#endif
}
#ifdef HAVE_GTHREAD
gpointer icvWindowThreadLoop(gpointer /*data*/)
{
while(1){
@ -704,8 +697,6 @@ gpointer icvWindowThreadLoop(gpointer /*data*/)
return NULL;
}
#endif
#define CV_LOCK_MUTEX() cv::AutoLock lock(getWindowMutex())
static
@ -1276,7 +1267,6 @@ static void checkLastWindow()
// if last window...
if (getGTKWindows().empty())
{
#ifdef HAVE_GTHREAD
if( thread_started )
{
// send key press signal to jump out of any waiting waitKeyImpl's
@ -1284,7 +1274,6 @@ static void checkLastWindow()
}
else
{
#endif
// Some GTK+ modules (like the Unity module) use GDBusConnection,
// which has a habit of postponing cleanup by performing it via
// idle sources added to the main loop. Since this was the last window,
@ -1295,9 +1284,7 @@ static void checkLastWindow()
// thread will process events continuously.
while( gtk_events_pending() )
gtk_main_iteration();
#ifdef HAVE_GTHREAD
}
#endif
}
}
@ -1806,7 +1793,6 @@ static gboolean icvOnKeyPress(GtkWidget* widget, GdkEventKey* event, gpointer us
code |= event->state << 16;
#ifdef HAVE_GTHREAD
if(thread_started)
{
g_mutex_lock(last_key_mutex);
@ -1816,7 +1802,6 @@ static gboolean icvOnKeyPress(GtkWidget* widget, GdkEventKey* event, gpointer us
g_mutex_unlock(last_key_mutex);
}
else
#endif
{
last_key = code;
}
@ -2007,7 +1992,6 @@ static gboolean icvAlarm( gpointer user_data )
int waitKeyImpl( int delay )
{
#ifdef HAVE_GTHREAD
if (thread_started && g_thread_self() != window_thread)
{
gboolean expired = true;
@ -2041,7 +2025,6 @@ int waitKeyImpl( int delay )
return my_last_key;
}
else
#endif
{
int expired = 0;
guint timer = 0;

@ -307,9 +307,11 @@ CV_EXPORTS_W bool imreadmulti(const String& filename, CV_OUT std::vector<Mat>& m
/** @brief Returns the number of images inside the given file
The function imcount will return the number of pages in a multi-page image, or 1 for single-page images
The function imcount returns the number of pages in a multi-page image (e.g. TIFF), the number of frames in an animation (e.g. AVIF), and 1 otherwise.
If the image cannot be decoded, 0 is returned.
@param filename Name of file to be loaded.
@param flags Flag that can take values of cv::ImreadModes, default with cv::IMREAD_ANYCOLOR.
@todo when cv::IMREAD_LOAD_GDAL flag used the return value will be 0 or 1 because OpenCV's GDAL decoder doesn't support multi-page reading yet.
*/
CV_EXPORTS_W size_t imcount(const String& filename, int flags = IMREAD_ANYCOLOR);

@ -195,6 +195,7 @@ bool AvifDecoder::readHeader() {
m_width = decoder_->image->width;
m_height = decoder_->image->height;
m_frame_count = decoder_->imageCount;
channels_ = (decoder_->image->yuvFormat == AVIF_PIXEL_FORMAT_YUV400) ? 1 : 3;
if (decoder_->alphaPresent) ++channels_;
bit_depth_ = decoder_->image->depth;

@ -54,6 +54,7 @@ BaseImageDecoder::BaseImageDecoder()
m_buf_supported = false;
m_scale_denom = 1;
m_use_rgb = false;
m_frame_count = 1;
}

@ -64,6 +64,7 @@ public:
int width() const { return m_width; }
int height() const { return m_height; }
size_t getFrameCount() const { return m_frame_count; }
virtual int type() const { return m_type; }
ExifEntry_t getExifTag(const ExifTagName tag) const;
@ -93,6 +94,7 @@ protected:
bool m_buf_supported;
bool m_use_rgb; // flag of decode image as RGB order instead of BGR.
ExifReader m_exif;
size_t m_frame_count;
};

@ -279,6 +279,7 @@ bool TiffDecoder::readHeader()
m_width = wdth;
m_height = hght;
m_frame_count = TIFFNumberOfDirectories(tif);
if (ncn == 3 && photometric == PHOTOMETRIC_LOGLUV)
{
m_type = CV_32FC3;

@ -1226,26 +1226,8 @@ void ImageCollection::Impl::init(String const& filename, int flags) {
m_decoder->setSource(filename);
CV_Assert(m_decoder->readHeader());
// count the pages of the image collection
size_t count = 1;
while(m_decoder->nextPage()) count++;
m_size = count;
m_size = m_decoder->getFrameCount();
m_pages.resize(m_size);
// Reinitialize the decoder because we advanced to the last page while counting the pages of the image
#ifdef HAVE_GDAL
if (m_flags != IMREAD_UNCHANGED && (m_flags & IMREAD_LOAD_GDAL) == IMREAD_LOAD_GDAL) {
m_decoder = GdalDecoder().newDecoder();
}
else {
#endif
m_decoder = findDecoder(m_filename);
#ifdef HAVE_GDAL
}
#endif
m_decoder->setSource(m_filename);
m_decoder->readHeader();
}
size_t ImageCollection::Impl::size() const { return m_size; }

@ -191,7 +191,7 @@ int RGBE_ReadHeader(FILE *fp, int *width, int *height, rgbe_header_info *info)
info->valid |= RGBE_VALID_PROGRAMTYPE;
for(i=0;i<static_cast<int>(sizeof(info->programtype)-1);i++) {
if ((buf[i+2] == 0) || isspace(buf[i+2]))
break;
break;
info->programtype[i] = buf[i+2];
}
info->programtype[i] = 0;
@ -285,34 +285,37 @@ static int RGBE_WriteBytes_RLE(FILE *fp, unsigned char *data, int numbytes)
run_count = 1;
while( (beg_run + run_count < numbytes) && (run_count < 127)
&& (data[beg_run] == data[beg_run + run_count]))
run_count++;
run_count++;
}
/* if data before next big run is a short run then write it as such */
if ((old_run_count > 1)&&(old_run_count == beg_run - cur)) {
buf[0] = static_cast<unsigned char>(128 + old_run_count); /*write short run*/
buf[1] = data[cur];
if (fwrite(buf,sizeof(buf[0])*2,1,fp) < 1)
return rgbe_error(rgbe_write_error,NULL);
return rgbe_error(rgbe_write_error,NULL);
cur = beg_run;
}
/* write out bytes until we reach the start of the next run */
while(cur < beg_run) {
nonrun_count = beg_run - cur;
if (nonrun_count > 128)
nonrun_count = 128;
nonrun_count = 128;
buf[0] = static_cast<unsigned char>(nonrun_count);
if (fwrite(buf,sizeof(buf[0]),1,fp) < 1)
return rgbe_error(rgbe_write_error,NULL);
return rgbe_error(rgbe_write_error,NULL);
if (fwrite(&data[cur],sizeof(data[0])*nonrun_count,1,fp) < 1)
return rgbe_error(rgbe_write_error,NULL);
return rgbe_error(rgbe_write_error,NULL);
cur += nonrun_count;
}
/* write out next run if one was found */
if (run_count >= MINRUNLENGTH) {
buf[0] = static_cast<unsigned char>(128 + run_count);
buf[1] = data[beg_run];
if (fwrite(buf,sizeof(buf[0])*2,1,fp) < 1)
return rgbe_error(rgbe_write_error,NULL);
return rgbe_error(rgbe_write_error,NULL);
cur += run_count;
}
}
@ -330,19 +333,23 @@ int RGBE_WritePixels_RLE(FILE *fp, float *data, int scanline_width,
if ((scanline_width < 8)||(scanline_width > 0x7fff))
/* run length encoding is not allowed so write flat*/
return RGBE_WritePixels(fp,data,scanline_width*num_scanlines);
buffer = (unsigned char *)malloc(sizeof(unsigned char)*4*scanline_width);
if (buffer == NULL)
/* no buffer space so write flat */
return RGBE_WritePixels(fp,data,scanline_width*num_scanlines);
while(num_scanlines-- > 0) {
rgbe[0] = 2;
rgbe[1] = 2;
rgbe[2] = static_cast<unsigned char>(scanline_width >> 8);
rgbe[3] = scanline_width & 0xFF;
if (fwrite(rgbe, sizeof(rgbe), 1, fp) < 1) {
free(buffer);
return rgbe_error(rgbe_write_error,NULL);
}
for(i=0;i<scanline_width;i++) {
float2rgbe(rgbe,data[RGBE_DATA_RED],
data[RGBE_DATA_GREEN],data[RGBE_DATA_BLUE]);
@ -352,13 +359,16 @@ int RGBE_WritePixels_RLE(FILE *fp, float *data, int scanline_width,
buffer[i+3*scanline_width] = rgbe[3];
data += RGBE_DATA_SIZE;
}
/* write out each of the four channels separately run length encoded */
/* first red, then green, then blue, then exponent */
for(i=0;i<4;i++) {
for(i=0;i<4;i++)
{
if ((err = RGBE_WriteBytes_RLE(fp,&buffer[i*scanline_width],
scanline_width)) != RGBE_RETURN_SUCCESS) {
free(buffer);
return err;
scanline_width)) != RGBE_RETURN_SUCCESS)
{
free(buffer);
return err;
}
}
}
@ -376,6 +386,7 @@ int RGBE_ReadPixels_RLE(FILE *fp, float *data, int scanline_width,
if ((scanline_width < 8)||(scanline_width > 0x7fff))
/* run length encoding is not allowed so read flat*/
return RGBE_ReadPixels(fp,data,scanline_width*num_scanlines);
scanline_buffer = NULL;
/* read in each successive scanline */
while(num_scanlines > 0) {
@ -383,6 +394,7 @@ int RGBE_ReadPixels_RLE(FILE *fp, float *data, int scanline_width,
free(scanline_buffer);
return rgbe_error(rgbe_read_error,NULL);
}
if ((rgbe[0] != 2)||(rgbe[1] != 2)||(rgbe[2] & 0x80)) {
/* this file is not run length encoded */
rgbe2float(&data[RGBE_DATA_RED],&data[RGBE_DATA_GREEN],&data[RGBE_DATA_BLUE],rgbe);
@ -390,13 +402,15 @@ int RGBE_ReadPixels_RLE(FILE *fp, float *data, int scanline_width,
free(scanline_buffer);
return RGBE_ReadPixels(fp,data,scanline_width*num_scanlines-1);
}
if ((((int)rgbe[2])<<8 | rgbe[3]) != scanline_width) {
free(scanline_buffer);
return rgbe_error(rgbe_format_error,"wrong scanline width");
}
if (scanline_buffer == NULL)
scanline_buffer = (unsigned char *)
malloc(sizeof(unsigned char)*4*scanline_width);
scanline_buffer = (unsigned char *)malloc(sizeof(unsigned char)*4*scanline_width);
if (scanline_buffer == NULL)
return rgbe_error(rgbe_memory_error,"unable to allocate buffer space");
@ -404,47 +418,56 @@ int RGBE_ReadPixels_RLE(FILE *fp, float *data, int scanline_width,
/* read each of the four channels for the scanline into the buffer */
for(i=0;i<4;i++) {
ptr_end = &scanline_buffer[(i+1)*scanline_width];
while(ptr < ptr_end) {
if (fread(buf,sizeof(buf[0])*2,1,fp) < 1) {
free(scanline_buffer);
return rgbe_error(rgbe_read_error,NULL);
}
if (buf[0] > 128) {
/* a run of the same value */
count = buf[0]-128;
if ((count == 0)||(count > ptr_end - ptr)) {
free(scanline_buffer);
return rgbe_error(rgbe_format_error,"bad scanline data");
}
while(count-- > 0)
*ptr++ = buf[1];
}
else {
/* a non-run */
count = buf[0];
if ((count == 0)||(count > ptr_end - ptr)) {
free(scanline_buffer);
return rgbe_error(rgbe_format_error,"bad scanline data");
}
*ptr++ = buf[1];
if (--count > 0) {
if (fread(ptr,sizeof(*ptr)*count,1,fp) < 1) {
free(scanline_buffer);
return rgbe_error(rgbe_read_error,NULL);
}
ptr += count;
}
}
while(ptr < ptr_end)
{
if (fread(buf,sizeof(buf[0])*2,1,fp) < 1)
{
free(scanline_buffer);
return rgbe_error(rgbe_read_error,NULL);
}
if (buf[0] > 128)
{
/* a run of the same value */
count = buf[0]-128;
if ((count == 0)||(count > ptr_end - ptr)) {
free(scanline_buffer);
return rgbe_error(rgbe_format_error,"bad scanline data");
}
while(count-- > 0)
*ptr++ = buf[1];
}
else
{
/* a non-run */
count = buf[0];
if ((count == 0)||(count > ptr_end - ptr))
{
free(scanline_buffer);
return rgbe_error(rgbe_format_error,"bad scanline data");
}
*ptr++ = buf[1];
if (--count > 0) {
if (fread(ptr,sizeof(*ptr)*count,1,fp) < 1)
{
free(scanline_buffer);
return rgbe_error(rgbe_read_error,NULL);
}
ptr += count;
}
}
}
}
/* now convert data from buffer into floats */
for(i=0;i<scanline_width;i++) {
rgbe[0] = scanline_buffer[i];
rgbe[1] = scanline_buffer[i+scanline_width];
rgbe[2] = scanline_buffer[i+2*scanline_width];
rgbe[3] = scanline_buffer[i+3*scanline_width];
rgbe2float(&data[RGBE_DATA_RED],&data[RGBE_DATA_GREEN],
&data[RGBE_DATA_BLUE],rgbe);
rgbe2float(&data[RGBE_DATA_RED],&data[RGBE_DATA_GREEN],&data[RGBE_DATA_BLUE],rgbe);
data += RGBE_DATA_SIZE;
}
num_scanlines--;

@ -296,6 +296,7 @@ TEST_P(Imgcodecs_Avif_Animation_WriteReadSuite, encode_decode) {
return;
}
EXPECT_NO_THROW(cv::imwritemulti(output, anim_original, encoding_params_));
EXPECT_EQ(anim_original.size(), imcount(output));
// Read from file.
std::vector<cv::Mat> anim;

@ -2402,6 +2402,9 @@ static bool ocl_warpTransform_cols4(InputArray _src, OutputArray _dst, InputArra
_dst.create( dsize.empty() ? src.size() : dsize, src.type() );
UMat dst = _dst.getUMat();
if (src.u == dst.u)
src = src.clone();
float M[9] = {0};
int matRows = (op_type == OCL_OP_AFFINE ? 2 : 3);
Mat matM(matRows, 3, CV_32F, M), M1 = _M0.getMat();
@ -2506,6 +2509,9 @@ static bool ocl_warpTransform(InputArray _src, OutputArray _dst, InputArray _M0,
_dst.create( dsize.empty() ? src.size() : dsize, src.type() );
UMat dst = _dst.getUMat();
if (src.u == dst.u)
src = src.clone();
double M[9] = {0};
int matRows = (op_type == OCL_OP_AFFINE ? 2 : 3);
Mat matM(matRows, 3, CV_64F, M), M1 = _M0.getMat();

@ -185,6 +185,26 @@ OCL_TEST_P(WarpAffine, Mat)
}
}
OCL_TEST_P(WarpAffine, inplace_25853) // when src and dst are the same variable, ocl on/off should produce consistent and correct results
{
// Regression test for issue 25853: in-place warpAffine (src == dst) must give
// the same result whether the OpenCL path is enabled or disabled.
for (int j = 0; j < test_loop_times; j++)
{
// Integer depths tolerate less rounding error than float depths here.
double eps = depth < CV_32F ? 0.04 : 0.06;
random_roi();
// Random rotation (full circle) and scale (0.4x..2x) about the ROI center.
Mat M = getRotationMatrix2D(Point2f(src_roi.cols / 2.0f, src_roi.rows / 2.0f),
rng.uniform(-180.f, 180.f), rng.uniform(0.4f, 2.0f));
// Warp in place on both the CPU (Mat) and OpenCL (UMat) variants of the same data.
OCL_OFF(cv::warpAffine(src_roi, src_roi, M, dsize, interpolation));
OCL_ON(cv::warpAffine(usrc_roi, usrc_roi, M, dsize, interpolation));
// Copy the in-place results into the dst buffers that Near() compares.
dst_roi = src_roi.clone();
udst_roi = usrc_roi.clone();
Near(eps);
}
}
typedef WarpTest_cols4_Base WarpAffine_cols4;
OCL_TEST_P(WarpAffine_cols4, Mat)

@ -482,6 +482,7 @@ class FuncInfo(GeneralInfo):
self.objc_name = "getelem"
if self.namespace in namespaces_dict:
self.objc_name = '%s_%s' % (namespaces_dict[self.namespace], self.objc_name)
self.swift_name = '%s_%s' % (namespaces_dict[self.namespace], self.swift_name)
for m in decl[2]:
if m.startswith("="):
self.objc_name = m[1:]

@ -326,8 +326,21 @@ struct CharucoDetector::CharucoDetectorImpl {
interpolateCornersCharucoLocalHom(_markerCorners, _markerIds, image, charucoCorners, charucoIds);
// to return a charuco corner, its closest aruco markers should have been detected
filterCornersWithoutMinMarkers(charucoCorners, charucoIds, _markerIds, charucoCorners, charucoIds);
}
}
// Detect a ChArUco board and validate the result; on a failed validity check the
// charuco outputs are cleared so callers never receive an inconsistently built board.
// markerCorners/markerIds are optional: when the caller does not need them, local
// temporaries are used so detectBoard() still has somewhere to write.
void detectBoardWithCheck(InputArray image, OutputArray charucoCorners, OutputArray charucoIds,
InputOutputArrayOfArrays markerCorners, InputOutputArray markerIds) {
vector<vector<Point2f>> tmpMarkerCorners;
vector<int> tmpMarkerIds;
// Route marker outputs either to the caller's arrays or to the throwaway temporaries.
InputOutputArrayOfArrays _markerCorners = markerCorners.needed() ? markerCorners : tmpMarkerCorners;
InputOutputArray _markerIds = markerIds.needed() ? markerIds : tmpMarkerIds;
detectBoard(image, charucoCorners, charucoIds, _markerCorners, _markerIds);
// checkBoard verifies the interpolated corners against the detected markers;
// if the geometry is inconsistent, discard the charuco result entirely.
if (checkBoard(_markerCorners, _markerIds, charucoCorners, charucoIds) == false) {
CV_LOG_DEBUG(NULL, "ChArUco board is built incorrectly");
charucoCorners.release();
charucoIds.release();
}
}
};
CharucoDetector::CharucoDetector(const CharucoBoard &board, const CharucoParameters &charucoParams,
@ -370,12 +383,7 @@ void CharucoDetector::setRefineParameters(const RefineParameters& refineParamete
void CharucoDetector::detectBoard(InputArray image, OutputArray charucoCorners, OutputArray charucoIds,
InputOutputArrayOfArrays markerCorners, InputOutputArray markerIds) const {
charucoDetectorImpl->detectBoard(image, charucoCorners, charucoIds, markerCorners, markerIds);
if (charucoDetectorImpl->checkBoard(markerCorners, markerIds, charucoCorners, charucoIds) == false) {
CV_LOG_DEBUG(NULL, "ChArUco board is built incorrectly");
charucoCorners.release();
charucoIds.release();
}
charucoDetectorImpl->detectBoardWithCheck(image, charucoCorners, charucoIds, markerCorners, markerIds);
}
void CharucoDetector::detectDiamonds(InputArray image, OutputArrayOfArrays _diamondCorners, OutputArray _diamondIds,
@ -484,7 +492,7 @@ void CharucoDetector::detectDiamonds(InputArray image, OutputArrayOfArrays _diam
// interpolate the charuco corners of the diamond
vector<Point2f> currentMarkerCorners;
Mat aux;
charucoDetectorImpl->detectBoard(grey, currentMarkerCorners, aux, currentMarker, currentMarkerId);
charucoDetectorImpl->detectBoardWithCheck(grey, currentMarkerCorners, aux, currentMarker, currentMarkerId);
// if everything is ok, save the diamond
if(currentMarkerCorners.size() > 0ull) {

@ -764,23 +764,29 @@ TEST_P(CharucoBoard, testWrongSizeDetection)
ASSERT_FALSE(boardSize.width == boardSize.height);
aruco::CharucoBoard board(boardSize, 1.f, 0.5f, aruco::getPredefinedDictionary(aruco::DICT_4X4_50));
vector<int> detectedCharucoIds, detectedArucoIds;
vector<Point2f> detectedCharucoCorners;
vector<vector<Point2f>> detectedArucoCorners;
Mat boardImage;
board.generateImage(boardSize*40, boardImage);
swap(boardSize.width, boardSize.height);
aruco::CharucoDetector detector(aruco::CharucoBoard(boardSize, 1.f, 0.5f, aruco::getPredefinedDictionary(aruco::DICT_4X4_50)));
// try detect board with wrong size
detector.detectBoard(boardImage, detectedCharucoCorners, detectedCharucoIds, detectedArucoCorners, detectedArucoIds);
// aruco markers must be found
ASSERT_EQ(detectedArucoIds.size(), board.getIds().size());
ASSERT_EQ(detectedArucoCorners.size(), board.getIds().size());
// charuco corners should not be found in board with wrong size
ASSERT_TRUE(detectedCharucoCorners.empty());
ASSERT_TRUE(detectedCharucoIds.empty());
for(int i: {0, 1}) {
vector<int> detectedCharucoIds, detectedArucoIds;
vector<Point2f> detectedCharucoCorners;
vector<vector<Point2f>> detectedArucoCorners;
if (i == 0) {
detector.detectBoard(boardImage, detectedCharucoCorners, detectedCharucoIds, detectedArucoCorners, detectedArucoIds);
// aruco markers must be found
ASSERT_EQ(detectedArucoIds.size(), board.getIds().size());
ASSERT_EQ(detectedArucoCorners.size(), board.getIds().size());
} else {
detector.detectBoard(boardImage, detectedCharucoCorners, detectedCharucoIds);
}
// charuco corners should not be found in board with wrong size
ASSERT_TRUE(detectedCharucoCorners.empty());
ASSERT_TRUE(detectedCharucoIds.empty());
}
}

@ -1127,6 +1127,7 @@ void SystemInfoCollector::OnTestProgramStart(const testing::UnitTest&)
recordPropertyVerbose("cv_build_type", "Build type", getSnippetFromConfig("Configuration:", "\n"), CV_TEST_BUILD_CONFIG);
recordPropertyVerbose("cv_compiler", "Compiler", getSnippetFromConfig("C++ Compiler:", "\n"));
recordPropertyVerbose("implementation_hint", "Algorithm hint", getSnippetFromConfig("Algorithm Hint:", "\n"));
recordPropertyVerbose("hal", "HAL", getSnippetFromConfig("Custom HAL:", "\n"));
const char* parallelFramework = cv::currentParallelFramework();
if (parallelFramework)
{

@ -205,6 +205,8 @@ enum VideoCaptureProperties {
CAP_PROP_CODEC_EXTRADATA_INDEX = 68, //!< Positive index indicates that returning extra data is supported by the video back end. This can be retrieved as cap.retrieve(data, <returned index>). E.g. When reading from a h264 encoded RTSP stream, the FFmpeg backend could return the SPS and/or PPS if available (if sent in reply to a DESCRIBE request), from calls to cap.retrieve(data, <returned index>).
CAP_PROP_FRAME_TYPE = 69, //!< (read-only) FFmpeg back-end only - Frame type ascii code (73 = 'I', 80 = 'P', 66 = 'B' or 63 = '?' if unknown) of the most recently read frame.
CAP_PROP_N_THREADS = 70, //!< (**open-only**) Set the maximum number of threads to use. Use 0 to use as many threads as CPU cores (applicable for FFmpeg back-end only).
CAP_PROP_PTS = 71, //!< (read-only) FFmpeg back-end only - presentation timestamp of the most recently read frame using the FPS time base. e.g. fps = 25, VideoCapture::get(\ref CAP_PROP_PTS) = 3, presentation time = 3/25 seconds.
CAP_PROP_DTS_DELAY = 72, //!< (read-only) FFmpeg back-end only - maximum difference between presentation (pts) and decompression timestamps (dts) using FPS time base. e.g. delay is maximum when frame_num = 0, if true, VideoCapture::get(\ref CAP_PROP_PTS) = 0 and VideoCapture::get(\ref CAP_PROP_DTS_DELAY) = 2, dts = -2. Non zero values usually imply the stream is encoded using B-frames which are not decoded in presentation order.
#ifndef CV_DOXYGEN
CV__CAP_PROP_LATEST
#endif
@ -224,8 +226,10 @@ enum VideoWriterProperties {
VIDEOWRITER_PROP_HW_DEVICE = 7, //!< (**open-only**) Hardware device index (select GPU if multiple available). Device enumeration is acceleration type specific.
VIDEOWRITER_PROP_HW_ACCELERATION_USE_OPENCL= 8, //!< (**open-only**) If non-zero, create new OpenCL context and bind it to current thread. The OpenCL context created with Video Acceleration context attached it (if not attached yet) for optimized GPU data copy between cv::UMat and HW accelerated encoder.
VIDEOWRITER_PROP_RAW_VIDEO = 9, //!< (**open-only**) Set to non-zero to enable encapsulation of an encoded raw video stream. Each raw encoded video frame should be passed to VideoWriter::write() as single row or column of a \ref CV_8UC1 Mat. \note If the key frame interval is not 1 then it must be manually specified by the user. This can either be performed during initialization passing \ref VIDEOWRITER_PROP_KEY_INTERVAL as one of the extra encoder params to \ref VideoWriter::VideoWriter(const String &, int, double, const Size &, const std::vector< int > &params) or afterwards by setting the \ref VIDEOWRITER_PROP_KEY_FLAG with \ref VideoWriter::set() before writing each frame. FFMpeg backend only.
VIDEOWRITER_PROP_KEY_INTERVAL = 10, //!< (**open-only**) Set the key frame interval using raw video encapsulation (\ref VIDEOWRITER_PROP_RAW_VIDEO != 0). Defaults to 1 when not set. FFMpeg backend only.
VIDEOWRITER_PROP_KEY_FLAG = 11, //!< Set to non-zero to signal that the following frames are key frames or zero if not, when encapsulating raw video (\ref VIDEOWRITER_PROP_RAW_VIDEO != 0). FFMpeg backend only.
VIDEOWRITER_PROP_KEY_INTERVAL = 10, //!< (**open-only**) Set the key frame interval using raw video encapsulation (\ref VIDEOWRITER_PROP_RAW_VIDEO != 0). Defaults to 1 when not set. FFmpeg back-end only.
VIDEOWRITER_PROP_KEY_FLAG = 11, //!< Set to non-zero to signal that the following frames are key frames or zero if not, when encapsulating raw video (\ref VIDEOWRITER_PROP_RAW_VIDEO != 0). FFmpeg back-end only.
VIDEOWRITER_PROP_PTS = 12, //!< Specifies the frame presentation timestamp for each frame using the FPS time base. This property is **only** necessary when encapsulating **externally** encoded video where the decoding order differs from the presentation order, such as in GOP patterns with bi-directional B-frames. The value should be provided by your external encoder and for video sources with fixed frame rates it is equivalent to dividing the current frame's presentation time (\ref CAP_PROP_POS_MSEC) by the frame duration (1000.0 / VideoCapture::get(\ref CAP_PROP_FPS)). It can be queried from the resulting encapsulated video file using VideoCapture::get(\ref CAP_PROP_PTS). FFmpeg back-end only.
VIDEOWRITER_PROP_DTS_DELAY = 13, //!< Specifies the maximum difference between presentation (pts) and decompression timestamps (dts) using the FPS time base. This property is necessary **only** when encapsulating **externally** encoded video where the decoding order differs from the presentation order, such as in GOP patterns with bi-directional B-frames. The value should be calculated based on the specific GOP pattern used during encoding. For example, in a GOP with presentation order IBP and decoding order IPB, this value would be 1, as the B-frame is the second frame presented but the third to be decoded. It can be queried from the resulting encapsulated video file using VideoCapture::get(\ref CAP_PROP_DTS_DELAY). Non-zero values usually imply the stream is encoded using B-frames. FFmpeg back-end only.
#ifndef CV_DOXYGEN
CV__VIDEOWRITER_PROP_LATEST
#endif

@ -560,6 +560,8 @@ struct CvCapture_FFMPEG
AVFrame * picture;
AVFrame rgb_picture;
int64_t picture_pts;
int64_t pts_in_fps_time_base;
int64_t dts_delay_in_fps_time_base;
AVPacket packet;
Image_FFMPEG frame;
@ -615,6 +617,8 @@ void CvCapture_FFMPEG::init()
video_st = 0;
picture = 0;
picture_pts = AV_NOPTS_VALUE_;
pts_in_fps_time_base = 0;
dts_delay_in_fps_time_base = 0;
first_frame_number = -1;
memset( &rgb_picture, 0, sizeof(rgb_picture) );
memset( &frame, 0, sizeof(frame) );
@ -1581,13 +1585,26 @@ bool CvCapture_FFMPEG::grabFrame()
if (valid) {
if (picture_pts == AV_NOPTS_VALUE_) {
if (!rawMode)
int64_t dts = 0;
if (!rawMode) {
picture_pts = picture->CV_FFMPEG_PTS_FIELD != AV_NOPTS_VALUE_ && picture->CV_FFMPEG_PTS_FIELD != 0 ? picture->CV_FFMPEG_PTS_FIELD : picture->pkt_dts;
if(frame_number == 0) dts = picture->pkt_dts;
}
else {
const AVPacket& packet_raw = packet.data != 0 ? packet : packet_filtered;
picture_pts = packet_raw.pts != AV_NOPTS_VALUE_ && packet_raw.pts != 0 ? packet_raw.pts : packet_raw.dts;
if (frame_number == 0) dts = packet_raw.dts;
if (picture_pts < 0) picture_pts = 0;
}
#if LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(54, 1, 0) || LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(52, 111, 0)
AVRational frame_rate = video_st->avg_frame_rate;
#else
AVRational frame_rate = video_st->r_frame_rate;
#endif
if (picture_pts != AV_NOPTS_VALUE_)
pts_in_fps_time_base = av_rescale_q(picture_pts, video_st->time_base, AVRational{ frame_rate.den, frame_rate.num });
if (frame_number == 0 && dts != AV_NOPTS_VALUE_)
dts_delay_in_fps_time_base = -av_rescale_q(dts, video_st->time_base, AVRational{ frame_rate.den, frame_rate.num });
frame_number++;
}
}
@ -1855,6 +1872,11 @@ double CvCapture_FFMPEG::getProperty( int property_id ) const
case CAP_PROP_N_THREADS:
if (!rawMode)
return static_cast<double>(context->thread_count);
break;
case CAP_PROP_PTS:
return static_cast<double>(pts_in_fps_time_base);
case CAP_PROP_DTS_DELAY:
return static_cast<double>(dts_delay_in_fps_time_base);
default:
break;
}
@ -2107,6 +2129,8 @@ struct CvVideoWriter_FFMPEG
bool encode_video;
int idr_period;
bool key_frame;
int pts_index;
int b_frame_dts_delay;
};
static const char * icvFFMPEGErrStr(int err)
@ -2175,6 +2199,8 @@ void CvVideoWriter_FFMPEG::init()
encode_video = true;
idr_period = 0;
key_frame = false;
pts_index = -1;
b_frame_dts_delay = 0;
}
/**
@ -2343,7 +2369,7 @@ static AVCodecContext * icv_configure_video_stream_FFMPEG(AVFormatContext *oc,
static const int OPENCV_NO_FRAMES_WRITTEN_CODE = 1000;
static int icv_av_encapsulate_video_FFMPEG(AVFormatContext* oc, AVStream* video_st, AVCodecContext* c,
uint8_t* data, int sz, const int frame_idx, const bool key_frame)
uint8_t* data, int sz, const int frame_idx, const int pts_index, const int b_frame_dts_delay, const bool key_frame)
{
#if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(57, 0, 0)
AVPacket pkt_;
@ -2354,7 +2380,9 @@ static int icv_av_encapsulate_video_FFMPEG(AVFormatContext* oc, AVStream* video_
#endif
if(key_frame)
pkt->flags |= PKT_FLAG_KEY;
pkt->pts = frame_idx;
pkt->pts = pts_index == -1 ? frame_idx : pts_index;
pkt->dts = frame_idx - b_frame_dts_delay;
pkt->duration = 1;
pkt->size = sz;
pkt->data = data;
av_packet_rescale_ts(pkt, c->time_base, video_st->time_base);
@ -2449,7 +2477,7 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int
if (!encode_video) {
CV_Assert(cn == 1 && ((width > 0 && height == 1) || (width == 1 && height > 0 && step == 1)));
const bool set_key_frame = key_frame ? key_frame : idr_period ? frame_idx % idr_period == 0 : 1;
bool ret = icv_av_encapsulate_video_FFMPEG(oc, video_st, context, (uint8_t*)data, width, frame_idx, set_key_frame);
bool ret = icv_av_encapsulate_video_FFMPEG(oc, video_st, context, (uint8_t*)data, width, frame_idx, pts_index, b_frame_dts_delay, set_key_frame);
frame_idx++;
return ret;
}
@ -2651,6 +2679,12 @@ bool CvVideoWriter_FFMPEG::setProperty(int property_id, double value)
case VIDEOWRITER_PROP_KEY_FLAG:
key_frame = static_cast<bool>(value);
break;
case VIDEOWRITER_PROP_PTS:
pts_index = static_cast<int>(value);
break;
case VIDEOWRITER_PROP_DTS_DELAY:
b_frame_dts_delay = static_cast<int>(value);
break;
default:
return false;
}

@ -65,7 +65,7 @@ public:
result = gPhoto2Result;
method = methodStr;
}
virtual const char * what() const throw() CV_OVERRIDE
virtual const char * what() const CV_NOEXCEPT CV_OVERRIDE
{
return gp_result_as_string(result);
}

@ -293,9 +293,13 @@ const videoio_container_get_params_t videoio_container_get_params[] =
INSTANTIATE_TEST_CASE_P(/**/, videoio_container_get, testing::ValuesIn(videoio_container_get_params));
typedef tuple<string, string, int, int> videoio_encapsulate_params_t;
typedef tuple<string, string, int, int, bool, bool> videoio_encapsulate_params_t;
typedef testing::TestWithParam< videoio_encapsulate_params_t > videoio_encapsulate;
#if defined(WIN32) // remove when FFmpeg wrapper includes PR25874
#define WIN32_WAIT_FOR_FFMPEG_WRAPPER_UPDATE
#endif
TEST_P(videoio_encapsulate, write)
{
const VideoCaptureAPIs api = CAP_FFMPEG;
@ -307,6 +311,8 @@ TEST_P(videoio_encapsulate, write)
const int idrPeriod = get<2>(GetParam());
const int nFrames = get<3>(GetParam());
const string fileNameOut = tempfile(cv::format("test_encapsulated_stream.%s", ext.c_str()).c_str());
const bool setPts = get<4>(GetParam());
const bool tsWorking = get<5>(GetParam());
// Use VideoWriter to encapsulate encoded video read with VideoReader
{
@ -320,12 +326,16 @@ TEST_P(videoio_encapsulate, write)
capRaw.retrieve(extraData, codecExtradataIndex);
const int fourcc = static_cast<int>(capRaw.get(CAP_PROP_FOURCC));
const bool mpeg4 = (fourcc == fourccFromString("FMP4"));
VideoWriter container(fileNameOut, api, fourcc, fps, { width, height }, { VideoWriterProperties::VIDEOWRITER_PROP_RAW_VIDEO, 1, VideoWriterProperties::VIDEOWRITER_PROP_KEY_INTERVAL, idrPeriod });
ASSERT_TRUE(container.isOpened());
Mat rawFrame;
for (int i = 0; i < nFrames; i++) {
ASSERT_TRUE(capRaw.read(rawFrame));
#if !defined(WIN32_WAIT_FOR_FFMPEG_WRAPPER_UPDATE)
if (setPts && i == 0) {
ASSERT_TRUE(container.set(VIDEOWRITER_PROP_DTS_DELAY, capRaw.get(CAP_PROP_DTS_DELAY)));
}
#endif
ASSERT_FALSE(rawFrame.empty());
if (i == 0 && mpeg4) {
Mat tmp = rawFrame.clone();
@ -336,6 +346,11 @@ TEST_P(videoio_encapsulate, write)
memcpy(rawFrame.data, extraData.data, extraData.total());
memcpy(rawFrame.data + extraData.total(), tmp.data, tmp.total());
}
#if !defined(WIN32_WAIT_FOR_FFMPEG_WRAPPER_UPDATE)
if (setPts) {
ASSERT_TRUE(container.set(VIDEOWRITER_PROP_PTS, capRaw.get(CAP_PROP_PTS)));
}
#endif
container.write(rawFrame);
}
container.release();
@ -362,11 +377,15 @@ TEST_P(videoio_encapsulate, write)
ASSERT_TRUE(capActual.read(actual));
ASSERT_FALSE(actual.empty());
ASSERT_EQ(0, cvtest::norm(reference, actual, NORM_INF));
ASSERT_TRUE(capActualRaw.grab());
const bool keyFrameActual = capActualRaw.get(CAP_PROP_LRF_HAS_KEY_FRAME) == 1.;
const bool keyFrameReference = idrPeriod ? i % idrPeriod == 0 : 1;
ASSERT_EQ(keyFrameReference, keyFrameActual);
#if !defined(WIN32_WAIT_FOR_FFMPEG_WRAPPER_UPDATE)
if (tsWorking) {
ASSERT_EQ(round(capReference.get(CAP_PROP_POS_MSEC)), round(capActual.get(CAP_PROP_POS_MSEC)));
}
#endif
}
}
@ -375,30 +394,29 @@ TEST_P(videoio_encapsulate, write)
const videoio_encapsulate_params_t videoio_encapsulate_params[] =
{
videoio_encapsulate_params_t("video/big_buck_bunny.h264", "avi", 125, 125),
videoio_encapsulate_params_t("video/big_buck_bunny.h265", "mp4", 125, 125),
videoio_encapsulate_params_t("video/big_buck_bunny.wmv", "wmv", 12, 13),
videoio_encapsulate_params_t("video/big_buck_bunny.mp4", "mp4", 12, 13),
videoio_encapsulate_params_t("video/big_buck_bunny.mjpg.avi", "mp4", 0, 4),
videoio_encapsulate_params_t("video/big_buck_bunny.mov", "mp4", 12, 13),
videoio_encapsulate_params_t("video/big_buck_bunny.avi", "mp4", 125, 125),
videoio_encapsulate_params_t("video/big_buck_bunny.mpg", "mp4", 12, 13),
videoio_encapsulate_params_t("video/VID00003-20100701-2204.wmv", "wmv", 12, 13),
videoio_encapsulate_params_t("video/VID00003-20100701-2204.mpg", "mp4", 12,13),
videoio_encapsulate_params_t("video/VID00003-20100701-2204.avi", "mp4", 12, 13),
videoio_encapsulate_params_t("video/VID00003-20100701-2204.3GP", "mp4", 51, 52),
videoio_encapsulate_params_t("video/sample_sorenson.avi", "mp4", 12, 13),
videoio_encapsulate_params_t("video/sample_322x242_15frames.yuv420p.libxvid.mp4", "mp4", 3, 4),
videoio_encapsulate_params_t("video/sample_322x242_15frames.yuv420p.mpeg2video.mp4", "mp4", 12, 13),
videoio_encapsulate_params_t("video/sample_322x242_15frames.yuv420p.mjpeg.mp4", "mp4", 0, 5),
videoio_encapsulate_params_t("video/sample_322x242_15frames.yuv420p.libx264.mp4", "avi", 15, 15),
videoio_encapsulate_params_t("../cv/tracking/faceocc2/data/faceocc2.webm", "webm", 128, 129),
videoio_encapsulate_params_t("../cv/video/1920x1080.avi", "mp4", 12, 13),
videoio_encapsulate_params_t("../cv/video/768x576.avi", "avi", 15, 16)
videoio_encapsulate_params_t("video/big_buck_bunny.h264", "avi", 125, 125, false, false), // tsWorking = false: no timestamp information
videoio_encapsulate_params_t("video/big_buck_bunny.h265", "mp4", 125, 125, false, false), // tsWorking = false: no timestamp information
videoio_encapsulate_params_t("video/big_buck_bunny.wmv", "wmv", 12, 13, false, true),
videoio_encapsulate_params_t("video/big_buck_bunny.mp4", "mp4", 12, 13, false, true),
videoio_encapsulate_params_t("video/big_buck_bunny.mjpg.avi", "mp4", 0, 4, false, true),
videoio_encapsulate_params_t("video/big_buck_bunny.mov", "mp4", 12, 13, false, true),
videoio_encapsulate_params_t("video/big_buck_bunny.avi", "mp4", 125, 125, false, false), // tsWorking = false: PTS not available for all frames
videoio_encapsulate_params_t("video/big_buck_bunny.mpg", "mp4", 12, 13, true, true),
videoio_encapsulate_params_t("video/VID00003-20100701-2204.wmv", "wmv", 12, 13, false, true),
videoio_encapsulate_params_t("video/VID00003-20100701-2204.mpg", "mp4", 12, 13, false, false), // tsWorking = false: PTS not available for all frames
videoio_encapsulate_params_t("video/VID00003-20100701-2204.avi", "mp4", 12, 13, false, false), // tsWorking = false: Unable to correctly set PTS when writing
videoio_encapsulate_params_t("video/VID00003-20100701-2204.3GP", "mp4", 51, 52, false, false), // tsWorking = false: Source with variable fps
videoio_encapsulate_params_t("video/sample_sorenson.avi", "mp4", 12, 13, false, true),
videoio_encapsulate_params_t("video/sample_322x242_15frames.yuv420p.libxvid.mp4", "mp4", 3, 4, false, true),
videoio_encapsulate_params_t("video/sample_322x242_15frames.yuv420p.mpeg2video.mp4", "mpg", 12, 13, false, true),
videoio_encapsulate_params_t("video/sample_322x242_15frames.yuv420p.mjpeg.mp4", "mp4", 0, 5, false, true),
videoio_encapsulate_params_t("video/sample_322x242_15frames.yuv420p.libx264.mp4", "ts", 15, 15, true, true),
videoio_encapsulate_params_t("../cv/tracking/faceocc2/data/faceocc2.webm", "webm", 128, 129, false, true),
videoio_encapsulate_params_t("../cv/video/1920x1080.avi", "mp4", 12, 13, false, true),
videoio_encapsulate_params_t("../cv/video/768x576.avi", "avi", 15, 16, false, true),
// Not supported by with FFmpeg:
//videoio_encapsulate_params_t("video/sample_322x242_15frames.yuv420p.libx265.mp4", "mp4", 15, 15),
//videoio_encapsulate_params_t("video/sample_322x242_15frames.yuv420p.libvpx-vp9.mp4", "mp4", 15, 15),
//videoio_encapsulate_params_t("video/sample_322x242_15frames.yuv420p.libx265.mp4", "mp4", 15, 15, true, true),
//videoio_encapsulate_params_t("video/sample_322x242_15frames.yuv420p.libvpx-vp9.mp4", "mp4", 15, 15, false, true),
};
INSTANTIATE_TEST_CASE_P(/**/, videoio_encapsulate, testing::ValuesIn(videoio_encapsulate_params));

@ -18,7 +18,7 @@ set(CMAKE_ASM_COMPILER_TARGET ${CLANG_TARGET_TRIPLE})
set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
set(CMAKE_C_FLAGS "-march=rv64gc --gcc-toolchain=${RISCV_GCC_INSTALL_ROOT} -w ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "-march=rv64gc --gcc-toolchain=${RISCV_GCC_INSTALL_ROOT} -w ${CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "-march=rv64gc --gcc-toolchain=${RISCV_GCC_INSTALL_ROOT} -w ${CMAKE_CXX_FLAGS}")
set(CMAKE_FIND_ROOT_PATH ${CMAKE_SYSROOT})
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)

@ -0,0 +1,83 @@
/**
 * Note: Astra2 cameras are currently supported only on Windows and on Linux with kernel
 * version 4.15 or lower; newer Linux kernels may not work correctly with these devices.
 */
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
using namespace cv;
int main()
{
VideoCapture obsensorCapture(0, CAP_OBSENSOR);
if(!obsensorCapture.isOpened()){
std::cerr << "Failed to open obsensor capture! Index out of range or no response from device";
return -1;
}
double fx = obsensorCapture.get(CAP_PROP_OBSENSOR_INTRINSIC_FX);
double fy = obsensorCapture.get(CAP_PROP_OBSENSOR_INTRINSIC_FY);
double cx = obsensorCapture.get(CAP_PROP_OBSENSOR_INTRINSIC_CX);
double cy = obsensorCapture.get(CAP_PROP_OBSENSOR_INTRINSIC_CY);
std::cout << "obsensor camera intrinsic params: fx=" << fx << ", fy=" << fy << ", cx=" << cx << ", cy=" << cy << std::endl;
Mat image;
Mat depthMap;
Mat adjDepthMap;
// Minimum depth value
const double minVal = 300;
// Maximum depth value
const double maxVal = 5000;
while (true)
{
// Grab depth map like this:
// obsensorCapture >> depthMap;
// Another way to grab depth map (and bgr image).
if (obsensorCapture.grab())
{
if (obsensorCapture.retrieve(image, CAP_OBSENSOR_BGR_IMAGE))
{
imshow("RGB", image);
}
if (obsensorCapture.retrieve(depthMap, CAP_OBSENSOR_DEPTH_MAP))
{
depthMap.convertTo(adjDepthMap, CV_8U, 255.0 / (maxVal - minVal), -minVal * 255.0 / (maxVal - minVal));
applyColorMap(adjDepthMap, adjDepthMap, COLORMAP_JET);
imshow("DEPTH", adjDepthMap);
}
// depth map overlay on bgr image
static const float alpha = 0.6f;
if (!image.empty() && !depthMap.empty())
{
depthMap.convertTo(adjDepthMap, CV_8U, 255.0 / (maxVal - minVal), -minVal * 255.0 / (maxVal - minVal));
cv::resize(adjDepthMap, adjDepthMap, cv::Size(image.cols, image.rows));
for (int i = 0; i < image.rows; i++)
{
for (int j = 0; j < image.cols; j++)
{
cv::Vec3b& outRgb = image.at<cv::Vec3b>(i, j);
uint8_t depthValue = 255 - adjDepthMap.at<uint8_t>(i, j);
if (depthValue != 0 && depthValue != 255)
{
outRgb[0] = (uint8_t)(outRgb[0] * (1.0f - alpha) + depthValue * alpha);
outRgb[1] = (uint8_t)(outRgb[1] * (1.0f - alpha) + depthValue * alpha);
outRgb[2] = (uint8_t)(outRgb[2] * (1.0f - alpha) + depthValue * alpha);
}
}
}
imshow("DepthToColor", image);
}
image.release();
depthMap.release();
}
if (pollKey() >= 0)
break;
}
return 0;
}
Loading…
Cancel
Save