From 5acf351e4b9d099d446f401df690d559ed5dfdad Mon Sep 17 00:00:00 2001 From: Prasanth R Date: Thu, 14 Jul 2022 13:34:54 +0530 Subject: [PATCH 001/199] #21804 Add opencv_gapi as dep to samples/cpp --- samples/cpp/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/cpp/CMakeLists.txt b/samples/cpp/CMakeLists.txt index c9c4440f22..f23b333ca9 100644 --- a/samples/cpp/CMakeLists.txt +++ b/samples/cpp/CMakeLists.txt @@ -15,6 +15,7 @@ set(OPENCV_CPP_SAMPLES_REQUIRED_DEPS opencv_calib3d opencv_stitching opencv_dnn + opencv_gapi ${OPENCV_MODULES_PUBLIC} ${OpenCV_LIB_COMPONENTS}) ocv_check_dependencies(${OPENCV_CPP_SAMPLES_REQUIRED_DEPS}) From c8561eae2d45639a5c3d8507d361a6c2e85e4a62 Mon Sep 17 00:00:00 2001 From: Zhi-Qiang Zhou Date: Wed, 19 Oct 2022 11:17:23 +0800 Subject: [PATCH 002/199] Update region_layer.cpp Fix objectness (dstData[index + 4]) is not assigned if new_coords == 1. --- modules/dnn/src/layers/region_layer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/dnn/src/layers/region_layer.cpp b/modules/dnn/src/layers/region_layer.cpp index 73ed53974f..04d3257ee4 100644 --- a/modules/dnn/src/layers/region_layer.cpp +++ b/modules/dnn/src/layers/region_layer.cpp @@ -317,6 +317,7 @@ public: dstData[box_index + 1] = (y + y_tmp) / rows; dstData[box_index + 2] = (srcData[box_index + 2]) * (srcData[box_index + 2]) * 4 * biasData[2 * a] / wNorm; dstData[box_index + 3] = (srcData[box_index + 3]) * (srcData[box_index + 3]) * 4 * biasData[2 * a + 1] / hNorm; + dstData[box_index + 4] = srcData[p_index]; scale = srcData[p_index]; if (classfix == -1 && scale < thresh) From 2eb7bf4cfa464f704be1f2f93cd056648b4853a4 Mon Sep 17 00:00:00 2001 From: Alexander Duda Date: Wed, 30 Nov 2022 11:37:35 +0100 Subject: [PATCH 003/199] core: improve doc for setNumThreads The old documentation implies that the call is only valid for the next parallel region and must be called again if additional regions should be affected as well. 
--- modules/core/include/opencv2/core/utility.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp index 108c0d93e7..5fc5a4eaac 100644 --- a/modules/core/include/opencv2/core/utility.hpp +++ b/modules/core/include/opencv2/core/utility.hpp @@ -179,7 +179,7 @@ CV_EXPORTS ErrorCallback redirectError( ErrorCallback errCallback, void* userdat CV_EXPORTS String tempfile( const char* suffix = 0); CV_EXPORTS void glob(String pattern, std::vector& result, bool recursive = false); -/** @brief OpenCV will try to set the number of threads for the next parallel region. +/** @brief OpenCV will try to set the number of threads for subsequent parallel regions. If threads == 0, OpenCV will disable threading optimizations and run all it's functions sequentially. Passing threads \< 0 will reset threads number to system default. This function must From a2fc479c0b36d1786a9570ddb76f2ab72626994b Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 30 Dec 2022 15:43:41 +0000 Subject: [PATCH 004/199] Fix Heap-buffer-overflow READ in opj_jp2_apply_pclr https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=47342 The read overflow triggered by reading `src[j]` in ```cpp for (j = 0; j < max; ++j) { dst[j] = src[j]; } ``` The max is calculated as `new_comps[pcol].w * new_comps[pcol].h`, however the `src = old_comps[cmp].data;` which may have different `w` and `h` dimensions. 
--- 3rdparty/openjpeg/openjp2/jp2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/openjpeg/openjp2/jp2.c b/3rdparty/openjpeg/openjp2/jp2.c index 7c065ba742..d01881471f 100644 --- a/3rdparty/openjpeg/openjp2/jp2.c +++ b/3rdparty/openjpeg/openjp2/jp2.c @@ -1108,7 +1108,7 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image, pcol = cmap[i].pcol; src = old_comps[cmp].data; assert(src); /* verified above */ - max = new_comps[pcol].w * new_comps[pcol].h; + max = new_comps[i].w * new_comps[i].h; /* Direct use: */ if (cmap[i].mtyp == 0) { From 743d4ecf7be2ec9bd0fa9452f3b347c07100398c Mon Sep 17 00:00:00 2001 From: Genci Berisha Date: Sun, 22 Jan 2023 01:55:18 +0100 Subject: [PATCH 005/199] generateQR() method data loss fix Added regression parameterized test for Structure Append mode final_qr_code clear outside generateQR() method --- modules/objdetect/src/qrcode_encoder.cpp | 15 ++++++----- modules/objdetect/test/test_qrcode_encode.cpp | 26 +++++++++++++++++++ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/modules/objdetect/src/qrcode_encoder.cpp b/modules/objdetect/src/qrcode_encoder.cpp index 1016a17936..24a9548899 100644 --- a/modules/objdetect/src/qrcode_encoder.cpp +++ b/modules/objdetect/src/qrcode_encoder.cpp @@ -332,14 +332,16 @@ void QRCodeEncoderImpl::generateQR(const std::string &input) } total_num = (uint8_t) struct_num - 1; } - int segment_len = (int) ceil((int) input.length() / struct_num); - for (int i = 0; i < struct_num; i++) + auto string_itr = input.begin(); + for (int i = struct_num; i > 0; --i) { sequence_num = (uint8_t) i; - int segment_begin = i * segment_len; - int segemnt_end = min((i + 1) * segment_len, (int) input.length()) - 1; - std::string input_info = input.substr(segment_begin, segemnt_end - segment_begin + 1); + size_t segment_begin = string_itr - input.begin(); + size_t segment_end = (input.end() - string_itr) / i; + + std::string input_info = input.substr(segment_begin, segment_end); + 
string_itr += segment_end; int detected_version = versionAuto(input_info); CV_Assert(detected_version != -1); if (version_level == 0) @@ -349,7 +351,6 @@ void QRCodeEncoderImpl::generateQR(const std::string &input) payload.clear(); payload.reserve(MAX_PAYLOAD_LEN); - final_qrcodes.clear(); format = vector (15, 255); version_reserved = vector (18, 255); version_size = (21 + (version_level - 1) * 4); @@ -1234,6 +1235,7 @@ void QRCodeEncoderImpl::encode(const String& input, OutputArray output) generateQR(input); CV_Assert(!final_qrcodes.empty()); output.assign(final_qrcodes[0]); + final_qrcodes.clear(); } void QRCodeEncoderImpl::encodeStructuredAppend(const String& input, OutputArrayOfArrays output) @@ -1250,6 +1252,7 @@ void QRCodeEncoderImpl::encodeStructuredAppend(const String& input, OutputArrayO { output.getMatRef(i) = final_qrcodes[i]; } + final_qrcodes.clear(); } Ptr QRCodeEncoder::create(const QRCodeEncoder::Params& parameters) diff --git a/modules/objdetect/test/test_qrcode_encode.cpp b/modules/objdetect/test/test_qrcode_encode.cpp index fe2d51480b..14900c3078 100644 --- a/modules/objdetect/test/test_qrcode_encode.cpp +++ b/modules/objdetect/test/test_qrcode_encode.cpp @@ -450,6 +450,32 @@ TEST(Objdetect_QRCode_Encode_Decode_Structured_Append, DISABLED_regression) #endif // UPDATE_QRCODE_TEST_DATA +CV_ENUM(EncodeModes, QRCodeEncoder::EncodeMode::MODE_NUMERIC, + QRCodeEncoder::EncodeMode::MODE_ALPHANUMERIC, + QRCodeEncoder::EncodeMode::MODE_BYTE) + +typedef ::testing::TestWithParam Objdetect_QRCode_Encode_Decode_Structured_Append_Parameterized; +TEST_P(Objdetect_QRCode_Encode_Decode_Structured_Append_Parameterized, regression_22205) +{ + const std::string input_data = "the quick brown fox jumps over the lazy dog"; + + std::vector result_qrcodes; + + cv::QRCodeEncoder::Params params; + int encode_mode = GetParam(); + params.mode = static_cast(encode_mode); + + for(size_t struct_num = 2; struct_num < 5; ++struct_num) + { + params.structure_number = 
static_cast(struct_num); + cv::Ptr encoder = cv::QRCodeEncoder::create(params); + encoder->encodeStructuredAppend(input_data, result_qrcodes); + EXPECT_EQ(result_qrcodes.size(), struct_num) << "The number of QR Codes requested is not equal"<< + "to the one returned"; + } +} +INSTANTIATE_TEST_CASE_P(/**/, Objdetect_QRCode_Encode_Decode_Structured_Append_Parameterized, EncodeModes::all()); + TEST(Objdetect_QRCode_Encode_Decode, regression_issue22029) { const cv::String msg = "OpenCV"; From 9efaa3cce70851fdd3a6f09f0f34c969feb4dc5c Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Mon, 30 Jan 2023 21:30:37 +0300 Subject: [PATCH 006/199] RISC-V/RVV 0.7: v_add/v_sub saturation and avoiding 64-bit register in v_check_ --- .../include/opencv2/core/hal/intrin_rvv071.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp index 2bdc622ffd..f8765510f8 100644 --- a/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp @@ -291,14 +291,14 @@ OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_uint16x8, vsaddu_vv_u16m1, 8) OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_uint16x8, vssubu_vv_u16m1, 8) OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_int16x8, vsadd_vv_i16m1, 8) OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_int16x8, vssub_vv_i16m1, 8) -OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_int32x4, vsadd_vv_i32m1, 4) -OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_int32x4, vssub_vv_i32m1, 4) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_int32x4, vadd_vv_i32m1, 4) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_int32x4, vsub_vv_i32m1, 4) OPENCV_HAL_IMPL_RISCVV_BIN_OPN(*, v_int32x4, vmul_vv_i32m1, 4) OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_uint32x4, vadd_vv_u32m1, 4) OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_uint32x4, vsub_vv_u32m1, 4) OPENCV_HAL_IMPL_RISCVV_BIN_OPN(*, v_uint32x4, vmul_vv_u32m1, 4) -OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_int64x2, vsadd_vv_i64m1, 2) 
-OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_int64x2, vssub_vv_i64m1, 2) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_int64x2, vadd_vv_i64m1, 2) +OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_int64x2, vsub_vv_i64m1, 2) OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_uint64x2, vadd_vv_u64m1, 2) OPENCV_HAL_IMPL_RISCVV_BIN_OPN(-, v_uint64x2, vsub_vv_u64m1, 2) OPENCV_HAL_IMPL_RISCVV_BIN_OPN(+, v_float32x4, vfadd_vv_f32m1, 4) @@ -1909,14 +1909,14 @@ else return trailingZeros32(val); } inline bool v_check_all(const v_##_Tpvec& a) \ { \ suffix##m1_t v0 = vsrl_vx_##_T(vnot_v_##_T(a.val, num), shift, num); \ - vuint64m1_t v1 = vuint64m1_t(v0); \ - return (v1[0] | v1[1]) == 0; \ + vuint32m1_t v1 = vuint32m1_t(v0); \ + return (v1[0] | v1[1] | v1[2] | v1[3]) == 0; \ } \ inline bool v_check_any(const v_##_Tpvec& a) \ { \ suffix##m1_t v0 = vsrl_vx_##_T(a.val, shift, num); \ - vuint64m1_t v1 = vuint64m1_t(v0); \ - return (v1[0] | v1[1]) != 0; \ + vuint32m1_t v1 = vuint32m1_t(v0); \ + return (v1[0] | v1[1] | v1[2] | v1[3]) != 0; \ } OPENCV_HAL_IMPL_RISCVV_CHECK_ALLANY(uint8x16, vuint8, u8m1, 7, 16) From 4718a4bf81b4e26b513fae43c8423646376fdb5d Mon Sep 17 00:00:00 2001 From: wanli Date: Thu, 29 Dec 2022 17:14:02 +0800 Subject: [PATCH 007/199] make GEMM can be supported with transA and transB in CUDA --- .../dnn/include/opencv2/dnn/all_layers.hpp | 4 + .../dnn/src/cuda4dnn/primitives/matmul.hpp | 75 +++++++++++++------ .../dnn/src/layers/fully_connected_layer.cpp | 11 ++- modules/dnn/src/onnx/onnx_importer.cpp | 1 + modules/dnn/test/test_onnx_importer.cpp | 5 ++ 5 files changed, 69 insertions(+), 27 deletions(-) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 37af0ddea5..d447a221d3 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -374,6 +374,10 @@ CV__DNN_INLINE_NS_BEGIN static Ptr create(const LayerParams& params); }; + /** + * `InnerProduct`, `MatMul` and `Gemm` operations are all 
implemented by Fully Connected Layer. + * Parameter `is_matmul` is used to distinguish `MatMul` and `Gemm` from `InnerProduct`. + */ class CV_EXPORTS InnerProductLayer : public Layer { public: diff --git a/modules/dnn/src/cuda4dnn/primitives/matmul.hpp b/modules/dnn/src/cuda4dnn/primitives/matmul.hpp index e4ab3d2721..fe167de269 100644 --- a/modules/dnn/src/cuda4dnn/primitives/matmul.hpp +++ b/modules/dnn/src/cuda4dnn/primitives/matmul.hpp @@ -12,6 +12,8 @@ #include "../csl/tensor.hpp" #include "../csl/tensor_ops.hpp" +#include "../kernels/scale_shift.hpp" + #include #include @@ -23,7 +25,7 @@ namespace cv { namespace dnn { namespace cuda4dnn { public: using wrapper_type = GetCUDABackendWrapperType; - MatMulOp(csl::Stream stream_, csl::cublas::Handle handle, const Mat& constInp) + MatMulOp(csl::Stream stream_, csl::cublas::Handle handle, const Mat& constInp, const Mat& bias, bool _transA, bool _transB) : stream(std::move(stream_)), cublasHandle(std::move(handle)) { if (!constInp.empty()) @@ -31,6 +33,15 @@ namespace cv { namespace dnn { namespace cuda4dnn { constTensor = csl::makeTensorHeader(constInp); csl::copyMatToTensor(constInp, constTensor, stream); } + + if (!bias.empty()) + { + biasTensor = csl::makeTensorHeader(bias); + csl::copyMatToTensor(bias, biasTensor, stream); + } + + transA = _transA; + transB = _transB; } void forward( @@ -69,50 +80,72 @@ namespace cv { namespace dnn { namespace cuda4dnn { CV_Assert(input2.get_axis_size(i) == size); } - auto m = input1.get_axis_size(-2); - auto n = input1.get_axis_size(-1); - auto b = input1.size() / m / n; - int k; - if (constTensor.empty()) + int m1, n1, b1, m2, n2, b2; + if (transA) + { + m1 = input1.get_axis_size(-1); + n1 = input1.get_axis_size(-2); + } + else + { + m1 = input1.get_axis_size(-2); + n1 = input1.get_axis_size(-1); + } + + if (transB) { - k = input2.get_axis_size(-1); - CV_Assert(input2.get_axis_size(-2) == n); + m2 = input2.get_axis_size(-1); + n2 = input2.get_axis_size(-2); } else { - k = 
input2.get_axis_size(-2); - CV_Assert(input2.get_axis_size(-1) == n); + m2 = input2.get_axis_size(-2); + n2 = input2.get_axis_size(-1); } - CV_Assert(output.get_axis_size(-2) == m); - CV_Assert(output.get_axis_size(-1) == k); + + b1 = input1.size() / m1 / n1; + b2 = input2.size() / m2 / n2; + CV_Assert(b1 == b2); + CV_Assert(n1 == m2); + CV_Assert(output.get_axis_size(-2) == m1); + CV_Assert(output.get_axis_size(-1) == n2); if (get_effective_rank(output) <= 2) { - CV_Assert(b == 1); + CV_Assert(b2 == 1); CV_Assert(get_effective_rank(input1) <= 2); CV_Assert(get_effective_rank(input2) <= 2); - csl::tensor_ops::gemm(cublasHandle, 0.0, output, 1.0, false, input1, !constTensor.empty(), input2); + csl::tensor_ops::gemm(cublasHandle, 0.0, output, 1.0, transA, input1, transB, input2); + // used for GEMM + if (!biasTensor.empty()) + kernels::biasN(stream, output, output, 1, biasTensor); } else { CV_Assert(rank >= 3); - input1.reshape(b, m, n); - if (constTensor.empty()) - input2.reshape(b, n, k); + if (transA) + input1.reshape(b1, n1, m1); + else + input1.reshape(b1, m1, n1); + + if (transB) + input2.reshape(b2, n2, m2); else - input2.reshape(b, k, n); - output.reshape(b, m, k); + input2.reshape(b2, m2, n2); + + output.reshape(b1, m1, n2); input1.squeeze_to(3); input2.squeeze_to(3); output.squeeze_to(3); - csl::tensor_ops::gemmStridedBatched(cublasHandle, 0.0, output, 1.0, false, input1, !constTensor.empty(), input2); + csl::tensor_ops::gemmStridedBatched(cublasHandle, 0.0, output, 1.0, transA, input1, transB, input2); } } private: csl::Stream stream; csl::cublas::Handle cublasHandle; - csl::Tensor constTensor; + csl::Tensor constTensor, biasTensor; + bool transA, transB; }; }}} /* namespace cv::dnn::cuda4dnn */ diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp index 539c083399..34c8b33515 100644 --- a/modules/dnn/src/layers/fully_connected_layer.cpp +++ b/modules/dnn/src/layers/fully_connected_layer.cpp @@ 
-115,6 +115,8 @@ public: biasMat = Mat::zeros(1, oriMat.size[oriMat.dims - 2], weightsMat.type()); else biasMat = Mat::zeros(1, numOutput, weightsMat.type()); + + transB = !transB; } } @@ -155,7 +157,6 @@ public: } else { - CV_Assert(!transA && !transB); CV_CheckEQ(inputsTmp.size(), (size_t)1, ""); CV_CheckEQ(blobs[0].dims, 2, ""); if(isMatMul) @@ -183,7 +184,7 @@ public: return axis == 1 && !tranAorB; #endif return backendId == DNN_BACKEND_OPENCV || - (backendId == DNN_BACKEND_CUDA && !tranAorB) || + backendId == DNN_BACKEND_CUDA || (backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !tranAorB) || (backendId == DNN_BACKEND_WEBNN && axis == 1 && !tranAorB) || backendId == DNN_BACKEND_CANN;; @@ -527,7 +528,6 @@ public: if (!blobs.empty()) { - CV_Assert(!transA && !transB); int inp1Dim = input[0].dims; if (isMatMul) { @@ -611,12 +611,12 @@ public: const std::vector>& outputs ) override { + auto biasMat_ = bias ? biasMat : Mat(); auto context = reinterpret_cast(context_); auto input_wrapper = inputs[0].dynamicCast(); if (weightsMat.empty() || isMatMul) { - CV_Assert(!bias); int inp2Dim; // broadcast is not supported with CUDA if(weightsMat.empty()) @@ -627,13 +627,12 @@ public: inp2Dim = oriMat.dims; if(input_wrapper->getRank() == inp2Dim) - return make_cuda_node(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), oriMat); + return make_cuda_node(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), oriMat, biasMat_, transA, transB); else return Ptr(); } auto flatten_start_axis = normalize_axis(axis, input_wrapper->getRank()); - auto biasMat_ = bias ? 
biasMat : Mat(); return make_cuda_node(preferableTarget, std::move(context->stream), std::move(context->cublas_handle), flatten_start_axis, weightsMat, biasMat_); } #endif diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index fe4d4660f3..fa3ae2b789 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -2056,6 +2056,7 @@ void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodePr } layerParams.set("bias_term", node_proto.input_size() == 3); + layerParams.set("is_matmul", true); addLayer(layerParams, node_proto); } diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 12dc3987b9..ce141d7e05 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -1745,6 +1745,11 @@ TEST_P(Test_ONNX_layers, Gemm) testONNXModels("gemm_first_const"); } +TEST_P(Test_ONNX_layers, Gemm_bias) +{ + testONNXModels("gemm_vector_bias"); +} + TEST_P(Test_ONNX_layers, Quantized_Convolution) { // The difference of QOperator and QDQ format: From 400572b19f01ebda905c828c6ed43d61156c4de7 Mon Sep 17 00:00:00 2001 From: whuaegeansea Date: Wed, 1 Feb 2023 01:34:22 +0800 Subject: [PATCH 008/199] Fix bug --- modules/features2d/src/sift.simd.hpp | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/modules/features2d/src/sift.simd.hpp b/modules/features2d/src/sift.simd.hpp index d2fe0bb429..70f773b7bd 100644 --- a/modules/features2d/src/sift.simd.hpp +++ b/modules/features2d/src/sift.simd.hpp @@ -850,7 +850,6 @@ else // CV_8U #endif } #else - float* dst = dstMat.ptr(row); float nrm1 = 0; for( k = 0; k < len; k++ ) { @@ -858,20 +857,22 @@ else // CV_8U nrm1 += rawDst[k]; } nrm1 = 1.f/std::max(nrm1, FLT_EPSILON); -if( dstMat.type() == CV_32F ) -{ - for( k = 0; k < len; k++ ) + if( dstMat.type() == CV_32F ) { - dst[k] = std::sqrt(rawDst[k] * nrm1); + float *dst = 
dstMat.ptr(row); + for( k = 0; k < len; k++ ) + { + dst[k] = std::sqrt(rawDst[k] * nrm1); + } } -} -else // CV_8U -{ - for( k = 0; k < len; k++ ) + else // CV_8U { - dst[k] = saturate_cast(std::sqrt(rawDst[k] * nrm1)*SIFT_INT_DESCR_FCTR); + uint8_t *dst = dstMat.ptr(row); + for( k = 0; k < len; k++ ) + { + dst[k] = saturate_cast(std::sqrt(rawDst[k] * nrm1)*SIFT_INT_DESCR_FCTR); + } } -} #endif } From c855dcc52f0ed1119a1a901b3a5ef2c00ea69bd8 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Tue, 31 Jan 2023 20:13:41 +0300 Subject: [PATCH 009/199] Suppressed tons of Wdeprecated-copy warnings that jump out of GTest after XCode update to 13.1 on Mac M1. --- modules/gapi/cmake/DownloadADE.cmake | 6 ++++++ modules/ts/include/opencv2/ts.hpp | 3 +++ 2 files changed, 9 insertions(+) diff --git a/modules/gapi/cmake/DownloadADE.cmake b/modules/gapi/cmake/DownloadADE.cmake index 3157436369..e22c4f1a32 100644 --- a/modules/gapi/cmake/DownloadADE.cmake +++ b/modules/gapi/cmake/DownloadADE.cmake @@ -24,6 +24,12 @@ add_library(ade STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${ADE_include} ${ADE_sources} ) + +# https://github.com/opencv/ade/issues/32 +if(CV_CLANG AND CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13.1) + ocv_warnings_disable(CMAKE_CXX_FLAGS -Wdeprecated-copy) +endif() + target_include_directories(ade PUBLIC $) set_target_properties(ade PROPERTIES POSITION_INDEPENDENT_CODE True diff --git a/modules/ts/include/opencv2/ts.hpp b/modules/ts/include/opencv2/ts.hpp index 2e7a241d8e..eee0376a0e 100644 --- a/modules/ts/include/opencv2/ts.hpp +++ b/modules/ts/include/opencv2/ts.hpp @@ -123,6 +123,9 @@ //#pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wsuggest-override" #endif +#if defined(__OPENCV_BUILD) && defined(__APPLE__) && defined(__clang__) && ((__clang_major__*100 + __clang_minor__) >= 1301) +#pragma clang diagnostic ignored "-Wdeprecated-copy" +#endif #include "opencv2/ts/ts_gtest.h" #if defined(__OPENCV_BUILD) 
&& defined(__GNUC__) && __GNUC__ >= 5 //#pragma GCC diagnostic pop From f8f425e34c47dc2a95c3082d56d073d0a81295aa Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Fri, 3 Feb 2023 18:02:45 +0800 Subject: [PATCH 010/199] Change custom_hal.hpp output location --- CMakeLists.txt | 2 +- modules/core/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6a620c94af..cbf43a1605 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -912,7 +912,7 @@ foreach(hal ${OpenCV_HAL}) endif() endif() endforeach() -configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/custom_hal.hpp.in" "${CMAKE_BINARY_DIR}/custom_hal.hpp" @ONLY) +configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/custom_hal.hpp.in" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/custom_hal.hpp" @ONLY) unset(_hal_includes) diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index fe747540e8..517b0f31a5 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -179,7 +179,7 @@ ocv_install_3rdparty_licenses(SoftFloat "${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/So # generate data (samples data) config file -set(OPENCV_DATA_CONFIG_FILE "${CMAKE_BINARY_DIR}/opencv_data_config.hpp") +set(OPENCV_DATA_CONFIG_FILE "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/opencv_data_config.hpp") set(OPENCV_DATA_CONFIG_STR "") if(CMAKE_INSTALL_PREFIX) From b0aace31ecbb09b9976dc513bbe60b6e1561f3e6 Mon Sep 17 00:00:00 2001 From: keith siilats Date: Sun, 5 Feb 2023 19:39:25 -0500 Subject: [PATCH 011/199] Update charuco_detector.cpp Delete the debug print statements accidentally left in --- modules/objdetect/src/aruco/charuco_detector.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/objdetect/src/aruco/charuco_detector.cpp b/modules/objdetect/src/aruco/charuco_detector.cpp index 3955a8f236..b5ca70a274 100644 --- a/modules/objdetect/src/aruco/charuco_detector.cpp +++ b/modules/objdetect/src/aruco/charuco_detector.cpp @@ -129,25 
+129,25 @@ struct CharucoDetector::CharucoDetectorImpl { // approximated pose estimation using marker corners Mat approximatedRvec, approximatedTvec; Mat objPoints, imgPoints; // object and image points for the solvePnP function - printf("before board.matchImagePoints(markerCorners, markerIds, objPoints, imgPoints);\n"); + // printf("before board.matchImagePoints(markerCorners, markerIds, objPoints, imgPoints);\n"); board.matchImagePoints(markerCorners, markerIds, objPoints, imgPoints); - printf("after board.matchImagePoints(markerCorners, markerIds, objPoints, imgPoints);\n"); + // printf("after board.matchImagePoints(markerCorners, markerIds, objPoints, imgPoints);\n"); if (objPoints.total() < 4ull) // need, at least, 4 corners return; solvePnP(objPoints, imgPoints, charucoParameters.cameraMatrix, charucoParameters.distCoeffs, approximatedRvec, approximatedTvec); - printf("after solvePnP\n"); + // printf("after solvePnP\n"); // project chessboard corners vector allChessboardImgPoints; projectPoints(board.getChessboardCorners(), approximatedRvec, approximatedTvec, charucoParameters.cameraMatrix, charucoParameters.distCoeffs, allChessboardImgPoints); - printf("after projectPoints\n"); + // printf("after projectPoints\n"); // calculate maximum window sizes for subpixel refinement. 
The size is limited by the distance // to the closes marker corner to avoid erroneous displacements to marker corners vector subPixWinSizes = getMaximumSubPixWindowSizes(markerCorners, markerIds, allChessboardImgPoints); // filter corners outside the image and subpixel-refine charuco corners - printf("before selectAndRefineChessboardCorners\n"); + // printf("before selectAndRefineChessboardCorners\n"); selectAndRefineChessboardCorners(allChessboardImgPoints, image, charucoCorners, charucoIds, subPixWinSizes); } From 3d635cb4a7fa5e6a5190768092d45d3bb685e45d Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Thu, 2 Feb 2023 13:57:20 +0300 Subject: [PATCH 012/199] Warning suppression fix for XCode 13.1 and newer. Backport #23203 --- 3rdparty/libpng/CMakeLists.txt | 5 +++++ modules/calib3d/test/test_cameracalibration.cpp | 9 --------- modules/flann/include/opencv2/flann/index_testing.h | 2 -- modules/ml/src/kdtree.cpp | 3 --- modules/ts/include/opencv2/ts.hpp | 3 +++ modules/video/test/test_optflowpyrlk.cpp | 3 +-- 6 files changed, 9 insertions(+), 16 deletions(-) diff --git a/3rdparty/libpng/CMakeLists.txt b/3rdparty/libpng/CMakeLists.txt index f72b966079..921391eadf 100644 --- a/3rdparty/libpng/CMakeLists.txt +++ b/3rdparty/libpng/CMakeLists.txt @@ -66,6 +66,11 @@ if(PPC64LE OR PPC64) endif() endif() +if(APPLE AND CV_CLANG AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13.1) + ocv_warnings_disable(CMAKE_C_FLAGS -Wnull-pointer-subtraction) + ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-but-set-variable) +endif() + # ---------------------------------------------------------------------------------- # Define the library target: # ---------------------------------------------------------------------------------- diff --git a/modules/calib3d/test/test_cameracalibration.cpp b/modules/calib3d/test/test_cameracalibration.cpp index a63c131a0f..407b9004b1 100644 --- a/modules/calib3d/test/test_cameracalibration.cpp +++ b/modules/calib3d/test/test_cameracalibration.cpp @@ 
-559,12 +559,9 @@ void CV_CameraCalibrationTest::run( int start_from ) i = 0; double dx,dy; double rx,ry; - double meanDx,meanDy; double maxDx = 0.0; double maxDy = 0.0; - meanDx = 0; - meanDy = 0; for( currImage = 0; currImage < numImages; currImage++ ) { double imageMeanDx = 0; @@ -576,9 +573,6 @@ void CV_CameraCalibrationTest::run( int start_from ) dx = rx - imagePoints[i].x; dy = ry - imagePoints[i].y; - meanDx += dx; - meanDy += dy; - imageMeanDx += dx*dx; imageMeanDy += dy*dy; @@ -601,9 +595,6 @@ void CV_CameraCalibrationTest::run( int start_from ) perViewErrors[currImage] = goodPerViewErrors[currImage]; } - meanDx /= numImages * etalonSize.width * etalonSize.height; - meanDy /= numImages * etalonSize.width * etalonSize.height; - /* ========= Compare parameters ========= */ /* ----- Compare focal lengths ----- */ diff --git a/modules/flann/include/opencv2/flann/index_testing.h b/modules/flann/include/opencv2/flann/index_testing.h index 207adef449..4c00143326 100644 --- a/modules/flann/include/opencv2/flann/index_testing.h +++ b/modules/flann/include/opencv2/flann/index_testing.h @@ -246,7 +246,6 @@ void test_index_precisions(NNIndex& index, const Matrix& index, const Matrix 0)&&(time > maxTime)&&(p2 middle; k-- ) { CV_Assert(vals[ofs[k]] >= pivot); - more += vals[ofs[k]] > pivot; } return vals[ofs[middle]]; diff --git a/modules/ts/include/opencv2/ts.hpp b/modules/ts/include/opencv2/ts.hpp index 5364143d15..3777c8eb2b 100644 --- a/modules/ts/include/opencv2/ts.hpp +++ b/modules/ts/include/opencv2/ts.hpp @@ -122,6 +122,9 @@ //#pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wsuggest-override" #endif +#if defined(__OPENCV_BUILD) && defined(__APPLE__) && defined(__clang__) && ((__clang_major__*100 + __clang_minor__) >= 1301) +#pragma clang diagnostic ignored "-Wdeprecated-copy" +#endif #include "opencv2/ts/ts_gtest.h" #if defined(__OPENCV_BUILD) && defined(__GNUC__) && __GNUC__ >= 5 //#pragma GCC diagnostic pop diff --git 
a/modules/video/test/test_optflowpyrlk.cpp b/modules/video/test/test_optflowpyrlk.cpp index 1f08270ec7..a7e9a2d3a5 100644 --- a/modules/video/test/test_optflowpyrlk.cpp +++ b/modules/video/test/test_optflowpyrlk.cpp @@ -65,7 +65,7 @@ void CV_OptFlowPyrLKTest::run( int ) const int bad_points_max = 8; /* test parameters */ - double max_err = 0., sum_err = 0; + double max_err = 0.; int pt_cmpd = 0; int pt_exceed = 0; int merr_i = 0, merr_j = 0, merr_k = 0, merr_nan = 0; @@ -175,7 +175,6 @@ void CV_OptFlowPyrLKTest::run( int ) } pt_exceed += err > success_error_level; - sum_err += err; pt_cmpd++; } else From b07031b59415e38ad12e0f8aeee27531e7a36c07 Mon Sep 17 00:00:00 2001 From: Vadim Levin Date: Mon, 6 Feb 2023 16:41:20 +0300 Subject: [PATCH 013/199] feat: named arguments handling in Python interface --- .../include/opencv2/core/bindings_utils.hpp | 27 ++ modules/core/include/opencv2/core/cvdef.h | 1 + modules/python/src2/gen2.py | 245 +++++++++++++----- modules/python/src2/hdr_parser.py | 14 +- modules/python/test/test_misc.py | 23 ++ 5 files changed, 250 insertions(+), 60 deletions(-) diff --git a/modules/core/include/opencv2/core/bindings_utils.hpp b/modules/core/include/opencv2/core/bindings_utils.hpp index 001d91c381..64f346570a 100644 --- a/modules/core/include/opencv2/core/bindings_utils.hpp +++ b/modules/core/include/opencv2/core/bindings_utils.hpp @@ -243,6 +243,33 @@ struct CV_EXPORTS_W_SIMPLE ClassWithKeywordProperties { } }; +struct CV_EXPORTS_W_PARAMS FunctionParams +{ + CV_PROP_RW int lambda = -1; + CV_PROP_RW float sigma = 0.0f; + + FunctionParams& setLambda(int value) CV_NOEXCEPT + { + lambda = value; + return *this; + } + + FunctionParams& setSigma(float value) CV_NOEXCEPT + { + sigma = value; + return *this; + } +}; + +CV_WRAP static inline String +copyMatAndDumpNamedArguments(InputArray src, OutputArray dst, + const FunctionParams& params = FunctionParams()) +{ + src.copyTo(dst); + return format("lambda=%d, sigma=%.1f", params.lambda, + 
params.sigma); +} + namespace nested { CV_WRAP static inline bool testEchoBooleanFunction(bool flag) { return flag; diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h index 957e7bcd1d..8307ca7d1c 100644 --- a/modules/core/include/opencv2/core/cvdef.h +++ b/modules/core/include/opencv2/core/cvdef.h @@ -459,6 +459,7 @@ Cv64suf; #define CV_EXPORTS_W_SIMPLE CV_EXPORTS #define CV_EXPORTS_AS(synonym) CV_EXPORTS #define CV_EXPORTS_W_MAP CV_EXPORTS +#define CV_EXPORTS_W_PARAMS CV_EXPORTS #define CV_IN_OUT #define CV_OUT #define CV_PROP diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py index d7a54910ba..92629c0e7a 100755 --- a/modules/python/src2/gen2.py +++ b/modules/python/src2/gen2.py @@ -241,6 +241,7 @@ class ClassProp(object): def __init__(self, decl): self.tp = decl[0].replace("*", "_ptr") self.name = decl[1] + self.default_value = decl[2] self.readonly = True if "/RW" in decl[3]: self.readonly = False @@ -268,6 +269,7 @@ class ClassInfo(object): self.cname = name.replace(".", "::") self.ismap = False + self.is_parameters = False self.issimple = False self.isalgorithm = False self.methods = {} @@ -300,6 +302,9 @@ class ClassInfo(object): self.ismap = True elif m == "/Simple": self.issimple = True + elif m == "/Params": + self.is_parameters = True + self.issimple = True self.props = [ClassProp(p) for p in decl[3]] if not self.has_export_alias and self.original_name.startswith("Cv"): @@ -421,39 +426,55 @@ def handle_ptr(tp): class ArgInfo(object): - def __init__(self, arg_tuple): - self.tp = handle_ptr(arg_tuple[0]) - self.name = arg_tuple[1] - if self.name in python_reserved_keywords: - self.name += "_" - self.defval = arg_tuple[2] + def __init__(self, atype, name, default_value, modifiers=(), + enclosing_arg=None): + # type: (ArgInfo, str, str, str, tuple[str, ...], ArgInfo | None) -> None + self.tp = handle_ptr(atype) + self.name = name + self.defval = default_value + self._modifiers = 
tuple(modifiers) self.isarray = False self.is_smart_ptr = self.tp.startswith('Ptr<') # FIXIT: handle through modifiers - need to modify parser self.arraylen = 0 self.arraycvt = None - self.inputarg = True - self.outputarg = False - self.returnarg = False - self.isrvalueref = False - for m in arg_tuple[3]: - if m == "/O": - self.inputarg = False - self.outputarg = True - self.returnarg = True - elif m == "/IO": - self.inputarg = True - self.outputarg = True - self.returnarg = True - elif m.startswith("/A"): + for m in self._modifiers: + if m.startswith("/A"): self.isarray = True self.arraylen = m[2:].strip() elif m.startswith("/CA"): self.isarray = True self.arraycvt = m[2:].strip() - elif m == "/RRef": - self.isrvalueref = True self.py_inputarg = False self.py_outputarg = False + self.enclosing_arg = enclosing_arg + + @property + def export_name(self): + if self.name in python_reserved_keywords: + return self.name + '_' + return self.name + + @property + def inputarg(self): + return '/O' not in self._modifiers + + @property + def outputarg(self): + return '/O' in self._modifiers or '/IO' in self._modifiers + + @property + def returnarg(self): + return self.outputarg + + @property + def isrvalueref(self): + return '/RRef' in self._modifiers + + @property + def full_name(self): + if self.enclosing_arg is None: + return self.name + return self.enclosing_arg.name + '.' 
+ self.name def isbig(self): return self.tp in ["Mat", "vector_Mat", "cuda::GpuMat", "GpuMat", "vector_GpuMat", "UMat", "vector_UMat"] # or self.tp.startswith("vector") @@ -462,9 +483,62 @@ class ArgInfo(object): return "ArgInfo(\"%s\", %d)" % (self.name, self.outputarg) +def find_argument_class_info(argument_type, function_namespace, + function_class_name, known_classes): + # type: (str, str, str, dict[str, ClassInfo]) -> ClassInfo | None + """Tries to find corresponding class info for the provided argument type + + Args: + argument_type (str): Function argument type + function_namespace (str): Namespace of the function declaration + function_class_name (str): Name of the class if function is a method of class + known_classes (dict[str, ClassInfo]): Mapping between string class + identifier and ClassInfo struct. + + Returns: + Optional[ClassInfo]: class info struct if the provided argument type + refers to a known C++ class, None otherwise. + """ + + possible_classes = tuple(filter(lambda cls: cls.endswith(argument_type), known_classes)) + # If argument type is not a known class - just skip it + if not possible_classes: + return None + if len(possible_classes) == 1: + return known_classes[possible_classes[0]] + + # If there is more than 1 matched class, try to select the most probable one + # Look for a matched class name in different scope, starting from the + # narrowest one + + # First try to find argument inside class scope of the function (if any) + if function_class_name: + type_to_match = function_class_name + '_' + argument_type + if type_to_match in possible_classes: + return known_classes[type_to_match] + else: + type_to_match = argument_type + + # Trying to find argument type in the namespace of the function + type_to_match = '{}_{}'.format( + function_namespace.lstrip('cv.').replace('.', '_'), type_to_match + ) + if type_to_match in possible_classes: + return known_classes[type_to_match] + + # Try to find argument name as is + if argument_type in 
possible_classes: + return known_classes[argument_type] + + # NOTE: parser is broken - some classes might not be visible, depending on + # the order of parsed headers. + # print("[WARNING] Can't select an appropriate class for argument: '", + # argument_type, "'. Possible matches: '", possible_classes, "'") + return None + + class FuncVariant(object): - def __init__(self, classname, name, decl, isconstructor, isphantom=False): - self.classname = classname + def __init__(self, namespace, classname, name, decl, isconstructor, known_classes, isphantom=False): self.name = self.wname = name self.isconstructor = isconstructor self.isphantom = isphantom @@ -476,8 +550,14 @@ class FuncVariant(object): self.rettype = "" self.args = [] self.array_counters = {} - for a in decl[3]: - ainfo = ArgInfo(a) + for arg_decl in decl[3]: + assert len(arg_decl) == 4, \ + 'ArgInfo contract is violated. Arg declaration should contain:' \ + '"arg_type", "name", "default_value", "modifiers". '\ + 'Got tuple: {}'.format(arg_decl) + + ainfo = ArgInfo(atype=arg_decl[0], name=arg_decl[1], + default_value=arg_decl[2], modifiers=arg_decl[3]) if ainfo.isarray and not ainfo.arraycvt: c = ainfo.arraylen c_arrlist = self.array_counters.get(c, []) @@ -486,9 +566,9 @@ class FuncVariant(object): else: self.array_counters[c] = [ainfo.name] self.args.append(ainfo) - self.init_pyproto() + self.init_pyproto(namespace, classname, known_classes) - def init_pyproto(self): + def init_pyproto(self, namespace, classname, known_classes): # string representation of argument list, with '[', ']' symbols denoting optional arguments, e.g. 
# "src1, src2[, dst[, mask]]" for cv.add argstr = "" @@ -510,12 +590,44 @@ class FuncVariant(object): outlist = [] firstoptarg = 1000000 - argno = -1 - for a in self.args: - argno += 1 + + # Check if there is params structure in arguments + arguments = [] + for arg in self.args: + arg_class_info = find_argument_class_info( + arg.tp, namespace, classname, known_classes + ) + # If argument refers to the 'named arguments' structure - instead of + # the argument put its properties + if arg_class_info is not None and arg_class_info.is_parameters: + for prop in arg_class_info.props: + # Convert property to ArgIfno and mark that argument is + # a part of the parameters structure: + arguments.append( + ArgInfo(prop.tp, prop.name, prop.default_value, + enclosing_arg=arg) + ) + else: + arguments.append(arg) + # Prevent names duplication after named arguments are merged + # to the main arguments list + argument_names = tuple(arg.name for arg in arguments) + assert len(set(argument_names)) == len(argument_names), \ + "Duplicate arguments with names '{}' in function '{}'. 
"\ + "Please, check named arguments used in function interface".format( + argument_names, self.name + ) + + self.args = arguments + + for argno, a in enumerate(self.args): if a.name in self.array_counters: continue - assert not a.tp in forbidden_arg_types, 'Forbidden type "{}" for argument "{}" in "{}" ("{}")'.format(a.tp, a.name, self.name, self.classname) + assert a.tp not in forbidden_arg_types, \ + 'Forbidden type "{}" for argument "{}" in "{}" ("{}")'.format( + a.tp, a.name, self.name, self.classname + ) + if a.tp in ignored_arg_types: continue if a.returnarg: @@ -542,7 +654,7 @@ class FuncVariant(object): firstoptarg = min(firstoptarg, len(arglist)) noptargs = len(arglist) - firstoptarg - argnamelist = [aname for aname, argno in arglist] + argnamelist = [self.args[argno].export_name for _, argno in arglist] argstr = ", ".join(argnamelist[:firstoptarg]) argstr = "[, ".join([argstr] + argnamelist[firstoptarg:]) argstr += "]" * noptargs @@ -552,9 +664,8 @@ class FuncVariant(object): assert outlist == [] outlist = [("self", -1)] if self.isconstructor: - classname = self.classname if classname.startswith("Cv"): - classname=classname[2:] + classname = classname[2:] outstr = "<%s object>" % (classname,) elif outlist: outstr = ", ".join([o[0] for o in outlist]) @@ -566,9 +677,9 @@ class FuncVariant(object): self.py_prototype = "%s(%s) -> %s" % (self.wname, argstr, outstr) self.py_noptargs = noptargs self.py_arglist = arglist - for aname, argno in arglist: + for _, argno in arglist: self.args[argno].py_inputarg = True - for aname, argno in outlist: + for _, argno in outlist: if argno >= 0: self.args[argno].py_outputarg = True self.py_outlist = outlist @@ -584,8 +695,11 @@ class FuncInfo(object): self.is_static = is_static self.variants = [] - def add_variant(self, decl, isphantom=False): - self.variants.append(FuncVariant(self.classname, self.name, decl, self.isconstructor, isphantom)) + def add_variant(self, decl, known_classes, isphantom=False): + 
self.variants.append( + FuncVariant(self.namespace, self.classname, self.name, decl, + self.isconstructor, known_classes, isphantom) + ) def get_wrapper_name(self): name = self.name @@ -698,6 +812,7 @@ class FuncInfo(object): # add necessary conversions from Python objects to code_cvt_list, # form the function/method call, # for the list of type mappings + instantiated_args = set() for a in v.args: if a.tp in ignored_arg_types: defval = a.defval @@ -738,17 +853,29 @@ class FuncInfo(object): arg_type_info = ArgTypeInfo(tp, FormatStrings.object, defval0, True) parse_name = a.name - if a.py_inputarg: - if arg_type_info.strict_conversion: - code_decl += " PyObject* pyobj_%s = NULL;\n" % (a.name,) - parse_name = "pyobj_" + a.name - if a.tp == 'char': - code_cvt_list.append("convert_to_char(pyobj_%s, &%s, %s)" % (a.name, a.name, a.crepr())) - else: - code_cvt_list.append("pyopencv_to_safe(pyobj_%s, %s, %s)" % (a.name, a.name, a.crepr())) + if a.py_inputarg and arg_type_info.strict_conversion: + parse_name = "pyobj_" + a.full_name.replace('.', '_') + code_decl += " PyObject* %s = NULL;\n" % (parse_name,) + if a.tp == 'char': + code_cvt_list.append("convert_to_char(%s, &%s, %s)" % (parse_name, a.full_name, a.crepr())) + else: + code_cvt_list.append("pyopencv_to_safe(%s, %s, %s)" % (parse_name, a.full_name, a.crepr())) all_cargs.append([arg_type_info, parse_name]) + # Argument is actually a part of the named arguments structure, + # but it is possible to mimic further processing like it is normal arg + if a.enclosing_arg: + a = a.enclosing_arg + arg_type_info = ArgTypeInfo(a.tp, FormatStrings.object, + default_value=a.defval, + strict_conversion=True) + # Skip further actions if enclosing argument is already instantiated + # by its another field + if a.name in instantiated_args: + continue + instantiated_args.add(a.name) + defval = a.defval if not defval: defval = arg_type_info.default_value @@ -773,9 +900,9 @@ class FuncInfo(object): code_args += ", " if a.isrvalueref: - 
a.name = 'std::move(' + a.name + ')' - - code_args += amp + a.name + code_args += amp + 'std::move(' + a.name + ')' + else: + code_args += amp + a.name code_args += ")" @@ -821,7 +948,7 @@ class FuncInfo(object): # form the format spec for PyArg_ParseTupleAndKeywords fmtspec = "".join([ get_type_format_string(all_cargs[argno][0]) - for aname, argno in v.py_arglist + for _, argno in v.py_arglist ]) if v.py_noptargs > 0: fmtspec = fmtspec[:-v.py_noptargs] + "|" + fmtspec[-v.py_noptargs:] @@ -832,10 +959,10 @@ class FuncInfo(object): # - calls PyArg_ParseTupleAndKeywords # - converts complex arguments from PyObject's to native OpenCV types code_parse = gen_template_parse_args.substitute( - kw_list = ", ".join(['"' + aname + '"' for aname, argno in v.py_arglist]), - fmtspec = fmtspec, - parse_arglist = ", ".join(["&" + all_cargs[argno][1] for aname, argno in v.py_arglist]), - code_cvt = " &&\n ".join(code_cvt_list)) + kw_list=", ".join(['"' + v.args[argno].export_name + '"' for _, argno in v.py_arglist]), + fmtspec=fmtspec, + parse_arglist=", ".join(["&" + all_cargs[argno][1] for _, argno in v.py_arglist]), + code_cvt=" &&\n ".join(code_cvt_list)) else: code_parse = "if(PyObject_Size(py_args) == 0 && (!kw || PyObject_Size(kw) == 0))" @@ -1036,7 +1163,7 @@ class PythonWrapperGenerator(object): # Add it as a method to the class func_map = self.classes[classname].methods func = func_map.setdefault(name, FuncInfo(classname, name, cname, isconstructor, namespace_str, is_static)) - func.add_variant(decl, isphantom) + func.add_variant(decl, self.classes, isphantom) # Add it as global function g_name = "_".join(classes+[name]) @@ -1053,10 +1180,10 @@ class PythonWrapperGenerator(object): func_map = self.namespaces.setdefault(namespace_str, Namespace()).funcs # Exports static function with internal name (backward compatibility) func = func_map.setdefault(g_name, FuncInfo("", g_name, cname, isconstructor, namespace_str, False)) - func.add_variant(decl, isphantom) + 
func.add_variant(decl, self.classes, isphantom) if g_wname != g_name: # TODO OpenCV 5.0 wfunc = func_map.setdefault(g_wname, FuncInfo("", g_wname, cname, isconstructor, namespace_str, False)) - wfunc.add_variant(decl, isphantom) + wfunc.add_variant(decl, self.classes, isphantom) else: if classname and not isconstructor: if not isphantom: @@ -1066,7 +1193,7 @@ class PythonWrapperGenerator(object): func_map = self.namespaces.setdefault(namespace_str, Namespace()).funcs func = func_map.setdefault(name, FuncInfo(classname, name, cname, isconstructor, namespace_str, is_static)) - func.add_variant(decl, isphantom) + func.add_variant(decl, self.classes, isphantom) if classname and isconstructor: self.classes[classname].constructor = func diff --git a/modules/python/src2/hdr_parser.py b/modules/python/src2/hdr_parser.py index ebe13f05c7..f5df3e2aab 100755 --- a/modules/python/src2/hdr_parser.py +++ b/modules/python/src2/hdr_parser.py @@ -259,6 +259,10 @@ class CppHeaderParser(object): if "CV_EXPORTS_W_SIMPLE" in l: l = l.replace("CV_EXPORTS_W_SIMPLE", "") modlist.append("/Simple") + if "CV_EXPORTS_W_PARAMS" in l: + l = l.replace("CV_EXPORTS_W_PARAMS", "") + modlist.append("/Map") + modlist.append("/Params") npos = l.find("CV_EXPORTS_AS") if npos < 0: npos = l.find('CV_WRAP_AS') @@ -776,7 +780,15 @@ class CppHeaderParser(object): var_list = [var_name1] + [i.strip() for i in var_list[1:]] for v in var_list: - class_decl[3].append([var_type, v, "", var_modlist]) + prop_definition = v.split('=') + prop_name = prop_definition[0].strip() + if len(prop_definition) == 1: + # default value is not provided + prop_default_value = '' + else: + prop_default_value = prop_definition[-1] + class_decl[3].append([var_type, prop_name, prop_default_value, + var_modlist]) return stmt_type, "", False, None # something unknown diff --git a/modules/python/test/test_misc.py b/modules/python/test/test_misc.py index 765201e973..0e5f5bc018 100644 --- a/modules/python/test/test_misc.py +++ 
b/modules/python/test/test_misc.py @@ -738,6 +738,29 @@ class Arguments(NewOpenCVTests): ) ) + def test_named_arguments_without_parameters(self): + src = np.ones((5, 5, 3), dtype=np.uint8) + arguments_dump, src_copy = cv.utils.copyMatAndDumpNamedArguments(src) + np.testing.assert_equal(src, src_copy) + self.assertEqual(arguments_dump, 'lambda=-1, sigma=0.0') + + def test_named_arguments_without_output_argument(self): + src = np.zeros((2, 2, 3), dtype=np.uint8) + arguments_dump, src_copy = cv.utils.copyMatAndDumpNamedArguments( + src, lambda_=15, sigma=3.5 + ) + np.testing.assert_equal(src, src_copy) + self.assertEqual(arguments_dump, 'lambda=15, sigma=3.5') + + def test_named_arguments_with_output_argument(self): + src = np.zeros((3, 3, 3), dtype=np.uint8) + dst = np.ones_like(src) + arguments_dump, src_copy = cv.utils.copyMatAndDumpNamedArguments( + src, dst, lambda_=25, sigma=5.5 + ) + np.testing.assert_equal(src, src_copy) + np.testing.assert_equal(dst, src_copy) + self.assertEqual(arguments_dump, 'lambda=25, sigma=5.5') class CanUsePurePythonModuleFunction(NewOpenCVTests): From e4acd74e875b133ce4845de504da540cb17b1788 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Mon, 6 Feb 2023 22:17:50 +0300 Subject: [PATCH 014/199] Fix some clang 14 warnings --- 3rdparty/ippicv/CMakeLists.txt | 2 +- modules/core/include/opencv2/core/private.hpp | 7 +++++++ modules/gapi/misc/python/pyopencv_gapi.hpp | 2 -- .../test/internal/gapi_int_executor_tests.cpp | 2 +- .../test/streaming/gapi_streaming_tests.cpp | 8 ++++---- modules/imgproc/test/test_filter.cpp | 20 +++++++++---------- modules/ts/include/opencv2/ts.hpp | 10 ++++++---- modules/ts/include/opencv2/ts/ts_ext.hpp | 16 +++++++-------- modules/ts/include/opencv2/ts/ts_perf.hpp | 6 +++--- modules/ts/src/ts_perf.cpp | 5 ++--- .../obsensor_uvc_stream_channel.cpp | 2 +- .../obsensor_uvc_stream_channel.hpp | 4 ++-- samples/cpp/train_svmsgd.cpp | 2 +- samples/cpp/warpPerspective_demo.cpp | 2 +- 14 files changed, 47 
insertions(+), 41 deletions(-) diff --git a/3rdparty/ippicv/CMakeLists.txt b/3rdparty/ippicv/CMakeLists.txt index 43ad806dd7..4ef248f3c0 100644 --- a/3rdparty/ippicv/CMakeLists.txt +++ b/3rdparty/ippicv/CMakeLists.txt @@ -24,7 +24,7 @@ if(UNIX) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function -Wno-missing-braces -Wno-missing-field-initializers") endif() if(CV_CLANG) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-self-assign") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-self-assign -Wno-strict-prototypes") endif() endif() diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index 146d37f009..02ac1ba1e3 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -235,6 +235,10 @@ T* allocSingletonNew() { return new(allocSingletonNewBuffer(sizeof(T))) T(); } #include "ipp.h" #endif #ifdef HAVE_IPP_IW +# if defined(__OPENCV_BUILD) && defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wstrict-prototypes" +# endif # if defined(__OPENCV_BUILD) && defined(__GNUC__) && __GNUC__ >= 5 # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wsuggest-override" @@ -246,6 +250,9 @@ T* allocSingletonNew() { return new(allocSingletonNewBuffer(sizeof(T))) T(); } # if defined(__OPENCV_BUILD) && defined(__GNUC__) && __GNUC__ >= 5 # pragma GCC diagnostic pop # endif +# if defined(__OPENCV_BUILD) && defined(__clang__) +# pragma clang diagnostic pop +# endif #endif #if IPP_VERSION_X100 >= 201700 diff --git a/modules/gapi/misc/python/pyopencv_gapi.hpp b/modules/gapi/misc/python/pyopencv_gapi.hpp index 49b2ddd1eb..07bacd3665 100644 --- a/modules/gapi/misc/python/pyopencv_gapi.hpp +++ b/modules/gapi/misc/python/pyopencv_gapi.hpp @@ -664,7 +664,6 @@ static cv::GRunArgs run_py_kernel(cv::detail::PyObjectHolder kernel, cv::GRunArgs outs; try { - int in_idx = 0; // NB: Doesn't increase reference counter (false), // because PyObject already 
have ownership. // In case exception decrement reference counter. @@ -697,7 +696,6 @@ static cv::GRunArgs run_py_kernel(cv::detail::PyObjectHolder kernel, util::throw_error(std::logic_error("GFrame isn't supported for custom operation")); break; } - ++in_idx; } if (ctx.m_state.has_value()) diff --git a/modules/gapi/test/internal/gapi_int_executor_tests.cpp b/modules/gapi/test/internal/gapi_int_executor_tests.cpp index b8f0e18e0b..79117aebf3 100644 --- a/modules/gapi/test/internal/gapi_int_executor_tests.cpp +++ b/modules/gapi/test/internal/gapi_int_executor_tests.cpp @@ -27,7 +27,7 @@ class GMockExecutable final: public cv::gimpl::GIslandExecutable m_priv->m_reshape_counter++; } virtual void handleNewStream() override { } - virtual void run(std::vector&&, std::vector&&) { } + virtual void run(std::vector&&, std::vector&&) override { } virtual bool allocatesOutputs() const override { return true; diff --git a/modules/gapi/test/streaming/gapi_streaming_tests.cpp b/modules/gapi/test/streaming/gapi_streaming_tests.cpp index c27ebe3ca2..bdb0ae9cd9 100644 --- a/modules/gapi/test/streaming/gapi_streaming_tests.cpp +++ b/modules/gapi/test/streaming/gapi_streaming_tests.cpp @@ -190,7 +190,7 @@ public: : cv::gapi::wip::GCaptureSource(pipeline) { } - bool pull(cv::gapi::wip::Data& data) { + bool pull(cv::gapi::wip::Data& data) override { if (cv::gapi::wip::GCaptureSource::pull(data)) { data = cv::MediaFrame::Create(cv::util::get(data)); return true; @@ -232,7 +232,7 @@ public: : cv::gapi::wip::GCaptureSource(pipeline) { } - bool pull(cv::gapi::wip::Data& data) { + bool pull(cv::gapi::wip::Data& data) override { if (cv::gapi::wip::GCaptureSource::pull(data)) { cv::Mat bgr = cv::util::get(data); cv::Mat y, uv; @@ -256,7 +256,7 @@ public: : cv::gapi::wip::GCaptureSource(pipeline) { } - bool pull(cv::gapi::wip::Data& data) { + bool pull(cv::gapi::wip::Data& data) override { if (cv::gapi::wip::GCaptureSource::pull(data)) { cv::Mat bgr = cv::util::get(data); cv::Mat gray; @@ -319,7 
+319,7 @@ public: return "InvalidSource sucessfuly failed!"; } - bool pull(cv::gapi::wip::Data& d) { + bool pull(cv::gapi::wip::Data& d) override { ++m_curr_frame_id; if (m_curr_frame_id > m_num_frames) { return false; diff --git a/modules/imgproc/test/test_filter.cpp b/modules/imgproc/test/test_filter.cpp index ad8bac5a47..02d5e232a2 100644 --- a/modules/imgproc/test/test_filter.cpp +++ b/modules/imgproc/test/test_filter.cpp @@ -49,10 +49,10 @@ public: CV_FilterBaseTest( bool _fp_kernel ); protected: - int prepare_test_case( int test_case_idx ); - int read_params( const cv::FileStorage& fs ); - void get_test_array_types_and_sizes( int test_case_idx, vector >& sizes, vector >& types ); - void get_minmax_bounds( int i, int j, int type, Scalar& low, Scalar& high ); + int prepare_test_case( int test_case_idx ) CV_OVERRIDE; + int read_params( const cv::FileStorage& fs ) CV_OVERRIDE; + void get_test_array_types_and_sizes( int test_case_idx, vector >& sizes, vector >& types ) CV_OVERRIDE; + void get_minmax_bounds( int i, int j, int type, Scalar& low, Scalar& high ) CV_OVERRIDE; Size aperture_size; Point anchor; int max_aperture_size; @@ -689,8 +689,8 @@ public: CV_SmoothBaseTest(); protected: - void get_test_array_types_and_sizes( int test_case_idx, vector >& sizes, vector >& types ); - double get_success_error_level( int test_case_idx, int i, int j ); + void get_test_array_types_and_sizes( int test_case_idx, vector >& sizes, vector >& types ) CV_OVERRIDE; + double get_success_error_level( int test_case_idx, int i, int j ) CV_OVERRIDE; const char* smooth_type; void dump_test_case(int test_case_idx, std::ostream* out) CV_OVERRIDE @@ -802,10 +802,10 @@ public: CV_GaussianBlurTest(); protected: - void prepare_to_validation( int test_case_idx ); - void run_func(); - void get_test_array_types_and_sizes( int test_case_idx, vector >& sizes, vector >& types ); - double get_success_error_level( int /*test_case_idx*/, int /*i*/, int /*j*/ ); + void prepare_to_validation( int 
test_case_idx ) CV_OVERRIDE; + void run_func() CV_OVERRIDE; + void get_test_array_types_and_sizes( int test_case_idx, vector >& sizes, vector >& types ) CV_OVERRIDE; + double get_success_error_level( int /*test_case_idx*/, int /*i*/, int /*j*/ ) CV_OVERRIDE; double sigma; int param1, param2; diff --git a/modules/ts/include/opencv2/ts.hpp b/modules/ts/include/opencv2/ts.hpp index eee0376a0e..86f2d07761 100644 --- a/modules/ts/include/opencv2/ts.hpp +++ b/modules/ts/include/opencv2/ts.hpp @@ -116,20 +116,22 @@ # endif #endif -#if defined(__OPENCV_BUILD) && defined(__clang__) -#pragma clang diagnostic ignored "-Winconsistent-missing-override" -#endif #if defined(__OPENCV_BUILD) && defined(__GNUC__) && __GNUC__ >= 5 //#pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wsuggest-override" #endif -#if defined(__OPENCV_BUILD) && defined(__APPLE__) && defined(__clang__) && ((__clang_major__*100 + __clang_minor__) >= 1301) +#if defined(__OPENCV_BUILD) && defined(__clang__) && ((__clang_major__*100 + __clang_minor__) >= 1301) +#pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdeprecated-copy" +#pragma clang diagnostic ignored "-Winconsistent-missing-override" #endif #include "opencv2/ts/ts_gtest.h" #if defined(__OPENCV_BUILD) && defined(__GNUC__) && __GNUC__ >= 5 //#pragma GCC diagnostic pop #endif +#if defined(__OPENCV_BUILD) && defined(__clang__) && ((__clang_major__*100 + __clang_minor__) >= 1301) +#pragma clang diagnostic pop +#endif #include "opencv2/ts/ts_ext.hpp" #ifndef GTEST_USES_SIMPLE_RE diff --git a/modules/ts/include/opencv2/ts/ts_ext.hpp b/modules/ts/include/opencv2/ts/ts_ext.hpp index 5c09b569a5..efa4860510 100644 --- a/modules/ts/include/opencv2/ts/ts_ext.hpp +++ b/modules/ts/include/opencv2/ts/ts_ext.hpp @@ -49,13 +49,13 @@ bool checkBigDataTests(); #undef TEST -#define TEST_(test_case_name, test_name, parent_class, bodyMethodName, BODY_IMPL) \ +#define TEST_(test_case_name, test_name, parent_class, bodyMethodName, BODY_ATTR, 
BODY_IMPL) \ class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) : public parent_class {\ public:\ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {}\ private:\ virtual void TestBody() CV_OVERRIDE;\ - virtual void bodyMethodName();\ + virtual void bodyMethodName() BODY_ATTR;\ static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;\ GTEST_DISALLOW_COPY_AND_ASSIGN_(\ GTEST_TEST_CLASS_NAME_(test_case_name, test_name));\ @@ -74,7 +74,7 @@ bool checkBigDataTests(); void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() BODY_IMPL( #test_case_name "_" #test_name ) \ void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::bodyMethodName() -#define TEST(test_case_name, test_name) TEST_(test_case_name, test_name, ::testing::Test, Body, CV__TEST_BODY_IMPL) +#define TEST(test_case_name, test_name) TEST_(test_case_name, test_name, ::testing::Test, Body,, CV__TEST_BODY_IMPL) #define CV__TEST_BIGDATA_BODY_IMPL(name) \ { \ @@ -96,9 +96,9 @@ bool checkBigDataTests(); // Special type of tests which require / use or validate processing of huge amount of data (>= 2Gb) #if defined(_M_X64) || defined(_M_ARM64) || defined(__x86_64__) || defined(__aarch64__) -#define BIGDATA_TEST(test_case_name, test_name) TEST_(BigData_ ## test_case_name, test_name, ::testing::Test, Body, CV__TEST_BIGDATA_BODY_IMPL) +#define BIGDATA_TEST(test_case_name, test_name) TEST_(BigData_ ## test_case_name, test_name, ::testing::Test, Body,, CV__TEST_BIGDATA_BODY_IMPL) #else -#define BIGDATA_TEST(test_case_name, test_name) TEST_(BigData_ ## test_case_name, DISABLED_ ## test_name, ::testing::Test, Body, CV__TEST_BIGDATA_BODY_IMPL) +#define BIGDATA_TEST(test_case_name, test_name) TEST_(BigData_ ## test_case_name, DISABLED_ ## test_name, ::testing::Test, Body,, CV__TEST_BIGDATA_BODY_IMPL) #endif #undef TEST_F @@ -128,13 +128,13 @@ bool checkBigDataTests(); void GTEST_TEST_CLASS_NAME_(test_fixture, test_name)::Body() // Don't use directly -#define CV__TEST_P(test_case_name, test_name, 
bodyMethodName, BODY_IMPL/*(name_str)*/) \ +#define CV__TEST_P(test_case_name, test_name, bodyMethodName, BODY_ATTR, BODY_IMPL/*(name_str)*/) \ class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \ : public test_case_name { \ public: \ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {} \ private: \ - virtual void bodyMethodName(); \ + virtual void bodyMethodName() BODY_ATTR; \ virtual void TestBody() CV_OVERRIDE; \ static int AddToRegistry() { \ ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \ @@ -160,7 +160,7 @@ bool checkBigDataTests(); void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::bodyMethodName() #undef TEST_P -#define TEST_P(test_case_name, test_name) CV__TEST_P(test_case_name, test_name, Body, CV__TEST_BODY_IMPL) +#define TEST_P(test_case_name, test_name) CV__TEST_P(test_case_name, test_name, Body,, CV__TEST_BODY_IMPL) #define CV_TEST_EXPECT_EXCEPTION_MESSAGE(statement, msg) \ diff --git a/modules/ts/include/opencv2/ts/ts_perf.hpp b/modules/ts/include/opencv2/ts/ts_perf.hpp index 5ca22d2b1e..7aa3809f27 100644 --- a/modules/ts/include/opencv2/ts/ts_perf.hpp +++ b/modules/ts/include/opencv2/ts/ts_perf.hpp @@ -551,7 +551,7 @@ void PrintTo(const Size& sz, ::std::ostream* os); // EXPECT_TRUE(foo.StatusIsOK()); // } #define PERF_TEST(test_case_name, test_name)\ - TEST_(test_case_name, test_name, ::perf::TestBase, PerfTestBody, CV__PERF_TEST_BODY_IMPL) + TEST_(test_case_name, test_name, ::perf::TestBase, PerfTestBody, CV_OVERRIDE, CV__PERF_TEST_BODY_IMPL) // Defines a performance test that uses a test fixture. 
// @@ -595,7 +595,7 @@ void PrintTo(const Size& sz, ::std::ostream* os); // // @Note PERF_TEST_P() below violates behavior of original Google Tests - there is no tests instantiation in original TEST_P() // This macro is intended for usage with separate INSTANTIATE_TEST_CASE_P macro -#define PERF_TEST_P_(test_case_name, test_name) CV__TEST_P(test_case_name, test_name, PerfTestBody, CV__PERF_TEST_BODY_IMPL) +#define PERF_TEST_P_(test_case_name, test_name) CV__TEST_P(test_case_name, test_name, PerfTestBody, CV_OVERRIDE, CV__PERF_TEST_BODY_IMPL) // Defines a parametrized performance test. // @@ -628,7 +628,7 @@ void PrintTo(const Size& sz, ::std::ostream* os); protected:\ virtual void PerfTestBody();\ };\ - CV__TEST_P(fixture##_##name, name, PerfTestBodyDummy, CV__PERF_TEST_BODY_IMPL){} \ + CV__TEST_P(fixture##_##name, name, PerfTestBodyDummy,, CV__PERF_TEST_BODY_IMPL){} \ INSTANTIATE_TEST_CASE_P(/*none*/, fixture##_##name, params);\ void fixture##_##name::PerfTestBody() diff --git a/modules/ts/src/ts_perf.cpp b/modules/ts/src/ts_perf.cpp index 958a2e300d..39147228b8 100644 --- a/modules/ts/src/ts_perf.cpp +++ b/modules/ts/src/ts_perf.cpp @@ -2104,8 +2104,6 @@ struct KeypointComparator { return cmp(pts_[idx1], pts_[idx2]); } -private: - KeypointComparator& operator=(const KeypointComparator&) = delete; }; }//namespace @@ -2119,7 +2117,8 @@ void perf::sort(std::vector& pts, cv::InputOutputArray descriptors for (int i = 0; i < desc.rows; ++i) idxs[i] = i; - std::sort(idxs.data(), idxs.data() + desc.rows, KeypointComparator(pts)); + comparators::KeypointGreater cmp; + std::sort(idxs.data(), idxs.data() + desc.rows, [&](int lhs, int rhs){ return cmp(pts[lhs], pts[rhs]); }); std::vector spts(pts.size()); cv::Mat sdesc(desc.size(), desc.type()); diff --git a/modules/videoio/src/cap_obsensor/obsensor_uvc_stream_channel.cpp b/modules/videoio/src/cap_obsensor/obsensor_uvc_stream_channel.cpp index 1bcb6ddf76..35d2a7483f 100644 --- 
a/modules/videoio/src/cap_obsensor/obsensor_uvc_stream_channel.cpp +++ b/modules/videoio/src/cap_obsensor/obsensor_uvc_stream_channel.cpp @@ -198,7 +198,7 @@ DepthFrameUnpacker::DepthFrameUnpacker(){ outputDataBuf_ = new uint8_t[OUT_DATA_SIZE]; } -DepthFrameUnpacker::~DepthFrameUnpacker(){ +DepthFrameUnpacker::~DepthFrameUnpacker() { delete[] outputDataBuf_; } diff --git a/modules/videoio/src/cap_obsensor/obsensor_uvc_stream_channel.hpp b/modules/videoio/src/cap_obsensor/obsensor_uvc_stream_channel.hpp index 02f4040101..caff38efea 100644 --- a/modules/videoio/src/cap_obsensor/obsensor_uvc_stream_channel.hpp +++ b/modules/videoio/src/cap_obsensor/obsensor_uvc_stream_channel.hpp @@ -67,7 +67,7 @@ public: class DepthFrameProcessor: public IFrameProcessor { public: DepthFrameProcessor(const OBExtensionParam& parma); - virtual ~DepthFrameProcessor() noexcept; + virtual ~DepthFrameProcessor(); virtual void process(Frame* frame) override; private: @@ -78,7 +78,7 @@ private: class DepthFrameUnpacker: public IFrameProcessor { public: DepthFrameUnpacker(); - virtual ~DepthFrameUnpacker() noexcept; + virtual ~DepthFrameUnpacker(); virtual void process(Frame* frame) override; private: const uint32_t OUT_DATA_SIZE = 1280*800*2; diff --git a/samples/cpp/train_svmsgd.cpp b/samples/cpp/train_svmsgd.cpp index bfd837d507..12e0384081 100644 --- a/samples/cpp/train_svmsgd.cpp +++ b/samples/cpp/train_svmsgd.cpp @@ -29,7 +29,7 @@ struct Data bool doTrain(const Mat samples, const Mat responses, Mat &weights, float &shift); //function finds two points for drawing line (wx = 0) -bool findPointsForLine(const Mat &weights, float shift, Point points[], int width, int height); +bool findPointsForLine(const Mat &weights, float shift, Point points[2], int width, int height); // function finds cross point of line (wx = 0) and segment ( (y = HEIGHT, 0 <= x <= WIDTH) or (x = WIDTH, 0 <= y <= HEIGHT) ) bool findCrossPointWithBorders(const Mat &weights, float shift, const std::pair &segment, Point 
&crossPoint); diff --git a/samples/cpp/warpPerspective_demo.cpp b/samples/cpp/warpPerspective_demo.cpp index 947abd4359..1a5bb07d87 100644 --- a/samples/cpp/warpPerspective_demo.cpp +++ b/samples/cpp/warpPerspective_demo.cpp @@ -157,7 +157,7 @@ static void onMouse(int event, int x, int y, int, void*) { for (int i = 0; i < 4; ++i) { - if ((event == EVENT_LBUTTONDOWN) & ((abs(roi_corners[i].x - x) < 10)) & (abs(roi_corners[i].y - y) < 10)) + if ((event == EVENT_LBUTTONDOWN) && ((abs(roi_corners[i].x - x) < 10)) && (abs(roi_corners[i].y - y) < 10)) { selected_corner_index = i; dragging = true; From 6ea22535e23837ec55842c77e579b41cf9307822 Mon Sep 17 00:00:00 2001 From: CSBVision Date: Wed, 1 Feb 2023 08:49:38 +0100 Subject: [PATCH 015/199] Fixes #23187 (part 1) Supports delay-loading for Ninja generators and Python bindings. --- cmake/OpenCVDetectCUDA.cmake | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake index 140244ff54..4a562bdaf9 100644 --- a/cmake/OpenCVDetectCUDA.cmake +++ b/cmake/OpenCVDetectCUDA.cmake @@ -362,7 +362,7 @@ if(CUDA_FOUND) set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --use_fast_math) endif() - OCV_OPTION(CUDA_ENABLE_DELAYLOAD "Enable delayed loading of CUDA DLLs" OFF VISIBLE_IF MSVC AND (CMAKE_GENERATOR MATCHES "Visual Studio")) + OCV_OPTION(CUDA_ENABLE_DELAYLOAD "Enable delayed loading of CUDA DLLs" OFF VISIBLE_IF MSVC) mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD CUDA_SDK_ROOT_DIR) @@ -565,13 +565,17 @@ if(HAVE_CUDA) endforeach() if(MSVC AND CUDA_ENABLE_DELAYLOAD) + set(DELAYFLAGS "delayimp.lib") file(GLOB CUDA_DLLS "${CUDA_TOOLKIT_ROOT_DIR}/bin/*.dll") foreach(d ${CUDA_DLLS}) cmake_path(GET "d" FILENAME DLL_NAME) if(NOT ${DLL_NAME} MATCHES "cudart") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /DELAYLOAD:${DLL_NAME}") + set(DELAYFLAGS "${DELAYFLAGS} /DELAYLOAD:${DLL_NAME}") endif() endforeach() - 
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /IGNORE:4199") + set(DELAYFLAGS "${DELAYFLAGS} /DELAYLOAD:nvcuda.dll /DELAYLOAD:nvml.dll /IGNORE:4199") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${DELAYFLAGS}") + set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${DELAYFLAGS}") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${DELAYFLAGS}") endif() endif() From c280cd7290c11c8e9f4d4ed9cb24ca16483ee29c Mon Sep 17 00:00:00 2001 From: Ibai Gorordo <43162939+ibaiGorordo@users.noreply.github.com> Date: Thu, 9 Feb 2023 02:33:06 +0900 Subject: [PATCH 016/199] Merge pull request #23210 from ibaiGorordo:rect_nfa_bugfix Fix rect_nfa (lsd) * Fix missing log_gamma in nfa() Comparing the nfa function with the function in the binomial_nfa repository (https://github.com/rafael-grompone-von-gioi/binomial_nfa/blob/main/C99/log_binomial_nfa.c#L152), the first log_gamma call is missing. * Fix rect_nfa pixel index * Replace std::rotate * Rename tmp to v_tmp * Replace auto and std::min_element * Change slope equality check to int * Fix left limit check --- modules/imgproc/src/lsd.cpp | 154 ++++++++++++------------------------ 1 file changed, 50 insertions(+), 104 deletions(-) diff --git a/modules/imgproc/src/lsd.cpp b/modules/imgproc/src/lsd.cpp index c4a77ee265..2f13c5a8a0 100644 --- a/modules/imgproc/src/lsd.cpp +++ b/modules/imgproc/src/lsd.cpp @@ -69,12 +69,6 @@ const double DEG_TO_RADS = CV_PI / 180; #define log_gamma(x) ((x)>15.0?log_gamma_windschitl(x):log_gamma_lanczos(x)) -struct edge -{ - cv::Point p; - bool taken; -}; - ///////////////////////////////////////////////////////////////////////////////////////// inline double distSq(const double x1, const double y1, @@ -120,10 +114,20 @@ inline bool double_equal(const double& a, const double& b) return (abs_diff / abs_max) <= (RELATIVE_ERROR_FACTOR * DBL_EPSILON); } -inline bool AsmallerB_XoverY(const edge& a, const edge& b) -{ - if (a.p.x == b.p.x) return a.p.y < b.p.y; - else 
return a.p.x < b.p.x; +// function to sort points by y and then by x +inline bool AsmallerB_YoverX(const cv::Point2d &a, const cv::Point2d &b) { + if (a.y == b.y) return a.x < b.x; + else return a.y < b.y; +} + +// function to get the slope of the rectangle for a specific row +inline double get_slope(cv::Point2d p1, cv::Point2d p2) { + return ((int) ceil(p2.y) != (int) ceil(p1.y)) ? (p2.x - p1.x) / (p2.y - p1.y) : 0; +} + +// function to get the limit of the rectangle for a specific row +inline double get_limit(cv::Point2d p, int row, double slope) { + return p.x + (row - p.y) * slope; } /** @@ -945,105 +949,53 @@ double LineSegmentDetectorImpl::rect_nfa(const rect& rec) const double dyhw = rec.dy * half_width; double dxhw = rec.dx * half_width; - edge ordered_x[4]; - edge* min_y = &ordered_x[0]; - edge* max_y = &ordered_x[0]; // Will be used for loop range - - ordered_x[0].p.x = int(rec.x1 - dyhw); ordered_x[0].p.y = int(rec.y1 + dxhw); ordered_x[0].taken = false; - ordered_x[1].p.x = int(rec.x2 - dyhw); ordered_x[1].p.y = int(rec.y2 + dxhw); ordered_x[1].taken = false; - ordered_x[2].p.x = int(rec.x2 + dyhw); ordered_x[2].p.y = int(rec.y2 - dxhw); ordered_x[2].taken = false; - ordered_x[3].p.x = int(rec.x1 + dyhw); ordered_x[3].p.y = int(rec.y1 - dxhw); ordered_x[3].taken = false; - - std::sort(ordered_x, ordered_x + 4, AsmallerB_XoverY); - - // Find min y. And mark as taken. find max y. 
- for(unsigned int i = 1; i < 4; ++i) - { - if(min_y->p.y > ordered_x[i].p.y) {min_y = &ordered_x[i]; } - if(max_y->p.y < ordered_x[i].p.y) {max_y = &ordered_x[i]; } - } - min_y->taken = true; - - // Find leftmost untaken point; - edge* leftmost = 0; - for(unsigned int i = 0; i < 4; ++i) - { - if(!ordered_x[i].taken) - { - if(!leftmost) // if uninitialized - { - leftmost = &ordered_x[i]; - } - else if (leftmost->p.x > ordered_x[i].p.x) - { - leftmost = &ordered_x[i]; - } - } - } - CV_Assert(leftmost != NULL); - leftmost->taken = true; - - // Find rightmost untaken point; - edge* rightmost = 0; - for(unsigned int i = 0; i < 4; ++i) - { - if(!ordered_x[i].taken) - { - if(!rightmost) // if uninitialized - { - rightmost = &ordered_x[i]; - } - else if (rightmost->p.x < ordered_x[i].p.x) - { - rightmost = &ordered_x[i]; - } + cv::Point2d v_tmp[4]; + v_tmp[0] = cv::Point2d(rec.x1 - dyhw, rec.y1 + dxhw); + v_tmp[1] = cv::Point2d(rec.x2 - dyhw, rec.y2 + dxhw); + v_tmp[2] = cv::Point2d(rec.x2 + dyhw, rec.y2 - dxhw); + v_tmp[3] = cv::Point2d(rec.x1 + dyhw, rec.y1 - dxhw); + + // Find the vertex with the smallest y coordinate (or the smallest x if there is a tie). + int offset = 0; + for (int i = 1; i < 4; ++i) { + if (AsmallerB_YoverX(v_tmp[i], v_tmp[offset])){ + offset = i; } } - CV_Assert(rightmost != NULL); - rightmost->taken = true; - // Find last untaken point; - edge* tailp = 0; - for(unsigned int i = 0; i < 4; ++i) - { - if(!ordered_x[i].taken) - { - if(!tailp) // if uninitialized - { - tailp = &ordered_x[i]; - } - else if (tailp->p.x > ordered_x[i].p.x) - { - tailp = &ordered_x[i]; - } - } + // Rotate the vertices so that the first one is the one with the smallest y coordinate (or the smallest x if there is a tie). + // The rest will be then ordered counterclockwise. 
+ cv::Point2d ordered_y[4]; + for (int i = 0; i < 4; ++i) { + ordered_y[i] = v_tmp[(i + offset) % 4]; } - CV_Assert(tailp != NULL); - tailp->taken = true; - - double flstep = (min_y->p.y != leftmost->p.y) ? - (min_y->p.x - leftmost->p.x) / (min_y->p.y - leftmost->p.y) : 0; //first left step - double slstep = (leftmost->p.y != tailp->p.x) ? - (leftmost->p.x - tailp->p.x) / (leftmost->p.y - tailp->p.x) : 0; //second left step - double frstep = (min_y->p.y != rightmost->p.y) ? - (min_y->p.x - rightmost->p.x) / (min_y->p.y - rightmost->p.y) : 0; //first right step - double srstep = (rightmost->p.y != tailp->p.x) ? - (rightmost->p.x - tailp->p.x) / (rightmost->p.y - tailp->p.x) : 0; //second right step + double flstep = get_slope(ordered_y[0], ordered_y[1]); //first left step + double slstep = get_slope(ordered_y[1], ordered_y[2]); //second left step - double lstep = flstep, rstep = frstep; + double frstep = get_slope(ordered_y[0], ordered_y[3]); //first right step + double srstep = get_slope(ordered_y[3], ordered_y[2]); //second right step - double left_x = min_y->p.x, right_x = min_y->p.x; + double top_y = ordered_y[0].y, bottom_y = ordered_y[2].y; // Loop around all points in the region and count those that are aligned. 
- int min_iter = min_y->p.y; - int max_iter = max_y->p.y; - for(int y = min_iter; y <= max_iter; ++y) + std::vector points; + double left_limit, right_limit; + for(int y = (int) ceil(top_y); y <= (int) ceil(bottom_y); ++y) { if (y < 0 || y >= img_height) continue; - for(int x = int(left_x); x <= int(right_x); ++x) - { + if(y <= int(ceil(ordered_y[1].y))) + left_limit = get_limit(ordered_y[0], y, flstep); + else + left_limit = get_limit(ordered_y[1], y, slstep); + + if(y < int(ceil(ordered_y[3].y))) + right_limit = get_limit(ordered_y[0], y, frstep); + else + right_limit = get_limit(ordered_y[3], y, srstep); + + for(int x = (int) ceil(left_limit); x <= (int)(right_limit); ++x) { if (x < 0 || x >= img_width) continue; ++total_pts; @@ -1052,12 +1004,6 @@ double LineSegmentDetectorImpl::rect_nfa(const rect& rec) const ++alg_pts; } } - - if(y >= leftmost->p.y) { lstep = slstep; } - if(y >= rightmost->p.y) { rstep = srstep; } - - left_x += lstep; - right_x += rstep; } return nfa(total_pts, alg_pts, rec.p); @@ -1071,7 +1017,7 @@ double LineSegmentDetectorImpl::nfa(const int& n, const int& k, const double& p) double p_term = p / (1 - p); - double log1term = (double(n) + 1) - log_gamma(double(k) + 1) + double log1term = log_gamma(double(n) + 1) - log_gamma(double(k) + 1) - log_gamma(double(n-k) + 1) + double(k) * log(p) + double(n-k) * log(1.0 - p); double term = exp(log1term); From c8f5e228fced36a5c8c8718fc5ccce4557709483 Mon Sep 17 00:00:00 2001 From: wanli Date: Fri, 10 Feb 2023 19:33:59 +0800 Subject: [PATCH 017/199] release MUL and ADD operator on CUDA --- modules/dnn/perf/perf_layer.cpp | 2 +- modules/dnn/src/layers/nary_eltwise_layers.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/dnn/perf/perf_layer.cpp b/modules/dnn/perf/perf_layer.cpp index 38e35f1258..1e7552c86c 100644 --- a/modules/dnn/perf/perf_layer.cpp +++ b/modules/dnn/perf/perf_layer.cpp @@ -55,7 +55,7 @@ struct Layer_Slice : public TestBaseWithParam > } }; -static 
std::set nary_eltwise_cuda_deny_ops = {"add", "equal", "greater", "less", "mean", "mul", "pow", "sub"}; +static std::set nary_eltwise_cuda_deny_ops = {"equal", "greater", "less", "mean", "pow", "sub"}; struct Layer_NaryEltwise : public TestBaseWithParam > { diff --git a/modules/dnn/src/layers/nary_eltwise_layers.cpp b/modules/dnn/src/layers/nary_eltwise_layers.cpp index 3f43c024c7..91eb7f3c0e 100644 --- a/modules/dnn/src/layers/nary_eltwise_layers.cpp +++ b/modules/dnn/src/layers/nary_eltwise_layers.cpp @@ -112,7 +112,7 @@ public: op == OPERATION::LESS_EQUAL ); if (op == OPERATION::MAX || op == OPERATION::MIN || op == OPERATION::SUM || - op == OPERATION::PROD || op == OPERATION::DIV) + op == OPERATION::PROD || op == OPERATION::DIV || op == OPERATION::ADD) return backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_CUDA; return backendId == DNN_BACKEND_OPENCV; } @@ -688,6 +688,7 @@ public: case OPERATION::SUM: return cuda4dnn::EltwiseOpType::SUM; case OPERATION::PROD: return cuda4dnn::EltwiseOpType::PRODUCT; case OPERATION::DIV: return cuda4dnn::EltwiseOpType::DIV; + case OPERATION::ADD: return cuda4dnn::EltwiseOpType::SUM; default: CV_Error(Error::StsNotImplemented, "Other operators except MAX, MIN, SUM, PRODUCT and DIV are not supported with cuda."); } }(); From c2b7c1f13b998a23dc9e1c8f18324b9114052782 Mon Sep 17 00:00:00 2001 From: Yuantao Feng Date: Sat, 11 Feb 2023 02:03:29 +0800 Subject: [PATCH 018/199] Merge pull request #23219 from fengyuentau:add_gelu Add GELU layer for vision transformers * add gelu and gelu approximation * drop setKernelParams --- .../dnn/include/opencv2/dnn/all_layers.hpp | 12 ++ modules/dnn/src/init.cpp | 2 + modules/dnn/src/layers/elementwise_layers.cpp | 67 +++++++ .../dnn/src/onnx/onnx_graph_simplifier.cpp | 179 ++++++++++++++++++ modules/dnn/src/onnx/onnx_importer.cpp | 3 +- modules/dnn/src/opencl/activations.cl | 24 +++ modules/dnn/test/test_onnx_importer.cpp | 6 + 7 files changed, 292 insertions(+), 1 deletion(-) diff --git 
a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 405f761060..49be0674f4 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -806,6 +806,18 @@ CV__DNN_INLINE_NS_BEGIN static Ptr create(const LayerParams ¶ms); }; + class CV_EXPORTS GeluLayer : public ActivationLayer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS GeluApproximationLayer : public ActivationLayer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + class CV_EXPORTS ThresholdedReluLayer : public ActivationLayer { public: diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp index 72eca9ed4e..a62312375c 100644 --- a/modules/dnn/src/init.cpp +++ b/modules/dnn/src/init.cpp @@ -145,6 +145,8 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(HardSigmoid, HardSigmoidLayer); CV_DNN_REGISTER_LAYER_CLASS(Selu, SeluLayer); CV_DNN_REGISTER_LAYER_CLASS(ThresholdedRelu,ThresholdedReluLayer); + CV_DNN_REGISTER_LAYER_CLASS(Gelu, GeluLayer); + CV_DNN_REGISTER_LAYER_CLASS(GeluApproximation, GeluApproximationLayer); CV_DNN_REGISTER_LAYER_CLASS(BatchNorm, BatchNormLayer); CV_DNN_REGISTER_LAYER_CLASS(MaxUnpool, MaxUnpoolLayer); CV_DNN_REGISTER_LAYER_CLASS(Dropout, BlankLayer); diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index a4b71ddddf..9819073bc6 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -837,6 +837,57 @@ private: static const char* const ocl_kernel_name; }; +struct GeluFunctor : public BaseDefaultFunctor +{ + typedef GeluLayer Layer; + + explicit GeluFunctor() {} + + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV; + } + + inline float calculate(float x) const + { + return 0.5f * x * (1.0f + erf(x * M_SQRT1_2)); + } + + int64 getFLOPSPerElement() const { return 100; } 
+}; + +template<> +const char* const BaseDefaultFunctor::ocl_kernel_name = "GeluForward"; + +namespace GeluApproximationConstants +{ + static constexpr float sqrt_2_pi = 0.7978845834732056f; + static constexpr float coef_sqrt_2_pi = 0.044714998453855515f * sqrt_2_pi; +} + +struct GeluApproximationFunctor : public BaseDefaultFunctor +{ + typedef GeluApproximationLayer Layer; + + explicit GeluApproximationFunctor() {} + + bool supportBackend(int backendId, int) + { + return backendId == DNN_BACKEND_OPENCV; + } + + inline float calculate(float x) const + { + return 0.5f * x * (1.f + tanh(x * (GeluApproximationConstants::sqrt_2_pi + + GeluApproximationConstants::coef_sqrt_2_pi * x * x))); + } + + int64 getFLOPSPerElement() const { return 100; } +}; + +template<> +const char* const BaseDefaultFunctor::ocl_kernel_name = "GeluApproximationForward"; + struct TanHFunctor : public BaseDefaultFunctor { typedef TanHLayer Layer; @@ -2694,6 +2745,22 @@ Ptr ReLU6Layer::create(const LayerParams& params) return l; } +Ptr GeluLayer::create(const LayerParams& params) +{ + Ptr l(new ElementWiseLayer(GeluFunctor())); + l->setParamsFrom(params); + + return l; +} + +Ptr GeluApproximationLayer::create(const LayerParams& params) +{ + Ptr l(new ElementWiseLayer(GeluApproximationFunctor())); + l->setParamsFrom(params); + + return l; +} + Ptr TanHLayer::create(const LayerParams& params) { Ptr l(new ElementWiseLayer()); diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp index c977a4761d..e9559a4c19 100644 --- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp +++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp @@ -132,6 +132,183 @@ private: opencv_onnx::GraphProto& net; }; +/* Fusion for Gelu. 
+ + Graph before fusion: + +---------------------------------------------+ + | | + [Input] -> Div[B=sqrt(2)] -> Erf -> Add[B=1] -> Mul -> Mul[B=0.5] -> [Output] + + Graph after fusion: + [Input] -> Gelu -> [Output] + +*/ +class GeluSubGraph : public Subgraph +{ +public: + GeluSubGraph() + { + int input = addNodeToMatch(""); + int div = addNodeToMatch("Div", input, addNodeToMatch("") /* B=sqrt(2) */ ); + int erf = addNodeToMatch("Erf", div); + int add = addNodeToMatch("Add", erf, addNodeToMatch("") /* B=1 */ ); + int mul = addNodeToMatch("Mul", input, add); + addNodeToMatch("Mul", mul, addNodeToMatch("") /* B=0.5 */) ; + + setFusedNode("Gelu", input); + } + + static bool isWithInitializer(const std::vector& matchedNodesIds) + { + // if node.getType() is Constant, Constant nodes are placed between other nodes + if (matchedNodesIds[2] - matchedNodesIds[1] != 1) + return false; + // if Initializer, there is no Constant node between other nodes + return true; + } + + static float extractConstant(const Ptr& net, int node_id, int input_id, bool withInitializer) + { + if (withInitializer) + { + auto onnx_net = net.dynamicCast(); + int initializer_id = onnx_net->getInputInitializerId(node_id, input_id); + Mat const_mat = onnx_net->getMatFromInitializer(initializer_id); + return *const_mat.ptr(); + } else { + const Ptr node = net->getNode(node_id); + int constant_id = getInputNodeId(net, node, input_id); + Ptr constant_ptr = net->getNode(constant_id); + opencv_onnx::NodeProto* constant_node = constant_ptr.dynamicCast()->node; + opencv_onnx::TensorProto constant_proto = constant_node->attribute(0).t(); + Mat constant_mat = getMatFromTensor(constant_proto); + return *constant_mat.ptr(); + } + } + + virtual bool match(const Ptr& net, int nodeId, + std::vector& matchedNodesIds, + std::vector& targetNodesIds) CV_OVERRIDE + { + if (Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds)) + { + bool withInitializer = isWithInitializer(matchedNodesIds); + + // Check 
Div[B=sqrt(2)] + float divisor = extractConstant(net, matchedNodesIds[0], 1, withInitializer); + if (divisor - M_SQRT2 >= 1e-6) + return false; + + // Check Add[B=1] + float add_const = extractConstant(net, matchedNodesIds[2], 1, withInitializer); + if (add_const - 1.f >= 1e-6) + return false; + + // Check Mul[B=0.5] + float mul_const = extractConstant(net, matchedNodesIds[4], 1, withInitializer); + if (mul_const - 0.5f >= 1e-6) + return false; + + return true; + } + return false; + } +}; + +/* Fusion for GeluApproximation. + + Graph before fusion: + +--------+------+----------------+------------------------------------+ + | | | | | + [Input] -> Mul -> Mul -> Mul[ ] -> Add -> Mul[ ] -> Tanh -> Add[A=1] -> Mul -> Mul(A=0.5) -> [Output] + / \ + A=0.044714998453855515 A=sqrt(2/pie) + + Graph after fusion: + [Input] -> GeluApproximation -> [Output] + +*/ +class GeluApproximationSubGraph : public Subgraph +{ +public: + GeluApproximationSubGraph() + { + int input = addNodeToMatch(""); + int mul0 = addNodeToMatch("Mul", input, input); + int mul1 = addNodeToMatch("Mul", input, mul0); + int mul2 = addNodeToMatch("Mul", addNodeToMatch("") /* A=0.044714998453855515 */, mul1); + int add0 = addNodeToMatch("Add", input, mul2); + int mul3 = addNodeToMatch("Mul", addNodeToMatch("") /* A=sqrt(2/pie) */, add0); + int tanh = addNodeToMatch("Tanh", mul3); + int add1 = addNodeToMatch("Add", addNodeToMatch("") /* A=1 */, tanh); + int mul4 = addNodeToMatch("Mul", input, add1); + addNodeToMatch("Mul", addNodeToMatch("") /* A=0.5 */, mul4); + + setFusedNode("GeluApproximation", input); + } + + static bool isWithInitializer(const std::vector& matchedNodesIds) + { + // if node.getType() is Constant, Constant nodes are placed between other nodes + if (matchedNodesIds[2] - matchedNodesIds[1] != 1) + return false; + // if Initializer, there is no Constant node between other nodes + return true; + } + + static float extractConstant(const Ptr& net, int node_id, int input_id, bool withInitializer) 
+ { + if (withInitializer) + { + auto onnx_net = net.dynamicCast(); + int initializer_id = onnx_net->getInputInitializerId(node_id, input_id); + Mat const_mat = onnx_net->getMatFromInitializer(initializer_id); + return *const_mat.ptr(); + } else { + const Ptr node = net->getNode(node_id); + int constant_id = getInputNodeId(net, node, input_id); + Ptr constant_ptr = net->getNode(constant_id); + opencv_onnx::NodeProto* constant_node = constant_ptr.dynamicCast()->node; + opencv_onnx::TensorProto constant_proto = constant_node->attribute(0).t(); + Mat constant_mat = getMatFromTensor(constant_proto); + return *constant_mat.ptr(); + } + } + + virtual bool match(const Ptr& net, int nodeId, + std::vector& matchedNodesIds, + std::vector& targetNodesIds) CV_OVERRIDE + { + if (Subgraph::match(net, nodeId, matchedNodesIds, targetNodesIds)) + { + bool withInitializer = isWithInitializer(matchedNodesIds); + + // Check Mul[A=0.044714998453855515] + float coef = extractConstant(net, matchedNodesIds[2], 0, withInitializer); + if (coef - 0.044714998453855515 >= 1e-6) + return false; + + // Check Mul[A=sqrt(2/pie)] + float sqrt_2_pie = extractConstant(net, matchedNodesIds[4], 0, withInitializer); + if (sqrt_2_pie - 0.7978845834732056 >= 1e-6) + return false; + + // Check Add[A=1] + float add_const = extractConstant(net, matchedNodesIds[6], 0, withInitializer); + if (add_const - 1.f >= 1e-6) + return false; + + // Check Mul[A=0.5] + float mul_const = extractConstant(net, matchedNodesIds[8], 0, withInitializer); + if (mul_const - 0.5f >= 1e-6) + return false; + + return true; + } + return false; + } +}; + class LayerNormSubGraph : public Subgraph { public: @@ -904,6 +1081,8 @@ public: void simplifySubgraphs(opencv_onnx::GraphProto& net) { std::vector > subgraphs; + subgraphs.push_back(makePtr()); + subgraphs.push_back(makePtr()); subgraphs.push_back(makePtr()); subgraphs.push_back(makePtr()); subgraphs.push_back(makePtr()); diff --git a/modules/dnn/src/onnx/onnx_importer.cpp 
b/modules/dnn/src/onnx/onnx_importer.cpp index 48a75f728e..307a05ef4b 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -4051,7 +4051,8 @@ void ONNXImporter::buildDispatchMap_ONNX_AI(int opset_version) std::vector simpleLayers{"Acos", "Acosh", "Asin", "Asinh", "Atan", "Atanh", "Ceil", "Celu", "Cos", "Cosh", "Dropout", "Erf", "Exp", "Floor", "HardSigmoid", "HardSwish", "Identity", "Log", "Round", "Reciprocal", "Selu", "Sign", "Sigmoid", "Sin", "Sinh", "Softmax", - "Softplus", "Softsign", "Shrink", "Sqrt", "Tan", "ThresholdedRelu"}; + "Softplus", "Softsign", "Shrink", "Sqrt", "Tan", "ThresholdedRelu", "Gelu", + "GeluApproximation"}; for (const auto& name : simpleLayers) { dispatch[name] = &ONNXImporter::parseSimpleLayers; diff --git a/modules/dnn/src/opencl/activations.cl b/modules/dnn/src/opencl/activations.cl index 0624f48e19..317d2c1e62 100644 --- a/modules/dnn/src/opencl/activations.cl +++ b/modules/dnn/src/opencl/activations.cl @@ -307,6 +307,30 @@ __kernel void ThresholdedReluForward(const int n, __global T* in, __global T* ou out[index] = (in[index] > alpha ? 
in[index] : 0.f); } +__kernel void GeluForward(const int n, __global T* in, __global T* out) +{ + int index = get_global_id(0); + if (index < n) + { + T x = in[index]; + out[index] = (T)0.5f * x * ( (T)1.f + erf(x * M_SQRT1_2) ); + } +} + +__kernel void GeluApproximationForward(const int n, __global T* in, __global T* out) +{ + // see GeluApproximationConstants from modules/dnn/src/layers/elementwise_layers.cpp + const T sqrt_2_pi = 0.7978845834732056f; + const T coef_sqrt_2_pi = 0.044714998453855515f * sqrt_2_pi; + + int index = get_global_id(0); + if(index < n) + { + T x = in[index]; + out[index] = (T)0.5f * x * ( (T)1.f + tanh(x * (sqrt_2_pi + coef_sqrt_2_pi * x * x)) ); + } +} + __kernel void ShrinkForward(const int n, __global T* in, __global T* out, const KERNEL_ARG_DTYPE bias, const KERNEL_ARG_DTYPE lambd) diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 12bbb31372..6698174521 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -2456,6 +2456,12 @@ TEST_P(Test_ONNX_layers, LayerNormExpanded) testONNXModels("layer_norm_expanded_with_initializers"); } +TEST_P(Test_ONNX_layers, Gelu) +{ + testONNXModels("gelu"); + testONNXModels("gelu_approximation"); +} + INSTANTIATE_TEST_CASE_P(/**/, Test_ONNX_nets, dnnBackendsAndTargets()); }} // namespace From 56102737d775f147a0a99578cbad3e218a3633d3 Mon Sep 17 00:00:00 2001 From: Yannis Guyon Date: Fri, 10 Feb 2023 23:46:21 +0100 Subject: [PATCH 019/199] Merge pull request #23131 from y-guyon:align_ptr_intrin_sse Fix misaligned-pointer-use in intrin_sse.hpp * Fix misaligned-pointer-use in intrin_sse.hpp * Use _mm_loadu_si32() instead of memcpy() * Use CV_DECL_ALIGNED instead of _mm_loadu_si32() --- modules/core/include/opencv2/core/hal/intrin_sse.hpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/intrin_sse.hpp 
b/modules/core/include/opencv2/core/hal/intrin_sse.hpp index 443ee16097..9d17f71666 100644 --- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp @@ -1921,11 +1921,12 @@ OPENCV_HAL_IMPL_SSE_EXPAND(v_int16x8, v_int32x4, short, _v128_cvtepi16_epi OPENCV_HAL_IMPL_SSE_EXPAND(v_uint32x4, v_uint64x2, unsigned, _v128_cvtepu32_epi64) OPENCV_HAL_IMPL_SSE_EXPAND(v_int32x4, v_int64x2, int, _v128_cvtepi32_epi64) -#define OPENCV_HAL_IMPL_SSE_EXPAND_Q(_Tpvec, _Tp, intrin) \ - inline _Tpvec v_load_expand_q(const _Tp* ptr) \ - { \ - __m128i a = _mm_cvtsi32_si128(*(const int*)ptr); \ - return _Tpvec(intrin(a)); \ +#define OPENCV_HAL_IMPL_SSE_EXPAND_Q(_Tpvec, _Tp, intrin) \ + inline _Tpvec v_load_expand_q(const _Tp* ptr) \ + { \ + typedef int CV_DECL_ALIGNED(1) unaligned_int; \ + __m128i a = _mm_cvtsi32_si128(*(const unaligned_int*)ptr); \ + return _Tpvec(intrin(a)); \ } OPENCV_HAL_IMPL_SSE_EXPAND_Q(v_uint32x4, uchar, _v128_cvtepu8_epi32) From 325fe7e663daa7dfe29a5213beb7b1b0598923db Mon Sep 17 00:00:00 2001 From: hzcyf Date: Sat, 11 Feb 2023 14:11:40 +0800 Subject: [PATCH 020/199] add support for Orbbec Femto Mega RGB-D camera --- .../obsensor_stream_channel_interface.hpp | 1 + .../obsensor_stream_channel_msmf.cpp | 2 +- .../obsensor_uvc_stream_channel.cpp | 18 +++++++++++++ modules/videoio/src/cap_obsensor_capture.cpp | 26 ++++++++++++++----- 4 files changed, 40 insertions(+), 7 deletions(-) diff --git a/modules/videoio/src/cap_obsensor/obsensor_stream_channel_interface.hpp b/modules/videoio/src/cap_obsensor/obsensor_stream_channel_interface.hpp index ac0cf1259e..ff78c5a696 100644 --- a/modules/videoio/src/cap_obsensor/obsensor_stream_channel_interface.hpp +++ b/modules/videoio/src/cap_obsensor/obsensor_stream_channel_interface.hpp @@ -36,6 +36,7 @@ namespace obsensor { #define OBSENSOR_CAM_VID 0x2bc5 // usb vid #define OBSENSOR_ASTRA2_PID 0x0660 // pid of Orbbec Astra 2 Camera #define OBSENSOR_GEMINI2_PID 0x0670 // 
pid of Orbbec Gemini 2 Camera +#define OBSENSOR_FEMTO_MEGA_PID 0x0669 // pid of Orbbec Femto Mega Camera enum StreamType { diff --git a/modules/videoio/src/cap_obsensor/obsensor_stream_channel_msmf.cpp b/modules/videoio/src/cap_obsensor/obsensor_stream_channel_msmf.cpp index 7d984b63de..5de686430f 100644 --- a/modules/videoio/src/cap_obsensor/obsensor_stream_channel_msmf.cpp +++ b/modules/videoio/src/cap_obsensor/obsensor_stream_channel_msmf.cpp @@ -499,7 +499,7 @@ STDMETHODIMP MSMFStreamChannel::OnEvent(DWORD /*sidx*/, IMFMediaEvent* /*event*/ STDMETHODIMP MSMFStreamChannel::OnFlush(DWORD) { - if (streamState_ == STREAM_STARTING) + if (streamState_ != STREAM_STOPED) { std::unique_lock lock(streamStateMutex_); streamState_ = STREAM_STOPED; diff --git a/modules/videoio/src/cap_obsensor/obsensor_uvc_stream_channel.cpp b/modules/videoio/src/cap_obsensor/obsensor_uvc_stream_channel.cpp index 1bcb6ddf76..55c2d03fd7 100644 --- a/modules/videoio/src/cap_obsensor/obsensor_uvc_stream_channel.cpp +++ b/modules/videoio/src/cap_obsensor/obsensor_uvc_stream_channel.cpp @@ -338,6 +338,24 @@ bool IUvcStreamChannel::getProperty(int propId, uint8_t* recvData, uint32_t* rec *recvDataSize = sizeof(CameraParam); memcpy(recvData, ¶m, *recvDataSize); } + else if(OBSENSOR_FEMTO_MEGA_PID == devInfo_.pid){ + // return default param + CameraParam param; + param.p0[0] = 748.370f; + param.p0[1] = 748.296f; + param.p0[2] = 634.670f; + param.p0[3] = 341.196f; + param.p1[0] = 374.185f; + param.p1[1] = 374.148f; + param.p1[2] = 317.335f; + param.p1[3] = 170.598f; + param.p6[0] = 1280; + param.p6[1] = 720; + param.p7[0] = 640; + param.p7[1] = 360; + *recvDataSize = sizeof(CameraParam); + memcpy(recvData, ¶m, *recvDataSize); + } else{ rst &= setXu(2, OB_EXT_CMD5, sizeof(OB_EXT_CMD5)); rst &= getXu(2, &rcvData, &rcvLen); diff --git a/modules/videoio/src/cap_obsensor_capture.cpp b/modules/videoio/src/cap_obsensor_capture.cpp index ccbfd61a5c..8138f09333 100644 --- 
a/modules/videoio/src/cap_obsensor_capture.cpp +++ b/modules/videoio/src/cap_obsensor_capture.cpp @@ -34,8 +34,10 @@ VideoCapture_obsensor::VideoCapture_obsensor(int index) : isOpened_(false) { static const obsensor::StreamProfile colorProfile = { 640, 480, 30, obsensor::FRAME_FORMAT_MJPG }; static const obsensor::StreamProfile depthProfile = {640, 480, 30, obsensor::FRAME_FORMAT_Y16}; - static const obsensor::StreamProfile gemini2depthProfile = {1280, 800, 30, obsensor::FRAME_FORMAT_Y14}; - static const obsensor::StreamProfile astra2depthProfile = {640, 480, 30, obsensor::FRAME_FORMAT_Y14}; + static const obsensor::StreamProfile gemini2DepthProfile = {1280, 800, 30, obsensor::FRAME_FORMAT_Y14}; + static const obsensor::StreamProfile astra2DepthProfile = {640, 480, 30, obsensor::FRAME_FORMAT_Y14}; + static const obsensor::StreamProfile megaColorProfile = {1280, 720, 30, obsensor::FRAME_FORMAT_MJPG}; + static const obsensor::StreamProfile megaDepthProfile = {640, 576, 30, obsensor::FRAME_FORMAT_Y16}; streamChannelGroup_ = obsensor::getStreamChannelGroup(index); if (!streamChannelGroup_.empty()) @@ -46,11 +48,17 @@ VideoCapture_obsensor::VideoCapture_obsensor(int index) : isOpened_(false) switch (streamType) { case obsensor::OBSENSOR_STREAM_COLOR: - channel->start(colorProfile, [&](obsensor::Frame* frame) { + { + auto profile = colorProfile; + if(OBSENSOR_FEMTO_MEGA_PID == channel->getPid()){ + profile = megaColorProfile; + } + channel->start(profile, [&](obsensor::Frame* frame) { std::unique_lock lk(frameMutex_); colorFrame_ = Mat(1, frame->dataSize, CV_8UC1, frame->data).clone(); frameCv_.notify_all(); }); + } break; case obsensor::OBSENSOR_STREAM_DEPTH: { @@ -59,11 +67,13 @@ VideoCapture_obsensor::VideoCapture_obsensor(int index) : isOpened_(false) obsensor::StreamProfile profile = depthProfile; if(OBSENSOR_GEMINI2_PID == channel->getPid()){ - profile = gemini2depthProfile; + profile = gemini2DepthProfile; } else if(OBSENSOR_ASTRA2_PID == channel->getPid()){ - - 
profile = astra2depthProfile; + profile = astra2DepthProfile; + } + else if(OBSENSOR_FEMTO_MEGA_PID == channel->getPid()){ + profile = megaDepthProfile; } channel->start(profile, [&](obsensor::Frame* frame) { @@ -127,6 +137,10 @@ bool VideoCapture_obsensor::retrieveFrame(int outputType, OutputArray frame) grabbedDepthFrame_ = grabbedDepthFrame_*0.8; grabbedDepthFrame_.copyTo(frame); } + else if(OBSENSOR_FEMTO_MEGA_PID == streamChannelGroup_.front()->getPid()){ + Rect rect(0, 0, 640, 360); + grabbedDepthFrame_(rect).copyTo(frame); + } else{ grabbedDepthFrame_.copyTo(frame); } From 76350cd30f6c4697b5be7968ba0b8ab9cc28b64c Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Mon, 13 Feb 2023 17:00:20 +0300 Subject: [PATCH 021/199] Merge pull request #23161 from dkurt:dnn_tflite TFLite models importer * initial commit * Refactor TFLiteImporter * Better FlatBuffers detection * Add permute before 4D->3D reshape * Track layers layout * TFLite Convolution2DTransposeBias layer * Skip TFLite tests without FlatBuffers * Fix check of FlatBuffers in tests. Add readNetFromTFLite from buffer * TFLite Max Unpooling test * Add skip for TFLite unpooling test * Revert DW convolution workaround * Fix ObjC bindings * Better errors handling * Regenerate TFLite schema using flatc * dnn(tflite): more checks, better logging * Checks for unimplemented fusion. 
Fix tests --- CMakeLists.txt | 4 + cmake/OpenCVFindFlatBuffers.cmake | 15 + modules/dnn/CMakeLists.txt | 17 + modules/dnn/include/opencv2/dnn/dnn.hpp | 20 + modules/dnn/misc/objc/gen_dict.json | 4 +- modules/dnn/src/dnn_read.cpp | 6 + modules/dnn/src/tflite/builtin_op_data.h | 41 + modules/dnn/src/tflite/schema.fbs | 1341 ++++++++++++++++++++ modules/dnn/src/tflite/tflite_importer.cpp | 644 ++++++++++ modules/dnn/test/test_tflite_importer.cpp | 123 ++ platforms/js/opencv_js.config.py | 2 +- 11 files changed, 2215 insertions(+), 2 deletions(-) create mode 100644 cmake/OpenCVFindFlatBuffers.cmake create mode 100644 modules/dnn/src/tflite/builtin_op_data.h create mode 100644 modules/dnn/src/tflite/schema.fbs create mode 100644 modules/dnn/src/tflite/tflite_importer.cpp create mode 100644 modules/dnn/test/test_tflite_importer.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index cbf43a1605..5543cba93a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -471,6 +471,9 @@ OCV_OPTION(WITH_OBSENSOR "Include obsensor support (Orbbec RGB-D modules: Astra+ OCV_OPTION(WITH_CANN "Include CANN support" OFF VISIBLE_IF TRUE VERIFY HAVE_CANN) +OCV_OPTION(WITH_FLATBUFFERS "Include FlatBuffers support" OFF + VISIBLE_IF TRUE + VERIFY HAVE_FLATBUFFERS) # OpenCV build components # =================================================== @@ -750,6 +753,7 @@ include(cmake/OpenCVFindLibsVideo.cmake) include(cmake/OpenCVFindLibsPerf.cmake) include(cmake/OpenCVFindLAPACK.cmake) include(cmake/OpenCVFindProtobuf.cmake) +include(cmake/OpenCVFindFlatBuffers.cmake) if(WITH_TENGINE) include(cmake/OpenCVFindTengine.cmake) endif() diff --git a/cmake/OpenCVFindFlatBuffers.cmake b/cmake/OpenCVFindFlatBuffers.cmake new file mode 100644 index 0000000000..2b204314eb --- /dev/null +++ b/cmake/OpenCVFindFlatBuffers.cmake @@ -0,0 +1,15 @@ +set(HAVE_FLATBUFFERS FALSE) + +if(NOT WITH_FLATBUFFERS) + return() +endif() + +list(APPEND CUSTOM_STATUS flatbuffers) + +find_package(flatbuffers QUIET) +if(flatbuffers_FOUND) 
+ set(HAVE_FLATBUFFERS 1) + list(APPEND CUSTOM_STATUS_flatbuffers " FlatBuffers:" "${flatbuffers_VERSION}") +else() + list(APPEND CUSTOM_STATUS_flatbuffers " FlatBuffers:" "NO") +endif() diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index 1ec21c085d..e5aca128be 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -133,6 +133,17 @@ if(NOT BUILD_PROTOBUF) list(APPEND include_dirs ${Protobuf_INCLUDE_DIRS}) endif() +if(HAVE_FLATBUFFERS) + list(APPEND libs flatbuffers::flatbuffers) + list(APPEND fw_srcs "${CMAKE_CURRENT_BINARY_DIR}/schema_generated.h") + + add_custom_command( + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema_generated.h" + COMMAND flatbuffers::flatc --cpp -o "${CMAKE_CURRENT_BINARY_DIR}" "${CMAKE_CURRENT_LIST_DIR}/src/tflite/schema.fbs") + + ocv_target_compile_definitions(${the_module} PRIVATE "HAVE_FLATBUFFERS=1") +endif() + set(sources_options "") list(APPEND libs ${LAPACK_LIBRARIES}) @@ -280,3 +291,9 @@ if(TARGET ocv.3rdparty.cann AND OPENCV_TEST_DNN_CANN) ocv_target_link_libraries(opencv_test_dnn ocv.3rdparty.cann) endif() endif() + +if(HAVE_FLATBUFFERS) + if(TARGET opencv_test_dnn) + ocv_target_compile_definitions(opencv_test_dnn PRIVATE "HAVE_FLATBUFFERS=1") + endif() +endif() diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index ffc9473c6e..11ad69b8d9 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -953,6 +953,26 @@ CV__DNN_INLINE_NS_BEGIN CV_EXPORTS Net readNetFromTensorflow(const char *bufferModel, size_t lenModel, const char *bufferConfig = NULL, size_t lenConfig = 0); + /** @brief Reads a network model stored in TFLite framework's format. + * @param model path to the .tflite file with binary flatbuffers description of the network architecture + * @returns Net object. + */ + CV_EXPORTS_W Net readNetFromTFLite(const String &model); + + /** @brief Reads a network model stored in TFLite framework's format. 
+ * @param bufferModel buffer containing the content of the tflite file + * @returns Net object. + */ + CV_EXPORTS_W Net readNetFromTFLite(const std::vector& bufferModel); + + /** @brief Reads a network model stored in TFLite framework's format. + * @details This is an overloaded member function, provided for convenience. + * It differs from the above function only in what argument(s) it accepts. + * @param bufferModel buffer containing the content of the tflite file + * @param lenModel length of bufferModel + */ + CV_EXPORTS Net readNetFromTFLite(const char *bufferModel, size_t lenModel); + /** * @brief Reads a network model stored in Torch7 framework's format. * @param model path to the file, dumped from Torch by using torch.save() function. diff --git a/modules/dnn/misc/objc/gen_dict.json b/modules/dnn/misc/objc/gen_dict.json index 6072bdfc01..8aab0a5500 100644 --- a/modules/dnn/misc/objc/gen_dict.json +++ b/modules/dnn/misc/objc/gen_dict.json @@ -8,7 +8,9 @@ "(Net*)readNetFromONNX:(NSString*)onnxFile" : { "readNetFromONNX" : {"name" : "readNetFromONNXFile"} }, "(Net*)readNetFromONNX:(ByteVector*)buffer" : { "readNetFromONNX" : {"name" : "readNetFromONNXBuffer"} }, "(Net*)readNetFromTensorflow:(NSString*)model config:(NSString*)config" : { "readNetFromTensorflow" : {"name" : "readNetFromTensorflowFile"} }, - "(Net*)readNetFromTensorflow:(ByteVector*)bufferModel bufferConfig:(ByteVector*)bufferConfig" : { "readNetFromTensorflow" : {"name" : "readNetFromTensorflowBuffer"} } + "(Net*)readNetFromTensorflow:(ByteVector*)bufferModel bufferConfig:(ByteVector*)bufferConfig" : { "readNetFromTensorflow" : {"name" : "readNetFromTensorflowBuffer"} }, + "(Net*)readNetFromTFLite:(NSString*)model" : { "readNetFromTFLite" : {"name" : "readNetFromTFLiteFile"} }, + "(Net*)readNetFromTFLite:(ByteVector*)buffer" : { "readNetFromTFLite" : {"name" : "readNetFromTFLiteBuffer"} } }, "Net": { "(void)forward:(NSMutableArray*)outputBlobs outputName:(NSString*)outputName" : { "forward" : 
{"name" : "forwardOutputBlobs"} }, diff --git a/modules/dnn/src/dnn_read.cpp b/modules/dnn/src/dnn_read.cpp index 931170722b..9c06ced3c4 100644 --- a/modules/dnn/src/dnn_read.cpp +++ b/modules/dnn/src/dnn_read.cpp @@ -29,6 +29,10 @@ Net readNet(const String& _model, const String& _config, const String& _framewor std::swap(model, config); return readNetFromTensorflow(model, config); } + if (framework == "tflite" || modelExt == "tflite") + { + return readNetFromTFLite(model); + } if (framework == "torch" || modelExt == "t7" || modelExt == "net" || configExt == "t7" || configExt == "net") { return readNetFromTorch(model.empty() ? config : model); @@ -66,6 +70,8 @@ Net readNet(const String& _framework, const std::vector& bufferModel, CV_Error(Error::StsNotImplemented, "Reading Torch models from buffers"); else if (framework == "dldt") return readNetFromModelOptimizer(bufferConfig, bufferModel); + else if (framework == "tflite") + return readNetFromTFLite(bufferModel); CV_Error(Error::StsError, "Cannot determine an origin framework with a name " + framework); } diff --git a/modules/dnn/src/tflite/builtin_op_data.h b/modules/dnn/src/tflite/builtin_op_data.h new file mode 100644 index 0000000000..114d4f0cf8 --- /dev/null +++ b/modules/dnn/src/tflite/builtin_op_data.h @@ -0,0 +1,41 @@ +// source: https://github.com/tensorflow/tensorflow/blob/b2f5959ff823a8ed5bf4883e785f8f96d4253a8b/tensorflow/lite/core/c/builtin_op_data.h +typedef enum { + kTfLitePaddingUnknown = 0, + kTfLitePaddingSame, + kTfLitePaddingValid, +} TfLitePadding; + +typedef enum { + kTfLiteActNone = 0, + kTfLiteActRelu, + kTfLiteActReluN1To1, // min(max(-1, x), 1) + kTfLiteActRelu6, // min(max(0, x), 6) + kTfLiteActTanh, + kTfLiteActSignBit, + kTfLiteActSigmoid, +} TfLiteFusedActivation; + +typedef struct { + int width; + int height; + int width_offset; + int height_offset; +} TfLitePaddingValues; + +typedef struct { + TfLitePadding padding; + int stride_width; + int stride_height; + int filter_width; + int 
filter_height; + TfLiteFusedActivation activation; + struct { + TfLitePaddingValues padding; + } computed; +} TfLitePoolParams; + +typedef struct { + TfLitePadding padding; + int stride_width; + int stride_height; +} TfLiteTransposeConvParams; diff --git a/modules/dnn/src/tflite/schema.fbs b/modules/dnn/src/tflite/schema.fbs new file mode 100644 index 0000000000..7eb63f60ef --- /dev/null +++ b/modules/dnn/src/tflite/schema.fbs @@ -0,0 +1,1341 @@ +// source: https://github.com/tensorflow/tensorflow/blob/b0164f014fd4f1b5af2c7b578aa7687198c5d92e/tensorflow/lite/schema/schema.fbs +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Revision History +// Version 0: Initial version. +// Version 1: Add subgraphs to schema. +// Version 2: Rename operators to conform to NN API. +// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers. +// Version 3a: Add new builtin op code field. Has backward compatibility with +// version 3. +// Version 3b: Rename fields in SignatureDef. Has backward compatibility with +// version 3 and 3a. + +namespace opencv_tflite; + +// This corresponds to the version. +file_identifier "TFL3"; +// File extension of any written files. +file_extension "tflite"; + +// IMPORTANT: All new members of tables, enums and unions must be added at the +// end to ensure backwards compatibility. + +// The type of data stored in a tensor. 
+enum TensorType : byte { + FLOAT32 = 0, + FLOAT16 = 1, + INT32 = 2, + UINT8 = 3, + INT64 = 4, + STRING = 5, + BOOL = 6, + INT16 = 7, + COMPLEX64 = 8, + INT8 = 9, + FLOAT64 = 10, + COMPLEX128 = 11, + UINT64 = 12, + // Experimental: Resource and variant types are experimental and subject + // to change. Do not implement custom kernels using resource & variant types + // now. + RESOURCE = 13, + VARIANT = 14, + UINT32 = 15, + UINT16 = 16, + INT4 = 17, +} + +// Custom quantization parameters for experimenting with new quantization +// techniques. +table CustomQuantization { + custom:[ubyte] (force_align: 16); +} + +// Represents a specific quantization technique's parameters. +union QuantizationDetails { + CustomQuantization, +} + +// Parameters for converting a quantized tensor back to float. +table QuantizationParameters { + // These four parameters are the asymmetric linear quantization parameters. + // Given a quantized value q, the corresponding float value f should be: + // f = scale * (q - zero_point) + // For other quantization types, the QuantizationDetails below is used. + min:[float]; // For importing back into tensorflow. + max:[float]; // For importing back into tensorflow. + scale:[float]; // For dequantizing the tensor's values. + zero_point:[long]; + + // If this is not none, the other quantization parameters (i.e. min, max, + // scale, zero_point fields above) are ignored and the value of the + // QuantizationDetails union should be used. + details:QuantizationDetails; + + // Specifies the dimension of the Tensor's shape that the scales and + // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1] + // with quantization params: + // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantized_dimension=1 + // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1 + // t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2 + // t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3 + quantized_dimension:int; +} + +// Sparse tensors. +// We use a modification of the TACO format. +// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf +// +// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1), +// potentially with a k-dimensional block (0 <= k <= n) with dims +// (dn, ..., dn+k-1), the format needs to specify: +// 1. In what order to traverse these dimensions. For example, to store a 2-D +// matrix in row major order, the traversal order would be (d0, d1), +// whereas to store it in column major order, the traversal order would be +// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order +// could be (d0, d1, d2, d3). +// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original +// tensor dimension in (d0, ..., dn-1). +// 3. In the traversal order defined above, the format (dense vs. sparse) and +// index metadata for each dimension. For a dense dimension, this is just +// the size of that dimension. For a sparse dimension, it's the same as +// the compressed index defined in the Compressed Sparse Row (CSR) format. +// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html) + +// The storage type for a dimension. Currently we support: +// 1. DENSE: each coordinate in this dimension is stored implicitly. +// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The +// compression technique is the same as what CSR uses. +// More types like a sparse dimension with a different compression technique +// could be added to the list in the future.
+enum DimensionType : byte { + DENSE = 0, + SPARSE_CSR = 1, +} + +table Int32Vector { + values:[int]; +} + +table Uint16Vector { + values:[ushort] (force_align: 4); +} + +table Uint8Vector { + values:[ubyte] (force_align: 4); +} + +// Variable-typed buffer to store the index metadata for a sparse dimension. +// The widest type is Int32 instead of UInt32 because tensor's shape is a int32 +// vector. We don't want the per-dimensional index to overflow that range. +union SparseIndexVector { + Int32Vector, + Uint16Vector, + Uint8Vector +} + +table DimensionMetadata { + // Whether a dimension is dense or sparse. + format:DimensionType; + // Index metadata used for a dimension. + // - If format is DimensionType.DENSE then we use the dense_size field to + // store the size of that dimension. Each index in that dimension is + // stored implicitly. + // - If format is DimensionType.SPARSE_CSR then we use array_segments and + // array_indices to encode that dimension. array_segments represents how + // to segment the indices array, each segment corresponds to one element + // in the previous dimension. array_indices represents the index of the + // non-zero elements within this dimension (as those in the CSR matrix + // format, where the first array is row pointers and the second array is + // column indices). + dense_size:int; + array_segments:SparseIndexVector; + array_indices:SparseIndexVector; +} + +// Parameters to encode a sparse TfLite tensor. +table SparsityParameters { + // The traversal order of the dimensions defined in the `shape` field of the + // conceptual dense tensor. For a n-dimensional tensors with dims (d0, d1, + // ..., dn-1), + // - if not block sparse, the traversal_order is just a permutation of (d0, + // ..., dn-1). For example, a 2-D matrix stored in row-major order would + // have traversal_order = (d0, d1). + // - if block sparse with a k-dimensional block (0 <= k <= n), the + // traversal_order has n + k elements. 
The first n elements are still a + // permutation of (d0, ..., dn-1). The last k elements are a permutation + // of (dn, ..., dn+k-1), defining how to traverse a block internally. For + // example, a 2-D matrix with 2-D blocks, both stored in row-major order + // would have traversal_order = (d0, d1, d2, d3). + traversal_order:[int]; + // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n), + // stores how a block dimension in (dn, ..., dn+k-1) maps to the original + // tensor dimension in (d0, ..., dn-1). + // It's stored in the order of (dn, ..., dn+k-1). + // If not block-sparse, this field is NULL. + block_map:[int]; + // In the traversal order defined above, the metadata needed for + // each dimension to locate the non-zero values in the original dense tensor. + // The size of the dim_metadata array = the size of the traversal_order array + // = n + k. + dim_metadata:[DimensionMetadata]; +} + +// The nested tensor type for VARIANT type. +table VariantSubType { + // The tensor shape. + shape:[int]; + type:TensorType; + // If false, the rank or the number of tensor dimensions is unknown. + // If false, "shape" must be []. + has_rank: bool = false; +} + +table Tensor { + // The tensor shape. The meaning of each entry is operator-specific but + // builtin ops use: [batch size, height, width, number of channels] (That's + // Tensorflow's NHWC). + shape:[int]; + type:TensorType; + // An index that refers to the buffers table at the root of the model. Or, + // if there is no data buffer associated (i.e. intermediate results), then + // this is 0 (which refers to an always existent empty buffer). + // + // The data_buffer itself is an opaque container, with the assumption that the + // target device is little-endian. In addition, all builtin operators assume + // the memory is ordered such that if `shape` is [4, 3, 2], then index + // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint; + name:string; // For debugging and importing back into tensorflow. + quantization:QuantizationParameters; // Optional. + + is_variable:bool = false; + + // Parameters to encode a sparse tensor. See the example in + // tensorflow/lite/testdata/sparse_tensor.json. + sparsity:SparsityParameters; // Optional. + + // Encodes `shape` with unknown dimensions. Unknown dimensions are + // represented with -1. + shape_signature:[int]; // Optional. + + // If false, the rank or the number of tensor dimensions is unknown. + // If false, "shape" must be []. + has_rank: bool = false; + + // The nested Tensor types for VARIANT type. This is always empty for + // non-VARIANT types. This is optional because the nested type can be omitted. + // Currently only 1 subtype is supported. The field is defined as an array for + // flexibility of supporting multiple subtypes in the future. + variant_tensors:[VariantSubType]; +} + +// A list of builtin operators. Builtin operators are slightly faster than custom +// ones, but not by much. Moreover, while custom operators accept an opaque +// object containing configuration parameters, builtins have a predetermined +// set of acceptable options. +// LINT.IfChange +enum BuiltinOperator : int32 { + ADD = 0, + AVERAGE_POOL_2D = 1, + CONCATENATION = 2, + CONV_2D = 3, + DEPTHWISE_CONV_2D = 4, + DEPTH_TO_SPACE = 5, + DEQUANTIZE = 6, + EMBEDDING_LOOKUP = 7, + FLOOR = 8, + FULLY_CONNECTED = 9, + HASHTABLE_LOOKUP = 10, + L2_NORMALIZATION = 11, + L2_POOL_2D = 12, + LOCAL_RESPONSE_NORMALIZATION = 13, + LOGISTIC = 14, + LSH_PROJECTION = 15, + LSTM = 16, + MAX_POOL_2D = 17, + MUL = 18, + RELU = 19, + // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed + // since different model developers use RELU1 in different ways. Never + // create another op called RELU1. 
+ RELU_N1_TO_1 = 20, + RELU6 = 21, + RESHAPE = 22, + RESIZE_BILINEAR = 23, + RNN = 24, + SOFTMAX = 25, + SPACE_TO_DEPTH = 26, + SVDF = 27, + TANH = 28, + CONCAT_EMBEDDINGS = 29, + SKIP_GRAM = 30, + CALL = 31, + CUSTOM = 32, + EMBEDDING_LOOKUP_SPARSE = 33, + PAD = 34, + UNIDIRECTIONAL_SEQUENCE_RNN = 35, + GATHER = 36, + BATCH_TO_SPACE_ND = 37, + SPACE_TO_BATCH_ND = 38, + TRANSPOSE = 39, + MEAN = 40, + SUB = 41, + DIV = 42, + SQUEEZE = 43, + UNIDIRECTIONAL_SEQUENCE_LSTM = 44, + STRIDED_SLICE = 45, + BIDIRECTIONAL_SEQUENCE_RNN = 46, + EXP = 47, + TOPK_V2 = 48, + SPLIT = 49, + LOG_SOFTMAX = 50, + // DELEGATE is a special op type for the operations which are delegated to + // other backends. + // WARNING: Experimental interface, subject to change + DELEGATE = 51, + BIDIRECTIONAL_SEQUENCE_LSTM = 52, + CAST = 53, + PRELU = 54, + MAXIMUM = 55, + ARG_MAX = 56, + MINIMUM = 57, + LESS = 58, + NEG = 59, + PADV2 = 60, + GREATER = 61, + GREATER_EQUAL = 62, + LESS_EQUAL = 63, + SELECT = 64, + SLICE = 65, + SIN = 66, + TRANSPOSE_CONV = 67, + SPARSE_TO_DENSE = 68, + TILE = 69, + EXPAND_DIMS = 70, + EQUAL = 71, + NOT_EQUAL = 72, + LOG = 73, + SUM = 74, + SQRT = 75, + RSQRT = 76, + SHAPE = 77, + POW = 78, + ARG_MIN = 79, + FAKE_QUANT = 80, + REDUCE_PROD = 81, + REDUCE_MAX = 82, + PACK = 83, + LOGICAL_OR = 84, + ONE_HOT = 85, + LOGICAL_AND = 86, + LOGICAL_NOT = 87, + UNPACK = 88, + REDUCE_MIN = 89, + FLOOR_DIV = 90, + REDUCE_ANY = 91, + SQUARE = 92, + ZEROS_LIKE = 93, + FILL = 94, + FLOOR_MOD = 95, + RANGE = 96, + RESIZE_NEAREST_NEIGHBOR = 97, + LEAKY_RELU = 98, + SQUARED_DIFFERENCE = 99, + MIRROR_PAD = 100, + ABS = 101, + SPLIT_V = 102, + UNIQUE = 103, + CEIL = 104, + REVERSE_V2 = 105, + ADD_N = 106, + GATHER_ND = 107, + COS = 108, + WHERE = 109, + RANK = 110, + ELU = 111, + REVERSE_SEQUENCE = 112, + MATRIX_DIAG = 113, + QUANTIZE = 114, + MATRIX_SET_DIAG = 115, + ROUND = 116, + HARD_SWISH = 117, + IF = 118, + WHILE = 119, + NON_MAX_SUPPRESSION_V4 = 120, + NON_MAX_SUPPRESSION_V5 = 
121, + SCATTER_ND = 122, + SELECT_V2 = 123, + DENSIFY = 124, + SEGMENT_SUM = 125, + BATCH_MATMUL = 126, + PLACEHOLDER_FOR_GREATER_OP_CODES = 127, + CUMSUM = 128, + CALL_ONCE = 129, + BROADCAST_TO = 130, + RFFT2D = 131, + CONV_3D = 132, + IMAG=133, + REAL=134, + COMPLEX_ABS=135, + HASHTABLE = 136, + HASHTABLE_FIND = 137, + HASHTABLE_IMPORT = 138, + HASHTABLE_SIZE = 139, + REDUCE_ALL = 140, + CONV_3D_TRANSPOSE = 141, + VAR_HANDLE = 142, + READ_VARIABLE = 143, + ASSIGN_VARIABLE = 144, + BROADCAST_ARGS = 145, + RANDOM_STANDARD_NORMAL = 146, + BUCKETIZE = 147, + RANDOM_UNIFORM = 148, + MULTINOMIAL = 149, + GELU = 150, + DYNAMIC_UPDATE_SLICE = 151, + RELU_0_TO_1 = 152, + UNSORTED_SEGMENT_PROD = 153, + UNSORTED_SEGMENT_MAX = 154, + UNSORTED_SEGMENT_SUM = 155, + ATAN2 = 156, + UNSORTED_SEGMENT_MIN = 157, + SIGN = 158 +} +// LINT.ThenChange(nnapi_linter/linter.proto) + +// Options for the builtin operators. +union BuiltinOptions { + Conv2DOptions, + DepthwiseConv2DOptions, + ConcatEmbeddingsOptions, + LSHProjectionOptions, + Pool2DOptions, + SVDFOptions, + RNNOptions, + FullyConnectedOptions, + SoftmaxOptions, + ConcatenationOptions, + AddOptions, + L2NormOptions, + LocalResponseNormalizationOptions, + LSTMOptions, + ResizeBilinearOptions, + CallOptions, + ReshapeOptions, + SkipGramOptions, + SpaceToDepthOptions, + EmbeddingLookupSparseOptions, + MulOptions, + PadOptions, + GatherOptions, + BatchToSpaceNDOptions, + SpaceToBatchNDOptions, + TransposeOptions, + ReducerOptions, + SubOptions, + DivOptions, + SqueezeOptions, + SequenceRNNOptions, + StridedSliceOptions, + ExpOptions, + TopKV2Options, + SplitOptions, + LogSoftmaxOptions, + CastOptions, + DequantizeOptions, + MaximumMinimumOptions, + ArgMaxOptions, + LessOptions, + NegOptions, + PadV2Options, + GreaterOptions, + GreaterEqualOptions, + LessEqualOptions, + SelectOptions, + SliceOptions, + TransposeConvOptions, + SparseToDenseOptions, + TileOptions, + ExpandDimsOptions, + EqualOptions, + NotEqualOptions, + 
ShapeOptions, + PowOptions, + ArgMinOptions, + FakeQuantOptions, + PackOptions, + LogicalOrOptions, + OneHotOptions, + LogicalAndOptions, + LogicalNotOptions, + UnpackOptions, + FloorDivOptions, + SquareOptions, + ZerosLikeOptions, + FillOptions, + BidirectionalSequenceLSTMOptions, + BidirectionalSequenceRNNOptions, + UnidirectionalSequenceLSTMOptions, + FloorModOptions, + RangeOptions, + ResizeNearestNeighborOptions, + LeakyReluOptions, + SquaredDifferenceOptions, + MirrorPadOptions, + AbsOptions, + SplitVOptions, + UniqueOptions, + ReverseV2Options, + AddNOptions, + GatherNdOptions, + CosOptions, + WhereOptions, + RankOptions, + ReverseSequenceOptions, + MatrixDiagOptions, + QuantizeOptions, + MatrixSetDiagOptions, + HardSwishOptions, + IfOptions, + WhileOptions, + DepthToSpaceOptions, + NonMaxSuppressionV4Options, + NonMaxSuppressionV5Options, + ScatterNdOptions, + SelectV2Options, + DensifyOptions, + SegmentSumOptions, + BatchMatMulOptions, + CumsumOptions, + CallOnceOptions, + BroadcastToOptions, + Rfft2dOptions, + Conv3DOptions, + HashtableOptions, + HashtableFindOptions, + HashtableImportOptions, + HashtableSizeOptions, + VarHandleOptions, + ReadVariableOptions, + AssignVariableOptions, + RandomOptions, + BucketizeOptions, + GeluOptions, + DynamicUpdateSliceOptions, + UnsortedSegmentProdOptions, + UnsortedSegmentMaxOptions, + UnsortedSegmentMinOptions, + UnsortedSegmentSumOptions, + ATan2Options, + SignOptions +} + +// LINT.IfChange +enum Padding : byte { SAME, VALID } +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +// LINT.IfChange +enum ActivationFunctionType : byte { + NONE = 0, + RELU = 1, + RELU_N1_TO_1 = 2, + RELU6 = 3, + TANH = 4, + SIGN_BIT = 5, +} +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +table Conv2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + fused_activation_function:ActivationFunctionType; + dilation_w_factor:int = 1; + dilation_h_factor:int = 1; +} + +// Options for 
both Conv3D and Conv3DTranspose. +table Conv3DOptions { + padding:Padding; + stride_d:int; + stride_w:int; + stride_h:int; + fused_activation_function:ActivationFunctionType; + dilation_d_factor:int = 1; + dilation_w_factor:int = 1; + dilation_h_factor:int = 1; +} + +table Pool2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + filter_width:int; + filter_height:int; + fused_activation_function:ActivationFunctionType; +} + +table DepthwiseConv2DOptions { + // Parameters for DepthwiseConv version 1 or above. + padding:Padding; + stride_w:int; + stride_h:int; + // `depth_multiplier` is redundant. It's used by CPU kernels in + // TensorFlow 2.0 or below, but ignored in versions above. + // See comments in lite/c/builtin_op_data.h for more details. + depth_multiplier:int; + fused_activation_function:ActivationFunctionType; + // Parameters for DepthwiseConv version 2 or above. + dilation_w_factor:int = 1; + dilation_h_factor:int = 1; +} + +table ConcatEmbeddingsOptions { + num_channels:int; + num_columns_per_channel:[int]; + embedding_dim_per_channel:[int]; // This could be inferred from parameters. +} + +enum LSHProjectionType: byte { + UNKNOWN = 0, + SPARSE = 1, + DENSE = 2, +} + +table LSHProjectionOptions { + type: LSHProjectionType; +} + +table SVDFOptions { + rank:int; + fused_activation_function:ActivationFunctionType; + // For weights-only quantization, use asymmetric quantization for non + // constant inputs at evaluation time. + asymmetric_quantize_inputs:bool; +} + +// An implementation of TensorFlow RNNCell. +table RNNOptions { + fused_activation_function:ActivationFunctionType; + asymmetric_quantize_inputs:bool; +} + +// An implementation of TensorFlow dynamic_rnn with RNNCell. +table SequenceRNNOptions { + time_major:bool; + fused_activation_function:ActivationFunctionType; + asymmetric_quantize_inputs:bool; +} + +// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions { + time_major:bool; + fused_activation_function:ActivationFunctionType; + merge_outputs: bool; + asymmetric_quantize_inputs:bool; +} + +// LINT.IfChange +enum FullyConnectedOptionsWeightsFormat: byte { + DEFAULT = 0, + SHUFFLED4x16INT8 = 1, +} +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +// An implementation of TensorFlow fully_connected (a.k.a Dense) layer. +table FullyConnectedOptions { + // Parameters for FullyConnected version 1 or above. + fused_activation_function:ActivationFunctionType; + + // Parameters for FullyConnected version 2 or above. + weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT; + + // Parameters for FullyConnected version 5 or above. + // If set to true, then the number of dimension is preserved. Furthermore, + // all but the last dimension of the input and output shapes will be equal. + keep_num_dims: bool; + + // Parameters for FullyConnected version 7 or above. + // If set to true, then weights-only op will use asymmetric quantization for + // inputs. + asymmetric_quantize_inputs: bool; +} + +table SoftmaxOptions { + beta: float; +} + +// An implementation of TensorFlow concat. +table ConcatenationOptions { + axis:int; + fused_activation_function:ActivationFunctionType; +} + +table AddOptions { + fused_activation_function:ActivationFunctionType; + // Parameters supported by version 3. + pot_scale_int16:bool = true; +} + +table MulOptions { + fused_activation_function:ActivationFunctionType; +} + +table L2NormOptions { + // This field is currently ignored in the L2 Norm Op. + fused_activation_function:ActivationFunctionType; +} + +table LocalResponseNormalizationOptions { + radius:int; + bias:float; + alpha:float; + beta:float; +} + +// LINT.IfChange +enum LSTMKernelType : byte { + // Full LSTM kernel which supports peephole and projection. + FULL = 0, + // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell. 
+ BASIC = 1, +} +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell +table LSTMOptions { + // Parameters for LSTM version 1 or above. + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping + + // Parameters for LSTM version 2 or above. + // Basic kernel is only supported in version 2 or above. + kernel_type: LSTMKernelType = FULL; + + // Parameters for LSTM version 4 or above. + asymmetric_quantize_inputs: bool; +} + +// An implementation of TensorFlow dynamic_rnn with LSTMCell. +table UnidirectionalSequenceLSTMOptions { + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping + + // If true then first dimension is sequence, otherwise batch. + time_major:bool; + + // Parameter for Unidirectional Sequence LSTM version 3. + asymmetric_quantize_inputs:bool; + + // Parameter for unidirectional sequence RNN version 4. + diagonal_recurrent_tensors:bool; +} + +table BidirectionalSequenceLSTMOptions { + // Parameters supported by version 1: + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping + + // If true, store the outputs of both directions into the first output. + merge_outputs: bool; + + // Parameters supported by version 2: + // If true then first dimension is sequence, otherwise batch. + // Version 1 implementations assumed time_major to be true, so this default + // value should never change. + time_major: bool = true; + + // Parameters for version 3 or above. 
+ asymmetric_quantize_inputs:bool; +} + +table ResizeBilinearOptions { + new_height: int (deprecated); + new_width: int (deprecated); + align_corners: bool; + half_pixel_centers: bool; +} + +table ResizeNearestNeighborOptions { + align_corners: bool; + half_pixel_centers: bool; +} + +// A call operation options +table CallOptions { + // The subgraph index that needs to be called. + subgraph:uint; +} + +table PadOptions { +} + +table PadV2Options { +} + +table ReshapeOptions { + new_shape:[int]; +} + +table SpaceToBatchNDOptions { +} + +table BatchToSpaceNDOptions { +} + +table SkipGramOptions { + ngram_size: int; + max_skip_size: int; + include_all_ngrams: bool; +} + +table SpaceToDepthOptions { + block_size: int; +} + +table DepthToSpaceOptions { + block_size: int; +} + +table SubOptions { + fused_activation_function:ActivationFunctionType; + // Parameters supported by version 5 + pot_scale_int16:bool = true; +} + +table DivOptions { + fused_activation_function:ActivationFunctionType; +} + +table TopKV2Options { +} + +enum CombinerType : byte { + SUM = 0, + MEAN = 1, + SQRTN = 2, +} + +table EmbeddingLookupSparseOptions { + combiner:CombinerType; +} + +table GatherOptions { + axis: int; + // Parameters for Gather version 5 or above. 
+ batch_dims: int = 0; +} + +table TransposeOptions { +} + +table ExpOptions { +} + +table CosOptions { +} + +table ReducerOptions { + keep_dims: bool; +} + +table SqueezeOptions { + squeeze_dims:[int]; +} + +table SplitOptions { + num_splits: int; +} + +table SplitVOptions { + num_splits: int; +} + +table StridedSliceOptions { + begin_mask: int; + end_mask: int; + ellipsis_mask: int; + new_axis_mask: int; + shrink_axis_mask: int; +} + +table LogSoftmaxOptions { +} + +table CastOptions { + in_data_type: TensorType; + out_data_type: TensorType; +} + +table DequantizeOptions { +} + +table MaximumMinimumOptions { +} + +table TileOptions { +} + +table ArgMaxOptions { + output_type : TensorType; +} + +table ArgMinOptions { + output_type : TensorType; +} + +table GreaterOptions { +} + +table GreaterEqualOptions { +} + +table LessOptions { +} + +table LessEqualOptions { +} + +table NegOptions { +} + +table SelectOptions { +} + +table SliceOptions { +} + +table TransposeConvOptions { + // Parameters supported by version 1, 2, 3: + padding:Padding; + stride_w:int; + stride_h:int; + + // Parameters supported by version 4: + fused_activation_function:ActivationFunctionType = NONE; +} + +table ExpandDimsOptions { +} + +table SparseToDenseOptions { + validate_indices:bool; +} + +table EqualOptions { +} + +table NotEqualOptions { +} + +table ShapeOptions { + // Optional output type of the operation (int32 or int64). Defaults to int32. 
+ out_type : TensorType; +} + +table RankOptions { +} + +table PowOptions { +} + +table FakeQuantOptions { + // Parameters supported by version 1: + min:float; + max:float; + num_bits:int; + + // Parameters supported by version 2: + narrow_range:bool; +} + +table PackOptions { + values_count:int; + axis:int; +} + +table LogicalOrOptions { +} + +table OneHotOptions { + axis:int; +} + +table AbsOptions { +} + + +table HardSwishOptions { +} + +table LogicalAndOptions { +} + +table LogicalNotOptions { +} + +table UnpackOptions { + num:int; + axis:int; +} + +table FloorDivOptions { +} + +table SquareOptions { +} + +table ZerosLikeOptions { +} + +table FillOptions { +} + +table FloorModOptions { +} + +table RangeOptions { +} + +table LeakyReluOptions { + alpha:float; +} + +table SquaredDifferenceOptions { +} + +// LINT.IfChange +enum MirrorPadMode : byte { + // Doesn't include borders. + REFLECT = 0, + // Includes borders. + SYMMETRIC = 1, +} +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +table MirrorPadOptions { + mode:MirrorPadMode; +} + +table UniqueOptions { + idx_out_type:TensorType = INT32; +} + +table ReverseV2Options { +} + +table AddNOptions { +} + +table GatherNdOptions { +} + +table WhereOptions { +} + +table ReverseSequenceOptions { + seq_dim:int; + batch_dim:int = 0; +} + +table MatrixDiagOptions { +} + +table QuantizeOptions { +} + +table MatrixSetDiagOptions { +} + +table IfOptions { + then_subgraph_index:int; + else_subgraph_index:int; +} + +table CallOnceOptions { + init_subgraph_index:int; +} + +table WhileOptions { + cond_subgraph_index:int; + body_subgraph_index:int; +} + +table NonMaxSuppressionV4Options { +} + +table NonMaxSuppressionV5Options { +} + +table ScatterNdOptions { +} + +table SelectV2Options { +} + +table DensifyOptions { +} + +table SegmentSumOptions { +} + +table BatchMatMulOptions { + adj_x:bool; + adj_y:bool; + // Parameters for BatchMatMul version 4 or above. 
+ // If set to true, then weights-only op will use asymmetric quantization for + // inputs. + asymmetric_quantize_inputs: bool; +} + +table CumsumOptions { + exclusive:bool; + reverse:bool; +} + +table BroadcastToOptions { +} + +table Rfft2dOptions { +} + +table HashtableOptions { + // The identity of hash tables. This identity will be used across different + // subgraphs in the same interpreter instance. + table_id:int; + key_dtype:TensorType; + value_dtype:TensorType; +} + +table HashtableFindOptions { +} + +table HashtableImportOptions { +} + +table HashtableSizeOptions { +} + +table VarHandleOptions { + container:string; + shared_name:string; +} + +table ReadVariableOptions { +} + +table AssignVariableOptions { +} + +table RandomOptions { + seed: long; + seed2: long; +} + +table BucketizeOptions { + boundaries: [float]; // The bucket boundaries. +} + +table GeluOptions { + approximate: bool; +} + +table DynamicUpdateSliceOptions { +} + +table UnsortedSegmentProdOptions { +} + +table UnsortedSegmentMaxOptions { +} + +table UnsortedSegmentSumOptions { +} + +table ATan2Options { +} + +table UnsortedSegmentMinOptions{ +} + +table SignOptions { +} + + +// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a +// builtin, or a string if the operator is custom. +table OperatorCode { + // This field is for backward compatibility. This field will be used when + // the value of the extended builtin_code field is less than + // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES. + deprecated_builtin_code:byte; + custom_code:string; + + // The version of the operator. The version needs to be bumped whenever new + // parameters are introduced into an op. + version:int = 1; + + // This field is introduced for resolving op builtin code shortage problem + // (the original BuiltinOperator enum field was represented as a byte). 
+ // This field will be used when the value of the extended builtin_code field + // is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES. + builtin_code:BuiltinOperator; +} + +enum CustomOptionsFormat : byte { + FLEXBUFFERS = 0, +} + +// An operator takes tensors as inputs and outputs. The type of operation being +// performed is determined by an index into the list of valid OperatorCodes, +// while the specifics of each operation are configured using builtin_options +// or custom_options. +table Operator { + // Index into the operator_codes array. Using an integer here avoids + // complicated map lookups. + opcode_index:uint; + + // Optional inputs are indicated by -1. + inputs:[int]; + outputs:[int]; + + builtin_options:BuiltinOptions; + custom_options:[ubyte]; + custom_options_format:CustomOptionsFormat; + + // A list of booleans indicating the input tensors which are being mutated by + // this operator (e.g. used by RNN and LSTM). + // For example, if the "inputs" array refers to 5 tensors and the second and + // fifth are mutable variables, then this list will contain + // [false, true, false, false, true]. + // + // If the list is empty, no variable is mutated in this operator. + // The list either has the same length as `inputs`, or is empty. + mutating_variable_inputs:[bool]; + + // A list of indices to the subgraph's "tensors" that are internal to an Op. + // Internal tensors are those that do not flow in or out of the operation, + // but instead are part of internal computation. As such, the operation's + // implementation may manage its memory more efficiently. They are needed + // however (i.e. not just an implementation detail) since they are part of the + // computation, which may require relevant metadata such as quantization + // parameters. + intermediates:[int]; +} + +// The root type, defining a subgraph, which typically represents an entire +// model. +table SubGraph { + // A list of all tensors used in this subgraph. 
+ tensors:[Tensor]; + + // Indices of the tensors that are inputs into this subgraph. Note this is + // the list of non-static tensors that feed into the subgraph for inference. + inputs:[int]; + + // Indices of the tensors that are outputs out of this subgraph. Note this is + // the list of output tensors that are considered the product of the + // subgraph's inference. + outputs:[int]; + + // All operators, in execution order. + operators:[Operator]; + + // Name of this subgraph (used for debugging). + name:string; +} + +// Table of raw data buffers (used for constant tensors). Referenced by tensors +// by index. The generous alignment accommodates mmap-friendly data structures. +table Buffer { + data:[ubyte] (force_align: 16); +} + +table Metadata { + // A human readable string to uniquely identify a Metadata. + name:string; + // An index to the buffers table. + buffer:uint; +} + +// Map from an alias name of tensor to tensor index in the graph. +// This is used in Signature def. +table TensorMap { + // Represents the alias to use for this tensor. + name:string; + + // The actual tensor index in the primary graph, that 'name' corresponds to. + tensor_index:uint; +} + +// This corresponds to SignatureDef in Tensorflow SavedModel. +// The SignatureDef will be part of the SavedModel provided for conversion. +table SignatureDef { + // Named inputs for this signature. + inputs:[TensorMap]; + + // Named outputs for this signature. + outputs:[TensorMap]; + + // Key value which was in the Tensorflow SavedModel SignatureDef map. + signature_key:string; + + // Model tag, deprecated. + deprecated_tag:string (deprecated); + + // Index of subgraphs that corresponds to the exported method. + subgraph_index:uint; +} + +table Model { + // Version of the schema. + version:uint; + + // A list of all operator codes used in this model. This is + // kept in order because operators carry an index into this + // vector. 
+ operator_codes:[OperatorCode]; + + // All the subgraphs of the model. The 0th is assumed to be the main + // model. + subgraphs:[SubGraph]; + + // A description of the model. + description:string; + + // Buffers of the model. + // Note the 0th entry of this array must be an empty buffer (sentinel). + // This is a convention so that tensors without a buffer can provide 0 as + // their buffer. + buffers:[Buffer]; + + // Metadata about the model. Indirects into the existing buffers list. + // Deprecated, prefer to use metadata field. + metadata_buffer:[int]; + + // Metadata about the model. + metadata:[Metadata]; + + // Optional SignatureDefs for the model. + signature_defs:[SignatureDef]; +} + +root_type Model; diff --git a/modules/dnn/src/tflite/tflite_importer.cpp b/modules/dnn/src/tflite/tflite_importer.cpp new file mode 100644 index 0000000000..ee051547f9 --- /dev/null +++ b/modules/dnn/src/tflite/tflite_importer.cpp @@ -0,0 +1,644 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "../precomp.hpp" + +#ifdef HAVE_FLATBUFFERS +#include "schema_generated.h" +#include "builtin_op_data.h" +#endif + +#include +#undef CV_LOG_STRIP_LEVEL +#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1 +#include + +namespace cv { +namespace dnn { +CV__DNN_INLINE_NS_BEGIN + +#ifdef HAVE_FLATBUFFERS + +using namespace opencv_tflite; + +// These values are used to indicate layer output's data layout where it's possible. +// Approach is similar to TensorFlow importer but TFLite models do not have explicit +// layout field "data_format". So we consider that all 4D inputs are in NHWC data layout. 
+enum DataLayout +{ + DATA_LAYOUT_NHWC, + DATA_LAYOUT_NCHW, + DATA_LAYOUT_NDHWC, + DATA_LAYOUT_UNKNOWN, + DATA_LAYOUT_PLANAR // 2-dimensional outputs (matmul, flatten, reshape to 2d) +}; + +class TFLiteImporter { +public: + TFLiteImporter(Net& net, const char* modelBuffer, size_t bufSize); + +private: + const opencv_tflite::Model* model; + const flatbuffers::Vector >* modelTensors; + std::map allTensors; + Net& dstNet; + + // This is a vector of pairs (layerId, outputId) where we iterate over + // indices from TFLite notation and get created OpenCV layers. + std::map > layerIds; + + // Tracking of layouts for layers outputs. + std::vector layouts; + + void populateNet(); + + // Wrap TFLite Tensor to OpenCV Mat without data copying + Mat parseTensor(const Tensor& tensor); + + typedef void (TFLiteImporter::*TFLiteImporterNodeParser)(const Operator&, const std::string&, LayerParams&); + typedef std::map DispatchMap; + + const DispatchMap dispatch; + static DispatchMap buildDispatchMap(); + + void parseConvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parseDWConvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parsePadding(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parseEltwise(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parsePooling(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parsePoolingWithArgmax(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parseUnpooling(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parseReshape(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parseConcat(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parseResize(const Operator& op, const std::string& opcode, LayerParams& layerParams); + void parseDeconvolution(const 
Operator& op, const std::string& opcode, LayerParams& layerParams); + + int addPermuteLayer(const std::vector& order, const std::string& permName, const std::pair& inpId); +}; + +Mat TFLiteImporter::parseTensor(const Tensor& tensor) +{ + const auto tensor_shape = tensor.shape(); + CV_Assert(tensor_shape); + std::vector shape(tensor_shape->begin(), tensor_shape->end()); + int bufferIdx = tensor.buffer(); + CV_Assert(bufferIdx != 0); // 0th buffer is a no-data buffer + const Buffer* buffer = model->buffers()->Get(bufferIdx); + CV_Assert(buffer); + const auto buffer_data = buffer->data(); + CV_Assert(buffer_data); + const void* data = buffer_data->data(); + + int dtype = -1; + switch (tensor.type()) { + case TensorType_FLOAT32: + dtype = CV_32F; + break; + case TensorType_INT32: + dtype = CV_32S; + break; + case TensorType_FLOAT16: + dtype = CV_16S; + break; + default: + CV_Error(Error::StsNotImplemented, format("Parse tensor with type %s", EnumNameTensorType(tensor.type()))); + } + return Mat(shape, dtype, const_cast(data)); +} + +TFLiteImporter::TFLiteImporter(Net& dstNet, const char* modelBuffer, size_t bufSize) + : dstNet(dstNet), dispatch(buildDispatchMap()) +{ + flatbuffers::Verifier verifier((const uint8_t*)modelBuffer, bufSize); + if (!VerifyModelBuffer(verifier)) { + CV_Error(Error::StsError, "DNN/TFLite: model is incorrect"); + } + + model = GetModel(modelBuffer); + CV_Assert(model); + CV_Assert(model->subgraphs()); + CV_Assert(model->buffers()); + CV_CheckEQ(model->subgraphs()->size(), 1, ""); + + modelTensors = model->subgraphs()->Get(0)->tensors(); + CV_Assert(modelTensors); + for (int i = 0; i < modelTensors->size(); ++i) { + const Tensor* tensor = modelTensors->Get(i); + CV_Assert(tensor); + if (tensor->buffer() != 0) { + allTensors[i] = parseTensor(*tensor); + } + } + + populateNet(); +} + +DataLayout estimateLayout(const Tensor& t) +{ + const auto t_shape = t.shape(); + CV_Assert(t_shape); + switch (t_shape->size()) { + case 5: return 
DATA_LAYOUT_NDHWC; + case 4: return DATA_LAYOUT_NHWC; + case 2: return DATA_LAYOUT_PLANAR; + default: return DATA_LAYOUT_UNKNOWN; + } +} + +void TFLiteImporter::populateNet() +{ + CV_Assert(model); + const auto model_subgraphs = model->subgraphs(); + CV_Assert(model_subgraphs); + const SubGraph* subgraph = model_subgraphs->Get(0); + CV_Assert(subgraph); + const auto subgraph_inputs = subgraph->inputs(); + CV_Assert(subgraph_inputs); + const auto subgraph_operators = subgraph->operators(); + CV_Assert(subgraph_operators); + const auto opCodes = model->operator_codes(); + CV_Assert(opCodes); + + CV_Assert(modelTensors); + layouts.resize(modelTensors->size(), DATA_LAYOUT_UNKNOWN); + size_t subgraph_inputs_size = subgraph_inputs->size(); + for (size_t i = 0; i < subgraph_inputs_size; ++i) + { + int idx = subgraph_inputs->Get(i); + layerIds[idx] = std::make_pair(0, i); + const auto tensor = modelTensors->Get(idx); + if (!tensor) + CV_Error(Error::StsError, cv::format("DNN/TFLite: subgraph input %d (%d) is NULL", (int)i, idx)); + layouts[idx] = estimateLayout(*tensor); + } + const auto& all_operators = *subgraph_operators; + const size_t all_operators_size = all_operators.size(); + for (size_t op_idx = 0; op_idx < all_operators_size; ++op_idx) + { + const auto op = all_operators[op_idx]; + CV_Assert(op); + const auto op_inputs = op->inputs(); + CV_Assert(op_inputs); + const auto op_outputs = op->outputs(); + CV_Assert(op_outputs); + int idx = op->opcode_index(); + + LayerParams layerParams; + layerParams.name = modelTensors->Get(op_outputs->Get(0))->name()->str(); + + std::string type = EnumNameBuiltinOperator(BuiltinOperator(opCodes->Get(idx)->deprecated_builtin_code())); + if (type == "CUSTOM") { + type = opCodes->Get(idx)->custom_code()->str(); + } + + CV_LOG_DEBUG(NULL, "DNN/TFLite: processing operator (" << op_idx << "/" << all_operators_size << ") with " << op_inputs->size() << " inputs: " + << cv::format("[%s]:(%s)", type.c_str(), layerParams.name.c_str())); + + 
try + { + if (type == "DEQUANTIZE") { + // Convert from FP16 to FP32 + Mat data = allTensors[op_inputs->Get(0)]; + Mat dataFP32; + convertFp16(data, dataFP32); + allTensors[op_outputs->Get(0)] = dataFP32; + continue; + } + + DispatchMap::const_iterator iter = dispatch.find(type); + if (iter == dispatch.end()) + CV_Error(Error::StsNotImplemented, "Unsupported operator type " + type); + + CALL_MEMBER_FN(*this, iter->second)(*op, type, layerParams); + + // Collect input blobs + std::vector layerInputs; + std::vector inpLayouts; + for (int idx : *op_inputs) { + if (layerIds.find(idx) != layerIds.end()) { + layerInputs.push_back(idx); + inpLayouts.push_back(layouts[idx]); + continue; // Output from a different layer + } + + Mat blob = allTensors[idx]; + layerParams.blobs.push_back(blob.u ? blob : blob.clone()); // some tensors are owned by OpenCV + } + + int layerId = dstNet.addLayer(layerParams.name, layerParams.type, layerParams); + + // Connect layer to inputs + int i = 0; + for (int idx : layerInputs) { + auto it = layerIds.find(idx); + CV_Assert(it != layerIds.end()); + dstNet.connect(it->second.first, it->second.second, layerId, i++); + } + + // Predict output layout. Some layer-specific parsers may set them explicitly. + // Otherwise, propagate input layout. 
+ if (layouts[op_outputs->Get(0)] == DATA_LAYOUT_UNKNOWN) { + DataLayout predictedLayout = DATA_LAYOUT_UNKNOWN; + for (auto layout : inpLayouts) { + if (layout != DATA_LAYOUT_UNKNOWN) { + if (predictedLayout == DATA_LAYOUT_UNKNOWN) + predictedLayout = layout; + else if (predictedLayout != layout) { + predictedLayout = DATA_LAYOUT_UNKNOWN; + break; + } + } + } + layouts[op_outputs->Get(0)] = predictedLayout; + } + + // Register outputs + i = 0; + for (int idx : *op_outputs) { + layerIds[idx] = std::make_pair(layerId, i++); + } + } + catch (const cv::Exception& e) + { + CV_LOG_ERROR(NULL, "DNN/TFLite: Problem during import of operator " + << cv::format("[%s]:(%s)", type.c_str(), layerParams.name.c_str()) + << " (" << op_idx << "/" << all_operators_size << "). Exception: " << e.what()); + if (DNN_DIAGNOSTICS_RUN) + { + continue; + } + throw; + } + } +} + +TFLiteImporter::DispatchMap TFLiteImporter::buildDispatchMap() +{ + static DispatchMap dispatch; + if (!dispatch.empty()) + return dispatch; + + dispatch["CONV_2D"] = &TFLiteImporter::parseConvolution; + dispatch["DEPTHWISE_CONV_2D"] = &TFLiteImporter::parseDWConvolution; + dispatch["RELU"] = dispatch["ADD"] = dispatch["MUL"] = dispatch["PRELU"] = + dispatch["HARD_SWISH"] = dispatch["LOGISTIC"] = &TFLiteImporter::parseEltwise; + dispatch["MAX_POOL_2D"] = dispatch["AVERAGE_POOL_2D"] = &TFLiteImporter::parsePooling; + dispatch["MaxPoolingWithArgmax2D"] = &TFLiteImporter::parsePoolingWithArgmax; + dispatch["MaxUnpooling2D"] = &TFLiteImporter::parseUnpooling; + dispatch["PAD"] = &TFLiteImporter::parsePadding; + dispatch["RESHAPE"] = &TFLiteImporter::parseReshape; + dispatch["CONCATENATION"] = &TFLiteImporter::parseConcat; + dispatch["RESIZE_BILINEAR"] = &TFLiteImporter::parseResize; + dispatch["Convolution2DTransposeBias"] = &TFLiteImporter::parseDeconvolution; + return dispatch; +} + +void TFLiteImporter::parseConvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + layerParams.type = 
"Convolution"; + + auto options = reinterpret_cast(op.builtin_options()); + if (options->fused_activation_function() != ActivationFunctionType_NONE) { + CV_Error(Error::StsNotImplemented, "Convolution with fused activation"); + } + layerParams.set("pad_mode", EnumNamePadding(options->padding())); + layerParams.set("stride_w", options->stride_w()); + layerParams.set("stride_h", options->stride_h()); + layerParams.set("dilation_w", options->dilation_w_factor()); + layerParams.set("dilation_h", options->dilation_h_factor()); + + // Get filter size + int filterIdx = op.inputs()->Get(1); + Mat filter = allTensors[filterIdx]; + int oc = filter.size[0]; + int kh = filter.size[1]; + int kw = filter.size[2]; + int ic = filter.size[3]; + layerParams.set("kernel_w", kw); + layerParams.set("kernel_h", kh); + layerParams.set("num_output", oc); + + // Reorder filter data from OHWI to OIHW and change shape correspondingly. + filter = allTensors[filterIdx] = filter.reshape(1, {oc, ic, kh, kw}); + + CV_CheckTypeEQ(filter.type(), CV_32F, ""); + Mat filterCopy = filter.clone(); + float* data = filterCopy.ptr(); + float* dstData = filter.ptr(); + + int total = oc * ic * kh * kw; + for (int i_oc = 0; i_oc < oc; i_oc++) { + for (int i_ic = 0; i_ic < ic; i_ic++) { + for (int i_h = 0; i_h < kh; i_h++) { + for (int i_w = 0; i_w < kw; i_w++) { + int dst_i = kw * (kh * (ic * i_oc + i_ic) + i_h) + i_w; + int src_i = ic * (kw * (kh * i_oc + i_h) + i_w) + i_ic; + CV_CheckLT(dst_i, total, ""); + CV_CheckLT(src_i, total, ""); + dstData[dst_i] = data[src_i]; + } + } + } + } +} + +void TFLiteImporter::parseDWConvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + layerParams.type = "Convolution"; + + auto options = reinterpret_cast(op.builtin_options()); + if (options->fused_activation_function() != ActivationFunctionType_NONE) { + CV_Error(Error::StsNotImplemented, "Depthwise convolution with fused activation"); + } + layerParams.set("pad_mode", 
EnumNamePadding(options->padding())); + layerParams.set("stride_w", options->stride_w()); + layerParams.set("stride_h", options->stride_h()); + layerParams.set("dilation_w", options->dilation_w_factor()); + layerParams.set("dilation_h", options->dilation_h_factor()); + + int filterIdx = op.inputs()->Get(1); + Mat filter = allTensors[filterIdx]; + int kh = filter.size[1]; + int kw = filter.size[2]; + int oc = filter.size[3]; + layerParams.set("kernel_w", kw); + layerParams.set("kernel_h", kh); + layerParams.set("num_output", oc); + layerParams.set("group", oc); + + filter = allTensors[filterIdx] = filter.reshape(1, {oc, 1, kh, kw}); + cv::transpose(filter.reshape(1, kh * kw).clone(), filter.reshape(1, oc)); +} + +void TFLiteImporter::parsePadding(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + layerParams.type = "Padding"; + Mat paddings = allTensors[op.inputs()->Get(1)]; + + CV_CheckTypeEQ(paddings.type(), CV_32S, ""); + // N H W C + // 0 1 2 3 4 5 6 7 + std::swap(paddings.at(2), paddings.at(6)); + std::swap(paddings.at(3), paddings.at(7)); + // N C W H + // 0 1 2 3 4 5 6 7 + std::swap(paddings.at(4), paddings.at(6)); + std::swap(paddings.at(5), paddings.at(7)); + // N C H W + // 0 1 2 3 4 5 6 7 + + layerParams.set("paddings", DictValue::arrayInt((int32_t*)paddings.data, paddings.total())); +} + +void TFLiteImporter::parseEltwise(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + if (opcode == "PRELU") { + layerParams.type = "PReLU"; + } else if (opcode == "RELU") { + layerParams.type = "ReLU"; + } else if (opcode == "ADD") { + auto options = reinterpret_cast(op.builtin_options()); + if (options->fused_activation_function() != ActivationFunctionType_NONE) { + CV_Error(Error::StsNotImplemented, "Add with fused activation"); + } + layerParams.type = "Eltwise"; + layerParams.set("operation", "sum"); + } else if (opcode == "MUL") { + auto options = reinterpret_cast(op.builtin_options()); + if 
(options->fused_activation_function() != ActivationFunctionType_NONE) { + CV_Error(Error::StsNotImplemented, "Mul with fused activation"); + } + layerParams.type = "Eltwise"; + layerParams.set("operation", "prod"); + } else if (opcode == "HARD_SWISH") { + layerParams.type = "HardSwish"; + } else if (opcode == "LOGISTIC") { + layerParams.type = "Sigmoid"; + } else { + CV_Error(Error::StsNotImplemented, "Unknown eltwise operator opcode: " + opcode); + } +} + +void TFLiteImporter::parsePooling(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + layerParams.type = "Pooling"; + + auto options = reinterpret_cast(op.builtin_options()); + if (options->fused_activation_function() != ActivationFunctionType_NONE) { + CV_Error(Error::StsNotImplemented, "Pooling with fused activation"); + } + layerParams.set("pad_mode", EnumNamePadding(options->padding())); + layerParams.set("stride_w", options->stride_w()); + layerParams.set("stride_h", options->stride_h()); + layerParams.set("kernel_w", options->filter_width()); + layerParams.set("kernel_h", options->filter_height()); + if (opcode == "MAX_POOL_2D") + layerParams.set("pool", "max"); + else if (opcode == "AVERAGE_POOL_2D") + layerParams.set("pool", "ave"); + else + CV_Error(Error::StsNotImplemented, "Pool type selection for " + opcode); +} + +void TFLiteImporter::parsePoolingWithArgmax(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + layerParams.type = "Pooling"; + + CV_CheckLE(op.custom_options()->size(), sizeof(TfLitePoolParams), ""); + const auto* params = reinterpret_cast(op.custom_options()->Data()); + if (params->activation != kTfLiteActNone) { + CV_Error(Error::StsNotImplemented, "Argmax pooling with fused activation"); + } + if (params->padding != kTfLitePaddingUnknown) + layerParams.set("pad_mode", params->padding == kTfLitePaddingSame ? 
"SAME" : "VALID"); + layerParams.set("stride_w", params->stride_width); + layerParams.set("stride_h", params->stride_height); + layerParams.set("kernel_w", params->filter_width); + layerParams.set("kernel_h", params->filter_height); + layerParams.set("pool", "max"); +} + +void TFLiteImporter::parseUnpooling(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + layerParams.type = "MaxUnpool"; + + CV_CheckLE(op.custom_options()->size(), sizeof(TfLitePoolParams), ""); + const auto* params = reinterpret_cast(op.custom_options()->Data()); + if (params->activation != kTfLiteActNone) { + CV_Error(Error::StsNotImplemented, "Unpooling with fused activation"); + } + layerParams.set("pool_stride_w", params->stride_width); + layerParams.set("pool_stride_h", params->stride_height); + layerParams.set("pool_k_w", params->filter_width); + layerParams.set("pool_k_h", params->filter_height); + layerParams.set("pool_pad_w", 0); + layerParams.set("pool_pad_h", 0); +} + +void TFLiteImporter::parseReshape(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + DataLayout inpLayout = layouts[op.inputs()->Get(0)]; + + if (inpLayout == DATA_LAYOUT_NHWC) { + // Permute to NCHW + int permId = addPermuteLayer({0, 2, 3, 1}, layerParams.name + "/permute", layerIds[op.inputs()->Get(0)]); // NCHW -> NHWC + layerIds[op.inputs()->Get(0)] = std::make_pair(permId, 0); + layouts[op.outputs()->Get(0)] = DATA_LAYOUT_NCHW; + } + + layerParams.type = "Reshape"; + auto options = reinterpret_cast(op.builtin_options()); + std::vector shape(options->new_shape()->begin(), options->new_shape()->end()); + // std::swap(shape[1], shape[2]); + layerParams.set("dim", DictValue::arrayInt(shape.data(), shape.size())); +} + +void TFLiteImporter::parseConcat(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + layerParams.type = "Concat"; + auto options = reinterpret_cast(op.builtin_options()); + if (options->fused_activation_function() != 
ActivationFunctionType_NONE) { + CV_Error(Error::StsNotImplemented, "Concat with fused activation"); + } + int axis = options->axis(); + + DataLayout inpLayout = layouts[op.inputs()->Get(0)]; + if (inpLayout == DATA_LAYOUT_NHWC) { + // OpenCV works in NCHW data layout. So change the axis correspondingly. + CV_Check(axis, -4 < axis && axis < 4, ""); + int remap[] = {0, 2, 3, 1}; + axis = axis > 0 ? axis : 4 + axis; + axis = remap[axis]; + } + layerParams.set("axis", axis); +} + +void TFLiteImporter::parseResize(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + layerParams.type = "Resize"; + + auto options = reinterpret_cast(op.builtin_options()); + + layerParams.set("interpolation", "bilinear"); + layerParams.set("align_corners", options->align_corners()); + layerParams.set("half_pixel_centers", options->half_pixel_centers()); + + Mat shape = allTensors[op.inputs()->Get(1)].reshape(1, 1); + layerParams.set("height", shape.at(0, 0)); + layerParams.set("width", shape.at(0, 1)); +} + +int TFLiteImporter::addPermuteLayer(const std::vector& order, const std::string& permName, + const std::pair& inpId) +{ + LayerParams permLP; + permLP.set("order", DictValue::arrayInt(order.data(), order.size())); + int permId = dstNet.addLayer(permName, "Permute", permLP); + dstNet.connect(inpId.first, inpId.second, permId, 0); + return permId; +} + +void TFLiteImporter::parseDeconvolution(const Operator& op, const std::string& opcode, LayerParams& layerParams) { + layerParams.type = "Deconvolution"; + + CV_CheckLE(op.custom_options()->size(), sizeof(TfLiteTransposeConvParams), ""); + const auto* params = reinterpret_cast(op.custom_options()->Data()); + if (params->padding != kTfLitePaddingUnknown) + layerParams.set("pad_mode", params->padding == kTfLitePaddingSame ? 
"SAME" : "VALID"); + layerParams.set("stride_w", params->stride_width); + layerParams.set("stride_h", params->stride_height); + + // Get filter size + int filterIdx = op.inputs()->Get(1); + Mat filter = allTensors[filterIdx]; + int oc = filter.size[0]; + int kh = filter.size[1]; + int kw = filter.size[2]; + int ic = filter.size[3]; + layerParams.set("kernel_w", kw); + layerParams.set("kernel_h", kh); + layerParams.set("num_output", oc); + + // Add adjust padding similar to TensorFlow (see tf_importer) + const auto* outShape = modelTensors->Get(op.outputs()->Get(0))->shape(); + const int outH = outShape->Get(1); + const int outW = outShape->Get(2); + if (params->padding == kTfLitePaddingSame) + { + layerParams.set("adj_w", (outW - 1) % params->stride_width); + layerParams.set("adj_h", (outH - 1) % params->stride_height); + } + else if (params->padding == kTfLitePaddingValid) + { + layerParams.set("adj_w", (outW - kw) % params->stride_width); + layerParams.set("adj_h", (outH - kh) % params->stride_height); + } + + // Reorder filter data from OHWI to IOHW and change shape correspondingly. 
+ filter = allTensors[filterIdx] = filter.reshape(1, {ic, oc, kh, kw}); + + CV_CheckTypeEQ(filter.type(), CV_32F, ""); + Mat filterCopy = filter.clone(); + float* data = filterCopy.ptr(); + float* dstData = filter.ptr(); + + int total = oc * ic * kh * kw; + for (int i_oc = 0; i_oc < oc; i_oc++) { + for (int i_ic = 0; i_ic < ic; i_ic++) { + for (int i_h = 0; i_h < kh; i_h++) { + for (int i_w = 0; i_w < kw; i_w++) { + int dst_i = kw * (kh * (oc * i_ic + i_oc) + i_h) + i_w; + int src_i = ic * (kw * (kh * i_oc + i_h) + i_w) + i_ic; + CV_CheckLT(dst_i, total, ""); + CV_CheckLT(src_i, total, ""); + dstData[dst_i] = data[src_i]; + } + } + } + } +} + +Net readNetFromTFLite(const String &modelPath) { + Net net; + + std::vector content; + + const std::ios::openmode mode = std::ios::in | std::ios::binary; + std::ifstream ifs(modelPath, mode); + if (!ifs.is_open()) + CV_Error(Error::StsError, cv::format("DNN/TFLite: can't open model file '%s'", modelPath.c_str())); + + ifs.seekg(0, std::ios::end); + const size_t sz = ifs.tellg(); + CV_Assert(sz > 0); + content.resize(sz); + ifs.seekg(0, std::ios::beg); + + ifs.read(content.data(), sz); + CV_Assert(!ifs.bad()); + + TFLiteImporter(net, content.data(), content.size()); + return net; +} + +Net readNetFromTFLite(const std::vector& bufferModel) { + return readNetFromTFLite((const char*)bufferModel.data(), bufferModel.size()); +} + +Net readNetFromTFLite(const char *bufferModel, size_t bufSize) { + Net net; + TFLiteImporter(net, bufferModel, bufSize); + return net; +} + +#else // HAVE_FLATBUFFERS + +#define DNN_TFLITE_UNSUPPORTED() CV_Error(Error::StsError, "DNN/TFLite: Build OpenCV with FlatBuffers to import TFLite models: https://github.com/opencv/opencv/pull/23161") + +Net readNetFromTFLite(const String &) { + DNN_TFLITE_UNSUPPORTED(); +} + +Net readNetFromTFLite(const std::vector&) { + DNN_TFLITE_UNSUPPORTED(); +} + +Net readNetFromTFLite(const char *, size_t) { + DNN_TFLITE_UNSUPPORTED(); +} + +#endif // HAVE_FLATBUFFERS + 
+CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/test/test_tflite_importer.cpp b/modules/dnn/test/test_tflite_importer.cpp new file mode 100644 index 0000000000..41c3133593 --- /dev/null +++ b/modules/dnn/test/test_tflite_importer.cpp @@ -0,0 +1,123 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +/* +Test for TFLite models loading +*/ + +#include "test_precomp.hpp" +#include "npy_blob.hpp" + +#include // CV_DNN_REGISTER_LAYER_CLASS +#include + +namespace opencv_test +{ + +using namespace cv; +using namespace cv::dnn; + +void testModel(const std::string& modelName, const Mat& input, double norm = 1e-5) { +#ifndef HAVE_FLATBUFFERS + throw SkipTestException("FlatBuffers required for TFLite importer"); +#endif + + Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite", false)); + net.setInput(input); + + std::vector outNames = net.getUnconnectedOutLayersNames(); + + std::vector outs; + net.forward(outs, outNames); + + ASSERT_EQ(outs.size(), outNames.size()); + for (int i = 0; i < outNames.size(); ++i) { + Mat ref = blobFromNPY(findDataFile(format("dnn/tflite/%s_out_%s.npy", modelName.c_str(), outNames[i].c_str()))); + normAssert(ref.reshape(1, 1), outs[i].reshape(1, 1), outNames[i].c_str(), norm); + } +} + +void testModel(const std::string& modelName, const Size& inpSize, double norm = 1e-5) { + Mat input = imread(findDataFile("cv/shared/lena.png")); + input = blobFromImage(input, 1.0 / 255, inpSize, 0, true); + testModel(modelName, input, norm); +} + +// https://google.github.io/mediapipe/solutions/face_mesh +TEST(Test_TFLite, face_landmark) +{ + testModel("face_landmark", Size(192, 192), 2e-5); +} + +// https://google.github.io/mediapipe/solutions/face_detection +TEST(Test_TFLite, face_detection_short_range) +{ + testModel("face_detection_short_range", Size(128, 128)); +} + +// 
https://google.github.io/mediapipe/solutions/selfie_segmentation +TEST(Test_TFLite, selfie_segmentation) +{ + testModel("selfie_segmentation", Size(256, 256)); +} + +TEST(Test_TFLite, max_unpooling) +{ +#ifndef HAVE_FLATBUFFERS + throw SkipTestException("FlatBuffers required for TFLite importer"); +#endif + // Due Max Unpoling is a numerically unstable operation and small difference between frameworks + // might lead to positional difference of maximal elements in the tensor, this test checks + // behavior of Max Unpooling layer only. + Net net = readNet(findDataFile("dnn/tflite/hair_segmentation.tflite", false)); + + Mat input = imread(findDataFile("cv/shared/lena.png")); + cvtColor(input, input, COLOR_BGR2RGBA); + input = input.mul(Scalar(1, 1, 1, 0)); + input = blobFromImage(input, 1.0 / 255); + net.setInput(input); + + std::vector > outs; + net.forward(outs, {"p_re_lu_1", "max_pooling_with_argmax2d", "conv2d_86", "max_unpooling2d_2"}); + ASSERT_EQ(outs.size(), 4); + ASSERT_EQ(outs[0].size(), 1); + ASSERT_EQ(outs[1].size(), 2); + ASSERT_EQ(outs[2].size(), 1); + ASSERT_EQ(outs[3].size(), 1); + Mat poolInp = outs[0][0]; + Mat poolOut = outs[1][0]; + Mat poolIds = outs[1][1]; + Mat unpoolInp = outs[2][0]; + Mat unpoolOut = outs[3][0]; + + ASSERT_EQ(poolInp.size, unpoolOut.size); + ASSERT_EQ(poolOut.size, poolIds.size); + ASSERT_EQ(poolOut.size, unpoolInp.size); + + for (int c = 0; c < 32; ++c) { + float *poolInpData = poolInp.ptr(0, c); + float *poolOutData = poolOut.ptr(0, c); + float *poolIdsData = poolIds.ptr(0, c); + float *unpoolInpData = unpoolInp.ptr(0, c); + float *unpoolOutData = unpoolOut.ptr(0, c); + for (int y = 0; y < 64; ++y) { + for (int x = 0; x < 64; ++x) { + int maxIdx = (y * 128 + x) * 2; + std::vector indices{maxIdx + 1, maxIdx + 128, maxIdx + 129}; + std::string errMsg = format("Channel %d, y: %d, x: %d", c, y, x); + for (int idx : indices) { + if (poolInpData[idx] > poolInpData[maxIdx]) { + EXPECT_EQ(unpoolOutData[maxIdx], 0.0f) << errMsg; + 
maxIdx = idx; + } + } + EXPECT_EQ(poolInpData[maxIdx], poolOutData[y * 64 + x]) << errMsg; + EXPECT_EQ(poolIdsData[y * 64 + x], (float)maxIdx) << errMsg; + EXPECT_EQ(unpoolOutData[maxIdx], unpoolInpData[y * 64 + x]) << errMsg; + } + } + } +} + +} diff --git a/platforms/js/opencv_js.config.py b/platforms/js/opencv_js.config.py index 903d1dd6fa..fe91e3334e 100644 --- a/platforms/js/opencv_js.config.py +++ b/platforms/js/opencv_js.config.py @@ -135,7 +135,7 @@ video = { dnn = {'dnn_Net': ['setInput', 'forward', 'setPreferableBackend'], '': ['readNetFromCaffe', 'readNetFromTensorflow', 'readNetFromTorch', 'readNetFromDarknet', - 'readNetFromONNX', 'readNet', 'blobFromImage']} + 'readNetFromONNX', 'readNetFromTFLite', 'readNet', 'blobFromImage']} features2d = {'Feature2D': ['detect', 'compute', 'detectAndCompute', 'descriptorSize', 'descriptorType', 'defaultNorm', 'empty', 'getDefaultName'], 'BRISK': ['create', 'getDefaultName'], From f1f14ce40388ae8991c00de58900d9b41c972e51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Corentin=20No=C3=ABl?= Date: Tue, 14 Feb 2023 13:28:42 +0100 Subject: [PATCH 022/199] highgui: Set hard GLib requirement to >=2.32 This version has been released 10 years ago. --- modules/highgui/cmake/detect_gtk.cmake | 2 +- modules/highgui/src/window_gtk.cpp | 14 -------------- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/modules/highgui/cmake/detect_gtk.cmake b/modules/highgui/cmake/detect_gtk.cmake index c58246ac54..5e9412d427 100644 --- a/modules/highgui/cmake/detect_gtk.cmake +++ b/modules/highgui/cmake/detect_gtk.cmake @@ -20,7 +20,7 @@ if(WITH_GTK) endif() endif() endif() - ocv_check_modules(GTHREAD gthread-2.0) + ocv_check_modules(GTHREAD gthread-2.0>=2.32) if(HAVE_GTK AND NOT HAVE_GTHREAD) message(FATAL_ERROR "gthread not found. 
This library is required when building with GTK support") else() diff --git a/modules/highgui/src/window_gtk.cpp b/modules/highgui/src/window_gtk.cpp index 98698cbade..598850d4ac 100644 --- a/modules/highgui/src/window_gtk.cpp +++ b/modules/highgui/src/window_gtk.cpp @@ -659,14 +659,6 @@ CV_IMPL int cvStartWindowThread(){ cvInitSystem(0,NULL); if (!thread_started) { -#if !GLIB_CHECK_VERSION(2, 32, 0) // https://github.com/GNOME/glib/blame/b4d58a7105bb9d75907233968bb534b38f9a6e43/glib/deprecated/gthread.h#L274 - if (!g_thread_supported ()) - { - /* the GThread system wasn't inited, so init it */ - g_thread_init(NULL); - } -#endif - (void)getWindowMutex(); // force mutex initialization // protects the 'last key pressed' variable @@ -675,13 +667,7 @@ CV_IMPL int cvStartWindowThread(){ // conditional that indicates a key has been pressed cond_have_key = g_cond_new(); - #if !GLIB_CHECK_VERSION(2, 32, 0) - // this is the window update thread - window_thread = g_thread_create(icvWindowThreadLoop, - NULL, TRUE, NULL); - #else window_thread = g_thread_new("OpenCV window update", icvWindowThreadLoop, NULL); - #endif } thread_started = window_thread!=NULL; return thread_started; From a87b9fb4b66bfda73e8b797c4ad0278ac17066a6 Mon Sep 17 00:00:00 2001 From: Lilit Grigoryan Date: Mon, 13 Feb 2023 20:12:37 +0300 Subject: [PATCH 023/199] Fix focal length estimation from homography matrix --- modules/stitching/src/autocalib.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/modules/stitching/src/autocalib.cpp b/modules/stitching/src/autocalib.cpp index 18b6e048d0..772eb7e7e9 100644 --- a/modules/stitching/src/autocalib.cpp +++ b/modules/stitching/src/autocalib.cpp @@ -74,7 +74,11 @@ void focalsFromHomography(const Mat& H, double &f0, double &f1, bool &f0_ok, boo d2 = (h[7] - h[6]) * (h[7] + h[6]); v1 = -(h[0] * h[1] + h[3] * h[4]) / d1; v2 = (h[0] * h[0] + h[3] * h[3] - h[1] * h[1] - h[4] * h[4]) / d2; - if (v1 < v2) std::swap(v1, v2); + if (v1 < v2) 
+ { + std::swap(v1, v2); + std::swap(d1, d2); + } if (v1 > 0 && v2 > 0) f1 = std::sqrt(std::abs(d1) > std::abs(d2) ? v1 : v2); else if (v1 > 0) f1 = std::sqrt(v1); else f1_ok = false; @@ -84,7 +88,11 @@ void focalsFromHomography(const Mat& H, double &f0, double &f1, bool &f0_ok, boo d2 = h[0] * h[0] + h[1] * h[1] - h[3] * h[3] - h[4] * h[4]; v1 = -h[2] * h[5] / d1; v2 = (h[5] * h[5] - h[2] * h[2]) / d2; - if (v1 < v2) std::swap(v1, v2); + if (v1 < v2) + { + std::swap(v1, v2); + std::swap(d1, d2); + } if (v1 > 0 && v2 > 0) f0 = std::sqrt(std::abs(d1) > std::abs(d2) ? v1 : v2); else if (v1 > 0) f0 = std::sqrt(v1); else f0_ok = false; From 6c235c8edbca8c89306a17e1958d904c42733b89 Mon Sep 17 00:00:00 2001 From: Anatoliy Talamanov Date: Wed, 15 Feb 2023 11:04:14 +0000 Subject: [PATCH 024/199] Merge pull request #23211 from TolyaTalamanov:at/pipeline-modeling-tool-perf-alignment [G-API] Pipeline modeling tool: Refactor calculating performance statistics * Add warmup execution * Align perf metrics * Add busy wait mode for source * Small fix for late frames * pl_fn to src_fn * Change show statistics * Correct warm-up iteration * Properly calculate drop frames * Enable frame dropping for streaming mode * Enable frame dropping for streaming mode * Fix comments to review * Fix typos * Cosmetic --- .../gapi/samples/pipeline_modeling_tool.cpp | 25 ++- .../pipeline_modeling_tool/dummy_source.hpp | 63 ++++--- .../pipeline_modeling_tool/pipeline.hpp | 160 ++++++++++-------- .../pipeline_builder.hpp | 27 ++- .../samples/pipeline_modeling_tool/utils.hpp | 26 ++- 5 files changed, 185 insertions(+), 116 deletions(-) diff --git a/modules/gapi/samples/pipeline_modeling_tool.cpp b/modules/gapi/samples/pipeline_modeling_tool.cpp index 7c202642a9..3a300d7dd2 100644 --- a/modules/gapi/samples/pipeline_modeling_tool.cpp +++ b/modules/gapi/samples/pipeline_modeling_tool.cpp @@ -35,6 +35,22 @@ static AppMode strToAppMode(const std::string& mode_str) { } } +enum class WaitMode { + BUSY, + SLEEP 
+}; + +static WaitMode strToWaitMode(const std::string& mode_str) { + if (mode_str == "sleep") { + return WaitMode::SLEEP; + } else if (mode_str == "busy") { + return WaitMode::BUSY; + } else { + throw std::logic_error("Unsupported wait mode: " + mode_str + + "\nPlease chose between: busy (default) and sleep"); + } +} + template T read(const cv::FileNode& node) { return static_cast(node); @@ -401,7 +417,12 @@ int main(int argc, char* argv[]) { if (app_mode == AppMode::BENCHMARK) { latency = 0.0; } - auto src = std::make_shared(latency, output, drop_frames); + + const auto wait_mode = + strToWaitMode(readOpt(src_fn["wait_mode"]).value_or("busy")); + auto wait_strategy = (wait_mode == WaitMode::SLEEP) ? utils::sleep : utils::busyWait; + auto src = std::make_shared( + utils::double_ms_t{latency}, output, drop_frames, std::move(wait_strategy)); builder.setSource(src_name, src); } @@ -446,7 +467,7 @@ int main(int argc, char* argv[]) { // NB: Pipeline mode from config takes priority over cmd. auto pl_mode = cfg_pl_mode.has_value() ? strToPLMode(cfg_pl_mode.value()) : cmd_pl_mode; - // NB: Using drop_frames with streaming pipelines will follow to + // NB: Using drop_frames with streaming pipelines will lead to // incorrect performance results. 
if (drop_frames && pl_mode == PLMode::STREAMING) { throw std::logic_error( diff --git a/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp b/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp index 4c2ea1638c..f0cad0338a 100644 --- a/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp +++ b/modules/gapi/samples/pipeline_modeling_tool/dummy_source.hpp @@ -12,26 +12,36 @@ class DummySource final: public cv::gapi::wip::IStreamSource { public: + using WaitStrategy = std::function; using Ptr = std::shared_ptr; - DummySource(const double latency, + using ts_t = std::chrono::microseconds; + + template + DummySource(const DurationT latency, const OutputDescr& output, - const bool drop_frames); + const bool drop_frames, + WaitStrategy&& wait); + bool pull(cv::gapi::wip::Data& data) override; cv::GMetaArg descr_of() const override; - double latency() const { return m_latency; }; private: - double m_latency; - cv::Mat m_mat; - bool m_drop_frames; - double m_next_tick_ts = -1; - int64_t m_curr_seq_id = 0; + int64_t m_latency; + cv::Mat m_mat; + bool m_drop_frames; + int64_t m_next_tick_ts = -1; + int64_t m_curr_seq_id = 0; + WaitStrategy m_wait; }; -DummySource::DummySource(const double latency, +template +DummySource::DummySource(const DurationT latency, const OutputDescr& output, - const bool drop_frames) - : m_latency(latency), m_drop_frames(drop_frames) { + const bool drop_frames, + WaitStrategy&& wait) + : m_latency(std::chrono::duration_cast(latency).count()), + m_drop_frames(drop_frames), + m_wait(std::move(wait)) { utils::createNDMat(m_mat, output.dims, output.precision); utils::generateRandom(m_mat); } @@ -42,10 +52,10 @@ bool DummySource::pull(cv::gapi::wip::Data& data) { // NB: Wait m_latency before return the first frame. 
if (m_next_tick_ts == -1) { - m_next_tick_ts = utils::timestamp() + m_latency; + m_next_tick_ts = utils::timestamp() + m_latency; } - int64_t curr_ts = utils::timestamp(); + int64_t curr_ts = utils::timestamp(); if (curr_ts < m_next_tick_ts) { /* * curr_ts @@ -57,8 +67,8 @@ bool DummySource::pull(cv::gapi::wip::Data& data) { * * NB: New frame will be produced at the m_next_tick_ts point. */ - utils::sleep(m_next_tick_ts - curr_ts); - } else { + m_wait(ts_t{m_next_tick_ts - curr_ts}); + } else if (m_latency != 0) { /* * curr_ts * +1 +2 | @@ -66,29 +76,28 @@ bool DummySource::pull(cv::gapi::wip::Data& data) { * ^ ^ * m_next_tick_ts -------------> * - * - * NB: Shift m_next_tick_ts to the nearest tick before curr_ts and - * update current seq_id correspondingly. - * - * if drop_frames is enabled, wait for the next tick, otherwise - * return last written frame (+2 at the picture above) immediately. */ + + // NB: Count how many frames have been produced since last pull (m_next_tick_ts). int64_t num_frames = static_cast((curr_ts - m_next_tick_ts) / m_latency); - m_curr_seq_id += num_frames; + // NB: Shift m_next_tick_ts to the nearest tick before curr_ts. m_next_tick_ts += num_frames * m_latency; + // NB: if drop_frames is enabled, update current seq_id and wait for the next tick, otherwise + // return last written frame (+2 at the picture above) immediately. if (m_drop_frames) { + // NB: Shift tick to the next frame. m_next_tick_ts += m_latency; - ++m_curr_seq_id; - utils::sleep(m_next_tick_ts - curr_ts); + // NB: Wait for the next frame. + m_wait(ts_t{m_next_tick_ts - curr_ts}); + // NB: Drop already produced frames + update seq_id for the current. + m_curr_seq_id += num_frames + 1; } } - // NB: Just increase reference counter not to release mat memory // after assigning it to the data. 
cv::Mat mat = m_mat; - - data.meta[meta_tag::timestamp] = utils::timestamp(); + data.meta[meta_tag::timestamp] = utils::timestamp(); data.meta[meta_tag::seq_id] = m_curr_seq_id++; data = mat; m_next_tick_ts += m_latency; diff --git a/modules/gapi/samples/pipeline_modeling_tool/pipeline.hpp b/modules/gapi/samples/pipeline_modeling_tool/pipeline.hpp index ac192cba52..5220a0d1ad 100644 --- a/modules/gapi/samples/pipeline_modeling_tool/pipeline.hpp +++ b/modules/gapi/samples/pipeline_modeling_tool/pipeline.hpp @@ -6,34 +6,39 @@ struct PerfReport { std::string name; double avg_latency = 0.0; - int64_t min_latency = 0; - int64_t max_latency = 0; - int64_t first_latency = 0; + double min_latency = 0.0; + double max_latency = 0.0; + double first_latency = 0.0; double throughput = 0.0; - int64_t elapsed = 0; - int64_t warmup_time = 0; + double elapsed = 0.0; + double warmup_time = 0.0; int64_t num_late_frames = 0; - std::vector latencies; + std::vector latencies; + std::vector seq_ids; std::string toStr(bool expanded = false) const; }; std::string PerfReport::toStr(bool expand) const { + const auto to_double_str = [](double val) { + std::stringstream ss; + ss << std::fixed << std::setprecision(3) << val; + return ss.str(); + }; + std::stringstream ss; - ss << name << ": \n" - << " Warm up time: " << warmup_time << " ms\n" - << " Execution time: " << elapsed << " ms\n" - << " Frames: " << num_late_frames << "/" << latencies.size() << " (late/all)\n" - << " Latency:\n" - << " first: " << first_latency << " ms\n" - << " min: " << min_latency << " ms\n" - << " max: " << max_latency << " ms\n" - << " avg: " << std::fixed << std::setprecision(3) << avg_latency << " ms\n" - << " Throughput: " << std::fixed << std::setprecision(3) << throughput << " FPS"; + ss << name << ": warm-up: " << to_double_str(warmup_time) + << " ms, execution time: " << to_double_str(elapsed) + << " ms, throughput: " << to_double_str(throughput) + << " FPS, latency: first: " << to_double_str(first_latency) 
+ << " ms, min: " << to_double_str(min_latency) + << " ms, avg: " << to_double_str(avg_latency) + << " ms, max: " << to_double_str(max_latency) + << " ms, frames: " << num_late_frames << "/" << seq_ids.back()+1 << " (dropped/all)"; if (expand) { for (size_t i = 0; i < latencies.size(); ++i) { ss << "\nFrame:" << i << "\nLatency: " - << latencies[i] << " ms"; + << to_double_str(latencies[i]) << " ms"; } } @@ -70,10 +75,12 @@ public: virtual ~Pipeline() = default; protected: - virtual void _compile() = 0; - virtual int64_t run_iter() = 0; - virtual void init() {}; - virtual void deinit() {}; + virtual void _compile() = 0; + virtual void run_iter() = 0; + virtual void init() {}; + virtual void deinit() {}; + + void prepareOutputs(); std::string m_name; cv::GComputation m_comp; @@ -82,6 +89,11 @@ protected: cv::GCompileArgs m_args; size_t m_num_outputs; PerfReport m_perf; + + cv::GRunArgsP m_pipeline_outputs; + std::vector m_out_mats; + int64_t m_start_ts; + int64_t m_seq_id; }; Pipeline::Pipeline(std::string&& name, @@ -101,42 +113,82 @@ Pipeline::Pipeline(std::string&& name, void Pipeline::compile() { m_perf.warmup_time = - utils::measure([this]() { + utils::measure([this]() { _compile(); }); } +void Pipeline::prepareOutputs() { + // NB: N-2 buffers + timestamp + seq_id. 
+ m_out_mats.resize(m_num_outputs - 2); + for (auto& m : m_out_mats) { + m_pipeline_outputs += cv::gout(m); + } + m_pipeline_outputs += cv::gout(m_start_ts); + m_pipeline_outputs += cv::gout(m_seq_id); +} + void Pipeline::run() { using namespace std::chrono; + // NB: Allocate outputs for execution + prepareOutputs(); + + // NB: Warm-up iteration invalidates source state + // so need to copy it + auto orig_src = m_src; + auto copy_src = std::make_shared(*m_src); + + // NB: Use copy for warm-up iteration + m_src = copy_src; + + // NB: Warm-up iteration + init(); + run_iter(); + deinit(); + + // NB: Calculate first latency + m_perf.first_latency = utils::double_ms_t{ + microseconds{utils::timestamp() - m_start_ts}}.count(); + + // NB: Now use original source + m_src = orig_src; + + // NB: Start measuring execution init(); auto start = high_resolution_clock::now(); m_stop_criterion->start(); + while (true) { - m_perf.latencies.push_back(run_iter()); - m_perf.elapsed = duration_cast(high_resolution_clock::now() - start).count(); + run_iter(); + const auto latency = utils::double_ms_t{ + microseconds{utils::timestamp() - m_start_ts}}.count(); + + m_perf.latencies.push_back(latency); + m_perf.seq_ids.push_back(m_seq_id); + m_stop_criterion->iter(); if (m_stop_criterion->done()) { + m_perf.elapsed = duration_cast( + high_resolution_clock::now() - start).count(); deinit(); break; } } - m_perf.avg_latency = utils::avg(m_perf.latencies); - m_perf.min_latency = utils::min(m_perf.latencies); - m_perf.max_latency = utils::max(m_perf.latencies); - m_perf.first_latency = m_perf.latencies[0]; + m_perf.avg_latency = utils::avg(m_perf.latencies); + m_perf.min_latency = utils::min(m_perf.latencies); + m_perf.max_latency = utils::max(m_perf.latencies); - // NB: Count how many executions don't fit into camera latency interval. 
- m_perf.num_late_frames = - std::count_if(m_perf.latencies.begin(), m_perf.latencies.end(), - [this](int64_t latency) { - return static_cast(latency) > m_src->latency(); - }); + // NB: Count the number of dropped frames + int64_t prev_seq_id = m_perf.seq_ids[0]; + for (size_t i = 1; i < m_perf.seq_ids.size(); ++i) { + m_perf.num_late_frames += m_perf.seq_ids[i] - prev_seq_id - 1; + prev_seq_id = m_perf.seq_ids[i]; + } - m_perf.throughput = - (m_perf.latencies.size() / static_cast(m_perf.elapsed)) * 1000; + m_perf.throughput = (m_perf.latencies.size() / m_perf.elapsed) * 1000; } const PerfReport& Pipeline::report() const { @@ -155,13 +207,6 @@ private: } virtual void init() override { - using namespace std::chrono; - // NB: N-1 buffers + timestamp. - m_out_mats.resize(m_num_outputs - 1); - for (auto& m : m_out_mats) { - m_pipeline_outputs += cv::gout(m); - } - m_pipeline_outputs += cv::gout(m_start_ts); m_compiled.setSource(m_src); m_compiled.start(); } @@ -170,15 +215,11 @@ private: m_compiled.stop(); } - virtual int64_t run_iter() override { + virtual void run_iter() override { m_compiled.pull(cv::GRunArgsP{m_pipeline_outputs}); - return utils::timestamp() - m_start_ts; } cv::GStreamingCompiled m_compiled; - cv::GRunArgsP m_pipeline_outputs; - std::vector m_out_mats; - int64_t m_start_ts; }; class RegularPipeline : public Pipeline { @@ -192,26 +233,13 @@ private: cv::GCompileArgs(m_args)); } - virtual void init() override { - m_out_mats.resize(m_num_outputs); - for (auto& m : m_out_mats) { - m_pipeline_outputs += cv::gout(m); - } - } - - virtual int64_t run_iter() override { - using namespace std::chrono; - cv::gapi::wip::Data d; - m_src->pull(d); - auto in_mat = cv::util::get(d); - return utils::measure([&]{ - m_compiled(cv::gin(in_mat), cv::GRunArgsP{m_pipeline_outputs}); - }); + virtual void run_iter() override { + cv::gapi::wip::Data data; + m_src->pull(data); + m_compiled({data}, cv::GRunArgsP{m_pipeline_outputs}); } - cv::GCompiled m_compiled; - 
cv::GRunArgsP m_pipeline_outputs; - std::vector m_out_mats; + cv::GCompiled m_compiled; }; enum class PLMode { diff --git a/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp b/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp index 6ac6374f07..3964b68a86 100644 --- a/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp +++ b/modules/gapi/samples/pipeline_modeling_tool/pipeline_builder.hpp @@ -163,13 +163,10 @@ struct DummyCall { cv::Mat& out_mat, DummyState& state) { using namespace std::chrono; - double total = 0; - auto start = high_resolution_clock::now(); + auto start_ts = utils::timestamp(); state.mat.copyTo(out_mat); - while (total < time) { - total = duration_cast>( - high_resolution_clock::now() - start).count(); - } + auto elapsed = utils::timestamp() - start_ts; + utils::busyWait(duration_cast(utils::double_ms_t{time-elapsed})); } }; @@ -656,16 +653,16 @@ Pipeline::Ptr PipelineBuilder::construct() { } GAPI_Assert(m_state->stop_criterion); - if (m_state->mode == PLMode::STREAMING) { - GAPI_Assert(graph_inputs.size() == 1); - GAPI_Assert(cv::util::holds_alternative(graph_inputs[0])); - // FIXME: Handle GFrame when NV12 comes. - const auto& graph_input = cv::util::get(graph_inputs[0]); - // NB: In case streaming mode need to expose timestamp in order to - // calculate performance metrics. - graph_outputs.emplace_back( - cv::gapi::streaming::timestamp(graph_input).strip()); + GAPI_Assert(graph_inputs.size() == 1); + GAPI_Assert(cv::util::holds_alternative(graph_inputs[0])); + // FIXME: Handle GFrame when NV12 comes. 
+ const auto& graph_input = cv::util::get(graph_inputs[0]); + graph_outputs.emplace_back( + cv::gapi::streaming::timestamp(graph_input).strip()); + graph_outputs.emplace_back( + cv::gapi::streaming::seq_id(graph_input).strip()); + if (m_state->mode == PLMode::STREAMING) { return std::make_shared(std::move(m_state->name), cv::GComputation( cv::GProtoInputArgs{graph_inputs}, diff --git a/modules/gapi/samples/pipeline_modeling_tool/utils.hpp b/modules/gapi/samples/pipeline_modeling_tool/utils.hpp index c0f0897c35..0297aed53a 100644 --- a/modules/gapi/samples/pipeline_modeling_tool/utils.hpp +++ b/modules/gapi/samples/pipeline_modeling_tool/utils.hpp @@ -17,6 +17,8 @@ struct OutputDescr { namespace utils { +using double_ms_t = std::chrono::duration; + inline void createNDMat(cv::Mat& mat, const std::vector& dims, int depth) { GAPI_Assert(!dims.empty()); mat.create(dims, depth); @@ -50,10 +52,8 @@ inline void generateRandom(cv::Mat& out) { } } -inline void sleep(double ms) { +inline void sleep(std::chrono::microseconds delay) { #if defined(_WIN32) - // NB: It takes portions of 100 nanoseconds. - int64_t ns_units = static_cast(ms * 1e4); // FIXME: Wrap it to RAII and instance only once. HANDLE timer = CreateWaitableTimer(NULL, true, NULL); if (!timer) { @@ -61,7 +61,12 @@ inline void sleep(double ms) { } LARGE_INTEGER li; - li.QuadPart = -ns_units; + using ns_t = std::chrono::nanoseconds; + using ns_100_t = std::chrono::duration, ns_t::period>>; + // NB: QuadPart takes portions of 100 nanoseconds. 
+ li.QuadPart = -std::chrono::duration_cast(delay).count(); + if(!SetWaitableTimer(timer, &li, 0, NULL, NULL, false)){ CloseHandle(timer); throw std::logic_error("Failed to set timer"); @@ -72,8 +77,7 @@ inline void sleep(double ms) { } CloseHandle(timer); #else - using namespace std::chrono; - std::this_thread::sleep_for(duration(ms)); + std::this_thread::sleep_for(delay); #endif } @@ -93,6 +97,16 @@ typename duration_t::rep timestamp() { return duration_cast(now.time_since_epoch()).count(); } +inline void busyWait(std::chrono::microseconds delay) { + auto start_ts = timestamp(); + auto end_ts = start_ts; + auto time_to_wait = delay.count(); + + while (end_ts - start_ts < time_to_wait) { + end_ts = timestamp(); + } +} + template void mergeMapWith(std::map& target, const std::map& second) { for (auto&& item : second) { From 2ab7b7c09eff6cb69fadaf9238869f01f350fc8f Mon Sep 17 00:00:00 2001 From: Vladimir Ponomarev Date: Thu, 16 Feb 2023 15:18:30 +0300 Subject: [PATCH 025/199] Remove separator between trackbars. Remove separator between 2 or more trackbars. This separator has zero thickness and creates bags in toolbar. 
--- modules/highgui/src/window_w32.cpp | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/modules/highgui/src/window_w32.cpp b/modules/highgui/src/window_w32.cpp index eb17d5fc01..a4eb322faf 100644 --- a/modules/highgui/src/window_w32.cpp +++ b/modules/highgui/src/window_w32.cpp @@ -2205,21 +2205,6 @@ icvCreateTrackbar( const char* trackbar_name, const char* window_name, /* Retrieve current buttons count */ bcount = (int)SendMessage(window->toolbar.toolbar, TB_BUTTONCOUNT, 0, 0); - if (bcount > 0) - { - /* If this is not the first button then we need to - separate it from the previous one */ - tbs.iBitmap = 0; - tbs.idCommand = bcount; // Set button id to it's number - tbs.iString = 0; - tbs.fsStyle = TBSTYLE_SEP; - tbs.fsState = TBSTATE_ENABLED; - SendMessage(window->toolbar.toolbar, TB_ADDBUTTONS, 1, (LPARAM)&tbs); - - // Retrieve current buttons count - bcount = (int)SendMessage(window->toolbar.toolbar, TB_BUTTONCOUNT, 0, 0); - } - /* Add a button which we're going to cover with the slider */ tbs.iBitmap = 0; tbs.idCommand = bcount; // Set button id to it's number From 923dbcc58fa8bd1427735fa53e847e6361a27592 Mon Sep 17 00:00:00 2001 From: Vaclav Vavra Date: Fri, 17 Feb 2023 08:35:54 +0100 Subject: [PATCH 026/199] different interpolation by double image (#23124) * different interpolation by double image * fixing scaling mapping * fixing a test * added an option to enable previous interpolation * added doxygen entries for the new parameter * ASSERT_TRUE -> ASSERT_EQ * changed log message when using old upscale mode --- .../features2d/include/opencv2/features2d.hpp | 12 ++++- modules/features2d/src/sift.dispatch.cpp | 39 ++++++++++----- .../test/test_descriptors_regression.impl.hpp | 47 +++++++++++++++++++ .../test/test_detectors_invariance.cpp | 2 +- .../test/test_detectors_invariance.impl.hpp | 5 -- .../features2d/test/test_invariance_utils.hpp | 4 +- 6 files changed, 88 insertions(+), 21 deletions(-) diff --git 
a/modules/features2d/include/opencv2/features2d.hpp b/modules/features2d/include/opencv2/features2d.hpp index 4bb335eb68..88c9dc9cc3 100644 --- a/modules/features2d/include/opencv2/features2d.hpp +++ b/modules/features2d/include/opencv2/features2d.hpp @@ -286,10 +286,14 @@ public: @param sigma The sigma of the Gaussian applied to the input image at the octave \#0. If your image is captured with a weak camera with soft lenses, you might want to reduce the number. + + @param enable_precise_upscale Whether to enable precise upscaling in the scale pyramid, which maps + index \f$\texttt{x}\f$ to \f$\texttt{2x}\f$. This prevents localization bias. The option + to disable it (which is deprecated and issues a warning) is provided to keep the original behavior. */ CV_WRAP static Ptr create(int nfeatures = 0, int nOctaveLayers = 3, double contrastThreshold = 0.04, double edgeThreshold = 10, - double sigma = 1.6); + double sigma = 1.6, bool enable_precise_upscale = true); /** @brief Create SIFT with specified descriptorType. @param nfeatures The number of best features to retain. The features are ranked by their scores @@ -313,10 +317,14 @@ public: is captured with a weak camera with soft lenses, you might want to reduce the number. @param descriptorType The type of descriptors. Only CV_32F and CV_8U are supported. + + @param enable_precise_upscale Whether to enable precise upscaling in the scale pyramid, which maps + index \f$\texttt{x}\f$ to \f$\texttt{2x}\f$. This prevents localization bias. The option + to disable it (which is deprecated and issues a warning) is provided to keep the original behavior. 
*/ CV_WRAP static Ptr create(int nfeatures, int nOctaveLayers, double contrastThreshold, double edgeThreshold, - double sigma, int descriptorType); + double sigma, int descriptorType, bool enable_precise_upscale = true); CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; diff --git a/modules/features2d/src/sift.dispatch.cpp b/modules/features2d/src/sift.dispatch.cpp index 7c72b37898..d5c1171e9f 100644 --- a/modules/features2d/src/sift.dispatch.cpp +++ b/modules/features2d/src/sift.dispatch.cpp @@ -72,6 +72,7 @@ #include "precomp.hpp" #include #include +#include #include "sift.simd.hpp" #include "sift.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content @@ -88,7 +89,8 @@ class SIFT_Impl : public SIFT public: explicit SIFT_Impl( int nfeatures = 0, int nOctaveLayers = 3, double contrastThreshold = 0.04, double edgeThreshold = 10, - double sigma = 1.6, int descriptorType = CV_32F ); + double sigma = 1.6, int descriptorType = CV_32F, + bool enable_precise_upscale = true ); //! 
returns the descriptor size in floats (128) int descriptorSize() const CV_OVERRIDE; @@ -136,24 +138,25 @@ protected: CV_PROP_RW double edgeThreshold; CV_PROP_RW double sigma; CV_PROP_RW int descriptor_type; + CV_PROP_RW bool enable_precise_upscale; }; Ptr SIFT::create( int _nfeatures, int _nOctaveLayers, - double _contrastThreshold, double _edgeThreshold, double _sigma ) + double _contrastThreshold, double _edgeThreshold, double _sigma, bool enable_precise_upscale ) { CV_TRACE_FUNCTION(); - return makePtr(_nfeatures, _nOctaveLayers, _contrastThreshold, _edgeThreshold, _sigma, CV_32F); + return makePtr(_nfeatures, _nOctaveLayers, _contrastThreshold, _edgeThreshold, _sigma, CV_32F, enable_precise_upscale); } Ptr SIFT::create( int _nfeatures, int _nOctaveLayers, - double _contrastThreshold, double _edgeThreshold, double _sigma, int _descriptorType ) + double _contrastThreshold, double _edgeThreshold, double _sigma, int _descriptorType, bool enable_precise_upscale ) { CV_TRACE_FUNCTION(); // SIFT descriptor supports 32bit floating point and 8bit unsigned int. CV_Assert(_descriptorType == CV_32F || _descriptorType == CV_8U); - return makePtr(_nfeatures, _nOctaveLayers, _contrastThreshold, _edgeThreshold, _sigma, _descriptorType); + return makePtr(_nfeatures, _nOctaveLayers, _contrastThreshold, _edgeThreshold, _sigma, _descriptorType, enable_precise_upscale); } String SIFT::getDefaultName() const @@ -170,7 +173,7 @@ unpackOctave(const KeyPoint& kpt, int& octave, int& layer, float& scale) scale = octave >= 0 ? 
1.f/(1 << octave) : (float)(1 << -octave); } -static Mat createInitialImage( const Mat& img, bool doubleImageSize, float sigma ) +static Mat createInitialImage( const Mat& img, bool doubleImageSize, float sigma, bool enable_precise_upscale ) { CV_TRACE_FUNCTION(); @@ -188,12 +191,22 @@ static Mat createInitialImage( const Mat& img, bool doubleImageSize, float sigma if( doubleImageSize ) { sig_diff = sqrtf( std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA * 4, 0.01f) ); + Mat dbl; + if (enable_precise_upscale) { + dbl.create(Size(gray_fpt.cols*2, gray_fpt.rows*2), gray_fpt.type()); + Mat H = Mat::zeros(2, 3, CV_32F); + H.at(0, 0) = 0.5f; + H.at(1, 1) = 0.5f; + + cv::warpAffine(gray_fpt, dbl, H, dbl.size(), INTER_LINEAR | WARP_INVERSE_MAP, BORDER_REFLECT); + } else { #if DoG_TYPE_SHORT - resize(gray_fpt, dbl, Size(gray_fpt.cols*2, gray_fpt.rows*2), 0, 0, INTER_LINEAR_EXACT); + resize(gray_fpt, dbl, Size(gray_fpt.cols*2, gray_fpt.rows*2), 0, 0, INTER_LINEAR_EXACT); #else - resize(gray_fpt, dbl, Size(gray_fpt.cols*2, gray_fpt.rows*2), 0, 0, INTER_LINEAR); + resize(gray_fpt, dbl, Size(gray_fpt.cols*2, gray_fpt.rows*2), 0, 0, INTER_LINEAR); #endif + } Mat result; GaussianBlur(dbl, result, Size(), sig_diff, sig_diff); return result; @@ -459,10 +472,14 @@ static void calcDescriptors(const std::vector& gpyr, const std::vector gpyr; int nOctaves = actualNOctaves > 0 ? actualNOctaves : cvRound(std::log( (double)std::min( base.cols, base.rows ) ) / std::log(2.) 
- 2) - firstOctave; diff --git a/modules/features2d/test/test_descriptors_regression.impl.hpp b/modules/features2d/test/test_descriptors_regression.impl.hpp index e60b5a0691..42756a9010 100644 --- a/modules/features2d/test/test_descriptors_regression.impl.hpp +++ b/modules/features2d/test/test_descriptors_regression.impl.hpp @@ -7,6 +7,34 @@ namespace opencv_test { namespace { /****************************************************************************************\ * Regression tests for descriptor extractors. * \****************************************************************************************/ +static void double_image(Mat& src, Mat& dst) { + + dst.create(Size(src.cols*2, src.rows*2), src.type()); + + Mat H = Mat::zeros(2, 3, CV_32F); + H.at(0, 0) = 0.5f; + H.at(1, 1) = 0.5f; + cv::warpAffine(src, dst, H, dst.size(), INTER_LINEAR | WARP_INVERSE_MAP, BORDER_REFLECT); + +} + +static Mat prepare_img(bool rows_indexed) { + int rows = 5; + int columns = 5; + Mat img(rows, columns, CV_32F); + + for (int i = 0; i < rows; i++) { + for (int j = 0; j < columns; j++) { + if (rows_indexed) { + img.at(i, j) = (float)i; + } else { + img.at(i, j) = (float)j; + } + } + } + return img; +} + static void writeMatInBin( const Mat& mat, const string& filename ) { FILE* f = fopen( filename.c_str(), "wb"); @@ -145,6 +173,25 @@ protected: ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_TEST_DATA ); } + image = prepare_img(false); + Mat dbl; + try + { + double_image(image, dbl); + + Mat downsized_back(dbl.rows/2, dbl.cols/2, CV_32F); + resize(dbl, downsized_back, Size(dbl.cols/2, dbl.rows/2), 0, 0, INTER_NEAREST); + + cv::Mat diff = (image != downsized_back); + ASSERT_EQ(0, cv::norm(image, downsized_back, NORM_INF)); + } + catch(...) 
+ { + ts->printf( cvtest::TS::LOG, "double_image() must not generate exception (1).\n"); + ts->printf( cvtest::TS::LOG, "double_image() when downsized back by NEAREST must generate the same original image (1).\n"); + ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_TEST_DATA ); + } + // Several images. vector images; vector > keypointsCollection; diff --git a/modules/features2d/test/test_detectors_invariance.cpp b/modules/features2d/test/test_detectors_invariance.cpp index 3c69c49eb7..11e7ecfedc 100644 --- a/modules/features2d/test/test_detectors_invariance.cpp +++ b/modules/features2d/test/test_detectors_invariance.cpp @@ -37,7 +37,7 @@ INSTANTIATE_TEST_CASE_P(AKAZE_DESCRIPTOR_KAZE, DetectorRotationInvariance, */ INSTANTIATE_TEST_CASE_P(SIFT, DetectorScaleInvariance, - Value(IMAGE_BIKES, SIFT::create(0, 3, 0.09), 0.65f, 0.98f)); + Value(IMAGE_BIKES, SIFT::create(0, 3, 0.09), 0.60f, 0.98f)); INSTANTIATE_TEST_CASE_P(BRISK, DetectorScaleInvariance, Value(IMAGE_BIKES, BRISK::create(), 0.08f, 0.49f)); diff --git a/modules/features2d/test/test_detectors_invariance.impl.hpp b/modules/features2d/test/test_detectors_invariance.impl.hpp index 8a571cb718..e50316fed9 100644 --- a/modules/features2d/test/test_detectors_invariance.impl.hpp +++ b/modules/features2d/test/test_detectors_invariance.impl.hpp @@ -25,7 +25,6 @@ void matchKeyPoints(const vector& keypoints0, const Mat& H, perspectiveTransform(Mat(points0), points0t, H); matches.clear(); - vector usedMask(keypoints1.size(), 0); for(int i0 = 0; i0 < static_cast(keypoints0.size()); i0++) { int nearestPointIndex = -1; @@ -33,8 +32,6 @@ void matchKeyPoints(const vector& keypoints0, const Mat& H, const float r0 = 0.5f * keypoints0[i0].size; for(size_t i1 = 0; i1 < keypoints1.size(); i1++) { - if(nearestPointIndex >= 0 && usedMask[i1]) - continue; float r1 = 0.5f * keypoints1[i1].size; float intersectRatio = calcIntersectRatio(points0t.at(i0), r0, @@ -47,8 +44,6 @@ void matchKeyPoints(const vector& keypoints0, const Mat& H, 
} matches.push_back(DMatch(i0, nearestPointIndex, maxIntersectRatio)); - if(nearestPointIndex >= 0) - usedMask[nearestPointIndex] = 1; } } diff --git a/modules/features2d/test/test_invariance_utils.hpp b/modules/features2d/test/test_invariance_utils.hpp index 41b3c8ed9f..ba9f97d990 100644 --- a/modules/features2d/test/test_invariance_utils.hpp +++ b/modules/features2d/test/test_invariance_utils.hpp @@ -75,8 +75,8 @@ void scaleKeyPoints(const vector& src, vector& dst, float sc dst.resize(src.size()); for (size_t i = 0; i < src.size(); i++) { dst[i] = src[i]; - dst[i].pt.x *= scale; - dst[i].pt.y *= scale; + dst[i].pt.x = dst[i].pt.x * scale + (scale - 1.0f) / 2.0f; + dst[i].pt.y = dst[i].pt.y * scale + (scale - 1.0f) / 2.0f; dst[i].size *= scale; } } From 642942a72fe544f9f6466f87c550755d707e3e47 Mon Sep 17 00:00:00 2001 From: Vadim Levin Date: Fri, 17 Feb 2023 12:26:41 +0300 Subject: [PATCH 027/199] fix: remove extra '/O' modifier for '/IO' arguments --- modules/python/src2/gen2.py | 9 +++++++++ modules/python/src2/hdr_parser.py | 2 ++ 2 files changed, 11 insertions(+) diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py index 92629c0e7a..23f31c3e19 100755 --- a/modules/python/src2/gen2.py +++ b/modules/python/src2/gen2.py @@ -448,6 +448,15 @@ class ArgInfo(object): self.py_outputarg = False self.enclosing_arg = enclosing_arg + def __str__(self): + return 'ArgInfo("{}", tp="{}", default="{}", in={}, out={})'.format( + self.name, self.tp, self.defval, self.inputarg, + self.outputarg + ) + + def __repr__(self): + return str(self) + @property def export_name(self): if self.name in python_reserved_keywords: diff --git a/modules/python/src2/hdr_parser.py b/modules/python/src2/hdr_parser.py index f5df3e2aab..710c792179 100755 --- a/modules/python/src2/hdr_parser.py +++ b/modules/python/src2/hdr_parser.py @@ -616,6 +616,8 @@ class CppHeaderParser(object): ("InputOutputArray", mat), ("OutputArray", mat), ("noArray", arg_type)]).strip() + if '/IO' in 
modlist and '/O' in modlist: + modlist.remove('/O') args.append([arg_type, arg_name, defval, modlist]) npos = arg_start-1 From 20dac7ea485eb35a54a81b77ef340a69c2107d85 Mon Sep 17 00:00:00 2001 From: Zihao Mu Date: Fri, 17 Feb 2023 18:18:13 +0800 Subject: [PATCH 028/199] Merge pull request #23255 from zihaomu:fused_cuda_naryeltwise DNN: fuse conv+naryEletwise on CUDA backend. --- modules/dnn/src/layers/convolution_layer.cpp | 3 +- .../dnn/src/layers/nary_eltwise_layers.cpp | 33 ++++++++---- modules/dnn/src/net_impl_fuse.cpp | 52 +++++++++++++++---- 3 files changed, 66 insertions(+), 22 deletions(-) diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index b4829c72a6..5567a58a2a 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -606,7 +606,8 @@ public: if(IS_DNN_CUDA_TARGET(preferableTarget)) { Ptr eltwise = top.dynamicCast(); - if (!eltwise.empty()) // && eltwise->op == EltwiseLayer::SUM && eltwise->coeffs.empty()) + Ptr naryEltwise = top.dynamicCast(); + if (!eltwise.empty() || !naryEltwise.empty()) { /* we also need to check that the eltwise input does not require shortcut mechanism * it's difficult to verify it here but we hope that `fuseLayers` has done the check already diff --git a/modules/dnn/src/layers/nary_eltwise_layers.cpp b/modules/dnn/src/layers/nary_eltwise_layers.cpp index 91eb7f3c0e..3232f0ae5c 100644 --- a/modules/dnn/src/layers/nary_eltwise_layers.cpp +++ b/modules/dnn/src/layers/nary_eltwise_layers.cpp @@ -681,17 +681,28 @@ public: return Ptr(); } - auto op_ = [this] { - switch (op) { - case OPERATION::MAX: return cuda4dnn::EltwiseOpType::MAX; - case OPERATION::MIN: return cuda4dnn::EltwiseOpType::MIN; - case OPERATION::SUM: return cuda4dnn::EltwiseOpType::SUM; - case OPERATION::PROD: return cuda4dnn::EltwiseOpType::PRODUCT; - case OPERATION::DIV: return cuda4dnn::EltwiseOpType::DIV; - case OPERATION::ADD: return 
cuda4dnn::EltwiseOpType::SUM; - default: CV_Error(Error::StsNotImplemented, "Other operators except MAX, MIN, SUM, PRODUCT and DIV are not supported with cuda."); - } - }(); + cuda4dnn::EltwiseOpType op_ = cuda4dnn::EltwiseOpType::SUM; + switch (op) { + case OPERATION::MAX: + op_ = cuda4dnn::EltwiseOpType::MAX; + break; + case OPERATION::MIN: + op_ = cuda4dnn::EltwiseOpType::MIN; + break; + case OPERATION::SUM: + op_ = cuda4dnn::EltwiseOpType::SUM; + break; + case OPERATION::PROD: + op_ = cuda4dnn::EltwiseOpType::PRODUCT; + break; + case OPERATION::DIV: + op_ = cuda4dnn::EltwiseOpType::DIV; + break; + case OPERATION::ADD: + op_ = cuda4dnn::EltwiseOpType::SUM; + break; + default: return Ptr(); // return empty cuda_node if the EltwiseOpType is unsupported type. + }; return make_cuda_node(preferableTarget, std::move(context->stream), op_, std::vector()); } diff --git a/modules/dnn/src/net_impl_fuse.cpp b/modules/dnn/src/net_impl_fuse.cpp index 79365d0411..935f71833f 100644 --- a/modules/dnn/src/net_impl_fuse.cpp +++ b/modules/dnn/src/net_impl_fuse.cpp @@ -82,10 +82,11 @@ void Net::Impl::fuseLayers(const std::vector& blobsToKeep_) break; } #endif - /* we use `tryFuse` member of convolution layer to fuse eltwise later + /* we use `tryFuse` member of convolution layer to fuse eltwise/naryEltwise later * it's not intended to be fused here; hence, we stop when we encounter eltwise */ - if (preferableBackend == DNN_BACKEND_CUDA && ld.type == "Convolution" && nextData->type == "Eltwise") + if (preferableBackend == DNN_BACKEND_CUDA && ld.type == "Convolution" && + (nextData->type == "Eltwise" || nextData->type == "NaryEltwise")) break; Ptr nextLayer = nextData->layerInstance; if (currLayer->tryFuse(nextLayer)) @@ -335,22 +336,31 @@ void Net::Impl::fuseLayers(const std::vector& blobsToKeep_) } // OpenCL: fuse convolution layer followed by eltwise + relu - // CUDA: fuse convolution layer followed by eltwise (and optional activation) + // CUDA: fuse convolution layer followed by 
eltwise/naryEltwise (and optional activation) while (nextData && (IS_DNN_OPENCL_TARGET(preferableTarget) || IS_DNN_CUDA_TARGET(preferableTarget)) && ld.layerInstance->type == "Convolution" ) // semantic of 'if' { Ptr nextEltwiseLayer = nextData->layerInstance.dynamicCast(); - if (nextEltwiseLayer.empty()) + Ptr nextNaryEltwiseLayer = nextData->layerInstance.dynamicCast(); + if (nextEltwiseLayer.empty() && nextNaryEltwiseLayer.empty()) + break; + + // TODO: fused the Conv+NaryEltwise on OpenCL backend. At present, we can only support it at CUDA backend. + if (IS_DNN_OPENCL_TARGET(preferableTarget) && nextNaryEltwiseLayer) break; #ifdef HAVE_CUDA // CUDA backend supports fusion with eltwise sum (without variable channels) - if (IS_DNN_CUDA_TARGET(preferableTarget) && !nextEltwiseLayer.empty()) + if (IS_DNN_CUDA_TARGET(preferableTarget) && (!nextEltwiseLayer.empty() || !nextNaryEltwiseLayer.empty())) { // we create a temporary backend node for eltwise layer to obtain the eltwise configuration cuda4dnn::csl::CSLContext context; // assume that initCUDA and EltwiseOp do not use the context during init + + if (!nextData->layerInstance->supportBackend(DNN_BACKEND_CUDA)) + break; + const auto node = nextData->layerInstance->initCUDA(&context, nextData->inputBlobsWrappers, nextData->outputBlobsWrappers); auto eltwiseNode = node.dynamicCast(); @@ -408,7 +418,7 @@ void Net::Impl::fuseLayers(const std::vector& blobsToKeep_) { LayerData *eltwiseData = nextData; - // Eltwise layer has two inputs. We need to determine which + // Eltwise/NaryEltwise layer has two inputs. We need to determine which // is a base convolution layer and which could be used as it's bias. 
LayerData* biasLayerData = 0; for (int i = 0; i < 2; ++i) @@ -483,7 +493,14 @@ void Net::Impl::fuseLayers(const std::vector& blobsToKeep_) * => activation(convolution + eltwise) * > fuse eltwise and then activation */ - auto layer = nextEltwiseLayer.staticCast(); + Ptr layer = nullptr; + if (nextNaryEltwiseLayer) + layer = nextNaryEltwiseLayer.staticCast(); + else if (nextEltwiseLayer) + layer = nextEltwiseLayer.staticCast(); + else + CV_Error(Error::StsError, "Both nextNaryEltwiseLayer and nextEltwiseLayer are empty!"); + if (currLayer->tryFuse(layer)) { fuse_eltwise = true; /* eltwise was successfully fused */ @@ -511,7 +528,14 @@ void Net::Impl::fuseLayers(const std::vector& blobsToKeep_) CV_Assert(nextData); CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1); ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]); - printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str())); + + if (nextEltwiseLayer) + printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str())); + else if (nextNaryEltwiseLayer) + printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str())); + else + CV_Error(Error::StsError, "Both nextNaryEltwiseLayer and nextEltwiseLayer are empty!"); + printf_(("\tfused with %s\n", nextFusabeleActivLayer->name.c_str())); eltwiseData->skip = true; nextData->skip = true; @@ -554,12 +578,19 @@ void Net::Impl::fuseLayers(const std::vector& blobsToKeep_) } } } - else if (fuse_eltwise) // conv + eltwise (note: conv could have fused activations before eltwise) + else if (fuse_eltwise) // conv + eltwise/naryEltwise (note: conv could have fused activations before eltwise) { CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget)); CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1); ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]); - printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str())); + + if (nextEltwiseLayer) + printf_(("\tfused 
with %s\n", nextEltwiseLayer->name.c_str())); + else if (nextNaryEltwiseLayer) + printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str())); + else + CV_Error(Error::StsError, "Both nextNaryEltwiseLayer and nextEltwiseLayer are empty!"); + eltwiseData->skip = true; // This optimization is for cases like // some_layer conv (maybe fused with activ) @@ -682,6 +713,7 @@ void Net::Impl::fuseLayers(const std::vector& blobsToKeep_) inp_i_data->layerInstance->type != "Permute" && inp_i_data->layerInstance->type != "Reorg" && inp_i_data->layerInstance->type != "Eltwise" && + inp_i_data->layerInstance->type != "NaryEltwise" && inp_i_data->layerInstance.dynamicCast().empty()))) { break; From 903ec0ec60b5785a01fb90563199249e9a89db20 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Mon, 13 Feb 2023 16:35:38 +0300 Subject: [PATCH 029/199] RISC-V: support RVV 0.7 in mainline RVV intrinsics --- .../include/opencv2/core/hal/intrin_rvv.hpp | 70 +++++++++++++------ .../opencv2/core/hal/intrin_rvv071.hpp | 19 ++--- 2 files changed, 54 insertions(+), 35 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp index 3e7ce51f6b..392772439d 100644 --- a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp @@ -10,13 +10,27 @@ #include +// Building for T-Head C906 core with RVV 0.7.1 using toolchain +// https://github.com/T-head-Semi/xuantie-gnu-toolchain +// with option '-march=rv64gcv0p7' +#ifdef __THEAD_VERSION__ +# if __riscv_v == 7000 +# include +# define CV_RVV_THEAD_0_7 +# endif +#endif + namespace cv { CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN #define CV_SIMD128 1 -#define CV_SIMD128_64F 1 +#ifndef CV_RVV_THEAD_0_7 +# define CV_SIMD128_64F 1 +#else +# define CV_SIMD128_64F 0 +#endif //////////// Unsupported native intrinsics in C++ //////////// // The following types have been defined in clang, but not in GCC yet. 
@@ -1001,14 +1015,17 @@ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint64x2, float32x4, u64, f32, u, f, 6 OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int8x16, float32x4, s8, f32, i, f, 8, 32) OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int16x8, float32x4, s16, f32, i, f, 16, 32) OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int64x2, float32x4, s64, f32, i, f, 64, 32) +#if CV_SIMD128_64F OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint8x16, float64x2, u8, f64, u, f, 8, 64) OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint16x8, float64x2, u16, f64, u, f, 16, 64) OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint32x4, float64x2, u32, f64, u, f, 32, 64) OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int8x16, float64x2, s8, f64, i, f, 8, 64) OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int16x8, float64x2, s16, f64, i, f, 16, 64) OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(int32x4, float64x2, s32, f64, i, f, 32, 64) +#endif // Three times reinterpret +#if CV_SIMD128_64F inline v_float32x4 v_reinterpret_as_f32(const v_float64x2& v) \ { \ return v_float32x4(vreinterpret_v_u32m1_f32m1(vreinterpret_v_u64m1_u32m1(vreinterpret_v_f64m1_u64m1(v))));\ @@ -1017,6 +1034,7 @@ inline v_float64x2 v_reinterpret_as_f64(const v_float32x4& v) \ { \ return v_float64x2(vreinterpret_v_u64m1_f64m1(vreinterpret_v_u32m1_u64m1(vreinterpret_v_f32m1_u32m1(v))));\ } +#endif ////////////// Extract ////////////// @@ -1920,13 +1938,15 @@ inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_ #define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, suffix, shift, vl) \ inline bool v_check_all(const _Tpvec& a) \ { \ - v_uint64x2 v = v_uint64x2(vreinterpret_v_##suffix##m1_u64m1(vsrl_vx_##suffix##m1(vnot_v_##suffix##m1(a, vl), shift, vl))); \ - return (v.val[0] | v.val[1]) == 0; \ + auto v0 = vsrl_vx_##suffix##m1(vnot_v_##suffix##m1(a, vl), shift, vl); \ + v_uint32x4 v = v_uint32x4(v_reinterpret_as_u32(_Tpvec(v0))); \ + return (v.val[0] | v.val[1] | v.val[2] | v.val[3]) == 0; \ } \ inline bool v_check_any(const 
_Tpvec& a) \ { \ - v_uint64x2 v = v_uint64x2(vreinterpret_v_##suffix##m1_u64m1(vsrl_vx_##suffix##m1(a, shift, vl))); \ - return (v.val[0] | v.val[1]) != 0; \ + auto v0 = vsrl_vx_##suffix##m1(a, shift, vl); \ + v_uint32x4 v = v_uint32x4(v_reinterpret_as_u32(_Tpvec(v0))); \ + return (v.val[0] | v.val[1] | v.val[2] | v.val[3]) != 0; \ } OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint8x16, u8, 7, 16) @@ -2042,28 +2062,18 @@ OPENCV_HAL_IMPL_RVV_ABSDIFF(v_float64x2, absdiff) OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int8x16, absdiffs) OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int16x8, absdiffs) -// use reinterpret instead of c-style casting. -#ifndef __clang__ -#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width, vl) \ -inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \ +#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(ivec, uvec, itype, utype, isuf, usuf, vlen) \ +inline uvec v_absdiff(const ivec& a, const ivec& b) \ { \ - return _rTpvec(rshr(vreinterpret_v_i##width##m2_u##width##m2(sub(v_max(a, b), v_min(a, b), vl)), 0, vl)); \ + itype max = vmax_vv_##isuf(a, b, vlen); \ + itype min = vmin_vv_##isuf(a, b, vlen); \ + return uvec(vreinterpret_v_##isuf##_##usuf(vsub_vv_##isuf(max, min, vlen))); \ } -OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8x16, v_uint8x16, vuint16m2_t, vwsub_vv_i16m2, vnclipu_wx_u8m1, 16, 16) -OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16x8, v_uint16x8, vuint32m2_t, vwsub_vv_i32m2, vnclipu_wx_u16m1, 32, 8) -OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32x4, v_uint32x4, vuint64m2_t, vwsub_vv_i64m2, vnclipu_wx_u32m1, 64, 4) -#else -#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width, vl) \ -inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \ -{ \ - return _rTpvec(rshr(vreinterpret_u##width##m2(sub(v_max(a, b), v_min(a, b), vl)), 0, vl)); \ -} +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8x16, v_uint8x16, vint8m1_t, vuint8m1_t, i8m1, u8m1, 16) +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16x8, v_uint16x8, vint16m1_t, vuint16m1_t, i16m1, u16m1, 8) 
+OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32x4, v_uint32x4, vint32m1_t, vuint32m1_t, i32m1, u32m1, 4) -OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8x16, v_uint8x16, vuint16m2_t, vwsub_vv_i16m2, vnclipu_wx_u8m1, 16, 16) -OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16x8, v_uint16x8, vuint32m2_t, vwsub_vv_i32m2, vnclipu_wx_u16m1, 32, 8) -OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32x4, v_uint32x4, vuint64m2_t, vwsub_vv_i64m2, vnclipu_wx_u32m1, 64, 4) -#endif #define OPENCV_HAL_IMPL_RVV_ABS(_Tprvec, _Tpvec, suffix) \ inline _Tprvec v_abs(const _Tpvec& a) \ { \ @@ -2902,7 +2912,14 @@ inline v_int32x4 v_ceil(const v_float32x4& a) inline v_int32x4 v_trunc(const v_float32x4& a) { +#ifndef CV_RVV_THEAD_0_7 return v_int32x4(vfcvt_rtz_x_f_v_i32m1(a, 4)); +#else + const int old_round = fesetround(FE_TOWARDZERO); + vint32m1_t val = vfcvt_x_f_v_i32m1(a, 4); + fesetround(old_round); + return v_int32x4(val); +#endif } #if CV_SIMD128_64F #ifndef __clang__ @@ -2938,7 +2955,14 @@ inline v_int32x4 v_trunc(const v_float64x2& a) { double arr[4] = {a.val[0], a.val[1], 0, 0}; vfloat64m2_t tmp = vle64_v_f64m2(arr, 4); +#ifndef CV_RVV_THEAD_0_7 return v_int32x4(vfncvt_rtz_x_f_w_i32m1(tmp, 4)); +#else + const int old_round = fesetround(FE_TOWARDZERO); + vint32m1_t val = vfncvt_x_f_w_i32m1(tmp, 4); + fesetround(old_round); + return v_int32x4(val); +#endif } #else diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp index f8765510f8..9faefd97b7 100644 --- a/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv071.hpp @@ -19,7 +19,7 @@ namespace cv CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN #define CV_SIMD128 1 -#define CV_SIMD128_64F 1 +#define CV_SIMD128_64F 0 //////////// Types //////////// struct v_uint8x16 { @@ -2021,23 +2021,18 @@ inline v_int32x4 v_load_expand_q(const schar* ptr) c = vwadd_vv_i32m2(vget_i16m2_i16m1(b, 0), vmv_v_x_i16m1(0, 4), 4); \ return v_int32x4(vget_i32m2_i32m1(c, 0)); } 
-#define VITL_16 (vuint64m2_t){0x1303120211011000, 0x1707160615051404, 0x1B0B1A0A19091808, 0x1F0F1E0E1D0D1C0C} -#define VITL_8 (vuint64m2_t){0x0009000100080000, 0x000B0003000A0002, 0x000D0005000C0004, 0x000F0007000E0006} -#define VITL_4 (vuint64m2_t){0x0000000400000000, 0x0000000500000001, 0x0000000600000002, 0x0000000700000003} -#define VITL_2 (vuint64m2_t){0, 2, 1, 3} -#define LOW_4 0x0000000100000000, 0x0000000500000004 -#define LOW_8 0x0003000200010000, 0x000B000A00090008 -#define LOW_16 0x0706050403020100, 0x1716151413121110 -#define HIGH_4 0x0000000300000002, 0x0000000700000006 -#define HIGH_8 0x0007000600050004, 0x000F000E000D000C -#define HIGH_16 0x0F0E0D0C0B0A0908, 0x1F1E1D1C1B1A1918 +#define VITL_16 (vuint32m2_t){0x11011000, 0x13031202, 0x15051404, 0x17071606, 0x19091808, 0x1B0B1A0A, 0x1D0D1C0C, 0x1F0F1E0E} +#define VITL_8 (vuint32m2_t){0x00080000, 0x00090001, 0x000A0002, 0x000B0003, 0x000C0004, 0x000D0005, 0x000E0006, 0x000F0007} +#define VITL_4 (vuint32m2_t){0x00000000, 0x00000004, 0x00000001, 0x00000005, 0x00000002, 0x00000006, 0x00000003, 0x00000007} +#define VITL_2 (vuint32m2_t){0, 0, 2, 0, 1, 0, 3, 0} + #define OPENCV_HAL_IMPL_RISCVV_UNPACKS(_Tpvec, _Tp, _T, _UTp, _UT, num, num2, len, numh) \ inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \ { \ v##_Tp##m2_t tmp = vundefined_##_T##m2();\ tmp = vset_##_T##m2(tmp, 0, a0.val); \ tmp = vset_##_T##m2(tmp, 1, a1.val); \ - vuint64m2_t mask = VITL_##num; \ + vuint32m2_t mask = VITL_##num; \ tmp = (v##_Tp##m2_t)vrgather_vv_##_T##m2((v##_Tp##m2_t)tmp, (v##_UTp##m2_t)mask, num2); \ b0.val = vget_##_T##m2_##_T##m1(tmp, 0); \ b1.val = vget_##_T##m2_##_T##m1(tmp, 1); \ From 39e2ebbde459857a0484126e1b0cad6db684212e Mon Sep 17 00:00:00 2001 From: Stefan Becker Date: Tue, 7 Feb 2023 10:22:38 +0100 Subject: [PATCH 030/199] Aruco/Charuco test case fixes for floating point for loops --- .../objdetect/test/test_arucodetection.cpp | 2 +- 
.../objdetect/test/test_boarddetection.cpp | 4 +-- .../objdetect/test/test_charucodetection.cpp | 32 ++++++++++++++++--- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/modules/objdetect/test/test_arucodetection.cpp b/modules/objdetect/test/test_arucodetection.cpp index aee147c58f..5d20cd4fd3 100644 --- a/modules/objdetect/test/test_arucodetection.cpp +++ b/modules/objdetect/test/test_arucodetection.cpp @@ -247,7 +247,7 @@ void CV_ArucoDetectionPerspective::run(int) { aruco::ArucoDetector detector(aruco::getPredefinedDictionary(aruco::DICT_6X6_250), params); // detect from different positions - for(double distance = 0.1; distance < 0.7; distance += 0.2) { + for(double distance : {0.1, 0.3, 0.5, 0.7}) { for(int pitch = 0; pitch < 360; pitch += (distance == 0.1? 60:180)) { for(int yaw = 70; yaw <= 120; yaw += 40){ int currentId = iter % 250; diff --git a/modules/objdetect/test/test_boarddetection.cpp b/modules/objdetect/test/test_boarddetection.cpp index d3859920fc..ed940069fb 100644 --- a/modules/objdetect/test/test_boarddetection.cpp +++ b/modules/objdetect/test/test_boarddetection.cpp @@ -51,7 +51,7 @@ void CV_ArucoBoardPose::run(int) { aruco::DetectorParameters detectorParameters = detector.getDetectorParameters(); // for different perspectives - for(double distance = 0.2; distance <= 0.4; distance += 0.15) { + for(double distance : {0.2, 0.35}) { for(int yaw = -55; yaw <= 50; yaw += 25) { for(int pitch = -55; pitch <= 50; pitch += 25) { vector tmpIds; @@ -162,7 +162,7 @@ void CV_ArucoRefine::run(int) { aruco::DetectorParameters detectorParameters = detector.getDetectorParameters(); // for different perspectives - for(double distance = 0.2; distance <= 0.4; distance += 0.2) { + for(double distance : {0.2, 0.4}) { for(int yaw = -60; yaw < 60; yaw += 30) { for(int pitch = -60; pitch <= 60; pitch += 30) { aruco::GridBoard gridboard(Size(3, 3), 0.02f, 0.005f, detector.getDictionary()); diff --git a/modules/objdetect/test/test_charucodetection.cpp 
b/modules/objdetect/test/test_charucodetection.cpp index ef044c893b..e99f9de262 100644 --- a/modules/objdetect/test/test_charucodetection.cpp +++ b/modules/objdetect/test/test_charucodetection.cpp @@ -109,7 +109,7 @@ void CV_CharucoDetection::run(int) { Mat distCoeffs(5, 1, CV_64FC1, Scalar::all(0)); // for different perspectives - for(double distance = 0.2; distance <= 0.4; distance += 0.2) { + for(double distance : {0.2, 0.4}) { for(int yaw = -55; yaw <= 50; yaw += 25) { for(int pitch = -55; pitch <= 50; pitch += 25) { @@ -213,7 +213,7 @@ void CV_CharucoPoseEstimation::run(int) { Mat distCoeffs(5, 1, CV_64FC1, Scalar::all(0)); // for different perspectives - for(double distance = 0.2; distance <= 0.3; distance += 0.1) { + for(double distance : {0.2, 0.3}) { for(int yaw = -55; yaw <= 50; yaw += 25) { for(int pitch = -55; pitch <= 50; pitch += 25) { @@ -244,6 +244,18 @@ void CV_CharucoPoseEstimation::run(int) { detector.setCharucoParameters(charucoParameters); detector.detectBoard(img, charucoCorners, charucoIds, corners, ids); } + + // // create debug images + // Mat rgb_image; + // cv::cvtColor(img, rgb_image, COLOR_GRAY2RGB); + // aruco::drawDetectedCornersCharuco(rgb_image, charucoCorners, charucoIds); + // aruco::drawDetectedMarkers(rgb_image, corners, ids); + // cv::imwrite("Debug_CV_CharucoPoseEstimation" + // + (legacyPattern ? 
std::string("_legacy") : std::string("")) + // + "_dist" + std::to_string(distance) + // + "_yaw" + std::to_string(yaw) + // + "_pitch" + std::to_string(pitch) + ".png", rgb_image); + ASSERT_EQ(ids.size(), board.getIds().size()); if(charucoIds.size() == 0) continue; @@ -311,7 +323,7 @@ void CV_CharucoDiamondDetection::run(int) { int iter = 0; Mat cameraMatrix = Mat::eye(3, 3, CV_64FC1); - Size imgSize(500, 500); + Size imgSize(750, 750); aruco::DetectorParameters params; params.minDistanceToBorder = 0; float squareLength = 0.03f; @@ -321,7 +333,7 @@ void CV_CharucoDiamondDetection::run(int) { aruco::CharucoDetector detector(board); - cameraMatrix.at(0, 0) = cameraMatrix.at< double >(1, 1) = 650; + cameraMatrix.at(0, 0) = cameraMatrix.at< double >(1, 1) = 1000; cameraMatrix.at(0, 2) = imgSize.width / 2; cameraMatrix.at(1, 2) = imgSize.height / 2; @@ -332,7 +344,7 @@ void CV_CharucoDiamondDetection::run(int) { detector.setCharucoParameters(charucoParameters); // for different perspectives - for(double distance = 0.2; distance <= 0.3; distance += 0.1) { + for(double distance : {0.2, 0.3}) { for(int yaw = -50; yaw <= 50; yaw += 25) { for(int pitch = -50; pitch <= 50; pitch += 25) { @@ -364,6 +376,16 @@ void CV_CharucoDiamondDetection::run(int) { detector.detectDiamonds(img, diamondCorners, diamondIds, corners, ids); + // // create debug images + // Mat rgb_image; + // cv::cvtColor(img, rgb_image, COLOR_GRAY2RGB); + // aruco::drawDetectedDiamonds(rgb_image, diamondCorners, diamondIds); + // aruco::drawDetectedMarkers(rgb_image, corners, ids); + // cv::imwrite(std::string("Debug_CV_CharucoDiamondDetection") + // + "_dist" + std::to_string(distance) + // + "_yaw" + std::to_string(yaw) + // + "_pitch" + std::to_string(pitch) + ".png", rgb_image); + // check detect if(ids.size() != 4) { ts->printf(cvtest::TS::LOG, "Not enough markers for diamond detection"); From f48939c2d73a22f7627640ec8e1bb312fa5b9dfe Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Fri, 17 Feb 2023 
18:57:38 +0300 Subject: [PATCH 031/199] temporarily set "enable_precise_upscale=false" by default to avoid sporadic failures in regression tests (#23270) --- modules/features2d/include/opencv2/features2d.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/features2d/include/opencv2/features2d.hpp b/modules/features2d/include/opencv2/features2d.hpp index 88c9dc9cc3..b528f72f83 100644 --- a/modules/features2d/include/opencv2/features2d.hpp +++ b/modules/features2d/include/opencv2/features2d.hpp @@ -289,11 +289,11 @@ public: @param enable_precise_upscale Whether to enable precise upscaling in the scale pyramid, which maps index \f$\texttt{x}\f$ to \f$\texttt{2x}\f$. This prevents localization bias. The option - to disable it (which is deprecated and issues a warning) is provided to keep the original behavior. + is disabled by default. */ CV_WRAP static Ptr create(int nfeatures = 0, int nOctaveLayers = 3, double contrastThreshold = 0.04, double edgeThreshold = 10, - double sigma = 1.6, bool enable_precise_upscale = true); + double sigma = 1.6, bool enable_precise_upscale = false); /** @brief Create SIFT with specified descriptorType. @param nfeatures The number of best features to retain. 
The features are ranked by their scores From ca48e217f1be1f77efc0a8e684d42b7ac899053d Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Sat, 18 Feb 2023 00:07:45 +0300 Subject: [PATCH 032/199] fixed another SIFT constructor (#23272) --- modules/features2d/include/opencv2/features2d.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/features2d/include/opencv2/features2d.hpp b/modules/features2d/include/opencv2/features2d.hpp index b528f72f83..98ae85f353 100644 --- a/modules/features2d/include/opencv2/features2d.hpp +++ b/modules/features2d/include/opencv2/features2d.hpp @@ -320,11 +320,11 @@ public: @param enable_precise_upscale Whether to enable precise upscaling in the scale pyramid, which maps index \f$\texttt{x}\f$ to \f$\texttt{2x}\f$. This prevents localization bias. The option - to disable it (which is deprecated and issues a warning) is provided to keep the original behavior. + is disabled by default. */ CV_WRAP static Ptr create(int nfeatures, int nOctaveLayers, double contrastThreshold, double edgeThreshold, - double sigma, int descriptorType, bool enable_precise_upscale = true); + double sigma, int descriptorType, bool enable_precise_upscale = false); CV_WRAP virtual String getDefaultName() const CV_OVERRIDE; From bdff0949bb7626fe117e6a654d903dd3b21534c2 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 18 Feb 2023 16:21:07 +0000 Subject: [PATCH 033/199] dnn(tflite): add 3rdparty flatbuffers with pre-generated schema --- 3rdparty/flatbuffers/LICENSE.txt | 202 + 3rdparty/flatbuffers/README.md | 1 + .../include/flatbuffers/allocator.h | 68 + .../flatbuffers/include/flatbuffers/array.h | 253 + .../flatbuffers/include/flatbuffers/base.h | 486 + .../flatbuffers/include/flatbuffers/buffer.h | 154 + .../include/flatbuffers/buffer_ref.h | 53 + .../include/flatbuffers/default_allocator.h | 64 + .../include/flatbuffers/detached_buffer.h | 114 + .../include/flatbuffers/flatbuffer_builder.h | 1225 ++ 
.../include/flatbuffers/flatbuffers.h | 272 + .../include/flatbuffers/stl_emulation.h | 510 + .../flatbuffers/include/flatbuffers/string.h | 64 + .../flatbuffers/include/flatbuffers/struct.h | 53 + .../flatbuffers/include/flatbuffers/table.h | 168 + .../flatbuffers/include/flatbuffers/vector.h | 393 + .../include/flatbuffers/vector_downward.h | 273 + .../include/flatbuffers/verifier.h | 317 + CMakeLists.txt | 4 +- cmake/OpenCVDetectFlatbuffers.cmake | 19 + cmake/OpenCVFindFlatBuffers.cmake | 15 - cmake/OpenCVUtils.cmake | 11 + modules/dnn/CMakeLists.txt | 37 +- modules/dnn/misc/tflite/schema_generated.h | 10543 ++++++++++++++++ modules/dnn/src/tflite/tflite_importer.cpp | 2 +- modules/dnn/test/test_tflite_importer.cpp | 28 +- modules/highgui/CMakeLists.txt | 11 +- modules/videoio/CMakeLists.txt | 11 +- 28 files changed, 15285 insertions(+), 66 deletions(-) create mode 100644 3rdparty/flatbuffers/LICENSE.txt create mode 100644 3rdparty/flatbuffers/README.md create mode 100644 3rdparty/flatbuffers/include/flatbuffers/allocator.h create mode 100644 3rdparty/flatbuffers/include/flatbuffers/array.h create mode 100644 3rdparty/flatbuffers/include/flatbuffers/base.h create mode 100644 3rdparty/flatbuffers/include/flatbuffers/buffer.h create mode 100644 3rdparty/flatbuffers/include/flatbuffers/buffer_ref.h create mode 100644 3rdparty/flatbuffers/include/flatbuffers/default_allocator.h create mode 100644 3rdparty/flatbuffers/include/flatbuffers/detached_buffer.h create mode 100644 3rdparty/flatbuffers/include/flatbuffers/flatbuffer_builder.h create mode 100644 3rdparty/flatbuffers/include/flatbuffers/flatbuffers.h create mode 100644 3rdparty/flatbuffers/include/flatbuffers/stl_emulation.h create mode 100644 3rdparty/flatbuffers/include/flatbuffers/string.h create mode 100644 3rdparty/flatbuffers/include/flatbuffers/struct.h create mode 100644 3rdparty/flatbuffers/include/flatbuffers/table.h create mode 100644 3rdparty/flatbuffers/include/flatbuffers/vector.h create mode 
100644 3rdparty/flatbuffers/include/flatbuffers/vector_downward.h create mode 100644 3rdparty/flatbuffers/include/flatbuffers/verifier.h create mode 100644 cmake/OpenCVDetectFlatbuffers.cmake delete mode 100644 cmake/OpenCVFindFlatBuffers.cmake create mode 100644 modules/dnn/misc/tflite/schema_generated.h diff --git a/3rdparty/flatbuffers/LICENSE.txt b/3rdparty/flatbuffers/LICENSE.txt new file mode 100644 index 0000000000..d645695673 --- /dev/null +++ b/3rdparty/flatbuffers/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/3rdparty/flatbuffers/README.md b/3rdparty/flatbuffers/README.md new file mode 100644 index 0000000000..26b78f4392 --- /dev/null +++ b/3rdparty/flatbuffers/README.md @@ -0,0 +1 @@ +Origin: https://github.com/google/flatbuffers/tree/v23.1.21 diff --git a/3rdparty/flatbuffers/include/flatbuffers/allocator.h b/3rdparty/flatbuffers/include/flatbuffers/allocator.h new file mode 100644 index 0000000000..30427190b6 --- /dev/null +++ b/3rdparty/flatbuffers/include/flatbuffers/allocator.h @@ -0,0 +1,68 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_ALLOCATOR_H_ +#define FLATBUFFERS_ALLOCATOR_H_ + +#include "flatbuffers/base.h" + +namespace flatbuffers { + +// Allocator interface. This is flatbuffers-specific and meant only for +// `vector_downward` usage. 
+class Allocator { + public: + virtual ~Allocator() {} + + // Allocate `size` bytes of memory. + virtual uint8_t *allocate(size_t size) = 0; + + // Deallocate `size` bytes of memory at `p` allocated by this allocator. + virtual void deallocate(uint8_t *p, size_t size) = 0; + + // Reallocate `new_size` bytes of memory, replacing the old region of size + // `old_size` at `p`. In contrast to a normal realloc, this grows downwards, + // and is intended specifcally for `vector_downward` use. + // `in_use_back` and `in_use_front` indicate how much of `old_size` is + // actually in use at each end, and needs to be copied. + virtual uint8_t *reallocate_downward(uint8_t *old_p, size_t old_size, + size_t new_size, size_t in_use_back, + size_t in_use_front) { + FLATBUFFERS_ASSERT(new_size > old_size); // vector_downward only grows + uint8_t *new_p = allocate(new_size); + memcpy_downward(old_p, old_size, new_p, new_size, in_use_back, + in_use_front); + deallocate(old_p, old_size); + return new_p; + } + + protected: + // Called by `reallocate_downward` to copy memory from `old_p` of `old_size` + // to `new_p` of `new_size`. Only memory of size `in_use_front` and + // `in_use_back` will be copied from the front and back of the old memory + // allocation. + void memcpy_downward(uint8_t *old_p, size_t old_size, uint8_t *new_p, + size_t new_size, size_t in_use_back, + size_t in_use_front) { + memcpy(new_p + new_size - in_use_back, old_p + old_size - in_use_back, + in_use_back); + memcpy(new_p, old_p, in_use_front); + } +}; + +} // namespace flatbuffers + +#endif // FLATBUFFERS_ALLOCATOR_H_ diff --git a/3rdparty/flatbuffers/include/flatbuffers/array.h b/3rdparty/flatbuffers/include/flatbuffers/array.h new file mode 100644 index 0000000000..2ff58c6fb5 --- /dev/null +++ b/3rdparty/flatbuffers/include/flatbuffers/array.h @@ -0,0 +1,253 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_ARRAY_H_ +#define FLATBUFFERS_ARRAY_H_ + +#include + +#include "flatbuffers/base.h" +#include "flatbuffers/stl_emulation.h" +#include "flatbuffers/vector.h" + +namespace flatbuffers { + +// This is used as a helper type for accessing arrays. +template class Array { + // Array can carry only POD data types (scalars or structs). + typedef typename flatbuffers::bool_constant::value> + scalar_tag; + typedef + typename flatbuffers::conditional::type + IndirectHelperType; + + public: + typedef uint16_t size_type; + typedef typename IndirectHelper::return_type return_type; + typedef VectorConstIterator const_iterator; + typedef VectorReverseIterator const_reverse_iterator; + + // If T is a LE-scalar or a struct (!scalar_tag::value). + static FLATBUFFERS_CONSTEXPR bool is_span_observable = + (scalar_tag::value && (FLATBUFFERS_LITTLEENDIAN || sizeof(T) == 1)) || + !scalar_tag::value; + + FLATBUFFERS_CONSTEXPR uint16_t size() const { return length; } + + return_type Get(uoffset_t i) const { + FLATBUFFERS_ASSERT(i < size()); + return IndirectHelper::Read(Data(), i); + } + + return_type operator[](uoffset_t i) const { return Get(i); } + + // If this is a Vector of enums, T will be its storage type, not the enum + // type. This function makes it convenient to retrieve value with enum + // type E. 
+ template E GetEnum(uoffset_t i) const { + return static_cast(Get(i)); + } + + const_iterator begin() const { return const_iterator(Data(), 0); } + const_iterator end() const { return const_iterator(Data(), size()); } + + const_reverse_iterator rbegin() const { + return const_reverse_iterator(end()); + } + const_reverse_iterator rend() const { + return const_reverse_iterator(begin()); + } + + const_iterator cbegin() const { return begin(); } + const_iterator cend() const { return end(); } + + const_reverse_iterator crbegin() const { return rbegin(); } + const_reverse_iterator crend() const { return rend(); } + + // Get a mutable pointer to elements inside this array. + // This method used to mutate arrays of structs followed by a @p Mutate + // operation. For primitive types use @p Mutate directly. + // @warning Assignments and reads to/from the dereferenced pointer are not + // automatically converted to the correct endianness. + typename flatbuffers::conditional::type + GetMutablePointer(uoffset_t i) const { + FLATBUFFERS_ASSERT(i < size()); + return const_cast(&data()[i]); + } + + // Change elements if you have a non-const pointer to this object. + void Mutate(uoffset_t i, const T &val) { MutateImpl(scalar_tag(), i, val); } + + // The raw data in little endian format. Use with care. + const uint8_t *Data() const { return data_; } + + uint8_t *Data() { return data_; } + + // Similarly, but typed, much like std::vector::data + const T *data() const { return reinterpret_cast(Data()); } + T *data() { return reinterpret_cast(Data()); } + + // Copy data from a span with endian conversion. + // If this Array and the span overlap, the behavior is undefined. 
+ void CopyFromSpan(flatbuffers::span src) { + const auto p1 = reinterpret_cast(src.data()); + const auto p2 = Data(); + FLATBUFFERS_ASSERT(!(p1 >= p2 && p1 < (p2 + length)) && + !(p2 >= p1 && p2 < (p1 + length))); + (void)p1; + (void)p2; + CopyFromSpanImpl(flatbuffers::bool_constant(), src); + } + + protected: + void MutateImpl(flatbuffers::true_type, uoffset_t i, const T &val) { + FLATBUFFERS_ASSERT(i < size()); + WriteScalar(data() + i, val); + } + + void MutateImpl(flatbuffers::false_type, uoffset_t i, const T &val) { + *(GetMutablePointer(i)) = val; + } + + void CopyFromSpanImpl(flatbuffers::true_type, + flatbuffers::span src) { + // Use std::memcpy() instead of std::copy() to avoid performance degradation + // due to aliasing if T is char or unsigned char. + // The size is known at compile time, so memcpy would be inlined. + std::memcpy(data(), src.data(), length * sizeof(T)); + } + + // Copy data from flatbuffers::span with endian conversion. + void CopyFromSpanImpl(flatbuffers::false_type, + flatbuffers::span src) { + for (size_type k = 0; k < length; k++) { Mutate(k, src[k]); } + } + + // This class is only used to access pre-existing data. Don't ever + // try to construct these manually. + // 'constexpr' allows us to use 'size()' at compile time. + // @note Must not use 'FLATBUFFERS_CONSTEXPR' here, as const is not allowed on + // a constructor. +#if defined(__cpp_constexpr) + constexpr Array(); +#else + Array(); +#endif + + uint8_t data_[length * sizeof(T)]; + + private: + // This class is a pointer. Copying will therefore create an invalid object. + // Private and unimplemented copy constructor. + Array(const Array &); + Array &operator=(const Array &); +}; + +// Specialization for Array[struct] with access using Offset pointer. +// This specialization used by idl_gen_text.cpp. 
+template class Array, length> { + static_assert(flatbuffers::is_same::value, "unexpected type T"); + + public: + typedef const void *return_type; + + const uint8_t *Data() const { return data_; } + + // Make idl_gen_text.cpp::PrintContainer happy. + return_type operator[](uoffset_t) const { + FLATBUFFERS_ASSERT(false); + return nullptr; + } + + private: + // This class is only used to access pre-existing data. + Array(); + Array(const Array &); + Array &operator=(const Array &); + + uint8_t data_[1]; +}; + +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span make_span(Array &arr) + FLATBUFFERS_NOEXCEPT { + static_assert( + Array::is_span_observable, + "wrong type U, only plain struct, LE-scalar, or byte types are allowed"); + return span(arr.data(), N); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span make_span( + const Array &arr) FLATBUFFERS_NOEXCEPT { + static_assert( + Array::is_span_observable, + "wrong type U, only plain struct, LE-scalar, or byte types are allowed"); + return span(arr.data(), N); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span +make_bytes_span(Array &arr) FLATBUFFERS_NOEXCEPT { + static_assert(Array::is_span_observable, + "internal error, Array might hold only scalars or structs"); + return span(arr.Data(), sizeof(U) * N); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span +make_bytes_span(const Array &arr) FLATBUFFERS_NOEXCEPT { + static_assert(Array::is_span_observable, + "internal error, Array might hold only scalars or structs"); + return span(arr.Data(), sizeof(U) * N); +} + +// Cast a raw T[length] to a raw flatbuffers::Array +// without endian conversion. Use with care. +// TODO: move these Cast-methods to `internal` namespace. 
+template +Array &CastToArray(T (&arr)[length]) { + return *reinterpret_cast *>(arr); +} + +template +const Array &CastToArray(const T (&arr)[length]) { + return *reinterpret_cast *>(arr); +} + +template +Array &CastToArrayOfEnum(T (&arr)[length]) { + static_assert(sizeof(E) == sizeof(T), "invalid enum type E"); + return *reinterpret_cast *>(arr); +} + +template +const Array &CastToArrayOfEnum(const T (&arr)[length]) { + static_assert(sizeof(E) == sizeof(T), "invalid enum type E"); + return *reinterpret_cast *>(arr); +} + +template +bool operator==(const Array &lhs, + const Array &rhs) noexcept { + return std::addressof(lhs) == std::addressof(rhs) || + (lhs.size() == rhs.size() && + std::memcmp(lhs.Data(), rhs.Data(), rhs.size() * sizeof(T)) == 0); +} + +} // namespace flatbuffers + +#endif // FLATBUFFERS_ARRAY_H_ diff --git a/3rdparty/flatbuffers/include/flatbuffers/base.h b/3rdparty/flatbuffers/include/flatbuffers/base.h new file mode 100644 index 0000000000..86688cc6e4 --- /dev/null +++ b/3rdparty/flatbuffers/include/flatbuffers/base.h @@ -0,0 +1,486 @@ +#ifndef FLATBUFFERS_BASE_H_ +#define FLATBUFFERS_BASE_H_ + +// clang-format off + +// If activate should be declared and included first. +#if defined(FLATBUFFERS_MEMORY_LEAK_TRACKING) && \ + defined(_MSC_VER) && defined(_DEBUG) + // The _CRTDBG_MAP_ALLOC inside will replace + // calloc/free (etc) to its debug version using #define directives. + #define _CRTDBG_MAP_ALLOC + #include + #include + // Replace operator new by trace-enabled version. 
+ #define DEBUG_NEW new(_NORMAL_BLOCK, __FILE__, __LINE__) + #define new DEBUG_NEW +#endif + +#if !defined(FLATBUFFERS_ASSERT) +#include +#define FLATBUFFERS_ASSERT assert +#elif defined(FLATBUFFERS_ASSERT_INCLUDE) +// Include file with forward declaration +#include FLATBUFFERS_ASSERT_INCLUDE +#endif + +#ifndef ARDUINO +#include +#endif + +#include +#include +#include + +#if defined(ARDUINO) && !defined(ARDUINOSTL_M_H) && defined(__AVR__) + #include +#else + #include +#endif + +#include +#include +#include +#include +#include +#include +#include + +#if defined(__unix__) && !defined(FLATBUFFERS_LOCALE_INDEPENDENT) + #include +#endif + +#ifdef __ANDROID__ + #include +#endif + +#if defined(__ICCARM__) +#include +#endif + +// Note the __clang__ check is needed, because clang presents itself +// as an older GNUC compiler (4.2). +// Clang 3.3 and later implement all of the ISO C++ 2011 standard. +// Clang 3.4 and later implement all of the ISO C++ 2014 standard. +// http://clang.llvm.org/cxx_status.html + +// Note the MSVC value '__cplusplus' may be incorrect: +// The '__cplusplus' predefined macro in the MSVC stuck at the value 199711L, +// indicating (erroneously!) that the compiler conformed to the C++98 Standard. +// This value should be correct starting from MSVC2017-15.7-Preview-3. +// The '__cplusplus' will be valid only if MSVC2017-15.7-P3 and the `/Zc:__cplusplus` switch is set. +// Workaround (for details see MSDN): +// Use the _MSC_VER and _MSVC_LANG definition instead of the __cplusplus for compatibility. +// The _MSVC_LANG macro reports the Standard version regardless of the '/Zc:__cplusplus' switch. 
+ +#if defined(__GNUC__) && !defined(__clang__) + #define FLATBUFFERS_GCC (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#else + #define FLATBUFFERS_GCC 0 +#endif + +#if defined(__clang__) + #define FLATBUFFERS_CLANG (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) +#else + #define FLATBUFFERS_CLANG 0 +#endif + +/// @cond FLATBUFFERS_INTERNAL +#if __cplusplus <= 199711L && \ + (!defined(_MSC_VER) || _MSC_VER < 1600) && \ + (!defined(__GNUC__) || \ + (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__ < 40400)) + #error A C++11 compatible compiler with support for the auto typing is \ + required for FlatBuffers. + #error __cplusplus _MSC_VER __GNUC__ __GNUC_MINOR__ __GNUC_PATCHLEVEL__ +#endif + +#if !defined(__clang__) && \ + defined(__GNUC__) && \ + (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__ < 40600) + // Backwards compatibility for g++ 4.4, and 4.5 which don't have the nullptr + // and constexpr keywords. Note the __clang__ check is needed, because clang + // presents itself as an older GNUC compiler. + #ifndef nullptr_t + const class nullptr_t { + public: + template inline operator T*() const { return 0; } + private: + void operator&() const; + } nullptr = {}; + #endif + #ifndef constexpr + #define constexpr const + #endif +#endif + +// The wire format uses a little endian encoding (since that's efficient for +// the common platforms). 
+#if defined(__s390x__) + #define FLATBUFFERS_LITTLEENDIAN 0 +#endif // __s390x__ +#if !defined(FLATBUFFERS_LITTLEENDIAN) + #if defined(__GNUC__) || defined(__clang__) || defined(__ICCARM__) + #if (defined(__BIG_ENDIAN__) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) + #define FLATBUFFERS_LITTLEENDIAN 0 + #else + #define FLATBUFFERS_LITTLEENDIAN 1 + #endif // __BIG_ENDIAN__ + #elif defined(_MSC_VER) + #if defined(_M_PPC) + #define FLATBUFFERS_LITTLEENDIAN 0 + #else + #define FLATBUFFERS_LITTLEENDIAN 1 + #endif + #else + #error Unable to determine endianness, define FLATBUFFERS_LITTLEENDIAN. + #endif +#endif // !defined(FLATBUFFERS_LITTLEENDIAN) + +#define FLATBUFFERS_VERSION_MAJOR 23 +#define FLATBUFFERS_VERSION_MINOR 1 +#define FLATBUFFERS_VERSION_REVISION 21 +#define FLATBUFFERS_STRING_EXPAND(X) #X +#define FLATBUFFERS_STRING(X) FLATBUFFERS_STRING_EXPAND(X) +namespace flatbuffers { + // Returns version as string "MAJOR.MINOR.REVISION". + const char* FLATBUFFERS_VERSION(); +} + +#if (!defined(_MSC_VER) || _MSC_VER > 1600) && \ + (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 407)) || \ + defined(__clang__) + #define FLATBUFFERS_FINAL_CLASS final + #define FLATBUFFERS_OVERRIDE override + #define FLATBUFFERS_EXPLICIT_CPP11 explicit + #define FLATBUFFERS_VTABLE_UNDERLYING_TYPE : flatbuffers::voffset_t +#else + #define FLATBUFFERS_FINAL_CLASS + #define FLATBUFFERS_OVERRIDE + #define FLATBUFFERS_EXPLICIT_CPP11 + #define FLATBUFFERS_VTABLE_UNDERLYING_TYPE +#endif + +#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && \ + (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 406)) || \ + (defined(__cpp_constexpr) && __cpp_constexpr >= 200704) + #define FLATBUFFERS_CONSTEXPR constexpr + #define FLATBUFFERS_CONSTEXPR_CPP11 constexpr + #define FLATBUFFERS_CONSTEXPR_DEFINED +#else + #define FLATBUFFERS_CONSTEXPR const + #define FLATBUFFERS_CONSTEXPR_CPP11 +#endif + +#if (defined(__cplusplus) && __cplusplus >= 201402L) || \ + 
(defined(__cpp_constexpr) && __cpp_constexpr >= 201304) + #define FLATBUFFERS_CONSTEXPR_CPP14 FLATBUFFERS_CONSTEXPR_CPP11 +#else + #define FLATBUFFERS_CONSTEXPR_CPP14 +#endif + +#if (defined(__GXX_EXPERIMENTAL_CXX0X__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 406)) || \ + (defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190023026)) || \ + defined(__clang__) + #define FLATBUFFERS_NOEXCEPT noexcept +#else + #define FLATBUFFERS_NOEXCEPT +#endif + +// NOTE: the FLATBUFFERS_DELETE_FUNC macro may change the access mode to +// private, so be sure to put it at the end or reset access mode explicitly. +#if (!defined(_MSC_VER) || _MSC_FULL_VER >= 180020827) && \ + (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 404)) || \ + defined(__clang__) + #define FLATBUFFERS_DELETE_FUNC(func) func = delete +#else + #define FLATBUFFERS_DELETE_FUNC(func) private: func +#endif + +#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && \ + (!defined(__GNUC__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 409)) || \ + defined(__clang__) + #define FLATBUFFERS_DEFAULT_DECLARATION +#endif + +// Check if we can use template aliases +// Not possible if Microsoft Compiler before 2012 +// Possible is the language feature __cpp_alias_templates is defined well +// Or possible if the C++ std is C+11 or newer +#if (defined(_MSC_VER) && _MSC_VER > 1700 /* MSVC2012 */) \ + || (defined(__cpp_alias_templates) && __cpp_alias_templates >= 200704) \ + || (defined(__cplusplus) && __cplusplus >= 201103L) + #define FLATBUFFERS_TEMPLATES_ALIASES +#endif + +#ifndef FLATBUFFERS_HAS_STRING_VIEW + // Only provide flatbuffers::string_view if __has_include can be used + // to detect a header that provides an implementation + #if defined(__has_include) + // Check for std::string_view (in c++17) + #if __has_include() && (__cplusplus >= 201606 || (defined(_HAS_CXX17) && _HAS_CXX17)) + #include + namespace flatbuffers { + typedef std::string_view string_view; + } + #define FLATBUFFERS_HAS_STRING_VIEW 1 + // Check for 
std::experimental::string_view (in c++14, compiler-dependent) + #elif __has_include() && (__cplusplus >= 201411) + #include + namespace flatbuffers { + typedef std::experimental::string_view string_view; + } + #define FLATBUFFERS_HAS_STRING_VIEW 1 + // Check for absl::string_view + #elif __has_include("absl/strings/string_view.h") + #include "absl/strings/string_view.h" + namespace flatbuffers { + typedef absl::string_view string_view; + } + #define FLATBUFFERS_HAS_STRING_VIEW 1 + #endif + #endif // __has_include +#endif // !FLATBUFFERS_HAS_STRING_VIEW + +#ifndef FLATBUFFERS_GENERAL_HEAP_ALLOC_OK + // Allow heap allocations to be used + #define FLATBUFFERS_GENERAL_HEAP_ALLOC_OK 1 +#endif // !FLATBUFFERS_GENERAL_HEAP_ALLOC_OK + +#ifndef FLATBUFFERS_HAS_NEW_STRTOD + // Modern (C++11) strtod and strtof functions are available for use. + // 1) nan/inf strings as argument of strtod; + // 2) hex-float as argument of strtod/strtof. + #if (defined(_MSC_VER) && _MSC_VER >= 1900) || \ + (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 409)) || \ + (defined(__clang__)) + #define FLATBUFFERS_HAS_NEW_STRTOD 1 + #endif +#endif // !FLATBUFFERS_HAS_NEW_STRTOD + +#ifndef FLATBUFFERS_LOCALE_INDEPENDENT + // Enable locale independent functions {strtof_l, strtod_l,strtoll_l, + // strtoull_l}. + #if (defined(_MSC_VER) && _MSC_VER >= 1800) || \ + (defined(__ANDROID_API__) && __ANDROID_API__>= 21) || \ + (defined(_XOPEN_VERSION) && (_XOPEN_VERSION >= 700)) && \ + (!defined(__Fuchsia__) && !defined(__ANDROID_API__)) + #define FLATBUFFERS_LOCALE_INDEPENDENT 1 + #else + #define FLATBUFFERS_LOCALE_INDEPENDENT 0 + #endif +#endif // !FLATBUFFERS_LOCALE_INDEPENDENT + +// Suppress Undefined Behavior Sanitizer (recoverable only). 
Usage: +// - __suppress_ubsan__("undefined") +// - __suppress_ubsan__("signed-integer-overflow") +#if defined(__clang__) && (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >=7)) + #define __suppress_ubsan__(type) __attribute__((no_sanitize(type))) +#elif defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 409) + #define __suppress_ubsan__(type) __attribute__((no_sanitize_undefined)) +#else + #define __suppress_ubsan__(type) +#endif + +// This is constexpr function used for checking compile-time constants. +// Avoid `#pragma warning(disable: 4127) // C4127: expression is constant`. +template FLATBUFFERS_CONSTEXPR inline bool IsConstTrue(T t) { + return !!t; +} + +// Enable C++ attribute [[]] if std:c++17 or higher. +#if ((__cplusplus >= 201703L) \ + || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L))) + // All attributes unknown to an implementation are ignored without causing an error. + #define FLATBUFFERS_ATTRIBUTE(attr) attr + + #define FLATBUFFERS_FALLTHROUGH() [[fallthrough]] +#else + #define FLATBUFFERS_ATTRIBUTE(attr) + + #if FLATBUFFERS_CLANG >= 30800 + #define FLATBUFFERS_FALLTHROUGH() [[clang::fallthrough]] + #elif FLATBUFFERS_GCC >= 70300 + #define FLATBUFFERS_FALLTHROUGH() [[gnu::fallthrough]] + #else + #define FLATBUFFERS_FALLTHROUGH() + #endif +#endif + +/// @endcond + +/// @file +namespace flatbuffers { + +/// @cond FLATBUFFERS_INTERNAL +// Our default offset / size type, 32bit on purpose on 64bit systems. +// Also, using a consistent offset type maintains compatibility of serialized +// offset values between 32bit and 64bit systems. +typedef uint32_t uoffset_t; + +// Signed offsets for references that can go in both directions. +typedef int32_t soffset_t; + +// Offset/index used in v-tables, can be changed to uint8_t in +// format forks to save a bit of space if desired. 
+typedef uint16_t voffset_t; + +typedef uintmax_t largest_scalar_t; + +// In 32bits, this evaluates to 2GB - 1 +#define FLATBUFFERS_MAX_BUFFER_SIZE ((1ULL << (sizeof(::flatbuffers::soffset_t) * 8 - 1)) - 1) + +// The minimum size buffer that can be a valid flatbuffer. +// Includes the offset to the root table (uoffset_t), the offset to the vtable +// of the root table (soffset_t), the size of the vtable (uint16_t), and the +// size of the referring table (uint16_t). +#define FLATBUFFERS_MIN_BUFFER_SIZE sizeof(uoffset_t) + sizeof(soffset_t) + \ + sizeof(uint16_t) + sizeof(uint16_t) + +// We support aligning the contents of buffers up to this size. +#ifndef FLATBUFFERS_MAX_ALIGNMENT + #define FLATBUFFERS_MAX_ALIGNMENT 32 +#endif + +/// @brief The length of a FlatBuffer file header. +static const size_t kFileIdentifierLength = 4; + +inline bool VerifyAlignmentRequirements(size_t align, size_t min_align = 1) { + return (min_align <= align) && (align <= (FLATBUFFERS_MAX_ALIGNMENT)) && + (align & (align - 1)) == 0; // must be power of 2 +} + +#if defined(_MSC_VER) + #pragma warning(disable: 4351) // C4351: new behavior: elements of array ... will be default initialized + #pragma warning(push) + #pragma warning(disable: 4127) // C4127: conditional expression is constant +#endif + +template T EndianSwap(T t) { + #if defined(_MSC_VER) + #define FLATBUFFERS_BYTESWAP16 _byteswap_ushort + #define FLATBUFFERS_BYTESWAP32 _byteswap_ulong + #define FLATBUFFERS_BYTESWAP64 _byteswap_uint64 + #elif defined(__ICCARM__) + #define FLATBUFFERS_BYTESWAP16 __REV16 + #define FLATBUFFERS_BYTESWAP32 __REV + #define FLATBUFFERS_BYTESWAP64(x) \ + ((__REV(static_cast(x >> 32U))) | (static_cast(__REV(static_cast(x)))) << 32U) + #else + #if defined(__GNUC__) && __GNUC__ * 100 + __GNUC_MINOR__ < 408 && !defined(__clang__) + // __builtin_bswap16 was missing prior to GCC 4.8. 
+ #define FLATBUFFERS_BYTESWAP16(x) \ + static_cast(__builtin_bswap32(static_cast(x) << 16)) + #else + #define FLATBUFFERS_BYTESWAP16 __builtin_bswap16 + #endif + #define FLATBUFFERS_BYTESWAP32 __builtin_bswap32 + #define FLATBUFFERS_BYTESWAP64 __builtin_bswap64 + #endif + if (sizeof(T) == 1) { // Compile-time if-then's. + return t; + } else if (sizeof(T) == 2) { + union { T t; uint16_t i; } u = { t }; + u.i = FLATBUFFERS_BYTESWAP16(u.i); + return u.t; + } else if (sizeof(T) == 4) { + union { T t; uint32_t i; } u = { t }; + u.i = FLATBUFFERS_BYTESWAP32(u.i); + return u.t; + } else if (sizeof(T) == 8) { + union { T t; uint64_t i; } u = { t }; + u.i = FLATBUFFERS_BYTESWAP64(u.i); + return u.t; + } else { + FLATBUFFERS_ASSERT(0); + return t; + } +} + +#if defined(_MSC_VER) + #pragma warning(pop) +#endif + + +template T EndianScalar(T t) { + #if FLATBUFFERS_LITTLEENDIAN + return t; + #else + return EndianSwap(t); + #endif +} + +template +// UBSAN: C++ aliasing type rules, see std::bit_cast<> for details. +__suppress_ubsan__("alignment") +T ReadScalar(const void *p) { + return EndianScalar(*reinterpret_cast(p)); +} + +// See https://github.com/google/flatbuffers/issues/5950 + +#if (FLATBUFFERS_GCC >= 100000) && (FLATBUFFERS_GCC < 110000) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif + +template +// UBSAN: C++ aliasing type rules, see std::bit_cast<> for details. +__suppress_ubsan__("alignment") +void WriteScalar(void *p, T t) { + *reinterpret_cast(p) = EndianScalar(t); +} + +template struct Offset; +template __suppress_ubsan__("alignment") void WriteScalar(void *p, Offset t) { + *reinterpret_cast(p) = EndianScalar(t.o); +} + +#if (FLATBUFFERS_GCC >= 100000) && (FLATBUFFERS_GCC < 110000) + #pragma GCC diagnostic pop +#endif + +// Computes how many bytes you'd have to pad to be able to write an +// "scalar_size" scalar if the buffer had grown to "buf_size" (downwards in +// memory). 
+__suppress_ubsan__("unsigned-integer-overflow") +inline size_t PaddingBytes(size_t buf_size, size_t scalar_size) { + return ((~buf_size) + 1) & (scalar_size - 1); +} + +// Generic 'operator==' with conditional specialisations. +// T e - new value of a scalar field. +// T def - default of scalar (is known at compile-time). +template inline bool IsTheSameAs(T e, T def) { return e == def; } + +#if defined(FLATBUFFERS_NAN_DEFAULTS) && \ + defined(FLATBUFFERS_HAS_NEW_STRTOD) && (FLATBUFFERS_HAS_NEW_STRTOD > 0) +// Like `operator==(e, def)` with weak NaN if T=(float|double). +template inline bool IsFloatTheSameAs(T e, T def) { + return (e == def) || ((def != def) && (e != e)); +} +template<> inline bool IsTheSameAs(float e, float def) { + return IsFloatTheSameAs(e, def); +} +template<> inline bool IsTheSameAs(double e, double def) { + return IsFloatTheSameAs(e, def); +} +#endif + +// Check 'v' is out of closed range [low; high]. +// Workaround for GCC warning [-Werror=type-limits]: +// comparison is always true due to limited range of data type. +template +inline bool IsOutRange(const T &v, const T &low, const T &high) { + return (v < low) || (high < v); +} + +// Check 'v' is in closed range [low; high]. +template +inline bool IsInRange(const T &v, const T &low, const T &high) { + return !IsOutRange(v, low, high); +} + +} // namespace flatbuffers +#endif // FLATBUFFERS_BASE_H_ diff --git a/3rdparty/flatbuffers/include/flatbuffers/buffer.h b/3rdparty/flatbuffers/include/flatbuffers/buffer.h new file mode 100644 index 0000000000..e26a153c3f --- /dev/null +++ b/3rdparty/flatbuffers/include/flatbuffers/buffer.h @@ -0,0 +1,154 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_BUFFER_H_ +#define FLATBUFFERS_BUFFER_H_ + +#include + +#include "flatbuffers/base.h" + +namespace flatbuffers { + +// Wrapper for uoffset_t to allow safe template specialization. +// Value is allowed to be 0 to indicate a null object (see e.g. AddOffset). +template struct Offset { + uoffset_t o; + Offset() : o(0) {} + Offset(uoffset_t _o) : o(_o) {} + Offset Union() const { return Offset(o); } + bool IsNull() const { return !o; } +}; + +inline void EndianCheck() { + int endiantest = 1; + // If this fails, see FLATBUFFERS_LITTLEENDIAN above. + FLATBUFFERS_ASSERT(*reinterpret_cast(&endiantest) == + FLATBUFFERS_LITTLEENDIAN); + (void)endiantest; +} + +template FLATBUFFERS_CONSTEXPR size_t AlignOf() { + // clang-format off + #ifdef _MSC_VER + return __alignof(T); + #else + #ifndef alignof + return __alignof__(T); + #else + return alignof(T); + #endif + #endif + // clang-format on +} + +// Lexicographically compare two strings (possibly containing nulls), and +// return true if the first is less than the second. +static inline bool StringLessThan(const char *a_data, uoffset_t a_size, + const char *b_data, uoffset_t b_size) { + const auto cmp = memcmp(a_data, b_data, (std::min)(a_size, b_size)); + return cmp == 0 ? a_size < b_size : cmp < 0; +} + +// When we read serialized data from memory, in the case of most scalars, +// we want to just read T, but in the case of Offset, we want to actually +// perform the indirection and return a pointer. +// The template specialization below does just that. 
+// It is wrapped in a struct since function templates can't overload on the +// return type like this. +// The typedef is for the convenience of callers of this function +// (avoiding the need for a trailing return decltype) +template struct IndirectHelper { + typedef T return_type; + typedef T mutable_return_type; + static const size_t element_stride = sizeof(T); + static return_type Read(const uint8_t *p, uoffset_t i) { + return EndianScalar((reinterpret_cast(p))[i]); + } + static return_type Read(uint8_t *p, uoffset_t i) { + return Read(const_cast(p), i); + } +}; +template struct IndirectHelper> { + typedef const T *return_type; + typedef T *mutable_return_type; + static const size_t element_stride = sizeof(uoffset_t); + static return_type Read(const uint8_t *p, uoffset_t i) { + p += i * sizeof(uoffset_t); + return reinterpret_cast(p + ReadScalar(p)); + } + static mutable_return_type Read(uint8_t *p, uoffset_t i) { + p += i * sizeof(uoffset_t); + return reinterpret_cast(p + ReadScalar(p)); + } +}; +template struct IndirectHelper { + typedef const T *return_type; + typedef T *mutable_return_type; + static const size_t element_stride = sizeof(T); + static return_type Read(const uint8_t *p, uoffset_t i) { + return reinterpret_cast(p + i * sizeof(T)); + } + static mutable_return_type Read(uint8_t *p, uoffset_t i) { + return reinterpret_cast(p + i * sizeof(T)); + } +}; + +/// @brief Get a pointer to the file_identifier section of the buffer. +/// @return Returns a const char pointer to the start of the file_identifier +/// characters in the buffer. The returned char * has length +/// 'flatbuffers::FlatBufferBuilder::kFileIdentifierLength'. +/// This function is UNDEFINED for FlatBuffers whose schema does not include +/// a file_identifier (likely points at padding or the start of a the root +/// vtable). +inline const char *GetBufferIdentifier(const void *buf, + bool size_prefixed = false) { + return reinterpret_cast(buf) + + ((size_prefixed) ? 
2 * sizeof(uoffset_t) : sizeof(uoffset_t)); +} + +// Helper to see if the identifier in a buffer has the expected value. +inline bool BufferHasIdentifier(const void *buf, const char *identifier, + bool size_prefixed = false) { + return strncmp(GetBufferIdentifier(buf, size_prefixed), identifier, + flatbuffers::kFileIdentifierLength) == 0; +} + +/// @cond FLATBUFFERS_INTERNAL +// Helpers to get a typed pointer to the root object contained in the buffer. +template T *GetMutableRoot(void *buf) { + EndianCheck(); + return reinterpret_cast( + reinterpret_cast(buf) + + EndianScalar(*reinterpret_cast(buf))); +} + +template T *GetMutableSizePrefixedRoot(void *buf) { + return GetMutableRoot(reinterpret_cast(buf) + + sizeof(uoffset_t)); +} + +template const T *GetRoot(const void *buf) { + return GetMutableRoot(const_cast(buf)); +} + +template const T *GetSizePrefixedRoot(const void *buf) { + return GetRoot(reinterpret_cast(buf) + sizeof(uoffset_t)); +} + +} // namespace flatbuffers + +#endif // FLATBUFFERS_BUFFER_H_ diff --git a/3rdparty/flatbuffers/include/flatbuffers/buffer_ref.h b/3rdparty/flatbuffers/include/flatbuffers/buffer_ref.h new file mode 100644 index 0000000000..f70941fc64 --- /dev/null +++ b/3rdparty/flatbuffers/include/flatbuffers/buffer_ref.h @@ -0,0 +1,53 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef FLATBUFFERS_BUFFER_REF_H_ +#define FLATBUFFERS_BUFFER_REF_H_ + +#include "flatbuffers/base.h" +#include "flatbuffers/verifier.h" + +namespace flatbuffers { + +// Convenient way to bundle a buffer and its length, to pass it around +// typed by its root. +// A BufferRef does not own its buffer. +struct BufferRefBase {}; // for std::is_base_of + +template struct BufferRef : BufferRefBase { + BufferRef() : buf(nullptr), len(0), must_free(false) {} + BufferRef(uint8_t *_buf, uoffset_t _len) + : buf(_buf), len(_len), must_free(false) {} + + ~BufferRef() { + if (must_free) free(buf); + } + + const T *GetRoot() const { return flatbuffers::GetRoot(buf); } + + bool Verify() { + Verifier verifier(buf, len); + return verifier.VerifyBuffer(nullptr); + } + + uint8_t *buf; + uoffset_t len; + bool must_free; +}; + +} // namespace flatbuffers + +#endif // FLATBUFFERS_BUFFER_REF_H_ diff --git a/3rdparty/flatbuffers/include/flatbuffers/default_allocator.h b/3rdparty/flatbuffers/include/flatbuffers/default_allocator.h new file mode 100644 index 0000000000..d4724122cb --- /dev/null +++ b/3rdparty/flatbuffers/include/flatbuffers/default_allocator.h @@ -0,0 +1,64 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef FLATBUFFERS_DEFAULT_ALLOCATOR_H_ +#define FLATBUFFERS_DEFAULT_ALLOCATOR_H_ + +#include "flatbuffers/allocator.h" +#include "flatbuffers/base.h" + +namespace flatbuffers { + +// DefaultAllocator uses new/delete to allocate memory regions +class DefaultAllocator : public Allocator { + public: + uint8_t *allocate(size_t size) FLATBUFFERS_OVERRIDE { + return new uint8_t[size]; + } + + void deallocate(uint8_t *p, size_t) FLATBUFFERS_OVERRIDE { delete[] p; } + + static void dealloc(void *p, size_t) { delete[] static_cast(p); } +}; + +// These functions allow for a null allocator to mean use the default allocator, +// as used by DetachedBuffer and vector_downward below. +// This is to avoid having a statically or dynamically allocated default +// allocator, or having to move it between the classes that may own it. +inline uint8_t *Allocate(Allocator *allocator, size_t size) { + return allocator ? allocator->allocate(size) + : DefaultAllocator().allocate(size); +} + +inline void Deallocate(Allocator *allocator, uint8_t *p, size_t size) { + if (allocator) + allocator->deallocate(p, size); + else + DefaultAllocator().deallocate(p, size); +} + +inline uint8_t *ReallocateDownward(Allocator *allocator, uint8_t *old_p, + size_t old_size, size_t new_size, + size_t in_use_back, size_t in_use_front) { + return allocator ? allocator->reallocate_downward(old_p, old_size, new_size, + in_use_back, in_use_front) + : DefaultAllocator().reallocate_downward( + old_p, old_size, new_size, in_use_back, in_use_front); +} + +} // namespace flatbuffers + +#endif // FLATBUFFERS_DEFAULT_ALLOCATOR_H_ diff --git a/3rdparty/flatbuffers/include/flatbuffers/detached_buffer.h b/3rdparty/flatbuffers/include/flatbuffers/detached_buffer.h new file mode 100644 index 0000000000..5e900baeb5 --- /dev/null +++ b/3rdparty/flatbuffers/include/flatbuffers/detached_buffer.h @@ -0,0 +1,114 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_DETACHED_BUFFER_H_ +#define FLATBUFFERS_DETACHED_BUFFER_H_ + +#include "flatbuffers/allocator.h" +#include "flatbuffers/base.h" +#include "flatbuffers/default_allocator.h" + +namespace flatbuffers { + +// DetachedBuffer is a finished flatbuffer memory region, detached from its +// builder. The original memory region and allocator are also stored so that +// the DetachedBuffer can manage the memory lifetime. 
+class DetachedBuffer { + public: + DetachedBuffer() + : allocator_(nullptr), + own_allocator_(false), + buf_(nullptr), + reserved_(0), + cur_(nullptr), + size_(0) {} + + DetachedBuffer(Allocator *allocator, bool own_allocator, uint8_t *buf, + size_t reserved, uint8_t *cur, size_t sz) + : allocator_(allocator), + own_allocator_(own_allocator), + buf_(buf), + reserved_(reserved), + cur_(cur), + size_(sz) {} + + DetachedBuffer(DetachedBuffer &&other) noexcept + : allocator_(other.allocator_), + own_allocator_(other.own_allocator_), + buf_(other.buf_), + reserved_(other.reserved_), + cur_(other.cur_), + size_(other.size_) { + other.reset(); + } + + DetachedBuffer &operator=(DetachedBuffer &&other) noexcept { + if (this == &other) return *this; + + destroy(); + + allocator_ = other.allocator_; + own_allocator_ = other.own_allocator_; + buf_ = other.buf_; + reserved_ = other.reserved_; + cur_ = other.cur_; + size_ = other.size_; + + other.reset(); + + return *this; + } + + ~DetachedBuffer() { destroy(); } + + const uint8_t *data() const { return cur_; } + + uint8_t *data() { return cur_; } + + size_t size() const { return size_; } + + // These may change access mode, leave these at end of public section + FLATBUFFERS_DELETE_FUNC(DetachedBuffer(const DetachedBuffer &other)); + FLATBUFFERS_DELETE_FUNC( + DetachedBuffer &operator=(const DetachedBuffer &other)); + + protected: + Allocator *allocator_; + bool own_allocator_; + uint8_t *buf_; + size_t reserved_; + uint8_t *cur_; + size_t size_; + + inline void destroy() { + if (buf_) Deallocate(allocator_, buf_, reserved_); + if (own_allocator_ && allocator_) { delete allocator_; } + reset(); + } + + inline void reset() { + allocator_ = nullptr; + own_allocator_ = false; + buf_ = nullptr; + reserved_ = 0; + cur_ = nullptr; + size_ = 0; + } +}; + +} // namespace flatbuffers + +#endif // FLATBUFFERS_DETACHED_BUFFER_H_ diff --git a/3rdparty/flatbuffers/include/flatbuffers/flatbuffer_builder.h 
b/3rdparty/flatbuffers/include/flatbuffers/flatbuffer_builder.h new file mode 100644 index 0000000000..a1d3d60a79 --- /dev/null +++ b/3rdparty/flatbuffers/include/flatbuffers/flatbuffer_builder.h @@ -0,0 +1,1225 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_FLATBUFFER_BUILDER_H_ +#define FLATBUFFERS_FLATBUFFER_BUILDER_H_ + +#include +#include +#include + +#include "flatbuffers/allocator.h" +#include "flatbuffers/array.h" +#include "flatbuffers/base.h" +#include "flatbuffers/buffer_ref.h" +#include "flatbuffers/default_allocator.h" +#include "flatbuffers/detached_buffer.h" +#include "flatbuffers/stl_emulation.h" +#include "flatbuffers/string.h" +#include "flatbuffers/struct.h" +#include "flatbuffers/table.h" +#include "flatbuffers/vector.h" +#include "flatbuffers/vector_downward.h" +#include "flatbuffers/verifier.h" + +namespace flatbuffers { + +// Converts a Field ID to a virtual table offset. +inline voffset_t FieldIndexToOffset(voffset_t field_id) { + // Should correspond to what EndTable() below builds up. + const int fixed_fields = 2; // Vtable size and Object Size. + return static_cast((field_id + fixed_fields) * sizeof(voffset_t)); +} + +template> +const T *data(const std::vector &v) { + // Eventually the returned pointer gets passed down to memcpy, so + // we need it to be non-null to avoid undefined behavior. + static uint8_t t; + return v.empty() ? 
reinterpret_cast(&t) : &v.front(); +} +template> +T *data(std::vector &v) { + // Eventually the returned pointer gets passed down to memcpy, so + // we need it to be non-null to avoid undefined behavior. + static uint8_t t; + return v.empty() ? reinterpret_cast(&t) : &v.front(); +} + +/// @addtogroup flatbuffers_cpp_api +/// @{ +/// @class FlatBufferBuilder +/// @brief Helper class to hold data needed in creation of a FlatBuffer. +/// To serialize data, you typically call one of the `Create*()` functions in +/// the generated code, which in turn call a sequence of `StartTable`/ +/// `PushElement`/`AddElement`/`EndTable`, or the builtin `CreateString`/ +/// `CreateVector` functions. Do this is depth-first order to build up a tree to +/// the root. `Finish()` wraps up the buffer ready for transport. +class FlatBufferBuilder { + public: + /// @brief Default constructor for FlatBufferBuilder. + /// @param[in] initial_size The initial size of the buffer, in bytes. Defaults + /// to `1024`. + /// @param[in] allocator An `Allocator` to use. If null will use + /// `DefaultAllocator`. + /// @param[in] own_allocator Whether the builder/vector should own the + /// allocator. Defaults to / `false`. + /// @param[in] buffer_minalign Force the buffer to be aligned to the given + /// minimum alignment upon reallocation. Only needed if you intend to store + /// types with custom alignment AND you wish to read the buffer in-place + /// directly after creation. + explicit FlatBufferBuilder( + size_t initial_size = 1024, Allocator *allocator = nullptr, + bool own_allocator = false, + size_t buffer_minalign = AlignOf()) + : buf_(initial_size, allocator, own_allocator, buffer_minalign), + num_field_loc(0), + max_voffset_(0), + nested(false), + finished(false), + minalign_(1), + force_defaults_(false), + dedup_vtables_(true), + string_pool(nullptr) { + EndianCheck(); + } + + /// @brief Move constructor for FlatBufferBuilder. 
+ FlatBufferBuilder(FlatBufferBuilder &&other) noexcept + : buf_(1024, nullptr, false, AlignOf()), + num_field_loc(0), + max_voffset_(0), + nested(false), + finished(false), + minalign_(1), + force_defaults_(false), + dedup_vtables_(true), + string_pool(nullptr) { + EndianCheck(); + // Default construct and swap idiom. + // Lack of delegating constructors in vs2010 makes it more verbose than + // needed. + Swap(other); + } + + /// @brief Move assignment operator for FlatBufferBuilder. + FlatBufferBuilder &operator=(FlatBufferBuilder &&other) noexcept { + // Move construct a temporary and swap idiom + FlatBufferBuilder temp(std::move(other)); + Swap(temp); + return *this; + } + + void Swap(FlatBufferBuilder &other) { + using std::swap; + buf_.swap(other.buf_); + swap(num_field_loc, other.num_field_loc); + swap(max_voffset_, other.max_voffset_); + swap(nested, other.nested); + swap(finished, other.finished); + swap(minalign_, other.minalign_); + swap(force_defaults_, other.force_defaults_); + swap(dedup_vtables_, other.dedup_vtables_); + swap(string_pool, other.string_pool); + } + + ~FlatBufferBuilder() { + if (string_pool) delete string_pool; + } + + void Reset() { + Clear(); // clear builder state + buf_.reset(); // deallocate buffer + } + + /// @brief Reset all the state in this FlatBufferBuilder so it can be reused + /// to construct another buffer. + void Clear() { + ClearOffsets(); + buf_.clear(); + nested = false; + finished = false; + minalign_ = 1; + if (string_pool) string_pool->clear(); + } + + /// @brief The current size of the serialized buffer, counting from the end. + /// @return Returns an `uoffset_t` with the current size of the buffer. + uoffset_t GetSize() const { return buf_.size(); } + + /// @brief Get the serialized buffer (after you call `Finish()`). + /// @return Returns an `uint8_t` pointer to the FlatBuffer data inside the + /// buffer. 
+ uint8_t *GetBufferPointer() const { + Finished(); + return buf_.data(); + } + + /// @brief Get the serialized buffer (after you call `Finish()`) as a span. + /// @return Returns a constructed flatbuffers::span that is a view over the + /// FlatBuffer data inside the buffer. + flatbuffers::span GetBufferSpan() const { + Finished(); + return flatbuffers::span(buf_.data(), buf_.size()); + } + + /// @brief Get a pointer to an unfinished buffer. + /// @return Returns a `uint8_t` pointer to the unfinished buffer. + uint8_t *GetCurrentBufferPointer() const { return buf_.data(); } + + /// @brief Get the released pointer to the serialized buffer. + /// @warning Do NOT attempt to use this FlatBufferBuilder afterwards! + /// @return A `FlatBuffer` that owns the buffer and its allocator and + /// behaves similar to a `unique_ptr` with a deleter. + FLATBUFFERS_ATTRIBUTE([[deprecated("use Release() instead")]]) + DetachedBuffer ReleaseBufferPointer() { + Finished(); + return buf_.release(); + } + + /// @brief Get the released DetachedBuffer. + /// @return A `DetachedBuffer` that owns the buffer and its allocator. + DetachedBuffer Release() { + Finished(); + return buf_.release(); + } + + /// @brief Get the released pointer to the serialized buffer. + /// @param size The size of the memory block containing + /// the serialized `FlatBuffer`. + /// @param offset The offset from the released pointer where the finished + /// `FlatBuffer` starts. + /// @return A raw pointer to the start of the memory block containing + /// the serialized `FlatBuffer`. + /// @remark If the allocator is owned, it gets deleted when the destructor is + /// called.. + uint8_t *ReleaseRaw(size_t &size, size_t &offset) { + Finished(); + return buf_.release_raw(size, offset); + } + + /// @brief get the minimum alignment this buffer needs to be accessed + /// properly. This is only known once all elements have been written (after + /// you call Finish()). 
You can use this information if you need to embed + /// a FlatBuffer in some other buffer, such that you can later read it + /// without first having to copy it into its own buffer. + size_t GetBufferMinAlignment() const { + Finished(); + return minalign_; + } + + /// @cond FLATBUFFERS_INTERNAL + void Finished() const { + // If you get this assert, you're attempting to access a buffer + // which hasn't been finished yet. Be sure to call + // FlatBufferBuilder::Finish with your root table. + // If you really need to access an unfinished buffer, call + // GetCurrentBufferPointer instead. + FLATBUFFERS_ASSERT(finished); + } + /// @endcond + + /// @brief In order to save space, fields that are set to their default value + /// don't get serialized into the buffer. + /// @param[in] fd When set to `true`, always serializes default values that + /// are set. Optional fields which are not set explicitly, will still not be + /// serialized. + void ForceDefaults(bool fd) { force_defaults_ = fd; } + + /// @brief By default vtables are deduped in order to save space. + /// @param[in] dedup When set to `true`, dedup vtables. + void DedupVtables(bool dedup) { dedup_vtables_ = dedup; } + + /// @cond FLATBUFFERS_INTERNAL + void Pad(size_t num_bytes) { buf_.fill(num_bytes); } + + // Records the largest alignment requested so far in minalign_. + void TrackMinAlign(size_t elem_size) { + if (elem_size > minalign_) minalign_ = elem_size; + } + + // Tracks the alignment, then fills the padding computed by PaddingBytes + // for the current buffer size. + void Align(size_t elem_size) { + TrackMinAlign(elem_size); + buf_.fill(PaddingBytes(buf_.size(), elem_size)); + } + + // Pushes the given bytes and marks the builder as finished. + void PushFlatBuffer(const uint8_t *bytes, size_t size) { + PushBytes(bytes, size); + finished = true; + } + + void PushBytes(const uint8_t *bytes, size_t size) { buf_.push(bytes, size); } + + void PopBytes(size_t amount) { buf_.pop(amount); } + + template void AssertScalarT() { + // The code assumes power of 2 sizes and endian-swap-ability.
+ static_assert(flatbuffers::is_scalar::value, "T must be a scalar type"); + } + + // Write a single aligned scalar to the buffer + template uoffset_t PushElement(T element) { + AssertScalarT(); + Align(sizeof(T)); + buf_.push_small(EndianScalar(element)); + return GetSize(); + } + + template uoffset_t PushElement(Offset off) { + // Special case for offsets: see ReferTo below. + return PushElement(ReferTo(off.o)); + } + + // When writing fields, we track where they are, so we can create correct + // vtables later. + void TrackField(voffset_t field, uoffset_t off) { + FieldLoc fl = { off, field }; + buf_.scratch_push_small(fl); + num_field_loc++; + if (field > max_voffset_) { max_voffset_ = field; } + } + + // Like PushElement, but additionally tracks the field this represents. + template void AddElement(voffset_t field, T e, T def) { + // We don't serialize values equal to the default. + if (IsTheSameAs(e, def) && !force_defaults_) return; + TrackField(field, PushElement(e)); + } + + // Variant without a default value: the element is always written and + // tracked. + template void AddElement(voffset_t field, T e) { + TrackField(field, PushElement(e)); + } + + template void AddOffset(voffset_t field, Offset off) { + if (off.IsNull()) return; // Don't store. + AddElement(field, ReferTo(off.o), static_cast(0)); + } + + template void AddStruct(voffset_t field, const T *structptr) { + if (!structptr) return; // Default, don't store. + Align(AlignOf()); + buf_.push_small(*structptr); + TrackField(field, GetSize()); + } + + void AddStructOffset(voffset_t field, uoffset_t off) { + TrackField(field, off); + } + + // Offsets initially are relative to the end of the buffer (downwards). + // This function converts them to be relative to the current location + // in the buffer (when stored here), pointing upwards. + uoffset_t ReferTo(uoffset_t off) { + // Align to ensure GetSize() below is correct. + Align(sizeof(uoffset_t)); + // Offset must refer to something already in buffer.
+ const uoffset_t size = GetSize(); + FLATBUFFERS_ASSERT(off && off <= size); + return size - off + static_cast(sizeof(uoffset_t)); + } + + void NotNested() { + // If you hit this, you're trying to construct a Table/Vector/String + // during the construction of its parent table (between the MyTableBuilder + // and table.Finish()). + // Move the creation of these sub-objects to above the MyTableBuilder to + // not get this assert. + // Ignoring this assert may appear to work in simple cases, but the reason + // it is here is that storing objects in-line may cause vtable offsets + // to not fit anymore. It also leads to vtable duplication. + FLATBUFFERS_ASSERT(!nested); + // If you hit this, fields were added outside the scope of a table. + FLATBUFFERS_ASSERT(!num_field_loc); + } + + // From generated code (or from the parser), we call StartTable/EndTable + // with a sequence of AddElement calls in between. + uoffset_t StartTable() { + NotNested(); + nested = true; + return GetSize(); + } + + // This finishes one serialized object by generating the vtable if it's a + // table, comparing it against existing vtables, and writing the + // resulting vtable offset. + uoffset_t EndTable(uoffset_t start) { + // If you get this assert, a corresponding StartTable wasn't called. + FLATBUFFERS_ASSERT(nested); + // Write the vtable offset, which is the start of any Table. + // We fill its value later. + auto vtableoffsetloc = PushElement(0); + // Write a vtable, which consists entirely of voffset_t elements. + // It starts with the number of offsets, followed by a type id, followed + // by the offsets themselves. In reverse: + // Include space for the last offset and ensure empty tables have a + // minimum size. + max_voffset_ = + (std::max)(static_cast(max_voffset_ + sizeof(voffset_t)), + FieldIndexToOffset(0)); + buf_.fill_big(max_voffset_); + auto table_object_size = vtableoffsetloc - start; + // Vtable use 16bit offsets.
+ FLATBUFFERS_ASSERT(table_object_size < 0x10000); + WriteScalar(buf_.data() + sizeof(voffset_t), + static_cast(table_object_size)); + WriteScalar(buf_.data(), max_voffset_); + // Write the offsets into the table + for (auto it = buf_.scratch_end() - num_field_loc * sizeof(FieldLoc); + it < buf_.scratch_end(); it += sizeof(FieldLoc)) { + auto field_location = reinterpret_cast(it); + auto pos = static_cast(vtableoffsetloc - field_location->off); + // If this asserts, it means you've set a field twice. + FLATBUFFERS_ASSERT( + !ReadScalar(buf_.data() + field_location->id)); + WriteScalar(buf_.data() + field_location->id, pos); + } + ClearOffsets(); + auto vt1 = reinterpret_cast(buf_.data()); + auto vt1_size = ReadScalar(vt1); + auto vt_use = GetSize(); + // See if we already have generated a vtable with this exact same + // layout before. If so, make it point to the old one, remove this one. + if (dedup_vtables_) { + for (auto it = buf_.scratch_data(); it < buf_.scratch_end(); + it += sizeof(uoffset_t)) { + auto vt_offset_ptr = reinterpret_cast(it); + auto vt2 = reinterpret_cast(buf_.data_at(*vt_offset_ptr)); + auto vt2_size = ReadScalar(vt2); + if (vt1_size != vt2_size || 0 != memcmp(vt2, vt1, vt1_size)) continue; + vt_use = *vt_offset_ptr; + buf_.pop(GetSize() - vtableoffsetloc); + break; + } + } + // If this is a new vtable, remember it. + if (vt_use == GetSize()) { buf_.scratch_push_small(vt_use); } + // Fill the vtable offset we created above. + // The offset points from the beginning of the object to where the + // vtable is stored. + // Offsets default direction is downward in memory for future format + // flexibility (storing all vtables at the start of the file).
+ WriteScalar(buf_.data_at(vtableoffsetloc), + static_cast(vt_use) - + static_cast(vtableoffsetloc)); + + nested = false; + return vtableoffsetloc; + } + + // Deprecated overload: the second argument is ignored and the call + // forwards to EndTable(start). + FLATBUFFERS_ATTRIBUTE([[deprecated("call the version above instead")]]) + uoffset_t EndTable(uoffset_t start, voffset_t /*numfields*/) { + return EndTable(start); + } + + // This checks a required field has been set in a given table that has + // just been constructed. + template void Required(Offset table, voffset_t field); + + // Aligns the buffer to `alignment` and returns the current size. + uoffset_t StartStruct(size_t alignment) { + Align(alignment); + return GetSize(); + } + + uoffset_t EndStruct() { return GetSize(); } + + // Drops the tracked field locations from the scratch area and resets the + // per-table counters (num_field_loc, max_voffset_). + void ClearOffsets() { + buf_.scratch_pop(num_field_loc * sizeof(FieldLoc)); + num_field_loc = 0; + max_voffset_ = 0; + } + + // Aligns such that when "len" bytes are written, an object can be written + // after it with "alignment" without padding. + void PreAlign(size_t len, size_t alignment) { + if (len == 0) return; + TrackMinAlign(alignment); + buf_.fill(PaddingBytes(GetSize() + len, alignment)); + } + template void PreAlign(size_t len) { + AssertScalarT(); + PreAlign(len, AlignOf()); + } + /// @endcond + + /// @brief Store a string in the buffer, which can contain any binary data. + /// @param[in] str A const char pointer to the data to be stored as a string. + /// @param[in] len The number of bytes that should be stored from `str`. + /// @return Returns the offset in the buffer where the string starts. + Offset CreateString(const char *str, size_t len) { + NotNested(); + PreAlign(len + 1); // Always 0-terminated. + buf_.fill(1); + PushBytes(reinterpret_cast(str), len); + PushElement(static_cast(len)); + return Offset(GetSize()); + } + + /// @brief Store a string in the buffer, which is null-terminated. + /// @param[in] str A const char pointer to a C-string to add to the buffer. + /// @return Returns the offset in the buffer where the string starts.
+ Offset CreateString(const char *str) { + return CreateString(str, strlen(str)); + } + + /// @brief Store a string in the buffer, which is null-terminated. + /// @param[in] str A char pointer to a C-string to add to the buffer. + /// @return Returns the offset in the buffer where the string starts. + Offset CreateString(char *str) { + return CreateString(str, strlen(str)); + } + + /// @brief Store a string in the buffer, which can contain any binary data. + /// @param[in] str A const reference to a std::string to store in the buffer. + /// @return Returns the offset in the buffer where the string starts. + Offset CreateString(const std::string &str) { + return CreateString(str.c_str(), str.length()); + } + + // clang-format off + #ifdef FLATBUFFERS_HAS_STRING_VIEW + /// @brief Store a string in the buffer, which can contain any binary data. + /// @param[in] str A const string_view to copy into the buffer. + /// @return Returns the offset in the buffer where the string starts. + Offset CreateString(flatbuffers::string_view str) { + return CreateString(str.data(), str.size()); + } + #endif // FLATBUFFERS_HAS_STRING_VIEW + // clang-format on + + /// @brief Store a string in the buffer, which can contain any binary data. + /// @param[in] str A const pointer to a `String` struct to add to the buffer. + /// @return Returns the offset in the buffer where the string starts, or 0 + /// if `str` is null. + Offset CreateString(const String *str) { + return str ? CreateString(str->c_str(), str->size()) : 0; + } + + /// @brief Store a string in the buffer, which can contain any binary data. + /// @param[in] str A const reference to a std::string like type with support + /// of T::c_str() and T::length() to store in the buffer. + /// @return Returns the offset in the buffer where the string starts. + template Offset CreateString(const T &str) { + return CreateString(str.c_str(), str.length()); + } + + /// @brief Store a string in the buffer, which can contain any binary data.
+ /// If a string with this exact contents has already been serialized before, + /// instead simply returns the offset of the existing string. This uses a map + /// stored on the heap, but only stores the numerical offsets. + /// @param[in] str A const char pointer to the data to be stored as a string. + /// @param[in] len The number of bytes that should be stored from `str`. + /// @return Returns the offset in the buffer where the string starts. + Offset CreateSharedString(const char *str, size_t len) { + FLATBUFFERS_ASSERT(FLATBUFFERS_GENERAL_HEAP_ALLOC_OK); + // Create the string pool on first use. + if (!string_pool) + string_pool = new StringOffsetMap(StringOffsetCompare(buf_)); + auto size_before_string = buf_.size(); + // Must first serialize the string, since the set is all offsets into + // buffer. + auto off = CreateString(str, len); + auto it = string_pool->find(off); + // If it exists we reuse existing serialized data! + if (it != string_pool->end()) { + // We can remove the string we serialized. + buf_.pop(buf_.size() - size_before_string); + return *it; + } + // Record this string for future use. + string_pool->insert(off); + return off; + } + +#ifdef FLATBUFFERS_HAS_STRING_VIEW + /// @brief Store a string in the buffer, which can contain any binary data. + /// If a string with this exact contents has already been serialized before, + /// instead simply returns the offset of the existing string. This uses a map + /// stored on the heap, but only stores the numerical offsets. + /// @param[in] str A const std::string_view to store in the buffer. + /// @return Returns the offset in the buffer where the string starts. + Offset CreateSharedString(const flatbuffers::string_view str) { + return CreateSharedString(str.data(), str.size()); + } +#else + /// @brief Store a string in the buffer, which is null-terminated. + /// If a string with this exact contents has already been serialized before, + /// instead simply returns the offset of the existing string.
This uses a map + /// stored on the heap, but only stores the numerical offsets. + /// @param[in] str A const char pointer to a C-string to add to the buffer. + /// @return Returns the offset in the buffer where the string starts. + Offset CreateSharedString(const char *str) { + return CreateSharedString(str, strlen(str)); + } + + /// @brief Store a string in the buffer, which can contain any binary data. + /// If a string with this exact contents has already been serialized before, + /// instead simply returns the offset of the existing string. This uses a map + /// stored on the heap, but only stores the numerical offsets. + /// @param[in] str A const reference to a std::string to store in the buffer. + /// @return Returns the offset in the buffer where the string starts. + Offset CreateSharedString(const std::string &str) { + return CreateSharedString(str.c_str(), str.length()); + } +#endif + + /// @brief Store a string in the buffer, which can contain any binary data. + /// If a string with this exact contents has already been serialized before, + /// instead simply returns the offset of the existing string. This uses a map + /// stored on the heap, but only stores the numerical offsets. + /// @param[in] str A const pointer to a `String` struct to add to the buffer. + /// @return Returns the offset in the buffer where the string starts, or 0 + /// if `str` is null. + Offset CreateSharedString(const String *str) { + return str ? CreateSharedString(str->c_str(), str->size()) : 0; + } + + /// @cond FLATBUFFERS_INTERNAL + // Closes a vector opened with StartVector: clears the nesting flag and + // pushes `len`. + uoffset_t EndVector(size_t len) { + FLATBUFFERS_ASSERT(nested); // Hit if no corresponding StartVector. + nested = false; + return PushElement(static_cast(len)); + } + + // Opens a vector: asserts we are not nested, sets the nesting flag and + // pre-aligns for `len * elemsize` bytes of element data. + void StartVector(size_t len, size_t elemsize, size_t alignment) { + NotNested(); + nested = true; + PreAlign(len * elemsize); + PreAlign(len * elemsize, alignment); // Just in case elemsize > uoffset_t.
+ } + + template void StartVector(size_t len) { + return StartVector(len, sizeof(T), AlignOf()); + } + + // Call this right before StartVector/CreateVector if you want to force the + // alignment to be something different than what the element size would + // normally dictate. + // This is useful when storing a nested_flatbuffer in a vector of bytes, + // or when storing SIMD floats, etc. + void ForceVectorAlignment(size_t len, size_t elemsize, size_t alignment) { + if (len == 0) return; + FLATBUFFERS_ASSERT(VerifyAlignmentRequirements(alignment)); + PreAlign(len * elemsize, alignment); + } + + // Similar to ForceVectorAlignment but for String fields. + void ForceStringAlignment(size_t len, size_t alignment) { + if (len == 0) return; + FLATBUFFERS_ASSERT(VerifyAlignmentRequirements(alignment)); + // len + 1: presumably leaves room for the string's terminating 0 byte. + PreAlign((len + 1) * sizeof(char), alignment); + } + + /// @endcond + + /// @brief Serialize an array into a FlatBuffer `vector`. + /// @tparam T The data type of the array elements. + /// @param[in] v A pointer to the array of type `T` to serialize into the + /// buffer as a `vector`. + /// @param[in] len The number of elements to serialize. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template Offset> CreateVector(const T *v, size_t len) { + // If this assert hits, you're specifying a template argument that is + // causing the wrong overload to be selected, remove it. + AssertScalarT(); + StartVector(len); + if (len == 0) { return Offset>(EndVector(len)); } + // clang-format off + #if FLATBUFFERS_LITTLEENDIAN + PushBytes(reinterpret_cast(v), len * sizeof(T)); + #else + if (sizeof(T) == 1) { + PushBytes(reinterpret_cast(v), len); + } else { + for (auto i = len; i > 0; ) { + PushElement(v[--i]); + } + } + #endif + // clang-format on + return Offset>(EndVector(len)); + } + + /// @brief Serialize an array like object into a FlatBuffer `vector`. + /// @tparam T The data type of the array elements.
+ /// @tparam C The type of the array. + /// @param[in] array A reference to an array like object of type `T` to + /// serialize into the buffer as a `vector`. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template Offset> CreateVector(const C &array) { + return CreateVector(array.data(), array.size()); + } + + /// @brief Serialize an initializer list into a FlatBuffer `vector`. + /// @tparam T The data type of the initializer list elements. + /// @param[in] v The value of the initializer list. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template + Offset> CreateVector(std::initializer_list v) { + return CreateVector(v.begin(), v.size()); + } + + /// @brief Serialize an array of offsets into a FlatBuffer `vector`. + /// @param[in] v A pointer to the array of offsets to serialize. + /// @param[in] len The number of offsets to serialize. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template + Offset>> CreateVector(const Offset *v, size_t len) { + StartVector>(len); + // Elements are pushed from v[len - 1] down to v[0]. + for (auto i = len; i > 0;) { PushElement(v[--i]); } + return Offset>>(EndVector(len)); + } + + /// @brief Serialize a `std::vector` into a FlatBuffer `vector`. + /// @tparam T The data type of the `std::vector` elements. + /// @param v A const reference to the `std::vector` to serialize into the + /// buffer as a `vector`. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template> + Offset> CreateVector(const std::vector &v) { + return CreateVector(data(v), v.size()); + } + + // vector may be implemented using a bit-set, so we can't access it as + // an array. Instead, read elements manually. + // Background: https://isocpp.org/blog/2012/11/on-vectorbool + Offset> CreateVector(const std::vector &v) { + StartVector(v.size()); + for (auto i = v.size(); i > 0;) { + PushElement(static_cast(v[--i])); + } + return Offset>(EndVector(v.size())); + } + + /// @brief Serialize values returned by a function into a FlatBuffer `vector`. + /// This is a convenience function that takes care of iteration for you.
+ /// @tparam T The data type of the `std::vector` elements. + /// @param vector_size The number of elements to generate; `f` is called + /// once for each index in 0..vector_size-1. + /// @param f A function that takes the current iteration 0..vector_size-1 and + /// returns any type that you can construct a FlatBuffers vector out of. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template + Offset> CreateVector(size_t vector_size, + const std::function &f) { + FLATBUFFERS_ASSERT(FLATBUFFERS_GENERAL_HEAP_ALLOC_OK); + std::vector elems(vector_size); + for (size_t i = 0; i < vector_size; i++) elems[i] = f(i); + return CreateVector(elems); + } + + /// @brief Serialize values returned by a function into a FlatBuffer `vector`. + /// This is a convenience function that takes care of iteration for you. This + /// uses a vector stored on the heap to store the intermediate results of the + /// iteration. + /// @tparam T The data type of the `std::vector` elements. + /// @param vector_size The number of elements to generate; `f` is called + /// once for each index in 0..vector_size-1. + /// @param f A function that takes the current iteration 0..vector_size-1, + /// and the state parameter returning any type that you can construct a + /// FlatBuffers vector out of. + /// @param state State passed to f. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template + Offset> CreateVector(size_t vector_size, F f, S *state) { + FLATBUFFERS_ASSERT(FLATBUFFERS_GENERAL_HEAP_ALLOC_OK); + std::vector elems(vector_size); + for (size_t i = 0; i < vector_size; i++) elems[i] = f(i, state); + return CreateVector(elems); + } + + /// @brief Serialize a `std::vector` into a FlatBuffer `vector`. + /// whereas StringType is any type that is accepted by the CreateString() + /// overloads. + /// This is a convenience function for a common case. + /// @param v A const reference to the `std::vector` to serialize into the + /// buffer as a `vector`. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored.
+ template> + Offset>> CreateVectorOfStrings( + const std::vector &v) { + return CreateVectorOfStrings(v.cbegin(), v.cend()); + } + + /// @brief Serialize a collection of Strings into a FlatBuffer `vector`. + /// This is a convenience function for a common case. + /// @param begin The beginning iterator of the collection. + /// @param end The ending iterator of the collection. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template + Offset>> CreateVectorOfStrings(It begin, It end) { + auto size = std::distance(begin, end); + auto scratch_buffer_usage = size * sizeof(Offset); + // If there is not enough space to store the offsets, there definitely won't + // be enough space to store all the strings. So ensuring space for the + // scratch region is OK, for if it fails, it would have failed later. + buf_.ensure_space(scratch_buffer_usage); + // Serialize every string first, stashing each resulting offset in the + // scratch area. + for (auto it = begin; it != end; ++it) { + buf_.scratch_push_small(CreateString(*it)); + } + StartVector>(size); + for (auto i = 1; i <= size; i++) { + // Note we re-evaluate the buf location each iteration to account for any + // underlying buffer resizing that may occur. + PushElement(*reinterpret_cast *>( + buf_.scratch_end() - i * sizeof(Offset))); + } + // Release the scratch space that held the temporary string offsets. + buf_.scratch_pop(scratch_buffer_usage); + return Offset>>(EndVector(size)); + } + + /// @brief Serialize an array of structs into a FlatBuffer `vector`. + /// @tparam T The data type of the struct array elements. + /// @param[in] v A pointer to the array of type `T` to serialize into the + /// buffer as a `vector`. + /// @param[in] len The number of elements to serialize. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored.
+ template + Offset> CreateVectorOfStructs(const T *v, size_t len) { + StartVector(len * sizeof(T) / AlignOf(), sizeof(T), AlignOf()); + // Nothing is pushed for an empty array. + if (len > 0) { + PushBytes(reinterpret_cast(v), sizeof(T) * len); + } + return Offset>(EndVector(len)); + } + + /// @brief Serialize an array of native structs into a FlatBuffer `vector`. + /// @tparam T The data type of the struct array elements. + /// @tparam S The data type of the native struct array elements. + /// @param[in] v A pointer to the array of type `S` to serialize into the + /// buffer as a `vector`. + /// @param[in] len The number of elements to serialize. + /// @param[in] pack_func Pointer to a function to convert the native struct + /// to the FlatBuffer struct. Must not be null (asserted). + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template + Offset> CreateVectorOfNativeStructs( + const S *v, size_t len, T (*const pack_func)(const S &)) { + FLATBUFFERS_ASSERT(pack_func); + auto structs = StartVectorOfStructs(len); + for (size_t i = 0; i < len; i++) { structs[i] = pack_func(v[i]); } + return EndVectorOfStructs(len); + } + + /// @brief Serialize an array of native structs into a FlatBuffer `vector`. + /// @tparam T The data type of the struct array elements. + /// @tparam S The data type of the native struct array elements. + /// @param[in] v A pointer to the array of type `S` to serialize into the + /// buffer as a `vector`. + /// @param[in] len The number of elements to serialize. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template + Offset> CreateVectorOfNativeStructs(const S *v, + size_t len) { + // Relies on a free function `T Pack(const S &)` that is only declared + // here; its definition is not part of this function. + extern T Pack(const S &); + return CreateVectorOfNativeStructs(v, len, Pack); + } + + /// @brief Serialize an array of structs into a FlatBuffer `vector`. + /// @tparam T The data type of the struct array elements.
+ /// @param[in] filler A function that takes the current iteration + /// 0..vector_size-1 and a pointer to the struct that must be filled. + /// @param[in] vector_size The number of structs to create; `filler` is + /// called once per index. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + /// This is mostly useful when flatbuffers are generated with mutation + /// accessors. + template + Offset> CreateVectorOfStructs( + size_t vector_size, const std::function &filler) { + T *structs = StartVectorOfStructs(vector_size); + for (size_t i = 0; i < vector_size; i++) { + filler(i, structs); + structs++; + } + return EndVectorOfStructs(vector_size); + } + + /// @brief Serialize an array of structs into a FlatBuffer `vector`. + /// @tparam T The data type of the struct array elements. + /// @param[in] f A function that takes the current iteration 0..vector_size-1, + /// a pointer to the struct that must be filled and the state argument. + /// @param[in] state Arbitrary state to pass to f. + /// @param[in] vector_size The number of structs to create; `f` is called + /// once per index. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + /// This is mostly useful when flatbuffers are generated with mutation + /// accessors. + template + Offset> CreateVectorOfStructs(size_t vector_size, F f, + S *state) { + T *structs = StartVectorOfStructs(vector_size); + for (size_t i = 0; i < vector_size; i++) { + f(i, structs, state); + structs++; + } + return EndVectorOfStructs(vector_size); + } + + /// @brief Serialize a `std::vector` of structs into a FlatBuffer `vector`. + /// @tparam T The data type of the `std::vector` struct elements. + /// @param[in] v A const reference to the `std::vector` of structs to + /// serialize into the buffer as a `vector`. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored.
+ template> + Offset> CreateVectorOfStructs( + const std::vector &v) { + return CreateVectorOfStructs(data(v), v.size()); + } + + /// @brief Serialize a `std::vector` of native structs into a FlatBuffer + /// `vector`. + /// @tparam T The data type of the `std::vector` struct elements. + /// @tparam S The data type of the `std::vector` native struct elements. + /// @param[in] v A const reference to the `std::vector` of structs to + /// serialize into the buffer as a `vector`. + /// @param[in] pack_func Pointer to a function to convert the native struct + /// to the FlatBuffer struct. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template> + Offset> CreateVectorOfNativeStructs( + const std::vector &v, T (*const pack_func)(const S &)) { + return CreateVectorOfNativeStructs(data(v), v.size(), pack_func); + } + + /// @brief Serialize a `std::vector` of native structs into a FlatBuffer + /// `vector`. + /// @tparam T The data type of the `std::vector` struct elements. + /// @tparam S The data type of the `std::vector` native struct elements. + /// @param[in] v A const reference to the `std::vector` of structs to + /// serialize into the buffer as a `vector`. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template> + Offset> CreateVectorOfNativeStructs( + const std::vector &v) { + return CreateVectorOfNativeStructs(data(v), v.size()); + } + + /// @cond FLATBUFFERS_INTERNAL + // Comparator functor that orders two structs by calling + // a.KeyCompareLessThan(&b). + template struct StructKeyComparator { + bool operator()(const T &a, const T &b) const { + return a.KeyCompareLessThan(&b); + } + }; + /// @endcond + + /// @brief Serialize a `std::vector` of structs into a FlatBuffer `vector` + /// in sorted order. + /// @tparam T The data type of the `std::vector` struct elements. + /// @param[in] v A pointer to the `std::vector` of structs to + /// serialize into the buffer as a `vector`.
+ /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template> + Offset> CreateVectorOfSortedStructs( + std::vector *v) { + return CreateVectorOfSortedStructs(data(*v), v->size()); + } + + /// @brief Serialize a `std::vector` of native structs into a FlatBuffer + /// `vector` in sorted order. + /// @tparam T The data type of the `std::vector` struct elements. + /// @tparam S The data type of the `std::vector` native struct elements. + /// @param[in] v A pointer to the `std::vector` of structs to + /// serialize into the buffer as a `vector`. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + template> + Offset> CreateVectorOfSortedNativeStructs( + std::vector *v) { + return CreateVectorOfSortedNativeStructs(data(*v), v->size()); + } + + /// @brief Serialize an array of structs into a FlatBuffer `vector` in sorted + /// order. + /// @tparam T The data type of the struct array elements. + /// @param[in] v A pointer to the array of type `T` to serialize into the + /// buffer as a `vector`. + /// @param[in] len The number of elements to serialize. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + /// @note The array `v` itself is reordered by `std::stable_sort` before it + /// is serialized. + template + Offset> CreateVectorOfSortedStructs(T *v, size_t len) { + std::stable_sort(v, v + len, StructKeyComparator()); + return CreateVectorOfStructs(v, len); + } + + /// @brief Serialize an array of native structs into a FlatBuffer `vector` in + /// sorted order. + /// @tparam T The data type of the struct array elements. + /// @tparam S The data type of the native struct array elements. + /// @param[in] v A pointer to the array of type `S` to serialize into the + /// buffer as a `vector`. + /// @param[in] len The number of elements to serialize. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored.
+ template + Offset> CreateVectorOfSortedNativeStructs(S *v, + size_t len) { + extern T Pack(const S &); + auto structs = StartVectorOfStructs(len); + // Pack each native struct into the vector storage, then sort the packed + // structs. + for (size_t i = 0; i < len; i++) { structs[i] = Pack(v[i]); } + std::stable_sort(structs, structs + len, StructKeyComparator()); + return EndVectorOfStructs(len); + } + + /// @cond FLATBUFFERS_INTERNAL + // Comparator functor for table offsets: resolves both offsets through + // buf_.data_at() and compares the tables via KeyCompareLessThan. + template struct TableKeyComparator { + TableKeyComparator(vector_downward &buf) : buf_(buf) {} + TableKeyComparator(const TableKeyComparator &other) : buf_(other.buf_) {} + bool operator()(const Offset &a, const Offset &b) const { + auto table_a = reinterpret_cast(buf_.data_at(a.o)); + auto table_b = reinterpret_cast(buf_.data_at(b.o)); + return table_a->KeyCompareLessThan(table_b); + } + vector_downward &buf_; + + private: + FLATBUFFERS_DELETE_FUNC( + TableKeyComparator &operator=(const TableKeyComparator &other)); + }; + /// @endcond + + /// @brief Serialize an array of `table` offsets as a `vector` in the buffer + /// in sorted order. + /// @tparam T The data type that the offset refers to. + /// @param[in] v An array of type `Offset` that contains the `table` + /// offsets to store in the buffer in sorted order. + /// @param[in] len The number of elements to store in the `vector`. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored. + /// @note The array `v` itself is reordered by `std::stable_sort` before it + /// is serialized. + template + Offset>> CreateVectorOfSortedTables(Offset *v, + size_t len) { + std::stable_sort(v, v + len, TableKeyComparator(buf_)); + return CreateVector(v, len); + } + + /// @brief Serialize an array of `table` offsets as a `vector` in the buffer + /// in sorted order. + /// @tparam T The data type that the offset refers to. + /// @param[in] v An array of type `Offset` that contains the `table` + /// offsets to store in the buffer in sorted order. + /// @return Returns a typed `Offset` into the serialized data indicating + /// where the vector is stored.
+ template> + Offset>> CreateVectorOfSortedTables( + std::vector, Alloc> *v) { + return CreateVectorOfSortedTables(data(*v), v->size()); + } + + /// @brief Specialized version of `CreateVector` for non-copying use cases. + /// Write the data any time later to the returned buffer pointer `buf`. + /// @param[in] len The number of elements to store in the `vector`. + /// @param[in] elemsize The size of each element in the `vector`. + /// @param[in] alignment The alignment forwarded to `StartVector` for the + /// vector's elements. + /// @param[out] buf A pointer to a `uint8_t` pointer that can be + /// written to at a later time to serialize the data into a `vector` + /// in the buffer. + /// @return Returns the offset produced by `EndVector(len)`. + uoffset_t CreateUninitializedVector(size_t len, size_t elemsize, + size_t alignment, uint8_t **buf) { + NotNested(); + StartVector(len, elemsize, alignment); + buf_.make_space(len * elemsize); + // Remember where the element data starts before the length is pushed. + auto vec_start = GetSize(); + auto vec_end = EndVector(len); + *buf = buf_.data_at(vec_start); + return vec_end; + } + + /// @deprecated Overload without an explicit alignment; it passes `elemsize` + /// as the alignment. + FLATBUFFERS_ATTRIBUTE([[deprecated("call the version above instead")]]) + uoffset_t CreateUninitializedVector(size_t len, size_t elemsize, + uint8_t **buf) { + return CreateUninitializedVector(len, elemsize, elemsize, buf); + } + + /// @brief Specialized version of `CreateVector` for non-copying use cases. + /// Write the data any time later to the returned buffer pointer `buf`. + /// @tparam T The data type of the data that will be stored in the buffer + /// as a `vector`. + /// @param[in] len The number of elements to store in the `vector`. + /// @param[out] buf A pointer to a pointer of type `T` that can be + /// written to at a later time to serialize the data into a `vector` + /// in the buffer.
+ template + Offset> CreateUninitializedVector(size_t len, T **buf) { + AssertScalarT(); + return CreateUninitializedVector(len, sizeof(T), AlignOf(), + reinterpret_cast(buf)); + } + + template + Offset> CreateUninitializedVectorOfStructs(size_t len, + T **buf) { + return CreateUninitializedVector(len, sizeof(T), AlignOf(), + reinterpret_cast(buf)); + } + + // @brief Create a vector of scalar type T given as input a vector of scalar + // type U, useful with e.g. pre "enum class" enums, or any existing scalar + // data of the wrong type. + template + Offset> CreateVectorScalarCast(const U *v, size_t len) { + AssertScalarT(); + AssertScalarT(); + StartVector(len); + for (auto i = len; i > 0;) { PushElement(static_cast(v[--i])); } + return Offset>(EndVector(len)); + } + + /// @brief Write a struct by itself, typically to be part of a union. + template Offset CreateStruct(const T &structobj) { + NotNested(); + Align(AlignOf()); + buf_.push_small(structobj); + return Offset(GetSize()); + } + + /// @brief Finish serializing a buffer by writing the root offset. + /// @param[in] file_identifier If a `file_identifier` is given, the buffer + /// will be prefixed with a standard FlatBuffers file header. + template + void Finish(Offset root, const char *file_identifier = nullptr) { + Finish(root.o, file_identifier, false); + } + + /// @brief Finish a buffer with a 32 bit size field pre-fixed (size of the + /// buffer following the size field). These buffers are NOT compatible + /// with standard buffers created by Finish, i.e. you can't call GetRoot + /// on them, you have to use GetSizePrefixedRoot instead. + /// All >32 bit quantities in this buffer will be aligned when the whole + /// size pre-fixed buffer is aligned. + /// These kinds of buffers are useful for creating a stream of FlatBuffers. 
+ template + void FinishSizePrefixed(Offset root, + const char *file_identifier = nullptr) { + Finish(root.o, file_identifier, true); + } + + void SwapBufAllocator(FlatBufferBuilder &other) { + buf_.swap_allocator(other.buf_); + } + + /// @brief The length of a FlatBuffer file header. + static const size_t kFileIdentifierLength = + ::flatbuffers::kFileIdentifierLength; + + protected: + // You shouldn't really be copying instances of this class. + FlatBufferBuilder(const FlatBufferBuilder &); + FlatBufferBuilder &operator=(const FlatBufferBuilder &); + + void Finish(uoffset_t root, const char *file_identifier, bool size_prefix) { + NotNested(); + buf_.clear_scratch(); + // This will cause the whole buffer to be aligned. + PreAlign((size_prefix ? sizeof(uoffset_t) : 0) + sizeof(uoffset_t) + + (file_identifier ? kFileIdentifierLength : 0), + minalign_); + if (file_identifier) { + FLATBUFFERS_ASSERT(strlen(file_identifier) == kFileIdentifierLength); + PushBytes(reinterpret_cast(file_identifier), + kFileIdentifierLength); + } + PushElement(ReferTo(root)); // Location of root. + if (size_prefix) { PushElement(GetSize()); } + finished = true; + } + + struct FieldLoc { + uoffset_t off; + voffset_t id; + }; + + vector_downward buf_; + + // Accumulating offsets of table members while it is being built. + // We store these in the scratch pad of buf_, after the vtable offsets. + uoffset_t num_field_loc; + // Track how much of the vtable is in use, so we can output the most compact + // possible vtable. + voffset_t max_voffset_; + + // Ensure objects are not nested. + bool nested; + + // Ensure the buffer is finished before it is being accessed. + bool finished; + + size_t minalign_; + + bool force_defaults_; // Serialize values equal to their defaults anyway. 
+ + bool dedup_vtables_; + + struct StringOffsetCompare { + StringOffsetCompare(const vector_downward &buf) : buf_(&buf) {} + bool operator()(const Offset &a, const Offset &b) const { + auto stra = reinterpret_cast(buf_->data_at(a.o)); + auto strb = reinterpret_cast(buf_->data_at(b.o)); + return StringLessThan(stra->data(), stra->size(), strb->data(), + strb->size()); + } + const vector_downward *buf_; + }; + + // For use with CreateSharedString. Instantiated on first use only. + typedef std::set, StringOffsetCompare> StringOffsetMap; + StringOffsetMap *string_pool; + + private: + // Allocates space for a vector of structures. + // Must be completed with EndVectorOfStructs(). + template T *StartVectorOfStructs(size_t vector_size) { + StartVector(vector_size * sizeof(T) / AlignOf(), sizeof(T), AlignOf()); + return reinterpret_cast(buf_.make_space(vector_size * sizeof(T))); + } + + // End the vector of structures in the flatbuffers. + // Vector should have previously be started with StartVectorOfStructs(). + template + Offset> EndVectorOfStructs(size_t vector_size) { + return Offset>(EndVector(vector_size)); + } +}; +/// @} + +/// Helpers to get a typed pointer to objects that are currently being built. +/// @warning Creating new objects will lead to reallocations and invalidates +/// the pointer! +template +T *GetMutableTemporaryPointer(FlatBufferBuilder &fbb, Offset offset) { + return reinterpret_cast(fbb.GetCurrentBufferPointer() + fbb.GetSize() - + offset.o); +} + +template +const T *GetTemporaryPointer(FlatBufferBuilder &fbb, Offset offset) { + return GetMutableTemporaryPointer(fbb, offset); +} + +template +void FlatBufferBuilder::Required(Offset table, voffset_t field) { + auto table_ptr = reinterpret_cast(buf_.data_at(table.o)); + bool ok = table_ptr->GetOptionalFieldOffset(field) != 0; + // If this fails, the caller will show what field needs to be set. 
+ FLATBUFFERS_ASSERT(ok); + (void)ok; +} + +} // namespace flatbuffers + +#endif // FLATBUFFERS_VECTOR_DOWNWARD_H_ diff --git a/3rdparty/flatbuffers/include/flatbuffers/flatbuffers.h b/3rdparty/flatbuffers/include/flatbuffers/flatbuffers.h new file mode 100644 index 0000000000..d7ee6ab4dd --- /dev/null +++ b/3rdparty/flatbuffers/include/flatbuffers/flatbuffers.h @@ -0,0 +1,272 @@ +/* + * Copyright 2014 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_H_ +#define FLATBUFFERS_H_ + +#include + +// TODO: These includes are for mitigating the pains of users editing their +// source because they relied on flatbuffers.h to include everything for them. +#include "flatbuffers/array.h" +#include "flatbuffers/base.h" +#include "flatbuffers/buffer.h" +#include "flatbuffers/buffer_ref.h" +#include "flatbuffers/detached_buffer.h" +#include "flatbuffers/flatbuffer_builder.h" +#include "flatbuffers/stl_emulation.h" +#include "flatbuffers/string.h" +#include "flatbuffers/struct.h" +#include "flatbuffers/table.h" +#include "flatbuffers/vector.h" +#include "flatbuffers/vector_downward.h" +#include "flatbuffers/verifier.h" + +namespace flatbuffers { + +/// @brief This can compute the start of a FlatBuffer from a root pointer, i.e. +/// it is the opposite transformation of GetRoot(). +/// This may be useful if you want to pass on a root and have the recipient +/// delete the buffer afterwards. 
+inline const uint8_t *GetBufferStartFromRootPointer(const void *root) { + auto table = reinterpret_cast(root); + auto vtable = table->GetVTable(); + // Either the vtable is before the root or after the root. + auto start = (std::min)(vtable, reinterpret_cast(root)); + // Align to at least sizeof(uoffset_t). + start = reinterpret_cast(reinterpret_cast(start) & + ~(sizeof(uoffset_t) - 1)); + // Additionally, there may be a file_identifier in the buffer, and the root + // offset. The buffer may have been aligned to any size between + // sizeof(uoffset_t) and FLATBUFFERS_MAX_ALIGNMENT (see "force_align"). + // Sadly, the exact alignment is only known when constructing the buffer, + // since it depends on the presence of values with said alignment properties. + // So instead, we simply look at the next uoffset_t values (root, + // file_identifier, and alignment padding) to see which points to the root. + // None of the other values can "impersonate" the root since they will either + // be 0 or four ASCII characters. + static_assert(flatbuffers::kFileIdentifierLength == sizeof(uoffset_t), + "file_identifier is assumed to be the same size as uoffset_t"); + for (auto possible_roots = FLATBUFFERS_MAX_ALIGNMENT / sizeof(uoffset_t) + 1; + possible_roots; possible_roots--) { + start -= sizeof(uoffset_t); + if (ReadScalar(start) + start == + reinterpret_cast(root)) + return start; + } + // We didn't find the root, either the "root" passed isn't really a root, + // or the buffer is corrupt. + // Assert, because calling this function with bad data may cause reads + // outside of buffer boundaries. + FLATBUFFERS_ASSERT(false); + return nullptr; +} + +/// @brief This return the prefixed size of a FlatBuffer. +inline uoffset_t GetPrefixedSize(const uint8_t *buf) { + return ReadScalar(buf); +} + +// Base class for native objects (FlatBuffer data de-serialized into native +// C++ data structures). +// Contains no functionality, purely documentative. 
+struct NativeTable {}; + +/// @brief Function types to be used with resolving hashes into objects and +/// back again. The resolver gets a pointer to a field inside an object API +/// object that is of the type specified in the schema using the attribute +/// `cpp_type` (it is thus important whatever you write to this address +/// matches that type). The value of this field is initially null, so you +/// may choose to implement a delayed binding lookup using this function +/// if you wish. The resolver does the opposite lookup, for when the object +/// is being serialized again. +typedef uint64_t hash_value_t; +typedef std::function + resolver_function_t; +typedef std::function rehasher_function_t; + +// Helper function to test if a field is present, using any of the field +// enums in the generated code. +// `table` must be a generated table type. Since this is a template parameter, +// this is not typechecked to be a subclass of Table, so beware! +// Note: this function will return false for fields equal to the default +// value, since they're not stored in the buffer (unless force_defaults was +// used). +template +bool IsFieldPresent(const T *table, typename T::FlatBuffersVTableOffset field) { + // Cast, since Table is a private baseclass of any table types. + return reinterpret_cast(table)->CheckField( + static_cast(field)); +} + +// Utility function for reverse lookups on the EnumNames*() functions +// (in the generated C++ code) +// names must be NULL terminated. +inline int LookupEnum(const char **names, const char *name) { + for (const char **p = names; *p; p++) + if (!strcmp(*p, name)) return static_cast(p - names); + return -1; +} + +// These macros allow us to layout a struct with a guarantee that they'll end +// up looking the same on different compilers and platforms. +// It does this by disallowing the compiler to do any padding, and then +// does padding itself by inserting extra padding fields that make every +// element aligned to its own size. 
+// Additionally, it manually sets the alignment of the struct as a whole, +// which is typically its largest element, or a custom size set in the schema +// by the force_align attribute. +// These are used in the generated code only. + +// clang-format off +#if defined(_MSC_VER) + #define FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(alignment) \ + __pragma(pack(1)) \ + struct __declspec(align(alignment)) + #define FLATBUFFERS_STRUCT_END(name, size) \ + __pragma(pack()) \ + static_assert(sizeof(name) == size, "compiler breaks packing rules") +#elif defined(__GNUC__) || defined(__clang__) || defined(__ICCARM__) + #define FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(alignment) \ + _Pragma("pack(1)") \ + struct __attribute__((aligned(alignment))) + #define FLATBUFFERS_STRUCT_END(name, size) \ + _Pragma("pack()") \ + static_assert(sizeof(name) == size, "compiler breaks packing rules") +#else + #error Unknown compiler, please define structure alignment macros +#endif +// clang-format on + +// Minimal reflection via code generation. +// Besides full-fat reflection (see reflection.h) and parsing/printing by +// loading schemas (see idl.h), we can also have code generation for minimal +// reflection data which allows pretty-printing and other uses without needing +// a schema or a parser. +// Generate code with --reflect-types (types only) or --reflect-names (names +// also) to enable. +// See minireflect.h for utilities using this functionality. + +// These types are organized slightly differently as the ones in idl.h. +enum SequenceType { ST_TABLE, ST_STRUCT, ST_UNION, ST_ENUM }; + +// Scalars have the same order as in idl.h +// clang-format off +#define FLATBUFFERS_GEN_ELEMENTARY_TYPES(ET) \ + ET(ET_UTYPE) \ + ET(ET_BOOL) \ + ET(ET_CHAR) \ + ET(ET_UCHAR) \ + ET(ET_SHORT) \ + ET(ET_USHORT) \ + ET(ET_INT) \ + ET(ET_UINT) \ + ET(ET_LONG) \ + ET(ET_ULONG) \ + ET(ET_FLOAT) \ + ET(ET_DOUBLE) \ + ET(ET_STRING) \ + ET(ET_SEQUENCE) // See SequenceType. 
+ +enum ElementaryType { + #define FLATBUFFERS_ET(E) E, + FLATBUFFERS_GEN_ELEMENTARY_TYPES(FLATBUFFERS_ET) + #undef FLATBUFFERS_ET +}; + +inline const char * const *ElementaryTypeNames() { + static const char * const names[] = { + #define FLATBUFFERS_ET(E) #E, + FLATBUFFERS_GEN_ELEMENTARY_TYPES(FLATBUFFERS_ET) + #undef FLATBUFFERS_ET + }; + return names; +} +// clang-format on + +// Basic type info cost just 16bits per field! +// We're explicitly defining the signedness since the signedness of integer +// bitfields is otherwise implementation-defined and causes warnings on older +// GCC compilers. +struct TypeCode { + // ElementaryType + unsigned short base_type : 4; + // Either vector (in table) or array (in struct) + unsigned short is_repeating : 1; + // Index into type_refs below, or -1 for none. + signed short sequence_ref : 11; +}; + +static_assert(sizeof(TypeCode) == 2, "TypeCode"); + +struct TypeTable; + +// Signature of the static method present in each type. +typedef const TypeTable *(*TypeFunction)(); + +struct TypeTable { + SequenceType st; + size_t num_elems; // of type_codes, values, names (but not type_refs). + const TypeCode *type_codes; // num_elems count + const TypeFunction *type_refs; // less than num_elems entries (see TypeCode). + const int16_t *array_sizes; // less than num_elems entries (see TypeCode). + const int64_t *values; // Only set for non-consecutive enum/union or structs. + const char *const *names; // Only set if compiled with --reflect-names. +}; + +// String which identifies the current version of FlatBuffers. +inline const char *flatbuffers_version_string() { + return "FlatBuffers " FLATBUFFERS_STRING(FLATBUFFERS_VERSION_MAJOR) "." + FLATBUFFERS_STRING(FLATBUFFERS_VERSION_MINOR) "." 
+ FLATBUFFERS_STRING(FLATBUFFERS_VERSION_REVISION); +} + +// clang-format off +#define FLATBUFFERS_DEFINE_BITMASK_OPERATORS(E, T)\ + inline E operator | (E lhs, E rhs){\ + return E(T(lhs) | T(rhs));\ + }\ + inline E operator & (E lhs, E rhs){\ + return E(T(lhs) & T(rhs));\ + }\ + inline E operator ^ (E lhs, E rhs){\ + return E(T(lhs) ^ T(rhs));\ + }\ + inline E operator ~ (E lhs){\ + return E(~T(lhs));\ + }\ + inline E operator |= (E &lhs, E rhs){\ + lhs = lhs | rhs;\ + return lhs;\ + }\ + inline E operator &= (E &lhs, E rhs){\ + lhs = lhs & rhs;\ + return lhs;\ + }\ + inline E operator ^= (E &lhs, E rhs){\ + lhs = lhs ^ rhs;\ + return lhs;\ + }\ + inline bool operator !(E rhs) \ + {\ + return !bool(T(rhs)); \ + } +/// @endcond +} // namespace flatbuffers + +// clang-format on + +#endif // FLATBUFFERS_H_ diff --git a/3rdparty/flatbuffers/include/flatbuffers/stl_emulation.h b/3rdparty/flatbuffers/include/flatbuffers/stl_emulation.h new file mode 100644 index 0000000000..452ddb832f --- /dev/null +++ b/3rdparty/flatbuffers/include/flatbuffers/stl_emulation.h @@ -0,0 +1,510 @@ +/* + * Copyright 2017 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_STL_EMULATION_H_ +#define FLATBUFFERS_STL_EMULATION_H_ + +// clang-format off +#include "flatbuffers/base.h" + +#include +#include +#include +#include +#include + +#ifndef FLATBUFFERS_USE_STD_OPTIONAL + // Detect C++17 compatible compiler. 
+ // __cplusplus >= 201703L - a compiler has support of 'static inline' variables. + #if (defined(__cplusplus) && __cplusplus >= 201703L) \ + || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) + #define FLATBUFFERS_USE_STD_OPTIONAL 1 + #else + #define FLATBUFFERS_USE_STD_OPTIONAL 0 + #endif // (defined(__cplusplus) && __cplusplus >= 201703L) ... +#endif // FLATBUFFERS_USE_STD_OPTIONAL + +#if FLATBUFFERS_USE_STD_OPTIONAL + #include +#endif + +// The __cpp_lib_span is the predefined feature macro. +#if defined(FLATBUFFERS_USE_STD_SPAN) + #include +#elif defined(__cpp_lib_span) && defined(__has_include) + #if __has_include() + #include + #include + #define FLATBUFFERS_USE_STD_SPAN + #endif +#else + // Disable non-trivial ctors if FLATBUFFERS_SPAN_MINIMAL defined. + #if !defined(FLATBUFFERS_TEMPLATES_ALIASES) + #define FLATBUFFERS_SPAN_MINIMAL + #else + // Enable implicit construction of a span from a std::array. + #include + #endif +#endif // defined(FLATBUFFERS_USE_STD_SPAN) + +// This header provides backwards compatibility for older versions of the STL. +namespace flatbuffers { + +#if defined(FLATBUFFERS_TEMPLATES_ALIASES) + template + using numeric_limits = std::numeric_limits; +#else + template class numeric_limits : + public std::numeric_limits {}; +#endif // defined(FLATBUFFERS_TEMPLATES_ALIASES) + +#if defined(FLATBUFFERS_TEMPLATES_ALIASES) + template using is_scalar = std::is_scalar; + template using is_same = std::is_same; + template using is_floating_point = std::is_floating_point; + template using is_unsigned = std::is_unsigned; + template using is_enum = std::is_enum; + template using make_unsigned = std::make_unsigned; + template + using conditional = std::conditional; + template + using integral_constant = std::integral_constant; + template + using bool_constant = integral_constant; + using true_type = std::true_type; + using false_type = std::false_type; +#else + // MSVC 2010 doesn't support C++11 aliases. 
+ template struct is_scalar : public std::is_scalar {}; + template struct is_same : public std::is_same {}; + template struct is_floating_point : + public std::is_floating_point {}; + template struct is_unsigned : public std::is_unsigned {}; + template struct is_enum : public std::is_enum {}; + template struct make_unsigned : public std::make_unsigned {}; + template + struct conditional : public std::conditional {}; + template + struct integral_constant : public std::integral_constant {}; + template + struct bool_constant : public integral_constant {}; + typedef bool_constant true_type; + typedef bool_constant false_type; +#endif // defined(FLATBUFFERS_TEMPLATES_ALIASES) + +#if defined(FLATBUFFERS_TEMPLATES_ALIASES) + template using unique_ptr = std::unique_ptr; +#else + // MSVC 2010 doesn't support C++11 aliases. + // We're manually "aliasing" the class here as we want to bring unique_ptr + // into the flatbuffers namespace. We have unique_ptr in the flatbuffers + // namespace we have a completely independent implementation (see below) + // for C++98 STL implementations. + template class unique_ptr : public std::unique_ptr { + public: + unique_ptr() {} + explicit unique_ptr(T* p) : std::unique_ptr(p) {} + unique_ptr(std::unique_ptr&& u) { *this = std::move(u); } + unique_ptr(unique_ptr&& u) { *this = std::move(u); } + unique_ptr& operator=(std::unique_ptr&& u) { + std::unique_ptr::reset(u.release()); + return *this; + } + unique_ptr& operator=(unique_ptr&& u) { + std::unique_ptr::reset(u.release()); + return *this; + } + unique_ptr& operator=(T* p) { + return std::unique_ptr::operator=(p); + } + }; +#endif // defined(FLATBUFFERS_TEMPLATES_ALIASES) + +#if FLATBUFFERS_USE_STD_OPTIONAL +template +using Optional = std::optional; +using nullopt_t = std::nullopt_t; +inline constexpr nullopt_t nullopt = std::nullopt; + +#else +// Limited implementation of Optional type for a scalar T. 
+// This implementation limited by trivial types compatible with +// std::is_arithmetic or std::is_enum type traits. + +// A tag to indicate an empty flatbuffers::optional. +struct nullopt_t { + explicit FLATBUFFERS_CONSTEXPR_CPP11 nullopt_t(int) {} +}; + +#if defined(FLATBUFFERS_CONSTEXPR_DEFINED) + namespace internal { + template struct nullopt_holder { + static constexpr nullopt_t instance_ = nullopt_t(0); + }; + template + constexpr nullopt_t nullopt_holder::instance_; + } + static constexpr const nullopt_t &nullopt = internal::nullopt_holder::instance_; + +#else + namespace internal { + template struct nullopt_holder { + static const nullopt_t instance_; + }; + template + const nullopt_t nullopt_holder::instance_ = nullopt_t(0); + } + static const nullopt_t &nullopt = internal::nullopt_holder::instance_; + +#endif + +template +class Optional FLATBUFFERS_FINAL_CLASS { + // Non-scalar 'T' would extremely complicated Optional. + // Use is_scalar checking because flatbuffers flatbuffers::is_arithmetic + // isn't implemented. 
+  static_assert(flatbuffers::is_scalar::value, "unexpected type T");
+
+ public:
+  ~Optional() {}
+
+  // Constructs an empty Optional: value_ is value-initialized and
+  // has_value_ is false.
+  FLATBUFFERS_CONSTEXPR_CPP11 Optional() FLATBUFFERS_NOEXCEPT
+    : value_(), has_value_(false) {}
+
+  // Same as the default constructor; lets `nullopt` convert to an empty
+  // Optional.
+  FLATBUFFERS_CONSTEXPR_CPP11 Optional(nullopt_t) FLATBUFFERS_NOEXCEPT
+    : value_(), has_value_(false) {}
+
+  // Constructs an engaged Optional holding a copy of `val`.
+  FLATBUFFERS_CONSTEXPR_CPP11 Optional(T val) FLATBUFFERS_NOEXCEPT
+    : value_(val), has_value_(true) {}
+
+  // Copies both the stored value and the engaged flag from `other`.
+  FLATBUFFERS_CONSTEXPR_CPP11 Optional(const Optional &other) FLATBUFFERS_NOEXCEPT
+    : value_(other.value_), has_value_(other.has_value_) {}
+
+  FLATBUFFERS_CONSTEXPR_CPP14 Optional &operator=(const Optional &other) FLATBUFFERS_NOEXCEPT {
+    value_ = other.value_;
+    has_value_ = other.has_value_;
+    return *this;
+  }
+
+  // Disengages the Optional and resets the stored value to T().
+  FLATBUFFERS_CONSTEXPR_CPP14 Optional &operator=(nullopt_t) FLATBUFFERS_NOEXCEPT {
+    value_ = T();
+    has_value_ = false;
+    return *this;
+  }
+
+  // Stores `val` and marks the Optional as engaged.
+  FLATBUFFERS_CONSTEXPR_CPP14 Optional &operator=(T val) FLATBUFFERS_NOEXCEPT {
+    value_ = val;
+    has_value_ = true;
+    return *this;
+  }
+
+  // Equivalent to assigning nullopt.
+  void reset() FLATBUFFERS_NOEXCEPT {
+    *this = nullopt;
+  }
+
+  // Exchanges both the stored value and the engaged flag with `other`.
+  void swap(Optional &other) FLATBUFFERS_NOEXCEPT {
+    std::swap(value_, other.value_);
+    std::swap(has_value_, other.has_value_);
+  }
+
+  // True when a value is held.
+  FLATBUFFERS_CONSTEXPR_CPP11 FLATBUFFERS_EXPLICIT_CPP11 operator bool() const FLATBUFFERS_NOEXCEPT {
+    return has_value_;
+  }
+
+  FLATBUFFERS_CONSTEXPR_CPP11 bool has_value() const FLATBUFFERS_NOEXCEPT {
+    return has_value_;
+  }
+
+  // Unchecked access: returns the stored value without testing has_value_.
+  FLATBUFFERS_CONSTEXPR_CPP11 const T& operator*() const FLATBUFFERS_NOEXCEPT {
+    return value_;
+  }
+
+  // Checked access: asserts (FLATBUFFERS_ASSERT) that a value is held.
+  const T& value() const {
+    FLATBUFFERS_ASSERT(has_value());
+    return value_;
+  }
+
+  // Returns the stored value when engaged, otherwise `default_value`.
+  T value_or(T default_value) const FLATBUFFERS_NOEXCEPT {
+    return has_value() ? value_ : default_value;
+  }
+
+ private:
+  T value_;
+  bool has_value_;
+};
+
+// An Optional compares equal to nullopt exactly when it is empty.
+template
+FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const Optional& opt, nullopt_t) FLATBUFFERS_NOEXCEPT {
+  return !opt;
+}
+template
+FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(nullopt_t, const Optional& opt) FLATBUFFERS_NOEXCEPT {
+  return !opt;
+}
+
+// An Optional compares equal to a plain value only when it is engaged and the
+// held value compares equal.
+template
+FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const Optional& lhs, const U& rhs) FLATBUFFERS_NOEXCEPT {
+  return static_cast(lhs) && (*lhs == rhs);
+}
+
+template
+FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const T& lhs, const Optional& rhs) FLATBUFFERS_NOEXCEPT {
+  return static_cast(rhs) && (lhs == *rhs);
+}
+
+// Compares two Optionals. They are equal when both are empty, or when both
+// hold a value and the held values compare equal; an empty Optional never
+// equals an engaged one. This matches std::optional (which Optional aliases
+// when FLATBUFFERS_USE_STD_OPTIONAL is enabled) and agrees with the nullopt
+// overloads, where an empty Optional == nullopt.
+template
+FLATBUFFERS_CONSTEXPR_CPP11 bool operator==(const Optional& lhs, const Optional& rhs) FLATBUFFERS_NOEXCEPT {
+  return static_cast(lhs) != static_cast(rhs)
+              ? false
+              : !static_cast(lhs) ? true : (*lhs == *rhs);
+}
+#endif // FLATBUFFERS_USE_STD_OPTIONAL
+
+
+// Very limited and naive partial implementation of C++20 std::span.
+#if defined(FLATBUFFERS_USE_STD_SPAN)
+  inline constexpr std::size_t dynamic_extent = std::dynamic_extent;
+  template
+  using span = std::span;
+
+#else // !defined(FLATBUFFERS_USE_STD_SPAN)
+FLATBUFFERS_CONSTEXPR std::size_t dynamic_extent = static_cast(-1);
+
+// Exclude this code if MSVC2010 or non-STL Android is active.
+// The non-STL Android doesn't have `std::is_convertible` required for SFINAE.
+#if !defined(FLATBUFFERS_SPAN_MINIMAL)
+namespace internal {
+  // This is SFINAE helper class for checking of a common condition:
+  // > This overload only participates in overload resolution
+  // > Check whether a pointer to an array of From can be converted
+  // > to a pointer to an array of To.
+  // This helper is used for checking of 'From -> const From'.
+ template + struct is_span_convertible { + using type = + typename std::conditional::value + && (Extent == dynamic_extent || N == Extent), + int, void>::type; + }; + + template + struct SpanIterator { + // TODO: upgrade to std::random_access_iterator_tag. + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = typename std::remove_cv::type; + using reference = T&; + using pointer = T*; + + // Convince MSVC compiler that this iterator is trusted (it is verified). + #ifdef _MSC_VER + using _Unchecked_type = pointer; + #endif // _MSC_VER + + SpanIterator(pointer ptr) : ptr_(ptr) {} + reference operator*() const { return *ptr_; } + pointer operator->() { return ptr_; } + SpanIterator& operator++() { ptr_++; return *this; } + SpanIterator operator++(int) { auto tmp = *this; ++(*this); return tmp; } + + friend bool operator== (const SpanIterator& lhs, const SpanIterator& rhs) { return lhs.ptr_ == rhs.ptr_; } + friend bool operator!= (const SpanIterator& lhs, const SpanIterator& rhs) { return lhs.ptr_ != rhs.ptr_; } + + private: + pointer ptr_; + }; +} // namespace internal +#endif // !defined(FLATBUFFERS_SPAN_MINIMAL) + +// T - element type; must be a complete type that is not an abstract +// class type. +// Extent - the number of elements in the sequence, or dynamic. +template +class span FLATBUFFERS_FINAL_CLASS { + public: + typedef T element_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + typedef std::size_t size_type; + + static FLATBUFFERS_CONSTEXPR size_type extent = Extent; + + // Returns the number of elements in the span. + FLATBUFFERS_CONSTEXPR_CPP11 size_type size() const FLATBUFFERS_NOEXCEPT { + return count_; + } + + // Returns the size of the sequence in bytes. 
+  FLATBUFFERS_CONSTEXPR_CPP11
+  size_type size_bytes() const FLATBUFFERS_NOEXCEPT {
+    return size() * sizeof(element_type);
+  }
+
+  // Checks if the span is empty.
+  FLATBUFFERS_CONSTEXPR_CPP11 bool empty() const FLATBUFFERS_NOEXCEPT {
+    return size() == 0;
+  }
+
+  // Returns a pointer to the beginning of the sequence.
+  FLATBUFFERS_CONSTEXPR_CPP11 pointer data() const FLATBUFFERS_NOEXCEPT {
+    return data_;
+  }
+
+  #if !defined(FLATBUFFERS_SPAN_MINIMAL)
+    using Iterator = internal::SpanIterator;
+
+    // Iterators over [data(), data() + size()).
+    Iterator begin() const { return Iterator(data()); }
+    Iterator end() const  { return Iterator(data() + size()); }
+  #endif
+
+  // Returns a reference to the idx-th element of the sequence.
+  // The behavior is undefined if the idx is greater than or equal to size().
+  FLATBUFFERS_CONSTEXPR_CPP11 reference operator[](size_type idx) const {
+    return data()[idx];
+  }
+
+  // Copies the view (pointer and element count), not the viewed elements.
+  FLATBUFFERS_CONSTEXPR_CPP11 span(const span &other) FLATBUFFERS_NOEXCEPT
+      : data_(other.data_), count_(other.count_) {}
+
+  // Rebinds this span to view the same sequence as `other` and returns *this
+  // so that assignments can be chained.
+  // NOTE(review): data_ is declared `pointer const` in the private section of
+  // this class, so the assignment to data_ below cannot compile if this
+  // operator is ever instantiated; confirm whether that const is intended.
+  FLATBUFFERS_CONSTEXPR_CPP14 span &operator=(const span &other)
+      FLATBUFFERS_NOEXCEPT {
+    data_ = other.data_;
+    count_ = other.count_;
+    return *this;
+  }
+
+  // Limited implementation of
+  // `template constexpr std::span(It first, size_type count);`.
+  //
+  // Constructs a span that is a view over the range [first, first + count);
+  // the resulting span has: data() == first and size() == count.
+  // The behavior is undefined if [first, first + count) is not a valid range,
+  // or if (extent != flatbuffers::dynamic_extent && count != extent).
+  FLATBUFFERS_CONSTEXPR_CPP11
+  explicit span(pointer first, size_type count) FLATBUFFERS_NOEXCEPT
+    : data_ (Extent == dynamic_extent ? first : (Extent == count ? first : nullptr)),
+      count_(Extent == dynamic_extent ? count : (Extent == count ? Extent : 0)) {
+      // Make span empty if the count argument is incompatible with span.
+  }
+
+  // Exclude this code if MSVC2010 is active.
The MSVC2010 isn't C++11 + // compliant, it doesn't support default template arguments for functions. + #if defined(FLATBUFFERS_SPAN_MINIMAL) + FLATBUFFERS_CONSTEXPR_CPP11 span() FLATBUFFERS_NOEXCEPT : data_(nullptr), + count_(0) { + static_assert(extent == 0 || extent == dynamic_extent, "invalid span"); + } + + #else + // Constructs an empty span whose data() == nullptr and size() == 0. + // This overload only participates in overload resolution if + // extent == 0 || extent == flatbuffers::dynamic_extent. + // A dummy template argument N is need dependency for SFINAE. + template::type = 0> + FLATBUFFERS_CONSTEXPR_CPP11 span() FLATBUFFERS_NOEXCEPT : data_(nullptr), + count_(0) { + static_assert(extent == 0 || extent == dynamic_extent, "invalid span"); + } + + // Constructs a span that is a view over the array arr; the resulting span + // has size() == N and data() == std::data(arr). These overloads only + // participate in overload resolution if + // extent == std::dynamic_extent || N == extent is true and + // std::remove_pointer_t(*)[] + // is convertible to element_type (*)[]. + template::type = 0> + FLATBUFFERS_CONSTEXPR_CPP11 span(element_type (&arr)[N]) FLATBUFFERS_NOEXCEPT + : data_(arr), count_(N) {} + + template::type = 0> + FLATBUFFERS_CONSTEXPR_CPP11 span(std::array &arr) FLATBUFFERS_NOEXCEPT + : data_(arr.data()), count_(N) {} + + //template + //FLATBUFFERS_CONSTEXPR_CPP11 span(std::array &arr) FLATBUFFERS_NOEXCEPT + // : data_(arr.data()), count_(N) {} + + template::type = 0> + FLATBUFFERS_CONSTEXPR_CPP11 span(const std::array &arr) FLATBUFFERS_NOEXCEPT + : data_(arr.data()), count_(N) {} + + // Converting constructor from another span s; + // the resulting span has size() == s.size() and data() == s.data(). + // This overload only participates in overload resolution + // if extent == std::dynamic_extent || N == extent is true and U (*)[] + // is convertible to element_type (*)[]. 
+ template::type = 0> + FLATBUFFERS_CONSTEXPR_CPP11 span(const flatbuffers::span &s) FLATBUFFERS_NOEXCEPT + : span(s.data(), s.size()) { + } + + #endif // !defined(FLATBUFFERS_SPAN_MINIMAL) + + private: + // This is a naive implementation with 'count_' member even if (Extent != dynamic_extent). + pointer const data_; + size_type count_; +}; +#endif // defined(FLATBUFFERS_USE_STD_SPAN) + +#if !defined(FLATBUFFERS_SPAN_MINIMAL) +template +FLATBUFFERS_CONSTEXPR_CPP11 +flatbuffers::span make_span(ElementType(&arr)[Extent]) FLATBUFFERS_NOEXCEPT { + return span(arr); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 +flatbuffers::span make_span(const ElementType(&arr)[Extent]) FLATBUFFERS_NOEXCEPT { + return span(arr); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 +flatbuffers::span make_span(std::array &arr) FLATBUFFERS_NOEXCEPT { + return span(arr); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 +flatbuffers::span make_span(const std::array &arr) FLATBUFFERS_NOEXCEPT { + return span(arr); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 +flatbuffers::span make_span(ElementType *first, std::size_t count) FLATBUFFERS_NOEXCEPT { + return span(first, count); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 +flatbuffers::span make_span(const ElementType *first, std::size_t count) FLATBUFFERS_NOEXCEPT { + return span(first, count); +} +#endif // !defined(FLATBUFFERS_SPAN_MINIMAL) + +} // namespace flatbuffers + +#endif // FLATBUFFERS_STL_EMULATION_H_ diff --git a/3rdparty/flatbuffers/include/flatbuffers/string.h b/3rdparty/flatbuffers/include/flatbuffers/string.h new file mode 100644 index 0000000000..97e399fd64 --- /dev/null +++ b/3rdparty/flatbuffers/include/flatbuffers/string.h @@ -0,0 +1,64 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_STRING_H_ +#define FLATBUFFERS_STRING_H_ + +#include "flatbuffers/base.h" +#include "flatbuffers/vector.h" + +namespace flatbuffers { + +struct String : public Vector { + const char *c_str() const { return reinterpret_cast(Data()); } + std::string str() const { return std::string(c_str(), size()); } + + // clang-format off + #ifdef FLATBUFFERS_HAS_STRING_VIEW + flatbuffers::string_view string_view() const { + return flatbuffers::string_view(c_str(), size()); + } + #endif // FLATBUFFERS_HAS_STRING_VIEW + // clang-format on + + bool operator<(const String &o) const { + return StringLessThan(this->data(), this->size(), o.data(), o.size()); + } +}; + +// Convenience function to get std::string from a String returning an empty +// string on null pointer. +static inline std::string GetString(const String *str) { + return str ? str->str() : ""; +} + +// Convenience function to get char* from a String returning an empty string on +// null pointer. +static inline const char *GetCstring(const String *str) { + return str ? str->c_str() : ""; +} + +#ifdef FLATBUFFERS_HAS_STRING_VIEW +// Convenience function to get string_view from a String returning an empty +// string_view on null pointer. +static inline flatbuffers::string_view GetStringView(const String *str) { + return str ? 
str->string_view() : flatbuffers::string_view(); +} +#endif // FLATBUFFERS_HAS_STRING_VIEW + +} // namespace flatbuffers + +#endif // FLATBUFFERS_STRING_H_ diff --git a/3rdparty/flatbuffers/include/flatbuffers/struct.h b/3rdparty/flatbuffers/include/flatbuffers/struct.h new file mode 100644 index 0000000000..abacc8a9a6 --- /dev/null +++ b/3rdparty/flatbuffers/include/flatbuffers/struct.h @@ -0,0 +1,53 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_STRUCT_H_ +#define FLATBUFFERS_STRUCT_H_ + +#include "flatbuffers/base.h" + +namespace flatbuffers { + +// "structs" are flat structures that do not have an offset table, thus +// always have all members present and do not support forwards/backwards +// compatible extensions. 
+ +class Struct FLATBUFFERS_FINAL_CLASS { + public: + template T GetField(uoffset_t o) const { + return ReadScalar(&data_[o]); + } + + template T GetStruct(uoffset_t o) const { + return reinterpret_cast(&data_[o]); + } + + const uint8_t *GetAddressOf(uoffset_t o) const { return &data_[o]; } + uint8_t *GetAddressOf(uoffset_t o) { return &data_[o]; } + + private: + // private constructor & copy constructor: you obtain instances of this + // class by pointing to existing data only + Struct(); + Struct(const Struct &); + Struct &operator=(const Struct &); + + uint8_t data_[1]; +}; + +} // namespace flatbuffers + +#endif // FLATBUFFERS_STRUCT_H_ diff --git a/3rdparty/flatbuffers/include/flatbuffers/table.h b/3rdparty/flatbuffers/include/flatbuffers/table.h new file mode 100644 index 0000000000..11b292476b --- /dev/null +++ b/3rdparty/flatbuffers/include/flatbuffers/table.h @@ -0,0 +1,168 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_TABLE_H_ +#define FLATBUFFERS_TABLE_H_ + +#include "flatbuffers/base.h" +#include "flatbuffers/verifier.h" + +namespace flatbuffers { + +// "tables" use an offset table (possibly shared) that allows fields to be +// omitted and added at will, but uses an extra indirection to read. 
+class Table { + public: + const uint8_t *GetVTable() const { + return data_ - ReadScalar(data_); + } + + // This gets the field offset for any of the functions below it, or 0 + // if the field was not present. + voffset_t GetOptionalFieldOffset(voffset_t field) const { + // The vtable offset is always at the start. + auto vtable = GetVTable(); + // The first element is the size of the vtable (fields + type id + itself). + auto vtsize = ReadScalar(vtable); + // If the field we're accessing is outside the vtable, we're reading older + // data, so it's the same as if the offset was 0 (not present). + return field < vtsize ? ReadScalar(vtable + field) : 0; + } + + template T GetField(voffset_t field, T defaultval) const { + auto field_offset = GetOptionalFieldOffset(field); + return field_offset ? ReadScalar(data_ + field_offset) : defaultval; + } + + template P GetPointer(voffset_t field) { + auto field_offset = GetOptionalFieldOffset(field); + auto p = data_ + field_offset; + return field_offset ? reinterpret_cast

(p + ReadScalar(p)) + : nullptr; + } + template P GetPointer(voffset_t field) const { + return const_cast(this)->GetPointer

(field); + } + + template P GetStruct(voffset_t field) const { + auto field_offset = GetOptionalFieldOffset(field); + auto p = const_cast(data_ + field_offset); + return field_offset ? reinterpret_cast

(p) : nullptr; + } + + template + flatbuffers::Optional GetOptional(voffset_t field) const { + auto field_offset = GetOptionalFieldOffset(field); + auto p = data_ + field_offset; + return field_offset ? Optional(static_cast(ReadScalar(p))) + : Optional(); + } + + template bool SetField(voffset_t field, T val, T def) { + auto field_offset = GetOptionalFieldOffset(field); + if (!field_offset) return IsTheSameAs(val, def); + WriteScalar(data_ + field_offset, val); + return true; + } + template bool SetField(voffset_t field, T val) { + auto field_offset = GetOptionalFieldOffset(field); + if (!field_offset) return false; + WriteScalar(data_ + field_offset, val); + return true; + } + + bool SetPointer(voffset_t field, const uint8_t *val) { + auto field_offset = GetOptionalFieldOffset(field); + if (!field_offset) return false; + WriteScalar(data_ + field_offset, + static_cast(val - (data_ + field_offset))); + return true; + } + + uint8_t *GetAddressOf(voffset_t field) { + auto field_offset = GetOptionalFieldOffset(field); + return field_offset ? data_ + field_offset : nullptr; + } + const uint8_t *GetAddressOf(voffset_t field) const { + return const_cast

(this)->GetAddressOf(field); + } + + bool CheckField(voffset_t field) const { + return GetOptionalFieldOffset(field) != 0; + } + + // Verify the vtable of this table. + // Call this once per table, followed by VerifyField once per field. + bool VerifyTableStart(Verifier &verifier) const { + return verifier.VerifyTableStart(data_); + } + + // Verify a particular field. + template + bool VerifyField(const Verifier &verifier, voffset_t field, + size_t align) const { + // Calling GetOptionalFieldOffset should be safe now thanks to + // VerifyTable(). + auto field_offset = GetOptionalFieldOffset(field); + // Check the actual field. + return !field_offset || verifier.VerifyField(data_, field_offset, align); + } + + // VerifyField for required fields. + template + bool VerifyFieldRequired(const Verifier &verifier, voffset_t field, + size_t align) const { + auto field_offset = GetOptionalFieldOffset(field); + return verifier.Check(field_offset != 0) && + verifier.VerifyField(data_, field_offset, align); + } + + // Versions for offsets. + bool VerifyOffset(const Verifier &verifier, voffset_t field) const { + auto field_offset = GetOptionalFieldOffset(field); + return !field_offset || verifier.VerifyOffset(data_, field_offset); + } + + bool VerifyOffsetRequired(const Verifier &verifier, voffset_t field) const { + auto field_offset = GetOptionalFieldOffset(field); + return verifier.Check(field_offset != 0) && + verifier.VerifyOffset(data_, field_offset); + } + + private: + // private constructor & copy constructor: you obtain instances of this + // class by pointing to existing data only + Table(); + Table(const Table &other); + Table &operator=(const Table &); + + uint8_t data_[1]; +}; + +// This specialization allows avoiding warnings like: +// MSVC C4800: type: forcing value to bool 'true' or 'false'. 
+template<> +inline flatbuffers::Optional Table::GetOptional( + voffset_t field) const { + auto field_offset = GetOptionalFieldOffset(field); + auto p = data_ + field_offset; + return field_offset ? Optional(ReadScalar(p) != 0) + : Optional(); +} + +} // namespace flatbuffers + +#endif // FLATBUFFERS_TABLE_H_ diff --git a/3rdparty/flatbuffers/include/flatbuffers/vector.h b/3rdparty/flatbuffers/include/flatbuffers/vector.h new file mode 100644 index 0000000000..9cb6a2da89 --- /dev/null +++ b/3rdparty/flatbuffers/include/flatbuffers/vector.h @@ -0,0 +1,393 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_VECTOR_H_ +#define FLATBUFFERS_VECTOR_H_ + +#include "flatbuffers/base.h" +#include "flatbuffers/buffer.h" +#include "flatbuffers/stl_emulation.h" + +namespace flatbuffers { + +struct String; + +// An STL compatible iterator implementation for Vector below, effectively +// calling Get() for every element. 
+template +struct VectorIterator { + typedef std::random_access_iterator_tag iterator_category; + typedef IT value_type; + typedef ptrdiff_t difference_type; + typedef IT *pointer; + typedef IT &reference; + + VectorIterator(Data data, uoffset_t i) + : data_(data + IndirectHelper::element_stride * i) {} + VectorIterator(const VectorIterator &other) : data_(other.data_) {} + VectorIterator() : data_(nullptr) {} + + VectorIterator &operator=(const VectorIterator &other) { + data_ = other.data_; + return *this; + } + + VectorIterator &operator=(VectorIterator &&other) { + data_ = other.data_; + return *this; + } + + bool operator==(const VectorIterator &other) const { + return data_ == other.data_; + } + + bool operator<(const VectorIterator &other) const { + return data_ < other.data_; + } + + bool operator!=(const VectorIterator &other) const { + return data_ != other.data_; + } + + difference_type operator-(const VectorIterator &other) const { + return (data_ - other.data_) / IndirectHelper::element_stride; + } + + // Note: return type is incompatible with the standard + // `reference operator*()`. + IT operator*() const { return IndirectHelper::Read(data_, 0); } + + // Note: return type is incompatible with the standard + // `pointer operator->()`. 
+ IT operator->() const { return IndirectHelper::Read(data_, 0); } + + VectorIterator &operator++() { + data_ += IndirectHelper::element_stride; + return *this; + } + + VectorIterator operator++(int) { + VectorIterator temp(data_, 0); + data_ += IndirectHelper::element_stride; + return temp; + } + + VectorIterator operator+(const uoffset_t &offset) const { + return VectorIterator(data_ + offset * IndirectHelper::element_stride, + 0); + } + + VectorIterator &operator+=(const uoffset_t &offset) { + data_ += offset * IndirectHelper::element_stride; + return *this; + } + + VectorIterator &operator--() { + data_ -= IndirectHelper::element_stride; + return *this; + } + + VectorIterator operator--(int) { + VectorIterator temp(data_, 0); + data_ -= IndirectHelper::element_stride; + return temp; + } + + VectorIterator operator-(const uoffset_t &offset) const { + return VectorIterator(data_ - offset * IndirectHelper::element_stride, + 0); + } + + VectorIterator &operator-=(const uoffset_t &offset) { + data_ -= offset * IndirectHelper::element_stride; + return *this; + } + + private: + Data data_; +}; + +template +using VectorConstIterator = VectorIterator; + +template +struct VectorReverseIterator : public std::reverse_iterator { + explicit VectorReverseIterator(Iterator iter) + : std::reverse_iterator(iter) {} + + // Note: return type is incompatible with the standard + // `reference operator*()`. + typename Iterator::value_type operator*() const { + auto tmp = std::reverse_iterator::current; + return *--tmp; + } + + // Note: return type is incompatible with the standard + // `pointer operator->()`. + typename Iterator::value_type operator->() const { + auto tmp = std::reverse_iterator::current; + return *--tmp; + } +}; + +// This is used as a helper type for accessing vectors. +// Vector::data() assumes the vector elements start after the length field. 
+template class Vector { + public: + typedef VectorIterator::mutable_return_type> + iterator; + typedef VectorConstIterator::return_type> + const_iterator; + typedef VectorReverseIterator reverse_iterator; + typedef VectorReverseIterator const_reverse_iterator; + + typedef typename flatbuffers::bool_constant::value> + scalar_tag; + + static FLATBUFFERS_CONSTEXPR bool is_span_observable = + scalar_tag::value && (FLATBUFFERS_LITTLEENDIAN || sizeof(T) == 1); + + uoffset_t size() const { return EndianScalar(length_); } + + // Deprecated: use size(). Here for backwards compatibility. + FLATBUFFERS_ATTRIBUTE([[deprecated("use size() instead")]]) + uoffset_t Length() const { return size(); } + + typedef typename IndirectHelper::return_type return_type; + typedef typename IndirectHelper::mutable_return_type mutable_return_type; + typedef return_type value_type; + + return_type Get(uoffset_t i) const { + FLATBUFFERS_ASSERT(i < size()); + return IndirectHelper::Read(Data(), i); + } + + return_type operator[](uoffset_t i) const { return Get(i); } + + // If this is a Vector of enums, T will be its storage type, not the enum + // type. This function makes it convenient to retrieve value with enum + // type E. + template E GetEnum(uoffset_t i) const { + return static_cast(Get(i)); + } + + // If this a vector of unions, this does the cast for you. There's no check + // to make sure this is the right type! + template const U *GetAs(uoffset_t i) const { + return reinterpret_cast(Get(i)); + } + + // If this a vector of unions, this does the cast for you. There's no check + // to make sure this is actually a string! 
+ const String *GetAsString(uoffset_t i) const { + return reinterpret_cast(Get(i)); + } + + const void *GetStructFromOffset(size_t o) const { + return reinterpret_cast(Data() + o); + } + + iterator begin() { return iterator(Data(), 0); } + const_iterator begin() const { return const_iterator(Data(), 0); } + + iterator end() { return iterator(Data(), size()); } + const_iterator end() const { return const_iterator(Data(), size()); } + + reverse_iterator rbegin() { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const { + return const_reverse_iterator(end()); + } + + reverse_iterator rend() { return reverse_iterator(begin()); } + const_reverse_iterator rend() const { + return const_reverse_iterator(begin()); + } + + const_iterator cbegin() const { return begin(); } + + const_iterator cend() const { return end(); } + + const_reverse_iterator crbegin() const { return rbegin(); } + + const_reverse_iterator crend() const { return rend(); } + + // Change elements if you have a non-const pointer to this object. + // Scalars only. See reflection.h, and the documentation. + void Mutate(uoffset_t i, const T &val) { + FLATBUFFERS_ASSERT(i < size()); + WriteScalar(data() + i, val); + } + + // Change an element of a vector of tables (or strings). + // "val" points to the new table/string, as you can obtain from + // e.g. reflection::AddFlatBuffer(). + void MutateOffset(uoffset_t i, const uint8_t *val) { + FLATBUFFERS_ASSERT(i < size()); + static_assert(sizeof(T) == sizeof(uoffset_t), "Unrelated types"); + WriteScalar(data() + i, + static_cast(val - (Data() + i * sizeof(uoffset_t)))); + } + + // Get a mutable pointer to tables/strings inside this vector. + mutable_return_type GetMutableObject(uoffset_t i) const { + FLATBUFFERS_ASSERT(i < size()); + return const_cast(IndirectHelper::Read(Data(), i)); + } + + // The raw data in little endian format. Use with care. 
+ const uint8_t *Data() const { + return reinterpret_cast(&length_ + 1); + } + + uint8_t *Data() { return reinterpret_cast(&length_ + 1); } + + // Similarly, but typed, much like std::vector::data + const T *data() const { return reinterpret_cast(Data()); } + T *data() { return reinterpret_cast(Data()); } + + template return_type LookupByKey(K key) const { + void *search_result = std::bsearch( + &key, Data(), size(), IndirectHelper::element_stride, KeyCompare); + + if (!search_result) { + return nullptr; // Key not found. + } + + const uint8_t *element = reinterpret_cast(search_result); + + return IndirectHelper::Read(element, 0); + } + + template mutable_return_type MutableLookupByKey(K key) { + return const_cast(LookupByKey(key)); + } + + protected: + // This class is only used to access pre-existing data. Don't ever + // try to construct these manually. + Vector(); + + uoffset_t length_; + + private: + // This class is a pointer. Copying will therefore create an invalid object. + // Private and unimplemented copy constructor. + Vector(const Vector &); + Vector &operator=(const Vector &); + + template static int KeyCompare(const void *ap, const void *bp) { + const K *key = reinterpret_cast(ap); + const uint8_t *data = reinterpret_cast(bp); + auto table = IndirectHelper::Read(data, 0); + + // std::bsearch compares with the operands transposed, so we negate the + // result here. 
+ return -table->KeyCompareWithValue(*key); + } +}; + +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span make_span(Vector &vec) + FLATBUFFERS_NOEXCEPT { + static_assert(Vector::is_span_observable, + "wrong type U, only LE-scalar, or byte types are allowed"); + return span(vec.data(), vec.size()); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span make_span( + const Vector &vec) FLATBUFFERS_NOEXCEPT { + static_assert(Vector::is_span_observable, + "wrong type U, only LE-scalar, or byte types are allowed"); + return span(vec.data(), vec.size()); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span make_bytes_span( + Vector &vec) FLATBUFFERS_NOEXCEPT { + static_assert(Vector::scalar_tag::value, + "wrong type U, only LE-scalar, or byte types are allowed"); + return span(vec.Data(), vec.size() * sizeof(U)); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span make_bytes_span( + const Vector &vec) FLATBUFFERS_NOEXCEPT { + static_assert(Vector::scalar_tag::value, + "wrong type U, only LE-scalar, or byte types are allowed"); + return span(vec.Data(), vec.size() * sizeof(U)); +} + +// Convenient helper functions to get a span of any vector, regardless +// of whether it is null or not (the field is not set). +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span make_span(Vector *ptr) + FLATBUFFERS_NOEXCEPT { + static_assert(Vector::is_span_observable, + "wrong type U, only LE-scalar, or byte types are allowed"); + return ptr ? make_span(*ptr) : span(); +} + +template +FLATBUFFERS_CONSTEXPR_CPP11 flatbuffers::span make_span( + const Vector *ptr) FLATBUFFERS_NOEXCEPT { + static_assert(Vector::is_span_observable, + "wrong type U, only LE-scalar, or byte types are allowed"); + return ptr ? make_span(*ptr) : span(); +} + +// Represent a vector much like the template above, but in this case we +// don't know what the element types are (used with reflection.h). 
+class VectorOfAny { + public: + uoffset_t size() const { return EndianScalar(length_); } + + const uint8_t *Data() const { + return reinterpret_cast(&length_ + 1); + } + uint8_t *Data() { return reinterpret_cast(&length_ + 1); } + + protected: + VectorOfAny(); + + uoffset_t length_; + + private: + VectorOfAny(const VectorOfAny &); + VectorOfAny &operator=(const VectorOfAny &); +}; + +template +Vector> *VectorCast(Vector> *ptr) { + static_assert(std::is_base_of::value, "Unrelated types"); + return reinterpret_cast> *>(ptr); +} + +template +const Vector> *VectorCast(const Vector> *ptr) { + static_assert(std::is_base_of::value, "Unrelated types"); + return reinterpret_cast> *>(ptr); +} + +// Convenient helper function to get the length of any vector, regardless +// of whether it is null or not (the field is not set). +template static inline size_t VectorLength(const Vector *v) { + return v ? v->size() : 0; +} + +} // namespace flatbuffers + +#endif // FLATBUFFERS_VERIFIER_H_ diff --git a/3rdparty/flatbuffers/include/flatbuffers/vector_downward.h b/3rdparty/flatbuffers/include/flatbuffers/vector_downward.h new file mode 100644 index 0000000000..e0aed840b0 --- /dev/null +++ b/3rdparty/flatbuffers/include/flatbuffers/vector_downward.h @@ -0,0 +1,273 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef FLATBUFFERS_VECTOR_DOWNWARD_H_ +#define FLATBUFFERS_VECTOR_DOWNWARD_H_ + +#include + +#include "flatbuffers/base.h" +#include "flatbuffers/default_allocator.h" +#include "flatbuffers/detached_buffer.h" + +namespace flatbuffers { + +// This is a minimal replication of std::vector functionality, +// except growing from higher to lower addresses. i.e. push_back() inserts data +// in the lowest address in the vector. +// Since this vector leaves the lower part unused, we support a "scratch-pad" +// that can be stored there for temporary data, to share the allocated space. +// Essentially, this supports 2 std::vectors in a single buffer. +class vector_downward { + public: + explicit vector_downward(size_t initial_size, Allocator *allocator, + bool own_allocator, size_t buffer_minalign) + : allocator_(allocator), + own_allocator_(own_allocator), + initial_size_(initial_size), + buffer_minalign_(buffer_minalign), + reserved_(0), + size_(0), + buf_(nullptr), + cur_(nullptr), + scratch_(nullptr) {} + + vector_downward(vector_downward &&other) noexcept + // clang-format on + : allocator_(other.allocator_), + own_allocator_(other.own_allocator_), + initial_size_(other.initial_size_), + buffer_minalign_(other.buffer_minalign_), + reserved_(other.reserved_), + size_(other.size_), + buf_(other.buf_), + cur_(other.cur_), + scratch_(other.scratch_) { + // No change in other.allocator_ + // No change in other.initial_size_ + // No change in other.buffer_minalign_ + other.own_allocator_ = false; + other.reserved_ = 0; + other.buf_ = nullptr; + other.cur_ = nullptr; + other.scratch_ = nullptr; + } + + vector_downward &operator=(vector_downward &&other) noexcept { + // Move construct a temporary and swap idiom + vector_downward temp(std::move(other)); + swap(temp); + return *this; + } + + ~vector_downward() { + clear_buffer(); + clear_allocator(); + } + + void reset() { + clear_buffer(); + clear(); + } + + void clear() { + if (buf_) { + cur_ = buf_ + reserved_; + } 
else { + reserved_ = 0; + cur_ = nullptr; + } + size_ = 0; + clear_scratch(); + } + + void clear_scratch() { scratch_ = buf_; } + + void clear_allocator() { + if (own_allocator_ && allocator_) { delete allocator_; } + allocator_ = nullptr; + own_allocator_ = false; + } + + void clear_buffer() { + if (buf_) Deallocate(allocator_, buf_, reserved_); + buf_ = nullptr; + } + + // Relinquish the pointer to the caller. + uint8_t *release_raw(size_t &allocated_bytes, size_t &offset) { + auto *buf = buf_; + allocated_bytes = reserved_; + offset = static_cast(cur_ - buf_); + + // release_raw only relinquishes the buffer ownership. + // Does not deallocate or reset the allocator. Destructor will do that. + buf_ = nullptr; + clear(); + return buf; + } + + // Relinquish the pointer to the caller. + DetachedBuffer release() { + // allocator ownership (if any) is transferred to DetachedBuffer. + DetachedBuffer fb(allocator_, own_allocator_, buf_, reserved_, cur_, + size()); + if (own_allocator_) { + allocator_ = nullptr; + own_allocator_ = false; + } + buf_ = nullptr; + clear(); + return fb; + } + + size_t ensure_space(size_t len) { + FLATBUFFERS_ASSERT(cur_ >= scratch_ && scratch_ >= buf_); + if (len > static_cast(cur_ - scratch_)) { reallocate(len); } + // Beyond this, signed offsets may not have enough range: + // (FlatBuffers > 2GB not supported). + FLATBUFFERS_ASSERT(size() < FLATBUFFERS_MAX_BUFFER_SIZE); + return len; + } + + inline uint8_t *make_space(size_t len) { + if (len) { + ensure_space(len); + cur_ -= len; + size_ += static_cast(len); + } + return cur_; + } + + // Returns nullptr if using the DefaultAllocator. 
+ Allocator *get_custom_allocator() { return allocator_; } + + inline uoffset_t size() const { return size_; } + + uoffset_t scratch_size() const { + return static_cast(scratch_ - buf_); + } + + size_t capacity() const { return reserved_; } + + uint8_t *data() const { + FLATBUFFERS_ASSERT(cur_); + return cur_; + } + + uint8_t *scratch_data() const { + FLATBUFFERS_ASSERT(buf_); + return buf_; + } + + uint8_t *scratch_end() const { + FLATBUFFERS_ASSERT(scratch_); + return scratch_; + } + + uint8_t *data_at(size_t offset) const { return buf_ + reserved_ - offset; } + + void push(const uint8_t *bytes, size_t num) { + if (num > 0) { memcpy(make_space(num), bytes, num); } + } + + // Specialized version of push() that avoids memcpy call for small data. + template void push_small(const T &little_endian_t) { + make_space(sizeof(T)); + *reinterpret_cast(cur_) = little_endian_t; + } + + template void scratch_push_small(const T &t) { + ensure_space(sizeof(T)); + *reinterpret_cast(scratch_) = t; + scratch_ += sizeof(T); + } + + // fill() is most frequently called with small byte counts (<= 4), + // which is why we're using loops rather than calling memset. + void fill(size_t zero_pad_bytes) { + make_space(zero_pad_bytes); + for (size_t i = 0; i < zero_pad_bytes; i++) cur_[i] = 0; + } + + // Version for when we know the size is larger. 
+ // Precondition: zero_pad_bytes > 0 + void fill_big(size_t zero_pad_bytes) { + memset(make_space(zero_pad_bytes), 0, zero_pad_bytes); + } + + void pop(size_t bytes_to_remove) { + cur_ += bytes_to_remove; + size_ -= static_cast(bytes_to_remove); + } + + void scratch_pop(size_t bytes_to_remove) { scratch_ -= bytes_to_remove; } + + void swap(vector_downward &other) { + using std::swap; + swap(allocator_, other.allocator_); + swap(own_allocator_, other.own_allocator_); + swap(initial_size_, other.initial_size_); + swap(buffer_minalign_, other.buffer_minalign_); + swap(reserved_, other.reserved_); + swap(size_, other.size_); + swap(buf_, other.buf_); + swap(cur_, other.cur_); + swap(scratch_, other.scratch_); + } + + void swap_allocator(vector_downward &other) { + using std::swap; + swap(allocator_, other.allocator_); + swap(own_allocator_, other.own_allocator_); + } + + private: + // You shouldn't really be copying instances of this class. + FLATBUFFERS_DELETE_FUNC(vector_downward(const vector_downward &)); + FLATBUFFERS_DELETE_FUNC(vector_downward &operator=(const vector_downward &)); + + Allocator *allocator_; + bool own_allocator_; + size_t initial_size_; + size_t buffer_minalign_; + size_t reserved_; + uoffset_t size_; + uint8_t *buf_; + uint8_t *cur_; // Points at location between empty (below) and used (above). + uint8_t *scratch_; // Points to the end of the scratchpad in use. + + void reallocate(size_t len) { + auto old_reserved = reserved_; + auto old_size = size(); + auto old_scratch_size = scratch_size(); + reserved_ += + (std::max)(len, old_reserved ? 
old_reserved / 2 : initial_size_); + reserved_ = (reserved_ + buffer_minalign_ - 1) & ~(buffer_minalign_ - 1); + if (buf_) { + buf_ = ReallocateDownward(allocator_, buf_, old_reserved, reserved_, + old_size, old_scratch_size); + } else { + buf_ = Allocate(allocator_, reserved_); + } + cur_ = buf_ + reserved_ - old_size; + scratch_ = buf_ + old_scratch_size; + } +}; + +} // namespace flatbuffers + +#endif // FLATBUFFERS_VECTOR_DOWNWARD_H_ diff --git a/3rdparty/flatbuffers/include/flatbuffers/verifier.h b/3rdparty/flatbuffers/include/flatbuffers/verifier.h new file mode 100644 index 0000000000..87d3f54a5d --- /dev/null +++ b/3rdparty/flatbuffers/include/flatbuffers/verifier.h @@ -0,0 +1,317 @@ +/* + * Copyright 2021 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLATBUFFERS_VERIFIER_H_ +#define FLATBUFFERS_VERIFIER_H_ + +#include "flatbuffers/base.h" +#include "flatbuffers/vector.h" + +namespace flatbuffers { + +// Helper class to verify the integrity of a FlatBuffer +class Verifier FLATBUFFERS_FINAL_CLASS { + public: + struct Options { + // The maximum nesting of tables and vectors before we call it invalid. + uoffset_t max_depth = 64; + // The maximum number of tables we will verify before we call it invalid. + uoffset_t max_tables = 1000000; + // If true, verify all data is aligned. 
+ bool check_alignment = true; + // If true, run verifier on nested flatbuffers + bool check_nested_flatbuffers = true; + }; + + explicit Verifier(const uint8_t *const buf, const size_t buf_len, + const Options &opts) + : buf_(buf), size_(buf_len), opts_(opts) { + FLATBUFFERS_ASSERT(size_ < FLATBUFFERS_MAX_BUFFER_SIZE); + } + + // Deprecated API, please construct with Verifier::Options. + Verifier(const uint8_t *const buf, const size_t buf_len, + const uoffset_t max_depth = 64, const uoffset_t max_tables = 1000000, + const bool check_alignment = true) + : Verifier(buf, buf_len, [&] { + Options opts; + opts.max_depth = max_depth; + opts.max_tables = max_tables; + opts.check_alignment = check_alignment; + return opts; + }()) {} + + // Central location where any verification failures register. + bool Check(const bool ok) const { + // clang-format off + #ifdef FLATBUFFERS_DEBUG_VERIFICATION_FAILURE + FLATBUFFERS_ASSERT(ok); + #endif + #ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE + if (!ok) + upper_bound_ = 0; + #endif + // clang-format on + return ok; + } + + // Verify any range within the buffer. + bool Verify(const size_t elem, const size_t elem_len) const { + // clang-format off + #ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE + auto upper_bound = elem + elem_len; + if (upper_bound_ < upper_bound) + upper_bound_ = upper_bound; + #endif + // clang-format on + return Check(elem_len < size_ && elem <= size_ - elem_len); + } + + bool VerifyAlignment(const size_t elem, const size_t align) const { + return Check((elem & (align - 1)) == 0 || !opts_.check_alignment); + } + + // Verify a range indicated by sizeof(T). + template bool Verify(const size_t elem) const { + return VerifyAlignment(elem, sizeof(T)) && Verify(elem, sizeof(T)); + } + + bool VerifyFromPointer(const uint8_t *const p, const size_t len) { + return Verify(static_cast(p - buf_), len); + } + + // Verify relative to a known-good base pointer. 
+ bool VerifyFieldStruct(const uint8_t *const base, const voffset_t elem_off, + const size_t elem_len, const size_t align) const { + const auto f = static_cast(base - buf_) + elem_off; + return VerifyAlignment(f, align) && Verify(f, elem_len); + } + + template + bool VerifyField(const uint8_t *const base, const voffset_t elem_off, + const size_t align) const { + const auto f = static_cast(base - buf_) + elem_off; + return VerifyAlignment(f, align) && Verify(f, sizeof(T)); + } + + // Verify a pointer (may be NULL) of a table type. + template bool VerifyTable(const T *const table) { + return !table || table->Verify(*this); + } + + // Verify a pointer (may be NULL) of any vector type. + template bool VerifyVector(const Vector *const vec) const { + return !vec || VerifyVectorOrString(reinterpret_cast(vec), + sizeof(T)); + } + + // Verify a pointer (may be NULL) of a vector to struct. + template + bool VerifyVector(const Vector *const vec) const { + return VerifyVector(reinterpret_cast *>(vec)); + } + + // Verify a pointer (may be NULL) to string. + bool VerifyString(const String *const str) const { + size_t end; + return !str || (VerifyVectorOrString(reinterpret_cast(str), + 1, &end) && + Verify(end, 1) && // Must have terminator + Check(buf_[end] == '\0')); // Terminating byte must be 0. + } + + // Common code between vectors and strings. + bool VerifyVectorOrString(const uint8_t *const vec, const size_t elem_size, + size_t *const end = nullptr) const { + const auto veco = static_cast(vec - buf_); + // Check we can read the size field. + if (!Verify(veco)) return false; + // Check the whole array. If this is a string, the byte past the array must + // be 0. + const auto size = ReadScalar(vec); + const auto max_elems = FLATBUFFERS_MAX_BUFFER_SIZE / elem_size; + if (!Check(size < max_elems)) + return false; // Protect against byte_size overflowing. 
+ const auto byte_size = sizeof(size) + elem_size * size; + if (end) *end = veco + byte_size; + return Verify(veco, byte_size); + } + + // Special case for string contents, after the above has been called. + bool VerifyVectorOfStrings(const Vector> *const vec) const { + if (vec) { + for (uoffset_t i = 0; i < vec->size(); i++) { + if (!VerifyString(vec->Get(i))) return false; + } + } + return true; + } + + // Special case for table contents, after the above has been called. + template + bool VerifyVectorOfTables(const Vector> *const vec) { + if (vec) { + for (uoffset_t i = 0; i < vec->size(); i++) { + if (!vec->Get(i)->Verify(*this)) return false; + } + } + return true; + } + + __suppress_ubsan__("unsigned-integer-overflow") bool VerifyTableStart( + const uint8_t *const table) { + // Check the vtable offset. + const auto tableo = static_cast(table - buf_); + if (!Verify(tableo)) return false; + // This offset may be signed, but doing the subtraction unsigned always + // gives the result we want. + const auto vtableo = + tableo - static_cast(ReadScalar(table)); + // Check the vtable size field, then check vtable fits in its entirety. + if (!(VerifyComplexity() && Verify(vtableo) && + VerifyAlignment(ReadScalar(buf_ + vtableo), + sizeof(voffset_t)))) + return false; + const auto vsize = ReadScalar(buf_ + vtableo); + return Check((vsize & 1) == 0) && Verify(vtableo, vsize); + } + + template + bool VerifyBufferFromStart(const char *const identifier, const size_t start) { + // Buffers have to be of some size to be valid. The reason it is a runtime + // check instead of static_assert, is that nested flatbuffers go through + // this call and their size is determined at runtime. 
+ if (!Check(size_ >= FLATBUFFERS_MIN_BUFFER_SIZE)) return false; + + // If an identifier is provided, check that we have a buffer + if (identifier && !Check((size_ >= 2 * sizeof(flatbuffers::uoffset_t) && + BufferHasIdentifier(buf_ + start, identifier)))) { + return false; + } + + // Call T::Verify, which must be in the generated code for this type. + const auto o = VerifyOffset(start); + return Check(o != 0) && + reinterpret_cast(buf_ + start + o)->Verify(*this) + // clang-format off + #ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE + && GetComputedSize() + #endif + ; + // clang-format on + } + + template + bool VerifyNestedFlatBuffer(const Vector *const buf, + const char *const identifier) { + // Caller opted out of this. + if (!opts_.check_nested_flatbuffers) return true; + + // An empty buffer is OK as it indicates not present. + if (!buf) return true; + + // If there is a nested buffer, it must be greater than the min size. + if (!Check(buf->size() >= FLATBUFFERS_MIN_BUFFER_SIZE)) return false; + + Verifier nested_verifier(buf->data(), buf->size()); + return nested_verifier.VerifyBuffer(identifier); + } + + // Verify this whole buffer, starting with root type T. + template bool VerifyBuffer() { return VerifyBuffer(nullptr); } + + template bool VerifyBuffer(const char *const identifier) { + return VerifyBufferFromStart(identifier, 0); + } + + template + bool VerifySizePrefixedBuffer(const char *const identifier) { + return Verify(0U) && + Check(ReadScalar(buf_) == size_ - sizeof(uoffset_t)) && + VerifyBufferFromStart(identifier, sizeof(uoffset_t)); + } + + uoffset_t VerifyOffset(const size_t start) const { + if (!Verify(start)) return 0; + const auto o = ReadScalar(buf_ + start); + // May not point to itself. + if (!Check(o != 0)) return 0; + // Can't wrap around / buffers are max 2GB. + if (!Check(static_cast(o) >= 0)) return 0; + // Must be inside the buffer to create a pointer from it (pointer outside + // buffer is UB). 
+ if (!Verify(start + o, 1)) return 0; + return o; + } + + uoffset_t VerifyOffset(const uint8_t *const base, + const voffset_t start) const { + return VerifyOffset(static_cast(base - buf_) + start); + } + + // Called at the start of a table to increase counters measuring data + // structure depth and amount, and possibly bails out with false if limits set + // by the constructor have been hit. Needs to be balanced with EndTable(). + bool VerifyComplexity() { + depth_++; + num_tables_++; + return Check(depth_ <= opts_.max_depth && num_tables_ <= opts_.max_tables); + } + + // Called at the end of a table to pop the depth count. + bool EndTable() { + depth_--; + return true; + } + + // Returns the message size in bytes + size_t GetComputedSize() const { + // clang-format off + #ifdef FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE + uintptr_t size = upper_bound_; + // Align the size to uoffset_t + size = (size - 1 + sizeof(uoffset_t)) & ~(sizeof(uoffset_t) - 1); + return (size > size_) ? 0 : size; + #else + // Must turn on FLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE for this to work. 
+ (void)upper_bound_; + FLATBUFFERS_ASSERT(false); + return 0; + #endif + // clang-format on + } + + std::vector *GetFlexReuseTracker() { return flex_reuse_tracker_; } + + void SetFlexReuseTracker(std::vector *const rt) { + flex_reuse_tracker_ = rt; + } + + private: + const uint8_t *buf_; + const size_t size_; + const Options opts_; + + mutable size_t upper_bound_ = 0; + + uoffset_t depth_ = 0; + uoffset_t num_tables_ = 0; + std::vector *flex_reuse_tracker_ = nullptr; +}; + +} // namespace flatbuffers + +#endif // FLATBUFFERS_VERIFIER_H_ diff --git a/CMakeLists.txt b/CMakeLists.txt index 5543cba93a..6f4a3513f0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -471,7 +471,7 @@ OCV_OPTION(WITH_OBSENSOR "Include obsensor support (Orbbec RGB-D modules: Astra+ OCV_OPTION(WITH_CANN "Include CANN support" OFF VISIBLE_IF TRUE VERIFY HAVE_CANN) -OCV_OPTION(WITH_FLATBUFFERS "Include FlatBuffers support" OFF +OCV_OPTION(WITH_FLATBUFFERS "Include Flatbuffers support (required by DNN/TFLite importer)" ON VISIBLE_IF TRUE VERIFY HAVE_FLATBUFFERS) @@ -753,7 +753,7 @@ include(cmake/OpenCVFindLibsVideo.cmake) include(cmake/OpenCVFindLibsPerf.cmake) include(cmake/OpenCVFindLAPACK.cmake) include(cmake/OpenCVFindProtobuf.cmake) -include(cmake/OpenCVFindFlatBuffers.cmake) +include(cmake/OpenCVDetectFlatbuffers.cmake) if(WITH_TENGINE) include(cmake/OpenCVFindTengine.cmake) endif() diff --git a/cmake/OpenCVDetectFlatbuffers.cmake b/cmake/OpenCVDetectFlatbuffers.cmake new file mode 100644 index 0000000000..aad3b3817f --- /dev/null +++ b/cmake/OpenCVDetectFlatbuffers.cmake @@ -0,0 +1,19 @@ +if(WITH_FLATBUFFERS) + set(HAVE_FLATBUFFERS 1) + set(flatbuffers_VERSION "23.1.21") + ocv_install_3rdparty_licenses(flatbuffers "${OpenCV_SOURCE_DIR}/3rdparty/flatbuffers/LICENSE.txt") + ocv_add_external_target(flatbuffers "${OpenCV_SOURCE_DIR}/3rdparty/flatbuffers/include" "" "HAVE_FLATBUFFERS=1") + set(CUSTOM_STATUS_flatbuffers " Flatbuffers:" "builtin/3rdparty (${flatbuffers_VERSION})") +endif() + 
+if(WITH_FLATBUFFERS OR HAVE_FLATBUFFERS) + list(APPEND CUSTOM_STATUS flatbuffers) + + if(HAVE_FLATBUFFERS) + if(NOT CUSTOM_STATUS_flatbuffers) + list(APPEND CUSTOM_STATUS_flatbuffers " Flatbuffers:" "${flatbuffers_VERSION}") + endif() + else() + list(APPEND CUSTOM_STATUS_flatbuffers " Flatbuffers:" "NO") + endif() +endif() diff --git a/cmake/OpenCVFindFlatBuffers.cmake b/cmake/OpenCVFindFlatBuffers.cmake deleted file mode 100644 index 2b204314eb..0000000000 --- a/cmake/OpenCVFindFlatBuffers.cmake +++ /dev/null @@ -1,15 +0,0 @@ -set(HAVE_FLATBUFFERS FALSE) - -if(NOT WITH_FLATBUFFERS) - return() -endif() - -list(APPEND CUSTOM_STATUS flatbuffers) - -find_package(flatbuffers QUIET) -if(flatbuffers_FOUND) - set(HAVE_FLATBUFFERS 1) - list(APPEND CUSTOM_STATUS_flatbuffers " FlatBuffers:" "${flatbuffers_VERSION}") -else() - list(APPEND CUSTOM_STATUS_flatbuffers " FlatBuffers:" "NO") -endif() diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index e0e991069a..437042958e 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -1632,6 +1632,17 @@ function(ocv_add_external_target name inc link def) endif() endfunction() +function(ocv_install_used_external_targets) + if(NOT BUILD_SHARED_LIBS + AND NOT (CMAKE_VERSION VERSION_LESS "3.13.0") # upgrade CMake: https://gitlab.kitware.com/cmake/cmake/-/merge_requests/2152 + ) + foreach(tgt in ${ARGN}) + if(tgt MATCHES "^ocv\.3rdparty\.") + install(TARGETS ${tgt} EXPORT OpenCVModules) + endif() + endforeach() + endif() +endfunction() # Returns the first non-interface target function(ocv_get_imported_target imported interface) diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index e5aca128be..d285e544c0 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -127,23 +127,27 @@ else() set(fw_inc "${CMAKE_CURRENT_LIST_DIR}/misc/caffe" "${CMAKE_CURRENT_LIST_DIR}/misc/tensorflow" "${CMAKE_CURRENT_LIST_DIR}/misc/onnx") endif() +ocv_option(OPENCV_DNN_TFLITE "Build with TFLite 
support" (TARGET ocv.3rdparty.flatbuffers)) +if(TARGET ocv.3rdparty.flatbuffers AND OPENCV_DNN_TFLITE) + if(NOT HAVE_FLATBUFFERS) + message(FATAL_ERROR "DNN: TFLite is not supported without enabled 'flatbuffers'. Check build configuration.") + endif() + list(APPEND libs ocv.3rdparty.flatbuffers) + list(APPEND fw_hdrs "${CMAKE_CURRENT_LIST_DIR}/misc/tflite/schema_generated.h") + list(APPEND fw_inc "${CMAKE_CURRENT_LIST_DIR}/misc/tflite") + + # Schema is generated by this command: + #add_custom_command( + # OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema_generated.h" + # COMMAND flatbuffers::flatc --cpp -o "${CMAKE_CURRENT_BINARY_DIR}" "${CMAKE_CURRENT_LIST_DIR}/src/tflite/schema.fbs") +endif() + list(APPEND include_dirs ${fw_inc}) list(APPEND libs ${Protobuf_LIBRARIES}) if(NOT BUILD_PROTOBUF) list(APPEND include_dirs ${Protobuf_INCLUDE_DIRS}) endif() -if(HAVE_FLATBUFFERS) - list(APPEND libs flatbuffers::flatbuffers) - list(APPEND fw_srcs "${CMAKE_CURRENT_BINARY_DIR}/schema_generated.h") - - add_custom_command( - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema_generated.h" - COMMAND flatbuffers::flatc --cpp -o "${CMAKE_CURRENT_BINARY_DIR}" "${CMAKE_CURRENT_LIST_DIR}/src/tflite/schema.fbs") - - ocv_target_compile_definitions(${the_module} PRIVATE "HAVE_FLATBUFFERS=1") -endif() - set(sources_options "") list(APPEND libs ${LAPACK_LIBRARIES}) @@ -232,6 +236,9 @@ if(TARGET ocv.3rdparty.openvino AND OPENCV_DNN_OPENVINO) endif() endif() + +ocv_install_used_external_targets(${libs} ${dnn_runtime_libs}) + ocv_glob_module_sources(${sources_options} SOURCES ${fw_srcs} ${webnn_srcs}) ocv_create_module(${libs} ${dnn_runtime_libs}) ocv_add_samples() @@ -292,8 +299,12 @@ if(TARGET ocv.3rdparty.cann AND OPENCV_TEST_DNN_CANN) endif() endif() -if(HAVE_FLATBUFFERS) +ocv_option(OPENCV_TEST_DNN_TFLITE "Build test with TFLite" (OPENCV_DNN_TFLITE)) +if(OPENCV_TEST_DNN_TFLITE) if(TARGET opencv_test_dnn) - ocv_target_compile_definitions(opencv_test_dnn PRIVATE "HAVE_FLATBUFFERS=1") + 
ocv_target_compile_definitions(opencv_test_dnn PRIVATE "OPENCV_TEST_DNN_TFLITE=1") + endif() + if(TARGET opencv_perf_dnn) + ocv_target_compile_definitions(opencv_perf_dnn PRIVATE "OPENCV_TEST_DNN_TFLITE=1") endif() endif() diff --git a/modules/dnn/misc/tflite/schema_generated.h b/modules/dnn/misc/tflite/schema_generated.h new file mode 100644 index 0000000000..33872813da --- /dev/null +++ b/modules/dnn/misc/tflite/schema_generated.h @@ -0,0 +1,10543 @@ +// automatically generated by the FlatBuffers compiler, do not modify + + +#ifndef FLATBUFFERS_GENERATED_SCHEMA_OPENCV_TFLITE_H_ +#define FLATBUFFERS_GENERATED_SCHEMA_OPENCV_TFLITE_H_ + +#include "flatbuffers/flatbuffers.h" + +// Ensure the included flatbuffers.h is the same version as when this file was +// generated, otherwise it may not be compatible. +static_assert(FLATBUFFERS_VERSION_MAJOR == 23 && + FLATBUFFERS_VERSION_MINOR == 1 && + FLATBUFFERS_VERSION_REVISION == 21, + "Non-compatible flatbuffers version included"); + +namespace opencv_tflite { + +struct CustomQuantization; +struct CustomQuantizationBuilder; + +struct QuantizationParameters; +struct QuantizationParametersBuilder; + +struct Int32Vector; +struct Int32VectorBuilder; + +struct Uint16Vector; +struct Uint16VectorBuilder; + +struct Uint8Vector; +struct Uint8VectorBuilder; + +struct DimensionMetadata; +struct DimensionMetadataBuilder; + +struct SparsityParameters; +struct SparsityParametersBuilder; + +struct VariantSubType; +struct VariantSubTypeBuilder; + +struct Tensor; +struct TensorBuilder; + +struct Conv2DOptions; +struct Conv2DOptionsBuilder; + +struct Conv3DOptions; +struct Conv3DOptionsBuilder; + +struct Pool2DOptions; +struct Pool2DOptionsBuilder; + +struct DepthwiseConv2DOptions; +struct DepthwiseConv2DOptionsBuilder; + +struct ConcatEmbeddingsOptions; +struct ConcatEmbeddingsOptionsBuilder; + +struct LSHProjectionOptions; +struct LSHProjectionOptionsBuilder; + +struct SVDFOptions; +struct SVDFOptionsBuilder; + +struct RNNOptions; +struct 
RNNOptionsBuilder; + +struct SequenceRNNOptions; +struct SequenceRNNOptionsBuilder; + +struct BidirectionalSequenceRNNOptions; +struct BidirectionalSequenceRNNOptionsBuilder; + +struct FullyConnectedOptions; +struct FullyConnectedOptionsBuilder; + +struct SoftmaxOptions; +struct SoftmaxOptionsBuilder; + +struct ConcatenationOptions; +struct ConcatenationOptionsBuilder; + +struct AddOptions; +struct AddOptionsBuilder; + +struct MulOptions; +struct MulOptionsBuilder; + +struct L2NormOptions; +struct L2NormOptionsBuilder; + +struct LocalResponseNormalizationOptions; +struct LocalResponseNormalizationOptionsBuilder; + +struct LSTMOptions; +struct LSTMOptionsBuilder; + +struct UnidirectionalSequenceLSTMOptions; +struct UnidirectionalSequenceLSTMOptionsBuilder; + +struct BidirectionalSequenceLSTMOptions; +struct BidirectionalSequenceLSTMOptionsBuilder; + +struct ResizeBilinearOptions; +struct ResizeBilinearOptionsBuilder; + +struct ResizeNearestNeighborOptions; +struct ResizeNearestNeighborOptionsBuilder; + +struct CallOptions; +struct CallOptionsBuilder; + +struct PadOptions; +struct PadOptionsBuilder; + +struct PadV2Options; +struct PadV2OptionsBuilder; + +struct ReshapeOptions; +struct ReshapeOptionsBuilder; + +struct SpaceToBatchNDOptions; +struct SpaceToBatchNDOptionsBuilder; + +struct BatchToSpaceNDOptions; +struct BatchToSpaceNDOptionsBuilder; + +struct SkipGramOptions; +struct SkipGramOptionsBuilder; + +struct SpaceToDepthOptions; +struct SpaceToDepthOptionsBuilder; + +struct DepthToSpaceOptions; +struct DepthToSpaceOptionsBuilder; + +struct SubOptions; +struct SubOptionsBuilder; + +struct DivOptions; +struct DivOptionsBuilder; + +struct TopKV2Options; +struct TopKV2OptionsBuilder; + +struct EmbeddingLookupSparseOptions; +struct EmbeddingLookupSparseOptionsBuilder; + +struct GatherOptions; +struct GatherOptionsBuilder; + +struct TransposeOptions; +struct TransposeOptionsBuilder; + +struct ExpOptions; +struct ExpOptionsBuilder; + +struct CosOptions; +struct 
CosOptionsBuilder; + +struct ReducerOptions; +struct ReducerOptionsBuilder; + +struct SqueezeOptions; +struct SqueezeOptionsBuilder; + +struct SplitOptions; +struct SplitOptionsBuilder; + +struct SplitVOptions; +struct SplitVOptionsBuilder; + +struct StridedSliceOptions; +struct StridedSliceOptionsBuilder; + +struct LogSoftmaxOptions; +struct LogSoftmaxOptionsBuilder; + +struct CastOptions; +struct CastOptionsBuilder; + +struct DequantizeOptions; +struct DequantizeOptionsBuilder; + +struct MaximumMinimumOptions; +struct MaximumMinimumOptionsBuilder; + +struct TileOptions; +struct TileOptionsBuilder; + +struct ArgMaxOptions; +struct ArgMaxOptionsBuilder; + +struct ArgMinOptions; +struct ArgMinOptionsBuilder; + +struct GreaterOptions; +struct GreaterOptionsBuilder; + +struct GreaterEqualOptions; +struct GreaterEqualOptionsBuilder; + +struct LessOptions; +struct LessOptionsBuilder; + +struct LessEqualOptions; +struct LessEqualOptionsBuilder; + +struct NegOptions; +struct NegOptionsBuilder; + +struct SelectOptions; +struct SelectOptionsBuilder; + +struct SliceOptions; +struct SliceOptionsBuilder; + +struct TransposeConvOptions; +struct TransposeConvOptionsBuilder; + +struct ExpandDimsOptions; +struct ExpandDimsOptionsBuilder; + +struct SparseToDenseOptions; +struct SparseToDenseOptionsBuilder; + +struct EqualOptions; +struct EqualOptionsBuilder; + +struct NotEqualOptions; +struct NotEqualOptionsBuilder; + +struct ShapeOptions; +struct ShapeOptionsBuilder; + +struct RankOptions; +struct RankOptionsBuilder; + +struct PowOptions; +struct PowOptionsBuilder; + +struct FakeQuantOptions; +struct FakeQuantOptionsBuilder; + +struct PackOptions; +struct PackOptionsBuilder; + +struct LogicalOrOptions; +struct LogicalOrOptionsBuilder; + +struct OneHotOptions; +struct OneHotOptionsBuilder; + +struct AbsOptions; +struct AbsOptionsBuilder; + +struct HardSwishOptions; +struct HardSwishOptionsBuilder; + +struct LogicalAndOptions; +struct LogicalAndOptionsBuilder; + +struct 
LogicalNotOptions; +struct LogicalNotOptionsBuilder; + +struct UnpackOptions; +struct UnpackOptionsBuilder; + +struct FloorDivOptions; +struct FloorDivOptionsBuilder; + +struct SquareOptions; +struct SquareOptionsBuilder; + +struct ZerosLikeOptions; +struct ZerosLikeOptionsBuilder; + +struct FillOptions; +struct FillOptionsBuilder; + +struct FloorModOptions; +struct FloorModOptionsBuilder; + +struct RangeOptions; +struct RangeOptionsBuilder; + +struct LeakyReluOptions; +struct LeakyReluOptionsBuilder; + +struct SquaredDifferenceOptions; +struct SquaredDifferenceOptionsBuilder; + +struct MirrorPadOptions; +struct MirrorPadOptionsBuilder; + +struct UniqueOptions; +struct UniqueOptionsBuilder; + +struct ReverseV2Options; +struct ReverseV2OptionsBuilder; + +struct AddNOptions; +struct AddNOptionsBuilder; + +struct GatherNdOptions; +struct GatherNdOptionsBuilder; + +struct WhereOptions; +struct WhereOptionsBuilder; + +struct ReverseSequenceOptions; +struct ReverseSequenceOptionsBuilder; + +struct MatrixDiagOptions; +struct MatrixDiagOptionsBuilder; + +struct QuantizeOptions; +struct QuantizeOptionsBuilder; + +struct MatrixSetDiagOptions; +struct MatrixSetDiagOptionsBuilder; + +struct IfOptions; +struct IfOptionsBuilder; + +struct CallOnceOptions; +struct CallOnceOptionsBuilder; + +struct WhileOptions; +struct WhileOptionsBuilder; + +struct NonMaxSuppressionV4Options; +struct NonMaxSuppressionV4OptionsBuilder; + +struct NonMaxSuppressionV5Options; +struct NonMaxSuppressionV5OptionsBuilder; + +struct ScatterNdOptions; +struct ScatterNdOptionsBuilder; + +struct SelectV2Options; +struct SelectV2OptionsBuilder; + +struct DensifyOptions; +struct DensifyOptionsBuilder; + +struct SegmentSumOptions; +struct SegmentSumOptionsBuilder; + +struct BatchMatMulOptions; +struct BatchMatMulOptionsBuilder; + +struct CumsumOptions; +struct CumsumOptionsBuilder; + +struct BroadcastToOptions; +struct BroadcastToOptionsBuilder; + +struct Rfft2dOptions; +struct Rfft2dOptionsBuilder; + +struct 
HashtableOptions; +struct HashtableOptionsBuilder; + +struct HashtableFindOptions; +struct HashtableFindOptionsBuilder; + +struct HashtableImportOptions; +struct HashtableImportOptionsBuilder; + +struct HashtableSizeOptions; +struct HashtableSizeOptionsBuilder; + +struct VarHandleOptions; +struct VarHandleOptionsBuilder; + +struct ReadVariableOptions; +struct ReadVariableOptionsBuilder; + +struct AssignVariableOptions; +struct AssignVariableOptionsBuilder; + +struct RandomOptions; +struct RandomOptionsBuilder; + +struct BucketizeOptions; +struct BucketizeOptionsBuilder; + +struct GeluOptions; +struct GeluOptionsBuilder; + +struct DynamicUpdateSliceOptions; +struct DynamicUpdateSliceOptionsBuilder; + +struct UnsortedSegmentProdOptions; +struct UnsortedSegmentProdOptionsBuilder; + +struct UnsortedSegmentMaxOptions; +struct UnsortedSegmentMaxOptionsBuilder; + +struct UnsortedSegmentSumOptions; +struct UnsortedSegmentSumOptionsBuilder; + +struct ATan2Options; +struct ATan2OptionsBuilder; + +struct UnsortedSegmentMinOptions; +struct UnsortedSegmentMinOptionsBuilder; + +struct SignOptions; +struct SignOptionsBuilder; + +struct OperatorCode; +struct OperatorCodeBuilder; + +struct Operator; +struct OperatorBuilder; + +struct SubGraph; +struct SubGraphBuilder; + +struct Buffer; +struct BufferBuilder; + +struct Metadata; +struct MetadataBuilder; + +struct TensorMap; +struct TensorMapBuilder; + +struct SignatureDef; +struct SignatureDefBuilder; + +struct Model; +struct ModelBuilder; + +enum TensorType : int8_t { + TensorType_FLOAT32 = 0, + TensorType_FLOAT16 = 1, + TensorType_INT32 = 2, + TensorType_UINT8 = 3, + TensorType_INT64 = 4, + TensorType_STRING = 5, + TensorType_BOOL = 6, + TensorType_INT16 = 7, + TensorType_COMPLEX64 = 8, + TensorType_INT8 = 9, + TensorType_FLOAT64 = 10, + TensorType_COMPLEX128 = 11, + TensorType_UINT64 = 12, + TensorType_RESOURCE = 13, + TensorType_VARIANT = 14, + TensorType_UINT32 = 15, + TensorType_UINT16 = 16, + TensorType_INT4 = 17, + 
TensorType_MIN = TensorType_FLOAT32, + TensorType_MAX = TensorType_INT4 +}; + +inline const TensorType (&EnumValuesTensorType())[18] { + static const TensorType values[] = { + TensorType_FLOAT32, + TensorType_FLOAT16, + TensorType_INT32, + TensorType_UINT8, + TensorType_INT64, + TensorType_STRING, + TensorType_BOOL, + TensorType_INT16, + TensorType_COMPLEX64, + TensorType_INT8, + TensorType_FLOAT64, + TensorType_COMPLEX128, + TensorType_UINT64, + TensorType_RESOURCE, + TensorType_VARIANT, + TensorType_UINT32, + TensorType_UINT16, + TensorType_INT4 + }; + return values; +} + +inline const char * const *EnumNamesTensorType() { + static const char * const names[19] = { + "FLOAT32", + "FLOAT16", + "INT32", + "UINT8", + "INT64", + "STRING", + "BOOL", + "INT16", + "COMPLEX64", + "INT8", + "FLOAT64", + "COMPLEX128", + "UINT64", + "RESOURCE", + "VARIANT", + "UINT32", + "UINT16", + "INT4", + nullptr + }; + return names; +} + +inline const char *EnumNameTensorType(TensorType e) { + if (::flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_INT4)) return ""; + const size_t index = static_cast(e); + return EnumNamesTensorType()[index]; +} + +enum QuantizationDetails : uint8_t { + QuantizationDetails_NONE = 0, + QuantizationDetails_CustomQuantization = 1, + QuantizationDetails_MIN = QuantizationDetails_NONE, + QuantizationDetails_MAX = QuantizationDetails_CustomQuantization +}; + +inline const QuantizationDetails (&EnumValuesQuantizationDetails())[2] { + static const QuantizationDetails values[] = { + QuantizationDetails_NONE, + QuantizationDetails_CustomQuantization + }; + return values; +} + +inline const char * const *EnumNamesQuantizationDetails() { + static const char * const names[3] = { + "NONE", + "CustomQuantization", + nullptr + }; + return names; +} + +inline const char *EnumNameQuantizationDetails(QuantizationDetails e) { + if (::flatbuffers::IsOutRange(e, QuantizationDetails_NONE, QuantizationDetails_CustomQuantization)) return ""; + const size_t index = 
static_cast(e); + return EnumNamesQuantizationDetails()[index]; +} + +template struct QuantizationDetailsTraits { + static const QuantizationDetails enum_value = QuantizationDetails_NONE; +}; + +template<> struct QuantizationDetailsTraits { + static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization; +}; + +bool VerifyQuantizationDetails(::flatbuffers::Verifier &verifier, const void *obj, QuantizationDetails type); +bool VerifyQuantizationDetailsVector(::flatbuffers::Verifier &verifier, const ::flatbuffers::Vector<::flatbuffers::Offset> *values, const ::flatbuffers::Vector *types); + +enum DimensionType : int8_t { + DimensionType_DENSE = 0, + DimensionType_SPARSE_CSR = 1, + DimensionType_MIN = DimensionType_DENSE, + DimensionType_MAX = DimensionType_SPARSE_CSR +}; + +inline const DimensionType (&EnumValuesDimensionType())[2] { + static const DimensionType values[] = { + DimensionType_DENSE, + DimensionType_SPARSE_CSR + }; + return values; +} + +inline const char * const *EnumNamesDimensionType() { + static const char * const names[3] = { + "DENSE", + "SPARSE_CSR", + nullptr + }; + return names; +} + +inline const char *EnumNameDimensionType(DimensionType e) { + if (::flatbuffers::IsOutRange(e, DimensionType_DENSE, DimensionType_SPARSE_CSR)) return ""; + const size_t index = static_cast(e); + return EnumNamesDimensionType()[index]; +} + +enum SparseIndexVector : uint8_t { + SparseIndexVector_NONE = 0, + SparseIndexVector_Int32Vector = 1, + SparseIndexVector_Uint16Vector = 2, + SparseIndexVector_Uint8Vector = 3, + SparseIndexVector_MIN = SparseIndexVector_NONE, + SparseIndexVector_MAX = SparseIndexVector_Uint8Vector +}; + +inline const SparseIndexVector (&EnumValuesSparseIndexVector())[4] { + static const SparseIndexVector values[] = { + SparseIndexVector_NONE, + SparseIndexVector_Int32Vector, + SparseIndexVector_Uint16Vector, + SparseIndexVector_Uint8Vector + }; + return values; +} + +inline const char * const *EnumNamesSparseIndexVector() 
{ + static const char * const names[5] = { + "NONE", + "Int32Vector", + "Uint16Vector", + "Uint8Vector", + nullptr + }; + return names; +} + +inline const char *EnumNameSparseIndexVector(SparseIndexVector e) { + if (::flatbuffers::IsOutRange(e, SparseIndexVector_NONE, SparseIndexVector_Uint8Vector)) return ""; + const size_t index = static_cast(e); + return EnumNamesSparseIndexVector()[index]; +} + +template struct SparseIndexVectorTraits { + static const SparseIndexVector enum_value = SparseIndexVector_NONE; +}; + +template<> struct SparseIndexVectorTraits { + static const SparseIndexVector enum_value = SparseIndexVector_Int32Vector; +}; + +template<> struct SparseIndexVectorTraits { + static const SparseIndexVector enum_value = SparseIndexVector_Uint16Vector; +}; + +template<> struct SparseIndexVectorTraits { + static const SparseIndexVector enum_value = SparseIndexVector_Uint8Vector; +}; + +bool VerifySparseIndexVector(::flatbuffers::Verifier &verifier, const void *obj, SparseIndexVector type); +bool VerifySparseIndexVectorVector(::flatbuffers::Verifier &verifier, const ::flatbuffers::Vector<::flatbuffers::Offset> *values, const ::flatbuffers::Vector *types); + +enum BuiltinOperator : int32_t { + BuiltinOperator_ADD = 0, + BuiltinOperator_AVERAGE_POOL_2D = 1, + BuiltinOperator_CONCATENATION = 2, + BuiltinOperator_CONV_2D = 3, + BuiltinOperator_DEPTHWISE_CONV_2D = 4, + BuiltinOperator_DEPTH_TO_SPACE = 5, + BuiltinOperator_DEQUANTIZE = 6, + BuiltinOperator_EMBEDDING_LOOKUP = 7, + BuiltinOperator_FLOOR = 8, + BuiltinOperator_FULLY_CONNECTED = 9, + BuiltinOperator_HASHTABLE_LOOKUP = 10, + BuiltinOperator_L2_NORMALIZATION = 11, + BuiltinOperator_L2_POOL_2D = 12, + BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION = 13, + BuiltinOperator_LOGISTIC = 14, + BuiltinOperator_LSH_PROJECTION = 15, + BuiltinOperator_LSTM = 16, + BuiltinOperator_MAX_POOL_2D = 17, + BuiltinOperator_MUL = 18, + BuiltinOperator_RELU = 19, + BuiltinOperator_RELU_N1_TO_1 = 20, + BuiltinOperator_RELU6 = 
21, + BuiltinOperator_RESHAPE = 22, + BuiltinOperator_RESIZE_BILINEAR = 23, + BuiltinOperator_RNN = 24, + BuiltinOperator_SOFTMAX = 25, + BuiltinOperator_SPACE_TO_DEPTH = 26, + BuiltinOperator_SVDF = 27, + BuiltinOperator_TANH = 28, + BuiltinOperator_CONCAT_EMBEDDINGS = 29, + BuiltinOperator_SKIP_GRAM = 30, + BuiltinOperator_CALL = 31, + BuiltinOperator_CUSTOM = 32, + BuiltinOperator_EMBEDDING_LOOKUP_SPARSE = 33, + BuiltinOperator_PAD = 34, + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN = 35, + BuiltinOperator_GATHER = 36, + BuiltinOperator_BATCH_TO_SPACE_ND = 37, + BuiltinOperator_SPACE_TO_BATCH_ND = 38, + BuiltinOperator_TRANSPOSE = 39, + BuiltinOperator_MEAN = 40, + BuiltinOperator_SUB = 41, + BuiltinOperator_DIV = 42, + BuiltinOperator_SQUEEZE = 43, + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM = 44, + BuiltinOperator_STRIDED_SLICE = 45, + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN = 46, + BuiltinOperator_EXP = 47, + BuiltinOperator_TOPK_V2 = 48, + BuiltinOperator_SPLIT = 49, + BuiltinOperator_LOG_SOFTMAX = 50, + BuiltinOperator_DELEGATE = 51, + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52, + BuiltinOperator_CAST = 53, + BuiltinOperator_PRELU = 54, + BuiltinOperator_MAXIMUM = 55, + BuiltinOperator_ARG_MAX = 56, + BuiltinOperator_MINIMUM = 57, + BuiltinOperator_LESS = 58, + BuiltinOperator_NEG = 59, + BuiltinOperator_PADV2 = 60, + BuiltinOperator_GREATER = 61, + BuiltinOperator_GREATER_EQUAL = 62, + BuiltinOperator_LESS_EQUAL = 63, + BuiltinOperator_SELECT = 64, + BuiltinOperator_SLICE = 65, + BuiltinOperator_SIN = 66, + BuiltinOperator_TRANSPOSE_CONV = 67, + BuiltinOperator_SPARSE_TO_DENSE = 68, + BuiltinOperator_TILE = 69, + BuiltinOperator_EXPAND_DIMS = 70, + BuiltinOperator_EQUAL = 71, + BuiltinOperator_NOT_EQUAL = 72, + BuiltinOperator_LOG = 73, + BuiltinOperator_SUM = 74, + BuiltinOperator_SQRT = 75, + BuiltinOperator_RSQRT = 76, + BuiltinOperator_SHAPE = 77, + BuiltinOperator_POW = 78, + BuiltinOperator_ARG_MIN = 79, + BuiltinOperator_FAKE_QUANT = 80, 
+ BuiltinOperator_REDUCE_PROD = 81, + BuiltinOperator_REDUCE_MAX = 82, + BuiltinOperator_PACK = 83, + BuiltinOperator_LOGICAL_OR = 84, + BuiltinOperator_ONE_HOT = 85, + BuiltinOperator_LOGICAL_AND = 86, + BuiltinOperator_LOGICAL_NOT = 87, + BuiltinOperator_UNPACK = 88, + BuiltinOperator_REDUCE_MIN = 89, + BuiltinOperator_FLOOR_DIV = 90, + BuiltinOperator_REDUCE_ANY = 91, + BuiltinOperator_SQUARE = 92, + BuiltinOperator_ZEROS_LIKE = 93, + BuiltinOperator_FILL = 94, + BuiltinOperator_FLOOR_MOD = 95, + BuiltinOperator_RANGE = 96, + BuiltinOperator_RESIZE_NEAREST_NEIGHBOR = 97, + BuiltinOperator_LEAKY_RELU = 98, + BuiltinOperator_SQUARED_DIFFERENCE = 99, + BuiltinOperator_MIRROR_PAD = 100, + BuiltinOperator_ABS = 101, + BuiltinOperator_SPLIT_V = 102, + BuiltinOperator_UNIQUE = 103, + BuiltinOperator_CEIL = 104, + BuiltinOperator_REVERSE_V2 = 105, + BuiltinOperator_ADD_N = 106, + BuiltinOperator_GATHER_ND = 107, + BuiltinOperator_COS = 108, + BuiltinOperator_WHERE = 109, + BuiltinOperator_RANK = 110, + BuiltinOperator_ELU = 111, + BuiltinOperator_REVERSE_SEQUENCE = 112, + BuiltinOperator_MATRIX_DIAG = 113, + BuiltinOperator_QUANTIZE = 114, + BuiltinOperator_MATRIX_SET_DIAG = 115, + BuiltinOperator_ROUND = 116, + BuiltinOperator_HARD_SWISH = 117, + BuiltinOperator_IF = 118, + BuiltinOperator_WHILE = 119, + BuiltinOperator_NON_MAX_SUPPRESSION_V4 = 120, + BuiltinOperator_NON_MAX_SUPPRESSION_V5 = 121, + BuiltinOperator_SCATTER_ND = 122, + BuiltinOperator_SELECT_V2 = 123, + BuiltinOperator_DENSIFY = 124, + BuiltinOperator_SEGMENT_SUM = 125, + BuiltinOperator_BATCH_MATMUL = 126, + BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127, + BuiltinOperator_CUMSUM = 128, + BuiltinOperator_CALL_ONCE = 129, + BuiltinOperator_BROADCAST_TO = 130, + BuiltinOperator_RFFT2D = 131, + BuiltinOperator_CONV_3D = 132, + BuiltinOperator_IMAG = 133, + BuiltinOperator_REAL = 134, + BuiltinOperator_COMPLEX_ABS = 135, + BuiltinOperator_HASHTABLE = 136, + BuiltinOperator_HASHTABLE_FIND = 137, + 
BuiltinOperator_HASHTABLE_IMPORT = 138, + BuiltinOperator_HASHTABLE_SIZE = 139, + BuiltinOperator_REDUCE_ALL = 140, + BuiltinOperator_CONV_3D_TRANSPOSE = 141, + BuiltinOperator_VAR_HANDLE = 142, + BuiltinOperator_READ_VARIABLE = 143, + BuiltinOperator_ASSIGN_VARIABLE = 144, + BuiltinOperator_BROADCAST_ARGS = 145, + BuiltinOperator_RANDOM_STANDARD_NORMAL = 146, + BuiltinOperator_BUCKETIZE = 147, + BuiltinOperator_RANDOM_UNIFORM = 148, + BuiltinOperator_MULTINOMIAL = 149, + BuiltinOperator_GELU = 150, + BuiltinOperator_DYNAMIC_UPDATE_SLICE = 151, + BuiltinOperator_RELU_0_TO_1 = 152, + BuiltinOperator_UNSORTED_SEGMENT_PROD = 153, + BuiltinOperator_UNSORTED_SEGMENT_MAX = 154, + BuiltinOperator_UNSORTED_SEGMENT_SUM = 155, + BuiltinOperator_ATAN2 = 156, + BuiltinOperator_UNSORTED_SEGMENT_MIN = 157, + BuiltinOperator_SIGN = 158, + BuiltinOperator_MIN = BuiltinOperator_ADD, + BuiltinOperator_MAX = BuiltinOperator_SIGN +}; + +inline const BuiltinOperator (&EnumValuesBuiltinOperator())[159] { + static const BuiltinOperator values[] = { + BuiltinOperator_ADD, + BuiltinOperator_AVERAGE_POOL_2D, + BuiltinOperator_CONCATENATION, + BuiltinOperator_CONV_2D, + BuiltinOperator_DEPTHWISE_CONV_2D, + BuiltinOperator_DEPTH_TO_SPACE, + BuiltinOperator_DEQUANTIZE, + BuiltinOperator_EMBEDDING_LOOKUP, + BuiltinOperator_FLOOR, + BuiltinOperator_FULLY_CONNECTED, + BuiltinOperator_HASHTABLE_LOOKUP, + BuiltinOperator_L2_NORMALIZATION, + BuiltinOperator_L2_POOL_2D, + BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, + BuiltinOperator_LOGISTIC, + BuiltinOperator_LSH_PROJECTION, + BuiltinOperator_LSTM, + BuiltinOperator_MAX_POOL_2D, + BuiltinOperator_MUL, + BuiltinOperator_RELU, + BuiltinOperator_RELU_N1_TO_1, + BuiltinOperator_RELU6, + BuiltinOperator_RESHAPE, + BuiltinOperator_RESIZE_BILINEAR, + BuiltinOperator_RNN, + BuiltinOperator_SOFTMAX, + BuiltinOperator_SPACE_TO_DEPTH, + BuiltinOperator_SVDF, + BuiltinOperator_TANH, + BuiltinOperator_CONCAT_EMBEDDINGS, + BuiltinOperator_SKIP_GRAM, + 
BuiltinOperator_CALL, + BuiltinOperator_CUSTOM, + BuiltinOperator_EMBEDDING_LOOKUP_SPARSE, + BuiltinOperator_PAD, + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN, + BuiltinOperator_GATHER, + BuiltinOperator_BATCH_TO_SPACE_ND, + BuiltinOperator_SPACE_TO_BATCH_ND, + BuiltinOperator_TRANSPOSE, + BuiltinOperator_MEAN, + BuiltinOperator_SUB, + BuiltinOperator_DIV, + BuiltinOperator_SQUEEZE, + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, + BuiltinOperator_STRIDED_SLICE, + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN, + BuiltinOperator_EXP, + BuiltinOperator_TOPK_V2, + BuiltinOperator_SPLIT, + BuiltinOperator_LOG_SOFTMAX, + BuiltinOperator_DELEGATE, + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, + BuiltinOperator_CAST, + BuiltinOperator_PRELU, + BuiltinOperator_MAXIMUM, + BuiltinOperator_ARG_MAX, + BuiltinOperator_MINIMUM, + BuiltinOperator_LESS, + BuiltinOperator_NEG, + BuiltinOperator_PADV2, + BuiltinOperator_GREATER, + BuiltinOperator_GREATER_EQUAL, + BuiltinOperator_LESS_EQUAL, + BuiltinOperator_SELECT, + BuiltinOperator_SLICE, + BuiltinOperator_SIN, + BuiltinOperator_TRANSPOSE_CONV, + BuiltinOperator_SPARSE_TO_DENSE, + BuiltinOperator_TILE, + BuiltinOperator_EXPAND_DIMS, + BuiltinOperator_EQUAL, + BuiltinOperator_NOT_EQUAL, + BuiltinOperator_LOG, + BuiltinOperator_SUM, + BuiltinOperator_SQRT, + BuiltinOperator_RSQRT, + BuiltinOperator_SHAPE, + BuiltinOperator_POW, + BuiltinOperator_ARG_MIN, + BuiltinOperator_FAKE_QUANT, + BuiltinOperator_REDUCE_PROD, + BuiltinOperator_REDUCE_MAX, + BuiltinOperator_PACK, + BuiltinOperator_LOGICAL_OR, + BuiltinOperator_ONE_HOT, + BuiltinOperator_LOGICAL_AND, + BuiltinOperator_LOGICAL_NOT, + BuiltinOperator_UNPACK, + BuiltinOperator_REDUCE_MIN, + BuiltinOperator_FLOOR_DIV, + BuiltinOperator_REDUCE_ANY, + BuiltinOperator_SQUARE, + BuiltinOperator_ZEROS_LIKE, + BuiltinOperator_FILL, + BuiltinOperator_FLOOR_MOD, + BuiltinOperator_RANGE, + BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, + BuiltinOperator_LEAKY_RELU, + 
BuiltinOperator_SQUARED_DIFFERENCE, + BuiltinOperator_MIRROR_PAD, + BuiltinOperator_ABS, + BuiltinOperator_SPLIT_V, + BuiltinOperator_UNIQUE, + BuiltinOperator_CEIL, + BuiltinOperator_REVERSE_V2, + BuiltinOperator_ADD_N, + BuiltinOperator_GATHER_ND, + BuiltinOperator_COS, + BuiltinOperator_WHERE, + BuiltinOperator_RANK, + BuiltinOperator_ELU, + BuiltinOperator_REVERSE_SEQUENCE, + BuiltinOperator_MATRIX_DIAG, + BuiltinOperator_QUANTIZE, + BuiltinOperator_MATRIX_SET_DIAG, + BuiltinOperator_ROUND, + BuiltinOperator_HARD_SWISH, + BuiltinOperator_IF, + BuiltinOperator_WHILE, + BuiltinOperator_NON_MAX_SUPPRESSION_V4, + BuiltinOperator_NON_MAX_SUPPRESSION_V5, + BuiltinOperator_SCATTER_ND, + BuiltinOperator_SELECT_V2, + BuiltinOperator_DENSIFY, + BuiltinOperator_SEGMENT_SUM, + BuiltinOperator_BATCH_MATMUL, + BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES, + BuiltinOperator_CUMSUM, + BuiltinOperator_CALL_ONCE, + BuiltinOperator_BROADCAST_TO, + BuiltinOperator_RFFT2D, + BuiltinOperator_CONV_3D, + BuiltinOperator_IMAG, + BuiltinOperator_REAL, + BuiltinOperator_COMPLEX_ABS, + BuiltinOperator_HASHTABLE, + BuiltinOperator_HASHTABLE_FIND, + BuiltinOperator_HASHTABLE_IMPORT, + BuiltinOperator_HASHTABLE_SIZE, + BuiltinOperator_REDUCE_ALL, + BuiltinOperator_CONV_3D_TRANSPOSE, + BuiltinOperator_VAR_HANDLE, + BuiltinOperator_READ_VARIABLE, + BuiltinOperator_ASSIGN_VARIABLE, + BuiltinOperator_BROADCAST_ARGS, + BuiltinOperator_RANDOM_STANDARD_NORMAL, + BuiltinOperator_BUCKETIZE, + BuiltinOperator_RANDOM_UNIFORM, + BuiltinOperator_MULTINOMIAL, + BuiltinOperator_GELU, + BuiltinOperator_DYNAMIC_UPDATE_SLICE, + BuiltinOperator_RELU_0_TO_1, + BuiltinOperator_UNSORTED_SEGMENT_PROD, + BuiltinOperator_UNSORTED_SEGMENT_MAX, + BuiltinOperator_UNSORTED_SEGMENT_SUM, + BuiltinOperator_ATAN2, + BuiltinOperator_UNSORTED_SEGMENT_MIN, + BuiltinOperator_SIGN + }; + return values; +} + +inline const char * const *EnumNamesBuiltinOperator() { + static const char * const names[160] = { + "ADD", + 
"AVERAGE_POOL_2D", + "CONCATENATION", + "CONV_2D", + "DEPTHWISE_CONV_2D", + "DEPTH_TO_SPACE", + "DEQUANTIZE", + "EMBEDDING_LOOKUP", + "FLOOR", + "FULLY_CONNECTED", + "HASHTABLE_LOOKUP", + "L2_NORMALIZATION", + "L2_POOL_2D", + "LOCAL_RESPONSE_NORMALIZATION", + "LOGISTIC", + "LSH_PROJECTION", + "LSTM", + "MAX_POOL_2D", + "MUL", + "RELU", + "RELU_N1_TO_1", + "RELU6", + "RESHAPE", + "RESIZE_BILINEAR", + "RNN", + "SOFTMAX", + "SPACE_TO_DEPTH", + "SVDF", + "TANH", + "CONCAT_EMBEDDINGS", + "SKIP_GRAM", + "CALL", + "CUSTOM", + "EMBEDDING_LOOKUP_SPARSE", + "PAD", + "UNIDIRECTIONAL_SEQUENCE_RNN", + "GATHER", + "BATCH_TO_SPACE_ND", + "SPACE_TO_BATCH_ND", + "TRANSPOSE", + "MEAN", + "SUB", + "DIV", + "SQUEEZE", + "UNIDIRECTIONAL_SEQUENCE_LSTM", + "STRIDED_SLICE", + "BIDIRECTIONAL_SEQUENCE_RNN", + "EXP", + "TOPK_V2", + "SPLIT", + "LOG_SOFTMAX", + "DELEGATE", + "BIDIRECTIONAL_SEQUENCE_LSTM", + "CAST", + "PRELU", + "MAXIMUM", + "ARG_MAX", + "MINIMUM", + "LESS", + "NEG", + "PADV2", + "GREATER", + "GREATER_EQUAL", + "LESS_EQUAL", + "SELECT", + "SLICE", + "SIN", + "TRANSPOSE_CONV", + "SPARSE_TO_DENSE", + "TILE", + "EXPAND_DIMS", + "EQUAL", + "NOT_EQUAL", + "LOG", + "SUM", + "SQRT", + "RSQRT", + "SHAPE", + "POW", + "ARG_MIN", + "FAKE_QUANT", + "REDUCE_PROD", + "REDUCE_MAX", + "PACK", + "LOGICAL_OR", + "ONE_HOT", + "LOGICAL_AND", + "LOGICAL_NOT", + "UNPACK", + "REDUCE_MIN", + "FLOOR_DIV", + "REDUCE_ANY", + "SQUARE", + "ZEROS_LIKE", + "FILL", + "FLOOR_MOD", + "RANGE", + "RESIZE_NEAREST_NEIGHBOR", + "LEAKY_RELU", + "SQUARED_DIFFERENCE", + "MIRROR_PAD", + "ABS", + "SPLIT_V", + "UNIQUE", + "CEIL", + "REVERSE_V2", + "ADD_N", + "GATHER_ND", + "COS", + "WHERE", + "RANK", + "ELU", + "REVERSE_SEQUENCE", + "MATRIX_DIAG", + "QUANTIZE", + "MATRIX_SET_DIAG", + "ROUND", + "HARD_SWISH", + "IF", + "WHILE", + "NON_MAX_SUPPRESSION_V4", + "NON_MAX_SUPPRESSION_V5", + "SCATTER_ND", + "SELECT_V2", + "DENSIFY", + "SEGMENT_SUM", + "BATCH_MATMUL", + "PLACEHOLDER_FOR_GREATER_OP_CODES", + "CUMSUM", + 
"CALL_ONCE", + "BROADCAST_TO", + "RFFT2D", + "CONV_3D", + "IMAG", + "REAL", + "COMPLEX_ABS", + "HASHTABLE", + "HASHTABLE_FIND", + "HASHTABLE_IMPORT", + "HASHTABLE_SIZE", + "REDUCE_ALL", + "CONV_3D_TRANSPOSE", + "VAR_HANDLE", + "READ_VARIABLE", + "ASSIGN_VARIABLE", + "BROADCAST_ARGS", + "RANDOM_STANDARD_NORMAL", + "BUCKETIZE", + "RANDOM_UNIFORM", + "MULTINOMIAL", + "GELU", + "DYNAMIC_UPDATE_SLICE", + "RELU_0_TO_1", + "UNSORTED_SEGMENT_PROD", + "UNSORTED_SEGMENT_MAX", + "UNSORTED_SEGMENT_SUM", + "ATAN2", + "UNSORTED_SEGMENT_MIN", + "SIGN", + nullptr + }; + return names; +} + +inline const char *EnumNameBuiltinOperator(BuiltinOperator e) { + if (::flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_SIGN)) return ""; + const size_t index = static_cast(e); + return EnumNamesBuiltinOperator()[index]; +} + +enum BuiltinOptions : uint8_t { + BuiltinOptions_NONE = 0, + BuiltinOptions_Conv2DOptions = 1, + BuiltinOptions_DepthwiseConv2DOptions = 2, + BuiltinOptions_ConcatEmbeddingsOptions = 3, + BuiltinOptions_LSHProjectionOptions = 4, + BuiltinOptions_Pool2DOptions = 5, + BuiltinOptions_SVDFOptions = 6, + BuiltinOptions_RNNOptions = 7, + BuiltinOptions_FullyConnectedOptions = 8, + BuiltinOptions_SoftmaxOptions = 9, + BuiltinOptions_ConcatenationOptions = 10, + BuiltinOptions_AddOptions = 11, + BuiltinOptions_L2NormOptions = 12, + BuiltinOptions_LocalResponseNormalizationOptions = 13, + BuiltinOptions_LSTMOptions = 14, + BuiltinOptions_ResizeBilinearOptions = 15, + BuiltinOptions_CallOptions = 16, + BuiltinOptions_ReshapeOptions = 17, + BuiltinOptions_SkipGramOptions = 18, + BuiltinOptions_SpaceToDepthOptions = 19, + BuiltinOptions_EmbeddingLookupSparseOptions = 20, + BuiltinOptions_MulOptions = 21, + BuiltinOptions_PadOptions = 22, + BuiltinOptions_GatherOptions = 23, + BuiltinOptions_BatchToSpaceNDOptions = 24, + BuiltinOptions_SpaceToBatchNDOptions = 25, + BuiltinOptions_TransposeOptions = 26, + BuiltinOptions_ReducerOptions = 27, + BuiltinOptions_SubOptions = 
28, + BuiltinOptions_DivOptions = 29, + BuiltinOptions_SqueezeOptions = 30, + BuiltinOptions_SequenceRNNOptions = 31, + BuiltinOptions_StridedSliceOptions = 32, + BuiltinOptions_ExpOptions = 33, + BuiltinOptions_TopKV2Options = 34, + BuiltinOptions_SplitOptions = 35, + BuiltinOptions_LogSoftmaxOptions = 36, + BuiltinOptions_CastOptions = 37, + BuiltinOptions_DequantizeOptions = 38, + BuiltinOptions_MaximumMinimumOptions = 39, + BuiltinOptions_ArgMaxOptions = 40, + BuiltinOptions_LessOptions = 41, + BuiltinOptions_NegOptions = 42, + BuiltinOptions_PadV2Options = 43, + BuiltinOptions_GreaterOptions = 44, + BuiltinOptions_GreaterEqualOptions = 45, + BuiltinOptions_LessEqualOptions = 46, + BuiltinOptions_SelectOptions = 47, + BuiltinOptions_SliceOptions = 48, + BuiltinOptions_TransposeConvOptions = 49, + BuiltinOptions_SparseToDenseOptions = 50, + BuiltinOptions_TileOptions = 51, + BuiltinOptions_ExpandDimsOptions = 52, + BuiltinOptions_EqualOptions = 53, + BuiltinOptions_NotEqualOptions = 54, + BuiltinOptions_ShapeOptions = 55, + BuiltinOptions_PowOptions = 56, + BuiltinOptions_ArgMinOptions = 57, + BuiltinOptions_FakeQuantOptions = 58, + BuiltinOptions_PackOptions = 59, + BuiltinOptions_LogicalOrOptions = 60, + BuiltinOptions_OneHotOptions = 61, + BuiltinOptions_LogicalAndOptions = 62, + BuiltinOptions_LogicalNotOptions = 63, + BuiltinOptions_UnpackOptions = 64, + BuiltinOptions_FloorDivOptions = 65, + BuiltinOptions_SquareOptions = 66, + BuiltinOptions_ZerosLikeOptions = 67, + BuiltinOptions_FillOptions = 68, + BuiltinOptions_BidirectionalSequenceLSTMOptions = 69, + BuiltinOptions_BidirectionalSequenceRNNOptions = 70, + BuiltinOptions_UnidirectionalSequenceLSTMOptions = 71, + BuiltinOptions_FloorModOptions = 72, + BuiltinOptions_RangeOptions = 73, + BuiltinOptions_ResizeNearestNeighborOptions = 74, + BuiltinOptions_LeakyReluOptions = 75, + BuiltinOptions_SquaredDifferenceOptions = 76, + BuiltinOptions_MirrorPadOptions = 77, + BuiltinOptions_AbsOptions = 78, + 
BuiltinOptions_SplitVOptions = 79, + BuiltinOptions_UniqueOptions = 80, + BuiltinOptions_ReverseV2Options = 81, + BuiltinOptions_AddNOptions = 82, + BuiltinOptions_GatherNdOptions = 83, + BuiltinOptions_CosOptions = 84, + BuiltinOptions_WhereOptions = 85, + BuiltinOptions_RankOptions = 86, + BuiltinOptions_ReverseSequenceOptions = 87, + BuiltinOptions_MatrixDiagOptions = 88, + BuiltinOptions_QuantizeOptions = 89, + BuiltinOptions_MatrixSetDiagOptions = 90, + BuiltinOptions_HardSwishOptions = 91, + BuiltinOptions_IfOptions = 92, + BuiltinOptions_WhileOptions = 93, + BuiltinOptions_DepthToSpaceOptions = 94, + BuiltinOptions_NonMaxSuppressionV4Options = 95, + BuiltinOptions_NonMaxSuppressionV5Options = 96, + BuiltinOptions_ScatterNdOptions = 97, + BuiltinOptions_SelectV2Options = 98, + BuiltinOptions_DensifyOptions = 99, + BuiltinOptions_SegmentSumOptions = 100, + BuiltinOptions_BatchMatMulOptions = 101, + BuiltinOptions_CumsumOptions = 102, + BuiltinOptions_CallOnceOptions = 103, + BuiltinOptions_BroadcastToOptions = 104, + BuiltinOptions_Rfft2dOptions = 105, + BuiltinOptions_Conv3DOptions = 106, + BuiltinOptions_HashtableOptions = 107, + BuiltinOptions_HashtableFindOptions = 108, + BuiltinOptions_HashtableImportOptions = 109, + BuiltinOptions_HashtableSizeOptions = 110, + BuiltinOptions_VarHandleOptions = 111, + BuiltinOptions_ReadVariableOptions = 112, + BuiltinOptions_AssignVariableOptions = 113, + BuiltinOptions_RandomOptions = 114, + BuiltinOptions_BucketizeOptions = 115, + BuiltinOptions_GeluOptions = 116, + BuiltinOptions_DynamicUpdateSliceOptions = 117, + BuiltinOptions_UnsortedSegmentProdOptions = 118, + BuiltinOptions_UnsortedSegmentMaxOptions = 119, + BuiltinOptions_UnsortedSegmentMinOptions = 120, + BuiltinOptions_UnsortedSegmentSumOptions = 121, + BuiltinOptions_ATan2Options = 122, + BuiltinOptions_SignOptions = 123, + BuiltinOptions_MIN = BuiltinOptions_NONE, + BuiltinOptions_MAX = BuiltinOptions_SignOptions +}; + +inline const BuiltinOptions 
(&EnumValuesBuiltinOptions())[124] { + static const BuiltinOptions values[] = { + BuiltinOptions_NONE, + BuiltinOptions_Conv2DOptions, + BuiltinOptions_DepthwiseConv2DOptions, + BuiltinOptions_ConcatEmbeddingsOptions, + BuiltinOptions_LSHProjectionOptions, + BuiltinOptions_Pool2DOptions, + BuiltinOptions_SVDFOptions, + BuiltinOptions_RNNOptions, + BuiltinOptions_FullyConnectedOptions, + BuiltinOptions_SoftmaxOptions, + BuiltinOptions_ConcatenationOptions, + BuiltinOptions_AddOptions, + BuiltinOptions_L2NormOptions, + BuiltinOptions_LocalResponseNormalizationOptions, + BuiltinOptions_LSTMOptions, + BuiltinOptions_ResizeBilinearOptions, + BuiltinOptions_CallOptions, + BuiltinOptions_ReshapeOptions, + BuiltinOptions_SkipGramOptions, + BuiltinOptions_SpaceToDepthOptions, + BuiltinOptions_EmbeddingLookupSparseOptions, + BuiltinOptions_MulOptions, + BuiltinOptions_PadOptions, + BuiltinOptions_GatherOptions, + BuiltinOptions_BatchToSpaceNDOptions, + BuiltinOptions_SpaceToBatchNDOptions, + BuiltinOptions_TransposeOptions, + BuiltinOptions_ReducerOptions, + BuiltinOptions_SubOptions, + BuiltinOptions_DivOptions, + BuiltinOptions_SqueezeOptions, + BuiltinOptions_SequenceRNNOptions, + BuiltinOptions_StridedSliceOptions, + BuiltinOptions_ExpOptions, + BuiltinOptions_TopKV2Options, + BuiltinOptions_SplitOptions, + BuiltinOptions_LogSoftmaxOptions, + BuiltinOptions_CastOptions, + BuiltinOptions_DequantizeOptions, + BuiltinOptions_MaximumMinimumOptions, + BuiltinOptions_ArgMaxOptions, + BuiltinOptions_LessOptions, + BuiltinOptions_NegOptions, + BuiltinOptions_PadV2Options, + BuiltinOptions_GreaterOptions, + BuiltinOptions_GreaterEqualOptions, + BuiltinOptions_LessEqualOptions, + BuiltinOptions_SelectOptions, + BuiltinOptions_SliceOptions, + BuiltinOptions_TransposeConvOptions, + BuiltinOptions_SparseToDenseOptions, + BuiltinOptions_TileOptions, + BuiltinOptions_ExpandDimsOptions, + BuiltinOptions_EqualOptions, + BuiltinOptions_NotEqualOptions, + BuiltinOptions_ShapeOptions, + 
BuiltinOptions_PowOptions, + BuiltinOptions_ArgMinOptions, + BuiltinOptions_FakeQuantOptions, + BuiltinOptions_PackOptions, + BuiltinOptions_LogicalOrOptions, + BuiltinOptions_OneHotOptions, + BuiltinOptions_LogicalAndOptions, + BuiltinOptions_LogicalNotOptions, + BuiltinOptions_UnpackOptions, + BuiltinOptions_FloorDivOptions, + BuiltinOptions_SquareOptions, + BuiltinOptions_ZerosLikeOptions, + BuiltinOptions_FillOptions, + BuiltinOptions_BidirectionalSequenceLSTMOptions, + BuiltinOptions_BidirectionalSequenceRNNOptions, + BuiltinOptions_UnidirectionalSequenceLSTMOptions, + BuiltinOptions_FloorModOptions, + BuiltinOptions_RangeOptions, + BuiltinOptions_ResizeNearestNeighborOptions, + BuiltinOptions_LeakyReluOptions, + BuiltinOptions_SquaredDifferenceOptions, + BuiltinOptions_MirrorPadOptions, + BuiltinOptions_AbsOptions, + BuiltinOptions_SplitVOptions, + BuiltinOptions_UniqueOptions, + BuiltinOptions_ReverseV2Options, + BuiltinOptions_AddNOptions, + BuiltinOptions_GatherNdOptions, + BuiltinOptions_CosOptions, + BuiltinOptions_WhereOptions, + BuiltinOptions_RankOptions, + BuiltinOptions_ReverseSequenceOptions, + BuiltinOptions_MatrixDiagOptions, + BuiltinOptions_QuantizeOptions, + BuiltinOptions_MatrixSetDiagOptions, + BuiltinOptions_HardSwishOptions, + BuiltinOptions_IfOptions, + BuiltinOptions_WhileOptions, + BuiltinOptions_DepthToSpaceOptions, + BuiltinOptions_NonMaxSuppressionV4Options, + BuiltinOptions_NonMaxSuppressionV5Options, + BuiltinOptions_ScatterNdOptions, + BuiltinOptions_SelectV2Options, + BuiltinOptions_DensifyOptions, + BuiltinOptions_SegmentSumOptions, + BuiltinOptions_BatchMatMulOptions, + BuiltinOptions_CumsumOptions, + BuiltinOptions_CallOnceOptions, + BuiltinOptions_BroadcastToOptions, + BuiltinOptions_Rfft2dOptions, + BuiltinOptions_Conv3DOptions, + BuiltinOptions_HashtableOptions, + BuiltinOptions_HashtableFindOptions, + BuiltinOptions_HashtableImportOptions, + BuiltinOptions_HashtableSizeOptions, + BuiltinOptions_VarHandleOptions, + 
BuiltinOptions_ReadVariableOptions, + BuiltinOptions_AssignVariableOptions, + BuiltinOptions_RandomOptions, + BuiltinOptions_BucketizeOptions, + BuiltinOptions_GeluOptions, + BuiltinOptions_DynamicUpdateSliceOptions, + BuiltinOptions_UnsortedSegmentProdOptions, + BuiltinOptions_UnsortedSegmentMaxOptions, + BuiltinOptions_UnsortedSegmentMinOptions, + BuiltinOptions_UnsortedSegmentSumOptions, + BuiltinOptions_ATan2Options, + BuiltinOptions_SignOptions + }; + return values; +} + +inline const char * const *EnumNamesBuiltinOptions() { + static const char * const names[125] = { + "NONE", + "Conv2DOptions", + "DepthwiseConv2DOptions", + "ConcatEmbeddingsOptions", + "LSHProjectionOptions", + "Pool2DOptions", + "SVDFOptions", + "RNNOptions", + "FullyConnectedOptions", + "SoftmaxOptions", + "ConcatenationOptions", + "AddOptions", + "L2NormOptions", + "LocalResponseNormalizationOptions", + "LSTMOptions", + "ResizeBilinearOptions", + "CallOptions", + "ReshapeOptions", + "SkipGramOptions", + "SpaceToDepthOptions", + "EmbeddingLookupSparseOptions", + "MulOptions", + "PadOptions", + "GatherOptions", + "BatchToSpaceNDOptions", + "SpaceToBatchNDOptions", + "TransposeOptions", + "ReducerOptions", + "SubOptions", + "DivOptions", + "SqueezeOptions", + "SequenceRNNOptions", + "StridedSliceOptions", + "ExpOptions", + "TopKV2Options", + "SplitOptions", + "LogSoftmaxOptions", + "CastOptions", + "DequantizeOptions", + "MaximumMinimumOptions", + "ArgMaxOptions", + "LessOptions", + "NegOptions", + "PadV2Options", + "GreaterOptions", + "GreaterEqualOptions", + "LessEqualOptions", + "SelectOptions", + "SliceOptions", + "TransposeConvOptions", + "SparseToDenseOptions", + "TileOptions", + "ExpandDimsOptions", + "EqualOptions", + "NotEqualOptions", + "ShapeOptions", + "PowOptions", + "ArgMinOptions", + "FakeQuantOptions", + "PackOptions", + "LogicalOrOptions", + "OneHotOptions", + "LogicalAndOptions", + "LogicalNotOptions", + "UnpackOptions", + "FloorDivOptions", + "SquareOptions", + 
"ZerosLikeOptions", + "FillOptions", + "BidirectionalSequenceLSTMOptions", + "BidirectionalSequenceRNNOptions", + "UnidirectionalSequenceLSTMOptions", + "FloorModOptions", + "RangeOptions", + "ResizeNearestNeighborOptions", + "LeakyReluOptions", + "SquaredDifferenceOptions", + "MirrorPadOptions", + "AbsOptions", + "SplitVOptions", + "UniqueOptions", + "ReverseV2Options", + "AddNOptions", + "GatherNdOptions", + "CosOptions", + "WhereOptions", + "RankOptions", + "ReverseSequenceOptions", + "MatrixDiagOptions", + "QuantizeOptions", + "MatrixSetDiagOptions", + "HardSwishOptions", + "IfOptions", + "WhileOptions", + "DepthToSpaceOptions", + "NonMaxSuppressionV4Options", + "NonMaxSuppressionV5Options", + "ScatterNdOptions", + "SelectV2Options", + "DensifyOptions", + "SegmentSumOptions", + "BatchMatMulOptions", + "CumsumOptions", + "CallOnceOptions", + "BroadcastToOptions", + "Rfft2dOptions", + "Conv3DOptions", + "HashtableOptions", + "HashtableFindOptions", + "HashtableImportOptions", + "HashtableSizeOptions", + "VarHandleOptions", + "ReadVariableOptions", + "AssignVariableOptions", + "RandomOptions", + "BucketizeOptions", + "GeluOptions", + "DynamicUpdateSliceOptions", + "UnsortedSegmentProdOptions", + "UnsortedSegmentMaxOptions", + "UnsortedSegmentMinOptions", + "UnsortedSegmentSumOptions", + "ATan2Options", + "SignOptions", + nullptr + }; + return names; +} + +inline const char *EnumNameBuiltinOptions(BuiltinOptions e) { + if (::flatbuffers::IsOutRange(e, BuiltinOptions_NONE, BuiltinOptions_SignOptions)) return ""; + const size_t index = static_cast(e); + return EnumNamesBuiltinOptions()[index]; +} + +template struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_NONE; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_DepthwiseConv2DOptions; +}; + +template<> 
struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ConcatEmbeddingsOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_Pool2DOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SVDFOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_AddOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LocalResponseNormalizationOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_CallOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions; +}; + +template<> struct 
BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_EmbeddingLookupSparseOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_MulOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_PadOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BatchToSpaceNDOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SpaceToBatchNDOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_TransposeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ReducerOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SubOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_DivOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SqueezeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SequenceRNNOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_StridedSliceOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ExpOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_TopKV2Options; +}; + +template<> struct BuiltinOptionsTraits { + 
static const BuiltinOptions enum_value = BuiltinOptions_SplitOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LogSoftmaxOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_CastOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_MaximumMinimumOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ArgMaxOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LessOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_NegOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_PadV2Options; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_GreaterOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_GreaterEqualOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LessEqualOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SelectOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SliceOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_TransposeConvOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SparseToDenseOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions 
enum_value = BuiltinOptions_TileOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ExpandDimsOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_EqualOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_NotEqualOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ShapeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_PowOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ArgMinOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_FakeQuantOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_PackOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LogicalOrOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_OneHotOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LogicalAndOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LogicalNotOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_UnpackOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_FloorDivOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SquareOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions; +}; 
+ +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_FillOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceLSTMOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_RangeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ResizeNearestNeighborOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LeakyReluOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SquaredDifferenceOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_MirrorPadOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_AbsOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SplitVOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_UniqueOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ReverseV2Options; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_AddNOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = 
BuiltinOptions_GatherNdOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_CosOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_WhereOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_RankOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ReverseSequenceOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_MatrixDiagOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_QuantizeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_MatrixSetDiagOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_HardSwishOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_IfOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_WhileOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_DepthToSpaceOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV4Options; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV5Options; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ScatterNdOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SelectV2Options; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = 
BuiltinOptions_DensifyOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SegmentSumOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BatchMatMulOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_CumsumOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_CallOnceOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BroadcastToOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_Rfft2dOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_Conv3DOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_HashtableOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_HashtableFindOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_HashtableImportOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_HashtableSizeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_VarHandleOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ReadVariableOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_AssignVariableOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_RandomOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = 
BuiltinOptions_BucketizeOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_GeluOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_DynamicUpdateSliceOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentProdOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentMaxOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentMinOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_UnsortedSegmentSumOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ATan2Options; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SignOptions; +}; + +bool VerifyBuiltinOptions(::flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); +bool VerifyBuiltinOptionsVector(::flatbuffers::Verifier &verifier, const ::flatbuffers::Vector<::flatbuffers::Offset> *values, const ::flatbuffers::Vector *types); + +enum Padding : int8_t { + Padding_SAME = 0, + Padding_VALID = 1, + Padding_MIN = Padding_SAME, + Padding_MAX = Padding_VALID +}; + +inline const Padding (&EnumValuesPadding())[2] { + static const Padding values[] = { + Padding_SAME, + Padding_VALID + }; + return values; +} + +inline const char * const *EnumNamesPadding() { + static const char * const names[3] = { + "SAME", + "VALID", + nullptr + }; + return names; +} + +inline const char *EnumNamePadding(Padding e) { + if (::flatbuffers::IsOutRange(e, Padding_SAME, Padding_VALID)) return ""; + const size_t index = static_cast(e); + return EnumNamesPadding()[index]; +} + +enum ActivationFunctionType : int8_t { 
+ ActivationFunctionType_NONE = 0, + ActivationFunctionType_RELU = 1, + ActivationFunctionType_RELU_N1_TO_1 = 2, + ActivationFunctionType_RELU6 = 3, + ActivationFunctionType_TANH = 4, + ActivationFunctionType_SIGN_BIT = 5, + ActivationFunctionType_MIN = ActivationFunctionType_NONE, + ActivationFunctionType_MAX = ActivationFunctionType_SIGN_BIT +}; + +inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6] { + static const ActivationFunctionType values[] = { + ActivationFunctionType_NONE, + ActivationFunctionType_RELU, + ActivationFunctionType_RELU_N1_TO_1, + ActivationFunctionType_RELU6, + ActivationFunctionType_TANH, + ActivationFunctionType_SIGN_BIT + }; + return values; +} + +inline const char * const *EnumNamesActivationFunctionType() { + static const char * const names[7] = { + "NONE", + "RELU", + "RELU_N1_TO_1", + "RELU6", + "TANH", + "SIGN_BIT", + nullptr + }; + return names; +} + +inline const char *EnumNameActivationFunctionType(ActivationFunctionType e) { + if (::flatbuffers::IsOutRange(e, ActivationFunctionType_NONE, ActivationFunctionType_SIGN_BIT)) return ""; + const size_t index = static_cast(e); + return EnumNamesActivationFunctionType()[index]; +} + +enum LSHProjectionType : int8_t { + LSHProjectionType_UNKNOWN = 0, + LSHProjectionType_SPARSE = 1, + LSHProjectionType_DENSE = 2, + LSHProjectionType_MIN = LSHProjectionType_UNKNOWN, + LSHProjectionType_MAX = LSHProjectionType_DENSE +}; + +inline const LSHProjectionType (&EnumValuesLSHProjectionType())[3] { + static const LSHProjectionType values[] = { + LSHProjectionType_UNKNOWN, + LSHProjectionType_SPARSE, + LSHProjectionType_DENSE + }; + return values; +} + +inline const char * const *EnumNamesLSHProjectionType() { + static const char * const names[4] = { + "UNKNOWN", + "SPARSE", + "DENSE", + nullptr + }; + return names; +} + +inline const char *EnumNameLSHProjectionType(LSHProjectionType e) { + if (::flatbuffers::IsOutRange(e, LSHProjectionType_UNKNOWN, LSHProjectionType_DENSE)) 
return ""; + const size_t index = static_cast(e); + return EnumNamesLSHProjectionType()[index]; +} + +enum FullyConnectedOptionsWeightsFormat : int8_t { + FullyConnectedOptionsWeightsFormat_DEFAULT = 0, + FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 = 1, + FullyConnectedOptionsWeightsFormat_MIN = FullyConnectedOptionsWeightsFormat_DEFAULT, + FullyConnectedOptionsWeightsFormat_MAX = FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 +}; + +inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOptionsWeightsFormat())[2] { + static const FullyConnectedOptionsWeightsFormat values[] = { + FullyConnectedOptionsWeightsFormat_DEFAULT, + FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 + }; + return values; +} + +inline const char * const *EnumNamesFullyConnectedOptionsWeightsFormat() { + static const char * const names[3] = { + "DEFAULT", + "SHUFFLED4x16INT8", + nullptr + }; + return names; +} + +inline const char *EnumNameFullyConnectedOptionsWeightsFormat(FullyConnectedOptionsWeightsFormat e) { + if (::flatbuffers::IsOutRange(e, FullyConnectedOptionsWeightsFormat_DEFAULT, FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8)) return ""; + const size_t index = static_cast(e); + return EnumNamesFullyConnectedOptionsWeightsFormat()[index]; +} + +enum LSTMKernelType : int8_t { + LSTMKernelType_FULL = 0, + LSTMKernelType_BASIC = 1, + LSTMKernelType_MIN = LSTMKernelType_FULL, + LSTMKernelType_MAX = LSTMKernelType_BASIC +}; + +inline const LSTMKernelType (&EnumValuesLSTMKernelType())[2] { + static const LSTMKernelType values[] = { + LSTMKernelType_FULL, + LSTMKernelType_BASIC + }; + return values; +} + +inline const char * const *EnumNamesLSTMKernelType() { + static const char * const names[3] = { + "FULL", + "BASIC", + nullptr + }; + return names; +} + +inline const char *EnumNameLSTMKernelType(LSTMKernelType e) { + if (::flatbuffers::IsOutRange(e, LSTMKernelType_FULL, LSTMKernelType_BASIC)) return ""; + const size_t index = static_cast(e); + 
return EnumNamesLSTMKernelType()[index]; +} + +enum CombinerType : int8_t { + CombinerType_SUM = 0, + CombinerType_MEAN = 1, + CombinerType_SQRTN = 2, + CombinerType_MIN = CombinerType_SUM, + CombinerType_MAX = CombinerType_SQRTN +}; + +inline const CombinerType (&EnumValuesCombinerType())[3] { + static const CombinerType values[] = { + CombinerType_SUM, + CombinerType_MEAN, + CombinerType_SQRTN + }; + return values; +} + +inline const char * const *EnumNamesCombinerType() { + static const char * const names[4] = { + "SUM", + "MEAN", + "SQRTN", + nullptr + }; + return names; +} + +inline const char *EnumNameCombinerType(CombinerType e) { + if (::flatbuffers::IsOutRange(e, CombinerType_SUM, CombinerType_SQRTN)) return ""; + const size_t index = static_cast(e); + return EnumNamesCombinerType()[index]; +} + +enum MirrorPadMode : int8_t { + MirrorPadMode_REFLECT = 0, + MirrorPadMode_SYMMETRIC = 1, + MirrorPadMode_MIN = MirrorPadMode_REFLECT, + MirrorPadMode_MAX = MirrorPadMode_SYMMETRIC +}; + +inline const MirrorPadMode (&EnumValuesMirrorPadMode())[2] { + static const MirrorPadMode values[] = { + MirrorPadMode_REFLECT, + MirrorPadMode_SYMMETRIC + }; + return values; +} + +inline const char * const *EnumNamesMirrorPadMode() { + static const char * const names[3] = { + "REFLECT", + "SYMMETRIC", + nullptr + }; + return names; +} + +inline const char *EnumNameMirrorPadMode(MirrorPadMode e) { + if (::flatbuffers::IsOutRange(e, MirrorPadMode_REFLECT, MirrorPadMode_SYMMETRIC)) return ""; + const size_t index = static_cast(e); + return EnumNamesMirrorPadMode()[index]; +} + +enum CustomOptionsFormat : int8_t { + CustomOptionsFormat_FLEXBUFFERS = 0, + CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS, + CustomOptionsFormat_MAX = CustomOptionsFormat_FLEXBUFFERS +}; + +inline const CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1] { + static const CustomOptionsFormat values[] = { + CustomOptionsFormat_FLEXBUFFERS + }; + return values; +} + +inline const char * 
const *EnumNamesCustomOptionsFormat() { + static const char * const names[2] = { + "FLEXBUFFERS", + nullptr + }; + return names; +} + +inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e) { + if (::flatbuffers::IsOutRange(e, CustomOptionsFormat_FLEXBUFFERS, CustomOptionsFormat_FLEXBUFFERS)) return ""; + const size_t index = static_cast(e); + return EnumNamesCustomOptionsFormat()[index]; +} + +struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef CustomQuantizationBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_CUSTOM = 4 + }; + const ::flatbuffers::Vector *custom() const { + return GetPointer *>(VT_CUSTOM); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_CUSTOM) && + verifier.VerifyVector(custom()) && + verifier.EndTable(); + } +}; + +struct CustomQuantizationBuilder { + typedef CustomQuantization Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_custom(::flatbuffers::Offset<::flatbuffers::Vector> custom) { + fbb_.AddOffset(CustomQuantization::VT_CUSTOM, custom); + } + explicit CustomQuantizationBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateCustomQuantization( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> custom = 0) { + CustomQuantizationBuilder builder_(_fbb); + builder_.add_custom(custom); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateCustomQuantizationDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *custom = nullptr) { + if (custom) { _fbb.ForceVectorAlignment(custom->size(), sizeof(uint8_t), 16); } + auto custom__ = custom ? 
_fbb.CreateVector(*custom) : 0; + return opencv_tflite::CreateCustomQuantization( + _fbb, + custom__); +} + +struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef QuantizationParametersBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_MIN = 4, + VT_MAX = 6, + VT_SCALE = 8, + VT_ZERO_POINT = 10, + VT_DETAILS_TYPE = 12, + VT_DETAILS = 14, + VT_QUANTIZED_DIMENSION = 16 + }; + const ::flatbuffers::Vector *min() const { + return GetPointer *>(VT_MIN); + } + const ::flatbuffers::Vector *max() const { + return GetPointer *>(VT_MAX); + } + const ::flatbuffers::Vector *scale() const { + return GetPointer *>(VT_SCALE); + } + const ::flatbuffers::Vector *zero_point() const { + return GetPointer *>(VT_ZERO_POINT); + } + opencv_tflite::QuantizationDetails details_type() const { + return static_cast(GetField(VT_DETAILS_TYPE, 0)); + } + const void *details() const { + return GetPointer(VT_DETAILS); + } + template const T *details_as() const; + const opencv_tflite::CustomQuantization *details_as_CustomQuantization() const { + return details_type() == opencv_tflite::QuantizationDetails_CustomQuantization ? 
static_cast(details()) : nullptr; + } + int32_t quantized_dimension() const { + return GetField(VT_QUANTIZED_DIMENSION, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_MIN) && + verifier.VerifyVector(min()) && + VerifyOffset(verifier, VT_MAX) && + verifier.VerifyVector(max()) && + VerifyOffset(verifier, VT_SCALE) && + verifier.VerifyVector(scale()) && + VerifyOffset(verifier, VT_ZERO_POINT) && + verifier.VerifyVector(zero_point()) && + VerifyField(verifier, VT_DETAILS_TYPE, 1) && + VerifyOffset(verifier, VT_DETAILS) && + VerifyQuantizationDetails(verifier, details(), details_type()) && + VerifyField(verifier, VT_QUANTIZED_DIMENSION, 4) && + verifier.EndTable(); + } +}; + +template<> inline const opencv_tflite::CustomQuantization *QuantizationParameters::details_as() const { + return details_as_CustomQuantization(); +} + +struct QuantizationParametersBuilder { + typedef QuantizationParameters Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_min(::flatbuffers::Offset<::flatbuffers::Vector> min) { + fbb_.AddOffset(QuantizationParameters::VT_MIN, min); + } + void add_max(::flatbuffers::Offset<::flatbuffers::Vector> max) { + fbb_.AddOffset(QuantizationParameters::VT_MAX, max); + } + void add_scale(::flatbuffers::Offset<::flatbuffers::Vector> scale) { + fbb_.AddOffset(QuantizationParameters::VT_SCALE, scale); + } + void add_zero_point(::flatbuffers::Offset<::flatbuffers::Vector> zero_point) { + fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, zero_point); + } + void add_details_type(opencv_tflite::QuantizationDetails details_type) { + fbb_.AddElement(QuantizationParameters::VT_DETAILS_TYPE, static_cast(details_type), 0); + } + void add_details(::flatbuffers::Offset details) { + fbb_.AddOffset(QuantizationParameters::VT_DETAILS, details); + } + void add_quantized_dimension(int32_t quantized_dimension) { + 
fbb_.AddElement(QuantizationParameters::VT_QUANTIZED_DIMENSION, quantized_dimension, 0); + } + explicit QuantizationParametersBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateQuantizationParameters( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> min = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> max = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> scale = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> zero_point = 0, + opencv_tflite::QuantizationDetails details_type = opencv_tflite::QuantizationDetails_NONE, + ::flatbuffers::Offset details = 0, + int32_t quantized_dimension = 0) { + QuantizationParametersBuilder builder_(_fbb); + builder_.add_quantized_dimension(quantized_dimension); + builder_.add_details(details); + builder_.add_zero_point(zero_point); + builder_.add_scale(scale); + builder_.add_max(max); + builder_.add_min(min); + builder_.add_details_type(details_type); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateQuantizationParametersDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *min = nullptr, + const std::vector *max = nullptr, + const std::vector *scale = nullptr, + const std::vector *zero_point = nullptr, + opencv_tflite::QuantizationDetails details_type = opencv_tflite::QuantizationDetails_NONE, + ::flatbuffers::Offset details = 0, + int32_t quantized_dimension = 0) { + auto min__ = min ? _fbb.CreateVector(*min) : 0; + auto max__ = max ? _fbb.CreateVector(*max) : 0; + auto scale__ = scale ? _fbb.CreateVector(*scale) : 0; + auto zero_point__ = zero_point ? 
_fbb.CreateVector(*zero_point) : 0; + return opencv_tflite::CreateQuantizationParameters( + _fbb, + min__, + max__, + scale__, + zero_point__, + details_type, + details, + quantized_dimension); +} + +struct Int32Vector FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef Int32VectorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_VALUES = 4 + }; + const ::flatbuffers::Vector *values() const { + return GetPointer *>(VT_VALUES); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_VALUES) && + verifier.VerifyVector(values()) && + verifier.EndTable(); + } +}; + +struct Int32VectorBuilder { + typedef Int32Vector Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_values(::flatbuffers::Offset<::flatbuffers::Vector> values) { + fbb_.AddOffset(Int32Vector::VT_VALUES, values); + } + explicit Int32VectorBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateInt32Vector( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> values = 0) { + Int32VectorBuilder builder_(_fbb); + builder_.add_values(values); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateInt32VectorDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *values = nullptr) { + auto values__ = values ? 
_fbb.CreateVector(*values) : 0; + return opencv_tflite::CreateInt32Vector( + _fbb, + values__); +} + +struct Uint16Vector FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef Uint16VectorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_VALUES = 4 + }; + const ::flatbuffers::Vector *values() const { + return GetPointer *>(VT_VALUES); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_VALUES) && + verifier.VerifyVector(values()) && + verifier.EndTable(); + } +}; + +struct Uint16VectorBuilder { + typedef Uint16Vector Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_values(::flatbuffers::Offset<::flatbuffers::Vector> values) { + fbb_.AddOffset(Uint16Vector::VT_VALUES, values); + } + explicit Uint16VectorBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateUint16Vector( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> values = 0) { + Uint16VectorBuilder builder_(_fbb); + builder_.add_values(values); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateUint16VectorDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *values = nullptr) { + if (values) { _fbb.ForceVectorAlignment(values->size(), sizeof(uint16_t), 4); } + auto values__ = values ? 
_fbb.CreateVector(*values) : 0; + return opencv_tflite::CreateUint16Vector( + _fbb, + values__); +} + +struct Uint8Vector FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef Uint8VectorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_VALUES = 4 + }; + const ::flatbuffers::Vector *values() const { + return GetPointer *>(VT_VALUES); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_VALUES) && + verifier.VerifyVector(values()) && + verifier.EndTable(); + } +}; + +struct Uint8VectorBuilder { + typedef Uint8Vector Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_values(::flatbuffers::Offset<::flatbuffers::Vector> values) { + fbb_.AddOffset(Uint8Vector::VT_VALUES, values); + } + explicit Uint8VectorBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateUint8Vector( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> values = 0) { + Uint8VectorBuilder builder_(_fbb); + builder_.add_values(values); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateUint8VectorDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *values = nullptr) { + if (values) { _fbb.ForceVectorAlignment(values->size(), sizeof(uint8_t), 4); } + auto values__ = values ? 
_fbb.CreateVector(*values) : 0; + return opencv_tflite::CreateUint8Vector( + _fbb, + values__); +} + +struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DimensionMetadataBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FORMAT = 4, + VT_DENSE_SIZE = 6, + VT_ARRAY_SEGMENTS_TYPE = 8, + VT_ARRAY_SEGMENTS = 10, + VT_ARRAY_INDICES_TYPE = 12, + VT_ARRAY_INDICES = 14 + }; + opencv_tflite::DimensionType format() const { + return static_cast(GetField(VT_FORMAT, 0)); + } + int32_t dense_size() const { + return GetField(VT_DENSE_SIZE, 0); + } + opencv_tflite::SparseIndexVector array_segments_type() const { + return static_cast(GetField(VT_ARRAY_SEGMENTS_TYPE, 0)); + } + const void *array_segments() const { + return GetPointer(VT_ARRAY_SEGMENTS); + } + template const T *array_segments_as() const; + const opencv_tflite::Int32Vector *array_segments_as_Int32Vector() const { + return array_segments_type() == opencv_tflite::SparseIndexVector_Int32Vector ? static_cast(array_segments()) : nullptr; + } + const opencv_tflite::Uint16Vector *array_segments_as_Uint16Vector() const { + return array_segments_type() == opencv_tflite::SparseIndexVector_Uint16Vector ? static_cast(array_segments()) : nullptr; + } + const opencv_tflite::Uint8Vector *array_segments_as_Uint8Vector() const { + return array_segments_type() == opencv_tflite::SparseIndexVector_Uint8Vector ? static_cast(array_segments()) : nullptr; + } + opencv_tflite::SparseIndexVector array_indices_type() const { + return static_cast(GetField(VT_ARRAY_INDICES_TYPE, 0)); + } + const void *array_indices() const { + return GetPointer(VT_ARRAY_INDICES); + } + template const T *array_indices_as() const; + const opencv_tflite::Int32Vector *array_indices_as_Int32Vector() const { + return array_indices_type() == opencv_tflite::SparseIndexVector_Int32Vector ? 
static_cast(array_indices()) : nullptr; + } + const opencv_tflite::Uint16Vector *array_indices_as_Uint16Vector() const { + return array_indices_type() == opencv_tflite::SparseIndexVector_Uint16Vector ? static_cast(array_indices()) : nullptr; + } + const opencv_tflite::Uint8Vector *array_indices_as_Uint8Vector() const { + return array_indices_type() == opencv_tflite::SparseIndexVector_Uint8Vector ? static_cast(array_indices()) : nullptr; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FORMAT, 1) && + VerifyField(verifier, VT_DENSE_SIZE, 4) && + VerifyField(verifier, VT_ARRAY_SEGMENTS_TYPE, 1) && + VerifyOffset(verifier, VT_ARRAY_SEGMENTS) && + VerifySparseIndexVector(verifier, array_segments(), array_segments_type()) && + VerifyField(verifier, VT_ARRAY_INDICES_TYPE, 1) && + VerifyOffset(verifier, VT_ARRAY_INDICES) && + VerifySparseIndexVector(verifier, array_indices(), array_indices_type()) && + verifier.EndTable(); + } +}; + +template<> inline const opencv_tflite::Int32Vector *DimensionMetadata::array_segments_as() const { + return array_segments_as_Int32Vector(); +} + +template<> inline const opencv_tflite::Uint16Vector *DimensionMetadata::array_segments_as() const { + return array_segments_as_Uint16Vector(); +} + +template<> inline const opencv_tflite::Uint8Vector *DimensionMetadata::array_segments_as() const { + return array_segments_as_Uint8Vector(); +} + +template<> inline const opencv_tflite::Int32Vector *DimensionMetadata::array_indices_as() const { + return array_indices_as_Int32Vector(); +} + +template<> inline const opencv_tflite::Uint16Vector *DimensionMetadata::array_indices_as() const { + return array_indices_as_Uint16Vector(); +} + +template<> inline const opencv_tflite::Uint8Vector *DimensionMetadata::array_indices_as() const { + return array_indices_as_Uint8Vector(); +} + +struct DimensionMetadataBuilder { + typedef DimensionMetadata Table; + 
::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_format(opencv_tflite::DimensionType format) { + fbb_.AddElement(DimensionMetadata::VT_FORMAT, static_cast(format), 0); + } + void add_dense_size(int32_t dense_size) { + fbb_.AddElement(DimensionMetadata::VT_DENSE_SIZE, dense_size, 0); + } + void add_array_segments_type(opencv_tflite::SparseIndexVector array_segments_type) { + fbb_.AddElement(DimensionMetadata::VT_ARRAY_SEGMENTS_TYPE, static_cast(array_segments_type), 0); + } + void add_array_segments(::flatbuffers::Offset array_segments) { + fbb_.AddOffset(DimensionMetadata::VT_ARRAY_SEGMENTS, array_segments); + } + void add_array_indices_type(opencv_tflite::SparseIndexVector array_indices_type) { + fbb_.AddElement(DimensionMetadata::VT_ARRAY_INDICES_TYPE, static_cast(array_indices_type), 0); + } + void add_array_indices(::flatbuffers::Offset array_indices) { + fbb_.AddOffset(DimensionMetadata::VT_ARRAY_INDICES, array_indices); + } + explicit DimensionMetadataBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDimensionMetadata( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::DimensionType format = opencv_tflite::DimensionType_DENSE, + int32_t dense_size = 0, + opencv_tflite::SparseIndexVector array_segments_type = opencv_tflite::SparseIndexVector_NONE, + ::flatbuffers::Offset array_segments = 0, + opencv_tflite::SparseIndexVector array_indices_type = opencv_tflite::SparseIndexVector_NONE, + ::flatbuffers::Offset array_indices = 0) { + DimensionMetadataBuilder builder_(_fbb); + builder_.add_array_indices(array_indices); + builder_.add_array_segments(array_segments); + builder_.add_dense_size(dense_size); + builder_.add_array_indices_type(array_indices_type); + 
builder_.add_array_segments_type(array_segments_type); + builder_.add_format(format); + return builder_.Finish(); +} + +struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SparsityParametersBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_TRAVERSAL_ORDER = 4, + VT_BLOCK_MAP = 6, + VT_DIM_METADATA = 8 + }; + const ::flatbuffers::Vector *traversal_order() const { + return GetPointer *>(VT_TRAVERSAL_ORDER); + } + const ::flatbuffers::Vector *block_map() const { + return GetPointer *>(VT_BLOCK_MAP); + } + const ::flatbuffers::Vector<::flatbuffers::Offset> *dim_metadata() const { + return GetPointer> *>(VT_DIM_METADATA); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_TRAVERSAL_ORDER) && + verifier.VerifyVector(traversal_order()) && + VerifyOffset(verifier, VT_BLOCK_MAP) && + verifier.VerifyVector(block_map()) && + VerifyOffset(verifier, VT_DIM_METADATA) && + verifier.VerifyVector(dim_metadata()) && + verifier.VerifyVectorOfTables(dim_metadata()) && + verifier.EndTable(); + } +}; + +struct SparsityParametersBuilder { + typedef SparsityParameters Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_traversal_order(::flatbuffers::Offset<::flatbuffers::Vector> traversal_order) { + fbb_.AddOffset(SparsityParameters::VT_TRAVERSAL_ORDER, traversal_order); + } + void add_block_map(::flatbuffers::Offset<::flatbuffers::Vector> block_map) { + fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map); + } + void add_dim_metadata(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> dim_metadata) { + fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata); + } + explicit SparsityParametersBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = 
fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSparsityParameters( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> traversal_order = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> block_map = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> dim_metadata = 0) { + SparsityParametersBuilder builder_(_fbb); + builder_.add_dim_metadata(dim_metadata); + builder_.add_block_map(block_map); + builder_.add_traversal_order(traversal_order); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateSparsityParametersDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *traversal_order = nullptr, + const std::vector *block_map = nullptr, + const std::vector<::flatbuffers::Offset> *dim_metadata = nullptr) { + auto traversal_order__ = traversal_order ? _fbb.CreateVector(*traversal_order) : 0; + auto block_map__ = block_map ? _fbb.CreateVector(*block_map) : 0; + auto dim_metadata__ = dim_metadata ? 
_fbb.CreateVector<::flatbuffers::Offset>(*dim_metadata) : 0; + return opencv_tflite::CreateSparsityParameters( + _fbb, + traversal_order__, + block_map__, + dim_metadata__); +} + +struct VariantSubType FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef VariantSubTypeBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_SHAPE = 4, + VT_TYPE = 6, + VT_HAS_RANK = 8 + }; + const ::flatbuffers::Vector *shape() const { + return GetPointer *>(VT_SHAPE); + } + opencv_tflite::TensorType type() const { + return static_cast(GetField(VT_TYPE, 0)); + } + bool has_rank() const { + return GetField(VT_HAS_RANK, 0) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_SHAPE) && + verifier.VerifyVector(shape()) && + VerifyField(verifier, VT_TYPE, 1) && + VerifyField(verifier, VT_HAS_RANK, 1) && + verifier.EndTable(); + } +}; + +struct VariantSubTypeBuilder { + typedef VariantSubType Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_shape(::flatbuffers::Offset<::flatbuffers::Vector> shape) { + fbb_.AddOffset(VariantSubType::VT_SHAPE, shape); + } + void add_type(opencv_tflite::TensorType type) { + fbb_.AddElement(VariantSubType::VT_TYPE, static_cast(type), 0); + } + void add_has_rank(bool has_rank) { + fbb_.AddElement(VariantSubType::VT_HAS_RANK, static_cast(has_rank), 0); + } + explicit VariantSubTypeBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateVariantSubType( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> shape = 0, + opencv_tflite::TensorType type = opencv_tflite::TensorType_FLOAT32, + bool has_rank = false) { + VariantSubTypeBuilder 
builder_(_fbb); + builder_.add_shape(shape); + builder_.add_has_rank(has_rank); + builder_.add_type(type); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateVariantSubTypeDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *shape = nullptr, + opencv_tflite::TensorType type = opencv_tflite::TensorType_FLOAT32, + bool has_rank = false) { + auto shape__ = shape ? _fbb.CreateVector(*shape) : 0; + return opencv_tflite::CreateVariantSubType( + _fbb, + shape__, + type, + has_rank); +} + +struct Tensor FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef TensorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_SHAPE = 4, + VT_TYPE = 6, + VT_BUFFER = 8, + VT_NAME = 10, + VT_QUANTIZATION = 12, + VT_IS_VARIABLE = 14, + VT_SPARSITY = 16, + VT_SHAPE_SIGNATURE = 18, + VT_HAS_RANK = 20, + VT_VARIANT_TENSORS = 22 + }; + const ::flatbuffers::Vector *shape() const { + return GetPointer *>(VT_SHAPE); + } + opencv_tflite::TensorType type() const { + return static_cast(GetField(VT_TYPE, 0)); + } + uint32_t buffer() const { + return GetField(VT_BUFFER, 0); + } + const ::flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + const opencv_tflite::QuantizationParameters *quantization() const { + return GetPointer(VT_QUANTIZATION); + } + bool is_variable() const { + return GetField(VT_IS_VARIABLE, 0) != 0; + } + const opencv_tflite::SparsityParameters *sparsity() const { + return GetPointer(VT_SPARSITY); + } + const ::flatbuffers::Vector *shape_signature() const { + return GetPointer *>(VT_SHAPE_SIGNATURE); + } + bool has_rank() const { + return GetField(VT_HAS_RANK, 0) != 0; + } + const ::flatbuffers::Vector<::flatbuffers::Offset> *variant_tensors() const { + return GetPointer> *>(VT_VARIANT_TENSORS); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_SHAPE) && + verifier.VerifyVector(shape()) && + 
VerifyField(verifier, VT_TYPE, 1) && + VerifyField(verifier, VT_BUFFER, 4) && + VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && + VerifyOffset(verifier, VT_QUANTIZATION) && + verifier.VerifyTable(quantization()) && + VerifyField(verifier, VT_IS_VARIABLE, 1) && + VerifyOffset(verifier, VT_SPARSITY) && + verifier.VerifyTable(sparsity()) && + VerifyOffset(verifier, VT_SHAPE_SIGNATURE) && + verifier.VerifyVector(shape_signature()) && + VerifyField(verifier, VT_HAS_RANK, 1) && + VerifyOffset(verifier, VT_VARIANT_TENSORS) && + verifier.VerifyVector(variant_tensors()) && + verifier.VerifyVectorOfTables(variant_tensors()) && + verifier.EndTable(); + } +}; + +struct TensorBuilder { + typedef Tensor Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_shape(::flatbuffers::Offset<::flatbuffers::Vector> shape) { + fbb_.AddOffset(Tensor::VT_SHAPE, shape); + } + void add_type(opencv_tflite::TensorType type) { + fbb_.AddElement(Tensor::VT_TYPE, static_cast(type), 0); + } + void add_buffer(uint32_t buffer) { + fbb_.AddElement(Tensor::VT_BUFFER, buffer, 0); + } + void add_name(::flatbuffers::Offset<::flatbuffers::String> name) { + fbb_.AddOffset(Tensor::VT_NAME, name); + } + void add_quantization(::flatbuffers::Offset quantization) { + fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization); + } + void add_is_variable(bool is_variable) { + fbb_.AddElement(Tensor::VT_IS_VARIABLE, static_cast(is_variable), 0); + } + void add_sparsity(::flatbuffers::Offset sparsity) { + fbb_.AddOffset(Tensor::VT_SPARSITY, sparsity); + } + void add_shape_signature(::flatbuffers::Offset<::flatbuffers::Vector> shape_signature) { + fbb_.AddOffset(Tensor::VT_SHAPE_SIGNATURE, shape_signature); + } + void add_has_rank(bool has_rank) { + fbb_.AddElement(Tensor::VT_HAS_RANK, static_cast(has_rank), 0); + } + void add_variant_tensors(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> variant_tensors) { + 
fbb_.AddOffset(Tensor::VT_VARIANT_TENSORS, variant_tensors); + } + explicit TensorBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateTensor( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> shape = 0, + opencv_tflite::TensorType type = opencv_tflite::TensorType_FLOAT32, + uint32_t buffer = 0, + ::flatbuffers::Offset<::flatbuffers::String> name = 0, + ::flatbuffers::Offset quantization = 0, + bool is_variable = false, + ::flatbuffers::Offset sparsity = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> shape_signature = 0, + bool has_rank = false, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> variant_tensors = 0) { + TensorBuilder builder_(_fbb); + builder_.add_variant_tensors(variant_tensors); + builder_.add_shape_signature(shape_signature); + builder_.add_sparsity(sparsity); + builder_.add_quantization(quantization); + builder_.add_name(name); + builder_.add_buffer(buffer); + builder_.add_shape(shape); + builder_.add_has_rank(has_rank); + builder_.add_is_variable(is_variable); + builder_.add_type(type); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateTensorDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *shape = nullptr, + opencv_tflite::TensorType type = opencv_tflite::TensorType_FLOAT32, + uint32_t buffer = 0, + const char *name = nullptr, + ::flatbuffers::Offset quantization = 0, + bool is_variable = false, + ::flatbuffers::Offset sparsity = 0, + const std::vector *shape_signature = nullptr, + bool has_rank = false, + const std::vector<::flatbuffers::Offset> *variant_tensors = nullptr) { + auto shape__ = shape ? _fbb.CreateVector(*shape) : 0; + auto name__ = name ? _fbb.CreateString(name) : 0; + auto shape_signature__ = shape_signature ? 
_fbb.CreateVector(*shape_signature) : 0; + auto variant_tensors__ = variant_tensors ? _fbb.CreateVector<::flatbuffers::Offset>(*variant_tensors) : 0; + return opencv_tflite::CreateTensor( + _fbb, + shape__, + type, + buffer, + name__, + quantization, + is_variable, + sparsity, + shape_signature__, + has_rank, + variant_tensors__); +} + +struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef Conv2DOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_PADDING = 4, + VT_STRIDE_W = 6, + VT_STRIDE_H = 8, + VT_FUSED_ACTIVATION_FUNCTION = 10, + VT_DILATION_W_FACTOR = 12, + VT_DILATION_H_FACTOR = 14 + }; + opencv_tflite::Padding padding() const { + return static_cast(GetField(VT_PADDING, 0)); + } + int32_t stride_w() const { + return GetField(VT_STRIDE_W, 0); + } + int32_t stride_h() const { + return GetField(VT_STRIDE_H, 0); + } + opencv_tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + int32_t dilation_w_factor() const { + return GetField(VT_DILATION_W_FACTOR, 1); + } + int32_t dilation_h_factor() const { + return GetField(VT_DILATION_H_FACTOR, 1); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_PADDING, 1) && + VerifyField(verifier, VT_STRIDE_W, 4) && + VerifyField(verifier, VT_STRIDE_H, 4) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_DILATION_W_FACTOR, 4) && + VerifyField(verifier, VT_DILATION_H_FACTOR, 4) && + verifier.EndTable(); + } +}; + +struct Conv2DOptionsBuilder { + typedef Conv2DOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_padding(opencv_tflite::Padding padding) { + fbb_.AddElement(Conv2DOptions::VT_PADDING, static_cast(padding), 0); + } + void add_stride_w(int32_t stride_w) { + 
fbb_.AddElement(Conv2DOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) { + fbb_.AddElement(Conv2DOptions::VT_STRIDE_H, stride_h, 0); + } + void add_fused_activation_function(opencv_tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_dilation_w_factor(int32_t dilation_w_factor) { + fbb_.AddElement(Conv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1); + } + void add_dilation_h_factor(int32_t dilation_h_factor) { + fbb_.AddElement(Conv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1); + } + explicit Conv2DOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateConv2DOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::Padding padding = opencv_tflite::Padding_SAME, + int32_t stride_w = 0, + int32_t stride_h = 0, + opencv_tflite::ActivationFunctionType fused_activation_function = opencv_tflite::ActivationFunctionType_NONE, + int32_t dilation_w_factor = 1, + int32_t dilation_h_factor = 1) { + Conv2DOptionsBuilder builder_(_fbb); + builder_.add_dilation_h_factor(dilation_h_factor); + builder_.add_dilation_w_factor(dilation_w_factor); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + +struct Conv3DOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef Conv3DOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_PADDING = 4, + VT_STRIDE_D = 6, + VT_STRIDE_W = 8, + VT_STRIDE_H = 10, + VT_FUSED_ACTIVATION_FUNCTION = 12, + VT_DILATION_D_FACTOR = 14, + 
VT_DILATION_W_FACTOR = 16, + VT_DILATION_H_FACTOR = 18 + }; + opencv_tflite::Padding padding() const { + return static_cast(GetField(VT_PADDING, 0)); + } + int32_t stride_d() const { + return GetField(VT_STRIDE_D, 0); + } + int32_t stride_w() const { + return GetField(VT_STRIDE_W, 0); + } + int32_t stride_h() const { + return GetField(VT_STRIDE_H, 0); + } + opencv_tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + int32_t dilation_d_factor() const { + return GetField(VT_DILATION_D_FACTOR, 1); + } + int32_t dilation_w_factor() const { + return GetField(VT_DILATION_W_FACTOR, 1); + } + int32_t dilation_h_factor() const { + return GetField(VT_DILATION_H_FACTOR, 1); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_PADDING, 1) && + VerifyField(verifier, VT_STRIDE_D, 4) && + VerifyField(verifier, VT_STRIDE_W, 4) && + VerifyField(verifier, VT_STRIDE_H, 4) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_DILATION_D_FACTOR, 4) && + VerifyField(verifier, VT_DILATION_W_FACTOR, 4) && + VerifyField(verifier, VT_DILATION_H_FACTOR, 4) && + verifier.EndTable(); + } +}; + +struct Conv3DOptionsBuilder { + typedef Conv3DOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_padding(opencv_tflite::Padding padding) { + fbb_.AddElement(Conv3DOptions::VT_PADDING, static_cast(padding), 0); + } + void add_stride_d(int32_t stride_d) { + fbb_.AddElement(Conv3DOptions::VT_STRIDE_D, stride_d, 0); + } + void add_stride_w(int32_t stride_w) { + fbb_.AddElement(Conv3DOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) { + fbb_.AddElement(Conv3DOptions::VT_STRIDE_H, stride_h, 0); + } + void add_fused_activation_function(opencv_tflite::ActivationFunctionType fused_activation_function) { + 
fbb_.AddElement(Conv3DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_dilation_d_factor(int32_t dilation_d_factor) { + fbb_.AddElement(Conv3DOptions::VT_DILATION_D_FACTOR, dilation_d_factor, 1); + } + void add_dilation_w_factor(int32_t dilation_w_factor) { + fbb_.AddElement(Conv3DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1); + } + void add_dilation_h_factor(int32_t dilation_h_factor) { + fbb_.AddElement(Conv3DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1); + } + explicit Conv3DOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateConv3DOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::Padding padding = opencv_tflite::Padding_SAME, + int32_t stride_d = 0, + int32_t stride_w = 0, + int32_t stride_h = 0, + opencv_tflite::ActivationFunctionType fused_activation_function = opencv_tflite::ActivationFunctionType_NONE, + int32_t dilation_d_factor = 1, + int32_t dilation_w_factor = 1, + int32_t dilation_h_factor = 1) { + Conv3DOptionsBuilder builder_(_fbb); + builder_.add_dilation_h_factor(dilation_h_factor); + builder_.add_dilation_w_factor(dilation_w_factor); + builder_.add_dilation_d_factor(dilation_d_factor); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_stride_d(stride_d); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + +struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef Pool2DOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_PADDING = 4, + VT_STRIDE_W = 6, + VT_STRIDE_H = 8, + VT_FILTER_WIDTH = 10, + VT_FILTER_HEIGHT = 12, + VT_FUSED_ACTIVATION_FUNCTION = 14 + 
}; + opencv_tflite::Padding padding() const { + return static_cast(GetField(VT_PADDING, 0)); + } + int32_t stride_w() const { + return GetField(VT_STRIDE_W, 0); + } + int32_t stride_h() const { + return GetField(VT_STRIDE_H, 0); + } + int32_t filter_width() const { + return GetField(VT_FILTER_WIDTH, 0); + } + int32_t filter_height() const { + return GetField(VT_FILTER_HEIGHT, 0); + } + opencv_tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_PADDING, 1) && + VerifyField(verifier, VT_STRIDE_W, 4) && + VerifyField(verifier, VT_STRIDE_H, 4) && + VerifyField(verifier, VT_FILTER_WIDTH, 4) && + VerifyField(verifier, VT_FILTER_HEIGHT, 4) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + verifier.EndTable(); + } +}; + +struct Pool2DOptionsBuilder { + typedef Pool2DOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_padding(opencv_tflite::Padding padding) { + fbb_.AddElement(Pool2DOptions::VT_PADDING, static_cast(padding), 0); + } + void add_stride_w(int32_t stride_w) { + fbb_.AddElement(Pool2DOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) { + fbb_.AddElement(Pool2DOptions::VT_STRIDE_H, stride_h, 0); + } + void add_filter_width(int32_t filter_width) { + fbb_.AddElement(Pool2DOptions::VT_FILTER_WIDTH, filter_width, 0); + } + void add_filter_height(int32_t filter_height) { + fbb_.AddElement(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0); + } + void add_fused_activation_function(opencv_tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit Pool2DOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + 
} + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreatePool2DOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::Padding padding = opencv_tflite::Padding_SAME, + int32_t stride_w = 0, + int32_t stride_h = 0, + int32_t filter_width = 0, + int32_t filter_height = 0, + opencv_tflite::ActivationFunctionType fused_activation_function = opencv_tflite::ActivationFunctionType_NONE) { + Pool2DOptionsBuilder builder_(_fbb); + builder_.add_filter_height(filter_height); + builder_.add_filter_width(filter_width); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + +struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DepthwiseConv2DOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_PADDING = 4, + VT_STRIDE_W = 6, + VT_STRIDE_H = 8, + VT_DEPTH_MULTIPLIER = 10, + VT_FUSED_ACTIVATION_FUNCTION = 12, + VT_DILATION_W_FACTOR = 14, + VT_DILATION_H_FACTOR = 16 + }; + opencv_tflite::Padding padding() const { + return static_cast(GetField(VT_PADDING, 0)); + } + int32_t stride_w() const { + return GetField(VT_STRIDE_W, 0); + } + int32_t stride_h() const { + return GetField(VT_STRIDE_H, 0); + } + int32_t depth_multiplier() const { + return GetField(VT_DEPTH_MULTIPLIER, 0); + } + opencv_tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + int32_t dilation_w_factor() const { + return GetField(VT_DILATION_W_FACTOR, 1); + } + int32_t dilation_h_factor() const { + return GetField(VT_DILATION_H_FACTOR, 1); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_PADDING, 
1) && + VerifyField(verifier, VT_STRIDE_W, 4) && + VerifyField(verifier, VT_STRIDE_H, 4) && + VerifyField(verifier, VT_DEPTH_MULTIPLIER, 4) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_DILATION_W_FACTOR, 4) && + VerifyField(verifier, VT_DILATION_H_FACTOR, 4) && + verifier.EndTable(); + } +}; + +struct DepthwiseConv2DOptionsBuilder { + typedef DepthwiseConv2DOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_padding(opencv_tflite::Padding padding) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_PADDING, static_cast(padding), 0); + } + void add_stride_w(int32_t stride_w) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_STRIDE_H, stride_h, 0); + } + void add_depth_multiplier(int32_t depth_multiplier) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, depth_multiplier, 0); + } + void add_fused_activation_function(opencv_tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_dilation_w_factor(int32_t dilation_w_factor) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1); + } + void add_dilation_h_factor(int32_t dilation_h_factor) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1); + } + explicit DepthwiseConv2DOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDepthwiseConv2DOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::Padding padding = opencv_tflite::Padding_SAME, + int32_t stride_w = 0, + int32_t 
stride_h = 0, + int32_t depth_multiplier = 0, + opencv_tflite::ActivationFunctionType fused_activation_function = opencv_tflite::ActivationFunctionType_NONE, + int32_t dilation_w_factor = 1, + int32_t dilation_h_factor = 1) { + DepthwiseConv2DOptionsBuilder builder_(_fbb); + builder_.add_dilation_h_factor(dilation_h_factor); + builder_.add_dilation_w_factor(dilation_w_factor); + builder_.add_depth_multiplier(depth_multiplier); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + +struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ConcatEmbeddingsOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NUM_CHANNELS = 4, + VT_NUM_COLUMNS_PER_CHANNEL = 6, + VT_EMBEDDING_DIM_PER_CHANNEL = 8 + }; + int32_t num_channels() const { + return GetField(VT_NUM_CHANNELS, 0); + } + const ::flatbuffers::Vector *num_columns_per_channel() const { + return GetPointer *>(VT_NUM_COLUMNS_PER_CHANNEL); + } + const ::flatbuffers::Vector *embedding_dim_per_channel() const { + return GetPointer *>(VT_EMBEDDING_DIM_PER_CHANNEL); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_NUM_CHANNELS, 4) && + VerifyOffset(verifier, VT_NUM_COLUMNS_PER_CHANNEL) && + verifier.VerifyVector(num_columns_per_channel()) && + VerifyOffset(verifier, VT_EMBEDDING_DIM_PER_CHANNEL) && + verifier.VerifyVector(embedding_dim_per_channel()) && + verifier.EndTable(); + } +}; + +struct ConcatEmbeddingsOptionsBuilder { + typedef ConcatEmbeddingsOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_num_channels(int32_t num_channels) { + fbb_.AddElement(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0); + } + void 
add_num_columns_per_channel(::flatbuffers::Offset<::flatbuffers::Vector> num_columns_per_channel) { + fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel); + } + void add_embedding_dim_per_channel(::flatbuffers::Offset<::flatbuffers::Vector> embedding_dim_per_channel) { + fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL, embedding_dim_per_channel); + } + explicit ConcatEmbeddingsOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateConcatEmbeddingsOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t num_channels = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> num_columns_per_channel = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> embedding_dim_per_channel = 0) { + ConcatEmbeddingsOptionsBuilder builder_(_fbb); + builder_.add_embedding_dim_per_channel(embedding_dim_per_channel); + builder_.add_num_columns_per_channel(num_columns_per_channel); + builder_.add_num_channels(num_channels); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateConcatEmbeddingsOptionsDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t num_channels = 0, + const std::vector *num_columns_per_channel = nullptr, + const std::vector *embedding_dim_per_channel = nullptr) { + auto num_columns_per_channel__ = num_columns_per_channel ? _fbb.CreateVector(*num_columns_per_channel) : 0; + auto embedding_dim_per_channel__ = embedding_dim_per_channel ? 
_fbb.CreateVector(*embedding_dim_per_channel) : 0; + return opencv_tflite::CreateConcatEmbeddingsOptions( + _fbb, + num_channels, + num_columns_per_channel__, + embedding_dim_per_channel__); +} + +struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef LSHProjectionOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_TYPE = 4 + }; + opencv_tflite::LSHProjectionType type() const { + return static_cast(GetField(VT_TYPE, 0)); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_TYPE, 1) && + verifier.EndTable(); + } +}; + +struct LSHProjectionOptionsBuilder { + typedef LSHProjectionOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_type(opencv_tflite::LSHProjectionType type) { + fbb_.AddElement(LSHProjectionOptions::VT_TYPE, static_cast(type), 0); + } + explicit LSHProjectionOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateLSHProjectionOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::LSHProjectionType type = opencv_tflite::LSHProjectionType_UNKNOWN) { + LSHProjectionOptionsBuilder builder_(_fbb); + builder_.add_type(type); + return builder_.Finish(); +} + +struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SVDFOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_RANK = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6, + VT_ASYMMETRIC_QUANTIZE_INPUTS = 8 + }; + int32_t rank() const { + return GetField(VT_RANK, 0); + } + opencv_tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 
0)); + } + bool asymmetric_quantize_inputs() const { + return GetField(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_RANK, 4) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) && + verifier.EndTable(); + } +}; + +struct SVDFOptionsBuilder { + typedef SVDFOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_rank(int32_t rank) { + fbb_.AddElement(SVDFOptions::VT_RANK, rank, 0); + } + void add_fused_activation_function(opencv_tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) { + fbb_.AddElement(SVDFOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast(asymmetric_quantize_inputs), 0); + } + explicit SVDFOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSVDFOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t rank = 0, + opencv_tflite::ActivationFunctionType fused_activation_function = opencv_tflite::ActivationFunctionType_NONE, + bool asymmetric_quantize_inputs = false) { + SVDFOptionsBuilder builder_(_fbb); + builder_.add_rank(rank); + builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct RNNOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef RNNOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 
4, + VT_ASYMMETRIC_QUANTIZE_INPUTS = 6 + }; + opencv_tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool asymmetric_quantize_inputs() const { + return GetField(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) && + verifier.EndTable(); + } +}; + +struct RNNOptionsBuilder { + typedef RNNOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_fused_activation_function(opencv_tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) { + fbb_.AddElement(RNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast(asymmetric_quantize_inputs), 0); + } + explicit RNNOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateRNNOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::ActivationFunctionType fused_activation_function = opencv_tflite::ActivationFunctionType_NONE, + bool asymmetric_quantize_inputs = false) { + RNNOptionsBuilder builder_(_fbb); + builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SequenceRNNOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_TIME_MAJOR 
= 4, + VT_FUSED_ACTIVATION_FUNCTION = 6, + VT_ASYMMETRIC_QUANTIZE_INPUTS = 8 + }; + bool time_major() const { + return GetField(VT_TIME_MAJOR, 0) != 0; + } + opencv_tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool asymmetric_quantize_inputs() const { + return GetField(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_TIME_MAJOR, 1) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) && + verifier.EndTable(); + } +}; + +struct SequenceRNNOptionsBuilder { + typedef SequenceRNNOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_time_major(bool time_major) { + fbb_.AddElement(SequenceRNNOptions::VT_TIME_MAJOR, static_cast(time_major), 0); + } + void add_fused_activation_function(opencv_tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(SequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) { + fbb_.AddElement(SequenceRNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast(asymmetric_quantize_inputs), 0); + } + explicit SequenceRNNOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSequenceRNNOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + bool time_major = false, + opencv_tflite::ActivationFunctionType fused_activation_function = opencv_tflite::ActivationFunctionType_NONE, + bool asymmetric_quantize_inputs = false) { + SequenceRNNOptionsBuilder builder_(_fbb); + 
builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_time_major(time_major); + return builder_.Finish(); +} + +struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef BidirectionalSequenceRNNOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_TIME_MAJOR = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6, + VT_MERGE_OUTPUTS = 8, + VT_ASYMMETRIC_QUANTIZE_INPUTS = 10 + }; + bool time_major() const { + return GetField(VT_TIME_MAJOR, 0) != 0; + } + opencv_tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool merge_outputs() const { + return GetField(VT_MERGE_OUTPUTS, 0) != 0; + } + bool asymmetric_quantize_inputs() const { + return GetField(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_TIME_MAJOR, 1) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_MERGE_OUTPUTS, 1) && + VerifyField(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) && + verifier.EndTable(); + } +}; + +struct BidirectionalSequenceRNNOptionsBuilder { + typedef BidirectionalSequenceRNNOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_time_major(bool time_major) { + fbb_.AddElement(BidirectionalSequenceRNNOptions::VT_TIME_MAJOR, static_cast(time_major), 0); + } + void add_fused_activation_function(opencv_tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_merge_outputs(bool merge_outputs) { + fbb_.AddElement(BidirectionalSequenceRNNOptions::VT_MERGE_OUTPUTS, static_cast(merge_outputs), 0); 
+ } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) { + fbb_.AddElement(BidirectionalSequenceRNNOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast(asymmetric_quantize_inputs), 0); + } + explicit BidirectionalSequenceRNNOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateBidirectionalSequenceRNNOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + bool time_major = false, + opencv_tflite::ActivationFunctionType fused_activation_function = opencv_tflite::ActivationFunctionType_NONE, + bool merge_outputs = false, + bool asymmetric_quantize_inputs = false) { + BidirectionalSequenceRNNOptionsBuilder builder_(_fbb); + builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); + builder_.add_merge_outputs(merge_outputs); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_time_major(time_major); + return builder_.Finish(); +} + +struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef FullyConnectedOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_WEIGHTS_FORMAT = 6, + VT_KEEP_NUM_DIMS = 8, + VT_ASYMMETRIC_QUANTIZE_INPUTS = 10 + }; + opencv_tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + opencv_tflite::FullyConnectedOptionsWeightsFormat weights_format() const { + return static_cast(GetField(VT_WEIGHTS_FORMAT, 0)); + } + bool keep_num_dims() const { + return GetField(VT_KEEP_NUM_DIMS, 0) != 0; + } + bool asymmetric_quantize_inputs() const { + return GetField(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return 
VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_WEIGHTS_FORMAT, 1) && + VerifyField(verifier, VT_KEEP_NUM_DIMS, 1) && + VerifyField(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) && + verifier.EndTable(); + } +}; + +struct FullyConnectedOptionsBuilder { + typedef FullyConnectedOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_fused_activation_function(opencv_tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_weights_format(opencv_tflite::FullyConnectedOptionsWeightsFormat weights_format) { + fbb_.AddElement(FullyConnectedOptions::VT_WEIGHTS_FORMAT, static_cast(weights_format), 0); + } + void add_keep_num_dims(bool keep_num_dims) { + fbb_.AddElement(FullyConnectedOptions::VT_KEEP_NUM_DIMS, static_cast(keep_num_dims), 0); + } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) { + fbb_.AddElement(FullyConnectedOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast(asymmetric_quantize_inputs), 0); + } + explicit FullyConnectedOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateFullyConnectedOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::ActivationFunctionType fused_activation_function = opencv_tflite::ActivationFunctionType_NONE, + opencv_tflite::FullyConnectedOptionsWeightsFormat weights_format = opencv_tflite::FullyConnectedOptionsWeightsFormat_DEFAULT, + bool keep_num_dims = false, + bool asymmetric_quantize_inputs = false) { + FullyConnectedOptionsBuilder builder_(_fbb); + builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); + 
builder_.add_keep_num_dims(keep_num_dims); + builder_.add_weights_format(weights_format); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SoftmaxOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_BETA = 4 + }; + float beta() const { + return GetField(VT_BETA, 0.0f); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_BETA, 4) && + verifier.EndTable(); + } +}; + +struct SoftmaxOptionsBuilder { + typedef SoftmaxOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_beta(float beta) { + fbb_.AddElement(SoftmaxOptions::VT_BETA, beta, 0.0f); + } + explicit SoftmaxOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSoftmaxOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + float beta = 0.0f) { + SoftmaxOptionsBuilder builder_(_fbb); + builder_.add_beta(beta); + return builder_.Finish(); +} + +struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ConcatenationOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_AXIS = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6 + }; + int32_t axis() const { + return GetField(VT_AXIS, 0); + } + opencv_tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_AXIS, 4) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + 
verifier.EndTable(); + } +}; + +struct ConcatenationOptionsBuilder { + typedef ConcatenationOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_axis(int32_t axis) { + fbb_.AddElement(ConcatenationOptions::VT_AXIS, axis, 0); + } + void add_fused_activation_function(opencv_tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit ConcatenationOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateConcatenationOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t axis = 0, + opencv_tflite::ActivationFunctionType fused_activation_function = opencv_tflite::ActivationFunctionType_NONE) { + ConcatenationOptionsBuilder builder_(_fbb); + builder_.add_axis(axis); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct AddOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef AddOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_POT_SCALE_INT16 = 6 + }; + opencv_tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool pot_scale_int16() const { + return GetField(VT_POT_SCALE_INT16, 1) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_POT_SCALE_INT16, 1) && + verifier.EndTable(); + } +}; + +struct AddOptionsBuilder { + typedef AddOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + 
::flatbuffers::uoffset_t start_; + void add_fused_activation_function(opencv_tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(AddOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_pot_scale_int16(bool pot_scale_int16) { + fbb_.AddElement(AddOptions::VT_POT_SCALE_INT16, static_cast(pot_scale_int16), 1); + } + explicit AddOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateAddOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::ActivationFunctionType fused_activation_function = opencv_tflite::ActivationFunctionType_NONE, + bool pot_scale_int16 = true) { + AddOptionsBuilder builder_(_fbb); + builder_.add_pot_scale_int16(pot_scale_int16); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct MulOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef MulOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + opencv_tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + verifier.EndTable(); + } +}; + +struct MulOptionsBuilder { + typedef MulOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_fused_activation_function(opencv_tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(MulOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit 
MulOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateMulOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::ActivationFunctionType fused_activation_function = opencv_tflite::ActivationFunctionType_NONE) { + MulOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef L2NormOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + opencv_tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + verifier.EndTable(); + } +}; + +struct L2NormOptionsBuilder { + typedef L2NormOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_fused_activation_function(opencv_tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit L2NormOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateL2NormOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::ActivationFunctionType fused_activation_function = opencv_tflite::ActivationFunctionType_NONE) { + L2NormOptionsBuilder 
builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef LocalResponseNormalizationOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_RADIUS = 4, + VT_BIAS = 6, + VT_ALPHA = 8, + VT_BETA = 10 + }; + int32_t radius() const { + return GetField(VT_RADIUS, 0); + } + float bias() const { + return GetField(VT_BIAS, 0.0f); + } + float alpha() const { + return GetField(VT_ALPHA, 0.0f); + } + float beta() const { + return GetField(VT_BETA, 0.0f); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_RADIUS, 4) && + VerifyField(verifier, VT_BIAS, 4) && + VerifyField(verifier, VT_ALPHA, 4) && + VerifyField(verifier, VT_BETA, 4) && + verifier.EndTable(); + } +}; + +struct LocalResponseNormalizationOptionsBuilder { + typedef LocalResponseNormalizationOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_radius(int32_t radius) { + fbb_.AddElement(LocalResponseNormalizationOptions::VT_RADIUS, radius, 0); + } + void add_bias(float bias) { + fbb_.AddElement(LocalResponseNormalizationOptions::VT_BIAS, bias, 0.0f); + } + void add_alpha(float alpha) { + fbb_.AddElement(LocalResponseNormalizationOptions::VT_ALPHA, alpha, 0.0f); + } + void add_beta(float beta) { + fbb_.AddElement(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f); + } + explicit LocalResponseNormalizationOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateLocalResponseNormalizationOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t radius = 0, + float 
bias = 0.0f, + float alpha = 0.0f, + float beta = 0.0f) { + LocalResponseNormalizationOptionsBuilder builder_(_fbb); + builder_.add_beta(beta); + builder_.add_alpha(alpha); + builder_.add_bias(bias); + builder_.add_radius(radius); + return builder_.Finish(); +} + +struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef LSTMOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_CELL_CLIP = 6, + VT_PROJ_CLIP = 8, + VT_KERNEL_TYPE = 10, + VT_ASYMMETRIC_QUANTIZE_INPUTS = 12 + }; + opencv_tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + float cell_clip() const { + return GetField(VT_CELL_CLIP, 0.0f); + } + float proj_clip() const { + return GetField(VT_PROJ_CLIP, 0.0f); + } + opencv_tflite::LSTMKernelType kernel_type() const { + return static_cast(GetField(VT_KERNEL_TYPE, 0)); + } + bool asymmetric_quantize_inputs() const { + return GetField(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_CELL_CLIP, 4) && + VerifyField(verifier, VT_PROJ_CLIP, 4) && + VerifyField(verifier, VT_KERNEL_TYPE, 1) && + VerifyField(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) && + verifier.EndTable(); + } +}; + +struct LSTMOptionsBuilder { + typedef LSTMOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_fused_activation_function(opencv_tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_cell_clip(float cell_clip) { + fbb_.AddElement(LSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f); + } + void add_proj_clip(float proj_clip) { + 
fbb_.AddElement(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f); + } + void add_kernel_type(opencv_tflite::LSTMKernelType kernel_type) { + fbb_.AddElement(LSTMOptions::VT_KERNEL_TYPE, static_cast(kernel_type), 0); + } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) { + fbb_.AddElement(LSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast(asymmetric_quantize_inputs), 0); + } + explicit LSTMOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateLSTMOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::ActivationFunctionType fused_activation_function = opencv_tflite::ActivationFunctionType_NONE, + float cell_clip = 0.0f, + float proj_clip = 0.0f, + opencv_tflite::LSTMKernelType kernel_type = opencv_tflite::LSTMKernelType_FULL, + bool asymmetric_quantize_inputs = false) { + LSTMOptionsBuilder builder_(_fbb); + builder_.add_proj_clip(proj_clip); + builder_.add_cell_clip(cell_clip); + builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); + builder_.add_kernel_type(kernel_type); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef UnidirectionalSequenceLSTMOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_CELL_CLIP = 6, + VT_PROJ_CLIP = 8, + VT_TIME_MAJOR = 10, + VT_ASYMMETRIC_QUANTIZE_INPUTS = 12, + VT_DIAGONAL_RECURRENT_TENSORS = 14 + }; + opencv_tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + float cell_clip() const { + return GetField(VT_CELL_CLIP, 0.0f); + } + float 
proj_clip() const { + return GetField(VT_PROJ_CLIP, 0.0f); + } + bool time_major() const { + return GetField(VT_TIME_MAJOR, 0) != 0; + } + bool asymmetric_quantize_inputs() const { + return GetField(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } + bool diagonal_recurrent_tensors() const { + return GetField(VT_DIAGONAL_RECURRENT_TENSORS, 0) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_CELL_CLIP, 4) && + VerifyField(verifier, VT_PROJ_CLIP, 4) && + VerifyField(verifier, VT_TIME_MAJOR, 1) && + VerifyField(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) && + VerifyField(verifier, VT_DIAGONAL_RECURRENT_TENSORS, 1) && + verifier.EndTable(); + } +}; + +struct UnidirectionalSequenceLSTMOptionsBuilder { + typedef UnidirectionalSequenceLSTMOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_fused_activation_function(opencv_tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_cell_clip(float cell_clip) { + fbb_.AddElement(UnidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f); + } + void add_proj_clip(float proj_clip) { + fbb_.AddElement(UnidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f); + } + void add_time_major(bool time_major) { + fbb_.AddElement(UnidirectionalSequenceLSTMOptions::VT_TIME_MAJOR, static_cast(time_major), 0); + } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) { + fbb_.AddElement(UnidirectionalSequenceLSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast(asymmetric_quantize_inputs), 0); + } + void add_diagonal_recurrent_tensors(bool diagonal_recurrent_tensors) { + fbb_.AddElement(UnidirectionalSequenceLSTMOptions::VT_DIAGONAL_RECURRENT_TENSORS, 
static_cast(diagonal_recurrent_tensors), 0); + } + explicit UnidirectionalSequenceLSTMOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateUnidirectionalSequenceLSTMOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::ActivationFunctionType fused_activation_function = opencv_tflite::ActivationFunctionType_NONE, + float cell_clip = 0.0f, + float proj_clip = 0.0f, + bool time_major = false, + bool asymmetric_quantize_inputs = false, + bool diagonal_recurrent_tensors = false) { + UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); + builder_.add_proj_clip(proj_clip); + builder_.add_cell_clip(cell_clip); + builder_.add_diagonal_recurrent_tensors(diagonal_recurrent_tensors); + builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); + builder_.add_time_major(time_major); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef BidirectionalSequenceLSTMOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_CELL_CLIP = 6, + VT_PROJ_CLIP = 8, + VT_MERGE_OUTPUTS = 10, + VT_TIME_MAJOR = 12, + VT_ASYMMETRIC_QUANTIZE_INPUTS = 14 + }; + opencv_tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + float cell_clip() const { + return GetField(VT_CELL_CLIP, 0.0f); + } + float proj_clip() const { + return GetField(VT_PROJ_CLIP, 0.0f); + } + bool merge_outputs() const { + return GetField(VT_MERGE_OUTPUTS, 0) != 0; + } + bool time_major() const { + return GetField(VT_TIME_MAJOR, 1) != 0; + } + bool 
asymmetric_quantize_inputs() const { + return GetField(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_CELL_CLIP, 4) && + VerifyField(verifier, VT_PROJ_CLIP, 4) && + VerifyField(verifier, VT_MERGE_OUTPUTS, 1) && + VerifyField(verifier, VT_TIME_MAJOR, 1) && + VerifyField(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 1) && + verifier.EndTable(); + } +}; + +struct BidirectionalSequenceLSTMOptionsBuilder { + typedef BidirectionalSequenceLSTMOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_fused_activation_function(opencv_tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(BidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_cell_clip(float cell_clip) { + fbb_.AddElement(BidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f); + } + void add_proj_clip(float proj_clip) { + fbb_.AddElement(BidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f); + } + void add_merge_outputs(bool merge_outputs) { + fbb_.AddElement(BidirectionalSequenceLSTMOptions::VT_MERGE_OUTPUTS, static_cast(merge_outputs), 0); + } + void add_time_major(bool time_major) { + fbb_.AddElement(BidirectionalSequenceLSTMOptions::VT_TIME_MAJOR, static_cast(time_major), 1); + } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) { + fbb_.AddElement(BidirectionalSequenceLSTMOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast(asymmetric_quantize_inputs), 0); + } + explicit BidirectionalSequenceLSTMOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline 
::flatbuffers::Offset CreateBidirectionalSequenceLSTMOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::ActivationFunctionType fused_activation_function = opencv_tflite::ActivationFunctionType_NONE, + float cell_clip = 0.0f, + float proj_clip = 0.0f, + bool merge_outputs = false, + bool time_major = true, + bool asymmetric_quantize_inputs = false) { + BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); + builder_.add_proj_clip(proj_clip); + builder_.add_cell_clip(cell_clip); + builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); + builder_.add_time_major(time_major); + builder_.add_merge_outputs(merge_outputs); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ResizeBilinearOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_ALIGN_CORNERS = 8, + VT_HALF_PIXEL_CENTERS = 10 + }; + bool align_corners() const { + return GetField(VT_ALIGN_CORNERS, 0) != 0; + } + bool half_pixel_centers() const { + return GetField(VT_HALF_PIXEL_CENTERS, 0) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_ALIGN_CORNERS, 1) && + VerifyField(verifier, VT_HALF_PIXEL_CENTERS, 1) && + verifier.EndTable(); + } +}; + +struct ResizeBilinearOptionsBuilder { + typedef ResizeBilinearOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_align_corners(bool align_corners) { + fbb_.AddElement(ResizeBilinearOptions::VT_ALIGN_CORNERS, static_cast(align_corners), 0); + } + void add_half_pixel_centers(bool half_pixel_centers) { + fbb_.AddElement(ResizeBilinearOptions::VT_HALF_PIXEL_CENTERS, static_cast(half_pixel_centers), 0); + } + explicit ResizeBilinearOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = 
fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateResizeBilinearOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + bool align_corners = false, + bool half_pixel_centers = false) { + ResizeBilinearOptionsBuilder builder_(_fbb); + builder_.add_half_pixel_centers(half_pixel_centers); + builder_.add_align_corners(align_corners); + return builder_.Finish(); +} + +struct ResizeNearestNeighborOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ResizeNearestNeighborOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_ALIGN_CORNERS = 4, + VT_HALF_PIXEL_CENTERS = 6 + }; + bool align_corners() const { + return GetField(VT_ALIGN_CORNERS, 0) != 0; + } + bool half_pixel_centers() const { + return GetField(VT_HALF_PIXEL_CENTERS, 0) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_ALIGN_CORNERS, 1) && + VerifyField(verifier, VT_HALF_PIXEL_CENTERS, 1) && + verifier.EndTable(); + } +}; + +struct ResizeNearestNeighborOptionsBuilder { + typedef ResizeNearestNeighborOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_align_corners(bool align_corners) { + fbb_.AddElement(ResizeNearestNeighborOptions::VT_ALIGN_CORNERS, static_cast(align_corners), 0); + } + void add_half_pixel_centers(bool half_pixel_centers) { + fbb_.AddElement(ResizeNearestNeighborOptions::VT_HALF_PIXEL_CENTERS, static_cast(half_pixel_centers), 0); + } + explicit ResizeNearestNeighborOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset 
CreateResizeNearestNeighborOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + bool align_corners = false, + bool half_pixel_centers = false) { + ResizeNearestNeighborOptionsBuilder builder_(_fbb); + builder_.add_half_pixel_centers(half_pixel_centers); + builder_.add_align_corners(align_corners); + return builder_.Finish(); +} + +struct CallOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef CallOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_SUBGRAPH = 4 + }; + uint32_t subgraph() const { + return GetField(VT_SUBGRAPH, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_SUBGRAPH, 4) && + verifier.EndTable(); + } +}; + +struct CallOptionsBuilder { + typedef CallOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_subgraph(uint32_t subgraph) { + fbb_.AddElement(CallOptions::VT_SUBGRAPH, subgraph, 0); + } + explicit CallOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateCallOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + uint32_t subgraph = 0) { + CallOptionsBuilder builder_(_fbb); + builder_.add_subgraph(subgraph); + return builder_.Finish(); +} + +struct PadOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef PadOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct PadOptionsBuilder { + typedef PadOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit PadOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + 
::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreatePadOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + PadOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct PadV2Options FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef PadV2OptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct PadV2OptionsBuilder { + typedef PadV2Options Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit PadV2OptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreatePadV2Options( + ::flatbuffers::FlatBufferBuilder &_fbb) { + PadV2OptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ReshapeOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NEW_SHAPE = 4 + }; + const ::flatbuffers::Vector *new_shape() const { + return GetPointer *>(VT_NEW_SHAPE); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_NEW_SHAPE) && + verifier.VerifyVector(new_shape()) && + verifier.EndTable(); + } +}; + +struct ReshapeOptionsBuilder { + typedef ReshapeOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_new_shape(::flatbuffers::Offset<::flatbuffers::Vector> new_shape) { + fbb_.AddOffset(ReshapeOptions::VT_NEW_SHAPE, new_shape); + } + explicit ReshapeOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + 
start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateReshapeOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> new_shape = 0) { + ReshapeOptionsBuilder builder_(_fbb); + builder_.add_new_shape(new_shape); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateReshapeOptionsDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *new_shape = nullptr) { + auto new_shape__ = new_shape ? _fbb.CreateVector(*new_shape) : 0; + return opencv_tflite::CreateReshapeOptions( + _fbb, + new_shape__); +} + +struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SpaceToBatchNDOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct SpaceToBatchNDOptionsBuilder { + typedef SpaceToBatchNDOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit SpaceToBatchNDOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSpaceToBatchNDOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + SpaceToBatchNDOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef BatchToSpaceNDOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct BatchToSpaceNDOptionsBuilder { + typedef BatchToSpaceNDOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + 
::flatbuffers::uoffset_t start_; + explicit BatchToSpaceNDOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateBatchToSpaceNDOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + BatchToSpaceNDOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SkipGramOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NGRAM_SIZE = 4, + VT_MAX_SKIP_SIZE = 6, + VT_INCLUDE_ALL_NGRAMS = 8 + }; + int32_t ngram_size() const { + return GetField(VT_NGRAM_SIZE, 0); + } + int32_t max_skip_size() const { + return GetField(VT_MAX_SKIP_SIZE, 0); + } + bool include_all_ngrams() const { + return GetField(VT_INCLUDE_ALL_NGRAMS, 0) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_NGRAM_SIZE, 4) && + VerifyField(verifier, VT_MAX_SKIP_SIZE, 4) && + VerifyField(verifier, VT_INCLUDE_ALL_NGRAMS, 1) && + verifier.EndTable(); + } +}; + +struct SkipGramOptionsBuilder { + typedef SkipGramOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_ngram_size(int32_t ngram_size) { + fbb_.AddElement(SkipGramOptions::VT_NGRAM_SIZE, ngram_size, 0); + } + void add_max_skip_size(int32_t max_skip_size) { + fbb_.AddElement(SkipGramOptions::VT_MAX_SKIP_SIZE, max_skip_size, 0); + } + void add_include_all_ngrams(bool include_all_ngrams) { + fbb_.AddElement(SkipGramOptions::VT_INCLUDE_ALL_NGRAMS, static_cast(include_all_ngrams), 0); + } + explicit SkipGramOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = 
fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSkipGramOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t ngram_size = 0, + int32_t max_skip_size = 0, + bool include_all_ngrams = false) { + SkipGramOptionsBuilder builder_(_fbb); + builder_.add_max_skip_size(max_skip_size); + builder_.add_ngram_size(ngram_size); + builder_.add_include_all_ngrams(include_all_ngrams); + return builder_.Finish(); +} + +struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SpaceToDepthOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_BLOCK_SIZE = 4 + }; + int32_t block_size() const { + return GetField(VT_BLOCK_SIZE, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_BLOCK_SIZE, 4) && + verifier.EndTable(); + } +}; + +struct SpaceToDepthOptionsBuilder { + typedef SpaceToDepthOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_block_size(int32_t block_size) { + fbb_.AddElement(SpaceToDepthOptions::VT_BLOCK_SIZE, block_size, 0); + } + explicit SpaceToDepthOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSpaceToDepthOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t block_size = 0) { + SpaceToDepthOptionsBuilder builder_(_fbb); + builder_.add_block_size(block_size); + return builder_.Finish(); +} + +struct DepthToSpaceOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DepthToSpaceOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_BLOCK_SIZE = 4 + }; + int32_t block_size() const { + return 
GetField(VT_BLOCK_SIZE, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_BLOCK_SIZE, 4) && + verifier.EndTable(); + } +}; + +struct DepthToSpaceOptionsBuilder { + typedef DepthToSpaceOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_block_size(int32_t block_size) { + fbb_.AddElement(DepthToSpaceOptions::VT_BLOCK_SIZE, block_size, 0); + } + explicit DepthToSpaceOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDepthToSpaceOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t block_size = 0) { + DepthToSpaceOptionsBuilder builder_(_fbb); + builder_.add_block_size(block_size); + return builder_.Finish(); +} + +struct SubOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SubOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_POT_SCALE_INT16 = 6 + }; + opencv_tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool pot_scale_int16() const { + return GetField(VT_POT_SCALE_INT16, 1) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + VerifyField(verifier, VT_POT_SCALE_INT16, 1) && + verifier.EndTable(); + } +}; + +struct SubOptionsBuilder { + typedef SubOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_fused_activation_function(opencv_tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(SubOptions::VT_FUSED_ACTIVATION_FUNCTION, 
static_cast(fused_activation_function), 0); + } + void add_pot_scale_int16(bool pot_scale_int16) { + fbb_.AddElement(SubOptions::VT_POT_SCALE_INT16, static_cast(pot_scale_int16), 1); + } + explicit SubOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSubOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::ActivationFunctionType fused_activation_function = opencv_tflite::ActivationFunctionType_NONE, + bool pot_scale_int16 = true) { + SubOptionsBuilder builder_(_fbb); + builder_.add_pot_scale_int16(pot_scale_int16); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct DivOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DivOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; + opencv_tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + verifier.EndTable(); + } +}; + +struct DivOptionsBuilder { + typedef DivOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_fused_activation_function(opencv_tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(DivOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit DivOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = 
::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDivOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::ActivationFunctionType fused_activation_function = opencv_tflite::ActivationFunctionType_NONE) { + DivOptionsBuilder builder_(_fbb); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef TopKV2OptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct TopKV2OptionsBuilder { + typedef TopKV2Options Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit TopKV2OptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateTopKV2Options( + ::flatbuffers::FlatBufferBuilder &_fbb) { + TopKV2OptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef EmbeddingLookupSparseOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_COMBINER = 4 + }; + opencv_tflite::CombinerType combiner() const { + return static_cast(GetField(VT_COMBINER, 0)); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_COMBINER, 1) && + verifier.EndTable(); + } +}; + +struct EmbeddingLookupSparseOptionsBuilder { + typedef EmbeddingLookupSparseOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_combiner(opencv_tflite::CombinerType combiner) { + 
fbb_.AddElement(EmbeddingLookupSparseOptions::VT_COMBINER, static_cast(combiner), 0); + } + explicit EmbeddingLookupSparseOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateEmbeddingLookupSparseOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::CombinerType combiner = opencv_tflite::CombinerType_SUM) { + EmbeddingLookupSparseOptionsBuilder builder_(_fbb); + builder_.add_combiner(combiner); + return builder_.Finish(); +} + +struct GatherOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef GatherOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_AXIS = 4, + VT_BATCH_DIMS = 6 + }; + int32_t axis() const { + return GetField(VT_AXIS, 0); + } + int32_t batch_dims() const { + return GetField(VT_BATCH_DIMS, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_AXIS, 4) && + VerifyField(verifier, VT_BATCH_DIMS, 4) && + verifier.EndTable(); + } +}; + +struct GatherOptionsBuilder { + typedef GatherOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_axis(int32_t axis) { + fbb_.AddElement(GatherOptions::VT_AXIS, axis, 0); + } + void add_batch_dims(int32_t batch_dims) { + fbb_.AddElement(GatherOptions::VT_BATCH_DIMS, batch_dims, 0); + } + explicit GatherOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateGatherOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t axis = 0, + int32_t batch_dims = 0) { + 
GatherOptionsBuilder builder_(_fbb); + builder_.add_batch_dims(batch_dims); + builder_.add_axis(axis); + return builder_.Finish(); +} + +struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef TransposeOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct TransposeOptionsBuilder { + typedef TransposeOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit TransposeOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateTransposeOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + TransposeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct ExpOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ExpOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct ExpOptionsBuilder { + typedef ExpOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit ExpOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateExpOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + ExpOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct CosOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef CosOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + 
} +}; + +struct CosOptionsBuilder { + typedef CosOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit CosOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateCosOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + CosOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ReducerOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_KEEP_DIMS = 4 + }; + bool keep_dims() const { + return GetField(VT_KEEP_DIMS, 0) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_KEEP_DIMS, 1) && + verifier.EndTable(); + } +}; + +struct ReducerOptionsBuilder { + typedef ReducerOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_keep_dims(bool keep_dims) { + fbb_.AddElement(ReducerOptions::VT_KEEP_DIMS, static_cast(keep_dims), 0); + } + explicit ReducerOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateReducerOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + bool keep_dims = false) { + ReducerOptionsBuilder builder_(_fbb); + builder_.add_keep_dims(keep_dims); + return builder_.Finish(); +} + +struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SqueezeOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_SQUEEZE_DIMS = 4 + }; + const 
::flatbuffers::Vector *squeeze_dims() const { + return GetPointer *>(VT_SQUEEZE_DIMS); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_SQUEEZE_DIMS) && + verifier.VerifyVector(squeeze_dims()) && + verifier.EndTable(); + } +}; + +struct SqueezeOptionsBuilder { + typedef SqueezeOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_squeeze_dims(::flatbuffers::Offset<::flatbuffers::Vector> squeeze_dims) { + fbb_.AddOffset(SqueezeOptions::VT_SQUEEZE_DIMS, squeeze_dims); + } + explicit SqueezeOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSqueezeOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> squeeze_dims = 0) { + SqueezeOptionsBuilder builder_(_fbb); + builder_.add_squeeze_dims(squeeze_dims); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateSqueezeOptionsDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *squeeze_dims = nullptr) { + auto squeeze_dims__ = squeeze_dims ? 
_fbb.CreateVector(*squeeze_dims) : 0; + return opencv_tflite::CreateSqueezeOptions( + _fbb, + squeeze_dims__); +} + +struct SplitOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SplitOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NUM_SPLITS = 4 + }; + int32_t num_splits() const { + return GetField(VT_NUM_SPLITS, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_NUM_SPLITS, 4) && + verifier.EndTable(); + } +}; + +struct SplitOptionsBuilder { + typedef SplitOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_num_splits(int32_t num_splits) { + fbb_.AddElement(SplitOptions::VT_NUM_SPLITS, num_splits, 0); + } + explicit SplitOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSplitOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t num_splits = 0) { + SplitOptionsBuilder builder_(_fbb); + builder_.add_num_splits(num_splits); + return builder_.Finish(); +} + +struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SplitVOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NUM_SPLITS = 4 + }; + int32_t num_splits() const { + return GetField(VT_NUM_SPLITS, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_NUM_SPLITS, 4) && + verifier.EndTable(); + } +}; + +struct SplitVOptionsBuilder { + typedef SplitVOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_num_splits(int32_t num_splits) { + fbb_.AddElement(SplitVOptions::VT_NUM_SPLITS, 
num_splits, 0); + } + explicit SplitVOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSplitVOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t num_splits = 0) { + SplitVOptionsBuilder builder_(_fbb); + builder_.add_num_splits(num_splits); + return builder_.Finish(); +} + +struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef StridedSliceOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_BEGIN_MASK = 4, + VT_END_MASK = 6, + VT_ELLIPSIS_MASK = 8, + VT_NEW_AXIS_MASK = 10, + VT_SHRINK_AXIS_MASK = 12 + }; + int32_t begin_mask() const { + return GetField(VT_BEGIN_MASK, 0); + } + int32_t end_mask() const { + return GetField(VT_END_MASK, 0); + } + int32_t ellipsis_mask() const { + return GetField(VT_ELLIPSIS_MASK, 0); + } + int32_t new_axis_mask() const { + return GetField(VT_NEW_AXIS_MASK, 0); + } + int32_t shrink_axis_mask() const { + return GetField(VT_SHRINK_AXIS_MASK, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_BEGIN_MASK, 4) && + VerifyField(verifier, VT_END_MASK, 4) && + VerifyField(verifier, VT_ELLIPSIS_MASK, 4) && + VerifyField(verifier, VT_NEW_AXIS_MASK, 4) && + VerifyField(verifier, VT_SHRINK_AXIS_MASK, 4) && + verifier.EndTable(); + } +}; + +struct StridedSliceOptionsBuilder { + typedef StridedSliceOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_begin_mask(int32_t begin_mask) { + fbb_.AddElement(StridedSliceOptions::VT_BEGIN_MASK, begin_mask, 0); + } + void add_end_mask(int32_t end_mask) { + fbb_.AddElement(StridedSliceOptions::VT_END_MASK, end_mask, 0); + } + void add_ellipsis_mask(int32_t 
ellipsis_mask) { + fbb_.AddElement(StridedSliceOptions::VT_ELLIPSIS_MASK, ellipsis_mask, 0); + } + void add_new_axis_mask(int32_t new_axis_mask) { + fbb_.AddElement(StridedSliceOptions::VT_NEW_AXIS_MASK, new_axis_mask, 0); + } + void add_shrink_axis_mask(int32_t shrink_axis_mask) { + fbb_.AddElement(StridedSliceOptions::VT_SHRINK_AXIS_MASK, shrink_axis_mask, 0); + } + explicit StridedSliceOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateStridedSliceOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t begin_mask = 0, + int32_t end_mask = 0, + int32_t ellipsis_mask = 0, + int32_t new_axis_mask = 0, + int32_t shrink_axis_mask = 0) { + StridedSliceOptionsBuilder builder_(_fbb); + builder_.add_shrink_axis_mask(shrink_axis_mask); + builder_.add_new_axis_mask(new_axis_mask); + builder_.add_ellipsis_mask(ellipsis_mask); + builder_.add_end_mask(end_mask); + builder_.add_begin_mask(begin_mask); + return builder_.Finish(); +} + +struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef LogSoftmaxOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct LogSoftmaxOptionsBuilder { + typedef LogSoftmaxOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit LogSoftmaxOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateLogSoftmaxOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + LogSoftmaxOptionsBuilder builder_(_fbb); + return 
builder_.Finish(); +} + +struct CastOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef CastOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_IN_DATA_TYPE = 4, + VT_OUT_DATA_TYPE = 6 + }; + opencv_tflite::TensorType in_data_type() const { + return static_cast(GetField(VT_IN_DATA_TYPE, 0)); + } + opencv_tflite::TensorType out_data_type() const { + return static_cast(GetField(VT_OUT_DATA_TYPE, 0)); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_IN_DATA_TYPE, 1) && + VerifyField(verifier, VT_OUT_DATA_TYPE, 1) && + verifier.EndTable(); + } +}; + +struct CastOptionsBuilder { + typedef CastOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_in_data_type(opencv_tflite::TensorType in_data_type) { + fbb_.AddElement(CastOptions::VT_IN_DATA_TYPE, static_cast(in_data_type), 0); + } + void add_out_data_type(opencv_tflite::TensorType out_data_type) { + fbb_.AddElement(CastOptions::VT_OUT_DATA_TYPE, static_cast(out_data_type), 0); + } + explicit CastOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateCastOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::TensorType in_data_type = opencv_tflite::TensorType_FLOAT32, + opencv_tflite::TensorType out_data_type = opencv_tflite::TensorType_FLOAT32) { + CastOptionsBuilder builder_(_fbb); + builder_.add_out_data_type(out_data_type); + builder_.add_in_data_type(in_data_type); + return builder_.Finish(); +} + +struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DequantizeOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return 
VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct DequantizeOptionsBuilder { + typedef DequantizeOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit DequantizeOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDequantizeOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + DequantizeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef MaximumMinimumOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct MaximumMinimumOptionsBuilder { + typedef MaximumMinimumOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit MaximumMinimumOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateMaximumMinimumOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + MaximumMinimumOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct TileOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef TileOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct TileOptionsBuilder { + typedef TileOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit TileOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = 
fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateTileOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + TileOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ArgMaxOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_OUTPUT_TYPE = 4 + }; + opencv_tflite::TensorType output_type() const { + return static_cast(GetField(VT_OUTPUT_TYPE, 0)); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_OUTPUT_TYPE, 1) && + verifier.EndTable(); + } +}; + +struct ArgMaxOptionsBuilder { + typedef ArgMaxOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_output_type(opencv_tflite::TensorType output_type) { + fbb_.AddElement(ArgMaxOptions::VT_OUTPUT_TYPE, static_cast(output_type), 0); + } + explicit ArgMaxOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateArgMaxOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::TensorType output_type = opencv_tflite::TensorType_FLOAT32) { + ArgMaxOptionsBuilder builder_(_fbb); + builder_.add_output_type(output_type); + return builder_.Finish(); +} + +struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ArgMinOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_OUTPUT_TYPE = 4 + }; + opencv_tflite::TensorType output_type() const { + return static_cast(GetField(VT_OUTPUT_TYPE, 0)); + } + bool 
Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_OUTPUT_TYPE, 1) && + verifier.EndTable(); + } +}; + +struct ArgMinOptionsBuilder { + typedef ArgMinOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_output_type(opencv_tflite::TensorType output_type) { + fbb_.AddElement(ArgMinOptions::VT_OUTPUT_TYPE, static_cast(output_type), 0); + } + explicit ArgMinOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateArgMinOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::TensorType output_type = opencv_tflite::TensorType_FLOAT32) { + ArgMinOptionsBuilder builder_(_fbb); + builder_.add_output_type(output_type); + return builder_.Finish(); +} + +struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef GreaterOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct GreaterOptionsBuilder { + typedef GreaterOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit GreaterOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateGreaterOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + GreaterOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef GreaterEqualOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier 
&verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct GreaterEqualOptionsBuilder { + typedef GreaterEqualOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit GreaterEqualOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateGreaterEqualOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + GreaterEqualOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct LessOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef LessOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct LessOptionsBuilder { + typedef LessOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit LessOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateLessOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + LessOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef LessEqualOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct LessEqualOptionsBuilder { + typedef LessEqualOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit LessEqualOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = 
fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateLessEqualOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + LessEqualOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct NegOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef NegOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct NegOptionsBuilder { + typedef NegOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit NegOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateNegOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + NegOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct SelectOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SelectOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct SelectOptionsBuilder { + typedef SelectOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit SelectOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSelectOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + SelectOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct SliceOptions FLATBUFFERS_FINAL_CLASS : private 
::flatbuffers::Table { + typedef SliceOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct SliceOptionsBuilder { + typedef SliceOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit SliceOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSliceOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + SliceOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef TransposeConvOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_PADDING = 4, + VT_STRIDE_W = 6, + VT_STRIDE_H = 8, + VT_FUSED_ACTIVATION_FUNCTION = 10 + }; + opencv_tflite::Padding padding() const { + return static_cast(GetField(VT_PADDING, 0)); + } + int32_t stride_w() const { + return GetField(VT_STRIDE_W, 0); + } + int32_t stride_h() const { + return GetField(VT_STRIDE_H, 0); + } + opencv_tflite::ActivationFunctionType fused_activation_function() const { + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_PADDING, 1) && + VerifyField(verifier, VT_STRIDE_W, 4) && + VerifyField(verifier, VT_STRIDE_H, 4) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION, 1) && + verifier.EndTable(); + } +}; + +struct TransposeConvOptionsBuilder { + typedef TransposeConvOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_padding(opencv_tflite::Padding padding) { + 
fbb_.AddElement(TransposeConvOptions::VT_PADDING, static_cast(padding), 0); + } + void add_stride_w(int32_t stride_w) { + fbb_.AddElement(TransposeConvOptions::VT_STRIDE_W, stride_w, 0); + } + void add_stride_h(int32_t stride_h) { + fbb_.AddElement(TransposeConvOptions::VT_STRIDE_H, stride_h, 0); + } + void add_fused_activation_function(opencv_tflite::ActivationFunctionType fused_activation_function) { + fbb_.AddElement(TransposeConvOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit TransposeConvOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateTransposeConvOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::Padding padding = opencv_tflite::Padding_SAME, + int32_t stride_w = 0, + int32_t stride_h = 0, + opencv_tflite::ActivationFunctionType fused_activation_function = opencv_tflite::ActivationFunctionType_NONE) { + TransposeConvOptionsBuilder builder_(_fbb); + builder_.add_stride_h(stride_h); + builder_.add_stride_w(stride_w); + builder_.add_fused_activation_function(fused_activation_function); + builder_.add_padding(padding); + return builder_.Finish(); +} + +struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ExpandDimsOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct ExpandDimsOptionsBuilder { + typedef ExpandDimsOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit ExpandDimsOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = 
::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateExpandDimsOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + ExpandDimsOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SparseToDenseOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_VALIDATE_INDICES = 4 + }; + bool validate_indices() const { + return GetField(VT_VALIDATE_INDICES, 0) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_VALIDATE_INDICES, 1) && + verifier.EndTable(); + } +}; + +struct SparseToDenseOptionsBuilder { + typedef SparseToDenseOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_validate_indices(bool validate_indices) { + fbb_.AddElement(SparseToDenseOptions::VT_VALIDATE_INDICES, static_cast(validate_indices), 0); + } + explicit SparseToDenseOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSparseToDenseOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + bool validate_indices = false) { + SparseToDenseOptionsBuilder builder_(_fbb); + builder_.add_validate_indices(validate_indices); + return builder_.Finish(); +} + +struct EqualOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef EqualOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct EqualOptionsBuilder { + typedef EqualOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit 
EqualOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateEqualOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + EqualOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef NotEqualOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct NotEqualOptionsBuilder { + typedef NotEqualOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit NotEqualOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateNotEqualOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + NotEqualOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ShapeOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_OUT_TYPE = 4 + }; + opencv_tflite::TensorType out_type() const { + return static_cast(GetField(VT_OUT_TYPE, 0)); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_OUT_TYPE, 1) && + verifier.EndTable(); + } +}; + +struct ShapeOptionsBuilder { + typedef ShapeOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_out_type(opencv_tflite::TensorType out_type) { + fbb_.AddElement(ShapeOptions::VT_OUT_TYPE, static_cast(out_type), 0); + } + 
explicit ShapeOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateShapeOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::TensorType out_type = opencv_tflite::TensorType_FLOAT32) { + ShapeOptionsBuilder builder_(_fbb); + builder_.add_out_type(out_type); + return builder_.Finish(); +} + +struct RankOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef RankOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct RankOptionsBuilder { + typedef RankOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit RankOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateRankOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + RankOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct PowOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef PowOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct PowOptionsBuilder { + typedef PowOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit PowOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset 
CreatePowOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + PowOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef FakeQuantOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_MIN = 4, + VT_MAX = 6, + VT_NUM_BITS = 8, + VT_NARROW_RANGE = 10 + }; + float min() const { + return GetField(VT_MIN, 0.0f); + } + float max() const { + return GetField(VT_MAX, 0.0f); + } + int32_t num_bits() const { + return GetField(VT_NUM_BITS, 0); + } + bool narrow_range() const { + return GetField(VT_NARROW_RANGE, 0) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_MIN, 4) && + VerifyField(verifier, VT_MAX, 4) && + VerifyField(verifier, VT_NUM_BITS, 4) && + VerifyField(verifier, VT_NARROW_RANGE, 1) && + verifier.EndTable(); + } +}; + +struct FakeQuantOptionsBuilder { + typedef FakeQuantOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_min(float min) { + fbb_.AddElement(FakeQuantOptions::VT_MIN, min, 0.0f); + } + void add_max(float max) { + fbb_.AddElement(FakeQuantOptions::VT_MAX, max, 0.0f); + } + void add_num_bits(int32_t num_bits) { + fbb_.AddElement(FakeQuantOptions::VT_NUM_BITS, num_bits, 0); + } + void add_narrow_range(bool narrow_range) { + fbb_.AddElement(FakeQuantOptions::VT_NARROW_RANGE, static_cast(narrow_range), 0); + } + explicit FakeQuantOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateFakeQuantOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + float min = 0.0f, + float max = 0.0f, + int32_t num_bits = 0, + bool narrow_range = false) { + FakeQuantOptionsBuilder 
builder_(_fbb); + builder_.add_num_bits(num_bits); + builder_.add_max(max); + builder_.add_min(min); + builder_.add_narrow_range(narrow_range); + return builder_.Finish(); +} + +struct PackOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef PackOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_VALUES_COUNT = 4, + VT_AXIS = 6 + }; + int32_t values_count() const { + return GetField(VT_VALUES_COUNT, 0); + } + int32_t axis() const { + return GetField(VT_AXIS, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_VALUES_COUNT, 4) && + VerifyField(verifier, VT_AXIS, 4) && + verifier.EndTable(); + } +}; + +struct PackOptionsBuilder { + typedef PackOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_values_count(int32_t values_count) { + fbb_.AddElement(PackOptions::VT_VALUES_COUNT, values_count, 0); + } + void add_axis(int32_t axis) { + fbb_.AddElement(PackOptions::VT_AXIS, axis, 0); + } + explicit PackOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreatePackOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t values_count = 0, + int32_t axis = 0) { + PackOptionsBuilder builder_(_fbb); + builder_.add_axis(axis); + builder_.add_values_count(values_count); + return builder_.Finish(); +} + +struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef LogicalOrOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct LogicalOrOptionsBuilder { + typedef LogicalOrOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + 
::flatbuffers::uoffset_t start_; + explicit LogicalOrOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateLogicalOrOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + LogicalOrOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef OneHotOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_AXIS = 4 + }; + int32_t axis() const { + return GetField(VT_AXIS, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_AXIS, 4) && + verifier.EndTable(); + } +}; + +struct OneHotOptionsBuilder { + typedef OneHotOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_axis(int32_t axis) { + fbb_.AddElement(OneHotOptions::VT_AXIS, axis, 0); + } + explicit OneHotOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateOneHotOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t axis = 0) { + OneHotOptionsBuilder builder_(_fbb); + builder_.add_axis(axis); + return builder_.Finish(); +} + +struct AbsOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef AbsOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct AbsOptionsBuilder { + typedef AbsOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit 
AbsOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateAbsOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + AbsOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct HardSwishOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef HardSwishOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct HardSwishOptionsBuilder { + typedef HardSwishOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit HardSwishOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateHardSwishOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + HardSwishOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef LogicalAndOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct LogicalAndOptionsBuilder { + typedef LogicalAndOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit LogicalAndOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateLogicalAndOptions( + 
::flatbuffers::FlatBufferBuilder &_fbb) { + LogicalAndOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef LogicalNotOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct LogicalNotOptionsBuilder { + typedef LogicalNotOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit LogicalNotOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateLogicalNotOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + LogicalNotOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef UnpackOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NUM = 4, + VT_AXIS = 6 + }; + int32_t num() const { + return GetField(VT_NUM, 0); + } + int32_t axis() const { + return GetField(VT_AXIS, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_NUM, 4) && + VerifyField(verifier, VT_AXIS, 4) && + verifier.EndTable(); + } +}; + +struct UnpackOptionsBuilder { + typedef UnpackOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_num(int32_t num) { + fbb_.AddElement(UnpackOptions::VT_NUM, num, 0); + } + void add_axis(int32_t axis) { + fbb_.AddElement(UnpackOptions::VT_AXIS, axis, 0); + } + explicit UnpackOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = 
fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateUnpackOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t num = 0, + int32_t axis = 0) { + UnpackOptionsBuilder builder_(_fbb); + builder_.add_axis(axis); + builder_.add_num(num); + return builder_.Finish(); +} + +struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef FloorDivOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct FloorDivOptionsBuilder { + typedef FloorDivOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit FloorDivOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateFloorDivOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + FloorDivOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct SquareOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SquareOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct SquareOptionsBuilder { + typedef SquareOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit SquareOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSquareOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + SquareOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct 
ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ZerosLikeOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct ZerosLikeOptionsBuilder { + typedef ZerosLikeOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit ZerosLikeOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateZerosLikeOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + ZerosLikeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct FillOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef FillOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct FillOptionsBuilder { + typedef FillOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit FillOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateFillOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + FillOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef FloorModOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct FloorModOptionsBuilder { + typedef FloorModOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + 
::flatbuffers::uoffset_t start_; + explicit FloorModOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateFloorModOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + FloorModOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct RangeOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef RangeOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct RangeOptionsBuilder { + typedef RangeOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit RangeOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateRangeOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + RangeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef LeakyReluOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_ALPHA = 4 + }; + float alpha() const { + return GetField(VT_ALPHA, 0.0f); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_ALPHA, 4) && + verifier.EndTable(); + } +}; + +struct LeakyReluOptionsBuilder { + typedef LeakyReluOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_alpha(float alpha) { + fbb_.AddElement(LeakyReluOptions::VT_ALPHA, alpha, 0.0f); + } + explicit 
LeakyReluOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateLeakyReluOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + float alpha = 0.0f) { + LeakyReluOptionsBuilder builder_(_fbb); + builder_.add_alpha(alpha); + return builder_.Finish(); +} + +struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SquaredDifferenceOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct SquaredDifferenceOptionsBuilder { + typedef SquaredDifferenceOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit SquaredDifferenceOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSquaredDifferenceOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + SquaredDifferenceOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef MirrorPadOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_MODE = 4 + }; + opencv_tflite::MirrorPadMode mode() const { + return static_cast(GetField(VT_MODE, 0)); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_MODE, 1) && + verifier.EndTable(); + } +}; + +struct MirrorPadOptionsBuilder { + typedef MirrorPadOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void 
add_mode(opencv_tflite::MirrorPadMode mode) { + fbb_.AddElement(MirrorPadOptions::VT_MODE, static_cast(mode), 0); + } + explicit MirrorPadOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateMirrorPadOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::MirrorPadMode mode = opencv_tflite::MirrorPadMode_REFLECT) { + MirrorPadOptionsBuilder builder_(_fbb); + builder_.add_mode(mode); + return builder_.Finish(); +} + +struct UniqueOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef UniqueOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_IDX_OUT_TYPE = 4 + }; + opencv_tflite::TensorType idx_out_type() const { + return static_cast(GetField(VT_IDX_OUT_TYPE, 2)); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_IDX_OUT_TYPE, 1) && + verifier.EndTable(); + } +}; + +struct UniqueOptionsBuilder { + typedef UniqueOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_idx_out_type(opencv_tflite::TensorType idx_out_type) { + fbb_.AddElement(UniqueOptions::VT_IDX_OUT_TYPE, static_cast(idx_out_type), 2); + } + explicit UniqueOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateUniqueOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + opencv_tflite::TensorType idx_out_type = opencv_tflite::TensorType_INT32) { + UniqueOptionsBuilder builder_(_fbb); + builder_.add_idx_out_type(idx_out_type); + return builder_.Finish(); +} + +struct 
ReverseV2Options FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ReverseV2OptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct ReverseV2OptionsBuilder { + typedef ReverseV2Options Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit ReverseV2OptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateReverseV2Options( + ::flatbuffers::FlatBufferBuilder &_fbb) { + ReverseV2OptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct AddNOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef AddNOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct AddNOptionsBuilder { + typedef AddNOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit AddNOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateAddNOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + AddNOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct GatherNdOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef GatherNdOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct GatherNdOptionsBuilder { + typedef GatherNdOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + 
::flatbuffers::uoffset_t start_; + explicit GatherNdOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateGatherNdOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + GatherNdOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct WhereOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef WhereOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct WhereOptionsBuilder { + typedef WhereOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit WhereOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateWhereOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + WhereOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct ReverseSequenceOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ReverseSequenceOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_SEQ_DIM = 4, + VT_BATCH_DIM = 6 + }; + int32_t seq_dim() const { + return GetField(VT_SEQ_DIM, 0); + } + int32_t batch_dim() const { + return GetField(VT_BATCH_DIM, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_SEQ_DIM, 4) && + VerifyField(verifier, VT_BATCH_DIM, 4) && + verifier.EndTable(); + } +}; + +struct ReverseSequenceOptionsBuilder { + typedef ReverseSequenceOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + 
::flatbuffers::uoffset_t start_; + void add_seq_dim(int32_t seq_dim) { + fbb_.AddElement(ReverseSequenceOptions::VT_SEQ_DIM, seq_dim, 0); + } + void add_batch_dim(int32_t batch_dim) { + fbb_.AddElement(ReverseSequenceOptions::VT_BATCH_DIM, batch_dim, 0); + } + explicit ReverseSequenceOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateReverseSequenceOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t seq_dim = 0, + int32_t batch_dim = 0) { + ReverseSequenceOptionsBuilder builder_(_fbb); + builder_.add_batch_dim(batch_dim); + builder_.add_seq_dim(seq_dim); + return builder_.Finish(); +} + +struct MatrixDiagOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef MatrixDiagOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct MatrixDiagOptionsBuilder { + typedef MatrixDiagOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit MatrixDiagOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateMatrixDiagOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + MatrixDiagOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct QuantizeOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef QuantizeOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct QuantizeOptionsBuilder { + typedef QuantizeOptions Table; + 
::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit QuantizeOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateQuantizeOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + QuantizeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct MatrixSetDiagOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef MatrixSetDiagOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct MatrixSetDiagOptionsBuilder { + typedef MatrixSetDiagOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit MatrixSetDiagOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateMatrixSetDiagOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + MatrixSetDiagOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct IfOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef IfOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_THEN_SUBGRAPH_INDEX = 4, + VT_ELSE_SUBGRAPH_INDEX = 6 + }; + int32_t then_subgraph_index() const { + return GetField(VT_THEN_SUBGRAPH_INDEX, 0); + } + int32_t else_subgraph_index() const { + return GetField(VT_ELSE_SUBGRAPH_INDEX, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_THEN_SUBGRAPH_INDEX, 4) && + VerifyField(verifier, VT_ELSE_SUBGRAPH_INDEX, 4) && 
+ verifier.EndTable(); + } +}; + +struct IfOptionsBuilder { + typedef IfOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_then_subgraph_index(int32_t then_subgraph_index) { + fbb_.AddElement(IfOptions::VT_THEN_SUBGRAPH_INDEX, then_subgraph_index, 0); + } + void add_else_subgraph_index(int32_t else_subgraph_index) { + fbb_.AddElement(IfOptions::VT_ELSE_SUBGRAPH_INDEX, else_subgraph_index, 0); + } + explicit IfOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateIfOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t then_subgraph_index = 0, + int32_t else_subgraph_index = 0) { + IfOptionsBuilder builder_(_fbb); + builder_.add_else_subgraph_index(else_subgraph_index); + builder_.add_then_subgraph_index(then_subgraph_index); + return builder_.Finish(); +} + +struct CallOnceOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef CallOnceOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_INIT_SUBGRAPH_INDEX = 4 + }; + int32_t init_subgraph_index() const { + return GetField(VT_INIT_SUBGRAPH_INDEX, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_INIT_SUBGRAPH_INDEX, 4) && + verifier.EndTable(); + } +}; + +struct CallOnceOptionsBuilder { + typedef CallOnceOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_init_subgraph_index(int32_t init_subgraph_index) { + fbb_.AddElement(CallOnceOptions::VT_INIT_SUBGRAPH_INDEX, init_subgraph_index, 0); + } + explicit CallOnceOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset 
Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateCallOnceOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t init_subgraph_index = 0) { + CallOnceOptionsBuilder builder_(_fbb); + builder_.add_init_subgraph_index(init_subgraph_index); + return builder_.Finish(); +} + +struct WhileOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef WhileOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_COND_SUBGRAPH_INDEX = 4, + VT_BODY_SUBGRAPH_INDEX = 6 + }; + int32_t cond_subgraph_index() const { + return GetField(VT_COND_SUBGRAPH_INDEX, 0); + } + int32_t body_subgraph_index() const { + return GetField(VT_BODY_SUBGRAPH_INDEX, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_COND_SUBGRAPH_INDEX, 4) && + VerifyField(verifier, VT_BODY_SUBGRAPH_INDEX, 4) && + verifier.EndTable(); + } +}; + +struct WhileOptionsBuilder { + typedef WhileOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_cond_subgraph_index(int32_t cond_subgraph_index) { + fbb_.AddElement(WhileOptions::VT_COND_SUBGRAPH_INDEX, cond_subgraph_index, 0); + } + void add_body_subgraph_index(int32_t body_subgraph_index) { + fbb_.AddElement(WhileOptions::VT_BODY_SUBGRAPH_INDEX, body_subgraph_index, 0); + } + explicit WhileOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateWhileOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t cond_subgraph_index = 0, + int32_t body_subgraph_index = 0) { + WhileOptionsBuilder builder_(_fbb); + builder_.add_body_subgraph_index(body_subgraph_index); + 
builder_.add_cond_subgraph_index(cond_subgraph_index); + return builder_.Finish(); +} + +struct NonMaxSuppressionV4Options FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef NonMaxSuppressionV4OptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct NonMaxSuppressionV4OptionsBuilder { + typedef NonMaxSuppressionV4Options Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit NonMaxSuppressionV4OptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateNonMaxSuppressionV4Options( + ::flatbuffers::FlatBufferBuilder &_fbb) { + NonMaxSuppressionV4OptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct NonMaxSuppressionV5Options FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef NonMaxSuppressionV5OptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct NonMaxSuppressionV5OptionsBuilder { + typedef NonMaxSuppressionV5Options Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit NonMaxSuppressionV5OptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateNonMaxSuppressionV5Options( + ::flatbuffers::FlatBufferBuilder &_fbb) { + NonMaxSuppressionV5OptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct ScatterNdOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef 
ScatterNdOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct ScatterNdOptionsBuilder { + typedef ScatterNdOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit ScatterNdOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateScatterNdOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + ScatterNdOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct SelectV2Options FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SelectV2OptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct SelectV2OptionsBuilder { + typedef SelectV2Options Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit SelectV2OptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSelectV2Options( + ::flatbuffers::FlatBufferBuilder &_fbb) { + SelectV2OptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct DensifyOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DensifyOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct DensifyOptionsBuilder { + typedef DensifyOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit 
DensifyOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDensifyOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + DensifyOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct SegmentSumOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SegmentSumOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct SegmentSumOptionsBuilder { + typedef SegmentSumOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit SegmentSumOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSegmentSumOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + SegmentSumOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct BatchMatMulOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef BatchMatMulOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_ADJ_X = 4, + VT_ADJ_Y = 6, + VT_ASYMMETRIC_QUANTIZE_INPUTS = 8 + }; + bool adj_x() const { + return GetField(VT_ADJ_X, 0) != 0; + } + bool adj_y() const { + return GetField(VT_ADJ_Y, 0) != 0; + } + bool asymmetric_quantize_inputs() const { + return GetField(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_ADJ_X, 1) && + VerifyField(verifier, VT_ADJ_Y, 1) && + VerifyField(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS, 
1) && + verifier.EndTable(); + } +}; + +struct BatchMatMulOptionsBuilder { + typedef BatchMatMulOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_adj_x(bool adj_x) { + fbb_.AddElement(BatchMatMulOptions::VT_ADJ_X, static_cast(adj_x), 0); + } + void add_adj_y(bool adj_y) { + fbb_.AddElement(BatchMatMulOptions::VT_ADJ_Y, static_cast(adj_y), 0); + } + void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs) { + fbb_.AddElement(BatchMatMulOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS, static_cast(asymmetric_quantize_inputs), 0); + } + explicit BatchMatMulOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateBatchMatMulOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + bool adj_x = false, + bool adj_y = false, + bool asymmetric_quantize_inputs = false) { + BatchMatMulOptionsBuilder builder_(_fbb); + builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs); + builder_.add_adj_y(adj_y); + builder_.add_adj_x(adj_x); + return builder_.Finish(); +} + +struct CumsumOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef CumsumOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_EXCLUSIVE = 4, + VT_REVERSE = 6 + }; + bool exclusive() const { + return GetField(VT_EXCLUSIVE, 0) != 0; + } + bool reverse() const { + return GetField(VT_REVERSE, 0) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_EXCLUSIVE, 1) && + VerifyField(verifier, VT_REVERSE, 1) && + verifier.EndTable(); + } +}; + +struct CumsumOptionsBuilder { + typedef CumsumOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void 
add_exclusive(bool exclusive) { + fbb_.AddElement(CumsumOptions::VT_EXCLUSIVE, static_cast(exclusive), 0); + } + void add_reverse(bool reverse) { + fbb_.AddElement(CumsumOptions::VT_REVERSE, static_cast(reverse), 0); + } + explicit CumsumOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateCumsumOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + bool exclusive = false, + bool reverse = false) { + CumsumOptionsBuilder builder_(_fbb); + builder_.add_reverse(reverse); + builder_.add_exclusive(exclusive); + return builder_.Finish(); +} + +struct BroadcastToOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef BroadcastToOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct BroadcastToOptionsBuilder { + typedef BroadcastToOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit BroadcastToOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateBroadcastToOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + BroadcastToOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct Rfft2dOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef Rfft2dOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct Rfft2dOptionsBuilder { + typedef Rfft2dOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t 
start_; + explicit Rfft2dOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateRfft2dOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + Rfft2dOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct HashtableOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef HashtableOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_TABLE_ID = 4, + VT_KEY_DTYPE = 6, + VT_VALUE_DTYPE = 8 + }; + int32_t table_id() const { + return GetField(VT_TABLE_ID, 0); + } + opencv_tflite::TensorType key_dtype() const { + return static_cast(GetField(VT_KEY_DTYPE, 0)); + } + opencv_tflite::TensorType value_dtype() const { + return static_cast(GetField(VT_VALUE_DTYPE, 0)); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_TABLE_ID, 4) && + VerifyField(verifier, VT_KEY_DTYPE, 1) && + VerifyField(verifier, VT_VALUE_DTYPE, 1) && + verifier.EndTable(); + } +}; + +struct HashtableOptionsBuilder { + typedef HashtableOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_table_id(int32_t table_id) { + fbb_.AddElement(HashtableOptions::VT_TABLE_ID, table_id, 0); + } + void add_key_dtype(opencv_tflite::TensorType key_dtype) { + fbb_.AddElement(HashtableOptions::VT_KEY_DTYPE, static_cast(key_dtype), 0); + } + void add_value_dtype(opencv_tflite::TensorType value_dtype) { + fbb_.AddElement(HashtableOptions::VT_VALUE_DTYPE, static_cast(value_dtype), 0); + } + explicit HashtableOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = 
::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateHashtableOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int32_t table_id = 0, + opencv_tflite::TensorType key_dtype = opencv_tflite::TensorType_FLOAT32, + opencv_tflite::TensorType value_dtype = opencv_tflite::TensorType_FLOAT32) { + HashtableOptionsBuilder builder_(_fbb); + builder_.add_table_id(table_id); + builder_.add_value_dtype(value_dtype); + builder_.add_key_dtype(key_dtype); + return builder_.Finish(); +} + +struct HashtableFindOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef HashtableFindOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct HashtableFindOptionsBuilder { + typedef HashtableFindOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit HashtableFindOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateHashtableFindOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + HashtableFindOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct HashtableImportOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef HashtableImportOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct HashtableImportOptionsBuilder { + typedef HashtableImportOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit HashtableImportOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = 
fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateHashtableImportOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + HashtableImportOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct HashtableSizeOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef HashtableSizeOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct HashtableSizeOptionsBuilder { + typedef HashtableSizeOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit HashtableSizeOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateHashtableSizeOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + HashtableSizeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct VarHandleOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef VarHandleOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_CONTAINER = 4, + VT_SHARED_NAME = 6 + }; + const ::flatbuffers::String *container() const { + return GetPointer(VT_CONTAINER); + } + const ::flatbuffers::String *shared_name() const { + return GetPointer(VT_SHARED_NAME); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_CONTAINER) && + verifier.VerifyString(container()) && + VerifyOffset(verifier, VT_SHARED_NAME) && + verifier.VerifyString(shared_name()) && + verifier.EndTable(); + } +}; + +struct VarHandleOptionsBuilder { + typedef VarHandleOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + 
void add_container(::flatbuffers::Offset<::flatbuffers::String> container) { + fbb_.AddOffset(VarHandleOptions::VT_CONTAINER, container); + } + void add_shared_name(::flatbuffers::Offset<::flatbuffers::String> shared_name) { + fbb_.AddOffset(VarHandleOptions::VT_SHARED_NAME, shared_name); + } + explicit VarHandleOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateVarHandleOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> container = 0, + ::flatbuffers::Offset<::flatbuffers::String> shared_name = 0) { + VarHandleOptionsBuilder builder_(_fbb); + builder_.add_shared_name(shared_name); + builder_.add_container(container); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateVarHandleOptionsDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const char *container = nullptr, + const char *shared_name = nullptr) { + auto container__ = container ? _fbb.CreateString(container) : 0; + auto shared_name__ = shared_name ? 
_fbb.CreateString(shared_name) : 0; + return opencv_tflite::CreateVarHandleOptions( + _fbb, + container__, + shared_name__); +} + +struct ReadVariableOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ReadVariableOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct ReadVariableOptionsBuilder { + typedef ReadVariableOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit ReadVariableOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateReadVariableOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + ReadVariableOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct AssignVariableOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef AssignVariableOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct AssignVariableOptionsBuilder { + typedef AssignVariableOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit AssignVariableOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateAssignVariableOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + AssignVariableOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct RandomOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef RandomOptionsBuilder Builder; + enum 
FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_SEED = 4, + VT_SEED2 = 6 + }; + int64_t seed() const { + return GetField(VT_SEED, 0); + } + int64_t seed2() const { + return GetField(VT_SEED2, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_SEED, 8) && + VerifyField(verifier, VT_SEED2, 8) && + verifier.EndTable(); + } +}; + +struct RandomOptionsBuilder { + typedef RandomOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_seed(int64_t seed) { + fbb_.AddElement(RandomOptions::VT_SEED, seed, 0); + } + void add_seed2(int64_t seed2) { + fbb_.AddElement(RandomOptions::VT_SEED2, seed2, 0); + } + explicit RandomOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateRandomOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + int64_t seed = 0, + int64_t seed2 = 0) { + RandomOptionsBuilder builder_(_fbb); + builder_.add_seed2(seed2); + builder_.add_seed(seed); + return builder_.Finish(); +} + +struct BucketizeOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef BucketizeOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_BOUNDARIES = 4 + }; + const ::flatbuffers::Vector *boundaries() const { + return GetPointer *>(VT_BOUNDARIES); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_BOUNDARIES) && + verifier.VerifyVector(boundaries()) && + verifier.EndTable(); + } +}; + +struct BucketizeOptionsBuilder { + typedef BucketizeOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void 
add_boundaries(::flatbuffers::Offset<::flatbuffers::Vector> boundaries) { + fbb_.AddOffset(BucketizeOptions::VT_BOUNDARIES, boundaries); + } + explicit BucketizeOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateBucketizeOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> boundaries = 0) { + BucketizeOptionsBuilder builder_(_fbb); + builder_.add_boundaries(boundaries); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateBucketizeOptionsDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *boundaries = nullptr) { + auto boundaries__ = boundaries ? _fbb.CreateVector(*boundaries) : 0; + return opencv_tflite::CreateBucketizeOptions( + _fbb, + boundaries__); +} + +struct GeluOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef GeluOptionsBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_APPROXIMATE = 4 + }; + bool approximate() const { + return GetField(VT_APPROXIMATE, 0) != 0; + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_APPROXIMATE, 1) && + verifier.EndTable(); + } +}; + +struct GeluOptionsBuilder { + typedef GeluOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_approximate(bool approximate) { + fbb_.AddElement(GeluOptions::VT_APPROXIMATE, static_cast(approximate), 0); + } + explicit GeluOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset 
CreateGeluOptions( + ::flatbuffers::FlatBufferBuilder &_fbb, + bool approximate = false) { + GeluOptionsBuilder builder_(_fbb); + builder_.add_approximate(approximate); + return builder_.Finish(); +} + +struct DynamicUpdateSliceOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DynamicUpdateSliceOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct DynamicUpdateSliceOptionsBuilder { + typedef DynamicUpdateSliceOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit DynamicUpdateSliceOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDynamicUpdateSliceOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + DynamicUpdateSliceOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct UnsortedSegmentProdOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef UnsortedSegmentProdOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct UnsortedSegmentProdOptionsBuilder { + typedef UnsortedSegmentProdOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit UnsortedSegmentProdOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateUnsortedSegmentProdOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + UnsortedSegmentProdOptionsBuilder builder_(_fbb); + return 
builder_.Finish(); +} + +struct UnsortedSegmentMaxOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef UnsortedSegmentMaxOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct UnsortedSegmentMaxOptionsBuilder { + typedef UnsortedSegmentMaxOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit UnsortedSegmentMaxOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateUnsortedSegmentMaxOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + UnsortedSegmentMaxOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct UnsortedSegmentSumOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef UnsortedSegmentSumOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct UnsortedSegmentSumOptionsBuilder { + typedef UnsortedSegmentSumOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit UnsortedSegmentSumOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateUnsortedSegmentSumOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + UnsortedSegmentSumOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct ATan2Options FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ATan2OptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + 
return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct ATan2OptionsBuilder { + typedef ATan2Options Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit ATan2OptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateATan2Options( + ::flatbuffers::FlatBufferBuilder &_fbb) { + ATan2OptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct UnsortedSegmentMinOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef UnsortedSegmentMinOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct UnsortedSegmentMinOptionsBuilder { + typedef UnsortedSegmentMinOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit UnsortedSegmentMinOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateUnsortedSegmentMinOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + UnsortedSegmentMinOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct SignOptions FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SignOptionsBuilder Builder; + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } +}; + +struct SignOptionsBuilder { + typedef SignOptions Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + explicit SignOptionsBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + 
start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSignOptions( + ::flatbuffers::FlatBufferBuilder &_fbb) { + SignOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +struct OperatorCode FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef OperatorCodeBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_DEPRECATED_BUILTIN_CODE = 4, + VT_CUSTOM_CODE = 6, + VT_VERSION = 8, + VT_BUILTIN_CODE = 10 + }; + int8_t deprecated_builtin_code() const { + return GetField(VT_DEPRECATED_BUILTIN_CODE, 0); + } + const ::flatbuffers::String *custom_code() const { + return GetPointer(VT_CUSTOM_CODE); + } + int32_t version() const { + return GetField(VT_VERSION, 1); + } + opencv_tflite::BuiltinOperator builtin_code() const { + return static_cast(GetField(VT_BUILTIN_CODE, 0)); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_DEPRECATED_BUILTIN_CODE, 1) && + VerifyOffset(verifier, VT_CUSTOM_CODE) && + verifier.VerifyString(custom_code()) && + VerifyField(verifier, VT_VERSION, 4) && + VerifyField(verifier, VT_BUILTIN_CODE, 4) && + verifier.EndTable(); + } +}; + +struct OperatorCodeBuilder { + typedef OperatorCode Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_deprecated_builtin_code(int8_t deprecated_builtin_code) { + fbb_.AddElement(OperatorCode::VT_DEPRECATED_BUILTIN_CODE, deprecated_builtin_code, 0); + } + void add_custom_code(::flatbuffers::Offset<::flatbuffers::String> custom_code) { + fbb_.AddOffset(OperatorCode::VT_CUSTOM_CODE, custom_code); + } + void add_version(int32_t version) { + fbb_.AddElement(OperatorCode::VT_VERSION, version, 1); + } + void add_builtin_code(opencv_tflite::BuiltinOperator builtin_code) { + 
fbb_.AddElement(OperatorCode::VT_BUILTIN_CODE, static_cast(builtin_code), 0); + } + explicit OperatorCodeBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateOperatorCode( + ::flatbuffers::FlatBufferBuilder &_fbb, + int8_t deprecated_builtin_code = 0, + ::flatbuffers::Offset<::flatbuffers::String> custom_code = 0, + int32_t version = 1, + opencv_tflite::BuiltinOperator builtin_code = opencv_tflite::BuiltinOperator_ADD) { + OperatorCodeBuilder builder_(_fbb); + builder_.add_builtin_code(builtin_code); + builder_.add_version(version); + builder_.add_custom_code(custom_code); + builder_.add_deprecated_builtin_code(deprecated_builtin_code); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateOperatorCodeDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + int8_t deprecated_builtin_code = 0, + const char *custom_code = nullptr, + int32_t version = 1, + opencv_tflite::BuiltinOperator builtin_code = opencv_tflite::BuiltinOperator_ADD) { + auto custom_code__ = custom_code ? 
_fbb.CreateString(custom_code) : 0; + return opencv_tflite::CreateOperatorCode( + _fbb, + deprecated_builtin_code, + custom_code__, + version, + builtin_code); +} + +struct Operator FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef OperatorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_OPCODE_INDEX = 4, + VT_INPUTS = 6, + VT_OUTPUTS = 8, + VT_BUILTIN_OPTIONS_TYPE = 10, + VT_BUILTIN_OPTIONS = 12, + VT_CUSTOM_OPTIONS = 14, + VT_CUSTOM_OPTIONS_FORMAT = 16, + VT_MUTATING_VARIABLE_INPUTS = 18, + VT_INTERMEDIATES = 20 + }; + uint32_t opcode_index() const { + return GetField(VT_OPCODE_INDEX, 0); + } + const ::flatbuffers::Vector *inputs() const { + return GetPointer *>(VT_INPUTS); + } + const ::flatbuffers::Vector *outputs() const { + return GetPointer *>(VT_OUTPUTS); + } + opencv_tflite::BuiltinOptions builtin_options_type() const { + return static_cast(GetField(VT_BUILTIN_OPTIONS_TYPE, 0)); + } + const void *builtin_options() const { + return GetPointer(VT_BUILTIN_OPTIONS); + } + template const T *builtin_options_as() const; + const opencv_tflite::Conv2DOptions *builtin_options_as_Conv2DOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_Conv2DOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_DepthwiseConv2DOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_ConcatEmbeddingsOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_LSHProjectionOptions ? 
static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::Pool2DOptions *builtin_options_as_Pool2DOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_Pool2DOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::SVDFOptions *builtin_options_as_SVDFOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_SVDFOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::RNNOptions *builtin_options_as_RNNOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_RNNOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_FullyConnectedOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::SoftmaxOptions *builtin_options_as_SoftmaxOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_SoftmaxOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::ConcatenationOptions *builtin_options_as_ConcatenationOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_ConcatenationOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::AddOptions *builtin_options_as_AddOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_AddOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::L2NormOptions *builtin_options_as_L2NormOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_L2NormOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::LocalResponseNormalizationOptions *builtin_options_as_LocalResponseNormalizationOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_LocalResponseNormalizationOptions ? 
static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::LSTMOptions *builtin_options_as_LSTMOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_LSTMOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_ResizeBilinearOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::CallOptions *builtin_options_as_CallOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_CallOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::ReshapeOptions *builtin_options_as_ReshapeOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_ReshapeOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::SkipGramOptions *builtin_options_as_SkipGramOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_SkipGramOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_SpaceToDepthOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_EmbeddingLookupSparseOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::MulOptions *builtin_options_as_MulOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_MulOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::PadOptions *builtin_options_as_PadOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_PadOptions ? 
static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::GatherOptions *builtin_options_as_GatherOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_GatherOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_BatchToSpaceNDOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_SpaceToBatchNDOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::TransposeOptions *builtin_options_as_TransposeOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_TransposeOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::ReducerOptions *builtin_options_as_ReducerOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_ReducerOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::SubOptions *builtin_options_as_SubOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_SubOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::DivOptions *builtin_options_as_DivOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_DivOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::SqueezeOptions *builtin_options_as_SqueezeOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_SqueezeOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_SequenceRNNOptions ? 
static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::StridedSliceOptions *builtin_options_as_StridedSliceOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_StridedSliceOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::ExpOptions *builtin_options_as_ExpOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_ExpOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::TopKV2Options *builtin_options_as_TopKV2Options() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_TopKV2Options ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::SplitOptions *builtin_options_as_SplitOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_SplitOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_LogSoftmaxOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::CastOptions *builtin_options_as_CastOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_CastOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::DequantizeOptions *builtin_options_as_DequantizeOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_DequantizeOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_MaximumMinimumOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::ArgMaxOptions *builtin_options_as_ArgMaxOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_ArgMaxOptions ? 
static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::LessOptions *builtin_options_as_LessOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_LessOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::NegOptions *builtin_options_as_NegOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_NegOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::PadV2Options *builtin_options_as_PadV2Options() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_PadV2Options ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::GreaterOptions *builtin_options_as_GreaterOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_GreaterOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_GreaterEqualOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::LessEqualOptions *builtin_options_as_LessEqualOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_LessEqualOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::SelectOptions *builtin_options_as_SelectOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_SelectOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::SliceOptions *builtin_options_as_SliceOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_SliceOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::TransposeConvOptions *builtin_options_as_TransposeConvOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_TransposeConvOptions ? 
static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_SparseToDenseOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::TileOptions *builtin_options_as_TileOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_TileOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_ExpandDimsOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::EqualOptions *builtin_options_as_EqualOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_EqualOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::NotEqualOptions *builtin_options_as_NotEqualOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_NotEqualOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::ShapeOptions *builtin_options_as_ShapeOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_ShapeOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::PowOptions *builtin_options_as_PowOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_PowOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::ArgMinOptions *builtin_options_as_ArgMinOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_ArgMinOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::FakeQuantOptions *builtin_options_as_FakeQuantOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_FakeQuantOptions ? 
static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::PackOptions *builtin_options_as_PackOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_PackOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::LogicalOrOptions *builtin_options_as_LogicalOrOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_LogicalOrOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::OneHotOptions *builtin_options_as_OneHotOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_OneHotOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::LogicalAndOptions *builtin_options_as_LogicalAndOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_LogicalAndOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::LogicalNotOptions *builtin_options_as_LogicalNotOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_LogicalNotOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::UnpackOptions *builtin_options_as_UnpackOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_UnpackOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::FloorDivOptions *builtin_options_as_FloorDivOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_FloorDivOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::SquareOptions *builtin_options_as_SquareOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_SquareOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_ZerosLikeOptions ? 
static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::FillOptions *builtin_options_as_FillOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_FillOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::BidirectionalSequenceLSTMOptions *builtin_options_as_BidirectionalSequenceLSTMOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_BidirectionalSequenceLSTMOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_BidirectionalSequenceRNNOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::UnidirectionalSequenceLSTMOptions *builtin_options_as_UnidirectionalSequenceLSTMOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_UnidirectionalSequenceLSTMOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::FloorModOptions *builtin_options_as_FloorModOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_FloorModOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::RangeOptions *builtin_options_as_RangeOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_RangeOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_ResizeNearestNeighborOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::LeakyReluOptions *builtin_options_as_LeakyReluOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_LeakyReluOptions ? 
static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_SquaredDifferenceOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::MirrorPadOptions *builtin_options_as_MirrorPadOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_MirrorPadOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::AbsOptions *builtin_options_as_AbsOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_AbsOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::SplitVOptions *builtin_options_as_SplitVOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_SplitVOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::UniqueOptions *builtin_options_as_UniqueOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_UniqueOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::ReverseV2Options *builtin_options_as_ReverseV2Options() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_ReverseV2Options ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::AddNOptions *builtin_options_as_AddNOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_AddNOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::GatherNdOptions *builtin_options_as_GatherNdOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_GatherNdOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::CosOptions *builtin_options_as_CosOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_CosOptions ? 
static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::WhereOptions *builtin_options_as_WhereOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_WhereOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::RankOptions *builtin_options_as_RankOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_RankOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_ReverseSequenceOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_MatrixDiagOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::QuantizeOptions *builtin_options_as_QuantizeOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_QuantizeOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_MatrixSetDiagOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::HardSwishOptions *builtin_options_as_HardSwishOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_HardSwishOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::IfOptions *builtin_options_as_IfOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_IfOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::WhileOptions *builtin_options_as_WhileOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_WhileOptions ? 
static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_DepthToSpaceOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_NonMaxSuppressionV4Options ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_NonMaxSuppressionV5Options ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::ScatterNdOptions *builtin_options_as_ScatterNdOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_ScatterNdOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::SelectV2Options *builtin_options_as_SelectV2Options() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_SelectV2Options ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::DensifyOptions *builtin_options_as_DensifyOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_DensifyOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::SegmentSumOptions *builtin_options_as_SegmentSumOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_SegmentSumOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_BatchMatMulOptions ? 
static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::CumsumOptions *builtin_options_as_CumsumOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_CumsumOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::CallOnceOptions *builtin_options_as_CallOnceOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_CallOnceOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::BroadcastToOptions *builtin_options_as_BroadcastToOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_BroadcastToOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::Rfft2dOptions *builtin_options_as_Rfft2dOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_Rfft2dOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::Conv3DOptions *builtin_options_as_Conv3DOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_Conv3DOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::HashtableOptions *builtin_options_as_HashtableOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_HashtableOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::HashtableFindOptions *builtin_options_as_HashtableFindOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_HashtableFindOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::HashtableImportOptions *builtin_options_as_HashtableImportOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_HashtableImportOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::HashtableSizeOptions *builtin_options_as_HashtableSizeOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_HashtableSizeOptions ? 
static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::VarHandleOptions *builtin_options_as_VarHandleOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_VarHandleOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::ReadVariableOptions *builtin_options_as_ReadVariableOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_ReadVariableOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::AssignVariableOptions *builtin_options_as_AssignVariableOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_AssignVariableOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::RandomOptions *builtin_options_as_RandomOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_RandomOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::BucketizeOptions *builtin_options_as_BucketizeOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_BucketizeOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::GeluOptions *builtin_options_as_GeluOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_GeluOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::DynamicUpdateSliceOptions *builtin_options_as_DynamicUpdateSliceOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_DynamicUpdateSliceOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::UnsortedSegmentProdOptions *builtin_options_as_UnsortedSegmentProdOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_UnsortedSegmentProdOptions ? 
static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::UnsortedSegmentMaxOptions *builtin_options_as_UnsortedSegmentMaxOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_UnsortedSegmentMaxOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::UnsortedSegmentMinOptions *builtin_options_as_UnsortedSegmentMinOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_UnsortedSegmentMinOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::UnsortedSegmentSumOptions *builtin_options_as_UnsortedSegmentSumOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_UnsortedSegmentSumOptions ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::ATan2Options *builtin_options_as_ATan2Options() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_ATan2Options ? static_cast(builtin_options()) : nullptr; + } + const opencv_tflite::SignOptions *builtin_options_as_SignOptions() const { + return builtin_options_type() == opencv_tflite::BuiltinOptions_SignOptions ? 
static_cast(builtin_options()) : nullptr; + } + const ::flatbuffers::Vector *custom_options() const { + return GetPointer *>(VT_CUSTOM_OPTIONS); + } + opencv_tflite::CustomOptionsFormat custom_options_format() const { + return static_cast(GetField(VT_CUSTOM_OPTIONS_FORMAT, 0)); + } + const ::flatbuffers::Vector *mutating_variable_inputs() const { + return GetPointer *>(VT_MUTATING_VARIABLE_INPUTS); + } + const ::flatbuffers::Vector *intermediates() const { + return GetPointer *>(VT_INTERMEDIATES); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_OPCODE_INDEX, 4) && + VerifyOffset(verifier, VT_INPUTS) && + verifier.VerifyVector(inputs()) && + VerifyOffset(verifier, VT_OUTPUTS) && + verifier.VerifyVector(outputs()) && + VerifyField(verifier, VT_BUILTIN_OPTIONS_TYPE, 1) && + VerifyOffset(verifier, VT_BUILTIN_OPTIONS) && + VerifyBuiltinOptions(verifier, builtin_options(), builtin_options_type()) && + VerifyOffset(verifier, VT_CUSTOM_OPTIONS) && + verifier.VerifyVector(custom_options()) && + VerifyField(verifier, VT_CUSTOM_OPTIONS_FORMAT, 1) && + VerifyOffset(verifier, VT_MUTATING_VARIABLE_INPUTS) && + verifier.VerifyVector(mutating_variable_inputs()) && + VerifyOffset(verifier, VT_INTERMEDIATES) && + verifier.VerifyVector(intermediates()) && + verifier.EndTable(); + } +}; + +template<> inline const opencv_tflite::Conv2DOptions *Operator::builtin_options_as() const { + return builtin_options_as_Conv2DOptions(); +} + +template<> inline const opencv_tflite::DepthwiseConv2DOptions *Operator::builtin_options_as() const { + return builtin_options_as_DepthwiseConv2DOptions(); +} + +template<> inline const opencv_tflite::ConcatEmbeddingsOptions *Operator::builtin_options_as() const { + return builtin_options_as_ConcatEmbeddingsOptions(); +} + +template<> inline const opencv_tflite::LSHProjectionOptions *Operator::builtin_options_as() const { + return builtin_options_as_LSHProjectionOptions(); +} + 
+template<> inline const opencv_tflite::Pool2DOptions *Operator::builtin_options_as() const { + return builtin_options_as_Pool2DOptions(); +} + +template<> inline const opencv_tflite::SVDFOptions *Operator::builtin_options_as() const { + return builtin_options_as_SVDFOptions(); +} + +template<> inline const opencv_tflite::RNNOptions *Operator::builtin_options_as() const { + return builtin_options_as_RNNOptions(); +} + +template<> inline const opencv_tflite::FullyConnectedOptions *Operator::builtin_options_as() const { + return builtin_options_as_FullyConnectedOptions(); +} + +template<> inline const opencv_tflite::SoftmaxOptions *Operator::builtin_options_as() const { + return builtin_options_as_SoftmaxOptions(); +} + +template<> inline const opencv_tflite::ConcatenationOptions *Operator::builtin_options_as() const { + return builtin_options_as_ConcatenationOptions(); +} + +template<> inline const opencv_tflite::AddOptions *Operator::builtin_options_as() const { + return builtin_options_as_AddOptions(); +} + +template<> inline const opencv_tflite::L2NormOptions *Operator::builtin_options_as() const { + return builtin_options_as_L2NormOptions(); +} + +template<> inline const opencv_tflite::LocalResponseNormalizationOptions *Operator::builtin_options_as() const { + return builtin_options_as_LocalResponseNormalizationOptions(); +} + +template<> inline const opencv_tflite::LSTMOptions *Operator::builtin_options_as() const { + return builtin_options_as_LSTMOptions(); +} + +template<> inline const opencv_tflite::ResizeBilinearOptions *Operator::builtin_options_as() const { + return builtin_options_as_ResizeBilinearOptions(); +} + +template<> inline const opencv_tflite::CallOptions *Operator::builtin_options_as() const { + return builtin_options_as_CallOptions(); +} + +template<> inline const opencv_tflite::ReshapeOptions *Operator::builtin_options_as() const { + return builtin_options_as_ReshapeOptions(); +} + +template<> inline const opencv_tflite::SkipGramOptions 
*Operator::builtin_options_as() const { + return builtin_options_as_SkipGramOptions(); +} + +template<> inline const opencv_tflite::SpaceToDepthOptions *Operator::builtin_options_as() const { + return builtin_options_as_SpaceToDepthOptions(); +} + +template<> inline const opencv_tflite::EmbeddingLookupSparseOptions *Operator::builtin_options_as() const { + return builtin_options_as_EmbeddingLookupSparseOptions(); +} + +template<> inline const opencv_tflite::MulOptions *Operator::builtin_options_as() const { + return builtin_options_as_MulOptions(); +} + +template<> inline const opencv_tflite::PadOptions *Operator::builtin_options_as() const { + return builtin_options_as_PadOptions(); +} + +template<> inline const opencv_tflite::GatherOptions *Operator::builtin_options_as() const { + return builtin_options_as_GatherOptions(); +} + +template<> inline const opencv_tflite::BatchToSpaceNDOptions *Operator::builtin_options_as() const { + return builtin_options_as_BatchToSpaceNDOptions(); +} + +template<> inline const opencv_tflite::SpaceToBatchNDOptions *Operator::builtin_options_as() const { + return builtin_options_as_SpaceToBatchNDOptions(); +} + +template<> inline const opencv_tflite::TransposeOptions *Operator::builtin_options_as() const { + return builtin_options_as_TransposeOptions(); +} + +template<> inline const opencv_tflite::ReducerOptions *Operator::builtin_options_as() const { + return builtin_options_as_ReducerOptions(); +} + +template<> inline const opencv_tflite::SubOptions *Operator::builtin_options_as() const { + return builtin_options_as_SubOptions(); +} + +template<> inline const opencv_tflite::DivOptions *Operator::builtin_options_as() const { + return builtin_options_as_DivOptions(); +} + +template<> inline const opencv_tflite::SqueezeOptions *Operator::builtin_options_as() const { + return builtin_options_as_SqueezeOptions(); +} + +template<> inline const opencv_tflite::SequenceRNNOptions *Operator::builtin_options_as() const { + return 
builtin_options_as_SequenceRNNOptions(); +} + +template<> inline const opencv_tflite::StridedSliceOptions *Operator::builtin_options_as() const { + return builtin_options_as_StridedSliceOptions(); +} + +template<> inline const opencv_tflite::ExpOptions *Operator::builtin_options_as() const { + return builtin_options_as_ExpOptions(); +} + +template<> inline const opencv_tflite::TopKV2Options *Operator::builtin_options_as() const { + return builtin_options_as_TopKV2Options(); +} + +template<> inline const opencv_tflite::SplitOptions *Operator::builtin_options_as() const { + return builtin_options_as_SplitOptions(); +} + +template<> inline const opencv_tflite::LogSoftmaxOptions *Operator::builtin_options_as() const { + return builtin_options_as_LogSoftmaxOptions(); +} + +template<> inline const opencv_tflite::CastOptions *Operator::builtin_options_as() const { + return builtin_options_as_CastOptions(); +} + +template<> inline const opencv_tflite::DequantizeOptions *Operator::builtin_options_as() const { + return builtin_options_as_DequantizeOptions(); +} + +template<> inline const opencv_tflite::MaximumMinimumOptions *Operator::builtin_options_as() const { + return builtin_options_as_MaximumMinimumOptions(); +} + +template<> inline const opencv_tflite::ArgMaxOptions *Operator::builtin_options_as() const { + return builtin_options_as_ArgMaxOptions(); +} + +template<> inline const opencv_tflite::LessOptions *Operator::builtin_options_as() const { + return builtin_options_as_LessOptions(); +} + +template<> inline const opencv_tflite::NegOptions *Operator::builtin_options_as() const { + return builtin_options_as_NegOptions(); +} + +template<> inline const opencv_tflite::PadV2Options *Operator::builtin_options_as() const { + return builtin_options_as_PadV2Options(); +} + +template<> inline const opencv_tflite::GreaterOptions *Operator::builtin_options_as() const { + return builtin_options_as_GreaterOptions(); +} + +template<> inline const opencv_tflite::GreaterEqualOptions 
*Operator::builtin_options_as() const { + return builtin_options_as_GreaterEqualOptions(); +} + +template<> inline const opencv_tflite::LessEqualOptions *Operator::builtin_options_as() const { + return builtin_options_as_LessEqualOptions(); +} + +template<> inline const opencv_tflite::SelectOptions *Operator::builtin_options_as() const { + return builtin_options_as_SelectOptions(); +} + +template<> inline const opencv_tflite::SliceOptions *Operator::builtin_options_as() const { + return builtin_options_as_SliceOptions(); +} + +template<> inline const opencv_tflite::TransposeConvOptions *Operator::builtin_options_as() const { + return builtin_options_as_TransposeConvOptions(); +} + +template<> inline const opencv_tflite::SparseToDenseOptions *Operator::builtin_options_as() const { + return builtin_options_as_SparseToDenseOptions(); +} + +template<> inline const opencv_tflite::TileOptions *Operator::builtin_options_as() const { + return builtin_options_as_TileOptions(); +} + +template<> inline const opencv_tflite::ExpandDimsOptions *Operator::builtin_options_as() const { + return builtin_options_as_ExpandDimsOptions(); +} + +template<> inline const opencv_tflite::EqualOptions *Operator::builtin_options_as() const { + return builtin_options_as_EqualOptions(); +} + +template<> inline const opencv_tflite::NotEqualOptions *Operator::builtin_options_as() const { + return builtin_options_as_NotEqualOptions(); +} + +template<> inline const opencv_tflite::ShapeOptions *Operator::builtin_options_as() const { + return builtin_options_as_ShapeOptions(); +} + +template<> inline const opencv_tflite::PowOptions *Operator::builtin_options_as() const { + return builtin_options_as_PowOptions(); +} + +template<> inline const opencv_tflite::ArgMinOptions *Operator::builtin_options_as() const { + return builtin_options_as_ArgMinOptions(); +} + +template<> inline const opencv_tflite::FakeQuantOptions *Operator::builtin_options_as() const { + return builtin_options_as_FakeQuantOptions(); 
+} + +template<> inline const opencv_tflite::PackOptions *Operator::builtin_options_as() const { + return builtin_options_as_PackOptions(); +} + +template<> inline const opencv_tflite::LogicalOrOptions *Operator::builtin_options_as() const { + return builtin_options_as_LogicalOrOptions(); +} + +template<> inline const opencv_tflite::OneHotOptions *Operator::builtin_options_as() const { + return builtin_options_as_OneHotOptions(); +} + +template<> inline const opencv_tflite::LogicalAndOptions *Operator::builtin_options_as() const { + return builtin_options_as_LogicalAndOptions(); +} + +template<> inline const opencv_tflite::LogicalNotOptions *Operator::builtin_options_as() const { + return builtin_options_as_LogicalNotOptions(); +} + +template<> inline const opencv_tflite::UnpackOptions *Operator::builtin_options_as() const { + return builtin_options_as_UnpackOptions(); +} + +template<> inline const opencv_tflite::FloorDivOptions *Operator::builtin_options_as() const { + return builtin_options_as_FloorDivOptions(); +} + +template<> inline const opencv_tflite::SquareOptions *Operator::builtin_options_as() const { + return builtin_options_as_SquareOptions(); +} + +template<> inline const opencv_tflite::ZerosLikeOptions *Operator::builtin_options_as() const { + return builtin_options_as_ZerosLikeOptions(); +} + +template<> inline const opencv_tflite::FillOptions *Operator::builtin_options_as() const { + return builtin_options_as_FillOptions(); +} + +template<> inline const opencv_tflite::BidirectionalSequenceLSTMOptions *Operator::builtin_options_as() const { + return builtin_options_as_BidirectionalSequenceLSTMOptions(); +} + +template<> inline const opencv_tflite::BidirectionalSequenceRNNOptions *Operator::builtin_options_as() const { + return builtin_options_as_BidirectionalSequenceRNNOptions(); +} + +template<> inline const opencv_tflite::UnidirectionalSequenceLSTMOptions *Operator::builtin_options_as() const { + return 
builtin_options_as_UnidirectionalSequenceLSTMOptions(); +} + +template<> inline const opencv_tflite::FloorModOptions *Operator::builtin_options_as() const { + return builtin_options_as_FloorModOptions(); +} + +template<> inline const opencv_tflite::RangeOptions *Operator::builtin_options_as() const { + return builtin_options_as_RangeOptions(); +} + +template<> inline const opencv_tflite::ResizeNearestNeighborOptions *Operator::builtin_options_as() const { + return builtin_options_as_ResizeNearestNeighborOptions(); +} + +template<> inline const opencv_tflite::LeakyReluOptions *Operator::builtin_options_as() const { + return builtin_options_as_LeakyReluOptions(); +} + +template<> inline const opencv_tflite::SquaredDifferenceOptions *Operator::builtin_options_as() const { + return builtin_options_as_SquaredDifferenceOptions(); +} + +template<> inline const opencv_tflite::MirrorPadOptions *Operator::builtin_options_as() const { + return builtin_options_as_MirrorPadOptions(); +} + +template<> inline const opencv_tflite::AbsOptions *Operator::builtin_options_as() const { + return builtin_options_as_AbsOptions(); +} + +template<> inline const opencv_tflite::SplitVOptions *Operator::builtin_options_as() const { + return builtin_options_as_SplitVOptions(); +} + +template<> inline const opencv_tflite::UniqueOptions *Operator::builtin_options_as() const { + return builtin_options_as_UniqueOptions(); +} + +template<> inline const opencv_tflite::ReverseV2Options *Operator::builtin_options_as() const { + return builtin_options_as_ReverseV2Options(); +} + +template<> inline const opencv_tflite::AddNOptions *Operator::builtin_options_as() const { + return builtin_options_as_AddNOptions(); +} + +template<> inline const opencv_tflite::GatherNdOptions *Operator::builtin_options_as() const { + return builtin_options_as_GatherNdOptions(); +} + +template<> inline const opencv_tflite::CosOptions *Operator::builtin_options_as() const { + return builtin_options_as_CosOptions(); +} + 
+template<> inline const opencv_tflite::WhereOptions *Operator::builtin_options_as() const { + return builtin_options_as_WhereOptions(); +} + +template<> inline const opencv_tflite::RankOptions *Operator::builtin_options_as() const { + return builtin_options_as_RankOptions(); +} + +template<> inline const opencv_tflite::ReverseSequenceOptions *Operator::builtin_options_as() const { + return builtin_options_as_ReverseSequenceOptions(); +} + +template<> inline const opencv_tflite::MatrixDiagOptions *Operator::builtin_options_as() const { + return builtin_options_as_MatrixDiagOptions(); +} + +template<> inline const opencv_tflite::QuantizeOptions *Operator::builtin_options_as() const { + return builtin_options_as_QuantizeOptions(); +} + +template<> inline const opencv_tflite::MatrixSetDiagOptions *Operator::builtin_options_as() const { + return builtin_options_as_MatrixSetDiagOptions(); +} + +template<> inline const opencv_tflite::HardSwishOptions *Operator::builtin_options_as() const { + return builtin_options_as_HardSwishOptions(); +} + +template<> inline const opencv_tflite::IfOptions *Operator::builtin_options_as() const { + return builtin_options_as_IfOptions(); +} + +template<> inline const opencv_tflite::WhileOptions *Operator::builtin_options_as() const { + return builtin_options_as_WhileOptions(); +} + +template<> inline const opencv_tflite::DepthToSpaceOptions *Operator::builtin_options_as() const { + return builtin_options_as_DepthToSpaceOptions(); +} + +template<> inline const opencv_tflite::NonMaxSuppressionV4Options *Operator::builtin_options_as() const { + return builtin_options_as_NonMaxSuppressionV4Options(); +} + +template<> inline const opencv_tflite::NonMaxSuppressionV5Options *Operator::builtin_options_as() const { + return builtin_options_as_NonMaxSuppressionV5Options(); +} + +template<> inline const opencv_tflite::ScatterNdOptions *Operator::builtin_options_as() const { + return builtin_options_as_ScatterNdOptions(); +} + +template<> inline 
const opencv_tflite::SelectV2Options *Operator::builtin_options_as() const { + return builtin_options_as_SelectV2Options(); +} + +template<> inline const opencv_tflite::DensifyOptions *Operator::builtin_options_as() const { + return builtin_options_as_DensifyOptions(); +} + +template<> inline const opencv_tflite::SegmentSumOptions *Operator::builtin_options_as() const { + return builtin_options_as_SegmentSumOptions(); +} + +template<> inline const opencv_tflite::BatchMatMulOptions *Operator::builtin_options_as() const { + return builtin_options_as_BatchMatMulOptions(); +} + +template<> inline const opencv_tflite::CumsumOptions *Operator::builtin_options_as() const { + return builtin_options_as_CumsumOptions(); +} + +template<> inline const opencv_tflite::CallOnceOptions *Operator::builtin_options_as() const { + return builtin_options_as_CallOnceOptions(); +} + +template<> inline const opencv_tflite::BroadcastToOptions *Operator::builtin_options_as() const { + return builtin_options_as_BroadcastToOptions(); +} + +template<> inline const opencv_tflite::Rfft2dOptions *Operator::builtin_options_as() const { + return builtin_options_as_Rfft2dOptions(); +} + +template<> inline const opencv_tflite::Conv3DOptions *Operator::builtin_options_as() const { + return builtin_options_as_Conv3DOptions(); +} + +template<> inline const opencv_tflite::HashtableOptions *Operator::builtin_options_as() const { + return builtin_options_as_HashtableOptions(); +} + +template<> inline const opencv_tflite::HashtableFindOptions *Operator::builtin_options_as() const { + return builtin_options_as_HashtableFindOptions(); +} + +template<> inline const opencv_tflite::HashtableImportOptions *Operator::builtin_options_as() const { + return builtin_options_as_HashtableImportOptions(); +} + +template<> inline const opencv_tflite::HashtableSizeOptions *Operator::builtin_options_as() const { + return builtin_options_as_HashtableSizeOptions(); +} + +template<> inline const opencv_tflite::VarHandleOptions 
*Operator::builtin_options_as() const { + return builtin_options_as_VarHandleOptions(); +} + +template<> inline const opencv_tflite::ReadVariableOptions *Operator::builtin_options_as() const { + return builtin_options_as_ReadVariableOptions(); +} + +template<> inline const opencv_tflite::AssignVariableOptions *Operator::builtin_options_as() const { + return builtin_options_as_AssignVariableOptions(); +} + +template<> inline const opencv_tflite::RandomOptions *Operator::builtin_options_as() const { + return builtin_options_as_RandomOptions(); +} + +template<> inline const opencv_tflite::BucketizeOptions *Operator::builtin_options_as() const { + return builtin_options_as_BucketizeOptions(); +} + +template<> inline const opencv_tflite::GeluOptions *Operator::builtin_options_as() const { + return builtin_options_as_GeluOptions(); +} + +template<> inline const opencv_tflite::DynamicUpdateSliceOptions *Operator::builtin_options_as() const { + return builtin_options_as_DynamicUpdateSliceOptions(); +} + +template<> inline const opencv_tflite::UnsortedSegmentProdOptions *Operator::builtin_options_as() const { + return builtin_options_as_UnsortedSegmentProdOptions(); +} + +template<> inline const opencv_tflite::UnsortedSegmentMaxOptions *Operator::builtin_options_as() const { + return builtin_options_as_UnsortedSegmentMaxOptions(); +} + +template<> inline const opencv_tflite::UnsortedSegmentMinOptions *Operator::builtin_options_as() const { + return builtin_options_as_UnsortedSegmentMinOptions(); +} + +template<> inline const opencv_tflite::UnsortedSegmentSumOptions *Operator::builtin_options_as() const { + return builtin_options_as_UnsortedSegmentSumOptions(); +} + +template<> inline const opencv_tflite::ATan2Options *Operator::builtin_options_as() const { + return builtin_options_as_ATan2Options(); +} + +template<> inline const opencv_tflite::SignOptions *Operator::builtin_options_as() const { + return builtin_options_as_SignOptions(); +} + +struct OperatorBuilder { + 
typedef Operator Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_opcode_index(uint32_t opcode_index) { + fbb_.AddElement(Operator::VT_OPCODE_INDEX, opcode_index, 0); + } + void add_inputs(::flatbuffers::Offset<::flatbuffers::Vector> inputs) { + fbb_.AddOffset(Operator::VT_INPUTS, inputs); + } + void add_outputs(::flatbuffers::Offset<::flatbuffers::Vector> outputs) { + fbb_.AddOffset(Operator::VT_OUTPUTS, outputs); + } + void add_builtin_options_type(opencv_tflite::BuiltinOptions builtin_options_type) { + fbb_.AddElement(Operator::VT_BUILTIN_OPTIONS_TYPE, static_cast(builtin_options_type), 0); + } + void add_builtin_options(::flatbuffers::Offset builtin_options) { + fbb_.AddOffset(Operator::VT_BUILTIN_OPTIONS, builtin_options); + } + void add_custom_options(::flatbuffers::Offset<::flatbuffers::Vector> custom_options) { + fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options); + } + void add_custom_options_format(opencv_tflite::CustomOptionsFormat custom_options_format) { + fbb_.AddElement(Operator::VT_CUSTOM_OPTIONS_FORMAT, static_cast(custom_options_format), 0); + } + void add_mutating_variable_inputs(::flatbuffers::Offset<::flatbuffers::Vector> mutating_variable_inputs) { + fbb_.AddOffset(Operator::VT_MUTATING_VARIABLE_INPUTS, mutating_variable_inputs); + } + void add_intermediates(::flatbuffers::Offset<::flatbuffers::Vector> intermediates) { + fbb_.AddOffset(Operator::VT_INTERMEDIATES, intermediates); + } + explicit OperatorBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateOperator( + ::flatbuffers::FlatBufferBuilder &_fbb, + uint32_t opcode_index = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> inputs = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> outputs = 0, + 
opencv_tflite::BuiltinOptions builtin_options_type = opencv_tflite::BuiltinOptions_NONE, + ::flatbuffers::Offset builtin_options = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> custom_options = 0, + opencv_tflite::CustomOptionsFormat custom_options_format = opencv_tflite::CustomOptionsFormat_FLEXBUFFERS, + ::flatbuffers::Offset<::flatbuffers::Vector> mutating_variable_inputs = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> intermediates = 0) { + OperatorBuilder builder_(_fbb); + builder_.add_intermediates(intermediates); + builder_.add_mutating_variable_inputs(mutating_variable_inputs); + builder_.add_custom_options(custom_options); + builder_.add_builtin_options(builtin_options); + builder_.add_outputs(outputs); + builder_.add_inputs(inputs); + builder_.add_opcode_index(opcode_index); + builder_.add_custom_options_format(custom_options_format); + builder_.add_builtin_options_type(builtin_options_type); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateOperatorDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + uint32_t opcode_index = 0, + const std::vector *inputs = nullptr, + const std::vector *outputs = nullptr, + opencv_tflite::BuiltinOptions builtin_options_type = opencv_tflite::BuiltinOptions_NONE, + ::flatbuffers::Offset builtin_options = 0, + const std::vector *custom_options = nullptr, + opencv_tflite::CustomOptionsFormat custom_options_format = opencv_tflite::CustomOptionsFormat_FLEXBUFFERS, + const std::vector *mutating_variable_inputs = nullptr, + const std::vector *intermediates = nullptr) { + auto inputs__ = inputs ? _fbb.CreateVector(*inputs) : 0; + auto outputs__ = outputs ? _fbb.CreateVector(*outputs) : 0; + auto custom_options__ = custom_options ? _fbb.CreateVector(*custom_options) : 0; + auto mutating_variable_inputs__ = mutating_variable_inputs ? _fbb.CreateVector(*mutating_variable_inputs) : 0; + auto intermediates__ = intermediates ? 
_fbb.CreateVector(*intermediates) : 0; + return opencv_tflite::CreateOperator( + _fbb, + opcode_index, + inputs__, + outputs__, + builtin_options_type, + builtin_options, + custom_options__, + custom_options_format, + mutating_variable_inputs__, + intermediates__); +} + +struct SubGraph FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SubGraphBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_TENSORS = 4, + VT_INPUTS = 6, + VT_OUTPUTS = 8, + VT_OPERATORS = 10, + VT_NAME = 12 + }; + const ::flatbuffers::Vector<::flatbuffers::Offset> *tensors() const { + return GetPointer> *>(VT_TENSORS); + } + const ::flatbuffers::Vector *inputs() const { + return GetPointer *>(VT_INPUTS); + } + const ::flatbuffers::Vector *outputs() const { + return GetPointer *>(VT_OUTPUTS); + } + const ::flatbuffers::Vector<::flatbuffers::Offset> *operators() const { + return GetPointer> *>(VT_OPERATORS); + } + const ::flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_TENSORS) && + verifier.VerifyVector(tensors()) && + verifier.VerifyVectorOfTables(tensors()) && + VerifyOffset(verifier, VT_INPUTS) && + verifier.VerifyVector(inputs()) && + VerifyOffset(verifier, VT_OUTPUTS) && + verifier.VerifyVector(outputs()) && + VerifyOffset(verifier, VT_OPERATORS) && + verifier.VerifyVector(operators()) && + verifier.VerifyVectorOfTables(operators()) && + VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && + verifier.EndTable(); + } +}; + +struct SubGraphBuilder { + typedef SubGraph Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_tensors(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> tensors) { + fbb_.AddOffset(SubGraph::VT_TENSORS, tensors); + } + void add_inputs(::flatbuffers::Offset<::flatbuffers::Vector> inputs) { + 
fbb_.AddOffset(SubGraph::VT_INPUTS, inputs); + } + void add_outputs(::flatbuffers::Offset<::flatbuffers::Vector> outputs) { + fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs); + } + void add_operators(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> operators) { + fbb_.AddOffset(SubGraph::VT_OPERATORS, operators); + } + void add_name(::flatbuffers::Offset<::flatbuffers::String> name) { + fbb_.AddOffset(SubGraph::VT_NAME, name); + } + explicit SubGraphBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSubGraph( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> tensors = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> inputs = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> outputs = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> operators = 0, + ::flatbuffers::Offset<::flatbuffers::String> name = 0) { + SubGraphBuilder builder_(_fbb); + builder_.add_name(name); + builder_.add_operators(operators); + builder_.add_outputs(outputs); + builder_.add_inputs(inputs); + builder_.add_tensors(tensors); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateSubGraphDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<::flatbuffers::Offset> *tensors = nullptr, + const std::vector *inputs = nullptr, + const std::vector *outputs = nullptr, + const std::vector<::flatbuffers::Offset> *operators = nullptr, + const char *name = nullptr) { + auto tensors__ = tensors ? _fbb.CreateVector<::flatbuffers::Offset>(*tensors) : 0; + auto inputs__ = inputs ? _fbb.CreateVector(*inputs) : 0; + auto outputs__ = outputs ? _fbb.CreateVector(*outputs) : 0; + auto operators__ = operators ? 
_fbb.CreateVector<::flatbuffers::Offset>(*operators) : 0; + auto name__ = name ? _fbb.CreateString(name) : 0; + return opencv_tflite::CreateSubGraph( + _fbb, + tensors__, + inputs__, + outputs__, + operators__, + name__); +} + +struct Buffer FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef BufferBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_DATA = 4 + }; + const ::flatbuffers::Vector *data() const { + return GetPointer *>(VT_DATA); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_DATA) && + verifier.VerifyVector(data()) && + verifier.EndTable(); + } +}; + +struct BufferBuilder { + typedef Buffer Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_data(::flatbuffers::Offset<::flatbuffers::Vector> data) { + fbb_.AddOffset(Buffer::VT_DATA, data); + } + explicit BufferBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateBuffer( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> data = 0) { + BufferBuilder builder_(_fbb); + builder_.add_data(data); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateBufferDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *data = nullptr) { + if (data) { _fbb.ForceVectorAlignment(data->size(), sizeof(uint8_t), 16); } + auto data__ = data ? 
_fbb.CreateVector(*data) : 0; + return opencv_tflite::CreateBuffer( + _fbb, + data__); +} + +struct Metadata FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef MetadataBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NAME = 4, + VT_BUFFER = 6 + }; + const ::flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + uint32_t buffer() const { + return GetField(VT_BUFFER, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && + VerifyField(verifier, VT_BUFFER, 4) && + verifier.EndTable(); + } +}; + +struct MetadataBuilder { + typedef Metadata Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_name(::flatbuffers::Offset<::flatbuffers::String> name) { + fbb_.AddOffset(Metadata::VT_NAME, name); + } + void add_buffer(uint32_t buffer) { + fbb_.AddElement(Metadata::VT_BUFFER, buffer, 0); + } + explicit MetadataBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateMetadata( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> name = 0, + uint32_t buffer = 0) { + MetadataBuilder builder_(_fbb); + builder_.add_buffer(buffer); + builder_.add_name(name); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateMetadataDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const char *name = nullptr, + uint32_t buffer = 0) { + auto name__ = name ? 
_fbb.CreateString(name) : 0; + return opencv_tflite::CreateMetadata( + _fbb, + name__, + buffer); +} + +struct TensorMap FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef TensorMapBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NAME = 4, + VT_TENSOR_INDEX = 6 + }; + const ::flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + uint32_t tensor_index() const { + return GetField(VT_TENSOR_INDEX, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && + VerifyField(verifier, VT_TENSOR_INDEX, 4) && + verifier.EndTable(); + } +}; + +struct TensorMapBuilder { + typedef TensorMap Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_name(::flatbuffers::Offset<::flatbuffers::String> name) { + fbb_.AddOffset(TensorMap::VT_NAME, name); + } + void add_tensor_index(uint32_t tensor_index) { + fbb_.AddElement(TensorMap::VT_TENSOR_INDEX, tensor_index, 0); + } + explicit TensorMapBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateTensorMap( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> name = 0, + uint32_t tensor_index = 0) { + TensorMapBuilder builder_(_fbb); + builder_.add_tensor_index(tensor_index); + builder_.add_name(name); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateTensorMapDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const char *name = nullptr, + uint32_t tensor_index = 0) { + auto name__ = name ? 
_fbb.CreateString(name) : 0; + return opencv_tflite::CreateTensorMap( + _fbb, + name__, + tensor_index); +} + +struct SignatureDef FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SignatureDefBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_INPUTS = 4, + VT_OUTPUTS = 6, + VT_SIGNATURE_KEY = 8, + VT_SUBGRAPH_INDEX = 12 + }; + const ::flatbuffers::Vector<::flatbuffers::Offset> *inputs() const { + return GetPointer> *>(VT_INPUTS); + } + const ::flatbuffers::Vector<::flatbuffers::Offset> *outputs() const { + return GetPointer> *>(VT_OUTPUTS); + } + const ::flatbuffers::String *signature_key() const { + return GetPointer(VT_SIGNATURE_KEY); + } + uint32_t subgraph_index() const { + return GetField(VT_SUBGRAPH_INDEX, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_INPUTS) && + verifier.VerifyVector(inputs()) && + verifier.VerifyVectorOfTables(inputs()) && + VerifyOffset(verifier, VT_OUTPUTS) && + verifier.VerifyVector(outputs()) && + verifier.VerifyVectorOfTables(outputs()) && + VerifyOffset(verifier, VT_SIGNATURE_KEY) && + verifier.VerifyString(signature_key()) && + VerifyField(verifier, VT_SUBGRAPH_INDEX, 4) && + verifier.EndTable(); + } +}; + +struct SignatureDefBuilder { + typedef SignatureDef Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_inputs(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> inputs) { + fbb_.AddOffset(SignatureDef::VT_INPUTS, inputs); + } + void add_outputs(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> outputs) { + fbb_.AddOffset(SignatureDef::VT_OUTPUTS, outputs); + } + void add_signature_key(::flatbuffers::Offset<::flatbuffers::String> signature_key) { + fbb_.AddOffset(SignatureDef::VT_SIGNATURE_KEY, signature_key); + } + void add_subgraph_index(uint32_t subgraph_index) { + 
fbb_.AddElement(SignatureDef::VT_SUBGRAPH_INDEX, subgraph_index, 0); + } + explicit SignatureDefBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateSignatureDef( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> inputs = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> outputs = 0, + ::flatbuffers::Offset<::flatbuffers::String> signature_key = 0, + uint32_t subgraph_index = 0) { + SignatureDefBuilder builder_(_fbb); + builder_.add_subgraph_index(subgraph_index); + builder_.add_signature_key(signature_key); + builder_.add_outputs(outputs); + builder_.add_inputs(inputs); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateSignatureDefDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<::flatbuffers::Offset> *inputs = nullptr, + const std::vector<::flatbuffers::Offset> *outputs = nullptr, + const char *signature_key = nullptr, + uint32_t subgraph_index = 0) { + auto inputs__ = inputs ? _fbb.CreateVector<::flatbuffers::Offset>(*inputs) : 0; + auto outputs__ = outputs ? _fbb.CreateVector<::flatbuffers::Offset>(*outputs) : 0; + auto signature_key__ = signature_key ? 
_fbb.CreateString(signature_key) : 0; + return opencv_tflite::CreateSignatureDef( + _fbb, + inputs__, + outputs__, + signature_key__, + subgraph_index); +} + +struct Model FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ModelBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_VERSION = 4, + VT_OPERATOR_CODES = 6, + VT_SUBGRAPHS = 8, + VT_DESCRIPTION = 10, + VT_BUFFERS = 12, + VT_METADATA_BUFFER = 14, + VT_METADATA = 16, + VT_SIGNATURE_DEFS = 18 + }; + uint32_t version() const { + return GetField(VT_VERSION, 0); + } + const ::flatbuffers::Vector<::flatbuffers::Offset> *operator_codes() const { + return GetPointer> *>(VT_OPERATOR_CODES); + } + const ::flatbuffers::Vector<::flatbuffers::Offset> *subgraphs() const { + return GetPointer> *>(VT_SUBGRAPHS); + } + const ::flatbuffers::String *description() const { + return GetPointer(VT_DESCRIPTION); + } + const ::flatbuffers::Vector<::flatbuffers::Offset> *buffers() const { + return GetPointer> *>(VT_BUFFERS); + } + const ::flatbuffers::Vector *metadata_buffer() const { + return GetPointer *>(VT_METADATA_BUFFER); + } + const ::flatbuffers::Vector<::flatbuffers::Offset> *metadata() const { + return GetPointer> *>(VT_METADATA); + } + const ::flatbuffers::Vector<::flatbuffers::Offset> *signature_defs() const { + return GetPointer> *>(VT_SIGNATURE_DEFS); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_VERSION, 4) && + VerifyOffset(verifier, VT_OPERATOR_CODES) && + verifier.VerifyVector(operator_codes()) && + verifier.VerifyVectorOfTables(operator_codes()) && + VerifyOffset(verifier, VT_SUBGRAPHS) && + verifier.VerifyVector(subgraphs()) && + verifier.VerifyVectorOfTables(subgraphs()) && + VerifyOffset(verifier, VT_DESCRIPTION) && + verifier.VerifyString(description()) && + VerifyOffset(verifier, VT_BUFFERS) && + verifier.VerifyVector(buffers()) && + verifier.VerifyVectorOfTables(buffers()) && 
+ VerifyOffset(verifier, VT_METADATA_BUFFER) && + verifier.VerifyVector(metadata_buffer()) && + VerifyOffset(verifier, VT_METADATA) && + verifier.VerifyVector(metadata()) && + verifier.VerifyVectorOfTables(metadata()) && + VerifyOffset(verifier, VT_SIGNATURE_DEFS) && + verifier.VerifyVector(signature_defs()) && + verifier.VerifyVectorOfTables(signature_defs()) && + verifier.EndTable(); + } +}; + +struct ModelBuilder { + typedef Model Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_version(uint32_t version) { + fbb_.AddElement(Model::VT_VERSION, version, 0); + } + void add_operator_codes(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> operator_codes) { + fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes); + } + void add_subgraphs(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> subgraphs) { + fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs); + } + void add_description(::flatbuffers::Offset<::flatbuffers::String> description) { + fbb_.AddOffset(Model::VT_DESCRIPTION, description); + } + void add_buffers(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> buffers) { + fbb_.AddOffset(Model::VT_BUFFERS, buffers); + } + void add_metadata_buffer(::flatbuffers::Offset<::flatbuffers::Vector> metadata_buffer) { + fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer); + } + void add_metadata(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> metadata) { + fbb_.AddOffset(Model::VT_METADATA, metadata); + } + void add_signature_defs(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> signature_defs) { + fbb_.AddOffset(Model::VT_SIGNATURE_DEFS, signature_defs); + } + explicit ModelBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline 
::flatbuffers::Offset CreateModel( + ::flatbuffers::FlatBufferBuilder &_fbb, + uint32_t version = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> operator_codes = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> subgraphs = 0, + ::flatbuffers::Offset<::flatbuffers::String> description = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> buffers = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> metadata_buffer = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> metadata = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> signature_defs = 0) { + ModelBuilder builder_(_fbb); + builder_.add_signature_defs(signature_defs); + builder_.add_metadata(metadata); + builder_.add_metadata_buffer(metadata_buffer); + builder_.add_buffers(buffers); + builder_.add_description(description); + builder_.add_subgraphs(subgraphs); + builder_.add_operator_codes(operator_codes); + builder_.add_version(version); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateModelDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + uint32_t version = 0, + const std::vector<::flatbuffers::Offset> *operator_codes = nullptr, + const std::vector<::flatbuffers::Offset> *subgraphs = nullptr, + const char *description = nullptr, + const std::vector<::flatbuffers::Offset> *buffers = nullptr, + const std::vector *metadata_buffer = nullptr, + const std::vector<::flatbuffers::Offset> *metadata = nullptr, + const std::vector<::flatbuffers::Offset> *signature_defs = nullptr) { + auto operator_codes__ = operator_codes ? _fbb.CreateVector<::flatbuffers::Offset>(*operator_codes) : 0; + auto subgraphs__ = subgraphs ? _fbb.CreateVector<::flatbuffers::Offset>(*subgraphs) : 0; + auto description__ = description ? _fbb.CreateString(description) : 0; + auto buffers__ = buffers ? _fbb.CreateVector<::flatbuffers::Offset>(*buffers) : 0; + auto metadata_buffer__ = metadata_buffer ? 
_fbb.CreateVector(*metadata_buffer) : 0; + auto metadata__ = metadata ? _fbb.CreateVector<::flatbuffers::Offset>(*metadata) : 0; + auto signature_defs__ = signature_defs ? _fbb.CreateVector<::flatbuffers::Offset>(*signature_defs) : 0; + return opencv_tflite::CreateModel( + _fbb, + version, + operator_codes__, + subgraphs__, + description__, + buffers__, + metadata_buffer__, + metadata__, + signature_defs__); +} + +inline bool VerifyQuantizationDetails(::flatbuffers::Verifier &verifier, const void *obj, QuantizationDetails type) { + switch (type) { + case QuantizationDetails_NONE: { + return true; + } + case QuantizationDetails_CustomQuantization: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + default: return true; + } +} + +inline bool VerifyQuantizationDetailsVector(::flatbuffers::Verifier &verifier, const ::flatbuffers::Vector<::flatbuffers::Offset> *values, const ::flatbuffers::Vector *types) { + if (!values || !types) return !values && !types; + if (values->size() != types->size()) return false; + for (::flatbuffers::uoffset_t i = 0; i < values->size(); ++i) { + if (!VerifyQuantizationDetails( + verifier, values->Get(i), types->GetEnum(i))) { + return false; + } + } + return true; +} + +inline bool VerifySparseIndexVector(::flatbuffers::Verifier &verifier, const void *obj, SparseIndexVector type) { + switch (type) { + case SparseIndexVector_NONE: { + return true; + } + case SparseIndexVector_Int32Vector: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case SparseIndexVector_Uint16Vector: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case SparseIndexVector_Uint8Vector: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + default: return true; + } +} + +inline bool VerifySparseIndexVectorVector(::flatbuffers::Verifier &verifier, const ::flatbuffers::Vector<::flatbuffers::Offset> *values, const ::flatbuffers::Vector *types) { + if (!values 
|| !types) return !values && !types; + if (values->size() != types->size()) return false; + for (::flatbuffers::uoffset_t i = 0; i < values->size(); ++i) { + if (!VerifySparseIndexVector( + verifier, values->Get(i), types->GetEnum(i))) { + return false; + } + } + return true; +} + +inline bool VerifyBuiltinOptions(::flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type) { + switch (type) { + case BuiltinOptions_NONE: { + return true; + } + case BuiltinOptions_Conv2DOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_DepthwiseConv2DOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ConcatEmbeddingsOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LSHProjectionOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_Pool2DOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SVDFOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_RNNOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FullyConnectedOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SoftmaxOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ConcatenationOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_AddOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_L2NormOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LocalResponseNormalizationOptions: { + auto ptr = reinterpret_cast(obj); + return 
verifier.VerifyTable(ptr); + } + case BuiltinOptions_LSTMOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ResizeBilinearOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_CallOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ReshapeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SkipGramOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SpaceToDepthOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_EmbeddingLookupSparseOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_MulOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_PadOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_GatherOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_BatchToSpaceNDOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SpaceToBatchNDOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_TransposeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ReducerOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SubOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_DivOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SqueezeOptions: { + auto ptr = reinterpret_cast(obj); + return 
verifier.VerifyTable(ptr); + } + case BuiltinOptions_SequenceRNNOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_StridedSliceOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ExpOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_TopKV2Options: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SplitOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LogSoftmaxOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_CastOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_DequantizeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_MaximumMinimumOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ArgMaxOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LessOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_NegOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_PadV2Options: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_GreaterOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_GreaterEqualOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LessEqualOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SelectOptions: { + auto ptr = reinterpret_cast(obj); + return 
verifier.VerifyTable(ptr); + } + case BuiltinOptions_SliceOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_TransposeConvOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SparseToDenseOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_TileOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ExpandDimsOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_EqualOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_NotEqualOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ShapeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_PowOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ArgMinOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FakeQuantOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_PackOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LogicalOrOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_OneHotOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LogicalAndOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LogicalNotOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_UnpackOptions: { + auto ptr = reinterpret_cast(obj); + return 
verifier.VerifyTable(ptr); + } + case BuiltinOptions_FloorDivOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SquareOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ZerosLikeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FillOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_BidirectionalSequenceLSTMOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_BidirectionalSequenceRNNOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_UnidirectionalSequenceLSTMOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FloorModOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_RangeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ResizeNearestNeighborOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_LeakyReluOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SquaredDifferenceOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_MirrorPadOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_AbsOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SplitVOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_UniqueOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case 
BuiltinOptions_ReverseV2Options: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_AddNOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_GatherNdOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_CosOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_WhereOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_RankOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ReverseSequenceOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_MatrixDiagOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_QuantizeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_MatrixSetDiagOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_HardSwishOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_IfOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_WhileOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_DepthToSpaceOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_NonMaxSuppressionV4Options: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_NonMaxSuppressionV5Options: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ScatterNdOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } 
+ case BuiltinOptions_SelectV2Options: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_DensifyOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SegmentSumOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_BatchMatMulOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_CumsumOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_CallOnceOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_BroadcastToOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_Rfft2dOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_Conv3DOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_HashtableOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_HashtableFindOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_HashtableImportOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_HashtableSizeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_VarHandleOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ReadVariableOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_AssignVariableOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_RandomOptions: { + auto ptr = reinterpret_cast(obj); + return 
verifier.VerifyTable(ptr); + } + case BuiltinOptions_BucketizeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_GeluOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_DynamicUpdateSliceOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_UnsortedSegmentProdOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_UnsortedSegmentMaxOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_UnsortedSegmentMinOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_UnsortedSegmentSumOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_ATan2Options: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_SignOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + default: return true; + } +} + +inline bool VerifyBuiltinOptionsVector(::flatbuffers::Verifier &verifier, const ::flatbuffers::Vector<::flatbuffers::Offset> *values, const ::flatbuffers::Vector *types) { + if (!values || !types) return !values && !types; + if (values->size() != types->size()) return false; + for (::flatbuffers::uoffset_t i = 0; i < values->size(); ++i) { + if (!VerifyBuiltinOptions( + verifier, values->Get(i), types->GetEnum(i))) { + return false; + } + } + return true; +} + +inline const opencv_tflite::Model *GetModel(const void *buf) { + return ::flatbuffers::GetRoot(buf); +} + +inline const opencv_tflite::Model *GetSizePrefixedModel(const void *buf) { + return ::flatbuffers::GetSizePrefixedRoot(buf); +} + +inline const char *ModelIdentifier() { + return "TFL3"; +} + +inline bool ModelBufferHasIdentifier(const void *buf) { + return 
::flatbuffers::BufferHasIdentifier( + buf, ModelIdentifier()); +} + +inline bool SizePrefixedModelBufferHasIdentifier(const void *buf) { + return ::flatbuffers::BufferHasIdentifier( + buf, ModelIdentifier(), true); +} + +inline bool VerifyModelBuffer( + ::flatbuffers::Verifier &verifier) { + return verifier.VerifyBuffer(ModelIdentifier()); +} + +inline bool VerifySizePrefixedModelBuffer( + ::flatbuffers::Verifier &verifier) { + return verifier.VerifySizePrefixedBuffer(ModelIdentifier()); +} + +inline const char *ModelExtension() { + return "tflite"; +} + +inline void FinishModelBuffer( + ::flatbuffers::FlatBufferBuilder &fbb, + ::flatbuffers::Offset root) { + fbb.Finish(root, ModelIdentifier()); +} + +inline void FinishSizePrefixedModelBuffer( + ::flatbuffers::FlatBufferBuilder &fbb, + ::flatbuffers::Offset root) { + fbb.FinishSizePrefixed(root, ModelIdentifier()); +} + +} // namespace opencv_tflite + +#endif // FLATBUFFERS_GENERATED_SCHEMA_OPENCV_TFLITE_H_ diff --git a/modules/dnn/src/tflite/tflite_importer.cpp b/modules/dnn/src/tflite/tflite_importer.cpp index ee051547f9..bdd061cab8 100644 --- a/modules/dnn/src/tflite/tflite_importer.cpp +++ b/modules/dnn/src/tflite/tflite_importer.cpp @@ -119,7 +119,7 @@ TFLiteImporter::TFLiteImporter(Net& dstNet, const char* modelBuffer, size_t bufS CV_Assert(model); CV_Assert(model->subgraphs()); CV_Assert(model->buffers()); - CV_CheckEQ(model->subgraphs()->size(), 1, ""); + CV_CheckEQ((size_t)model->subgraphs()->size(), 1u, ""); modelTensors = model->subgraphs()->Get(0)->tensors(); CV_Assert(modelTensors); diff --git a/modules/dnn/test/test_tflite_importer.cpp b/modules/dnn/test/test_tflite_importer.cpp index 41c3133593..bffdaa5b03 100644 --- a/modules/dnn/test/test_tflite_importer.cpp +++ b/modules/dnn/test/test_tflite_importer.cpp @@ -12,17 +12,15 @@ Test for TFLite models loading #include // CV_DNN_REGISTER_LAYER_CLASS #include -namespace opencv_test -{ +#ifdef OPENCV_TEST_DNN_TFLITE + +namespace opencv_test { namespace { 
using namespace cv; using namespace cv::dnn; -void testModel(const std::string& modelName, const Mat& input, double norm = 1e-5) { -#ifndef HAVE_FLATBUFFERS - throw SkipTestException("FlatBuffers required for TFLite importer"); -#endif - +void testModel(const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4) +{ Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite", false)); net.setInput(input); @@ -34,20 +32,21 @@ void testModel(const std::string& modelName, const Mat& input, double norm = 1e- ASSERT_EQ(outs.size(), outNames.size()); for (int i = 0; i < outNames.size(); ++i) { Mat ref = blobFromNPY(findDataFile(format("dnn/tflite/%s_out_%s.npy", modelName.c_str(), outNames[i].c_str()))); - normAssert(ref.reshape(1, 1), outs[i].reshape(1, 1), outNames[i].c_str(), norm); + normAssert(ref.reshape(1, 1), outs[i].reshape(1, 1), outNames[i].c_str(), l1, lInf); } } -void testModel(const std::string& modelName, const Size& inpSize, double norm = 1e-5) { +void testModel(const std::string& modelName, const Size& inpSize, double l1 = 1e-5, double lInf = 1e-4) +{ Mat input = imread(findDataFile("cv/shared/lena.png")); input = blobFromImage(input, 1.0 / 255, inpSize, 0, true); - testModel(modelName, input, norm); + testModel(modelName, input, l1, lInf); } // https://google.github.io/mediapipe/solutions/face_mesh TEST(Test_TFLite, face_landmark) { - testModel("face_landmark", Size(192, 192), 2e-5); + testModel("face_landmark", Size(192, 192), 2e-5, 2e-4); } // https://google.github.io/mediapipe/solutions/face_detection @@ -64,9 +63,6 @@ TEST(Test_TFLite, selfie_segmentation) TEST(Test_TFLite, max_unpooling) { -#ifndef HAVE_FLATBUFFERS - throw SkipTestException("FlatBuffers required for TFLite importer"); -#endif // Due Max Unpoling is a numerically unstable operation and small difference between frameworks // might lead to positional difference of maximal elements in the tensor, this test checks // behavior of Max Unpooling layer 
only. @@ -120,4 +116,6 @@ TEST(Test_TFLite, max_unpooling) } } -} +}} // namespace + +#endif // OPENCV_TEST_DNN_TFLITE diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt index c405ca422f..d8a7bb050a 100644 --- a/modules/highgui/CMakeLists.txt +++ b/modules/highgui/CMakeLists.txt @@ -281,16 +281,7 @@ if(tgts STREQUAL "PRIVATE") set(tgts "") endif() -# install used dependencies only -if(NOT BUILD_SHARED_LIBS - AND NOT (CMAKE_VERSION VERSION_LESS "3.13.0") # upgrade CMake: https://gitlab.kitware.com/cmake/cmake/-/merge_requests/2152 -) - foreach(tgt in ${tgts}) - if(tgt MATCHES "^ocv\.3rdparty\.") - install(TARGETS ${tgt} EXPORT OpenCVModules) - endif() - endforeach() -endif() +ocv_install_used_external_targets(${tgts}) source_group("Src" FILES ${highgui_srcs} ${highgui_hdrs}) source_group("Include" FILES ${highgui_ext_hdrs}) diff --git a/modules/videoio/CMakeLists.txt b/modules/videoio/CMakeLists.txt index 3261b9d084..eee706d306 100644 --- a/modules/videoio/CMakeLists.txt +++ b/modules/videoio/CMakeLists.txt @@ -263,16 +263,7 @@ if(tgts STREQUAL "PRIVATE") set(tgts "") endif() -# install used dependencies only -if(NOT BUILD_SHARED_LIBS - AND NOT (CMAKE_VERSION VERSION_LESS "3.13.0") # upgrade CMake: https://gitlab.kitware.com/cmake/cmake/-/merge_requests/2152 -) - foreach(tgt in ${tgts}) - if(tgt MATCHES "^ocv\.3rdparty\.") - install(TARGETS ${tgt} EXPORT OpenCVModules) - endif() - endforeach() -endif() +ocv_install_used_external_targets(${tgts}) ocv_set_module_sources(HEADERS ${videoio_ext_hdrs} ${videoio_hdrs} SOURCES ${videoio_srcs}) ocv_module_include_directories() From 9eb5e39ff30ef3f7357b7dfde335140bf58fbda4 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 21 Feb 2023 21:20:37 +0000 Subject: [PATCH 034/199] dnn(tflite): fix wrong axis normalization --- modules/dnn/src/tflite/tflite_importer.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/dnn/src/tflite/tflite_importer.cpp 
b/modules/dnn/src/tflite/tflite_importer.cpp index bdd061cab8..d556bf6f6f 100644 --- a/modules/dnn/src/tflite/tflite_importer.cpp +++ b/modules/dnn/src/tflite/tflite_importer.cpp @@ -497,9 +497,8 @@ void TFLiteImporter::parseConcat(const Operator& op, const std::string& opcode, DataLayout inpLayout = layouts[op.inputs()->Get(0)]; if (inpLayout == DATA_LAYOUT_NHWC) { // OpenCV works in NCHW data layout. So change the axis correspondingly. - CV_Check(axis, -4 < axis && axis < 4, ""); - int remap[] = {0, 2, 3, 1}; - axis = axis > 0 ? axis : 4 + axis; + axis = normalize_axis(axis, 4); + static const int remap[] = {0, 2, 3, 1}; axis = remap[axis]; } layerParams.set("axis", axis); From a5f04f9e171b53c179d780bea00685758e734003 Mon Sep 17 00:00:00 2001 From: CSBVision Date: Thu, 23 Feb 2023 11:18:59 +0100 Subject: [PATCH 035/199] Add ENABLE_DELAYLOAD option --- CMakeLists.txt | 1 + cmake/OpenCVModule.cmake | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6f4a3513f0..fd47d83a2c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -511,6 +511,7 @@ OCV_OPTION(INSTALL_TESTS "Install accuracy and performance test binar # =================================================== OCV_OPTION(ENABLE_CCACHE "Use ccache" (UNIX AND (CMAKE_GENERATOR MATCHES "Makefile" OR CMAKE_GENERATOR MATCHES "Ninja" OR CMAKE_GENERATOR MATCHES "Xcode")) ) OCV_OPTION(ENABLE_PRECOMPILED_HEADERS "Use precompiled headers" MSVC IF (MSVC OR (NOT IOS AND NOT CMAKE_CROSSCOMPILING) ) ) +OCV_OPTION(ENABLE_DELAYLOAD "Enable delayed loading of OpenCV DLLs" OFF VISIBLE_IF MSVC AND BUILD_SHARED_LIBS) OCV_OPTION(ENABLE_SOLUTION_FOLDERS "Solution folder in Visual Studio or in other IDEs" (MSVC_IDE OR CMAKE_GENERATOR MATCHES Xcode) ) OCV_OPTION(ENABLE_PROFILING "Enable profiling in the GCC compiler (Add flags: -g -pg)" OFF IF CV_GCC ) OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov" OFF IF CV_GCC ) diff --git a/cmake/OpenCVModule.cmake 
b/cmake/OpenCVModule.cmake index 9981620f25..b6cee904a9 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -368,6 +368,29 @@ macro(ocv_glob_modules main_root) # resolve dependencies __ocv_resolve_dependencies() + # optionally configure delay load + if(MSVC AND BUILD_SHARED_LIBS AND ENABLE_DELAYLOAD AND NOT BUILD_opencv_world) + if(${CMAKE_SHARED_LINKER_FLAGS} MATCHES "delayimp.lib") + set(DELAYFLAGS "") + else() + set(DELAYFLAGS "delayimp.lib") + endif() + + foreach(mod ${OPENCV_MODULES_BUILD}) + if(NOT ${mod} STREQUAL "opencv_core" AND NOT ${mod} MATCHES "bindings_generator|python") + set(DELAYFLAGS "${DELAYFLAGS} /DELAYLOAD:${mod}${OPENCV_VERSION_MAJOR}${OPENCV_VERSION_MINOR}${OPENCV_VERSION_PATCH}.dll") + endif() + endforeach() + + if(NOT ${CMAKE_SHARED_LINKER_FLAGS} MATCHES "/IGNORE:4199") + set(DELAYFLAGS "${DELAYFLAGS} /IGNORE:4199") + endif() + + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${DELAYFLAGS}") + set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${DELAYFLAGS}") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${DELAYFLAGS}") + endif() + # create modules set(OPENCV_INITIAL_PASS OFF) ocv_cmake_hook(PRE_MODULES_CREATE) From fe59a5695f9afd9cbf02fd20a1551ed0d4dfeac8 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 27 Feb 2023 03:17:46 +0000 Subject: [PATCH 036/199] core(simd): 64-bit integer EQ/NE without misused 64F guard --- .../include/opencv2/core/hal/intrin_cpp.hpp | 8 +- .../include/opencv2/core/hal/intrin_neon.hpp | 54 +++++++++---- modules/core/test/test_intrin_utils.hpp | 76 +++++++++++-------- 3 files changed, 85 insertions(+), 53 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp index 46222140e6..9a97376898 100644 --- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp @@ -879,14 +879,10 @@ OPENCV_HAL_IMPL_CMP_OP(<=) For all types 
except 64-bit integer values. */ OPENCV_HAL_IMPL_CMP_OP(>=) -/** @brief Equal comparison - -For all types except 64-bit integer values. */ +/** @brief Equal comparison */ OPENCV_HAL_IMPL_CMP_OP(==) -/** @brief Not equal comparison - -For all types except 64-bit integer values. */ +/** @brief Not equal comparison */ OPENCV_HAL_IMPL_CMP_OP(!=) template diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index 5792694a40..3897cee12b 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -1038,18 +1038,6 @@ OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_min, vminq_f64) OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_max, vmaxq_f64) #endif -#if CV_SIMD128_64F -inline int64x2_t vmvnq_s64(int64x2_t a) -{ - int64x2_t vx = vreinterpretq_s64_u32(vdupq_n_u32(0xFFFFFFFF)); - return veorq_s64(a, vx); -} -inline uint64x2_t vmvnq_u64(uint64x2_t a) -{ - uint64x2_t vx = vreinterpretq_u64_u32(vdupq_n_u32(0xFFFFFFFF)); - return veorq_u64(a, vx); -} -#endif #define OPENCV_HAL_IMPL_NEON_INT_CMP_OP(_Tpvec, cast, suffix, not_suffix) \ inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ { return _Tpvec(cast(vceqq_##suffix(a.val, b.val))); } \ @@ -1071,9 +1059,47 @@ OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int16x8, vreinterpretq_s16_u16, s16, u16) OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint32x4, OPENCV_HAL_NOP, u32, u32) OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int32x4, vreinterpretq_s32_u32, s32, u32) OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float32x4, vreinterpretq_f32_u32, f32, u32) +#if defined(__aarch64__) || defined(_M_ARM64) +static inline uint64x2_t vmvnq_u64(uint64x2_t a) +{ + uint64x2_t vx = vreinterpretq_u64_u32(vdupq_n_u32(0xFFFFFFFF)); + return veorq_u64(a, vx); +} +//OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint64x2, OPENCV_HAL_NOP, u64, u64) +//OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int64x2, vreinterpretq_s64_u64, s64, u64) +static inline v_uint64x2 operator == 
(const v_uint64x2& a, const v_uint64x2& b) +{ return v_uint64x2(vceqq_u64(a.val, b.val)); } +static inline v_uint64x2 operator != (const v_uint64x2& a, const v_uint64x2& b) +{ return v_uint64x2(vmvnq_u64(vceqq_u64(a.val, b.val))); } +static inline v_int64x2 operator == (const v_int64x2& a, const v_int64x2& b) +{ return v_int64x2(vreinterpretq_s64_u64(vceqq_s64(a.val, b.val))); } +static inline v_int64x2 operator != (const v_int64x2& a, const v_int64x2& b) +{ return v_int64x2(vreinterpretq_s64_u64(vmvnq_u64(vceqq_s64(a.val, b.val)))); } +#else +static inline v_uint64x2 operator == (const v_uint64x2& a, const v_uint64x2& b) +{ + uint32x4_t cmp = vceqq_u32(vreinterpretq_u32_u64(a.val), vreinterpretq_u32_u64(b.val)); + uint32x4_t swapped = vrev64q_u32(cmp); + return v_uint64x2(vreinterpretq_u64_u32(vandq_u32(cmp, swapped))); +} +static inline v_uint64x2 operator != (const v_uint64x2& a, const v_uint64x2& b) +{ + uint32x4_t cmp = vceqq_u32(vreinterpretq_u32_u64(a.val), vreinterpretq_u32_u64(b.val)); + uint32x4_t swapped = vrev64q_u32(cmp); + uint64x2_t v_eq = vreinterpretq_u64_u32(vandq_u32(cmp, swapped)); + uint64x2_t vx = vreinterpretq_u64_u32(vdupq_n_u32(0xFFFFFFFF)); + return v_uint64x2(veorq_u64(v_eq, vx)); +} +static inline v_int64x2 operator == (const v_int64x2& a, const v_int64x2& b) +{ + return v_reinterpret_as_s64(v_reinterpret_as_u64(a) == v_reinterpret_as_u64(b)); +} +static inline v_int64x2 operator != (const v_int64x2& a, const v_int64x2& b) +{ + return v_reinterpret_as_s64(v_reinterpret_as_u64(a) != v_reinterpret_as_u64(b)); +} +#endif #if CV_SIMD128_64F -OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint64x2, OPENCV_HAL_NOP, u64, u64) -OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int64x2, vreinterpretq_s64_u64, s64, u64) OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float64x2, vreinterpretq_f64_u64, f64, u64) #endif diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index 3f196f1342..da1f26790c 100644 --- 
a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -97,7 +97,7 @@ template struct Data { *this = r; } - operator R () + operator R () const { return initializer().init(*this); } @@ -1559,49 +1559,60 @@ template struct TheTest } #endif -#if CV_SIMD_64F - TheTest & test_cmp64() + void do_check_cmp64(const Data& dataA, const Data& dataB) { - Data dataA, dataB; - R a = dataA, b = dataB; - - for (int i = 0; i < R::nlanes; ++i) - { - dataA[i] = dataB[i]; - } - dataA[0]++; - - a = dataA, b = dataB; + R a = dataA; + R b = dataB; - Data resC = (a == b); - Data resD = (a != b); + Data dataEQ = (a == b); + Data dataNE = (a != b); for (int i = 0; i < R::nlanes; ++i) { SCOPED_TRACE(cv::format("i=%d", i)); - EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0); - EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0); + if (cvtest::debugLevel > 0) cout << "i=" << i << " ( " << dataA[i] << " vs " << dataB[i] << " ): eq=" << dataEQ[i] << " ne=" << dataNE[i] << endl; + EXPECT_NE((LaneType)dataEQ[i], (LaneType)dataNE[i]); + if (dataA[i] == dataB[i]) + EXPECT_EQ((LaneType)-1, (LaneType)dataEQ[i]); + else + EXPECT_EQ((LaneType)0, (LaneType)dataEQ[i]); + if (dataA[i] != dataB[i]) + EXPECT_EQ((LaneType)-1, (LaneType)dataNE[i]); + else + EXPECT_EQ((LaneType)0, (LaneType)dataNE[i]); } + } + + TheTest & test_cmp64() + { + Data dataA; + Data dataB; for (int i = 0; i < R::nlanes; ++i) { - dataA[i] = dataB[i] = (LaneType)-1; + dataA[i] = dataB[i]; } + dataA[0]++; - a = dataA, b = dataB; + do_check_cmp64(dataA, dataB); + do_check_cmp64(dataB, dataA); - resC = (a == b); - resD = (a != b); + dataA[0] = dataB[0]; + dataA[1] += (((LaneType)1) << 32); + do_check_cmp64(dataA, dataB); + do_check_cmp64(dataB, dataA); + + dataA[0] = (LaneType)-1; + dataB[0] = (LaneType)-1; + dataA[1] = (LaneType)-1; + dataB[1] = (LaneType)2; + + do_check_cmp64(dataA, dataB); + do_check_cmp64(dataB, dataA); - for (int i = 0; i < R::nlanes; ++i) - { - SCOPED_TRACE(cv::format("i=%d", i)); - 
EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0); - EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0); - } return *this; } -#endif + }; @@ -1837,9 +1848,8 @@ void test_hal_intrin_uint64() TheTest() .test_loadstore() .test_addsub() -#if CV_SIMD_64F .test_cmp64() -#endif + //.test_cmp() - not declared as supported .test_shift<1>().test_shift<8>() .test_logic() .test_reverse() @@ -1857,9 +1867,8 @@ void test_hal_intrin_int64() TheTest() .test_loadstore() .test_addsub() -#if CV_SIMD_64F .test_cmp64() -#endif + //.test_cmp() - not declared as supported .test_shift<1>().test_shift<8>() .test_logic() .test_reverse() @@ -1936,7 +1945,8 @@ void test_hal_intrin_float64() .test_rotate<2>().test_rotate<3>() #endif ; - +#else + std::cout << "SKIP: CV_SIMD_64F is not available" << std::endl; #endif } From a035608100fb366a7195ec7cca344ca399e1bd8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Corentin=20No=C3=ABl?= Date: Wed, 15 Feb 2023 16:02:24 +0100 Subject: [PATCH 037/199] highgui: Reduce the difference between GTK+2 and GTK+3 version Make the GTK+3 API the default one by wrapping the missing GTK+2 functions in defines Make sure to always guard with GTK_VERSION2 or GTK_VERSION3 to allow future addition of Gtk4 --- modules/highgui/src/window_gtk.cpp | 175 +++++++++-------------------- 1 file changed, 50 insertions(+), 125 deletions(-) diff --git a/modules/highgui/src/window_gtk.cpp b/modules/highgui/src/window_gtk.cpp index 98698cbade..28d863c89b 100644 --- a/modules/highgui/src/window_gtk.cpp +++ b/modules/highgui/src/window_gtk.cpp @@ -57,10 +57,13 @@ #include #include +#if (GTK_MAJOR_VERSION == 2) + #define GTK_VERSION2 1 +#endif //GTK_MAJOR_VERSION >= 2.0 #if (GTK_MAJOR_VERSION == 3) #define GTK_VERSION3 1 #endif //GTK_MAJOR_VERSION >= 3 -#if (GTK_MAJOR_VERSION > 3 || (GTK_MAJOR_VERSION == 3 && GTK_MINOR_VERSION >= 4)) +#if (GTK_MAJOR_VERSION == 3 && GTK_MINOR_VERSION >= 4) #define GTK_VERSION3_4 1 #endif @@ -193,7 +196,7 @@ cvImageWidget_realize (GtkWidget *widget) attributes.y = 
allocation.y; attributes.width = allocation.width; attributes.height = allocation.height; -#else +#elif defined(GTK_VERSION2) attributes.x = widget->allocation.x; attributes.y = widget->allocation.y; attributes.width = widget->allocation.width; @@ -236,7 +239,7 @@ cvImageWidget_realize (GtkWidget *widget) gtk_widget_get_window(widget), GTK_STATE_ACTIVE ); - #else + #elif defined(GTK_VERSION2) // The following lines are included to prevent breaking // compatibility with older Gtk2 (width, requisition->height); } -#endif //GTK_VERSION3 +#endif //GTK_VERSION2 static void cvImageWidget_set_size(GtkWidget * widget, int max_width, int max_height){ CvImageWidget * image_widget = CV_IMAGE_WIDGET( widget ); @@ -380,7 +384,7 @@ cvImageWidget_size_allocate (GtkWidget *widget, #if defined (GTK_VERSION3) gtk_widget_set_allocation(widget, allocation); -#else +#elif defined (GTK_VERSION2) widget->allocation = *allocation; #endif //GTK_VERSION3 image_widget = CV_IMAGE_WIDGET (widget); @@ -410,7 +414,7 @@ cvImageWidget_size_allocate (GtkWidget *widget, allocation->width = image_widget->original_image->cols; allocation->height = image_widget->original_image->rows; gtk_widget_set_allocation(widget, allocation); -#else +#elif defined (GTK_VERSION2) widget->allocation.width = image_widget->original_image->cols; widget->allocation.height = image_widget->original_image->rows; #endif //GTK_VERSION3 @@ -462,13 +466,11 @@ static void cvImageWidget_class_init (gpointer g_class, gpointer /*class_data*/) CvImageWidgetClass* klass = (CvImageWidgetClass*)g_class; #if defined (GTK_VERSION3) GtkWidgetClass *widget_class = GTK_WIDGET_CLASS (klass); -#else - GtkObjectClass *object_class; - GtkWidgetClass *widget_class; - - object_class = (GtkObjectClass*) klass; - widget_class = (GtkWidgetClass*) klass; #endif //GTK_VERSION3 +#if defined (GTK_VERSION2) + GtkObjectClass *object_class = (GtkObjectClass*) klass; + GtkWidgetClass *widget_class = (GtkWidgetClass*) klass; +#endif //GTK_VERSION2 
parent_class = GTK_WIDGET_CLASS( g_type_class_peek (gtk_widget_get_type ()) ); @@ -476,10 +478,11 @@ static void cvImageWidget_class_init (gpointer g_class, gpointer /*class_data*/) widget_class->destroy = cvImageWidget_destroy; widget_class->get_preferred_width = cvImageWidget_get_preferred_width; widget_class->get_preferred_height = cvImageWidget_get_preferred_height; -#else +#endif //GTK_VERSION3 +#if defined (GTK_VERSION2) object_class->destroy = cvImageWidget_destroy; widget_class->size_request = cvImageWidget_size_request; -#endif //GTK_VERSION3 +#endif //GTK_VERSION2 widget_class->realize = cvImageWidget_realize; widget_class->size_allocate = cvImageWidget_size_allocate; @@ -760,6 +763,11 @@ CvRect cvGetWindowRect_GTK(const char* name) return cvRect(getImageRect_(window)); } +#if defined(GTK_VERSION2) + #define gtk_widget_get_allocated_width(widget) (widget->allocation.width) + #define gtk_widget_get_allocated_height(widget) (widget->allocation.height) +#endif + static Rect getImageRect_(const std::shared_ptr& window) { CV_Assert(window); @@ -768,28 +776,18 @@ static Rect getImageRect_(const std::shared_ptr& window) #ifdef HAVE_OPENGL if (window->useGl) { gtk_widget_translate_coordinates(window->widget, gtk_widget_get_toplevel(window->widget), 0, 0, &wx, &wy); - return Rect(wx, wy, window->widget->allocation.width, window->widget->allocation.height); + return Rect(wx, wy, gtk_widget_get_allocated_width(window->widget), gtk_widget_get_allocated_height(window->widget)); } #endif CvImageWidget * image_widget = CV_IMAGE_WIDGET( window->widget ); gtk_widget_translate_coordinates(&image_widget->widget, gtk_widget_get_toplevel(&image_widget->widget), 0, 0, &wx, &wy); if (image_widget->scaled_image) { -#if defined (GTK_VERSION3) return Rect(wx, wy, MIN(image_widget->scaled_image->cols, gtk_widget_get_allocated_width(window->widget)), MIN(image_widget->scaled_image->rows, gtk_widget_get_allocated_height(window->widget))); -#else - return Rect(wx, wy, 
MIN(image_widget->scaled_image->cols, window->widget->allocation.width), - MIN(image_widget->scaled_image->rows, window->widget->allocation.height)); -#endif //GTK_VERSION3 } else if (image_widget->original_image) { -#if defined (GTK_VERSION3) return Rect(wx, wy, MIN(image_widget->original_image->cols, gtk_widget_get_allocated_width(window->widget)), MIN(image_widget->original_image->rows, gtk_widget_get_allocated_height(window->widget))); -#else - return Rect(wx, wy, MIN(image_widget->original_image->cols, window->widget->allocation.width), - MIN(image_widget->original_image->rows, window->widget->allocation.height)); -#endif //GTK_VERSION3 } return Rect(-1, -1, -1, -1); @@ -895,12 +893,8 @@ double cvGetRatioWindow_GTK(const char* name) static double getRatioWindow_(const std::shared_ptr& window) { -#if defined (GTK_VERSION3) double result = static_cast( gtk_widget_get_allocated_width(window->widget)) / gtk_widget_get_allocated_height(window->widget); -#else - double result = static_cast(window->widget->allocation.width) / window->widget->allocation.height; -#endif // GTK_VERSION3 return result; } @@ -954,7 +948,7 @@ namespace if (!gdk_gl_drawable_gl_begin (gldrawable, glcontext)) CV_Error( CV_OpenGlApiCallError, "Can't Activate The GL Rendering Context" ); - glViewport(0, 0, window->widget->allocation.width, window->widget->allocation.height); + glViewport(0, 0, gtk_widget_get_allocated_width(window->widget), gtk_widget_get_allocated_height(window->widget)); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); @@ -972,8 +966,11 @@ namespace #endif // HAVE_OPENGL -#if defined (GTK_VERSION3) +#if defined (GTK_VERSION2) +static gboolean cvImageWidget_expose(GtkWidget* widget, GdkEventExpose* event, gpointer data) +#elif defined (GTK_VERSION3) static gboolean cvImageWidget_draw(GtkWidget* widget, cairo_t *cr, gpointer data) +#endif { #ifdef HAVE_OPENGL CvWindow* window = (CvWindow*)data; @@ -986,6 +983,9 @@ static gboolean cvImageWidget_draw(GtkWidget* widget, cairo_t 
*cr, gpointer data #else (void)data; #endif +#if defined (GTK_VERSION2) + (void)event; +#endif CvImageWidget *image_widget = NULL; GdkPixbuf *pixbuf = NULL; @@ -994,108 +994,38 @@ static gboolean cvImageWidget_draw(GtkWidget* widget, cairo_t *cr, gpointer data g_return_val_if_fail (CV_IS_IMAGE_WIDGET (widget), FALSE); image_widget = CV_IMAGE_WIDGET (widget); +#if defined (GTK_VERSION2) + cairo_t *cr = gdk_cairo_create(widget->window); +#endif if( image_widget->scaled_image ){ // center image in available region -#if defined (GTK_VERSION3) int x0 = (gtk_widget_get_allocated_width(widget) - image_widget->scaled_image->cols)/2; int y0 = (gtk_widget_get_allocated_height(widget) - image_widget->scaled_image->rows)/2; -#else - int x0 = (widget->allocation.width - image_widget->scaled_image->cols)/2; - int y0 = (widget->allocation.height - image_widget->scaled_image->rows)/2; -#endif //GTK_VERSION3 -#if defined (GTK_VERSION3) pixbuf = gdk_pixbuf_new_from_data(image_widget->scaled_image->data.ptr, GDK_COLORSPACE_RGB, false, 8, MIN(image_widget->scaled_image->cols, gtk_widget_get_allocated_width(widget)), MIN(image_widget->scaled_image->rows, gtk_widget_get_allocated_height(widget)), image_widget->scaled_image->step, NULL, NULL); -#else - pixbuf = gdk_pixbuf_new_from_data(image_widget->scaled_image->data.ptr, GDK_COLORSPACE_RGB, false, - 8, MIN(image_widget->scaled_image->cols, widget->allocation.width), - MIN(image_widget->scaled_image->rows, widget->allocation.height), - image_widget->scaled_image->step, NULL, NULL); -#endif //GTK_VERSION3 gdk_cairo_set_source_pixbuf(cr, pixbuf, x0, y0); } else if( image_widget->original_image ){ -#if defined (GTK_VERSION3) pixbuf = gdk_pixbuf_new_from_data(image_widget->original_image->data.ptr, GDK_COLORSPACE_RGB, false, 8, MIN(image_widget->original_image->cols, gtk_widget_get_allocated_width(widget)), MIN(image_widget->original_image->rows, gtk_widget_get_allocated_height(widget)), image_widget->original_image->step, NULL, NULL); 
-#else - pixbuf = gdk_pixbuf_new_from_data(image_widget->original_image->data.ptr, GDK_COLORSPACE_RGB, false, - 8, MIN(image_widget->original_image->cols, widget->allocation.width), - MIN(image_widget->original_image->rows, widget->allocation.height), - image_widget->original_image->step, NULL, NULL); -#endif //GTK_VERSION3 - gdk_cairo_set_source_pixbuf(cr, pixbuf, 0, 0); - } - - cairo_paint(cr); - if(pixbuf) - g_object_unref(pixbuf); - return TRUE; -} - -#else -static gboolean cvImageWidget_expose(GtkWidget* widget, GdkEventExpose* event, gpointer data) -{ -#ifdef HAVE_OPENGL - CvWindow* window = (CvWindow*)data; - - if (window->useGl) - { - drawGl(window); - return TRUE; - } -#else - (void)data; -#endif - - CvImageWidget *image_widget = NULL; - cairo_t *cr = NULL; - GdkPixbuf *pixbuf = NULL; - - g_return_val_if_fail (widget != NULL, FALSE); - g_return_val_if_fail (CV_IS_IMAGE_WIDGET (widget), FALSE); - g_return_val_if_fail (event != NULL, FALSE); - - if (event->count > 0) - return FALSE; - - cr = gdk_cairo_create(widget->window); - image_widget = CV_IMAGE_WIDGET (widget); - - if( image_widget->scaled_image ){ - // center image in available region - int x0 = (widget->allocation.width - image_widget->scaled_image->cols)/2; - int y0 = (widget->allocation.height - image_widget->scaled_image->rows)/2; - - pixbuf = gdk_pixbuf_new_from_data(image_widget->scaled_image->data.ptr, GDK_COLORSPACE_RGB, false, - 8, MIN(image_widget->scaled_image->cols, widget->allocation.width), - MIN(image_widget->scaled_image->rows, widget->allocation.height), - image_widget->scaled_image->step, NULL, NULL); - - gdk_cairo_set_source_pixbuf(cr, pixbuf, x0, y0); - } - else if( image_widget->original_image ){ - pixbuf = gdk_pixbuf_new_from_data(image_widget->original_image->data.ptr, GDK_COLORSPACE_RGB, false, - 8, MIN(image_widget->original_image->cols, widget->allocation.width), - MIN(image_widget->original_image->rows, widget->allocation.height), - image_widget->original_image->step, NULL, 
NULL); gdk_cairo_set_source_pixbuf(cr, pixbuf, 0, 0); } cairo_paint(cr); if(pixbuf) g_object_unref(pixbuf); +#if defined (GTK_VERSION2) cairo_destroy(cr); +#endif return TRUE; } -#endif //GTK_VERSION3 static std::shared_ptr namedWindow_(const std::string& name, int flags); CV_IMPL int cvNamedWindow( const char* name, int flags ) @@ -1807,20 +1737,20 @@ static void icvShowSaveAsDialog(GtkWidget* widget, CvWindow* window) } } -#if defined (GTK_VERSION3) -#define GDK_Escape GDK_KEY_Escape -#define GDK_Return GDK_KEY_Return -#define GDK_Linefeed GDK_KEY_Linefeed -#define GDK_Tab GDK_KEY_Tab -#define GDK_s GDK_KEY_s -#define GDK_S GDK_KEY_S -#endif //GTK_VERSION3 +#if defined(GTK_VERSION2) && !defined (GDK_KEY_Escape) +#define GDK_KEY_Escape GDK_Escape +#define GDK_KEY_Return GDK_Return +#define GDK_KEY_Linefeed GDK_Linefeed +#define GDK_KEY_Tab GDK_Tab +#define GDK_KEY_s GDK_s +#define GDK_KEY_S GDK_S +#endif //GDK_KEY_Escape static gboolean icvOnKeyPress(GtkWidget* widget, GdkEventKey* event, gpointer user_data) { int code = 0; - if ( BIT_ALLIN(event->state, GDK_CONTROL_MASK) && (event->keyval == GDK_s || event->keyval == GDK_S)) + if ( BIT_ALLIN(event->state, GDK_CONTROL_MASK) && (event->keyval == GDK_KEY_s || event->keyval == GDK_KEY_S)) { try { @@ -1834,14 +1764,14 @@ static gboolean icvOnKeyPress(GtkWidget* widget, GdkEventKey* event, gpointer us switch( event->keyval ) { - case GDK_Escape: + case GDK_KEY_Escape: code = 27; break; - case GDK_Return: - case GDK_Linefeed: + case GDK_KEY_Return: + case GDK_KEY_Linefeed: code = 13; break; - case GDK_Tab: + case GDK_KEY_Tab: code = '\t'; break; default: @@ -2005,13 +1935,8 @@ static gboolean icvOnMouse( GtkWidget *widget, GdkEvent *event, gpointer user_da image_widget->scaled_image ) { // image origin is not necessarily at (0,0) -#if defined (GTK_VERSION3) int x0 = (gtk_widget_get_allocated_width(widget) - image_widget->scaled_image->cols)/2; int y0 = (gtk_widget_get_allocated_height(widget) - 
image_widget->scaled_image->rows)/2; -#else - int x0 = (widget->allocation.width - image_widget->scaled_image->cols)/2; - int y0 = (widget->allocation.height - image_widget->scaled_image->rows)/2; -#endif //GTK_VERSION3 pt.x = cvFloor( ((pt32f.x-x0)*image_widget->original_image->cols)/ image_widget->scaled_image->cols ); pt.y = cvFloor( ((pt32f.y-y0)*image_widget->original_image->rows)/ From 8ad8ec679fe8ff970c075fb327d5b97f61a48220 Mon Sep 17 00:00:00 2001 From: Vincent Rabaud Date: Tue, 7 Mar 2023 13:05:38 +0100 Subject: [PATCH 038/199] Merge pull request #22441 from vrabaud:hls_while In case of huge (and probably invalid) input, make sure we do not rely only on the while loops for truncation. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch --- modules/imgproc/src/color_hsv.simd.hpp | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/modules/imgproc/src/color_hsv.simd.hpp b/modules/imgproc/src/color_hsv.simd.hpp index 424a6d1494..0c30564a12 100644 --- a/modules/imgproc/src/color_hsv.simd.hpp +++ b/modules/imgproc/src/color_hsv.simd.hpp @@ -419,6 +419,14 @@ inline void HSV2RGB_simd(const v_float32& h, const v_float32& s, const v_float32 } #endif +// Compute the sector and the new H for HSV and HLS 2 RGB conversions. +inline void ComputeSectorAndClampedH(float& h, int §or) { + sector = cvFloor(h); + h -= sector; + sector %= 6; + sector += sector < 0 ? 
6 : 0; +} + inline void HSV2RGB_native(float h, float s, float v, float& b, float& g, float& r, @@ -433,14 +441,7 @@ inline void HSV2RGB_native(float h, float s, float v, float tab[4]; int sector; h *= hscale; - h = fmod(h, 6.f); - sector = cvFloor(h); - h -= sector; - if( (unsigned)sector >= 6u ) - { - sector = 0; - h = 0.f; - } + ComputeSectorAndClampedH(h, sector); tab[0] = v; tab[1] = v*(1.f - s); @@ -987,13 +988,7 @@ struct HLS2RGB_f float p1 = 2*l - p2; h *= hscale; - // We need both loops to clamp (e.g. for h == -1e-40). - while( h < 0 ) h += 6; - while( h >= 6 ) h -= 6; - - CV_DbgAssert( 0 <= h && h < 6 ); - sector = cvFloor(h); - h -= sector; + ComputeSectorAndClampedH(h, sector); tab[0] = p2; tab[1] = p1; From 7ea6b356c782ae8ae76e6bb5c49f6a77ef5bbf7e Mon Sep 17 00:00:00 2001 From: Bhavit Patel Date: Fri, 10 Mar 2023 01:50:36 -0500 Subject: [PATCH 039/199] Merge pull request #23305 from bhavitp:fix/calib3d/undistortion_grid Resolves https://github.com/opencv/opencv/issues/23304 Fixes the incorrect pixel grid Switches type to double to avoid precision loss as all callers use doubles ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [X] I agree to contribute to the project under Apache 2 License. - [X] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [X] The PR is proposed to the proper branch - [X] There is a reference to the original bug report and related work - [X] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [X] The feature is well documented and sample code can be built with the project CMake --- modules/calib3d/src/calibration.cpp | 32 ++++---- modules/calib3d/test/test_undistort.cpp | 99 +++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 16 deletions(-) diff --git a/modules/calib3d/src/calibration.cpp b/modules/calib3d/src/calibration.cpp index 7081ee8b4d..2cf10afba6 100644 --- a/modules/calib3d/src/calibration.cpp +++ b/modules/calib3d/src/calibration.cpp @@ -2258,28 +2258,28 @@ double cvStereoCalibrate( const CvMat* _objectPoints, const CvMat* _imagePoints1 static void icvGetRectangles( const CvMat* cameraMatrix, const CvMat* distCoeffs, const CvMat* R, const CvMat* newCameraMatrix, CvSize imgSize, - cv::Rect_& inner, cv::Rect_& outer ) + cv::Rect_& inner, cv::Rect_& outer ) { const int N = 9; int x, y, k; - cv::Ptr _pts(cvCreateMat(1, N*N, CV_32FC2)); - CvPoint2D32f* pts = (CvPoint2D32f*)(_pts->data.ptr); + cv::Ptr _pts(cvCreateMat(1, N*N, CV_64FC2)); + CvPoint2D64f* pts = (CvPoint2D64f*)(_pts->data.ptr); for( y = k = 0; y < N; y++ ) for( x = 0; x < N; x++ ) - pts[k++] = cvPoint2D32f((float)x*imgSize.width/(N-1), - (float)y*imgSize.height/(N-1)); + pts[k++] = cvPoint2D64f((double)x*(imgSize.width-1)/(N-1), + (double)y*(imgSize.height-1)/(N-1)); cvUndistortPoints(_pts, _pts, cameraMatrix, distCoeffs, R, newCameraMatrix); - float iX0=-FLT_MAX, iX1=FLT_MAX, iY0=-FLT_MAX, iY1=FLT_MAX; - float oX0=FLT_MAX, oX1=-FLT_MAX, oY0=FLT_MAX, oY1=-FLT_MAX; + double iX0=-FLT_MAX, iX1=FLT_MAX, iY0=-FLT_MAX, iY1=FLT_MAX; + double oX0=FLT_MAX, oX1=-FLT_MAX, oY0=FLT_MAX, oY1=-FLT_MAX; // find the inscribed rectangle. 
// the code will likely not work with extreme rotation matrices (R) (>45%) for( y = k = 0; y < N; y++ ) for( x = 0; x < N; x++ ) { - CvPoint2D32f p = pts[k++]; + CvPoint2D64f p = pts[k++]; oX0 = MIN(oX0, p.x); oX1 = MAX(oX1, p.x); oY0 = MIN(oY0, p.y); @@ -2294,8 +2294,8 @@ icvGetRectangles( const CvMat* cameraMatrix, const CvMat* distCoeffs, if( y == N-1 ) iY1 = MIN(iY1, p.y); } - inner = cv::Rect_(iX0, iY0, iX1-iX0, iY1-iY0); - outer = cv::Rect_(oX0, oY0, oX1-oX0, oY1-oY0); + inner = cv::Rect_(iX0, iY0, iX1-iX0, iY1-iY0); + outer = cv::Rect_(oX0, oY0, oX1-oX0, oY1-oY0); } @@ -2308,7 +2308,7 @@ void cvStereoRectify( const CvMat* _cameraMatrix1, const CvMat* _cameraMatrix2, { double _om[3], _t[3] = {0}, _uu[3]={0,0,0}, _r_r[3][3], _pp[3][4]; double _ww[3], _wr[3][3], _z[3] = {0,0,0}, _ri[3][3]; - cv::Rect_ inner1, inner2, outer1, outer2; + cv::Rect_ inner1, inner2, outer1, outer2; CvMat om = cvMat(3, 1, CV_64F, _om); CvMat t = cvMat(3, 1, CV_64F, _t); @@ -2515,7 +2515,7 @@ void cvGetOptimalNewCameraMatrix( const CvMat* cameraMatrix, const CvMat* distCo CvMat* newCameraMatrix, CvSize newImgSize, CvRect* validPixROI, int centerPrincipalPoint ) { - cv::Rect_ inner, outer; + cv::Rect_ inner, outer; newImgSize = newImgSize.width*newImgSize.height != 0 ? 
newImgSize : imgSize; double M[3][3]; @@ -2545,10 +2545,10 @@ void cvGetOptimalNewCameraMatrix( const CvMat* cameraMatrix, const CvMat* distCo if( validPixROI ) { - inner = cv::Rect_((float)((inner.x - cx0)*s + cx), - (float)((inner.y - cy0)*s + cy), - (float)(inner.width*s), - (float)(inner.height*s)); + inner = cv::Rect_((double)((inner.x - cx0)*s + cx), + (double)((inner.y - cy0)*s + cy), + (double)(inner.width*s), + (double)(inner.height*s)); cv::Rect r(cvCeil(inner.x), cvCeil(inner.y), cvFloor(inner.width), cvFloor(inner.height)); r &= cv::Rect(0, 0, newImgSize.width, newImgSize.height); *validPixROI = cvRect(r); diff --git a/modules/calib3d/test/test_undistort.cpp b/modules/calib3d/test/test_undistort.cpp index 4d90ecab4d..db6c2f764e 100644 --- a/modules/calib3d/test/test_undistort.cpp +++ b/modules/calib3d/test/test_undistort.cpp @@ -157,6 +157,104 @@ void CV_DefaultNewCameraMatrixTest::prepare_to_validation( int /*test_case_idx*/ //--------- +class CV_GetOptimalNewCameraMatrixNoDistortionTest : public cvtest::ArrayTest +{ +public: + CV_GetOptimalNewCameraMatrixNoDistortionTest(); +protected: + int prepare_test_case (int test_case_idx); + void prepare_to_validation(int test_case_idx); + void get_test_array_types_and_sizes(int test_case_idx, vector >& sizes, vector >& types); + void run_func(); + +private: + cv::Mat camera_mat; + cv::Mat distortion_coeffs; + cv::Mat new_camera_mat; + + cv::Size img_size; + double alpha; + bool center_principal_point; + + int matrix_type; + + static const int MAX_X = 2048; + static const int MAX_Y = 2048; +}; + +CV_GetOptimalNewCameraMatrixNoDistortionTest::CV_GetOptimalNewCameraMatrixNoDistortionTest() +{ + test_array[INPUT].push_back(NULL); // camera_mat + test_array[INPUT].push_back(NULL); // distortion_coeffs + test_array[OUTPUT].push_back(NULL); // new_camera_mat + test_array[REF_OUTPUT].push_back(NULL); + + alpha = 0.0; + center_principal_point = false; + matrix_type = 0; +} + +void 
CV_GetOptimalNewCameraMatrixNoDistortionTest::get_test_array_types_and_sizes(int test_case_idx, vector >& sizes, vector >& types) +{ + cvtest::ArrayTest::get_test_array_types_and_sizes(test_case_idx, sizes, types); + RNG& rng = ts->get_rng(); + matrix_type = types[INPUT][0] = types[INPUT][1] = types[OUTPUT][0] = types[REF_OUTPUT][0] = cvtest::randInt(rng)%2 ? CV_64F : CV_32F; + sizes[INPUT][0] = sizes[OUTPUT][0] = sizes[REF_OUTPUT][0] = cvSize(3,3); + sizes[INPUT][1] = cvSize(1,4); +} + +int CV_GetOptimalNewCameraMatrixNoDistortionTest::prepare_test_case(int test_case_idx) +{ + int code = cvtest::ArrayTest::prepare_test_case( test_case_idx ); + + if (code <= 0) + return code; + + RNG& rng = ts->get_rng(); + + alpha = cvtest::randReal(rng); + center_principal_point = ((cvtest::randInt(rng) % 2)!=0); + + // Generate random camera matrix. Use floating point precision for source to avoid precision loss + img_size.width = cvtest::randInt(rng) % MAX_X + 1; + img_size.height = cvtest::randInt(rng) % MAX_Y + 1; + const float aspect_ratio = static_cast(img_size.width) / img_size.height; + float cam_array[9] = {0,0,0,0,0,0,0,0,1}; + cam_array[2] = static_cast((img_size.width - 1)*0.5); // center + cam_array[5] = static_cast((img_size.height - 1)*0.5); // center + cam_array[0] = static_cast(MAX(img_size.width, img_size.height)/(0.9 - cvtest::randReal(rng)*0.6)); + cam_array[4] = aspect_ratio*cam_array[0]; + + Mat& input_camera_mat = test_mat[INPUT][0]; + cvtest::convert(Mat(3, 3, CV_32F, cam_array), input_camera_mat, input_camera_mat.type()); + camera_mat = input_camera_mat; + + // Generate zero distortion matrix + const Mat zero_dist_coeffs = Mat::zeros(1, 4, CV_32F); + Mat& input_dist_coeffs = test_mat[INPUT][1]; + cvtest::convert(zero_dist_coeffs, input_dist_coeffs, input_dist_coeffs.type()); + distortion_coeffs = input_dist_coeffs; + + return code; +} + +void CV_GetOptimalNewCameraMatrixNoDistortionTest::run_func() +{ + new_camera_mat = 
cv::getOptimalNewCameraMatrix(camera_mat, distortion_coeffs, img_size, alpha, img_size, NULL, center_principal_point); +} + +void CV_GetOptimalNewCameraMatrixNoDistortionTest::prepare_to_validation(int /*test_case_idx*/) +{ + const Mat& src = test_mat[INPUT][0]; + Mat& dst = test_mat[REF_OUTPUT][0]; + cvtest::copy(src, dst); + + Mat& output = test_mat[OUTPUT][0]; + cvtest::convert(new_camera_mat, output, output.type()); +} + +//--------- + class CV_UndistortPointsTest : public cvtest::ArrayTest { public: @@ -935,6 +1033,7 @@ double CV_InitUndistortRectifyMapTest::get_success_error_level( int /*test_case_ ////////////////////////////////////////////////////////////////////////////////////////////////////// TEST(Calib3d_DefaultNewCameraMatrix, accuracy) { CV_DefaultNewCameraMatrixTest test; test.safe_run(); } +TEST(Calib3d_GetOptimalNewCameraMatrixNoDistortion, accuracy) { CV_GetOptimalNewCameraMatrixNoDistortionTest test; test.safe_run(); } TEST(Calib3d_UndistortPoints, accuracy) { CV_UndistortPointsTest test; test.safe_run(); } TEST(Calib3d_InitUndistortRectifyMap, accuracy) { CV_InitUndistortRectifyMapTest test; test.safe_run(); } From c6e5f6052513b6b1fb07682371ec08a6d4c0584b Mon Sep 17 00:00:00 2001 From: Alexey Shtern Date: Fri, 10 Mar 2023 10:37:43 +0200 Subject: [PATCH 040/199] Merge pull request #23301 from shtern:fix_quaternion Fixed strict type in slerp and spline; Fixed nlerp usage condition Fixes #23293 The PR is fixing the issue described in [Issue #23293 ](https://github.com/opencv/opencv/issues/23293) - [X] I agree to contribute to the project under Apache 2 License. 
- [X] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [X] The PR is proposed to the proper branch - [X] There is a reference to the original bug report and related work - [X] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [X] The feature is well documented and sample code can be built with the project CMake --- modules/core/include/opencv2/core/quaternion.inl.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/core/include/opencv2/core/quaternion.inl.hpp b/modules/core/include/opencv2/core/quaternion.inl.hpp index 29a16d9f7d..b901ecbc68 100644 --- a/modules/core/include/opencv2/core/quaternion.inl.hpp +++ b/modules/core/include/opencv2/core/quaternion.inl.hpp @@ -745,8 +745,8 @@ Quat Quat::lerp(const Quat &q0, const Quat &q1, const T t) template Quat Quat::slerp(const Quat &q0, const Quat &q1, const T t, QuatAssumeType assumeUnit, bool directChange) { - Quatd v0(q0); - Quatd v1(q1); + Quat v0(q0); + Quat v1(q1); if (!assumeUnit) { v0 = v0.normalize(); @@ -754,7 +754,7 @@ Quat Quat::slerp(const Quat &q0, const Quat &q1, const T t, QuatAssu } T cosTheta = v0.dot(v1); constexpr T DOT_THRESHOLD = 0.995; - if (cosTheta > DOT_THRESHOLD) + if (std::abs(cosTheta) > DOT_THRESHOLD) { return nlerp(v0, v1, t, QUAT_ASSUME_UNIT); } @@ -843,7 +843,7 @@ Quat Quat::interPoint(const Quat &q0, const Quat &q1, template Quat Quat::spline(const Quat &q0, const Quat &q1, const Quat &q2, const Quat &q3, const T t, QuatAssumeType assumeUnit) { - Quatd v0(q0), v1(q1), v2(q2), v3(q3); + Quat v0(q0), v1(q1), v2(q2), v3(q3); if (!assumeUnit) { v0 = v0.normalize(); From e03e2e7f94a1f476618cedc2a65eba886b9caafd Mon Sep 17 00:00:00 2001 From: Zihao Mu Date: Fri, 10 Mar 2023 16:59:49 +0800 Subject: [PATCH 041/199] Merge pull request #23192 from zihaomu:clean_up_SIMD_code ### 
Purpose of this PR: - Move all dispatch and SIMD code of the `convolution layer` into the `simd.hpp` file. - Support Winograd on AVX-only machines. - Rename the folder from `fast_conv` to `cpu_kernels`. In the future, we can put CPU optimizations for other layers into it, like `GEMM` or `MatMul`. ## Performance Test Since this patch only focuses on code style, the performance is expected to be the same as before. Test with the following script: `./bin/opencv_perf_dnn '--gtest_filter=*conv*' --gtest_output="xml:../1-0th.xml" --perf_threads=1` ### Test on X86 platform Min (ms) |Name of Test|4.x | patch | 4.x vs patch (x-factor)| |---|:-:|:-:|:-:| |conv1d::Conv1D::(GFLOPS=0.000, K=[3], IN={1, 2, 19}, OCN=2, G=2, S=2, P=(1, 1), BIAS, OCV/CPU)|0.001|0.001|0.98| |conv1d::Conv1D::(GFLOPS=0.000, K=[3], IN={1, 2, 25}, OCN=2, G=2, P=(2, 2), PM=SAME, OCV/CPU)|0.001|0.001|0.95| |conv1d::Conv1D::(GFLOPS=0.000, K=[3], IN={1, 6, 10}, OCN=6, PM=VALID, BIAS, OCV/CPU)|0.001|0.001|0.97| |conv3d::Conv3D::(GFLOPS=0.000, K=[1 x 1 x 1], IN={1, 4, 9, 10, 10}, OCN=4, S=[1 x 1 x 2], P=(1, 1) x (1, 1) x (1, 1), PM=VALID, OCV/CPU)|0.002|0.002|1.04| |conv3d::Conv3D::(GFLOPS=0.000, K=[1 x 1 x 1], IN={1, 8, 1, 10, 10}, OCN=8, G=8, P=(1, 1) x (1, 1) x (1, 1), BIAS, OCV/CPU)|0.002|0.002|0.94| |conv3d::Conv3D::(GFLOPS=0.000, K=[3 x 3 x 3], IN={1, 2, 19, 19, 19}, OCN=2, G=2, S=[2 x 2 x 2], P=(1, 1) x (1, 1) x (1, 1), BIAS, OCV/CPU)|0.040|0.044|0.93| |conv3d::Conv3D::(GFLOPS=0.000, K=[3 x 4 x 2], IN={1, 4, 8, 10, 10}, OCN=4, G=4, S=[1 x 2 x 1], BIAS, OCV/CPU)|0.010|0.010|1.00| |conv3d::Conv3D::(GFLOPS=0.001, K=[3 x 3 x 3], IN={1, 2, 25, 19, 19}, OCN=2, G=2, S=[1 x 2 x 2], P=(2, 2) x (2, 2) x (2, 2), PM=SAME, OCV/CPU)|0.106|0.103|1.03| |conv3d::Conv3D::(GFLOPS=0.002, K=[3 x 1 x 4], IN={1, 14, 5, 10, 10}, OCN=14, PM=SAME, OCV/CPU)|0.041|0.040|1.03| |conv3d::Conv3D::(GFLOPS=0.006, K=[5 x 5 x 5], IN={1, 4, 50, 19, 19}, OCN=4, S=[2 x 2 x 2], P=(1, 1) x (1, 1) x (1, 1), PM=VALID, OCV/CPU)|0.340|0.329|1.03| 
|conv3d::Conv3D::(GFLOPS=0.027, K=[3 x 3 x 3], IN={1, 6, 10, 38, 50}, OCN=6, PM=VALID, BIAS, OCV/CPU)|0.590|0.567|1.04| |conv3d::Conv3D::(GFLOPS=0.030, K=[5 x 5 x 5], IN={1, 6, 19, 19, 19}, OCN=6, G=2, OCV/CPU)|1.374|1.314|1.05| |conv3d::Conv3D::(GFLOPS=0.045, K=[7 x 7 x 7], IN={1, 2, 38, 38, 38}, OCN=2, S=[1 x 2 x 1], OCV/CPU)|3.715|3.528|1.05| |conv3d::Conv3D::(GFLOPS=0.053, K=[3 x 3 x 3], IN={1, 10, 98, 10, 10}, OCN=10, PM=SAME, OCV/CPU)|1.181|1.166|1.01| |conv3d::Conv3D::(GFLOPS=0.071, K=[7 x 7 x 7], IN={1, 6, 15, 19, 19}, OCN=6, S=[2 x 1 x 1], P=(3, 3) x (3, 3) x (3, 3), PM=SAME, BIAS, OCV/CPU)|2.689|2.587|1.04| |conv3d::Conv3D::(GFLOPS=0.093, K=[5 x 5 x 5], IN={1, 4, 40, 75, 75}, OCN=4, S=[2 x 2 x 2], OCV/CPU)|4.754|4.500|1.06| |conv3d::Conv3D::(GFLOPS=0.116, K=[5 x 5 x 5], IN={1, 2, 21, 75, 100}, OCN=2, BIAS, OCV/CPU)|9.612|9.112|1.05| |conv3d::Conv3D::(GFLOPS=1.267, K=[5 x 5 x 5], IN={1, 3, 75, 75, 100}, OCN=3, PM=SAME, BIAS, OCV/CPU)|69.000|64.676|1.07| |conv3d::Conv3D::(GFLOPS=1.343, K=[3 x 3 x 3], IN={1, 11, 9, 150, 200}, OCN=11, PM=VALID, BIAS, OCV/CPU)|20.248|18.451|1.10| |conv::Conv::(GFLOPS=0.177, K=[1 x 1], IN={1, 512, 26, 26}, OCN=256, OCV/CPU)|1.395|1.392|1.00| |conv::Conv::(GFLOPS=0.177, K=[1 x 1], IN={1, 1024, 13, 13}, OCN=512, OCV/CPU)|1.990|1.984|1.00| |conv::Conv::(GFLOPS=0.178, K=[1 x 1], IN={1, 256, 52, 52}, OCN=128, OCV/CPU)|1.393|1.360|1.02| |conv::Conv::(GFLOPS=0.210, K=[1 x 1], IN={1, 576, 38, 50}, OCN=96, PM=SAME, BIAS, OCV/CPU)|1.813|1.744|1.04| |conv::Conv::(GFLOPS=0.231, K=[3 x 3], IN={1, 128, 56, 56}, OCN=32, P=[1 x 1], OCV/CPU)|1.190|1.191|1.00| |conv::Conv::(GFLOPS=0.231, K=[3 x 3], IN={1, 256, 14, 14}, OCN=256, P=[1 x 1], OCV/CPU)|1.286|1.284|1.00| |conv::Conv::(GFLOPS=0.280, K=[1 x 1], IN={1, 576, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU)|2.295|2.279|1.01| |conv::Conv::(GFLOPS=0.302, K=[3 x 3], IN={1, 64, 64, 64}, OCN=64, PM=SAME, OCV/CPU)|1.322|1.331|0.99| |conv::Conv::(GFLOPS=0.357, K=[1 x 1], IN={1, 64, 208, 208}, OCN=64, 
OCV/CPU)|3.784|3.533|1.07| |conv::Conv::(GFLOPS=0.420, K=[3 x 3], IN={1, 96, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU)|1.838|1.844|1.00| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 128, 40, 40}, OCN=128, PM=SAME, OCV/CPU)|1.957|1.959|1.00| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 256, 20, 20}, OCN=256, PM=SAME, OCV/CPU)|2.596|2.573|1.01| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 512, 10, 10}, OCN=512, PM=SAME, OCV/CPU)|4.183|4.083|1.02| |conv::Conv::(GFLOPS=0.561, K=[3 x 3], IN={1, 128, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU)|2.413|2.406|1.00| |conv::Conv::(GFLOPS=0.624, K=[3 x 3], IN={1, 128, 46, 46}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|2.538|2.546|1.00| |conv::Conv::(GFLOPS=0.701, K=[3 x 3], IN={1, 128, 38, 50}, OCN=160, PM=SAME, BIAS, OCV/CPU)|2.972|2.980|1.00| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 64, 104, 104}, OCN=64, P=[1 x 1], OCV/CPU)|3.452|3.464|1.00| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 128, 52, 52}, OCN=128, P=[1 x 1], OCV/CPU)|3.082|3.105|0.99| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 256, 26, 26}, OCN=256, P=[1 x 1], OCV/CPU)|4.043|3.919|1.03| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 512, 13, 13}, OCN=512, P=[1 x 1], OCV/CPU)|5.538|5.531|1.00| |conv::Conv::(GFLOPS=0.830, K=[3 x 3], IN={1, 64, 75, 100}, OCN=96, PM=SAME, BIAS, OCV/CPU)|3.393|3.418|0.99| |conv::Conv::(GFLOPS=0.958, K=[3 x 3], IN={1, 192, 38, 38}, OCN=192, PM=SAME, OCV/CPU)|4.325|4.234|1.02| |conv::Conv::(GFLOPS=0.958, K=[3 x 3], IN={1, 384, 19, 19}, OCN=384, PM=SAME, OCV/CPU)|6.009|5.908|1.02| |conv::Conv::(GFLOPS=1.022, K=[3 x 3], IN={1, 576, 19, 19}, OCN=273, PM=SAME, BIAS, OCV/CPU)|6.557|6.376|1.03| |conv::Conv::(GFLOPS=1.112, K=[3 x 3], IN={1, 512, 10, 10}, OCN=1206, P=[1 x 1], BIAS, OCV/CPU)|10.114|9.472|1.07| |conv::Conv::(GFLOPS=1.181, K=[3 x 3], IN={1, 64, 160, 200}, OCN=128, S=[2 x 2], P=[1 x 1], BIAS, OCV/CPU)|10.373|9.879|1.05| |conv::Conv::(GFLOPS=1.182, K=[3 x 3], IN={1, 32, 320, 400}, OCN=64, S=[2 x 2], P=[1 x 1], BIAS, 
OCV/CPU)|12.782|11.624|1.10| |conv::Conv::(GFLOPS=1.195, K=[9 x 9], IN={1, 32, 240, 320}, OCN=3, P=[4 x 4], BIAS, OCV/CPU)|90.931|90.552|1.00| |conv::Conv::(GFLOPS=1.196, K=[3 x 3], IN={1, 384, 26, 26}, OCN=256, P=[1 x 1], OCV/CPU)|6.091|5.818|1.05| |conv::Conv::(GFLOPS=1.210, K=[3 x 3], IN={1, 32, 256, 256}, OCN=32, PM=SAME, OCV/CPU)|7.083|6.643|1.07| |conv::Conv::(GFLOPS=1.245, K=[3 x 3], IN={1, 64, 75, 75}, OCN=192, PM=SAME, BIAS, OCV/CPU)|5.054|5.059|1.00| |conv::Conv::(GFLOPS=1.245, K=[3 x 3], IN={1, 96, 75, 100}, OCN=96, PM=SAME, BIAS, OCV/CPU)|5.005|4.931|1.02| |conv::Conv::(GFLOPS=1.248, K=[3 x 3], IN={1, 256, 46, 46}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|4.951|5.065|0.98| |conv::Conv::(GFLOPS=1.258, K=[3 x 3], IN={1, 1280, 10, 10}, OCN=546, PM=SAME, BIAS, OCV/CPU)|11.957|11.293|1.06| |conv::Conv::(GFLOPS=1.261, K=[3 x 3], IN={1, 192, 38, 50}, OCN=192, PM=SAME, BIAS, OCV/CPU)|5.328|5.250|1.01| |conv::Conv::(GFLOPS=1.416, K=[3 x 3], IN={1, 128, 62, 82}, OCN=128, BIAS, OCV/CPU)|5.544|5.292|1.05| |conv::Conv::(GFLOPS=1.500, K=[3 x 3], IN={1, 128, 64, 84}, OCN=128, BIAS, OCV/CPU)|6.186|5.893|1.05| |conv::Conv::(GFLOPS=1.586, K=[3 x 3], IN={1, 128, 66, 86}, OCN=128, BIAS, OCV/CPU)|6.153|5.834|1.05| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 256, 26, 26}, OCN=512, P=[1 x 1], OCV/CPU)|8.154|8.107|1.01| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 256, 52, 52}, OCN=512, S=[2 x 2], P=[1 x 1], OCV/CPU)|12.699|12.256|1.04| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 512, 13, 13}, OCN=1024, P=[1 x 1], OCV/CPU)|11.355|11.217|1.01| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 512, 26, 26}, OCN=1024, S=[2 x 2], P=[1 x 1], OCV/CPU)|19.062|17.814|1.07| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 64, 104, 104}, OCN=128, P=[1 x 1], OCV/CPU)|6.820|6.531|1.04| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 64, 208, 208}, OCN=128, S=[2 x 2], P=[1 x 1], OCV/CPU)|14.502|13.483|1.08| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 128, 52, 52}, OCN=256, P=[1 x 1], 
OCV/CPU)|6.270|6.123|1.02| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 128, 104, 104}, OCN=256, S=[2 x 2], P=[1 x 1], OCV/CPU)|13.173|12.451|1.06| |conv::Conv::(GFLOPS=1.598, K=[3 x 3], IN={1, 32, 208, 208}, OCN=64, P=[1 x 1], OCV/CPU)|8.326|7.652|1.09| |conv::Conv::(GFLOPS=1.598, K=[3 x 3], IN={1, 32, 416, 416}, OCN=64, S=[2 x 2], P=[1 x 1], OCV/CPU)|17.605|16.465|1.07| |conv::Conv::(GFLOPS=1.659, K=[3 x 3], IN={1, 960, 10, 10}, OCN=960, PM=SAME, OCV/CPU)|15.675|14.771|1.06| |conv::Conv::(GFLOPS=1.660, K=[3 x 3], IN={1, 128, 75, 75}, OCN=128, G=128, P=[1 x 1], BIAS, OCV/CPU)|0.420|0.423|0.99| |conv::Conv::(GFLOPS=1.660, K=[3 x 3], IN={1, 128, 75, 75}, OCN=128, PM=SAME, OCV/CPU)|6.788|6.491|1.05| |conv::Conv::(GFLOPS=1.675, K=[3 x 3], IN={1, 128, 68, 88}, OCN=128, BIAS, OCV/CPU)|6.456|6.168|1.05| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 256, 38, 38}, OCN=256, G=256, P=[1 x 1], BIAS, OCV/CPU)|0.263|0.261|1.01| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 256, 38, 38}, OCN=256, PM=SAME, OCV/CPU)|7.690|7.398|1.04| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, G=512, P=[1 x 1], BIAS, OCV/CPU)|0.200|0.202|0.99| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|10.542|10.464|1.01| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, PM=SAME, OCV/CPU)|10.876|10.728|1.01| |conv::Conv::(GFLOPS=1.766, K=[3 x 3], IN={1, 128, 70, 90}, OCN=128, BIAS, OCV/CPU)|7.194|6.768|1.06| |conv::Conv::(GFLOPS=1.859, K=[3 x 3], IN={1, 128, 72, 92}, OCN=128, BIAS, OCV/CPU)|7.099|6.731|1.05| |conv::Conv::(GFLOPS=1.888, K=[3 x 3], IN={1, 1024, 10, 10}, OCN=1024, G=1024, P=[1 x 1], BIAS, OCV/CPU)|0.147|0.162|0.91| |conv::Conv::(GFLOPS=1.888, K=[3 x 3], IN={1, 1024, 10, 10}, OCN=1024, PM=SAME, OCV/CPU)|18.558|17.141|1.08| |conv::Conv::(GFLOPS=1.954, K=[3 x 3], IN={1, 128, 74, 94}, OCN=128, BIAS, OCV/CPU)|7.641|7.219|1.06| |conv::Conv::(GFLOPS=1.995, K=[9 x 9], IN={1, 3, 320, 400}, OCN=32, P=[4 x 4], BIAS, 
OCV/CPU)|22.666|20.999|1.08| |conv::Conv::(GFLOPS=2.052, K=[3 x 3], IN={1, 128, 76, 96}, OCN=128, BIAS, OCV/CPU)|8.523|7.921|1.08| |conv::Conv::(GFLOPS=2.100, K=[3 x 3], IN={1, 144, 75, 75}, OCN=144, PM=SAME, OCV/CPU)|8.514|8.109|1.05| |conv::Conv::(GFLOPS=2.153, K=[3 x 3], IN={1, 128, 78, 98}, OCN=128, BIAS, OCV/CPU)|8.300|7.878|1.05| |conv::Conv::(GFLOPS=2.156, K=[3 x 3], IN={1, 576, 19, 19}, OCN=576, PM=SAME, OCV/CPU)|13.403|13.131|1.02| |conv::Conv::(GFLOPS=2.255, K=[3 x 3], IN={1, 128, 80, 100}, OCN=128, BIAS, OCV/CPU)|8.920|8.357|1.07| |conv::Conv::(GFLOPS=2.719, K=[3 x 3], IN={1, 96, 256, 256}, OCN=96, S=[2 x 2], PM=SAME, OCV/CPU)|28.827|27.616|1.04| |conv::Conv::(GFLOPS=3.319, K=[3 x 3], IN={1, 128, 75, 75}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|12.895|12.670|1.02| |conv::Conv::(GFLOPS=3.321, K=[3 x 3], IN={1, 64, 150, 150}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|14.120|13.078|1.08| |conv::Conv::(GFLOPS=3.398, K=[7 x 7], IN={1, 128, 46, 46}, OCN=128, P=[3 x 3], BIAS, OCV/CPU)|27.541|27.582|1.00| |conv::Conv::(GFLOPS=3.407, K=[3 x 3], IN={1, 512, 19, 19}, OCN=1024, D=[6 x 6], P=[6 x 6], BIAS, OCV/CPU)|32.367|31.140|1.04| |conv::Conv::(GFLOPS=3.408, K=[3 x 3], IN={1, 256, 38, 38}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|14.934|14.910|1.00| |conv::Conv::(GFLOPS=4.247, K=[3 x 3], IN={1, 480, 32, 32}, OCN=480, PM=SAME, OCV/CPU)|18.289|18.491|0.99| |conv::Conv::(GFLOPS=4.247, K=[5 x 5], IN={1, 144, 128, 128}, OCN=144, S=[2 x 2], PM=SAME, OCV/CPU)|37.857|36.845|1.03| |conv::Conv::(GFLOPS=4.566, K=[7 x 7], IN={1, 172, 46, 46}, OCN=128, P=[3 x 3], BIAS, OCV/CPU)|37.402|36.566|1.02| |conv::Conv::(GFLOPS=4.993, K=[3 x 3], IN={1, 256, 46, 46}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|19.031|19.164|0.99| |conv::Conv::(GFLOPS=4.993, K=[3 x 3], IN={1, 512, 46, 46}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|19.019|19.135|0.99| |conv::Conv::(GFLOPS=4.994, K=[3 x 3], IN={1, 128, 92, 92}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|20.077|19.400|1.03| |conv::Conv::(GFLOPS=4.997, K=[3 x 3], IN={1, 64, 184, 
184}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|21.883|21.302|1.03| |conv::Conv::(GFLOPS=5.780, K=[5 x 5], IN={1, 672, 32, 32}, OCN=672, S=[2 x 2], PM=SAME, OCV/CPU)|51.288|49.851|1.03| |conv::Conv::(GFLOPS=6.116, K=[3 x 3], IN={1, 1152, 16, 16}, OCN=1152, PM=SAME, OCV/CPU)|27.349|28.359|0.96| |conv::Conv::(GFLOPS=6.118, K=[3 x 3], IN={1, 144, 128, 128}, OCN=144, PM=SAME, OCV/CPU)|24.915|25.130|0.99| |conv::Conv::(GFLOPS=6.637, K=[3 x 3], IN={1, 256, 75, 75}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|25.488|25.899|0.98| |conv::Conv::(GFLOPS=6.638, K=[3 x 3], IN={1, 128, 150, 150}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|27.346|27.390|1.00| |conv::Conv::(GFLOPS=6.641, K=[3 x 3], IN={1, 64, 150, 200}, OCN=192, PM=SAME, BIAS, OCV/CPU)|28.033|28.301|0.99| |conv::Conv::(GFLOPS=6.641, K=[3 x 3], IN={1, 64, 300, 300}, OCN=64, P=[1 x 1], BIAS, OCV/CPU)|50.216|49.970|1.00| |conv::Conv::(GFLOPS=6.814, K=[3 x 3], IN={1, 512, 38, 38}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|29.670|29.513|1.01| |conv::Conv::(GFLOPS=8.025, K=[3 x 3], IN={1, 1024, 19, 19}, OCN=1206, P=[1 x 1], BIAS, OCV/CPU)|50.565|49.634|1.02| |conv::Conv::(GFLOPS=9.986, K=[3 x 3], IN={1, 512, 46, 46}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|37.900|37.814|1.00| |conv::Conv::(GFLOPS=9.987, K=[3 x 3], IN={1, 256, 92, 92}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|41.367|39.742|1.04| |conv::Conv::(GFLOPS=9.989, K=[3 x 3], IN={1, 128, 184, 184}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|49.128|50.350|0.98| |conv::Conv::(GFLOPS=9.993, K=[3 x 3], IN={1, 64, 368, 368}, OCN=64, P=[1 x 1], BIAS, OCV/CPU)|79.643|80.645|0.99| |conv::Conv::(GFLOPS=10.087, K=[3 x 3], IN={1, 576, 38, 50}, OCN=512, PM=SAME, BIAS, OCV/CPU)|41.439|40.895|1.01| |conv::Conv::(GFLOPS=10.701, K=[3 x 3], IN={1, 512, 38, 38}, OCN=804, P=[1 x 1], BIAS, OCV/CPU)|46.504|46.220|1.01| |conv::Conv::(GFLOPS=11.797, K=[5 x 5], IN={1, 240, 64, 64}, OCN=240, PM=SAME, OCV/CPU)|98.086|96.842|1.01| |conv::Conv::(GFLOPS=11.797, K=[5 x 5], IN={1, 480, 32, 32}, OCN=480, PM=SAME, OCV/CPU)|102.447|97.299|1.05| 
|conv::Conv::(GFLOPS=16.987, K=[5 x 5], IN={1, 1152, 16, 16}, OCN=1152, PM=SAME, OCV/CPU)|145.047|144.996|1.00| |conv::Conv::(GFLOPS=23.122, K=[5 x 5], IN={1, 672, 32, 32}, OCN=672, PM=SAME, OCV/CPU)|206.104|195.543|1.05| ### Test on M1(ARM) platform |Name of Test|4.x|patch|4.x vs patch (x-factor)| |---|:-:|:-:|:-:| |conv1d::Conv1D::(GFLOPS=0.000, K=[3], IN={1, 2, 19}, OCN=2, G=2, S=2, P=(1, 1), BIAS, OCV/CPU)|0.001|0.001|0.97| |conv1d::Conv1D::(GFLOPS=0.000, K=[3], IN={1, 2, 25}, OCN=2, G=2, P=(2, 2), PM=SAME, OCV/CPU)|0.001|0.001|0.94| |conv1d::Conv1D::(GFLOPS=0.000, K=[3], IN={1, 6, 10}, OCN=6, PM=VALID, BIAS, OCV/CPU)|0.002|0.002|0.92| |conv3d::Conv3D::(GFLOPS=0.000, K=[1 x 1 x 1], IN={1, 4, 9, 10, 10}, OCN=4, S=[1 x 1 x 2], P=(1, 1) x (1, 1) x (1, 1), PM=VALID, OCV/CPU)|0.003|0.003|1.00| |conv3d::Conv3D::(GFLOPS=0.000, K=[1 x 1 x 1], IN={1, 8, 1, 10, 10}, OCN=8, G=8, P=(1, 1) x (1, 1) x (1, 1), BIAS, OCV/CPU)|0.003|0.003|1.00| |conv3d::Conv3D::(GFLOPS=0.000, K=[3 x 3 x 3], IN={1, 2, 19, 19, 19}, OCN=2, G=2, S=[2 x 2 x 2], P=(1, 1) x (1, 1) x (1, 1), BIAS, OCV/CPU)|0.031|0.031|1.00| |conv3d::Conv3D::(GFLOPS=0.000, K=[3 x 4 x 2], IN={1, 4, 8, 10, 10}, OCN=4, G=4, S=[1 x 2 x 1], BIAS, OCV/CPU)|0.009|0.009|1.00| |conv3d::Conv3D::(GFLOPS=0.001, K=[3 x 3 x 3], IN={1, 2, 25, 19, 19}, OCN=2, G=2, S=[1 x 2 x 2], P=(2, 2) x (2, 2) x (2, 2), PM=SAME, OCV/CPU)|0.066|0.066|1.01| |conv3d::Conv3D::(GFLOPS=0.002, K=[3 x 1 x 4], IN={1, 14, 5, 10, 10}, OCN=14, PM=SAME, OCV/CPU)|0.102|0.102|1.00| |conv3d::Conv3D::(GFLOPS=0.006, K=[5 x 5 x 5], IN={1, 4, 50, 19, 19}, OCN=4, S=[2 x 2 x 2], P=(1, 1) x (1, 1) x (1, 1), PM=VALID, OCV/CPU)|0.328|0.328|1.00| |conv3d::Conv3D::(GFLOPS=0.027, K=[3 x 3 x 3], IN={1, 6, 10, 38, 50}, OCN=6, PM=VALID, BIAS, OCV/CPU)|0.693|0.747|0.93| |conv3d::Conv3D::(GFLOPS=0.030, K=[5 x 5 x 5], IN={1, 6, 19, 19, 19}, OCN=6, G=2, OCV/CPU)|1.268|1.266|1.00| |conv3d::Conv3D::(GFLOPS=0.045, K=[7 x 7 x 7], IN={1, 2, 38, 38, 38}, OCN=2, S=[1 x 2 x 1], 
OCV/CPU)|3.530|3.581|0.99| |conv3d::Conv3D::(GFLOPS=0.053, K=[3 x 3 x 3], IN={1, 10, 98, 10, 10}, OCN=10, PM=SAME, OCV/CPU)|1.186|1.188|1.00| |conv3d::Conv3D::(GFLOPS=0.071, K=[7 x 7 x 7], IN={1, 6, 15, 19, 19}, OCN=6, S=[2 x 1 x 1], P=(3, 3) x (3, 3) x (3, 3), PM=SAME, BIAS, OCV/CPU)|2.682|2.683|1.00| |conv3d::Conv3D::(GFLOPS=0.093, K=[5 x 5 x 5], IN={1, 4, 40, 75, 75}, OCN=4, S=[2 x 2 x 2], OCV/CPU)|4.490|4.501|1.00| |conv3d::Conv3D::(GFLOPS=0.116, K=[5 x 5 x 5], IN={1, 2, 21, 75, 100}, OCN=2, BIAS, OCV/CPU)|8.914|8.938|1.00| |conv3d::Conv3D::(GFLOPS=1.267, K=[5 x 5 x 5], IN={1, 3, 75, 75, 100}, OCN=3, PM=SAME, BIAS, OCV/CPU)|69.819|69.876|1.00| |conv3d::Conv3D::(GFLOPS=1.343, K=[3 x 3 x 3], IN={1, 11, 9, 150, 200}, OCN=11, PM=VALID, BIAS, OCV/CPU)|24.058|22.420|1.07| |conv::Conv::(GFLOPS=0.177, K=[1 x 1], IN={1, 512, 26, 26}, OCN=256, OCV/CPU)|2.240|2.236|1.00| |conv::Conv::(GFLOPS=0.177, K=[1 x 1], IN={1, 1024, 13, 13}, OCN=512, OCV/CPU)|3.132|3.136|1.00| |conv::Conv::(GFLOPS=0.178, K=[1 x 1], IN={1, 256, 52, 52}, OCN=128, OCV/CPU)|1.920|1.919|1.00| |conv::Conv::(GFLOPS=0.210, K=[1 x 1], IN={1, 576, 38, 50}, OCN=96, PM=SAME, BIAS, OCV/CPU)|2.343|2.346|1.00| |conv::Conv::(GFLOPS=0.231, K=[3 x 3], IN={1, 128, 56, 56}, OCN=32, P=[1 x 1], OCV/CPU)|1.234|1.116|1.11| |conv::Conv::(GFLOPS=0.231, K=[3 x 3], IN={1, 256, 14, 14}, OCN=256, P=[1 x 1], OCV/CPU)|1.109|1.121|0.99| |conv::Conv::(GFLOPS=0.280, K=[1 x 1], IN={1, 576, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU)|3.197|3.084|1.04| |conv::Conv::(GFLOPS=0.302, K=[3 x 3], IN={1, 64, 64, 64}, OCN=64, PM=SAME, OCV/CPU)|1.123|1.148|0.98| |conv::Conv::(GFLOPS=0.357, K=[1 x 1], IN={1, 64, 208, 208}, OCN=64, OCV/CPU)|4.836|5.061|0.96| |conv::Conv::(GFLOPS=0.420, K=[3 x 3], IN={1, 96, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU)|1.535|1.463|1.05| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 128, 40, 40}, OCN=128, PM=SAME, OCV/CPU)|1.756|1.584|1.11| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 256, 20, 20}, OCN=256, PM=SAME, 
OCV/CPU)|1.821|1.820|1.00| |conv::Conv::(GFLOPS=0.472, K=[3 x 3], IN={1, 512, 10, 10}, OCN=512, PM=SAME, OCV/CPU)|7.049|6.672|1.06| |conv::Conv::(GFLOPS=0.561, K=[3 x 3], IN={1, 128, 38, 50}, OCN=128, PM=SAME, BIAS, OCV/CPU)|1.967|1.922|1.02| |conv::Conv::(GFLOPS=0.624, K=[3 x 3], IN={1, 128, 46, 46}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|1.943|1.977|0.98| |conv::Conv::(GFLOPS=0.701, K=[3 x 3], IN={1, 128, 38, 50}, OCN=160, PM=SAME, BIAS, OCV/CPU)|2.464|2.310|1.07| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 64, 104, 104}, OCN=64, P=[1 x 1], OCV/CPU)|2.860|2.904|0.98| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 128, 52, 52}, OCN=128, P=[1 x 1], OCV/CPU)|2.428|2.483|0.98| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 256, 26, 26}, OCN=256, P=[1 x 1], OCV/CPU)|2.955|2.983|0.99| |conv::Conv::(GFLOPS=0.798, K=[3 x 3], IN={1, 512, 13, 13}, OCN=512, P=[1 x 1], OCV/CPU)|4.328|4.484|0.97| |conv::Conv::(GFLOPS=0.830, K=[3 x 3], IN={1, 64, 75, 100}, OCN=96, PM=SAME, BIAS, OCV/CPU)|2.712|2.778|0.98| |conv::Conv::(GFLOPS=0.958, K=[3 x 3], IN={1, 192, 38, 38}, OCN=192, PM=SAME, OCV/CPU)|3.205|3.331|0.96| |conv::Conv::(GFLOPS=0.958, K=[3 x 3], IN={1, 384, 19, 19}, OCN=384, PM=SAME, OCV/CPU)|4.193|4.412|0.95| |conv::Conv::(GFLOPS=1.022, K=[3 x 3], IN={1, 576, 19, 19}, OCN=273, PM=SAME, BIAS, OCV/CPU)|5.026|4.565|1.10| |conv::Conv::(GFLOPS=1.112, K=[3 x 3], IN={1, 512, 10, 10}, OCN=1206, P=[1 x 1], BIAS, OCV/CPU)|14.490|14.213|1.02| |conv::Conv::(GFLOPS=1.181, K=[3 x 3], IN={1, 64, 160, 200}, OCN=128, S=[2 x 2], P=[1 x 1], BIAS, OCV/CPU)|14.886|14.003|1.06| |conv::Conv::(GFLOPS=1.182, K=[3 x 3], IN={1, 32, 320, 400}, OCN=64, S=[2 x 2], P=[1 x 1], BIAS, OCV/CPU)|15.923|15.184|1.05| |conv::Conv::(GFLOPS=1.195, K=[9 x 9], IN={1, 32, 240, 320}, OCN=3, P=[4 x 4], BIAS, OCV/CPU)|45.136|41.696|1.08| |conv::Conv::(GFLOPS=1.196, K=[3 x 3], IN={1, 384, 26, 26}, OCN=256, P=[1 x 1], OCV/CPU)|4.995|4.631|1.08| |conv::Conv::(GFLOPS=1.210, K=[3 x 3], IN={1, 32, 256, 256}, OCN=32, PM=SAME, 
OCV/CPU)|6.402|6.261|1.02| |conv::Conv::(GFLOPS=1.245, K=[3 x 3], IN={1, 64, 75, 75}, OCN=192, PM=SAME, BIAS, OCV/CPU)|4.478|3.965|1.13| |conv::Conv::(GFLOPS=1.245, K=[3 x 3], IN={1, 96, 75, 100}, OCN=96, PM=SAME, BIAS, OCV/CPU)|3.908|3.978|0.98| |conv::Conv::(GFLOPS=1.248, K=[3 x 3], IN={1, 256, 46, 46}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|4.176|4.206|0.99| |conv::Conv::(GFLOPS=1.258, K=[3 x 3], IN={1, 1280, 10, 10}, OCN=546, PM=SAME, BIAS, OCV/CPU)|21.509|21.136|1.02| |conv::Conv::(GFLOPS=1.261, K=[3 x 3], IN={1, 192, 38, 50}, OCN=192, PM=SAME, BIAS, OCV/CPU)|4.426|4.082|1.08| |conv::Conv::(GFLOPS=1.416, K=[3 x 3], IN={1, 128, 62, 82}, OCN=128, BIAS, OCV/CPU)|4.098|4.289|0.96| |conv::Conv::(GFLOPS=1.500, K=[3 x 3], IN={1, 128, 64, 84}, OCN=128, BIAS, OCV/CPU)|4.646|5.105|0.91| |conv::Conv::(GFLOPS=1.586, K=[3 x 3], IN={1, 128, 66, 86}, OCN=128, BIAS, OCV/CPU)|4.746|4.724|1.00| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 256, 26, 26}, OCN=512, P=[1 x 1], OCV/CPU)|5.614|5.779|0.97| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 256, 52, 52}, OCN=512, S=[2 x 2], P=[1 x 1], OCV/CPU)|21.909|20.718|1.06| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 512, 13, 13}, OCN=1024, P=[1 x 1], OCV/CPU)|8.256|8.290|1.00| |conv::Conv::(GFLOPS=1.595, K=[3 x 3], IN={1, 512, 26, 26}, OCN=1024, S=[2 x 2], P=[1 x 1], OCV/CPU)|25.196|23.267|1.08| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 64, 104, 104}, OCN=128, P=[1 x 1], OCV/CPU)|5.721|5.172|1.11| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 64, 208, 208}, OCN=128, S=[2 x 2], P=[1 x 1], OCV/CPU)|20.066|18.322|1.10| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 128, 52, 52}, OCN=256, P=[1 x 1], OCV/CPU)|4.448|4.542|0.98| |conv::Conv::(GFLOPS=1.596, K=[3 x 3], IN={1, 128, 104, 104}, OCN=256, S=[2 x 2], P=[1 x 1], OCV/CPU)|19.193|19.013|1.01| |conv::Conv::(GFLOPS=1.598, K=[3 x 3], IN={1, 32, 208, 208}, OCN=64, P=[1 x 1], OCV/CPU)|6.009|5.964|1.01| |conv::Conv::(GFLOPS=1.598, K=[3 x 3], IN={1, 32, 416, 416}, OCN=64, S=[2 x 2], P=[1 x 
1], OCV/CPU)|20.169|20.009|1.01| |conv::Conv::(GFLOPS=1.659, K=[3 x 3], IN={1, 960, 10, 10}, OCN=960, PM=SAME, OCV/CPU)|22.584|23.423|0.96| |conv::Conv::(GFLOPS=1.660, K=[3 x 3], IN={1, 128, 75, 75}, OCN=128, G=128, P=[1 x 1], BIAS, OCV/CPU)|0.372|0.504|0.74| |conv::Conv::(GFLOPS=1.660, K=[3 x 3], IN={1, 128, 75, 75}, OCN=128, PM=SAME, OCV/CPU)|5.426|5.456|0.99| |conv::Conv::(GFLOPS=1.675, K=[3 x 3], IN={1, 128, 68, 88}, OCN=128, BIAS, OCV/CPU)|4.945|5.221|0.95| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 256, 38, 38}, OCN=256, G=256, P=[1 x 1], BIAS, OCV/CPU)|0.210|0.261|0.81| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 256, 38, 38}, OCN=256, PM=SAME, OCV/CPU)|5.720|5.997|0.95| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, G=512, P=[1 x 1], BIAS, OCV/CPU)|0.149|0.161|0.93| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|7.154|7.225|0.99| |conv::Conv::(GFLOPS=1.704, K=[3 x 3], IN={1, 512, 19, 19}, OCN=512, PM=SAME, OCV/CPU)|7.184|7.223|0.99| |conv::Conv::(GFLOPS=1.766, K=[3 x 3], IN={1, 128, 70, 90}, OCN=128, BIAS, OCV/CPU)|5.324|5.343|1.00| |conv::Conv::(GFLOPS=1.859, K=[3 x 3], IN={1, 128, 72, 92}, OCN=128, BIAS, OCV/CPU)|5.114|5.238|0.98| |conv::Conv::(GFLOPS=1.888, K=[3 x 3], IN={1, 1024, 10, 10}, OCN=1024, G=1024, P=[1 x 1], BIAS, OCV/CPU)|0.111|0.121|0.92| |conv::Conv::(GFLOPS=1.888, K=[3 x 3], IN={1, 1024, 10, 10}, OCN=1024, PM=SAME, OCV/CPU)|25.907|26.804|0.97| |conv::Conv::(GFLOPS=1.954, K=[3 x 3], IN={1, 128, 74, 94}, OCN=128, BIAS, OCV/CPU)|5.695|5.654|1.01| |conv::Conv::(GFLOPS=1.995, K=[9 x 9], IN={1, 3, 320, 400}, OCN=32, P=[4 x 4], BIAS, OCV/CPU)|27.435|27.566|1.00| |conv::Conv::(GFLOPS=2.052, K=[3 x 3], IN={1, 128, 76, 96}, OCN=128, BIAS, OCV/CPU)|6.944|6.164|1.13| |conv::Conv::(GFLOPS=2.100, K=[3 x 3], IN={1, 144, 75, 75}, OCN=144, PM=SAME, OCV/CPU)|7.180|6.717|1.07| |conv::Conv::(GFLOPS=2.153, K=[3 x 3], IN={1, 128, 78, 98}, OCN=128, BIAS, OCV/CPU)|6.817|6.050|1.13| 
|conv::Conv::(GFLOPS=2.156, K=[3 x 3], IN={1, 576, 19, 19}, OCN=576, PM=SAME, OCV/CPU)|9.225|8.660|1.07| |conv::Conv::(GFLOPS=2.255, K=[3 x 3], IN={1, 128, 80, 100}, OCN=128, BIAS, OCV/CPU)|7.496|6.625|1.13| |conv::Conv::(GFLOPS=2.719, K=[3 x 3], IN={1, 96, 256, 256}, OCN=96, S=[2 x 2], PM=SAME, OCV/CPU)|35.520|36.056|0.99| |conv::Conv::(GFLOPS=3.319, K=[3 x 3], IN={1, 128, 75, 75}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|9.990|9.702|1.03| |conv::Conv::(GFLOPS=3.321, K=[3 x 3], IN={1, 64, 150, 150}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|10.517|10.746|0.98| |conv::Conv::(GFLOPS=3.398, K=[7 x 7], IN={1, 128, 46, 46}, OCN=128, P=[3 x 3], BIAS, OCV/CPU)|36.702|36.731|1.00| |conv::Conv::(GFLOPS=3.407, K=[3 x 3], IN={1, 512, 19, 19}, OCN=1024, D=[6 x 6], P=[6 x 6], BIAS, OCV/CPU)|41.035|38.280|1.07| |conv::Conv::(GFLOPS=3.408, K=[3 x 3], IN={1, 256, 38, 38}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|10.981|10.573|1.04| |conv::Conv::(GFLOPS=4.247, K=[3 x 3], IN={1, 480, 32, 32}, OCN=480, PM=SAME, OCV/CPU)|12.863|12.384|1.04| |conv::Conv::(GFLOPS=4.247, K=[5 x 5], IN={1, 144, 128, 128}, OCN=144, S=[2 x 2], PM=SAME, OCV/CPU)|50.437|54.088|0.93| |conv::Conv::(GFLOPS=4.566, K=[7 x 7], IN={1, 172, 46, 46}, OCN=128, P=[3 x 3], BIAS, OCV/CPU)|50.650|50.635|1.00| |conv::Conv::(GFLOPS=4.993, K=[3 x 3], IN={1, 256, 46, 46}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|14.696|14.606|1.01| |conv::Conv::(GFLOPS=4.993, K=[3 x 3], IN={1, 512, 46, 46}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|16.201|15.426|1.05| |conv::Conv::(GFLOPS=4.994, K=[3 x 3], IN={1, 128, 92, 92}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|16.061|14.292|1.12| |conv::Conv::(GFLOPS=4.997, K=[3 x 3], IN={1, 64, 184, 184}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|17.743|18.250|0.97| |conv::Conv::(GFLOPS=5.780, K=[5 x 5], IN={1, 672, 32, 32}, OCN=672, S=[2 x 2], PM=SAME, OCV/CPU)|77.909|78.165|1.00| |conv::Conv::(GFLOPS=6.116, K=[3 x 3], IN={1, 1152, 16, 16}, OCN=1152, PM=SAME, OCV/CPU)|21.579|21.879|0.99| |conv::Conv::(GFLOPS=6.118, K=[3 x 3], IN={1, 144, 128, 
128}, OCN=144, PM=SAME, OCV/CPU)|20.424|19.589|1.04| |conv::Conv::(GFLOPS=6.637, K=[3 x 3], IN={1, 256, 75, 75}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|19.389|19.461|1.00| |conv::Conv::(GFLOPS=6.638, K=[3 x 3], IN={1, 128, 150, 150}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|21.319|20.358|1.05| |conv::Conv::(GFLOPS=6.641, K=[3 x 3], IN={1, 64, 150, 200}, OCN=192, PM=SAME, BIAS, OCV/CPU)|22.609|21.826|1.04| |conv::Conv::(GFLOPS=6.641, K=[3 x 3], IN={1, 64, 300, 300}, OCN=64, P=[1 x 1], BIAS, OCV/CPU)|25.497|25.789|0.99| |conv::Conv::(GFLOPS=6.814, K=[3 x 3], IN={1, 512, 38, 38}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|21.966|22.108|0.99| |conv::Conv::(GFLOPS=8.025, K=[3 x 3], IN={1, 1024, 19, 19}, OCN=1206, P=[1 x 1], BIAS, OCV/CPU)|35.883|33.470|1.07| |conv::Conv::(GFLOPS=9.986, K=[3 x 3], IN={1, 512, 46, 46}, OCN=512, P=[1 x 1], BIAS, OCV/CPU)|31.041|29.314|1.06| |conv::Conv::(GFLOPS=9.987, K=[3 x 3], IN={1, 256, 92, 92}, OCN=256, P=[1 x 1], BIAS, OCV/CPU)|29.922|28.145|1.06| |conv::Conv::(GFLOPS=9.989, K=[3 x 3], IN={1, 128, 184, 184}, OCN=128, P=[1 x 1], BIAS, OCV/CPU)|31.624|31.148|1.02| |conv::Conv::(GFLOPS=9.993, K=[3 x 3], IN={1, 64, 368, 368}, OCN=64, P=[1 x 1], BIAS, OCV/CPU)|38.564|39.164|0.98| |conv::Conv::(GFLOPS=10.087, K=[3 x 3], IN={1, 576, 38, 50}, OCN=512, PM=SAME, BIAS, OCV/CPU)|31.502|30.269|1.04| |conv::Conv::(GFLOPS=10.701, K=[3 x 3], IN={1, 512, 38, 38}, OCN=804, P=[1 x 1], BIAS, OCV/CPU)|34.248|34.589|0.99| |conv::Conv::(GFLOPS=11.797, K=[5 x 5], IN={1, 240, 64, 64}, OCN=240, PM=SAME, OCV/CPU)|130.211|134.120|0.97| |conv::Conv::(GFLOPS=11.797, K=[5 x 5], IN={1, 480, 32, 32}, OCN=480, PM=SAME, OCV/CPU)|127.490|132.874|0.96| |conv::Conv::(GFLOPS=16.987, K=[5 x 5], IN={1, 1152, 16, 16}, OCN=1152, PM=SAME, OCV/CPU)|199.834|200.081|1.00| |conv::Conv::(GFLOPS=23.122, K=[5 x 5], IN={1, 672, 32, 32}, OCN=672, PM=SAME, OCV/CPU)|247.346|247.523|1.00| ### Pull Request Readiness Checklist See details at 
https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake ``` force_builders=Linux AVX2,Custom Win build_image:Custom Win=msvs2019 CPU_BASELINE:Custom Win=AVX512_SKX ``` --- modules/dnn/CMakeLists.txt | 3 + modules/dnn/src/layers/convolution_layer.cpp | 2 +- .../layers/cpu_kernels/conv_block.simd.hpp | 259 ++++ .../conv_depthwise.cpp} | 258 ++-- .../cpu_kernels/conv_depthwise.simd.hpp | 591 +++++++++ .../layers/cpu_kernels/conv_winograd_f63.cpp | 764 +++++++++++ .../cpu_kernels/conv_winograd_f63.simd.hpp | 886 +++++++++++++ .../convolution.cpp} | 560 +++++++- .../convolution.hpp} | 40 +- .../fast_convolution.avx2.cpp | 499 ------- .../fast_convolution.simd.hpp | 567 -------- .../fast_convolution/winograd_3x3s1_f63.cpp | 1153 ----------------- modules/dnn/src/layers/layers_common.simd.hpp | 561 -------- 13 files changed, 3167 insertions(+), 2976 deletions(-) create mode 100644 modules/dnn/src/layers/cpu_kernels/conv_block.simd.hpp rename modules/dnn/src/layers/{fast_convolution/depthwise_convolution.cpp => cpu_kernels/conv_depthwise.cpp} (91%) create mode 100644 modules/dnn/src/layers/cpu_kernels/conv_depthwise.simd.hpp create mode 100644 modules/dnn/src/layers/cpu_kernels/conv_winograd_f63.cpp create mode 100644 modules/dnn/src/layers/cpu_kernels/conv_winograd_f63.simd.hpp rename modules/dnn/src/layers/{fast_convolution/fast_convolution.cpp => cpu_kernels/convolution.cpp} (73%) rename 
modules/dnn/src/layers/{fast_convolution/fast_convolution.hpp => cpu_kernels/convolution.hpp} (69%) delete mode 100644 modules/dnn/src/layers/fast_convolution/fast_convolution.avx2.cpp delete mode 100644 modules/dnn/src/layers/fast_convolution/fast_convolution.simd.hpp delete mode 100644 modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index d285e544c0..88f4347bb6 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -10,6 +10,9 @@ set(the_description "Deep neural network module. It allows to load models from d ocv_add_dispatched_file_force_all("layers/layers_common" AVX AVX2 AVX512_SKX RVV LASX) ocv_add_dispatched_file_force_all("int8layers/layers_common" AVX2 AVX512_SKX LASX) +ocv_add_dispatched_file_force_all("layers/cpu_kernels/conv_block" AVX AVX2) +ocv_add_dispatched_file_force_all("layers/cpu_kernels/conv_depthwise" AVX AVX2 RVV LASX) +ocv_add_dispatched_file_force_all("layers/cpu_kernels/conv_winograd_f63" AVX AVX2) ocv_add_module(dnn opencv_core opencv_imgproc WRAP python java objc js) diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 5567a58a2a..3e62887bd7 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -72,7 +72,7 @@ using namespace cv::dnn::ocl4dnn; using namespace cv::dnn::cuda4dnn; #endif -#include "fast_convolution/fast_convolution.hpp" +#include "cpu_kernels/convolution.hpp" namespace cv { diff --git a/modules/dnn/src/layers/cpu_kernels/conv_block.simd.hpp b/modules/dnn/src/layers/cpu_kernels/conv_block.simd.hpp new file mode 100644 index 0000000000..71b17dcc9b --- /dev/null +++ b/modules/dnn/src/layers/cpu_kernels/conv_block.simd.hpp @@ -0,0 +1,259 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "opencv2/core/hal/intrin.hpp" + +namespace cv { +namespace dnn { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN + +void convBlock(int np, const float* a, const float* b, float* c, int ldc, bool init_c, const int convMR, const int convNR); + +#if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_AVX + +#if !CV_FMA3 // AVX workaround +#undef _mm256_fmadd_ps +#define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(c, _mm256_mul_ps(a, b)) +#endif + +void convBlock(int np, const float* a, const float* b, float* c, int ldc, bool init_c, const int convMR, const int convNR) +{ + CV_Assert(convMR == 4 && convNR == 24); + __m256 c00 = _mm256_set1_ps(0.f), c01 = c00, c02 = c00; + __m256 c10 = c00, c11 = c00, c12 = c00; + __m256 c20 = c00, c21 = c00, c22 = c00; + __m256 c30 = c00, c31 = c00, c32 = c00; + + __m256 a0 = _mm256_setzero_ps(), a1 = _mm256_setzero_ps(); + __m256 b0 = _mm256_setzero_ps(), b1 = _mm256_setzero_ps(), b2 = _mm256_setzero_ps(); + + for (int p = 0; p < np; p++, a += convMR, b += convNR) + { + a0 = _mm256_set1_ps(a[0]), a1 = _mm256_set1_ps(a[1]); + b0 = _mm256_load_ps(b), b1 = _mm256_load_ps(b + 8), b2 = _mm256_load_ps(b + 16); + + c00 = _mm256_fmadd_ps(b0, a0, c00); + c01 = _mm256_fmadd_ps(b1, a0, c01); + c02 = _mm256_fmadd_ps(b2, a0, c02); + + c10 = _mm256_fmadd_ps(b0, a1, c10); + c11 = _mm256_fmadd_ps(b1, a1, c11); + c12 = _mm256_fmadd_ps(b2, a1, c12); + + a0 = _mm256_set1_ps(a[2]), a1 = _mm256_set1_ps(a[3]); + + c20 = _mm256_fmadd_ps(b0, a0, c20); + c21 = _mm256_fmadd_ps(b1, a0, c21); + c22 = _mm256_fmadd_ps(b2, a0, c22); + + c30 = _mm256_fmadd_ps(b0, a1, c30); + c31 = _mm256_fmadd_ps(b1, a1, c31); + c32 = _mm256_fmadd_ps(b2, a1, c32); + } + + if (!init_c) + { + c00 = _mm256_add_ps(c00, _mm256_load_ps(c)); + c01 = _mm256_add_ps(c01, _mm256_load_ps(c + 8)); + c02 = _mm256_add_ps(c02, _mm256_load_ps(c + 
16)); + + c10 = _mm256_add_ps(c10, _mm256_load_ps(c + ldc)); + c11 = _mm256_add_ps(c11, _mm256_load_ps(c + ldc + 8)); + c12 = _mm256_add_ps(c12, _mm256_load_ps(c + ldc + 16)); + + c20 = _mm256_add_ps(c20, _mm256_load_ps(c + ldc*2)); + c21 = _mm256_add_ps(c21, _mm256_load_ps(c + ldc*2 + 8)); + c22 = _mm256_add_ps(c22, _mm256_load_ps(c + ldc*2 + 16)); + + c30 = _mm256_add_ps(c30, _mm256_load_ps(c + ldc*3)); + c31 = _mm256_add_ps(c31, _mm256_load_ps(c + ldc*3 + 8)); + c32 = _mm256_add_ps(c32, _mm256_load_ps(c + ldc*3 + 16)); + } + + _mm256_storeu_ps(c, c00), _mm256_storeu_ps(c+8, c01), _mm256_storeu_ps(c+16, c02); + _mm256_storeu_ps(c + ldc, c10), _mm256_storeu_ps(c + ldc + 8, c11), _mm256_storeu_ps(c + ldc + 16, c12); + _mm256_storeu_ps(c + ldc*2, c20), _mm256_storeu_ps(c + ldc*2 + 8, c21), _mm256_storeu_ps(c + ldc*2 + 16, c22); + _mm256_storeu_ps(c + ldc*3, c30), _mm256_storeu_ps(c + ldc*3 + 8, c31), _mm256_storeu_ps(c + ldc*3 + 16, c32); + _mm256_zeroupper(); +} + +#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +CV_CPU_OPTIMIZATION_NAMESPACE_END + +// NEON code work around. 
+namespace opt_NEON +{ +#if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_NEON + +void convBlock(int np, const float* a, const float* b, float* c, int ldc, bool init_c, const int convMR, const int convNR) +{ +#if CV_NEON_AARCH64 + if (convMR == 4 && convNR == 28) // AARCH64 + { + float32x4_t c00 = vdupq_n_f32(0.f), c01 = c00, c02 = c00, c03 = c00, c04 = c00, c05 = c00, c06 = c00; + float32x4_t c10 = vdupq_n_f32(0.f), c11 = c10, c12 = c10, c13 = c10, c14 = c10, c15 = c10, c16 = c10; + float32x4_t c20 = vdupq_n_f32(0.f), c21 = c20, c22 = c20, c23 = c20, c24 = c20, c25 = c20, c26 = c20; + float32x4_t c30 = vdupq_n_f32(0.f), c31 = c30, c32 = c30, c33 = c30, c34 = c30, c35 = c30, c36 = c30; + + for( int p = 0; p < np; p++, a += convMR, b += convNR ) + { + float32x4_t a0 = vld1q_f32(a), b0, b1, b2; + b0 = vld1q_f32(b); b1 = vld1q_f32(b + 4); b2 = vld1q_f32(b + 8); + + c00 = vfmaq_laneq_f32(c00, b0, a0, 0); + c01 = vfmaq_laneq_f32(c01, b1, a0, 0); + c02 = vfmaq_laneq_f32(c02, b2, a0, 0); + c10 = vfmaq_laneq_f32(c10, b0, a0, 1); + c11 = vfmaq_laneq_f32(c11, b1, a0, 1); + c12 = vfmaq_laneq_f32(c12, b2, a0, 1); + c20 = vfmaq_laneq_f32(c20, b0, a0, 2); + c21 = vfmaq_laneq_f32(c21, b1, a0, 2); + c22 = vfmaq_laneq_f32(c22, b2, a0, 2); + c30 = vfmaq_laneq_f32(c30, b0, a0, 3); + c31 = vfmaq_laneq_f32(c31, b1, a0, 3); + c32 = vfmaq_laneq_f32(c32, b2, a0, 3); + + b0 = vld1q_f32(b + 12); b1 = vld1q_f32(b + 16); b2 = vld1q_f32(b + 20); + + c03 = vfmaq_laneq_f32(c03, b0, a0, 0); + c04 = vfmaq_laneq_f32(c04, b1, a0, 0); + c05 = vfmaq_laneq_f32(c05, b2, a0, 0); + c13 = vfmaq_laneq_f32(c13, b0, a0, 1); + c14 = vfmaq_laneq_f32(c14, b1, a0, 1); + c15 = vfmaq_laneq_f32(c15, b2, a0, 1); + c23 = vfmaq_laneq_f32(c23, b0, a0, 2); + c24 = vfmaq_laneq_f32(c24, b1, a0, 2); + c25 = vfmaq_laneq_f32(c25, b2, a0, 2); + c33 = vfmaq_laneq_f32(c33, b0, a0, 3); + c34 = vfmaq_laneq_f32(c34, b1, a0, 3); + c35 = vfmaq_laneq_f32(c35, b2, a0, 3); + + b0 = vld1q_f32(b + 24); + c06 = vfmaq_laneq_f32(c06, 
b0, a0, 0); + c16 = vfmaq_laneq_f32(c16, b0, a0, 1); + c26 = vfmaq_laneq_f32(c26, b0, a0, 2); + c36 = vfmaq_laneq_f32(c36, b0, a0, 3); + } + + if (!init_c) + { + c00 = vaddq_f32(c00, vld1q_f32(c)); + c01 = vaddq_f32(c01, vld1q_f32(c + 4)); + c02 = vaddq_f32(c02, vld1q_f32(c + 8)); + c03 = vaddq_f32(c03, vld1q_f32(c + 12)); + c04 = vaddq_f32(c04, vld1q_f32(c + 16)); + c05 = vaddq_f32(c05, vld1q_f32(c + 20)); + c06 = vaddq_f32(c06, vld1q_f32(c + 24)); + + c10 = vaddq_f32(c10, vld1q_f32(c + ldc)); + c11 = vaddq_f32(c11, vld1q_f32(c + ldc + 4)); + c12 = vaddq_f32(c12, vld1q_f32(c + ldc + 8)); + c13 = vaddq_f32(c13, vld1q_f32(c + ldc + 12)); + c14 = vaddq_f32(c14, vld1q_f32(c + ldc + 16)); + c15 = vaddq_f32(c15, vld1q_f32(c + ldc + 20)); + c16 = vaddq_f32(c16, vld1q_f32(c + ldc + 24)); + + c20 = vaddq_f32(c20, vld1q_f32(c + ldc*2)); + c21 = vaddq_f32(c21, vld1q_f32(c + ldc*2 + 4)); + c22 = vaddq_f32(c22, vld1q_f32(c + ldc*2 + 8)); + c23 = vaddq_f32(c23, vld1q_f32(c + ldc*2 + 12)); + c24 = vaddq_f32(c24, vld1q_f32(c + ldc*2 + 16)); + c25 = vaddq_f32(c25, vld1q_f32(c + ldc*2 + 20)); + c26 = vaddq_f32(c26, vld1q_f32(c + ldc*2 + 24)); + + c30 = vaddq_f32(c30, vld1q_f32(c + ldc*3)); + c31 = vaddq_f32(c31, vld1q_f32(c + ldc*3 + 4)); + c32 = vaddq_f32(c32, vld1q_f32(c + ldc*3 + 8)); + c33 = vaddq_f32(c33, vld1q_f32(c + ldc*3 + 12)); + c34 = vaddq_f32(c34, vld1q_f32(c + ldc*3 + 16)); + c35 = vaddq_f32(c35, vld1q_f32(c + ldc*3 + 20)); + c36 = vaddq_f32(c36, vld1q_f32(c + ldc*3 + 24)); + } + + vst1q_f32(c, c00); vst1q_f32(c+4, c01); + vst1q_f32(c+8, c02); vst1q_f32(c+12, c03); + vst1q_f32(c+16, c04); vst1q_f32(c+20, c05); + vst1q_f32(c+24, c06); + + vst1q_f32(c+ldc, c10); vst1q_f32(c+ldc+4, c11); + vst1q_f32(c+ldc+8, c12); vst1q_f32(c+ldc+12, c13); + vst1q_f32(c+ldc+16, c14); vst1q_f32(c+ldc+20, c15); + vst1q_f32(c+ldc+24, c16); + + vst1q_f32(c+ldc*2, c20); vst1q_f32(c+ldc*2+4, c21); + vst1q_f32(c+ldc*2+8, c22); vst1q_f32(c+ldc*2+12, c23); + vst1q_f32(c+ldc*2+16, c24); 
vst1q_f32(c+ldc*2+20, c25); + vst1q_f32(c+ldc*2+24, c26); + + vst1q_f32(c+ldc*3, c30); vst1q_f32(c+ldc*3+4, c31); + vst1q_f32(c+ldc*3+8, c32); vst1q_f32(c+ldc*3+12, c33); + vst1q_f32(c+ldc*3+16, c34); vst1q_f32(c+ldc*3+20, c35); + vst1q_f32(c+ldc*3+24, c36); + } + else +#endif + if (convMR == 4 && convNR == 12) // ARMv7 + { + float32x4_t c0 = vdupq_n_f32(0.f), c1 = c0, c2 = c0; + float32x4_t c3 = vdupq_n_f32(0.f), c4 = c3, c5 = c3; + float32x4_t c6 = vdupq_n_f32(0.f), c7 = c6, c8 = c6; + float32x4_t c9 = vdupq_n_f32(0.f), c10 = c9, c11 = c9; + + float32x2_t a0 = vdup_n_f32(0.0f), a1 = a0; + float32x4_t b0 = vdupq_n_f32(0.0f), b1 = vdupq_n_f32(0.0f), b2 = vdupq_n_f32(0.0f); + + for (int p = 0; p < np; p++, a += convMR, b += convNR) + { + a0 = vld1_f32(a), a1 = vld1_f32(a+2); + b0 = vld1q_f32(b), b1 = vld1q_f32(b + 4), b2 = vld1q_f32(b + 8); + + c0 = vmlaq_lane_f32(c0, b0, a0, 0); + c1 = vmlaq_lane_f32(c1, b1, a0, 0); + c2 = vmlaq_lane_f32(c2, b2, a0, 0); + + c3 = vmlaq_lane_f32(c3, b0, a0, 1); + c4 = vmlaq_lane_f32(c4, b1, a0, 1); + c5 = vmlaq_lane_f32(c5, b2, a0, 1); + + c6 = vmlaq_lane_f32(c6, b0, a1, 0); + c7 = vmlaq_lane_f32(c7, b1, a1, 0); + c8 = vmlaq_lane_f32(c8, b2, a1, 0); + + c9 = vmlaq_lane_f32(c9 , b0, a1, 1); + c10 = vmlaq_lane_f32(c10, b1, a1, 1); + c11 = vmlaq_lane_f32(c11, b2, a1, 1); + } + + if (!init_c) + { + c0 = vaddq_f32(c0, vld1q_f32(c)); + c1 = vaddq_f32(c1, vld1q_f32(c + 4)); + c2 = vaddq_f32(c2, vld1q_f32(c + 8)); + + c3 = vaddq_f32(c3, vld1q_f32(c + ldc)); + c4 = vaddq_f32(c4, vld1q_f32(c + ldc + 4)); + c5 = vaddq_f32(c5, vld1q_f32(c + ldc + 8)); + + c6 = vaddq_f32(c6, vld1q_f32(c + ldc * 2)); + c7 = vaddq_f32(c7, vld1q_f32(c + ldc * 2 + 4)); + c8 = vaddq_f32(c8, vld1q_f32(c + ldc * 2 + 8)); + + c9 = vaddq_f32(c9 , vld1q_f32(c + ldc * 3)); + c10 = vaddq_f32(c10, vld1q_f32(c + ldc * 3 + 4)); + c11 = vaddq_f32(c11, vld1q_f32(c + ldc * 3 + 8)); + } + + vst1q_f32(c, c0), vst1q_f32(c+4, c1), vst1q_f32(c+8, c2); + vst1q_f32(c + ldc, c3), 
vst1q_f32(c + ldc + 4, c4), vst1q_f32(c + ldc + 8, c5); + vst1q_f32(c + ldc*2, c6), vst1q_f32(c + ldc*2 + 4, c7), vst1q_f32(c + ldc*2 + 8, c8); + vst1q_f32(c + ldc*3, c9), vst1q_f32(c + ldc*3 + 4, c10), vst1q_f32(c + ldc*3 + 8, c11); + } + else + CV_Error(Error::StsNotImplemented, "Unsupported convMR and/or convNR in opt_NEON::convBlock"); +} + +#endif +} +}} // namespace cv::dnn diff --git a/modules/dnn/src/layers/fast_convolution/depthwise_convolution.cpp b/modules/dnn/src/layers/cpu_kernels/conv_depthwise.cpp similarity index 91% rename from modules/dnn/src/layers/fast_convolution/depthwise_convolution.cpp rename to modules/dnn/src/layers/cpu_kernels/conv_depthwise.cpp index b690156941..3e969336ad 100644 --- a/modules/dnn/src/layers/fast_convolution/depthwise_convolution.cpp +++ b/modules/dnn/src/layers/cpu_kernels/conv_depthwise.cpp @@ -2,20 +2,147 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. -// This file is modified from the ficus (https://github.com/vpisarev/ficus/blob/master/lib/NN/OpConv.fx). -// Here is the original license: -/* - This file is a part of ficus language project. 
- See ficus/LICENSE for the licensing terms -*/ - #include "../../precomp.hpp" -#include "fast_convolution.hpp" -#include "../layers_common.hpp" +#include "convolution.hpp" + +#include "conv_depthwise.simd.hpp" +#include "layers/cpu_kernels/conv_depthwise.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content namespace cv { namespace dnn { -static void depthWiseBlockConv2D(const float* wptr, +void depthWiseBlockConv2D(const float* wptr, + int kernel_h, int kernel_w, + int stride_h, int stride_w, + int dilation_h, int dilation_w, + int pad_t, int pad_l, + const float* biasptr, const float* relu, + const float* inptr_, + int height, int width, + float* outptr_, + int out_d, int outH, int outW, bool fusedAdd); + +void depthWiseBlockConv1D(const float* wptr, + int kernel_w, int stride_w, int dilation_w, int pad_l, + const float* biasptr, const float* relu, + const float* inptr_, int width, + float* outptr_, + int out_d, int outW, bool fusedAdd); + +void runDepthwise(InputArray _input, OutputArray _output, const Ptr& conv, ActivationLayer* activ_, + const std::vector& reluslope, bool fusedAdd) +{ + Mat input = _input.getMat(); + Mat output = _output.getMat(); + MatShape inputShape = shape(input); + MatShape outputShape = shape(output); + + CV_Assert(inputShape.size() == 3 || inputShape.size() == 4); + CV_Assert(inputShape.size() == outputShape.size()); + + int conv_dim = conv->conv_dim; + CV_Assert((conv_dim == CONV_2D || conv_dim == CONV_1D) && + "DNN: Currently we do not support depth-wise for Convolution 3D!"); + + ActivationLayer* activ = reluslope.empty() ? activ_ : nullptr; + int N = inputShape[0], C = inputShape[1]; + + int Hi = conv_dim == CONV_1D ? 1 : inputShape[inputShape.size() - 2]; + int Wi = inputShape[inputShape.size() - 1]; + + int K = conv->K, Hk = conv->Hk, Wk = conv->Wk; + + int H0 = conv_dim == CONV_1D ? 
1 : outputShape[outputShape.size() - 2]; + int W0 = outputShape[outputShape.size() - 1]; + int ngroups = conv->ngroups; + + const size_t inp_planesize = (size_t) Hi * Wi; + const size_t out_planesize = (size_t) H0 * W0; + + CV_Assert(ngroups > 1 && ngroups == K && ngroups == C); + + int stride_h = conv->stride_h, stride_w = conv->stride_w; + int dilation_h = conv->dilation_h, dilation_w = conv->dilation_w; + + int pad_top = conv->pad_top, pad_bottom = conv->pad_bottom; + int pad_left = conv->pad_left, pad_right = conv->pad_right; + + int ksize = Hk * Wk; + + const int VEC_NLANES = 32; + int padded_ksize = ((ksize + VEC_NLANES-1) / VEC_NLANES) * VEC_NLANES; + + const float *inp = input.ptr(); + float *out = output.ptr(); + +#if CV_TRY_AVX2 || CV_TRY_AVX || CV_TRY_RVV + // TODO: remove the following limitation, need change code in conv_depthwise.simd.hpp. + bool canRunOpt = Wi >= 16 + dilation_w*(Wk - 1) && !fusedAdd; +#endif + std::vector ofstab_(3 * ksize, 0); + int *ofstab = ofstab_.data(); + int *yxtab = ofstab + ksize; + + for (int k = 0; k < ksize; k++) + { + int y = k < ksize ? k / Wk : 0; + int x = k < ksize ? 
k % Wk : 0; + int dy = y * dilation_h, dx = x * dilation_w; + yxtab[k * 2] = dy; + yxtab[k * 2 + 1] = dx; + ofstab[k] = dy * Wi + dx; + } + + const float *weights0 = conv->weightsBufPtr, *bias = conv->biasBuf.data(); + const float* relu = reluslope.data(); + CV_Assert(ksize > 1 || (pad_left == 0 && pad_right == 0 && pad_top == 0 && pad_bottom == 0)); + + parallel_for_(Range(0, N * C), [&](const Range &r0) { + for (int nc = r0.start; nc < r0.end; nc++) + { + int c = nc % C; + const float *inptr0 = inp + inp_planesize * nc; + float *outptr0 = out + out_planesize * nc; + + const float *weights = weights0 + c * padded_ksize; + + if (conv_dim == CONV_2D) + { +#if CV_TRY_AVX2 + if(canRunOpt && conv->useAVX2) + opt_AVX2::fastDepthwiseConv(weights, Hk, Wk, stride_h, stride_w, dilation_h, dilation_w, + pad_top, pad_left, bias, relu, inptr0, Hi, Wi, outptr0, c, H0, W0); + else +#endif +#if CV_TRY_AVX + if(canRunOpt && conv->useAVX) + opt_AVX::fastDepthwiseConv(weights, Hk, Wk, stride_h, stride_w, dilation_h, dilation_w, + pad_top, pad_left, bias, relu, inptr0, Hi, Wi, outptr0, c, H0, W0); + else +#endif +#if CV_TRY_RVV + if(canRunOpt && conv->useRVV) + opt_RVV::fastDepthwiseConv(weights, Hk, Wk, stride_h, stride_w, dilation_h, dilation_w, + pad_top, pad_left, bias, relu, inptr0, Hi, Wi, outptr0, c, H0, W0); + else +#endif + depthWiseBlockConv2D(weights, Hk, Wk, stride_h, stride_w, dilation_h, dilation_w, + pad_top, pad_left, bias, relu, inptr0, Hi, Wi, outptr0, c, H0, W0, fusedAdd); + } + else // conv_dim == CONV_1D, spatial branch for depth-wise Conv1D. 
+ { + depthWiseBlockConv1D(weights, Wk, stride_w, dilation_w, pad_left, bias, relu, inptr0, Wi, outptr0, c, W0, fusedAdd); + } + + if (activ) + activ->forwardSlice(outptr0, outptr0, (int) out_planesize, out_planesize, c, c+1); + }}); +} + +/****************************************************************************************\ + SIMD and no-SIMD code for depthWiseBlockConv +\****************************************************************************************/ + +void depthWiseBlockConv2D(const float* wptr, int kernel_h, int kernel_w, int stride_h, int stride_w, int dilation_h, int dilation_w, @@ -199,7 +326,7 @@ static void depthWiseBlockConv2D(const float* wptr, } } -static void depthWiseBlockConv1D(const float* wptr, +void depthWiseBlockConv1D(const float* wptr, int kernel_w, int stride_w, int dilation_w, int pad_l, const float* biasptr, const float* relu, const float* inptr_, int width, @@ -332,114 +459,5 @@ static void depthWiseBlockConv1D(const float* wptr, } } -void runDepthwise(InputArray _input, OutputArray _output, const Ptr& conv, ActivationLayer* activ_, - const std::vector& reluslope, bool fusedAdd) -{ - Mat input = _input.getMat(); - Mat output = _output.getMat(); - MatShape inputShape = shape(input); - MatShape outputShape = shape(output); - - CV_Assert(inputShape.size() == 3 || inputShape.size() == 4); - CV_Assert(inputShape.size() == outputShape.size()); - - int conv_dim = conv->conv_dim; - CV_Assert((conv_dim == CONV_2D || conv_dim == CONV_1D) && - "DNN: Currently we do not support depth-wise for Convolution 3D!"); - - ActivationLayer* activ = reluslope.empty() ? activ_ : nullptr; - int N = inputShape[0], C = inputShape[1]; - - int Hi = conv_dim == CONV_1D ? 1 : inputShape[inputShape.size() - 2]; - int Wi = inputShape[inputShape.size() - 1]; - - int K = conv->K, Hk = conv->Hk, Wk = conv->Wk; - - int H0 = conv_dim == CONV_1D ? 
1 : outputShape[outputShape.size() - 2]; - int W0 = outputShape[outputShape.size() - 1]; - int ngroups = conv->ngroups; - - const size_t inp_planesize = (size_t) Hi * Wi; - const size_t out_planesize = (size_t) H0 * W0; - - CV_Assert(ngroups > 1 && ngroups == K && ngroups == C); - - int stride_h = conv->stride_h, stride_w = conv->stride_w; - int dilation_h = conv->dilation_h, dilation_w = conv->dilation_w; - - int pad_top = conv->pad_top, pad_bottom = conv->pad_bottom; - int pad_left = conv->pad_left, pad_right = conv->pad_right; - - int ksize = Hk * Wk; - - const int VEC_NLANES = 32; - int padded_ksize = ((ksize + VEC_NLANES-1) / VEC_NLANES) * VEC_NLANES; - - const float *inp = input.ptr(); - float *out = output.ptr(); - -#if CV_TRY_AVX2 || CV_TRY_AVX || CV_TRY_RVV - // TODO: remove the following limitation, need change code in layers_common.simd.hpp. - bool canRunOpt = Wi >= 16 + dilation_w*(Wk - 1) && !fusedAdd; -#endif - std::vector ofstab_(3 * ksize, 0); - int *ofstab = ofstab_.data(); - int *yxtab = ofstab + ksize; - - for (int k = 0; k < ksize; k++) - { - int y = k < ksize ? k / Wk : 0; - int x = k < ksize ? 
k % Wk : 0; - int dy = y * dilation_h, dx = x * dilation_w; - yxtab[k * 2] = dy; - yxtab[k * 2 + 1] = dx; - ofstab[k] = dy * Wi + dx; - } - - const float *weights0 = conv->weightsBufPtr, *bias = conv->biasBuf.data(); - const float* relu = reluslope.data(); - CV_Assert(ksize > 1 || (pad_left == 0 && pad_right == 0 && pad_top == 0 && pad_bottom == 0)); - - parallel_for_(Range(0, N * C), [&](const Range &r0) { - for (int nc = r0.start; nc < r0.end; nc++) - { - int c = nc % C; - const float *inptr0 = inp + inp_planesize * nc; - float *outptr0 = out + out_planesize * nc; - - const float *weights = weights0 + c * padded_ksize; - - if (conv_dim == CONV_2D) - { -#if CV_TRY_AVX2 - if(canRunOpt && conv->useAVX2) - opt_AVX2::fastDepthwiseConv(weights, Hk, Wk, stride_h, stride_w, dilation_h, dilation_w, - pad_top, pad_left, bias, relu, inptr0, Hi, Wi, outptr0, c, H0, W0); - else -#endif -#if CV_TRY_AVX - if(canRunOpt && conv->useAVX) - opt_AVX::fastDepthwiseConv(weights, Hk, Wk, stride_h, stride_w, dilation_h, dilation_w, - pad_top, pad_left, bias, relu, inptr0, Hi, Wi, outptr0, c, H0, W0); - else -#endif -#if CV_TRY_RVV - if(canRunOpt && conv->useRVV) - opt_RVV::fastDepthwiseConv(weights, Hk, Wk, stride_h, stride_w, dilation_h, dilation_w, - pad_top, pad_left, bias, relu, inptr0, Hi, Wi, outptr0, c, H0, W0); - else -#endif - depthWiseBlockConv2D(weights, Hk, Wk, stride_h, stride_w, dilation_h, dilation_w, - pad_top, pad_left, bias, relu, inptr0, Hi, Wi, outptr0, c, H0, W0, fusedAdd); - } - else // conv_dim == CONV_1D, spatial branch for depth-wise Conv1D. 
- { - depthWiseBlockConv1D(weights, Wk, stride_w, dilation_w, pad_left, bias, relu, inptr0, Wi, outptr0, c, W0, fusedAdd); - } - - if (activ) - activ->forwardSlice(outptr0, outptr0, (int) out_planesize, out_planesize, c, c+1); - }}); -} }} // namespace cv::dnn diff --git a/modules/dnn/src/layers/cpu_kernels/conv_depthwise.simd.hpp b/modules/dnn/src/layers/cpu_kernels/conv_depthwise.simd.hpp new file mode 100644 index 0000000000..1d561e9864 --- /dev/null +++ b/modules/dnn/src/layers/cpu_kernels/conv_depthwise.simd.hpp @@ -0,0 +1,591 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "opencv2/core/hal/intrin.hpp" + +namespace cv { +namespace dnn { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN + +void fastDepthwiseConv(const float* weights, + int kernel_h, int kernel_w, + int stride_h, int stride_w, + int dilation_h, int dilation_w, + int pad_t, int pad_l, + const float* bias, const float* relu, + const float* inptr, + int height, int width, + float* outptr, + int out_d, int outH, int outW); + +#if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_AVX + +#if !CV_FMA3 // AVX workaround +#undef _mm256_fmadd_ps +#define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(c, _mm256_mul_ps(a, b)) +#endif + +static inline void _mm256_load_deinterleave(const float* ptr, __m256& a, __m256& b) +{ + __m256 t0 = _mm256_loadu_ps(ptr); + __m256 t1 = _mm256_loadu_ps(ptr + 8); + + __m256 lo = _mm256_permute2f128_ps(t0, t1, 0+2*16); + __m256 hi = _mm256_permute2f128_ps(t0, t1, 1+3*16); + a = _mm256_shuffle_ps(lo, hi, 0x88); + b = _mm256_shuffle_ps(lo, hi, 0xdd); +} + +void fastDepthwiseConv( const float* wptr, + int kernel_h, int kernel_w, + int stride_h, int stride_w, + int dilation_h, int dilation_w, + int pad_t, int pad_l, + const float* biasptr, const float* relu, + const float* inptr_, + int height, int width, + float* outptr_, + int 
out_d, int outH, int outW ) +{ + const float w00_ = wptr[0], w01_ = wptr[1], w02_ = wptr[2], + w10 = wptr[3], w11 = wptr[4], w12 = wptr[5], + w20_ = wptr[6], w21_ = wptr[7], w22_ = wptr[8]; + int outW1 = min(outW, (width - dilation_w*(kernel_w - 1) + pad_l)/stride_w); + float relu_coeff = relu ? relu[out_d] : 1.f, bias = biasptr[out_d]; + + for (int out_i = 0; out_i < outH; out_i++) + { + int in_i = out_i * stride_h - pad_t, out_j = 0; + const float* imgptr0 = inptr_ + in_i*width; + const float* imgptr1 = imgptr0 + dilation_h*width; + const float* imgptr2 = imgptr0 + (dilation_h*2)*width; + float out, w00 = w00_, w01 = w01_, w02 = w02_; + float w20 = w20_, w21 = w21_, w22 = w22_; + if (in_i < 0) + { + w00 = w01 = w02 = 0.f; + imgptr0 = imgptr1; + } + else if (in_i + dilation_h*(kernel_h-1) >= height) + { + w20 = w21 = w22 = 0.f; + imgptr2 = imgptr1; + } + float* outptr = outptr_ + out_i*outW; + if (pad_l > 0) + { + out = imgptr0[0]*w01 + imgptr0[dilation_w]*w02 + + imgptr1[0]*w11 + imgptr1[dilation_w]*w12 + + imgptr2[0]*w21 + imgptr2[dilation_w]*w22 + bias; + if (relu) + out = out > 0.f ? 
out : out*relu_coeff; + outptr[0] = out; + out_j = 1; + } + + if (stride_w == 1 || (stride_w == 2 && dilation_w == 1)) + { + const int VECSZ = 8; + __m256 vw00 = _mm256_set1_ps(w00), vw01 = _mm256_set1_ps(w01), vw02 = _mm256_set1_ps(w02), + vw10 = _mm256_set1_ps(w10), vw11 = _mm256_set1_ps(w11), vw12 = _mm256_set1_ps(w12), + vw20 = _mm256_set1_ps(w20), vw21 = _mm256_set1_ps(w21), vw22 = _mm256_set1_ps(w22); + __m256 z = _mm256_setzero_ps(), vbias = _mm256_set1_ps(bias), vrc = _mm256_set1_ps(relu_coeff); + + if( stride_w == 1 ) + for( ; out_j < outW1; out_j += VECSZ ) + { + if (out_j + VECSZ > outW1 && out_j > pad_l) + out_j = outW1 - VECSZ; + int in_j = out_j * stride_w - pad_l; + __m256 v00 = _mm256_loadu_ps(imgptr0 + in_j), + v01 = _mm256_loadu_ps(imgptr0 + in_j + dilation_w), + v02 = _mm256_loadu_ps(imgptr0 + in_j + dilation_w*2), + v10 = _mm256_loadu_ps(imgptr1 + in_j), + v11 = _mm256_loadu_ps(imgptr1 + in_j + dilation_w), + v12 = _mm256_loadu_ps(imgptr1 + in_j + dilation_w*2), + v20 = _mm256_loadu_ps(imgptr2 + in_j), + v21 = _mm256_loadu_ps(imgptr2 + in_j + dilation_w), + v22 = _mm256_loadu_ps(imgptr2 + in_j + dilation_w*2); + + __m256 vout0 = _mm256_fmadd_ps(v00, vw00, vbias); + __m256 vout1 = _mm256_mul_ps(v01, vw01); + __m256 vout2 = _mm256_mul_ps(v02, vw02); + + vout0 = _mm256_fmadd_ps(v10, vw10, vout0); + vout1 = _mm256_fmadd_ps(v11, vw11, vout1); + vout2 = _mm256_fmadd_ps(v12, vw12, vout2); + + vout0 = _mm256_fmadd_ps(v20, vw20, vout0); + vout1 = _mm256_fmadd_ps(v21, vw21, vout1); + vout2 = _mm256_fmadd_ps(v22, vw22, vout2); + + vout0 = _mm256_add_ps(_mm256_add_ps(vout0, vout1), vout2); + if (relu) + { + __m256 m = _mm256_cmp_ps(vout0, z, _CMP_GT_OQ); + vout0 = _mm256_blendv_ps(_mm256_mul_ps(vout0, vrc), vout0, m); + } + _mm256_storeu_ps(outptr + out_j, vout0); + } + else + for( ; out_j < outW1; out_j += VECSZ ) + { + if (out_j + VECSZ > outW1 && out_j > pad_l) + out_j = outW1 - VECSZ; + int in_j = out_j * stride_w - pad_l; + __m256 v00, v01, v02, v10, 
v11, v12, v20, v21, v22, unused; + _mm256_load_deinterleave(imgptr0 + in_j, v00, v01); + _mm256_load_deinterleave(imgptr0 + in_j + 2, v02, unused); + _mm256_load_deinterleave(imgptr1 + in_j, v10, v11); + _mm256_load_deinterleave(imgptr1 + in_j + 2, v12, unused); + _mm256_load_deinterleave(imgptr2 + in_j, v20, v21); + _mm256_load_deinterleave(imgptr2 + in_j + 2, v22, unused); + + __m256 vout0 = _mm256_fmadd_ps(v00, vw00, vbias); + __m256 vout1 = _mm256_mul_ps(v01, vw01); + __m256 vout2 = _mm256_mul_ps(v02, vw02); + + vout0 = _mm256_fmadd_ps(v10, vw10, vout0); + vout1 = _mm256_fmadd_ps(v11, vw11, vout1); + vout2 = _mm256_fmadd_ps(v12, vw12, vout2); + + vout0 = _mm256_fmadd_ps(v20, vw20, vout0); + vout1 = _mm256_fmadd_ps(v21, vw21, vout1); + vout2 = _mm256_fmadd_ps(v22, vw22, vout2); + + vout0 = _mm256_add_ps(_mm256_add_ps(vout0, vout1), vout2); + if (relu) + { + __m256 m = _mm256_cmp_ps(vout0, z, _CMP_GT_OQ); + vout0 = _mm256_blendv_ps(_mm256_mul_ps(vout0, vrc), vout0, m); + } + _mm256_storeu_ps(outptr + out_j, vout0); + } + } + + for (; out_j < outW1; out_j++) + { + int in_j = out_j * stride_w - pad_l; + out = imgptr0[in_j]*w00 + imgptr0[in_j + dilation_w]*w01 + imgptr0[in_j + dilation_w*2]*w02 + + imgptr1[in_j]*w10 + imgptr1[in_j + dilation_w]*w11 + imgptr1[in_j + dilation_w*2]*w12 + + imgptr2[in_j]*w20 + imgptr2[in_j + dilation_w]*w21 + imgptr2[in_j + dilation_w*2]*w22 + bias; + if (relu) + out = out > 0.f ? 
out : out*relu_coeff; + outptr[out_j] = out; + } + + for (; out_j < outW; out_j++ ) + { + int in_j0 = out_j * stride_w - pad_l, in_j1 = in_j0 + dilation_w, in_j2 = in_j0 + dilation_w*2; + float s0 = 1.f, s1 = 1.f, s2 = 1.f; + if (in_j0 >= width) + { + in_j0 = 0; + s0 = 0.f; + } + if (in_j1 >= width) + { + in_j1 = 0; + s1 = 0.f; + } + if (in_j2 >= width) + { + in_j2 = 0; + s2 = 0.f; + } + out = imgptr0[in_j0]*w00*s0 + imgptr0[in_j1]*w01*s1 + imgptr0[in_j2]*w02*s2 + + imgptr1[in_j0]*w10*s0 + imgptr1[in_j1]*w11*s1 + imgptr1[in_j2]*w12*s2 + + imgptr2[in_j0]*w20*s0 + imgptr2[in_j1]*w21*s1 + imgptr2[in_j2]*w22*s2 + bias; + if (relu) + out = out > 0.f ? out : out*relu_coeff; + outptr[out_j] = out; + } + } + _mm256_zeroupper(); +} + +#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +#if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_RVV + +/* +Example for load_deinterleave: + input: ptr[16] = {1,2,3, ... ,14,15,16} + output: a = {1, 3, 5, 7, 9, 11, 13, 15} + output: b = {2, 4, 6, 8,10, 12, 14, 16} +*/ +static inline void vfloat32m2_load_deinterleave(const float* ptr, vfloat32m2_t& a, vfloat32m2_t& b, int vl) +{ + vuint64m4_t mask = vmv_v_x_u64m4(1,vl*2); + vuint32m4_t mask_re = vreinterpret_v_u64m4_u32m4(mask); + vbool8_t mask0 = vmseq_vx_u32m4_b8 (mask_re, 1, vl*2); + vbool8_t mask1 = vmseq_vx_u32m4_b8 (mask_re, 0, vl*2); + vfloat32m4_t tempa = vundefined_f32m4(), tempb = vundefined_f32m4(); + vfloat32m4_t vw = vle32_v_f32m4(ptr, vl*2); + tempa = vcompress_vm_f32m4(mask0, tempa, vw, vl*2); + tempb = vcompress_vm_f32m4(mask1, tempb, vw, vl*2); + /* The following instructions have not to be supported by the GNU toolchain. + So we temporarily use store and load instead. 
+ // a = vlmul_trunc_v_f32m4_f32m2(tempa); + // b = vlmul_trunc_v_f32m4_f32m2(tempb); + */ + cv::AutoBuffer cvBuffer(sizeof(float)*vl*2); + float* buffer = (float*)cvBuffer.data(); + vse32_v_f32m4(buffer, tempa, vl); + a = vle32_v_f32m2(buffer, vl); + vse32_v_f32m4(buffer, tempb, vl); + b = vle32_v_f32m2(buffer, vl); +} + +void fastDepthwiseConv( const float* wptr, + int kernel_h, int kernel_w, + int stride_h, int stride_w, + int dilation_h, int dilation_w, + int pad_t, int pad_l, + const float* biasptr, const float* relu, + const float* inptr_, + int height, int width, + float* outptr_, + int out_d, int outH, int outW ) +{ + int vl; + const float w00_ = wptr[0], w01_ = wptr[1], w02_ = wptr[2], + w10 = wptr[3], w11 = wptr[4], w12 = wptr[5], + w20_ = wptr[6], w21_ = wptr[7], w22_ = wptr[8]; + int outW1 = std::min(outW, (width - dilation_w*(kernel_w - 1) + pad_l)/stride_w); + float relu_coeff = relu ? relu[out_d] : 1.f, bias = biasptr[out_d]; + + for (int out_i = 0; out_i < outH; out_i++) + { + int in_i = out_i * stride_h - pad_t, out_j = 0; + const float* imgptr0 = inptr_ + in_i*width; + const float* imgptr1 = imgptr0 + dilation_h*width; + const float* imgptr2 = imgptr0 + (dilation_h*2)*width; + float out, w00 = w00_, w01 = w01_, w02 = w02_; + float w20 = w20_, w21 = w21_, w22 = w22_; + if (in_i < 0) + { + w00 = w01 = w02 = 0.f; + imgptr0 = imgptr1; + } + else if (in_i + dilation_h*(kernel_h-1) >= height) + { + w20 = w21 = w22 = 0.f; + imgptr2 = imgptr1; + } + float* outptr = outptr_ + out_i*outW; + if (pad_l > 0) + { + out = imgptr0[0]*w01 + imgptr0[dilation_w]*w02 + + imgptr1[0]*w11 + imgptr1[dilation_w]*w12 + + imgptr2[0]*w21 + imgptr2[dilation_w]*w22 + bias; + if (relu) + out = out > 0.f ? 
out : out*relu_coeff; + outptr[0] = out; + out_j = 1; + } + + if (stride_w == 1 || (stride_w == 2 && dilation_w == 1)) + { + int avl = outW1 - out_j; + if( stride_w == 1 ) + for( ; out_j < outW1; out_j += vl, avl -= vl) + { + vl = vsetvl_e32m2(avl); + int in_j = out_j * stride_w - pad_l; + vfloat32m2_t v00 = vle32_v_f32m2(imgptr0 + in_j, vl), + v01 = vle32_v_f32m2(imgptr0 + in_j + dilation_w, vl), + v02 = vle32_v_f32m2(imgptr0 + in_j + dilation_w*2, vl), + v10 = vle32_v_f32m2(imgptr1 + in_j, vl), + v11 = vle32_v_f32m2(imgptr1 + in_j + dilation_w, vl), + v12 = vle32_v_f32m2(imgptr1 + in_j + dilation_w*2, vl), + v20 = vle32_v_f32m2(imgptr2 + in_j, vl), + v21 = vle32_v_f32m2(imgptr2 + in_j + dilation_w, vl), + v22 = vle32_v_f32m2(imgptr2 + in_j + dilation_w*2, vl); + + vfloat32m2_t vout0 = vfmul_vf_f32m2(v00, w00, vl); + vfloat32m2_t vout1 = vfmul_vf_f32m2(v01, w01, vl); + vfloat32m2_t vout2 = vfmul_vf_f32m2(v02, w02, vl); + vout0 = vfadd_vf_f32m2(vout0, bias, vl); + + vout0 = vfmacc_vf_f32m2(vout0, w10, v10, vl); + vout1 = vfmacc_vf_f32m2(vout1, w11, v11, vl); + vout2 = vfmacc_vf_f32m2(vout2, w12, v12, vl); + + vout0 = vfmacc_vf_f32m2(vout0, w20, v20, vl); + vout1 = vfmacc_vf_f32m2(vout1, w21, v21, vl); + vout2 = vfmacc_vf_f32m2(vout2, w22, v22, vl); + + vout0 = vfadd_vv_f32m2(vfadd_vv_f32m2(vout0, vout1, vl), vout2, vl); + if (relu) + { + vbool16_t m = vmfgt_vf_f32m2_b16(vout0, 0, vl); + vout0 = vmerge_vvm_f32m2(m, vfmul_vf_f32m2(vout0, relu_coeff, vl), vout0, vl); + } + vse32_v_f32m2(outptr + out_j, vout0, vl); + } + else //stride_w == 2 && dilation_w == 1 + for( ; out_j < outW1; out_j += vl, avl -= vl) + { + vl = vsetvl_e32m2(avl); + int in_j = out_j * stride_w - pad_l; + vfloat32m2_t v00, v01, v02, v10, v11, v12, v20, v21, v22, unused; + vfloat32m2_load_deinterleave(imgptr0 + in_j, v00, v01, vl); + vfloat32m2_load_deinterleave(imgptr0 + in_j + 2, v02, unused, vl); + vfloat32m2_load_deinterleave(imgptr1 + in_j, v10, v11, vl); + vfloat32m2_load_deinterleave(imgptr1 
+ in_j + 2, v12, unused, vl); + vfloat32m2_load_deinterleave(imgptr2 + in_j, v20, v21, vl); + vfloat32m2_load_deinterleave(imgptr2 + in_j + 2, v22, unused, vl); + + vfloat32m2_t vout0 = vfmul_vf_f32m2(v00, w00, vl); + vfloat32m2_t vout1 = vfmul_vf_f32m2(v01, w01, vl); + vfloat32m2_t vout2 = vfmul_vf_f32m2(v02, w02, vl); + vout0 = vfadd_vf_f32m2(vout0, bias, vl); + + vout0 = vfmacc_vf_f32m2(vout0, w10, v10, vl); + vout1 = vfmacc_vf_f32m2(vout1, w11, v11, vl); + vout2 = vfmacc_vf_f32m2(vout2, w12, v12, vl); + + vout0 = vfmacc_vf_f32m2(vout0, w20, v20, vl); + vout1 = vfmacc_vf_f32m2(vout1, w21, v21, vl); + vout2 = vfmacc_vf_f32m2(vout2, w22, v22, vl); + + vout0 = vfadd_vv_f32m2(vfadd_vv_f32m2(vout0, vout1, vl), vout2, vl); + if (relu) + { + vbool16_t m = vmfgt_vf_f32m2_b16(vout0, 0, vl); + vout0 = vmerge_vvm_f32m2(m, vfmul_vf_f32m2(vout0, relu_coeff, vl), vout0, vl); + } + vse32_v_f32m2(outptr + out_j, vout0, vl); + } + } + + for (; out_j < outW1; out_j++) + { + int in_j = out_j * stride_w - pad_l; + out = imgptr0[in_j]*w00 + imgptr0[in_j + dilation_w]*w01 + imgptr0[in_j + dilation_w*2]*w02 + + imgptr1[in_j]*w10 + imgptr1[in_j + dilation_w]*w11 + imgptr1[in_j + dilation_w*2]*w12 + + imgptr2[in_j]*w20 + imgptr2[in_j + dilation_w]*w21 + imgptr2[in_j + dilation_w*2]*w22 + bias; + if (relu) + out = out > 0.f ? out : out*relu_coeff; + outptr[out_j] = out; + } + + for (; out_j < outW; out_j++ ) + { + int in_j0 = out_j * stride_w - pad_l, in_j1 = in_j0 + dilation_w, in_j2 = in_j0 + dilation_w*2; + float s0 = 1.f, s1 = 1.f, s2 = 1.f; + if (in_j0 >= width) + { + in_j0 = 0; + s0 = 0.f; + } + if (in_j1 >= width) + { + in_j1 = 0; + s1 = 0.f; + } + if (in_j2 >= width) + { + in_j2 = 0; + s2 = 0.f; + } + out = imgptr0[in_j0]*w00*s0 + imgptr0[in_j1]*w01*s1 + imgptr0[in_j2]*w02*s2 + + imgptr1[in_j0]*w10*s0 + imgptr1[in_j1]*w11*s1 + imgptr1[in_j2]*w12*s2 + + imgptr2[in_j0]*w20*s0 + imgptr2[in_j1]*w21*s1 + imgptr2[in_j2]*w22*s2 + bias; + if (relu) + out = out > 0.f ? 
out : out*relu_coeff; + outptr[out_j] = out; + } + } +} + +#endif // CV_RVV + +#if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_LASX + +static inline void _v256_load_deinterleave(const float* ptr, __m256& a, __m256& b) +{ + __m256 t0 = (__m256)__lasx_xvld(ptr, 0); + __m256 t1 = (__m256)__lasx_xvld(ptr, 8*4); + + __m256 lo = (__m256)__lasx_xvpermi_q(t0, t1, 2+0*16); + __m256 hi = (__m256)__lasx_xvpermi_q(t0, t1, 3+1*16); + + a = (__m256)__lasx_xvpermi_w(hi, lo, 0x88); + b = (__m256)__lasx_xvpermi_w(hi, lo, 0xdd); +} + +void fastDepthwiseConv( const float* wptr, + int kernel_h, int kernel_w, + int stride_h, int stride_w, + int dilation_h, int dilation_w, + int pad_t, int pad_l, + const float* biasptr, const float* relu, + const float* inptr_, + int height, int width, + float* outptr_, + int out_d, int outH, int outW ) +{ + const float w00_ = wptr[0], w01_ = wptr[1], w02_ = wptr[2], + w10 = wptr[3], w11 = wptr[4], w12 = wptr[5], + w20_ = wptr[6], w21_ = wptr[7], w22_ = wptr[8]; + int outW1 = min(outW, (width - dilation_w*(kernel_w - 1) + pad_l)/stride_w); + float relu_coeff = relu ? relu[out_d] : 1.f, bias = biasptr[out_d]; + + for (int out_i = 0; out_i < outH; out_i++) + { + int in_i = out_i * stride_h - pad_t, out_j = 0; + const float* imgptr0 = inptr_ + in_i*width; + const float* imgptr1 = imgptr0 + dilation_h*width; + const float* imgptr2 = imgptr0 + (dilation_h*2)*width; + float out, w00 = w00_, w01 = w01_, w02 = w02_; + float w20 = w20_, w21 = w21_, w22 = w22_; + if (in_i < 0) + { + w00 = w01 = w02 = 0.f; + imgptr0 = imgptr1; + } + else if (in_i + dilation_h*(kernel_h-1) >= height) + { + w20 = w21 = w22 = 0.f; + imgptr2 = imgptr1; + } + float* outptr = outptr_ + out_i*outW; + if (pad_l > 0) + { + out = imgptr0[0]*w01 + imgptr0[dilation_w]*w02 + + imgptr1[0]*w11 + imgptr1[dilation_w]*w12 + + imgptr2[0]*w21 + imgptr2[dilation_w]*w22 + bias; + if (relu) + out = out > 0.f ? 
out : out*relu_coeff; + outptr[0] = out; + out_j = 1; + } + + if (stride_w == 1 || (stride_w == 2 && dilation_w == 1)) + { + const int VECSZ = 8; + __m256 vw00 = _v256_setall_ps(w00), vw01 = _v256_setall_ps(w01), vw02 = _v256_setall_ps(w02), + vw10 = _v256_setall_ps(w10), vw11 = _v256_setall_ps(w11), vw12 = _v256_setall_ps(w12), + vw20 = _v256_setall_ps(w20), vw21 = _v256_setall_ps(w21), vw22 = _v256_setall_ps(w22); + __m256 z = (__m256)__lasx_xvxor_v((__m256i)vw00, (__m256i)vw00), + vbias = _v256_setall_ps(bias), vrc = _v256_setall_ps(relu_coeff); + + if( stride_w == 1 ) + for( ; out_j < outW1; out_j += VECSZ ) + { + if (out_j + VECSZ > outW1 && out_j > pad_l) + out_j = outW1 - VECSZ; + int in_j = out_j * stride_w - pad_l; + __m256 v00 = (__m256)__lasx_xvld(imgptr0 + in_j, 0), + v01 = (__m256)__lasx_xvld(imgptr0 + in_j + dilation_w, 0), + v02 = (__m256)__lasx_xvld(imgptr0 + in_j + dilation_w*2, 0), + v10 = (__m256)__lasx_xvld(imgptr1 + in_j, 0), + v11 = (__m256)__lasx_xvld(imgptr1 + in_j + dilation_w, 0), + v12 = (__m256)__lasx_xvld(imgptr1 + in_j + dilation_w*2, 0), + v20 = (__m256)__lasx_xvld(imgptr2 + in_j, 0), + v21 = (__m256)__lasx_xvld(imgptr2 + in_j + dilation_w, 0), + v22 = (__m256)__lasx_xvld(imgptr2 + in_j + dilation_w*2, 0); + + __m256 vout0 = __lasx_xvfmadd_s(v00, vw00, vbias); + __m256 vout1 = __lasx_xvfmul_s(v01, vw01); + __m256 vout2 = __lasx_xvfmul_s(v02, vw02); + + vout0 = __lasx_xvfmadd_s(v10, vw10, vout0); + vout1 = __lasx_xvfmadd_s(v11, vw11, vout1); + vout2 = __lasx_xvfmadd_s(v12, vw12, vout2); + + vout0 = __lasx_xvfmadd_s(v20, vw20, vout0); + vout1 = __lasx_xvfmadd_s(v21, vw21, vout1); + vout2 = __lasx_xvfmadd_s(v22, vw22, vout2); + + vout0 = __lasx_xvfadd_s(__lasx_xvfadd_s(vout0, vout1), vout2); + if (relu) + { + __m256i m = __lasx_xvfcmp_clt_s(z, vout0); + vout0 = (__m256)__lasx_xvbitsel_v((__m256i)__lasx_xvfmul_s(vout0, vrc), (__m256i)vout0, m); + } + __lasx_xvst(vout0, outptr + out_j, 0); + } + else + for( ; out_j < outW1; out_j += VECSZ 
) + { + if (out_j + VECSZ > outW1 && out_j > pad_l) + out_j = outW1 - VECSZ; + int in_j = out_j * stride_w - pad_l; + __m256 v00, v01, v02, v10, v11, v12, v20, v21, v22, unused; + _v256_load_deinterleave(imgptr0 + in_j, v00, v01); + _v256_load_deinterleave(imgptr0 + in_j + 2, v02, unused); + _v256_load_deinterleave(imgptr1 + in_j, v10, v11); + _v256_load_deinterleave(imgptr1 + in_j + 2, v12, unused); + _v256_load_deinterleave(imgptr2 + in_j, v20, v21); + _v256_load_deinterleave(imgptr2 + in_j + 2, v22, unused); + + __m256 vout0 = __lasx_xvfmadd_s(v00, vw00, vbias); + __m256 vout1 = __lasx_xvfmul_s(v01, vw01); + __m256 vout2 = __lasx_xvfmul_s(v02, vw02); + + vout0 = __lasx_xvfmadd_s(v10, vw10, vout0); + vout1 = __lasx_xvfmadd_s(v11, vw11, vout1); + vout2 = __lasx_xvfmadd_s(v12, vw12, vout2); + + vout0 = __lasx_xvfmadd_s(v20, vw20, vout0); + vout1 = __lasx_xvfmadd_s(v21, vw21, vout1); + vout2 = __lasx_xvfmadd_s(v22, vw22, vout2); + + vout0 = __lasx_xvfadd_s(__lasx_xvfadd_s(vout0, vout1), vout2); + if (relu) + { + __m256i m = __lasx_xvfcmp_clt_s(z, vout0); + vout0 = (__m256)__lasx_xvbitsel_v((__m256i)__lasx_xvfmul_s(vout0, vrc), (__m256i)vout0, m); + } + __lasx_xvst(vout0, outptr + out_j, 0); + } + } + + for (; out_j < outW1; out_j++) + { + int in_j = out_j * stride_w - pad_l; + out = imgptr0[in_j]*w00 + imgptr0[in_j + dilation_w]*w01 + imgptr0[in_j + dilation_w*2]*w02 + + imgptr1[in_j]*w10 + imgptr1[in_j + dilation_w]*w11 + imgptr1[in_j + dilation_w*2]*w12 + + imgptr2[in_j]*w20 + imgptr2[in_j + dilation_w]*w21 + imgptr2[in_j + dilation_w*2]*w22 + bias; + if (relu) + out = out > 0.f ? 
out : out*relu_coeff; + outptr[out_j] = out; + } + + for (; out_j < outW; out_j++ ) + { + int in_j0 = out_j * stride_w - pad_l, in_j1 = in_j0 + dilation_w, in_j2 = in_j0 + dilation_w*2; + float s0 = 1.f, s1 = 1.f, s2 = 1.f; + if (in_j0 >= width) + { + in_j0 = 0; + s0 = 0.f; + } + if (in_j1 >= width) + { + in_j1 = 0; + s1 = 0.f; + } + if (in_j2 >= width) + { + in_j2 = 0; + s2 = 0.f; + } + out = imgptr0[in_j0]*w00*s0 + imgptr0[in_j1]*w01*s1 + imgptr0[in_j2]*w02*s2 + + imgptr1[in_j0]*w10*s0 + imgptr1[in_j1]*w11*s1 + imgptr1[in_j2]*w12*s2 + + imgptr2[in_j0]*w20*s0 + imgptr2[in_j1]*w21*s1 + imgptr2[in_j2]*w22*s2 + bias; + if (relu) + out = out > 0.f ? out : out*relu_coeff; + outptr[out_j] = out; + } + } +} + +#endif // CV_LASX + +CV_CPU_OPTIMIZATION_NAMESPACE_END +}} // namespace diff --git a/modules/dnn/src/layers/cpu_kernels/conv_winograd_f63.cpp b/modules/dnn/src/layers/cpu_kernels/conv_winograd_f63.cpp new file mode 100644 index 0000000000..27998e4bcc --- /dev/null +++ b/modules/dnn/src/layers/cpu_kernels/conv_winograd_f63.cpp @@ -0,0 +1,764 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// This file is modified from the ficus (https://github.com/vpisarev/ficus/blob/master/lib/NN/OpConv_Winograd.fx). +// Here is the original license: +/* + This file is a part of ficus language project. + See ficus/LICENSE for the licensing terms +*/ + +#include "../../precomp.hpp" +#include "convolution.hpp" + +#include "conv_winograd_f63.simd.hpp" +#include "layers/cpu_kernels/conv_winograd_f63.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content + +namespace cv { namespace dnn { + +#if CV_NEON || CV_SIMD128 || CV_TRY_AVX2 +enum { VEC_ALIGN = 32, DFT_TYPE = CV_32F }; // Memory alignment. 
+ +void winofunc_accum_f32(const float* inwptr, const float* wptr, float* outbuf, int Cg, int iblock, + const int winoIblock, const int winoKblock, const int winoAtomF32, const int winoNatomF32); + +/*Input transform*/ +void winofunc_BtXB_8x8_f32(const float* inptr, int inpstep, + float* outptr, int Cg, const int winoIblock, const int winoAtomF32); + +/*Output transform*/ +void winofunc_AtXA_8x8_f32(const float* inptr, int inpstep, float* bpptr, int bpstep, float* outptr, int outstep, + float bias, float minval, float maxval, bool ifMinMaxAct); + + +int runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _output, const Ptr& conv, + int ntasks, float minval, float maxval, ActivationLayer* activ, bool ifMinMaxAct) +{ // Two parallel_for_ passes over ntasks tasks: Phase 1 writes the transformed input blocks into wbuf_all, Phase 2 accumulates them against conv->weightsWinoBufPtr, applies the output transform and stores into _output. Always returns 1. + Mat input = _input.getMat(); + Mat output = _output.getMat(); + Mat fusedAddMat = _fusedAddMat.getMat(); + + MatShape inputShape = shape(input); + MatShape outputShape = shape(output); + CV_Assert(inputShape.size() == 4 && outputShape.size() == 4); + + int N = inputShape[0], C = inputShape[1], Hi = inputShape[2], Wi = inputShape[3]; // [N, C, H, W] + int K = conv->K; + int H0 = outputShape[2], W0 = outputShape[3]; + + int pad_top = conv->pad_top; + int pad_left = conv->pad_left; + + int ngroups = conv->ngroups, Cg = C/ngroups, Kg = K/ngroups; + int Kg_nblocks = (Kg + CONV_WINO_KBLOCK - 1)/CONV_WINO_KBLOCK; + const size_t inp_planesize = (size_t)Hi*Wi; + const size_t out_planesize = (size_t)H0*W0; + + int blocks_per_row = (W0+CONV_WINO_STEP-1)/CONV_WINO_STEP; + int blocks_per_plane = ((H0+CONV_WINO_STEP-1)/CONV_WINO_STEP)*blocks_per_row; + int blocks_per_plane_aligned = ((blocks_per_plane + + CONV_WINO_IBLOCK-1)/CONV_WINO_IBLOCK)*CONV_WINO_IBLOCK; + + size_t totalbufsize = (size_t)N*C*blocks_per_plane_aligned*CONV_WINO_AREA; + + AutoBuffer _buf; + _buf.allocate(totalbufsize + VEC_ALIGN); + float* wbuf_all = alignPtr(_buf.data(), VEC_ALIGN); + + float* inp = input.ptr(); + float* out = output.ptr(); + + float* fusedAddPtr = 
fusedAddMat.empty() ? nullptr : fusedAddMat.ptr(); + + // Phase 1. compute forward Winograd transforms for all input blocks, + // all input planes, all samples in the batch. + // [TODO]: maybe, if there are too many input channels, it makes sense to + // transform only part of input channels at once and then compute the partial + // accumulated sums (i.e. update the output buffers several times, + // rather than compute them in one pass). + parallel_for_(Range(0, ntasks), [&](const Range& r0) { + for (int task_id = r0.start; task_id < r0.end; task_id++) + { + int nc0 = (int)(((int64_t)N*C)*task_id/ntasks); // 64-bit intermediate, same as ngk0/ngk1 in Phase 2 + int nc1 = (int)(((int64_t)N*C)*(task_id+1)/ntasks); + for(; nc0 < nc1; nc0++) + { + int n = nc0 / C; + int c = nc0 - n*C; + int g = c / Cg; + c -= g*Cg; + for (int block_id = 0; block_id < blocks_per_plane; block_id += CONV_WINO_IBLOCK) + { + for (int db = 0; db < CONV_WINO_IBLOCK; db++) + { + size_t inwofs = ((size_t)(n*ngroups + g)*blocks_per_plane_aligned + + block_id)*Cg*CONV_WINO_AREA + + (c*CONV_WINO_IBLOCK + db)*CONV_WINO_ATOM_F32; // offset computed in size_t (as totalbufsize is) so it cannot overflow int + float* inwptr = (float*)wbuf_all + inwofs; + + if (block_id + db < blocks_per_plane) + { + int y0 = (block_id + db) / blocks_per_row; + int x0 = (block_id + db) - y0 * blocks_per_row; + y0 = y0*CONV_WINO_STEP - pad_top; + x0 = x0*CONV_WINO_STEP - pad_left; + bool partial = y0 < 0 || y0 + CONV_WINO_SIZE > Hi || + x0 < 0 || x0 + CONV_WINO_SIZE > Wi; + int dx1 = 0, dx2 = CONV_WINO_SIZE, dy1 = 0, dy2 = CONV_WINO_SIZE; + int inpstep = Wi; + + float inpbuf[CONV_WINO_AREA]; + float* inptr0 = (float*)inp + nc0*inp_planesize + y0*Wi + x0; + float* inptr = inptr0; + + if (partial) + { + memset(inpbuf, 0, sizeof(inpbuf)); + dy1 = -y0 > 0 ? -y0 : 0; + dy2 = Hi - y0 < CONV_WINO_SIZE ? Hi - y0 : CONV_WINO_SIZE; + + if (dy2 < dy1) {dy2 = dy1 = 0;} + dx1 = -x0 > 0 ? -x0 : 0; + dx2 = Wi - x0 < CONV_WINO_SIZE ? 
Wi - x0 : CONV_WINO_SIZE; + + if (dx2 < dx1) {dx2 = dx1 = 0;} + inptr0 -= y0*Wi + x0; + + if (dx1 < dx2 && dy1 < dy2) + { + for(int dy = dy1; dy < dy2; dy++) + memcpy(&inpbuf[dy*CONV_WINO_SIZE + dx1], + inptr0 + (y0+dy)*Wi + (x0+dx1), + (dx2-dx1)*sizeof(inpbuf[0])); + } + + inptr = inpbuf; + inpstep = CONV_WINO_SIZE; + } +#if CV_TRY_AVX2 + if (conv->useAVX2) + opt_AVX2::winofunc_BtXB_8x8_f32(inptr, inpstep, inwptr, Cg, CONV_WINO_IBLOCK, CONV_WINO_ATOM_F32); + else +#endif +#if CV_TRY_AVX + if (conv->useAVX) + opt_AVX::winofunc_BtXB_8x8_f32(inptr, inpstep, inwptr, Cg, CONV_WINO_IBLOCK, CONV_WINO_ATOM_F32); + else +#endif +#if CV_NEON && CV_NEON_AARCH64 + if (conv->useNEON) + opt_NEON::winofunc_BtXB_8x8_f32(inptr, inpstep, inwptr, Cg, CONV_WINO_IBLOCK, CONV_WINO_ATOM_F32); + else +#endif + winofunc_BtXB_8x8_f32(inptr, inpstep, inwptr, Cg, CONV_WINO_IBLOCK, CONV_WINO_ATOM_F32); + } + else + { + for (int i = 0; i < CONV_WINO_NATOMS_F32; i++, inwptr += CONV_WINO_IBLOCK*CONV_WINO_ATOM_F32) + memset(inwptr, 0, CONV_WINO_ATOM_F32*sizeof(inwptr[0])); // NOTE(review): this stride has no Cg factor, while winofunc_BtXB_8x8_f32 stores atoms winoIblock*winoAtomF32*Cg apart - confirm which is intended + } + } + } + } + }}); + + // Phase 2. compute elemwise-weighted sums of transformed blocks, + // apply inverse Winograd transforms to the sums, + // add bias, apply activation function if any and store the results. 
+ parallel_for_(Range(0, ntasks), [&](const Range& r0) { + for (int task_id = r0.start; task_id < r0.end; task_id++) + { + size_t out_wbuf_size = CONV_WINO_AREA*CONV_WINO_KBLOCK*CONV_WINO_IBLOCK; + size_t outbuf_size = CONV_WINO_AREA; + AutoBuffer out_wbuf_, outbuf_; + out_wbuf_.allocate(out_wbuf_size + VEC_ALIGN); + float* out_wbuf = alignPtr(out_wbuf_.data(), VEC_ALIGN); + outbuf_.allocate(outbuf_size + VEC_ALIGN); + float* outbuf = alignPtr(outbuf_.data(), VEC_ALIGN); + + memset(out_wbuf, 0, out_wbuf_size * sizeof(float)); + memset(outbuf, 0, outbuf_size * sizeof(float)); + + int ngk0 = (int)(((int64_t)N*Kg_nblocks*ngroups)*task_id/ntasks); + int ngk1 = (int)(((int64_t)N*Kg_nblocks*ngroups)*(task_id+1)/ntasks); + + for(; ngk0 < ngk1; ngk0++) + { + int n = ngk0 / (Kg_nblocks*ngroups); + int gk0 = ngk0 % (Kg_nblocks*ngroups); + int g = gk0 / Kg_nblocks; + int k0 = (gk0 % Kg_nblocks)*CONV_WINO_KBLOCK; + int k1 = k0 + CONV_WINO_KBLOCK <= Kg ? k0 + CONV_WINO_KBLOCK : Kg; + + for (int block_id0 = 0; block_id0 < blocks_per_plane; block_id0 += CONV_WINO_IBLOCK) + { + int block_id1 = block_id0 + CONV_WINO_IBLOCK; + block_id1 = block_id1 < blocks_per_plane ? 
block_id1 : blocks_per_plane; + size_t inwofs = ((size_t)(n*ngroups + g)*blocks_per_plane_aligned + block_id0)*Cg*CONV_WINO_AREA; // size_t math, as for totalbufsize + size_t wofs = ((size_t)g*Kg_nblocks*CONV_WINO_KBLOCK + k0)*Cg*CONV_WINO_AREA; + + float* inwptr = wbuf_all + inwofs; + const float* wptr = conv->weightsWinoBufPtr + wofs; + +#if CV_TRY_AVX2 + if (conv->useAVX2) + opt_AVX2::winofunc_accum_f32(inwptr, wptr, out_wbuf, Cg, block_id1 - block_id0, CONV_WINO_IBLOCK, + CONV_WINO_KBLOCK, CONV_WINO_ATOM_F32, CONV_WINO_NATOMS_F32); + else +#endif +#if CV_TRY_AVX + if (conv->useAVX) + opt_AVX::winofunc_accum_f32(inwptr, wptr, out_wbuf, Cg, block_id1 - block_id0, CONV_WINO_IBLOCK, + CONV_WINO_KBLOCK, CONV_WINO_ATOM_F32, CONV_WINO_NATOMS_F32); + else +#endif +#if CV_NEON && CV_NEON_AARCH64 + if (conv->useNEON) + opt_NEON::winofunc_accum_f32(inwptr, wptr, out_wbuf, Cg, block_id1 - block_id0, CONV_WINO_IBLOCK, + CONV_WINO_KBLOCK, CONV_WINO_ATOM_F32, CONV_WINO_NATOMS_F32); + else +#endif + + winofunc_accum_f32(inwptr, wptr, out_wbuf, Cg, block_id1 - block_id0, CONV_WINO_IBLOCK, + CONV_WINO_KBLOCK, CONV_WINO_ATOM_F32, CONV_WINO_NATOMS_F32); + for (int k = k0; k < k1; k++) + { + float biasv = conv->biasBuf[g*Kg + k]; + for (int block_id = block_id0; block_id < block_id1; block_id++) + { + int y0 = block_id / blocks_per_row; + int x0 = block_id - y0 * blocks_per_row; + y0 = y0*CONV_WINO_STEP; + x0 = x0*CONV_WINO_STEP; + int dy1 = H0 - y0; + if (dy1 > CONV_WINO_STEP) dy1 = CONV_WINO_STEP; + int dx1 = W0 - x0; + if (dx1 > CONV_WINO_STEP) dx1 = CONV_WINO_STEP; + assert(dx1 > 0 && dy1 > 0); + bool partial = activ || dy1 < CONV_WINO_STEP || dx1 < CONV_WINO_STEP; + size_t outofs = (n*K + g*Kg + k)*out_planesize + y0*W0 + x0; + int outstep = W0; + + float* outptr0 = (float*)out + outofs; + float* pbptr0 = fusedAddPtr ? 
fusedAddPtr + outofs : nullptr; + float *outptr = outptr0, *bpptr = pbptr0; + + if (partial) + { + outptr = outbuf; + outstep = CONV_WINO_SIZE; + if (pbptr0) + { + bpptr = outbuf; + for (int y = 0; y < dy1; y++) + memcpy(outbuf + y*CONV_WINO_SIZE, pbptr0 + y*W0, + dx1*sizeof(pbptr0[0])); + } + } +#if CV_TRY_AVX2 + if (conv->useAVX2) // dispatch to the AVX2 build, matching the accum and BtXB calls in this function + opt_AVX2::winofunc_AtXA_8x8_f32(out_wbuf + ((k - k0)*CONV_WINO_IBLOCK + (block_id - block_id0))*CONV_WINO_AREA, CONV_WINO_SIZE, + bpptr, outstep, outptr, outstep, biasv, minval, maxval, ifMinMaxAct); + else +#endif +#if CV_TRY_AVX + if (conv->useAVX) + opt_AVX::winofunc_AtXA_8x8_f32(out_wbuf + ((k - k0)*CONV_WINO_IBLOCK + (block_id - block_id0))*CONV_WINO_AREA, CONV_WINO_SIZE, + bpptr, outstep, outptr, outstep, biasv, minval, maxval, ifMinMaxAct); + else +#endif +#if CV_NEON && CV_NEON_AARCH64 + if (conv->useNEON) + // NEON optimization is only for ARMv8 device, and for ARMv7 device, we use the Universal intrinsics. 
+ opt_NEON::winofunc_AtXA_8x8_f32(out_wbuf + ((k - k0)*CONV_WINO_IBLOCK + (block_id - block_id0))*CONV_WINO_AREA, CONV_WINO_SIZE, + bpptr, outstep, outptr, outstep, biasv, minval, maxval, ifMinMaxAct); + else +#endif + winofunc_AtXA_8x8_f32(out_wbuf + ((k - k0)*CONV_WINO_IBLOCK + (block_id - block_id0))*CONV_WINO_AREA, CONV_WINO_SIZE, + bpptr, outstep, outptr, outstep, biasv, minval, maxval, ifMinMaxAct); + if (partial) + { + if (activ) + activ->forwardSlice(outptr, outptr, CONV_WINO_SIZE*CONV_WINO_STEP, 0, g*Kg + k, g*Kg + k + 1); + for (int y = 0; y < dy1; y++) + memcpy(outptr0 + y*W0, outptr + y*CONV_WINO_SIZE,dx1*sizeof(outptr0[0])); + } + } + } + } + } + }}); + return 1; +} + +/****************************************************************************************\ + SIMD for winograd function +\****************************************************************************************/ + +#if CV_SIMD128 + +void winofunc_accum_f32(const float* inwptr, const float* wptr, float* outbuf, int Cg, int iblock, + const int 
winoIblock, const int winoKblock, const int winoAtomF32, const int winoNatomF32) +{ +#if 1 + CV_Assert(winoIblock == 3 && winoKblock == 4 && winoAtomF32 == 4); + for (int atom_id = 0; atom_id < winoNatomF32; atom_id++, + outbuf += winoAtomF32) + { + v_float32x4 s00 = v_setzero_f32(), s01 = s00, s02 = s00; + v_float32x4 s10 = v_setzero_f32(), s11 = s00, s12 = s00; + v_float32x4 s20 = v_setzero_f32(), s21 = s00, s22 = s00; + v_float32x4 s30 = v_setzero_f32(), s31 = s00, s32 = s00; + + for (int c = 0; c < Cg; c++, inwptr += winoIblock*winoAtomF32, + wptr += winoKblock*winoAtomF32) + { + v_float32x4 x0, x1, x2; + x0 = v_load(inwptr); + x1 = v_load(inwptr + 4); + x2 = v_load(inwptr + 8); + + v_float32x4 w0 = v_load(wptr); + s00 = v_fma(w0, x0, s00); + s01 = v_fma(w0, x1, s01); + s02 = v_fma(w0, x2, s02); + + w0 = v_load(wptr + 4); + s10 = v_fma(w0, x0, s10); + s11 = v_fma(w0, x1, s11); + s12 = v_fma(w0, x2, s12); + + w0 = v_load(wptr + 8); + s20 = v_fma(w0, x0, s20); + s21 = v_fma(w0, x1, s21); + s22 = v_fma(w0, x2, s22); + + w0 = v_load(wptr + 12); + s30 = v_fma(w0, x0, s30); + s31 = v_fma(w0, x1, s31); + s32 = v_fma(w0, x2, s32); + } + + v_store(outbuf, s00); + v_store(outbuf + 1*64, s01); + v_store(outbuf + 2*64, s02); + v_store(outbuf + 3*64, s10); + v_store(outbuf + 4*64, s11); + v_store(outbuf + 5*64, s12); + v_store(outbuf + 6*64, s20); + v_store(outbuf + 7*64, s21); + v_store(outbuf + 8*64, s22); + v_store(outbuf + 9*64, s30); + v_store(outbuf + 10*64, s31); + v_store(outbuf + 11*64, s32); + } +#else + // Naive C++ code, the code should never be run here. 
+ for (int atom_id = 0; atom_id < winoNatomF32; + atom_id++, outbuf += winoAtomF32) + { + float sumbuf[winoIblock*winoKblock*winoAtomF32]; + memset(sumbuf, 0, sizeof(sumbuf)); + for (int c = 0; c < Cg; c++, inwptr += winoIblock*winoAtomF32, + wptr += winoKblock*winoAtomF32) + { + for (int i = 0; i < winoKblock; i++) + { + for (int j = 0; j < winoIblock; j++) + { + int i_ = i*winoAtomF32; + int j_ = j*winoAtomF32; + int ij_ = i_*winoIblock + j_; + float s0 = inwptr[j_ + 0]*wptr[i_ + 0]; + float s1 = inwptr[j_ + 1]*wptr[i_ + 1]; + float s2 = inwptr[j_ + 2]*wptr[i_ + 2]; + float s3 = inwptr[j_ + 3]*wptr[i_ + 3]; + sumbuf[ij_ + 0] += s0; + sumbuf[ij_ + 1] += s1; + sumbuf[ij_ + 2] += s2; + sumbuf[ij_ + 3] += s3; + } + } + } + for (int ij = 0; ij < winoKblock*winoIblock; ij++) + { + int ij_ = ij*winoAtomF32; + int ij_out = ij*CONV_WINO_AREA; + outbuf[ij_out + 0] = sumbuf[ij_ + 0]; + outbuf[ij_out + 1] = sumbuf[ij_ + 1]; + outbuf[ij_out + 2] = sumbuf[ij_ + 2]; + outbuf[ij_out + 3] = sumbuf[ij_ + 3]; + } + } +#endif +} + +/*Input transform*/ +void winofunc_BtXB_8x8_f32(const float* inptr, int inpstep, + float* outptr, int Cg, const int winoIblock, const int winoAtomF32) +{ + CV_Assert(CONV_WINO_IBLOCK == 3 && CONV_WINO_KBLOCK == 4 && CONV_WINO_ATOM_F32 == 4); + v_float32x4 x00 = v_load(inptr), x01 = v_load(inptr + 4); + v_float32x4 x10 = v_load(inptr + inpstep), x11 = v_load(inptr + inpstep + 4); + v_float32x4 x20 = v_load(inptr + inpstep*2), x21 = v_load(inptr + inpstep*2 + 4); + v_float32x4 x30 = v_load(inptr + inpstep*3), x31 = v_load(inptr + inpstep*3 + 4); + v_float32x4 x40 = v_load(inptr + inpstep*4), x41 = v_load(inptr + inpstep*4 + 4); + v_float32x4 x50 = v_load(inptr + inpstep*5), x51 = v_load(inptr + inpstep*5 + 4); + v_float32x4 x60 = v_load(inptr + inpstep*6), x61 = v_load(inptr + inpstep*6 + 4); + v_float32x4 x70 = v_load(inptr + inpstep*7), x71 = v_load(inptr + inpstep*7 + 4); + + v_float32x4 z00, z01, z10, z11, z20, z21, z30, z31, z40, z41, z50, z51, z60, 
z61, z70, z71; + + { + /* Y[0] = [1.f, 0.f, -5.25f, 0.f, 5.25f, 0.f, -1.f, 0.f]*X */ + /* Y[7] = [0.f, -1.f, 0.f, 5.25f, 0.f, -5.25f, 0.f, 1.f]*X */ + v_float32x4 q5_25 = v_setall_f32(5.25f), t00, t01, t10, t11; + t00 = x40 - x20; + t01 = x41 - x21; + t10 = x30 - x50; + t11 = x31 - x51; + v_float32x4 y00 = v_fma(t00, q5_25, x00 - x60); + v_float32x4 y01 = v_fma(t01, q5_25, x01 - x61); + v_float32x4 y70 = v_fma(t10, q5_25, x70 - x10); + v_float32x4 y71 = v_fma(t11, q5_25, x71 - x11); + + /* Y[1] = [0.f, 1.f, 1.f, -4.25f, -4.25f, 1.f, 1.f, 0.f]*X */ + /* Y[2] = [0.f, -1.f, 1.f, 4.25f, -4.25f, -1.f, 1.f, 0.f]*X */ + v_float32x4 qm4_25 = v_setall_f32(-4.25f); + t00 = v_fma(x30, qm4_25, x10 + x50); + t01 = v_fma(x31, qm4_25, x11 + x51); + t10 = v_fma(x40, qm4_25, x20 + x60); + t11 = v_fma(x41, qm4_25, x21 + x61); + + v_float32x4 y10 = t00 + t10, y11 = t01 + t11; + v_float32x4 y20 = t10 - t00, y21 = t11 - t01; + + /* Y[3] = [0.f, 0.5f, 0.25f, -2.5f, -1.25f, 2.f, 1.f, 0.f]*X */ + /* Y[4] = [0.f, -0.5f, 0.25f, 2.5f, -1.25f, -2.f, 1.f, 0.f]*X */ + v_float32x4 q0_5 = v_setall_f32(0.5f), q0_25 = v_setall_f32(0.25f); + v_float32x4 qm2_5 = v_setall_f32(-2.5f), qm1_25 = v_setall_f32(-1.25f); + t00 = v_fma(x10, q0_5, x50 + x50); + t01 = v_fma(x11, q0_5, x51 + x51); + t10 = v_fma(x20, q0_25, x60); + t11 = v_fma(x21, q0_25, x61); + t00 = v_fma(x30, qm2_5, t00); + t01 = v_fma(x31, qm2_5, t01); + t10 = v_fma(x40, qm1_25, t10); + t11 = v_fma(x41, qm1_25, t11); + + v_float32x4 y30 = t00 + t10, y31 = t01 + t11; + v_float32x4 y40 = t10 - t00, y41 = t11 - t01; + + /* Y[5] = [0.f, 2.f, 4.f, -2.5f, -5.f, 0.5f, 1.f, 0.f]*X */ + /* Y[6] = [0.f, -2.f, 4.f, 2.5f, -5.f, -0.5f, 1.f, 0.f]*X */ + v_float32x4 q4 = v_setall_f32(4.f), qm5 = v_setall_f32(-5.f); + t00 = v_fma(x50, q0_5, x10 + x10); + t01 = v_fma(x51, q0_5, x11 + x11); + t10 = v_fma(x20, q4 , x60); + t11 = v_fma(x21, q4 , x61); + t00 = v_fma(x30, qm2_5, t00); + t01 = v_fma(x31, qm2_5, t01); + t10 = v_fma(x40, qm5 , t10); + t11 = 
v_fma(x41, qm5 , t11); + + v_float32x4 y50 = t00 + t10, y51 = t01 + t11; + v_float32x4 y60 = t10 - t00, y61 = t11 - t01; + + /* transpose 8x8 matrix in-place with some renumeration of the elements: */ + /* Y: */ + /* y00 y01 */ + /* y10 y11 */ + /* ... */ + /* y70 y71 */ + /* Y': */ + /* y00 y40 */ + /* y10 y50 */ + /* y20 y60 */ + /* y30 y70 */ + /* y01 y41 */ + /* y11 y51 */ + /* y21 y61 */ + /* y31 y71 */ + /* in other words, y40 <-> y01, y50 <-> y11, y60 <-> y21, y70 <-> y31 */ + + v_transpose4x4(y00, y10, y20, y30, y00, y10, y20, y30); + v_transpose4x4(y01, y11, y21, y31, y01, y11, y21, y31); + v_transpose4x4(y40, y50, y60, y70, y40, y50, y60, y70); + v_transpose4x4(y41, y51, y61, y71, y41, y51, y61, y71); + + /* Z[0] = [1.f, 0.f, -5.25f, 0.f, 5.25f, 0.f, -1.f, 0.f]*Y */ + /* Z[7] = [0.f, -1.f, 0.f, 5.25f, 0.f, -5.25f, 0.f, 1.f]*Y */ + t00 = y01 - y20; + t01 = y41 - y60; + t10 = y30 - y11; + t11 = y70 - y51; + z00 = v_fma(t00, q5_25, y00 - y21); + z01 = v_fma(t01, q5_25, y40 - y61); + z70 = v_fma(t10, q5_25, y31 - y10); + z71 = v_fma(t11, q5_25, y71 - y50); + + /* Z[1] = [0.f, 1.f, 1.f, -4.25f, -4.25f, 1.f, 1.f, 0.f]*Y */ + /* Z[2] = [0.f, -1.f, 1.f, 4.25f, -4.25f, -1.f, 1.f, 0.f]*Y */ + t00 = v_fma(y30, qm4_25, y10 + y11); + t01 = v_fma(y70, qm4_25, y50 + y51); + t10 = v_fma(y01, qm4_25, y20 + y21); + t11 = v_fma(y41, qm4_25, y60 + y61); + + z10 = t00 + t10; z11 = t01 + t11; + z20 = t10 - t00; z21 = t11 - t01; + + /* Z[3] = [0.f, 0.5f, 0.25f, -2.5f, -1.25f, 2.f, 1.f, 0.f]*Y */ + /* Z[4] = [0.f, -0.5f, 0.25f, 2.5f, -1.25f, -2.f, 1.f, 0.f]*Y */ + t00 = v_fma(y10, q0_5, y11 + y11); + t01 = v_fma(y50, q0_5, y51 + y51); + t10 = v_fma(y20, q0_25, y21); + t11 = v_fma(y60, q0_25, y61); + t00 = v_fma(y30, qm2_5, t00); + t01 = v_fma(y70, qm2_5, t01); + t10 = v_fma(y01, qm1_25, t10); + t11 = v_fma(y41, qm1_25, t11); + + z30 = t00 + t10; z31 = t01 + t11; + z40 = t10 - t00; z41 = t11 - t01; + + /* Z[5] = [0.f, 2.f, 4.f, -2.5f, -5.f, 0.5f, 1.f, 0.f]*Y */ + /* Z[6] = [0.f, 
-2.f, 4.f, 2.5f, -5.f, -0.5f, 1.f, 0.f]*Y */ + t00 = v_fma(y11, q0_5, y10 + y10); + t01 = v_fma(y51, q0_5, y50 + y50); + t10 = v_fma(y20, q4, y21); + t11 = v_fma(y60, q4, y61); + t00 = v_fma(y30, qm2_5, t00); + t01 = v_fma(y70, qm2_5, t01); + t10 = v_fma(y01, qm5, t10); + t11 = v_fma(y41, qm5, t11); + + z50 = t00 + t10; z51 = t01 + t11; + z60 = t10 - t00; z61 = t11 - t01; + } + + const int outstep = winoIblock*winoAtomF32*Cg; + + v_store(outptr, z00); + v_store(outptr + outstep, z01); + v_store(outptr + outstep*2, z10); + v_store(outptr + outstep*3, z11); + v_store(outptr + outstep*4, z20); + v_store(outptr + outstep*5, z21); + v_store(outptr + outstep*6, z30); + v_store(outptr + outstep*7, z31); + v_store(outptr + outstep*8, z40); + v_store(outptr + outstep*9, z41); + v_store(outptr + outstep*10, z50); + v_store(outptr + outstep*11, z51); + v_store(outptr + outstep*12, z60); + v_store(outptr + outstep*13, z61); + v_store(outptr + outstep*14, z70); + v_store(outptr + outstep*15, z71); +} + +/*Output transform*/ +/* Inverse Winograd 8x8 transform: + out = (A'*inp*A)', where + inp is input 8x8 FP32 matrix, + A' is + [1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, + 0.f, 1.f, -1.f, 2.f, -2.f, 0.5f, -0.5f, 0.f, + 0.f, 1.f, 1.f, 4.f, 4.f, 0.25f, 0.25f, 0.f, + 0.f, 1.f, -1.f, 8.f, -8.f, 0.125f, -0.125f, 0.f, + 0.f, 1.f, 1.f, 16.f, 16.f, 1.f/16, 1.f/16, 0.f, + 0.f, 1.f, -1.f, 32.f, -32.f, 1.f/32, -1.f/32, 1.f] + + inp is pre-loaded into xij registers, + out will be stored in zij, where (0<=i<=7 for x, 0<=i<=5 for z), 0<=j<=1. + + After the inverse transform is done, we add bias, + optionally add results from the earlier tensors (by-pass), + optionally apply activation function and then + store the final results. + + That is, after both forward and then inverse transformation, + we get non-transposed result. + Of course, for the correct work of Winograd-based convolution, + the Winograd-transformed weights should also be transposed. 
+ init_conv() (see OpConv.fx) takes care of that. +*/ +void winofunc_AtXA_8x8_f32(const float* inptr, int inpstep, + float* bpptr, int bpstep, float* outptr, int outstep, + float bias, float minval, float maxval, bool ifMinMaxAct) +{ + CV_Assert(CONV_WINO_IBLOCK == 3 && CONV_WINO_KBLOCK == 4 && CONV_WINO_ATOM_F32 == 4); + v_float32x4 x00 = v_load(inptr), x01 = v_load(inptr + 4); + v_float32x4 x10 = v_load(inptr + inpstep), x11 = v_load(inptr + inpstep + 4); + v_float32x4 x20 = v_load(inptr + inpstep*2), x21 = v_load(inptr + inpstep*2 + 4); + v_float32x4 x30 = v_load(inptr + inpstep*3), x31 = v_load(inptr + inpstep*3 + 4); + v_float32x4 x40 = v_load(inptr + inpstep*4), x41 = v_load(inptr + inpstep*4 + 4); + v_float32x4 x50 = v_load(inptr + inpstep*5), x51 = v_load(inptr + inpstep*5 + 4); + v_float32x4 x60 = v_load(inptr + inpstep*6), x61 = v_load(inptr + inpstep*6 + 4); + v_float32x4 x70 = v_load(inptr + inpstep*7), x71 = v_load(inptr + inpstep*7 + 4); + v_float32x4 z00, z01, z10, z11, z20, z21, z30, z31, z40, z41, z50, z51; + + { + v_float32x4 s12_0, s12_1, s34_0, s34_1, s56_0, s56_1; + s12_0 = x10 + x20; s12_1 = x11 + x21; + s34_0 = x30 + x40; s34_1 = x31 + x41; + s56_0 = x50 + x60; s56_1 = x51 + x61; + + v_float32x4 y00 = x00 + s12_0 + s34_0 + s56_0; + v_float32x4 y01 = x01 + s12_1 + s34_1 + s56_1; + + v_float32x4 a0 = v_setall_f32(0.25f), a1 = v_setall_f32(4.0f); + v_float32x4 y20 = v_fma(s56_0, a0, v_fma(s34_0, a1, s12_0)); + v_float32x4 y21 = v_fma(s56_1, a0 ,v_fma(s34_1, a1, s12_1) ); + + a0 = v_setall_f32(1.f/16), a1 = v_setall_f32(16.0f); + v_float32x4 y40 = v_fma(s56_0, a0, v_fma(s34_0, a1, s12_0)); + v_float32x4 y41 = v_fma(s56_1, a0, v_fma(s34_1, a1, s12_1)); + + s12_0 = x10 - x20; s12_1 = x11 - x21; + s34_0 = x30 - x40; s34_1 = x31 - x41; + s56_0 = x50 - x60; s56_1 = x51 - x61; + + a0 = v_setall_f32(1.f/32), a1 = v_setall_f32(32.f); + v_float32x4 y50 = v_fma(s56_0, a0, v_fma(s34_0, a1, x70 + s12_0)); + v_float32x4 y51 = v_fma(s56_1, a0, v_fma(s34_1, a1, 
x71 + s12_1)); + + a0 = v_setall_f32(0.5f), a1 = v_setall_f32(2.f); + v_float32x4 y10 = v_fma(s56_0, a0, v_fma(s34_0, a1, s12_0)); + v_float32x4 y11 = v_fma(s56_1, a0, v_fma(s34_1, a1, s12_1)); + + a0 = v_setall_f32(0.125f), a1 = v_setall_f32(8.f); + v_float32x4 y30 = v_fma(s56_0, a0, v_fma(s34_0, a1, s12_0)); + v_float32x4 y31 = v_fma(s56_1, a0, v_fma(s34_1, a1, s12_1)); + + v_float32x4 y60 = v_setall_f32(0.f), y61 = y60, y70 = y60, y71 = y60; + + /* transpose 8x8 matrix in-place with some renumeration of the elements: */ + /* Y: */ + /* y00 y01 */ + /* y10 y11 */ + /* ... */ + /* y50 y51 */ + /* 0 0 */ + /* 0 0 */ + /* Y': */ + /* y00 y40 */ + /* y10 y50 */ + /* y20 y60 */ + /* y30 y70 */ + /* y01 y41 */ + /* y11 y51 */ + /* y21 y61 */ + /* y31 y71 */ + /* in other words, y40 <-> y01, y50 <-> y11, y60 <-> y21, y70 <-> y31 */ + + v_transpose4x4(y00, y10, y20, y30, y00, y10, y20, y30); + v_transpose4x4(y01, y11, y21, y31, y01, y11, y21, y31); + v_transpose4x4(y40, y50, y60, y70, y40, y50, y60, y70); + v_transpose4x4(y41, y51, y61, y71, y41, y51, y61, y71); + + s12_0 = y10 + y20; s12_1 = y50 + y60; + s34_0 = y30 + y01; s34_1 = y70 + y41; + s56_0 = y11 + y21; s56_1 = y51 + y61; + + z00 = y00 + s12_0 + s34_0 + s56_0; + z01 = y40 + s12_1 + s34_1 + s56_1; + + a0 = v_setall_f32(0.25f), a1 = v_setall_f32(4.0f); + z20 = v_fma(s56_0, a0, v_fma(s34_0, a1, s12_0)); + z21 = v_fma(s56_1, a0, v_fma(s34_1, a1, s12_1)); + + a0 = v_setall_f32(1.f/16), a1 = v_setall_f32(16.0f); + z40 = v_fma(s56_0, a0, v_fma(s34_0, a1, s12_0)); + z41 = v_fma(s56_1, a0, v_fma(s34_1, a1, s12_1)); + + s12_0 = y10 - y20; s12_1 = y50 - y60; + s34_0 = y30 - y01; s34_1 = y70 - y41; + s56_0 = y11 - y21; s56_1 = y51 - y61; + + a0 = v_setall_f32(1.f/32), a1 = v_setall_f32(32.0f); + z50 = v_fma(s56_0, a0, v_fma(s34_0, a1, y31 + s12_0)); + z51 = v_fma(s56_1, a0, v_fma(s34_1, a1, y71 + s12_1)); + + a0 = v_setall_f32(0.5f), a1 = v_setall_f32(2.0f); + z10 = v_fma(s56_0, a0, v_fma(s34_0, a1, s12_0)); + z11 = 
v_fma(s56_1, a0, v_fma(s34_1, a1, s12_1)); + + a0 = v_setall_f32(0.125f), a1 = v_setall_f32(8.0f); + z30 = v_fma(s56_0, a0, v_fma(s34_0, a1, s12_0)); + z31 = v_fma(s56_1, a0, v_fma(s34_1, a1, s12_1)); + + v_float32x4 vbias = v_setall_f32(bias); + z00 += vbias; + z01 += vbias; + z10 += vbias; + z11 += vbias; + z20 += vbias; + z21 += vbias; + z30 += vbias; + z31 += vbias; + z40 += vbias; + z41 += vbias; + z50 += vbias; + z51 += vbias; + } + + if (bpptr) + { + z00 += v_load(bpptr); + z01 += v_load_low(bpptr + 4); + z10 += v_load(bpptr + bpstep); + z11 += v_load_low(bpptr + bpstep + 4); + z20 += v_load(bpptr + bpstep*2); + z21 += v_load_low(bpptr + bpstep*2 + 4); + z30 += v_load(bpptr + bpstep*3); + z31 += v_load_low(bpptr + bpstep*3 + 4); + z40 += v_load(bpptr + bpstep*4); + z41 += v_load_low(bpptr + bpstep*4 + 4); + z50 += v_load(bpptr + bpstep*5); + z51 += v_load_low(bpptr + bpstep*5 + 4); + } + + if (ifMinMaxAct) + { + v_float32x4 vmax = v_setall_f32(maxval); + v_float32x4 vmin = v_setall_f32(minval); + + z00 = v_min(v_max(z00, vmin), vmax); + z01 = v_min(v_max(z01, vmin), vmax); + z10 = v_min(v_max(z10, vmin), vmax); + z11 = v_min(v_max(z11, vmin), vmax); + z20 = v_min(v_max(z20, vmin), vmax); + z21 = v_min(v_max(z21, vmin), vmax); + z30 = v_min(v_max(z30, vmin), vmax); + z31 = v_min(v_max(z31, vmin), vmax); + z40 = v_min(v_max(z40, vmin), vmax); + z41 = v_min(v_max(z41, vmin), vmax); + z50 = v_min(v_max(z50, vmin), vmax); + z51 = v_min(v_max(z51, vmin), vmax); + } + + v_store(outptr, z00); + v_store_low(outptr + 4, z01); + v_store(outptr + outstep, z10); + v_store_low(outptr + outstep + 4, z11); + v_store(outptr + outstep*2, z20); + v_store_low(outptr + outstep*2 + 4, z21); + v_store(outptr + outstep*3, z30); + v_store_low(outptr + outstep*3 + 4, z31); + v_store(outptr + outstep*4, z40); + v_store_low(outptr + outstep*4 + 4, z41); + v_store(outptr + outstep*5, z50); + v_store_low(outptr + outstep*5 + 4, z51); +} +#endif + +#else +int runWinograd63(InputArray 
_input, InputArray _fusedAddMat, OutputArray _output, const Ptr& conv, + int ntasks, float minval, float maxval, ActivationLayer* activ, bool ifMinMaxAct) +{ + return 0; +} +#endif + +}} // namespace cv::dnn diff --git a/modules/dnn/src/layers/cpu_kernels/conv_winograd_f63.simd.hpp b/modules/dnn/src/layers/cpu_kernels/conv_winograd_f63.simd.hpp new file mode 100644 index 0000000000..2688c75785 --- /dev/null +++ b/modules/dnn/src/layers/cpu_kernels/conv_winograd_f63.simd.hpp @@ -0,0 +1,886 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "opencv2/core/hal/intrin.hpp" + +namespace cv { +namespace dnn { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN + +/* Accumulate */ +void winofunc_accum_f32(const float* inwptr, const float* wptr, float* outbuf, int Cg, int iblock, + const int winoIblock, const int winoKblock, const int winoAtomF32, const int winoNatomF32); + +/*Input transform*/ +void winofunc_BtXB_8x8_f32(const float* inptr, int inpstep, + float* outptr, int Cg, const int winoIblock, const int winoAtomF32); + +/*Output transform*/ +void winofunc_AtXA_8x8_f32(const float* inptr, int inpstep, + float* bpptr, int bpstep, float* outptr, int outstep, + float bias, float minval, float maxval, bool ifMinMaxAct); + +#if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_AVX + +#if !CV_FMA3 // AVX workaround +#undef _mm256_fmadd_ps +#define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(c, _mm256_mul_ps(a, b)) +#endif + +void winofunc_accum_f32(const float* inwptr, const float* wptr, float* outbuf, int Cg, int iblock, + const int winoIblock, const int winoKblock, const int winoAtomF32, const int winoNatomF32) +{ + CV_Assert(winoIblock == 6 && winoKblock == 4 && winoAtomF32 == 8); + if (iblock > 3) + { + for (int atom_id = 0; atom_id < winoNatomF32; atom_id++, + outbuf += winoAtomF32) + { + __m256 s00 = _mm256_set1_ps(0.f), s01 
= s00, s02 = s00, s03 = s00, s04 = s00, s05 = s00; + __m256 s10 = _mm256_set1_ps(0.f), s11 = s00, s12 = s00, s13 = s00, s14 = s00, s15 = s00; + __m256 s20 = _mm256_set1_ps(0.f), s21 = s00, s22 = s00, s23 = s00, s24 = s00, s25 = s00; + __m256 s30 = _mm256_set1_ps(0.f), s31 = s00, s32 = s00, s33 = s00, s34 = s00, s35 = s00; + for (int c = 0; c < Cg; c++, inwptr += winoIblock*winoAtomF32, + wptr += winoKblock*winoAtomF32) + { + __m256 w0 = _mm256_load_ps(wptr), w1 = _mm256_load_ps(wptr + 8); + __m256 w2 = _mm256_load_ps(wptr + 16), w3 = _mm256_load_ps(wptr + 24); + __m256 x0, x1; + x0 = _mm256_load_ps(inwptr); + x1 = _mm256_load_ps(inwptr + 8); + s00 = _mm256_fmadd_ps(w0, x0, s00); + s01 = _mm256_fmadd_ps(w0, x1, s01); + s10 = _mm256_fmadd_ps(w1, x0, s10); + s11 = _mm256_fmadd_ps(w1, x1, s11); + s20 = _mm256_fmadd_ps(w2, x0, s20); + s21 = _mm256_fmadd_ps(w2, x1, s21); + s30 = _mm256_fmadd_ps(w3, x0, s30); + s31 = _mm256_fmadd_ps(w3, x1, s31); + x0 = _mm256_load_ps(inwptr + 16); + x1 = _mm256_load_ps(inwptr + 24); + s02 = _mm256_fmadd_ps(w0, x0, s02); + s03 = _mm256_fmadd_ps(w0, x1, s03); + s12 = _mm256_fmadd_ps(w1, x0, s12); + s13 = _mm256_fmadd_ps(w1, x1, s13); + s22 = _mm256_fmadd_ps(w2, x0, s22); + s23 = _mm256_fmadd_ps(w2, x1, s23); + s32 = _mm256_fmadd_ps(w3, x0, s32); + s33 = _mm256_fmadd_ps(w3, x1, s33); + x0 = _mm256_load_ps(inwptr + 32); + x1 = _mm256_load_ps(inwptr + 40); + s04 = _mm256_fmadd_ps(w0, x0, s04); + s05 = _mm256_fmadd_ps(w0, x1, s05); + s14 = _mm256_fmadd_ps(w1, x0, s14); + s15 = _mm256_fmadd_ps(w1, x1, s15); + s24 = _mm256_fmadd_ps(w2, x0, s24); + s25 = _mm256_fmadd_ps(w2, x1, s25); + s34 = _mm256_fmadd_ps(w3, x0, s34); + s35 = _mm256_fmadd_ps(w3, x1, s35); + } + + _mm256_store_ps(outbuf, s00); + _mm256_store_ps(outbuf + 1*64, s01); + _mm256_store_ps(outbuf + 2*64, s02); + _mm256_store_ps(outbuf + 3*64, s03); + _mm256_store_ps(outbuf + 4*64, s04); + _mm256_store_ps(outbuf + 5*64, s05); + + _mm256_store_ps(outbuf + 6*64, s10); + 
_mm256_store_ps(outbuf + 7*64, s11); + _mm256_store_ps(outbuf + 8*64, s12); + _mm256_store_ps(outbuf + 9*64, s13); + _mm256_store_ps(outbuf + 10*64, s14); + _mm256_store_ps(outbuf + 11*64, s15); + + _mm256_store_ps(outbuf + 12*64, s20); + _mm256_store_ps(outbuf + 13*64, s21); + _mm256_store_ps(outbuf + 14*64, s22); + _mm256_store_ps(outbuf + 15*64, s23); + _mm256_store_ps(outbuf + 16*64, s24); + _mm256_store_ps(outbuf + 17*64, s25); + + _mm256_store_ps(outbuf + 18*64, s30); + _mm256_store_ps(outbuf + 19*64, s31); + _mm256_store_ps(outbuf + 20*64, s32); + _mm256_store_ps(outbuf + 21*64, s33); + _mm256_store_ps(outbuf + 22*64, s34); + _mm256_store_ps(outbuf + 23*64, s35); + } + } + else + { + for (int atom_id = 0; atom_id < winoNatomF32; atom_id++, + outbuf += winoAtomF32) + { + __m256 s00 = _mm256_set1_ps(0.f), s01 = s00, s02 = s00; + __m256 s10 = _mm256_set1_ps(0.f), s11 = s00, s12 = s00; + __m256 s20 = _mm256_set1_ps(0.f), s21 = s00, s22 = s00; + __m256 s30 = _mm256_set1_ps(0.f), s31 = s00, s32 = s00; + for (int c = 0; c < Cg; c++, inwptr += winoIblock*winoAtomF32, + wptr += winoKblock*winoAtomF32) { + __m256 w0 = _mm256_load_ps(wptr), w1 = _mm256_load_ps(wptr + 8); + __m256 w2 = _mm256_load_ps(wptr + 16), w3 = _mm256_load_ps(wptr + 24); + __m256 x0, x1, x2; + x0 = _mm256_load_ps(inwptr); + x1 = _mm256_load_ps(inwptr + 8); + x2 = _mm256_load_ps(inwptr + 16); + s00 = _mm256_fmadd_ps(w0, x0, s00); + s01 = _mm256_fmadd_ps(w0, x1, s01); + s02 = _mm256_fmadd_ps(w0, x2, s02); + s10 = _mm256_fmadd_ps(w1, x0, s10); + s11 = _mm256_fmadd_ps(w1, x1, s11); + s12 = _mm256_fmadd_ps(w1, x2, s12); + s20 = _mm256_fmadd_ps(w2, x0, s20); + s21 = _mm256_fmadd_ps(w2, x1, s21); + s22 = _mm256_fmadd_ps(w2, x2, s22); + s30 = _mm256_fmadd_ps(w3, x0, s30); + s31 = _mm256_fmadd_ps(w3, x1, s31); + s32 = _mm256_fmadd_ps(w3, x2, s32); + } + + _mm256_store_ps(outbuf, s00); + _mm256_store_ps(outbuf + 1*64, s01); + _mm256_store_ps(outbuf + 2*64, s02); + _mm256_store_ps(outbuf + 6*64, s10); + 
_mm256_store_ps(outbuf + 7*64, s11); + _mm256_store_ps(outbuf + 8*64, s12); + _mm256_store_ps(outbuf + 12*64, s20); + _mm256_store_ps(outbuf + 13*64, s21); + _mm256_store_ps(outbuf + 14*64, s22); + _mm256_store_ps(outbuf + 18*64, s30); + _mm256_store_ps(outbuf + 19*64, s31); + _mm256_store_ps(outbuf + 20*64, s32); + } + } + _mm256_zeroupper(); +} +static inline +void transpose8_ps(__m256 &row0, __m256 &row1, __m256 &row2, __m256 &row3, __m256 &row4, __m256 &row5, __m256 &row6, __m256 &row7) +{ + __m256 __t0, __t1, __t2, __t3, __t4, __t5, __t6, __t7; + __m256 __tt0, __tt1, __tt2, __tt3, __tt4, __tt5, __tt6, __tt7; + __t0 = _mm256_unpacklo_ps(row0, row1); + __t1 = _mm256_unpackhi_ps(row0, row1); + __t2 = _mm256_unpacklo_ps(row2, row3); + __t3 = _mm256_unpackhi_ps(row2, row3); + __t4 = _mm256_unpacklo_ps(row4, row5); + __t5 = _mm256_unpackhi_ps(row4, row5); + __t6 = _mm256_unpacklo_ps(row6, row7); + __t7 = _mm256_unpackhi_ps(row6, row7); + __tt0 = _mm256_shuffle_ps(__t0,__t2,_MM_SHUFFLE(1,0,1,0)); + __tt1 = _mm256_shuffle_ps(__t0,__t2,_MM_SHUFFLE(3,2,3,2)); + __tt2 = _mm256_shuffle_ps(__t1,__t3,_MM_SHUFFLE(1,0,1,0)); + __tt3 = _mm256_shuffle_ps(__t1,__t3,_MM_SHUFFLE(3,2,3,2)); + __tt4 = _mm256_shuffle_ps(__t4,__t6,_MM_SHUFFLE(1,0,1,0)); + __tt5 = _mm256_shuffle_ps(__t4,__t6,_MM_SHUFFLE(3,2,3,2)); + __tt6 = _mm256_shuffle_ps(__t5,__t7,_MM_SHUFFLE(1,0,1,0)); + __tt7 = _mm256_shuffle_ps(__t5,__t7,_MM_SHUFFLE(3,2,3,2)); + row0 = _mm256_permute2f128_ps(__tt0, __tt4, 0x20); + row1 = _mm256_permute2f128_ps(__tt1, __tt5, 0x20); + row2 = _mm256_permute2f128_ps(__tt2, __tt6, 0x20); + row3 = _mm256_permute2f128_ps(__tt3, __tt7, 0x20); + row4 = _mm256_permute2f128_ps(__tt0, __tt4, 0x31); + row5 = _mm256_permute2f128_ps(__tt1, __tt5, 0x31); + row6 = _mm256_permute2f128_ps(__tt2, __tt6, 0x31); + row7 = _mm256_permute2f128_ps(__tt3, __tt7, 0x31); +} + +/*Input transform*/ +void winofunc_BtXB_8x8_f32(const float* inptr, int inpstep, + float* outptr, int Cg, const int winoIblock, 
const int winoAtomF32) +{ + __m256 x00 = _mm256_loadu_ps(inptr); + __m256 x10 = _mm256_loadu_ps(inptr + inpstep); + __m256 x20 = _mm256_loadu_ps(inptr + inpstep*2); + __m256 x30 = _mm256_loadu_ps(inptr + inpstep*3); + __m256 x40 = _mm256_loadu_ps(inptr + inpstep*4); + __m256 x50 = _mm256_loadu_ps(inptr + inpstep*5); + __m256 x60 = _mm256_loadu_ps(inptr + inpstep*6); + __m256 x70 = _mm256_loadu_ps(inptr + inpstep*7); + + __m256 z00, z10, z20, z30, z40, z50, z60, z70; + + { + /* Y[0] = [1.f, 0.f, -5.25f, 0.f, 5.25f, 0.f, -1.f, 0.f]*X */ + /* Y[7] = [0.f, -1.f, 0.f, 5.25f, 0.f, -5.25f, 0.f, 1.f]*X */ + __m256 q5_25 = _mm256_set1_ps(5.25f), t00, t10; + t00 = _mm256_sub_ps(x40, x20); + t10 = _mm256_sub_ps(x30, x50); + + __m256 y00 = _mm256_fmadd_ps(t00, q5_25, _mm256_sub_ps(x00, x60)); + __m256 y70 = _mm256_fmadd_ps(t10, q5_25, _mm256_sub_ps(x70, x10)); + + /* Y[1] = [0.f, 1.f, 1.f, -4.25f, -4.25f, 1.f, 1.f, 0.f]*X */ + /* Y[2] = [0.f, -1.f, 1.f, 4.25f, -4.25f, -1.f, 1.f, 0.f]*X */ + __m256 qm4_25 = _mm256_set1_ps(-4.25f); + t00 = _mm256_fmadd_ps(x30, qm4_25, _mm256_add_ps(x10, x50)); + t10 = _mm256_fmadd_ps(x40, qm4_25, _mm256_add_ps(x20, x60)); + + __m256 y10 = _mm256_add_ps(t00, t10); + __m256 y20 = _mm256_sub_ps(t10, t00); + + /* Y[3] = [0.f, 0.5f, 0.25f, -2.5f, -1.25f, 2.f, 1.f, 0.f]*X */ + /* Y[4] = [0.f, -0.5f, 0.25f, 2.5f, -1.25f, -2.f, 1.f, 0.f]*X */ + __m256 q0_5 = _mm256_set1_ps(0.5f), q0_25 = _mm256_set1_ps(0.25f); + __m256 qm2_5 = _mm256_set1_ps(-2.5f), qm1_25 = _mm256_set1_ps(-1.25f); + t00 = _mm256_fmadd_ps(x10, q0_5, _mm256_add_ps(x50, x50)); + t10 = _mm256_fmadd_ps(x20, q0_25, x60); + t00 = _mm256_fmadd_ps(x30, qm2_5, t00); + t10 = _mm256_fmadd_ps(x40, qm1_25, t10); + + __m256 y30 = _mm256_add_ps(t00, t10); + __m256 y40 = _mm256_sub_ps(t10, t00); + + /* Y[5] = [0.f, 2.f, 4.f, -2.5f, -5.f, 0.5f, 1.f, 0.f]*X */ + /* Y[6] = [0.f, -2.f, 4.f, 2.5f, -5.f, -0.5f, 1.f, 0.f]*X */ + __m256 q4 = _mm256_set1_ps(4.f), qm5 = _mm256_set1_ps(-5.f); + t00 = 
_mm256_fmadd_ps(x50, q0_5, _mm256_add_ps(x10, x10)); + t10 = _mm256_fmadd_ps(x20, q4 , x60); + t00 = _mm256_fmadd_ps(x30, qm2_5, t00); + t10 = _mm256_fmadd_ps(x40, qm5 , t10); + + __m256 y50 = _mm256_add_ps(t00, t10); + __m256 y60 = _mm256_sub_ps(t10, t00); + + /* transpose 8x8 matrix in-place with some renumeration of the elements: */ + transpose8_ps(y00, y10, y20, y30, y40, y50, y60, y70); + + /* Z[0] = [1.f, 0.f, -5.25f, 0.f, 5.25f, 0.f, -1.f, 0.f]*Y */ + /* Z[7] = [0.f, -1.f, 0.f, 5.25f, 0.f, -5.25f, 0.f, 1.f]*Y */ + t00 = _mm256_sub_ps(y40, y20); + t10 = _mm256_sub_ps(y30, y50); + z00 = _mm256_fmadd_ps(t00, q5_25, _mm256_sub_ps(y00, y60)); + z70 = _mm256_fmadd_ps(t10, q5_25, _mm256_sub_ps(y70, y10)); + + /* Z[1] = [0.f, 1.f, 1.f, -4.25f, -4.25f, 1.f, 1.f, 0.f]*Y */ + /* Z[2] = [0.f, -1.f, 1.f, 4.25f, -4.25f, -1.f, 1.f, 0.f]*Y */ + t00 = _mm256_fmadd_ps(y30, qm4_25, _mm256_add_ps(y10, y50)); + t10 = _mm256_fmadd_ps(y40, qm4_25, _mm256_add_ps(y20, y60)); + z10 = _mm256_add_ps(t00, t10); + z20 = _mm256_sub_ps(t10, t00); + + /* Z[3] = [0.f, 0.5f, 0.25f, -2.5f, -1.25f, 2.f, 1.f, 0.f]*Y */ + /* Z[4] = [0.f, -0.5f, 0.25f, 2.5f, -1.25f, -2.f, 1.f, 0.f]*Y */ + t00 = _mm256_fmadd_ps(y10, q0_5, _mm256_add_ps(y50, y50)); + t10 = _mm256_fmadd_ps(y20, q0_25, y60); + t00 = _mm256_fmadd_ps(y30, qm2_5, t00); + t10 = _mm256_fmadd_ps(y40, qm1_25, t10); + + z30 = _mm256_add_ps(t00, t10); + z40 = _mm256_sub_ps(t10, t00); + + /* Z[5] = [0.f, 2.f, 4.f, -2.5f, -5.f, 0.5f, 1.f, 0.f]*Y */ + /* Z[6] = [0.f, -2.f, 4.f, 2.5f, -5.f, -0.5f, 1.f, 0.f]*Y */ + t00 = _mm256_fmadd_ps(y50, q0_5, _mm256_add_ps(y10, y10)); + t10 = _mm256_fmadd_ps(y20, q4, y60); + t00 = _mm256_fmadd_ps(y30, qm2_5, t00); + t10 = _mm256_fmadd_ps(y40, qm5, t10); + + z50 = _mm256_add_ps(t00, t10); + z60 = _mm256_sub_ps(t10, t00); + } + + const int outstep = winoIblock*winoAtomF32*Cg; + + _mm256_storeu_ps(outptr, z00); + _mm256_storeu_ps(outptr + outstep, z10); + _mm256_storeu_ps(outptr + outstep*2, z20); + 
_mm256_storeu_ps(outptr + outstep*3, z30); + _mm256_storeu_ps(outptr + outstep*4, z40); + _mm256_storeu_ps(outptr + outstep*5, z50); + _mm256_storeu_ps(outptr + outstep*6, z60); + _mm256_storeu_ps(outptr + outstep*7, z70); + _mm256_zeroupper(); +} + +#define STORE6_ELE_FROM_16(ptr, z00, lowM, highM) \ + lowM = _mm256_castps256_ps128(z00); \ + highM = _mm256_extractf128_ps(z00, 1); \ + _mm_storeu_ps(ptr, lowM); \ + _mm_storel_epi64((__m128i*)(ptr + 4), _mm_castps_si128(highM)) + +/* Inverse Winograd 8x8 transform: + out = (A'*inp*A)', where + inp is input 8x8 FP32 matrix, + A' is + [1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, + 0.f, 1.f, -1.f, 2.f, -2.f, 0.5f, -0.5f, 0.f, + 0.f, 1.f, 1.f, 4.f, 4.f, 0.25f, 0.25f, 0.f, + 0.f, 1.f, -1.f, 8.f, -8.f, 0.125f, -0.125f, 0.f, + 0.f, 1.f, 1.f, 16.f, 16.f, 1.f/16, 1.f/16, 0.f, + 0.f, 1.f, -1.f, 32.f, -32.f, 1.f/32, -1.f/32, 1.f] +*/ +void winofunc_AtXA_8x8_f32(const float* inptr, int inpstep, + float* bpptr, int bpstep, float* outptr, int outstep, + float bias, float minval, float maxval, bool ifMinMaxAct) +{ + + __m256 x00 = _mm256_load_ps(inptr); + __m256 x10 = _mm256_load_ps(inptr + inpstep); + __m256 x20 = _mm256_load_ps(inptr + inpstep*2); + __m256 x30 = _mm256_load_ps(inptr + inpstep*3); + __m256 x40 = _mm256_load_ps(inptr + inpstep*4); + __m256 x50 = _mm256_load_ps(inptr + inpstep*5); + __m256 x60 = _mm256_load_ps(inptr + inpstep*6); + __m256 x70 = _mm256_load_ps(inptr + inpstep*7); + __m256 z00, z10, z20, z30, z40, z50; + + { + __m256 s12_0, s34_0, s56_0; + s12_0 = _mm256_add_ps(x10, x20); + s34_0 = _mm256_add_ps(x30, x40); + s56_0 = _mm256_add_ps(x50, x60); + + __m256 y00 = _mm256_add_ps(x00, _mm256_add_ps(s12_0, _mm256_add_ps(s34_0, s56_0))); + __m256 y20 = _mm256_fmadd_ps(s56_0, _mm256_set1_ps(0.25f), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(4.0f), s12_0)); + __m256 y40 = _mm256_fmadd_ps(s56_0, _mm256_set1_ps(1.f/16), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(16.0f), s12_0)); + + s12_0 = _mm256_sub_ps(x10, x20); + s34_0 = 
_mm256_sub_ps(x30, x40); + s56_0 = _mm256_sub_ps(x50, x60); + __m256 y50 = _mm256_fmadd_ps(s56_0, _mm256_set1_ps(1.f/32), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(32.f), _mm256_add_ps(x70, s12_0))); + __m256 y10 = _mm256_fmadd_ps(s56_0, _mm256_set1_ps(0.5f), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(2.f), s12_0)); + __m256 y30 = _mm256_fmadd_ps(s56_0, _mm256_set1_ps(0.125f), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(8.f), s12_0)); + __m256 y60 = _mm256_set1_ps(0.f), y70 = y60; + + /* transpose 8x8 matrix in-place with some renumeration of the elements: */ + + transpose8_ps(y00, y10, y20, y30, y40, y50, y60, y70); + + s12_0 = _mm256_add_ps(y10, y20); + s34_0 = _mm256_add_ps(y30, y40); + s56_0 = _mm256_add_ps(y50, y60); + + z00 = _mm256_add_ps(y00, _mm256_add_ps(s12_0, _mm256_add_ps(s34_0, s56_0))); + z20 = _mm256_fmadd_ps(s56_0, _mm256_set1_ps(0.25f), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(4.0f), s12_0)); + z40 = _mm256_fmadd_ps(s56_0, _mm256_set1_ps(1.f/16), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(16.0f), s12_0)); + + s12_0 = _mm256_sub_ps(y10, y20); + s34_0 = _mm256_sub_ps(y30, y40); + s56_0 = _mm256_sub_ps(y50, y60); + + z50 = _mm256_fmadd_ps(s56_0, _mm256_set1_ps(1.f/32), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(32.0f), _mm256_add_ps(y70, s12_0))); + z10 = _mm256_fmadd_ps(s56_0, _mm256_set1_ps(0.5f), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(2.0f), s12_0)); + z30 = _mm256_fmadd_ps(s56_0, _mm256_set1_ps(0.125f), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(8.0f), s12_0)); + + __m256 vbias = _mm256_set1_ps(bias); + z00 = _mm256_add_ps(vbias, z00); + z10 = _mm256_add_ps(vbias, z10); + z20 = _mm256_add_ps(vbias, z20); + z30 = _mm256_add_ps(vbias, z30); + z40 = _mm256_add_ps(vbias, z40); + z50 = _mm256_add_ps(vbias, z50); + } + + if (bpptr) + { + z00 = _mm256_add_ps(z00, _mm256_loadu_ps(bpptr)); + z10 = _mm256_add_ps(z10, _mm256_loadu_ps(bpptr + bpstep)); + z20 = _mm256_add_ps(z20, _mm256_loadu_ps(bpptr + bpstep*2)); + z30 = _mm256_add_ps(z30, _mm256_loadu_ps(bpptr + bpstep*3)); + z40 = 
_mm256_add_ps(z40, _mm256_loadu_ps(bpptr + bpstep*4)); + z50 = _mm256_add_ps(z50, _mm256_loadu_ps(bpptr + bpstep*5)); + } + + if (ifMinMaxAct) + { + __m256 vmax = _mm256_set1_ps(maxval); + __m256 vmin = _mm256_set1_ps(minval); + + z00 = _mm256_min_ps(_mm256_max_ps(z00, vmin), vmax); + z10 = _mm256_min_ps(_mm256_max_ps(z10, vmin), vmax); + z20 = _mm256_min_ps(_mm256_max_ps(z20, vmin), vmax); + z30 = _mm256_min_ps(_mm256_max_ps(z30, vmin), vmax); + z40 = _mm256_min_ps(_mm256_max_ps(z40, vmin), vmax); + z50 = _mm256_min_ps(_mm256_max_ps(z50, vmin), vmax); + } + + __m128 lowM, highM; + STORE6_ELE_FROM_16(outptr, z00, lowM, highM); + STORE6_ELE_FROM_16(outptr + outstep, z10, lowM, highM); + STORE6_ELE_FROM_16(outptr + outstep * 2, z20, lowM, highM); + STORE6_ELE_FROM_16(outptr + outstep * 3, z30, lowM, highM); + STORE6_ELE_FROM_16(outptr + outstep * 4, z40, lowM, highM); + STORE6_ELE_FROM_16(outptr + outstep * 5, z50, lowM, highM); + _mm256_zeroupper(); +} +#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +CV_CPU_OPTIMIZATION_NAMESPACE_END + +// NEON code work around. 
+namespace opt_NEON
+{
+
+#if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_NEON && CV_NEON_AARCH64
+/* Accumulate (forward declaration; the definition follows below) */
+void winofunc_accum_f32(const float* inwptr, const float* wptr, float* outbuf, int Cg, int iblock,
+                        const int winoIblock, const int winoKblock, const int winoAtomF32, const int winoNatomF32);
+
+/* Input transform (forward declaration; the definition follows below) */
+void winofunc_BtXB_8x8_f32(const float* inptr, int inpstep,
+                           float* outptr, int Cg, const int winoIblock, const int winoAtomF32);
+
+/* Output transform (forward declaration only; no definition is visible in this part of the file) */
+void winofunc_AtXA_8x8_f32(const float* inptr, int inpstep,
+                           float* bpptr, int bpstep, float* outptr, int outstep,
+                           float bias, float minval, float maxval, bool ifMinMaxAct);
+/* Accumulate step, NEON version.
+   For each of the winoNatomF32 atoms, sums w[k]*x[i] over the Cg channels,
+   where w[0..3] are 4 weight vectors and x[i] are input vectors (4 floats each):
+   6 input vectors when iblock > 3, otherwise only the first 3.
+   The sum for (k, i) is stored at outbuf + (k*6 + i)*64, and outbuf advances by
+   winoAtomF32 floats per atom.
+   inwptr and wptr are not rewound between atoms: they keep advancing by
+   winoIblock*winoAtomF32 and winoKblock*winoAtomF32 floats per channel.
+   NOTE(review): 64 is presumably the 8x8 Winograd tile size - confirm against the caller.
+   The layout constants are fixed by the CV_Assert below (6, 4 and 4). */
+void winofunc_accum_f32(const float* inwptr, const float* wptr, float* outbuf, int Cg, int iblock,
+                        const int winoIblock, const int winoKblock, const int winoAtomF32, const int winoNatomF32)
+{
+    CV_Assert(winoIblock == 6 && winoKblock == 4 && winoAtomF32 == 4);
+    if (iblock > 3) // wide path: all 6 input vectors of the block are used
+    {
+        for (int atom_id = 0; atom_id < winoNatomF32; atom_id++,
+                outbuf += winoAtomF32)
+        {
+            // 4 x 6 accumulators s<k><i>, all starting at zero
+            float32x4_t s00 = vdupq_n_f32(0.f), s01 = s00, s02 = s00, s03 = s00, s04 = s00, s05 = s00;
+            float32x4_t s10 = vdupq_n_f32(0.f), s11 = s00, s12 = s00, s13 = s00, s14 = s00, s15 = s00;
+            float32x4_t s20 = vdupq_n_f32(0.f), s21 = s00, s22 = s00, s23 = s00, s24 = s00, s25 = s00;
+            float32x4_t s30 = vdupq_n_f32(0.f), s31 = s00, s32 = s00, s33 = s00, s34 = s00, s35 = s00;
+            for (int c = 0; c < Cg; c++, inwptr += winoIblock*winoAtomF32,
+                                     wptr += winoKblock*winoAtomF32) {
+                float32x4_t w0 = vld1q_f32(wptr), w1 = vld1q_f32(wptr + 4);
+                float32x4_t w2 = vld1q_f32(wptr + 8), w3 = vld1q_f32(wptr + 12);
+                float32x4_t x0, x1; // the 6 input vectors are processed two at a time
+                x0 = vld1q_f32(inwptr);
+                x1 = vld1q_f32(inwptr + 4);
+                s00 = vfmaq_f32(s00, w0, x0);
+                s01 = vfmaq_f32(s01, w0, x1);
+                s10 = vfmaq_f32(s10, w1, x0);
+                s11 = vfmaq_f32(s11, w1, x1);
+                s20 = vfmaq_f32(s20, w2, x0);
+                s21 = vfmaq_f32(s21, w2, x1);
+                s30 = vfmaq_f32(s30, w3, x0);
+                s31 = vfmaq_f32(s31, w3, x1);
+                x0 = vld1q_f32(inwptr + 8);
+                x1 = vld1q_f32(inwptr + 12);
+                s02 = vfmaq_f32(s02, w0, x0);
+                s03 = vfmaq_f32(s03, w0, x1);
+                s12 = vfmaq_f32(s12, w1, x0);
+                s13 = vfmaq_f32(s13, w1, x1);
+                s22 = vfmaq_f32(s22, w2, x0);
+                s23 = vfmaq_f32(s23, w2, x1);
+                s32 = vfmaq_f32(s32, w3, x0);
+                s33 = vfmaq_f32(s33, w3, x1);
+                x0 = vld1q_f32(inwptr + 16);
+                x1 = vld1q_f32(inwptr + 20);
+                s04 = vfmaq_f32(s04, w0, x0);
+                s05 = vfmaq_f32(s05, w0, x1);
+                s14 = vfmaq_f32(s14, w1, x0);
+                s15 = vfmaq_f32(s15, w1, x1);
+                s24 = vfmaq_f32(s24, w2, x0);
+                s25 = vfmaq_f32(s25, w2, x1);
+                s34 = vfmaq_f32(s34, w3, x0);
+                s35 = vfmaq_f32(s35, w3, x1);
+            }
+
+            vst1q_f32(outbuf, s00);
+            vst1q_f32(outbuf + 1*64, s01);
+            vst1q_f32(outbuf + 2*64, s02);
+            vst1q_f32(outbuf + 3*64, s03);
+            vst1q_f32(outbuf + 4*64, s04);
+            vst1q_f32(outbuf + 5*64, s05);
+
+            vst1q_f32(outbuf + 6*64, s10);
+            vst1q_f32(outbuf + 7*64, s11);
+            vst1q_f32(outbuf + 8*64, s12);
+            vst1q_f32(outbuf + 9*64, s13);
+            vst1q_f32(outbuf + 10*64, s14);
+            vst1q_f32(outbuf + 11*64, s15);
+
+            vst1q_f32(outbuf + 12*64, s20);
+            vst1q_f32(outbuf + 13*64, s21);
+            vst1q_f32(outbuf + 14*64, s22);
+            vst1q_f32(outbuf + 15*64, s23);
+            vst1q_f32(outbuf + 16*64, s24);
+            vst1q_f32(outbuf + 17*64, s25);
+
+            vst1q_f32(outbuf + 18*64, s30);
+            vst1q_f32(outbuf + 19*64, s31);
+            vst1q_f32(outbuf + 20*64, s32);
+            vst1q_f32(outbuf + 21*64, s33);
+            vst1q_f32(outbuf + 22*64, s34);
+            vst1q_f32(outbuf + 23*64, s35);
+        }
+    }
+    else // narrow path: only the first 3 input vectors of each channel are read
+    {
+        for (int atom_id = 0; atom_id < winoNatomF32; atom_id++,
+                outbuf += winoAtomF32)
+        {
+            float32x4_t s00 = vdupq_n_f32(0.f), s01 = s00, s02 = s00;
+            float32x4_t s10 = vdupq_n_f32(0.f), s11 = s00, s12 = s00;
+            float32x4_t s20 = vdupq_n_f32(0.f), s21 = s00, s22 = s00;
+            float32x4_t s30 = vdupq_n_f32(0.f), s31 = s00, s32 = s00;
+            for (int c = 0; c < Cg; c++, inwptr += winoIblock*winoAtomF32, // still steps over a full 6-vector block
+                                     wptr += winoKblock*winoAtomF32) {
+                float32x4_t w0 = vld1q_f32(wptr), w1 = vld1q_f32(wptr + 4);
+                float32x4_t w2 = vld1q_f32(wptr + 8), w3 = vld1q_f32(wptr + 12);
+                float32x4_t x0, x1, x2;
+                x0 = vld1q_f32(inwptr);
+                x1 = vld1q_f32(inwptr + 4);
+                x2 = vld1q_f32(inwptr + 8);
+                s00 = vfmaq_f32(s00, w0, x0);
+                s01 = vfmaq_f32(s01, w0, x1);
+                s02 = vfmaq_f32(s02, w0, x2);
+                s10 = vfmaq_f32(s10, w1, x0);
+                s11 = vfmaq_f32(s11, w1, x1);
+                s12 = vfmaq_f32(s12, w1, x2);
+                s20 = vfmaq_f32(s20, w2, x0);
+                s21 = vfmaq_f32(s21, w2, x1);
+                s22 = vfmaq_f32(s22, w2, x2);
+                s30 = vfmaq_f32(s30, w3, x0);
+                s31 = vfmaq_f32(s31, w3, x1);
+                s32 = vfmaq_f32(s32, w3, x2);
+            }
+
+            // same (k*6 + i)*64 slots as the wide path; slots for i = 3..5 are left untouched
+            vst1q_f32(outbuf, s00);
+            vst1q_f32(outbuf + 1*64, s01);
+            vst1q_f32(outbuf + 2*64, s02);
+            vst1q_f32(outbuf + 6*64, s10);
+            vst1q_f32(outbuf + 7*64, s11);
+            vst1q_f32(outbuf + 8*64, s12);
+            vst1q_f32(outbuf + 12*64, s20);
+            vst1q_f32(outbuf + 13*64, s21);
+            vst1q_f32(outbuf + 14*64, s22);
+            vst1q_f32(outbuf + 18*64, s30);
+            vst1q_f32(outbuf + 19*64, s31);
+            vst1q_f32(outbuf + 20*64, s32);
+        }
+    }
+}
+/* In-place transpose of the 4x4 float matrix whose rows are a, b, c, d.
+   vtrnq_f32 interleaves the even and odd lanes of a row pair; the low and high
+   halves are then recombined so that a..d end up holding columns 0..3.
+   tr0 and tr1 are float32x4x2_t temporaries supplied by the caller. */
+#define T4x4(a, b, c, d, tr0, tr1) \
+    tr0 = vtrnq_f32(a, b); \
+    tr1 = vtrnq_f32(c, d); \
+    a = vcombine_f32(vget_low_f32(tr0.val[0]), vget_low_f32(tr1.val[0])); \
+    b = vcombine_f32(vget_low_f32(tr0.val[1]), vget_low_f32(tr1.val[1])); \
+    c = vcombine_f32(vget_high_f32(tr0.val[0]), vget_high_f32(tr1.val[0])); \
+    d = vcombine_f32(vget_high_f32(tr0.val[1]), vget_high_f32(tr1.val[1]))
+
+/* Input transform, NEON version.
+   Reads an 8x8 float tile (rows inpstep floats apart, two 4-float vectors per row),
+   applies the 8 row combinations listed in the Y[...] comments, transposes the
+   result, and applies the same combinations again (the Z[...] comments).
+   The 16 resulting vectors are stored at outptr + k*outstep for k = 0..15, in the
+   order z00, z01, z10, z11, ..., z70, z71, with outstep = winoIblock*winoAtomF32*Cg. */
+void winofunc_BtXB_8x8_f32(const float* inptr, int inpstep,
+                           float* outptr, int Cg, const int winoIblock, const int winoAtomF32)
+{
+    // x<r>0 holds columns 0..3 of row r, x<r>1 holds columns 4..7
+    float32x4_t x00 = vld1q_f32(inptr), x01 = vld1q_f32(inptr + 4);
+    float32x4_t x10 = vld1q_f32(inptr + inpstep), x11 = vld1q_f32(inptr + inpstep + 4);
+    float32x4_t x20 = vld1q_f32(inptr + inpstep*2), x21 = vld1q_f32(inptr + inpstep*2 + 4);
+    float32x4_t x30 = vld1q_f32(inptr + inpstep*3), x31 = vld1q_f32(inptr + inpstep*3 + 4);
+    float32x4_t x40 = vld1q_f32(inptr + inpstep*4), x41 = vld1q_f32(inptr + inpstep*4 + 4);
+    float32x4_t x50 = vld1q_f32(inptr + inpstep*5), x51 = vld1q_f32(inptr + inpstep*5 + 4);
+    float32x4_t x60 = vld1q_f32(inptr + inpstep*6), x61 = vld1q_f32(inptr + inpstep*6 + 4);
+    float32x4_t x70 = vld1q_f32(inptr + inpstep*7), x71 = vld1q_f32(inptr + inpstep*7 + 4);
+
+    float32x4_t z00, z01, z10, z11, z20, z21, z30, z31, z40, z41, z50, z51, z60, z61, z70, z71;
+
+    {
+        /* Y[0] = [1.f, 0.f, -5.25f, 0.f, 5.25f, 0.f, -1.f, 0.f]*X */
+        /* Y[7] = [0.f, -1.f, 0.f, 5.25f, 0.f, -5.25f, 0.f, 1.f]*X */
+        float32x4_t q5_25 = vdupq_n_f32(5.25f), t00, t01, t10, t11;
+        t00 = vsubq_f32(x40, x20);
+        t01 = vsubq_f32(x41, x21);
+        t10 = vsubq_f32(x30, x50);
+        t11 = vsubq_f32(x31, x51);
+        float32x4_t y00 = vfmaq_f32(vsubq_f32(x00, x60), t00, q5_25);
+        float32x4_t y01 = vfmaq_f32(vsubq_f32(x01, x61), t01, q5_25);
+        float32x4_t y70 = vfmaq_f32(vsubq_f32(x70, x10), t10, q5_25);
+        float32x4_t y71 = vfmaq_f32(vsubq_f32(x71, x11), t11, q5_25);
+
+        /* Y[1] = [0.f, 1.f, 1.f, -4.25f, -4.25f, 1.f, 1.f, 0.f]*X */
+        /* Y[2] = [0.f, -1.f, 1.f, 4.25f, -4.25f, -1.f, 1.f, 0.f]*X */
+        float32x4_t qm4_25 = vdupq_n_f32(-4.25f);
+        t00 = vfmaq_f32(vaddq_f32(x10, x50), x30, qm4_25); // odd-row part (rows 1, 3, 5)
+        t01 = vfmaq_f32(vaddq_f32(x11, x51), x31, qm4_25);
+        t10 = vfmaq_f32(vaddq_f32(x20, x60), x40, qm4_25); // even-row part (rows 2, 4, 6)
+        t11 = vfmaq_f32(vaddq_f32(x21, x61), x41, qm4_25);
+
+        float32x4_t y10 = vaddq_f32(t00, t10), y11 = vaddq_f32(t01, t11);
+        float32x4_t y20 = vsubq_f32(t10, t00), y21 = vsubq_f32(t11, t01);
+
+        /* Y[3] = [0.f, 0.5f, 0.25f, -2.5f, -1.25f, 2.f, 1.f, 0.f]*X */
+        /* Y[4] = [0.f, -0.5f, 0.25f, 2.5f, -1.25f, -2.f, 1.f, 0.f]*X */
+        float32x4_t q0_5 = vdupq_n_f32(0.5f), q0_25 = vdupq_n_f32(0.25f);
+        float32x4_t qm2_5 = vdupq_n_f32(-2.5f), qm1_25 = vdupq_n_f32(-1.25f);
+        t00 = vfmaq_f32(vaddq_f32(x50, x50), x10, q0_5); // x50 + x50 supplies the 2.f coefficient
+        t01 = vfmaq_f32(vaddq_f32(x51, x51), x11, q0_5);
+        t10 = vfmaq_f32(x60, x20, q0_25);
+        t11 = vfmaq_f32(x61, x21, q0_25);
+        t00 = vfmaq_f32(t00, x30, qm2_5);
+        t01 = vfmaq_f32(t01, x31, qm2_5);
+        t10 = vfmaq_f32(t10, x40, qm1_25);
+        t11 = vfmaq_f32(t11, x41, qm1_25);
+
+        float32x4_t y30 = vaddq_f32(t00, t10), y31 = vaddq_f32(t01, t11);
+        float32x4_t y40 = vsubq_f32(t10, t00), y41 = vsubq_f32(t11, t01);
+
+        /* Y[5] = [0.f, 2.f, 4.f, -2.5f, -5.f, 0.5f, 1.f, 0.f]*X */
+        /* Y[6] = [0.f, -2.f, 4.f, 2.5f, -5.f, -0.5f, 1.f, 0.f]*X */
+        float32x4_t q4 = vdupq_n_f32(4.f), qm5 = vdupq_n_f32(-5.f);
+        t00 = vfmaq_f32(vaddq_f32(x10, x10), x50, q0_5);
+        t01 = vfmaq_f32(vaddq_f32(x11, x11), x51, q0_5);
+        t10 = vfmaq_f32(x60, x20, q4);
+        t11 = vfmaq_f32(x61, x21, q4);
+        t00 = vfmaq_f32(t00, x30, qm2_5);
+        t01 = vfmaq_f32(t01, x31, qm2_5);
+        t10 = vfmaq_f32(t10, x40, qm5);
+        t11 = vfmaq_f32(t11, x41, qm5);
+
+        float32x4_t y50 = vaddq_f32(t00, t10), y51 = vaddq_f32(t01, t11);
+        float32x4_t y60 = vsubq_f32(t10, t00), y61 = vsubq_f32(t11, t01);
+
+        /* transpose 8x8 matrix in-place with some renumbering of the elements: */
+        /* Y: */
+        /* y00 y01 */
+        /* y10 y11 */
+        /* ... */
+        /* y70 y71 */
+        /* Y': */
+        /* y00 y40 */
+        /* y10 y50 */
+        /* y20 y60 */
+        /* y30 y70 */
+        /* y01 y41 */
+        /* y11 y51 */
+        /* y21 y61 */
+        /* y31 y71 */
+        /* in other words, y40 <-> y01, y50 <-> y11, y60 <-> y21, y70 <-> y31 */
+        float32x4x2_t tr0, tr1;
+
+        // each 4x4 quadrant is transposed in place; the off-diagonal quadrants are not swapped,
+        // which is why the second pass below reads the registers in the renumbered order
+        T4x4(y00, y10, y20, y30, tr0, tr1);
+        T4x4(y01, y11, y21, y31, tr0, tr1);
+        T4x4(y40, y50, y60, y70, tr0, tr1);
+        T4x4(y41, y51, y61, y71, tr0, tr1);
+
+        /* Z[0] = [1.f, 0.f, -5.25f, 0.f, 5.25f, 0.f, -1.f, 0.f]*Y */
+        /* Z[7] = [0.f, -1.f, 0.f, 5.25f, 0.f, -5.25f, 0.f, 1.f]*Y */
+        t00 = vsubq_f32(y01, y20);
+        t01 = vsubq_f32(y41, y60);
+        t10 = vsubq_f32(y30, y11);
+        t11 = vsubq_f32(y70, y51);
+        z00 = vfmaq_f32(vsubq_f32(y00, y21), t00, q5_25);
+        z01 = vfmaq_f32(vsubq_f32(y40, y61), t01, q5_25);
+        z70 = vfmaq_f32(vsubq_f32(y31, y10), t10, q5_25);
+        z71 = vfmaq_f32(vsubq_f32(y71, y50), t11, q5_25);
+
+        /* Z[1] = [0.f, 1.f, 1.f, -4.25f, -4.25f, 1.f, 1.f, 0.f]*Y */
+        /* Z[2] = [0.f, -1.f, 1.f, 4.25f, -4.25f, -1.f, 1.f, 0.f]*Y */
+        t00 = vfmaq_f32(vaddq_f32(y10, y11), y30, qm4_25);
+        t01 = vfmaq_f32(vaddq_f32(y50, y51), y70, qm4_25);
+        t10 = vfmaq_f32(vaddq_f32(y20, y21), y01, qm4_25);
+        t11 = vfmaq_f32(vaddq_f32(y60, y61), y41, qm4_25);
+
+        z10 = vaddq_f32(t00, t10); z11 = vaddq_f32(t01, t11);
+        z20 = vsubq_f32(t10, t00); z21 = vsubq_f32(t11, t01);
+
+        /* Z[3] = [0.f, 0.5f, 0.25f, -2.5f, -1.25f, 2.f, 1.f, 0.f]*Y */
+        /* Z[4] = [0.f, -0.5f, 0.25f, 2.5f, -1.25f, -2.f, 1.f, 0.f]*Y */
+        t00 = vfmaq_f32(vaddq_f32(y11, y11), y10, q0_5);
+        t01 = vfmaq_f32(vaddq_f32(y51, y51), y50, q0_5);
+        t10 = vfmaq_f32(y21, y20, q0_25);
+        t11 = vfmaq_f32(y61, y60, q0_25);
+        t00 = vfmaq_f32(t00, y30, qm2_5);
+        t01 = vfmaq_f32(t01, y70, qm2_5);
+        t10 = vfmaq_f32(t10, y01, qm1_25);
+        t11 = vfmaq_f32(t11, y41, qm1_25);
+
+        z30 = vaddq_f32(t00, t10); z31 = vaddq_f32(t01, t11);
+        z40 = vsubq_f32(t10, t00); z41 = vsubq_f32(t11, t01);
+
+        /* Z[5] = [0.f, 2.f, 4.f, -2.5f, -5.f, 0.5f, 1.f, 0.f]*Y */
+        /* Z[6] = [0.f, -2.f, 4.f, 2.5f, -5.f, -0.5f, 1.f, 0.f]*Y */
+        t00 = vfmaq_f32(vaddq_f32(y10, y10), y11, q0_5);
+        t01 = vfmaq_f32(vaddq_f32(y50, y50), y51, q0_5);
+        t10 = vfmaq_f32(y21, y20, q4);
+        t11 = vfmaq_f32(y61, y60, q4);
+        t00 = vfmaq_f32(t00, y30, qm2_5);
+        t01 = vfmaq_f32(t01, y70, qm2_5);
+        t10 = vfmaq_f32(t10, y01, qm5);
+        t11 = vfmaq_f32(t11, y41, qm5);
+
+        z50 = vaddq_f32(t00, t10); z51 = vaddq_f32(t01, t11);
+        z60 = vsubq_f32(t10, t00); z61 = vsubq_f32(t11, t01);
+    }
+
+    const int outstep = winoIblock*winoAtomF32*Cg; // distance in floats between consecutive output vectors
+
+    vst1q_f32(outptr, z00);
+    vst1q_f32(outptr + outstep, z01);
+    vst1q_f32(outptr + outstep*2, z10);
+    vst1q_f32(outptr + outstep*3, z11);
+    vst1q_f32(outptr + outstep*4, z20);
+    vst1q_f32(outptr + outstep*5, z21);
+    vst1q_f32(outptr + outstep*6, z30);
+    vst1q_f32(outptr + outstep*7, z31);
+    vst1q_f32(outptr + outstep*8, z40);
+    vst1q_f32(outptr + outstep*9, z41);
+    vst1q_f32(outptr + outstep*10, z50);
+    vst1q_f32(outptr + outstep*11, z51);
+    vst1q_f32(outptr + outstep*12, z60);
+    vst1q_f32(outptr + outstep*13, z61);
+    vst1q_f32(outptr + outstep*14, z70);
+    vst1q_f32(outptr + outstep*15, z71);
+}
+
+/*Output transform*/ +void winofunc_AtXA_8x8_f32(const float* inptr, int inpstep, + float* bpptr, int bpstep, float* outptr, int outstep, + float bias, float minval, float maxval, bool ifMinMaxAct) +{ + float32x4_t x00 = vld1q_f32(inptr), x01 = vld1q_f32(inptr + 4); + float32x4_t x10 = vld1q_f32(inptr + inpstep), x11 = vld1q_f32(inptr + inpstep + 4); + float32x4_t x20 = vld1q_f32(inptr + inpstep*2), x21 = vld1q_f32(inptr + inpstep*2 + 4); + float32x4_t x30 = vld1q_f32(inptr + inpstep*3), x31 = vld1q_f32(inptr + inpstep*3 + 4); + float32x4_t x40 = vld1q_f32(inptr + inpstep*4), x41 = vld1q_f32(inptr + inpstep*4 + 4); + float32x4_t x50 = vld1q_f32(inptr + inpstep*5), x51 = vld1q_f32(inptr + inpstep*5 + 4); + float32x4_t x60 = vld1q_f32(inptr + inpstep*6), x61 = vld1q_f32(inptr + inpstep*6 + 4); + float32x4_t x70 = vld1q_f32(inptr + inpstep*7), x71 = vld1q_f32(inptr + inpstep*7 + 4); + float32x4_t z00, z01, z10, z11, z20, z21, z30, z31, z40, z41, z50, z51; + + { + float32x4_t s12_0, s12_1, s34_0, s34_1, s56_0, s56_1; + s12_0 = vaddq_f32(x10, x20); s12_1 = vaddq_f32(x11, x21); + s34_0 = vaddq_f32(x30, x40); s34_1 = vaddq_f32(x31, x41); + s56_0 = vaddq_f32(x50, x60); s56_1 = vaddq_f32(x51, x61); + + float32x4_t y00 = vaddq_f32(vaddq_f32(vaddq_f32(x00, s12_0), s34_0), s56_0); + float32x4_t y01 = vaddq_f32(vaddq_f32(vaddq_f32(x01, s12_1), s34_1), s56_1); + float32x4_t y20 = vfmaq_n_f32(vfmaq_n_f32(s12_0, s34_0, 4.0f), s56_0, 0.25f); + float32x4_t y21 = vfmaq_n_f32(vfmaq_n_f32(s12_1, s34_1, 4.0f), s56_1, 0.25f); + float32x4_t y40 = vfmaq_n_f32(vfmaq_n_f32(s12_0, s34_0, 16.0f), s56_0, 1.f/16); + float32x4_t y41 = vfmaq_n_f32(vfmaq_n_f32(s12_1, s34_1, 16.0f), s56_1, 1.f/16); + + s12_0 = vsubq_f32(x10, x20); s12_1 = vsubq_f32(x11, x21); + s34_0 = vsubq_f32(x30, x40); s34_1 = vsubq_f32(x31, x41); + s56_0 = vsubq_f32(x50, x60); s56_1 = vsubq_f32(x51, x61); + + float32x4_t y50 = vfmaq_n_f32(vfmaq_n_f32(vaddq_f32(x70, s12_0), + s34_0, 32.f), s56_0, 1.f/32); + float32x4_t y51 = 
vfmaq_n_f32(vfmaq_n_f32(vaddq_f32(x71, s12_1), + s34_1, 32.f), s56_1, 1.f/32); + float32x4_t y10 = vfmaq_n_f32(vfmaq_n_f32(s12_0, s34_0, 2.0f), s56_0, 0.5f); + float32x4_t y11 = vfmaq_n_f32(vfmaq_n_f32(s12_1, s34_1, 2.0f), s56_1, 0.5f); + float32x4_t y30 = vfmaq_n_f32(vfmaq_n_f32(s12_0, s34_0, 8.0f), s56_0, 0.125f); + float32x4_t y31 = vfmaq_n_f32(vfmaq_n_f32(s12_1, s34_1, 8.0f), s56_1, 0.125f); + float32x4_t y60 = vdupq_n_f32(0.f), y61 = y60, y70 = y60, y71 = y60; + + /* transpose 8x8 matrix in-place with some renumeration of the elements: */ + /* Y: */ + /* y00 y01 */ + /* y10 y11 */ + /* ... */ + /* y50 y51 */ + /* 0 0 */ + /* 0 0 */ + /* Y': */ + /* y00 y40 */ + /* y10 y50 */ + /* y20 y60 */ + /* y30 y70 */ + /* y01 y41 */ + /* y11 y51 */ + /* y21 y61 */ + /* y31 y71 */ + /* in other words, y40 <-> y01, y50 <-> y11, y60 <-> y21, y70 <-> y31 */ + float32x4x2_t tr0, tr1; + + T4x4(y00, y10, y20, y30, tr0, tr1); + T4x4(y01, y11, y21, y31, tr0, tr1); + T4x4(y40, y50, y60, y70, tr0, tr1); + T4x4(y41, y51, y61, y71, tr0, tr1); + + s12_0 = vaddq_f32(y10, y20); s12_1 = vaddq_f32(y50, y60); + s34_0 = vaddq_f32(y30, y01); s34_1 = vaddq_f32(y70, y41); + s56_0 = vaddq_f32(y11, y21); s56_1 = vaddq_f32(y51, y61); + + z00 = vaddq_f32(vaddq_f32(vaddq_f32(y00, s12_0), s34_0), s56_0); + z01 = vaddq_f32(vaddq_f32(vaddq_f32(y40, s12_1), s34_1), s56_1); + z20 = vfmaq_n_f32(vfmaq_n_f32(s12_0, s34_0, 4.0f), s56_0, 0.25f); + z21 = vfmaq_n_f32(vfmaq_n_f32(s12_1, s34_1, 4.0f), s56_1, 0.25f); + z40 = vfmaq_n_f32(vfmaq_n_f32(s12_0, s34_0, 16.0f), s56_0, 1.f/16); + z41 = vfmaq_n_f32(vfmaq_n_f32(s12_1, s34_1, 16.0f), s56_1, 1.f/16); + + s12_0 = vsubq_f32(y10, y20); s12_1 = vsubq_f32(y50, y60); + s34_0 = vsubq_f32(y30, y01); s34_1 = vsubq_f32(y70, y41); + s56_0 = vsubq_f32(y11, y21); s56_1 = vsubq_f32(y51, y61); + + z50 = vfmaq_n_f32(vfmaq_n_f32(vaddq_f32(y31, s12_0), + s34_0, 32.f), s56_0, 1.f/32); + z51 = vfmaq_n_f32(vfmaq_n_f32(vaddq_f32(y71, s12_1), + s34_1, 32.f), s56_1, 1.f/32); + z10 = 
vfmaq_n_f32(vfmaq_n_f32(s12_0, s34_0, 2.0f), s56_0, 0.5f); + z11 = vfmaq_n_f32(vfmaq_n_f32(s12_1, s34_1, 2.0f), s56_1, 0.5f); + z30 = vfmaq_n_f32(vfmaq_n_f32(s12_0, s34_0, 8.0f), s56_0, 0.125f); + z31 = vfmaq_n_f32(vfmaq_n_f32(s12_1, s34_1, 8.0f), s56_1, 0.125f); + float32x4_t vbias = vdupq_n_f32(bias); + + z00 = vaddq_f32(z00, vbias); + z01 = vaddq_f32(z01, vbias); + z10 = vaddq_f32(z10, vbias); + z11 = vaddq_f32(z11, vbias); + z20 = vaddq_f32(z20, vbias); + z21 = vaddq_f32(z21, vbias); + z30 = vaddq_f32(z30, vbias); + z31 = vaddq_f32(z31, vbias); + z40 = vaddq_f32(z40, vbias); + z41 = vaddq_f32(z41, vbias); + z50 = vaddq_f32(z50, vbias); + z51 = vaddq_f32(z51, vbias); + } + + if (bpptr) + { + float32x2_t zhalf = vdup_n_f32(0.f); + z00 = vaddq_f32(z00, vld1q_f32(bpptr)); + z01 = vaddq_f32(z01, vcombine_f32(vld1_f32(bpptr + 4), zhalf)); + z10 = vaddq_f32(z10, vld1q_f32(bpptr + bpstep)); + z11 = vaddq_f32(z11, vcombine_f32(vld1_f32(bpptr + bpstep + 4), zhalf)); + z20 = vaddq_f32(z20, vld1q_f32(bpptr + bpstep*2)); + z21 = vaddq_f32(z21, vcombine_f32(vld1_f32(bpptr + bpstep*2 + 4), zhalf)); + z30 = vaddq_f32(z30, vld1q_f32(bpptr + bpstep*3)); + z31 = vaddq_f32(z31, vcombine_f32(vld1_f32(bpptr + bpstep*3 + 4), zhalf)); + z40 = vaddq_f32(z40, vld1q_f32(bpptr + bpstep*4)); + z41 = vaddq_f32(z41, vcombine_f32(vld1_f32(bpptr + bpstep*4 + 4), zhalf)); + z50 = vaddq_f32(z50, vld1q_f32(bpptr + bpstep*5)); + z51 = vaddq_f32(z51, vcombine_f32(vld1_f32(bpptr + bpstep*5 + 4), zhalf)); + } + + if (ifMinMaxAct) + { + float32x4_t vmax = vdupq_n_f32(maxval); + float32x4_t vmin = vdupq_n_f32(minval); + + z00 = vminq_f32(vmaxq_f32(z00, vmin), vmax); + z01 = vminq_f32(vmaxq_f32(z01, vmin), vmax); + z10 = vminq_f32(vmaxq_f32(z10, vmin), vmax); + z11 = vminq_f32(vmaxq_f32(z11, vmin), vmax); + z20 = vminq_f32(vmaxq_f32(z20, vmin), vmax); + z21 = vminq_f32(vmaxq_f32(z21, vmin), vmax); + z30 = vminq_f32(vmaxq_f32(z30, vmin), vmax); + z31 = vminq_f32(vmaxq_f32(z31, vmin), vmax); + z40 = 
vminq_f32(vmaxq_f32(z40, vmin), vmax); + z41 = vminq_f32(vmaxq_f32(z41, vmin), vmax); + z50 = vminq_f32(vmaxq_f32(z50, vmin), vmax); + z51 = vminq_f32(vmaxq_f32(z51, vmin), vmax); + } + + vst1q_f32(outptr, z00); + vst1_f32(outptr + 4, vget_low_f32(z01)); + vst1q_f32(outptr + outstep, z10); + vst1_f32(outptr + outstep + 4, vget_low_f32(z11)); + vst1q_f32(outptr + outstep*2, z20); + vst1_f32(outptr + outstep*2 + 4, vget_low_f32(z21)); + vst1q_f32(outptr + outstep*3, z30); + vst1_f32(outptr + outstep*3 + 4, vget_low_f32(z31)); + vst1q_f32(outptr + outstep*4, z40); + vst1_f32(outptr + outstep*4 + 4, vget_low_f32(z41)); + vst1q_f32(outptr + outstep*5, z50); + vst1_f32(outptr + outstep*5 + 4, vget_low_f32(z51)); +} + +#endif +} + +}} // namespace diff --git a/modules/dnn/src/layers/fast_convolution/fast_convolution.cpp b/modules/dnn/src/layers/cpu_kernels/convolution.cpp similarity index 73% rename from modules/dnn/src/layers/fast_convolution/fast_convolution.cpp rename to modules/dnn/src/layers/cpu_kernels/convolution.cpp index 51abf8facc..0f0da11ec7 100644 --- a/modules/dnn/src/layers/fast_convolution/fast_convolution.cpp +++ b/modules/dnn/src/layers/cpu_kernels/convolution.cpp @@ -10,11 +10,19 @@ */ #include "../../precomp.hpp" -#include "fast_convolution.hpp" -#include "fast_convolution.simd.hpp" +#include "convolution.hpp" + +#include "conv_block.simd.hpp" +#include "layers/cpu_kernels/conv_block.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content namespace cv { namespace dnn { enum { VEC_ALIGN = 32, DFT_TYPE = CV_32F }; // Memory alignment. 
+ +void convBlock(int np, const float* a, const float* b, float* c, int ldc, bool init_c, const int outLen, + const int convMR, const int convNR); +void convBlockMR1(int np, const float* a, const float* b, float *c, const float bias, bool init_c, + const float minval, const float maxval, bool ifMinMaxAct, const int outLen, const int convNR); + Ptr initFastConv( InputArray _weightsMat, float* srcBias, @@ -94,21 +102,15 @@ Ptr initFastConv( } } - conv->conv_type = ifRunDepthWise && conv_dim != CONV_3D ? _FX_CONV_TYPE_DEPTHWISE : + conv->conv_type = ifRunDepthWise && conv_dim != CONV_3D ? CONV_TYPE_DEPTHWISE : useWinograd && (conv_dim == CONV_2D && (conv->useSIMD128 || conv->useAVX2 || conv->useNEON) && Hk == 3 && Wk == 3 && dilation_h == 1 && dilation_w == 1 && stride_h == 1 && stride_w == 1) ? - _FX_CONV_TYPE_WINOGRAD3X3 : - (ifRunDepthWiseRemain ? _FX_CONV_TYPE_DEPTHWISE_REMAIN : _FX_CONV_TYPE_GENERIC); + CONV_TYPE_WINOGRAD3X3 : + (ifRunDepthWiseRemain ? CONV_TYPE_DEPTHWISE_REMAIN : CONV_TYPE_GENERIC); #if !(CV_NEON || CV_SIMD128 || CV_TRY_AVX2) - if (conv->conv_type == _FX_CONV_TYPE_WINOGRAD3X3) // Disabel Winograd when CV_NEON, CV_SIMD128 and CV_TRY_AVX2 are not available. - conv->conv_type = _FX_CONV_TYPE_GENERIC; -#endif - -#if CV_TRY_AVX2 - // Disabel Winograd when CV_TRY_AVX2 is true, but conv->useAVX2 is false. - if (conv->conv_type == _FX_CONV_TYPE_WINOGRAD3X3 && !conv->useAVX2) - conv->conv_type = _FX_CONV_TYPE_GENERIC; + if (conv->conv_type == CONV_TYPE_WINOGRAD3X3) // Disabel Winograd when CV_NEON, CV_SIMD128 and CV_TRY_AVX2 are not available. 
+ conv->conv_type = CONV_TYPE_GENERIC; #endif Mat weightsMat = _weightsMat.getMat(); @@ -116,7 +118,7 @@ Ptr initFastConv( const size_t wstep = weightsMat.step1(); float *srcWeights = (float *)weightsMat.data; - if (conv->conv_type == _FX_CONV_TYPE_DEPTHWISE || conv->conv_type == _FX_CONV_TYPE_DEPTHWISE_REMAIN) + if (conv->conv_type == CONV_TYPE_DEPTHWISE || conv->conv_type == CONV_TYPE_DEPTHWISE_REMAIN) { // Handle the Conv1D, Conv2D and Conv3D depth-wise. // for depth-wise convolutions on NCHW data we just preserve the weights in KCHW layout, @@ -138,7 +140,7 @@ Ptr initFastConv( weightsBufPtr[c*padded_ksize + k] = srcWeights[c*wstep + k]; }}); } - else if(conv->conv_type == _FX_CONV_TYPE_WINOGRAD3X3) // winograd + else if(conv->conv_type == CONV_TYPE_WINOGRAD3X3) // winograd { static const float ktm[8][3] = { {1.0f, 0.0f, 0.0f}, @@ -156,24 +158,24 @@ Ptr initFastConv( // where W is the size of Winograd-transformed kernel (8x8), // ATOM_SIZE is number of lanes in SIMD register (4 for NEON and FP32), // KBLOCK is some platform-dependent constant dependent on the number of SIMD registers. 
- int ksize = _FX_WINO_KSIZE * _FX_WINO_KSIZE; + int ksize = CONV_WINO_KSIZE * CONV_WINO_KSIZE; int Cg = C/ngroups; int Kg = K/ngroups; - int Kg_nblocks = (Kg + _FX_WINO_KBLOCK - 1)/_FX_WINO_KBLOCK; - size_t nweights = ngroups*Kg_nblocks*Cg*_FX_WINO_KBLOCK*_FX_WINO_AREA; + int Kg_nblocks = (Kg + CONV_WINO_KBLOCK - 1)/CONV_WINO_KBLOCK; + size_t nweights = ngroups*Kg_nblocks*Cg*CONV_WINO_KBLOCK*CONV_WINO_AREA; conv->weightsWinoBuf.reserve(nweights + VEC_ALIGN); conv->weightsWinoBufPtr = alignPtr(conv->weightsWinoBuf.data(), VEC_ALIGN); float* wptrWino = conv->weightsWinoBufPtr; memset(wptrWino, 0, nweights * sizeof(wptrWino[0])); parallel_for_(Range(0, K), [&](const Range& r0){ - float kernelTm[_FX_WINO_AREA]; + float kernelTm[CONV_WINO_AREA]; for (int k = r0.start; k < r0.end; k++) { int g = k / Kg; int k_ = k - g*Kg; - int ki = k_ / _FX_WINO_KBLOCK; - int dk = k_ - ki*_FX_WINO_KBLOCK; + int ki = k_ / CONV_WINO_KBLOCK; + int dk = k_ - ki*CONV_WINO_KBLOCK; for (int c = 0; c < Cg; c++) { @@ -204,18 +206,18 @@ Ptr initFastConv( } // repack the data. 
- float* wptr = wptrWino + (g*Kg_nblocks + ki) * Cg *_FX_WINO_KBLOCK*_FX_WINO_AREA + - (c*_FX_WINO_KBLOCK + dk)*_FX_WINO_ATOM_F32; - for (int i = 0; i < _FX_WINO_NATOMS_F32; i++, - wptr += Cg * _FX_WINO_KBLOCK * _FX_WINO_ATOM_F32) + float* wptr = wptrWino + (g*Kg_nblocks + ki) * Cg *CONV_WINO_KBLOCK*CONV_WINO_AREA + + (c*CONV_WINO_KBLOCK + dk)*CONV_WINO_ATOM_F32; + for (int i = 0; i < CONV_WINO_NATOMS_F32; i++, + wptr += Cg * CONV_WINO_KBLOCK * CONV_WINO_ATOM_F32) { - CV_Assert(conv->weightsWinoBufPtr <= wptr && wptr + _FX_WINO_ATOM_F32 <= conv->weightsWinoBufPtr + nweights); - memcpy(wptr, kernelTm + i * _FX_WINO_ATOM_F32, _FX_WINO_ATOM_F32*sizeof (wptr[0])); + CV_Assert(conv->weightsWinoBufPtr <= wptr && wptr + CONV_WINO_ATOM_F32 <= conv->weightsWinoBufPtr + nweights); + memcpy(wptr, kernelTm + i * CONV_WINO_ATOM_F32, CONV_WINO_ATOM_F32*sizeof (wptr[0])); } } }}); } - else if (conv->conv_type == _FX_CONV_TYPE_GENERIC) + else if (conv->conv_type == CONV_TYPE_GENERIC) { // The weights are packed as // ngroups x (ceil((K/ngroups)/CONV_MR)*CONV_MR) x (Cg*Hk*Wk*Dk) x CONV_MR tensor @@ -372,7 +374,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co fusedAddMat = _output.getMat(); } - if (conv->conv_type == _FX_CONV_TYPE_DEPTHWISE) + if (conv->conv_type == CONV_TYPE_DEPTHWISE) { // Depthwise-Convolution layer should not be followed by Add layer. 
CV_Assert((conv_dim == CONV_1D || conv_dim == CONV_2D)); @@ -420,7 +422,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co else activ = nullptr; - if (conv->conv_type == _FX_CONV_TYPE_WINOGRAD3X3) // winograd + if (conv->conv_type == CONV_TYPE_WINOGRAD3X3) // winograd { CV_Assert(conv->weightsWinoBufPtr && input.dims == 4 && conv_dim == CONV_2D); if (runWinograd63(input, fusedAddMat, output, conv, ntasks, minval, maxval, activ, ifMinMaxAct)) @@ -454,8 +456,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co int dilation_d = conv->dilation_d, dilation_h = conv->dilation_h, dilation_w = conv->dilation_w; int ksize = Dk*Hk*Wk; - bool fast_1x1 = ksize == 1 && stride_d == 1 && stride_w == 1 && stride_h == 1 && - pad_front == 0 && pad_top == 0 && pad_left == 0; + bool fast_1x1 = ksize == 1 && stride_d == 1 && stride_w == 1 && stride_h == 1; int DkHkWkCg = Dk*Hk*Wk*Cg; std::vector ofstab_(Hk*Wk*Dk*4, 0); @@ -504,14 +505,14 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co int MAX_STRIPES = (56 + CONV_NR - 1)/CONV_NR; // Friendly to L1 cache - const int K_BLOCK_SIZE = conv->conv_type == _FX_CONV_TYPE_DEPTHWISE_REMAIN ? 1 : 32; + const int K_BLOCK_SIZE = conv->conv_type == CONV_TYPE_DEPTHWISE_REMAIN ? 
1 : 32; const int C_BLOCK_SIZE = 256; int Kg_nblocks = (Kg + CONV_MR-1)/CONV_MR, Kg_aligned = Kg_nblocks * CONV_MR; int stripes_per_sample = ((int)out_planesize + CONV_NR - 1) / CONV_NR; - if (stripes_per_sample < ntasks * 4 && conv->conv_type != _FX_CONV_TYPE_DEPTHWISE_REMAIN) + if (stripes_per_sample < ntasks * 4 && conv->conv_type != CONV_TYPE_DEPTHWISE_REMAIN) { MAX_STRIPES = 1; stripes_per_sample = 1; @@ -555,7 +556,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co int k0, k1; int zyx0, zyx_limit, zyx_block_limit = 0; - if (stripes_per_sample == 1 && conv->conv_type != _FX_CONV_TYPE_DEPTHWISE_REMAIN) + if (stripes_per_sample == 1 && conv->conv_type != CONV_TYPE_DEPTHWISE_REMAIN) { k0 = kzyx0 * CONV_MR; k1 = kzyx1 * CONV_MR; @@ -618,7 +619,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co } } } - else if (conv->conv_type == _FX_CONV_TYPE_DEPTHWISE_REMAIN) + else if (conv->conv_type == CONV_TYPE_DEPTHWISE_REMAIN) { CV_Assert(Cg == 1); const int HW0 = H0 * W0; @@ -928,7 +929,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co // spacial branch for depth-wise convolution implemented using generic convolution. // In this case, CONV_MR is 1, and CONV_NR is the same. 
- if (conv->conv_type == _FX_CONV_TYPE_DEPTHWISE_REMAIN) + if (conv->conv_type == CONV_TYPE_DEPTHWISE_REMAIN) { size_t outofs = (n * ngroups + g) * out_planesize + zyx0; float *cptr0 = cbuf_task; @@ -947,12 +948,8 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co memcpy(cptr0, cptr, outLen * sizeof(cptr[0])); cptr = cptr0; } -#if CV_TRY_AVX2 - if (conv->useAVX2 && outLen > CONV_NR/3) - opt_AVX2::convBlockMR1(DkHkWkCg, weights, inptr, cptr, biasVal, fusedAdd, minval, maxval, ifMinMaxAct); - else -#endif - convBlockMR1(DkHkWkCg, weights, inptr, cptr, biasVal, fusedAdd, minval, maxval, ifMinMaxAct, outLen); + + convBlockMR1(DkHkWkCg, weights, inptr, cptr, biasVal, fusedAdd, minval, maxval, ifMinMaxAct, outLen, CONV_NR); if (ifBuffer) { @@ -980,7 +977,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co { const int outLen = std::min(out_width - stripe * CONV_NR, CONV_NR); -#if CV_TRY_AVX2 || CV_TRY_NEON +#if CV_TRY_AVX || CV_TRY_AVX2 || CV_NEON // The possible CONV_NR is 28, 24, 12, so the possible CONV_NR/3 is 9, 8, 4. bool runOpt = outLen > std::min(8, CONV_NR/3); #endif @@ -992,16 +989,21 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co { #if CV_TRY_AVX2 if (conv->useAVX2 && runOpt) - opt_AVX2::convBlock_AVX2(c1 - c0, wptr, inptr, cptr, ldc, c0 == 0); + opt_AVX2::convBlock(c1 - c0, wptr, inptr, cptr, ldc, c0 == 0, CONV_MR, CONV_NR); else #endif -#if CV_TRY_NEON +#if CV_TRY_AVX + if (conv->useAVX && runOpt) + opt_AVX::convBlock(c1 - c0, wptr, inptr, cptr, ldc, c0 == 0, CONV_MR, CONV_NR); + else +#endif +#if CV_NEON if (conv->useNEON && runOpt) - opt_NEON::convBlock_NEON(c1 - c0, wptr, inptr, cptr, ldc, c0 == 0); + opt_NEON::convBlock(c1 - c0, wptr, inptr, cptr, ldc, c0 == 0, CONV_MR, CONV_NR); else #endif // The possible outLen range is 24 or 8~1. 
- convBlock(c1 - c0, wptr, inptr, cptr, ldc, c0 == 0, outLen); + convBlock(c1 - c0, wptr, inptr, cptr, ldc, c0 == 0, outLen, CONV_MR, CONV_NR); } } } @@ -1087,4 +1089,466 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co } }); } + + +/****************************************************************************************\ + SIMD and no-SIMD code for convBlock +\****************************************************************************************/ + +static void convBlockMR1NoSIMD(int np, const float* a, const float* b, float *c, const float bias, bool init_c, + const float minval, const float maxval, bool ifMinMaxAct, const int outLen, const int convNR) +{ + std::vector cbuffer(outLen, 0); + float* cbuf = cbuffer.data(); + for( int p = 0; p < np; p++ ) + { + float ai = a[p]; + for( int j = 0; j < outLen; j++ ) + cbuf[j] += b[convNR*p + j] * ai; + } + + if (init_c) + { + for(int j = 0; j < outLen; j++) + { + c[j] += cbuf[j] + bias; + if (ifMinMaxAct) + c[j] = std::min(std::max(c[j], minval), maxval); + } + } + else + { + for(int j = 0; j < outLen; j++) + { + c[j] = cbuf[j] + bias; + if (ifMinMaxAct) + c[j] = std::min(std::max(c[j], minval), maxval); + } + } +} + +#if CV_SIMD128 +static void convBlockMR1x28(int np, const float* a, const float* b, float *c, const float bias, bool init_c, + const float minval, const float maxval, bool ifMinMaxAct, const int outLen, const int convNR) +{ + CV_Assert(convNR == 28); + v_float32x4 c0 = v_setall_f32(bias), c1 = c0, c2 = c0; + v_float32x4 c3 = c0, c4 = c0, c5 = c0; + v_float32x4 c6 = c0; + + for (int p = 0; p < np; p++, a++, b += convNR) + { + v_float32x4 a0 = v_setall_f32(a[0]); + v_float32x4 b0 = v_load(b), b1 = v_load(b + 4), b2 = v_load(b + 8); + v_float32x4 b3 = v_load(b + 12), b4 = v_load(b + 16), b5 = v_load(b + 20); + v_float32x4 b6 = v_load(b + 24); + + c0 = v_fma(b0, a0, c0); + c1 = v_fma(b1, a0, c1); + c2 = v_fma(b2, a0, c2); + c3 = v_fma(b3, a0, c3); + c4 = v_fma(b4, a0, c4); + c5 = 
v_fma(b5, a0, c5); + c6 = v_fma(b6, a0, c6); + } + + if (init_c) + { + c0 += v_load(c); + c1 += v_load(c + 4); + c2 += v_load(c + 8); + c3 += v_load(c + 12); + c4 += v_load(c + 16); + c5 += v_load(c + 20); + c6 += v_load(c + 24); + } + + if (ifMinMaxAct) + { + v_float32x4 vmax = v_setall_f32(maxval), vmin = v_setall_f32(minval); + c0 = v_min(v_max(c0, vmin), vmax); + c1 = v_min(v_max(c1, vmin), vmax); + c2 = v_min(v_max(c2, vmin), vmax); + c3 = v_min(v_max(c3, vmin), vmax); + c4 = v_min(v_max(c4, vmin), vmax); + c5 = v_min(v_max(c5, vmin), vmax); + c6 = v_min(v_max(c6, vmin), vmax); + } + + v_store(c, c0); + v_store(c + 4, c1); + v_store(c + 8, c2); + v_store(c + 12, c3); + v_store(c + 16, c4); + v_store(c + 20, c5); + v_store(c + 24, c6); +} + +static void convBlockMR1x24(int np, const float* a, const float* b, float *c, const float bias, bool init_c, + const float minval, const float maxval, bool ifMinMaxAct, const int outLen, const int convNR) +{ + CV_Assert(convNR == 24); + v_float32x4 c0 = v_setall_f32(bias), c1 = c0, c2 = c0; + v_float32x4 c3 = c0, c4 = c0, c5 = c0; + + for (int p = 0; p < np; p++, a++, b += convNR) + { + v_float32x4 a0 = v_setall_f32(a[0]); + v_float32x4 b0 = v_load(b), b1 = v_load(b + 4), b2 = v_load(b + 8); + v_float32x4 b3 = v_load(b + 12), b4 = v_load(b + 16), b5 = v_load(b + 20); + + c0 = v_fma(b0, a0, c0); + c1 = v_fma(b1, a0, c1); + c2 = v_fma(b2, a0, c2); + c3 = v_fma(b3, a0, c3); + c4 = v_fma(b4, a0, c4); + c5 = v_fma(b5, a0, c5); + } + + if (init_c) + { + c0 += v_load(c); + c1 += v_load(c + 4); + c2 += v_load(c + 8); + c3 += v_load(c + 12); + c4 += v_load(c + 16); + c5 += v_load(c + 20); + } + + if (ifMinMaxAct) + { + v_float32x4 vmax = v_setall_f32(maxval), vmin = v_setall_f32(minval); + c0 = v_min(v_max(c0, vmin), vmax); + c1 = v_min(v_max(c1, vmin), vmax); + c2 = v_min(v_max(c2, vmin), vmax); + c3 = v_min(v_max(c3, vmin), vmax); + c4 = v_min(v_max(c4, vmin), vmax); + c5 = v_min(v_max(c5, vmin), vmax); + } + + v_store(c, c0); + 
v_store(c + 4, c1); + v_store(c + 8, c2); + v_store(c + 12, c3); + v_store(c + 16, c4); + v_store(c + 20, c5); +} + +static void convBlockMR1x12(int np, const float* a, const float* b, float *c, const float bias, bool init_c, + const float minval, const float maxval, bool ifMinMaxAct, const int outLen, const int convNR) +{ + CV_Assert(convNR == 12); + v_float32x4 c0 = v_setall_f32(bias), c1 = c0, c2 = c0; + for (int p = 0; p < np; p++, a++, b += convNR) + { + v_float32x4 a0 = v_setall_f32(a[0]); + v_float32x4 b0 = v_load(b), b1 = v_load(b + 4), b2 = v_load(b + 8); + + c0 = v_fma(b0, a0, c0); + c1 = v_fma(b1, a0, c1); + c2 = v_fma(b2, a0, c2); + } + + if (init_c) + { + c0 += v_load(c); + c1 += v_load(c + 4); + c2 += v_load(c + 8); + } + + if (ifMinMaxAct) + { + v_float32x4 vmax = v_setall_f32(maxval), vmin = v_setall_f32(minval); + c0 = v_min(v_max(c0, vmin), vmax); + c1 = v_min(v_max(c1, vmin), vmax); + c2 = v_min(v_max(c2, vmin), vmax); + } + + v_store(c, c0); + v_store(c + 4, c1); + v_store(c + 8, c2); +} +#endif + +void convBlockMR1(int np, const float* a, const float* b, float *c, const float bias, bool init_c, + const float minval, const float maxval, bool ifMinMaxAct, const int outLen, const int convNR) +{ +#if CV_SIMD128 + // The outLen represents the valid output value in CONV_NR length. + // When outLen is very small, we use the no-SIMD branch. 
+ const int convNRby3 = convNR/3; + if (outLen > convNRby3) + { + if (convNR == 28) + convBlockMR1x28(np, a, b, c, bias, init_c, minval, maxval, ifMinMaxAct, outLen, convNR); + else if (convNR == 24) + convBlockMR1x24(np, a, b, c, bias, init_c, minval, maxval, ifMinMaxAct, outLen, convNR); + else if (convNR == 12) + convBlockMR1x12(np, a, b, c, bias, init_c, minval, maxval, ifMinMaxAct, outLen, convNR); + else + convBlockMR1NoSIMD(np, a, b, c, bias, init_c, minval, maxval, ifMinMaxAct, outLen, convNR); + } + else + convBlockMR1NoSIMD(np, a, b, c, bias, init_c, minval, maxval, ifMinMaxAct, outLen, convNR); +#else + convBlockMR1NoSIMD(np, a, b, c, bias, init_c, minval, maxval, ifMinMaxAct, outLen, convNR); +#endif +} + +#if CV_SIMD128 +static void convBlock4x24(int np, const float* a, const float* b, float* c, int ldc, bool init_c, const int convMR, const int convNR) +{ + v_float32x4 c0 = v_setzero_f32(), c1 = c0, c2 = c0, c3 = c0, c4 = c0, c5 = c0; + v_float32x4 c6 = v_setzero_f32(), c7 = c6, c8 = c6, c9 = c6, c10 = c6, c11 = c6; + v_float32x4 c12 = v_setzero_f32(), c13 = c12, c14 = c12, c15 = c12, c16 = c12, c17 = c12; + v_float32x4 c18 = v_setzero_f32(), c19 = c18, c20 = c18, c21 = c18, c22 = c18, c23 = c18; + + for (int p = 0; p < np; p++, a += convMR, b += convNR) + { + v_float32x4 a0 = v_setall_f32(a[0]); + v_float32x4 b0 = v_load(b), b1 = v_load(b + 4), b2 = v_load(b + 8); + v_float32x4 b3 = v_load(b + 12), b4 = v_load(b + 16), b5 = v_load(b + 20); + + c0 = v_fma(b0, a0, c0); + c1 = v_fma(b1, a0, c1); + c2 = v_fma(b2, a0, c2); + c3 = v_fma(b3, a0, c3); + c4 = v_fma(b4, a0, c4); + c5 = v_fma(b5, a0, c5); + + a0 = v_setall_f32(a[1]); + c6 = v_fma(b0, a0, c6); + c7 = v_fma(b1, a0, c7); + c8 = v_fma(b2, a0, c8); + c9 = v_fma(b3, a0, c9); + c10 = v_fma(b4, a0, c10); + c11 = v_fma(b5, a0, c11); + + a0 = v_setall_f32(a[2]); + c12 = v_fma(b0, a0, c12); + c13 = v_fma(b1, a0, c13); + c14 = v_fma(b2, a0, c14); + c15 = v_fma(b3, a0, c15); + c16 = v_fma(b4, a0, c16); + c17 
= v_fma(b5, a0, c17); + + a0 = v_setall_f32(a[3]); + c18 = v_fma(b0, a0, c18); + c19 = v_fma(b1, a0, c19); + c20 = v_fma(b2, a0, c20); + c21 = v_fma(b3, a0, c21); + c22 = v_fma(b4, a0, c22); + c23 = v_fma(b5, a0, c23); + } + + if (!init_c) + { + c0 += v_load(c); + c1 += v_load(c + 4); + c2 += v_load(c + 8); + c3 += v_load(c + 12); + c4 += v_load(c + 16); + c5 += v_load(c + 20); + + c6 += v_load(c + ldc); + c7 += v_load(c + ldc + 4); + c8 += v_load(c + ldc + 8); + c9 += v_load(c + ldc + 12); + c10 += v_load(c + ldc + 16); + c11 += v_load(c + ldc + 20); + + c12 += v_load(c + ldc*2); + c13 += v_load(c + ldc*2 + 4); + c14 += v_load(c + ldc*2 + 8); + c15 += v_load(c + ldc*2 + 12); + c16 += v_load(c + ldc*2 + 16); + c17 += v_load(c + ldc*2 + 20); + + c18 += v_load(c + ldc*3); + c19 += v_load(c + ldc*3 + 4); + c20 += v_load(c + ldc*3 + 8); + c21 += v_load(c + ldc*3 + 12); + c22 += v_load(c + ldc*3 + 16); + c23 += v_load(c + ldc*3 + 20); + } + + v_store(c, c0); + v_store(c + 4, c1); + v_store(c + 8, c2); + v_store(c + 12, c3); + v_store(c + 16, c4); + v_store(c + 20, c5); + + v_store(c + ldc, c6); + v_store(c + ldc + 4, c7); + v_store(c + ldc + 8, c8); + v_store(c + ldc + 12, c9); + v_store(c + ldc + 16, c10); + v_store(c + ldc + 20, c11); + + v_store(c + ldc * 2, c12); + v_store(c + ldc * 2 + 4, c13); + v_store(c + ldc * 2 + 8, c14); + v_store(c + ldc * 2 + 12, c15); + v_store(c + ldc * 2 + 16, c16); + v_store(c + ldc * 2 + 20, c17); + + v_store(c + ldc * 3, c18); + v_store(c + ldc * 3 + 4, c19); + v_store(c + ldc * 3 + 8, c20); + v_store(c + ldc * 3 + 12, c21); + v_store(c + ldc * 3 + 16, c22); + v_store(c + ldc * 3 + 20, c23); +} + +static void convBlock4x8(int np, const float* a, const float* b, float* c, int ldc, bool init_c, const int convMR, const int convNR) +{ + CV_Assert(convNR >= 4); + v_float32x4 c0 = v_setzero_f32(), c1 = c0, c2 = c0, c3 = c0; + v_float32x4 c4 = c0, c5 = c0, c6 = c0, c7 = c0; + + for (int p = 0; p < np; p++, a += convMR, b += convNR) + { + 
v_float32x4 a0 = v_setall_f32(a[0]); + v_float32x4 a1 = v_setall_f32(a[1]); + v_float32x4 a2 = v_setall_f32(a[2]); + v_float32x4 a3 = v_setall_f32(a[3]); + + v_float32x4 b0 = v_load(b), b1 = v_load(b + 4); + + c0 = v_fma(b0, a0, c0); + c1 = v_fma(b1, a0, c1); + + c2 = v_fma(b0, a1, c2); + c3 = v_fma(b1, a1, c3); + + c4 = v_fma(b0, a2, c4); + c5 = v_fma(b1, a2, c5); + + c6 = v_fma(b0, a3, c6); + c7 = v_fma(b1, a3, c7); + } + + if (!init_c) + { + c0 += v_load(c); + c1 += v_load(c + 4); + + c2 += v_load(c + ldc); + c3 += v_load(c + ldc + 4); + + c4 += v_load(c + ldc*2); + c5 += v_load(c + ldc*2 + 4); + + c6 += v_load(c + ldc*3); + c7 += v_load(c + ldc*3 + 4); + } + + v_store(c, c0); + v_store(c + 4, c1); + v_store(c + ldc, c2); + v_store(c + ldc + 4, c3); + v_store(c + ldc * 2, c4); + v_store(c + ldc * 2 + 4, c5); + v_store(c + ldc * 3, c6); + v_store(c + ldc * 3 + 4, c7); +} + +static void convBlock4x4(int np, const float* a, const float* b, float* c, int ldc, bool init_c, const int convMR, const int convNR) +{ + CV_Assert(convNR >= 4); + v_float32x4 c0 = v_setzero_f32(), c1 = c0, c2 = c0, c3 = c0; + + for (int p = 0; p < np; p++, a += convMR, b += convNR) + { + v_float32x4 a0 = v_setall_f32(a[0]); + v_float32x4 a1 = v_setall_f32(a[1]); + v_float32x4 a2 = v_setall_f32(a[2]); + v_float32x4 a3 = v_setall_f32(a[3]); + + v_float32x4 b0 = v_load(b); + + c0 = v_fma(b0, a0, c0); + c1 = v_fma(b0, a1, c1); + c2 = v_fma(b0, a2, c2); + c3 = v_fma(b0, a3, c3); + } + + if (!init_c) + { + c0 += v_load(c); + c1 += v_load(c + ldc); + c2 += v_load(c + ldc*2); + c3 += v_load(c + ldc*3); + } + + v_store(c, c0); + v_store(c + ldc, c1); + v_store(c + ldc * 2, c2); + v_store(c + ldc * 3, c3); +} +#endif + +static void convBlockNoSIMD(int np, const float* a, const float* b, float* c, int ldc, bool init_c, const int outLen, + const int convMR, const int convNR) +{ + std::vector cbuffer(convMR * outLen, 0); + float* cbuf = cbuffer.data(); + for( int p = 0; p < np; p++ ) + { + for( int i = 0; 
i < convMR; i++ ) + { + float ai = a[convMR*p + i]; + for( int j = 0; j < outLen; j++ ) + cbuf[i * outLen+j] += b[convNR*p + j] * ai; + } + } + + if (!init_c) + { + for(int i = 0; i < convMR; i++) + { + for(int j = 0; j < outLen; j++) + c[i*ldc + j] += cbuf[i*outLen + j]; + } + } + else + { + for(int i = 0; i < convMR; i++) + { + for(int j = 0; j < outLen; j++) + c[i*ldc + j] = cbuf[i*outLen + j]; + } + } +} + +void convBlock(int np, const float* a, const float* b, float* c, int ldc, bool init_c, const int outLen, + const int convMR, const int convNR) +{ + // The possible outLen range is [24, 8~1]. +#if CV_SIMD128 + CV_Assert(convMR == 4); + if (outLen > 8 && convNR == 24) + { + convBlock4x24(np, a, b, c, ldc, init_c, convMR, convNR); + return; + } + + if (outLen <= 8 && outLen > 4) + { + convBlock4x8(np, a, b, c, ldc, init_c, convMR, convNR); + return; + } + + if (outLen <= 4 && outLen > 1) + { + convBlock4x4(np, a, b, c, ldc, init_c, convMR, convNR); + return; + } + convBlockNoSIMD(np, a, b, c, ldc, init_c, outLen, convMR, convNR); +#else + convBlockNoSIMD(np, a, b, c, ldc, init_c, outLen, convMR, convNR); +#endif +} + }} // namespace cv::dnn diff --git a/modules/dnn/src/layers/fast_convolution/fast_convolution.hpp b/modules/dnn/src/layers/cpu_kernels/convolution.hpp similarity index 69% rename from modules/dnn/src/layers/fast_convolution/fast_convolution.hpp rename to modules/dnn/src/layers/cpu_kernels/convolution.hpp index 7794078bb4..0a077bf800 100644 --- a/modules/dnn/src/layers/fast_convolution/fast_convolution.hpp +++ b/modules/dnn/src/layers/cpu_kernels/convolution.hpp @@ -22,27 +22,29 @@ // Winograd Params enum { - _FX_WINO_STEP=6, - _FX_WINO_KSIZE=3, - _FX_WINO_SIZE=_FX_WINO_STEP+_FX_WINO_KSIZE-1, - _FX_WINO_AREA=_FX_WINO_SIZE*_FX_WINO_SIZE, + CONV_WINO_STEP=6, + CONV_WINO_KSIZE=3, + CONV_WINO_SIZE=CONV_WINO_STEP+CONV_WINO_KSIZE-1, // 8 + CONV_WINO_AREA=CONV_WINO_SIZE*CONV_WINO_SIZE, - _FX_WINO_KBLOCK = 4, + CONV_WINO_KBLOCK = 4, #if (CV_NEON && 
CV_NEON_AARCH64) || CV_TRY_AVX2 - _FX_WINO_IBLOCK = 6, + CONV_WINO_IBLOCK = 6, #else - _FX_WINO_IBLOCK = 3, + CONV_WINO_IBLOCK = 3, #endif #if CV_TRY_AVX2 - _FX_WINO_ATOM_F32 = 8, + CONV_WINO_ATOM_F32 = 8, #else - _FX_WINO_ATOM_F32 = 4, + CONV_WINO_ATOM_F32 = 4, #endif - _FX_WINO_NATOMS_F32 = _FX_WINO_AREA / _FX_WINO_ATOM_F32, // for AVX2, it is 8, otherwise, it's 16. + CONV_WINO_NATOMS_F32 = CONV_WINO_AREA / CONV_WINO_ATOM_F32, // for AVX2, it is 8, otherwise, it's 16. }; -enum { _FX_CONV_TYPE_GENERIC=0, _FX_CONV_TYPE_DEPTHWISE=1, _FX_CONV_TYPE_WINOGRAD3X3=2, _FX_CONV_TYPE_DEPTHWISE_REMAIN=3 }; + +// NOTE that: CONV_TYPE_DEPTHWISE is for 3x3 depthwise conv, and others depthwise will be set as CONV_TYPE_DEPTHWISE_REMAIN. +enum { CONV_TYPE_GENERIC=0, CONV_TYPE_DEPTHWISE=1, CONV_TYPE_WINOGRAD3X3=2, CONV_TYPE_DEPTHWISE_REMAIN=3 }; enum { CONV_1D = 0, CONV_2D = 1, CONV_3D = 2 }; #endif @@ -105,22 +107,6 @@ void runDepthwise(InputArray _input, OutputArray _output, const Ptr& c int runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _output, const Ptr& conv, int ntasks, float minval, float maxval, ActivationLayer* activ, bool ifMinMaxAct); -namespace opt_AVX2 -{ -#if CV_TRY_AVX2 -void convBlock_AVX2(int np, const float* a, const float* b, float* c, int ldc, bool init_c); - -void convBlockMR1(int np, const float* a, const float* b, float *c, const float bias, bool init_c, const float minval, - const float maxval, bool ifMinMaxAct); - -void _fx_winograd_accum_f32(const float* inwptr, const float* wptr, float* outbuf, int Cg, int iblock); -void _fx_winograd_BtXB_8x8_f32(const float* inptr, int inpstep, float* outptr, int Cg); -void _fx_winograd_AtXA_8x8_f32(const float* inptr, int inpstep, float* bpptr, int bpstep, float* outptr, int outstep, - float bias, float minval, float maxval, bool ifMinMaxAct); - -#endif -} // namespace opt_AVX2 - } // namespace dnn } // namespace cv diff --git a/modules/dnn/src/layers/fast_convolution/fast_convolution.avx2.cpp 
b/modules/dnn/src/layers/fast_convolution/fast_convolution.avx2.cpp deleted file mode 100644 index c98fbe72bd..0000000000 --- a/modules/dnn/src/layers/fast_convolution/fast_convolution.avx2.cpp +++ /dev/null @@ -1,499 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -#include "../../precomp.hpp" -#include "fast_convolution.hpp" - -namespace cv { -namespace dnn { -namespace opt_AVX2 -{ -#if CV_TRY_AVX2 -void convBlockMR1(int np, const float* a, const float* b, float *c, const float bias, bool init_c, - const float minval, const float maxval, bool ifMinMaxAct) -{ -#if CONV_NR == 24 - __m256 c0 = _mm256_set1_ps(bias), c1 = c0, c2 = c0; - - for (int p = 0; p < np; p++, a++, b += CONV_NR) - { - __m256 a0 = _mm256_set1_ps(a[0]); - __m256 b0 = _mm256_loadu_ps(b), b1 = _mm256_loadu_ps(b + 8), b2 = _mm256_loadu_ps(b + 16); - - c0 = _mm256_fmadd_ps(b0, a0, c0); - c1 = _mm256_fmadd_ps(b1, a0, c1); - c2 = _mm256_fmadd_ps(b2, a0, c2); - } - - if (init_c) - { - c0 = _mm256_add_ps(_mm256_loadu_ps(c), c0); - c1 = _mm256_add_ps(_mm256_loadu_ps(c + 8), c1); - c2 = _mm256_add_ps(_mm256_loadu_ps(c + 16), c2); - } - - if (ifMinMaxAct) - { - __m256 vmax = _mm256_set1_ps(maxval); - __m256 vmin = _mm256_set1_ps(minval); - - c0 = _mm256_min_ps(_mm256_max_ps(c0, vmin), vmax); - c1 = _mm256_min_ps(_mm256_max_ps(c1, vmin), vmax); - c2 = _mm256_min_ps(_mm256_max_ps(c2, vmin), vmax); - } - - _mm256_storeu_ps(c, c0); - _mm256_storeu_ps(c + 8, c1); - _mm256_storeu_ps(c + 16, c2); - _mm256_zeroupper(); -#else -#error "unsupported CONV_NR in convBlockMR1." 
-#endif -} - -void convBlock_AVX2(int np, const float* a, const float* b, float* c, int ldc, bool init_c) -{ -#if CONV_MR == 4 && CONV_NR == 24 - __m256 c00 = _mm256_set1_ps(0.f), c01 = c00, c02 = c00; - __m256 c10 = c00, c11 = c00, c12 = c00; - __m256 c20 = c00, c21 = c00, c22 = c00; - __m256 c30 = c00, c31 = c00, c32 = c00; - - __m256 a0 = _mm256_setzero_ps(), a1 = _mm256_setzero_ps(); - __m256 b0 = _mm256_setzero_ps(), b1 = _mm256_setzero_ps(), b2 = _mm256_setzero_ps(); - - for (int p = 0; p < np; p++, a += CONV_MR, b += CONV_NR) - { - a0 = _mm256_set1_ps(a[0]), a1 = _mm256_set1_ps(a[1]); - b0 = _mm256_load_ps(b), b1 = _mm256_load_ps(b + 8), b2 = _mm256_load_ps(b + 16); - - c00 = _mm256_fmadd_ps(b0, a0, c00); - c01 = _mm256_fmadd_ps(b1, a0, c01); - c02 = _mm256_fmadd_ps(b2, a0, c02); - - c10 = _mm256_fmadd_ps(b0, a1, c10); - c11 = _mm256_fmadd_ps(b1, a1, c11); - c12 = _mm256_fmadd_ps(b2, a1, c12); - - a0 = _mm256_set1_ps(a[2]), a1 = _mm256_set1_ps(a[3]); - - c20 = _mm256_fmadd_ps(b0, a0, c20); - c21 = _mm256_fmadd_ps(b1, a0, c21); - c22 = _mm256_fmadd_ps(b2, a0, c22); - - c30 = _mm256_fmadd_ps(b0, a1, c30); - c31 = _mm256_fmadd_ps(b1, a1, c31); - c32 = _mm256_fmadd_ps(b2, a1, c32); - } - - if (!init_c) - { - c00 = _mm256_add_ps(c00, _mm256_load_ps(c)); - c01 = _mm256_add_ps(c01, _mm256_load_ps(c + 8)); - c02 = _mm256_add_ps(c02, _mm256_load_ps(c + 16)); - - c10 = _mm256_add_ps(c10, _mm256_load_ps(c + ldc)); - c11 = _mm256_add_ps(c11, _mm256_load_ps(c + ldc + 8)); - c12 = _mm256_add_ps(c12, _mm256_load_ps(c + ldc + 16)); - - c20 = _mm256_add_ps(c20, _mm256_load_ps(c + ldc*2)); - c21 = _mm256_add_ps(c21, _mm256_load_ps(c + ldc*2 + 8)); - c22 = _mm256_add_ps(c22, _mm256_load_ps(c + ldc*2 + 16)); - - c30 = _mm256_add_ps(c30, _mm256_load_ps(c + ldc*3)); - c31 = _mm256_add_ps(c31, _mm256_load_ps(c + ldc*3 + 8)); - c32 = _mm256_add_ps(c32, _mm256_load_ps(c + ldc*3 + 16)); - } - - _mm256_storeu_ps(c, c00), _mm256_storeu_ps(c+8, c01), _mm256_storeu_ps(c+16, c02); - 
_mm256_storeu_ps(c + ldc, c10), _mm256_storeu_ps(c + ldc + 8, c11), _mm256_storeu_ps(c + ldc + 16, c12); - _mm256_storeu_ps(c + ldc*2, c20), _mm256_storeu_ps(c + ldc*2 + 8, c21), _mm256_storeu_ps(c + ldc*2 + 16, c22); - _mm256_storeu_ps(c + ldc*3, c30), _mm256_storeu_ps(c + ldc*3 + 8, c31), _mm256_storeu_ps(c + ldc*3 + 16, c32); - _mm256_zeroupper(); -#else -#error "unsupported CONV_MR and/or CONV_NR in convBlock_AVX2." -#endif -} - -void _fx_winograd_accum_f32(const float* inwptr, const float* wptr, - float* outbuf, int Cg, int iblock) -{ - CV_Assert(_FX_WINO_IBLOCK == 6 && _FX_WINO_KBLOCK == 4 && _FX_WINO_ATOM_F32 == 8); - if (iblock > 3) - { - for (int atom_id = 0; atom_id < _FX_WINO_NATOMS_F32; atom_id++, - outbuf += _FX_WINO_ATOM_F32) - { - __m256 s00 = _mm256_set1_ps(0.f), s01 = s00, s02 = s00, s03 = s00, s04 = s00, s05 = s00; - __m256 s10 = _mm256_set1_ps(0.f), s11 = s00, s12 = s00, s13 = s00, s14 = s00, s15 = s00; - __m256 s20 = _mm256_set1_ps(0.f), s21 = s00, s22 = s00, s23 = s00, s24 = s00, s25 = s00; - __m256 s30 = _mm256_set1_ps(0.f), s31 = s00, s32 = s00, s33 = s00, s34 = s00, s35 = s00; - for (int c = 0; c < Cg; c++, inwptr += _FX_WINO_IBLOCK*_FX_WINO_ATOM_F32, - wptr += _FX_WINO_KBLOCK*_FX_WINO_ATOM_F32) - { - __m256 w0 = _mm256_load_ps(wptr), w1 = _mm256_load_ps(wptr + 8); - __m256 w2 = _mm256_load_ps(wptr + 16), w3 = _mm256_load_ps(wptr + 24); - __m256 x0, x1; - x0 = _mm256_load_ps(inwptr); - x1 = _mm256_load_ps(inwptr + 8); - s00 = _mm256_fmadd_ps(w0, x0, s00); - s01 = _mm256_fmadd_ps(w0, x1, s01); - s10 = _mm256_fmadd_ps(w1, x0, s10); - s11 = _mm256_fmadd_ps(w1, x1, s11); - s20 = _mm256_fmadd_ps(w2, x0, s20); - s21 = _mm256_fmadd_ps(w2, x1, s21); - s30 = _mm256_fmadd_ps(w3, x0, s30); - s31 = _mm256_fmadd_ps(w3, x1, s31); - x0 = _mm256_load_ps(inwptr + 16); - x1 = _mm256_load_ps(inwptr + 24); - s02 = _mm256_fmadd_ps(w0, x0, s02); - s03 = _mm256_fmadd_ps(w0, x1, s03); - s12 = _mm256_fmadd_ps(w1, x0, s12); - s13 = _mm256_fmadd_ps(w1, x1, s13); - s22 
= _mm256_fmadd_ps(w2, x0, s22); - s23 = _mm256_fmadd_ps(w2, x1, s23); - s32 = _mm256_fmadd_ps(w3, x0, s32); - s33 = _mm256_fmadd_ps(w3, x1, s33); - x0 = _mm256_load_ps(inwptr + 32); - x1 = _mm256_load_ps(inwptr + 40); - s04 = _mm256_fmadd_ps(w0, x0, s04); - s05 = _mm256_fmadd_ps(w0, x1, s05); - s14 = _mm256_fmadd_ps(w1, x0, s14); - s15 = _mm256_fmadd_ps(w1, x1, s15); - s24 = _mm256_fmadd_ps(w2, x0, s24); - s25 = _mm256_fmadd_ps(w2, x1, s25); - s34 = _mm256_fmadd_ps(w3, x0, s34); - s35 = _mm256_fmadd_ps(w3, x1, s35); - } - - _mm256_store_ps(outbuf, s00); - _mm256_store_ps(outbuf + 1*64, s01); - _mm256_store_ps(outbuf + 2*64, s02); - _mm256_store_ps(outbuf + 3*64, s03); - _mm256_store_ps(outbuf + 4*64, s04); - _mm256_store_ps(outbuf + 5*64, s05); - - _mm256_store_ps(outbuf + 6*64, s10); - _mm256_store_ps(outbuf + 7*64, s11); - _mm256_store_ps(outbuf + 8*64, s12); - _mm256_store_ps(outbuf + 9*64, s13); - _mm256_store_ps(outbuf + 10*64, s14); - _mm256_store_ps(outbuf + 11*64, s15); - - _mm256_store_ps(outbuf + 12*64, s20); - _mm256_store_ps(outbuf + 13*64, s21); - _mm256_store_ps(outbuf + 14*64, s22); - _mm256_store_ps(outbuf + 15*64, s23); - _mm256_store_ps(outbuf + 16*64, s24); - _mm256_store_ps(outbuf + 17*64, s25); - - _mm256_store_ps(outbuf + 18*64, s30); - _mm256_store_ps(outbuf + 19*64, s31); - _mm256_store_ps(outbuf + 20*64, s32); - _mm256_store_ps(outbuf + 21*64, s33); - _mm256_store_ps(outbuf + 22*64, s34); - _mm256_store_ps(outbuf + 23*64, s35); - } - } - else - { - for (int atom_id = 0; atom_id < _FX_WINO_NATOMS_F32; atom_id++, - outbuf += _FX_WINO_ATOM_F32) - { - __m256 s00 = _mm256_set1_ps(0.f), s01 = s00, s02 = s00; - __m256 s10 = _mm256_set1_ps(0.f), s11 = s00, s12 = s00; - __m256 s20 = _mm256_set1_ps(0.f), s21 = s00, s22 = s00; - __m256 s30 = _mm256_set1_ps(0.f), s31 = s00, s32 = s00; - for (int c = 0; c < Cg; c++, inwptr += _FX_WINO_IBLOCK*_FX_WINO_ATOM_F32, - wptr += _FX_WINO_KBLOCK*_FX_WINO_ATOM_F32) { - __m256 w0 = _mm256_load_ps(wptr), w1 = 
_mm256_load_ps(wptr + 8); - __m256 w2 = _mm256_load_ps(wptr + 16), w3 = _mm256_load_ps(wptr + 24); - __m256 x0, x1, x2; - x0 = _mm256_load_ps(inwptr); - x1 = _mm256_load_ps(inwptr + 8); - x2 = _mm256_load_ps(inwptr + 16); - s00 = _mm256_fmadd_ps(w0, x0, s00); - s01 = _mm256_fmadd_ps(w0, x1, s01); - s02 = _mm256_fmadd_ps(w0, x2, s02); - s10 = _mm256_fmadd_ps(w1, x0, s10); - s11 = _mm256_fmadd_ps(w1, x1, s11); - s12 = _mm256_fmadd_ps(w1, x2, s12); - s20 = _mm256_fmadd_ps(w2, x0, s20); - s21 = _mm256_fmadd_ps(w2, x1, s21); - s22 = _mm256_fmadd_ps(w2, x2, s22); - s30 = _mm256_fmadd_ps(w3, x0, s30); - s31 = _mm256_fmadd_ps(w3, x1, s31); - s32 = _mm256_fmadd_ps(w3, x2, s32); - } - - _mm256_store_ps(outbuf, s00); - _mm256_store_ps(outbuf + 1*64, s01); - _mm256_store_ps(outbuf + 2*64, s02); - _mm256_store_ps(outbuf + 6*64, s10); - _mm256_store_ps(outbuf + 7*64, s11); - _mm256_store_ps(outbuf + 8*64, s12); - _mm256_store_ps(outbuf + 12*64, s20); - _mm256_store_ps(outbuf + 13*64, s21); - _mm256_store_ps(outbuf + 14*64, s22); - _mm256_store_ps(outbuf + 18*64, s30); - _mm256_store_ps(outbuf + 19*64, s31); - _mm256_store_ps(outbuf + 20*64, s32); - } - } - _mm256_zeroupper(); -} -static inline -void transpose8_ps(__m256 &row0, __m256 &row1, __m256 &row2, __m256 &row3, __m256 &row4, __m256 &row5, __m256 &row6, __m256 &row7) -{ - __m256 __t0, __t1, __t2, __t3, __t4, __t5, __t6, __t7; - __m256 __tt0, __tt1, __tt2, __tt3, __tt4, __tt5, __tt6, __tt7; - __t0 = _mm256_unpacklo_ps(row0, row1); - __t1 = _mm256_unpackhi_ps(row0, row1); - __t2 = _mm256_unpacklo_ps(row2, row3); - __t3 = _mm256_unpackhi_ps(row2, row3); - __t4 = _mm256_unpacklo_ps(row4, row5); - __t5 = _mm256_unpackhi_ps(row4, row5); - __t6 = _mm256_unpacklo_ps(row6, row7); - __t7 = _mm256_unpackhi_ps(row6, row7); - __tt0 = _mm256_shuffle_ps(__t0,__t2,_MM_SHUFFLE(1,0,1,0)); - __tt1 = _mm256_shuffle_ps(__t0,__t2,_MM_SHUFFLE(3,2,3,2)); - __tt2 = _mm256_shuffle_ps(__t1,__t3,_MM_SHUFFLE(1,0,1,0)); - __tt3 = 
_mm256_shuffle_ps(__t1,__t3,_MM_SHUFFLE(3,2,3,2)); - __tt4 = _mm256_shuffle_ps(__t4,__t6,_MM_SHUFFLE(1,0,1,0)); - __tt5 = _mm256_shuffle_ps(__t4,__t6,_MM_SHUFFLE(3,2,3,2)); - __tt6 = _mm256_shuffle_ps(__t5,__t7,_MM_SHUFFLE(1,0,1,0)); - __tt7 = _mm256_shuffle_ps(__t5,__t7,_MM_SHUFFLE(3,2,3,2)); - row0 = _mm256_permute2f128_ps(__tt0, __tt4, 0x20); - row1 = _mm256_permute2f128_ps(__tt1, __tt5, 0x20); - row2 = _mm256_permute2f128_ps(__tt2, __tt6, 0x20); - row3 = _mm256_permute2f128_ps(__tt3, __tt7, 0x20); - row4 = _mm256_permute2f128_ps(__tt0, __tt4, 0x31); - row5 = _mm256_permute2f128_ps(__tt1, __tt5, 0x31); - row6 = _mm256_permute2f128_ps(__tt2, __tt6, 0x31); - row7 = _mm256_permute2f128_ps(__tt3, __tt7, 0x31); -} - -/*Input transform*/ -void _fx_winograd_BtXB_8x8_f32(const float* inptr, int inpstep, float* outptr, int Cg) -{ - __m256 x00 = _mm256_loadu_ps(inptr); - __m256 x10 = _mm256_loadu_ps(inptr + inpstep); - __m256 x20 = _mm256_loadu_ps(inptr + inpstep*2); - __m256 x30 = _mm256_loadu_ps(inptr + inpstep*3); - __m256 x40 = _mm256_loadu_ps(inptr + inpstep*4); - __m256 x50 = _mm256_loadu_ps(inptr + inpstep*5); - __m256 x60 = _mm256_loadu_ps(inptr + inpstep*6); - __m256 x70 = _mm256_loadu_ps(inptr + inpstep*7); - - __m256 z00, z10, z20, z30, z40, z50, z60, z70; - - { - /* Y[0] = [1.f, 0.f, -5.25f, 0.f, 5.25f, 0.f, -1.f, 0.f]*X */ - /* Y[7] = [0.f, -1.f, 0.f, 5.25f, 0.f, -5.25f, 0.f, 1.f]*X */ - __m256 q5_25 = _mm256_set1_ps(5.25f), t00, t10; - t00 = _mm256_sub_ps(x40, x20); - t10 = _mm256_sub_ps(x30, x50); - - __m256 y00 = _mm256_fmadd_ps(t00, q5_25, _mm256_sub_ps(x00, x60)); - __m256 y70 = _mm256_fmadd_ps(t10, q5_25, _mm256_sub_ps(x70, x10)); - - /* Y[1] = [0.f, 1.f, 1.f, -4.25f, -4.25f, 1.f, 1.f, 0.f]*X */ - /* Y[2] = [0.f, -1.f, 1.f, 4.25f, -4.25f, -1.f, 1.f, 0.f]*X */ - __m256 qm4_25 = _mm256_set1_ps(-4.25f); - t00 = _mm256_fmadd_ps(x30, qm4_25, _mm256_add_ps(x10, x50)); - t10 = _mm256_fmadd_ps(x40, qm4_25, _mm256_add_ps(x20, x60)); - - __m256 y10 = 
_mm256_add_ps(t00, t10); - __m256 y20 = _mm256_sub_ps(t10, t00); - - /* Y[3] = [0.f, 0.5f, 0.25f, -2.5f, -1.25f, 2.f, 1.f, 0.f]*X */ - /* Y[4] = [0.f, -0.5f, 0.25f, 2.5f, -1.25f, -2.f, 1.f, 0.f]*X */ - __m256 q0_5 = _mm256_set1_ps(0.5f), q0_25 = _mm256_set1_ps(0.25f); - __m256 qm2_5 = _mm256_set1_ps(-2.5f), qm1_25 = _mm256_set1_ps(-1.25f); - t00 = _mm256_fmadd_ps(x10, q0_5, _mm256_add_ps(x50, x50)); - t10 = _mm256_fmadd_ps(x20, q0_25, x60); - t00 = _mm256_fmadd_ps(x30, qm2_5, t00); - t10 = _mm256_fmadd_ps(x40, qm1_25, t10); - - __m256 y30 = _mm256_add_ps(t00, t10); - __m256 y40 = _mm256_sub_ps(t10, t00); - - /* Y[5] = [0.f, 2.f, 4.f, -2.5f, -5.f, 0.5f, 1.f, 0.f]*X */ - /* Y[6] = [0.f, -2.f, 4.f, 2.5f, -5.f, -0.5f, 1.f, 0.f]*X */ - __m256 q4 = _mm256_set1_ps(4.f), qm5 = _mm256_set1_ps(-5.f); - t00 = _mm256_fmadd_ps(x50, q0_5, _mm256_add_ps(x10, x10)); - t10 = _mm256_fmadd_ps(x20, q4 , x60); - t00 = _mm256_fmadd_ps(x30, qm2_5, t00); - t10 = _mm256_fmadd_ps(x40, qm5 , t10); - - __m256 y50 = _mm256_add_ps(t00, t10); - __m256 y60 = _mm256_sub_ps(t10, t00); - - /* transpose 8x8 matrix in-place with some renumeration of the elements: */ - transpose8_ps(y00, y10, y20, y30, y40, y50, y60, y70); - - /* Z[0] = [1.f, 0.f, -5.25f, 0.f, 5.25f, 0.f, -1.f, 0.f]*Y */ - /* Z[7] = [0.f, -1.f, 0.f, 5.25f, 0.f, -5.25f, 0.f, 1.f]*Y */ - t00 = _mm256_sub_ps(y40, y20); - t10 = _mm256_sub_ps(y30, y50); - z00 = _mm256_fmadd_ps(t00, q5_25, _mm256_sub_ps(y00, y60)); - z70 = _mm256_fmadd_ps(t10, q5_25, _mm256_sub_ps(y70, y10)); - - /* Z[1] = [0.f, 1.f, 1.f, -4.25f, -4.25f, 1.f, 1.f, 0.f]*Y */ - /* Z[2] = [0.f, -1.f, 1.f, 4.25f, -4.25f, -1.f, 1.f, 0.f]*Y */ - t00 = _mm256_fmadd_ps(y30, qm4_25, _mm256_add_ps(y10, y50)); - t10 = _mm256_fmadd_ps(y40, qm4_25, _mm256_add_ps(y20, y60)); - z10 = _mm256_add_ps(t00, t10); - z20 = _mm256_sub_ps(t10, t00); - - /* Z[3] = [0.f, 0.5f, 0.25f, -2.5f, -1.25f, 2.f, 1.f, 0.f]*Y */ - /* Z[4] = [0.f, -0.5f, 0.25f, 2.5f, -1.25f, -2.f, 1.f, 0.f]*Y */ - t00 = 
_mm256_fmadd_ps(y10, q0_5, _mm256_add_ps(y50, y50)); - t10 = _mm256_fmadd_ps(y20, q0_25, y60); - t00 = _mm256_fmadd_ps(y30, qm2_5, t00); - t10 = _mm256_fmadd_ps(y40, qm1_25, t10); - - z30 = _mm256_add_ps(t00, t10); - z40 = _mm256_sub_ps(t10, t00); - - /* Z[5] = [0.f, 2.f, 4.f, -2.5f, -5.f, 0.5f, 1.f, 0.f]*Y */ - /* Z[6] = [0.f, -2.f, 4.f, 2.5f, -5.f, -0.5f, 1.f, 0.f]*Y */ - t00 = _mm256_fmadd_ps(y50, q0_5, _mm256_add_ps(y10, y10)); - t10 = _mm256_fmadd_ps(y20, q4, y60); - t00 = _mm256_fmadd_ps(y30, qm2_5, t00); - t10 = _mm256_fmadd_ps(y40, qm5, t10); - - z50 = _mm256_add_ps(t00, t10); - z60 = _mm256_sub_ps(t10, t00); - } - - const int outstep = _FX_WINO_IBLOCK*_FX_WINO_ATOM_F32*Cg; - - _mm256_storeu_ps(outptr, z00); - _mm256_storeu_ps(outptr + outstep, z10); - _mm256_storeu_ps(outptr + outstep*2, z20); - _mm256_storeu_ps(outptr + outstep*3, z30); - _mm256_storeu_ps(outptr + outstep*4, z40); - _mm256_storeu_ps(outptr + outstep*5, z50); - _mm256_storeu_ps(outptr + outstep*6, z60); - _mm256_storeu_ps(outptr + outstep*7, z70); - _mm256_zeroupper(); -} - -#define STORE6_ELE_FROM_16(ptr, z00, lowM, highM) \ - lowM = _mm256_castps256_ps128(z00); \ - highM = _mm256_extractf128_ps(z00, 1); \ - _mm_storeu_ps(ptr, lowM); \ - _mm_storel_epi64((__m128i*)(ptr + 4), _mm_castps_si128(highM)) - -/* Inverse Winograd 8x8 transform: - out = (A'*inp*A)', where - inp is input 8x8 FP32 matrix, - A' is - [1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, - 0.f, 1.f, -1.f, 2.f, -2.f, 0.5f, -0.5f, 0.f, - 0.f, 1.f, 1.f, 4.f, 4.f, 0.25f, 0.25f, 0.f, - 0.f, 1.f, -1.f, 8.f, -8.f, 0.125f, -0.125f, 0.f, - 0.f, 1.f, 1.f, 16.f, 16.f, 1.f/16, 1.f/16, 0.f, - 0.f, 1.f, -1.f, 32.f, -32.f, 1.f/32, -1.f/32, 1.f] -*/ -void _fx_winograd_AtXA_8x8_f32(const float* inptr, int inpstep, - float* bpptr, int bpstep, float* outptr, int outstep, - float bias, float minval, float maxval, bool ifMinMaxAct) -{ - - __m256 x00 = _mm256_load_ps(inptr); - __m256 x10 = _mm256_load_ps(inptr + inpstep); - __m256 x20 = 
_mm256_load_ps(inptr + inpstep*2); - __m256 x30 = _mm256_load_ps(inptr + inpstep*3); - __m256 x40 = _mm256_load_ps(inptr + inpstep*4); - __m256 x50 = _mm256_load_ps(inptr + inpstep*5); - __m256 x60 = _mm256_load_ps(inptr + inpstep*6); - __m256 x70 = _mm256_load_ps(inptr + inpstep*7); - __m256 z00, z10, z20, z30, z40, z50; - - { - __m256 s12_0, s34_0, s56_0; - s12_0 = _mm256_add_ps(x10, x20); - s34_0 = _mm256_add_ps(x30, x40); - s56_0 = _mm256_add_ps(x50, x60); - - __m256 y00 = _mm256_add_ps(x00, _mm256_add_ps(s12_0, _mm256_add_ps(s34_0, s56_0))); - __m256 y20 = _mm256_fmadd_ps(s56_0, _mm256_set1_ps(0.25f), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(4.0f), s12_0)); - __m256 y40 = _mm256_fmadd_ps(s56_0, _mm256_set1_ps(1.f/16), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(16.0f), s12_0)); - - s12_0 = _mm256_sub_ps(x10, x20); - s34_0 = _mm256_sub_ps(x30, x40); - s56_0 = _mm256_sub_ps(x50, x60); - __m256 y50 = _mm256_fmadd_ps(s56_0, _mm256_set1_ps(1.f/32), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(32.f), _mm256_add_ps(x70, s12_0))); - __m256 y10 = _mm256_fmadd_ps(s56_0, _mm256_set1_ps(0.5f), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(2.f), s12_0)); - __m256 y30 = _mm256_fmadd_ps(s56_0, _mm256_set1_ps(0.125f), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(8.f), s12_0)); - __m256 y60 = _mm256_set1_ps(0.f), y70 = y60; - - /* transpose 8x8 matrix in-place with some renumeration of the elements: */ - - transpose8_ps(y00, y10, y20, y30, y40, y50, y60, y70); - - s12_0 = _mm256_add_ps(y10, y20); - s34_0 = _mm256_add_ps(y30, y40); - s56_0 = _mm256_add_ps(y50, y60); - - z00 = _mm256_add_ps(y00, _mm256_add_ps(s12_0, _mm256_add_ps(s34_0, s56_0))); - z20 = _mm256_fmadd_ps(s56_0, _mm256_set1_ps(0.25f), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(4.0f), s12_0)); - z40 = _mm256_fmadd_ps(s56_0, _mm256_set1_ps(1.f/16), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(16.0f), s12_0)); - - s12_0 = _mm256_sub_ps(y10, y20); - s34_0 = _mm256_sub_ps(y30, y40); - s56_0 = _mm256_sub_ps(y50, y60); - - z50 = _mm256_fmadd_ps(s56_0, 
_mm256_set1_ps(1.f/32), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(32.0f), _mm256_add_ps(y70, s12_0))); - z10 = _mm256_fmadd_ps(s56_0, _mm256_set1_ps(0.5f), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(2.0f), s12_0)); - z30 = _mm256_fmadd_ps(s56_0, _mm256_set1_ps(0.125f), _mm256_fmadd_ps(s34_0, _mm256_set1_ps(8.0f), s12_0)); - - __m256 vbias = _mm256_set1_ps(bias); - z00 = _mm256_add_ps(vbias, z00); - z10 = _mm256_add_ps(vbias, z10); - z20 = _mm256_add_ps(vbias, z20); - z30 = _mm256_add_ps(vbias, z30); - z40 = _mm256_add_ps(vbias, z40); - z50 = _mm256_add_ps(vbias, z50); - } - - if (bpptr) - { - z00 = _mm256_add_ps(z00, _mm256_loadu_ps(bpptr)); - z10 = _mm256_add_ps(z10, _mm256_loadu_ps(bpptr + bpstep)); - z20 = _mm256_add_ps(z20, _mm256_loadu_ps(bpptr + bpstep*2)); - z30 = _mm256_add_ps(z30, _mm256_loadu_ps(bpptr + bpstep*3)); - z40 = _mm256_add_ps(z40, _mm256_loadu_ps(bpptr + bpstep*4)); - z50 = _mm256_add_ps(z50, _mm256_loadu_ps(bpptr + bpstep*5)); - } - - if (ifMinMaxAct) - { - __m256 vmax = _mm256_set1_ps(maxval); - __m256 vmin = _mm256_set1_ps(minval); - - z00 = _mm256_min_ps(_mm256_max_ps(z00, vmin), vmax); - z10 = _mm256_min_ps(_mm256_max_ps(z10, vmin), vmax); - z20 = _mm256_min_ps(_mm256_max_ps(z20, vmin), vmax); - z30 = _mm256_min_ps(_mm256_max_ps(z30, vmin), vmax); - z40 = _mm256_min_ps(_mm256_max_ps(z40, vmin), vmax); - z50 = _mm256_min_ps(_mm256_max_ps(z50, vmin), vmax); - } - - __m128 lowM, highM; - STORE6_ELE_FROM_16(outptr, z00, lowM, highM); - STORE6_ELE_FROM_16(outptr + outstep, z10, lowM, highM); - STORE6_ELE_FROM_16(outptr + outstep * 2, z20, lowM, highM); - STORE6_ELE_FROM_16(outptr + outstep * 3, z30, lowM, highM); - STORE6_ELE_FROM_16(outptr + outstep * 4, z40, lowM, highM); - STORE6_ELE_FROM_16(outptr + outstep * 5, z50, lowM, highM); - _mm256_zeroupper(); -} - -#endif -} // namespace opt_AVX2 -} // namespace dnn -} // namespace cv \ No newline at end of file diff --git a/modules/dnn/src/layers/fast_convolution/fast_convolution.simd.hpp 
b/modules/dnn/src/layers/fast_convolution/fast_convolution.simd.hpp deleted file mode 100644 index e146c0974e..0000000000 --- a/modules/dnn/src/layers/fast_convolution/fast_convolution.simd.hpp +++ /dev/null @@ -1,567 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -#ifndef OPENCV_FAST_CONVOLUTION_SIMD_HPP -#define OPENCV_FAST_CONVOLUTION_SIMD_HPP - -#include "opencv2/core/hal/intrin.hpp" -#include - -namespace cv { -namespace dnn { - -static void convBlockMR1NoSIMD(int np, const float* a, const float* b, float *c, const float bias, bool init_c, - const float minval, const float maxval, bool ifMinMaxAct, const int outLen) -{ - std::vector cbuffer(outLen, 0); - float* cbuf = cbuffer.data(); - for( int p = 0; p < np; p++ ) - { - float ai = a[p]; - for( int j = 0; j < outLen; j++ ) - cbuf[j] += b[CONV_NR*p + j] * ai; - } - - if (init_c) - { - for(int j = 0; j < outLen; j++) - { - c[j] += cbuf[j] + bias; - if (ifMinMaxAct) - c[j] = std::min(std::max(c[j], minval), maxval); - } - } - else - { - for(int j = 0; j < outLen; j++) - { - c[j] = cbuf[j] + bias; - if (ifMinMaxAct) - c[j] = std::min(std::max(c[j], minval), maxval); - } - } -} - -void convBlockMR1(int np, const float* a, const float* b, float *c, const float bias, bool init_c, - const float minval, const float maxval, bool ifMinMaxAct, const int outLen) -{ -#if CV_SIMD128 - // The outLen represents the valid output value in CONV_NR length. - // When outLen is very small, we use the no-SIMD branch. 
- const int CONV_NRby3 = CONV_NR/3; - if (outLen > CONV_NRby3) - { - v_float32x4 c0 = v_setall_f32(bias), c1 = c0, c2 = c0; // CONV_NR == 12 -#if CONV_NR == 28 || CONV_NR == 24 - v_float32x4 c3 = c0, c4 = c0, c5 = c0; -#endif -#if CONV_NR == 28 - v_float32x4 c6 = c0; -#endif - for (int p = 0; p < np; p++, a++, b += CONV_NR) - { - v_float32x4 a0 = v_setall_f32(a[0]); - v_float32x4 b0 = v_load(b), b1 = v_load(b + 4), b2 = v_load(b + 8); -#if CONV_NR == 28 || CONV_NR == 24 - v_float32x4 b3 = v_load(b + 12), b4 = v_load(b + 16), b5 = v_load(b + 20); -#endif -#if CONV_NR == 28 - v_float32x4 b6 = v_load(b + 24); -#endif - - c0 = v_fma(b0, a0, c0); - c1 = v_fma(b1, a0, c1); - c2 = v_fma(b2, a0, c2); -#if CONV_NR == 28 || CONV_NR == 24 - c3 = v_fma(b3, a0, c3); - c4 = v_fma(b4, a0, c4); - c5 = v_fma(b5, a0, c5); -#endif -#if CONV_NR == 28 - c6 = v_fma(b6, a0, c6); -#endif - } - - if (init_c) - { - c0 += v_load(c); - c1 += v_load(c + 4); - c2 += v_load(c + 8); -#if CONV_NR == 28 || CONV_NR == 24 - c3 += v_load(c + 12); - c4 += v_load(c + 16); - c5 += v_load(c + 20); -#endif -#if CONV_NR == 28 - c6 += v_load(c + 24); -#endif - } - - if (ifMinMaxAct) - { - v_float32x4 vmax = v_setall_f32(maxval), vmin = v_setall_f32(minval); - c0 = v_min(v_max(c0, vmin), vmax); - c1 = v_min(v_max(c1, vmin), vmax); - c2 = v_min(v_max(c2, vmin), vmax); -#if CONV_NR == 28 || CONV_NR == 24 - c3 = v_min(v_max(c3, vmin), vmax); - c4 = v_min(v_max(c4, vmin), vmax); - c5 = v_min(v_max(c5, vmin), vmax); -#endif -#if CONV_NR == 28 - c6 = v_min(v_max(c6, vmin), vmax); -#endif - } - - v_store(c, c0); - v_store(c + 4, c1); - v_store(c + 8, c2); -#if CONV_NR == 28 || CONV_NR == 24 - v_store(c + 12, c3); - v_store(c + 16, c4); - v_store(c + 20, c5); -#endif -#if CONV_NR == 28 - v_store(c + 24, c6); -#endif - } - else - convBlockMR1NoSIMD(np, a, b, c, bias, init_c, minval, maxval, ifMinMaxAct, outLen); -#else - convBlockMR1NoSIMD(np, a, b, c, bias, init_c, minval, maxval, ifMinMaxAct, outLen); -#endif -} - 
-#if CV_SIMD128 -#if CONV_MR == 4 && CONV_NR == 24 -static void convBlock4x24(int np, const float* a, const float* b, float* c, int ldc, bool init_c) -{ - v_float32x4 c0 = v_setzero_f32(), c1 = c0, c2 = c0, c3 = c0, c4 = c0, c5 = c0; - v_float32x4 c6 = v_setzero_f32(), c7 = c6, c8 = c6, c9 = c6, c10 = c6, c11 = c6; - v_float32x4 c12 = v_setzero_f32(), c13 = c12, c14 = c12, c15 = c12, c16 = c12, c17 = c12; - v_float32x4 c18 = v_setzero_f32(), c19 = c18, c20 = c18, c21 = c18, c22 = c18, c23 = c18; - - for (int p = 0; p < np; p++, a += CONV_MR, b += CONV_NR) - { - v_float32x4 a0 = v_setall_f32(a[0]); - v_float32x4 b0 = v_load(b), b1 = v_load(b + 4), b2 = v_load(b + 8); - v_float32x4 b3 = v_load(b + 12), b4 = v_load(b + 16), b5 = v_load(b + 20); - - c0 = v_fma(b0, a0, c0); - c1 = v_fma(b1, a0, c1); - c2 = v_fma(b2, a0, c2); - c3 = v_fma(b3, a0, c3); - c4 = v_fma(b4, a0, c4); - c5 = v_fma(b5, a0, c5); - - a0 = v_setall_f32(a[1]); - c6 = v_fma(b0, a0, c6); - c7 = v_fma(b1, a0, c7); - c8 = v_fma(b2, a0, c8); - c9 = v_fma(b3, a0, c9); - c10 = v_fma(b4, a0, c10); - c11 = v_fma(b5, a0, c11); - - a0 = v_setall_f32(a[2]); - c12 = v_fma(b0, a0, c12); - c13 = v_fma(b1, a0, c13); - c14 = v_fma(b2, a0, c14); - c15 = v_fma(b3, a0, c15); - c16 = v_fma(b4, a0, c16); - c17 = v_fma(b5, a0, c17); - - a0 = v_setall_f32(a[3]); - c18 = v_fma(b0, a0, c18); - c19 = v_fma(b1, a0, c19); - c20 = v_fma(b2, a0, c20); - c21 = v_fma(b3, a0, c21); - c22 = v_fma(b4, a0, c22); - c23 = v_fma(b5, a0, c23); - } - - if (!init_c) - { - c0 += v_load(c); - c1 += v_load(c + 4); - c2 += v_load(c + 8); - c3 += v_load(c + 12); - c4 += v_load(c + 16); - c5 += v_load(c + 20); - - c6 += v_load(c + ldc); - c7 += v_load(c + ldc + 4); - c8 += v_load(c + ldc + 8); - c9 += v_load(c + ldc + 12); - c10 += v_load(c + ldc + 16); - c11 += v_load(c + ldc + 20); - - c12 += v_load(c + ldc*2); - c13 += v_load(c + ldc*2 + 4); - c14 += v_load(c + ldc*2 + 8); - c15 += v_load(c + ldc*2 + 12); - c16 += v_load(c + ldc*2 + 16); - c17 
+= v_load(c + ldc*2 + 20); - - c18 += v_load(c + ldc*3); - c19 += v_load(c + ldc*3 + 4); - c20 += v_load(c + ldc*3 + 8); - c21 += v_load(c + ldc*3 + 12); - c22 += v_load(c + ldc*3 + 16); - c23 += v_load(c + ldc*3 + 20); - } - - v_store(c, c0); - v_store(c + 4, c1); - v_store(c + 8, c2); - v_store(c + 12, c3); - v_store(c + 16, c4); - v_store(c + 20, c5); - - v_store(c + ldc, c6); - v_store(c + ldc + 4, c7); - v_store(c + ldc + 8, c8); - v_store(c + ldc + 12, c9); - v_store(c + ldc + 16, c10); - v_store(c + ldc + 20, c11); - - v_store(c + ldc * 2, c12); - v_store(c + ldc * 2 + 4, c13); - v_store(c + ldc * 2 + 8, c14); - v_store(c + ldc * 2 + 12, c15); - v_store(c + ldc * 2 + 16, c16); - v_store(c + ldc * 2 + 20, c17); - - v_store(c + ldc * 3, c18); - v_store(c + ldc * 3 + 4, c19); - v_store(c + ldc * 3 + 8, c20); - v_store(c + ldc * 3 + 12, c21); - v_store(c + ldc * 3 + 16, c22); - v_store(c + ldc * 3 + 20, c23); -} -#endif - -static void convBlock4x8(int np, const float* a, const float* b, float* c, int ldc, bool init_c) -{ - CV_Assert(CONV_NR >= 4); - v_float32x4 c0 = v_setzero_f32(), c1 = c0, c2 = c0, c3 = c0; - v_float32x4 c4 = c0, c5 = c0, c6 = c0, c7 = c0; - - for (int p = 0; p < np; p++, a += CONV_MR, b += CONV_NR) - { - v_float32x4 a0 = v_setall_f32(a[0]); - v_float32x4 a1 = v_setall_f32(a[1]); - v_float32x4 a2 = v_setall_f32(a[2]); - v_float32x4 a3 = v_setall_f32(a[3]); - - v_float32x4 b0 = v_load(b), b1 = v_load(b + 4); - - c0 = v_fma(b0, a0, c0); - c1 = v_fma(b1, a0, c1); - - c2 = v_fma(b0, a1, c2); - c3 = v_fma(b1, a1, c3); - - c4 = v_fma(b0, a2, c4); - c5 = v_fma(b1, a2, c5); - - c6 = v_fma(b0, a3, c6); - c7 = v_fma(b1, a3, c7); - } - - if (!init_c) - { - c0 += v_load(c); - c1 += v_load(c + 4); - - c2 += v_load(c + ldc); - c3 += v_load(c + ldc + 4); - - c4 += v_load(c + ldc*2); - c5 += v_load(c + ldc*2 + 4); - - c6 += v_load(c + ldc*3); - c7 += v_load(c + ldc*3 + 4); - } - - v_store(c, c0); - v_store(c + 4, c1); - v_store(c + ldc, c2); - v_store(c + ldc 
+ 4, c3); - v_store(c + ldc * 2, c4); - v_store(c + ldc * 2 + 4, c5); - v_store(c + ldc * 3, c6); - v_store(c + ldc * 3 + 4, c7); -} - -static void convBlock4x4(int np, const float* a, const float* b, float* c, int ldc, bool init_c) -{ - CV_Assert(CONV_NR >= 4); - v_float32x4 c0 = v_setzero_f32(), c1 = c0, c2 = c0, c3 = c0; - - for (int p = 0; p < np; p++, a += CONV_MR, b += CONV_NR) - { - v_float32x4 a0 = v_setall_f32(a[0]); - v_float32x4 a1 = v_setall_f32(a[1]); - v_float32x4 a2 = v_setall_f32(a[2]); - v_float32x4 a3 = v_setall_f32(a[3]); - - v_float32x4 b0 = v_load(b); - - c0 = v_fma(b0, a0, c0); - c1 = v_fma(b0, a1, c1); - c2 = v_fma(b0, a2, c2); - c3 = v_fma(b0, a3, c3); - } - - if (!init_c) - { - c0 += v_load(c); - c1 += v_load(c + ldc); - c2 += v_load(c + ldc*2); - c3 += v_load(c + ldc*3); - } - - v_store(c, c0); - v_store(c + ldc, c1); - v_store(c + ldc * 2, c2); - v_store(c + ldc * 3, c3); -} -#endif - -static void convBlockNoSIMD(int np, const float* a, const float* b, float* c, int ldc, bool init_c, const int outLen) -{ - std::vector cbuffer(CONV_MR * outLen, 0); - float* cbuf = cbuffer.data(); - for( int p = 0; p < np; p++ ) - { - for( int i = 0; i < CONV_MR; i++ ) - { - float ai = a[CONV_MR*p + i]; - for( int j = 0; j < outLen; j++ ) - cbuf[i * outLen+j] += b[CONV_NR*p + j] * ai; - } - } - - if (!init_c) - { - for(int i = 0; i < CONV_MR; i++) - { - for(int j = 0; j < outLen; j++) - c[i*ldc + j] += cbuf[i*outLen + j]; - } - } - else - { - for(int i = 0; i < CONV_MR; i++) - { - for(int j = 0; j < outLen; j++) - c[i*ldc + j] = cbuf[i*outLen + j]; - } - } -} - -void convBlock(int np, const float* a, const float* b, float* c, int ldc, bool init_c, const int outLen) -{ - // The possible outLen range is [24, 8~1]. 
-#if CV_SIMD128 -#if CONV_MR == 4 && CONV_NR == 24 - const int CONV_NRby3 = CONV_NR/3; - if (outLen > CONV_NRby3) - { - convBlock4x24(np, a, b, c, ldc, init_c); - return; - } -#endif - - if (outLen <= 8 && outLen > 4) - { - convBlock4x8(np, a, b, c, ldc, init_c); - return; - } - - if (outLen <= 4 && outLen > 1) - { - convBlock4x4(np, a, b, c, ldc, init_c); - return; - } - convBlockNoSIMD(np, a, b, c, ldc, init_c, outLen); -#else - convBlockNoSIMD(np, a, b, c, ldc, init_c, outLen); -#endif -} -} // namespace dnn - -namespace opt_NEON -{ -#if CV_TRY_NEON -void convBlock_NEON(int np, const float* a, const float* b, float* c, int ldc, bool init_c) -{ -#if CONV_MR == 4 && CONV_NR == 28 // AARCH64 - { - float32x4_t c00 = vdupq_n_f32(0.f), c01 = c00, c02 = c00, c03 = c00, c04 = c00, c05 = c00, c06 = c00; - float32x4_t c10 = vdupq_n_f32(0.f), c11 = c10, c12 = c10, c13 = c10, c14 = c10, c15 = c10, c16 = c10; - float32x4_t c20 = vdupq_n_f32(0.f), c21 = c20, c22 = c20, c23 = c20, c24 = c20, c25 = c20, c26 = c20; - float32x4_t c30 = vdupq_n_f32(0.f), c31 = c30, c32 = c30, c33 = c30, c34 = c30, c35 = c30, c36 = c30; - - for( int p = 0; p < np; p++, a += CONV_MR, b += CONV_NR ) - { - float32x4_t a0 = vld1q_f32(a), b0, b1, b2; - b0 = vld1q_f32(b); b1 = vld1q_f32(b + 4); b2 = vld1q_f32(b + 8); - - c00 = vfmaq_laneq_f32(c00, b0, a0, 0); - c01 = vfmaq_laneq_f32(c01, b1, a0, 0); - c02 = vfmaq_laneq_f32(c02, b2, a0, 0); - c10 = vfmaq_laneq_f32(c10, b0, a0, 1); - c11 = vfmaq_laneq_f32(c11, b1, a0, 1); - c12 = vfmaq_laneq_f32(c12, b2, a0, 1); - c20 = vfmaq_laneq_f32(c20, b0, a0, 2); - c21 = vfmaq_laneq_f32(c21, b1, a0, 2); - c22 = vfmaq_laneq_f32(c22, b2, a0, 2); - c30 = vfmaq_laneq_f32(c30, b0, a0, 3); - c31 = vfmaq_laneq_f32(c31, b1, a0, 3); - c32 = vfmaq_laneq_f32(c32, b2, a0, 3); - - b0 = vld1q_f32(b + 12); b1 = vld1q_f32(b + 16); b2 = vld1q_f32(b + 20); - - c03 = vfmaq_laneq_f32(c03, b0, a0, 0); - c04 = vfmaq_laneq_f32(c04, b1, a0, 0); - c05 = vfmaq_laneq_f32(c05, b2, a0, 0); - c13 
= vfmaq_laneq_f32(c13, b0, a0, 1); - c14 = vfmaq_laneq_f32(c14, b1, a0, 1); - c15 = vfmaq_laneq_f32(c15, b2, a0, 1); - c23 = vfmaq_laneq_f32(c23, b0, a0, 2); - c24 = vfmaq_laneq_f32(c24, b1, a0, 2); - c25 = vfmaq_laneq_f32(c25, b2, a0, 2); - c33 = vfmaq_laneq_f32(c33, b0, a0, 3); - c34 = vfmaq_laneq_f32(c34, b1, a0, 3); - c35 = vfmaq_laneq_f32(c35, b2, a0, 3); - - b0 = vld1q_f32(b + 24); - c06 = vfmaq_laneq_f32(c06, b0, a0, 0); - c16 = vfmaq_laneq_f32(c16, b0, a0, 1); - c26 = vfmaq_laneq_f32(c26, b0, a0, 2); - c36 = vfmaq_laneq_f32(c36, b0, a0, 3); - } - - if (!init_c) - { - c00 = vaddq_f32(c00, vld1q_f32(c)); - c01 = vaddq_f32(c01, vld1q_f32(c + 4)); - c02 = vaddq_f32(c02, vld1q_f32(c + 8)); - c03 = vaddq_f32(c03, vld1q_f32(c + 12)); - c04 = vaddq_f32(c04, vld1q_f32(c + 16)); - c05 = vaddq_f32(c05, vld1q_f32(c + 20)); - c06 = vaddq_f32(c06, vld1q_f32(c + 24)); - - c10 = vaddq_f32(c10, vld1q_f32(c + ldc)); - c11 = vaddq_f32(c11, vld1q_f32(c + ldc + 4)); - c12 = vaddq_f32(c12, vld1q_f32(c + ldc + 8)); - c13 = vaddq_f32(c13, vld1q_f32(c + ldc + 12)); - c14 = vaddq_f32(c14, vld1q_f32(c + ldc + 16)); - c15 = vaddq_f32(c15, vld1q_f32(c + ldc + 20)); - c16 = vaddq_f32(c16, vld1q_f32(c + ldc + 24)); - - c20 = vaddq_f32(c20, vld1q_f32(c + ldc*2)); - c21 = vaddq_f32(c21, vld1q_f32(c + ldc*2 + 4)); - c22 = vaddq_f32(c22, vld1q_f32(c + ldc*2 + 8)); - c23 = vaddq_f32(c23, vld1q_f32(c + ldc*2 + 12)); - c24 = vaddq_f32(c24, vld1q_f32(c + ldc*2 + 16)); - c25 = vaddq_f32(c25, vld1q_f32(c + ldc*2 + 20)); - c26 = vaddq_f32(c26, vld1q_f32(c + ldc*2 + 24)); - - c30 = vaddq_f32(c30, vld1q_f32(c + ldc*3)); - c31 = vaddq_f32(c31, vld1q_f32(c + ldc*3 + 4)); - c32 = vaddq_f32(c32, vld1q_f32(c + ldc*3 + 8)); - c33 = vaddq_f32(c33, vld1q_f32(c + ldc*3 + 12)); - c34 = vaddq_f32(c34, vld1q_f32(c + ldc*3 + 16)); - c35 = vaddq_f32(c35, vld1q_f32(c + ldc*3 + 20)); - c36 = vaddq_f32(c36, vld1q_f32(c + ldc*3 + 24)); - } - - vst1q_f32(c, c00); vst1q_f32(c+4, c01); - vst1q_f32(c+8, c02); 
vst1q_f32(c+12, c03); - vst1q_f32(c+16, c04); vst1q_f32(c+20, c05); - vst1q_f32(c+24, c06); - - vst1q_f32(c+ldc, c10); vst1q_f32(c+ldc+4, c11); - vst1q_f32(c+ldc+8, c12); vst1q_f32(c+ldc+12, c13); - vst1q_f32(c+ldc+16, c14); vst1q_f32(c+ldc+20, c15); - vst1q_f32(c+ldc+24, c16); - - vst1q_f32(c+ldc*2, c20); vst1q_f32(c+ldc*2+4, c21); - vst1q_f32(c+ldc*2+8, c22); vst1q_f32(c+ldc*2+12, c23); - vst1q_f32(c+ldc*2+16, c24); vst1q_f32(c+ldc*2+20, c25); - vst1q_f32(c+ldc*2+24, c26); - - vst1q_f32(c+ldc*3, c30); vst1q_f32(c+ldc*3+4, c31); - vst1q_f32(c+ldc*3+8, c32); vst1q_f32(c+ldc*3+12, c33); - vst1q_f32(c+ldc*3+16, c34); vst1q_f32(c+ldc*3+20, c35); - vst1q_f32(c+ldc*3+24, c36); - } -#elif CONV_MR == 4 && CONV_NR == 12 // ARMv7 - { - float32x4_t c0 = vdupq_n_f32(0.f), c1 = c0, c2 = c0; - float32x4_t c3 = vdupq_n_f32(0.f), c4 = c3, c5 = c3; - float32x4_t c6 = vdupq_n_f32(0.f), c7 = c6, c8 = c6; - float32x4_t c9 = vdupq_n_f32(0.f), c10 = c9, c11 = c9; - - - float32x2_t a0 = vdup_n_f32(0.0f), a1 = a0; - float32x4_t b0 = vdupq_n_f32(0.0f), b1 = vdupq_n_f32(0.0f), b2 = vdupq_n_f32(0.0f); - - for (int p = 0; p < np; p++, a += CONV_MR, b += CONV_NR) - { - a0 = vld1_f32(a), a1 = vld1_f32(a+2); - b0 = vld1q_f32(b), b1 = vld1q_f32(b + 4), b2 = vld1q_f32(b + 8); - - c0 = vmlaq_lane_f32(c0, b0, a0, 0); - c1 = vmlaq_lane_f32(c1, b1, a0, 0); - c2 = vmlaq_lane_f32(c2, b2, a0, 0); - - c3 = vmlaq_lane_f32(c3, b0, a0, 1); - c4 = vmlaq_lane_f32(c4, b1, a0, 1); - c5 = vmlaq_lane_f32(c5, b2, a0, 1); - - c6 = vmlaq_lane_f32(c6, b0, a1, 0); - c7 = vmlaq_lane_f32(c7, b1, a1, 0); - c8 = vmlaq_lane_f32(c8, b2, a1, 0); - - c9 = vmlaq_lane_f32(c9 , b0, a1, 1); - c10 = vmlaq_lane_f32(c10, b1, a1, 1); - c11 = vmlaq_lane_f32(c11, b2, a1, 1); - } - - if (!init_c) - { - c0 = vaddq_f32(c0, vld1q_f32(c)); - c1 = vaddq_f32(c1, vld1q_f32(c + 4)); - c2 = vaddq_f32(c2, vld1q_f32(c + 8)); - - c3 = vaddq_f32(c3, vld1q_f32(c + ldc)); - c4 = vaddq_f32(c4, vld1q_f32(c + ldc + 4)); - c5 = vaddq_f32(c5, vld1q_f32(c + 
ldc + 8)); - - c6 = vaddq_f32(c6, vld1q_f32(c + ldc * 2)); - c7 = vaddq_f32(c7, vld1q_f32(c + ldc * 2 + 4)); - c8 = vaddq_f32(c8, vld1q_f32(c + ldc * 2 + 8)); - - c9 = vaddq_f32(c9 , vld1q_f32(c + ldc * 3)); - c10 = vaddq_f32(c10, vld1q_f32(c + ldc * 3 + 4)); - c11 = vaddq_f32(c11, vld1q_f32(c + ldc * 3 + 8)); - } - - vst1q_f32(c, c0), vst1q_f32(c+4, c1), vst1q_f32(c+8, c2); - vst1q_f32(c + ldc, c3), vst1q_f32(c + ldc + 4, c4), vst1q_f32(c + ldc + 8, c5); - vst1q_f32(c + ldc*2, c6), vst1q_f32(c + ldc*2 + 4, c7), vst1q_f32(c + ldc*2 + 8, c8); - vst1q_f32(c + ldc*3, c9), vst1q_f32(c + ldc*3 + 4, c10), vst1q_f32(c + ldc*3 + 8, c11); - } -//#else -//#error "unsupported CONV_MR and/or CONV_NR in convBlock_NEON." -#endif -} -#endif -} // namespace opt_NEON - -} // namespace cv -#endif //OPENCV_FAST_CONVOLUTION_SIMD_HPP diff --git a/modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp b/modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp deleted file mode 100644 index b0ccfd0cd2..0000000000 --- a/modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp +++ /dev/null @@ -1,1153 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -// This file is modified from the ficus (https://github.com/vpisarev/ficus/blob/master/lib/NN/OpConv_Winograd.fx). -// Here is the original license: -/* - This file is a part of ficus language project. - See ficus/LICENSE for the licensing terms -*/ - -#include "../../precomp.hpp" -#include "fast_convolution.hpp" - -namespace cv { namespace dnn { - -#if CV_NEON || CV_SIMD128 || CV_TRY_AVX2 -enum { VEC_ALIGN = 32, DFT_TYPE = CV_32F }; // Memory alignment. 
- -static void -_fx_winograd_accum_f32(const float* inwptr, const float* wptr, - float* outbuf, int Cg, int iblock) - { -#if CV_NEON && CV_NEON_AARCH64 - CV_Assert(_FX_WINO_IBLOCK == 6 && _FX_WINO_KBLOCK == 4 && _FX_WINO_ATOM_F32 == 4); - if (iblock > 3) - { - for (int atom_id = 0; atom_id < _FX_WINO_NATOMS_F32; atom_id++, - outbuf += _FX_WINO_ATOM_F32) - { - float32x4_t s00 = vdupq_n_f32(0.f), s01 = s00, s02 = s00, s03 = s00, s04 = s00, s05 = s00; - float32x4_t s10 = vdupq_n_f32(0.f), s11 = s00, s12 = s00, s13 = s00, s14 = s00, s15 = s00; - float32x4_t s20 = vdupq_n_f32(0.f), s21 = s00, s22 = s00, s23 = s00, s24 = s00, s25 = s00; - float32x4_t s30 = vdupq_n_f32(0.f), s31 = s00, s32 = s00, s33 = s00, s34 = s00, s35 = s00; - for (int c = 0; c < Cg; c++, inwptr += _FX_WINO_IBLOCK*_FX_WINO_ATOM_F32, - wptr += _FX_WINO_KBLOCK*_FX_WINO_ATOM_F32) { - float32x4_t w0 = vld1q_f32(wptr), w1 = vld1q_f32(wptr + 4); - float32x4_t w2 = vld1q_f32(wptr + 8), w3 = vld1q_f32(wptr + 12); - float32x4_t x0, x1; - x0 = vld1q_f32(inwptr); - x1 = vld1q_f32(inwptr + 4); - s00 = vfmaq_f32(s00, w0, x0); - s01 = vfmaq_f32(s01, w0, x1); - s10 = vfmaq_f32(s10, w1, x0); - s11 = vfmaq_f32(s11, w1, x1); - s20 = vfmaq_f32(s20, w2, x0); - s21 = vfmaq_f32(s21, w2, x1); - s30 = vfmaq_f32(s30, w3, x0); - s31 = vfmaq_f32(s31, w3, x1); - x0 = vld1q_f32(inwptr + 8); - x1 = vld1q_f32(inwptr + 12); - s02 = vfmaq_f32(s02, w0, x0); - s03 = vfmaq_f32(s03, w0, x1); - s12 = vfmaq_f32(s12, w1, x0); - s13 = vfmaq_f32(s13, w1, x1); - s22 = vfmaq_f32(s22, w2, x0); - s23 = vfmaq_f32(s23, w2, x1); - s32 = vfmaq_f32(s32, w3, x0); - s33 = vfmaq_f32(s33, w3, x1); - x0 = vld1q_f32(inwptr + 16); - x1 = vld1q_f32(inwptr + 20); - s04 = vfmaq_f32(s04, w0, x0); - s05 = vfmaq_f32(s05, w0, x1); - s14 = vfmaq_f32(s14, w1, x0); - s15 = vfmaq_f32(s15, w1, x1); - s24 = vfmaq_f32(s24, w2, x0); - s25 = vfmaq_f32(s25, w2, x1); - s34 = vfmaq_f32(s34, w3, x0); - s35 = vfmaq_f32(s35, w3, x1); - } - - vst1q_f32(outbuf, s00); - 
vst1q_f32(outbuf + 1*64, s01); - vst1q_f32(outbuf + 2*64, s02); - vst1q_f32(outbuf + 3*64, s03); - vst1q_f32(outbuf + 4*64, s04); - vst1q_f32(outbuf + 5*64, s05); - - vst1q_f32(outbuf + 6*64, s10); - vst1q_f32(outbuf + 7*64, s11); - vst1q_f32(outbuf + 8*64, s12); - vst1q_f32(outbuf + 9*64, s13); - vst1q_f32(outbuf + 10*64, s14); - vst1q_f32(outbuf + 11*64, s15); - - vst1q_f32(outbuf + 12*64, s20); - vst1q_f32(outbuf + 13*64, s21); - vst1q_f32(outbuf + 14*64, s22); - vst1q_f32(outbuf + 15*64, s23); - vst1q_f32(outbuf + 16*64, s24); - vst1q_f32(outbuf + 17*64, s25); - - vst1q_f32(outbuf + 18*64, s30); - vst1q_f32(outbuf + 19*64, s31); - vst1q_f32(outbuf + 20*64, s32); - vst1q_f32(outbuf + 21*64, s33); - vst1q_f32(outbuf + 22*64, s34); - vst1q_f32(outbuf + 23*64, s35); - } - } - else - { - for (int atom_id = 0; atom_id < _FX_WINO_NATOMS_F32; atom_id++, - outbuf += _FX_WINO_ATOM_F32) - { - float32x4_t s00 = vdupq_n_f32(0.f), s01 = s00, s02 = s00; - float32x4_t s10 = vdupq_n_f32(0.f), s11 = s00, s12 = s00; - float32x4_t s20 = vdupq_n_f32(0.f), s21 = s00, s22 = s00; - float32x4_t s30 = vdupq_n_f32(0.f), s31 = s00, s32 = s00; - for (int c = 0; c < Cg; c++, inwptr += _FX_WINO_IBLOCK*_FX_WINO_ATOM_F32, - wptr += _FX_WINO_KBLOCK*_FX_WINO_ATOM_F32) { - float32x4_t w0 = vld1q_f32(wptr), w1 = vld1q_f32(wptr + 4); - float32x4_t w2 = vld1q_f32(wptr + 8), w3 = vld1q_f32(wptr + 12); - float32x4_t x0, x1, x2; - x0 = vld1q_f32(inwptr); - x1 = vld1q_f32(inwptr + 4); - x2 = vld1q_f32(inwptr + 8); - s00 = vfmaq_f32(s00, w0, x0); - s01 = vfmaq_f32(s01, w0, x1); - s02 = vfmaq_f32(s02, w0, x2); - s10 = vfmaq_f32(s10, w1, x0); - s11 = vfmaq_f32(s11, w1, x1); - s12 = vfmaq_f32(s12, w1, x2); - s20 = vfmaq_f32(s20, w2, x0); - s21 = vfmaq_f32(s21, w2, x1); - s22 = vfmaq_f32(s22, w2, x2); - s30 = vfmaq_f32(s30, w3, x0); - s31 = vfmaq_f32(s31, w3, x1); - s32 = vfmaq_f32(s32, w3, x2); - } - - vst1q_f32(outbuf, s00); - vst1q_f32(outbuf + 1*64, s01); - vst1q_f32(outbuf + 2*64, s02); - 
vst1q_f32(outbuf + 6*64, s10); - vst1q_f32(outbuf + 7*64, s11); - vst1q_f32(outbuf + 8*64, s12); - vst1q_f32(outbuf + 12*64, s20); - vst1q_f32(outbuf + 13*64, s21); - vst1q_f32(outbuf + 14*64, s22); - vst1q_f32(outbuf + 18*64, s30); - vst1q_f32(outbuf + 19*64, s31); - vst1q_f32(outbuf + 20*64, s32); - } - } -#elif CV_SIMD128 - CV_Assert(_FX_WINO_IBLOCK == 3 && _FX_WINO_KBLOCK == 4 && _FX_WINO_ATOM_F32 == 4); - for (int atom_id = 0; atom_id < _FX_WINO_NATOMS_F32; atom_id++, - outbuf += _FX_WINO_ATOM_F32) - { - v_float32x4 s00 = v_setzero_f32(), s01 = s00, s02 = s00; - v_float32x4 s10 = v_setzero_f32(), s11 = s00, s12 = s00; - v_float32x4 s20 = v_setzero_f32(), s21 = s00, s22 = s00; - v_float32x4 s30 = v_setzero_f32(), s31 = s00, s32 = s00; - - for (int c = 0; c < Cg; c++, inwptr += _FX_WINO_IBLOCK*_FX_WINO_ATOM_F32, - wptr += _FX_WINO_KBLOCK*_FX_WINO_ATOM_F32) - { - v_float32x4 x0, x1, x2; - x0 = v_load(inwptr); - x1 = v_load(inwptr + 4); - x2 = v_load(inwptr + 8); - - v_float32x4 w0 = v_load(wptr); - s00 = v_fma(w0, x0, s00); - s01 = v_fma(w0, x1, s01); - s02 = v_fma(w0, x2, s02); - - w0 = v_load(wptr + 4); - s10 = v_fma(w0, x0, s10); - s11 = v_fma(w0, x1, s11); - s12 = v_fma(w0, x2, s12); - - w0 = v_load(wptr + 8); - s20 = v_fma(w0, x0, s20); - s21 = v_fma(w0, x1, s21); - s22 = v_fma(w0, x2, s22); - - w0 = v_load(wptr + 12); - s30 = v_fma(w0, x0, s30); - s31 = v_fma(w0, x1, s31); - s32 = v_fma(w0, x2, s32); - } - - v_store(outbuf, s00); - v_store(outbuf + 1*64, s01); - v_store(outbuf + 2*64, s02); - v_store(outbuf + 3*64, s10); - v_store(outbuf + 4*64, s11); - v_store(outbuf + 5*64, s12); - v_store(outbuf + 6*64, s20); - v_store(outbuf + 7*64, s21); - v_store(outbuf + 8*64, s22); - v_store(outbuf + 9*64, s30); - v_store(outbuf + 10*64, s31); - v_store(outbuf + 11*64, s32); - } -#else - for (int atom_id = 0; atom_id < _FX_WINO_NATOMS_F32; - atom_id++, outbuf += _FX_WINO_ATOM_F32) - { - float sumbuf[_FX_WINO_IBLOCK*_FX_WINO_KBLOCK*_FX_WINO_ATOM_F32]; - 
memset(sumbuf, 0, sizeof(sumbuf)); - for (int c = 0; c < Cg; c++, inwptr += _FX_WINO_IBLOCK*_FX_WINO_ATOM_F32, - wptr += _FX_WINO_KBLOCK*_FX_WINO_ATOM_F32) - { - for (int i = 0; i < _FX_WINO_KBLOCK; i++) - { - for (int j = 0; j < _FX_WINO_IBLOCK; j++) - { - int i_ = i*_FX_WINO_ATOM_F32; - int j_ = j*_FX_WINO_ATOM_F32; - int ij_ = i_*_FX_WINO_IBLOCK + j_; - float s0 = inwptr[j_ + 0]*wptr[i_ + 0]; - float s1 = inwptr[j_ + 1]*wptr[i_ + 1]; - float s2 = inwptr[j_ + 2]*wptr[i_ + 2]; - float s3 = inwptr[j_ + 3]*wptr[i_ + 3]; - sumbuf[ij_ + 0] += s0; - sumbuf[ij_ + 1] += s1; - sumbuf[ij_ + 2] += s2; - sumbuf[ij_ + 3] += s3; - } - } - } - for (int ij = 0; ij < _FX_WINO_KBLOCK*_FX_WINO_IBLOCK; ij++) - { - int ij_ = ij*_FX_WINO_ATOM_F32; - int ij_out = ij*_FX_WINO_AREA; - outbuf[ij_out + 0] = sumbuf[ij_ + 0]; - outbuf[ij_out + 1] = sumbuf[ij_ + 1]; - outbuf[ij_out + 2] = sumbuf[ij_ + 2]; - outbuf[ij_out + 3] = sumbuf[ij_ + 3]; - } - } -#endif -} - -#if CV_NEON -#define T4x4(a, b, c, d, tr0, tr1) \ - tr0 = vtrnq_f32(a, b); \ - tr1 = vtrnq_f32(c, d); \ - a = vcombine_f32(vget_low_f32(tr0.val[0]), vget_low_f32(tr1.val[0])); \ - b = vcombine_f32(vget_low_f32(tr0.val[1]), vget_low_f32(tr1.val[1])); \ - c = vcombine_f32(vget_high_f32(tr0.val[0]), vget_high_f32(tr1.val[0])); \ - d = vcombine_f32(vget_high_f32(tr0.val[1]), vget_high_f32(tr1.val[1])) -#endif - -/*Input transform*/ -static void -_fx_winograd_BtXB_8x8_f32(const float* inptr, int inpstep, - float* outptr, int Cg) -{ -#if CV_NEON && CV_NEON_AARCH64 - float32x4_t x00 = vld1q_f32(inptr), x01 = vld1q_f32(inptr + 4); - float32x4_t x10 = vld1q_f32(inptr + inpstep), x11 = vld1q_f32(inptr + inpstep + 4); - float32x4_t x20 = vld1q_f32(inptr + inpstep*2), x21 = vld1q_f32(inptr + inpstep*2 + 4); - float32x4_t x30 = vld1q_f32(inptr + inpstep*3), x31 = vld1q_f32(inptr + inpstep*3 + 4); - float32x4_t x40 = vld1q_f32(inptr + inpstep*4), x41 = vld1q_f32(inptr + inpstep*4 + 4); - float32x4_t x50 = vld1q_f32(inptr + inpstep*5), x51 = 
vld1q_f32(inptr + inpstep*5 + 4); - float32x4_t x60 = vld1q_f32(inptr + inpstep*6), x61 = vld1q_f32(inptr + inpstep*6 + 4); - float32x4_t x70 = vld1q_f32(inptr + inpstep*7), x71 = vld1q_f32(inptr + inpstep*7 + 4); - - float32x4_t z00, z01, z10, z11, z20, z21, z30, z31, z40, z41, z50, z51, z60, z61, z70, z71; - - { - /* Y[0] = [1.f, 0.f, -5.25f, 0.f, 5.25f, 0.f, -1.f, 0.f]*X */ - /* Y[7] = [0.f, -1.f, 0.f, 5.25f, 0.f, -5.25f, 0.f, 1.f]*X */ - float32x4_t q5_25 = vdupq_n_f32(5.25f), t00, t01, t10, t11; - t00 = vsubq_f32(x40, x20); - t01 = vsubq_f32(x41, x21); - t10 = vsubq_f32(x30, x50); - t11 = vsubq_f32(x31, x51); - float32x4_t y00 = vfmaq_f32(vsubq_f32(x00, x60), t00, q5_25); - float32x4_t y01 = vfmaq_f32(vsubq_f32(x01, x61), t01, q5_25); - float32x4_t y70 = vfmaq_f32(vsubq_f32(x70, x10), t10, q5_25); - float32x4_t y71 = vfmaq_f32(vsubq_f32(x71, x11), t11, q5_25); - - /* Y[1] = [0.f, 1.f, 1.f, -4.25f, -4.25f, 1.f, 1.f, 0.f]*X */ - /* Y[2] = [0.f, -1.f, 1.f, 4.25f, -4.25f, -1.f, 1.f, 0.f]*X */ - float32x4_t qm4_25 = vdupq_n_f32(-4.25f); - t00 = vfmaq_f32(vaddq_f32(x10, x50), x30, qm4_25); - t01 = vfmaq_f32(vaddq_f32(x11, x51), x31, qm4_25); - t10 = vfmaq_f32(vaddq_f32(x20, x60), x40, qm4_25); - t11 = vfmaq_f32(vaddq_f32(x21, x61), x41, qm4_25); - - float32x4_t y10 = vaddq_f32(t00, t10), y11 = vaddq_f32(t01, t11); - float32x4_t y20 = vsubq_f32(t10, t00), y21 = vsubq_f32(t11, t01); - - /* Y[3] = [0.f, 0.5f, 0.25f, -2.5f, -1.25f, 2.f, 1.f, 0.f]*X */ - /* Y[4] = [0.f, -0.5f, 0.25f, 2.5f, -1.25f, -2.f, 1.f, 0.f]*X */ - float32x4_t q0_5 = vdupq_n_f32(0.5f), q0_25 = vdupq_n_f32(0.25f); - float32x4_t qm2_5 = vdupq_n_f32(-2.5f), qm1_25 = vdupq_n_f32(-1.25f); - t00 = vfmaq_f32(vaddq_f32(x50, x50), x10, q0_5); - t01 = vfmaq_f32(vaddq_f32(x51, x51), x11, q0_5); - t10 = vfmaq_f32(x60, x20, q0_25); - t11 = vfmaq_f32(x61, x21, q0_25); - t00 = vfmaq_f32(t00, x30, qm2_5); - t01 = vfmaq_f32(t01, x31, qm2_5); - t10 = vfmaq_f32(t10, x40, qm1_25); - t11 = vfmaq_f32(t11, x41, qm1_25); - 
- float32x4_t y30 = vaddq_f32(t00, t10), y31 = vaddq_f32(t01, t11); - float32x4_t y40 = vsubq_f32(t10, t00), y41 = vsubq_f32(t11, t01); - - /* Y[5] = [0.f, 2.f, 4.f, -2.5f, -5.f, 0.5f, 1.f, 0.f]*X */ - /* Y[6] = [0.f, -2.f, 4.f, 2.5f, -5.f, -0.5f, 1.f, 0.f]*X */ - float32x4_t q4 = vdupq_n_f32(4.f), qm5 = vdupq_n_f32(-5.f); - t00 = vfmaq_f32(vaddq_f32(x10, x10), x50, q0_5); - t01 = vfmaq_f32(vaddq_f32(x11, x11), x51, q0_5); - t10 = vfmaq_f32(x60, x20, q4); - t11 = vfmaq_f32(x61, x21, q4); - t00 = vfmaq_f32(t00, x30, qm2_5); - t01 = vfmaq_f32(t01, x31, qm2_5); - t10 = vfmaq_f32(t10, x40, qm5); - t11 = vfmaq_f32(t11, x41, qm5); - - float32x4_t y50 = vaddq_f32(t00, t10), y51 = vaddq_f32(t01, t11); - float32x4_t y60 = vsubq_f32(t10, t00), y61 = vsubq_f32(t11, t01); - - /* transpose 8x8 matrix in-place with some renumeration of the elements: */ - /* Y: */ - /* y00 y01 */ - /* y10 y11 */ - /* ... */ - /* y70 y71 */ - /* Y': */ - /* y00 y40 */ - /* y10 y50 */ - /* y20 y60 */ - /* y30 y70 */ - /* y01 y41 */ - /* y11 y51 */ - /* y21 y61 */ - /* y31 y71 */ - /* in other words, y40 <-> y01, y50 <-> y11, y60 <-> y21, y70 <-> y31 */ - float32x4x2_t tr0, tr1; - - T4x4(y00, y10, y20, y30, tr0, tr1); - T4x4(y01, y11, y21, y31, tr0, tr1); - T4x4(y40, y50, y60, y70, tr0, tr1); - T4x4(y41, y51, y61, y71, tr0, tr1); - - /* Z[0] = [1.f, 0.f, -5.25f, 0.f, 5.25f, 0.f, -1.f, 0.f]*Y */ - /* Z[7] = [0.f, -1.f, 0.f, 5.25f, 0.f, -5.25f, 0.f, 1.f]*Y */ - t00 = vsubq_f32(y01, y20); - t01 = vsubq_f32(y41, y60); - t10 = vsubq_f32(y30, y11); - t11 = vsubq_f32(y70, y51); - z00 = vfmaq_f32(vsubq_f32(y00, y21), t00, q5_25); - z01 = vfmaq_f32(vsubq_f32(y40, y61), t01, q5_25); - z70 = vfmaq_f32(vsubq_f32(y31, y10), t10, q5_25); - z71 = vfmaq_f32(vsubq_f32(y71, y50), t11, q5_25); - - /* Z[1] = [0.f, 1.f, 1.f, -4.25f, -4.25f, 1.f, 1.f, 0.f]*Y */ - /* Z[2] = [0.f, -1.f, 1.f, 4.25f, -4.25f, -1.f, 1.f, 0.f]*Y */ - t00 = vfmaq_f32(vaddq_f32(y10, y11), y30, qm4_25); - t01 = vfmaq_f32(vaddq_f32(y50, y51), y70, 
qm4_25); - t10 = vfmaq_f32(vaddq_f32(y20, y21), y01, qm4_25); - t11 = vfmaq_f32(vaddq_f32(y60, y61), y41, qm4_25); - - z10 = vaddq_f32(t00, t10); z11 = vaddq_f32(t01, t11); - z20 = vsubq_f32(t10, t00); z21 = vsubq_f32(t11, t01); - - /* Z[3] = [0.f, 0.5f, 0.25f, -2.5f, -1.25f, 2.f, 1.f, 0.f]*Y */ - /* Z[4] = [0.f, -0.5f, 0.25f, 2.5f, -1.25f, -2.f, 1.f, 0.f]*Y */ - t00 = vfmaq_f32(vaddq_f32(y11, y11), y10, q0_5); - t01 = vfmaq_f32(vaddq_f32(y51, y51), y50, q0_5); - t10 = vfmaq_f32(y21, y20, q0_25); - t11 = vfmaq_f32(y61, y60, q0_25); - t00 = vfmaq_f32(t00, y30, qm2_5); - t01 = vfmaq_f32(t01, y70, qm2_5); - t10 = vfmaq_f32(t10, y01, qm1_25); - t11 = vfmaq_f32(t11, y41, qm1_25); - - z30 = vaddq_f32(t00, t10); z31 = vaddq_f32(t01, t11); - z40 = vsubq_f32(t10, t00); z41 = vsubq_f32(t11, t01); - - /* Z[5] = [0.f, 2.f, 4.f, -2.5f, -5.f, 0.5f, 1.f, 0.f]*Y */ - /* Z[6] = [0.f, -2.f, 4.f, 2.5f, -5.f, -0.5f, 1.f, 0.f]*Y */ - t00 = vfmaq_f32(vaddq_f32(y10, y10), y11, q0_5); - t01 = vfmaq_f32(vaddq_f32(y50, y50), y51, q0_5); - t10 = vfmaq_f32(y21, y20, q4); - t11 = vfmaq_f32(y61, y60, q4); - t00 = vfmaq_f32(t00, y30, qm2_5); - t01 = vfmaq_f32(t01, y70, qm2_5); - t10 = vfmaq_f32(t10, y01, qm5); - t11 = vfmaq_f32(t11, y41, qm5); - - z50 = vaddq_f32(t00, t10); z51 = vaddq_f32(t01, t11); - z60 = vsubq_f32(t10, t00); z61 = vsubq_f32(t11, t01); - } - - const int outstep = _FX_WINO_IBLOCK*_FX_WINO_ATOM_F32*Cg; - - vst1q_f32(outptr, z00); - vst1q_f32(outptr + outstep, z01); - vst1q_f32(outptr + outstep*2, z10); - vst1q_f32(outptr + outstep*3, z11); - vst1q_f32(outptr + outstep*4, z20); - vst1q_f32(outptr + outstep*5, z21); - vst1q_f32(outptr + outstep*6, z30); - vst1q_f32(outptr + outstep*7, z31); - vst1q_f32(outptr + outstep*8, z40); - vst1q_f32(outptr + outstep*9, z41); - vst1q_f32(outptr + outstep*10, z50); - vst1q_f32(outptr + outstep*11, z51); - vst1q_f32(outptr + outstep*12, z60); - vst1q_f32(outptr + outstep*13, z61); - vst1q_f32(outptr + outstep*14, z70); - vst1q_f32(outptr + 
outstep*15, z71); -#elif CV_SIMD128 - v_float32x4 x00 = v_load(inptr), x01 = v_load(inptr + 4); - v_float32x4 x10 = v_load(inptr + inpstep), x11 = v_load(inptr + inpstep + 4); - v_float32x4 x20 = v_load(inptr + inpstep*2), x21 = v_load(inptr + inpstep*2 + 4); - v_float32x4 x30 = v_load(inptr + inpstep*3), x31 = v_load(inptr + inpstep*3 + 4); - v_float32x4 x40 = v_load(inptr + inpstep*4), x41 = v_load(inptr + inpstep*4 + 4); - v_float32x4 x50 = v_load(inptr + inpstep*5), x51 = v_load(inptr + inpstep*5 + 4); - v_float32x4 x60 = v_load(inptr + inpstep*6), x61 = v_load(inptr + inpstep*6 + 4); - v_float32x4 x70 = v_load(inptr + inpstep*7), x71 = v_load(inptr + inpstep*7 + 4); - - v_float32x4 z00, z01, z10, z11, z20, z21, z30, z31, z40, z41, z50, z51, z60, z61, z70, z71; - - { - /* Y[0] = [1.f, 0.f, -5.25f, 0.f, 5.25f, 0.f, -1.f, 0.f]*X */ - /* Y[7] = [0.f, -1.f, 0.f, 5.25f, 0.f, -5.25f, 0.f, 1.f]*X */ - v_float32x4 q5_25 = v_setall_f32(5.25f), t00, t01, t10, t11; - t00 = x40 - x20; - t01 = x41 - x21; - t10 = x30 - x50; - t11 = x31 - x51; - v_float32x4 y00 = v_fma(t00, q5_25, x00 - x60); - v_float32x4 y01 = v_fma(t01, q5_25, x01 - x61); - v_float32x4 y70 = v_fma(t10, q5_25, x70 - x10); - v_float32x4 y71 = v_fma(t11, q5_25, x71 - x11); - - /* Y[1] = [0.f, 1.f, 1.f, -4.25f, -4.25f, 1.f, 1.f, 0.f]*X */ - /* Y[2] = [0.f, -1.f, 1.f, 4.25f, -4.25f, -1.f, 1.f, 0.f]*X */ - v_float32x4 qm4_25 = v_setall_f32(-4.25f); - t00 = v_fma(x30, qm4_25, x10 + x50); - t01 = v_fma(x31, qm4_25, x11 + x51); - t10 = v_fma(x40, qm4_25, x20 + x60); - t11 = v_fma(x41, qm4_25, x21 + x61); - - v_float32x4 y10 = t00 + t10, y11 = t01 + t11; - v_float32x4 y20 = t10 - t00, y21 = t11 - t01; - - /* Y[3] = [0.f, 0.5f, 0.25f, -2.5f, -1.25f, 2.f, 1.f, 0.f]*X */ - /* Y[4] = [0.f, -0.5f, 0.25f, 2.5f, -1.25f, -2.f, 1.f, 0.f]*X */ - v_float32x4 q0_5 = v_setall_f32(0.5f), q0_25 = v_setall_f32(0.25f); - v_float32x4 qm2_5 = v_setall_f32(-2.5f), qm1_25 = v_setall_f32(-1.25f); - t00 = v_fma(x10, q0_5, x50 + x50); - 
t01 = v_fma(x11, q0_5, x51 + x51); - t10 = v_fma(x20, q0_25, x60); - t11 = v_fma(x21, q0_25, x61); - t00 = v_fma(x30, qm2_5, t00); - t01 = v_fma(x31, qm2_5, t01); - t10 = v_fma(x40, qm1_25, t10); - t11 = v_fma(x41, qm1_25, t11); - - v_float32x4 y30 = t00 + t10, y31 = t01 + t11; - v_float32x4 y40 = t10 - t00, y41 = t11 - t01; - - /* Y[5] = [0.f, 2.f, 4.f, -2.5f, -5.f, 0.5f, 1.f, 0.f]*X */ - /* Y[6] = [0.f, -2.f, 4.f, 2.5f, -5.f, -0.5f, 1.f, 0.f]*X */ - v_float32x4 q4 = v_setall_f32(4.f), qm5 = v_setall_f32(-5.f); - t00 = v_fma(x50, q0_5, x10 + x10); - t01 = v_fma(x51, q0_5, x11 + x11); - t10 = v_fma(x20, q4 , x60); - t11 = v_fma(x21, q4 , x61); - t00 = v_fma(x30, qm2_5, t00); - t01 = v_fma(x31, qm2_5, t01); - t10 = v_fma(x40, qm5 , t10); - t11 = v_fma(x41, qm5 , t11); - - v_float32x4 y50 = t00 + t10, y51 = t01 + t11; - v_float32x4 y60 = t10 - t00, y61 = t11 - t01; - - /* transpose 8x8 matrix in-place with some renumeration of the elements: */ - /* Y: */ - /* y00 y01 */ - /* y10 y11 */ - /* ... 
*/ - /* y70 y71 */ - /* Y': */ - /* y00 y40 */ - /* y10 y50 */ - /* y20 y60 */ - /* y30 y70 */ - /* y01 y41 */ - /* y11 y51 */ - /* y21 y61 */ - /* y31 y71 */ - /* in other words, y40 <-> y01, y50 <-> y11, y60 <-> y21, y70 <-> y31 */ - - v_transpose4x4(y00, y10, y20, y30, y00, y10, y20, y30); - v_transpose4x4(y01, y11, y21, y31, y01, y11, y21, y31); - v_transpose4x4(y40, y50, y60, y70, y40, y50, y60, y70); - v_transpose4x4(y41, y51, y61, y71, y41, y51, y61, y71); - - /* Z[0] = [1.f, 0.f, -5.25f, 0.f, 5.25f, 0.f, -1.f, 0.f]*Y */ - /* Z[7] = [0.f, -1.f, 0.f, 5.25f, 0.f, -5.25f, 0.f, 1.f]*Y */ - t00 = y01 - y20; - t01 = y41 - y60; - t10 = y30 - y11; - t11 = y70 - y51; - z00 = v_fma(t00, q5_25, y00 - y21); - z01 = v_fma(t01, q5_25, y40 - y61); - z70 = v_fma(t10, q5_25, y31 - y10); - z71 = v_fma(t11, q5_25, y71 - y50); - - /* Z[1] = [0.f, 1.f, 1.f, -4.25f, -4.25f, 1.f, 1.f, 0.f]*Y */ - /* Z[2] = [0.f, -1.f, 1.f, 4.25f, -4.25f, -1.f, 1.f, 0.f]*Y */ - t00 = v_fma(y30, qm4_25, y10 + y11); - t01 = v_fma(y70, qm4_25, y50 + y51); - t10 = v_fma(y01, qm4_25, y20 + y21); - t11 = v_fma(y41, qm4_25, y60 + y61); - - z10 = t00 + t10; z11 = t01 + t11; - z20 = t10 - t00; z21 = t11 - t01; - - /* Z[3] = [0.f, 0.5f, 0.25f, -2.5f, -1.25f, 2.f, 1.f, 0.f]*Y */ - /* Z[4] = [0.f, -0.5f, 0.25f, 2.5f, -1.25f, -2.f, 1.f, 0.f]*Y */ - t00 = v_fma(y10, q0_5, y11 + y11); - t01 = v_fma(y50, q0_5, y51 + y51); - t10 = v_fma(y20, q0_25, y21); - t11 = v_fma(y60, q0_25, y61); - t00 = v_fma(y30, qm2_5, t00); - t01 = v_fma(y70, qm2_5, t01); - t10 = v_fma(y01, qm1_25, t10); - t11 = v_fma(y41, qm1_25, t11); - - z30 = t00 + t10; z31 = t01 + t11; - z40 = t10 - t00; z41 = t11 - t01; - - /* Z[5] = [0.f, 2.f, 4.f, -2.5f, -5.f, 0.5f, 1.f, 0.f]*Y */ - /* Z[6] = [0.f, -2.f, 4.f, 2.5f, -5.f, -0.5f, 1.f, 0.f]*Y */ - t00 = v_fma(y11, q0_5, y10 + y10); - t01 = v_fma(y51, q0_5, y50 + y50); - t10 = v_fma(y20, q4, y21); - t11 = v_fma(y60, q4, y61); - t00 = v_fma(y30, qm2_5, t00); - t01 = v_fma(y70, qm2_5, t01); - t10 = 
v_fma(y01, qm5, t10); - t11 = v_fma(y41, qm5, t11); - - z50 = t00 + t10; z51 = t01 + t11; - z60 = t10 - t00; z61 = t11 - t01; - } - - const int outstep = _FX_WINO_IBLOCK*_FX_WINO_ATOM_F32*Cg; - - v_store(outptr, z00); - v_store(outptr + outstep, z01); - v_store(outptr + outstep*2, z10); - v_store(outptr + outstep*3, z11); - v_store(outptr + outstep*4, z20); - v_store(outptr + outstep*5, z21); - v_store(outptr + outstep*6, z30); - v_store(outptr + outstep*7, z31); - v_store(outptr + outstep*8, z40); - v_store(outptr + outstep*9, z41); - v_store(outptr + outstep*10, z50); - v_store(outptr + outstep*11, z51); - v_store(outptr + outstep*12, z60); - v_store(outptr + outstep*13, z61); - v_store(outptr + outstep*14, z70); - v_store(outptr + outstep*15, z71); -#else -#error "Only SIMD128, AVX2 and NEON are supported in Winograd." -#endif -} - -/* Inverse Winograd 8x8 transform: - out = (A'*inp*A)', where - inp is input 8x8 FP32 matrix, - A' is - [1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, - 0.f, 1.f, -1.f, 2.f, -2.f, 0.5f, -0.5f, 0.f, - 0.f, 1.f, 1.f, 4.f, 4.f, 0.25f, 0.25f, 0.f, - 0.f, 1.f, -1.f, 8.f, -8.f, 0.125f, -0.125f, 0.f, - 0.f, 1.f, 1.f, 16.f, 16.f, 1.f/16, 1.f/16, 0.f, - 0.f, 1.f, -1.f, 32.f, -32.f, 1.f/32, -1.f/32, 1.f] - - inp is pre-loaded into xij registers, - out will be stored in zij, where (0<=i<=7 for x, 0<=i<=5 for z), 0<=j<=1. - - After the inverse transform is done, we add bias, - optionally add results from the earlier tensors (by-pass), - optionally apply activation function and then - store the final results. - - Note that both _FX_WINOGRAD_FWD_8x8() and - _FX_WINOGRAD_INV_8x8() produce tranposed output. - That is, after both forward and then inverse transformation, - we get non-transposed result. - Of course, for the correct work of Winograd-based convolution, - the Winograd-transformed weights should also be transposed. - init_conv() (see OpConv.fx) takes care of that. 
-*/ -static void -_fx_winograd_AtXA_8x8_f32(const float* inptr, int inpstep, - float* bpptr, int bpstep, float* outptr, int outstep, - float bias, float minval, float maxval, bool ifMinMaxAct) -{ -#if CV_NEON && CV_NEON_AARCH64 - float32x4_t x00 = vld1q_f32(inptr), x01 = vld1q_f32(inptr + 4); - float32x4_t x10 = vld1q_f32(inptr + inpstep), x11 = vld1q_f32(inptr + inpstep + 4); - float32x4_t x20 = vld1q_f32(inptr + inpstep*2), x21 = vld1q_f32(inptr + inpstep*2 + 4); - float32x4_t x30 = vld1q_f32(inptr + inpstep*3), x31 = vld1q_f32(inptr + inpstep*3 + 4); - float32x4_t x40 = vld1q_f32(inptr + inpstep*4), x41 = vld1q_f32(inptr + inpstep*4 + 4); - float32x4_t x50 = vld1q_f32(inptr + inpstep*5), x51 = vld1q_f32(inptr + inpstep*5 + 4); - float32x4_t x60 = vld1q_f32(inptr + inpstep*6), x61 = vld1q_f32(inptr + inpstep*6 + 4); - float32x4_t x70 = vld1q_f32(inptr + inpstep*7), x71 = vld1q_f32(inptr + inpstep*7 + 4); - float32x4_t z00, z01, z10, z11, z20, z21, z30, z31, z40, z41, z50, z51; - - { - float32x4_t s12_0, s12_1, s34_0, s34_1, s56_0, s56_1; - s12_0 = vaddq_f32(x10, x20); s12_1 = vaddq_f32(x11, x21); - s34_0 = vaddq_f32(x30, x40); s34_1 = vaddq_f32(x31, x41); - s56_0 = vaddq_f32(x50, x60); s56_1 = vaddq_f32(x51, x61); - - float32x4_t y00 = vaddq_f32(vaddq_f32(vaddq_f32(x00, s12_0), s34_0), s56_0); - float32x4_t y01 = vaddq_f32(vaddq_f32(vaddq_f32(x01, s12_1), s34_1), s56_1); - float32x4_t y20 = vfmaq_n_f32(vfmaq_n_f32(s12_0, s34_0, 4.0f), s56_0, 0.25f); - float32x4_t y21 = vfmaq_n_f32(vfmaq_n_f32(s12_1, s34_1, 4.0f), s56_1, 0.25f); - float32x4_t y40 = vfmaq_n_f32(vfmaq_n_f32(s12_0, s34_0, 16.0f), s56_0, 1.f/16); - float32x4_t y41 = vfmaq_n_f32(vfmaq_n_f32(s12_1, s34_1, 16.0f), s56_1, 1.f/16); - - s12_0 = vsubq_f32(x10, x20); s12_1 = vsubq_f32(x11, x21); - s34_0 = vsubq_f32(x30, x40); s34_1 = vsubq_f32(x31, x41); - s56_0 = vsubq_f32(x50, x60); s56_1 = vsubq_f32(x51, x61); - - float32x4_t y50 = vfmaq_n_f32(vfmaq_n_f32(vaddq_f32(x70, s12_0), - s34_0, 32.f), s56_0, 
1.f/32); - float32x4_t y51 = vfmaq_n_f32(vfmaq_n_f32(vaddq_f32(x71, s12_1), - s34_1, 32.f), s56_1, 1.f/32); - float32x4_t y10 = vfmaq_n_f32(vfmaq_n_f32(s12_0, s34_0, 2.0f), s56_0, 0.5f); - float32x4_t y11 = vfmaq_n_f32(vfmaq_n_f32(s12_1, s34_1, 2.0f), s56_1, 0.5f); - float32x4_t y30 = vfmaq_n_f32(vfmaq_n_f32(s12_0, s34_0, 8.0f), s56_0, 0.125f); - float32x4_t y31 = vfmaq_n_f32(vfmaq_n_f32(s12_1, s34_1, 8.0f), s56_1, 0.125f); - float32x4_t y60 = vdupq_n_f32(0.f), y61 = y60, y70 = y60, y71 = y60; - - /* transpose 8x8 matrix in-place with some renumeration of the elements: */ - /* Y: */ - /* y00 y01 */ - /* y10 y11 */ - /* ... */ - /* y50 y51 */ - /* 0 0 */ - /* 0 0 */ - /* Y': */ - /* y00 y40 */ - /* y10 y50 */ - /* y20 y60 */ - /* y30 y70 */ - /* y01 y41 */ - /* y11 y51 */ - /* y21 y61 */ - /* y31 y71 */ - /* in other words, y40 <-> y01, y50 <-> y11, y60 <-> y21, y70 <-> y31 */ - float32x4x2_t tr0, tr1; - - T4x4(y00, y10, y20, y30, tr0, tr1); - T4x4(y01, y11, y21, y31, tr0, tr1); - T4x4(y40, y50, y60, y70, tr0, tr1); - T4x4(y41, y51, y61, y71, tr0, tr1); - - s12_0 = vaddq_f32(y10, y20); s12_1 = vaddq_f32(y50, y60); - s34_0 = vaddq_f32(y30, y01); s34_1 = vaddq_f32(y70, y41); - s56_0 = vaddq_f32(y11, y21); s56_1 = vaddq_f32(y51, y61); - - z00 = vaddq_f32(vaddq_f32(vaddq_f32(y00, s12_0), s34_0), s56_0); - z01 = vaddq_f32(vaddq_f32(vaddq_f32(y40, s12_1), s34_1), s56_1); - z20 = vfmaq_n_f32(vfmaq_n_f32(s12_0, s34_0, 4.0f), s56_0, 0.25f); - z21 = vfmaq_n_f32(vfmaq_n_f32(s12_1, s34_1, 4.0f), s56_1, 0.25f); - z40 = vfmaq_n_f32(vfmaq_n_f32(s12_0, s34_0, 16.0f), s56_0, 1.f/16); - z41 = vfmaq_n_f32(vfmaq_n_f32(s12_1, s34_1, 16.0f), s56_1, 1.f/16); - - s12_0 = vsubq_f32(y10, y20); s12_1 = vsubq_f32(y50, y60); - s34_0 = vsubq_f32(y30, y01); s34_1 = vsubq_f32(y70, y41); - s56_0 = vsubq_f32(y11, y21); s56_1 = vsubq_f32(y51, y61); - - z50 = vfmaq_n_f32(vfmaq_n_f32(vaddq_f32(y31, s12_0), - s34_0, 32.f), s56_0, 1.f/32); - z51 = vfmaq_n_f32(vfmaq_n_f32(vaddq_f32(y71, s12_1), - s34_1, 
32.f), s56_1, 1.f/32); - z10 = vfmaq_n_f32(vfmaq_n_f32(s12_0, s34_0, 2.0f), s56_0, 0.5f); - z11 = vfmaq_n_f32(vfmaq_n_f32(s12_1, s34_1, 2.0f), s56_1, 0.5f); - z30 = vfmaq_n_f32(vfmaq_n_f32(s12_0, s34_0, 8.0f), s56_0, 0.125f); - z31 = vfmaq_n_f32(vfmaq_n_f32(s12_1, s34_1, 8.0f), s56_1, 0.125f); - float32x4_t vbias = vdupq_n_f32(bias); - - z00 = vaddq_f32(z00, vbias); - z01 = vaddq_f32(z01, vbias); - z10 = vaddq_f32(z10, vbias); - z11 = vaddq_f32(z11, vbias); - z20 = vaddq_f32(z20, vbias); - z21 = vaddq_f32(z21, vbias); - z30 = vaddq_f32(z30, vbias); - z31 = vaddq_f32(z31, vbias); - z40 = vaddq_f32(z40, vbias); - z41 = vaddq_f32(z41, vbias); - z50 = vaddq_f32(z50, vbias); - z51 = vaddq_f32(z51, vbias); - } - - if (bpptr) - { - float32x2_t zhalf = vdup_n_f32(0.f); - z00 = vaddq_f32(z00, vld1q_f32(bpptr)); - z01 = vaddq_f32(z01, vcombine_f32(vld1_f32(bpptr + 4), zhalf)); - z10 = vaddq_f32(z10, vld1q_f32(bpptr + bpstep)); - z11 = vaddq_f32(z11, vcombine_f32(vld1_f32(bpptr + bpstep + 4), zhalf)); - z20 = vaddq_f32(z20, vld1q_f32(bpptr + bpstep*2)); - z21 = vaddq_f32(z21, vcombine_f32(vld1_f32(bpptr + bpstep*2 + 4), zhalf)); - z30 = vaddq_f32(z30, vld1q_f32(bpptr + bpstep*3)); - z31 = vaddq_f32(z31, vcombine_f32(vld1_f32(bpptr + bpstep*3 + 4), zhalf)); - z40 = vaddq_f32(z40, vld1q_f32(bpptr + bpstep*4)); - z41 = vaddq_f32(z41, vcombine_f32(vld1_f32(bpptr + bpstep*4 + 4), zhalf)); - z50 = vaddq_f32(z50, vld1q_f32(bpptr + bpstep*5)); - z51 = vaddq_f32(z51, vcombine_f32(vld1_f32(bpptr + bpstep*5 + 4), zhalf)); - } - - if (ifMinMaxAct) - { - float32x4_t vmax = vdupq_n_f32(maxval); - float32x4_t vmin = vdupq_n_f32(minval); - - z00 = vminq_f32(vmaxq_f32(z00, vmin), vmax); - z01 = vminq_f32(vmaxq_f32(z01, vmin), vmax); - z10 = vminq_f32(vmaxq_f32(z10, vmin), vmax); - z11 = vminq_f32(vmaxq_f32(z11, vmin), vmax); - z20 = vminq_f32(vmaxq_f32(z20, vmin), vmax); - z21 = vminq_f32(vmaxq_f32(z21, vmin), vmax); - z30 = vminq_f32(vmaxq_f32(z30, vmin), vmax); - z31 = 
vminq_f32(vmaxq_f32(z31, vmin), vmax); - z40 = vminq_f32(vmaxq_f32(z40, vmin), vmax); - z41 = vminq_f32(vmaxq_f32(z41, vmin), vmax); - z50 = vminq_f32(vmaxq_f32(z50, vmin), vmax); - z51 = vminq_f32(vmaxq_f32(z51, vmin), vmax); - } - - vst1q_f32(outptr, z00); - vst1_f32(outptr + 4, vget_low_f32(z01)); - vst1q_f32(outptr + outstep, z10); - vst1_f32(outptr + outstep + 4, vget_low_f32(z11)); - vst1q_f32(outptr + outstep*2, z20); - vst1_f32(outptr + outstep*2 + 4, vget_low_f32(z21)); - vst1q_f32(outptr + outstep*3, z30); - vst1_f32(outptr + outstep*3 + 4, vget_low_f32(z31)); - vst1q_f32(outptr + outstep*4, z40); - vst1_f32(outptr + outstep*4 + 4, vget_low_f32(z41)); - vst1q_f32(outptr + outstep*5, z50); - vst1_f32(outptr + outstep*5 + 4, vget_low_f32(z51)); -#elif CV_SIMD128 - v_float32x4 x00 = v_load(inptr), x01 = v_load(inptr + 4); - v_float32x4 x10 = v_load(inptr + inpstep), x11 = v_load(inptr + inpstep + 4); - v_float32x4 x20 = v_load(inptr + inpstep*2), x21 = v_load(inptr + inpstep*2 + 4); - v_float32x4 x30 = v_load(inptr + inpstep*3), x31 = v_load(inptr + inpstep*3 + 4); - v_float32x4 x40 = v_load(inptr + inpstep*4), x41 = v_load(inptr + inpstep*4 + 4); - v_float32x4 x50 = v_load(inptr + inpstep*5), x51 = v_load(inptr + inpstep*5 + 4); - v_float32x4 x60 = v_load(inptr + inpstep*6), x61 = v_load(inptr + inpstep*6 + 4); - v_float32x4 x70 = v_load(inptr + inpstep*7), x71 = v_load(inptr + inpstep*7 + 4); - v_float32x4 z00, z01, z10, z11, z20, z21, z30, z31, z40, z41, z50, z51; - - { - v_float32x4 s12_0, s12_1, s34_0, s34_1, s56_0, s56_1; - s12_0 = x10 + x20; s12_1 = x11 + x21; - s34_0 = x30 + x40; s34_1 = x31 + x41; - s56_0 = x50 + x60; s56_1 = x51 + x61; - - v_float32x4 y00 = x00 + s12_0 + s34_0 + s56_0; - v_float32x4 y01 = x01 + s12_1 + s34_1 + s56_1; - - v_float32x4 a0 = v_setall_f32(0.25f), a1 = v_setall_f32(4.0f); - v_float32x4 y20 = v_fma(s56_0, a0, v_fma(s34_0, a1, s12_0)); - v_float32x4 y21 = v_fma(s56_1, a0 ,v_fma(s34_1, a1, s12_1) ); - - a0 = 
v_setall_f32(1.f/16), a1 = v_setall_f32(16.0f); - v_float32x4 y40 = v_fma(s56_0, a0, v_fma(s34_0, a1, s12_0)); - v_float32x4 y41 = v_fma(s56_1, a0, v_fma(s34_1, a1, s12_1)); - - s12_0 = x10 - x20; s12_1 = x11 - x21; - s34_0 = x30 - x40; s34_1 = x31 - x41; - s56_0 = x50 - x60; s56_1 = x51 - x61; - - a0 = v_setall_f32(1.f/32), a1 = v_setall_f32(32.f); - v_float32x4 y50 = v_fma(s56_0, a0, v_fma(s34_0, a1, x70 + s12_0)); - v_float32x4 y51 = v_fma(s56_1, a0, v_fma(s34_1, a1, x71 + s12_1)); - - a0 = v_setall_f32(0.5f), a1 = v_setall_f32(2.f); - v_float32x4 y10 = v_fma(s56_0, a0, v_fma(s34_0, a1, s12_0)); - v_float32x4 y11 = v_fma(s56_1, a0, v_fma(s34_1, a1, s12_1)); - - a0 = v_setall_f32(0.125f), a1 = v_setall_f32(8.f); - v_float32x4 y30 = v_fma(s56_0, a0, v_fma(s34_0, a1, s12_0)); - v_float32x4 y31 = v_fma(s56_1, a0, v_fma(s34_1, a1, s12_1)); - - v_float32x4 y60 = v_setall_f32(0.f), y61 = y60, y70 = y60, y71 = y60; - - /* transpose 8x8 matrix in-place with some renumeration of the elements: */ - /* Y: */ - /* y00 y01 */ - /* y10 y11 */ - /* ... 
*/ - /* y50 y51 */ - /* 0 0 */ - /* 0 0 */ - /* Y': */ - /* y00 y40 */ - /* y10 y50 */ - /* y20 y60 */ - /* y30 y70 */ - /* y01 y41 */ - /* y11 y51 */ - /* y21 y61 */ - /* y31 y71 */ - /* in other words, y40 <-> y01, y50 <-> y11, y60 <-> y21, y70 <-> y31 */ - - v_transpose4x4(y00, y10, y20, y30, y00, y10, y20, y30); - v_transpose4x4(y01, y11, y21, y31, y01, y11, y21, y31); - v_transpose4x4(y40, y50, y60, y70, y40, y50, y60, y70); - v_transpose4x4(y41, y51, y61, y71, y41, y51, y61, y71); - - s12_0 = y10 + y20; s12_1 = y50 + y60; - s34_0 = y30 + y01; s34_1 = y70 + y41; - s56_0 = y11 + y21; s56_1 = y51 + y61; - - z00 = y00 + s12_0 + s34_0 + s56_0; - z01 = y40 + s12_1 + s34_1 + s56_1; - - a0 = v_setall_f32(0.25f), a1 = v_setall_f32(4.0f); - z20 = v_fma(s56_0, a0, v_fma(s34_0, a1, s12_0)); - z21 = v_fma(s56_1, a0, v_fma(s34_1, a1, s12_1)); - - a0 = v_setall_f32(1.f/16), a1 = v_setall_f32(16.0f); - z40 = v_fma(s56_0, a0, v_fma(s34_0, a1, s12_0)); - z41 = v_fma(s56_1, a0, v_fma(s34_1, a1, s12_1)); - - s12_0 = y10 - y20; s12_1 = y50 - y60; - s34_0 = y30 - y01; s34_1 = y70 - y41; - s56_0 = y11 - y21; s56_1 = y51 - y61; - - a0 = v_setall_f32(1.f/32), a1 = v_setall_f32(32.0f); - z50 = v_fma(s56_0, a0, v_fma(s34_0, a1, y31 + s12_0)); - z51 = v_fma(s56_1, a0, v_fma(s34_1, a1, y71 + s12_1)); - - a0 = v_setall_f32(0.5f), a1 = v_setall_f32(2.0f); - z10 = v_fma(s56_0, a0, v_fma(s34_0, a1, s12_0)); - z11 = v_fma(s56_1, a0, v_fma(s34_1, a1, s12_1)); - - a0 = v_setall_f32(0.125f), a1 = v_setall_f32(8.0f); - z30 = v_fma(s56_0, a0, v_fma(s34_0, a1, s12_0)); - z31 = v_fma(s56_1, a0, v_fma(s34_1, a1, s12_1)); - - v_float32x4 vbias = v_setall_f32(bias); - z00 += vbias; - z01 += vbias; - z10 += vbias; - z11 += vbias; - z20 += vbias; - z21 += vbias; - z30 += vbias; - z31 += vbias; - z40 += vbias; - z41 += vbias; - z50 += vbias; - z51 += vbias; - } - - if (bpptr) - { - z00 += v_load(bpptr); - z01 += v_load_low(bpptr + 4); - z10 += v_load(bpptr + bpstep); - z11 += v_load_low(bpptr + bpstep + 
4); - z20 += v_load(bpptr + bpstep*2); - z21 += v_load_low(bpptr + bpstep*2 + 4); - z30 += v_load(bpptr + bpstep*3); - z31 += v_load_low(bpptr + bpstep*3 + 4); - z40 += v_load(bpptr + bpstep*4); - z41 += v_load_low(bpptr + bpstep*4 + 4); - z50 += v_load(bpptr + bpstep*5); - z51 += v_load_low(bpptr + bpstep*5 + 4); - } - - if (ifMinMaxAct) - { - v_float32x4 vmax = v_setall_f32(maxval); - v_float32x4 vmin = v_setall_f32(minval); - - z00 = v_min(v_max(z00, vmin), vmax); - z01 = v_min(v_max(z01, vmin), vmax); - z10 = v_min(v_max(z10, vmin), vmax); - z11 = v_min(v_max(z11, vmin), vmax); - z20 = v_min(v_max(z20, vmin), vmax); - z21 = v_min(v_max(z21, vmin), vmax); - z30 = v_min(v_max(z30, vmin), vmax); - z31 = v_min(v_max(z31, vmin), vmax); - z40 = v_min(v_max(z40, vmin), vmax); - z41 = v_min(v_max(z41, vmin), vmax); - z50 = v_min(v_max(z50, vmin), vmax); - z51 = v_min(v_max(z51, vmin), vmax); - } - - v_store(outptr, z00); - v_store_low(outptr + 4, z01); - v_store(outptr + outstep, z10); - v_store_low(outptr + outstep + 4, z11); - v_store(outptr + outstep*2, z20); - v_store_low(outptr + outstep*2 + 4, z21); - v_store(outptr + outstep*3, z30); - v_store_low(outptr + outstep*3 + 4, z31); - v_store(outptr + outstep*4, z40); - v_store_low(outptr + outstep*4 + 4, z41); - v_store(outptr + outstep*5, z50); - v_store_low(outptr + outstep*5 + 4, z51); -#else -#error "Only SIMD128, AVX2 and NEON are supported in Winograd." 
-#endif -} - -int runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _output, const Ptr& conv, - int ntasks, float minval, float maxval, ActivationLayer* activ, bool ifMinMaxAct) -{ - Mat input = _input.getMat(); - Mat output = _output.getMat(); - Mat fusedAddMat = _fusedAddMat.getMat(); - - MatShape inputShape = shape(input); - MatShape outputShape = shape(output); - CV_Assert(inputShape.size() == 4 && outputShape.size() == 4); - - int N = inputShape[0], C = inputShape[1], Hi = inputShape[2], Wi = inputShape[3]; // [N, C, H, W] - int K = conv->K; - int H0 = outputShape[2], W0 = outputShape[3]; - - int pad_top = conv->pad_top; - int pad_left = conv->pad_left; - - int ngroups = conv->ngroups, Cg = C/ngroups, Kg = K/ngroups; - int Kg_nblocks = (Kg + _FX_WINO_KBLOCK - 1)/_FX_WINO_KBLOCK; - const size_t inp_planesize = (size_t)Hi*Wi; - const size_t out_planesize = (size_t)H0*W0; - - int blocks_per_row = (W0+_FX_WINO_STEP-1)/_FX_WINO_STEP; - int blocks_per_plane = ((H0+_FX_WINO_STEP-1)/_FX_WINO_STEP)*blocks_per_row; - int blocks_per_plane_aligned = ((blocks_per_plane + - _FX_WINO_IBLOCK-1)/_FX_WINO_IBLOCK)*_FX_WINO_IBLOCK; - - size_t totalbufsize = (size_t)N*C*blocks_per_plane_aligned*_FX_WINO_AREA; - - AutoBuffer _buf; - _buf.allocate(totalbufsize + VEC_ALIGN); - float* wbuf_all = alignPtr(_buf.data(), VEC_ALIGN); - - float* inp = input.ptr(); - float* out = output.ptr(); - - float* fusedAddPtr = fusedAddMat.empty() ? nullptr : fusedAddMat.ptr(); - - // Phase 1. compute forward Winograd transforms for all input blocks, - // all input planes, all samples in the batch. - // [TODO]: maybe, if there are too many input channels, it makes sense to - // transform only part of input channels at once and then compute the partial - // accumulated sums (i.e. update the output buffers several times, - // rather than compute them in one pass). 
- parallel_for_(Range(0, ntasks), [&](const Range& r0) { - for (int task_id = r0.start; task_id < r0.end; task_id++) - { - int nc0 = (N*C)*task_id/ntasks; - int nc1 = (N*C)*(task_id+1)/ntasks; - for(; nc0 < nc1; nc0++) - { - int n = nc0 / C; - int c = nc0 - n*C; - int g = c / Cg; - c -= g*Cg; - for (int block_id = 0; block_id < blocks_per_plane; block_id += _FX_WINO_IBLOCK) - { - for (int db = 0; db < _FX_WINO_IBLOCK; db++) - { - size_t inwofs = ((n*ngroups + g)*blocks_per_plane_aligned + - block_id)*Cg*_FX_WINO_AREA + - (c*_FX_WINO_IBLOCK + db)*_FX_WINO_ATOM_F32; - float* inwptr = (float*)wbuf_all + inwofs; - - if (block_id + db < blocks_per_plane) - { - int y0 = (block_id + db) / blocks_per_row; - int x0 = (block_id + db) - y0 * blocks_per_row; - y0 = y0*_FX_WINO_STEP - pad_top; - x0 = x0*_FX_WINO_STEP - pad_left; - bool partial = y0 < 0 || y0 + _FX_WINO_SIZE > Hi || - x0 < 0 || x0 + _FX_WINO_SIZE > Wi; - int dx1 = 0, dx2 = _FX_WINO_SIZE, dy1 = 0, dy2 = _FX_WINO_SIZE; - int inpstep = Wi; - - float inpbuf[_FX_WINO_AREA]; - float* inptr0 = (float*)inp + nc0*inp_planesize + y0*Wi + x0; - float* inptr = inptr0; - - if (partial) - { - memset(inpbuf, 0, sizeof(inpbuf)); - dy1 = -y0 > 0 ? -y0 : 0; - dy2 = Hi - y0 < _FX_WINO_SIZE ? Hi - y0 : _FX_WINO_SIZE; - - if (dy2 < dy1) {dy2 = dy1 = 0;} - dx1 = -x0 > 0 ? -x0 : 0; - dx2 = Wi - x0 < _FX_WINO_SIZE ? 
Wi - x0 : _FX_WINO_SIZE; - - if (dx2 < dx1) {dx2 = dx1 = 0;} - inptr0 -= y0*Wi + x0; - - if (dx1 < dx2 && dy1 < dy2) - { - for(int dy = dy1; dy < dy2; dy++) - memcpy(&inpbuf[dy*_FX_WINO_SIZE + dx1], - inptr0 + (y0+dy)*Wi + (x0+dx1), - (dx2-dx1)*sizeof(inpbuf[0])); - } - - inptr = inpbuf; - inpstep = _FX_WINO_SIZE; - } -#if CV_TRY_AVX2 - if (conv->useAVX2) - opt_AVX2::_fx_winograd_BtXB_8x8_f32(inptr, inpstep, inwptr, Cg); - else -#endif - _fx_winograd_BtXB_8x8_f32(inptr, inpstep, inwptr, Cg); - } - else - { - for (int i = 0; i < _FX_WINO_NATOMS_F32; i++, inwptr += _FX_WINO_IBLOCK*_FX_WINO_ATOM_F32) - memset(inwptr, 0, _FX_WINO_ATOM_F32*sizeof(inwptr[0])); - } - } - } - } - }}); - - // Phase 2. compute elemwise-weighted sums of transformed blocks, - // apply inverse Winograd transforms to the sums, - // add bias, apply activation function if any and store the results. - parallel_for_(Range(0, ntasks), [&](const Range& r0) { - for (int task_id = r0.start; task_id < r0.end; task_id++) - { - size_t out_wbuf_size = _FX_WINO_AREA*_FX_WINO_KBLOCK*_FX_WINO_IBLOCK; - size_t outbuf_size = _FX_WINO_AREA; - AutoBuffer out_wbuf_, outbuf_; - out_wbuf_.allocate(out_wbuf_size + VEC_ALIGN); - float* out_wbuf = alignPtr(out_wbuf_.data(), VEC_ALIGN); - outbuf_.allocate(outbuf_size + VEC_ALIGN); - float* outbuf = alignPtr(outbuf_.data(), VEC_ALIGN); - - memset(out_wbuf, 0, out_wbuf_size * sizeof(float)); - memset(outbuf, 0, outbuf_size * sizeof(float)); - - int ngk0 = (int)(((int64_t)N*Kg_nblocks*ngroups)*task_id/ntasks); - int ngk1 = (int)(((int64_t)N*Kg_nblocks*ngroups)*(task_id+1)/ntasks); - - for(; ngk0 < ngk1; ngk0++) - { - int n = ngk0 / (Kg_nblocks*ngroups); - int gk0 = ngk0 % (Kg_nblocks*ngroups); - int g = gk0 / Kg_nblocks; - int k0 = (gk0 % Kg_nblocks)*_FX_WINO_KBLOCK; - int k1 = k0 + _FX_WINO_KBLOCK <= Kg ? 
k0 + _FX_WINO_KBLOCK : Kg; - - for (int block_id0 = 0; block_id0 < blocks_per_plane; block_id0 += _FX_WINO_IBLOCK) - { - int block_id1 = block_id0 + _FX_WINO_IBLOCK; - block_id1 = block_id1 < blocks_per_plane ? block_id1 : blocks_per_plane; - size_t inwofs = ((n*ngroups + g)*blocks_per_plane_aligned + block_id0)*Cg*_FX_WINO_AREA; - size_t wofs = (g*Kg_nblocks*_FX_WINO_KBLOCK + k0)*Cg*_FX_WINO_AREA; - - float* inwptr = wbuf_all + inwofs; - const float* wptr = conv->weightsWinoBufPtr + wofs; - -#if CV_TRY_AVX2 - if (conv->useAVX2) - opt_AVX2::_fx_winograd_accum_f32(inwptr, wptr, out_wbuf, Cg, block_id1 - block_id0); - else -#endif - _fx_winograd_accum_f32(inwptr, wptr, out_wbuf, Cg, block_id1 - block_id0); - for (int k = k0; k < k1; k++) - { - float biasv = conv->biasBuf[g*Kg + k]; - for (int block_id = block_id0; block_id < block_id1; block_id++) - { - int y0 = block_id / blocks_per_row; - int x0 = block_id - y0 * blocks_per_row; - y0 = y0*_FX_WINO_STEP; - x0 = x0*_FX_WINO_STEP; - int dy1 = H0 - y0; - if (dy1 > _FX_WINO_STEP) dy1 = _FX_WINO_STEP; - int dx1 = W0 - x0; - if (dx1 > _FX_WINO_STEP) dx1 = _FX_WINO_STEP; - assert(dx1 > 0 && dy1 > 0); - bool partial = activ || dy1 < _FX_WINO_STEP || dx1 < _FX_WINO_STEP; - size_t outofs = (n*K + g*Kg + k)*out_planesize + y0*W0 + x0; - int outstep = W0; - - float* outptr0 = (float*)out + outofs; - float* pbptr0 = fusedAddPtr ? 
fusedAddPtr + outofs : nullptr; - float *outptr = outptr0, *bpptr = pbptr0; - - if (partial) - { - outptr = outbuf; - outstep = _FX_WINO_SIZE; - if (pbptr0) - { - bpptr = outbuf; - for (int y = 0; y < dy1; y++) - memcpy(outbuf + y*_FX_WINO_SIZE, pbptr0 + y*W0, - dx1*sizeof(pbptr0[0])); - } - } -#if CV_TRY_AVX2 - if (conv->useAVX2) - opt_AVX2::_fx_winograd_AtXA_8x8_f32(out_wbuf + ((k - k0)*_FX_WINO_IBLOCK + (block_id - block_id0))*_FX_WINO_AREA, _FX_WINO_SIZE, - bpptr, outstep, outptr, outstep, biasv, minval, maxval, ifMinMaxAct); - else -#endif - _fx_winograd_AtXA_8x8_f32(out_wbuf + ((k - k0)*_FX_WINO_IBLOCK + (block_id - block_id0))*_FX_WINO_AREA, _FX_WINO_SIZE, - bpptr, outstep, outptr, outstep, biasv, minval, maxval, ifMinMaxAct); - if (partial) - { - if (activ) - activ->forwardSlice(outptr, outptr, _FX_WINO_SIZE*_FX_WINO_STEP, 0, g*Kg + k, g*Kg + k + 1); - for (int y = 0; y < dy1; y++) - memcpy(outptr0 + y*W0, outptr + y*_FX_WINO_SIZE,dx1*sizeof(outptr0[0])); - } - } - } - } - } - }}); - return 1; -} - -#else - -int runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _output, const Ptr& conv, - int ntasks, float minval, float maxval, ActivationLayer* activ, bool ifMinMaxAct) -{ - return 0; -} -#endif -}} // namespace cv::dnn diff --git a/modules/dnn/src/layers/layers_common.simd.hpp b/modules/dnn/src/layers/layers_common.simd.hpp index eb1735639e..4bae86911c 100644 --- a/modules/dnn/src/layers/layers_common.simd.hpp +++ b/modules/dnn/src/layers/layers_common.simd.hpp @@ -46,16 +46,6 @@ namespace cv { namespace dnn { CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN -void fastDepthwiseConv( const float* weights, - int kernel_h, int kernel_w, - int stride_h, int stride_w, - int dilation_h, int dilation_w, - int pad_t, int pad_l, - const float* bias, const float* relu, - const float* inptr, - int height, int width, - float* outptr, - int out_d, int outH, int outW ); void fastGEMM1T( const float* vec, const float* weights, size_t wstep, const float* bias, 
float* dst, int nvecs, int vecsize ); @@ -70,185 +60,6 @@ void fastGEMM( const float* aptr, size_t astep, const float* bptr, #define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(c, _mm256_mul_ps(a, b)) #endif -static inline void _mm256_load_deinterleave(const float* ptr, __m256& a, __m256& b) -{ - __m256 t0 = _mm256_loadu_ps(ptr); - __m256 t1 = _mm256_loadu_ps(ptr + 8); - - __m256 lo = _mm256_permute2f128_ps(t0, t1, 0+2*16); - __m256 hi = _mm256_permute2f128_ps(t0, t1, 1+3*16); - a = _mm256_shuffle_ps(lo, hi, 0x88); - b = _mm256_shuffle_ps(lo, hi, 0xdd); -} - -void fastDepthwiseConv( const float* wptr, - int kernel_h, int kernel_w, - int stride_h, int stride_w, - int dilation_h, int dilation_w, - int pad_t, int pad_l, - const float* biasptr, const float* relu, - const float* inptr_, - int height, int width, - float* outptr_, - int out_d, int outH, int outW ) -{ - const float w00_ = wptr[0], w01_ = wptr[1], w02_ = wptr[2], - w10 = wptr[3], w11 = wptr[4], w12 = wptr[5], - w20_ = wptr[6], w21_ = wptr[7], w22_ = wptr[8]; - int outW1 = min(outW, (width - dilation_w*(kernel_w - 1) + pad_l)/stride_w); - float relu_coeff = relu ? relu[out_d] : 1.f, bias = biasptr[out_d]; - - for (int out_i = 0; out_i < outH; out_i++) - { - int in_i = out_i * stride_h - pad_t, out_j = 0; - const float* imgptr0 = inptr_ + in_i*width; - const float* imgptr1 = imgptr0 + dilation_h*width; - const float* imgptr2 = imgptr0 + (dilation_h*2)*width; - float out, w00 = w00_, w01 = w01_, w02 = w02_; - float w20 = w20_, w21 = w21_, w22 = w22_; - if (in_i < 0) - { - w00 = w01 = w02 = 0.f; - imgptr0 = imgptr1; - } - else if (in_i + dilation_h*(kernel_h-1) >= height) - { - w20 = w21 = w22 = 0.f; - imgptr2 = imgptr1; - } - float* outptr = outptr_ + out_i*outW; - if (pad_l > 0) - { - out = imgptr0[0]*w01 + imgptr0[dilation_w]*w02 + - imgptr1[0]*w11 + imgptr1[dilation_w]*w12 + - imgptr2[0]*w21 + imgptr2[dilation_w]*w22 + bias; - if (relu) - out = out > 0.f ? 
out : out*relu_coeff; - outptr[0] = out; - out_j = 1; - } - - if (stride_w == 1 || (stride_w == 2 && dilation_w == 1)) - { - const int VECSZ = 8; - __m256 vw00 = _mm256_set1_ps(w00), vw01 = _mm256_set1_ps(w01), vw02 = _mm256_set1_ps(w02), - vw10 = _mm256_set1_ps(w10), vw11 = _mm256_set1_ps(w11), vw12 = _mm256_set1_ps(w12), - vw20 = _mm256_set1_ps(w20), vw21 = _mm256_set1_ps(w21), vw22 = _mm256_set1_ps(w22); - __m256 z = _mm256_setzero_ps(), vbias = _mm256_set1_ps(bias), vrc = _mm256_set1_ps(relu_coeff); - - if( stride_w == 1 ) - for( ; out_j < outW1; out_j += VECSZ ) - { - if (out_j + VECSZ > outW1 && out_j > pad_l) - out_j = outW1 - VECSZ; - int in_j = out_j * stride_w - pad_l; - __m256 v00 = _mm256_loadu_ps(imgptr0 + in_j), - v01 = _mm256_loadu_ps(imgptr0 + in_j + dilation_w), - v02 = _mm256_loadu_ps(imgptr0 + in_j + dilation_w*2), - v10 = _mm256_loadu_ps(imgptr1 + in_j), - v11 = _mm256_loadu_ps(imgptr1 + in_j + dilation_w), - v12 = _mm256_loadu_ps(imgptr1 + in_j + dilation_w*2), - v20 = _mm256_loadu_ps(imgptr2 + in_j), - v21 = _mm256_loadu_ps(imgptr2 + in_j + dilation_w), - v22 = _mm256_loadu_ps(imgptr2 + in_j + dilation_w*2); - - __m256 vout0 = _mm256_fmadd_ps(v00, vw00, vbias); - __m256 vout1 = _mm256_mul_ps(v01, vw01); - __m256 vout2 = _mm256_mul_ps(v02, vw02); - - vout0 = _mm256_fmadd_ps(v10, vw10, vout0); - vout1 = _mm256_fmadd_ps(v11, vw11, vout1); - vout2 = _mm256_fmadd_ps(v12, vw12, vout2); - - vout0 = _mm256_fmadd_ps(v20, vw20, vout0); - vout1 = _mm256_fmadd_ps(v21, vw21, vout1); - vout2 = _mm256_fmadd_ps(v22, vw22, vout2); - - vout0 = _mm256_add_ps(_mm256_add_ps(vout0, vout1), vout2); - if (relu) - { - __m256 m = _mm256_cmp_ps(vout0, z, _CMP_GT_OQ); - vout0 = _mm256_blendv_ps(_mm256_mul_ps(vout0, vrc), vout0, m); - } - _mm256_storeu_ps(outptr + out_j, vout0); - } - else - for( ; out_j < outW1; out_j += VECSZ ) - { - if (out_j + VECSZ > outW1 && out_j > pad_l) - out_j = outW1 - VECSZ; - int in_j = out_j * stride_w - pad_l; - __m256 v00, v01, v02, v10, 
v11, v12, v20, v21, v22, unused; - _mm256_load_deinterleave(imgptr0 + in_j, v00, v01); - _mm256_load_deinterleave(imgptr0 + in_j + 2, v02, unused); - _mm256_load_deinterleave(imgptr1 + in_j, v10, v11); - _mm256_load_deinterleave(imgptr1 + in_j + 2, v12, unused); - _mm256_load_deinterleave(imgptr2 + in_j, v20, v21); - _mm256_load_deinterleave(imgptr2 + in_j + 2, v22, unused); - - __m256 vout0 = _mm256_fmadd_ps(v00, vw00, vbias); - __m256 vout1 = _mm256_mul_ps(v01, vw01); - __m256 vout2 = _mm256_mul_ps(v02, vw02); - - vout0 = _mm256_fmadd_ps(v10, vw10, vout0); - vout1 = _mm256_fmadd_ps(v11, vw11, vout1); - vout2 = _mm256_fmadd_ps(v12, vw12, vout2); - - vout0 = _mm256_fmadd_ps(v20, vw20, vout0); - vout1 = _mm256_fmadd_ps(v21, vw21, vout1); - vout2 = _mm256_fmadd_ps(v22, vw22, vout2); - - vout0 = _mm256_add_ps(_mm256_add_ps(vout0, vout1), vout2); - if (relu) - { - __m256 m = _mm256_cmp_ps(vout0, z, _CMP_GT_OQ); - vout0 = _mm256_blendv_ps(_mm256_mul_ps(vout0, vrc), vout0, m); - } - _mm256_storeu_ps(outptr + out_j, vout0); - } - } - - for (; out_j < outW1; out_j++) - { - int in_j = out_j * stride_w - pad_l; - out = imgptr0[in_j]*w00 + imgptr0[in_j + dilation_w]*w01 + imgptr0[in_j + dilation_w*2]*w02 + - imgptr1[in_j]*w10 + imgptr1[in_j + dilation_w]*w11 + imgptr1[in_j + dilation_w*2]*w12 + - imgptr2[in_j]*w20 + imgptr2[in_j + dilation_w]*w21 + imgptr2[in_j + dilation_w*2]*w22 + bias; - if (relu) - out = out > 0.f ? 
out : out*relu_coeff; - outptr[out_j] = out; - } - - for (; out_j < outW; out_j++ ) - { - int in_j0 = out_j * stride_w - pad_l, in_j1 = in_j0 + dilation_w, in_j2 = in_j0 + dilation_w*2; - float s0 = 1.f, s1 = 1.f, s2 = 1.f; - if (in_j0 >= width) - { - in_j0 = 0; - s0 = 0.f; - } - if (in_j1 >= width) - { - in_j1 = 0; - s1 = 0.f; - } - if (in_j2 >= width) - { - in_j2 = 0; - s2 = 0.f; - } - out = imgptr0[in_j0]*w00*s0 + imgptr0[in_j1]*w01*s1 + imgptr0[in_j2]*w02*s2 + - imgptr1[in_j0]*w10*s0 + imgptr1[in_j1]*w11*s1 + imgptr1[in_j2]*w12*s2 + - imgptr2[in_j0]*w20*s0 + imgptr2[in_j1]*w21*s1 + imgptr2[in_j2]*w22*s2 + bias; - if (relu) - out = out > 0.f ? out : out*relu_coeff; - outptr[out_j] = out; - } - } - _mm256_zeroupper(); -} - // Used to generate the mask used when calculating tails static const uint32_t tailMaskArray[15] = { 0, 0, 0, 0, 0, 0, 0, 0, @@ -654,382 +465,10 @@ void fastGEMM1T( const float* vec, const float* weights, } } -/* -Example for load_deinterleave: - input: ptr[16] = {1,2,3, ... ,14,15,16} - output: a = {1, 3, 5, 7, 9, 11, 13, 15} - output: b = {2, 4, 6, 8,10, 12, 14, 16} -*/ -static inline void vfloat32m2_load_deinterleave(const float* ptr, vfloat32m2_t& a, vfloat32m2_t& b, int vl) -{ - vuint64m4_t mask = vmv_v_x_u64m4(1,vl*2); - vuint32m4_t mask_re = vreinterpret_v_u64m4_u32m4(mask); - vbool8_t mask0 = vmseq_vx_u32m4_b8 (mask_re, 1, vl*2); - vbool8_t mask1 = vmseq_vx_u32m4_b8 (mask_re, 0, vl*2); - vfloat32m4_t tempa = vundefined_f32m4(), tempb = vundefined_f32m4(); - vfloat32m4_t vw = vle32_v_f32m4(ptr, vl*2); - tempa = vcompress_vm_f32m4(mask0, tempa, vw, vl*2); - tempb = vcompress_vm_f32m4(mask1, tempb, vw, vl*2); - /* The following instructions have not to be supported by the GNU toolchain. - So we temporarily use store and load instead. 
- // a = vlmul_trunc_v_f32m4_f32m2(tempa); - // b = vlmul_trunc_v_f32m4_f32m2(tempb); - */ - cv::AutoBuffer cvBuffer(sizeof(float)*vl*2); - float* buffer = (float*)cvBuffer.data(); - vse32_v_f32m4(buffer, tempa, vl); - a = vle32_v_f32m2(buffer, vl); - vse32_v_f32m4(buffer, tempb, vl); - b = vle32_v_f32m2(buffer, vl); -} - -void fastDepthwiseConv( const float* wptr, - int kernel_h, int kernel_w, - int stride_h, int stride_w, - int dilation_h, int dilation_w, - int pad_t, int pad_l, - const float* biasptr, const float* relu, - const float* inptr_, - int height, int width, - float* outptr_, - int out_d, int outH, int outW ) -{ - int vl; - const float w00_ = wptr[0], w01_ = wptr[1], w02_ = wptr[2], - w10 = wptr[3], w11 = wptr[4], w12 = wptr[5], - w20_ = wptr[6], w21_ = wptr[7], w22_ = wptr[8]; - int outW1 = std::min(outW, (width - dilation_w*(kernel_w - 1) + pad_l)/stride_w); - float relu_coeff = relu ? relu[out_d] : 1.f, bias = biasptr[out_d]; - - for (int out_i = 0; out_i < outH; out_i++) - { - int in_i = out_i * stride_h - pad_t, out_j = 0; - const float* imgptr0 = inptr_ + in_i*width; - const float* imgptr1 = imgptr0 + dilation_h*width; - const float* imgptr2 = imgptr0 + (dilation_h*2)*width; - float out, w00 = w00_, w01 = w01_, w02 = w02_; - float w20 = w20_, w21 = w21_, w22 = w22_; - if (in_i < 0) - { - w00 = w01 = w02 = 0.f; - imgptr0 = imgptr1; - } - else if (in_i + dilation_h*(kernel_h-1) >= height) - { - w20 = w21 = w22 = 0.f; - imgptr2 = imgptr1; - } - float* outptr = outptr_ + out_i*outW; - if (pad_l > 0) - { - out = imgptr0[0]*w01 + imgptr0[dilation_w]*w02 + - imgptr1[0]*w11 + imgptr1[dilation_w]*w12 + - imgptr2[0]*w21 + imgptr2[dilation_w]*w22 + bias; - if (relu) - out = out > 0.f ? 
out : out*relu_coeff; - outptr[0] = out; - out_j = 1; - } - - if (stride_w == 1 || (stride_w == 2 && dilation_w == 1)) - { - int avl = outW1 - out_j; - if( stride_w == 1 ) - for( ; out_j < outW1; out_j += vl, avl -= vl) - { - vl = vsetvl_e32m2(avl); - int in_j = out_j * stride_w - pad_l; - vfloat32m2_t v00 = vle32_v_f32m2(imgptr0 + in_j, vl), - v01 = vle32_v_f32m2(imgptr0 + in_j + dilation_w, vl), - v02 = vle32_v_f32m2(imgptr0 + in_j + dilation_w*2, vl), - v10 = vle32_v_f32m2(imgptr1 + in_j, vl), - v11 = vle32_v_f32m2(imgptr1 + in_j + dilation_w, vl), - v12 = vle32_v_f32m2(imgptr1 + in_j + dilation_w*2, vl), - v20 = vle32_v_f32m2(imgptr2 + in_j, vl), - v21 = vle32_v_f32m2(imgptr2 + in_j + dilation_w, vl), - v22 = vle32_v_f32m2(imgptr2 + in_j + dilation_w*2, vl); - - vfloat32m2_t vout0 = vfmul_vf_f32m2(v00, w00, vl); - vfloat32m2_t vout1 = vfmul_vf_f32m2(v01, w01, vl); - vfloat32m2_t vout2 = vfmul_vf_f32m2(v02, w02, vl); - vout0 = vfadd_vf_f32m2(vout0, bias, vl); - - vout0 = vfmacc_vf_f32m2(vout0, w10, v10, vl); - vout1 = vfmacc_vf_f32m2(vout1, w11, v11, vl); - vout2 = vfmacc_vf_f32m2(vout2, w12, v12, vl); - - vout0 = vfmacc_vf_f32m2(vout0, w20, v20, vl); - vout1 = vfmacc_vf_f32m2(vout1, w21, v21, vl); - vout2 = vfmacc_vf_f32m2(vout2, w22, v22, vl); - - vout0 = vfadd_vv_f32m2(vfadd_vv_f32m2(vout0, vout1, vl), vout2, vl); - if (relu) - { - vbool16_t m = vmfgt_vf_f32m2_b16(vout0, 0, vl); - vout0 = vmerge_vvm_f32m2(m, vfmul_vf_f32m2(vout0, relu_coeff, vl), vout0, vl); - } - vse32_v_f32m2(outptr + out_j, vout0, vl); - } - else //stride_w == 2 && dilation_w == 1 - for( ; out_j < outW1; out_j += vl, avl -= vl) - { - vl = vsetvl_e32m2(avl); - int in_j = out_j * stride_w - pad_l; - vfloat32m2_t v00, v01, v02, v10, v11, v12, v20, v21, v22, unused; - vfloat32m2_load_deinterleave(imgptr0 + in_j, v00, v01, vl); - vfloat32m2_load_deinterleave(imgptr0 + in_j + 2, v02, unused, vl); - vfloat32m2_load_deinterleave(imgptr1 + in_j, v10, v11, vl); - vfloat32m2_load_deinterleave(imgptr1 
+ in_j + 2, v12, unused, vl); - vfloat32m2_load_deinterleave(imgptr2 + in_j, v20, v21, vl); - vfloat32m2_load_deinterleave(imgptr2 + in_j + 2, v22, unused, vl); - - vfloat32m2_t vout0 = vfmul_vf_f32m2(v00, w00, vl); - vfloat32m2_t vout1 = vfmul_vf_f32m2(v01, w01, vl); - vfloat32m2_t vout2 = vfmul_vf_f32m2(v02, w02, vl); - vout0 = vfadd_vf_f32m2(vout0, bias, vl); - - vout0 = vfmacc_vf_f32m2(vout0, w10, v10, vl); - vout1 = vfmacc_vf_f32m2(vout1, w11, v11, vl); - vout2 = vfmacc_vf_f32m2(vout2, w12, v12, vl); - - vout0 = vfmacc_vf_f32m2(vout0, w20, v20, vl); - vout1 = vfmacc_vf_f32m2(vout1, w21, v21, vl); - vout2 = vfmacc_vf_f32m2(vout2, w22, v22, vl); - - vout0 = vfadd_vv_f32m2(vfadd_vv_f32m2(vout0, vout1, vl), vout2, vl); - if (relu) - { - vbool16_t m = vmfgt_vf_f32m2_b16(vout0, 0, vl); - vout0 = vmerge_vvm_f32m2(m, vfmul_vf_f32m2(vout0, relu_coeff, vl), vout0, vl); - } - vse32_v_f32m2(outptr + out_j, vout0, vl); - } - } - - for (; out_j < outW1; out_j++) - { - int in_j = out_j * stride_w - pad_l; - out = imgptr0[in_j]*w00 + imgptr0[in_j + dilation_w]*w01 + imgptr0[in_j + dilation_w*2]*w02 + - imgptr1[in_j]*w10 + imgptr1[in_j + dilation_w]*w11 + imgptr1[in_j + dilation_w*2]*w12 + - imgptr2[in_j]*w20 + imgptr2[in_j + dilation_w]*w21 + imgptr2[in_j + dilation_w*2]*w22 + bias; - if (relu) - out = out > 0.f ? out : out*relu_coeff; - outptr[out_j] = out; - } - - for (; out_j < outW; out_j++ ) - { - int in_j0 = out_j * stride_w - pad_l, in_j1 = in_j0 + dilation_w, in_j2 = in_j0 + dilation_w*2; - float s0 = 1.f, s1 = 1.f, s2 = 1.f; - if (in_j0 >= width) - { - in_j0 = 0; - s0 = 0.f; - } - if (in_j1 >= width) - { - in_j1 = 0; - s1 = 0.f; - } - if (in_j2 >= width) - { - in_j2 = 0; - s2 = 0.f; - } - out = imgptr0[in_j0]*w00*s0 + imgptr0[in_j1]*w01*s1 + imgptr0[in_j2]*w02*s2 + - imgptr1[in_j0]*w10*s0 + imgptr1[in_j1]*w11*s1 + imgptr1[in_j2]*w12*s2 + - imgptr2[in_j0]*w20*s0 + imgptr2[in_j1]*w21*s1 + imgptr2[in_j2]*w22*s2 + bias; - if (relu) - out = out > 0.f ? 
out : out*relu_coeff; - outptr[out_j] = out; - } - } -} - #endif // CV_RVV #if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_LASX -static inline void _v256_load_deinterleave(const float* ptr, __m256& a, __m256& b) -{ - __m256 t0 = (__m256)__lasx_xvld(ptr, 0); - __m256 t1 = (__m256)__lasx_xvld(ptr, 8*4); - - __m256 lo = (__m256)__lasx_xvpermi_q(t0, t1, 2+0*16); - __m256 hi = (__m256)__lasx_xvpermi_q(t0, t1, 3+1*16); - - a = (__m256)__lasx_xvpermi_w(hi, lo, 0x88); - b = (__m256)__lasx_xvpermi_w(hi, lo, 0xdd); -} - -void fastDepthwiseConv( const float* wptr, - int kernel_h, int kernel_w, - int stride_h, int stride_w, - int dilation_h, int dilation_w, - int pad_t, int pad_l, - const float* biasptr, const float* relu, - const float* inptr_, - int height, int width, - float* outptr_, - int out_d, int outH, int outW ) -{ - const float w00_ = wptr[0], w01_ = wptr[1], w02_ = wptr[2], - w10 = wptr[3], w11 = wptr[4], w12 = wptr[5], - w20_ = wptr[6], w21_ = wptr[7], w22_ = wptr[8]; - int outW1 = min(outW, (width - dilation_w*(kernel_w - 1) + pad_l)/stride_w); - float relu_coeff = relu ? relu[out_d] : 1.f, bias = biasptr[out_d]; - - for (int out_i = 0; out_i < outH; out_i++) - { - int in_i = out_i * stride_h - pad_t, out_j = 0; - const float* imgptr0 = inptr_ + in_i*width; - const float* imgptr1 = imgptr0 + dilation_h*width; - const float* imgptr2 = imgptr0 + (dilation_h*2)*width; - float out, w00 = w00_, w01 = w01_, w02 = w02_; - float w20 = w20_, w21 = w21_, w22 = w22_; - if (in_i < 0) - { - w00 = w01 = w02 = 0.f; - imgptr0 = imgptr1; - } - else if (in_i + dilation_h*(kernel_h-1) >= height) - { - w20 = w21 = w22 = 0.f; - imgptr2 = imgptr1; - } - float* outptr = outptr_ + out_i*outW; - if (pad_l > 0) - { - out = imgptr0[0]*w01 + imgptr0[dilation_w]*w02 + - imgptr1[0]*w11 + imgptr1[dilation_w]*w12 + - imgptr2[0]*w21 + imgptr2[dilation_w]*w22 + bias; - if (relu) - out = out > 0.f ? 
out : out*relu_coeff; - outptr[0] = out; - out_j = 1; - } - - if (stride_w == 1 || (stride_w == 2 && dilation_w == 1)) - { - const int VECSZ = 8; - __m256 vw00 = _v256_setall_ps(w00), vw01 = _v256_setall_ps(w01), vw02 = _v256_setall_ps(w02), - vw10 = _v256_setall_ps(w10), vw11 = _v256_setall_ps(w11), vw12 = _v256_setall_ps(w12), - vw20 = _v256_setall_ps(w20), vw21 = _v256_setall_ps(w21), vw22 = _v256_setall_ps(w22); - __m256 z = (__m256)__lasx_xvxor_v((__m256i)vw00, (__m256i)vw00), - vbias = _v256_setall_ps(bias), vrc = _v256_setall_ps(relu_coeff); - - if( stride_w == 1 ) - for( ; out_j < outW1; out_j += VECSZ ) - { - if (out_j + VECSZ > outW1 && out_j > pad_l) - out_j = outW1 - VECSZ; - int in_j = out_j * stride_w - pad_l; - __m256 v00 = (__m256)__lasx_xvld(imgptr0 + in_j, 0), - v01 = (__m256)__lasx_xvld(imgptr0 + in_j + dilation_w, 0), - v02 = (__m256)__lasx_xvld(imgptr0 + in_j + dilation_w*2, 0), - v10 = (__m256)__lasx_xvld(imgptr1 + in_j, 0), - v11 = (__m256)__lasx_xvld(imgptr1 + in_j + dilation_w, 0), - v12 = (__m256)__lasx_xvld(imgptr1 + in_j + dilation_w*2, 0), - v20 = (__m256)__lasx_xvld(imgptr2 + in_j, 0), - v21 = (__m256)__lasx_xvld(imgptr2 + in_j + dilation_w, 0), - v22 = (__m256)__lasx_xvld(imgptr2 + in_j + dilation_w*2, 0); - - __m256 vout0 = __lasx_xvfmadd_s(v00, vw00, vbias); - __m256 vout1 = __lasx_xvfmul_s(v01, vw01); - __m256 vout2 = __lasx_xvfmul_s(v02, vw02); - - vout0 = __lasx_xvfmadd_s(v10, vw10, vout0); - vout1 = __lasx_xvfmadd_s(v11, vw11, vout1); - vout2 = __lasx_xvfmadd_s(v12, vw12, vout2); - - vout0 = __lasx_xvfmadd_s(v20, vw20, vout0); - vout1 = __lasx_xvfmadd_s(v21, vw21, vout1); - vout2 = __lasx_xvfmadd_s(v22, vw22, vout2); - - vout0 = __lasx_xvfadd_s(__lasx_xvfadd_s(vout0, vout1), vout2); - if (relu) - { - __m256i m = __lasx_xvfcmp_clt_s(z, vout0); - vout0 = (__m256)__lasx_xvbitsel_v((__m256i)__lasx_xvfmul_s(vout0, vrc), (__m256i)vout0, m); - } - __lasx_xvst(vout0, outptr + out_j, 0); - } - else - for( ; out_j < outW1; out_j += VECSZ 
) - { - if (out_j + VECSZ > outW1 && out_j > pad_l) - out_j = outW1 - VECSZ; - int in_j = out_j * stride_w - pad_l; - __m256 v00, v01, v02, v10, v11, v12, v20, v21, v22, unused; - _v256_load_deinterleave(imgptr0 + in_j, v00, v01); - _v256_load_deinterleave(imgptr0 + in_j + 2, v02, unused); - _v256_load_deinterleave(imgptr1 + in_j, v10, v11); - _v256_load_deinterleave(imgptr1 + in_j + 2, v12, unused); - _v256_load_deinterleave(imgptr2 + in_j, v20, v21); - _v256_load_deinterleave(imgptr2 + in_j + 2, v22, unused); - - __m256 vout0 = __lasx_xvfmadd_s(v00, vw00, vbias); - __m256 vout1 = __lasx_xvfmul_s(v01, vw01); - __m256 vout2 = __lasx_xvfmul_s(v02, vw02); - - vout0 = __lasx_xvfmadd_s(v10, vw10, vout0); - vout1 = __lasx_xvfmadd_s(v11, vw11, vout1); - vout2 = __lasx_xvfmadd_s(v12, vw12, vout2); - - vout0 = __lasx_xvfmadd_s(v20, vw20, vout0); - vout1 = __lasx_xvfmadd_s(v21, vw21, vout1); - vout2 = __lasx_xvfmadd_s(v22, vw22, vout2); - - vout0 = __lasx_xvfadd_s(__lasx_xvfadd_s(vout0, vout1), vout2); - if (relu) - { - __m256i m = __lasx_xvfcmp_clt_s(z, vout0); - vout0 = (__m256)__lasx_xvbitsel_v((__m256i)__lasx_xvfmul_s(vout0, vrc), (__m256i)vout0, m); - } - __lasx_xvst(vout0, outptr + out_j, 0); - } - } - - for (; out_j < outW1; out_j++) - { - int in_j = out_j * stride_w - pad_l; - out = imgptr0[in_j]*w00 + imgptr0[in_j + dilation_w]*w01 + imgptr0[in_j + dilation_w*2]*w02 + - imgptr1[in_j]*w10 + imgptr1[in_j + dilation_w]*w11 + imgptr1[in_j + dilation_w*2]*w12 + - imgptr2[in_j]*w20 + imgptr2[in_j + dilation_w]*w21 + imgptr2[in_j + dilation_w*2]*w22 + bias; - if (relu) - out = out > 0.f ? 
out : out*relu_coeff; - outptr[out_j] = out; - } - - for (; out_j < outW; out_j++ ) - { - int in_j0 = out_j * stride_w - pad_l, in_j1 = in_j0 + dilation_w, in_j2 = in_j0 + dilation_w*2; - float s0 = 1.f, s1 = 1.f, s2 = 1.f; - if (in_j0 >= width) - { - in_j0 = 0; - s0 = 0.f; - } - if (in_j1 >= width) - { - in_j1 = 0; - s1 = 0.f; - } - if (in_j2 >= width) - { - in_j2 = 0; - s2 = 0.f; - } - out = imgptr0[in_j0]*w00*s0 + imgptr0[in_j1]*w01*s1 + imgptr0[in_j2]*w02*s2 + - imgptr1[in_j0]*w10*s0 + imgptr1[in_j1]*w11*s1 + imgptr1[in_j2]*w12*s2 + - imgptr2[in_j0]*w20*s0 + imgptr2[in_j1]*w21*s1 + imgptr2[in_j2]*w22*s2 + bias; - if (relu) - out = out > 0.f ? out : out*relu_coeff; - outptr[out_j] = out; - } - } -} - // dst = vec * weights^t + bias void fastGEMM1T( const float* vec, const float* weights, size_t wstep, const float* bias, From 6c763e1ea509404465e79e0556a0d02b7921b8e3 Mon Sep 17 00:00:00 2001 From: anderskiaer Date: Sat, 11 Mar 2023 21:03:18 +0100 Subject: [PATCH 042/199] Add possibility for disabling inlining `wasm` in `opencv.js` --- doc/js_tutorials/js_setup/js_setup/js_setup.markdown | 3 +++ modules/js/CMakeLists.txt | 2 +- platforms/js/build_js.py | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/js_tutorials/js_setup/js_setup/js_setup.markdown b/doc/js_tutorials/js_setup/js_setup/js_setup.markdown index 26a4e419bd..5b0e65b250 100644 --- a/doc/js_tutorials/js_setup/js_setup/js_setup.markdown +++ b/doc/js_tutorials/js_setup/js_setup/js_setup.markdown @@ -83,6 +83,9 @@ Building OpenCV.js from Source It requires `python` and `cmake` installed in your development environment. -# The build script builds asm.js version by default. To build WebAssembly version, append `--build_wasm` switch. + By default everything is bundled into one JavaScript file by `base64` encoding the WebAssembly code. 
For production + builds you can add `--disable_single_file` which will reduce total size by writing the WebAssembly code + to a dedicated `.wasm` file which the generated JavaScript file will automatically load. For example, to build wasm version in `build_wasm` directory: @code{.bash} diff --git a/modules/js/CMakeLists.txt b/modules/js/CMakeLists.txt index 5996e419dd..19f0b19790 100644 --- a/modules/js/CMakeLists.txt +++ b/modules/js/CMakeLists.txt @@ -70,7 +70,7 @@ if(COMPILE_FLAGS) endif() set(EMSCRIPTEN_LINK_FLAGS "${EMSCRIPTEN_LINK_FLAGS} --memory-init-file 0 -s TOTAL_MEMORY=128MB -s WASM_MEM_MAX=1GB -s ALLOW_MEMORY_GROWTH=1") -set(EMSCRIPTEN_LINK_FLAGS "${EMSCRIPTEN_LINK_FLAGS} -s MODULARIZE=1 -s SINGLE_FILE=1") +set(EMSCRIPTEN_LINK_FLAGS "${EMSCRIPTEN_LINK_FLAGS} -s MODULARIZE=1") set(EMSCRIPTEN_LINK_FLAGS "${EMSCRIPTEN_LINK_FLAGS} -s EXPORT_NAME=\"'cv'\" -s DEMANGLE_SUPPORT=1") set(EMSCRIPTEN_LINK_FLAGS "${EMSCRIPTEN_LINK_FLAGS} -s FORCE_FILESYSTEM=1 --use-preload-plugins --bind --post-js ${JS_HELPER} ${COMPILE_FLAGS}") set_target_properties(${the_module} PROPERTIES LINK_FLAGS "${EMSCRIPTEN_LINK_FLAGS}") diff --git a/platforms/js/build_js.py b/platforms/js/build_js.py index 64dc1a6c67..3e8edfe4ad 100644 --- a/platforms/js/build_js.py +++ b/platforms/js/build_js.py @@ -180,6 +180,8 @@ class Builder: flags += "-s WASM=1 " elif self.options.disable_wasm: flags += "-s WASM=0 " + if not self.options.disable_single_file: + flags += "-s SINGLE_FILE=1 " if self.options.threads: flags += "-s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=4 " else: @@ -233,6 +235,7 @@ if __name__ == "__main__": parser.add_argument('--emscripten_dir', default=emscripten_dir, help="Path to Emscripten to use for build (deprecated in favor of 'emcmake' launcher)") parser.add_argument('--build_wasm', action="store_true", help="Build OpenCV.js in WebAssembly format") parser.add_argument('--disable_wasm', action="store_true", help="Build OpenCV.js in Asm.js format") + 
parser.add_argument('--disable_single_file', action="store_true", help="Do not merge JavaScript and WebAssembly into one single file") parser.add_argument('--threads', action="store_true", help="Build OpenCV.js with threads optimization") parser.add_argument('--simd', action="store_true", help="Build OpenCV.js with SIMD optimization") parser.add_argument('--build_test', action="store_true", help="Build tests") From ee3740af0013ff65483585b9b6f9f6a5bee73e7c Mon Sep 17 00:00:00 2001 From: zihaomu Date: Mon, 13 Mar 2023 22:16:51 +0800 Subject: [PATCH 043/199] move global skip out of if loop, and add opencv_deny_list --- modules/dnn/test/test_common.hpp | 1 + modules/dnn/test/test_onnx_conformance.cpp | 15 ++++++++++++++- ...nformance_layer_filter_opencv_denylist.inl.hpp | 0 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 modules/dnn/test/test_onnx_conformance_layer_filter_opencv_denylist.inl.hpp diff --git a/modules/dnn/test/test_common.hpp b/modules/dnn/test/test_common.hpp index df93e50c91..e3c7a553f8 100644 --- a/modules/dnn/test/test_common.hpp +++ b/modules/dnn/test/test_common.hpp @@ -47,6 +47,7 @@ #define CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE "dnn_skip_onnx_conformance" #define CV_TEST_TAG_DNN_SKIP_PARSER "dnn_skip_parser" +#define CV_TEST_TAG_DNN_SKIP_GLOBAL "dnn_skip_global" #define CV_TEST_TAG_DNN_SKIP_TIMVX "dnn_skip_timvx" #define CV_TEST_TAG_DNN_SKIP_CANN "dnn_skip_cann" diff --git a/modules/dnn/test/test_onnx_conformance.cpp b/modules/dnn/test/test_onnx_conformance.cpp index fc766c2b81..8f24fdf135 100644 --- a/modules/dnn/test/test_onnx_conformance.cpp +++ b/modules/dnn/test/test_onnx_conformance.cpp @@ -937,6 +937,7 @@ public: static std::set parser_deny_list; static std::set global_deny_list; + static std::set opencv_deny_list; static std::set opencl_fp16_deny_list; static std::set opencl_deny_list; static std::set cpu_deny_list; @@ -1001,6 +1002,10 @@ public: #include "test_onnx_conformance_layer_filter_opencv_all_denylist.inl.hpp" 
}; + opencv_deny_list = { + #include "test_onnx_conformance_layer_filter_opencv_denylist.inl.hpp" + }; + opencl_fp16_deny_list = { #include "test_onnx_conformance_layer_filter_opencv_ocl_fp16_denylist.inl.hpp" }; @@ -1036,6 +1041,7 @@ public: std::set Test_ONNX_conformance::parser_deny_list; std::set Test_ONNX_conformance::global_deny_list; +std::set Test_ONNX_conformance::opencv_deny_list; std::set Test_ONNX_conformance::opencl_fp16_deny_list; std::set Test_ONNX_conformance::opencl_deny_list; std::set Test_ONNX_conformance::cpu_deny_list; @@ -1057,14 +1063,21 @@ TEST_P(Test_ONNX_conformance, Layer_Test) bool checkLayersFallbacks = true; bool checkAccuracy = true; + // SKIP when the test case is in the parser deny list. if (parser_deny_list.find(name) != parser_deny_list.end()) { applyTestTag(CV_TEST_TAG_DNN_SKIP_PARSER, CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE); } + // SKIP when the test case is in the global deny list. + if (global_deny_list.find(name) != global_deny_list.end()) + { + applyTestTag(CV_TEST_TAG_DNN_SKIP_GLOBAL, CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE); + } + if (backend == DNN_BACKEND_OPENCV) { - if (global_deny_list.find(name) != global_deny_list.end()) + if (opencv_deny_list.find(name) != opencv_deny_list.end()) { applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCV_BACKEND, CV_TEST_TAG_DNN_SKIP_ONNX_CONFORMANCE); } diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_denylist.inl.hpp new file mode 100644 index 0000000000..e69de29bb2 From b94e13c8aefedfd9c0ed73da278b7df4c30c0e24 Mon Sep 17 00:00:00 2001 From: Yuantao Feng Date: Tue, 14 Mar 2023 02:46:33 +0800 Subject: [PATCH 044/199] Merge pull request #23319 from fengyuentau:fix_zoo_issue_136 Related issue: https://github.com/opencv/opencv_zoo/issues/136 Features added: - Support operators with multiple output: ONNX Split. - Support Slice without steps. Bugs fixed: - Wrong settings in ClipByValue (Relu6). 
- Wrong calculation of pads in convolution layer (It is wrong generally but only fixed specifically for CANN for now). ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake --- modules/dnn/include/opencv2/dnn/dnn.hpp | 8 +- modules/dnn/src/layer.cpp | 3 +- modules/dnn/src/layers/batch_norm_layer.cpp | 16 +-- modules/dnn/src/layers/blank_layer.cpp | 8 +- modules/dnn/src/layers/concat_layer.cpp | 8 +- modules/dnn/src/layers/const_layer.cpp | 6 +- modules/dnn/src/layers/convolution_layer.cpp | 34 +++-- modules/dnn/src/layers/elementwise_layers.cpp | 136 ++++++++++-------- modules/dnn/src/layers/eltwise_layer.cpp | 33 ++--- modules/dnn/src/layers/flatten_layer.cpp | 8 +- .../dnn/src/layers/fully_connected_layer.cpp | 12 +- modules/dnn/src/layers/lrn_layer.cpp | 8 +- .../dnn/src/layers/nary_eltwise_layers.cpp | 33 ++--- modules/dnn/src/layers/padding_layer.cpp | 12 +- modules/dnn/src/layers/permute_layer.cpp | 8 +- modules/dnn/src/layers/pooling_layer.cpp | 14 +- modules/dnn/src/layers/reshape_layer.cpp | 10 +- modules/dnn/src/layers/resize_layer.cpp | 23 ++- modules/dnn/src/layers/slice_layer.cpp | 85 +++++++++-- modules/dnn/src/layers/softmax_layer.cpp | 8 +- modules/dnn/src/net_cann.cpp | 43 ++++-- modules/dnn/src/op_cann.cpp | 2 +- modules/dnn/src/op_cann.hpp | 1 + 23 files changed, 317 insertions(+), 202 deletions(-) diff --git 
a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 11ad69b8d9..dca20adb27 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -347,11 +347,11 @@ CV__DNN_INLINE_NS_BEGIN /** * @brief Returns a CANN backend node * - * @param inputsWrapper layer inputs - * @param index layer id for op name - * @param nodes inputs of this node + * @param inputsWrapper input tensors of this CANN operator + * @param nodes nodes of input tensors */ - virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes); + virtual Ptr initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes); /** * @brief Automatic Halide scheduling based on layer hyper-parameters. diff --git a/modules/dnn/src/layer.cpp b/modules/dnn/src/layer.cpp index 5305a5221d..730fae4cb8 100644 --- a/modules/dnn/src/layer.cpp +++ b/modules/dnn/src/layer.cpp @@ -84,7 +84,8 @@ Ptr Layer::initTimVX(void* timVxInfo, return Ptr(); } -Ptr Layer::initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) +Ptr Layer::initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes) { CV_Error(Error::StsNotImplemented, "CANN pipeline of " + type + " layers is not defined."); return Ptr(); diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp index e112ba0746..40d8054251 100644 --- a/modules/dnn/src/layers/batch_norm_layer.cpp +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -392,7 +392,8 @@ public: #endif // HAVE_HALIDE #ifdef HAVE_CANN - virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + virtual Ptr initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes) CV_OVERRIDE { CV_Assert(nodes.size() == 1); CV_Assert(blobs.size() == 4); // must have scale, offset, mean and variance @@ -401,8 +402,7 @@ public: auto channel = 
x->host->size[1]; // create operator - std::string op_name = cv::format("bn_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); // set attributes op->set_attr_epsilon(epsilon); @@ -412,24 +412,24 @@ public: // set inputs // set inputs : x auto op_x = nodes[0].dynamicCast()->getOp(); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); auto x_desc = x->getTensorDesc(); op->update_input_desc_x(*x_desc); // set inputs : scale (blobs[2]) std::vector shape_{channel}; - auto op_const_scale = std::make_shared(blobs[2].data, blobs[2].type(), shape_, cv::format("%s_scale", op_name.c_str())); + auto op_const_scale = std::make_shared(blobs[2].data, blobs[2].type(), shape_, cv::format("%s_scale", name.c_str())); op->set_input_scale(*(op_const_scale->getOp())); op->update_input_desc_scale(*(op_const_scale->getTensorDesc())); // set inputs : offset (blobs[3]) - auto op_const_offset = std::make_shared(blobs[3].data, blobs[3].type(), shape_, cv::format("%s_offset", op_name.c_str())); + auto op_const_offset = std::make_shared(blobs[3].data, blobs[3].type(), shape_, cv::format("%s_offset", name.c_str())); op->set_input_offset(*(op_const_offset->getOp())); op->update_input_desc_offset(*(op_const_offset->getTensorDesc())); // set inputs : mean (blobs[0]) - auto op_const_mean = std::make_shared(blobs[0].data, blobs[0].type(), shape_, cv::format("%s_mean", op_name.c_str())); + auto op_const_mean = std::make_shared(blobs[0].data, blobs[0].type(), shape_, cv::format("%s_mean", name.c_str())); op->set_input_mean(*(op_const_mean->getOp())); op->update_input_desc_mean(*(op_const_mean->getTensorDesc())); // set inputs : variance (blobs[1]) - auto op_const_var = std::make_shared(blobs[1].data, blobs[1].type(), shape_, cv::format("%s_var", op_name.c_str())); + auto op_const_var = std::make_shared(blobs[1].data, blobs[1].type(), shape_, cv::format("%s_var", name.c_str())); op->set_input_variance(*(op_const_var->getOp())); 
op->update_input_desc_variance(*(op_const_var->getTensorDesc())); diff --git a/modules/dnn/src/layers/blank_layer.cpp b/modules/dnn/src/layers/blank_layer.cpp index 972aa7c9c8..0aa65f62a3 100644 --- a/modules/dnn/src/layers/blank_layer.cpp +++ b/modules/dnn/src/layers/blank_layer.cpp @@ -121,7 +121,8 @@ public: } #ifdef HAVE_CANN - virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + virtual Ptr initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes) CV_OVERRIDE { auto x = inputsWrapper[0].dynamicCast(); auto x_desc = x->getTensorDesc(); @@ -129,11 +130,10 @@ public: auto output_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); // create operator - std::string op_name = cv::format("identity_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); // set inputs - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); op->update_input_desc_x(*x_desc); // set output diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp index 52330a8e42..44ccfc02e8 100644 --- a/modules/dnn/src/layers/concat_layer.cpp +++ b/modules/dnn/src/layers/concat_layer.cpp @@ -367,13 +367,13 @@ public: } #ifdef HAVE_CANN - virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + virtual Ptr initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes) CV_OVERRIDE { CV_Assert(inputsWrapper.size() == nodes.size()); // create operator - std::string op_name = cv::format("concat_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); // set attributes int N = inputsWrapper.size(); @@ -387,7 +387,7 @@ public: auto x_i = inputsWrapper[i].dynamicCast(); auto x_i_desc = x_i->getTensorDesc(); auto op_x_i = nodes[i].dynamicCast()->getOp(); - op->set_dynamic_input_x(i, *op_x_i, "y"); + 
op->set_dynamic_input_x(i, *op_x_i, x_i->name.c_str()); op->update_dynamic_input_desc_x(i, *x_i_desc); } diff --git a/modules/dnn/src/layers/const_layer.cpp b/modules/dnn/src/layers/const_layer.cpp index 2141ad987a..58cccbd552 100644 --- a/modules/dnn/src/layers/const_layer.cpp +++ b/modules/dnn/src/layers/const_layer.cpp @@ -84,7 +84,8 @@ public: } #ifdef HAVE_CANN - virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + virtual Ptr initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes) CV_OVERRIDE { auto mat_shape = shape(blobs[0]); std::vector mat_shape_{mat_shape.begin(), mat_shape.end()}; @@ -110,8 +111,7 @@ public: ge_tensor->SetTensorDesc(*desc); ge_tensor->SetData(blobs[0].data, ge_shape.GetShapeSize() * size_of_type); - std::string op_name = cv::format("const_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); op->set_attr_value(*ge_tensor); return Ptr(new CannBackendNode(op)); diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 3e62887bd7..0dcff60072 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -782,7 +782,8 @@ public: } #ifdef HAVE_CANN - virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + virtual Ptr initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes) CV_OVERRIDE { CV_Assert(!blobs.empty()); CV_Assert(inputsWrapper.size() == 1); @@ -791,18 +792,35 @@ public: bool has_bias = hasBias() || fusedBias; auto x = inputsWrapper[0].dynamicCast(); - const int x_in_channel = x->host->size[1]; + const auto shape_x = x->host->size; // [b, c, h, w] const int filter_out_channel = blobs[0].size[1]; - const int groups = x_in_channel / filter_out_channel; + const int groups = shape_x[1] / filter_out_channel; // create operator - std::string 
op_name = cv::format("conv2d_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); // set attributes op->set_attr_strides(ge::Operator::OpListInt( {1, 1, (int64_t)strides[0], (int64_t)strides[1]} )); + // recalculate pads in case of "SAME" padMode with odd pads + // since in 'getConvPoolPaddings' pads are divided equally + // leading to the loss of one pad + if (padMode == "SAME") + { + for (int i = 0; i < pads_begin.size(); i++) { + if (strides[i] <= kernel_size[i]) + { + int pads_at_i = kernel_size[i] - 1 - (shape_x[i+2] - 1 + strides[i]) % strides[i]; + pads_begin[i] = pads_at_i / 2; + // if odd, add extra padding to the end for SAME_UPPER + // or to the beginning for SAME_LOWER. Since here we cannot + // identify SAME_UPPER and SAME_LOWER, extra padding is always + // added to the end. + pads_end[i] = pads_at_i - pads_begin[i]; + } + } + } op->set_attr_pads(ge::Operator::OpListInt( {(int64_t)pads_begin[1], (int64_t)pads_end[1], (int64_t)pads_begin[0], (int64_t)pads_end[0]} )); @@ -815,12 +833,12 @@ public: // set inputs // set inputs : x auto op_x = nodes[0].dynamicCast()->getOp(); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); auto x_desc = x->getTensorDesc(); op->update_input_desc_x(*x_desc); // set inputs : weight const Mat& w_mat = blobs[0]; - auto op_const_weight = std::make_shared(w_mat.data, w_mat.type(), shape(w_mat), cv::format("%s_w", op_name.c_str())); + auto op_const_weight = std::make_shared(w_mat.data, w_mat.type(), shape(w_mat), cv::format("%s_w", name.c_str())); op->set_input_filter(*(op_const_weight->getOp())); op->update_input_desc_filter(*(op_const_weight->getTensorDesc())); // set inputs : bias @@ -830,7 +848,7 @@ public: Mat b_mat({out_channel}, CV_32F, &biasvec[0]); std::vector bias_shape{out_channel}; - auto op_const_bias = 
std::make_shared(b_mat.data, b_mat.type(), bias_shape, cv::format("%s_b", name.c_str())); op->set_input_bias(*(op_const_bias->getOp())); op->update_input_desc_bias(*(op_const_bias->getTensorDesc())); } diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index 9819073bc6..bb60410038 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -188,9 +188,10 @@ public: } #ifdef HAVE_CANN - virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + virtual Ptr initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes) CV_OVERRIDE { - return func.initCannOp(inputsWrapper, index, nodes); + return func.initCannOp(Layer::name, inputsWrapper, nodes); } #endif // HAVE_CANN @@ -459,7 +460,9 @@ struct ReLUFunctor : public BaseFunctor #endif // HAVE_HALIDE #ifdef HAVE_CANN - Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + Ptr initCannOp(const std::string& name, + const std::vector > &inputsWrapper, + const std::vector >& nodes) { auto x = inputsWrapper[0].dynamicCast(); auto op_x = nodes[0].dynamicCast()->getOp(); @@ -469,10 +472,9 @@ struct ReLUFunctor : public BaseFunctor if (slope) { - std::string op_name = cv::format("leakyrelu_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); op->update_input_desc_x(*x_desc); op->set_attr_negative_slope(slope); @@ -482,10 +484,9 @@ struct ReLUFunctor : public BaseFunctor return Ptr(new CannBackendNode(op)); } - std::string op_name = cv::format("relu_%d", index); - auto op = std::make_shared(op_name); // FIXIT: Relu6? 
+ auto op = std::make_shared(name); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); op->update_input_desc_x(*x_desc); op->update_output_desc_y(*output_desc); @@ -653,28 +654,29 @@ struct ReLU6Functor : public BaseFunctor #endif // HAVE_HALIDE #ifdef HAVE_CANN - Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + Ptr initCannOp(const std::string& name, + const std::vector > &inputsWrapper, + const std::vector >& nodes) { auto x = inputsWrapper[0].dynamicCast(); - std::string op_name = cv::format("clip_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); auto op_x = nodes[0].dynamicCast()->getOp(); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); auto x_desc = x->getTensorDesc(); op->update_input_desc_x(*x_desc); Mat min_value_mat(1, 1, CV_32F, Scalar(minValue)); std::vector shape_{1}; - auto op_const_minv = std::make_shared(min_value_mat.data, min_value_mat.type(), shape_, cv::format("%s_min_value", op_name.c_str())); + auto op_const_minv = std::make_shared(min_value_mat.data, min_value_mat.type(), shape_, cv::format("%s_min_value", name.c_str())); op->set_input_clip_value_min(*(op_const_minv->getOp())); op->update_input_desc_clip_value_min(*(op_const_minv->getTensorDesc())); Mat max_value_mat(1, 1, CV_32F, Scalar(maxValue)); - auto op_const_maxv = std::make_shared(max_value_mat.data, max_value_mat.type(), shape_, cv::format("%s_max_value", op_name.c_str())); - op->set_input_clip_value_min(*(op_const_maxv->getOp())); - op->update_input_desc_clip_value_min(*(op_const_maxv->getTensorDesc())); + auto op_const_maxv = std::make_shared(max_value_mat.data, max_value_mat.type(), shape_, cv::format("%s_max_value", name.c_str())); + op->set_input_clip_value_max(*(op_const_maxv->getOp())); + op->update_input_desc_clip_value_max(*(op_const_maxv->getTensorDesc())); auto output_desc = std::make_shared(ge::Shape(), 
ge::FORMAT_NCHW, ge::DT_FLOAT); op->update_output_desc_y(*output_desc); @@ -805,7 +807,9 @@ struct BaseDefaultFunctor : public BaseFunctor #endif // HAVE_HALIDE #ifdef HAVE_CANN - Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + Ptr initCannOp(const std::string& name, + const std::vector > &inputsWrapper, + const std::vector >& nodes) { CV_Error(Error::StsNotImplemented, ""); } @@ -925,15 +929,16 @@ struct TanHFunctor : public BaseDefaultFunctor #endif // HAVE_HALIDE #ifdef HAVE_CANN - Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + Ptr initCannOp(const std::string& name, + const std::vector > &inputsWrapper, + const std::vector >& nodes) { auto x = inputsWrapper[0].dynamicCast(); - std::string op_name = cv::format("tanh_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); auto op_x = nodes[0].dynamicCast()->getOp(); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); auto x_desc = x->getTensorDesc(); op->update_input_desc_x(*x_desc); @@ -991,17 +996,18 @@ struct SwishFunctor : public BaseDefaultFunctor #endif // HAVE_HALIDE #ifdef HAVE_CANN - Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + Ptr initCannOp(const std::string& name, + const std::vector > &inputsWrapper, + const std::vector >& nodes) { auto x = inputsWrapper[0].dynamicCast(); - std::string op_name = cv::format("swish_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); op->set_attr_scale(1.0f); auto op_x = nodes[0].dynamicCast()->getOp(); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); auto x_desc = x->getTensorDesc(); op->update_input_desc_x(*x_desc); @@ -1068,15 +1074,16 @@ struct MishFunctor : public BaseDefaultFunctor #endif // HAVE_HALIDE #ifdef HAVE_CANN - Ptr initCannOp(const std::vector > 
&inputsWrapper, const int index, const std::vector >& nodes) + Ptr initCannOp(const std::string& name, + const std::vector > &inputsWrapper, + const std::vector >& nodes) { auto x = inputsWrapper[0].dynamicCast(); - std::string op_name = cv::format("mish_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); auto op_x = nodes[0].dynamicCast()->getOp(); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); auto x_desc = x->getTensorDesc(); op->update_input_desc_x(*x_desc); @@ -1143,15 +1150,16 @@ struct SigmoidFunctor : public BaseDefaultFunctor #endif // HAVE_HALIDE #ifdef HAVE_CANN - Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + Ptr initCannOp(const std::string& name, + const std::vector > &inputsWrapper, + const std::vector >& nodes) { auto x = inputsWrapper[0].dynamicCast(); - std::string op_name = cv::format("sigmoid_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); auto op_x = nodes[0].dynamicCast()->getOp(); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); auto x_desc = x->getTensorDesc(); op->update_input_desc_x(*x_desc); @@ -1220,17 +1228,18 @@ struct ELUFunctor : public BaseDefaultFunctor #endif // HAVE_HALIDE #ifdef HAVE_CANN - Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + Ptr initCannOp(const std::string& name, + const std::vector > &inputsWrapper, + const std::vector >& nodes) { auto x = inputsWrapper[0].dynamicCast(); - std::string op_name = cv::format("elu_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); op->set_attr_alpha(alpha); auto op_x = nodes[0].dynamicCast()->getOp(); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); auto x_desc = x->getTensorDesc(); op->update_input_desc_x(*x_desc); @@ -1291,15 +1300,16 @@ struct 
AbsValFunctor : public BaseDefaultFunctor #endif // HAVE_HALIDE #ifdef HAVE_CANN - Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + Ptr initCannOp(const std::string& name, + const std::vector > &inputsWrapper, + const std::vector >& nodes) { auto x = inputsWrapper[0].dynamicCast(); - std::string op_name = cv::format("abs_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); auto op_x = nodes[0].dynamicCast()->getOp(); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); auto x_desc = x->getTensorDesc(); op->update_input_desc_x(*x_desc); @@ -1352,15 +1362,16 @@ struct BNLLFunctor : public BaseDefaultFunctor #endif #ifdef HAVE_CANN - Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + Ptr initCannOp(const std::string& name, + const std::vector > &inputsWrapper, + const std::vector >& nodes) { auto x = inputsWrapper[0].dynamicCast(); - std::string op_name = cv::format("bnll_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); auto op_x = nodes[0].dynamicCast()->getOp(); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); auto x_desc = x->getTensorDesc(); op->update_input_desc_x(*x_desc); @@ -1408,15 +1419,16 @@ struct CeilFunctor : public BaseDefaultFunctor #endif #ifdef HAVE_CANN - Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + Ptr initCannOp(const std::string& name, + const std::vector > &inputsWrapper, + const std::vector >& nodes) { auto x = inputsWrapper[0].dynamicCast(); - std::string op_name = cv::format("bnll_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); auto op_x = nodes[0].dynamicCast()->getOp(); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); auto x_desc = x->getTensorDesc(); 
op->update_input_desc_x(*x_desc); @@ -1466,15 +1478,16 @@ struct FloorFunctor : public BaseDefaultFunctor #endif #ifdef HAVE_CANN - Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + Ptr initCannOp(const std::string& name, + const std::vector > &inputsWrapper, + const std::vector >& nodes) { auto x = inputsWrapper[0].dynamicCast(); - std::string op_name = cv::format("floor_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); auto op_x = nodes[0].dynamicCast()->getOp(); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); auto x_desc = x->getTensorDesc(); op->update_input_desc_x(*x_desc); @@ -2320,7 +2333,9 @@ struct PowerFunctor : public BaseFunctor #endif // HAVE_HALIDE #ifdef HAVE_CANN - Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + Ptr initCannOp(const std::string& name, + const std::vector > &inputsWrapper, + const std::vector >& nodes) { CV_Error(Error::StsNotImplemented, ""); } @@ -2574,7 +2589,9 @@ struct ChannelsPReLUFunctor : public BaseFunctor #endif // HAVE_HALIDE #ifdef HAVE_CANN - Ptr initCannOp(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) + Ptr initCannOp(const std::string& name, + const std::vector > &inputsWrapper, + const std::vector >& nodes) { auto x = inputsWrapper[0].dynamicCast(); auto op_x = nodes[0].dynamicCast()->getOp(); @@ -2582,14 +2599,13 @@ struct ChannelsPReLUFunctor : public BaseFunctor auto output_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); - std::string op_name = cv::format("prelu_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); op->update_input_desc_x(*x_desc); std::vector shape_{scale.size[0]}; // scale should be a 1d of shape [n] tensor, and it is a 2d mat of shape [n, 1] in 
opencv - auto op_const_slope = std::make_shared(scale.data, scale.type(), shape_, cv::format("%s_weight", op_name.c_str())); + auto op_const_slope = std::make_shared(scale.data, scale.type(), shape_, cv::format("%s_weight", name.c_str())); op->set_input_weight(*(op_const_slope->getOp())); op->update_input_desc_weight(*(op_const_slope->getTensorDesc())); diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp index 24a87bcc17..5052bd1823 100644 --- a/modules/dnn/src/layers/eltwise_layer.cpp +++ b/modules/dnn/src/layers/eltwise_layer.cpp @@ -849,7 +849,8 @@ public: } #ifdef HAVE_CANN - virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + virtual Ptr initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes) CV_OVERRIDE { CV_Assert(inputsWrapper.size() == 2); CV_Assert(nodes.size() == 2); @@ -866,22 +867,22 @@ public: // add, mul, div, max, min switch (op) { -#define BUILD_CANN_ELTWISE_OP(op_type, class_name, op_name) \ - case op_type: { \ - auto eltwise_op = \ - std::make_shared(op_name); \ - eltwise_op->set_input_x1_by_name(*op_x1, "y"); \ - eltwise_op->set_input_x2_by_name(*op_x2, "y"); \ - eltwise_op->update_input_desc_x1(*x1_desc); \ - eltwise_op->update_input_desc_x2(*x2_desc); \ - eltwise_op->update_output_desc_y(*output_desc); \ - eltwise_operator = eltwise_op; \ +#define BUILD_CANN_ELTWISE_OP(op_type, class_name, op_name) \ + case op_type: { \ + auto eltwise_op = \ + std::make_shared(op_name); \ + eltwise_op->set_input_x1_by_name(*op_x1, x1->name.c_str()); \ + eltwise_op->set_input_x2_by_name(*op_x2, x2->name.c_str()); \ + eltwise_op->update_input_desc_x1(*x1_desc); \ + eltwise_op->update_input_desc_x2(*x2_desc); \ + eltwise_op->update_output_desc_y(*output_desc); \ + eltwise_operator = eltwise_op; \ } break; - BUILD_CANN_ELTWISE_OP(SUM, Add, cv::format("add_%d", index)); - BUILD_CANN_ELTWISE_OP(PROD, Mul, cv::format("mul_%d", index)); - 
BUILD_CANN_ELTWISE_OP(DIV, Xdivy, cv::format("div_%d", index)); - BUILD_CANN_ELTWISE_OP(MAX, Maximum, cv::format("max_%d", index)); - BUILD_CANN_ELTWISE_OP(MIN, Minimum, cv::format("min_%d", index)); + BUILD_CANN_ELTWISE_OP(SUM, Add, name); + BUILD_CANN_ELTWISE_OP(PROD, Mul, name); + BUILD_CANN_ELTWISE_OP(DIV, Xdivy, name); + BUILD_CANN_ELTWISE_OP(MAX, Maximum, name); + BUILD_CANN_ELTWISE_OP(MIN, Minimum, name); #undef BUILD_CANN_ELTWISE_OP default: CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation"); } diff --git a/modules/dnn/src/layers/flatten_layer.cpp b/modules/dnn/src/layers/flatten_layer.cpp index ff30da3a11..226863fd3e 100644 --- a/modules/dnn/src/layers/flatten_layer.cpp +++ b/modules/dnn/src/layers/flatten_layer.cpp @@ -176,15 +176,15 @@ public: } #ifdef HAVE_CANN - virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + virtual Ptr initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes) CV_OVERRIDE { auto x = inputsWrapper[0].dynamicCast(); auto x_desc = x->getTensorDesc(); auto op_x = nodes[0].dynamicCast()->getOp(); auto output_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); - std::string op_name = cv::format("flatten_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); // set attributes int num_axes = x->host->dims; @@ -194,7 +194,7 @@ public: op->set_attr_end_axis(end_axis); // set inputs - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); op->update_input_desc_x(*x_desc); // set outputs op->update_output_desc_y(*output_desc); diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp index 34c8b33515..d33ebb6515 100644 --- a/modules/dnn/src/layers/fully_connected_layer.cpp +++ b/modules/dnn/src/layers/fully_connected_layer.cpp @@ -662,15 +662,15 @@ public: } #ifdef HAVE_CANN - virtual Ptr initCann(const 
std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + virtual Ptr initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes) CV_OVERRIDE { auto x1 = inputsWrapper[0].dynamicCast(); auto x1_desc = x1->getTensorDesc(); auto op_x1 = nodes[0].dynamicCast()->getOp(); auto output_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); - std::string op_name = cv::format("matmul_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); if (!blobs.empty()) // if B is const { @@ -682,7 +682,7 @@ public: // set inputs // set inputs : x2 (weight) - auto op_const_weight = std::make_shared(weightsMat.data, weightsMat.type(), shape(weightsMat), cv::format("%s_w", op_name.c_str())); + auto op_const_weight = std::make_shared(weightsMat.data, weightsMat.type(), shape(weightsMat), cv::format("%s_w", name.c_str())); op->set_input_x2_by_name(*(op_const_weight->getOp()), "y"); op->update_input_desc_x2(*(op_const_weight->getTensorDesc())); } @@ -705,12 +705,12 @@ public: // set inputs // set inputs : x1 (input) - op->set_input_x1_by_name(*op_x1, "y"); + op->set_input_x1_by_name(*op_x1, x1->name.c_str()); op->update_input_desc_x1(*x1_desc); // set inputs : bias (bias) auto bias_mat = bias ? 
biasMat : Mat::zeros(1, weightsMat.size[0], weightsMat.type()); std::vector bias_shape{weightsMat.size[0]}; - auto op_const_bias = std::make_shared(bias_mat.data, bias_mat.type(), bias_shape, cv::format("%s_b", op_name.c_str())); + auto op_const_bias = std::make_shared(bias_mat.data, bias_mat.type(), bias_shape, cv::format("%s_b", name.c_str())); op->set_input_bias(*(op_const_bias->getOp())); op->update_input_desc_bias(*(op_const_bias->getTensorDesc())); diff --git a/modules/dnn/src/layers/lrn_layer.cpp b/modules/dnn/src/layers/lrn_layer.cpp index f012a91730..95599afdc1 100644 --- a/modules/dnn/src/layers/lrn_layer.cpp +++ b/modules/dnn/src/layers/lrn_layer.cpp @@ -445,13 +445,13 @@ public: } #ifdef HAVE_CANN - virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + virtual Ptr initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes) CV_OVERRIDE { auto x = inputsWrapper[0].dynamicCast(); // create operator - std::string op_name = cv::format("lrn_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); // set attributes op->set_attr_depth_radius(size); @@ -465,7 +465,7 @@ public: // set inputs // set inputs : x auto op_x = nodes[0].dynamicCast()->getOp(); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); auto x_desc = x->getTensorDesc(); op->update_input_desc_x(*x_desc); diff --git a/modules/dnn/src/layers/nary_eltwise_layers.cpp b/modules/dnn/src/layers/nary_eltwise_layers.cpp index 3232f0ae5c..280920af35 100644 --- a/modules/dnn/src/layers/nary_eltwise_layers.cpp +++ b/modules/dnn/src/layers/nary_eltwise_layers.cpp @@ -709,7 +709,8 @@ public: #endif #ifdef HAVE_CANN - virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + virtual Ptr initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes) CV_OVERRIDE { CV_Assert(inputsWrapper.size() == 2); 
CV_Assert(nodes.size() == 2); @@ -726,22 +727,22 @@ public: // add, mul, div, max, min switch (op) { -#define BUILD_CANN_ELTWISE_OP(op_type, class_name, op_name) \ - case op_type: { \ - auto eltwise_op = \ - std::make_shared(op_name); \ - eltwise_op->set_input_x1_by_name(*op_x1, "y"); \ - eltwise_op->set_input_x2_by_name(*op_x2, "y"); \ - eltwise_op->update_input_desc_x1(*x1_desc); \ - eltwise_op->update_input_desc_x2(*x2_desc); \ - eltwise_op->update_output_desc_y(*output_desc); \ - eltwise_operator = eltwise_op; \ +#define BUILD_CANN_ELTWISE_OP(op_type, class_name, op_name) \ + case op_type: { \ + auto eltwise_op = \ + std::make_shared(op_name); \ + eltwise_op->set_input_x1_by_name(*op_x1, x1->name.c_str()); \ + eltwise_op->set_input_x2_by_name(*op_x2, x2->name.c_str()); \ + eltwise_op->update_input_desc_x1(*x1_desc); \ + eltwise_op->update_input_desc_x2(*x2_desc); \ + eltwise_op->update_output_desc_y(*output_desc); \ + eltwise_operator = eltwise_op; \ } break; - BUILD_CANN_ELTWISE_OP(OPERATION::ADD, Add, cv::format("add_%d", index)); - BUILD_CANN_ELTWISE_OP(OPERATION::PROD, Mul, cv::format("mul_%d", index)); - BUILD_CANN_ELTWISE_OP(OPERATION::DIV, Xdivy, cv::format("div_%d", index)); - BUILD_CANN_ELTWISE_OP(OPERATION::MAX, Maximum, cv::format("max_%d", index)); - BUILD_CANN_ELTWISE_OP(OPERATION::MIN, Minimum, cv::format("min_%d", index)); + BUILD_CANN_ELTWISE_OP(OPERATION::ADD, Add, name); + BUILD_CANN_ELTWISE_OP(OPERATION::PROD, Mul, name); + BUILD_CANN_ELTWISE_OP(OPERATION::DIV, Xdivy, name); + BUILD_CANN_ELTWISE_OP(OPERATION::MAX, Maximum, name); + BUILD_CANN_ELTWISE_OP(OPERATION::MIN, Minimum, name); #undef BUILD_CANN_ELTWISE_OP default: CV_Error(Error::StsNotImplemented, "Unsupported eltwise operation"); } diff --git a/modules/dnn/src/layers/padding_layer.cpp b/modules/dnn/src/layers/padding_layer.cpp index 359c82a1a3..9cbac98e74 100644 --- a/modules/dnn/src/layers/padding_layer.cpp +++ b/modules/dnn/src/layers/padding_layer.cpp @@ -222,13 +222,13 @@ 
public: } #ifdef HAVE_CANN - virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + virtual Ptr initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes) CV_OVERRIDE { auto x = inputsWrapper[0].dynamicCast(); // create operator - std::string op_name = cv::format("pad_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); // set attributes op->set_attr_mode(paddingType.c_str()); @@ -236,7 +236,7 @@ public: // set inputs // set inputs : x auto op_x = nodes[0].dynamicCast()->getOp(); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); auto x_desc = x->getTensorDesc(); op->update_input_desc_x(*x_desc); // set inputs : paddings @@ -248,13 +248,13 @@ public: } std::vector pads_shape{(int)pads.size()}; Mat paddings_mat(pads_shape, CV_32S, &pads[0]); - auto op_const_paddings = std::make_shared(paddings_mat.data, paddings_mat.type(), pads_shape, cv::format("%s_paddings", op_name.c_str())); + auto op_const_paddings = std::make_shared(paddings_mat.data, paddings_mat.type(), pads_shape, cv::format("%s_paddings", name.c_str())); op->set_input_paddings(*(op_const_paddings->getOp())); op->update_input_desc_paddings(*(op_const_paddings->getTensorDesc())); // set inputs : constant_values std::vector constant_values_shape{1}; Mat constant_values_mat(1, 1, CV_32F, Scalar(paddingValue)); - auto op_const_constant_values = std::make_shared(constant_values_mat.data, constant_values_mat.type(), constant_values_shape, cv::format("%s_constant_values", op_name.c_str())); + auto op_const_constant_values = std::make_shared(constant_values_mat.data, constant_values_mat.type(), constant_values_shape, cv::format("%s_constant_values", name.c_str())); op->set_input_constant_values(*(op_const_constant_values->getOp())); op->update_input_desc_constant_values(*(op_const_constant_values->getTensorDesc())); diff --git 
a/modules/dnn/src/layers/permute_layer.cpp b/modules/dnn/src/layers/permute_layer.cpp index 1aee12d7ae..c2b20af350 100644 --- a/modules/dnn/src/layers/permute_layer.cpp +++ b/modules/dnn/src/layers/permute_layer.cpp @@ -441,13 +441,13 @@ public: } #ifdef HAVE_CANN - virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + virtual Ptr initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes) CV_OVERRIDE { auto x = inputsWrapper[0].dynamicCast(); // create operator - std::string op_name = cv::format("permute_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); // set attributes op->set_attr_order(ge::Operator::OpListInt( @@ -457,7 +457,7 @@ public: // set inputs // set inputs : x auto op_x = nodes[0].dynamicCast()->getOp(); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); auto x_desc = x->getTensorDesc(); op->update_input_desc_x(*x_desc); diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 9b9ced468f..415887b411 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -548,18 +548,17 @@ public: } #ifdef HAVE_CANN - virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + virtual Ptr initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes) CV_OVERRIDE { auto x = inputsWrapper[0].dynamicCast(); auto op_x = nodes[0].dynamicCast()->getOp(); auto x_desc = x->getTensorDesc(); auto output_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); - std::string op_name_base = cv::format("pooling_%d", index); if (type == MAX) { - std::string op_name = cv::format("max_%s", op_name_base.c_str()); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); // set attributes op->set_attr_ksize(ge::Operator::OpListInt( @@ -580,7 +579,7 
@@ public: op->set_attr_ceil_mode(ceilMode); // set inputs - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); op->update_input_desc_x(*x_desc); // set outputs op->update_output_desc_y(*output_desc); @@ -589,8 +588,7 @@ public: } else if (type == AVE) { - std::string op_name = cv::format("avg_%s", op_name_base.c_str()); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); // set attributes op->set_attr_ksize(ge::Operator::OpListInt( @@ -612,7 +610,7 @@ public: op->set_attr_exclusive(cann_exclusive); // set inputs - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); op->update_input_desc_x(*x_desc); // set outputs op->update_output_desc_y(*output_desc); diff --git a/modules/dnn/src/layers/reshape_layer.cpp b/modules/dnn/src/layers/reshape_layer.cpp index 3ff8a225b7..e433ee1787 100644 --- a/modules/dnn/src/layers/reshape_layer.cpp +++ b/modules/dnn/src/layers/reshape_layer.cpp @@ -327,13 +327,13 @@ public: } #ifdef HAVE_CANN - virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + virtual Ptr initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes) CV_OVERRIDE { auto x = inputsWrapper[0].dynamicCast(); // create operator - std::string op_name = cv::format("reshape_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); // set attributes op->set_attr_axis(axis); @@ -342,13 +342,13 @@ public: // set inputs // set inputs : x auto op_x = nodes[0].dynamicCast()->getOp(); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); auto x_desc = x->getTensorDesc(); op->update_input_desc_x(*x_desc); // set inputs : shape std::vector shape_of_shape{(int)newShapeDesc.size()}; Mat shape_mat(shape_of_shape, CV_32S, newShapeDesc.data()); - auto op_const_shape = std::make_shared(shape_mat.data, shape_mat.type(), shape_of_shape, 
cv::format("%s_shape", op_name.c_str())); + auto op_const_shape = std::make_shared(shape_mat.data, shape_mat.type(), shape_of_shape, cv::format("%s_shape", name.c_str())); op->set_input_shape(*(op_const_shape->getOp())); op->update_input_desc_shape(*(op_const_shape->getTensorDesc())); diff --git a/modules/dnn/src/layers/resize_layer.cpp b/modules/dnn/src/layers/resize_layer.cpp index 4342b51b78..8f21266e57 100644 --- a/modules/dnn/src/layers/resize_layer.cpp +++ b/modules/dnn/src/layers/resize_layer.cpp @@ -312,7 +312,8 @@ public: } #ifdef HAVE_CANN - virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + virtual Ptr initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes) CV_OVERRIDE { auto x = inputsWrapper[0].dynamicCast(); auto x_desc = x->getTensorDesc(); @@ -320,23 +321,21 @@ public: auto output_y_desc = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); // create operator - std::string op_name = cv::format("resize_%d", index); - if (interpolation == "nearest") { - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); // set attributes op->set_attr_align_corners(alignCorners); op->set_attr_half_pixel_centers(halfPixelCenters); // set inputs : x - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); op->update_input_desc_x(*x_desc); // set inputs : size std::vector shape_of_size_mat{2}; Mat size_mat(2, 1, CV_32S, Scalar(outHeight, outWidth)); - auto op_const_size = std::make_shared(size_mat.data, size_mat.type(), shape_of_size_mat, cv::format("%s_size", op_name.c_str())); + auto op_const_size = std::make_shared(size_mat.data, size_mat.type(), shape_of_size_mat, cv::format("%s_size", name.c_str())); op->set_input_size(*(op_const_size->getOp())); op->update_input_desc_size(*(op_const_size->getTensorDesc())); @@ -347,21 +346,17 @@ public: } else if (interpolation == "opencv_linear" || interpolation == "bilinear") { - auto 
op = std::make_shared(op_name); + auto op = std::make_shared(name); // set attributes op->set_attr_align_corners(alignCorners); op->set_attr_half_pixel_centers(halfPixelCenters); + std::vector taget_size{(int64_t)outHeight, (int64_t)outWidth}; + op->set_attr_size(taget_size); // set inputs : x - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); op->update_input_desc_x(*x_desc); - // set inputs : size - std::vector shape_of_size_mat{2}; - Mat size_mat(2, 1, CV_32S, Scalar(outHeight, outWidth)); - auto op_const_size = std::make_shared(size_mat.data, size_mat.type(), shape_of_size_mat, cv::format("%s_size", op_name.c_str())); - op->set_input_size(*(op_const_size->getOp())); - op->update_input_desc_size(*(op_const_size->getTensorDesc())); // set outputs op->update_output_desc_y(*output_y_desc); diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index bea497badd..2bf86995f6 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -634,18 +634,74 @@ public: } #ifdef HAVE_CANN - virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + virtual Ptr initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes) CV_OVERRIDE { - CV_Assert(sliceRanges.size() == 1); - CV_Assert(sliceSteps.size() == 1); - CV_Assert(sliceRanges[0].size() == sliceSteps[0].size()); - + bool isSplit = sliceRanges.size() > 1; auto x = inputsWrapper[0].dynamicCast(); + + if (isSplit) + { + // create operator + auto op = std::make_shared(name); + + // set attr + int n_split = static_cast(sliceRanges[0].size()); + op->set_attr_num_split(n_split); + + // set inputs + // set inputs : x + auto op_x = nodes[0].dynamicCast()->getOp(); + op->set_input_x_by_name(*op_x, x->name.c_str()); + auto desc_x = x->getTensorDesc(); + op->update_input_desc_x(*desc_x); + // set inputs : size_splits + std::vector 
size_splits(n_split); + int cnt_split = 0; + for (size_t i = 0; i < sliceRanges.size() - 1; ++i) + { + auto target_range = sliceRanges[i].back(); + size_splits[i] = target_range.end - target_range.start; + cnt_split += size_splits[i]; + } + auto shape_x = desc_x->GetShape().GetDims(); + CV_CheckGT(shape_x[axis], cnt_split, "DNN/CANN: invalid splits"); + size_splits[n_split - 1] = shape_x[axis] - cnt_split; + std::vector shape_size_splits{(int)size_splits.size()}; + Mat size_splits_mat(shape_size_splits, CV_32S, size_splits.data()); + auto op_const_size_splits = std::make_shared(size_splits_mat.data, size_splits_mat.type(), shape_size_splits, cv::format("%s_size_splits", name.c_str())); + op->set_input_size_splits(*(op_const_size_splits->getOp())); + op->update_input_desc_size_splits(*(op_const_size_splits->getTensorDesc())); + // set inputs : split_dim + Mat split_dim_mat(1, 1, CV_32S, Scalar(axis)); + std::vector split_dim_shape{1}; + auto op_const_split_dim = std::make_shared(split_dim_mat.data, split_dim_mat.type(), split_dim_shape, cv::format("%s_split_dim", name.c_str())); + op->set_input_split_dim(*(op_const_split_dim->getOp())); + op->update_input_desc_split_dim(*(op_const_split_dim->getTensorDesc())); + + // set outputs + op->create_dynamic_output_y(n_split); + for (uint32_t i = 0; i < n_split; ++i) + { + auto desc_output_y_i = std::make_shared(ge::Shape(), ge::FORMAT_NCHW, ge::DT_FLOAT); + op->update_dynamic_output_desc_y(i, *desc_output_y_i); + } + + return Ptr(new CannBackendNode(op)); + } + + // ONNX-Slice + CV_CheckEQ(sliceRanges.size(), (size_t)1, ""); + if (hasSteps) + { + CV_CheckEQ(sliceSteps.size(), (size_t)1, "DNN/CANN/Slice: no support to multiple slices"); + CV_CheckEQ(sliceRanges[0].size(), sliceSteps[0].size(), "DNN/CANN/Slice: number of slice ranges does not match number of slice steps"); + } + const int dims = x->host->dims; // create operator - std::string op_name = cv::format("slice_%d", index); - auto op = std::make_shared(op_name); + 
auto op = std::make_shared(name); // retrieve begins, ends, axes and steps std::vector begins, ends, axes, steps; @@ -654,34 +710,37 @@ public: begins.push_back(sliceRanges[0][i].start); ends.push_back(sliceRanges[0][i].end); axes.push_back(i); - steps.push_back(sliceSteps[0][i]); + if (hasSteps) + steps.push_back(sliceSteps[0][i]); + else + steps.push_back(1); // put 1 by default } std::vector shape_{dims}; // set inputs // set inputs : x auto op_x = nodes[0].dynamicCast()->getOp(); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); auto x_desc = x->getTensorDesc(); op->update_input_desc_x(*x_desc); // set inputs : begin Mat begin_mat(shape_, CV_32S, &begins[0]); - auto op_const_begin = std::make_shared(begin_mat.data, begin_mat.type(), shape_, cv::format("%s_begin", op_name.c_str())); + auto op_const_begin = std::make_shared(begin_mat.data, begin_mat.type(), shape_, cv::format("%s_begin", name.c_str())); op->set_input_begin(*(op_const_begin->getOp())); op->update_input_desc_begin(*(op_const_begin->getTensorDesc())); // set inputs : end Mat end_mat(shape_, CV_32S, &ends[0]); - auto op_const_end = std::make_shared(end_mat.data, end_mat.type(), shape_, cv::format("%s_end", op_name.c_str())); + auto op_const_end = std::make_shared(end_mat.data, end_mat.type(), shape_, cv::format("%s_end", name.c_str())); op->set_input_end(*(op_const_end->getOp())); op->update_input_desc_end(*(op_const_end->getTensorDesc())); // set inputs : axes Mat axes_mat(shape_, CV_32S, &axes[0]); - auto op_const_axes = std::make_shared(axes_mat.data, axes_mat.type(), shape_, cv::format("%s_axes", op_name.c_str())); + auto op_const_axes = std::make_shared(axes_mat.data, axes_mat.type(), shape_, cv::format("%s_axes", name.c_str())); op->set_input_axes(*(op_const_axes->getOp())); op->update_input_desc_axes(*(op_const_axes->getTensorDesc())); // set inputs : strides Mat strides_mat(shape_, CV_32S, &steps[0]); - auto op_const_strides = 
std::make_shared(strides_mat.data, strides_mat.type(), shape_, cv::format("%s_strides", op_name.c_str())); + auto op_const_strides = std::make_shared(strides_mat.data, strides_mat.type(), shape_, cv::format("%s_strides", name.c_str())); op->set_input_strides(*(op_const_strides->getOp())); op->update_input_desc_strides(*(op_const_strides->getTensorDesc())); diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp index c1ea4d2297..57afe4dbfa 100644 --- a/modules/dnn/src/layers/softmax_layer.cpp +++ b/modules/dnn/src/layers/softmax_layer.cpp @@ -365,13 +365,13 @@ public: } #ifdef HAVE_CANN - virtual Ptr initCann(const std::vector > &inputsWrapper, const int index, const std::vector >& nodes) CV_OVERRIDE + virtual Ptr initCann(const std::vector > &inputsWrapper, + const std::vector >& nodes) CV_OVERRIDE { auto x = inputsWrapper[0].dynamicCast(); // create operator - std::string op_name = cv::format("softmax_%d", index); - auto op = std::make_shared(op_name); + auto op = std::make_shared(name); // set attributes op->set_attr_axes(ge::Operator::OpListInt( @@ -381,7 +381,7 @@ public: // set inputs // set inputs : x auto op_x = nodes[0].dynamicCast()->getOp(); - op->set_input_x_by_name(*op_x, "y"); + op->set_input_x_by_name(*op_x, x->name.c_str()); auto x_desc = x->getTensorDesc(); op->update_input_desc_x(*x_desc); diff --git a/modules/dnn/src/net_cann.cpp b/modules/dnn/src/net_cann.cpp index 62d45d85c5..0b73427e40 100644 --- a/modules/dnn/src/net_cann.cpp +++ b/modules/dnn/src/net_cann.cpp @@ -124,6 +124,30 @@ void NetImplCann::initBackend(const std::vector& blobsToKeep_) if (!newWasSupported) return ; + // initialize each blob wrappers' names + for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); ++it) + { + const LayerData& ld = it->second; + if (ld.id == 0) + { + for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) + { + auto cannWrapper = ld.outputBlobsWrappers[i].dynamicCast(); + // 
cannWrapper->name = netInputLayer->outNames.empty() ? cv::format("%s_%d", ld.name.c_str(), i) : netInputLayer->outNames[i]; + cannWrapper->name = std::string("y"); + } + } + else + { + for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) + { + auto cannWrapper = ld.outputBlobsWrappers[i].dynamicCast(); + // cannWrapper->name = ld.outputBlobsWrappers.size() > 1 ? (ld.name + ":" + std::to_string(i)) : ld.name; + cannWrapper->name = ld.outputBlobsWrappers.size() > 1 ? (std::string("y") + std::to_string(i)) : std::string("y"); + } + } + } + // convert layers to CANN operators, // collect graph input and output operators, // collect and input and output wrappers @@ -141,15 +165,16 @@ void NetImplCann::initBackend(const std::vector& blobsToKeep_) { for (int i = 0; i < ld.outputBlobsWrappers.size(); i++) { - std::string inputName = netInputLayer->outNames.empty() ? cv::format("%s_%d", ld.name.c_str(), i) : netInputLayer->outNames[i]; - auto inputOp = std::make_shared(inputName); - // retrieve tensor description auto wrapper = ld.outputBlobsWrappers[i]; graphInputWrappers.push_back(wrapper); auto cannWrapper = wrapper.dynamicCast(); CV_Assert(!cannWrapper.empty()); + // create graph input op + std::string inputOpName = netInputLayer->outNames.empty() ? 
cv::format("%s_%d", ld.name.c_str(), i) : netInputLayer->outNames[i]; + auto inputOp = std::make_shared(inputOpName); + inputOp->update_input_desc_x(*(cannWrapper->desc_)); inputOp->update_output_desc_y(*(cannWrapper->desc_)); @@ -170,14 +195,14 @@ void NetImplCann::initBackend(const std::vector& blobsToKeep_) { layerInputNodes.push_back(netInputNodes[layerInputOid]); } - else // here we do not consider an op with multiple outputs + else { layerInputNodes.push_back(layers[layerInputLid].backendNodes[preferableBackend]); } } CV_LOG_INFO(NULL, "DNN/CANN: converting layer " << ld.name << "@" << ld.type << "@" << ld.id << " to CANN operator"); - auto backendNode = layer->initCann(ld.inputBlobsWrappers, ld.id, layerInputNodes); + auto backendNode = layer->initCann(ld.inputBlobsWrappers, layerInputNodes); // it's ok if ld.name is empty // collect outputs bool isOutputNode = ld.consumers.size() == 0 ? true : false; @@ -201,7 +226,7 @@ void NetImplCann::initBackend(const std::vector& blobsToKeep_) // build graph from collected graph inputs and outputs CV_LOG_INFO(NULL, "DNN/CANN: building ge::Graph"); - std::string graphName = cv::format("graph_%d", 0); + std::string graphName = cv::format("graph_%d", networkId); std::shared_ptr graph = std::make_shared(graphName.c_str()); (void)graph->SetInputs(graphInputOps); (void)graph->SetOutputs(graphOutputOps); @@ -292,9 +317,9 @@ std::shared_ptr compileCannGraph(std::shared_ptr #if 0 // (optional). Dump model - AscendString graph_name; - graph.GetName(graph_name); - aclgrphDumpGraph(graph, graph_name.GetString(), 7); + ge::AscendString graph_name; + graph->GetName(graph_name); + aclgrphDumpGraph(*graph, graph_name.GetString(), 7); // (optional). 
Save model aclgrphSaveModel(graph_name.GetString(), *om_model); #endif diff --git a/modules/dnn/src/op_cann.cpp b/modules/dnn/src/op_cann.cpp index 6d8a57446b..5894aef337 100644 --- a/modules/dnn/src/op_cann.cpp +++ b/modules/dnn/src/op_cann.cpp @@ -177,7 +177,7 @@ void CannNet::bindInputWrappers(const std::vector>& inputWra void CannNet::bindOutputWrappers(const std::vector>& outputWrappers) { - CV_Assert(outputWrappers.size() == getOutputNum()); + CV_CheckEQ(outputWrappers.size(), getOutputNum(), "DNN/CANN: Built graph does not have the same number of outputs of model description"); for (int i = 0; i < outputWrappers.size(); ++i) { auto wrapper = outputWrappers[i].dynamicCast(); diff --git a/modules/dnn/src/op_cann.hpp b/modules/dnn/src/op_cann.hpp index 2237dd4855..c60c311b7f 100644 --- a/modules/dnn/src/op_cann.hpp +++ b/modules/dnn/src/op_cann.hpp @@ -106,6 +106,7 @@ CV__DNN_INLINE_NS_END Mat* host; std::shared_ptr desc_; + std::string name; }; class CannNet From 95f087cd0b261ce4dc0f8b8e4d55ca5d1b1f6dec Mon Sep 17 00:00:00 2001 From: Spike Date: Mon, 13 Mar 2023 22:54:47 -0600 Subject: [PATCH 045/199] Fix reference counting errors in registerNewType --- modules/python/src2/cv2.cpp | 1 + modules/python/src2/pycompat.hpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/modules/python/src2/cv2.cpp b/modules/python/src2/cv2.cpp index 5d952412f3..3f976c9a81 100644 --- a/modules/python/src2/cv2.cpp +++ b/modules/python/src2/cv2.cpp @@ -284,6 +284,7 @@ static bool init_submodule(PyObject * root, const char * name, PyMethodDef * met static inline bool registerTypeInModuleScope(PyObject* module, const char* type_name, PyObject* type_obj) { + Py_INCREF(type_obj); /// Give PyModule_AddObject a reference to steal. 
if (PyModule_AddObject(module, type_name, type_obj) < 0) { PyErr_Format(PyExc_ImportError, diff --git a/modules/python/src2/pycompat.hpp b/modules/python/src2/pycompat.hpp index ceaee06f49..bd3956dbc0 100644 --- a/modules/python/src2/pycompat.hpp +++ b/modules/python/src2/pycompat.hpp @@ -338,8 +338,10 @@ PyObject* pyopencv_from(const TYPE& src) if (!registerNewType(m, #EXPORT_NAME, (PyObject*)pyopencv_##CLASS_ID##_TypePtr, SCOPE)) \ { \ printf("Failed to register a new type: " #EXPORT_NAME ", base (" #BASE ") in " SCOPE " \n"); \ + Py_DECREF(pyopencv_##CLASS_ID##_TypePtr); \ ERROR_HANDLER; \ } \ + Py_DECREF(pyopencv_##CLASS_ID##_TypePtr); \ } // Debug module load: From 7d032de7e81242000a5a3e1a9312f8bb16fe8c87 Mon Sep 17 00:00:00 2001 From: "tingbo.liao" Date: Tue, 14 Mar 2023 17:02:44 +0800 Subject: [PATCH 046/199] Fix bugs of test case failure 4 failed tests in opencv_test_dnn are listed below: * Test_Caffe_layers.Conv_Elu/0, where GetParam() = OCV/CPU * Test_ONNX_layers.ConvResizePool1d/0, where GetParam() = OCV/CPU * Test_TensorFlow_layers.tf_reshape_nhwc/0, where GetParam() = OCV/CPU * Test_Torch_layers.net_inception_block/0, where GetParam() = OCV/CPU In the winofunc_AtXA_8x8_f32 and winofunc_BtXB_8x8_f32 implementations, incorrect input parameters cause the test failures. Add four new different variables for the last four input parameters of v_transpose4x4 to fix bugs, and update related comments. 
Signed-off-by: tingbo.liao --- .../layers/cpu_kernels/conv_winograd_f63.cpp | 139 +++++++----------- 1 file changed, 54 insertions(+), 85 deletions(-) diff --git a/modules/dnn/src/layers/cpu_kernels/conv_winograd_f63.cpp b/modules/dnn/src/layers/cpu_kernels/conv_winograd_f63.cpp index 27998e4bcc..c844acd730 100644 --- a/modules/dnn/src/layers/cpu_kernels/conv_winograd_f63.cpp +++ b/modules/dnn/src/layers/cpu_kernels/conv_winograd_f63.cpp @@ -465,73 +465,59 @@ void winofunc_BtXB_8x8_f32(const float* inptr, int inpstep, v_float32x4 y50 = t00 + t10, y51 = t01 + t11; v_float32x4 y60 = t10 - t00, y61 = t11 - t01; - /* transpose 8x8 matrix in-place with some renumeration of the elements: */ - /* Y: */ - /* y00 y01 */ - /* y10 y11 */ - /* ... */ - /* y70 y71 */ - /* Y': */ - /* y00 y40 */ - /* y10 y50 */ - /* y20 y60 */ - /* y30 y70 */ - /* y01 y41 */ - /* y11 y51 */ - /* y21 y61 */ - /* y31 y71 */ - /* in other words, y40 <-> y01, y50 <-> y11, y60 <-> y21, y70 <-> y31 */ - - v_transpose4x4(y00, y10, y20, y30, y00, y10, y20, y30); - v_transpose4x4(y01, y11, y21, y31, y01, y11, y21, y31); - v_transpose4x4(y40, y50, y60, y70, y40, y50, y60, y70); - v_transpose4x4(y41, y51, y61, y71, y41, y51, y61, y71); + /* transpose 8x8 matrix with v_transpose4x4 */ + + v_float32x4 y000, y100, y200, y300, y010, y110, y210, y310, y400, y500, y600, y700, y410, y510, y610, y710; + v_transpose4x4(y00, y10, y20, y30, y000, y100, y200, y300); + v_transpose4x4(y01, y11, y21, y31, y010, y110, y210, y310); + v_transpose4x4(y40, y50, y60, y70, y400, y500, y600, y700); + v_transpose4x4(y41, y51, y61, y71, y410, y510, y610, y710); /* Z[0] = [1.f, 0.f, -5.25f, 0.f, 5.25f, 0.f, -1.f, 0.f]*Y */ /* Z[7] = [0.f, -1.f, 0.f, 5.25f, 0.f, -5.25f, 0.f, 1.f]*Y */ - t00 = y01 - y20; - t01 = y41 - y60; - t10 = y30 - y11; - t11 = y70 - y51; - z00 = v_fma(t00, q5_25, y00 - y21); - z01 = v_fma(t01, q5_25, y40 - y61); - z70 = v_fma(t10, q5_25, y31 - y10); - z71 = v_fma(t11, q5_25, y71 - y50); + t00 = y010 - y200; + 
t01 = y410 - y600; + t10 = y300 - y110; + t11 = y700 - y510; + z00 = v_fma(t00, q5_25, y000 - y210); + z01 = v_fma(t01, q5_25, y400 - y610); + z70 = v_fma(t10, q5_25, y310 - y100); + z71 = v_fma(t11, q5_25, y710 - y500); /* Z[1] = [0.f, 1.f, 1.f, -4.25f, -4.25f, 1.f, 1.f, 0.f]*Y */ /* Z[2] = [0.f, -1.f, 1.f, 4.25f, -4.25f, -1.f, 1.f, 0.f]*Y */ - t00 = v_fma(y30, qm4_25, y10 + y11); - t01 = v_fma(y70, qm4_25, y50 + y51); - t10 = v_fma(y01, qm4_25, y20 + y21); - t11 = v_fma(y41, qm4_25, y60 + y61); + t00 = v_fma(y300, qm4_25, y100 + y110); + t01 = v_fma(y700, qm4_25, y500 + y510); + t10 = v_fma(y010, qm4_25, y200 + y210); + t11 = v_fma(y410, qm4_25, y600 + y610); z10 = t00 + t10; z11 = t01 + t11; z20 = t10 - t00; z21 = t11 - t01; /* Z[3] = [0.f, 0.5f, 0.25f, -2.5f, -1.25f, 2.f, 1.f, 0.f]*Y */ /* Z[4] = [0.f, -0.5f, 0.25f, 2.5f, -1.25f, -2.f, 1.f, 0.f]*Y */ - t00 = v_fma(y10, q0_5, y11 + y11); - t01 = v_fma(y50, q0_5, y51 + y51); - t10 = v_fma(y20, q0_25, y21); - t11 = v_fma(y60, q0_25, y61); - t00 = v_fma(y30, qm2_5, t00); - t01 = v_fma(y70, qm2_5, t01); - t10 = v_fma(y01, qm1_25, t10); - t11 = v_fma(y41, qm1_25, t11); + t00 = v_fma(y100, q0_5, y110 + y110); + t01 = v_fma(y500, q0_5, y510 + y510); + t10 = v_fma(y200, q0_25, y210); + t11 = v_fma(y600, q0_25, y610); + t00 = v_fma(y300, qm2_5, t00); + t01 = v_fma(y700, qm2_5, t01); + t10 = v_fma(y010, qm1_25, t10); + t11 = v_fma(y410, qm1_25, t11); z30 = t00 + t10; z31 = t01 + t11; z40 = t10 - t00; z41 = t11 - t01; /* Z[5] = [0.f, 2.f, 4.f, -2.5f, -5.f, 0.5f, 1.f, 0.f]*Y */ /* Z[6] = [0.f, -2.f, 4.f, 2.5f, -5.f, -0.5f, 1.f, 0.f]*Y */ - t00 = v_fma(y11, q0_5, y10 + y10); - t01 = v_fma(y51, q0_5, y50 + y50); - t10 = v_fma(y20, q4, y21); - t11 = v_fma(y60, q4, y61); - t00 = v_fma(y30, qm2_5, t00); - t01 = v_fma(y70, qm2_5, t01); - t10 = v_fma(y01, qm5, t10); - t11 = v_fma(y41, qm5, t11); + t00 = v_fma(y110, q0_5, y100 + y100); + t01 = v_fma(y510, q0_5, y500 + y500); + t10 = v_fma(y200, q4, y210); + t11 = v_fma(y600, q4, 
y610); + t00 = v_fma(y300, qm2_5, t00); + t01 = v_fma(y700, qm2_5, t01); + t10 = v_fma(y010, qm5, t10); + t11 = v_fma(y410, qm5, t11); z50 = t00 + t10; z51 = t01 + t11; z60 = t10 - t00; z61 = t11 - t01; @@ -633,36 +619,20 @@ void winofunc_AtXA_8x8_f32(const float* inptr, int inpstep, v_float32x4 y60 = v_setall_f32(0.f), y61 = y60, y70 = y60, y71 = y60; - /* transpose 8x8 matrix in-place with some renumeration of the elements: */ - /* Y: */ - /* y00 y01 */ - /* y10 y11 */ - /* ... */ - /* y50 y51 */ - /* 0 0 */ - /* 0 0 */ - /* Y': */ - /* y00 y40 */ - /* y10 y50 */ - /* y20 y60 */ - /* y30 y70 */ - /* y01 y41 */ - /* y11 y51 */ - /* y21 y61 */ - /* y31 y71 */ - /* in other words, y40 <-> y01, y50 <-> y11, y60 <-> y21, y70 <-> y31 */ - - v_transpose4x4(y00, y10, y20, y30, y00, y10, y20, y30); - v_transpose4x4(y01, y11, y21, y31, y01, y11, y21, y31); - v_transpose4x4(y40, y50, y60, y70, y40, y50, y60, y70); - v_transpose4x4(y41, y51, y61, y71, y41, y51, y61, y71); - - s12_0 = y10 + y20; s12_1 = y50 + y60; - s34_0 = y30 + y01; s34_1 = y70 + y41; - s56_0 = y11 + y21; s56_1 = y51 + y61; - - z00 = y00 + s12_0 + s34_0 + s56_0; - z01 = y40 + s12_1 + s34_1 + s56_1; + /* transpose 8x8 matrix with v_transpose4x4 */ + + v_float32x4 y000, y100, y200, y300, y010, y110, y210, y310, y400, y500, y600, y700, y410, y510, y610, y710; + v_transpose4x4(y00, y10, y20, y30, y000, y100, y200, y300); + v_transpose4x4(y01, y11, y21, y31, y010, y110, y210, y310); + v_transpose4x4(y40, y50, y60, y70, y400, y500, y600, y700); + v_transpose4x4(y41, y51, y61, y71, y410, y510, y610, y710); + + s12_0 = y100 + y200; s12_1 = y500 + y600; + s34_0 = y300 + y010; s34_1 = y700 + y410; + s56_0 = y110 + y210; s56_1 = y510 + y610; + + z00 = y000 + s12_0 + s34_0 + s56_0; + z01 = y400 + s12_1 + s34_1 + s56_1; a0 = v_setall_f32(0.25f), a1 = v_setall_f32(4.0f); z20 = v_fma(s56_0, a0, v_fma(s34_0, a1, s12_0)); @@ -672,14 +642,13 @@ void winofunc_AtXA_8x8_f32(const float* inptr, int inpstep, z40 = v_fma(s56_0, 
a0, v_fma(s34_0, a1, s12_0)); z41 = v_fma(s56_1, a0, v_fma(s34_1, a1, s12_1)); - s12_0 = y10 - y20; s12_1 = y50 - y60; - s34_0 = y30 - y01; s34_1 = y70 - y41; - s56_0 = y11 - y21; s56_1 = y51 - y61; + s12_0 = y100 - y200; s12_1 = y500 - y600; + s34_0 = y300 - y010; s34_1 = y700 - y410; + s56_0 = y110 - y210; s56_1 = y510 - y610; a0 = v_setall_f32(1.f/32), a1 = v_setall_f32(32.0f); - z50 = v_fma(s56_0, a0, v_fma(s34_0, a1, y31 + s12_0)); - z51 = v_fma(s56_1, a0, v_fma(s34_1, a1, y71 + s12_1)); - + z50 = v_fma(s56_0, a0, v_fma(s34_0, a1, y310 + s12_0)); + z51 = v_fma(s56_1, a0, v_fma(s34_1, a1, y710 + s12_1)); a0 = v_setall_f32(0.5f), a1 = v_setall_f32(2.0f); z10 = v_fma(s56_0, a0, v_fma(s34_0, a1, s12_0)); z11 = v_fma(s56_1, a0, v_fma(s34_1, a1, s12_1)); From 386be97ce2deaeac9c5252d635b0c7491d16d25f Mon Sep 17 00:00:00 2001 From: zihaomu Date: Tue, 14 Mar 2023 19:06:06 +0800 Subject: [PATCH 047/199] fix bug in layer fusion --- modules/dnn/src/net_impl_fuse.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dnn/src/net_impl_fuse.cpp b/modules/dnn/src/net_impl_fuse.cpp index 935f71833f..38aa6476cd 100644 --- a/modules/dnn/src/net_impl_fuse.cpp +++ b/modules/dnn/src/net_impl_fuse.cpp @@ -247,7 +247,7 @@ void Net::Impl::fuseLayers(const std::vector& blobsToKeep_) { // fuse naryEltwise layer // bias must already be computed to fuse => bias layer must appear before convolution - if (biasLayerData->id < ld.id) + if (biasLayerData->id < ld.id && biasLayerData->consumers.size() == 1) { // conv + naryEltwise. 
CV_Assert_N(biasLayerData->outputBlobs.size() == 1, ld.inputBlobs.size() == 1); From b204c3981588351aab39ec30c2e1aabc7b30c733 Mon Sep 17 00:00:00 2001 From: Vladimir Ponomarev Date: Tue, 14 Mar 2023 15:00:44 +0300 Subject: [PATCH 048/199] Merge pull request #23276 from vovka643:flann_corrections Fixed potential memory leak in flann Issue #22426 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake --- .../include/opencv2/flann/flann_base.hpp | 23 +++++++++++++++---- modules/flann/src/miniflann.cpp | 10 ++++---- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/modules/flann/include/opencv2/flann/flann_base.hpp b/modules/flann/include/opencv2/flann/flann_base.hpp index 258ec38d20..af0b380bbf 100644 --- a/modules/flann/include/opencv2/flann/flann_base.hpp +++ b/modules/flann/include/opencv2/flann/flann_base.hpp @@ -45,6 +45,21 @@ namespace cvflann { +class FILEScopeGuard { + +public: + explicit FILEScopeGuard(FILE* file) { + file_ = file; + }; + + ~FILEScopeGuard() { + fclose(file_); + }; + +private: + FILE* file_; +}; + /** * Sets the log level used for all flann functions @@ -69,7 +84,6 @@ struct SavedIndexParams : public IndexParams } }; - template NNIndex* load_saved_index(const Matrix& dataset, const cv::String& filename, Distance distance) { @@ -79,13 +93,13 @@ NNIndex* load_saved_index(const Matrix if (fin == NULL) 
{ return NULL; } + FILEScopeGuard fscgd(fin); + IndexHeader header = load_header(fin); if (header.data_type != Datatype::type()) { - fclose(fin); FLANN_THROW(cv::Error::StsError, "Datatype of saved index is different than of the one to be created."); } if ((size_t(header.rows) != dataset.rows)||(size_t(header.cols) != dataset.cols)) { - fclose(fin); FLANN_THROW(cv::Error::StsError, "The index saved belongs to a different dataset"); } @@ -93,7 +107,6 @@ NNIndex* load_saved_index(const Matrix params["algorithm"] = header.index_type; NNIndex* nnIndex = create_index_by_type(dataset, params, distance); nnIndex->loadIndex(fin); - fclose(fin); return nnIndex; } @@ -107,7 +120,7 @@ public: typedef typename Distance::ResultType DistanceType; Index(const Matrix& features, const IndexParams& params, Distance distance = Distance() ) - : index_params_(params) + :index_params_(params) { flann_algorithm_t index_type = get_param(params,"algorithm"); loaded_ = false; diff --git a/modules/flann/src/miniflann.cpp b/modules/flann/src/miniflann.cpp index a1146ec2e7..ea0494ddec 100644 --- a/modules/flann/src/miniflann.cpp +++ b/modules/flann/src/miniflann.cpp @@ -767,11 +767,15 @@ bool Index::load(InputArray _data, const String& filename) Mat data = _data.getMat(); bool ok = true; release(); + FILE* fin = fopen(filename.c_str(), "rb"); - if (fin == NULL) + if (fin == NULL) { return false; + } + FILEScopeGuard fscgd(fin); ::cvflann::IndexHeader header = ::cvflann::load_header(fin); + algo = header.index_type; featureType = header.data_type == FLANN_UINT8 ? CV_8U : header.data_type == FLANN_INT8 ? 
CV_8S : @@ -786,7 +790,6 @@ bool Index::load(InputArray _data, const String& filename) { fprintf(stderr, "Reading FLANN index error: the saved data size (%d, %d) or type (%d) is different from the passed one (%d, %d), %d\n", (int)header.rows, (int)header.cols, featureType, data.rows, data.cols, data.type()); - fclose(fin); return false; } @@ -799,7 +802,6 @@ bool Index::load(InputArray _data, const String& filename) (distType != FLANN_DIST_HAMMING && featureType == CV_32F)) ) { fprintf(stderr, "Reading FLANN index error: unsupported feature type %d for the index type %d\n", featureType, algo); - fclose(fin); return false; } @@ -839,8 +841,6 @@ bool Index::load(InputArray _data, const String& filename) ok = false; } - if( fin ) - fclose(fin); return ok; } From 56a4877e3072b0a2b7c981795276af334e327cc2 Mon Sep 17 00:00:00 2001 From: TuNanTang Date: Tue, 14 Mar 2023 21:09:53 +0800 Subject: [PATCH 049/199] Merge pull request #23341 from TuNanTang:3.4 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [ ] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [ ] The feature is well documented and sample code can be built with the project CMake --- .../core/mat_mask_operations/mat_mask_operations.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/cpp/tutorial_code/core/mat_mask_operations/mat_mask_operations.cpp b/samples/cpp/tutorial_code/core/mat_mask_operations/mat_mask_operations.cpp index 0fece804b2..c1c67b90ce 100644 --- a/samples/cpp/tutorial_code/core/mat_mask_operations/mat_mask_operations.cpp +++ b/samples/cpp/tutorial_code/core/mat_mask_operations/mat_mask_operations.cpp @@ -92,7 +92,7 @@ void Sharpen(const Mat& myImage,Mat& Result) for(int i= nChannels;i < nChannels*(myImage.cols-1); ++i) { - *output++ = saturate_cast(5*current[i] + output[i] = saturate_cast(5*current[i] -current[i-nChannels] - current[i+nChannels] - previous[i] - next[i]); } } From 6bac5453d1643f98922e131068c391c22ff92480 Mon Sep 17 00:00:00 2001 From: zihaomu Date: Wed, 15 Mar 2023 08:24:55 +0800 Subject: [PATCH 050/199] fix bug in 32 bit cpu --- modules/dnn/src/layers/cpu_kernels/convolution.cpp | 6 +++--- modules/dnn/src/layers/cpu_kernels/convolution.hpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/dnn/src/layers/cpu_kernels/convolution.cpp b/modules/dnn/src/layers/cpu_kernels/convolution.cpp index 0f0da11ec7..6b0f9c865e 100644 --- a/modules/dnn/src/layers/cpu_kernels/convolution.cpp +++ b/modules/dnn/src/layers/cpu_kernels/convolution.cpp @@ -103,13 +103,13 @@ Ptr initFastConv( } conv->conv_type = ifRunDepthWise && conv_dim != CONV_3D ? CONV_TYPE_DEPTHWISE : - useWinograd && (conv_dim == CONV_2D && (conv->useSIMD128 || conv->useAVX2 || conv->useNEON) && + useWinograd && (conv_dim == CONV_2D && (conv->useSIMD128 || conv->useAVX || conv->useAVX2 || conv->useNEON) && Hk == 3 && Wk == 3 && dilation_h == 1 && dilation_w == 1 && stride_h == 1 && stride_w == 1) ? CONV_TYPE_WINOGRAD3X3 : (ifRunDepthWiseRemain ? 
CONV_TYPE_DEPTHWISE_REMAIN : CONV_TYPE_GENERIC); -#if !(CV_NEON || CV_SIMD128 || CV_TRY_AVX2) - if (conv->conv_type == CONV_TYPE_WINOGRAD3X3) // Disabel Winograd when CV_NEON, CV_SIMD128 and CV_TRY_AVX2 are not available. +#if !(CV_NEON || CV_SIMD128 || CV_TRY_AVX || CV_TRY_AVX2) + if (conv->conv_type == CONV_TYPE_WINOGRAD3X3) // Disabel Winograd when CV_NEON, CV_SIMD128 ,CV_TRY_AVX and CV_TRY_AVX2 are not available. conv->conv_type = CONV_TYPE_GENERIC; #endif diff --git a/modules/dnn/src/layers/cpu_kernels/convolution.hpp b/modules/dnn/src/layers/cpu_kernels/convolution.hpp index 0a077bf800..3d44c3189b 100644 --- a/modules/dnn/src/layers/cpu_kernels/convolution.hpp +++ b/modules/dnn/src/layers/cpu_kernels/convolution.hpp @@ -28,13 +28,13 @@ enum { CONV_WINO_AREA=CONV_WINO_SIZE*CONV_WINO_SIZE, CONV_WINO_KBLOCK = 4, -#if (CV_NEON && CV_NEON_AARCH64) || CV_TRY_AVX2 +#if (CV_NEON && CV_NEON_AARCH64) || CV_TRY_AVX || CV_TRY_AVX2 CONV_WINO_IBLOCK = 6, #else CONV_WINO_IBLOCK = 3, #endif -#if CV_TRY_AVX2 +#if CV_TRY_AVX || CV_TRY_AVX2 CONV_WINO_ATOM_F32 = 8, #else CONV_WINO_ATOM_F32 = 4, From 0d455e05c10c4592f20c919347ddbc95b7d448b3 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 15 Mar 2023 11:27:55 +0300 Subject: [PATCH 051/199] add py charuco sample+choriginal.jpg+camera_params --- samples/data/aruco/choriginal.jpg | Bin 0 -> 117428 bytes .../data/aruco/tutorial_camera_charuco.yml | 21 ++++ samples/python/aruco_detect_board_charuco.py | 115 ++++++++++++++++++ 3 files changed, 136 insertions(+) create mode 100644 samples/data/aruco/choriginal.jpg create mode 100644 samples/data/aruco/tutorial_camera_charuco.yml create mode 100644 samples/python/aruco_detect_board_charuco.py diff --git a/samples/data/aruco/choriginal.jpg b/samples/data/aruco/choriginal.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3ca7c3149ffed5fced1c6f993f34dde54d68a161 GIT binary patch literal 117428 zcmbTdWmFtb*gZG|83+yufgppEV8MfHg1fuB4=%wqxC96iB)Ge~4iI1hgS$@#cb7lE 
z_dRF#%YNA1>eF?)Pgi%}+tpQ1JKt}^&0MXFUG0`zz|Ce5lurOa^{de-;L;m;HSE$Iy zsIP%&!2c=o|0#R!0pOz}t-lIHMxqA1!bd{JM|vIrfL?H-{tvttu>YHoULm8Pq5)rE zV7?S+#Cw5{jQj!`^#$=uX}_2202F*w0xEVfG(r^@Ra|=r=Ya3fvH+K(D zFK?fa(6I1`$nQ}}$v;w3(|)FB{LU{ZEGjN3Evu_IH49~c~h43CUX!KPc7yb*cSO3)k@IPKiuRLFN zWPB7p$CVqR@81`Vg`$y-yG$=5`*TpM(!|55uNX8-Sq1^@qI z_J0xkzj-YJu#k~nDi0YSAOcu!hQ0dDtZTC@1EAhQ$!=lNj0-9qWSxas8B2paXdaPv zo9#R3O6zIMxl96g{G9{tX261cWudRr(lM_!EWT}3@lssgZY9azBmWAKoqm6Oy-22r zv95$-P7jVTMvlt3?r*1MzSC$UJ5y7j1;esH2tAAf+tj4`WtrCAqoi6Oxz{s5MAj#u zPx4^j%6J=3CV9Li9`wBjlw&*69tRgfi={1apu*t`sBHmOPgTmUr1c#b;O$ zLp#cs8tpNSh?bchI0w9W+pWtB#JE=E&Bg>}@XQ+*Ks&1pvJgta`+uIxMj%>5dcdaI zCcz8Ju1vvT0XJ@nU(AaN2#>#&bq{q-RWJb1Any<=co#pFGTIrXV%_y=dRNWe6T1}) zQ#v)N$(b|chmaDPzu%tD15@eGc>@JkrX~QJ5{Cnv%rTn0&G%xrc$WL7n&m{0+vkXO zKF7*B6S!yAL*{;`=B8)2vSB_c48@3N@0?4}03#Yg{_&fp=B8&7#oZ~K8&)O{GPcz< zu1x0=07=6KwMak%I8wlL9gAOmbx1CUCh3GC=%E*1kgTn80iB73ya39-&ELcF)%Q(t=60j9tI zXNH~4o`owh4Rh>ZGNxH@_}vFQ#XUIt`G5=xcxp6A{yJ5GgkeJu#BsPN&7s}CB0~zg znD|&XVnWE0#gDfJy<4)OR-E6%WmuaE+-0*k8Q;c!C6{5f)TmDLaorn)P&Ijqn{oy0agWw#_Z@vBVmK{xt zOpGDlj}*(#0LI+i+A)M7Hc`Y_L|;-VN=#A4upY3LJ=o_iyF&wU&{4p->ZP!)Y5!~1 zg`glSTQE+95TTVn)hO^?u$9YC3}D*6T#6+E03&WRYB#)dY=wVuBoaf%#GV0vr9=t^ zfE-qwU{4dGvPE^ncV3S8HB*p$0aovAx$T1?q;0~bX_YdFP~E$F3uS1UYw>ODWIQjR z^ck>^Q=p~eupze-IaKuyh}|IzIfaWv-24^fKYayTi(S}xKN;tzw3>%H2~XQyG$Bi) zX7DshDT>U4H=E*;|2PIY>hTC9uo9}sV z6M^GIpti#~eYM$;c?g5OEj1oTyxfihJAo9jZw{v`?P_ zJ|^bEIh8hF^Y>K6yVUWLhClliDeb|fbKMNt~?U@d78t5lX9gTkO}wyCaXa6 zY|=2`x_mls{4QY$>s!_O@C;25dx}UzV~ZK1!-a(^uZSrPMR8sU%Y~d);={d=&<>5T zy{0O1;`F*y$dHho(G&hp)GqoYNe{;SKU1Sk0xsz^Wao)-wzYq;|FCovT{!DV*c|1W zWxY}kn)c21`_LGC5T`&z&tB7@oQvG{VcTd7g=_|M<&$b{O5x-IRr1)7uIY_P_;NOr zN(Z?EoMpf#ycivexSxQG2Up@}5M-10VUIKi+lEtk>3enYALQ?q3lT0Ek7TPv< z6)ZGWS!JG97-qOMf#4p797r-E(RC!1m}qy85JZ|FO%U6{Q5zJ;0qp*UE z$v5z8o^LoE=PkY$rIiDt#5YR7&SmDWJ=Qb%gh`YcHBrm1Ba^o`zNvB?7gao;iPPOAF(U1VR5^`U%ey z@!B+;Ec>Zfcgr?o7nPGT8QK!5O0_39lt9UEFR%7x`&%U)aQeK4@3}w+xq}3H2kCnxNhi^m9FR$+kfSx6h^K5F*OSn65 
z)Q78HSFOo|Vc9=sL;n70oXnP<+36c>4iDBei8s{)RT3Z;^`pZQSL%ONI~U*9ULaKU zZx}aq0j$nv52%Eb0J`V*=L zf40!X8l+N+OY~Ry@4eR*d}k`?BX`KRv%!eAJBAdBA7#1D749jKP_kPzn>5drY(0EvqaPxjx4DX-sB)v%xEuz9Pb^VYgRif z*&I!QaEB-0$29gMt%1%)yh|z%b&g`bSJmfRBr5dUgDe)N+S*$lG-$Ph1$r_)Qtd2< zx8tIDM9a)%XGs=)Gx@HnuPa$)_~5yY0fiWOIzPwlT(PoMS8Gw$@v%aaV@}T~W)vEr zbfK$$i^J%a;z?$4ffjsszOxUrbKpU$r!mt!4CB;mKE%jUvihM}_MT|Omjp*KhIG1^ zKQR}39TfS%#I-j^p{~tMP4NOUuK0eVpBpBLXu2r}q_z&lnx;PGn|HRyNg31Pjl9Oq zq^Kr1UlNhsM-8vH4f@^O%$QHvX}=48$tbqjZ;=!)$Q zuWC2pA&uE#r31Urzu|x(H!%7kzva0$XLWY*QMBn;`F8I;X+zG-lG)wI0&@qVnIVsg zBt7YE+?#hsPtCeh{KCnujdZ?fuD?ng3crC5HbW;XrnZr6Y{j$I7yh(7a#BHTStHCc zXIKsw3DTb4)+hfe&D23tf>F$1CCVqU6c~~;GJyCT#PS#U7)ZqVedkOe;x&=psi}XF zhqmh|<2v`~PT$71)#C}24!BTlojhfCrO1kzWo1mQO3POkm{YI6=|BmVL`Q+#Se8nm z%r*@PIDAwgGU!$K*NZ%D-*Y^lt;9VuUSYWMg%)PSZgpMB0?R`tGxq-W$XJjhSY>!* z{UfB0_JH4o@2vIVbeySHuT^|;m+O0ee1FZ49_DecAaG7DA}hn*b5SNmrl~IW&OnD*hS?wds+MGylC)IX(|X8tQDx%knl zV%guyP{jZp@8iW(j0YC5GXXZEmwHILLS=3|jiAF?dk!Rnm5GxBoYx}ReX^|AZ}sSX zE6oQ-u7&5or>0?|J%8*9zYOAR)*G0`xcDQ=yB8Mi9a#=oRy4l%yf^0^;oBkzTARuY zD738yHrF<_z7mzhZle-%9UbYpdu@})Wl3)F>jzIz{?MEF^dmDAXQ^wSb9L5BB>O?f zMg!_)hhZJo-#xrg7Xi}-`bq93r*4N2BxKm4;lR=fjN_LI7`u}r zPgH@NB;UQoKxFE8fwZ}@8IwD~F-XIH>*0bc389wSPoIL1jNA4M5IPBnzo=}XY(Yw_ zGNGm&~oqCwOJFf~7&G&}5j9KuoA^Nm0P(x?En9V0DLHt82K(Vuqsk4nMd6xf35f>v0O`*Cx?JrYqi}H4rm}ox*=PYA zpSkw@84$NyQHj8>(J5xt+x05i2FY*5pYO}qfj7!^vZ?qT7dbQuByo!-M_xu*Q_K7O z`pMj7wv3;V!sCGYIQ*&lR*M~-6Ti?Td~J_die`?HfHw5Eu8@|8HuWt{J*32{Rr35W z{vb+E=&Smj9O-a{Yn*0Li^!Dxi2(GGtMU^g#Q7*?-Q)~b?E`|${KybTSQ|Wz9B&KA zed1`c0D0TcMh82mGc2HN3S21MJf@r3&Vi1|FF!18v z(zNj9#bZ6(9xtp*l37nu_Vd+9F4&(vh$Qms3Q(FU(c(eW?=A#EePn4hp}^IH`&3N+E*hnLc?x;>o!9EFgZv5TzcsC>NddQ zi?L{`-mIotydis>(4MdKr8cS4)#Z<ZYH;u~oU?n|1Y* zMQ!~giztneWlaT{8~7wWB^avISB~B=taorXlw&=)m6mZwmY|4(HVBM)abU8!PB!~A zxcsWazW(-8`_4!LL*ILl`K14_+E= z2|*7_%E1M?f$Qu-dRKnX1n)6)OX=P`ifpiO<-==aLW>U%u0#F}I+1bjYUJ`92(pk8 zOl3v`l|;_Qs+5Wmnl$wZ3>h_J(tM}_pqn>l2o>Eu4#RflNhN?d+gYiGJv{du2^0yE 
zZRdj>=A&(162^V|QzMd@iaJhqu`+ZK0)vjhb$ay}MCK}=~1b^2^5Yk-Ff zIw)dX4Cv!kF{S2!A5o=p=*1hX2wav9=vt$ioea2ae;X)a#1Yt{$*HoL&#X%c&#`f< zq>H&;zzm9RLjEUBHWh7$fk)bM6ir1J+qayATgwE6Yv#h3Rn1d{D0l-MHJOZdJ!Ud9 zr3*<@Cz!1m+`fQ{Uj;1>Cc*lXWSc$zihipNIEIi?uKU|EbDYc z@eNmbCIih$eeWrYMv4l0op}<2l@>{xr&U=va~h@^m@?hl?uBxZ7CuUd; z@PtL@EzGMp2GIpkAWrF`EG$664HT8CNwttep0 z6ulJ$F$W7g13Vv|xTfe2;ENCq>ls6V;8C7o%H`Y~wR1Ap9HUAltBvTZFqUg0TDchS zG)@9OhSJ-3-AMxq0eNq{AGF)KT8#erRf`TZ+A;Sg*u352DyLpH`Q|e6O=swVYR zg1qBO_V4Cl+T-w0PNh8kI{U3z>p}WV;QFZE3ofb`W3XAjK6x~mVYB^uJf?z;;LZ6k zYe<~$o#y3C)_~Q$UnCs8EIU72IH$BEiRp*KzAtlr^_Y0j;SkX9=@}6A&VuCn>NCsh zciJnGuWnNZ+b;iIzR{IT>-Y*r%O<;wt-NZsKTYyu9GmkQNOs`&8!aa@F~ko_eg-Ik zBzj(6Zy0K;m@PJW{Pvw~JNx%QI0>&AZ0TfNsAo)=VyaNTzj%QSfJ(pQ4=moZnL@z& z1UrVxinrrd7r!#VQk(*0XLt-(+8V|5H4MJp^~HNxhVg^oSNZ2og0^ItA#G$6Kqrd z8;Vc+uUX(Ndz+A|CC$k)#*;gE<5W(eO93RqPIxUCzA7a$2;=KT9E!ycpei0BE5?L` zEp`yj38B7^+d*sYir$Towd6(dK9iT;>T=EAEFJ5jq?MwN6zBX9zycTCDn|FZOVrhB zdZOYiwZjB4m*V6*_ip`-6Pd8BsL_qq2sc1hN+3WRvec&#MMf+J<ds+K3aNs7G5 zhwR%S@yEP~cv##FDkPqZi;Cp|Wa7(C`IwDa&PfB1RD2KqyCo7PP@BPnZ!7GyHn$E4 zc%Q%i3|K>fZQI@3pv#ThiV9#05#vHe-%@0j50(#%t-ao8O4LTZ|B|}htdxmAypH3w z?Z-arKFd-`XD7B1EgdNE{oQL0A2(=|$ZN4Sn)hLwB{p%5EisM`LeBi)0e94)YCMl9 zoln6hXxlMX!sF0FBMQ1!{4#Pne@D@{$DHE2U`C`ZCcb3%!PpCPGQ3Ued#jjgnhb7~ z3gu{ZnB&@Y+WF6k*QL!s-YZdC8Oo?PN||LAQ9HV z(&XsNB|Cl4K3r?^DkjJ5Z%Xa5Ncm9#`r$rZIK-aKK}sHwqFOiS*fmWPvawoeZJy@4 ztN=D8WR}zk9@A@*K^D{;xK3IXgEOc89oAw_T9ru=Zc!BQ;MTHcHvLn|{Fy*!shlA# zxUdnjy6nQf{bKn21k`n1_S-@;-39a-k^D_essv*&F40gXypJFD*4yi>V+SuU)5+y9 z9D{~%%L)I2&TQ(t!y4IGc3T9tO#y*%k`lnAf zz_VlJ01MkW;}lqjm|bEB&7KFG@)~(u@aZjgYT~+qkeQtfOEPl!=elyjk&#;;_$fn` zZSTodNLi|ZFfS-n8{;@~d zYU|{hx6Zf&AZfTLp>ZOs)D00K4i#o5tld6kUet1t0HZwoY~jZ5We8!i+Lm*rD8mg2 zH0r%8T70+=dPs77z-g(PiKVa*&ms$ytSyB8fGL(OF{kt?%U?aIGE3#@UZtH^kZlTY z-K_9NJ)2wG(520r3ZJw!(&-XUV z{#E~gIJYILRn%&9DF;Pbd>3PQ601dqk)24tbYwHWB^Nf9L@=+N0p9>`LL0nMT6i8E z4w{r$)V~8}hHwV0rmmmuX!HSi^8JY9>i}HEi)6(6F+vgtV(O9Dyi0E*8;Fn=0x@KB 
zj=uX|h+>uIjUipj;}pQa&DweO7Jut(>!P^Ac2NAL<_qt_FA`+4xgBylmo?G3>YV@) zZO{HJx~59DVPqjr2p=&##_jiU{}YWu;-IT9aWB2rEEy7nvef;$c&t5I4c(f3*%1=G za*fA%g$7ls^793&7~JTd5`UO6x8M^P|FIPZ9tOeRw%X+HrN+#0+(ez!hyxtluU;)b z&Sr)DJ&%dA{V^-dk!lri7+sHyr$Y0iY5AJj%3wK1wvgI>tor6FZL`tJ{#E3%qfCA> zuTUgI{8J|n2uoMT!D6<-T-8}BIlhXIqOp-ay~1FTVe$AVB^gUoyr-0s0OK7ne&C6> zL^0iq)Mw9=F;dt)GKB^htdtJ5E#x+_Re5QwzH;A{9+a5*RQh7Du6S9-d7Nc%GbyUmMoaz$ywXO0Xb(SN0su?yMv{M_}T3foJ)aQYn7@Q zbVcT$!z(uh9F8u=Le9Cs93c?H_gQy-%U1X+I?Uf3Xaa;f11haqGo8&oxtFZ4E%j&^ zublaxzQi%=)D7M38342uv%+*I^->RZc_$|d3cM3hR*}j*j3XF0VT@*Yp_Sk88sF&@l zXK0M*HM#!Kp_YX5-O&^4e0spY`xn2TQa>MlQMre-h>LZ;N-vpt|4XE)sx+W)tg%zW zK}*i~p!nL^R@#{lIZ*78v28y?xM;8TUz%-5se<-S`*|W{JzywVi$P6?y1oT%bLz}l z`;gQsH01JeoYwgd`Ovu~9eC+rxWF}A!~;-P3*FF6cUYfXw25EVFr$J_=Z))SJYkrw zxUMgJ2cRV^<}r|S@PMFiiKcyGd9JbF^liyxG6&2)1FS>`?#(}-cp_GRS#p!}tfx&L zS8aXsKXlj-15vd9c=ASQR&xItYSHr*VwHa5GG_lSE9>N2E@w`q^+*}i0qyEuwRRo- zfEs*bg7L;3Z6OU@x;9RH)&UX>QyR6zWBS^vqaY0O@*H8e*)(jldvPVZ>hhk=q!Pc7Q_F90IrZtuLqkv5*x>Iz9glH zyQtv7=4XJ-;2JOx7dh@1s(e4Ite3B@cxzwfJZ1LH!B@k0wUfsL0$n95f=^iV?j~6$ z{P8M~?oOSEL0#T>drw_g>evAZ#~3LmGsS*|6q@e#U_#=*+`L_?NX#*QzQY2WKdn-E z&dsJBSyHZo-%paIAr=iE+4t8PsZN_RJ@V{XD6_~8KP0lEd5P?YqskQR&o!r&0#3$G z#N`4CnktFE_8bvOIyoZkamF=-xlC(vu89`htZMYXstd596oVT5OoozNU6eXKk4XY|8tE+Vf^|LI5{-(_^$xRSZjIx$+DiOHoV$351n@f2DK{$7J-9wkR^BveyG z{Gq(0_yytRw$YESO>K&PTPF4{_3b?J+<^cXMc?ln(QGNkdc+@mbLI+d&e#RP zK){PeO#vyCN7(8QQWFhfQZ+4;(g=A3J%QEqqQAsn%NlDMiUuiH7YFE;zdnFJ1M1z$ zIW|eXFBR|)ubPig6m|*Q@5hC~!{oTM#(~ZZ7&c(u;57*uVyo+f8cKLGfz|n27n?=C zS%y>WElY*=(9v81MX@F7JBo!W$a-;j#W#af8o5cjnXhA(@bOfpz;zjdS4Zv51TJM# zTZ>h(pLHv2Dwk@C=@fC4^6K61KR&3|z#J|y1+aNGc3(rSu)6gBnp@%ShAB%)gq!8q z!nwOca^7lN#-^-?G)gcOAD6L?Fj9_zVbMkF(NeS`_G^;q9@I zo~IN!q3Coq^fM$bYb;g~(QrEFFNME(npR45bYP{58RP0_#nt@&eS-fyjuFMdpgCZm z^c_o&9UD5lY4`0#Tt~%FJ+Ko7O~2NqrBEg^=Tnod>ad%5PtScU3ml z$ZW3J$}CU^T))!f$iG^rSn~zRp4*!1#D1y`knG-=7`D`wc`WzIX6Jez8nII!LGlL) zi{=?1{-PkMp@yz?*hRmwXGNz&3qUnGty)um%-UtHvsU6XZH#~V 
zb(QMX(qPj&XwmZ8AFRnH+Bo%C9YydkZPr|l8n$8d_2yiemAdUz>B$J^hi8DLguqh& zZ`q0#Z0a;d{~UTDA2mm+CY}Qmn4?a=`5ax*&7oGDkC}Y!1Nk@(I7MM(J*xj$jMd56d2`fnXrtkyiJ1hqI%d}`Cx@sG5I+_1SDh2TmmyhEs2M?B0-v(b(xwhL@x!w?6?^Z{b z-zcY&0!c9JA%a)Ph<)Ym@A*>mhTAP9mLI7SrG(hDxh@a#fC83}SE&z{4CA^;BQUO* z`$Tzl|0nSx|A$SJ<&y1tzui365l56^#4ZL_N$3^R`CH+e;mh)a%<$coCP!S-U)>6g z90q!_Bu8#wvcQLZ6qBHfo!`#@-m+>?cml#Y;C@e;_~AEQoW$a))ddsBEoPmc~)mA)9uA<5U*$rdc?=0E&=@ELe_KSHtUG z24jf8ctZ#>xzYsV`LZ}MrXSk2i?Epm-s4xDCOfyUpQwxVL#+E@leL3HBtN>?M3I^6 z@o->~1w#SCsB)mFMK>T0CVt?Tz?^<=I{5bvp10dcSL*mp3la!e5EPm0Vt9X8NFXZI zu8YL^i`MIHES`VGpe?9uYIr9aE}jAN#Q6%Fs!Jq+QyRH9me|#1xxr1es&^`{pcodQ zgl2r|$b3T5%Y-`S-lIHm4MA9YPcziA-&#msyX-riglH2wtAKi<)0+99n)ZM+B?d8& zB6uqOYz3Q7?U~DRiSD`k*@xKAy$f<6Rea+T7MpUVjz!G@coOtLGcNPT&+L!Fja6-i zAuqj`D&E9r!RXgyS?n8*OW<8HNvC7Ci|chVfS& zCvYWF;MLu>S$m$goYPPviVmg?KU2&x6qx-v z$$Dg>UBz8qsCFIvCxxcS-$to=M_C_Xr4Vm#NcCm$GcGsZBgS1omTH}thgxgX$r#?y zEW#NmTC}OfvXrapC&HKy-l)din@Xu`isCW0#O=kg99mb?JBvXa;+*^Fe&mq7`?_c4#5hD3lcR1cItN#psUX_z1mR z5XoSsx*IqsaR2ab3YqoKml^l8!OF(5kC0;`gr49P{;0Fxz{b^LCt3?%J**2TKF%)g7H7_d(d>rRh_T}en25Y{Z3+-n|wxtvm zn$c_@2jV^N9)wc+`wIM&$ko-)bY}NVI8w$wxRBMVMKR`U7P^Qy)iVte>)(>Y|3o6e zvqf!F)vIP?&2pVR_j7s}P?8;2#9|2+pOc_yT`0x->An`Ma~#0%KY!x@S)1fR0zziZ zj8N?zv200{%XKt(r@f&Ll$wh_2A8BqqFH_DUCDM0!`ykrQaq-D9p(z+%wVv!^`2J; z6KnfSFUiIG3hFAxLL+>3yeY7TX?-bEIsI+QX|l>3m4gjAVn~!2E+jU@zxrvjG#87z zJuC+6(2E>nE4%nkLl1=8c!rVfU`89#=1m=6T(!GZIwZ6=plhyq&hf`pFa}MBU_-n1 z8PH^o<+`t#Nave*Ot?6@;f@&r8j|1}hUzn$Q>;DOMhRK&*({B>X2{^z+;1e9Z_y-$ z(5%hHHC7>F7BQY16%tvMOl?4;99jA#&?*iWZlT0!49}*3F>k*kN;Ht8+*lMC(Fuggr$=1w zYfOB%u;B<8$k%VRi#e9253x5#nj24`P*fs;3jyDLme((KG6`@wapYXa8(=f>7@$Z8 zyeK=o4=LdCZ$9hSeQ`*k z)fIBnDly@=*yJA5QJ~p59{>4NUabzPg!JF{EXN!3mcoy-LRxmQ>EPB?r@>vzw*LuP%XNHexb)Q_5BpWr%HY9tpNm^_ zHnLmd{W)1?XHWNSCOajv<26kTJeah3<7S4b8U;#uuJDvctM@Qa`?w+QQVgUu%iT_p z6bj=S9F40RT0LDbmot#z;5~Po&zYss%6?Md6I`kk_Y%^yV!41@KLakc-dSw4?@-b; z-hIgrw7;_wwAA5@|Cytx;Am63N2JnVW4Bwjq5IJg{{X3fT}_j2FnQ%B?Q1tF$dJI{ z)a*6+3$jD9&z2?)jyD>>X_TSjvbEvKu%8`vTT=Kmx)i88 
z!b`5S;*Z(HDXG3?^K9msdFubwB9Bxvi?^DA(1q&Cm&_&RX? zen!n~w&(1FXYTY@gEmp1Q|@zyi=MQu2p+RdPC7xa`jfkr98nU8 zb@N8P|8;Ad^$2q7Yp*iAMV$pgEFZ~e)4-1${j|o(LGvHi{iJRlA)$}=5NQB~!)D}q`maPrOc(J`? z1OTco%p34C0Cniasf@mPXIM9V*&XvfAc~pUnEe;-00nXV87n6#V>)e99BHZrukn}M zqdF2Sj^<5kA%alq)b7gcWVPKF9`GBw zvEt)L|0V`UW6JC3&*v74JFUqa+<^6s>UpO@-JxH3H<11(o|BTc$jn16)ycf<(IGJs z(}G1P?GYw+)E`{BASy{|z66*0V|Ln57r^j@r+dlK!GSxb4A-OV)W`LNr!$f*oY+es zHvoyJ;E)KCD3l&ZFi9cP>-6p-j}&XJ(=cUtTah=%n8ZK2KbsM+_021+^;P6 z84xRT_G4%RR%Dk{WfW9-3Nr#vVkuIS%4DNfK8<9vM_n+4f0Hp0s)^Z^yS0c?SayD^ zD(YX@$&@@BVD$`$*8Q5iyzC|H8!F>(RhzV^1L{WS7{G_DRF`}|n~+otz_CG4V%D62 z5rf{ByaAtb`n~Q%&RZw3{t%KMKx4QcnFh1p&bL9s-8+Q}o&lP3ZEOY)S0W!o!ZZw2 zc9iSnht(g3BQc=X84iEh70-2n zKHD`i2&VkMaJz^A0_)qco1#@>L3mxOG9Lc<*R1m+G}jwfS@%wA zFJ+^C>iQ23$;>ZbPRs=dm(26+;X9+6X_cjR>oo!s8QE{aTVkpeH+uteFu{uox%l~< z&Dc66E(+F@!yz=&AO~`?Geyq8;uv=b4d0=@+DQF}fIsbbI#0#w>^C1jMV|{+9&<@> z%0^WtKd==E33Q9d2Cmv*ezbNhP?mWKf1Tbv1G2R8*8E4i$ztn)C*v`D`3dKJt~VrY zr^ou8$1leF^!F@?UL?Gy0AT+N`2HrQwuDcx;OEn|JW|&Agf~O*klIaYcWb5uet6Q9 z-7!0jx#x1)oGtD*Nl78E-AI#qOYNhgU-LXC|IQeAj~mwGd?@%;71lfTx&&{!Jrtj^IUbm@nh#rY&ZeRXa8@NXs-revvSQixnXk`D2^ zPQn5WWoStzf0tl=M_1T}vUO^b(^vnl?<%#K!nihNLWcR1`r%52Mhle$$!tKxWqNI-ru&tVz;URtau@Pw1}Y>_aWmZ>h?2rokImTVP> z^*JB~OefyG8fp*Szw}#SN3_h!D-x~zCD_etM|ij|`aHDQunZ+$uM6mTGM~03821y> zZ6vEHF`wurgsc9wrJ=D?^baJnxWVcarIeiCv<$l5vNGS$qCsgXy=&wTed1bRm^K}6 z_^d^>@SUJ3hzc1vT?2Zw1$XOtK^9pt96}x~zLU3Y3BI?$s}67QdIn7H&M*%FC1qR_ z*ys3;_vK-Np9>H*q!x3B<}1NpELDY0+f=2Do&g5h!B=vv;@(P)T?gMST9DTgv;4U& z4v&Z^sb_x}Z^T?Vg+LV9qf$8;uSkc~(0&nHZ965=(~!5EBMpJqioSC6&62>aSu3`Z z#H9`DY8JtX@jsu!tBjWIFfhfFbQMVyJ^ESsPZLAbe@E>ef#^4RRfhSfSHF_ng#q7G z^fbHXF8zqwaLU&XqSpwmWfejO0$lv=XOwEh*i9(v`Ai2;Qbvsf*1lYViW~L(1P&%| zD`{N_r^V~xh+rkbJI^walSG9gNdlJlUy)x6HZC>KZ^!F0_cU5rP~@1_>Pu)kI2Mr5;8`u(hYBaobNLE&Jb#?brL*sr~aAq9QN>>O8koBB@ zkSJU0NLbOA0+^jiVHT52cpD3e;Cz^7h8Wy<%EvS|LL+ISKe%EgeFV5pU|GeSLUrSm zFy-t>terOFSdCojC>;Y|2)(SAN}+vE5pUxAnD57)WdCb~i|~V>8E}`;>uM0&EpeGN 
zT924LCq;Ev10{~V(F^h9Dm#ud<}efpeckcXYiqQgrkt#VnD~n+&`91F9_hB^$D_fn zz)~hq7d9{|GyDuFiY#!=F$8v@g*Io?q)03Z=e$p)#0=EyZr?V`ei8W^iONfVXL^64 zrqhb29+K>V2))@lFTxa$SyZe`ES-Am!7BX>kd6!$?q-B%^X@}*3j~F>d?Tz39En@+ zYrP}Y;Z>5p9uVQ*l}3=OmKfL{vbZ>;chaYa6Fz{}M{JHEJ{mKw&-^~w7&@k?5dBguy%o}MTJnlk^m8*(okUmY>_y z3T?&-hKkv4RRfjs=s9GzYRQ$}R9AbYDl9Z+nhp zD$^G<+dXF`_sl{*ON&(x$%c@7ug}{1^PFxOpc4)VU{-wr*#<3t3fQ{4JF~NO?`Uaq zZ)%8I-1<#O^H!)=*jxfe0c;fW%yustWGvA&m>KA9~ zl+*a-xwr4@JS4UgQX*Q`5$8;&EGgi5aT%ftOFjD_4BKee5sVyuF*F`?SXoIPN$4c- zn5{}TO<_6}VmW~vmL3xA+-Am(0QUlh7m?9cW5#s)$HE?=WZ`)*bIQxdAq#yOrq@xx zSzOC*ik?3Fp30klQE;dRDe36$gVHI3n(+_tClVONDcM{=lj|QsRC~j&a z!W9R5GrNHs2)T#q>~@nz!_+a|N4mM-0@MiT?hr?DH={IkDmz0n*_AKV4*rJyqN z!>wN=;-3^+Qy$Dst4SyO?lAG`=${xcE+69##%eZ|XA%2NlnO3D8^}&m3`Bl&U^V7? zT;%nB;!Gf&`FL#f8Sp6ubz=Gc(4oN37J3yBO8;|Js&M6xswWa z&#uRaM>cIt$G$zL_{FZ77c!W}XU**^|H{kjrxoniwr639!Ozq?hlXLtilZa(kM~uR z@sOXl`}>O(5^pQI%s*BQ@X(tRa!HwPBj+R+X=A}lDOxJGryjJI6lS|kK;sV|yzeZN zum$vFlokbLEEpYzDO8rF;ZeYFSF!^h9<92X`ct7S1ffX^%jD>~3rF3F_rFScrSZ7YdG(Az#o10-HN?4yR+rtShL~=Zx)LCmN@Bu{?;w~a~Td@zqlro zR`F_5G`?dJ=&^|;oez1xp^fk+fv!8%aTMk@q7#SUg-stq)8bhpKd^w>6e06%0EgUmGOWiU2*u-y8mUqzeK+4%66bhXmcY z5hK1<433R`_=fN6IGCSA#K?Q@&eRMib5;*Jeg@2y<6BeECibsDttdQJ7Zh(QCEEyD94Hvh%}I%Ck#2j85IYdCh5^PCR%bxR_b0fnque+zkB+K6)8w?OZ7yqqL!<93c& z2|i?_#Y*l{I=n1aD(FF-VdHCtA;b*iS zkI7RCRb?|kUxM!#(Ol-1)up1~gs!Vb27l}kUf`A+@VQFdaCmh+&_y}paB$#uufyt8 z?RZ4=!T9@|dg-Hib!8;F?*M?zW>>A z1pKorUWj4(#ky&Mbxh>w-`K)%NuOY)yk`JIH&;*2p;D)`DH7nwEF>%b8KAXs9QF4k zX8@PzLFiN?EmjbBGgd-!hHC*URkteHo33-PK4no>jzmi-nKD5NOG@01MMaZ7C4{U> z+A+WQr8XP=iM`$pto$dqEXWq~74g9&kT793c{4M7f zjgv0;4b8!#9~MXv1bg2lv7HaB)gHJY32}jow^asw8#J@Xn?(KGX7CGD-VX362fHa> zaiov^Zgl&|%uM8sNm)~@U(~*Od-sPCALbc@VnRr~hfzf}4mgz_2H8{6Iuv`aYc>j1 zdZ~cbWONPeFTJc~!&Wz7hDA#ax0vw4RM^#acgA+8li9pz*{r@ctSh;*%H4gRx&-Xl zT1a6nG_6yt&a~Qn_PzSp>eLX@A)m=@ddD>!+HR1rr3#fi=h?M>4(jvi+05o9G6#@hw|Tu%7+s6Vy8{8j<~QmgLm*OO>=41HpGeGY zIfrd0O&$e!a^~LnYZ_aY1S4C(vDc8i^Bum(KQqdy=}0g}tJ2KaCO2N(d7AkEvNB*X_L>Fu`*PFagT 
ze0}l!C?!ygJdPrYR7E}$xq=ixb0dYQ^&Y&MzvWxj%PYfo`l4@1yIJO$OcI?LLMqT+ zDlyO7$f%2;&mcVnI0o|Py&Rk7+~ zmKt@#+1kX1B&7>rPpbV(1o67CI)M$`jy-*q7jw zD<=ypU{`9d1Nht>Yk{DhB%ZLQS%!o*+ki6^c|>M?N8p*Rw83$RuEwvwpAeR*Jnc`Z zVE{GSvVtwQ3MwOLfoFw<@;>g))P-Qn)Cy!OWYldNG)|D+xZH!=XV@7Cs}wM`x5> zO!upYDK3dAXTWrLbIqf4h|iQA={my5394vEV2pK8^H9d-W3dp2B!L_E-vV9XOfff! z)TY9mtut2aJP_turlPLU`9TS+otZQje;){L;mx=_?Eo;wJ#+&`SW+-=O|B1==0E5E z(P7;bFdgi5S;TK*3xFRF7D8@*J1w{~(hzpHxywu1{z@_~x;1!QIMk}L`{|bTAHa4& z^pJlGH?I1JDb=Nol1*x44-Q*`DrieTz#Xepq*Iy~Z!qOC6Em!7RG#`*H*Mm6@=l=w zQU|FWrCbjB(W4c?SuaSfOV;Pit{{qjcn^Yys!l}<7pB~V~30b>eIGSIe1cnty^AcQj0G8@jIL& zOw-@~o7Z~12j%qF!%{nkXNsujL$K|KQ*F;3{8mGuTCs}v0V91+$59J2Ew$Cb`sh#H z>p0nq*9_9NEdAr5qI#E6WgMX zOty8+>c(vErnPL$g0eb!2d)L|dXa^ddg(7BoK))asd+zt?|Vbj9SV_a)h%50GJUM9 z(h3h~?a!5GZc94A_V{QTcrKD!4zl{hjk(RPnk+*zYtJJz&Ssnzo?g^bifVEk zKR6Qih#E|?=wWf$+F3+xfgo&GN@>aXJ#lFo7uZ=nC7QzgdkN}Us|p_?4PMLc`X4JP zdTp=0-ztgTncgLp$Ku$PAv>+SKQXOS^ZP5mv@NIW{rziGG&{g+RX}Rw?qsHUffX@pN_b+pAG(H z`UtWQ{#bvF<*kZT4$IU#`W(1c5?e^?F- ze|Ry}Fn^;F=Nl^Dw@A=1Y_Ig!Cm)lOap}TH36aMuE7+Ub?)qT<7eu@OYlU<6{KF{gcoGqanAhpL{D%_jQ@M6yYwKk; zZ+qCEZLyIZrHu=mP4^pvGo!3rQf+kxl598F%D9p=rJ7D=4Y6m&W^&u)%tf3N8J8H| z+F`gsk?MP29GfDa@@Jm9*F}~sSF=2!`DVqeexJ0= zBT(efM)B|4(*vsEZ^9$|c?hc(3o2Uup*2-TR7U=kWvT+F7RC4AfFDT*wR_r%8yi>U zUPMh5+=HLhF+G+TU5`m|Lc;On-`Pb$;dT*_Pa`r*P1y+__5*+xYqR(7M@r}~DbT9j zaVU=-WH9+T*yLJf@Wlq_vLbl=FW$Jd~+1>DyQ=%*~+l-CUM zNz0mA1iZJ2PzYp8!Pdz4YMD5@W=$z;$K3i4Kt1WPYXR7iH{%=rm;Z^VUkE`q{agJ5 zoO=?Z0e1h@iMQPBYZO?TG+?JNhkBEKyIiM}rhub-b7ef0g%4-n;gbTbZ^e&0DS6Y8 zi_sy=Y_wWdU;7fhT4Ct`p1B$^ZJ*x8ief@8jEA5AhB#ROJv* z&ycy8N)A!kRH6s^MwcEfb8TaZ%#!{l^)O{D}`YT64DcQ`pZ`%!R6~8QTM?`x23!VdZ z+8Cr>O}n+g>r&QnHELFI#GXW+oGCK6hiw}QzDP4HF_4dO$lC01sP+5})iiCHDGGF! 
zCF89g%u2IXj)>Uw>VlR_ogoZp!+r~ErE9v@QL*m2MIZDi$%69dQdzA!)00|g9ZG|H zb=kZ}Oao59T~`lGu12lRvt8xKVXqpRU#e{3=O_{`QANH*i$>2EsdvLz*CVP=ln)2| zDUe+Gp1h$V>-0>2zq$N$bcg8FV}qg+OV_tRE&WKUeMvE=_#_Xk5 z;ile>Evu@1E|hx2wNUbvFVnSQlGkRp0kKCG5xEvhT zz+4U2l1>5kr#G1wcpA{kuG6(UT#m9|e{rt;^gj1Fw-(?Qt;-}-+t2TL%m149YYxl= zuV|ZO7Z=X>V2;RMkRNi)G(1+A+luc94(3QckeRk?bhDDRT0dVie^>e?eY3?%qz{8S zpK*!i!Kge|6tW<;B1Z~UX-l}>-?EK1nisoYjHdZWfw@ngks37pLO-O!o87sh=v~+t zBT3U#2sSWYZ`3B#@!*rmqPvjFSN%qe#`7!bTgcAQvD8)|gi{PBx_J3eTCO_LMmycb zqfU9^yU+{3=rUk*SPi1*y6!cWBGNk)mKt4JyBIKINrjZU%={zWWbv4u6UDU=b(AsR zt%aVHW|5y$H&u{iE5o{qze;JdlPQ7tM=tD9S}iyi{>vqsw!$C5uvlAw1ND7E)J^r< zf~8T4d-N;91t5V>6=Bi=2j~T7fGG-uA?vlRO>ji(@VDGQbSqT)95uiH;D7l=-@K0w z3yfFHTQ9^`b@Jy7qvhjzj59J&HAig#FwqHzZin@Z{m3T3TgfLZy`v{V$;lWSZv&sZ z6_PJ_%8G2C;aq9VUO&df+-c8NF+M-rJqNRfkMzP60ZNYJ<-=Nx#9yEJq~L&lQfVkN z2U5(9?1EPbc7O{=t{tD(Jk(NBGTQq0hNGh47PT4l1a(%0MO$i&1-?8n32z2d+K#ONxG;=KrvUz4jiDMGx0_ThD-_3}QY5T0Mh((jCy{Ua;+k!rsnyeqOj z>{v$i)kLP?%U~6JRNn}*AdubC<<5q?kqy~N)SDtFHx$;2{%xc6vusTm(;#SBTfepr ztZ8i647*MC9?_g@H;3FNHqltIW>)h_5%`O>FuT(zsvN(ewqRyN-Q1g3u7!;hwJ;$m z8@5#jE|yyc!)WG11hzH^%mZY4gS38w#B=%n)=1k6lC*W!2`H(M5l@nN|9qaj~_=D~$|@TbxG;lSe*9763g@_)VAo zf{^=XGr?3mm+v;q&;A3HBHXqP25O!K%#W>TwZR>8y|x)7^b-5K&9vEE?4$&`eyUum zwp{%j0W*meim->?J?f!`HtK~@gJuiE8;Sn`>Z}JWIfuHb5bj>pmPfQ=?MG=U6+`8s zWs}vPZ=(+q&zRb+^Uv?!UqxASATV#gBb?C4*5xT*mf;bIn)?qx+erf(*FxBB_YhgT zJ;Wv>J+bnN(B(E3=2khhM$K*%|y^`bs7+KQFqKR^!lq|9nrWflbomwA1DQ$Ta`m|HmuD&;7a&CWXC#mJ_dF)u!st(7t2L{iRd zBa3i>oB3gKp`1hiTxeSoSes$zV74ToE-L-+9nCrlG;Y{kY&S0|RVFZMes{jwVj@Hk z_b*#V$WoLJl^s5C6cMIml=p$NnrnyahDfv6wsZ}ZwE}7;iV#we?Vq;=u%0dBK75l? 
zvlY{Y8$!QZ{mBW^B?X2~4COEb-&uZd)lne=MrwvCXOqOWRpjUROuN@~fE$cd*(A)A zwIjoy86HeFC`uY;SOE1gTK<)tk@8%xeL)`5#w38&DVPRCwk;C#C_tiB>~T` z9}ARKQbViu*3KGMhRF+-0xE0dZSXJzRac99#M5t!1b9Ea*fRcgDs*kVR~XY{_a9($ zNjp0JX9xh10Z$NkDVw@YOl5YM;galuyEI(nb{qY*JIwY!z;d)#IBrIvzYT+O>Wq%M zJ>?GYC)c6;xy+*x(~xIeAp=4J)-wpAL|T(yy2z7Hd5R_Lx>J2lc6Z00WL79NcNND8 zlskcUsH4f6FCfqV00aR(s@-s=0qbX0D_TEG<^(#$Lh+ADD(c&e)k{MPZqFq9rIPA`ytU(F7?`P8zv;k9VnHl@>?>Xx}PGvn$_4bRiB{{SAq zrPy2HD8+BL>4>i(=BZbR};bWo9O*L?T>J%iW$l_97#)O*b zdHI=-N*l}6B}gFYqRPE$!AHm-g)8$VDI{DP9GrL4LkQ|~1a6jDQNV6$Vn6pRgHh_i zCUFVq?|{f9+owJX>0@Cw%36!M65?D>aayz}zZRYLpkelzPt$mj2I$>f*xJuVZ>Kb2 zYs*HdI0X`4y6IevUxwFPAImd#k)q2P8~gD`c5t7DFc*!F{{x(oJ!+vZ&77wNTLQ}h z=CFKP_JcUPPW39`dA}@C@bYD_@@Gdtv|o6uKDfC5JhWq6PjgYzk$GfbO251ivtYo?akbNE5y_2--onD-$m8hg8@X}- z=16O8blm8*lLv0i-XJhqmZHp3?BuBZosSH1dim(5X>0GO&ht&;5##-~3hxP5pvYPM z&3IJgrEpKv^>gpf7je?w^&!=lrpj*_?^(;cR2z;@x_PW=oF%yNgi`A;YBzVvwjK-? zDCH?T+nfhQEL4M7R@!I|9#4VL`EL^8NQqxkU=XflEGPT-%vUE#PGt6=8|eG1Yw0ES zuHoOImnXDc`KgUxF8NGeZ(jPNeS)-qJ79YgmrXOyL8TGGib~$!Oq}ZPOYg4#^5|V` zmF0pkshiy2LvZN`=js$8vZ^V>2L{<&87pR)&2W+CH~6niO#VbIa@(Lp@ydgPw`G(K zqPDL6@fboH{M~q?6>M})y}V-Z_mruIiv284Zj1;Tij1YzscqDQ-5(uY19C0DAr zLd#Bgn7wi)I%bKH2|+$H403e1IpP^xpF@& zhRR&0*la}Ion+%@zTc+^-E1eysVlccJ7_AJ&=K|D(e{h)6h3~VIbh%(G?TA#|6H%Q zGab>+w!jz`s;mJTOE%VltE*FV$8S62C~s=j9lf7qu{j6iX1IAv9Z(-Zi4|P+y5Klb ztTL-hjS&j3%Dn{T>iQ+BL)+nbL(@0VXO^xqmDMby6)$Vt+1Q)_uphFdm|O>`I&MF} z0yM5d2WQ4cyHSl#srWOqB0EKUdM=68N7+PG+g#y?Mlw0Ob~mKV%nuO;YH!yX)eXo9 zJ@wLGA}az3Uf>c4wW^Fr{SB1D%=c2wGIOkqNTRi^=vIgcG#ovsUW}l>n>1OtCX}%# zlsS|YE<5t4>}W4?tI-OlCyH&MC<@(rM3CL3N$E%kq+NPq1qrK)YW=Og#GL-{2V^j?|Lxa?{tELB473_N^Lh4}Mx8?PrG~p9=yY(&J*&9sWVUP;pYX zd1z@|xLtmdT}`#hRrvtDk(a3S1%sIS%M1H&ozbiF7Wb6L3x#qz8uU<0XKj;Fn9nt7W%+h~Uho}d zn1}q%nmgcX`0Xyr@PKsD<%Gpl&7U$48ucu+F(w4`5`adfO&_n_BAAwHe6wcJ+4IOX0skLR~ia`%=?eGgG92_c6?jvWP>&Bgv9 zzhBRJ{0si_GGk`JCoD5;jwA0`o$HL(ztY7|Ahh1KP$R9$^ro3GkerK~7u1t{{q^nU zp7+4$sH`34NkmU4DX)CW7bv{`y 
zr6ThqlgNFP*~{f~z`B_THa+U3M)$%>>qw+7VWwa2w|2|2zHpBPyFHP6Wc#%RPWs#Zb-v)0#hd|#8xr_|ytK{a~Jqj=tE(bQbd>}DDI>dPV z@s5=#X22@2)EU+}>D{jM?_>C=4;z9;_UR6l@Ayr$<%bVpOSogM+KXxWXxv$F5)k^+ z`Oljo`->7ej(;Af%9PXd%v$)|nZO@cdaG6W%wng0f9TTpyEpcEpr;qc?+shk9cOX~ zWSwh07*|y??&p)j&<}81{af&wshyCA(Iu6$1zBsB4mo@(@1H@cttFSg^jbUCA90B5 zR_mB*dC_o*Tc|t>0U7zyloA^&%|lcIWaj(^7V=o-v2BPcZ;}4R(+ZkU?`x@sFR~sE z_{)}jDQ$-`1I3y(=`qB6W&3#0sbmQVU6iH2g{dnmK>xW-1D%*~(OW)Z=;l7Ek=8=y zYpW5X&z=r_isX84N;GyrK8kIgWLol^uV4|LQ>G=k!wf_i&sjIrMDvG;!KSDG1L#F7 zo&Sh{2f2|XX)UW*UrN?ZfBqG8=&zBFjms(PrAbno z*6bCLqA6!U7A^&)pUHiWm0B%g?HGVX%Ie3NvR$(1{hUU=_4iUBbCAz+N#-J{hK*N7 z;WdW71q>CnT|jv-VD%~`4HR{6W%t{sUU(o+%K=tf1LGlRr! z&^*u%NU(;8|7dl2H9)n@U;O=8g7V+6=}!@>wLqgK<`3(9NrT);^$Fh~f4De;YX*xD z%mPTVnc+L57Dh_gMWQTa*73G;2s&xTZVvW}0i98FFeiD{{@UGTTo>74LWpPdNl4Gl zEet@HXAEZ=*V^i9=aH-o*JEDOYTIoQ=JG#Yr7BuQZ!#hHAW34xWC6$H+u2C6$Ki*t zOxXJVHgtFag+E`su=kI@*u_Y}JCY}0nu78UyBe#Cn#3tnis?hxB#5b#>rpS{vGYBHsDOP~RhKN@ZD z;i`AeA!T+1VwWG^l&-jB1x?OM7~8<3ug>%+Wd8$@fA?BgIJ-b_v_N_mA-4uk;A_vN z8i;k{MuaFaRRG8NToYYlMMb=sc}2zVJT*r@Thp~s_8hR9t+%(gbMudB$c_FMF>L@4f|l9gP7$5j zWhZ8um!9==qTdeB-4852>8XwyF>cQ1ENX2EL9BT0-!$rqI;xfYr9NH7#R_ulD(UNi z>0Ycrx3c3uBK5bdS8%rpGE6x(FmEn5bl_n*r1}*+V5N#}N!lVycJ4 z+?*-pcU3`SkV;;qz3H2T$@K7b%8$F}L?u{+o%S_;k3<)3k1tANAjLI(+6YPW?JtGs zw;h?}YDLP(D9+itPCDJF=zCiua<;AdO^*zFS+)}7oYU+Of7#-3A6c_3kN^Mn2zzu8 zx|uZKV$*mqRK}teVC}NZ*OdI!Nn}o);?x$YrYxSHqqCddxWlnJ~VuB=Df-ve^EW;A97u2vbOq{|%ZcdGt)`*(_pe!Hp==W#o8D?`G*^1T$P* ze24k2-q!xG@Awv1;%o}QIwvy5lo|>+ZPuj{8mAd+o@c<5Jc?49sx%(u_8O~UOE0n@ zxV_NaQc$+?DJZ#MM4yTwo5su@u~t|~teu^&FaP&f**jLu3yJS%Pp>i1gzV>1PgYrg zP+==>ss_sN35}Zj@a35fQCR4Q>55={fc6WNZUn?!8O-@c@$#ky@>oN$B-kVz8zX1A zFbNXAI7tDa`L#3-`gAYVeR&p{4jy)mZd}u6JztRcrOwOW zmPgw;TAS))o{Q5Hj<<_Mr6}O!trDZ>$jDlx=6o0#I{6~ci>P=v?SA3V4ujr`rdxn1 zUd@&#YG&!CsI&QhJeH=^A;wzhV`>R&g9W3lJ7^O6mhY*k(-L<1z!ZyO`KAT;Og0+! 
zbL(YkYR=Y85ksG9jRxhkup(TAGlW21llG&)Y)zJY@@c}o8uZfAc=ZX!lMzeeLz zyHL2wU>IFz;ZAQ4nTP!AmGw2}tc=hkO;5svF^=}Jl`+Ho1`n{%US_Iq%pZxw) z4#?FS!J%^I0^c_;S{ei5pIDQ8KI7gobvHrE=gsJ}w5(^%Xk=$o*45g2eECRmCp@uA zv1To89X7kp{`3dEi&R=S#13eqC&hY|xDbqA2>z8J+RAx(F5howOfEC~k79!+NtpEB zM zJU-|cNanL&c63zSMwXMFlrJ|cLeXc>{urhVob@Cpu6*( z3uWMILxU}Sjy<&<>ZY{YlDl+{RAa^I{(=0x?kqpR}P)NIv`@=zh5&DMu{ z*CLpHYW#DevKAe4{&WI@`Ye<8F5=3nu^-|IaxqINZ^WK(Gm~FpTZP+gB%tg-o^oaj z=1;9CA=eurbi6P)iI+;Z$BJew3E@XYBFpXby9~L~xuj*$TZbts7l+JUD{8+i} za&qM6YsnC=FPNBx8|4h#r0(;IUq^J)gjMB$+@2#Qub{RESm~n=#38eC;pu8vb$WO^ z=>^}tP+?_RrnDBr<0Cf4xanvq8r`QhtF;4o*7jEoswH1|eV9WA-YE;bI6^1d--lCo z+yDJ@7HPmwjLAAh{1c>M4b$$R!?#}4-t0e`Cs`Td?1vIh{DV-xfpQdJcBJ91J2~10 zfd>mUK_cfD#Ju9afg+Ja8@dUoN?Fz|5ODz0fSG?}Cuez8S!SsV%=9QLZm?1uuKb&p zV0V+{*Ufe{U4l8)t|4>Q2m{FKhEWxZ;6T>LhygRHhI593&OGMR4-s%cm8y9WPQYZR zSRmXJ;-7aA`>w?sF{2Zs9^2x$UOO@MTOb?{pEf)+97<8Z1|lrYUnVBfg3z!?y(A6& zbLbx;&MmLoKm){b?O z*J!5o=1xsxzb>iqxqJ~Hf>{jV%;)!rqQ9QSK>M}N)H!3$N{S{P9Y(Fc{z#1CP?dEe zz_oTAP&Dcs@$3X{y=L9u|83{kdLN$Ulb+IQO8#i5OEs`U5SVwWg%(weT1e5=o~iE@ zPV`;FXux~1NX=6G4xeFI9ramn2Fq`?%{4vMF;mx-pu zPIJc4rv%7gJUr;JF}^gs+@Iq6*9*AM`&P%gSU(NwZ!1(+BtS8@`rEY?zRnU!I`~0g zmypK;i#sd#-$rxkp69Q4ngl7>HHQ>n$Vb-FywZZGvrSo#{1debhY z;7?r}?B^GG%j}r)czkR=*Yj!>X8ZITxnRQ@#2Z;AM^ed;Wp96Wn4g! 
z-wgGm?tJ1g13Rt6t(&{-@3-f*zXQG&t z9TCMlHquCSCA|{)ZvShFcU?qYH;o$7R#HlY;};bV3o;OjWm2NTV!#iH(4IQtE`dtc|n2P23U}UdFXYqa{q5) zsF*#ekZzveNX+Fs7XRUS%?NYhA44|T)bZ)EP?>=fo)t6GmA@*&JeD-iQj3%SL^lQ% zAb-1yX9N=@2#b`b#m?|f*~!i}xR}Ld+O)fyF%NP>+&>Fc!lSY8xI8`?)1=Y32HU*s$ffw;8;L-JZ+t2)qHKNI=flq?L;}< zMG0o{P5NKrnJ#1LN$ARt#*}58`xpnX=vxD?^zg}Hua%47*}IIB21|jZ?!h$7E9prQ zU^9i^-JRT_TE-uahN&N!UoJaTgohez&7M=&JZ_8aEE0I>Et;^WJyLw{aKN(V;+uF+ zxA+A>{2S?#HNQ@;hObT#`AI@`f7&PiAtQ}=cG}fAOMme6j!86-#UO#*e&^SfGDa`( zprp3OPyN#o!*b%9^ij8!79B$~;a`>syaMqxQ$maFO{&{`1qbkF0sq`|iJrI&_0BNmb+&useJu z57zS({Is&&Jz}=^4Hp6O7JjEkDQzQ3w~Vd7Pi@*~-80V{D_FFJY92P`cId*2#)`q` z28LwnpCI&OuN;!)i+3498xRJ!?ej%3l-bgug9VIS=Ok1!U<#;p&_7qjsrZlV&1oPv z7bAEvt7k?y;Nr>wlxbq?VQ0!Ww=VqYoiM-GO2%u*iN1;Tku&r|*?^O$H=P6}+`M}V zk^v@nV!M&^(QYo%A*GgI@~BCA*&edub=KAkZ3T`ss%DM#&y~E(BUIN3_W*WWEsZ_e z1;6OP#Coa2X_3J4VFpzv3uQ@lX-NM;v&ET9y{Z>d|Uic zw!zwl>HDjm^7j>F!FeF!&e!WQ@Tb_Qv&vl3Gsv>NIARI!zLdEh+m=FY*_`e6$l89u zouZM;-SY*TbA;aODQqQr0N@aCHr;7{s|3@#4@ynF*krU}vTL$E-a%L??q^wi%5mX?A`3Bt!JoiS?5l?)x{?4ymp> zrF*Fgiwli`yBen2W6IBu1Bbl-1JIy257F}_iWpJ8L)r7e$JcI4o7n33E~{BVV@Amn ze`1u{`;TShOAKcFa82Kigj#HqG!<7kXyxSA5-xF;`6ze-YJkMZ2A3l~pBPitudQz! 
zLW9`UU8eU~G&6>F8#KwKzQ(_Kv#G4k!xBvB5#}8B7X;Hfqk;f&kZ27Dn+mb;=$jgC zO)BdKr`<81?o`>9)u$78~*oF*>qZQL@DC3%vi$AXfqn+^3FY_T6I^kT3{M~Kk ztxLl$ncERW@uPd+j^_*1%sM(x8fg|#-oHU78hm)YCSpBZojpA7zV(k`M)Tu%d98Y2 zp<84}5{ouhDZa1=z@hF(-Cp4*!_!2P?b?l%D8kPcZy(tbBxHzM~Z8f_^8H-By5@TT?&kOnwuBNVsz%!ihnbK&@^ zMe1|K?Rm~Nk(4bTe_xiRYPBh=I5?#DF&CRXVZBP%+A6Z*-uf;4kLekxZxF=r5ycQT zcw2;vF2$YiM#OKsxdnbapPSh4AC2B^7>EIox1|hGH!H710!9gXVyH;Kd+fF)lAJw9 zh=oG|B)uto`ed8x?Dq$lEmWt+U1sYd#(gudKlrmr$!v{*C(>;o)QxI(ona?YWczpJmhNXpiKRj7rJ(ROtj}ZEH*2rkN)d_QYC2VB{FrOi0q;tt4xz zp3RaTyb!hrSS{r`U#b}uq07N@qFnO4?2Jg2hmHEc;FP!b-)a~8P~c}ggiFUf@N5)e zAwmPi{!|>s65<^^&h-k`^Tqu-wO*6*9{Kck^L>Ugq%5CR;x7q)`Sge7G$72_i+qWH1n>2~EiRN?U_Qz4od2%T4Xn z(*gwd4{RE8W^&@HB+!m$p(le#wBrM3H>|gt9ZtAj_dTFw8yM?;&g9#jUV=#YcUA>j z()V={ohV^~@X-5LW8UVX|2REiYiM{^l(OvKzp;F>#o2d3S@wG?=jNLmC`MwrD_zwg zFM!3dty>La+`_BMuq2nqcyXde@<8(JGex<4A6&_bg_Sj*Sk+7DtI zFEEF*@A54Fhz$*OiZnfZFZiDFPQ~GQ=WiKhuj``z3xp&m>NymSXW5bvV!B;}8TvH> z4{jX2`*n?IA9y<7J}h&w7)U+o-vcuP(w`J!pzqExL~kxcVq4&Jd}UI4Tq8wWkkvxb z|ChP>YH!kRW2MQdQfqwVsRL4by~Q+va!-F4P|~vQ5bvZ#{aFA@nWkXB$@* z2eZtgqo*~o4t-+OUS@uP=&080X{|gHx{X=)#)bX`JzOIMJU)M02~}5X1uvgZ`=aN7 zbv>{mcbeh9Snj?U1#0FeuCxZMord{5p5 zCa6!P$KyXh>^e(5+%M|UCPX1?N9%3#`~xKZT#QbZoGZ4e!-Y8w+XHjhFAWb6uF?>K zUZSHSdNzOb5M5NChv@HE69k}}2Gdpoiu%)(80to3(vF7b;?J%ch=MRxB+{6=sQlZF zGTk_SR$42$xp>5wcT~hbV-xzto?j=4&P1|p+V*60{LFG;RFBsHi#X(i)y^^syW8$+ z<~El81EBC(;ZW2IA$On0X9(yfQ3PWL++1ZADaVR>QSBG2H@J*RUDRwqF# z=03l$+P#vx{|9&CiS*7kHVUaeHs8B4_ZNyWo4&H3!i`@?)((hq6rLh_rxb*B*94cj zfUre3UYeOT>l^$FF)in%0s#D#v-Jmr&H>JI_0pef$l<{MnR9M;Vcu(#+r*2H+ZJ9X zF%teRn-xno@dl*OeFz!H6plJ(62Sp^LF5GdY*D9@$nP5Z-f_=Xd6Re{-I)KZns-mU zrTL5?DvBUI)8SQ?z8`wuZ>lbHq%mqKVu*cP+=pHsD6GtbJDfwttAd=D z;y$oNvuhA7a(1h%$vsZ@K2(jF5BxZOsm1Pjrw+zKi$IbKKJh`f#tlaTbz=9#{W1*`KCO z0t}RulLyuKU`gS^KG7i9gFa_h zy}f^9p?M26J6}|3V#;8oIEu|s0xl7kcWQRT&-X)RWq(}uB5N}fD7ch#bKm`ygOh}} z^F!5$l}WXgHkZFDouTPheq!tV+M|_}CO$~&ja%+ci#jo%W1s0(Q&QCE$SP$2=&wj> 
zm8EG@>n4D=%9z%*qp>-ch4g2vpX2`lY@h84SR%6>Z!5J#gZgu*wA5{uF7vvsmLUrRWu>gXvWe#w$9x+mD%!<)7 zsQXvo;n5Zz*x&NZBTbpQIwi+i-mQ}=be_!aV%?XLeQ z#43NzSzUBvyC}|zvTb_TG)-bnT}`;8`)zaJEmMKcY>2YQ1@smY+o(nWF5;^GU`nGv zvwOgqzUXv6+u&;a`hv3frzNBVWhUx8h-|z6G%(XG0@XDXIG7n!CO%z?`V%v8z9V(g z|DIT3rhMC_f^>$qo1?zFUEKJk0)eiGL)EhC0<+xT;R9F z>IbX1n9OQzUPZT{0^v}z`QVY_mMOMu=6J%NrPcZ=o0@Gt=fpSV-_(N*&5uHWK3IAw zES(8P={Ya}j<@mmd%xXF={b%{tS`v#IZtD|_!L7(2e(5`U>^C|Y{1;^XB zDvcQGOnr3>DQd+ykc*W%%d0Ct3{v_9w4W+Zos?Mw>}D(=;-#Rv39-710_F+N-EJ_8ApfW~ z??Dpv_fgr$e-0lQTZ#uH2eT=g%igvTV-eH*mEN;Q@7_1JRT8s~E#VLz4Ga`#TmEag zHUhsVx13scadU;w{L^osEnY3Tg6oS$Y+eiHUCt|}qq63nVqe~jPx#$U+t}6G{@SSW zu~;9;n?Bm@eBlsL`T*3D+antXGM*$wkPW|I~DKCyZ$UpeJXg> z+A_CFuvh4=wE7IVVT&!yrLWWVsMK6Re_R?^ukd@=&}Ue6U2+p;$yF>WeWIPP1yGATnC-N`J{2 zn_*&_AfdUw zTi2K1+yXhUsq8J2J7A?Li9Q5ZmFeEN8LV84$Mt}<_Ji{GA?t)QzMAsD#&<)0FO3X* zN9F%0{0D$EoavS2@(kL82uxU|cSQv`&hwda$(>Gyq^j8)6kj)oNl zX3)jnZ>qO1QkPjv|Kfemyi`3Q2#d)0| z|IlfZ=JJ)GGqi8|tR>j%hP}SBt06P$)Q(Gm1R=c-|-{ zW=MfYy~4_$LsN8YXF)t-0ESW;ERB(1{0Go?r%y+l;75J|-#2o47u3Y~67q9qsKd@P zJ-&ymYu1gYdVTGazdtf+9PiTC&$Q7bQszp#G+!SsR;7>pVLJfFX(n~!jL4(AvsK2U zs!y^z98BQz<%$=-2=?*BgZ}+Limp1KskaLwAq~_ZtH)MOqk8j>`$CzRg6o&9BM1X2%a{jfm zuUDm236W5~jo!CapTPa~6{+dkc|IL~U}0unD&fWD^mCizD+c(?GWAgmqz9g0Aun6{ z4W(yaOs}VmDuL`CwFE%rD2R^6&4a0RfT4Ny}`FSecy1>P$=$4JE(lD zCt=TaRb#vd=dS1PL+XK-N#n94!A8OtGP& zJx|a5Tv)mO{AD5WsO&7HPTHI{u*jSt#I6<|VuLLCHI$F_2&;4LK89eX{D5PZ-2;zU#_1LrO!h zW$!gyDtZ!h|Ah5c#(kYse`cm+!$xV2#as5JB!_|Q_AUx=t7KWBn230hpa8>3NJDaa zaS&a(NddWdS=N#m%MZipy**oPTQ>GTN$~XeA?Sy61Q&Q5Bv1pbxv<*Ruy66^CrxZB zQR2(cvZK%X;lSvt@680uw3_{_qAea$D?y8K0|yF{+fryHYlVuSP%O&%PYBB&+$i*O z5Y}EvdFM@kJgZ>tD5D>Qho3$vclXXXn|DHJ|DAcS+xL-{M(zOQlW}~@O1D;`Lt#< zGf7b15|jS@_4$pU)c;{!?zXmxxUVCnbi7))XI6281VE()^c07c!kG3l8$fw`fysOI zx^&v-si7l*-s?iFVPh*`4}MkMM2zDMw-c)s05sBg)C8{3_v-h0q(Y4D9>?5(kOn(U zraap;&bfClMH#rCi!}?i7az`?(eYcS8aO7nvCkmeB{7 zKou_$FE3-mbj1^Z-BFM>A=v()^8KgIaaRRqY)>)H$L5{vR z$v82mo94%8zyb3BXs1~AG(G!y_zok#7F5+I4d}N%3<>zbn$KT9Iw6!E_0Gn&+vLaU 
z&G@*hEL3$0hQ*JTIF(}1A>)U%6Zl)SWsI>&S8i^E!3TVl^({@>?4BDb5%Q>zAVo(< z_qR_LIMO2i2`vwKepc$tsc?W;en|guU7|Pm!g|rRFXqFx;sAu!A`XEch@Er?z0H1({SqU>@5dN^B-=3=Zr;B=>w<0E3 zES^oeAawaHM;t{v5go13%613WSgPty%@29TF#CDYF9C!Qe1!Cer3-h@DsHM@xw%=h ztkPy`{_|&bW{XdK`O*`Y(xqbz{xZ9)XAVwNj*6;DVZ)7qj zqeR|(8_qbSO9+d0m*tP}h#Y1JfUKs&vrwM{Acz-`0vM?>%(46$u|I}po9Ayl-|g(_ z{Bv{I6%M2h!FHM0QhqkOguO2gpd_RK+QV)PW$tW7k8M*7?c~r4j0q&lan1{zBXMA_ zWZwZKzVUo`jt3+BA+S3z*YJ942K2mx;{Am4`$nQM4vieEvO&DaVK#YZft7>_W)5JH zbsyb~SJqGOsM{k)`1Z9E#FkxZ!S1tjrWV_kG0yvP?$O0-FTcbyI_NYTeA@udhfY7~ z{ssFUcwBykYe!)GKMZmcCE1~b*XaqCFQnuub{BoR6qZ9a-fx-LaU^zaq=_f&q|VQ& zA&Y)Tx{$7|@7|psHb)M>177H`Z7xGyU3>=nN&;j}d& zO!CDsn03xf(T=V)H;|vTLsE-lOAFbn~8YJT!jCAC|>wFG+w#$U-xN==vPUl#1 zH?jGpfs>lDbp6kBrcW;xtBxVv6=`)(6>k;gEwn+ACjSZruy11X2iXy^`l!l5MkMP% zd#99P=8k$mGB;-W>zcuVT>PtW%##-DkoJ(hA(3)5+mA2q=sc$V5c|;!%|7#adK!^- zYwU+Pt-pPu!HCRa#iQt!D2Zd( zg~GF{cBxqIzv=*s=n8G1^|!5;7JI+i2S}|N84}649Cga*e4ipSLRbzjP4Wa7)LXUY zwP@>Piiak3a94q#!h#Jd7-xa%v*3Y}6*|-#!-Oi_=!-knXW2TkwezRUm>?y=nEzq? 
z7?R%}Q7XKcWS5wU;0Ja`J2M{4v*Pgs0#FY*me{hqnIB}1bU4k6uuaVi%7eF?vFiVP zs=-i^`Qz$`#0C$%aEuQ60YliA_(MqQ*Ck8V->*~O{fD7E-&vqhKk&jn!JxCN*My@j zXHPwoQ%5~DpPN17>F~?b(fFrT8E>PgcrLDu4oN>Er5|x3RwJRh>1(m$C0$){H5;x{ zG>N=2@;g>|>U*mf0s&v$*?Jd_&3HnCcXJ>1lxoa!;>340UR`khS%yfdf~}&MX;5`H z=^sl-hTKWl^mm+w%{<6Y`W4I1LT=CL)3WQa{0@gbJZk9Rqb02=Y7GdJSP4$Gbk{la zZutNija@Hd(E8||91Scoq2YBn|ApA*$@ijsZR|7Da_X=llLUJw=r}!cCx57gYixkp z0&8p=$fKglVho9{od+3X-}iC>WkP!L>m=QU>i;mBO_7TTWgvsVN43l7?#d^xEdh)q z92@9jd;zqxDti}Il8j_89}R|)%nNk(kP%jzRes&;j{3-^>cYQB_*_r>RQ~)m%xfPP ze7VX4q-Au2QqJ`JU-#rGPc-Ww4!eJ=G~RaQ8ilGT=c+SOcM6JN9v?aW6~s!ZkmY?* z7*gy9y+-3*|G#mB(1it!%G0B%ko3L3JDUkzpTehXU`!W(C^TAgh!TT%M-LoHs>Gh1 zR%WLdEgX1weoTS7*ea&3KY`HIe3yF?Ff~=L&X*XH7~S#xmaFc}{BnHKGV)^tf4n4S zF0`qVG6N6yJnKpclT@yB`Aut@;I>l!k({Kf{3;6$(jem7$Ld)B9^W@bbHZ1u5=ZS# z7jL|#(8_vA@YY+^ z_?;ydvo%0$+@CP&ebFmN>wALK9cCA|6vRS@`CWWCS`y%cdCN$?d}A(`e80;MU=<&AyLfsg!*CL!P@rGVj9CtmdMtDNaC*#irdq!Ku4@wY7wcTw*6&1 zZDv#!RNz&I7MSmFM{3x(xiH4Q`BP!95Jkw%j(ghIB9g8&-)MoDP;~Y5K*}t%m^ZwS z5tCbGMR&Mr{#y0&xK!_M=?Efg#i>}}z+gt$WXKK8R^&ap>e;lu zHcOm>>O7Gi0U=KSw`jiA53zVfG&0uD)>VSRnQs6{`6xeC5%@_6EA zW+~`jTwG{cwZvG+xCU;i)H7#C%>8<|cfi!cJa;>Jl2mBslFaA@y36_%9GJw7PkZyS z{>Q_IR=0fFLb-s=!v^n`jYc6CLL!o#l0PIP-v~ExjvfQ^-Mey8f2I7_AzD{dxTm=0 zXkzocQ%yg%>5;7^_yr?siri!vZ1v?EV-VxRBEJ9SGfSJxW_oo=)yS@2L@tGg>M3Dh z#^dh!k^q)Cx%uyfR`z^T8CCK3&vveGC0KwVkgF|o{MIytA#~H4Idu$M^XGu7C_qTzV;zv@_>5}+WZH_fiO@TN+}_4i4t+RC)mU-@X;&S=E4 z=YO=>~V%(}&AR zthOlwenGJ%*V*r{*m6?HooNpJMn&fy1DIXDao1U5Uz?h-h8^KMjEu#-hV{awq@t&A60~w_b+H`t+eqx>vU}YgmuDdzxAO(iXjbM&JCM zzmbgAp-OwnasA{Xa~QJJ&cC-5ASwp$)@fs;PI4jZ#br-0gzt6u=Ph?UYPVas;O$bd zDT!i)Km9FqAM{Q4dDajApXpl(gRLKEor+&ZaY;;V_N14A8&&wi*ooe!?*%&;`i{pT z%*lm*=t+~l%U?I@6BZ>0$78_Ye^((p1JdXKhfZ#foh_(UkFSEW(94IS3Q^xS20C}o ztv_OEUCg?U_HaDu?->JL(?NF-h|D=xa0=Q-lNHfL&#y>Qq%W4YGyUW#>6hi(BqbAH%>1n%phg zM5R=k_o;~^o#SmWPh>K`+)JDw|LV3BtyV<7GTS4JBzxaYn(?J4^(WZ{m(~0yzOXj| zSk7xj3llYW4Wb8@JFu%~k(d!*k4h>}?!@jh*rpp24$W*Bm=TlKqzA7K0)@>4N*vs$ 
zc)X87YZycPt>s^Je5Fz5Zg^eh%^v%q$c_L%*!?+p{|DDX^of>+=6hFsft3bRvM9v` z+QENC`lZKB(tf73Tpw1hh&PDvll^XabUD*1uNqQCuhB-n9Vb$#b;Y&7g;lS84o~&S5C{Gz+8+7A>+mTo zHHnW~%s!~ibNTmr;3~o|z-(#krj4;n+KjR($@|szKmPj<&gGgD6)td4V&ilj7tSs3 zKd_$2E#u=t`OpTo-DnVEOCghqiF9ww;FCw%8{W*Y1meRVl9E#gc6G<6u9xfVY7=~P z6B{YY&rujMOPTLAKOc6r6ptx$&a4Ts#>5^I-@Xw-eouOEf}25k5F)$Zwl@<~XPdcu z9D7yC@f5~f6&9{;-CD&+% z*PYVvzBTNcQ?buAVn-NIw!)tdeHB<^0gO*SMLI{O4)P$j_v|DFU|IuywVjQ#rFV`q z9GUsj4mtPEyXo#P1u@kAV3cCWy84T(zUvNK_+Nf$$NhU2x`Q!O7pUy9B=P?2%vug2kI$`ohkkv5r2A)v{K=z^wR2rN5jKUz zB?--5+O8P!yK@_~pEI*0!WbDN5+0V%iGNbf@^I!9#N&29*_smp;OR`SA*;A!cUGhke2C`v zOXaO?i|0sbAJK=8cfjRtl!oMH+@I+TUnEcSa~di@qQ-iRQBM$Yz~e$t(Q+?94*k!Y z(UTEa9i6s~M+iTKWYt~vuC>S_8%HjG00R;A8(*g_n!QxhUq z6hQSCDfgl#YG|ieswA#NQ9O16{c8Tks)Mju;QWOogDwTm@U;l4WL)1Xup(Mr2Q9$G7jBts>~tf8t3Ze6jfY{rC)m0A{Tjt@I+_&zxo!6Y+9F)y zh#zu%Fd2H1p&FzDw6XHN zpT&Zh-~ZeTS98v4w`Qy~{S5h-V;&;DxnF%O+RBF7stok%CbJJxl zE_M1R@gI`1EaL+`gSpQ!4V5u?tk+gQDb|$(u%s1!UNZ_wUO*xWOB&U8y~sf|KjgSO zU`A2>26fIMvqb_$Hs4N+nmzoySlLM#TQep{u`(3TR4TN!Okx=Q;=k7{BoWJ&A_MtHRKQ8W*Q$O!3CHv{lvU8mY`sr>g+=z~ z2HcDk1t5u!Lpx<#So2Lgu)wliQU;l;ujTzP`S_2*rnS1Cl^5(g3>hh|>xDQ{SAJjy zDqDy^B6r%iWab86L=L}~@4Oi<_lY=K5E(JIdVRd}hPANcyT_;@|RfiEjfGGA=M>p-% zVhadU_4Xbn{+&N56vg+Fdb_$4ai36!RsP1Mq|kQN7KakYF8#Y;2w;vl@=b|QcH)@V zA`#3U3pPJaiRip70faj{X^~JpQcRHP6qu|dj#*0{psL!*3Ay%jLs ztj{Z>-KQhIKiX_JRH!@V|th! 
z7Sc^jLv2As;xPD?85S^FwhB|;9Jt370YXGRc*KfM#!ar^mGk%FvNDrZOVPKgvNLT~ z{eKzCUUdf^M5!~o9@9}hRNCwL)?koo4K6}ykPLAZXQuM!WX+n9HEOx}>EpyIm+x}l zlXeWliKQr5!MaD|Y~eb~+wLkxAy2Un1b}!oTs|oq6|SWlNfB_dpV!C3RoTNIX=N98;JwEN=`q0ZoC8_`2hk%0Kr@y1%a6Lu${^ zP~lhk>ksKJj7;?@h4#7Q)nBi0k(3%}0wB_04=sj?G$5H#SpYsPAHT&({~tztfzw!{ z^vGH1hVw&8x-c?huz>Pf#>VcmV2*A|v{qn?eBFN-Eut+`j|5><`OH*ka*%t=nYV6v+#1aQ_|@e`99B^WI332dAH#$h!F070}=b z39qcD_DoH3_<`TiDYA2M+>J|q5IXL*Qq+huzY0y+?{rt2!zaSyV)e;1r@4^#dtLLx zi%io#y2=eyK`=Xw7rd{3{nA##^T%>Ta%_~(3c)q$I{y=a^X0z}#%STfXbic`FyGl&(Q%lXV~*UyJ6~%y?4wlb4wk9@&kGV555k>>WcTgdRi#*uS)nQpdr1G z_oK^jA!;p!bIb|F_>(3t8)+g)PRd2Ma{$vf-8iK$?>Hkl-28lHpOIFR-K9tBrG2cw zz3DcUI6sbwfw?nD>KOr-m;1aq6)0ra&i!hw$Q2V$Qcb|t39C2|Rrq9f4pH1QoOP|< zrV+&k<5`~?B2(l<;`iZlU|4fiYu{e#Deh_75<5fcu%%n(#^FbwNbc8EUK zw)k`-?;~j$PmagRWOZRi(Z}rJwZc40x6?1FiOg0j8p6#w;sr4xV4Jl5kUGKt+7iWIdgS)-mp+M+sexSYHyZRqamHx$ zfGPu;;1WdsoUTXgDi88XlKNf-LjnS@FfH;Q)^(L1jS}{Qb;u6e*QcV>dWEw8hS)Zo ztW|$?@MQa*{PF->B~9>KFnTWvcd6(=L+yBP9;sQ|r?N#Aza(aB=*52^!92xRJN0k_ zIeG+u-pqdQ&gDFAT6Kzdwj0)|W2QYf$L!aQb)EHr-+!6f>y3>bXQ@*K7TTpWksc%K z=83uCmeuKWJ_}Q78WhDtQpZF0|NPX8NfWEl*dRZ?Jm>J zNza4%gRakFG82iFD7;lnm57oL{}}HY2yC z-u6b(M~0jk_4MctY?)J(3V0B$y`67EHOc<4zThUbD}k4D-PQ*#i67quu9>zF-+i^i zAQ4Gn1w63NBwRHJKDW&~t$lk_>G*6w%6C8>>q0}UWiFcsp=X^?c!3>@K7AVr+10NT zb8MW+Qh1w}n-LVCACx+dkq#axqPOe#~0+N8zvX(LNalOSVjp{3Wn|Mu)< zsJnaFS^nL-0T#5x&V1-(Q3WQ?mx<MM+2U4w~;&D2A;k3H`@4zf_h^Q4($l~ zT*4Ak9#p#{q0|Ol;*OZh8N&%X~-#7 zSzwe_P=TK|F!@imu1pkcal-t(6El?ID`W0Y?xF%LG>%mISO|TA_P}zI@W$#dyvItl zlp47arH6$EYL&Q;ei{rs*@JrrgZmpWZdRu**K_zCamR42`Xnt|1nJPP=56G|8b@Vb zUd+i}fvzrzFtswq@$ZbGA;r{(aSg8krbqXZ3GPq7FtRi=H?=x&^DK>gqozxcSER?W=@$4p4NpkC_0Tv4-e}v& zxdA6#RUMx4`?d{QI-<7k&}N&djCB^4=T{;k`nE$BJ2uewSo!4#AA-#Mc=!_M=ZL8?gCMBA!@(;U=cA zL#{-TA&>PY9#>iw>#9a=+Kdj13mZ+j%wKsfFGeCxv}_ZNNCyhKr%2ErxmOXZ)Fn?l z*6W+ko8#9Q1LqZe+z2LKGmL8#B$j332dPA@Oeke`UL35Il0%@ z!&tPck?){EK68T4!JUu#| zw017MlHi#LAjOW+2J%d$2zo>SQhyJ!YGm5eNkd4JUAnpIM#Br1kaub+cIkqudlabB zO(=7=Kt9vs50f>xj)yc$vK+GN@ 
zEvhbVY~3{EDV?)Vu4bbywpwRDnfOg8$SjMk1=PH79_9>|&D*f{gA526kHueocw(SV zAk5%A1%CHsHmy=R0~Z)fTyxuK54|l+%*4Bg@B(O)_7$<%-Dr~SCd1&fD1^BW0&VdU zs|S@Ffiv@{6|uO5iC}%uTtqbu?3wv5KX30Rdneknq7lxf1}2zt&6YTgP{82A_zy!v zdm!=u=A*9>fD+{CKQ!bws1IHaGXfJELbUq2pj`2KYrke^`8_(B6j#~vIk8-Hy}TkB zawVBVcFj1N&nu$(_h~P>Gu1s!SCIeDyLV=X7-fv0>~OLL&x*82maw<+ce;Oghvo(R z-TZlM^PaA%@sVkq^@L*xc?|9*e|88ldt#JI)>jFir7AJ=jJ~ zG{ueE77u%`UkoO>gw-xm@twaTlkf+9`Q~fF6HT~6r=cCX{)=L5ttP*-C?s)-FJv*TChg?}G z(3|-QF%jgzq?&}3vSdvdNn!;k?NYnXLX_5rxH6AyZEcLo!$Uu8#07Jt2fHKXq){bo z{1NDX55JFeO<(1L4=>A6Ml6|bj}0XXZEkIV%ygljme zcJK2pS)j0i@Uud=kqY$gy#5go5)7LU(wZWfP&sJsFp*_|Y)R!>EP|*IPi{Ew=H|!-PKZ1JJO2Dx_7IqaF2JdZ3et?Kj3ohNXN)emW+tzphpSNd?fk0Tc+!S zT>b7}HW){%mD&4jRhhk+rYJ9Qw+N(%FGKv3@$eJi-8n%8F63dj@K!|oqfx|~sdyRy z4lt$@x|nj7Bl1QyD~E&JyvGlCuoRyWv%o!?uoXW9Ux{$LJb8LfI{%cYhQaST=+Y{E z8|ue0F0HAw*F8!ab5^}EDweq0x9J-?QSc1HbUU&>kyMI8tZRMF3Vq-6ckvO2otxoH z3gE|n2Hd&rpx8Rn5qOM5r0SyR8Oqd+3D*eVgNm(H_&G0;RW<(d!I){I?n^uUfOQ!> z{)y{j1fAgDnfe_yHDJ0Zc96kBMM_014pdBBy=o9K1 z?`Gd=rXbPKcRSQRnP9tRmiM811AS=x&K)oEFb&jD#50iJh^eqEjW<$ZAwarMvQimy zb~>QH&(=6j+T-FVy!R*7+7n(%goqm2ciJphg`K_jc)R;exZ6J~gn09Ab;X2i?f^lz;tcs_!M z<}=1vIBnD{e(MpLKDtYABiHCS*ZTW@w-L_7US$jQt{h?{`IJ^Lw`(|9laQ>Bbw z^gmjyhk*-48<`cm-UmC!9?38Z!7F0iUUM$qSOoTx4c=bnfJ?07kY6VU8QgoFn(b6tFd=ZO-&dzTO#+e zLh%1Z+#!X5lZpnuN*ozfZhi{1Mxysva5SiY?&l1RA{(poeU@C z`31FmK=Nn;rbj%`nfk(Q2{C%PghZ&G%8WdLwz6xmB2% zDoC2hhI31eT2a8NOx_P&CRhY0~w_~?oKz`8kvI7eNq1KX>#enim%B^5Bi+_ zKU9Q@2N$2?mF@}RGv`3Xk8ZE_be)2@X+r!2v)jnR#JA<@1{GM@85^F{JC%+#5@z+EDWzUQ_HXkN&ZqIFUaa$H14~< z*%1^PmbAM`*41sB%DJU^^?0$8#QglarVD3s!W$1GP4!6DIo6`ZfmC>-z4{&d6bXqc z^jzZREN$`LA^p304zGX3tVTx2 z+|=vH74fFq@VCCY=dF^dS7jJT7TW>1&ywHr|AdInbMCz)e!@Drl)PQL@plE6PGY!1 zPp!JItGmdy|C!2sE#UE?o9BZ?cuggXlH5;o75m+<%6Bj~CF6j_l=WtgpQbB_y%2V3 z$oa?LsoTWe<02zFhd0{bOCTQrP-kluPfx>_FQ_xjE{BEPhyO|o%GSB!;C3%rKO5)z zlv=NX_|hMm%zcrO^eq_7=q*tL2@0+G_u0~1lY*S?Uo*Ci;{M?eo$wH}Xw3AsW?RO1 zs2jBw)fo?>`xnkdS`yj(qrY;UoQgS+l|HM^FCS 
zt@|u}Uo-qPL2)^?b9L}?mDmuw{%T{HUhLc4M8GEwT&@sH``D8!Iqhcc2q|61*f*8m9jhoCK}!!l z68!Ah?{yAthaEVu(0&Gnp_r*|wPt_)Bpmph^X(*4oEX}0ZL>CI-^6S zJ&5Xb*|89@C-kDJ*hg{zW3s@1~|++t5rkGd@N8R6=w9 zE3N;AeCUre2uqdG;>E3DB9SAk96OtQ^53RZWP^FcfJ^n1%d>(3F^!jVht6xFs2vYu zb`r@AlUU}>@OrHY_(Z6Z$m!CN$h;&!S$D`4=P;v@IUroFs=XHdl9Cx?`I6rPS{nFQ zJ3`3Nql1N5lYCQkclBM%l#=ssBA_5@0Al6a*6qOWNUV$*2r`Xj^ve-CY|`MScVAXiqlT8k4_?=y~e%*oc{`;Po;d8 zlW+l@ij)m+NI(WjL|}jKOYPsy*!!K*$wjnXOGY$52k`-%_o_|gDf?Vb1!H*lsoG7! z{j<0EHLU+e-E|A?(7~Yr4@RnD1r@Kq2)-S+$C%(7n-zg@>b8SV^j1Nxix+=5iy&V& z--YA=fPV>kETSaPE*Zld2>*Ux5)0p0Dg({`H=B{I)*G`mS_R63o+bM~mX| zN3f0kYI~96$38t?mHOJqLC$^v7(pa5b_Oi`CCneZkwN68~m&5KvL36`j2s9nP1FA(_$p>(Hap z8%&~p&L9iXhStv`9Ov&))I5(nYAizCsedAPD@i9hb?vXmMq7Gd^yfB=E!;MPqJ>21 zTGsjte?txKZ&N*G+e~Cq*;*V_QwDbdN2QmS+Y487K$@wkM+VF)H^TdHJkycv0&b!t zLNlGt)WmoY+_$dn#tYJ8m#9RHZk1p+}dvDSn_T~IXzhis@QlgOJ z{xp|1Zv}7&RZ>Q!&rPMH?~0cxMhbFf8EY+bL{O*nSmwCV8fk=xD?@T1Z1%L1x1UMabdNnMvR|{uULT;!bD8&LmdI zxLkk$!zEBza?jo9^Ah_59tDwdi?51Pr83uM)uBPJW?uK>V{8^?N=i6QUF2}U z!p`#8*=2l57w)<0etJT;;Zj6Ehhcu9Y3lf-urO~@JKzfQV?t7)Skgj6<%1KEVwq|U z2vF!J044J&j3w^UpNVwWuyMEkXZx4i8HVxYt4-6QY4Eh!R84FK!EU)V79)nYYFZee z^NaP5z&oW^uBrh#VQXCD`J8*&1?xkUCtdk%Kbc7|IeXf!)+O$f1%I$Jfj8vanTAW8Bz zmq|eDF}Yv4sXMvRt23Qp6~wl-HoLZUrrie0gl}v3@apn(B)CdH&C>a#em_`+0Dwvy zCm>r62-AgOf$T=MJ1t{zMid-eWR$!DDuz1(DNdga(|N&_>>ucn_yGSOX80e*e)>c@ zZJ6U9(Y%FYSWu$CZFUXewJ)#X-tOVE;5bDeTI(e5p;sgrKNvfsgw1n`)bZj<4(Mb8 zIh>ZGZ!g}wfXmkaP1i_<2$C?T?!I1xB+O~ll0R7+uru7EU2eR^=?eVIA@*-r9b;c& z#=l&_I7{)4#T0GCUyZZ2X)9qKW4xT;sx|yN%cwYc88z&x=S5BM@V|S|Ib8zc7>;$UmH{A$>pxwFJ(3Cm)j6ZXc8sV zHxC&l`z;v+7xYnY-p<`>?L^SZWK;X9*f5yiQ7z5e?@;g}dWC835o`;(=}BAN}fZXB*{ zwA6`zd8_$mPqs10kGGLyUXVe-(bqT%<0tdtBNen8O<)6~Kofx7_S09IcS(l}te$Kk zVs}wDS%{J2vBY{sH^&K{;Qidyi^c-?!QifIcK$ih5uLPJ&9JGxON96k{1TcM&mVBX zr2{2R^s+mWrFjG68!`VS$-=W`I*bQ|LA3$ro}x8*Sp7^} zz<;{M`rQ>Cd(6vTF`50lZ&MxitoQ1}5Ue^zILmQ4Sop%>Q}3I~SFzho^S&bMU8G+6 zo#9m~_&I!kp8U>SXCB4ML@aXq_mqe{U+WiAVLeV8y{?X$m2YEQ#w|nksSAa&g;nd?H0cIf$Afxjp@J 
zGU5{pYzy}#hq3w7CnOMkuPEqVANbbL2$vfQ02%-uNuU$b`K07gd_#DS(*F}Q)V=cFSjv~ z=`q=Q(bXmAouy~0;!rfXAs2-j;2-kx5P>`P0CC3Mg#~|*7pY&Ow8UGr7$IzmP(lA= zp*cBkvnmMGXPn^b?#-QlHnL~O+UDq}InML@QVz&<0eKqMW7vAc`Znc%7>?=KVRnlY zBl9k%)L(bBKM|IjfZQ*PM<>?u6B*mo0|{C_Qv^LHDzG!?gf_$f06&_F#DW=(4=}*9 zQE3qZprp$Wy?GxH8o+3?Q6aarh(wMn%Z>1)V}o|xqaXBc60xKb6B#YW`vmNQ6C!Bh zK`3AZ7wO=|gj8!?2t4~$9_qh}A$0AnPSArQH5aB3#kJHwU}E9mj-8flG_2*cYP@b!+tm)=O^}=-nC&N z4EWf9*?YOb-U8MacVZ1ZCcv~+u<2#-r|C7Z>0cbf;?p3P7rq!64jY9SxaNj3+SqOV z;~N-#tzg+92tXqJoMretUn8YBP8*w9*l(F}-t_x8yMsmxI~|5KX5{E?WfzBgBbiO> z2H6zk{{ZNh8DBhOEFVBR?f0OUjgh1FG5+M5WaD`GHJdu4FIjIM6X4m6Ov1>OsAd-~ z>gD1jkuSL)H|jC>9d0pB*+)i}biQkP%NtZX#b|xlx_-Z4lkR)|I>O9X;I4 zfa+~(q}Vy^tQu<-SPS?|E|%vh&g-Wp2W4jkXLxHFrG}B65y@oxvyz1hHHk|Jvtp`+u0mf5!m!8Krymw%u~Weg4v+tyll~ zaJG&T5BJ$)D=h>IP=T4;iAgo+ec`>)_jvL=A&mXw${0wB|IPrC9TqUT5oUnDEY?|vsed06czCvX>`QE;;-P|5TG{KsrhmZLU5jR zbohI^^459>fV~f{KQ!IQQjc8mX)RvXeD;Q|acCKLa~)-nMt)bU&#!8Xvj3yf``Fb2o5kQ%`r167r)(VX>-3+c3Q*vyN6FN|W$+(IXMx}kCV z{ZUNI)X)PG-(EfK(kU(mI?IR)kSBVGcc^S(7NgI6S>%_=b_2a9r#!KnQ zdTFIIlfo>U(5crA-E}Dr)d1Yfys^N1qdeXEJ%F64xmmva_v`08)g2d10|1vwqzB^U z$618d$+KOgNLR&HQU2EMoRj5s zwal3?UEk?%A;Gfz)8qa?mp28ee1iCH3 zU%mFQUi!6b+fQ&R`$&DeK;r#~;>n1>%bZ%r^`1=5w$8b_Qw?ugUrR1w94YDk`v(|j za|QQ01MaryM-tCOK(YTM&v8RSLN06aKL$#!)aGiMM*85HY4iNj3)=qm?k08j6()WM zP~2U2+i>YF>x+iI80c*UN_mvcmNUj)F8^kX0Xua59mL|AfUR9Uo^J6q(+BQfmur|$ zEj#0Tj|qQR=;5;JEg=aFz6$E~VT(x#q_oub2)^|@FnrcoRyyHsVZ@K0>Fw+UEs10f zX`Rxz5J^3Ed-LERWj}qycOmMN zY+~}`2VLf!IZinJW)Z&`+I$EVxym~l5y}QkM47A`?`*~*B~UkUo_fxfy6jPvi=yWm z8Ky2}8XZ#Fl9m@Mgx7UUZ(F6Rx}zO&HVyeW0`A74L`b?9qSC`Wjo9IK4tvjVOZ&Tx zdYpS#4N9^@x5Pm9oirq8M~B8MqLO57aJg{Cy|(DuN<+Ti)AKaE=5%VDgXHepz#747 z_rA`VRz6Bf))buW2GI5^b=x05I%j1?C12zaZ|h4vDnrI{_-|L%E7l44{at<7A5IeR zAV>eW;`zv9NBlrQk(uLj;Fhg-=j#O7wf~HHvYUIHR99^C8KQ5VrJ5v{C+k&PmnAW| zoN6sW2-cf(*>e^>r8x;1+*cs|>1L>+e+oZzxi7nYv)&clz4h;Da7d(bxI{ZxU6*?C zg)|7!d#cePYrpPhO;%QXL=&M&$t+T zbUg}=li8X1`|eaEfP`gF`|QJqg^CUJN;qBRmpx5_Bu8+!x|c;DQ8ZM5Kjr4C=AqaZ 
zPU;Evf{7_J1CmL_)9|}F-tP@Q3Lf--g;$)He2NnC@WKfLQf9|RpF8vlilusFC~Yz? ziK36jss4D+MiSLo=UT+LW5>vs&j6R)2XFkO4^ai)N3U7x)H!ix_3@| z(PI~P*1YyBW4 zx9rY7DxdD`mZrw*W6C99G^V0QWR9dY~lO3+dRB?)KqP%EsviGuJHgb z8oXqF9grS?8H@%C-FD~1@v+T@j-P8Le>U3t{*uy@KBV43A{>TwGIrFVw56}W*TnJ^r^2n*j1iO8TTor8vOYqy#WWP zkEjpLXwN%N=XLyJ&pN_VB#TW7(}`p2=dt@QwqyRvQ(7On9E>}Ph3ngs*QJD6fqXpV`*qUbkvVjckIXI89W-NXcj2*1e6-xY4Gj0?#hxgv} zr0lN+--OR9Rd_0vL}#-B%@fx@VKWE?9sh^pvIC-W52Ir~%6Cj{5s3MQ*Y-yT^3T~%g*iiv9EzCsXGKM}k`vB{J8MG&3D^OO3;s2F z4tazn1vF&>e1!hc*%GZ2Oa~ZBdsuU)3)VwiCOdeCbBmBj=kfSQBX=@{BM+Yd439=R zjdC3l6TB<%=0cRRBBFA0G78Z(K&8u`zF9}V@M5~OJ!_0>X?Y-Rs1|niDE@036IZV{ zu<0Jh0Ay>RpNNk^*8VvR_~8;E8KW#(_B8mN$U(qDSI!|g%~pUN+@>ICQQX>$eR?cwrz1z43quuI z;z1idRM}s!6>YaC*ZQmtqn$&;h^SrVoSW=HdAhxw)vAQEM_v$&Xuf=37Nb)o^4gm*r zetCd43gVKg9Baqb{%bozSqZY4F7J{SksO%6AN^NG7Vd(dBTGQ)pvFUR@(j-Ia`5W|i%#51CPw6jkXL-Fx#y30f>%p*lLw4D~0wQG=wn zrCT;bV6re?bl?`{r(WqI|0#@Y$?D-!Rcqq!Gz*I-7dkDPYj5NKvcYK?e#%P_>P^k; z_I)ppw|w?4N7m0zn2RdyFsPrc|K1e=9snaOJkf%nk6(1;ZP>rf9bZ-?>Dbkx9re7! 
zneBFq?4$U-)NVd&)%zCqUD$+$G2M3nm31RCEb1+KW*dMl$;XD7Y_gfCwaqm%AUqRK zj>Rfli#TNr<9EBe^R1>j(;r3I<8LHkzVh6%Y!P8-G*JZT#LZkpMaLaCntF2Tl-@CS zun3f>)v>0ywKeoqg6Xge5&Gd15|U7i@C4Gck6^cj6{VXK;1O%m=r!t$zi6AGZ>Fa# z-byQnl2+|*Z^LS9qGlBNCso;jy7Sk1lU~Cn(if?}0ZP7fe>G`x9Kjx{Yd8xVVVTU= z(HuMdCPMkzCMW_90!6T4XNrmZ^i!ZDXC59h#t%h+!0&CTVvjQ z5iB0ym`>I>9KWL;6n5FF}IxjY{JeN7WUt}}@_77mnYbvH>W6X4hv^_iUaZ>O@M z2e&j+u2K_J<>!n3!!bt^)_8w+Wlmk3A|IYTm-+e20LP$bHDwNm3_&QVH~+AB%5e~k z7W`iz(%IHXx_=$o1l5diuQkpLmKIO&SO50Q!pW`7JTh9Ff&6;z#Xh$p5&PgKC2NrQ zmVE42fQ1ETaLzwX_mg(7aausGEWaf3%aIdWxn;(h$#ldmlQ!+_6y8aBkFf!fmGdnT zGY+0S+O*Sz|;wP zxy94_dMqcyG#m4;X+RIiH8EZnlLMMhC$#Z6cPUBTc63J(kAT*HQSsQ}v zXQixr)@ysR%0i)!*Le@thcEU1Lc%eRfQ3M3qw8L8dVIs4`6RYrQlp^+U92DENlTU+ zE$EoCdofD-&X~u}J?sw2fU>&(28`_(0l0c!R0wh*#xKgR^`Hu!*?-*`-j=^8ROCMN zqU($6TgL)wl`>;xxaTBMPC(VRa;onQppQ4`@Hi0NDPiBMKqtX)cM_4bDkYLv55~#T z`|#>rm_!Els^#kFE}f}9KP?V75At7_*mK$SY-yc(Duth(wh;( zA(guZnb(mGu6+V>nFlZzWa_e~K}Qohyv)q>Icw4_?DLx2FrCr6bnO+|0?c;XMFUtq zVBF=FEe~>ctfHTLd?{F|*~#U!=XP?Z(p{CAUYw)JfwgY2z8K8t_hnlxT$#}`3R`~P zPC6=~zM5ympd6quVeW@iIexpmzWn3_!-<(=^;i5TiTMu)$kcpT>VQ(ZS#qYW@krGo zFpc{6>H~b+2FS2ow7OmBb&1?!#RKg#>@4uSl zi?ABIlTX?DSy)4un>+c+_;Pe~4(imEqG7JooCmAl_~JecNj#Q5%su_^!J~nO5DvL= zg@e{W$-)G`Z!-HA2)RT>?AjkT+S8AEmHv32SRs94U}NRn+UfKOZW_Itow8@qWyIk( zLNS#(IFmo!Vr8vu9{R;%-e})3n|a3(+a-T_q$7%G4}J%A{le>=Mt?3k#Qs=NdH;qK z|6O^Od2+J}&A-+J1VcN@5i#GQ_ggxR9AM`fbX(5)9nsK=xNU{ zlMuGLilI)#j!Kp|{3UIJ_i({$Tp+WB#U)S<7p<#y6nYX0dczYxJ ztatT6Ktbbdul?MD;a!IeoS5!8+8_ zS^!VZ9dF>v7yY&zxEl_Yn~3|AN-RDRk?MzqKeIKPR_q3-dtv=X?NKG3&Y35z6c3or zXHiVs1*F}X=w2gJhUXIGd3S0BT7yZOSsxDpUS3a?&C7)%1L;~H@91@zVAb{u$>O0` z!k_0mDO#)DHxf0BUytgFX=V9wv!$y3-70THoFwOdk?Zi85c#lwC%GX<%!p%;VpR50 zxS9#RYU@0=Z~mF;`Rm1@5{sw8jn`Uf0ali|tnJkol7>1*{FtDv^l@Az`Zbn?Puj8N zN+3!e$=?Uz!G2M&T>NzRCFw~CZc*66x~zr`*_cSBL7dsF1S?%@&`b4LX`roib{kM< zcHDfzyn9-#*V@WMeMYU#rv`HXiR(lD%!;^?cUfENH6D*8>~(a!VdAde`ifjX&^Mqn zd6FTlK_9_B^VOM{cds`qSG7!l>2|;6jofXL4$5~^z1yL1GqO{0pT{Gc7$6h}V_em{ 
zpE{)_)QMjTb_zJ3qN-FncsoPvX7Y>N9p8k@HWV97pQPIWLmg0sM%d~g2Ra=ir_j1zO--9FcFDg}cDl^OQ$ z98w+R@aikI0taXmRo2`#w}`5Y=Lpz2d+tw^0^S90599;;yK_S1vEbg0zOM>A5EV zu9s2@G_L&9NKY-zVov0tp#YAE%Tfr()uh&bY9*w9A>fExrY)qrNh4n8B*93N)ltOT z^g^d}X_T+GpwW=@n~Q6P_o7!B*2rDl-IMkyU#3Kg2ItOsSyoi>8HZ}mW20xr;^kZJ z>I!m*q;x535tr>Nm2hHO^y%doJ(c@+xP;h?Zu2bt_Nx)?v0K0DMw@>D7r|6E7S$RW zf0u|%;Pnp$_*J9pte2K-q{is8U47d7#C^sJwhJpcQbVRJp979yr!|3X&UUWNv2Fgl zcKrPVL#tJ@-xVa6XkPA^;I{$KJQj)CtLgLYHD=Yt(&!>y9nnqVY}lzNGGcoUP|WTf z!{x2TT)dr_^f4JdVn#eCkr0YtymGfp-ZBd-z2OfQM4!mSAU;oWc2XoqF`7)*Wsf6G zx8+U(<94K8uw0E{Ix5OCm)k$Cbbr#s)&4_9(ajTHQ-N0hI=&4_U=H}0FVj$&@N((n z*B|Q4VtYh(_Jo@cwh3S_=P#>%)jcOZT}|5OC0108n*tMri2&h0jun6%MR)l3lf$z~ zGFGKXHHWvPKP!g0@v`g7wU#f~ams?9X67)mIg?D{QuQ@JEA?cmzKf<`yv{5r02Br; zIGZ$FW?s!Kl7|BN`l%Ej*iZUzitulUJ7|dj?9i}i0|T=q|E5|Ef~aTca-YcgSU$P^ zSf*~D!DBC(Twfl=7>#4h^D|QGr@qdULDK4Qgu=nodUgJDTxg+9>^kC&=khXOs^Dwb zY*=a=k$-A+QpQ0xI!wKkbqf;>{zW8);#u70G`UxL8qq0Ur zfdGz~4YokDSp(y1vpEdN!8cgyBXL({1~(_C(HpkK2Wo5t5s)BhEX9i{pi0nfLZ{#H zTA43p8_mmFF$|9@e^MJ)mixJ^LCpV627`B<`uYR{2ml+s>pzH6WG#~AhdSeu!kEb9 zwzST|3^p621eX=g|2R*tDtG|%!qs>CpUi1%{Zs)vd^BJKSHL_ib&)gY?|-;Tc#6r8 zwE_+2C*;p1+J7!B3VzErGD9Ok6e%IOh8=_;^u?oXey|sB3EO;BUnWE zBlKi~x#`(M`z$-)rkve-ADO83(`GnMylansz_E|-e*iQXXmdCYuu%%pI~;<^n#gS8<5A*y6oqkt0iO;k(!(g;8X^huxA555yEPTZUi zyRj8-cP_c&OT=T85-ruBv|WU`h&}YZB|!@#CdW76{b!_M?9;NY$xmhmRGEKFiF%&l zCe+(Dx1|uI_iadvye;s~v?c%^gKTsMxc>~zpE~sL1T;+F=~30NZ1IoT#gePB$D(6e zsmqUiE;*sw?aD~wU35o7l@t*p>?_^P6yT;OLO1Blg()FSfSRbH#`#;2=WvlqrolqD ziD=X0t&Cg(UDFHBmrS&pRTF{+v8rkYv(s$L%1{(H@PGr}_byDkZaJpW+`{bXrip`Ehxoga$WmINdKcoqm4r2~` zcVE^5dNf|TBw|g4*7x1=4z4kE?$rI5OMYMIth|w(=cq+xXZok*GZW*$9p!P`-D60< zJeDyG4;=%T5+*>cM6nH5Hc9y^YsK_czFQ}cFHDp9e}o1Bo=XH1zPQMRkh|vZ$+9F| z)v)_Q0Agb$Wj&Dxm>+y2)p%6`&LUJ8|HoC9WFKy!QX$w8D;C5FZQ?4%oJ;vAYC{VKQ2d{4S|TE??G`kq!^1z3-d!PD?%=_;^>i? 
zg46sti$ZC!D+O!tr~N>rqsq|*1WO9PqYJ_nU#8sTkGm32u%WsFTB}Z=M1xeuB`en- zD#=L_I(GI@hqjc(fN{E>3r*S#nOKEVj#GlwNGw<;!=uScEv*_$$mIL^MlQ(Xri-z*rMs^s$LV?CMTlaw5zeGW-hB-eowEGFU* zIO{<$m9q8*pmQ}M_TuQ8bJiq~BYCB+gihm97PWy4hjuWHi`uvN7lTx5P?DE!)k8wm ziRq)PZ+)X?FTrswWI4x)`KFG&k6>eDAdD;nCsWckr$ zHR_r9NB1Z%+^p5~wrB_iL?}3qE?~q+Pl7Rp2n;mHir%Z7{(V8hffMwM^#`^G4&B%J zzCF6kjq~-9Q4oh+)YxA2l8!f3Ab|xp2mP`1(Pde0m&nIaDYXYa`s{1>0s+h&c_M~{ zTn^kp_<}0IOnXox=*HhV;b-l05i&9}dl_sHeC@pboydMEXafvmNqo4TY=f`Ny-ngb>|Lq5)j+;aCB#?gbw z647pbKoi05N9ToaAh}^xL0|ekeQRsonRihFKVlh#ytAdN3}$ap|FiA@nm@L$NEm5# z9fNUNlS8QeNN!t0*9>fKJ3oz)!2gkRXq1@|VQ<8uB|`DQB>s330<^b09FIY@N4fdK zsYwyM2gcWir*Yad-Rib)TWn4_i+-7zLDmCQueLKF{f!F-bwl%@)&gCrXpxf4&CTb1 z|7rnWCRs$8cqOaJKcLsJaAz1scwZNW7m9p}3P=7~xD;lywURo#0p^lbat(%Q>%WNE zCXbRR>$Z!{bTKI?T+;r9=PJOzejC+fyHf~Ln#WRLC!v&BDs&EZI{fuOFX6O|yGAJA z`)G?q3xTiM+wYCuKS4Gc8fi1~;Psu*jr0H3Q#)4Mi+aTKxL5n+;KdiqG}5~y-F>}92YvlHwnd-4k-=F6lg-Oc&mYhP z@u;*1JG$OvNzy5*UfTRK2bbSz|5k;P3lDZC4`oP`lu5vtR5X>Mz1Q(ll)PUTx}KQ5 z4)=M_?VWHXv+^Nn-=!$;8vEv`B4=NExZ>daCoFj(!v53a?eMhTa5#(u(@Z zPGpyluGGJceL^%d!3};%IbHH$D8N}{e*Sho)Z+ZgS!oi7glcpaFH?saEq@dbWp;JfKGO~T41Y<$lLIqIfhOxv zcSI}?1_t7M*fO0K@n)Ky#qnMc_51+8I5K=%a#SOjLP>WbeErmkGm7=?4p$2FC8}CJ zZRNmZYi|jr#ehe!Im)s z!-4lY{iyZOtwgVlh%Q)5T@drOiN~>!si>MoZXc4r$u(`!W?ryFTGGhp7}aSMH0Hp& zCLv?8Y}p05dC}y8b+$A(b>!dM#E6s~`H2n+*kSoi-~YWbEy;2Ue63{wo$g&Hw8OEh zEsMax6`=cMF53MlFRZ3e(cHwY(UaSR{j(8y%IbvaoLF-xaZGE~R>!8}gEqMqS~o)9 zNZb-Ukj+4oA-h z;4i8nt?-VT^}hK-Z+CLq&2_NzwC3jJ5HLI6_V96Lk(XuC1jG8;Y{M(fo!nL?!@W?3`i zuiVy~&fO5d#h7hn|NT1pALct}jvKCq3tWP4(}US98QyMx?F-;nV7x@t9*B3#i{5re z$WrMD`|d4TWPGKN4^HT+06zRJwT*wx$dN(8oEa3xK_&6H;Zi_~&%X#4x1-7<`OKC7 za2%<)zI}AwXbVX0*pJl0Xn)lv=$R|eo^sWFG9WJN)3l=LnAbR)DuNfO5@IY@QfpD@ zTr1}0HzyF-T=jL3(_vI8D~`X|P!Uz*>bPZ`_RPb*FM=$3>_b#hWCfARZK%SvtP;O0L^p~eEkYs>ot{ivn7 zgCWk7-c!9VQwQ0vXqW1H(??q$^JYcM4v4h*2zu%PMQEO^;s2Tv@-*CQXo$`ywSXA2 z-uFzbc#Y5}0*(Bx@9GbxA>YJ`tHwFNoJCTR6mfe-8=-SYx_$z|qM|2#q#8EHnGesf z-y1rrjc<3SJVn$M9IVX+jJiUKFd_5FIW^T9MQ@>ns)oNk?6hv#3#z~~YgMwoavAE% 
zQ-v8XGSUXmmTNO=C~cX%ZzBACrbqvB3k>llb~+4#_ZM85%MpEI6yO=S9n>UYQNvD@o%cy!~X zon+{@Dc;UZ#2o)_o$h8N^Cd|*A1t8C+DL(7tDR!U_21Qt{%STKa{Aua5%lZ+0Z!_{ zy^x=~v+v$m73JQ?h3@+Ox z()@=5k-Q^rQG^V!X zUWeDsb&&fz{Vr9G*?6FrEK*`*cJ;*;XOFKvm8GZGyp8rp^C#{Uey@*7Vmg%dnsl{( zG=k=;nXDR=ZJNEtTau3l#xF^IF-QNvi#q*BuA6u3%q!e^;-KSscW&xuFMN*cMmFG4 z$F@gH-G+mDv;GhN9lN=2W$(XD8~D*`Hi`2fs~|&9^3yGJAcU^+W0tgEJdpDBOF?C zSZ(=JG+n31E~Hv&bFCZVIW4CLaX{q%{dl*L~}x* zOd6ZiZ15u;(%%mf-jjNey~W{a_f5*sB1&*`-ZiB2B0l&K2?kw!FxmQK+;+htV5TWl z9GJe|Vi3J%#&e$aV?T)!iK=4=t$%BBWAb%1GTi9_-mr(8_%_39`WXp_z&6nNzj;NFxA}=!r>3}T3^q|b z_|GL0TEt}QYPO?IZNTGE7R~4St#AdNR&76{S%%X z3A3^0jHgQU{k)oU#|PBXqjyH?KxP%rnKpfdYZ%~A`QXv}@bLUEbU=K`aoC*I+v5&` z;aO5?t#_Q1JHAu=Np;J525$)hTnlJ3Cd?K=-&MX`aYQ1klsNuyyjv{b zGyR&=w)+a%WZwq$_@9X1O=<0LWrQa8MH$DV2a5RiCucV^54mslfj%fT5tdXIG4mvI z2=I&_H`c9~!#LuT@eF>>)mtd`TZ9Vk=EX5h{-%(g?+)DZ?DI@sCwQI&X6mIxHwf$2ortM7|XMZ!(HVI_&98m1&EQc9PJS21QA z^0P#$rGCW%SFB$Ix={?G6Fqs0)I6@*bba3B^NyXVHT+SlhN^F+GNJnM4Q{su;1gk4 zxFbYEt&5HSw1~&EGc*;uve7(6is(DlO95u`ish9TdWMOW@uZS-hU}Q`mQNPg@#x_CbD# z_N6=Iex!x_N;1`zzqo% zMQjjk&qZmBUE+zvQV_m|df!<}aQ}XtWy-|1nod~Rj{ngi7#>m+=%~!0F$zmlC#oRQ zKe(BGw5&~LTz0(kwuwRkhTf~Tez!&ZVGlnBrEoL?9<50v9JjS#BC2}nMac+nG3@wc z@94RO7t7XusB-(q(Ba7fyQ}ZYiiu#PsC<=?_NjD?zc?{HG7CP9&BcR$}mCCWFFkU+-ZIOf^UT7v@n zNQF~xn-~y6&j!uI3*ck)QIJ`zoK?isf;HouA3zjZ{dfD(Fv#9T&?kM#%#uX??xjeI z`R+x=%7baB-G4Z45Qd<^zT%&N?g>H@y>TV?eS&tccX=#&_${ZcD^9=2w2ZW`O6XrD zJ{63c4w;If9^Y_;h09}!XFZ{I@PrV@SiqooF|-geUs&sks-8FZYjp88)n&mW7}-GO zhuTR>u1nmtQ;>Mb{(zE#H;SPMle0yNnU`oyKo(qtQiw;eI!TRrS2(W;BmOpL9F(V5 zU5decEp`L*<@h%5+*I&=V+TP#)aBivGhTj{jX1;+nx+IYXE0EaPS`d z6gk&g?9YIrYEN0$H`{4SuABDPml{y{vVtkg=un0{hEl6?ql>peL*b!Yup}{RU0|(P zoo}ojC*zWl4}2{%ovruj;iwmp-KQ@U`zIiP*Is(GWOB^E^{`G~wV`IgqXEQD4Q`A?y>jFID^x;-P$k$!@KCLGr%JXJ#!^~piY9P6R+pdA>>PoS(a-y_3xNFK?v}{8v*Wcsw zPRtI{ptqbNWBR27;?|;rEHXDa(f#J9DeLt%n@Xs#}a}kH*$9<=!M5K(h1D+ z0;V*MeHzNo@g*XlWpM^J^n@_UxM9s;g%t@owi%Vjex4`<~H$|-`*m=rLOx>v>8h!^9#nx)IiT~bwSJx$)|!N`a* 
z|813KkR`K2C!l@4|F!jW%mNm$y_x_{|6Y&Lbbj3S;I%=#nC(}`{ z4D4l;zLyF3oYmcgWfpkz?|JJG|Jj0^XkG4W|D;<}F{meD1?Aags&u8yi5n>8c$_SS z#%3!^Lp*l9Qh8^z9^l zLHhB;Z>I8Xy*`fgvq8pd%z>C#;paoC_2mgA4nNOEkyGM>b{?fzoP(43!jF1Og|!R9 zwzoy;+%LMIp;a5j73wO`r-tx~gp<7wueyGmm*&|S#ok&(XV9cEN^O8T)vpvTs|{QO z4Aea8uR>kJhtjGl;|mv?CVA9?w+nnH^j)_EJsyNYs5Av^17JHJ@f{e z6KvOVZign9<8#)ntnzN6CZ%Ee-bDpQiQdkYFEdW>Z}24bYdi+M)cX27tJv=5-5#&V zRku!rjH-jA|4t}TQ!;V4U*FD7)SFZDWrV-f8YI}9Dzhw7oE!nFJCR}AaDBbxO)Cv6 zgStGMfW6Opc`CWwDXWdkw9eH(*1O0x=>4sxI}mG>B}0)9iu!y)re(KQ)$05m1LX^^ zxjU8EG#8w>p-^*&tGQ;$gzc6D(!%B4S&M@)As=uZeTT?Va}btQ^dM7z)~~ORm3T=) z@nF&ita?@F9J)E>y!qvh+p(|U771G@QkoO}U6wb!xa`cX!&ql{!sv_5;xUXU!#2-^ zmpLP)R5L_rs;G$g0ZfoD5b|l|TU8OHV^Amt%rs@k3mNmU5Gw8SL|D!)6oLMH<0MQC z`KPDeDs)%=BL`^%Of8RBfU#j~;kaN_KI=wfk*L?jf3<<^I=Y>pYbS4vv) zDQeGHpt9qB)vaF5E?m)~>TIvCZH>}{`dPQRF_$%iufv%cuYtggdPSN;0`KMaozSh1 zr~I^LjYz%d68WPXX+f(g{1|PU`o{r>L@$2cjuw*>tVrSfC3bBQr&6)Mc&@nx;?x- zxn~OCS7kf1o%Yw#I?!W75>m{RYPD7FLedF1Zr7}MG2dKK#?z*Y zPfm-yeKbJ>eKeQD&h4$@OXKyUb`}G1)IPQ;QnV2Y-?pw2dpA|-G`7M@n<~ThuPO@V zHeQ}N1~Ue1@6OD{MtrruK3gmHrSGAR=gfces#~GaZnCKcs)fh)2}XRxOLM)T`Atfq!4i@UZya%cP*OR@ zoa6Jx-5|sCEw%#=flt?3u4s%pB+&S>Cl-&lvANRg%s_o{<;aTliuVD$tG@il9sIXG z8aT?V+n_oe>$WIUCbl+K?;ktG5E%GZgXvOLi=!XuWSNJ8;vqN}Xd&9E`y?l(N|R1p zr>GD?E|!|>u87hpKkthbJdz+D#?cJ?pIIe8qil!0SU|r<9GRk_$k&4);%?DE zN7pKJ!w|i>Sh0VV){!Ue^=lZuJGW31J|%~Unk}IhCGOANPjWm%ee$0Je^U4se}{v9 z^^OB&1r>%qZp-H6t}+N{TItw4_{Xp`+o)R+jI?@jC{mIb?DphvAtD6)+t)>nNhyn< zuBNJ z5m%e&b%Z`NzB8?J3cK9!Zyk~H*UY6&N%gPi^6^Zq5Tgv>G0K&$PTH_&dSqnY3$$C5 zQh6?AZvdr(h!OLysR+K=c05HCe`3Jj(~cC67o8p&3v`LEn4a_#8BMC?oz<9FmwwVA zMD&}%51tL#$b#TNob;DMw}AW`wlK!~3gvf*`pNM@-S3AH+=v}{eX{BMZ;ansKCAnC z(5>?1KU>*J_vE9Ed@+v1!;*dgT7gXWSy@!oah8HEDJhylTBZ1 z2BVc9R-gVbzM|PVEKX94=uS(vu$#n@`6$!W{2h!#EJ@80tGL@p4tf zWCR=1eHYcTbw*vE=eyMd(zzp^_3-d8KYibkI++Gznh{x=_;J?Fg^Szee%FMuLqGRL zP@;Sh^8Mjz&LbBCSn8R}2@Q8XuAFNIZ;NmGQe?Q#GUC!Q9@y>_X;<(yD}5Vcr|?l| z*xSaEh9X~FZ;PJ*loq>@z?{wyT4eHMt~*~I-@Ep<8Poyd1!E&%Z<^{#^D_Iw$Vhkp 
z5EF_n^hP*tm?TF?S~yJ|N(E^Sgyb~m`{lA5uF96zX5KtU1Q4F1_KBDdY{tK;eW806 zYM1Kts+~nc={AWufb*Xm`I%38b=oh?4lCXPdas$ctt}bT9&2g`jp(_BTwz-*bU&$k z_v}5XhVtnjrDWxrye=Qeqe>67dmxR4++H%Juj+WxeleE9VZ0kk)qkN)^^)Zw4zDF0 zN1BR4r8U>@z|N@jq^KzPtX(mpde^92Qglp+(lBzvp3JxSg%hPV5N}>}XA>qn?bb|* zhnAEg;8o)vNU_Z$<9F4mGqwa$|}@?}=I2s@A&S`2#bkeCy9H!;Vyiy3Jgu>FMXug4c7+#7}`q;*p~ zKNdGH#I=miXO$1Ba{dvMiBAW2|9r~3X!lp}IopLz1GW6;mr0e1W8nFGs)QTb2g{eh zTv&~h0s1xYo)Vh@Pd*vqeX~p|d6>dfn33nMr@PJ|mcKdWQSdKNn@V$6Z$fIs+?J~R z{RgnSKE4ZcOhY`>fihm0J?Eh{klw6!9%?$LVKzKu&@A?q?9{|zB>2c?nk$lmky`rT z&uc4g|49X93PGP3KpC_g6kOe_8s&T;jrSwv@5yA4Y-*MHna|9()BTDEFzy=~n>orP zWu{GWP>{?10Q+M^Pi#Y(^)tJk=_^-(Q}_BuSJavuLgUApyXfK+#5}0|~s*7of zi-+2;6D`6V`STtiMvDA2d6M3(>pL;49}lI30NI7yH@yJ$Bx!mLW3K5&Q_1slyv`%y_m`l{3whiyM>7w3Z*2Q_Y@`5mXbi^2Uv!*mN_&?1fEF*7z`*p8#9Rpg&Jt_G&^Qh+vDH`AU>s+|z{~*NQet4E@ZN%s$CcSVjAOY z=N;&^V=we1BlrD(IQC@7>7t~N()}-IXx{ykryJ0CV^wK1@Fo%1qWjA%OqZp^3bz+j z1(QElS$-}rdiefTWlRub6;3oUewh9i94%LaYHvc)9R>++Lk7d2YNotKvGf=py=v-g zR|^8L;J?#p?x%#KU@Awb&yB`Zzn-*8s9PF7GK%D9Vj2l4ejyUQp9txH40iOS#{Md9 zbc~9?MNgGJ1}LoFeAQ_$*lgQ7I`dDp1_e1=n|Gy1h&(h2d#WD=J{tdnpxo9=+ULNf zgic2?*>k4Z7g0p?K!t2s>n?I8NmNu^)HVd2X?@{z%=9>MjQH1=mE6>nC5k)6aLnx= z+}LezMgH_N8=i=j^`&HFHH{<0E3M(<@tiR+y&7A+XH)i)J|bniJlumW4*#B*?wsLd z)d>Y*7db2a<9C!Q-*0TI^JVwtcZS@Zq%1dzdoL1Y-+W}#wcAhYnhO=>5*s#9-mdX2 z#kVE9=f2fzI?@@>2fu09u>=i7rR-q(Qp+7#8eK(NA&ZY2-+Il+_6(J!w39QZr-V?x zs1!xT$owBgXTcCf+eTrekp`tpQt9pxP&%YjK)SnGx?4a%1p%d7y1R4ft|g^eVp(9> z_nYqzuro|N^PF?;>u5?+9dzR01(l|%WvyB$`CiEBclo(Z*R9+zX`5k}{Q z+(n91<7q!|{;m6=2Kq@=&;dk%Qv-dP4SSJTuISZLx$n$->Gkx+s;ML!xXp%*xIfYY zY@KYT6+u5%7YpC9xOFMOOL}If-ZVQ?gDtILTz#ic z4+>JU(UWSkee{#m)mqQNt-hxt>cn!#WvNg+i0~f>c|wDt7PPu>A3p5;xRX#V@Kb`p zu9f7jG0{E632UT|!>j+CwD_LPI_aD+Dm)mKG?@`HF({^i7g8|v77zx4byC$uEkv%& zl-##QiD>6Zc%jnyrr8~tLRzm&Q+f78A3k1W((MLjOd37QP|ei0K;=~o)Sywsm9CQw z4s~^|`^a+x+EzBU?S`2|-gzN<()EZD-zUtRf6&mB!j zd^$@Equ93)&n3v8Xi{Y+F*fe}>2o(}(~tPWzaFz;fwP*O|2;+ac$u=KrjW|v(vEh3 
z`(T6Y0$2odDDXgyUvfRb)nvq|>;CCdvWl>aU)-d5m@_O%q)5$?p4%&^{-xm;(Lq>q>8=31*c}_D?WUYgxY150uX3dlml6 z5}w^q{3G8piPDt0|2_{vLOyeQl;Bg6u2zndq1}vraOgnpF2$Ey=Q_Zc!%S%9&eXcl z^Z^BN4qNg?*`xUM5}F=#ivWmisY-KhAeVQ1)!#TVHI2fvjQENZ&iociu`PNllIe{lZw zT5Qi(;5=!;_t7@o8F$S;?z(gW z3wo>0dk&q8O$*}zeE^da_tDZlZ}KwEkK=oeVY$hpq7yo!z+Uv5BP0j6&|0>>O0F| zb`A`pMj8YXUps@@Z913G#dy~xEFzd%zt=JB6~yf}B{mSd;%oHZNzWcqcSbfD{zmcs zM~*FZ8u5B*GWl6U ziHZ#Bkz>*%S*O|yl0M~%q{S7uPB_w&n9N_=74X~s>iMR6ysJ6G$4VGd$k*tOq4?Q7 z9GP}8W87!sKwDhJDEd2zS*h!_;)~E1N;FI!V2a`Z9En8F;N$AIY%j$2wuK&1mFanc zI?@G;Xd$u`Wp3OS4J|64l$FQzq7!%P@b2N#2bWM>dM1!n$M zL*iUDr6rE~^=l{M!O!jL7z-rtlo``SFg{Z%>uX~(e3dOIx(F5NX#$T_)~;p1GNGj- zm6wg}@C>u8V1ABu`#(RoHPt@a;U$ufbxYXqB!4Gu9aiR4wmO6wjj$f7hIo4Q-T|oY zP&s;HAkq6R|4IYkIHscXiESZADpxFH2R;>DeRceow6u{|< zzx~b2uf9Kd1a!qvN7Paxq`b`Qd#+r2`VwogLnh7G;oIeFj>7kJ;!yte)!;uLg_p%c z@{-cP*l51$Q$x4T%6-=cC>B`=kQ*nHx?rE4ZaU3XXlAHlPhD*9DBxk_ud4RSplvHe z^k2$fZQxX!tZfMmny+~hN@FJu2b z)Gl;=8Ts;lFCPmDgy9+|{3Uurux9fW=Y()d8|KWxb$s$3CDlV~&h*F2R8jrPuUD7& zE9Ab9hx9pHNCg~r0;ucy*s~_6ym>3sZ7_AUp*Ij&8yN0-_t~yr*g!EIDM89;)q{!60>TI zGkGonKH(GO@>C@)UV~DbdUnEOs6!phlm9~@l+w6XwYe^>I>u=e@n81#Y32z>nsd@y=WbPGLdJY9WbZE4+)1JY?IF zP_OjYJS=%cgi==0D>bSrz}6QiH{O|Q%F2174egWfW=BZQ?=IlOeixgKx4>EqOtd8D zF&eY9$T+At0^(VB3g|G@WAcjVue5D6QvtwC=xPp3uA$RTRm#$w9GjR@5E5U4@Y?eqIX&-XNNgCg|na;u9)zL548YEm#ru_qjCG9v^Er zx!s>YoqphP@KIz%2w(c3B&s}NTF@!Teo)3qZg!$=Y9hTYZR`o9*~Bp^EMQ3YlQ75u z*xMY!b8N1F@H)<3HQd(;w`z zhiO@cPoX;p`H4#z{u7YY-b8#wZPi44)+h5t&cxRZzqpxIos9T$CCgmg z6q)vo{AD`7m}d{~3d)Vk?ccGLBM5CbX%}08=^x9KA zc+-PyS&R%x+;NznZnP4|46EBMVnVcr-9_(jrPR5Ws@rJ`{D!@@Z_dw^a`?p#ZxSj7 zF=zWE>3e}yu$suD)tUY+vkN6%XDaxnoN0kbA_O9DhsF6SJhfEj*)d3+DEe2n!@Ff) z*`xIq^QGh{yK~N{%Y9Luv46nyiIn*AlFZeI|4>kj5I|}pA`K*&YH&5Xc>Bz1a;|-+ z#*#jDUR4@o5_5+7IXJhHe$RS@wOf~OSoiChA?!3V@8Qv|)bSY@qlt8srgS9Fr|*7) zFpJcVTK$(t5UG&2jTMj%yAa#+$P2?4|D-DkmoomQQSdu^6O>LaaPIFHS{ZQ!2 z4WuW$u_p}4(>epv*$wtRv2PNbcX8c)!Y_Kk7M*@T>p5p;!u!VSsbt?<#*HLFJ|chy z&*0!5LFPF>5ZXw14!tsm8Nf^Trl|Y!NSm$>rt8Y!z$8e@CPTtSfIb8-gP4j>on0`l 
z1yi|KY=_E*Dd#yCxc`e~t_%sOLM9*YM{02pd{=C#2oVTJB6L@i)X81m!OuRcxo&gX z7@LPnZ;>JG<%5R+odRXF4?R}s4g^ezP+Y!si-m31VV3UV``%g}5@N$+OmY&F(|)i7 z1--GoSl9au`$6zp8K|f0_P=870Mg{>L(e6{k-I%{bOP|iC*f0KvysZpe?JWnN|Ik- zT@1s!n{@lDpBp_pi6$5)I>_eUKz!*Y44<2Oc5-CuOQRaM*U*qSzpoUgCw8NU%hr4g zi~R3|Dhq9jm0wK>j`z4YSqvMV&jfg0;xge`xtFO`(i{FChJ5<;+T4EtNwiH3cN;+x z`CI$DU4rBxqn(xSblw!Ean-Qo3PW1rQV4-c?(-Ic();gtgi|T-+F?Mrc}I>Qt?dQo zC4eZBD|paMayb+t^;`e~LE(q8lG+ZwA_nC8PCl^LeDRj?tUZIu9OL~$dbZ3qJ(i2# zQI+~&LRx-ujw8{)J4Yjg+20ESRCwR)UdDH7xpAz0_Gi3TlzkPHx+Vnh$sele-e_L^R{ zr68$y2}>`W4`uXT!!eh!U+rH^c(evz-kd65FtlV(y0q%+q%B8;Wb&nvNd`otpyU$t z?5&4unK-|Qe20gEB}uw0qoG-?zzrIEsf5Ms)sv!VeslUzAl?&`Pl03yQ7@H1>HFyu zLKv?~eoLC^4PD0E{W!BtzT(qRT2b`?TW5wB~JFU773RoiAkY)=OMDHj}- zsX~Xqa(1fou9rgD2m8pIhA>UZtdx?q+YuxI>V8so9gzxE`N`pHxspRkgDFXTdwKev zJ_HWTwemc-Bg7ykpQ#P6E(gFwpN)PSHT|+0xOeagnD!H2+Z=e=Ec(Ex)w?Hj*K5J8VE?$_MC6)%ho-2I_ zrSOZty24^RqWg(5GL#!(_?tei*fdOegh}ka1byR0-wuI;Dkb?uAH|4kOchqNg)&wo^7XabIYbDt zP0m?b*U`z!gTMY^#(4e1Mq*C(m|ap>NJvhMc=)vD;w&aQ$K7RLZIXEup>G^W0Q~FI0%%^kqWPkgzhc&>ZyQRFaZt?c%;>B18 z*_lCPWRzL1U*Y*r-cEsIB)f_IVwfA>SHy^rdYp-bsK_*@E?LOythjNoiY}|IICC*@_cjRVUcZs`P z8f%P9t^AFHzv@P+#2fZ^+$Vnp>L&Yn1yQfAI+EZR(y#{x%itIJAidl*OP^{|zirJs}P$$_@FmW-Wtsp`Ap6TCeo$xsr zKWk8zyA2FR+Q@8t!1h%kTRF@blm>bn4h-onK9!IMJK&#)yOr_G9 zQ@Ed1Mjs58G%c(6sVmR2tK22`_6Q7r!FgPeo=NZ38L}f0o)M6ZUKV}tK>aH>rK?18VEYxH38P9 z?ia8W-~9G#vtV{ze$!2O>{%N3X8HaPX`O{vyr}$&W(q+%`fp0~W6cs4l(7fBc(^bB zfLX<8eJiY=9v8ByqLF`kgic1QiqOcy1=U{c#Yo}>UI*z_inPRqTs(bhUF+qG{g|oX zF@rpN%mZA}8n?!-Ryv2%rJ|Gh$(I_Sf1a;a45i07|-3e6XZ=Ra64pPA0z8tDV2G{ zPomV()@%b-+mWHqa!hJ3P5AO@qWshkkjO-xm)Roc?=7sL<>$80h`O7CBuYf7b(eFp50RC7K)P z{E>`rHSyk$D#Fqk+4mA-Q9?d`^h)0K;%dx*$DFDqqq(?AXxSybVXFD3lM78Pm0S?d zDlV={GaA^f#ar1>rqkwl!p!mC9A3P!Vz%%BOg6_pyMkL_Jm$IEZ+=F@-8lw-&wZCYe2#$9pA2B_)zyq2b}BwB+&OJQk6_U^S|^| zc#)-x@{Hp6ga#pxl!0KtgG&8Fl*Tu)CD%9qp*?_ac%A7~j%18yB2`WGJ;h8 zHmRQGRGP4f_?4Kv9kdk`!=7lQEpBG3otkz!Esz@nvvmcV6MlJK)elL$lQdnbsmPyn zx6~k{fzgJ(bVvp(j4%~5Dc^ia332pnp_Rc~p34(W;_GigxtU^;cpkD)@TWLdSy$#3 
zMahj+w`TFn7(Jpbqw*zZP|OYEOZkQ;LGZm+__?lI+87B0E9&o4q$Sn8oM=a4Zz+rB zy4RgJuawKi;++4aF}@j>TKLLDWbk5)vw%c~M#dt44H$FqfVS+xeqE))f}bl88f#5k zKRnft0!!RN>KvNQF+9ATb>P=?a=45i>aiQTtxd!B7ut-nLe9SMdP-QOsd;mw)=q(? z?;a4+tf76O(l1R;sOajhG=ZGPAe@s1kR)-m#~hQrDBlmgiNNi6hhs|gqIpBZqa*3Y z3;?o4#t9~K3~gVuk71$8*J#w#;E(GC*1Fn(kozrobT7Gs#5 zjJ3KgJ^Lq%$1kvAKnvx8`NCg7W03eJyGOl1W&>VvF(*J<433u+a0+IA>8_-oC(KTO z`TjKAEcEGhXy5zpmm!D2yQ4cip`U{G1JW-2^gx~cuN$k`1D(+* zCFQ-GNQwY5_}{%@YK)Al`HDFbXZeW}fd#ES58JN4$QEvbj577ix!^A%emeim*-*s) z=H+ngopm@{Qxt9!$6g-SjRD&G|7Mp!3m#i!pX>%TfSJwQRjZyeS8vB66os=<<%eGl zNQ$T#TI@fB(S(O5-QsjKL6ZBFwK88=jO$tM@E7@@(U1pO;&AB4uEwjS_or<3N8IAhPU)? zNnasaK7M>YYgexx`jtbZ%<{WUK&g;cpC%2#)Y1V0IxQk9Li;3;9FEz6q0usqmw4-{ zb}u_GcZKs6;>wYGC&az!^q-2-h80ziez*JnmX@%86NjMUB8Ie;yg-W-N9vroNQ$CO zYAcj7nwxd3V?^14f#MzI4i6AfCJ5p1clWP_xTOlZksNa#kn79{Db;dK5EM>E|L_zz zHBlLBef1^DN2>DSQ<}bl5f$WHYDe0p7kF9b=qF4Q9l0gbd_Qp9*s=XGWGPQ3Mv5i- zjEofCd=m6Q7xHOcok{LE36vD=G5oeGQvOjnGb%Ql;;2+}IL!)Wj{(nnD1;Cs1Dqhx zc+mF|*hmQ%?z+W^VZhHYmH=DKU|7&TL7eiB4c1BgBTt|I)t=?XwNxsU6OT zzjaIH_6EOJFL#ip1PK4I@Abo~#F}1BR^9c~{p*`y>r=zH2LOd9%wxRMHK^ zh68W^^fxHHS|)sjPj!J_!fPHusFAnuM_zC!?^6+tfG}41?TA~g^xYrW?@T4&T$u5Y zbkCtGmTT`Uo<=4ITP9AIA3ibWilC((dcRjCjm#5A%1rkFtc0C)o>*goj3-|I)7>Qf z8`kP-|1*c|nls<7uFPDLegjEssq^}keWU$4(iG7zf7D3vSljv;{IL;oTm;eXNTX z$=fLq9~79OlYJ5fIYYM-L)hh^u_Ntn@sb8rTaGn1rmLogYa6C2bM;j+cFM*q+;YFa z6QNK)&5Tn6M|chN=PDzx;a-qh^pK4$TKJc%Y{+J6#5orNw`|)<7)IVc)C*d9h zcKh>u7s9cwD(0H&l9&sbdT*W-SbL5O{C4zn2a@qv8uO>jdW#}FK6JVrrN!xmN85qrWeLgz5E)vS?PGguVtOrF{HOlP@;aA(QktA&d@mijOYK5eQk< zT!csCSlPy~UMqOQbpot4SE`+vVgfrAw;Q2{b8MeYJc&6GcYO zo^(Ao4JUk#C=vj=3wAngtbG&52rm6cjVFrbf26PbY*5lej6GJ=5I}A+GE?#kB$05H zqZf(Zz94rh@*fJZj#gd6o75jJE~Q%^C4U@8J9T(OJ|iWTcN^=(T`mf&_vt(yQ+lR* zfLHboFepU|x4aw-#pU@tY>IaPX>R+QCllSkGQ=Du~)#_BGDr zOa?mgvJ-(*?R%o$i@ySOqTFqp+lU*zIvRdHEchtUsnU~*S|xSzJ<4P}R)$+scE$=Y z0g?rkBKg%c4tudE!D`R;FTR##$(fe*GP%-3PX#AAvm}4Z4XYj+gTDMG?xuheiS^;U9R-AO)SPF>-_s)<3^*ra&_#WZVzKLuq>qP6ebuz4*u 
zT2c<|QS6^JzPo9TQH@aK9q^;)EcS|Axkosa)IsSZE_hfPsQPX!ZH!u0#9cOLNE_|) zRT^!tbXypxM_F9L!50&+b--fB>$TM&p>*lm@4HPsXZT#wnYa;dGu~(_`3&TJB%9#20FpS6F z7gA8iuZD@8_DYEf>0gd)V_j~#oP(50z=B%bE7mQ9eSH|R>kED~TH{Yx;-p$o)uKIjd%CwG8Xgz)*QT2U#st4+%M~?nmlfGD(6*+!BW{-GD$Se#H={J| zw^8HpSJV$y3YPDOmmt>XGtZ7q-7Q6%O6u8BT(i=rl570^Vn>ChXbV=Qm>HU78EUI- z-A+0FgX7H;V>YV0BQ(#uSLqV>6~ypRrMz7!Z3&a0F+X=Y^e+vOVH0j?`2)+xl#5Mo zYsuDCu7-YyjpU%?HEvO_ZtU2mAAsBf*r!leKW3pSGtM1khL7&sjAGHXA-}Z+ zMcT&Cejv7;=wGU1AGsdG&1CRJi9!#ty1Gnrc)iog-9pB}H+@I$)%fEmx!*GINm~)p z^D6V2ak+_)F&#A6OdJ-hcoBBFk$<4?;;k8{#gh$utJ1>d3Q*TyfW5}8*nl{-%J_Ps zHm&99uyFl%9>ITyaY+$EMkq(MBq}`L5WkS~a*#pSEzcr&t;b@R53 zZGqS6c#&Y5{E+VLTzOfjEbgIy!qr>K33XRlghvZg!ByOlY!akdC> z5Q!Z(?^^-f;!kFT#AXvP)l1apqRKDU*%Rn5X0jaOr8IShj3awP*iDvxAm5k;Smzq4 zEd48FdK$%b*HD~V@uAVK4L@&u%5TzyGBYQkHu_Ob7Fk>U5sX6X-N(BQcFg!7-0u`$ zeZEKRZ}2IBGurS=;|*OWMUnkH1RuZ-xlW zEZ-HM-xbxC%0&FR&i%rg?lN{~)mV2V`Qj(IG5Mv^{T6LQ@@%~Qc~v1s&yIRn5TFsA zIyi2jzzh99dxPV~?3KsQniJGJgST|h{f_ekW0w23DH}&nPj_jPSI{n`ssdak+{O0V zE{9lLPocQ=zqZ;uICuMH@K0Pwk}D-t%4AP78o#2zC`79snmE=YX14zEwPax@r!$zz zDfPXXm-JL7%oC@3_F2k+S{i!M4#inulPKdze-~AIx~IH5b22pq={tSj{Vcp>O*z3o z{2n%dhm_fuBIf#)l367_$e5_p_O+}R4|a4TakJLMkZ)pR||`BCPBYP3Z9RFt(QrY2A8BeGmc9rx}!%=aP$D_Kg^?EMAutVKUxt4 ziwOOjF6@qQZC6}tG%&^O{cEX`3?EJ#Aqr0B>D?FeBG>+4uH`UxiXG@Tfq)h><|ac} zx}4D~2G~KQ&iRE~@@NKGzXlQEmfR}!-OiVdRHGzkem@<`Y z?a5Fr%v!0xCeiDo@7+{W2PJbN&wY}mA^FjC#-5sgB^T`{H;SaU2dE+xm_%k+1yb3^ zd0#zQ(`}Czxkw+riw`B0^A-MJEr%m^1ot|glC|eTtCX#9fL@5{U=%#Wh(}Q8H5L5K z`4IY6Zu-v*VsI`3cQ-3DtXitW4JO2EE@{49Aqe9 zx2%HZMFd}POal~uFzi=byfW8+C@J|WM%5sWASPwc>*)<^upx&mE6eF&PFpf~V?KA~9x`_agNhG)JW4v9%$&3t5SU-$ycEk7RYCuYJPvMN7!n zj0pBJXqjuAVmv=={bd!+^p*B(5Cvr;IG968R=FtN(}C{piN&8>Wd_0Kv&Uh=Rb8Qc zur|gR`Q3_*#iVBLAe`cqHn4XiM9*qDcTtAG-q(!c>Nz`>lL))2I-P}4s$-O$#0UZ93-LBUfRsjI&5r9g%(jn)sORk zm~3|m=~kz_e~^$yh99fj?u4uParZ6X3g+~2>OxZ#OS@1)+(P?+#Brl(2nF0|XL~jK z|#6LS*BMfmtCLl=4}ve$qQZ!#!bE_aoFG>{st%qI}vo)SE>E^(K6&f zxrt_+E2-)gEJrw#c7a5hWvHpRh@EPGDqq+ufOeSh)HCIV@ 
z70+<5Mj&~ySJW^&z*Qp`&UuwBLM0rRC(x1y4=&kN&61}{4>*tcUPgdh5v?;?)nZ%u z0YrKw0!snDs%H>d&pDDhPY(WUdo|(zp2P@%$2NnbWzpKRgoZU?ZS4HB)LrHb6vI-%D@l=al zv60mBhE&w?^PggDn~i?HElr}H=m?-aO662qIrtD(?5&^lREWejxROLLKgXSe{3;FO z?#$^=MZ^b7f;rbWWbKPgOg>C{&MW9+wI#PwyJ=uSMOM}MML2mCW%E|5++y?4ukQP| zW7p^h=&|}ZjTM1r+0Ps*2@b05CMNxV2dkzgh~f%8_Vd)`q4XnpG;qDEOEX}s8;4t_ z9whW<({ZhEUCt(xs~X}^%wCw~5;sV2lrZZ+VpfHa1g--FHg6v{5eSfD7E_y zfbO$PxvPys@-LEH8ZF;BC94{AUz5EN+PD^MP_9GK%JvoggffkCY{aX~wJ=-$UEm$s z(NXvvLI5e7dB|wPIt|7pd9C-aAbdV`B4MEYMchwOBTo$@4S%q7+Yx3v?wBqcDnEQf zA*2haD%f@6KbiW4IpIE8SRhj%-4&#Rnpt$rsDyN2rvKL3H>@Bd4)Q3A+CVLOJ3Q!$ zLazzD*sHZv?PrKNP-7`|l3echZY~Yj=Cih@D1AdEN30R9<0eKW8wMpfQBBYY|`eIn@^z_q1n1Uyvd9jkUsm1>O-Cb=;s51|oGqz7=WgeCl8kQ$< z#adpqW@0bA{4=i_ayn`j#L@O+)rD%!jVp8^&Sv|_lTE87!g0M$afP!knttGpW|@m?$Jvc#*fQk$x&fQ*) zQ)166_`skUp*;n*1(ZI z-gk7&ed(1_BNO-tbUZg7r8Gh4_<=p9M~~Gn%Wa*UewkMxZ~iT}D04W>b{E_E*jew> z=65)0an_ZOjN9V%TKP{q-C|N&{bX*7o*7}7^ei9$%M?K<_g0LPJSioXzL*LpKP{A~ zXJ;)xL0lvH2=VT@+?zH?J3v?2trqU+!cNaalP^qJ^F1Hti{iCluqac1k9e(v#Hj>E z=%TmK;b=u63$Q=IXaj*Ev2I?~_I@y}`nB~UT;>*%_gs5;Oq&@DW`WCd+l`@vT?RWN zXVAT#BxBZJuMd`^K(rN3`psT-x_PQ5%61>D7BKn3l8tbDa_35X^nu&Cv%jbj6(S)mTCUBwik>E-2E;+J4Mp?-Sc^S zdqq@>$x)R(8cQI)()P~JPc(g?=CHsk+lEpK z-mGrg_KtZL!sk6sus1dg8|SX@dmw%7xNZk3mIn5#>ejzz{Glp{EqD+hM6)5M4FW&Z zLfv+-k_A7%SC6ail<={ukTb~0;Jh~GmHJEcL~3@v1CTQO5Ts7S@ulnkp$y2CWCoSB zGt@7EB+6uAc9xi{rPp}GVVayaYkbSEE+R&bLJ>4x<@S#)=bHt6CFY#VWc0XaE_W2O z^%mLZYf-2!bYew&G{v@>lrWB@8jISyJMu_;rPf`oQG?^B)etWC%x4VoY>q_57St; zdyn%fF~gH$T)(0$9-&jHYSlCqlYRD)6T@bWnv$= zdEaiNu|wL)z!S$)#C_~%D@S7cJRuS@e9%$!Egk9IYZP)tfiML?lvCCZp1siiPEx`tmJ}-7;Mdp`Lfbup!GM}`Yw;Rmy z87h>OQYe{S*AlRj@9fAfgQo6EOE~Xf&A#rHlrp7NRFp(xvFGX!+)hA-E5P|>MPyp) z@Xt}np0{uBj*j)^{aEAcixY@b6cml&Ik22ob)CKJxH~k+Ik$mGJk@8@Y2uuT7}0t! 
z%6;b%vqe~yLsOhXD;vs7#5sHnsCI*A?;0fexH$D2d$*AI_vj^G9L>K;TNw)AwAWBj ze&NrSA98f$pH>{1fV}?)wqr6FoS46p&lK`;?v5;eE9VY1<$&ft9?JZZEi?9y*xhL= zjS-w}Ik=%^1U-veE&qqIJk0I@kYw5^ij%B|gnq9n)BasId=_|^zr*Dchpk$vo4qw6 zbx5aF`>3HzgM~uZeFv4&tf&`!GmDTHl+97TdL;RwZ-ROxdKn?35@#Qn>%u6VQ&2xj z!6(jy%Fp+Fp=dnRI$vWQUtIv5T79X(6k(M`uLsJ@)P+;wmM>oVbg7S+s-lgxurVh? znlnzf65j|;hc_lZOK)^tWE9qq=I9zb@MB&^MsH}+co!uMe{pNyTEm_QU3dp@QLhz3 zsYbK=LP!=ti&=A~Vl0StZ4ta9x5;0k$9fkb0B-A z{>c+-&zYgx#+iAOk#X1=K{*fP1kxt1hp1SL=|& ze9Q?=oZ+(tCK>pX9BGvS);|46Z00x^@p!-cBE+ z6A~-oxQa(~rVge!7|35q{b?A0^34)~CWYSCzmW8n3rzAz`Snl+j%|EkRo3|sfUOno z6U6ZjL5WR!(zKLXVysFT3ptOfGIh_uz@mLf@_R&nyJgciYni(>($#bT%AM1N4?gq6 zrf|6Qbg4{pCb);}wW zp%|R^cUGwov&l1wq1m|_OFLE4w@w>_Bxc)J-Tr=W?leEyybG28t-G$vBZJ-8qIO$Mr#d^UY5{I{==pFm(7oY&bTxXuSbe$ zhtjg29IT}fvBOE%NrCGOClOk4+c&2d!R2Ib*n_7YUo7g+X!zGgqh-O*0^n+cWln?% z)sf(Dm_RJ={+4d6APiWc|_Zr2o55_iIV~b+qm0c*F~4#*3lCZ%aAxtEik)ilEb=Ixu{3f!~xp zG0lxkK`xP`C|K4*z5}anl@RM#McX=iNC*31gYvS7Np!>$7~fssU0+}4CnR*rR+J<@ zNY@B>>h;)0gnv4|58-G-*_0#AC8kZIpQ=>D*D!2WZ_*I+ZPwPD7dtHfs%nM5|F`k0 ze;^dm_`E?nSqBN5=C6ZJiyhV~ehK>4iX6$wjhX&E6ZC?-cD~qh)86=C)P8>Oa5J7} z#Km6ITtWUgbtJ`6okJP=IoHAzq0uH`kmliLk4DGq9w)X}N53=IMsPSk$!IM?rFU`O znd$jn`Uj3aiPA3hc_&v}%8PBHjo|Bbuld|1Yh#1BW&WN)%94#Rq8a(dox5JzK(BKh zPVs?)WRUWk@4?5fa_Qw$K1z@{_OXn<(D~S`H!|)Up0X#I_x)T0=#$Ii`(jrzLUPjt z@pbmdB^sdRn_1MI7drazWFBXYyO>FXy5YA$fYn0Fj&u9eh7#SW%<&y*`ENifkOqZs z9>7Zf!jIQXDvIH=hw-XT!&WZ%l29=f2m(&QOesyQX;?|9c1FwEwgjPjYpaC-Knj-+ ztL^Bz#&@}|onWzjAyH)(h0KAOrJf&CnK>gOK=L}^uy)9X1?svjTbX9+RWU``*= z-%WQ$Y%TX%jLygs986z-O5saY+skXZBR}f{Of9u5`hmAKoPu{PE=Ju822R{7b#cpB zNmkt0^w)D;pd11y#Y#&f5Vl~7rPo>gvhDZ#*vu8ijOQ`*k@+l)K8I#MWs!VM#Fd@! 
z_hGt-t?0WH|Eb{F8v|>|wFt&2I55qITNp+0SnnG|?tTarHe)(cDzp*mkR{?@J%N8w z-Y9@^@_r*Mt#_mCF z$rRQ%dM4;42b&Qh++O-r78Ljcw1M}#ndT|4pbKbo;rZ$Rp%4KU!)1~&UA!AT0D=T+ z&JV6~aehNt!##>Vq*);Pm6$ti#a;@Zp`qtOBM-&>5X2+Mmm*KnAe$LU56ESB9xW6h zO}oT-XXz}dY0Js8hLdI9pp;$fCcxpmGBB}E8sR^3KEa2cE_xZhyq4*%%PR^t5J#N~U-!5x z9v|2Km@4%3f{jUZI22qB0{l#jMfhUSRXz-55g}3Bb!H$fy6oI3o$YHdjMbc!-SPb| z%@|FZv9jy&Om*M1g6tE~cqz-n5$?HpviX&V-H56=6><&$SyC5+?L70auQpnKMy`(! z+w%xf693%Nnz6(+5!3Y<>SK#|H({YppEvLNe*3NSz0DZV1Hh*i->>>>DDIQ=M_LcC z&I7dXQJ0^;m5t@G#t=Glb9t&2fAOl(l0KV>C4goRD|IBnfF;A!)Fw;KIs%8qlWwu?jl!G^~3h&}KD zdcGe1@s?Dfeq*#P%Ot7%X`Nq5D)s?(;y5(?=aq}~8{ND}_r11J!04;qswX?vSCnU> z?aj>`as4j6ZB|mHcrb3jf2VVUO0bvGQ?LiDN$(RUe9&k48`&mZUs=%{l>wnGiOTy@ zBzDf#Po5}%DdL8+ty=VX&5@3~qZ5i*N;{u!%|HI7B~$l#AxI)fAFbPKs|Ul|h5N~5 z;oQ5LZrEhKvTX_1NmVIvj5%pt>vd3_GYXatP}-|$jw4OyAcNKY8Ym7HiRHfF_P%@x z51CM`YwNO@SH`HqL3ciQkC9NohGPAwW0FmqlAy3rtRdwOR9Va;an1qH0&7GQ&XIb~ z^7PS>Nm;3rpDF=yQs z+uu#mv&3AV@+sN9dFfiiQvb}vc1BaqX^#gkQVH>bOTdJeLYPm?Xj0ZLhqn0J(uWbe zwq5hpmI+%7?*2HQfM1oHC58R_(lQgAQg3}%*8=}q_U0Hx}8)d@*xzv`}256KCUp!RXr^%P%Ya`l&}mK@sX zGf#z`azb)ZPL8{Fxk`kX^mk;_L*LU|gz=T)IdIk(RZzc?ZS<4mU2`mdQb>_L5@UH; zH08;J#%^D!U&gU% zLa7{t5>=YMcjF`Gpu&Rq2O~kszqJ_jn(=( zv}p8eJBZ^xEz`g;$qwxVe zoO|plz0WF-VM@2KBIOS-UY zDV>&;o6Pm5jrJO-h>nFjAcO%fD8PF-u$^dV@ZK17?}Q;vC=UI;Dk$)ehsft1iQev^ z>t?et$1`JLMFe9}&;Xji%egD8NRT>|e)v%_tdps9%%}2nEtgFEbNyZ3p%J0GSyDBjtf?4h*gF=yDK<9v5egn#?Y4Gy{B*L7fqK$j#)5$GG591>=v&o4_T6kDW(o0VqH{aW)v);;Fr>V@Lzz?lmR>? 
zo(SV*%Vm(s69+tGDaaq4Zv--tSPhUCGHyPWPT{$3JRfSW^iE@P^h*LOJmx-fi0+Mx z%4J^)I2LudpN9{HQQ%DtSPfiyZ9*sEXn0|Vy{*uEp(I)Sk<^quD(NsL{}PPSdb6@b%; zF}2wq$+<=tQ@U`Tq5%0HM^_ov^Gf^p#5T#S;?v4paH%KF(GzdtJ zl7`VpHw*?$dXsR#*zbM*@27owjwg2C_iniT{ z0S5MBQDXG?^cW|6C_3%#O{2m6q3poeKo4J!TA3tO`J(>UN&=pjNPukF7Tf2~`aB#1 zdGs!))P}Fd4nK=3%1HG0+C-=+`oA~*(!i58eQ@$}OoZE@_vF3Cqty35-|Qk~>!-Kk zP6EFJoTzw>8ilt-EJu{zwPaD!PIOShK~<3;?7;6-Z%YX5scoOkF7?+VIA`Kb{lLb{ zo$NSx_kv(O2VzjVQN!gl{l3YLs_ohm04}(xgiOGV<*&DuGiuE4n z9dkYPlGYxyTasYkM{{_D#3qTYrAVr(Jn}%(8?hlbG3w6SX~z@$X+_kAF40yqLYC`e z^`z^zhd1A6-q7BV-fzII0&Q}3&H51w~{i}l&TJD(vt)0p9(77uTg ze$=;l{Z?Z%JIC4t-938}qNQ-11!mZIVzXH-c;a_4%Dp!wx_;*rHHrM>QuZqLkg7fC zg?kS5*1#P0h9&y^Ey}Lw5l^bKnrLd-3)U}IoYo^(6@Pg;R!E%^Y}} zEyDM$XT|1N?#9GSxmv{-f_rUkH8YhONYQ(*ZgF)6^ZKsFaVBMYq5t-%a}3XVsmzuy z!3NC(DnH<4aV<;TmGEP!EuAR&CM(Lo#8(&WQ%i`7j*>~+dc7Rif1O?>3r0PX50e;1 z@kR6btbT5`X{;9W2+LInW}EAHAvRT-=gcz~VzGRIb(LLk3SC z@>E&Ac!mu0jMAPWIV$37S!QS*M(Zjv_M``Il^>;^+}0?k{ob{&I6BjCE3!YC~v?J?Z z*932qM{XRRQiX+ zT-H*E!kx>ADUzuAbhi$}r=DMT|GUOM>B?67NAaRN``f5o6Nq7Ogd0!6kY%QY(^2=<;~y+|B(Sp{_I7@U35MPuvcvYtdPfYS$Us+j%acK?)tH} zBkP<^oWJsp38q6F)W3O+u#1de_!ED%E?!?BNch9;NAfT!7H47ZZ0cz8xq&q2P1*DR z@Uouc10(STd5WCDS;;M8xr7={AZbV-M-Mfalr|jbxo*7)brn9YH<8Njm*(T%qm6AO zsSqBbTb_2L&OE>da`lJG+wQ!NVKyNU>t9WxqRY)F-sCgQ zwY!_LcUkqL1%#(vz7e8TwQj4eIqH>5K<@P(qzfJ`*UJ4(ocH9_PfQ@&DG5kih&srV z-8lJ5a=rfBCqE>G7k~_VHnvqMXrY+eqZD)~%!o+)8FKe~`S6afJBlWXOnx_^!Qm$h zf)_PfzR>QC$&#(&QRL&qSNNv^CwWBHrSZWU_}-=cMv1maMEz}Hi__{JMb7rzNwoIs z`~rJU6#2f30xN3P^)>&%wxWXhb%bxx*dyJwrcPcF?}AW%tq=lu?j@+T4{k1rqz&x% zZ`H-wb1M1K6_O~1y*)5wjVtx#cre?-$q-OI(0}9lwNcaGCoMa0CE_W!cWMa_{&^^V zeZ5r+bPgV)Gk3BmBe-MqEl#8V*{6HwfAH#oO2c~kvl8=9blHD+DJNZ=6~?0#y&szM zRuzBnWOM!Oubk>VgdA1;3T0|cAGB9Ew{%YZ@Ku4o@?Qp)=6t2h!fA}cMiOyKA~9sf zH>C>FRePOgcKZdMalNC(jY-DjeVMZyhfoB&W*6J@93FuiKF~`jfKKuP6d-n&?`-RPm7z z+|2kfny38X?uDQXgfV>CgLg)MELiR;FZhGQK{?a)rsj^i;Ri^IA$przV%}dRy*>0w zY$5M{Psb@Sg}j_mP%A%sdiSl^>F4HJHc96BLp!tNug=$*ZTig#`%wN&99_|1uz^oD 
zE%AC{*gtNPqOl@zD$t=mFC}O^W0=_ay6>(9C);zvd20lWSr9HPXIIfhbpgrnKiPsd zR>I|jVbeCuBIIe#^v}Eq*h1FRodN#}z-W^N?82Gc(+UgkW?T=Ov!r$FlC2mOJZ-nZ z@pN`I#}6+uWQ=7!wQ-nb<}q@GBA&T1I#L?+sB{5UF(F{!7vUSQI*T(>B3*aLhE7ah z-4P6C&qy|96b#X4D0bklgw&Rlull=`reP$kjg<{rldcC7#iZ#l|KSz&=Sn7(_m_($ zBo=rdYlH9fZ;)ty9Muqpu6lJ1^qI5SJ>fz#oS(J5OXq`gO=;C9kt)(=(Sq~z3DITW zdL~Q^P?sO#=2q=G1vn|fWH59_vwC3pT$>3Y% z^2!7^XnR9%TCdQ`5+Bjhl6lIk(FbteNlADpSU3)m9Vv0{|yBeGTr`wi$f zJ+Z)1f{en#Tx`u-zSeWX1Ua>aWLEAA&L}y0p}PD=G7`eut2+SF*8$-Eh<8{~2+*vf zbkCwJnDgJIPst0@)wSBF@SM(BhdzHuQ0&)AlOAwA-3s}x%jmwb>$7{Y)5F;VUk68v z18>$08njHjcewHJx58uhQVplyMPkOnr(Kts8V3zN*Yr2n1Fg=N|0cjbY_vi9RV;!j z>0o%9CBv~21Ef!(OQ)i>%-5X)4Gk7S$~#2 zXTp7cN!wdfAp9cKISNQX77Lp!Vhm9O02=?H`W0mF=92~7gy7OUG~?&vdbd}ACubaQ zQqGh0K|Bv>P7@Y_bAXFObE`ab`xZD#G_ZlU`dCjiNkntX{i8zK6rDMVs18j>A)#fH zSJA`FyN))0Ph5D6$+N6@nPtL4rLTY|i%<+(f;3B2?Fc##K%k-fE#=M}!flHZz%yG2 z-}=_2l&T3dcJSCSpFSvjoT|!X`q0W~&rA7AhXc|_2W+Z*oFsH1Y!b!cN~b%&=Ll|* z0Ml#@UM0gGe~a1=_F=nEH}(X7J5&LNV^b*!5ri^YHUOvO9NKidyCh)Kq9tI-bDd&2 zSgmz%S!956FBkSxVO|2}od$3pT)LSz4QSC#?ODBBVS62*Y^6h+0 zbtO!_XUz3;j7BS}1Bnnhw9YcYmCWy+50I6^HMn2yPUcb_b-jP3QyzS7vSO_wvsd+R z$f!c&PaYaXu>JTnbMIBIGOP+F@3c6$0T8QzTc#^<1tgf>RbVMlb(*o38o#l0cV(`$ zpv}h%8|yO@Y&T5DwLeJ_JpbV_au&GQC8aASHRQN(?Ot9fcDGz0u?%!OcG+ld967)@ zF-Be0N-ks-{V;W;`O(=xdFAI}vaVVJ_Az@YtzDY2+5IuNZx1H*8;?_zi(quubE!p> zZhd~%jnHXi6zYj|>{BY?)PWr6jeF8>ejxkANIvoD9}Jo7EX5ypA08oEIOtt;elxdv zvXd~L;+6H$|4E->AR^-k<5#MoVEB~DA)cx(^4oVdRCnU@LhgY!F2 z%I|>a06`T=^M80$2PYB$BbHXedG!0cS&HD1I$nrXgX(c;cs)bAxHefLrIfU9pQ1;Q zeOV~L8zYpMYnjx+kz1m!_J?N!zx{+Rx=AwMXSn~uCx0Y!t5W~mCds>ZB3}Ntp4tV; zK7YA4269ugH;oZM#!9A8083k<`-!t06+&6w*m1G$4~CZ_NpE)Z{f&fIN0F|2efgV$ z6ZvRGxlsNc5JpCNN3{uFgyB28qRw}uSi|%T-`J&9HIG63$p!vxf4Ne!O|<^Tsu{E} zzWoG4?7qUk4e2jh79ZGZ1s|W?8XhiznOurnI?&WnaC1M_^4ah0nYn~THY?s0Otr8x z4ucQ-CBoyWYB^XDVE6oh^u1J~m)ylM-u=M74RMbMtlx32yqxUx#?{}d@cr#5I9@9h zFCf3*Tp0tesR22!JHP0zV1Pl+o&$~z-J0t2Udz-J3U+L1a}f53UbPR_h+nNDcF1{l zMl7~2yja8<#uNrj9^a&_@&UX^#vYfm7|{^hY{XJriFiTla 
zrDU&tU8jpo0Z_+v)8KK-W&gqNr}HrO=^?Eb&hL_|?uyF4!LVdZp~^KTuLl+Nr+=&G zs;`<}l=nCE-H(^h*WcDR#KV(08RA-Q$8SpR(qnj=D;9uE)KBHtK3;o|N8;7oIXn|S z@&G>-Iz{?ni!DrI=awmAg~yj9z6Sr9{F6P|X>NA@^Ll->WsL8}^F!fM@%$)h%&T zVuM~Q!;mtPfK{%*zFe01^x2P_-awVItwf{Ou|wZ!AF+XUhE<7!Z#iA|oaByK-hT{K zaIi{BxfwnCpsN87ndFs2U)HC3wqN9BxnW(s+_J`msg8K1QlAto8-M2RL@-}Yj~6NH z8}6{Lu77bSb^fUy?^Vu9NmuwOG4UTWm*--ZVA#=!b1YV0BH!Bj7OElEU1H4^Z-w^V zXpYe6(gUg@0nbowBU}TWqtb9{{YHG z%tXpah$#0j8ec2(^2<=$%Ld?)lpkZr(^f-o5Qfr8QEnm5cbuJJQb-cSa73PXYi&;} z^*0cYkeM$zqUfQtTsfLIfj_!4+32}T*v)(*&3yFqZ~HR4!<(gWyXTVf-p`&g5pNoB zD_jxY*z7dq53W9c^(%7dG&S}4hR1z2-(hq?a=-i!f*XU0GOPBQZ#%l&&py9--Nam*;imiR6P zsrzb79Pb_%xbLOvZ0Hqu1%R2^kGTc ztWcQ%c*S-3@)*$mGi3f_90A2B;JdNx@yvaRrksP=t0n@0H`e7rS7pr*JQ2D939n`w zy#MfQ!$8`2y)cVH?7Yw>E8$o-+gw+#&h%aK2ni9(zA6Lr8b--#2>WQ{^goH1GkJ1aUw&Vw+&q z<4TA1(C_1x!Q0JHI5Zzc#Js*)m4nrwU`wErxsUl)U_Ejgdx85h^v!K^+StI;*U?JDb~T~=QTWk(5w1m95}J~9 z-jX}#TKfvj6X%qOvlE?GPDK3uw{x2R{c&@DpzEq0c(1r6TRyONUloX$s4$tiCCchQ zD_?IE8B1*xetS8@@TxU!!qKo>vVEQ>8p7fLeZ(+0W4a96PyXhm=@AT1&gDEYI7u3Y z-gh{?JaRNsB_4|4;X2R2HKqLvP>LxGXdsTX(oZy@tSMB#)uhCAytAU zd^2-t%JeD&v$=nLFZE18Y8{>)s2;;DgH`l;^Uiq@y~g};i7biLGdJsg$~mp}-+a$L zc2&_M1B9u+SBzFTp3FMVh$MLcds!E`4UGT_C)R*7E!3Q&LHW~)EpYZ;P^S5U$^xU0(kG$U|Wc&$bMv?T-VT8A(avekV>gqG=>onE|G|!*l>px%3XG|-~ zE+}+vVSf6Vfah=MGurEZng~S_y!Wxh#EUX)2*sL}S0qebJ-M@m(3FCI7{v1;E!>J>LId*UlVjXVi zTzp?=XP!#}i;MfORQ@W0Gi!E8ro4XI@-xPbtUXN+$Q0NlkG1R7&uHd8wiJ_^;QjTs z%rRPd{`hd=xMjag}gRlibS@7R9xg~yGSm& zf;?!|kY5LC7nQrQ3*8ef4#f{s!hqyRvRxtT2l2xWpSVO&tKz&0016C1jd!nor*{D4ns&_-W)galnWs_g&+4Ib z8cxDpSi1jm&;Ae7Lqjf<_-uaGsI6B;D=}5b9vp<>m&Vea?I_i_fB&=9Y?1F6=*s{c z{AP2f7nmJ5Ek|%p`uzc!TWs2}^GXy8MM|6-+0{ zF^{k(VTQd_nIkU!2%sld>UWu8Pn`|Jc!3sK8UwOes7L%glMFTOH}KvV6HT@Yn8?LL z`P=x_lK?TS_y1=VIkQtwwc&u{#AsFr*xl?clq@#uJlIkks-h(OJ9}zejQ8n%BpoGP zA{)(RO{dg7zss{Xfl_RlfEC)i^luIdYz02qFTuH>+;xCP%PRs}al<20>m&Ys!1EtT3mB z!a0-(Koy+04%r-U?wh;~CT5;ijRSxOQ)(zCN;d5h`8(2biX;L8F9Aa<2I}cdFd2Y6 z0s#0=%jf1?Tk0Bij4$~miC%A?)U5(3bTaEPg!e>L)ta#~z+2n`Z}BROleKT>t9O5x 
zlr(Ed3PwXgDD^G}1@^xyxGIjsExWtC+h;(Y_Gj(d7n#lRj6HQ-w=TGnycUJmV`NhX z7xUPYQU={uSO6cy_d>+qD*uFu2Fn=PXRtSC&55CBOstFEF8=xlfLkad;EtTF*j3nZ z`>kkzH7rkfQ1}5x$0)LIwJ7#toIhB`eNLw>T`KTVXYQA$DPew`?H8jH;etBDMTueH z4$KNVatOj$@3Ys~hPIEZ2j&C=Zblr3+o#L~lr~Q@*l@XZAK)Ku2Pb022LlA8sh{t3 zX9A`hw5`r@y;o9waY(q-+dXzAw(EHc*T zfkuy$K}Y&RR#06{)wkLjeKOoPg^;PI`0u7NbCgj2qN6f>JcyL()gEOCd~Ml2`)UfU zf06YW#@T<{JKdZ{_f<`675|?;B#bI7bkyZXI)Nwwf!lV$C%I5?wmW2D3nOJW`WehB z$=F0F(am8R{55BqCC!(l!u)giq4;X9Mp3o0CcyS8EP9RI0Dznf+{1$#`(rffe48(I zI=D7q*41LkVRXgT4kg3(S7V-z;K;J{kcvFdZ}HJY}{R+-n{lWGUJ9Vfs9mkRDaRiG zpqB{X`bAiV2+k|;`QH+#AV5Ux0m-2sadWSixH=X&c8mm1rZcbj6Cxh1xx~1yd~Gz# z+Ql!b3YAcb#N^(RLi#w0&DZKklFda_Bts&51SGro1^M=1>7=)ZHj>(sHKB{PomL_i%0DcInQl*1I00`C zcN}@>tsicklTpFiaR>M&PcT&|;r^S7&;Q~5JX3gFd1R9LE2~xEC^}EgFk(>_3iE0Z zqjxE_qE%i6>ww{@g;YH#5SA`~+b*&)ZQsmci6sws-9mpTlrauCc(j}08CemT*S<7u#<6Zb!KaLoUFqnT>@M}0`tobIJJn^@e+&xWtNfXmDEjA!woYw1e@jK!=s8xUv~ zp=R(Qkxi5X%jtASfDS8RNZY0|)aP?(ovZlLfH*CY0<=E>F{ukA;E*%_h{u!V@M$kQ zs*%R>@!*+>=F){y!o&aYvQy?R#oa={wVJ#mT_mo}-&ny-hHQnZL8+mN!RYv`r2)CF z&n?JL$M443=swD!m+i56np+snEL=u99EIICxL?+(6<6FYwyLBo}dy|oicJ~s?Ui9Zl z{n0R%5M~~(@(tYn%B+){hKu{u8=leD!zmR^6Cr=Df7*O!!&6gkigA*S$Nd?7biZOg ziVLyKIac}$EgiKevpFfWM_C;85aC6JNGeghO_pC^_Jxp}H;5g~)iB~muC*hT!bM8D zAuNtkeT^C+f(T@RmZR_XrCAd4__fAm-?=K-ME|#fyBkh_axs49l!rlIk3PBRh_#@w zN1Mo?Wu}{LQ;U`T!AjR;yPPq9#$}=>%(Be5wF&j0bpLRxPg+ld{DB5hv4K8`3=!)2 z$P@{*g?Gs3O^9gqM3DkRdSZH?P}p0T0)IV8{C-Ok$Y@R&ft$d%mYI{J#+3!eO=S!aBT_9%! 
z81gss@i4wYp6;WgzM1-A>>UNDVPX`2`k(gRZprozli;Ul%P(f-QpcismR1!rn~RY) z(ognuLPYgp3ZX=nmKf#JFE2wfCK6TuRD^G69DFUMIa>VD0*h+y{J@b95&WSp6??Ls zB^Zfk^{kGBU(^=$RZ9KKdX_Eusr3k5;nf%m+oN z-|Sg?XvmH%4|Q^G6O{Rwc+NDhRcVIkF0AleUzs~)(~`z~Kb=zg=RO{1@TM z8p$^(S8nA>+ZPa}-j|0{e{G|mYu)*c6ljJJ24ovW>re&AYtKK{e3y>L+*Mxl`6=vdIa;!3eU~nc1c+9W|{Z$`egovBQ>$ArVydDhl8{%&;W^Hiz+t>&cx=N>_nUkBu!O4+x z-HzaGmW~Z>le-@L1kx`y_-mid5a`kvv)?WyFWE2Ws+g0q#?%01T$sMp(7igHd}}Wc;Xi?_codZq?`u558>A6>a0~oBu${-n*BO65`cO zmhNlu_OjN*0y-SPtkQm_(V|HVb)eIjmnJC6V;F{YD&LkKgGJ#*K;3oW11h z&1mZC)GyuUlG<1z{SnT5-<`UViMXy0l>@$4#?2dcTJ1!Lmd{TwWF4OcV6_yFm47>V zc0RpZZ;ka!J2Wu!UMt#loJPd%q?$sU9d`8mK7aHAl}r29FfbNM0z;sXRb>+S=Ph4E z6=OCL6%|hE*&kb@YR0RrEH<2^(ytQ>wI;JDDDUn@#f3m_%9r<=>5iikc|+BI$_UVi zcm(}LMlKfeq8-||6i*9NW6_PQW+HBZ>r<+7eicKZReiiGm;50`9i0d^&Z_-fMwJ>? zcG3Uv0tK(W!^(M>NG=cbCY`_Y|4f!gQiG$Pw*iip!w91`eh=s<)WdaLu99LZi1T#= zdut1?t#jK7IjTnZa*`~nWj&8Vk-arn!1tase%Dl~@b1=K&=q9Gx<6ITyAm_%YW_gi zc&)j%i!Jq38SInz1W{>?!F-kr>)|-y>X3EIX(M5~p#%ECvQSIZK!3Ip6^i%kcXM&p z&rpz`Ii58?c7m0?4~Uj0dHoLq0i84y&eU>W7uwmIu54esxBu?!WTU&-DM+VzLW%Vm zDSguEee;eRkG+j!;UCE~ZvxSna0o|f-hP-k_T6tBDV77RHq1%8W;R_BVEMdj!QHA+ z-?1!tlOZ!*(of&ss!i8endhFG$sYs#m+dhmb%-0oUQ3h*^n|hpfC_5);RcyJ5LZl0 zef?@l`o=QEep$j_?19IN;ho)OgzQ)@9-FP5_RO+8lQ&C{-g>6Lj1d$hpYv&=@12TTQc&QZaiQ!}{;)(9fN{oO#P9T^%%0#501 zVK+pmY!=BIVJHUlT7IqTRsS%F`s3`7wp2q5z4c`JV@-%i{Hbj1b84Pq>^NaP9sJb>vJpMO7&J8Xt#S=hG2pQ!F>}z` zXt8cSC`2CPKii#)s7ax; zTs8aExA3t&eWiPTEc=nW7`!J-Kjfz&la&*g4$=qeaoHjWk<8wP9ADptDo#b!Xg)eB zWSZa7osl>wNxL^x1`6Oh-AXzM*VZ>De7%j$_5S#(hH>s6{h{fV=&XedeZY7NDBJdQ4bpLUj6 z9Ckl$&d{H^boOrzyvRXX`%Appew7x;@Xsh+tgow_T!X28CS_$B!M@bnvqEdR47qx^ z*t`|{A0E3BQ{cf4JJm%M{Aa|rWV$cc`6lbDJRV28XmPee=WtfS-jCftWlK1Nh73;w zQ$_MP1ZY+M>I*7F$^dFb&$3Fk(2R1o8iQMyBgLsscdhsE-e?(f zR=y#yjXOFzGJ~9upr5%%{D*f>!5JMfryYL?0m_rN3$?XwM%6lxe>JE7a8ctD=Q2wU ztgAUu(Qpq#U~iX(Em0Q2@eG?}Ksy0B3}ejKa*jr4$2Cv#KFu=kv3J;$i^+j>kv53J zt#k3Gh#amSx`nZKayPix*jR}d;1|k!h@F_KzF5zqn1&A*TgezCK)N0{a7rm;)CwR?X zPIgG}c*x3Vd{|u}9sb}?5aG%jWp4s`&$x*;jN-N3068KlH<5GHij 
zi9{>aH7OL*qa<+^-&|Pb`1* zpj@**weW*Uv3i(qf#t7U-7l%c-^|tL+}nd|#<_7{5->6hCL`HBd!tk489iVQNOo$+ z`#D#suT#A{*`F4iF6)p$wL=}t|8AXV*<_R38DWQGm5g}R*VS~}3GK@qJ_iC14%pwU z=L9p;;t!dwtA!F00PSt}=Zr&H4@Hf)JBSiT7l(-;$ln{79 zw;1a3H0^uH;{^p#h|f5W$$sQcC$haoet-tB|5w1H$$a?T>Or!IhuRXvAu(0Wdh}kp zV=SwJbx)@!A#o6tE3)tF3uj@>d(LKLyw8&olG|4S*($9IdERT|B9b|=)_z`` zvh1SMQP@|&xAcQJe+2-=_xan%A@_UEtFGU>@{Bq862OgA4|=NMYMp^PL= zRATRHUN|M7gWV`5&S^Cx9=C3LNq_9o>Vb#%5Mv=I`>S_ot?VI4w8>di%Xm*Ll;}i3 zr9{WtTeAA#O6i@;*O&C`b9m2({i%n$H&o3|dkHUif|s@{u#71?rxmr3KGnMBjmNfg z^$H~0lfv4~^MW=u+1@I5#k>XvMC#IQgkF?Z@wq~#RK`Uew%b&{Ko|6LPZYSlmpB%% za4Z0FN92cX^-wCjP|(^wTSdYcy=&W{f4g4Rbk{oF|0E`O*7Zjj5xtVO3}0%-0$5-q zu&Lq2g`~5{IqW-6_!je|K>nOAS~!|guU9mwTH?UfdKl96BlN(VDDAfVSPChF_&F?7 z-u$QJaq!J8pfP$22V-Qz2=4N&0n=n1TW0;m7ITWBu9H8rFP#OUO5a!@HSzx}V<0FG z#$$X$(nl@+!R&5v)MuI(=I&U{?KB=ew!0yg5U8Hp(E=X)e6~tkQDtlz%JPbiSos|E za>`Si?m>pqGM{1_;A-FxfD*qFVmVDX?cI62p|vOmY@Ou z;e|&viF*ZcegVN|yEict7>w-BC9MqK084y*c2aM?BVMnYc##GFHX~Ycch0To-F&es zSCe9af_-5;=!Iy}VmCPMvPvru{QKrj48}Xm;pc2XS+@Pv7}rUX>>B3?^@6Gdy0-cF zU$?@qj0vl;9#l&7tKB{2_q%>aML~6i!Wyh(2t_Tkj4#g4R${U+@GeU%ru(=?6#eUJ zPX^rHqHj?^tIEGOt9WJ6oo$)Eghca{@R98~fZ8}q2(Z;pA1-{w_Fy#JdifWc37YuY z?v{4RZkD8Lv6|31bGuG7Hs>$K5V(v3#$~-(t%Ei=>7RD4Ti0@%gq~2i+x=leCNkrH zRHUc7{PsfUgH+ngxToJtsoGHXS_zDExuD_SJtgsrBi(8p&xfELiZ1It;ct{FI@h)b zM<+(!Se5y#4HLn4Z&TTth~``w-MJ%}B0xb_m>59Pi7P&T~qxTAw% zajkbbvfTcoEAre&VjamfywT)1l2L*>nXS92Vd^^BTFcp&I9V z6RQzIW<=6KHUS%dylPS=d@BXJx&OR*oQxbYdEW@*U7-mZ3lHpLvhG7(za8f{%m03) ztT_0Hz6%;#$MzrIkQkYj+}$7a(Qo_N)V0HhncwFb@zV7}=cV-PK;(0f4ygwId$_5Y^Ef^M80Ur9^bT5MQ@O6XVs-{5hKxUVYJun%;Alm3y~h zDNTn;Ot<#jVie|E7E%SFpn8+6TjM!>9js{%i(Y9|aIwGVA-7}^M9A5igZ;HEbsaXSU6<6;Z3*GKsMl#2|5|pIt$yi)tFQ9L8Kbgb`%u}LcG)TBn2^E( zT(iuI%VYlIaoAutSBIy+%T?+s{di!$b-?sD!GiFkt;-q%9M=LupL=j|;>B)pTX@WF zrOtnN)zwDO2p`RIO&zN4Z@kjt6h@*)1r1Y>XIeVLEDzem%rT5_V%0sg<_0?(=Drub zynerWho47#w8_JCez}~Cx1#{W^u}Q>s$&Lk3g(XGHs{W<~`8ij=fnwiJMhq%2~=w_Wo zSa}@!>ZvMq27*nNfq7QstERPO$I4!tl$LuSsD)gL<#62ui@ge8#EGQjtx^+epX`g0 
zMu&q9vOLgx+N9GS?o1_4QHl}PN~A1Es-(ZVEnZ>;c&nfx`M695LGlN};rknOX;a`> zTF(=cH1kP_gKO7YsV`%_ZaS^^N^jtiFlOwNCNa-!@2Y&2p7uEF_TN7HvI7FyDv?hQ z#onci%za1UzR{#TYM6m5f2zGo*CweS6NPEF{i`G9yocop8N+e@rmrDB|7N3Vz)otb ztKw+xU_ylb7;H0V6ELzoYoSn?vvKa)%Z$7ZA{P)cWxYYh)CuRhUTXGGOXV%onQRWv zGPqZH$oU{zw5Tegns;fGs>;_Y%>+C*#%V#%>1Bx9Fo^luuiP1fI2MuGKB*d^thExt zNT^a6MR20e!D>09^zhk)68~>a(r()nZD!#^(AQp$n59~V2D6GrEICTG2w};woY?1Y zOOOy}Y+!F57lCj0k(AE~dG~FxyV~O(K|22bmneX?jUj*+&`CuBIJGaupHoYNqVa}w zd8=@%#O>BpZMbF1t&QT*+WT~3)07XE!D-vSqRBLBZR@W&1#=g#aoW7mX9mFDyk47%32D3lWM;RU) zyB?lyAxK|$QOjdw9H%A9|3q-xl40FRZ``-msZb>PZ~wk&E?X1$H0yZ6D1BmLOoOl1 zUT^=W6AVey7**OgOv21w{cAXtTA}_6-IR{bvzpPC&U4}c1wLLlAp2i|w9sCrH;Ex>oKpB)?{jihEgmL|Y5Y4gX?Rt+ZSyAhN+Iao>B&wS z1);7S9CW^Q9VS6Jd~R;mnYM)a6=ZmljlC}n@8SeOtjWTjTADm}$3k{k^9S2g{Bsr! zez-nN`YSrvu+cWGy3%**jLug<6@J^PwU{0#WScGPnwMnFIdO5R-4T2|ID5Z}<;h-7 z10Hodm*GVw>gD!uF3NBl6omsFFe>>%qZJhG%U*gtq{$C=Cm*zZCMSNf9=g1sZ1K`< z9u+r4cpd&nx{6- z4gyAm&z|RC0o_FxTTxD{NLWriF$S2Z5zgJk0TBbS*z_|@!ndVe0p2gF)->)&+7CJ>?HSAdJJmm!r z)ojhI)5FETvqMnZ=dsH4jn9&a?WoopB(?uX6!-=#I5mu^Z@Duk9vEI3RM@GQ@9GR< z)q)Ux5e*M-Wk|=#LN*+FIa)IG4wL9hW{O?{vLJUX-P{!`rUcPL@xejhU%oX}u4ha>B2Usjrt?ijri=-QY@8Oq zo+5SWJ~a;i!rSZc-_<1?tu!6mdGh`3ui<@iSvGWE-@*I(-T2^7K^b?8YXx_ZY5L*L zaHf%w5&{CUX)vy%p6A3{`03YPc9;yTN9Gdu{^DLhWYaxuCD+cZHwAyaudR#TTn>dD z1OFR|65ASv4o7xSf(aL1byP6l(b6&c$|BPJP9S2yO0m!MfqP9+6_zXZ^$#YU7_N-8 z3~bZ%DKbg?YsJZz*Y~sZ6C=sldqGt*4yEdy_m$Tm+fs@x|-Dw!OQ$tdKi14_08T^%0ReszwBy8EtJ>z!t*r| z?CZZL6^9O@w+jZ?k|_Un49_Z=;c;|u5{3K^p>hNgc!c6^oiOeLo3to_?#Hb!w*jV) zrPH~ifdJI6{?QMErg8Qz(BqEwwY?I!NUvj9T*ZBC3Z6b7_?a>-TjqE*u?g=QUE z6Z0ohQcKxElv$q5PKS+a8!F!1qLl2WUiW^qkyPE$-W7z2R-6!^zyq8tB%kYj9-w(M z*)L2CipOSOqLX8{})Ofs>ORx4K+1J4`0HHm1*$OY=<7aEgZSe3)h)P{R#ZW7Y z1o}_MuflJo;xMbxy>C#hnURN0>zeAoygRqvZ!B4< z<3BX?eIe_Uj~=1gdy=|tSM<$>ij=A9t6LV=^+;fI4xk^6@1$Vq`FD!qnr|86xX`4t z?4(D$MU;M&8{oK;1d!bjdTG0GguXtBmxA7N-b&I_|{>ot`jqR1o z#WW72VbRcntT;IY8ls93Nj-7sf1mm3P(}cdq2r8$p4h2vGSfc0{ewT?pg*>w{?~sF zJSD9D)4ny~{F?iN_OiaVu>GFALpAX}TP3tC 
zR~;m}g)zwk_`lCe_)_Oe@Z@-C^)`Sccu>)(+5pG=^gh`AE95wh{{XbXd48wTWy%9-q}zHdDfv%f3r*59lDsr#P`;<_@1RG92;i znyh>ss1|(qOF~p^Z!p;v-Om~M`*Z&Q)~Z%IFT*+ad2Oj~+;=-#+{S-0qtt(qQw;|XycB!X=Eqpj*1Ob~>SjVRzlb^z|Z^o|_Qb{L=&DI+U z5?-h*zN8q42d}MMx$wt>W>K?Px(p6-*DN#X*ON>C00{p8g&s8ztXq@sEPvF}R=Dq> zn--&}TTD{Z;pNmsXEJ|iK%s_l{{Sq`dUMFBB-i{)8hP?~B6W#e;bqp&eTfQvKE0}6 z_(A+T1fu3I5X=;k!)(zg=jb!|4{DNKGQ`F9N&IO8L|2H#$_(`9kEdFz#j;G6@vYt5 z;NEy%Xm*kUeX8MudL9QO@Tj$q8fiAs#r>_}X(mIRk^c4af8ECijtR$Kdb@exy%PG| zMGn7Z8D5MeO~mubEt6H7!}j`H7)Og)8(|?VnSmg8!h4g>Df^uWZIW4Qo*TN^Ex&@) zJ7aH`8kYyC1JkMeYeBq3ZGnO>4{CtmZ4V2CKSQ@4iR0^3p|q9;6F-V?&~`FB{qi~- z0Dt<`8ZEV@g+y{S$F)FqRktUCR?zzEH z&*N96yPcJmKLlJL9E@J7lldB=x)LORYfp%G;AbU&wOpU3Gm%bxkPWw9I@Li6PR!0i z`W)^506bJ#T&uem?Tq&}#J-I@403Q~FP}^tk@#YouHkg;y8~;~jlWrxiRJUEJ;qcxKRtT;Z-6 zi0jS*ioa#2&MtRfcs&tRVPuNtR?p$(jCVbMI?n8?v5{%sEOGMqS5H4TSDl#SJY$-< z{hh8feo@hMwAs!wPBZmWT8cEdRC(G>(I>7hq$AVQ=}s3qLnzs!k`LVtJ~_{+rFKhf z%!^OfHi21e<G4W(SPNzN3O{_fXPUIR$oGU@vUsvHt)Hx8ctP z>pHfnX`*SGmYqEJQlv3Q3@Qt|9Ag5iX!I!$f zj%V>csbQsyO(hFkn>hC$Ir0z|&t=@%^#ZzZcz24##+vH?00TPpl^Qae=zRy_PYzl5 zL&H`+7}JypY^{qK_3KPT(nu4G=O-TK zq9nP&I632yP_*(EV3SST6Ry^cPI_mWWR2GZXSY4- zIVRdLyphLRHyzF=;t%Y#`!D=b*IUJ368`{aj{|s%RcO^8Q`Y<`X?1W_K>>W(VlRP{ zft(8PUmSnI68`|;o<1qlucFbu6KXymy}yvZ_Lhm_d(igmTO`F4^2q{@!F-|s#sI*t zt6Jf?at=qOG)mikROk3{Q^_8~Q<6VBejJfN@Pn`Xx-Zqxg2?XyZ({PaIDn3wijMwX%G;y3Nbt5LD+(6)GJ@ZCEIgxNU z=A2{7H#?Iwn?}&M1F)vWkaz7M4!m^Yp=JSagn^ubSEVPJ$pfxA1o2cWqR*J2bB?E< zV^Ns~Nlb&^=IK%I-6t3xzW&sJslzhk2N=&xP%>a$;`_jDIRg~2fKwwM?)y}0EL(nO zJvcOP5fbWt@%!7T125tqjr0$O9s<_>B>1-Z?X<5CX%{-4sdA9CjUBuZG_tT&$zl;$ z;Bs-tTK;4_Z?5>0#-1|Nd}HGcUR&KySG=0{Q@*%;&vS7jmyy_Wkm>r5i6VnQ(?qd{7eeYqTbNWFjGHlZbJ!_8K(E3704^GIh^49wo6fr1V*jP~e% zO3p9sDm zY9AFoEBI^SUX45{cj9e&%SyB|xx&kHIz*)7ur5E&EBb%8@{$FJL={yA)LdW>@UQ2$ z{s|HA{=?#L{1h|cmXWKgh%B{fwP0d?lG$5LcNTNo&4mFT{mun{L2WpEAOHY6cdaRk zTMoOsSmBaV!w52RdI9U2{73zZf8d|qH2(mCpIv-M{h)kT@p$+?NAX#bTlMhpzKZtl zEp)3{C%KyGtmRmi zB#aklF3JESPn#fawg&d=N&uJ 
zsP!B)N9wkZ{{RJk{{Vt&e$sOIYvA9-Pk@@m&7`+>>tki&&jZ-%T6Klq&m{sy9J9L+ zBxu!=9N~`u99Px9w3qCe@Hh6a_){0b{{V|#8JOtycM;ubV@kbySJUT?DPfLCytANmd(pE~1&+WJ>PG z2u2hW(~A8F{jmQ4;JbRafj%VZx@YWjap2Dvcv|bj(OlhXdj5^5tkBc)LWnw3_8) zxJZ+kB8$yo=AdGV21wv!3jU2_n`?QeZt~oaI`f>@A9|UKS{c-7!&YvpBJMb_7!74frPxI?lqa;YJCsK3sFFknaPMOv4#rYY= zaZoWO7?D;qJd!{hVv{S7Mh*r(Z}aa@*n=n^pyh^Y5rdXuPp>_H&-JBr1d-0h41fkl z1dg40Rd?K3nR@a3Ip(Bh5yO?n0mvYIO+_mSP?l4-JqPlsDKqMC_#$+IFZe09!!-Z^ zMzk9~pV*iG0M`}!?-PQAI0v2uej0zl1H2nIivIv_+f8i%{{V__!g`jZNjSh|OHMt1 zw{ZSS~4a%iDB~&`2#u4G2Cq02*DKD829|W>AB7`>%ld(h&;`?$PW_JJ+ehcxLJavn=}Y>%$* zdixvp&uN?Xbl0@oZA4p5aQ3SW&I$6!xsCIHIXF}K*TI^;ldajjW`7&sDqw%Bv{?h3 zcm3OQ`F$(nc!Dli$od~klRIIh>;6YuZ!WbAEN(PSEI7xOAc2Mte1?9M#<|x-F0twM zk^21Y{{W8Rf%ofNm9@3w2^xKJ_HE=dH2QJ{2PE-xO!!tRiT z&p0Q7tKTQ-_)~DQwZS``9n6}Xs$?D*(-<)OjWYg60<*6){{R!TFEyTvp(L3s1c-!Z zp5s5tvm?@V#4W$Xvtv7kN|FwJeA89t()>dt;oINs*B$YB)h(26ky?SF~&hW_N_f4-^MLyf3`IL01w=#&(CfG1MG90bBe*cwbzyv ziM2cOdSW#~PCfj=>FfB^@LYJJ6$Trr2LqXG(6RcR{c4;-_BvQ?;JV)Lq-o@^!vwlQ z52kRu)OR+mEPHPJ9j8gOjGwjc820 zBkjTI^NgB;`H5tKU$Cqqz$2{}&?@cBq zShMi%tETQYIzESP&KrKsEDhNsB#|#Y2Q;#1ah%-U=z43;+&qhDP%s1>ADI6D`s)^H z?=B}I<5IoKwmwZ2@tDh?gkuFNCFz6PZS^gm_ z1NaPMr8-Dw5*Cj@uo>zMo$xt7Dd+I6TzGp>)Dh#39a=x$bLZPKmiNeDIQ%{8XnYl` zs%?nh1~COJw=!oL_Hr}t*P1z3aT@;s*#0Y)D3!X@H6cpl!`*HMs3fXybaBAyRXoa2v7QKa}cS+jlL z*jE=VDuXNCUwNb+ylzAKVvbw&70ru?J|ixo%@w|muoW0bg}0x?G4?zUT82G8;s}wX zxWCbm6U9H+RZb=fSf^N&f&? 
zml6El$0Pbvwt{y(p5I&43$)RAhBi<-wbZh0C)<5##jamo#T&z{lmk1OIamhi*yMhd zm8E#!PnzV(7l)H_^fRUx^=ul_Ux{$rw2`)fXTbR}Z;Us5{Mf8oeT}Y*V_t17MYZ0y zc`1>}R~BaM+pgACP~Ry9RG(}F0ysV@dU&8p4oO_`VeGX{YM>YTVIM+&fayrGeQT52Z+I5pLsYq zF?>p&7>9@6YK>HxiyX%KFo@0^7J`+N=p<#YXbDO@X@gHCOFKWR3Y@_{FJC!UlJ?B>2Pd>8ml`$G6CPlvx5yaT6rhe(mPi%muV z-c*}eM6o!-!zfl@B#f(o%mEoS^H-0*;D+OB6 zJ{f+~`p%K1c`5~-z2UgED~~V^Pn#6h$U*EpypK~}5#zu3AfN4}plMo$vEiQ(cq7DS z+1S}>wi?XPUS6*6l&HA3ju_hyUHE9ldM%XPV+L_>pW(gi?X(17PEjYsP*d{{X=VKVa`2YBu_(hcy2H z7kEZEp(!Pwhdf2*M{slZ+~3D?;k$gSqaCVwBtCev7 z_$}=g<&$^WZmzCjC{&-i#wacqu6C2yn*D}k-SU+OJ#Z`L-}oo@{1dmspRgVO0FMub zekERbg2To4mW^Qr?w+vCVQ+43Hb*3im2hu9%%lTVAoYs&{B4s6V zHvQgC;fw+@dv&Hb!1D$P&J^=cxsz*c&wjb6&lO;BK0E!E{{Uu>iW>dL zihd;i&i)41^-IXacRH@K;VV0vnG_A68DxyV0P)WqtHJzh{{RF<{{Vt{c(cQE=-war zWvlpN=4k^it)cj;_DhJqU`SQGl6Y`O9L8`D7&tZcg1a#WMIPk()Qsg{B=_SO>6(0z zeFx0Z@we>n{{RGN_(}UIe$##y{h_`R{9f@K*4`hsyj!go#FqB=GF`m(vs-RUh!z15 zQ7-MKC&?Jd&mUrLlcB~*L@a z!Y|nK!nzcfsJAy?5$)!R0{qDob{C=84DuDEk3a7U{AxC*c`vtz`H1KI@m$pv=34kj z@S!m3I`=$(p0v!d{{WVq1`iz6jD&_D;2t{l{3?9BbBti& zNo?^=Gkl<#2+lbicN}p@m?X&?Zpb@_e%_S8z@Q{`Y>qnnQUTIUBd!;W^GF+L;Rbs0 zX`p47e*>t=_3cBJ3|RL)fZ)(lN8f+&ZiLPM01dU`mQ}z%3h$Jkf9*Xp`e*X5+5<9$ z+z96x=ec2E~N>yz}Z znl8~Idy+{rWQ_8AfzbNYF8Ddm(=^przL5U_f|Cq<2h+FSy|8Z; zz^u{8$Z%~g`>6-0AJ4UXHdDiGyU+V8Ot=Yhwa3A4`l{LHN$^r8PVK{Bv~7F&nP+) zdSh@reLef;m&w#^qVlx$Hs5=Hl^9iS0OVsI{;IEM*jGEdyE}#vNe;gxX>TAghx1*| zKfHYKBp#rG4;ifatu3MkAuhQzgtjEJ)EzK7bduvAO5(h~5JubM)d6KZ6Bk4H8YG)o zay;!;GRx|pc;nok@@Q1wadRWIOGspjIBw+eA}Cyp>WUop8DSwg{CTQcoyUi;_YJIT z=0ziNCblET;pPB8ueq*VHR(c+dNI(QgD3IDLB1PDQr4=+10*7Y$0LsTn2-;yAD>+FkHVk-00}jQtnX`Uu3t!uaIxF!)2xS%bG7mY zbJvcQ%gcPX8-UnYB=biq`pG*l z^UX5fRBK}9+S>%aPnRZn{6MMXW9B-zuQfemHZz;}RfbCqLDiSP7?(Z0d)2hKHgkD$ z-QI9=MkGJ4nf^7wORE`Y3YPJfV}PZjX8gMTl=v@TypWf^RAPQ$U|@JYn|{B}m$PE# zM{_2>qgl>wE_KaCP!eSM_wNC4oIj&qK=>Frq4UFr)khWks{HvFLz%oy*}J?fP;VL7{=j&(~I5Kjfg z)QkrWHO$G!J;HoFJ>m|oH#D|on{QG93@}bVPKKAVLC(&)H283+)=}u%Aw*->R)w~nr<`O|@IHeo 
zHLjngcx;CoSr$~2aL#*U&>o$-*8_5O$SyK(j?pGTiY9u(7z>nnI;LjbU2<=b(bahwsyT9RD{!^!hq_zO=lUxdgBDMI!Yq>4fZS!2$AxU0S% zgIn&~CtzmK$ zVgU?@F=+%Z4ZDDx8iRC!f9EoYy5tNXf=O`t;=EH{Fsmrtt6bNA;8T*=PkQ`7~mgDXPV`(NxCS&TMUjh>k&eS}T0|oopBx_DX&9FTsmbX|dgWjr08Hi!i&>p-~#Kjb+m&;hIZM#AIjWkN=-@IXw#(UzOgjpy{2H%Y2 z(#dob6jlQR9G+>h+?PTJ@DYsn_p0|(Id7SiV7FPtt%tIzQmyFNa?ge0$-qhPs!5dz+J|28o@xFZ_x=dG{{RIJ@I~$XR$m5fyg_ARhF|SH3&b{W9m0X1-9;>s+&SyV zlo*k~HJgl<=xF&(UQ{&o6vScCG6j`h(= zdxTBhk;q&QZ~^QEC%$q&r6VpIEYtynz&&e1PXYe`f{ld#0EH=}hjvh&DuuGFS3l^t zL!5nU;;X44nSeI&zxza-kH;ooc^DUR)yPp9sI@FZge8sMUA;X#rvh_^rq&}u@>4~Yk|8l!@$S;t5o&a;MP_suJrqa zhBpOL;@y}pzEu4`PME3Vx0N7xtnE>9rxFHbW5@fPAI5;@ANJ%iw2&)pi|_d}@-yl( zD!iJ5U0brDnfb_!%t1YQ!6KTlUl{(+b73O|A&L7&;#>X&x89LmOqpFOVY5A2KTl?+ zv>Rn+wzfN&?ZixkkOjZZ#Zj@od;5klr12*VrsI0z7jF|Qy_**c3F4BcPxWEuuqf>b@|wzT6FW@mq-Pd$$(7A*vGcqbM?(6 zlV~!XviS@NJ;qPxieoO9pWQhl{fTzt>4Qn61iP;`D1zPNVfUxlS%Jnl0r8Jr!1NyA zv2@E*H`)}*=I$)gEvq3X-)P2v3YBJ&6+}efj1cTw@WG+AStFP1^Ia-Ff7SU)4J`+` zEzP`F@O_^0_-!lB0Y>%Y@M)HCSiqqL`H&UBGS9mM{sc`j>s68`5Y2HYB|+P;4x@}z z6W(6sOl7er+;)&6=hBVCsd6YRY}ChZaSU;>&JtC}NUy`&5!mVG0$vxtU1-Bf)Aq=`W4eML=}8jpNXfL+cHw@*q( zyz1+6>z`QHlm7sG)^tC z!f<^7MLGQiNN$9YW>SH3(Gaid8ljc0wFvhOt&DDUZNzsSb5CTyl^QuE-I0e~i~b+y z9jF7;63Wux6qn0v{{RR){+!ci)9o$UysMa3YHlH;`=g$D@lDIx3w-;b6#U!+DmxiEpw?vAX@H%)O;T@#6R;rhpt9`QTBcGUjfcjx^ z`I_}Ij54o_O|2uT!I;=+ykp-rI@~*BAdXKU)_fD~Apzv|093_>K2kH>5lZMRi!UQ?+#m6zXL3OUp7=Ep zW>J%q=}L@batSDz5%VY~&hl@T%Lt4r(bR$jX|GIVcMpoboYB=qyZR zQG$Oek~sBUO{-)U035^>+wn2j)yv1QwV zz|MW?3kBV~f=2^1s8Tf@GCinD<4(ki;I{pwWxooMCQ+F%PZ=NpGyZc`ox^}uPp`Eh znbiOf$ZT}Y1LiHW2akR+o}AJdPC*OY9DX%d?F`BT6M=)tr86JmIKk~j$}TLdIbr}J zg5`2o1QK(f=~T_jJ8jNAiOy-lQTJ6l_C4yB#PX7fLN8IA@O#pv(a8G(1M8mE6p}Gt zz4&IM9B?uC_NFwFE*TlY3gdy3Na)HkSd-qE7z6|mdgB6{EQ>6mT>dl(txEyWD31f% zkLgj&RHz6S)^zA zI(9j#%foF_KBk`}s}>mKcLI$7d2kq#LC;!{ZU}y1$Q;xODzt>2-8iQ-#QeD*l@~xb z%;)89p!F2OSUF%o8ShM#vl11!9OQMRQb5Vz0mvekFb;WPc*z|5Q&61cN4TaP#2=K7 zI+4W$lDiqY4mhZ|5&)!bBn+NNt}o&@?B)ACd|TENU-+x=C&9iZzJ@O@E8h@!X4dv- 
z(>dC&G;F|k;MZFu&aI9y&>9xtxb8Xhq|$qi?HpggZ`p_7PwdZYty_Ex_<3#N*nC5N zFLg_6JDHv-CJVF`Ngb9T2Sh-sJuALv+z8x998^AFZgPJrQ0fWKryNj{OF=hqVS|=D z=O&S*VSrCxT1V(GbA!b*bC5>i`BMYukN7EGB=||8Td^Sj0K!BGB=N)&@%q=qtZhSl z>|5pI88!A_{1o99@8O?^t>v|8aj!FkSpwvUU)4{q>t7Py>6Wt~OL!%>kUm|7!xdn2 zo&a1NkD&Cgkj?eRzeIf=TJ42iteW=r&eko`$+Tyk++=pD!(FtPX42tUt8@&xBlW4x zl&fkj;*IgQ8_mNL_3A%LthTzSZ!I+ogy$rSgI->Tb7OBjM`r}yOreA0J4{DAarE}7 zA-|U66WdulfB*sVMoH`3j8hvNkb_*jw68vRX#PNW(`fmp@Jbr%QW0M{u($ppe9{0dec=>%~^Ny}b|qk0LgC zzz4s%#VtUwcWHHSjTyHQhGCB=0pBCk{OaV$%w@mq`C4B%$qxPjb{yld`_xHm za}%nWV39kh1%FUYQSz=`rygRfb}pYiU%`jRMlWo!q#Y-1i?KnL(OAMPP6;h14?c?0=TWGj=~ zc!oJJk!gC2iy>F ze+q`qO%7~*m5IpXZa}Mx94e%O!yc|_ST1@@64=Lp?Y9Gf3Hfu|{QgyQPnsL487Y<%6+NBOLop4jDzIaaWKeUS^o@--k%BW?!8Y(R zP!gnj@<9E2QKsu!-N6eT#5i6`PYF}E*zO{tk9ElsEU4Rg+mGji`qb8g$dtUc@G$3r zk&k*>k3&bpzA)00U$U3PFiSR`mnrtqY=AM3aNZX?a&eq|!!wtp@-bxjghE}(T^vk5>PcOcivh{)@`EYUdZ{Y_E2@ZOcEAM}kr1CyVbe_HRr zL#{xRx3`qcA3s&D=gkL( z$mxvtugbkg;ctg-P~y@kj_wR#XZZ?`MDYIri+lkrxBmbRJb$j}GswGSeIHhu)tq`U zje$J+*RfALgZSckbIN7*S^BKf6*wg1oD5W==~o1v-D~qVM*XWlY%c-X$7`niXurC* zM%+fVXKi}YbI{2$G?@A}Yq-!q;He+9K9O|@@n6Fq3~LtAE+tz9y0e~7eeCj}KSafL zLp8$B$mX71UOOLswd{lu*Ket(3!#sE0>6pM57ZT9A>8UFy~rtI_UP4lwfA^bB(FqBKa zX#6_?NMu(W@$H(fvo=0#5_{75@qkCE^`n^%$ehSGVE5=h&(@qIU%FehQ)eZEu;_Rm zwBa%q=RVxg%!0_cjf^&awB(SfBphO@36EagzLeoPV}~Q3N)tk}Ewtkto=D^MrBk%w z&wO;M!G`0>_Zg>&FgG@OaD6F?%$PH7-H)I&=8zM(fq~c>t|nqQIUcmpI99_Rw8vs| zG-bFL9MK%2NP!sO_4KGW0l+%~PQfa7*}Jmhqt zDHLrRN;pnl^{QM zc?vV$mC0Y3nYvUhIaU}1^TjvJET=8c91}niLh3TXzZ}%1)Qkhq)~YZ7ALp6_xEUGe z*O~yZTMSN5wIWPV<8Z}9pz_0k>rH3CI2oV`rITdln3TNjR2B3=}nc#Tw|J>fh1&&zz#o^Ib;e5Vf3hyLWg#G(2iM& zBp+kzPV5JnK1>|(*E9xP?gxx;aZJj)lb*dh;+XOR_#A=IQoCS$;r{>yENRcf-wqfc z^B8L~9C5(%PdwMfx|RLyw96BiV5-C5={{XFDWB&lbO=LZkbdRL-?2C9%lz zNbmKpimxuM#v)6Q7;%lG{EGQJ*?+dbM12-q-_=k)qo`a&S!7stgNJxhBe>{^K6dmq4sfVFD#9D za!h7IWAaH>3+UO%C+Uw&Q}0$ZM~2?ZCA!NiF>lBaJNwmPtP9sKZE}HvaIF~_{eP`i zyw){1*oeZh7{Ct2aD7UX{HU-OZA3PNy}hgf$jNC~pU}1iL33!5N#{n8D`zi)4tsP5 
zih?aeP?;@|?zxFr`Ds;_TxZcC8(1BMI6bKDR9wE=R++AYI{Yl%*KnMuj? z=xLB&K`ew@-GD*I%%hY400IJ|iswuS(HrQ%EtZbq4_~8FTwCea*23O9_|-#X7^T`l z`h)sVP08B&bXIc^+^R8{0sOYojZacW5&02=jDW*#N8(56RqeFvSfcV?M3>NPX^t%P|4jQW051=&B_wFCE*09S#zznI9V8&lO4nHx@(us&uD#y=cptIw!JHnci* zyo-z%6Almk^G%CWjhk$CF(D`B9%ga$z@ou%Us=47mKm+gSSNEzM;?dXgK%U!V7v|( z=O`3$_|%sAmHfvGrIbK$S*FMHshO@0Ot1hk&(FJ$%7GN_Y;`-0i)%cD0heI>XWO6i znrrEIlPFny&~P&Ta~zT1ALsC?Swrx#BOcj7&*@cU`(j8@EEIJBupi8QC<4?PW7?8n zQ67Hrjv>$I{&}g+nQH7FM#B-wR0>Z|L0Ri1>jJ)5LPsnlV0iTZ0QFN8>n!`DxRFo+ z_naC8X+fu3+LmiuvV{Kta4+>0VA-pHr&XJtz_0%R6IsbVq!cnZcNpA_2**x))H;MZ zh1(%(6&O6Rd=h^K=|CQj6~sd_Mv`&VLge)xgRM+;TUL!Cxsls*U@gGMKb2>~ZElna z)fkRG?P=MF7B4IggYM#%hPMTh(HWx}G9oJD%~;f{epo$eEoHruLlwA$#gCB3D=rRk z?}Nwx0A8nw;rr}!k?B*%Bu5I(*SH_&{EAJ6pndS{Qv*NUkd7`CKS&5z+-*#q!3D#h{+ zI5FH2gY?BoZw$)@W3rUU56tF#D-7|#{-&tQs#`Jzx_1~K#Tn0T;C~}di%`?icEPAf zxc(NM!2FFML+?B@9;+(dT*%P8ZW#n)03M{{B<8K%+eG$4&OIvKPB7Aj`J0YH#raeFXU25uUKoPR&47dBg%+xaLO3&pSx#)ICztGjGOSuj)Is5+rfjSMu zg@v=P`?6xS^al9l;42ukj{y8@@qU+K0o@ISr-*J+-41j9s6NkLF;^}1JDY?vn@QO~ z+&A5E{{ZX%018R(E+Bx+L~yAfZG5rLc=XAw;fb#unUttfmgwv>AKO#**YJ(tmiNaR zSBPRnR7+iBL4+U!xocqylh~5qt#%p@{1q$W=8I({UOxB>rD~T3PAAcIxh!Kr>E+8D z<&VDcUOxBDG-y)XxNp4Z1~XB}o_dxxa&9Axpuxc7zaOP^!!yN4sm(kU33NWB@W=cW zPxgPmytLCiS>cZr=uayD0HxmDX^~HBZr#S{C3#$W5f*)gdM|`OZV%Zr#{U3lIzPwF z0^MzZG97Yu(((`YMdHL?{sWr$I`2k~WXJYv6T27zMg~Vc>nl?57LR#sW46-W+*fO$d|~~syc4KN zt9WzvnejBTIu+ZlPWq%^9C8x%Wk7i921huqxMtL4x-gbsHM2iYT6~|~lh5lQ*;(u#YQNF-^}BMzsYqmo5sdaS@#<@;82mK)obtj}k3;PwlP4bDwDUI9 z#&hpqHF!h*3ZwfaYsqmZh_#)6!rn;&G}pX6E3~7YSL|PCRRbLEBdF_Mi{SqN+W!Fd zUh(u+viM*2fbfjc-nQFoDfQ?ex^hkcHuAE{2|Qq+u9()RC8IHwImdK*U~~t0J@}wV zn8!RG1y-JSPnF_jIOsv|P@B%AA?e)JkZ1{Vdmeh?o+KxhZ~nDW6T1#ZJw|93Bn$vE z>L|Dx0#K?H9D15Ze5ed?59d{U@X?I8=}MP7kIKjV{c5fSlqytZzYu5>6g;;lILWG} z;49&OAw$Sj_Vvl5mr#2ZWP_e@^LD3n1%`d{ zWRIbztAzjt_vuw0ch6DvqB+~64sph5fpE$T?k)AENhm||sN=g-UVLXHkOfC2%xpgH zIiLw%ZR$rId8P>$f~Yf|-RiquD=`@9N+b))u;;#MT>)fBth={>eF3Jlk+9sX3F>`n 
z7M!OlGmfC1X{c44%0SOX$of%m9n|0f$4pWwLVbDTnm5KWNIA#nOb-~&PkwVrpAk+@=6OaM^RN*#G(9*1ooT%$a z?UBJe8f^=h5>lKp_vhN2vMaXLJ^4PB43Y&S9R(yZ5A}@Q4_tJq4r0lW;{zmn(;hjETFCz1C9sz`cz@?<7m%5vUbdGb$C zIY0es_@G%rBUHG&xRCRg5V$z}tL;ztDnpg{ap87~LYdd37(v(f4e9i+hi-3gKjo6M zW1Zq7$6naxzF#%h7X12r*Q^8o%|N`M$sE`^gk<_UCQ0<{Y737NT_8}e9@$xxZh_M$IQ}D0Ca0#r!5;L+-I>vW z9la_+ai!b>QtCG(XC;8iqReTHI>xIkuMxIfsQ~`-cj88Rccw?EU%kPKc=mRwHh!az(wXGixB^>ya2q^` z{$uIM;;b#wH{MdX>A#HrI5fzwITyeT{1#!Tez0{W+&eBnpMb6!DFc1IN=J)`kO`$~+M8 zPJQwT{&Wj#UDP3B-3B@4G9N&f%_rlFA~Vw(TWI$mqmS0>LfB(HBWs zhChe_{KW literal 0 HcmV?d00001 diff --git a/samples/data/aruco/tutorial_camera_charuco.yml b/samples/data/aruco/tutorial_camera_charuco.yml new file mode 100644 index 0000000000..fbcdd4e309 --- /dev/null +++ b/samples/data/aruco/tutorial_camera_charuco.yml @@ -0,0 +1,21 @@ +%YAML:1.0 +--- +calibration_time: "Wed 08 Dec 2021 05:13:09 PM MSK" +image_width: 640 +image_height: 480 +flags: 0 +camera_matrix: !!opencv-matrix + rows: 3 + cols: 3 + dt: d + data: [ 4.5251072219637672e+02, 0., 3.1770297317353277e+02, 0., + 4.5676707935146891e+02, 2.7775155919135995e+02, 0., 0., 1. 
] +distortion_coefficients: !!opencv-matrix + rows: 1 + cols: 5 + dt: d + data: [ 1.2136925618707872e-01, -1.0854664722560681e+00, + 1.1786843796668460e-04, -4.6240686046485508e-04, + 2.9542589406810080e+00 ] +avg_reprojection_error: 1.8234905535936044e-01 +info: "The camera calibration parameters were obtained by img_00.jpg-img_03.jpg from aruco/tutorials/aruco_calibration/images" diff --git a/samples/python/aruco_detect_board_charuco.py b/samples/python/aruco_detect_board_charuco.py new file mode 100644 index 0000000000..2625023ad2 --- /dev/null +++ b/samples/python/aruco_detect_board_charuco.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python + +"""aruco_detect_board_charuco.py +Usage example: +python aruco_detect_board_charuco.py -w=5 -h=7 -sl=0.04 -ml=0.02 -d=10 -c=../data/aruco/tutorial_camera_charuco.yml + -i=../data/aruco/choriginal.jpg +""" + +import argparse +import numpy as np +import cv2 as cv +import sys + + +def read_camera_parameters(filename): + fs = cv.FileStorage(cv.samples.findFile(filename, False), cv.FileStorage_READ) + if fs.isOpened(): + cam_matrix = fs.getNode("camera_matrix").mat() + dist_coefficients = fs.getNode("distortion_coefficients").mat() + return True, cam_matrix, dist_coefficients + return False, [], [] + + +def main(): + # parse command line options + parser = argparse.ArgumentParser(description="detect markers and corners of charuco board, estimate pose of charuco" + "board", add_help=False) + parser.add_argument("-H", "--help", help="show help", action="store_true", dest="show_help") + parser.add_argument("-v", "--video", help="Input from video or image file, if omitted, input comes from camera", + default="", action="store", dest="v") + parser.add_argument("-i", "--image", help="Input from image file", default="", action="store", dest="img_path") + parser.add_argument("-w", help="Number of squares in X direction", default="3", action="store", dest="w", type=int) + parser.add_argument("-h", help="Number of squares in Y direction", 
default="3", action="store", dest="h", type=int) + parser.add_argument("-sl", help="Square side length", default="1.", action="store", dest="sl", type=float) + parser.add_argument("-ml", help="Marker side length", default="0.5", action="store", dest="ml", type=float) + parser.add_argument("-d", help="dictionary: DICT_4X4_50=0, DICT_4X4_100=1, DICT_4X4_250=2, DICT_4X4_1000=3," + "DICT_5X5_50=4, DICT_5X5_100=5, DICT_5X5_250=6, DICT_5X5_1000=7, DICT_6X6_50=8," + "DICT_6X6_100=9, DICT_6X6_250=10, DICT_6X6_1000=11, DICT_7X7_50=12, DICT_7X7_100=13," + "DICT_7X7_250=14, DICT_7X7_1000=15, DICT_ARUCO_ORIGINAL = 16}", + default="0", action="store", dest="d", type=int) + parser.add_argument("-ci", help="Camera id if input doesnt come from video (-v)", default="0", action="store", + dest="ci", type=int) + parser.add_argument("-c", help="Input file with calibrated camera parameters", default="", action="store", + dest="cam_param") + + args = parser.parse_args() + + show_help = args.show_help + if show_help: + parser.print_help() + sys.exit() + width = args.w + height = args.h + sqruare_len = args.sl + marker_len = args.ml + dict = args.d + video = args.v + camera_id = args.ci + img_path = args.img_path + + cam_param = args.cam_param + cam_matrix = [] + dist_coefficients = [] + if cam_param != "": + _, cam_matrix, dist_coefficients = read_camera_parameters(cam_param) + + aruco_dict = cv.aruco.getPredefinedDictionary(dict) + board_size = (width, height) + board = cv.aruco.CharucoBoard(board_size, sqruare_len, marker_len, aruco_dict) + charuco_detector = cv.aruco.CharucoDetector(board) + + image = None + input_video = None + wait_time = 10 + if video != "": + input_video = cv.VideoCapture(cv.samples.findFileOrKeep(video, False)) + image = input_video.retrieve()[1] if input_video.grab() else None + elif img_path == "": + input_video = cv.VideoCapture(camera_id) + image = input_video.retrieve()[1] if input_video.grab() else None + elif img_path != "": + wait_time = 0 + image = 
cv.imread(cv.samples.findFile(img_path, False)) + + if image is None: + print("Error: unable to open video/image source") + sys.exit(0) + + while image is not None: + image_copy = np.copy(image) + charuco_corners, charuco_ids, marker_corners, marker_ids = charuco_detector.detectBoard(image) + if not (marker_ids is None) and len(marker_ids) > 0: + cv.aruco.drawDetectedMarkers(image_copy, marker_corners) + if not (charuco_ids is None) and len(charuco_ids) > 0: + cv.aruco.drawDetectedCornersCharuco(image_copy, charuco_corners, charuco_ids) + if len(cam_matrix) > 0 and len(charuco_ids) >= 4: + try: + obj_points, img_points = board.matchImagePoints(charuco_corners, charuco_ids) + flag, rvec, tvec = cv.solvePnP(obj_points, img_points, cam_matrix, dist_coefficients) + if flag: + cv.drawFrameAxes(image_copy, cam_matrix, dist_coefficients, rvec, tvec, .2) + except cv.error as error_inst: + print("SolvePnP recognize calibration pattern as non-planar pattern. To process this need to use " + "minimum 6 points. 
The planar pattern may be mistaken for non-planar if the pattern is " + "deformed or incorrect camera parameters are used.") + print(error_inst.err) + cv.imshow("out", image_copy) + key = cv.waitKey(wait_time) + if key == 27: + break + image = input_video.retrieve()[1] if input_video is not None and input_video.grab() else None + + +if __name__ == "__main__": + main() From 68e2df56e7f6487239341867ab95df9d545087ec Mon Sep 17 00:00:00 2001 From: TuNanTang Date: Tue, 14 Mar 2023 21:17:39 +0800 Subject: [PATCH 052/199] Optimize&Fix fitEllipse sample Optimize&Fix fitEllipse sample --- samples/cpp/fitellipse.cpp | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/samples/cpp/fitellipse.cpp b/samples/cpp/fitellipse.cpp index f136b9c408..7d217014d5 100644 --- a/samples/cpp/fitellipse.cpp +++ b/samples/cpp/fitellipse.cpp @@ -218,6 +218,11 @@ int main( int argc, char** argv ) return 0; } +inline static bool isGoodBox(const RotatedRect& box) { + //size.height >= size.width awalys,only if the pts are on a line or at the same point,size.width=0 + return (box.size.height <= box.size.width * 30) && (box.size.width > 0); +} + // Define trackbar callback function. This function finds contours, // draws them, and approximates by ellipses. 
void processImage(int /*h*/, void*) @@ -276,39 +281,30 @@ void processImage(int /*h*/, void*) { vector pts = points[i]; - if (pts.size()<=5) { + //At least 5 points can fit an ellipse + if (pts.size()<5) { continue; } if (fitEllipseQ) { box = fitEllipse(pts); - if( MAX(box.size.width, box.size.height) > MIN(box.size.width, box.size.height)*30 || - MAX(box.size.width, box.size.height) <= 0 || - MIN(box.size.width, box.size.height) <= 0){continue;}; + if (isGoodBox(box)) { + paper.drawEllipseWithBox(box, fitEllipseColor, 3); + } } if (fitEllipseAMSQ) { boxAMS = fitEllipseAMS(pts); - if( MAX(boxAMS.size.width, boxAMS.size.height) > MIN(boxAMS.size.width, boxAMS.size.height)*30 || - MAX(box.size.width, box.size.height) <= 0 || - MIN(box.size.width, box.size.height) <= 0){continue;}; + if (isGoodBox(boxAMS)) { + paper.drawEllipseWithBox(boxAMS, fitEllipseAMSColor, 2); + } } if (fitEllipseDirectQ) { boxDirect = fitEllipseDirect(pts); - if( MAX(boxDirect.size.width, boxDirect.size.height) > MIN(boxDirect.size.width, boxDirect.size.height)*30 || - MAX(box.size.width, box.size.height) <= 0 || - MIN(box.size.width, box.size.height) <= 0 ){continue;}; - } - - if (fitEllipseQ) { - paper.drawEllipseWithBox(box, fitEllipseColor, 3); - } - if (fitEllipseAMSQ) { - paper.drawEllipseWithBox(boxAMS, fitEllipseAMSColor, 2); - } - if (fitEllipseDirectQ) { - paper.drawEllipseWithBox(boxDirect, fitEllipseDirectColor, 1); + if (isGoodBox(boxDirect)){ + paper.drawEllipseWithBox(boxDirect, fitEllipseDirectColor, 1); + } } - paper.drawPoints(pts, cv::Scalar(255,255,255)); + paper.drawPoints(pts, fitEllipseTrueColor); } imshow("result", paper.img); From 69fd82fc465c5ce72c2d2b3a1b0bf33f1a87ca75 Mon Sep 17 00:00:00 2001 From: Abduragim Date: Wed, 15 Mar 2023 16:30:00 +0300 Subject: [PATCH 053/199] minor grammatical fixes to dnn_custom_layers.md --- .../dnn_custom_layers/dnn_custom_layers.md | 49 ++++++++++--------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git 
a/doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md b/doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md index a3f521d5df..6f4a70f642 100644 --- a/doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md +++ b/doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md @@ -3,29 +3,30 @@ @prev_tutorial{tutorial_dnn_javascript} ## Introduction -Deep learning is a fast growing area. The new approaches to build neural networks -usually introduce new types of layers. They could be modifications of existing -ones or implement outstanding researching ideas. +Deep learning is a fast-growing area. New approaches to building neural networks +usually introduce new types of layers. These could be modifications of existing +ones or implementation of outstanding research ideas. -OpenCV gives an opportunity to import and run networks from different deep learning -frameworks. There are a number of the most popular layers. However you can face -a problem that your network cannot be imported using OpenCV because of unimplemented layers. +OpenCV allows importing and running networks from different deep learning frameworks. +There is a number of the most popular layers. However, you can face a problem that +your network cannot be imported using OpenCV because some layers of your network +can be not implemented in the deep learning engine of OpenCV. The first solution is to create a feature request at https://github.com/opencv/opencv/issues -mentioning details such a source of model and type of new layer. A new layer could -be implemented if OpenCV community shares this need. +mentioning details such as a source of a model and a type of new layer. +The new layer could be implemented if the OpenCV community shares this need. -The second way is to define a **custom layer** so OpenCV's deep learning engine +The second way is to define a **custom layer** so that OpenCV's deep learning engine will know how to use it. 
This tutorial is dedicated to show you a process of deep -learning models import customization. +learning model's import customization. ## Define a custom layer in C++ Deep learning layer is a building block of network's pipeline. It has connections to **input blobs** and produces results to **output blobs**. There are trained **weights** and **hyper-parameters**. -Layers' names, types, weights and hyper-parameters are stored in files are generated by -native frameworks during training. If OpenCV mets unknown layer type it throws an -exception trying to read a model: +Layers' names, types, weights and hyper-parameters are stored in files are +generated by native frameworks during training. If OpenCV encounters unknown +layer type it throws an exception while trying to read a model: ``` Unspecified error: Can't create layer "layer_name" of type "MyType" in function getLayerInstance @@ -61,7 +62,7 @@ This method should create an instance of you layer and return cv::Ptr with it. @snippet dnn/custom_layers.hpp MyLayer::getMemoryShapes -Returns layer's output shapes depends on input shapes. You may request an extra +Returns layer's output shapes depending on input shapes. You may request an extra memory using `internals`. - Run a layer @@ -71,20 +72,20 @@ memory using `internals`. Implement a layer's logic here. Compute outputs for given inputs. @note OpenCV manages memory allocated for layers. In the most cases the same memory -can be reused between layers. So your `forward` implementation should not rely that -the second invocation of `forward` will has the same data at `outputs` and `internals`. +can be reused between layers. So your `forward` implementation should not rely on that +the second invocation of `forward` will have the same data at `outputs` and `internals`. 
- Optional `finalize` method @snippet dnn/custom_layers.hpp MyLayer::finalize -The chain of methods are the following: OpenCV deep learning engine calls `create` -method once then it calls `getMemoryShapes` for an every created layer then you -can make some preparations depends on known input dimensions at cv::dnn::Layer::finalize. -After network was initialized only `forward` method is called for an every network's input. +The chain of methods is the following: OpenCV deep learning engine calls `create` +method once, then it calls `getMemoryShapes` for every created layer, then you +can make some preparations depend on known input dimensions at cv::dnn::Layer::finalize. +After network was initialized only `forward` method is called for every network's input. -@note Varying input blobs' sizes such height or width or batch size you make OpenCV -reallocate all the internal memory. That leads efficiency gaps. Try to initialize +@note Varying input blobs' sizes such height, width or batch size make OpenCV +reallocate all the internal memory. That leads to efficiency gaps. Try to initialize and deploy models using a fixed batch size and image's dimensions. ## Example: custom layer from Caffe @@ -201,7 +202,7 @@ deep learning model. That was trained with one and only difference comparing to a current version of [Caffe framework](http://caffe.berkeleyvision.org/). `Crop` layers that receive two input blobs and crop the first one to match spatial dimensions of the second one used to crop from the center. Nowadays Caffe's layer does it -from the top-left corner. So using the latest version of Caffe or OpenCV you'll +from the top-left corner. So using the latest version of Caffe or OpenCV you will get shifted results with filled borders. Next we're going to replace OpenCV's `Crop` layer that makes top-left cropping by @@ -217,7 +218,7 @@ a centric one. @snippet dnn/edge_detection.py Register -That's it! We've replaced an implemented OpenCV's layer to a custom one. 
+That's it! We have replaced an implemented OpenCV's layer to a custom one. You may find a full script in the [source code](https://github.com/opencv/opencv/tree/3.4/samples/dnn/edge_detection.py).
From a2e04718ecd0ec0911520ecd0b1acd64c34a5751 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 17 Mar 2023 13:36:47 +0100 Subject: [PATCH 054/199] te for MSMF in doc --- modules/videoio/include/opencv2/videoio.hpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/videoio/include/opencv2/videoio.hpp b/modules/videoio/include/opencv2/videoio.hpp index c22fb369ca..3c0f8cda8a 100644 --- a/modules/videoio/include/opencv2/videoio.hpp +++ b/modules/videoio/include/opencv2/videoio.hpp @@ -84,8 +84,10 @@ namespace cv Select preferred API for a capture object. To be used in the VideoCapture::VideoCapture() constructor or VideoCapture::open() -@note Backends are available only if they have been built with your OpenCV binaries. +@note +- Backends are available only if they have been built with your OpenCV binaries. See @ref videoio_overview for more information. +- For CAP_MSMF setting environment flag "OPENCV_VIDEOIO_MSMF_ENABLE_HW_TRANSFORMS" to 0 may improve speed initialization */ enum VideoCaptureAPIs { CAP_ANY = 0, //!< Auto detect == 0 @@ -107,7 +109,7 @@ enum VideoCaptureAPIs { CAP_XIAPI = 1100, //!< XIMEA Camera API CAP_AVFOUNDATION = 1200, //!< AVFoundation framework for iOS (OS X Lion will have the same API) CAP_GIGANETIX = 1300, //!< Smartek Giganetix GigEVisionSDK - CAP_MSMF = 1400, //!< Microsoft Media Foundation (via videoInput) + CAP_MSMF = 1400, //!< Microsoft Media Foundation (via videoInput) read note above CAP_WINRT = 1410, //!< Microsoft Windows Runtime using Media Foundation CAP_INTELPERC = 1500, //!< RealSense (former Intel Perceptual Computing SDK) CAP_REALSENSE = 1500, //!< Synonym for CAP_INTELPERC @@ -126,6 +128,7 @@ enum VideoCaptureAPIs { CAP_OBSENSOR = 2600, //!< For Orbbec 3D-Sensor device/module (Astra+, Femto) }; + /** @brief cv::VideoCapture generic properties identifier. Reading / writing properties involves many layers. Some unexpected result might happens along this chain. 
From aef1fc087da05031bee24a45c3f679db5b2a9ffa Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Sun, 19 Mar 2023 10:58:47 +0300 Subject: [PATCH 055/199] cmake: fix V4L config verification conflict with OBSENSOR --- modules/videoio/cmake/detect_obsensor.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/videoio/cmake/detect_obsensor.cmake b/modules/videoio/cmake/detect_obsensor.cmake index 140e9a2e50..fe3f893b48 100644 --- a/modules/videoio/cmake/detect_obsensor.cmake +++ b/modules/videoio/cmake/detect_obsensor.cmake @@ -18,8 +18,8 @@ if(NOT HAVE_OBSENSOR) endif() endif() elseif(UNIX) - check_include_file(linux/videodev2.h HAVE_CAMV4L2) - if(HAVE_CAMV4L2) + check_include_file(linux/videodev2.h HAVE_CAMV4L2_OBSENSOR) + if(HAVE_CAMV4L2_OBSENSOR) set(HAVE_OBSENSOR TRUE) set(HAVE_OBSENSOR_V4L2 TRUE) ocv_add_external_target(obsensor "" "" "HAVE_OBSENSOR;HAVE_OBSENSOR_V4L2") From c4226f0457112ed6fc598d4b7456d130a1097085 Mon Sep 17 00:00:00 2001 From: Labib Asari <94868003+labeeb-7z@users.noreply.github.com> Date: Mon, 20 Mar 2023 12:36:57 +0530 Subject: [PATCH 056/199] Merge pull request #23196 from labeeb-7z:printOptionInRoiSelector Added argument to print notice in `roiSelector.cpp` Related Issue : https://github.com/opencv/opencv/issues/23175 I've added a printNotice argument to `selectROI` (and it's overload) and `selectROIs` functions. I've also updated the function declarations in `highgui.hpp`. Tested by building locally. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. 
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake --- modules/highgui/include/opencv2/highgui.hpp | 8 ++++-- modules/highgui/src/roiSelector.cpp | 32 ++++++++++++--------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/modules/highgui/include/opencv2/highgui.hpp b/modules/highgui/include/opencv2/highgui.hpp index 7ae2395737..32f6dfb25d 100644 --- a/modules/highgui/include/opencv2/highgui.hpp +++ b/modules/highgui/include/opencv2/highgui.hpp @@ -520,16 +520,17 @@ Controls: use `space` or `enter` to finish selection, use key `c` to cancel sele @param showCrosshair if true crosshair of selection rectangle will be shown. @param fromCenter if true center of selection will match initial mouse position. In opposite case a corner of selection rectangle will correspont to the initial mouse position. +@param printNotice if true a notice to select ROI or cancel selection will be printed in console. @return selected ROI or empty rect if selection canceled. @note The function sets it's own mouse callback for specified window using cv::setMouseCallback(windowName, ...). After finish of work an empty callback will be set for the used window. 
*/ -CV_EXPORTS_W Rect selectROI(const String& windowName, InputArray img, bool showCrosshair = true, bool fromCenter = false); +CV_EXPORTS_W Rect selectROI(const String& windowName, InputArray img, bool showCrosshair = true, bool fromCenter = false, bool printNotice = true); /** @overload */ -CV_EXPORTS_W Rect selectROI(InputArray img, bool showCrosshair = true, bool fromCenter = false); +CV_EXPORTS_W Rect selectROI(InputArray img, bool showCrosshair = true, bool fromCenter = false, bool printNotice = true); /** @brief Allows users to select multiple ROIs on the given image. @@ -543,12 +544,13 @@ use `esc` to terminate multiple ROI selection process. @param showCrosshair if true crosshair of selection rectangle will be shown. @param fromCenter if true center of selection will match initial mouse position. In opposite case a corner of selection rectangle will correspont to the initial mouse position. +@param printNotice if true a notice to select ROI or cancel selection will be printed in console. @note The function sets it's own mouse callback for specified window using cv::setMouseCallback(windowName, ...). After finish of work an empty callback will be set for the used window. */ CV_EXPORTS_W void selectROIs(const String& windowName, InputArray img, - CV_OUT std::vector& boundingBoxes, bool showCrosshair = true, bool fromCenter = false); + CV_OUT std::vector& boundingBoxes, bool showCrosshair = true, bool fromCenter = false, bool printNotice = true); /** @brief Creates a trackbar and attaches it to the specified window. 
diff --git a/modules/highgui/src/roiSelector.cpp b/modules/highgui/src/roiSelector.cpp index 4fba07eacb..1bbd246c05 100644 --- a/modules/highgui/src/roiSelector.cpp +++ b/modules/highgui/src/roiSelector.cpp @@ -13,11 +13,14 @@ namespace class ROISelector { public: - Rect select(const String &windowName, Mat img, bool showCrossair = true, bool fromCenter = true) + Rect select(const String &windowName, Mat img, bool showCrossair = true, bool fromCenter = true, bool printNotice = true) { - // show notice to user - printf("Select a ROI and then press SPACE or ENTER button!\n"); - printf("Cancel the selection process by pressing c button!\n"); + if(printNotice) + { + // show notice to user + printf("Select a ROI and then press SPACE or ENTER button!\n"); + printf("Cancel the selection process by pressing c button!\n"); + } key = 0; imageSize = img.size(); @@ -83,16 +86,19 @@ class ROISelector } void select(const String &windowName, Mat img, std::vector &boundingBoxes, - bool showCrosshair = true, bool fromCenter = true) + bool showCrosshair = true, bool fromCenter = true, bool printNotice = true) { - printf("Finish the selection process by pressing ESC button!\n"); + if(printNotice) + { + printf("Finish the selection process by pressing ESC button!\n"); + } boundingBoxes.clear(); key = 0; // while key is not ESC (27) for (;;) { - Rect temp = select(windowName, img, showCrosshair, fromCenter); + Rect temp = select(windowName, img, showCrosshair, fromCenter, printNotice); if (key == 27) break; if (temp.width > 0 && temp.height > 0) @@ -195,21 +201,21 @@ class ROISelector }; } -Rect cv::selectROI(InputArray img, bool showCrosshair, bool fromCenter) +Rect cv::selectROI(InputArray img, bool showCrosshair, bool fromCenter, bool printNotice) { ROISelector selector; - return selector.select("ROI selector", img.getMat(), showCrosshair, fromCenter); + return selector.select("ROI selector", img.getMat(), showCrosshair, fromCenter, printNotice); } -Rect cv::selectROI(const String& 
windowName, InputArray img, bool showCrosshair, bool fromCenter) +Rect cv::selectROI(const String& windowName, InputArray img, bool showCrosshair, bool fromCenter, bool printNotice) { ROISelector selector; - return selector.select(windowName, img.getMat(), showCrosshair, fromCenter); + return selector.select(windowName, img.getMat(), showCrosshair, fromCenter, printNotice); } void cv::selectROIs(const String& windowName, InputArray img, - std::vector& boundingBox, bool showCrosshair, bool fromCenter) + std::vector& boundingBox, bool showCrosshair, bool fromCenter, bool printNotice) { ROISelector selector; - selector.select(windowName, img.getMat(), boundingBox, showCrosshair, fromCenter); + selector.select(windowName, img.getMat(), boundingBox, showCrosshair, fromCenter, printNotice); } From a1b4aa5e88d429e5300ef05c926ac84767348068 Mon Sep 17 00:00:00 2001 From: Genci Berisha Date: Sat, 18 Feb 2023 23:17:47 +0100 Subject: [PATCH 057/199] Added QR_Code data flip support, flip and retry after first EEC failure Added regression test for the flipped images --- 3rdparty/quirc/include/quirc.h | 2 ++ 3rdparty/quirc/src/decode.c | 15 ++++++++++++ modules/objdetect/src/qrcode.cpp | 6 +++++ modules/objdetect/test/test_qrcode.cpp | 32 ++++++++++++++++++++++++++ 4 files changed, 55 insertions(+) diff --git a/3rdparty/quirc/include/quirc.h b/3rdparty/quirc/include/quirc.h index 0e7cb94d1c..957ae10e6c 100644 --- a/3rdparty/quirc/include/quirc.h +++ b/3rdparty/quirc/include/quirc.h @@ -165,6 +165,8 @@ void quirc_extract(const struct quirc *q, int index, /* Decode a QR-code, returning the payload data. 
*/ quirc_decode_error_t quirc_decode(const struct quirc_code *code, struct quirc_data *data); +/* flip the QR code horizontaly (mirror flip) */ +void quirc_flip(struct quirc_code *code); #ifdef __cplusplus } diff --git a/3rdparty/quirc/src/decode.c b/3rdparty/quirc/src/decode.c index 894b5067d8..e24a4fbe2a 100644 --- a/3rdparty/quirc/src/decode.c +++ b/3rdparty/quirc/src/decode.c @@ -917,3 +917,18 @@ quirc_decode_error_t quirc_decode(const struct quirc_code *code, return QUIRC_SUCCESS; } + +void quirc_flip(struct quirc_code *code) +{ + struct quirc_code flipped = {0}; + unsigned int offset = 0; + for (int y = 0; y < code->size; y++) { + for (int x = 0; x < code->size; x++) { + if (grid_bit(code, y, x)) { + flipped.cell_bitmap[offset >> 3u] |= (1u << (offset & 7u)); + } + offset++; + } + } + memcpy(&code->cell_bitmap, &flipped.cell_bitmap, sizeof(flipped.cell_bitmap)); +} diff --git a/modules/objdetect/src/qrcode.cpp b/modules/objdetect/src/qrcode.cpp index 41c604a8db..93fffeaf5c 100644 --- a/modules/objdetect/src/qrcode.cpp +++ b/modules/objdetect/src/qrcode.cpp @@ -2732,6 +2732,12 @@ bool QRDecode::decodingProcess() quirc_data qr_code_data; quirc_decode_error_t errorCode = quirc_decode(&qr_code, &qr_code_data); + + if(errorCode == QUIRC_ERROR_DATA_ECC){ + quirc_flip(&qr_code); + errorCode = quirc_decode(&qr_code, &qr_code_data); + } + if (errorCode != 0) { return false; } for (int i = 0; i < qr_code_data.payload_len; i++) diff --git a/modules/objdetect/test/test_qrcode.cpp b/modules/objdetect/test/test_qrcode.cpp index 568324b7ca..0868932eab 100644 --- a/modules/objdetect/test/test_qrcode.cpp +++ b/modules/objdetect/test/test_qrcode.cpp @@ -708,6 +708,38 @@ TEST(Objdetect_QRCode_detect, detect_regression_21287) #endif } +TEST(Objdetect_QRCode_detect_flipped, regression_23249) +{ + + const std::vector> flipped_images = + // image name , expected result + {{"flipped_1.png", "The key is /qrcod_OMevpf"}, + {"flipped_2.png", "A26"}}; + + const std::string root = 
"qrcode/flipped/"; + + for(const auto &flipped_image : flipped_images){ + const std::string &image_name = flipped_image.first; + const std::string &expect_msg = flipped_image.second; + + std::string image_path = findDataFile(root + image_name); + Mat src = imread(image_path); + ASSERT_FALSE(src.empty()) << "Can't read image: " << image_path; + QRCodeDetector qrcode; + std::vector corners; + Mat straight_barcode; + cv::String decoded_info; + EXPECT_TRUE(qrcode.detect(src, corners)); + EXPECT_TRUE(!corners.empty()); + std::string decoded_msg; + #ifdef HAVE_QUIRC + EXPECT_NO_THROW(decoded_msg = qrcode.decode(src, corners, straight_barcode)); + ASSERT_FALSE(straight_barcode.empty()) << "Can't decode qrimage."; + EXPECT_EQ(expect_msg, decoded_msg); + #endif + } +} + // @author Kumataro, https://github.com/Kumataro TEST(Objdetect_QRCode_decode, decode_regression_21929) { From abfa5c586a35b9725ce5e822f72ecd26e99ac2dc Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 20 Mar 2023 15:44:14 +0100 Subject: [PATCH 058/199] use findFile in opengl.cpp sample --- samples/opengl/opengl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/opengl/opengl.cpp b/samples/opengl/opengl.cpp index dd2f913164..fd1aa45c08 100644 --- a/samples/opengl/opengl.cpp +++ b/samples/opengl/opengl.cpp @@ -50,12 +50,12 @@ int main(int argc, char* argv[]) if (argc < 2) { cout << "Usage: " << argv[0] << " image" << endl; - filename = "../data/lena.jpg"; + filename = "lena.jpg"; } else filename = argv[1]; - Mat img = imread(filename); + Mat img = imread(samples::findFile(filename)); if (img.empty()) { cerr << "Can't open image " << filename << endl; From da3a4dcbc156e9f223fad9699bf62dada67747f1 Mon Sep 17 00:00:00 2001 From: Wwupup Date: Fri, 23 Dec 2022 11:12:43 +0800 Subject: [PATCH 059/199] upgrade FaceDetectorYN to v2 --- modules/objdetect/src/face_detect.cpp | 210 +++++++++++--------------- modules/objdetect/test/test_face.cpp | 13 +- 2 files changed, 91 insertions(+), 132 
deletions(-) diff --git a/modules/objdetect/src/face_detect.cpp b/modules/objdetect/src/face_detect.cpp index 10259a32e6..17c982d92f 100644 --- a/modules/objdetect/src/face_detect.cpp +++ b/modules/objdetect/src/face_detect.cpp @@ -6,6 +6,7 @@ #include "opencv2/imgproc.hpp" #include "opencv2/core.hpp" + #ifdef HAVE_OPENCV_DNN #include "opencv2/dnn.hpp" #endif @@ -27,6 +28,8 @@ public: int top_k, int backend_id, int target_id) + :divisor(32), + strides({8, 16, 32}) { net = dnn::readNet(model, config); CV_Assert(!net.empty()); @@ -37,18 +40,20 @@ public: inputW = input_size.width; inputH = input_size.height; + padW = (int((inputW - 1) / divisor) + 1) * divisor; + padH = (int((inputH - 1) / divisor) + 1) * divisor; + scoreThreshold = score_threshold; nmsThreshold = nms_threshold; topK = top_k; - - generatePriors(); } void setInputSize(const Size& input_size) override { inputW = input_size.width; inputH = input_size.height; - generatePriors(); + padW = ((inputW - 1) / divisor + 1) * divisor; + padH = ((inputH - 1) / divisor + 1) * divisor; } Size getInputSize() override @@ -97,12 +102,14 @@ public: return 0; } CV_CheckEQ(input_image.size(), Size(inputW, inputH), "Size does not match. 
Call setInputSize(size) if input size does not match the preset size"); + // Pad input_image with divisor 32 + Mat pad_image = padWithDivisor(input_image); // Build blob from input image - Mat input_blob = dnn::blobFromImage(input_image); + Mat input_blob = dnn::blobFromImage(pad_image); // Forward - std::vector output_names = { "loc", "conf", "iou" }; + std::vector output_names = { "cls_8", "cls_16", "cls_32", "obj_8", "obj_16", "obj_32", "bbox_8", "bbox_16", "bbox_32", "kps_8", "kps_16", "kps_32" }; std::vector output_blobs; net.setInput(input_blob); net.forward(output_blobs, output_names); @@ -113,126 +120,70 @@ public: return 1; } private: - void generatePriors() - { - // Calculate shapes of different scales according to the shape of input image - Size feature_map_2nd = { - int(int((inputW+1)/2)/2), int(int((inputH+1)/2)/2) - }; - Size feature_map_3rd = { - int(feature_map_2nd.width/2), int(feature_map_2nd.height/2) - }; - Size feature_map_4th = { - int(feature_map_3rd.width/2), int(feature_map_3rd.height/2) - }; - Size feature_map_5th = { - int(feature_map_4th.width/2), int(feature_map_4th.height/2) - }; - Size feature_map_6th = { - int(feature_map_5th.width/2), int(feature_map_5th.height/2) - }; - - std::vector feature_map_sizes; - feature_map_sizes.push_back(feature_map_3rd); - feature_map_sizes.push_back(feature_map_4th); - feature_map_sizes.push_back(feature_map_5th); - feature_map_sizes.push_back(feature_map_6th); - - // Fixed params for generating priors - const std::vector> min_sizes = { - {10.0f, 16.0f, 24.0f}, - {32.0f, 48.0f}, - {64.0f, 96.0f}, - {128.0f, 192.0f, 256.0f} - }; - CV_Assert(min_sizes.size() == feature_map_sizes.size()); // just to keep vectors in sync - const std::vector steps = { 8, 16, 32, 64 }; - - // Generate priors - priors.clear(); - for (size_t i = 0; i < feature_map_sizes.size(); ++i) - { - Size feature_map_size = feature_map_sizes[i]; - std::vector min_size = min_sizes[i]; - - for (int _h = 0; _h < feature_map_size.height; 
++_h) - { - for (int _w = 0; _w < feature_map_size.width; ++_w) - { - for (size_t j = 0; j < min_size.size(); ++j) - { - float s_kx = min_size[j] / inputW; - float s_ky = min_size[j] / inputH; - - float cx = (_w + 0.5f) * steps[i] / inputW; - float cy = (_h + 0.5f) * steps[i] / inputH; - - Rect2f prior = { cx, cy, s_kx, s_ky }; - priors.push_back(prior); - } - } - } - } - } - Mat postProcess(const std::vector& output_blobs) { - // Extract from output_blobs - Mat loc = output_blobs[0]; - Mat conf = output_blobs[1]; - Mat iou = output_blobs[2]; - - // Decode from deltas and priors - const std::vector variance = {0.1f, 0.2f}; - float* loc_v = (float*)(loc.data); - float* conf_v = (float*)(conf.data); - float* iou_v = (float*)(iou.data); Mat faces; - // (tl_x, tl_y, w, h, re_x, re_y, le_x, le_y, nt_x, nt_y, rcm_x, rcm_y, lcm_x, lcm_y, score) - // 'tl': top left point of the bounding box - // 're': right eye, 'le': left eye - // 'nt': nose tip - // 'rcm': right corner of mouth, 'lcm': left corner of mouth - Mat face(1, 15, CV_32FC1); - for (size_t i = 0; i < priors.size(); ++i) { - // Get score - float clsScore = conf_v[i*2+1]; - float iouScore = iou_v[i]; - // Clamp - if (iouScore < 0.f) { - iouScore = 0.f; - } - else if (iouScore > 1.f) { - iouScore = 1.f; + for (size_t i = 0; i < strides.size(); ++i) { + int cols = int(padW / strides[i]); + int rows = int(padH / strides[i]); + + // Extract from output_blobs + Mat cls = output_blobs[i]; + Mat obj = output_blobs[i + strides.size() * 1]; + Mat bbox = output_blobs[i + strides.size() * 2]; + Mat kps = output_blobs[i + strides.size() * 3]; + + // Decode from predictions + float* cls_v = (float*)(cls.data); + float* obj_v = (float*)(obj.data); + float* bbox_v = (float*)(bbox.data); + float* kps_v = (float*)(kps.data); + + // (tl_x, tl_y, w, h, re_x, re_y, le_x, le_y, nt_x, nt_y, rcm_x, rcm_y, lcm_x, lcm_y, score) + // 'tl': top left point of the bounding box + // 're': right eye, 'le': left eye + // 'nt': nose tip + // 
'rcm': right corner of mouth, 'lcm': left corner of mouth + Mat face(1, 15, CV_32FC1); + + for(int r = 0; r < rows; ++r) { + for(int c = 0; c < cols; ++c) { + size_t idx = r * cols + c; + + // Get score + float cls_score = cls_v[idx]; + float obj_score = obj_v[idx]; + + // Clamp + cls_score = MIN(cls_score, 1.f); + cls_score = MAX(cls_score, 0.f); + obj_score = MIN(obj_score, 1.f); + obj_score = MAX(obj_score, 0.f); + float score = std::sqrt(cls_score * obj_score); + face.at(0, 14) = score; + + // Get bounding box + float cx = ((c + bbox_v[idx * 4 + 0]) * strides[i]); + float cy = ((r + bbox_v[idx * 4 + 1]) * strides[i]); + float w = exp(bbox_v[idx * 4 + 2]) * strides[i]; + float h = exp(bbox_v[idx * 4 + 3]) * strides[i]; + + float x1 = cx - w / 2.f; + float y1 = cy - h / 2.f; + + face.at(0, 0) = x1; + face.at(0, 1) = y1; + face.at(0, 2) = w; + face.at(0, 3) = h; + + // Get landmarks + for(int n = 0; n < 5; ++n) { + face.at(0, 4 + 2 * n) = (kps_v[idx * 10 + 2 * n] + c) * strides[i]; + face.at(0, 4 + 2 * n + 1) = (kps_v[idx * 10 + 2 * n + 1]+ r) * strides[i]; + } + faces.push_back(face); + } } - float score = std::sqrt(clsScore * iouScore); - face.at(0, 14) = score; - - // Get bounding box - float cx = (priors[i].x + loc_v[i*14+0] * variance[0] * priors[i].width) * inputW; - float cy = (priors[i].y + loc_v[i*14+1] * variance[0] * priors[i].height) * inputH; - float w = priors[i].width * exp(loc_v[i*14+2] * variance[0]) * inputW; - float h = priors[i].height * exp(loc_v[i*14+3] * variance[1]) * inputH; - float x1 = cx - w / 2; - float y1 = cy - h / 2; - face.at(0, 0) = x1; - face.at(0, 1) = y1; - face.at(0, 2) = w; - face.at(0, 3) = h; - - // Get landmarks - face.at(0, 4) = (priors[i].x + loc_v[i*14+ 4] * variance[0] * priors[i].width) * inputW; // right eye, x - face.at(0, 5) = (priors[i].y + loc_v[i*14+ 5] * variance[0] * priors[i].height) * inputH; // right eye, y - face.at(0, 6) = (priors[i].x + loc_v[i*14+ 6] * variance[0] * priors[i].width) * inputW; // left 
eye, x - face.at(0, 7) = (priors[i].y + loc_v[i*14+ 7] * variance[0] * priors[i].height) * inputH; // left eye, y - face.at(0, 8) = (priors[i].x + loc_v[i*14+ 8] * variance[0] * priors[i].width) * inputW; // nose tip, x - face.at(0, 9) = (priors[i].y + loc_v[i*14+ 9] * variance[0] * priors[i].height) * inputH; // nose tip, y - face.at(0, 10) = (priors[i].x + loc_v[i*14+10] * variance[0] * priors[i].width) * inputW; // right corner of mouth, x - face.at(0, 11) = (priors[i].y + loc_v[i*14+11] * variance[0] * priors[i].height) * inputH; // right corner of mouth, y - face.at(0, 12) = (priors[i].x + loc_v[i*14+12] * variance[0] * priors[i].width) * inputW; // left corner of mouth, x - face.at(0, 13) = (priors[i].y + loc_v[i*14+13] * variance[0] * priors[i].height) * inputH; // left corner of mouth, y - - faces.push_back(face); } if (faces.rows > 1) @@ -265,16 +216,27 @@ private: return faces; } } + + Mat padWithDivisor(InputArray& input_image) + { + int bottom = padH - inputH; + int right = padW - inputW; + Mat pad_image; + copyMakeBorder(input_image, pad_image, 0, bottom, 0, right, BORDER_CONSTANT, 0); + return pad_image; + } private: dnn::Net net; int inputW; int inputH; + int padW; + int padH; + const int divisor; + int topK; float scoreThreshold; float nmsThreshold; - int topK; - - std::vector priors; + const std::vector strides; }; #endif diff --git a/modules/objdetect/test/test_face.cpp b/modules/objdetect/test/test_face.cpp index d33032fa2f..e55401c061 100644 --- a/modules/objdetect/test/test_face.cpp +++ b/modules/objdetect/test/test_face.cpp @@ -65,20 +65,16 @@ TEST(Objdetect_face_detection, regression) { // Pre-set params float scoreThreshold = 0.7f; - float matchThreshold = 0.9f; - float l2disThreshold = 5.0f; + float matchThreshold = 0.7f; + float l2disThreshold = 15.0f; int numLM = 5; int numCoords = 4 + 2 * numLM; // Load ground truth labels std::map gt = blobFromTXT(findDataFile("dnn_face/detection/cascades_labels.txt"), numCoords); - // for (auto item: 
gt) - // { - // std::cout << item.first << " " << item.second.size() << std::endl; - // } // Initialize detector - std::string model = findDataFile("dnn/onnx/models/yunet-202202.onnx", false); + std::string model = findDataFile("dnn/onnx/models/yunet-202303.onnx", false); Ptr faceDetector = FaceDetectorYN::create(model, "", Size(300, 300)); faceDetector->setScoreThreshold(0.7f); @@ -137,6 +133,7 @@ TEST(Objdetect_face_detection, regression) lmMatched[lmIdx] = true; } } + break; } EXPECT_TRUE(boxMatched) << "In image " << item.first << ", cannot match resBox " << resBox << " with any ground truth."; if (boxMatched) @@ -178,7 +175,7 @@ TEST(Objdetect_face_recognition, regression) } // Initialize detector - std::string detect_model = findDataFile("dnn/onnx/models/yunet-202202.onnx", false); + std::string detect_model = findDataFile("dnn/onnx/models/yunet-202303.onnx", false); Ptr faceDetector = FaceDetectorYN::create(detect_model, "", Size(150, 150), score_thresh, nms_thresh); std::string recog_model = findDataFile("dnn/onnx/models/face_recognizer_fast.onnx", false); From a60408cda5c73948401bdae4af696a003e64f808 Mon Sep 17 00:00:00 2001 From: "ippei.i" Date: Tue, 21 Mar 2023 20:29:24 +0900 Subject: [PATCH 060/199] Merge pull request #23300 from ippei-i:CAP_PROP_AUTO_WB-and-CAP_PROP_WHITE_BALANCE_BLUE_U_support_in_CAP_DSHOW Support VideoCapture CAP_PROP_AUTO_WB and CV_CAP_PROP_WHITE_BALANCE_BLUE_U for DShow ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [OK] I agree to contribute to the project under Apache 2 License. 
- [OK] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [OK] The PR is proposed to the proper branch - [OK] There is a reference to the original bug report and related work https://github.com/opencv/opencv/issues/19621 https://github.com/opencv/opencv/issues/21408 ### Before apply this pull request console output. before AWB setting CAP_PROP_WHITE_BALANCE_BLUE_U: 2000 CAP_PROP_AUTO_WB: -1 after AWB disable setting CAP_PROP_WHITE_BALANCE_BLUE_U: 2000 CAP_PROP_AUTO_WB: -1 after AWB enable setting CAP_PROP_WHITE_BALANCE_BLUE_U: 2000 CAP_PROP_AUTO_WB: -1 after Manual WB(and Disable AWB) setting CAP_PROP_WHITE_BALANCE_BLUE_U: 2000 CAP_PROP_AUTO_WB: -1 ### After apply this pull request console output. before AWB setting CAP_PROP_WHITE_BALANCE_BLUE_U: 2000 CAP_PROP_AUTO_WB: 0 after AWB disable setting CAP_PROP_WHITE_BALANCE_BLUE_U: 4000 CAP_PROP_AUTO_WB: 0 after AWB enable setting CAP_PROP_WHITE_BALANCE_BLUE_U: 4000 CAP_PROP_AUTO_WB: 1 after Manual WB(and Disable AWB) setting CAP_PROP_WHITE_BALANCE_BLUE_U: 2000 CAP_PROP_AUTO_WB: 0 ### Test Code [OpenCvVideoCapTest.zip](https://github.com/opencv/opencv/files/10825399/OpenCvVideoCapTest.zip) --- modules/videoio/src/cap_dshow.cpp | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/modules/videoio/src/cap_dshow.cpp b/modules/videoio/src/cap_dshow.cpp index 3d7013d48f..bf3e716520 100644 --- a/modules/videoio/src/cap_dshow.cpp +++ b/modules/videoio/src/cap_dshow.cpp @@ -1859,7 +1859,7 @@ bool videoInput::setVideoSettingFilter(int deviceID, long Property, long lValue, DebugPrintOut("Current value: %ld Flags %ld (%s)\n", CurrVal, CapsFlags, (CapsFlags == 1 ? "Auto" : (CapsFlags == 2 ? 
"Manual" : "Unknown"))); if (useDefaultValue) { - hr = pAMVideoProcAmp->Set(Property, Default, VideoProcAmp_Flags_Auto); + hr = pAMVideoProcAmp->Set(Property, Default, Flags); } else{ // Perhaps add a check that lValue and Flags are within the range acquired from GetRange above @@ -2391,6 +2391,9 @@ int videoInput::getVideoPropertyFromCV(int cv_property){ case CV_CAP_PROP_WHITE_BALANCE_BLUE_U: return VideoProcAmp_WhiteBalance; + case cv::VideoCaptureProperties::CAP_PROP_AUTO_WB: + return VideoProcAmp_WhiteBalance; + case CV_CAP_PROP_BACKLIGHT: return VideoProcAmp_BacklightCompensation; @@ -3397,6 +3400,11 @@ double VideoCapture_DShow::getProperty(int propIdx) const return (double)current_value; break; + case cv::VideoCaptureProperties::CAP_PROP_AUTO_WB: + if (g_VI.getVideoSettingFilter(m_index, g_VI.getVideoPropertyFromCV(propIdx), min_value, max_value, stepping_delta, current_value, flags, defaultValue)) + return (double)flags == CameraControl_Flags_Auto ? 1.0 : 0.0; + break; + // camera properties case CV_CAP_PROP_PAN: case CV_CAP_PROP_TILT: @@ -3539,6 +3547,24 @@ bool VideoCapture_DShow::setProperty(int propIdx, double propVal) return true; } + // set the same as setVideoSettingFilter default arguments. 
+ long flags = 0L; + bool useDefaultValue = false; + switch (propIdx) + { + case cv::VideoCaptureProperties::CAP_PROP_AUTO_WB: + case CV_CAP_PROP_AUTO_EXPOSURE: + useDefaultValue = true; + if (cvRound(propVal) == 1) + flags = VideoProcAmp_Flags_Auto; + else + flags = VideoProcAmp_Flags_Manual; + break; + case CV_CAP_PROP_WHITE_BALANCE_BLUE_U: + flags = VideoProcAmp_Flags_Manual; + break; + } + //video Filter properties switch (propIdx) { @@ -3550,9 +3576,10 @@ bool VideoCapture_DShow::setProperty(int propIdx, double propVal) case CV_CAP_PROP_GAMMA: case CV_CAP_PROP_MONOCHROME: case CV_CAP_PROP_WHITE_BALANCE_BLUE_U: + case cv::VideoCaptureProperties::CAP_PROP_AUTO_WB: case CV_CAP_PROP_BACKLIGHT: case CV_CAP_PROP_GAIN: - return g_VI.setVideoSettingFilter(m_index, g_VI.getVideoPropertyFromCV(propIdx), (long)propVal); + return g_VI.setVideoSettingFilter(m_index, g_VI.getVideoPropertyFromCV(propIdx), (long)propVal, flags, useDefaultValue); } //camera properties From 8483f2ef2f948d68f18c430816abafab917fc636 Mon Sep 17 00:00:00 2001 From: Zero-nnkn Date: Mon, 20 Mar 2023 23:08:53 +0700 Subject: [PATCH 061/199] Fix error of `POSE_PAIRS` in pose estimation doc --- doc/js_tutorials/js_assets/js_pose_estimation.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/js_tutorials/js_assets/js_pose_estimation.html b/doc/js_tutorials/js_assets/js_pose_estimation.html index 19c64663d1..dbdb26fdbc 100644 --- a/doc/js_tutorials/js_assets/js_pose_estimation.html +++ b/doc/js_tutorials/js_assets/js_pose_estimation.html @@ -147,7 +147,7 @@ if (dataset === 'COCO') { ["Neck", "LShoulder"], ["RShoulder", "RElbow"], ["RElbow", "RWrist"], ["LShoulder", "LElbow"], ["LElbow", "LWrist"], ["Nose", "REye"], - ["REye", "REar"], ["Neck", "LEye"], + ["REye", "REar"], ["Nose", "LEye"], ["LEye", "LEar"], ["Neck", "MidHip"], ["MidHip", "RHip"], ["RHip", "RKnee"], ["RKnee", "RAnkle"], ["RAnkle", "RBigToe"], From 5df6b4a756dcac3fc162a25ab75f13376417856b Mon Sep 17 00:00:00 2001 From: 
Dmitry Kurtaev Date: Tue, 21 Mar 2023 14:50:53 +0300 Subject: [PATCH 062/199] Merge pull request #23325 from dkurt:dnn_input_info Propagate inputs info for ONNX and TFLite models ### Pull Request Readiness Checklist Needed for generic applications such as benchmarking pipelines. So OpenCV can tell about the default input shapes specified in the models. See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake --- modules/dnn/src/net_impl.cpp | 4 ++-- modules/dnn/src/onnx/onnx_importer.cpp | 5 +++++ modules/dnn/src/tflite/tflite_importer.cpp | 19 ++++++++++++++++ modules/dnn/test/test_onnx_importer.cpp | 25 ++++++++++++++++++++++ modules/dnn/test/test_tflite_importer.cpp | 14 ++++++++++++ 5 files changed, 65 insertions(+), 2 deletions(-) diff --git a/modules/dnn/src/net_impl.cpp b/modules/dnn/src/net_impl.cpp index dc0c53191f..775016b3b7 100644 --- a/modules/dnn/src/net_impl.cpp +++ b/modules/dnn/src/net_impl.cpp @@ -1400,6 +1400,7 @@ void Net::Impl::setInput(InputArray blob, const String& name, double scalefactor Mat blob_ = blob.getMat(); // can't use InputArray directly due MatExpr stuff MatShape blobShape = shape(blob_); +#if 0 // TODO: DNNTestNetwork.MobileNet_SSD_Caffe_Different_Width_Height/0 if (pin.lid == 0) { CV_Assert(!netInputLayer.empty()); @@ -1411,7 +1412,6 @@ void Net::Impl::setInput(InputArray blob, const String& name, double scalefactor if 
(!inputShapeLimitation.empty()) { CV_CheckEQ(inputShapeLimitation.size(), blobShape.size(), ""); -#if 0 // TODO: DNNTestNetwork.MobileNet_SSD_Caffe_Different_Width_Height/0 const size_t dims = inputShapeLimitation.size(); for (size_t dim = 0; dim < dims; dim++) { @@ -1419,10 +1419,10 @@ void Net::Impl::setInput(InputArray blob, const String& name, double scalefactor continue; // don't limit batch CV_CheckEQ(inputShapeLimitation[dim], blobShape[dim], ""); } -#endif } } } +#endif LayerData& ld = layers[pin.lid]; const int numInputs = std::max(pin.oid + 1, (int)ld.requiredOutputs.size()); diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 307a05ef4b..027326c69e 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -891,6 +891,11 @@ void ONNXImporter::populateNet() } dstNet.setInputsNames(netInputs); + if (!hasDynamicShapes) + { + for (int i = 0; i < netInputs.size(); ++i) + dstNet.setInputShape(netInputs[i], outShapes[netInputs[i]]); + } // dump outputs for (int i = 0; i < graph_proto.output_size(); ++i) diff --git a/modules/dnn/src/tflite/tflite_importer.cpp b/modules/dnn/src/tflite/tflite_importer.cpp index d556bf6f6f..cc09ec14eb 100644 --- a/modules/dnn/src/tflite/tflite_importer.cpp +++ b/modules/dnn/src/tflite/tflite_importer.cpp @@ -163,6 +163,8 @@ void TFLiteImporter::populateNet() CV_Assert(modelTensors); layouts.resize(modelTensors->size(), DATA_LAYOUT_UNKNOWN); size_t subgraph_inputs_size = subgraph_inputs->size(); + std::vector inputsNames(subgraph_inputs_size); + std::vector inputsShapes(subgraph_inputs_size); for (size_t i = 0; i < subgraph_inputs_size; ++i) { int idx = subgraph_inputs->Get(i); @@ -171,7 +173,24 @@ void TFLiteImporter::populateNet() if (!tensor) CV_Error(Error::StsError, cv::format("DNN/TFLite: subgraph input %d (%d) is NULL", (int)i, idx)); layouts[idx] = estimateLayout(*tensor); + + // Keep info about origin inputs names and shapes + 
inputsNames[i] = tensor->name()->str(); + std::vector shape(tensor->shape()->begin(), tensor->shape()->end()); + if (layouts[idx] == DATA_LAYOUT_NHWC) { + CV_CheckEQ(shape.size(), (size_t)4, ""); + std::swap(shape[2], shape[3]); + std::swap(shape[1], shape[2]); + } + inputsShapes[i] = shape; } + + dstNet.setInputsNames(inputsNames); + for (size_t i = 0; i < subgraph_inputs_size; ++i) + { + dstNet.setInputShape(inputsNames[i], inputsShapes[i]); + } + const auto& all_operators = *subgraph_operators; const size_t all_operators_size = all_operators.size(); for (size_t op_idx = 0; op_idx < all_operators_size; ++op_idx) diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 6698174521..b5a97770b1 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -30,6 +30,27 @@ public: pb }; + void testInputShapes(const Net& net, const std::vector& inps) + { + std::vector inLayerShapes; + std::vector outLayerShapes; + net.getLayerShapes(MatShape(), 0, inLayerShapes, outLayerShapes); + ASSERT_EQ(inLayerShapes.size(), inps.size()); + + for (int i = 0; i < inps.size(); ++i) { + bool hasDynamicShapes = inLayerShapes[i].empty(); + if (hasDynamicShapes) + continue; + if (inLayerShapes[i].size() == 1) { // 1D input + ASSERT_EQ(shape(inLayerShapes[i][0], 1), shape(inps[i])); + } else { + // Compare all axes except batch dimension which is variable. 
+ inLayerShapes[i][0] = inps[i].size[0]; + ASSERT_EQ(inLayerShapes[i], shape(inps[i])); + } + } + } + void testONNXModels(const String& basename, const Extension ext = npy, const double l1 = 0, const float lInf = 0, const bool useSoftmax = false, bool checkNoFallbacks = true, int numInps = 1) @@ -54,6 +75,8 @@ public: Net net = readNetFromONNX(onnxmodel); ASSERT_FALSE(net.empty()); + testInputShapes(net, inps); + net.setPreferableBackend(backend); net.setPreferableTarget(target); @@ -2315,6 +2338,8 @@ TEST_P(Test_ONNX_nets, Resnet34_kinetics) lInf = 0.06; } + testInputShapes(net, {input0}); + checkBackend(&input0, &ref0); net.setInput(input0); Mat out = net.forward().clone(); diff --git a/modules/dnn/test/test_tflite_importer.cpp b/modules/dnn/test/test_tflite_importer.cpp index bffdaa5b03..bce826b96f 100644 --- a/modules/dnn/test/test_tflite_importer.cpp +++ b/modules/dnn/test/test_tflite_importer.cpp @@ -11,6 +11,7 @@ Test for TFLite models loading #include // CV_DNN_REGISTER_LAYER_CLASS #include +#include #ifdef OPENCV_TEST_DNN_TFLITE @@ -19,9 +20,21 @@ namespace opencv_test { namespace { using namespace cv; using namespace cv::dnn; +void testInputShapes(const Net& net, const std::vector& inps) { + std::vector inLayerShapes; + std::vector outLayerShapes; + net.getLayerShapes(MatShape(), 0, inLayerShapes, outLayerShapes); + ASSERT_EQ(inLayerShapes.size(), inps.size()); + + for (int i = 0; i < inps.size(); ++i) { + ASSERT_EQ(inLayerShapes[i], shape(inps[i])); + } +} + void testModel(const std::string& modelName, const Mat& input, double l1 = 1e-5, double lInf = 1e-4) { Net net = readNet(findDataFile("dnn/tflite/" + modelName + ".tflite", false)); + testInputShapes(net, {input}); net.setInput(input); std::vector outNames = net.getUnconnectedOutLayersNames(); @@ -72,6 +85,7 @@ TEST(Test_TFLite, max_unpooling) cvtColor(input, input, COLOR_BGR2RGBA); input = input.mul(Scalar(1, 1, 1, 0)); input = blobFromImage(input, 1.0 / 255); + testInputShapes(net, {input}); 
net.setInput(input); std::vector > outs; From 42793e16ddded7315fd00c23b17071361748f963 Mon Sep 17 00:00:00 2001 From: Raj Kachhadiya <110079774+kachhadiyaraj15@users.noreply.github.com> Date: Fri, 20 Jan 2023 18:47:46 +0530 Subject: [PATCH 063/199] Update py_intro.markdown --- doc/py_tutorials/py_setup/py_intro/py_intro.markdown | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/py_tutorials/py_setup/py_intro/py_intro.markdown b/doc/py_tutorials/py_setup/py_intro/py_intro.markdown index b013ef014e..b7a600afc1 100644 --- a/doc/py_tutorials/py_setup/py_intro/py_intro.markdown +++ b/doc/py_tutorials/py_setup/py_intro/py_intro.markdown @@ -79,8 +79,8 @@ Below is the list of contributors who submitted tutorials to OpenCV-Python. Additional Resources -------------------- --# A Quick guide to Python - [A Byte of Python](http://swaroopch.com/notes/python/) -2. [NumPy Quickstart tutorial](https://numpy.org/devdocs/user/quickstart.html) -3. [NumPy Reference](https://numpy.org/devdocs/reference/index.html#reference) -4. [OpenCV Documentation](http://docs.opencv.org/) +1. [A Quick guide to Python](https://www.freecodecamp.org/news/the-python-guide-for-beginners/) +2. [NumPy Quickstart tutorial](https://numpy.org/doc/stable/user/quickstart.html) +3. [NumPy Reference](https://numpy.org/doc/stable/reference/index.html) +4. [OpenCV Documentation](https://docs.opencv.org/4.x/index.html) 5. [OpenCV Forum](https://forum.opencv.org/) From 5c5ef9746c9c9ce3fa3306f1d8d4eb038c2c1130 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Tue, 21 Mar 2023 15:32:21 +0300 Subject: [PATCH 064/199] Presume original book, update references. 
--- doc/py_tutorials/py_setup/py_intro/py_intro.markdown | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/py_tutorials/py_setup/py_intro/py_intro.markdown b/doc/py_tutorials/py_setup/py_intro/py_intro.markdown index b7a600afc1..0108462f95 100644 --- a/doc/py_tutorials/py_setup/py_intro/py_intro.markdown +++ b/doc/py_tutorials/py_setup/py_intro/py_intro.markdown @@ -79,8 +79,9 @@ Below is the list of contributors who submitted tutorials to OpenCV-Python. Additional Resources -------------------- +-# A Quick guide to Python - [A Byte of Python](https://python.swaroopch.com/) 1. [A Quick guide to Python](https://www.freecodecamp.org/news/the-python-guide-for-beginners/) 2. [NumPy Quickstart tutorial](https://numpy.org/doc/stable/user/quickstart.html) 3. [NumPy Reference](https://numpy.org/doc/stable/reference/index.html) -4. [OpenCV Documentation](https://docs.opencv.org/4.x/index.html) +4. [OpenCV Documentation](https://docs.opencv.org/) 5. [OpenCV Forum](https://forum.opencv.org/) From 6033599c88f7e100338002a15a080cd54ab0d92e Mon Sep 17 00:00:00 2001 From: Simon Lynen Date: Wed, 22 Mar 2023 04:12:51 +0100 Subject: [PATCH 065/199] Make LineSegmentDetector deterministic by using stable_sort for ordering keypoints prior to region growing This makes LineSegmentDetector deterministic by using stable_sort for ordering points by norm. Without this change the region growing in LSD is non-determinstic and thus the returned lines are changing between invocations. 
This is a replacement for https://github.com/opencv/opencv/pull/23370 --- modules/imgproc/src/lsd.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/imgproc/src/lsd.cpp b/modules/imgproc/src/lsd.cpp index 2f13c5a8a0..8d26a016ab 100644 --- a/modules/imgproc/src/lsd.cpp +++ b/modules/imgproc/src/lsd.cpp @@ -592,8 +592,8 @@ void LineSegmentDetectorImpl::ll_angle(const double& threshold, } } - // Sort - std::sort(ordered_points.begin(), ordered_points.end(), compare_norm); + // Use stable sort to ensure deterministic region growing and thus overall LSD result determinism. + std::stable_sort(ordered_points.begin(), ordered_points.end(), compare_norm); } void LineSegmentDetectorImpl::region_grow(const Point2i& s, std::vector& reg, From c9e42c50504bcf11debe132041834f9f449ea5b2 Mon Sep 17 00:00:00 2001 From: Christian Henkel <6976069+ct2034@users.noreply.github.com> Date: Mon, 20 Mar 2023 22:26:05 +0100 Subject: [PATCH 066/199] two typos --- modules/core/src/hal_replacement.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/core/src/hal_replacement.hpp b/modules/core/src/hal_replacement.hpp index 6ed795b5e1..25acff662c 100644 --- a/modules/core/src/hal_replacement.hpp +++ b/modules/core/src/hal_replacement.hpp @@ -531,7 +531,7 @@ inline int hal_ni_dftFree1D(cvhalDFT *context) { return CV_HAL_ERROR_NOT_IMPLEME /** @param context double pointer to context storing all necessary data @param width,height image dimensions -@param depth image type (CV_32F or CV64F) +@param depth image type (CV_32F or CV_64F) @param src_channels number of channels in input image @param dst_channels number of channels in output image @param flags algorithm options (combination of CV_HAL_DFT_INVERSE, ...) 
@@ -558,7 +558,7 @@ inline int hal_ni_dftFree2D(cvhalDFT *context) { return CV_HAL_ERROR_NOT_IMPLEME /** @param context double pointer to context storing all necessary data @param width,height image dimensions -@param depth image type (CV_32F or CV64F) +@param depth image type (CV_32F or CV_64F) @param flags algorithm options (combination of CV_HAL_DFT_INVERSE, ...) */ inline int hal_ni_dctInit2D(cvhalDFT **context, int width, int height, int depth, int flags) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } From ea7efd57d890f73e00ff2130baf303d81964adb9 Mon Sep 17 00:00:00 2001 From: tkram01 Date: Wed, 22 Mar 2023 09:50:58 +0300 Subject: [PATCH 067/199] Fix for using sampleIdx to limit training data --- modules/ml/src/data.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ml/src/data.cpp b/modules/ml/src/data.cpp index a5dd101f1d..d8b41ed91c 100644 --- a/modules/ml/src/data.cpp +++ b/modules/ml/src/data.cpp @@ -904,7 +904,7 @@ public: if( s ) { j = s[i]; - CV_Assert( 0 <= j && j < nsamples ); + CV_Assert( 0 <= j && j < ((layout == ROW_SAMPLE) ? 
samples.rows : samples.cols) ); } values[i] = src[j*sstep]; if( values[i] == MISSED_VAL ) From 6ffe686ba884b0ed4e9ed7ef14b0790050c82f82 Mon Sep 17 00:00:00 2001 From: Sergey Petrenko Date: Wed, 22 Mar 2023 10:24:22 +0300 Subject: [PATCH 068/199] check keydown event characters length before returning the pressed character code --- modules/highgui/src/window_cocoa.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/highgui/src/window_cocoa.mm b/modules/highgui/src/window_cocoa.mm index 1cf55f9397..2c05b10be5 100644 --- a/modules/highgui/src/window_cocoa.mm +++ b/modules/highgui/src/window_cocoa.mm @@ -618,7 +618,7 @@ CV_IMPL int cvWaitKey (int maxWait) inMode:NSDefaultRunLoopMode dequeue:YES]; - if([event type] == NSKeyDown) { + if([event type] == NSKeyDown && [[event characters] length]) { returnCode = [[event characters] characterAtIndex:0]; break; } From d3cc507380d24e57148e4457bc59c2d267eae67e Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Thu, 23 Mar 2023 16:58:22 +0300 Subject: [PATCH 069/199] Added reference to Media Foundation. --- modules/videoio/include/opencv2/videoio.hpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/videoio/include/opencv2/videoio.hpp b/modules/videoio/include/opencv2/videoio.hpp index 3c0f8cda8a..1c3f0f5eb0 100644 --- a/modules/videoio/include/opencv2/videoio.hpp +++ b/modules/videoio/include/opencv2/videoio.hpp @@ -87,7 +87,10 @@ To be used in the VideoCapture::VideoCapture() constructor or VideoCapture::open @note - Backends are available only if they have been built with your OpenCV binaries. See @ref videoio_overview for more information. -- For CAP_MSMF setting environment flag "OPENCV_VIDEOIO_MSMF_ENABLE_HW_TRANSFORMS" to 0 may improve speed initialization +- Microsoft Media Foundation backend tries to use hardware accelerated transformations +if possible. 
Environment flag "OPENCV_VIDEOIO_MSMF_ENABLE_HW_TRANSFORMS" set to 0 +disables it and may improve initialization time. More details: +https://learn.microsoft.com/en-us/windows/win32/medfound/mf-readwrite-enable-hardware-transforms */ enum VideoCaptureAPIs { CAP_ANY = 0, //!< Auto detect == 0 @@ -109,7 +112,7 @@ enum VideoCaptureAPIs { CAP_XIAPI = 1100, //!< XIMEA Camera API CAP_AVFOUNDATION = 1200, //!< AVFoundation framework for iOS (OS X Lion will have the same API) CAP_GIGANETIX = 1300, //!< Smartek Giganetix GigEVisionSDK - CAP_MSMF = 1400, //!< Microsoft Media Foundation (via videoInput) read note above + CAP_MSMF = 1400, //!< Microsoft Media Foundation (via videoInput). See platform specific notes above. CAP_WINRT = 1410, //!< Microsoft Windows Runtime using Media Foundation CAP_INTELPERC = 1500, //!< RealSense (former Intel Perceptual Computing SDK) CAP_REALSENSE = 1500, //!< Synonym for CAP_INTELPERC From 02bdc1006267fb2b0dd2aad29dabbeacdb3edce8 Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 24 Mar 2023 11:52:05 +0300 Subject: [PATCH 070/199] fix assert, add test --- .../misc/python/test/test_objdetect_aruco.py | 11 +++++++++++ modules/objdetect/src/aruco/aruco_board.cpp | 1 - 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/modules/objdetect/misc/python/test/test_objdetect_aruco.py b/modules/objdetect/misc/python/test/test_objdetect_aruco.py index 97d4fcb821..c72691d003 100644 --- a/modules/objdetect/misc/python/test/test_objdetect_aruco.py +++ b/modules/objdetect/misc/python/test/test_objdetect_aruco.py @@ -11,6 +11,17 @@ from tests_common import NewOpenCVTests class aruco_objdetect_test(NewOpenCVTests): + def test_board(self): + p1 = np.array([[0, 0, 0], [0, 1, 0], [1, 1, 0], [1, 0, 0]], dtype=np.float32) + p2 = np.array([[1, 0, 0], [1, 1, 0], [2, 1, 0], [2, 0, 0]], dtype=np.float32) + objPoints = np.array([p1, p2]) + dictionary = cv.aruco.getPredefinedDictionary(cv.aruco.DICT_4X4_50) + ids = np.array([0, 1]) + + board = 
cv.aruco.Board(objPoints, dictionary, ids) + np.testing.assert_array_equal(board.getIds().squeeze(), ids) + np.testing.assert_array_equal(np.ravel(np.array(board.getObjPoints())), np.ravel(np.concatenate([p1, p2]))) + def test_idsAccessibility(self): ids = np.arange(17) diff --git a/modules/objdetect/src/aruco/aruco_board.cpp b/modules/objdetect/src/aruco/aruco_board.cpp index 370d50dd29..1f582e06a9 100644 --- a/modules/objdetect/src/aruco/aruco_board.cpp +++ b/modules/objdetect/src/aruco/aruco_board.cpp @@ -157,7 +157,6 @@ Board::Board(): Board::Board(InputArrayOfArrays objPoints, const Dictionary &dictionary, InputArray ids): Board(new Board::Impl(dictionary)) { - CV_Assert(ids.size() == objPoints.size()); CV_Assert(objPoints.total() == ids.total()); CV_Assert(objPoints.type() == CV_32FC3 || objPoints.type() == CV_32FC1); From 0bb84096a2a80472d644b31e5d7b1f8b5a2b9a1a Mon Sep 17 00:00:00 2001 From: Anna Khakimova Date: Sun, 4 Sep 2022 18:06:37 +0300 Subject: [PATCH 071/199] Fix tolerance for Preproc4lpiTest set --- modules/gapi/test/gapi_fluid_resize_test.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/gapi/test/gapi_fluid_resize_test.cpp b/modules/gapi/test/gapi_fluid_resize_test.cpp index 0ec00c8e0b..79fb416803 100644 --- a/modules/gapi/test/gapi_fluid_resize_test.cpp +++ b/modules/gapi/test/gapi_fluid_resize_test.cpp @@ -829,7 +829,11 @@ TEST_P(Preproc4lpiTest, Test) cv::cvtColor(in_mat, rgb_mat, cv::COLOR_YUV2RGB_NV12); cv::resize(rgb_mat, out_mat_ocv, out_sz, 0, 0, interp); +#if defined(__arm__) || defined(__aarch64__) + EXPECT_GE(2, cvtest::norm(out_mat(roi), out_mat_ocv(roi), NORM_INF)); +#else EXPECT_EQ(0, cvtest::norm(out_mat(roi), out_mat_ocv(roi), NORM_INF)); +#endif } INSTANTIATE_TEST_CASE_P(Fluid, Preproc4lpiTest, From ee302b063fad142c743fa8ddb5d68bea6c485a63 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 24 Mar 2023 14:03:14 +0100 Subject: [PATCH 072/199] Typo in enum cv::QuatEnum::EulerAnglesType --- 
modules/core/include/opencv2/core/quaternion.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/core/include/opencv2/core/quaternion.hpp b/modules/core/include/opencv2/core/quaternion.hpp index 8c21501e3f..9e3e44332f 100644 --- a/modules/core/include/opencv2/core/quaternion.hpp +++ b/modules/core/include/opencv2/core/quaternion.hpp @@ -77,9 +77,9 @@ public: * For intrinsic rotations in the order of X-Y-Z, the rotation matrix R can be calculated by:\f[R =X(\theta_1) Y(\theta_2) Z(\theta_3) \f] * For extrinsic rotations in the order of X-Y-Z, the rotation matrix R can be calculated by:\f[R =Z({\theta_3}) Y({\theta_2}) X({\theta_1})\f] * where - * \f[X({\theta})={\begin{bmatrix}1&0&0\\0&\cos {\theta_1} &-\sin {\theta_1} \\0&\sin {\theta_1} &\cos {\theta_1} \\\end{bmatrix}}, - * Y({\theta})={\begin{bmatrix}\cos \theta_{2}&0&\sin \theta_{2}\\0&1 &0 \\\ -sin \theta_2& 0&\cos \theta_{2} \\\end{bmatrix}}, - * Z({\theta})={\begin{bmatrix}\cos\theta_{3} &-\sin \theta_3&0\\\sin \theta_3 &\cos \theta_3 &0\\0&0&1\\\end{bmatrix}}. + * \f[X({\theta_1})={\begin{bmatrix}1&0&0\\0&\cos {\theta_1} &-\sin {\theta_1} \\0&\sin {\theta_1} &\cos {\theta_1} \\\end{bmatrix}}, + * Y({\theta_2})={\begin{bmatrix}\cos \theta_{2}&0&\sin \theta_{2}\\0&1 &0 \\\ -sin \theta_2& 0&\cos \theta_{2} \\\end{bmatrix}}, + * Z({\theta_3})={\begin{bmatrix}\cos\theta_{3} &-\sin \theta_3&0\\\sin \theta_3 &\cos \theta_3 &0\\0&0&1\\\end{bmatrix}}. * \f] * * The function is designed according to this set of conventions: From 83a49b4f6a74276d43d54e8b0d7cb1799a823adf Mon Sep 17 00:00:00 2001 From: Kumataro Date: Sun, 26 Mar 2023 09:03:16 +0900 Subject: [PATCH 073/199] imgcodecs: update documentation for imwrite() to support images formats. 
--- .../imgcodecs/include/opencv2/imgcodecs.hpp | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/modules/imgcodecs/include/opencv2/imgcodecs.hpp b/modules/imgcodecs/include/opencv2/imgcodecs.hpp index c4b570e68c..2eb5e596fc 100644 --- a/modules/imgcodecs/include/opencv2/imgcodecs.hpp +++ b/modules/imgcodecs/include/opencv2/imgcodecs.hpp @@ -218,17 +218,26 @@ CV_EXPORTS_W bool imreadmulti(const String& filename, CV_OUT std::vector& m /** @brief Saves an image to a specified file. The function imwrite saves the image to the specified file. The image format is chosen based on the -filename extension (see cv::imread for the list of extensions). In general, only 8-bit +filename extension (see cv::imread for the list of extensions). In general, only 8-bit unsigned (CV_8U) single-channel or 3-channel (with 'BGR' channel order) images can be saved using this function, with these exceptions: -- 16-bit unsigned (CV_16U) images can be saved in the case of PNG, JPEG 2000, and TIFF formats -- 32-bit float (CV_32F) images can be saved in TIFF, OpenEXR, and Radiance HDR formats; 3-channel -(CV_32FC3) TIFF images will be saved using the LogLuv high dynamic range encoding (4 bytes per pixel) -- PNG images with an alpha channel can be saved using this function. To do this, create -8-bit (or 16-bit) 4-channel image BGRA, where the alpha channel goes last. Fully transparent pixels -should have alpha set to 0, fully opaque pixels should have alpha set to 255/65535 (see the code sample below). -- Multiple images (vector of Mat) can be saved in TIFF format (see the code sample below). +- With OpenEXR encoder, only 32-bit float (CV_32F) images can be saved. + - 8-bit unsigned (CV_8U) images are not supported. +- With Radiance HDR encoder, non 64-bit float (CV_64F) images can be saved. + - All images will be converted to 32-bit float (CV_32F). +- With JPEG 2000 encoder, 8-bit unsigned (CV_8U) and 16-bit unsigned (CV_16U) images can be saved. 
+- With PAM encoder, 8-bit unsigned (CV_8U) and 16-bit unsigned (CV_16U) images can be saved. +- With PNG encoder, 8-bit unsigned (CV_8U) and 16-bit unsigned (CV_16U) images can be saved. + - PNG images with an alpha channel can be saved using this function. To do this, create + 8-bit (or 16-bit) 4-channel image BGRA, where the alpha channel goes last. Fully transparent pixels + should have alpha set to 0, fully opaque pixels should have alpha set to 255/65535 (see the code sample below). +- With PGM/PPM encoder, 8-bit unsigned (CV_8U) and 16-bit unsigned (CV_16U) images can be saved. +- With TIFF encoder, 8-bit unsigned (CV_8U), 16-bit unsigned (CV_16U), + 32-bit float (CV_32F) and 64-bit float (CV_64F) images can be saved. + - Multiple images (vector of Mat) can be saved in TIFF format (see the code sample below). + - 32-bit float 3-channel (CV_32FC3) TIFF images will be saved + using the LogLuv high dynamic range encoding (4 bytes per pixel) If the image format is not supported, the image will be converted to 8-bit unsigned (CV_8U) and saved that way. From 1c6c3dfa8d63bdb4758763cbc2ec91872ddd8612 Mon Sep 17 00:00:00 2001 From: Kumataro Date: Sun, 26 Mar 2023 18:33:54 +0900 Subject: [PATCH 074/199] remove tail whitespace --- modules/imgcodecs/include/opencv2/imgcodecs.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/imgcodecs/include/opencv2/imgcodecs.hpp b/modules/imgcodecs/include/opencv2/imgcodecs.hpp index 2eb5e596fc..9ae0c3a807 100644 --- a/modules/imgcodecs/include/opencv2/imgcodecs.hpp +++ b/modules/imgcodecs/include/opencv2/imgcodecs.hpp @@ -233,7 +233,7 @@ can be saved using this function, with these exceptions: 8-bit (or 16-bit) 4-channel image BGRA, where the alpha channel goes last. Fully transparent pixels should have alpha set to 0, fully opaque pixels should have alpha set to 255/65535 (see the code sample below). - With PGM/PPM encoder, 8-bit unsigned (CV_8U) and 16-bit unsigned (CV_16U) images can be saved. 
-- With TIFF encoder, 8-bit unsigned (CV_8U), 16-bit unsigned (CV_16U), +- With TIFF encoder, 8-bit unsigned (CV_8U), 16-bit unsigned (CV_16U), 32-bit float (CV_32F) and 64-bit float (CV_64F) images can be saved. - Multiple images (vector of Mat) can be saved in TIFF format (see the code sample below). - 32-bit float 3-channel (CV_32FC3) TIFF images will be saved From a809ae4e88c96f07d70a1642ca6d7d2c71cca8ec Mon Sep 17 00:00:00 2001 From: HAN Liutong Date: Mon, 27 Mar 2023 21:30:47 +0800 Subject: [PATCH 075/199] Fix HAL compatibility layer and modify use cases. --- .../core/include/opencv2/core/hal/intrin.hpp | 237 +++++++++++++++--- .../src/layers/cpu_kernels/convolution.cpp | 123 +++++---- 2 files changed, 266 insertions(+), 94 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp index 207b8cab4e..ee8310b5c5 100644 --- a/modules/core/include/opencv2/core/hal/intrin.hpp +++ b/modules/core/include/opencv2/core/hal/intrin.hpp @@ -758,6 +758,36 @@ namespace CV__SIMD_NAMESPACE { #if CV_SIMD_64F OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float64) #endif + #if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128 + // when we use CV_SIMD128 with 256/512 bit SIMD (e.g. AVX2 or AVX512) + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint8x16) + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint16x8) + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint32x4) + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint64x2) + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int8x16) + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int16x8) + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int32x4) + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int64x2) + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float32x4) + #if CV_SIMD_64F + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float64x2) + #endif + #endif + #if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256 + // when we use CV_SIMD256 with 512 bit SIMD (e.g. 
AVX512) + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint8x32) + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint16x16) + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint32x8) + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_uint64x4) + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int8x32) + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int16x16) + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int32x8) + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_int64x4) + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float32x8) + #if CV_SIMD_64F + OPENCV_HAL_WRAP_BIN_OP_ADDSUB(v_float64x4) + #endif + #endif #define OPENCV_HAL_WRAP_BIN_OP_LOGIC(_Tpvec) \ inline _Tpvec v_and(const _Tpvec& a, const _Tpvec& b) \ @@ -785,6 +815,26 @@ namespace CV__SIMD_NAMESPACE { OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int16) OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int32) OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int64) + #if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128 + OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint8x16) + OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint16x8) + OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint32x4) + OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint64x2) + OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int8x16) + OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int16x8) + OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int32x4) + OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int64x2) + #endif + #if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256 + OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint8x32) + OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint16x16) + OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint32x8) + OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_uint64x4) + OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int8x32) + OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int16x16) + OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int32x8) + OPENCV_HAL_WRAP_BIN_OP_LOGIC(v_int64x4) + #endif #define OPENCV_HAL_WRAP_BIN_OP_MUL(_Tpvec) \ inline _Tpvec v_mul(const _Tpvec& a, const _Tpvec& b) \ @@ -805,17 +855,51 @@ namespace CV__SIMD_NAMESPACE { #if CV_SIMD_64F OPENCV_HAL_WRAP_BIN_OP_MUL(v_float64) #endif + #if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128 + OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint8x16) + OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint16x8) + OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint32x4) + OPENCV_HAL_WRAP_BIN_OP_MUL(v_int8x16) + 
OPENCV_HAL_WRAP_BIN_OP_MUL(v_int16x8) + OPENCV_HAL_WRAP_BIN_OP_MUL(v_int32x4) + OPENCV_HAL_WRAP_BIN_OP_MUL(v_float32x4) + #if CV_SIMD_64F + OPENCV_HAL_WRAP_BIN_OP_MUL(v_float64x2) + #endif + #endif + #if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256 + OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint8x32) + OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint16x16) + OPENCV_HAL_WRAP_BIN_OP_MUL(v_uint32x8) + OPENCV_HAL_WRAP_BIN_OP_MUL(v_int8x32) + OPENCV_HAL_WRAP_BIN_OP_MUL(v_int16x16) + OPENCV_HAL_WRAP_BIN_OP_MUL(v_int32x8) + OPENCV_HAL_WRAP_BIN_OP_MUL(v_float32x8) + #if CV_SIMD_64F + OPENCV_HAL_WRAP_BIN_OP_MUL(v_float64x4) + #endif + #endif - - inline v_float32 v_div(const v_float32& a, const v_float32& b) \ + #define OPENCV_HAL_WRAP_BIN_OP_DIV(_Tpvec) \ + inline _Tpvec v_div(const _Tpvec& a, const _Tpvec& b) \ { \ return a / b; \ } + OPENCV_HAL_WRAP_BIN_OP_DIV(v_float32) #if CV_SIMD_64F - inline v_float64 v_div(const v_float64& a, const v_float64& b) \ - { \ - return a / b; \ - } + OPENCV_HAL_WRAP_BIN_OP_DIV(v_float64) + #endif + #if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128 + OPENCV_HAL_WRAP_BIN_OP_DIV(v_float32x4) + #if CV_SIMD_64F + OPENCV_HAL_WRAP_BIN_OP_DIV(v_float64x2) + #endif + #endif + #if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256 + OPENCV_HAL_WRAP_BIN_OP_DIV(v_float32x8) + #if CV_SIMD_64F + OPENCV_HAL_WRAP_BIN_OP_DIV(v_float64x4) + #endif #endif #define OPENCV_HAL_WRAP_CMP_OP(_Tpvec, intrin, op) \ @@ -844,44 +928,124 @@ namespace CV__SIMD_NAMESPACE { #if CV_SIMD_64F OPENCV_HAL_WRAP_CMP(v_float64) #endif + #if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128 + OPENCV_HAL_WRAP_CMP(v_uint8x16) + OPENCV_HAL_WRAP_CMP(v_uint16x8) + OPENCV_HAL_WRAP_CMP(v_uint32x4) + OPENCV_HAL_WRAP_CMP(v_int8x16) + OPENCV_HAL_WRAP_CMP(v_int16x8) + OPENCV_HAL_WRAP_CMP(v_int32x4) + OPENCV_HAL_WRAP_CMP(v_float32x4) + #if CV_SIMD_64F + OPENCV_HAL_WRAP_CMP(v_float64x2) + #endif + #endif + #if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256 + OPENCV_HAL_WRAP_CMP(v_uint8x32) + OPENCV_HAL_WRAP_CMP(v_uint16x16) + 
OPENCV_HAL_WRAP_CMP(v_uint32x8) + OPENCV_HAL_WRAP_CMP(v_int8x32) + OPENCV_HAL_WRAP_CMP(v_int16x16) + OPENCV_HAL_WRAP_CMP(v_int32x8) + OPENCV_HAL_WRAP_CMP(v_float32x8) + #if CV_SIMD_64F + OPENCV_HAL_WRAP_CMP(v_float64x4) + #endif + #endif //////////// get0 //////////// - #define OPENCV_HAL_WRAP_GRT0_INT(_Tpvec, _Tp) \ - inline _Tp v_get0(const v_##_Tpvec& v) \ + #define OPENCV_HAL_WRAP_GRT0(_Tpvec) \ + inline typename VTraits<_Tpvec>::lane_type v_get0(const _Tpvec& v) \ { \ return v.get0(); \ } - OPENCV_HAL_WRAP_GRT0_INT(uint8, uchar) - OPENCV_HAL_WRAP_GRT0_INT(int8, schar) - OPENCV_HAL_WRAP_GRT0_INT(uint16, ushort) - OPENCV_HAL_WRAP_GRT0_INT(int16, short) - OPENCV_HAL_WRAP_GRT0_INT(uint32, unsigned) - OPENCV_HAL_WRAP_GRT0_INT(int32, int) - OPENCV_HAL_WRAP_GRT0_INT(uint64, uint64) - OPENCV_HAL_WRAP_GRT0_INT(int64, int64) - OPENCV_HAL_WRAP_GRT0_INT(float32, float) + OPENCV_HAL_WRAP_GRT0(v_uint8) + OPENCV_HAL_WRAP_GRT0(v_int8) + OPENCV_HAL_WRAP_GRT0(v_uint16) + OPENCV_HAL_WRAP_GRT0(v_int16) + OPENCV_HAL_WRAP_GRT0(v_uint32) + OPENCV_HAL_WRAP_GRT0(v_int32) + OPENCV_HAL_WRAP_GRT0(v_uint64) + OPENCV_HAL_WRAP_GRT0(v_int64) + OPENCV_HAL_WRAP_GRT0(v_float32) #if CV_SIMD_64F - OPENCV_HAL_WRAP_GRT0_INT(float64, double) + OPENCV_HAL_WRAP_GRT0(v_float64) + #endif + #if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128 + OPENCV_HAL_WRAP_GRT0(v_uint8x16) + OPENCV_HAL_WRAP_GRT0(v_uint16x8) + OPENCV_HAL_WRAP_GRT0(v_uint32x4) + OPENCV_HAL_WRAP_GRT0(v_uint64x2) + OPENCV_HAL_WRAP_GRT0(v_int8x16) + OPENCV_HAL_WRAP_GRT0(v_int16x8) + OPENCV_HAL_WRAP_GRT0(v_int32x4) + OPENCV_HAL_WRAP_GRT0(v_int64x2) + OPENCV_HAL_WRAP_GRT0(v_float32x4) + #if CV_SIMD_64F + OPENCV_HAL_WRAP_GRT0(v_float64x2) + #endif + #endif + #if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256 + OPENCV_HAL_WRAP_GRT0(v_uint8x32) + OPENCV_HAL_WRAP_GRT0(v_uint16x16) + OPENCV_HAL_WRAP_GRT0(v_uint32x8) + OPENCV_HAL_WRAP_GRT0(v_uint64x4) + OPENCV_HAL_WRAP_GRT0(v_int8x32) + OPENCV_HAL_WRAP_GRT0(v_int16x16) + OPENCV_HAL_WRAP_GRT0(v_int32x8) + 
OPENCV_HAL_WRAP_GRT0(v_int64x4) + OPENCV_HAL_WRAP_GRT0(v_float32x8) + #if CV_SIMD_64F + OPENCV_HAL_WRAP_GRT0(v_float64x4) + #endif #endif - #define OPENCV_HAL_WRAP_EXTRACT(_Tpvec, _Tp, vl) \ - inline _Tp v_extract_highest(const _Tpvec& v) \ + #define OPENCV_HAL_WRAP_EXTRACT(_Tpvec) \ + inline typename VTraits<_Tpvec>::lane_type v_extract_highest(const _Tpvec& v) \ { \ - return v_extract_n(v); \ + return v_extract_n::nlanes-1>(v); \ } - OPENCV_HAL_WRAP_EXTRACT(v_uint8, uchar, VTraits::nlanes) - OPENCV_HAL_WRAP_EXTRACT(v_int8, schar, VTraits::nlanes) - OPENCV_HAL_WRAP_EXTRACT(v_uint16, ushort, VTraits::nlanes) - OPENCV_HAL_WRAP_EXTRACT(v_int16, short, VTraits::nlanes) - OPENCV_HAL_WRAP_EXTRACT(v_uint32, unsigned int, VTraits::nlanes) - OPENCV_HAL_WRAP_EXTRACT(v_int32, int, VTraits::nlanes) - OPENCV_HAL_WRAP_EXTRACT(v_uint64, uint64, VTraits::nlanes) - OPENCV_HAL_WRAP_EXTRACT(v_int64, int64, VTraits::nlanes) - OPENCV_HAL_WRAP_EXTRACT(v_float32, float, VTraits::nlanes) + OPENCV_HAL_WRAP_EXTRACT(v_uint8) + OPENCV_HAL_WRAP_EXTRACT(v_int8) + OPENCV_HAL_WRAP_EXTRACT(v_uint16) + OPENCV_HAL_WRAP_EXTRACT(v_int16) + OPENCV_HAL_WRAP_EXTRACT(v_uint32) + OPENCV_HAL_WRAP_EXTRACT(v_int32) + OPENCV_HAL_WRAP_EXTRACT(v_uint64) + OPENCV_HAL_WRAP_EXTRACT(v_int64) + OPENCV_HAL_WRAP_EXTRACT(v_float32) #if CV_SIMD_64F - OPENCV_HAL_WRAP_EXTRACT(v_float64, double, VTraits::nlanes) + OPENCV_HAL_WRAP_EXTRACT(v_float64) + #endif + #if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128 + OPENCV_HAL_WRAP_EXTRACT(v_uint8x16) + OPENCV_HAL_WRAP_EXTRACT(v_uint16x8) + OPENCV_HAL_WRAP_EXTRACT(v_uint32x4) + OPENCV_HAL_WRAP_EXTRACT(v_uint64x2) + OPENCV_HAL_WRAP_EXTRACT(v_int8x16) + OPENCV_HAL_WRAP_EXTRACT(v_int16x8) + OPENCV_HAL_WRAP_EXTRACT(v_int32x4) + OPENCV_HAL_WRAP_EXTRACT(v_int64x2) + OPENCV_HAL_WRAP_EXTRACT(v_float32x4) + #if CV_SIMD_64F + OPENCV_HAL_WRAP_EXTRACT(v_float64x2) + #endif + #endif + #if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256 + OPENCV_HAL_WRAP_EXTRACT(v_uint8x32) + 
OPENCV_HAL_WRAP_EXTRACT(v_uint16x16) + OPENCV_HAL_WRAP_EXTRACT(v_uint32x8) + OPENCV_HAL_WRAP_EXTRACT(v_uint64x4) + OPENCV_HAL_WRAP_EXTRACT(v_int8x32) + OPENCV_HAL_WRAP_EXTRACT(v_int16x16) + OPENCV_HAL_WRAP_EXTRACT(v_int32x8) + OPENCV_HAL_WRAP_EXTRACT(v_int64x4) + OPENCV_HAL_WRAP_EXTRACT(v_float32x8) + #if CV_SIMD_64F + OPENCV_HAL_WRAP_EXTRACT(v_float64x4) + #endif #endif #define OPENCV_HAL_WRAP_BROADCAST(_Tpvec) \ @@ -893,7 +1057,16 @@ namespace CV__SIMD_NAMESPACE { OPENCV_HAL_WRAP_BROADCAST(v_uint32) OPENCV_HAL_WRAP_BROADCAST(v_int32) OPENCV_HAL_WRAP_BROADCAST(v_float32) - + #if CV_SIMD_WIDTH != 16/*128*/ && CV_SIMD128 + OPENCV_HAL_WRAP_BROADCAST(v_uint32x4) + OPENCV_HAL_WRAP_BROADCAST(v_int32x4) + OPENCV_HAL_WRAP_BROADCAST(v_float32x4) + #endif + #if CV_SIMD_WIDTH != 32/*256*/ && CV_SIMD256 + OPENCV_HAL_WRAP_BROADCAST(v_uint32x8) + OPENCV_HAL_WRAP_BROADCAST(v_int32x8) + OPENCV_HAL_WRAP_BROADCAST(v_float32x8) + #endif #endif //!CV_SIMD_SCALABLE diff --git a/modules/dnn/src/layers/cpu_kernels/convolution.cpp b/modules/dnn/src/layers/cpu_kernels/convolution.cpp index 6b0f9c865e..be1f99852b 100644 --- a/modules/dnn/src/layers/cpu_kernels/convolution.cpp +++ b/modules/dnn/src/layers/cpu_kernels/convolution.cpp @@ -1028,11 +1028,10 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co { for (; j + 7 < out_width; j += 8) { - v_float32x4 v0 = v_load(cptr + j) + vbias; - v_float32x4 v1 = v_load(cptr + j + 4) + vbias; - - v0 += v_load(pbptr + j); - v1 += v_load(pbptr + j + 4); + v_float32x4 v0 = v_add(v_load(cptr + j), vbias); + v_float32x4 v1 = v_add(v_load(cptr + j + 4), vbias); + v0 = v_add(v0, v_load(pbptr + j)); + v1 = v_add(v1, v_load(pbptr + j + 4)); if (ifMinMaxAct) { @@ -1048,8 +1047,8 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr& co { for (; j + 7 < out_width; j += 8) { - v_float32x4 v0 = v_load(cptr + j) + vbias; - v_float32x4 v1 = v_load(cptr + j + 4) + vbias; + v_float32x4 v0 = v_add(v_load(cptr + j), vbias); + 
v_float32x4 v1 = v_add(v_load(cptr + j + 4), vbias); if (ifMinMaxAct) { @@ -1154,13 +1153,13 @@ static void convBlockMR1x28(int np, const float* a, const float* b, float *c, co if (init_c) { - c0 += v_load(c); - c1 += v_load(c + 4); - c2 += v_load(c + 8); - c3 += v_load(c + 12); - c4 += v_load(c + 16); - c5 += v_load(c + 20); - c6 += v_load(c + 24); + c0 = v_add(c0, v_load(c)); + c1 = v_add(c1, v_load(c + 4)); + c2 = v_add(c2, v_load(c + 8)); + c3 = v_add(c3, v_load(c + 12)); + c4 = v_add(c4, v_load(c + 16)); + c5 = v_add(c5, v_load(c + 20)); + c6 = v_add(c6, v_load(c + 24)); } if (ifMinMaxAct) @@ -1207,12 +1206,12 @@ static void convBlockMR1x24(int np, const float* a, const float* b, float *c, co if (init_c) { - c0 += v_load(c); - c1 += v_load(c + 4); - c2 += v_load(c + 8); - c3 += v_load(c + 12); - c4 += v_load(c + 16); - c5 += v_load(c + 20); + c0 = v_add(c0, v_load(c)); + c1 = v_add(c1, v_load(c + 4)); + c2 = v_add(c2, v_load(c + 8)); + c3 = v_add(c3, v_load(c + 12)); + c4 = v_add(c4, v_load(c + 16)); + c5 = v_add(c5, v_load(c + 20)); } if (ifMinMaxAct) @@ -1251,9 +1250,9 @@ static void convBlockMR1x12(int np, const float* a, const float* b, float *c, co if (init_c) { - c0 += v_load(c); - c1 += v_load(c + 4); - c2 += v_load(c + 8); + c0 = v_add(c0, v_load(c)); + c1 = v_add(c1, v_load(c + 4)); + c2 = v_add(c2, v_load(c + 8)); } if (ifMinMaxAct) @@ -1343,33 +1342,33 @@ static void convBlock4x24(int np, const float* a, const float* b, float* c, int if (!init_c) { - c0 += v_load(c); - c1 += v_load(c + 4); - c2 += v_load(c + 8); - c3 += v_load(c + 12); - c4 += v_load(c + 16); - c5 += v_load(c + 20); - - c6 += v_load(c + ldc); - c7 += v_load(c + ldc + 4); - c8 += v_load(c + ldc + 8); - c9 += v_load(c + ldc + 12); - c10 += v_load(c + ldc + 16); - c11 += v_load(c + ldc + 20); - - c12 += v_load(c + ldc*2); - c13 += v_load(c + ldc*2 + 4); - c14 += v_load(c + ldc*2 + 8); - c15 += v_load(c + ldc*2 + 12); - c16 += v_load(c + ldc*2 + 16); - c17 += v_load(c + ldc*2 + 20); - - 
c18 += v_load(c + ldc*3); - c19 += v_load(c + ldc*3 + 4); - c20 += v_load(c + ldc*3 + 8); - c21 += v_load(c + ldc*3 + 12); - c22 += v_load(c + ldc*3 + 16); - c23 += v_load(c + ldc*3 + 20); + c0 = v_add(c0, v_load(c)); + c1 = v_add(c1, v_load(c + 4)); + c2 = v_add(c2, v_load(c + 8)); + c3 = v_add(c3, v_load(c + 12)); + c4 = v_add(c4, v_load(c + 16)); + c5 = v_add(c5, v_load(c + 20)); + + c6 = v_add(c6 , v_load(c + ldc)); + c7 = v_add(c7 , v_load(c + ldc + 4)); + c8 = v_add(c8 , v_load(c + ldc + 8)); + c9 = v_add(c9 , v_load(c + ldc + 12)); + c10 = v_add(c10, v_load(c + ldc + 16)); + c11 = v_add(c11, v_load(c + ldc + 20)); + + c12 = v_add(c12, v_load(c + ldc*2)); + c13 = v_add(c13, v_load(c + ldc*2 + 4)); + c14 = v_add(c14, v_load(c + ldc*2 + 8)); + c15 = v_add(c15, v_load(c + ldc*2 + 12)); + c16 = v_add(c16, v_load(c + ldc*2 + 16)); + c17 = v_add(c17, v_load(c + ldc*2 + 20)); + + c18 = v_add(c18, v_load(c + ldc*3)); + c19 = v_add(c19, v_load(c + ldc*3 + 4)); + c20 = v_add(c20, v_load(c + ldc*3 + 8)); + c21 = v_add(c21, v_load(c + ldc*3 + 12)); + c22 = v_add(c22, v_load(c + ldc*3 + 16)); + c23 = v_add(c23, v_load(c + ldc*3 + 20)); } v_store(c, c0); @@ -1431,17 +1430,17 @@ static void convBlock4x8(int np, const float* a, const float* b, float* c, int l if (!init_c) { - c0 += v_load(c); - c1 += v_load(c + 4); + c0 = v_add(c0, v_load(c)); + c1 = v_add(c1, v_load(c + 4)); - c2 += v_load(c + ldc); - c3 += v_load(c + ldc + 4); + c2 = v_add(c2, v_load(c + ldc)); + c3 = v_add(c3, v_load(c + ldc + 4)); - c4 += v_load(c + ldc*2); - c5 += v_load(c + ldc*2 + 4); + c4 = v_add(c4, v_load(c + ldc*2)); + c5 = v_add(c5, v_load(c + ldc*2 + 4)); - c6 += v_load(c + ldc*3); - c7 += v_load(c + ldc*3 + 4); + c6 = v_add(c6, v_load(c + ldc*3)); + c7 = v_add(c7, v_load(c + ldc*3 + 4)); } v_store(c, c0); @@ -1476,10 +1475,10 @@ static void convBlock4x4(int np, const float* a, const float* b, float* c, int l if (!init_c) { - c0 += v_load(c); - c1 += v_load(c + ldc); - c2 += v_load(c + ldc*2); - 
c3 += v_load(c + ldc*3); + c0 = v_add(c0, v_load(c)); + c1 = v_add(c1, v_load(c + ldc)); + c2 = v_add(c2, v_load(c + ldc*2)); + c3 = v_add(c3, v_load(c + ldc*3)); } v_store(c, c0); From 9cfced4650bdb69602cb7868e6e7e319ebe94026 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Mon, 27 Mar 2023 18:29:38 +0300 Subject: [PATCH 076/199] RISC-V: fix hardcoded options in RVV 0.7.1 toolchain file --- platforms/linux/riscv64-071-gcc.toolchain.cmake | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/platforms/linux/riscv64-071-gcc.toolchain.cmake b/platforms/linux/riscv64-071-gcc.toolchain.cmake index be2c7dcda9..53e4a7fced 100644 --- a/platforms/linux/riscv64-071-gcc.toolchain.cmake +++ b/platforms/linux/riscv64-071-gcc.toolchain.cmake @@ -1,9 +1,8 @@ -set(CMAKE_SYSTEM_NAME "Linux") -set(CMAKE_C_COMPILER riscv64-unknown-linux-gnu-gcc) -set(CMAKE_CXX_COMPILER riscv64-unknown-linux-gnu-g++) +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR riscv64) -set(CMAKE_CXX_FLAGS "" CACHE STRING "") -set(CMAKE_C_FLAGS "" CACHE STRING "") +set(CMAKE_CXX_COMPILER riscv64-unknown-linux-gnu-g++) +set(CMAKE_C_COMPILER riscv64-unknown-linux-gnu-gcc) -set(CMAKE_CXX_FLAGS "-static -march=rv64gcvxthead -mabi=lp64v -pthread -D__riscv_vector_071") -set(CMAKE_C_FLAGS "-static -march=rv64gcvxthead -mabi=lp64v -pthread -D__riscv_vector_071") +set(CMAKE_CXX_FLAGS_INIT "-march=rv64gcv -mabi=lp64d -D__riscv_vector_071") +set(CMAKE_C_FLAGS_INIT "-march=rv64gcv -mabi=lp64d -D__riscv_vector_071") From 5e1d33329bcda70fe8ef6e6cda5fa460d3587e95 Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Mon, 27 Mar 2023 19:40:59 +0300 Subject: [PATCH 077/199] Several fixes for ONNX importer: Expand, Gather --- modules/dnn/src/onnx/onnx_importer.cpp | 15 +++++++++++---- modules/dnn/test/test_onnx_importer.cpp | 5 +++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 
027326c69e..651d1b1571 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -2435,12 +2435,18 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::Node } else { - inpShape = shape(getBlob(input0)); + Mat blob = getBlob(input0); + if (constBlobsExtraInfo.find(node_proto.input(0)) != constBlobsExtraInfo.end() && + getBlobExtraInfo(node_proto, 0).real_ndims == 1) { + inpShape = {(int)blob.total()}; + } else { + inpShape = shape(blob); + } } String srcName = input0; // Unsqueeze and repeat along new axis - if (targetShape.size() == inpShape.size() + 1) + if (targetShape.size() > inpShape.size()) { inpShape.insert(inpShape.begin(), targetShape.size() - inpShape.size(), 1); for (int i = 0; i < targetShape.size(); i++) @@ -2486,7 +2492,7 @@ void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::Node { if (broadcast_axes.empty()) { - addConstant(output_name, getBlob(node_proto, 0)); + addConstant(output_name, getBlob(node_proto, 0).reshape(1, targetShape)); return; } @@ -2719,7 +2725,8 @@ void ONNXImporter::parseGather(LayerParams& layerParams, const opencv_onnx::Node runLayer(layerParams, inputs, output); output.back().convertTo(output.back(), type); - output.back().dims = std::max(input_real_ndims - real_ndims, 1); + if (real_ndims < 2) // In case of scalars or 1D vectors, OpenCV initializes 2D cv::Mat + output.back().dims = std::max(input_real_ndims - real_ndims, 1); addConstant(node_proto.output(0), output.back()); return; } diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index b5a97770b1..e566acd827 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -2487,6 +2487,11 @@ TEST_P(Test_ONNX_layers, Gelu) testONNXModels("gelu_approximation"); } +TEST_P(Test_ONNX_layers, OpenAI_CLIP_head) +{ + testONNXModels("clip-vit-base-head"); +} + INSTANTIATE_TEST_CASE_P(/**/, Test_ONNX_nets, 
dnnBackendsAndTargets()); }} // namespace From c643af0b85862fc38350ade2d1e2fd9b05fd8254 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 29 Mar 2023 15:29:56 +0300 Subject: [PATCH 078/199] fix test --- .../objdetect/test/test_charucodetection.cpp | 30 +++---------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/modules/objdetect/test/test_charucodetection.cpp b/modules/objdetect/test/test_charucodetection.cpp index e99f9de262..63b28d8e5f 100644 --- a/modules/objdetect/test/test_charucodetection.cpp +++ b/modules/objdetect/test/test_charucodetection.cpp @@ -213,7 +213,7 @@ void CV_CharucoPoseEstimation::run(int) { Mat distCoeffs(5, 1, CV_64FC1, Scalar::all(0)); // for different perspectives - for(double distance : {0.2, 0.3}) { + for(double distance : {0.2, 0.25}) { for(int yaw = -55; yaw <= 50; yaw += 25) { for(int pitch = -55; pitch <= 50; pitch += 25) { @@ -244,18 +244,6 @@ void CV_CharucoPoseEstimation::run(int) { detector.setCharucoParameters(charucoParameters); detector.detectBoard(img, charucoCorners, charucoIds, corners, ids); } - - // // create debug images - // Mat rgb_image; - // cv::cvtColor(img, rgb_image, COLOR_GRAY2RGB); - // aruco::drawDetectedCornersCharuco(rgb_image, charucoCorners, charucoIds); - // aruco::drawDetectedMarkers(rgb_image, corners, ids); - // cv::imwrite("Debug_CV_CharucoPoseEstimation" - // + (legacyPattern ? 
std::string("_legacy") : std::string("")) - // + "_dist" + std::to_string(distance) - // + "_yaw" + std::to_string(yaw) - // + "_pitch" + std::to_string(pitch) + ".png", rgb_image); - ASSERT_EQ(ids.size(), board.getIds().size()); if(charucoIds.size() == 0) continue; @@ -323,7 +311,7 @@ void CV_CharucoDiamondDetection::run(int) { int iter = 0; Mat cameraMatrix = Mat::eye(3, 3, CV_64FC1); - Size imgSize(750, 750); + Size imgSize(500, 500); aruco::DetectorParameters params; params.minDistanceToBorder = 0; float squareLength = 0.03f; @@ -333,7 +321,7 @@ void CV_CharucoDiamondDetection::run(int) { aruco::CharucoDetector detector(board); - cameraMatrix.at(0, 0) = cameraMatrix.at< double >(1, 1) = 1000; + cameraMatrix.at(0, 0) = cameraMatrix.at< double >(1, 1) = 650; cameraMatrix.at(0, 2) = imgSize.width / 2; cameraMatrix.at(1, 2) = imgSize.height / 2; @@ -344,7 +332,7 @@ void CV_CharucoDiamondDetection::run(int) { detector.setCharucoParameters(charucoParameters); // for different perspectives - for(double distance : {0.2, 0.3}) { + for(double distance : {0.2, 0.22}) { for(int yaw = -50; yaw <= 50; yaw += 25) { for(int pitch = -50; pitch <= 50; pitch += 25) { @@ -376,16 +364,6 @@ void CV_CharucoDiamondDetection::run(int) { detector.detectDiamonds(img, diamondCorners, diamondIds, corners, ids); - // // create debug images - // Mat rgb_image; - // cv::cvtColor(img, rgb_image, COLOR_GRAY2RGB); - // aruco::drawDetectedDiamonds(rgb_image, diamondCorners, diamondIds); - // aruco::drawDetectedMarkers(rgb_image, corners, ids); - // cv::imwrite(std::string("Debug_CV_CharucoDiamondDetection") - // + "_dist" + std::to_string(distance) - // + "_yaw" + std::to_string(yaw) - // + "_pitch" + std::to_string(pitch) + ".png", rgb_image); - // check detect if(ids.size() != 4) { ts->printf(cvtest::TS::LOG, "Not enough markers for diamond detection"); From 3f7d319fff24c240153905270b3251187be35a38 Mon Sep 17 00:00:00 2001 From: Anna Petrovicheva Date: Sat, 1 Apr 2023 17:47:40 +0300 Subject: 
[PATCH 079/199] Create FUNDING.yml --- .github/FUNDING.yml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .github/FUNDING.yml diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000000..ef81a29e71 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,3 @@ +# These are supported funding model platforms + +github: opencv/opencv From 6d93a0e42ce264e9fe770678680131037ad8b5e0 Mon Sep 17 00:00:00 2001 From: Anna Petrovicheva Date: Sat, 1 Apr 2023 18:13:40 +0300 Subject: [PATCH 080/199] Update FUNDING.yml --- .github/FUNDING.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index ef81a29e71..b34c2e4626 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1,3 +1,3 @@ # These are supported funding model platforms -github: opencv/opencv +github: opencv From 26ca124150b7f87392d380fc9333d409ebab6e38 Mon Sep 17 00:00:00 2001 From: Sajjad Ali Date: Thu, 2 Mar 2023 18:26:08 +0500 Subject: [PATCH 081/199] fix "ImportError: DLL load failed while importing cv2" while installing using pre-built binaries --- .../py_setup/py_setup_in_windows/py_setup_in_windows.markdown | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/py_tutorials/py_setup/py_setup_in_windows/py_setup_in_windows.markdown b/doc/py_tutorials/py_setup/py_setup_in_windows/py_setup_in_windows.markdown index 0ba1643ee1..1cefb01d5c 100644 --- a/doc/py_tutorials/py_setup/py_setup_in_windows/py_setup_in_windows.markdown +++ b/doc/py_tutorials/py_setup/py_setup_in_windows/py_setup_in_windows.markdown @@ -33,6 +33,8 @@ Installing OpenCV from prebuilt binaries -# Copy **cv2.pyd** to **C:/Python27/lib/site-packages**. +-# Copy the **opencv_world.dll** file to **C:/Python27/lib/site-packages** + -# Open Python IDLE and type following codes in Python terminal. 
@code >>> import cv2 as cv From 38719840284e753c46550692ab4588f910f7a0ac Mon Sep 17 00:00:00 2001 From: Dmitry Matveev Date: Sun, 2 Apr 2023 17:52:53 +0300 Subject: [PATCH 082/199] G-API: Fix compilation error in Standalone mode - Point3f was added to type traits but was missing in the "own" package; fixed. --- modules/gapi/include/opencv2/gapi/opencv_includes.hpp | 1 + modules/gapi/include/opencv2/gapi/own/types.hpp | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/modules/gapi/include/opencv2/gapi/opencv_includes.hpp b/modules/gapi/include/opencv2/gapi/opencv_includes.hpp index 25a67d6da6..7c2c42d8a2 100644 --- a/modules/gapi/include/opencv2/gapi/opencv_includes.hpp +++ b/modules/gapi/include/opencv2/gapi/opencv_includes.hpp @@ -31,6 +31,7 @@ namespace cv { using Size = gapi::own::Size; using Point = gapi::own::Point; using Point2f = gapi::own::Point2f; + using Point3f = gapi::own::Point3f; using Scalar = gapi::own::Scalar; using Mat = gapi::own::Mat; } // namespace cv diff --git a/modules/gapi/include/opencv2/gapi/own/types.hpp b/modules/gapi/include/opencv2/gapi/own/types.hpp index 3ec9787839..211b5c85ff 100644 --- a/modules/gapi/include/opencv2/gapi/own/types.hpp +++ b/modules/gapi/include/opencv2/gapi/own/types.hpp @@ -43,6 +43,17 @@ public: float y = 0.f; }; +class Point3f +{ +public: + Point3f() = default; + Point3f(float _x, float _y, float _z) : x(_x), y(_y), z(_z) {} + + float x = 0.f; + float y = 0.f; + float z = 0.f; +}; + class Rect { public: From b34fc422a2f7924b60e77daf224f9bbcfd999ce8 Mon Sep 17 00:00:00 2001 From: Zwyx <29386932+Zwyx@users.noreply.github.com> Date: Sun, 2 Apr 2023 23:00:21 +0800 Subject: [PATCH 083/199] Switch `rows` and `cols` in call to Mat.zeros The correct order is rows then cols: https://docs.opencv.org/4.x/d3/d63/classcv_1_1Mat.html#a56daa006391a670e9cb0cd08e3168c99 --- doc/js_tutorials/js_assets/js_contours_begin_contours.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/doc/js_tutorials/js_assets/js_contours_begin_contours.html b/doc/js_tutorials/js_assets/js_contours_begin_contours.html index 8efd3298a5..09e6ec44e1 100644 --- a/doc/js_tutorials/js_assets/js_contours_begin_contours.html +++ b/doc/js_tutorials/js_assets/js_contours_begin_contours.html @@ -41,7 +41,7 @@