From 8b2783e9ffe3b6a5f9a84b34cc72da0c71d41a3a Mon Sep 17 00:00:00 2001 From: Christine Poerschke <6458642+cpoerschke@users.noreply.github.com> Date: Sat, 25 May 2024 08:53:33 +0100 Subject: [PATCH 01/17] replace lena.jpg in find-existing-file tests --- modules/core/test/test_utils.cpp | 2 +- modules/python/test/test_misc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/core/test/test_utils.cpp b/modules/core/test/test_utils.cpp index a43ea78381..13720c2b00 100644 --- a/modules/core/test/test_utils.cpp +++ b/modules/core/test/test_utils.cpp @@ -345,7 +345,7 @@ TEST(Samples, findFile) { cv::utils::logging::LogLevel prev = cv::utils::logging::setLogLevel(cv::utils::logging::LOG_LEVEL_VERBOSE); cv::String path; - ASSERT_NO_THROW(path = samples::findFile("lena.jpg", false)); + ASSERT_NO_THROW(path = samples::findFile("HappyFish.jpg", false)); EXPECT_NE(std::string(), path.c_str()); cv::utils::logging::setLogLevel(prev); } diff --git a/modules/python/test/test_misc.py b/modules/python/test/test_misc.py index 08ab04d53d..ec86663e47 100644 --- a/modules/python/test/test_misc.py +++ b/modules/python/test/test_misc.py @@ -973,7 +973,7 @@ class CanUsePurePythonModuleFunction(NewOpenCVTests): class SamplesFindFile(NewOpenCVTests): def test_ExistedFile(self): - res = cv.samples.findFile('lena.jpg', False) + res = cv.samples.findFile('HappyFish.jpg', False) self.assertNotEqual(res, '') def test_MissingFile(self): From db3654ef51b156feab4f59c13f2ee41ca2ab9a85 Mon Sep 17 00:00:00 2001 From: Kumataro Date: Sun, 21 Jul 2024 10:00:29 +0900 Subject: [PATCH 02/17] python: prefer cv::Mat over cv::UMat in python binding --- modules/python/src2/gen2.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py index 3249c57f82..af187e5d3f 100755 --- a/modules/python/src2/gen2.py +++ b/modules/python/src2/gen2.py @@ -854,7 +854,22 @@ class FuncInfo(object): all_code_variants = [] + # See 
https://github.com/opencv/opencv/issues/25928 + # Conversion to UMat is expensive more than conversion to Mat. + # To reduce this cost, conversion to Mat is prefer than to UMat. + variants = [] + variants_umat = [] for v in self.variants: + hasUMat = False + for a in v.args: + hasUMat = hasUMat or "UMat" in a.tp + if hasUMat : + variants_umat.append(v) + else: + variants.append(v) + variants.extend(variants_umat) + + for v in variants: code_decl = "" code_ret = "" code_cvt_list = [] From 0b3dbdd4b3395674fe9d05162ab916b83e683559 Mon Sep 17 00:00:00 2001 From: Alexander Lyulkov Date: Thu, 25 Jul 2024 16:47:41 +0300 Subject: [PATCH 03/17] Added Java ORB test --- .../java/test/ORBFeatureDetectorTest.java | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/modules/features2d/misc/java/test/ORBFeatureDetectorTest.java b/modules/features2d/misc/java/test/ORBFeatureDetectorTest.java index 7399253af6..2cd9977fb2 100644 --- a/modules/features2d/misc/java/test/ORBFeatureDetectorTest.java +++ b/modules/features2d/misc/java/test/ORBFeatureDetectorTest.java @@ -1,5 +1,13 @@ package org.opencv.test.features2d; +import org.junit.Assert; +import org.opencv.core.CvType; +import org.opencv.core.KeyPoint; +import org.opencv.core.Mat; +import org.opencv.core.MatOfKeyPoint; +import org.opencv.core.Scalar; +import org.opencv.features2d.Features2d; +import org.opencv.features2d.ORB; import org.opencv.test.OpenCVTestCase; public class ORBFeatureDetectorTest extends OpenCVTestCase { @@ -36,4 +44,35 @@ public class ORBFeatureDetectorTest extends OpenCVTestCase { fail("Not yet implemented"); } + public void testDetectTwoPoints() { + Mat img = new Mat(256,256, CvType.CV_8UC3, new Scalar(0,0,0)); + img.put(35, 40, 255,255, 255); + img.put(152, 98, 200,0, 0); + + MatOfKeyPoint keypoints = new MatOfKeyPoint(); + ORB orb = ORB.create(); + Mat descriptors = new Mat(); + orb.detectAndCompute(img, new Mat(), keypoints, descriptors); + + KeyPoint[] keypointsArray = 
keypoints.toArray(); + assertEquals(2, keypointsArray.length); + + long x1 = Math.round(keypointsArray[0].pt.x); + long y1 = Math.round(keypointsArray[0].pt.y); + long x2 = Math.round(keypointsArray[1].pt.x); + long y2 = Math.round(keypointsArray[1].pt.y); + + if (x2 > x1) { + assertEquals(40, x1); + assertEquals(35, y1); + assertEquals(98, x2); + assertEquals(152, y2); + } else { + assertEquals(40, x2); + assertEquals(35, y2); + assertEquals(98, x1); + assertEquals(152, y1); + } + } + } From be3c519956296bb9254040a231bd63454d37958c Mon Sep 17 00:00:00 2001 From: Kumataro Date: Fri, 26 Jul 2024 05:55:00 +0900 Subject: [PATCH 04/17] core: FileStorage: detect invalid attribute value --- modules/core/src/persistence_xml.cpp | 2 ++ modules/core/test/test_io.cpp | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/modules/core/src/persistence_xml.cpp b/modules/core/src/persistence_xml.cpp index 6141fade2d..ed699758fc 100644 --- a/modules/core/src/persistence_xml.cpp +++ b/modules/core/src/persistence_xml.cpp @@ -737,6 +737,8 @@ public: if( c != '\"' && c != '\'' ) { ptr = skipSpaces( ptr, CV_XML_INSIDE_TAG ); + if(!ptr) + CV_PARSE_ERROR_CPP("Invalid attribute value"); if( *ptr != '\"' && *ptr != '\'' ) CV_PARSE_ERROR_CPP( "Attribute value should be put into single or double quotes" ); } diff --git a/modules/core/test/test_io.cpp b/modules/core/test/test_io.cpp index 16b66e75ee..d7be6e08c6 100644 --- a/modules/core/test/test_io.cpp +++ b/modules/core/test/test_io.cpp @@ -1985,4 +1985,22 @@ INSTANTIATE_TEST_CASE_P( /*nothing*/, Core_InputOutput_regression_25073, Values("test.json", "test.xml", "test.yml") ); +// see https://github.com/opencv/opencv/issues/25946 +TEST(Core_InputOutput, FileStorage_invalid_attribute_value_regression_25946) +{ + const std::string fileName = cv::tempfile("FileStorage_invalid_attribute_value_exception_test.xml"); + const std::string content = " Date: Fri, 26 Jul 2024 13:24:26 -0700 Subject: [PATCH 05/17] fix: js perf tests 
modules/js/perf/perf_helpfunc.js and target tests, e.g. perf_gaussianBlur.js contained "const isNodeJs", leading to re-definition when using associated *.html files. --- modules/js/perf/perf_64bits.js | 2 +- modules/js/perf/perf_helpfunc.js | 2 +- modules/js/perf/perf_imgproc/perf_blur.js | 2 +- modules/js/perf/perf_imgproc/perf_cvtcolor.js | 2 +- modules/js/perf/perf_imgproc/perf_dilate.js | 2 +- modules/js/perf/perf_imgproc/perf_erode.js | 2 +- modules/js/perf/perf_imgproc/perf_filter2D.js | 2 +- modules/js/perf/perf_imgproc/perf_gaussianBlur.js | 2 +- modules/js/perf/perf_imgproc/perf_medianBlur.js | 2 +- modules/js/perf/perf_imgproc/perf_pyrDown.js | 2 +- modules/js/perf/perf_imgproc/perf_remap.js | 2 +- modules/js/perf/perf_imgproc/perf_resize.js | 2 +- modules/js/perf/perf_imgproc/perf_scharr.js | 2 +- modules/js/perf/perf_imgproc/perf_sobel.js | 2 +- modules/js/perf/perf_imgproc/perf_threshold.js | 2 +- modules/js/perf/perf_imgproc/perf_warpAffine.js | 2 +- modules/js/perf/perf_imgproc/perf_warpPerspective.js | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) diff --git a/modules/js/perf/perf_64bits.js b/modules/js/perf/perf_64bits.js index dc4e234d4c..de75921e20 100644 --- a/modules/js/perf/perf_64bits.js +++ b/modules/js/perf/perf_64bits.js @@ -1,4 +1,4 @@ -const isNodeJs = (typeof window) === 'undefined'? true : false; +var isNodeJs = (typeof window) === 'undefined'? true : false; if (isNodeJs) { var Benchmark = require('benchmark'); diff --git a/modules/js/perf/perf_helpfunc.js b/modules/js/perf/perf_helpfunc.js index c2ad7f2e0f..45cdd1c681 100644 --- a/modules/js/perf/perf_helpfunc.js +++ b/modules/js/perf/perf_helpfunc.js @@ -1,4 +1,4 @@ -const isNodeJs = (typeof window) === 'undefined'? true : false; +var isNodeJs = (typeof window) === 'undefined'? 
true : false; if(isNodeJs) { var Base = require("./base"); diff --git a/modules/js/perf/perf_imgproc/perf_blur.js b/modules/js/perf/perf_imgproc/perf_blur.js index 66c5f240e7..1f3981c132 100644 --- a/modules/js/perf/perf_imgproc/perf_blur.js +++ b/modules/js/perf/perf_imgproc/perf_blur.js @@ -1,4 +1,4 @@ -const isNodeJs = (typeof window) === 'undefined'? true : false; +var isNodeJs = (typeof window) === 'undefined'? true : false; if (isNodeJs) { var Benchmark = require('benchmark'); diff --git a/modules/js/perf/perf_imgproc/perf_cvtcolor.js b/modules/js/perf/perf_imgproc/perf_cvtcolor.js index fbae5d1bca..a72236e5bc 100644 --- a/modules/js/perf/perf_imgproc/perf_cvtcolor.js +++ b/modules/js/perf/perf_imgproc/perf_cvtcolor.js @@ -1,4 +1,4 @@ -const isNodeJs = (typeof window) === 'undefined'? true : false; +var isNodeJs = (typeof window) === 'undefined'? true : false; if (isNodeJs) { var Benchmark = require('benchmark'); diff --git a/modules/js/perf/perf_imgproc/perf_dilate.js b/modules/js/perf/perf_imgproc/perf_dilate.js index 5b6cd01682..5647cc3acd 100644 --- a/modules/js/perf/perf_imgproc/perf_dilate.js +++ b/modules/js/perf/perf_imgproc/perf_dilate.js @@ -1,4 +1,4 @@ -const isNodeJs = (typeof window) === 'undefined'? true : false; +var isNodeJs = (typeof window) === 'undefined'? true : false; if (isNodeJs) { var Benchmark = require('benchmark'); diff --git a/modules/js/perf/perf_imgproc/perf_erode.js b/modules/js/perf/perf_imgproc/perf_erode.js index 8915ead40f..3edffbf534 100644 --- a/modules/js/perf/perf_imgproc/perf_erode.js +++ b/modules/js/perf/perf_imgproc/perf_erode.js @@ -1,4 +1,4 @@ -const isNodeJs = (typeof window) === 'undefined'? true : false; +var isNodeJs = (typeof window) === 'undefined'? 
true : false; if (isNodeJs) { var Benchmark = require('benchmark'); diff --git a/modules/js/perf/perf_imgproc/perf_filter2D.js b/modules/js/perf/perf_imgproc/perf_filter2D.js index 4602befcbd..1a2169cae3 100644 --- a/modules/js/perf/perf_imgproc/perf_filter2D.js +++ b/modules/js/perf/perf_imgproc/perf_filter2D.js @@ -1,4 +1,4 @@ -const isNodeJs = (typeof window) === 'undefined'? true : false; +var isNodeJs = (typeof window) === 'undefined'? true : false; if (isNodeJs) { var Benchmark = require('benchmark'); diff --git a/modules/js/perf/perf_imgproc/perf_gaussianBlur.js b/modules/js/perf/perf_imgproc/perf_gaussianBlur.js index b59aa83b84..5643fe4e6f 100644 --- a/modules/js/perf/perf_imgproc/perf_gaussianBlur.js +++ b/modules/js/perf/perf_imgproc/perf_gaussianBlur.js @@ -1,4 +1,4 @@ -const isNodeJs = (typeof window) === 'undefined'? true : false; +var isNodeJs = (typeof window) === 'undefined'? true : false; if (isNodeJs) { var Benchmark = require('benchmark'); diff --git a/modules/js/perf/perf_imgproc/perf_medianBlur.js b/modules/js/perf/perf_imgproc/perf_medianBlur.js index 333bc8424c..29ff99663a 100644 --- a/modules/js/perf/perf_imgproc/perf_medianBlur.js +++ b/modules/js/perf/perf_imgproc/perf_medianBlur.js @@ -1,4 +1,4 @@ -const isNodeJs = (typeof window) === 'undefined'? true : false; +var isNodeJs = (typeof window) === 'undefined'? true : false; if (isNodeJs) { var Benchmark = require('benchmark'); diff --git a/modules/js/perf/perf_imgproc/perf_pyrDown.js b/modules/js/perf/perf_imgproc/perf_pyrDown.js index 957ac7684d..df200e4f19 100644 --- a/modules/js/perf/perf_imgproc/perf_pyrDown.js +++ b/modules/js/perf/perf_imgproc/perf_pyrDown.js @@ -1,4 +1,4 @@ -const isNodeJs = (typeof window) === 'undefined'? true : false; +var isNodeJs = (typeof window) === 'undefined'? 
true : false; if (isNodeJs) { var Benchmark = require('benchmark'); diff --git a/modules/js/perf/perf_imgproc/perf_remap.js b/modules/js/perf/perf_imgproc/perf_remap.js index 1aa69ecef7..38afef575a 100644 --- a/modules/js/perf/perf_imgproc/perf_remap.js +++ b/modules/js/perf/perf_imgproc/perf_remap.js @@ -1,4 +1,4 @@ -const isNodeJs = (typeof window) === 'undefined'? true : false; +var isNodeJs = (typeof window) === 'undefined'? true : false; if (isNodeJs) { var Benchmark = require('benchmark'); diff --git a/modules/js/perf/perf_imgproc/perf_resize.js b/modules/js/perf/perf_imgproc/perf_resize.js index 5262d22489..ad2b949ed2 100644 --- a/modules/js/perf/perf_imgproc/perf_resize.js +++ b/modules/js/perf/perf_imgproc/perf_resize.js @@ -1,4 +1,4 @@ -const isNodeJs = (typeof window) === 'undefined'? true : false; +var isNodeJs = (typeof window) === 'undefined'? true : false; if (isNodeJs) { var Benchmark = require('benchmark'); diff --git a/modules/js/perf/perf_imgproc/perf_scharr.js b/modules/js/perf/perf_imgproc/perf_scharr.js index 4726e76312..f4df15db55 100644 --- a/modules/js/perf/perf_imgproc/perf_scharr.js +++ b/modules/js/perf/perf_imgproc/perf_scharr.js @@ -1,4 +1,4 @@ -const isNodeJs = (typeof window) === 'undefined'? true : false; +var isNodeJs = (typeof window) === 'undefined'? true : false; if (isNodeJs) { var Benchmark = require('benchmark'); diff --git a/modules/js/perf/perf_imgproc/perf_sobel.js b/modules/js/perf/perf_imgproc/perf_sobel.js index ddc09bb8f6..a082cd7b30 100644 --- a/modules/js/perf/perf_imgproc/perf_sobel.js +++ b/modules/js/perf/perf_imgproc/perf_sobel.js @@ -1,4 +1,4 @@ -const isNodeJs = (typeof window) === 'undefined'? true : false; +var isNodeJs = (typeof window) === 'undefined'? 
true : false; if (isNodeJs) { var Benchmark = require('benchmark'); diff --git a/modules/js/perf/perf_imgproc/perf_threshold.js b/modules/js/perf/perf_imgproc/perf_threshold.js index 629628748d..71f55257a3 100644 --- a/modules/js/perf/perf_imgproc/perf_threshold.js +++ b/modules/js/perf/perf_imgproc/perf_threshold.js @@ -1,4 +1,4 @@ -const isNodeJs = (typeof window) === 'undefined'? true : false; +var isNodeJs = (typeof window) === 'undefined'? true : false; if (isNodeJs) { var Benchmark = require('benchmark'); diff --git a/modules/js/perf/perf_imgproc/perf_warpAffine.js b/modules/js/perf/perf_imgproc/perf_warpAffine.js index dc3cf67af4..3917719869 100644 --- a/modules/js/perf/perf_imgproc/perf_warpAffine.js +++ b/modules/js/perf/perf_imgproc/perf_warpAffine.js @@ -1,4 +1,4 @@ -const isNodeJs = (typeof window) === 'undefined'? true : false; +var isNodeJs = (typeof window) === 'undefined'? true : false; if (isNodeJs) { var Benchmark = require('benchmark'); diff --git a/modules/js/perf/perf_imgproc/perf_warpPerspective.js b/modules/js/perf/perf_imgproc/perf_warpPerspective.js index 252729e3f0..1b2e5b777a 100644 --- a/modules/js/perf/perf_imgproc/perf_warpPerspective.js +++ b/modules/js/perf/perf_imgproc/perf_warpPerspective.js @@ -1,4 +1,4 @@ -const isNodeJs = (typeof window) === 'undefined'? true : false; +var isNodeJs = (typeof window) === 'undefined'? 
true : false; if (isNodeJs) { var Benchmark = require('benchmark'); From 938b9e4bb7ff26751c558ba3548e0ee53863ff35 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 18 Jul 2024 05:37:14 +0000 Subject: [PATCH 06/17] cmake: try baseline optimization feature check without extra flags first --- cmake/OpenCVCompilerOptimizations.cmake | 4 ++-- cmake/checks/cpu_sse2.cpp | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake index ff0e40c666..418964ab0a 100644 --- a/cmake/OpenCVCompilerOptimizations.cmake +++ b/cmake/OpenCVCompilerOptimizations.cmake @@ -171,7 +171,7 @@ elseif(" ${CMAKE_CXX_FLAGS} " MATCHES " -march=native | -xHost | /QxHost ") endif() if(X86 OR X86_64) - ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX_512F;AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CLX;AVX512_ICL") + ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;AVX;FP16;AVX2;FMA3;AVX_512F;AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CLX;AVX512_ICL") ocv_update(CPU_AVX512_COMMON_GROUP "AVX_512F;AVX_512CD") ocv_update(CPU_AVX512_KNL_GROUP "AVX512_COMMON;AVX512_KNL_EXTRA") @@ -445,7 +445,7 @@ macro(ocv_check_compiler_optimization OPT) set(_varname "") if(CPU_${OPT}_TEST_FILE) set(__available 0) - if(CPU_BASELINE_DETECT) + if(__is_from_baseline OR CPU_BASELINE_DETECT) set(_varname "HAVE_CPU_${OPT}_SUPPORT") ocv_check_compiler_flag(CXX "${CPU_BASELINE_FLAGS}" "${_varname}" "${CPU_${OPT}_TEST_FILE}") if(${_varname}) diff --git a/cmake/checks/cpu_sse2.cpp b/cmake/checks/cpu_sse2.cpp index 68a69f88cb..2827a1a460 100644 --- a/cmake/checks/cpu_sse2.cpp +++ b/cmake/checks/cpu_sse2.cpp @@ -1,2 +1,16 @@ #include -int main() { return 0; } + +inline __m128i _v128_comgt_epu32(const __m128i& a, const __m128i& b) +{ + const __m128i delta = _mm_set1_epi32((int)0x80000000); + 
return _mm_cmpgt_epi32(_mm_xor_si128(a, delta), _mm_xor_si128(b, delta)); +} + +int main() +{ + __m128i a, b, c; + a = _mm_set1_epi32(0x00000000); + b = _mm_set1_epi32(0x0000ffff); + c = _v128_comgt_epu32(a, b); + return 0; +} From 2a333a6c86b7fd88bd21fd92e8d828aa6a0b595c Mon Sep 17 00:00:00 2001 From: Daniele Affinita Date: Tue, 30 Jul 2024 13:16:08 +0200 Subject: [PATCH 07/17] Merge pull request #25644 from DaniAffCH:blockwise-quantization [GSoC] dnn: Blockwise quantization support #25644 This PR introduces blockwise quantization in DNN allowing the parsing of ONNX models quantized in blockwise style. In particular it modifies the `Quantize` and `Dequantize` operations. The related PR opencv/opencv_extra#1181 contains the test data. Additional notes: - The original quantization issue has been fixed. Previously, for 1D scale and zero-point, the operation applied was $y = int8(x/s - z)$ instead of $y = int8(x/s + z)$. Note that the operation was already correctly implemented when the scale and zero-point were scalars. The previous implementation failed the ONNX test cases, but now all have passed successfully. [Reference](https://github.com/onnx/onnx/blob/main/docs/Operators.md#QuantizeLinear) - the function `block_repeat` broadcasts scale and zero-point to the input shape. It repeats all the elements of a given axis n times. This function generalizes the behavior of `repeat` from the core module which is defined just for 2 axis assuming `Mat` has 2 dimensions. If appropriate and useful, you might consider moving `block_repeat` to the core module. - Now, the scale and zero-point can be taken as layer inputs. This increases the ONNX layers' coverage and enables us to run the ONNX test cases (previously disabled) being fully compliant with ONNX standards. 
Since they are now supported, I have enabled the test cases for: `test_dequantizelinear`, `test_dequantizelinear_axis`, `test_dequantizelinear_blocked`, `test_quantizelinear`, `test_quantizelinear_axis`, `test_quantizelinear_blocked` just in CPU backend. All of them pass successfully. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake --- .../dnn/src/int8layers/quantization_utils.cpp | 201 +++++++++++++++--- modules/dnn/src/onnx/onnx_importer.cpp | 16 +- modules/dnn/test/test_onnx_conformance.cpp | 2 + ...conformance_layer_filter__openvino.inl.hpp | 12 +- ...ance_layer_filter__vulkan_denylist.inl.hpp | 6 + ...er_filter_opencv_ocl_fp16_denylist.inl.hpp | 8 +- ...er_filter_opencv_ocl_fp32_denylist.inl.hpp | 6 + ..._conformance_layer_parser_denylist.inl.hpp | 4 - 8 files changed, 212 insertions(+), 43 deletions(-) diff --git a/modules/dnn/src/int8layers/quantization_utils.cpp b/modules/dnn/src/int8layers/quantization_utils.cpp index 146ad68257..4690f68e5f 100644 --- a/modules/dnn/src/int8layers/quantization_utils.cpp +++ b/modules/dnn/src/int8layers/quantization_utils.cpp @@ -15,7 +15,10 @@ namespace dnn static void broadcast1D2TargetMat(Mat& data, const MatShape& targetShape, int axis) { // The data is the 1-D scales or zeropoints. 
- CV_Assert(axis >= 0 && targetShape.size() > axis && data.total() == targetShape[axis]); + CV_CheckGE(axis, 0, "Quantization axis must be non-negative."); + CV_CheckGT((int)targetShape.size(),axis,"Quantization axis must be within the valid range of target shape dimensions."); + CV_CheckEQ((int)data.total(), (int)targetShape[axis], "Data total size must match the size of the specified target dimension."); + std::vector broadcast_axes; for (int i = 0; i < targetShape.size(); i++) { @@ -35,29 +38,98 @@ static void broadcast1D2TargetMat(Mat& data, const MatShape& targetShape, int ax } } -static void broadcastScaleAndZeropoint(Mat& scalesMat, Mat& zeropointsMat, const std::vector& scales, - const std::vector& zeropoints, const MatShape& targetShape, int axis) +static void block_repeat(InputArray src, const MatShape& srcShape, int axis, int repetitions, OutputArray dst) { - // broad cast the scales and zeropoint to the input shape. - MatShape subTargetShape(targetShape.size(), 1); - subTargetShape[axis] = scales.size(); + CV_Assert(src.getObj() != dst.getObj()); + CV_Check(axis, axis >= 0 && axis < src.dims(), "Axis out of range"); + CV_CheckGT(repetitions, 1, "More than one repetition expected"); - zeropointsMat.create(subTargetShape.size(), subTargetShape.data(), CV_32FC1); - scalesMat.create(subTargetShape.size(), subTargetShape.data(), CV_32FC1); + Mat src_mat = src.getMat(); + Mat dst_mat; - const int len = scales.size(); - // Deep copy the scales and zeropoint data and prevent the original data from being changed. 
+ if (src_mat.depth() != CV_32F) + src_mat.convertTo(src_mat, CV_32F); - float * scalePtr = scalesMat.ptr(0); - for (int i = 0; i < len; i++) - scalePtr[i] = scales[i]; + MatShape sshape = srcShape; + MatShape dshape = srcShape; + + size_t dtype_bytes = src_mat.elemSize(); + int chunk_size = dtype_bytes; + int num_chunks = 1; + + dshape[axis] *= repetitions; + + for (int i = axis+1; i < sshape.size(); ++i) + chunk_size*=sshape[i]; + + for (int i = 0; i <= axis; ++i) + num_chunks*=sshape[i]; + + dst.create(dshape.size(), dshape.data(), src_mat.type()); + dst_mat = dst.getMat(); + + CV_Assert(dst_mat.isContinuous()); + CV_Assert(src_mat.isContinuous()); + + for (int i = 0; i < repetitions; ++i) { + size_t src_offset = 0; + size_t dst_offset = i * chunk_size; + + for (int j = 0; j < num_chunks; ++j) { + memcpy(dst_mat.data + dst_offset, src_mat.data + src_offset, chunk_size); + src_offset += chunk_size; + dst_offset += chunk_size * repetitions; + } + } +} + +template +static void copyVecToMat(Mat& mat, const std::vector& data){ + float * matPtr = mat.ptr(0); + const int len = data.size(); - float * zpPtr = zeropointsMat.ptr(0); for (int i = 0; i < len; i++) - zpPtr[i] = (float )zeropoints[i]; + matPtr[i] = (float) data[i]; +} - broadcast1D2TargetMat(scalesMat, targetShape, axis); - broadcast1D2TargetMat(zeropointsMat, targetShape, axis); +template +static void broadcastBlockedMatrix(Mat& mat, const std::vector& data, const MatShape& targetShape, int axis, int block_size){ + CV_Check(block_size, targetShape[axis] % block_size == 0 && block_size <= targetShape[axis], "Block size must be a divisor of the target dimension size and not exceed it."); + + MatShape subTargetShape(targetShape); + subTargetShape[axis] = static_cast(subTargetShape[axis] / block_size); + + block_repeat(data, subTargetShape, axis, block_size, mat); +} + +template +static void broadcastStandardMatrix(Mat& mat, const std::vector& data, const MatShape& targetShape, int axis) +{ + MatShape 
subTargetShape(targetShape.size(), 1); + subTargetShape[axis] = data.size(); + mat.create(subTargetShape.size(), subTargetShape.data(), CV_32FC1); + + copyVecToMat(mat,data); + + broadcast1D2TargetMat(mat, targetShape, axis); +} + + +static void broadcastScaleAndZeropoint(Mat& scalesMat, Mat& zeropointsMat, const std::vector& scales, + const std::vector& zeropoints, const MatShape& targetShape, int axis, int block_size) +{ + // broad cast the scales and zeropoint to the input shape. + + if (block_size == 0) + { + broadcastStandardMatrix(zeropointsMat, zeropoints, targetShape, axis); + broadcastStandardMatrix(scalesMat, scales, targetShape, axis); + } + else + { + broadcastBlockedMatrix(zeropointsMat, zeropoints, targetShape, axis, block_size); + broadcastBlockedMatrix(scalesMat, scales, targetShape, axis, block_size); + } } // Quantize FP32/FP16 Inputs to INT8 @@ -65,13 +137,17 @@ class QuantizeLayerImpl CV_FINAL : public QuantizeLayer { public: int axis; + int block_size; bool is1D; - Mat scalesMat, zeropointsMat; // Saving the broadcasetd scales data. + Mat scalesMat, zeropointsMat; // Saving the broadcasted scales data. + bool quantParamExternal = true; // Indicates if the quantization parameters (scale and zero point) are provided as inputs to the node. 
QuantizeLayerImpl(const LayerParams& params) { is1D = params.get("is1D", false); axis = params.get("axis", 1); + block_size = params.get("block_size", 0); + if (!is1D) { scales.push_back(params.get("scales", 1.0f)); @@ -82,7 +158,7 @@ public: DictValue paramScales = params.get("scales"); int i, n = paramScales.size(); - CV_Assert(n > 0); + CV_CheckGT(n, 0, "Scale missing."); scales.resize(n, 0.); for (i = 0; i < n; i++) scales[i] = paramScales.get(i); @@ -108,7 +184,7 @@ public: std::vector &outputs, std::vector &internals) const CV_OVERRIDE { - CV_Assert(inputs.size() == 1); + CV_Check(inputs.size(), inputs.size() >= 1 && inputs.size() <= 3, "Number of inputs must be between 1 and 3 inclusive."); Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals); return false; } @@ -124,7 +200,7 @@ public: if (is1D) { MatShape inputShape = shape(inputs[0]); - broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis); + broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis, block_size); } } @@ -146,6 +222,39 @@ public: return true; } #endif + void processInputOutput(std::vector& inputs, std::vector& outputs) + { + CV_Check(inputs.size(), inputs.size() >= 1 && inputs.size() <= 3, "Number of inputs must be between 1 and 3 inclusive."); + quantParamExternal &= inputs.size() > 1; + + // Scale and zeropoint taken as input + if (quantParamExternal) + { + quantParamExternal = false; + scalesMat = inputs[1]; + + scalesMat.reshape(1, 1).copyTo(scales); + + if(scalesMat.total() > 1) is1D = true; + + + if (inputs.size() > 2) + { + zeropointsMat = inputs[2]; + CV_CheckEQ((int)zeropointsMat.total(), (int)scalesMat.total(), "Scale and zero point elements number must match."); + zeropointsMat.reshape(1, 1).copyTo(zeropoints); + } + + if (is1D) + { + MatShape inputShape = shape(inputs[0]); + broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis, block_size); + } + } + + if 
(outputs[0].depth() != CV_8S) + outputs[0].convertTo(outputs[0], CV_8S); + } void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE { @@ -159,14 +268,13 @@ public: inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); - if (outputs[0].depth() != CV_8S) - outputs[0].convertTo(outputs[0], CV_8S); + processInputOutput(inputs, outputs); if (is1D) { Mat inputTmp; divide(inputs[0], scalesMat, inputTmp); - subtract(inputTmp, zeropointsMat, inputTmp); + add(inputTmp, zeropointsMat, inputTmp); inputTmp.convertTo(outputs[0], CV_8S); } @@ -190,13 +298,16 @@ class DequantizeLayerImpl CV_FINAL : public DequantizeLayer { public: int axis; + int block_size; bool is1D; Mat scalesMat, zeropointsMat; // Saving the broadcasetd scales data. + bool quantParamExternal = true; DequantizeLayerImpl(const LayerParams& params) { is1D = params.get("is1D", false); axis = params.get("axis", 1); + block_size = params.get("block_size", 0); if (!is1D) { @@ -208,7 +319,7 @@ public: DictValue paramScales = params.get("scales"); int i, n = paramScales.size(); - CV_Assert(n > 0); + CV_CheckGT(n, 0, "Scale missing."); scales.resize(n); for (i = 0; i < n; i++) scales[i] = paramScales.get(i); @@ -234,7 +345,7 @@ public: std::vector &outputs, std::vector &internals) const CV_OVERRIDE { - CV_Assert(inputs.size() == 1); + CV_Check(inputs.size(), inputs.size() >= 1 && inputs.size() <= 3, "Number of inputs must be between 1 and 3 inclusive."); Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals); return false; } @@ -250,7 +361,7 @@ public: if (is1D) { MatShape inputShape = shape(inputs[0]); - broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis); + broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis, block_size); } } @@ -269,6 +380,39 @@ public: } #endif + void processInputOutput(std::vector& inputs, std::vector& outputs) + { + 
CV_Check(inputs.size(), inputs.size() >= 1 && inputs.size() <= 3, "Number of inputs must be between 1 and 3 inclusive."); + + quantParamExternal &= inputs.size() > 1; + // Scale and zeropoint taken as input + if (quantParamExternal) + { + quantParamExternal = false; + scalesMat = inputs[1]; + + scalesMat.reshape(1, 1).copyTo(scales); + + if(scalesMat.total() > 1) is1D = true; + + if (inputs.size() > 2) + { + zeropointsMat = inputs[2]; + CV_CheckEQ((int)zeropointsMat.total(), (int)scalesMat.total(), "Scale and zero point elements number must match."); + zeropointsMat.reshape(1, 1).copyTo(zeropoints); + } + + if (is1D) + { + MatShape inputShape = shape(inputs[0]); + broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis, block_size); + } + } + + if (outputs[0].depth() != CV_32F) + outputs[0].convertTo(outputs[0], CV_32F); + } + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE { CV_TRACE_FUNCTION(); @@ -281,8 +425,7 @@ public: inputs_arr.getMatVector(inputs); outputs_arr.getMatVector(outputs); - if (outputs[0].depth() != CV_32F) - outputs[0].convertTo(outputs[0], CV_32F); + processInputOutput(inputs, outputs); if (is1D) { diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 3745d7ed86..e91e2605c5 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -3239,6 +3239,17 @@ void ONNXImporter::parseQuantDequant(LayerParams& layerParams, const opencv_onnx // or 1-D tensor (per-channel quantized). bool is1D = false; + if (layerParams.type == "Quantize") + layerParams.set("depth", CV_8S); + else // Dequantize + layerParams.set("depth", CV_32F); + + // If scale is not defined as a constant blob, it is considered an external input. 
+ if(constBlobs.find(node_proto.input(1)) == constBlobs.end()){ + addLayer(layerParams, node_proto); + return; + } + Mat scaleMat = getBlob(node_proto, 1); if(scaleMat.total() > 1) is1D = true; @@ -3280,11 +3291,6 @@ void ONNXImporter::parseQuantDequant(LayerParams& layerParams, const opencv_onnx layerParams.set("zeropoints", zeropoint); } - if (layerParams.type == "Quantize") - layerParams.set("depth", CV_8S); - else // Dequantize - layerParams.set("depth", CV_32F); - if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) // Variable input. { std::vector inputs, outputs; diff --git a/modules/dnn/test/test_onnx_conformance.cpp b/modules/dnn/test/test_onnx_conformance.cpp index 57969ced87..0199d29548 100644 --- a/modules/dnn/test/test_onnx_conformance.cpp +++ b/modules/dnn/test/test_onnx_conformance.cpp @@ -224,6 +224,7 @@ static const TestCase testConformanceConfig[] = { {"test_depthtospace_example", 1, 1}, {"test_dequantizelinear", 3, 1}, {"test_dequantizelinear_axis", 3, 1}, + {"test_dequantizelinear_blocked", 3, 1}, {"test_det_2d", 1, 1}, {"test_det_nd", 1, 1}, {"test_div", 2, 1}, @@ -569,6 +570,7 @@ static const TestCase testConformanceConfig[] = { {"test_qlinearmatmul_3D", 8, 1}, {"test_quantizelinear", 3, 1}, {"test_quantizelinear_axis", 3, 1}, + {"test_quantizelinear_blocked", 3, 1}, {"test_range_float_type_positive_delta", 3, 1}, {"test_range_float_type_positive_delta_expanded", 3, 1}, {"test_range_int32_type_negative_delta", 3, 1}, diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp index 9b2a2f4f2d..9069a69ff4 100644 --- a/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp @@ -565,9 +565,11 @@ CASE(test_depthtospace_dcr_mode) CASE(test_depthtospace_example) // no filter CASE(test_dequantizelinear) - // no filter + SKIP; CASE(test_dequantizelinear_axis) - 
// no filter + SKIP; +CASE(test_dequantizelinear_blocked) + SKIP; CASE(test_det_2d) // no filter CASE(test_det_nd) @@ -1348,9 +1350,11 @@ CASE(test_qlinearmatmul_2D) CASE(test_qlinearmatmul_3D) // no filter CASE(test_quantizelinear) - // no filter + SKIP; CASE(test_quantizelinear_axis) - // no filter + SKIP; +CASE(test_quantizelinear_blocked) + SKIP; CASE(test_range_float_type_positive_delta) // no filter CASE(test_range_float_type_positive_delta_expanded) diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter__vulkan_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter__vulkan_denylist.inl.hpp index 968dd1e025..f6aee0dd36 100644 --- a/modules/dnn/test/test_onnx_conformance_layer_filter__vulkan_denylist.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_filter__vulkan_denylist.inl.hpp @@ -48,6 +48,9 @@ "test_cumsum_2d_axis_1", "test_cumsum_2d_negative_axis", "test_concat_1d_axis_negative_1", +"test_dequantizelinear", +"test_dequantizelinear_axis", +"test_dequantizelinear_blocked", "test_div_uint8", "test_flatten_axis0", "test_flatten_axis2", @@ -71,6 +74,9 @@ "test_pow_types_float32_int32", // vulkan backend does not take tensor other than float32 data type "test_pow_types_float32_int64", // vulkan backend does not take tensor other than float32 data type "test_pow_types_int", // vulkan backend does not take tensor other than float32 data type +"test_quantizelinear", +"test_quantizelinear_axis", +"test_quantizelinear_blocked", "test_softmax_default_axis", "test_sub_bcast", "test_sub_uint8", diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp16_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp16_denylist.inl.hpp index 7303348d10..8dc970fe1e 100644 --- a/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp16_denylist.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp16_denylist.inl.hpp @@ -1,4 +1,7 @@ 
"test_averagepool_3d_default", +"test_dequantizelinear", +"test_dequantizelinear_axis", +"test_dequantizelinear_blocked", "test_dropout_default_ratio", "test_globalmaxpool", "test_globalmaxpool_precomputed", @@ -14,7 +17,10 @@ "test_maxpool_2d_same_upper", "test_maxpool_2d_strides", "test_maxpool_3d_default", -"test_pow", // fp16 accuracy issue +"test_pow", +"test_quantizelinear", +"test_quantizelinear_axis", +"test_quantizelinear_blocked", "test_softmax_large_number", "test_softmax_large_number_expanded", "test_split_equal_parts_1d", diff --git a/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp32_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp32_denylist.inl.hpp index 7fe58a07fd..2453e2ad9f 100644 --- a/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp32_denylist.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp32_denylist.inl.hpp @@ -1,5 +1,11 @@ "test_averagepool_3d_default", +"test_dequantizelinear", +"test_dequantizelinear_axis", +"test_dequantizelinear_blocked", "test_maxpool_3d_default", +"test_quantizelinear", +"test_quantizelinear_axis", +"test_quantizelinear_blocked", "test_scatter_elements_with_axis", "test_scatter_elements_with_duplicate_indices", "test_scatter_elements_with_negative_indices", diff --git a/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp b/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp index 78c26eeea2..7b408619d2 100644 --- a/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp +++ b/modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp @@ -89,8 +89,6 @@ "test_convtranspose_pad", "test_convtranspose_pads", "test_convtranspose_with_kernel", -"test_dequantizelinear", -"test_dequantizelinear_axis", "test_det_2d", "test_det_nd", "test_dropout_default_mask", @@ -290,8 +288,6 @@ "test_qlinearconv", "test_qlinearmatmul_2D", "test_qlinearmatmul_3D", -"test_quantizelinear", 
-"test_quantizelinear_axis", "test_range_float_type_positive_delta", "test_range_float_type_positive_delta_expanded", "test_range_int32_type_negative_delta", From 93745245a362aea9a57f6c5f767f88ea503985fe Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Wed, 31 Jul 2024 18:05:33 +0300 Subject: [PATCH 08/17] Improved error handling in image codecs. --- modules/imgcodecs/src/bitstrm.cpp | 54 ++++++++++++++++++-------- modules/imgcodecs/src/bitstrm.hpp | 20 ++++++---- modules/imgcodecs/src/grfmt_bmp.cpp | 38 +++++++++--------- modules/imgcodecs/src/grfmt_pfm.cpp | 34 ++++++++-------- modules/imgcodecs/src/grfmt_pxm.cpp | 8 ++-- modules/imgcodecs/src/grfmt_sunras.cpp | 18 ++++----- modules/imgcodecs/src/grfmt_tiff.cpp | 10 ----- modules/imgcodecs/src/grfmt_tiff.hpp | 4 -- 8 files changed, 99 insertions(+), 87 deletions(-) diff --git a/modules/imgcodecs/src/bitstrm.cpp b/modules/imgcodecs/src/bitstrm.cpp index 97df645a6d..a8f91aa4dd 100644 --- a/modules/imgcodecs/src/bitstrm.cpp +++ b/modules/imgcodecs/src/bitstrm.cpp @@ -377,26 +377,30 @@ void WBaseStream::allocate() } -void WBaseStream::writeBlock() +bool WBaseStream::writeBlock() { int size = (int)(m_current - m_start); CV_Assert(isOpened()); if( size == 0 ) - return; + return true; if( m_buf ) { size_t sz = m_buf->size(); m_buf->resize( sz + size ); memcpy( &(*m_buf)[sz], m_start, size ); + m_current = m_start; + m_block_pos += size; + return true; } else { - fwrite( m_start, 1, size, m_file ); + size_t written = fwrite( m_start, 1, size, m_file ); + m_current = m_start; + m_block_pos += size; + return written == (size_t)size; } - m_current = m_start; - m_block_pos += size; } @@ -463,15 +467,17 @@ WLByteStream::~WLByteStream() { } -void WLByteStream::putByte( int val ) +bool WLByteStream::putByte( int val ) { *m_current++ = (uchar)val; if( m_current >= m_end ) - writeBlock(); + return writeBlock(); + + return true; } -void WLByteStream::putBytes( const void* buffer, int count ) +bool WLByteStream::putBytes( 
const void* buffer, int count ) { uchar* data = (uchar*)buffer; @@ -492,12 +498,18 @@ void WLByteStream::putBytes( const void* buffer, int count ) count -= l; } if( m_current == m_end ) - writeBlock(); + { + bool written = writeBlock(); + if (!written) + return false; + } } + + return true; } -void WLByteStream::putWord( int val ) +bool WLByteStream::putWord( int val ) { uchar *current = m_current; @@ -507,17 +519,19 @@ void WLByteStream::putWord( int val ) current[1] = (uchar)(val >> 8); m_current = current + 2; if( m_current == m_end ) - writeBlock(); + return writeBlock(); } else { putByte(val); putByte(val >> 8); } + + return true; } -void WLByteStream::putDWord( int val ) +bool WLByteStream::putDWord( int val ) { uchar *current = m_current; @@ -529,7 +543,7 @@ void WLByteStream::putDWord( int val ) current[3] = (uchar)(val >> 24); m_current = current + 4; if( m_current == m_end ) - writeBlock(); + return writeBlock(); } else { @@ -538,6 +552,8 @@ void WLByteStream::putDWord( int val ) putByte(val >> 16); putByte(val >> 24); } + + return true; } @@ -548,7 +564,7 @@ WMByteStream::~WMByteStream() } -void WMByteStream::putWord( int val ) +bool WMByteStream::putWord( int val ) { uchar *current = m_current; @@ -558,17 +574,19 @@ void WMByteStream::putWord( int val ) current[1] = (uchar)val; m_current = current + 2; if( m_current == m_end ) - writeBlock(); + return writeBlock(); } else { putByte(val >> 8); putByte(val); } + + return true; } -void WMByteStream::putDWord( int val ) +bool WMByteStream::putDWord( int val ) { uchar *current = m_current; @@ -580,7 +598,7 @@ void WMByteStream::putDWord( int val ) current[3] = (uchar)val; m_current = current + 4; if( m_current == m_end ) - writeBlock(); + return writeBlock(); } else { @@ -589,6 +607,8 @@ void WMByteStream::putDWord( int val ) putByte(val >> 8); putByte(val); } + + return true; } } diff --git a/modules/imgcodecs/src/bitstrm.hpp b/modules/imgcodecs/src/bitstrm.hpp index 26947971f3..ebffb91f10 100644 --- 
a/modules/imgcodecs/src/bitstrm.hpp +++ b/modules/imgcodecs/src/bitstrm.hpp @@ -63,6 +63,12 @@ DECLARE_RBS_EXCEPTION(THROW_FORB) DECLARE_RBS_EXCEPTION(BAD_HEADER) #define RBS_BAD_HEADER RBS_BAD_HEADER_Exception(cv::Error::StsError, "Invalid header", CV_Func, __FILE__, __LINE__) +#define CHECK_WRITE(action) \ +if (!action) \ +{ \ + return false; \ +} + typedef unsigned long ulong; // class RBaseStream - base class for other reading streams. @@ -147,7 +153,7 @@ protected: bool m_is_opened; std::vector* m_buf; - virtual void writeBlock(); + virtual bool writeBlock(); virtual void release(); virtual void allocate(); }; @@ -160,10 +166,10 @@ class WLByteStream : public WBaseStream public: virtual ~WLByteStream(); - void putByte( int val ); - void putBytes( const void* buffer, int count ); - void putWord( int val ); - void putDWord( int val ); + bool putByte( int val ); + bool putBytes( const void* buffer, int count ); + bool putWord( int val ); + bool putDWord( int val ); }; @@ -173,8 +179,8 @@ class WMByteStream : public WLByteStream { public: virtual ~WMByteStream(); - void putWord( int val ); - void putDWord( int val ); + bool putWord( int val ); + bool putDWord( int val ); }; inline unsigned BSWAP(unsigned v) diff --git a/modules/imgcodecs/src/grfmt_bmp.cpp b/modules/imgcodecs/src/grfmt_bmp.cpp index 91ef23cc3f..e69a93c78b 100644 --- a/modules/imgcodecs/src/grfmt_bmp.cpp +++ b/modules/imgcodecs/src/grfmt_bmp.cpp @@ -635,38 +635,40 @@ bool BmpEncoder::write( const Mat& img, const std::vector& ) m_buf->reserve( alignSize(fileSize + 16, 256) ); // write signature 'BM' - strm.putBytes( fmtSignBmp, (int)strlen(fmtSignBmp) ); + CHECK_WRITE(strm.putBytes( fmtSignBmp, (int)strlen(fmtSignBmp) )); // write file header - strm.putDWord( validateToInt(fileSize) ); // file size - strm.putDWord( 0 ); - strm.putDWord( headerSize ); + CHECK_WRITE(strm.putDWord( validateToInt(fileSize) )); // file size + CHECK_WRITE(strm.putDWord( 0 )); + CHECK_WRITE(strm.putDWord( headerSize )); // 
write bitmap header - strm.putDWord( bitmapHeaderSize ); - strm.putDWord( width ); - strm.putDWord( height ); - strm.putWord( 1 ); - strm.putWord( channels << 3 ); - strm.putDWord( BMP_RGB ); - strm.putDWord( 0 ); - strm.putDWord( 0 ); - strm.putDWord( 0 ); - strm.putDWord( 0 ); - strm.putDWord( 0 ); + CHECK_WRITE(strm.putDWord( bitmapHeaderSize )); + CHECK_WRITE(strm.putDWord( width )); + CHECK_WRITE(strm.putDWord( height )); + CHECK_WRITE(strm.putWord( 1 )); + CHECK_WRITE(strm.putWord( channels << 3 )); + CHECK_WRITE(strm.putDWord( BMP_RGB )); + CHECK_WRITE(strm.putDWord( 0 )); + CHECK_WRITE(strm.putDWord( 0 )); + CHECK_WRITE(strm.putDWord( 0 )); + CHECK_WRITE(strm.putDWord( 0 )); + CHECK_WRITE(strm.putDWord( 0 )); if( channels == 1 ) { FillGrayPalette( palette, 8 ); - strm.putBytes( palette, sizeof(palette)); + CHECK_WRITE(strm.putBytes( palette, sizeof(palette))); } width *= channels; for( int y = height - 1; y >= 0; y-- ) { - strm.putBytes( img.ptr(y), width ); + CHECK_WRITE(strm.putBytes( img.ptr(y), width )); if( fileStep > width ) - strm.putBytes( zeropad, fileStep - width ); + { + CHECK_WRITE(strm.putBytes( zeropad, fileStep - width )); + } } strm.close(); diff --git a/modules/imgcodecs/src/grfmt_pfm.cpp b/modules/imgcodecs/src/grfmt_pfm.cpp index b213d18fde..61cab06714 100644 --- a/modules/imgcodecs/src/grfmt_pfm.cpp +++ b/modules/imgcodecs/src/grfmt_pfm.cpp @@ -64,11 +64,11 @@ T read_number(cv::RLByteStream& strm) return atoT(str); } -template void write_anything(cv::WLByteStream& strm, const T& t) +template bool write_anything(cv::WLByteStream& strm, const T& t) { std::ostringstream ss; ss << t; - strm.putBytes(ss.str().c_str(), static_cast(ss.str().size())); + return strm.putBytes(ss.str().c_str(), static_cast(ss.str().size())); } } @@ -206,33 +206,33 @@ bool PFMEncoder::write(const Mat& img, const std::vector& params) } Mat float_img; - strm.putByte('P'); + CHECK_WRITE(strm.putByte('P')); switch (img.channels()) { case 1: - strm.putByte('f'); + 
CHECK_WRITE(strm.putByte('f')); img.convertTo(float_img, CV_32FC1); break; case 3: - strm.putByte('F'); + CHECK_WRITE(strm.putByte('F')); img.convertTo(float_img, CV_32FC3); break; default: CV_Error(Error::StsBadArg, "Expected 1 or 3 channel image."); } - strm.putByte('\n'); + CHECK_WRITE(strm.putByte('\n')); - write_anything(strm, float_img.cols); - strm.putByte(' '); - write_anything(strm, float_img.rows); - strm.putByte('\n'); + CHECK_WRITE(write_anything(strm, float_img.cols)); + CHECK_WRITE(strm.putByte(' ')); + CHECK_WRITE(write_anything(strm, float_img.rows)); + CHECK_WRITE(strm.putByte('\n')); #ifdef WORDS_BIGENDIAN - write_anything(strm, 1.0); + CHECK_WRITE(write_anything(strm, 1.0)); #else - write_anything(strm, -1.0); + CHECK_WRITE(write_anything(strm, -1.0)); #endif - strm.putByte('\n'); + CHECK_WRITE(strm.putByte('\n')); // Comments are not officially supported in this file format. // write_anything(strm, "# Generated by OpenCV " CV_VERSION "\n"); @@ -248,17 +248,15 @@ bool PFMEncoder::write(const Mat& img, const std::vector& params) rgb_row[x*3+1] = bgr_row[x*3+1]; rgb_row[x*3+2] = bgr_row[x*3+0]; } - strm.putBytes( reinterpret_cast(rgb_row.data()), - static_cast(sizeof(float) * row_size) ); + CHECK_WRITE(strm.putBytes( reinterpret_cast(rgb_row.data()), + static_cast(sizeof(float) * row_size) )); } else if (float_img.channels() == 1) { - strm.putBytes(float_img.ptr(y), sizeof(float) * float_img.cols); + CHECK_WRITE(strm.putBytes(float_img.ptr(y), sizeof(float) * float_img.cols)); } } return true; } - } - #endif // HAVE_IMGCODEC_PFM diff --git a/modules/imgcodecs/src/grfmt_pxm.cpp b/modules/imgcodecs/src/grfmt_pxm.cpp index d2ce60c743..20c815e833 100644 --- a/modules/imgcodecs/src/grfmt_pxm.cpp +++ b/modules/imgcodecs/src/grfmt_pxm.cpp @@ -479,7 +479,7 @@ bool PxMEncoder::write(const Mat& img, const std::vector& params) header_sz += sz; } - strm.putBytes(buffer, header_sz); + CHECK_WRITE(strm.putBytes(buffer, header_sz)); for( y = 0; y < height; y++ ) 
{ @@ -512,7 +512,7 @@ bool PxMEncoder::write(const Mat& img, const std::vector& params) { *ptr++ = byte; } - strm.putBytes(buffer, (int)(ptr - buffer)); + CHECK_WRITE(strm.putBytes(buffer, (int)(ptr - buffer))); continue; } @@ -539,7 +539,7 @@ bool PxMEncoder::write(const Mat& img, const std::vector& params) } } - strm.putBytes( (channels > 1 || depth > 8) ? buffer : (const char*)data, fileStep); + CHECK_WRITE(strm.putBytes( (channels > 1 || depth > 8) ? buffer : (const char*)data, fileStep)); } else { @@ -610,7 +610,7 @@ bool PxMEncoder::write(const Mat& img, const std::vector& params) *ptr++ = '\n'; - strm.putBytes( buffer, (int)(ptr - buffer) ); + CHECK_WRITE(strm.putBytes( buffer, (int)(ptr - buffer) )); } } diff --git a/modules/imgcodecs/src/grfmt_sunras.cpp b/modules/imgcodecs/src/grfmt_sunras.cpp index 798f295376..852e735477 100644 --- a/modules/imgcodecs/src/grfmt_sunras.cpp +++ b/modules/imgcodecs/src/grfmt_sunras.cpp @@ -410,17 +410,17 @@ bool SunRasterEncoder::write( const Mat& img, const std::vector& ) if( strm.open(m_filename) ) { - strm.putBytes( fmtSignSunRas, (int)strlen(fmtSignSunRas) ); - strm.putDWord( width ); - strm.putDWord( height ); - strm.putDWord( channels*8 ); - strm.putDWord( fileStep*height ); - strm.putDWord( RAS_STANDARD ); - strm.putDWord( RMT_NONE ); - strm.putDWord( 0 ); + CHECK_WRITE(strm.putBytes( fmtSignSunRas, (int)strlen(fmtSignSunRas) )); + CHECK_WRITE(strm.putDWord( width )); + CHECK_WRITE(strm.putDWord( height )); + CHECK_WRITE(strm.putDWord( channels*8 )); + CHECK_WRITE(strm.putDWord( fileStep*height )); + CHECK_WRITE(strm.putDWord( RAS_STANDARD )); + CHECK_WRITE(strm.putDWord( RMT_NONE )); + CHECK_WRITE(strm.putDWord( 0 )); for( y = 0; y < height; y++ ) - strm.putBytes( img.ptr(y), fileStep ); + CHECK_WRITE(strm.putBytes( img.ptr(y), fileStep )); strm.close(); result = true; diff --git a/modules/imgcodecs/src/grfmt_tiff.cpp b/modules/imgcodecs/src/grfmt_tiff.cpp index f68a6e5c0d..e2184663aa 100644 --- 
a/modules/imgcodecs/src/grfmt_tiff.cpp +++ b/modules/imgcodecs/src/grfmt_tiff.cpp @@ -1100,16 +1100,6 @@ bool TiffEncoder::isFormatSupported( int depth ) const return depth == CV_8U || depth == CV_8S || depth == CV_16U || depth == CV_16S || depth == CV_32S || depth == CV_32F || depth == CV_64F; } -void TiffEncoder::writeTag( WLByteStream& strm, TiffTag tag, - TiffFieldType fieldType, - int count, int value ) -{ - strm.putWord( tag ); - strm.putWord( fieldType ); - strm.putDWord( count ); - strm.putDWord( value ); -} - class TiffEncoderBufHelper { public: diff --git a/modules/imgcodecs/src/grfmt_tiff.hpp b/modules/imgcodecs/src/grfmt_tiff.hpp index ee5bcb7018..0d1f511372 100644 --- a/modules/imgcodecs/src/grfmt_tiff.hpp +++ b/modules/imgcodecs/src/grfmt_tiff.hpp @@ -132,10 +132,6 @@ public: ImageEncoder newEncoder() const CV_OVERRIDE; protected: - void writeTag( WLByteStream& strm, TiffTag tag, - TiffFieldType fieldType, - int count, int value ); - bool writeLibTiff( const std::vector& img_vec, const std::vector& params ); bool write_32FC3_SGILOG(const Mat& img, void* tif); From 2db7f8e82764cc4cd26d529155a70499833a0fdf Mon Sep 17 00:00:00 2001 From: chacha21 Date: Thu, 1 Aug 2024 09:36:08 +0200 Subject: [PATCH 09/17] Adding getStdAllocator() to cv::cuda::GpuMat To be on par with `cv::Mat`, let's add `cv::cuda::GpuMat::getStdAllocator()` This is useful anyway, because when a user wants to use custom allocators, he might want to resort to the standard default allocator behaviour, not some other allocator that could have been set by `setDefaultAllocator()` --- modules/core/include/opencv2/core/cuda.hpp | 1 + modules/core/src/cuda/gpu_mat.cu | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/modules/core/include/opencv2/core/cuda.hpp b/modules/core/include/opencv2/core/cuda.hpp index 9d210ed7b5..6cd6711582 100644 --- a/modules/core/include/opencv2/core/cuda.hpp +++ b/modules/core/include/opencv2/core/cuda.hpp @@ -118,6 +118,7 @@ public: //! 
default allocator CV_WRAP static GpuMat::Allocator* defaultAllocator(); CV_WRAP static void setDefaultAllocator(GpuMat::Allocator* allocator); + CV_WRAP static GpuMat::Allocator* getStdAllocator(); //! default constructor CV_WRAP explicit GpuMat(GpuMat::Allocator* allocator = GpuMat::defaultAllocator()); diff --git a/modules/core/src/cuda/gpu_mat.cu b/modules/core/src/cuda/gpu_mat.cu index c286f28eb0..a86888cac3 100644 --- a/modules/core/src/cuda/gpu_mat.cu +++ b/modules/core/src/cuda/gpu_mat.cu @@ -135,6 +135,7 @@ namespace DefaultAllocator cudaDefaultAllocator; GpuMat::Allocator* g_defaultAllocator = &cudaDefaultAllocator; + GpuMat::Allocator* g_stdAllocator = &cudaDefaultAllocator; } GpuMat::Allocator* cv::cuda::GpuMat::defaultAllocator() @@ -148,6 +149,12 @@ void cv::cuda::GpuMat::setDefaultAllocator(Allocator* allocator) g_defaultAllocator = allocator; } +GpuMat::Allocator* cv::cuda::GpuMat::getStdAllocator() +{ + return g_stdAllocator; +} + + ///////////////////////////////////////////////////// /// create From f67d4852bf3845febbb28f330aa5365af65195ba Mon Sep 17 00:00:00 2001 From: chacha21 Date: Thu, 1 Aug 2024 10:00:31 +0200 Subject: [PATCH 10/17] Added no-imp placeholder when HAVE_CUDA is false --- modules/core/src/cuda_gpu_mat.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules/core/src/cuda_gpu_mat.cpp b/modules/core/src/cuda_gpu_mat.cpp index a245b1a293..84b5b210d1 100644 --- a/modules/core/src/cuda_gpu_mat.cpp +++ b/modules/core/src/cuda_gpu_mat.cpp @@ -420,6 +420,11 @@ void cv::cuda::GpuMat::setDefaultAllocator(Allocator* allocator) throw_no_cuda(); } +GpuMat::Allocator* cv::cuda::GpuMat::getStdAllocator() +{ + return 0; +} + void cv::cuda::GpuMat::create(int _rows, int _cols, int _type) { CV_UNUSED(_rows); From 35463e079c60df2d5622c762460dc054b938b305 Mon Sep 17 00:00:00 2001 From: Junyan721113 Date: Wed, 12 Jun 2024 16:01:43 +0800 Subject: [PATCH 11/17] feat: Part 1.5 - New Interfaces --- 3rdparty/ndsrvp/include/core.hpp | 2 +- 
3rdparty/ndsrvp/include/imgproc.hpp | 45 +-- 3rdparty/ndsrvp/ndsrvp_hal.hpp | 5 +- 3rdparty/ndsrvp/src/cvutils.cpp | 78 +++++ 3rdparty/ndsrvp/src/cvutils.hpp | 108 ++++++ 3rdparty/ndsrvp/src/integral.cpp | 2 + 3rdparty/ndsrvp/src/remap.cpp | 188 +++++++++++ 3rdparty/ndsrvp/src/threshold.cpp | 147 ++++----- 3rdparty/ndsrvp/src/warpAffine.cpp | 174 +++------- 3rdparty/ndsrvp/src/warpPerspective.cpp | 208 ++++-------- CMakeLists.txt | 2 +- .../include/opencv2/imgproc/hal/hal.hpp | 8 + .../include/opencv2/imgproc/hal/interface.h | 6 + modules/imgproc/src/hal_replacement.hpp | 50 +++ modules/imgproc/src/imgwarp.cpp | 307 ++++++++++-------- 15 files changed, 821 insertions(+), 509 deletions(-) create mode 100644 3rdparty/ndsrvp/src/cvutils.cpp create mode 100644 3rdparty/ndsrvp/src/cvutils.hpp create mode 100644 3rdparty/ndsrvp/src/remap.cpp diff --git a/3rdparty/ndsrvp/include/core.hpp b/3rdparty/ndsrvp/include/core.hpp index 190a1b926b..ee57668539 100644 --- a/3rdparty/ndsrvp/include/core.hpp +++ b/3rdparty/ndsrvp/include/core.hpp @@ -1,6 +1,6 @@ // This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. +// of this distribution and at http://opencv.org/license.html. #ifndef OPENCV_NDSRVP_CORE_HPP #define OPENCV_NDSRVP_CORE_HPP diff --git a/3rdparty/ndsrvp/include/imgproc.hpp b/3rdparty/ndsrvp/include/imgproc.hpp index 3a572172a8..94104f0b71 100644 --- a/3rdparty/ndsrvp/include/imgproc.hpp +++ b/3rdparty/ndsrvp/include/imgproc.hpp @@ -1,18 +1,12 @@ // This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. +// of this distribution and at http://opencv.org/license.html. 
#ifndef OPENCV_NDSRVP_IMGPROC_HPP #define OPENCV_NDSRVP_IMGPROC_HPP namespace cv { -// ################ remap ################ - -void remap(InputArray _src, OutputArray _dst, - InputArray _map1, InputArray _map2, - int interpolation, int borderType, const Scalar& borderValue); - namespace ndsrvp { enum InterpolationMasks { @@ -36,23 +30,36 @@ int integral(int depth, int sdepth, int sqdepth, // ################ warpAffine ################ -int warpAffine(int src_type, - const uchar* src_data, size_t src_step, int src_width, int src_height, - uchar* dst_data, size_t dst_step, int dst_width, int dst_height, - const double M[6], int interpolation, int borderType, const double borderValue[4]); +int warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw); -#undef cv_hal_warpAffine -#define cv_hal_warpAffine (cv::ndsrvp::warpAffine) +#undef cv_hal_warpAffineBlocklineNN +#define cv_hal_warpAffineBlocklineNN (cv::ndsrvp::warpAffineBlocklineNN) + +int warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw); + +#undef cv_hal_warpAffineBlockline +#define cv_hal_warpAffineBlockline (cv::ndsrvp::warpAffineBlockline) // ################ warpPerspective ################ -int warpPerspective(int src_type, - const uchar* src_data, size_t src_step, int src_width, int src_height, - uchar* dst_data, size_t dst_step, int dst_width, int dst_height, - const double M[9], int interpolation, int borderType, const double borderValue[4]); +int warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw); + +#undef cv_hal_warpPerspectiveBlocklineNN +#define cv_hal_warpPerspectiveBlocklineNN (cv::ndsrvp::warpPerspectiveBlocklineNN) + +int warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw); + +#undef cv_hal_warpPerspectiveBlockline +#define cv_hal_warpPerspectiveBlockline (cv::ndsrvp::warpPerspectiveBlockline) + +// ################ remap 
################ + +int remap32f(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, + uchar *dst_data, size_t dst_step, int dst_width, int dst_height, float* mapx, size_t mapx_step, + float* mapy, size_t mapy_step, int interpolation, int border_type, const double border_value[4]); -#undef cv_hal_warpPerspective -#define cv_hal_warpPerspective (cv::ndsrvp::warpPerspective) +#undef cv_hal_remap32f +#define cv_hal_remap32f (cv::ndsrvp::remap32f) // ################ threshold ################ diff --git a/3rdparty/ndsrvp/ndsrvp_hal.hpp b/3rdparty/ndsrvp/ndsrvp_hal.hpp index 7f12636520..8ceac78db3 100644 --- a/3rdparty/ndsrvp/ndsrvp_hal.hpp +++ b/3rdparty/ndsrvp/ndsrvp_hal.hpp @@ -1,13 +1,14 @@ // This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. +// of this distribution and at http://opencv.org/license.html. #ifndef OPENCV_NDSRVP_HAL_HPP #define OPENCV_NDSRVP_HAL_HPP -#include "opencv2/core/mat.hpp" #include +#include "opencv2/core/hal/interface.h" + #include "include/core.hpp" #include "include/imgproc.hpp" #include "include/features2d.hpp" diff --git a/3rdparty/ndsrvp/src/cvutils.cpp b/3rdparty/ndsrvp/src/cvutils.cpp new file mode 100644 index 0000000000..48e025488f --- /dev/null +++ b/3rdparty/ndsrvp/src/cvutils.cpp @@ -0,0 +1,78 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "cvutils.hpp" + +namespace cv { + +namespace ndsrvp { + +// fastMalloc + +// [0][1][2][3][4][5][6][7][8][9] +// ^udata +// ^adata +// ^adata[-1] == udata + +void* fastMalloc(size_t size) +{ + uchar* udata = (uchar*)malloc(size + sizeof(void*) + CV_MALLOC_ALIGN); + if(!udata) + ndsrvp_error(Error::StsNoMem, "fastMalloc(): Not enough memory"); + uchar** adata = (uchar**)align((size_t)((uchar**)udata + 1), CV_MALLOC_ALIGN); + adata[-1] = udata; + return adata; +} + +void fastFree(void* ptr) +{ + if(ptr) + { + uchar* udata = ((uchar**)ptr)[-1]; + if(!(udata < (uchar*)ptr && ((uchar*)ptr - udata) <= (ptrdiff_t)(sizeof(void*) + CV_MALLOC_ALIGN))) + ndsrvp_error(Error::StsBadArg, "fastFree(): Invalid memory block"); + free(udata); + } +} + +// borderInterpolate + +int borderInterpolate(int p, int len, int borderType) +{ + if( (unsigned)p < (unsigned)len ) + ; + else if( borderType == CV_HAL_BORDER_REPLICATE ) + p = p < 0 ? 0 : len - 1; + else if( borderType == CV_HAL_BORDER_REFLECT || borderType == CV_HAL_BORDER_REFLECT_101 ) + { + int delta = borderType == CV_HAL_BORDER_REFLECT_101; + if( len == 1 ) + return 0; + do + { + if( p < 0 ) + p = -p - 1 + delta; + else + p = len - 1 - (p - len) - delta; + } + while( (unsigned)p >= (unsigned)len ); + } + else if( borderType == CV_HAL_BORDER_WRAP ) + { + ndsrvp_assert(len > 0); + if( p < 0 ) + p -= ((p - len + 1) / len) * len; + if( p >= len ) + p %= len; + } + else if( borderType == CV_HAL_BORDER_CONSTANT ) + p = -1; + else + ndsrvp_error(Error::StsBadArg, "borderInterpolate(): Unknown/unsupported border type"); + return p; +} + +} // namespace ndsrvp + +} // namespace cv diff --git a/3rdparty/ndsrvp/src/cvutils.hpp b/3rdparty/ndsrvp/src/cvutils.hpp new file mode 100644 index 0000000000..8cf1476ed6 --- /dev/null +++ b/3rdparty/ndsrvp/src/cvutils.hpp @@ -0,0 +1,108 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_NDSRVP_CVUTILS_HPP +#define OPENCV_NDSRVP_CVUTILS_HPP + +#include + +#include "opencv2/core/hal/interface.h" + +#include +#include +#include +#include +#include +#include +#include + +// misc functions that not exposed to public interface + +namespace cv { + +namespace ndsrvp { + +void* fastMalloc(size_t size); +void fastFree(void* ptr); +int borderInterpolate(int p, int len, int borderType); + +#ifndef MAX +# define MAX(a,b) ((a) < (b) ? (b) : (a)) +#endif + +#define CV_MAT_CN_MASK ((CV_CN_MAX - 1) << CV_CN_SHIFT) +#define CV_MAT_CN(flags) ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1) + +#define CV_MALLOC_ALIGN 64 + +// error codes + +enum Error{ + StsNoMem = -4, + StsBadArg = -5, + StsAssert = -215 +}; + +// output error + +#define ndsrvp_assert(expr) { if(!(expr)) ndsrvp_error(Error::StsAssert, std::string(#expr)); } + +inline void ndsrvp_error(int code, std::string msg = "") +{ + std::cerr << "NDSRVP Error: code " << code << std::endl; + if(!msg.empty()) + std::cerr << msg << std::endl; + if(code < 0) + throw code; +} + +// clip & vclip + +inline int clip(int x, int a, int b) +{ + return x >= a ? (x < b ? 
x : b - 1) : a; +} + +inline int32x2_t vclip(int32x2_t x, int32x2_t a, int32x2_t b) +{ + return (int32x2_t)__nds__bpick((long)a, __nds__bpick((long)(b - 1), (long)x, (long)(x < b)), (long)(x >= a)); +} + +// saturate + +template static inline _Tp saturate_cast(int v) { return _Tp(v); } +template static inline _Tp saturate_cast(float v) { return _Tp(v); } +template static inline _Tp saturate_cast(double v) { return _Tp(v); } + +template<> inline uchar saturate_cast(int v) { return __nds__uclip32(v, 8); } +template<> inline uchar saturate_cast(float v) { return saturate_cast((int)lrintf(v)); } +template<> inline uchar saturate_cast(double v) { return saturate_cast((int)lrint(v)); } + +template<> inline char saturate_cast(int v) { return __nds__sclip32(v, 7); } +template<> inline char saturate_cast(float v) { return saturate_cast((int)lrintf(v)); } +template<> inline char saturate_cast(double v) { return saturate_cast((int)lrint(v)); } + +template<> inline ushort saturate_cast(int v) { return __nds__uclip32(v, 16); } +template<> inline ushort saturate_cast(float v) { return saturate_cast((int)lrintf(v)); } +template<> inline ushort saturate_cast(double v) { return saturate_cast((int)lrint(v)); } + +template<> inline short saturate_cast(int v) { return __nds__sclip32(v, 15); } +template<> inline short saturate_cast(float v) { return saturate_cast((int)lrintf(v)); } +template<> inline short saturate_cast(double v) { return saturate_cast((int)lrint(v)); } + +template<> inline int saturate_cast(float v) { return (int)lrintf(v); } +template<> inline int saturate_cast(double v) { return (int)lrint(v); } + +// align + +inline long align(size_t v, int n) +{ + return (v + n - 1) & -n; +} + +} // namespace ndsrvp + +} // namespace cv + +#endif diff --git a/3rdparty/ndsrvp/src/integral.cpp b/3rdparty/ndsrvp/src/integral.cpp index 37030a8d4c..e1dd993a90 100644 --- a/3rdparty/ndsrvp/src/integral.cpp +++ b/3rdparty/ndsrvp/src/integral.cpp @@ -3,6 +3,8 @@ // of this distribution and 
at http://opencv.org/license.html. #include "ndsrvp_hal.hpp" +#include "opencv2/imgproc/hal/interface.h" +#include "cvutils.hpp" namespace cv { diff --git a/3rdparty/ndsrvp/src/remap.cpp b/3rdparty/ndsrvp/src/remap.cpp new file mode 100644 index 0000000000..30e4d218e3 --- /dev/null +++ b/3rdparty/ndsrvp/src/remap.cpp @@ -0,0 +1,188 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "ndsrvp_hal.hpp" +#include "opencv2/imgproc/hal/interface.h" +#include "cvutils.hpp" + +namespace cv { + +namespace ndsrvp { + +int remap32f(int src_type, const uchar* src_data, size_t src_step, int src_width, int src_height, + uchar* dst_data, size_t dst_step, int dst_width, int dst_height, float* mapx, size_t mapx_step, + float* mapy, size_t mapy_step, int interpolation, int border_type, const double border_value[4]) +{ + const bool isRelative = ((interpolation & CV_HAL_WARP_RELATIVE_MAP) != 0); + interpolation &= ~CV_HAL_WARP_RELATIVE_MAP; + + if( interpolation == CV_HAL_INTER_AREA ) + interpolation = CV_HAL_INTER_LINEAR; + + if( interpolation != CV_HAL_INTER_NEAREST ) + return CV_HAL_ERROR_NOT_IMPLEMENTED; + + // only CV_8U + if( (src_type & CV_MAT_DEPTH_MASK) != CV_8U ) + return CV_HAL_ERROR_NOT_IMPLEMENTED; + + int cn = CV_MAT_CN(src_type); + + src_step /= sizeof(uchar); + dst_step /= sizeof(uchar); + + // mapping CV_32FC1 + mapx_step /= sizeof(float); + mapy_step /= sizeof(float); + + // border + uchar border_const[CV_CN_MAX]; + for( int k = 0; k < CV_CN_MAX; k++ ) + border_const[k] = saturate_cast(border_value[k & 3]); + + // divide into blocks + const int BLOCK_SIZE = 1024; + int x, y, x1, y1; + std::array aXY; + short* XY = aXY.data(); + size_t XY_step = BLOCK_SIZE * 2; + + // vectorize + const int32x2_t src_wh = {src_width, src_height}; + const int32x2_t arr_index = {cn, (int)src_step}; + + for (y = 0; y < dst_height; 
y += BLOCK_SIZE) + { + int dy = std::min(BLOCK_SIZE, dst_height - y); + for (x = 0; x < dst_width; x += BLOCK_SIZE) + { + const int off_y = isRelative ? y : 0; + const int off_x = isRelative ? x : 0; + const int32x2_t voff = {off_x, off_y}; + + int dx = std::min(BLOCK_SIZE, dst_width - x); + // prepare mapping data XY + for (y1 = 0; y1 < dy; y1++) + { + short* rXY = XY + y1 * XY_step; + const float* sX = mapx + (y + y1) * mapx_step + x; + const float* sY = mapy + (y + y1) * mapy_step + x; + for (x1 = 0; x1 < dx; x1++) + { + rXY[x1 * 2] = saturate_cast(sX[x1]); + rXY[x1 * 2 + 1] = saturate_cast(sY[x1]); + } + } + + // precalulate offset + if(isRelative) + { + int16x8_t voff_x; + int16x8_t voff_y = {0, 0, 1, 0, 2, 0, 3, 0}; + int16x8_t vones_x = {4, 0, 4, 0, 4, 0, 4, 0}; + int16x8_t vones_y = {0, 1, 0, 1, 0, 1, 0, 1}; + for(y1 = 0; y1 < BLOCK_SIZE; y1++, voff_y += vones_y) + { + int16x8_t* vrXY = (int16x8_t*)(XY + y1 * XY_step); + for(x1 = 0, voff_x = voff_y; x1 < BLOCK_SIZE; x1 += 4, vrXY++, voff_x += vones_x) + { + *vrXY += voff_x; + } + } + } + + // process the block + for( y1 = 0; y1 < dy; y1++ ) + { + uchar* dst_row = dst_data + (y + y1) * dst_step + x * cn; + const short* rXY = XY + y1 * XY_step; + if( cn == 1 ) + { + for( x1 = 0; x1 < dx; x1++ ) + { + int32x2_t vsxy = (int32x2_t){rXY[x1 * 2], rXY[x1 * 2 + 1]} + voff; + if( (long)((uint32x2_t)vsxy < (uint32x2_t)src_wh) == -1 ) + dst_row[x1] = src_data[__nds__v_smar64(0, vsxy, arr_index)]; + else + { + if( border_type == CV_HAL_BORDER_REPLICATE ) + { + vsxy = vclip(vsxy, (int32x2_t){0, 0}, src_wh); + dst_row[x1] = src_data[__nds__v_smar64(0, vsxy, arr_index)]; + } + else if( border_type == CV_HAL_BORDER_CONSTANT ) + dst_row[x1] = border_const[0]; + else if( border_type != CV_HAL_BORDER_TRANSPARENT ) + { + vsxy[0] = borderInterpolate(vsxy[0], src_width, border_type); + vsxy[1] = borderInterpolate(vsxy[1], src_height, border_type); + dst_row[x1] = src_data[__nds__v_smar64(0, vsxy, arr_index)]; + } + } + } + } + 
else + { + uchar* dst_ptr = dst_row; + for(x1 = 0; x1 < dx; x1++, dst_ptr += cn ) + { + int32x2_t vsxy = (int32x2_t){rXY[x1 * 2], rXY[x1 * 2 + 1]} + voff; + const uchar *src_ptr; + if( (long)((uint32x2_t)vsxy < (uint32x2_t)src_wh) == -1 ) + { + if( cn == 3 ) + { + src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index); + dst_ptr[0] = src_ptr[0]; dst_ptr[1] = src_ptr[1]; dst_ptr[2] = src_ptr[2]; + // performance loss, commented out + // *(unsigned*)dst_ptr = __nds__bpick(*(unsigned*)dst_ptr, *(unsigned*)src_ptr, 0xFF000000); + } + else if( cn == 4 ) + { + src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index); + *(uint8x4_t*)dst_ptr = *(uint8x4_t*)src_ptr; + } + else + { + src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index); + int k = cn; + for(; k >= 8; k -= 8, dst_ptr += 8, src_ptr += 8) + *(uint8x8_t*)dst_ptr = *(uint8x8_t*)src_ptr; + while( k-- ) + dst_ptr[k] = src_ptr[k]; + } + } + else if( border_type != CV_HAL_BORDER_TRANSPARENT ) + { + if( border_type == CV_HAL_BORDER_REPLICATE ) + { + vsxy = vclip(vsxy, (int32x2_t){0, 0}, src_wh); + src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index); + } + else if( border_type == CV_HAL_BORDER_CONSTANT ) + src_ptr = &border_const[0]; + else + { + vsxy[0] = borderInterpolate(vsxy[0], src_width, border_type); + vsxy[1] = borderInterpolate(vsxy[1], src_height, border_type); + src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index); + } + int k = cn; + for(; k >= 8; k -= 8, dst_ptr += 8, src_ptr += 8) + *(uint8x8_t*)dst_ptr = *(uint8x8_t*)src_ptr; + while( k-- ) + dst_ptr[k] = src_ptr[k]; + } + } + } + } + } + } + + return CV_HAL_ERROR_OK; +} + +} // namespace ndsrvp + +} // namespace cv diff --git a/3rdparty/ndsrvp/src/threshold.cpp b/3rdparty/ndsrvp/src/threshold.cpp index 06de591fef..0812100311 100644 --- a/3rdparty/ndsrvp/src/threshold.cpp +++ b/3rdparty/ndsrvp/src/threshold.cpp @@ -4,65 +4,44 @@ #include "ndsrvp_hal.hpp" #include 
"opencv2/imgproc/hal/interface.h" +#include "cvutils.hpp" namespace cv { namespace ndsrvp { template -class operators_threshold_t { -public: - virtual ~operators_threshold_t() {}; - virtual inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) - { - (void)src; - (void)thresh; - (void)maxval; - CV_Error(cv::Error::StsBadArg, ""); - return vtype(); - } - virtual inline type scalar(const type& src, const type& thresh, const type& maxval) - { - (void)src; - (void)thresh; - (void)maxval; - CV_Error(cv::Error::StsBadArg, ""); - return type(); - } -}; - -template -class opThreshBinary : public operators_threshold_t { - inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override +struct opThreshBinary_t { + inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) { return (vtype)__nds__bpick((long)maxval, (long)0, (long)(src > thresh)); } - inline type scalar(const type& src, const type& thresh, const type& maxval) override + inline type scalar(const type& src, const type& thresh, const type& maxval) { return src > thresh ? maxval : 0; } }; template -class opThreshBinaryInv : public operators_threshold_t { - inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override +struct opThreshBinaryInv_t { + inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) { return (vtype)__nds__bpick((long)0, (long)maxval, (long)(src > thresh)); } - inline type scalar(const type& src, const type& thresh, const type& maxval) override + inline type scalar(const type& src, const type& thresh, const type& maxval) { return src > thresh ? 
0 : maxval; } }; template -class opThreshTrunc : public operators_threshold_t { - inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override +struct opThreshTrunc_t { + inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) { (void)maxval; return (vtype)__nds__bpick((long)thresh, (long)src, (long)(src > thresh)); } - inline type scalar(const type& src, const type& thresh, const type& maxval) override + inline type scalar(const type& src, const type& thresh, const type& maxval) { (void)maxval; return src > thresh ? thresh : src; @@ -70,13 +49,13 @@ class opThreshTrunc : public operators_threshold_t { }; template -class opThreshToZero : public operators_threshold_t { - inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override +struct opThreshToZero_t { + inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) { (void)maxval; return (vtype)__nds__bpick((long)src, (long)0, (long)(src > thresh)); } - inline type scalar(const type& src, const type& thresh, const type& maxval) override + inline type scalar(const type& src, const type& thresh, const type& maxval) { (void)maxval; return src > thresh ? src : 0; @@ -84,29 +63,36 @@ class opThreshToZero : public operators_threshold_t { }; template -class opThreshToZeroInv : public operators_threshold_t { - inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override +struct opThreshToZeroInv_t { + inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) { (void)maxval; return (vtype)__nds__bpick((long)0, (long)src, (long)(src > thresh)); } - inline type scalar(const type& src, const type& thresh, const type& maxval) override + inline type scalar(const type& src, const type& thresh, const type& maxval) { (void)maxval; return src > thresh ? 
0 : src; } }; -template -static void threshold_op(const type* src_data, size_t src_step, - type* dst_data, size_t dst_step, +template typename opThresh_t> +static inline void threshold_op(const uchar* src, size_t src_step, + uchar* dst, size_t dst_step, int width, int height, int cn, - type thresh, type maxval, int thtype) + double thresh_d, double maxval_d) { int i, j; width *= cn; + + type* src_data = (type*)src; + type* dst_data = (type*)dst; src_step /= sizeof(type); dst_step /= sizeof(type); + + type thresh = saturate_cast(thresh_d); + type maxval = saturate_cast(maxval_d); vtype vthresh; vtype vmaxval; for (i = 0; i < nlane; i++) { @@ -114,62 +100,63 @@ static void threshold_op(const type* src_data, size_t src_step, vmaxval[i] = maxval; } - operators_threshold_t* op; - switch (thtype) { - case CV_HAL_THRESH_BINARY: - op = new opThreshBinary(); - break; - case CV_HAL_THRESH_BINARY_INV: - op = new opThreshBinaryInv(); - break; - case CV_HAL_THRESH_TRUNC: - op = new opThreshTrunc(); - break; - case CV_HAL_THRESH_TOZERO: - op = new opThreshToZero(); - break; - case CV_HAL_THRESH_TOZERO_INV: - op = new opThreshToZeroInv(); - break; - default: - CV_Error(cv::Error::StsBadArg, ""); - return; - } + opThresh_t opThresh; for (i = 0; i < height; i++, src_data += src_step, dst_data += dst_step) { for (j = 0; j <= width - nlane; j += nlane) { - vtype vs = *(vtype*)(src_data + j); - *(vtype*)(dst_data + j) = op->vector(vs, vthresh, vmaxval); + *(vtype*)(dst_data + j) = opThresh.vector(*(vtype*)(src_data + j), vthresh, vmaxval); } for (; j < width; j++) { - dst_data[j] = op->scalar(src_data[j], thresh, maxval); + dst_data[j] = opThresh.scalar(src_data[j], thresh, maxval); } } - delete op; return; } +typedef void (*ThreshFunc)(const uchar* src_data, size_t src_step, + uchar* dst_data, size_t dst_step, + int width, int height, int cn, + double thresh, double maxval); + int threshold(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int 
height, int depth, int cn, double thresh, double maxValue, int thresholdType) { - if (width <= 255 && height <= 255) // slower at small size - return CV_HAL_ERROR_NOT_IMPLEMENTED; - if (depth == CV_8U) { - threshold_op((uchar*)src_data, src_step, (uchar*)dst_data, dst_step, width, height, cn, (uchar)thresh, (uchar)maxValue, thresholdType); - return CV_HAL_ERROR_OK; - } else if (depth == CV_16S) { - threshold_op((short*)src_data, src_step, (short*)dst_data, dst_step, width, height, cn, (short)thresh, (short)maxValue, thresholdType); - return CV_HAL_ERROR_OK; - } else if (depth == CV_16U) { - threshold_op((ushort*)src_data, src_step, (ushort*)dst_data, dst_step, width, height, cn, (ushort)thresh, (ushort)maxValue, thresholdType); - return CV_HAL_ERROR_OK; - } else { + static ThreshFunc thfuncs[4][5] = + { + { + threshold_op, + threshold_op, + threshold_op, + threshold_op, + threshold_op }, + { + threshold_op, + threshold_op, + threshold_op, + threshold_op, + threshold_op }, + { + threshold_op, + threshold_op, + threshold_op, + threshold_op, + threshold_op }, + { + threshold_op, + threshold_op, + threshold_op, + threshold_op, + threshold_op } + }; + + if(depth < 0 || depth > 3 || thresholdType < 0 || thresholdType > 4 || (width < 256 && height < 256)) return CV_HAL_ERROR_NOT_IMPLEMENTED; - } - return CV_HAL_ERROR_NOT_IMPLEMENTED; + + thfuncs[depth][thresholdType](src_data, src_step, dst_data, dst_step, width, height, cn, thresh, maxValue); + return CV_HAL_ERROR_OK; } } // namespace ndsrvp diff --git a/3rdparty/ndsrvp/src/warpAffine.cpp b/3rdparty/ndsrvp/src/warpAffine.cpp index d54e4dc237..4257361d1d 100644 --- a/3rdparty/ndsrvp/src/warpAffine.cpp +++ b/3rdparty/ndsrvp/src/warpAffine.cpp @@ -3,148 +3,68 @@ // of this distribution and at http://opencv.org/license.html. 
#include "ndsrvp_hal.hpp" -#include "opencv2/core.hpp" #include "opencv2/imgproc/hal/interface.h" +#include "cvutils.hpp" namespace cv { namespace ndsrvp { -class WarpAffineInvoker : public ParallelLoopBody { -public: - WarpAffineInvoker(const Mat& _src, Mat& _dst, int _interpolation, int _borderType, - const Scalar& _borderValue, int* _adelta, int* _bdelta, const double* _M) - : ParallelLoopBody() - , src(_src) - , dst(_dst) - , interpolation(_interpolation) - , borderType(_borderType) - , borderValue(_borderValue) - , adelta(_adelta) - , bdelta(_bdelta) - , M(_M) - { +int warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw) +{ + const int AB_BITS = MAX(10, (int)INTER_BITS); + int x1 = 0; + + for (; x1 < bw; x1 += 2) { + int32x2_t vX = { X0 + adelta[x1], X0 + adelta[x1 + 1] }; + int32x2_t vY = { Y0 + bdelta[x1], Y0 + bdelta[x1 + 1] }; + + vX = __nds__v_sclip32(__nds__v_sra32(vX, AB_BITS), 15); + vY = __nds__v_sclip32(__nds__v_sra32(vY, AB_BITS), 15); + + *(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vY, (unsigned long)vX); } - virtual void operator()(const Range& range) const CV_OVERRIDE - { - const int BLOCK_SZ = 64; - AutoBuffer __XY(BLOCK_SZ * BLOCK_SZ * 2), __A(BLOCK_SZ * BLOCK_SZ); - short *XY = __XY.data(), *A = __A.data(); - const int AB_BITS = MAX(10, (int)INTER_BITS); - const int AB_SCALE = 1 << AB_BITS; - int round_delta = interpolation == CV_HAL_INTER_NEAREST ? 
AB_SCALE / 2 : AB_SCALE / INTER_TAB_SIZE / 2, x, y, x1, y1; - - int bh0 = std::min(BLOCK_SZ / 2, dst.rows); - int bw0 = std::min(BLOCK_SZ * BLOCK_SZ / bh0, dst.cols); - bh0 = std::min(BLOCK_SZ * BLOCK_SZ / bw0, dst.rows); - - for (y = range.start; y < range.end; y += bh0) { - for (x = 0; x < dst.cols; x += bw0) { - int bw = std::min(bw0, dst.cols - x); - int bh = std::min(bh0, range.end - y); - - Mat _XY(bh, bw, CV_16SC2, XY); - Mat dpart(dst, Rect(x, y, bw, bh)); - - for (y1 = 0; y1 < bh; y1++) { - short* xy = XY + y1 * bw * 2; - int X0 = saturate_cast((M[1] * (y + y1) + M[2]) * AB_SCALE) + round_delta; - int Y0 = saturate_cast((M[4] * (y + y1) + M[5]) * AB_SCALE) + round_delta; - - if (interpolation == CV_HAL_INTER_NEAREST) { - x1 = 0; - - for (; x1 < bw; x1 += 2) { - int32x2_t vX = { X0 + adelta[x + x1], X0 + adelta[x + x1 + 1] }; - int32x2_t vY = { Y0 + bdelta[x + x1], Y0 + bdelta[x + x1 + 1] }; - - vX = __nds__v_sclip32(__nds__v_sra32(vX, AB_BITS), 15); - vY = __nds__v_sclip32(__nds__v_sra32(vY, AB_BITS), 15); - - *(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vY, (unsigned long)vX); - } - - for (; x1 < bw; x1++) { - int X = (X0 + adelta[x + x1]) >> AB_BITS; - int Y = (Y0 + bdelta[x + x1]) >> AB_BITS; - xy[x1 * 2] = saturate_cast(X); - xy[x1 * 2 + 1] = saturate_cast(Y); - } - } else { - short* alpha = A + y1 * bw; - x1 = 0; - - const int INTER_MASK = INTER_TAB_SIZE - 1; - const uint32x2_t vmask = { INTER_MASK, INTER_MASK }; - for (; x1 < bw; x1 += 2) { - int32x2_t vX = { X0 + adelta[x + x1], X0 + adelta[x + x1 + 1] }; - int32x2_t vY = { Y0 + bdelta[x + x1], Y0 + bdelta[x + x1 + 1] }; - vX = __nds__v_sra32(vX, (AB_BITS - INTER_BITS)); - vY = __nds__v_sra32(vY, (AB_BITS - INTER_BITS)); - - int32x2_t vx = __nds__v_sclip32(__nds__v_sra32(vX, INTER_BITS), 15); - int32x2_t vy = __nds__v_sclip32(__nds__v_sra32(vY, INTER_BITS), 15); - - *(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vy, (unsigned long)vx); - - uint32x2_t 
valpha = __nds__v_uadd32(__nds__v_sll32((uint32x2_t)(vY & vmask), INTER_BITS), (uint32x2_t)(vX & vmask)); - *(int16x2_t*)(alpha + x1) = (int16x2_t) { (short)(valpha[0]), (short)(valpha[1]) }; - } - - for (; x1 < bw; x1++) { - int X = (X0 + adelta[x + x1]) >> (AB_BITS - INTER_BITS); - int Y = (Y0 + bdelta[x + x1]) >> (AB_BITS - INTER_BITS); - xy[x1 * 2] = saturate_cast(X >> INTER_BITS); - xy[x1 * 2 + 1] = saturate_cast(Y >> INTER_BITS); - alpha[x1] = (short)((Y & (INTER_TAB_SIZE - 1)) * INTER_TAB_SIZE + (X & (INTER_TAB_SIZE - 1))); - } - } - } - - if (interpolation == CV_HAL_INTER_NEAREST) - remap(src, dpart, _XY, Mat(), interpolation, borderType, borderValue); - else { - Mat _matA(bh, bw, CV_16U, A); - remap(src, dpart, _XY, _matA, interpolation, borderType, borderValue); - } - } - } + for (; x1 < bw; x1++) { + int X = X0 + adelta[x1]; + int Y = Y0 + bdelta[x1]; + xy[x1 * 2] = saturate_cast(X); + xy[x1 * 2 + 1] = saturate_cast(Y); } -private: - Mat src; - Mat dst; - int interpolation, borderType; - Scalar borderValue; - int *adelta, *bdelta; - const double* M; -}; - -int warpAffine(int src_type, - const uchar* src_data, size_t src_step, int src_width, int src_height, - uchar* dst_data, size_t dst_step, int dst_width, int dst_height, - const double M[6], int interpolation, int borderType, const double borderValue[4]) -{ - Mat src(Size(src_width, src_height), src_type, const_cast(src_data), src_step); - Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step); + return CV_HAL_ERROR_OK; +} - int x; - AutoBuffer _abdelta(dst.cols * 2); - int *adelta = &_abdelta[0], *bdelta = adelta + dst.cols; +int warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw) +{ const int AB_BITS = MAX(10, (int)INTER_BITS); - const int AB_SCALE = 1 << AB_BITS; + int x1 = 0; + + const int INTER_MASK = INTER_TAB_SIZE - 1; + const uint32x2_t vmask = { INTER_MASK, INTER_MASK }; + for (; x1 < bw; x1 += 2) { + int32x2_t vX = { X0 + adelta[x1], X0 + 
adelta[x1 + 1] }; + int32x2_t vY = { Y0 + bdelta[x1], Y0 + bdelta[x1 + 1] }; + vX = __nds__v_sra32(vX, (AB_BITS - INTER_BITS)); + vY = __nds__v_sra32(vY, (AB_BITS - INTER_BITS)); + + int32x2_t vx = __nds__v_sclip32(__nds__v_sra32(vX, INTER_BITS), 15); + int32x2_t vy = __nds__v_sclip32(__nds__v_sra32(vY, INTER_BITS), 15); + + *(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vy, (unsigned long)vx); + + uint32x2_t valpha = __nds__v_uadd32(__nds__v_sll32((uint32x2_t)(vY & vmask), INTER_BITS), (uint32x2_t)(vX & vmask)); + *(int16x2_t*)(alpha + x1) = (int16x2_t) { (short)(valpha[0]), (short)(valpha[1]) }; + } - for (x = 0; x < dst.cols; x++) { - adelta[x] = saturate_cast(M[0] * x * AB_SCALE); - bdelta[x] = saturate_cast(M[3] * x * AB_SCALE); + for (; x1 < bw; x1++) { + int X = X0 + adelta[x1]; + int Y = Y0 + bdelta[x1]; + xy[x1 * 2] = saturate_cast(X >> INTER_BITS); + xy[x1 * 2 + 1] = saturate_cast(Y >> INTER_BITS); + alpha[x1] = (short)((Y & INTER_MASK) * INTER_TAB_SIZE + (X & INTER_MASK)); } - Range range(0, dst.rows); - WarpAffineInvoker invoker(src, dst, interpolation, borderType, - Scalar(borderValue[0], borderValue[1], borderValue[2], borderValue[3]), - adelta, bdelta, M); - parallel_for_(range, invoker, dst.total() / (double)(1 << 16)); return CV_HAL_ERROR_OK; } diff --git a/3rdparty/ndsrvp/src/warpPerspective.cpp b/3rdparty/ndsrvp/src/warpPerspective.cpp index b4fa423ed7..40e44729d9 100644 --- a/3rdparty/ndsrvp/src/warpPerspective.cpp +++ b/3rdparty/ndsrvp/src/warpPerspective.cpp @@ -3,154 +3,90 @@ // of this distribution and at http://opencv.org/license.html. 
#include "ndsrvp_hal.hpp" -#include "opencv2/core.hpp" #include "opencv2/imgproc/hal/interface.h" +#include "cvutils.hpp" namespace cv { namespace ndsrvp { -class WarpPerspectiveInvoker : public ParallelLoopBody { -public: - WarpPerspectiveInvoker(const Mat& _src, Mat& _dst, const double* _M, int _interpolation, - int _borderType, const Scalar& _borderValue) - : ParallelLoopBody() - , src(_src) - , dst(_dst) - , M(_M) - , interpolation(_interpolation) - , borderType(_borderType) - , borderValue(_borderValue) - { +int warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw) +{ + int x1 = 0; + + for (; x1 < bw; x1 += 2) { + double W1 = W0 + M[6] * x1, W2 = W1 + M[6]; + W1 = W1 ? 1. / W1 : 0; + W2 = W2 ? 1. / W2 : 0; + double fX1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W1)); + double fX2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * (x1 + 1)) * W2)); + double fY1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W1)); + double fY2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * (x1 + 1)) * W2)); + + int32x2_t vX = {saturate_cast(fX1), saturate_cast(fX2)}; + int32x2_t vY = {saturate_cast(fY1), saturate_cast(fY2)}; + + vX = __nds__v_sclip32(vX, 15); + vY = __nds__v_sclip32(vY, 15); + + *(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vY, (unsigned long)vX); } - virtual void operator()(const Range& range) const CV_OVERRIDE - { - const int BLOCK_SZ = 32; - short XY[BLOCK_SZ * BLOCK_SZ * 2], A[BLOCK_SZ * BLOCK_SZ]; - int x, y, y1, width = dst.cols, height = dst.rows; - - int bh0 = std::min(BLOCK_SZ / 2, height); - int bw0 = std::min(BLOCK_SZ * BLOCK_SZ / bh0, width); - bh0 = std::min(BLOCK_SZ * BLOCK_SZ / bw0, height); - - for (y = range.start; y < range.end; y += bh0) { - for (x = 0; x < width; x += bw0) { - int bw = std::min(bw0, width - x); - int bh = std::min(bh0, range.end - y); // height - - Mat _XY(bh, bw, 
CV_16SC2, XY); - Mat dpart(dst, Rect(x, y, bw, bh)); - - for (y1 = 0; y1 < bh; y1++) { - short* xy = XY + y1 * bw * 2; - double X0 = M[0] * x + M[1] * (y + y1) + M[2]; - double Y0 = M[3] * x + M[4] * (y + y1) + M[5]; - double W0 = M[6] * x + M[7] * (y + y1) + M[8]; - - if (interpolation == CV_HAL_INTER_NEAREST) { - int x1 = 0; - - for (; x1 < bw; x1 += 2) { - double W1 = W0 + M[6] * x1, W2 = W1 + M[6]; - W1 = W1 ? 1. / W1 : 0; - W2 = W2 ? 1. / W2 : 0; - double fX1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W1)); - double fX2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * (x1 + 1)) * W2)); - double fY1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W1)); - double fY2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * (x1 + 1)) * W2)); - - int32x2_t vX = {saturate_cast(fX1), saturate_cast(fX2)}; - int32x2_t vY = {saturate_cast(fY1), saturate_cast(fY2)}; - - vX = __nds__v_sclip32(vX, 15); - vY = __nds__v_sclip32(vY, 15); - - *(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vY, (unsigned long)vX); - } - - for (; x1 < bw; x1++) { - double W = W0 + M[6] * x1; - W = W ? 1. / W : 0; - double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W)); - double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W)); - int X = saturate_cast(fX); - int Y = saturate_cast(fY); - - xy[x1 * 2] = saturate_cast(X); - xy[x1 * 2 + 1] = saturate_cast(Y); - } - } else { - short* alpha = A + y1 * bw; - int x1 = 0; - - const int INTER_MASK = INTER_TAB_SIZE - 1; - const uint32x2_t vmask = { INTER_MASK, INTER_MASK }; - for (; x1 < bw; x1 += 2) { - double W1 = W0 + M[6] * x1, W2 = W1 + M[6]; - W1 = W1 ? INTER_TAB_SIZE / W1 : 0; - W2 = W2 ? 
INTER_TAB_SIZE / W2 : 0; - double fX1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W1)); - double fX2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * (x1 + 1)) * W2)); - double fY1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W1)); - double fY2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * (x1 + 1)) * W2)); - - int32x2_t vX = {saturate_cast(fX1), saturate_cast(fX2)}; - int32x2_t vY = {saturate_cast(fY1), saturate_cast(fY2)}; - - int32x2_t vx = __nds__v_sclip32(__nds__v_sra32(vX, INTER_BITS), 15); - int32x2_t vy = __nds__v_sclip32(__nds__v_sra32(vY, INTER_BITS), 15); - - *(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vy, (unsigned long)vx); - - uint32x2_t valpha = __nds__v_uadd32(__nds__v_sll32((uint32x2_t)(vY & vmask), INTER_BITS), (uint32x2_t)(vX & vmask)); - *(int16x2_t*)(alpha + x1) = (int16x2_t) { (short)(valpha[0]), (short)(valpha[1]) }; - } - - for (; x1 < bw; x1++) { - double W = W0 + M[6] * x1; - W = W ? INTER_TAB_SIZE / W : 0; - double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W)); - double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W)); - int X = saturate_cast(fX); - int Y = saturate_cast(fY); - - xy[x1 * 2] = saturate_cast(X >> INTER_BITS); - xy[x1 * 2 + 1] = saturate_cast(Y >> INTER_BITS); - alpha[x1] = (short)((Y & (INTER_TAB_SIZE - 1)) * INTER_TAB_SIZE + (X & (INTER_TAB_SIZE - 1))); - } - } - } - - if (interpolation == CV_HAL_INTER_NEAREST) - remap(src, dpart, _XY, Mat(), interpolation, borderType, borderValue); - else { - Mat _matA(bh, bw, CV_16U, A); - remap(src, dpart, _XY, _matA, interpolation, borderType, borderValue); - } - } - } + for (; x1 < bw; x1++) { + double W = W0 + M[6] * x1; + W = W ? 1. 
/ W : 0; + double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W)); + double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W)); + int X = saturate_cast(fX); + int Y = saturate_cast(fY); + + xy[x1 * 2] = saturate_cast(X); + xy[x1 * 2 + 1] = saturate_cast(Y); } -private: - Mat src; - Mat dst; - const double* M; - int interpolation, borderType; - Scalar borderValue; -}; - -int warpPerspective(int src_type, - const uchar* src_data, size_t src_step, int src_width, int src_height, - uchar* dst_data, size_t dst_step, int dst_width, int dst_height, - const double M[9], int interpolation, int borderType, const double borderValue[4]) + return CV_HAL_ERROR_OK; +} + +int warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw) { - Mat src(Size(src_width, src_height), src_type, const_cast(src_data), src_step); - Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step); + int x1 = 0; + + const int INTER_MASK = INTER_TAB_SIZE - 1; + const uint32x2_t vmask = { INTER_MASK, INTER_MASK }; + for (; x1 < bw; x1 += 2) { + double W1 = W0 + M[6] * x1, W2 = W1 + M[6]; + W1 = W1 ? INTER_TAB_SIZE / W1 : 0; + W2 = W2 ? 
INTER_TAB_SIZE / W2 : 0; + double fX1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W1)); + double fX2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * (x1 + 1)) * W2)); + double fY1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W1)); + double fY2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * (x1 + 1)) * W2)); + + int32x2_t vX = {saturate_cast(fX1), saturate_cast(fX2)}; + int32x2_t vY = {saturate_cast(fY1), saturate_cast(fY2)}; + + int32x2_t vx = __nds__v_sclip32(__nds__v_sra32(vX, INTER_BITS), 15); + int32x2_t vy = __nds__v_sclip32(__nds__v_sra32(vY, INTER_BITS), 15); + + *(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vy, (unsigned long)vx); + + uint32x2_t valpha = __nds__v_uadd32(__nds__v_sll32((uint32x2_t)(vY & vmask), INTER_BITS), (uint32x2_t)(vX & vmask)); + *(int16x2_t*)(alpha + x1) = (int16x2_t) { (short)(valpha[0]), (short)(valpha[1]) }; + } + + for (; x1 < bw; x1++) { + double W = W0 + M[6] * x1; + W = W ? 
INTER_TAB_SIZE / W : 0; + double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W)); + double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W)); + int X = saturate_cast(fX); + int Y = saturate_cast(fY); + + xy[x1 * 2] = saturate_cast(X >> INTER_BITS); + xy[x1 * 2 + 1] = saturate_cast(Y >> INTER_BITS); + alpha[x1] = (short)((Y & INTER_MASK) * INTER_TAB_SIZE + (X & INTER_MASK)); + } - Range range(0, dst.rows); - WarpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, Scalar(borderValue[0], borderValue[1], borderValue[2], borderValue[3])); - parallel_for_(range, invoker, dst.total() / (double)(1 << 16)); return CV_HAL_ERROR_OK; } diff --git a/CMakeLists.txt b/CMakeLists.txt index 0693731a8b..29d05cd86b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1040,7 +1040,7 @@ foreach(hal ${OpenCV_HAL}) ocv_hal_register(NDSRVP_HAL_LIBRARIES NDSRVP_HAL_HEADERS NDSRVP_HAL_INCLUDE_DIRS) list(APPEND OpenCV_USED_HAL "ndsrvp (ver ${NDSRVP_HAL_VERSION})") else() - message(STATUS "NDSRVP: Andes GNU Toolchain DSP extension is not open, disabling ndsrvp...") + message(STATUS "NDSRVP: Andes GNU Toolchain DSP extension is not enabled, disabling ndsrvp...") endif() elseif(hal STREQUAL "halrvv") if(";${CPU_BASELINE_FINAL};" MATCHES ";RVV;") diff --git a/modules/imgproc/include/opencv2/imgproc/hal/hal.hpp b/modules/imgproc/include/opencv2/imgproc/hal/hal.hpp index 48851ece07..d4b0f3fbb9 100644 --- a/modules/imgproc/include/opencv2/imgproc/hal/hal.hpp +++ b/modules/imgproc/include/opencv2/imgproc/hal/hal.hpp @@ -108,11 +108,19 @@ CV_EXPORTS void warpAffine(int src_type, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, const double M[6], int interpolation, int borderType, const double borderValue[4]); +CV_EXPORTS void warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw); + +CV_EXPORTS void warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int 
Y0, int bw); + CV_EXPORTS void warpPerspective(int src_type, const uchar * src_data, size_t src_step, int src_width, int src_height, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, const double M[9], int interpolation, int borderType, const double borderValue[4]); +CV_EXPORTS void warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw); + +CV_EXPORTS void warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw); + CV_EXPORTS void cvtBGRtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, diff --git a/modules/imgproc/include/opencv2/imgproc/hal/interface.h b/modules/imgproc/include/opencv2/imgproc/hal/interface.h index f8dbcfe791..8e485b9fca 100644 --- a/modules/imgproc/include/opencv2/imgproc/hal/interface.h +++ b/modules/imgproc/include/opencv2/imgproc/hal/interface.h @@ -12,6 +12,12 @@ #define CV_HAL_INTER_CUBIC 2 #define CV_HAL_INTER_AREA 3 #define CV_HAL_INTER_LANCZOS4 4 +#define CV_HAL_INTER_LINEAR_EXACT 5 +#define CV_HAL_INTER_NEAREST_EXACT 6 +#define CV_HAL_INTER_MAX 7 +#define CV_HAL_WARP_FILL_OUTLIERS 8 +#define CV_HAL_WARP_INVERSE_MAP 16 +#define CV_HAL_WARP_RELATIVE_MAP 32 //! @} //! 
@name Morphology operations diff --git a/modules/imgproc/src/hal_replacement.hpp b/modules/imgproc/src/hal_replacement.hpp index 773fed9b48..ceb6c8b0f6 100644 --- a/modules/imgproc/src/hal_replacement.hpp +++ b/modules/imgproc/src/hal_replacement.hpp @@ -273,6 +273,29 @@ inline int hal_ni_resize(int src_type, const uchar *src_data, size_t src_step, i @sa cv::warpAffine, cv::hal::warpAffine */ inline int hal_ni_warpAffine(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, const double M[6], int interpolation, int borderType, const double borderValue[4]) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief hal_warpAffineBlocklineNN doing a row of affine transformation + @param adelta input M0 * x array + @param bdelta input M3 * x array + @param xy output (x', y') coordinates + @param X0 input M1 * y + M2 value + @param Y0 input M4 * y + M5 value + @param bw length of the row + @sa cv::warpAffineBlocklineNN, cv::hal::warpAffineBlocklineNN + */ +inline int hal_ni_warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief hal_warpAffineBlockline doing a row of affine transformation + @param adelta input M0 * x array + @param bdelta input M3 * x array + @param xy output (x', y') coordinates + @param alpha output least significant bits of the (x', y') coordinates for interpolation + @param X0 input M1 * y + M2 value + @param Y0 input M4 * y + M5 value + @param bw length of the row + @sa cv::warpAffineBlockline, cv::hal::warpAffineBlockline + */ +inline int hal_ni_warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } /** @brief hal_warpPerspective @param src_type source and destination image type @@ -291,11 +314,38 @@ inline int hal_ni_warpAffine(int src_type, const uchar *src_data, size_t src_ste @sa 
cv::warpPerspective, cv::hal::warpPerspective */ inline int hal_ni_warpPerspective(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, const double M[9], int interpolation, int borderType, const double borderValue[4]) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief hal_warpPerspectiveBlocklineNN doing a row of perspective transformation + @param M 3x3 matrix with transform coefficients + @param xy output (x', y') coordinates + @param X0 input M0 * x0 + M1 * y + M2 value + @param Y0 input M3 * x0 + M4 * y + M5 value + @param W0 input M6 * x0 + M7 * y + M8 value + @param bw length of the row + @sa cv::warpPerspectiveBlocklineNN, cv::hal::warpPerspectiveBlocklineNN + */ +inline int hal_ni_warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief hal_warpPerspectiveBlockline doing a row of perspective transformation + @param M 3x3 matrix with transform coefficients + @param xy output (x', y') coordinates + @param alpha output least significant bits of the (x', y') coordinates for interpolation + @param X0 input M0 * x0 + M1 * y + M2 value + @param Y0 input M3 * x0 + M4 * y + M5 value + @param W0 input M6 * x0 + M7 * y + M8 value + @param bw length of the row + @sa cv::warpPerspectiveBlockline, cv::hal::warpPerspectiveBlockline + */ +inline int hal_ni_warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } //! 
@cond IGNORED #define cv_hal_resize hal_ni_resize #define cv_hal_warpAffine hal_ni_warpAffine +#define cv_hal_warpAffineBlocklineNN hal_ni_warpAffineBlocklineNN +#define cv_hal_warpAffineBlockline hal_ni_warpAffineBlockline #define cv_hal_warpPerspective hal_ni_warpPerspective +#define cv_hal_warpPerspectiveBlocklineNN hal_ni_warpPerspectiveBlocklineNN +#define cv_hal_warpPerspectiveBlockline hal_ni_warpPerspectiveBlockline //! @endcond /** diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index c0eaf8114c..4e4d718da3 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -2268,16 +2268,7 @@ public: short *XY = __XY.data(), *A = __A.data(); const int AB_BITS = MAX(10, (int)INTER_BITS); const int AB_SCALE = 1 << AB_BITS; - int round_delta = interpolation == INTER_NEAREST ? AB_SCALE/2 : AB_SCALE/INTER_TAB_SIZE/2, x, y, x1, y1; - #if CV_TRY_AVX2 - bool useAVX2 = CV_CPU_HAS_SUPPORT_AVX2; - #endif - #if CV_TRY_SSE4_1 - bool useSSE4_1 = CV_CPU_HAS_SUPPORT_SSE4_1; - #endif - #if CV_TRY_LASX - bool useLASX = CV_CPU_HAS_SUPPORT_LASX; - #endif + int round_delta = interpolation == INTER_NEAREST ? 
AB_SCALE/2 : AB_SCALE/INTER_TAB_SIZE/2, x, y, y1; int bh0 = std::min(BLOCK_SZ/2, dst.rows); int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, dst.cols); @@ -2300,84 +2291,9 @@ public: int Y0 = saturate_cast((M[4]*(y + y1) + M[5])*AB_SCALE) + round_delta; if( interpolation == INTER_NEAREST ) - { - x1 = 0; - #if CV_TRY_SSE4_1 - if( useSSE4_1 ) - opt_SSE4_1::WarpAffineInvoker_Blockline_SSE41(adelta + x, bdelta + x, xy, X0, Y0, bw); - else - #endif - { - #if CV_SIMD128 - { - v_int32x4 v_X0 = v_setall_s32(X0), v_Y0 = v_setall_s32(Y0); - int span = VTraits::vlanes(); - for( ; x1 <= bw - span; x1 += span ) - { - v_int16x8 v_dst[2]; - #define CV_CONVERT_MAP(ptr,offset,shift) v_pack(v_shr(v_add(shift,v_load(ptr + offset))),\ - v_shr(v_add(shift,v_load(ptr + offset + 4)))) - v_dst[0] = CV_CONVERT_MAP(adelta, x+x1, v_X0); - v_dst[1] = CV_CONVERT_MAP(bdelta, x+x1, v_Y0); - #undef CV_CONVERT_MAP - v_store_interleave(xy + (x1 << 1), v_dst[0], v_dst[1]); - } - } - #endif - for( ; x1 < bw; x1++ ) - { - int X = (X0 + adelta[x+x1]) >> AB_BITS; - int Y = (Y0 + bdelta[x+x1]) >> AB_BITS; - xy[x1*2] = saturate_cast(X); - xy[x1*2+1] = saturate_cast(Y); - } - } - } + hal::warpAffineBlocklineNN(adelta + x, bdelta + x, xy, X0, Y0, bw); else - { - short* alpha = A + y1*bw; - x1 = 0; - #if CV_TRY_AVX2 - if ( useAVX2 ) - x1 = opt_AVX2::warpAffineBlockline(adelta + x, bdelta + x, xy, alpha, X0, Y0, bw); - #endif - #if CV_TRY_LASX - if ( useLASX ) - x1 = opt_LASX::warpAffineBlockline(adelta + x, bdelta + x, xy, alpha, X0, Y0, bw); - #endif - #if CV_SIMD128 - { - v_int32x4 v__X0 = v_setall_s32(X0), v__Y0 = v_setall_s32(Y0); - v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1); - int span = VTraits::vlanes(); - for( ; x1 <= bw - span * 2; x1 += span * 2 ) - { - v_int32x4 v_X0 = v_shr(v_add(v__X0, v_load(this->adelta + x + x1))); - v_int32x4 v_Y0 = v_shr(v_add(v__Y0, v_load(this->bdelta + x + x1))); - v_int32x4 v_X1 = v_shr(v_add(v__X0, v_load(this->adelta + x + x1 + span))); - v_int32x4 v_Y1 = 
v_shr(v_add(v__Y0, v_load(this->bdelta + x + x1 + span))); - - v_int16x8 v_xy[2]; - v_xy[0] = v_pack(v_shr(v_X0), v_shr(v_X1)); - v_xy[1] = v_pack(v_shr(v_Y0), v_shr(v_Y1)); - v_store_interleave(xy + (x1 << 1), v_xy[0], v_xy[1]); - - v_int32x4 v_alpha0 = v_or(v_shl(v_and(v_Y0, v_mask)), v_and(v_X0, v_mask)); - v_int32x4 v_alpha1 = v_or(v_shl(v_and(v_Y1, v_mask)), v_and(v_X1, v_mask)); - v_store(alpha + x1, v_pack(v_alpha0, v_alpha1)); - } - } - #endif - for( ; x1 < bw; x1++ ) - { - int X = (X0 + adelta[x+x1]) >> (AB_BITS - INTER_BITS); - int Y = (Y0 + bdelta[x+x1]) >> (AB_BITS - INTER_BITS); - xy[x1*2] = saturate_cast(X >> INTER_BITS); - xy[x1*2+1] = saturate_cast(Y >> INTER_BITS); - alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + - (X & (INTER_TAB_SIZE-1))); - } - } + hal::warpAffineBlockline(adelta + x, bdelta + x, xy, A + y1*bw, X0, Y0, bw); } if( interpolation == INTER_NEAREST ) @@ -2802,6 +2718,97 @@ void warpAffine(int src_type, parallel_for_(range, invoker, dst.total()/(double)(1<<16)); } +void warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw) +{ + CALL_HAL(warpAffineBlocklineNN, cv_hal_warpAffineBlocklineNN, adelta, bdelta, xy, X0, Y0, bw); + + const int AB_BITS = MAX(10, (int)INTER_BITS); + int x1 = 0; + #if CV_TRY_SSE4_1 + bool useSSE4_1 = CV_CPU_HAS_SUPPORT_SSE4_1; + if( useSSE4_1 ) + opt_SSE4_1::WarpAffineInvoker_Blockline_SSE41(adelta, bdelta, xy, X0, Y0, bw); + else + #endif + { + #if CV_SIMD128 + { + v_int32x4 v_X0 = v_setall_s32(X0), v_Y0 = v_setall_s32(Y0); + int span = VTraits::vlanes(); + for( ; x1 <= bw - span; x1 += span ) + { + v_int16x8 v_dst[2]; + #define CV_CONVERT_MAP(ptr,offset,shift) v_pack(v_shr(v_add(shift,v_load(ptr + offset))),\ + v_shr(v_add(shift,v_load(ptr + offset + 4)))) + v_dst[0] = CV_CONVERT_MAP(adelta, x1, v_X0); + v_dst[1] = CV_CONVERT_MAP(bdelta, x1, v_Y0); + #undef CV_CONVERT_MAP + v_store_interleave(xy + (x1 << 1), v_dst[0], v_dst[1]); + } + } + #endif + for( ; x1 < bw; x1++ 
) + { + int X = (X0 + adelta[x1]) >> AB_BITS; + int Y = (Y0 + bdelta[x1]) >> AB_BITS; + xy[x1*2] = saturate_cast(X); + xy[x1*2+1] = saturate_cast(Y); + } + } +} + +void warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw) +{ + CALL_HAL(warpAffineBlockline, cv_hal_warpAffineBlockline, adelta, bdelta, xy, alpha, X0, Y0, bw); + + const int AB_BITS = MAX(10, (int)INTER_BITS); + int x1 = 0; + #if CV_TRY_AVX2 + bool useAVX2 = CV_CPU_HAS_SUPPORT_AVX2; + if ( useAVX2 ) + x1 = opt_AVX2::warpAffineBlockline(adelta, bdelta, xy, alpha, X0, Y0, bw); + #endif + #if CV_TRY_LASX + bool useLASX = CV_CPU_HAS_SUPPORT_LASX; + if ( useLASX ) + x1 = opt_LASX::warpAffineBlockline(adelta, bdelta, xy, alpha, X0, Y0, bw); + #endif + { + #if CV_SIMD128 + { + v_int32x4 v__X0 = v_setall_s32(X0), v__Y0 = v_setall_s32(Y0); + v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1); + int span = VTraits::vlanes(); + for( ; x1 <= bw - span * 2; x1 += span * 2 ) + { + v_int32x4 v_X0 = v_shr(v_add(v__X0, v_load(adelta + x1))); + v_int32x4 v_Y0 = v_shr(v_add(v__Y0, v_load(bdelta + x1))); + v_int32x4 v_X1 = v_shr(v_add(v__X0, v_load(adelta + x1 + span))); + v_int32x4 v_Y1 = v_shr(v_add(v__Y0, v_load(bdelta + x1 + span))); + + v_int16x8 v_xy[2]; + v_xy[0] = v_pack(v_shr(v_X0), v_shr(v_X1)); + v_xy[1] = v_pack(v_shr(v_Y0), v_shr(v_Y1)); + v_store_interleave(xy + (x1 << 1), v_xy[0], v_xy[1]); + + v_int32x4 v_alpha0 = v_or(v_shl(v_and(v_Y0, v_mask)), v_and(v_X0, v_mask)); + v_int32x4 v_alpha1 = v_or(v_shl(v_and(v_Y1, v_mask)), v_and(v_X1, v_mask)); + v_store(alpha + x1, v_pack(v_alpha0, v_alpha1)); + } + } + #endif + for( ; x1 < bw; x1++ ) + { + int X = (X0 + adelta[x1]) >> (AB_BITS - INTER_BITS); + int Y = (Y0 + bdelta[x1]) >> (AB_BITS - INTER_BITS); + xy[x1*2] = saturate_cast(X >> INTER_BITS); + xy[x1*2+1] = saturate_cast(Y >> INTER_BITS); + alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + + (X & (INTER_TAB_SIZE-1))); + } + } +} + } // hal:: } // cv:: @@ 
-3204,12 +3211,6 @@ public: int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, width); bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, height); - #if CV_TRY_SSE4_1 - Ptr pwarp_impl_sse4; - if(CV_CPU_HAS_SUPPORT_SSE4_1) - pwarp_impl_sse4 = opt_SSE4_1::WarpPerspectiveLine_SSE4::getImpl(M); - #endif - for( y = range.start; y < range.end; y += bh0 ) { for( x = 0; x < width; x += bw0 ) @@ -3228,57 +3229,9 @@ public: double W0 = M[6]*x + M[7]*(y + y1) + M[8]; if( interpolation == INTER_NEAREST ) - { - #if CV_TRY_SSE4_1 - if (pwarp_impl_sse4) - pwarp_impl_sse4->processNN(M, xy, X0, Y0, W0, bw); - else - #endif - #if CV_SIMD128_64F - WarpPerspectiveLine_ProcessNN_CV_SIMD(M, xy, X0, Y0, W0, bw); - #else - for( int x1 = 0; x1 < bw; x1++ ) - { - double W = W0 + M[6]*x1; - W = W ? 1./W : 0; - double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W)); - double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W)); - int X = saturate_cast(fX); - int Y = saturate_cast(fY); - - xy[x1*2] = saturate_cast(X); - xy[x1*2+1] = saturate_cast(Y); - } - #endif - } + hal::warpPerspectiveBlocklineNN(M, xy, X0, Y0, W0, bw); else - { - short* alpha = A + y1*bw; - - #if CV_TRY_SSE4_1 - if (pwarp_impl_sse4) - pwarp_impl_sse4->process(M, xy, alpha, X0, Y0, W0, bw); - else - #endif - #if CV_SIMD128_64F - WarpPerspectiveLine_Process_CV_SIMD(M, xy, alpha, X0, Y0, W0, bw); - #else - for( int x1 = 0; x1 < bw; x1++ ) - { - double W = W0 + M[6]*x1; - W = W ? 
INTER_TAB_SIZE/W : 0; - double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W)); - double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W)); - int X = saturate_cast(fX); - int Y = saturate_cast(fY); - - xy[x1*2] = saturate_cast(X >> INTER_BITS); - xy[x1*2+1] = saturate_cast(Y >> INTER_BITS); - alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + - (X & (INTER_TAB_SIZE-1))); - } - #endif - } + hal::warpPerspectiveBlockline(M, xy, A + y1*bw, X0, Y0, W0, bw); } if( interpolation == INTER_NEAREST ) @@ -3371,6 +3324,74 @@ void warpPerspective(int src_type, parallel_for_(range, invoker, dst.total()/(double)(1<<16)); } +void warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw) +{ + CALL_HAL(warpPerspectiveBlocklineNN, cv_hal_warpPerspectiveBlocklineNN, M, xy, X0, Y0, W0, bw); + + #if CV_TRY_SSE4_1 + Ptr pwarp_impl_sse4; + if(CV_CPU_HAS_SUPPORT_SSE4_1) + pwarp_impl_sse4 = opt_SSE4_1::WarpPerspectiveLine_SSE4::getImpl(M); + + if (pwarp_impl_sse4) + pwarp_impl_sse4->processNN(M, xy, X0, Y0, W0, bw); + else + #endif + { + #if CV_SIMD128_64F + WarpPerspectiveLine_ProcessNN_CV_SIMD(M, xy, X0, Y0, W0, bw); + #else + for( int x1 = 0; x1 < bw; x1++ ) + { + double W = W0 + M[6]*x1; + W = W ? 
1./W : 0; + double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W)); + double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W)); + int X = saturate_cast(fX); + int Y = saturate_cast(fY); + + xy[x1*2] = saturate_cast(X); + xy[x1*2+1] = saturate_cast(Y); + } + #endif + } +} + +void warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw) +{ + CALL_HAL(warpPerspectiveBlockline, cv_hal_warpPerspectiveBlockline, M, xy, alpha, X0, Y0, W0, bw); + + #if CV_TRY_SSE4_1 + Ptr pwarp_impl_sse4; + if(CV_CPU_HAS_SUPPORT_SSE4_1) + pwarp_impl_sse4 = opt_SSE4_1::WarpPerspectiveLine_SSE4::getImpl(M); + + if (pwarp_impl_sse4) + pwarp_impl_sse4->process(M, xy, alpha, X0, Y0, W0, bw); + else + #endif + { + #if CV_SIMD128_64F + WarpPerspectiveLine_Process_CV_SIMD(M, xy, alpha, X0, Y0, W0, bw); + #else + for( int x1 = 0; x1 < bw; x1++ ) + { + double W = W0 + M[6]*x1; + W = W ? INTER_TAB_SIZE/W : 0; + double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W)); + double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W)); + int X = saturate_cast(fX); + int Y = saturate_cast(fY); + + xy[x1*2] = saturate_cast(X >> INTER_BITS); + xy[x1*2+1] = saturate_cast(Y >> INTER_BITS); + alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + + (X & (INTER_TAB_SIZE-1))); + } + #endif + } +} + } // hal:: } // cv:: From 340a390ea2a152aadf5866428e1457c51c2ec10b Mon Sep 17 00:00:00 2001 From: stepkamipt Date: Fri, 2 Aug 2024 10:07:36 +0200 Subject: [PATCH 12/17] Fix path to 3rdparty cmake. Current code using CMAKE_SOURCE_DIR and it works well if opencv is standalone CMake project, but in case of building OpenCV as part of a larger CMake project (e.g. 
one that includes opencv and opencv_contrib) this path is incorrect, unlike OpenCV_SOURCE_DIR --- modules/videoio/cmake/detect_obsensor.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/videoio/cmake/detect_obsensor.cmake b/modules/videoio/cmake/detect_obsensor.cmake index f0b66015eb..c7e6164c0f 100644 --- a/modules/videoio/cmake/detect_obsensor.cmake +++ b/modules/videoio/cmake/detect_obsensor.cmake @@ -1,7 +1,7 @@ # --- obsensor --- if(NOT HAVE_OBSENSOR) if(OBSENSOR_USE_ORBBEC_SDK) - include(${CMAKE_SOURCE_DIR}/3rdparty/orbbecsdk/orbbecsdk.cmake) + include("${OpenCV_SOURCE_DIR}/3rdparty/orbbecsdk/orbbecsdk.cmake") download_orbbec_sdk(ORBBEC_SDK_ROOT_DIR) message(STATUS "ORBBEC_SDK_ROOT_DIR: ${ORBBEC_SDK_ROOT_DIR}") if(ORBBEC_SDK_ROOT_DIR) From 796974cccc5c3e6dfcf6e21583cc7c62fa8e372b Mon Sep 17 00:00:00 2001 From: Aven Date: Sun, 4 Aug 2024 05:04:03 +0800 Subject: [PATCH 13/17] fix compilation errors caused by namespace related: #25199 --- modules/dnn/src/layers/elementwise_layers.cpp | 2 +- modules/dnn/src/layers/nary_eltwise_layers.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index 6c06554d5f..770939710d 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -932,7 +932,7 @@ struct GeluFunctor : public BaseFunctor { #endif #ifdef HAVE_DNN_NGRAPH - std::shared_ptr initNgraphAPI(const ngraph::Output& node) + std::shared_ptr initNgraphAPI(const ov::Output& node) { return std::make_shared(node); } diff --git a/modules/dnn/src/layers/nary_eltwise_layers.cpp b/modules/dnn/src/layers/nary_eltwise_layers.cpp index 659e7e29a8..305070f9b8 100644 --- a/modules/dnn/src/layers/nary_eltwise_layers.cpp +++ b/modules/dnn/src/layers/nary_eltwise_layers.cpp @@ -1006,7 +1006,7 @@ public: // In case only one input if (inputs.size() == 1) { auto &ieInpNode = 
nodes[0].dynamicCast()->node; - ngraph::OutputVector inp{ieInpNode}; + ov::OutputVector inp{ieInpNode}; auto blank = std::make_shared(inp, 0); return Ptr(new InfEngineNgraphNode(blank)); } From a15cd4b63dba4b1286bfc6b23e4fe99a77d2620e Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Thu, 1 Aug 2024 10:57:19 +0300 Subject: [PATCH 14/17] Set and check allocator pointer for all cv::Mat instances. --- modules/core/src/matrix.cpp | 6 +- modules/core/test/test_allocator.cpp | 144 +++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 4 deletions(-) create mode 100644 modules/core/test/test_allocator.cpp diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 1b11e12145..f05711bba8 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -692,16 +692,13 @@ void Mat::create(int d, const int* _sizes, int _type) if( total() > 0 ) { MatAllocator *a = allocator, *a0 = getDefaultAllocator(); -#ifdef HAVE_TGPU - if( !a || a == tegra::getAllocator() ) - a = tegra::getAllocator(d, _sizes, _type); -#endif if(!a) a = a0; try { u = a->allocate(dims, size, _type, 0, step.p, ACCESS_RW /* ignored */, USAGE_DEFAULT); CV_Assert(u != 0); + allocator = a; } catch (...) { @@ -709,6 +706,7 @@ void Mat::create(int d, const int* _sizes, int _type) throw; u = a0->allocate(dims, size, _type, 0, step.p, ACCESS_RW /* ignored */, USAGE_DEFAULT); CV_Assert(u != 0); + allocator = a0; } CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) ); } diff --git a/modules/core/test/test_allocator.cpp b/modules/core/test/test_allocator.cpp new file mode 100644 index 0000000000..88b03b689e --- /dev/null +++ b/modules/core/test/test_allocator.cpp @@ -0,0 +1,144 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+#include "test_precomp.hpp" + +namespace opencv_test { namespace { + +// Dummy allocator implementation copied from the default OpenCV allocator with some simplifications +struct DummyAllocator: public cv::MatAllocator +{ +public: + DummyAllocator() {}; + ~DummyAllocator() {}; + + cv::UMatData* allocate(int dims, const int* sizes, int type, + void* data0, size_t* step, cv::AccessFlag flags, + cv::UMatUsageFlags usageFlags) const + { + CV_UNUSED(flags); + CV_UNUSED(usageFlags); + + size_t total = CV_ELEM_SIZE(type); + for( int i = dims-1; i >= 0; i-- ) + { + if( step ) + { + if( data0 && step[i] != CV_AUTOSTEP ) + { + CV_Assert(total <= step[i]); + total = step[i]; + } + else + step[i] = total; + } + total *= sizes[i]; + } + + uchar* data = nullptr; + if (data0) + { + data = (uchar*)data0; + } + else + { + data = new uchar[total]; + DummyAllocator::allocatedBytes += total; + DummyAllocator::allocations++; + } + cv::UMatData* u = new cv::UMatData(this); + u->data = u->origdata = data; + u->size = total; + if(data0) + u->flags |= cv::UMatData::USER_ALLOCATED; + + return u; + } + + bool allocate(cv::UMatData* u, cv::AccessFlag accessFlags, cv::UMatUsageFlags usageFlags) const + { + CV_UNUSED(accessFlags); + CV_UNUSED(usageFlags); + + if(!u) return false; + return true; + } + + void deallocate(cv::UMatData* u) const + { + if(!u) + return; + + CV_Assert(u->urefcount == 0); + CV_Assert(u->refcount == 0); + if( !(u->flags & cv::UMatData::USER_ALLOCATED) ) + { + delete[] u->origdata; + DummyAllocator::deallocations++; + u->origdata = 0; + } + delete u; + } + + static size_t allocatedBytes; + static int allocations; + static int deallocations; +}; + +size_t DummyAllocator::allocatedBytes = 0; +int DummyAllocator::allocations = 0; +int DummyAllocator::deallocations = 0; + +cv::MatAllocator* getDummyAllocator() +{ + static cv::MatAllocator* allocator = new DummyAllocator; + return allocator; +} + +struct AllocatorTest : public testing::Test { + void SetUp() override { + 
cv::MatAllocator* allocator = getDummyAllocator(); + EXPECT_TRUE(allocator != nullptr); + cv::Mat::setDefaultAllocator(allocator); + } + + void TearDown() override { + cv::Mat::setDefaultAllocator(cv::Mat::getStdAllocator()); + } +}; + +TEST_F(AllocatorTest, DummyAllocator) +{ + cv::MatAllocator* dummy = getDummyAllocator(); + + DummyAllocator::allocatedBytes = 0; + DummyAllocator::allocations = 0; + DummyAllocator::deallocations = 0; + + { + cv::Mat src1 = cv::Mat::ones (16, 16, CV_8UC1); + EXPECT_TRUE(!src1.empty()); + EXPECT_EQ(src1.allocator, dummy); + + cv::Mat src1_roi = src1(cv::Rect(2,2,8,8)); + EXPECT_EQ(src1_roi.allocator, dummy); + + cv::MatAllocator* standard = cv::Mat::getStdAllocator(); + cv::Mat::setDefaultAllocator(standard); + cv::Mat src2 = cv::Mat::ones (16, 16, CV_8UC1); + EXPECT_TRUE(!src2.empty()); + EXPECT_EQ(src2.allocator, standard); + + src1.create(32, 32, CV_8UC1); + EXPECT_EQ(src1.allocator, dummy); + } + + size_t expect_allocated = 16*16*sizeof(uchar) + 32*32*sizeof(uchar); + EXPECT_EQ(expect_allocated, DummyAllocator::allocatedBytes); + + // ROI should not trigger extra allocations + EXPECT_EQ(2, DummyAllocator::allocations); + EXPECT_EQ(2, DummyAllocator::deallocations); +} + +}} // namespace From 6ed603e917424e0cb10e3d019706db1b0ff1e8af Mon Sep 17 00:00:00 2001 From: Maxim Smolskiy Date: Mon, 5 Aug 2024 13:28:07 +0300 Subject: [PATCH 15/17] Merge pull request #25991 from MaximSmolskiy:improve-corners-matching-in-ChessBoardDetector-NeighborsFinder-findCornerNeighbor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improve corners matching in ChessBoardDetector::NeighborsFinder::findCornerNeighbor #25991 ### Pull Request Readiness Checklist Idea was mentioned in `Section III-B. New Heuristic for Quadrangle Linking` of `Rufli, Martin & Scaramuzza, Davide & Siegwart, Roland. (2008). Automatic Detection of Checkerboards on Blurred and Distorted Images. 
2008 IEEE/RSJ International Conference on Intelligent Robots and Systems, IROS. 3121-3126. 10.1109/IROS.2008.4650703` (https://rpg.ifi.uzh.ch/docs/IROS08_scaramuzza_b.pdf): ![Снимок экрана от 2024-08-05 09-51-27](https://github.com/user-attachments/assets/7a090ccc-c24c-4dfb-b0dd-259c8709eb72) ``` * For each candidate pair, focus on the quadrangles they belong to and draw two straight lines passing through the midsections of the respective quadrangle edges (see Fig. 6). * If the candidate corner and the source corner are on the same side of every of the four straight lines drawn this way (this corresponds to the yellow shaded area in Fig. 6), then the corners are successfully matched. ``` By improving corners matching, we can increase the search radius (`thresh_scale`). I tested this PR with benchmark ``` python3 objdetect_benchmark.py --configuration=generate_run --board_x=7 --path=res_chessboard --synthetic_object=chessboard ``` PR increases detected chessboards number by `3/7%`: ``` cell_img_size = 100 (default) before category detected chessboard total detected chessboard total chessboard average detected error chessboard all 0.910417 13110 14400 0.599746 Total detected time: 147.50906700000002 sec after category detected chessboard total detected chessboard total chessboard average detected error chessboard all 0.941667 13560 14400 0.596726 Total detected time: 136.68963200000007 sec ---------------------------------------------------------------------------------------------------------------------------------------------- cell_img_size = 10 before category detected chessboard total detected chessboard total chessboard average detected error chessboard all 0.539792 7773 14400 4.208237 Total detected time: 2.668964 sec after category detected chessboard total detected chessboard total chessboard average detected error chessboard all 0.579167 8340 14400 4.198448 Total detected time: 2.535998999999999 sec ``` See details at 
https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake --- modules/calib3d/src/calibinit.cpp | 56 +++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/modules/calib3d/src/calibinit.cpp b/modules/calib3d/src/calibinit.cpp index 1767c38981..eb6b87ce7d 100644 --- a/modules/calib3d/src/calibinit.cpp +++ b/modules/calib3d/src/calibinit.cpp @@ -222,7 +222,7 @@ public: int all_quads_count; struct NeighborsFinder { - const float thresh_scale = 1.f; + const float thresh_scale = sqrt(2.f); ChessBoardDetector& detector; std::vector neighbors_indices; std::vector neighbors_dists; @@ -232,8 +232,9 @@ public: NeighborsFinder(ChessBoardDetector& detector); bool findCornerNeighbor( - const int idx, - const cv::Point2f& pt, + const int quad_idx, + const int corner_idx, + const cv::Point2f& corner_pt, float& min_dist, const float radius, int& closest_quad_idx, @@ -514,9 +515,23 @@ ChessBoardDetector::NeighborsFinder::NeighborsFinder(ChessBoardDetector& _detect neighbors_dists.resize(all_corners_count); } +static double pointSideFromLine(const Point2f& line_direction_vector, const Point2f& vector) +{ + return line_direction_vector.cross(vector); +} + +static bool arePointsOnSameSideFromLine(const Point2f& line_pt1, const Point2f& line_pt2, const Point2f& pt1, const Point2f& pt2) +{ + const Point2f line_direction_vector = line_pt2 - line_pt1; + const Point2f 
vector1 = pt1 - line_pt1; + const Point2f vector2 = pt2 - line_pt1; + return pointSideFromLine(line_direction_vector, vector1) * pointSideFromLine(line_direction_vector, vector2) > 0.; +} + bool ChessBoardDetector::NeighborsFinder::findCornerNeighbor( - const int idx, - const cv::Point2f& pt, + const int quad_idx, + const int corner_idx, + const cv::Point2f& corner_pt, float& min_dist, const float radius, int& closest_quad_idx, @@ -525,12 +540,12 @@ bool ChessBoardDetector::NeighborsFinder::findCornerNeighbor( { ChessBoardQuad* p_all_quads = detector.all_quads.data(); - const ChessBoardQuad& cur_quad = (const ChessBoardQuad&)p_all_quads[idx]; + const ChessBoardQuad& cur_quad = (const ChessBoardQuad&)p_all_quads[quad_idx]; int closest_neighbor_idx = -1; ChessBoardQuad *closest_quad = 0; // find the closest corner in all other quadrangles - const std::vector query = { pt.x, pt.y }; + const std::vector query = { corner_pt.x, corner_pt.y }; const cvflann::SearchParams search_params(-1); const int neighbors_count = all_quads_pts_index.radiusSearch(query, neighbors_indices, neighbors_dists, radius, search_params); @@ -538,7 +553,7 @@ bool ChessBoardDetector::NeighborsFinder::findCornerNeighbor( { const int neighbor_idx = neighbors_indices[neighbor_idx_idx]; const int k = neighbor_idx >> 2; - if (k == idx) + if (k == quad_idx) continue; ChessBoardQuad& q_k = p_all_quads[k]; @@ -546,7 +561,8 @@ bool ChessBoardDetector::NeighborsFinder::findCornerNeighbor( if (q_k.neighbors[j]) continue; - const float dist = normL2Sqr(pt - all_quads_pts[neighbor_idx]); + const Point2f neighbor_pt = all_quads_pts[neighbor_idx]; + const float dist = normL2Sqr(corner_pt - neighbor_pt); if (dist <= cur_quad.edge_len * thresh_scale && dist <= q_k.edge_len * thresh_scale) { @@ -560,6 +576,24 @@ bool ChessBoardDetector::NeighborsFinder::findCornerNeighbor( DPRINTF("Incompatible edge lengths"); continue; } + + const Point2f mid_pt1 = (cur_quad.corners[corner_idx]->pt + cur_quad.corners[(corner_idx 
+ 1) & 3]->pt) / 2.f; + const Point2f mid_pt2 = (cur_quad.corners[(corner_idx + 2) & 3]->pt + cur_quad.corners[(corner_idx + 3) & 3]->pt) / 2.f; + if (!arePointsOnSameSideFromLine(mid_pt1, mid_pt2, corner_pt, neighbor_pt)) + continue; + + const Point2f mid_pt3 = (cur_quad.corners[(corner_idx + 1) & 3]->pt + cur_quad.corners[(corner_idx + 2) & 3]->pt) / 2.f; + const Point2f mid_pt4 = (cur_quad.corners[(corner_idx + 3) & 3]->pt + cur_quad.corners[corner_idx]->pt) / 2.f; + if (!arePointsOnSameSideFromLine(mid_pt3, mid_pt4, corner_pt, neighbor_pt)) + continue; + + const Point2f neighbor_pt_diagonal = q_k.corners[(j + 2) & 3]->pt; + if (!arePointsOnSameSideFromLine(mid_pt1, mid_pt2, corner_pt, neighbor_pt_diagonal)) + continue; + + if (!arePointsOnSameSideFromLine(mid_pt3, mid_pt4, neighbor_pt, neighbor_pt_diagonal)) + continue; + closest_neighbor_idx = neighbor_idx; closest_quad_idx = k; closest_corner_idx = j; @@ -589,7 +623,7 @@ bool ChessBoardDetector::NeighborsFinder::findCornerNeighbor( if (cur_quad.neighbors[j] == closest_quad) break; - if (normL2Sqr(closest_corner_pt - all_quads_pts[(idx << 2) + j]) < min_dist) + if (normL2Sqr(closest_corner_pt - all_quads_pts[(quad_idx << 2) + j]) < min_dist) break; } if (j < 4) @@ -1793,6 +1827,7 @@ void ChessBoardDetector::findQuadNeighbors() bool found = neighborsFinder.findCornerNeighbor( idx, + i, pt, min_dist, radius, @@ -1813,6 +1848,7 @@ void ChessBoardDetector::findQuadNeighbors() found = neighborsFinder.findCornerNeighbor( closest_quad_idx, + closest_corner_idx, closest_corner_pt, min_dist, radius, From 75fca7d9d0e4b7fa796d36d4c4ad98f5d210a9b5 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Tue, 23 Jul 2024 15:42:21 +0300 Subject: [PATCH 16/17] Added fisheye::distort with non-identity projection matrix. 
--- modules/calib3d/include/opencv2/calib3d.hpp | 17 +++- modules/calib3d/src/fisheye.cpp | 42 ++++++++++ modules/calib3d/test/test_fisheye.cpp | 90 ++++++++++++++++++++- 3 files changed, 147 insertions(+), 2 deletions(-) diff --git a/modules/calib3d/include/opencv2/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d.hpp index aedbaf930a..9fc6773450 100644 --- a/modules/calib3d/include/opencv2/calib3d.hpp +++ b/modules/calib3d/include/opencv2/calib3d.hpp @@ -3835,10 +3835,25 @@ namespace fisheye @param distorted Output array of image points, 1xN/Nx1 2-channel, or vector\<Point2f\> . Note that the function assumes the camera intrinsic matrix of the undistorted points to be identity. - This means if you want to distort image points you have to multiply them with \f$K^{-1}\f$. + This means if you want to distort image points you have to multiply them with \f$K^{-1}\f$ or + use another function overload. */ CV_EXPORTS_W void distortPoints(InputArray undistorted, OutputArray distorted, InputArray K, InputArray D, double alpha = 0); + /** @overload + Overload of distortPoints function to handle cases when undistorted points were obtained with a non-identity + camera matrix, e.g. the output of #estimateNewCameraMatrixForUndistortRectify. + @param undistorted Array of object points, 1xN/Nx1 2-channel (or vector\<Point2f\> ), where N is + the number of points in the view. + @param Kundistorted Camera intrinsic matrix used as new camera matrix for undistortion. + @param K Camera intrinsic matrix \f$\cameramatrix{K}\f$. + @param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$. + @param alpha The skew coefficient. + @param distorted Output array of image points, 1xN/Nx1 2-channel, or vector\<Point2f\> .
+ @sa estimateNewCameraMatrixForUndistortRectify + */ + CV_EXPORTS_W void distortPoints(InputArray undistorted, OutputArray distorted, InputArray Kundistorted, InputArray K, InputArray D, double alpha = 0); + /** @brief Undistorts 2D points using fisheye model @param distorted Array of object points, 1xN/Nx1 2-channel (or vector\ ), where N is the diff --git a/modules/calib3d/src/fisheye.cpp b/modules/calib3d/src/fisheye.cpp index 751a1aa6da..4aec4324e0 100644 --- a/modules/calib3d/src/fisheye.cpp +++ b/modules/calib3d/src/fisheye.cpp @@ -315,6 +315,48 @@ void cv::fisheye::distortPoints(InputArray undistorted, OutputArray distorted, I } } +void cv::fisheye::distortPoints(InputArray _undistorted, OutputArray distorted, InputArray Kundistorted, InputArray K, InputArray D, double alpha) +{ + CV_INSTRUMENT_REGION(); + + CV_Assert(_undistorted.type() == CV_32FC2 || _undistorted.type() == CV_64FC2); + CV_Assert(Kundistorted.size() == Size(3,3) && (Kundistorted.type() == CV_32F || Kundistorted.type() == CV_64F)); + + cv::Mat undistorted = _undistorted.getMat(); + cv::Mat normalized(undistorted.size(), CV_64FC2); + + Mat Knew = Kundistorted.getMat(); + + double cx, cy, fx, fy; + if (Knew.depth() == CV_32F) + { + fx = (double)Knew.at(0, 0); + fy = (double)Knew.at(1, 1); + cx = (double)Knew.at(0, 2); + cy = (double)Knew.at(1, 2); + } + else + { + fx = Knew.at(0, 0); + fy = Knew.at(1, 1); + cx = Knew.at(0, 2); + cy = Knew.at(1, 2); + } + + size_t n = undistorted.total(); + const Vec2f* Xf = undistorted.ptr(); + const Vec2d* Xd = undistorted.ptr(); + Vec2d* normXd = normalized.ptr(); + for (size_t i = 0; i < n; i++) + { + Vec2d p = undistorted.depth() == CV_32F ? 
(Vec2d)Xf[i] : Xd[i]; + normXd[i][0] = (p[0] - cx) / fx; + normXd[i][1] = (p[1] - cy) / fy; + } + + cv::fisheye::distortPoints(normalized, distorted, K, D, alpha); +} + ////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// cv::fisheye::undistortPoints diff --git a/modules/calib3d/test/test_fisheye.cpp b/modules/calib3d/test/test_fisheye.cpp index 36b7d0d653..d7368c3190 100644 --- a/modules/calib3d/test/test_fisheye.cpp +++ b/modules/calib3d/test/test_fisheye.cpp @@ -107,7 +107,6 @@ TEST_F(fisheyeTest, distortUndistortPoints) int height = imageSize.height; /* Create test points */ - std::vector points0Vector; cv::Mat principalPoints = (cv::Mat_(5, 2) << K(0, 2), K(1, 2), // (cx, cy) /* Image corners */ 0, 0, @@ -150,6 +149,95 @@ TEST_F(fisheyeTest, distortUndistortPoints) } } +TEST_F(fisheyeTest, distortUndistortPointsNewCameraFixed) +{ + int width = imageSize.width; + int height = imageSize.height; + + /* Random points inside image */ + cv::Mat xy[2] = {}; + xy[0].create(100, 1, CV_64F); + theRNG().fill(xy[0], cv::RNG::UNIFORM, 0, width); // x + xy[1].create(100, 1, CV_64F); + theRNG().fill(xy[1], cv::RNG::UNIFORM, 0, height); // y + + cv::Mat randomPoints; + merge(xy, 2, randomPoints); + + cv::Mat points0 = randomPoints; + cv::Mat Reye = cv::Mat::eye(3, 3, CV_64FC1); + + cv::Mat Knew; + cv::fisheye::estimateNewCameraMatrixForUndistortRectify(K, D, imageSize, Reye, Knew); + + /* Distort -> Undistort */ + cv::Mat distortedPoints; + cv::fisheye::distortPoints(points0, distortedPoints, Knew, K, D); + cv::Mat undistortedPoints; + cv::fisheye::undistortPoints(distortedPoints, undistortedPoints, K, D, Reye, Knew); + + EXPECT_MAT_NEAR(points0, undistortedPoints, 1e-8); + + /* Undistort -> Distort */ + cv::fisheye::undistortPoints(points0, undistortedPoints, K, D, Reye, Knew); + cv::fisheye::distortPoints(undistortedPoints, distortedPoints, Knew, K, D); + + EXPECT_MAT_NEAR(points0, distortedPoints, 1e-8); +} + 
+TEST_F(fisheyeTest, distortUndistortPointsNewCameraRandom) +{ + int width = imageSize.width; + int height = imageSize.height; + + /* Create test points */ + std::vector points0Vector; + cv::Mat principalPoints = (cv::Mat_(5, 2) << K(0, 2), K(1, 2), // (cx, cy) + /* Image corners */ + 0, 0, + 0, height, + width, 0, + width, height + ); + + /* Random points inside image */ + cv::Mat xy[2] = {}; + xy[0].create(100, 1, CV_64F); + theRNG().fill(xy[0], cv::RNG::UNIFORM, 0, width); // x + xy[1].create(100, 1, CV_64F); + theRNG().fill(xy[1], cv::RNG::UNIFORM, 0, height); // y + + cv::Mat randomPoints; + merge(xy, 2, randomPoints); + + cv::Mat points0; + cv::Mat Reye = cv::Mat::eye(3, 3, CV_64FC1); + cv::vconcat(principalPoints.reshape(2), randomPoints, points0); + + /* Test with random D set */ + for (size_t i = 0; i < 10; ++i) { + cv::Mat distortion(1, 4, CV_64F); + theRNG().fill(distortion, cv::RNG::UNIFORM, -0.001, 0.001); + + cv::Mat Knew; + cv::fisheye::estimateNewCameraMatrixForUndistortRectify(K, distortion, imageSize, Reye, Knew); + + /* Distort -> Undistort */ + cv::Mat distortedPoints; + cv::fisheye::distortPoints(points0, distortedPoints, Knew, K, distortion); + cv::Mat undistortedPoints; + cv::fisheye::undistortPoints(distortedPoints, undistortedPoints, K, distortion, Reye, Knew); + + EXPECT_MAT_NEAR(points0, undistortedPoints, 1e-8); + + /* Undistort -> Distort */ + cv::fisheye::undistortPoints(points0, undistortedPoints, K, distortion, Reye, Knew); + cv::fisheye::distortPoints(undistortedPoints, distortedPoints, Knew, K, distortion); + + EXPECT_MAT_NEAR(points0, distortedPoints, 1e-8); + } +} + TEST_F(fisheyeTest, solvePnP) { const int n = 16; From 49459d46e2234363af81d1692511ae8cc0c256f9 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov <2536374+asmorkalov@users.noreply.github.com> Date: Tue, 6 Aug 2024 11:40:58 +0300 Subject: [PATCH 17/17] Merge pull request #25932 from asmorkalov:as/HAL_cvtColor_aprox Added xxxApprox overloads for YUV color conversions 
in HAL and AlgorithmHint to cvtColor #25932 The xxxApprox overloads allow implementing HAL functions with fewer bits for FP arithmetic. The hint was introduced in #25792 and #25911 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake --- modules/imgproc/include/opencv2/imgproc.hpp | 6 +- modules/imgproc/src/color.cpp | 23 +-- modules/imgproc/src/color.hpp | 18 +-- modules/imgproc/src/color_yuv.dispatch.cpp | 152 +++++++++++--------- modules/imgproc/src/hal_replacement.hpp | 148 +++++++++++++++++++ modules/imgproc/test/test_color.cpp | 6 +- 6 files changed, 265 insertions(+), 88 deletions(-) diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index 4456b3a88c..eb92e8cc21 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -3726,10 +3726,11 @@ floating-point. @param code color space conversion code (see #ColorConversionCodes). @param dstCn number of channels in the destination image; if the parameter is 0, the number of the channels is derived automatically from src and code. +@param hint Implementation modification flags. 
See #AlgorithmHint @see @ref imgproc_color_conversions */ -CV_EXPORTS_W void cvtColor( InputArray src, OutputArray dst, int code, int dstCn = 0 ); +CV_EXPORTS_W void cvtColor( InputArray src, OutputArray dst, int code, int dstCn = 0, AlgorithmHint hint = cv::ALGO_HINT_DEFAULT ); /** @brief Converts an image from one color space to another where the source image is stored in two planes. @@ -3748,8 +3749,9 @@ This function only supports YUV420 to RGB conversion as of now. - #COLOR_YUV2RGB_NV21 - #COLOR_YUV2BGRA_NV21 - #COLOR_YUV2RGBA_NV21 +@param hint Implementation modification flags. See #AlgorithmHint */ -CV_EXPORTS_W void cvtColorTwoPlane( InputArray src1, InputArray src2, OutputArray dst, int code ); +CV_EXPORTS_W void cvtColorTwoPlane( InputArray src1, InputArray src2, OutputArray dst, int code, AlgorithmHint hint = cv::ALGO_HINT_DEFAULT ); /** @brief main function for all demosaicing processes diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index dde8e1344c..703511b9cf 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -168,7 +168,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) // helper function for dual-plane modes -void cvtColorTwoPlane( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, int code ) +void cvtColorTwoPlane( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, int code, AlgorithmHint hint ) { // only YUV420 is currently supported switch (code) @@ -181,7 +181,7 @@ void cvtColorTwoPlane( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, in return; } - cvtColorTwoPlaneYUV2BGRpair(_ysrc, _uvsrc, _dst, dstChannels(code), swapBlue(code), uIndex(code)); + cvtColorTwoPlaneYUV2BGRpair(_ysrc, _uvsrc, _dst, hint, dstChannels(code), swapBlue(code), uIndex(code)); } @@ -189,10 +189,13 @@ void cvtColorTwoPlane( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, in // The main function // 
////////////////////////////////////////////////////////////////////////////////////////// -void cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) +void cvtColor( InputArray _src, OutputArray _dst, int code, int dcn, AlgorithmHint hint) { CV_INSTRUMENT_REGION(); + if (hint == cv::ALGO_HINT_DEFAULT) + hint = cv::getDefaultAlgorithmHint(); + CV_Assert(!_src.empty()); if(dcn <= 0) @@ -244,12 +247,12 @@ void cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb: case COLOR_BGR2YUV: case COLOR_RGB2YUV: - cvtColorBGR2YUV(_src, _dst, swapBlue(code), code == COLOR_BGR2YCrCb || code == COLOR_RGB2YCrCb); + cvtColorBGR2YUV(_src, _dst, hint, swapBlue(code), code == COLOR_BGR2YCrCb || code == COLOR_RGB2YCrCb); break; case COLOR_YCrCb2BGR: case COLOR_YCrCb2RGB: case COLOR_YUV2BGR: case COLOR_YUV2RGB: - cvtColorYUV2BGR(_src, _dst, dcn, swapBlue(code), code == COLOR_YCrCb2BGR || code == COLOR_YCrCb2RGB); + cvtColorYUV2BGR(_src, _dst, hint, dcn, swapBlue(code), code == COLOR_YCrCb2BGR || code == COLOR_YCrCb2RGB); break; case COLOR_BGR2XYZ: @@ -321,14 +324,14 @@ void cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) case COLOR_YUV2BGRA_NV21: case COLOR_YUV2RGBA_NV21: case COLOR_YUV2BGRA_NV12: case COLOR_YUV2RGBA_NV12: // http://www.fourcc.org/yuv.php#NV21 == yuv420sp -> a plane of 8 bit Y samples followed by an interleaved V/U plane containing 8 bit 2x2 subsampled chroma samples // http://www.fourcc.org/yuv.php#NV12 -> a plane of 8 bit Y samples followed by an interleaved U/V plane containing 8 bit 2x2 subsampled colour difference samples - cvtColorTwoPlaneYUV2BGR(_src, _dst, dcn, swapBlue(code), uIndex(code)); + cvtColorTwoPlaneYUV2BGR(_src, _dst, hint, dcn, swapBlue(code), uIndex(code)); break; case COLOR_YUV2BGR_YV12: case COLOR_YUV2RGB_YV12: case COLOR_YUV2BGRA_YV12: case COLOR_YUV2RGBA_YV12: case COLOR_YUV2BGR_IYUV: case COLOR_YUV2RGB_IYUV: case COLOR_YUV2BGRA_IYUV: case COLOR_YUV2RGBA_IYUV: 
//http://www.fourcc.org/yuv.php#YV12 == yuv420p -> It comprises an NxM Y plane followed by (N/2)x(M/2) V and U planes. //http://www.fourcc.org/yuv.php#IYUV == I420 -> It comprises an NxN Y plane followed by (N/2)x(N/2) U and V planes - cvtColorThreePlaneYUV2BGR(_src, _dst, dcn, swapBlue(code), uIndex(code)); + cvtColorThreePlaneYUV2BGR(_src, _dst, hint, dcn, swapBlue(code), uIndex(code)); break; case COLOR_YUV2GRAY_420: @@ -337,7 +340,7 @@ void cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) case COLOR_RGB2YUV_YV12: case COLOR_BGR2YUV_YV12: case COLOR_RGBA2YUV_YV12: case COLOR_BGRA2YUV_YV12: case COLOR_RGB2YUV_IYUV: case COLOR_BGR2YUV_IYUV: case COLOR_RGBA2YUV_IYUV: case COLOR_BGRA2YUV_IYUV: - cvtColorBGR2ThreePlaneYUV(_src, _dst, swapBlue(code), uIndex(code)); + cvtColorBGR2ThreePlaneYUV(_src, _dst, hint, swapBlue(code), uIndex(code)); break; case COLOR_YUV2RGB_UYVY: case COLOR_YUV2BGR_UYVY: case COLOR_YUV2RGBA_UYVY: case COLOR_YUV2BGRA_UYVY: @@ -349,7 +352,7 @@ void cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) { int ycn = (code==COLOR_YUV2RGB_UYVY || code==COLOR_YUV2BGR_UYVY || code==COLOR_YUV2RGBA_UYVY || code==COLOR_YUV2BGRA_UYVY) ? 1 : 0; - cvtColorOnePlaneYUV2BGR(_src, _dst, dcn, swapBlue(code), uIndex(code), ycn); + cvtColorOnePlaneYUV2BGR(_src, _dst, hint, dcn, swapBlue(code), uIndex(code), ycn); break; } @@ -362,7 +365,7 @@ void cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) { int ycn = (code==COLOR_RGB2YUV_UYVY || code==COLOR_BGR2YUV_UYVY || code==COLOR_RGBA2YUV_UYVY || code==COLOR_BGRA2YUV_UYVY) ? 
1 : 0; - cvtColorOnePlaneBGR2YUV(_src, _dst, swapBlue(code), uIndex(code), ycn); + cvtColorOnePlaneBGR2YUV(_src, _dst, hint, swapBlue(code), uIndex(code), ycn); break; } diff --git a/modules/imgproc/src/color.hpp b/modules/imgproc/src/color.hpp index 6ebca26a2c..883c9ccab4 100644 --- a/modules/imgproc/src/color.hpp +++ b/modules/imgproc/src/color.hpp @@ -556,15 +556,15 @@ void cvtColorLuv2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, bo void cvtColorBGR2XYZ( InputArray _src, OutputArray _dst, bool swapb ); void cvtColorXYZ2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb ); -void cvtColorBGR2YUV( InputArray _src, OutputArray _dst, bool swapb, bool crcb); -void cvtColorYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, bool crcb); - -void cvtColorOnePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx, int ycn); -void cvtColorOnePlaneBGR2YUV( InputArray _src, OutputArray _dst, bool swapb, int uidx, int ycn); -void cvtColorTwoPlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx ); -void cvtColorTwoPlaneYUV2BGRpair( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, int dcn, bool swapb, int uidx ); -void cvtColorThreePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx ); -void cvtColorBGR2ThreePlaneYUV( InputArray _src, OutputArray _dst, bool swapb, int uidx); +void cvtColorBGR2YUV( InputArray _src, OutputArray _dst, AlgorithmHint hint, bool swapb, bool crcb); +void cvtColorYUV2BGR( InputArray _src, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, bool crcb); + +void cvtColorOnePlaneYUV2BGR( InputArray _src, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, int uidx, int ycn ); +void cvtColorOnePlaneBGR2YUV( InputArray _src, OutputArray _dst, AlgorithmHint hint, bool swapb, int uidx, int ycn ); +void cvtColorTwoPlaneYUV2BGR( InputArray _src, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, int uidx ); +void 
cvtColorTwoPlaneYUV2BGRpair( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, int uidx ); +void cvtColorThreePlaneYUV2BGR( InputArray _src, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, int uidx ); +void cvtColorBGR2ThreePlaneYUV( InputArray _src, OutputArray _dst, AlgorithmHint hint, bool swapb, int uidx ); void cvtColorYUV2Gray_420( InputArray _src, OutputArray _dst ); void cvtColorYUV2Gray_ch( InputArray _src, OutputArray _dst, int coi ); diff --git a/modules/imgproc/src/color_yuv.dispatch.cpp b/modules/imgproc/src/color_yuv.dispatch.cpp index 71d840d857..89e933affa 100644 --- a/modules/imgproc/src/color_yuv.dispatch.cpp +++ b/modules/imgproc/src/color_yuv.dispatch.cpp @@ -18,13 +18,18 @@ namespace cv { namespace hal { // 8u, 16u, 32f -void cvtBGRtoYUV(const uchar * src_data, size_t src_step, +static void cvtBGRtoYUV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, - int depth, int scn, bool swapBlue, bool isCbCr) + int depth, int scn, bool swapBlue, bool isCbCr, AlgorithmHint hint) { CV_INSTRUMENT_REGION(); + if (hint == ALGO_HINT_APPROX) + { + CALL_HAL(cvtBGRtoYUV, cv_hal_cvtBGRtoYUVApprox, src_data, src_step, dst_data, dst_step, width, height, depth, scn, swapBlue, isCbCr); + } + CALL_HAL(cvtBGRtoYUV, cv_hal_cvtBGRtoYUV, src_data, src_step, dst_data, dst_step, width, height, depth, scn, swapBlue, isCbCr); #if defined(HAVE_IPP) @@ -66,13 +71,18 @@ void cvtBGRtoYUV(const uchar * src_data, size_t src_step, CV_CPU_DISPATCH_MODES_ALL); } -void cvtYUVtoBGR(const uchar * src_data, size_t src_step, +static void cvtYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, - int depth, int dcn, bool swapBlue, bool isCbCr) + int depth, int dcn, bool swapBlue, bool isCbCr, AlgorithmHint hint) { CV_INSTRUMENT_REGION(); + if (hint == ALGO_HINT_APPROX) + { + CALL_HAL(cvtYUVtoBGR, cv_hal_cvtYUVtoBGRApprox, src_data, 
src_step, dst_data, dst_step, width, height, depth, dcn, swapBlue, isCbCr); + } + CALL_HAL(cvtYUVtoBGR, cv_hal_cvtYUVtoBGR, src_data, src_step, dst_data, dst_step, width, height, depth, dcn, swapBlue, isCbCr); @@ -115,63 +125,79 @@ void cvtYUVtoBGR(const uchar * src_data, size_t src_step, CV_CPU_DISPATCH_MODES_ALL); } -// 4:2:0, two planes in one array: Y, UV interleaved +// 4:2:0, two planes: Y, UV interleaved // Y : [16, 235]; Cb, Cr: [16, 240] centered at 128 // 20-bit fixed-point arithmetics -void cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step, +static void cvtTwoPlaneYUVtoBGR(const uchar * y_data, size_t y_step, const uchar * uv_data, size_t uv_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, - int dcn, bool swapBlue, int uIdx) + int dcn, bool swapBlue, int uIdx, AlgorithmHint hint) { CV_INSTRUMENT_REGION(); - CALL_HAL(cvtTwoPlaneYUVtoBGR, cv_hal_cvtTwoPlaneYUVtoBGR, src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx); + if (hint == ALGO_HINT_APPROX) + { + CALL_HAL(cvtTwoPlaneYUVtoBGREx, cv_hal_cvtTwoPlaneYUVtoBGRExApprox, + y_data, y_step, uv_data, uv_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx); + } - cvtTwoPlaneYUVtoBGR( - src_data, src_step, src_data + src_step * dst_height, src_step, dst_data, dst_step, - dst_width, dst_height, dcn, swapBlue, uIdx); + CALL_HAL(cvtTwoPlaneYUVtoBGREx, cv_hal_cvtTwoPlaneYUVtoBGREx, + y_data, y_step, uv_data, uv_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx); + + CV_CPU_DISPATCH(cvtTwoPlaneYUVtoBGR, (y_data, y_step, uv_data, uv_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx), + CV_CPU_DISPATCH_MODES_ALL); } -// 4:2:0, two planes: Y, UV interleaved +// 4:2:0, two planes in one array: Y, UV interleaved // Y : [16, 235]; Cb, Cr: [16, 240] centered at 128 // 20-bit fixed-point arithmetics -void cvtTwoPlaneYUVtoBGR(const uchar * y_data, const uchar * uv_data, size_t src_step, +static void 
cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, - int dcn, bool swapBlue, int uIdx) + int dcn, bool swapBlue, int uIdx, AlgorithmHint hint) { CV_INSTRUMENT_REGION(); - cvtTwoPlaneYUVtoBGR(y_data, src_step, uv_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx); + if (hint == ALGO_HINT_APPROX) + { + CALL_HAL(cvtTwoPlaneYUVtoBGR, cv_hal_cvtTwoPlaneYUVtoBGRApprox, src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx); + } + + CALL_HAL(cvtTwoPlaneYUVtoBGR, cv_hal_cvtTwoPlaneYUVtoBGR, src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx); + + cvtTwoPlaneYUVtoBGR( + src_data, src_step, src_data + src_step * dst_height, src_step, dst_data, dst_step, + dst_width, dst_height, dcn, swapBlue, uIdx, hint); } // 4:2:0, two planes: Y, UV interleaved // Y : [16, 235]; Cb, Cr: [16, 240] centered at 128 // 20-bit fixed-point arithmetics -void cvtTwoPlaneYUVtoBGR(const uchar * y_data, size_t y_step, const uchar * uv_data, size_t uv_step, +static void cvtTwoPlaneYUVtoBGR(const uchar * y_data, const uchar * uv_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, - int dcn, bool swapBlue, int uIdx) + int dcn, bool swapBlue, int uIdx, AlgorithmHint hint) { CV_INSTRUMENT_REGION(); - CALL_HAL(cvtTwoPlaneYUVtoBGREx, cv_hal_cvtTwoPlaneYUVtoBGREx, - y_data, y_step, uv_data, uv_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx); - - CV_CPU_DISPATCH(cvtTwoPlaneYUVtoBGR, (y_data, y_step, uv_data, uv_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx), - CV_CPU_DISPATCH_MODES_ALL); + cvtTwoPlaneYUVtoBGR(y_data, src_step, uv_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx, hint); } // 4:2:0, three planes in one array: Y, U, V // Y : [16, 235]; Cb, Cr: [16, 240] centered at 128 // 20-bit fixed-point arithmetics -void 
cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step, +static void cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, - int dcn, bool swapBlue, int uIdx) + int dcn, bool swapBlue, int uIdx, AlgorithmHint hint) { CV_INSTRUMENT_REGION(); + if (hint == ALGO_HINT_APPROX) + { + CALL_HAL(cvtThreePlaneYUVtoBGR, cv_hal_cvtThreePlaneYUVtoBGRApprox, src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx); + } + CALL_HAL(cvtThreePlaneYUVtoBGR, cv_hal_cvtThreePlaneYUVtoBGR, src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx); CV_CPU_DISPATCH(cvtThreePlaneYUVtoBGR, (src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx), @@ -181,46 +207,39 @@ void cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step, // 4:2:0, three planes in one array: Y, U, V // Y : [16, 235]; Cb, Cr: [16, 240] centered at 128 // 20-bit fixed-point arithmetics -void cvtBGRtoThreePlaneYUV(const uchar * src_data, size_t src_step, +static void cvtBGRtoThreePlaneYUV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, - int scn, bool swapBlue, int uIdx) + int scn, bool swapBlue, int uIdx, AlgorithmHint hint) { CV_INSTRUMENT_REGION(); + if (hint == ALGO_HINT_APPROX) + { + CALL_HAL(cvtBGRtoThreePlaneYUV, cv_hal_cvtBGRtoThreePlaneYUVApprox, src_data, src_step, dst_data, dst_step, width, height, scn, swapBlue, uIdx); + } + CALL_HAL(cvtBGRtoThreePlaneYUV, cv_hal_cvtBGRtoThreePlaneYUV, src_data, src_step, dst_data, dst_step, width, height, scn, swapBlue, uIdx); CV_CPU_DISPATCH(cvtBGRtoThreePlaneYUV, (src_data, src_step, dst_data, dst_step, width, height, scn, swapBlue, uIdx), CV_CPU_DISPATCH_MODES_ALL); } -// 4:2:0, two planes: Y, UV interleaved -// Y : [16, 235]; Cb, Cr: [16, 240] centered at 128 -// 20-bit fixed-point arithmetics -void cvtBGRtoTwoPlaneYUV(const uchar * src_data, size_t 
src_step, - uchar * y_data, uchar * uv_data, size_t dst_step, - int width, int height, - int scn, bool swapBlue, int uIdx) -{ - CV_INSTRUMENT_REGION(); - - CALL_HAL(cvtBGRtoTwoPlaneYUV, cv_hal_cvtBGRtoTwoPlaneYUV, - src_data, src_step, y_data, dst_step, uv_data, dst_step, width, height, scn, swapBlue, uIdx); - - CV_CPU_DISPATCH(cvtBGRtoTwoPlaneYUV, (src_data, src_step, y_data, uv_data, dst_step, width, height, scn, swapBlue, uIdx), - CV_CPU_DISPATCH_MODES_ALL); -} - // 4:2:2 interleaved // Y : [16, 235]; Cb, Cr: [16, 240] centered at 128 // 20-bit fixed-point arithmetics -void cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step, +static void cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, - int dcn, bool swapBlue, int uIdx, int ycn) + int dcn, bool swapBlue, int uIdx, int ycn, AlgorithmHint hint) { CV_INSTRUMENT_REGION(); + if (hint == ALGO_HINT_APPROX) + { + CALL_HAL(cvtOnePlaneYUVtoBGR, cv_hal_cvtOnePlaneYUVtoBGRApprox, src_data, src_step, dst_data, dst_step, width, height, dcn, swapBlue, uIdx, ycn); + } + CALL_HAL(cvtOnePlaneYUVtoBGR, cv_hal_cvtOnePlaneYUVtoBGR, src_data, src_step, dst_data, dst_step, width, height, dcn, swapBlue, uIdx, ycn); CV_CPU_DISPATCH(cvtOnePlaneYUVtoBGR, (src_data, src_step, dst_data, dst_step, width, height, dcn, swapBlue, uIdx, ycn), @@ -230,13 +249,18 @@ void cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step, // 4:2:2 interleaved // Y : [16, 235]; Cb, Cr: [16, 240] centered at 128 // 14-bit fixed-point arithmetics is used -void cvtOnePlaneBGRtoYUV(const uchar * src_data, size_t src_step, +static void cvtOnePlaneBGRtoYUV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, - int scn, bool swapBlue, int uIdx, int ycn) + int scn, bool swapBlue, int uIdx, int ycn, AlgorithmHint hint) { CV_INSTRUMENT_REGION(); + if (hint == ALGO_HINT_APPROX) + { + CALL_HAL(cvtOnePlaneBGRtoYUV, 
cv_hal_cvtOnePlaneBGRtoYUVApprox, src_data, src_step, dst_data, dst_step, width, height, scn, swapBlue, uIdx, ycn); + } + CALL_HAL(cvtOnePlaneBGRtoYUV, cv_hal_cvtOnePlaneBGRtoYUV, src_data, src_step, dst_data, dst_step, width, height, scn, swapBlue, uIdx, ycn); CV_CPU_DISPATCH(cvtOnePlaneBGRtoYUV, (src_data, src_step, dst_data, dst_step, width, height, scn, swapBlue, uIdx, ycn), @@ -386,43 +410,43 @@ bool oclCvtColorBGR2ThreePlaneYUV( InputArray _src, OutputArray _dst, int bidx, // HAL calls // -void cvtColorBGR2YUV(InputArray _src, OutputArray _dst, bool swapb, bool crcb) +void cvtColorBGR2YUV(InputArray _src, OutputArray _dst, AlgorithmHint hint, bool swapb, bool crcb) { CvtHelper< Set<3, 4>, Set<3>, Set > h(_src, _dst, 3); hal::cvtBGRtoYUV(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, - h.depth, h.scn, swapb, crcb); + h.depth, h.scn, swapb, crcb, hint); } -void cvtColorYUV2BGR(InputArray _src, OutputArray _dst, int dcn, bool swapb, bool crcb) +void cvtColorYUV2BGR(InputArray _src, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, bool crcb) { if(dcn <= 0) dcn = 3; CvtHelper< Set<3>, Set<3, 4>, Set > h(_src, _dst, dcn); hal::cvtYUVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, - h.depth, dcn, swapb, crcb); + h.depth, dcn, swapb, crcb, hint); } // 4:2:2 interleaved // Y : [16, 235]; Cb, Cr: [16, 240] centered at 128 // 20-bit fixed-point arithmetics -void cvtColorOnePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx, int ycn) +void cvtColorOnePlaneYUV2BGR( InputArray _src, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, int uidx, int ycn) { CvtHelper< Set<2>, Set<3, 4>, Set, FROM_UYVY > h(_src, _dst, dcn); hal::cvtOnePlaneYUVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, - dcn, swapb, uidx, ycn); + dcn, swapb, uidx, ycn, hint); } // 4:2:2 interleaved // Y : [16, 235]; Cb, Cr: [16, 240] centered at 128 // 14-bit fixed-point 
arithmetics is used -void cvtColorOnePlaneBGR2YUV( InputArray _src, OutputArray _dst, bool swapb, int uidx, int ycn) +void cvtColorOnePlaneBGR2YUV( InputArray _src, OutputArray _dst, AlgorithmHint hint, bool swapb, int uidx, int ycn) { CvtHelper< Set<3, 4>, Set<2>, Set, TO_UYVY > h(_src, _dst, 2); hal::cvtOnePlaneBGRtoYUV(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, - h.scn, swapb, uidx, ycn); + h.scn, swapb, uidx, ycn, hint); } void cvtColorYUV2Gray_ch( InputArray _src, OutputArray _dst, int coi ) @@ -435,12 +459,12 @@ void cvtColorYUV2Gray_ch( InputArray _src, OutputArray _dst, int coi ) // 4:2:0, three planes in one array: Y, U, V // Y : [16, 235]; Cb, Cr: [16, 240] centered at 128 // 20-bit fixed-point arithmetics -void cvtColorBGR2ThreePlaneYUV( InputArray _src, OutputArray _dst, bool swapb, int uidx) +void cvtColorBGR2ThreePlaneYUV( InputArray _src, OutputArray _dst, AlgorithmHint hint, bool swapb, int uidx) { CvtHelper< Set<3, 4>, Set<1>, Set, TO_YUV > h(_src, _dst, 1); hal::cvtBGRtoThreePlaneYUV(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows, - h.scn, swapb, uidx); + h.scn, swapb, uidx, hint); } void cvtColorYUV2Gray_420( InputArray _src, OutputArray _dst ) @@ -460,32 +484,32 @@ void cvtColorYUV2Gray_420( InputArray _src, OutputArray _dst ) // 4:2:0, three planes in one array: Y, U, V // Y : [16, 235]; Cb, Cr: [16, 240] centered at 128 // 20-bit fixed-point arithmetics -void cvtColorThreePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx) +void cvtColorThreePlaneYUV2BGR( InputArray _src, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, int uidx) { if(dcn <= 0) dcn = 3; CvtHelper< Set<1>, Set<3, 4>, Set, FROM_YUV> h(_src, _dst, dcn); hal::cvtThreePlaneYUVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.dst.cols, h.dst.rows, - dcn, swapb, uidx); + dcn, swapb, uidx, hint); } // 4:2:0, two planes in one array: Y, UV interleaved // Y : [16, 235]; Cb, Cr: [16, 240] 
centered at 128 // 20-bit fixed-point arithmetics // see also: http://www.fourcc.org/yuv.php#NV21, http://www.fourcc.org/yuv.php#NV12 -void cvtColorTwoPlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx ) +void cvtColorTwoPlaneYUV2BGR( InputArray _src, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, int uidx ) { if(dcn <= 0) dcn = 3; CvtHelper< Set<1>, Set<3, 4>, Set, FROM_YUV> h(_src, _dst, dcn); hal::cvtTwoPlaneYUVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.dst.cols, h.dst.rows, - dcn, swapb, uidx); + dcn, swapb, uidx, hint); } // 4:2:0, two planes: Y, UV interleaved // Y : [16, 235]; Cb, Cr: [16, 240] centered at 128 // 20-bit fixed-point arithmetics -void cvtColorTwoPlaneYUV2BGRpair( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, int dcn, bool swapb, int uidx ) +void cvtColorTwoPlaneYUV2BGRpair( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, int uidx ) { int stype = _ysrc.type(); int depth = CV_MAT_DEPTH(stype); @@ -503,13 +527,13 @@ void cvtColorTwoPlaneYUV2BGRpair( InputArray _ysrc, InputArray _uvsrc, OutputArr { hal::cvtTwoPlaneYUVtoBGR(ysrc.data, uvsrc.data, ysrc.step, dst.data, dst.step, dst.cols, dst.rows, - dcn, swapb, uidx); + dcn, swapb, uidx, hint); } else { hal::cvtTwoPlaneYUVtoBGR(ysrc.data, ysrc.step, uvsrc.data, uvsrc.step, dst.data, dst.step, dst.cols, dst.rows, - dcn, swapb, uidx); + dcn, swapb, uidx, hint); } } diff --git a/modules/imgproc/src/hal_replacement.hpp b/modules/imgproc/src/hal_replacement.hpp index ceb6c8b0f6..1409dda991 100644 --- a/modules/imgproc/src/hal_replacement.hpp +++ b/modules/imgproc/src/hal_replacement.hpp @@ -499,6 +499,23 @@ inline int hal_ni_cvtGraytoBGR5x5(const uchar * src_data, size_t src_step, uchar */ inline int hal_ni_cvtBGRtoYUV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue, bool isCbCr) { return 
CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief Analog of hal_cvtBGRtoYUV, but allows approximations (not bit-exact) + @param src_data source image data + @param src_step source image step + @param dst_data destination image data + @param dst_step destination image step + @param width image width + @param height image height + @param depth image depth (one of CV_8U, CV_16U or CV_32F) + @param scn source image channels (3 or 4) + @param swapBlue if set to true B and R source channels will be swapped (treat as RGB) + @param isCbCr if set to true write output in YCbCr format + Convert from BGR, RGB, BGRA or RGBA to YUV or YCbCr. + */ +inline int hal_ni_cvtBGRtoYUVApprox(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue, bool isCbCr) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + + /** @brief hal_cvtYUVtoBGR @param src_data source image data @@ -515,6 +532,22 @@ inline int hal_ni_cvtBGRtoYUV(const uchar * src_data, size_t src_step, uchar * d */ inline int hal_ni_cvtYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn, bool swapBlue, bool isCbCr) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief Analog of hal_cvtYUVtoBGR, but allows approximations (not bit-exact) + @param src_data source image data + @param src_step source image step + @param dst_data destination image data + @param dst_step destination image step + @param width image width + @param height image height + @param depth image depth (one of CV_8U, CV_16U or CV_32F) + @param dcn destination image channels (3 or 4) + @param swapBlue if set to true B and R destination channels will be swapped (write RGB) + @param isCbCr if set to true treat source as YCbCr + Convert from YUV or YCbCr to BGR, RGB, BGRA or RGBA. 
+ */ +inline int hal_ni_cvtYUVtoBGRApprox(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn, bool swapBlue, bool isCbCr) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + /** @brief hal_cvtBGRtoXYZ @param src_data source image data @@ -630,6 +663,24 @@ inline int hal_ni_cvtLabtoBGR(const uchar * src_data, size_t src_step, uchar * d */ inline int hal_ni_cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief analog of hal_cvtTwoPlaneYUVtoBGR that allows approximations (not bit-exact) + @param src_data source image data + @param src_step source image step + @param dst_data destination image data + @param dst_step destination image step + @param dst_width destination image width + @param dst_height destination image height + @param dcn destination image channels (3 or 4) + @param swapBlue if set to true B and R destination channels will be swapped (write RGB) + @param uIdx U-channel index in the interleaved U/V plane (0 or 1) + Convert from YUV (YUV420sp (or NV12/NV21) - Y plane followed by interleaved U/V plane) to BGR, RGB, BGRA or RGBA. + Only for CV_8U. + Y : [16, 235]; Cb, Cr: [16, 240] centered at 128 + */ +inline int hal_ni_cvtTwoPlaneYUVtoBGRApprox(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + + /** @brief Extended version of hal_cvtTwoPlaneYUVtoBGR. 
@param y_data source image data (Y-plane) @@ -651,6 +702,27 @@ inline int hal_ni_cvtTwoPlaneYUVtoBGREx(const uchar * y_data, size_t y_step, con uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief Extended version of hal_cvtTwoPlaneYUVtoBGR that allows approximations (not bit-exact) + @param y_data source image data (Y-plane) + @param y_step source image step (Y-plane) + @param uv_data source image data (UV-plane) + @param uv_step source image step (UV-plane) + @param dst_data destination image data + @param dst_step destination image step + @param dst_width destination image width + @param dst_height destination image height + @param dcn destination image channels (3 or 4) + @param swapBlue if set to true B and R destination channels will be swapped (write RGB) + @param uIdx U-channel index in the interleaved U/V plane (0 or 1) + Convert from YUV (YUV420sp (or NV12/NV21) - Y plane followed by interleaved U/V plane) to BGR, RGB, BGRA or RGBA. + Only for CV_8U. 
+ Y : [16, 235]; Cb, Cr: [16, 240] centered at 128 + */ +inline int hal_ni_cvtTwoPlaneYUVtoBGRExApprox(const uchar * y_data, size_t y_step, const uchar * uv_data, size_t uv_step, + uchar * dst_data, size_t dst_step, int dst_width, int dst_height, + int dcn, bool swapBlue, int uIdx) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + /** @brief hal_cvtBGRtoTwoPlaneYUV @param src_data source image data @@ -690,6 +762,23 @@ inline int hal_ni_cvtBGRtoTwoPlaneYUV(const uchar * src_data, size_t src_step, */ inline int hal_ni_cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief Analog of hal_cvtThreePlaneYUVtoBGR that allows approximations (not bit-exact) + @param src_data source image data + @param src_step source image step + @param dst_data destination image data + @param dst_step destination image step + @param dst_width destination image width + @param dst_height destination image height + @param dcn destination image channels (3 or 4) + @param swapBlue if set to true B and R destination channels will be swapped (write RGB) + @param uIdx U-channel plane index (0 or 1) + Convert from YUV (YUV420p (or YV12/YV21) - Y plane followed by U and V planes) to BGR, RGB, BGRA or RGBA. + Only for CV_8U. 
+ Y : [16, 235]; Cb, Cr: [16, 240] centered at 128 + */ +inline int hal_ni_cvtThreePlaneYUVtoBGRApprox(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + /** @brief hal_cvtBGRtoThreePlaneYUV @param src_data source image data @@ -707,6 +796,24 @@ inline int hal_ni_cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step, */ inline int hal_ni_cvtBGRtoThreePlaneYUV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int scn, bool swapBlue, int uIdx) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief Analog of hal_cvtBGRtoThreePlaneYUV that allows approximations (not bit-exact) + @param src_data source image data + @param src_step source image step + @param dst_data destination image data + @param dst_step destination image step + @param width image width + @param height image height + @param scn source image channels (3 or 4) + @param swapBlue if set to true B and R source channels will be swapped (treat as RGB) + @param uIdx U-channel plane index (0 or 1) + Convert from BGR, RGB, BGRA or RGBA to YUV (YUV420p (or YV12/YV21) - Y plane followed by U and V planes). + Only for CV_8U. 
+ Y : [16, 235]; Cb, Cr: [16, 240] centered at 128 + */ +inline int hal_ni_cvtBGRtoThreePlaneYUVApprox(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int scn, bool swapBlue, int uIdx) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + + /** @brief hal_cvtOnePlaneYUVtoBGR @param src_data source image data @@ -725,6 +832,24 @@ inline int hal_ni_cvtBGRtoThreePlaneYUV(const uchar * src_data, size_t src_step, */ inline int hal_ni_cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int dcn, bool swapBlue, int uIdx, int ycn) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief analog of hal_cvtOnePlaneYUVtoBGR that allows approximations (not bit-exact) + @param src_data source image data + @param src_step source image step + @param dst_data destination image data + @param dst_step destination image step + @param width image width + @param height image height + @param dcn destination image channels (3 or 4) + @param swapBlue if set to true B and R destination channels will be swapped (write RGB) + @param uIdx U-channel index (0 or 1) + @param ycn Y-channel index (0 or 1) + Convert from interleaved YUV 4:2:2 (UYVY, YUY2 or YVYU) to BGR, RGB, BGRA or RGBA. + Only for CV_8U. 
+ Y : [16, 235]; Cb, Cr: [16, 240] centered at 128 + */ +inline int hal_ni_cvtOnePlaneYUVtoBGRApprox(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int dcn, bool swapBlue, int uIdx, int ycn) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + /** @brief hal_cvtOnePlaneBGRtoYUV @param src_data,src_step source image data and step @@ -740,6 +865,21 @@ inline int hal_ni_cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step, u */ inline int hal_ni_cvtOnePlaneBGRtoYUV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int scn, bool swapBlue, int uIdx, int ycn) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief analog of hal_cvtOnePlaneBGRtoYUV that allows approximations (not bit-exact) + @param src_data,src_step source image data and step + @param dst_data,dst_step destination image data and step + @param width,height image size + @param scn source image channels (3 or 4) + @param swapBlue if set to true B and R source channels will be swapped (treat as RGB) + @param uIdx U-channel index (0 or 1) + @param ycn Y-channel index (0 or 1) + Convert from BGR, RGB, BGRA or RGBA to interleaved YUV 4:2:2 (UYVY, YUY2 or YVYU). + Only for CV_8U.
+ Y : [16, 235]; Cb, Cr: [16, 240] centered at 128 + */ +inline int hal_ni_cvtOnePlaneBGRtoYUVApprox(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int scn, bool swapBlue, int uIdx, int ycn) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + /** @brief hal_cvtRGBAtoMultipliedRGBA @param src_data source image data @@ -775,7 +915,9 @@ inline int hal_ni_cvtMultipliedRGBAtoRGBA(const uchar * src_data, size_t src_ste #define cv_hal_cvtBGR5x5toGray hal_ni_cvtBGR5x5toGray #define cv_hal_cvtGraytoBGR5x5 hal_ni_cvtGraytoBGR5x5 #define cv_hal_cvtBGRtoYUV hal_ni_cvtBGRtoYUV +#define cv_hal_cvtBGRtoYUVApprox hal_ni_cvtBGRtoYUVApprox #define cv_hal_cvtYUVtoBGR hal_ni_cvtYUVtoBGR +#define cv_hal_cvtYUVtoBGRApprox hal_ni_cvtYUVtoBGRApprox #define cv_hal_cvtBGRtoXYZ hal_ni_cvtBGRtoXYZ #define cv_hal_cvtXYZtoBGR hal_ni_cvtXYZtoBGR #define cv_hal_cvtBGRtoHSV hal_ni_cvtBGRtoHSV @@ -783,12 +925,18 @@ inline int hal_ni_cvtMultipliedRGBAtoRGBA(const uchar * src_data, size_t src_ste #define cv_hal_cvtBGRtoLab hal_ni_cvtBGRtoLab #define cv_hal_cvtLabtoBGR hal_ni_cvtLabtoBGR #define cv_hal_cvtTwoPlaneYUVtoBGR hal_ni_cvtTwoPlaneYUVtoBGR +#define cv_hal_cvtTwoPlaneYUVtoBGRApprox hal_ni_cvtTwoPlaneYUVtoBGRApprox #define cv_hal_cvtTwoPlaneYUVtoBGREx hal_ni_cvtTwoPlaneYUVtoBGREx +#define cv_hal_cvtTwoPlaneYUVtoBGRExApprox hal_ni_cvtTwoPlaneYUVtoBGRExApprox #define cv_hal_cvtBGRtoTwoPlaneYUV hal_ni_cvtBGRtoTwoPlaneYUV #define cv_hal_cvtThreePlaneYUVtoBGR hal_ni_cvtThreePlaneYUVtoBGR +#define cv_hal_cvtThreePlaneYUVtoBGRApprox hal_ni_cvtThreePlaneYUVtoBGRApprox #define cv_hal_cvtBGRtoThreePlaneYUV hal_ni_cvtBGRtoThreePlaneYUV +#define cv_hal_cvtBGRtoThreePlaneYUVApprox hal_ni_cvtBGRtoThreePlaneYUVApprox #define cv_hal_cvtOnePlaneYUVtoBGR hal_ni_cvtOnePlaneYUVtoBGR +#define cv_hal_cvtOnePlaneYUVtoBGRApprox hal_ni_cvtOnePlaneYUVtoBGRApprox #define cv_hal_cvtOnePlaneBGRtoYUV hal_ni_cvtOnePlaneBGRtoYUV +#define cv_hal_cvtOnePlaneBGRtoYUVApprox 
hal_ni_cvtOnePlaneBGRtoYUVApprox #define cv_hal_cvtRGBAtoMultipliedRGBA hal_ni_cvtRGBAtoMultipliedRGBA #define cv_hal_cvtMultipliedRGBAtoRGBA hal_ni_cvtMultipliedRGBAtoRGBA //! @endcond diff --git a/modules/imgproc/test/test_color.cpp b/modules/imgproc/test/test_color.cpp index 60862b2805..1229a468eb 100644 --- a/modules/imgproc/test/test_color.cpp +++ b/modules/imgproc/test/test_color.cpp @@ -2657,7 +2657,7 @@ TEST(Imgproc_ColorLab_Full, bitExactness) Mat probe(256, 256, CV_8UC3), result; rng.fill(probe, RNG::UNIFORM, 0, 255, true); - cvtColor(probe, result, codes[c]); + cvtColor(probe, result, codes[c], 0, ALGO_HINT_ACCURATE); uint32_t h = adler32(result); uint32_t goodHash = hashes[c*nIterations + iter]; @@ -2749,7 +2749,7 @@ TEST(Imgproc_ColorLuv_Full, bitExactness) Mat probe(256, 256, CV_8UC3), result; rng.fill(probe, RNG::UNIFORM, 0, 255, true); - cvtColor(probe, result, codes[c]); + cvtColor(probe, result, codes[c], 0, ALGO_HINT_ACCURATE); uint32_t h = adler32(result); uint32_t goodHash = hashes[c*nIterations + iter]; @@ -2808,7 +2808,7 @@ void runCvtColorBitExactCheck(ColorConversionCodes code, int inputType, uint32_t Mat dst; rng.fill(src, RNG::UNIFORM, 0, 255, true); - cv::cvtColor(src, dst, code); + cv::cvtColor(src, dst, code, 0, ALGO_HINT_ACCURATE); uint32_t dst_hash = adler32(dst);