From 15783d65981d8978597c6b60e830e21e964cbdf9 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov <2536374+asmorkalov@users.noreply.github.com> Date: Fri, 12 Jul 2024 15:03:33 +0300 Subject: [PATCH] Merge pull request #25792 from asmorkalov:as/HAL_fast_GaussianBlur Added flag to GaussianBlur for faster but not bit-exact implementation #25792 Rationale: The current implementation of GaussianBlur is almost always bit-exact. This helps to get predictable results across platforms, but prohibits most approximations and optimization tricks. The patch converts the `borderType` parameter to a more generic `flags` and introduces the `GAUSS_ALLOW_APPROXIMATIONS` flag to allow a non-bit-exact implementation. With the flag set, the IPP and generic HAL implementations are called first. The flag naming and location are subject to discussion. Note: in the merged diff below, `borderType` is kept unchanged and the option is exposed as a separate `hint` parameter of type `AlgorithmHint` (value `ALGO_APPROX`). Replaces https://github.com/opencv/opencv/pull/22073 Possibly related issue: https://github.com/opencv/opencv/issues/24135 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [ ] The feature is well documented and sample code can be built with the project CMake --- CMakeLists.txt | 6 +- .../config_reference.markdown | 1 + modules/core/CMakeLists.txt | 4 ++ modules/core/include/opencv2/core.hpp | 12 ++++ modules/core/src/system.cpp | 9 +++ modules/imgproc/include/opencv2/imgproc.hpp | 4 +- modules/imgproc/src/smooth.dispatch.cpp | 58 ++++++++++++++++--- modules/imgproc/test/test_smooth_bitexact.cpp | 56 +++++++++++++++++- modules/python/test/test_misc.py | 4 ++ modules/ts/src/ts.cpp | 1 + 10 files changed, 143 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 30b205ecd8..c196d0f2be 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1258,7 +1258,11 @@ if(CMAKE_GENERATOR MATCHES "Xcode|Visual Studio|Multi-Config") else() status(" Configuration:" ${CMAKE_BUILD_TYPE}) endif() - +if(DEFINED OPENCV_ALGO_HINT_DEFAULT) + status(" Algorithm Hint:" ${OPENCV_ALGO_HINT_DEFAULT}) +else() + status(" Algorithm Hint:" " ALGO_ACCURATE") +endif() # ========================= CPU code generation mode ========================= status("") diff --git a/doc/tutorials/introduction/config_reference/config_reference.markdown b/doc/tutorials/introduction/config_reference/config_reference.markdown index 7ced9a2536..e43b8793e5 100644 --- a/doc/tutorials/introduction/config_reference/config_reference.markdown +++ b/doc/tutorials/introduction/config_reference/config_reference.markdown @@ -217,6 +217,7 @@ Following options can be used to produce special builds with instrumentation or | `ENABLE_BUILD_HARDENING` | GCC, Clang, MSVC | Enable compiler options which reduce possibility of code exploitation. | | `ENABLE_LTO` | GCC, Clang, MSVC | Enable Link Time Optimization (LTO). | | `ENABLE_THIN_LTO` | Clang | Enable thin LTO which incorporates intermediate bitcode to binaries allowing consumers optimize their applications later. 
| +| `OPENCV_ALGO_HINT_DEFAULT` | Any | Set default OpenCV implementation hint value: `ALGO_ACCURATE` or `ALGO_APPROX`. Dangerous! The option changes behaviour globally and may affect accuracy of many algorithms. | @see [GCC instrumentation](https://gcc.gnu.org/onlinedocs/gcc/Instrumentation-Options.html) @see [Build hardening](https://en.wikipedia.org/wiki/Hardening_(computing)) diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index 16f32c994a..ea1100c954 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -186,6 +186,10 @@ if(OPENCV_SEMIHOSTING) ocv_target_compile_definitions(${the_module} PRIVATE "-DOPENCV_SEMIHOSTING") endif(OPENCV_SEMIHOSTING) +if(DEFINED OPENCV_ALGO_HINT_DEFAULT) + ocv_target_compile_definitions(${the_module} PRIVATE "-DOPENCV_ALGO_HINT_DEFAULT=${OPENCV_ALGO_HINT_DEFAULT}") +endif(DEFINED OPENCV_ALGO_HINT_DEFAULT) + if(HAVE_HPX) ocv_target_link_libraries(${the_module} LINK_PRIVATE "${HPX_LIBRARIES}") endif() diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index b58a3a6ccb..4bfb95fede 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -150,6 +150,18 @@ It is possible to alternate error processing by using #redirectError(). */ CV_EXPORTS CV_NORETURN void error(const Exception& exc); +/*! @brief Flags that allow modifying the behavior of some functions. Used as a set of flags. +*/ +enum AlgorithmHint { + ALGO_DEFAULT = 0, //!< Default algorithm behaviour defined during OpenCV build + ALGO_ACCURATE = 1, //!< Use generic portable implementation + ALGO_APPROX = 2, //!< Allow alternative approximations to get faster implementation. Behaviour and results depend on the platform +}; + +/*! @brief Returns the AlgorithmHint selected by default, a.k.a. `ALGO_DEFAULT`, defined during OpenCV compilation. 
+ */ +CV_EXPORTS_W AlgorithmHint getDefaultAlgorithmHint(); + enum SortFlags { SORT_EVERY_ROW = 0, //!< each matrix row is sorted independently SORT_EVERY_COLUMN = 1, //!< each matrix column is sorted //!< independently; this flag and the previous one are diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index 8227175b6a..eccef84c92 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -46,6 +46,7 @@ #include #include +#include #include #include @@ -2888,6 +2889,14 @@ bool restoreFPDenormalsState(const FPDenormalsModeState& state) } // namespace details +AlgorithmHint getDefaultAlgorithmHint() +{ +#ifdef OPENCV_ALGO_HINT_DEFAULT + return OPENCV_ALGO_HINT_DEFAULT; +#else + return ALGO_ACCURATE; +#endif +}; } // namespace cv diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index 2f3c6f344f..53ff5ea6bd 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -1536,12 +1536,14 @@ respectively (see #getGaussianKernel for details); to fully control the result r possible future modifications of all this semantics, it is recommended to specify all of ksize, sigmaX, and sigmaY. @param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. +@param hint Implementation modification flags. See #AlgorithmHint @sa sepFilter2D, filter2D, blur, boxFilter, bilateralFilter, medianBlur */ CV_EXPORTS_W void GaussianBlur( InputArray src, OutputArray dst, Size ksize, double sigmaX, double sigmaY = 0, - int borderType = BORDER_DEFAULT ); + int borderType = BORDER_DEFAULT, + AlgorithmHint hint = cv::ALGO_DEFAULT ); /** @brief Applies the bilateral filter to an image. 
diff --git a/modules/imgproc/src/smooth.dispatch.cpp b/modules/imgproc/src/smooth.dispatch.cpp index d0f50a73bb..6bc989e520 100644 --- a/modules/imgproc/src/smooth.dispatch.cpp +++ b/modules/imgproc/src/smooth.dispatch.cpp @@ -468,7 +468,7 @@ static bool openvx_gaussianBlur(InputArray _src, OutputArray _dst, Size ksize, #endif -#if defined ENABLE_IPP_GAUSSIAN_BLUR // see CMake's OPENCV_IPP_GAUSSIAN_BLUR option +#ifdef ENABLE_IPP_GAUSSIAN_BLUR // see CMake's OPENCV_IPP_GAUSSIAN_BLUR option #define IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH 1 #define IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH 1 @@ -526,14 +526,14 @@ private: #endif -static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, +static bool ipp_GaussianBlur(cv::Mat& src, cv::Mat& dst, Size ksize, double sigma1, double sigma2, int borderType ) { #ifdef HAVE_IPP_IW CV_INSTRUMENT_REGION_IPP(); #if IPP_VERSION_X100 < 201800 && ((defined _MSC_VER && defined _M_IX86) || (defined __GNUC__ && defined __i386__)) - CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType); + CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType); return false; // bug on ia32 #else if(sigma1 != sigma2) @@ -548,8 +548,6 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, // Acquire data and begin processing try { - Mat src = _src.getMat(); - Mat dst = _dst.getMat(); ::ipp::IwiImage iwSrc = ippiGetImage(src); ::ipp::IwiImage iwDst = ippiGetImage(dst); ::ipp::IwiBorderSize borderSize = ::ipp::iwiSizeToBorderSize(ippiGetSize(ksize)); @@ -589,7 +587,7 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, return true; #endif #else - CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType); + CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType); return false; 
#endif } @@ -610,10 +608,13 @@ static bool validateGaussianBlurKernel(std::vector& kernel) void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, double sigma1, double sigma2, - int borderType) + int borderType, AlgorithmHint hint) { CV_INSTRUMENT_REGION(); + if (hint == cv::ALGO_DEFAULT) + hint = cv::getDefaultAlgorithmHint(); + CV_Assert(!_src.empty()); int type = _src.type(); @@ -693,7 +694,27 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, src2.locateROI( wsz, ofs ); CALL_HAL(gaussianBlurBinomial, cv_hal_gaussianBlurBinomial, src2.ptr(), src2.step, dst.ptr(), dst.step, src2.cols, src2.rows, sdepth, cn, - ofs.x, ofs.y, wsz.width - src2.cols - ofs.x, wsz.height - src2.rows - ofs.y, ksize.width, borderType&~BORDER_ISOLATED); + ofs.x, ofs.y, wsz.width - src2.cols - ofs.x, wsz.height - src2.rows - ofs.y, ksize.width, + borderType & ~BORDER_ISOLATED); + } + + if (hint == ALGO_APPROX) + { + Point ofs; + Size wsz(src.cols, src.rows); + if(!(borderType & BORDER_ISOLATED)) + src.locateROI( wsz, ofs ); + + CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn, + ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height, + sigma1, sigma2, borderType & ~BORDER_ISOLATED); + +#ifdef ENABLE_IPP_GAUSSIAN_BLUR + // IPP is not bit-exact to OpenCV implementation + CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)); +#endif + CV_OVX_RUN(true, + openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)) } CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint16_t*)&fkx[0], (int)fkx.size(), (const uint16_t*)&fky[0], (int)fky.size(), borderType), @@ -747,6 +768,25 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, ofs.x, ofs.y, wsz.width - src2.cols - ofs.x, wsz.height - src2.rows - ofs.y, ksize.width, borderType&~BORDER_ISOLATED); } + if (hint == ALGO_APPROX) + { + Point ofs; + Size 
wsz(src.cols, src.rows); + if(!(borderType & BORDER_ISOLATED)) + src.locateROI( wsz, ofs ); + + CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn, + ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height, + sigma1, sigma2, borderType & ~BORDER_ISOLATED); + +#ifdef ENABLE_IPP_GAUSSIAN_BLUR + // IPP is not bit-exact to OpenCV implementation + CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)); +#endif + CV_OVX_RUN(true, + openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)) + } + CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint32_t*)&fkx[0], (int)fkx.size(), (const uint32_t*)&fky[0], (int)fky.size(), borderType), CV_CPU_DISPATCH_MODES_ALL); @@ -772,7 +812,7 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn, ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height, - sigma1, sigma2, borderType&~BORDER_ISOLATED); + sigma1, sigma2, borderType & ~BORDER_ISOLATED); CV_OVX_RUN(true, openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)) diff --git a/modules/imgproc/test/test_smooth_bitexact.cpp b/modules/imgproc/test/test_smooth_bitexact.cpp index d4ae2af833..2d1f7b5a4e 100644 --- a/modules/imgproc/test/test_smooth_bitexact.cpp +++ b/modules/imgproc/test/test_smooth_bitexact.cpp @@ -244,7 +244,7 @@ static void checkGaussianBlur_8Uvs32F(const Mat& src8u, const Mat& src32f, int N TEST(GaussianBlur_Bitexact, regression_9863) { Mat src8u = imread(cvtest::findDataFile("shared/lena.png")); - Mat src32f; src8u.convertTo(src32f, CV_32F); + Mat src32f; src8u.convertTo(src32f, CV_32F); checkGaussianBlur_8Uvs32F(src8u, src32f, 151, 30); } @@ -260,4 +260,58 @@ TEST(GaussianBlur_Bitexact, overflow_20792) EXPECT_GT(count, 
nintyPercent); } +CV_ENUM(GaussInputType, CV_8U, CV_16S); +CV_ENUM(GaussBorder, BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT_101); + +struct GaussianBlurVsBitexact: public testing::TestWithParam> +{ + virtual void SetUp() + { + orig = imread(findDataFile("shared/lena.png")); + EXPECT_FALSE(orig.empty()) << "Cannot find test image shared/lena.png"; + } + + Mat orig; +}; + +// NOTE: The test was designed for IPP (-DOPENCV_IPP_GAUSSIAN_BLUR=ON) +// Should be extended after new HAL integration +TEST_P(GaussianBlurVsBitexact, approx) +{ + auto testParams = GetParam(); + int dtype = get<0>(testParams); + int ksize = get<1>(testParams); + double sigma = get<2>(testParams); + int border = get<3>(testParams); + + Mat src; + orig.convertTo(src, dtype); + + cv::Mat gt; + GaussianBlur(src, gt, Size(ksize, ksize), sigma, sigma, border, ALGO_ACCURATE); + + cv::Mat dst; + GaussianBlur(src, dst, Size(ksize, ksize), sigma, sigma, border, ALGO_APPROX); + + cv::Mat diff; + cv::absdiff(dst, gt, diff); + cv::Mat flatten_diff = diff.reshape(1, diff.rows); + + int nz = countNonZero(flatten_diff); + EXPECT_LE(nz, 0.06*src.total()); // Less 6% of different pixels + + double min_val, max_val; + minMaxLoc(flatten_diff, &min_val, &max_val); + EXPECT_LE(max_val, 2); // expectes results floating +-1 +} + +INSTANTIATE_TEST_CASE_P(/*nothing*/, GaussianBlurVsBitexact, + testing::Combine( + GaussInputType::all(), + testing::Values(3, 5, 7), + testing::Values(0.75, 1.25), + GaussBorder::all() + ) +); + }} // namespace diff --git a/modules/python/test/test_misc.py b/modules/python/test/test_misc.py index 08ab04d53d..ac2b02f875 100644 --- a/modules/python/test/test_misc.py +++ b/modules/python/test/test_misc.py @@ -987,6 +987,10 @@ class SamplesFindFile(NewOpenCVTests): except cv.error as _e: pass +class AlgorithmImplHit(NewOpenCVTests): + def test_callable(self): + res = cv.getDefaultAlgorithmHint() + self.assertTrue(res is not None) if __name__ == '__main__': NewOpenCVTests.bootstrap() diff 
--git a/modules/ts/src/ts.cpp b/modules/ts/src/ts.cpp index fb60a18ff1..9fe5cb3937 100644 --- a/modules/ts/src/ts.cpp +++ b/modules/ts/src/ts.cpp @@ -1126,6 +1126,7 @@ void SystemInfoCollector::OnTestProgramStart(const testing::UnitTest&) recordPropertyVerbose("cv_vcs_version", "OpenCV VCS version", getSnippetFromConfig("Version control:", "\n")); recordPropertyVerbose("cv_build_type", "Build type", getSnippetFromConfig("Configuration:", "\n"), CV_TEST_BUILD_CONFIG); recordPropertyVerbose("cv_compiler", "Compiler", getSnippetFromConfig("C++ Compiler:", "\n")); + recordPropertyVerbose("implementation_hint", "Algorithm hint", getSnippetFromConfig("Algorithm Hint:", "\n")); const char* parallelFramework = cv::currentParallelFramework(); if (parallelFramework) {