Merge pull request #25792 from asmorkalov:as/HAL_fast_GaussianBlur

Added flag to GaussianBlur for faster but not bit-exact implementation #25792

Rationale:
The current implementation of GaussianBlur is almost always bit-exact. This helps to get predictable results across platforms, but prohibits most approximations and optimization tricks.

The patch converts the `borderType` parameter to a more generic `flags` parameter and introduces the `GAUSS_ALLOW_APPROXIMATIONS` flag to allow a non-bit-exact implementation. With the flag set, the IPP and generic HAL implementations are called first. The flag naming and location are a subject for discussion.

Replaces https://github.com/opencv/opencv/pull/22073
Possibly related issue: https://github.com/opencv/opencv/issues/24135

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
pull/25903/head
Alexander Smorkalov 4 months ago committed by GitHub
parent 3ff97c5580
commit 15783d6598
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 6
      CMakeLists.txt
  2. 1
      doc/tutorials/introduction/config_reference/config_reference.markdown
  3. 4
      modules/core/CMakeLists.txt
  4. 12
      modules/core/include/opencv2/core.hpp
  5. 9
      modules/core/src/system.cpp
  6. 4
      modules/imgproc/include/opencv2/imgproc.hpp
  7. 58
      modules/imgproc/src/smooth.dispatch.cpp
  8. 56
      modules/imgproc/test/test_smooth_bitexact.cpp
  9. 4
      modules/python/test/test_misc.py
  10. 1
      modules/ts/src/ts.cpp

@ -1258,7 +1258,11 @@ if(CMAKE_GENERATOR MATCHES "Xcode|Visual Studio|Multi-Config")
else() else()
status(" Configuration:" ${CMAKE_BUILD_TYPE}) status(" Configuration:" ${CMAKE_BUILD_TYPE})
endif() endif()
# Report the default algorithm hint in the configuration summary.
# When OPENCV_ALGO_HINT_DEFAULT is not defined, ALGO_ACCURATE is printed.
if(DEFINED OPENCV_ALGO_HINT_DEFAULT)
status(" Algorithm Hint:" ${OPENCV_ALGO_HINT_DEFAULT})
else()
status(" Algorithm Hint:" " ALGO_ACCURATE")
endif()
# ========================= CPU code generation mode ========================= # ========================= CPU code generation mode =========================
status("") status("")

@ -217,6 +217,7 @@ Following options can be used to produce special builds with instrumentation or
| `ENABLE_BUILD_HARDENING` | GCC, Clang, MSVC | Enable compiler options which reduce possibility of code exploitation. | | `ENABLE_BUILD_HARDENING` | GCC, Clang, MSVC | Enable compiler options which reduce possibility of code exploitation. |
| `ENABLE_LTO` | GCC, Clang, MSVC | Enable Link Time Optimization (LTO). | | `ENABLE_LTO` | GCC, Clang, MSVC | Enable Link Time Optimization (LTO). |
| `ENABLE_THIN_LTO` | Clang | Enable thin LTO which incorporates intermediate bitcode to binaries allowing consumers optimize their applications later. | | `ENABLE_THIN_LTO` | Clang | Enable thin LTO which incorporates intermediate bitcode to binaries allowing consumers optimize their applications later. |
| `OPENCV_ALGO_HINT_DEFAULT` | Any | Set default OpenCV implementation hint value: `ALGO_ACCURATE` or `ALGO_APPROX`. Dangerous! The option changes behaviour globally and may affect accuracy of many algorithms. |
@see [GCC instrumentation](https://gcc.gnu.org/onlinedocs/gcc/Instrumentation-Options.html) @see [GCC instrumentation](https://gcc.gnu.org/onlinedocs/gcc/Instrumentation-Options.html)
@see [Build hardening](https://en.wikipedia.org/wiki/Hardening_(computing)) @see [Build hardening](https://en.wikipedia.org/wiki/Hardening_(computing))

@ -186,6 +186,10 @@ if(OPENCV_SEMIHOSTING)
ocv_target_compile_definitions(${the_module} PRIVATE "-DOPENCV_SEMIHOSTING") ocv_target_compile_definitions(${the_module} PRIVATE "-DOPENCV_SEMIHOSTING")
endif(OPENCV_SEMIHOSTING) endif(OPENCV_SEMIHOSTING)
# Pass the optional OPENCV_ALGO_HINT_DEFAULT value to the module sources
# as a private preprocessor definition of the same name.
if(DEFINED OPENCV_ALGO_HINT_DEFAULT)
ocv_target_compile_definitions(${the_module} PRIVATE "-DOPENCV_ALGO_HINT_DEFAULT=${OPENCV_ALGO_HINT_DEFAULT}")
endif(DEFINED OPENCV_ALGO_HINT_DEFAULT)
if(HAVE_HPX) if(HAVE_HPX)
ocv_target_link_libraries(${the_module} LINK_PRIVATE "${HPX_LIBRARIES}") ocv_target_link_libraries(${the_module} LINK_PRIVATE "${HPX_LIBRARIES}")
endif() endif()

@ -150,6 +150,18 @@ It is possible to alternate error processing by using #redirectError().
*/ */
CV_EXPORTS CV_NORETURN void error(const Exception& exc); CV_EXPORTS CV_NORETURN void error(const Exception& exc);
/*! @brief Flags that allow modifying the behavior of some functions. Used as a set of flags.
*/
enum AlgorithmHint {
ALGO_DEFAULT = 0, //!< Default algorithm behaviour defined during OpenCV build
ALGO_ACCURATE = 1, //!< Use generic portable implementation
ALGO_APPROX = 2, //!< Allow alternative approximations to get a faster implementation. Behaviour and result depend on the platform
};
/*! @brief Returns the AlgorithmHint selected by default, i.e. the value that `ALGO_DEFAULT` stands for, defined during OpenCV compilation.
*/
CV_EXPORTS_W AlgorithmHint getDefaultAlgorithmHint();
enum SortFlags { SORT_EVERY_ROW = 0, //!< each matrix row is sorted independently enum SortFlags { SORT_EVERY_ROW = 0, //!< each matrix row is sorted independently
SORT_EVERY_COLUMN = 1, //!< each matrix column is sorted SORT_EVERY_COLUMN = 1, //!< each matrix column is sorted
//!< independently; this flag and the previous one are //!< independently; this flag and the previous one are

@ -46,6 +46,7 @@
#include <iostream> #include <iostream>
#include <ostream> #include <ostream>
#include <opencv2/core.hpp>
#include <opencv2/core/utils/configuration.private.hpp> #include <opencv2/core/utils/configuration.private.hpp>
#include <opencv2/core/utils/trace.private.hpp> #include <opencv2/core/utils/trace.private.hpp>
@ -2888,6 +2889,14 @@ bool restoreFPDenormalsState(const FPDenormalsModeState& state)
} // namespace details } // namespace details
/** Returns the algorithm hint used by default.

The value is fixed at build time: OPENCV_ALGO_HINT_DEFAULT when that macro is
defined, ALGO_ACCURATE otherwise.
*/
AlgorithmHint getDefaultAlgorithmHint()
{
#ifdef OPENCV_ALGO_HINT_DEFAULT
    return OPENCV_ALGO_HINT_DEFAULT;
#else
    return ALGO_ACCURATE;
#endif
}
} // namespace cv } // namespace cv

@ -1536,12 +1536,14 @@ respectively (see #getGaussianKernel for details); to fully control the result r
possible future modifications of all this semantics, it is recommended to specify all of ksize, possible future modifications of all this semantics, it is recommended to specify all of ksize,
sigmaX, and sigmaY. sigmaX, and sigmaY.
@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported. @param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported.
@param hint Implementation modification flags. See #AlgorithmHint
@sa sepFilter2D, filter2D, blur, boxFilter, bilateralFilter, medianBlur @sa sepFilter2D, filter2D, blur, boxFilter, bilateralFilter, medianBlur
*/ */
CV_EXPORTS_W void GaussianBlur( InputArray src, OutputArray dst, Size ksize, CV_EXPORTS_W void GaussianBlur( InputArray src, OutputArray dst, Size ksize,
double sigmaX, double sigmaY = 0, double sigmaX, double sigmaY = 0,
int borderType = BORDER_DEFAULT ); int borderType = BORDER_DEFAULT,
AlgorithmHint hint = cv::ALGO_DEFAULT );
/** @brief Applies the bilateral filter to an image. /** @brief Applies the bilateral filter to an image.

@ -468,7 +468,7 @@ static bool openvx_gaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
#endif #endif
#if defined ENABLE_IPP_GAUSSIAN_BLUR // see CMake's OPENCV_IPP_GAUSSIAN_BLUR option #ifdef ENABLE_IPP_GAUSSIAN_BLUR // see CMake's OPENCV_IPP_GAUSSIAN_BLUR option
#define IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH 1 #define IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH 1
#define IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH 1 #define IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH 1
@ -526,14 +526,14 @@ private:
#endif #endif
static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, static bool ipp_GaussianBlur(cv::Mat& src, cv::Mat& dst, Size ksize,
double sigma1, double sigma2, int borderType ) double sigma1, double sigma2, int borderType )
{ {
#ifdef HAVE_IPP_IW #ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP(); CV_INSTRUMENT_REGION_IPP();
#if IPP_VERSION_X100 < 201800 && ((defined _MSC_VER && defined _M_IX86) || (defined __GNUC__ && defined __i386__)) #if IPP_VERSION_X100 < 201800 && ((defined _MSC_VER && defined _M_IX86) || (defined __GNUC__ && defined __i386__))
CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType); CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
return false; // bug on ia32 return false; // bug on ia32
#else #else
if(sigma1 != sigma2) if(sigma1 != sigma2)
@ -548,8 +548,6 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
// Acquire data and begin processing // Acquire data and begin processing
try try
{ {
Mat src = _src.getMat();
Mat dst = _dst.getMat();
::ipp::IwiImage iwSrc = ippiGetImage(src); ::ipp::IwiImage iwSrc = ippiGetImage(src);
::ipp::IwiImage iwDst = ippiGetImage(dst); ::ipp::IwiImage iwDst = ippiGetImage(dst);
::ipp::IwiBorderSize borderSize = ::ipp::iwiSizeToBorderSize(ippiGetSize(ksize)); ::ipp::IwiBorderSize borderSize = ::ipp::iwiSizeToBorderSize(ippiGetSize(ksize));
@ -589,7 +587,7 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
return true; return true;
#endif #endif
#else #else
CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType); CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
return false; return false;
#endif #endif
} }
@ -610,10 +608,13 @@ static bool validateGaussianBlurKernel(std::vector<T>& kernel)
void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
double sigma1, double sigma2, double sigma1, double sigma2,
int borderType) int borderType, AlgorithmHint hint)
{ {
CV_INSTRUMENT_REGION(); CV_INSTRUMENT_REGION();
if (hint == cv::ALGO_DEFAULT)
hint = cv::getDefaultAlgorithmHint();
CV_Assert(!_src.empty()); CV_Assert(!_src.empty());
int type = _src.type(); int type = _src.type();
@ -693,7 +694,27 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
src2.locateROI( wsz, ofs ); src2.locateROI( wsz, ofs );
CALL_HAL(gaussianBlurBinomial, cv_hal_gaussianBlurBinomial, src2.ptr(), src2.step, dst.ptr(), dst.step, src2.cols, src2.rows, sdepth, cn, CALL_HAL(gaussianBlurBinomial, cv_hal_gaussianBlurBinomial, src2.ptr(), src2.step, dst.ptr(), dst.step, src2.cols, src2.rows, sdepth, cn,
ofs.x, ofs.y, wsz.width - src2.cols - ofs.x, wsz.height - src2.rows - ofs.y, ksize.width, borderType&~BORDER_ISOLATED); ofs.x, ofs.y, wsz.width - src2.cols - ofs.x, wsz.height - src2.rows - ofs.y, ksize.width,
borderType & ~BORDER_ISOLATED);
}
if (hint == ALGO_APPROX)
{
Point ofs;
Size wsz(src.cols, src.rows);
if(!(borderType & BORDER_ISOLATED))
src.locateROI( wsz, ofs );
CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn,
ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height,
sigma1, sigma2, borderType & ~BORDER_ISOLATED);
#ifdef ENABLE_IPP_GAUSSIAN_BLUR
// IPP is not bit-exact to OpenCV implementation
CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType));
#endif
CV_OVX_RUN(true,
openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType))
} }
CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint16_t*)&fkx[0], (int)fkx.size(), (const uint16_t*)&fky[0], (int)fky.size(), borderType), CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint16_t*)&fkx[0], (int)fkx.size(), (const uint16_t*)&fky[0], (int)fky.size(), borderType),
@ -747,6 +768,25 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
ofs.x, ofs.y, wsz.width - src2.cols - ofs.x, wsz.height - src2.rows - ofs.y, ksize.width, borderType&~BORDER_ISOLATED); ofs.x, ofs.y, wsz.width - src2.cols - ofs.x, wsz.height - src2.rows - ofs.y, ksize.width, borderType&~BORDER_ISOLATED);
} }
if (hint == ALGO_APPROX)
{
Point ofs;
Size wsz(src.cols, src.rows);
if(!(borderType & BORDER_ISOLATED))
src.locateROI( wsz, ofs );
CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn,
ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height,
sigma1, sigma2, borderType & ~BORDER_ISOLATED);
#ifdef ENABLE_IPP_GAUSSIAN_BLUR
// IPP is not bit-exact to OpenCV implementation
CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType));
#endif
CV_OVX_RUN(true,
openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType))
}
CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint32_t*)&fkx[0], (int)fkx.size(), (const uint32_t*)&fky[0], (int)fky.size(), borderType), CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint32_t*)&fkx[0], (int)fkx.size(), (const uint32_t*)&fky[0], (int)fky.size(), borderType),
CV_CPU_DISPATCH_MODES_ALL); CV_CPU_DISPATCH_MODES_ALL);
@ -772,7 +812,7 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn, CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn,
ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height, ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height,
sigma1, sigma2, borderType&~BORDER_ISOLATED); sigma1, sigma2, borderType & ~BORDER_ISOLATED);
CV_OVX_RUN(true, CV_OVX_RUN(true,
openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType)) openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType))

@ -244,7 +244,7 @@ static void checkGaussianBlur_8Uvs32F(const Mat& src8u, const Mat& src32f, int N
TEST(GaussianBlur_Bitexact, regression_9863) TEST(GaussianBlur_Bitexact, regression_9863)
{ {
Mat src8u = imread(cvtest::findDataFile("shared/lena.png")); Mat src8u = imread(cvtest::findDataFile("shared/lena.png"));
Mat src32f; src8u.convertTo(src32f, CV_32F); Mat src32f; src8u.convertTo(src32f, CV_32F);
checkGaussianBlur_8Uvs32F(src8u, src32f, 151, 30); checkGaussianBlur_8Uvs32F(src8u, src32f, 151, 30);
} }
@ -260,4 +260,58 @@ TEST(GaussianBlur_Bitexact, overflow_20792)
EXPECT_GT(count, nintyPercent); EXPECT_GT(count, nintyPercent);
} }
// Input depths used as the first parameter of the GaussianBlurVsBitexact tests
CV_ENUM(GaussInputType, CV_8U, CV_16S);
// Border modes used as the last parameter of the GaussianBlurVsBitexact tests
CV_ENUM(GaussBorder, BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT_101);
// Fixture for the parameterized GaussianBlur comparison tests.
// Test parameters: input depth, kernel size, sigma, border mode.
struct GaussianBlurVsBitexact: public testing::TestWithParam<tuple<GaussInputType, int, double, GaussBorder>>
{
    // Loads the shared test image before each test.
    virtual void SetUp()
    {
        orig = imread(findDataFile("shared/lena.png"));
        // Fatal assertion: the test body must not run on an empty image.
        ASSERT_FALSE(orig.empty()) << "Cannot find test image shared/lena.png";
    }

    Mat orig; // image loaded in SetUp()
};
// NOTE: The test was designed for IPP (-DOPENCV_IPP_GAUSSIAN_BLUR=ON)
// Should be extended after new HAL integration
//
// Blurs the same image once with ALGO_ACCURATE and once with ALGO_APPROX and
// checks that the two results stay close to each other.
TEST_P(GaussianBlurVsBitexact, approx)
{
    const auto params = GetParam();
    const int depth = get<0>(params);
    const int kernelSide = get<1>(params);
    const double sigma = get<2>(params);
    const int borderMode = get<3>(params);
    const Size kernel(kernelSide, kernelSide);

    Mat src;
    orig.convertTo(src, depth);

    // Reference result requested with ALGO_ACCURATE
    cv::Mat reference;
    GaussianBlur(src, reference, kernel, sigma, sigma, borderMode, ALGO_ACCURATE);

    // Result requested with ALGO_APPROX
    cv::Mat approximated;
    GaussianBlur(src, approximated, kernel, sigma, sigma, borderMode, ALGO_APPROX);

    // Absolute difference, reshaped to a single channel so every value is counted
    cv::Mat diff;
    cv::absdiff(approximated, reference, diff);
    cv::Mat flatten_diff = diff.reshape(1, diff.rows);

    // The number of differing values must not exceed 6% of src.total()
    int nz = countNonZero(flatten_diff);
    EXPECT_LE(nz, 0.06*src.total());

    // The largest absolute difference must not exceed 2
    double min_val, max_val;
    minMaxLoc(flatten_diff, &min_val, &max_val);
    EXPECT_LE(max_val, 2);
}
// Instantiate the comparison for every combination of input depth,
// kernel size (3, 5, 7), sigma (0.75, 1.25) and border mode.
INSTANTIATE_TEST_CASE_P(/*nothing*/, GaussianBlurVsBitexact,
testing::Combine(
GaussInputType::all(),
testing::Values(3, 5, 7),
testing::Values(0.75, 1.25),
GaussBorder::all()
)
);
}} // namespace }} // namespace

@ -987,6 +987,10 @@ class SamplesFindFile(NewOpenCVTests):
except cv.error as _e: except cv.error as _e:
pass pass
class AlgorithmImplHit(NewOpenCVTests):
    # Smoke test: cv.getDefaultAlgorithmHint is callable from Python and returns a value.

    def test_callable(self):
        hint = cv.getDefaultAlgorithmHint()
        self.assertIsNotNone(hint)
if __name__ == '__main__': if __name__ == '__main__':
NewOpenCVTests.bootstrap() NewOpenCVTests.bootstrap()

@ -1126,6 +1126,7 @@ void SystemInfoCollector::OnTestProgramStart(const testing::UnitTest&)
recordPropertyVerbose("cv_vcs_version", "OpenCV VCS version", getSnippetFromConfig("Version control:", "\n")); recordPropertyVerbose("cv_vcs_version", "OpenCV VCS version", getSnippetFromConfig("Version control:", "\n"));
recordPropertyVerbose("cv_build_type", "Build type", getSnippetFromConfig("Configuration:", "\n"), CV_TEST_BUILD_CONFIG); recordPropertyVerbose("cv_build_type", "Build type", getSnippetFromConfig("Configuration:", "\n"), CV_TEST_BUILD_CONFIG);
recordPropertyVerbose("cv_compiler", "Compiler", getSnippetFromConfig("C++ Compiler:", "\n")); recordPropertyVerbose("cv_compiler", "Compiler", getSnippetFromConfig("C++ Compiler:", "\n"));
recordPropertyVerbose("implementation_hint", "Algorithm hint", getSnippetFromConfig("Algorithm Hint:", "\n"));
const char* parallelFramework = cv::currentParallelFramework(); const char* parallelFramework = cv::currentParallelFramework();
if (parallelFramework) if (parallelFramework)
{ {

Loading…
Cancel
Save