Merge pull request #25792 from asmorkalov:as/HAL_fast_GaussianBlur

Added flag to GaussianBlur for faster but not bit-exact implementation #25792

Rationale:
Current implementation of GaussianBlur is almost always bit-exact. It helps to get predictable results according platforms, but prohibits most of approximations and optimization tricks.

The patch converts `borderType` parameter to more generic `flags` and introduces `GAUSS_ALLOW_APPROXIMATIONS` flag to allow not bit-exact implementation. With the flag IPP and generic HAL implementation are called first. The flag naming and location is a subject for discussion.

Replaces https://github.com/opencv/opencv/pull/22073
Possibly related issue: https://github.com/opencv/opencv/issues/24135

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
pull/25903/head
Alexander Smorkalov 5 months ago committed by GitHub
parent 3ff97c5580
commit 15783d6598
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 6
      CMakeLists.txt
  2. 1
      doc/tutorials/introduction/config_reference/config_reference.markdown
  3. 4
      modules/core/CMakeLists.txt
  4. 12
      modules/core/include/opencv2/core.hpp
  5. 9
      modules/core/src/system.cpp
  6. 4
      modules/imgproc/include/opencv2/imgproc.hpp
  7. 58
      modules/imgproc/src/smooth.dispatch.cpp
  8. 56
      modules/imgproc/test/test_smooth_bitexact.cpp
  9. 4
      modules/python/test/test_misc.py
  10. 1
      modules/ts/src/ts.cpp

@ -1258,7 +1258,11 @@ if(CMAKE_GENERATOR MATCHES "Xcode|Visual Studio|Multi-Config")
else()
status(" Configuration:" ${CMAKE_BUILD_TYPE})
endif()
if(DEFINED OPENCV_ALGO_HINT_DEFAULT)
status(" Algorithm Hint:" ${OPENCV_ALGO_HINT_DEFAULT})
else()
status(" Algorithm Hint:" " ALGO_ACCURATE")
endif()
# ========================= CPU code generation mode =========================
status("")

@ -217,6 +217,7 @@ Following options can be used to produce special builds with instrumentation or
| `ENABLE_BUILD_HARDENING` | GCC, Clang, MSVC | Enable compiler options which reduce possibility of code exploitation. |
| `ENABLE_LTO` | GCC, Clang, MSVC | Enable Link Time Optimization (LTO). |
| `ENABLE_THIN_LTO` | Clang | Enable thin LTO which incorporates intermediate bitcode to binaries allowing consumers optimize their applications later. |
| `OPENCV_ALGO_HINT_DEFAULT` | Any | Set default OpenCV implementation hint value: `ALGO_ACCURATE` or `ALGO_APROX`. Dangerous! The option changes behaviour globally and may affect accuracy of many algorithms. |
@see [GCC instrumentation](https://gcc.gnu.org/onlinedocs/gcc/Instrumentation-Options.html)
@see [Build hardening](https://en.wikipedia.org/wiki/Hardening_(computing))

@ -186,6 +186,10 @@ if(OPENCV_SEMIHOSTING)
ocv_target_compile_definitions(${the_module} PRIVATE "-DOPENCV_SEMIHOSTING")
endif(OPENCV_SEMIHOSTING)
if(DEFINED OPENCV_ALGO_HINT_DEFAULT)
ocv_target_compile_definitions(${the_module} PRIVATE "-DOPENCV_ALGO_HINT_DEFAULT=${OPENCV_ALGO_HINT_DEFAULT}")
endif(DEFINED OPENCV_ALGO_HINT_DEFAULT)
if(HAVE_HPX)
ocv_target_link_libraries(${the_module} LINK_PRIVATE "${HPX_LIBRARIES}")
endif()

@ -150,6 +150,18 @@ It is possible to alternate error processing by using #redirectError().
*/
CV_EXPORTS CV_NORETURN void error(const Exception& exc);
/*! @brief Flags that allow to midify some functions behavior. Used as set of flags.
*/
enum AlgorithmHint {
ALGO_DEFAULT = 0, //!< Default algorithm behaviour defined during OpenCV build
ALGO_ACCURATE = 1, //!< Use generic portable implementation
ALGO_APPROX = 2, //!< Allow alternative approximations to get faster implementation. Behaviour and result depends on a platform
};
/*! @brief Returns ImplementationHint selected by default, a.k.a. `IMPL_DEFAULT` defined during OpenCV compilation.
*/
CV_EXPORTS_W AlgorithmHint getDefaultAlgorithmHint();
enum SortFlags { SORT_EVERY_ROW = 0, //!< each matrix row is sorted independently
SORT_EVERY_COLUMN = 1, //!< each matrix column is sorted
//!< independently; this flag and the previous one are

@ -46,6 +46,7 @@
#include <iostream>
#include <ostream>
#include <opencv2/core.hpp>
#include <opencv2/core/utils/configuration.private.hpp>
#include <opencv2/core/utils/trace.private.hpp>
@ -2888,6 +2889,14 @@ bool restoreFPDenormalsState(const FPDenormalsModeState& state)
} // namespace details
AlgorithmHint getDefaultAlgorithmHint()
{
#ifdef OPENCV_ALGO_HINT_DEFAULT
return OPENCV_ALGO_HINT_DEFAULT;
#else
return ALGO_ACCURATE;
#endif
};
} // namespace cv

@ -1536,12 +1536,14 @@ respectively (see #getGaussianKernel for details); to fully control the result r
possible future modifications of all this semantics, it is recommended to specify all of ksize,
sigmaX, and sigmaY.
@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported.
@param hint Implementation modfication flags. See #AlgorithmHint
@sa sepFilter2D, filter2D, blur, boxFilter, bilateralFilter, medianBlur
*/
CV_EXPORTS_W void GaussianBlur( InputArray src, OutputArray dst, Size ksize,
double sigmaX, double sigmaY = 0,
int borderType = BORDER_DEFAULT );
int borderType = BORDER_DEFAULT,
AlgorithmHint hint = cv::ALGO_DEFAULT );
/** @brief Applies the bilateral filter to an image.

@ -468,7 +468,7 @@ static bool openvx_gaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
#endif
#if defined ENABLE_IPP_GAUSSIAN_BLUR // see CMake's OPENCV_IPP_GAUSSIAN_BLUR option
#ifdef ENABLE_IPP_GAUSSIAN_BLUR // see CMake's OPENCV_IPP_GAUSSIAN_BLUR option
#define IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH 1
#define IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH 1
@ -526,14 +526,14 @@ private:
#endif
static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
static bool ipp_GaussianBlur(cv::Mat& src, cv::Mat& dst, Size ksize,
double sigma1, double sigma2, int borderType )
{
#ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP();
#if IPP_VERSION_X100 < 201800 && ((defined _MSC_VER && defined _M_IX86) || (defined __GNUC__ && defined __i386__))
CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
return false; // bug on ia32
#else
if(sigma1 != sigma2)
@ -548,8 +548,6 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
// Acquire data and begin processing
try
{
Mat src = _src.getMat();
Mat dst = _dst.getMat();
::ipp::IwiImage iwSrc = ippiGetImage(src);
::ipp::IwiImage iwDst = ippiGetImage(dst);
::ipp::IwiBorderSize borderSize = ::ipp::iwiSizeToBorderSize(ippiGetSize(ksize));
@ -589,7 +587,7 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
return true;
#endif
#else
CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
return false;
#endif
}
@ -610,10 +608,13 @@ static bool validateGaussianBlurKernel(std::vector<T>& kernel)
void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
double sigma1, double sigma2,
int borderType)
int borderType, AlgorithmHint hint)
{
CV_INSTRUMENT_REGION();
if (hint == cv::ALGO_DEFAULT)
hint = cv::getDefaultAlgorithmHint();
CV_Assert(!_src.empty());
int type = _src.type();
@ -693,7 +694,27 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
src2.locateROI( wsz, ofs );
CALL_HAL(gaussianBlurBinomial, cv_hal_gaussianBlurBinomial, src2.ptr(), src2.step, dst.ptr(), dst.step, src2.cols, src2.rows, sdepth, cn,
ofs.x, ofs.y, wsz.width - src2.cols - ofs.x, wsz.height - src2.rows - ofs.y, ksize.width, borderType&~BORDER_ISOLATED);
ofs.x, ofs.y, wsz.width - src2.cols - ofs.x, wsz.height - src2.rows - ofs.y, ksize.width,
borderType & ~BORDER_ISOLATED);
}
if (hint == ALGO_APPROX)
{
Point ofs;
Size wsz(src.cols, src.rows);
if(!(borderType & BORDER_ISOLATED))
src.locateROI( wsz, ofs );
CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn,
ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height,
sigma1, sigma2, borderType & ~BORDER_ISOLATED);
#ifdef ENABLE_IPP_GAUSSIAN_BLUR
// IPP is not bit-exact to OpenCV implementation
CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType));
#endif
CV_OVX_RUN(true,
openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType))
}
CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint16_t*)&fkx[0], (int)fkx.size(), (const uint16_t*)&fky[0], (int)fky.size(), borderType),
@ -747,6 +768,25 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
ofs.x, ofs.y, wsz.width - src2.cols - ofs.x, wsz.height - src2.rows - ofs.y, ksize.width, borderType&~BORDER_ISOLATED);
}
if (hint == ALGO_APPROX)
{
Point ofs;
Size wsz(src.cols, src.rows);
if(!(borderType & BORDER_ISOLATED))
src.locateROI( wsz, ofs );
CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn,
ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height,
sigma1, sigma2, borderType & ~BORDER_ISOLATED);
#ifdef ENABLE_IPP_GAUSSIAN_BLUR
// IPP is not bit-exact to OpenCV implementation
CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType));
#endif
CV_OVX_RUN(true,
openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType))
}
CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint32_t*)&fkx[0], (int)fkx.size(), (const uint32_t*)&fky[0], (int)fky.size(), borderType),
CV_CPU_DISPATCH_MODES_ALL);
@ -772,7 +812,7 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn,
ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height,
sigma1, sigma2, borderType&~BORDER_ISOLATED);
sigma1, sigma2, borderType & ~BORDER_ISOLATED);
CV_OVX_RUN(true,
openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType))

@ -244,7 +244,7 @@ static void checkGaussianBlur_8Uvs32F(const Mat& src8u, const Mat& src32f, int N
TEST(GaussianBlur_Bitexact, regression_9863)
{
Mat src8u = imread(cvtest::findDataFile("shared/lena.png"));
Mat src32f; src8u.convertTo(src32f, CV_32F);
Mat src32f; src8u.convertTo(src32f, CV_32F);
checkGaussianBlur_8Uvs32F(src8u, src32f, 151, 30);
}
@ -260,4 +260,58 @@ TEST(GaussianBlur_Bitexact, overflow_20792)
EXPECT_GT(count, nintyPercent);
}
CV_ENUM(GaussInputType, CV_8U, CV_16S);
CV_ENUM(GaussBorder, BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT_101);
struct GaussianBlurVsBitexact: public testing::TestWithParam<tuple<GaussInputType, int, double, GaussBorder>>
{
virtual void SetUp()
{
orig = imread(findDataFile("shared/lena.png"));
EXPECT_FALSE(orig.empty()) << "Cannot find test image shared/lena.png";
}
Mat orig;
};
// NOTE: The test was designed for IPP (-DOPENCV_IPP_GAUSSIAN_BLUR=ON)
// Should be extended after new HAL integration
TEST_P(GaussianBlurVsBitexact, approx)
{
auto testParams = GetParam();
int dtype = get<0>(testParams);
int ksize = get<1>(testParams);
double sigma = get<2>(testParams);
int border = get<3>(testParams);
Mat src;
orig.convertTo(src, dtype);
cv::Mat gt;
GaussianBlur(src, gt, Size(ksize, ksize), sigma, sigma, border, ALGO_ACCURATE);
cv::Mat dst;
GaussianBlur(src, dst, Size(ksize, ksize), sigma, sigma, border, ALGO_APPROX);
cv::Mat diff;
cv::absdiff(dst, gt, diff);
cv::Mat flatten_diff = diff.reshape(1, diff.rows);
int nz = countNonZero(flatten_diff);
EXPECT_LE(nz, 0.06*src.total()); // Less 6% of different pixels
double min_val, max_val;
minMaxLoc(flatten_diff, &min_val, &max_val);
EXPECT_LE(max_val, 2); // expectes results floating +-1
}
INSTANTIATE_TEST_CASE_P(/*nothing*/, GaussianBlurVsBitexact,
testing::Combine(
GaussInputType::all(),
testing::Values(3, 5, 7),
testing::Values(0.75, 1.25),
GaussBorder::all()
)
);
}} // namespace

@ -987,6 +987,10 @@ class SamplesFindFile(NewOpenCVTests):
except cv.error as _e:
pass
class AlgorithmImplHit(NewOpenCVTests):
def test_callable(self):
res = cv.getDefaultAlgorithmHint()
self.assertTrue(res is not None)
if __name__ == '__main__':
NewOpenCVTests.bootstrap()

@ -1126,6 +1126,7 @@ void SystemInfoCollector::OnTestProgramStart(const testing::UnitTest&)
recordPropertyVerbose("cv_vcs_version", "OpenCV VCS version", getSnippetFromConfig("Version control:", "\n"));
recordPropertyVerbose("cv_build_type", "Build type", getSnippetFromConfig("Configuration:", "\n"), CV_TEST_BUILD_CONFIG);
recordPropertyVerbose("cv_compiler", "Compiler", getSnippetFromConfig("C++ Compiler:", "\n"));
recordPropertyVerbose("implementation_hint", "Algorithm hint", getSnippetFromConfig("Algorithm Hint:", "\n"));
const char* parallelFramework = cv::currentParallelFramework();
if (parallelFramework)
{

Loading…
Cancel
Save