From 2799c74d50372bdf1b7a5258e5f53c45edbd662a Mon Sep 17 00:00:00 2001
From: Alexander Smorkalov
Date: Thu, 20 Jun 2024 17:22:36 +0300
Subject: [PATCH] Use Carotene implementation of TEGRA_GaussianBlurBinomial 3x3 and 5x5 on ARM.

---
 3rdparty/carotene/hal/tegra_hal.hpp | 79 ++++++++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/3rdparty/carotene/hal/tegra_hal.hpp b/3rdparty/carotene/hal/tegra_hal.hpp
index ce8fa90982..7f67ecf1a3 100644
--- a/3rdparty/carotene/hal/tegra_hal.hpp
+++ b/3rdparty/carotene/hal/tegra_hal.hpp
@@ -1286,7 +1286,6 @@ inline int TEGRA_SEPFILTERFREE(cvhalFilter2D *context)
 #undef cv_hal_sepFilterFree
 #define cv_hal_sepFilterFree TEGRA_SEPFILTERFREE
 
-
 struct MorphCtx
 {
     int operation;
@@ -1857,6 +1856,84 @@ TegraCvtColor_Invoker(bgrx2hsvf, bgrx2hsv, src_data + static_cast(range.
 #define cv_hal_cvtTwoPlaneYUVtoBGREx TEGRA_CVT2PYUVTOBGR_EX
 #endif
 
+// The optimized branch was developed for old armv7 processors and leads to perf degradation on armv8
+// NOTE(review): this guard tests DCAROTENE_NEON_ARCH verbatim; confirm that is the macro name the
+// build actually defines, otherwise everything up to the matching #endif is compiled out.
+#if defined(DCAROTENE_NEON_ARCH) && (DCAROTENE_NEON_ARCH == 7)
+// Maps an OpenCV HAL border type onto the corresponding Carotene border mode.
+// Any value without a case below yields BORDER_MODE_UNDEFINED.
+inline CAROTENE_NS::BORDER_MODE borderCV2Carotene(int borderType)
+{
+    CAROTENE_NS::BORDER_MODE mode = CAROTENE_NS::BORDER_MODE_UNDEFINED;
+
+    switch (borderType)
+    {
+    case CV_HAL_BORDER_CONSTANT:    mode = CAROTENE_NS::BORDER_MODE_CONSTANT;   break;
+    case CV_HAL_BORDER_REPLICATE:   mode = CAROTENE_NS::BORDER_MODE_REPLICATE;  break;
+    case CV_HAL_BORDER_REFLECT:     mode = CAROTENE_NS::BORDER_MODE_REFLECT;    break;
+    case CV_HAL_BORDER_WRAP:        mode = CAROTENE_NS::BORDER_MODE_WRAP;       break;
+    case CV_HAL_BORDER_REFLECT_101: mode = CAROTENE_NS::BORDER_MODE_REFLECT101; break;
+    default:                        break;
+    }
+
+    return mode;
+}
+
+// Binomial Gaussian blur HAL hook backed by Carotene.
+// ksize 3 is served only for single-channel CV_8U input when Carotene accepts the size, border
+// and margin; ksize 5 is served for CV_8U, CV_16U and CV_16S when Carotene accepts the size,
+// channel count and border. Returns CV_HAL_ERROR_OK after a Carotene kernel has been called and
+// CV_HAL_ERROR_NOT_IMPLEMENTED otherwise.
+inline int TEGRA_GaussianBlurBinomial(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step,
+                                      int width, int height, int depth, int cn, size_t margin_left, size_t margin_top,
+                                      size_t margin_right, size_t margin_bottom, size_t ksize, int border_type)
+{
+    CAROTENE_NS::Size2D imageSize(width, height);
+    CAROTENE_NS::BORDER_MODE borderMode = borderCV2Carotene(border_type);
+    // Margin arguments go in as left, right, top, bottom, unlike this function's parameter order.
+    CAROTENE_NS::Margin margin(margin_left, margin_right, margin_top, margin_bottom);
+
+    if (ksize == 3)
+    {
+        const bool singleChannel8u = (depth == CV_8U) && (cn == 1);
+        if (!singleChannel8u ||
+            !CAROTENE_NS::isGaussianBlur3x3MarginSupported(imageSize, borderMode, margin))
+            return CV_HAL_ERROR_NOT_IMPLEMENTED;
+
+        CAROTENE_NS::gaussianBlur3x3Margin(imageSize, src_data, src_step, dst_data, dst_step,
+                                           borderMode, 0, margin);
+        return CV_HAL_ERROR_OK;
+    }
+
+    if ((ksize != 5) || !CAROTENE_NS::isGaussianBlur5x5Supported(imageSize, cn, borderMode))
+        return CV_HAL_ERROR_NOT_IMPLEMENTED;
+
+    switch (depth)
+    {
+    case CV_8U:
+        CAROTENE_NS::gaussianBlur5x5(imageSize, cn, reinterpret_cast<const uint8_t*>(src_data), src_step,
+                                     reinterpret_cast<uint8_t*>(dst_data), dst_step, borderMode, 0, margin);
+        return CV_HAL_ERROR_OK;
+    case CV_16U:
+        CAROTENE_NS::gaussianBlur5x5(imageSize, cn, reinterpret_cast<const uint16_t*>(src_data), src_step,
+                                     reinterpret_cast<uint16_t*>(dst_data), dst_step, borderMode, 0, margin);
+        return CV_HAL_ERROR_OK;
+    case CV_16S:
+        CAROTENE_NS::gaussianBlur5x5(imageSize, cn, reinterpret_cast<const int16_t*>(src_data), src_step,
+                                     reinterpret_cast<int16_t*>(dst_data), dst_step, borderMode, 0, margin);
+        return CV_HAL_ERROR_OK;
+    default:
+        break;
+    }
+
+    return CV_HAL_ERROR_NOT_IMPLEMENTED;
+}
+
+#undef cv_hal_gaussianBlurBinomial
+#define cv_hal_gaussianBlurBinomial TEGRA_GaussianBlurBinomial
+
+#endif // DCAROTENE_NEON_ARCH=7
+
 #endif // OPENCV_IMGPROC_HAL_INTERFACE_H
 
 #endif