From 698b2bf729fbc44e83b5e15c1eb48f11fd7b0220 Mon Sep 17 00:00:00 2001 From: Yosshi999 Date: Tue, 1 Sep 2020 19:28:25 +0900 Subject: [PATCH] Merge pull request #18167 from Yosshi999:bit-exact-gaussian Bit exact gaussian blur for 16bit unsigned int * bit-exact gaussian kernel for CV_16U * SIMD optimization * template GaussianBlurFixedPoint * remove template specialization * simd support for h3N121 uint16 * test for u16 gaussian blur * remove unnecessary comments * fix return type of raw() * add typedef of native internal type in fixedpoint * update return type of raw() --- modules/imgproc/src/fixedpoint.inl.hpp | 38 ++-- modules/imgproc/src/smooth.dispatch.cpp | 46 ++++- modules/imgproc/src/smooth.simd.hpp | 162 ++++++++++++------ modules/imgproc/test/test_smooth_bitexact.cpp | 143 +++++++++++----- 4 files changed, 266 insertions(+), 123 deletions(-) diff --git a/modules/imgproc/src/fixedpoint.inl.hpp b/modules/imgproc/src/fixedpoint.inl.hpp index c3693b5d95..f5f433fec6 100644 --- a/modules/imgproc/src/fixedpoint.inl.hpp +++ b/modules/imgproc/src/fixedpoint.inl.hpp @@ -14,13 +14,14 @@ namespace { class fixedpoint64 { private: - static const int fixedShift = 32; - int64_t val; fixedpoint64(int64_t _val) : val(_val) {} static CV_ALWAYS_INLINE uint64_t fixedround(const uint64_t& _val) { return (_val + ((1LL << fixedShift) >> 1)); } public: + static const int fixedShift = 32; + typedef fixedpoint64 WT; + typedef int64_t raw_t; CV_ALWAYS_INLINE fixedpoint64() { val = 0; } CV_ALWAYS_INLINE fixedpoint64(const fixedpoint64& v) { val = v.val; } CV_ALWAYS_INLINE fixedpoint64(const int8_t& _val) { val = ((int64_t)_val) << fixedShift; } @@ -97,13 +98,14 @@ public: class ufixedpoint64 { private: - static const int fixedShift = 32; - uint64_t val; ufixedpoint64(uint64_t _val) : val(_val) {} static CV_ALWAYS_INLINE uint64_t fixedround(const uint64_t& _val) { return (_val + ((1LL << fixedShift) >> 1)); } public: + static const int fixedShift = 32; + typedef ufixedpoint64 WT; + typedef uint64_t raw_t; CV_ALWAYS_INLINE ufixedpoint64() { val = 0; } CV_ALWAYS_INLINE ufixedpoint64(const ufixedpoint64& v) { val = v.val; } CV_ALWAYS_INLINE ufixedpoint64(const uint8_t& _val) { val = ((uint64_t)_val) << fixedShift; } @@ -157,6 +159,9 @@ public: CV_ALWAYS_INLINE bool isZero() { return val == 0; } static CV_ALWAYS_INLINE ufixedpoint64 zero() { return ufixedpoint64(); } static CV_ALWAYS_INLINE ufixedpoint64 one() { return ufixedpoint64((uint64_t)(1ULL << fixedShift)); } + + static CV_ALWAYS_INLINE ufixedpoint64 fromRaw(uint64_t v) { return ufixedpoint64(v); } + CV_ALWAYS_INLINE uint64_t raw() { return val; } CV_ALWAYS_INLINE uint32_t cvFloor() const { return cv::saturate_cast(val >> fixedShift); } friend class ufixedpoint32; }; @@ -164,13 +169,14 @@ public: class fixedpoint32 { private: - static const int fixedShift = 16; - int32_t val; fixedpoint32(int32_t _val) : val(_val) {} static CV_ALWAYS_INLINE uint32_t fixedround(const uint32_t& _val) { return (_val + ((1 << fixedShift) >> 1)); } public: + static const int fixedShift = 16; + typedef fixedpoint64 WT; + typedef int32_t raw_t; CV_ALWAYS_INLINE fixedpoint32() { val = 0; } CV_ALWAYS_INLINE fixedpoint32(const fixedpoint32& v) { val = v.val; } CV_ALWAYS_INLINE fixedpoint32(const int8_t& _val) { val = ((int32_t)_val) << fixedShift; } @@ -218,13 +224,14 @@ public: class ufixedpoint32 { private: - static const int fixedShift = 16; - uint32_t val; ufixedpoint32(uint32_t _val) : val(_val) {} static CV_ALWAYS_INLINE uint32_t fixedround(const uint32_t& _val) { return (_val + ((1 << fixedShift) >> 1)); } public: + static const int fixedShift = 16; + typedef ufixedpoint64 WT; + typedef uint32_t raw_t; CV_ALWAYS_INLINE ufixedpoint32() { val = 0; } CV_ALWAYS_INLINE ufixedpoint32(const ufixedpoint32& v) { val = v.val; } CV_ALWAYS_INLINE ufixedpoint32(const uint8_t& _val) { val = ((uint32_t)_val) << fixedShift; } @@ -262,19 +269,23 @@ public: CV_ALWAYS_INLINE bool isZero() { return val == 0; } static CV_ALWAYS_INLINE ufixedpoint32 zero() { return ufixedpoint32(); } static CV_ALWAYS_INLINE ufixedpoint32 one() { return ufixedpoint32((1U << fixedShift)); } + + static CV_ALWAYS_INLINE ufixedpoint32 fromRaw(uint32_t v) { return ufixedpoint32(v); } + CV_ALWAYS_INLINE uint32_t raw() { return val; } friend class ufixedpoint16; }; class fixedpoint16 { private: - static const int fixedShift = 8; - int16_t val; fixedpoint16(int16_t _val) : val(_val) {} static CV_ALWAYS_INLINE uint16_t fixedround(const uint16_t& _val) { return (_val + ((1 << fixedShift) >> 1)); } public: + static const int fixedShift = 8; + typedef fixedpoint32 WT; + typedef int16_t raw_t; CV_ALWAYS_INLINE fixedpoint16() { val = 0; } CV_ALWAYS_INLINE fixedpoint16(const fixedpoint16& v) { val = v.val; } CV_ALWAYS_INLINE fixedpoint16(const int8_t& _val) { val = ((int16_t)_val) << fixedShift; } @@ -315,13 +326,14 @@ public: class ufixedpoint16 { private: - static const int fixedShift = 8; - uint16_t val; ufixedpoint16(uint16_t _val) : val(_val) {} static CV_ALWAYS_INLINE uint16_t fixedround(const uint16_t& _val) { return (_val + ((1 << fixedShift) >> 1)); } public: + static const int fixedShift = 8; + typedef ufixedpoint32 WT; + typedef uint16_t raw_t; CV_ALWAYS_INLINE ufixedpoint16() { val = 0; } CV_ALWAYS_INLINE ufixedpoint16(const ufixedpoint16& v) { val = v.val; } CV_ALWAYS_INLINE ufixedpoint16(const uint8_t& _val) { val = ((uint16_t)_val) << fixedShift; } @@ -358,7 +370,7 @@ public: static CV_ALWAYS_INLINE ufixedpoint16 one() { return ufixedpoint16((uint16_t)(1 << fixedShift)); } static CV_ALWAYS_INLINE ufixedpoint16 fromRaw(uint16_t v) { return ufixedpoint16(v); } - CV_ALWAYS_INLINE ufixedpoint16 raw() { return val; } + CV_ALWAYS_INLINE uint16_t raw() { return val; } }; } diff --git a/modules/imgproc/src/smooth.dispatch.cpp b/modules/imgproc/src/smooth.dispatch.cpp index 65d1fc8ed6..69d07580f2 100644 --- a/modules/imgproc/src/smooth.dispatch.cpp +++ b/modules/imgproc/src/smooth.dispatch.cpp @@ -258,23 +258,20 @@ softdouble getGaussianKernelFixedPoint_ED(CV_OUT std::vector& result, c } static void getGaussianKernel(int n, double sigma, int ktype, Mat& res) { res = getGaussianKernel(n, sigma, ktype); } -template static void getGaussianKernel(int n, double sigma, int, std::vector& res); -//{ res = getFixedpointGaussianKernel(n, sigma); } - -template<> void getGaussianKernel(int n, double sigma, int, std::vector& res) +template static void getGaussianKernel(int n, double sigma, int, std::vector& res) { std::vector res_sd; softdouble s0 = getGaussianKernelBitExact(res_sd, n, sigma); CV_UNUSED(s0); std::vector fixed_256; - softdouble approx_err = getGaussianKernelFixedPoint_ED(fixed_256, res_sd, 8); + softdouble approx_err = getGaussianKernelFixedPoint_ED(fixed_256, res_sd, FT::fixedShift); CV_UNUSED(approx_err); res.resize(n); for (int i = 0; i < n; i++) { - res[i] = ufixedpoint16::fromRaw((uint16_t)fixed_256[i]); + res[i] = FT::fromRaw((typename FT::raw_t)fixed_256[i]); //printf("%03d: %d\n", i, res[i].raw()); } } @@ -688,6 +685,43 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize, return; } } + if(sdepth == CV_16U && ((borderType & BORDER_ISOLATED) || !_src.isSubmatrix())) + { + CV_LOG_INFO(NULL, "GaussianBlur: running bit-exact version..."); + + std::vector fkx, fky; + createGaussianKernels(fkx, fky, type, ksize, sigma1, sigma2); + + static bool param_check_gaussian_blur_bitexact_kernels = utils::getConfigurationParameterBool("OPENCV_GAUSSIANBLUR_CHECK_BITEXACT_KERNELS", false); + if (param_check_gaussian_blur_bitexact_kernels && !validateGaussianBlurKernel(fkx)) + { + CV_LOG_INFO(NULL, "GaussianBlur: bit-exact fx kernel can't be applied: ksize=" << ksize << " sigma=" << Size2d(sigma1, sigma2)); + } + else if (param_check_gaussian_blur_bitexact_kernels && !validateGaussianBlurKernel(fky)) + { + CV_LOG_INFO(NULL, "GaussianBlur: bit-exact fy kernel can't be applied: ksize=" << ksize << " sigma=" << Size2d(sigma1, sigma2)); + } + else + { + // TODO: implement ocl_sepFilter2D_BitExact -- how to deal with bdepth? + // CV_OCL_RUN(useOpenCL, + // ocl_sepFilter2D_BitExact(_src, _dst, sdepth, + // ksize, + // (const uint32_t*)&fkx[0], (const uint32_t*)&fky[0], + // Point(-1, -1), 0, borderType, + // 16/*shift_bits*/) + // ); + + Mat src = _src.getMat(); + Mat dst = _dst.getMat(); + + if (src.data == dst.data) + src = src.clone(); + CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint32_t*)&fkx[0], (int)fkx.size(), (const uint32_t*)&fky[0], (int)fky.size(), borderType), + CV_CPU_DISPATCH_MODES_ALL); + return; + } + } #ifdef HAVE_OPENCL if (useOpenCL) diff --git a/modules/imgproc/src/smooth.simd.hpp b/modules/imgproc/src/smooth.simd.hpp index 3102b36f74..2a7a8e72bb 100644 --- a/modules/imgproc/src/smooth.simd.hpp +++ b/modules/imgproc/src/smooth.simd.hpp @@ -54,9 +54,10 @@ namespace cv { CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN // forward declarations -void GaussianBlurFixedPoint(const Mat& src, /*const*/ Mat& dst, - const uint16_t/*ufixedpoint16*/* fkx, int fkx_size, - const uint16_t/*ufixedpoint16*/* fky, int fky_size, +template +void GaussianBlurFixedPoint(const Mat& src, Mat& dst, + const RFT* fkx, int fkx_size, + const RFT* fky, int fky_size, int borderType); #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY @@ -192,8 +193,9 @@ void hlineSmooth3N(const uint8_t* src, int cn, const ufi } } } -template -void hlineSmooth3N121(const ET* src, int cn, const FT*, int, FT* dst, int len, int borderType) + +template +void hlineSmooth3N121Impl(const ET* src, int cn, const FT*, int, FT* dst, int len, int borderType) { if (len == 1) { @@ -217,7 +219,13 @@ void hlineSmooth3N121(const ET* src, int cn, const FT*, int, FT* dst, int len, i } src += cn; dst += cn; - for (int i = cn; i < (len - 1)*cn; i++, src++, dst++) + int i = cn, lencn = (len - 1)*cn; +#if CV_SIMD + const int VECSZ = VFT::nlanes; + for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ) + v_store((typename FT::raw_t*)dst, (vx_load_expand(src - cn) + vx_load_expand(src + cn) + (vx_load_expand(src) << 1)) << (FT::fixedShift-2)); +#endif + for (; i < lencn; i++, src++, dst++) *dst = (FT(src[-cn])>>2) + (FT(src[cn])>>2) + (FT(src[0])>>1); // Point that fall right from border @@ -231,51 +239,19 @@ void hlineSmooth3N121(const ET* src, int cn, const FT*, int, FT* dst, int len, i } } } +template +void hlineSmooth3N121(const ET* src, int cn, const FT*, int, FT* dst, int len, int borderType); template <> -void hlineSmooth3N121(const uint8_t* src, int cn, const ufixedpoint16*, int, ufixedpoint16* dst, int len, int borderType) +void hlineSmooth3N121(const uint8_t* src, int cn, const ufixedpoint16* _m, int _n, ufixedpoint16* dst, int len, int borderType) { - if (len == 1) - { - if (borderType != BORDER_CONSTANT) - for (int k = 0; k < cn; k++) - dst[k] = ufixedpoint16(src[k]); - else - for (int k = 0; k < cn; k++) - dst[k] = ufixedpoint16(src[k]) >> 1; - } - else - { - // Point that fall left from border - for (int k = 0; k < cn; k++) - dst[k] = (ufixedpoint16(src[k])>>1) + (ufixedpoint16(src[cn + k])>>2); - if (borderType != BORDER_CONSTANT)// If BORDER_CONSTANT out of border values are equal to zero and could be skipped - { - int src_idx = borderInterpolate(-1, len, borderType); - for (int k = 0; k < cn; k++) - dst[k] = dst[k] + (ufixedpoint16(src[src_idx*cn + k])>>2); - } - - src += cn; dst += cn; - int i = cn, lencn = (len - 1)*cn; -#if CV_SIMD - const int VECSZ = v_uint16::nlanes; - for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ) - v_store((uint16_t*)dst, (vx_load_expand(src - cn) + vx_load_expand(src + cn) + (vx_load_expand(src) << 1)) << 6); -#endif - for (; i < lencn; i++, src++, dst++) - *((uint16_t*)dst) = (uint16_t(src[-cn]) + uint16_t(src[cn]) + (uint16_t(src[0]) << 1)) << 6; - - // Point that fall right from border - for (int k = 0; k < cn; k++) - dst[k] = (ufixedpoint16(src[k - cn])>>2) + (ufixedpoint16(src[k])>>1); - if (borderType != BORDER_CONSTANT)// If BORDER_CONSTANT out of border values are equal to zero and could be skipped - { - int src_idx = (borderInterpolate(len, len, borderType) - (len - 1))*cn; - for (int k = 0; k < cn; k++) - dst[k] = dst[k] + (ufixedpoint16(src[src_idx + k])>>2); - } - } + hlineSmooth3N121Impl(src, cn, _m, _n, dst, len, borderType); +} +template <> +void hlineSmooth3N121(const uint16_t* src, int cn, const ufixedpoint32* _m, int _n, ufixedpoint32* dst, int len, int borderType) +{ + hlineSmooth3N121Impl(src, cn, _m, _n, dst, len, borderType); } + template void hlineSmooth3Naba(const ET* src, int cn, const FT* m, int, FT* dst, int len, int borderType) { @@ -1376,6 +1352,28 @@ void vlineSmooth3N121(const ufixedpoint16* const * src, for (; i < len; i++) dst[i] = (((uint32_t)(((uint16_t*)(src[0]))[i]) + (uint32_t)(((uint16_t*)(src[2]))[i]) + ((uint32_t)(((uint16_t*)(src[1]))[i]) << 1)) + (1 << 9)) >> 10; } +template <> +void vlineSmooth3N121(const ufixedpoint32* const * src, const ufixedpoint32*, int, uint16_t* dst, int len) +{ + int i = 0; +#if CV_SIMD + const int VECSZ = v_uint32::nlanes; + for (; i <= len - 2*VECSZ; i += 2*VECSZ) + { + v_uint64 v_src00, v_src01, v_src02, v_src03, v_src10, v_src11, v_src12, v_src13, v_src20, v_src21, v_src22, v_src23; + v_expand(vx_load((uint32_t*)(src[0]) + i), v_src00, v_src01); + v_expand(vx_load((uint32_t*)(src[0]) + i + VECSZ), v_src02, v_src03); + v_expand(vx_load((uint32_t*)(src[1]) + i), v_src10, v_src11); + v_expand(vx_load((uint32_t*)(src[1]) + i + VECSZ), v_src12, v_src13); + v_expand(vx_load((uint32_t*)(src[2]) + i), v_src20, v_src21); + v_expand(vx_load((uint32_t*)(src[2]) + i + VECSZ), v_src22, v_src23); + v_store(dst + i, v_pack(v_rshr_pack<18>(v_src00 + v_src20 + (v_src10 + v_src10), v_src01 + v_src21 + (v_src11 + v_src11)), + v_rshr_pack<18>(v_src02 + v_src22 + (v_src12 + v_src12), v_src03 + v_src23 + (v_src13 + v_src13)))); + } +#endif + for (; i < len; i++) + dst[i] = (((uint64_t)((uint32_t*)(src[0]))[i]) + (uint64_t)(((uint32_t*)(src[2]))[i]) + ((uint64_t(((uint32_t*)(src[1]))[i]) << 1)) + (1 << 17)) >> 18; +} template void vlineSmooth5N(const FT* const * src, const FT* m, int, ET* dst, int len) { @@ -1525,6 +1523,39 @@ void vlineSmooth5N14641(const ufixedpoint16* const * src (((uint32_t)(((uint16_t*)(src[1]))[i]) + (uint32_t)(((uint16_t*)(src[3]))[i])) << 2) + (uint32_t)(((uint16_t*)(src[0]))[i]) + (uint32_t)(((uint16_t*)(src[4]))[i]) + (1 << 11)) >> 12; } +template <> +void vlineSmooth5N14641(const ufixedpoint32* const * src, const ufixedpoint32*, int, uint16_t* dst, int len) +{ + int i = 0; +#if CV_SIMD + const int VECSZ = v_uint32::nlanes; + for (; i <= len - 2*VECSZ; i += 2*VECSZ) + { + v_uint64 v_src00, v_src10, v_src20, v_src30, v_src40; + v_uint64 v_src01, v_src11, v_src21, v_src31, v_src41; + v_uint64 v_src02, v_src12, v_src22, v_src32, v_src42; + v_uint64 v_src03, v_src13, v_src23, v_src33, v_src43; + v_expand(vx_load((uint32_t*)(src[0]) + i), v_src00, v_src01); + v_expand(vx_load((uint32_t*)(src[0]) + i + VECSZ), v_src02, v_src03); + v_expand(vx_load((uint32_t*)(src[1]) + i), v_src10, v_src11); + v_expand(vx_load((uint32_t*)(src[1]) + i + VECSZ), v_src12, v_src13); + v_expand(vx_load((uint32_t*)(src[2]) + i), v_src20, v_src21); + v_expand(vx_load((uint32_t*)(src[2]) + i + VECSZ), v_src22, v_src23); + v_expand(vx_load((uint32_t*)(src[3]) + i), v_src30, v_src31); + v_expand(vx_load((uint32_t*)(src[3]) + i + VECSZ), v_src32, v_src33); + v_expand(vx_load((uint32_t*)(src[4]) + i), v_src40, v_src41); + v_expand(vx_load((uint32_t*)(src[4]) + i + VECSZ), v_src42, v_src43); + v_store(dst + i, v_pack(v_rshr_pack<20>((v_src20 << 2) + (v_src20 << 1) + ((v_src10 + v_src30) << 2) + v_src00 + v_src40, + (v_src21 << 2) + (v_src21 << 1) + ((v_src11 + v_src31) << 2) + v_src01 + v_src41), + v_rshr_pack<20>((v_src22 << 2) + (v_src22 << 1) + ((v_src12 + v_src32) << 2) + v_src02 + v_src42, + (v_src23 << 2) + (v_src23 << 1) + ((v_src13 + v_src33) << 2) + v_src03 + v_src43))); + } +#endif + for (; i < len; i++) + dst[i] = ((uint64_t)(((uint32_t*)(src[2]))[i]) * 6 + + (((uint64_t)(((uint32_t*)(src[1]))[i]) + (uint64_t)(((uint32_t*)(src[3]))[i])) << 2) + + (uint64_t)(((uint32_t*)(src[0]))[i]) + (uint64_t)(((uint32_t*)(src[4]))[i]) + (1 << 19)) >> 20; +} template void vlineSmooth(const FT* const * src, const FT* m, int n, ET* dst, int len) { @@ -2029,25 +2060,42 @@ private: } // namespace anon -void GaussianBlurFixedPoint(const Mat& src, /*const*/ Mat& dst, - const uint16_t/*ufixedpoint16*/* fkx, int fkx_size, - const uint16_t/*ufixedpoint16*/* fky, int fky_size, - int borderType) +template +void GaussianBlurFixedPointImpl(const Mat& src, /*const*/ Mat& dst, + const RFT* fkx, int fkx_size, + const RFT* fky, int fky_size, + int borderType) { CV_INSTRUMENT_REGION(); - CV_Assert(src.depth() == CV_8U && ((borderType & BORDER_ISOLATED) || !src.isSubmatrix())); - fixedSmoothInvoker invoker( - src.ptr(), src.step1(), - dst.ptr(), dst.step1(), dst.cols, dst.rows, dst.channels(), - (const ufixedpoint16*)fkx, fkx_size, (const ufixedpoint16*)fky, fky_size, + CV_Assert(src.depth() == DataType::depth && ((borderType & BORDER_ISOLATED) || !src.isSubmatrix())); + fixedSmoothInvoker invoker( + src.ptr(), src.step1(), + dst.ptr(), dst.step1(), dst.cols, dst.rows, dst.channels(), + (const FT*)fkx, fkx_size, (const FT*)fky, fky_size, borderType & ~BORDER_ISOLATED); { // TODO AVX guard (external call) parallel_for_(Range(0, dst.rows), invoker, std::max(1, std::min(getNumThreads(), getNumberOfCPUs()))); } } +template <> +void GaussianBlurFixedPoint(const Mat& src, /*const*/ Mat& dst, + const uint16_t/*ufixedpoint16*/* fkx, int fkx_size, + const uint16_t/*ufixedpoint16*/* fky, int fky_size, + int borderType) +{ + GaussianBlurFixedPointImpl(src, dst, fkx, fkx_size, fky, fky_size, borderType); +} +template <> +void GaussianBlurFixedPoint(const Mat& src, /*const*/ Mat& dst, + const uint32_t/*ufixedpoint32*/* fkx, int fkx_size, + const uint32_t/*ufixedpoint32*/* fky, int fky_size, + int borderType) +{ + GaussianBlurFixedPointImpl(src, dst, fkx, fkx_size, fky, fky_size, borderType); +} #endif CV_CPU_OPTIMIZATION_NAMESPACE_END } // namespace diff --git a/modules/imgproc/test/test_smooth_bitexact.cpp b/modules/imgproc/test/test_smooth_bitexact.cpp index 8151c48238..f446deb8d8 100644 --- a/modules/imgproc/test/test_smooth_bitexact.cpp +++ b/modules/imgproc/test/test_smooth_bitexact.cpp @@ -7,13 +7,15 @@ namespace opencv_test { namespace { static const int fixedShiftU8 = 8; - static const int64_t fixedOne = (1L << fixedShiftU8); - - int64_t v[][9] = { - { fixedOne }, // size 1, sigma 0 - { fixedOne >> 2, fixedOne >> 1, fixedOne >> 2 }, // size 3, sigma 0 - { fixedOne >> 4, fixedOne >> 2, 6 * (fixedOne >> 4), fixedOne >> 2, fixedOne >> 4 }, // size 5, sigma 0 - { fixedOne >> 5, 7 * (fixedOne >> 6), 7 * (fixedOne >> 5), 9 * (fixedOne >> 5), 7 * (fixedOne >> 5), 7 * (fixedOne >> 6), fixedOne >> 5 }, // size 7, sigma 0 + static const int64_t fixedOneU8 = (1L << fixedShiftU8); + static const int fixedShiftU16 = 16; + static const int64_t fixedOneU16 = (1L << fixedShiftU16); + + int64_t vU8[][9] = { + { fixedOneU8 }, // size 1, sigma 0 + { fixedOneU8 >> 2, fixedOneU8 >> 1, fixedOneU8 >> 2 }, // size 3, sigma 0 + { fixedOneU8 >> 4, fixedOneU8 >> 2, 6 * (fixedOneU8 >> 4), fixedOneU8 >> 2, fixedOneU8 >> 4 }, // size 5, sigma 0 + { fixedOneU8 >> 5, 7 * (fixedOneU8 >> 6), 7 * (fixedOneU8 >> 5), 9 * (fixedOneU8 >> 5), 7 * (fixedOneU8 >> 5), 7 * (fixedOneU8 >> 6), fixedOneU8 >> 5 }, // size 7, sigma 0 { 4, 13, 30, 51, 60, 51, 30, 13, 4 }, // size 9, sigma 0 #if 1 #define CV_TEST_INACCURATE_GAUSSIAN_BLUR @@ -24,6 +26,14 @@ namespace opencv_test { namespace { #endif }; + int64_t vU16[][9] = { + { fixedOneU16 }, // size 1, sigma 0 + { fixedOneU16 >> 2, fixedOneU16 >> 1, fixedOneU16 >> 2 }, // size 3, sigma 0 + { fixedOneU16 >> 4, fixedOneU16 >> 2, 6 * (fixedOneU16 >> 4), fixedOneU16 >> 2, fixedOneU16 >> 4 }, // size 5, sigma 0 + { fixedOneU16 >> 5, 7 * (fixedOneU16 >> 6), 7 * (fixedOneU16 >> 5), 9 * (fixedOneU16 >> 5), 7 * (fixedOneU16 >> 5), 7 * (fixedOneU16 >> 6), fixedOneU16 >> 5 }, // size 7, sigma 0 + { 4<<8, 13<<8, 30<<8, 51<<8, 60<<8, 51<<8, 30<<8, 13<<8, 4<<8 } // size 9, sigma 0 + }; + template T eval(Mat src, vector kernelx, vector kernely) { @@ -39,8 +49,6 @@ namespace opencv_test { namespace { return saturate_cast((val + fixedRound) >> (fixedShift * 2)); } -TEST(GaussianBlur_Bitexact, Linear8U) -{ struct testmode { int type; @@ -50,34 +58,6 @@ TEST(GaussianBlur_Bitexact, Linear8U) double sigma_y; vector kernel_x; vector kernel_y; - } modes[] = { - { CV_8UC1, Size( 1, 1), Size(3, 3), 0, 0, vector(v[1], v[1]+3), vector(v[1], v[1]+3) }, - { CV_8UC1, Size( 2, 2), Size(3, 3), 0, 0, vector(v[1], v[1]+3), vector(v[1], v[1]+3) }, - { CV_8UC1, Size( 3, 1), Size(3, 3), 0, 0, vector(v[1], v[1]+3), vector(v[1], v[1]+3) }, - { CV_8UC1, Size( 1, 3), Size(3, 3), 0, 0, vector(v[1], v[1]+3), vector(v[1], v[1]+3) }, - { CV_8UC1, Size( 3, 3), Size(3, 3), 0, 0, vector(v[1], v[1]+3), vector(v[1], v[1]+3) }, - { CV_8UC1, Size( 3, 3), Size(5, 5), 0, 0, vector(v[2], v[2]+5), vector(v[2], v[2]+5) }, - { CV_8UC1, Size( 3, 3), Size(7, 7), 0, 0, vector(v[3], v[3]+7), vector(v[3], v[3]+7) }, - { CV_8UC1, Size( 5, 5), Size(3, 3), 0, 0, vector(v[1], v[1]+3), vector(v[1], v[1]+3) }, - { CV_8UC1, Size( 5, 5), Size(5, 5), 0, 0, vector(v[2], v[2]+5), vector(v[2], v[2]+5) }, - { CV_8UC1, Size( 3, 5), Size(5, 5), 0, 0, vector(v[2], v[2]+5), vector(v[2], v[2]+5) }, - { CV_8UC1, Size( 5, 5), Size(5, 5), 0, 0, vector(v[2], v[2]+5), vector(v[2], v[2]+5) }, - { CV_8UC1, Size( 5, 5), Size(7, 7), 0, 0, vector(v[3], v[3]+7), vector(v[3], v[3]+7) }, - { CV_8UC1, Size( 7, 7), Size(7, 7), 0, 0, vector(v[3], v[3]+7), vector(v[3], v[3]+7) }, - { CV_8UC1, Size( 256, 128), Size(3, 3), 0, 0, vector(v[1], v[1]+3), vector(v[1], v[1]+3) }, - { CV_8UC2, Size( 256, 128), Size(3, 3), 0, 0, vector(v[1], v[1]+3), vector(v[1], v[1]+3) }, - { CV_8UC3, Size( 256, 128), Size(3, 3), 0, 0, vector(v[1], v[1]+3), vector(v[1], v[1]+3) }, - { CV_8UC4, Size( 256, 128), Size(3, 3), 0, 0, vector(v[1], v[1]+3), vector(v[1], v[1]+3) }, - { CV_8UC1, Size( 256, 128), Size(5, 5), 0, 0, vector(v[2], v[2]+5), vector(v[2], v[2]+5) }, - { CV_8UC1, Size( 256, 128), Size(7, 7), 0, 0, vector(v[3], v[3]+7), vector(v[3], v[3]+7) }, - { CV_8UC1, Size( 256, 128), Size(9, 9), 0, 0, vector(v[4], v[4]+9), vector(v[4], v[4]+9) }, -#ifdef CV_TEST_INACCURATE_GAUSSIAN_BLUR - { CV_8UC1, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector(v[5], v[5]+3), vector(v[6], v[6]+3) }, - { CV_8UC2, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector(v[5], v[5]+3), vector(v[6], v[6]+3) }, - { CV_8UC3, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector(v[5], v[5]+3), vector(v[6], v[6]+3) }, - { CV_8UC4, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector(v[5], v[5]+3), vector(v[6], v[6]+3) }, - { CV_8UC1, Size( 256, 128), Size(5, 5), 0.375, 0.75, vector(v[7], v[7]+5), vector(v[8], v[8]+5) } -#endif }; int bordermodes[] = { @@ -93,11 +73,12 @@ TEST(GaussianBlur_Bitexact, Linear8U) // BORDER_REFLECT_101 }; - for (int modeind = 0, _modecnt = sizeof(modes) / sizeof(modes[0]); modeind < _modecnt; ++modeind) + template + void checkMode(const testmode& mode) { - int type = modes[modeind].type, depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); - int dcols = modes[modeind].sz.width, drows = modes[modeind].sz.height; - Size kernel = modes[modeind].kernel; + int type = mode.type, depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + int dcols = mode.sz.width, drows = mode.sz.height; + Size kernel = mode.kernel; int rows = drows + 20, cols = dcols + 20; Mat src(rows, cols, type), refdst(drows, dcols, type), dst; @@ -142,25 +123,93 @@ TEST(GaussianBlur_Bitexact, Linear8U) for (int i = 0; i < dcols; i++) { if (depth == CV_8U) - dst_chan.at(j, i) = eval(src_chan(Rect(i,j,kernel.width,kernel.height)), modes[modeind].kernel_x, modes[modeind].kernel_y); + dst_chan.at(j, i) = eval(src_chan(Rect(i,j,kernel.width,kernel.height)), mode.kernel_x, mode.kernel_y); else if (depth == CV_16U) - dst_chan.at(j, i) = eval(src_chan(Rect(i, j, kernel.width, kernel.height)), modes[modeind].kernel_x, modes[modeind].kernel_y); + dst_chan.at(j, i) = eval(src_chan(Rect(i, j, kernel.width, kernel.height)), mode.kernel_x, mode.kernel_y); else if (depth == CV_16S) - dst_chan.at(j, i) = eval(src_chan(Rect(i, j, kernel.width, kernel.height)), modes[modeind].kernel_x, modes[modeind].kernel_y); + dst_chan.at(j, i) = eval(src_chan(Rect(i, j, kernel.width, kernel.height)), mode.kernel_x, mode.kernel_y); else if (depth == CV_32S) - dst_chan.at(j, i) = eval(src_chan(Rect(i, j, kernel.width, kernel.height)), modes[modeind].kernel_x, modes[modeind].kernel_y); + dst_chan.at(j, i) = eval(src_chan(Rect(i, j, kernel.width, kernel.height)), mode.kernel_x, mode.kernel_y); else CV_Assert(0); } mixChannels(dst_chan, refdst, toFrom, 1); } - cv::GaussianBlur(src_roi, dst, kernel, modes[modeind].sigma_x, modes[modeind].sigma_y, bordermodes[borderind]); + cv::GaussianBlur(src_roi, dst, kernel, mode.sigma_x, mode.sigma_y, bordermodes[borderind]); EXPECT_GE(0, cvtest::norm(refdst, dst, cv::NORM_L1)) - << "GaussianBlur " << cn << "-chan mat " << drows << "x" << dcols << " by kernel " << kernel << " sigma(" << modes[modeind].sigma_x << ";" << modes[modeind].sigma_y << ") failed with max diff " << cvtest::norm(refdst, dst, cv::NORM_INF); + << "GaussianBlur " << cn << "-chan mat " << drows << "x" << dcols << " by kernel " << kernel << " sigma(" << mode.sigma_x << ";" << mode.sigma_y << ") failed with max diff " << cvtest::norm(refdst, dst, cv::NORM_INF); } } + +TEST(GaussianBlur_Bitexact, Linear8U) +{ + testmode modes[] = { + { CV_8UC1, Size( 1, 1), Size(3, 3), 0, 0, vector(vU8[1], vU8[1]+3), vector(vU8[1], vU8[1]+3) }, + { CV_8UC1, Size( 2, 2), Size(3, 3), 0, 0, vector(vU8[1], vU8[1]+3), vector(vU8[1], vU8[1]+3) }, + { CV_8UC1, Size( 3, 1), Size(3, 3), 0, 0, vector(vU8[1], vU8[1]+3), vector(vU8[1], vU8[1]+3) }, + { CV_8UC1, Size( 1, 3), Size(3, 3), 0, 0, vector(vU8[1], vU8[1]+3), vector(vU8[1], vU8[1]+3) }, + { CV_8UC1, Size( 3, 3), Size(3, 3), 0, 0, vector(vU8[1], vU8[1]+3), vector(vU8[1], vU8[1]+3) }, + { CV_8UC1, Size( 3, 3), Size(5, 5), 0, 0, vector(vU8[2], vU8[2]+5), vector(vU8[2], vU8[2]+5) }, + { CV_8UC1, Size( 3, 3), Size(7, 7), 0, 0, vector(vU8[3], vU8[3]+7), vector(vU8[3], vU8[3]+7) }, + { CV_8UC1, Size( 5, 5), Size(3, 3), 0, 0, vector(vU8[1], vU8[1]+3), vector(vU8[1], vU8[1]+3) }, + { CV_8UC1, Size( 5, 5), Size(5, 5), 0, 0, vector(vU8[2], vU8[2]+5), vector(vU8[2], vU8[2]+5) }, + { CV_8UC1, Size( 3, 5), Size(5, 5), 0, 0, vector(vU8[2], vU8[2]+5), vector(vU8[2], vU8[2]+5) }, + { CV_8UC1, Size( 5, 5), Size(5, 5), 0, 0, vector(vU8[2], vU8[2]+5), vector(vU8[2], vU8[2]+5) }, + { CV_8UC1, Size( 5, 5), Size(7, 7), 0, 0, vector(vU8[3], vU8[3]+7), vector(vU8[3], vU8[3]+7) }, + { CV_8UC1, Size( 7, 7), Size(7, 7), 0, 0, vector(vU8[3], vU8[3]+7), vector(vU8[3], vU8[3]+7) }, + { CV_8UC1, Size( 256, 128), Size(3, 3), 0, 0, vector(vU8[1], vU8[1]+3), vector(vU8[1], vU8[1]+3) }, + { CV_8UC2, Size( 256, 128), Size(3, 3), 0, 0, vector(vU8[1], vU8[1]+3), vector(vU8[1], vU8[1]+3) }, + { CV_8UC3, Size( 256, 128), Size(3, 3), 0, 0, vector(vU8[1], vU8[1]+3), vector(vU8[1], vU8[1]+3) }, + { CV_8UC4, Size( 256, 128), Size(3, 3), 0, 0, vector(vU8[1], vU8[1]+3), vector(vU8[1], vU8[1]+3) }, + { CV_8UC1, Size( 256, 128), Size(5, 5), 0, 0, vector(vU8[2], vU8[2]+5), vector(vU8[2], vU8[2]+5) }, + { CV_8UC1, Size( 256, 128), Size(7, 7), 0, 0, vector(vU8[3], vU8[3]+7), vector(vU8[3], vU8[3]+7) }, + { CV_8UC1, Size( 256, 128), Size(9, 9), 0, 0, vector(vU8[4], vU8[4]+9), vector(vU8[4], vU8[4]+9) }, +#ifdef CV_TEST_INACCURATE_GAUSSIAN_BLUR + { CV_8UC1, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector(vU8[5], vU8[5]+3), vector(vU8[6], vU8[6]+3) }, + { CV_8UC2, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector(vU8[5], vU8[5]+3), vector(vU8[6], vU8[6]+3) }, + { CV_8UC3, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector(vU8[5], vU8[5]+3), vector(vU8[6], vU8[6]+3) }, + { CV_8UC4, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector(vU8[5], vU8[5]+3), vector(vU8[6], vU8[6]+3) }, + { CV_8UC1, Size( 256, 128), Size(5, 5), 0.375, 0.75, vector(vU8[7], vU8[7]+5), vector(vU8[8], vU8[8]+5) } +#endif + }; + + for (int modeind = 0, _modecnt = sizeof(modes) / sizeof(modes[0]); modeind < _modecnt; ++modeind) + { + checkMode(modes[modeind]); + } +} + +TEST(GaussianBlur_Bitexact, Linear16U) +{ + testmode modes[] = { + { CV_16UC1, Size( 1, 1), Size(3, 3), 0, 0, vector(vU16[1], vU16[1]+3), vector(vU16[1], vU16[1]+3) }, + { CV_16UC1, Size( 2, 2), Size(3, 3), 0, 0, vector(vU16[1], vU16[1]+3), vector(vU16[1], vU16[1]+3) }, + { CV_16UC1, Size( 3, 1), Size(3, 3), 0, 0, vector(vU16[1], vU16[1]+3), vector(vU16[1], vU16[1]+3) }, + { CV_16UC1, Size( 1, 3), Size(3, 3), 0, 0, vector(vU16[1], vU16[1]+3), vector(vU16[1], vU16[1]+3) }, + { CV_16UC1, Size( 3, 3), Size(3, 3), 0, 0, vector(vU16[1], vU16[1]+3), vector(vU16[1], vU16[1]+3) }, + { CV_16UC1, Size( 3, 3), Size(5, 5), 0, 0, vector(vU16[2], vU16[2]+5), vector(vU16[2], vU16[2]+5) }, + { CV_16UC1, Size( 3, 3), Size(7, 7), 0, 0, vector(vU16[3], vU16[3]+7), vector(vU16[3], vU16[3]+7) }, + { CV_16UC1, Size( 5, 5), Size(3, 3), 0, 0, vector(vU16[1], vU16[1]+3), vector(vU16[1], vU16[1]+3) }, + { CV_16UC1, Size( 5, 5), Size(5, 5), 0, 0, vector(vU16[2], vU16[2]+5), vector(vU16[2], vU16[2]+5) }, + { CV_16UC1, Size( 3, 5), Size(5, 5), 0, 0, vector(vU16[2], vU16[2]+5), vector(vU16[2], vU16[2]+5) }, + { CV_16UC1, Size( 5, 5), Size(5, 5), 0, 0, vector(vU16[2], vU16[2]+5), vector(vU16[2], vU16[2]+5) }, + { CV_16UC1, Size( 5, 5), Size(7, 7), 0, 0, vector(vU16[3], vU16[3]+7), vector(vU16[3], vU16[3]+7) }, + { CV_16UC1, Size( 7, 7), Size(7, 7), 0, 0, vector(vU16[3], vU16[3]+7), vector(vU16[3], vU16[3]+7) }, + { CV_16UC1, Size( 256, 128), Size(3, 3), 0, 0, vector(vU16[1], vU16[1]+3), vector(vU16[1], vU16[1]+3) }, + { CV_16UC2, Size( 256, 128), Size(3, 3), 0, 0, vector(vU16[1], vU16[1]+3), vector(vU16[1], vU16[1]+3) }, + { CV_16UC3, Size( 256, 128), Size(3, 3), 0, 0, vector(vU16[1], vU16[1]+3), vector(vU16[1], vU16[1]+3) }, + { CV_16UC4, Size( 256, 128), Size(3, 3), 0, 0, vector(vU16[1], vU16[1]+3), vector(vU16[1], vU16[1]+3) }, + { CV_16UC1, Size( 256, 128), Size(5, 5), 0, 0, vector(vU16[2], vU16[2]+5), vector(vU16[2], vU16[2]+5) }, + { CV_16UC1, Size( 256, 128), Size(7, 7), 0, 0, vector(vU16[3], vU16[3]+7), vector(vU16[3], vU16[3]+7) }, + { CV_16UC1, Size( 256, 128), Size(9, 9), 0, 0, vector(vU16[4], vU16[4]+9), vector(vU16[4], vU16[4]+9) }, + }; + + for (int modeind = 0, _modecnt = sizeof(modes) / sizeof(modes[0]); modeind < _modecnt; ++modeind) + { + checkMode<16>(modes[modeind]); + } } TEST(GaussianBlur_Bitexact, regression_15015)