From f4c2e4f8720cbb990d9be11af4fb87a4a4805da7 Mon Sep 17 00:00:00 2001 From: penghuiho <penghuihe@yeah.net> Date: Fri, 23 Aug 2024 22:12:19 +0800 Subject: [PATCH] Merge pull request #26061 from penghuiho:fix-pow-bug Fixed the simd bugs of iPow8u and iPow16u #26061 Add the following cases in opencv_perf_core: * OCL_PowFixture_iPow.iPow/0, where GetParam() = (640x480, 8UC1) * OCL_PowFixture_iPow.iPow/2, where GetParam() = (640x480, 16UC1) iPow8u and iPow16u failed to dispatch to their SIMD-accelerated implementations at runtime. Fix the bug by changing the second template argument of the iPow_SIMD specializations for uchar and ushort from int to unsigned. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [ ] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [ ] The feature is well documented and sample code can be built with the project CMake --- modules/core/perf/opencl/perf_arithm.cpp | 18 ++++++++++++++++++ modules/core/src/mathfuncs.cpp | 4 ++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/modules/core/perf/opencl/perf_arithm.cpp b/modules/core/perf/opencl/perf_arithm.cpp index 04d343a136..cb14aa92b1 100644 --- a/modules/core/perf/opencl/perf_arithm.cpp +++ b/modules/core/perf/opencl/perf_arithm.cpp @@ -688,6 +688,24 @@ OCL_PERF_TEST_P(PowFixture, Pow, ::testing::Combine( SANITY_CHECK(dst, 1.5e-6, ERROR_RELATIVE); } +///////////// iPow //////////////////////// +OCL_PERF_TEST_P(PowFixture, iPow, ::testing::Combine( + OCL_TEST_SIZES, OCL_PERF_ENUM(CV_8UC1, CV_8SC1,CV_16UC1,CV_16SC1,CV_32SC1))) +{ + const Size_MatType_t params = GetParam(); + const Size srcSize = get<0>(params); + const int type = get<1>(params); + + checkDeviceMaxMemoryAllocSize(srcSize, type); + + UMat src(srcSize, type), dst(srcSize, type); + randu(src, 0, 100); + declare.in(src).out(dst); + + OCL_TEST_CYCLE() cv::pow(src, 7.0, dst); + + SANITY_CHECK_NOTHING(); +} ///////////// AddWeighted//////////////////////// typedef Size_MatType AddWeightedFixture; diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index 764d2d9b03..e1906fcc8f 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -791,7 +791,7 @@ struct iPow_SIMD #if (CV_SIMD || CV_SIMD_SCALABLE) template <> -struct iPow_SIMD<uchar, int> +struct iPow_SIMD<uchar, unsigned> { int operator() ( const uchar * src, uchar * dst, int len, int power ) { @@ -871,7 +871,7 @@ struct iPow_SIMD<schar, int> }; template <> -struct iPow_SIMD<ushort, int> +struct iPow_SIMD<ushort, unsigned> { int operator() ( const ushort * src, ushort * dst, int len, int power) {