diff --git a/modules/features2d/src/sift.simd.hpp b/modules/features2d/src/sift.simd.hpp index 2c5cf9f997..76ef3082ea 100644 --- a/modules/features2d/src/sift.simd.hpp +++ b/modules/features2d/src/sift.simd.hpp @@ -150,7 +150,7 @@ void findScaleSpaceExtrema( void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float scl, - int d, int n, Mat& dst, int row + const int d, const int n, Mat& dst, int row ); @@ -708,7 +708,7 @@ void findScaleSpaceExtrema( void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float scl, - int d, int n, Mat& dstMat, int row + const int d, const int n, Mat& dstMat, int row ) { CV_TRACE_FUNCTION(); @@ -725,7 +725,10 @@ void calcSIFTDescriptor( cos_t /= hist_width; sin_t /= hist_width; - int i, j, k, len = (radius*2+1)*(radius*2+1), histlen = (d+2)*(d+2)*(n+2); + int i, j, k; + const int len = (radius*2+1)*(radius*2+1); + const int len_hist = (d+2)*(d+2)*(n+2); + const int len_ddn = d * d * n; int rows = img.rows, cols = img.cols; cv::utils::BufferArea area; @@ -736,8 +739,8 @@ void calcSIFTDescriptor( area.allocate(W, len, CV_SIMD_WIDTH); area.allocate(RBin, len, CV_SIMD_WIDTH); area.allocate(CBin, len, CV_SIMD_WIDTH); - area.allocate(hist, histlen, CV_SIMD_WIDTH); - area.allocate(rawDst, len, CV_SIMD_WIDTH); + area.allocate(hist, len_hist, CV_SIMD_WIDTH); + area.allocate(rawDst, len_ddn, CV_SIMD_WIDTH); area.commit(); Mag = Y; @@ -771,10 +774,10 @@ void calcSIFTDescriptor( } } - len = k; - cv::hal::fastAtan2(Y, X, Ori, len, true); - cv::hal::magnitude32f(X, Y, Mag, len); - cv::hal::exp32f(W, W, len); + const int len_left = k; + cv::hal::fastAtan2(Y, X, Ori, len_left, true); + cv::hal::magnitude32f(X, Y, Mag, len_left); + cv::hal::exp32f(W, W, len_left); k = 0; #if (CV_SIMD || CV_SIMD_SCALABLE) @@ -788,7 +791,7 @@ void calcSIFTDescriptor( const v_int32 __1 = vx_setall_s32(1); const v_int32 __d_plus_2 = vx_setall_s32(d+2); const v_int32 __n_plus_2 = vx_setall_s32(n+2); - for( ; k <= len - vecsize; k += vecsize ) + for( ; k <= len_left - vecsize; k += vecsize ) { v_float32 rbin = vx_load_aligned(RBin + k); v_float32 cbin = vx_load_aligned(CBin + k); @@ -839,7 +842,7 @@ void calcSIFTDescriptor( } } #endif - for( ; k < len; k++ ) + for( ; k < len_left; k++ ) { float rbin = RBin[k], cbin = CBin[k]; float obin = (Ori[k] - ori)*bins_per_rad; @@ -892,13 +895,12 @@ void calcSIFTDescriptor( // and scale the result, so that it can be easily converted // to byte array float nrm2 = 0; - len = d*d*n; k = 0; #if (CV_SIMD || CV_SIMD_SCALABLE) { v_float32 __nrm2 = vx_setzero_f32(); v_float32 __rawDst; - for( ; k <= len - VTraits::vlanes(); k += VTraits::vlanes() ) + for( ; k <= len_ddn - VTraits::vlanes(); k += VTraits::vlanes() ) { __rawDst = vx_load_aligned(rawDst + k); __nrm2 = v_fma(__rawDst, __rawDst, __nrm2); @@ -906,10 +908,10 @@ void calcSIFTDescriptor( nrm2 = (float)v_reduce_sum(__nrm2); } #endif - for( ; k < len; k++ ) + for( ; k < len_ddn; k++ ) nrm2 += rawDst[k]*rawDst[k]; - float thr = std::sqrt(nrm2)*SIFT_DESCR_MAG_THR; + const float thr = std::sqrt(nrm2)*SIFT_DESCR_MAG_THR; i = 0, nrm2 = 0; #if 0 //CV_AVX2 @@ -920,7 +922,7 @@ void calcSIFTDescriptor( __m256 __dst; __m256 __nrm2 = _mm256_setzero_ps(); __m256 __thr = _mm256_set1_ps(thr); - for( ; i <= len - 8; i += 8 ) + for( ; i <= len_ddn - 8; i += 8 ) { __dst = _mm256_loadu_ps(&rawDst[i]); __dst = _mm256_min_ps(__dst, __thr); @@ -936,7 +938,7 @@ void calcSIFTDescriptor( nrm2_buf[4] + nrm2_buf[5] + nrm2_buf[6] + nrm2_buf[7]; } #endif - for( ; i < len; i++ ) + for( ; i < len_ddn; i++ ) { float val = std::min(rawDst[i], thr); rawDst[i] = val; @@ -954,7 +956,7 @@ if( dstMat.type() == CV_32F ) v_float32 __min = vx_setzero_f32(); v_float32 __max = vx_setall_f32(255.0f); // max of uchar v_float32 __nrm2 = vx_setall_f32(nrm2); - for( k = 0; k <= len - VTraits::vlanes(); k += VTraits::vlanes() ) + for( k = 0; k <= len_ddn - VTraits::vlanes(); k += VTraits::vlanes() ) { __dst = vx_load_aligned(rawDst + k); __dst = v_min(v_max(v_cvt_f32(v_round(v_mul(__dst, __nrm2))), __min), __max); @@ -965,7 +967,7 @@ if( dstMat.type() == CV_32F ) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Waggressive-loop-optimizations" // iteration XX invokes undefined behavior #endif - for( ; k < len; k++ ) + for( ; k < len_ddn; k++ ) { dst[k] = saturate_cast(rawDst[k]*nrm2); } @@ -980,7 +982,7 @@ else // CV_8U v_float32 __dst0, __dst1; v_uint16 __pack01; v_float32 __nrm2 = vx_setall_f32(nrm2); - for( k = 0; k <= len - VTraits::vlanes() * 2; k += VTraits::vlanes() * 2 ) + for( k = 0; k <= len_ddn - VTraits::vlanes() * 2; k += VTraits::vlanes() * 2 ) { __dst0 = vx_load_aligned(rawDst + k); __dst1 = vx_load_aligned(rawDst + k + VTraits::vlanes()); @@ -994,7 +996,7 @@ else // CV_8U #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Waggressive-loop-optimizations" // iteration XX invokes undefined behavior #endif - for( ; k < len; k++ ) + for( ; k < len_ddn; k++ ) { dst[k] = saturate_cast(rawDst[k]*nrm2); } @@ -1004,7 +1006,7 @@ else // CV_8U } #else float nrm1 = 0; - for( k = 0; k < len; k++ ) + for( k = 0; k < len_ddn; k++ ) { rawDst[k] *= nrm2; nrm1 += rawDst[k]; @@ -1013,7 +1015,7 @@ else // CV_8U if( dstMat.type() == CV_32F ) { float *dst = dstMat.ptr(row); - for( k = 0; k < len; k++ ) + for( k = 0; k < len_ddn; k++ ) { dst[k] = std::sqrt(rawDst[k] * nrm1); } @@ -1021,7 +1023,7 @@ else // CV_8U else // CV_8U { uint8_t *dst = dstMat.ptr(row); - for( k = 0; k < len; k++ ) + for( k = 0; k < len_ddn; k++ ) { dst[k] = saturate_cast(std::sqrt(rawDst[k] * nrm1)*SIFT_INT_DESCR_FCTR); } diff --git a/modules/features2d/test/test_sift.cpp b/modules/features2d/test/test_sift.cpp index 731b31ac0f..d98f1c6b8a 100644 --- a/modules/features2d/test/test_sift.cpp +++ b/modules/features2d/test/test_sift.cpp @@ -30,5 +30,17 @@ TEST(Features2d_SIFT, descriptor_type) ASSERT_EQ(countNonZero(diff), 0) << "descriptors are not identical"; } +TEST(Features2d_SIFT, regression_26139) +{ + auto extractor = cv::SIFT::create(); + cv::Mat1b image{cv::Size{300, 300}, 0}; + std::vector kps { + cv::KeyPoint(154.076813f, 136.160904f, 111.078636f, 216.195618f, 0.00000899323549f, 7) + }; + cv::Mat descriptors; + extractor->compute(image, kps, descriptors); // we expect no memory corruption + ASSERT_EQ(descriptors.size(), Size(128, 1)); +} + }} // namespace