From ee5a2c31792ae5f2efe9a87b6634149ce53bb3f1 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 15 Jan 2024 11:09:10 +0000 Subject: [PATCH] signal: fix memory access --- modules/signal/src/signal_resample.cpp | 120 +++++++++---------------- 1 file changed, 44 insertions(+), 76 deletions(-) diff --git a/modules/signal/src/signal_resample.cpp b/modules/signal/src/signal_resample.cpp index 03fc5cad3..789112346 100644 --- a/modules/signal/src/signal_resample.cpp +++ b/modules/signal/src/signal_resample.cpp @@ -105,102 +105,69 @@ static inline v_float32 simd_cubicHermite(const v_float32 &v_A, const v_float32 } #endif -static void cubicInterpolate(const Mat1f &src, uint32_t dstlen, Mat1f &dst, uint32_t srclen) +static void cubicInterpolate(const float* src/*[srclen]*/, uint32_t srclen, float* dst/*[dstlen]*/, uint32_t dstlen) { - Mat1f tmp(Size(srclen + 3U, 1U)); - tmp.at(0) = src.at(0); - -#if (CV_SIMD || CV_SIMD_SCALABLE) - v_float32 v_reg = vx_setall_f32(src.at(srclen - 1U)); - vx_store(tmp.ptr(0) + (srclen - 1U), v_reg); -#else // scalar version - tmp.at(srclen + 1U) = src.at(srclen - 1U); - tmp.at(srclen + 2U) = src.at(srclen - 1U); -#endif + const int srclen_1 = (int)srclen - 1; uint32_t i = 0U; + const float dstToSrcScale = 1.0f / (float)(dstlen - 1U) * (float)srclen; #if (CV_SIMD || CV_SIMD_SCALABLE) - uint32_t len_sub_vfloatStep = (uint32_t)std::max((int64_t)srclen - (int64_t)v_float32_width, (int64_t)0); - for (; i < len_sub_vfloatStep; i+= v_float32_width) - { - v_float32 v_copy = vx_load(src.ptr(0) + i); - vx_store(tmp.ptr(0) + (i + 1U), v_copy); - } -#endif - - // if the tail exists or scalar version - for (; i < srclen; ++i) - { - tmp.at(i + 1U) = src.at(i); - } - - i = 0U; + const v_float32 v_dst2src_scale = vx_setall_f32(dstToSrcScale); + const v_float32 v_half = vx_setall_f32(0.5f); -#if (CV_SIMD || CV_SIMD_SCALABLE) int ptr_x_int[v_float32_max_width]; - uint32_t j; - - v_float32 v_dstlen_sub_1 = vx_setall_f32((float)(dstlen - 1U)); - v_float32 v_one = vx_setall_f32(1.0f); - v_float32 v_x_start = v_div(v_one, v_dstlen_sub_1); - v_float32 v_u = vx_setall_f32((float)srclen); - v_float32 v_half = vx_setall_f32(0.5f); - - len_sub_vfloatStep = (uint32_t)std::max((int64_t)dstlen - (int64_t)v_float32_width, (int64_t)0); - for (; i < v_float32_width; ++i) + for (unsigned j = 0; j < v_float32_width; ++j) { - ptr_x_int[i] = (int)i; + ptr_x_int[j] = (int)j; } + const v_float32 v_sequence = v_cvt_f32(vx_load(ptr_x_int)); - float ptr_for_cubicHermite[v_float32_max_width]; - v_float32 v_sequence = v_cvt_f32(vx_load(ptr_x_int)); - for (i = 0U; i < len_sub_vfloatStep; i+= v_float32_width) + for (i = 0U; i <= dstlen - v_float32_width; i+= v_float32_width) { v_float32 v_reg_i = v_add(vx_setall_f32((float)i), v_sequence); - v_float32 v_x = v_sub(v_mul(v_x_start, v_reg_i, v_u), v_half); + v_float32 v_x = v_sub(v_mul(v_reg_i, v_dst2src_scale), v_half); v_int32 v_x_int = v_trunc(v_x); v_float32 v_x_fract = v_sub(v_x, v_cvt_f32(v_floor(v_x))); vx_store(ptr_x_int, v_x_int); - for(j = 0U; j < v_float32_width; ++j) - ptr_for_cubicHermite[j] = *(tmp.ptr(0) + (ptr_x_int[j] - 1)); - v_float32 v_x_int_add_A = vx_load(ptr_for_cubicHermite); - - for(j = 0U; j < v_float32_width; ++j) - ptr_for_cubicHermite[j] = *(tmp.ptr(0) + (ptr_x_int[j])); - v_float32 v_x_int_add_B = vx_load(ptr_for_cubicHermite); - - for(j = 0U; j < v_float32_width; ++j) - ptr_for_cubicHermite[j] = *(tmp.ptr(0) + (ptr_x_int[j] + 1)); - v_float32 v_x_int_add_C = vx_load(ptr_for_cubicHermite); - - for(j = 0U; j < v_float32_width; ++j) - ptr_for_cubicHermite[j] = *(tmp.ptr(0) + (ptr_x_int[j] + 2)); - v_float32 v_x_int_add_D = vx_load(ptr_for_cubicHermite); + float ptr_for_cubicHermiteA[v_float32_max_width]; + float ptr_for_cubicHermiteB[v_float32_max_width]; + float ptr_for_cubicHermiteC[v_float32_max_width]; + float ptr_for_cubicHermiteD[v_float32_max_width]; + for (unsigned j = 0U; j < v_float32_width; ++j) + { + int src_offset = ptr_x_int[j]; + ptr_for_cubicHermiteA[j] = src[std::min(std::max(0, src_offset - 1), srclen_1)]; + ptr_for_cubicHermiteB[j] = src[std::min(std::max(0, src_offset + 0), srclen_1)]; + ptr_for_cubicHermiteC[j] = src[std::min(std::max(0, src_offset + 1), srclen_1)]; + ptr_for_cubicHermiteD[j] = src[std::min(std::max(0, src_offset + 2), srclen_1)]; + } + v_float32 v_x_int_add_A = vx_load(ptr_for_cubicHermiteA); + v_float32 v_x_int_add_B = vx_load(ptr_for_cubicHermiteB); + v_float32 v_x_int_add_C = vx_load(ptr_for_cubicHermiteC); + v_float32 v_x_int_add_D = vx_load(ptr_for_cubicHermiteD); - vx_store(dst.ptr(0) + i, simd_cubicHermite(v_x_int_add_A, v_x_int_add_B, v_x_int_add_C, v_x_int_add_D, v_x_fract)); + vx_store(&dst[i], simd_cubicHermite(v_x_int_add_A, v_x_int_add_B, v_x_int_add_C, v_x_int_add_D, v_x_fract)); } #endif // if the tail exists or scalar version - float *ptr = tmp.ptr(0) + 1U; - float lenScale = 1.0f / (float)(dstlen - 1U); - float U, X, xfract; - int xint; for(; i < dstlen; ++i) { - U = (float)i * lenScale; - X = (U * (float)srclen) - 0.5f; - xfract = X - floor(X); - xint = (int)X; - dst.at(i) = scal_cubicHermite(ptr[xint - 1], ptr[xint], ptr[xint + 1], ptr[xint + 2], xfract); + float X = (float)i * dstToSrcScale - 0.5f; + float xfract = X - floor(X); + int xint = (int)X; + float cubicHermiteA = src[std::min(std::max(0, xint - 1), srclen_1)]; + float cubicHermiteB = src[std::min(std::max(0, xint + 0), srclen_1)]; + float cubicHermiteC = src[std::min(std::max(0, xint + 1), srclen_1)]; + float cubicHermiteD = src[std::min(std::max(0, xint + 2), srclen_1)]; + dst[i] = scal_cubicHermite(cubicHermiteA, cubicHermiteB, cubicHermiteC, cubicHermiteD, xfract); } - } static void fir_f32(const float *pSrc, float *pDst, @@ -332,7 +299,7 @@ static void fir_f32(const float *pSrc, float *pDst, } void resampleSignal(InputArray inputSignal, OutputArray outputSignal, - const int inFreq, const int outFreq) + const int inFreq, const int outFreq) { CV_TRACE_FUNCTION(); CV_Assert(!inputSignal.empty()); @@ -343,16 +310,18 @@ void resampleSignal(InputArray inputSignal, OutputArray outputSignal, inputSignal.copyTo(outputSignal); return; } - uint32_t filtLen = 33U; - float beta = 3.395f; - std::vector filt_window(filtLen, 0.f); - init_filter(beta, filtLen, filt_window.data()); float ratio = (float)outFreq / float(inFreq); Mat1f inMat = inputSignal.getMat(); - Mat1f outMat = Mat1f(Size(cvFloor(inMat.cols * ratio), 1)); - cubicInterpolate(inMat, outMat.cols, outMat, inMat.cols); + outputSignal.create(Size(cvFloor(inMat.cols * ratio), 1), CV_32FC1); + Mat1f outMat = outputSignal.getMat(); + cubicInterpolate(inMat.ptr(0), inMat.cols, outMat.ptr(0), outMat.cols); if (inFreq < 2 * outFreq) { + uint32_t filtLen = 33U; + float beta = 3.395f; + std::vector filt_window(filtLen, 0.f); + init_filter(beta, filtLen, filt_window.data()); + std::vector dlyl(filtLen * 2 - 1, 0.f); std::vector ptmp(outMat.cols + 2 * filtLen, 0.); @@ -367,7 +336,6 @@ void resampleSignal(InputArray inputSignal, OutputArray outputSignal, outMat.at(i - filtLen) = ptmp2[i + cvFloor((float)filtLen / 2.f)]; } } - outputSignal.assign(std::move(outMat)); }