diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp index 27beccd9ab..9a95b3a67b 100644 --- a/modules/core/include/opencv2/core/hal/intrin.hpp +++ b/modules/core/include/opencv2/core/hal/intrin.hpp @@ -1239,6 +1239,7 @@ namespace CV__SIMD_NAMESPACE { #define CV_SIMD 0 #endif +#include "intrin_math.hpp" #include "simd_utils.impl.hpp" #ifndef CV_DOXYGEN diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp index 8619fec60c..40979f6c4b 100644 --- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp @@ -263,7 +263,7 @@ Most of these operations return only one value. ### Other math -- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude +- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude, @ref v_exp - Absolute values: @ref v_abs, @ref v_absdiff, @ref v_absdiffs ### Conversions @@ -363,6 +363,7 @@ Floating point: |reverse | x | x | |extract_n | x | x | |broadcast_element | x | | +|exp | x | x | @{ */ @@ -721,11 +722,33 @@ template inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) Only for floating point types.*/ OPENCV_HAL_IMPL_MATH_FUNC(v_sqrt, std::sqrt, _Tp) +/** + * @brief Exponential \f$ e^x \f$ of elements + * + * Only for floating point types. Core implementation steps: + * 1. Decompose Input: Convert the input to \f$ 2^{x \cdot \log_2e} \f$ and split its exponential into integer and fractional parts: + * \f$ x \cdot \log_2e = n + f \f$, where \f$ n \f$ is the integer part and \f$ f \f$ is the fractional part. + * 2. Compute \f$ 2^n \f$: Calculated by shifting the bits. + * 3. Adjust Fractional Part: Compute \f$ f \cdot \ln2 \f$ to convert the fractional part to base \f$ e \f$. + * \f$ C1 \f$ and \f$ C2 \f$ are used to adjust the fractional part. + * 4. Polynomial Approximation for \f$ e^{f \cdot \ln2} \f$: The closer the fractional part is to 0, the more accurate the result. + * - For float16 and float32, use a Taylor Series with 6 terms. + * - For float64, use Pade Polynomials Approximation with 4 terms. + * 5. Combine Results: Multiply the two parts together to get the final result: + * \f$ e^x = 2^n \cdot e^{f \cdot \ln2} \f$. + * + * @note The precision of the calculation depends on the implementation and the data type of the input vector. + */ +OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp) +#define OPENCV_HAL_MATH_HAVE_EXP 1 + //! @cond IGNORED OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp) +#define OPENCV_HAL_MATH_HAVE_SIN 1 OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp) -OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp) +#define OPENCV_HAL_MATH_HAVE_COS 1 OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp) +#define OPENCV_HAL_MATH_HAVE_LOG 1 //! @endcond /** @brief Absolute value of elements diff --git a/modules/core/include/opencv2/core/hal/intrin_math.hpp b/modules/core/include/opencv2/core/hal/intrin_math.hpp new file mode 100644 index 0000000000..528166889b --- /dev/null +++ b/modules/core/include/opencv2/core/hal/intrin_math.hpp @@ -0,0 +1,200 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +// This header is not standalone. Don't include directly, use "intrin.hpp" instead. +#ifdef OPENCV_HAL_INTRIN_HPP // defined in intrin.hpp + +namespace CV__SIMD_NAMESPACE { + +/* Universal Intrinsics implementation of sin, cos, exp and log + + Inspired by Intel Approximate Math library, and based on the + corresponding algorithms of the cephes math library +*/ + +/* Copyright (C) 2010,2011 RJVB - extensions */ +/* Copyright (C) 2011 Julien Pommier + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + (this is the zlib license) +*/ + +#ifndef OPENCV_HAL_MATH_HAVE_EXP + +//! @name Exponential +//! @{ +#if defined(CV_SIMD_FP16) && CV_SIMD_FP16 + // Implementation is the same as float32 vector. + inline v_float16 v_exp(const v_float16 &x) { + const v_float16 _vexp_lo_f16 = vx_setall_f16(-10.7421875f); + const v_float16 _vexp_hi_f16 = vx_setall_f16(11.f); + const v_float16 _vexp_half_fp16 = vx_setall_f16(0.5f); + const v_float16 _vexp_one_fp16 = vx_setall_f16(1.f); + const v_float16 _vexp_LOG2EF_f16 = vx_setall_f16(1.44269504088896341f); + const v_float16 _vexp_C1_f16 = vx_setall_f16(-6.93359375E-1f); + const v_float16 _vexp_C2_f16 = vx_setall_f16(2.12194440E-4f); + const v_float16 _vexp_p0_f16 = vx_setall_f16(1.9875691500E-4f); + const v_float16 _vexp_p1_f16 = vx_setall_f16(1.3981999507E-3f); + const v_float16 _vexp_p2_f16 = vx_setall_f16(8.3334519073E-3f); + const v_float16 _vexp_p3_f16 = vx_setall_f16(4.1665795894E-2f); + const v_float16 _vexp_p4_f16 = vx_setall_f16(1.6666665459E-1f); + const v_float16 _vexp_p5_f16 = vx_setall_f16(5.0000001201E-1f); + const v_int16 _vexp_bias_s16 = vx_setall_s16(0xf); + + v_float16 _vexp_, _vexp_x, _vexp_y, _vexp_xx; + v_int16 _vexp_mm; + + // compute exponential of x + _vexp_x = v_max(x, _vexp_lo_f16); + _vexp_x = v_min(_vexp_x, _vexp_hi_f16); + + _vexp_ = v_fma(_vexp_x, _vexp_LOG2EF_f16, _vexp_half_fp16); + _vexp_mm = v_floor(_vexp_); + _vexp_ = v_cvt_f16(_vexp_mm); + _vexp_mm = v_add(_vexp_mm, _vexp_bias_s16); + _vexp_mm = v_shl(_vexp_mm, 10); + + _vexp_x = v_fma(_vexp_, _vexp_C1_f16, _vexp_x); + _vexp_x = v_fma(_vexp_, _vexp_C2_f16, _vexp_x); + _vexp_xx = v_mul(_vexp_x, _vexp_x); + + _vexp_y = v_fma(_vexp_x, _vexp_p0_f16, _vexp_p1_f16); + _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p2_f16); + _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p3_f16); + _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p4_f16); + _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p5_f16); + + _vexp_y = v_fma(_vexp_y, _vexp_xx, _vexp_x); + _vexp_y = v_add(_vexp_y, _vexp_one_fp16); + _vexp_y = v_mul(_vexp_y, v_reinterpret_as_f16(_vexp_mm)); + + // exp(NAN) -> NAN + v_float16 mask_not_nan = v_not_nan(x); + return v_select(mask_not_nan, _vexp_y, v_reinterpret_as_f16(vx_setall_s16(0x7e00))); + } +#endif + + inline v_float32 v_exp(const v_float32 &x) { + const v_float32 _vexp_lo_f32 = vx_setall_f32(-88.3762626647949f); + const v_float32 _vexp_hi_f32 = vx_setall_f32(89.f); + const v_float32 _vexp_half_fp32 = vx_setall_f32(0.5f); + const v_float32 _vexp_one_fp32 = vx_setall_f32(1.f); + const v_float32 _vexp_LOG2EF_f32 = vx_setall_f32(1.44269504088896341f); + const v_float32 _vexp_C1_f32 = vx_setall_f32(-6.93359375E-1f); + const v_float32 _vexp_C2_f32 = vx_setall_f32(2.12194440E-4f); + const v_float32 _vexp_p0_f32 = vx_setall_f32(1.9875691500E-4f); + const v_float32 _vexp_p1_f32 = vx_setall_f32(1.3981999507E-3f); + const v_float32 _vexp_p2_f32 = vx_setall_f32(8.3334519073E-3f); + const v_float32 _vexp_p3_f32 = vx_setall_f32(4.1665795894E-2f); + const v_float32 _vexp_p4_f32 = vx_setall_f32(1.6666665459E-1f); + const v_float32 _vexp_p5_f32 = vx_setall_f32(5.0000001201E-1f); + const v_int32 _vexp_bias_s32 = vx_setall_s32(0x7f); + + v_float32 _vexp_, _vexp_x, _vexp_y, _vexp_xx; + v_int32 _vexp_mm; + + // compute exponential of x + _vexp_x = v_max(x, _vexp_lo_f32); + _vexp_x = v_min(_vexp_x, _vexp_hi_f32); + + _vexp_ = v_fma(_vexp_x, _vexp_LOG2EF_f32, _vexp_half_fp32); + _vexp_mm = v_floor(_vexp_); + _vexp_ = v_cvt_f32(_vexp_mm); + _vexp_mm = v_add(_vexp_mm, _vexp_bias_s32); + _vexp_mm = v_shl(_vexp_mm, 23); + + _vexp_x = v_fma(_vexp_, _vexp_C1_f32, _vexp_x); + _vexp_x = v_fma(_vexp_, _vexp_C2_f32, _vexp_x); + _vexp_xx = v_mul(_vexp_x, _vexp_x); + + _vexp_y = v_fma(_vexp_x, _vexp_p0_f32, _vexp_p1_f32); + _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p2_f32); + _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p3_f32); + _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p4_f32); + _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p5_f32); + + _vexp_y = v_fma(_vexp_y, _vexp_xx, _vexp_x); + _vexp_y = v_add(_vexp_y, _vexp_one_fp32); + _vexp_y = v_mul(_vexp_y, v_reinterpret_as_f32(_vexp_mm)); + + // exp(NAN) -> NAN + v_float32 mask_not_nan = v_not_nan(x); + return v_select(mask_not_nan, _vexp_y, v_reinterpret_as_f32(vx_setall_s32(0x7fc00000))); + } + +#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F + inline v_float64 v_exp(const v_float64 &x) { + const v_float64 _vexp_lo_f64 = vx_setall_f64(-709.43613930310391424428); + const v_float64 _vexp_hi_f64 = vx_setall_f64(710.); + const v_float64 _vexp_half_f64 = vx_setall_f64(0.5); + const v_float64 _vexp_one_f64 = vx_setall_f64(1.0); + const v_float64 _vexp_two_f64 = vx_setall_f64(2.0); + const v_float64 _vexp_LOG2EF_f64 = vx_setall_f64(1.44269504088896340736); + const v_float64 _vexp_C1_f64 = vx_setall_f64(-6.93145751953125E-1); + const v_float64 _vexp_C2_f64 = vx_setall_f64(-1.42860682030941723212E-6); + const v_float64 _vexp_p0_f64 = vx_setall_f64(1.26177193074810590878E-4); + const v_float64 _vexp_p1_f64 = vx_setall_f64(3.02994407707441961300E-2); + const v_float64 _vexp_p2_f64 = vx_setall_f64(9.99999999999999999910E-1); + const v_float64 _vexp_q0_f64 = vx_setall_f64(3.00198505138664455042E-6); + const v_float64 _vexp_q1_f64 = vx_setall_f64(2.52448340349684104192E-3); + const v_float64 _vexp_q2_f64 = vx_setall_f64(2.27265548208155028766E-1); + const v_float64 _vexp_q3_f64 = vx_setall_f64(2.00000000000000000009E0); + const v_int64 _vexp_bias_s64 = vx_setall_s64(0x3ff); + + v_float64 _vexp_, _vexp_x, _vexp_y, _vexp_z, _vexp_xx; + v_int64 _vexp_mm; + + // compute exponential of x + _vexp_x = v_max(x, _vexp_lo_f64); + _vexp_x = v_min(_vexp_x, _vexp_hi_f64); + + _vexp_ = v_fma(_vexp_x, _vexp_LOG2EF_f64, _vexp_half_f64); + _vexp_mm = v_expand_low(v_floor(_vexp_)); + _vexp_ = v_cvt_f64(_vexp_mm); + _vexp_mm = v_add(_vexp_mm, _vexp_bias_s64); + _vexp_mm = v_shl(_vexp_mm, 52); + + _vexp_x = v_fma(_vexp_, _vexp_C1_f64, _vexp_x); + _vexp_x = v_fma(_vexp_, _vexp_C2_f64, _vexp_x); + _vexp_xx = v_mul(_vexp_x, _vexp_x); + + _vexp_y = v_fma(_vexp_xx, _vexp_p0_f64, _vexp_p1_f64); + _vexp_y = v_fma(_vexp_y, _vexp_xx, _vexp_p2_f64); + _vexp_y = v_mul(_vexp_y, _vexp_x); + + _vexp_z = v_fma(_vexp_xx, _vexp_q0_f64, _vexp_q1_f64); + _vexp_z = v_fma(_vexp_xx, _vexp_z, _vexp_q2_f64); + _vexp_z = v_fma(_vexp_xx, _vexp_z, _vexp_q3_f64); + + _vexp_z = v_div(_vexp_y, v_sub(_vexp_z, _vexp_y)); + _vexp_z = v_fma(_vexp_two_f64, _vexp_z, _vexp_one_f64); + _vexp_z = v_mul(_vexp_z, v_reinterpret_as_f64(_vexp_mm)); + + // exp(NAN) -> NAN + v_float64 mask_not_nan = v_not_nan(x); + return v_select(mask_not_nan, _vexp_z, v_reinterpret_as_f64(vx_setall_s64(0x7FF8000000000000))); + } +#endif + +#define OPENCV_HAL_MATH_HAVE_EXP 1 +//! @} + +#endif +} +#endif // OPENCV_HAL_INTRIN_HPP diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index 08138d194d..6e55a6ddd3 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -1698,6 +1698,103 @@ template struct TheTest return *this; } + void __test_exp(LaneType dataMax, LaneType diff_thr, LaneType enlarge_factor, LaneType flt_min) { + int n = VTraits::vlanes(); + + // Test overflow and underflow values with step + const LaneType step = (LaneType) 0.01; + for (LaneType i = dataMax + 1; i <= dataMax + 11;) { + Data dataUpperBound, dataLowerBound, resOverflow, resUnderflow; + for (int j = 0; j < n; ++j) { + dataUpperBound[j] = i; + dataLowerBound[j] = -i; + i += step; + } + R upperBound = dataUpperBound, lowerBound = dataLowerBound; + resOverflow = v_exp(upperBound); + resUnderflow = v_exp(lowerBound); + for (int j = 0; j < n; ++j) { + SCOPED_TRACE(cv::format("Overflow/Underflow test value: %f", i)); + EXPECT_TRUE(resOverflow[j] > 0 && std::isinf(resOverflow[j])); + EXPECT_GE(resUnderflow[j], 0); + EXPECT_LT(resUnderflow[j], flt_min); + } + } + + // Test random values combined with special values + std::vector specialValues = {0, 1, INFINITY, -INFINITY, NAN, dataMax}; + const int testRandNum = 10000; + const double specialValueProbability = 0.1; // 10% chance to insert a special value + cv::RNG_MT19937 rng; + + for (int i = 0; i < testRandNum; i++) { + Data dataRand, resRand; + for (int j = 0; j < n; ++j) { + if (rng.uniform(0.f, 1.f) <= specialValueProbability) { + // Insert a special value + int specialValueIndex = rng.uniform(0, (int) specialValues.size()); + dataRand[j] = specialValues[specialValueIndex]; + } else { + // Generate random data in [-dataMax*1.1, dataMax*1.1] + dataRand[j] = (LaneType) rng.uniform(-dataMax * 1.1, dataMax * 1.1); + } + } + // Compare with std::exp + R x = dataRand; + resRand = v_exp(x); + for (int j = 0; j < n; ++j) { + SCOPED_TRACE(cv::format("Random test value: %f", dataRand[j])); + LaneType std_exp = std::exp(dataRand[j]); + if (dataRand[j] == 0) { + // input 0 -> output 1 + EXPECT_EQ(resRand[j], 1); + } else if (dataRand[j] == 1) { + // input 1 -> output e + EXPECT_NEAR((LaneType) M_E, resRand[j], 1e-15); + } else if (dataRand[j] > 0 && std::isinf(dataRand[j])) { + // input INF -> output INF + EXPECT_TRUE(resRand[j] > 0 && std::isinf(resRand[j])); + } else if (dataRand[j] < 0 && std::isinf(dataRand[j])) { + // input -INF -> output 0 + EXPECT_EQ(resRand[j], 0); + } else if (std::isnan(dataRand[j])) { + // input NaN -> output NaN + EXPECT_TRUE(std::isnan(resRand[j])); + } else if (dataRand[j] == dataMax) { + // input dataMax -> output less than INFINITY + EXPECT_LT(resRand[j], (LaneType) INFINITY); + } else if (std::isinf(resRand[j])) { + // output INF -> input close to edge + EXPECT_GT(dataRand[j], dataMax); + } else { + EXPECT_GE(resRand[j], 0); + EXPECT_LT(std::abs(resRand[j] - std_exp), diff_thr * (std_exp + flt_min * enlarge_factor)); + } + } + } + } + + TheTest &test_exp_fp16() { +#if CV_SIMD_FP16 + float16_t flt16_min; + uint16_t flt16_min_hex = 0x0400; + std::memcpy(&flt16_min, &flt16_min_hex, sizeof(float16_t)); + __test_exp((float16_t) 10, (float16_t) 1e-2, (float16_t) 1e2, flt16_min); +#endif + return *this; + } + + TheTest &test_exp_fp32() { + __test_exp(88.0f, 1e-6f, 1e6f, FLT_MIN); + return *this; + } + + TheTest &test_exp_fp64() { +#if CV_SIMD_64F || CV_SIMD_SCALABLE_64F + __test_exp(709.0, 1e-15, 1e15, DBL_MIN); +#endif + return *this; + } }; #define DUMP_ENTRY(type) printf("SIMD%d: %s\n", 8*VTraits::vlanes(), CV__TRACE_FUNCTION); @@ -2011,6 +2108,7 @@ void test_hal_intrin_float32() .test_extract_highest() .test_broadcast_highest() .test_pack_triplets() + .test_exp_fp32() #if CV_SIMD_WIDTH == 32 .test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>() .test_rotate<4>().test_rotate<5>().test_rotate<6>().test_rotate<7>() @@ -2035,13 +2133,13 @@ void test_hal_intrin_float64() .test_mask() .test_unpack() .test_float_math() - .test_round_pair_f64() .test_float_cvt32() .test_reverse() .test_extract<0>().test_extract<1>() .test_rotate<0>().test_rotate<1>() .test_extract_n<0>().test_extract_n<1>() .test_extract_highest() + .test_exp_fp64() //.test_broadcast_element<0>().test_broadcast_element<1>() #if CV_SIMD_WIDTH == 32 .test_extract<2>().test_extract<3>() @@ -2062,6 +2160,7 @@ void test_hal_intrin_float16() #if CV_SIMD_FP16 .test_loadstore_fp16() .test_float_cvt_fp16() + .test_exp_fp16() #endif ; #else diff --git a/modules/dnn/src/layers/cpu_kernels/softmax.cpp b/modules/dnn/src/layers/cpu_kernels/softmax.cpp index eb258ecfa2..fd55c1c1de 100644 --- a/modules/dnn/src/layers/cpu_kernels/softmax.cpp +++ b/modules/dnn/src/layers/cpu_kernels/softmax.cpp @@ -71,48 +71,12 @@ void softmax(Mat &dst, const Mat &src, int axis, int axisBias, int axisStep){ // calculate the exp value along the axis v_float32 vs = vx_setzero_f32(); vmax = vx_setall_f32(maxVal); - // initialize vexp constant - v_float32 _vexp_lo = vx_setall_f32(-88.3762626647949f); - v_float32 _vexp_hi = vx_setall_f32(88.3762626647949f); - v_float32 _vexp_half = vx_setall_f32(0.5f); - v_float32 _vexp_one = vx_setall_f32(1.f); - v_float32 _vexp_LOG2EF = vx_setall_f32(1.44269504088896341f); - v_float32 _vexp_C1 = vx_setall_f32(-0.693359375f); - v_float32 _vexp_C2 = vx_setall_f32(2.12194440e-4f); - v_float32 _vexp_p0 = vx_setall_f32(1.9875691500E-4f); - v_float32 _vexp_p1 = vx_setall_f32(1.3981999507E-3f); - v_float32 _vexp_p2 = vx_setall_f32(8.3334519073E-3f); - v_float32 _vexp_p3 = vx_setall_f32(4.1665795894E-2f); - v_float32 _vexp_p4 = vx_setall_f32(1.6666665459E-1f); - v_float32 _vexp_p5 = vx_setall_f32(5.0000001201E-1f); - // initialize temp vectors for vexp - v_float32 val, _vexp_, _vexp_x, _vexp_y, _vexp_z; - v_int32 _vexp_mm; - + v_float32 val; // calculate and sum all data along axis for (size_t cnDim = 0; cnDim < axisStep; cnDim += nlanes) { val = vx_load(axisBuf + cnDim); val = v_sub(val, vmax); - - // compute vexp of val - _vexp_x = v_min(val, _vexp_hi); - _vexp_x = v_max(_vexp_x, _vexp_lo); - _vexp_ = v_fma(_vexp_x, _vexp_LOG2EF, _vexp_half); - _vexp_mm = v_floor(_vexp_); - _vexp_ = v_cvt_f32(_vexp_mm); - _vexp_mm = v_add(_vexp_mm, vx_setall_s32(0x7f)); - _vexp_mm = v_shl(_vexp_mm, 23); - _vexp_x = v_fma(_vexp_, _vexp_C1, _vexp_x); - _vexp_x = v_fma(_vexp_, _vexp_C2, _vexp_x); - _vexp_z = v_mul(_vexp_x, _vexp_x); - _vexp_y = v_fma(_vexp_x, _vexp_p0, _vexp_p1); - _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p2); - _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p3); - _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p4); - _vexp_y = v_fma(_vexp_y, _vexp_x, _vexp_p5); - _vexp_y = v_fma(_vexp_y, _vexp_z, _vexp_x); - _vexp_y = v_add(_vexp_y, _vexp_one); - val = v_mul(_vexp_y, v_reinterpret_as_f32(_vexp_mm)); + val = v_exp(val); vs = v_add(vs, val); v_store(axisBuf + cnDim, val);