From 8b44ee2ce1a8d6b4f1a8291b66915de61c2ee732 Mon Sep 17 00:00:00 2001 From: shengwenxue Date: Wed, 30 Mar 2022 11:29:01 +0800 Subject: [PATCH] fix MSA sum overflow issue --- modules/core/include/opencv2/core/hal/intrin_msa.hpp | 12 ++++++------ modules/core/include/opencv2/core/hal/msa_macros.h | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/intrin_msa.hpp b/modules/core/include/opencv2/core/hal/intrin_msa.hpp index a1fbb093a8..c035fdad60 100644 --- a/modules/core/include/opencv2/core/hal/intrin_msa.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_msa.hpp @@ -1037,12 +1037,12 @@ inline scalartype v_reduce_sum(const _Tpvec& a) \ return (scalartype)msa_sum_##suffix(a.val); \ } -OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint8x16, unsigned char, u8) -OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int8x16, char, s8) -OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint16x8, unsigned short, u16) -OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int16x8, short, s16) -OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint32x4, unsigned, u32) -OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int32x4, int, s32) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint8x16, unsigned short, u8) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int8x16, short, s8) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint16x8, unsigned, u16) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int16x8, int, s16) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint32x4, uint64_t, u32) +OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int32x4, int64_t, s32) OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_float32x4, float, f32) inline uint64 v_reduce_sum(const v_uint64x2& a) diff --git a/modules/core/include/opencv2/core/hal/msa_macros.h b/modules/core/include/opencv2/core/hal/msa_macros.h index bd6ddb127a..fad8c5adda 100644 --- a/modules/core/include/opencv2/core/hal/msa_macros.h +++ b/modules/core/include/opencv2/core/hal/msa_macros.h @@ -719,7 +719,7 @@ typedef double v1f64 __attribute__ ((vector_size(8), aligned(8))); v2i64 _c; \ _b = __builtin_msa_hadd_s_w(__a, __a); \ _c = __builtin_msa_hadd_s_d(_b, _b); \ - (int16_t)(_c[0] + _c[1]); \ + (int32_t)(_c[0] + _c[1]); \ }) @@ -736,7 +736,7 @@ typedef double v1f64 __attribute__ ((vector_size(8), aligned(8))); ({ \ v2i64 _b; \ _b = __builtin_msa_hadd_s_d(__a, __a); \ - (int32_t)(_b[0] + _b[1]); \ + (int64_t)(_b[0] + _b[1]); \ }) /* uint8_t msa_sum_u8(v16u8 __a)*/ @@ -756,7 +756,7 @@ typedef double v1f64 __attribute__ ((vector_size(8), aligned(8))); v4i32 _c32; \ _b16 = __builtin_msa_hadd_s_h(__a, __a); \ _c32 = __builtin_msa_hadd_s_w(_b16, _b16); \ - (int8_t)msa_sum_s32(_c32); \ + (int16_t)msa_sum_s32(_c32); \ }) /* float msa_sum_f32(v4f32 __a)*/