From 8b44ee2ce1a8d6b4f1a8291b66915de61c2ee732 Mon Sep 17 00:00:00 2001
From: shengwenxue <shengwenxue@loongson.cn>
Date: Wed, 30 Mar 2022 11:29:01 +0800
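Subject: [PATCH] fix MSA sum overflow issue

The horizontal-sum macros in msa_macros.h cast their result back to the
lane width (int8_t/int16_t/int32_t), and the 8/16/32-bit integer
v_reduce_sum overloads in intrin_msa.hpp returned the lane type, so the
reduced value could be truncated. Widen those casts and return types to
the next larger integer type.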

---
 modules/core/include/opencv2/core/hal/intrin_msa.hpp | 12 ++++++------
 modules/core/include/opencv2/core/hal/msa_macros.h   |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/modules/core/include/opencv2/core/hal/intrin_msa.hpp b/modules/core/include/opencv2/core/hal/intrin_msa.hpp
index a1fbb093a8..c035fdad60 100644
--- a/modules/core/include/opencv2/core/hal/intrin_msa.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_msa.hpp
@@ -1037,12 +1037,12 @@ inline scalartype v_reduce_sum(const _Tpvec& a) \
     return (scalartype)msa_sum_##suffix(a.val); \
 }
 
-OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint8x16, unsigned char, u8)
-OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int8x16, char, s8)
-OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint16x8, unsigned short, u16)
-OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int16x8, short, s16)
-OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint32x4, unsigned, u32)
-OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int32x4, int, s32)
+OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint8x16, unsigned short, u8)
+OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int8x16, short, s8)
+OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint16x8, unsigned, u16)
+OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int16x8, int, s16)
+OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint32x4, uint64_t, u32)
+OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int32x4, int64_t, s32)
 OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_float32x4, float, f32)
 
 inline uint64 v_reduce_sum(const v_uint64x2& a)
diff --git a/modules/core/include/opencv2/core/hal/msa_macros.h b/modules/core/include/opencv2/core/hal/msa_macros.h
index bd6ddb127a..fad8c5adda 100644
--- a/modules/core/include/opencv2/core/hal/msa_macros.h
+++ b/modules/core/include/opencv2/core/hal/msa_macros.h
@@ -719,7 +719,7 @@ typedef double v1f64 __attribute__ ((vector_size(8), aligned(8)));
   v2i64 _c;                                     \
   _b = __builtin_msa_hadd_s_w(__a, __a);        \
   _c = __builtin_msa_hadd_s_d(_b, _b);          \
-  (int16_t)(_c[0] + _c[1]);                     \
+  (int32_t)(_c[0] + _c[1]);                     \
 })
 
 
@@ -736,7 +736,7 @@ typedef double v1f64 __attribute__ ((vector_size(8), aligned(8)));
 ({                                             \
   v2i64 _b;                                    \
   _b = __builtin_msa_hadd_s_d(__a, __a);       \
-  (int32_t)(_b[0] + _b[1]);                    \
+  (int64_t)(_b[0] + _b[1]);                    \
 })
 
 /* uint8_t msa_sum_u8(v16u8 __a)*/
@@ -756,7 +756,7 @@ typedef double v1f64 __attribute__ ((vector_size(8), aligned(8)));
   v4i32 _c32;                                    \
   _b16 = __builtin_msa_hadd_s_h(__a, __a);       \
   _c32 = __builtin_msa_hadd_s_w(_b16, _b16);         \
-  (int8_t)msa_sum_s32(_c32);                     \
+  (int16_t)msa_sum_s32(_c32);                     \
 })
 
 /* float msa_sum_f32(v4f32 __a)*/