@ -23,6 +23,11 @@
# include <stdint.h>
# include <msa.h>
# include <config.h>
# if HAVE_MSA2
# include <msa2.h>
# endif
/* Base alignment constant used by this header (presumably in bytes —
   TODO confirm against the users of ALIGNMENT). */
# define ALIGNMENT 16
/* Expands to a GCC-style 'aligned' attribute. Note that the requested
   alignment is twice the 'align' argument, because of the (align) << 1. */
# define ALLOC_ALIGNED(align) __attribute__ ((aligned((align) << 1)))
@ -1234,6 +1239,15 @@
unsigned absolute diff values, even-odd pairs are added
together to generate 8 halfword results.
*/
# if HAVE_MSA2
/* MSA2 variant of SAD_UB2_UH.
   Applies __builtin_msa2_sad_adj2_u_w2x_b to the pairs (in0, ref0) and
   (in1, ref1), with every input cast to v16u8, and sums both results into
   a zero-initialised v8u16 accumulator.
   Arguments : Inputs - in0, in1, ref0, ref1
   Return    : the v8u16 accumulator (value of the statement expression)
   Macro arguments are parenthesised so that expression arguments are cast
   as a whole. */
# define SAD_UB2_UH(in0, in1, ref0, ref1)                                      \
( {                                                                           \
    v8u16 sad_m = { 0 };                                                      \
                                                                              \
    sad_m += __builtin_msa2_sad_adj2_u_w2x_b((v16u8) (in0), (v16u8) (ref0));  \
    sad_m += __builtin_msa2_sad_adj2_u_w2x_b((v16u8) (in1), (v16u8) (ref1));  \
                                                                              \
    sad_m;                                                                    \
} )
# else
# define SAD_UB2_UH(in0, in1, ref0, ref1) \
( { \
v16u8 diff0_m , diff1_m ; \
@ -1247,6 +1261,7 @@
\
sad_m ; \
} )
# endif // #if HAVE_MSA2
/* Description : Insert specified word elements from input vectors to 1
destination vector
@ -2287,6 +2302,12 @@
extracted and interleaved with same vector 'in' to generate
4 word elements keeping sign intact
*/
# if HAVE_MSA2
/* MSA2 variant of UNPCK_R_SH_SW.
   Passes 'in' (cast to v8i16) to __builtin_msa2_w2x_lo_s_h and stores the
   result, cast to v4i32, in 'out'.
   Arguments : Input  - in
               Output - out (must be an assignable v4i32 lvalue)
   Macro arguments are parenthesised so that expression arguments are cast
   as a whole. */
# define UNPCK_R_SH_SW(in, out)                                   \
{                                                                 \
    (out) = (v4i32) __builtin_msa2_w2x_lo_s_h((v8i16) (in));      \
}
# else
# define UNPCK_R_SH_SW(in, out) \
{ \
v8i16 sign_m ; \
@ -2294,6 +2315,7 @@
sign_m = __msa_clti_s_h ( ( v8i16 ) in , 0 ) ; \
out = ( v4i32 ) __msa_ilvr_h ( sign_m , ( v8i16 ) in ) ; \
}
# endif // #if HAVE_MSA2
/* Description : Sign extend byte elements from input vector and return
halfword results in pair of vectors
@ -2306,6 +2328,13 @@
Then interleaved left with same vector 'in' to
generate 8 signed halfword elements in 'out1'
*/
# if HAVE_MSA2
/* MSA2 variant of UNPCK_SB_SH.
   Passes 'in' (cast to v16i8) to __builtin_msa2_w2x_lo_s_b and
   __builtin_msa2_w2x_hi_s_b and stores the results in 'out0' and 'out1'.
   Arguments : Input   - in
               Outputs - out0, out1 (assignable v8i16 lvalues)
   The results are halfword vectors, so they are cast to v8i16 rather than
   v4i32; this matches the ILVRL_B2_SH-based fallback, which also yields
   halfword outputs. Macro arguments are parenthesised so that expression
   arguments are cast as a whole. */
# define UNPCK_SB_SH(in, out0, out1)                              \
{                                                                 \
    (out0) = (v8i16) __builtin_msa2_w2x_lo_s_b((v16i8) (in));     \
    (out1) = (v8i16) __builtin_msa2_w2x_hi_s_b((v16i8) (in));     \
}
# else
# define UNPCK_SB_SH(in, out0, out1) \
{ \
v16i8 tmp_m ; \
@ -2313,6 +2342,7 @@
tmp_m = __msa_clti_s_b ( ( v16i8 ) in , 0 ) ; \
ILVRL_B2_SH ( tmp_m , in , out0 , out1 ) ; \
}
# endif // #if HAVE_MSA2
/* Description : Zero extend unsigned byte elements to halfword elements
Arguments : Inputs - in (1 input unsigned byte vector)
@ -2339,6 +2369,13 @@
Then interleaved left with same vector 'in' to
generate 4 signed word elements in 'out1'
*/
# if HAVE_MSA2
/* MSA2 variant of UNPCK_SH_SW.
   Passes 'in' (cast to v8i16) to __builtin_msa2_w2x_lo_s_h and
   __builtin_msa2_w2x_hi_s_h and stores the results, cast to v4i32, in
   'out0' and 'out1' respectively.
   Arguments : Input   - in
               Outputs - out0, out1 (assignable v4i32 lvalues)
   Macro arguments are parenthesised so that expression arguments are cast
   as a whole. */
# define UNPCK_SH_SW(in, out0, out1)                              \
{                                                                 \
    (out0) = (v4i32) __builtin_msa2_w2x_lo_s_h((v8i16) (in));     \
    (out1) = (v4i32) __builtin_msa2_w2x_hi_s_h((v8i16) (in));     \
}
# else
# define UNPCK_SH_SW(in, out0, out1) \
{ \
v8i16 tmp_m ; \
@ -2346,6 +2383,7 @@
tmp_m = __msa_clti_s_h ( ( v8i16 ) in , 0 ) ; \
ILVRL_H2_SW ( tmp_m , in , out0 , out1 ) ; \
}
# endif // #if HAVE_MSA2
/* Description : Swap two variables
Arguments : Inputs - in0 , in1
@ -2850,13 +2888,11 @@
*/
/* DPADD_SH3_SH: three-term byte dot product accumulated into halfwords.
   Arguments : Inputs - in0, in1, in2, coeff0, coeff1, coeff2
                        (each is cast to v16i8)
   Return    : v8i16 accumulator (value of the statement expression)
   The first term is produced with __msa_dotp_s_h; the second and third
   terms are each added exactly once with __msa_dpadd_s_h. The third term
   must not also be added through a separate __msa_dotp_s_h and
   __msa_adds_s_h step, or in2 * coeff2 is counted twice. */
# define DPADD_SH3_SH(in0, in1, in2, coeff0, coeff1, coeff2)                    \
( {                                                                            \
    v8i16 out0_m;                                                              \
                                                                               \
    out0_m = __msa_dotp_s_h((v16i8) (in0), (v16i8) (coeff0));                  \
    out0_m = __msa_dpadd_s_h(out0_m, (v16i8) (in1), (v16i8) (coeff1));         \
    out0_m = __msa_dpadd_s_h(out0_m, (v16i8) (in2), (v16i8) (coeff2));         \
                                                                               \
    out0_m;                                                                    \
} )