@@ -304,35 +304,35 @@ static const unsigned char popCountTable[] =
 } // namespace
 static v128_t wasm_unpacklo_i8x16(v128_t a, v128_t b) {
-    return wasm_v8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
+    return wasm_i8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
 }
 static v128_t wasm_unpacklo_i16x8(v128_t a, v128_t b) {
-    return wasm_v8x16_shuffle(a, b, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23);
+    return wasm_i8x16_shuffle(a, b, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23);
 }
 static v128_t wasm_unpacklo_i32x4(v128_t a, v128_t b) {
-    return wasm_v8x16_shuffle(a, b, 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23);
+    return wasm_i8x16_shuffle(a, b, 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23);
 }
 static v128_t wasm_unpacklo_i64x2(v128_t a, v128_t b) {
-    return wasm_v8x16_shuffle(a, b, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23);
+    return wasm_i8x16_shuffle(a, b, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23);
 }
 static v128_t wasm_unpackhi_i8x16(v128_t a, v128_t b) {
-    return wasm_v8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31);
+    return wasm_i8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31);
 }
 static v128_t wasm_unpackhi_i16x8(v128_t a, v128_t b) {
-    return wasm_v8x16_shuffle(a, b, 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31);
+    return wasm_i8x16_shuffle(a, b, 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31);
 }
 static v128_t wasm_unpackhi_i32x4(v128_t a, v128_t b) {
-    return wasm_v8x16_shuffle(a, b, 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31);
+    return wasm_i8x16_shuffle(a, b, 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31);
 }
 static v128_t wasm_unpackhi_i64x2(v128_t a, v128_t b) {
-    return wasm_v8x16_shuffle(a, b, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31);
+    return wasm_i8x16_shuffle(a, b, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31);
 }
 /** Convert **/
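WASM SIMD128 has no dedicated unpack/interleave instructions, so the helpers above emulate the SSE-style unpacklo/unpackhi with explicit byte shuffles; this patch only renames the shuffle intrinsic, the lane patterns are unchanged. A minimal standalone sketch of what the low-half byte interleave computes (an illustration, assuming a toolchain whose wasm_simd128.h already ships wasm_i8x16_shuffle; build with emcc -msimd128):

    #include <wasm_simd128.h>
    #include <stdint.h>
    #include <stdio.h>

    int main() {
        v128_t a = wasm_i8x16_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        v128_t b = wasm_i8x16_const(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Shuffle indices 0..15 pick bytes from a, 16..31 from b:
        // this interleaves the low 8 bytes of each operand.
        v128_t lo = wasm_i8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
        uint8_t out[16];
        wasm_v128_store(out, lo);
        for (int i = 0; i < 16; ++i)
            printf("%d ", out[i]);  // prints: 0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 23
        return 0;
    }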
@@ -423,7 +423,7 @@ inline v_uint8x16 v_pack(const v_uint16x8& a, const v_uint16x8& b)
     v128_t maxval = wasm_i16x8_splat(255);
     v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval));
     v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u16x8_gt(b.val, maxval));
-    return v_uint8x16(wasm_v8x16_shuffle(a1, b1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
+    return v_uint8x16(wasm_i8x16_shuffle(a1, b1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
 }
 inline v_int8x16 v_pack(const v_int16x8& a, const v_int16x8& b)
 {
@@ -433,14 +433,14 @@ inline v_int8x16 v_pack(const v_int16x8& a, const v_int16x8& b)
     v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i16x8_gt(b.val, maxval));
     v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval));
     v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i16x8_lt(b1, minval));
-    return v_int8x16(wasm_v8x16_shuffle(a2, b2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
+    return v_int8x16(wasm_i8x16_shuffle(a2, b2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
 }
 inline v_uint16x8 v_pack(const v_uint32x4& a, const v_uint32x4& b)
 {
     v128_t maxval = wasm_i32x4_splat(65535);
     v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval));
     v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u32x4_gt(b.val, maxval));
-    return v_uint16x8(wasm_v8x16_shuffle(a1, b1, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29));
+    return v_uint16x8(wasm_i8x16_shuffle(a1, b1, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29));
 }
 inline v_int16x8 v_pack(const v_int32x4& a, const v_int32x4& b)
 {
@@ -450,15 +450,15 @@ inline v_int16x8 v_pack(const v_int32x4& a, const v_int32x4& b)
     v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i32x4_gt(b.val, maxval));
     v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval));
     v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i32x4_lt(b1, minval));
-    return v_int16x8(wasm_v8x16_shuffle(a2, b2, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29));
+    return v_int16x8(wasm_i8x16_shuffle(a2, b2, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29));
 }
 inline v_uint32x4 v_pack(const v_uint64x2& a, const v_uint64x2& b)
 {
-    return v_uint32x4(wasm_v8x16_shuffle(a.val, b.val, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27));
+    return v_uint32x4(wasm_i8x16_shuffle(a.val, b.val, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27));
 }
 inline v_int32x4 v_pack(const v_int64x2& a, const v_int64x2& b)
 {
-    return v_int32x4(wasm_v8x16_shuffle(a.val, b.val, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27));
+    return v_int32x4(wasm_i8x16_shuffle(a.val, b.val, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27));
 }
 inline v_uint8x16 v_pack_u(const v_int16x8& a, const v_int16x8& b)
 {
@@ -468,7 +468,7 @@ inline v_uint8x16 v_pack_u(const v_int16x8& a, const v_int16x8& b)
     v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i16x8_gt(b.val, maxval));
     v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval));
     v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i16x8_lt(b1, minval));
-    return v_uint8x16(wasm_v8x16_shuffle(a2, b2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
+    return v_uint8x16(wasm_i8x16_shuffle(a2, b2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
 }
 inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b)
 {
@@ -478,7 +478,7 @@ inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b)
     v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i32x4_gt(b.val, maxval));
     v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval));
     v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i32x4_lt(b1, minval));
-    return v_uint16x8(wasm_v8x16_shuffle(a2, b2, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29));
+    return v_uint16x8(wasm_i8x16_shuffle(a2, b2, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29));
 }
 template<int n>
@@ -490,7 +490,7 @@ inline v_uint8x16 v_rshr_pack(const v_uint16x8& a, const v_uint16x8& b)
     v128_t maxval = wasm_i16x8_splat(255);
     v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u16x8_gt(a1, maxval));
     v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_u16x8_gt(b1, maxval));
-    return v_uint8x16(wasm_v8x16_shuffle(a2, b2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
+    return v_uint8x16(wasm_i8x16_shuffle(a2, b2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
 }
 template<int n>
 inline v_int8x16 v_rshr_pack(const v_int16x8& a, const v_int16x8& b)
@@ -504,7 +504,7 @@ inline v_int8x16 v_rshr_pack(const v_int16x8& a, const v_int16x8& b)
     v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i16x8_gt(b1, maxval));
     v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
     v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i16x8_lt(b1, minval));
-    return v_int8x16(wasm_v8x16_shuffle(a3, b3, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
+    return v_int8x16(wasm_i8x16_shuffle(a3, b3, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
 }
 template<int n>
 inline v_uint16x8 v_rshr_pack(const v_uint32x4& a, const v_uint32x4& b)
@@ -515,7 +515,7 @@ inline v_uint16x8 v_rshr_pack(const v_uint32x4& a, const v_uint32x4& b)
     v128_t maxval = wasm_i32x4_splat(65535);
     v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u32x4_gt(a1, maxval));
     v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_u32x4_gt(b1, maxval));
-    return v_uint16x8(wasm_v8x16_shuffle(a2, b2, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29));
+    return v_uint16x8(wasm_i8x16_shuffle(a2, b2, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29));
 }
 template<int n>
 inline v_int16x8 v_rshr_pack(const v_int32x4& a, const v_int32x4& b)
@@ -529,7 +529,7 @@ inline v_int16x8 v_rshr_pack(const v_int32x4& a, const v_int32x4& b)
     v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i32x4_gt(b1, maxval));
     v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
     v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i32x4_lt(b1, minval));
-    return v_int16x8(wasm_v8x16_shuffle(a3, b3, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29));
+    return v_int16x8(wasm_i8x16_shuffle(a3, b3, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29));
 }
 template<int n>
 inline v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b)
@@ -537,7 +537,7 @@ inline v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b)
     v128_t delta = wasm_i64x2_splat(((int64)1 << (n - 1)));
     v128_t a1 = wasm_u64x2_shr(wasm_i64x2_add(a.val, delta), n);
     v128_t b1 = wasm_u64x2_shr(wasm_i64x2_add(b.val, delta), n);
-    return v_uint32x4(wasm_v8x16_shuffle(a1, b1, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27));
+    return v_uint32x4(wasm_i8x16_shuffle(a1, b1, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27));
 }
 template<int n>
 inline v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b)
@@ -545,7 +545,7 @@ inline v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b)
     v128_t delta = wasm_i64x2_splat(((int64)1 << (n - 1)));
     v128_t a1 = wasm_i64x2_shr(wasm_i64x2_add(a.val, delta), n);
     v128_t b1 = wasm_i64x2_shr(wasm_i64x2_add(b.val, delta), n);
-    return v_int32x4(wasm_v8x16_shuffle(a1, b1, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27));
+    return v_int32x4(wasm_i8x16_shuffle(a1, b1, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27));
 }
 template<int n>
 inline v_uint8x16 v_rshr_pack_u(const v_int16x8& a, const v_int16x8& b)
@@ -559,7 +559,7 @@ inline v_uint8x16 v_rshr_pack_u(const v_int16x8& a, const v_int16x8& b)
     v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i16x8_gt(b1, maxval));
     v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
     v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i16x8_lt(b1, minval));
-    return v_uint8x16(wasm_v8x16_shuffle(a3, b3, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
+    return v_uint8x16(wasm_i8x16_shuffle(a3, b3, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
 }
 template<int n>
 inline v_uint16x8 v_rshr_pack_u(const v_int32x4& a, const v_int32x4& b)
@@ -573,14 +573,14 @@ inline v_uint16x8 v_rshr_pack_u(const v_int32x4& a, const v_int32x4& b)
     v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i32x4_gt(b1, maxval));
     v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
     v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i32x4_lt(b1, minval));
-    return v_uint16x8(wasm_v8x16_shuffle(a3, b3, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29));
+    return v_uint16x8(wasm_i8x16_shuffle(a3, b3, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29));
 }
 inline void v_pack_store(uchar* ptr, const v_uint16x8& a)
 {
     v128_t maxval = wasm_i16x8_splat(255);
     v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval));
-    v128_t r = wasm_v8x16_shuffle(a1, a1, 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14);
+    v128_t r = wasm_i8x16_shuffle(a1, a1, 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14);
     uchar t_ptr[16];
     wasm_v128_store(t_ptr, r);
     for (int i = 0; i < 8; ++i) {
@@ -593,7 +593,7 @@ inline void v_pack_store(schar* ptr, const v_int16x8& a)
     v128_t minval = wasm_i16x8_splat(-128);
     v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval));
     v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval));
-    v128_t r = wasm_v8x16_shuffle(a2, a2, 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14);
+    v128_t r = wasm_i8x16_shuffle(a2, a2, 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14);
     schar t_ptr[16];
     wasm_v128_store(t_ptr, r);
     for (int i = 0; i < 8; ++i) {
@@ -604,7 +604,7 @@ inline void v_pack_store(ushort* ptr, const v_uint32x4& a)
 {
     v128_t maxval = wasm_i32x4_splat(65535);
     v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval));
-    v128_t r = wasm_v8x16_shuffle(a1, a1, 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13);
+    v128_t r = wasm_i8x16_shuffle(a1, a1, 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13);
     ushort t_ptr[8];
     wasm_v128_store(t_ptr, r);
     for (int i = 0; i < 4; ++i) {
@@ -617,7 +617,7 @@ inline void v_pack_store(short* ptr, const v_int32x4& a)
     v128_t minval = wasm_i32x4_splat(-32768);
     v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval));
     v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval));
-    v128_t r = wasm_v8x16_shuffle(a2, a2, 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13);
+    v128_t r = wasm_i8x16_shuffle(a2, a2, 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13);
     short t_ptr[8];
     wasm_v128_store(t_ptr, r);
     for (int i = 0; i < 4; ++i) {
@@ -626,7 +626,7 @@ inline void v_pack_store(short* ptr, const v_int32x4& a)
 }
 inline void v_pack_store(unsigned* ptr, const v_uint64x2& a)
 {
-    v128_t r = wasm_v8x16_shuffle(a.val, a.val, 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11);
+    v128_t r = wasm_i8x16_shuffle(a.val, a.val, 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11);
     unsigned t_ptr[4];
     wasm_v128_store(t_ptr, r);
     for (int i = 0; i < 2; ++i) {
@@ -635,7 +635,7 @@ inline void v_pack_store(unsigned* ptr, const v_uint64x2& a)
 }
 inline void v_pack_store(int* ptr, const v_int64x2& a)
 {
-    v128_t r = wasm_v8x16_shuffle(a.val, a.val, 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11);
+    v128_t r = wasm_i8x16_shuffle(a.val, a.val, 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11);
     int t_ptr[4];
     wasm_v128_store(t_ptr, r);
     for (int i = 0; i < 2; ++i) {
@@ -648,7 +648,7 @@ inline void v_pack_u_store(uchar* ptr, const v_int16x8& a)
     v128_t minval = wasm_i16x8_splat(0);
     v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval));
     v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval));
-    v128_t r = wasm_v8x16_shuffle(a2, a2, 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14);
+    v128_t r = wasm_i8x16_shuffle(a2, a2, 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14);
     uchar t_ptr[16];
     wasm_v128_store(t_ptr, r);
     for (int i = 0; i < 8; ++i) {
@@ -661,7 +661,7 @@ inline void v_pack_u_store(ushort* ptr, const v_int32x4& a)
     v128_t minval = wasm_i32x4_splat(0);
     v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval));
     v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval));
-    v128_t r = wasm_v8x16_shuffle(a2, a2, 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13);
+    v128_t r = wasm_i8x16_shuffle(a2, a2, 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13);
     ushort t_ptr[8];
     wasm_v128_store(t_ptr, r);
     for (int i = 0; i < 4; ++i) {
@@ -676,7 +676,7 @@ inline void v_rshr_pack_store(uchar* ptr, const v_uint16x8& a)
     v128_t a1 = wasm_u16x8_shr(wasm_i16x8_add(a.val, delta), n);
     v128_t maxval = wasm_i16x8_splat(255);
     v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u16x8_gt(a1, maxval));
-    v128_t r = wasm_v8x16_shuffle(a2, a2, 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14);
+    v128_t r = wasm_i8x16_shuffle(a2, a2, 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14);
     uchar t_ptr[16];
     wasm_v128_store(t_ptr, r);
     for (int i = 0; i < 8; ++i) {
@@ -692,7 +692,7 @@ inline void v_rshr_pack_store(schar* ptr, const v_int16x8& a)
     v128_t minval = wasm_i16x8_splat(-128);
     v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval));
     v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
-    v128_t r = wasm_v8x16_shuffle(a3, a3, 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14);
+    v128_t r = wasm_i8x16_shuffle(a3, a3, 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14);
     schar t_ptr[16];
     wasm_v128_store(t_ptr, r);
     for (int i = 0; i < 8; ++i) {
@@ -706,7 +706,7 @@ inline void v_rshr_pack_store(ushort* ptr, const v_uint32x4& a)
     v128_t a1 = wasm_u32x4_shr(wasm_i32x4_add(a.val, delta), n);
     v128_t maxval = wasm_i32x4_splat(65535);
     v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u32x4_gt(a1, maxval));
-    v128_t r = wasm_v8x16_shuffle(a2, a2, 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13);
+    v128_t r = wasm_i8x16_shuffle(a2, a2, 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13);
     ushort t_ptr[8];
     wasm_v128_store(t_ptr, r);
     for (int i = 0; i < 4; ++i) {
@@ -722,7 +722,7 @@ inline void v_rshr_pack_store(short* ptr, const v_int32x4& a)
     v128_t minval = wasm_i32x4_splat(-32768);
     v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval));
     v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
-    v128_t r = wasm_v8x16_shuffle(a3, a3, 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13);
+    v128_t r = wasm_i8x16_shuffle(a3, a3, 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13);
     short t_ptr[8];
     wasm_v128_store(t_ptr, r);
     for (int i = 0; i < 4; ++i) {
@@ -734,7 +734,7 @@ inline void v_rshr_pack_store(unsigned* ptr, const v_uint64x2& a)
 {
     v128_t delta = wasm_i64x2_splat(((int64)1 << (n - 1)));
     v128_t a1 = wasm_u64x2_shr(wasm_i64x2_add(a.val, delta), n);
-    v128_t r = wasm_v8x16_shuffle(a1, a1, 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11);
+    v128_t r = wasm_i8x16_shuffle(a1, a1, 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11);
     unsigned t_ptr[4];
     wasm_v128_store(t_ptr, r);
     for (int i = 0; i < 2; ++i) {
@@ -746,7 +746,7 @@ inline void v_rshr_pack_store(int* ptr, const v_int64x2& a)
 {
     v128_t delta = wasm_i64x2_splat(((int64)1 << (n - 1)));
     v128_t a1 = wasm_i64x2_shr(wasm_i64x2_add(a.val, delta), n);
-    v128_t r = wasm_v8x16_shuffle(a1, a1, 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11);
+    v128_t r = wasm_i8x16_shuffle(a1, a1, 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11);
     int t_ptr[4];
     wasm_v128_store(t_ptr, r);
     for (int i = 0; i < 2; ++i) {
@@ -762,7 +762,7 @@ inline void v_rshr_pack_u_store(uchar* ptr, const v_int16x8& a)
     v128_t minval = wasm_i16x8_splat(0);
     v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval));
     v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
-    v128_t r = wasm_v8x16_shuffle(a3, a3, 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14);
+    v128_t r = wasm_i8x16_shuffle(a3, a3, 0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14);
     uchar t_ptr[16];
     wasm_v128_store(t_ptr, r);
     for (int i = 0; i < 8; ++i) {
@@ -778,7 +778,7 @@ inline void v_rshr_pack_u_store(ushort* ptr, const v_int32x4& a)
     v128_t minval = wasm_i32x4_splat(0);
     v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval));
     v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
-    v128_t r = wasm_v8x16_shuffle(a3, a3, 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13);
+    v128_t r = wasm_i8x16_shuffle(a3, a3, 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13);
     ushort t_ptr[8];
     wasm_v128_store(t_ptr, r);
     for (int i = 0; i < 4; ++i) {
@@ -791,7 +791,7 @@ inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
     v128_t maxval = wasm_i16x8_splat(255);
     v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval));
     v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u16x8_gt(b.val, maxval));
-    return v_uint8x16(wasm_v8x16_shuffle(a1, b1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
+    return v_uint8x16(wasm_i8x16_shuffle(a1, b1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30));
 }
 inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
@@ -802,9 +802,9 @@ inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
     v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u32x4_gt(b.val, maxval));
     v128_t c1 = wasm_v128_bitselect(maxval, c.val, wasm_u32x4_gt(c.val, maxval));
     v128_t d1 = wasm_v128_bitselect(maxval, d.val, wasm_u32x4_gt(d.val, maxval));
-    v128_t ab = wasm_v8x16_shuffle(a1, b1, 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28);
-    v128_t cd = wasm_v8x16_shuffle(c1, d1, 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28);
-    return v_uint8x16(wasm_v8x16_shuffle(ab, cd, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23));
+    v128_t ab = wasm_i8x16_shuffle(a1, b1, 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28);
+    v128_t cd = wasm_i8x16_shuffle(c1, d1, 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28);
+    return v_uint8x16(wasm_i8x16_shuffle(ab, cd, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23));
 }
 inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
@@ -820,13 +820,13 @@ inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uin
     v128_t f1 = wasm_v128_bitselect(maxval, f.val, ((__u64x2)(f.val) > (__u64x2)maxval));
     v128_t g1 = wasm_v128_bitselect(maxval, g.val, ((__u64x2)(g.val) > (__u64x2)maxval));
     v128_t h1 = wasm_v128_bitselect(maxval, h.val, ((__u64x2)(h.val) > (__u64x2)maxval));
-    v128_t ab = wasm_v8x16_shuffle(a1, b1, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24);
-    v128_t cd = wasm_v8x16_shuffle(c1, d1, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24);
-    v128_t ef = wasm_v8x16_shuffle(e1, f1, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24);
-    v128_t gh = wasm_v8x16_shuffle(g1, h1, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24);
-    v128_t abcd = wasm_v8x16_shuffle(ab, cd, 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 16, 17, 18, 19);
-    v128_t efgh = wasm_v8x16_shuffle(ef, gh, 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 16, 17, 18, 19);
-    return v_uint8x16(wasm_v8x16_shuffle(abcd, efgh, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23));
+    v128_t ab = wasm_i8x16_shuffle(a1, b1, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24);
+    v128_t cd = wasm_i8x16_shuffle(c1, d1, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24);
+    v128_t ef = wasm_i8x16_shuffle(e1, f1, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24);
+    v128_t gh = wasm_i8x16_shuffle(g1, h1, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24);
+    v128_t abcd = wasm_i8x16_shuffle(ab, cd, 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 16, 17, 18, 19);
+    v128_t efgh = wasm_i8x16_shuffle(ef, gh, 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 16, 17, 18, 19);
+    return v_uint8x16(wasm_i8x16_shuffle(abcd, efgh, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23));
 }
 inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
@@ -964,7 +964,7 @@ inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
     v_expand(b, b0, b1);
     v128_t c = wasm_i32x4_mul(a0.val, b0.val);
     v128_t d = wasm_i32x4_mul(a1.val, b1.val);
-    return v_int16x8(wasm_v8x16_shuffle(c, d, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31));
+    return v_int16x8(wasm_i8x16_shuffle(c, d, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31));
 }
 inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
 {
@@ -973,7 +973,7 @@ inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
     v_expand(b, b0, b1);
     v128_t c = wasm_i32x4_mul(a0.val, b0.val);
     v128_t d = wasm_i32x4_mul(a1.val, b1.val);
-    return v_uint16x8(wasm_v8x16_shuffle(c, d, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31));
+    return v_uint16x8(wasm_i8x16_shuffle(c, d, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31));
 }
 //////// Dot Product ////////
@@ -1398,7 +1398,7 @@ inline _Tpsvec v_shl(const _Tpsvec& a, int imm) \
 { \
     return _Tpsvec(wasm_##suffix##_shl(a.val, imm)); \
 } \
-inline _Tpuvec V_shr(const _Tpuvec& a, int imm) \
+inline _Tpuvec v_shr(const _Tpuvec& a, int imm) \
 { \
     return _Tpuvec(wasm_##ssuffix##_shr(a.val, imm)); \
 } \
@@ -1471,7 +1471,7 @@ namespace hal_wasm_internal
     inline v128_t operator()(const v128_t& a, const v128_t& b) const
     {
         enum { imm2 = (sizeof(v128_t) - imm) };
-        return wasm_v8x16_shuffle(a, b,
+        return wasm_i8x16_shuffle(a, b,
                                   imm, imm + 1, imm + 2, imm + 3,
                                   imm + 4, imm + 5, imm + 6, imm + 7,
                                   imm + 8, imm + 9, imm + 10, imm + 11,
@@ -1582,19 +1582,19 @@ OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_float64x2, double)
 /** Reverse **/
 inline v_uint8x16 v_reverse(const v_uint8x16 &a)
-{ return v_uint8x16(wasm_v8x16_shuffle(a.val, a.val, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); }
+{ return v_uint8x16(wasm_i8x16_shuffle(a.val, a.val, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); }
 inline v_int8x16 v_reverse(const v_int8x16 &a)
 { return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
 inline v_uint16x8 v_reverse(const v_uint16x8 &a)
-{ return v_uint16x8(wasm_v8x16_shuffle(a.val, a.val, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1)); }
+{ return v_uint16x8(wasm_i8x16_shuffle(a.val, a.val, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1)); }
 inline v_int16x8 v_reverse(const v_int16x8 &a)
 { return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }
 inline v_uint32x4 v_reverse(const v_uint32x4 &a)
-{ return v_uint32x4(wasm_v8x16_shuffle(a.val, a.val, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)); }
+{ return v_uint32x4(wasm_i8x16_shuffle(a.val, a.val, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)); }
 inline v_int32x4 v_reverse(const v_int32x4 &a)
 { return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }
@@ -1603,7 +1603,7 @@ inline v_float32x4 v_reverse(const v_float32x4 &a)
 { return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
 inline v_uint64x2 v_reverse(const v_uint64x2 &a)
-{ return v_uint64x2(wasm_v8x16_shuffle(a.val, a.val, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); }
+{ return v_uint64x2(wasm_i8x16_shuffle(a.val, a.val, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); }
 inline v_int64x2 v_reverse(const v_int64x2 &a)
 { return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }
@@ -1616,8 +1616,8 @@ inline v_float64x2 v_reverse(const v_float64x2 &a)
 inline scalartype v_reduce_sum(const _Tpvec& a) \
 { \
     regtype val = a.val; \
-    val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); \
-    val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3)); \
+    val = wasm_##suffix##_add(val, wasm_i8x16_shuffle(val, val, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); \
+    val = wasm_##suffix##_add(val, wasm_i8x16_shuffle(val, val, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3)); \
     return (scalartype)wasm_##esuffix##_extract_lane(val, 0); \
 }
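For one concrete instantiation of the macro above (4 x i32 lanes), the two shuffle+add steps implement a standard log2-width horizontal sum: the first add folds the high 64 bits onto the low 64 bits, the second folds lane 1 onto lane 0. A free-standing sketch of the equivalent function (illustrative only; reduce_sum_i32x4 is a hypothetical name, not the macro itself):

    #include <wasm_simd128.h>

    static int reduce_sum_i32x4(v128_t val) {
        // {x0,x1,x2,x3} + {x2,x3,x0,x1}: lanes 0 and 1 now hold x0+x2 and x1+x3.
        val = wasm_i32x4_add(val, wasm_i8x16_shuffle(val, val,
                  8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7));
        // Rotate by one lane and add: lane 0 now holds x0+x1+x2+x3.
        val = wasm_i32x4_add(val, wasm_i8x16_shuffle(val, val,
                  4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3));
        return wasm_i32x4_extract_lane(val, 0);
    }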
@@ -1649,7 +1649,7 @@ OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_int16x8, int)
 inline scalartype v_reduce_sum(const _Tpvec& a) \
 { \
     regtype val = a.val; \
-    val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); \
+    val = wasm_##suffix##_add(val, wasm_i8x16_shuffle(val, val, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); \
     return (scalartype)wasm_##esuffix##_extract_lane(val, 0); \
 }
 OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(v_uint64x2, uint64, v128_t, i64x2, i64x2)
@@ -1996,8 +1996,8 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b)
     v128_t t00 = wasm_v128_load(ptr);
     v128_t t01 = wasm_v128_load(ptr + 16);
-    a.val = wasm_v8x16_shuffle(t00, t01, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
-    b.val = wasm_v8x16_shuffle(t00, t01, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
+    a.val = wasm_i8x16_shuffle(t00, t01, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+    b.val = wasm_i8x16_shuffle(t00, t01, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
 }
 inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c)
@@ -2006,13 +2006,13 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b,
     v128_t t01 = wasm_v128_load(ptr + 16);
     v128_t t02 = wasm_v128_load(ptr + 32);
-    v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 1, 2, 4, 5, 7);
-    v128_t t11 = wasm_v8x16_shuffle(t00, t01, 1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 0, 2, 3, 5, 6);
-    v128_t t12 = wasm_v8x16_shuffle(t00, t01, 2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 0, 1, 3, 4, 6, 7);
-    a.val = wasm_v8x16_shuffle(t10, t02, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 17, 20, 23, 26, 29);
-    b.val = wasm_v8x16_shuffle(t11, t02, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 18, 21, 24, 27, 30);
-    c.val = wasm_v8x16_shuffle(t12, t02, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 19, 22, 25, 28, 31);
+    v128_t t10 = wasm_i8x16_shuffle(t00, t01, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 1, 2, 4, 5, 7);
+    v128_t t11 = wasm_i8x16_shuffle(t00, t01, 1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 0, 2, 3, 5, 6);
+    v128_t t12 = wasm_i8x16_shuffle(t00, t01, 2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 0, 1, 3, 4, 6, 7);
+    a.val = wasm_i8x16_shuffle(t10, t02, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 17, 20, 23, 26, 29);
+    b.val = wasm_i8x16_shuffle(t11, t02, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 18, 21, 24, 27, 30);
+    c.val = wasm_i8x16_shuffle(t12, t02, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 19, 22, 25, 28, 31);
 }
 inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c, v_uint8x16& d)
@@ -2022,15 +2022,15 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b,
     v128_t u2 = wasm_v128_load(ptr + 32); // a8 b8 c8 d8 ...
     v128_t u3 = wasm_v128_load(ptr + 48); // a12 b12 c12 d12 ...
-    v128_t v0 = wasm_v8x16_shuffle(u0, u1, 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29);
-    v128_t v1 = wasm_v8x16_shuffle(u2, u3, 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29);
-    v128_t v2 = wasm_v8x16_shuffle(u0, u1, 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31);
-    v128_t v3 = wasm_v8x16_shuffle(u2, u3, 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31);
-    a.val = wasm_v8x16_shuffle(v0, v1, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23);
-    b.val = wasm_v8x16_shuffle(v0, v1, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31);
-    c.val = wasm_v8x16_shuffle(v2, v3, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23);
-    d.val = wasm_v8x16_shuffle(v2, v3, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31);
+    v128_t v0 = wasm_i8x16_shuffle(u0, u1, 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29);
+    v128_t v1 = wasm_i8x16_shuffle(u2, u3, 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29);
+    v128_t v2 = wasm_i8x16_shuffle(u0, u1, 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31);
+    v128_t v3 = wasm_i8x16_shuffle(u2, u3, 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31);
+    a.val = wasm_i8x16_shuffle(v0, v1, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23);
+    b.val = wasm_i8x16_shuffle(v0, v1, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31);
+    c.val = wasm_i8x16_shuffle(v2, v3, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23);
+    d.val = wasm_i8x16_shuffle(v2, v3, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31);
 }
 inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b)
@@ -2038,8 +2038,8 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b)
     v128_t v0 = wasm_v128_load(ptr);     // a0 b0 a1 b1 a2 b2 a3 b3
     v128_t v1 = wasm_v128_load(ptr + 8); // a4 b4 a5 b5 a6 b6 a7 b7
-    a.val = wasm_v8x16_shuffle(v0, v1, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29); // a0 a1 a2 a3 a4 a5 a6 a7
-    b.val = wasm_v8x16_shuffle(v0, v1, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31); // b0 b1 ab b3 b4 b5 b6 b7
+    a.val = wasm_i8x16_shuffle(v0, v1, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29); // a0 a1 a2 a3 a4 a5 a6 a7
+    b.val = wasm_i8x16_shuffle(v0, v1, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31); // b0 b1 ab b3 b4 b5 b6 b7
 }
 inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c)
@@ -2048,13 +2048,13 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b,
     v128_t t01 = wasm_v128_load(ptr + 8);  // c2 a3 b3 c3 a4 b4 c4 a5
     v128_t t02 = wasm_v128_load(ptr + 16); // b5 c5 a6 b6 c6 a7 b7 c7
-    v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0, 1, 6, 7, 12, 13, 18, 19, 24, 25, 30, 31, 2, 3, 4, 5);
-    v128_t t11 = wasm_v8x16_shuffle(t00, t01, 2, 3, 8, 9, 14, 15, 20, 21, 26, 27, 0, 1, 4, 5, 6, 7);
-    v128_t t12 = wasm_v8x16_shuffle(t00, t01, 4, 5, 10, 11, 16, 17, 22, 23, 28, 29, 0, 1, 2, 3, 6, 7);
-    a.val = wasm_v8x16_shuffle(t10, t02, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 20, 21, 26, 27);
-    b.val = wasm_v8x16_shuffle(t11, t02, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 17, 22, 23, 28, 29);
-    c.val = wasm_v8x16_shuffle(t12, t02, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 18, 19, 24, 25, 30, 31);
+    v128_t t10 = wasm_i8x16_shuffle(t00, t01, 0, 1, 6, 7, 12, 13, 18, 19, 24, 25, 30, 31, 2, 3, 4, 5);
+    v128_t t11 = wasm_i8x16_shuffle(t00, t01, 2, 3, 8, 9, 14, 15, 20, 21, 26, 27, 0, 1, 4, 5, 6, 7);
+    v128_t t12 = wasm_i8x16_shuffle(t00, t01, 4, 5, 10, 11, 16, 17, 22, 23, 28, 29, 0, 1, 2, 3, 6, 7);
+    a.val = wasm_i8x16_shuffle(t10, t02, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 20, 21, 26, 27);
+    b.val = wasm_i8x16_shuffle(t11, t02, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 17, 22, 23, 28, 29);
+    c.val = wasm_i8x16_shuffle(t12, t02, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 18, 19, 24, 25, 30, 31);
 }
 inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c, v_uint16x8& d)
@@ -2064,15 +2064,15 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b,
     v128_t u2 = wasm_v128_load(ptr + 16); // a4 b4 c4 d4 ...
     v128_t u3 = wasm_v128_load(ptr + 24); // a6 b6 c6 d6 ...
-    v128_t v0 = wasm_v8x16_shuffle(u0, u1, 0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27); // a0 a1 a2 a3 b0 b1 b2 b3
-    v128_t v1 = wasm_v8x16_shuffle(u2, u3, 0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27); // a4 a5 a6 a7 b4 b5 b6 b7
-    v128_t v2 = wasm_v8x16_shuffle(u0, u1, 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14, 15, 22, 23, 30, 31); // c0 c1 c2 c3 d0 d1 d2 d3
-    v128_t v3 = wasm_v8x16_shuffle(u2, u3, 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14, 15, 22, 23, 30, 31); // c4 c5 c6 c7 d4 d5 d6 d7
-    a.val = wasm_v8x16_shuffle(v0, v1, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23);
-    b.val = wasm_v8x16_shuffle(v0, v1, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31);
-    c.val = wasm_v8x16_shuffle(v2, v3, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23);
-    d.val = wasm_v8x16_shuffle(v2, v3, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31);
+    v128_t v0 = wasm_i8x16_shuffle(u0, u1, 0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27); // a0 a1 a2 a3 b0 b1 b2 b3
+    v128_t v1 = wasm_i8x16_shuffle(u2, u3, 0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27); // a4 a5 a6 a7 b4 b5 b6 b7
+    v128_t v2 = wasm_i8x16_shuffle(u0, u1, 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14, 15, 22, 23, 30, 31); // c0 c1 c2 c3 d0 d1 d2 d3
+    v128_t v3 = wasm_i8x16_shuffle(u2, u3, 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14, 15, 22, 23, 30, 31); // c4 c5 c6 c7 d4 d5 d6 d7
+    a.val = wasm_i8x16_shuffle(v0, v1, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23);
+    b.val = wasm_i8x16_shuffle(v0, v1, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31);
+    c.val = wasm_i8x16_shuffle(v2, v3, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23);
+    d.val = wasm_i8x16_shuffle(v2, v3, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31);
 }
 inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b)
@ -2080,8 +2080,8 @@ inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4&
v128_t v0 = wasm_v128_load ( ptr ) ; // a0 b0 a1 b1
v128_t v0 = wasm_v128_load ( ptr ) ; // a0 b0 a1 b1
v128_t v1 = wasm_v128_load ( ptr + 4 ) ; // a2 b2 a3 b3
v128_t v1 = wasm_v128_load ( ptr + 4 ) ; // a2 b2 a3 b3
a . val = wasm_v 8x16_shuffle ( v0 , v1 , 0 , 1 , 2 , 3 , 8 , 9 , 10 , 11 , 16 , 17 , 18 , 19 , 24 , 25 , 26 , 27 ) ; // a0 a1 a2 a3
a . val = wasm_i 8x16_shuffle ( v0 , v1 , 0 , 1 , 2 , 3 , 8 , 9 , 10 , 11 , 16 , 17 , 18 , 19 , 24 , 25 , 26 , 27 ) ; // a0 a1 a2 a3
b . val = wasm_v 8x16_shuffle ( v0 , v1 , 4 , 5 , 6 , 7 , 12 , 13 , 14 , 15 , 20 , 21 , 22 , 23 , 28 , 29 , 30 , 31 ) ; // b0 b1 b2 b3
b . val = wasm_i 8x16_shuffle ( v0 , v1 , 4 , 5 , 6 , 7 , 12 , 13 , 14 , 15 , 20 , 21 , 22 , 23 , 28 , 29 , 30 , 31 ) ; // b0 b1 b2 b3
}
}
 inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c)
@@ -2090,13 +2090,13 @@ inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4&
     v128_t t01 = wasm_v128_load(ptr + 4); // b2 c2 a3 b3
     v128_t t02 = wasm_v128_load(ptr + 8); // c3 a4 b4 c4
-    v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7);
+    v128_t t10 = wasm_i8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7);
-    v128_t t11 = wasm_v8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3);
+    v128_t t11 = wasm_i8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3);
-    v128_t t12 = wasm_v8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7);
+    v128_t t12 = wasm_i8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7);
-    a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23);
+    a.val = wasm_i8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23);
-    b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27);
+    b.val = wasm_i8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27);
-    c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31);
+    c.val = wasm_i8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31);
 }
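The 3-channel case is two shuffle stages: t10/t11/t12 gather each channel's elements that live in t00/t01 (the trailing indices, e.g. the final 4,5,6,7 in t10, are don't-care filler), and the second stage overwrites that filler with the elements held by t02. A scalar model of the net transform, hypothetical helper name:

    #include <cstdint>

    // Net effect of the two shuffle stages above.
    static void deinterleave3_u32(const uint32_t* ptr,
                                  uint32_t a[4], uint32_t b[4], uint32_t c[4])
    {
        for (int i = 0; i < 4; ++i)
        {
            a[i] = ptr[3*i];
            b[i] = ptr[3*i + 1];
            c[i] = ptr[3*i + 2];
        }
    }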
 inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c, v_uint32x4& d)
@@ -2114,8 +2114,8 @@ inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b
     v128_t v0 = wasm_v128_load(ptr);     // a0 b0 a1 b1
     v128_t v1 = wasm_v128_load(ptr + 4); // a2 b2 a3 b3
-    a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3
+    a.val = wasm_i8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3
-    b.val = wasm_v8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3
+    b.val = wasm_i8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3
 }
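Since i8x16.shuffle moves raw bytes, the float overloads reuse the unsigned masks bit-for-bit; wasm_v128_load and wasm_v128_store are typeless, so only the pointer and wrapper types differ.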
 inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c)
@@ -2124,13 +2124,13 @@ inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b
     v128_t t01 = wasm_v128_load(ptr + 4); // b2 c2 a3 b3
     v128_t t02 = wasm_v128_load(ptr + 8); // c3 a4 b4 c4
-    v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7);
+    v128_t t10 = wasm_i8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7);
-    v128_t t11 = wasm_v8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3);
+    v128_t t11 = wasm_i8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3);
-    v128_t t12 = wasm_v8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7);
+    v128_t t12 = wasm_i8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7);
-    a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23);
+    a.val = wasm_i8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23);
-    b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27);
+    b.val = wasm_i8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27);
-    c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31);
+    c.val = wasm_i8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31);
 }
 inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c, v_float32x4& d)
@@ -2158,9 +2158,9 @@ inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b,
     v128_t t1 = wasm_v128_load(ptr + 2); // c0, a1
     v128_t t2 = wasm_v128_load(ptr + 4); // b1, c1
-    a.val = wasm_v8x16_shuffle(t0, t1, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);
+    a.val = wasm_i8x16_shuffle(t0, t1, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);
-    b.val = wasm_v8x16_shuffle(t0, t2, 8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23);
+    b.val = wasm_i8x16_shuffle(t0, t2, 8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23);
-    c.val = wasm_v8x16_shuffle(t1, t2, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);
+    c.val = wasm_i8x16_shuffle(t1, t2, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);
 }
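With two 64-bit lanes per register, the 3-channel deinterleave needs no intermediate stage: each output takes one half from each of two of the three loaded registers. Scalar model, hypothetical helper name:

    #include <cstdint>

    static void deinterleave3_u64(const uint64_t* ptr,
                                  uint64_t a[2], uint64_t b[2], uint64_t c[2])
    {
        a[0] = ptr[0]; b[0] = ptr[1]; c[0] = ptr[2]; // first triple
        a[1] = ptr[3]; b[1] = ptr[4]; c[1] = ptr[5]; // second triple
    }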
 inline void v_load_deinterleave(const uint64* ptr, v_uint64x2& a,
@@ -2192,13 +2192,13 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x1
 inline void v_store_interleave(uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
                                const v_uint8x16& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
 {
-    v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,16,0,1,17,0,2,18,0,3,19,0,4,20,0,5);
+    v128_t t00 = wasm_i8x16_shuffle(a.val, b.val, 0,16,0,1,17,0,2,18,0,3,19,0,4,20,0,5);
-    v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 21,0,6,22,0,7,23,0,8,24,0,9,25,0,10,26);
+    v128_t t01 = wasm_i8x16_shuffle(a.val, b.val, 21,0,6,22,0,7,23,0,8,24,0,9,25,0,10,26);
-    v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,11,27,0,12,28,0,13,29,0,14,30,0,15,31,0);
+    v128_t t02 = wasm_i8x16_shuffle(a.val, b.val, 0,11,27,0,12,28,0,13,29,0,14,30,0,15,31,0);
-    v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,16,3,4,17,6,7,18,9,10,19,12,13,20,15);
+    v128_t t10 = wasm_i8x16_shuffle(t00, c.val, 0,1,16,3,4,17,6,7,18,9,10,19,12,13,20,15);
-    v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,21,2,3,22,5,6,23,8,9,24,11,12,25,14,15);
+    v128_t t11 = wasm_i8x16_shuffle(t01, c.val, 0,21,2,3,22,5,6,23,8,9,24,11,12,25,14,15);
-    v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 26,1,2,27,4,5,28,7,8,29,10,11,30,13,14,31);
+    v128_t t12 = wasm_i8x16_shuffle(t02, c.val, 26,1,2,27,4,5,28,7,8,29,10,11,30,13,14,31);
     wasm_v128_store(ptr, t10);
     wasm_v128_store(ptr + 16, t11);
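The interleaved store runs the same trick in reverse: the first three shuffles splice a and b into a 3-wide layout, leaving a placeholder (index 0) in every third byte, and the next three shuffles punch c's bytes into exactly those holes before the 16-byte stores. Scalar model of the net result, hypothetical helper name:

    #include <cstdint>

    static void interleave3_u8(uint8_t* ptr, const uint8_t a[16],
                               const uint8_t b[16], const uint8_t c[16])
    {
        for (int i = 0; i < 16; ++i)
        {
            ptr[3*i]     = a[i];
            ptr[3*i + 1] = b[i];
            ptr[3*i + 2] = c[i];
        }
    }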
@@ -2243,13 +2243,13 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x8& a,
                                 const v_uint16x8& b, const v_uint16x8& c,
                                 hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
 {
-    v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,16,17,0,0,2,3,18,19,0,0,4,5,20,21);
+    v128_t t00 = wasm_i8x16_shuffle(a.val, b.val, 0,1,16,17,0,0,2,3,18,19,0,0,4,5,20,21);
-    v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 0,0,6,7,22,23,0,0,8,9,24,25,0,0,10,11);
+    v128_t t01 = wasm_i8x16_shuffle(a.val, b.val, 0,0,6,7,22,23,0,0,8,9,24,25,0,0,10,11);
-    v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 26,27,0,0,12,13,28,29,0,0,14,15,30,31,0,0);
+    v128_t t02 = wasm_i8x16_shuffle(a.val, b.val, 26,27,0,0,12,13,28,29,0,0,14,15,30,31,0,0);
-    v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,16,17,6,7,8,9,18,19,12,13,14,15);
+    v128_t t10 = wasm_i8x16_shuffle(t00, c.val, 0,1,2,3,16,17,6,7,8,9,18,19,12,13,14,15);
-    v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 20,21,2,3,4,5,22,23,8,9,10,11,24,25,14,15);
+    v128_t t11 = wasm_i8x16_shuffle(t01, c.val, 20,21,2,3,4,5,22,23,8,9,10,11,24,25,14,15);
-    v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 0,1,26,27,4,5,6,7,28,29,10,11,12,13,30,31);
+    v128_t t12 = wasm_i8x16_shuffle(t02, c.val, 0,1,26,27,4,5,6,7,28,29,10,11,12,13,30,31);
     wasm_v128_store(ptr, t10);
     wasm_v128_store(ptr + 8, t11);
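This ushort version and the unsigned/float versions below are the same two-stage hole-punch at 16- and 32-bit granularity (only the masks and the store strides change), while the 64-bit version further down needs no second stage: each output register packs one lane from each of two inputs.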
@@ -2293,13 +2293,13 @@ inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint
 inline void v_store_interleave(unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
                                const v_uint32x4& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
 {
-    v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7);
+    v128_t t00 = wasm_i8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7);
-    v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27);
+    v128_t t01 = wasm_i8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27);
-    v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0);
+    v128_t t02 = wasm_i8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0);
-    v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15);
+    v128_t t10 = wasm_i8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15);
-    v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15);
+    v128_t t11 = wasm_i8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15);
-    v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31);
+    v128_t t12 = wasm_i8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31);
     wasm_v128_store(ptr, t10);
     wasm_v128_store(ptr + 4, t11);
@@ -2333,13 +2333,13 @@ inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32
 inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b,
                                const v_float32x4& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
 {
-    v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7);
+    v128_t t00 = wasm_i8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7);
-    v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27);
+    v128_t t01 = wasm_i8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27);
-    v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0);
+    v128_t t02 = wasm_i8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0);
-    v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15);
+    v128_t t10 = wasm_i8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15);
-    v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15);
+    v128_t t11 = wasm_i8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15);
-    v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31);
+    v128_t t12 = wasm_i8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31);
     wasm_v128_store(ptr, t10);
     wasm_v128_store(ptr + 4, t11);
@@ -2372,9 +2372,9 @@ inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x
 inline void v_store_interleave(uint64* ptr, const v_uint64x2& a, const v_uint64x2& b,
                                const v_uint64x2& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
 {
-    v128_t v0 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
+    v128_t v0 = wasm_i8x16_shuffle(a.val, b.val, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
-    v128_t v1 = wasm_v8x16_shuffle(a.val, c.val, 16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15);
+    v128_t v1 = wasm_i8x16_shuffle(a.val, c.val, 16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15);
-    v128_t v2 = wasm_v8x16_shuffle(b.val, c.val, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
+    v128_t v2 = wasm_i8x16_shuffle(b.val, c.val, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
     wasm_v128_store(ptr, v0);
     wasm_v128_store(ptr + 2, v1);
@@ -2687,45 +2687,45 @@ inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_flo
 inline v_int8x16 v_interleave_pairs(const v_int8x16& vec)
 {
-    return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,2,1,3,4,6,5,7,8,10,9,11,12,14,13,15));
+    return v_int8x16(wasm_i8x16_shuffle(vec.val, vec.val, 0,2,1,3,4,6,5,7,8,10,9,11,12,14,13,15));
 }
 inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); }
 inline v_int8x16 v_interleave_quads(const v_int8x16& vec)
 {
-    return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,4,1,5,2,6,3,7,8,12,9,13,10,14,11,15));
+    return v_int8x16(wasm_i8x16_shuffle(vec.val, vec.val, 0,4,1,5,2,6,3,7,8,12,9,13,10,14,11,15));
 }
 inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); }
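Reading aid: v_interleave_pairs reorders every four lanes (0,1,2,3) -> (0,2,1,3), interleaving the two adjacent pairs, and v_interleave_quads reorders every eight lanes (0..7) -> (0,4,1,5,2,6,3,7). The vector is passed as both shuffle operands, so all mask indices stay below 16. Scalar model for the byte case, hypothetical helper name:

    #include <cstdint>

    static void interleave_pairs_u8(const uint8_t in[16], uint8_t out[16])
    {
        for (int g = 0; g < 4; ++g) // four groups of four bytes
        {
            out[4*g]     = in[4*g];
            out[4*g + 1] = in[4*g + 2];
            out[4*g + 2] = in[4*g + 1];
            out[4*g + 3] = in[4*g + 3];
        }
    }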
 inline v_int16x8 v_interleave_pairs(const v_int16x8& vec)
 {
-    return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,4,5,2,3,6,7,8,9,12,13,10,11,14,15));
+    return v_int16x8(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,4,5,2,3,6,7,8,9,12,13,10,11,14,15));
 }
 inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); }
 inline v_int16x8 v_interleave_quads(const v_int16x8& vec)
 {
-    return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15));
+    return v_int16x8(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15));
 }
 inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); }
 inline v_int32x4 v_interleave_pairs(const v_int32x4& vec)
 {
-    return v_int32x4(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15));
+    return v_int32x4(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15));
 }
 inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); }
 inline v_float32x4 v_interleave_pairs(const v_float32x4& vec)
 {
-    return v_float32x4(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15));
+    return v_float32x4(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15));
 }
 inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
 {
-    return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,4,5,6,8,9,10,12,13,14,16,16,16,16));
+    return v_int8x16(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,2,4,5,6,8,9,10,12,13,14,16,16,16,16));
 }
 inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); }
 inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
 {
-    return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,4,5,8,9,10,11,12,13,14,15,6,7));
+    return v_int16x8(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,2,3,4,5,8,9,10,11,12,13,14,15,6,7));
 }
 inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); }
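v_pack_triplets compacts elements stored four-per-group down to contiguous triplets: the 8-bit version keeps lanes 0,1,2, 4,5,6, 8,9,10, 12,13,14 in order, and the four tail lanes carry no meaningful data (index 16 is byte 0 of the second shuffle operand, here the vector's own first byte). Scalar model for the byte case, hypothetical helper name, tail left unspecified:

    #include <cstdint>

    static void pack_triplets_u8(const uint8_t in[16], uint8_t out[16])
    {
        int j = 0;
        for (int g = 0; g < 4; ++g)     // four 4-lane groups
            for (int k = 0; k < 3; ++k) // keep the first three lanes of each
                out[j++] = in[4*g + k];
        // out[12..15] are padding; callers should not rely on their values
    }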