Merge pull request #26537 from artoolkitx:emscripten-build-fixes

Emscripten build fixes #26537

- Corrects a typo in the Emscripten-only intrinsics header (fixes https://github.com/opencv/opencv/issues/26536): the universal-intrinsics shift macro declared `V_shr` instead of `v_shr`.
- Updates the deprecated byte-shuffle intrinsic name to the final LLVM intrinsic name (`wasm_v8x16_shuffle` → `wasm_i8x16_shuffle`); see the sketch below.
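
For context, a minimal sketch of the renamed intrinsic, assuming an Emscripten/LLVM toolchain that provides `<wasm_simd128.h>` and is built with SIMD enabled (`-msimd128`). The function name and build command here are illustrative only, not part of the patch:

```cpp
// Illustrative sketch (not from the patch): the WASM SIMD byte-shuffle intrinsic
// under its current name. Earlier toolchains spelled it wasm_v8x16_shuffle();
// current LLVM/Emscripten headers provide wasm_i8x16_shuffle(), which is what
// this PR switches intrin_wasm.hpp to.
// Assumed build command: emcc -msimd128 -c shuffle_sketch.cpp
#include <wasm_simd128.h>

// Interleave the low 8 bytes of a and b, mirroring wasm_unpacklo_i8x16()
// in modules/core/include/opencv2/core/hal/intrin_wasm.hpp.
static inline v128_t unpacklo_bytes(v128_t a, v128_t b)
{
    return wasm_i8x16_shuffle(a, b, 0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23);
}
```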

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
Philip Lamb committed 5 months ago (via GitHub)
commit a5f8711ce1 (parent a27d749471)
Files changed:
1. modules/core/include/opencv2/core/hal/intrin_lasx.hpp (2 changed lines)
2. modules/core/include/opencv2/core/hal/intrin_wasm.hpp (296 changed lines)

modules/core/include/opencv2/core/hal/intrin_lasx.hpp
@@ -906,7 +906,7 @@ inline v_uint16x16 v_mul_hi(const v_uint16x16& a, const v_uint16x16& b) { return
 { return _Tpuvec(__lasx_xvsll_##suffix(a.val, __lasx_xvreplgr2vr_##suffix(imm))); } \
 inline _Tpsvec v_shl(const _Tpsvec& a, int imm) \
 { return _Tpsvec(__lasx_xvsll_##suffix(a.val, __lasx_xvreplgr2vr_##suffix(imm))); } \
-inline _Tpuvec V_shr(const _Tpuvec& a, int imm) \
+inline _Tpuvec v_shr(const _Tpuvec& a, int imm) \
 { return _Tpuvec(__lasx_xvsrl_##suffix(a.val, __lasx_xvreplgr2vr_##suffix(imm))); } \
 inline _Tpsvec v_shr(const _Tpsvec& a, int imm) \
 { return _Tpsvec(srai(a.val, __lasx_xvreplgr2vr_##suffix(imm))); } \

modules/core/include/opencv2/core/hal/intrin_wasm.hpp
@@ -304,35 +304,35 @@ static const unsigned char popCountTable[] =
 } // namespace
 static v128_t wasm_unpacklo_i8x16(v128_t a, v128_t b) {
-return wasm_v8x16_shuffle(a, b, 0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23);
+return wasm_i8x16_shuffle(a, b, 0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23);
 }
 static v128_t wasm_unpacklo_i16x8(v128_t a, v128_t b) {
-return wasm_v8x16_shuffle(a, b, 0,1,16,17,2,3,18,19,4,5,20,21,6,7,22,23);
+return wasm_i8x16_shuffle(a, b, 0,1,16,17,2,3,18,19,4,5,20,21,6,7,22,23);
 }
 static v128_t wasm_unpacklo_i32x4(v128_t a, v128_t b) {
-return wasm_v8x16_shuffle(a, b, 0,1,2,3,16,17,18,19,4,5,6,7,20,21,22,23);
+return wasm_i8x16_shuffle(a, b, 0,1,2,3,16,17,18,19,4,5,6,7,20,21,22,23);
 }
 static v128_t wasm_unpacklo_i64x2(v128_t a, v128_t b) {
-return wasm_v8x16_shuffle(a, b, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
+return wasm_i8x16_shuffle(a, b, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
 }
 static v128_t wasm_unpackhi_i8x16(v128_t a, v128_t b) {
-return wasm_v8x16_shuffle(a, b, 8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31);
+return wasm_i8x16_shuffle(a, b, 8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31);
 }
 static v128_t wasm_unpackhi_i16x8(v128_t a, v128_t b) {
-return wasm_v8x16_shuffle(a, b, 8,9,24,25,10,11,26,27,12,13,28,29,14,15,30,31);
+return wasm_i8x16_shuffle(a, b, 8,9,24,25,10,11,26,27,12,13,28,29,14,15,30,31);
 }
 static v128_t wasm_unpackhi_i32x4(v128_t a, v128_t b) {
-return wasm_v8x16_shuffle(a, b, 8,9,10,11,24,25,26,27,12,13,14,15,28,29,30,31);
+return wasm_i8x16_shuffle(a, b, 8,9,10,11,24,25,26,27,12,13,14,15,28,29,30,31);
 }
 static v128_t wasm_unpackhi_i64x2(v128_t a, v128_t b) {
-return wasm_v8x16_shuffle(a, b, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
+return wasm_i8x16_shuffle(a, b, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
 }
 /** Convert **/
@@ -423,7 +423,7 @@ inline v_uint8x16 v_pack(const v_uint16x8& a, const v_uint16x8& b)
 v128_t maxval = wasm_i16x8_splat(255);
 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval));
 v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u16x8_gt(b.val, maxval));
-return v_uint8x16(wasm_v8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
+return v_uint8x16(wasm_i8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
 }
 inline v_int8x16 v_pack(const v_int16x8& a, const v_int16x8& b)
 {
@@ -433,14 +433,14 @@ inline v_int8x16 v_pack(const v_int16x8& a, const v_int16x8& b)
 v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i16x8_gt(b.val, maxval));
 v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval));
 v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i16x8_lt(b1, minval));
-return v_int8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
+return v_int8x16(wasm_i8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
 }
 inline v_uint16x8 v_pack(const v_uint32x4& a, const v_uint32x4& b)
 {
 v128_t maxval = wasm_i32x4_splat(65535);
 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval));
 v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u32x4_gt(b.val, maxval));
-return v_uint16x8(wasm_v8x16_shuffle(a1, b1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
+return v_uint16x8(wasm_i8x16_shuffle(a1, b1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
 }
 inline v_int16x8 v_pack(const v_int32x4& a, const v_int32x4& b)
 {
@@ -450,15 +450,15 @@ inline v_int16x8 v_pack(const v_int32x4& a, const v_int32x4& b)
 v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i32x4_gt(b.val, maxval));
 v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval));
 v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i32x4_lt(b1, minval));
-return v_int16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
+return v_int16x8(wasm_i8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
 }
 inline v_uint32x4 v_pack(const v_uint64x2& a, const v_uint64x2& b)
 {
-return v_uint32x4(wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
+return v_uint32x4(wasm_i8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
 }
 inline v_int32x4 v_pack(const v_int64x2& a, const v_int64x2& b)
 {
-return v_int32x4(wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
+return v_int32x4(wasm_i8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
 }
 inline v_uint8x16 v_pack_u(const v_int16x8& a, const v_int16x8& b)
 {
@@ -468,7 +468,7 @@ inline v_uint8x16 v_pack_u(const v_int16x8& a, const v_int16x8& b)
 v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i16x8_gt(b.val, maxval));
 v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval));
 v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i16x8_lt(b1, minval));
-return v_uint8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
+return v_uint8x16(wasm_i8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
 }
 inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b)
 {
@@ -478,7 +478,7 @@ inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b)
 v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i32x4_gt(b.val, maxval));
 v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval));
 v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i32x4_lt(b1, minval));
-return v_uint16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
+return v_uint16x8(wasm_i8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
 }
 template<int n>
@@ -490,7 +490,7 @@ inline v_uint8x16 v_rshr_pack(const v_uint16x8& a, const v_uint16x8& b)
 v128_t maxval = wasm_i16x8_splat(255);
 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u16x8_gt(a1, maxval));
 v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_u16x8_gt(b1, maxval));
-return v_uint8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
+return v_uint8x16(wasm_i8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
 }
 template<int n>
 inline v_int8x16 v_rshr_pack(const v_int16x8& a, const v_int16x8& b)
@@ -504,7 +504,7 @@ inline v_int8x16 v_rshr_pack(const v_int16x8& a, const v_int16x8& b)
 v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i16x8_gt(b1, maxval));
 v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
 v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i16x8_lt(b1, minval));
-return v_int8x16(wasm_v8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
+return v_int8x16(wasm_i8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
 }
 template<int n>
 inline v_uint16x8 v_rshr_pack(const v_uint32x4& a, const v_uint32x4& b)
@@ -515,7 +515,7 @@ inline v_uint16x8 v_rshr_pack(const v_uint32x4& a, const v_uint32x4& b)
 v128_t maxval = wasm_i32x4_splat(65535);
 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u32x4_gt(a1, maxval));
 v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_u32x4_gt(b1, maxval));
-return v_uint16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
+return v_uint16x8(wasm_i8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
 }
 template<int n>
 inline v_int16x8 v_rshr_pack(const v_int32x4& a, const v_int32x4& b)
@@ -529,7 +529,7 @@ inline v_int16x8 v_rshr_pack(const v_int32x4& a, const v_int32x4& b)
 v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i32x4_gt(b1, maxval));
 v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
 v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i32x4_lt(b1, minval));
-return v_int16x8(wasm_v8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
+return v_int16x8(wasm_i8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
 }
 template<int n>
 inline v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b)
@@ -537,7 +537,7 @@ inline v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b)
 v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1)));
 v128_t a1 = wasm_u64x2_shr(wasm_i64x2_add(a.val, delta), n);
 v128_t b1 = wasm_u64x2_shr(wasm_i64x2_add(b.val, delta), n);
-return v_uint32x4(wasm_v8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
+return v_uint32x4(wasm_i8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
 }
 template<int n>
 inline v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b)
@@ -545,7 +545,7 @@ inline v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b)
 v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1)));
 v128_t a1 = wasm_i64x2_shr(wasm_i64x2_add(a.val, delta), n);
 v128_t b1 = wasm_i64x2_shr(wasm_i64x2_add(b.val, delta), n);
-return v_int32x4(wasm_v8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
+return v_int32x4(wasm_i8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27));
 }
 template<int n>
 inline v_uint8x16 v_rshr_pack_u(const v_int16x8& a, const v_int16x8& b)
@@ -559,7 +559,7 @@ inline v_uint8x16 v_rshr_pack_u(const v_int16x8& a, const v_int16x8& b)
 v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i16x8_gt(b1, maxval));
 v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
 v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i16x8_lt(b1, minval));
-return v_uint8x16(wasm_v8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
+return v_uint8x16(wasm_i8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
 }
 template<int n>
 inline v_uint16x8 v_rshr_pack_u(const v_int32x4& a, const v_int32x4& b)
@@ -573,14 +573,14 @@ inline v_uint16x8 v_rshr_pack_u(const v_int32x4& a, const v_int32x4& b)
 v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i32x4_gt(b1, maxval));
 v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
 v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i32x4_lt(b1, minval));
-return v_uint16x8(wasm_v8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
+return v_uint16x8(wasm_i8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29));
 }
 inline void v_pack_store(uchar* ptr, const v_uint16x8& a)
 {
 v128_t maxval = wasm_i16x8_splat(255);
 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval));
-v128_t r = wasm_v8x16_shuffle(a1, a1, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
+v128_t r = wasm_i8x16_shuffle(a1, a1, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
 uchar t_ptr[16];
 wasm_v128_store(t_ptr, r);
 for (int i=0; i<8; ++i) {
@@ -593,7 +593,7 @@ inline void v_pack_store(schar* ptr, const v_int16x8& a)
 v128_t minval = wasm_i16x8_splat(-128);
 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval));
 v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval));
-v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
+v128_t r = wasm_i8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
 schar t_ptr[16];
 wasm_v128_store(t_ptr, r);
 for (int i=0; i<8; ++i) {
@@ -604,7 +604,7 @@ inline void v_pack_store(ushort* ptr, const v_uint32x4& a)
 {
 v128_t maxval = wasm_i32x4_splat(65535);
 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval));
-v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
+v128_t r = wasm_i8x16_shuffle(a1, a1, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
 ushort t_ptr[8];
 wasm_v128_store(t_ptr, r);
 for (int i=0; i<4; ++i) {
@@ -617,7 +617,7 @@ inline void v_pack_store(short* ptr, const v_int32x4& a)
 v128_t minval = wasm_i32x4_splat(-32768);
 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval));
 v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval));
-v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
+v128_t r = wasm_i8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
 short t_ptr[8];
 wasm_v128_store(t_ptr, r);
 for (int i=0; i<4; ++i) {
@@ -626,7 +626,7 @@ inline void v_pack_store(short* ptr, const v_int32x4& a)
 }
 inline void v_pack_store(unsigned* ptr, const v_uint64x2& a)
 {
-v128_t r = wasm_v8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
+v128_t r = wasm_i8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
 unsigned t_ptr[4];
 wasm_v128_store(t_ptr, r);
 for (int i=0; i<2; ++i) {
@@ -635,7 +635,7 @@ inline void v_pack_store(unsigned* ptr, const v_uint64x2& a)
 }
 inline void v_pack_store(int* ptr, const v_int64x2& a)
 {
-v128_t r = wasm_v8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
+v128_t r = wasm_i8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
 int t_ptr[4];
 wasm_v128_store(t_ptr, r);
 for (int i=0; i<2; ++i) {
@@ -648,7 +648,7 @@ inline void v_pack_u_store(uchar* ptr, const v_int16x8& a)
 v128_t minval = wasm_i16x8_splat(0);
 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval));
 v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval));
-v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
+v128_t r = wasm_i8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
 uchar t_ptr[16];
 wasm_v128_store(t_ptr, r);
 for (int i=0; i<8; ++i) {
@@ -661,7 +661,7 @@ inline void v_pack_u_store(ushort* ptr, const v_int32x4& a)
 v128_t minval = wasm_i32x4_splat(0);
 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval));
 v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval));
-v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
+v128_t r = wasm_i8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
 ushort t_ptr[8];
 wasm_v128_store(t_ptr, r);
 for (int i=0; i<4; ++i) {
@@ -676,7 +676,7 @@ inline void v_rshr_pack_store(uchar* ptr, const v_uint16x8& a)
 v128_t a1 = wasm_u16x8_shr(wasm_i16x8_add(a.val, delta), n);
 v128_t maxval = wasm_i16x8_splat(255);
 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u16x8_gt(a1, maxval));
-v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
+v128_t r = wasm_i8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
 uchar t_ptr[16];
 wasm_v128_store(t_ptr, r);
 for (int i=0; i<8; ++i) {
@@ -692,7 +692,7 @@ inline void v_rshr_pack_store(schar* ptr, const v_int16x8& a)
 v128_t minval = wasm_i16x8_splat(-128);
 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval));
 v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
-v128_t r = wasm_v8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
+v128_t r = wasm_i8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
 schar t_ptr[16];
 wasm_v128_store(t_ptr, r);
 for (int i=0; i<8; ++i) {
@@ -706,7 +706,7 @@ inline void v_rshr_pack_store(ushort* ptr, const v_uint32x4& a)
 v128_t a1 = wasm_u32x4_shr(wasm_i32x4_add(a.val, delta), n);
 v128_t maxval = wasm_i32x4_splat(65535);
 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u32x4_gt(a1, maxval));
-v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
+v128_t r = wasm_i8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
 ushort t_ptr[8];
 wasm_v128_store(t_ptr, r);
 for (int i=0; i<4; ++i) {
@@ -722,7 +722,7 @@ inline void v_rshr_pack_store(short* ptr, const v_int32x4& a)
 v128_t minval = wasm_i32x4_splat(-32768);
 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval));
 v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
-v128_t r = wasm_v8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
+v128_t r = wasm_i8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
 short t_ptr[8];
 wasm_v128_store(t_ptr, r);
 for (int i=0; i<4; ++i) {
@@ -734,7 +734,7 @@ inline void v_rshr_pack_store(unsigned* ptr, const v_uint64x2& a)
 {
 v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1)));
 v128_t a1 = wasm_u64x2_shr(wasm_i64x2_add(a.val, delta), n);
-v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
+v128_t r = wasm_i8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
 unsigned t_ptr[4];
 wasm_v128_store(t_ptr, r);
 for (int i=0; i<2; ++i) {
@@ -746,7 +746,7 @@ inline void v_rshr_pack_store(int* ptr, const v_int64x2& a)
 {
 v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1)));
 v128_t a1 = wasm_i64x2_shr(wasm_i64x2_add(a.val, delta), n);
-v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
+v128_t r = wasm_i8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11);
 int t_ptr[4];
 wasm_v128_store(t_ptr, r);
 for (int i=0; i<2; ++i) {
@@ -762,7 +762,7 @@ inline void v_rshr_pack_u_store(uchar* ptr, const v_int16x8& a)
 v128_t minval = wasm_i16x8_splat(0);
 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval));
 v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval));
-v128_t r = wasm_v8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
+v128_t r = wasm_i8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14);
 uchar t_ptr[16];
 wasm_v128_store(t_ptr, r);
 for (int i=0; i<8; ++i) {
@@ -778,7 +778,7 @@ inline void v_rshr_pack_u_store(ushort* ptr, const v_int32x4& a)
 v128_t minval = wasm_i32x4_splat(0);
 v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval));
 v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval));
-v128_t r = wasm_v8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
+v128_t r = wasm_i8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13);
 ushort t_ptr[8];
 wasm_v128_store(t_ptr, r);
 for (int i=0; i<4; ++i) {
@@ -791,7 +791,7 @@ inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b)
 v128_t maxval = wasm_i16x8_splat(255);
 v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval));
 v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u16x8_gt(b.val, maxval));
-return v_uint8x16(wasm_v8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
+return v_uint8x16(wasm_i8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30));
 }
 inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
@@ -802,9 +802,9 @@ inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b,
 v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u32x4_gt(b.val, maxval));
 v128_t c1 = wasm_v128_bitselect(maxval, c.val, wasm_u32x4_gt(c.val, maxval));
 v128_t d1 = wasm_v128_bitselect(maxval, d.val, wasm_u32x4_gt(d.val, maxval));
-v128_t ab = wasm_v8x16_shuffle(a1, b1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28);
-v128_t cd = wasm_v8x16_shuffle(c1, d1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28);
-return v_uint8x16(wasm_v8x16_shuffle(ab, cd, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23));
+v128_t ab = wasm_i8x16_shuffle(a1, b1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28);
+v128_t cd = wasm_i8x16_shuffle(c1, d1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28);
+return v_uint8x16(wasm_i8x16_shuffle(ab, cd, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23));
 }
 inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c,
@@ -820,13 +820,13 @@ inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uin
 v128_t f1 = wasm_v128_bitselect(maxval, f.val, ((__u64x2)(f.val) > (__u64x2)maxval));
 v128_t g1 = wasm_v128_bitselect(maxval, g.val, ((__u64x2)(g.val) > (__u64x2)maxval));
 v128_t h1 = wasm_v128_bitselect(maxval, h.val, ((__u64x2)(h.val) > (__u64x2)maxval));
-v128_t ab = wasm_v8x16_shuffle(a1, b1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
-v128_t cd = wasm_v8x16_shuffle(c1, d1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
-v128_t ef = wasm_v8x16_shuffle(e1, f1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
-v128_t gh = wasm_v8x16_shuffle(g1, h1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
-v128_t abcd = wasm_v8x16_shuffle(ab, cd, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19);
-v128_t efgh = wasm_v8x16_shuffle(ef, gh, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19);
-return v_uint8x16(wasm_v8x16_shuffle(abcd, efgh, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23));
+v128_t ab = wasm_i8x16_shuffle(a1, b1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
+v128_t cd = wasm_i8x16_shuffle(c1, d1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
+v128_t ef = wasm_i8x16_shuffle(e1, f1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
+v128_t gh = wasm_i8x16_shuffle(g1, h1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24);
+v128_t abcd = wasm_i8x16_shuffle(ab, cd, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19);
+v128_t efgh = wasm_i8x16_shuffle(ef, gh, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19);
+return v_uint8x16(wasm_i8x16_shuffle(abcd, efgh, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23));
 }
 inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
@@ -964,7 +964,7 @@ inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
 v_expand(b, b0, b1);
 v128_t c = wasm_i32x4_mul(a0.val, b0.val);
 v128_t d = wasm_i32x4_mul(a1.val, b1.val);
-return v_int16x8(wasm_v8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31));
+return v_int16x8(wasm_i8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31));
 }
 inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
 {
@@ -973,7 +973,7 @@ inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
 v_expand(b, b0, b1);
 v128_t c = wasm_i32x4_mul(a0.val, b0.val);
 v128_t d = wasm_i32x4_mul(a1.val, b1.val);
-return v_uint16x8(wasm_v8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31));
+return v_uint16x8(wasm_i8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31));
 }
 //////// Dot Product ////////
@@ -1398,7 +1398,7 @@ inline _Tpsvec v_shl(const _Tpsvec& a, int imm) \
 { \
 return _Tpsvec(wasm_##suffix##_shl(a.val, imm)); \
 } \
-inline _Tpuvec V_shr(const _Tpuvec& a, int imm) \
+inline _Tpuvec v_shr(const _Tpuvec& a, int imm) \
 { \
 return _Tpuvec(wasm_##ssuffix##_shr(a.val, imm)); \
 } \
@@ -1471,7 +1471,7 @@ namespace hal_wasm_internal
 inline v128_t operator()(const v128_t& a, const v128_t& b) const
 {
 enum { imm2 = (sizeof(v128_t) - imm) };
-return wasm_v8x16_shuffle(a, b,
+return wasm_i8x16_shuffle(a, b,
 imm, imm+1, imm+2, imm+3,
 imm+4, imm+5, imm+6, imm+7,
 imm+8, imm+9, imm+10, imm+11,
@@ -1582,19 +1582,19 @@ OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_float64x2, double)
 /** Reverse **/
 inline v_uint8x16 v_reverse(const v_uint8x16 &a)
-{ return v_uint8x16(wasm_v8x16_shuffle(a.val, a.val, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); }
+{ return v_uint8x16(wasm_i8x16_shuffle(a.val, a.val, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); }
 inline v_int8x16 v_reverse(const v_int8x16 &a)
 { return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
 inline v_uint16x8 v_reverse(const v_uint16x8 &a)
-{ return v_uint16x8(wasm_v8x16_shuffle(a.val, a.val, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1)); }
+{ return v_uint16x8(wasm_i8x16_shuffle(a.val, a.val, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1)); }
 inline v_int16x8 v_reverse(const v_int16x8 &a)
 { return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }
 inline v_uint32x4 v_reverse(const v_uint32x4 &a)
-{ return v_uint32x4(wasm_v8x16_shuffle(a.val, a.val, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)); }
+{ return v_uint32x4(wasm_i8x16_shuffle(a.val, a.val, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)); }
 inline v_int32x4 v_reverse(const v_int32x4 &a)
 { return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }
@@ -1603,7 +1603,7 @@ inline v_float32x4 v_reverse(const v_float32x4 &a)
 { return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
 inline v_uint64x2 v_reverse(const v_uint64x2 &a)
-{ return v_uint64x2(wasm_v8x16_shuffle(a.val, a.val, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); }
+{ return v_uint64x2(wasm_i8x16_shuffle(a.val, a.val, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); }
 inline v_int64x2 v_reverse(const v_int64x2 &a)
 { return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }
@@ -1616,8 +1616,8 @@ inline v_float64x2 v_reverse(const v_float64x2 &a)
 inline scalartype v_reduce_sum(const _Tpvec& a) \
 { \
 regtype val = a.val; \
-val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \
-val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3)); \
+val = wasm_##suffix##_add(val, wasm_i8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \
+val = wasm_##suffix##_add(val, wasm_i8x16_shuffle(val, val, 4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3)); \
 return (scalartype)wasm_##esuffix##_extract_lane(val, 0); \
 }
@@ -1649,7 +1649,7 @@ OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_int16x8, int)
 inline scalartype v_reduce_sum(const _Tpvec& a) \
 { \
 regtype val = a.val; \
-val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \
+val = wasm_##suffix##_add(val, wasm_i8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \
 return (scalartype)wasm_##esuffix##_extract_lane(val, 0); \
 }
 OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(v_uint64x2, uint64, v128_t, i64x2, i64x2)
@@ -1996,8 +1996,8 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b)
 v128_t t00 = wasm_v128_load(ptr);
 v128_t t01 = wasm_v128_load(ptr + 16);
-a.val = wasm_v8x16_shuffle(t00, t01, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30);
-b.val = wasm_v8x16_shuffle(t00, t01, 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31);
+a.val = wasm_i8x16_shuffle(t00, t01, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30);
+b.val = wasm_i8x16_shuffle(t00, t01, 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31);
 }
 inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c)
@@ -2006,13 +2006,13 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b,
 v128_t t01 = wasm_v128_load(ptr + 16);
 v128_t t02 = wasm_v128_load(ptr + 32);
-v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,3,6,9,12,15,18,21,24,27,30,1,2,4,5,7);
-v128_t t11 = wasm_v8x16_shuffle(t00, t01, 1,4,7,10,13,16,19,22,25,28,31,0,2,3,5,6);
-v128_t t12 = wasm_v8x16_shuffle(t00, t01, 2,5,8,11,14,17,20,23,26,29,0,1,3,4,6,7);
-a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,17,20,23,26,29);
-b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,18,21,24,27,30);
-c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,16,19,22,25,28,31);
+v128_t t10 = wasm_i8x16_shuffle(t00, t01, 0,3,6,9,12,15,18,21,24,27,30,1,2,4,5,7);
+v128_t t11 = wasm_i8x16_shuffle(t00, t01, 1,4,7,10,13,16,19,22,25,28,31,0,2,3,5,6);
+v128_t t12 = wasm_i8x16_shuffle(t00, t01, 2,5,8,11,14,17,20,23,26,29,0,1,3,4,6,7);
+a.val = wasm_i8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,17,20,23,26,29);
+b.val = wasm_i8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,18,21,24,27,30);
+c.val = wasm_i8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,16,19,22,25,28,31);
 }
 inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c, v_uint8x16& d)
@@ -2022,15 +2022,15 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b,
 v128_t u2 = wasm_v128_load(ptr + 32); // a8 b8 c8 d8 ...
 v128_t u3 = wasm_v128_load(ptr + 48); // a12 b12 c12 d12 ...
-v128_t v0 = wasm_v8x16_shuffle(u0, u1, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29);
-v128_t v1 = wasm_v8x16_shuffle(u2, u3, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29);
-v128_t v2 = wasm_v8x16_shuffle(u0, u1, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31);
-v128_t v3 = wasm_v8x16_shuffle(u2, u3, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31);
-a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
-b.val = wasm_v8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
-c.val = wasm_v8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
-d.val = wasm_v8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
+v128_t v0 = wasm_i8x16_shuffle(u0, u1, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29);
+v128_t v1 = wasm_i8x16_shuffle(u2, u3, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29);
+v128_t v2 = wasm_i8x16_shuffle(u0, u1, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31);
+v128_t v3 = wasm_i8x16_shuffle(u2, u3, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31);
+a.val = wasm_i8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
+b.val = wasm_i8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
+c.val = wasm_i8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
+d.val = wasm_i8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
 }
 inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b)
@@ -2038,8 +2038,8 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b)
 v128_t v0 = wasm_v128_load(ptr); // a0 b0 a1 b1 a2 b2 a3 b3
 v128_t v1 = wasm_v128_load(ptr + 8); // a4 b4 a5 b5 a6 b6 a7 b7
-a.val = wasm_v8x16_shuffle(v0, v1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29); // a0 a1 a2 a3 a4 a5 a6 a7
-b.val = wasm_v8x16_shuffle(v0, v1, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31); // b0 b1 ab b3 b4 b5 b6 b7
+a.val = wasm_i8x16_shuffle(v0, v1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29); // a0 a1 a2 a3 a4 a5 a6 a7
+b.val = wasm_i8x16_shuffle(v0, v1, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31); // b0 b1 ab b3 b4 b5 b6 b7
 }
 inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c)
@@ -2048,13 +2048,13 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b,
 v128_t t01 = wasm_v128_load(ptr + 8); // c2 a3 b3 c3 a4 b4 c4 a5
 v128_t t02 = wasm_v128_load(ptr + 16); // b5 c5 a6 b6 c6 a7 b7 c7
-v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,6,7,12,13,18,19,24,25,30,31,2,3,4,5);
-v128_t t11 = wasm_v8x16_shuffle(t00, t01, 2,3,8,9,14,15,20,21,26,27,0,1,4,5,6,7);
-v128_t t12 = wasm_v8x16_shuffle(t00, t01, 4,5,10,11,16,17,22,23,28,29,0,1,2,3,6,7);
-a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,26,27);
-b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,16,17,22,23,28,29);
-c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,18,19,24,25,30,31);
+v128_t t10 = wasm_i8x16_shuffle(t00, t01, 0,1,6,7,12,13,18,19,24,25,30,31,2,3,4,5);
+v128_t t11 = wasm_i8x16_shuffle(t00, t01, 2,3,8,9,14,15,20,21,26,27,0,1,4,5,6,7);
+v128_t t12 = wasm_i8x16_shuffle(t00, t01, 4,5,10,11,16,17,22,23,28,29,0,1,2,3,6,7);
+a.val = wasm_i8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,26,27);
+b.val = wasm_i8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,16,17,22,23,28,29);
+c.val = wasm_i8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,18,19,24,25,30,31);
 }
 inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c, v_uint16x8& d)
@@ -2064,15 +2064,15 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b,
 v128_t u2 = wasm_v128_load(ptr + 16); // a4 b4 c4 d4 ...
 v128_t u3 = wasm_v128_load(ptr + 24); // a6 b6 c6 d6 ...
-v128_t v0 = wasm_v8x16_shuffle(u0, u1, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a0 a1 a2 a3 b0 b1 b2 b3
-v128_t v1 = wasm_v8x16_shuffle(u2, u3, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a4 a5 a6 a7 b4 b5 b6 b7
-v128_t v2 = wasm_v8x16_shuffle(u0, u1, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c0 c1 c2 c3 d0 d1 d2 d3
-v128_t v3 = wasm_v8x16_shuffle(u2, u3, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c4 c5 c6 c7 d4 d5 d6 d7
-a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
-b.val = wasm_v8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
-c.val = wasm_v8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
-d.val = wasm_v8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
+v128_t v0 = wasm_i8x16_shuffle(u0, u1, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a0 a1 a2 a3 b0 b1 b2 b3
+v128_t v1 = wasm_i8x16_shuffle(u2, u3, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a4 a5 a6 a7 b4 b5 b6 b7
+v128_t v2 = wasm_i8x16_shuffle(u0, u1, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c0 c1 c2 c3 d0 d1 d2 d3
+v128_t v3 = wasm_i8x16_shuffle(u2, u3, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c4 c5 c6 c7 d4 d5 d6 d7
+a.val = wasm_i8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
+b.val = wasm_i8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
+c.val = wasm_i8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
+d.val = wasm_i8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
 }
 inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b)
@@ -2080,8 +2080,8 @@ inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4&
 v128_t v0 = wasm_v128_load(ptr); // a0 b0 a1 b1
 v128_t v1 = wasm_v128_load(ptr + 4); // a2 b2 a3 b3
-a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3
-b.val = wasm_v8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3
+a.val = wasm_i8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3
+b.val = wasm_i8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3
 }
 inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c)
@@ -2090,13 +2090,13 @@ inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4&
 v128_t t01 = wasm_v128_load(ptr + 4); // b2 c2 a3 b3
 v128_t t02 = wasm_v128_load(ptr + 8); // c3 a4 b4 c4
-v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7);
-v128_t t11 = wasm_v8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3);
-v128_t t12 = wasm_v8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7);
-a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23);
-b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27);
-c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31);
+v128_t t10 = wasm_i8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7);
+v128_t t11 = wasm_i8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3);
+v128_t t12 = wasm_i8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7);
+a.val = wasm_i8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23);
+b.val = wasm_i8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27);
+c.val = wasm_i8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31);
 }
 inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c, v_uint32x4& d)
@@ -2114,8 +2114,8 @@ inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b
 v128_t v0 = wasm_v128_load(ptr); // a0 b0 a1 b1
 v128_t v1 = wasm_v128_load((ptr + 4)); // a2 b2 a3 b3
-a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3
-b.val = wasm_v8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3
+a.val = wasm_i8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3
+b.val = wasm_i8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3
 }
 inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c)
@@ -2124,13 +2124,13 @@ inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b
 v128_t t01 = wasm_v128_load(ptr + 4); // b2 c2 a3 b3
 v128_t t02 = wasm_v128_load(ptr + 8); // c3 a4 b4 c4
-v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7);
-v128_t t11 = wasm_v8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3);
-v128_t t12 = wasm_v8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7);
-a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23);
-b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27);
-c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31);
+v128_t t10 = wasm_i8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7);
+v128_t t11 = wasm_i8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3);
+v128_t t12 = wasm_i8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7);
+a.val = wasm_i8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23);
+b.val = wasm_i8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27);
+c.val = wasm_i8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31);
 }
 inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c, v_float32x4& d)
@@ -2158,9 +2158,9 @@ inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b,
 v128_t t1 = wasm_v128_load(ptr + 2); // c0, a1
 v128_t t2 = wasm_v128_load(ptr + 4); // b1, c1
-a.val = wasm_v8x16_shuffle(t0, t1, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);
-b.val = wasm_v8x16_shuffle(t0, t2, 8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23);
-c.val = wasm_v8x16_shuffle(t1, t2, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);
+a.val = wasm_i8x16_shuffle(t0, t1, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);
+b.val = wasm_i8x16_shuffle(t0, t2, 8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23);
+c.val = wasm_i8x16_shuffle(t1, t2, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31);
 }
 inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a,
@@ -2192,13 +2192,13 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x1
 inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
 const v_uint8x16& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
 {
-v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,16,0,1,17,0,2,18,0,3,19,0,4,20,0,5);
-v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 21,0,6,22,0,7,23,0,8,24,0,9,25,0,10,26);
-v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,11,27,0,12,28,0,13,29,0,14,30,0,15,31,0);
-v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,16,3,4,17,6,7,18,9,10,19,12,13,20,15);
-v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,21,2,3,22,5,6,23,8,9,24,11,12,25,14,15);
-v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 26,1,2,27,4,5,28,7,8,29,10,11,30,13,14,31);
+v128_t t00 = wasm_i8x16_shuffle(a.val, b.val, 0,16,0,1,17,0,2,18,0,3,19,0,4,20,0,5);
+v128_t t01 = wasm_i8x16_shuffle(a.val, b.val, 21,0,6,22,0,7,23,0,8,24,0,9,25,0,10,26);
+v128_t t02 = wasm_i8x16_shuffle(a.val, b.val, 0,11,27,0,12,28,0,13,29,0,14,30,0,15,31,0);
+v128_t t10 = wasm_i8x16_shuffle(t00, c.val, 0,1,16,3,4,17,6,7,18,9,10,19,12,13,20,15);
+v128_t t11 = wasm_i8x16_shuffle(t01, c.val, 0,21,2,3,22,5,6,23,8,9,24,11,12,25,14,15);
+v128_t t12 = wasm_i8x16_shuffle(t02, c.val, 26,1,2,27,4,5,28,7,8,29,10,11,30,13,14,31);
 wasm_v128_store(ptr, t10);
 wasm_v128_store(ptr + 16, t11);
@@ -2243,13 +2243,13 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x8& a,
 const v_uint16x8& b, const v_uint16x8& c,
 hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
 {
-v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,16,17,0,0,2,3,18,19,0,0,4,5,20,21);
-v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 0,0,6,7,22,23,0,0,8,9,24,25,0,0,10,11);
-v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 26,27,0,0,12,13,28,29,0,0,14,15,30,31,0,0);
-v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,16,17,6,7,8,9,18,19,12,13,14,15);
-v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 20,21,2,3,4,5,22,23,8,9,10,11,24,25,14,15);
-v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 0,1,26,27,4,5,6,7,28,29,10,11,12,13,30,31);
+v128_t t00 = wasm_i8x16_shuffle(a.val, b.val, 0,1,16,17,0,0,2,3,18,19,0,0,4,5,20,21);
+v128_t t01 = wasm_i8x16_shuffle(a.val, b.val, 0,0,6,7,22,23,0,0,8,9,24,25,0,0,10,11);
+v128_t t02 = wasm_i8x16_shuffle(a.val, b.val, 26,27,0,0,12,13,28,29,0,0,14,15,30,31,0,0);
+v128_t t10 = wasm_i8x16_shuffle(t00, c.val, 0,1,2,3,16,17,6,7,8,9,18,19,12,13,14,15);
+v128_t t11 = wasm_i8x16_shuffle(t01, c.val, 20,21,2,3,4,5,22,23,8,9,10,11,24,25,14,15);
+v128_t t12 = wasm_i8x16_shuffle(t02, c.val, 0,1,26,27,4,5,6,7,28,29,10,11,12,13,30,31);
 wasm_v128_store(ptr, t10);
 wasm_v128_store(ptr + 8, t11);
@@ -2293,13 +2293,13 @@ inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint
 inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
 const v_uint32x4& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
 {
-v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7);
-v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27);
-v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0);
-v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15);
-v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15);
-v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31);
+v128_t t00 = wasm_i8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7);
+v128_t t01 = wasm_i8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27);
+v128_t t02 = wasm_i8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0);
+v128_t t10 = wasm_i8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15);
+v128_t t11 = wasm_i8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15);
+v128_t t12 = wasm_i8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31);
 wasm_v128_store(ptr, t10);
 wasm_v128_store(ptr + 4, t11);
@@ -2333,13 +2333,13 @@ inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32
 inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b,
 const v_float32x4& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
 {
-v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7);
-v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27);
-v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0);
-v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15);
-v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15);
-v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31);
+v128_t t00 = wasm_i8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7);
+v128_t t01 = wasm_i8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27);
+v128_t t02 = wasm_i8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0);
+v128_t t10 = wasm_i8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15);
+v128_t t11 = wasm_i8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15);
+v128_t t12 = wasm_i8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31);
 wasm_v128_store(ptr, t10);
 wasm_v128_store(ptr + 4, t11);
@@ -2372,9 +2372,9 @@ inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x
 inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b,
 const v_uint64x2& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED)
 {
-v128_t v0 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
-v128_t v1 = wasm_v8x16_shuffle(a.val, c.val, 16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15);
-v128_t v2 = wasm_v8x16_shuffle(b.val, c.val, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
+v128_t v0 = wasm_i8x16_shuffle(a.val, b.val, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
+v128_t v1 = wasm_i8x16_shuffle(a.val, c.val, 16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15);
+v128_t v2 = wasm_i8x16_shuffle(b.val, c.val, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31);
 wasm_v128_store(ptr, v0);
 wasm_v128_store(ptr + 2, v1);
@@ -2687,45 +2687,45 @@ inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_flo
 inline v_int8x16 v_interleave_pairs(const v_int8x16& vec)
 {
-return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,2,1,3,4,6,5,7,8,10,9,11,12,14,13,15));
+return v_int8x16(wasm_i8x16_shuffle(vec.val, vec.val, 0,2,1,3,4,6,5,7,8,10,9,11,12,14,13,15));
 }
 inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); }
 inline v_int8x16 v_interleave_quads(const v_int8x16& vec)
 {
-return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,4,1,5,2,6,3,7,8,12,9,13,10,14,11,15));
+return v_int8x16(wasm_i8x16_shuffle(vec.val, vec.val, 0,4,1,5,2,6,3,7,8,12,9,13,10,14,11,15));
 }
 inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); }
 inline v_int16x8 v_interleave_pairs(const v_int16x8& vec)
 {
-return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,4,5,2,3,6,7,8,9,12,13,10,11,14,15));
+return v_int16x8(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,4,5,2,3,6,7,8,9,12,13,10,11,14,15));
 }
 inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); }
 inline v_int16x8 v_interleave_quads(const v_int16x8& vec)
 {
-return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15));
+return v_int16x8(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15));
 }
 inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); }
 inline v_int32x4 v_interleave_pairs(const v_int32x4& vec)
 {
-return v_int32x4(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15));
+return v_int32x4(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15));
 }
 inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); }
 inline v_float32x4 v_interleave_pairs(const v_float32x4& vec)
 {
-return v_float32x4(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15));
+return v_float32x4(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15));
 }
 inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
 {
-return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,4,5,6,8,9,10,12,13,14,16,16,16,16));
+return v_int8x16(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,2,4,5,6,8,9,10,12,13,14,16,16,16,16));
 }
 inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); }
 inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
 {
-return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,4,5,8,9,10,11,12,13,14,15,6,7));
+return v_int16x8(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,2,3,4,5,8,9,10,11,12,13,14,15,6,7));
 }
 inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); }
