From a5f8711ce127fa3f8c57db74f5038d23cf83bbb2 Mon Sep 17 00:00:00 2001
From: Philip Lamb
Date: Thu, 28 Nov 2024 19:25:01 +1300
Subject: [PATCH] Merge pull request #26537 from artoolkitx:emscripten-build-fixes

Emscripten build fixes #26537

- Corrects a typo in the Emscripten-only intrinsics header (Fixes https://github.com/opencv/opencv/issues/26536)
- Updates the deprecated intrinsic to its final LLVM intrinsic name (wasm_v8x16_shuffle -> wasm_i8x16_shuffle).

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
---
 .../include/opencv2/core/hal/intrin_lasx.hpp |   2 +-
 .../include/opencv2/core/hal/intrin_wasm.hpp | 296 +++++++++---------
 2 files changed, 149 insertions(+), 149 deletions(-)

diff --git a/modules/core/include/opencv2/core/hal/intrin_lasx.hpp b/modules/core/include/opencv2/core/hal/intrin_lasx.hpp
index 68d08b2ef4..3661b7ef32 100644
--- a/modules/core/include/opencv2/core/hal/intrin_lasx.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_lasx.hpp
@@ -906,7 +906,7 @@ inline v_uint16x16 v_mul_hi(const v_uint16x16& a, const v_uint16x16& b) { return
 { return _Tpuvec(__lasx_xvsll_##suffix(a.val, __lasx_xvreplgr2vr_##suffix(imm))); } \
 inline _Tpsvec v_shl(const _Tpsvec& a, int imm) \
 { return _Tpsvec(__lasx_xvsll_##suffix(a.val, __lasx_xvreplgr2vr_##suffix(imm))); } \
-inline _Tpuvec V_shr(const _Tpuvec& a, int imm) \
+inline _Tpuvec v_shr(const _Tpuvec& a, int imm) \
 { return _Tpuvec(__lasx_xvsrl_##suffix(a.val, __lasx_xvreplgr2vr_##suffix(imm))); } \
 inline _Tpsvec v_shr(const _Tpsvec& a, int imm) \
 { return _Tpsvec(srai(a.val, __lasx_xvreplgr2vr_##suffix(imm))); } \
diff --git a/modules/core/include/opencv2/core/hal/intrin_wasm.hpp b/modules/core/include/opencv2/core/hal/intrin_wasm.hpp
index 70198451c0..7c4d8e05df 100644
--- a/modules/core/include/opencv2/core/hal/intrin_wasm.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_wasm.hpp
@@ -304,35 +304,35 @@ static const unsigned char popCountTable[] =
 } // namespace
 static v128_t wasm_unpacklo_i8x16(v128_t a, v128_t b) {
- return wasm_v8x16_shuffle(a, b, 0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23);
+ return wasm_i8x16_shuffle(a, b, 0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23);
 }
 static v128_t wasm_unpacklo_i16x8(v128_t a, v128_t b) {
- return wasm_v8x16_shuffle(a, b, 0,1,16,17,2,3,18,19,4,5,20,21,6,7,22,23);
+ return wasm_i8x16_shuffle(a, b, 0,1,16,17,2,3,18,19,4,5,20,21,6,7,22,23);
 }
 static v128_t wasm_unpacklo_i32x4(v128_t a, v128_t b) {
- return wasm_v8x16_shuffle(a, b, 0,1,2,3,16,17,18,19,4,5,6,7,20,21,22,23);
+ return wasm_i8x16_shuffle(a, b, 0,1,2,3,16,17,18,19,4,5,6,7,20,21,22,23);
 }
 static v128_t wasm_unpacklo_i64x2(v128_t a, v128_t b) {
- return wasm_v8x16_shuffle(a, b, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
+ return wasm_i8x16_shuffle(a, b, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
 }
 static v128_t wasm_unpackhi_i8x16(v128_t a, v128_t b) {
- return wasm_v8x16_shuffle(a, b, 8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31);
+ return
wasm_i8x16_shuffle(a, b, 8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31); } static v128_t wasm_unpackhi_i16x8(v128_t a, v128_t b) { - return wasm_v8x16_shuffle(a, b, 8,9,24,25,10,11,26,27,12,13,28,29,14,15,30,31); + return wasm_i8x16_shuffle(a, b, 8,9,24,25,10,11,26,27,12,13,28,29,14,15,30,31); } static v128_t wasm_unpackhi_i32x4(v128_t a, v128_t b) { - return wasm_v8x16_shuffle(a, b, 8,9,10,11,24,25,26,27,12,13,14,15,28,29,30,31); + return wasm_i8x16_shuffle(a, b, 8,9,10,11,24,25,26,27,12,13,14,15,28,29,30,31); } static v128_t wasm_unpackhi_i64x2(v128_t a, v128_t b) { - return wasm_v8x16_shuffle(a, b, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); + return wasm_i8x16_shuffle(a, b, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); } /** Convert **/ @@ -423,7 +423,7 @@ inline v_uint8x16 v_pack(const v_uint16x8& a, const v_uint16x8& b) v128_t maxval = wasm_i16x8_splat(255); v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval)); v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u16x8_gt(b.val, maxval)); - return v_uint8x16(wasm_v8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); + return v_uint8x16(wasm_i8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); } inline v_int8x16 v_pack(const v_int16x8& a, const v_int16x8& b) { @@ -433,14 +433,14 @@ inline v_int8x16 v_pack(const v_int16x8& a, const v_int16x8& b) v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i16x8_gt(b.val, maxval)); v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval)); v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i16x8_lt(b1, minval)); - return v_int8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); + return v_int8x16(wasm_i8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); } inline v_uint16x8 v_pack(const v_uint32x4& a, const v_uint32x4& b) { v128_t maxval = wasm_i32x4_splat(65535); v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval)); v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u32x4_gt(b.val, maxval)); - return v_uint16x8(wasm_v8x16_shuffle(a1, b1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); + return v_uint16x8(wasm_i8x16_shuffle(a1, b1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); } inline v_int16x8 v_pack(const v_int32x4& a, const v_int32x4& b) { @@ -450,15 +450,15 @@ inline v_int16x8 v_pack(const v_int32x4& a, const v_int32x4& b) v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i32x4_gt(b.val, maxval)); v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval)); v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i32x4_lt(b1, minval)); - return v_int16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); + return v_int16x8(wasm_i8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); } inline v_uint32x4 v_pack(const v_uint64x2& a, const v_uint64x2& b) { - return v_uint32x4(wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); + return v_uint32x4(wasm_i8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); } inline v_int32x4 v_pack(const v_int64x2& a, const v_int64x2& b) { - return v_int32x4(wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); + return v_int32x4(wasm_i8x16_shuffle(a.val, b.val, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); } inline v_uint8x16 v_pack_u(const v_int16x8& a, const v_int16x8& b) { @@ -468,7 +468,7 @@ inline v_uint8x16 v_pack_u(const v_int16x8& a, const v_int16x8& b) v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i16x8_gt(b.val, 
maxval)); v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval)); v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i16x8_lt(b1, minval)); - return v_uint8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); + return v_uint8x16(wasm_i8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); } inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b) { @@ -478,7 +478,7 @@ inline v_uint16x8 v_pack_u(const v_int32x4& a, const v_int32x4& b) v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_i32x4_gt(b.val, maxval)); v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval)); v128_t b2 = wasm_v128_bitselect(minval, b1, wasm_i32x4_lt(b1, minval)); - return v_uint16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); + return v_uint16x8(wasm_i8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); } template @@ -490,7 +490,7 @@ inline v_uint8x16 v_rshr_pack(const v_uint16x8& a, const v_uint16x8& b) v128_t maxval = wasm_i16x8_splat(255); v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u16x8_gt(a1, maxval)); v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_u16x8_gt(b1, maxval)); - return v_uint8x16(wasm_v8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); + return v_uint8x16(wasm_i8x16_shuffle(a2, b2, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); } template inline v_int8x16 v_rshr_pack(const v_int16x8& a, const v_int16x8& b) @@ -504,7 +504,7 @@ inline v_int8x16 v_rshr_pack(const v_int16x8& a, const v_int16x8& b) v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i16x8_gt(b1, maxval)); v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval)); v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i16x8_lt(b1, minval)); - return v_int8x16(wasm_v8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); + return v_int8x16(wasm_i8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); } template inline v_uint16x8 v_rshr_pack(const v_uint32x4& a, const v_uint32x4& b) @@ -515,7 +515,7 @@ inline v_uint16x8 v_rshr_pack(const v_uint32x4& a, const v_uint32x4& b) v128_t maxval = wasm_i32x4_splat(65535); v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u32x4_gt(a1, maxval)); v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_u32x4_gt(b1, maxval)); - return v_uint16x8(wasm_v8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); + return v_uint16x8(wasm_i8x16_shuffle(a2, b2, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); } template inline v_int16x8 v_rshr_pack(const v_int32x4& a, const v_int32x4& b) @@ -529,7 +529,7 @@ inline v_int16x8 v_rshr_pack(const v_int32x4& a, const v_int32x4& b) v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i32x4_gt(b1, maxval)); v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval)); v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i32x4_lt(b1, minval)); - return v_int16x8(wasm_v8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); + return v_int16x8(wasm_i8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); } template inline v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b) @@ -537,7 +537,7 @@ inline v_uint32x4 v_rshr_pack(const v_uint64x2& a, const v_uint64x2& b) v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1))); v128_t a1 = wasm_u64x2_shr(wasm_i64x2_add(a.val, delta), n); v128_t b1 = wasm_u64x2_shr(wasm_i64x2_add(b.val, delta), n); - return v_uint32x4(wasm_v8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); + return v_uint32x4(wasm_i8x16_shuffle(a1, b1, 
0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); } template inline v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b) @@ -545,7 +545,7 @@ inline v_int32x4 v_rshr_pack(const v_int64x2& a, const v_int64x2& b) v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1))); v128_t a1 = wasm_i64x2_shr(wasm_i64x2_add(a.val, delta), n); v128_t b1 = wasm_i64x2_shr(wasm_i64x2_add(b.val, delta), n); - return v_int32x4(wasm_v8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); + return v_int32x4(wasm_i8x16_shuffle(a1, b1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27)); } template inline v_uint8x16 v_rshr_pack_u(const v_int16x8& a, const v_int16x8& b) @@ -559,7 +559,7 @@ inline v_uint8x16 v_rshr_pack_u(const v_int16x8& a, const v_int16x8& b) v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i16x8_gt(b1, maxval)); v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval)); v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i16x8_lt(b1, minval)); - return v_uint8x16(wasm_v8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); + return v_uint8x16(wasm_i8x16_shuffle(a3, b3, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); } template inline v_uint16x8 v_rshr_pack_u(const v_int32x4& a, const v_int32x4& b) @@ -573,14 +573,14 @@ inline v_uint16x8 v_rshr_pack_u(const v_int32x4& a, const v_int32x4& b) v128_t b2 = wasm_v128_bitselect(maxval, b1, wasm_i32x4_gt(b1, maxval)); v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval)); v128_t b3 = wasm_v128_bitselect(minval, b2, wasm_i32x4_lt(b1, minval)); - return v_uint16x8(wasm_v8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); + return v_uint16x8(wasm_i8x16_shuffle(a3, b3, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29)); } inline void v_pack_store(uchar* ptr, const v_uint16x8& a) { v128_t maxval = wasm_i16x8_splat(255); v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval)); - v128_t r = wasm_v8x16_shuffle(a1, a1, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); + v128_t r = wasm_i8x16_shuffle(a1, a1, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); uchar t_ptr[16]; wasm_v128_store(t_ptr, r); for (int i=0; i<8; ++i) { @@ -593,7 +593,7 @@ inline void v_pack_store(schar* ptr, const v_int16x8& a) v128_t minval = wasm_i16x8_splat(-128); v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval)); v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval)); - v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); + v128_t r = wasm_i8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); schar t_ptr[16]; wasm_v128_store(t_ptr, r); for (int i=0; i<8; ++i) { @@ -604,7 +604,7 @@ inline void v_pack_store(ushort* ptr, const v_uint32x4& a) { v128_t maxval = wasm_i32x4_splat(65535); v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u32x4_gt(a.val, maxval)); - v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); + v128_t r = wasm_i8x16_shuffle(a1, a1, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); ushort t_ptr[8]; wasm_v128_store(t_ptr, r); for (int i=0; i<4; ++i) { @@ -617,7 +617,7 @@ inline void v_pack_store(short* ptr, const v_int32x4& a) v128_t minval = wasm_i32x4_splat(-32768); v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval)); v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval)); - v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); + v128_t r = wasm_i8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); short t_ptr[8]; wasm_v128_store(t_ptr, r); for (int i=0; i<4; ++i) { @@ 
-626,7 +626,7 @@ inline void v_pack_store(short* ptr, const v_int32x4& a) } inline void v_pack_store(unsigned* ptr, const v_uint64x2& a) { - v128_t r = wasm_v8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11); + v128_t r = wasm_i8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11); unsigned t_ptr[4]; wasm_v128_store(t_ptr, r); for (int i=0; i<2; ++i) { @@ -635,7 +635,7 @@ inline void v_pack_store(unsigned* ptr, const v_uint64x2& a) } inline void v_pack_store(int* ptr, const v_int64x2& a) { - v128_t r = wasm_v8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11); + v128_t r = wasm_i8x16_shuffle(a.val, a.val, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11); int t_ptr[4]; wasm_v128_store(t_ptr, r); for (int i=0; i<2; ++i) { @@ -648,7 +648,7 @@ inline void v_pack_u_store(uchar* ptr, const v_int16x8& a) v128_t minval = wasm_i16x8_splat(0); v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i16x8_gt(a.val, maxval)); v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i16x8_lt(a1, minval)); - v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); + v128_t r = wasm_i8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); uchar t_ptr[16]; wasm_v128_store(t_ptr, r); for (int i=0; i<8; ++i) { @@ -661,7 +661,7 @@ inline void v_pack_u_store(ushort* ptr, const v_int32x4& a) v128_t minval = wasm_i32x4_splat(0); v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_i32x4_gt(a.val, maxval)); v128_t a2 = wasm_v128_bitselect(minval, a1, wasm_i32x4_lt(a1, minval)); - v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); + v128_t r = wasm_i8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); ushort t_ptr[8]; wasm_v128_store(t_ptr, r); for (int i=0; i<4; ++i) { @@ -676,7 +676,7 @@ inline void v_rshr_pack_store(uchar* ptr, const v_uint16x8& a) v128_t a1 = wasm_u16x8_shr(wasm_i16x8_add(a.val, delta), n); v128_t maxval = wasm_i16x8_splat(255); v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u16x8_gt(a1, maxval)); - v128_t r = wasm_v8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); + v128_t r = wasm_i8x16_shuffle(a2, a2, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); uchar t_ptr[16]; wasm_v128_store(t_ptr, r); for (int i=0; i<8; ++i) { @@ -692,7 +692,7 @@ inline void v_rshr_pack_store(schar* ptr, const v_int16x8& a) v128_t minval = wasm_i16x8_splat(-128); v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval)); v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval)); - v128_t r = wasm_v8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); + v128_t r = wasm_i8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); schar t_ptr[16]; wasm_v128_store(t_ptr, r); for (int i=0; i<8; ++i) { @@ -706,7 +706,7 @@ inline void v_rshr_pack_store(ushort* ptr, const v_uint32x4& a) v128_t a1 = wasm_u32x4_shr(wasm_i32x4_add(a.val, delta), n); v128_t maxval = wasm_i32x4_splat(65535); v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_u32x4_gt(a1, maxval)); - v128_t r = wasm_v8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); + v128_t r = wasm_i8x16_shuffle(a2, a2, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); ushort t_ptr[8]; wasm_v128_store(t_ptr, r); for (int i=0; i<4; ++i) { @@ -722,7 +722,7 @@ inline void v_rshr_pack_store(short* ptr, const v_int32x4& a) v128_t minval = wasm_i32x4_splat(-32768); v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval)); v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval)); - v128_t r = wasm_v8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); 
+ v128_t r = wasm_i8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); short t_ptr[8]; wasm_v128_store(t_ptr, r); for (int i=0; i<4; ++i) { @@ -734,7 +734,7 @@ inline void v_rshr_pack_store(unsigned* ptr, const v_uint64x2& a) { v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1))); v128_t a1 = wasm_u64x2_shr(wasm_i64x2_add(a.val, delta), n); - v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11); + v128_t r = wasm_i8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11); unsigned t_ptr[4]; wasm_v128_store(t_ptr, r); for (int i=0; i<2; ++i) { @@ -746,7 +746,7 @@ inline void v_rshr_pack_store(int* ptr, const v_int64x2& a) { v128_t delta = wasm_i64x2_splat(((int64)1 << (n-1))); v128_t a1 = wasm_i64x2_shr(wasm_i64x2_add(a.val, delta), n); - v128_t r = wasm_v8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11); + v128_t r = wasm_i8x16_shuffle(a1, a1, 0,1,2,3,8,9,10,11,0,1,2,3,8,9,10,11); int t_ptr[4]; wasm_v128_store(t_ptr, r); for (int i=0; i<2; ++i) { @@ -762,7 +762,7 @@ inline void v_rshr_pack_u_store(uchar* ptr, const v_int16x8& a) v128_t minval = wasm_i16x8_splat(0); v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i16x8_gt(a1, maxval)); v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i16x8_lt(a1, minval)); - v128_t r = wasm_v8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); + v128_t r = wasm_i8x16_shuffle(a3, a3, 0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14); uchar t_ptr[16]; wasm_v128_store(t_ptr, r); for (int i=0; i<8; ++i) { @@ -778,7 +778,7 @@ inline void v_rshr_pack_u_store(ushort* ptr, const v_int32x4& a) v128_t minval = wasm_i32x4_splat(0); v128_t a2 = wasm_v128_bitselect(maxval, a1, wasm_i32x4_gt(a1, maxval)); v128_t a3 = wasm_v128_bitselect(minval, a2, wasm_i32x4_lt(a1, minval)); - v128_t r = wasm_v8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); + v128_t r = wasm_i8x16_shuffle(a3, a3, 0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13); ushort t_ptr[8]; wasm_v128_store(t_ptr, r); for (int i=0; i<4; ++i) { @@ -791,7 +791,7 @@ inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) v128_t maxval = wasm_i16x8_splat(255); v128_t a1 = wasm_v128_bitselect(maxval, a.val, wasm_u16x8_gt(a.val, maxval)); v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u16x8_gt(b.val, maxval)); - return v_uint8x16(wasm_v8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); + return v_uint8x16(wasm_i8x16_shuffle(a1, b1, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30)); } inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, @@ -802,9 +802,9 @@ inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, v128_t b1 = wasm_v128_bitselect(maxval, b.val, wasm_u32x4_gt(b.val, maxval)); v128_t c1 = wasm_v128_bitselect(maxval, c.val, wasm_u32x4_gt(c.val, maxval)); v128_t d1 = wasm_v128_bitselect(maxval, d.val, wasm_u32x4_gt(d.val, maxval)); - v128_t ab = wasm_v8x16_shuffle(a1, b1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28); - v128_t cd = wasm_v8x16_shuffle(c1, d1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28); - return v_uint8x16(wasm_v8x16_shuffle(ab, cd, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23)); + v128_t ab = wasm_i8x16_shuffle(a1, b1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28); + v128_t cd = wasm_i8x16_shuffle(c1, d1, 0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28); + return v_uint8x16(wasm_i8x16_shuffle(ab, cd, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23)); } inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, @@ -820,13 +820,13 @@ inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, 
const v_uin v128_t f1 = wasm_v128_bitselect(maxval, f.val, ((__u64x2)(f.val) > (__u64x2)maxval)); v128_t g1 = wasm_v128_bitselect(maxval, g.val, ((__u64x2)(g.val) > (__u64x2)maxval)); v128_t h1 = wasm_v128_bitselect(maxval, h.val, ((__u64x2)(h.val) > (__u64x2)maxval)); - v128_t ab = wasm_v8x16_shuffle(a1, b1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); - v128_t cd = wasm_v8x16_shuffle(c1, d1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); - v128_t ef = wasm_v8x16_shuffle(e1, f1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); - v128_t gh = wasm_v8x16_shuffle(g1, h1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); - v128_t abcd = wasm_v8x16_shuffle(ab, cd, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19); - v128_t efgh = wasm_v8x16_shuffle(ef, gh, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19); - return v_uint8x16(wasm_v8x16_shuffle(abcd, efgh, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23)); + v128_t ab = wasm_i8x16_shuffle(a1, b1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); + v128_t cd = wasm_i8x16_shuffle(c1, d1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); + v128_t ef = wasm_i8x16_shuffle(e1, f1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); + v128_t gh = wasm_i8x16_shuffle(g1, h1, 0,8,16,24,0,8,16,24,0,8,16,24,0,8,16,24); + v128_t abcd = wasm_i8x16_shuffle(ab, cd, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19); + v128_t efgh = wasm_i8x16_shuffle(ef, gh, 0,1,2,3,16,17,18,19,0,1,2,3,16,17,18,19); + return v_uint8x16(wasm_i8x16_shuffle(abcd, efgh, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23)); } inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, @@ -964,7 +964,7 @@ inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) v_expand(b, b0, b1); v128_t c = wasm_i32x4_mul(a0.val, b0.val); v128_t d = wasm_i32x4_mul(a1.val, b1.val); - return v_int16x8(wasm_v8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31)); + return v_int16x8(wasm_i8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31)); } inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) { @@ -973,7 +973,7 @@ inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) v_expand(b, b0, b1); v128_t c = wasm_i32x4_mul(a0.val, b0.val); v128_t d = wasm_i32x4_mul(a1.val, b1.val); - return v_uint16x8(wasm_v8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31)); + return v_uint16x8(wasm_i8x16_shuffle(c, d, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31)); } //////// Dot Product //////// @@ -1398,7 +1398,7 @@ inline _Tpsvec v_shl(const _Tpsvec& a, int imm) \ { \ return _Tpsvec(wasm_##suffix##_shl(a.val, imm)); \ } \ -inline _Tpuvec V_shr(const _Tpuvec& a, int imm) \ +inline _Tpuvec v_shr(const _Tpuvec& a, int imm) \ { \ return _Tpuvec(wasm_##ssuffix##_shr(a.val, imm)); \ } \ @@ -1471,7 +1471,7 @@ namespace hal_wasm_internal inline v128_t operator()(const v128_t& a, const v128_t& b) const { enum { imm2 = (sizeof(v128_t) - imm) }; - return wasm_v8x16_shuffle(a, b, + return wasm_i8x16_shuffle(a, b, imm, imm+1, imm+2, imm+3, imm+4, imm+5, imm+6, imm+7, imm+8, imm+9, imm+10, imm+11, @@ -1582,19 +1582,19 @@ OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_float64x2, double) /** Reverse **/ inline v_uint8x16 v_reverse(const v_uint8x16 &a) -{ return v_uint8x16(wasm_v8x16_shuffle(a.val, a.val, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); } +{ return v_uint8x16(wasm_i8x16_shuffle(a.val, a.val, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); } inline v_int8x16 v_reverse(const v_int8x16 &a) { return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); } inline v_uint16x8 v_reverse(const v_uint16x8 &a) -{ return 
v_uint16x8(wasm_v8x16_shuffle(a.val, a.val, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1)); } +{ return v_uint16x8(wasm_i8x16_shuffle(a.val, a.val, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1)); } inline v_int16x8 v_reverse(const v_int16x8 &a) { return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); } inline v_uint32x4 v_reverse(const v_uint32x4 &a) -{ return v_uint32x4(wasm_v8x16_shuffle(a.val, a.val, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)); } +{ return v_uint32x4(wasm_i8x16_shuffle(a.val, a.val, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)); } inline v_int32x4 v_reverse(const v_int32x4 &a) { return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); } @@ -1603,7 +1603,7 @@ inline v_float32x4 v_reverse(const v_float32x4 &a) { return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); } inline v_uint64x2 v_reverse(const v_uint64x2 &a) -{ return v_uint64x2(wasm_v8x16_shuffle(a.val, a.val, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); } +{ return v_uint64x2(wasm_i8x16_shuffle(a.val, a.val, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); } inline v_int64x2 v_reverse(const v_int64x2 &a) { return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); } @@ -1616,8 +1616,8 @@ inline v_float64x2 v_reverse(const v_float64x2 &a) inline scalartype v_reduce_sum(const _Tpvec& a) \ { \ regtype val = a.val; \ - val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \ - val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3)); \ + val = wasm_##suffix##_add(val, wasm_i8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \ + val = wasm_##suffix##_add(val, wasm_i8x16_shuffle(val, val, 4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3)); \ return (scalartype)wasm_##esuffix##_extract_lane(val, 0); \ } @@ -1649,7 +1649,7 @@ OPENCV_HAL_IMPL_FALLBACK_REDUCE_OP_SUM(v_int16x8, int) inline scalartype v_reduce_sum(const _Tpvec& a) \ { \ regtype val = a.val; \ - val = wasm_##suffix##_add(val, wasm_v8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \ + val = wasm_##suffix##_add(val, wasm_i8x16_shuffle(val, val, 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); \ return (scalartype)wasm_##esuffix##_extract_lane(val, 0); \ } OPENCV_HAL_IMPL_WASM_REDUCE_OP_2_SUM(v_uint64x2, uint64, v128_t, i64x2, i64x2) @@ -1996,8 +1996,8 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b) v128_t t00 = wasm_v128_load(ptr); v128_t t01 = wasm_v128_load(ptr + 16); - a.val = wasm_v8x16_shuffle(t00, t01, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30); - b.val = wasm_v8x16_shuffle(t00, t01, 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31); + a.val = wasm_i8x16_shuffle(t00, t01, 0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30); + b.val = wasm_i8x16_shuffle(t00, t01, 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31); } inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c) @@ -2006,13 +2006,13 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v128_t t01 = wasm_v128_load(ptr + 16); v128_t t02 = wasm_v128_load(ptr + 32); - v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,3,6,9,12,15,18,21,24,27,30,1,2,4,5,7); - v128_t t11 = wasm_v8x16_shuffle(t00, t01, 1,4,7,10,13,16,19,22,25,28,31,0,2,3,5,6); - v128_t t12 = wasm_v8x16_shuffle(t00, t01, 2,5,8,11,14,17,20,23,26,29,0,1,3,4,6,7); + v128_t t10 = wasm_i8x16_shuffle(t00, t01, 0,3,6,9,12,15,18,21,24,27,30,1,2,4,5,7); + v128_t t11 = 
wasm_i8x16_shuffle(t00, t01, 1,4,7,10,13,16,19,22,25,28,31,0,2,3,5,6); + v128_t t12 = wasm_i8x16_shuffle(t00, t01, 2,5,8,11,14,17,20,23,26,29,0,1,3,4,6,7); - a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,17,20,23,26,29); - b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,18,21,24,27,30); - c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,16,19,22,25,28,31); + a.val = wasm_i8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,17,20,23,26,29); + b.val = wasm_i8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,18,21,24,27,30); + c.val = wasm_i8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,16,19,22,25,28,31); } inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c, v_uint8x16& d) @@ -2022,15 +2022,15 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v128_t u2 = wasm_v128_load(ptr + 32); // a8 b8 c8 d8 ... v128_t u3 = wasm_v128_load(ptr + 48); // a12 b12 c12 d12 ... - v128_t v0 = wasm_v8x16_shuffle(u0, u1, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29); - v128_t v1 = wasm_v8x16_shuffle(u2, u3, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29); - v128_t v2 = wasm_v8x16_shuffle(u0, u1, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31); - v128_t v3 = wasm_v8x16_shuffle(u2, u3, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31); + v128_t v0 = wasm_i8x16_shuffle(u0, u1, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29); + v128_t v1 = wasm_i8x16_shuffle(u2, u3, 0,4,8,12,16,20,24,28,1,5,9,13,17,21,25,29); + v128_t v2 = wasm_i8x16_shuffle(u0, u1, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31); + v128_t v3 = wasm_i8x16_shuffle(u2, u3, 2,6,10,14,18,22,26,30,3,7,11,15,19,23,27,31); - a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); - b.val = wasm_v8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); - c.val = wasm_v8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); - d.val = wasm_v8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); + a.val = wasm_i8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); + b.val = wasm_i8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); + c.val = wasm_i8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); + d.val = wasm_i8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); } inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b) @@ -2038,8 +2038,8 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b) v128_t v0 = wasm_v128_load(ptr); // a0 b0 a1 b1 a2 b2 a3 b3 v128_t v1 = wasm_v128_load(ptr + 8); // a4 b4 a5 b5 a6 b6 a7 b7 - a.val = wasm_v8x16_shuffle(v0, v1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29); // a0 a1 a2 a3 a4 a5 a6 a7 - b.val = wasm_v8x16_shuffle(v0, v1, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31); // b0 b1 ab b3 b4 b5 b6 b7 + a.val = wasm_i8x16_shuffle(v0, v1, 0,1,4,5,8,9,12,13,16,17,20,21,24,25,28,29); // a0 a1 a2 a3 a4 a5 a6 a7 + b.val = wasm_i8x16_shuffle(v0, v1, 2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31); // b0 b1 ab b3 b4 b5 b6 b7 } inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c) @@ -2048,13 +2048,13 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v128_t t01 = wasm_v128_load(ptr + 8); // c2 a3 b3 c3 a4 b4 c4 a5 v128_t t02 = wasm_v128_load(ptr + 16); // b5 c5 a6 b6 c6 a7 b7 c7 - v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,6,7,12,13,18,19,24,25,30,31,2,3,4,5); - v128_t t11 = wasm_v8x16_shuffle(t00, t01, 
2,3,8,9,14,15,20,21,26,27,0,1,4,5,6,7); - v128_t t12 = wasm_v8x16_shuffle(t00, t01, 4,5,10,11,16,17,22,23,28,29,0,1,2,3,6,7); + v128_t t10 = wasm_i8x16_shuffle(t00, t01, 0,1,6,7,12,13,18,19,24,25,30,31,2,3,4,5); + v128_t t11 = wasm_i8x16_shuffle(t00, t01, 2,3,8,9,14,15,20,21,26,27,0,1,4,5,6,7); + v128_t t12 = wasm_i8x16_shuffle(t00, t01, 4,5,10,11,16,17,22,23,28,29,0,1,2,3,6,7); - a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,26,27); - b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,16,17,22,23,28,29); - c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,18,19,24,25,30,31); + a.val = wasm_i8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,26,27); + b.val = wasm_i8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,16,17,22,23,28,29); + c.val = wasm_i8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,8,9,18,19,24,25,30,31); } inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c, v_uint16x8& d) @@ -2064,15 +2064,15 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v128_t u2 = wasm_v128_load(ptr + 16); // a4 b4 c4 d4 ... v128_t u3 = wasm_v128_load(ptr + 24); // a6 b6 c6 d6 ... - v128_t v0 = wasm_v8x16_shuffle(u0, u1, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a0 a1 a2 a3 b0 b1 b2 b3 - v128_t v1 = wasm_v8x16_shuffle(u2, u3, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a4 a5 a6 a7 b4 b5 b6 b7 - v128_t v2 = wasm_v8x16_shuffle(u0, u1, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c0 c1 c2 c3 d0 d1 d2 d3 - v128_t v3 = wasm_v8x16_shuffle(u2, u3, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c4 c5 c6 c7 d4 d5 d6 d7 + v128_t v0 = wasm_i8x16_shuffle(u0, u1, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a0 a1 a2 a3 b0 b1 b2 b3 + v128_t v1 = wasm_i8x16_shuffle(u2, u3, 0,1,8,9,16,17,24,25,2,3,10,11,18,19,26,27); // a4 a5 a6 a7 b4 b5 b6 b7 + v128_t v2 = wasm_i8x16_shuffle(u0, u1, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c0 c1 c2 c3 d0 d1 d2 d3 + v128_t v3 = wasm_i8x16_shuffle(u2, u3, 4,5,12,13,20,21,28,29,6,7,14,15,22,23,30,31); // c4 c5 c6 c7 d4 d5 d6 d7 - a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); - b.val = wasm_v8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); - c.val = wasm_v8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); - d.val = wasm_v8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); + a.val = wasm_i8x16_shuffle(v0, v1, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); + b.val = wasm_i8x16_shuffle(v0, v1, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); + c.val = wasm_i8x16_shuffle(v2, v3, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); + d.val = wasm_i8x16_shuffle(v2, v3, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); } inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b) @@ -2080,8 +2080,8 @@ inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& v128_t v0 = wasm_v128_load(ptr); // a0 b0 a1 b1 v128_t v1 = wasm_v128_load(ptr + 4); // a2 b2 a3 b3 - a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3 - b.val = wasm_v8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3 + a.val = wasm_i8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3 + b.val = wasm_i8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3 } inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c) @@ -2090,13 +2090,13 @@ inline 
void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& v128_t t01 = wasm_v128_load(ptr + 4); // b2 c2 a3 b3 v128_t t02 = wasm_v128_load(ptr + 8); // c3 a4 b4 c4 - v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7); - v128_t t11 = wasm_v8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3); - v128_t t12 = wasm_v8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7); + v128_t t10 = wasm_i8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7); + v128_t t11 = wasm_i8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3); + v128_t t12 = wasm_i8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7); - a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23); - b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27); - c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31); + a.val = wasm_i8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23); + b.val = wasm_i8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27); + c.val = wasm_i8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31); } inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c, v_uint32x4& d) @@ -2114,8 +2114,8 @@ inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b v128_t v0 = wasm_v128_load(ptr); // a0 b0 a1 b1 v128_t v1 = wasm_v128_load((ptr + 4)); // a2 b2 a3 b3 - a.val = wasm_v8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3 - b.val = wasm_v8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3 + a.val = wasm_i8x16_shuffle(v0, v1, 0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27); // a0 a1 a2 a3 + b.val = wasm_i8x16_shuffle(v0, v1, 4,5,6,7,12,13,14,15,20,21,22,23,28,29,30,31); // b0 b1 b2 b3 } inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c) @@ -2124,13 +2124,13 @@ inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b v128_t t01 = wasm_v128_load(ptr + 4); // b2 c2 a3 b3 v128_t t02 = wasm_v128_load(ptr + 8); // c3 a4 b4 c4 - v128_t t10 = wasm_v8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7); - v128_t t11 = wasm_v8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3); - v128_t t12 = wasm_v8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7); + v128_t t10 = wasm_i8x16_shuffle(t00, t01, 0,1,2,3,12,13,14,15,24,25,26,27,4,5,6,7); + v128_t t11 = wasm_i8x16_shuffle(t00, t01, 4,5,6,7,16,17,18,19,28,29,30,31,0,1,2,3); + v128_t t12 = wasm_i8x16_shuffle(t00, t01, 8,9,10,11,20,21,22,23,0,1,2,3,4,5,6,7); - a.val = wasm_v8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23); - b.val = wasm_v8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27); - c.val = wasm_v8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31); + a.val = wasm_i8x16_shuffle(t10, t02, 0,1,2,3,4,5,6,7,8,9,10,11,20,21,22,23); + b.val = wasm_i8x16_shuffle(t11, t02, 0,1,2,3,4,5,6,7,8,9,10,11,24,25,26,27); + c.val = wasm_i8x16_shuffle(t12, t02, 0,1,2,3,4,5,6,7,16,17,18,19,28,29,30,31); } inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c, v_float32x4& d) @@ -2158,9 +2158,9 @@ inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b, v128_t t1 = wasm_v128_load(ptr + 2); // c0, a1 v128_t t2 = wasm_v128_load(ptr + 4); // b1, c1 - a.val = wasm_v8x16_shuffle(t0, t1, 
0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31); - b.val = wasm_v8x16_shuffle(t0, t2, 8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23); - c.val = wasm_v8x16_shuffle(t1, t2, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31); + a.val = wasm_i8x16_shuffle(t0, t1, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31); + b.val = wasm_i8x16_shuffle(t0, t2, 8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23); + c.val = wasm_i8x16_shuffle(t1, t2, 0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31); } inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, @@ -2192,13 +2192,13 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x1 inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b, const v_uint8x16& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) { - v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,16,0,1,17,0,2,18,0,3,19,0,4,20,0,5); - v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 21,0,6,22,0,7,23,0,8,24,0,9,25,0,10,26); - v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,11,27,0,12,28,0,13,29,0,14,30,0,15,31,0); + v128_t t00 = wasm_i8x16_shuffle(a.val, b.val, 0,16,0,1,17,0,2,18,0,3,19,0,4,20,0,5); + v128_t t01 = wasm_i8x16_shuffle(a.val, b.val, 21,0,6,22,0,7,23,0,8,24,0,9,25,0,10,26); + v128_t t02 = wasm_i8x16_shuffle(a.val, b.val, 0,11,27,0,12,28,0,13,29,0,14,30,0,15,31,0); - v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,16,3,4,17,6,7,18,9,10,19,12,13,20,15); - v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,21,2,3,22,5,6,23,8,9,24,11,12,25,14,15); - v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 26,1,2,27,4,5,28,7,8,29,10,11,30,13,14,31); + v128_t t10 = wasm_i8x16_shuffle(t00, c.val, 0,1,16,3,4,17,6,7,18,9,10,19,12,13,20,15); + v128_t t11 = wasm_i8x16_shuffle(t01, c.val, 0,21,2,3,22,5,6,23,8,9,24,11,12,25,14,15); + v128_t t12 = wasm_i8x16_shuffle(t02, c.val, 26,1,2,27,4,5,28,7,8,29,10,11,30,13,14,31); wasm_v128_store(ptr, t10); wasm_v128_store(ptr + 16, t11); @@ -2243,13 +2243,13 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b, const v_uint16x8& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) { - v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,16,17,0,0,2,3,18,19,0,0,4,5,20,21); - v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 0,0,6,7,22,23,0,0,8,9,24,25,0,0,10,11); - v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 26,27,0,0,12,13,28,29,0,0,14,15,30,31,0,0); + v128_t t00 = wasm_i8x16_shuffle(a.val, b.val, 0,1,16,17,0,0,2,3,18,19,0,0,4,5,20,21); + v128_t t01 = wasm_i8x16_shuffle(a.val, b.val, 0,0,6,7,22,23,0,0,8,9,24,25,0,0,10,11); + v128_t t02 = wasm_i8x16_shuffle(a.val, b.val, 26,27,0,0,12,13,28,29,0,0,14,15,30,31,0,0); - v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,16,17,6,7,8,9,18,19,12,13,14,15); - v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 20,21,2,3,4,5,22,23,8,9,10,11,24,25,14,15); - v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 0,1,26,27,4,5,6,7,28,29,10,11,12,13,30,31); + v128_t t10 = wasm_i8x16_shuffle(t00, c.val, 0,1,2,3,16,17,6,7,8,9,18,19,12,13,14,15); + v128_t t11 = wasm_i8x16_shuffle(t01, c.val, 20,21,2,3,4,5,22,23,8,9,10,11,24,25,14,15); + v128_t t12 = wasm_i8x16_shuffle(t02, c.val, 0,1,26,27,4,5,6,7,28,29,10,11,12,13,30,31); wasm_v128_store(ptr, t10); wasm_v128_store(ptr + 8, t11); @@ -2293,13 +2293,13 @@ inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b, const v_uint32x4& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) { - v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 
0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7); - v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27); - v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0); + v128_t t00 = wasm_i8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7); + v128_t t01 = wasm_i8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27); + v128_t t02 = wasm_i8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0); - v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15); - v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15); - v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31); + v128_t t10 = wasm_i8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15); + v128_t t11 = wasm_i8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15); + v128_t t12 = wasm_i8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31); wasm_v128_store(ptr, t10); wasm_v128_store(ptr + 4, t11); @@ -2333,13 +2333,13 @@ inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32 inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, const v_float32x4& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) { - v128_t t00 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7); - v128_t t01 = wasm_v8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27); - v128_t t02 = wasm_v8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0); + v128_t t00 = wasm_i8x16_shuffle(a.val, b.val, 0,1,2,3,16,17,18,19,0,0,0,0,4,5,6,7); + v128_t t01 = wasm_i8x16_shuffle(a.val, b.val, 20,21,22,23,0,0,0,0,8,9,10,11,24,25,26,27); + v128_t t02 = wasm_i8x16_shuffle(a.val, b.val, 0,0,0,0,12,13,14,15,28,29,30,31,0,0,0,0); - v128_t t10 = wasm_v8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15); - v128_t t11 = wasm_v8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15); - v128_t t12 = wasm_v8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31); + v128_t t10 = wasm_i8x16_shuffle(t00, c.val, 0,1,2,3,4,5,6,7,16,17,18,19,12,13,14,15); + v128_t t11 = wasm_i8x16_shuffle(t01, c.val, 0,1,2,3,20,21,22,23,8,9,10,11,12,13,14,15); + v128_t t12 = wasm_i8x16_shuffle(t02, c.val, 24,25,26,27,4,5,6,7,8,9,10,11,28,29,30,31); wasm_v128_store(ptr, t10); wasm_v128_store(ptr + 4, t11); @@ -2372,9 +2372,9 @@ inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, hal::StoreMode /*mode*/ = hal::STORE_UNALIGNED) { - v128_t v0 = wasm_v8x16_shuffle(a.val, b.val, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); - v128_t v1 = wasm_v8x16_shuffle(a.val, c.val, 16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15); - v128_t v2 = wasm_v8x16_shuffle(b.val, c.val, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); + v128_t v0 = wasm_i8x16_shuffle(a.val, b.val, 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); + v128_t v1 = wasm_i8x16_shuffle(a.val, c.val, 16,17,18,19,20,21,22,23,8,9,10,11,12,13,14,15); + v128_t v2 = wasm_i8x16_shuffle(b.val, c.val, 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31); wasm_v128_store(ptr, v0); wasm_v128_store(ptr + 2, v1); @@ -2687,45 +2687,45 @@ inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_flo inline v_int8x16 v_interleave_pairs(const v_int8x16& vec) { - return 
v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,2,1,3,4,6,5,7,8,10,9,11,12,14,13,15)); + return v_int8x16(wasm_i8x16_shuffle(vec.val, vec.val, 0,2,1,3,4,6,5,7,8,10,9,11,12,14,13,15)); } inline v_uint8x16 v_interleave_pairs(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_pairs(v_reinterpret_as_s8(vec))); } inline v_int8x16 v_interleave_quads(const v_int8x16& vec) { - return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,4,1,5,2,6,3,7,8,12,9,13,10,14,11,15)); + return v_int8x16(wasm_i8x16_shuffle(vec.val, vec.val, 0,4,1,5,2,6,3,7,8,12,9,13,10,14,11,15)); } inline v_uint8x16 v_interleave_quads(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_interleave_quads(v_reinterpret_as_s8(vec))); } inline v_int16x8 v_interleave_pairs(const v_int16x8& vec) { - return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,4,5,2,3,6,7,8,9,12,13,10,11,14,15)); + return v_int16x8(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,4,5,2,3,6,7,8,9,12,13,10,11,14,15)); } inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec))); } inline v_int16x8 v_interleave_quads(const v_int16x8& vec) { - return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15)); + return v_int16x8(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15)); } inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec))); } inline v_int32x4 v_interleave_pairs(const v_int32x4& vec) { - return v_int32x4(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15)); + return v_int32x4(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15)); } inline v_uint32x4 v_interleave_pairs(const v_uint32x4& vec) { return v_reinterpret_as_u32(v_interleave_pairs(v_reinterpret_as_s32(vec))); } inline v_float32x4 v_interleave_pairs(const v_float32x4& vec) { - return v_float32x4(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15)); + return v_float32x4(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15)); } inline v_int8x16 v_pack_triplets(const v_int8x16& vec) { - return v_int8x16(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,4,5,6,8,9,10,12,13,14,16,16,16,16)); + return v_int8x16(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,2,4,5,6,8,9,10,12,13,14,16,16,16,16)); } inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) { return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); } inline v_int16x8 v_pack_triplets(const v_int16x8& vec) { - return v_int16x8(wasm_v8x16_shuffle(vec.val, vec.val, 0,1,2,3,4,5,8,9,10,11,12,13,14,15,6,7)); + return v_int16x8(wasm_i8x16_shuffle(vec.val, vec.val, 0,1,2,3,4,5,8,9,10,11,12,13,14,15,6,7)); } inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) { return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); }
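
Below is a minimal standalone sketch (not part of the patch) illustrating the rename that this patch applies throughout `intrin_wasm.hpp`: the deprecated `wasm_v8x16_shuffle` spelling is replaced by the final LLVM/Emscripten name `wasm_i8x16_shuffle`, with unchanged semantics. It mirrors the patched `wasm_unpacklo_i8x16` helper and assumes an Emscripten toolchain recent enough to ship the new name, built with `emcc -msimd128`.

```cpp
// demo_shuffle.cpp -- illustrative only; build with: emcc -msimd128 -O2 demo_shuffle.cpp
#include <wasm_simd128.h>
#include <stdint.h>
#include <stdio.h>

// Interleave the low 8 bytes of a and b, mirroring the patched
// wasm_unpacklo_i8x16 helper in intrin_wasm.hpp. Older Emscripten headers
// spelled this intrinsic wasm_v8x16_shuffle; newer ones provide only
// wasm_i8x16_shuffle, which is what the patch switches to.
static v128_t unpacklo_i8x16(v128_t a, v128_t b)
{
    return wasm_i8x16_shuffle(a, b, 0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23);
}

int main()
{
    uint8_t a[16], b[16], r[16];
    for (int i = 0; i < 16; ++i) { a[i] = (uint8_t)i; b[i] = (uint8_t)(100 + i); }

    v128_t va = wasm_v128_load(a);
    v128_t vb = wasm_v128_load(b);
    wasm_v128_store(r, unpacklo_i8x16(va, vb));

    for (int i = 0; i < 16; ++i) printf("%d ", r[i]); // expected: 0 100 1 101 2 102 ...
    printf("\n");
    return 0;
}
```

Note that the lane indices must be compile-time constants: `wasm_i8x16_shuffle` lowers to the WebAssembly `i8x16.shuffle` instruction, which encodes them as immediates.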