From fd91749e5a4feb45eafa91a0fd76685a104ad5e0 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Thu, 9 Feb 2017 21:41:02 +0200 Subject: [PATCH 01/31] The beginning of a test that checks for various SIMD implementations. --- test cases/common/139 simd/fallback.c | 7 ++++++ test cases/common/139 simd/meson.build | 13 ++++++++++ test cases/common/139 simd/simdchecker.c | 31 ++++++++++++++++++++++++ test cases/common/139 simd/simdfuncs.h | 15 ++++++++++++ 4 files changed, 66 insertions(+) create mode 100644 test cases/common/139 simd/fallback.c create mode 100644 test cases/common/139 simd/meson.build create mode 100644 test cases/common/139 simd/simdchecker.c create mode 100644 test cases/common/139 simd/simdfuncs.h diff --git a/test cases/common/139 simd/fallback.c b/test cases/common/139 simd/fallback.c new file mode 100644 index 000000000..2b98304a0 --- /dev/null +++ b/test cases/common/139 simd/fallback.c @@ -0,0 +1,7 @@ +#include + +void increment_fallback(float arr[4]) { + for(int i=0; i<4; i++) { + arr[i]++; + } +} diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build new file mode 100644 index 000000000..b26d0071e --- /dev/null +++ b/test cases/common/139 simd/meson.build @@ -0,0 +1,13 @@ +project('simd', 'c') + +cdata = configuration_data() + +#cdata.set('HAVE_MMX', 1) + +configure_file(output : 'simdconfig.h', + configuration : cdata) + +p = executable('simdtest', 'simdchecker.c', 'fallback.c') + +test('simdtest', p) + diff --git a/test cases/common/139 simd/simdchecker.c b/test cases/common/139 simd/simdchecker.c new file mode 100644 index 000000000..bcc6cef21 --- /dev/null +++ b/test cases/common/139 simd/simdchecker.c @@ -0,0 +1,31 @@ +#include +#include + +/* + * A function that checks at runtime if simd acceleration is + * available and calls the respective function if it is. Falls + * back to plain C implementation if not. 
+ */ + +int main(int argc, char **argv) { + float four[4] = {2.0, 3.0, 4.0, 5.0}; + const float expected[4] = {3.0, 4.0, 5.0, 6.0}; + void (*fptr)(float[4]) = NULL; + +#if HAVE_MMX + if(mmx_available()) { + fptr = increment_mmx; + } +#endif + if(fptr == NULL) { + fptr = increment_fallback; + } + fptr(four); + for(int i=0; i<4; i++) { + if(four[i] != expected[i]) { + printf("Increment function failed.\n"); + return 1; + } + } + return 0; +} diff --git a/test cases/common/139 simd/simdfuncs.h b/test cases/common/139 simd/simdfuncs.h new file mode 100644 index 000000000..17c627e9b --- /dev/null +++ b/test cases/common/139 simd/simdfuncs.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +/* Yes, I do know that arr[4] decays into a pointer + * here. Don't do this in real code but for test code + * it is ok. + */ + +void increment_fallback(float arr[4]); + +#if HAVE_MMX +void increment_mmx(float arr[4]); +#endif + From 326d8953b4742228c6a82fab36dd0718601da9c1 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Thu, 9 Feb 2017 22:41:30 +0200 Subject: [PATCH 02/31] Add support for MMX checking. 
--- test cases/common/139 simd/fallback.c | 3 ++- test cases/common/139 simd/meson.build | 24 ++++++++++++++++++++++-- test cases/common/139 simd/simd_mmx.c | 22 ++++++++++++++++++++++ test cases/common/139 simd/simdchecker.c | 20 ++++++++++++++------ test cases/common/139 simd/simdfuncs.h | 12 ++++++++++-- 5 files changed, 70 insertions(+), 11 deletions(-) create mode 100644 test cases/common/139 simd/simd_mmx.c diff --git a/test cases/common/139 simd/fallback.c b/test cases/common/139 simd/fallback.c index 2b98304a0..ab435f433 100644 --- a/test cases/common/139 simd/fallback.c +++ b/test cases/common/139 simd/fallback.c @@ -1,7 +1,8 @@ #include void increment_fallback(float arr[4]) { - for(int i=0; i<4; i++) { + int i; + for(i=0; i<4; i++) { arr[i]++; } } diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build index b26d0071e..153b458b0 100644 --- a/test cases/common/139 simd/meson.build +++ b/test cases/common/139 simd/meson.build @@ -1,13 +1,33 @@ project('simd', 'c') +cc = meson.get_compiler('c') + cdata = configuration_data() -#cdata.set('HAVE_MMX', 1) +# The idea is to have a simd module and then do something like: +# +# static_libs = simd.check('mysimdstuff', +# mmx : 'mmx_funcs.c', +# sse : 'sse_funcs.c', +# sse2 : 'sse2_funcs.c', +# +# configuration : cdata, # adds HAVE_XXX +# compiler : cc) +# +# and then have a target that uses the result in links_with. 
+ +simdlibs = [] + +if cc.has_argument('-mmmx') + cdata.set('HAVE_MMX', 1) + simdlibs += static_library('simd_mmx', 'simd_mmx.c', c_args : '-mmmx') +endif configure_file(output : 'simdconfig.h', configuration : cdata) -p = executable('simdtest', 'simdchecker.c', 'fallback.c') +p = executable('simdtest', 'simdchecker.c', 'fallback.c', + link_with : simdlibs) test('simdtest', p) diff --git a/test cases/common/139 simd/simd_mmx.c b/test cases/common/139 simd/simd_mmx.c new file mode 100644 index 000000000..dc1d51c10 --- /dev/null +++ b/test cases/common/139 simd/simd_mmx.c @@ -0,0 +1,22 @@ +#include +#include +#include + +int mmx_available() { + return __builtin_cpu_supports("mmx"); +} + +void increment_mmx(float arr[4]) { + /* Super ugly but we know that values in arr are always small + * enough to fit in int16; + */ + __m64 packed = _mm_set_pi16(arr[3], arr[2], arr[1], arr[0]); + __m64 incr = _mm_set1_pi16(1); + __m64 result = _mm_add_pi16(packed, incr); + int64_t unpacker = _m_to_int64(result); + _mm_empty(); + for(int i=0; i<4; i++) { + arr[i] = unpacker & ((1<<16)-1); + unpacker >>= 16; + } +} diff --git a/test cases/common/139 simd/simdchecker.c b/test cases/common/139 simd/simdchecker.c index bcc6cef21..d75af25fb 100644 --- a/test cases/common/139 simd/simdchecker.c +++ b/test cases/common/139 simd/simdchecker.c @@ -2,28 +2,36 @@ #include /* - * A function that checks at runtime if simd acceleration is - * available and calls the respective function if it is. Falls - * back to plain C implementation if not. + * A function that checks at runtime which simd accelerations are + * available and calls the best one. Falls + * back to plain C implementation if SIMD is not available. */ int main(int argc, char **argv) { float four[4] = {2.0, 3.0, 4.0, 5.0}; const float expected[4] = {3.0, 4.0, 5.0, 6.0}; void (*fptr)(float[4]) = NULL; + const char *type; -#if HAVE_MMX - if(mmx_available()) { +#if HAVE_SSE + /* Add here. 
The first matched one is used so put "better" instruction + * sets at the top. + */ +#elif HAVE_MMX + if(fptr == NULL && mmx_available()) { fptr = increment_mmx; + type = "MMX"; } #endif if(fptr == NULL) { fptr = increment_fallback; + type = "fallback"; } + printf("Using %s.\n", type); fptr(four); for(int i=0; i<4; i++) { if(four[i] != expected[i]) { - printf("Increment function failed.\n"); + printf("Increment function failed, got %f expected %f.\n", four[i], expected[i]); return 1; } } diff --git a/test cases/common/139 simd/simdfuncs.h b/test cases/common/139 simd/simdfuncs.h index 17c627e9b..4b452fc8e 100644 --- a/test cases/common/139 simd/simdfuncs.h +++ b/test cases/common/139 simd/simdfuncs.h @@ -3,13 +3,21 @@ #include /* Yes, I do know that arr[4] decays into a pointer - * here. Don't do this in real code but for test code - * it is ok. + * as a function argument. Don't do this in real code + * but for this test it is ok. */ void increment_fallback(float arr[4]); #if HAVE_MMX +int mmx_available(); void increment_mmx(float arr[4]); #endif +#if HAVE_SSE +#endif + +#if HAVE_SSE2 +#endif + +/* And so on. */ From 4a8ca14d44a4a6f67da0466dc98c81b3e5872194 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Sun, 12 Feb 2017 13:00:22 +0200 Subject: [PATCH 03/31] Some more arches. 
--- test cases/common/139 simd/simdfuncs.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/test cases/common/139 simd/simdfuncs.h b/test cases/common/139 simd/simdfuncs.h index 4b452fc8e..62c6bc7f7 100644 --- a/test cases/common/139 simd/simdfuncs.h +++ b/test cases/common/139 simd/simdfuncs.h @@ -15,9 +15,33 @@ void increment_mmx(float arr[4]); #endif #if HAVE_SSE +int sse_available(); +void increment_sse(float arr[4]); #endif #if HAVE_SSE2 +int sse2_available(); +void increment_sse2(float arr[4]); +#endif + +#if HAVE_AVX +int avx_available(); +void increment_avx(float arr[4]); +#endif + +#if HAVE_AVX2 +int avx2_available(); +void increment_avx2(float arr[4]); +#endif + +#if HAVE_THUMB +int thumb_available(); +void increment_thumb(float arr[4]); +#endif + +#if HAVE_ALTIVEC +int altivec_available(); +void increment_altivec(float arr[4]); #endif /* And so on. */ From 9e5578ac472287979898f9ee8da72cccdf347636 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Sun, 12 Feb 2017 15:27:17 +0200 Subject: [PATCH 04/31] Added SSE support. --- test cases/common/139 simd/meson.build | 16 ++++++++++++++++ test cases/common/139 simd/simd_mmx.c | 3 +++ test cases/common/139 simd/simd_sse.c | 17 +++++++++++++++++ test cases/common/139 simd/simdchecker.c | 16 +++++++++++----- 4 files changed, 47 insertions(+), 5 deletions(-) create mode 100644 test cases/common/139 simd/simd_sse.c diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build index 153b458b0..5311460cc 100644 --- a/test cases/common/139 simd/meson.build +++ b/test cases/common/139 simd/meson.build @@ -16,6 +16,17 @@ cdata = configuration_data() # # and then have a target that uses the result in links_with. +# The following headers need to be added. Also Thumb and Altivec. 
+# SSE2 +# SSE3 +# SSSE3 +# SSE4.1 +# SSE4.2 +# SSE4A +# AES +# AVX +# AVX512 + simdlibs = [] if cc.has_argument('-mmmx') @@ -23,6 +34,11 @@ if cc.has_argument('-mmmx') simdlibs += static_library('simd_mmx', 'simd_mmx.c', c_args : '-mmmx') endif +if cc.has_argument('-msse') + cdata.set('HAVE_SSE', 1) + simdlibs += static_library('simd_sse', 'simd_sse.c', c_args : '-msse') +endif + configure_file(output : 'simdconfig.h', configuration : cdata) diff --git a/test cases/common/139 simd/simd_mmx.c b/test cases/common/139 simd/simd_mmx.c index dc1d51c10..351f36e63 100644 --- a/test cases/common/139 simd/simd_mmx.c +++ b/test cases/common/139 simd/simd_mmx.c @@ -1,3 +1,6 @@ +#include +#include + #include #include #include diff --git a/test cases/common/139 simd/simd_sse.c b/test cases/common/139 simd/simd_sse.c new file mode 100644 index 000000000..11a74c917 --- /dev/null +++ b/test cases/common/139 simd/simd_sse.c @@ -0,0 +1,17 @@ +#include +#include + +#include +#include +#include + +int sse_available() { + return __builtin_cpu_supports("sse"); +} + +void increment_sse(float arr[4]) { + __m128 val = _mm_load_ps(arr); + __m128 one = _mm_set_ps1(1.0); + __m128 result = _mm_add_ps(val, one); + _mm_storeu_ps(arr, result); +} diff --git a/test cases/common/139 simd/simdchecker.c b/test cases/common/139 simd/simdchecker.c index d75af25fb..b3c94f95c 100644 --- a/test cases/common/139 simd/simdchecker.c +++ b/test cases/common/139 simd/simdchecker.c @@ -12,12 +12,18 @@ int main(int argc, char **argv) { const float expected[4] = {3.0, 4.0, 5.0, 6.0}; void (*fptr)(float[4]) = NULL; const char *type; + int i; +/* Add here. The first matched one is used so put "better" instruction + * sets at the top. + */ #if HAVE_SSE - /* Add here. The first matched one is used so put "better" instruction - * sets at the top. 
- */ -#elif HAVE_MMX + if(fptr == NULL && sse_available()) { + fptr = increment_sse; + type = "SSE"; + } +#endif +#if HAVE_MMX if(fptr == NULL && mmx_available()) { fptr = increment_mmx; type = "MMX"; @@ -29,7 +35,7 @@ int main(int argc, char **argv) { } printf("Using %s.\n", type); fptr(four); - for(int i=0; i<4; i++) { + for(i=0; i<4; i++) { if(four[i] != expected[i]) { printf("Increment function failed, got %f expected %f.\n", four[i], expected[i]); return 1; From 953441badfbe4b08873d430e6aa0f08b66296489 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Mon, 13 Feb 2017 22:08:24 +0200 Subject: [PATCH 05/31] Added SSE2 support. --- test cases/common/139 simd/meson.build | 25 ++++++++++++++---------- test cases/common/139 simd/simd_sse2.c | 25 ++++++++++++++++++++++++ test cases/common/139 simd/simdchecker.c | 6 ++++++ 3 files changed, 46 insertions(+), 10 deletions(-) create mode 100644 test cases/common/139 simd/simd_sse2.c diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build index 5311460cc..d717dba7e 100644 --- a/test cases/common/139 simd/meson.build +++ b/test cases/common/139 simd/meson.build @@ -17,7 +17,6 @@ cdata = configuration_data() # and then have a target that uses the result in links_with. # The following headers need to be added. Also Thumb and Altivec. 
-# SSE2 # SSE3 # SSSE3 # SSE4.1 @@ -29,15 +28,21 @@ cdata = configuration_data() simdlibs = [] -if cc.has_argument('-mmmx') - cdata.set('HAVE_MMX', 1) - simdlibs += static_library('simd_mmx', 'simd_mmx.c', c_args : '-mmmx') -endif - -if cc.has_argument('-msse') - cdata.set('HAVE_SSE', 1) - simdlibs += static_library('simd_sse', 'simd_sse.c', c_args : '-msse') -endif +simdarr = [['-mmx', 'HAVE_MMX', 'simd_mmx', 'simd_mmx.c'], + ['-msse', 'HAVE_SSE', 'simd_sse', 'simd_sse.c'], + ['-msse2', 'HAVE_SSE2', 'simd_sse2', 'simd_sse2.c'], +] + +foreach ia : simdarr + arg = ia[0] + def = ia[1] + libname = ia[2] + filename = ia[3] + if cc.has_argument(arg) + cdata.set(def, 1) + simdlibs += static_library(libname, filename, c_args : arg) + endif +endforeach configure_file(output : 'simdconfig.h', configuration : cdata) diff --git a/test cases/common/139 simd/simd_sse2.c b/test cases/common/139 simd/simd_sse2.c new file mode 100644 index 000000000..52a7e4017 --- /dev/null +++ b/test cases/common/139 simd/simd_sse2.c @@ -0,0 +1,25 @@ +#include +#include + +#include +#include +#include + +int sse2_available() { + return __builtin_cpu_supports("sse2"); +} + +void increment_sse2(float arr[4]) { + double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd1(1.0); + __m128d result = _mm_add_pd(val1, one); + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} diff --git a/test cases/common/139 simd/simdchecker.c b/test cases/common/139 simd/simdchecker.c index b3c94f95c..ce28f054d 100644 --- a/test cases/common/139 simd/simdchecker.c +++ b/test cases/common/139 simd/simdchecker.c @@ -17,6 +17,12 @@ int main(int argc, char **argv) { /* Add here. The first matched one is used so put "better" instruction * sets at the top. 
*/ +#if HAVE_SSE2 + if(fptr == NULL && sse2_available()) { + fptr = increment_sse2; + type = "SSE2"; + } +#endif #if HAVE_SSE if(fptr == NULL && sse_available()) { fptr = increment_sse; From 0062595e13ab6c7a14150903696d94428530d608 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Mon, 13 Feb 2017 22:14:23 +0200 Subject: [PATCH 06/31] Added SSE3 support. --- test cases/common/139 simd/meson.build | 4 ++-- test cases/common/139 simd/simd_sse3.c | 26 ++++++++++++++++++++++++ test cases/common/139 simd/simdchecker.c | 6 ++++++ test cases/common/139 simd/simdfuncs.h | 5 +++++ 4 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 test cases/common/139 simd/simd_sse3.c diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build index d717dba7e..96852576d 100644 --- a/test cases/common/139 simd/meson.build +++ b/test cases/common/139 simd/meson.build @@ -17,7 +17,6 @@ cdata = configuration_data() # and then have a target that uses the result in links_with. # The following headers need to be added. Also Thumb and Altivec. 
-# SSE3 # SSSE3 # SSE4.1 # SSE4.2 @@ -28,9 +27,10 @@ cdata = configuration_data() simdlibs = [] -simdarr = [['-mmx', 'HAVE_MMX', 'simd_mmx', 'simd_mmx.c'], +simdarr = [['-mmmx', 'HAVE_MMX', 'simd_mmx', 'simd_mmx.c'], ['-msse', 'HAVE_SSE', 'simd_sse', 'simd_sse.c'], ['-msse2', 'HAVE_SSE2', 'simd_sse2', 'simd_sse2.c'], + ['-msse3', 'HAVE_SSE3', 'simd_sse3', 'simd_sse3.c'], ] foreach ia : simdarr diff --git a/test cases/common/139 simd/simd_sse3.c b/test cases/common/139 simd/simd_sse3.c new file mode 100644 index 000000000..db0eef62d --- /dev/null +++ b/test cases/common/139 simd/simd_sse3.c @@ -0,0 +1,26 @@ +#include +#include + +#include +#include +#include + +int sse3_available() { + return __builtin_cpu_supports("sse3"); +} + +void increment_sse3(float arr[4]) { + double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd1(1.0); + __m128d result = _mm_add_pd(val1, one); + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + result = _mm_hadd_pd(val1, val2); /* This does nothing. Only here so we use an SSE3 instruction. */ + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} diff --git a/test cases/common/139 simd/simdchecker.c b/test cases/common/139 simd/simdchecker.c index ce28f054d..5e5fe4305 100644 --- a/test cases/common/139 simd/simdchecker.c +++ b/test cases/common/139 simd/simdchecker.c @@ -17,6 +17,12 @@ int main(int argc, char **argv) { /* Add here. The first matched one is used so put "better" instruction * sets at the top. 
*/ +#if HAVE_SSE3 + if(fptr == NULL && sse3_available()) { + fptr = increment_sse3; + type = "SSE3"; + } +#endif #if HAVE_SSE2 if(fptr == NULL && sse2_available()) { fptr = increment_sse2; diff --git a/test cases/common/139 simd/simdfuncs.h b/test cases/common/139 simd/simdfuncs.h index 62c6bc7f7..dc6da147f 100644 --- a/test cases/common/139 simd/simdfuncs.h +++ b/test cases/common/139 simd/simdfuncs.h @@ -24,6 +24,11 @@ int sse2_available(); void increment_sse2(float arr[4]); #endif +#if HAVE_SSE3 +int sse3_available(); +void increment_sse3(float arr[4]); +#endif + #if HAVE_AVX int avx_available(); void increment_avx(float arr[4]); From 52a0d958f173ea96f3ea8a065dd388375d4321c3 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Mon, 13 Feb 2017 22:31:28 +0200 Subject: [PATCH 07/31] Added SSSE3 support of sorts. --- test cases/common/139 simd/meson.build | 2 +- test cases/common/139 simd/simd_ssse3.c | 28 ++++++++++++++++++++++++ test cases/common/139 simd/simdchecker.c | 6 +++++ test cases/common/139 simd/simdfuncs.h | 5 +++++ 4 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 test cases/common/139 simd/simd_ssse3.c diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build index 96852576d..6307550ae 100644 --- a/test cases/common/139 simd/meson.build +++ b/test cases/common/139 simd/meson.build @@ -17,7 +17,6 @@ cdata = configuration_data() # and then have a target that uses the result in links_with. # The following headers need to be added. Also Thumb and Altivec. 
-# SSSE3 # SSE4.1 # SSE4.2 # SSE4A @@ -31,6 +30,7 @@ simdarr = [['-mmmx', 'HAVE_MMX', 'simd_mmx', 'simd_mmx.c'], ['-msse', 'HAVE_SSE', 'simd_sse', 'simd_sse.c'], ['-msse2', 'HAVE_SSE2', 'simd_sse2', 'simd_sse2.c'], ['-msse3', 'HAVE_SSE3', 'simd_sse3', 'simd_sse3.c'], + ['-mssse3', 'HAVE_SSSE3', 'simd_ssse3', 'simd_ssse3.c'], ] foreach ia : simdarr diff --git a/test cases/common/139 simd/simd_ssse3.c b/test cases/common/139 simd/simd_ssse3.c new file mode 100644 index 000000000..225fe253f --- /dev/null +++ b/test cases/common/139 simd/simd_ssse3.c @@ -0,0 +1,28 @@ +#include +#include + +#include +#include +#include + +int ssse3_available() { + return __builtin_cpu_supports("ssse3"); +} + +void increment_ssse3(float arr[4]) { + double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd1(1.0); + __m128d result = _mm_add_pd(val1, one); + __m128i tmp1, tmp2; + tmp1 = tmp2 = _mm_set1_epi16(0); + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + tmp1 = _mm_hadd_epi32(tmp1, tmp2); /* This does nothing. Only here so we use an SSSE3 instruction. */ + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} diff --git a/test cases/common/139 simd/simdchecker.c b/test cases/common/139 simd/simdchecker.c index 5e5fe4305..e0722c9b4 100644 --- a/test cases/common/139 simd/simdchecker.c +++ b/test cases/common/139 simd/simdchecker.c @@ -17,6 +17,12 @@ int main(int argc, char **argv) { /* Add here. The first matched one is used so put "better" instruction * sets at the top. 
*/ +#if HAVE_SSSE3 + if(fptr == NULL && ssse3_available()) { + fptr = increment_ssse3; + type = "SSSE3"; + } +#endif #if HAVE_SSE3 if(fptr == NULL && sse3_available()) { fptr = increment_sse3; diff --git a/test cases/common/139 simd/simdfuncs.h b/test cases/common/139 simd/simdfuncs.h index dc6da147f..9b5318198 100644 --- a/test cases/common/139 simd/simdfuncs.h +++ b/test cases/common/139 simd/simdfuncs.h @@ -29,6 +29,11 @@ int sse3_available(); void increment_sse3(float arr[4]); #endif +#if HAVE_SSSE3 +int ssse3_available(); +void increment_ssse3(float arr[4]); +#endif + #if HAVE_AVX int avx_available(); void increment_avx(float arr[4]); From 5d731b102bec98449c82a20d893977a0c9594643 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Fri, 17 Feb 2017 22:56:49 +0200 Subject: [PATCH 08/31] Add SSE4.1 support. --- test cases/common/139 simd/meson.build | 2 +- test cases/common/139 simd/simd_sse41.c | 26 ++++++++++++++++++++++++ test cases/common/139 simd/simdchecker.c | 6 ++++++ test cases/common/139 simd/simdfuncs.h | 5 +++++ 4 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 test cases/common/139 simd/simd_sse41.c diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build index 6307550ae..6a4c6c19f 100644 --- a/test cases/common/139 simd/meson.build +++ b/test cases/common/139 simd/meson.build @@ -17,7 +17,6 @@ cdata = configuration_data() # and then have a target that uses the result in links_with. # The following headers need to be added. Also Thumb and Altivec. 
-# SSE4.1 # SSE4.2 # SSE4A # AES @@ -31,6 +30,7 @@ simdarr = [['-mmmx', 'HAVE_MMX', 'simd_mmx', 'simd_mmx.c'], ['-msse2', 'HAVE_SSE2', 'simd_sse2', 'simd_sse2.c'], ['-msse3', 'HAVE_SSE3', 'simd_sse3', 'simd_sse3.c'], ['-mssse3', 'HAVE_SSSE3', 'simd_ssse3', 'simd_ssse3.c'], + ['-msse4.1', 'HAVE_SSE41', 'simd_sse41', 'simd_sse41.c'], ] foreach ia : simdarr diff --git a/test cases/common/139 simd/simd_sse41.c b/test cases/common/139 simd/simd_sse41.c new file mode 100644 index 000000000..6087e4006 --- /dev/null +++ b/test cases/common/139 simd/simd_sse41.c @@ -0,0 +1,26 @@ +#include +#include + +#include +#include +#include + +int sse41_available() { + return __builtin_cpu_supports("sse4.1"); +} + +void increment_sse41(float arr[4]) { + double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd1(1.0); + __m128d result = _mm_add_pd(val1, one); + result = _mm_ceil_pd(result); /* A no-op, only here to use a SSE4.1 intrinsic. */ + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} diff --git a/test cases/common/139 simd/simdchecker.c b/test cases/common/139 simd/simdchecker.c index e0722c9b4..ece1c774a 100644 --- a/test cases/common/139 simd/simdchecker.c +++ b/test cases/common/139 simd/simdchecker.c @@ -17,6 +17,12 @@ int main(int argc, char **argv) { /* Add here. The first matched one is used so put "better" instruction * sets at the top. 
*/ +#if HAVE_SSE41 + if(fptr == NULL && sse41_available()) { + fptr = increment_sse41; + type = "SSE41"; + } +#endif #if HAVE_SSSE3 if(fptr == NULL && ssse3_available()) { fptr = increment_ssse3; diff --git a/test cases/common/139 simd/simdfuncs.h b/test cases/common/139 simd/simdfuncs.h index 9b5318198..67a8e1b10 100644 --- a/test cases/common/139 simd/simdfuncs.h +++ b/test cases/common/139 simd/simdfuncs.h @@ -34,6 +34,11 @@ int ssse3_available(); void increment_ssse3(float arr[4]); #endif +#if HAVE_SSE41 +int sse41_available(); +void increment_sse41(float arr[4]); +#endif + #if HAVE_AVX int avx_available(); void increment_avx(float arr[4]); From fc68e0c63ad87f11ce885e9470c6d8e0f2d8f020 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Fri, 17 Feb 2017 23:01:34 +0200 Subject: [PATCH 09/31] Add SSE4.2 support. --- test cases/common/139 simd/meson.build | 3 +-- test cases/common/139 simd/simd_sse42.c | 26 ++++++++++++++++++++++++ test cases/common/139 simd/simdchecker.c | 6 ++++++ test cases/common/139 simd/simdfuncs.h | 5 +++++ 4 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 test cases/common/139 simd/simd_sse42.c diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build index 6a4c6c19f..7769bfdb6 100644 --- a/test cases/common/139 simd/meson.build +++ b/test cases/common/139 simd/meson.build @@ -17,8 +17,6 @@ cdata = configuration_data() # and then have a target that uses the result in links_with. # The following headers need to be added. Also Thumb and Altivec. 
-# SSE4.2 -# SSE4A # AES # AVX # AVX512 @@ -31,6 +29,7 @@ simdarr = [['-mmmx', 'HAVE_MMX', 'simd_mmx', 'simd_mmx.c'], ['-msse3', 'HAVE_SSE3', 'simd_sse3', 'simd_sse3.c'], ['-mssse3', 'HAVE_SSSE3', 'simd_ssse3', 'simd_ssse3.c'], ['-msse4.1', 'HAVE_SSE41', 'simd_sse41', 'simd_sse41.c'], + ['-msse4.2', 'HAVE_SSE42', 'simd_sse42', 'simd_sse42.c'], ] foreach ia : simdarr diff --git a/test cases/common/139 simd/simd_sse42.c b/test cases/common/139 simd/simd_sse42.c new file mode 100644 index 000000000..229ef039e --- /dev/null +++ b/test cases/common/139 simd/simd_sse42.c @@ -0,0 +1,26 @@ +#include +#include + +#include +#include +#include + +int sse42_available() { + return __builtin_cpu_supports("sse4.2"); +} + +void increment_sse42(float arr[4]) { + double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd1(1.0); + __m128d result = _mm_add_pd(val1, one); + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + _mm_crc32_u32(42, 99); /* A no-op, only here to use an SSE4.2 instruction. */ + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} diff --git a/test cases/common/139 simd/simdchecker.c b/test cases/common/139 simd/simdchecker.c index ece1c774a..62841b04f 100644 --- a/test cases/common/139 simd/simdchecker.c +++ b/test cases/common/139 simd/simdchecker.c @@ -17,6 +17,12 @@ int main(int argc, char **argv) { /* Add here. The first matched one is used so put "better" instruction * sets at the top. 
*/ +#if HAVE_SSE42 + if(fptr == NULL && sse42_available()) { + fptr = increment_sse42; + type = "SSE42"; + } +#endif #if HAVE_SSE41 if(fptr == NULL && sse41_available()) { fptr = increment_sse41; diff --git a/test cases/common/139 simd/simdfuncs.h b/test cases/common/139 simd/simdfuncs.h index 67a8e1b10..22781bf75 100644 --- a/test cases/common/139 simd/simdfuncs.h +++ b/test cases/common/139 simd/simdfuncs.h @@ -39,6 +39,11 @@ int sse41_available(); void increment_sse41(float arr[4]); #endif +#if HAVE_SSE42 +int sse42_available(); +void increment_sse42(float arr[4]); +#endif + #if HAVE_AVX int avx_available(); void increment_avx(float arr[4]); From 71ffbd5b73f827eeddffd22af1e783ccdc91dc80 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Fri, 17 Feb 2017 23:15:36 +0200 Subject: [PATCH 10/31] Add AVX support. --- test cases/common/139 simd/meson.build | 2 +- test cases/common/139 simd/simd_avx.c | 26 ++++++++++++++++++++++++ test cases/common/139 simd/simdchecker.c | 6 ++++++ 3 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 test cases/common/139 simd/simd_avx.c diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build index 7769bfdb6..1d23ffb78 100644 --- a/test cases/common/139 simd/meson.build +++ b/test cases/common/139 simd/meson.build @@ -18,7 +18,6 @@ cdata = configuration_data() # The following headers need to be added. Also Thumb and Altivec. 
# AES -# AVX # AVX512 simdlibs = [] @@ -30,6 +29,7 @@ simdarr = [['-mmmx', 'HAVE_MMX', 'simd_mmx', 'simd_mmx.c'], ['-mssse3', 'HAVE_SSSE3', 'simd_ssse3', 'simd_ssse3.c'], ['-msse4.1', 'HAVE_SSE41', 'simd_sse41', 'simd_sse41.c'], ['-msse4.2', 'HAVE_SSE42', 'simd_sse42', 'simd_sse42.c'], + ['-mavx', 'HAVE_AVX', 'simd_avx', 'simd_avx.c'], ] foreach ia : simdarr diff --git a/test cases/common/139 simd/simd_avx.c b/test cases/common/139 simd/simd_avx.c new file mode 100644 index 000000000..26d76a008 --- /dev/null +++ b/test cases/common/139 simd/simd_avx.c @@ -0,0 +1,26 @@ +#include +#include + +#include +#include +#include + +int avx_available() { + return __builtin_cpu_supports("avx"); +} + +void increment_avx(float arr[4]) { + double darr[4]; + darr[0] = arr[0]; + darr[1] = arr[1]; + darr[2] = arr[2]; + darr[3] = arr[3]; + __m256d val = _mm256_loadu_pd(darr); + __m256d one = _mm256_set1_pd(1.0); + __m256d result = _mm256_add_pd(val, one); + _mm256_storeu_pd(darr, result); + arr[0] = (float)darr[0]; + arr[1] = (float)darr[1]; + arr[2] = (float)darr[2]; + arr[3] = (float)darr[3]; +} diff --git a/test cases/common/139 simd/simdchecker.c b/test cases/common/139 simd/simdchecker.c index 62841b04f..da53f0ada 100644 --- a/test cases/common/139 simd/simdchecker.c +++ b/test cases/common/139 simd/simdchecker.c @@ -17,6 +17,12 @@ int main(int argc, char **argv) { /* Add here. The first matched one is used so put "better" instruction * sets at the top. */ +#if HAVE_AVX + if(fptr == NULL && avx_available()) { + fptr = increment_avx; + type = "AVX"; + } +#endif #if HAVE_SSE42 if(fptr == NULL && sse42_available()) { fptr = increment_sse42; From dbe028cc17ea402883755d46e1b64abff4769cb7 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Fri, 17 Feb 2017 23:26:57 +0200 Subject: [PATCH 11/31] Add AVX2. 
--- test cases/common/139 simd/meson.build | 1 + test cases/common/139 simd/simd_avx2.c | 27 ++++++++++++++++++++++++ test cases/common/139 simd/simdchecker.c | 6 ++++++ 3 files changed, 34 insertions(+) create mode 100644 test cases/common/139 simd/simd_avx2.c diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build index 1d23ffb78..60312186e 100644 --- a/test cases/common/139 simd/meson.build +++ b/test cases/common/139 simd/meson.build @@ -30,6 +30,7 @@ simdarr = [['-mmmx', 'HAVE_MMX', 'simd_mmx', 'simd_mmx.c'], ['-msse4.1', 'HAVE_SSE41', 'simd_sse41', 'simd_sse41.c'], ['-msse4.2', 'HAVE_SSE42', 'simd_sse42', 'simd_sse42.c'], ['-mavx', 'HAVE_AVX', 'simd_avx', 'simd_avx.c'], + ['-mavx2', 'HAVE_AVX2', 'simd_avx2', 'simd_avx2.c'], ] foreach ia : simdarr diff --git a/test cases/common/139 simd/simd_avx2.c b/test cases/common/139 simd/simd_avx2.c new file mode 100644 index 000000000..bca8c99a8 --- /dev/null +++ b/test cases/common/139 simd/simd_avx2.c @@ -0,0 +1,27 @@ +#include +#include + +#include +#include +#include + +int avx2_available() { + return __builtin_cpu_supports("avx2"); +} + +void increment_avx2(float arr[4]) { + double darr[4]; + darr[0] = arr[0]; + darr[1] = arr[1]; + darr[2] = arr[2]; + darr[3] = arr[3]; + __m256d val = _mm256_loadu_pd(darr); + __m256d one = _mm256_set1_pd(1.0); + __m256d result = _mm256_add_pd(val, one); + _mm256_storeu_pd(darr, result); + one = _mm256_permute4x64_pd(one, 66); /* A no-op, just here to use AVX2. */ + arr[0] = (float)darr[0]; + arr[1] = (float)darr[1]; + arr[2] = (float)darr[2]; + arr[3] = (float)darr[3]; +} diff --git a/test cases/common/139 simd/simdchecker.c b/test cases/common/139 simd/simdchecker.c index da53f0ada..1502ae7d1 100644 --- a/test cases/common/139 simd/simdchecker.c +++ b/test cases/common/139 simd/simdchecker.c @@ -17,6 +17,12 @@ int main(int argc, char **argv) { /* Add here. The first matched one is used so put "better" instruction * sets at the top. 
*/ +#if HAVE_AVX2 + if(fptr == NULL && avx2_available()) { + fptr = increment_avx2; + type = "AVX2"; + } +#endif #if HAVE_AVX if(fptr == NULL && avx_available()) { fptr = increment_avx; From 57729e5a782e2bb745179926cfb487b6f417b98f Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Sat, 18 Feb 2017 00:01:50 +0200 Subject: [PATCH 12/31] Added NEON support. --- test cases/common/139 simd/meson.build | 1 + test cases/common/139 simd/simd_neon.c | 20 ++++++++++++++++++++ test cases/common/139 simd/simdchecker.c | 6 ++++++ test cases/common/139 simd/simdfuncs.h | 6 +++--- 4 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 test cases/common/139 simd/simd_neon.c diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build index 60312186e..4dc352d29 100644 --- a/test cases/common/139 simd/meson.build +++ b/test cases/common/139 simd/meson.build @@ -31,6 +31,7 @@ simdarr = [['-mmmx', 'HAVE_MMX', 'simd_mmx', 'simd_mmx.c'], ['-msse4.2', 'HAVE_SSE42', 'simd_sse42', 'simd_sse42.c'], ['-mavx', 'HAVE_AVX', 'simd_avx', 'simd_avx.c'], ['-mavx2', 'HAVE_AVX2', 'simd_avx2', 'simd_avx2.c'], + ['-mfpu=neon', 'HAVE_NEON', 'simd_neon', 'simd_neon.c'], ] foreach ia : simdarr diff --git a/test cases/common/139 simd/simd_neon.c b/test cases/common/139 simd/simd_neon.c new file mode 100644 index 000000000..20820992b --- /dev/null +++ b/test cases/common/139 simd/simd_neon.c @@ -0,0 +1,20 @@ +#include +#include + +#include +#include + +int neon_available() { + return 1; /* Incorrect, but I don't know how to check this properly. 
*/ +} + +void increment_neon(float arr[4]) { + float32x2_t a1, a2, one; + a1 = vld1_f32(arr); + a2 = vld1_f32(&arr[2]); + one = vdup_n_f32(1.0); + a1 = vadd_f32(a1, one); + a2 = vadd_f32(a2, one); + vst1_f32(arr, a1); + vst1_f32(&arr[2], a2); +} diff --git a/test cases/common/139 simd/simdchecker.c b/test cases/common/139 simd/simdchecker.c index 1502ae7d1..222fbf3e3 100644 --- a/test cases/common/139 simd/simdchecker.c +++ b/test cases/common/139 simd/simdchecker.c @@ -17,6 +17,12 @@ int main(int argc, char **argv) { /* Add here. The first matched one is used so put "better" instruction * sets at the top. */ +#if HAVE_NEON + if(fptr == NULL && neon_available()) { + fptr = increment_neon; + type = "NEON"; + } +#endif #if HAVE_AVX2 if(fptr == NULL && avx2_available()) { fptr = increment_avx2; diff --git a/test cases/common/139 simd/simdfuncs.h b/test cases/common/139 simd/simdfuncs.h index 22781bf75..dfb056068 100644 --- a/test cases/common/139 simd/simdfuncs.h +++ b/test cases/common/139 simd/simdfuncs.h @@ -54,9 +54,9 @@ int avx2_available(); void increment_avx2(float arr[4]); #endif -#if HAVE_THUMB -int thumb_available(); -void increment_thumb(float arr[4]); +#if HAVE_NEON +int neon_available(); +void increment_neon(float arr[4]); #endif #if HAVE_ALTIVEC From d304aac504af2c627ec2857cc2e86070de089851 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Sat, 18 Feb 2017 01:40:37 +0200 Subject: [PATCH 13/31] Created simd module. 
--- mesonbuild/compilers/compilers.py | 26 +++++++++ mesonbuild/modules/simd.py | 74 ++++++++++++++++++++++++++ test cases/common/139 simd/meson.build | 56 ++++++------------- 3 files changed, 116 insertions(+), 40 deletions(-) create mode 100644 mesonbuild/modules/simd.py diff --git a/mesonbuild/compilers/compilers.py b/mesonbuild/compilers/compilers.py index a8ec5e362..0b196d212 100644 --- a/mesonbuild/compilers/compilers.py +++ b/mesonbuild/compilers/compilers.py @@ -228,6 +228,19 @@ base_options = {'b_pch': coredata.UserBooleanOption('b_pch', 'Use precompiled he True), } +gnulike_instruction_set_args = {'mmx': ['-mmmx'], + 'sse': ['-msse'], + 'sse2': ['-msse2'], + 'sse3': ['-msse3'], + 'ssse3': ['-mssse3'], + 'sse41': ['-msse4.1'], + 'sse42': ['-msse4.2'], + 'avx': ['-mavx'], + 'avx2': ['-mavx2'], + 'neon': ['-mfpu=neon'], + } + + def sanitizer_compile_args(value): if value == 'none': return [] @@ -755,6 +768,12 @@ class Compiler: return [] raise EnvironmentException('Language %s does not support linking whole archives.' % self.get_display_language()) + # Compiler arguments needed to enable the given instruction set. + # May be [] meaning nothing needed or None meaning the given set + # is not supported. 
+ def get_instruction_set_args(self, instruction_set): + return None + def build_unix_rpath_args(self, build_dir, from_dir, rpath_paths, install_rpath): if not rpath_paths and not install_rpath: return [] @@ -933,6 +952,10 @@ class GnuCompiler: return ['-mwindows'] return [] + def get_instruction_set_args(self, instruction_set): + return gnulike_instruction_set_args.get(instruction_set, None) + + class ClangCompiler: def __init__(self, clang_type): self.id = 'clang' @@ -1010,6 +1033,9 @@ class ClangCompiler: return result return ['-Wl,--whole-archive'] + args + ['-Wl,--no-whole-archive'] + def get_instruction_set_args(self, instruction_set): + return gnulike_instruction_set_args.get(instruction_set, None) + # Tested on linux for ICC 14.0.3, 15.0.6, 16.0.4, 17.0.1 class IntelCompiler: diff --git a/mesonbuild/modules/simd.py b/mesonbuild/modules/simd.py new file mode 100644 index 000000000..3a9fe59cf --- /dev/null +++ b/mesonbuild/modules/simd.py @@ -0,0 +1,74 @@ +# Copyright 2017 The Meson development team + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import mesonlib, compilers, build, mlog + +from . import ExtensionModule + +class SimdModule(ExtensionModule): + + def __init__(self): + super().__init__() + self.snippets.add('check') + # FIXME add Altivec and AVX512. 
+ self.isets = ('mmx', + 'sse', + 'sse2', + 'sse3', + 'ssse3', + 'sse41', + 'sse42', + 'avx', + 'avx2', + 'neon', + ) + + def check(self, interpreter, state, args, kwargs): + result = [] + if len(args) != 1: + raise mesonlib.MesonException('Check requires one argument, a name prefix for checks.') + prefix = args[0] + if not isinstance(prefix, str): + raise mesonlib.MesonException('Argument must be a string.') + if 'compiler' not in kwargs: + raise mesonlib.MesonException('Must specify compiler keyword') + compiler = kwargs['compiler'].compiler + if not isinstance(compiler, compilers.Compiler): + raise mesonlib.MesonException('Compiler argument must be a compiler object.') + if 'configuration' not in kwargs: + raise mesonlib.MesonException('Must specify configuration object.') + conf = kwargs['configuration'].held_object + if not isinstance(conf, build.ConfigurationData): + raise mesonlib.MesonException('Configuration must be a configuration object.') + for iset in self.isets: + if iset not in kwargs: + continue + iset_fname = kwargs[iset] # Migth also be an array or Files. static_library will validate. + args = compiler.get_instruction_set_args(iset) + if args is None: + continue + if len(args) > 0: + if not compiler.has_multi_arguments(args, state.environment): + mlog.log('Compiler supports %s:' % iset, mlog.red('NO')) + continue + mlog.log('Compiler supports %s:' % iset, mlog.green('YES')) + conf.values['HAVE_' + iset.upper()] = ('1', 'Compiler supports %s.' 
% iset) + libname = prefix + '_' + iset + lib_kwargs = {'sources': iset_fname, + compiler.get_language() + '_args': args} + result.append(interpreter.func_static_lib(None, [libname], lib_kwargs)) + return result + +def initialize(): + return SimdModule() diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build index 4dc352d29..c817c2d00 100644 --- a/test cases/common/139 simd/meson.build +++ b/test cases/common/139 simd/meson.build @@ -1,49 +1,25 @@ project('simd', 'c') +simd = import('simd') + cc = meson.get_compiler('c') cdata = configuration_data() -# The idea is to have a simd module and then do something like: -# -# static_libs = simd.check('mysimdstuff', -# mmx : 'mmx_funcs.c', -# sse : 'sse_funcs.c', -# sse2 : 'sse2_funcs.c', -# -# configuration : cdata, # adds HAVE_XXX -# compiler : cc) -# -# and then have a target that uses the result in links_with. - -# The following headers need to be added. Also Thumb and Altivec. -# AES -# AVX512 - -simdlibs = [] - -simdarr = [['-mmmx', 'HAVE_MMX', 'simd_mmx', 'simd_mmx.c'], - ['-msse', 'HAVE_SSE', 'simd_sse', 'simd_sse.c'], - ['-msse2', 'HAVE_SSE2', 'simd_sse2', 'simd_sse2.c'], - ['-msse3', 'HAVE_SSE3', 'simd_sse3', 'simd_sse3.c'], - ['-mssse3', 'HAVE_SSSE3', 'simd_ssse3', 'simd_ssse3.c'], - ['-msse4.1', 'HAVE_SSE41', 'simd_sse41', 'simd_sse41.c'], - ['-msse4.2', 'HAVE_SSE42', 'simd_sse42', 'simd_sse42.c'], - ['-mavx', 'HAVE_AVX', 'simd_avx', 'simd_avx.c'], - ['-mavx2', 'HAVE_AVX2', 'simd_avx2', 'simd_avx2.c'], - ['-mfpu=neon', 'HAVE_NEON', 'simd_neon', 'simd_neon.c'], -] - -foreach ia : simdarr - arg = ia[0] - def = ia[1] - libname = ia[2] - filename = ia[3] - if cc.has_argument(arg) - cdata.set(def, 1) - simdlibs += static_library(libname, filename, c_args : arg) - endif -endforeach +simdlibs = simd.check('mysimds', + mmx : 'simd_mmx.c', + sse : 'simd_sse.c', + sse2 : 'simd_sse2.c', + sse3 : 'simd_sse3.c', + ssse3 : 'simd_ssse3.c', + sse41 : 'simd_sse41.c', + sse42 : 
'simd_sse42.c', + avx : 'simd_avx.c', + avx2 : 'simd_avx2.c', + neon : 'simd_neon.c', + configuration : cdata, + compiler : cc) + configure_file(output : 'simdconfig.h', configuration : cdata) From 16ec3f0e195f31ec1f64f406ad138ca94fe2f6f2 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Sat, 18 Feb 2017 16:46:50 +0200 Subject: [PATCH 14/31] Fix a few OSX "features". --- mesonbuild/compilers/compilers.py | 2 +- test cases/common/139 simd/simd_ssse3.c | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/mesonbuild/compilers/compilers.py b/mesonbuild/compilers/compilers.py index 0b196d212..9829d20da 100644 --- a/mesonbuild/compilers/compilers.py +++ b/mesonbuild/compilers/compilers.py @@ -1006,7 +1006,7 @@ class ClangCompiler: def has_multi_arguments(self, args, env): return super().has_multi_arguments( - ['-Werror=unknown-warning-option'] + args, + ['-Werror=unknown-warning-option', '-Werror=unused-command-line-argument'] + args, env) def has_function(self, funcname, prefix, env, extra_args=None, dependencies=None): diff --git a/test cases/common/139 simd/simd_ssse3.c b/test cases/common/139 simd/simd_ssse3.c index 225fe253f..f5b66d13d 100644 --- a/test cases/common/139 simd/simd_ssse3.c +++ b/test cases/common/139 simd/simd_ssse3.c @@ -1,12 +1,17 @@ #include #include +#include #include #include #include int ssse3_available() { +#ifdef __APPLE__ + return 1; +#else return __builtin_cpu_supports("ssse3"); +#endif } void increment_ssse3(float arr[4]) { From fefbb296f7100a31044fe281735bb413807680f0 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Sun, 19 Feb 2017 12:34:23 +0200 Subject: [PATCH 15/31] Fix building with Clang on Raspbian. 
--- test cases/common/139 simd/meson.build | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build index c817c2d00..e62cc448f 100644 --- a/test cases/common/139 simd/meson.build +++ b/test cases/common/139 simd/meson.build @@ -6,6 +6,12 @@ cc = meson.get_compiler('c') cdata = configuration_data() +if not meson.is_cross_build() and host_machine.cpu_family() == 'arm' and cc.get_id() == 'clang' + message('Adding -march=armv7 because assuming that this build happens on Raspbian.') + message('Its Clang seems to be misconfigured and does not support NEON by default.') + add_project_arguments('-march=armv7', language : 'c') +endif + simdlibs = simd.check('mysimds', mmx : 'simd_mmx.c', sse : 'simd_sse.c', From 8396c4f3e6bfea6bbef6539055090902c089d375 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Sun, 19 Feb 2017 16:34:17 +0200 Subject: [PATCH 16/31] Added VS support to simd detector. --- mesonbuild/compilers/c.py | 10 +++++++++- mesonbuild/compilers/compilers.py | 24 ++++++++++++++++++++++++ mesonbuild/compilers/cpp.py | 4 ++-- mesonbuild/environment.py | 3 ++- mesonbuild/modules/simd.py | 1 + test cases/common/139 simd/simd_avx.c | 10 +++++++++- test cases/common/139 simd/simd_avx2.c | 13 ++++++++++++- test cases/common/139 simd/simd_mmx.c | 25 ++++++++++++++++++++++--- test cases/common/139 simd/simd_sse.c | 7 +++++++ test cases/common/139 simd/simd_sse2.c | 12 ++++++++++-- test cases/common/139 simd/simd_sse3.c | 9 ++++++++- test cases/common/139 simd/simd_sse41.c | 15 +++++++++++++-- test cases/common/139 simd/simd_sse42.c | 14 ++++++++++++-- test cases/common/139 simd/simd_ssse3.c | 14 +++++++++++++- 14 files changed, 144 insertions(+), 17 deletions(-) diff --git a/mesonbuild/compilers/c.py b/mesonbuild/compilers/c.py index cf9d1ee8f..99c7cf476 100644 --- a/mesonbuild/compilers/c.py +++ b/mesonbuild/compilers/c.py @@ -810,7 +810,7 @@ class VisualStudioCCompiler(CCompiler): 
std_warn_args = ['/W3'] std_opt_args = ['/O2'] - def __init__(self, exelist, version, is_cross, exe_wrap): + def __init__(self, exelist, version, is_cross, exe_wrap, is_64): CCompiler.__init__(self, exelist, version, is_cross, exe_wrap) self.id = 'msvc' # /showIncludes is needed for build dependency tracking in Ninja @@ -820,6 +820,7 @@ class VisualStudioCCompiler(CCompiler): '2': ['/W3'], '3': ['/W4']} self.base_options = ['b_pch'] # FIXME add lto, pgo and the like + self.is_64 = True # Override CCompiler.get_always_args def get_always_args(self): @@ -1005,3 +1006,10 @@ class VisualStudioCCompiler(CCompiler): if not isinstance(args, list): args = [args] return ['/WHOLEARCHIVE:' + x for x in args] + + def get_instruction_set_args(self, instruction_set): + if self.is_64: + return vs64_instruction_set_args.get(instruction_set, None) + return vs32_instruction_set_args.get(instruction_set, None) + + diff --git a/mesonbuild/compilers/compilers.py b/mesonbuild/compilers/compilers.py index 9829d20da..76e6f6094 100644 --- a/mesonbuild/compilers/compilers.py +++ b/mesonbuild/compilers/compilers.py @@ -240,6 +240,30 @@ gnulike_instruction_set_args = {'mmx': ['-mmmx'], 'neon': ['-mfpu=neon'], } +vs32_instruction_set_args = {'mmx': ['/arch:SSE'], # There does not seem to be a flag just for MMX + 'sse': ['/arch:SSE'], + 'sse2': ['/arch:SSE2'], + 'sse3': ['/arch:AVX'], # VS leaped from SSE2 directly to AVX. + 'sse41': ['/arch:AVX'], + 'sse42': ['/arch:AVX'], + 'avx': ['/arch:AVX'], + 'avx2': ['/arch:AVX2'], + 'neon': None, +} + +# The 64 bit compiler defaults to /arch:avx. 
+vs64_instruction_set_args = {'mmx': ['/arch:AVX'], + 'sse': ['/arch:AVX'], + 'sse2': ['/arch:AVX'], + 'sse3': ['/arch:AVX'], + 'ssse3': ['/arch:AVX'], + 'sse41': ['/arch:AVX'], + 'sse42': ['/arch:AVX'], + 'avx': ['/arch:AVX'], + 'avx2': ['/arch:AVX2'], + 'neon': None, +} + def sanitizer_compile_args(value): if value == 'none': diff --git a/mesonbuild/compilers/cpp.py b/mesonbuild/compilers/cpp.py index 01525b06b..a8fc8a353 100644 --- a/mesonbuild/compilers/cpp.py +++ b/mesonbuild/compilers/cpp.py @@ -173,10 +173,10 @@ class IntelCPPCompiler(IntelCompiler, CPPCompiler): class VisualStudioCPPCompiler(VisualStudioCCompiler, CPPCompiler): - def __init__(self, exelist, version, is_cross, exe_wrap): + def __init__(self, exelist, version, is_cross, exe_wrap, is_64): self.language = 'cpp' CPPCompiler.__init__(self, exelist, version, is_cross, exe_wrap) - VisualStudioCCompiler.__init__(self, exelist, version, is_cross, exe_wrap) + VisualStudioCCompiler.__init__(self, exelist, version, is_cross, exe_wrap, is_64) self.base_options = ['b_pch'] # FIXME add lto, pgo and the like def get_options(self): diff --git a/mesonbuild/environment.py b/mesonbuild/environment.py index b3d72e664..ed5a216ed 100644 --- a/mesonbuild/environment.py +++ b/mesonbuild/environment.py @@ -537,8 +537,9 @@ class Environment: # Visual Studio prints version number to stderr but # everything else to stdout. Why? Lord only knows. 
version = search_version(err) + is_64 = err.split()[0].endswith(' x64') cls = VisualStudioCCompiler if lang == 'c' else VisualStudioCPPCompiler - return cls(compiler, version, is_cross, exe_wrap) + return cls(compiler, version, is_cross, exe_wrap, is_64) if '(ICC)' in out: # TODO: add microsoft add check OSX inteltype = ICC_STANDARD diff --git a/mesonbuild/modules/simd.py b/mesonbuild/modules/simd.py index 3a9fe59cf..4a9bdd76f 100644 --- a/mesonbuild/modules/simd.py +++ b/mesonbuild/modules/simd.py @@ -57,6 +57,7 @@ class SimdModule(ExtensionModule): iset_fname = kwargs[iset] # Migth also be an array or Files. static_library will validate. args = compiler.get_instruction_set_args(iset) if args is None: + mlog.log('Compiler supports %s:' % iset, mlog.red('NO')) continue if len(args) > 0: if not compiler.has_multi_arguments(args, state.environment): diff --git a/test cases/common/139 simd/simd_avx.c b/test cases/common/139 simd/simd_avx.c index 26d76a008..771c9d9e7 100644 --- a/test cases/common/139 simd/simd_avx.c +++ b/test cases/common/139 simd/simd_avx.c @@ -1,13 +1,21 @@ #include #include +#include + +#ifdef _MSC_VER +#include +int avx_available() { + return 1; +} +#else #include #include -#include int avx_available() { return __builtin_cpu_supports("avx"); } +#endif void increment_avx(float arr[4]) { double darr[4]; diff --git a/test cases/common/139 simd/simd_avx2.c b/test cases/common/139 simd/simd_avx2.c index bca8c99a8..b912ee1aa 100644 --- a/test cases/common/139 simd/simd_avx2.c +++ b/test cases/common/139 simd/simd_avx2.c @@ -1,13 +1,24 @@ #include #include +#include + +/* + * FIXME add proper runtime detection for VS. 
+ */ +#ifdef _MSC_VER +#include +int avx2_available() { + return 0; +} +#else #include #include -#include int avx2_available() { return __builtin_cpu_supports("avx2"); } +#endif void increment_avx2(float arr[4]) { double darr[4]; diff --git a/test cases/common/139 simd/simd_mmx.c b/test cases/common/139 simd/simd_mmx.c index 351f36e63..dd78dd6e9 100644 --- a/test cases/common/139 simd/simd_mmx.c +++ b/test cases/common/139 simd/simd_mmx.c @@ -1,10 +1,27 @@ #include #include -#include -#include #include +#ifdef _MSC_VER +#include +int mmx_available() { + return 1; +} + +/* Contrary to MSDN documentation, MMX intrinsics + * just plain don't work. + */ +void increment_mmx(float arr[4]) { + arr[0]++; + arr[1]++; + arr[2]++; + arr[3]++; +} + +#else +#include +#include int mmx_available() { return __builtin_cpu_supports("mmx"); } @@ -19,7 +36,9 @@ void increment_mmx(float arr[4]) { int64_t unpacker = _m_to_int64(result); _mm_empty(); for(int i=0; i<4; i++) { - arr[i] = unpacker & ((1<<16)-1); + arr[i] = (float)(unpacker & ((1<<16)-1)); unpacker >>= 16; } } + +#endif diff --git a/test cases/common/139 simd/simd_sse.c b/test cases/common/139 simd/simd_sse.c index 11a74c917..bfd7276bb 100644 --- a/test cases/common/139 simd/simd_sse.c +++ b/test cases/common/139 simd/simd_sse.c @@ -1,6 +1,12 @@ #include #include +#ifdef _MSC_VER +#include +int sse_available() { + return 1; +} +#else #include #include #include @@ -8,6 +14,7 @@ int sse_available() { return __builtin_cpu_supports("sse"); } +#endif void increment_sse(float arr[4]) { __m128 val = _mm_load_ps(arr); diff --git a/test cases/common/139 simd/simd_sse2.c b/test cases/common/139 simd/simd_sse2.c index 52a7e4017..b2e4f11c2 100644 --- a/test cases/common/139 simd/simd_sse2.c +++ b/test cases/common/139 simd/simd_sse2.c @@ -1,19 +1,26 @@ #include #include - #include + +#ifdef _MSC_VER +int sse2_available() { + return 1; +} + +#else #include #include int sse2_available() { return __builtin_cpu_supports("sse2"); } +#endif 
void increment_sse2(float arr[4]) { double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d val2 = _mm_set_pd(arr[2], arr[3]); - __m128d one = _mm_set_pd1(1.0); + __m128d one = _mm_set_pd(1.0, 1.0); __m128d result = _mm_add_pd(val1, one); _mm_store_pd(darr, result); result = _mm_add_pd(val2, one); @@ -23,3 +30,4 @@ void increment_sse2(float arr[4]) { arr[2] = (float)darr[3]; arr[3] = (float)darr[2]; } + diff --git a/test cases/common/139 simd/simd_sse3.c b/test cases/common/139 simd/simd_sse3.c index db0eef62d..90630b1c8 100644 --- a/test cases/common/139 simd/simd_sse3.c +++ b/test cases/common/139 simd/simd_sse3.c @@ -1,6 +1,12 @@ #include #include +#ifdef _MSC_VER +#include +int sse3_available() { + return 1; +} +#else #include #include #include @@ -8,12 +14,13 @@ int sse3_available() { return __builtin_cpu_supports("sse3"); } +#endif void increment_sse3(float arr[4]) { double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d val2 = _mm_set_pd(arr[2], arr[3]); - __m128d one = _mm_set_pd1(1.0); + __m128d one = _mm_set_pd(1.0, 1.0); __m128d result = _mm_add_pd(val1, one); _mm_store_pd(darr, result); result = _mm_add_pd(val2, one); diff --git a/test cases/common/139 simd/simd_sse41.c b/test cases/common/139 simd/simd_sse41.c index 6087e4006..8555ddc1e 100644 --- a/test cases/common/139 simd/simd_sse41.c +++ b/test cases/common/139 simd/simd_sse41.c @@ -1,19 +1,30 @@ #include #include +#include + +#ifdef _MSC_VER +#include + +int sse41_available() { + return 1; +} + +#else #include #include -#include int sse41_available() { return __builtin_cpu_supports("sse4.1"); } +#endif + void increment_sse41(float arr[4]) { double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d val2 = _mm_set_pd(arr[2], arr[3]); - __m128d one = _mm_set_pd1(1.0); + __m128d one = _mm_set_pd(1.0, 1.0); __m128d result = _mm_add_pd(val1, one); result = _mm_ceil_pd(result); /* A no-op, only here to use a SSE4.1 intrinsic. 
*/ _mm_store_pd(darr, result); diff --git a/test cases/common/139 simd/simd_sse42.c b/test cases/common/139 simd/simd_sse42.c index 229ef039e..aee1837a9 100644 --- a/test cases/common/139 simd/simd_sse42.c +++ b/test cases/common/139 simd/simd_sse42.c @@ -1,19 +1,29 @@ #include #include +#include + +#ifdef _MSC_VER +#include + +int sse42_available() { + return 1; +} + +#else #include #include -#include int sse42_available() { return __builtin_cpu_supports("sse4.2"); } +#endif void increment_sse42(float arr[4]) { double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d val2 = _mm_set_pd(arr[2], arr[3]); - __m128d one = _mm_set_pd1(1.0); + __m128d one = _mm_set_pd(1.0, 1.0); __m128d result = _mm_add_pd(val1, one); _mm_store_pd(darr, result); result = _mm_add_pd(val2, one); diff --git a/test cases/common/139 simd/simd_ssse3.c b/test cases/common/139 simd/simd_ssse3.c index f5b66d13d..40443bebc 100644 --- a/test cases/common/139 simd/simd_ssse3.c +++ b/test cases/common/139 simd/simd_ssse3.c @@ -3,6 +3,16 @@ #include #include + +#ifdef _MSC_VER +#include + +int ssse3_available() { + return 1; +} + +#else + #include #include @@ -14,11 +24,13 @@ int ssse3_available() { #endif } +#endif + void increment_ssse3(float arr[4]) { double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d val2 = _mm_set_pd(arr[2], arr[3]); - __m128d one = _mm_set_pd1(1.0); + __m128d one = _mm_set_pd(1.0, 1.0); __m128d result = _mm_add_pd(val1, one); __m128i tmp1, tmp2; tmp1 = tmp2 = _mm_set1_epi16(0); From 181510bd6eb767361da5e02e41be08cc65b36b08 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Sun, 19 Feb 2017 20:06:49 +0200 Subject: [PATCH 17/31] Fix checks on MinGW and VS2010. 
--- mesonbuild/compilers/c.py | 7 ++++++- mesonbuild/compilers/compilers.py | 2 +- test cases/common/139 simd/simd_mmx.c | 17 ++++++++++++++--- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/mesonbuild/compilers/c.py b/mesonbuild/compilers/c.py index 99c7cf476..4aac54222 100644 --- a/mesonbuild/compilers/c.py +++ b/mesonbuild/compilers/c.py @@ -820,7 +820,7 @@ class VisualStudioCCompiler(CCompiler): '2': ['/W3'], '3': ['/W4']} self.base_options = ['b_pch'] # FIXME add lto, pgo and the like - self.is_64 = True + self.is_64 = is_64 # Override CCompiler.get_always_args def get_always_args(self): @@ -1010,6 +1010,11 @@ class VisualStudioCCompiler(CCompiler): def get_instruction_set_args(self, instruction_set): if self.is_64: return vs64_instruction_set_args.get(instruction_set, None) + if self.version.split('.')[0] == '16' and instruction_set == 'avx': + # VS documentation says that this exists and should work, but + # it does not. The headers do not contain AVX intrinsics + # and the can not be called. + return None return vs32_instruction_set_args.get(instruction_set, None) diff --git a/mesonbuild/compilers/compilers.py b/mesonbuild/compilers/compilers.py index 76e6f6094..0be390847 100644 --- a/mesonbuild/compilers/compilers.py +++ b/mesonbuild/compilers/compilers.py @@ -262,7 +262,7 @@ vs64_instruction_set_args = {'mmx': ['/arch:AVX'], 'avx': ['/arch:AVX'], 'avx2': ['/arch:AVX2'], 'neon': None, -} + } def sanitizer_compile_args(value): diff --git a/test cases/common/139 simd/simd_mmx.c b/test cases/common/139 simd/simd_mmx.c index dd78dd6e9..6a959db27 100644 --- a/test cases/common/139 simd/simd_mmx.c +++ b/test cases/common/139 simd/simd_mmx.c @@ -8,7 +8,6 @@ int mmx_available() { return 1; } - /* Contrary to MSDN documentation, MMX intrinsics * just plain don't work. 
*/ @@ -18,7 +17,18 @@ void increment_mmx(float arr[4]) { arr[2]++; arr[3]++; } - +#elif defined(__MINGW32__) +int mmx_available() { + return 1; +} +/* MinGW does not seem to ship with MMX or it is broken. + */ +void increment_mmx(float arr[4]) { + arr[0]++; + arr[1]++; + arr[2]++; + arr[3]++; +} #else #include #include @@ -30,12 +40,13 @@ void increment_mmx(float arr[4]) { /* Super ugly but we know that values in arr are always small * enough to fit in int16; */ + int i; __m64 packed = _mm_set_pi16(arr[3], arr[2], arr[1], arr[0]); __m64 incr = _mm_set1_pi16(1); __m64 result = _mm_add_pi16(packed, incr); int64_t unpacker = _m_to_int64(result); _mm_empty(); - for(int i=0; i<4; i++) { + for(i=0; i<4; i++) { arr[i] = (float)(unpacker & ((1<<16)-1)); unpacker >>= 16; } From 2181d8f2abc977fa12ed72d9bd8369db9370dae8 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Sun, 19 Feb 2017 20:40:17 +0200 Subject: [PATCH 18/31] A few fixes for OSX and VS2010. --- test cases/common/139 simd/meson.build | 4 ++++ test cases/common/139 simd/simd_ssse3.c | 3 +++ 2 files changed, 7 insertions(+) diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build index e62cc448f..26a291b77 100644 --- a/test cases/common/139 simd/meson.build +++ b/test cases/common/139 simd/meson.build @@ -12,6 +12,10 @@ if not meson.is_cross_build() and host_machine.cpu_family() == 'arm' and cc.get_ add_project_arguments('-march=armv7', language : 'c') endif +if cc.get_id() == 'msvc' and version_compare(cc.version(), '<17') + error('MESON_SKIP_TEST VS2010 produces broken binaries on x86.') +endif + simdlibs = simd.check('mysimds', mmx : 'simd_mmx.c', sse : 'simd_sse.c', diff --git a/test cases/common/139 simd/simd_ssse3.c b/test cases/common/139 simd/simd_ssse3.c index 40443bebc..ab4dff4f8 100644 --- a/test cases/common/139 simd/simd_ssse3.c +++ b/test cases/common/139 simd/simd_ssse3.c @@ -19,6 +19,9 @@ int ssse3_available() { int ssse3_available() { #ifdef __APPLE__ return 1; 
+#elif defined(__clang__) + /* https://github.com/numpy/numpy/issues/8130 */ + return __builtin_cpu_supports("sse4.1"); #else return __builtin_cpu_supports("ssse3"); #endif From f6f5644212ecfd54238e817dc6771c7d57b38ba8 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Sun, 19 Feb 2017 21:32:51 +0200 Subject: [PATCH 19/31] Fix MMX on 32 bit x86. --- test cases/common/139 simd/simd_mmx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test cases/common/139 simd/simd_mmx.c b/test cases/common/139 simd/simd_mmx.c index 6a959db27..17f42dde2 100644 --- a/test cases/common/139 simd/simd_mmx.c +++ b/test cases/common/139 simd/simd_mmx.c @@ -44,7 +44,11 @@ void increment_mmx(float arr[4]) { __m64 packed = _mm_set_pi16(arr[3], arr[2], arr[1], arr[0]); __m64 incr = _mm_set1_pi16(1); __m64 result = _mm_add_pi16(packed, incr); - int64_t unpacker = _m_to_int64(result); + /* Should be + * int64_t unpacker = _m_to_int64(result); + * but it does not exist on 32 bit platforms for some reason. + */ + int64_t unpacker = (int64_t)(result); _mm_empty(); for(i=0; i<4; i++) { arr[i] = (float)(unpacker & ((1<<16)-1)); From ef9e03b84786c4b8be63537eb9a1f58e991b19f3 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Sun, 19 Feb 2017 21:45:54 +0200 Subject: [PATCH 20/31] Hardcode processor features on OSX because it ships a broken __builtin_cpu_supports. 
--- test cases/common/139 simd/meson.build | 2 +- test cases/common/139 simd/simd_avx.c | 6 +++++- test cases/common/139 simd/simd_avx2.c | 4 ++++ test cases/common/139 simd/simd_mmx.c | 6 +++++- test cases/common/139 simd/simd_sse.c | 5 +++++ test cases/common/139 simd/simd_sse2.c | 4 ++++ test cases/common/139 simd/simd_sse3.c | 5 +++++ test cases/common/139 simd/simd_sse41.c | 5 ++++- test cases/common/139 simd/simd_sse42.c | 7 +++++++ 9 files changed, 40 insertions(+), 4 deletions(-) diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build index 26a291b77..9eff0a5eb 100644 --- a/test cases/common/139 simd/meson.build +++ b/test cases/common/139 simd/meson.build @@ -12,7 +12,7 @@ if not meson.is_cross_build() and host_machine.cpu_family() == 'arm' and cc.get_ add_project_arguments('-march=armv7', language : 'c') endif -if cc.get_id() == 'msvc' and version_compare(cc.version(), '<17') +if cc.get_id() == 'msvc' and cc.version().version_compare('<17') error('MESON_SKIP_TEST VS2010 produces broken binaries on x86.') endif diff --git a/test cases/common/139 simd/simd_avx.c b/test cases/common/139 simd/simd_avx.c index 771c9d9e7..aa259330c 100644 --- a/test cases/common/139 simd/simd_avx.c +++ b/test cases/common/139 simd/simd_avx.c @@ -8,14 +8,18 @@ int avx_available() { return 1; } #else - #include #include +#ifdef __APPLE__ +int avx_available() { return 1; } +#else + int avx_available() { return __builtin_cpu_supports("avx"); } #endif +#endif void increment_avx(float arr[4]) { double darr[4]; diff --git a/test cases/common/139 simd/simd_avx2.c b/test cases/common/139 simd/simd_avx2.c index b912ee1aa..15297eb2b 100644 --- a/test cases/common/139 simd/simd_avx2.c +++ b/test cases/common/139 simd/simd_avx2.c @@ -15,10 +15,14 @@ int avx2_available() { #include #include +#if defined(__APPLE__) +int avx2_available() { return 0; } +#else int avx2_available() { return __builtin_cpu_supports("avx2"); } #endif +#endif void 
increment_avx2(float arr[4]) { double darr[4]; diff --git a/test cases/common/139 simd/simd_mmx.c b/test cases/common/139 simd/simd_mmx.c index 17f42dde2..731abd14a 100644 --- a/test cases/common/139 simd/simd_mmx.c +++ b/test cases/common/139 simd/simd_mmx.c @@ -32,10 +32,14 @@ void increment_mmx(float arr[4]) { #else #include #include + +#if defined(__APPLE__) +int mmx_available() { return 1; } +#else int mmx_available() { return __builtin_cpu_supports("mmx"); } - +#endif void increment_mmx(float arr[4]) { /* Super ugly but we know that values in arr are always small * enough to fit in int16; diff --git a/test cases/common/139 simd/simd_sse.c b/test cases/common/139 simd/simd_sse.c index bfd7276bb..3c9fe622e 100644 --- a/test cases/common/139 simd/simd_sse.c +++ b/test cases/common/139 simd/simd_sse.c @@ -7,14 +7,19 @@ int sse_available() { return 1; } #else + #include #include #include +#if defined(__APPLE__) +int sse_available() { return 1; } +#else int sse_available() { return __builtin_cpu_supports("sse"); } #endif +#endif void increment_sse(float arr[4]) { __m128 val = _mm_load_ps(arr); diff --git a/test cases/common/139 simd/simd_sse2.c b/test cases/common/139 simd/simd_sse2.c index b2e4f11c2..02745337b 100644 --- a/test cases/common/139 simd/simd_sse2.c +++ b/test cases/common/139 simd/simd_sse2.c @@ -11,10 +11,14 @@ int sse2_available() { #include #include +#if defined(__APPLE__) +int sse2_available() { return 1; } +#else int sse2_available() { return __builtin_cpu_supports("sse2"); } #endif +#endif void increment_sse2(float arr[4]) { double darr[4]; diff --git a/test cases/common/139 simd/simd_sse3.c b/test cases/common/139 simd/simd_sse3.c index 90630b1c8..e97d10285 100644 --- a/test cases/common/139 simd/simd_sse3.c +++ b/test cases/common/139 simd/simd_sse3.c @@ -7,14 +7,19 @@ int sse3_available() { return 1; } #else + #include #include #include +#if defined(__APPLE__) +int sse3_available() { return 1; } +#else int sse3_available() { return 
__builtin_cpu_supports("sse3"); } #endif +#endif void increment_sse3(float arr[4]) { double darr[4]; diff --git a/test cases/common/139 simd/simd_sse41.c b/test cases/common/139 simd/simd_sse41.c index 8555ddc1e..0308c7e49 100644 --- a/test cases/common/139 simd/simd_sse41.c +++ b/test cases/common/139 simd/simd_sse41.c @@ -14,10 +14,13 @@ int sse41_available() { #include #include +#if defined(__APPLE__) +int sse41_available() { return 1; } +#else int sse41_available() { return __builtin_cpu_supports("sse4.1"); } - +#endif #endif void increment_sse41(float arr[4]) { diff --git a/test cases/common/139 simd/simd_sse42.c b/test cases/common/139 simd/simd_sse42.c index aee1837a9..137ffc441 100644 --- a/test cases/common/139 simd/simd_sse42.c +++ b/test cases/common/139 simd/simd_sse42.c @@ -14,11 +14,18 @@ int sse42_available() { #include #include +#ifdef __APPLE__ +int sse42_available() { + return 1; +} +#else int sse42_available() { return __builtin_cpu_supports("sse4.2"); } #endif +#endif + void increment_sse42(float arr[4]) { double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); From 5660c766ab276f91e9b8f79dbbea9cebbe17a395 Mon Sep 17 00:00:00 2001 From: Nirbheek Chauhan Date: Tue, 21 Feb 2017 03:56:56 +0530 Subject: [PATCH 21/31] unittests: Ensure that the compiler version is not blank --- run_unittests.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/run_unittests.py b/run_unittests.py index 6a5030206..e286a66ed 100755 --- a/run_unittests.py +++ b/run_unittests.py @@ -948,6 +948,7 @@ class AllPlatformTests(BasePlatformTests): # Detect with evar and do sanity checks on that if evar in os.environ: ecc = getattr(env, 'detect_{}_compiler'.format(lang))(False) + self.assertTrue(ecc.version) elinker = env.detect_static_linker(ecc) # Pop it so we don't use it for the next detection evalue = os.environ.pop(evar) @@ -971,6 +972,7 @@ class AllPlatformTests(BasePlatformTests): self.assertEqual(ecc.get_exelist(), shlex.split(evalue)) # Do auto-detection of 
compiler based on platform, PATH, etc. cc = getattr(env, 'detect_{}_compiler'.format(lang))(False) + self.assertTrue(cc.version) linker = env.detect_static_linker(cc) # Check compiler type if isinstance(cc, gnu): @@ -1027,6 +1029,8 @@ class AllPlatformTests(BasePlatformTests): # Ensure that the exelist is correct self.assertEqual(wcc.get_exelist(), wrappercc) self.assertEqual(wlinker.get_exelist(), wrapperlinker) + # Ensure that the version detection worked correctly + self.assertEqual(cc.version, wcc.version) def test_always_prefer_c_compiler_for_asm(self): testdir = os.path.join(self.common_test_dir, '141 c cpp and asm') From 37057abfefcc77be5d5350928421ae24e8b621af Mon Sep 17 00:00:00 2001 From: Nirbheek Chauhan Date: Tue, 21 Feb 2017 04:48:07 +0530 Subject: [PATCH 22/31] unittests: Check value of cc.is_64 with MSVC --- run_unittests.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/run_unittests.py b/run_unittests.py index e286a66ed..a6448ba0b 100755 --- a/run_unittests.py +++ b/run_unittests.py @@ -1006,6 +1006,13 @@ class AllPlatformTests(BasePlatformTests): self.assertTrue(is_windows()) self.assertIsInstance(linker, lib) self.assertEqual(cc.id, 'msvc') + self.assertTrue(hasattr(cc, 'is_64')) + # If we're in the appveyor CI, we know what the compiler will be + if 'arch' in os.environ: + if os.environ['arch'] == 'x64': + self.assertTrue(cc.is_64) + else: + self.assertFalse(cc.is_64) # Set evar ourselves to a wrapper script that just calls the same # exelist + some argument. This is meant to test that setting # something like `ccache gcc -pipe` or `distcc ccache gcc` works. 
@@ -1031,6 +1038,8 @@ class AllPlatformTests(BasePlatformTests): self.assertEqual(wlinker.get_exelist(), wrapperlinker) # Ensure that the version detection worked correctly self.assertEqual(cc.version, wcc.version) + if hasattr(cc, 'is_64'): + self.assertEqual(cc.is_64, wcc.is_64) def test_always_prefer_c_compiler_for_asm(self): testdir = os.path.join(self.common_test_dir, '141 c cpp and asm') From afdaedea01e372ab783477e6f2ce0ca81797be07 Mon Sep 17 00:00:00 2001 From: Nirbheek Chauhan Date: Tue, 21 Feb 2017 04:48:56 +0530 Subject: [PATCH 23/31] msvc: Split stderr by line and raise exception if empty std.split() splits on whitespace, but we want the first line. --- mesonbuild/environment.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mesonbuild/environment.py b/mesonbuild/environment.py index ed5a216ed..29ff19e2d 100644 --- a/mesonbuild/environment.py +++ b/mesonbuild/environment.py @@ -537,7 +537,10 @@ class Environment: # Visual Studio prints version number to stderr but # everything else to stdout. Why? Lord only knows. version = search_version(err) - is_64 = err.split()[0].endswith(' x64') + if not err or not err.split('\n')[0]: + m = 'Failed to detect MSVC compiler arch: stderr was\n{!r}' + raise EnvironmentException(m.format(err)) + is_64 = err.split('\n')[0].endswith(' x64') cls = VisualStudioCCompiler if lang == 'c' else VisualStudioCPPCompiler return cls(compiler, version, is_cross, exe_wrap, is_64) if '(ICC)' in out: From a27b4c4640c3b5a16b7aed947fd6258b7f736791 Mon Sep 17 00:00:00 2001 From: Nirbheek Chauhan Date: Tue, 21 Feb 2017 21:11:26 +0530 Subject: [PATCH 24/31] unittests: Don't pass /nologo to `cl` during detection This causes it to not output the version information to stderr, which we need to extract the version and the architecture. Found by Jussi. 
--- run_unittests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_unittests.py b/run_unittests.py index a6448ba0b..cc034c32d 100755 --- a/run_unittests.py +++ b/run_unittests.py @@ -1017,7 +1017,7 @@ class AllPlatformTests(BasePlatformTests): # exelist + some argument. This is meant to test that setting # something like `ccache gcc -pipe` or `distcc ccache gcc` works. wrapper = os.path.join(testdir, 'compiler wrapper.py') - wrappercc = [sys.executable, wrapper] + cc.get_exelist() + cc.get_always_args() + wrappercc = [sys.executable, wrapper] + cc.get_exelist() + ['-DSOME_ARG'] wrappercc_s = '' for w in wrappercc: wrappercc_s += shlex.quote(w) + ' ' From fe86c9c39953b47946fbf242ad3ced3d75c4d2b7 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Tue, 21 Feb 2017 21:53:19 +0200 Subject: [PATCH 25/31] Do not use AVX on OSX since we can't detect it reliably at runtime and some CI machines do not seem to have it. --- test cases/common/139 simd/simd_avx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test cases/common/139 simd/simd_avx.c b/test cases/common/139 simd/simd_avx.c index aa259330c..989620ba3 100644 --- a/test cases/common/139 simd/simd_avx.c +++ b/test cases/common/139 simd/simd_avx.c @@ -12,7 +12,12 @@ int avx_available() { #include #ifdef __APPLE__ -int avx_available() { return 1; } +/* + * Apple ships a broken __builtin_cpu_supports and + * some machines in the CI farm seem to be too + * old to have AVX so just always return 0 here. + */ +int avx_available() { return 0; } #else int avx_available() { From 6119beed4db77529830e466f581591c86e32e0d0 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Thu, 23 Feb 2017 00:21:01 +0200 Subject: [PATCH 26/31] Create a new configuration data object and return it as the result. 
--- mesonbuild/modules/simd.py | 9 +++------ test cases/common/139 simd/meson.build | 9 +++++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/mesonbuild/modules/simd.py b/mesonbuild/modules/simd.py index 4a9bdd76f..b666f4bbe 100644 --- a/mesonbuild/modules/simd.py +++ b/mesonbuild/modules/simd.py @@ -46,11 +46,8 @@ class SimdModule(ExtensionModule): compiler = kwargs['compiler'].compiler if not isinstance(compiler, compilers.Compiler): raise mesonlib.MesonException('Compiler argument must be a compiler object.') - if 'configuration' not in kwargs: - raise mesonlib.MesonException('Must specify configuration object.') - conf = kwargs['configuration'].held_object - if not isinstance(conf, build.ConfigurationData): - raise mesonlib.MesonException('Configuration must be a configuration object.') + cdata = interpreter.func_configuration_data(None, [], {}) + conf = cdata.held_object for iset in self.isets: if iset not in kwargs: continue @@ -69,7 +66,7 @@ class SimdModule(ExtensionModule): lib_kwargs = {'sources': iset_fname, compiler.get_language() + '_args': args} result.append(interpreter.func_static_lib(None, [libname], lib_kwargs)) - return result + return [result, cdata] def initialize(): return SimdModule() diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build index 9eff0a5eb..2b7d722a6 100644 --- a/test cases/common/139 simd/meson.build +++ b/test cases/common/139 simd/meson.build @@ -4,8 +4,6 @@ simd = import('simd') cc = meson.get_compiler('c') -cdata = configuration_data() - if not meson.is_cross_build() and host_machine.cpu_family() == 'arm' and cc.get_id() == 'clang' message('Adding -march=armv7 because assuming that this build happens on Raspbian.') message('Its Clang seems to be misconfigured and does not support NEON by default.') @@ -16,7 +14,8 @@ if cc.get_id() == 'msvc' and cc.version().version_compare('<17') error('MESON_SKIP_TEST VS2010 produces broken binaries on x86.') endif -simdlibs = 
simd.check('mysimds', +# FIXME add [a, b] = function() +rval = simd.check('mysimds', mmx : 'simd_mmx.c', sse : 'simd_sse.c', sse2 : 'simd_sse2.c', @@ -27,9 +26,11 @@ simdlibs = simd.check('mysimds', avx : 'simd_avx.c', avx2 : 'simd_avx2.c', neon : 'simd_neon.c', - configuration : cdata, compiler : cc) +simdlibs = rval[0] +# FIXME add cdata1.merge_from(cdata2) +cdata = rval[1] configure_file(output : 'simdconfig.h', configuration : cdata) From f78cf53141a1da1a35fb02324a154f371c488141 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Sun, 26 Feb 2017 21:07:09 +0200 Subject: [PATCH 27/31] Created merge_from method for ConfigurationData. --- mesonbuild/interpreter.py | 11 +++++++++++ test cases/common/139 simd/meson.build | 5 +++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/mesonbuild/interpreter.py b/mesonbuild/interpreter.py index 63cdf9ea0..43ddd72a1 100644 --- a/mesonbuild/interpreter.py +++ b/mesonbuild/interpreter.py @@ -161,6 +161,7 @@ class ConfigurationDataHolder(MutableInterpreterObject): 'set_quoted': self.set_quoted_method, 'has': self.has_method, 'get': self.get_method, + 'merge_from': self.merge_from_method, }) def is_used(self): @@ -221,6 +222,16 @@ class ConfigurationDataHolder(MutableInterpreterObject): def keys(self): return self.held_object.values.keys() + def merge_from_method(self, args, kwargs): + if len(args) != 1: + raise InterpreterException('Merge_from takes one positional argument.') + from_object = args[0] + if not isinstance(from_object, ConfigurationDataHolder): + raise InterpreterException('Merge_from argument must be a configuration data object.') + from_object = from_object.held_object + for k, v in from_object.values.items(): + self.held_object.values[k] = v + # Interpreter objects can not be pickled so we must have # these wrappers. 
diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build index 2b7d722a6..d84b72248 100644 --- a/test cases/common/139 simd/meson.build +++ b/test cases/common/139 simd/meson.build @@ -4,6 +4,8 @@ simd = import('simd') cc = meson.get_compiler('c') +cdata = configuration_data() + if not meson.is_cross_build() and host_machine.cpu_family() == 'arm' and cc.get_id() == 'clang' message('Adding -march=armv7 because assuming that this build happens on Raspbian.') message('Its Clang seems to be misconfigured and does not support NEON by default.') @@ -29,8 +31,7 @@ rval = simd.check('mysimds', compiler : cc) simdlibs = rval[0] -# FIXME add cdata1.merge_from(cdata2) -cdata = rval[1] +cdata.merge_from(rval[1]) configure_file(output : 'simdconfig.h', configuration : cdata) From be92a6a3a60666f4dd00c96833cfdbf7706ffb05 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Mon, 29 May 2017 20:17:18 +0300 Subject: [PATCH 28/31] Renamed test dir. --- test cases/common/{139 simd => 153 simd}/fallback.c | 0 test cases/common/{139 simd => 153 simd}/meson.build | 0 test cases/common/{139 simd => 153 simd}/simd_avx.c | 0 test cases/common/{139 simd => 153 simd}/simd_avx2.c | 0 test cases/common/{139 simd => 153 simd}/simd_mmx.c | 0 test cases/common/{139 simd => 153 simd}/simd_neon.c | 0 test cases/common/{139 simd => 153 simd}/simd_sse.c | 0 test cases/common/{139 simd => 153 simd}/simd_sse2.c | 0 test cases/common/{139 simd => 153 simd}/simd_sse3.c | 0 test cases/common/{139 simd => 153 simd}/simd_sse41.c | 0 test cases/common/{139 simd => 153 simd}/simd_sse42.c | 0 test cases/common/{139 simd => 153 simd}/simd_ssse3.c | 0 test cases/common/{139 simd => 153 simd}/simdchecker.c | 0 test cases/common/{139 simd => 153 simd}/simdfuncs.h | 0 14 files changed, 0 insertions(+), 0 deletions(-) rename test cases/common/{139 simd => 153 simd}/fallback.c (100%) rename test cases/common/{139 simd => 153 simd}/meson.build (100%) rename test cases/common/{139 
simd => 153 simd}/simd_avx.c (100%) rename test cases/common/{139 simd => 153 simd}/simd_avx2.c (100%) rename test cases/common/{139 simd => 153 simd}/simd_mmx.c (100%) rename test cases/common/{139 simd => 153 simd}/simd_neon.c (100%) rename test cases/common/{139 simd => 153 simd}/simd_sse.c (100%) rename test cases/common/{139 simd => 153 simd}/simd_sse2.c (100%) rename test cases/common/{139 simd => 153 simd}/simd_sse3.c (100%) rename test cases/common/{139 simd => 153 simd}/simd_sse41.c (100%) rename test cases/common/{139 simd => 153 simd}/simd_sse42.c (100%) rename test cases/common/{139 simd => 153 simd}/simd_ssse3.c (100%) rename test cases/common/{139 simd => 153 simd}/simdchecker.c (100%) rename test cases/common/{139 simd => 153 simd}/simdfuncs.h (100%) diff --git a/test cases/common/139 simd/fallback.c b/test cases/common/153 simd/fallback.c similarity index 100% rename from test cases/common/139 simd/fallback.c rename to test cases/common/153 simd/fallback.c diff --git a/test cases/common/139 simd/meson.build b/test cases/common/153 simd/meson.build similarity index 100% rename from test cases/common/139 simd/meson.build rename to test cases/common/153 simd/meson.build diff --git a/test cases/common/139 simd/simd_avx.c b/test cases/common/153 simd/simd_avx.c similarity index 100% rename from test cases/common/139 simd/simd_avx.c rename to test cases/common/153 simd/simd_avx.c diff --git a/test cases/common/139 simd/simd_avx2.c b/test cases/common/153 simd/simd_avx2.c similarity index 100% rename from test cases/common/139 simd/simd_avx2.c rename to test cases/common/153 simd/simd_avx2.c diff --git a/test cases/common/139 simd/simd_mmx.c b/test cases/common/153 simd/simd_mmx.c similarity index 100% rename from test cases/common/139 simd/simd_mmx.c rename to test cases/common/153 simd/simd_mmx.c diff --git a/test cases/common/139 simd/simd_neon.c b/test cases/common/153 simd/simd_neon.c similarity index 100% rename from test cases/common/139 
simd/simd_neon.c rename to test cases/common/153 simd/simd_neon.c diff --git a/test cases/common/139 simd/simd_sse.c b/test cases/common/153 simd/simd_sse.c similarity index 100% rename from test cases/common/139 simd/simd_sse.c rename to test cases/common/153 simd/simd_sse.c diff --git a/test cases/common/139 simd/simd_sse2.c b/test cases/common/153 simd/simd_sse2.c similarity index 100% rename from test cases/common/139 simd/simd_sse2.c rename to test cases/common/153 simd/simd_sse2.c diff --git a/test cases/common/139 simd/simd_sse3.c b/test cases/common/153 simd/simd_sse3.c similarity index 100% rename from test cases/common/139 simd/simd_sse3.c rename to test cases/common/153 simd/simd_sse3.c diff --git a/test cases/common/139 simd/simd_sse41.c b/test cases/common/153 simd/simd_sse41.c similarity index 100% rename from test cases/common/139 simd/simd_sse41.c rename to test cases/common/153 simd/simd_sse41.c diff --git a/test cases/common/139 simd/simd_sse42.c b/test cases/common/153 simd/simd_sse42.c similarity index 100% rename from test cases/common/139 simd/simd_sse42.c rename to test cases/common/153 simd/simd_sse42.c diff --git a/test cases/common/139 simd/simd_ssse3.c b/test cases/common/153 simd/simd_ssse3.c similarity index 100% rename from test cases/common/139 simd/simd_ssse3.c rename to test cases/common/153 simd/simd_ssse3.c diff --git a/test cases/common/139 simd/simdchecker.c b/test cases/common/153 simd/simdchecker.c similarity index 100% rename from test cases/common/139 simd/simdchecker.c rename to test cases/common/153 simd/simdchecker.c diff --git a/test cases/common/139 simd/simdfuncs.h b/test cases/common/153 simd/simdfuncs.h similarity index 100% rename from test cases/common/139 simd/simdfuncs.h rename to test cases/common/153 simd/simdfuncs.h From ce77fb89b991ef0537d72538ae0933b13fe09824 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Mon, 17 Jul 2017 19:40:21 +0300 Subject: [PATCH 29/31] Renamed test case number to avoid dupes. 
--- mesonbuild/compilers/c.py | 2 ++ mesonbuild/modules/simd.py | 2 +- test cases/common/{153 simd => 155 simd}/fallback.c | 0 test cases/common/{153 simd => 155 simd}/meson.build | 0 test cases/common/{153 simd => 155 simd}/simd_avx.c | 0 test cases/common/{153 simd => 155 simd}/simd_avx2.c | 0 test cases/common/{153 simd => 155 simd}/simd_mmx.c | 0 test cases/common/{153 simd => 155 simd}/simd_neon.c | 0 test cases/common/{153 simd => 155 simd}/simd_sse.c | 0 test cases/common/{153 simd => 155 simd}/simd_sse2.c | 0 test cases/common/{153 simd => 155 simd}/simd_sse3.c | 0 test cases/common/{153 simd => 155 simd}/simd_sse41.c | 0 test cases/common/{153 simd => 155 simd}/simd_sse42.c | 0 test cases/common/{153 simd => 155 simd}/simd_ssse3.c | 0 test cases/common/{153 simd => 155 simd}/simdchecker.c | 0 test cases/common/{153 simd => 155 simd}/simdfuncs.h | 0 16 files changed, 3 insertions(+), 1 deletion(-) rename test cases/common/{153 simd => 155 simd}/fallback.c (100%) rename test cases/common/{153 simd => 155 simd}/meson.build (100%) rename test cases/common/{153 simd => 155 simd}/simd_avx.c (100%) rename test cases/common/{153 simd => 155 simd}/simd_avx2.c (100%) rename test cases/common/{153 simd => 155 simd}/simd_mmx.c (100%) rename test cases/common/{153 simd => 155 simd}/simd_neon.c (100%) rename test cases/common/{153 simd => 155 simd}/simd_sse.c (100%) rename test cases/common/{153 simd => 155 simd}/simd_sse2.c (100%) rename test cases/common/{153 simd => 155 simd}/simd_sse3.c (100%) rename test cases/common/{153 simd => 155 simd}/simd_sse41.c (100%) rename test cases/common/{153 simd => 155 simd}/simd_sse42.c (100%) rename test cases/common/{153 simd => 155 simd}/simd_ssse3.c (100%) rename test cases/common/{153 simd => 155 simd}/simdchecker.c (100%) rename test cases/common/{153 simd => 155 simd}/simdfuncs.h (100%) diff --git a/mesonbuild/compilers/c.py b/mesonbuild/compilers/c.py index 4aac54222..593366acd 100644 --- a/mesonbuild/compilers/c.py +++ 
b/mesonbuild/compilers/c.py @@ -25,6 +25,8 @@ from .compilers import ( msvc_buildtype_args, msvc_buildtype_linker_args, msvc_winlibs, + vs32_instruction_set_args, + vs64_instruction_set_args, ClangCompiler, Compiler, CompilerArgs, diff --git a/mesonbuild/modules/simd.py b/mesonbuild/modules/simd.py index b666f4bbe..12d9839a6 100644 --- a/mesonbuild/modules/simd.py +++ b/mesonbuild/modules/simd.py @@ -44,7 +44,7 @@ class SimdModule(ExtensionModule): if 'compiler' not in kwargs: raise mesonlib.MesonException('Must specify compiler keyword') compiler = kwargs['compiler'].compiler - if not isinstance(compiler, compilers.Compiler): + if not isinstance(compiler, compilers.compilers.Compiler): raise mesonlib.MesonException('Compiler argument must be a compiler object.') cdata = interpreter.func_configuration_data(None, [], {}) conf = cdata.held_object diff --git a/test cases/common/153 simd/fallback.c b/test cases/common/155 simd/fallback.c similarity index 100% rename from test cases/common/153 simd/fallback.c rename to test cases/common/155 simd/fallback.c diff --git a/test cases/common/153 simd/meson.build b/test cases/common/155 simd/meson.build similarity index 100% rename from test cases/common/153 simd/meson.build rename to test cases/common/155 simd/meson.build diff --git a/test cases/common/153 simd/simd_avx.c b/test cases/common/155 simd/simd_avx.c similarity index 100% rename from test cases/common/153 simd/simd_avx.c rename to test cases/common/155 simd/simd_avx.c diff --git a/test cases/common/153 simd/simd_avx2.c b/test cases/common/155 simd/simd_avx2.c similarity index 100% rename from test cases/common/153 simd/simd_avx2.c rename to test cases/common/155 simd/simd_avx2.c diff --git a/test cases/common/153 simd/simd_mmx.c b/test cases/common/155 simd/simd_mmx.c similarity index 100% rename from test cases/common/153 simd/simd_mmx.c rename to test cases/common/155 simd/simd_mmx.c diff --git a/test cases/common/153 simd/simd_neon.c b/test cases/common/155 
simd/simd_neon.c similarity index 100% rename from test cases/common/153 simd/simd_neon.c rename to test cases/common/155 simd/simd_neon.c diff --git a/test cases/common/153 simd/simd_sse.c b/test cases/common/155 simd/simd_sse.c similarity index 100% rename from test cases/common/153 simd/simd_sse.c rename to test cases/common/155 simd/simd_sse.c diff --git a/test cases/common/153 simd/simd_sse2.c b/test cases/common/155 simd/simd_sse2.c similarity index 100% rename from test cases/common/153 simd/simd_sse2.c rename to test cases/common/155 simd/simd_sse2.c diff --git a/test cases/common/153 simd/simd_sse3.c b/test cases/common/155 simd/simd_sse3.c similarity index 100% rename from test cases/common/153 simd/simd_sse3.c rename to test cases/common/155 simd/simd_sse3.c diff --git a/test cases/common/153 simd/simd_sse41.c b/test cases/common/155 simd/simd_sse41.c similarity index 100% rename from test cases/common/153 simd/simd_sse41.c rename to test cases/common/155 simd/simd_sse41.c diff --git a/test cases/common/153 simd/simd_sse42.c b/test cases/common/155 simd/simd_sse42.c similarity index 100% rename from test cases/common/153 simd/simd_sse42.c rename to test cases/common/155 simd/simd_sse42.c diff --git a/test cases/common/153 simd/simd_ssse3.c b/test cases/common/155 simd/simd_ssse3.c similarity index 100% rename from test cases/common/153 simd/simd_ssse3.c rename to test cases/common/155 simd/simd_ssse3.c diff --git a/test cases/common/153 simd/simdchecker.c b/test cases/common/155 simd/simdchecker.c similarity index 100% rename from test cases/common/153 simd/simdchecker.c rename to test cases/common/155 simd/simdchecker.c diff --git a/test cases/common/153 simd/simdfuncs.h b/test cases/common/155 simd/simdfuncs.h similarity index 100% rename from test cases/common/153 simd/simdfuncs.h rename to test cases/common/155 simd/simdfuncs.h From fc23d9d0f207a5e7d68128db9741db1f7c4ba190 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Tue, 18 Jul 2017 21:53:24 
+0300 Subject: [PATCH 30/31] Turned SIMD into an unstable module. --- mesonbuild/interpreter.py | 4 ++++ mesonbuild/modules/{simd.py => unstable_simd.py} | 0 test cases/common/155 simd/meson.build | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) rename mesonbuild/modules/{simd.py => unstable_simd.py} (100%) diff --git a/mesonbuild/interpreter.py b/mesonbuild/interpreter.py index 43ddd72a1..359dd17f5 100644 --- a/mesonbuild/interpreter.py +++ b/mesonbuild/interpreter.py @@ -1490,6 +1490,10 @@ class Interpreter(InterpreterBase): if len(args) != 1: raise InvalidCode('Import takes one argument.') modname = args[0] + if modname.startswith('unstable-'): + plainname = modname.split('-', 1)[1] + mlog.warning('Module %s has no backwards or forwards compatibility and might not exist in future releases.' % modname) + modname = 'unstable_' + plainname if modname not in self.environment.coredata.modules: try: module = importlib.import_module('mesonbuild.modules.' + modname) diff --git a/mesonbuild/modules/simd.py b/mesonbuild/modules/unstable_simd.py similarity index 100% rename from mesonbuild/modules/simd.py rename to mesonbuild/modules/unstable_simd.py diff --git a/test cases/common/155 simd/meson.build b/test cases/common/155 simd/meson.build index d84b72248..9da165185 100644 --- a/test cases/common/155 simd/meson.build +++ b/test cases/common/155 simd/meson.build @@ -1,6 +1,6 @@ project('simd', 'c') -simd = import('simd') +simd = import('unstable-simd') cc = meson.get_compiler('c') From c8981ff111ccb2419c8689dadc567760e0a20750 Mon Sep 17 00:00:00 2001 From: Jussi Pakkanen Date: Tue, 18 Jul 2017 22:22:01 +0300 Subject: [PATCH 31/31] Added documentation for SIMD module. 
--- docs/markdown/Module-reference.md | 12 +++- docs/markdown/Release-notes-for-0.42.0.md | 7 +++ docs/markdown/Simd-module.md | 70 +++++++++++++++++++++++ docs/sitemap.txt | 3 +- mesonbuild/modules/unstable_simd.py | 2 +- 5 files changed, 91 insertions(+), 3 deletions(-) create mode 100644 docs/markdown/Simd-module.md diff --git a/docs/markdown/Module-reference.md b/docs/markdown/Module-reference.md index 866c141c5..80e3b8f3a 100644 --- a/docs/markdown/Module-reference.md +++ b/docs/markdown/Module-reference.md @@ -1,4 +1,6 @@ -Meson has a selection of modules to make common requirements easy to use. Modules can be thought of like the standard library of a programming language. Currently Meson provides the following modules. +Meson has a selection of modules to make common requirements easy to use. +Modules can be thought of like the standard library of a programming language. +Currently Meson provides the following modules. * [Gnome](Gnome-module.md) * [i18n](i18n-module.md) @@ -8,3 +10,11 @@ Meson has a selection of modules to make common requirements easy to use. Module * [Python3](Python-3-module.md) * [RPM](RPM-module.md) * [Windows](Windows-module.md) + +In addition there are unstable modules. These are meant for testing new +functionality but note that they do *not* provide a stable API. It can +change in arbitrary ways between releases. The modules might also be removed +without warning in future releases. + + * [SIMD](Simd-module.md) + \ No newline at end of file diff --git a/docs/markdown/Release-notes-for-0.42.0.md b/docs/markdown/Release-notes-for-0.42.0.md index a19db4981..3374d3b01 100644 --- a/docs/markdown/Release-notes-for-0.42.0.md +++ b/docs/markdown/Release-notes-for-0.42.0.md @@ -58,3 +58,10 @@ Rust's [linkage reference][rust-linkage]. Both the address- and undefined behavior sanitizers can now be used simultaneously by passing `-Db_sanitize=address,undefined` to Meson. 
+ +## Unstable SIMD module + +A new experimental module to compile code with many different SIMD +instruction sets and select the best one at runtime. This module +is unstable, meaning its API is subject to change in later releases. +It might also be removed altogether. diff --git a/docs/markdown/Simd-module.md b/docs/markdown/Simd-module.md new file mode 100644 index 000000000..0fd1dda70 --- /dev/null +++ b/docs/markdown/Simd-module.md @@ -0,0 +1,70 @@ +# Unstable SIMD module + +This module provides helper functionality to build code with SIMD instructions. +Available since 0.42.0. + +**Note**: this module is unstable. It is only provided as a technology preview. +Its API may change in arbitrary ways between releases or it might be removed +from Meson altogether. + +## Usage + +This module is designed for the use case where you have an algorithm with one +or more SIMD implementations and you choose which one to use at runtime. + +The module provides one method, `check`, which is used like this: + + rval = simd.check('mysimds', + mmx : 'simd_mmx.c', + sse : 'simd_sse.c', + sse2 : 'simd_sse2.c', + sse3 : 'simd_sse3.c', + ssse3 : 'simd_ssse3.c', + sse41 : 'simd_sse41.c', + sse42 : 'simd_sse42.c', + avx : 'simd_avx.c', + avx2 : 'simd_avx2.c', + neon : 'simd_neon.c', + compiler : cc) + +Here the individual files contain the accelerated versions of the functions +in question. The `compiler` keyword argument takes the compiler you are +going to use to compile them. The function returns an array with two values. +The first value is a bunch of libraries that contain the compiled code. Any +SIMD code that the compiler can't compile (for example, Neon instructions on +an x86 machine) is ignored. You should pass this value to the desired target +using `link_with`. The second value is a `configuration_data` object that +contains true for all the values that were supported.
For example, if the +compiler did support sse2 instructions, then the object would have `HAVE_SSE2` +set to 1. + +Generating code to detect the proper instruction set at runtime is +straightforward. First you create a header with the configuration object and +then a chooser function that looks like this: + + void (*fptr)(type_of_function_here) = NULL; + + #if HAVE_NEON + if(fptr == NULL && neon_available()) { + fptr = neon_accelerated_function; + } + #endif + #if HAVE_AVX2 + if(fptr == NULL && avx2_available()) { + fptr = avx2_accelerated_function; + } + #endif + + ... + + if(fptr == NULL) { + fptr = default_function; + } + +Each source file provides two functions, the `xxx_available` function to query +whether the CPU currently in use supports the instruction set and +`xxx_accelerated_function` that is the corresponding accelerated +implementation. + +At the end of this function the function pointer points to the fastest +available implementation and can be invoked to do the computation. diff --git a/docs/sitemap.txt b/docs/sitemap.txt index 9831b93a3..c4df54bc9 100644 --- a/docs/sitemap.txt +++ b/docs/sitemap.txt @@ -27,14 +27,15 @@ index.md Build-options.md Subprojects.md Modules.md + Gnome-module.md i18n-module.md Pkgconfig-module.md Python-3-module.md Qt4-module.md Qt5-module.md RPM-module.md + Simd-module.md Windows-module.md - Gnome-module.md Java.md Vala.md IDE-integration.md diff --git a/mesonbuild/modules/unstable_simd.py b/mesonbuild/modules/unstable_simd.py index 12d9839a6..4aebc02f6 100644 --- a/mesonbuild/modules/unstable_simd.py +++ b/mesonbuild/modules/unstable_simd.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .. import mesonlib, compilers, build, mlog +from .. import mesonlib, compilers, mlog from . import ExtensionModule