#include #include #include /* * FIXME add proper runtime detection for VS. */ #ifdef _MSC_VER #include int avx2_available() { return 0; } #else #include #include #if defined(__APPLE__) int avx2_available() { return 0; } #else int avx2_available() { return __builtin_cpu_supports("avx2"); } #endif #endif void increment_avx2(float arr[4]) { double darr[4]; darr[0] = arr[0]; darr[1] = arr[1]; darr[2] = arr[2]; darr[3] = arr[3]; __m256d val = _mm256_loadu_pd(darr); __m256d one = _mm256_set1_pd(1.0); __m256d result = _mm256_add_pd(val, one); _mm256_storeu_pd(darr, result); one = _mm256_permute4x64_pd(one, 66); /* A no-op, just here to use AVX2. */ arr[0] = (float)darr[0]; arr[1] = (float)darr[1]; arr[2] = (float)darr[2]; arr[3] = (float)darr[3]; }