commit
e89b6cdd10
25 changed files with 879 additions and 8 deletions
@ -0,0 +1,72 @@ |
||||
# Copyright 2017 The Meson development team |
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); |
||||
# you may not use this file except in compliance with the License. |
||||
# You may obtain a copy of the License at |
||||
|
||||
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
||||
# Unless required by applicable law or agreed to in writing, software |
||||
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
# See the License for the specific language governing permissions and |
||||
# limitations under the License. |
||||
|
||||
from .. import mesonlib, compilers, mlog |
||||
|
||||
from . import ExtensionModule |
||||
|
||||
class SimdModule(ExtensionModule): |
||||
|
||||
def __init__(self): |
||||
super().__init__() |
||||
self.snippets.add('check') |
||||
# FIXME add Altivec and AVX512. |
||||
self.isets = ('mmx', |
||||
'sse', |
||||
'sse2', |
||||
'sse3', |
||||
'ssse3', |
||||
'sse41', |
||||
'sse42', |
||||
'avx', |
||||
'avx2', |
||||
'neon', |
||||
) |
||||
|
||||
def check(self, interpreter, state, args, kwargs): |
||||
result = [] |
||||
if len(args) != 1: |
||||
raise mesonlib.MesonException('Check requires one argument, a name prefix for checks.') |
||||
prefix = args[0] |
||||
if not isinstance(prefix, str): |
||||
raise mesonlib.MesonException('Argument must be a string.') |
||||
if 'compiler' not in kwargs: |
||||
raise mesonlib.MesonException('Must specify compiler keyword') |
||||
compiler = kwargs['compiler'].compiler |
||||
if not isinstance(compiler, compilers.compilers.Compiler): |
||||
raise mesonlib.MesonException('Compiler argument must be a compiler object.') |
||||
cdata = interpreter.func_configuration_data(None, [], {}) |
||||
conf = cdata.held_object |
||||
for iset in self.isets: |
||||
if iset not in kwargs: |
||||
continue |
||||
iset_fname = kwargs[iset] # Migth also be an array or Files. static_library will validate. |
||||
args = compiler.get_instruction_set_args(iset) |
||||
if args is None: |
||||
mlog.log('Compiler supports %s:' % iset, mlog.red('NO')) |
||||
continue |
||||
if len(args) > 0: |
||||
if not compiler.has_multi_arguments(args, state.environment): |
||||
mlog.log('Compiler supports %s:' % iset, mlog.red('NO')) |
||||
continue |
||||
mlog.log('Compiler supports %s:' % iset, mlog.green('YES')) |
||||
conf.values['HAVE_' + iset.upper()] = ('1', 'Compiler supports %s.' % iset) |
||||
libname = prefix + '_' + iset |
||||
lib_kwargs = {'sources': iset_fname, |
||||
compiler.get_language() + '_args': args} |
||||
result.append(interpreter.func_static_lib(None, [libname], lib_kwargs)) |
||||
return [result, cdata] |
||||
|
||||
def initialize(): |
||||
return SimdModule() |
@ -0,0 +1,8 @@ |
||||
#include<simdfuncs.h> |
||||
|
||||
void increment_fallback(float arr[4]) { |
||||
int i; |
||||
for(i=0; i<4; i++) { |
||||
arr[i]++; |
||||
} |
||||
} |
@ -0,0 +1,43 @@ |
||||
project('simd', 'c') |
||||
|
||||
simd = import('unstable-simd') |
||||
|
||||
cc = meson.get_compiler('c') |
||||
|
||||
cdata = configuration_data() |
||||
|
||||
if not meson.is_cross_build() and host_machine.cpu_family() == 'arm' and cc.get_id() == 'clang' |
||||
message('Adding -march=armv7 because assuming that this build happens on Raspbian.') |
||||
message('Its Clang seems to be misconfigured and does not support NEON by default.') |
||||
add_project_arguments('-march=armv7', language : 'c') |
||||
endif |
||||
|
||||
if cc.get_id() == 'msvc' and cc.version().version_compare('<17') |
||||
error('MESON_SKIP_TEST VS2010 produces broken binaries on x86.') |
||||
endif |
||||
|
||||
# FIXME add [a, b] = function() |
||||
rval = simd.check('mysimds', |
||||
mmx : 'simd_mmx.c', |
||||
sse : 'simd_sse.c', |
||||
sse2 : 'simd_sse2.c', |
||||
sse3 : 'simd_sse3.c', |
||||
ssse3 : 'simd_ssse3.c', |
||||
sse41 : 'simd_sse41.c', |
||||
sse42 : 'simd_sse42.c', |
||||
avx : 'simd_avx.c', |
||||
avx2 : 'simd_avx2.c', |
||||
neon : 'simd_neon.c', |
||||
compiler : cc) |
||||
|
||||
simdlibs = rval[0] |
||||
cdata.merge_from(rval[1]) |
||||
|
||||
configure_file(output : 'simdconfig.h', |
||||
configuration : cdata) |
||||
|
||||
p = executable('simdtest', 'simdchecker.c', 'fallback.c', |
||||
link_with : simdlibs) |
||||
|
||||
test('simdtest', p) |
||||
|
@ -0,0 +1,43 @@ |
||||
#include<simdconfig.h> |
||||
#include<simdfuncs.h> |
||||
#include<stdint.h> |
||||
|
||||
#ifdef _MSC_VER |
||||
#include<intrin.h> |
||||
int avx_available() { |
||||
return 1; |
||||
} |
||||
#else |
||||
#include<immintrin.h> |
||||
#include<cpuid.h> |
||||
|
||||
#ifdef __APPLE__ |
||||
/*
|
||||
* Apple ships a broken __builtin_cpu_supports and |
||||
* some machines in the CI farm seem to be too |
||||
* old to have AVX so just always return 0 here. |
||||
*/ |
||||
int avx_available() { return 0; } |
||||
#else |
||||
|
||||
int avx_available() { |
||||
return __builtin_cpu_supports("avx"); |
||||
} |
||||
#endif |
||||
#endif |
||||
|
||||
void increment_avx(float arr[4]) { |
||||
double darr[4]; |
||||
darr[0] = arr[0]; |
||||
darr[1] = arr[1]; |
||||
darr[2] = arr[2]; |
||||
darr[3] = arr[3]; |
||||
__m256d val = _mm256_loadu_pd(darr); |
||||
__m256d one = _mm256_set1_pd(1.0); |
||||
__m256d result = _mm256_add_pd(val, one); |
||||
_mm256_storeu_pd(darr, result); |
||||
arr[0] = (float)darr[0]; |
||||
arr[1] = (float)darr[1]; |
||||
arr[2] = (float)darr[2]; |
||||
arr[3] = (float)darr[3]; |
||||
} |
@ -0,0 +1,42 @@ |
||||
#include<simdconfig.h> |
||||
#include<simdfuncs.h> |
||||
#include<stdint.h> |
||||
|
||||
/*
|
||||
* FIXME add proper runtime detection for VS. |
||||
*/ |
||||
|
||||
#ifdef _MSC_VER |
||||
#include<intrin.h> |
||||
int avx2_available() { |
||||
return 0; |
||||
} |
||||
#else |
||||
#include<immintrin.h> |
||||
#include<cpuid.h> |
||||
|
||||
#if defined(__APPLE__) |
||||
int avx2_available() { return 0; } |
||||
#else |
||||
int avx2_available() { |
||||
return __builtin_cpu_supports("avx2"); |
||||
} |
||||
#endif |
||||
#endif |
||||
|
||||
void increment_avx2(float arr[4]) { |
||||
double darr[4]; |
||||
darr[0] = arr[0]; |
||||
darr[1] = arr[1]; |
||||
darr[2] = arr[2]; |
||||
darr[3] = arr[3]; |
||||
__m256d val = _mm256_loadu_pd(darr); |
||||
__m256d one = _mm256_set1_pd(1.0); |
||||
__m256d result = _mm256_add_pd(val, one); |
||||
_mm256_storeu_pd(darr, result); |
||||
one = _mm256_permute4x64_pd(one, 66); /* A no-op, just here to use AVX2. */ |
||||
arr[0] = (float)darr[0]; |
||||
arr[1] = (float)darr[1]; |
||||
arr[2] = (float)darr[2]; |
||||
arr[3] = (float)darr[3]; |
||||
} |
@ -0,0 +1,63 @@ |
||||
#include<simdconfig.h> |
||||
#include<simdfuncs.h> |
||||
|
||||
#include<stdint.h> |
||||
|
||||
#ifdef _MSC_VER |
||||
#include<intrin.h> |
||||
int mmx_available() { |
||||
return 1; |
||||
} |
||||
/* Contrary to MSDN documentation, MMX intrinsics
|
||||
* just plain don't work. |
||||
*/ |
||||
void increment_mmx(float arr[4]) { |
||||
arr[0]++; |
||||
arr[1]++; |
||||
arr[2]++; |
||||
arr[3]++; |
||||
} |
||||
#elif defined(__MINGW32__) |
||||
int mmx_available() { |
||||
return 1; |
||||
} |
||||
/* MinGW does not seem to ship with MMX or it is broken.
|
||||
*/ |
||||
void increment_mmx(float arr[4]) { |
||||
arr[0]++; |
||||
arr[1]++; |
||||
arr[2]++; |
||||
arr[3]++; |
||||
} |
||||
#else |
||||
#include<mmintrin.h> |
||||
#include<cpuid.h> |
||||
|
||||
#if defined(__APPLE__) |
||||
int mmx_available() { return 1; } |
||||
#else |
||||
int mmx_available() { |
||||
return __builtin_cpu_supports("mmx"); |
||||
} |
||||
#endif |
||||
void increment_mmx(float arr[4]) { |
||||
/* Super ugly but we know that values in arr are always small
|
||||
* enough to fit in int16; |
||||
*/ |
||||
int i; |
||||
__m64 packed = _mm_set_pi16(arr[3], arr[2], arr[1], arr[0]); |
||||
__m64 incr = _mm_set1_pi16(1); |
||||
__m64 result = _mm_add_pi16(packed, incr); |
||||
/* Should be
|
||||
* int64_t unpacker = _m_to_int64(result); |
||||
* but it does not exist on 32 bit platforms for some reason. |
||||
*/ |
||||
int64_t unpacker = (int64_t)(result); |
||||
_mm_empty(); |
||||
for(i=0; i<4; i++) { |
||||
arr[i] = (float)(unpacker & ((1<<16)-1)); |
||||
unpacker >>= 16; |
||||
} |
||||
} |
||||
|
||||
#endif |
@ -0,0 +1,20 @@ |
||||
#include<simdconfig.h> |
||||
#include<simdfuncs.h> |
||||
|
||||
#include<arm_neon.h> |
||||
#include<stdint.h> |
||||
|
||||
int neon_available() { |
||||
return 1; /* Incorrect, but I don't know how to check this properly. */ |
||||
} |
||||
|
||||
void increment_neon(float arr[4]) { |
||||
float32x2_t a1, a2, one; |
||||
a1 = vld1_f32(arr); |
||||
a2 = vld1_f32(&arr[2]); |
||||
one = vdup_n_f32(1.0); |
||||
a1 = vadd_f32(a1, one); |
||||
a2 = vadd_f32(a2, one); |
||||
vst1_f32(arr, a1); |
||||
vst1_f32(&arr[2], a2); |
||||
} |
@ -0,0 +1,29 @@ |
||||
#include<simdconfig.h> |
||||
#include<simdfuncs.h> |
||||
|
||||
#ifdef _MSC_VER |
||||
#include<intrin.h> |
||||
int sse_available() { |
||||
return 1; |
||||
} |
||||
#else |
||||
|
||||
#include<xmmintrin.h> |
||||
#include<cpuid.h> |
||||
#include<stdint.h> |
||||
|
||||
#if defined(__APPLE__) |
||||
int sse_available() { return 1; } |
||||
#else |
||||
int sse_available() { |
||||
return __builtin_cpu_supports("sse"); |
||||
} |
||||
#endif |
||||
#endif |
||||
|
||||
void increment_sse(float arr[4]) { |
||||
__m128 val = _mm_load_ps(arr); |
||||
__m128 one = _mm_set_ps1(1.0); |
||||
__m128 result = _mm_add_ps(val, one); |
||||
_mm_storeu_ps(arr, result); |
||||
} |
@ -0,0 +1,37 @@ |
||||
#include<simdconfig.h> |
||||
#include<simdfuncs.h> |
||||
#include<emmintrin.h> |
||||
|
||||
#ifdef _MSC_VER |
||||
int sse2_available() { |
||||
return 1; |
||||
} |
||||
|
||||
#else |
||||
#include<cpuid.h> |
||||
#include<stdint.h> |
||||
|
||||
#if defined(__APPLE__) |
||||
int sse2_available() { return 1; } |
||||
#else |
||||
int sse2_available() { |
||||
return __builtin_cpu_supports("sse2"); |
||||
} |
||||
#endif |
||||
#endif |
||||
|
||||
void increment_sse2(float arr[4]) { |
||||
double darr[4]; |
||||
__m128d val1 = _mm_set_pd(arr[0], arr[1]); |
||||
__m128d val2 = _mm_set_pd(arr[2], arr[3]); |
||||
__m128d one = _mm_set_pd(1.0, 1.0); |
||||
__m128d result = _mm_add_pd(val1, one); |
||||
_mm_store_pd(darr, result); |
||||
result = _mm_add_pd(val2, one); |
||||
_mm_store_pd(&darr[2], result); |
||||
arr[0] = (float)darr[1]; |
||||
arr[1] = (float)darr[0]; |
||||
arr[2] = (float)darr[3]; |
||||
arr[3] = (float)darr[2]; |
||||
} |
||||
|
@ -0,0 +1,38 @@ |
||||
#include<simdconfig.h> |
||||
#include<simdfuncs.h> |
||||
|
||||
#ifdef _MSC_VER |
||||
#include<intrin.h> |
||||
int sse3_available() { |
||||
return 1; |
||||
} |
||||
#else |
||||
|
||||
#include<pmmintrin.h> |
||||
#include<cpuid.h> |
||||
#include<stdint.h> |
||||
|
||||
#if defined(__APPLE__) |
||||
int sse3_available() { return 1; } |
||||
#else |
||||
int sse3_available() { |
||||
return __builtin_cpu_supports("sse3"); |
||||
} |
||||
#endif |
||||
#endif |
||||
|
||||
void increment_sse3(float arr[4]) { |
||||
double darr[4]; |
||||
__m128d val1 = _mm_set_pd(arr[0], arr[1]); |
||||
__m128d val2 = _mm_set_pd(arr[2], arr[3]); |
||||
__m128d one = _mm_set_pd(1.0, 1.0); |
||||
__m128d result = _mm_add_pd(val1, one); |
||||
_mm_store_pd(darr, result); |
||||
result = _mm_add_pd(val2, one); |
||||
_mm_store_pd(&darr[2], result); |
||||
result = _mm_hadd_pd(val1, val2); /* This does nothing. Only here so we use an SSE3 instruction. */ |
||||
arr[0] = (float)darr[1]; |
||||
arr[1] = (float)darr[0]; |
||||
arr[2] = (float)darr[3]; |
||||
arr[3] = (float)darr[2]; |
||||
} |
@ -0,0 +1,40 @@ |
||||
#include<simdconfig.h> |
||||
#include<simdfuncs.h> |
||||
|
||||
#include<stdint.h> |
||||
|
||||
#ifdef _MSC_VER |
||||
#include<intrin.h> |
||||
|
||||
int sse41_available() { |
||||
return 1; |
||||
} |
||||
|
||||
#else |
||||
#include<smmintrin.h> |
||||
#include<cpuid.h> |
||||
|
||||
#if defined(__APPLE__) |
||||
int sse41_available() { return 1; } |
||||
#else |
||||
int sse41_available() { |
||||
return __builtin_cpu_supports("sse4.1"); |
||||
} |
||||
#endif |
||||
#endif |
||||
|
||||
void increment_sse41(float arr[4]) { |
||||
double darr[4]; |
||||
__m128d val1 = _mm_set_pd(arr[0], arr[1]); |
||||
__m128d val2 = _mm_set_pd(arr[2], arr[3]); |
||||
__m128d one = _mm_set_pd(1.0, 1.0); |
||||
__m128d result = _mm_add_pd(val1, one); |
||||
result = _mm_ceil_pd(result); /* A no-op, only here to use a SSE4.1 intrinsic. */ |
||||
_mm_store_pd(darr, result); |
||||
result = _mm_add_pd(val2, one); |
||||
_mm_store_pd(&darr[2], result); |
||||
arr[0] = (float)darr[1]; |
||||
arr[1] = (float)darr[0]; |
||||
arr[2] = (float)darr[3]; |
||||
arr[3] = (float)darr[2]; |
||||
} |
@ -0,0 +1,43 @@ |
||||
#include<simdconfig.h> |
||||
#include<simdfuncs.h> |
||||
#include<stdint.h> |
||||
|
||||
#ifdef _MSC_VER |
||||
#include<intrin.h> |
||||
|
||||
int sse42_available() { |
||||
return 1; |
||||
} |
||||
|
||||
#else |
||||
|
||||
#include<nmmintrin.h> |
||||
#include<cpuid.h> |
||||
|
||||
#ifdef __APPLE__ |
||||
int sse42_available() { |
||||
return 1; |
||||
} |
||||
#else |
||||
int sse42_available() { |
||||
return __builtin_cpu_supports("sse4.2"); |
||||
} |
||||
#endif |
||||
|
||||
#endif |
||||
|
||||
void increment_sse42(float arr[4]) { |
||||
double darr[4]; |
||||
__m128d val1 = _mm_set_pd(arr[0], arr[1]); |
||||
__m128d val2 = _mm_set_pd(arr[2], arr[3]); |
||||
__m128d one = _mm_set_pd(1.0, 1.0); |
||||
__m128d result = _mm_add_pd(val1, one); |
||||
_mm_store_pd(darr, result); |
||||
result = _mm_add_pd(val2, one); |
||||
_mm_store_pd(&darr[2], result); |
||||
_mm_crc32_u32(42, 99); /* A no-op, only here to use an SSE4.2 instruction. */ |
||||
arr[0] = (float)darr[1]; |
||||
arr[1] = (float)darr[0]; |
||||
arr[2] = (float)darr[3]; |
||||
arr[3] = (float)darr[2]; |
||||
} |
@ -0,0 +1,48 @@ |
||||
#include<simdconfig.h> |
||||
#include<simdfuncs.h> |
||||
|
||||
#include<emmintrin.h> |
||||
#include<tmmintrin.h> |
||||
|
||||
#ifdef _MSC_VER |
||||
#include<intrin.h> |
||||
|
||||
int ssse3_available() { |
||||
return 1; |
||||
} |
||||
|
||||
#else |
||||
|
||||
#include<cpuid.h> |
||||
#include<stdint.h> |
||||
|
||||
int ssse3_available() { |
||||
#ifdef __APPLE__ |
||||
return 1; |
||||
#elif defined(__clang__) |
||||
/* https://github.com/numpy/numpy/issues/8130 */ |
||||
return __builtin_cpu_supports("sse4.1"); |
||||
#else |
||||
return __builtin_cpu_supports("ssse3"); |
||||
#endif |
||||
} |
||||
|
||||
#endif |
||||
|
||||
void increment_ssse3(float arr[4]) { |
||||
double darr[4]; |
||||
__m128d val1 = _mm_set_pd(arr[0], arr[1]); |
||||
__m128d val2 = _mm_set_pd(arr[2], arr[3]); |
||||
__m128d one = _mm_set_pd(1.0, 1.0); |
||||
__m128d result = _mm_add_pd(val1, one); |
||||
__m128i tmp1, tmp2; |
||||
tmp1 = tmp2 = _mm_set1_epi16(0); |
||||
_mm_store_pd(darr, result); |
||||
result = _mm_add_pd(val2, one); |
||||
_mm_store_pd(&darr[2], result); |
||||
tmp1 = _mm_hadd_epi32(tmp1, tmp2); /* This does nothing. Only here so we use an SSSE3 instruction. */ |
||||
arr[0] = (float)darr[1]; |
||||
arr[1] = (float)darr[0]; |
||||
arr[2] = (float)darr[3]; |
||||
arr[3] = (float)darr[2]; |
||||
} |
@ -0,0 +1,93 @@ |
||||
#include<simdfuncs.h> |
||||
#include<stdio.h> |
||||
|
||||
/*
|
||||
* A function that checks at runtime which simd accelerations are |
||||
* available and calls the best one. Falls |
||||
* back to plain C implementation if SIMD is not available. |
||||
*/ |
||||
|
||||
int main(int argc, char **argv) { |
||||
float four[4] = {2.0, 3.0, 4.0, 5.0}; |
||||
const float expected[4] = {3.0, 4.0, 5.0, 6.0}; |
||||
void (*fptr)(float[4]) = NULL; |
||||
const char *type; |
||||
int i; |
||||
|
||||
/* Add here. The first matched one is used so put "better" instruction
|
||||
* sets at the top. |
||||
*/ |
||||
#if HAVE_NEON |
||||
if(fptr == NULL && neon_available()) { |
||||
fptr = increment_neon; |
||||
type = "NEON"; |
||||
} |
||||
#endif |
||||
#if HAVE_AVX2 |
||||
if(fptr == NULL && avx2_available()) { |
||||
fptr = increment_avx2; |
||||
type = "AVX2"; |
||||
} |
||||
#endif |
||||
#if HAVE_AVX |
||||
if(fptr == NULL && avx_available()) { |
||||
fptr = increment_avx; |
||||
type = "AVX"; |
||||
} |
||||
#endif |
||||
#if HAVE_SSE42 |
||||
if(fptr == NULL && sse42_available()) { |
||||
fptr = increment_sse42; |
||||
type = "SSE42"; |
||||
} |
||||
#endif |
||||
#if HAVE_SSE41 |
||||
if(fptr == NULL && sse41_available()) { |
||||
fptr = increment_sse41; |
||||
type = "SSE41"; |
||||
} |
||||
#endif |
||||
#if HAVE_SSSE3 |
||||
if(fptr == NULL && ssse3_available()) { |
||||
fptr = increment_ssse3; |
||||
type = "SSSE3"; |
||||
} |
||||
#endif |
||||
#if HAVE_SSE3 |
||||
if(fptr == NULL && sse3_available()) { |
||||
fptr = increment_sse3; |
||||
type = "SSE3"; |
||||
} |
||||
#endif |
||||
#if HAVE_SSE2 |
||||
if(fptr == NULL && sse2_available()) { |
||||
fptr = increment_sse2; |
||||
type = "SSE2"; |
||||
} |
||||
#endif |
||||
#if HAVE_SSE |
||||
if(fptr == NULL && sse_available()) { |
||||
fptr = increment_sse; |
||||
type = "SSE"; |
||||
} |
||||
#endif |
||||
#if HAVE_MMX |
||||
if(fptr == NULL && mmx_available()) { |
||||
fptr = increment_mmx; |
||||
type = "MMX"; |
||||
} |
||||
#endif |
||||
if(fptr == NULL) { |
||||
fptr = increment_fallback; |
||||
type = "fallback"; |
||||
} |
||||
printf("Using %s.\n", type); |
||||
fptr(four); |
||||
for(i=0; i<4; i++) { |
||||
if(four[i] != expected[i]) { |
||||
printf("Increment function failed, got %f expected %f.\n", four[i], expected[i]); |
||||
return 1; |
||||
} |
||||
} |
||||
return 0; |
||||
} |
@ -0,0 +1,67 @@ |
||||
#pragma once |
||||
|
||||
#include<simdconfig.h> |
||||
|
||||
/* Yes, I do know that arr[4] decays into a pointer
|
||||
* as a function argument. Don't do this in real code |
||||
* but for this test it is ok. |
||||
*/ |
||||
|
||||
void increment_fallback(float arr[4]); |
||||
|
||||
#if HAVE_MMX |
||||
int mmx_available(); |
||||
void increment_mmx(float arr[4]); |
||||
#endif |
||||
|
||||
#if HAVE_SSE |
||||
int sse_available(); |
||||
void increment_sse(float arr[4]); |
||||
#endif |
||||
|
||||
#if HAVE_SSE2 |
||||
int sse2_available(); |
||||
void increment_sse2(float arr[4]); |
||||
#endif |
||||
|
||||
#if HAVE_SSE3 |
||||
int sse3_available(); |
||||
void increment_sse3(float arr[4]); |
||||
#endif |
||||
|
||||
#if HAVE_SSSE3 |
||||
int ssse3_available(); |
||||
void increment_ssse3(float arr[4]); |
||||
#endif |
||||
|
||||
#if HAVE_SSE41 |
||||
int sse41_available(); |
||||
void increment_sse41(float arr[4]); |
||||
#endif |
||||
|
||||
#if HAVE_SSE42 |
||||
int sse42_available(); |
||||
void increment_sse42(float arr[4]); |
||||
#endif |
||||
|
||||
#if HAVE_AVX |
||||
int avx_available(); |
||||
void increment_avx(float arr[4]); |
||||
#endif |
||||
|
||||
#if HAVE_AVX2 |
||||
int avx2_available(); |
||||
void increment_avx2(float arr[4]); |
||||
#endif |
||||
|
||||
#if HAVE_NEON |
||||
int neon_available(); |
||||
void increment_neon(float arr[4]); |
||||
#endif |
||||
|
||||
#if HAVE_ALTIVEC |
||||
int altivec_available(); |
||||
void increment_altivec(float arr[4]); |
||||
#endif |
||||
|
||||
/* And so on. */ |
Loading…
Reference in new issue