commit
142f8a1da6
7 changed files with 117 additions and 59 deletions
@ -1,93 +1,143 @@ |
|||||||
#include<simdfuncs.h> |
#include<simdfuncs.h> |
||||||
#include<stdio.h> |
#include<stdio.h> |
||||||
|
#include<string.h> |
||||||
|
|
||||||
/*
|
typedef void (*simd_func)(float*); |
||||||
* A function that checks at runtime which simd accelerations are |
|
||||||
* available and calls the best one. Falls |
int check_simd_implementation(float *four, |
||||||
* back to plain C implementation if SIMD is not available. |
const float *four_initial, |
||||||
*/ |
const char *simd_type, |
||||||
|
const float *expected, |
||||||
|
simd_func fptr, |
||||||
|
const int blocksize) { |
||||||
|
int rv = 0; |
||||||
|
memcpy(four, four_initial, blocksize*sizeof(float)); |
||||||
|
printf("Using %s.\n", simd_type); |
||||||
|
fptr(four); |
||||||
|
for(int i=0; i<blocksize; i++) { |
||||||
|
if(four[i] != expected[i]) { |
||||||
|
printf("Increment function failed, got %f expected %f.\n", four[i], expected[i]); |
||||||
|
rv = 1; |
||||||
|
} |
||||||
|
} |
||||||
|
return rv; |
||||||
|
} |
||||||
|
|
||||||
int main(int argc, char **argv) { |
int main(int argc, char **argv) { |
||||||
float four[4] = {2.0, 3.0, 4.0, 5.0}; |
static const float four_initial[4] = {2.0, 3.0, 4.0, 5.0}; |
||||||
|
ALIGN_16 float four[4]; |
||||||
const float expected[4] = {3.0, 4.0, 5.0, 6.0}; |
const float expected[4] = {3.0, 4.0, 5.0, 6.0}; |
||||||
void (*fptr)(float[4]) = NULL; |
int r=0; |
||||||
const char *type; |
const int blocksize = 4; |
||||||
int i; |
|
||||||
|
|
||||||
/* Add here. The first matched one is used so put "better" instruction
|
/*
|
||||||
* sets at the top. |
* Test all implementations that the current CPU supports. |
||||||
*/ |
*/ |
||||||
#if HAVE_NEON |
#if HAVE_NEON |
||||||
if(fptr == NULL && neon_available()) { |
if(neon_available()) { |
||||||
fptr = increment_neon; |
r += check_simd_implementation(four, |
||||||
type = "NEON"; |
four_initial, |
||||||
|
"NEON", |
||||||
|
expected, |
||||||
|
increment_neon, |
||||||
|
blocksize); |
||||||
} |
} |
||||||
#endif |
#endif |
||||||
#if HAVE_AVX2 |
#if HAVE_AVX2 |
||||||
if(fptr == NULL && avx2_available()) { |
if(avx2_available()) { |
||||||
fptr = increment_avx2; |
r += check_simd_implementation(four, |
||||||
type = "AVX2"; |
four_initial, |
||||||
|
"AVX2", |
||||||
|
expected, |
||||||
|
increment_avx2, |
||||||
|
blocksize); |
||||||
} |
} |
||||||
#endif |
#endif |
||||||
#if HAVE_AVX |
#if HAVE_AVX |
||||||
if(fptr == NULL && avx_available()) { |
if(avx_available()) { |
||||||
fptr = increment_avx; |
r += check_simd_implementation(four, |
||||||
type = "AVX"; |
four_initial, |
||||||
|
"AVX", |
||||||
|
expected, |
||||||
|
increment_avx, |
||||||
|
blocksize); |
||||||
} |
} |
||||||
#endif |
#endif |
||||||
#if HAVE_SSE42 |
#if HAVE_SSE42 |
||||||
if(fptr == NULL && sse42_available()) { |
if(sse42_available()) { |
||||||
fptr = increment_sse42; |
r += check_simd_implementation(four, |
||||||
type = "SSE42"; |
four_initial, |
||||||
|
"SSE42", |
||||||
|
expected, |
||||||
|
increment_sse42, |
||||||
|
blocksize); |
||||||
} |
} |
||||||
#endif |
#endif |
||||||
#if HAVE_SSE41 |
#if HAVE_SSE41 |
||||||
if(fptr == NULL && sse41_available()) { |
if(sse41_available()) { |
||||||
fptr = increment_sse41; |
r += check_simd_implementation(four, |
||||||
type = "SSE41"; |
four_initial, |
||||||
|
"SSE41", |
||||||
|
expected, |
||||||
|
increment_sse41, |
||||||
|
blocksize); |
||||||
} |
} |
||||||
#endif |
#endif |
||||||
#if HAVE_SSSE3 |
#if HAVE_SSSE3 |
||||||
if(fptr == NULL && ssse3_available()) { |
if(ssse3_available()) { |
||||||
fptr = increment_ssse3; |
r += check_simd_implementation(four, |
||||||
type = "SSSE3"; |
four_initial, |
||||||
|
"SSSE3", |
||||||
|
expected, |
||||||
|
increment_ssse3, |
||||||
|
blocksize); |
||||||
} |
} |
||||||
#endif |
#endif |
||||||
#if HAVE_SSE3 |
#if HAVE_SSE3 |
||||||
if(fptr == NULL && sse3_available()) { |
if(sse3_available()) { |
||||||
fptr = increment_sse3; |
r += check_simd_implementation(four, |
||||||
type = "SSE3"; |
four_initial, |
||||||
|
"SSE3", |
||||||
|
expected, |
||||||
|
increment_sse3, |
||||||
|
blocksize); |
||||||
} |
} |
||||||
#endif |
#endif |
||||||
#if HAVE_SSE2 |
#if HAVE_SSE2 |
||||||
if(fptr == NULL && sse2_available()) { |
if(sse2_available()) { |
||||||
fptr = increment_sse2; |
r += check_simd_implementation(four, |
||||||
type = "SSE2"; |
four_initial, |
||||||
|
"SSE2", |
||||||
|
expected, |
||||||
|
increment_sse2, |
||||||
|
blocksize); |
||||||
} |
} |
||||||
#endif |
#endif |
||||||
#if HAVE_SSE |
#if HAVE_SSE |
||||||
if(fptr == NULL && sse_available()) { |
if(sse_available()) { |
||||||
fptr = increment_sse; |
r += check_simd_implementation(four, |
||||||
type = "SSE"; |
four_initial, |
||||||
|
"SSE", |
||||||
|
expected, |
||||||
|
increment_sse, |
||||||
|
blocksize); |
||||||
} |
} |
||||||
#endif |
#endif |
||||||
#if HAVE_MMX |
#if HAVE_MMX |
||||||
if(fptr == NULL && mmx_available()) { |
if(mmx_available()) { |
||||||
fptr = increment_mmx; |
r += check_simd_implementation(four, |
||||||
type = "MMX"; |
four_initial, |
||||||
|
"MMX", |
||||||
|
expected, |
||||||
|
increment_mmx, |
||||||
|
blocksize); |
||||||
} |
} |
||||||
#endif |
#endif |
||||||
if(fptr == NULL) { |
r += check_simd_implementation(four, |
||||||
fptr = increment_fallback; |
four_initial, |
||||||
type = "fallback"; |
"fallback", |
||||||
} |
expected, |
||||||
printf("Using %s.\n", type); |
increment_fallback, |
||||||
fptr(four); |
blocksize); |
||||||
for(i=0; i<4; i++) { |
return r; |
||||||
if(four[i] != expected[i]) { |
|
||||||
printf("Increment function failed, got %f expected %f.\n", four[i], expected[i]); |
|
||||||
return 1; |
|
||||||
} |
|
||||||
} |
|
||||||
return 0; |
|
||||||
} |
} |
||||||
|
Loading…
Reference in new issue