|
|
|
#include<simdfuncs.h>
|
|
|
|
#include<stdio.h>
|
|
|
|
#include<string.h>
|
|
|
|
|
|
|
|
/*
 * Signature shared by every increment routine under test: it receives a
 * pointer to the float block and returns nothing. The checker below expects
 * the routine to update that block in place.
 */
typedef void (*simd_func)(float*);

/*
 * Runs one increment implementation and compares its output with the
 * expected values.
 *
 *   four          scratch buffer holding at least blocksize floats; it is
 *                 overwritten with four_initial and then handed to fptr
 *   four_initial  input values copied into four before the call
 *   simd_type     label printed to identify the implementation being run
 *   expected      values four must hold, element by element, after fptr
 *   fptr          implementation under test
 *   blocksize     number of floats to copy and to compare
 *
 * Returns 0 when every element matches, 1 when at least one element differs
 * or when the arguments are unusable (NULL pointer or negative blocksize).
 */
int check_simd_implementation(float *four,
        const float *four_initial,
        const char *simd_type,
        const float *expected,
        simd_func fptr,
        const int blocksize) {
    int rv = 0;

    /*
     * A negative count would be converted to an enormous size_t in the
     * memcpy length below, and a NULL pointer would be dereferenced, so
     * report such calls as failures before touching any buffer.
     */
    if(four == NULL || four_initial == NULL || expected == NULL ||
       fptr == NULL || blocksize < 0) {
        printf("Invalid arguments to check_simd_implementation.\n");
        return 1;
    }

    memcpy(four, four_initial, (size_t)blocksize * sizeof(float));
    printf("Using %s.\n", simd_type);
    fptr(four);
    for(int i=0; i<blocksize; i++) {
        /* Exact comparison: every element is reported, not just the first. */
        if(four[i] != expected[i]) {
            printf("Increment function failed, got %f expected %f.\n", four[i], expected[i]);
            rv = 1;
        }
    }
    return rv;
}
|
|
|
|
|
|
|
|
int main(void) {
|
|
|
|
static const float four_initial[4] = {2.0, 3.0, 4.0, 5.0};
|
|
|
|
ALIGN_16 float four[4];
|
|
|
|
const float expected[4] = {3.0, 4.0, 5.0, 6.0};
|
|
|
|
int r=0;
|
|
|
|
const int blocksize = 4;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Test all implementations that the current CPU supports.
|
|
|
|
*/
|
|
|
|
#if HAVE_NEON
|
|
|
|
if(neon_available()) {
|
|
|
|
r += check_simd_implementation(four,
|
|
|
|
four_initial,
|
|
|
|
"NEON",
|
|
|
|
expected,
|
|
|
|
increment_neon,
|
|
|
|
blocksize);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#if HAVE_AVX2
|
|
|
|
if(avx2_available()) {
|
|
|
|
r += check_simd_implementation(four,
|
|
|
|
four_initial,
|
|
|
|
"AVX2",
|
|
|
|
expected,
|
|
|
|
increment_avx2,
|
|
|
|
blocksize);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#if HAVE_AVX
|
|
|
|
if(avx_available()) {
|
|
|
|
r += check_simd_implementation(four,
|
|
|
|
four_initial,
|
|
|
|
"AVC",
|
|
|
|
expected,
|
|
|
|
increment_avx,
|
|
|
|
blocksize);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#if HAVE_SSE42
|
|
|
|
if(sse42_available()) {
|
|
|
|
r += check_simd_implementation(four,
|
|
|
|
four_initial,
|
|
|
|
"SSR42",
|
|
|
|
expected,
|
|
|
|
increment_sse42,
|
|
|
|
blocksize);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#if HAVE_SSE41
|
|
|
|
if(sse41_available()) {
|
|
|
|
r += check_simd_implementation(four,
|
|
|
|
four_initial,
|
|
|
|
"SSE41",
|
|
|
|
expected,
|
|
|
|
increment_sse41,
|
|
|
|
blocksize);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#if HAVE_SSSE3
|
|
|
|
if(ssse3_available()) {
|
|
|
|
r += check_simd_implementation(four,
|
|
|
|
four_initial,
|
|
|
|
"SSSE3",
|
|
|
|
expected,
|
|
|
|
increment_ssse3,
|
|
|
|
blocksize);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#if HAVE_SSE3
|
|
|
|
if(sse3_available()) {
|
|
|
|
r += check_simd_implementation(four,
|
|
|
|
four_initial,
|
|
|
|
"SSE3",
|
|
|
|
expected,
|
|
|
|
increment_sse3,
|
|
|
|
blocksize);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#if HAVE_SSE2
|
|
|
|
if(sse2_available()) {
|
|
|
|
r += check_simd_implementation(four,
|
|
|
|
four_initial,
|
|
|
|
"SSE2",
|
|
|
|
expected,
|
|
|
|
increment_sse2,
|
|
|
|
blocksize);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#if HAVE_SSE
|
|
|
|
if(sse_available()) {
|
|
|
|
r += check_simd_implementation(four,
|
|
|
|
four_initial,
|
|
|
|
"SSE",
|
|
|
|
expected,
|
|
|
|
increment_sse,
|
|
|
|
blocksize);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#if HAVE_MMX
|
|
|
|
if(mmx_available()) {
|
|
|
|
r += check_simd_implementation(four,
|
|
|
|
four_initial,
|
|
|
|
"MMX",
|
|
|
|
expected,
|
|
|
|
increment_mmx,
|
|
|
|
blocksize);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
r += check_simd_implementation(four,
|
|
|
|
four_initial,
|
|
|
|
"fallback",
|
|
|
|
expected,
|
|
|
|
increment_fallback,
|
|
|
|
blocksize);
|
|
|
|
return r;
|
|
|
|
}
|