/* * Copyright (C) 2010 The Android Open Source Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* ChangeLog for this library: * * NDK r8d: Add android_setCpu(). * * NDK r8c: Add new ARM CPU features: VFPv2, VFP_D32, VFP_FP16, * VFP_FMA, NEON_FMA, IDIV_ARM, IDIV_THUMB2 and iWMMXt. * * Rewrite the code to parse /proc/self/auxv instead of * the "Features" field in /proc/cpuinfo. * * Dynamically allocate the buffer that hold the content * of /proc/cpuinfo to deal with newer hardware. * * NDK r7c: Fix CPU count computation. The old method only reported the * number of _active_ CPUs when the library was initialized, * which could be less than the real total. 
* * NDK r5: Handle buggy kernels which report a CPU Architecture number of 7 * for an ARMv6 CPU (see below). * * Handle kernels that only report 'neon', and not 'vfpv3' * (VFPv3 is mandated by the ARM architecture is Neon is implemented) * * Handle kernels that only report 'vfpv3d16', and not 'vfpv3' * * Fix x86 compilation. Report ANDROID_CPU_FAMILY_X86 in * android_getCpuFamily(). * * NDK r4: Initial release */ #include #ifdef __arm__ #include #endif #include #include "cpu-features.h" #include #include #include #include static pthread_once_t g_once; static int g_inited; static AndroidCpuFamily g_cpuFamily; static uint64_t g_cpuFeatures; static int g_cpuCount; static const int android_cpufeatures_debug = 0; #ifdef __arm__ # define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_ARM #elif defined __i386__ # define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_X86 #else # define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_UNKNOWN #endif #define D(...) \ do { \ if (android_cpufeatures_debug) { \ printf(__VA_ARGS__); fflush(stdout); \ } \ } while (0) #ifdef __i386__ static __inline__ void x86_cpuid(int func, int values[4]) { int a, b, c, d; /* We need to preserve ebx since we're compiling PIC code */ /* this means we can't use "=b" for the second output register */ __asm__ __volatile__ ( \ "push %%ebx\n" "cpuid\n" \ "mov %%ebx, %1\n" "pop %%ebx\n" : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ : "a" (func) \ ); values[0] = a; values[1] = b; values[2] = c; values[3] = d; } #endif /* Get the size of a file by reading it until the end. This is needed * because files under /proc do not always return a valid size when * using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed. 
*/ static int get_file_size(const char* pathname) { int fd, ret, result = 0; char buffer[256]; fd = open(pathname, O_RDONLY); if (fd < 0) { D("Can't open %s: %s\n", pathname, strerror(errno)); return -1; } for (;;) { int ret = read(fd, buffer, sizeof buffer); if (ret < 0) { if (errno == EINTR) continue; D("Error while reading %s: %s\n", pathname, strerror(errno)); break; } if (ret == 0) break; result += ret; } close(fd); return result; } /* Read the content of /proc/cpuinfo into a user-provided buffer. * Return the length of the data, or -1 on error. Does *not* * zero-terminate the content. Will not read more * than 'buffsize' bytes. */ static int read_file(const char* pathname, char* buffer, size_t buffsize) { int fd, count; fd = open(pathname, O_RDONLY); if (fd < 0) { D("Could not open %s: %s\n", pathname, strerror(errno)); return -1; } count = 0; while (count < (int)buffsize) { int ret = read(fd, buffer + count, buffsize - count); if (ret < 0) { if (errno == EINTR) continue; D("Error while reading from %s: %s\n", pathname, strerror(errno)); if (count == 0) count = -1; break; } if (ret == 0) break; count += ret; } close(fd); return count; } /* Extract the content of a the first occurence of a given field in * the content of /proc/cpuinfo and return it as a heap-allocated * string that must be freed by the caller. * * Return NULL if not found */ static char* extract_cpuinfo_field(const char* buffer, int buflen, const char* field) { int fieldlen = strlen(field); const char* bufend = buffer + buflen; char* result = NULL; int len, ignore; const char *p, *q; /* Look for first field occurence, and ensures it starts the line. 
*/ p = buffer; bufend = buffer + buflen; for (;;) { p = memmem(p, bufend-p, field, fieldlen); if (p == NULL) goto EXIT; if (p == buffer || p[-1] == '\n') break; p += fieldlen; } /* Skip to the first column followed by a space */ p += fieldlen; p = memchr(p, ':', bufend-p); if (p == NULL || p[1] != ' ') goto EXIT; /* Find the end of the line */ p += 2; q = memchr(p, '\n', bufend-p); if (q == NULL) q = bufend; /* Copy the line into a heap-allocated buffer */ len = q-p; result = malloc(len+1); if (result == NULL) goto EXIT; memcpy(result, p, len); result[len] = '\0'; EXIT: return result; } /* Like strlen(), but for constant string literals */ #define STRLEN_CONST(x) ((sizeof(x)-1) /* Checks that a space-separated list of items contains one given 'item'. * Returns 1 if found, 0 otherwise. */ static int has_list_item(const char* list, const char* item) { const char* p = list; int itemlen = strlen(item); if (list == NULL) return 0; while (*p) { const char* q; /* skip spaces */ while (*p == ' ' || *p == '\t') p++; /* find end of current list item */ q = p; while (*q && *q != ' ' && *q != '\t') q++; if (itemlen == q-p && !memcmp(p, item, itemlen)) return 1; /* skip to next item */ p = q; } return 0; } /* Parse an decimal integer starting from 'input', but not going further * than 'limit'. Return the value into '*result'. * * NOTE: Does not skip over leading spaces, or deal with sign characters. * NOTE: Ignores overflows. * * The function returns NULL in case of error (bad format), or the new * position after the decimal number in case of success (which will always * be <= 'limit'). */ static const char* parse_decimal(const char* input, const char* limit, int* result) { const char* p = input; int val = 0; while (p < limit) { int d = (*p - '0'); if ((unsigned)d >= 10U) break; val = val*10 + d; p++; } if (p == input) return NULL; *result = val; return p; } /* This small data type is used to represent a CPU list / mask, as read * from sysfs on Linux. 
See http://www.kernel.org/doc/Documentation/cputopology.txt * * For now, we don't expect more than 32 cores on mobile devices, so keep * everything simple. */ typedef struct { uint32_t mask; } CpuList; static __inline__ void cpulist_init(CpuList* list) { list->mask = 0; } static __inline__ void cpulist_and(CpuList* list1, CpuList* list2) { list1->mask &= list2->mask; } static __inline__ void cpulist_set(CpuList* list, int index) { if ((unsigned)index < 32) { list->mask |= (uint32_t)(1U << index); } } static __inline__ int cpulist_count(CpuList* list) { return __builtin_popcount(list->mask); } /* Parse a textual list of cpus and store the result inside a CpuList object. * Input format is the following: * - comma-separated list of items (no spaces) * - each item is either a single decimal number (cpu index), or a range made * of two numbers separated by a single dash (-). Ranges are inclusive. * * Examples: 0 * 2,4-127,128-143 * 0-1 */ static void cpulist_parse(CpuList* list, const char* line, int line_len) { const char* p = line; const char* end = p + line_len; const char* q; /* NOTE: the input line coming from sysfs typically contains a * trailing newline, so take care of it in the code below */ while (p < end && *p != '\n') { int val, start_value, end_value; /* Find the end of current item, and put it into 'q' */ q = memchr(p, ',', end-p); if (q == NULL) { q = end; } /* Get first value */ p = parse_decimal(p, q, &start_value); if (p == NULL) goto BAD_FORMAT; end_value = start_value; /* If we're not at the end of the item, expect a dash and * and integer; extract end value. 
*/ if (p < q && *p == '-') { p = parse_decimal(p+1, q, &end_value); if (p == NULL) goto BAD_FORMAT; } /* Set bits CPU list bits */ for (val = start_value; val <= end_value; val++) { cpulist_set(list, val); } /* Jump to next item */ p = q; if (p < end) p++; } BAD_FORMAT: ; } /* Read a CPU list from one sysfs file */ static void cpulist_read_from(CpuList* list, const char* filename) { char file[64]; int filelen; cpulist_init(list); filelen = read_file(filename, file, sizeof file); if (filelen < 0) { D("Could not read %s: %s\n", filename, strerror(errno)); return; } cpulist_parse(list, file, filelen); } // See kernel header. #define HWCAP_VFP (1 << 6) #define HWCAP_IWMMXT (1 << 9) #define HWCAP_NEON (1 << 12) #define HWCAP_VFPv3 (1 << 13) #define HWCAP_VFPv3D16 (1 << 14) #define HWCAP_VFPv4 (1 << 16) #define HWCAP_IDIVA (1 << 17) #define HWCAP_IDIVT (1 << 18) #define AT_HWCAP 16 #if defined(__arm__) /* Compute the ELF HWCAP flags. */ static uint32_t get_elf_hwcap(const char* cpuinfo, int cpuinfo_len) { /* IMPORTANT: * Accessing /proc/self/auxv doesn't work anymore on all * platform versions. More specifically, when running inside * a regular application process, most of /proc/self/ will be * non-readable, including /proc/self/auxv. This doesn't * happen however if the application is debuggable, or when * running under the "shell" UID, which is why this was not * detected appropriately. */ #if 0 uint32_t result = 0; const char filepath[] = "/proc/self/auxv"; int fd = open(filepath, O_RDONLY); if (fd < 0) { D("Could not open %s: %s\n", filepath, strerror(errno)); return 0; } struct { uint32_t tag; uint32_t value; } entry; for (;;) { int ret = read(fd, (char*)&entry, sizeof entry); if (ret < 0) { if (errno == EINTR) continue; D("Error while reading %s: %s\n", filepath, strerror(errno)); break; } // Detect end of list. 
if (ret == 0 || (entry.tag == 0 && entry.value == 0)) break; if (entry.tag == AT_HWCAP) { result = entry.value; break; } } close(fd); return result; #else // Recreate ELF hwcaps by parsing /proc/cpuinfo Features tag. uint32_t hwcaps = 0; char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features"); if (cpuFeatures != NULL) { D("Found cpuFeatures = '%s'\n", cpuFeatures); if (has_list_item(cpuFeatures, "vfp")) hwcaps |= HWCAP_VFP; if (has_list_item(cpuFeatures, "vfpv3")) hwcaps |= HWCAP_VFPv3; if (has_list_item(cpuFeatures, "vfpv3d16")) hwcaps |= HWCAP_VFPv3D16; if (has_list_item(cpuFeatures, "vfpv4")) hwcaps |= HWCAP_VFPv4; if (has_list_item(cpuFeatures, "neon")) hwcaps |= HWCAP_NEON; if (has_list_item(cpuFeatures, "idiva")) hwcaps |= HWCAP_IDIVA; if (has_list_item(cpuFeatures, "idivt")) hwcaps |= HWCAP_IDIVT; if (has_list_item(cpuFeatures, "idiv")) hwcaps |= HWCAP_IDIVA | HWCAP_IDIVT; if (has_list_item(cpuFeatures, "iwmmxt")) hwcaps |= HWCAP_IWMMXT; free(cpuFeatures); } return hwcaps; #endif } #endif /* __arm__ */ /* Return the number of cpus present on a given device. * * To handle all weird kernel configurations, we need to compute the * intersection of the 'present' and 'possible' CPU lists and count * the result. */ static int get_cpu_count(void) { CpuList cpus_present[1]; CpuList cpus_possible[1]; cpulist_read_from(cpus_present, "/sys/devices/system/cpu/present"); cpulist_read_from(cpus_possible, "/sys/devices/system/cpu/possible"); /* Compute the intersection of both sets to get the actual number of * CPU cores that can be used on this device by the kernel. 
*/ cpulist_and(cpus_present, cpus_possible); return cpulist_count(cpus_present); } static void android_cpuInitFamily(void) { #if defined(__ARM_ARCH__) g_cpuFamily = ANDROID_CPU_FAMILY_ARM; #elif defined(__i386__) g_cpuFamily = ANDROID_CPU_FAMILY_X86; #elif defined(_MIPS_ARCH) g_cpuFamily = ANDROID_CPU_FAMILY_MIPS; #else g_cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN; #endif } static void android_cpuInit(void) { char* cpuinfo = NULL; int cpuinfo_len; android_cpuInitFamily(); g_cpuFeatures = 0; g_cpuCount = 1; g_inited = 1; cpuinfo_len = get_file_size("/proc/cpuinfo"); if (cpuinfo_len < 0) { D("cpuinfo_len cannot be computed!"); return; } cpuinfo = malloc(cpuinfo_len); if (cpuinfo == NULL) { D("cpuinfo buffer could not be allocated"); return; } cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len); D("cpuinfo_len is (%d):\n%.*s\n", cpuinfo_len, cpuinfo_len >= 0 ? cpuinfo_len : 0, cpuinfo); if (cpuinfo_len < 0) /* should not happen */ { free(cpuinfo); return; } /* Count the CPU cores, the value may be 0 for single-core CPUs */ g_cpuCount = get_cpu_count(); if (g_cpuCount == 0) { g_cpuCount = 1; } D("found cpuCount = %d\n", g_cpuCount); #ifdef __ARM_ARCH__ { char* features = NULL; char* architecture = NULL; /* Extract architecture from the "CPU Architecture" field. * The list is well-known, unlike the the output of * the 'Processor' field which can vary greatly. * * See the definition of the 'proc_arch' array in * $KERNEL/arch/arm/kernel/setup.c and the 'c_show' function in * same file. */ char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture"); if (cpuArch != NULL) { char* end; long archNumber; int hasARMv7 = 0; D("found cpuArch = '%s'\n", cpuArch); /* read the initial decimal number, ignore the rest */ archNumber = strtol(cpuArch, &end, 10); /* Here we assume that ARMv8 will be upwards compatible with v7 * in the future. Unfortunately, there is no 'Features' field to * indicate that Thumb-2 is supported. 
*/ if (end > cpuArch && archNumber >= 7) { hasARMv7 = 1; } /* Unfortunately, it seems that certain ARMv6-based CPUs * report an incorrect architecture number of 7! * * See http://code.google.com/p/android/issues/detail?id=10812 * * We try to correct this by looking at the 'elf_format' * field reported by the 'Processor' field, which is of the * form of "(v7l)" for an ARMv7-based CPU, and "(v6l)" for * an ARMv6-one. */ if (hasARMv7) { char* cpuProc = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Processor"); if (cpuProc != NULL) { D("found cpuProc = '%s'\n", cpuProc); if (has_list_item(cpuProc, "(v6l)")) { D("CPU processor and architecture mismatch!!\n"); hasARMv7 = 0; } free(cpuProc); } } if (hasARMv7) { g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_ARMv7; } /* The LDREX / STREX instructions are available from ARMv6 */ if (archNumber >= 6) { g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_LDREX_STREX; } free(cpuArch); } /* Extract the list of CPU features from ELF hwcaps */ uint32_t hwcaps = get_elf_hwcap(cpuinfo, cpuinfo_len); if (hwcaps != 0) { int has_vfp = (hwcaps & HWCAP_VFP); int has_vfpv3 = (hwcaps & HWCAP_VFPv3); int has_vfpv3d16 = (hwcaps & HWCAP_VFPv3D16); int has_vfpv4 = (hwcaps & HWCAP_VFPv4); int has_neon = (hwcaps & HWCAP_NEON); int has_idiva = (hwcaps & HWCAP_IDIVA); int has_idivt = (hwcaps & HWCAP_IDIVT); int has_iwmmxt = (hwcaps & HWCAP_IWMMXT); // The kernel does a poor job at ensuring consistency when // describing CPU features. So lots of guessing is needed. // 'vfpv4' implies VFPv3|VFP_FMA|FP16 if (has_vfpv4) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 | ANDROID_CPU_ARM_FEATURE_VFP_FP16 | ANDROID_CPU_ARM_FEATURE_VFP_FMA; // 'vfpv3' or 'vfpv3d16' imply VFPv3. Note that unlike GCC, // a value of 'vfpv3' doesn't necessarily mean that the D32 // feature is present, so be conservative. All CPUs in the // field that support D32 also support NEON, so this should // not be a problem in practice. 
if (has_vfpv3 || has_vfpv3d16) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3; // 'vfp' is super ambiguous. Depending on the kernel, it can // either mean VFPv2 or VFPv3. Make it depend on ARMv7. if (has_vfp) { if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3; else g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2; } // Neon implies VFPv3|D32, and if vfpv4 is detected, NEON_FMA if (has_neon) { g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 | ANDROID_CPU_ARM_FEATURE_NEON | ANDROID_CPU_ARM_FEATURE_VFP_D32; if (has_vfpv4) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_NEON_FMA; } // VFPv3 implies VFPv2 and ARMv7 if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2 | ANDROID_CPU_ARM_FEATURE_ARMv7; // Note that some buggy kernels do not report these even when // the CPU actually support the division instructions. However, // assume that if 'vfpv4' is detected, then the CPU supports // sdiv/udiv properly. if (has_idiva || has_vfpv4) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM; if (has_idivt || has_vfpv4) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2; if (has_iwmmxt) g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_iWMMXt; } } #endif /* __ARM_ARCH__ */ #ifdef __i386__ int regs[4]; /* According to http://en.wikipedia.org/wiki/CPUID */ #define VENDOR_INTEL_b 0x756e6547 #define VENDOR_INTEL_c 0x6c65746e #define VENDOR_INTEL_d 0x49656e69 x86_cpuid(0, regs); int vendorIsIntel = (regs[1] == VENDOR_INTEL_b && regs[2] == VENDOR_INTEL_c && regs[3] == VENDOR_INTEL_d); x86_cpuid(1, regs); if ((regs[2] & (1 << 9)) != 0) { g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSSE3; } if ((regs[2] & (1 << 23)) != 0) { g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_POPCNT; } if (vendorIsIntel && (regs[2] & (1 << 22)) != 0) { g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_MOVBE; } #endif free(cpuinfo); } AndroidCpuFamily android_getCpuFamily(void) { pthread_once(&g_once, android_cpuInit); return g_cpuFamily; } uint64_t 
android_getCpuFeatures(void) { pthread_once(&g_once, android_cpuInit); return g_cpuFeatures; } int android_getCpuCount(void) { pthread_once(&g_once, android_cpuInit); return g_cpuCount; } static void android_cpuInitDummy(void) { g_inited = 1; } int android_setCpu(int cpu_count, uint64_t cpu_features) { /* Fail if the library was already initialized. */ if (g_inited) return 0; android_cpuInitFamily(); g_cpuCount = (cpu_count <= 0 ? 1 : cpu_count); g_cpuFeatures = cpu_features; pthread_once(&g_once, android_cpuInitDummy); return 1; } /* * Technical note: Making sense of ARM's FPU architecture versions. * * FPA was ARM's first attempt at an FPU architecture. There is no Android * device that actually uses it since this technology was already obsolete * when the project started. If you see references to FPA instructions * somewhere, you can be sure that this doesn't apply to Android at all. * * FPA was followed by "VFP", soon renamed "VFPv1" due to the emergence of * new versions / additions to it. ARM considers this obsolete right now, * and no known Android device implements it either. * * VFPv2 added a few instructions to VFPv1, and is an *optional* extension * supported by some ARMv5TE, ARMv6 and ARMv6T2 CPUs. Note that a device * supporting the 'armeabi' ABI doesn't necessarily support these. * * VFPv3-D16 adds a few instructions on top of VFPv2 and is typically used * on ARMv7-A CPUs which implement a FPU. Note that it is also mandated * by the Android 'armeabi-v7a' ABI. The -D16 suffix in its name means * that it provides 16 double-precision FPU registers (d0-d15) and 32 * single-precision ones (s0-s31) which happen to be mapped to the same * register banks. * * VFPv3-D32 is the name of an extension to VFPv3-D16 that provides 16 * additional double precision registers (d16-d31). Note that there are * still only 32 single precision registers. * * VFPv3xD is a *subset* of VFPv3-D16 that only provides single-precision * registers. It is only used on ARMv7-M (i.e. 
on micro-controllers) which
 * are not supported by Android. Note that it is not compatible with VFPv2.
 *
 * NOTE: The term 'VFPv3' usually designates either VFPv3-D16 or VFPv3-D32
 *       depending on context. For example GCC uses it for VFPv3-D32, but
 *       the Linux kernel code uses it for VFPv3-D16 (especially in
 *       /proc/cpuinfo). Always try to use the full designation when
 *       possible.
 *
 * NEON, a.k.a. "ARM Advanced SIMD" is an extension that provides
 * instructions to perform parallel computations on vectors of 8, 16,
 * 32, 64 and 128 bit quantities. NEON requires VFPv3-D32 since all
 * NEON registers are also mapped to the same register banks.
 *
 * VFPv4-D16 adds a few instructions on top of VFPv3-D16 in order to
 * perform fused multiply-accumulate on VFP registers, as well as
 * half-precision (16-bit) conversion operations.
 *
 * VFPv4-D32 is VFPv4-D16 with 32, instead of 16, FPU double precision
 * registers.
 *
 * VFPv4-NEON is VFPv4-D32 with NEON instructions. It also adds fused
 * multiply-accumulate instructions that work on the NEON registers.
 *
 * NOTE: Similarly, "VFPv4" might either reference VFPv4-D16 or VFPv4-D32
 *       depending on context.
 *
 * The following information was determined by scanning the binutils-2.22
 * sources:
 *
 * Basic VFP instruction subsets:
 *
 * #define FPU_VFP_EXT_V1xD 0x08000000     // Base VFP instruction set.
 * #define FPU_VFP_EXT_V1   0x04000000     // Double-precision insns.
 * #define FPU_VFP_EXT_V2   0x02000000     // ARM10E VFPr1.
 * #define FPU_VFP_EXT_V3xD 0x01000000     // VFPv3 single-precision.
 * #define FPU_VFP_EXT_V3   0x00800000     // VFPv3 double-precision.
 * #define FPU_NEON_EXT_V1  0x00400000     // Neon (SIMD) insns.
 * #define FPU_VFP_EXT_D32  0x00200000     // Registers D16-D31.
 * #define FPU_VFP_EXT_FP16 0x00100000     // Half-precision extensions.
* #define FPU_NEON_EXT_FMA 0x00080000     // Neon fused multiply-add
 * #define FPU_VFP_EXT_FMA  0x00040000     // VFP fused multiply-add
 *
 * FPU types (excluding NEON)
 *
 *          FPU_VFP_V1xD (EXT_V1xD)
 *               |
 *               +--------------------------+
 *               |                          |
 *          FPU_VFP_V1 (+EXT_V1)       FPU_VFP_V3xD (+EXT_V2+EXT_V3xD)
 *               |                          |
 *               |                          |
 *          FPU_VFP_V2 (+EXT_V2)       FPU_VFP_V4_SP_D16 (+EXT_FP16+EXT_FMA)
 *               |
 *          FPU_VFP_V3D16 (+EXT_V3xD+EXT_V3)
 *               |
 *               +--------------------------+
 *               |                          |
 *          FPU_VFP_V3 (+EXT_D32)      FPU_VFP_V4D16 (+EXT_FP16+EXT_FMA)
 *               |                          |
 *               |                     FPU_VFP_V4 (+EXT_D32)
 *               |
 *          FPU_VFP_HARD (+EXT_FMA+NEON_EXT_FMA)
 *
 * VFP architectures:
 *
 *          ARCH_VFP_V1xD (EXT_V1xD)
 *               |
 *               +------------------+
 *               |                  |
 *               |             ARCH_VFP_V3xD (+EXT_V2+EXT_V3xD)
 *               |                  |
 *               |             ARCH_VFP_V3xD_FP16 (+EXT_FP16)
 *               |                  |
 *               |             ARCH_VFP_V4_SP_D16 (+EXT_FMA)
 *               |
 *          ARCH_VFP_V1 (+EXT_V1)
 *               |
 *          ARCH_VFP_V2 (+EXT_V2)
 *               |
 *          ARCH_VFP_V3D16 (+EXT_V3xD+EXT_V3)
 *               |
 *               +-------------------+
 *               |                   |
 *               |              ARCH_VFP_V3D16_FP16 (+EXT_FP16)
 *               |
 *               +-------------------+
 *               |                   |
 *               |              ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
 *               |                   |
 *               |              ARCH_VFP_V4 (+EXT_D32)
 *               |                   |
 *               |              ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
 *               |
 *          ARCH_VFP_V3 (+EXT_D32)
 *               |
 *               +-------------------+
 *               |                   |
 *               |              ARCH_VFP_V3_FP16 (+EXT_FP16)
 *               |
 *          ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
 *               |
 *          ARCH_NEON_FP16 (+EXT_FP16)
 *
 * -fpu=<name> values and their correspondence with FPU architectures above:
 *
 *   {"vfp",               FPU_ARCH_VFP_V2},
 *   {"vfp9",              FPU_ARCH_VFP_V2},
 *   {"vfp3",              FPU_ARCH_VFP_V3}, // For backwards compatibility.
*   {"vfp10",             FPU_ARCH_VFP_V2},
 *   {"vfp10-r0",          FPU_ARCH_VFP_V1},
 *   {"vfpxd",             FPU_ARCH_VFP_V1xD},
 *   {"vfpv2",             FPU_ARCH_VFP_V2},
 *   {"vfpv3",             FPU_ARCH_VFP_V3},
 *   {"vfpv3-fp16",        FPU_ARCH_VFP_V3_FP16},
 *   {"vfpv3-d16",         FPU_ARCH_VFP_V3D16},
 *   {"vfpv3-d16-fp16",    FPU_ARCH_VFP_V3D16_FP16},
 *   {"vfpv3xd",           FPU_ARCH_VFP_V3xD},
 *   {"vfpv3xd-fp16",      FPU_ARCH_VFP_V3xD_FP16},
 *   {"neon",              FPU_ARCH_VFP_V3_PLUS_NEON_V1},
 *   {"neon-fp16",         FPU_ARCH_NEON_FP16},
 *   {"vfpv4",             FPU_ARCH_VFP_V4},
 *   {"vfpv4-d16",         FPU_ARCH_VFP_V4D16},
 *   {"fpv4-sp-d16",       FPU_ARCH_VFP_V4_SP_D16},
 *   {"neon-vfpv4",        FPU_ARCH_NEON_VFP_V4},
 *
 *
 * Simplified diagram that only includes FPUs supported by Android:
 * Only ARCH_VFP_V3D16 is actually mandated by the armeabi-v7a ABI,
 * all others are optional and must be probed at runtime.
 *
 *          ARCH_VFP_V3D16 (EXT_V1xD+EXT_V1+EXT_V2+EXT_V3xD+EXT_V3)
 *               |
 *               +-------------------+
 *               |                   |
 *               |              ARCH_VFP_V3D16_FP16 (+EXT_FP16)
 *               |
 *               +-------------------+
 *               |                   |
 *               |              ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
 *               |                   |
 *               |              ARCH_VFP_V4 (+EXT_D32)
 *               |                   |
 *               |              ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
 *               |
 *          ARCH_VFP_V3 (+EXT_D32)
 *               |
 *               +-------------------+
 *               |                   |
 *               |              ARCH_VFP_V3_FP16 (+EXT_FP16)
 *               |
 *          ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
 *               |
 *          ARCH_NEON_FP16 (+EXT_FP16)
 *
 */