|
|
|
/*
|
|
|
|
* Copyright © 2022 Rémi Denis-Courmont.
|
|
|
|
*
|
|
|
|
* This file is part of FFmpeg.
|
|
|
|
*
|
|
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define _GNU_SOURCE
|
|
|
|
#include "libavutil/cpu.h"
|
|
|
|
#include "libavutil/cpu_internal.h"
|
|
|
|
#include "libavutil/macros.h"
|
|
|
|
#include "libavutil/log.h"
|
|
|
|
#include "config.h"
|
|
|
|
|
|
|
|
#if HAVE_GETAUXVAL
|
|
|
|
#include <sys/auxv.h>
|
|
|
|
#define HWCAP_RV(letter) (1ul << ((letter) - 'A'))
|
|
|
|
#endif
|
|
|
|
#if HAVE_SYS_HWPROBE_H
|
|
|
|
#include <sys/hwprobe.h>
|
|
|
|
#elif HAVE_ASM_HWPROBE_H
|
|
|
|
#include <asm/hwprobe.h>
|
|
|
|
#include <sys/syscall.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
|
|
|
|
static int __riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
|
|
|
|
size_t cpu_count, unsigned long *cpus,
|
|
|
|
unsigned int flags)
|
|
|
|
{
|
|
|
|
return syscall(__NR_riscv_hwprobe, pairs, pair_count, cpu_count, cpus,
|
|
|
|
flags);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
int ff_get_cpu_flags_riscv(void)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
|
|
|
#if HAVE_SYS_HWPROBE_H || HAVE_ASM_HWPROBE_H
|
|
|
|
struct riscv_hwprobe pairs[] = {
|
|
|
|
{ RISCV_HWPROBE_KEY_BASE_BEHAVIOR, 0 },
|
|
|
|
{ RISCV_HWPROBE_KEY_IMA_EXT_0, 0 },
|
|
|
|
{ RISCV_HWPROBE_KEY_CPUPERF_0, 0 },
|
|
|
|
};
|
|
|
|
|
|
|
|
if (__riscv_hwprobe(pairs, FF_ARRAY_ELEMS(pairs), 0, NULL, 0) == 0) {
|
|
|
|
if (pairs[0].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA)
|
|
|
|
ret |= AV_CPU_FLAG_RVI;
|
|
|
|
#ifdef RISCV_HWPROBE_IMA_V
|
|
|
|
if (pairs[1].value & RISCV_HWPROBE_IMA_V)
|
|
|
|
ret |= AV_CPU_FLAG_RVV_I32 | AV_CPU_FLAG_RVV_I64
|
|
|
|
| AV_CPU_FLAG_RVV_F32 | AV_CPU_FLAG_RVV_F64;
|
|
|
|
#endif
|
|
|
|
#ifdef RISCV_HWPROBE_EXT_ZBB
|
|
|
|
if (pairs[1].value & RISCV_HWPROBE_EXT_ZBB)
|
|
|
|
ret |= AV_CPU_FLAG_RVB_BASIC;
|
|
|
|
#if defined (RISCV_HWPROBE_EXT_ZBA) && defined (RISCV_HWPROBE_EXT_ZBS)
|
|
|
|
if ((pairs[1].value & RISCV_HWPROBE_EXT_ZBA) &&
|
|
|
|
(pairs[1].value & RISCV_HWPROBE_EXT_ZBB) &&
|
|
|
|
(pairs[1].value & RISCV_HWPROBE_EXT_ZBS))
|
|
|
|
ret |= AV_CPU_FLAG_RVB;
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
#ifdef RISCV_HWPROBE_EXT_ZVBB
|
|
|
|
if (pairs[1].value & RISCV_HWPROBE_EXT_ZVBB)
|
|
|
|
ret |= AV_CPU_FLAG_RV_ZVBB;
|
|
|
|
#endif
|
|
|
|
switch (pairs[2].value & RISCV_HWPROBE_MISALIGNED_MASK) {
|
|
|
|
case RISCV_HWPROBE_MISALIGNED_FAST:
|
|
|
|
ret |= AV_CPU_FLAG_RV_MISALIGNED;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
}
|
lavu/riscv: do not fallback to AT_HWCAP auxillary vector
If __riscv_hwprobe() fails, then the kernel version is presumably too
old. There is not much point falling back to the auxillary vector.
- The Linux kernel requires I, so the flag is always set on Linux, and
run-time detection is unnecessary. Our RISC-V assembler does anyway not
support targets without I.
- Linux can compile with or without F and D, but it cannot perform
run-time detection for them (a kernel with F support will not boot a
processor without F). The run-time detection is thus useless in that
case. Besides F and D extensions are used throughout the C code, so
their run-time detection would not be practical.
- Support for V was added in a later kernel version than riscv_hwprobe(),
so the system call will always be available if the kernel supports V.
The only exception would be vendor kernel forks, but those are known to
haphasardly pretend to support V on systems without actual V support, or
with only pre-ratification binary-incompatible version. Furthermore, a
large chunk of our optimisations require Zba and/or Zbb which cannot be
detected with HWCAP in those kernels.
For what it is worth, OpenJDK already took a similar action. Note that this
keeps AT_HWCAP usage for platforms with neither C run-time <sys/hwprobe.h>
nor kernel <asm/hwprobe.h>, notably kernels other than Linux.
8 months ago
|
|
|
}
|
|
|
|
#elif HAVE_GETAUXVAL
|
|
|
|
{
|
|
|
|
const unsigned long hwcap = ff_getauxval(AT_HWCAP);
|
|
|
|
|
|
|
|
if (hwcap & HWCAP_RV('I'))
|
|
|
|
ret |= AV_CPU_FLAG_RVI;
|
|
|
|
if (hwcap & HWCAP_RV('B'))
|
|
|
|
ret |= AV_CPU_FLAG_RVB_BASIC | AV_CPU_FLAG_RVB;
|
lavu/cpu: CPU flags for the RISC-V Vector extension
RVV defines a total of 12 different extensions, including:
- 5 different instruction subsets:
- Zve32x: 8-, 16- and 32-bit integers,
- Zve32f: Zve32x plus single precision floats,
- Zve64x: Zve32x plus 64-bit integers,
- Zve64f: Zve32f plus Zve64x,
- Zve64d: Zve64f plus double precision floats.
- 6 different vector lengths:
- Zvl32b (embedded only),
- Zvl64b (embedded only),
- Zvl128b,
- Zvl256b,
- Zvl512b,
- Zvl1024b,
- and the V extension proper: equivalent to Zve64f and Zvl128b.
In total, there are 6 different possible sets of supported instructions
(including the empty set), but for convenience we allocate one bit for
each type sets: up-to-32-bit ints (RVV_I32), floats (RVV_F32),
64-bit ints (RVV_I64) and doubles (RVV_F64).
Whence the vector size is needed, it can be retrieved by reading the
unprivileged read-only vlenb CSR. This should probably be a separate
helper macro if needed at a later point.
2 years ago
|
|
|
|
|
|
|
/* The V extension implies all Zve* functional subsets */
|
|
|
|
if (hwcap & HWCAP_RV('V'))
|
|
|
|
ret |= AV_CPU_FLAG_RVV_I32 | AV_CPU_FLAG_RVV_I64
|
|
|
|
| AV_CPU_FLAG_RVV_F32 | AV_CPU_FLAG_RVV_F64;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef __riscv_i
|
|
|
|
ret |= AV_CPU_FLAG_RVI;
|
|
|
|
#endif
|
lavu/cpu: CPU flags for the RISC-V Vector extension
RVV defines a total of 12 different extensions, including:
- 5 different instruction subsets:
- Zve32x: 8-, 16- and 32-bit integers,
- Zve32f: Zve32x plus single precision floats,
- Zve64x: Zve32x plus 64-bit integers,
- Zve64f: Zve32f plus Zve64x,
- Zve64d: Zve64f plus double precision floats.
- 6 different vector lengths:
- Zvl32b (embedded only),
- Zvl64b (embedded only),
- Zvl128b,
- Zvl256b,
- Zvl512b,
- Zvl1024b,
- and the V extension proper: equivalent to Zve64f and Zvl128b.
In total, there are 6 different possible sets of supported instructions
(including the empty set), but for convenience we allocate one bit for
each type sets: up-to-32-bit ints (RVV_I32), floats (RVV_F32),
64-bit ints (RVV_I64) and doubles (RVV_F64).
Whence the vector size is needed, it can be retrieved by reading the
unprivileged read-only vlenb CSR. This should probably be a separate
helper macro if needed at a later point.
2 years ago
|
|
|
|
lavu/riscv: CPU flag for the Zbb extension
Unfortunately, it is common, and will remain so, that the Bit
manipulations are not enabled at compilation time. This is an official
policy for Debian ports in general (though they do not support RISC-V
officially as of yet) to stick to the minimal target baseline, which
does not include the B extension or even its Zbb subset.
For inline helpers (CPOP, REV8), compiler builtins (CTZ, CLZ) or
even plain C code (MIN, MAX, MINU, MAXU), run-time detection seems
impractical. But at least it can work for the byte-swap DSP functions.
2 years ago
|
|
|
#ifdef __riscv_zbb
|
|
|
|
ret |= AV_CPU_FLAG_RVB_BASIC;
|
|
|
|
#endif
|
|
|
|
#if defined (__riscv_b) || \
|
|
|
|
(defined (__riscv_zba) && defined (__riscv_zbb) && defined (__riscv_zbs))
|
|
|
|
ret |= AV_CPU_FLAG_RVB;
|
|
|
|
#endif
|
lavu/riscv: CPU flag for the Zbb extension
Unfortunately, it is common, and will remain so, that the Bit
manipulations are not enabled at compilation time. This is an official
policy for Debian ports in general (though they do not support RISC-V
officially as of yet) to stick to the minimal target baseline, which
does not include the B extension or even its Zbb subset.
For inline helpers (CPOP, REV8), compiler builtins (CTZ, CLZ) or
even plain C code (MIN, MAX, MINU, MAXU), run-time detection seems
impractical. But at least it can work for the byte-swap DSP functions.
2 years ago
|
|
|
|
lavu/cpu: CPU flags for the RISC-V Vector extension
RVV defines a total of 12 different extensions, including:
- 5 different instruction subsets:
- Zve32x: 8-, 16- and 32-bit integers,
- Zve32f: Zve32x plus single precision floats,
- Zve64x: Zve32x plus 64-bit integers,
- Zve64f: Zve32f plus Zve64x,
- Zve64d: Zve64f plus double precision floats.
- 6 different vector lengths:
- Zvl32b (embedded only),
- Zvl64b (embedded only),
- Zvl128b,
- Zvl256b,
- Zvl512b,
- Zvl1024b,
- and the V extension proper: equivalent to Zve64f and Zvl128b.
In total, there are 6 different possible sets of supported instructions
(including the empty set), but for convenience we allocate one bit for
each type sets: up-to-32-bit ints (RVV_I32), floats (RVV_F32),
64-bit ints (RVV_I64) and doubles (RVV_F64).
Whence the vector size is needed, it can be retrieved by reading the
unprivileged read-only vlenb CSR. This should probably be a separate
helper macro if needed at a later point.
2 years ago
|
|
|
/* If RV-V is enabled statically at compile-time, check the details. */
|
|
|
|
#ifdef __riscv_vector
|
lavu/cpu: CPU flags for the RISC-V Vector extension
RVV defines a total of 12 different extensions, including:
- 5 different instruction subsets:
- Zve32x: 8-, 16- and 32-bit integers,
- Zve32f: Zve32x plus single precision floats,
- Zve64x: Zve32x plus 64-bit integers,
- Zve64f: Zve32f plus Zve64x,
- Zve64d: Zve64f plus double precision floats.
- 6 different vector lengths:
- Zvl32b (embedded only),
- Zvl64b (embedded only),
- Zvl128b,
- Zvl256b,
- Zvl512b,
- Zvl1024b,
- and the V extension proper: equivalent to Zve64f and Zvl128b.
In total, there are 6 different possible sets of supported instructions
(including the empty set), but for convenience we allocate one bit for
each type sets: up-to-32-bit ints (RVV_I32), floats (RVV_F32),
64-bit ints (RVV_I64) and doubles (RVV_F64).
Whence the vector size is needed, it can be retrieved by reading the
unprivileged read-only vlenb CSR. This should probably be a separate
helper macro if needed at a later point.
2 years ago
|
|
|
ret |= AV_CPU_FLAG_RVV_I32;
|
|
|
|
#if __riscv_v_elen >= 64
|
|
|
|
ret |= AV_CPU_FLAG_RVV_I64;
|
|
|
|
#endif
|
|
|
|
#if __riscv_v_elen_fp >= 32
|
|
|
|
ret |= AV_CPU_FLAG_RVV_F32;
|
|
|
|
#if __riscv_v_elen_fp >= 64
|
|
|
|
ret |= AV_CPU_FLAG_RVV_F64;
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
#ifdef __riscv_zvbb
|
|
|
|
ret |= AV_CPU_FLAG_RV_ZVBB;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|