You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

240 lines
6.6 KiB

/*
* Copyright © 2022 Rémi Denis-Courmont.
* Loosely based on earlier work copyrighted by Måns Rullgård, 2008.
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#if defined (__riscv_float_abi_soft)
#define NOHWF
#define NOHWD
#define HWF #
#define HWD #
#elif defined (__riscv_float_abi_single)
#define NOHWF #
#define NOHWD
#define HWF
#define HWD #
#else
#define NOHWF #
#define NOHWD #
#define HWF
#define HWD
#endif
.macro archadd ext=, more:vararg
.ifnb \ext
.ifc \ext, b
# B was defined later, is known to fewer assemblers.
archadd zba, zbb, zbs
.else
.option arch, +\ext
.endif
archadd \more
.endif
.endm
.macro func sym, exts:vararg
.text
.option push
archadd \exts
.global \sym
.hidden \sym
.type \sym, %function
.option push
.option norvc
.align 2
\sym:
.option pop
.macro endfunc
.size \sym, . - \sym
.option pop
.previous
.purgem endfunc
.endm
.endm
.macro const sym, align=3, relocate=0
.if \relocate
.pushsection .data.rel.ro
.else
.pushsection .rodata
.endif
.align \align
\sym:
.macro endconst
.size \sym, . - \sym
.popsection
.purgem endconst
.endm
.endm
#if !defined (__riscv_zicfilp)
.macro lpad lpl
auipc zero, \lpl
.endm
#endif
#if defined (__riscv_v_elen)
# define RV_V_ELEN __riscv_v_elen
#else
/* Run-time detection of the V extension implies ELEN >= 64. */
# define RV_V_ELEN 64
#endif
#if RV_V_ELEN == 32
# define VSEW_MAX 2
#else
# define VSEW_MAX 3
#endif
.macro parse_vtype ew, tp, mp
.ifc \ew,e8
.equ vsew, 0
.else
.ifc \ew,e16
.equ vsew, 1
.else
.ifc \ew,e32
.equ vsew, 2
.else
.ifc \ew,e64
.equ vsew, 3
.else
.error "Unknown element width \ew"
.endif
.endif
.endif
.endif
.ifc \tp,tu
.equ tp, 0
.else
.ifc \tp,ta
.equ tp, 1
.else
.error "Unknown tail policy \tp"
.endif
.endif
.ifc \mp,mu
.equ mp, 0
.else
.ifc \mp,ma
.equ mp, 1
.else
.error "Unknown mask policy \mp"
.endif
.endif
.endm
/**
* Gets the vector type with the smallest suitable LMUL value.
* @param[out] rd vector type destination register
* @param vl vector length constant
* @param ew element width: e8, e16, e32 or e64
* @param tp tail policy: tu or ta
* @param mp mask policty: mu or ma
*/
.macro vtype_ivli rd, avl, ew, tp=tu, mp=mu
.if \avl <= 1
.equ log2vl, 0
.elseif \avl <= 2
.equ log2vl, 1
.elseif \avl <= 4
.equ log2vl, 2
.elseif \avl <= 8
.equ log2vl, 3
.elseif \avl <= 16
.equ log2vl, 4
.elseif \avl <= 32
.equ log2vl, 5
.elseif \avl <= 64
.equ log2vl, 6
.elseif \avl <= 128
.equ log2vl, 7
.else
.error "Vector length \avl out of range"
.endif
parse_vtype \ew, \tp, \mp
csrr \rd, vlenb
clz \rd, \rd
addi \rd, \rd, log2vl + 1 + VSEW_MAX - __riscv_xlen
max \rd, \rd, zero // VLMUL must be >= VSEW - VSEW_MAX
.if vsew < VSEW_MAX
addi \rd, \rd, vsew - VSEW_MAX
andi \rd, \rd, 7
.endif
ori \rd, \rd, (vsew << 3) | (tp << 6) | (mp << 7)
.endm
/**
* Gets the vector type with the smallest suitable LMUL value.
* @param[out] rd vector type destination register
* @param rs vector length source register
* @param[out] tmp temporary register to be clobbered
* @param ew element width: e8, e16, e32 or e64
* @param tp tail policy: tu or ta
* @param mp mask policty: mu or ma
* @param addend optional addend for the vector length register
*/
.macro vtype_vli rd, rs, tmp, ew, tp=tu, mp=mu, addend=0
parse_vtype \ew, \tp, \mp
/*
* The difference between the CLZ's notionally equals the VLMUL value
* for 4-bit elements. But we want the value for SEW_MAX-bit elements.
*/
slli \tmp, \rs, 1 + VSEW_MAX
.if \addend - 1
addi \tmp, \tmp, \addend - 1
.endif
csrr \rd, vlenb
clz \tmp, \tmp
clz \rd, \rd
sub \rd, \rd, \tmp
max \rd, \rd, zero // VLMUL must be >= VSEW - VSEW_MAX
.if vsew < VSEW_MAX
addi \rd, \rd, vsew - VSEW_MAX
andi \rd, \rd, 7
.endif
ori \rd, \rd, (vsew << 3) | (tp << 6) | (mp << 7)
.endm
/**
* Widens a vector type.
* @param[out] rd widened vector type destination register
* @param rs vector type source register
* @param n number of times to widen (once by default)
*/
.macro vwtypei rd, rs, n=1
xori \rd, \rs, 4
addi \rd, \rd, (\n) * 011
xori \rd, \rd, 4
.endm
/**
* Narrows a vector type.
* @param[out] rd narrowed vector type destination register
* @param rs vector type source register
* @param n number of times to narrow (once by default)
*/
.macro vntypei rd, rs, n=1
vwtypei \rd, \rs, -(\n)
.endm