mirror of https://github.com/FFmpeg/FFmpeg.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
243 lines
6.7 KiB
243 lines
6.7 KiB
/*
 * Copyright © 2022 Rémi Denis-Courmont.
 * Loosely based on earlier work copyrighted by Måns Rullgård, 2008.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * Floating-point ABI line prefixes.
 *
 * Each macro expands either to nothing or to '#'. Since '#' starts a comment
 * in RISC-V GNU assembler syntax, a source line prefixed with one of these
 * macros is either assembled or commented out depending on the ABI selected
 * at build time:
 *   soft-float ABI:   NOHWF and NOHWD lines are kept, HWF and HWD dropped.
 *   single-float ABI: HWF and NOHWD lines are kept, NOHWF and HWD dropped.
 *   otherwise:        HWF and HWD lines are kept, NOHWF and NOHWD dropped.
 */
#if defined (__riscv_float_abi_soft)
#define NOHWF
#define NOHWD
#define HWF   #
#define HWD   #
#elif defined (__riscv_float_abi_single)
#define NOHWF #
#define NOHWD
#define HWF
#define HWD   #
#else
#define NOHWF #
#define NOHWD #
#define HWF
#define HWD
#endif

/**
 * Opens the definition of a function in the text section.
 *
 * The symbol is made global with hidden visibility and typed as a function.
 * The current assembler options are saved with ".option push", and up to two
 * ISA extensions are enabled for the body of the function.
 *
 * A matching "endfunc" macro is defined as a side effect. It records the
 * size of the symbol, restores the saved options, switches back to the
 * previously active section and then removes itself, so that "func" can be
 * invoked again.
 *
 * @param sym  name of the function symbol
 * @param ext1 optional ISA extension to enable within the function
 * @param ext2 optional second ISA extension (only honoured if ext1 is set)
 */
.macro  func sym, ext1=, ext2=
        .text
        .align  2

        /* Extensions are scoped to this function: undone by endfunc. */
        .option push
        .ifnb   \ext1
        .option arch, +\ext1
        .ifnb   \ext2
        .option arch, +\ext2
        .endif
        .endif

        .global \sym
        .hidden \sym
        .type   \sym, %function
        \sym:

        .macro  endfunc
        .size   \sym, . - \sym
        .option pop
        .previous
        .purgem endfunc
        .endm
.endm

/**
 * Opens the definition of a read-only data object.
 *
 * The object is placed in .rodata, or in .data.rel.ro if it needs
 * relocations, and the previously active section is saved on the section
 * stack.
 *
 * A matching "endconst" macro is defined as a side effect. It records the
 * size of the symbol, restores the saved section and then removes itself,
 * so that "const" can be invoked again.
 *
 * @param sym      name of the data symbol
 * @param align    alignment, passed verbatim to ".align" (default: 3)
 * @param relocate non-zero if the data contains relocated values
 */
.macro  const sym, align=3, relocate=0
        .if     \relocate
        .pushsection .data.rel.ro
        .else
        .pushsection .rodata
        .endif
        .align  \align
        /* Mark the symbol as data so that its .size is meaningful to tools. */
        .type   \sym, %object
        \sym:

        .macro  endconst
        .size   \sym, . - \sym
        .popsection
        .purgem endconst
        .endm
.endm

#if !defined (__riscv_zba)
/* SH{1,2,3}ADD definitions for pre-Zba assemblers */

/**
 * Emits a raw SH<n>ADD instruction.
 *
 * The instruction is encoded by hand as an R-type instruction with the OP
 * major opcode, funct3 = 2 * n and funct7 = 16, so that no assembler
 * support for the mnemonic is needed.
 *
 * @param n   shift amount: 1, 2 or 3
 * @param rd  destination register
 * @param rs1 source register that gets shifted left by n bits
 * @param rs2 source register that gets added
 */
.macro  shnadd n, rd, rs1, rs2
        .insn r OP, 2 * \n, 16, \rd, \rs1, \rs2
.endm

/* rd = (rs1 << 1) + rs2 */
.macro  sh1add rd, rs1, rs2
        shnadd  1, \rd, \rs1, \rs2
.endm

/* rd = (rs1 << 2) + rs2 */
.macro  sh2add rd, rs1, rs2
        shnadd  2, \rd, \rs1, \rs2
.endm

/* rd = (rs1 << 3) + rs2 */
.macro  sh3add rd, rs1, rs2
        shnadd  3, \rd, \rs1, \rs2
.endm
#endif

/*
 * RV_V_ELEN is the largest vector element width in bits: the value that the
 * compiler reports if the vector extension is enabled at build time, or 64
 * otherwise.
 *
 * VSEW_MAX is the matching element width code, using the same numbering as
 * the "vsew" symbol set by parse_vtype (2 for e32, 3 for e64).
 */
#if defined (__riscv_v_elen)
# define RV_V_ELEN __riscv_v_elen
#else
/* Run-time detection of the V extension implies ELEN >= 64. */
# define RV_V_ELEN 64
#endif
#if RV_V_ELEN == 32
# define VSEW_MAX 2
#else
# define VSEW_MAX 3
#endif

/**
 * Parses the symbolic components of a vector type.
 *
 * No instructions are emitted. The results are stored in the assembler
 * symbols "vsew", "tp" and "mp", which are overwritten at every invocation.
 * An unrecognised argument raises an assembly error.
 *
 * @param ew element width: e8, e16, e32 or e64 (vsew = 0, 1, 2 or 3)
 * @param tp tail policy: tu or ta (tp = 0 or 1)
 * @param mp mask policy: mu or ma (mp = 0 or 1)
 */
.macro  parse_vtype ew, tp, mp
        /* Element width */
        .ifc    \ew,e8
            .equ    vsew, 0
        .else
            .ifc    \ew,e16
                .equ    vsew, 1
            .else
                .ifc    \ew,e32
                    .equ    vsew, 2
                .else
                    .ifc    \ew,e64
                        .equ    vsew, 3
                    .else
                        .error  "Unknown element width \ew"
                    .endif
                .endif
            .endif
        .endif

        /* Tail policy */
        .ifc    \tp,tu
            .equ    tp, 0
        .else
            .ifc    \tp,ta
                .equ    tp, 1
            .else
                .error  "Unknown tail policy \tp"
            .endif
        .endif

        /* Mask policy */
        .ifc    \mp,mu
            .equ    mp, 0
        .else
            .ifc    \mp,ma
                .equ    mp, 1
            .else
                .error  "Unknown mask policy \mp"
            .endif
        .endif
.endm

/**
 * Gets the vector type with the smallest suitable LMUL value.
 *
 * The vector register size is read from the vlenb CSR at run time. The
 * emitted code uses the CLZ and MAX instructions of the Zbb extension.
 * The assembler symbols "log2vl", "vsew", "tp" and "mp" are overwritten.
 *
 * @param[out] rd vector type destination register
 * @param avl vector length constant (at most 128)
 * @param ew element width: e8, e16, e32 or e64
 * @param tp tail policy: tu or ta
 * @param mp mask policy: mu or ma
 */
.macro  vtype_ivli rd, avl, ew, tp=tu, mp=mu
        /* log2vl = binary logarithm of the vector length, rounded up */
        .if     \avl <= 1
        .equ    log2vl, 0
        .elseif \avl <= 2
        .equ    log2vl, 1
        .elseif \avl <= 4
        .equ    log2vl, 2
        .elseif \avl <= 8
        .equ    log2vl, 3
        .elseif \avl <= 16
        .equ    log2vl, 4
        .elseif \avl <= 32
        .equ    log2vl, 5
        .elseif \avl <= 64
        .equ    log2vl, 6
        .elseif \avl <= 128
        .equ    log2vl, 7
        .else
        .error  "Vector length \avl out of range"
        .endif
        parse_vtype \ew, \tp, \mp
        /*
         * CLZ(vlenb) equals XLEN - 1 - log2(vlenb), so the sum below is
         * log2vl + VSEW_MAX - log2(vlenb): the binary logarithm of the LMUL
         * needed to hold the vector with elements of the largest width.
         */
        csrr    \rd, vlenb
        clz     \rd, \rd
        addi    \rd, \rd, log2vl + 1 + VSEW_MAX - __riscv_xlen
        max     \rd, \rd, zero // VLMUL must be >= VSEW - VSEW_MAX
        .if     vsew < VSEW_MAX
        /* Scale down for narrower elements, then wrap to the 3-bit field. */
        addi    \rd, \rd, vsew - VSEW_MAX
        andi    \rd, \rd, 7
        .endif
        /* Merge the element width, tail policy and mask policy fields. */
        ori     \rd, \rd, (vsew << 3) | (tp << 6) | (mp << 7)
.endm

/**
 * Gets the vector type with the smallest suitable LMUL value.
 *
 * The vector register size is read from the vlenb CSR at run time. The
 * emitted code uses the CLZ and MAX instructions of the Zbb extension.
 * The assembler symbols "vsew", "tp" and "mp" are overwritten.
 *
 * rd and tmp must be different registers, as both hold live values at the
 * same time.
 *
 * @param[out] rd vector type destination register
 * @param rs vector length source register
 * @param[out] tmp temporary register to be clobbered
 * @param ew element width: e8, e16, e32 or e64
 * @param tp tail policy: tu or ta
 * @param mp mask policy: mu or ma
 * @param addend optional addend for the vector length register
 */
.macro  vtype_vli rd, rs, tmp, ew, tp=tu, mp=mu, addend=0
        parse_vtype \ew, \tp, \mp
        /*
         * The difference between the CLZ's notionally equals the VLMUL value
         * for 4-bit elements. But we want the value for SEW_MAX-bit elements.
         */
        slli    \tmp, \rs, 1 + VSEW_MAX
        /*
         * Subtracting one rounds exact powers of two down to the previous
         * bit length. Nothing is emitted if the net adjustment is zero.
         * NOTE(review): the addend is applied after the shift, so the result
         * looks exact only for addend values 0 and 1 - confirm before
         * passing anything else.
         */
        .if     \addend - 1
        addi    \tmp, \tmp, \addend - 1
        .endif
        csrr    \rd, vlenb
        clz     \tmp, \tmp
        clz     \rd, \rd
        sub     \rd, \rd, \tmp
        max     \rd, \rd, zero // VLMUL must be >= VSEW - VSEW_MAX
        .if     vsew < VSEW_MAX
        /* Scale down for narrower elements, then wrap to the 3-bit field. */
        addi    \rd, \rd, vsew - VSEW_MAX
        andi    \rd, \rd, 7
        .endif
        /* Merge the element width, tail policy and mask policy fields. */
        ori     \rd, \rd, (vsew << 3) | (tp << 6) | (mp << 7)
.endm

/**
 * Widens a vector type.
 *
 * Each widening step increments both the element width field (bits 5:3)
 * and the length multiplier field (bits 2:0) of the vector type.
 *
 * @param[out] rd widened vector type destination register
 * @param rs vector type source register
 * @param n number of times to widen (once by default)
 */
.macro  vwtypei rd, rs, n=1
        /*
         * The length multiplier is a signed 3-bit field. Flipping its top
         * bit turns it into a biased unsigned value, so that stepping between
         * fractional and integral multipliers does not carry into, or borrow
         * from, the element width field. The flip is undone afterwards.
         */
        xori    \rd, \rs, 4
        addi    \rd, \rd, (\n) * ((1 << 3) | 1)
        xori    \rd, \rd, 4
.endm

/**
 * Narrows a vector type.
 *
 * This is implemented as a widening by a negative number of steps.
 *
 * @param[out] rd narrowed vector type destination register
 * @param rs vector type source register
 * @param n number of times to narrow (once by default)
 */
.macro  vntypei rd, rs, n=1
        vwtypei \rd, \rs, -(\n)
.endm