mirror of https://github.com/FFmpeg/FFmpeg.git
The main loop processes 8 bytes in 5 instructions. For comparison, the optimal plain strnlen() requires 4 instructions per byte (6.4x worse): LBU; ADDI; BEQZ; BNE. The current libavcodec C code involves 5 instructions per byte (8x worse). Actual benchmarks may be slightly less favourable due to latency from ORC.B to BNE.release/7.1
parent
8b8b555de0
commit
4ad5b9c8db
6 changed files with 134 additions and 2 deletions
@ -0,0 +1,40 @@ |
||||
/*
|
||||
* Copyright © 2024 Rémi Denis-Courmont. |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
|
||||
#include <stdint.h> |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavcodec/h264dsp.h" |
||||
|
||||
extern int ff_startcode_find_candidate_rvb(const uint8_t *, int); |
||||
|
||||
av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth, |
||||
const int chroma_format_idc) |
||||
{ |
||||
#if HAVE_RV |
||||
int flags = av_get_cpu_flags(); |
||||
|
||||
if (flags & AV_CPU_FLAG_RVB_BASIC) |
||||
dsp->startcode_find_candidate = ff_startcode_find_candidate_rvb; |
||||
#endif |
||||
} |
@ -0,0 +1,83 @@ |
||||
/* |
||||
* Copyright © 2024 Rémi Denis-Courmont. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions are met: |
||||
* |
||||
* 1. Redistributions of source code must retain the above copyright notice, |
||||
* this list of conditions and the following disclaimer. |
||||
* |
||||
* 2. Redistributions in binary form must reproduce the above copyright notice, |
||||
* this list of conditions and the following disclaimer in the documentation |
||||
* and/or other materials provided with the distribution. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
||||
* POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
#include "libavutil/riscv/asm.S" |
||||
|
||||
.macro lx rd, addr |
||||
#if (__riscv_xlen == 32) |
||||
lw \rd, \addr |
||||
#elif (__riscv_xlen == 64) |
||||
ld \rd, \addr |
||||
#else |
||||
lq \rd, \addr |
||||
#endif |
||||
.endm |
||||
|
||||
func ff_startcode_find_candidate_rvb, zbb |
||||
add a1, a0, a1 |
||||
|
||||
// Potentially unaligned head |
||||
andi t0, a0, -(__riscv_xlen / 8) |
||||
beq a0, a1, 2f |
||||
|
||||
andi t1, a0, (__riscv_xlen / 8) - 1 |
||||
lx t2, (t0) |
||||
li t3, __riscv_xlen |
||||
orc.b t2, t2 |
||||
slli t1, t1, 3 |
||||
not t2, t2 |
||||
sub t3, t3, t1 |
||||
srl t2, t2, t1 |
||||
addi t0, t0, __riscv_xlen / 8 |
||||
sll t2, t2, t1 |
||||
bnez t2, 4f |
||||
|
||||
// Main loop (including potentially short tail) |
||||
bge t0, a1, 2f |
||||
li t3, -1 |
||||
1: |
||||
lx t2, (t0) |
||||
addi t0, t0, __riscv_xlen / 8 |
||||
orc.b t2, t2 |
||||
bne t2, t3, 3f // t2 != -1 iff (at least one) zero byte |
||||
blt t0, a1, 1b |
||||
|
||||
2: // No zero byte found |
||||
sub a0, a1, a0 |
||||
ret |
||||
|
||||
3: // Zero byte found in main loop |
||||
not t2, t2 |
||||
4: // Zero byte found in head |
||||
ctz t2, t2 |
||||
addi t0, t0, -(__riscv_xlen / 8) // back-track |
||||
srl t2, t2, 3 |
||||
add t0, t0, t2 |
||||
// Uncomment the following line for exact POSIX C strnlen() semantics. |
||||
//minu t0, t0, a1 // ignore zero byte in tail |
||||
sub a0, t0, a0 |
||||
ret |
||||
endfunc |
Loading…
Reference in new issue