From d3789eeeed3423bd1ca9dc40030a2f7a21ea5332 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Sat, 5 Apr 2014 11:47:18 +0200 Subject: [PATCH] aarch64: implement videodsp.prefetch 8% faster h264 decoding on Apple A7. --- libavcodec/aarch64/Makefile | 3 +++ libavcodec/aarch64/videodsp.S | 28 ++++++++++++++++++++++++++ libavcodec/aarch64/videodsp_init.c | 32 ++++++++++++++++++++++++++++++ libavcodec/videodsp.c | 2 ++ libavcodec/videodsp.h | 1 + 5 files changed, 66 insertions(+) create mode 100644 libavcodec/aarch64/videodsp.S create mode 100644 libavcodec/aarch64/videodsp_init.c diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile index 59d1762269..757b499db0 100644 --- a/libavcodec/aarch64/Makefile +++ b/libavcodec/aarch64/Makefile @@ -3,10 +3,13 @@ OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_init_aarch64.o OBJS-$(CONFIG_NEON_CLOBBER_TEST) += aarch64/neontest.o +OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp_init.o OBJS-$(CONFIG_RV40_DECODER) += aarch64/rv40dsp_init_aarch64.o OBJS-$(CONFIG_VC1_DECODER) += aarch64/vc1dsp_init_aarch64.o +ARMV8-OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp.o + NEON-OBJS-$(CONFIG_H264CHROMA) += aarch64/h264cmc_neon.o NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o \ aarch64/h264idct_neon.o diff --git a/libavcodec/aarch64/videodsp.S b/libavcodec/aarch64/videodsp.S new file mode 100644 index 0000000000..7ce5a7ddf6 --- /dev/null +++ b/libavcodec/aarch64/videodsp.S @@ -0,0 +1,28 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+// void ff_prefetch_aarch64(uint8_t *mem, ptrdiff_t stride, int h): x0 = mem, x1 = stride, w2 = h; two rows per pass
+function ff_prefetch_aarch64, export=1
+1:      subs            w2,  w2,  #2                    // h -= 2; sets the flags tested by b.gt
+        prfm            pldl1strm, [x0]                 // load-prefetch hint for the row at mem
+        prfm            pldl1strm, [x0,  x1]            // ... and for the row at mem + stride
+        add             x0,  x0,  x1,  lsl #1           // mem += 2 * stride (add, not adds: flags kept)
+        b.gt            1b                              // loop on a local label, not on the exported symbol
+        ret
+endfunc
diff --git a/libavcodec/aarch64/videodsp_init.c b/libavcodec/aarch64/videodsp_init.c
new file mode 100644
index 0000000000..59b697d4f4
--- /dev/null
+++ b/libavcodec/aarch64/videodsp_init.c
@@ -0,0 +1,32 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/videodsp.h"
+
+void ff_prefetch_aarch64(uint8_t *mem, ptrdiff_t stride, int h);
+/* Point ctx->prefetch at ff_prefetch_aarch64 when have_armv8() accepts the CPU flags; bpc is not used. */
+av_cold void ff_videodsp_init_aarch64(VideoDSPContext *ctx, int bpc)
+{
+    const int flags = av_get_cpu_flags();
+    if (!have_armv8(flags))
+        return; /* ctx->prefetch stays as the caller set it */
+    ctx->prefetch = ff_prefetch_aarch64;
+}
diff --git a/libavcodec/videodsp.c b/libavcodec/videodsp.c
index a6a1d3753a..e6d9303903 100644
--- a/libavcodec/videodsp.c
+++ b/libavcodec/videodsp.c
@@ -43,6 +43,8 @@ av_cold void ff_videodsp_init(VideoDSPContext *ctx, int bpc)
         ctx->emulated_edge_mc = ff_emulated_edge_mc_16;
     }
 
+    if (ARCH_AARCH64)
+        ff_videodsp_init_aarch64(ctx, bpc);
     if (ARCH_ARM)
         ff_videodsp_init_arm(ctx, bpc);
     if (ARCH_PPC)
diff --git a/libavcodec/videodsp.h b/libavcodec/videodsp.h
index 2211c5dc22..04c012a826 100644
--- a/libavcodec/videodsp.h
+++ b/libavcodec/videodsp.h
@@ -68,6 +68,7 @@ typedef struct VideoDSPContext {
 void ff_videodsp_init(VideoDSPContext *ctx, int bpc);
 
 /* for internal use only (i.e. called by ff_videodsp_init() */
+void ff_videodsp_init_aarch64(VideoDSPContext *ctx, int bpc);
 void ff_videodsp_init_arm(VideoDSPContext *ctx, int bpc);
 void ff_videodsp_init_ppc(VideoDSPContext *ctx, int bpc);
 void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc);