mirror of https://github.com/FFmpeg/FFmpeg.git
parent
8438b3f09f
commit
d5dd8c7bf0
6 changed files with 1175 additions and 0 deletions
@ -1,7 +1,9 @@ |
||||
# AArch64 objects, each appended to the list selected by its CONFIG_* switch.
OBJS-$(CONFIG_H264CHROMA)               += aarch64/h264chroma_init_aarch64.o
OBJS-$(CONFIG_H264DSP)                  += aarch64/h264dsp_init_aarch64.o
OBJS-$(CONFIG_H264QPEL)                 += aarch64/h264qpel_init_aarch64.o
OBJS-$(CONFIG_RV40_DECODER)             += aarch64/rv40dsp_init_aarch64.o
OBJS-$(CONFIG_VC1_DECODER)              += aarch64/vc1dsp_init_aarch64.o

# Objects kept on the separate NEON-OBJS lists (how those lists are consumed
# is decided by build rules outside this fragment).
NEON-OBJS-$(CONFIG_H264CHROMA)          += aarch64/h264cmc_neon.o
NEON-OBJS-$(CONFIG_H264DSP)             += aarch64/h264idct_neon.o
NEON-OBJS-$(CONFIG_H264QPEL)            += aarch64/h264qpel_neon.o
||||
|
@ -0,0 +1,172 @@ |
||||
/*
|
||||
* ARM NEON optimised DSP functions |
||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com> |
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include <stdint.h> |
||||
|
||||
#include "config.h" |
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/aarch64/cpu.h" |
||||
#include "libavcodec/h264qpel.h" |
||||
|
||||
/*
 * Prototypes of the NEON quarter-pel routines.
 *
 * Naming: ff_<op>_h264_qpel<size>_mc<x><y>_neon, where <op> is put or avg,
 * <size> is 16 or 8 and <x>/<y> are digits 0..3. All of them share the
 * (dst, src, stride) signature.
 */

/* put, 16 */
void ff_put_h264_qpel16_mc00_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc10_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc20_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc30_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc01_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc11_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc21_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc31_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc02_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc12_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc22_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc32_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc03_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc13_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc23_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel16_mc33_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);

/* put, 8 */
void ff_put_h264_qpel8_mc00_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc10_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc20_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc30_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc01_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc11_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc21_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc31_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc02_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc12_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc22_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc32_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc03_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc13_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc23_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_put_h264_qpel8_mc33_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);

/* avg, 16 */
void ff_avg_h264_qpel16_mc00_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc10_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc20_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc30_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc01_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc11_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc21_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc31_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc02_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc12_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc22_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc32_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc03_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc13_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc23_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel16_mc33_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);

/* avg, 8 */
void ff_avg_h264_qpel8_mc00_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc10_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc20_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc30_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc01_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc11_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc21_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc31_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc02_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc12_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc22_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc32_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc03_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc13_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc23_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
void ff_avg_h264_qpel8_mc33_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
||||
|
||||
/**
 * Install the NEON quarter-pel routines into an H264QpelContext.
 *
 * Nothing is changed unless the CPU flags report NEON and bit_depth is at
 * most 8. Row [0] of each table receives the qpel16 routines, row [1] the
 * qpel8 routines; within a row, slot x + 4 * y receives the mc<x><y> routine.
 * Slot 0 (mc00) of every row is left as the caller set it up: the
 * corresponding assignments are kept commented out, the reason is not
 * recorded in this file.
 *
 * @param c          context whose put/avg function tables are filled in
 * @param bit_depth  sample bit depth the context is being initialised for
 */
av_cold void ff_h264qpel_init_aarch64(H264QpelContext *c, int bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

    if (!have_neon(cpu_flags) || bit_depth > 8)
        return;

    /* put, 16 */
    /* c->put_h264_qpel_pixels_tab[0][ 0] = ff_put_h264_qpel16_mc00_neon; */
    c->put_h264_qpel_pixels_tab[0][ 1] = ff_put_h264_qpel16_mc10_neon;
    c->put_h264_qpel_pixels_tab[0][ 2] = ff_put_h264_qpel16_mc20_neon;
    c->put_h264_qpel_pixels_tab[0][ 3] = ff_put_h264_qpel16_mc30_neon;
    c->put_h264_qpel_pixels_tab[0][ 4] = ff_put_h264_qpel16_mc01_neon;
    c->put_h264_qpel_pixels_tab[0][ 5] = ff_put_h264_qpel16_mc11_neon;
    c->put_h264_qpel_pixels_tab[0][ 6] = ff_put_h264_qpel16_mc21_neon;
    c->put_h264_qpel_pixels_tab[0][ 7] = ff_put_h264_qpel16_mc31_neon;
    c->put_h264_qpel_pixels_tab[0][ 8] = ff_put_h264_qpel16_mc02_neon;
    c->put_h264_qpel_pixels_tab[0][ 9] = ff_put_h264_qpel16_mc12_neon;
    c->put_h264_qpel_pixels_tab[0][10] = ff_put_h264_qpel16_mc22_neon;
    c->put_h264_qpel_pixels_tab[0][11] = ff_put_h264_qpel16_mc32_neon;
    c->put_h264_qpel_pixels_tab[0][12] = ff_put_h264_qpel16_mc03_neon;
    c->put_h264_qpel_pixels_tab[0][13] = ff_put_h264_qpel16_mc13_neon;
    c->put_h264_qpel_pixels_tab[0][14] = ff_put_h264_qpel16_mc23_neon;
    c->put_h264_qpel_pixels_tab[0][15] = ff_put_h264_qpel16_mc33_neon;

    /* put, 8 */
    /* c->put_h264_qpel_pixels_tab[1][ 0] = ff_put_h264_qpel8_mc00_neon; */
    c->put_h264_qpel_pixels_tab[1][ 1] = ff_put_h264_qpel8_mc10_neon;
    c->put_h264_qpel_pixels_tab[1][ 2] = ff_put_h264_qpel8_mc20_neon;
    c->put_h264_qpel_pixels_tab[1][ 3] = ff_put_h264_qpel8_mc30_neon;
    c->put_h264_qpel_pixels_tab[1][ 4] = ff_put_h264_qpel8_mc01_neon;
    c->put_h264_qpel_pixels_tab[1][ 5] = ff_put_h264_qpel8_mc11_neon;
    c->put_h264_qpel_pixels_tab[1][ 6] = ff_put_h264_qpel8_mc21_neon;
    c->put_h264_qpel_pixels_tab[1][ 7] = ff_put_h264_qpel8_mc31_neon;
    c->put_h264_qpel_pixels_tab[1][ 8] = ff_put_h264_qpel8_mc02_neon;
    c->put_h264_qpel_pixels_tab[1][ 9] = ff_put_h264_qpel8_mc12_neon;
    c->put_h264_qpel_pixels_tab[1][10] = ff_put_h264_qpel8_mc22_neon;
    c->put_h264_qpel_pixels_tab[1][11] = ff_put_h264_qpel8_mc32_neon;
    c->put_h264_qpel_pixels_tab[1][12] = ff_put_h264_qpel8_mc03_neon;
    c->put_h264_qpel_pixels_tab[1][13] = ff_put_h264_qpel8_mc13_neon;
    c->put_h264_qpel_pixels_tab[1][14] = ff_put_h264_qpel8_mc23_neon;
    c->put_h264_qpel_pixels_tab[1][15] = ff_put_h264_qpel8_mc33_neon;

    /* avg, 16 */
    /* c->avg_h264_qpel_pixels_tab[0][ 0] = ff_avg_h264_qpel16_mc00_neon; */
    c->avg_h264_qpel_pixels_tab[0][ 1] = ff_avg_h264_qpel16_mc10_neon;
    c->avg_h264_qpel_pixels_tab[0][ 2] = ff_avg_h264_qpel16_mc20_neon;
    c->avg_h264_qpel_pixels_tab[0][ 3] = ff_avg_h264_qpel16_mc30_neon;
    c->avg_h264_qpel_pixels_tab[0][ 4] = ff_avg_h264_qpel16_mc01_neon;
    c->avg_h264_qpel_pixels_tab[0][ 5] = ff_avg_h264_qpel16_mc11_neon;
    c->avg_h264_qpel_pixels_tab[0][ 6] = ff_avg_h264_qpel16_mc21_neon;
    c->avg_h264_qpel_pixels_tab[0][ 7] = ff_avg_h264_qpel16_mc31_neon;
    c->avg_h264_qpel_pixels_tab[0][ 8] = ff_avg_h264_qpel16_mc02_neon;
    c->avg_h264_qpel_pixels_tab[0][ 9] = ff_avg_h264_qpel16_mc12_neon;
    c->avg_h264_qpel_pixels_tab[0][10] = ff_avg_h264_qpel16_mc22_neon;
    c->avg_h264_qpel_pixels_tab[0][11] = ff_avg_h264_qpel16_mc32_neon;
    c->avg_h264_qpel_pixels_tab[0][12] = ff_avg_h264_qpel16_mc03_neon;
    c->avg_h264_qpel_pixels_tab[0][13] = ff_avg_h264_qpel16_mc13_neon;
    c->avg_h264_qpel_pixels_tab[0][14] = ff_avg_h264_qpel16_mc23_neon;
    c->avg_h264_qpel_pixels_tab[0][15] = ff_avg_h264_qpel16_mc33_neon;

    /* avg, 8 */
    /* c->avg_h264_qpel_pixels_tab[1][ 0] = ff_avg_h264_qpel8_mc00_neon; */
    c->avg_h264_qpel_pixels_tab[1][ 1] = ff_avg_h264_qpel8_mc10_neon;
    c->avg_h264_qpel_pixels_tab[1][ 2] = ff_avg_h264_qpel8_mc20_neon;
    c->avg_h264_qpel_pixels_tab[1][ 3] = ff_avg_h264_qpel8_mc30_neon;
    c->avg_h264_qpel_pixels_tab[1][ 4] = ff_avg_h264_qpel8_mc01_neon;
    c->avg_h264_qpel_pixels_tab[1][ 5] = ff_avg_h264_qpel8_mc11_neon;
    c->avg_h264_qpel_pixels_tab[1][ 6] = ff_avg_h264_qpel8_mc21_neon;
    c->avg_h264_qpel_pixels_tab[1][ 7] = ff_avg_h264_qpel8_mc31_neon;
    c->avg_h264_qpel_pixels_tab[1][ 8] = ff_avg_h264_qpel8_mc02_neon;
    c->avg_h264_qpel_pixels_tab[1][ 9] = ff_avg_h264_qpel8_mc12_neon;
    c->avg_h264_qpel_pixels_tab[1][10] = ff_avg_h264_qpel8_mc22_neon;
    c->avg_h264_qpel_pixels_tab[1][11] = ff_avg_h264_qpel8_mc32_neon;
    c->avg_h264_qpel_pixels_tab[1][12] = ff_avg_h264_qpel8_mc03_neon;
    c->avg_h264_qpel_pixels_tab[1][13] = ff_avg_h264_qpel8_mc13_neon;
    c->avg_h264_qpel_pixels_tab[1][14] = ff_avg_h264_qpel8_mc23_neon;
    c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon;
}
@ -0,0 +1,934 @@ |
||||
/* |
||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||
* Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
|
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/aarch64/asm.S" |
||||
#include "neon.S" |
||||
|
||||
/* H.264 qpel MC */ |
||||
|
||||
// Load the two filter multipliers used by the lowpass_* macros into v6.
// The scratch register receives (20 << 16) | 5, so after the lane move
// v6.h[0] = 5 and v6.h[1] = 20.
//   r - 32-bit general register (w form) to use as scratch; overwritten
.macro  lowpass_const   r
        movz            \r,  #20, lsl #16
        movk            \r,  #5
        mov             v6.s[0], \r
.endm
||||
|
||||
// 6-tap filter over two rows of bytes, 8 outputs per row.
//   r0:r1  - 16 consecutive bytes of the first row (low half : high half)
//   r2:r3  - the same for the second row
//   d0, d1 - result registers for the first / second row
//   narrow - 1 (default): round, shift right by 5 and saturate to unsigned
//            bytes; 0: leave the 16-bit sums in d0/d1
// With p[] the bytes of one row, output i (0..7) is
//   (p[i] + p[i+5]) + v6.h[1] * (p[i+2] + p[i+3]) - v6.h[0] * (p[i+1] + p[i+4])
// so v6 has to hold the multipliers on entry (see lowpass_const).
// d0 is written before the second row is read: it may share a register with
// r0 or r1 but not with r2 or r3. d1 is written after every input was read.
// Trashes v0-v5.
.macro  lowpass_8       r0,  r1,  r2,  r3,  d0,  d1,  narrow=1
        ext             v2.8b,      \r0\().8b,  \r1\().8b,  #2
        ext             v3.8b,      \r0\().8b,  \r1\().8b,  #3
        uaddl           v2.8h,      v2.8b,      v3.8b
        ext             v4.8b,      \r0\().8b,  \r1\().8b,  #1
        ext             v5.8b,      \r0\().8b,  \r1\().8b,  #4
        uaddl           v4.8h,      v4.8b,      v5.8b
        ext             v1.8b,      \r0\().8b,  \r1\().8b,  #5
        uaddl           \d0\().8h,  \r0\().8b,  v1.8b
        ext             v0.8b,      \r2\().8b,  \r3\().8b,  #2
        mla             \d0\().8h,  v2.8h,      v6.h[1]
        ext             v1.8b,      \r2\().8b,  \r3\().8b,  #3
        uaddl           v0.8h,      v0.8b,      v1.8b
        ext             v1.8b,      \r2\().8b,  \r3\().8b,  #1
        mls             \d0\().8h,  v4.8h,      v6.h[0]
        ext             v3.8b,      \r2\().8b,  \r3\().8b,  #4
        uaddl           v1.8h,      v1.8b,      v3.8b
        ext             v2.8b,      \r2\().8b,  \r3\().8b,  #5
        uaddl           \d1\().8h,  \r2\().8b,  v2.8b
        mla             \d1\().8h,  v0.8h,      v6.h[1]
        mls             \d1\().8h,  v1.8h,      v6.h[0]
  .if \narrow
        sqrshrun        \d0\().8b,  \d0\().8h,  #5
        sqrshrun        \d1\().8b,  \d1\().8h,  #5
  .endif
.endm
||||
|
||||
// In-place 6-tap filter over two rows, keeping full 16-bit results.
//   r0, r1 - each holds 16 consecutive bytes of one row on entry and the
//            eight unrounded 16-bit sums of that row on exit
// Same formula as lowpass_8 with narrow=0, but both halves of a row live in
// one 128-bit register: the shifted copies are made by ext-ing the register
// with itself and only their low 8 bytes are used.
// v6 has to hold the multipliers on entry (see lowpass_const).
// Trashes v0-v5, v7, v30, v31.
.macro  lowpass_8H      r0,  r1
        ext             v0.16b,     \r0\().16b, \r0\().16b, #2
        ext             v1.16b,     \r0\().16b, \r0\().16b, #3
        uaddl           v0.8h,      v0.8b,      v1.8b
        ext             v2.16b,     \r0\().16b, \r0\().16b, #1
        ext             v3.16b,     \r0\().16b, \r0\().16b, #4
        uaddl           v2.8h,      v2.8b,      v3.8b
        ext             v30.16b,    \r0\().16b, \r0\().16b, #5
        uaddl           \r0\().8h,  \r0\().8b,  v30.8b
        ext             v4.16b,     \r1\().16b, \r1\().16b, #2
        mla             \r0\().8h,  v0.8h,      v6.h[1]
        ext             v5.16b,     \r1\().16b, \r1\().16b, #3
        uaddl           v4.8h,      v4.8b,      v5.8b
        ext             v7.16b,     \r1\().16b, \r1\().16b, #1
        mls             \r0\().8h,  v2.8h,      v6.h[0]
        ext             v0.16b,     \r1\().16b, \r1\().16b, #4
        uaddl           v7.8h,      v7.8b,      v0.8b
        ext             v31.16b,    \r1\().16b, \r1\().16b, #5
        uaddl           \r1\().8h,  \r1\().8b,  v31.8b
        mla             \r1\().8h,  v4.8h,      v6.h[1]
        mls             \r1\().8h,  v7.8h,      v6.h[0]
.endm
||||
|
||||
// Single-row variant of lowpass_8.
//   r0:r1  - 16 consecutive bytes of the row (low half : high half)
//   d0     - result register
//   narrow - 1 (default): round, shift right by 5 and saturate to unsigned
//            bytes; 0: leave the 16-bit sums in d0
// v6 has to hold the multipliers on entry (see lowpass_const).
// Trashes v2-v5, v30.
.macro  lowpass_8_1     r0,  r1,  d0,  narrow=1
        ext             v2.8b,      \r0\().8b,  \r1\().8b,  #2
        ext             v3.8b,      \r0\().8b,  \r1\().8b,  #3
        uaddl           v2.8h,      v2.8b,      v3.8b
        ext             v4.8b,      \r0\().8b,  \r1\().8b,  #1
        ext             v5.8b,      \r0\().8b,  \r1\().8b,  #4
        uaddl           v4.8h,      v4.8b,      v5.8b
        ext             v30.8b,     \r0\().8b,  \r1\().8b,  #5
        uaddl           \d0\().8h,  \r0\().8b,  v30.8b
        mla             \d0\().8h,  v2.8h,      v6.h[1]
        mls             \d0\().8h,  v4.8h,      v6.h[0]
  .if \narrow
        sqrshrun        \d0\().8b,  \d0\().8h,  #5
  .endif
.endm
||||
|
||||
// Second filter pass over signed 16-bit intermediates, done in 32 bits.
//   r0:r1 - 16 consecutive 16-bit values (r0 = first eight, r1 = next eight)
//   r2    - receives the eight results as unsigned bytes (low 64 bits)
// With q[] the 16-bit inputs, output i (0..7) is
//   ((q[i] + q[i+5]) + 20 * (q[i+2] + q[i+3]) - 5 * (q[i+1] + q[i+4]))
// rounded and shifted right by 10, then saturated to 0..255. The factors 20
// and 5 are built from shifts and adds (x*16 + x*4 and x + x*4).
// Trashes v0-v7 and overwrites r1. Because v6 is used as scratch, the
// multipliers from lowpass_const are gone afterwards.
.macro  lowpass_8.16    r0,  r1,  r2
        ext             v1.16b,     \r0\().16b, \r1\().16b, #4
        ext             v0.16b,     \r0\().16b, \r1\().16b, #6
        saddl           v5.4s,      v1.4h,      v0.4h
        ext             v2.16b,     \r0\().16b, \r1\().16b, #2
        saddl2          v1.4s,      v1.8h,      v0.8h
        ext             v3.16b,     \r0\().16b, \r1\().16b, #8
        saddl           v6.4s,      v2.4h,      v3.4h
        ext             \r1\().16b, \r0\().16b, \r1\().16b, #10
        saddl2          v2.4s,      v2.8h,      v3.8h
        saddl           v0.4s,      \r0\().4h,  \r1\().4h
        saddl2          v4.4s,      \r0\().8h,  \r1\().8h

        // low four outputs: v5 = 20 * (q2 + q3), v6 = 5 * (q1 + q4)
        shl             v3.4s,  v5.4s,  #4
        shl             v5.4s,  v5.4s,  #2
        shl             v7.4s,  v6.4s,  #2
        add             v5.4s,  v5.4s,  v3.4s
        add             v6.4s,  v6.4s,  v7.4s

        // high four outputs: v1 = 20 * (q2 + q3), v2 = 5 * (q1 + q4)
        shl             v3.4s,  v1.4s,  #4
        shl             v1.4s,  v1.4s,  #2
        shl             v7.4s,  v2.4s,  #2
        add             v1.4s,  v1.4s,  v3.4s
        add             v2.4s,  v2.4s,  v7.4s

        add             v5.4s,  v5.4s,  v0.4s
        sub             v5.4s,  v5.4s,  v6.4s

        add             v1.4s,  v1.4s,  v4.4s
        sub             v1.4s,  v1.4s,  v2.4s

        rshrn           v5.4h,  v5.4s,  #10
        rshrn2          v5.8h,  v1.4s,  #10

        sqxtun          \r2\().8b,  v5.8h
.endm
||||
|
||||
// Horizontal lowpass of a 16-pixel-wide, 16-row area into a packed buffer.
// In:  x0 = destination buffer, x1 = source, x2 = source stride,
//      v6 = filter multipliers (see lowpass_const)
// The area is processed as two 8-pixel-wide columns of 16 rows each. The
// destination stride is forced to 8 and x0 is not rewound between the two
// passes, so the right column is stored directly after the left one.
// Uses x4 to keep the return address; sets x3 and x12. The second pass is a
// tail call, so the helper returns straight to our caller.
function put_h264_qpel16_h_lowpass_neon_packed
        mov             x4,  x30
        mov             x12, #16
        mov             x3,  #8
        bl              put_h264_qpel8_h_lowpass_neon
        sub             x1,  x1,  x2, lsl #4    // back up 16 source rows ...
        add             x1,  x1,  #8            // ... and move 8 pixels right
        mov             x12, #16
        mov             x30, x4
        b               put_h264_qpel8_h_lowpass_neon
endfunc
||||
|
||||
// Emits <type>_h264_qpel16_h_lowpass_neon and <type>_h264_qpel8_h_lowpass_neon
// for type = put or avg.
//
// Registers used by both functions:
//   x0  = destination           x3  = destination stride
//   x1  = source                x2  = source stride
//   x12 = rows to process (8-wide function only; decremented by 2 per
//         iteration and compared against zero, so it must be even and > 0)
//   v6  = filter multipliers (see lowpass_const)
// Each source row supplies 16 bytes to lowpass_8; the callers visible in this
// file pass x1 already moved 2 bytes to the left of the block.
// For avg the filtered rows are averaged (urhadd) with what is already stored
// at the destination before being written back.
//
// The 16-wide function runs the 8-wide one on the left half, rewinds both
// pointers by 16 rows, steps 8 pixels to the right and then continues into
// the 8-wide function placed directly behind it. It has no branch of its own
// at the end, so this relies on function/endfunc (asm.S, not visible here)
// emitting nothing in between. x13 keeps the return address meanwhile.
.macro  h264_qpel_h_lowpass type
function \type\()_h264_qpel16_h_lowpass_neon
        mov             x13, x30
        mov             x12, #16
        bl              \type\()_h264_qpel8_h_lowpass_neon
        sub             x0,  x0,  x3, lsl #4
        sub             x1,  x1,  x2, lsl #4
        add             x0,  x0,  #8
        add             x1,  x1,  #8
        mov             x12, #16
        mov             x30, x13
endfunc

function \type\()_h264_qpel8_h_lowpass_neon
1:      ld1             {v28.8b, v29.8b}, [x1], x2
        ld1             {v16.8b, v17.8b}, [x1], x2
        subs            x12, x12, #2            // flags are consumed by b.ne below
        lowpass_8       v28, v29, v16, v17, v28, v16
  .ifc \type,avg
        ld1             {v2.8b},    [x0], x3
        urhadd          v28.8b, v28.8b, v2.8b
        ld1             {v3.8b},    [x0]
        urhadd          v16.8b, v16.8b, v3.8b
        sub             x0,  x0,  x3            // back to the first of the two rows
  .endif
        st1             {v28.8b},   [x0], x3
        st1             {v16.8b},   [x0], x3
        b.ne            1b
        ret
endfunc
.endm

        h264_qpel_h_lowpass put
        h264_qpel_h_lowpass avg
||||
|
||||
// Emits <type>_h264_qpel16_h_lowpass_l2_neon and
// <type>_h264_qpel8_h_lowpass_l2_neon for type = put or avg.
//
// Like the plain h_lowpass functions, but every filtered row is additionally
// averaged (urhadd) with a row read from a second source.
//   x0  = destination
//   x1  = source for the filter
//   x3  = second source, 8 bytes per row
//   x2  = stride, shared by x0, x1 and x3
//   x12 = rows to process (8-wide function only; even and > 0)
//   v6  = filter multipliers (see lowpass_const)
// For avg the result is averaged once more with the bytes already stored at
// the destination.
//
// The 16-wide function handles the left half through a call, rewinds the
// three pointers by 16 rows, steps 8 pixels right and continues into the
// 8-wide function that follows it (no branch; relies on function/endfunc from
// asm.S emitting nothing in between). x13 keeps the return address.
.macro  h264_qpel_h_lowpass_l2 type
function \type\()_h264_qpel16_h_lowpass_l2_neon
        mov             x13, x30
        mov             x12, #16
        bl              \type\()_h264_qpel8_h_lowpass_l2_neon
        sub             x0,  x0,  x2, lsl #4
        sub             x1,  x1,  x2, lsl #4
        sub             x3,  x3,  x2, lsl #4
        add             x0,  x0,  #8
        add             x1,  x1,  #8
        add             x3,  x3,  #8
        mov             x12, #16
        mov             x30, x13
endfunc

function \type\()_h264_qpel8_h_lowpass_l2_neon
1:      ld1             {v26.8b, v27.8b}, [x1], x2
        ld1             {v16.8b, v17.8b}, [x1], x2
        ld1             {v28.8b},   [x3], x2
        ld1             {v29.8b},   [x3], x2
        subs            x12, x12, #2            // flags are consumed by b.ne below
        lowpass_8       v26, v27, v16, v17, v26, v27
        urhadd          v26.8b, v26.8b, v28.8b
        urhadd          v27.8b, v27.8b, v29.8b
  .ifc \type,avg
        ld1             {v2.8b},    [x0], x2
        urhadd          v26.8b, v26.8b, v2.8b
        ld1             {v3.8b},    [x0]
        urhadd          v27.8b, v27.8b, v3.8b
        sub             x0,  x0,  x2            // back to the first of the two rows
  .endif
        st1             {v26.8b},   [x0], x2
        st1             {v27.8b},   [x0], x2
        b.ne            1b
        ret
endfunc
.endm

        h264_qpel_h_lowpass_l2 put
        h264_qpel_h_lowpass_l2 avg
||||
|
||||
// Vertical lowpass of a 16x16 area into a packed buffer.
// In:  x0 = destination buffer, x1 = source, x3 = source stride,
//      v6 = filter multipliers (see lowpass_const)
// The area is processed as four 8x8 blocks: top-left, bottom-left, top-right,
// bottom-right. The destination stride is forced to 8 and x0 is never
// rewound, so the four blocks are stored one after another.
// Each call of the 8x8 helper leaves x1 twelve rows further down, hence the
// 4-row rewind to reach the block below and the 16 + 4 row rewind (plus 8
// bytes) to reach the top of the right half.
// Uses x4 to keep the return address; the last block is a tail call.
function put_h264_qpel16_v_lowpass_neon_packed
        mov             x4,  x30
        mov             x2,  #8
        bl              put_h264_qpel8_v_lowpass_neon
        sub             x1,  x1,  x3, lsl #2
        bl              put_h264_qpel8_v_lowpass_neon
        sub             x1,  x1,  x3, lsl #4
        sub             x1,  x1,  x3, lsl #2
        add             x1,  x1,  #8
        bl              put_h264_qpel8_v_lowpass_neon
        sub             x1,  x1,  x3, lsl #2
        mov             x30, x4
        b               put_h264_qpel8_v_lowpass_neon
endfunc
||||
|
||||
// Emits <type>_h264_qpel16_v_lowpass_neon and <type>_h264_qpel8_v_lowpass_neon
// for type = put or avg.
//
//   x0 = destination            x2 = destination stride
//   x1 = first source row       x3 = source stride
//   v6 = filter multipliers (see lowpass_const)
//
// 8x8 function: reads 13 rows of 8 bytes (x1 ends up 12 rows further down),
// transposes them so that every register pair holds one column, runs the
// row filter lowpass_8 on the columns and transposes the 8x8 result back
// before storing it; x0 ends up 8 rows further down. Only five rows are
// loaded into the second group (v17..v25); v27, v29 and v31 enter the second
// transpose with whatever they held before, which is harmless as lowpass_8
// reads no further than the 13th sample of a column.
// For avg the result is averaged (urhadd) with the destination first.
// transpose_8x8B comes from neon.S and is not visible here.
//
// 16x16 function: processes top-left and bottom-left through calls, moves
// both pointers to the top of the right half, calls once more and continues
// into the 8x8 function that follows it for the last block (no branch; relies
// on function/endfunc from asm.S emitting nothing in between). x4 keeps the
// return address.
.macro  h264_qpel_v_lowpass type
function \type\()_h264_qpel16_v_lowpass_neon
        mov             x4,  x30
        bl              \type\()_h264_qpel8_v_lowpass_neon
        sub             x1,  x1,  x3, lsl #2
        bl              \type\()_h264_qpel8_v_lowpass_neon
        sub             x0,  x0,  x2, lsl #4
        add             x0,  x0,  #8
        sub             x1,  x1,  x3, lsl #4
        sub             x1,  x1,  x3, lsl #2
        add             x1,  x1,  #8
        bl              \type\()_h264_qpel8_v_lowpass_neon
        sub             x1,  x1,  x3, lsl #2
        mov             x30, x4
endfunc

function \type\()_h264_qpel8_v_lowpass_neon
        ld1             {v16.8b},   [x1], x3
        ld1             {v18.8b},   [x1], x3
        ld1             {v20.8b},   [x1], x3
        ld1             {v22.8b},   [x1], x3
        ld1             {v24.8b},   [x1], x3
        ld1             {v26.8b},   [x1], x3
        ld1             {v28.8b},   [x1], x3
        ld1             {v30.8b},   [x1], x3
        ld1             {v17.8b},   [x1], x3
        ld1             {v19.8b},   [x1], x3
        ld1             {v21.8b},   [x1], x3
        ld1             {v23.8b},   [x1], x3
        ld1             {v25.8b},   [x1]

        transpose_8x8B  v16, v18, v20, v22, v24, v26, v28, v30, v0,  v1
        transpose_8x8B  v17, v19, v21, v23, v25, v27, v29, v31, v0,  v1
        lowpass_8       v16, v17, v18, v19, v16, v17
        lowpass_8       v20, v21, v22, v23, v18, v19
        lowpass_8       v24, v25, v26, v27, v20, v21
        lowpass_8       v28, v29, v30, v31, v22, v23
        transpose_8x8B  v16, v17, v18, v19, v20, v21, v22, v23, v0,  v1

  .ifc \type,avg
        ld1             {v24.8b},   [x0], x2
        urhadd          v16.8b, v16.8b, v24.8b
        ld1             {v25.8b},   [x0], x2
        urhadd          v17.8b, v17.8b, v25.8b
        ld1             {v26.8b},   [x0], x2
        urhadd          v18.8b, v18.8b, v26.8b
        ld1             {v27.8b},   [x0], x2
        urhadd          v19.8b, v19.8b, v27.8b
        ld1             {v28.8b},   [x0], x2
        urhadd          v20.8b, v20.8b, v28.8b
        ld1             {v29.8b},   [x0], x2
        urhadd          v21.8b, v21.8b, v29.8b
        ld1             {v30.8b},   [x0], x2
        urhadd          v22.8b, v22.8b, v30.8b
        ld1             {v31.8b},   [x0], x2
        urhadd          v23.8b, v23.8b, v31.8b
        sub             x0,  x0,  x2, lsl #3    // back to the first row
  .endif

        st1             {v16.8b},   [x0], x2
        st1             {v17.8b},   [x0], x2
        st1             {v18.8b},   [x0], x2
        st1             {v19.8b},   [x0], x2
        st1             {v20.8b},   [x0], x2
        st1             {v21.8b},   [x0], x2
        st1             {v22.8b},   [x0], x2
        st1             {v23.8b},   [x0], x2

        ret
endfunc
.endm

        h264_qpel_v_lowpass put
        h264_qpel_v_lowpass avg
||||
|
||||
// Emits <type>_h264_qpel16_v_lowpass_l2_neon and
// <type>_h264_qpel8_v_lowpass_l2_neon for type = put or avg.
//
// Vertical lowpass whose result is averaged (urhadd) with a second source.
//   x0  = destination
//   x1  = first source row for the filter
//   x3  = stride of x1 and of the destination x0
//   x12 = second source, 8 bytes per row
//   x2  = stride of the second source x12
//   v6  = filter multipliers (see lowpass_const)
// Note that the roles of x2 and x3 on the destination side differ from the
// plain v_lowpass functions, where x2 is the destination stride.
//
// 8x8 function: reads 13 rows of 8 bytes (x1 ends up 12 rows further down),
// filters the columns via transpose / lowpass_8 / transpose, averages with
// eight rows from x12 and, for avg, once more with the destination; x0 and
// x12 end up 8 rows further down. v27, v29 and v31 are not loaded before the
// second transpose; lowpass_8 reads no further than the 13th sample of a
// column. transpose_8x8B comes from neon.S and is not visible here.
//
// 16x16 function: top-left and bottom-left through calls, then x0, x12 and x1
// are moved to the top of the right half, one more call, and execution
// continues into the 8x8 function that follows for the last block (no branch;
// relies on function/endfunc from asm.S emitting nothing in between).
// x4 keeps the return address.
.macro  h264_qpel_v_lowpass_l2 type
function \type\()_h264_qpel16_v_lowpass_l2_neon
        mov             x4,  x30
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        sub             x1,  x1,  x3, lsl #2
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        sub             x0,  x0,  x3, lsl #4
        sub             x12, x12, x2, lsl #4
        add             x0,  x0,  #8
        add             x12, x12, #8
        sub             x1,  x1,  x3, lsl #4
        sub             x1,  x1,  x3, lsl #2
        add             x1,  x1,  #8
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        sub             x1,  x1,  x3, lsl #2
        mov             x30, x4
endfunc

function \type\()_h264_qpel8_v_lowpass_l2_neon
        ld1             {v16.8b},   [x1], x3
        ld1             {v18.8b},   [x1], x3
        ld1             {v20.8b},   [x1], x3
        ld1             {v22.8b},   [x1], x3
        ld1             {v24.8b},   [x1], x3
        ld1             {v26.8b},   [x1], x3
        ld1             {v28.8b},   [x1], x3
        ld1             {v30.8b},   [x1], x3
        ld1             {v17.8b},   [x1], x3
        ld1             {v19.8b},   [x1], x3
        ld1             {v21.8b},   [x1], x3
        ld1             {v23.8b},   [x1], x3
        ld1             {v25.8b},   [x1]

        transpose_8x8B  v16, v18, v20, v22, v24, v26, v28, v30, v0,  v1
        transpose_8x8B  v17, v19, v21, v23, v25, v27, v29, v31, v0,  v1
        lowpass_8       v16, v17, v18, v19, v16, v17
        lowpass_8       v20, v21, v22, v23, v18, v19
        lowpass_8       v24, v25, v26, v27, v20, v21
        lowpass_8       v28, v29, v30, v31, v22, v23
        transpose_8x8B  v16, v17, v18, v19, v20, v21, v22, v23, v0,  v1

        // average with the second source
        ld1             {v24.8b},   [x12], x2
        ld1             {v25.8b},   [x12], x2
        ld1             {v26.8b},   [x12], x2
        ld1             {v27.8b},   [x12], x2
        ld1             {v28.8b},   [x12], x2
        urhadd          v16.8b, v24.8b, v16.8b
        urhadd          v17.8b, v25.8b, v17.8b
        ld1             {v29.8b},   [x12], x2
        urhadd          v18.8b, v26.8b, v18.8b
        urhadd          v19.8b, v27.8b, v19.8b
        ld1             {v30.8b},   [x12], x2
        urhadd          v20.8b, v28.8b, v20.8b
        urhadd          v21.8b, v29.8b, v21.8b
        ld1             {v31.8b},   [x12], x2
        urhadd          v22.8b, v30.8b, v22.8b
        urhadd          v23.8b, v31.8b, v23.8b

  .ifc \type,avg
        ld1             {v24.8b},   [x0], x3
        urhadd          v16.8b, v16.8b, v24.8b
        ld1             {v25.8b},   [x0], x3
        urhadd          v17.8b, v17.8b, v25.8b
        ld1             {v26.8b},   [x0], x3
        urhadd          v18.8b, v18.8b, v26.8b
        ld1             {v27.8b},   [x0], x3
        urhadd          v19.8b, v19.8b, v27.8b
        ld1             {v28.8b},   [x0], x3
        urhadd          v20.8b, v20.8b, v28.8b
        ld1             {v29.8b},   [x0], x3
        urhadd          v21.8b, v21.8b, v29.8b
        ld1             {v30.8b},   [x0], x3
        urhadd          v22.8b, v22.8b, v30.8b
        ld1             {v31.8b},   [x0], x3
        urhadd          v23.8b, v23.8b, v31.8b
        sub             x0,  x0,  x3, lsl #3    // back to the first row
  .endif

        st1             {v16.8b},   [x0], x3
        st1             {v17.8b},   [x0], x3
        st1             {v18.8b},   [x0], x3
        st1             {v19.8b},   [x0], x3
        st1             {v20.8b},   [x0], x3
        st1             {v21.8b},   [x0], x3
        st1             {v22.8b},   [x0], x3
        st1             {v23.8b},   [x0], x3

        ret
endfunc
.endm

        h264_qpel_v_lowpass_l2 put
        h264_qpel_v_lowpass_l2 avg
||||
|
||||
// Shared core of the 8x8 horizontal + vertical lowpass.
// In:  x1 = first source row, x3 = source stride
// Out: v16..v23 = the eight result rows as unsigned bytes;
//      x1 is left 12 rows further down
// Loads the multipliers itself (through w12, so x12 is overwritten), reads
// 13 rows of 16 bytes, filters every row horizontally into 16-bit sums
// (lowpass_8H), transposes, filters the former columns with the 32-bit
// lowpass_8.16 and transposes the 8x8 byte result back.
// v29 is run through lowpass_8H only to complete the last register pair; no
// row is loaded into it, and lowpass_8.16 reads just the first five values
// of its second operand, which come from rows 9 to 13.
// Trashes v0-v7 and v24-v31; v6 no longer holds the multipliers on return.
// transpose_8x8H / transpose_8x8B come from neon.S and are not visible here.
function put_h264_qpel8_hv_lowpass_neon_top
        lowpass_const   w12
        ld1             {v16.8h},   [x1], x3
        ld1             {v17.8h},   [x1], x3
        ld1             {v18.8h},   [x1], x3
        ld1             {v19.8h},   [x1], x3
        ld1             {v20.8h},   [x1], x3
        ld1             {v21.8h},   [x1], x3
        ld1             {v22.8h},   [x1], x3
        ld1             {v23.8h},   [x1], x3
        ld1             {v24.8h},   [x1], x3
        ld1             {v25.8h},   [x1], x3
        ld1             {v26.8h},   [x1], x3
        ld1             {v27.8h},   [x1], x3
        ld1             {v28.8h},   [x1]
        lowpass_8H      v16, v17
        lowpass_8H      v18, v19
        lowpass_8H      v20, v21
        lowpass_8H      v22, v23
        lowpass_8H      v24, v25
        lowpass_8H      v26, v27
        lowpass_8H      v28, v29

        transpose_8x8H  v16, v17, v18, v19, v20, v21, v22, v23, v0,  v1
        transpose_8x8H  v24, v25, v26, v27, v28, v29, v30, v31, v0,  v1

        lowpass_8.16    v16, v24, v16
        lowpass_8.16    v17, v25, v17

        lowpass_8.16    v18, v26, v18
        lowpass_8.16    v19, v27, v19

        lowpass_8.16    v20, v28, v20
        lowpass_8.16    v21, v29, v21

        lowpass_8.16    v22, v30, v22
        lowpass_8.16    v23, v31, v23

        transpose_8x8B  v16, v17, v18, v19, v20, v21, v22, v23, v0,  v1

        ret
endfunc
||||
|
||||
// Emits <type>_h264_qpel8_hv_lowpass_neon for type = put or avg.
//   x0 = destination            x2 = destination stride
//   x1 = first source row       x3 = source stride
// Runs put_h264_qpel8_hv_lowpass_neon_top and stores its eight result rows
// (v16..v23); for avg they are first averaged (urhadd) with the destination.
// x0 ends up 8 rows further down. The return address is kept in x10 because
// the call overwrites x30; the function returns through x10.
.macro  h264_qpel8_hv_lowpass type
function \type\()_h264_qpel8_hv_lowpass_neon
        mov             x10, x30
        bl              put_h264_qpel8_hv_lowpass_neon_top
  .ifc \type,avg
        ld1             {v0.8b},    [x0], x2
        urhadd          v16.8b, v16.8b, v0.8b
        ld1             {v1.8b},    [x0], x2
        urhadd          v17.8b, v17.8b, v1.8b
        ld1             {v2.8b},    [x0], x2
        urhadd          v18.8b, v18.8b, v2.8b
        ld1             {v3.8b},    [x0], x2
        urhadd          v19.8b, v19.8b, v3.8b
        ld1             {v4.8b},    [x0], x2
        urhadd          v20.8b, v20.8b, v4.8b
        ld1             {v5.8b},    [x0], x2
        urhadd          v21.8b, v21.8b, v5.8b
        ld1             {v6.8b},    [x0], x2
        urhadd          v22.8b, v22.8b, v6.8b
        ld1             {v7.8b},    [x0], x2
        urhadd          v23.8b, v23.8b, v7.8b
        sub             x0,  x0,  x2, lsl #3    // back to the first row
  .endif

        st1             {v16.8b},   [x0], x2
        st1             {v17.8b},   [x0], x2
        st1             {v18.8b},   [x0], x2
        st1             {v19.8b},   [x0], x2
        st1             {v20.8b},   [x0], x2
        st1             {v21.8b},   [x0], x2
        st1             {v22.8b},   [x0], x2
        st1             {v23.8b},   [x0], x2

        ret             x10
endfunc
.endm

        h264_qpel8_hv_lowpass put
        h264_qpel8_hv_lowpass avg
||||
|
||||
// Emits <type>_h264_qpel8_hv_lowpass_l2_neon for type = put or avg.
//   x0 = destination            x3 = stride of source and destination
//   x1 = first source row
//   x2 = second source: 64 contiguous bytes, i.e. eight rows of 8 bytes
//        without padding; advanced by 64 on return
// Runs put_h264_qpel8_hv_lowpass_neon_top, averages (urhadd) its result rows
// v16..v23 with the packed second source and stores the outcome; for avg one
// more averaging step with the destination comes first.
// x0 ends up 8 rows further down. The return address is kept in x10 because
// the call overwrites x30; the function returns through x10.
.macro  h264_qpel8_hv_lowpass_l2 type
function \type\()_h264_qpel8_hv_lowpass_l2_neon
        mov             x10, x30
        bl              put_h264_qpel8_hv_lowpass_neon_top

        ld1             {v0.8b, v1.8b}, [x2], #16
        ld1             {v2.8b, v3.8b}, [x2], #16
        urhadd          v0.8b,  v0.8b,  v16.8b
        urhadd          v1.8b,  v1.8b,  v17.8b
        ld1             {v4.8b, v5.8b}, [x2], #16
        urhadd          v2.8b,  v2.8b,  v18.8b
        urhadd          v3.8b,  v3.8b,  v19.8b
        ld1             {v6.8b, v7.8b}, [x2], #16
        urhadd          v4.8b,  v4.8b,  v20.8b
        urhadd          v5.8b,  v5.8b,  v21.8b
        urhadd          v6.8b,  v6.8b,  v22.8b
        urhadd          v7.8b,  v7.8b,  v23.8b
  .ifc \type,avg
        ld1             {v16.8b},   [x0], x3
        urhadd          v0.8b,  v0.8b,  v16.8b
        ld1             {v17.8b},   [x0], x3
        urhadd          v1.8b,  v1.8b,  v17.8b
        ld1             {v18.8b},   [x0], x3
        urhadd          v2.8b,  v2.8b,  v18.8b
        ld1             {v19.8b},   [x0], x3
        urhadd          v3.8b,  v3.8b,  v19.8b
        ld1             {v20.8b},   [x0], x3
        urhadd          v4.8b,  v4.8b,  v20.8b
        ld1             {v21.8b},   [x0], x3
        urhadd          v5.8b,  v5.8b,  v21.8b
        ld1             {v22.8b},   [x0], x3
        urhadd          v6.8b,  v6.8b,  v22.8b
        ld1             {v23.8b},   [x0], x3
        urhadd          v7.8b,  v7.8b,  v23.8b
        sub             x0,  x0,  x3, lsl #3    // back to the first row
  .endif
        st1             {v0.8b},    [x0], x3
        st1             {v1.8b},    [x0], x3
        st1             {v2.8b},    [x0], x3
        st1             {v3.8b},    [x0], x3
        st1             {v4.8b},    [x0], x3
        st1             {v5.8b},    [x0], x3
        st1             {v6.8b},    [x0], x3
        st1             {v7.8b},    [x0], x3

        ret             x10
endfunc
.endm

        h264_qpel8_hv_lowpass_l2 put
        h264_qpel8_hv_lowpass_l2 avg
||||
|
||||
// Emits the 16x16 wrappers <type>_h264_qpel16_hv_lowpass_neon and
// <type>_h264_qpel16_hv_lowpass_l2_neon for type = put or avg.
//
// Both cover the area with four calls of the matching 8x8 function in the
// order top-left, bottom-left, top-right, bottom-right. The 8x8 functions
// leave x1 twelve rows and x0 eight rows further down, which explains the
// pointer corrections between the calls. x13 keeps the return address (the
// 8x8 functions use x10 for their own); the fourth block is a tail call.
//
// hv_lowpass:     x0 = destination, x2 = destination stride,
//                 x1 = first source row, x3 = source stride
// hv_lowpass_l2:  x0 = destination, x1 = first source row,
//                 x3 = stride of source and destination,
//                 x4 = address 256 bytes past the start of the packed second
//                      source; x2 is derived from it and then simply runs on
//                      through the four calls, 64 bytes per block
.macro  h264_qpel16_hv  type
function \type\()_h264_qpel16_hv_lowpass_neon
        mov             x13, x30
        bl              \type\()_h264_qpel8_hv_lowpass_neon
        sub             x1,  x1,  x3, lsl #2
        bl              \type\()_h264_qpel8_hv_lowpass_neon
        sub             x1,  x1,  x3, lsl #4
        sub             x1,  x1,  x3, lsl #2
        add             x1,  x1,  #8
        sub             x0,  x0,  x2, lsl #4
        add             x0,  x0,  #8
        bl              \type\()_h264_qpel8_hv_lowpass_neon
        sub             x1,  x1,  x3, lsl #2
        mov             x30, x13
        b               \type\()_h264_qpel8_hv_lowpass_neon
endfunc

function \type\()_h264_qpel16_hv_lowpass_l2_neon
        mov             x13, x30
        sub             x2,  x4,  #256
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
        sub             x1,  x1,  x3, lsl #2
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
        sub             x1,  x1,  x3, lsl #4
        sub             x1,  x1,  x3, lsl #2
        add             x1,  x1,  #8
        sub             x0,  x0,  x3, lsl #4
        add             x0,  x0,  #8
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
        sub             x1,  x1,  x3, lsl #2
        mov             x30, x13
        b               \type\()_h264_qpel8_hv_lowpass_l2_neon
endfunc
.endm

        h264_qpel16_hv put
        h264_qpel16_hv avg
||||
|
||||
.macro h264_qpel8 type |
||||
// mc10, 8x8: horizontal lowpass averaged with the source block itself.
// In: x0 = dst, x1 = src, x2 = stride
// w3 serves as scratch for the multipliers first; x3 then becomes the second
// source (src) and x1 is moved 2 bytes left for the filter. Eight rows are
// requested through x12 and the helper returns directly to our caller.
function ff_\type\()_h264_qpel8_mc10_neon, export=1
        lowpass_const   w3
        mov             x3,  x1
        sub             x1,  x1,  #2
        mov             x12, #8
        b               \type\()_h264_qpel8_h_lowpass_l2_neon
endfunc
||||
|
||||
// mc20, 8x8: plain horizontal lowpass.
// In: x0 = dst, x1 = src, x2 = stride
// w3 serves as scratch for the multipliers first; x3 then receives the
// destination stride (same as the source stride) and x1 is moved 2 bytes
// left for the filter. Eight rows are requested through x12 and the helper
// returns directly to our caller.
function ff_\type\()_h264_qpel8_mc20_neon, export=1
        lowpass_const   w3
        sub             x1,  x1,  #2
        mov             x3,  x2
        mov             x12, #8
        b               \type\()_h264_qpel8_h_lowpass_neon
endfunc
||||
|
||||
// mc30, 8x8: horizontal lowpass averaged with the source block one pixel to
// the right.
// In: x0 = dst, x1 = src, x2 = stride
// w3 serves as scratch for the multipliers first; x3 then becomes the second
// source (src + 1) and x1 is moved 2 bytes left for the filter. Eight rows
// are requested through x12 and the helper returns directly to our caller.
function ff_\type\()_h264_qpel8_mc30_neon, export=1
        lowpass_const   w3
        add             x3,  x1,  #1
        sub             x1,  x1,  #2
        mov             x12, #8
        b               \type\()_h264_qpel8_h_lowpass_l2_neon
endfunc
||||
|
||||
// mc01, 8x8: vertical lowpass averaged with the source block itself.
// In: x0 = dst, x1 = src, x2 = stride
// x14 keeps the return address and x12 the second source (src). The inner
// label is a second entry point for code that has already set up x14 and x12
// itself; no user of it is visible in this part of the file.
// From there on: x3 = stride for the filter source and the destination,
// x1 = two rows above src; x2 stays the stride of the second source.
function ff_\type\()_h264_qpel8_mc01_neon, export=1
        mov             x14, x30
        mov             x12, x1
\type\()_h264_qpel8_mc01:
        lowpass_const   w3
        mov             x3,  x2
        sub             x1,  x1,  x2, lsl #1
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        ret             x14
endfunc
||||
|
||||
// mc11, 8x8: average of a horizontal and a vertical lowpass.
// In: x0 = dst, x1 = src, x2 = stride
// x14 keeps the return address, x8 the destination and x9 the source used
// for the vertical pass. The inner label is a second entry point (used by
// mc31) for callers that set x14, x8, x9 and x1 up themselves.
// Pass 1 writes the put-type horizontal lowpass of 8 rows into a 64-byte
// stack buffer with a row stride of 8. Pass 2 runs the vertical l2 lowpass
// from two rows above x9, averages it with that buffer (x12, stride x2 = 8)
// and writes to dst with stride x3. x11 holds the caller's sp meanwhile.
function ff_\type\()_h264_qpel8_mc11_neon, export=1
        mov             x14, x30
        mov             x8,  x0
        mov             x9,  x1
\type\()_h264_qpel8_mc11:
        lowpass_const   w3
        mov             x11, sp
        sub             sp,  sp,  #64
        mov             x0,  sp
        sub             x1,  x1,  #2
        mov             x3,  #8
        mov             x12, #8
        bl              put_h264_qpel8_h_lowpass_neon
        mov             x0,  x8
        mov             x3,  x2
        mov             x12, sp
        sub             x1,  x9,  x2, lsl #1
        mov             x2,  #8
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
        mov             sp,  x11
        ret             x14
endfunc
||||
|
||||
function ff_\type\()_h264_qpel8_mc21_neon, export=1 |
||||
mov x14, x30 |
||||
mov x8, x0 |
||||
mov x9, x1 |
||||
\type\()_h264_qpel8_mc21: |
||||
lowpass_const w3 |
||||
mov x11, sp |
||||
sub sp, sp, #(8*8+16*12) |
||||
sub x1, x1, #2 |
||||
mov x3, #8 |
||||
mov x0, sp |
||||
mov x12, #8 |
||||
bl put_h264_qpel8_h_lowpass_neon |
||||
mov x4, x0 |
||||
mov x0, x8 |
||||
sub x1, x9, x2, lsl #1 |
||||
sub x1, x1, #2 |
||||
mov x3, x2 |
||||
sub x2, x4, #64 |
||||
bl \type\()_h264_qpel8_hv_lowpass_l2_neon |
||||
mov sp, x11 |
||||
ret x14 |
||||
endfunc |
||||
|
||||
function ff_\type\()_h264_qpel8_mc31_neon, export=1 |
||||
add x1, x1, #1 |
||||
mov x14, x30 |
||||
mov x8, x0 |
||||
mov x9, x1 |
||||
sub x1, x1, #1 |
||||
b \type\()_h264_qpel8_mc11 |
||||
endfunc |
||||
|
||||
function ff_\type\()_h264_qpel8_mc02_neon, export=1 |
||||
mov x14, x30 |
||||
lowpass_const w3 |
||||
sub x1, x1, x2, lsl #1 |
||||
mov x3, x2 |
||||
bl \type\()_h264_qpel8_v_lowpass_neon |
||||
ret x14 |
||||
endfunc |
||||
|
||||
function ff_\type\()_h264_qpel8_mc12_neon, export=1 |
||||
mov x14, x30 |
||||
mov x8, x0 |
||||
mov x9, x1 |
||||
\type\()_h264_qpel8_mc12: |
||||
lowpass_const w3 |
||||
mov x11, sp |
||||
sub sp, sp, #(8*8+16*12) |
||||
sub x1, x1, x2, lsl #1 |
||||
mov x3, x2 |
||||
mov x2, #8 |
||||
mov x0, sp |
||||
bl put_h264_qpel8_v_lowpass_neon |
||||
mov x4, x0 |
||||
mov x0, x8 |
||||
sub x1, x9, x3, lsl #1 |
||||
sub x1, x1, #2 |
||||
sub x2, x4, #64 |
||||
bl \type\()_h264_qpel8_hv_lowpass_l2_neon |
||||
mov sp, x11 |
||||
ret x14 |
||||
endfunc |
||||
|
||||
function ff_\type\()_h264_qpel8_mc22_neon, export=1 |
||||
mov x14, x30 |
||||
mov x11, sp |
||||
sub x1, x1, x2, lsl #1 |
||||
sub x1, x1, #2 |
||||
mov x3, x2 |
||||
bl \type\()_h264_qpel8_hv_lowpass_neon |
||||
mov sp, x11 |
||||
ret x14 |
||||
endfunc |
||||
|
||||
function ff_\type\()_h264_qpel8_mc32_neon, export=1 |
||||
mov x14, x30 |
||||
mov x8, x0 |
||||
mov x9, x1 |
||||
add x1, x1, #1 |
||||
b \type\()_h264_qpel8_mc12 |
||||
endfunc |
||||
|
||||
function ff_\type\()_h264_qpel8_mc03_neon, export=1 |
||||
mov x14, x30 |
||||
add x12, x1, x2 |
||||
b \type\()_h264_qpel8_mc01 |
||||
endfunc |
||||
|
||||
function ff_\type\()_h264_qpel8_mc13_neon, export=1 |
||||
mov x14, x30 |
||||
mov x8, x0 |
||||
mov x9, x1 |
||||
add x1, x1, x2 |
||||
b \type\()_h264_qpel8_mc11 |
||||
endfunc |
||||
|
||||
function ff_\type\()_h264_qpel8_mc23_neon, export=1 |
||||
mov x14, x30 |
||||
mov x8, x0 |
||||
mov x9, x1 |
||||
add x1, x1, x2 |
||||
b \type\()_h264_qpel8_mc21 |
||||
endfunc |
||||
|
||||
function ff_\type\()_h264_qpel8_mc33_neon, export=1 |
||||
add x1, x1, #1 |
||||
mov x14, x30 |
||||
mov x8, x0 |
||||
mov x9, x1 |
||||
add x1, x1, x2 |
||||
sub x1, x1, #1 |
||||
b \type\()_h264_qpel8_mc11 |
||||
endfunc |
||||
.endm |
||||
|
||||
// Expand h264_qpel8 twice, producing the ff_put_h264_qpel8_mcXY_neon and
// ff_avg_h264_qpel8_mcXY_neon entry points.
        h264_qpel8      put
        h264_qpel8      avg
||||
|
||||
// ---------------------------------------------------------------------
// h264_qpel16 type
//
// Emits the exported entry points ff_<type>_h264_qpel16_mcXY_neon for
// every X,Y in 0..3 except mc00.  Each entry point only arranges
// registers (and, for some positions, a scratch buffer carved from the
// stack) and then hands off to a *_lowpass* helper defined elsewhere in
// this file.
//
// Entry registers, assuming the (dst, src, stride) prototype declared
// for ff_put_h264_qpel16_mc00_neon also applies here (TODO confirm):
//   x0 = dst, x1 = src, x2 = stride
//
// Scratch conventions used by the wrappers below:
//   x14     saved x30 for entries that call helpers with bl
//   x8, x9  saved dst pointer / source pointer for the second stage
//   x11     saved sp while stack scratch is in use
//   x12     extra pointer handed to the vertical _l2 helper
//
// Internal labels <type>_h264_qpel16_mc01/mc11/mc21/mc12 are shared
// bodies: sibling entry points preload x8/x9/x1/x12/x14 and branch in.
// ---------------------------------------------------------------------
.macro  h264_qpel16 type
function ff_\type\()_h264_qpel16_mc10_neon, export=1
        lowpass_const   w3
        mov             x3,  x1                 // x3 = src (unshifted)
        sub             x1,  x1,  #2            // x1 = src - 2
        b               \type\()_h264_qpel16_h_lowpass_l2_neon
endfunc

function ff_\type\()_h264_qpel16_mc20_neon, export=1
        lowpass_const   w3
        mov             x3,  x2                 // x3 = stride
        sub             x1,  x1,  #2            // x1 = src - 2
        b               \type\()_h264_qpel16_h_lowpass_neon
endfunc

function ff_\type\()_h264_qpel16_mc30_neon, export=1
        lowpass_const   w3
        add             x3,  x1,  #1            // x3 = src + 1
        sub             x1,  x1,  #2            // x1 = src - 2
        b               \type\()_h264_qpel16_h_lowpass_l2_neon
endfunc

function ff_\type\()_h264_qpel16_mc01_neon, export=1
        mov             x12, x1                 // x12 = src
        mov             x14, x30                // keep return address
// Shared body; expects x12 and x14 already set (also entered from mc03).
\type\()_h264_qpel16_mc01:
        lowpass_const   w3
        sub             x1,  x1,  x2, lsl #1    // x1 = src - 2*stride
        mov             x3,  x2                 // x3 = stride
        bl              \type\()_h264_qpel16_v_lowpass_l2_neon
        ret             x14
endfunc

function ff_\type\()_h264_qpel16_mc11_neon, export=1
        mov             x8,  x0                 // x8 = dst
        mov             x9,  x1                 // x9 = src for stage 2
        mov             x14, x30                // keep return address
// Shared body; expects x8, x9, x14 set and x1 = source for stage 1
// (also entered from mc31, mc13 and mc33).
\type\()_h264_qpel16_mc11:
        lowpass_const   w3
        mov             x11, sp                 // remember sp
        sub             sp,  sp,  #(16*16)      // 256 bytes of stack scratch
        // Stage 1: put-variant horizontal helper, output pointer = scratch.
        sub             x1,  x1,  #2
        mov             x3,  #16                // 16 in the slot where mc20 passes stride
        mov             x0,  sp
        bl              put_h264_qpel16_h_lowpass_neon
        // Stage 2: vertical _l2 helper on the real dst, x12 = scratch.
        sub             x1,  x9,  x2, lsl #1    // x1 = x9 - 2*stride
        mov             x3,  x2                 // x3 = stride
        mov             x2,  #16
        mov             x0,  x8                 // x0 = dst
        mov             x12, sp
        bl              \type\()_h264_qpel16_v_lowpass_l2_neon
        mov             sp,  x11                // release scratch
        ret             x14
endfunc

function ff_\type\()_h264_qpel16_mc21_neon, export=1
        mov             x8,  x0                 // x8 = dst
        mov             x9,  x1                 // x9 = src for stage 2
        mov             x14, x30                // keep return address
// Shared body; expects x8, x9, x14 set and x1 = source for stage 1
// (also entered from mc23).
\type\()_h264_qpel16_mc21:
        lowpass_const   w3
        mov             x11, sp                 // remember sp
        sub             sp,  sp,  #(16*16+16*12) // stack scratch
        // Stage 1: packed put-variant horizontal helper into scratch.
        mov             x0,  sp
        sub             x1,  x1,  #2
        bl              put_h264_qpel16_h_lowpass_neon_packed
        // Stage 2: hv _l2 helper; x0 as left by stage 1 is passed in x4.
        mov             x4,  x0
        mov             x3,  x2                 // x3 = stride
        sub             x1,  x9,  x2, lsl #1    // x1 = x9 - 2*stride
        sub             x1,  x1,  #2            // x1 = x9 - 2*stride - 2
        mov             x0,  x8                 // x0 = dst
        bl              \type\()_h264_qpel16_hv_lowpass_l2_neon
        mov             sp,  x11                // release scratch
        ret             x14
endfunc

function ff_\type\()_h264_qpel16_mc31_neon, export=1
        mov             x14, x30                // keep return address
        mov             x8,  x0                 // x8 = dst
        add             x9,  x1,  #1            // x9 = src + 1, x1 stays = src
        b               \type\()_h264_qpel16_mc11
endfunc

function ff_\type\()_h264_qpel16_mc02_neon, export=1
        mov             x14, x30                // keep return address
        lowpass_const   w3
        mov             x3,  x2                 // x3 = stride
        sub             x1,  x1,  x2, lsl #1    // x1 = src - 2*stride
        bl              \type\()_h264_qpel16_v_lowpass_neon
        ret             x14
endfunc

function ff_\type\()_h264_qpel16_mc12_neon, export=1
        mov             x8,  x0                 // x8 = dst
        mov             x9,  x1                 // x9 = src for stage 2
        mov             x14, x30                // keep return address
// Shared body; expects x8, x9, x14 set and x1 = source for stage 1
// (also entered from mc32).
\type\()_h264_qpel16_mc12:
        lowpass_const   w3
        mov             x11, sp                 // remember sp
        sub             sp,  sp,  #(16*16+16*12) // stack scratch
        // Stage 1: packed put-variant vertical helper into scratch.
        mov             x0,  sp
        mov             x3,  x2                 // x3 = stride
        sub             x1,  x1,  x2, lsl #1    // x1 -= 2*stride
        bl              put_h264_qpel16_v_lowpass_neon_packed
        // Stage 2: hv _l2 helper; x0 as left by stage 1 is passed in x4.
        mov             x4,  x0
        mov             x2,  x3                 // x2 = stride (copied back from x3)
        mov             x0,  x8                 // x0 = dst
        sub             x1,  x9,  x3, lsl #1    // x1 = x9 - 2*stride
        sub             x1,  x1,  #2            // x1 = x9 - 2*stride - 2
        bl              \type\()_h264_qpel16_hv_lowpass_l2_neon
        mov             sp,  x11                // release scratch
        ret             x14
endfunc

function ff_\type\()_h264_qpel16_mc22_neon, export=1
        mov             x14, x30                // keep return address
        lowpass_const   w3
        mov             x11, sp                 // remember sp
        mov             x3,  x2                 // x3 = stride
        sub             x1,  x1,  x2, lsl #1
        sub             x1,  x1,  #2            // x1 = src - 2*stride - 2
        bl              \type\()_h264_qpel16_hv_lowpass_neon
        mov             sp,  x11                // restore stack
        ret             x14
endfunc

function ff_\type\()_h264_qpel16_mc32_neon, export=1
        mov             x8,  x0                 // x8 = dst
        mov             x9,  x1                 // x9 = src
        mov             x14, x30                // keep return address
        add             x1,  x1,  #1            // stage 1 of mc12 starts at src + 1
        b               \type\()_h264_qpel16_mc12
endfunc

function ff_\type\()_h264_qpel16_mc03_neon, export=1
        add             x12, x1,  x2            // x12 = src + stride
        mov             x14, x30                // keep return address
        b               \type\()_h264_qpel16_mc01
endfunc

function ff_\type\()_h264_qpel16_mc13_neon, export=1
        mov             x8,  x0                 // x8 = dst
        mov             x9,  x1                 // x9 = src
        mov             x14, x30                // keep return address
        add             x1,  x1,  x2            // stage 1 of mc11 starts at src + stride
        b               \type\()_h264_qpel16_mc11
endfunc

function ff_\type\()_h264_qpel16_mc23_neon, export=1
        mov             x8,  x0                 // x8 = dst
        mov             x9,  x1                 // x9 = src
        mov             x14, x30                // keep return address
        add             x1,  x1,  x2            // stage 1 of mc21 starts at src + stride
        b               \type\()_h264_qpel16_mc21
endfunc

function ff_\type\()_h264_qpel16_mc33_neon, export=1
        mov             x14, x30                // keep return address
        mov             x8,  x0                 // x8 = dst
        add             x9,  x1,  #1            // x9 = src + 1
        add             x1,  x1,  x2            // x1 = src + stride
        b               \type\()_h264_qpel16_mc11
endfunc
.endm
||||
|
||||
// Expand h264_qpel16 twice, producing the ff_put_h264_qpel16_mcXY_neon and
// ff_avg_h264_qpel16_mcXY_neon entry points.
        h264_qpel16     put
        h264_qpel16     avg
Loading…
Reference in new issue