ARM: make some NEON macros reusable

Signed-off-by: Mans Rullgard <mans@mansr.com>
pull/2/head
Janne Grunau 13 years ago committed by Mans Rullgard
parent 150ddbc148
commit a760f530bb
  1. 41
      libavcodec/arm/h264dsp_neon.S
  2. 59
      libavcodec/arm/neon.S
  3. 26
      libavcodec/arm/vp8dsp_neon.S

@ -19,46 +19,7 @@
*/
#include "asm.S"
.macro transpose_8x8 r0 r1 r2 r3 r4 r5 r6 r7
vtrn.32 \r0, \r4
vtrn.32 \r1, \r5
vtrn.32 \r2, \r6
vtrn.32 \r3, \r7
vtrn.16 \r0, \r2
vtrn.16 \r1, \r3
vtrn.16 \r4, \r6
vtrn.16 \r5, \r7
vtrn.8 \r0, \r1
vtrn.8 \r2, \r3
vtrn.8 \r4, \r5
vtrn.8 \r6, \r7
.endm
.macro transpose_4x4 r0 r1 r2 r3
vtrn.16 \r0, \r2
vtrn.16 \r1, \r3
vtrn.8 \r0, \r1
vtrn.8 \r2, \r3
.endm
.macro swap4 r0 r1 r2 r3 r4 r5 r6 r7
vswp \r0, \r4
vswp \r1, \r5
vswp \r2, \r6
vswp \r3, \r7
.endm
.macro transpose16_4x4 r0 r1 r2 r3 r4 r5 r6 r7
vtrn.32 \r0, \r2
vtrn.32 \r1, \r3
vtrn.32 \r4, \r6
vtrn.32 \r5, \r7
vtrn.16 \r0, \r1
vtrn.16 \r2, \r3
vtrn.16 \r4, \r5
vtrn.16 \r6, \r7
.endm
#include "neon.S"
/* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
.macro h264_chroma_mc8 type

@ -0,0 +1,59 @@
/*
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
.macro transpose_8x8 r0, r1, r2, r3, r4, r5, r6, r7
vtrn.32 \r0, \r4
vtrn.32 \r1, \r5
vtrn.32 \r2, \r6
vtrn.32 \r3, \r7
vtrn.16 \r0, \r2
vtrn.16 \r1, \r3
vtrn.16 \r4, \r6
vtrn.16 \r5, \r7
vtrn.8 \r0, \r1
vtrn.8 \r2, \r3
vtrn.8 \r4, \r5
vtrn.8 \r6, \r7
.endm
.macro transpose_4x4 r0, r1, r2, r3
vtrn.16 \r0, \r2
vtrn.16 \r1, \r3
vtrn.8 \r0, \r1
vtrn.8 \r2, \r3
.endm
.macro swap4 r0, r1, r2, r3, r4, r5, r6, r7
vswp \r0, \r4
vswp \r1, \r5
vswp \r2, \r6
vswp \r3, \r7
.endm
.macro transpose16_4x4 r0, r1, r2, r3, r4, r5, r6, r7
vtrn.32 \r0, \r2
vtrn.32 \r1, \r3
vtrn.32 \r4, \r6
vtrn.32 \r5, \r7
vtrn.16 \r0, \r1
vtrn.16 \r2, \r3
vtrn.16 \r4, \r5
vtrn.16 \r6, \r7
.endm

@ -22,6 +22,7 @@
*/
#include "asm.S"
#include "neon.S"
function ff_vp8_luma_dc_wht_neon, export=1
vld1.16 {q0-q1}, [r1,:128]
@ -454,23 +455,6 @@ endfunc
.endif
.endm
.macro transpose8x16matrix
vtrn.32 q0, q4
vtrn.32 q1, q5
vtrn.32 q2, q6
vtrn.32 q3, q7
vtrn.16 q0, q2
vtrn.16 q1, q3
vtrn.16 q4, q6
vtrn.16 q5, q7
vtrn.8 q0, q1
vtrn.8 q2, q3
vtrn.8 q4, q5
vtrn.8 q6, q7
.endm
.macro vp8_v_loop_filter16 name, inner=0, simple=0
function ff_vp8_v_loop_filter16\name\()_neon, export=1
vpush {q4-q7}
@ -605,7 +589,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
vld1.8 {d13}, [r0], r1
vld1.8 {d15}, [r0], r1
transpose8x16matrix
transpose_8x8 q0, q1, q2, q3, q4, q5, q6, q7
vdup.8 q14, r2 @ flim_E
.if !\simple
@ -616,7 +600,7 @@ function ff_vp8_h_loop_filter16\name\()_neon, export=1
sub r0, r0, r1, lsl #4 @ backup 16 rows
transpose8x16matrix
transpose_8x8 q0, q1, q2, q3, q4, q5, q6, q7
@ Store pixels:
vst1.8 {d0}, [r0], r1
@ -670,7 +654,7 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
vld1.8 {d14}, [r0], r2
vld1.8 {d15}, [r1], r2
transpose8x16matrix
transpose_8x8 q0, q1, q2, q3, q4, q5, q6, q7
vdup.8 q14, r3 @ flim_E
vdup.8 q15, r12 @ flim_I
@ -681,7 +665,7 @@ function ff_vp8_h_loop_filter8uv\name\()_neon, export=1
sub r0, r0, r2, lsl #3 @ backup u 8 rows
sub r1, r1, r2, lsl #3 @ backup v 8 rows
transpose8x16matrix
transpose_8x8 q0, q1, q2, q3, q4, q5, q6, q7
@ Store pixels:
vst1.8 {d0}, [r0], r2

Loading…
Cancel
Save