arm: vp9itxfm: Move the load_add_store macro out from the itxfm16 pass2 function

This allows reusing the macro for a separate implementation of the
pass2 function.

Signed-off-by: Martin Storsjö <martin@martin.st>
pull/273/head
Martin Storsjö 8 years ago
parent 115476018d
commit 47b3c2c18d
  1. 72
      libavcodec/arm/vp9itxfm_neon.S

@ -657,6 +657,42 @@ function iadst16
bx lr
endfunc
@ load_add_store coef0, coef1, coef2, coef3
@
@ Round four registers of 16-bit coefficients, add each of them to eight
@ bytes read from memory through r0 and r3, and store the sums back to
@ the same addresses as saturated unsigned bytes.
@
@ Arguments:
@   coef0-coef3  q registers, each treated as eight signed 16-bit lanes.
@                Modified in place. The low four lanes are combined with
@                the four bytes read via r0, the high four lanes with
@                the four bytes read via r3.
@ Register inputs:
@   r0, r3       pointers used with a :32 alignment qualifier; four
@                32-bit words are read and then rewritten through each.
@   r1           post-increment applied to r0/r3 after every access.
@ On exit:
@   r0 and r3 have each advanced by 4*r1 (four loads forward, rewound by
@   r1 lsl #2, then four stores forward again).
@ Clobbers:
@   d4-d7 (i.e. q2-q3), so the coef arguments must not be q2 or q3.
@
@ NOTE(review): presumably r0/r3 address two interleaved sets of
@ destination pixel rows and r1 is the distance between consecutive rows
@ of one set - confirm against the callers.
@ NOTE(review): loads, arithmetic and stores are interleaved rather than
@ grouped; presumably deliberate instruction scheduling - keep the order.
.macro load_add_store coef0, coef1, coef2, coef3
@ Rounding shift right by 6 on the first two coefficient registers.
vrshr.s16 \coef0, \coef0, #6
vrshr.s16 \coef1, \coef1, #6
@ d4 = { 4 bytes from [r0], 4 bytes from [r3] }: the all-lanes load fills
@ both halves from r0, then lane 1 is replaced with the word from r3.
vld1.32 {d4[]}, [r0,:32], r1
vld1.32 {d4[1]}, [r3,:32], r1
vrshr.s16 \coef2, \coef2, #6
vrshr.s16 \coef3, \coef3, #6
@ d5 = next word from each pointer, same layout as d4.
vld1.32 {d5[]}, [r0,:32], r1
vld1.32 {d5[1]}, [r3,:32], r1
@ Widening add: each byte of d4 is zero-extended to 16 bits and added to
@ the corresponding lane of coef0.
vaddw.u8 \coef0, \coef0, d4
vld1.32 {d6[]}, [r0,:32], r1
vld1.32 {d6[1]}, [r3,:32], r1
vaddw.u8 \coef1, \coef1, d5
vld1.32 {d7[]}, [r0,:32], r1
vld1.32 {d7[1]}, [r3,:32], r1
@ Narrow signed 16-bit sums to unsigned 8-bit with saturation, reusing
@ d4/d5 (their loaded bytes have already been consumed above).
vqmovun.s16 d4, \coef0
vqmovun.s16 d5, \coef1
@ Rewind both pointers by the four increments done by the loads so the
@ stores below hit the same addresses that were read.
sub r0, r0, r1, lsl #2
sub r3, r3, r1, lsl #2
vaddw.u8 \coef2, \coef2, d6
vaddw.u8 \coef3, \coef3, d7
@ Store back: lane 0 of each result goes through r0, lane 1 through r3,
@ mirroring the load layout.
vst1.32 {d4[0]}, [r0,:32], r1
vst1.32 {d4[1]}, [r3,:32], r1
vqmovun.s16 d6, \coef2
vst1.32 {d5[0]}, [r0,:32], r1
vst1.32 {d5[1]}, [r3,:32], r1
vqmovun.s16 d7, \coef3
vst1.32 {d6[0]}, [r0,:32], r1
vst1.32 {d6[1]}, [r3,:32], r1
vst1.32 {d7[0]}, [r0,:32], r1
vst1.32 {d7[1]}, [r3,:32], r1
.endm
.macro itxfm16_1d_funcs txfm
@ Read a vertical 4x16 slice out of a 16x16 matrix, do a transform on it,
@ transpose into a horizontal 16x4 slice and store.
@ -739,44 +775,8 @@ function \txfm\()16_1d_4x16_pass2_neon
lsl r1, r1, #1
bl \txfm\()16
@ load_add_store coef0, coef1, coef2, coef3
@
@ Function-local helper macro: it is expanded twice right after its
@ definition (with q8-q11 and with q12-q15) and then removed again with
@ .purgem.
@
@ For each of the four q-register arguments (eight signed 16-bit lanes
@ each): apply a rounding shift right by 6, add the low four lanes to
@ four bytes read via r0 and the high four lanes to four bytes read via
@ r3, saturate to unsigned 8-bit and store the bytes back where they
@ were read from.
@
@ r1 is the post-increment applied to r0/r3 after every access; both
@ pointers end up 4*r1 further than they started. d4-d7 are used as
@ scratch and are overwritten.
@
@ NOTE(review): the enclosing function doubles r1 (lsl r1, r1, #1) before
@ this point; presumably r0/r3 then walk alternating destination rows -
@ the setup of r3 is not visible here, confirm in the function head.
.macro load_add_store coef0, coef1, coef2, coef3
@ Rounding shift right by 6 on the first two coefficient registers.
vrshr.s16 \coef0, \coef0, #6
vrshr.s16 \coef1, \coef1, #6
@ d4 = { word from [r0], word from [r3] }: the all-lanes load fills both
@ halves from r0, then lane 1 is replaced with the word from r3.
vld1.32 {d4[]}, [r0,:32], r1
vld1.32 {d4[1]}, [r3,:32], r1
vrshr.s16 \coef2, \coef2, #6
vrshr.s16 \coef3, \coef3, #6
vld1.32 {d5[]}, [r0,:32], r1
vld1.32 {d5[1]}, [r3,:32], r1
@ Widening add of the eight loaded bytes onto the 16-bit lanes.
vaddw.u8 \coef0, \coef0, d4
vld1.32 {d6[]}, [r0,:32], r1
vld1.32 {d6[1]}, [r3,:32], r1
vaddw.u8 \coef1, \coef1, d5
vld1.32 {d7[]}, [r0,:32], r1
vld1.32 {d7[1]}, [r3,:32], r1
@ Saturating narrow from signed 16-bit to unsigned 8-bit; d4/d5 are
@ reused for the results once their loaded bytes have been added.
vqmovun.s16 d4, \coef0
vqmovun.s16 d5, \coef1
@ Step both pointers back over the four loads so the stores overwrite
@ the addresses that were just read.
sub r0, r0, r1, lsl #2
sub r3, r3, r1, lsl #2
vaddw.u8 \coef2, \coef2, d6
vaddw.u8 \coef3, \coef3, d7
@ Lane 0 of each result is written through r0, lane 1 through r3.
vst1.32 {d4[0]}, [r0,:32], r1
vst1.32 {d4[1]}, [r3,:32], r1
vqmovun.s16 d6, \coef2
vst1.32 {d5[0]}, [r0,:32], r1
vst1.32 {d5[1]}, [r3,:32], r1
vqmovun.s16 d7, \coef3
vst1.32 {d6[0]}, [r0,:32], r1
vst1.32 {d6[1]}, [r3,:32], r1
vst1.32 {d7[0]}, [r0,:32], r1
vst1.32 {d7[1]}, [r3,:32], r1
.endm
load_add_store q8, q9, q10, q11
load_add_store q12, q13, q14, q15
.purgem load_add_store
pop {pc}
endfunc

Loading…
Cancel
Save