mirror of https://github.com/FFmpeg/FFmpeg.git
on ev6. Originally committed as revision 979 to svn://svn.ffmpeg.org/ffmpeg/trunkpull/126/head
parent
ea689c8eb7
commit
f9bb4bdffc
4 changed files with 191 additions and 3 deletions
@ -0,0 +1,186 @@ |
||||
/* |
||||
* Alpha optimized DSP utils |
||||
* Copyright (c) 2002 Falk Hueffner <falk@debian.org>
|
||||
* |
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by |
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version. |
||||
* |
||||
* This program is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
* GNU General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU General Public License |
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
||||
*/ |
||||
|
||||
#include "regdef.h" |
||||
#ifdef HAVE_AV_CONFIG_H
|
||||
#include "config.h" |
||||
#endif |
||||
|
||||
/* Some nicer register names. */ |
||||
#define ta t10 |
||||
#define tb t11 |
||||
#define tc t12 |
||||
#define td AT |
||||
/* Danger: these overlap with the argument list and the return value */ |
||||
#define te a5 |
||||
#define tf a4 |
||||
#define tg a3 |
||||
#define th v0 |
||||
|
||||
.set noat
|
||||
.set noreorder
|
||||
.arch pca56
|
||||
.text |
||||
|
||||
/***************************************************************************** |
||||
* int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size) |
||||
* |
||||
* This code is written with a pca56 in mind. For ev6, one should |
||||
* really take the increased latency of 3 cycles for MVI instructions |
||||
* into account. |
||||
* |
||||
* It is important to keep the loading and first use of a register as |
||||
* far apart as possible, because if a register is accessed before it |
||||
* has been fetched from memory, the CPU will stall. |
||||
*/ |
||||
.align 4
|
||||
.globl pix_abs16x16_mvi_asm
|
||||
.ent pix_abs16x16_mvi_asm
|
||||
pix_abs16x16_mvi_asm: |
||||
.frame sp, 0, ra, 0 |
||||
.prologue 0
|
||||
|
||||
#ifdef HAVE_GPROF |
||||
lda AT, _mcount |
||||
jsr AT, (AT), _mcount |
||||
#endif |
||||
|
||||
and a1, 7, t0 |
||||
clr v0 |
||||
lda a3, 16 |
||||
beq t0, $aligned |
||||
.align 4
|
||||
$unaligned: |
||||
/* Registers: |
||||
line 0: |
||||
t0: left_u -> left lo -> left |
||||
t1: mid |
||||
t2: right_u -> right hi -> right |
||||
t3: ref left |
||||
t4: ref right |
||||
line 1: |
||||
t5: left_u -> left lo -> left |
||||
t6: mid |
||||
t7: right_u -> right hi -> right |
||||
t8: ref left |
||||
t9: ref right |
||||
temp: |
||||
ta: left hi |
||||
tb: right lo |
||||
tc: error left |
||||
td: error right */ |
||||
|
||||
/* load line 0 */ |
||||
ldq_u t0, 0(a1) # left_u |
||||
ldq_u t1, 8(a1) # mid |
||||
ldq_u t2, 16(a1) # right_u |
||||
ldq t3, 0(a0) # ref left |
||||
ldq t4, 8(a0) # ref right |
||||
addq a0, a2, a0 # pix1 |
||||
addq a1, a2, a1 # pix2 |
||||
/* load line 1 */
|
||||
ldq_u t5, 0(a1) # left_u |
||||
ldq_u t6, 8(a1) # mid |
||||
ldq_u t7, 16(a1) # right_u |
||||
ldq t8, 0(a0) # ref left |
||||
ldq t9, 8(a0) # ref right |
||||
addq a0, a2, a0 # pix1 |
||||
addq a1, a2, a1 # pix2 |
||||
/* calc line 0 */ |
||||
extql t0, a1, t0 # left lo |
||||
extqh t1, a1, ta # left hi |
||||
extql t1, a1, tb # right lo |
||||
or t0, ta, t0 # left |
||||
extqh t2, a1, t2 # right hi |
||||
perr t3, t0, tc # error left |
||||
or t2, tb, t2 # right |
||||
perr t4, t2, td # error right |
||||
addq v0, tc, v0 # add error left |
||||
addq v0, td, v0 # add error left |
||||
/* calc line 1 */ |
||||
extql t5, a1, t5 # left lo |
||||
extqh t6, a1, ta # left hi |
||||
extql t6, a1, tb # right lo |
||||
or t5, ta, t5 # left |
||||
extqh t7, a1, t7 # right hi |
||||
perr t8, t5, tc # error left |
||||
or t7, tb, t7 # right |
||||
perr t9, t7, td # error right |
||||
addq v0, tc, v0 # add error left |
||||
addq v0, td, v0 # add error left |
||||
/* loop */ |
||||
subq a3, 2, a3 # h -= 2 |
||||
bne a3, $unaligned |
||||
ret |
||||
|
||||
.align 4
|
||||
$aligned: |
||||
/* load line 0 */ |
||||
ldq t0, 0(a1) # left |
||||
ldq t1, 8(a1) # right |
||||
addq a1, a2, a1 # pix2 |
||||
ldq t2, 0(a0) # ref left |
||||
ldq t3, 8(a0) # ref right |
||||
addq a0, a2, a0 # pix1 |
||||
/* load line 1 */ |
||||
ldq t4, 0(a1) # left |
||||
ldq t5, 8(a1) # right |
||||
addq a1, a2, a1 # pix2 |
||||
ldq t6, 0(a0) # ref left |
||||
ldq t7, 8(a0) # ref right |
||||
addq a0, a2, a0 # pix1 |
||||
/* load line 2 */ |
||||
ldq t8, 0(a1) # left |
||||
ldq t9, 8(a1) # right |
||||
addq a1, a2, a1 # pix2 |
||||
ldq ta, 0(a0) # ref left |
||||
ldq tb, 8(a0) # ref right |
||||
addq a0, a2, a0 # pix1 |
||||
/* load line 3 */ |
||||
ldq tc, 0(a1) # left |
||||
ldq td, 8(a1) # right |
||||
addq a1, a2, a1 # pix2 |
||||
ldq te, 0(a0) # ref left |
||||
ldq tf, 8(a0) # ref right |
||||
/* calc line 0 */ |
||||
perr t0, t2, t0 # error left |
||||
addq a0, a2, a0 # pix1 |
||||
perr t1, t3, t1 # error right |
||||
addq v0, t0, v0 # add error left |
||||
/* calc line 1 */ |
||||
perr t4, t6, t0 # error left |
||||
addq v0, t1, v0 # add error right |
||||
perr t5, t7, t1 # error right |
||||
addq v0, t0, v0 # add error left |
||||
/* calc line 2 */ |
||||
perr t8, ta, t0 # error left |
||||
addq v0, t1, v0 # add error right |
||||
perr t9, tb, t1 # error right |
||||
addq v0, t0, v0 # add error left |
||||
/* calc line 3 */ |
||||
perr tc, te, t0 # error left |
||||
addq v0, t1, v0 # add error right |
||||
perr td, tf, t1 # error right |
||||
addq v0, t0, v0 # add error left |
||||
addq v0, t1, v0 # add error right |
||||
/* loop */ |
||||
subq a3, 4, a3 # h -= 4 |
||||
bne a3, $aligned |
||||
ret |
||||
.end pix_abs16x16_mvi_asm
|
Loading…
Reference in new issue