mirror of https://github.com/FFmpeg/FFmpeg.git
Approximately 5% faster on Cortex-A8.
Signed-off-by: Mans Rullgard <mans@mansr.com>
(cherry picked from commit a7878c9f73)
oldabi
parent
5da7494dc5
commit
4ae3ee4ae9
6 changed files with 260 additions and 3 deletions
@ -0,0 +1,29 @@ |
|||||||
|
/**
|
||||||
|
* This file is part of FFmpeg. |
||||||
|
* |
||||||
|
* FFmpeg is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* FFmpeg is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with FFmpeg; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
/* Include guard.  Note that this header names VP56RangeCoder, DCTELEM and
 * NUM_DCT_TOKENS without including anything itself, so the includer
 * presumably has to make them visible first (TODO confirm at the include site). */
#ifndef AVCODEC_ARM_VP8_H |
||||||
|
#define AVCODEC_ARM_VP8_H |
||||||
|
|
||||||
|
#if HAVE_ARMV6 |
||||||
|
/* When HAVE_ARMV6 is non-zero, any later use of the name
 * decode_block_coeffs_internal expands to the ARMv6 routine declared below. */
#define decode_block_coeffs_internal ff_decode_block_coeffs_armv6 |
||||||
|
/* Prototype of the assembly routine; the last two parameters are the ones
 * the assembly side reads from the stack. */
int ff_decode_block_coeffs_armv6(VP56RangeCoder *rc, DCTELEM block[16], |
||||||
|
uint8_t probs[8][3][NUM_DCT_TOKENS-1], |
||||||
|
int i, uint8_t *token_prob, int16_t qmul[2]); |
||||||
|
#endif |
||||||
|
|
||||||
|
#endif
@ -0,0 +1,220 @@ |
|||||||
|
/** |
||||||
|
* Copyright (C) 2010 Mans Rullgard |
||||||
|
* |
||||||
|
* This file is part of FFmpeg. |
||||||
|
* |
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* FFmpeg is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "asm.S" |
||||||
|
|
||||||
|
.syntax unified
|
||||||
|
|
||||||
|
@ rac_get_prob: decode one range-coder decision with probability pr.
@   h    in: current high value        out: updated high value
@   bs   in/out: bit counter; advanced by the shift, reduced by 16 on refill
@   buf  in/out: input pointer; advanced by 2 bytes on refill
@   cw   in/out: code word
@   pr   in: probability (smlabb uses only its low 16 bits)
@   t0   in: shift count               out: h shifted left by that count
@   t1   scratch, written only on refill
@ On exit the condition flags are those of the final cmp:
@   ge = code word was at or above the split (both sub instructions ran),
@   lt = code word was below the split (h is left holding the split).
.macro rac_get_prob h, bs, buf, cw, pr, t0, t1 |
||||||
|
@ Advance the bit counter; the carry out of this add gates every
@ cs-conditional refill instruction below (nothing in between sets flags).
adds \bs, \bs, \t0 |
||||||
|
lsl \cw, \cw, \t0 |
||||||
|
lsl \t0, \h, \t0 |
||||||
|
@ h = 256 - pr, then h += low16(t0) * low16(pr)
rsb \h, \pr, #256 |
||||||
|
@ Refill: fetch 16 more bits and post-increment the input pointer
ldrhcs \t1, [\buf], #2 |
||||||
|
smlabb \h, \t0, \pr, \h |
||||||
|
@ Swap the two bytes of the fetched halfword before merging it in
rev16cs \t1, \t1 |
||||||
|
orrcs \cw, \cw, \t1, lsl \bs |
||||||
|
subcs \bs, \bs, #16 |
||||||
|
@ h = split value = (256 - pr + shifted_high * pr) >> 8
lsr \h, \h, #8 |
||||||
|
@ Compare the code word against the split placed in the upper halfword
cmp \cw, \h, lsl #16 |
||||||
|
@ Decision 1: remove the split from the code word, high = shifted_high - split
subge \cw, \cw, \h, lsl #16 |
||||||
|
subge \h, \t0, \h |
||||||
|
.endm |
||||||
|
|
||||||
|
@ rac_get_128: same operation as rac_get_prob but with the probability fixed
@ at 128, so the split is computed with an add and shift instead of a multiply.
@   h    in: current high value        out: updated high value
@   bs   in/out: bit counter; advanced by the shift, reduced by 16 on refill
@   buf  in/out: input pointer; advanced by 2 bytes on refill
@   cw   in/out: code word
@   t0   in: shift count               out: h shifted left by that count
@   t1   scratch, written only on refill
@ On exit: ge = decision 1 (both sub instructions ran), lt = decision 0.
.macro rac_get_128 h, bs, buf, cw, t0, t1 |
||||||
|
@ Carry from this add gates the cs-conditional refill instructions below
adds \bs, \bs, \t0 |
||||||
|
lsl \cw, \cw, \t0 |
||||||
|
lsl \t0, \h, \t0 |
||||||
|
@ Refill: fetch 16 more bits and post-increment the input pointer
ldrhcs \t1, [\buf], #2 |
||||||
|
@ h = 128 + (shifted_high << 7); shifted right by 8 further down
mov \h, #128 |
||||||
|
rev16cs \t1, \t1 |
||||||
|
add \h, \h, \t0, lsl #7 |
||||||
|
orrcs \cw, \cw, \t1, lsl \bs |
||||||
|
subcs \bs, \bs, #16 |
||||||
|
@ h = split value
lsr \h, \h, #8 |
||||||
|
cmp \cw, \h, lsl #16 |
||||||
|
@ Decision 1: remove the split from the code word, high = shifted_high - split
subge \cw, \cw, \h, lsl #16 |
||||||
|
subge \h, \t0, \h |
||||||
|
.endm |
||||||
|
|
||||||
|
@ ARMv6 routine behind decode_block_coeffs_internal.  C prototype:
@   int ff_decode_block_coeffs_armv6(VP56RangeCoder *rc, DCTELEM block[16],
@                                    uint8_t probs[8][3][NUM_DCT_TOKENS-1],
@                                    int i, uint8_t *token_prob, int16_t qmul[2]);
@ In:  r0 = rc, r1 = block, r2 = probs, r3 = i,
@      token_prob and qmul on the stack (at sp+44 and sp+48 after the push).
@ Out: r0 = final value of the coefficient counter kept in r3.
@ Register use:
@   r1      block pointer (reused as a bit accumulator in the category path,
@           then reloaded from the stack)
@   r2      probs base
@   r3      coefficient counter, incremented once per pass through label 0
@   r4      current token_prob pointer
@   r5-r8   range coder state: high, bits, buf, code_word
@   r9      shift count ff_vp56_norm_shift[r5], or scratch
@   r10     scratch
@   r11     the two 16-bit values loaded from qmul, packed in one word
@   r12     decoded coefficient value
@   lr      base address of ff_vp56_norm_shift
@ After each rac_get_prob / rac_get_128, ge means the decoded decision was 1
@ and lt means it was 0.
@ Numeric labels are reused; every branch targets the nearest label with that
@ number in the direction given by its f (forward) or b (backward) suffix.
function ff_decode_block_coeffs_armv6, export=1 |
||||||
|
@ 11 registers = 44 bytes; r0 and r1 are saved so they can be reloaded later
push {r0,r1,r4-r11,lr} |
||||||
|
movrel lr, ff_vp56_norm_shift |
||||||
|
ldrd r4, r5, [sp, #44] @ token_prob, qmul
|
||||||
|
@ Flags from this compare are consumed by the pkhtbne below
cmp r3, #0 |
||||||
|
@ One word load fetches both 16-bit qmul entries
ldr r11, [r5] |
||||||
|
ldm r0, {r5-r7} @ high, bits, buf
|
||||||
|
@ If r3 != 0, copy the upper halfword of r11 into its lower halfword
pkhtbne r11, r11, r11, asr #16 |
||||||
|
ldr r8, [r0, #16] @ code_word
|
||||||
|
@ Main loop: one pass per coefficient position
0: |
||||||
|
@ r9 = shift count for the current high value
ldrb r9, [lr, r5] |
||||||
|
add r3, r3, #1 |
||||||
|
@ Decision on token_prob[1]: 0 means nothing is stored at this position
ldrb r0, [r4, #1] |
||||||
|
rac_get_prob r5, r6, r7, r8, r0, r9, r10 |
||||||
|
blt 2f |
||||||
|
|
||||||
|
@ Decision on token_prob[2]: 1 means the magnitude is 2 or more
ldrb r9, [lr, r5] |
||||||
|
ldrb r0, [r4, #2] |
||||||
|
rac_get_prob r5, r6, r7, r8, r0, r9, r10 |
||||||
|
ldrb r9, [lr, r5] |
||||||
|
bge 3f |
||||||
|
|
||||||
|
@ Magnitude 1.  Next token_prob = probs + 33*r3 + 11 and the value is the
@ sign-extended lower halfword of r11.  The sign decision below is the
@ rac_get_128 sequence written out inline (h=r5, bs=r6, buf=r7, cw=r8,
@ t0=r9, t1=r10), interleaved with the address and zigzag computations.
add r4, r3, r3, lsl #5 |
||||||
|
sxth r12, r11 |
||||||
|
add r4, r2, r4 |
||||||
|
adds r6, r6, r9 |
||||||
|
add r4, r4, #11 |
||||||
|
lsl r8, r8, r9 |
||||||
|
ldrhcs r10, [r7], #2 |
||||||
|
lsl r9, r5, r9 |
||||||
|
mov r5, #128 |
||||||
|
rev16cs r10, r10 |
||||||
|
add r5, r5, r9, lsl #7 |
||||||
|
orrcs r8, r8, r10, lsl r6 |
||||||
|
subcs r6, r6, #16 |
||||||
|
lsr r5, r5, #8 |
||||||
|
cmp r8, r5, lsl #16 |
||||||
|
@ Table base is biased by -1 because r3 was already incremented
movrel r10, zigzag_scan-1 |
||||||
|
subge r8, r8, r5, lsl #16 |
||||||
|
subge r5, r9, r5 |
||||||
|
ldrb r10, [r10, r3] |
||||||
|
@ Negate when the sign decision was 1 (flags still from the cmp on r8)
rsbge r12, r12, #0 |
||||||
|
@ New flags: leave the loop once r3 has reached 16
cmp r3, #16 |
||||||
|
@ Store the halfword at block + byte offset taken from zigzag_scan
strh r12, [r1, r10] |
||||||
|
bge 6f |
||||||
|
@ After a stored coefficient: decision on token_prob[0] tells whether
@ another token follows
5: |
||||||
|
ldrb r9, [lr, r5] |
||||||
|
ldrb r0, [r4] |
||||||
|
rac_get_prob r5, r6, r7, r8, r0, r9, r10 |
||||||
|
@ Copy the upper halfword of r11 into its lower halfword (flags untouched)
pkhtb r11, r11, r11, asr #16 |
||||||
|
@ Decision 1: continue with the next position; otherwise fall into the exit
bge 0b |
||||||
|
|
||||||
|
@ Exit: write the range coder state back and return r3
6: |
||||||
|
@ Reload the rc pointer saved by the push
ldr r0, [sp] |
||||||
|
@ Clamp buf to the word stored at rc + 12 (presumably the end-of-buffer
@ pointer; the struct layout is not visible here - TODO confirm)
ldr r9, [r0, #12] |
||||||
|
cmp r7, r9 |
||||||
|
movhi r7, r9 |
||||||
|
stm r0, {r5-r7} @ high, bits, buf
|
||||||
|
str r8, [r0, #16] @ code_word
|
||||||
|
|
||||||
|
@ Drop the two stack slots holding the saved r0 and r1
add sp, sp, #8 |
||||||
|
mov r0, r3 |
||||||
|
pop {r4-r11,pc} |
||||||
|
@ Nothing stored at this position: next token_prob = probs + 33*r3,
@ then loop again unless r3 == 16
2: |
||||||
|
add r4, r3, r3, lsl #5 |
||||||
|
cmp r3, #16 |
||||||
|
add r4, r2, r4 |
||||||
|
pkhtb r11, r11, r11, asr #16 |
||||||
|
bne 0b |
||||||
|
b 6b |
||||||
|
@ Magnitude >= 2: decision on token_prob[3] separates 2..4 from larger values
3: |
||||||
|
ldrb r0, [r4, #3] |
||||||
|
rac_get_prob r5, r6, r7, r8, r0, r9, r10 |
||||||
|
ldrb r9, [lr, r5] |
||||||
|
bge 1f |
||||||
|
|
||||||
|
@ Values 2..4: start at 2, add 1 per decision of 1 on token_prob[4] and [5]
mov r12, #2 |
||||||
|
ldrb r0, [r4, #4] |
||||||
|
rac_get_prob r5, r6, r7, r8, r0, r9, r10 |
||||||
|
addge r12, #1 |
||||||
|
ldrb r9, [lr, r5] |
||||||
|
blt 4f |
||||||
|
ldrb r0, [r4, #5] |
||||||
|
rac_get_prob r5, r6, r7, r8, r0, r9, r10 |
||||||
|
addge r12, #1 |
||||||
|
ldrb r9, [lr, r5] |
||||||
|
b 4f |
||||||
|
@ Decision on token_prob[6]: 1 selects the table-driven categories at 3f
1: |
||||||
|
ldrb r0, [r4, #6] |
||||||
|
rac_get_prob r5, r6, r7, r8, r0, r9, r10 |
||||||
|
ldrb r9, [lr, r5] |
||||||
|
bge 3f |
||||||
|
|
||||||
|
@ Decision on token_prob[7]: 1 selects values 7..10 at 2f
ldrb r0, [r4, #7] |
||||||
|
rac_get_prob r5, r6, r7, r8, r0, r9, r10 |
||||||
|
ldrb r9, [lr, r5] |
||||||
|
bge 2f |
||||||
|
|
||||||
|
@ Values 5..6: base 5 plus one extra bit decoded with fixed probability 159
mov r12, #5 |
||||||
|
mov r0, #159 |
||||||
|
rac_get_prob r5, r6, r7, r8, r0, r9, r10 |
||||||
|
addge r12, r12, #1 |
||||||
|
ldrb r9, [lr, r5] |
||||||
|
b 4f |
||||||
|
@ Values 7..10: base 7 plus two extra bits (weights 2 and 1) decoded with
@ fixed probabilities 165 and 145
2: |
||||||
|
mov r12, #7 |
||||||
|
mov r0, #165 |
||||||
|
rac_get_prob r5, r6, r7, r8, r0, r9, r10 |
||||||
|
addge r12, r12, #2 |
||||||
|
ldrb r9, [lr, r5] |
||||||
|
mov r0, #145 |
||||||
|
rac_get_prob r5, r6, r7, r8, r0, r9, r10 |
||||||
|
addge r12, r12, #1 |
||||||
|
ldrb r9, [lr, r5] |
||||||
|
b 4f |
||||||
|
@ Larger values: two decisions build a category number 0..3 in r12
3: |
||||||
|
ldrb r0, [r4, #8] |
||||||
|
rac_get_prob r5, r6, r7, r8, r0, r9, r10 |
||||||
|
@ On a 1, bump r4 so the next load at offset 9 reads one byte further
addge r4, r4, #1 |
||||||
|
ldrb r9, [lr, r5] |
||||||
|
movge r12, #2 |
||||||
|
movlt r12, #0 |
||||||
|
ldrb r0, [r4, #9] |
||||||
|
rac_get_prob r5, r6, r7, r8, r0, r9, r10 |
||||||
|
mov r9, #8 |
||||||
|
addge r12, r12, #1 |
||||||
|
@ r4 = word number r12 of ff_vp8_dct_cat_prob, used below as a byte pointer
movrel r4, ff_vp8_dct_cat_prob |
||||||
|
lsl r9, r9, r12 |
||||||
|
ldr r4, [r4, r12, lsl #2] |
||||||
|
@ Base value for this category = (8 << category) + 3
add r12, r9, #3 |
||||||
|
@ r1 is borrowed as the extra-bits accumulator; block is reloaded afterwards
mov r1, #0 |
||||||
|
ldrb r0, [r4], #1 |
||||||
|
@ Read extra bits, most significant first, one per probability byte, until
@ the next probability byte is zero
1: |
||||||
|
ldrb r9, [lr, r5] |
||||||
|
lsl r1, r1, #1 |
||||||
|
rac_get_prob r5, r6, r7, r8, r0, r9, r10 |
||||||
|
ldrb r0, [r4], #1 |
||||||
|
addge r1, r1, #1 |
||||||
|
cmp r0, #0 |
||||||
|
bne 1b |
||||||
|
ldrb r9, [lr, r5] |
||||||
|
add r12, r12, r1 |
||||||
|
@ Restore the block pointer from the slot written by the push
ldr r1, [sp, #4] |
||||||
|
@ Common tail for magnitudes >= 2: next token_prob = probs + 33*r3 + 22,
@ decode the sign, multiply by the lower halfword of r11 and store
4: |
||||||
|
add r4, r3, r3, lsl #5 |
||||||
|
add r4, r2, r4 |
||||||
|
add r4, r4, #22 |
||||||
|
rac_get_128 r5, r6, r7, r8, r9, r10 |
||||||
|
@ Negate when the sign decision was 1
rsbge r12, r12, #0 |
||||||
|
smulbb r12, r12, r11 |
||||||
|
@ Table base is biased by -1 because r3 was already incremented
movrel r9, zigzag_scan-1 |
||||||
|
ldrb r9, [r9, r3] |
||||||
|
cmp r3, #16 |
||||||
|
strh r12, [r1, r9] |
||||||
|
@ Exit once r3 has reached 16, otherwise check for a following token
bge 6b |
||||||
|
b 5b |
||||||
|
endfunc |
||||||
|
|
||||||
|
@ Read-only lookup table used by ff_decode_block_coeffs_armv6.
.section .rodata |
||||||
|
@ 16 byte-sized entries, read as zigzag_scan[r3 - 1].  Each entry is used
@ directly as the byte offset of a halfword store into the block array, so
@ every value is twice the index of a 16-bit element.
zigzag_scan: |
||||||
|
.byte 0, 2, 8, 16 |
||||||
|
.byte 10, 4, 6, 12 |
||||||
|
.byte 18, 24, 26, 20 |
||||||
|
.byte 14, 22, 28, 30
Loading…
Reference in new issue