mirror of https://github.com/FFmpeg/FFmpeg.git
parent
8d686ca59d
commit
c166148409
21 changed files with 444 additions and 296 deletions
@ -0,0 +1,76 @@ |
|||||||
|
/* |
||||||
|
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
|
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "libavutil/arm/asm.S" |
||||||
|
|
||||||
|
/*
 * int ff_pix_norm1_armv6(uint8_t *pix, int line_size)
 *
 * Returns the sum of the squares of the bytes in a block of 16 rows by
 * 16 bytes.  r0 is used as the row address and r1 is added to it after
 * every row.
 *
 * Register use:
 *   r12    rows left (starts at 16)
 *   lr     running sum of squares, moved to r0 on exit
 *   r2-r5  the four words of the current row
 *   r6     scratch for the even-byte halfword pairs
 *
 * NOTE(review): the row is fetched with ldm, which presumably requires
 * r0 to be word aligned on every row -- confirm against the callers.
 */
function ff_pix_norm1_armv6, export=1 |
||||||
|
push {r4-r6, lr} |
||||||
|
mov r12, #16 |
||||||
|
mov lr, #0 |
||||||
|
1: |
||||||
|
/* Load one 16-byte row into r2-r5. */
ldm r0, {r2-r5} |
||||||
|
/* uxtb16 zero-extends bytes 0 and 2 of a word into two halfwords;
   with "ror #8" it picks bytes 1 and 3 instead. */
uxtb16 r6, r2 |
||||||
|
uxtb16 r2, r2, ror #8 |
||||||
|
/* smlad with both operands equal adds the squares of the two
   halfwords to lr. */
smlad lr, r6, r6, lr |
||||||
|
uxtb16 r6, r3 |
||||||
|
smlad lr, r2, r2, lr |
||||||
|
uxtb16 r3, r3, ror #8 |
||||||
|
smlad lr, r6, r6, lr |
||||||
|
uxtb16 r6, r4 |
||||||
|
smlad lr, r3, r3, lr |
||||||
|
uxtb16 r4, r4, ror #8 |
||||||
|
smlad lr, r6, r6, lr |
||||||
|
uxtb16 r6, r5 |
||||||
|
smlad lr, r4, r4, lr |
||||||
|
uxtb16 r5, r5, ror #8 |
||||||
|
smlad lr, r6, r6, lr |
||||||
|
/* The flags set here are the ones tested by the bgt below; the add
   and smlad in between are the non-flag-setting forms. */
subs r12, r12, #1 |
||||||
|
/* Step to the next row. */
add r0, r0, r1 |
||||||
|
smlad lr, r5, r5, lr |
||||||
|
bgt 1b |
||||||
|
|
||||||
|
/* Return the accumulated sum. */
mov r0, lr |
||||||
|
pop {r4-r6, pc} |
||||||
|
endfunc |
||||||
|
|
||||||
|
/*
 * int ff_pix_sum_armv6(uint8_t *pix, int line_size)
 *
 * Returns the sum of the bytes in a block of 16 rows by 16 bytes
 * starting at r0, with r1 as the distance between rows.
 *
 * Register use:
 *   r12     rows left (starts at 16)
 *   r2, r3  two independent partial sums, added together on exit
 *   lr      constant zero; usada8 against zero yields the plain sum of
 *           the four bytes of the other operand
 *   r4-r7   the four words of the current row
 *
 * The first word of each row is loaded one step ahead: before the loop
 * for row 0, and by ldr_pre at the bottom of the loop for later rows.
 */
function ff_pix_sum_armv6, export=1 |
||||||
|
push {r4-r7, lr} |
||||||
|
mov r12, #16 |
||||||
|
mov r2, #0 |
||||||
|
mov r3, #0 |
||||||
|
mov lr, #0 |
||||||
|
ldr r4, [r0] |
||||||
|
1: |
||||||
|
/* Flags from this subs drive both the beq and the bgt below; no
   instruction in between uses a flag-setting form. */
subs r12, r12, #1 |
||||||
|
ldr r5, [r0, #4] |
||||||
|
usada8 r2, r4, lr, r2 |
||||||
|
ldr r6, [r0, #8] |
||||||
|
usada8 r3, r5, lr, r3 |
||||||
|
ldr r7, [r0, #12] |
||||||
|
usada8 r2, r6, lr, r2 |
||||||
|
/* Last row: skip the look-ahead load of a following row. */
beq 2f |
||||||
|
/* ldr_pre is a macro from the included asm.S.
   NOTE(review): presumably a pre-indexed load with writeback
   (r0 += r1, then r4 = [r0]); nothing else in this loop advances
   r0 -- confirm in libavutil/arm/asm.S. */
ldr_pre r4, r0, r1 |
||||||
|
usada8 r3, r7, lr, r3 |
||||||
|
bgt 1b |
||||||
|
2: |
||||||
|
/* Reached only through the beq above: account for the last word of
   the final row. */
usada8 r3, r7, lr, r3 |
||||||
|
add r0, r2, r3 |
||||||
|
pop {r4-r7, pc} |
||||||
|
endfunc |
@ -0,0 +1,38 @@ |
|||||||
|
/*
|
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
|
||||||
|
#include "libavutil/cpu.h" |
||||||
|
#include "libavutil/arm/cpu.h" |
||||||
|
#include "libavcodec/avcodec.h" |
||||||
|
#include "libavcodec/mpegvideoencdsp.h" |
||||||
|
|
||||||
|
/* ARMv6 routines installed by ff_mpegvideoencdsp_init_arm() below.
 * Both take the address of the first pixel and the distance in bytes
 * between consecutive rows. */
int ff_pix_norm1_armv6(uint8_t *pix, int line_size); |
||||||
|
int ff_pix_sum_armv6(uint8_t *pix, int line_size); |
||||||
|
|
||||||
|
av_cold void ff_mpegvideoencdsp_init_arm(MpegvideoEncDSPContext *c, |
||||||
|
AVCodecContext *avctx) |
||||||
|
{ |
||||||
|
int cpu_flags = av_get_cpu_flags(); |
||||||
|
|
||||||
|
if (have_armv6(cpu_flags)) { |
||||||
|
c->pix_norm1 = ff_pix_norm1_armv6; |
||||||
|
c->pix_sum = ff_pix_sum_armv6; |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,103 @@ |
|||||||
|
/*
|
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "config.h" |
||||||
|
#include <stdint.h> |
||||||
|
#if HAVE_ALTIVEC_H |
||||||
|
#include <altivec.h> |
||||||
|
#endif |
||||||
|
|
||||||
|
#include "libavutil/attributes.h" |
||||||
|
#include "libavutil/cpu.h" |
||||||
|
#include "libavutil/ppc/cpu.h" |
||||||
|
#include "libavutil/ppc/types_altivec.h" |
||||||
|
#include "libavutil/ppc/util_altivec.h" |
||||||
|
#include "libavcodec/mpegvideoencdsp.h" |
||||||
|
|
||||||
|
#if HAVE_ALTIVEC |
||||||
|
|
||||||
|
static int pix_norm1_altivec(uint8_t *pix, int line_size) |
||||||
|
{ |
||||||
|
int i, s = 0; |
||||||
|
const vector unsigned int zero = |
||||||
|
(const vector unsigned int) vec_splat_u32(0); |
||||||
|
vector unsigned char perm = vec_lvsl(0, pix); |
||||||
|
vector unsigned int sv = (vector unsigned int) vec_splat_u32(0); |
||||||
|
vector signed int sum; |
||||||
|
|
||||||
|
for (i = 0; i < 16; i++) { |
||||||
|
/* Read the potentially unaligned pixels. */ |
||||||
|
vector unsigned char pixl = vec_ld(0, pix); |
||||||
|
vector unsigned char pixr = vec_ld(15, pix); |
||||||
|
vector unsigned char pixv = vec_perm(pixl, pixr, perm); |
||||||
|
|
||||||
|
/* Square the values, and add them to our sum. */ |
||||||
|
sv = vec_msum(pixv, pixv, sv); |
||||||
|
|
||||||
|
pix += line_size; |
||||||
|
} |
||||||
|
/* Sum up the four partial sums, and put the result into s. */ |
||||||
|
sum = vec_sums((vector signed int) sv, (vector signed int) zero); |
||||||
|
sum = vec_splat(sum, 3); |
||||||
|
vec_ste(sum, 0, &s); |
||||||
|
|
||||||
|
return s; |
||||||
|
} |
||||||
|
|
||||||
|
static int pix_sum_altivec(uint8_t *pix, int line_size) |
||||||
|
{ |
||||||
|
int i, s; |
||||||
|
const vector unsigned int zero = |
||||||
|
(const vector unsigned int) vec_splat_u32(0); |
||||||
|
vector unsigned char perm = vec_lvsl(0, pix); |
||||||
|
vector unsigned int sad = (vector unsigned int) vec_splat_u32(0); |
||||||
|
vector signed int sumdiffs; |
||||||
|
|
||||||
|
for (i = 0; i < 16; i++) { |
||||||
|
/* Read the potentially unaligned 16 pixels into t1. */ |
||||||
|
vector unsigned char pixl = vec_ld(0, pix); |
||||||
|
vector unsigned char pixr = vec_ld(15, pix); |
||||||
|
vector unsigned char t1 = vec_perm(pixl, pixr, perm); |
||||||
|
|
||||||
|
/* Add each 4 pixel group together and put 4 results into sad. */ |
||||||
|
sad = vec_sum4s(t1, sad); |
||||||
|
|
||||||
|
pix += line_size; |
||||||
|
} |
||||||
|
|
||||||
|
/* Sum up the four partial sums, and put the result into s. */ |
||||||
|
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); |
||||||
|
sumdiffs = vec_splat(sumdiffs, 3); |
||||||
|
vec_ste(sumdiffs, 0, &s); |
||||||
|
|
||||||
|
return s; |
||||||
|
} |
||||||
|
|
||||||
|
#endif /* HAVE_ALTIVEC */ |
||||||
|
|
||||||
|
av_cold void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c, |
||||||
|
AVCodecContext *avctx) |
||||||
|
{ |
||||||
|
#if HAVE_ALTIVEC |
||||||
|
if (!PPC_ALTIVEC(av_get_cpu_flags())) |
||||||
|
return; |
||||||
|
|
||||||
|
c->pix_norm1 = pix_norm1_altivec; |
||||||
|
c->pix_sum = pix_sum_altivec; |
||||||
|
#endif /* HAVE_ALTIVEC */ |
||||||
|
} |
@ -0,0 +1,95 @@ |
|||||||
|
;***************************************************************************** |
||||||
|
;* SIMD-optimized MPEG encoding functions |
||||||
|
;***************************************************************************** |
||||||
|
;* Copyright (c) 2000, 2001 Fabrice Bellard |
||||||
|
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
||||||
|
;* |
||||||
|
;* This file is part of Libav. |
||||||
|
;* |
||||||
|
;* Libav is free software; you can redistribute it and/or |
||||||
|
;* modify it under the terms of the GNU Lesser General Public |
||||||
|
;* License as published by the Free Software Foundation; either |
||||||
|
;* version 2.1 of the License, or (at your option) any later version. |
||||||
|
;* |
||||||
|
;* Libav is distributed in the hope that it will be useful, |
||||||
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
;* Lesser General Public License for more details. |
||||||
|
;* |
||||||
|
;* You should have received a copy of the GNU Lesser General Public |
||||||
|
;* License along with Libav; if not, write to the Free Software |
||||||
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
;***************************************************************************** |
||||||
|
|
||||||
|
%include "libavutil/x86/x86util.asm" |
||||||
|
|
||||||
|
SECTION .text |
||||||
|
|
||||||
|
INIT_MMX mmx |
||||||
|
; int ff_pix_sum16_mmx(uint8_t *pix, int line_size) |
||||||
|
; Sums the bytes of a block of 16 rows by 16 bytes.
;   r0 = pix, r1 = line_size, r2 = negative row offset used as loop index
;   m7 = zero (for byte -> word unpacking), m6 = four 16-bit partial sums
; The result is returned in eax, masked to 16 bits; the largest possible
; sum, 16 * 16 * 255 = 65280, fits in that range.
cglobal pix_sum16, 2, 3 |
||||||
|
movsxdifnidn r1, r1d |
||||||
|
; r2 = -16 * line_size and r0 = pix + 16 * line_size, so that r0 + r2
; starts at pix and r2 counts up towards zero one row at a time.
mov r2, r1 |
||||||
|
neg r2 |
||||||
|
shl r2, 4 |
||||||
|
sub r0, r2 |
||||||
|
pxor m7, m7 |
||||||
|
pxor m6, m6 |
||||||
|
.loop: |
||||||
|
; Each 8-byte half of the row is loaded twice so that its low and high
; four bytes can be unpacked to words in separate registers.
mova m0, [r0+r2+0] |
||||||
|
mova m1, [r0+r2+0] |
||||||
|
mova m2, [r0+r2+8] |
||||||
|
mova m3, [r0+r2+8] |
||||||
|
punpcklbw m0, m7 |
||||||
|
punpckhbw m1, m7 |
||||||
|
punpcklbw m2, m7 |
||||||
|
punpckhbw m3, m7 |
||||||
|
paddw m1, m0 |
||||||
|
paddw m3, m2 |
||||||
|
paddw m3, m1 |
||||||
|
paddw m6, m3 |
||||||
|
; Advance to the next row; keep looping while the offset is negative.
add r2, r1 |
||||||
|
js .loop |
||||||
|
; Horizontal reduction: fold the upper two words onto the lower two,
; then the second word onto the first.
mova m5, m6 |
||||||
|
psrlq m6, 32 |
||||||
|
paddw m6, m5 |
||||||
|
mova m5, m6 |
||||||
|
psrlq m6, 16 |
||||||
|
paddw m6, m5 |
||||||
|
movd eax, m6 |
||||||
|
; Only the lowest word holds the total; discard the word above it.
and eax, 0xffff |
||||||
|
RET |
||||||
|
|
||||||
|
INIT_MMX mmx |
||||||
|
; int ff_pix_norm1_mmx(uint8_t *pix, int line_size) |
||||||
|
; Sums the squares of the bytes of a block of 16 rows by 16 bytes.
;   r0 = pix (advanced by r1 = line_size after every row)
;   r2 = rows left (starts at 16)
;   m0 = zero (for byte -> word unpacking), m7 = two 32-bit partial sums
; The result is returned in eax.
; NOTE(review): four general-purpose registers are requested but only
; r0-r2 are referenced in this function.
cglobal pix_norm1, 2, 4 |
||||||
|
movsxdifnidn r1, r1d |
||||||
|
mov r2, 16 |
||||||
|
pxor m0, m0 |
||||||
|
pxor m7, m7 |
||||||
|
.loop: |
||||||
|
; Load the row as two 8-byte halves and widen all 16 bytes to words.
mova m2, [r0+0] |
||||||
|
mova m3, [r0+8] |
||||||
|
mova m1, m2 |
||||||
|
punpckhbw m1, m0 |
||||||
|
punpcklbw m2, m0 |
||||||
|
mova m4, m3 |
||||||
|
punpckhbw m3, m0 |
||||||
|
punpcklbw m4, m0 |
||||||
|
; pmaddwd of a register with itself squares each word and adds
; neighbouring products into one dword.
pmaddwd m1, m1 |
||||||
|
pmaddwd m2, m2 |
||||||
|
pmaddwd m3, m3 |
||||||
|
pmaddwd m4, m4 |
||||||
|
paddd m2, m1 |
||||||
|
paddd m4, m3 |
||||||
|
paddd m7, m2 |
||||||
|
; Step to the next row.
add r0, r1 |
||||||
|
paddd m7, m4 |
||||||
|
dec r2 |
||||||
|
jne .loop |
||||||
|
; Add the upper partial sum to the lower one and return it.
mova m1, m7 |
||||||
|
psrlq m7, 32 |
||||||
|
paddd m1, m7 |
||||||
|
movd eax, m1 |
||||||
|
RET |
||||||
|
|
Loading…
Reference in new issue