mirror of https://github.com/FFmpeg/FFmpeg.git
parent
8d686ca59d
commit
c166148409
21 changed files with 444 additions and 296 deletions
@ -0,0 +1,76 @@ |
||||
/* |
||||
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
|
||||
* |
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/arm/asm.S" |
||||
|
||||
@ int ff_pix_norm1_armv6(uint8_t *pix, int line_size)
@
@ Accumulates the sum of the squares of 16 bytes per row over 16 rows.
@   r0    = pix, stepped by line_size after each row
@   r1    = line_size
@   ip    = rows still to process
@   lr    = running sum, moved to r0 on return
@   r2-r5 = the four words of the current row, r6 = scratch
function ff_pix_norm1_armv6, export=1
        push            {r4-r6, lr}
        mov             lr,  #0                 @ sum = 0
        mov             ip,  #16                @ 16 rows
1:
        ldm             r0,  {r2-r5}            @ fetch the 16 bytes of this row
        uxtb16          r6,  r2                 @ bytes 0 and 2 as halfwords
        uxtb16          r2,  r2,  ror #8        @ bytes 1 and 3 as halfwords
        smlad           lr,  r6,  r6,  lr       @ sum += b0*b0 + b2*b2
        uxtb16          r6,  r3
        smlad           lr,  r2,  r2,  lr       @ sum += b1*b1 + b3*b3
        uxtb16          r3,  r3,  ror #8
        smlad           lr,  r6,  r6,  lr
        uxtb16          r6,  r4
        smlad           lr,  r3,  r3,  lr
        uxtb16          r4,  r4,  ror #8
        smlad           lr,  r6,  r6,  lr
        uxtb16          r6,  r5
        smlad           lr,  r4,  r4,  lr
        uxtb16          r5,  r5,  ror #8
        smlad           lr,  r6,  r6,  lr
        subs            ip,  ip,  #1            @ one row done, sets flags for bgt
        add             r0,  r0,  r1            @ pix += line_size
        smlad           lr,  r5,  r5,  lr       @ last pair of this row
        bgt             1b

        mov             r0,  lr                 @ return the sum
        pop             {r4-r6, pc}
endfunc
||||
|
||||
@ int ff_pix_sum_armv6(uint8_t *pix, int line_size)
@
@ Adds up 16 bytes per row over 16 rows.
@   r0     = pix (current row)
@   r1     = line_size
@   ip     = rows still to process
@   lr     = constant zero, so usada8 (sum of absolute byte differences
@            plus accumulator) reduces to "add the four bytes"
@   r2, r3 = two independent partial sums
@   r4-r7  = the four words of the current row
function ff_pix_sum_armv6, export=1
        push            {r4-r7, lr}
        mov             lr,  #0                 @ zero operand for usada8
        mov             r2,  #0                 @ partial sum A
        mov             r3,  #0                 @ partial sum B
        mov             ip,  #16                @ 16 rows
        ldr             r4,  [r0]               @ first word of row 0
1:
        subs            ip,  ip,  #1            @ flags stay live until beq/bgt
        ldr             r5,  [r0, #4]
        usada8          r2,  r4,  lr,  r2       @ A += bytes of word 0
        ldr             r6,  [r0, #8]
        usada8          r3,  r5,  lr,  r3       @ B += bytes of word 1
        ldr             r7,  [r0, #12]
        usada8          r2,  r6,  lr,  r2       @ A += bytes of word 2
        beq             2f                      @ last row: skip the next-row load
        @ ldr_pre is a macro from asm.S; presumably a pre-indexed load with
        @ writeback (r0 += r1, then r4 = [r0]) -- confirm against asm.S
        ldr_pre         r4,  r0,  r1
        usada8          r3,  r7,  lr,  r3       @ B += bytes of word 3
        bgt             1b
2:
        usada8          r3,  r7,  lr,  r3       @ word 3 of the final row
        add             r0,  r2,  r3            @ return A + B
        pop             {r4-r7, pc}
endfunc
@ -0,0 +1,38 @@ |
||||
/*
|
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include <stdint.h> |
||||
|
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/arm/cpu.h" |
||||
#include "libavcodec/avcodec.h" |
||||
#include "libavcodec/mpegvideoencdsp.h" |
||||
|
||||
int ff_pix_norm1_armv6(uint8_t *pix, int line_size); |
||||
int ff_pix_sum_armv6(uint8_t *pix, int line_size); |
||||
|
||||
av_cold void ff_mpegvideoencdsp_init_arm(MpegvideoEncDSPContext *c, |
||||
AVCodecContext *avctx) |
||||
{ |
||||
int cpu_flags = av_get_cpu_flags(); |
||||
|
||||
if (have_armv6(cpu_flags)) { |
||||
c->pix_norm1 = ff_pix_norm1_armv6; |
||||
c->pix_sum = ff_pix_sum_armv6; |
||||
} |
||||
} |
@ -0,0 +1,103 @@ |
||||
/*
|
||||
* This file is part of Libav. |
||||
* |
||||
* Libav is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* Libav is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with Libav; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
#include <stdint.h> |
||||
#if HAVE_ALTIVEC_H |
||||
#include <altivec.h> |
||||
#endif |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/ppc/cpu.h" |
||||
#include "libavutil/ppc/types_altivec.h" |
||||
#include "libavutil/ppc/util_altivec.h" |
||||
#include "libavcodec/mpegvideoencdsp.h" |
||||
|
||||
#if HAVE_ALTIVEC |
||||
|
||||
/**
 * Sum of the squares of all pixels in a 16x16 block.
 *
 * @param pix       pointer to the top-left pixel; may be unaligned
 * @param line_size distance in bytes between the starts of two rows
 * @return sum of pixel * pixel over 16 rows of 16 pixels
 */
static int pix_norm1_altivec(uint8_t *pix, int line_size)
{
    int row, s = 0;
    const vector unsigned int zero =
        (const vector unsigned int) vec_splat_u32(0);
    vector unsigned int sv = (vector unsigned int) vec_splat_u32(0);
    vector signed int sum;

    for (row = 0; row < 16; row++) {
        /* The permute mask encodes the misalignment of this row's address.
         * It is derived per row because it only stays the same from row to
         * row when line_size is a multiple of 16. */
        vector unsigned char perm = vec_lvsl(0, pix);

        /* Read the potentially unaligned pixels. */
        vector unsigned char pixl = vec_ld(0, pix);
        vector unsigned char pixr = vec_ld(15, pix);
        vector unsigned char pixv = vec_perm(pixl, pixr, perm);

        /* Square the values, and add them to the four partial sums. */
        sv = vec_msum(pixv, pixv, sv);

        pix += line_size;
    }

    /* Sum up the four partial sums, and put the result into s. */
    sum = vec_sums((vector signed int) sv, (vector signed int) zero);
    sum = vec_splat(sum, 3);
    vec_ste(sum, 0, &s);

    return s;
}
||||
|
||||
/**
 * Sum of all pixels in a 16x16 block.
 *
 * @param pix       pointer to the top-left pixel; may be unaligned
 * @param line_size distance in bytes between the starts of two rows
 * @return sum of the pixel values over 16 rows of 16 pixels
 */
static int pix_sum_altivec(uint8_t *pix, int line_size)
{
    int row, s = 0;
    const vector unsigned int zero =
        (const vector unsigned int) vec_splat_u32(0);
    vector unsigned int acc = (vector unsigned int) vec_splat_u32(0);
    vector signed int total;

    for (row = 0; row < 16; row++) {
        /* The permute mask encodes the misalignment of this row's address.
         * It is derived per row because it only stays the same from row to
         * row when line_size is a multiple of 16. */
        vector unsigned char perm = vec_lvsl(0, pix);

        /* Read the potentially unaligned 16 pixels. */
        vector unsigned char pixl = vec_ld(0, pix);
        vector unsigned char pixr = vec_ld(15, pix);
        vector unsigned char pixv = vec_perm(pixl, pixr, perm);

        /* Add each 4 pixel group together into the four partial sums. */
        acc = vec_sum4s(pixv, acc);

        pix += line_size;
    }

    /* Sum up the four partial sums, and put the result into s. */
    total = vec_sums((vector signed int) acc, (vector signed int) zero);
    total = vec_splat(total, 3);
    vec_ste(total, 0, &s);

    return s;
}
||||
|
||||
#endif /* HAVE_ALTIVEC */ |
||||
|
||||
av_cold void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c, |
||||
AVCodecContext *avctx) |
||||
{ |
||||
#if HAVE_ALTIVEC |
||||
if (!PPC_ALTIVEC(av_get_cpu_flags())) |
||||
return; |
||||
|
||||
c->pix_norm1 = pix_norm1_altivec; |
||||
c->pix_sum = pix_sum_altivec; |
||||
#endif /* HAVE_ALTIVEC */ |
||||
} |
@ -0,0 +1,95 @@ |
||||
;***************************************************************************** |
||||
;* SIMD-optimized MPEG encoding functions |
||||
;***************************************************************************** |
||||
;* Copyright (c) 2000, 2001 Fabrice Bellard |
||||
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
||||
;* |
||||
;* This file is part of Libav. |
||||
;* |
||||
;* Libav is free software; you can redistribute it and/or |
||||
;* modify it under the terms of the GNU Lesser General Public |
||||
;* License as published by the Free Software Foundation; either |
||||
;* version 2.1 of the License, or (at your option) any later version. |
||||
;* |
||||
;* Libav is distributed in the hope that it will be useful, |
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
;* Lesser General Public License for more details. |
||||
;* |
||||
;* You should have received a copy of the GNU Lesser General Public |
||||
;* License along with Libav; if not, write to the Free Software |
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
;***************************************************************************** |
||||
|
||||
%include "libavutil/x86/x86util.asm" |
||||
|
||||
SECTION .text |
||||
|
||||
INIT_MMX mmx
; int ff_pix_sum16_mmx(uint8_t *pix, int line_size)
;
; Adds up 16 bytes per row over 16 rows.
;   r0 = pix biased to the end of the block, r1 = line_size
;   r2 = negative byte offset that climbs towards zero
;   m6 = four 16-bit partial sums, m7 = zero for byte->word unpacking
cglobal pix_sum16, 2, 3
    movsxdifnidn r1, r1d
    mov          r2, r1
    neg          r2
    shl          r2, 4              ; r2 = -16 * line_size
    sub          r0, r2             ; r0 = pix + 16 * line_size
    pxor         m6, m6             ; partial sums = 0
    pxor         m7, m7             ; zero for unpacking
.loop:
    mova         m0, [r0+r2]        ; bytes 0..7, low half gets unpacked
    mova         m1, [r0+r2]        ; bytes 0..7, high half gets unpacked
    mova         m2, [r0+r2+8]      ; bytes 8..15, low half gets unpacked
    mova         m3, [r0+r2+8]      ; bytes 8..15, high half gets unpacked
    punpcklbw    m0, m7
    punpckhbw    m1, m7
    punpcklbw    m2, m7
    punpckhbw    m3, m7
    paddw        m1, m0
    paddw        m3, m2
    paddw        m3, m1             ; four word sums for this row
    paddw        m6, m3
    add          r2, r1             ; next row
    js           .loop              ; loop while the offset is still negative

    ; fold the four words of m6 into its lowest word
    mova         m5, m6
    psrlq        m6, 32
    paddw        m6, m5
    mova         m5, m6
    psrlq        m6, 16
    paddw        m6, m5
    movd        eax, m6
    and         eax, 0xffff         ; only the low word holds the total
    RET
||||
|
||||
INIT_MMX mmx
; int ff_pix_norm1_mmx(uint8_t *pix, int line_size)
;
; Accumulates the sum of the squares of 16 bytes per row over 16 rows.
;   r0 = pix (advanced by line_size per row), r1 = line_size
;   r2 = rows remaining
;   m0 = zero for byte->word unpacking, m7 = two 32-bit partial sums
;
; Only r0-r2 are used, so three GPRs are requested from cglobal; asking for
; a fourth would make x86inc save and restore a callee-saved register on
; x86-32 for no benefit.
cglobal pix_norm1, 2, 3
    movsxdifnidn r1, r1d
    mov          r2, 16             ; 16 rows
    pxor         m0, m0             ; zero for unpacking
    pxor         m7, m7             ; sum = 0
.loop:
    mova         m2, [r0+0]         ; bytes 0..7
    mova         m3, [r0+8]         ; bytes 8..15
    mova         m1, m2
    punpckhbw    m1, m0             ; bytes 4..7   -> words
    punpcklbw    m2, m0             ; bytes 0..3   -> words
    mova         m4, m3
    punpckhbw    m3, m0             ; bytes 12..15 -> words
    punpcklbw    m4, m0             ; bytes 8..11  -> words
    pmaddwd      m1, m1             ; square and add adjacent pairs
    pmaddwd      m2, m2
    pmaddwd      m3, m3
    pmaddwd      m4, m4
    paddd        m2, m1
    paddd        m4, m3
    paddd        m7, m2
    add          r0, r1             ; next row
    paddd        m7, m4
    dec          r2
    jne          .loop

    ; add the high dword of m7 to the low one and return it
    mova         m1, m7
    psrlq        m7, 32
    paddd        m1, m7
    movd        eax, m1
    RET
||||
|
Loading…
Reference in new issue