mirror of https://github.com/FFmpeg/FFmpeg.git
parent
256da0770e
commit
054013a0fc
17 changed files with 468 additions and 267 deletions
@ -0,0 +1,44 @@ |
|||||||
|
/*
|
||||||
|
* Monkey's Audio lossless audio decoder |
||||||
|
* Copyright (c) 2007 Benjamin Zores <ben@geexbox.org> |
||||||
|
* based upon libdemac from Dave Chapman. |
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifndef AVCODEC_APEDSP_H |
||||||
|
#define AVCODEC_APEDSP_H |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
|
||||||
|
/**
 * Function table for the DSP routines used by the Monkey's Audio decoder.
 * Architecture-specific initializers may replace the pointers with
 * optimized implementations.
 */
typedef struct APEDSPContext {
    /**
     * Calculate the scalar product of v1 and v2 and, in the same pass,
     * update v1 in place: v1[i] += v3[i] * mul.
     * The scalar product is computed from the values v1 held before
     * the update.
     *
     * @param v1  first input vector, also receives the multiply-add result;
     *            must be 16-byte aligned
     * @param v2  second input vector of the scalar product
     * @param v3  vector that is scaled by mul and added to v1
     * @param len length of the vectors in elements, should be multiple of 16
     * @param mul factor applied to every element of v3
     * @return the scalar product of v1 and v2
     */
    int32_t (*scalarproduct_and_madd_int16)(int16_t *v1 /* align 16 */,
                                            const int16_t *v2,
                                            const int16_t *v3,
                                            int len, int mul);
} APEDSPContext;
||||||
|
|
||||||
|
/*
 * Architecture-specific initializers. Each one may replace function
 * pointers in *c with an implementation optimized for its platform.
 */
void ff_apedsp_init_arm(APEDSPContext *c);
void ff_apedsp_init_ppc(APEDSPContext *c);
void ff_apedsp_init_x86(APEDSPContext *c);
||||||
|
|
||||||
|
#endif /* AVCODEC_APEDSP_H */ |
@ -0,0 +1,38 @@ |
|||||||
|
/*
|
||||||
|
* Copyright (c) 2011 Mans Rullgard <mans@mansr.com> |
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
|
||||||
|
#include "libavutil/attributes.h" |
||||||
|
#include "libavutil/cpu.h" |
||||||
|
#include "libavutil/arm/cpu.h" |
||||||
|
#include "libavcodec/apedsp.h" |
||||||
|
|
||||||
|
/* NEON implementation of APEDSPContext.scalarproduct_and_madd_int16;
 * the symbol is provided by an assembly source file. */
int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2,
                                             const int16_t *v3, int len, int mul);
||||||
|
|
||||||
|
/**
 * Install the ARM-optimized APE DSP routines.
 *
 * The NEON implementation is selected only when the CPU flags queried at
 * run time report NEON support; otherwise *c is left untouched.
 *
 * @param c context whose function pointers are updated
 */
av_cold void ff_apedsp_init_arm(APEDSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();

    if (have_neon(cpu_flags)) {
        c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_neon;
    }
}
@ -0,0 +1,62 @@ |
|||||||
|
/* |
||||||
|
* ARM NEON optimised integer operations |
||||||
|
* Copyright (c) 2009 Kostya Shishkov |
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "libavutil/arm/asm.S" |
||||||
|
|
||||||
|
@ int32_t scalarproduct_and_madd_int16(/*aligned*/ v1, v2, v3, order, mul)
@
@ Returns the scalar product of v1 and v2 and updates v1[i] += v3[i] * mul.
@ On entry: r0 = v1 (loaded/stored with 128-bit alignment hints), r1 = v2,
@ r2 = v3, r3 = order (element count), [sp] = mul.
@ Sixteen elements are processed per iteration and the loop exits only when
@ the counter reaches exactly zero, so order must be a positive multiple
@ of 16.
function ff_scalarproduct_and_madd_int16_neon, export=1
        vld1.16         {d28[],d29[]}, [sp]     @ q14 = mul replicated into every 16-bit lane
        vmov.i16        q0,  #0                 @ q0-q3: four sets of 32-bit partial sums
        vmov.i16        q1,  #0
        vmov.i16        q2,  #0
        vmov.i16        q3,  #0
        mov             r12, r0                 @ r12 = store pointer into v1 (r0 is the load pointer)

1:      vld1.16         {d16-d17}, [r0,:128]!   @ q8  = v1[0..7]
        vld1.16         {d18-d19}, [r1]!        @ q9  = v2[0..7]
        vld1.16         {d20-d21}, [r2]!        @ q10 = v3[0..7]
        vld1.16         {d22-d23}, [r0,:128]!   @ q11 = v1[8..15]
        vld1.16         {d24-d25}, [r1]!        @ q12 = v2[8..15]
        vld1.16         {d26-d27}, [r2]!        @ q13 = v3[8..15]
        vmul.s16        q10, q10, q14           @ v3 * mul (16-bit result)
        vmul.s16        q13, q13, q14
        vmlal.s16       q0,  d16, d18           @ accumulate v1 * v2, widened to 32 bits
        vmlal.s16       q1,  d17, d19
        vadd.s16        q10, q8,  q10           @ v1 + v3 * mul
        vadd.s16        q13, q11, q13
        vmlal.s16       q2,  d22, d24
        vmlal.s16       q3,  d23, d25
        vst1.16         {q10},     [r12,:128]!  @ write the updated v1 back
        subs            r3,  r3,  #16           @ 16 elements done; sets the flags for bne
        vst1.16         {q13},     [r12,:128]!
        bne             1b

        @ Horizontal reduction of the sixteen 32-bit partial sums.
        vpadd.s32       d16, d0,  d1
        vpadd.s32       d17, d2,  d3
        vpadd.s32       d18, d4,  d5
        vpadd.s32       d19, d6,  d7
        vpadd.s32       d0,  d16, d17
        vpadd.s32       d1,  d18, d19
        vpadd.s32       d2,  d0,  d1
        vpaddl.s32      d3,  d2                 @ add the last two sums into one 64-bit lane
        vmov.32         r0,  d3[0]              @ return the low 32 bits
        bx              lr
endfunc
@ -0,0 +1,77 @@ |
|||||||
|
/*
|
||||||
|
* Copyright (c) 2007 Luca Barbato <lu_zero@gentoo.org> |
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "config.h" |
||||||
|
#if HAVE_ALTIVEC_H |
||||||
|
#include <altivec.h> |
||||||
|
#endif |
||||||
|
|
||||||
|
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavcodec/apedsp.h"
||||||
|
|
||||||
|
#if HAVE_ALTIVEC
/**
 * AltiVec implementation of APEDSPContext.scalarproduct_and_madd_int16.
 *
 * Returns the scalar product of v1 and v2 and updates
 * v1[i] += v3[i] * mul in place.
 *
 * @param v1    accessed directly as an array of vectors, so it has to be
 *              suitably aligned for vec_s16
 * @param v2    second operand of the scalar product, may be unaligned
 * @param v3    vector scaled by mul and added to v1
 * @param order number of elements; 16 are processed per iteration and the
 *              do/while body always runs at least once, so this must be a
 *              positive multiple of 16
 * @param mul   factor applied to v3
 * @return the scalar product of v1 (before the update) and v2
 */
static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1,
                                                    const int16_t *v2,
                                                    const int16_t *v3,
                                                    int order, int mul)
{
    LOAD_ZERO;
    vec_s16 *pv1 = (vec_s16 *) v1;
    register vec_s16 muls = { mul, mul, mul, mul, mul, mul, mul, mul };
    register vec_s16 t0, t1, i0, i1, i4;
    /* i2/i3 carry the most recently loaded vector of v2/v3 across iterations. */
    register vec_s16 i2 = vec_ld(0, v2), i3 = vec_ld(0, v3);
    register vec_s32 res = zero_s32v;
    /* NOTE(review): the permute mask is derived from v2 only and is reused
     * for v3 below, so v2 and v3 presumably have to share the same
     * misalignment -- confirm against the callers. */
    register vec_u8 align = vec_lvsl(0, v2);
    int32_t ires;

    order >>= 4; /* number of 16-element iterations */
    do {
        /* Assemble two vectors of v2 from three consecutive loads. */
        i1     = vec_ld(16, v2);
        t0     = vec_perm(i2, i1, align);
        i2     = vec_ld(32, v2);
        t1     = vec_perm(i1, i2, align);
        i0     = pv1[0];
        i1     = pv1[1];
        /* Accumulate v1 * v2 into the 32-bit partial sums. */
        res    = vec_msum(t0, i0, res);
        res    = vec_msum(t1, i1, res);
        /* Same load/permute scheme for v3. */
        i4     = vec_ld(16, v3);
        t0     = vec_perm(i3, i4, align);
        i3     = vec_ld(32, v3);
        t1     = vec_perm(i4, i3, align);
        /* v1 = v3 * mul + v1, using the v1 values loaded above. */
        pv1[0] = vec_mladd(t0, muls, i0);
        pv1[1] = vec_mladd(t1, muls, i1);
        pv1   += 2;
        v2    += 16;
        v3    += 16;
    } while (--order);
    /* Fold the partial sums into one value and store it to ires. */
    res = vec_splat(vec_sums(res, zero_s32v), 3);
    vec_ste(res, 0, &ires);

    return ires;
}
#endif /* HAVE_ALTIVEC */
||||||
|
|
||||||
|
/**
 * Install the PowerPC-optimized APE DSP routines.
 *
 * The AltiVec implementation is only compiled in when HAVE_ALTIVEC is set,
 * and it is only installed when the CPU flags queried at run time report
 * AltiVec support, matching the run-time detection done by the ARM and x86
 * initializers. Otherwise *c is left untouched.
 *
 * @param c context whose function pointers are updated
 */
av_cold void ff_apedsp_init_ppc(APEDSPContext *c)
{
#if HAVE_ALTIVEC
    /* A build with AltiVec enabled may still run on a CPU without it. */
    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
        return;

    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_altivec;
#endif /* HAVE_ALTIVEC */
}
@ -0,0 +1,167 @@ |
|||||||
|
;****************************************************************************** |
||||||
|
;* Copyright (c) 2008 Loren Merritt |
||||||
|
;* |
||||||
|
;* This file is part of Libav. |
||||||
|
;* |
||||||
|
;* Libav is free software; you can redistribute it and/or |
||||||
|
;* modify it under the terms of the GNU Lesser General Public |
||||||
|
;* License as published by the Free Software Foundation; either |
||||||
|
;* version 2.1 of the License, or (at your option) any later version. |
||||||
|
;* |
||||||
|
;* Libav is distributed in the hope that it will be useful, |
||||||
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
;* Lesser General Public License for more details. |
||||||
|
;* |
||||||
|
;* You should have received a copy of the GNU Lesser General Public |
||||||
|
;* License along with Libav; if not, write to the Free Software |
||||||
|
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
;****************************************************************************** |
||||||
|
|
||||||
|
%include "libavutil/x86/x86util.asm" |
||||||
|
|
||||||
|
SECTION_TEXT |
||||||
|
|
||||||
|
; Template shared by the MMXEXT and SSE2 versions. The register width
; (mmsize) is selected by the INIT_MMX / INIT_XMM line that precedes each
; instantiation below. Each iteration handles two registers' worth of data.
%macro SCALARPRODUCT 0
; int ff_scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3,
;                                     int order, int mul)
; Returns the scalar product of v1 and v2 and updates v1[i] += v3[i] * mul.
cglobal scalarproduct_and_madd_int16, 4,4,8, v1, v2, v3, order, mul
    shl orderq, 1               ; element count -> byte count (int16_t elements)
    movd    m7, mulm
%if mmsize == 16
    pshuflw m7, m7, 0           ; broadcast the low word of mul ...
    punpcklqdq m7, m7           ; ... to all words of m7
%else
    pshufw  m7, m7, 0           ; broadcast the low word of mul to all words of m7
%endif
    pxor    m6, m6              ; m6 = dword accumulators of the scalar product
    add v1q, orderq             ; point all three vectors at their end ...
    add v2q, orderq
    add v3q, orderq
    neg orderq                  ; ... and index them with a negative offset that counts up to 0
.loop:
    movu    m0, [v2q + orderq]  ; v2 and v3 are read with unaligned loads
    movu    m1, [v2q + orderq + mmsize]
    mova    m4, [v1q + orderq]  ; v1 is read and written with aligned moves
    mova    m5, [v1q + orderq + mmsize]
    movu    m2, [v3q + orderq]
    movu    m3, [v3q + orderq + mmsize]
    pmaddwd m0, m4              ; v2 * v1, adjacent products summed into dwords
    pmaddwd m1, m5
    pmullw  m2, m7              ; v3 * mul (low 16 bits)
    pmullw  m3, m7
    paddd   m6, m0
    paddd   m6, m1
    paddw   m2, m4              ; v1 + v3 * mul
    paddw   m3, m5
    mova    [v1q + orderq], m2
    mova    [v1q + orderq + mmsize], m3
    add     orderq, mmsize*2
    jl .loop                    ; loop until the offset reaches 0
    ; Horizontal sum of the dword accumulators in m6.
%if mmsize == 16
    movhlps m0, m6              ; fold the high half onto the low half
    paddd   m6, m0
    pshuflw m0, m6, 0x4e        ; swap the two low dwords
%else
    pshufw  m0, m6, 0x4e        ; swap the two dwords
%endif
    paddd   m6, m0
    movd   eax, m6              ; result = low dword
    RET
%endmacro

INIT_MMX mmxext
SCALARPRODUCT
INIT_XMM sse2
SCALARPRODUCT
||||||
|
|
||||||
|
; Loop body of the SSSE3 version, instantiated once per possible byte
; misalignment of v2/v3.
;   %1 = byte offset (0, 2, ..., 14) of the original v2/v3 pointers from the
;        16-byte aligned addresses now held in v2q/v3q.
; The loop walks from the end of the vectors towards the start. For %1 != 0,
; m4/m5 are expected to hold the aligned v2/v3 vectors at [v2q + orderq] and
; [v3q + orderq] on entry; they are reloaded on every iteration and combined
; with the neighbouring vectors via palignr to rebuild the unaligned data.
%macro SCALARPRODUCT_LOOP 1
align 16
.loop%1:
    sub     orderq, mmsize*2    ; step back two registers; sets the flags tested by jg below
%if %1
    mova    m1, m4              ; aligned v2 vector carried over from the previous (higher) position
    mova    m4, [v2q + orderq]
    mova    m0, [v2q + orderq + mmsize]
    palignr m1, m0, %1          ; m1 = v2 data at offset orderq + mmsize + %1
    palignr m0, m4, %1          ; m0 = v2 data at offset orderq + %1
    mova    m3, m5              ; same scheme for v3
    mova    m5, [v3q + orderq]
    mova    m2, [v3q + orderq + mmsize]
    palignr m3, m2, %1
    palignr m2, m5, %1
%else
    ; No misalignment: plain aligned loads are sufficient.
    mova    m0, [v2q + orderq]
    mova    m1, [v2q + orderq + mmsize]
    mova    m2, [v3q + orderq]
    mova    m3, [v3q + orderq + mmsize]
%endif
    ; t0/t1 name the two v1 operands: memory operands by default, registers
    ; m8/m9 on x86-64 so that each v1 vector is loaded only once.
    %define t0  [v1q + orderq]
    %define t1  [v1q + orderq + mmsize]
%if ARCH_X86_64
    mova    m8, t0
    mova    m9, t1
    %define t0  m8
    %define t1  m9
%endif
    pmaddwd m0, t0              ; v2 * v1, adjacent products summed into dwords
    pmaddwd m1, t1
    pmullw  m2, m7              ; v3 * mul (m7 holds mul in every word)
    pmullw  m3, m7
    paddw   m2, t0              ; v1 + v3 * mul
    paddw   m3, t1
    paddd   m6, m0              ; accumulate the scalar product in m6
    paddd   m6, m1
    mova    [v1q + orderq], m2
    mova    [v1q + orderq + mmsize], m3
    jg .loop%1                  ; continue while the offset is still positive
%if %1
    jmp .end                    ; the %1 == 0 instance instead falls through to the code that follows it
%endif
%endmacro
||||||
|
|
||||||
|
; int ff_scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3,
;                                     int order, int mul)
; SSSE3 version: v2 and v3 are only read with aligned loads; the unaligned
; data is rebuilt with palignr in a loop specialised for the misalignment.
; NOTE(review): the misalignment is taken from v2 only and applied to v3 as
; well, so both pointers presumably must share the same offset within
; 16 bytes -- confirm against the callers.
INIT_XMM ssse3
cglobal scalarproduct_and_madd_int16, 4,5,10, v1, v2, v3, order, mul
    shl orderq, 1               ; element count -> byte count (int16_t elements)
    movd    m7, mulm
    pshuflw m7, m7, 0           ; broadcast the low word of mul ...
    punpcklqdq m7, m7           ; ... to all eight words of m7
    pxor    m6, m6              ; m6 = dword accumulators of the scalar product
    mov    r4d, v2d
    and    r4d, 15              ; r4d = byte misalignment of v2
    and    v2q, ~15             ; round v2 and v3 down to 16-byte boundaries
    and    v3q, ~15
    mova    m4, [v2q + orderq]  ; preload the aligned vectors at the end of v2/v3,
    mova    m5, [v3q + orderq]  ; consumed by the first iteration of the misaligned loops
    ; linear is faster than branch tree or jump table, because the branches taken are cyclic (i.e. predictable)
    cmp    r4d, 0
    je .loop0
    cmp    r4d, 2
    je .loop2
    cmp    r4d, 4
    je .loop4
    cmp    r4d, 6
    je .loop6
    cmp    r4d, 8
    je .loop8
    cmp    r4d, 10
    je .loop10
    cmp    r4d, 12
    je .loop12
    ; No match above: fall through into the first instance (misalignment 14).
SCALARPRODUCT_LOOP 14
SCALARPRODUCT_LOOP 12
SCALARPRODUCT_LOOP 10
SCALARPRODUCT_LOOP 8
SCALARPRODUCT_LOOP 6
SCALARPRODUCT_LOOP 4
SCALARPRODUCT_LOOP 2
SCALARPRODUCT_LOOP 0
.end:
    ; Horizontal sum of the four dword accumulators in m6.
    movhlps m0, m6              ; fold the high half onto the low half
    paddd   m6, m0
    pshuflw m0, m6, 0x4e        ; swap the two low dwords
    paddd   m6, m0
    movd   eax, m6              ; result = low dword
    RET
@ -0,0 +1,47 @@ |
|||||||
|
/*
|
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "libavutil/attributes.h" |
||||||
|
#include "libavutil/cpu.h" |
||||||
|
#include "libavutil/x86/cpu.h" |
||||||
|
#include "libavcodec/apedsp.h" |
||||||
|
|
||||||
|
/* x86 implementations of APEDSPContext.scalarproduct_and_madd_int16, one per
 * instruction set; the symbols are provided by an assembly source file. */
int32_t ff_scalarproduct_and_madd_int16_mmxext(int16_t *v1, const int16_t *v2,
                                               const int16_t *v3,
                                               int order, int mul);
int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2,
                                             const int16_t *v3,
                                             int order, int mul);
int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2,
                                              const int16_t *v3,
                                              int order, int mul);
||||||
|
|
||||||
|
/**
 * Install the x86-optimized APE DSP routines.
 *
 * The checks run from the oldest to the newest instruction set and each
 * match overwrites the previous choice, so the last matching implementation
 * wins. If nothing matches, *c is left untouched.
 *
 * @param c context whose function pointers are updated
 */
av_cold void ff_apedsp_init_x86(APEDSPContext *c)
{
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMXEXT(cpu_flags))
        c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmxext;

    if (EXTERNAL_SSE2(cpu_flags))
        c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;

    /* The SSSE3 version is skipped on CPUs that report SSE4.2 or 3DNow!,
     * which then keep the SSE2 version.
     * NOTE(review): "cachesplit" presumably means the SSSE3 code only pays
     * off where unaligned loads that split a cache line are slow -- confirm. */
    if (EXTERNAL_SSSE3(cpu_flags) &&
        !(cpu_flags & (AV_CPU_FLAG_SSE42 | AV_CPU_FLAG_3DNOW))) // cachesplit
        c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3;
}
Loading…
Reference in new issue