mirror of https://github.com/FFmpeg/FFmpeg.git
* commit '054013a0fc6f2b52c60cee3e051be8cc7f82cef3': dsputil: Move APE-specific bits into apedsp. Conflicts: libavcodec/arm/int_neon.S, libavcodec/x86/dsputil.asm. Merged-by: Michael Niedermayer <michaelni@gmx.at> (pull/74/head)
commit
40f3a87c10
17 changed files with 458 additions and 256 deletions
@ -0,0 +1,44 @@ |
||||
/*
|
||||
* Monkey's Audio lossless audio decoder |
||||
* Copyright (c) 2007 Benjamin Zores <ben@geexbox.org> |
||||
* based upon libdemac from Dave Chapman. |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#ifndef AVCODEC_APEDSP_H |
||||
#define AVCODEC_APEDSP_H |
||||
|
||||
#include <stdint.h> |
||||
|
||||
/** Table of DSP routines used by the Monkey's Audio (APE) decoder. */
typedef struct APEDSPContext { |
||||
/**
 * Compute the scalar (dot) product of v1 and v2 and, in the same pass,
 * update v1 in place with v1[i] += v3[i] * mul.
 *
 * The implementations added alongside this header form the product from
 * the v1 values as they were before the in-place update.
 *
 * @param v1  first operand of the scalar product and destination of the
 *            multiply-add; annotated "align 16" in the prototype
 * @param v2  second operand of the scalar product (read-only)
 * @param v3  vector that is scaled by mul and added into v1 (read-only)
 * @param len length of the vectors in elements, should be a multiple of 16
 * @param mul scalar factor applied to every element of v3
 * @return the scalar product of v1 and v2
 */ |
||||
int32_t (*scalarproduct_and_madd_int16)(int16_t *v1 /* align 16 */, |
||||
const int16_t *v2, |
||||
const int16_t *v3, |
||||
int len, int mul); |
||||
} APEDSPContext; |
||||
|
||||
void ff_apedsp_init_arm(APEDSPContext *c); |
||||
void ff_apedsp_init_ppc(APEDSPContext *c); |
||||
void ff_apedsp_init_x86(APEDSPContext *c); |
||||
|
||||
#endif /* AVCODEC_APEDSP_H */ |
@ -0,0 +1,38 @@ |
||||
/*
|
||||
* Copyright (c) 2011 Mans Rullgard <mans@mansr.com> |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include <stdint.h> |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/arm/cpu.h" |
||||
#include "libavcodec/apedsp.h" |
||||
|
||||
int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2, |
||||
const int16_t *v3, int len, int mul); |
||||
|
||||
/**
 * Install the ARM-optimised APE DSP routines supported by the running CPU.
 *
 * The context is left untouched unless the CPU reports NEON support, in
 * which case the NEON scalarproduct_and_madd_int16 routine is selected.
 *
 * @param c DSP context whose function pointers may be overridden
 */
av_cold void ff_apedsp_init_arm(APEDSPContext *c)
{
    const int flags = av_get_cpu_flags();

    /* Nothing to override without NEON. */
    if (!have_neon(flags))
        return;

    c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_neon;
}
@ -0,0 +1,62 @@ |
||||
/* |
||||
* ARM NEON optimised integer operations |
||||
* Copyright (c) 2009 Kostya Shishkov |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/arm/asm.S" |
||||
|
||||
@ int32_t ff_scalarproduct_and_madd_int16_neon(/*aligned*/ v1, v2, v3, len, mul)
|
||||
@ Returns the scalar product of the vectors at r0 and r1 while updating the
@ r0 vector in place with r0[i] += r2[i] * mul.
@   r0 = first vector (read with a :128 alignment hint, also written back)
@   r1 = second vector of the scalar product
@   r2 = vector that is scaled and added
@   r3 = element count; 16 elements are consumed per iteration and the loop
@        body always runs at least once
@   [sp] = mul (fifth argument, read from the stack)
function ff_scalarproduct_and_madd_int16_neon, export=1 |
||||
@ broadcast the 16-bit mul into every lane of q14 (d28/d29)
vld1.16 {d28[],d29[]}, [sp] |
||||
@ q0-q3: four independent 32-bit accumulators for the scalar product
vmov.i16 q0, #0 |
||||
vmov.i16 q1, #0 |
||||
vmov.i16 q2, #0 |
||||
vmov.i16 q3, #0 |
||||
@ r12 is the store pointer for the first vector; r0 stays the load pointer
mov r12, r0 |
||||
|
||||
@ load 2 x 8 elements from each of the three vectors, post-incrementing
1: vld1.16 {d16-d17}, [r0,:128]! |
||||
vld1.16 {d18-d19}, [r1]! |
||||
vld1.16 {d20-d21}, [r2]! |
||||
vld1.16 {d22-d23}, [r0,:128]! |
||||
vld1.16 {d24-d25}, [r1]! |
||||
vld1.16 {d26-d27}, [r2]! |
||||
@ q10/q13 = third vector * mul (16-bit results)
vmul.s16 q10, q10, q14 |
||||
vmul.s16 q13, q13, q14 |
||||
@ widening multiply-accumulate of first * second vector (loaded values)
vmlal.s16 q0, d16, d18 |
||||
vmlal.s16 q1, d17, d19 |
||||
@ q10/q13 = first vector + third vector * mul
vadd.s16 q10, q8, q10 |
||||
vadd.s16 q13, q11, q13 |
||||
vmlal.s16 q2, d22, d24 |
||||
vmlal.s16 q3, d23, d25 |
||||
@ write the updated 16 elements back over the first vector
vst1.16 {q10}, [r12,:128]! |
||||
@ 16 elements done; the flags from this subs drive the bgt below
subs r3, r3, #16 |
||||
vst1.16 {q13}, [r12,:128]! |
||||
bgt 1b |
||||
|
||||
@ horizontal reduction: fold each accumulator to two lanes ...
vpadd.s32 d16, d0, d1 |
||||
vpadd.s32 d17, d2, d3 |
||||
vpadd.s32 d18, d4, d5 |
||||
vpadd.s32 d19, d6, d7 |
||||
@ ... then to one lane per accumulator ...
vpadd.s32 d0, d16, d17 |
||||
vpadd.s32 d1, d18, d19 |
||||
@ ... then to two partial sums, and finally one widened total in d3
vpadd.s32 d2, d0, d1 |
||||
vpaddl.s32 d3, d2 |
||||
@ return the low 32 bits of the total
vmov.32 r0, d3[0] |
||||
bx lr |
||||
endfunc |
@ -0,0 +1,77 @@ |
||||
/*
|
||||
* Copyright (c) 2007 Luca Barbato <lu_zero@gentoo.org> |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
#if HAVE_ALTIVEC_H |
||||
#include <altivec.h> |
||||
#endif |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/ppc/types_altivec.h" |
||||
#include "libavcodec/apedsp.h" |
||||
|
||||
#if HAVE_ALTIVEC |
||||
/**
 * AltiVec implementation of APEDSPContext.scalarproduct_and_madd_int16:
 * returns the scalar product of v1 and v2 and updates v1 in place with
 * v1[i] += v3[i] * mul.
 *
 * v1 is accessed through a vec_s16 pointer, i.e. as whole 16-byte vectors;
 * v2 and v3 are read with the vec_ld / vec_lvsl / vec_perm realignment
 * sequence and so need not be aligned.
 *
 * @param v1    first operand and destination of the multiply-add
 * @param v2    second operand of the scalar product
 * @param v3    vector scaled by mul and added into v1
 * @param order number of elements; 16 are processed per iteration and the
 *              do/while body always runs once, so values below 16 are not
 *              handled
 * @param mul   scalar factor for v3
 * @return the accumulated scalar product
 */
static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1, |
||||
const int16_t *v2, |
||||
const int16_t *v3, |
||||
int order, int mul) |
||||
{ |
||||
LOAD_ZERO; |
||||
vec_s16 *pv1 = (vec_s16 *) v1; |
||||
/* mul replicated into all eight 16-bit lanes */
register vec_s16 muls = { mul, mul, mul, mul, mul, mul, mul, mul }; |
||||
register vec_s16 t0, t1, i0, i1, i4; |
||||
/* prime the realignment pipeline with the first vector of v2 and v3 */
register vec_s16 i2 = vec_ld(0, v2), i3 = vec_ld(0, v3); |
||||
register vec_s32 res = zero_s32v; |
||||
/* permute control derived from v2's address only; it is reused for v3 below.
 * NOTE(review): this presumes v3 has the same misalignment as v2 -- confirm
 * against the caller. */
register vec_u8 align = vec_lvsl(0, v2); |
||||
int32_t ires; |
||||
|
||||
/* element count -> number of 16-element iterations */
order >>= 4; |
||||
do { |
||||
/* t0/t1 = the 16 v2 elements of this iteration, realigned */
i1 = vec_ld(16, v2); |
||||
t0 = vec_perm(i2, i1, align); |
||||
/* i2 is carried into the next iteration as its first v2 vector */
i2 = vec_ld(32, v2); |
||||
t1 = vec_perm(i1, i2, align); |
||||
/* current v1 values, used for both the product and the update */
i0 = pv1[0]; |
||||
i1 = pv1[1]; |
||||
/* accumulate v2 * v1 into the 32-bit lanes of res */
res = vec_msum(t0, i0, res); |
||||
res = vec_msum(t1, i1, res); |
||||
/* t0/t1 = the 16 v3 elements of this iteration, realigned */
i4 = vec_ld(16, v3); |
||||
t0 = vec_perm(i3, i4, align); |
||||
i3 = vec_ld(32, v3); |
||||
t1 = vec_perm(i4, i3, align); |
||||
/* v1 = v3 * mul + v1 */
pv1[0] = vec_mladd(t0, muls, i0); |
||||
pv1[1] = vec_mladd(t1, muls, i1); |
||||
pv1 += 2; |
||||
v2 += 16; |
||||
v3 += 16; |
||||
} while (--order); |
||||
/* sum the lanes of res (vec_sums leaves the total in element 3), replicate
 * that element and store it to the scalar result */
res = vec_splat(vec_sums(res, zero_s32v), 3); |
||||
vec_ste(res, 0, &ires); |
||||
|
||||
return ires; |
||||
} |
||||
#endif /* HAVE_ALTIVEC */ |
||||
|
||||
/**
 * Install the PowerPC-optimised APE DSP routines supported by the running CPU.
 *
 * The AltiVec routine is selected only when AltiVec support was compiled in
 * (HAVE_ALTIVEC) and the CPU reports it at run time, mirroring the CPU-flag
 * checks done by the ARM and x86 initialisers. Without the run-time check a
 * build with AltiVec enabled would execute AltiVec instructions on CPUs that
 * lack the unit.
 *
 * @param c DSP context whose function pointers may be overridden
 */
av_cold void ff_apedsp_init_ppc(APEDSPContext *c)
{
#if HAVE_ALTIVEC
    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
        return;

    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_altivec;
#endif /* HAVE_ALTIVEC */
}
@ -0,0 +1,157 @@ |
||||
;****************************************************************************** |
||||
;* Copyright (c) 2008 Loren Merritt |
||||
;* |
||||
;* This file is part of FFmpeg. |
||||
;* |
||||
;* FFmpeg is free software; you can redistribute it and/or |
||||
;* modify it under the terms of the GNU Lesser General Public |
||||
;* License as published by the Free Software Foundation; either |
||||
;* version 2.1 of the License, or (at your option) any later version. |
||||
;* |
||||
;* FFmpeg is distributed in the hope that it will be useful, |
||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
;* Lesser General Public License for more details. |
||||
;* |
||||
;* You should have received a copy of the GNU Lesser General Public |
||||
;* License along with FFmpeg; if not, write to the Free Software |
||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
;****************************************************************************** |
||||
|
||||
%include "libavutil/x86/x86util.asm" |
||||
|
||||
SECTION_TEXT |
||||
|
||||
; SCALARPRODUCT: emits scalarproduct_and_madd_int16 for the current
; instruction set (instantiated below for mmxext and sse2).
; Returns the scalar product of v1 and v2 in eax and updates v1 in place
; with v1[i] += v3[i] * mul. v1 is accessed with aligned moves (mova),
; v2 and v3 with unaligned moves (movu). Each iteration handles 2*mmsize
; bytes and the loop body always runs at least once.
%macro SCALARPRODUCT 0 |
||||
; int ff_scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3, |
||||
; int order, int mul) |
||||
cglobal scalarproduct_and_madd_int16, 4,4,8, v1, v2, v3, order, mul |
||||
; element count -> byte count (int16_t is 2 bytes)
shl orderq, 1 |
||||
; m7 = mul replicated into every 16-bit lane
movd m7, mulm |
||||
%if mmsize == 16 |
||||
pshuflw m7, m7, 0 |
||||
punpcklqdq m7, m7 |
||||
%else |
||||
pshufw m7, m7, 0 |
||||
%endif |
||||
; m6 = 32-bit accumulator lanes for the scalar product
pxor m6, m6 |
||||
; point all three vectors at their end and index with a negative offset
; that counts up towards zero
add v1q, orderq |
||||
add v2q, orderq |
||||
add v3q, orderq |
||||
neg orderq |
||||
.loop: |
||||
movu m0, [v2q + orderq] |
||||
movu m1, [v2q + orderq + mmsize] |
||||
mova m4, [v1q + orderq] |
||||
mova m5, [v1q + orderq + mmsize] |
||||
movu m2, [v3q + orderq] |
||||
movu m3, [v3q + orderq + mmsize] |
||||
; m0/m1 = pairwise-summed 32-bit products v2 * v1 (v1 as loaded)
pmaddwd m0, m4 |
||||
pmaddwd m1, m5 |
||||
; m2/m3 = low 16 bits of v3 * mul
pmullw m2, m7 |
||||
pmullw m3, m7 |
||||
paddd m6, m0 |
||||
paddd m6, m1 |
||||
; m2/m3 = v1 + v3 * mul, written back over v1
paddw m2, m4 |
||||
paddw m3, m5 |
||||
mova [v1q + orderq], m2 |
||||
mova [v1q + orderq + mmsize], m3 |
||||
; advance; loop while the offset is still negative
add orderq, mmsize*2 |
||||
jl .loop |
||||
; horizontal add of the accumulator lanes (HADDD comes from x86util.asm),
; result returned in eax
HADDD m6, m0 |
||||
movd eax, m6 |
||||
RET |
||||
%endmacro |
||||
|
||||
INIT_MMX mmxext |
||||
SCALARPRODUCT |
||||
INIT_XMM sse2 |
||||
SCALARPRODUCT |
||||
|
||||
; SCALARPRODUCT_LOOP %1: emits the loop body labelled .loop%1, where %1 is
; the byte shift passed to palignr (0 selects the plain aligned-load path).
; The loop walks from the end of the vectors towards the start, 2*mmsize
; bytes per iteration, and always runs at least once.
; Expected on entry: orderq = byte offset just past the data, m7 = mul in
; every 16-bit lane, m6 = accumulator, and for %1 != 0 m4/m5 = the aligned
; vectors at [v2q + orderq] / [v3q + orderq].
%macro SCALARPRODUCT_LOOP 1 |
||||
align 16 |
||||
.loop%1: |
||||
; the jg at the bottom of the loop tests the flags produced by this sub;
; only SIMD instructions and moves sit in between
sub orderq, mmsize*2 |
||||
%if %1 |
||||
; m1 = aligned v2 vector kept from the previous (higher-address) step
mova m1, m4 |
||||
mova m4, [v2q + orderq] |
||||
mova m0, [v2q + orderq + mmsize] |
||||
; rebuild the two unaligned v2 vectors from three aligned ones
palignr m1, m0, %1 |
||||
palignr m0, m4, %1 |
||||
; same procedure for v3, carried in m5
mova m3, m5 |
||||
mova m5, [v3q + orderq] |
||||
mova m2, [v3q + orderq + mmsize] |
||||
palignr m3, m2, %1 |
||||
palignr m2, m5, %1 |
||||
%else |
||||
; shift of 0: v2 and v3 are loaded directly with aligned moves
mova m0, [v2q + orderq] |
||||
mova m1, [v2q + orderq + mmsize] |
||||
mova m2, [v3q + orderq] |
||||
mova m3, [v3q + orderq + mmsize] |
||||
%endif |
||||
; t0/t1 = current v1 data: memory operands by default, but preloaded into
; m8/m9 when ARCH_X86_64 is set
%define t0 [v1q + orderq] |
||||
%define t1 [v1q + orderq + mmsize] |
||||
%if ARCH_X86_64 |
||||
mova m8, t0 |
||||
mova m9, t1 |
||||
%define t0 m8 |
||||
%define t1 m9 |
||||
%endif |
||||
; m0/m1 = pairwise-summed 32-bit products v2 * v1
pmaddwd m0, t0 |
||||
pmaddwd m1, t1 |
||||
; m2/m3 = v3 * mul + v1
pmullw m2, m7 |
||||
pmullw m3, m7 |
||||
paddw m2, t0 |
||||
paddw m3, t1 |
||||
paddd m6, m0 |
||||
paddd m6, m1 |
||||
mova [v1q + orderq], m2 |
||||
mova [v1q + orderq + mmsize], m3 |
||||
jg .loop%1 |
||||
; non-zero shifts jump to the shared epilogue; the %1 == 0 instance emits
; no jump and runs into whatever follows its expansion
%if %1 |
||||
jmp .end |
||||
%endif |
||||
%endmacro |
||||
|
||||
; int ff_scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3, |
||||
; int order, int mul) |
||||
; SSSE3 version: v2 and v3 are only ever read with aligned loads; the byte
; misalignment of v2 selects one of eight loop variants that realign the
; data with palignr. Returns the scalar product of v1 and v2 in eax and
; updates v1 in place with v1[i] += v3[i] * mul.
; NOTE(review): only v2's misalignment is measured and the same shift is
; applied to v3 -- this presumes both share the same misalignment; confirm
; against the caller.
INIT_XMM ssse3 |
||||
cglobal scalarproduct_and_madd_int16, 4,5,10, v1, v2, v3, order, mul |
||||
; element count -> byte count
shl orderq, 1 |
||||
; m7 = mul replicated into every 16-bit lane
movd m7, mulm |
||||
pshuflw m7, m7, 0 |
||||
punpcklqdq m7, m7 |
||||
; m6 = 32-bit accumulator lanes
pxor m6, m6 |
||||
; r4d = v2's misalignment in bytes; v2 and v3 are rounded down to 16 bytes
mov r4d, v2d |
||||
and r4d, 15 |
||||
and v2q, ~15 |
||||
and v3q, ~15 |
||||
; preload the aligned vectors at the end offset for the first palignr step
; NOTE(review): these loads also execute when the misalignment is 0, where
; the loop never uses m4/m5 -- confirm the buffers tolerate reading 16 bytes
; at the end offset.
mova m4, [v2q + orderq] |
||||
mova m5, [v3q + orderq] |
||||
; linear is faster than branch tree or jump table, because the branches taken are cyclic (i.e. predictable) |
||||
cmp r4d, 0 |
||||
je .loop0 |
||||
cmp r4d, 2 |
||||
je .loop2 |
||||
cmp r4d, 4 |
||||
je .loop4 |
||||
cmp r4d, 6 |
||||
je .loop6 |
||||
cmp r4d, 8 |
||||
je .loop8 |
||||
cmp r4d, 10 |
||||
je .loop10 |
||||
cmp r4d, 12 |
||||
je .loop12 |
||||
; no match above: execution continues into the first variant emitted below
SCALARPRODUCT_LOOP 14 |
||||
SCALARPRODUCT_LOOP 12 |
||||
SCALARPRODUCT_LOOP 10 |
||||
SCALARPRODUCT_LOOP 8 |
||||
SCALARPRODUCT_LOOP 6 |
||||
SCALARPRODUCT_LOOP 4 |
||||
SCALARPRODUCT_LOOP 2 |
||||
SCALARPRODUCT_LOOP 0 |
||||
; shared epilogue: horizontal add of the accumulator, result in eax
.end: |
||||
HADDD m6, m0 |
||||
movd eax, m6 |
||||
RET |
@ -0,0 +1,47 @@ |
||||
/*
|
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with FFmpeg; if not, write to the Free Software |
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
*/ |
||||
|
||||
#include "libavutil/attributes.h" |
||||
#include "libavutil/cpu.h" |
||||
#include "libavutil/x86/cpu.h" |
||||
#include "libavcodec/apedsp.h" |
||||
|
||||
int32_t ff_scalarproduct_and_madd_int16_mmxext(int16_t *v1, const int16_t *v2, |
||||
const int16_t *v3, |
||||
int order, int mul); |
||||
int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2, |
||||
const int16_t *v3, |
||||
int order, int mul); |
||||
int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, |
||||
const int16_t *v3, |
||||
int order, int mul); |
||||
|
||||
/**
 * Install the x86-optimised APE DSP routine best suited to the running CPU.
 *
 * Preference order is SSSE3, then SSE2, then MMXEXT; the context is left
 * untouched when none of them is available. The SSSE3 version is skipped on
 * CPUs that report SSE4.2 or 3DNow! (the original code tags this condition
 * "cachesplit").
 *
 * @param c DSP context whose function pointers may be overridden
 */
av_cold void ff_apedsp_init_x86(APEDSPContext *c)
{
    const int flags = av_get_cpu_flags();
    /* cachesplit */
    const int skip_ssse3 = flags & (AV_CPU_FLAG_SSE42 | AV_CPU_FLAG_3DNOW);

    if (EXTERNAL_SSSE3(flags) && !skip_ssse3) {
        c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3;
    } else if (EXTERNAL_SSE2(flags)) {
        c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
    } else if (EXTERNAL_MMXEXT(flags)) {
        c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmxext;
    }
}
Loading…
Reference in new issue