From 060dd9300041eff91e7fb5622c12f9dd4975502d Mon Sep 17 00:00:00 2001
From: Vitor Sessak
Date: Thu, 1 Jul 2010 21:04:12 +0000
Subject: [PATCH] Altivec-optimized mp{1,2,3} windowing

Originally committed as revision 23943 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/mpegaudio.h                |   1 +
 libavcodec/mpegaudiodec.c             |   2 +
 libavcodec/ppc/Makefile               |   6 ++
 libavcodec/ppc/mpegaudiodec_altivec.c | 176 ++++++++++++++++++++++++++
 4 files changed, 185 insertions(+)
 create mode 100644 libavcodec/ppc/mpegaudiodec_altivec.c

diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h
index 5a530b71e8..e2ad911b0c 100644
--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@@ -188,6 +188,7 @@ void ff_mpa_synth_filter_float(MPADecodeContext *s,
                          INTFLOAT sb_samples[SBLIMIT]);
 
 void ff_mpegaudiodec_init_mmx(MPADecodeContext *s);
+void ff_mpegaudiodec_init_altivec(MPADecodeContext *s);
 
 /* fast header check for resync */
 static inline int ff_mpa_check_header(uint32_t header){
diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index 0ae21e03fb..856eebd135 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -325,6 +325,8 @@ static av_cold int decode_init(AVCodecContext * avctx)
 #if HAVE_MMX
     ff_mpegaudiodec_init_mmx(s);
 #endif
+    if (HAVE_ALTIVEC && CONFIG_FLOAT) ff_mpegaudiodec_init_altivec(s);
+
     avctx->sample_fmt= OUT_FMT;
     s->error_recognition= avctx->error_recognition;
 
diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile
index 32c9082342..3bbd6c3630 100644
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -7,6 +7,12 @@ ALTIVEC-OBJS-$(CONFIG_VP5_DECODER) += ppc/vp3dsp_altivec.o
 ALTIVEC-OBJS-$(CONFIG_VP6_DECODER) += ppc/vp3dsp_altivec.o
 ALTIVEC-OBJS-$(CONFIG_VP8_DECODER) += ppc/vp8dsp_altivec.o
 
+ALTIVEC-OBJS-$(CONFIG_MP1FLOAT_DECODER) += ppc/mpegaudiodec_altivec.o
+ALTIVEC-OBJS-$(CONFIG_MP2FLOAT_DECODER) += ppc/mpegaudiodec_altivec.o
+ALTIVEC-OBJS-$(CONFIG_MP3FLOAT_DECODER) += ppc/mpegaudiodec_altivec.o
+ALTIVEC-OBJS-$(CONFIG_MP3ON4FLOAT_DECODER) += ppc/mpegaudiodec_altivec.o
+ALTIVEC-OBJS-$(CONFIG_MP3ADUFLOAT_DECODER) += ppc/mpegaudiodec_altivec.o
+
 OBJS-$(HAVE_ALTIVEC) += ppc/check_altivec.o \
                         ppc/dsputil_altivec.o \
                         ppc/fdct_altivec.o \
diff --git a/libavcodec/ppc/mpegaudiodec_altivec.c b/libavcodec/ppc/mpegaudiodec_altivec.c
new file mode 100644
index 0000000000..e087d4add1
--- /dev/null
+++ b/libavcodec/ppc/mpegaudiodec_altivec.c
@@ -0,0 +1,176 @@
+/*
+ * Altivec optimized MP3 decoding functions
+ * Copyright (c) 2010 Vitor Sessak
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+
+#include "dsputil_altivec.h"
+#include "util_altivec.h"
+
+#define CONFIG_FLOAT 1
+#include "libavcodec/mpegaudio.h"
+
+#define MACS(rt, ra, rb) ((rt) += (ra) * (rb))
+#define MLSS(rt, ra, rb) ((rt) -= (ra) * (rb))
+
+/* Apply op to sum for 8 taps of w and p that lie 64 elements apart. */
+#define SUM8(op, sum, w, p)               \
+do {                                      \
+    op(sum, (w)[0 * 64], (p)[0 * 64]);    \
+    op(sum, (w)[1 * 64], (p)[1 * 64]);    \
+    op(sum, (w)[2 * 64], (p)[2 * 64]);    \
+    op(sum, (w)[3 * 64], (p)[3 * 64]);    \
+    op(sum, (w)[4 * 64], (p)[4 * 64]);    \
+    op(sum, (w)[5 * 64], (p)[5 * 64]);    \
+    op(sum, (w)[6 * 64], (p)[6 * 64]);    \
+    op(sum, (w)[7 * 64], (p)[7 * 64]);    \
+} while (0)
+
+/**
+ * Compute two 8-tap dot products per output sample, four samples at a time.
+ *
+ * For each i in [0, 4 * (len >> 2)):
+ *   sum1[i] = sum over k = 0..7 of buf[i + 64 * k] * win1[i + 64 * k]
+ *   sum2[i] = sum over k = 0..7 of buf[i + 64 * k] * win2[i + 16 * k]
+ *
+ * Every pointer is accessed through vec_ld()/vec_st(), which ignore the low
+ * four address bits, so all of them are assumed to be 16-byte aligned.
+ *
+ * @param buf  input samples, taps 64 floats apart
+ * @param win1 first window, taps 64 floats apart
+ * @param win2 second window, taps 16 floats apart
+ * @param sum1 receives the buf * win1 sums
+ * @param sum2 receives the buf * win2 sums
+ * @param len  number of output floats; len >> 2 vectors are processed
+ */
+static void apply_window(const float *buf, const float *win1,
+                         const float *win2, float *sum1, float *sum2, int len)
+{
+    const vector float *win1a = (const vector float *) win1;
+    const vector float *win2a = (const vector float *) win2;
+    const vector float *bufa  = (const vector float *) buf;
+    vector float *sum1a = (vector float *) sum1;
+    vector float *sum2a = (vector float *) sum2;
+    /* All-zero bits are +0.0f; built without reading any variable. */
+    const vector float zero = (vector float) vec_splat_u32(0);
+    vector float v0, v1, v2, v3, v4;
+
+    len = len >> 2;
+
+/* a: byte offset into win1 and buf; b: byte offset into win2. */
+#define MULT(a, b)                                 \
+    do {                                           \
+        v1 = vec_ld(a, win1a);                     \
+        v2 = vec_ld(b, win2a);                     \
+        v3 = vec_ld(a, bufa);                      \
+        v0 = vec_madd(v3, v1, v0);                 \
+        v4 = vec_madd(v2, v3, v4);                 \
+    } while (0)
+
+    while (len--) {
+        v0 = zero;
+        v4 = zero;
+
+        MULT(   0,   0);
+        MULT( 256,  64);
+        MULT( 512, 128);
+        MULT( 768, 192);
+        MULT(1024, 256);
+        MULT(1280, 320);
+        MULT(1536, 384);
+        MULT(1792, 448);
+
+        vec_st(v0, 0, sum1a);
+        vec_st(v4, 0, sum2a);
+        sum1a++;
+        sum2a++;
+        win1a++;
+        win2a++;
+        bufa++;
+    }
+#undef MULT
+}
+
+/**
+ * Windowing routine installed as MPADecodeContext.apply_window_mp3.
+ *
+ * Writes 32 output samples, out[0], out[incr], ..., out[31 * incr], computed
+ * from in and win.
+ *
+ * @param in     sample buffer; its first 32 floats are copied to in[512..543],
+ *               so it must have room for at least 544 floats
+ * @param win    window coefficients
+ * @param unused ignored by this implementation
+ * @param out    output samples, written with a stride of incr
+ * @param incr   distance in floats between consecutive output samples
+ */
+static void apply_window_mp3(float *in, float *win, int *unused, float *out,
+                             int incr)
+{
+    LOCAL_ALIGNED_16(float, suma, [17]);
+    LOCAL_ALIGNED_16(float, sumb, [17]);
+    LOCAL_ALIGNED_16(float, sumc, [17]);
+    LOCAL_ALIGNED_16(float, sumd, [17]);
+
+    float sum;
+    int j;
+    float *out2 = out + 32 * incr;
+
+    /* copy to avoid wrap */
+    memcpy(in + 512, in, 32 * sizeof(*in));
+
+    /* Vectorized part: 16 sums each into suma/sumc and sumb/sumd. */
+    apply_window(in + 16, win     , win + 512, suma, sumc, 16);
+    apply_window(in + 32, win + 48, win + 640, sumb, sumd, 16);
+
+    /* The first output takes one extra scalar 8-tap term. */
+    SUM8(MLSS, suma[0], win + 32, in + 48);
+
+    sumc[ 0] = 0;
+    sumb[16] = 0;
+    sumd[16] = 0;
+
+    out[0] = suma[0];
+    out  += incr;
+    out2 -= incr;
+    /* Fill samples 1..15 walking up and 31..17 walking down. */
+    for(j=1;j<16;j++) {
+        *out  =  suma[   j] - sumd[16-j];
+        *out2 = -sumb[16-j] - sumc[   j];
+        out  += incr;
+        out2 -= incr;
+    }
+
+    /* out now points at sample 16, which is computed in scalar code. */
+    sum = 0;
+    SUM8(MLSS, sum, win + 16 + 32, in + 32);
+    *out = sum;
+}
+
+/**
+ * Install the AltiVec window function into the decoder context.
+ *
+ * NOTE(review): the hook is installed unconditionally; no runtime check for
+ * AltiVec support is visible here -- confirm callers guarantee it.
+ */
+void ff_mpegaudiodec_init_altivec(MPADecodeContext *s)
+{
+    s->apply_window_mp3 = apply_window_mp3;
+}