[imported from MPlayer, based on a52dec's libao] Originally committed as revision 1780 to svn://svn.ffmpeg.org/ffmpeg/trunkpull/126/head
7 changed files with 858 additions and 0 deletions
@ -0,0 +1,32 @@ |
* a52_util.h |
* Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> |
* Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> |
* |
* This file is part of a52dec, a free ATSC A-52 stream decoder. |
* See http://liba52.sourceforge.net/ for updates.
* |
* a52dec is free software; you can redistribute it and/or modify |
* it under the terms of the GNU General Public License as published by |
* the Free Software Foundation; either version 2 of the License, or |
* (at your option) any later version. |
* |
* a52dec is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* |
* You should have received a copy of the GNU General Public License |
* along with this program; if not, write to the Free Software |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
*/ |
#ifndef A52_UTIL_H
#define A52_UTIL_H

#include <inttypes.h> /* uint8_t, uint16_t, uint32_t, int16_t used in the prototypes */

/* Table-driven CRC-16 over the first num_bytes bytes of data. */
uint16_t a52_crc16_block(uint8_t *data,uint32_t num_bytes);

/*
 * Select a float -> int16 converter for the given decoder flags and output
 * channel count, store it in a52_resample and return it (NULL if no converter
 * exists for that combination).
 */
void* a52_resample_init(uint32_t mm_accel,int flags,int chans);

/* Converter chosen by a52_resample_init(); NULL until a converter is chosen. */
extern int (* a52_resample) (float * _f, int16_t * s16);

#endif /* A52_UTIL_H */
@ -0,0 +1,73 @@ |
* crc.c |
* |
* Copyright (C) Aaron Holtzman - May 1999 |
* |
* This file is part of ac3dec, a free Dolby AC-3 stream decoder. |
* ac3dec is free software; you can redistribute it and/or modify |
* it under the terms of the GNU General Public License as published by |
* the Free Software Foundation; either version 2, or (at your option) |
* any later version. |
* ac3dec is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License |
* along with GNU Make; see the file COPYING. If not, write to |
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* |
*/ |
#include <stdlib.h> |
#include <stdio.h> |
#include <inttypes.h> |
/*
 * Lookup table for a byte-at-a-time, MSB-first CRC-16.
 * Entry 1 is 0x8005, i.e. the table is built for the generator polynomial
 * x^16 + x^15 + x^2 + 1.
 */
static const uint16_t crc_lut[256] =
{
    0x0000,0x8005,0x800f,0x000a,0x801b,0x001e,0x0014,0x8011,
    0x8033,0x0036,0x003c,0x8039,0x0028,0x802d,0x8027,0x0022,
    0x8063,0x0066,0x006c,0x8069,0x0078,0x807d,0x8077,0x0072,
    0x0050,0x8055,0x805f,0x005a,0x804b,0x004e,0x0044,0x8041,
    0x80c3,0x00c6,0x00cc,0x80c9,0x00d8,0x80dd,0x80d7,0x00d2,
    0x00f0,0x80f5,0x80ff,0x00fa,0x80eb,0x00ee,0x00e4,0x80e1,
    0x00a0,0x80a5,0x80af,0x00aa,0x80bb,0x00be,0x00b4,0x80b1,
    0x8093,0x0096,0x009c,0x8099,0x0088,0x808d,0x8087,0x0082,
    0x8183,0x0186,0x018c,0x8189,0x0198,0x819d,0x8197,0x0192,
    0x01b0,0x81b5,0x81bf,0x01ba,0x81ab,0x01ae,0x01a4,0x81a1,
    0x01e0,0x81e5,0x81ef,0x01ea,0x81fb,0x01fe,0x01f4,0x81f1,
    0x81d3,0x01d6,0x01dc,0x81d9,0x01c8,0x81cd,0x81c7,0x01c2,
    0x0140,0x8145,0x814f,0x014a,0x815b,0x015e,0x0154,0x8151,
    0x8173,0x0176,0x017c,0x8179,0x0168,0x816d,0x8167,0x0162,
    0x8123,0x0126,0x012c,0x8129,0x0138,0x813d,0x8137,0x0132,
    0x0110,0x8115,0x811f,0x011a,0x810b,0x010e,0x0104,0x8101,
    0x8303,0x0306,0x030c,0x8309,0x0318,0x831d,0x8317,0x0312,
    0x0330,0x8335,0x833f,0x033a,0x832b,0x032e,0x0324,0x8321,
    0x0360,0x8365,0x836f,0x036a,0x837b,0x037e,0x0374,0x8371,
    0x8353,0x0356,0x035c,0x8359,0x0348,0x834d,0x8347,0x0342,
    0x03c0,0x83c5,0x83cf,0x03ca,0x83db,0x03de,0x03d4,0x83d1,
    0x83f3,0x03f6,0x03fc,0x83f9,0x03e8,0x83ed,0x83e7,0x03e2,
    0x83a3,0x03a6,0x03ac,0x83a9,0x03b8,0x83bd,0x83b7,0x03b2,
    0x0390,0x8395,0x839f,0x039a,0x838b,0x038e,0x0384,0x8381,
    0x0280,0x8285,0x828f,0x028a,0x829b,0x029e,0x0294,0x8291,
    0x82b3,0x02b6,0x02bc,0x82b9,0x02a8,0x82ad,0x82a7,0x02a2,
    0x82e3,0x02e6,0x02ec,0x82e9,0x02f8,0x82fd,0x82f7,0x02f2,
    0x02d0,0x82d5,0x82df,0x02da,0x82cb,0x02ce,0x02c4,0x82c1,
    0x8243,0x0246,0x024c,0x8249,0x0258,0x825d,0x8257,0x0252,
    0x0270,0x8275,0x827f,0x027a,0x826b,0x026e,0x0264,0x8261,
    0x0220,0x8225,0x822f,0x022a,0x823b,0x023e,0x0234,0x8231,
    0x8213,0x0216,0x021c,0x8219,0x0208,0x820d,0x8207,0x0202
};

/*
 * Compute the CRC-16 of a byte buffer.
 *
 * data      - bytes to checksum (only read, never written)
 * num_bytes - number of bytes to process; 0 yields 0
 *
 * The running state starts at 0. For every byte, the top 8 bits of the state
 * are XORed with the byte to index crc_lut, and the looked-up value is XORed
 * with the state shifted left by one byte.
 */
uint16_t a52_crc16_block(uint8_t *data,uint32_t num_bytes)
{
    uint16_t state = 0;
    uint32_t i;

    for (i = 0; i < num_bytes; i++) {
        /* state is promoted to int for the shift; the cast keeps the low 16 bits */
        state = (uint16_t)(crc_lut[data[i] ^ (state >> 8)] ^ (state << 8));
    }
    return state;
}
@ -0,0 +1,45 @@ |
// a52_resample_init should find the requested converter (from type flags ->
// given number of channels) and set up some function pointers...
// a52_resample() should do the conversion.
#include <inttypes.h> |
#include <stdio.h> |
#include "a52.h" |
#include "mm_accel.h" |
#include "config.h" |
#include "../libpostproc/mangle.h" |
/*
 * Currently selected float -> int16 converter.
 * NULL until a52_resample_init() finds a converter for the requested mode.
 */
int (* a52_resample) (float * _f, int16_t * s16)=NULL;
#include "resample_c.c" |
#ifdef ARCH_X86 |
#include "resample_mmx.c" |
#endif |
void* a52_resample_init(uint32_t mm_accel,int flags,int chans){ |
void* tmp; |
#ifdef ARCH_X86 |
if(mm_accel&MM_ACCEL_X86_MMX){ |
tmp=a52_resample_MMX(flags,chans); |
if(tmp){ |
if(a52_resample==NULL) fprintf(stderr, "Using MMX optimized resampler\n"); |
a52_resample=tmp; |
return tmp; |
} |
} |
#endif |
tmp=a52_resample_C(flags,chans); |
if(tmp){ |
if(a52_resample==NULL) fprintf(stderr, "No accelerated resampler found\n"); |
a52_resample=tmp; |
return tmp; |
} |
fprintf(stderr, "Unimplemented resampler for mode 0x%X -> %d channels conversion - Contact MPlayer developers!\n", flags, chans); |
return NULL; |
} |
@ -0,0 +1,183 @@ |
// this code is based on a52dec/libao/audio_out_oss.c
/*
 * Turn the raw bit pattern of a biased float sample into a saturated int16.
 *
 * 0x43c00000 is the IEEE-754 single-precision bit pattern of 384.0f. Bit
 * patterns from 0x43bf8000 to 0x43c07fff map linearly onto -32768..32767 by
 * subtracting 0x43c00000; anything above or below is clamped.
 * NOTE(review): this presumably relies on the decoder being run with a bias
 * of 384 so that samples land in that range - confirm with the caller.
 */
static inline int16_t convert (int32_t i)
{
    if (i > 0x43c07fff)
        return 32767;
    if (i < 0x43bf8000)
        return -32768;
    return (int16_t)(i - 0x43c00000);
}
/*
 * One input plane of 256 samples -> 256 interleaved frames of 5 int16.
 * Slots 0..3 of every frame are zeroed; slot 4 receives the converted sample.
 * Returns the number of int16 values written (5*256).
 */
static int a52_resample_MONO_to_5_C(float * _f, int16_t * s16){
    /* samples are read as raw 32-bit patterns, which is what convert() expects */
    int32_t * bits = (int32_t *) _f;
    int n;

    for (n = 0; n < 256; n++) {
        int16_t * frame = s16 + 5*n;
        frame[0] = frame[1] = frame[2] = frame[3] = 0;
        frame[4] = convert (bits[n]);
    }
    return 5*256;
}
/*
 * One input plane of 256 samples -> 256 int16 samples, one per frame.
 * Returns the number of int16 values written (1*256).
 */
static int a52_resample_MONO_to_1_C(float * _f, int16_t * s16){
    /* samples are read as raw 32-bit patterns, which is what convert() expects */
    int32_t * bits = (int32_t *) _f;
    int n;

    for (n = 0; n < 256; n++)
        s16[n] = convert (bits[n]);
    return 1*256;
}
/*
 * Two input planes of 256 samples -> 256 interleaved frames of 2 int16.
 * Slot 0 takes plane 0 (f[n]), slot 1 takes plane 1 (f[n+256]).
 * Returns the number of int16 values written (2*256).
 */
static int a52_resample_STEREO_to_2_C(float * _f, int16_t * s16){
    /* samples are read as raw 32-bit patterns, which is what convert() expects */
    int32_t * bits = (int32_t *) _f;
    int n;

    for (n = 0; n < 256; n++) {
        int16_t * frame = s16 + 2*n;
        frame[0] = convert (bits[n]);
        frame[1] = convert (bits[n+256]);
    }
    return 2*256;
}
/*
 * Three input planes of 256 samples -> 256 interleaved frames of 5 int16.
 * Slot 0 <- plane 0, slot 1 <- plane 2, slots 2 and 3 <- 0, slot 4 <- plane 1.
 * Returns the number of int16 values written (5*256).
 */
static int a52_resample_3F_to_5_C(float * _f, int16_t * s16){
    /* samples are read as raw 32-bit patterns, which is what convert() expects */
    int32_t * bits = (int32_t *) _f;
    int n;

    for (n = 0; n < 256; n++) {
        int16_t * frame = s16 + 5*n;
        frame[0] = convert (bits[n]);
        frame[1] = convert (bits[n+512]);
        frame[2] = frame[3] = 0;
        frame[4] = convert (bits[n+256]);
    }
    return 5*256;
}
/*
 * Four input planes of 256 samples -> 256 interleaved frames of 4 int16.
 * Slot k of every frame takes plane k (f[n + 256*k]).
 * Returns the number of int16 values written (4*256).
 */
static int a52_resample_2F_2R_to_4_C(float * _f, int16_t * s16){
    /* samples are read as raw 32-bit patterns, which is what convert() expects */
    int32_t * bits = (int32_t *) _f;
    int n;

    for (n = 0; n < 256; n++) {
        int16_t * frame = s16 + 4*n;
        frame[0] = convert (bits[n]);
        frame[1] = convert (bits[n+256]);
        frame[2] = convert (bits[n+512]);
        frame[3] = convert (bits[n+768]);
    }
    return 4*256;
}
/*
 * Five input planes of 256 samples -> 256 interleaved frames of 5 int16.
 * Slot 0 <- plane 0, slot 1 <- plane 2, slot 2 <- plane 3, slot 3 <- plane 4,
 * slot 4 <- plane 1.
 * Returns the number of int16 values written (5*256).
 */
static int a52_resample_3F_2R_to_5_C(float * _f, int16_t * s16){
    /* samples are read as raw 32-bit patterns, which is what convert() expects */
    int32_t * bits = (int32_t *) _f;
    int n;

    for (n = 0; n < 256; n++) {
        int16_t * frame = s16 + 5*n;
        frame[0] = convert (bits[n]);
        frame[1] = convert (bits[n+512]);
        frame[2] = convert (bits[n+768]);
        frame[3] = convert (bits[n+1024]);
        frame[4] = convert (bits[n+256]);
    }
    return 5*256;
}
/*
 * Two input planes of 256 samples -> 256 interleaved frames of 6 int16.
 * Slots 0..3 <- 0, slot 4 <- plane 1 (f[n+256]), slot 5 <- plane 0 (f[n]).
 * Returns the number of int16 values written (6*256).
 */
static int a52_resample_MONO_LFE_to_6_C(float * _f, int16_t * s16){
    /* samples are read as raw 32-bit patterns, which is what convert() expects */
    int32_t * bits = (int32_t *) _f;
    int n;

    for (n = 0; n < 256; n++) {
        int16_t * frame = s16 + 6*n;
        frame[0] = frame[1] = frame[2] = frame[3] = 0;
        frame[4] = convert (bits[n+256]);
        frame[5] = convert (bits[n]);
    }
    return 6*256;
}
/*
 * Three input planes of 256 samples -> 256 interleaved frames of 6 int16.
 * Slot 0 <- plane 1, slot 1 <- plane 2, slots 2..4 <- 0, slot 5 <- plane 0.
 * Returns the number of int16 values written (6*256).
 */
static int a52_resample_STEREO_LFE_to_6_C(float * _f, int16_t * s16){
    /* samples are read as raw 32-bit patterns, which is what convert() expects */
    int32_t * bits = (int32_t *) _f;
    int n;

    for (n = 0; n < 256; n++) {
        int16_t * frame = s16 + 6*n;
        frame[0] = convert (bits[n+256]);
        frame[1] = convert (bits[n+512]);
        frame[2] = frame[3] = frame[4] = 0;
        frame[5] = convert (bits[n]);
    }
    return 6*256;
}
/*
 * Four input planes of 256 samples -> 256 interleaved frames of 6 int16.
 * Slot 0 <- plane 1, slot 1 <- plane 3, slots 2 and 3 <- 0, slot 4 <- plane 2,
 * slot 5 <- plane 0.
 * Returns the number of int16 values written (6*256).
 */
static int a52_resample_3F_LFE_to_6_C(float * _f, int16_t * s16){
    /* samples are read as raw 32-bit patterns, which is what convert() expects */
    int32_t * bits = (int32_t *) _f;
    int n;

    for (n = 0; n < 256; n++) {
        int16_t * frame = s16 + 6*n;
        frame[0] = convert (bits[n+256]);
        frame[1] = convert (bits[n+768]);
        frame[2] = frame[3] = 0;
        frame[4] = convert (bits[n+512]);
        frame[5] = convert (bits[n]);
    }
    return 6*256;
}
/*
 * Five input planes of 256 samples -> 256 interleaved frames of 6 int16.
 * Slot 0 <- plane 1, slot 1 <- plane 2, slot 2 <- plane 3, slot 3 <- plane 4,
 * slot 4 <- 0, slot 5 <- plane 0.
 * Returns the number of int16 values written (6*256).
 */
static int a52_resample_2F_2R_LFE_to_6_C(float * _f, int16_t * s16){
    /* samples are read as raw 32-bit patterns, which is what convert() expects */
    int32_t * bits = (int32_t *) _f;
    int n;

    for (n = 0; n < 256; n++) {
        int16_t * frame = s16 + 6*n;
        frame[0] = convert (bits[n+256]);
        frame[1] = convert (bits[n+512]);
        frame[2] = convert (bits[n+768]);
        frame[3] = convert (bits[n+1024]);
        frame[4] = 0;
        frame[5] = convert (bits[n]);
    }
    return 6*256;
}
/*
 * Six input planes of 256 samples -> 256 interleaved frames of 6 int16.
 * Slot 0 <- plane 1, slot 1 <- plane 3, slot 2 <- plane 4, slot 3 <- plane 5,
 * slot 4 <- plane 2, slot 5 <- plane 0.
 * Returns the number of int16 values written (6*256).
 */
static int a52_resample_3F_2R_LFE_to_6_C(float * _f, int16_t * s16){
    /* samples are read as raw 32-bit patterns, which is what convert() expects */
    int32_t * bits = (int32_t *) _f;
    int n;

    for (n = 0; n < 256; n++) {
        int16_t * frame = s16 + 6*n;
        frame[0] = convert (bits[n+256]);
        frame[1] = convert (bits[n+768]);
        frame[2] = convert (bits[n+1024]);
        frame[3] = convert (bits[n+1280]);
        frame[4] = convert (bits[n+512]);
        frame[5] = convert (bits[n]);
    }
    return 6*256;
}
static void* a52_resample_C(int flags, int ch){ |
switch (flags) { |
case A52_MONO: |
if(ch==5) return a52_resample_MONO_to_5_C; |
if(ch==1) return a52_resample_MONO_to_1_C; |
break; |
case A52_CHANNEL: |
case A52_STEREO: |
case A52_DOLBY: |
if(ch==2) return a52_resample_STEREO_to_2_C; |
break; |
case A52_3F: |
if(ch==5) return a52_resample_3F_to_5_C; |
break; |
case A52_2F2R: |
if(ch==4) return a52_resample_2F_2R_to_4_C; |
break; |
case A52_3F2R: |
if(ch==5) return a52_resample_3F_2R_to_5_C; |
break; |
case A52_MONO | A52_LFE: |
if(ch==6) return a52_resample_MONO_LFE_to_6_C; |
break; |
case A52_CHANNEL | A52_LFE: |
case A52_STEREO | A52_LFE: |
case A52_DOLBY | A52_LFE: |
if(ch==6) return a52_resample_STEREO_LFE_to_6_C; |
break; |
case A52_3F | A52_LFE: |
if(ch==6) return a52_resample_3F_LFE_to_6_C; |
break; |
case A52_2F2R | A52_LFE: |
if(ch==6) return a52_resample_2F_2R_LFE_to_6_C; |
break; |
case A52_3F2R | A52_LFE: |
if(ch==6) return a52_resample_3F_2R_LFE_to_6_C; |
break; |
} |
return NULL; |
} |
@ -0,0 +1,518 @@ |
// MMX optimizations from Michael Niedermayer (michaelni@gmx.at) (under GPL)
/* optimization TODO / NOTES
   movntq is slightly faster (0.5% with the current test.c benchmark),
   but that is only test.c, so it needs to be tested in reality,
   and it would mean (C / MMX2 / MMX / 3DNOW) versions
*/
/*
 * 64-bit constants that the inline asm in this file loads with movq through
 * MANGLE(name).
 *
 * They are referenced only from inside asm strings, which the compiler does
 * not see as a use, so they are marked "used" to keep them from being
 * discarded as unreferenced statics.
 *
 * magicF2W - 0x43c00000 in both 32-bit halves: the value subtracted (psubd)
 *            from each sample's bit pattern before packing to int16
 * wm1010   - keeps 16-bit words 1 and 3, clears words 0 and 2
 * wm0101   - keeps 16-bit words 0 and 2, clears words 1 and 3
 * wm1100   - keeps the upper 32 bits, clears the lower 32 bits
 */
static uint64_t __attribute__((used, aligned(8))) magicF2W= 0x43c0000043c00000ULL;
static uint64_t __attribute__((used, aligned(8))) wm1010= 0xFFFF0000FFFF0000ULL;
static uint64_t __attribute__((used, aligned(8))) wm0101= 0x0000FFFF0000FFFFULL;
static uint64_t __attribute__((used, aligned(8))) wm1100= 0xFFFFFFFF00000000ULL;
static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){ |
int32_t * f = (int32_t *) _f; |
asm volatile( |
"movl $-512, %%esi \n\t" |
"movq "MANGLE(magicF2W)", %%mm7 \n\t" |
"movq "MANGLE(wm1100)", %%mm3 \n\t" |
"movq "MANGLE(wm0101)", %%mm4 \n\t" |
"movq "MANGLE(wm1010)", %%mm5 \n\t" |
"pxor %%mm6, %%mm6 \n\t" |
"1: \n\t" |
"movq (%1, %%esi, 2), %%mm0 \n\t" |
"movq 8(%1, %%esi, 2), %%mm1 \n\t" |
"leal (%%esi, %%esi, 4), %%edi \n\t" |
"psubd %%mm7, %%mm0 \n\t" |
"psubd %%mm7, %%mm1 \n\t" |
"packssdw %%mm1, %%mm0 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"pand %%mm4, %%mm0 \n\t" |
"pand %%mm5, %%mm1 \n\t" |
"movq %%mm6, (%0, %%edi) \n\t" // 0 0 0 0
"movd %%mm0, 8(%0, %%edi) \n\t" // A 0
"pand %%mm3, %%mm0 \n\t" |
"movd %%mm6, 12(%0, %%edi) \n\t" // 0 0
"movd %%mm1, 16(%0, %%edi) \n\t" // 0 B
"pand %%mm3, %%mm1 \n\t" |
"movd %%mm6, 20(%0, %%edi) \n\t" // 0 0
"movq %%mm0, 24(%0, %%edi) \n\t" // 0 0 C 0
"movq %%mm1, 32(%0, %%edi) \n\t" // 0 0 0 B
"addl $8, %%esi \n\t" |
" jnz 1b \n\t" |
"emms \n\t" |
:: "r" (s16+1280), "r" (f+256) |
:"%esi", "%edi", "memory" |
); |
return 5*256; |
} |
static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){ |
int32_t * f = (int32_t *) _f; |
/* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it
#ifdef HAVE_SSE |
asm volatile( |
"movl $-1024, %%esi \n\t" |
"1: \n\t" |
"cvtps2pi (%1, %%esi), %%mm0 \n\t" |
"cvtps2pi 1024(%1, %%esi), %%mm2\n\t" |
"movq %%mm0, %%mm1 \n\t" |
"punpcklwd %%mm2, %%mm0 \n\t" |
"punpckhwd %%mm2, %%mm1 \n\t" |
"movq %%mm0, (%0, %%esi) \n\t" |
"movq %%mm1, 8(%0, %%esi) \n\t" |
"addl $16, %%esi \n\t" |
" jnz 1b \n\t" |
"emms \n\t" |
:: "r" (s16+512), "r" (f+256) |
:"%esi", "memory" |
);*/ |
asm volatile( |
"movl $-1024, %%esi \n\t" |
"movq "MANGLE(magicF2W)", %%mm7 \n\t" |
"1: \n\t" |
"movq (%1, %%esi), %%mm0 \n\t" |
"movq 8(%1, %%esi), %%mm1 \n\t" |
"movq 1024(%1, %%esi), %%mm2 \n\t" |
"movq 1032(%1, %%esi), %%mm3 \n\t" |
"psubd %%mm7, %%mm0 \n\t" |
"psubd %%mm7, %%mm1 \n\t" |
"psubd %%mm7, %%mm2 \n\t" |
"psubd %%mm7, %%mm3 \n\t" |
"packssdw %%mm1, %%mm0 \n\t" |
"packssdw %%mm3, %%mm2 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"punpcklwd %%mm2, %%mm0 \n\t" |
"punpckhwd %%mm2, %%mm1 \n\t" |
"movq %%mm0, (%0, %%esi) \n\t" |
"movq %%mm1, 8(%0, %%esi) \n\t" |
"addl $16, %%esi \n\t" |
" jnz 1b \n\t" |
"emms \n\t" |
:: "r" (s16+512), "r" (f+256) |
:"%esi", "memory" |
); |
return 2*256; |
} |
static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ |
int32_t * f = (int32_t *) _f; |
asm volatile( |
"movl $-1024, %%esi \n\t" |
"movq "MANGLE(magicF2W)", %%mm7 \n\t" |
"pxor %%mm6, %%mm6 \n\t" |
"movq %%mm7, %%mm5 \n\t" |
"punpckldq %%mm6, %%mm5 \n\t" |
"1: \n\t" |
"movd (%1, %%esi), %%mm0 \n\t" |
"punpckldq 2048(%1, %%esi), %%mm0\n\t" |
"movd 1024(%1, %%esi), %%mm1 \n\t" |
"punpckldq 4(%1, %%esi), %%mm1 \n\t" |
"movd 2052(%1, %%esi), %%mm2 \n\t" |
"movq %%mm7, %%mm3 \n\t" |
"punpckldq 1028(%1, %%esi), %%mm3\n\t" |
"movd 8(%1, %%esi), %%mm4 \n\t" |
"punpckldq 2056(%1, %%esi), %%mm4\n\t" |
"leal (%%esi, %%esi, 4), %%edi \n\t" |
"sarl $1, %%edi \n\t" |
"psubd %%mm7, %%mm0 \n\t" |
"psubd %%mm7, %%mm1 \n\t" |
"psubd %%mm5, %%mm2 \n\t" |
"psubd %%mm7, %%mm3 \n\t" |
"psubd %%mm7, %%mm4 \n\t" |
"packssdw %%mm6, %%mm0 \n\t" |
"packssdw %%mm2, %%mm1 \n\t" |
"packssdw %%mm4, %%mm3 \n\t" |
"movq %%mm0, (%0, %%edi) \n\t" |
"movq %%mm1, 8(%0, %%edi) \n\t" |
"movq %%mm3, 16(%0, %%edi) \n\t" |
"movd 1032(%1, %%esi), %%mm1 \n\t" |
"punpckldq 12(%1, %%esi), %%mm1\n\t" |
"movd 2060(%1, %%esi), %%mm2 \n\t" |
"movq %%mm7, %%mm3 \n\t" |
"punpckldq 1036(%1, %%esi), %%mm3\n\t" |
"pxor %%mm0, %%mm0 \n\t" |
"psubd %%mm7, %%mm1 \n\t" |
"psubd %%mm5, %%mm2 \n\t" |
"psubd %%mm7, %%mm3 \n\t" |
"packssdw %%mm1, %%mm0 \n\t" |
"packssdw %%mm3, %%mm2 \n\t" |
"movq %%mm0, 24(%0, %%edi) \n\t" |
"movq %%mm2, 32(%0, %%edi) \n\t" |
"addl $16, %%esi \n\t" |
" jnz 1b \n\t" |
"emms \n\t" |
:: "r" (s16+1280), "r" (f+256) |
:"%esi", "%edi", "memory" |
); |
return 5*256; |
} |
static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){ |
int32_t * f = (int32_t *) _f; |
asm volatile( |
"movl $-1024, %%esi \n\t" |
"movq "MANGLE(magicF2W)", %%mm7 \n\t" |
"1: \n\t" |
"movq (%1, %%esi), %%mm0 \n\t" |
"movq 8(%1, %%esi), %%mm1 \n\t" |
"movq 1024(%1, %%esi), %%mm2 \n\t" |
"movq 1032(%1, %%esi), %%mm3 \n\t" |
"psubd %%mm7, %%mm0 \n\t" |
"psubd %%mm7, %%mm1 \n\t" |
"psubd %%mm7, %%mm2 \n\t" |
"psubd %%mm7, %%mm3 \n\t" |
"packssdw %%mm1, %%mm0 \n\t" |
"packssdw %%mm3, %%mm2 \n\t" |
"movq 2048(%1, %%esi), %%mm3 \n\t" |
"movq 2056(%1, %%esi), %%mm4 \n\t" |
"movq 3072(%1, %%esi), %%mm5 \n\t" |
"movq 3080(%1, %%esi), %%mm6 \n\t" |
"psubd %%mm7, %%mm3 \n\t" |
"psubd %%mm7, %%mm4 \n\t" |
"psubd %%mm7, %%mm5 \n\t" |
"psubd %%mm7, %%mm6 \n\t" |
"packssdw %%mm4, %%mm3 \n\t" |
"packssdw %%mm6, %%mm5 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"movq %%mm3, %%mm4 \n\t" |
"punpcklwd %%mm2, %%mm0 \n\t" |
"punpckhwd %%mm2, %%mm1 \n\t" |
"punpcklwd %%mm5, %%mm3 \n\t" |
"punpckhwd %%mm5, %%mm4 \n\t" |
"movq %%mm0, %%mm2 \n\t" |
"movq %%mm1, %%mm5 \n\t" |
"punpckldq %%mm3, %%mm0 \n\t" |
"punpckhdq %%mm3, %%mm2 \n\t" |
"punpckldq %%mm4, %%mm1 \n\t" |
"punpckhdq %%mm4, %%mm5 \n\t" |
"movq %%mm0, (%0, %%esi,2) \n\t" |
"movq %%mm2, 8(%0, %%esi,2) \n\t" |
"movq %%mm1, 16(%0, %%esi,2) \n\t" |
"movq %%mm5, 24(%0, %%esi,2) \n\t" |
"addl $16, %%esi \n\t" |
" jnz 1b \n\t" |
"emms \n\t" |
:: "r" (s16+1024), "r" (f+256) |
:"%esi", "memory" |
); |
return 4*256; |
} |
static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ |
int32_t * f = (int32_t *) _f; |
asm volatile( |
"movl $-1024, %%esi \n\t" |
"movq "MANGLE(magicF2W)", %%mm7 \n\t" |
"1: \n\t" |
"movd (%1, %%esi), %%mm0 \n\t" |
"punpckldq 2048(%1, %%esi), %%mm0\n\t" |
"movd 3072(%1, %%esi), %%mm1 \n\t" |
"punpckldq 4096(%1, %%esi), %%mm1\n\t" |
"movd 1024(%1, %%esi), %%mm2 \n\t" |
"punpckldq 4(%1, %%esi), %%mm2 \n\t" |
"movd 2052(%1, %%esi), %%mm3 \n\t" |
"punpckldq 3076(%1, %%esi), %%mm3\n\t" |
"movd 4100(%1, %%esi), %%mm4 \n\t" |
"punpckldq 1028(%1, %%esi), %%mm4\n\t" |
"movd 8(%1, %%esi), %%mm5 \n\t" |
"punpckldq 2056(%1, %%esi), %%mm5\n\t" |
"leal (%%esi, %%esi, 4), %%edi \n\t" |
"sarl $1, %%edi \n\t" |
"psubd %%mm7, %%mm0 \n\t" |
"psubd %%mm7, %%mm1 \n\t" |
"psubd %%mm7, %%mm2 \n\t" |
"psubd %%mm7, %%mm3 \n\t" |
"psubd %%mm7, %%mm4 \n\t" |
"psubd %%mm7, %%mm5 \n\t" |
"packssdw %%mm1, %%mm0 \n\t" |
"packssdw %%mm3, %%mm2 \n\t" |
"packssdw %%mm5, %%mm4 \n\t" |
"movq %%mm0, (%0, %%edi) \n\t" |
"movq %%mm2, 8(%0, %%edi) \n\t" |
"movq %%mm4, 16(%0, %%edi) \n\t" |
"movd 3080(%1, %%esi), %%mm0 \n\t" |
"punpckldq 4104(%1, %%esi), %%mm0\n\t" |
"movd 1032(%1, %%esi), %%mm1 \n\t" |
"punpckldq 12(%1, %%esi), %%mm1\n\t" |
"movd 2060(%1, %%esi), %%mm2 \n\t" |
"punpckldq 3084(%1, %%esi), %%mm2\n\t" |
"movd 4108(%1, %%esi), %%mm3 \n\t" |
"punpckldq 1036(%1, %%esi), %%mm3\n\t" |
"psubd %%mm7, %%mm0 \n\t" |
"psubd %%mm7, %%mm1 \n\t" |
"psubd %%mm7, %%mm2 \n\t" |
"psubd %%mm7, %%mm3 \n\t" |
"packssdw %%mm1, %%mm0 \n\t" |
"packssdw %%mm3, %%mm2 \n\t" |
"movq %%mm0, 24(%0, %%edi) \n\t" |
"movq %%mm2, 32(%0, %%edi) \n\t" |
"addl $16, %%esi \n\t" |
" jnz 1b \n\t" |
"emms \n\t" |
:: "r" (s16+1280), "r" (f+256) |
:"%esi", "%edi", "memory" |
); |
return 5*256; |
} |
static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){ |
int32_t * f = (int32_t *) _f; |
asm volatile( |
"movl $-1024, %%esi \n\t" |
"movq "MANGLE(magicF2W)", %%mm7 \n\t" |
"pxor %%mm6, %%mm6 \n\t" |
"1: \n\t" |
"movq 1024(%1, %%esi), %%mm0 \n\t" |
"movq 1032(%1, %%esi), %%mm1 \n\t" |
"movq (%1, %%esi), %%mm2 \n\t" |
"movq 8(%1, %%esi), %%mm3 \n\t" |
"psubd %%mm7, %%mm0 \n\t" |
"psubd %%mm7, %%mm1 \n\t" |
"psubd %%mm7, %%mm2 \n\t" |
"psubd %%mm7, %%mm3 \n\t" |
"packssdw %%mm1, %%mm0 \n\t" |
"packssdw %%mm3, %%mm2 \n\t" |
"movq %%mm0, %%mm1 \n\t" |
"punpcklwd %%mm2, %%mm0 \n\t" |
"punpckhwd %%mm2, %%mm1 \n\t" |
"leal (%%esi, %%esi, 2), %%edi \n\t" |
"movq %%mm6, (%0, %%edi) \n\t" |
"movd %%mm0, 8(%0, %%edi) \n\t" |
"punpckhdq %%mm0, %%mm0 \n\t" |
"movq %%mm6, 12(%0, %%edi) \n\t" |
"movd %%mm0, 20(%0, %%edi) \n\t" |
"movq %%mm6, 24(%0, %%edi) \n\t" |
"movd %%mm1, 32(%0, %%edi) \n\t" |
"punpckhdq %%mm1, %%mm1 \n\t" |
"movq %%mm6, 36(%0, %%edi) \n\t" |
"movd %%mm1, 44(%0, %%edi) \n\t" |
"addl $16, %%esi \n\t" |
" jnz 1b \n\t" |
"emms \n\t" |
:: "r" (s16+1536), "r" (f+256) |
:"%esi", "%edi", "memory" |
); |
return 6*256; |
} |
static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){ |
int32_t * f = (int32_t *) _f; |
asm volatile( |
"movl $-1024, %%esi \n\t" |
"movq "MANGLE(magicF2W)", %%mm7 \n\t" |
"pxor %%mm6, %%mm6 \n\t" |
"1: \n\t" |
"movq 1024(%1, %%esi), %%mm0 \n\t" |
"movq 2048(%1, %%esi), %%mm1 \n\t" |
"movq (%1, %%esi), %%mm5 \n\t"
"psubd %%mm7, %%mm0 \n\t" |
"psubd %%mm7, %%mm1 \n\t" |
"psubd %%mm7, %%mm5 \n\t" |
"leal (%%esi, %%esi, 2), %%edi \n\t" |
"pxor %%mm4, %%mm4 \n\t" |
"packssdw %%mm5, %%mm0 \n\t" // FfAa
"packssdw %%mm4, %%mm1 \n\t" // 00Bb
"punpckhwd %%mm0, %%mm4 \n\t" // F0f0
"punpcklwd %%mm1, %%mm0 \n\t" // BAba
"movq %%mm0, %%mm1 \n\t" // BAba
"punpckldq %%mm4, %%mm3 \n\t" // f0XX
"punpckldq %%mm6, %%mm0 \n\t" // 00ba
"punpckhdq %%mm1, %%mm3 \n\t" // BAf0
"movq %%mm0, (%0, %%edi) \n\t" // 00ba
"punpckhdq %%mm4, %%mm0 \n\t" // F000
"movq %%mm3, 8(%0, %%edi) \n\t" // BAf0
"movq %%mm0, 16(%0, %%edi) \n\t" // F000
"addl $8, %%esi \n\t" |
" jnz 1b \n\t" |
"emms \n\t" |
:: "r" (s16+1536), "r" (f+256) |
:"%esi", "%edi", "memory" |
); |
return 6*256; |
} |
static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){ |
int32_t * f = (int32_t *) _f; |
asm volatile( |
"movl $-1024, %%esi \n\t" |
"movq "MANGLE(magicF2W)", %%mm7 \n\t" |
"pxor %%mm6, %%mm6 \n\t" |
"1: \n\t" |
"movq 1024(%1, %%esi), %%mm0 \n\t" |
"movq 3072(%1, %%esi), %%mm1 \n\t" |
"movq 2048(%1, %%esi), %%mm4 \n\t" |
"movq (%1, %%esi), %%mm5 \n\t"
"psubd %%mm7, %%mm0 \n\t" |
"psubd %%mm7, %%mm1 \n\t" |
"psubd %%mm7, %%mm4 \n\t" |
"psubd %%mm7, %%mm5 \n\t" |
"leal (%%esi, %%esi, 2), %%edi \n\t" |
"packssdw %%mm4, %%mm0 \n\t" // EeAa
"packssdw %%mm5, %%mm1 \n\t" // FfBb
"movq %%mm0, %%mm2 \n\t" // EeAa
"punpcklwd %%mm1, %%mm0 \n\t" // BAba
"punpckhwd %%mm1, %%mm2 \n\t" // FEfe
"movq %%mm0, %%mm1 \n\t" // BAba
"punpckldq %%mm6, %%mm0 \n\t" // 00ba
"punpckhdq %%mm1, %%mm1 \n\t" // BABA
"movq %%mm0, (%0, %%edi) \n\t" |
"punpckhdq %%mm2, %%mm0 \n\t" // FE00
"punpckldq %%mm1, %%mm2 \n\t" // BAfe
"movq %%mm2, 8(%0, %%edi) \n\t" |
"movq %%mm0, 16(%0, %%edi) \n\t" |
"addl $8, %%esi \n\t" |
" jnz 1b \n\t" |
"emms \n\t" |
:: "r" (s16+1536), "r" (f+256) |
:"%esi", "%edi", "memory" |
); |
return 6*256; |
} |
static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ |
int32_t * f = (int32_t *) _f; |
asm volatile( |
"movl $-1024, %%esi \n\t" |
"movq "MANGLE(magicF2W)", %%mm7 \n\t" |
// "pxor %%mm6, %%mm6 \n\t"
"1: \n\t" |
"movq 1024(%1, %%esi), %%mm0 \n\t" |
"movq 2048(%1, %%esi), %%mm1 \n\t" |
"movq 3072(%1, %%esi), %%mm2 \n\t" |
"movq 4096(%1, %%esi), %%mm3 \n\t" |
"movq (%1, %%esi), %%mm5 \n\t"
"psubd %%mm7, %%mm0 \n\t" |
"psubd %%mm7, %%mm1 \n\t" |
"psubd %%mm7, %%mm2 \n\t" |
"psubd %%mm7, %%mm3 \n\t" |
"psubd %%mm7, %%mm5 \n\t" |
"leal (%%esi, %%esi, 2), %%edi \n\t" |
"packssdw %%mm2, %%mm0 \n\t" // CcAa
"packssdw %%mm3, %%mm1 \n\t" // DdBb
"packssdw %%mm5, %%mm5 \n\t" // FfFf
"movq %%mm0, %%mm2 \n\t" // CcAa
"punpcklwd %%mm1, %%mm0 \n\t" // BAba
"punpckhwd %%mm1, %%mm2 \n\t" // DCdc
"pxor %%mm4, %%mm4 \n\t" // 0000
"punpcklwd %%mm5, %%mm4 \n\t" // F0f0
"movq %%mm0, %%mm1 \n\t" // BAba
"movq %%mm4, %%mm3 \n\t" // F0f0
"punpckldq %%mm2, %%mm0 \n\t" // dcba
"punpckhdq %%mm1, %%mm1 \n\t" // BABA
"punpckldq %%mm1, %%mm4 \n\t" // BAf0
"punpckhdq %%mm3, %%mm2 \n\t" // F0DC
"movq %%mm0, (%0, %%edi) \n\t" |
"movq %%mm4, 8(%0, %%edi) \n\t" |
"movq %%mm2, 16(%0, %%edi) \n\t" |
"addl $8, %%esi \n\t" |
" jnz 1b \n\t" |
"emms \n\t" |
:: "r" (s16+1536), "r" (f+256) |
:"%esi", "%edi", "memory" |
); |
return 6*256; |
} |
static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ |
int32_t * f = (int32_t *) _f; |
asm volatile( |
"movl $-1024, %%esi \n\t" |
"movq "MANGLE(magicF2W)", %%mm7 \n\t" |
// "pxor %%mm6, %%mm6 \n\t"
"1: \n\t" |
"movq 1024(%1, %%esi), %%mm0 \n\t" |
"movq 3072(%1, %%esi), %%mm1 \n\t" |
"movq 4096(%1, %%esi), %%mm2 \n\t" |
"movq 5120(%1, %%esi), %%mm3 \n\t" |
"movq 2048(%1, %%esi), %%mm4 \n\t" |
"movq (%1, %%esi), %%mm5 \n\t"
"psubd %%mm7, %%mm0 \n\t" |
"psubd %%mm7, %%mm1 \n\t" |
"psubd %%mm7, %%mm2 \n\t" |
"psubd %%mm7, %%mm3 \n\t" |
"psubd %%mm7, %%mm4 \n\t" |
"psubd %%mm7, %%mm5 \n\t" |
"leal (%%esi, %%esi, 2), %%edi \n\t" |
"packssdw %%mm2, %%mm0 \n\t" // CcAa
"packssdw %%mm3, %%mm1 \n\t" // DdBb
"packssdw %%mm4, %%mm4 \n\t" // EeEe
"packssdw %%mm5, %%mm5 \n\t" // FfFf
"movq %%mm0, %%mm2 \n\t" // CcAa
"punpcklwd %%mm1, %%mm0 \n\t" // BAba
"punpckhwd %%mm1, %%mm2 \n\t" // DCdc
"punpcklwd %%mm5, %%mm4 \n\t" // FEfe
"movq %%mm0, %%mm1 \n\t" // BAba
"movq %%mm4, %%mm3 \n\t" // FEfe
"punpckldq %%mm2, %%mm0 \n\t" // dcba
"punpckhdq %%mm1, %%mm1 \n\t" // BABA
"punpckldq %%mm1, %%mm4 \n\t" // BAfe
"punpckhdq %%mm3, %%mm2 \n\t" // FEDC
"movq %%mm0, (%0, %%edi) \n\t" |
"movq %%mm4, 8(%0, %%edi) \n\t" |
"movq %%mm2, 16(%0, %%edi) \n\t" |
"addl $8, %%esi \n\t" |
" jnz 1b \n\t" |
"emms \n\t" |
:: "r" (s16+1536), "r" (f+256) |
:"%esi", "%edi", "memory" |
); |
return 6*256; |
} |
static void* a52_resample_MMX(int flags, int ch){ |
switch (flags) { |
case A52_MONO: |
if(ch==5) return a52_resample_MONO_to_5_MMX; |
break; |
case A52_CHANNEL: |
case A52_STEREO: |
case A52_DOLBY: |
if(ch==2) return a52_resample_STEREO_to_2_MMX; |
break; |
case A52_3F: |
if(ch==5) return a52_resample_3F_to_5_MMX; |
break; |
case A52_2F2R: |
if(ch==4) return a52_resample_2F_2R_to_4_MMX; |
break; |
case A52_3F2R: |
if(ch==5) return a52_resample_3F_2R_to_5_MMX; |
break; |
case A52_MONO | A52_LFE: |
if(ch==6) return a52_resample_MONO_LFE_to_6_MMX; |
break; |
case A52_CHANNEL | A52_LFE: |
case A52_STEREO | A52_LFE: |
case A52_DOLBY | A52_LFE: |
if(ch==6) return a52_resample_STEREO_LFE_to_6_MMX; |
break; |
case A52_3F | A52_LFE: |
if(ch==6) return a52_resample_3F_LFE_to_6_MMX; |
break; |
case A52_2F2R | A52_LFE: |
if(ch==6) return a52_resample_2F_2R_LFE_to_6_MMX; |
break; |
case A52_3F2R | A52_LFE: |
if(ch==6) return a52_resample_3F_2R_LFE_to_6_MMX; |
break; |
} |
return NULL; |
} |
Reference in new issue