Merge remote-tracking branch 'qatar/master'

* qatar/master: (36 commits)
  ARM: allow unaligned buffer in fixed-point NEON FFT4
  fate: test more FFT etc sizes
  dca: set AVCodecContext frame_size for DTS audio
  YASM: Shut up unused variable compiler warning with --disable-yasm.
  x86_32: Fix build on x86_32 with --disable-yasm.
  iirfilter: add fate test
  doxygen: Add qmul docs.
  ogg: propagate return values and return more meaningful error values
  H.264: fix overreads of qscale_table
  Remove unused static tables and static inline functions.
  eval: clear Parser instances before using
  dct-test: remove 'ref' function pointer from tables
  build: Remove deleted 'check' target from .PHONY list.
  oggdec: Abort Ogg header parsing when encountering a data packet.
  Add LGPL license boilerplate to files lacking it.
  mxfenc: small typo fix
  doxygen: Fix documentation for some VP8 functions.
  sha: use AV_RB32() instead of assuming buffer can be cast to uint32_t*
  des: allow unaligned input and output buffers
  aes: allow unaligned input and output buffers
  ...

Conflicts:
	libavcodec/dct-test.c
	libavcodec/libvpxenc.c
	libavcodec/x86/dsputil_mmx.c
	libavcodec/x86/h264_qpel_mmx.c
	libavfilter/x86/gradfun.c
	libavformat/oggdec.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
pull/2/head
Michael Niedermayer 14 years ago
commit 5d4fd1d1ad
  1. 2
      Makefile
  2. 4
      libavcodec/arm/fft_fixed_neon.S
  3. 1
      libavcodec/dca.c
  4. 471
      libavcodec/dct-test.c
  5. 1
      libavcodec/h264idct_template.c
  6. 18
      libavcodec/high_bit_depth.h
  7. 13
      libavcodec/iirfilter.c
  8. 1
      libavcodec/libvorbis.c
  9. 1
      libavcodec/libvpxenc.c
  10. 1
      libavcodec/libxvidff.c
  11. 5
      libavcodec/mpegvideo.c
  12. 1
      libavcodec/mpegvideo.h
  13. 18
      libavcodec/opt.h
  14. 20
      libavcodec/rv34.c
  15. 1
      libavcodec/vc1dec.c
  16. 33
      libavcodec/vp8.c
  17. 4
      libavcodec/x86/dsputil_mmx.c
  18. 2
      libavcodec/x86/h264_qpel_mmx.c
  19. 20
      libavfilter/x86/gradfun.c
  20. 2
      libavformat/mxfenc.c
  21. 19
      libavformat/oggdec.c
  22. 41
      libavutil/aes.c
  23. 8
      libavutil/des.c
  24. 17
      libavutil/eval.c
  25. 4
      libavutil/sha.c
  26. 11
      libswscale/swscale.c
  27. 1
      tests/Makefile
  28. 1
      tests/fate-run.sh
  29. 5
      tests/fate/dct.mak
  30. 46
      tests/fate/fft.mak
  31. 4
      tests/fate/libavutil.mak
  32. 4
      tests/fate2.mak
  33. 161
      tests/ref/fate/eval
  34. 1024
      tests/ref/fate/iirfilter

@ -164,5 +164,5 @@ $(sort $(OBJDIRS)):
# so this saves some time on slow systems.
.SUFFIXES:
.PHONY: all all-yes alltools *clean check config examples install*
.PHONY: all all-yes alltools *clean config examples install*
.PHONY: testprogs uninstall*

@ -75,9 +75,9 @@
.endm
function fft4_neon
vld1.16 {d0-d1}, [r0,:128]
vld1.16 {d0-d1}, [r0]
fft4 d0, d1, d2, d3
vst1.16 {d0-d1}, [r0,:128]
vst1.16 {d0-d1}, [r0]
bx lr
endfunc

@ -1650,6 +1650,7 @@ static int dca_decode_frame(AVCodecContext * avctx,
//set AVCodec values with parsed data
avctx->sample_rate = s->sample_rate;
avctx->bit_rate = s->bit_rate;
avctx->frame_size = s->sample_blocks * 32;
s->profile = FF_PROFILE_DTS;

@ -68,12 +68,12 @@ void ff_simple_idct_neon(DCTELEM *data);
void ff_simple_idct_axp(DCTELEM *data);
struct algo {
const char *name;
enum { FDCT, IDCT } is_idct;
void (* func) (DCTELEM *block);
void (* ref) (DCTELEM *block);
enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM, PARTTRANS_PERM } format;
int mm_support;
const char *name;
void (*func)(DCTELEM *block);
enum formattag { NO_PERM, MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM,
SSE2_PERM, PARTTRANS_PERM } format;
int mm_support;
int nonspec;
};
#ifndef FAAN_POSTSCALE
@ -84,61 +84,69 @@ struct algo {
static int cpu_flags;
struct algo algos[] = {
{"REF-DBL", 0, ff_ref_fdct, ff_ref_fdct, NO_PERM},
{"FAAN", 0, ff_faandct, ff_ref_fdct, FAAN_SCALE},
{"FAANI", 1, ff_faanidct, ff_ref_idct, NO_PERM},
{"IJG-AAN-INT", 0, fdct_ifast, ff_ref_fdct, SCALE_PERM},
{"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, ff_ref_fdct, NO_PERM},
{"REF-DBL", 1, ff_ref_idct, ff_ref_idct, NO_PERM},
{"INT", 1, j_rev_dct, ff_ref_idct, MMX_PERM},
{"SIMPLE-C", 1, ff_simple_idct, ff_ref_idct, NO_PERM},
static const struct algo fdct_tab[] = {
{ "REF-DBL", ff_ref_fdct, NO_PERM },
{ "FAAN", ff_faandct, FAAN_SCALE },
{ "IJG-AAN-INT", fdct_ifast, SCALE_PERM },
{ "IJG-LLM-INT", ff_jpeg_fdct_islow, NO_PERM },
#if HAVE_MMX
{"MMX", 0, ff_fdct_mmx, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_MMX},
#if HAVE_MMX2
{"MMX2", 0, ff_fdct_mmx2, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_MMX2},
{"SSE2", 0, ff_fdct_sse2, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_SSE2},
{ "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX },
{ "MMX2", ff_fdct_mmx2, NO_PERM, AV_CPU_FLAG_MMX2 },
{ "SSE2", ff_fdct_sse2, NO_PERM, AV_CPU_FLAG_SSE2 },
#endif
#if CONFIG_GPL
{"LIBMPEG2-MMX", 1, ff_mmx_idct, ff_ref_idct, MMX_PERM, AV_CPU_FLAG_MMX},
{"LIBMPEG2-MMX2", 1, ff_mmxext_idct, ff_ref_idct, MMX_PERM, AV_CPU_FLAG_MMX2},
#if HAVE_ALTIVEC
{ "altivecfdct", fdct_altivec, NO_PERM, AV_CPU_FLAG_ALTIVEC },
#endif
{"SIMPLE-MMX", 1, ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX},
{"XVID-MMX", 1, ff_idct_xvid_mmx, ff_ref_idct, NO_PERM, AV_CPU_FLAG_MMX},
{"XVID-MMX2", 1, ff_idct_xvid_mmx2, ff_ref_idct, NO_PERM, AV_CPU_FLAG_MMX2},
{"XVID-SSE2", 1, ff_idct_xvid_sse2, ff_ref_idct, SSE2_PERM, AV_CPU_FLAG_SSE2},
#if ARCH_BFIN
{ "BFINfdct", ff_bfin_fdct, NO_PERM },
#endif
#if HAVE_ALTIVEC
{"altivecfdct", 0, fdct_altivec, ff_ref_fdct, NO_PERM, AV_CPU_FLAG_ALTIVEC},
{ 0 }
};
static const struct algo idct_tab[] = {
{ "FAANI", ff_faanidct, NO_PERM },
{ "REF-DBL", ff_ref_idct, NO_PERM },
{ "INT", j_rev_dct, MMX_PERM },
{ "SIMPLE-C", ff_simple_idct, NO_PERM },
#if HAVE_MMX
#if CONFIG_GPL
{ "LIBMPEG2-MMX", ff_mmx_idct, MMX_PERM, AV_CPU_FLAG_MMX, 1 },
{ "LIBMPEG2-MMX2", ff_mmxext_idct, MMX_PERM, AV_CPU_FLAG_MMX2, 1 },
#endif
{ "SIMPLE-MMX", ff_simple_idct_mmx, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX },
{ "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 },
{ "XVID-MMX2", ff_idct_xvid_mmx2, NO_PERM, AV_CPU_FLAG_MMX2, 1 },
{ "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
#endif
#if ARCH_BFIN
{"BFINfdct", 0, ff_bfin_fdct, ff_ref_fdct, NO_PERM},
{"BFINidct", 1, ff_bfin_idct, ff_ref_idct, NO_PERM},
{ "BFINidct", ff_bfin_idct, NO_PERM },
#endif
#if ARCH_ARM
{"SIMPLE-ARM", 1, ff_simple_idct_arm, ff_ref_idct, NO_PERM },
{"INT-ARM", 1, ff_j_rev_dct_arm, ff_ref_idct, MMX_PERM },
{ "SIMPLE-ARM", ff_simple_idct_arm, NO_PERM },
{ "INT-ARM", ff_j_rev_dct_arm, MMX_PERM },
#endif
#if HAVE_ARMV5TE
{"SIMPLE-ARMV5TE", 1, ff_simple_idct_armv5te, ff_ref_idct, NO_PERM },
{ "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM },
#endif
#if HAVE_ARMV6
{"SIMPLE-ARMV6", 1, ff_simple_idct_armv6, ff_ref_idct, MMX_PERM },
{ "SIMPLE-ARMV6", ff_simple_idct_armv6, MMX_PERM },
#endif
#if HAVE_NEON
{"SIMPLE-NEON", 1, ff_simple_idct_neon, ff_ref_idct, PARTTRANS_PERM },
{ "SIMPLE-NEON", ff_simple_idct_neon, PARTTRANS_PERM },
#endif
#endif /* ARCH_ARM */
#if ARCH_ALPHA
{"SIMPLE-ALPHA", 1, ff_simple_idct_axp, ff_ref_idct, NO_PERM },
{ "SIMPLE-ALPHA", ff_simple_idct_axp, NO_PERM },
#endif
{ 0 }
{ 0 }
};
#define AANSCALE_BITS 12
@ -148,7 +156,7 @@ uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
static int64_t gettime(void)
{
struct timeval tv;
gettimeofday(&tv,NULL);
gettimeofday(&tv, NULL);
return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
}
@ -157,18 +165,18 @@ static int64_t gettime(void)
static short idct_mmx_perm[64];
static short idct_simple_mmx_perm[64]={
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
static short idct_simple_mmx_perm[64] = {
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
static void idct_mmx_init(void)
{
@ -177,13 +185,12 @@ static void idct_mmx_init(void)
/* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
for (i = 0; i < 64; i++) {
idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
// idct_simple_mmx_perm[i] = simple_block_permute_op(i);
}
}
DECLARE_ALIGNED(16, static DCTELEM, block)[64];
DECLARE_ALIGNED(8, static DCTELEM, block1)[64];
DECLARE_ALIGNED(8, static DCTELEM, block_org)[64];
DECLARE_ALIGNED(8, static DCTELEM, block1)[64];
DECLARE_ALIGNED(8, static DCTELEM, block_org)[64];
static inline void mmx_emms(void)
{
@ -193,188 +200,177 @@ static inline void mmx_emms(void)
#endif
}
static void dct_error(const char *name, int is_idct,
void (*fdct_func)(DCTELEM *block),
void (*fdct_ref)(DCTELEM *block), int form, int test, const int bits)
static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)
{
void (*ref)(DCTELEM *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
int it, i, scale;
int err_inf, v;
int64_t err2, ti, ti1, it1;
int64_t sysErr[64], sysErrMax=0;
int maxout=0;
int blockSumErrMax=0, blockSumErr;
int64_t err2, ti, ti1, it1, err_sum = 0;
int64_t sysErr[64], sysErrMax = 0;
int maxout = 0;
int blockSumErrMax = 0, blockSumErr;
AVLFG prng;
const int vals=1<<bits;
double omse, ome;
int spec_err;
av_lfg_init(&prng, 1);
err_inf = 0;
err2 = 0;
for(i=0; i<64; i++) sysErr[i]=0;
for(it=0;it<NB_ITS;it++) {
for(i=0;i<64;i++)
for (i = 0; i < 64; i++)
sysErr[i] = 0;
for (it = 0; it < NB_ITS; it++) {
for (i = 0; i < 64; i++)
block1[i] = 0;
switch(test){
switch (test) {
case 0:
for(i=0;i<64;i++)
for (i = 0; i < 64; i++)
block1[i] = (av_lfg_get(&prng) % (2*vals)) -vals;
if (is_idct){
if (is_idct) {
ff_ref_fdct(block1);
for(i=0;i<64;i++)
block1[i]>>=3;
for (i = 0; i < 64; i++)
block1[i] >>= 3;
}
break;
case 1:{
int num = av_lfg_get(&prng) % 10 + 1;
for(i=0;i<num;i++)
block1[av_lfg_get(&prng) % 64] = av_lfg_get(&prng) % (2*vals) -vals;
}break;
break;
case 1: {
int num = av_lfg_get(&prng) % 10 + 1;
for (i = 0; i < num; i++)
block1[av_lfg_get(&prng) % 64] = av_lfg_get(&prng) % (2*vals) -vals;
}
break;
case 2:
block1[0] = av_lfg_get(&prng) % (16*vals) - (8*vals);
block1[63]= (block1[0]&1)^1;
break;
block1[63] = (block1[0] & 1) ^ 1;
break;
}
#if 0 // simulate mismatch control
{ int sum=0;
for(i=0;i<64;i++)
sum+=block1[i];
if((sum&1)==0) block1[63]^=1;
}
#endif
for (i = 0; i < 64; i++)
block_org[i] = block1[i];
for(i=0; i<64; i++)
block_org[i]= block1[i];
if (form == MMX_PERM) {
for(i=0;i<64;i++)
if (dct->format == MMX_PERM) {
for (i = 0; i < 64; i++)
block[idct_mmx_perm[i]] = block1[i];
} else if (form == MMX_SIMPLE_PERM) {
for(i=0;i<64;i++)
} else if (dct->format == MMX_SIMPLE_PERM) {
for (i = 0; i < 64; i++)
block[idct_simple_mmx_perm[i]] = block1[i];
} else if (form == SSE2_PERM) {
for(i=0; i<64; i++)
block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
} else if (form == PARTTRANS_PERM) {
for(i=0; i<64; i++)
block[(i&0x24) | ((i&3)<<3) | ((i>>3)&3)] = block1[i];
} else if (dct->format == SSE2_PERM) {
for (i = 0; i < 64; i++)
block[(i & 0x38) | idct_sse2_row_perm[i & 7]] = block1[i];
} else if (dct->format == PARTTRANS_PERM) {
for (i = 0; i < 64; i++)
block[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = block1[i];
} else {
for(i=0; i<64; i++)
block[i]= block1[i];
for (i = 0; i < 64; i++)
block[i] = block1[i];
}
#if 0 // simulate mismatch control for tested IDCT but not the ref
{ int sum=0;
for(i=0;i<64;i++)
sum+=block[i];
if((sum&1)==0) block[63]^=1;
}
#endif
fdct_func(block);
dct->func(block);
mmx_emms();
if (form == SCALE_PERM) {
for(i=0; i<64; i++) {
scale = 8*(1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
if (dct->format == SCALE_PERM) {
for (i = 0; i < 64; i++) {
scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
block[i] = (block[i] * scale) >> AANSCALE_BITS;
}
}
fdct_ref(block1);
ref(block1);
blockSumErr=0;
for(i=0;i<64;i++) {
v = abs(block[i] - block1[i]);
blockSumErr = 0;
for (i = 0; i < 64; i++) {
int err = block[i] - block1[i];
err_sum += err;
v = abs(err);
if (v > err_inf)
err_inf = v;
err2 += v * v;
sysErr[i] += block[i] - block1[i];
blockSumErr += v;
if( abs(block[i])>maxout) maxout=abs(block[i]);
if (abs(block[i]) > maxout)
maxout = abs(block[i]);
}
if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
#if 0 // print different matrix pairs
if(blockSumErr){
printf("\n");
for(i=0; i<64; i++){
if((i&7)==0) printf("\n");
printf("%4d ", block_org[i]);
}
for(i=0; i<64; i++){
if((i&7)==0) printf("\n");
printf("%4d ", block[i] - block1[i]);
}
}
#endif
if (blockSumErrMax < blockSumErr)
blockSumErrMax = blockSumErr;
}
for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
for (i = 0; i < 64; i++)
sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
for(i=0; i<64; i++){
if(i%8==0) printf("\n");
printf("%7d ", (int)sysErr[i]);
for (i = 0; i < 64; i++) {
if (i % 8 == 0)
printf("\n");
printf("%7d ", (int) sysErr[i]);
}
printf("\n");
printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
is_idct ? "IDCT" : "DCT",
name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
omse = (double) err2 / NB_ITS / 64;
ome = (double) err_sum / NB_ITS / 64;
spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
printf("%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
is_idct ? "IDCT" : "DCT", dct->name, err_inf,
omse, ome, (double) sysErrMax / NB_ITS,
maxout, blockSumErrMax);
if (spec_err && !dct->nonspec)
return 1;
if (!speed)
return 0;
/* speed test */
for(i=0;i<64;i++)
for (i = 0; i < 64; i++)
block1[i] = 0;
switch(test){
switch (test) {
case 0:
for(i=0;i<64;i++)
for (i = 0; i < 64; i++)
block1[i] = av_lfg_get(&prng) % (2*vals) -vals;
if (is_idct){
if (is_idct) {
ff_ref_fdct(block1);
for(i=0;i<64;i++)
block1[i]>>=3;
for (i = 0; i < 64; i++)
block1[i] >>= 3;
}
break;
case 1:{
break;
case 1:
case 2:
block1[0] = av_lfg_get(&prng) % (2*vals) -vals;
block1[1] = av_lfg_get(&prng) % (2*vals) -vals;
block1[2] = av_lfg_get(&prng) % (2*vals) -vals;
block1[3] = av_lfg_get(&prng) % (2*vals) -vals;
}break;
break;
}
if (form == MMX_PERM) {
for(i=0;i<64;i++)
if (dct->format == MMX_PERM) {
for (i = 0; i < 64; i++)
block[idct_mmx_perm[i]] = block1[i];
} else if(form == MMX_SIMPLE_PERM) {
for(i=0;i<64;i++)
} else if (dct->format == MMX_SIMPLE_PERM) {
for (i = 0; i < 64; i++)
block[idct_simple_mmx_perm[i]] = block1[i];
} else {
for(i=0; i<64; i++)
block[i]= block1[i];
for (i = 0; i < 64; i++)
block[i] = block1[i];
}
ti = gettime();
it1 = 0;
do {
for(it=0;it<NB_ITS_SPEED;it++) {
for(i=0; i<64; i++)
block[i]= block1[i];
// memcpy(block, block1, sizeof(DCTELEM) * 64);
// do not memcpy especially not fastmemcpy because it does movntq !!!
fdct_func(block);
for (it = 0; it < NB_ITS_SPEED; it++) {
for (i = 0; i < 64; i++)
block[i] = block1[i];
dct->func(block);
}
it1 += NB_ITS_SPEED;
ti1 = gettime() - ti;
} while (ti1 < 1000000);
mmx_emms();
printf("%s %s: %0.1f kdct/s\n",
is_idct ? "IDCT" : "DCT",
name, (double)it1 * 1000.0 / (double)ti1);
printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
(double) it1 * 1000.0 / (double) ti1);
return 0;
}
DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
@ -392,19 +388,19 @@ static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
if (!init) {
init = 1;
for(i=0;i<8;i++) {
for (i = 0; i < 8; i++) {
sum = 0;
for(j=0;j<8;j++) {
s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
for (j = 0; j < 8; j++) {
s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
sum += c8[i][j] * c8[i][j];
}
}
for(i=0;i<4;i++) {
for (i = 0; i < 4; i++) {
sum = 0;
for(j=0;j<4;j++) {
s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
for (j = 0; j < 4; j++) {
s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
sum += c4[i][j] * c4[i][j];
}
@ -413,58 +409,59 @@ static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
/* butterfly */
s = 0.5 * sqrt(2.0);
for(i=0;i<4;i++) {
for(j=0;j<8;j++) {
block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
for (i = 0; i < 4; i++) {
for (j = 0; j < 8; j++) {
block1[8 * (2 * i) + j] =
(block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
block1[8 * (2 * i + 1) + j] =
(block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
}
}
/* idct8 on lines */
for(i=0;i<8;i++) {
for(j=0;j<8;j++) {
for (i = 0; i < 8; i++) {
for (j = 0; j < 8; j++) {
sum = 0;
for(k=0;k<8;k++)
sum += c8[k][j] * block1[8*i+k];
block2[8*i+j] = sum;
for (k = 0; k < 8; k++)
sum += c8[k][j] * block1[8 * i + k];
block2[8 * i + j] = sum;
}
}
/* idct4 */
for(i=0;i<8;i++) {
for(j=0;j<4;j++) {
for (i = 0; i < 8; i++) {
for (j = 0; j < 4; j++) {
/* top */
sum = 0;
for(k=0;k<4;k++)
sum += c4[k][j] * block2[8*(2*k)+i];
block3[8*(2*j)+i] = sum;
for (k = 0; k < 4; k++)
sum += c4[k][j] * block2[8 * (2 * k) + i];
block3[8 * (2 * j) + i] = sum;
/* bottom */
sum = 0;
for(k=0;k<4;k++)
sum += c4[k][j] * block2[8*(2*k+1)+i];
block3[8*(2*j+1)+i] = sum;
for (k = 0; k < 4; k++)
sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
block3[8 * (2 * j + 1) + i] = sum;
}
}
/* clamp and store the result */
for(i=0;i<8;i++) {
for(j=0;j<8;j++) {
v = block3[8*i+j];
if (v < 0)
v = 0;
else if (v > 255)
v = 255;
dest[i * linesize + j] = (int)rint(v);
for (i = 0; i < 8; i++) {
for (j = 0; j < 8; j++) {
v = block3[8 * i + j];
if (v < 0) v = 0;
else if (v > 255) v = 255;
dest[i * linesize + j] = (int) rint(v);
}
}
}
static void idct248_error(const char *name,
void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
void (*idct248_put)(uint8_t *dest, int line_size,
int16_t *block),
int speed)
{
int it, i, it1, ti, ti1, err_max, v;
AVLFG prng;
av_lfg_init(&prng, 1);
@ -472,23 +469,22 @@ static void idct248_error(const char *name,
/* just one test to see if code is correct (precision is less
important here) */
err_max = 0;
for(it=0;it<NB_ITS;it++) {
for (it = 0; it < NB_ITS; it++) {
/* XXX: use forward transform to generate values */
for(i=0;i<64;i++)
for (i = 0; i < 64; i++)
block1[i] = av_lfg_get(&prng) % 256 - 128;
block1[0] += 1024;
for(i=0; i<64; i++)
block[i]= block1[i];
for (i = 0; i < 64; i++)
block[i] = block1[i];
idct248_ref(img_dest1, 8, block);
for(i=0; i<64; i++)
block[i]= block1[i];
for (i = 0; i < 64; i++)
block[i] = block1[i];
idct248_put(img_dest, 8, block);
for(i=0;i<64;i++) {
v = abs((int)img_dest[i] - (int)img_dest1[i]);
for (i = 0; i < 64; i++) {
v = abs((int) img_dest[i] - (int) img_dest1[i]);
if (v == 255)
printf("%d %d\n", img_dest[i], img_dest1[i]);
if (v > err_max)
@ -514,18 +510,17 @@ static void idct248_error(const char *name,
}
#endif
}
printf("%s %s: err_inf=%d\n",
1 ? "IDCT248" : "DCT248",
name, err_max);
printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
if (!speed)
return;
ti = gettime();
it1 = 0;
do {
for(it=0;it<NB_ITS_SPEED;it++) {
for(i=0; i<64; i++)
block[i]= block1[i];
// memcpy(block, block1, sizeof(DCTELEM) * 64);
// do not memcpy especially not fastmemcpy because it does movntq !!!
for (it = 0; it < NB_ITS_SPEED; it++) {
for (i = 0; i < 64; i++)
block[i] = block1[i];
idct248_put(img_dest, 8, block);
}
it1 += NB_ITS_SPEED;
@ -533,9 +528,8 @@ static void idct248_error(const char *name,
} while (ti1 < 1000000);
mmx_emms();
printf("%s %s: %0.1f kdct/s\n",
1 ? "IDCT248" : "DCT248",
name, (double)it1 * 1000.0 / (double)ti1);
printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
(double) it1 * 1000.0 / (double) ti1);
}
static void help(void)
@ -545,56 +539,67 @@ static void help(void)
" 1 -> test with random sparse matrixes\n"
" 2 -> do 3. test from mpeg4 std\n"
"-i test IDCT implementations\n"
"-4 test IDCT248 implementations\n");
"-4 test IDCT248 implementations\n"
"-t speed test\n");
}
int main(int argc, char **argv)
{
int test_idct = 0, test_248_dct = 0;
int c,i;
int test=1;
int c, i;
int test = 1;
int speed = 0;
int err = 0;
int bits=8;
cpu_flags = av_get_cpu_flags();
ff_ref_dct_init();
idct_mmx_init();
for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
for(i=0;i<MAX_NEG_CROP;i++) {
for (i = 0; i < 256; i++)
cropTbl[i + MAX_NEG_CROP] = i;
for (i = 0; i < MAX_NEG_CROP; i++) {
cropTbl[i] = 0;
cropTbl[i + MAX_NEG_CROP + 256] = 255;
}
for(;;) {
c = getopt(argc, argv, "ih4");
for (;;) {
c = getopt(argc, argv, "ih4t");
if (c == -1)
break;
switch(c) {
switch (c) {
case 'i':
test_idct = 1;
break;
case '4':
test_248_dct = 1;
break;
default :
case 't':
speed = 1;
break;
default:
case 'h':
help();
return 0;
}
}
if(optind <argc) test= atoi(argv[optind]);
if (optind < argc)
test = atoi(argv[optind]);
if(optind+1 < argc) bits= atoi(argv[optind+1]);
printf("ffmpeg DCT/IDCT test\n");
if (test_248_dct) {
idct248_error("SIMPLE-C", ff_simple_idct248_put);
idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
} else {
for (i=0;algos[i].name;i++)
if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test, bits);
}
const struct algo *algos = test_idct ? idct_tab : fdct_tab;
for (i = 0; algos[i].name; i++)
if (!(~cpu_flags & algos[i].mm_support)) {
err |= dct_error(&algos[i], test, test_idct, speed, bits);
}
}
return 0;
return err;
}

@ -237,6 +237,7 @@ void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *
}
/**
* IDCT transforms the 16 dc values and dequantizes them.
* @param qmul quantization parameter
*/
void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *p_output, DCTELEM *p_input, int qmul){
#define stride 16

@ -1,3 +1,21 @@
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "dsputil.h"
#ifndef BIT_DEPTH

@ -311,6 +311,9 @@ av_cold void ff_iir_filter_free_coeffs(struct FFIIRFilterCoeffs *coeffs)
}
#ifdef TEST
#undef printf
#include <stdio.h>
#define FILT_ORDER 4
#define SIZE 1024
int main(void)
@ -320,7 +323,6 @@ int main(void)
float cutoff_coeff = 0.4;
int16_t x[SIZE], y[SIZE];
int i;
FILE* fd;
fcoeffs = ff_iir_filter_init_coeffs(NULL, FF_FILTER_TYPE_BUTTERWORTH,
FF_FILTER_MODE_LOWPASS, FILT_ORDER,
@ -333,13 +335,8 @@ int main(void)
ff_iir_filter(fcoeffs, fstate, SIZE, x, 1, y, 1);
fd = fopen("in.bin", "w");
fwrite(x, sizeof(x[0]), SIZE, fd);
fclose(fd);
fd = fopen("out.bin", "w");
fwrite(y, sizeof(y[0]), SIZE, fd);
fclose(fd);
for (i = 0; i < SIZE; i++)
printf("%6d %6d\n", x[i], y[i]);
ff_iir_filter_free_coeffs(fcoeffs);
ff_iir_filter_free_state(fstate);

@ -30,6 +30,7 @@
#include "avcodec.h"
#include "bytestream.h"
#include "vorbis.h"
#include "libavutil/mathematics.h"
#undef NDEBUG
#include <assert.h>

@ -31,6 +31,7 @@
#include "avcodec.h"
#include "libavutil/base64.h"
#include "libavutil/opt.h"
#include "libavutil/mathematics.h"
/**
* Portion of struct vpx_codec_cx_pkt from vpx_encoder.h.

@ -30,6 +30,7 @@
#include "avcodec.h"
#include "libavutil/cpu.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mathematics.h"
#include "libxvid_internal.h"
#if !HAVE_MKSTEMP
#include <fcntl.h>

@ -285,9 +285,10 @@ int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared){
}
FF_ALLOCZ_OR_GOTO(s->avctx, pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2, fail) //the +2 is for the slice end check
FF_ALLOCZ_OR_GOTO(s->avctx, pic->qscale_table , mb_array_size * sizeof(uint8_t) , fail)
FF_ALLOCZ_OR_GOTO(s->avctx, pic->qscale_table_base , (big_mb_num + s->mb_stride) * sizeof(uint8_t) , fail)
FF_ALLOCZ_OR_GOTO(s->avctx, pic->mb_type_base , (big_mb_num + s->mb_stride) * sizeof(uint32_t), fail)
pic->mb_type= pic->mb_type_base + 2*s->mb_stride+1;
pic->qscale_table = pic->qscale_table_base + 2*s->mb_stride + 1;
if(s->out_format == FMT_H264){
for(i=0; i<2; i++){
FF_ALLOCZ_OR_GOTO(s->avctx, pic->motion_val_base[i], 2 * (b4_array_size+4) * sizeof(int16_t), fail)
@ -339,7 +340,7 @@ static void free_picture(MpegEncContext *s, Picture *pic){
av_freep(&pic->mc_mb_var);
av_freep(&pic->mb_mean);
av_freep(&pic->mbskip_table);
av_freep(&pic->qscale_table);
av_freep(&pic->qscale_table_base);
av_freep(&pic->mb_type_base);
av_freep(&pic->dct_coeff);
av_freep(&pic->pan_scan);

@ -88,6 +88,7 @@ typedef struct Picture{
* halfpel luma planes.
*/
uint8_t *interpolated[3];
int8_t *qscale_table_base;
int16_t (*motion_val_base[2])[2];
uint32_t *mb_type_base;
#define MB_TYPE_INTRA MB_TYPE_INTRA4x4 //default mb_type if there is just one type

@ -1,3 +1,21 @@
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* This header is provided for compatibility only and will be removed

@ -423,17 +423,6 @@ static inline RV34VLC* choose_vlc_set(int quant, int mod, int type)
: &intra_vlcs[rv34_quant_to_vlc_set[0][av_clip(quant, 0, 30)]];
}
/**
* Decode quantizer difference and return modified quantizer.
*/
static inline int rv34_decode_dquant(GetBitContext *gb, int quant)
{
if(get_bits1(gb))
return rv34_dquant_tab[get_bits1(gb)][quant];
else
return get_bits(gb, 5);
}
/**
* Decode macroblock header and return CBP in case of success, -1 otherwise.
*/
@ -1255,15 +1244,6 @@ static int check_slice_end(RV34DecContext *r, MpegEncContext *s)
return 0;
}
static inline int slice_compare(SliceInfo *si1, SliceInfo *si2)
{
return si1->type != si2->type ||
si1->start >= si2->start ||
si1->width != si2->width ||
si1->height != si2->height||
si1->pts != si2->pts;
}
static int rv34_decode_slice(RV34DecContext *r, int end, const uint8_t* buf, int buf_size)
{
MpegEncContext *s = &r->s;

@ -45,7 +45,6 @@
#define MB_INTRA_VLC_BITS 9
#define DC_VLC_BITS 9
#define AC_VLC_BITS 9
static const uint16_t table_mb_intra[64][2];
static const uint16_t vlc_offs[] = {

@ -641,8 +641,6 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_
* @param block destination for block coefficients
* @param probs probabilities to use when reading trees from the bitstream
* @param i initial coeff index, 0 unless a separate DC block is coded
* @param zero_nhood the initial prediction context for number of surrounding
* all-zero blocks (only left/top, so 0-2)
* @param qmul array holding the dc/ac dequant factor at position 0/1
* @return 0 if no coeffs were decoded
* otherwise, the index of the last coeff decoded plus one
@ -701,6 +699,17 @@ skip_eob:
}
#endif
/**
* @param c arithmetic bitstream reader context
* @param block destination for block coefficients
* @param probs probabilities to use when reading trees from the bitstream
* @param i initial coeff index, 0 unless a separate DC block is coded
* @param zero_nhood the initial prediction context for number of surrounding
* all-zero blocks (only left/top, so 0-2)
* @param qmul array holding the dc/ac dequant factor at position 0/1
* @return 0 if no coeffs were decoded
* otherwise, the index of the last coeff decoded plus one
*/
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
uint8_t probs[16][3][NUM_DCT_TOKENS-1],
@ -1034,10 +1043,9 @@ static const uint8_t subpel_idx[3][8] = {
};
/**
* Generic MC function.
* luma MC function
*
* @param s VP8 decoding context
* @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes
* @param dst target buffer for block data at block position
* @param ref reference picture buffer at origin (0, 0)
* @param mv motion vector (relative to block position) to get pixel data from
@ -1083,6 +1091,23 @@ void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
}
}
/**
* chroma MC function
*
* @param s VP8 decoding context
* @param dst1 target buffer for block data at block position (U plane)
* @param dst2 target buffer for block data at block position (V plane)
* @param ref reference picture buffer at origin (0, 0)
* @param mv motion vector (relative to block position) to get pixel data from
* @param x_off horizontal position of block from origin (0, 0)
* @param y_off vertical position of block from origin (0, 0)
* @param block_w width of block (16, 8 or 4)
* @param block_h height of block (always same as block_w)
* @param width width of src/dst plane data
* @param height height of src/dst plane data
* @param linesize size of a single line of plane data, including padding
* @param mc_func motion compensation function pointers (bilinear or sixtap MC)
*/
static av_always_inline
void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
const VP56mv *mv, int x_off, int y_off,

@ -2563,8 +2563,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2, );
SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2, );
}
#if HAVE_YASM
else if (bit_depth == 10) {
#if HAVE_YASM
#if !ARCH_X86_64
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_);
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_);
@ -2573,8 +2573,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
#endif
SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_);
SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_);
}
#endif
}
SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2, );
SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2, );

@ -1294,6 +1294,6 @@ QPEL16_OP(mc31, MMX)\
QPEL16_OP(mc32, MMX)\
QPEL16_OP(mc33, MMX)
#if HAVE_YASM && ARCH_X86_32 // ARCH_X86_64 implies sse2+
#if ARCH_X86_32 && HAVE_YASM // ARCH_X86_64 implies sse2+
QPEL16(mmxext)
#endif

@ -1,19 +1,21 @@
/*
* Copyright (C) 2009 Loren Merritt <lorenm@u.washington.edu>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/cpu.h"

@ -1549,7 +1549,7 @@ static uint32_t ff_framenum_to_12m_time_code(unsigned frame, int drop, int fps)
((((frame / (fps * 60)) % 60) / 10) << 12) | // tens of minutes
((((frame / (fps * 60)) % 60) % 10) << 8) | // units of minutes
(0 << 7) | // b1
(0 << 6) | // b2 (NSC), field phase (PAL)
(0 << 6) | // b2 (NTSC), field phase (PAL)
((((frame / (fps * 3600) % 24)) / 10) << 4) | // tens of hours
( (frame / (fps * 3600) % 24)) % 10; // units of hours
}

@ -201,7 +201,7 @@ static int ogg_read_page(AVFormatContext *s, int *str)
uint8_t sync[4];
int sp = 0;
ret = avio_read (bc, sync, 4);
ret = avio_read(bc, sync, 4);
if (ret < 4)
return ret < 0 ? ret : AVERROR_EOF;
@ -259,7 +259,7 @@ static int ogg_read_page(AVFormatContext *s, int *str)
if(os->psize > 0)
ogg_new_buf(ogg, idx);
ret = avio_read (bc, os->segments, nsegs);
ret = avio_read(bc, os->segments, nsegs);
if (ret < nsegs)
return ret < 0 ? ret : AVERROR_EOF;
@ -292,7 +292,7 @@ static int ogg_read_page(AVFormatContext *s, int *str)
os->buf = nb;
}
ret = avio_read (bc, os->buf + os->bufpos, size);
ret = avio_read(bc, os->buf + os->bufpos, size);
if (ret < size)
return ret < 0 ? ret : AVERROR_EOF;
@ -321,7 +321,7 @@ static int ogg_packet(AVFormatContext *s, int *str, int *dstart, int *dsize,
idx = ogg->curidx;
while (idx < 0){
ret = ogg_read_page (s, &idx);
ret = ogg_read_page(s, &idx);
if (ret < 0)
return ret;
}
@ -437,7 +437,7 @@ static int ogg_get_headers(AVFormatContext *s)
int ret;
do{
ret = ogg_packet (s, NULL, NULL, NULL, NULL);
ret = ogg_packet(s, NULL, NULL, NULL, NULL);
if (ret < 0)
return ret;
}while (!ogg->headers);
@ -501,10 +501,9 @@ static int ogg_read_header(AVFormatContext *s, AVFormatParameters *ap)
int ret, i;
ogg->curidx = -1;
//linear headers seek from start
ret = ogg_get_headers (s);
if (ret < 0){
ret = ogg_get_headers(s);
if (ret < 0)
return ret;
}
for (i = 0; i < ogg->nstreams; i++)
if (ogg->streams[i].header < 0)
@ -558,7 +557,7 @@ static int ogg_read_packet(AVFormatContext *s, AVPacket *pkt)
//Get an ogg packet
retry:
do{
ret = ogg_packet (s, &idx, &pstart, &psize, &fpos);
ret = ogg_packet(s, &idx, &pstart, &psize, &fpos);
if (ret < 0)
return ret;
}while (idx < 0 || !s->streams[idx]);
@ -574,7 +573,7 @@ retry:
os->keyframe_seek = 0;
//Alloc a pkt
ret = av_new_packet (pkt, psize);
ret = av_new_packet(pkt, psize);
if (ret < 0)
return ret;
pkt->stream_index = idx;

@ -22,6 +22,7 @@
#include "common.h"
#include "aes.h"
#include "intreadwrite.h"
typedef union {
uint64_t u64[2];
@ -67,6 +68,20 @@ static inline void addkey(av_aes_block *dst, const av_aes_block *src,
dst->u64[1] = src->u64[1] ^ round_key->u64[1];
}
/**
 * XOR 16 bytes taken from a plain byte pointer with a round key.
 *
 * Each 8-byte half is fetched from src with AV_RN64, XORed with the
 * matching u64 half of round_key and stored into the matching u64 half
 * of dst. The lower half is fully processed before the upper half is read.
 *
 * @param dst       block receiving the result
 * @param src       16 input bytes, accessed only through AV_RN64
 * @param round_key block XORed into the input
 */
static inline void addkey_s(av_aes_block *dst, const uint8_t *src,
                            const av_aes_block *round_key)
{
    int half;

    for (half = 0; half < 2; half++)
        dst->u64[half] = AV_RN64(src + 8 * half) ^ round_key->u64[half];
}
/**
 * XOR a block with a round key and write the 16 result bytes to a plain
 * byte pointer.
 *
 * Each u64 half of src is XORed with the matching half of round_key and
 * stored at dst (lower half) and dst + 8 (upper half) with AV_WN64.
 *
 * @param dst       16 output bytes, written only through AV_WN64
 * @param src       block supplying the input
 * @param round_key block XORed into the input
 */
static inline void addkey_d(uint8_t *dst, const av_aes_block *src,
                            const av_aes_block *round_key)
{
    int half;

    for (half = 0; half < 2; half++)
        AV_WN64(dst + 8 * half, src->u64[half] ^ round_key->u64[half]);
}
static void subshift(av_aes_block s0[2], int s, const uint8_t *box)
{
av_aes_block *s1 = (av_aes_block *) (s0[0].u8 - s);
@ -119,32 +134,28 @@ static inline void crypt(AVAES *a, int s, const uint8_t *sbox,
subshift(&a->state[0], s, sbox);
}
void av_aes_crypt(AVAES *a, uint8_t *dst_, const uint8_t *src_,
int count, uint8_t *iv_, int decrypt)
void av_aes_crypt(AVAES *a, uint8_t *dst, const uint8_t *src,
int count, uint8_t *iv, int decrypt)
{
av_aes_block *dst = (av_aes_block *) dst_;
const av_aes_block *src = (const av_aes_block *) src_;
av_aes_block *iv = (av_aes_block *) iv_;
while (count--) {
addkey(&a->state[1], src, &a->round_key[a->rounds]);
addkey_s(&a->state[1], src, &a->round_key[a->rounds]);
if (decrypt) {
crypt(a, 0, inv_sbox, dec_multbl);
if (iv) {
addkey(&a->state[0], &a->state[0], iv);
*iv = *src;
addkey_s(&a->state[0], iv, &a->state[0]);
memcpy(iv, src, 16);
}
addkey(dst, &a->state[0], &a->round_key[0]);
addkey_d(dst, &a->state[0], &a->round_key[0]);
} else {
if (iv)
addkey(&a->state[1], &a->state[1], iv);
addkey_s(&a->state[1], iv, &a->state[1]);
crypt(a, 2, sbox, enc_multbl);
addkey(dst, &a->state[0], &a->round_key[0]);
addkey_d(dst, &a->state[0], &a->round_key[0]);
if (iv)
*iv = *dst;
memcpy(iv, dst, 16);
}
src++;
dst++;
src += 16;
dst += 16;
}
}

@ -299,10 +299,10 @@ int av_des_init(AVDES *d, const uint8_t *key, int key_bits, int decrypt) {
}
void av_des_crypt(AVDES *d, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv, int decrypt) {
uint64_t iv_val = iv ? av_be2ne64(*(uint64_t *)iv) : 0;
uint64_t iv_val = iv ? AV_RB64(iv) : 0;
while (count-- > 0) {
uint64_t dst_val;
uint64_t src_val = src ? av_be2ne64(*(const uint64_t *)src) : 0;
uint64_t src_val = src ? AV_RB64(src) : 0;
if (decrypt) {
uint64_t tmp = src_val;
if (d->triple_des) {
@ -319,12 +319,12 @@ void av_des_crypt(AVDES *d, uint8_t *dst, const uint8_t *src, int count, uint8_t
}
iv_val = iv ? dst_val : 0;
}
*(uint64_t *)dst = av_be2ne64(dst_val);
AV_WB64(dst, dst_val);
src += 8;
dst += 8;
}
if (iv)
*(uint64_t *)iv = av_be2ne64(iv_val);
AV_WB64(iv, iv_val);
}
#ifdef TEST

@ -472,7 +472,7 @@ int av_expr_parse(AVExpr **expr, const char *s,
const char * const *func2_names, double (* const *funcs2)(void *, double, double),
int log_offset, void *log_ctx)
{
Parser p;
Parser p = { 0 };
AVExpr *e = NULL;
char *w = av_malloc(strlen(s) + 1);
char *wp = w;
@ -517,7 +517,7 @@ end:
double av_expr_eval(AVExpr *e, const double *const_values, void *opaque)
{
Parser p;
Parser p = { 0 };
p.const_values = const_values;
p.opaque = opaque;
@ -576,6 +576,8 @@ void av_free_expr(AVExpr *e)
#ifdef TEST
#undef printf
#include <string.h>
static double const_values[] = {
M_PI,
M_E,
@ -588,7 +590,7 @@ static const char *const_names[] = {
0
};
int main(void)
int main(int argc, char **argv)
{
int i;
double d;
@ -669,13 +671,16 @@ int main(void)
NULL, NULL, NULL, NULL, NULL, 0, NULL);
printf("%f == 0.931322575\n", d);
for (i=0; i<1050; i++) {
START_TIMER
if (argc > 1 && !strcmp(argv[1], "-t")) {
for (i = 0; i < 1050; i++) {
START_TIMER;
av_expr_parse_and_eval(&d, "1+(5-2)^(3-1)+1/2+sin(PI)-max(-2.2,-3.1)",
const_names, const_values,
NULL, NULL, NULL, NULL, NULL, 0, NULL);
STOP_TIMER("av_expr_parse_and_eval")
STOP_TIMER("av_expr_parse_and_eval");
}
}
return 0;
}
#endif

@ -42,7 +42,7 @@ const int av_sha_size = sizeof(AVSHA);
#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))
/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
#define blk0(i) (block[i] = av_be2ne32(((const uint32_t*)buffer)[i]))
#define blk0(i) (block[i] = AV_RB32(buffer + 4 * (i)))
#define blk(i) (block[i] = rol(block[i-3] ^ block[i-8] ^ block[i-14] ^ block[i-16], 1))
#define R0(v,w,x,y,z,i) z += ((w&(x^y))^y) + blk0(i) + 0x5A827999 + rol(v, 5); w = rol(w, 30);
@ -67,7 +67,7 @@ static void sha1_transform(uint32_t state[5], const uint8_t buffer[64])
for (i = 0; i < 80; i++) {
int t;
if (i < 16)
t = av_be2ne32(((uint32_t*)buffer)[i]);
t = AV_RB32(buffer + 4 * i);
else
t = rol(block[i-3] ^ block[i-8] ^ block[i-14] ^ block[i-16], 1);
block[i] = t;

@ -80,17 +80,6 @@ untested special converters
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
static const double rgb2yuv_table[8][9]={
{0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
{0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
{0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
{0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
{0.59 , 0.11 , 0.30 , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
{0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
{0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
{0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
};
/*
NOTES
Special versions: fast Y 1:1 scaling (no interpolation in y direction)

@ -41,6 +41,7 @@ include $(SRC_PATH)/tests/fate/aac.mak
include $(SRC_PATH)/tests/fate/als.mak
include $(SRC_PATH)/tests/fate/amrnb.mak
include $(SRC_PATH)/tests/fate/amrwb.mak
include $(SRC_PATH)/tests/fate/dct.mak
include $(SRC_PATH)/tests/fate/fft.mak
include $(SRC_PATH)/tests/fate/h264.mak
include $(SRC_PATH)/tests/fate/libavutil.mak

@ -123,6 +123,7 @@ if test -e "$ref"; then
diff) diff -u -w "$ref" "$outfile" >$cmpfile ;;
oneoff) oneoff "$ref" "$outfile" "$fuzz" >$cmpfile ;;
stddev) stddev "$ref" "$outfile" "$fuzz" >$cmpfile ;;
null) cat "$outfile" >$cmpfile ;;
esac
cmperr=$?
test $err = 0 && err=$cmperr

@ -0,0 +1,5 @@
FATE_TESTS += fate-idct8x8
fate-idct8x8: libavcodec/dct-test$(EXESUF)
fate-idct8x8: CMD = run libavcodec/dct-test -i
fate-idct8x8: REF = /dev/null
fate-idct8x8: CMP = null

@ -1,28 +1,36 @@
FATE_FFT = fate-fft fate-ifft \
fate-mdct fate-imdct \
fate-rdft fate-irdft \
fate-dct1d fate-idct1d
fate-fft: CMD = run libavcodec/fft-test
fate-ifft: CMD = run libavcodec/fft-test -i
fate-mdct: CMD = run libavcodec/fft-test -m
fate-imdct: CMD = run libavcodec/fft-test -m -i
fate-rdft: CMD = run libavcodec/fft-test -r
fate-irdft: CMD = run libavcodec/fft-test -r -i
fate-dct1d: CMD = run libavcodec/fft-test -d
fate-idct1d: CMD = run libavcodec/fft-test -d -i
# Template for one set of fft-test targets. $(1) is the size argument; it is
# appended to every target name and passed to fft-test as -n$(1).
# Target names use $(1) rather than the caller's loop variable, so the
# template does not depend on what the $(foreach ...) variable is called
# (same convention as DEF_FFT_FIXED).
define DEF_FFT
FATE_FFT += fate-fft-$(1)   fate-ifft-$(1)   \
            fate-mdct-$(1)  fate-imdct-$(1)  \
            fate-rdft-$(1)  fate-irdft-$(1)  \
            fate-dct1d-$(1) fate-idct1d-$(1)
fate-fft-$(1):    CMD = run libavcodec/fft-test -n$(1)
fate-ifft-$(1):   CMD = run libavcodec/fft-test -n$(1) -i
fate-mdct-$(1):   CMD = run libavcodec/fft-test -n$(1) -m
fate-imdct-$(1):  CMD = run libavcodec/fft-test -n$(1) -m -i
fate-rdft-$(1):   CMD = run libavcodec/fft-test -n$(1) -r
fate-irdft-$(1):  CMD = run libavcodec/fft-test -n$(1) -r -i
fate-dct1d-$(1):  CMD = run libavcodec/fft-test -n$(1) -d
fate-idct1d-$(1): CMD = run libavcodec/fft-test -n$(1) -d -i
endef
$(foreach N, 4 5 6 7 8 9 10 11 12, $(eval $(call DEF_FFT,$(N))))
fate-fft-test: $(FATE_FFT)
$(FATE_FFT): libavcodec/fft-test$(EXESUF)
$(FATE_FFT): REF = /dev/null
FATE_FFT_FIXED = fate-fft-fixed fate-ifft-fixed \
fate-mdct-fixed fate-imdct-fixed
# Template for one set of fft-fixed-test targets. $(1) is the size argument;
# it is appended to every target name and passed to fft-fixed-test as -n$(1).
# Each expansion adds its four targets (fft, ifft, mdct, imdct) to
# FATE_FFT_FIXED and sets their CMD.
define DEF_FFT_FIXED
FATE_FFT_FIXED += fate-fft-fixed-$(1) fate-ifft-fixed-$(1) \
fate-mdct-fixed-$(1) fate-imdct-fixed-$(1)
fate-fft-fixed-$(1): CMD = run libavcodec/fft-fixed-test -n$(1)
fate-ifft-fixed-$(1): CMD = run libavcodec/fft-fixed-test -n$(1) -i
fate-mdct-fixed-$(1): CMD = run libavcodec/fft-fixed-test -n$(1) -m
fate-imdct-fixed-$(1): CMD = run libavcodec/fft-fixed-test -n$(1) -m -i
endef
fate-fft-fixed: CMD = run libavcodec/fft-fixed-test
fate-ifft-fixed: CMD = run libavcodec/fft-fixed-test -i
fate-mdct-fixed: CMD = run libavcodec/fft-fixed-test -m
fate-imdct-fixed: CMD = run libavcodec/fft-fixed-test -m -i
$(foreach N, 4 5 6 7 8 9 10 11 12, $(eval $(call DEF_FFT_FIXED,$(N))))
fate-fft-fixed-test: $(FATE_FFT_FIXED)
$(FATE_FFT_FIXED): libavcodec/fft-fixed-test$(EXESUF)

@ -21,6 +21,10 @@ fate-des: libavutil/des-test$(EXESUF)
fate-des: CMD = run libavutil/des-test
fate-des: REF = /dev/null
FATE_TESTS += fate-eval
fate-eval: libavutil/eval-test$(EXESUF)
fate-eval: CMD = run libavutil/eval-test
FATE_TESTS += fate-md5
fate-md5: libavutil/md5-test$(EXESUF)
fate-md5: CMD = run libavutil/md5-test

@ -218,3 +218,7 @@ fate-musepack7: CMD = pcm -i $(SAMPLES)/musepack/inside-mp7.mpc
fate-musepack7: CMP = oneoff
fate-musepack7: REF = $(SAMPLES)/musepack/inside-mp7.pcm
fate-musepack7: FUZZ = 1
FATE_TESTS += fate-iirfilter
fate-iirfilter: libavcodec/iirfilter-test$(EXESUF)
fate-iirfilter: CMD = run libavcodec/iirfilter-test

@ -0,0 +1,161 @@
Evaluating ''
'' -> nan
Evaluating '1;2'
'1;2' -> 2.000000
Evaluating '-20'
'-20' -> -20.000000
Evaluating '-PI'
'-PI' -> -3.141593
Evaluating '+PI'
'+PI' -> 3.141593
Evaluating '1+(5-2)^(3-1)+1/2+sin(PI)-max(-2.2,-3.1)'
'1+(5-2)^(3-1)+1/2+sin(PI)-max(-2.2,-3.1)' -> 12.700000
Evaluating '80G/80Gi1k'
'80G/80Gi1k' -> nan
Evaluating '1Gi'
'1Gi' -> 1073741824.000000
Evaluating '1gi'
'1gi' -> nan
Evaluating '1GiFoo'
'1GiFoo' -> nan
Evaluating '1k+1k'
'1k+1k' -> 2000.000000
Evaluating '1Gi*3foo'
'1Gi*3foo' -> nan
Evaluating 'foo'
'foo' -> nan
Evaluating 'foo('
'foo(' -> nan
Evaluating 'foo()'
'foo()' -> nan
Evaluating 'foo)'
'foo)' -> nan
Evaluating 'sin'
'sin' -> nan
Evaluating 'sin('
'sin(' -> nan
Evaluating 'sin()'
'sin()' -> nan
Evaluating 'sin)'
'sin)' -> nan
Evaluating 'sin 10'
'sin 10' -> nan
Evaluating 'sin(1,2,3)'
'sin(1,2,3)' -> nan
Evaluating 'sin(1 )'
'sin(1 )' -> 0.841471
Evaluating '1'
'1' -> 1.000000
Evaluating '1foo'
'1foo' -> nan
Evaluating 'bar + PI + E + 100f*2 + foo'
'bar + PI + E + 100f*2 + foo' -> nan
Evaluating '13k + 12f - foo(1, 2)'
'13k + 12f - foo(1, 2)' -> nan
Evaluating '1gi'
'1gi' -> nan
Evaluating '1Gi'
'1Gi' -> 1073741824.000000
Evaluating 'st(0, 123)'
'st(0, 123)' -> 123.000000
Evaluating 'st(1, 123); ld(1)'
'st(1, 123); ld(1)' -> 123.000000
Evaluating 'st(0, 1); while(lte(ld(0), 100), st(1, ld(1)+ld(0));st(0, ld(0)+1)); ld(1)'
'st(0, 1); while(lte(ld(0), 100), st(1, ld(1)+ld(0));st(0, ld(0)+1)); ld(1)' -> 4950.000000
Evaluating 'st(1, 1); st(2, 2); st(0, 1); while(lte(ld(0),10), st(3, ld(1)+ld(2)); st(1, ld(2)); st(2, ld(3)); st(0, ld(0)+1)); ld(3)'
'st(1, 1); st(2, 2); st(0, 1); while(lte(ld(0),10), st(3, ld(1)+ld(2)); st(1, ld(2)); st(2, ld(3)); st(0, ld(0)+1)); ld(3)' -> 144.000000
Evaluating 'while(0, 10)'
'while(0, 10)' -> nan
Evaluating 'st(0, 1); while(lte(ld(0),100), st(1, ld(1)+ld(0)); st(0, ld(0)+1))'
'st(0, 1); while(lte(ld(0),100), st(1, ld(1)+ld(0)); st(0, ld(0)+1))' -> 100.000000
Evaluating 'isnan(1)'
'isnan(1)' -> 0.000000
Evaluating 'isnan(NAN)'
'isnan(NAN)' -> 1.000000
Evaluating 'floor(NAN)'
'floor(NAN)' -> nan
Evaluating 'floor(123.123)'
'floor(123.123)' -> 123.000000
Evaluating 'floor(-123.123)'
'floor(-123.123)' -> -124.000000
Evaluating 'trunc(123.123)'
'trunc(123.123)' -> 123.000000
Evaluating 'trunc(-123.123)'
'trunc(-123.123)' -> -123.000000
Evaluating 'ceil(123.123)'
'ceil(123.123)' -> 124.000000
Evaluating 'ceil(-123.123)'
'ceil(-123.123)' -> -123.000000
Evaluating 'sqrt(1764)'
'sqrt(1764)' -> 42.000000
Evaluating 'sqrt(-1)'
'sqrt(-1)' -> -nan
Evaluating 'not(1)'
'not(1)' -> 0.000000
Evaluating 'not(NAN)'
'not(NAN)' -> 0.000000
Evaluating 'not(0)'
'not(0)' -> 1.000000
Evaluating 'pow(0,1.23)'
'pow(0,1.23)' -> 0.000000
Evaluating 'pow(PI,1.23)'
'pow(PI,1.23)' -> 4.087844
Evaluating 'PI^1.23'
'PI^1.23' -> 4.087844
Evaluating 'pow(-1,1.23)'
'pow(-1,1.23)' -> -nan
12.700000 == 12.7
0.931323 == 0.931322575

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save