|
|
|
@ -40,6 +40,7 @@ |
|
|
|
|
#include "libavutil/time.h" |
|
|
|
|
|
|
|
|
|
#include "dct.h" |
|
|
|
|
#include "idctdsp.h" |
|
|
|
|
#include "simple_idct.h" |
|
|
|
|
#include "aandcttab.h" |
|
|
|
|
#include "faandct.h" |
|
|
|
@ -60,64 +61,63 @@ void ff_simple_idct_neon(int16_t *data); |
|
|
|
|
struct algo { |
|
|
|
|
const char *name; |
|
|
|
|
void (*func)(int16_t *block); |
|
|
|
|
enum formattag { NO_PERM, MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, |
|
|
|
|
SSE2_PERM, PARTTRANS_PERM } format; |
|
|
|
|
enum idct_permutation_type perm_type; |
|
|
|
|
int cpu_flag; |
|
|
|
|
int nonspec; |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
static const struct algo fdct_tab[] = { |
|
|
|
|
{ "REF-DBL", ff_ref_fdct, NO_PERM }, |
|
|
|
|
{ "FAAN", ff_faandct, NO_PERM }, |
|
|
|
|
{ "IJG-AAN-INT", ff_fdct_ifast, SCALE_PERM }, |
|
|
|
|
{ "IJG-LLM-INT", ff_jpeg_fdct_islow_8, NO_PERM }, |
|
|
|
|
{ "REF-DBL", ff_ref_fdct, FF_IDCT_PERM_NONE }, |
|
|
|
|
{ "FAAN", ff_faandct, FF_IDCT_PERM_NONE }, |
|
|
|
|
{ "IJG-AAN-INT", ff_fdct_ifast, FF_IDCT_PERM_NONE }, |
|
|
|
|
{ "IJG-LLM-INT", ff_jpeg_fdct_islow_8, FF_IDCT_PERM_NONE }, |
|
|
|
|
|
|
|
|
|
#if HAVE_MMX_INLINE |
|
|
|
|
{ "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX }, |
|
|
|
|
{ "MMX", ff_fdct_mmx, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX }, |
|
|
|
|
#endif |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
{ "MMXEXT", ff_fdct_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT }, |
|
|
|
|
{ "MMXEXT", ff_fdct_mmxext, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT }, |
|
|
|
|
#endif |
|
|
|
|
#if HAVE_SSE2_INLINE |
|
|
|
|
{ "SSE2", ff_fdct_sse2, NO_PERM, AV_CPU_FLAG_SSE2 }, |
|
|
|
|
{ "SSE2", ff_fdct_sse2, FF_IDCT_PERM_NONE, AV_CPU_FLAG_SSE2 }, |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#if HAVE_ALTIVEC |
|
|
|
|
{ "altivecfdct", ff_fdct_altivec, NO_PERM, AV_CPU_FLAG_ALTIVEC }, |
|
|
|
|
{ "altivecfdct", ff_fdct_altivec, FF_IDCT_PERM_NONE, AV_CPU_FLAG_ALTIVEC }, |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
{ 0 } |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
static const struct algo idct_tab[] = { |
|
|
|
|
{ "FAANI", ff_faanidct, NO_PERM }, |
|
|
|
|
{ "REF-DBL", ff_ref_idct, NO_PERM }, |
|
|
|
|
{ "INT", ff_j_rev_dct, MMX_PERM }, |
|
|
|
|
{ "SIMPLE-C", ff_simple_idct_8, NO_PERM }, |
|
|
|
|
{ "FAANI", ff_faanidct, FF_IDCT_PERM_NONE }, |
|
|
|
|
{ "REF-DBL", ff_ref_idct, FF_IDCT_PERM_NONE }, |
|
|
|
|
{ "INT", ff_j_rev_dct, FF_IDCT_PERM_LIBMPEG2 }, |
|
|
|
|
{ "SIMPLE-C", ff_simple_idct_8, FF_IDCT_PERM_NONE }, |
|
|
|
|
|
|
|
|
|
#if HAVE_MMX_INLINE |
|
|
|
|
{ "SIMPLE-MMX", ff_simple_idct_mmx, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX }, |
|
|
|
|
{ "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 }, |
|
|
|
|
{ "SIMPLE-MMX", ff_simple_idct_mmx, FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_MMX }, |
|
|
|
|
{ "XVID-MMX", ff_idct_xvid_mmx, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX, 1 }, |
|
|
|
|
#endif |
|
|
|
|
#if HAVE_MMXEXT_INLINE |
|
|
|
|
{ "XVID-MMXEXT", ff_idct_xvid_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 }, |
|
|
|
|
{ "XVID-MMXEXT", ff_idct_xvid_mmxext, FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMXEXT, 1 }, |
|
|
|
|
#endif |
|
|
|
|
#if HAVE_SSE2_INLINE |
|
|
|
|
{ "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 }, |
|
|
|
|
{ "XVID-SSE2", ff_idct_xvid_sse2, FF_IDCT_PERM_SSE2, AV_CPU_FLAG_SSE2, 1 }, |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#if ARCH_ARM |
|
|
|
|
{ "SIMPLE-ARM", ff_simple_idct_arm, NO_PERM }, |
|
|
|
|
{ "INT-ARM", ff_j_rev_dct_arm, MMX_PERM }, |
|
|
|
|
{ "SIMPLE-ARM", ff_simple_idct_arm, FF_IDCT_PERM_NONE }, |
|
|
|
|
{ "INT-ARM", ff_j_rev_dct_arm, FF_IDCT_PERM_LIBMPEG2 }, |
|
|
|
|
#endif |
|
|
|
|
#if HAVE_ARMV5TE |
|
|
|
|
{ "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM, AV_CPU_FLAG_ARMV5TE }, |
|
|
|
|
{ "SIMPLE-ARMV5TE", ff_simple_idct_armv5te, FF_IDCT_PERM_NONE, AV_CPU_FLAG_ARMV5TE }, |
|
|
|
|
#endif |
|
|
|
|
#if HAVE_ARMV6 |
|
|
|
|
{ "SIMPLE-ARMV6", ff_simple_idct_armv6, MMX_PERM, AV_CPU_FLAG_ARMV6 }, |
|
|
|
|
{ "SIMPLE-ARMV6", ff_simple_idct_armv6, FF_IDCT_PERM_LIBMPEG2, AV_CPU_FLAG_ARMV6 }, |
|
|
|
|
#endif |
|
|
|
|
#if HAVE_NEON && ARCH_ARM |
|
|
|
|
{ "SIMPLE-NEON", ff_simple_idct_neon, PARTTRANS_PERM, AV_CPU_FLAG_NEON }, |
|
|
|
|
{ "SIMPLE-NEON", ff_simple_idct_neon, FF_IDCT_PERM_PARTTRANS, AV_CPU_FLAG_NEON }, |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
{ 0 } |
|
|
|
@ -172,25 +172,32 @@ static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng) |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void permute(int16_t dst[64], const int16_t src[64], int perm) |
|
|
|
|
static void permute(int16_t dst[64], const int16_t src[64], |
|
|
|
|
enum idct_permutation_type perm_type) |
|
|
|
|
{ |
|
|
|
|
int i; |
|
|
|
|
|
|
|
|
|
if (perm == MMX_PERM) { |
|
|
|
|
switch (perm_type) { |
|
|
|
|
case FF_IDCT_PERM_LIBMPEG2: |
|
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
|
dst[(i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2)] = src[i]; |
|
|
|
|
} else if (perm == MMX_SIMPLE_PERM) { |
|
|
|
|
break; |
|
|
|
|
case FF_IDCT_PERM_SIMPLE: |
|
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
|
dst[idct_simple_mmx_perm[i]] = src[i]; |
|
|
|
|
} else if (perm == SSE2_PERM) { |
|
|
|
|
break; |
|
|
|
|
case FF_IDCT_PERM_SSE2: |
|
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
|
dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i]; |
|
|
|
|
} else if (perm == PARTTRANS_PERM) { |
|
|
|
|
break; |
|
|
|
|
case FF_IDCT_PERM_PARTTRANS: |
|
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
|
dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i]; |
|
|
|
|
} else { |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
for (i = 0; i < 64; i++) |
|
|
|
|
dst[i] = src[i]; |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -215,12 +222,12 @@ static int dct_error(const struct algo *dct, int test, int is_idct, int speed) |
|
|
|
|
sysErr[i] = 0; |
|
|
|
|
for (it = 0; it < NB_ITS; it++) { |
|
|
|
|
init_block(block1, test, is_idct, &prng); |
|
|
|
|
permute(block, block1, dct->format); |
|
|
|
|
permute(block, block1, dct->perm_type); |
|
|
|
|
|
|
|
|
|
dct->func(block); |
|
|
|
|
emms_c(); |
|
|
|
|
|
|
|
|
|
if (dct->format == SCALE_PERM) { |
|
|
|
|
if (!strcmp(dct->name, "IJG-AAN-INT")) { |
|
|
|
|
for (i = 0; i < 64; i++) { |
|
|
|
|
scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i]; |
|
|
|
|
block[i] = (block[i] * scale) >> AANSCALE_BITS; |
|
|
|
@ -273,7 +280,7 @@ static int dct_error(const struct algo *dct, int test, int is_idct, int speed) |
|
|
|
|
|
|
|
|
|
/* speed test */ |
|
|
|
|
init_block(block, test, is_idct, &prng); |
|
|
|
|
permute(block1, block, dct->format); |
|
|
|
|
permute(block1, block, dct->perm_type); |
|
|
|
|
|
|
|
|
|
ti = av_gettime(); |
|
|
|
|
it1 = 0; |
|
|
|
|