Merge remote branch 'qatar/master'

* qatar/master: (23 commits)
  ac3enc: correct the flipped sign in the ac3_fixed encoder
  Eliminate pointless '#if 1' statements without matching '#else'.
  Add AVX FFT implementation.
  Increase alignment of av_malloc() as needed by AVX ASM.
  Update x86inc.asm from x264 to allow AVX emulation using SSE and MMX.
  mjpeg: Detect overreads in mjpeg_decode_scan() and error out.
  documentation: extend documentation for ffmpeg -aspect option
  APIChanges: update commit hashes for recent additions.
  lavc: deprecate FF_*_TYPE macros in favor of AV_PICTURE_TYPE_* enums
  aac: add headers needed for log2f()
  lavc: remove FF_API_MB_Q cruft
  lavc: remove FF_API_RATE_EMU cruft
  lavc: remove FF_API_HURRY_UP cruft
  pad: make the filter parametric
  vsrc_movie: add key_frame and pict_type.
  vsrc_movie: fix leak in request_frame()
  lavfi: add key_frame and pict_type to AVFilterBufferRefVideo.
  vsrc_buffer: add sample_aspect_ratio fields to arguments.
  lavfi: add fieldorder filter
  scale: make the filter parametric
  ...

Conflicts:
	Changelog
	doc/filters.texi
	ffmpeg.c
	libavcodec/ac3dec.h
	libavcodec/dsputil.c
	libavfilter/avfilter.h
	libavfilter/vf_scale.c
	libavfilter/vf_yadif.c
	libavfilter/vsrc_buffer.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
oldabi
Michael Niedermayer 14 years ago
commit d7e5aebae7
  1. 2
      Changelog
  2. 11
      doc/APIchanges
  3. 4
      ffmpeg.c
  4. 2
      ffserver.c
  5. 10
      libavcodec/aac.h
  6. 1
      libavcodec/aaccoder.c
  7. 2
      libavcodec/aacenc.h
  8. 1
      libavcodec/aacsbr.c
  9. 14
      libavcodec/ac3dec.h
  10. 2
      libavcodec/ac3enc.c
  11. 2
      libavcodec/ac3enc_fixed.c
  12. 20
      libavcodec/atrac1.c
  13. 6
      libavcodec/atrac3.c
  14. 60
      libavcodec/avcodec.h
  15. 2
      libavcodec/binkaudio.c
  16. 2
      libavcodec/cook.c
  17. 10
      libavcodec/dca.c
  18. 5
      libavcodec/dct-test.c
  19. 17
      libavcodec/error_resilience.c
  20. 53
      libavcodec/fft.c
  21. 3
      libavcodec/fft.h
  22. 4
      libavcodec/h261dec.c
  23. 8
      libavcodec/h263dec.c
  24. 18
      libavcodec/h264.c
  25. 2
      libavcodec/h264.h
  26. 2
      libavcodec/imc.c
  27. 2
      libavcodec/motion_est_template.c
  28. 8
      libavcodec/mpeg12.c
  29. 6
      libavcodec/mpegvideo.c
  30. 5
      libavcodec/mpegvideo.h
  31. 7
      libavcodec/msmpeg4.c
  32. 4
      libavcodec/nellymoserdec.c
  33. 6
      libavcodec/nellymoserenc.c
  34. 10
      libavcodec/options.c
  35. 3
      libavcodec/pthread.c
  36. 2
      libavcodec/qdm2.c
  37. 9
      libavcodec/rv34.c
  38. 2
      libavcodec/sh4/qpel.c
  39. 2
      libavcodec/snow.c
  40. 3
      libavcodec/svq1dec.c
  41. 8
      libavcodec/svq3.c
  42. 13
      libavcodec/utils.c
  43. 10
      libavcodec/vc1dec.c
  44. 14
      libavcodec/version.h
  45. 8
      libavcodec/wma.h
  46. 4
      libavcodec/wmaprodec.c
  47. 6
      libavcodec/wmavoice.c
  48. 9
      libavcodec/x86/fft.c
  49. 2
      libavcodec/x86/fft.h
  50. 484
      libavcodec/x86/fft_mmx.asm
  51. 8
      libavcodec/x86/fft_sse.c
  52. 249
      libavcodec/x86/x86inc.asm
  53. 6
      libavfilter/avfilter.h
  54. 4
      libavfilter/vf_scale.c
  55. 3
      libavfilter/vf_yadif.c
  56. 2
      libavfilter/vsrc_buffer.c
  57. 2
      libavformat/nutenc.c
  58. 21
      libavutil/avutil.h
  59. 16
      libavutil/mem.c
  60. 3
      libavutil/pca.c
  61. 14
      libavutil/utils.c
  62. 2
      tests/ref/acodec/ac3_fixed
  63. 2
      tests/ref/lavf/rm
  64. 3
      tests/ref/seek/ac3_rm

@ -10,7 +10,7 @@ version <next>:
- libxvid aspect pickiness fixed
- Frame multithreaded decoding
- Lots of deprecated API cruft removed
- fft and imdct optimizations for AVX (Sandy Bridge) processors
version 0.7_beta1:

@ -13,6 +13,17 @@ libavutil: 2011-04-18
API changes, most recent first:
2011-04-XX - bebe72f - lavu 51.1.0 - avutil.h
Add AVPictureType enum and av_get_picture_type_char(), deprecate
FF_*_TYPE defines and av_get_pict_type_char() defined in
libavcodec/avcodec.h.
2011-04-xx - 10d3940 - lavfi 2.3.0 - avfilter.h
Add pict_type and key_frame fields to AVFilterBufferRefVideo.
2011-04-xx - 7a11c82 - lavfi 2.2.0 - vsrc_buffer
Add sample_aspect_ratio fields to vsrc_buffer arguments
2011-04-21 - 94f7451 - lavc 53.1.0 - avcodec.h
Add CODEC_CAP_SLICE_THREADS for codecs supporting sliced threading.

@ -2908,6 +2908,10 @@ static void opt_frame_aspect_ratio(const char *arg)
ffmpeg_exit(1);
}
frame_aspect_ratio = ar;
x = vfilters ? strlen(vfilters) : 0;
vfilters = av_realloc(vfilters, x+100);
snprintf(vfilters+x, x+100, "%csetdar=%f\n", x?',':' ', ar);
}
static int opt_metadata(const char *opt, const char *arg)

@ -2185,10 +2185,8 @@ static int open_input_stream(HTTPContext *c, const char *info)
}
}
#if 1
if (c->fmt_in->iformat->read_seek)
av_seek_frame(c->fmt_in, -1, stream_pos, 0);
#endif
/* set the start time (needed for maxtime and RTP packet timing) */
c->start_time = cur_time;
c->first_pts = AV_NOPTS_VALUE;

@ -223,9 +223,9 @@ typedef struct {
float sf[120]; ///< scalefactors
int sf_idx[128]; ///< scalefactor indices (used by encoder)
uint8_t zeroes[128]; ///< band is not coded (used by encoder)
DECLARE_ALIGNED(16, float, coeffs)[1024]; ///< coefficients for IMDCT
DECLARE_ALIGNED(16, float, saved)[1024]; ///< overlap
DECLARE_ALIGNED(16, float, ret)[2048]; ///< PCM output
DECLARE_ALIGNED(32, float, coeffs)[1024]; ///< coefficients for IMDCT
DECLARE_ALIGNED(32, float, saved)[1024]; ///< overlap
DECLARE_ALIGNED(32, float, ret)[2048]; ///< PCM output
DECLARE_ALIGNED(16, int16_t, ltp_state)[3072]; ///< time signal for LTP
PredictorState predictor_state[MAX_PREDICTORS];
} SingleChannelElement;
@ -272,7 +272,7 @@ typedef struct {
* @defgroup temporary aligned temporary buffers (We do not want to have these on the stack.)
* @{
*/
DECLARE_ALIGNED(16, float, buf_mdct)[1024];
DECLARE_ALIGNED(32, float, buf_mdct)[1024];
/** @} */
/**
@ -296,7 +296,7 @@ typedef struct {
int sf_offset; ///< offset into pow2sf_tab as appropriate for dsp.float_to_int16
/** @} */
DECLARE_ALIGNED(16, float, temp)[128];
DECLARE_ALIGNED(32, float, temp)[128];
enum OCStatus output_configured;
} AACContext;

@ -37,6 +37,7 @@
#include "aac.h"
#include "aacenc.h"
#include "aactab.h"
#include "libavutil/libm.h"
/** bits needed to code codebook run value for long windows */
static const uint8_t run_value_bits_long[64] = {

@ -64,7 +64,7 @@ typedef struct AACEncContext {
int last_frame;
float lambda;
DECLARE_ALIGNED(16, int, qcoefs)[96]; ///< quantized coefficients
DECLARE_ALIGNED(16, float, scoefs)[1024]; ///< scaled coefficients
DECLARE_ALIGNED(32, float, scoefs)[1024]; ///< scaled coefficients
} AACEncContext;
#endif /* AVCODEC_AACENC_H */

@ -32,6 +32,7 @@
#include "aacsbrdata.h"
#include "fft.h"
#include "aacps.h"
#include "libavutil/libm.h"
#include <stdint.h>
#include <float.h>

@ -201,13 +201,13 @@ typedef struct {
///@}
///@defgroup arrays aligned arrays
DECLARE_ALIGNED(16, int, fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///< fixed-point transform coefficients
DECLARE_ALIGNED(16, float, transform_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///< transform coefficients
DECLARE_ALIGNED(16, float, delay)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]; ///< delay - added to the next block
DECLARE_ALIGNED(16, float, window)[AC3_BLOCK_SIZE]; ///< window coefficients
DECLARE_ALIGNED(16, float, tmp_output)[AC3_BLOCK_SIZE]; ///< temporary storage for output before windowing
DECLARE_ALIGNED(16, float, output)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]; ///< output after imdct transform and windowing
DECLARE_ALIGNED(16, uint8_t, input_buffer)[AC3_FRAME_BUFFER_SIZE + FF_INPUT_BUFFER_PADDING_SIZE]; ///< temp buffer to prevent overread
DECLARE_ALIGNED(16, int, fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///< fixed-point transform coefficients
DECLARE_ALIGNED(32, float, transform_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS]; ///< transform coefficients
DECLARE_ALIGNED(32, float, delay)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]; ///< delay - added to the next block
DECLARE_ALIGNED(32, float, window)[AC3_BLOCK_SIZE]; ///< window coefficients
DECLARE_ALIGNED(32, float, tmp_output)[AC3_BLOCK_SIZE]; ///< temporary storage for output before windowing
DECLARE_ALIGNED(32, float, output)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE]; ///< output after imdct transform and windowing
DECLARE_ALIGNED(32, uint8_t, input_buffer)[AC3_FRAME_BUFFER_SIZE + FF_INPUT_BUFFER_PADDING_SIZE]; ///< temp buffer to prevent overread
///@}
} AC3DecodeContext;

@ -171,7 +171,7 @@ typedef struct AC3EncodeContext {
uint8_t exp_strategy[AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< exponent strategies
DECLARE_ALIGNED(16, SampleType, windowed_samples)[AC3_WINDOW_SIZE];
DECLARE_ALIGNED(32, SampleType, windowed_samples)[AC3_WINDOW_SIZE];
} AC3EncodeContext;
typedef struct AC3Mant {

@ -47,7 +47,7 @@ static av_cold void mdct_end(AC3MDCTContext *mdct)
static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct,
int nbits)
{
int ret = ff_mdct_init(&mdct->fft, nbits, 0, 1.0);
int ret = ff_mdct_init(&mdct->fft, nbits, 0, -1.0);
mdct->window = ff_ac3_window;
return ret;
}

@ -60,11 +60,11 @@ typedef struct {
int log2_block_count[AT1_QMF_BANDS]; ///< log2 number of blocks in a band
int num_bfus; ///< number of Block Floating Units
float* spectrum[2];
DECLARE_ALIGNED(16, float, spec1)[AT1_SU_SAMPLES]; ///< mdct buffer
DECLARE_ALIGNED(16, float, spec2)[AT1_SU_SAMPLES]; ///< mdct buffer
DECLARE_ALIGNED(16, float, fst_qmf_delay)[46]; ///< delay line for the 1st stacked QMF filter
DECLARE_ALIGNED(16, float, snd_qmf_delay)[46]; ///< delay line for the 2nd stacked QMF filter
DECLARE_ALIGNED(16, float, last_qmf_delay)[256+23]; ///< delay line for the last stacked QMF filter
DECLARE_ALIGNED(32, float, spec1)[AT1_SU_SAMPLES]; ///< mdct buffer
DECLARE_ALIGNED(32, float, spec2)[AT1_SU_SAMPLES]; ///< mdct buffer
DECLARE_ALIGNED(32, float, fst_qmf_delay)[46]; ///< delay line for the 1st stacked QMF filter
DECLARE_ALIGNED(32, float, snd_qmf_delay)[46]; ///< delay line for the 2nd stacked QMF filter
DECLARE_ALIGNED(32, float, last_qmf_delay)[256+23]; ///< delay line for the last stacked QMF filter
} AT1SUCtx;
/**
@ -72,13 +72,13 @@ typedef struct {
*/
typedef struct {
AT1SUCtx SUs[AT1_MAX_CHANNELS]; ///< channel sound unit
DECLARE_ALIGNED(16, float, spec)[AT1_SU_SAMPLES]; ///< the mdct spectrum buffer
DECLARE_ALIGNED(32, float, spec)[AT1_SU_SAMPLES]; ///< the mdct spectrum buffer
DECLARE_ALIGNED(16, float, low)[256];
DECLARE_ALIGNED(16, float, mid)[256];
DECLARE_ALIGNED(16, float, high)[512];
DECLARE_ALIGNED(32, float, low)[256];
DECLARE_ALIGNED(32, float, mid)[256];
DECLARE_ALIGNED(32, float, high)[512];
float* bands[3];
DECLARE_ALIGNED(16, float, out_samples)[AT1_MAX_CHANNELS][AT1_SU_SAMPLES];
DECLARE_ALIGNED(32, float, out_samples)[AT1_MAX_CHANNELS][AT1_SU_SAMPLES];
FFTContext mdct_ctx[3];
int channels;
DSPContext dsp;

@ -74,8 +74,8 @@ typedef struct {
int gcBlkSwitch;
gain_block gainBlock[2];
DECLARE_ALIGNED(16, float, spectrum)[1024];
DECLARE_ALIGNED(16, float, IMDCT_buf)[1024];
DECLARE_ALIGNED(32, float, spectrum)[1024];
DECLARE_ALIGNED(32, float, IMDCT_buf)[1024];
float delayBuf1[46]; ///<qmf delay buffers
float delayBuf2[46];
@ -122,7 +122,7 @@ typedef struct {
FFTContext mdct_ctx;
} ATRAC3Context;
static DECLARE_ALIGNED(16, float,mdct_window)[512];
static DECLARE_ALIGNED(32, float, mdct_window)[512];
static VLC spectral_coeff_tab[7];
static float gain_tab1[16];
static float gain_tab2[31];

@ -766,7 +766,7 @@ typedef struct AVPanScan{
* - encoding: Set by libavcodec. for coded_picture (and set by user for input).\
* - decoding: Set by libavcodec.\
*/\
int pict_type;\
enum AVPictureType pict_type;\
\
/**\
* presentation timestamp in time_base units (time when frame should be shown to user)\
@ -1016,14 +1016,16 @@ typedef struct AVPanScan{
#define FF_BUFFER_TYPE_SHARED 4 ///< Buffer from somewhere else; don't deallocate image (data/base), all other tables are not shared.
#define FF_BUFFER_TYPE_COPY 8 ///< Just a (modified) copy of some other buffer, don't deallocate anything.
#define FF_I_TYPE 1 ///< Intra
#define FF_P_TYPE 2 ///< Predicted
#define FF_B_TYPE 3 ///< Bi-dir predicted
#define FF_S_TYPE 4 ///< S(GMC)-VOP MPEG4
#define FF_SI_TYPE 5 ///< Switching Intra
#define FF_SP_TYPE 6 ///< Switching Predicted
#define FF_BI_TYPE 7
#if FF_API_OLD_FF_PICT_TYPES
/* DEPRECATED, directly use the AV_PICTURE_TYPE_* enum values */
#define FF_I_TYPE AV_PICTURE_TYPE_I ///< Intra
#define FF_P_TYPE AV_PICTURE_TYPE_P ///< Predicted
#define FF_B_TYPE AV_PICTURE_TYPE_B ///< Bi-dir predicted
#define FF_S_TYPE AV_PICTURE_TYPE_S ///< S(GMC)-VOP MPEG4
#define FF_SI_TYPE AV_PICTURE_TYPE_SI ///< Switching Intra
#define FF_SP_TYPE AV_PICTURE_TYPE_SP ///< Switching Predicted
#define FF_BI_TYPE AV_PICTURE_TYPE_BI
#endif
#define FF_BUFFER_HINTS_VALID 0x01 // Buffer hints value is meaningful (if 0 ignore).
#define FF_BUFFER_HINTS_READABLE 0x02 // Codec will read from buffer.
@ -1215,16 +1217,6 @@ typedef struct AVCodecContext {
*/
enum PixelFormat pix_fmt;
#if FF_API_RATE_EMU
/**
* Frame rate emulation. If not zero, the lower layer (i.e. format handler)
* has to read frames at native frame rate.
* - encoding: Set by user.
* - decoding: unused
*/
attribute_deprecated int rate_emu;
#endif
/**
* If non NULL, 'draw_horiz_band' is called by the libavcodec
* decoder to draw a horizontal band. It improves cache usage. Not
@ -1326,16 +1318,6 @@ typedef struct AVCodecContext {
int b_frame_strategy;
#if FF_API_HURRY_UP
/**
* hurry up amount
* - encoding: unused
* - decoding: Set by user. 1-> Skip B-frames, 2-> Skip IDCT/dequant too, 5-> Skip everything except header
* @deprecated Deprecated in favor of skip_idct and skip_frame.
*/
attribute_deprecated int hurry_up;
#endif
struct AVCodec *codec;
void *priv_data;
@ -1800,22 +1782,6 @@ typedef struct AVCodecContext {
*/
uint64_t error[4];
#if FF_API_MB_Q
/**
* minimum MB quantizer
* - encoding: unused
* - decoding: unused
*/
attribute_deprecated int mb_qmin;
/**
* maximum MB quantizer
* - encoding: unused
* - decoding: unused
*/
attribute_deprecated int mb_qmax;
#endif
/**
* motion estimation comparison function
* - encoding: Set by user.
@ -3866,13 +3832,17 @@ void avcodec_default_free_buffers(AVCodecContext *s);
/* misc useful functions */
#if FF_API_OLD_FF_PICT_TYPES
/**
* Return a single letter to describe the given picture type pict_type.
*
* @param[in] pict_type the picture type
* @return A single character representing the picture type.
* @deprecated Use av_get_picture_type_char() instead.
*/
attribute_deprecated
char av_get_pict_type_char(int pict_type);
#endif
/**
* Return codec bits per sample.

@ -55,7 +55,7 @@ typedef struct {
int num_bands;
unsigned int *bands;
float root;
DECLARE_ALIGNED(16, FFTSample, coeffs)[BINK_BLOCK_MAX_SIZE];
DECLARE_ALIGNED(32, FFTSample, coeffs)[BINK_BLOCK_MAX_SIZE];
DECLARE_ALIGNED(16, short, previous)[BINK_BLOCK_MAX_SIZE / 16]; ///< coeffs from previous audio block
float *coeffs_ptr[MAX_CHANNELS]; ///< pointers to the coeffs arrays for float_to_int16_interleave
union {

@ -153,7 +153,7 @@ typedef struct cook {
/* data buffers */
uint8_t* decoded_bytes_buffer;
DECLARE_ALIGNED(16, float,mono_mdct_output)[2048];
DECLARE_ALIGNED(32, float, mono_mdct_output)[2048];
float decode_buffer_1[1024];
float decode_buffer_2[1024];
float decode_buffer_0[1060]; /* static allocation for joint decode */

@ -321,16 +321,16 @@ typedef struct {
/* Subband samples history (for ADPCM) */
float subband_samples_hist[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][4];
DECLARE_ALIGNED(16, float, subband_fir_hist)[DCA_PRIM_CHANNELS_MAX][512];
DECLARE_ALIGNED(16, float, subband_fir_noidea)[DCA_PRIM_CHANNELS_MAX][32];
DECLARE_ALIGNED(32, float, subband_fir_hist)[DCA_PRIM_CHANNELS_MAX][512];
DECLARE_ALIGNED(32, float, subband_fir_noidea)[DCA_PRIM_CHANNELS_MAX][32];
int hist_index[DCA_PRIM_CHANNELS_MAX];
DECLARE_ALIGNED(16, float, raXin)[32];
DECLARE_ALIGNED(32, float, raXin)[32];
int output; ///< type of output
float scale_bias; ///< output scale
DECLARE_ALIGNED(16, float, subband_samples)[DCA_BLOCKS_MAX][DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][8];
DECLARE_ALIGNED(16, float, samples)[(DCA_PRIM_CHANNELS_MAX+1)*256];
DECLARE_ALIGNED(32, float, subband_samples)[DCA_BLOCKS_MAX][DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][8];
DECLARE_ALIGNED(32, float, samples)[(DCA_PRIM_CHANNELS_MAX+1)*256];
const float *samples_chanptr[DCA_PRIM_CHANNELS_MAX+1];
uint8_t dca_buffer[DCA_MAX_FRAME_SIZE + DCA_MAX_EXSS_HEADER_SIZE + DCA_BUFFER_PADDING_SIZE];

@ -312,18 +312,16 @@ static void dct_error(const char *name, int is_idct,
}
for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
#if 1 // dump systematic errors
for(i=0; i<64; i++){
if(i%8==0) printf("\n");
printf("%7d ", (int)sysErr[i]);
}
printf("\n");
#endif
printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
is_idct ? "IDCT" : "DCT",
name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
#if 1 //Speed test
/* speed test */
for(i=0;i<64;i++)
block1[i] = 0;
@ -376,7 +374,6 @@ static void dct_error(const char *name, int is_idct,
printf("%s %s: %0.1f kdct/s\n",
is_idct ? "IDCT" : "DCT",
name, (double)it1 * 1000.0 / (double)ti1);
#endif
}
DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];

@ -786,7 +786,6 @@ void ff_er_frame_end(MpegEncContext *s){
}
}
#if 1
/* handle overlapping slices */
for(error_type=1; error_type<=3; error_type++){
int end_ok=0;
@ -807,8 +806,7 @@ void ff_er_frame_end(MpegEncContext *s){
end_ok=0;
}
}
#endif
#if 1
/* handle slices with partitions of different length */
if(s->partitioned_frame){
int end_ok=0;
@ -829,7 +827,7 @@ void ff_er_frame_end(MpegEncContext *s){
end_ok=0;
}
}
#endif
/* handle missing slices */
if(s->error_recognition>=4){
int end_ok=1;
@ -853,7 +851,6 @@ void ff_er_frame_end(MpegEncContext *s){
}
}
#if 1
/* backward mark errors */
distance=9999999;
for(error_type=1; error_type<=3; error_type++){
@ -878,7 +875,6 @@ void ff_er_frame_end(MpegEncContext *s){
distance= 9999999;
}
}
#endif
/* forward mark errors */
error=0;
@ -893,7 +889,7 @@ void ff_er_frame_end(MpegEncContext *s){
s->error_status_table[mb_xy]|= error;
}
}
#if 1
/* handle not partitioned case */
if(!s->partitioned_frame){
for(i=0; i<s->mb_num; i++){
@ -904,7 +900,6 @@ void ff_er_frame_end(MpegEncContext *s){
s->error_status_table[mb_xy]= error;
}
}
#endif
dc_error= ac_error= mv_error=0;
for(i=0; i<s->mb_num; i++){
@ -1065,16 +1060,15 @@ void ff_er_frame_end(MpegEncContext *s){
s->dc_val[2][mb_x + mb_y*s->mb_stride]= (dcv+4)>>3;
}
}
#if 1
/* guess DC for damaged blocks */
guess_dc(s, s->dc_val[0], s->mb_width*2, s->mb_height*2, s->b8_stride, 1);
guess_dc(s, s->dc_val[1], s->mb_width , s->mb_height , s->mb_stride, 0);
guess_dc(s, s->dc_val[2], s->mb_width , s->mb_height , s->mb_stride, 0);
#endif
/* filter luma DC */
filter181(s->dc_val[0], s->mb_width*2, s->mb_height*2, s->b8_stride);
#if 1
/* render DC only intra */
for(mb_y=0; mb_y<s->mb_height; mb_y++){
for(mb_x=0; mb_x<s->mb_width; mb_x++){
@ -1094,7 +1088,6 @@ void ff_er_frame_end(MpegEncContext *s){
put_dc(s, dest_y, dest_cb, dest_cr, mb_x, mb_y);
}
}
#endif
if(s->avctx->error_concealment&FF_EC_DEBLOCK){
/* filter horizontal block boundaries */

@ -93,6 +93,44 @@ av_cold void ff_init_ff_cos_tabs(int index)
#endif
}
/**
 * Offsets (0..15) added to the base index of a 16-entry group by
 * fft_perm_avx() when that group lies in the second half of an
 * fft32 block (see is_second_half_of_fft32()).
 */
static const int avx_tab[] = {
     0,  4,  1,  5,
     8, 12,  9, 13,
     2,  6,  3,  7,
    10, 14, 11, 15
};
/**
 * Tell whether index i of a size-n transform ends up in the upper 16
 * entries of the sub-block reached by repeatedly splitting n.
 *
 * While n is larger than 32 the range [0, n) is split into a first
 * half of size n/2 and two quarters of size n/4 each; i is rebased to
 * the start of the part that contains it and n shrinks to that part's
 * size. Once n is 32 or less the answer is simply i >= 16.
 *
 * @param i index inside the transform
 * @param n transform size
 * @return nonzero if the rebased index is >= 16, zero otherwise
 */
static int is_second_half_of_fft32(int i, int n)
{
    while (n > 32) {
        const int half           = n / 2;
        const int three_quarters = 3 * n / 4;

        if (i < half) {
            /* first half: keep i, continue with size n/2 */
            n = half;
        } else if (i < three_quarters) {
            /* third quarter: rebase to its start, continue with size n/4 */
            i -= half;
            n /= 4;
        } else {
            /* last quarter: rebase to its start, continue with size n/4 */
            i -= three_quarters;
            n /= 4;
        }
    }

    return i >= 16;
}
/**
 * Fill s->revtab for a transform of size 1 << s->nbits, walking the
 * indices in groups of 16.
 *
 * For every source index the slot written is
 * -split_radix_permutation(index, n, s->inverse) & (n - 1).
 * The value stored depends on the group:
 *  - groups for which is_second_half_of_fft32() is true store the
 *    group base plus avx_tab[k];
 *  - all other groups store the source index with its low three bits
 *    rearranged (bits 1..2 moved down to 0..1, bit 0 moved up to bit 2).
 *
 * @param s FFT context; nbits and inverse are read, revtab is written
 */
static av_cold void fft_perm_avx(FFTContext *s)
{
    const int n = 1 << s->nbits;
    int base, k;

    for (base = 0; base < n; base += 16) {
        /* the test only depends on the group base, so evaluate it once */
        const int second_half = is_second_half_of_fft32(base, n);

        for (k = 0; k < 16; k++) {
            const int src = base + k;
            int dst;

            if (second_half)
                dst = base + avx_tab[k];
            else
                dst = (src & ~7) | ((src >> 1) & 3) | ((src << 2) & 4);

            s->revtab[-split_radix_permutation(src, n, s->inverse) & (n - 1)] = dst;
        }
    }
}
av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
{
int i, j, n;
@ -132,11 +170,16 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
for(j=4; j<=nbits; j++) {
ff_init_ff_cos_tabs(j);
}
for(i=0; i<n; i++) {
int j = i;
if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = j;
if (s->fft_permutation == FF_FFT_PERM_AVX) {
fft_perm_avx(s);
} else {
for(i=0; i<n; i++) {
int j = i;
if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = j;
}
}
return 0;

@ -85,6 +85,7 @@ struct FFTContext {
int fft_permutation;
#define FF_FFT_PERM_DEFAULT 0
#define FF_FFT_PERM_SWAP_LSBS 1
#define FF_FFT_PERM_AVX 2
int mdct_permutation;
#define FF_MDCT_PERM_NONE 0
#define FF_MDCT_PERM_INTERLEAVE 1
@ -97,7 +98,7 @@ struct FFTContext {
#endif
#define COSTABLE(size) \
COSTABLE_CONST DECLARE_ALIGNED(16, FFTSample, FFT_NAME(ff_cos_##size))[size/2]
COSTABLE_CONST DECLARE_ALIGNED(32, FFTSample, FFT_NAME(ff_cos_##size))[size/2]
extern COSTABLE(16);
extern COSTABLE(32);

@ -599,10 +599,6 @@ retry:
s->current_picture.pict_type= s->pict_type;
s->current_picture.key_frame= s->pict_type == FF_I_TYPE;
#if FF_API_HURRY_UP
/* skip everything if we are in a hurry>=5 */
if(avctx->hurry_up>=5) return get_consumed_bytes(s, buf_size);
#endif
if( (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==FF_B_TYPE)
||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=FF_I_TYPE)
|| avctx->skip_frame >= AVDISCARD_ALL)

@ -612,18 +612,10 @@ retry:
/* skip B-frames if we don't have reference frames */
if(s->last_picture_ptr==NULL && (s->pict_type==FF_B_TYPE || s->dropable)) return get_consumed_bytes(s, buf_size);
#if FF_API_HURRY_UP
/* skip b frames if we are in a hurry */
if(avctx->hurry_up && s->pict_type==FF_B_TYPE) return get_consumed_bytes(s, buf_size);
#endif
if( (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==FF_B_TYPE)
|| (avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=FF_I_TYPE)
|| avctx->skip_frame >= AVDISCARD_ALL)
return get_consumed_bytes(s, buf_size);
#if FF_API_HURRY_UP
/* skip everything if we are in a hurry>=5 */
if(avctx->hurry_up>=5) return get_consumed_bytes(s, buf_size);
#endif
if(s->next_p_frame_damaged){
if(s->pict_type==FF_B_TYPE)

@ -2966,11 +2966,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
buf_index += consumed;
//FIXME do not discard SEI id
if(
#if FF_API_HURRY_UP
(s->hurry_up == 1 && h->nal_ref_idc == 0) ||
#endif
(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)
continue;
again:
@ -3007,9 +3003,6 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
}
if(hx->redundant_pic_count==0
#if FF_API_HURRY_UP
&& hx->s.hurry_up < 5
#endif
&& (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
&& (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
&& (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
@ -3047,9 +3040,6 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
&& s->context_initialized
#if FF_API_HURRY_UP
&& s->hurry_up < 5
#endif
&& (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
&& (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
&& (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
@ -3186,11 +3176,7 @@ static int decode_frame(AVCodecContext *avctx,
}
if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
if (avctx->skip_frame >= AVDISCARD_NONREF
#if FF_API_HURRY_UP
|| s->hurry_up
#endif
)
if (avctx->skip_frame >= AVDISCARD_NONREF)
return 0;
av_log(avctx, AV_LOG_ERROR, "no frame!\n");
return -1;

@ -1007,7 +1007,6 @@ static void fill_decode_caches(H264Context *h, int mb_type){
}
}
#if 1
if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){
int list;
for(list=0; list<h->list_count; list++){
@ -1182,7 +1181,6 @@ static void fill_decode_caches(H264Context *h, int mb_type){
}
}
}
#endif
h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
}

@ -88,7 +88,7 @@ typedef struct {
DSPContext dsp;
FFTContext fft;
DECLARE_ALIGNED(16, FFTComplex, samples)[COEFFS/2];
DECLARE_ALIGNED(32, FFTComplex, samples)[COEFFS/2];
float *out_samples;
} IMCContext;

@ -158,7 +158,6 @@ static int hpel_motion_search(MpegEncContext * s,
const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
+ (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
#if 1
int key;
int map_generation= c->map_generation;
#ifndef NDEBUG
@ -172,7 +171,6 @@ static int hpel_motion_search(MpegEncContext * s,
assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
#endif
if(t<=b){
CHECK_HALF_MV(0, 1, mx ,my-1)
if(l<=r){

@ -2476,18 +2476,10 @@ static int decode_chunks(AVCodecContext *avctx,
/* Skip P-frames if we do not have a reference frame or we have an invalid header. */
if(s2->pict_type==FF_P_TYPE && !s->sync) break;
}
#if FF_API_HURRY_UP
/* Skip B-frames if we are in a hurry. */
if(avctx->hurry_up && s2->pict_type==FF_B_TYPE) break;
#endif
if( (avctx->skip_frame >= AVDISCARD_NONREF && s2->pict_type==FF_B_TYPE)
||(avctx->skip_frame >= AVDISCARD_NONKEY && s2->pict_type!=FF_I_TYPE)
|| avctx->skip_frame >= AVDISCARD_ALL)
break;
#if FF_API_HURRY_UP
/* Skip everything if we are in a hurry>=5. */
if(avctx->hurry_up>=5) break;
#endif
if (!s->mpeg_enc_ctx_allocated) break;

@ -1131,9 +1131,6 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
}
}
#if FF_API_HURRY_UP
s->hurry_up= s->avctx->hurry_up;
#endif
s->error_recognition= avctx->error_recognition;
/* set dequantizer, we can't do it during init as it might change for mpeg4
@ -2125,9 +2122,6 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
}
/* skip dequant / idct if we are really late ;) */
#if FF_API_HURRY_UP
if(s->hurry_up>1) goto skip_idct;
#endif
if(s->avctx->skip_idct){
if( (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == FF_B_TYPE)
||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != FF_I_TYPE)

@ -391,11 +391,6 @@ typedef struct MpegEncContext {
int no_rounding; /**< apply no rounding to motion compensation (MPEG4, msmpeg4, ...)
for b-frames rounding mode is always 0 */
#if FF_API_HURRY_UP
int hurry_up; /**< when set to 1 during decoding, b frames will be skipped
when set to 2 idct/dequant will be skipped too */
#endif
/* macroblock layer */
int mb_x, mb_y;
int mb_skip_run;

@ -985,10 +985,9 @@ void ff_msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n)
if(level<=MAX_LEVEL && run<=MAX_RUN){
s->ac_stats[s->mb_intra][n>3][level][run][last]++;
}
#if 0
else
s->ac_stats[s->mb_intra][n>3][40][63][0]++; //esc3 like
#endif
s->ac_stats[s->mb_intra][n > 3][40][63][0]++; //esc3 like
code = get_rl_index(rl, last, run, level);
put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
if (code == rl->n) {

@ -47,7 +47,7 @@
typedef struct NellyMoserDecodeContext {
AVCodecContext* avctx;
DECLARE_ALIGNED(16, float,float_buf)[NELLY_SAMPLES];
DECLARE_ALIGNED(32, float, float_buf)[NELLY_SAMPLES];
float state[128];
AVLFG random_state;
GetBitContext gb;
@ -55,7 +55,7 @@ typedef struct NellyMoserDecodeContext {
DSPContext dsp;
FFTContext imdct_ctx;
FmtConvertContext fmt_conv;
DECLARE_ALIGNED(16, float,imdct_out)[NELLY_BUF_LEN * 2];
DECLARE_ALIGNED(32, float, imdct_out)[NELLY_BUF_LEN * 2];
} NellyMoserDecodeContext;
static void overlap_and_window(NellyMoserDecodeContext *s, float *state, float *audio, float *a_in)

@ -55,9 +55,9 @@ typedef struct NellyMoserEncodeContext {
int have_saved;
DSPContext dsp;
FFTContext mdct_ctx;
DECLARE_ALIGNED(16, float, mdct_out)[NELLY_SAMPLES];
DECLARE_ALIGNED(16, float, in_buff)[NELLY_SAMPLES];
DECLARE_ALIGNED(16, float, buf)[2][3 * NELLY_BUF_LEN]; ///< sample buffer
DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
DECLARE_ALIGNED(32, float, buf)[2][3 * NELLY_BUF_LEN]; ///< sample buffer
float (*opt )[NELLY_BANDS];
uint8_t (*path)[NELLY_BANDS];
} NellyMoserEncodeContext;

@ -105,9 +105,6 @@ static const AVOption options[]={
{"extradata_size", NULL, OFFSET(extradata_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
{"time_base", NULL, OFFSET(time_base), FF_OPT_TYPE_RATIONAL, DEFAULT, INT_MIN, INT_MAX},
{"g", "set the group of picture size", OFFSET(gop_size), FF_OPT_TYPE_INT, 12, INT_MIN, INT_MAX, V|E},
#if FF_API_RATE_EMU
{"rate_emu", "frame rate emulation", OFFSET(rate_emu), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
#endif
{"ar", "set audio sampling rate (in Hz)", OFFSET(sample_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
{"ac", "set number of audio channels", OFFSET(channels), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
{"cutoff", "set cutoff bandwidth", OFFSET(cutoff), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, A|E},
@ -124,9 +121,6 @@ static const AVOption options[]={
{"rc_strategy", "ratecontrol method", OFFSET(rc_strategy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"b_strategy", "strategy to choose between I/P/B-frames", OFFSET(b_frame_strategy), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E},
{"wpredp", "weighted prediction analysis method", OFFSET(weighted_p_pred), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E},
#if FF_API_HURRY_UP
{"hurry_up", "deprecated, use skip_idct/skip_frame instead", OFFSET(hurry_up), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D},
#endif
{"ps", "rtp payload size in bytes", OFFSET(rtp_payload_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"mv_bits", NULL, OFFSET(mv_bits), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
{"header_bits", NULL, OFFSET(header_bits), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
@ -253,10 +247,6 @@ static const AVOption options[]={
{"pf", "forward predicted MVs of P-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_P_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"},
{"bf", "forward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"},
{"bb", "backward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_BACK, INT_MIN, INT_MAX, V|D, "debug_mv"},
#if FF_API_MB_Q
{"mb_qmin", "obsolete, use qmin", OFFSET(mb_qmin), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"mb_qmax", "obsolete, use qmax", OFFSET(mb_qmax), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
#endif
{"cmp", "full pel me compare function", OFFSET(me_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},
{"subcmp", "sub pel me compare function", OFFSET(me_sub_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},
{"mbcmp", "macroblock compare function", OFFSET(mb_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"},

@ -380,9 +380,6 @@ static void update_context_from_user(AVCodecContext *dst, AVCodecContext *src)
dst->release_buffer = src->release_buffer;
dst->opaque = src->opaque;
#if FF_API_HURRY_UP
dst->hurry_up = src->hurry_up;
#endif
dst->dsp_mask = src->dsp_mask;
dst->debug = src->debug;
dst->debug_mv = src->debug_mv;

@ -120,7 +120,7 @@ typedef struct {
} FFTCoefficient;
typedef struct {
DECLARE_ALIGNED(16, QDM2Complex, complex)[MPA_MAX_CHANNELS][256];
DECLARE_ALIGNED(32, QDM2Complex, complex)[MPA_MAX_CHANNELS][256];
} QDM2FFT;
/**

@ -1454,19 +1454,10 @@ int ff_rv34_decode_frame(AVCodecContext *avctx,
}
if((!s->last_picture_ptr || !s->last_picture_ptr->data[0]) && si.type == FF_B_TYPE)
return -1;
#if FF_API_HURRY_UP
/* skip b frames if we are in a hurry */
if(avctx->hurry_up && si.type==FF_B_TYPE) return buf_size;
#endif
if( (avctx->skip_frame >= AVDISCARD_NONREF && si.type==FF_B_TYPE)
|| (avctx->skip_frame >= AVDISCARD_NONKEY && si.type!=FF_I_TYPE)
|| avctx->skip_frame >= AVDISCARD_ALL)
return buf_size;
#if FF_API_HURRY_UP
/* skip everything if we are in a hurry>=5 */
if(avctx->hurry_up>=5)
return buf_size;
#endif
for(i=0; i<slice_count; i++){
int offset= get_slice_offset(avctx, slices_hdr, i);

@ -897,7 +897,6 @@ QPEL_MC(0, avg_ , _ , op_avg)
#undef op_put
#undef op_put_no_rnd
#if 1
#define H264_LOWPASS(OPNAME, OP, OP2) \
static inline void OPNAME ## h264_qpel_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,int w,int h){\
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
@ -1298,7 +1297,6 @@ H264_MC(avg_, 16)
#undef op_put
#undef op2_avg
#undef op2_put
#endif
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

@ -3293,10 +3293,8 @@ static void iterative_me(SnowContext *s){
}
best_rd= ref_rd;
*block= ref_b;
#if 1
check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
//FIXME RD style color selection
#endif
if(!same_block(block, &backup)){
if(tb ) tb ->type &= ~BLOCK_OPT;
if(lb ) lb ->type &= ~BLOCK_OPT;

@ -684,9 +684,6 @@ static int svq1_decode_frame(AVCodecContext *avctx,
//this should be removed after libavcodec can handle more flexible picture types & ordering
if(s->pict_type==FF_B_TYPE && s->last_picture_ptr==NULL) return buf_size;
#if FF_API_HURRY_UP
if(avctx->hurry_up && s->pict_type==FF_B_TYPE) return buf_size;
#endif
if( (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==FF_B_TYPE)
||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=FF_I_TYPE)
|| avctx->skip_frame >= AVDISCARD_ALL)

@ -952,14 +952,6 @@ static int svq3_decode_frame(AVCodecContext *avctx,
/* Skip B-frames if we do not have reference frames. */
if (s->last_picture_ptr == NULL && s->pict_type == FF_B_TYPE)
return 0;
#if FF_API_HURRY_UP
/* Skip B-frames if we are in a hurry. */
if (avctx->hurry_up && s->pict_type == FF_B_TYPE)
return 0;
/* Skip everything if we are in a hurry >= 5. */
if (avctx->hurry_up >= 5)
return 0;
#endif
if ( (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type == FF_B_TYPE)
||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type != FF_I_TYPE)
|| avctx->skip_frame >= AVDISCARD_ALL)

@ -1105,18 +1105,11 @@ void avcodec_default_free_buffers(AVCodecContext *s){
s->internal_buffer_count=0;
}
#if FF_API_OLD_FF_PICT_TYPES
/**
 * Deprecated entry point kept while FF_API_OLD_FF_PICT_TYPES is enabled.
 *
 * Forwards to av_get_picture_type_char() so the picture-type-to-letter
 * mapping lives in a single place instead of being duplicated here.
 *
 * @param pict_type picture type value
 * @return whatever av_get_picture_type_char() returns for pict_type
 */
char av_get_pict_type_char(int pict_type){
    return av_get_picture_type_char(pict_type);
}
#endif
int av_get_bits_per_sample(enum CodecID codec_id){
switch(codec_id){

@ -3519,21 +3519,11 @@ static int vc1_decode_frame(AVCodecContext *avctx,
if(s->last_picture_ptr==NULL && (s->pict_type==FF_B_TYPE || s->dropable)){
goto err;
}
#if FF_API_HURRY_UP
/* skip b frames if we are in a hurry */
if(avctx->hurry_up && s->pict_type==FF_B_TYPE) return -1;//buf_size;
#endif
if( (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==FF_B_TYPE)
|| (avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=FF_I_TYPE)
|| avctx->skip_frame >= AVDISCARD_ALL) {
goto end;
}
#if FF_API_HURRY_UP
/* skip everything if we are in a hurry>=5 */
if(avctx->hurry_up>=5) {
goto err;
}
#endif
if(s->next_p_frame_damaged){
if(s->pict_type==FF_B_TYPE)

@ -22,7 +22,7 @@
#define LIBAVCODEC_VERSION_MAJOR 53
#define LIBAVCODEC_VERSION_MINOR 1
#define LIBAVCODEC_VERSION_MICRO 0
#define LIBAVCODEC_VERSION_MICRO 1
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
LIBAVCODEC_VERSION_MINOR, \
@ -47,15 +47,6 @@
#ifndef FF_API_OLD_AUDIOCONVERT
#define FF_API_OLD_AUDIOCONVERT (LIBAVCODEC_VERSION_MAJOR < 54)
#endif
#ifndef FF_API_HURRY_UP
#define FF_API_HURRY_UP (LIBAVCODEC_VERSION_MAJOR < 53)
#endif
#ifndef FF_API_RATE_EMU
#define FF_API_RATE_EMU (LIBAVCODEC_VERSION_MAJOR < 53)
#endif
#ifndef FF_API_MB_Q
#define FF_API_MB_Q (LIBAVCODEC_VERSION_MAJOR < 53)
#endif
#ifndef FF_API_ANTIALIAS_ALGO
#define FF_API_ANTIALIAS_ALGO (LIBAVCODEC_VERSION_MAJOR < 54)
#endif
@ -68,5 +59,8 @@
#ifndef FF_API_THREAD_INIT
#define FF_API_THREAD_INIT (LIBAVCODEC_VERSION_MAJOR < 54)
#endif
#ifndef FF_API_OLD_FF_PICT_TYPES
#define FF_API_OLD_FF_PICT_TYPES (LIBAVCODEC_VERSION_MAJOR < 54)
#endif
#endif /* AVCODEC_VERSION_H */

@ -113,15 +113,15 @@ typedef struct WMACodecContext {
uint8_t ms_stereo; ///< true if mid/side stereo mode
uint8_t channel_coded[MAX_CHANNELS]; ///< true if channel is coded
int exponents_bsize[MAX_CHANNELS]; ///< log2 ratio frame/exp. length
DECLARE_ALIGNED(16, float, exponents)[MAX_CHANNELS][BLOCK_MAX_SIZE];
DECLARE_ALIGNED(32, float, exponents)[MAX_CHANNELS][BLOCK_MAX_SIZE];
float max_exponent[MAX_CHANNELS];
WMACoef coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
DECLARE_ALIGNED(16, float, coefs)[MAX_CHANNELS][BLOCK_MAX_SIZE];
DECLARE_ALIGNED(16, FFTSample, output)[BLOCK_MAX_SIZE * 2];
DECLARE_ALIGNED(32, float, coefs)[MAX_CHANNELS][BLOCK_MAX_SIZE];
DECLARE_ALIGNED(32, FFTSample, output)[BLOCK_MAX_SIZE * 2];
FFTContext mdct_ctx[BLOCK_NB_SIZES];
float *windows[BLOCK_NB_SIZES];
/* output buffer for one frame and the last for IMDCT windowing */
DECLARE_ALIGNED(16, float, frame_out)[MAX_CHANNELS][BLOCK_MAX_SIZE * 2];
DECLARE_ALIGNED(32, float, frame_out)[MAX_CHANNELS][BLOCK_MAX_SIZE * 2];
/* last frame info */
uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */
int last_bitoffset;

@ -145,7 +145,7 @@ typedef struct {
uint8_t table_idx; ///< index in sf_offsets for the scale factor reference block
float* coeffs; ///< pointer to the subframe decode buffer
uint16_t num_vec_coeffs; ///< number of vector coded coefficients
DECLARE_ALIGNED(16, float, out)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer
DECLARE_ALIGNED(32, float, out)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer
} WMAProChannelCtx;
/**
@ -170,7 +170,7 @@ typedef struct WMAProDecodeCtx {
FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
PutBitContext pb; ///< context for filling the frame_data buffer
FFTContext mdct_ctx[WMAPRO_BLOCK_SIZES]; ///< MDCT context per block size
DECLARE_ALIGNED(16, float, tmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT output buffer
DECLARE_ALIGNED(32, float, tmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT output buffer
float* windows[WMAPRO_BLOCK_SIZES]; ///< windows for the different block sizes
/* frame size dependent frame information (set during initialization) */

@ -275,11 +275,11 @@ typedef struct {
///< by postfilter
float denoise_filter_cache[MAX_FRAMESIZE];
int denoise_filter_cache_size; ///< samples in #denoise_filter_cache
DECLARE_ALIGNED(16, float, tilted_lpcs_pf)[0x80];
DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80];
///< aligned buffer for LPC tilting
DECLARE_ALIGNED(16, float, denoise_coeffs_pf)[0x80];
DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80];
///< aligned buffer for denoise coefficients
DECLARE_ALIGNED(16, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16];
DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16];
///< aligned buffer for postfilter speech
///< synthesis
/**

@ -25,7 +25,14 @@ av_cold void ff_fft_init_mmx(FFTContext *s)
{
#if HAVE_YASM
int has_vectors = av_get_cpu_flags();
if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) {
if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX && s->nbits >= 5) {
/* AVX for SB */
s->imdct_calc = ff_imdct_calc_sse;
s->imdct_half = ff_imdct_half_avx;
s->fft_permute = ff_fft_permute_sse;
s->fft_calc = ff_fft_calc_avx;
s->fft_permutation = FF_FFT_PERM_AVX;
} else if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) {
/* SSE for P3/P4/K8 */
s->imdct_calc = ff_imdct_calc_sse;
s->imdct_half = ff_imdct_half_sse;

@ -22,6 +22,7 @@
#include "libavcodec/fft.h"
void ff_fft_permute_sse(FFTContext *s, FFTComplex *z);
void ff_fft_calc_avx(FFTContext *s, FFTComplex *z);
void ff_fft_calc_sse(FFTContext *s, FFTComplex *z);
void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z);
void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z);
@ -32,6 +33,7 @@ void ff_imdct_calc_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input
void ff_imdct_half_3dn2(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_dct32_float_sse(FFTSample *out, const FFTSample *in);
#endif

@ -1,6 +1,7 @@
;******************************************************************************
;* FFT transform with SSE/3DNow optimizations
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2011 Vitor Sessak
;*
;* This algorithm (though not any of the implementation details) is
;* based on libdjbfft by D. J. Bernstein.
@ -49,9 +50,21 @@ endstruc
SECTION_RODATA
%define M_SQRT1_2 0.70710678118654752440
ps_root2: times 4 dd M_SQRT1_2
ps_root2mppm: dd -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2
ps_p1p1m1p1: dd 0, 0, 1<<31, 0
%define M_COS_PI_1_8 0.923879532511287
%define M_COS_PI_3_8 0.38268343236509
align 32
ps_cos16_1: dd 1.0, M_COS_PI_1_8, M_SQRT1_2, M_COS_PI_3_8, 1.0, M_COS_PI_1_8, M_SQRT1_2, M_COS_PI_3_8
ps_cos16_2: dd 0, M_COS_PI_3_8, M_SQRT1_2, M_COS_PI_1_8, 0, -M_COS_PI_3_8, -M_SQRT1_2, -M_COS_PI_1_8
ps_root2: times 8 dd M_SQRT1_2
ps_root2mppm: dd -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2, -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2
ps_p1p1m1p1: dd 0, 0, 1<<31, 0, 0, 0, 1<<31, 0
perm1: dd 0x00, 0x02, 0x03, 0x01, 0x03, 0x00, 0x02, 0x01
perm2: dd 0x00, 0x01, 0x02, 0x03, 0x01, 0x00, 0x02, 0x03
ps_p1p1m1p1root2: dd 1.0, 1.0, -1.0, 1.0, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2
ps_m1m1p1m1p1m1m1m1: dd 1<<31, 1<<31, 0, 1<<31, 0, 1<<31, 1<<31, 1<<31
ps_m1p1: dd 1<<31, 0
%assign i 16
@ -96,51 +109,80 @@ section .text align=16
SWAP %3, %6
%endmacro
; T8_AVX: transform of 8 complex values held in two ymm registers.
; It is the whole computation of fft8_avx and the first stage of fft16_avx.
; Uses only v-prefixed 256-bit instructions, so it has no SSE fallback.
; in: %1 = {r0,i0,r2,i2,r4,i4,r6,i6}
; %2 = {r1,i1,r3,i3,r5,i5,r7,i7}
; %3, %4, %5 tmp
; out: %1 = {r0,r1,r2,r3,i0,i1,i2,i3}
; %2 = {r4,r5,r6,r7,i4,i5,i6,i7}
%macro T8_AVX 5
vsubps %5, %1, %2 ; v = %1 - %2
vaddps %3, %1, %2 ; w = %1 + %2
; vals1 = ps_p1p1m1p1root2 = {1,1,-1,1, sqrt(1/2) x4}
vmulps %2, %5, [ps_p1p1m1p1root2] ; v *= vals1
; reorder v within each 128-bit lane using the index table perm1
vpermilps %2, %2, [perm1]
vblendps %1, %2, %3, 0x33 ; q = {w1,w2,v4,v2,w5,w6,v7,v6}
vshufps %5, %3, %2, 0x4e ; r = {w3,w4,v1,v3,w7,w8,v8,v5}
vsubps %4, %5, %1 ; s = r - q
vaddps %1, %5, %1 ; u = r + q
vpermilps %1, %1, [perm2] ; k = {u1,u2,u3,u4,u6,u5,u7,u8}
vshufps %5, %4, %1, 0xbb
vshufps %3, %4, %1, 0xee
vperm2f128 %3, %3, %5, 0x13
; sign-bit xor; the mask has bit 31 set in elements 0,1,3,5,6,7
vxorps %4, %4, [ps_m1m1p1m1p1m1m1m1] ; s *= {-1,-1,1,-1,1,-1,-1,-1}
vshufps %2, %1, %4, 0xdd
vshufps %1, %1, %4, 0x88
vperm2f128 %4, %2, %1, 0x02 ; v = {k1,k3,s1,s3,k2,k4,s2,s4}
vperm2f128 %1, %1, %2, 0x13 ; w = {k6,k8,s6,s8,k5,k7,s5,s7}
vsubps %5, %1, %3
vblendps %1, %5, %1, 0x55 ; w -= {0,s7,0,k7,0,s8,0,k8}
vsubps %2, %4, %1 ; %2 = v - w
vaddps %1, %4, %1 ; %1 = v + w
%endmacro
; In SSE mode do one fft4 transforms
; in: %1={r0,i0,r2,i2} %2={r1,i1,r3,i3}
; out: %1={r0,r1,r2,r3} %2={i0,i1,i2,i3}
;
; In AVX mode do two fft4 transforms
; in: %1={r0,i0,r2,i2,r4,i4,r6,i6} %2={r1,i1,r3,i3,r5,i5,r7,i7}
; out: %1={r0,r1,r2,r3,r4,r5,r6,r7} %2={i0,i1,i2,i3,i4,i5,i6,i7}
%macro T4_SSE 3
mova %3, %1
addps %1, %2 ; {t1,t2,t6,t5}
subps %3, %2 ; {t3,t4,-t8,t7}
xorps %3, [ps_p1p1m1p1]
mova %2, %1
shufps %1, %3, 0x44 ; {t1,t2,t3,t4}
shufps %2, %3, 0xbe ; {t6,t5,t7,t8}
mova %3, %1
addps %1, %2 ; {r0,i0,r1,i1}
subps %3, %2 ; {r2,i2,r3,i3}
mova %2, %1
shufps %1, %3, 0x88 ; {r0,r1,r2,r3}
shufps %2, %3, 0xdd ; {i0,i1,i2,i3}
subps %3, %1, %2 ; {t3,t4,-t8,t7}
addps %1, %1, %2 ; {t1,t2,t6,t5}
xorps %3, %3, [ps_p1p1m1p1]
shufps %2, %1, %3, 0xbe ; {t6,t5,t7,t8}
shufps %1, %1, %3, 0x44 ; {t1,t2,t3,t4}
subps %3, %1, %2 ; {r2,i2,r3,i3}
addps %1, %1, %2 ; {r0,i0,r1,i1}
shufps %2, %1, %3, 0xdd ; {i0,i1,i2,i3}
shufps %1, %1, %3, 0x88 ; {r0,r1,r2,r3}
%endmacro
; In SSE mode do one FFT8
; in: %1={r0,r1,r2,r3} %2={i0,i1,i2,i3} %3={r4,i4,r6,i6} %4={r5,i5,r7,i7}
; out: %1={r0,r1,r2,r3} %2={i0,i1,i2,i3} %3={r4,r5,r6,r7} %4={i4,i5,i6,i7}
;
; In AVX mode do two FFT8
; in: %1={r0,i0,r2,i2,r8, i8, r10,i10} %2={r1,i1,r3,i3,r9, i9, r11,i11}
; %3={r4,i4,r6,i6,r12,i12,r14,i14} %4={r5,i5,r7,i7,r13,i13,r15,i15}
; out: %1={r0,r1,r2,r3,r8, r9, r10,r11} %2={i0,i1,i2,i3,i8, i9, i10,i11}
; %3={r4,r5,r6,r7,r12,r13,r14,r15} %4={i4,i5,i6,i7,i12,i13,i14,i15}
%macro T8_SSE 6
mova %6, %3
subps %3, %4 ; {r5,i5,r7,i7}
addps %6, %4 ; {t1,t2,t3,t4}
mova %4, %3
shufps %4, %4, 0xb1 ; {i5,r5,i7,r7}
mulps %3, [ps_root2mppm] ; {-r5,i5,r7,-i7}
mulps %4, [ps_root2]
addps %3, %4 ; {t8,t7,ta,t9}
mova %4, %6
shufps %6, %3, 0x36 ; {t3,t2,t9,t8}
shufps %4, %3, 0x9c ; {t1,t4,t7,ta}
mova %3, %6
addps %6, %4 ; {t1,t2,t9,ta}
subps %3, %4 ; {t6,t5,tc,tb}
mova %4, %6
shufps %6, %3, 0xd8 ; {t1,t9,t5,tb}
shufps %4, %3, 0x8d ; {t2,ta,t6,tc}
mova %3, %1
mova %5, %2
addps %1, %6 ; {r0,r1,r2,r3}
addps %2, %4 ; {i0,i1,i2,i3}
subps %3, %6 ; {r4,r5,r6,r7}
subps %5, %4 ; {i4,i5,i6,i7}
SWAP %4, %5
addps %6, %3, %4 ; {t1,t2,t3,t4}
subps %3, %3, %4 ; {r5,i5,r7,i7}
shufps %4, %3, %3, 0xb1 ; {i5,r5,i7,r7}
mulps %3, %3, [ps_root2mppm] ; {-r5,i5,r7,-i7}
mulps %4, %4, [ps_root2]
addps %3, %3, %4 ; {t8,t7,ta,t9}
shufps %4, %6, %3, 0x9c ; {t1,t4,t7,ta}
shufps %6, %6, %3, 0x36 ; {t3,t2,t9,t8}
subps %3, %6, %4 ; {t6,t5,tc,tb}
addps %6, %6, %4 ; {t1,t2,t9,ta}
shufps %5, %6, %3, 0x8d ; {t2,ta,t6,tc}
shufps %6, %6, %3, 0xd8 ; {t1,t9,t5,tb}
subps %3, %1, %6 ; {r4,r5,r6,r7}
addps %1, %1, %6 ; {r0,r1,r2,r3}
subps %4, %2, %5 ; {i4,i5,i6,i7}
addps %2, %2, %5 ; {i0,i1,i2,i3}
%endmacro
; scheduled for cpu-bound sizes
@ -148,52 +190,44 @@ section .text align=16
IF%1 mova m4, Z(4)
IF%1 mova m5, Z(5)
mova m0, %2 ; wre
mova m2, m4
mova m1, %3 ; wim
mova m3, m5
mulps m2, m0 ; r2*wre
mulps m2, m4, m0 ; r2*wre
IF%1 mova m6, Z2(6)
mulps m3, m1 ; i2*wim
mulps m3, m5, m1 ; i2*wim
IF%1 mova m7, Z2(7)
mulps m4, m1 ; r2*wim
mulps m5, m0 ; i2*wre
addps m2, m3 ; r2*wre + i2*wim
mova m3, m1
mulps m1, m6 ; r3*wim
subps m5, m4 ; i2*wre - r2*wim
mova m4, m0
mulps m3, m7 ; i3*wim
mulps m4, m6 ; r3*wre
mulps m0, m7 ; i3*wre
subps m4, m3 ; r3*wre - i3*wim
mulps m4, m4, m1 ; r2*wim
mulps m5, m5, m0 ; i2*wre
addps m2, m2, m3 ; r2*wre + i2*wim
mulps m3, m1, m7 ; i3*wim
subps m5, m5, m4 ; i2*wre - r2*wim
mulps m1, m1, m6 ; r3*wim
mulps m4, m0, m6 ; r3*wre
mulps m0, m0, m7 ; i3*wre
subps m4, m4, m3 ; r3*wre - i3*wim
mova m3, Z(0)
addps m0, m1 ; i3*wre + r3*wim
mova m1, m4
addps m4, m2 ; t5
subps m1, m2 ; t3
subps m3, m4 ; r2
addps m4, Z(0) ; r0
addps m0, m0, m1 ; i3*wre + r3*wim
subps m1, m4, m2 ; t3
addps m4, m4, m2 ; t5
subps m3, m3, m4 ; r2
addps m4, m4, Z(0) ; r0
mova m6, Z(2)
mova Z(4), m3
mova Z(0), m4
mova m3, m5
subps m5, m0 ; t4
mova m4, m6
subps m6, m5 ; r3
addps m5, m4 ; r1
mova Z2(6), m6
mova Z(2), m5
subps m3, m5, m0 ; t4
subps m4, m6, m3 ; r3
addps m3, m3, m6 ; r1
mova Z2(6), m4
mova Z(2), m3
mova m2, Z(3)
addps m3, m0 ; t6
subps m2, m1 ; i3
addps m3, m5, m0 ; t6
subps m2, m2, m1 ; i3
mova m7, Z(1)
addps m1, Z(3) ; i1
addps m1, m1, Z(3) ; i1
mova Z2(7), m2
mova Z(3), m1
mova m4, m7
subps m7, m3 ; i2
addps m3, m4 ; i0
mova Z(5), m7
subps m4, m7, m3 ; i2
addps m3, m3, m7 ; i0
mova Z(5), m4
mova Z(1), m3
%endmacro
@ -201,77 +235,55 @@ IF%1 mova m7, Z2(7)
%macro PASS_BIG 1 ; (!interleave)
mova m4, Z(4) ; r2
mova m5, Z(5) ; i2
mova m2, m4
mova m0, [wq] ; wre
mova m3, m5
mova m1, [wq+o1q] ; wim
mulps m2, m0 ; r2*wre
mulps m2, m4, m0 ; r2*wre
mova m6, Z2(6) ; r3
mulps m3, m1 ; i2*wim
mulps m3, m5, m1 ; i2*wim
mova m7, Z2(7) ; i3
mulps m4, m1 ; r2*wim
mulps m5, m0 ; i2*wre
addps m2, m3 ; r2*wre + i2*wim
mova m3, m1
mulps m1, m6 ; r3*wim
subps m5, m4 ; i2*wre - r2*wim
mova m4, m0
mulps m3, m7 ; i3*wim
mulps m4, m6 ; r3*wre
mulps m0, m7 ; i3*wre
subps m4, m3 ; r3*wre - i3*wim
mulps m4, m4, m1 ; r2*wim
mulps m5, m5, m0 ; i2*wre
addps m2, m2, m3 ; r2*wre + i2*wim
mulps m3, m1, m7 ; i3*wim
mulps m1, m1, m6 ; r3*wim
subps m5, m5, m4 ; i2*wre - r2*wim
mulps m4, m0, m6 ; r3*wre
mulps m0, m0, m7 ; i3*wre
subps m4, m4, m3 ; r3*wre - i3*wim
mova m3, Z(0)
addps m0, m1 ; i3*wre + r3*wim
mova m1, m4
addps m4, m2 ; t5
subps m1, m2 ; t3
subps m3, m4 ; r2
addps m4, Z(0) ; r0
addps m0, m0, m1 ; i3*wre + r3*wim
subps m1, m4, m2 ; t3
addps m4, m4, m2 ; t5
subps m3, m3, m4 ; r2
addps m4, m4, Z(0) ; r0
mova m6, Z(2)
mova Z(4), m3
mova Z(0), m4
mova m3, m5
subps m5, m0 ; t4
mova m4, m6
subps m6, m5 ; r3
addps m5, m4 ; r1
IF%1 mova Z2(6), m6
IF%1 mova Z(2), m5
subps m3, m5, m0 ; t4
subps m4, m6, m3 ; r3
addps m3, m3, m6 ; r1
IF%1 mova Z2(6), m4
IF%1 mova Z(2), m3
mova m2, Z(3)
addps m3, m0 ; t6
subps m2, m1 ; i3
addps m5, m5, m0 ; t6
subps m2, m2, m1 ; i3
mova m7, Z(1)
addps m1, Z(3) ; i1
addps m1, m1, Z(3) ; i1
IF%1 mova Z2(7), m2
IF%1 mova Z(3), m1
mova m4, m7
subps m7, m3 ; i2
addps m3, m4 ; i0
IF%1 mova Z(5), m7
IF%1 mova Z(1), m3
subps m6, m7, m5 ; i2
addps m5, m5, m7 ; i0
IF%1 mova Z(5), m6
IF%1 mova Z(1), m5
%if %1==0
mova m4, m5 ; r1
mova m0, m6 ; r3
unpcklps m5, m1
unpckhps m4, m1
unpcklps m6, m2
unpckhps m0, m2
INTERL m1, m3, m7, Z, 2
INTERL m2, m4, m0, Z2, 6
mova m1, Z(0)
mova m2, Z(4)
mova Z(2), m5
mova Z(3), m4
mova Z2(6), m6
mova Z2(7), m0
mova m5, m1 ; r0
mova m4, m2 ; r2
unpcklps m1, m3
unpckhps m5, m3
unpcklps m2, m7
unpckhps m4, m7
mova Z(0), m1
mova Z(1), m5
mova Z(4), m2
mova Z(5), m4
INTERL m5, m1, m3, Z, 0
INTERL m6, m2, m7, Z, 4
%endif
%endmacro
@ -281,13 +293,106 @@ IF%1 mova Z(1), m3
punpckhdq %3, %2
%endmacro
INIT_XMM
%define mova movaps
%define Z(x) [r0+mmsize*x]
%define Z2(x) [r0+mmsize*x]
%define ZH(x) [r0+mmsize*x+mmsize/2]
INIT_YMM
; fft8_avx: in-place transform of the two ymm-sized slots Z(0) and Z(1)
; (2*mmsize bytes at r0, per the Z() definition above).
; All arithmetic is done by T8_AVX; m2-m4 are scratch registers.
align 16
fft8_avx:
mova m0, Z(0)
mova m1, Z(1)
T8_AVX m0, m1, m2, m3, m4
mova Z(0), m0
mova Z(1), m1
ret
; fft16_avx: in-place transform of the four ymm-sized slots Z(0)..Z(3) at r0.
; Z(2)/Z(3) go through T4_SSE (two fft4 transforms in AVX mode, per that
; macro's header comment), Z(0)/Z(1) go through T8_AVX, then the T4 results
; are rotated by the ps_cos16_1/ps_cos16_2 tables and combined with the
; T8 results by add/sub.
align 16
fft16_avx:
mova m2, Z(2)
mova m3, Z(3)
T4_SSE m2, m3, m7
mova m0, Z(0)
mova m1, Z(1)
T8_AVX m0, m1, m4, m5, m7
mova m4, [ps_cos16_1]
mova m5, [ps_cos16_2]
; m7 = m2*cos16_1 + m3*cos16_2
vmulps m6, m2, m4
vmulps m7, m3, m5
vaddps m7, m7, m6
; m3 = m3*cos16_1 - m2*cos16_2
vmulps m2, m2, m5
vmulps m3, m3, m4
vsubps m3, m3, m2
; regroup the 128-bit halves of the two products before the final butterflies
vblendps m2, m7, m3, 0xf0
vperm2f128 m3, m7, m3, 0x21
vaddps m4, m2, m3
vsubps m2, m3, m2
vperm2f128 m2, m2, m2, 0x01 ; swap the two 128-bit halves of m2
vsubps m3, m1, m2
vaddps m1, m1, m2
vsubps m5, m0, m4
vaddps m0, m0, m4
; store in 16-byte pieces: each slot gets one half of a "Z" register in its
; low 16 bytes and the matching half of a "ZH" register in its high 16 bytes
vextractf128 Z(0), m0, 0
vextractf128 ZH(0), m1, 0
vextractf128 Z(1), m0, 1
vextractf128 ZH(1), m1, 1
vextractf128 Z(2), m5, 0
vextractf128 ZH(2), m3, 0
vextractf128 Z(3), m5, 1
vextractf128 ZH(3), m3, 1
ret
; fft32_avx: in-place transform of the eight ymm-sized slots Z(0)..Z(7) at r0.
; Z(0)..Z(3) are handled by fft16_avx; Z(4)..Z(7) are handled as two FFT8s
; (T4_SSE + T8_SSE in AVX mode); PASS_SMALL then combines both parts using
; the cos_32 table.
align 16
fft32_avx:
call fft16_avx
mova m0, Z(4)
mova m1, Z(5)
T4_SSE m0, m1, m4
mova m2, Z(6)
mova m3, Z(7)
T8_SSE m0, m1, m2, m3, m4, m6
; m0={r0,r1,r2,r3,r8, r9, r10,r11} m1={i0,i1,i2,i3,i8, i9, i10,i11}
; m2={r4,r5,r6,r7,r12,r13,r14,r15} m3={i4,i5,i6,i7,i12,i13,i14,i15}
; gather the low 128-bit halves into m4/m5 and the high halves into m6/m7,
; i.e. the registers PASS_SMALL works on
vperm2f128 m4, m0, m2, 0x20
vperm2f128 m5, m1, m3, 0x20
vperm2f128 m6, m0, m2, 0x31
vperm2f128 m7, m1, m3, 0x31
; first argument 0: presumably skips PASS_SMALL's own IF%1 loads of m4-m7,
; which are already in registers here -- TODO confirm against the IF0 macro
PASS_SMALL 0, [cos_32], [cos_32+32]
ret
; fft32_interleave_avx: run fft32_avx, then interleave its output in place.
; Each iteration reads two ymm slots a=Z(0), b=Z(1) and writes back
; {a0,b0,a1,b1,...,a7,b7}. r0 advances by 2*mmsize (64 bytes) and r2d drops
; by mmsize/4 (8) per iteration, so starting from 32 the loop runs 4 times
; and covers 256 bytes. r0 and r2d are modified.
fft32_interleave_avx:
call fft32_avx
mov r2d, 32
.deint_loop:
mova m2, Z(0)
mova m3, Z(1)
; unpck works per 128-bit lane: m0 = {a0,b0,a1,b1 | a4,b4,a5,b5},
; m1 = {a2,b2,a3,b3 | a6,b6,a7,b7}
vunpcklps m0, m2, m3
vunpckhps m1, m2, m3
; 16-byte stores put the four pieces back in sequential order
vextractf128 Z(0), m0, 0
vextractf128 ZH(0), m1, 0
vextractf128 Z(1), m0, 1
vextractf128 ZH(1), m1, 1
add r0, mmsize*2
sub r2d, mmsize/4
jg .deint_loop
ret
INIT_XMM
%define movdqa movaps
align 16
fft4_avx:
fft4_sse:
mova m0, Z(0)
mova m1, Z(1)
@ -406,6 +511,8 @@ FFT48_3DN _3dn
%define Z(x) [zq + o1q*(x&6) + mmsize*(x&1)]
%define Z2(x) [zq + o3q + mmsize*(x&1)]
%define ZH(x) [zq + o1q*(x&6) + mmsize*(x&1) + mmsize/2]
%define Z2H(x) [zq + o3q + mmsize*(x&1) + mmsize/2]
%macro DECL_PASS 2+ ; name, payload
align 16
@ -423,8 +530,34 @@ DEFINE_ARGS z, w, n, o1, o3
rep ret
%endmacro
INIT_YMM
; INTERL_AVX: interleave two ymm registers element-wise and store the result.
; %1 = second source (odd output elements), %2 = first source (even output
; elements; clobbered), %3 = tmp,
; %4 = name of the addressing macro to store through (Z or Z2; the matching
;      "H" variant is built with %+), %5 = slot index.
; Stores go out as four 16-byte pieces because vunpck{l,h}ps work per
; 128-bit lane.
%macro INTERL_AVX 5
vunpckhps %3, %2, %1
vunpcklps %2, %2, %1
vextractf128 %4(%5), %2, 0
vextractf128 %4 %+ H(%5), %3, 0
vextractf128 %4(%5 + 1), %2, 1
vextractf128 %4 %+ H(%5 + 1), %3, 1
%endmacro
%define INTERL INTERL_AVX
DECL_PASS pass_avx, PASS_BIG 1
DECL_PASS pass_interleave_avx, PASS_BIG 0
INIT_XMM
%define mova movaps
; INTERL_SSE: interleave two xmm registers element-wise and store the result.
; Same argument list as INTERL_AVX so either can be bound to INTERL:
; %1 = second source, %2 = first source (clobbered), %3 = tmp,
; %4 = name of the addressing macro to store through, %5 = slot index.
; The low interleaved half goes to %4(%5), the high half to %4(%5+1).
%macro INTERL_SSE 5
mova %3, %2
unpcklps %2, %1
unpckhps %3, %1
mova %4(%5), %2
mova %4(%5+1), %3
%endmacro
%define INTERL INTERL_SSE
DECL_PASS pass_sse, PASS_BIG 1
DECL_PASS pass_interleave_sse, PASS_BIG 0
@ -457,9 +590,12 @@ DECL_PASS pass_interleave_3dn, PASS_BIG 0
%macro DECL_FFT 2-3 ; nbits, cpu, suffix
%xdefine list_of_fft fft4%2 SECTION_REL, fft8%2 SECTION_REL
%if %1==5
%if %1>=5
%xdefine list_of_fft list_of_fft, fft16%2 SECTION_REL
%endif
%if %1>=6
%xdefine list_of_fft list_of_fft, fft32%3%2 SECTION_REL
%endif
%assign n 1<<%1
%rep 17-%1
@ -492,9 +628,14 @@ section .text
; The others pass args in registers and don't spill anything.
cglobal fft_dispatch%3%2, 2,5,8, z, nbits
FFT_DISPATCH %3%2, nbits
%ifidn %2, _avx
vzeroupper
%endif
RET
%endmacro ; DECL_FFT
DECL_FFT 6, _avx
DECL_FFT 6, _avx, _interleave
DECL_FFT 5, _sse
DECL_FFT 5, _sse, _interleave
DECL_FFT 4, _3dn
@ -533,21 +674,53 @@ INIT_XMM
%endmacro
%macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5
movaps xmm6, [%4+%1*2]
movaps %2, [%4+%1*2+0x10]
movaps %3, xmm6
movaps xmm7, %2
mulps xmm6, [%5+%1]
mulps %2, [%6+%1]
mulps %3, [%6+%1]
mulps xmm7, [%5+%1]
subps %2, xmm6
addps %3, xmm7
mulps m6, %3, [%5+%1]
mulps m7, %2, [%5+%1]
mulps %2, %2, [%6+%1]
mulps %3, %3, [%6+%1]
subps %2, %2, m6
addps %3, %3, m7
%endmacro
; POSROTATESHUF_AVX: ymm version of the post-rotation + shuffle loop.
; %1 = j (offset that counts up by 0x20 per iteration; the loop ends once it
;         is no longer negative)
; %2 = k (offset that counts down by 0x20 per iteration)
; %3 = z+n8, %4 = tcos+n8, %5 = tsin+n8 (base pointers, per the macro header)
; Each iteration processes 0x40 bytes at [%3+j*2] and 0x40 bytes at
; [%3+k*2]: CMUL combines them with the tables at %4/%5, the j-side and
; k-side results are element-reversed and interleaved with each other, and
; both regions are written back. Clobbers ymm0-ymm7.
%macro POSROTATESHUF_AVX 5 ;j, k, z+n8, tcos+n8, tsin+n8
.post:
vmovaps ymm1, [%3+%1*2]
vmovaps ymm0, [%3+%1*2+0x20]
vmovaps ymm3, [%3+%2*2]
vmovaps ymm2, [%3+%2*2+0x20]
CMUL %1, ymm0, ymm1, %3, %4, %5
CMUL %2, ymm2, ymm3, %3, %4, %5
; reverse all 8 floats: 0x1b reverses within each lane, vperm2f128 0x01
; swaps the two lanes
vshufps ymm1, ymm1, ymm1, 0x1b
vshufps ymm3, ymm3, ymm3, 0x1b
vperm2f128 ymm1, ymm1, ymm1, 0x01
vperm2f128 ymm3, ymm3, ymm3, 0x01
vunpcklps ymm6, ymm2, ymm1
vunpckhps ymm4, ymm2, ymm1
vunpcklps ymm7, ymm0, ymm3
vunpckhps ymm5, ymm0, ymm3
; unpck results are per-lane, so write them out as 16-byte pieces in order
vextractf128 [%3+%1*2], ymm7, 0
vextractf128 [%3+%1*2+0x10], ymm5, 0
vextractf128 [%3+%1*2+0x20], ymm7, 1
vextractf128 [%3+%1*2+0x30], ymm5, 1
vextractf128 [%3+%2*2], ymm6, 0
vextractf128 [%3+%2*2+0x10], ymm4, 0
vextractf128 [%3+%2*2+0x20], ymm6, 1
vextractf128 [%3+%2*2+0x30], ymm4, 1
sub %2, 0x20
add %1, 0x20
jl .post
%endmacro
%macro POSROTATESHUF 5 ;j, k, z+n8, tcos+n8, tsin+n8
.post:
movaps xmm1, [%3+%1*2]
movaps xmm0, [%3+%1*2+0x10]
CMUL %1, xmm0, xmm1, %3, %4, %5
movaps xmm5, [%3+%2*2]
movaps xmm4, [%3+%2*2+0x10]
CMUL %2, xmm4, xmm5, %3, %4, %5
shufps xmm1, xmm1, 0x1b
shufps xmm5, xmm5, 0x1b
@ -566,7 +739,8 @@ INIT_XMM
jl .post
%endmacro
cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *input
%macro DECL_IMDCT 2
cglobal imdct_half%1, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *input
%ifdef ARCH_X86_64
%define rrevtab r10
%define rtcos r11
@ -641,7 +815,7 @@ cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample
mov r0, r1
mov r1d, [r5+FFTContext.nbits]
FFT_DISPATCH _sse, r1
FFT_DISPATCH %1, r1
mov r0d, [r5+FFTContext.mdctsize]
add r6, r0
@ -653,14 +827,24 @@ cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample
mov rtsin, [esp+4]
%endif
neg r0
mov r1, -16
mov r1, -mmsize
sub r1, r0
POSROTATESHUF r0, r1, r6, rtcos, rtsin
%2 r0, r1, r6, rtcos, rtsin
%ifdef ARCH_X86_64
pop r14
pop r13
pop r12
%else
add esp, 12
%endif
%ifidn avx_enabled, 1
vzeroupper
%endif
RET
%endmacro
DECL_IMDCT _sse, POSROTATESHUF
INIT_YMM
DECL_IMDCT _avx, POSROTATESHUF_AVX

@ -28,6 +28,12 @@ DECLARE_ASM_CONST(16, int, ff_m1m1m1m1)[4] =
void ff_fft_dispatch_sse(FFTComplex *z, int nbits);
void ff_fft_dispatch_interleave_sse(FFTComplex *z, int nbits);
void ff_fft_dispatch_interleave_avx(FFTComplex *z, int nbits);
/**
 * AVX implementation of the fft_calc callback.
 *
 * Hands the buffer and the context's nbits value to the interleaving
 * AVX dispatcher, ff_fft_dispatch_interleave_avx().
 *
 * @param s FFT context; only s->nbits is read
 * @param z complex buffer passed through to the dispatcher
 */
void ff_fft_calc_avx(FFTContext *s, FFTComplex *z)
{
    const int nbits = s->nbits;

    ff_fft_dispatch_interleave_avx(z, nbits);
}
void ff_fft_calc_sse(FFTContext *s, FFTComplex *z)
{
@ -77,7 +83,7 @@ void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input)
long n = s->mdct_size;
long n4 = n >> 2;
ff_imdct_half_sse(s, output+n4, input);
s->imdct_half(s, output + n4, input);
j = -n;
k = n-16;

@ -1,10 +1,11 @@
;*****************************************************************************
;* x86inc.asm
;*****************************************************************************
;* Copyright (C) 2005-2008 x264 project
;* Copyright (C) 2005-2011 x264 project
;*
;* Authors: Loren Merritt <lorenm@u.washington.edu>
;* Anton Mitrofanov <BugMaster@narod.ru>
;* Jason Garrett-Glaser <darkshikari@gmail.com>
;*
;* Permission to use, copy, modify, and/or distribute this software for any
;* purpose with or without fee is hereby granted, provided that the above
@ -499,6 +500,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%endmacro
%macro INIT_MMX 0
%assign avx_enabled 0
%define RESET_MM_PERMUTATION INIT_MMX
%define mmsize 8
%define num_mmregs 8
@ -520,6 +522,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%endmacro
%macro INIT_XMM 0
%assign avx_enabled 0
%define RESET_MM_PERMUTATION INIT_XMM
%define mmsize 16
%define num_mmregs 8
@ -538,6 +541,31 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%endrep
%endmacro
; INIT_AVX: xmm-register setup (via INIT_XMM) with avx_enabled switched on.
; INIT_XMM must run first because it assigns avx_enabled 0; the flag is then
; set to 1, PALIGNR is bound to PALIGNR_SSSE3, and RESET_MM_PERMUTATION is
; pointed back at INIT_AVX so a reset keeps this mode.
%macro INIT_AVX 0
INIT_XMM
%assign avx_enabled 1
%define PALIGNR PALIGNR_SSSE3
%define RESET_MM_PERMUTATION INIT_AVX
%endmacro
; INIT_YMM: select 256-bit ymm registers as the generic mN registers.
; Sets avx_enabled to 1 and mmsize to 32, binds mova/movu to
; vmovaps/vmovups, and defines m0..m(num_mmregs-1) as ymm0.. together with
; the reverse mapping nymmN -> N.
%macro INIT_YMM 0
%assign avx_enabled 1
%define RESET_MM_PERMUTATION INIT_YMM
%define mmsize 32
; 8 registers by default, 16 when ARCH_X86_64 is defined
%define num_mmregs 8
%ifdef ARCH_X86_64
%define num_mmregs 16
%endif
%define mova vmovaps
%define movu vmovups
%assign %%i 0
%rep num_mmregs
CAT_XDEFINE m, %%i, ymm %+ %%i
CAT_XDEFINE nymm, %%i, %%i
%assign %%i %%i+1
%endrep
%endmacro
INIT_MMX
; I often want to use macros that permute their arguments. e.g. there's no
@ -645,3 +673,222 @@ INIT_MMX
sub %1, %2
%endif
%endmacro
;=============================================================================
; AVX abstraction layer
;=============================================================================
%assign i 0
%rep 16
%if i < 8
CAT_XDEFINE sizeofmm, i, 8
%endif
CAT_XDEFINE sizeofxmm, i, 16
CAT_XDEFINE sizeofymm, i, 32
%assign i i+1
%endrep
%undef i
; RUN_AVX_INSTR: emit one instruction either in its v-prefixed AVX form or
; as the legacy form, inserting a register copy when a non-destructive
; (separate destination) form was written but AVX is not enabled.
;%1 == instruction
;%2 == 1 if float, 0 if int
;%3 == 0 if 3-operand (xmm, xmm, xmm), 1 if 4-operand (xmm, xmm, xmm, imm)
;%4 == number of operands given
;%5+: operands
%macro RUN_AVX_INSTR 6-7+
; ymm destination: always the v-prefixed instruction, operands as written
%if sizeof%5==32
v%1 %5, %6, %7
%else
; pick the register-copy instruction used for emulation:
; movq for mmx registers, movaps for float, movdqa for integer
%if sizeof%5==8
%define %%regmov movq
%elif %2
%define %%regmov movaps
%else
%define %%regmov movdqa
%endif
; the caller supplied the extra (separate destination) operand
%if %4>=3+%3
%ifnidn %5, %6
%if avx_enabled && sizeof%5==16
v%1 %5, %6, %7
%else
; no AVX: copy src1 into dst, then run the legacy destructive form
%%regmov %5, %6
%1 %5, %7
%endif
%else
; dst and src1 are the same register: the legacy form is already equivalent
%1 %5, %7
%endif
; legacy operand count: pass the instruction through unchanged
%elif %3
%1 %5, %6, %7
%else
%1 %5, %6
%endif
%endif
%endmacro
; AVX_INSTR: define a macro named after the instruction that forwards to
; RUN_AVX_INSTR. The generated macro takes 2-8 parameters; parameters 3-5
; default to the sentinel "fnord" so the number of operands actually given
; can be detected, and parameters 6-8 default to this macro's own %1, %2, %3
; (instruction name, float flag, 4-operand flag).
;%1 == instruction
;%2 == 1 if float, 0 if int
;%3 == 0 if 3-operand (xmm, xmm, xmm), 1 if 4-operand (xmm, xmm, xmm, imm)
%macro AVX_INSTR 3
%macro %1 2-8 fnord, fnord, fnord, %1, %2, %3
; the first parameter still holding the sentinel gives the operand count
%ifidn %3, fnord
RUN_AVX_INSTR %6, %7, %8, 2, %1, %2
%elifidn %4, fnord
RUN_AVX_INSTR %6, %7, %8, 3, %1, %2, %3
%elifidn %5, fnord
RUN_AVX_INSTR %6, %7, %8, 4, %1, %2, %3, %4
%else
RUN_AVX_INSTR %6, %7, %8, 5, %1, %2, %3, %4, %5
%endif
%endmacro
%endmacro
AVX_INSTR addpd, 1, 0
AVX_INSTR addps, 1, 0
AVX_INSTR addsd, 1, 0
AVX_INSTR addss, 1, 0
AVX_INSTR addsubpd, 1, 0
AVX_INSTR addsubps, 1, 0
AVX_INSTR andpd, 1, 0
AVX_INSTR andps, 1, 0
AVX_INSTR andnpd, 1, 0
AVX_INSTR andnps, 1, 0
AVX_INSTR blendpd, 1, 0
AVX_INSTR blendps, 1, 0
AVX_INSTR blendvpd, 1, 0
AVX_INSTR blendvps, 1, 0
AVX_INSTR cmppd, 1, 0
AVX_INSTR cmpps, 1, 0
AVX_INSTR cmpsd, 1, 0
AVX_INSTR cmpss, 1, 0
AVX_INSTR divpd, 1, 0
AVX_INSTR divps, 1, 0
AVX_INSTR divsd, 1, 0
AVX_INSTR divss, 1, 0
AVX_INSTR dppd, 1, 0
AVX_INSTR dpps, 1, 0
AVX_INSTR haddpd, 1, 0
AVX_INSTR haddps, 1, 0
AVX_INSTR hsubpd, 1, 0
AVX_INSTR hsubps, 1, 0
AVX_INSTR maxpd, 1, 0
AVX_INSTR maxps, 1, 0
AVX_INSTR maxsd, 1, 0
AVX_INSTR maxss, 1, 0
AVX_INSTR minpd, 1, 0
AVX_INSTR minps, 1, 0
AVX_INSTR minsd, 1, 0
AVX_INSTR minss, 1, 0
AVX_INSTR mpsadbw, 0, 1
AVX_INSTR mulpd, 1, 0
AVX_INSTR mulps, 1, 0
AVX_INSTR mulsd, 1, 0
AVX_INSTR mulss, 1, 0
AVX_INSTR orpd, 1, 0
AVX_INSTR orps, 1, 0
AVX_INSTR packsswb, 0, 0
AVX_INSTR packssdw, 0, 0
AVX_INSTR packuswb, 0, 0
AVX_INSTR packusdw, 0, 0
AVX_INSTR paddb, 0, 0
AVX_INSTR paddw, 0, 0
AVX_INSTR paddd, 0, 0
AVX_INSTR paddq, 0, 0
AVX_INSTR paddsb, 0, 0
AVX_INSTR paddsw, 0, 0
AVX_INSTR paddusb, 0, 0
AVX_INSTR paddusw, 0, 0
AVX_INSTR palignr, 0, 1
AVX_INSTR pand, 0, 0
AVX_INSTR pandn, 0, 0
AVX_INSTR pavgb, 0, 0
AVX_INSTR pavgw, 0, 0
AVX_INSTR pblendvb, 0, 0
AVX_INSTR pblendw, 0, 1
AVX_INSTR pcmpestri, 0, 0
AVX_INSTR pcmpestrm, 0, 0
AVX_INSTR pcmpistri, 0, 0
AVX_INSTR pcmpistrm, 0, 0
AVX_INSTR pcmpeqb, 0, 0
AVX_INSTR pcmpeqw, 0, 0
AVX_INSTR pcmpeqd, 0, 0
AVX_INSTR pcmpeqq, 0, 0
AVX_INSTR pcmpgtb, 0, 0
AVX_INSTR pcmpgtw, 0, 0
AVX_INSTR pcmpgtd, 0, 0
AVX_INSTR pcmpgtq, 0, 0
AVX_INSTR phaddw, 0, 0
AVX_INSTR phaddd, 0, 0
AVX_INSTR phaddsw, 0, 0
AVX_INSTR phsubw, 0, 0
AVX_INSTR phsubd, 0, 0
AVX_INSTR phsubsw, 0, 0
AVX_INSTR pmaddwd, 0, 0
AVX_INSTR pmaddubsw, 0, 0
AVX_INSTR pmaxsb, 0, 0
AVX_INSTR pmaxsw, 0, 0
AVX_INSTR pmaxsd, 0, 0
AVX_INSTR pmaxub, 0, 0
AVX_INSTR pmaxuw, 0, 0
AVX_INSTR pmaxud, 0, 0
AVX_INSTR pminsb, 0, 0
AVX_INSTR pminsw, 0, 0
AVX_INSTR pminsd, 0, 0
AVX_INSTR pminub, 0, 0
AVX_INSTR pminuw, 0, 0
AVX_INSTR pminud, 0, 0
AVX_INSTR pmulhuw, 0, 0
AVX_INSTR pmulhrsw, 0, 0
AVX_INSTR pmulhw, 0, 0
AVX_INSTR pmullw, 0, 0
AVX_INSTR pmulld, 0, 0
AVX_INSTR pmuludq, 0, 0
AVX_INSTR pmuldq, 0, 0
AVX_INSTR por, 0, 0
AVX_INSTR psadbw, 0, 0
AVX_INSTR pshufb, 0, 0
AVX_INSTR psignb, 0, 0
AVX_INSTR psignw, 0, 0
AVX_INSTR psignd, 0, 0
AVX_INSTR psllw, 0, 0
AVX_INSTR pslld, 0, 0
AVX_INSTR psllq, 0, 0
AVX_INSTR pslldq, 0, 0
AVX_INSTR psraw, 0, 0
AVX_INSTR psrad, 0, 0
AVX_INSTR psrlw, 0, 0
AVX_INSTR psrld, 0, 0
AVX_INSTR psrlq, 0, 0
AVX_INSTR psrldq, 0, 0
AVX_INSTR psubb, 0, 0
AVX_INSTR psubw, 0, 0
AVX_INSTR psubd, 0, 0
AVX_INSTR psubq, 0, 0
AVX_INSTR psubsb, 0, 0
AVX_INSTR psubsw, 0, 0
AVX_INSTR psubusb, 0, 0
AVX_INSTR psubusw, 0, 0
AVX_INSTR punpckhbw, 0, 0
AVX_INSTR punpckhwd, 0, 0
AVX_INSTR punpckhdq, 0, 0
AVX_INSTR punpckhqdq, 0, 0
AVX_INSTR punpcklbw, 0, 0
AVX_INSTR punpcklwd, 0, 0
AVX_INSTR punpckldq, 0, 0
AVX_INSTR punpcklqdq, 0, 0
AVX_INSTR pxor, 0, 0
; shufps is a float instruction: mark it as float so the non-AVX emulation
; in RUN_AVX_INSTR copies with movaps instead of movdqa
AVX_INSTR shufps, 1, 1
AVX_INSTR subpd, 1, 0
AVX_INSTR subps, 1, 0
AVX_INSTR subsd, 1, 0
AVX_INSTR subss, 1, 0
AVX_INSTR unpckhpd, 1, 0
AVX_INSTR unpckhps, 1, 0
AVX_INSTR unpcklpd, 1, 0
AVX_INSTR unpcklps, 1, 0
AVX_INSTR xorpd, 1, 0
AVX_INSTR xorps, 1, 0
; 3DNow instructions, for sharing code between AVX, SSE and 3DN
AVX_INSTR pfadd, 1, 0
AVX_INSTR pfsub, 1, 0
AVX_INSTR pfmul, 1, 0

@ -26,8 +26,8 @@
#include "libavutil/samplefmt.h"
#define LIBAVFILTER_VERSION_MAJOR 2
#define LIBAVFILTER_VERSION_MINOR 0
#define LIBAVFILTER_VERSION_MICRO 0
#define LIBAVFILTER_VERSION_MINOR 3
#define LIBAVFILTER_VERSION_MICRO 1
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
LIBAVFILTER_VERSION_MINOR, \
@ -115,7 +115,7 @@ typedef struct AVFilterBufferRefVideoProps {
AVRational pixel_aspect; ///< pixel aspect ratio
int interlaced; ///< is frame interlaced
int top_field_first; ///< field order
int pict_type; ///< Picture type of the frame
enum AVPictureType pict_type; ///< picture type of the frame
int key_frame; ///< 1 -> keyframe, 0-> not
} AVFilterBufferRefVideoProps;

@ -75,8 +75,8 @@ typedef struct {
int input_is_pal; ///< set to 1 if the input format is paletted
int interlaced;
char w_expr[256]; ///< width expression string
char h_expr[256]; ///< height expression string
char w_expr[256]; ///< width expression string
char h_expr[256]; ///< height expression string
} ScaleContext;
static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)

@ -1,7 +1,6 @@
/*
* Copyright (C) 2006-2010 Michael Niedermayer <michaelni@gmx.at>
* 2010 James Darnley <james.darnley@gmail.com>
* This file is part of FFmpeg.
* 2010 James Darnley <james.darnley@gmail.com>
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by

@ -131,7 +131,7 @@ static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
(n = sscanf(args, "%d:%d:%127[^:]:%d:%d:%d:%d", &c->w, &c->h, pix_fmt_str,
&c->time_base.num, &c->time_base.den,
&c->pixel_aspect.num, &c->pixel_aspect.den)) != 7) {
av_log(ctx, AV_LOG_ERROR, "Expected 7 arguments, but %d found in '%s'\n", n, args);
av_log(ctx, AV_LOG_ERROR, "Expected 7 arguments, but only %d found in '%s'\n", n, args);
return AVERROR(EINVAL);
}
if ((c->pix_fmt = av_get_pix_fmt(pix_fmt_str)) == PIX_FMT_NONE) {

@ -175,7 +175,6 @@ static void build_frame_code(AVFormatContext *s){
}
key_frame= intra_only;
#if 1
if(is_audio){
int frame_bytes= codec->frame_size*(int64_t)codec->bit_rate / (8*codec->sample_rate);
int pts;
@ -199,7 +198,6 @@ static void build_frame_code(AVFormatContext *s){
ft->pts_delta=1;
start2++;
}
#endif
if(codec->has_b_frames){
pred_count=5;

@ -40,7 +40,7 @@
#define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
#define LIBAVUTIL_VERSION_MAJOR 51
#define LIBAVUTIL_VERSION_MINOR 0
#define LIBAVUTIL_VERSION_MINOR 1
#define LIBAVUTIL_VERSION_MICRO 0
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
@ -97,6 +97,25 @@ enum AVMediaType {
#define AV_TIME_BASE 1000000
#define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE}
/**
 * Picture (frame) coding types.
 *
 * The values are consecutive and start at 1; 0 is not assigned to any type.
 */
enum AVPictureType {
    AV_PICTURE_TYPE_I  = 1, ///< Intra
    AV_PICTURE_TYPE_P  = 2, ///< Predicted
    AV_PICTURE_TYPE_B  = 3, ///< Bi-dir predicted
    AV_PICTURE_TYPE_S  = 4, ///< S(GMC)-VOP MPEG4
    AV_PICTURE_TYPE_SI = 5, ///< Switching Intra
    AV_PICTURE_TYPE_SP = 6, ///< Switching Predicted
    AV_PICTURE_TYPE_BI = 7, ///< BI type
};
/**
 * Return a single letter describing the given picture type.
 *
 * @param[in] pict_type the picture type
 * @return a single character representing the picture type,
 *         '?' if pict_type is unknown
 */
char av_get_picture_type_char(enum AVPictureType pict_type);
#include "common.h"
#include "error.h"
#include "mathematics.h"

@ -69,21 +69,21 @@ void *av_malloc(size_t size)
#endif
/* let's disallow possible ambiguous cases */
if(size > (INT_MAX-16) )
if(size > (INT_MAX-32) )
return NULL;
#if CONFIG_MEMALIGN_HACK
ptr = malloc(size+16);
ptr = malloc(size+32);
if(!ptr)
return ptr;
diff= ((-(long)ptr - 1)&15) + 1;
diff= ((-(long)ptr - 1)&31) + 1;
ptr = (char*)ptr + diff;
((char*)ptr)[-1]= diff;
#elif HAVE_POSIX_MEMALIGN
if (posix_memalign(&ptr,16,size))
if (posix_memalign(&ptr,32,size))
ptr = NULL;
#elif HAVE_MEMALIGN
ptr = memalign(16,size);
ptr = memalign(32,size);
/* Why 64?
Indeed, we should align it:
on 4 for 386
@ -93,10 +93,8 @@ void *av_malloc(size_t size)
Because L1 and L2 caches are aligned on those values.
But I don't want to code such logic here!
*/
/* Why 16?
Because some CPUs need alignment, for example SSE2 on P4, & most RISC CPUs
it will just trigger an exception and the unaligned load will be done in the
exception handler or it will just segfault (SSE2 on P4).
/* Why 32?
For AVX ASM. SSE / NEON needs only 16.
Why not larger? Because I did not see a difference in benchmarks ...
*/
/* benchmarks with P3

@ -218,7 +218,6 @@ int main(void){
printf("\n");
}
#if 1
for(i=0; i<LEN; i++){
double v[LEN];
double error=0;
@ -233,7 +232,7 @@ int main(void){
printf("%f ", error);
}
printf("\n");
#endif
for(i=0; i<LEN; i++){
for(j=0; j<LEN; j++){
printf("%9.6f ", eigenvector[i + j*LEN]);

@ -39,3 +39,17 @@ const char *avutil_license(void)
#define LICENSE_PREFIX "libavutil license: "
return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
}
char av_get_picture_type_char(enum AVPictureType pict_type)
{
switch (pict_type) {
case AV_PICTURE_TYPE_I: return 'I';
case AV_PICTURE_TYPE_P: return 'P';
case AV_PICTURE_TYPE_B: return 'B';
case AV_PICTURE_TYPE_S: return 'S';
case AV_PICTURE_TYPE_SI: return 'i';
case AV_PICTURE_TYPE_SP: return 'p';
case AV_PICTURE_TYPE_BI: return 'b';
default: return '?';
}
}

@ -1,2 +1,2 @@
5ddb6d25dd117db29627f9d286153a7a *./tests/data/acodec/ac3.rm
0f14801e166819dd4a58981aea36e08b *./tests/data/acodec/ac3.rm
98751 ./tests/data/acodec/ac3.rm

@ -1,2 +1,2 @@
a1c71456f21d5459d2824d75bbdcc80c *./tests/data/lavf/lavf.rm
2e3d6b1944c6cd2cf14e13055aecf82a *./tests/data/lavf/lavf.rm
346706 ./tests/data/lavf/lavf.rm

@ -11,7 +11,8 @@ ret:-1 st:-1 flags:1 ts: 1.470835
ret:-1 st: 0 flags:0 ts: 0.365000
ret: 0 st: 0 flags:1 ts:-0.741000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret:-1 st:-1 flags:0 ts: 2.153336
ret: 0 st:-1 flags:0 ts: 2.153336
ret: 0 st: 0 flags:1 dts: 2.159000 pts: 2.159000 pos: 35567 size: 556
ret:-1 st:-1 flags:1 ts: 1.047503
ret: 0 st: 0 flags:0 ts:-0.058000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556

Loading…
Cancel
Save