libavcodec: add an optional trellis quantizer (CODEC_FLAG_TRELLIS_QUANT)

Summary of what the hunks below do:
  * avcodec.h   - bump LIBAVCODEC_BUILD 4647 -> 4648 and add the
                  CODEC_FLAG_TRELLIS_QUANT flag bit (0x00200000).
  * dsputil.c   - ff_block_permute() takes DCTELEM instead of INT16.
  * h263.c      - change the UNI_MPEG4_ENC_INDEX layout to
                  last*128*64 + run*128 + level and export the intra/inter
                  AC VLC length tables (plus the escape length) through
                  MpegEncContext in h263_encode_init().
  * mpegvideo.c - convert_matrix() fills qmat[] using QMAT_SHIFT and a
                  64-bit numerator; DCT_common_init() installs
                  dct_quantize_trellis_c when the flag is set; add
                  dct_quantize_trellis_c() itself.
  * mpegvideo.h - new MpegEncContext fields and the UNI_ENC_INDEX macro
                  used by the trellis quantizer.

NOTE(review): this copy of the patch was recovered from a
whitespace-collapsed extraction.  Line breaks were restored from the hunk
headers; indentation, column alignment and the exact position of blank
context lines are a best effort, so apply with whitespace-insensitive
matching (e.g. `patch -l`).  Nine added lines in dct_quantize_trellis_c
(from `threshold2=` through the first commented-out `if(` line) were
missing from the extraction and are reconstructed from the surviving uses
of threshold1/threshold2, k and level and from the hunk's added-line
count (245) - verify against the upstream commit before relying on them.

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 155e5e32ee..a5d71c2773 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -5,8 +5,8 @@
 
 #define LIBAVCODEC_VERSION_INT 0x000406
 #define LIBAVCODEC_VERSION     "0.4.6"
-#define LIBAVCODEC_BUILD       4647
-#define LIBAVCODEC_BUILD_STR   "4647"
+#define LIBAVCODEC_BUILD       4648
+#define LIBAVCODEC_BUILD_STR   "4648"
 
 enum CodecID {
     CODEC_ID_NONE,
@@ -146,6 +146,7 @@ static const int Motion_Est_QTab[] = { ME_ZERO, ME_PHODS, ME_LOG,
 #define CODEC_FLAG_INTERLACED_DCT 0x00040000 /* use interlaced dct */
 #define CODEC_FLAG_LOW_DELAY      0x00080000 /* force low delay / will fail on b frames */
 #define CODEC_FLAG_ALT_SCAN       0x00100000 /* use alternate scan */
+#define CODEC_FLAG_TRELLIS_QUANT  0x00200000 /* use trellis quantization */
 
 /* codec capabilities */
 
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index c48c71119b..80ff8f4022 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -1467,10 +1467,10 @@ static int sad8x8_c(void *s, uint8_t *a, uint8_t *b, int stride){
     return pix_abs8x8_c(a,b,stride);
 }
 
-void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last)
+void ff_block_permute(DCTELEM *block, UINT8 *permutation, const UINT8 *scantable, int last)
 {
     int i;
-    INT16 temp[64];
+    DCTELEM temp[64];
 
     if(last<=0) return;
     //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
diff --git a/libavcodec/h263.c b/libavcodec/h263.c
index 9d06581bf0..c1bfbaf64d 100644
--- a/libavcodec/h263.c
+++ b/libavcodec/h263.c
@@ -82,12 +82,13 @@ static UINT16 (*mv_penalty)[MAX_MV*2+1]= NULL;
 static UINT8 fcode_tab[MAX_MV*2+1];
 static UINT8 umv_fcode_tab[MAX_MV*2+1];
 
-static UINT32 uni_mpeg4_intra_rl_bits[64*64*2*2];
-static UINT8 uni_mpeg4_intra_rl_len [64*64*2*2];
-static UINT32 uni_mpeg4_inter_rl_bits[64*64*2*2];
-static UINT8 uni_mpeg4_inter_rl_len [64*64*2*2];
-#define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128 + (run)*256 + (level))
+static uint32_t uni_mpeg4_intra_rl_bits[64*64*2*2];
+static uint8_t  uni_mpeg4_intra_rl_len [64*64*2*2];
+static uint32_t uni_mpeg4_inter_rl_bits[64*64*2*2];
+static uint8_t  uni_mpeg4_inter_rl_len [64*64*2*2];
+//#define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128 + (run)*256 + (level))
 //#define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128*64 + (run) + (level)*64)
+#define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128*64 + (run)*128 + (level))
 
 /* mpeg4
 inter
@@ -1441,6 +1442,11 @@ void h263_encode_init(MpegEncContext *s)
         s->fcode_tab= fcode_tab;
         s->min_qcoeff= -2048;
         s->max_qcoeff= 2047;
+        s->intra_ac_vlc_length     = uni_mpeg4_intra_rl_len;
+        s->intra_ac_vlc_last_length= uni_mpeg4_intra_rl_len + 128*64;
+        s->inter_ac_vlc_length     = uni_mpeg4_inter_rl_len;
+        s->inter_ac_vlc_last_length= uni_mpeg4_inter_rl_len + 128*64;
+        s->ac_esc_length= 7+2+1+6+1+12+1;
         break;
     case CODEC_ID_H263P:
         s->fcode_tab= umv_fcode_tab;
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 7c5cf59c87..eb12efc9dd 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -41,6 +41,7 @@ static void dct_unquantize_h263_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
 static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w);
 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
+static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
 
 void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w)= draw_edges_c;
 
@@ -122,7 +123,8 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
                    so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67
                 */
-                qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
+                qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
+//                qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
                 qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
 
                 if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1;
@@ -228,6 +230,10 @@ int DCT_common_init(MpegEncContext *s)
     MPV_common_init_ppc(s);
 #endif
 
+    if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
+        s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
+    }
+
     switch(s->idct_permutation_type){
     case FF_NO_IDCT_PERM:
         for(i=0; i<64; i++)
@@ -3253,6 +3259,251 @@ static void encode_picture(MpegEncContext *s, int picture_number)
     }
 }
 
+static int dct_quantize_trellis_c(MpegEncContext *s,
+                        DCTELEM *block, int n,
+                        int qscale, int *overflow){
+    const int *qmat;
+    const UINT8 *scantable= s->intra_scantable.scantable;
+    int max=0;
+    unsigned int threshold1, threshold2;
+    int bias=0;
+    int run_tab[65];
+    int last_run[65];
+    int level_tab[65];
+    int last_level[65];
+    int score_tab[65];
+    int last_score[65];
+    int coeff[4][64];
+    int coeff_count[64];
+    int lambda, qmul, qadd, start_i, best_i, best_score, last_non_zero, i;
+    const int esc_length= s->ac_esc_length;
+    uint8_t * length;
+    uint8_t * last_length;
+
+    s->fdct (block);
+
+    qmul= qscale*16;
+    qadd= ((qscale-1)|1)*8;
+
+    if (s->mb_intra) {
+        int q;
+        if (!s->h263_aic) {
+            if (n < 4)
+                q = s->y_dc_scale;
+            else
+                q = s->c_dc_scale;
+            q = q << 3;
+        } else{
+            /* For AIC we skip quant/dequant of INTRADC */
+            q = 1 << 3;
+            qadd=0;
+        }
+
+        /* note: block[0] is assumed to be positive */
+        block[0] = (block[0] + (q >> 1)) / q;
+        start_i = 1;
+        last_non_zero = 0;
+        qmat = s->q_intra_matrix[qscale];
+        if(s->mpeg_quant)
+            bias= 1<<(QMAT_SHIFT-1);
+        length     = s->intra_ac_vlc_length;
+        last_length= s->intra_ac_vlc_last_length;
+    } else {
+        start_i = 0;
+        last_non_zero = -1;
+        qmat = s->q_inter_matrix[qscale];
+        length     = s->inter_ac_vlc_length;
+        last_length= s->inter_ac_vlc_last_length;
+    }
+
+    threshold1= (1<<QMAT_SHIFT) - bias - 1;
+    threshold2= (threshold1<<1);
+
+    for(i=start_i; i<64; i++) {
+        const int j = scantable[i];
+        const int k= i-start_i;
+        int level = block[j];
+        level = level * qmat[j];
+
+//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
+//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
+        if(((unsigned)(level+threshold1))>threshold2){
+            if(level>0){
+                level= (bias + level)>>QMAT_SHIFT;
+                coeff[0][k]= level;
+                coeff[1][k]= level-1;
+                coeff[2][k]= level-2;
+                coeff[3][k]= level-3;
+                coeff_count[k]= FFMIN(level, 4);
+            }else{
+                level= (bias - level)>>QMAT_SHIFT;
+                coeff[0][k]= -level;
+                coeff[1][k]= -level+1;
+                coeff[2][k]= -level+2;
+                coeff[3][k]= -level+3;
+                coeff_count[k]= FFMIN(level, 4);
+            }
+            max |=level;
+            last_non_zero = i;
+        }else{
+            if(level < 0)
+                coeff[0][k]= -1;
+            else
+                coeff[0][k]= 1;
+            coeff_count[k]= 1;
+        }
+    }
+
+    *overflow= s->max_qcoeff < max; //overflow might have happend
+
+    if(last_non_zero < start_i){
+        memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
+        return last_non_zero;
+    }
+
+    lambda= (qscale*qscale*64*82 + 50)/100; //FIXME finetune
+
+    score_tab[0]=
+    last_score[0]= 0;
+//printf("qscale:%d\n", qscale);
+    for(i=0; i<=last_non_zero - start_i; i++){
+        int level_index, run, j;
+        const int dct_coeff= block[ scantable[i + start_i] ];
+        const int zero_distoration= dct_coeff*dct_coeff;
+        int best_score=256*256*256*120, best_last_score= 256*256*256*120;
+
+//printf("%2d %5d ", i, dct_coeff);
+
+        for(level_index=0; level_index < coeff_count[i]; level_index++){
+            int distoration;
+            int level= coeff[level_index][i];
+            int unquant_coeff;
+
+            assert(level);
+
+            if(s->out_format == FMT_H263){
+                if(level>0){
+                    unquant_coeff= level*qmul + qadd;
+                }else{
+                    unquant_coeff= level*qmul - qadd;
+                }
+            } //FIXME else
+//printf("(%d %d) ", level, unquant_coeff);
+            distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff);
+
+            level+=64;
+            if((level&(~127)) == 0){
+                for(run=0; run<=i; run++){
+                    int score= distoration + length[UNI_ENC_INDEX(run, level)]*lambda;
+                    score += score_tab[i-run];
+
+                    if(score < best_score){
+                        best_score=
+                        score_tab[i+1]= score;
+                        run_tab[i+1]= run;
+                        level_tab[i+1]= level-64;
+                    }
+                }
+
+                if(s->out_format == FMT_H263){
+                    for(run=0; run<=i; run++){
+                        int score= distoration + last_length[UNI_ENC_INDEX(run, level)]*lambda;
+                        score += score_tab[i-run];
+                        if(score < best_last_score){
+                            best_last_score=
+                            last_score[i+1]= score;
+                            last_run[i+1]= run;
+                            last_level[i+1]= level-64;
+                        }
+                    }
+                }
+            }else{
+                distoration += esc_length*lambda;
+                for(run=0; run<=i; run++){
+                    int score= distoration + score_tab[i-run];
+
+                    if(score < best_score){
+                        best_score=
+                        score_tab[i+1]= score;
+                        run_tab[i+1]= run;
+                        level_tab[i+1]= level-64;
+                    }
+                }
+
+                if(s->out_format == FMT_H263){
+                    for(run=0; run<=i; run++){
+                        int score= distoration + score_tab[i-run];
+                        if(score < best_last_score){
+                            best_last_score=
+                            last_score[i+1]= score;
+                            last_run[i+1]= run;
+                            last_level[i+1]= level-64;
+                        }
+                    }
+                }
+            }
+        }
+
+        for(j=0; j<=i; j++){
+            score_tab[j] += zero_distoration;
+//            printf("%6d ", score_tab[j]);
+        }
+//        printf("%6d ", score_tab[j]);
+
+//        printf("last: ");
+        if(s->out_format == FMT_H263){
+            for(j=0; j<=i; j++){
+                last_score[j] += zero_distoration;
+//                printf("%6d ", last_score[j]);
+            }
+//            printf("%6d ", last_score[j]);
+        }
+//        printf("\n");
+    }
+
+    if(s->out_format != FMT_H263){
+        for(i=0; i<=last_non_zero - start_i + 1; i++){
+            last_score[i]= score_tab[i];
+            if(i) last_score[i] += lambda*2; //FIXME exacter?
+            last_run[i]= run_tab[i];
+            last_level[i]= level_tab[i];
+        }
+    }
+
+    //FIXME add some cbp penalty
+    best_i= 0;
+    best_score= 256*256*256*120;
+    for(i=0; i<=last_non_zero - start_i + 1; i++){
+        int score= last_score[i];
+        if(score < best_score){
+            best_score= score;
+            best_i= i;
+        }
+    }
+
+    last_non_zero= best_i - 1 + start_i;
+    memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
+
+    if(last_non_zero < start_i)
+        return last_non_zero;
+
+    i= best_i;
+//printf("%d %d %d %d %d\n", last_level[i], i, start_i, last_non_zero, best_score);
+    assert(last_level[i]);
+//FIXME use permutated scantable
+    block[ s->idct_permutation[ scantable[last_non_zero] ] ]= last_level[i];
+    i -= last_run[i] + 1;
+
+    for(;i>0 ; i -= run_tab[i] + 1){
+        const int j= s->idct_permutation[ scantable[i - 1 + start_i] ];
+
+        block[j]= level_tab[i];
+        assert(block[j]);
+    }
+
+    return last_non_zero;
+}
+
 static int dct_quantize_c(MpegEncContext *s,
                         DCTELEM *block, int n,
                         int qscale, int *overflow)
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index 65c8011ed7..e6af7fa287 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -307,6 +307,13 @@ typedef struct MpegEncContext {
     int inter_quant_bias;    /* bias for the quantizer */
     int min_qcoeff;          /* minimum encodable coefficient */
     int max_qcoeff;          /* maximum encodable coefficient */
+    int ac_esc_length;       /* num of bits needed to encode the longest esc */
+    uint8_t *intra_ac_vlc_length;
+    uint8_t *intra_ac_vlc_last_length;
+    uint8_t *inter_ac_vlc_length;
+    uint8_t *inter_ac_vlc_last_length;
+#define UNI_ENC_INDEX(run,level) ((run)*128 + (level))
+
     /* precomputed matrix (combine qscale and DCT renorm) */
     int __align8 q_intra_matrix[32][64];
     int __align8 q_inter_matrix[32][64];