diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 9811df73e3..f45d051e21 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -83,9 +83,24 @@ static UINT8 simple_mmx_permutation[64]={ 0x32, 0x3A, 0x33, 0x3B, 0x36, 0x3E, 0x37, 0x3F, }; +/* used to skip zeros at the end: zigzag_end[i] is 1 + the largest zigzag_direct[] value found at scan positions 0..i (i in 0..63) */ +UINT8 zigzag_end[64]; + UINT8 permutation[64]; //UINT8 invPermutation[64]; +static void build_zigzag_end(void) /* fills zigzag_end[] from zigzag_direct[]; no arguments, no return value */ +{ + int lastIndex; + int lastIndexAfterPerm=0; /* running maximum of zigzag_direct[0..lastIndex] */ + for(lastIndex=0; lastIndex<64; lastIndex++) + { + if(zigzag_direct[lastIndex] > lastIndexAfterPerm) + lastIndexAfterPerm= zigzag_direct[lastIndex]; + zigzag_end[lastIndex]= lastIndexAfterPerm + 1; /* +1 turns the highest index into a count */ + } +} + void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size) { DCTELEM *p; @@ -509,4 +524,6 @@ void dsputil_init(void) block_permute(default_intra_matrix); block_permute(default_non_intra_matrix); } + + build_zigzag_end(); } diff --git a/libavcodec/i386/mpegvideo_mmx.c b/libavcodec/i386/mpegvideo_mmx.c index b34d4eb038..f2fc528b0c 100644 --- a/libavcodec/i386/mpegvideo_mmx.c +++ b/libavcodec/i386/mpegvideo_mmx.c @@ -17,11 +17,14 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
* * Optimized for ia32 cpus by Nick Kurshev + * h263 dequantizer by Michael Niedermayer */ #include "../dsputil.h" #include "../mpegvideo.h" +extern UINT8 zigzag_end[64]; + #if 0 /* XXX: GL: I don't understand why this function needs optimization @@ -69,8 +72,8 @@ static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x000 static void dct_unquantize_h263_mmx(MpegEncContext *s, DCTELEM *block, int n, int qscale) { - int i, level, qmul, qadd; - + int i, level, qmul, qadd, nCoeffs; + qmul = s->qscale << 1; qadd = (s->qscale - 1) | 1; @@ -91,10 +94,12 @@ static void dct_unquantize_h263_mmx(MpegEncContext *s, block[i] = level; } } + nCoeffs=64; } else { i = 0; + nCoeffs= zigzag_end[ s->block_last_index[n] ]; } - +//printf("%d %d ", qmul, qadd); asm volatile( "movd %1, %%mm6 \n\t" //qmul "packssdw %%mm6, %%mm6 \n\t" @@ -138,9 +143,8 @@ asm volatile( "movq %%mm1, 8(%0, %3) \n\t" "addl $16, %3 \n\t" - "cmpl $128, %3 \n\t" - "jb 1b \n\t" - ::"r" (block), "g"(qmul), "g" (qadd), "r" (2*i) + "js 1b \n\t" + ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(i-nCoeffs)) : "memory" ); } @@ -178,17 +182,22 @@ asm volatile( static void dct_unquantize_mpeg1_mmx(MpegEncContext *s, DCTELEM *block, int n, int qscale) { - int i, level; + int i, level, nCoeffs; const UINT16 *quant_matrix; + + if(s->alternate_scan) nCoeffs= 64; + else nCoeffs= zigzag_end[ s->block_last_index[n] ]; //NOTE(review): zigzag_end[] has entries 0..63 only, confirm block_last_index[n] is never negative here + if (s->mb_intra) { if (n < 4) block[0] = block[0] * s->y_dc_scale; else block[0] = block[0] * s->c_dc_scale; - if (s->out_format == FMT_H263) { + /* not used anymore (H.263 has had its own unquantizer for some time) + if (s->out_format == FMT_H263) { i = 1; goto unquant_even; - } + }*/ /* XXX: only mpeg1 */ quant_matrix = s->intra_matrix; i=1; @@ -214,7 +223,7 @@ static void dct_unquantize_mpeg1_mmx(MpegEncContext *s, "packssdw %%mm6, %%mm7\n\t" /* mm7 = qscale | qscale | qscale | qscale */ "pxor %%mm6, %%mm6\n\t" ::"g"(qscale),"m"(mm_wone),"m"(mm_wabs):"memory"); - for(;i<64;i+=4) 
{ + for(;iblock_last_index[i] >= 0) { if (!s->mpeg2) - s->dct_unquantize(s, block, i, s->qscale); + if(s->encoding || s->avctx==NULL || s->avctx->codec->id!=CODEC_ID_MSMPEG4) + s->dct_unquantize(s, block, i, s->qscale); ff_idct (block); add_pixels_clamped(block, dest, line_size); } @@ -1206,9 +1209,12 @@ static int dct_quantize_mmx(MpegEncContext *s, static void dct_unquantize_mpeg1_c(MpegEncContext *s, DCTELEM *block, int n, int qscale) { - int i, level; + int i, level, nCoeffs; const UINT16 *quant_matrix; + if(s->alternate_scan) nCoeffs= 64; + else nCoeffs= s->block_last_index[n]+1; + if (s->mb_intra) { if (n < 4) block[0] = block[0] * s->y_dc_scale; @@ -1216,47 +1222,49 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s, block[0] = block[0] * s->c_dc_scale; /* XXX: only mpeg1 */ quant_matrix = s->intra_matrix; - for(i=1;i<64;i++) { - level = block[i]; + for(i=1;i> 3; + level = (int)(level * qscale * quant_matrix[j]) >> 3; level = (level - 1) | 1; level = -level; } else { - level = (int)(level * qscale * quant_matrix[i]) >> 3; + level = (int)(level * qscale * quant_matrix[j]) >> 3; level = (level - 1) | 1; } #ifdef PARANOID if (level < -2048 || level > 2047) fprintf(stderr, "unquant error %d %d\n", i, level); #endif - block[i] = level; + block[j] = level; } } } else { i = 0; quant_matrix = s->non_intra_matrix; - for(;i<64;i++) { - level = block[i]; + for(i=1;i> 4; + ((int) (quant_matrix[j]))) >> 4; level = (level - 1) | 1; level = -level; } else { level = (((level << 1) + 1) * qscale * - ((int) (quant_matrix[i]))) >> 4; + ((int) (quant_matrix[j]))) >> 4; level = (level - 1) | 1; } #ifdef PARANOID if (level < -2048 || level > 2047) fprintf(stderr, "unquant error %d %d\n", i, level); #endif - block[i] = level; + block[j] = level; } } } @@ -1266,6 +1274,7 @@ static void dct_unquantize_h263_c(MpegEncContext *s, DCTELEM *block, int n, int qscale) { int i, level, qmul, qadd; + int nCoeffs; if (s->mb_intra) { if (n < 4) @@ -1273,14 +1282,16 @@ static void 
dct_unquantize_h263_c(MpegEncContext *s, else block[0] = block[0] * s->c_dc_scale; i = 1; + nCoeffs= 64; //does not always use zigzag table } else { i = 0; + nCoeffs= zigzag_end[ s->block_last_index[n] ]; /* NOTE(review): confirm block_last_index[n] is never negative on this path */ } qmul = s->qscale << 1; qadd = (s->qscale - 1) | 1; - for(;i<64;i++) { + for(;ipict_type = get_bits(&s->gb, 2) + 1; if (s->pict_type != I_TYPE && @@ -642,6 +643,7 @@ int msmpeg4_decode_picture_header(MpegEncContext * s) code = get_bits(&s->gb, 5); /* 0x17: one slice, 0x18: three slices */ /* XXX: implement it */ + //printf("%d %d %d\n", code, s->slice_height, s->first_slice_line); if (code < 0x17) return -1; s->slice_height = s->mb_height / (code - 0x16); @@ -650,6 +652,11 @@ int msmpeg4_decode_picture_header(MpegEncContext * s) s->dc_table_index = get_bits1(&s->gb); s->no_rounding = 1; +/* printf(" %d %d %d %d \n", + s->qscale, + s->rl_chroma_table_index, + s->rl_table_index, + s->dc_table_index);*/ } else { s->use_skip_mb_code = get_bits1(&s->gb); @@ -659,7 +666,16 @@ int msmpeg4_decode_picture_header(MpegEncContext * s) s->dc_table_index = get_bits1(&s->gb); s->mv_table_index = get_bits1(&s->gb); - s->no_rounding ^= 1; +/* printf(" %d %d %d %d %d \n", + s->use_skip_mb_code, + s->rl_table_index, + s->rl_chroma_table_index, + s->dc_table_index, + s->mv_table_index);*/ + if(weirdAl) + s->no_rounding = 0; + else + s->no_rounding ^= 1; } #ifdef DEBUG printf("*****frame %d:\n", frame_count++); @@ -785,8 +801,12 @@ static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, int dc_pred_dir; RLTable *rl; const UINT8 *scan_table; + int qmul, qadd; if (s->mb_intra) { + qmul=1; /* intra blocks: level * 1 + 0 leaves the decoded levels unscaled */ + qadd=0; + /* DC coef */ set_stat(ST_DC); level = msmpeg4_decode_dc(s, n, &dc_pred_dir); @@ -798,6 +818,7 @@ static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, } else { rl = &rl_table[3 + s->rl_chroma_table_index]; } + run_diff = 0; i = 1; if (!coded) { @@ -813,6 +834,8 @@ static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, } set_stat(ST_INTRA_AC); } 
else { + qmul = s->qscale << 1; /* inter blocks: decoded levels are scaled as level * qmul +/- qadd */ + qadd = (s->qscale - 1) | 1; i = 0; rl = &rl_table[3 + s->rl_table_index]; run_diff = 1; @@ -837,13 +860,16 @@ static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, run = get_bits(&s->gb, 6); level = get_bits(&s->gb, 8); level = (level << 24) >> 24; /* sign extend */ + if(level>0) level= level * qmul + qadd; + else level= level * qmul - qadd; } else { /* second escape */ code = get_vlc(&s->gb, &rl->vlc); if (code < 0 || code >= rl->n) return -1; run = rl->table_run[code]; level = rl->table_level[code]; last = code >= rl->last; run += rl->max_run[last][level] + run_diff; + level= level * qmul + qadd; /* scale only after the raw table level has indexed max_run[] */ if (get_bits1(&s->gb)) @@ -858,12 +884,13 @@ static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, level = rl->table_level[code]; last = code >= rl->last; level += rl->max_level[last][run]; + level= level * qmul + qadd; if (get_bits1(&s->gb)) level = -level; } } else { run = rl->table_run[code]; - level = rl->table_level[code]; + level = rl->table_level[code] * qmul + qadd; last = code >= rl->last; if (get_bits1(&s->gb)) level = -level;