optimize block_permute()

optimize dct_quantize_c(); don't permute s->q_inter/intra_matrix. Originally committed as revision 1067 to svn://svn.ffmpeg.org/ffmpeg/trunk
22 years ago · 7801d21d13
parent 4a3d7fbcbc
commit 7801d21d13
4 changed files with 32 additions and 18 deletions
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@ -1553,16 +1553,25 @@ int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
    return s;
 }
-/* permute block according so that it corresponds to the MMX idct
+void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last)
   order */
 void block_permute(INT16 *block, UINT8 *permutation)
 {
    int i;
    INT16 temp[64];
-	for(i=0; i<64; i++) temp[ permutation[i] ] = block[i];
+    if(last<=0) return;
    if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
-	for(i=0; i<64; i++) block[i] = temp[i];
+    for(i=0; i<=last; i++){
        const int j= scantable[i];
        temp[j]= block[j];
        block[j]=0;
    }
    for(i=0; i<=last; i++){
        const int j= scantable[i];
        const int perm_j= permutation[j];
        block[perm_j]= temp[j];
    }
 }
 void clear_blocks_c(DCTELEM *blocks)
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@ -115,7 +115,11 @@ int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx);
 int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx);
 int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);
-void block_permute(INT16 *block, UINT8 *permutation);
+/**
 * permute block according to permutation.
 * @param last last non-zero element in scantable order
 */
 void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last);
 #if defined(HAVE_MMX)
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@ -94,7 +94,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
-                qmat[qscale][j] = (int)((UINT64_C(1) << QMAT_SHIFT) / 
+                qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / 
                                (qscale * quant_matrix[j]));
            }
        } else if (s->fdct == fdct_ifast) {
@ -105,7 +105,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
-                qmat[qscale][j] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / 
+                qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / 
                                (aanscales[i] * qscale * quant_matrix[j]));
            }
        } else {
@ -139,6 +139,8 @@ void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scanta
    int i;
    int end;
    st->scantable= src_scantable;
    for(i=0; i<64; i++){
        int j;
        j = src_scantable[i];
@ -2968,18 +2970,13 @@ static int dct_quantize_c(MpegEncContext *s,
 {
    int i, j, level, last_non_zero, q;
    const int *qmat;
    const UINT8 *scantable= s->intra_scantable.scantable;
    int bias;
    int max=0;
    unsigned int threshold1, threshold2;
    s->fdct (block);
 #ifndef ARCH_ALPHA              /* Alpha uses unpermuted matrix */
    /* we need this permutation so that we correct the IDCT
       permutation. will be moved into DCT code */
    block_permute(block, s->idct_permutation); //FIXME remove
 #endif
    if (s->mb_intra) {
        if (!s->h263_aic) {
            if (n < 4)
@ -3007,7 +3004,7 @@ static int dct_quantize_c(MpegEncContext *s,
    threshold2= (threshold1<<1);
    for(;i<64;i++) {
-        j = s->intra_scantable.permutated[i];
+        j = scantable[i];
        level = block[j];
        level = level * qmat[j];
@ -3029,6 +3026,9 @@ static int dct_quantize_c(MpegEncContext *s,
    }
    *overflow= s->max_qcoeff < max; //overflow might have happend
    /* we need this permutation so that we correct the IDCT permutation; we only permute the !=0 elements */
    ff_block_permute(block, s->idct_permutation, scantable, last_non_zero);
    return last_non_zero;
 }
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@ -100,6 +100,7 @@ typedef struct ReorderBuffer{
 } ReorderBuffer;
 typedef struct ScanTable{
    const UINT8 *scantable;
    UINT8 permutated[64];
    UINT8 raster_end[64];
 } ScanTable;