From 2e1866011591deab0fdeef23e0892589852a4a42 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser <darkshikari@gmail.com>
Date: Fri, 14 Jan 2011 22:23:42 +0000
Subject: [PATCH] Fix SVQ3 regression introduced in r26336 and r26337.

Originally committed as revision 26341 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/dsputil.h |  2 +-
 libavcodec/h264.c    | 20 +++++++++++---------
 libavcodec/svq3.c    | 20 +++++++++++---------
 3 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 0efbad918a..e6b50e9c12 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -65,7 +65,7 @@ void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block,
 void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
 
 void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul);
-void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp);
+void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
 void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
 
 void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1,
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index bd13e1d847..d48f31e5f4 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -1203,8 +1203,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                 }
             }else{
                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
-                if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX] ]){
-                    if(is_h264){
+                if(is_h264){
+                    if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX] ]){
                         if(!transform_bypass)
                             h->h264dsp.h264_luma_dc_dequant_idct(h->mb, h->mb_luma_dc, h->dequant4_coeff[0][s->qscale][0]);
                         else{
@@ -1213,9 +1213,9 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                             for(i = 0; i < 16; i++)
                                 h->mb[dc_mapping[i]] = h->mb_luma_dc[i];
                         }
-                    }else
-                        ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale);
-                }
+                    }
+                }else
+                    ff_svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
             }
             if(h->deblocking_filter)
                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
@@ -1283,15 +1283,17 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                     }
                 }
             }else{
-                if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
-                    chroma_dc_dequant_idct_c(h->mb + 16*16     , h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
-                if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
-                    chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                 if(is_h264){
+                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
+                        chroma_dc_dequant_idct_c(h->mb + 16*16     , h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
+                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
+                        chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                     h->h264dsp.h264_idct_add8(dest, block_offset,
                                               h->mb, uvlinesize,
                                               h->non_zero_count_cache);
                 }else{
+                    chroma_dc_dequant_idct_c(h->mb + 16*16     , h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
+                    chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                     for(i=16; i<16+8; i++){
                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                             uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index 6d10fc5ae5..4a4a1c52cd 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -126,19 +126,21 @@ static const uint32_t svq3_dequant_coeff[32] = {
 };
 
 
-void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp)
+void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp)
 {
     const int qmul = svq3_dequant_coeff[qp];
 #define stride 16
     int i;
     int temp[16];
     static const int x_offset[4] = {0, 1*stride, 4* stride,  5*stride};
+    static const int y_offset[4] = {0, 2*stride, 8* stride, 10*stride};
 
     for (i = 0; i < 4; i++){
-        const int z0= 13*(input[4*i+0] +    input[4*i+1]);
-        const int z1= 13*(input[4*i+0] -    input[4*i+1]);
-        const int z2=  7* input[4*i+2] - 17*input[4*i+3];
-        const int z3= 17* input[4*i+2] +  7*input[4*i+3];
+        const int offset = y_offset[i];
+        const int z0 = 13*(block[offset+stride*0] +    block[offset+stride*4]);
+        const int z1 = 13*(block[offset+stride*0] -    block[offset+stride*4]);
+        const int z2 =  7* block[offset+stride*1] - 17*block[offset+stride*5];
+        const int z3 = 17* block[offset+stride*1] +  7*block[offset+stride*5];
 
         temp[4*i+0] = z0+z3;
         temp[4*i+1] = z1+z2;
@@ -153,10 +155,10 @@ void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp)
         const int z2 =  7* temp[4*1+i] - 17*temp[4*3+i];
         const int z3 = 17* temp[4*1+i] +  7*temp[4*3+i];
 
-        output[stride*0 +offset] = ((z0 + z3)*qmul + 0x80000) >> 20;
-        output[stride*2 +offset] = ((z1 + z2)*qmul + 0x80000) >> 20;
-        output[stride*8 +offset] = ((z1 - z2)*qmul + 0x80000) >> 20;
-        output[stride*10+offset] = ((z0 - z3)*qmul + 0x80000) >> 20;
+        block[stride*0 +offset] = ((z0 + z3)*qmul + 0x80000) >> 20;
+        block[stride*2 +offset] = ((z1 + z2)*qmul + 0x80000) >> 20;
+        block[stride*8 +offset] = ((z1 - z2)*qmul + 0x80000) >> 20;
+        block[stride*10+offset] = ((z0 - z3)*qmul + 0x80000) >> 20;
     }
 }
 #undef stride