From 91029be79005b17311fcf75f3f9f977f4d20cfd7 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 19 Apr 2002 03:25:20 +0000
Subject: [PATCH] much better ME for b frames (a bit slow though) fixed MC
 rounding for b frames fixed hq mode with b-frames

Originally committed as revision 406 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/common.h     |  10 ++
 libavcodec/h263.c       |  56 ++++++---
 libavcodec/motion_est.c | 269 ++++++++++++++++++++++++++++++++++++----
 libavcodec/mpegvideo.c  | 194 ++++++++++++++++++++++-------
 libavcodec/mpegvideo.h  |  11 +-
 5 files changed, 446 insertions(+), 94 deletions(-)

diff --git a/libavcodec/common.h b/libavcodec/common.h
index 96b545d0dc..21cdd541b4 100644
--- a/libavcodec/common.h
+++ b/libavcodec/common.h
@@ -881,6 +881,16 @@ static inline int mid_pred(int a, int b, int c)
     return a + b + c - vmin - vmax;
 }
 
+/* Clamp a into [amin, amax]; the lower bound is checked first. */
+static inline int clip(int a, int amin, int amax)
+{
+    const int below = a < amin;
+    const int above = a > amax;
+
+    if (below) return amin;
+    return above ? amax : a;
+}
+
 /* memory */
 void *av_mallocz(int size);
 
diff --git a/libavcodec/h263.c b/libavcodec/h263.c
index bcb9ea2508..42ae076ce4 100644
--- a/libavcodec/h263.c
+++ b/libavcodec/h263.c
@@ -18,7 +18,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
- * ac prediction encoding by Michael Niedermayer <michaelni@gmx.at>
+ * ac prediction encoding & b-frame support by Michael Niedermayer <michaelni@gmx.at>
  */
 #include "common.h"
 #include "dsputil.h"
@@ -282,7 +282,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
                 s->mv[0][0][1]= 
                 s->mv[1][0][0]= 
                 s->mv[1][0][1]= 0;
-//                s->mv_dir= MV_DIR_FORWARD; //doesnt matter
+                s->mv_dir= MV_DIR_FORWARD; //doesnt matter
                 return;
             }
 
@@ -334,7 +334,8 @@ void mpeg4_encode_mb(MpegEncContext * s,
                 s->last_mv[0][0][0]= motion_x;
                 s->last_mv[0][0][1]= motion_y;
                 break;
-            default: 
+            default:
+                printf("unknown mb type\n");
                 return;
             }
             bits= get_bit_count(&s->pb);
@@ -959,6 +960,31 @@ static void put_string(PutBitContext * pbc, char *s)
     put_bits(pbc, 8, 0);
 }
 
+/* set s->time and the derived time fields; must be called before writing the header */
+void ff_set_mpeg4_time(MpegEncContext * s, int picture_number){
+    int time_div;
+
+    if(s->pict_type==I_TYPE){ //we will encode a vol header
+        s->time_increment_resolution= s->frame_rate/ff_gcd(s->frame_rate, FRAME_RATE_BASE);
+        if(s->time_increment_resolution>=256*256) s->time_increment_resolution= 256*128;
+
+        s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
+    }
+
+    /* the int64_t cast widens the product; the picture header derives its own div/mod from s->time */
+    s->time= picture_number*(int64_t)FRAME_RATE_BASE*s->time_increment_resolution/s->frame_rate;
+    time_div= s->time/s->time_increment_resolution;
+
+    if(s->pict_type==B_TYPE){
+        s->bp_time= s->last_non_b_time - s->time;
+    }else{
+        s->last_time_base= s->time_base;
+        s->time_base= time_div;
+        s->pp_time= s->time - s->last_non_b_time;
+        s->last_non_b_time= s->time;
+    }
+}
+
 static void mpeg4_encode_vol_header(MpegEncContext * s)
 {
     int vo_ver_id=1; //must be 2 if we want GMC or q-pel
@@ -983,11 +1009,7 @@ static void mpeg4_encode_vol_header(MpegEncContext * s)
     put_bits(&s->pb, 2, RECT_SHAPE);	/* vol shape= rectangle */
     put_bits(&s->pb, 1, 1);		/* marker bit */
     
-    s->time_increment_resolution= s->frame_rate/ff_gcd(s->frame_rate, FRAME_RATE_BASE);
-    if(s->time_increment_resolution>=256*256) s->time_increment_resolution= 256*128;
-
     put_bits(&s->pb, 16, s->time_increment_resolution);
-    s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
     if (s->time_increment_bits < 1)
         s->time_increment_bits = 1;
     put_bits(&s->pb, 1, 1);		/* marker bit */
@@ -1034,9 +1056,6 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
     
     if(s->pict_type==I_TYPE) mpeg4_encode_vol_header(s);
     
-    s->time= s->picture_number*(int64_t)FRAME_RATE_BASE*s->time_increment_resolution/s->frame_rate;
-    time_div= s->time/s->time_increment_resolution;
-    time_mod= s->time%s->time_increment_resolution;
 //printf("num:%d rate:%d base:%d\n", s->picture_number, s->frame_rate, FRAME_RATE_BASE);
     
     if(get_bit_count(&s->pb)!=0) mpeg4_stuffing(&s->pb);
@@ -1044,15 +1063,8 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
     put_bits(&s->pb, 16, 0x1B6);	/* vop header */
     put_bits(&s->pb, 2, s->pict_type - 1);	/* pict type: I = 0 , P = 1 */
 
-    if(s->pict_type==B_TYPE){
-        s->bp_time= s->last_non_b_time - s->time;
-    }else{
-        s->last_time_base= s->time_base;
-        s->time_base= time_div;
-        s->pp_time= s->time - s->last_non_b_time;
-        s->last_non_b_time= s->time;
-    }
-
+    time_div= s->time/s->time_increment_resolution;
+    time_mod= s->time%s->time_increment_resolution;
     time_incr= time_div - s->last_time_base;
     while(time_incr--)
         put_bits(&s->pb, 1, 1);
@@ -1770,6 +1782,7 @@ int h263_decode_mb(MpegEncContext *s,
             s->last_mv[0][0][1]= 
             s->last_mv[1][0][0]= 
             s->last_mv[1][0][1]= 0;
+//            printf("\n");
         }
 
         /* if we skipped it in the future P Frame than skip it now too */
@@ -1789,6 +1802,7 @@ int h263_decode_mb(MpegEncContext *s,
 //FIXME is this correct?
 /*            s->last_mv[0][0][0]=
             s->last_mv[0][0][1]=0;*/
+//            printf("S");
             return 0;
         }
 
@@ -1837,6 +1851,7 @@ int h263_decode_mb(MpegEncContext *s,
             s->mv[0][0][1] = 
             s->mv[1][0][0] = 
             s->mv[1][0][1] = 1000;*/
+//            printf("D");
             break;
         case 1: 
             s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
@@ -1849,6 +1864,7 @@ int h263_decode_mb(MpegEncContext *s,
             my = h263_decode_motion(s, s->last_mv[1][0][1], s->b_code);
             s->last_mv[1][0][0]= s->mv[1][0][0] = mx;
             s->last_mv[1][0][1]= s->mv[1][0][1] = my;
+//            printf("I");
             break;
         case 2: 
             s->mv_dir = MV_DIR_BACKWARD;
@@ -1856,6 +1872,7 @@ int h263_decode_mb(MpegEncContext *s,
             my = h263_decode_motion(s, s->last_mv[1][0][1], s->b_code);
             s->last_mv[1][0][0]= s->mv[1][0][0] = mx;
             s->last_mv[1][0][1]= s->mv[1][0][1] = my;
+//            printf("B");
             break;
         case 3:
             s->mv_dir = MV_DIR_FORWARD;
@@ -1863,6 +1880,7 @@ int h263_decode_mb(MpegEncContext *s,
             my = h263_decode_motion(s, s->last_mv[0][0][1], s->f_code);
             s->last_mv[0][0][0]= s->mv[0][0][0] = mx;
             s->last_mv[0][0][1]= s->mv[0][0][1] = my;
+//            printf("F");
             break;
         default: return -1;
         }
diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index 7fc0cec18c..1ba246ae5c 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -29,7 +29,7 @@
 #define MAX(a,b) ((a) > (b) ? (a) : (b))
 #define INTER_BIAS	257
 
-static void halfpel_motion_search(MpegEncContext * s,
+static int halfpel_motion_search(MpegEncContext * s,
 				  int *mx_ptr, int *my_ptr, int dmin,
 				  int xmin, int ymin, int xmax, int ymax,
                                   int pred_x, int pred_y, uint8_t *ref_picture);
@@ -673,7 +673,7 @@ static int epzs_motion_search4(MpegEncContext * s, int block,
     
 /* The idea would be to make half pel ME after Inter/Intra decision to 
    save time. */
-static inline void halfpel_motion_search(MpegEncContext * s,
+static inline int halfpel_motion_search(MpegEncContext * s,
 				  int *mx_ptr, int *my_ptr, int dmin,
 				  int xmin, int ymin, int xmax, int ymax,
                                   int pred_x, int pred_y, uint8_t *ref_picture)
@@ -702,7 +702,7 @@ static inline void halfpel_motion_search(MpegEncContext * s,
         if(dmin < Z_THRESHOLD && mx==0 && my==0){
             *mx_ptr = 0;
             *my_ptr = 0;
-            return;
+            return dmin;
         }
         
         pen_x= pred_x + mx;
@@ -727,6 +727,7 @@ static inline void halfpel_motion_search(MpegEncContext * s,
 
     *mx_ptr = mx;
     *my_ptr = my;
+    return dminh;
 }
 
 static inline void halfpel_motion_search4(MpegEncContext * s,
@@ -1044,17 +1045,15 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
     set_p_mv_tables(s, mx, my);
 }
 
-void ff_estimate_motion_b(MpegEncContext * s,
+int ff_estimate_motion_b(MpegEncContext * s,
                        int mb_x, int mb_y, int16_t (*mv_table)[2], uint8_t *ref_picture, int f_code)
 {
-    UINT8 *pix, *ppix;
-    int sum, varc, vard, mx, my, range, dmin, xx, yy;
+    int mx, my, range, dmin;
     int xmin, ymin, xmax, ymax;
     int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
     int pred_x=0, pred_y=0;
     int P[6][2];
     const int shift= 1+s->quarter_sample;
-    int mb_type=0;
     const int mot_stride = s->mb_width + 2;
     const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1;
     
@@ -1124,18 +1123,210 @@ void ff_estimate_motion_b(MpegEncContext * s,
     /* At this point (mx,my) are full-pell and the absolute displacement */
 //    ppix = ref_picture + (my * s->linesize) + mx;
     
-    halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y, ref_picture);
+    dmin= halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y, ref_picture);
 
 //    s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
     mv_table[mot_xy][0]= mx;
     mv_table[mot_xy][1]= my;
+    return dmin;
 }
 
 
-int ff_decide_type(MpegEncContext * s,
-                int mb_x, int mb_y)
+static inline int check_bidir_mv(MpegEncContext * s,
+                   int mb_x, int mb_y,
+                   int motion_fx, int motion_fy,
+                   int motion_bx, int motion_by,
+                   int pred_fx, int pred_fy,
+                   int pred_bx, int pred_by)
 {
+    /* Score one forward/backward vector pair: both predictions are combined in
+     * s->me_scratchpad and compared with the source macroblock, then the
+     * qscale-weighted cost of the vectors against their predictors is added. */
+    //FIXME optimize?
+    UINT16 *penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
+    uint8_t *const blend= s->me_scratchpad;
+    const int base_x= mb_x * 16;
+    const int base_y= mb_y * 16;
+    const int fwd_cost= (penalty[motion_fx-pred_fx] + penalty[motion_fy-pred_fy])*s->qscale;
+    const int bwd_cost= (penalty[motion_bx-pred_bx] + penalty[motion_by-pred_by])*s->qscale;
+    uint8_t *ref;
+    int hpel, ref_x, ref_y;
+
+    /* forward block from the last picture, written with put_pixels_tab */
+    hpel = (motion_fx & 1) | ((motion_fy & 1) << 1);
+    ref_x= base_x + (motion_fx >> 1);
+    ref_y= base_y + (motion_fy >> 1);
+    ref  = s->last_picture[0] + (ref_y * s->linesize) + ref_x;
+    put_pixels_tab[hpel](blend    , ref    , s->linesize, 16);
+    put_pixels_tab[hpel](blend + 8, ref + 8, s->linesize, 16);
+
+    /* backward block from the next picture, merged in with avg_pixels_tab */
+    hpel = (motion_bx & 1) | ((motion_by & 1) << 1);
+    ref_x= base_x + (motion_bx >> 1);
+    ref_y= base_y + (motion_by >> 1);
+    ref  = s->next_picture[0] + (ref_y * s->linesize) + ref_x;
+    avg_pixels_tab[hpel](blend    , ref    , s->linesize, 16);
+    avg_pixels_tab[hpel](blend + 8, ref + 8, s->linesize, 16);
+
+    return fwd_cost + bwd_cost + pix_abs16x16(s->new_picture[0] + base_x + base_y*s->linesize, blend, s->linesize);
+}
 
+/* Seed the bidir vector tables of this macroblock with the vectors from
+ * s->b_forw_mv_table / s->b_back_mv_table and return the score of that pair
+ * (needed in both lq & hq mode); the actual hq refinement is still a FIXME */
+static inline int bidir_refine(MpegEncContext * s,
+                                  int mb_x, int mb_y)
+{
+    const int xy = (mb_y + 1)*(s->mb_width + 2) + mb_x + 1;
+    const int left = xy - 1; /* table entry of the left neighbour, used as predictor */
+    int fx, fy, bx, by;
+
+    /* start from the results of the two unidirectional searches */
+    s->b_bidir_forw_mv_table[xy][0]= fx= s->b_forw_mv_table[xy][0];
+    s->b_bidir_forw_mv_table[xy][1]= fy= s->b_forw_mv_table[xy][1];
+    s->b_bidir_back_mv_table[xy][0]= bx= s->b_back_mv_table[xy][0];
+    s->b_bidir_back_mv_table[xy][1]= by= s->b_back_mv_table[xy][1];
+
+    //FIXME do refinement and add flag
+
+    return check_bidir_mv(s, mb_x, mb_y,
+                          fx, fy,
+                          bx, by,
+                          s->b_bidir_forw_mv_table[left][0],
+                          s->b_bidir_forw_mv_table[left][1],
+                          s->b_bidir_back_mv_table[left][0],
+                          s->b_bidir_back_mv_table[left][1]);
+}
+
+/* Direct mode search: the vector stored for this macroblock in s->p_mv_table is scaled
+ * with pp_time/bp_time, the resulting bidirectional prediction is rendered for the 3x3
+ * surrounding macroblocks into s->me_scratchpad and a delta vector is searched on it.
+ * Fills the b_direct*_mv_table entries of the macroblock and returns the best score. */
+static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
+{
+    int P[6][2];
+    const int mot_stride = s->mb_width + 2;
+    const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1;
+    int dmin, dmin2;
+    int motion_fx, motion_fy, motion_bx, motion_by, motion_bx0, motion_by0;
+    const int motion_px= s->p_mv_table[mot_xy][0];
+    const int motion_py= s->p_mv_table[mot_xy][1];
+    const int time_pp= s->pp_time;
+    const int time_bp= s->bp_time;
+    const int time_pb= time_pp - time_bp;
+    int bx, by;
+    int mx, my, mx2, my2;
+    /* NOTE(review): biased so macroblock-position addressing hits the scratchpad; verify the +1 signs against epzs_motion_search */
+    uint8_t *ref_picture= s->me_scratchpad - (mb_x + 1 + (mb_y + 1)*s->linesize)*16;
+    int16_t (*mv_table)[2]= s->b_direct_mv_table;
+
+    /* thanks to iso-mpeg the rounding is different for the zero vector, so we need to handle that ... */
+    motion_fx= (motion_px*time_pb)/time_pp;
+    motion_fy= (motion_py*time_pb)/time_pp;
+    motion_bx0= (-motion_px*time_bp)/time_pp;
+    motion_by0= (-motion_py*time_bp)/time_pp;
+    dmin2= check_bidir_mv(s, mb_x, mb_y,
+                          motion_fx, motion_fy,
+                          motion_bx0, motion_by0,
+                          motion_fx, motion_fy,
+                          motion_bx0, motion_by0) - s->qscale;
+
+    motion_bx= motion_fx - motion_px;
+    motion_by= motion_fy - motion_py;
+    for(by=-1; by<2; by++){
+        for(bx=-1; bx<2; bx++){
+            uint8_t *dest_y = s->me_scratchpad + (by+1)*s->linesize*16 + (bx+1)*16;
+            uint8_t *ptr;
+            int dxy;
+            int src_x, src_y;
+            const int width= s->width;
+            const int height= s->height;
+
+            dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
+            src_x = (mb_x + bx) * 16 + (motion_fx >> 1);
+            src_y = (mb_y + by) * 16 + (motion_fy >> 1);
+            src_x = clip(src_x, -16, width);
+            if (src_x == width) dxy &= ~1;
+            src_y = clip(src_y, -16, height);
+            if (src_y == height) dxy &= ~2;
+
+            ptr = s->last_picture[0] + (src_y * s->linesize) + src_x;
+            put_pixels_tab[dxy](dest_y    , ptr    , s->linesize, 16);
+            put_pixels_tab[dxy](dest_y + 8, ptr + 8, s->linesize, 16);
+
+            dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
+            src_x = (mb_x + bx) * 16 + (motion_bx >> 1);
+            src_y = (mb_y + by) * 16 + (motion_by >> 1);
+            src_x = clip(src_x, -16, width);
+            if (src_x == width) dxy &= ~1;
+            src_y = clip(src_y, -16, height);
+            if (src_y == height) dxy &= ~2;
+            ptr = s->next_picture[0] + (src_y * s->linesize) + src_x; /* backward half comes from the next picture, not the stale forward ptr */
+            avg_pixels_tab[dxy](dest_y    , ptr    , s->linesize, 16);
+            avg_pixels_tab[dxy](dest_y + 8, ptr + 8, s->linesize, 16);
+        }
+    }
+
+    P[0][0] = mv_table[mot_xy    ][0];
+    P[0][1] = mv_table[mot_xy    ][1];
+    P[1][0] = mv_table[mot_xy - 1][0];
+    P[1][1] = mv_table[mot_xy - 1][1];
+
+    /* special case for first line */
+    if ((mb_y == 0 || s->first_slice_line || s->first_gob_line)) {
+        P[4][0] = P[1][0];
+        P[4][1] = P[1][1];
+    } else {
+        P[2][0] = mv_table[mot_xy - mot_stride             ][0];
+        P[2][1] = mv_table[mot_xy - mot_stride             ][1];
+        P[3][0] = mv_table[mot_xy - mot_stride + 1         ][0];
+        P[3][1] = mv_table[mot_xy - mot_stride + 1         ][1];
+
+        P[4][0]= mid_pred(P[1][0], P[2][0], P[3][0]);
+        P[4][1]= mid_pred(P[1][1], P[2][1], P[3][1]);
+    }
+    dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, -16, -16, 15, 15, ref_picture);
+    if(mx==0 && my==0) dmin=99999999; // not representable, due to rounding stuff
+    if(dmin2<dmin){
+        dmin= dmin2;
+        mx=0;
+        my=0;
+    }
+#if 1
+    mx2= mx= mx*2;
+    my2= my= my*2;
+    for(by=-1; by<2; by++){
+        if(my2+by < -32) continue;
+        for(bx=-1; bx<2; bx++){
+            if(bx==0 && by==0) continue;
+            if(mx2+bx < -32) continue;
+            dmin2= check_bidir_mv(s, mb_x, mb_y,
+                          mx2+bx+motion_fx, my2+by+motion_fy,
+                          mx2+bx+motion_bx, my2+by+motion_by,
+                          mx2+bx+motion_fx, my2+by+motion_fy,
+                          motion_bx, motion_by) - s->qscale;
+
+            if(dmin2<dmin){
+                dmin=dmin2;
+                mx= mx2 + bx;
+                my= my2 + by;
+            }
+        }
+    }
+#else
+    mx*=2; my*=2;
+#endif
+    if(mx==0 && my==0){
+        motion_bx= motion_bx0;
+        motion_by= motion_by0;
+    }
+
+    s->b_direct_mv_table[mot_xy][0]= mx;
+    s->b_direct_mv_table[mot_xy][1]= my;
+    s->b_direct_forw_mv_table[mot_xy][0]= motion_fx + mx;
+    s->b_direct_forw_mv_table[mot_xy][1]= motion_fy + my;
+    s->b_direct_back_mv_table[mot_xy][0]= motion_bx + mx;
+    s->b_direct_back_mv_table[mot_xy][1]= motion_by + my;
+    return dmin;
 }
 
 void ff_estimate_b_frame_motion(MpegEncContext * s,
@@ -1143,16 +1334,41 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
 {
     const int mot_stride = s->mb_width + 2;
     const int xy = (mb_y + 1)*mot_stride + mb_x + 1;
+    const int quant= s->qscale;
+    int fmin, bmin, dmin, fbmin;
+    int type=0;
+    int motion_fx, motion_fy, motion_bx, motion_by;
+    
+    dmin= direct_search(s, mb_x, mb_y);
 
-    ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, s->last_picture[0], s->f_code);
-    ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, s->next_picture[0], s->b_code);
+    fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, s->last_picture[0], s->f_code);
+    bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, s->next_picture[0], s->b_code) - quant;
 //printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);
-    s->b_bidir_forw_mv_table[xy][0]= s->b_forw_mv_table[xy][0];
-    s->b_bidir_forw_mv_table[xy][1]= s->b_forw_mv_table[xy][1];
-    s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0];
-    s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1];
-    
-    s->mb_type[mb_y*s->mb_width + mb_x]= MB_TYPE_FORWARD; //FIXME
+
+    fbmin= bidir_refine(s, mb_x, mb_y);
+
+    if(s->flags&CODEC_FLAG_HQ){
+        type= MB_TYPE_FORWARD | MB_TYPE_BACKWARD | MB_TYPE_BIDIR | MB_TYPE_DIRECT;
+    }else{
+        int score= dmin;
+        type=MB_TYPE_DIRECT;
+        
+        if(fmin<score){
+            score=fmin;
+            type= MB_TYPE_FORWARD; 
+        }
+        if(bmin<score){
+            score=bmin;
+            type= MB_TYPE_BACKWARD; 
+        }
+        if(fbmin<score){
+            score=fbmin;
+            type= MB_TYPE_BIDIR;
+        }
+        s->mc_mb_var += score;
+    }
+
+    s->mb_type[mb_y*s->mb_width + mb_x]= type;
 }
 
 /* find best f_code for ME which do unlimited searches */
@@ -1184,8 +1400,12 @@ int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
         }
 
         for(i=MAX_FCODE; i>1; i--){
+            int threshold;
             loose+= mv_num[i];
-            if(loose > s->mb_num/20) break; //FIXME this is pretty ineffective
+
+            if(s->pict_type==B_TYPE) threshold= 0;
+            else                     threshold= s->mb_num/20; //FIXME 
+            if(loose > threshold) break;
         }
 //    printf("fcode: %d type: %d\n", i, s->pict_type);
         return i;
@@ -1275,11 +1495,12 @@ void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, i
                    || fcode_tab[mv_table[xy][0] + MAX_MV] == 0
                    || fcode_tab[mv_table[xy][1] + MAX_MV] > f_code
                    || fcode_tab[mv_table[xy][1] + MAX_MV] == 0 ){
-                    s->mb_type[i] &= ~type;
-                    if(s->mb_type[i]==0) s->mb_type[i]= MB_TYPE_FORWARD; //FIXME 
-                    mv_table[xy][0] = 0;
-                    mv_table[xy][1] = 0;
-                    //this is certainly bad FIXME
+                    if(s->mb_type[i]&(~type)) s->mb_type[i] &= ~type;
+                    else{
+                        mv_table[xy][0] = 0;
+                        mv_table[xy][1] = 0;
+                        //this is certainly bad FIXME            
+                    }
                 }
             }
             xy++;
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index af1d1c3396..d1c7e16372 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -225,6 +225,12 @@ int MPV_common_init(MpegEncContext *s)
             goto fail;
         }
 
+        s->me_scratchpad = av_mallocz( s->linesize*16*3*sizeof(uint8_t));
+        if (s->me_scratchpad == NULL) {
+            perror("malloc");
+            goto fail;
+        }
+
         if(s->max_b_frames){
             for(j=0; j<REORDER_BUFFER_SIZE; j++){
                 int i;
@@ -297,7 +303,7 @@ int MPV_common_init(MpegEncContext *s)
     if (!s->mbskip_table)
         goto fail;
     
-    s->block= s->intra_block;
+    s->block= s->blocks[0];
 
     s->context_initialized = 1;
     return 0;
@@ -333,6 +339,7 @@ void MPV_common_end(MpegEncContext *s)
     CHECK_FREE(s->ac_val[0]);
     CHECK_FREE(s->coded_block);
     CHECK_FREE(s->mbintra_table);
+    CHECK_FREE(s->me_scratchpad);
 
     CHECK_FREE(s->mbskip_table);
     for(i=0;i<3;i++) {
@@ -761,16 +768,6 @@ int MPV_encode_picture(AVCodecContext *avctx,
     return pbBufPtr(&s->pb) - s->pb.buf;
 }
 
-static inline int clip(int a, int amin, int amax)
-{
-    if (a < amin)
-        return amin;
-    else if (a > amax)
-        return amax;
-    else
-        return a;
-}
-
 static inline void gmc1_motion(MpegEncContext *s,
                                UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
                                int dest_offset,
@@ -1225,7 +1222,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
         if (!s->mb_intra) {
             /* motion handling */
             if((s->flags&CODEC_FLAG_HQ) || (!s->encoding)){
-                if (!s->no_rounding){
+                if ((!s->no_rounding) || s->pict_type==B_TYPE){                
                     op_pix = put_pixels_tab;
                     op_qpix= qpel_mc_rnd_tab;
                 }else{
@@ -1235,7 +1232,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
 
                 if (s->mv_dir & MV_DIR_FORWARD) {
                     MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix);
-                    if (!s->no_rounding) 
+                    if ((!s->no_rounding) || s->pict_type==B_TYPE)
                         op_pix = avg_pixels_tab;
                     else
                         op_pix = avg_no_rnd_pixels_tab;
@@ -1312,7 +1309,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
         dest_cb = s->current_picture[1] + (mb_y * 8  * (s->linesize >> 1)) + mb_x * 8;
         dest_cr = s->current_picture[2] + (mb_y * 8  * (s->linesize >> 1)) + mb_x * 8;
 
-        if (!s->no_rounding){
+        if ((!s->no_rounding) || s->pict_type==B_TYPE){
             op_pix = put_pixels_tab;
             op_qpix= qpel_mc_rnd_tab;
         }else{
@@ -1322,7 +1319,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
 
         if (s->mv_dir & MV_DIR_FORWARD) {
             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix);
-            if (!s->no_rounding) 
+           if ((!s->no_rounding) || s->pict_type==B_TYPE)
                 op_pix = avg_pixels_tab;
             else
                 op_pix = avg_no_rnd_pixels_tab;
@@ -1429,6 +1426,8 @@ static void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int
     d->skip_count= s->skip_count;
     d->misc_bits= s->misc_bits;
     d->last_bits= s->last_bits;
+
+    d->mb_skiped= s->mb_skiped;
 }
 
 static void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
@@ -1453,6 +1452,7 @@ static void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int
     d->last_bits= s->last_bits;
 
     d->mb_intra= s->mb_intra;
+    d->mb_skiped= s->mb_skiped;
     d->mv_type= s->mv_type;
     d->mv_dir= s->mv_dir;
     d->pb= s->pb;
@@ -1468,7 +1468,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
     int i;
     int bits;
     MpegEncContext best_s, backup_s;
-    UINT8 bit_buf[4][3000]; //FIXME check that this is ALLWAYS large enogh for a MB
+    UINT8 bit_buf[7][3000]; //FIXME check that this is ALLWAYS large enogh for a MB
 
     s->picture_number = picture_number;
 
@@ -1483,7 +1483,11 @@ static void encode_picture(MpegEncContext *s, int picture_number)
     /* Reset the average MB variance */
     s->avg_mb_var = 0;
     s->mc_mb_var = 0;
-    
+
+    /* we need to initialize some time vars before we can encode b-frames */
+    if (s->h263_pred && !s->h263_msmpeg4)
+        ff_set_mpeg4_time(s, s->picture_number); 
+
     /* Estimate motion for every MB */
     if(s->pict_type != I_TYPE){
 //        int16_t (*tmp)[2]= s->p_mv_table;
@@ -1535,9 +1539,11 @@ static void encode_picture(MpegEncContext *s, int picture_number)
     if(s->pict_type==B_TYPE){
         s->f_code= ff_get_best_fcode(s, s->b_forw_mv_table, MB_TYPE_FORWARD);
         s->b_code= ff_get_best_fcode(s, s->b_back_mv_table, MB_TYPE_BACKWARD);
-        //FIXME if BIDIR != for&back
-        ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD |MB_TYPE_BIDIR);
-        ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD|MB_TYPE_BIDIR);
+
+        ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD);
+        ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD);
+        ff_fix_long_b_mvs(s, s->b_bidir_forw_mv_table, s->f_code, MB_TYPE_BIDIR);
+        ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR);
     }
     
 //printf("f_code %d ///\n", s->f_code);
@@ -1632,7 +1638,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
         s->block_index[4]= s->block_wrap[4]*(mb_y + 1)                    + s->block_wrap[0]*(s->mb_height*2 + 2);
         s->block_index[5]= s->block_wrap[4]*(mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2);
         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
-            /*const */int mb_type= s->mb_type[mb_y * s->mb_width + mb_x];
+            const int mb_type= s->mb_type[mb_y * s->mb_width + mb_x];
+            const int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
             PutBitContext pb;
             int d;
             int dmin=10000000;
@@ -1647,19 +1654,19 @@ static void encode_picture(MpegEncContext *s, int picture_number)
             s->block_index[4]++;
             s->block_index[5]++;
             if(mb_type & (mb_type-1)){ // more than 1 MB type possible
+                int next_block=0;
                 pb= s->pb;
-                s->mv_dir = MV_DIR_FORWARD;
 
                 copy_context_before_encode(&backup_s, s, -1);
 
                 if(mb_type&MB_TYPE_INTER){
-                    int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
+                    s->mv_dir = MV_DIR_FORWARD;
                     s->mv_type = MV_TYPE_16X16;
                     s->mb_intra= 0;
                     s->mv[0][0][0] = s->p_mv_table[xy][0];
                     s->mv[0][0][1] = s->p_mv_table[xy][1];
                     init_put_bits(&s->pb, bit_buf[1], 3000, NULL, NULL);
-                    s->block= s->inter_block;
+                    s->block= s->blocks[next_block];
 
                     encode_mb(s, s->mv[0][0][0], s->mv[0][0][1]);
                     d= get_bit_count(&s->pb);
@@ -1668,10 +1675,12 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                         dmin=d;
                         copy_context_after_encode(&best_s, s, MB_TYPE_INTER);
                         best=1;
+                        next_block^=1;
                     }
                 }
                 if(mb_type&MB_TYPE_INTER4V){                 
                     copy_context_before_encode(s, &backup_s, MB_TYPE_INTER4V);
+                    s->mv_dir = MV_DIR_FORWARD;
                     s->mv_type = MV_TYPE_8X8;
                     s->mb_intra= 0;
                     for(i=0; i<4; i++){
@@ -1679,25 +1688,111 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                         s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
                     }
                     init_put_bits(&s->pb, bit_buf[2], 3000, NULL, NULL);
-                    s->block= s->inter4v_block;
+                    s->block= s->blocks[next_block];
 
                     encode_mb(s, 0, 0);
                     d= get_bit_count(&s->pb);
-                    if(d<dmin && 0){
+                    if(d<dmin){
                         flush_put_bits(&s->pb);
                         dmin=d;
                         copy_context_after_encode(&best_s, s, MB_TYPE_INTER4V);
                         best=2;
+                        next_block^=1;
+                    }
+                }
+                if(mb_type&MB_TYPE_FORWARD){
+                    copy_context_before_encode(s, &backup_s, MB_TYPE_FORWARD);
+                    s->mv_dir = MV_DIR_FORWARD;
+                    s->mv_type = MV_TYPE_16X16;
+                    s->mb_intra= 0;
+                    s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
+                    s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
+                    init_put_bits(&s->pb, bit_buf[3], 3000, NULL, NULL);
+                    s->block= s->blocks[next_block];
+
+                    encode_mb(s, s->mv[0][0][0], s->mv[0][0][1]);
+                    d= get_bit_count(&s->pb);
+                    if(d<dmin){
+                        flush_put_bits(&s->pb);
+                        dmin=d;
+                        copy_context_after_encode(&best_s, s, MB_TYPE_FORWARD);
+                        best=3;
+                        next_block^=1;
+                    }
+                }
+                if(mb_type&MB_TYPE_BACKWARD){
+                    copy_context_before_encode(s, &backup_s, MB_TYPE_BACKWARD);
+                    s->mv_dir = MV_DIR_BACKWARD;
+                    s->mv_type = MV_TYPE_16X16;
+                    s->mb_intra= 0;
+                    s->mv[1][0][0] = s->b_back_mv_table[xy][0];
+                    s->mv[1][0][1] = s->b_back_mv_table[xy][1];
+                    init_put_bits(&s->pb, bit_buf[4], 3000, NULL, NULL);
+                    s->block= s->blocks[next_block];
+
+                    encode_mb(s, s->mv[1][0][0], s->mv[1][0][1]);
+                    d= get_bit_count(&s->pb);
+                    if(d<dmin){
+                        flush_put_bits(&s->pb);
+                        dmin=d;
+                        copy_context_after_encode(&best_s, s, MB_TYPE_BACKWARD);
+                        best=4;
+                        next_block^=1;
+                    }
+                }
+                if(mb_type&MB_TYPE_BIDIR){
+                    copy_context_before_encode(s, &backup_s, MB_TYPE_BIDIR);
+                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
+                    s->mv_type = MV_TYPE_16X16;
+                    s->mb_intra= 0;
+                    s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
+                    s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
+                    s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
+                    s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
+                    init_put_bits(&s->pb, bit_buf[5], 3000, NULL, NULL);
+                    s->block= s->blocks[next_block];
+
+                    encode_mb(s, 0, 0);
+                    d= get_bit_count(&s->pb);
+                    if(d<dmin){
+                        flush_put_bits(&s->pb);
+                        dmin=d;
+                        copy_context_after_encode(&best_s, s, MB_TYPE_BIDIR);
+                        best=5;
+                        next_block^=1;
+                    }
+                }
+                if(mb_type&MB_TYPE_DIRECT){
+                    copy_context_before_encode(s, &backup_s, MB_TYPE_DIRECT);
+                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
+                    s->mv_type = MV_TYPE_16X16; //FIXME
+                    s->mb_intra= 0;
+                    s->mv[0][0][0] = s->b_direct_forw_mv_table[xy][0];
+                    s->mv[0][0][1] = s->b_direct_forw_mv_table[xy][1];
+                    s->mv[1][0][0] = s->b_direct_back_mv_table[xy][0];
+                    s->mv[1][0][1] = s->b_direct_back_mv_table[xy][1];
+                    init_put_bits(&s->pb, bit_buf[6], 3000, NULL, NULL);
+                    s->block= s->blocks[next_block];
+
+                    encode_mb(s, s->b_direct_mv_table[xy][0], s->b_direct_mv_table[xy][1]);
+                    d= get_bit_count(&s->pb);
+                    if(d<dmin){
+                        flush_put_bits(&s->pb);
+                        dmin=d;
+                        copy_context_after_encode(&best_s, s, MB_TYPE_DIRECT);
+                        best=6;
+                        next_block^=1;
                     }
                 }
                 if(mb_type&MB_TYPE_INTRA){
                     copy_context_before_encode(s, &backup_s, MB_TYPE_INTRA);
+                    s->mv_dir = MV_DIR_FORWARD;
                     s->mv_type = MV_TYPE_16X16;
                     s->mb_intra= 1;
                     s->mv[0][0][0] = 0;
                     s->mv[0][0][1] = 0;
                     init_put_bits(&s->pb, bit_buf[0], 3000, NULL, NULL);
-                    s->block= s->intra_block;
+                    s->block= s->blocks[next_block];
                    
                     encode_mb(s, 0, 0);
                     d= get_bit_count(&s->pb);
@@ -1706,6 +1801,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                         dmin=d;
                         copy_context_after_encode(&best_s, s, MB_TYPE_INTRA);
                         best=0;
+                        next_block^=1;
                     }
                     /* force cleaning of ac/dc pred stuff if needed ... */
                     if(s->h263_pred || s->h263_aic)
@@ -1718,30 +1814,30 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                 int motion_x, motion_y;
                 s->mv_type=MV_TYPE_16X16;
                 // only one MB-Type possible
-                //FIXME convert to swicth()
-                if(mb_type&MB_TYPE_INTRA){
+                switch(mb_type){
+                case MB_TYPE_INTRA:
                     s->mv_dir = MV_DIR_FORWARD;
                     s->mb_intra= 1;
                     motion_x= s->mv[0][0][0] = 0;
                     motion_y= s->mv[0][0][1] = 0;
-                }else if(mb_type&MB_TYPE_INTER){
-                    int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
+                    break;
+                case MB_TYPE_INTER:
                     s->mv_dir = MV_DIR_FORWARD;
                     s->mb_intra= 0;
                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
-                }else if(mb_type&MB_TYPE_DIRECT){
-                    int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
+                    break;
+                case MB_TYPE_DIRECT:
                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
                     s->mb_intra= 0;
-                    motion_x=0;
-                    motion_y=0;
-                    s->mv[0][0][0] = 0;
-                    s->mv[0][0][1] = 0;
-                    s->mv[1][0][0] = 0;
-                    s->mv[1][0][1] = 0;
-                }else if(mb_type&MB_TYPE_BIDIR){
-                    int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
+                    motion_x=s->b_direct_mv_table[xy][0];
+                    motion_y=s->b_direct_mv_table[xy][1];
+                    s->mv[0][0][0] = s->b_direct_forw_mv_table[xy][0];
+                    s->mv[0][0][1] = s->b_direct_forw_mv_table[xy][1];
+                    s->mv[1][0][0] = s->b_direct_back_mv_table[xy][0];
+                    s->mv[1][0][1] = s->b_direct_back_mv_table[xy][1];
+                    break;
+                case MB_TYPE_BIDIR:
                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
                     s->mb_intra= 0;
                     motion_x=0;
@@ -1750,25 +1846,31 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
-                }else if(mb_type&MB_TYPE_BACKWARD){
-                    int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
+                    break;
+                case MB_TYPE_BACKWARD:
                     s->mv_dir = MV_DIR_BACKWARD;
                     s->mb_intra= 0;
                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
-                }else if(mb_type&MB_TYPE_FORWARD){
-                    int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
+                    break;
+                case MB_TYPE_FORWARD:
                     s->mv_dir = MV_DIR_FORWARD;
                     s->mb_intra= 0;
                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
 //                    printf(" %d %d ", motion_x, motion_y);
-                }else{
+                    break;
+                default:
                     motion_x=motion_y=0; //gcc warning fix
                     printf("illegal MB type\n");
                 }
                 encode_mb(s, motion_x, motion_y);
             }
+            /* clean the MV table in IPS frames for direct mode in B frames */
+            if(s->mb_intra /* && I,P,S_TYPE */){
+                s->p_mv_table[xy][0]=0;
+                s->p_mv_table[xy][1]=0;
+            }
 
             MPV_decode_mb(s, s->block);
         }
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index 841d40fb3c..24943e5b19 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -141,7 +141,8 @@ typedef struct MpegEncContext {
     INT16 (*b_direct_forw_mv_table)[2];/* MV table (1MV per MB) direct mode b-frame encoding */
     INT16 (*b_direct_back_mv_table)[2];/* MV table (1MV per MB) direct mode b-frame encoding */
     INT16 (*b_direct_mv_table)[2];     /* MV table (1MV per MB) direct mode b-frame encoding */
-    int me_method;          /* ME algorithm */
+    int me_method;                     /* ME algorithm */
+    uint8_t *me_scratchpad;            /* data area for the ME algorithm, so that the ME doesn't need to malloc/free */
     int mv_dir;
 #define MV_DIR_BACKWARD  1
 #define MV_DIR_FORWARD   2
@@ -164,7 +165,8 @@ typedef struct MpegEncContext {
     UINT8 *fcode_tab; /* smallest fcode needed for each MV */
 
     int has_b_frames;
-    int no_rounding; /* apply no rounding to motion compensation (MPEG4, msmpeg4, ...) */
+    int no_rounding; /* apply no rounding to motion compensation (MPEG4, msmpeg4, ...) 
+                        for b-frames the rounding mode is always 0 */
 
     /* macroblock layer */
     int mb_x, mb_y;
@@ -335,9 +337,7 @@ typedef struct MpegEncContext {
     UINT32 mb_line_avgsize;
     
     DCTELEM (*block)[64]; /* points to one of the following blocks */
-    DCTELEM intra_block[6][64] __align8;
-    DCTELEM inter_block[6][64] __align8;
-    DCTELEM inter4v_block[6][64] __align8;
+    DCTELEM blocks[2][6][64] __align8; // for HQ mode we need to keep the best block
     void (*dct_unquantize_mpeg1)(struct MpegEncContext *s, 
                            DCTELEM *block, int n, int qscale);
     void (*dct_unquantize_mpeg2)(struct MpegEncContext *s, 
@@ -421,6 +421,7 @@ INT16 *h263_pred_motion(MpegEncContext * s, int block,
                         int *px, int *py);
 void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n, 
                    int dir);
+void ff_set_mpeg4_time(MpegEncContext * s, int picture_number);
 void mpeg4_encode_picture_header(MpegEncContext *s, int picture_number);
 void h263_encode_init(MpegEncContext *s);